RubyGems - viddl-rb - Versions diffs - 0.7 → 0.8 - Mend

viddl-rb 0.7 → 0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14)

data/Gemfile.lock +21 -19
data/README.md +23 -11
data/Rakefile +5 -1
data/bin/helper/downloader.rb +2 -7
data/bin/helper/parameter-parser.rb +93 -51
data/bin/viddl-rb +10 -4
data/helper/audio-helper.rb +2 -3
data/helper/download-helper.rb +69 -61
data/helper/utility-helper.rb +31 -2
data/lib/viddl-rb.rb +1 -11
data/plugins/soundcloud.rb +18 -15
data/plugins/youtube.rb +271 -196
metadata +25 -27
data/CHANGELOG.txt +0 -14

data/helper/utility-helper.rb CHANGED

@@ -10,7 +10,36 @@ module ViddlRb
         ViddlRb.class_eval(File.read(plugin))
       end
     end
-  end
-end
+    #checks to see whether the os has a certain utility like wget or curl
+    #`` returns the standard output of the process
+    #system returns the exit code of the process
+    def self.os_has?(utility)
+      windows = ENV['OS'] =~ /windows/i
+      unless windows
+        `which #{utility}`.include?(utility.to_s)
+      else
+        if !system("where /q where").nil?   #if Windows has the where utility
+          system("where /q #{utility}")     #/q is the quiet mode flag
+        else
+          begin                             #as a fallback we just run the utility itself
+            system(utility)
+          rescue Errno::ENOENT
+            false
+          end
+        end
+      end
+    end
+    #recursively get the final location (after following all redirects) for an url.
+    def self.get_final_location(url)
+      Net::HTTP.get_response(URI(url)) do |res|
+        location = res["location"]
+        return url if location.nil?
+        return get_final_location(location)
+      end
+    end
+  end
+end

data/lib/viddl-rb.rb CHANGED

@@ -95,19 +95,9 @@ module ViddlRb
   def self.follow_all_redirects(urls_filenames)
     urls_filenames.map do |uf|
       url = uf[:url]
-      final_location = get_final_location(url)
+      final_location = UtilityHelper.get_final_location(url)
       {:url => final_location, :name => uf[:name]}
     end
   end
   private_class_method :follow_all_redirects
-  #recursively get the final location (after following all redirects) for an url.
-  def self.get_final_location(url)
-    Net::HTTP.get_response(URI(url)) do |res|
-      location = res["location"]
-      return url if location.nil?
-      return get_final_location(location)
-    end
-  end
-  private_class_method :get_final_location
 end

data/plugins/soundcloud.rb CHANGED

@@ -1,4 +1,4 @@
-require 'rest_client'
+require 'open-uri'
 class Soundcloud < PluginBase
   # this will be called by the main app to check whether this plugin is responsible for the url passed
   def self.matches_provider?(url)
@@ -7,7 +7,7 @@ class Soundcloud < PluginBase
   # return the url for original video file and title
   def self.get_urls_and_filenames(url, options = {})
-    doc          = Nokogiri::HTML(RestClient.get(url).body)
+    doc          = Nokogiri::HTML(open(get_http_url(url)))
     download_filename = doc.at("#main-content-inner img[class=waveform]").attributes["src"].value.to_s.match(/\.com\/(.+)\_/)[1]
     download_url = "http://media.soundcloud.com/stream/#{download_filename}"
     file_name    = transliterate("#{doc.at('//h1/em').text.chomp}") + ".mp3"
@@ -16,24 +16,27 @@ class Soundcloud < PluginBase
   end
   def self.transliterate(str)
-  # Based on permalink_fu by Rick Olsen
+    # Based on permalink_fu by Rick Olsen
-  # Downcase string
-  str.downcase!
+    # Downcase string
+    str.downcase!
-  # Remove apostrophes so isn't changes to isnt
-  str.gsub!(/'/, '')
+    # Remove apostrophes so isn't changes to isnt
+    str.gsub!(/'/, '')
-  # Replace any non-letter or non-number character with a space
-  str.gsub!(/[^A-Za-z0-9]+/, ' ')
+    # Replace any non-letter or non-number character with a space
+    str.gsub!(/[^A-Za-z0-9]+/, ' ')
-  # Remove spaces from beginning and end of string
-  str.strip!
+    # Remove spaces from beginning and end of string
+    str.strip!
-  # Replace groups of spaces with single hyphen
-  str.gsub!(/\ +/, '-')
+    # Replace groups of spaces with single hyphen
+    str.gsub!(/\ +/, '-')
-  str
-end
+    str
+  end
+  def self.get_http_url(url)
+    url.sub(/https?:\/\//, "http:\/\/")
+  end
 end

data/plugins/youtube.rb CHANGED

@@ -1,196 +1,271 @@
-class Youtube < PluginBase
-  #this will be called by the main app to check whether this plugin is responsible for the url passed
-  def self.matches_provider?(url)
-    url.include?("youtube.com") || url.include?("youtu.be")
-  end
-  #get all videos and return their urls in an array
-  def self.get_video_urls(feed_url)
-    puts "[YOUTUBE] Retrieving videos..."
-    urls_titles = Hash.new
-    result_feed = Nokogiri::XML(open(feed_url))
-    urls_titles.merge!(grab_ut(result_feed))
-    #as long as the feed has a next link we follow it and add the resulting video urls
-    loop do
-      next_link = result_feed.search("//feed/link[@rel='next']").first
-      break if next_link.nil?
-      result_feed = Nokogiri::HTML(open(next_link["href"]))
-      urls_titles.merge!(grab_ut(result_feed))
-    end
-    self.filter_urls(urls_titles)
-  end
-  #returns only the urls that match the --filter argument regex (if present)
-  def self.filter_urls(url_hash)
-    if @filter
-      puts "[YOUTUBE] Using filter: #{@filter}"
-      filtered = url_hash.select { |url, title| title =~ @filter }
-      filtered.keys
-    else
-      url_hash.keys
-    end
-  end
-  #extract all video urls and their titles from a feed and return in a hash
-  def self.grab_ut(feed)
-    feed.remove_namespaces!  #so that we can get to the titles easily
-    urls   = feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }
-    titles = feed.search("//entry/group/title").map { |title| title.text }
-    Hash[urls.zip(titles)]    #hash like this: url => title
-  end
-  def self.parse_playlist(url)
-    #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
-    #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
-    #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
-    playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
-    puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
-    feed_url = "http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?&max-results=50&v=2"
-    url_array = self.get_video_urls(feed_url)
-    puts "[YOUTUBE] #{url_array.size} links found!"
-    url_array
-  end
-  def self.parse_user(username)
-    puts "[YOUTUBE] User: #{username}"
-    feed_url = "http://gdata.youtube.com/feeds/api/users/#{username}/uploads?&max-results=50&v=2"
-    url_array = get_video_urls(feed_url)
-    puts "[YOUTUBE] #{url_array.size} links found!"
-    url_array
-  end
-  def self.get_urls_and_filenames(url, options = {})
-    @filter = options[:playlist_filter]                                    #used to filter a playlist in self.filter_urls
-    return_values = []
-    if url.include?("view_play_list") || url.include?("playlist?list=")    #if playlist
-      puts "[YOUTUBE] playlist found! analyzing..."
-      files = self.parse_playlist(url)
-      puts "[YOUTUBE] Starting playlist download"
-      files.each do |file|
-        puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
-        return_values << self.grab_single_url_filename(file)
-      end
-    elsif match = url.match(/\/user\/([\w\d]+)$/)                          #if user url, e.g. youtube.com/user/woot
-      username = match[1]
-      video_urls = self.parse_user(username)
-      puts "[YOUTUBE] Starting user videos download"
-      video_urls.each do |url|
-        puts "[YOUTUBE] Downloading next user video (#{url})"
-        return_values << self.grab_single_url_filename(url)
-      end
-    else                                                                   #if single video
-      return_values << self.grab_single_url_filename(url)
-    end
-    return_values.reject! { |value| value == :no_embed }   #remove results that can not be downloaded
-    if return_values.empty?
-      raise CouldNotDownloadVideoError, "No videos could be downloaded - embedding disabled."
-    else
-      return_values
-    end
-  end
-  def self.grab_single_url_filename(url)
-    #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
-    #addition: might also look like this /v/abc5-a5afe5agae6g
-    # alternative:  video_id = url[/v[\/=]([\w-]*)&?/, 1]
-    # First get the redirect
-    if url.include?("youtu.be")
-      url = open(url).base_uri.to_s
-    end
-    video_id = url[/(v|embed)[=\/]([^\/\?\&]*)/,2]
-    if video_id.nil?
-      raise CouldNotDownloadVideoError, "No video id found."
-    else
-      puts "[YOUTUBE] ID FOUND: #{video_id}"
-    end
-    #let's get some infos about the video. data is urlencoded
-    yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
-    video_info = RestClient.get(yt_url).body
-    #converting the huge infostring into a hash. simply by splitting it at the & and then splitting it into key and value arround the =
-    #[...]blabla=blubb&narf=poit&marc=awesome[...]
-    video_info_hash = Hash[*video_info.split("&").collect { |v|
-      key, encoded_value = v.split("=")
-      if encoded_value.to_s.empty?
-        value = ""
-      else
-      #decode until everything is "normal"
-        while (encoded_value != CGI::unescape(encoded_value)) do
-          #"decoding"
-          encoded_value = CGI::unescape(encoded_value)
-        end
-        value = encoded_value
-      end
-      if key =~ /_map/
-        orig_value = value
-        value = value.split(",")
-        if key == "url_encoded_fmt_stream_map"
-          url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
-          result_hash = {}
-          url_array.each do |url|
-            next if url.to_s.empty? || url.to_s.match(/^itag/)
-            format_id = url[/\&itag=(\d+)/, 1]
-            result_hash[format_id] = url
-          end
-          value = result_hash
-        elsif key == "fmt_map"
-          value = Hash[*value.collect { |v|
-              k2, *v2 = v.split("/")
-              [k2, v2]
-            }.flatten(1)]
-        elsif key == "fmt_url_map" || key == "fmt_stream_map"
-          Hash[*value.collect { |v| v.split("|")}.flatten]
-        end
-      end
-      [key, value]
-    }.flatten]
-    if video_info_hash["status"] == "fail"
-      return :no_embed
-    end
-    title = video_info_hash["title"]
-    length_s = video_info_hash["length_seconds"]
-    token = video_info_hash["token"]
-    #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
-    fmt_list = video_info_hash["fmt_list"].split(",")
-    available_formats = fmt_list.map{|format| format.split("/").first}
-    format_ext = {}
-    format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
-    format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
-    format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
-    format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
-    format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
-    format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
-    format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
-    format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
-    format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
-    format_ext["17"] = {:extension => "3gp", :name => "3gp"}
-    #since 1.8 doesn't do ordered hashes
-    prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
-    selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
-    puts "[YOUTUBE] Title: #{title}"
-    puts "[YOUTUBE] Length: #{length_s} s"
-    puts "[YOUTUBE] t-parameter: #{token}"
-    #best quality seems always to be firsts
-    puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
-    #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
-    download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
-    #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
-    download_url = $1 if download_url =~ /(.*?);\scodecs=/
-    file_name = PluginBase.make_filename_safe(title) + "." + format_ext[selected_format][:extension]
-    puts "downloading to " + file_name
-    {:url => download_url, :name => file_name}
-  end
-end
+class Youtube < PluginBase
+  # see http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
+  # TODO: we don't have all the formats from the wiki article here
+  VIDEO_FORMATS = {
+    "38" => {:extension => "mp4",  :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"},
+    "37" => {:extension => "mp4",  :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"},
+    "22" => {:extension => "mp4",  :name => "MP4 1280x720 (H.264, AAC)"},
+    "46" => {:extension => "webm", :name => "WebM 1920x1080 (VP8, Vorbis)"},
+    "45" => {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"},
+    "44" => {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"},
+    "43" => {:extension => "webm", :name => "WebM 480×360 (VP8, Vorbis)"},
+    "18" => {:extension => "mp4",  :name => "MP4 640x360 (H.264, AAC)"},
+    "35" => {:extension => "flv",  :name => "FLV 854x480 (H.264, AAC)"},
+    "34" => {:extension => "flv",  :name => "FLV 640x360 (H.264, AAC)"},
+    "5"  => {:extension => "flv",  :name => "FLV 400x240 (Soerenson H.263)"},
+    "17" => {:extension => "3gp",  :name => "3gp"}
+  }
+  DEFAULT_FORMAT_ORDER = %w[38 37 22 46 45 44 43 18 35 34 5 17]
+  VIDEO_INFO_URL       = "http://www.youtube.com/get_video_info?video_id="
+  VIDEO_INFO_PARMS     = "&ps=default&eurl=&gl=US&hl=en"
+  # this will be called by the main app to check whether this plugin is responsible for the url passed
+  def self.matches_provider?(url)
+    url.include?("youtube.com") || url.include?("youtu.be")
+  end
+  def self.get_urls_and_filenames(url, options = {})
+    @quality    = options[:quality]
+    filter      = options[:playlist_filter]
+    parser      = PlaylistParser.new
+    return_vals = []
+    if playlist_urls = parser.get_playlist_urls(url, filter)
+      playlist_urls.each { |url| return_vals << grab_single_url_filename(url) }
+    else
+      return_vals << grab_single_url_filename(url)
+    end
+    clean_return_values(return_vals)
+  end
+  def self.clean_return_values(return_values)
+    cleaned = return_values.reject { |val| val == :no_embed }
+    if cleaned.empty?
+      download_error("No videos could be downloaded.")
+    else
+      cleaned
+    end
+  end
+  def self.grab_single_url_filename(url)
+    grab_url_embeddable(url) || grab_url_non_embeddable(url)
+  end
+  def self.grab_url_embeddable(url)
+    video_info   = get_video_info(url)
+    video_params = extract_video_parameters(video_info)
+    unless video_params[:embeddable]
+      notify("VIDEO IS NOT EMBEDDABLE")
+      return false
+    end
+    urls_formats    = extract_urls_formats(video_info)
+    selected_format = choose_format(urls_formats)
+    title           = video_params[:title]
+    file_name       = PluginBase.make_filename_safe(title) + "." + VIDEO_FORMATS[selected_format][:extension]
+    {:url => urls_formats[selected_format], :name => file_name}
+  end
+  def self.grab_url_non_embeddable(url)
+    video_info = open(url).read
+    stream_map = video_info[/url_encoded_fmt_stream_map\" *: *\"([^\"]+)\"/,1]
+    urls_formats = parse_stream_map(url_decode(stream_map))
+    selected_format = choose_format(urls_formats)
+    title = video_info[/<meta name="title" content="([^"]*)">/, 1]
+    file_name = PluginBase.make_filename_safe(title) + "." + VIDEO_FORMATS[selected_format][:extension]
+    # cleaning
+    clean_url = urls_formats[selected_format].gsub(/\\u0026[^&]*/, "")
+    {:url => clean_url, :name => file_name}
+  end
+  def self.get_video_info(url)
+    id = extract_video_id(url)
+    request_url = VIDEO_INFO_URL + id + VIDEO_INFO_PARMS
+    open(request_url).read
+  end
+  def self.extract_video_id(url)
+    # the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
+    # addition: might also look like this /v/abc5-a5afe5agae6g
+    # alternative:  video_id = url[/v[\/=]([\w-]*)&?/, 1]
+    url = open(url).base_uri.to_s if url.include?("youtu.be")
+    video_id = url[/(v|embed)[=\/]([^\/\?\&]*)/, 2]
+    if video_id
+      notify("ID FOUND: #{video_id}")
+      video_id
+    else
+      download_error("No video id found.")
+    end
+  end
+  def self.extract_video_parameters(video_info)
+    decoded = url_decode(video_info)
+    {:title      => decoded[/title=(.+?)(?:&|$)/, 1],
+     :length_sec => decoded[/length_seconds=(.+?)(?:&|$)/, 1],
+     :author     => decoded[/author=(.+?)(?:&|$)/, 1],
+     :embeddable => !decoded.include?("status=fail")}
+  end
+  def self.extract_urls_formats(video_info)
+    stream_map = video_info[/url_encoded_fmt_stream_map=(.+?)(?:&|$)/, 1]
+    parse_stream_map(stream_map)
+  end
+  def self.parse_stream_map(stream_map)
+    urls = extract_download_urls(stream_map)
+    formats_urls = {}
+    urls.each do |url|
+      format = url[/itag=(\d+)/, 1]
+      formats_urls[format] = url
+    end
+    formats_urls
+  end
+  def self.extract_download_urls(stream_map)
+    entries = stream_map.split("%2C")
+    decoded = entries.map { |entry| url_decode(entry) }
+    decoded.map do |entry|
+      url = entry[/url=(.*?itag=.+?)(?:itag=|;|$)/, 1]
+      sig = entry[/sig=(.+?)(?:&|$)/, 1]
+      url + "&signature=#{sig}"
+    end
+  end
+  def self.choose_format(urls_formats)
+    available_formats = urls_formats.keys
+    if @quality                        #if the user specified a format
+      ext = @quality[:extension]
+      res = @quality[:resolution]
+      #gets a nested array with all the formats of the same res as the user wanted
+      requested = VIDEO_FORMATS.select { |id, format| format[:name].include?(res) }.to_a
+      if requested.empty?
+        notify "Requested format \"#{res}:#{ext}\" not found. Downloading default format."
+        get_default_format(available_formats)
+      else
+        pick = requested.find { |format| format[1][:extension] == ext }             # get requsted extension if possible
+        pick ? pick.first : get_default_format(requested.map { |req| req.first })   # else return the default format
+      end
+    else
+      get_default_format(available_formats)
+    end
+  end
+  def self.get_default_format(available)
+    DEFAULT_FORMAT_ORDER.find { |default| available.include?(default) }
+  end
+  def self.url_decode(text)
+    while text != (decoded = CGI::unescape(text)) do
+      text = decoded
+    end
+    text
+  end
+  def self.notify(message)
+    puts "[YOUTUBE] #{message}"
+  end
+  def self.download_error(message)
+    raise CouldNotDownloadVideoError, message
+  end
+  #
+  # class PlaylistParser
+  #_____________________
+  class PlaylistParser
+    PLAYLIST_FEED = "http://gdata.youtube.com/feeds/api/playlists/%s?&max-results=50&v=2"
+    USER_FEED     = "http://gdata.youtube.com/feeds/api/users/%s/uploads?&max-results=50&v=2"
+    def get_playlist_urls(url, filter = nil)
+      @filter = filter
+      if url.include?("view_play_list") || url.include?("playlist?list=")     # if playlist URL
+        parse_playlist(url)
+      elsif username = url[/\/user\/([\w\d]+)(?:\/|$)/, 1]                       # if user URL
+        parse_user(username)
+      else                                                                    # if neither return nil
+        nil
+      end
+    end
+    def parse_playlist(url)
+      #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
+      #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
+      #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
+      playlist_ID = url[/(?:list=PL|p=)(.+?)(?:&|\/|$)/, 1]
+      notify "Playlist ID: #{playlist_ID}"
+      feed_url = PLAYLIST_FEED % playlist_ID
+      url_array = get_video_urls(feed_url)
+      notify "#{url_array.size} links found!"
+      url_array
+    end
+    def parse_user(username)
+      notify "User: #{username}"
+      feed_url = USER_FEED % username
+      url_array = get_video_urls(feed_url)
+      notify "#{url_array.size} links found!"
+      url_array
+    end
+    #get all videos and return their urls in an array
+    def get_video_urls(feed_url)
+      notify "Retrieving videos..."
+      urls_titles = {}
+      result_feed = Nokogiri::XML(open(feed_url))
+      urls_titles.merge!(grab_urls_and_titles(result_feed))
+      #as long as the feed has a next link we follow it and add the resulting video urls
+      loop do
+        next_link = result_feed.search("//feed/link[@rel='next']").first
+        break if next_link.nil?
+        result_feed = Nokogiri::HTML(open(next_link["href"]))
+        urls_titles.merge!(grab_urls_and_titles(result_feed))
+      end
+      filter_urls(urls_titles)
+    end
+    #extract all video urls and their titles from a feed and return in a hash
+    def grab_urls_and_titles(feed)
+      feed.remove_namespaces!  #so that we can get to the titles easily
+      urls   = feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }
+      titles = feed.search("//entry/group/title").map { |title| title.text }
+      Hash[urls.zip(titles)]    #hash like this: url => title
+    end
+    #returns only the urls that match the --filter argument regex (if present)
+    def filter_urls(url_hash)
+      if @filter
+        notify "Using filter: #{@filter}"
+        filtered = url_hash.select { |url, title| title =~ @filter }
+        filtered.keys
+      else
+        url_hash.keys
+      end
+    end
+    def notify(message)
+      Youtube.notify(message)
+    end
+  end
+end