viddl-rb 0.6 → 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -3,59 +3,58 @@
3
3
  # Videos that have URLs that look like this: http://www.metacafe.com/watch/cb-q78rA_lp9s1_9EJsqKJ5BdIHdDNuHa1l/ cannot be downloaded.
4
4
 
5
5
  class Metacafe < PluginBase
6
- BASE_FILE_URL = "http://v.mccont.com/ItemFiles/%5BFrom%20www.metacafe.com%5D%20"
7
- API_BASE = "http://www.metacafe.com/api/"
8
-
9
- #this will be called by the main app to check whether this plugin is responsible for the url passed
10
- def self.matches_provider?(url)
11
- url.include?("metacafe.com")
12
- end
13
-
14
- def self.get_urls_and_filenames(url)
15
- video_id = get_video_id(url)
16
- info_url = API_BASE + "item/#{video_id}" #use the API to get the full video url
17
- info_doc = Nokogiri::XML(open(info_url))
18
-
19
- video_swf_url = get_video_swf_url(info_doc, video_id)
20
-
21
- #by getting the video swf url we get a http redirect url with all info needed
22
- http_response = Net::HTTP.get_response(URI(video_swf_url))
23
- redirect_url = http_response['location']
24
-
25
- file_info = get_file_info(redirect_url, video_id)
26
- key_string = get_file_key(redirect_url)
27
- file_url_with_key = file_info[:file_url] + "?__gda__=#{key_string}"
28
- escaped_url = CGI::escape(file_url_with_key)
29
-
30
- [{:url => escaped_url, :name => get_video_name(video_swf_url) + file_info[:extension]}]
31
- end
32
-
33
- def self.get_video_id(url)
34
- id = url[/watch\/(\d+)/, 1]
35
- unless id
36
- puts "ERROR: Can only download videos that has the ID in the URL."
37
- exit
38
- end
39
- id
40
- end
6
+ BASE_FILE_URL = "http://v.mccont.com/ItemFiles/%5BFrom%20www.metacafe.com%5D%20"
7
+ API_BASE = "http://www.metacafe.com/api/"
8
+
9
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
10
+ def self.matches_provider?(url)
11
+ url.include?("metacafe.com")
12
+ end
13
+
14
+ def self.get_urls_and_filenames(url, options = {})
15
+ video_id = get_video_id(url)
16
+ info_url = API_BASE + "item/#{video_id}" #use the API to get the full video url
17
+ info_doc = Nokogiri::XML(open(info_url))
18
+
19
+ video_swf_url = get_video_swf_url(info_doc, video_id)
20
+
21
+ #by getting the video swf url we get a http redirect url with all info needed
22
+ http_response = Net::HTTP.get_response(URI(video_swf_url))
23
+ redirect_url = CGI::unescape(http_response['location'])
41
24
 
42
- def self.get_video_swf_url(info_doc, video_id)
43
- video_url = info_doc.xpath("//rss/channel/item/link").text
44
- video_url.sub!("watch", "fplayer")
45
- video_url.sub!(/\/\z/, ".swf") # remove last '/' and add .swf in it's place
46
- end
47
-
48
- #$1 = file name part 1, $2 = file name part 2, $3 = file extension
49
- def self.get_file_info(redirect_url, video_id)
50
- redirect_url =~ /mediaURL.+?metacafe\.com%.+?%\d+\.(\d+)\.(\d+)(\.[\d\w]+)/
51
- {:file_url => "#{BASE_FILE_URL}#{video_id}\.#{$1}\.#{$2}#{$3}", :extension => $3}
52
- end
53
-
54
- def self.get_file_key(redirect_url)
55
- redirect_url[/key.+?\%22([\w\d]+?)\%22/, 1]
56
- end
57
-
58
- def self.get_video_name(url)
59
- url[/fplayer\/\d+\/([\d\w]+)\.swf/, 1]
60
- end
25
+ file_info = get_file_info(redirect_url, video_id)
26
+ key_string = get_file_key(redirect_url)
27
+ file_url_with_key = file_info[:file_url] + "?__gda__=#{key_string}"
28
+
29
+ [{:url => file_url_with_key, :name => get_video_name(video_swf_url) + file_info[:extension]}]
30
+ end
31
+
32
+ def self.get_video_id(url)
33
+ id = url[/watch\/(\d+)/, 1]
34
+ unless id
35
+ raise CouldNotDownloadVideoError, "Can only download videos that has the ID in the URL."
36
+ end
37
+ id
38
+ end
39
+
40
+ def self.get_video_swf_url(info_doc, video_id)
41
+ video_url = info_doc.xpath("//rss/channel/item/link").text
42
+ video_url.sub!("watch", "fplayer")
43
+ video_url.sub!(/\/\z/, ".swf") # remove last '/' and add .swf in it's place
44
+ end
45
+
46
+ #$1 = file name part 1, $2 = file name part 2, $3 = file extension
47
+ def self.get_file_info(redirect_url, video_id)
48
+ redirect_url =~ /mediaURL.+?metacafe\.com%.+?%\d+\.(\d+)\.(\d+)(\.[\d\w]+)/
49
+ {:file_url => "#{BASE_FILE_URL}#{video_id}\.#{$1}\.#{$2}#{$3}", :extension => $3}
50
+ end
51
+
52
+ def self.get_file_key(redirect_url)
53
+ redirect_url[/key.+?value":"([\w\d]+)"/, 1]
54
+ end
55
+
56
+ def self.get_video_name(url)
57
+ name = url[/fplayer\/\d+\/([\d\w]+)\.swf/, 1]
58
+ PluginBase.make_filename_safe(name)
59
+ end
61
60
  end
@@ -1,13 +1,13 @@
1
+ require 'rest_client'
1
2
  class Soundcloud < PluginBase
2
- require 'iconv'
3
3
  # this will be called by the main app to check whether this plugin is responsible for the url passed
4
4
  def self.matches_provider?(url)
5
5
  url.include?("soundcloud.com")
6
6
  end
7
7
 
8
8
  # return the url for original video file and title
9
- def self.get_urls_and_filenames(url)
10
- doc = Nokogiri::XML(open(url))
9
+ def self.get_urls_and_filenames(url, options = {})
10
+ doc = Nokogiri::HTML(RestClient.get(url).body)
11
11
  download_filename = doc.at("#main-content-inner img[class=waveform]").attributes["src"].value.to_s.match(/\.com\/(.+)\_/)[1]
12
12
  download_url = "http://media.soundcloud.com/stream/#{download_filename}"
13
13
  file_name = transliterate("#{doc.at('//h1/em').text.chomp}") + ".mp3"
@@ -18,25 +18,22 @@ class Soundcloud < PluginBase
18
18
  def self.transliterate(str)
19
19
  # Based on permalink_fu by Rick Olsen
20
20
 
21
- # Escape str by transliterating to UTF-8 with Iconv
22
- s = Iconv.iconv('ascii//ignore//translit', 'utf-8', str).to_s
23
-
24
21
  # Downcase string
25
- s.downcase!
22
+ str.downcase!
26
23
 
27
24
  # Remove apostrophes so isn't changes to isnt
28
- s.gsub!(/'/, '')
25
+ str.gsub!(/'/, '')
29
26
 
30
27
  # Replace any non-letter or non-number character with a space
31
- s.gsub!(/[^A-Za-z0-9]+/, ' ')
28
+ str.gsub!(/[^A-Za-z0-9]+/, ' ')
32
29
 
33
30
  # Remove spaces from beginning and end of string
34
- s.strip!
31
+ str.strip!
35
32
 
36
33
  # Replace groups of spaces with single hyphen
37
- s.gsub!(/\ +/, '-')
34
+ str.gsub!(/\ +/, '-')
38
35
 
39
- return s
36
+ str
40
37
  end
41
38
 
42
39
  end
@@ -1,46 +1,45 @@
1
1
  class Veoh < PluginBase
2
- VEOH_API_BASE = "http://www.veoh.com/api/"
3
- PREFERRED_FORMATS = [:mp4, :flash] # mp4 is preferred because it enables downloading full videos and not just previews
4
-
5
- #this will be called by the main app to check whether this plugin is responsible for the url passed
6
- def self.matches_provider?(url)
7
- url.include?("veoh.com")
8
- end
2
+ VEOH_API_BASE = "http://www.veoh.com/api/"
3
+ PREFERRED_FORMATS = [:mp4, :flash] # mp4 is preferred because it enables downloading full videos and not just previews
4
+
5
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
6
+ def self.matches_provider?(url)
7
+ url.include?("veoh.com")
8
+ end
9
9
 
10
- def self.get_urls_and_filenames(url)
11
- veoh_id = url[/\/watch\/([\w\d]+)/, 1]
12
- info_url = "#{VEOH_API_BASE}findByPermalink?permalink=#{veoh_id}"
13
- info_doc = Nokogiri::XML(open(info_url))
10
+ def self.get_urls_and_filenames(url, options = {})
11
+ veoh_id = url[/\/watch\/([\w\d]+)/, 1]
12
+ info_url = "#{VEOH_API_BASE}findByPermalink?permalink=#{veoh_id}"
13
+ info_doc = Nokogiri::XML(open(info_url))
14
14
 
15
- download_url = get_download_url(info_doc)
16
- file_name = get_file_name(info_doc, download_url)
15
+ download_url = get_download_url(info_doc)
16
+ file_name = get_file_name(info_doc, download_url)
17
17
 
18
- [{:url => download_url, :name => file_name}]
19
- end
20
-
21
- #returns the first valid download url string, in order of the prefered formats, that is found for the video
22
- def self.get_download_url(info_doc)
23
- PREFERRED_FORMATS.each do |format|
24
- a = get_attribute(format)
25
- download_attr = info_doc.xpath('//rsp/videoList/video').first.attributes[a]
26
- return(download_attr.content) unless download_attr.nil? || download_attr.content.empty?
27
- end
28
- end
29
-
30
- #the file name string is a combination of the video name and the extension
31
- def self.get_file_name(info_doc, download_url)
32
- name = info_doc.xpath('//rsp/videoList/video').first.attributes['title'].content
33
- name.gsub!(" ", "_") # replace spaces with underscores
34
- extension = download_url[/\/[\w\d]+(\.[\w\d]+)\?ct/, 1]
35
- name + extension
36
- end
18
+ [{:url => download_url, :name => file_name}]
19
+ end
20
+
21
+ #returns the first valid download url string, in order of the preferred formats, that is found for the video
22
+ def self.get_download_url(info_doc)
23
+ PREFERRED_FORMATS.each do |format|
24
+ a = get_attribute(format)
25
+ download_attr = info_doc.xpath('//rsp/videoList/video').first.attributes[a]
26
+ return(download_attr.content) unless download_attr.nil? || download_attr.content.empty?
27
+ end
28
+ end
29
+
30
+ #the file name string is a combination of the video name and the extension
31
+ def self.get_file_name(info_doc, download_url)
32
+ name = info_doc.xpath('//rsp/videoList/video').first.attributes['title'].content
33
+ extension = download_url[/\/[\w\d]+(\.[\w\d]+)\?ct/, 1]
34
+ PluginBase.make_filename_safe(name) + extension
35
+ end
37
36
 
38
- def self.get_attribute(format)
39
- case format
40
- when :mp4
41
- "ipodUrl"
42
- when :flash
43
- "previewUrl"
44
- end
45
- end
46
- end
37
+ def self.get_attribute(format)
38
+ case format
39
+ when :mp4
40
+ "ipodUrl"
41
+ when :flash
42
+ "previewUrl"
43
+ end
44
+ end
45
+ end
@@ -1,25 +1,32 @@
1
1
  class Vimeo < PluginBase
2
- #this will be called by the main app to check whether this plugin is responsible for the url passed
3
- def self.matches_provider?(url)
4
- url.include?("vimeo.com")
5
- end
6
-
7
- def self.get_urls_and_filenames(url)
8
- #the vimeo ID consists of 7 decimal numbers in the URL
9
- vimeo_id = url[/\d{7,8}/]
10
- doc = Nokogiri::XML(open("http://www.vimeo.com/moogaloop/load/clip:#{vimeo_id}"))
11
- title = doc.at("//video/caption").inner_text
12
- puts "[VIMEO] Title: #{title}"
13
- request_signature = doc.at("//request_signature").inner_text
14
- request_signature_expires = doc.at("//request_signature_expires").inner_text
15
-
2
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
3
+ def self.matches_provider?(url)
4
+ url.include?("vimeo.com")
5
+ end
6
+
7
+ def self.get_urls_and_filenames(url, options = {})
8
+ #the vimeo ID consists of 7 or 8 decimal digits in the URL
9
+ vimeo_id = url[/\d{7,8}/]
16
10
 
17
- puts "[VIMEO] Request Signature: #{request_signature} expires: #{request_signature_expires}"
18
-
19
- download_url = "http://www.vimeo.com/moogaloop/play/clip:#{vimeo_id}/#{request_signature}/#{request_signature_expires}/?q=hd"
20
- #todo: put the filename cleaning stuff into a seperate helper
21
- file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + ".flv"
22
- puts "downloading to " + file_name
23
- [{:url => download_url, :name => file_name}]
24
- end
25
- end
11
+ agent = Mechanize.new #use Mechanize for the automatic cookie handeling
12
+ agent.redirect_ok = false #don't follow redirects so we do not download the video when we get it's url
13
+
14
+ video_page = agent.get("http://vimeo.com/#{vimeo_id}")
15
+ page_html = video_page.root.inner_html
16
+ doc = Nokogiri::HTML(page_html)
17
+ title = doc.at('meta[property="og:title"]').attributes['content'].value
18
+ puts "[VIMEO] Title: #{title.inspect}"
19
+
20
+ #the timestamp and sig info is in the embedded player javascript in the video page
21
+ timestamp = page_html[/"timestamp":(\d+),/, 1]
22
+ signature = page_html[/"signature":"([\d\w]+)",/, 1]
23
+
24
+ redirect_url = "http://player.vimeo.com/play_redirect?clip_id=#{vimeo_id}&sig=#{signature}&time=#{timestamp}&quality=hd,sd&codecs=H264,VP8,VP6"
25
+
26
+ #the download url is the value of the location (redirect) header
27
+ download_url = agent.get(redirect_url).header["location"]
28
+ file_name = PluginBase.make_filename_safe(title) + ".mp4"
29
+
30
+ [{:url => download_url, :name => file_name}]
31
+ end
32
+ end
@@ -1,143 +1,196 @@
1
- class Youtube < PluginBase
2
- #this will be called by the main app to check whether this plugin is responsible for the url passed
3
- def self.matches_provider?(url)
4
- url.include?("youtube.com") || url.include?("youtu.be")
5
- end
6
-
7
- def self.parse_playlist(url)
8
- #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
9
- #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
10
- #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
11
-
12
- playlist_ID = url[/p=(\w{16})&?/,1]
13
- puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
14
- url_array = Array.new
15
- video_info = Nokogiri::HTML(open("http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?v=2"))
16
- video_info.search("//content").each do |video|
17
- url_array << video["url"] if video["url"].include?("http://www.youtube.com/v/") #filters out rtsp links
18
- end
19
-
20
- puts "[YOUTUBE] #{url_array.size} links found!"
21
- url_array
22
- end
23
-
24
-
25
- def self.get_urls_and_filenames(url)
26
- return_values = []
27
- if url.include?("view_play_list")
28
- puts "[YOUTUBE] playlist found! analyzing..."
29
- files = self.parse_playlist(url)
30
- puts "[YOUTUBE] Starting playlist download"
31
- files.each do |file|
32
- puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
33
- return_values << self.grab_single_url_filename(url)
34
- end
35
- else
36
- return_values << self.grab_single_url_filename(url)
37
- end
38
- return_values
39
- end
40
-
41
- def self.grab_single_url_filename(url)
42
- #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
43
- #addition: might also look like this /v/abc5-a5afe5agae6g
44
- # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
45
- # First get the redirect
46
- if url.include?("youtu.be")
47
- url = open(url).base_uri.to_s
48
- end
49
- video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/,2]
50
- if video_id.nil?
51
- puts "no video id found."
52
- exit
53
- else
54
- puts "[YOUTUBE] ID FOUND: #{video_id}"
55
- end
56
- #let's get some infos about the video. data is urlencoded
57
- yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
58
- video_info = open(yt_url).read
59
- #converting the huge infostring into a hash. simply by splitting it at the & and then splitting it into key and value arround the =
60
- #[...]blabla=blubb&narf=poit&marc=awesome[...]
61
- video_info_hash = Hash[*video_info.split("&").collect { |v|
62
- key, encoded_value = v.split("=")
63
- if encoded_value.to_s.empty?
64
- value = ""
65
- else
66
- #decode until everything is "normal"
67
- while (encoded_value != CGI::unescape(encoded_value)) do
68
- #"decoding"
69
- encoded_value = CGI::unescape(encoded_value)
70
- end
71
- value = encoded_value
72
- end
73
-
74
- if key =~ /_map/
75
- orig_value = value
76
- value = value.split(",")
77
- if key == "url_encoded_fmt_stream_map"
78
- url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
79
- result_hash = {}
80
- url_array.each do |url|
81
- next if url.to_s.empty?
82
- format_id = url.match(/\&itag=(\d+)/)[1]
83
- result_hash[format_id] = url
84
- end
85
- value = result_hash
86
- elsif key == "fmt_map"
87
- value = Hash[*value.collect{ |v|
88
- k2, *v2 = v.split("/")
89
- [k2, v2]
90
- }.flatten(1)]
91
- elsif key == "fmt_url_map" || key == "fmt_stream_map"
92
- Hash[*value.collect { |v| v.split("|")}.flatten]
93
- end
94
- end
95
- [key, value]
96
- }.flatten]
97
-
98
- if video_info_hash["status"] == "fail"
99
- puts "Error: embedding disabled, no video info found"
100
- exit
101
- end
102
-
103
- title = video_info_hash["title"]
104
- length_s = video_info_hash["length_seconds"]
105
- token = video_info_hash["token"]
106
-
107
-
108
- #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
109
- fmt_list = video_info_hash["fmt_list"].split(",")
110
- available_formats = fmt_list.map{|format| format.split("/").first}
111
-
112
- format_ext = {}
113
- format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
114
- format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
115
- format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
116
- format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
117
- format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
118
- format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
119
- format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
120
- format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
121
- format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
122
- format_ext["17"] = {:extension => "3gp", :name => "3gp"}
123
-
124
- #since 1.8 doesn't do ordered hashes
125
- prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
126
-
127
- selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
128
-
129
- puts "[YOUTUBE] Title: #{title}"
130
- puts "[YOUTUBE] Length: #{length_s} s"
131
- puts "[YOUTUBE] t-parameter: #{token}"
132
- #best quality seems always to be firsts
133
- puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
134
-
135
- #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
136
- download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
137
- #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
138
- download_url = $1 if download_url =~ /(.*?);\scodecs=/
139
- file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + format_ext[selected_format][:extension]
140
- puts "downloading to " + file_name
141
- {:url => download_url, :name => file_name}
142
- end
143
- end
1
+
2
+ class Youtube < PluginBase
3
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
4
+ def self.matches_provider?(url)
5
+ url.include?("youtube.com") || url.include?("youtu.be")
6
+ end
7
+
8
+ #get all videos and return their urls in an array
9
+ def self.get_video_urls(feed_url)
10
+ puts "[YOUTUBE] Retrieving videos..."
11
+ urls_titles = Hash.new
12
+ result_feed = Nokogiri::XML(open(feed_url))
13
+ urls_titles.merge!(grab_ut(result_feed))
14
+
15
+ #as long as the feed has a next link we follow it and add the resulting video urls
16
+ loop do
17
+ next_link = result_feed.search("//feed/link[@rel='next']").first
18
+ break if next_link.nil?
19
+ result_feed = Nokogiri::HTML(open(next_link["href"]))
20
+ urls_titles.merge!(grab_ut(result_feed))
21
+ end
22
+
23
+ self.filter_urls(urls_titles)
24
+ end
25
+
26
+ #returns only the urls that match the --filter argument regex (if present)
27
+ def self.filter_urls(url_hash)
28
+ if @filter
29
+ puts "[YOUTUBE] Using filter: #{@filter}"
30
+ filtered = url_hash.select { |url, title| title =~ @filter }
31
+ filtered.keys
32
+ else
33
+ url_hash.keys
34
+ end
35
+ end
36
+
37
+ #extract all video urls and their titles from a feed and return in a hash
38
+ def self.grab_ut(feed)
39
+ feed.remove_namespaces! #so that we can get to the titles easily
40
+ urls = feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }
41
+ titles = feed.search("//entry/group/title").map { |title| title.text }
42
+ Hash[urls.zip(titles)] #hash like this: url => title
43
+ end
44
+
45
+ def self.parse_playlist(url)
46
+ #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
47
+ #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
48
+ #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
49
+
50
+ playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
51
+ puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
52
+ feed_url = "http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?&max-results=50&v=2"
53
+ url_array = self.get_video_urls(feed_url)
54
+ puts "[YOUTUBE] #{url_array.size} links found!"
55
+ url_array
56
+ end
57
+
58
+ def self.parse_user(username)
59
+ puts "[YOUTUBE] User: #{username}"
60
+ feed_url = "http://gdata.youtube.com/feeds/api/users/#{username}/uploads?&max-results=50&v=2"
61
+ url_array = get_video_urls(feed_url)
62
+ puts "[YOUTUBE] #{url_array.size} links found!"
63
+ url_array
64
+ end
65
+
66
+ def self.get_urls_and_filenames(url, options = {})
67
+ @filter = options[:playlist_filter] #used to filter a playlist in self.filter_urls
68
+ return_values = []
69
+ if url.include?("view_play_list") || url.include?("playlist?list=") #if playlist
70
+ puts "[YOUTUBE] playlist found! analyzing..."
71
+ files = self.parse_playlist(url)
72
+ puts "[YOUTUBE] Starting playlist download"
73
+ files.each do |file|
74
+ puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
75
+ return_values << self.grab_single_url_filename(file)
76
+ end
77
+ elsif match = url.match(/\/user\/([\w\d]+)$/) #if user url, e.g. youtube.com/user/woot
78
+ username = match[1]
79
+ video_urls = self.parse_user(username)
80
+ puts "[YOUTUBE] Starting user videos download"
81
+ video_urls.each do |url|
82
+ puts "[YOUTUBE] Downloading next user video (#{url})"
83
+ return_values << self.grab_single_url_filename(url)
84
+ end
85
+ else #if single video
86
+ return_values << self.grab_single_url_filename(url)
87
+ end
88
+ return_values.reject! { |value| value == :no_embed } #remove results that can not be downloaded
89
+
90
+ if return_values.empty?
91
+ raise CouldNotDownloadVideoError, "No videos could be downloaded - embedding disabled."
92
+ else
93
+ return_values
94
+ end
95
+ end
96
+
97
+ def self.grab_single_url_filename(url)
98
+ #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
99
+ #addition: might also look like this /v/abc5-a5afe5agae6g
100
+ # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
101
+ # First get the redirect
102
+ if url.include?("youtu.be")
103
+ url = open(url).base_uri.to_s
104
+ end
105
+ video_id = url[/(v|embed)[=\/]([^\/\?\&]*)/,2]
106
+ if video_id.nil?
107
+ raise CouldNotDownloadVideoError, "No video id found."
108
+ else
109
+ puts "[YOUTUBE] ID FOUND: #{video_id}"
110
+ end
111
+ #let's get some infos about the video. data is urlencoded
112
+ yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
113
+ video_info = RestClient.get(yt_url).body
114
+ #converting the huge infostring into a hash, simply by splitting it at the & and then splitting it into key and value around the =
115
+ #[...]blabla=blubb&narf=poit&marc=awesome[...]
116
+ video_info_hash = Hash[*video_info.split("&").collect { |v|
117
+ key, encoded_value = v.split("=")
118
+ if encoded_value.to_s.empty?
119
+ value = ""
120
+ else
121
+ #decode until everything is "normal"
122
+ while (encoded_value != CGI::unescape(encoded_value)) do
123
+ #"decoding"
124
+ encoded_value = CGI::unescape(encoded_value)
125
+ end
126
+ value = encoded_value
127
+ end
128
+
129
+ if key =~ /_map/
130
+ orig_value = value
131
+ value = value.split(",")
132
+ if key == "url_encoded_fmt_stream_map"
133
+ url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
134
+ result_hash = {}
135
+ url_array.each do |url|
136
+ next if url.to_s.empty? || url.to_s.match(/^itag/)
137
+ format_id = url[/\&itag=(\d+)/, 1]
138
+ result_hash[format_id] = url
139
+ end
140
+ value = result_hash
141
+ elsif key == "fmt_map"
142
+ value = Hash[*value.collect { |v|
143
+ k2, *v2 = v.split("/")
144
+ [k2, v2]
145
+ }.flatten(1)]
146
+ elsif key == "fmt_url_map" || key == "fmt_stream_map"
147
+ Hash[*value.collect { |v| v.split("|")}.flatten]
148
+ end
149
+ end
150
+ [key, value]
151
+ }.flatten]
152
+
153
+ if video_info_hash["status"] == "fail"
154
+ return :no_embed
155
+ end
156
+
157
+ title = video_info_hash["title"]
158
+ length_s = video_info_hash["length_seconds"]
159
+ token = video_info_hash["token"]
160
+
161
+ #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
162
+ fmt_list = video_info_hash["fmt_list"].split(",")
163
+ available_formats = fmt_list.map{|format| format.split("/").first}
164
+
165
+ format_ext = {}
166
+ format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
167
+ format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
168
+ format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
169
+ format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
170
+ format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
171
+ format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
172
+ format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
173
+ format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
174
+ format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
175
+ format_ext["17"] = {:extension => "3gp", :name => "3gp"}
176
+
177
+ #since 1.8 doesn't do ordered hashes
178
+ prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
179
+
180
+ selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
181
+
182
+ puts "[YOUTUBE] Title: #{title}"
183
+ puts "[YOUTUBE] Length: #{length_s} s"
184
+ puts "[YOUTUBE] t-parameter: #{token}"
185
+ #best quality seems always to be first
186
+ puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
187
+
188
+ #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
189
+ download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
190
+ #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
191
+ download_url = $1 if download_url =~ /(.*?);\scodecs=/
192
+ file_name = PluginBase.make_filename_safe(title) + "." + format_ext[selected_format][:extension]
193
+ puts "downloading to " + file_name
194
+ {:url => download_url, :name => file_name}
195
+ end
196
+ end