viddl-rb 0.6 → 0.7

Sign up to get free protection for your applications and to get access to all the features.
@@ -3,59 +3,58 @@
3
3
  # Videos that have URLs that look like this: http://www.metacafe.com/watch/cb-q78rA_lp9s1_9EJsqKJ5BdIHdDNuHa1l/ cannot be downloaded.
4
4
 
5
5
  class Metacafe < PluginBase
6
- BASE_FILE_URL = "http://v.mccont.com/ItemFiles/%5BFrom%20www.metacafe.com%5D%20"
7
- API_BASE = "http://www.metacafe.com/api/"
8
-
9
- #this will be called by the main app to check whether this plugin is responsible for the url passed
10
- def self.matches_provider?(url)
11
- url.include?("metacafe.com")
12
- end
13
-
14
- def self.get_urls_and_filenames(url)
15
- video_id = get_video_id(url)
16
- info_url = API_BASE + "item/#{video_id}" #use the API to get the full video url
17
- info_doc = Nokogiri::XML(open(info_url))
18
-
19
- video_swf_url = get_video_swf_url(info_doc, video_id)
20
-
21
- #by getting the video swf url we get a http redirect url with all info needed
22
- http_response = Net::HTTP.get_response(URI(video_swf_url))
23
- redirect_url = http_response['location']
24
-
25
- file_info = get_file_info(redirect_url, video_id)
26
- key_string = get_file_key(redirect_url)
27
- file_url_with_key = file_info[:file_url] + "?__gda__=#{key_string}"
28
- escaped_url = CGI::escape(file_url_with_key)
29
-
30
- [{:url => escaped_url, :name => get_video_name(video_swf_url) + file_info[:extension]}]
31
- end
32
-
33
- def self.get_video_id(url)
34
- id = url[/watch\/(\d+)/, 1]
35
- unless id
36
- puts "ERROR: Can only download videos that has the ID in the URL."
37
- exit
38
- end
39
- id
40
- end
6
+ BASE_FILE_URL = "http://v.mccont.com/ItemFiles/%5BFrom%20www.metacafe.com%5D%20"
7
+ API_BASE = "http://www.metacafe.com/api/"
8
+
9
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
10
+ def self.matches_provider?(url)
11
+ url.include?("metacafe.com")
12
+ end
13
+
14
+ def self.get_urls_and_filenames(url, options = {})
15
+ video_id = get_video_id(url)
16
+ info_url = API_BASE + "item/#{video_id}" #use the API to get the full video url
17
+ info_doc = Nokogiri::XML(open(info_url))
18
+
19
+ video_swf_url = get_video_swf_url(info_doc, video_id)
20
+
21
+ #by getting the video swf url we get a http redirect url with all info needed
22
+ http_response = Net::HTTP.get_response(URI(video_swf_url))
23
+ redirect_url = CGI::unescape(http_response['location'])
41
24
 
42
- def self.get_video_swf_url(info_doc, video_id)
43
- video_url = info_doc.xpath("//rss/channel/item/link").text
44
- video_url.sub!("watch", "fplayer")
45
- video_url.sub!(/\/\z/, ".swf") # remove last '/' and add .swf in it's place
46
- end
47
-
48
- #$1 = file name part 1, $2 = file name part 2, $3 = file extension
49
- def self.get_file_info(redirect_url, video_id)
50
- redirect_url =~ /mediaURL.+?metacafe\.com%.+?%\d+\.(\d+)\.(\d+)(\.[\d\w]+)/
51
- {:file_url => "#{BASE_FILE_URL}#{video_id}\.#{$1}\.#{$2}#{$3}", :extension => $3}
52
- end
53
-
54
- def self.get_file_key(redirect_url)
55
- redirect_url[/key.+?\%22([\w\d]+?)\%22/, 1]
56
- end
57
-
58
- def self.get_video_name(url)
59
- url[/fplayer\/\d+\/([\d\w]+)\.swf/, 1]
60
- end
25
+ file_info = get_file_info(redirect_url, video_id)
26
+ key_string = get_file_key(redirect_url)
27
+ file_url_with_key = file_info[:file_url] + "?__gda__=#{key_string}"
28
+
29
+ [{:url => file_url_with_key, :name => get_video_name(video_swf_url) + file_info[:extension]}]
30
+ end
31
+
32
+ def self.get_video_id(url)
33
+ id = url[/watch\/(\d+)/, 1]
34
+ unless id
35
+ raise CouldNotDownloadVideoError, "Can only download videos that has the ID in the URL."
36
+ end
37
+ id
38
+ end
39
+
40
+ def self.get_video_swf_url(info_doc, video_id)
41
+ video_url = info_doc.xpath("//rss/channel/item/link").text
42
+ video_url.sub!("watch", "fplayer")
43
+ video_url.sub!(/\/\z/, ".swf") # remove last '/' and add .swf in its place
44
+ end
45
+
46
+ #$1 = file name part 1, $2 = file name part 2, $3 = file extension
47
+ def self.get_file_info(redirect_url, video_id)
48
+ redirect_url =~ /mediaURL.+?metacafe\.com%.+?%\d+\.(\d+)\.(\d+)(\.[\d\w]+)/
49
+ {:file_url => "#{BASE_FILE_URL}#{video_id}\.#{$1}\.#{$2}#{$3}", :extension => $3}
50
+ end
51
+
52
+ def self.get_file_key(redirect_url)
53
+ redirect_url[/key.+?value":"([\w\d]+)"/, 1]
54
+ end
55
+
56
+ def self.get_video_name(url)
57
+ name = url[/fplayer\/\d+\/([\d\w]+)\.swf/, 1]
58
+ PluginBase.make_filename_safe(name)
59
+ end
61
60
  end
@@ -1,13 +1,13 @@
1
+ require 'rest_client'
1
2
  class Soundcloud < PluginBase
2
- require 'iconv'
3
3
  # this will be called by the main app to check whether this plugin is responsible for the url passed
4
4
  def self.matches_provider?(url)
5
5
  url.include?("soundcloud.com")
6
6
  end
7
7
 
8
8
  # return the url for original video file and title
9
- def self.get_urls_and_filenames(url)
10
- doc = Nokogiri::XML(open(url))
9
+ def self.get_urls_and_filenames(url, options = {})
10
+ doc = Nokogiri::HTML(RestClient.get(url).body)
11
11
  download_filename = doc.at("#main-content-inner img[class=waveform]").attributes["src"].value.to_s.match(/\.com\/(.+)\_/)[1]
12
12
  download_url = "http://media.soundcloud.com/stream/#{download_filename}"
13
13
  file_name = transliterate("#{doc.at('//h1/em').text.chomp}") + ".mp3"
@@ -18,25 +18,22 @@ class Soundcloud < PluginBase
18
18
  def self.transliterate(str)
19
19
  # Based on permalink_fu by Rick Olsen
20
20
 
21
- # Escape str by transliterating to UTF-8 with Iconv
22
- s = Iconv.iconv('ascii//ignore//translit', 'utf-8', str).to_s
23
-
24
21
  # Downcase string
25
- s.downcase!
22
+ str.downcase!
26
23
 
27
24
  # Remove apostrophes so that "isn't" changes to "isnt"
28
- s.gsub!(/'/, '')
25
+ str.gsub!(/'/, '')
29
26
 
30
27
  # Replace any non-letter or non-number character with a space
31
- s.gsub!(/[^A-Za-z0-9]+/, ' ')
28
+ str.gsub!(/[^A-Za-z0-9]+/, ' ')
32
29
 
33
30
  # Remove spaces from beginning and end of string
34
- s.strip!
31
+ str.strip!
35
32
 
36
33
  # Replace groups of spaces with single hyphen
37
- s.gsub!(/\ +/, '-')
34
+ str.gsub!(/\ +/, '-')
38
35
 
39
- return s
36
+ str
40
37
  end
41
38
 
42
39
  end
@@ -1,46 +1,45 @@
1
1
  class Veoh < PluginBase
2
- VEOH_API_BASE = "http://www.veoh.com/api/"
3
- PREFERRED_FORMATS = [:mp4, :flash] # mp4 is preferred because it enables downloading full videos and not just previews
4
-
5
- #this will be called by the main app to check whether this plugin is responsible for the url passed
6
- def self.matches_provider?(url)
7
- url.include?("veoh.com")
8
- end
2
+ VEOH_API_BASE = "http://www.veoh.com/api/"
3
+ PREFERRED_FORMATS = [:mp4, :flash] # mp4 is preferred because it enables downloading full videos and not just previews
4
+
5
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
6
+ def self.matches_provider?(url)
7
+ url.include?("veoh.com")
8
+ end
9
9
 
10
- def self.get_urls_and_filenames(url)
11
- veoh_id = url[/\/watch\/([\w\d]+)/, 1]
12
- info_url = "#{VEOH_API_BASE}findByPermalink?permalink=#{veoh_id}"
13
- info_doc = Nokogiri::XML(open(info_url))
10
+ def self.get_urls_and_filenames(url, options = {})
11
+ veoh_id = url[/\/watch\/([\w\d]+)/, 1]
12
+ info_url = "#{VEOH_API_BASE}findByPermalink?permalink=#{veoh_id}"
13
+ info_doc = Nokogiri::XML(open(info_url))
14
14
 
15
- download_url = get_download_url(info_doc)
16
- file_name = get_file_name(info_doc, download_url)
15
+ download_url = get_download_url(info_doc)
16
+ file_name = get_file_name(info_doc, download_url)
17
17
 
18
- [{:url => download_url, :name => file_name}]
19
- end
20
-
21
- #returns the first valid download url string, in order of the prefered formats, that is found for the video
22
- def self.get_download_url(info_doc)
23
- PREFERRED_FORMATS.each do |format|
24
- a = get_attribute(format)
25
- download_attr = info_doc.xpath('//rsp/videoList/video').first.attributes[a]
26
- return(download_attr.content) unless download_attr.nil? || download_attr.content.empty?
27
- end
28
- end
29
-
30
- #the file name string is a combination of the video name and the extension
31
- def self.get_file_name(info_doc, download_url)
32
- name = info_doc.xpath('//rsp/videoList/video').first.attributes['title'].content
33
- name.gsub!(" ", "_") # replace spaces with underscores
34
- extension = download_url[/\/[\w\d]+(\.[\w\d]+)\?ct/, 1]
35
- name + extension
36
- end
18
+ [{:url => download_url, :name => file_name}]
19
+ end
20
+
21
+ #returns the first valid download url string, in order of the preferred formats, that is found for the video
22
+ def self.get_download_url(info_doc)
23
+ PREFERRED_FORMATS.each do |format|
24
+ a = get_attribute(format)
25
+ download_attr = info_doc.xpath('//rsp/videoList/video').first.attributes[a]
26
+ return(download_attr.content) unless download_attr.nil? || download_attr.content.empty?
27
+ end
28
+ end
29
+
30
+ #the file name string is a combination of the video name and the extension
31
+ def self.get_file_name(info_doc, download_url)
32
+ name = info_doc.xpath('//rsp/videoList/video').first.attributes['title'].content
33
+ extension = download_url[/\/[\w\d]+(\.[\w\d]+)\?ct/, 1]
34
+ PluginBase.make_filename_safe(name) + extension
35
+ end
37
36
 
38
- def self.get_attribute(format)
39
- case format
40
- when :mp4
41
- "ipodUrl"
42
- when :flash
43
- "previewUrl"
44
- end
45
- end
46
- end
37
+ def self.get_attribute(format)
38
+ case format
39
+ when :mp4
40
+ "ipodUrl"
41
+ when :flash
42
+ "previewUrl"
43
+ end
44
+ end
45
+ end
@@ -1,25 +1,32 @@
1
1
  class Vimeo < PluginBase
2
- #this will be called by the main app to check whether this plugin is responsible for the url passed
3
- def self.matches_provider?(url)
4
- url.include?("vimeo.com")
5
- end
6
-
7
- def self.get_urls_and_filenames(url)
8
- #the vimeo ID consists of 7 decimal numbers in the URL
9
- vimeo_id = url[/\d{7,8}/]
10
- doc = Nokogiri::XML(open("http://www.vimeo.com/moogaloop/load/clip:#{vimeo_id}"))
11
- title = doc.at("//video/caption").inner_text
12
- puts "[VIMEO] Title: #{title}"
13
- request_signature = doc.at("//request_signature").inner_text
14
- request_signature_expires = doc.at("//request_signature_expires").inner_text
15
-
2
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
3
+ def self.matches_provider?(url)
4
+ url.include?("vimeo.com")
5
+ end
6
+
7
+ def self.get_urls_and_filenames(url, options = {})
8
+ #the vimeo ID consists of 7 or 8 decimal digits in the URL
9
+ vimeo_id = url[/\d{7,8}/]
16
10
 
17
- puts "[VIMEO] Request Signature: #{request_signature} expires: #{request_signature_expires}"
18
-
19
- download_url = "http://www.vimeo.com/moogaloop/play/clip:#{vimeo_id}/#{request_signature}/#{request_signature_expires}/?q=hd"
20
- #todo: put the filename cleaning stuff into a seperate helper
21
- file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + ".flv"
22
- puts "downloading to " + file_name
23
- [{:url => download_url, :name => file_name}]
24
- end
25
- end
11
+ agent = Mechanize.new #use Mechanize for the automatic cookie handling
12
+ agent.redirect_ok = false #don't follow redirects so we do not download the video when we get its url
13
+
14
+ video_page = agent.get("http://vimeo.com/#{vimeo_id}")
15
+ page_html = video_page.root.inner_html
16
+ doc = Nokogiri::HTML(page_html)
17
+ title = doc.at('meta[property="og:title"]').attributes['content'].value
18
+ puts "[VIMEO] Title: #{title.inspect}"
19
+
20
+ #the timestamp and sig info is in the embedded player javascript in the video page
21
+ timestamp = page_html[/"timestamp":(\d+),/, 1]
22
+ signature = page_html[/"signature":"([\d\w]+)",/, 1]
23
+
24
+ redirect_url = "http://player.vimeo.com/play_redirect?clip_id=#{vimeo_id}&sig=#{signature}&time=#{timestamp}&quality=hd,sd&codecs=H264,VP8,VP6"
25
+
26
+ #the download url is the value of the location (redirect) header
27
+ download_url = agent.get(redirect_url).header["location"]
28
+ file_name = PluginBase.make_filename_safe(title) + ".mp4"
29
+
30
+ [{:url => download_url, :name => file_name}]
31
+ end
32
+ end
@@ -1,143 +1,196 @@
1
- class Youtube < PluginBase
2
- #this will be called by the main app to check whether this plugin is responsible for the url passed
3
- def self.matches_provider?(url)
4
- url.include?("youtube.com") || url.include?("youtu.be")
5
- end
6
-
7
- def self.parse_playlist(url)
8
- #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
9
- #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
10
- #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
11
-
12
- playlist_ID = url[/p=(\w{16})&?/,1]
13
- puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
14
- url_array = Array.new
15
- video_info = Nokogiri::HTML(open("http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?v=2"))
16
- video_info.search("//content").each do |video|
17
- url_array << video["url"] if video["url"].include?("http://www.youtube.com/v/") #filters out rtsp links
18
- end
19
-
20
- puts "[YOUTUBE] #{url_array.size} links found!"
21
- url_array
22
- end
23
-
24
-
25
- def self.get_urls_and_filenames(url)
26
- return_values = []
27
- if url.include?("view_play_list")
28
- puts "[YOUTUBE] playlist found! analyzing..."
29
- files = self.parse_playlist(url)
30
- puts "[YOUTUBE] Starting playlist download"
31
- files.each do |file|
32
- puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
33
- return_values << self.grab_single_url_filename(url)
34
- end
35
- else
36
- return_values << self.grab_single_url_filename(url)
37
- end
38
- return_values
39
- end
40
-
41
- def self.grab_single_url_filename(url)
42
- #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
43
- #addition: might also look like this /v/abc5-a5afe5agae6g
44
- # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
45
- # First get the redirect
46
- if url.include?("youtu.be")
47
- url = open(url).base_uri.to_s
48
- end
49
- video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/,2]
50
- if video_id.nil?
51
- puts "no video id found."
52
- exit
53
- else
54
- puts "[YOUTUBE] ID FOUND: #{video_id}"
55
- end
56
- #let's get some infos about the video. data is urlencoded
57
- yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
58
- video_info = open(yt_url).read
59
- #converting the huge infostring into a hash. simply by splitting it at the & and then splitting it into key and value arround the =
60
- #[...]blabla=blubb&narf=poit&marc=awesome[...]
61
- video_info_hash = Hash[*video_info.split("&").collect { |v|
62
- key, encoded_value = v.split("=")
63
- if encoded_value.to_s.empty?
64
- value = ""
65
- else
66
- #decode until everything is "normal"
67
- while (encoded_value != CGI::unescape(encoded_value)) do
68
- #"decoding"
69
- encoded_value = CGI::unescape(encoded_value)
70
- end
71
- value = encoded_value
72
- end
73
-
74
- if key =~ /_map/
75
- orig_value = value
76
- value = value.split(",")
77
- if key == "url_encoded_fmt_stream_map"
78
- url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
79
- result_hash = {}
80
- url_array.each do |url|
81
- next if url.to_s.empty?
82
- format_id = url.match(/\&itag=(\d+)/)[1]
83
- result_hash[format_id] = url
84
- end
85
- value = result_hash
86
- elsif key == "fmt_map"
87
- value = Hash[*value.collect{ |v|
88
- k2, *v2 = v.split("/")
89
- [k2, v2]
90
- }.flatten(1)]
91
- elsif key == "fmt_url_map" || key == "fmt_stream_map"
92
- Hash[*value.collect { |v| v.split("|")}.flatten]
93
- end
94
- end
95
- [key, value]
96
- }.flatten]
97
-
98
- if video_info_hash["status"] == "fail"
99
- puts "Error: embedding disabled, no video info found"
100
- exit
101
- end
102
-
103
- title = video_info_hash["title"]
104
- length_s = video_info_hash["length_seconds"]
105
- token = video_info_hash["token"]
106
-
107
-
108
- #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
109
- fmt_list = video_info_hash["fmt_list"].split(",")
110
- available_formats = fmt_list.map{|format| format.split("/").first}
111
-
112
- format_ext = {}
113
- format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
114
- format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
115
- format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
116
- format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
117
- format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
118
- format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
119
- format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
120
- format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
121
- format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
122
- format_ext["17"] = {:extension => "3gp", :name => "3gp"}
123
-
124
- #since 1.8 doesn't do ordered hashes
125
- prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
126
-
127
- selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
128
-
129
- puts "[YOUTUBE] Title: #{title}"
130
- puts "[YOUTUBE] Length: #{length_s} s"
131
- puts "[YOUTUBE] t-parameter: #{token}"
132
- #best quality seems always to be firsts
133
- puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
134
-
135
- #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
136
- download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
137
- #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
138
- download_url = $1 if download_url =~ /(.*?);\scodecs=/
139
- file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + format_ext[selected_format][:extension]
140
- puts "downloading to " + file_name
141
- {:url => download_url, :name => file_name}
142
- end
143
- end
1
+
2
+ class Youtube < PluginBase
3
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
4
+ def self.matches_provider?(url)
5
+ url.include?("youtube.com") || url.include?("youtu.be")
6
+ end
7
+
8
+ #get all videos and return their urls in an array
9
+ def self.get_video_urls(feed_url)
10
+ puts "[YOUTUBE] Retrieving videos..."
11
+ urls_titles = Hash.new
12
+ result_feed = Nokogiri::XML(open(feed_url))
13
+ urls_titles.merge!(grab_ut(result_feed))
14
+
15
+ #as long as the feed has a next link we follow it and add the resulting video urls
16
+ loop do
17
+ next_link = result_feed.search("//feed/link[@rel='next']").first
18
+ break if next_link.nil?
19
+ result_feed = Nokogiri::HTML(open(next_link["href"]))
20
+ urls_titles.merge!(grab_ut(result_feed))
21
+ end
22
+
23
+ self.filter_urls(urls_titles)
24
+ end
25
+
26
+ #returns only the urls that match the --filter argument regex (if present)
27
+ def self.filter_urls(url_hash)
28
+ if @filter
29
+ puts "[YOUTUBE] Using filter: #{@filter}"
30
+ filtered = url_hash.select { |url, title| title =~ @filter }
31
+ filtered.keys
32
+ else
33
+ url_hash.keys
34
+ end
35
+ end
36
+
37
+ #extract all video urls and their titles from a feed and return in a hash
38
+ def self.grab_ut(feed)
39
+ feed.remove_namespaces! #so that we can get to the titles easily
40
+ urls = feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }
41
+ titles = feed.search("//entry/group/title").map { |title| title.text }
42
+ Hash[urls.zip(titles)] #hash like this: url => title
43
+ end
44
+
45
+ def self.parse_playlist(url)
46
+ #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
47
+ #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
48
+ #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
49
+
50
+ playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
51
+ puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
52
+ feed_url = "http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?&max-results=50&v=2"
53
+ url_array = self.get_video_urls(feed_url)
54
+ puts "[YOUTUBE] #{url_array.size} links found!"
55
+ url_array
56
+ end
57
+
58
+ def self.parse_user(username)
59
+ puts "[YOUTUBE] User: #{username}"
60
+ feed_url = "http://gdata.youtube.com/feeds/api/users/#{username}/uploads?&max-results=50&v=2"
61
+ url_array = get_video_urls(feed_url)
62
+ puts "[YOUTUBE] #{url_array.size} links found!"
63
+ url_array
64
+ end
65
+
66
+ def self.get_urls_and_filenames(url, options = {})
67
+ @filter = options[:playlist_filter] #used to filter a playlist in self.filter_urls
68
+ return_values = []
69
+ if url.include?("view_play_list") || url.include?("playlist?list=") #if playlist
70
+ puts "[YOUTUBE] playlist found! analyzing..."
71
+ files = self.parse_playlist(url)
72
+ puts "[YOUTUBE] Starting playlist download"
73
+ files.each do |file|
74
+ puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
75
+ return_values << self.grab_single_url_filename(file)
76
+ end
77
+ elsif match = url.match(/\/user\/([\w\d]+)$/) #if user url, e.g. youtube.com/user/woot
78
+ username = match[1]
79
+ video_urls = self.parse_user(username)
80
+ puts "[YOUTUBE] Starting user videos download"
81
+ video_urls.each do |url|
82
+ puts "[YOUTUBE] Downloading next user video (#{url})"
83
+ return_values << self.grab_single_url_filename(url)
84
+ end
85
+ else #if single video
86
+ return_values << self.grab_single_url_filename(url)
87
+ end
88
+ return_values.reject! { |value| value == :no_embed } #remove results that can not be downloaded
89
+
90
+ if return_values.empty?
91
+ raise CouldNotDownloadVideoError, "No videos could be downloaded - embedding disabled."
92
+ else
93
+ return_values
94
+ end
95
+ end
96
+
97
+ def self.grab_single_url_filename(url)
98
+ #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
99
+ #addition: might also look like this /v/abc5-a5afe5agae6g
100
+ # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
101
+ # First get the redirect
102
+ if url.include?("youtu.be")
103
+ url = open(url).base_uri.to_s
104
+ end
105
+ video_id = url[/(v|embed)[=\/]([^\/\?\&]*)/,2]
106
+ if video_id.nil?
107
+ raise CouldNotDownloadVideoError, "No video id found."
108
+ else
109
+ puts "[YOUTUBE] ID FOUND: #{video_id}"
110
+ end
111
+ #let's get some infos about the video. data is urlencoded
112
+ yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
113
+ video_info = RestClient.get(yt_url).body
114
+ #converting the huge infostring into a hash, simply by splitting it at the & and then splitting each part into key and value around the =
115
+ #[...]blabla=blubb&narf=poit&marc=awesome[...]
116
+ video_info_hash = Hash[*video_info.split("&").collect { |v|
117
+ key, encoded_value = v.split("=")
118
+ if encoded_value.to_s.empty?
119
+ value = ""
120
+ else
121
+ #decode until everything is "normal"
122
+ while (encoded_value != CGI::unescape(encoded_value)) do
123
+ #"decoding"
124
+ encoded_value = CGI::unescape(encoded_value)
125
+ end
126
+ value = encoded_value
127
+ end
128
+
129
+ if key =~ /_map/
130
+ orig_value = value
131
+ value = value.split(",")
132
+ if key == "url_encoded_fmt_stream_map"
133
+ url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
134
+ result_hash = {}
135
+ url_array.each do |url|
136
+ next if url.to_s.empty? || url.to_s.match(/^itag/)
137
+ format_id = url[/\&itag=(\d+)/, 1]
138
+ result_hash[format_id] = url
139
+ end
140
+ value = result_hash
141
+ elsif key == "fmt_map"
142
+ value = Hash[*value.collect { |v|
143
+ k2, *v2 = v.split("/")
144
+ [k2, v2]
145
+ }.flatten(1)]
146
+ elsif key == "fmt_url_map" || key == "fmt_stream_map"
147
+ Hash[*value.collect { |v| v.split("|")}.flatten]
148
+ end
149
+ end
150
+ [key, value]
151
+ }.flatten]
152
+
153
+ if video_info_hash["status"] == "fail"
154
+ return :no_embed
155
+ end
156
+
157
+ title = video_info_hash["title"]
158
+ length_s = video_info_hash["length_seconds"]
159
+ token = video_info_hash["token"]
160
+
161
+ #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
162
+ fmt_list = video_info_hash["fmt_list"].split(",")
163
+ available_formats = fmt_list.map{|format| format.split("/").first}
164
+
165
+ format_ext = {}
166
+ format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
167
+ format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
168
+ format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
169
+ format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
170
+ format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
171
+ format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
172
+ format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
173
+ format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
174
+ format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
175
+ format_ext["17"] = {:extension => "3gp", :name => "3gp"}
176
+
177
+ #since 1.8 doesn't do ordered hashes
178
+ prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
179
+
180
+ selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
181
+
182
+ puts "[YOUTUBE] Title: #{title}"
183
+ puts "[YOUTUBE] Length: #{length_s} s"
184
+ puts "[YOUTUBE] t-parameter: #{token}"
185
+ #the best quality always seems to come first
186
+ puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
187
+
188
+ #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
189
+ download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
190
+ #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
191
+ download_url = $1 if download_url =~ /(.*?);\scodecs=/
192
+ file_name = PluginBase.make_filename_safe(title) + "." + format_ext[selected_format][:extension]
193
+ puts "downloading to " + file_name
194
+ {:url => download_url, :name => file_name}
195
+ end
196
+ end