viddl-rb 0.64 → 0.65

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. data/README.md +5 -0
  2. data/plugins/youtube.rb +153 -120
  3. metadata +4 -4
data/README.md CHANGED
@@ -18,6 +18,11 @@ Download a video and extract the audio:
18
18
 
19
19
  In both cases we'll name the output file according to the video title.
20
20
 
21
+ Download all videos on a Youtube playlist:
22
+ viddl-rb http://www.youtube.com/playlist?list=PL7E8DA0A515924126
23
+
24
+ Download all videos from a Youtube user:
25
+ viddl-rb http://www.youtube.com/user/tedtalksdirector
21
26
 
22
27
  __Requirements:__
23
28
 
data/plugins/youtube.rb CHANGED
@@ -1,144 +1,177 @@
1
1
 
2
2
  class Youtube < PluginBase
3
- #this will be called by the main app to check whether this plugin is responsible for the url passed
4
- def self.matches_provider?(url)
5
- url.include?("youtube.com") || url.include?("youtu.be")
6
- end
7
-
8
- def self.parse_playlist(url)
9
- #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
10
- #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
11
- #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
3
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
4
+ def self.matches_provider?(url)
5
+ url.include?("youtube.com") || url.include?("youtu.be")
6
+ end
7
+
8
+ #get all videos and return their urls in an array
9
+ def self.get_video_urls(feed_url)
10
+ urls = []
11
+ result_feed = Nokogiri::HTML(open(feed_url))
12
+ urls << grab_urls(result_feed)
12
13
 
13
- playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
14
- puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
15
- url_array = Array.new
16
- video_info = Nokogiri::HTML(open("http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?v=2"))
17
- video_info.search("//content").each do |video|
18
- url_array << video["url"] if video["url"].include?("http://www.youtube.com/v/") #filters out rtsp links
19
- end
14
+ #as long as the feed has a next link we follow it and add the resulting video urls
15
+ loop do
16
+ next_link = result_feed.search("//feed/link[@rel='next']").first
17
+ break if next_link.nil?
18
+ result_feed = Nokogiri::HTML(open(next_link["href"]))
19
+ urls << grab_urls(result_feed)
20
+ end
21
+ urls.flatten
22
+ end
20
23
 
21
- puts "[YOUTUBE] #{url_array.size} links found!"
22
- url_array
23
- end
24
-
25
-
26
- def self.get_urls_and_filenames(url)
27
- return_values = []
28
- if url.include?("view_play_list") || url.include?("playlist?list=")
29
- puts "[YOUTUBE] playlist found! analyzing..."
30
- files = self.parse_playlist(url)
31
- puts "[YOUTUBE] Starting playlist download"
32
- files.each do |file|
33
- puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
34
- return_values << self.grab_single_url_filename(file)
35
- end
36
- else
37
- return_values << self.grab_single_url_filename(url)
38
- end
39
- return_values
40
- end
41
-
42
- def self.grab_single_url_filename(url)
43
- #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
44
- #addition: might also look like this /v/abc5-a5afe5agae6g
45
- # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
46
- # First get the redirect
47
- if url.include?("youtu.be")
48
- url = open(url).base_uri.to_s
49
- end
50
- video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/,2]
51
- if video_id.nil?
52
- puts "no video id found."
53
- exit
54
- else
55
- puts "[YOUTUBE] ID FOUND: #{video_id}"
56
- end
57
- #let's get some infos about the video. data is urlencoded
58
- yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
59
- video_info = open(yt_url).read
60
- #converting the huge infostring into a hash. simply by splitting it at the & and then splitting it into key and value arround the =
61
- #[...]blabla=blubb&narf=poit&marc=awesome[...]
62
- video_info_hash = Hash[*video_info.split("&").collect { |v|
24
+ #extract all video urls from a feed and return them in an array
25
+ def self.grab_urls(feed)
26
+ feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }
27
+ end
28
+
29
+ def self.parse_playlist(url)
30
+ #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
31
+ #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
32
+ #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
33
+
34
+ playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
35
+ puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
36
+ feed_url = "http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?&max-results=50&v=2"
37
+ url_array = self.get_video_urls(feed_url)
38
+ puts "[YOUTUBE] #{url_array.size} links found!"
39
+ url_array
40
+ end
41
+
42
+ def self.parse_user(username)
43
+ puts "[YOUTUBE] User: #{username}"
44
+ feed_url = "http://gdata.youtube.com/feeds/api/users/#{username}/uploads?&max-results=50&v=2"
45
+ url_array = get_video_urls(feed_url)
46
+ puts "[YOUTUBE] #{url_array.size} links found!"
47
+ url_array
48
+ end
49
+
50
+ def self.get_urls_and_filenames(url)
51
+ return_values = []
52
+ if url.include?("view_play_list") || url.include?("playlist?list=") #if playlist
53
+ puts "[YOUTUBE] playlist found! analyzing..."
54
+ files = self.parse_playlist(url)
55
+ puts "[YOUTUBE] Starting playlist download"
56
+ files.each do |file|
57
+ puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
58
+ return_values << self.grab_single_url_filename(file)
59
+ end
60
+ elsif match = url.match(/\/user\/([\w\d]+)$/) #if user url, e.g. youtube.com/user/woot
61
+ username = match[1]
62
+ video_urls = self.parse_user(username)
63
+ puts "[YOUTUBE] Starting user videos download"
64
+ video_urls.each do |url|
65
+ puts "[YOUTUBE] Downloading next user video (#{url})"
66
+ return_values << self.grab_single_url_filename(url)
67
+ end
68
+ else #if single video
69
+ return_values << self.grab_single_url_filename(url)
70
+ end
71
+
72
+ return_values.reject! { |value| value == :no_embed } #remove results that can not be downloaded
73
+ return_values.empty? ? exit : return_values #if no videos could be downloaded exit
74
+ end
75
+
76
+ def self.grab_single_url_filename(url)
77
+ #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
78
+ #addition: might also look like this /v/abc5-a5afe5agae6g
79
+ # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
80
+ # First get the redirect
81
+ if url.include?("youtu.be")
82
+ url = open(url).base_uri.to_s
83
+ end
84
+ video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/,2]
85
+ if video_id.nil?
86
+ puts "no video id found."
87
+ exit
88
+ else
89
+ puts "[YOUTUBE] ID FOUND: #{video_id}"
90
+ end
91
+ #let's get some infos about the video. data is urlencoded
92
+ yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
93
+ video_info = open(yt_url).read
94
+ #converting the huge infostring into a hash, simply by splitting it at the & and then splitting it into key and value around the =
95
+ #[...]blabla=blubb&narf=poit&marc=awesome[...]
96
+ video_info_hash = Hash[*video_info.split("&").collect { |v|
63
97
  key, encoded_value = v.split("=")
64
98
  if encoded_value.to_s.empty?
65
- value = ""
99
+ value = ""
66
100
  else
67
101
  #decode until everything is "normal"
68
- while (encoded_value != CGI::unescape(encoded_value)) do
69
- #"decoding"
70
- encoded_value = CGI::unescape(encoded_value)
71
- end
72
- value = encoded_value
102
+ while (encoded_value != CGI::unescape(encoded_value)) do
103
+ #"decoding"
104
+ encoded_value = CGI::unescape(encoded_value)
105
+ end
106
+ value = encoded_value
73
107
  end
74
108
 
75
109
  if key =~ /_map/
76
- orig_value = value
110
+ orig_value = value
77
111
  value = value.split(",")
78
112
  if key == "url_encoded_fmt_stream_map"
79
- url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
80
- result_hash = {}
81
- url_array.each do |url|
82
- next if url.to_s.empty?
83
- format_id = url.match(/\&itag=(\d+)/)[1]
84
- result_hash[format_id] = url
85
- end
86
- value = result_hash
113
+ url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
114
+ result_hash = {}
115
+ url_array.each do |url|
116
+ next if url.to_s.empty?
117
+ format_id = url.match(/\&itag=(\d+)/)[1]
118
+ result_hash[format_id] = url
119
+ end
120
+ value = result_hash
87
121
  elsif key == "fmt_map"
88
- value = Hash[*value.collect{ |v|
122
+ value = Hash[*value.collect { |v|
89
123
  k2, *v2 = v.split("/")
90
124
  [k2, v2]
91
125
  }.flatten(1)]
92
- elsif key == "fmt_url_map" || key == "fmt_stream_map"
93
- Hash[*value.collect { |v| v.split("|")}.flatten]
126
+ elsif key == "fmt_url_map" || key == "fmt_stream_map"
127
+ Hash[*value.collect { |v| v.split("|")}.flatten]
94
128
  end
95
129
  end
96
- [key, value]
97
- }.flatten]
98
-
99
- if video_info_hash["status"] == "fail"
100
- puts "Error: embedding disabled, no video info found"
101
- exit
102
- end
103
-
104
- title = video_info_hash["title"]
105
- length_s = video_info_hash["length_seconds"]
106
- token = video_info_hash["token"]
130
+ [key, value]
131
+ }.flatten]
132
+
133
+ if video_info_hash["status"] == "fail"
134
+ puts "Error: embedding disabled, no video info found"
135
+ return :no_embed
136
+ end
137
+
138
+ title = video_info_hash["title"]
139
+ length_s = video_info_hash["length_seconds"]
140
+ token = video_info_hash["token"]
107
141
 
108
-
109
- #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
110
- fmt_list = video_info_hash["fmt_list"].split(",")
111
- available_formats = fmt_list.map{|format| format.split("/").first}
112
-
113
- format_ext = {}
114
- format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
115
- format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
116
- format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
117
- format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
118
- format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
119
- format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
120
- format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
121
- format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
122
- format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
123
- format_ext["17"] = {:extension => "3gp", :name => "3gp"}
124
-
125
- #since 1.8 doesn't do ordered hashes
126
- prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
127
-
128
- selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
129
-
130
- puts "[YOUTUBE] Title: #{title}"
131
- puts "[YOUTUBE] Length: #{length_s} s"
132
- puts "[YOUTUBE] t-parameter: #{token}"
133
- #best quality seems always to be firsts
134
- puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
142
+ #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
143
+ fmt_list = video_info_hash["fmt_list"].split(",")
144
+ available_formats = fmt_list.map{|format| format.split("/").first}
145
+
146
+ format_ext = {}
147
+ format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
148
+ format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
149
+ format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
150
+ format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
151
+ format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
152
+ format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
153
+ format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
154
+ format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
155
+ format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
156
+ format_ext["17"] = {:extension => "3gp", :name => "3gp"}
157
+
158
+ #since 1.8 doesn't do ordered hashes
159
+ prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
160
+
161
+ selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
162
+
163
+ puts "[YOUTUBE] Title: #{title}"
164
+ puts "[YOUTUBE] Length: #{length_s} s"
165
+ puts "[YOUTUBE] t-parameter: #{token}"
166
+ #best quality always seems to be first
167
+ puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
135
168
 
136
- #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
169
+ #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
137
170
  download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
138
171
  #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
139
172
  download_url = $1 if download_url =~ /(.*?);\scodecs=/
140
- file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + format_ext[selected_format][:extension]
141
- puts "downloading to " + file_name
142
- {:url => download_url, :name => file_name}
143
- end
144
- end
173
+ file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + format_ext[selected_format][:extension]
174
+ puts "downloading to " + file_name
175
+ {:url => download_url, :name => file_name}
176
+ end
177
+ end
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viddl-rb
3
3
  version: !ruby/object:Gem::Version
4
- hash: 139
4
+ hash: 137
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 64
9
- version: "0.64"
8
+ - 65
9
+ version: "0.65"
10
10
  platform: ruby
11
11
  authors:
12
12
  - Marc Seeger
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2012-04-15 00:00:00 Z
17
+ date: 2012-05-02 00:00:00 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: nokogiri