viddl-rb 0.64 → 0.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. data/README.md +5 -0
  2. data/plugins/youtube.rb +153 -120
  3. metadata +4 -4
data/README.md CHANGED
@@ -18,6 +18,11 @@ Download a video and extract the audio:
18
18
 
19
19
  In both cases we'll name the output file according to the video title.
20
20
 
21
+ Download all videos on a Youtube playlist:
22
+ viddl-rb http://www.youtube.com/playlist?list=PL7E8DA0A515924126
23
+
24
+ Download all videos from a Youtube user:
25
+ viddl-rb http://www.youtube.com/user/tedtalksdirector
21
26
 
22
27
  __Requirements:__
23
28
 
data/plugins/youtube.rb CHANGED
@@ -1,144 +1,177 @@
1
1
 
2
2
  class Youtube < PluginBase
3
- #this will be called by the main app to check whether this plugin is responsible for the url passed
4
- def self.matches_provider?(url)
5
- url.include?("youtube.com") || url.include?("youtu.be")
6
- end
7
-
8
- def self.parse_playlist(url)
9
- #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
10
- #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
11
- #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
3
+ #this will be called by the main app to check whether this plugin is responsible for the url passed
4
+ def self.matches_provider?(url)
5
+ url.include?("youtube.com") || url.include?("youtu.be")
6
+ end
7
+
8
+ #get all videos and return their urls in an array
9
+ def self.get_video_urls(feed_url)
10
+ urls = []
11
+ result_feed = Nokogiri::HTML(open(feed_url))
12
+ urls << grab_urls(result_feed)
12
13
 
13
- playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
14
- puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
15
- url_array = Array.new
16
- video_info = Nokogiri::HTML(open("http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?v=2"))
17
- video_info.search("//content").each do |video|
18
- url_array << video["url"] if video["url"].include?("http://www.youtube.com/v/") #filters out rtsp links
19
- end
14
+ #as long as the feed has a next link we follow it and add the resulting video urls
15
+ loop do
16
+ next_link = result_feed.search("//feed/link[@rel='next']").first
17
+ break if next_link.nil?
18
+ result_feed = Nokogiri::HTML(open(next_link["href"]))
19
+ urls << grab_urls(result_feed)
20
+ end
21
+ urls.flatten
22
+ end
20
23
 
21
- puts "[YOUTUBE] #{url_array.size} links found!"
22
- url_array
23
- end
24
-
25
-
26
- def self.get_urls_and_filenames(url)
27
- return_values = []
28
- if url.include?("view_play_list") || url.include?("playlist?list=")
29
- puts "[YOUTUBE] playlist found! analyzing..."
30
- files = self.parse_playlist(url)
31
- puts "[YOUTUBE] Starting playlist download"
32
- files.each do |file|
33
- puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
34
- return_values << self.grab_single_url_filename(file)
35
- end
36
- else
37
- return_values << self.grab_single_url_filename(url)
38
- end
39
- return_values
40
- end
41
-
42
- def self.grab_single_url_filename(url)
43
- #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
44
- #addition: might also look like this /v/abc5-a5afe5agae6g
45
- # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
46
- # First get the redirect
47
- if url.include?("youtu.be")
48
- url = open(url).base_uri.to_s
49
- end
50
- video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/,2]
51
- if video_id.nil?
52
- puts "no video id found."
53
- exit
54
- else
55
- puts "[YOUTUBE] ID FOUND: #{video_id}"
56
- end
57
- #let's get some infos about the video. data is urlencoded
58
- yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
59
- video_info = open(yt_url).read
60
- #converting the huge infostring into a hash. simply by splitting it at the & and then splitting it into key and value arround the =
61
- #[...]blabla=blubb&narf=poit&marc=awesome[...]
62
- video_info_hash = Hash[*video_info.split("&").collect { |v|
24
+ #extract all video urls from a feed and return them in an array
25
+ def self.grab_urls(feed)
26
+ feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }
27
+ end
28
+
29
+ def self.parse_playlist(url)
30
+ #http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
31
+ #http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
32
+ #http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
33
+
34
+ playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
35
+ puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
36
+ feed_url = "http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?&max-results=50&v=2"
37
+ url_array = self.get_video_urls(feed_url)
38
+ puts "[YOUTUBE] #{url_array.size} links found!"
39
+ url_array
40
+ end
41
+
42
+ def self.parse_user(username)
43
+ puts "[YOUTUBE] User: #{username}"
44
+ feed_url = "http://gdata.youtube.com/feeds/api/users/#{username}/uploads?&max-results=50&v=2"
45
+ url_array = get_video_urls(feed_url)
46
+ puts "[YOUTUBE] #{url_array.size} links found!"
47
+ url_array
48
+ end
49
+
50
+ def self.get_urls_and_filenames(url)
51
+ return_values = []
52
+ if url.include?("view_play_list") || url.include?("playlist?list=") #if playlist
53
+ puts "[YOUTUBE] playlist found! analyzing..."
54
+ files = self.parse_playlist(url)
55
+ puts "[YOUTUBE] Starting playlist download"
56
+ files.each do |file|
57
+ puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
58
+ return_values << self.grab_single_url_filename(file)
59
+ end
60
+ elsif match = url.match(/\/user\/([\w\d]+)$/) #if user url, e.g. youtube.com/user/woot
61
+ username = match[1]
62
+ video_urls = self.parse_user(username)
63
+ puts "[YOUTUBE] Starting user videos download"
64
+ video_urls.each do |url|
65
+ puts "[YOUTUBE] Downloading next user video (#{url})"
66
+ return_values << self.grab_single_url_filename(url)
67
+ end
68
+ else #if single video
69
+ return_values << self.grab_single_url_filename(url)
70
+ end
71
+
72
+ return_values.reject! { |value| value == :no_embed } #remove results that can not be downloaded
73
+ return_values.empty? ? exit : return_values #if no videos could be downloaded exit
74
+ end
75
+
76
+ def self.grab_single_url_filename(url)
77
+ #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
78
+ #addition: might also look like this /v/abc5-a5afe5agae6g
79
+ # alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
80
+ # First get the redirect
81
+ if url.include?("youtu.be")
82
+ url = open(url).base_uri.to_s
83
+ end
84
+ video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/,2]
85
+ if video_id.nil?
86
+ puts "no video id found."
87
+ exit
88
+ else
89
+ puts "[YOUTUBE] ID FOUND: #{video_id}"
90
+ end
91
+ #let's get some infos about the video. data is urlencoded
92
+ yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
93
+ video_info = open(yt_url).read
94
+ #convert the huge info string into a hash by splitting it at each & and then splitting each pair into key and value around the =
95
+ #[...]blabla=blubb&narf=poit&marc=awesome[...]
96
+ video_info_hash = Hash[*video_info.split("&").collect { |v|
63
97
  key, encoded_value = v.split("=")
64
98
  if encoded_value.to_s.empty?
65
- value = ""
99
+ value = ""
66
100
  else
67
101
  #decode until everything is "normal"
68
- while (encoded_value != CGI::unescape(encoded_value)) do
69
- #"decoding"
70
- encoded_value = CGI::unescape(encoded_value)
71
- end
72
- value = encoded_value
102
+ while (encoded_value != CGI::unescape(encoded_value)) do
103
+ #"decoding"
104
+ encoded_value = CGI::unescape(encoded_value)
105
+ end
106
+ value = encoded_value
73
107
  end
74
108
 
75
109
  if key =~ /_map/
76
- orig_value = value
110
+ orig_value = value
77
111
  value = value.split(",")
78
112
  if key == "url_encoded_fmt_stream_map"
79
- url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
80
- result_hash = {}
81
- url_array.each do |url|
82
- next if url.to_s.empty?
83
- format_id = url.match(/\&itag=(\d+)/)[1]
84
- result_hash[format_id] = url
85
- end
86
- value = result_hash
113
+ url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
114
+ result_hash = {}
115
+ url_array.each do |url|
116
+ next if url.to_s.empty?
117
+ format_id = url.match(/\&itag=(\d+)/)[1]
118
+ result_hash[format_id] = url
119
+ end
120
+ value = result_hash
87
121
  elsif key == "fmt_map"
88
- value = Hash[*value.collect{ |v|
122
+ value = Hash[*value.collect { |v|
89
123
  k2, *v2 = v.split("/")
90
124
  [k2, v2]
91
125
  }.flatten(1)]
92
- elsif key == "fmt_url_map" || key == "fmt_stream_map"
93
- Hash[*value.collect { |v| v.split("|")}.flatten]
126
+ elsif key == "fmt_url_map" || key == "fmt_stream_map"
127
+ Hash[*value.collect { |v| v.split("|")}.flatten]
94
128
  end
95
129
  end
96
- [key, value]
97
- }.flatten]
98
-
99
- if video_info_hash["status"] == "fail"
100
- puts "Error: embedding disabled, no video info found"
101
- exit
102
- end
103
-
104
- title = video_info_hash["title"]
105
- length_s = video_info_hash["length_seconds"]
106
- token = video_info_hash["token"]
130
+ [key, value]
131
+ }.flatten]
132
+
133
+ if video_info_hash["status"] == "fail"
134
+ puts "Error: embedding disabled, no video info found"
135
+ return :no_embed
136
+ end
137
+
138
+ title = video_info_hash["title"]
139
+ length_s = video_info_hash["length_seconds"]
140
+ token = video_info_hash["token"]
107
141
 
108
-
109
- #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
110
- fmt_list = video_info_hash["fmt_list"].split(",")
111
- available_formats = fmt_list.map{|format| format.split("/").first}
112
-
113
- format_ext = {}
114
- format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
115
- format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
116
- format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
117
- format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
118
- format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
119
- format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
120
- format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
121
- format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
122
- format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
123
- format_ext["17"] = {:extension => "3gp", :name => "3gp"}
124
-
125
- #since 1.8 doesn't do ordered hashes
126
- prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
127
-
128
- selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
129
-
130
- puts "[YOUTUBE] Title: #{title}"
131
- puts "[YOUTUBE] Length: #{length_s} s"
132
- puts "[YOUTUBE] t-parameter: #{token}"
133
- #best quality seems always to be firsts
134
- puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
142
+ #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
143
+ fmt_list = video_info_hash["fmt_list"].split(",")
144
+ available_formats = fmt_list.map{|format| format.split("/").first}
145
+
146
+ format_ext = {}
147
+ format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
148
+ format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
149
+ format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
150
+ format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
151
+ format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
152
+ format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
153
+ format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
154
+ format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
155
+ format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
156
+ format_ext["17"] = {:extension => "3gp", :name => "3gp"}
157
+
158
+ #since 1.8 doesn't do ordered hashes
159
+ prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
160
+
161
+ selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
162
+
163
+ puts "[YOUTUBE] Title: #{title}"
164
+ puts "[YOUTUBE] Length: #{length_s} s"
165
+ puts "[YOUTUBE] t-parameter: #{token}"
166
+ #the best quality always seems to be listed first
167
+ puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
135
168
 
136
- #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
169
+ #video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
137
170
  download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
138
171
  #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
139
172
  download_url = $1 if download_url =~ /(.*?);\scodecs=/
140
- file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + format_ext[selected_format][:extension]
141
- puts "downloading to " + file_name
142
- {:url => download_url, :name => file_name}
143
- end
144
- end
173
+ file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + format_ext[selected_format][:extension]
174
+ puts "downloading to " + file_name
175
+ {:url => download_url, :name => file_name}
176
+ end
177
+ end
metadata CHANGED
@@ -1,12 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viddl-rb
3
3
  version: !ruby/object:Gem::Version
4
- hash: 139
4
+ hash: 137
5
5
  prerelease:
6
6
  segments:
7
7
  - 0
8
- - 64
9
- version: "0.64"
8
+ - 65
9
+ version: "0.65"
10
10
  platform: ruby
11
11
  authors:
12
12
  - Marc Seeger
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2012-04-15 00:00:00 Z
17
+ date: 2012-05-02 00:00:00 Z
18
18
  dependencies:
19
19
  - !ruby/object:Gem::Dependency
20
20
  name: nokogiri