viddl-rb 0.64 → 0.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +5 -0
- data/plugins/youtube.rb +153 -120
- metadata +4 -4
data/README.md
CHANGED
@@ -18,6 +18,11 @@ Download a video and extract the audio:
|
|
18
18
|
|
19
19
|
In both cases we'll name the output file according to the video title.
|
20
20
|
|
21
|
+
Download all videos on a Youtube playlist:
|
22
|
+
viddl-rb http://www.youtube.com/playlist?list=PL7E8DA0A515924126
|
23
|
+
|
24
|
+
Download all videos from a Youtube user:
|
25
|
+
viddl-rb http://www.youtube.com/user/tedtalksdirector
|
21
26
|
|
22
27
|
__Requirements:__
|
23
28
|
|
data/plugins/youtube.rb
CHANGED
@@ -1,144 +1,177 @@
|
|
1
1
|
|
2
2
|
class Youtube < PluginBase
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
#this will be called by the main app to check whether this plugin is responsible for the url passed
|
4
|
+
def self.matches_provider?(url)
|
5
|
+
url.include?("youtube.com") || url.include?("youtu.be")
|
6
|
+
end
|
7
|
+
|
8
|
+
#get all videos and return their urls in an array
|
9
|
+
def self.get_video_urls(feed_url)
|
10
|
+
urls = []
|
11
|
+
result_feed = Nokogiri::HTML(open(feed_url))
|
12
|
+
urls << grab_urls(result_feed)
|
12
13
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
14
|
+
#as long as the feed has a next link we follow it and add the resulting video urls
|
15
|
+
loop do
|
16
|
+
next_link = result_feed.search("//feed/link[@rel='next']").first
|
17
|
+
break if next_link.nil?
|
18
|
+
result_feed = Nokogiri::HTML(open(next_link["href"]))
|
19
|
+
urls << grab_urls(result_feed)
|
20
|
+
end
|
21
|
+
urls.flatten
|
22
|
+
end
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
24
|
+
#extract all video urls from a feed and return them in an array
|
25
|
+
def self.grab_urls(feed)
|
26
|
+
feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }
|
27
|
+
end
|
28
|
+
|
29
|
+
def self.parse_playlist(url)
|
30
|
+
#http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schwäbisch
|
31
|
+
#http://www.youtube.com/watch?v=9WEP5nCxkEY&videos=jKY836_WMhE&playnext_from=TL&playnext=1
|
32
|
+
#http://www.youtube.com/watch?v=Tk78sr5JMIU&videos=jKY836_WMhE
|
33
|
+
|
34
|
+
playlist_ID = url[/(?:list=PL|p=)(\w{16})&?/,1]
|
35
|
+
puts "[YOUTUBE] Playlist ID: #{playlist_ID}"
|
36
|
+
feed_url = "http://gdata.youtube.com/feeds/api/playlists/#{playlist_ID}?&max-results=50&v=2"
|
37
|
+
url_array = self.get_video_urls(feed_url)
|
38
|
+
puts "[YOUTUBE] #{url_array.size} links found!"
|
39
|
+
url_array
|
40
|
+
end
|
41
|
+
|
42
|
+
def self.parse_user(username)
|
43
|
+
puts "[YOUTUBE] User: #{username}"
|
44
|
+
feed_url = "http://gdata.youtube.com/feeds/api/users/#{username}/uploads?&max-results=50&v=2"
|
45
|
+
url_array = get_video_urls(feed_url)
|
46
|
+
puts "[YOUTUBE] #{url_array.size} links found!"
|
47
|
+
url_array
|
48
|
+
end
|
49
|
+
|
50
|
+
def self.get_urls_and_filenames(url)
|
51
|
+
return_values = []
|
52
|
+
if url.include?("view_play_list") || url.include?("playlist?list=") #if playlist
|
53
|
+
puts "[YOUTUBE] playlist found! analyzing..."
|
54
|
+
files = self.parse_playlist(url)
|
55
|
+
puts "[YOUTUBE] Starting playlist download"
|
56
|
+
files.each do |file|
|
57
|
+
puts "[YOUTUBE] Downloading next movie on the playlist (#{file})"
|
58
|
+
return_values << self.grab_single_url_filename(file)
|
59
|
+
end
|
60
|
+
elsif match = url.match(/\/user\/([\w\d]+)$/) #if user url, e.g. youtube.com/user/woot
|
61
|
+
username = match[1]
|
62
|
+
video_urls = self.parse_user(username)
|
63
|
+
puts "[YOUTUBE] Starting user videos download"
|
64
|
+
video_urls.each do |url|
|
65
|
+
puts "[YOUTUBE] Downloading next user video (#{url})"
|
66
|
+
return_values << self.grab_single_url_filename(url)
|
67
|
+
end
|
68
|
+
else #if single video
|
69
|
+
return_values << self.grab_single_url_filename(url)
|
70
|
+
end
|
71
|
+
|
72
|
+
return_values.reject! { |value| value == :no_embed } #remove results that can not be downloaded
|
73
|
+
return_values.empty? ? exit : return_values #if no videos could be downloaded exit
|
74
|
+
end
|
75
|
+
|
76
|
+
def self.grab_single_url_filename(url)
|
77
|
+
#the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID (the \w in the brackets)
|
78
|
+
#addition: might also look like this /v/abc5-a5afe5agae6g
|
79
|
+
# alternative: video_id = url[/v[\/=]([\w-]*)&?/, 1]
|
80
|
+
# First get the redirect
|
81
|
+
if url.include?("youtu.be")
|
82
|
+
url = open(url).base_uri.to_s
|
83
|
+
end
|
84
|
+
video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/,2]
|
85
|
+
if video_id.nil?
|
86
|
+
puts "no video id found."
|
87
|
+
exit
|
88
|
+
else
|
89
|
+
puts "[YOUTUBE] ID FOUND: #{video_id}"
|
90
|
+
end
|
91
|
+
#let's get some info about the video. the data is urlencoded
|
92
|
+
yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
|
93
|
+
video_info = open(yt_url).read
|
94
|
+
#converting the huge infostring into a hash, simply by splitting it at the & and then splitting each part into key and value around the =
|
95
|
+
#[...]blabla=blubb&narf=poit&marc=awesome[...]
|
96
|
+
video_info_hash = Hash[*video_info.split("&").collect { |v|
|
63
97
|
key, encoded_value = v.split("=")
|
64
98
|
if encoded_value.to_s.empty?
|
65
|
-
|
99
|
+
value = ""
|
66
100
|
else
|
67
101
|
#decode until everything is "normal"
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
102
|
+
while (encoded_value != CGI::unescape(encoded_value)) do
|
103
|
+
#"decoding"
|
104
|
+
encoded_value = CGI::unescape(encoded_value)
|
105
|
+
end
|
106
|
+
value = encoded_value
|
73
107
|
end
|
74
108
|
|
75
109
|
if key =~ /_map/
|
76
|
-
|
110
|
+
orig_value = value
|
77
111
|
value = value.split(",")
|
78
112
|
if key == "url_encoded_fmt_stream_map"
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
113
|
+
url_array = orig_value.split("url=").map{|url_string| url_string.chomp(",")}
|
114
|
+
result_hash = {}
|
115
|
+
url_array.each do |url|
|
116
|
+
next if url.to_s.empty?
|
117
|
+
format_id = url.match(/\&itag=(\d+)/)[1]
|
118
|
+
result_hash[format_id] = url
|
119
|
+
end
|
120
|
+
value = result_hash
|
87
121
|
elsif key == "fmt_map"
|
88
|
-
value = Hash[*value.collect{ |v|
|
122
|
+
value = Hash[*value.collect { |v|
|
89
123
|
k2, *v2 = v.split("/")
|
90
124
|
[k2, v2]
|
91
125
|
}.flatten(1)]
|
92
|
-
|
93
|
-
|
126
|
+
elsif key == "fmt_url_map" || key == "fmt_stream_map"
|
127
|
+
Hash[*value.collect { |v| v.split("|")}.flatten]
|
94
128
|
end
|
95
129
|
end
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
130
|
+
[key, value]
|
131
|
+
}.flatten]
|
132
|
+
|
133
|
+
if video_info_hash["status"] == "fail"
|
134
|
+
puts "Error: embedding disabled, no video info found"
|
135
|
+
return :no_embed
|
136
|
+
end
|
137
|
+
|
138
|
+
title = video_info_hash["title"]
|
139
|
+
length_s = video_info_hash["length_seconds"]
|
140
|
+
token = video_info_hash["token"]
|
107
141
|
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
|
142
|
+
#for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
|
143
|
+
fmt_list = video_info_hash["fmt_list"].split(",")
|
144
|
+
available_formats = fmt_list.map{|format| format.split("/").first}
|
145
|
+
|
146
|
+
format_ext = {}
|
147
|
+
format_ext["38"] = {:extension => "mp4", :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"}
|
148
|
+
format_ext["37"] = {:extension => "mp4", :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"}
|
149
|
+
format_ext["22"] = {:extension => "mp4", :name => "MP4 1280x720 (H.264, AAC)"}
|
150
|
+
format_ext["45"] = {:extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"}
|
151
|
+
format_ext["44"] = {:extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"}
|
152
|
+
format_ext["18"] = {:extension => "mp4", :name => "MP4 640x360 (H.264, AAC)"}
|
153
|
+
format_ext["35"] = {:extension => "flv", :name => "FLV 854x480 (H.264, AAC)"}
|
154
|
+
format_ext["34"] = {:extension => "flv", :name => "FLV 640x360 (H.264, AAC)"}
|
155
|
+
format_ext["5"] = {:extension => "flv", :name => "FLV 400x240 (Soerenson H.263)"}
|
156
|
+
format_ext["17"] = {:extension => "3gp", :name => "3gp"}
|
157
|
+
|
158
|
+
#explicit preference order, since Ruby 1.8 doesn't do ordered hashes
|
159
|
+
prefered_order = ["38","37","22","45","44","18","35","34","5","17"]
|
160
|
+
|
161
|
+
selected_format = prefered_order.select{|possible_format| available_formats.include?(possible_format)}.first
|
162
|
+
|
163
|
+
puts "[YOUTUBE] Title: #{title}"
|
164
|
+
puts "[YOUTUBE] Length: #{length_s} s"
|
165
|
+
puts "[YOUTUBE] t-parameter: #{token}"
|
166
|
+
#best quality always seems to be first
|
167
|
+
puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected_format} -> #{format_ext[selected_format][:name]})"
|
135
168
|
|
136
|
-
|
169
|
+
#video_info_hash.keys.sort.each{|key| puts "#{key} : #{video_info_hash[key]}" }
|
137
170
|
download_url = video_info_hash["url_encoded_fmt_stream_map"][selected_format]
|
138
171
|
#if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
|
139
172
|
download_url = $1 if download_url =~ /(.*?);\scodecs=/
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
end
|
173
|
+
file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + format_ext[selected_format][:extension]
|
174
|
+
puts "downloading to " + file_name
|
175
|
+
{:url => download_url, :name => file_name}
|
176
|
+
end
|
177
|
+
end
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viddl-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 137
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: "0.
|
8
|
+
- 65
|
9
|
+
version: "0.65"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Marc Seeger
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2012-
|
17
|
+
date: 2012-05-02 00:00:00 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: nokogiri
|