viddl-rb 0.64 → 0.65
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +5 -0
- data/plugins/youtube.rb +153 -120
- metadata +4 -4
data/README.md
CHANGED
@@ -18,6 +18,11 @@ Download a video and extract the audio:
|
|
18
18
|
|
19
19
|
In both cases we'll name the output file according to the video title.
|
20
20
|
|
21
|
+
Download all videos in a YouTube playlist:
|
22
|
+
viddl-rb http://www.youtube.com/playlist?list=PL7E8DA0A515924126
|
23
|
+
|
24
|
+
Download all videos from a YouTube user:
|
25
|
+
viddl-rb http://www.youtube.com/user/tedtalksdirector
|
21
26
|
|
22
27
|
__Requirements:__
|
23
28
|
|
data/plugins/youtube.rb
CHANGED
@@ -1,144 +1,177 @@
|
|
1
1
|
|
2
2
|
class Youtube < PluginBase
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
3
|
+
#this will be called by the main app to check whether this plugin is responsible for the url passed
#
#Only the host part of the url is examined (scheme optional, case-insensitive), so an
#unrelated url that merely mentions "youtube.com" in its path or query string, or a
#look-alike host such as "notyoutube.com", is not claimed by this plugin.
#
#url - the url given by the user; nil or anything else is converted with to_s
#
#Returns true when the host is youtube.com, youtu.be or a subdomain of either, false otherwise.
def self.matches_provider?(url)
  candidate = url.to_s.strip
  youtube_host = %r{\A(?:https?://)?(?:[\w-]+\.)*(?:youtube\.com|youtu\.be)(?::\d+)?(?:[/?#]|\z)}i
  !(candidate =~ youtube_host).nil?
end
|
7
|
+
|
8
|
+
#collect the urls of every video reachable from a feed and return them in one flat array
#
#The page at feed_url is fetched and parsed with Nokogiri. While the page that was
#parsed last still carries a <link rel="next"> element, the page behind that link
#is fetched too and its video urls are added to the result.
#
#feed_url - address of the first feed page
def self.get_video_urls(feed_url)
  page = Nokogiri::HTML(open(feed_url))
  collected = [grab_urls(page)]

  #walk the chain of "next" links until a page no longer has one
  until (following = page.search("//feed/link[@rel='next']").first).nil?
    page = Nokogiri::HTML(open(following["href"]))
    collected.push(grab_urls(page))
  end

  collected.flatten
end
|
20
23
|
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
24
|
+
#extract all video urls from a feed and return them in an array
#
#feed - a parsed feed; anything that answers search(xpath) with a list of
#       elements that can be indexed with "href"
#
#Link elements that carry no href attribute are skipped, so the result never
#contains nil (a nil entry would crash later when the url is inspected).
def self.grab_urls(feed)
  feed.search("//entry/link[@rel='alternate']").map { |link| link["href"] }.compact
end
|
28
|
+
|
29
|
+
#find the playlist ID in a playlist url and return the urls of all videos on that playlist
#
#url - a playlist url, e.g.
#      http://www.youtube.com/view_play_list?p=F96B063007B44E1E&search_query=welt+auf+schw%C3%A4bisch
#      http://www.youtube.com/playlist?list=PL7E8DA0A515924126
#
#The ID is everything after "list=PL" or "p=" up to the next "&". It is no longer
#limited to exactly 16 characters, so longer IDs are not cut off, and "p=" only counts
#when it starts a parameter (directly after "?" or "&").
#
#Returns the array of video urls from get_video_urls, or an empty array (after printing
#a message) when the url contains no playlist ID.
def self.parse_playlist(url)
  playlist_id = url[/[?&](?:list=PL|p=)([\w-]+)/, 1]
  if playlist_id.nil?
    puts "[YOUTUBE] No playlist ID found in #{url}"
    return []
  end

  puts "[YOUTUBE] Playlist ID: #{playlist_id}"
  feed_url = "http://gdata.youtube.com/feeds/api/playlists/#{playlist_id}?&max-results=50&v=2"
  url_array = get_video_urls(feed_url)
  puts "[YOUTUBE] #{url_array.size} links found!"
  url_array
end
|
41
|
+
|
42
|
+
#look up the uploads feed of a user and return the urls of all videos found there
#
#username - the name that is inserted into the uploads feed url
#
#Prints the user name and the number of links found; returns whatever
#get_video_urls delivers for the uploads feed.
def self.parse_user(username)
  puts "[YOUTUBE] User: #{username}"
  uploads_feed = "http://gdata.youtube.com/feeds/api/users/#{username}/uploads?&max-results=50&v=2"
  found = get_video_urls(uploads_feed)
  puts "[YOUTUBE] #{found.size} links found!"
  found
end
|
49
|
+
|
50
|
+
#work out which videos a url stands for and resolve each of them to a download url and file name
#
#url - a playlist url (contains "view_play_list" or "playlist?list="),
#      a user url (ends in /user/<name>, an optional trailing slash is accepted)
#      or the url of a single video
#
#Returns an array with one result of grab_single_url_filename per downloadable video.
#Videos reported as :no_embed are left out. If nothing is left the process exits,
#as the main app expects.
def self.get_urls_and_filenames(url)
  if url.include?("view_play_list") || url.include?("playlist?list=") #if playlist
    puts "[YOUTUBE] playlist found! analyzing..."
    video_urls = parse_playlist(url)
    puts "[YOUTUBE] Starting playlist download"
    progress = "Downloading next movie on the playlist"
  elsif (user_match = url.match(%r{/user/(\w+)/?\s*\z})) #if user url, e.g. youtube.com/user/woot
    video_urls = parse_user(user_match[1])
    puts "[YOUTUBE] Starting user videos download"
    progress = "Downloading next user video"
  else #if single video
    video_urls = [url]
    progress = nil
  end

  return_values = []
  #the block variable is deliberately not called url: on ruby 1.8 a block parameter
  #with the name of an outer local overwrites that local
  video_urls.each do |video_url|
    puts "[YOUTUBE] #{progress} (#{video_url})" if progress
    result = grab_single_url_filename(video_url)
    return_values << result unless result == :no_embed #skip results that can not be downloaded
  end

  return_values.empty? ? exit : return_values #if no videos could be downloaded exit
end
|
75
|
+
|
76
|
+
#resolve the url of a single video to a direct download url and a file name
#
#The video info is fetched from http://www.youtube.com/get_video_info, decoded
#(see parse_video_info) and the first format of our preference list that the video
#offers is selected.
#
#url - url of a video ([...]v=<id>[...], /v/<id>, /embed/<id>) or a youtu.be short link
#
#Returns {:url => download_url, :name => file_name}, or :no_embed when the video info
#reports a failure or contains no stream in a format we know.
#Exits the process when no video ID can be found in the url.
def self.grab_single_url_filename(url)
  # youtu.be links redirect to the real video url, so get the redirect first
  url = open(url).base_uri.to_s if url.include?("youtu.be")

  #the youtube video ID looks like this: [...]v=abc5a5_afe5agae6g&[...], we only want the ID
  #addition: might also look like this /v/abc5-a5afe5agae6g
  video_id = url[/(v|embed)[\/=]([^\/\?\&]*)/, 2]
  if video_id.nil?
    puts "no video id found."
    exit
  end
  puts "[YOUTUBE] ID FOUND: #{video_id}"

  #let's get some infos about the video. data is urlencoded
  yt_url = "http://www.youtube.com/get_video_info?video_id=#{video_id}"
  video_info_hash = parse_video_info(open(yt_url).read)

  if video_info_hash["status"] == "fail"
    puts "Error: embedding disabled, no video info found"
    return :no_embed
  end

  title = video_info_hash["title"].to_s
  length_s = video_info_hash["length_seconds"]
  token = video_info_hash["token"]

  #fmt_list looks like "22/1280x720/...,18/640x360/...": the format id is the first part of each entry
  available_formats = video_info_hash["fmt_list"].to_s.split(",").map { |format| format.split("/").first }

  #for the formats, see: http://en.wikipedia.org/wiki/YouTube#Quality_and_codecs
  #kept as an array in order of preference, since 1.8 doesn't do ordered hashes
  known_formats = [
    {:id => "38", :extension => "mp4",  :name => "MP4 Highest Quality 4096x3027 (H.264, AAC)"},
    {:id => "37", :extension => "mp4",  :name => "MP4 Highest Quality 1920x1080 (H.264, AAC)"},
    {:id => "22", :extension => "mp4",  :name => "MP4 1280x720 (H.264, AAC)"},
    {:id => "45", :extension => "webm", :name => "WebM 1280x720 (VP8, Vorbis)"},
    {:id => "44", :extension => "webm", :name => "WebM 854x480 (VP8, Vorbis)"},
    {:id => "18", :extension => "mp4",  :name => "MP4 640x360 (H.264, AAC)"},
    {:id => "35", :extension => "flv",  :name => "FLV 854x480 (H.264, AAC)"},
    {:id => "34", :extension => "flv",  :name => "FLV 640x360 (H.264, AAC)"},
    {:id => "5",  :extension => "flv",  :name => "FLV 400x240 (Soerenson H.263)"},
    {:id => "17", :extension => "3gp",  :name => "3gp"}
  ]
  selected = known_formats.find { |candidate| available_formats.include?(candidate[:id]) }

  puts "[YOUTUBE] Title: #{title}"
  puts "[YOUTUBE] Length: #{length_s} s"
  puts "[YOUTUBE] t-parameter: #{token}"

  streams = video_info_hash["url_encoded_fmt_stream_map"] || {}
  download_url = selected && streams[selected[:id]]
  if download_url.nil?
    #either the video offers none of the formats above or the stream map has no url for it
    puts "[YOUTUBE] formats available: #{available_formats.inspect} (none of them can be downloaded)"
    return :no_embed
  end
  puts "[YOUTUBE] formats available: #{available_formats.inspect} (downloading format #{selected[:id]} -> #{selected[:name]})"

  #if download url ends with a ';' followed by a codec string remove that part because it stops URI.parse from working
  download_url = download_url[/(.*?);\scodecs=/, 1] || download_url

  file_name = title.delete("\"'").gsub(/[^0-9A-Za-z]/, '_') + "." + selected[:extension]
  puts "downloading to " + file_name
  {:url => download_url, :name => file_name}
end

#helper of grab_single_url_filename: convert the huge infostring into a hash
#
#The string is split at the & and each part is split into key and value around the
#first = ([...]blabla=blubb&narf=poit&marc=awesome[...]). Values are decoded until
#they no longer change; values of keys containing "_map" are parsed further by
#parse_map_value. When a key occurs twice the last occurrence wins.
def self.parse_video_info(video_info)
  info = {}
  video_info.to_s.split("&").each do |pair|
    key, encoded_value = pair.split("=", 2)
    next if key.nil?
    value = decode_until_stable(encoded_value.to_s)
    info[key] = key =~ /_map/ ? parse_map_value(key, value) : value
  end
  info
end

#helper of parse_video_info: url-decode text again and again until decoding
#changes nothing anymore (the info string is encoded several times over)
def self.decode_until_stable(text)
  loop do
    decoded = CGI::unescape(text)
    return decoded if decoded == text
    text = decoded
  end
end

#helper of parse_video_info: parse the decoded value of one of the "_map" keys
#
#url_encoded_fmt_stream_map     -> {format id => stream url}; parts without an itag are skipped
#fmt_map                        -> {format id => [remaining "/"-separated fields]}
#fmt_url_map and fmt_stream_map -> {format id => url}, entries look like "id|url"
#any other key                  -> array of the comma separated entries
def self.parse_map_value(key, value)
  entries = value.split(",")
  case key
  when "url_encoded_fmt_stream_map"
    streams = {}
    value.split("url=").each do |stream|
      stream = stream.chomp(",")
      next if stream.empty?
      format_id = stream[/\&itag=(\d+)/, 1]
      streams[format_id] = stream if format_id
    end
    streams
  when "fmt_map"
    formats = {}
    entries.each do |entry|
      format_id, *details = entry.split("/")
      formats[format_id] = details
    end
    formats
  when "fmt_url_map", "fmt_stream_map"
    urls_by_format = {}
    entries.each do |entry|
      format_id, stream_url = entry.split("|", 2)
      urls_by_format[format_id] = stream_url
    end
    urls_by_format
  else
    entries
  end
end
|
177
|
+
end
|
metadata
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viddl-rb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 137
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: "0.
|
8
|
+
- 65
|
9
|
+
version: "0.65"
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Marc Seeger
|
@@ -14,7 +14,7 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2012-
|
17
|
+
date: 2012-05-02 00:00:00 Z
|
18
18
|
dependencies:
|
19
19
|
- !ruby/object:Gem::Dependency
|
20
20
|
name: nokogiri
|