viddl-rb 0.6 → 0.7
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.txt +5 -3
- data/Gemfile.lock +25 -4
- data/README.md +110 -34
- data/Rakefile +18 -8
- data/TODO.txt +3 -0
- data/bin/helper/downloader.rb +25 -0
- data/bin/helper/driver.rb +47 -0
- data/bin/helper/parameter-parser.rb +67 -0
- data/bin/viddl-rb +39 -117
- data/helper/audio-helper.rb +49 -0
- data/helper/download-helper.rb +86 -58
- data/helper/plugin-helper.rb +60 -10
- data/helper/utility-helper.rb +16 -0
- data/lib/viddl-rb.rb +113 -0
- data/plugins/blip.rb +3 -2
- data/plugins/dailymotion.rb +44 -0
- data/plugins/metacafe.rb +53 -54
- data/plugins/soundcloud.rb +9 -12
- data/plugins/veoh.rb +40 -41
- data/plugins/vimeo.rb +30 -23
- data/plugins/youtube.rb +196 -143
- metadata +34 -11
@@ -0,0 +1,49 @@
|
|
1
|
+
require "open3"

module ViddlRb

  # This class is responsible for extracting audio from video files using ffmpeg.
  class AudioHelper

    #maps the audio codec name reported by ffmpeg to the extension of the extracted file
    EXTENSION_MAPPER = {
      'aac'    => 'm4a',
      'mp3'    => 'mp3',
      'vorbis' => 'ogg'
    }.freeze

    #extracts the audio track of the file at file_path (without re-encoding) into a
    #file with the same name but an extension that fits the detected audio codec.
    #an already existing output file is removed first.
    #raises a RuntimeError if ffmpeg gives no information about the file, if no audio
    #track is found or if the extraction itself fails.
    def self.extract(file_path)
      #only strip the last extension: "my.video.mp4" must become "my.video", not "my"
      no_ext_filename = file_path.chomp(File.extname(file_path))

      #capture stderr because ffmpeg expects an output param and will error out.
      #the arguments are passed as a list so that no shell is involved and
      #file names with spaces or shell characters are handled correctly.
      puts "Gathering information about the downloaded file."
      _probe_out, file_info, _probe_status = Open3.capture3("ffmpeg", "-i", file_path)
      puts "Done gathering information about the downloaded file."

      raise "ERROR: Error while checking audio track of #{file_path}" if file_info.to_s.empty?

      audio_format_matches = file_info.match(/Audio: (\w*)/)
      raise "ERROR: Couldn't find any audio:\n#{file_info.inspect}" unless audio_format_matches

      audio_format = audio_format_matches[1]
      puts "detected audio format: #{audio_format}"

      output_extension = EXTENSION_MAPPER.fetch(audio_format) do
        #lame fallback
        puts "Unknown audio format: #{audio_format}, using name as extension: '.#{audio_format}'."
        audio_format
      end
      output_filename = "#{no_ext_filename}.#{output_extension}"

      #never delete the input file: if input and output are the same file there is nothing to extract
      if File.expand_path(output_filename) == File.expand_path(file_path)
        puts "#{file_path} already is a .#{output_extension} file, skipping extraction."
        return nil
      end

      if File.exist?(output_filename)
        puts "Audio file seems to exist already, removing it before extraction."
        File.delete(output_filename)
      end

      #capture3 drains stdout and stderr at the same time, so ffmpeg can't block on a full pipe
      _out, extraction_errors, status = Open3.capture3("ffmpeg", "-i", file_path, "-vn", "-acodec", "copy", output_filename)
      raise "ERROR: ffmpeg could not extract the audio of #{file_path}:\n#{extraction_errors}" unless status.success?

      puts "Done extracting audio to #{output_filename}"
    end
  end

end
|
49
|
+
|
data/helper/download-helper.rb
CHANGED
@@ -1,58 +1,86 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
:
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
1
|
+
module ViddlRb

  #helper methods for saving files from the web, using wget, curl or plain open-uri.
  class DownloadHelper
    #usually not called directly
    #downloads uri with open-uri while showing a progress bar and returns the body as a string.
    #exits the program if the 'progressbar' gem can't be loaded.
    #NOTE(review): opening a url with the bare Kernel#open relies on open-uri and on a
    #ruby version that still supports it - confirm against the supported ruby versions.
    def self.fetch_file(uri)
      begin
        require "progressbar" #http://github.com/nex3/ruby-progressbar
      rescue LoadError
        puts "ERROR: You don't seem to have curl or wget on your system. In this case you'll need to install the 'progressbar' gem."
        exit
      end
      progress_bar = nil
      open(uri, :proxy => nil,
        :content_length_proc => lambda { |length|
          if length && 0 < length
            progress_bar = ProgressBar.new(uri.to_s, length)
            progress_bar.file_transfer_mode #to show download speed and file size
          end
        },
        :progress_proc => lambda { |progress|
          progress_bar.set(progress) if progress_bar
        }) {|file| return file.read}
    end

    #simple helper that will save a file from the web and save it with a progress bar
    #file_uri is saved as file_name inside save_dir. a failed download is retried
    #until amount_of_retries attempts have been made.
    #returns true if the download succeeded and false otherwise.
    def self.save_file(file_uri, file_name, save_dir = ".", amount_of_retries = 6)
      trap("SIGINT") { puts "goodbye"; exit }

      file_path = File.absolute_path(File.join(save_dir, file_name))
      success = false
      #Some providers seem to flake out every now end then
      amount_of_retries.times do |i|
        success = attempt_download(file_uri, file_path)
        #we were successful, we're outta here
        break if success
        puts "Download seems to have failed (retrying, attempt #{i+1}/#{amount_of_retries})"
        sleep 2
      end
      success
    end

    #makes a single download attempt with the first available tool (wget, curl, open-uri).
    #returns true if the attempt succeeded and false otherwise.
    #the result is taken from the tool that actually ran and not from $?, because $? may
    #still hold the status of an unrelated command (like the `which` lookup in os_has?).
    #the external tools get their arguments as a list so that no shell ever sees the url.
    def self.attempt_download(file_uri, file_path)
      if os_has?("wget")
        puts "using wget"
        system("wget", file_uri.to_s, "-O", file_path) == true
      elsif os_has?("curl")
        puts "using curl"
        #-L means: follow redirects, We set an agent because Vimeo seems to want one
        system("curl", "-A", "Wget/1.8.1", "--retry", "10", "--retry-delay", "5",
               "--retry-max-time", "4", "-L", file_uri.to_s, "-o", file_path) == true
      else
        puts "using net/http"
        File.open(file_path, 'wb') { |file| file.write(fetch_file(file_uri)) }
        puts
        true #fetch_file raises if the download fails
      end
    end
    private_class_method :attempt_download

    #checks to see whether the os has a certain utility like wget or curl
    #`` returns the standard output of the process
    #system returns the exit code of the process
    def self.os_has?(utility)
      windows = ENV['OS'] =~ /windows/i

      if windows
        if has_where?
          system("where /q #{utility}") #/q is the quiet mode flag
        else
          begin #as a fallback we just run the utility itself
            #system only returns nil if the command could not be run at all. a utility
            #that exits with an error (wget without arguments does) is still installed.
            !system(utility).nil?
          rescue Errno::ENOENT
            false
          end
        end
      else
        `which #{utility}`.include?(utility)
      end
    end

    #checks if Windows has the where utility (Server 2003 and later)
    #system only return nil if the command is not found
    def self.has_where?
      !system("where /q where").nil?
    end
  end

end
|
86
|
+
|
data/helper/plugin-helper.rb
CHANGED
@@ -1,10 +1,60 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
1
|
+
module ViddlRb

  #base class of all plugins. every class that inherits from it is registered automatically.
  class PluginBase

    #raised by a plugin when the video could not be downloaded for some reason.
    class CouldNotDownloadVideoError < StandardError; end

    #the object that receives everything the plugins write with #puts, #print, #putc and #printf
    @io = $stdout
    #the plugin classes, in the order in which they were defined
    @registered_plugins = []

    class << self
      attr_accessor :io
      attr_reader :registered_plugins

      #hook that ruby calls for every new subclass: the child is added to the registered plugins
      def inherited(child)
        PluginBase.registered_plugins << child
      end

      #returns a copy of the string that is safe to use as a file name:
      #\" and ' are removed, every other character that is not a digit or letter becomes _
      def make_filename_safe(string)
        without_quotes = string.delete("\"'")
        without_quotes.gsub(/[^\d\w]/, '_')
      end

      #the methods below shadow the Kernel printing methods (but not #p) so that
      #the output of the plugins ends up in the PluginBase io object, which is
      #not necessarily $stdout. all of them return nil.

      def puts(*objects)
        PluginBase.io.puts(*objects)
        nil
      end

      def print(*objects)
        PluginBase.io.print(*objects)
        nil
      end

      def putc(int)
        PluginBase.io.putc(int)
        nil
      end

      def printf(string, *objects)
        case string
        when IO, StringIO
          #a printf that targets an explicit IO object is not redirected
          super(string, *objects)
        else
          PluginBase.io.printf(string, *objects)
        end
        nil
      end
    end
  end

end
|
60
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# This class contains utility methods that are used by both the bin utility and the library.

module ViddlRb

  class UtilityHelper
    #the directory that contains the plugins that ship with viddl-rb
    PLUGIN_DIR = File.join(File.dirname(__FILE__), "../plugins")

    #loads all plugins (*.rb files) in the plugin directory.
    #the plugin classes are dynamically added to the ViddlRb module.
    #plugin_dir defaults to the bundled plugin directory.
    #the files are loaded in alphabetical order so that the load order does not
    #depend on the file system. returns the array of loaded plugin file paths.
    def self.load_plugins(plugin_dir = PLUGIN_DIR)
      Dir[File.join(plugin_dir, "*.rb")].sort.each do |plugin|
        #passing the file name and line makes backtraces and __FILE__ point to the plugin file instead of "(eval)"
        ViddlRb.class_eval(File.read(plugin), plugin, 1)
      end
    end
  end

end
|
16
|
+
|
data/lib/viddl-rb.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'helper')
|
3
|
+
|
4
|
+
require "rubygems"
|
5
|
+
require "net/http"
|
6
|
+
require "nokogiri"
|
7
|
+
require "mechanize"
|
8
|
+
require "cgi"
|
9
|
+
require "open-uri"
|
10
|
+
require "stringio"
|
11
|
+
require "download-helper.rb"
|
12
|
+
require "plugin-helper.rb"
|
13
|
+
require "utility-helper.rb"
|
14
|
+
|
15
|
+
#load all plugins
|
16
|
+
ViddlRb::UtilityHelper.load_plugins
|
17
|
+
|
18
|
+
module ViddlRb
  #raised when a plugin fails in some unexpected way.
  class PluginError < StandardError; end
  #raised when a video can't be downloaded for a known reason.
  class DownloadError < StandardError; end

  #the maximum number of redirects that are followed for a single download url
  MAX_REDIRECTS = 10

  #sets the object that receives the output of the plugins.
  def self.io=(io_object)
    PluginBase.io = io_object
  end

  #set the default PluginBase io object to a StringIO instance.
  #this will suppress any standard output from the plugins.
  self.io = StringIO.new

  #returns an array of hashes containing the download url(s) and filenames(s)
  #for the specified video url.
  #if the url does not match any plugin, return nil and if a plugin
  #throws an error, throw PluginError (or DownloadError if the plugin reported
  #that the video could not be downloaded).
  #the reason for returning an array is because some urls will give multiple
  #download urls (for example a Youtube playlist url).
  def self.get_urls_names(url)
    plugin = PluginBase.registered_plugins.find { |p| p.matches_provider?(url) }
    return nil unless plugin

    begin
      #we'll end up with an array of hashes with they keys :url and :name
      urls_filenames = plugin.get_urls_and_filenames(url)
    rescue PluginBase::CouldNotDownloadVideoError => e
      raise_download_error(e)
    rescue StandardError => e
      raise_plugin_error(e, plugin)
    end
    follow_all_redirects(urls_filenames)
  end

  #returns an array of download urls for the given video url,
  #or nil if the url does not match any plugin.
  def self.get_urls(url)
    urls_filenames = get_urls_names(url)
    urls_filenames && urls_filenames.map { |uf| uf[:url] }
  end

  #returns an array of filenames for the given video url,
  #or nil if the url does not match any plugin.
  def self.get_names(url)
    urls_filenames = get_urls_names(url)
    urls_filenames && urls_filenames.map { |uf| uf[:name] }
  end

  #same as get_urls_names but with the file extensions (:ext) instead of the filenames.
  #returns nil if the url does not match any plugin, just like get_urls and get_names.
  def self.get_urls_exts(url)
    urls_filenames = get_urls_names(url)
    return nil if urls_filenames.nil?

    urls_filenames.map do |uf|
      {:url => uf[:url], :ext => File.extname(uf[:name])}
    end
  end

  #<<< helper methods >>>

  #the default error message when a plugin fails in some unexpected way.
  #wraps e in a PluginError that names the plugin and keeps the original backtrace.
  def self.raise_plugin_error(e, plugin)
    error = PluginError.new(e.message + " [Plugin: #{plugin.name}]")
    error.set_backtrace(e.backtrace)
    raise error
  end
  private_class_method :raise_plugin_error

  #the default error message when a plugin fails to download a video for a known reason.
  #wraps e in a DownloadError that keeps the original backtrace.
  def self.raise_download_error(e)
    error = DownloadError.new(e.message)
    error.set_backtrace(e.backtrace)
    raise error
  end
  private_class_method :raise_download_error

  #takes a url-filenames array and returns a new array where the
  #"location" header has been followed all the way to the end for all urls.
  def self.follow_all_redirects(urls_filenames)
    urls_filenames.map do |uf|
      {:url => get_final_location(uf[:url]), :name => uf[:name]}
    end
  end
  private_class_method :follow_all_redirects

  #recursively get the final location (after following all redirects) for an url.
  #raises DownloadError if more than redirects_left redirects would have to be
  #followed, which protects against redirect loops.
  def self.get_final_location(url, redirects_left = MAX_REDIRECTS)
    raise DownloadError, "Too many redirects while resolving #{url}" if redirects_left < 0

    #the block gets the response before the body is read, so returning from
    #inside it means that the file itself is never downloaded here.
    Net::HTTP.get_response(URI(url)) do |res|
      location = res["location"]
      return url if location.nil?
      #the location header may be relative to the current url
      return get_final_location(URI.join(url, location).to_s, redirects_left - 1)
    end
  end
  private_class_method :get_final_location
end
|
data/plugins/blip.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
|
1
2
|
class Blip < PluginBase
|
2
3
|
# this will be called by the main app to check whether this plugin is responsible for the url passed
|
3
4
|
def self.matches_provider?(url)
|
@@ -5,12 +6,12 @@ class Blip < PluginBase
|
|
5
6
|
end
|
6
7
|
|
7
8
|
# return the url for original video file and title
|
8
|
-
def self.get_urls_and_filenames(url)
|
9
|
+
def self.get_urls_and_filenames(url, options = {})
|
9
10
|
id = self.to_id(url)
|
10
11
|
xml_url = "http://blip.tv/rss/#{id}"
|
11
12
|
doc = Nokogiri::XML(open(xml_url))
|
12
13
|
user = doc.at("//channel/item/blip:user").inner_text
|
13
|
-
title = doc.at("//channel/item/title").inner_text
|
14
|
+
title = PluginBase.make_filename_safe(doc.at("//channel/item/title").inner_text)
|
14
15
|
download_url = doc.at("//channel/item/media:group/media:content").attributes["url"].value
|
15
16
|
extention = download_url.split(".").last
|
16
17
|
file_name = "#{id}-#{user}-#{title}.#{extention}"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
|
2
|
+
#plugin for videos that are hosted on dailymotion.com
class Dailymotion < PluginBase

  #the video quality is chosen based on the following priority list:
  QUALITY_PRIORITY = %w[hd1080 hd720 hq sd ld].freeze

  # this will be called by the main app to check whether this plugin is responsible for the url passed
  def self.matches_provider?(url)
    url.include?("dailymotion.com")
  end

  # return the url for original video file and title
  # the result is an array with a single hash with the keys :url and :name.
  # raises CouldNotDownloadVideoError if the video is hosted by a partner or if the
  # page does not contain any download url.
  def self.get_urls_and_filenames(url, options = {})
    doc = Nokogiri::HTML(open(url))

    #check to see that the video is hosted on dailymotion.com - if not raise exception
    unless doc.xpath("//div[@class='dmco_html dmpi_video_partnerplayer']").empty?
      raise CouldNotDownloadVideoError,
            "This video is not hosted on dailymotion's own content servers. It can't be downloaded."
    end

    title = doc.xpath("//meta[@property='og:title']").attribute("content").value
    urls = get_download_urls(doc)
    quality = QUALITY_PRIORITY.find { |q| urls[q] } #quality is the first quality from the priority list that exists for the video
    #without this check a missing url would surface as an obscure NoMethodError on nil
    raise CouldNotDownloadVideoError, "Could not find any download url for this video." if quality.nil?

    down_url = urls[quality]
    extension = down_url[/(\.[\w\d]+)\?/, 1].to_s #empty when the url has no recognizable extension
    file_name = PluginBase.make_filename_safe(title) + extension

    [{:url => unescape_url(down_url), :name => file_name}]
  end

  #returns a hash with the different video qualities mapped to their respective download urls
  def self.get_download_urls(doc)
    flashvars = doc.xpath("//div[@class='dmco_html player_box']/script").text #the flash player script
    decoded = CGI::unescape(flashvars)
    url_array = decoded.scan(/(ld|sd|hq|hd720|hd1080)URL":"(.+?)"/).flatten #group 1 = the quality, group 2 = the url
    Hash[*url_array] #hash like this: {"quality" => "url"}
  end

  #remove backslashes
  def self.unescape_url(url)
    url.gsub("\\", "")
  end
end
|