viddl-rb 0.6 → 0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG.txt +5 -3
- data/Gemfile.lock +25 -4
- data/README.md +110 -34
- data/Rakefile +18 -8
- data/TODO.txt +3 -0
- data/bin/helper/downloader.rb +25 -0
- data/bin/helper/driver.rb +47 -0
- data/bin/helper/parameter-parser.rb +67 -0
- data/bin/viddl-rb +39 -117
- data/helper/audio-helper.rb +49 -0
- data/helper/download-helper.rb +86 -58
- data/helper/plugin-helper.rb +60 -10
- data/helper/utility-helper.rb +16 -0
- data/lib/viddl-rb.rb +113 -0
- data/plugins/blip.rb +3 -2
- data/plugins/dailymotion.rb +44 -0
- data/plugins/metacafe.rb +53 -54
- data/plugins/soundcloud.rb +9 -12
- data/plugins/veoh.rb +40 -41
- data/plugins/vimeo.rb +30 -23
- data/plugins/youtube.rb +196 -143
- metadata +34 -11
@@ -0,0 +1,49 @@
|
|
1
|
+
require "open3"

module ViddlRb

  # This class is responsible for extracting audio from video files using ffmpeg.
  class AudioHelper

    # Maps the audio codec name printed by ffmpeg to the file extension used for the output file.
    EXTENSION_MAPPER = {
      'aac'    => 'm4a',
      'mp3'    => 'mp3',
      'vorbis' => 'ogg'
    }.freeze

    # Copies the audio track of +file_path+ into a new file next to it
    # (same name, extension chosen from the detected audio codec).
    #
    # file_path - path of the downloaded media file.
    #
    # Returns nil.
    # Raises RuntimeError if ffmpeg printed nothing on stderr or no "Audio:" stream
    # line could be found in its output.
    def self.extract(file_path)
      no_ext_filename = strip_extension(file_path)

      # ffmpeg expects an output param and will error out without one, but the stream
      # information we need is still printed on stderr, so that is what we capture.
      # The argument-array form bypasses the shell, so paths with spaces or shell
      # metacharacters are passed through untouched.
      puts "Gathering information about the downloaded file."
      _out, file_info, _status = Open3.capture3("ffmpeg", "-i", file_path)
      puts "Done gathering information about the downloaded file."

      if file_info.to_s.empty?
        raise "ERROR: Error while checking audio track of #{file_path}"
      end

      audio_format_matches = file_info.match(/Audio: (\w*)/)
      unless audio_format_matches
        raise "ERROR: Couldn't find any audio:\n#{file_info.inspect}"
      end

      audio_format = audio_format_matches[1]
      puts "detected audio format: #{audio_format}"

      output_extension = EXTENSION_MAPPER.fetch(audio_format) do
        #lame fallback
        puts "Unknown audio format: #{audio_format}, using name as extension: '.#{audio_format}'."
        audio_format
      end
      output_filename = "#{no_ext_filename}.#{output_extension}"

      # If the target name is the source file itself (e.g. an .mp3 that contains mp3 audio),
      # deleting the "existing" output below would destroy the input before ffmpeg reads it.
      if File.expand_path(output_filename) == File.expand_path(file_path)
        puts "Audio file would overwrite the source file, skipping extraction."
        return
      end

      if File.exist?(output_filename)
        puts "Audio file seems to exist already, removing it before extraction."
        File.delete(output_filename)
      end

      # capture3 drains stdout and stderr concurrently, so ffmpeg can never block on a full pipe.
      Open3.capture3("ffmpeg", "-i", file_path, "-vn", "-acodec", "copy", output_filename)
      puts "Done extracting audio to #{output_filename}"
    end

    # Returns +file_path+ without its final extension; directory components and any
    # earlier dots in the name are left alone ("./a/my.video.mp4" => "./a/my.video").
    def self.strip_extension(file_path)
      extension = File.extname(file_path)
      extension.empty? ? file_path : file_path[0...-extension.length]
    end
    private_class_method :strip_extension

  end

end
|
49
|
+
|
data/helper/download-helper.rb
CHANGED
@@ -1,58 +1,86 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
:
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
1
|
+
require "open-uri"

module ViddlRb

  # Saves remote files to disk. wget is used when available, then curl, and as a
  # last resort the file is fetched in-process through open-uri.
  class DownloadHelper

    # Reads the resource at +uri+ completely into memory while drawing a progress bar.
    # Usually not called directly; save_file uses it when neither wget nor curl exists.
    #
    # Returns the response body.
    # Exits the process if the 'progressbar' gem cannot be loaded.
    def self.fetch_file(uri)
      begin
        require "progressbar" #http://github.com/nex3/ruby-progressbar
      rescue LoadError
        puts "ERROR: You don't seem to have curl or wget on your system. In this case you'll need to install the 'progressbar' gem."
        exit
      end

      progress_bar = nil
      # Kernel#open stopped accepting URLs in Ruby 3.0; URI.open is the public entry point
      # where open-uri provides it, older rubies only have the Kernel variant.
      opener = URI.respond_to?(:open) ? URI : Kernel
      opener.open(uri, :proxy => nil,
        :content_length_proc => lambda { |length|
          if length && 0 < length
            progress_bar = ProgressBar.new(uri.to_s, length)
            progress_bar.file_transfer_mode #to show download speed and file size
          end
        },
        :progress_proc => lambda { |progress|
          progress_bar.set(progress) if progress_bar
        }) { |file| file.read }
    end

    # Downloads +file_uri+ and stores it as +file_name+ inside +save_dir+.
    #
    # file_uri          - the url to download.
    # file_name         - name of the file to create.
    # save_dir          - target directory (default: current directory).
    # amount_of_retries - maximum number of attempts (default: 6).
    #
    # Returns true if an attempt succeeded, false otherwise.
    def self.save_file(file_uri, file_name, save_dir = ".", amount_of_retries = 6)
      trap("SIGINT") { puts "goodbye"; exit }

      file_path = File.absolute_path(File.join(save_dir, file_name))
      success = false

      #Some providers seem to flake out every now and then
      amount_of_retries.times do |i|
        # The outcome of each attempt is tracked explicitly. Relying on $? is wrong for the
        # in-process fallback because $? then still holds the status of the failed
        # "which curl" probe. The argument-array form of system bypasses the shell, so
        # urls containing $, ` or quotes are passed verbatim.
        success =
          if os_has?("wget")
            puts "using wget"
            system("wget", file_uri.to_s, "-O", file_path)
          elsif os_has?("curl")
            puts "using curl"
            #-L means: follow redirects, We set an agent because Vimeo seems to want one
            system("curl", "-A", "Wget/1.8.1", "--retry", "10", "--retry-delay", "5",
                   "--retry-max-time", "4", "-L", file_uri.to_s, "-o", file_path)
          else
            puts "using net/http"
            File.open(file_path, 'wb') { |file| file.write(fetch_file(file_uri)) }
            puts
            true
          end

        #we were successful, we're outta here
        break if success

        puts "Download seems to have failed (retrying, attempt #{i+1}/#{amount_of_retries})"
        sleep 2
      end

      success ? true : false
    end

    #checks to see whether the os has a certain utility like wget or curl
    #`` returns the standard output of the process
    #system returns the exit code of the process
    def self.os_has?(utility)
      windows = ENV['OS'] =~ /windows/i

      if windows
        if has_where?
          system("where /q #{utility}") #/q is the quiet mode flag
        else
          begin #as a fallback we just run the utility itself
            system(utility)
          rescue Errno::ENOENT
            false
          end
        end
      else
        `which #{utility}`.include?(utility)
      end
    end

    #checks if Windows has the where utility (Server 2003 and later)
    #system only returns nil if the command is not found
    def self.has_where?
      !system("where /q where").nil?
    end
  end

end
|
86
|
+
|
data/helper/plugin-helper.rb
CHANGED
@@ -1,10 +1,60 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
class
|
4
|
-
|
5
|
-
|
6
|
-
#
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
1
|
+
require "stringio" # printf below checks for StringIO; make sure the constant exists when this file is loaded on its own

module ViddlRb

  # Base class of all provider plugins. Subclasses register themselves automatically
  # and get printing methods that write to a configurable IO object.
  class PluginBase

    #this exception is raised by the plugins when it was not
    #possible to download the video for some reason.
    class CouldNotDownloadVideoError < StandardError; end

    #some static stuff
    class << self
      attr_accessor :io
      attr_reader :registered_plugins
    end

    #all calls to #puts, #print, #putc and #printf made on a plugin class are redirected to this object
    @io = $stdout
    @registered_plugins = []

    #if you inherit from this class, the child gets added to the "registered plugins" array
    def self.inherited(child)
      super
      PluginBase.registered_plugins << child
    end

    #takes a string and returns a new string that is file name safe
    #deletes \"' and replaces anything else that is not a digit, letter or underscore with _
    def self.make_filename_safe(string)
      string.delete("\"'").gsub(/\W/, '_')
    end

    #the following methods redirect the Kernel printing methods (except #p) to the
    #PluginBase IO object. this is because sometimes we want plugins to
    #write to something else than $stdout. all of them return nil.

    def self.puts(*objects)
      PluginBase.io.puts(*objects)
      nil
    end

    def self.print(*objects)
      PluginBase.io.print(*objects)
      nil
    end

    def self.putc(int)
      PluginBase.io.putc(int)
      nil
    end

    def self.printf(string, *objects)
      if string.is_a?(IO) || string.is_a?(StringIO)
        super(string, *objects) # so we don't redirect the printf that prints to a separate IO object
      else
        PluginBase.io.printf(string, *objects)
      end
      nil
    end
  end

end
|
60
|
+
|
@@ -0,0 +1,16 @@
|
|
1
|
+
# This class contains utility methods that are used by both the bin utility and the library.
|
2
|
+
|
3
|
+
module ViddlRb

  class UtilityHelper
    #loads all plugins in the plugin directory.
    #the plugin classes are dynamically added to the ViddlRb module.
    #
    #plugin_dir - directory whose *.rb files are evaluated
    #             (default: the "plugins" directory next to this file's directory).
    #
    #returns the sorted array of plugin file paths that were evaluated.
    def self.load_plugins(plugin_dir = File.join(File.dirname(__FILE__), "../plugins"))
      #Dir[] makes no ordering promise on every platform; sorting keeps the load order stable.
      Dir[File.join(plugin_dir, "*.rb")].sort.each do |plugin|
        #passing the path and line makes backtraces point at the plugin file instead of "(eval)"
        ViddlRb.class_eval(File.read(plugin), plugin, 1)
      end
    end
  end

end
|
16
|
+
|
data/lib/viddl-rb.rb
ADDED
@@ -0,0 +1,113 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
$LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'helper')
|
3
|
+
|
4
|
+
require "rubygems"
|
5
|
+
require "net/http"
|
6
|
+
require "nokogiri"
|
7
|
+
require "mechanize"
|
8
|
+
require "cgi"
|
9
|
+
require "open-uri"
|
10
|
+
require "stringio"
|
11
|
+
require "download-helper.rb"
|
12
|
+
require "plugin-helper.rb"
|
13
|
+
require "utility-helper.rb"
|
14
|
+
|
15
|
+
#load all plugins
|
16
|
+
ViddlRb::UtilityHelper.load_plugins
|
17
|
+
|
18
|
+
module ViddlRb
  class PluginError < StandardError; end
  class DownloadError < StandardError; end

  #sets the object that receives the output printed by the plugins.
  def self.io=(io_object)
    PluginBase.io = io_object
  end

  #set the default PluginBase io object to a StringIO instance.
  #this will suppress any standard output from the plugins.
  self.io = StringIO.new

  #returns an array of hashes containing the download url(s) and filenames(s)
  #for the specified video url.
  #if the url does not match any plugin, return nil and if a plugin
  #throws an error, throw PluginError (or DownloadError when the plugin
  #raised CouldNotDownloadVideoError).
  #the reason for returning an array is because some urls will give multiple
  #download urls (for example a Youtube playlist url).
  def self.get_urls_names(url)
    plugin = PluginBase.registered_plugins.find { |p| p.matches_provider?(url) }
    return nil unless plugin

    begin
      #we'll end up with an array of hashes with the keys :url and :name
      urls_filenames = plugin.get_urls_and_filenames(url)
    rescue PluginBase::CouldNotDownloadVideoError => e
      raise_download_error(e)
    rescue StandardError => e
      raise_plugin_error(e, plugin)
    end
    follow_all_redirects(urls_filenames)
  end

  #returns an array of download urls for the given video url (nil if no plugin matches).
  def self.get_urls(url)
    urls_filenames = get_urls_names(url)
    urls_filenames.nil? ? nil : urls_filenames.map { |uf| uf[:url] }
  end

  #returns an array of filenames for the given video url (nil if no plugin matches).
  def self.get_names(url)
    urls_filenames = get_urls_names(url)
    urls_filenames.nil? ? nil : urls_filenames.map { |uf| uf[:name] }
  end

  #same as get_urls_names but with the file extensions (:ext) instead of the names.
  #returns nil if no plugin matches, like get_urls and get_names do.
  def self.get_urls_exts(url)
    urls_filenames = get_urls_names(url)
    return nil if urls_filenames.nil?

    urls_filenames.map do |uf|
      ext = File.extname(uf[:name])
      {:url => uf[:url], :ext => ext}
    end
  end

  #<<< helper methods >>>

  #the default error message when a plugin fails in some unexpected way.
  def self.raise_plugin_error(e, plugin)
    error = PluginError.new(e.message + " [Plugin: #{plugin.name}]")
    error.set_backtrace(e.backtrace)
    raise error
  end
  private_class_method :raise_plugin_error

  #the default error message when a plugin fails to download a video for a known reason.
  def self.raise_download_error(e)
    error = DownloadError.new(e.message)
    error.set_backtrace(e.backtrace)
    raise error
  end
  private_class_method :raise_download_error

  #takes a url-filenames array and returns a new array where the
  #"location" header has been followed all the way to the end for all urls.
  def self.follow_all_redirects(urls_filenames)
    urls_filenames.map do |uf|
      url = uf[:url]
      final_location = get_final_location(url)
      {:url => final_location, :name => uf[:name]}
    end
  end
  private_class_method :follow_all_redirects

  #recursively get the final location (after following all redirects) for an url.
  #redirect_limit bounds the number of hops so that a redirect loop raises
  #DownloadError instead of recursing forever.
  def self.get_final_location(url, redirect_limit = 10)
    if redirect_limit <= 0
      raise DownloadError, "Too many redirects while resolving #{url}"
    end

    Net::HTTP.get_response(URI(url)) do |res|
      location = res["location"]
      return url if location.nil?
      #a "location" header may be relative; resolve it against the url that produced it
      return get_final_location(URI.join(url, location).to_s, redirect_limit - 1)
    end
  end
  private_class_method :get_final_location
end
|
data/plugins/blip.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
|
1
2
|
class Blip < PluginBase
|
2
3
|
# this will be called by the main app to check whether this plugin is responsible for the url passed
|
3
4
|
def self.matches_provider?(url)
|
@@ -5,12 +6,12 @@ class Blip < PluginBase
|
|
5
6
|
end
|
6
7
|
|
7
8
|
# return the url for original video file and title
|
8
|
-
def self.get_urls_and_filenames(url)
|
9
|
+
def self.get_urls_and_filenames(url, options = {})
|
9
10
|
id = self.to_id(url)
|
10
11
|
xml_url = "http://blip.tv/rss/#{id}"
|
11
12
|
doc = Nokogiri::XML(open(xml_url))
|
12
13
|
user = doc.at("//channel/item/blip:user").inner_text
|
13
|
-
title = doc.at("//channel/item/title").inner_text
|
14
|
+
title = PluginBase.make_filename_safe(doc.at("//channel/item/title").inner_text)
|
14
15
|
download_url = doc.at("//channel/item/media:group/media:content").attributes["url"].value
|
15
16
|
extention = download_url.split(".").last
|
16
17
|
file_name = "#{id}-#{user}-#{title}.#{extention}"
|
@@ -0,0 +1,44 @@
|
|
1
|
+
|
2
|
+
# Plugin for videos hosted on dailymotion.com.
class Dailymotion < PluginBase

  #the video quality is chosen based on the following priority list:
  QUALITY_PRIORITY = %w[hd1080 hd720 hq sd ld].freeze

  # this will be called by the main app to check whether this plugin is responsible for the url passed
  def self.matches_provider?(url)
    url.include?("dailymotion.com")
  end

  # return the url for original video file and title
  #
  # url     - address of the dailymotion video page.
  # options - accepted for signature compatibility with the other plugins; not used here.
  #
  # Returns a one-element array holding a hash with the keys :url and :name.
  # Raises CouldNotDownloadVideoError if the page embeds a partner player or if
  # none of the known quality urls is present in the player script.
  def self.get_urls_and_filenames(url, options = {})
    doc = Nokogiri::HTML(open(url))

    #check to see that the video is hosted on dailymotion.com - if not raise exception
    unless doc.xpath("//div[@class='dmco_html dmpi_video_partnerplayer']").empty?
      raise CouldNotDownloadVideoError,
            "This video is not hosted on dailymotion's own content servers. It can't be downloaded."
    end

    title = doc.xpath("//meta[@property='og:title']").attribute("content").value
    urls = get_download_urls(doc)
    quality = QUALITY_PRIORITY.find { |q| urls[q] } #quality is the first quality from the priority list that exists for the video

    #without this check a page that exposes no known quality ends in a NoMethodError on nil
    if quality.nil?
      raise CouldNotDownloadVideoError, "Could not find a download url for this video."
    end

    down_url = urls[quality]
    #the pattern only matches an extension that is followed by a query string;
    #fall back to no extension rather than failing on String + nil
    extension = down_url[/(\.[\w\d]+)\?/, 1].to_s
    file_name = PluginBase.make_filename_safe(title) + extension

    [{:url => unescape_url(down_url), :name => file_name}]
  end

  #returns a hash with the different video qualities mapped to their respective download urls
  def self.get_download_urls(doc)
    flashvars = doc.xpath("//div[@class='dmco_html player_box']/script").text #the flash player script
    decoded = CGI::unescape(flashvars)
    url_array = decoded.scan(/(ld|sd|hq|hd720|hd1080)URL":"(.+?)"/).flatten #group 1 = the quality, group 2 = the url
    Hash[*url_array] #hash like this: {"quality" => "url"}
  end

  #remove backslashes
  def self.unescape_url(url)
    url.gsub("\\", "")
  end
end
|