viddl-rb 0.6 → 0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,49 @@
1
require "open3"

module ViddlRb

  # This class is responsible for extracting audio from video files using ffmpeg.
  class AudioHelper

    # Maps the audio codec name reported by ffmpeg to the file extension
    # used for the extracted audio file.
    EXTENSION_MAPPER = {
      'aac'    => 'm4a',
      'mp3'    => 'mp3',
      'vorbis' => 'ogg'
    }.freeze

    # Extracts the audio track of +file_path+ into a sibling file whose name
    # is the input name with its extension replaced by one matching the
    # detected audio codec. The track is copied, not re-encoded.
    #
    # file_path - path of the downloaded video file.
    #
    # Returns nil.
    # Raises RuntimeError if ffmpeg cannot be started, reports nothing about
    # the file, reports no audio stream, or fails during the extraction.
    def self.extract(file_path)
      puts "Gathering information about the downloaded file."
      # ffmpeg prints the stream information on stderr (it expects an output
      # param and errors out without one), so stderr is the part we inspect.
      _stdout, file_info, _status = run_ffmpeg("-i", file_path)
      puts "Done gathering information about the downloaded file."

      if file_info.to_s.empty?
        raise "ERROR: Error while checking audio track of #{file_path}"
      end

      audio_format_matches = file_info.match(/Audio: (\w*)/)
      unless audio_format_matches
        raise "ERROR: Couldn't find any audio:\n#{file_info.inspect}"
      end
      audio_format = audio_format_matches[1]
      puts "detected audio format: #{audio_format}"

      output_extension = EXTENSION_MAPPER.fetch(audio_format) do
        #lame fallback
        puts "Unknown audio format: #{audio_format}, using name as extension: '.#{audio_format}'."
        audio_format
      end
      output_filename = "#{strip_extension(file_path)}.#{output_extension}"

      # Never delete or overwrite the input: when it already carries the
      # target extension there is nothing to extract into a separate file.
      if File.expand_path(output_filename) == File.expand_path(file_path)
        puts "#{file_path} already has the extension '.#{output_extension}', skipping audio extraction."
        return nil
      end

      if File.exist?(output_filename)
        puts "Audio file seems to exist already, removing it before extraction."
        File.delete(output_filename)
      end

      _stdout, errors, status = run_ffmpeg("-i", file_path, "-vn", "-acodec", "copy", output_filename)
      unless status.success?
        raise "ERROR: ffmpeg failed to extract the audio of #{file_path}:\n#{errors}"
      end
      puts "Done extracting audio to #{output_filename}"
    end

    # Runs ffmpeg with the given arguments and returns [stdout, stderr, status].
    # The arguments are passed as a list, so no shell is involved and paths
    # containing spaces or shell metacharacters are handed over verbatim.
    # Both output streams are drained, so ffmpeg cannot stall on a full pipe.
    def self.run_ffmpeg(*arguments)
      Open3.capture3("ffmpeg", *arguments)
    rescue Errno::ENOENT
      raise "ERROR: Could not run ffmpeg. Is it installed and on your PATH?"
    end
    private_class_method :run_ffmpeg

    # Returns +path+ without its final extension; directories and any earlier
    # dots in the path are left untouched.
    def self.strip_extension(path)
      extension = File.extname(path)
      extension.empty? ? path : path[0...-extension.length]
    end
    private_class_method :strip_extension
  end

end
49
+
@@ -1,58 +1,86 @@
1
- class DownloadHelper
2
- #usually not called directly
3
- def self.fetch_file(uri)
4
-
5
- begin
6
- require "progressbar" #http://github.com/nex3/ruby-progressbar
7
- rescue LoadError
8
- puts "ERROR: You don't seem to have curl or wget on your system. In this case you'll need to install the 'progressbar' gem."
9
- exit
10
- end
11
- progress_bar = nil
12
- open(uri, :proxy => nil,
13
- :content_length_proc => lambda { |length|
14
- if length && 0 < length
15
- progress_bar = ProgressBar.new(uri.to_s, length)
16
- end
17
- },
18
- :progress_proc => lambda { |progress|
19
- progress_bar.set(progress) if progress_bar
20
- }) {|file| return file.read}
21
- end
22
-
23
- #simple helper that will save a file from the web and save it with a progress bar
24
- def self.save_file(file_uri, file_name)
25
- unescaped_uri = CGI::unescape(file_uri)
26
- result = false
27
- if os_has?("wget")
28
- puts "using wget"
29
- result = system("wget \"#{unescaped_uri}\" -O #{file_name}")
30
- elsif os_has?("curl")
31
- puts "using curl"
32
- #-L means: follow redirects, We set an agent because Vimeo seems to want one
33
- result = system("curl -A 'Wget/1.8.1' -L \"#{unescaped_uri}\" -o #{file_name}")
34
- else
35
- puts "using net/http"
36
- open(file_name, 'wb') { |file|
37
- file.write(fetch_file(unescaped_uri)); puts
38
- }
39
- result = true
40
- end
41
- result
42
- end
43
-
44
- #checks to see whether the os has a certain utility like wget or curl
45
- def self.os_has?(utility)
46
- windows = ENV['OS'] =~ /windows/i
47
- unless windows # if os is something else than Windows
48
- return `which #{utility}`.include?(utility)
49
- else # OS is Windows
50
- begin
51
- `#{utility} --version` #if running the command does not throw an error, Windows has it. --version is for prettier console output.
52
- return true
53
- rescue Errno::ENOENT
54
- return false
55
- end
56
- end
57
- end
58
- end
1
module ViddlRb

  # Saves files from the web to disk, using wget or curl when one of them is
  # installed and falling back to open-uri with a progress bar otherwise.
  class DownloadHelper
    #usually not called directly
    #
    # Reads the resource at +uri+ through open-uri and returns its body as a
    # String, showing a progress bar when the content length is known.
    # Exits the process if the 'progressbar' gem is not installed.
    def self.fetch_file(uri)
      begin
        require "progressbar" #http://github.com/nex3/ruby-progressbar
      rescue LoadError
        puts "ERROR: You don't seem to have curl or wget on your system. In this case you'll need to install the 'progressbar' gem."
        exit
      end
      require "open-uri"

      progress_bar = nil
      # URI#open is used instead of Kernel#open so that the argument can only
      # ever be treated as a URL, never as a local path or a "|command".
      URI.parse(uri.to_s).open(:proxy => nil,
        :content_length_proc => lambda { |length|
          if length && 0 < length
            progress_bar = ProgressBar.new(uri.to_s, length)
            progress_bar.file_transfer_mode #to show download speed and file size
          end
        },
        :progress_proc => lambda { |progress|
          progress_bar.set(progress) if progress_bar
        }) { |file| file.read }
    end

    #simple helper that will save a file from the web and save it with a progress bar
    #
    # file_uri          - the url to download.
    # file_name         - name of the file to write.
    # save_dir          - directory the file is written to (default: current directory).
    # amount_of_retries - maximum number of download attempts.
    #
    # Installs a SIGINT handler that prints "goodbye" and exits.
    # Returns true if an attempt succeeded, false otherwise.
    def self.save_file(file_uri, file_name, save_dir = ".", amount_of_retries = 6)
      trap("SIGINT") { puts "goodbye"; exit }

      file_path = File.absolute_path(File.join(save_dir, file_name))
      success = false
      #Some providers seem to flake out every now and then
      amount_of_retries.times do |i|
        success = download_once(file_uri, file_path)
        #we were successful, we're outta here
        break if success

        puts "Download seems to have failed (retrying, attempt #{i+1}/#{amount_of_retries})"
        sleep 2
      end
      success
    end

    # Performs a single download attempt and returns true or false.
    # The external tools are started without a shell (argument list form), so
    # the url and the path are passed verbatim. The result comes from the
    # attempt itself, not from $?, which may still hold the exit status of an
    # earlier os_has? probe when the open-uri fallback is used.
    def self.download_once(file_uri, file_path)
      if os_has?("wget")
        puts "using wget"
        system("wget", file_uri, "-O", file_path) ? true : false
      elsif os_has?("curl")
        puts "using curl"
        #-L means: follow redirects, We set an agent because Vimeo seems to want one
        system("curl", "-A", "Wget/1.8.1",
               "--retry", "10", "--retry-delay", "5", "--retry-max-time", "4",
               "-L", file_uri, "-o", file_path) ? true : false
      else
        puts "using net/http"
        File.open(file_path, 'wb') { |file|
          file.write(fetch_file(file_uri)); puts
        }
        true
      end
    end
    private_class_method :download_once

    #checks to see whether the os has a certain utility like wget or curl
    #system returns true for exit status 0, false for a non-zero exit status
    #and nil if the command could not be executed at all
    def self.os_has?(utility)
      windows = ENV['OS'] =~ /windows/i

      if windows
        if has_where?
          system("where", "/q", utility) ? true : false #/q is the quiet mode flag
        else
          #as a fallback we just run the utility itself; only nil means "not found".
          #--version keeps the utility from doing any real work.
          !system(utility, "--version", :out => File::NULL, :err => File::NULL).nil?
        end
      else
        system("which", utility, :out => File::NULL, :err => File::NULL) ? true : false
      end
    end

    #checks if Windows has the where utility (Server 2003 and later)
    #system only returns nil if the command is not found
    def self.has_where?
      !system("where /q where").nil?
    end
  end

end
86
+
@@ -1,10 +1,60 @@
1
- class PluginBase
2
- #some static stuff
3
- class << self; attr_reader :registered_plugins end
4
- @registered_plugins = []
5
-
6
- #if you inherit from this class, the child gets added to the "registered plugins" array
7
- def self.inherited(child)
8
- PluginBase.registered_plugins << child
9
- end
10
- end
1
module ViddlRb

  # Base class of all plugins. Subclasses register themselves automatically
  # and have their printing methods routed to a configurable IO object.
  class PluginBase

    #raised by the plugins when it was not possible to download
    #the video for some reason.
    class CouldNotDownloadVideoError < StandardError; end

    #calls to #puts, #print, #putc and #printf on this class are redirected to this object
    @io = $stdout
    #every class that inherits from PluginBase ends up in this array
    @registered_plugins = []

    class << self
      attr_accessor :io
      attr_reader :registered_plugins

      #inheritance hook: adds the new subclass to the "registered plugins" array
      def inherited(child)
        PluginBase.registered_plugins << child
      end

      #takes a string and returns a new string that is file name safe:
      #the characters " and ' are deleted and everything else that is not
      #a digit or a word character is replaced with _
      def make_filename_safe(string)
        unquoted = string.delete("\"'")
        unquoted.gsub(/[^\d\w]/, '_')
      end

      #the methods below shadow the Kernel printing methods (#p is not shadowed)
      #and write to the PluginBase IO object instead, because sometimes we want
      #plugins to write to something else than $stdout. they all return nil.

      def puts(*objects)
        PluginBase.io.puts(*objects)
        nil
      end

      def print(*objects)
        PluginBase.io.print(*objects)
        nil
      end

      def putc(int)
        PluginBase.io.putc(int)
        nil
      end

      #a printf whose first argument is itself an IO or StringIO is handed to
      #the regular printf, so printing to a separate IO object is not redirected
      def printf(string, *objects)
        case string
        when IO, StringIO
          super(string, *objects)
        else
          PluginBase.io.printf(string, *objects)
        end
        nil
      end
    end
  end

end
60
+
@@ -0,0 +1,16 @@
1
+ # This class contains utility methods that are used by both the bin utility and the library.
2
+
3
module ViddlRb

  class UtilityHelper
    #loads all plugins in the plugin directory.
    #the plugin classes are dynamically added to the ViddlRb module.
    #
    # plugin_dir - directory whose *.rb files are evaluated
    #              (default: the "plugins" directory next to this file's directory).
    #
    #the files are evaluated in sorted order so loading is deterministic, and
    #each one is evaluated under its own path so that backtraces and __FILE__
    #inside a plugin point at the plugin file instead of "(eval)".
    #
    #returns the array of plugin file paths that were evaluated.
    def self.load_plugins(plugin_dir = File.join(File.dirname(__FILE__), "../plugins"))
      Dir[File.join(plugin_dir, "*.rb")].sort.each do |plugin|
        ViddlRb.class_eval(File.read(plugin), plugin, 1)
      end
    end
  end

end
16
+
@@ -0,0 +1,113 @@
1
+ #!/usr/bin/env ruby
2
+ $LOAD_PATH << File.join(File.dirname(__FILE__), '..', 'helper')
3
+
4
+ require "rubygems"
5
+ require "net/http"
6
+ require "nokogiri"
7
+ require "mechanize"
8
+ require "cgi"
9
+ require "open-uri"
10
+ require "stringio"
11
+ require "download-helper.rb"
12
+ require "plugin-helper.rb"
13
+ require "utility-helper.rb"
14
+
15
+ #load all plugins
16
+ ViddlRb::UtilityHelper.load_plugins
17
+
18
module ViddlRb
  class PluginError < StandardError; end
  class DownloadError < StandardError; end

  #maximum number of "location" headers that are followed for a single url.
  MAX_REDIRECTS = 10

  #sets the IO object that the plugins print to.
  def self.io=(io_object)
    PluginBase.io = io_object
  end

  #set the default PluginBase io object to a StringIO instance.
  #this will suppress any standard output from the plugins.
  self.io = StringIO.new

  #returns an array of hashes containing the download url(s) and filename(s)
  #for the specified video url.
  #if the url does not match any plugin, return nil. if the plugin raises
  #CouldNotDownloadVideoError, raise DownloadError; if it raises any other
  #StandardError, raise PluginError.
  #the reason for returning an array is because some urls will give multiple
  #download urls (for example a Youtube playlist url).
  def self.get_urls_names(url)
    plugin = PluginBase.registered_plugins.find { |p| p.matches_provider?(url) }
    return nil unless plugin

    begin
      #we'll end up with an array of hashes with the keys :url and :name
      urls_filenames = plugin.get_urls_and_filenames(url)
    rescue PluginBase::CouldNotDownloadVideoError => e
      raise_download_error(e)
    rescue StandardError => e
      raise_plugin_error(e, plugin)
    end
    follow_all_redirects(urls_filenames)
  end

  #returns an array of download urls for the given video url,
  #or nil if the url does not match any plugin.
  def self.get_urls(url)
    urls_filenames = get_urls_names(url)
    urls_filenames && urls_filenames.map { |uf| uf[:url] }
  end

  #returns an array of filenames for the given video url,
  #or nil if the url does not match any plugin.
  def self.get_names(url)
    urls_filenames = get_urls_names(url)
    urls_filenames && urls_filenames.map { |uf| uf[:name] }
  end

  #same as get_urls_names but with the file extensions instead of the full names.
  #returns nil if the url does not match any plugin, like get_urls and get_names.
  def self.get_urls_exts(url)
    urls_filenames = get_urls_names(url)
    urls_filenames && urls_filenames.map do |uf|
      {:url => uf[:url], :ext => File.extname(uf[:name])}
    end
  end

  #<<< helper methods >>>

  #the default error message when a plugin fails in some unexpected way.
  #raises a PluginError that keeps the backtrace of the original error.
  def self.raise_plugin_error(e, plugin)
    error = PluginError.new(e.message + " [Plugin: #{plugin.name}]")
    error.set_backtrace(e.backtrace)
    raise error
  end
  private_class_method :raise_plugin_error

  #the default error message when a plugin fails to download a video for a known reason.
  #raises a DownloadError that keeps the backtrace of the original error.
  def self.raise_download_error(e)
    error = DownloadError.new(e.message)
    error.set_backtrace(e.backtrace)
    raise error
  end
  private_class_method :raise_download_error

  #takes a url-filenames array and returns a new array where the
  #"location" header has been followed all the way to the end for all urls.
  def self.follow_all_redirects(urls_filenames)
    urls_filenames.map do |uf|
      {:url => get_final_location(uf[:url]), :name => uf[:name]}
    end
  end
  private_class_method :follow_all_redirects

  #recursively get the final location (after following all redirects) for an url.
  #
  # url            - the url to resolve.
  # redirects_left - how many more "location" headers may be followed.
  #
  #raises DownloadError when the redirect limit is exhausted, so that a
  #redirect loop cannot recurse forever.
  def self.get_final_location(url, redirects_left = MAX_REDIRECTS)
    #break leaves the block as soon as the headers are in, so the body is not
    #read and the connection is closed before the next location is requested.
    location = Net::HTTP.get_response(URI(url)) { |res| break res["location"] }
    return url if location.nil?

    if redirects_left <= 0
      raise DownloadError, "Too many redirects while resolving the download url (stopped at #{url})"
    end

    #the location header may be relative, so resolve it against the current url
    get_final_location(URI.join(url, location).to_s, redirects_left - 1)
  end
  private_class_method :get_final_location
end
@@ -1,3 +1,4 @@
1
+
1
2
  class Blip < PluginBase
2
3
  # this will be called by the main app to check whether this plugin is responsible for the url passed
3
4
  def self.matches_provider?(url)
@@ -5,12 +6,12 @@ class Blip < PluginBase
5
6
  end
6
7
 
7
8
  # return the url for original video file and title
8
- def self.get_urls_and_filenames(url)
9
+ def self.get_urls_and_filenames(url, options = {})
9
10
  id = self.to_id(url)
10
11
  xml_url = "http://blip.tv/rss/#{id}"
11
12
  doc = Nokogiri::XML(open(xml_url))
12
13
  user = doc.at("//channel/item/blip:user").inner_text
13
- title = doc.at("//channel/item/title").inner_text.gsub(" ", "_")
14
+ title = PluginBase.make_filename_safe(doc.at("//channel/item/title").inner_text)
14
15
  download_url = doc.at("//channel/item/media:group/media:content").attributes["url"].value
15
16
  extention = download_url.split(".").last
16
17
  file_name = "#{id}-#{user}-#{title}.#{extention}"
@@ -0,0 +1,44 @@
1
+
2
# Plugin for videos hosted on dailymotion.com.
class Dailymotion < PluginBase

  #the video quality is chosen based on the following priority list:
  QUALITY_PRIORITY = %w[hd1080 hd720 hq sd ld]

  # this will be called by the main app to check whether this plugin is responsible for the url passed
  def self.matches_provider?(url)
    url.include?("dailymotion.com")
  end

  # return the url for original video file and title
  #
  # url     - the url of the video page.
  # options - accepted for interface compatibility; not used by this plugin.
  #
  # Returns a one-element array holding a hash with the keys :url and :name.
  # Raises CouldNotDownloadVideoError if the video is served by a partner
  # player or if no download url is found in the page.
  def self.get_urls_and_filenames(url, options = {})
    #URI#open instead of Kernel#open: the url comes from the user and must
    #never be interpreted as a local path or a "|command".
    doc = Nokogiri::HTML(URI.parse(url).open)

    #check to see that the video is hosted on dailymotion.com - if not raise exception
    unless doc.xpath("//div[@class='dmco_html dmpi_video_partnerplayer']").empty?
      raise CouldNotDownloadVideoError,
            "This video is not hosted on dailymotion's own content servers. It can't be downloaded."
    end

    title = doc.xpath("//meta[@property='og:title']").attribute("content").value
    urls = get_download_urls(doc)
    quality = QUALITY_PRIORITY.find { |q| urls[q] } #quality is the first quality from the priority list that exists for the video
    if quality.nil?
      raise CouldNotDownloadVideoError,
            "Could not find a download url for this video."
    end

    down_url = urls[quality]
    #the extension normally precedes the query string; fall back to an
    #extension at the very end of the url, or to none at all
    extension = down_url[/(\.[\w\d]+)\?/, 1] || down_url[/(\.\w+)\z/, 1] || ""
    file_name = PluginBase.make_filename_safe(title) + extension

    [{:url => unescape_url(down_url), :name => file_name}]
  end

  #returns a hash with the different video qualities mapped to their respective download urls
  def self.get_download_urls(doc)
    flashvars = doc.xpath("//div[@class='dmco_html player_box']/script").text #the flash player script
    decoded = CGI::unescape(flashvars)
    url_array = decoded.scan(/(ld|sd|hq|hd720|hd1080)URL":"(.+?)"/).flatten #group 1 = the quality, group 2 = the url
    Hash[*url_array] #hash like this: {"quality" => "url"}
  end

  #remove backslashes
  def self.unescape_url(url)
    url.gsub("\\", "")
  end
end