opensubtitles 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +4 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +50 -0
  8. data/Rakefile +9 -0
  9. data/bin/getsub +157 -0
  10. data/lib/opensubtitles.rb +21 -0
  11. data/lib/opensubtitles/finder.rb +8 -0
  12. data/lib/opensubtitles/finder/first.rb +13 -0
  13. data/lib/opensubtitles/finder/interactive.rb +21 -0
  14. data/lib/opensubtitles/finder/score.rb +13 -0
  15. data/lib/opensubtitles/language.rb +84 -0
  16. data/lib/opensubtitles/movie.rb +16 -0
  17. data/lib/opensubtitles/movie_file.rb +67 -0
  18. data/lib/opensubtitles/search.rb +9 -0
  19. data/lib/opensubtitles/search/imdb.rb +27 -0
  20. data/lib/opensubtitles/search/movie_hash.rb +21 -0
  21. data/lib/opensubtitles/search/name.rb +28 -0
  22. data/lib/opensubtitles/search/path.rb +20 -0
  23. data/lib/opensubtitles/selector.rb +7 -0
  24. data/lib/opensubtitles/selector/format.rb +17 -0
  25. data/lib/opensubtitles/selector/movie.rb +29 -0
  26. data/lib/opensubtitles/server.rb +70 -0
  27. data/lib/opensubtitles/sub.rb +55 -0
  28. data/lib/opensubtitles/subtitle_finder.rb +30 -0
  29. data/lib/opensubtitles/version.rb +3 -0
  30. data/lib/opensubtitles/xmlrpc_monkey_patch.rb +88 -0
  31. data/opensubtitles.gemspec +26 -0
  32. data/spec/fixtures/http/check_movie_hash.yml +225 -0
  33. data/spec/fixtures/http/get_imdb_movie_details.yml +342 -0
  34. data/spec/fixtures/http/log_in.yml +86 -0
  35. data/spec/fixtures/http/log_out.yml +68 -0
  36. data/spec/fixtures/http/search_imdb.yml +761 -0
  37. data/spec/fixtures/http/search_subtitles_for_himym.yml +1189 -0
  38. data/spec/fixtures/http/search_subtitles_for_the_rock.yml +4124 -0
  39. data/spec/fixtures/http/server_info.yml +492 -0
  40. data/spec/fixtures/somemovie.avi +0 -0
  41. data/spec/opensubtitles/language_spec.rb +57 -0
  42. data/spec/opensubtitles/movie_file_spec.rb +18 -0
  43. data/spec/opensubtitles/server_spec.rb +123 -0
  44. data/spec/opensubtitles/sub_spec.rb +33 -0
  45. data/spec/spec_helper.rb +15 -0
  46. metadata +159 -0
@@ -0,0 +1,67 @@
1
module Opensubtitles
  # A movie file on disk. Knows where its subtitle files live and how to
  # compute the OpenSubtitles hash of its content.
  class MovieFile

    # Known video file extensions (without the leading dot).
    EXTENSIONS = %w(avi mpg m4v mkv mov ogv mp4)

    attr_reader :path, :language

    # path     - location of the movie file.
    # language - value inserted into subtitle file names (see #sub_path);
    #            defaults to Opensubtitles.default_language. May be nil.
    def initialize(path, language = Opensubtitles.default_language)
      @path = path
      @language = language
    end

    # Returns true when an .srt or .sub subtitle file (as named by #sub_path)
    # already exists next to the movie.
    def has_sub?
      %w(srt sub).any? { |ext| File.exist?(sub_path(ext)) }
    end

    # Builds the path of the subtitle file for the given format, in the same
    # directory as the movie: "<name>.<language>.<format>" when a language is
    # set, "<name>.<format>" otherwise.
    def sub_path(format)
      suffix = @language ? ".#{@language}.#{format}" : ".#{format}"
      File.join(File.dirname(path), File.basename(path, File.extname(path)) + suffix)
    end

    # Memoized OpenSubtitles hash of the file (a 16 character hex String).
    # NOTE(review): this overrides Object#hash with a String return value, so
    # MovieFile instances are presumably not meant to be used as Hash keys.
    def hash
      @hash ||= self.class.compute_hash(path)
    end

    # Memoized file size in bytes.
    def size
      @size ||= File.size(path)
    end

    # Memoized file name without directory and extension.
    def name
      @name ||= File.basename(path, File.extname(path))
    end

    CHUNK_SIZE = 64 * 1024 # in bytes

    # Computes the OpenSubtitles movie hash: the file size plus the sum of the
    # 64-bit words of the first and last CHUNK_SIZE bytes, truncated to 64 bits
    # and rendered as 16 lowercase hex digits.
    #
    # from http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    def self.compute_hash(path)
      filesize = File.size(path)
      hash = filesize

      File.open(path, 'rb') do |f|
        # Beginning of the file.
        hash = add_chunk(hash, f.read(CHUNK_SIZE))

        # For files smaller than one chunk, the "end" chunk starts at offset 0.
        f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)

        # And again for the end of the file.
        hash = add_chunk(hash, f.read(CHUNK_SIZE))
      end

      sprintf("%016x", hash)
    end

    # Adds every complete 64-bit word of chunk to hash, wrapping at 64 bits.
    # chunk is nil when IO#read hits EOF immediately (empty file); the hash is
    # then returned unchanged instead of raising NoMethodError.
    def self.add_chunk(hash, chunk)
      return hash if chunk.nil?

      # Q< = unsigned 64 bit, explicitly little-endian so the result does not
      # depend on the byte order of the host.
      chunk.unpack("Q<*").inject(hash) do |sum, n|
        (sum + n) & 0xffffffffffffffff # to remain as 64 bit number
      end
    end
    private_class_method :add_chunk

  end
end
@@ -0,0 +1,9 @@
1
module Opensubtitles
  # Namespace of the subtitle search strategies. Each strategy lives in its
  # own file under the "search" directory and is registered for autoloading.
  module Search
    search_dir = File.expand_path('search', File.dirname(__FILE__))

    # constant name => file name (without extension) inside search_dir
    {
      :IMDB      => 'imdb',
      :MovieHash => 'movie_hash',
      :Name      => 'name',
      :Path      => 'path'
    }.each do |const_name, file_name|
      autoload const_name, "#{search_dir}/#{file_name}"
    end
  end
end
@@ -0,0 +1,27 @@
1
module Opensubtitles
  module Search

    # Search strategy that first looks the movie up by name on IMDB, lets a
    # chooser pick one of the matches, and then searches subtitles by IMDB id.
    class IMDB

      # server   - object responding to #search_imdb and #search_subtitles.
      # selector - object responding to #chose(results); it receives the list
      #            of IMDB matches and returns one of them (or nil).
      def initialize(server, selector=Finder::First.new)
        @server = server
        @selector = selector
      end

      # Returns whatever @server.search_subtitles returns for the chosen IMDB
      # entry, or nil when there is no usable IMDB match or nothing was chosen.
      #
      # The previous guards required the IMDB results to be a Hash, while
      # Server#search_imdb returns an Array of result objects, so this method
      # always returned nil. The guards now validate the Array instead and
      # drop entries without an imdbid.
      def search_subs_for(movie, language)
        imdb_results = @server.search_imdb(:query => movie.name)
        return unless imdb_results.is_a?(Array)

        candidates = imdb_results.select do |result|
          result.respond_to?(:imdbid) && result.imdbid
        end
        return if candidates.empty?

        imdb_result = @selector.chose(candidates)
        return unless imdb_result

        @server.search_subtitles(
          :sublanguageid => language,
          :imdbid => imdb_result.imdbid
        )
      end

    end

  end
end
@@ -0,0 +1,21 @@
1
module Opensubtitles
  module Search

    # Search strategy that identifies the movie by its content hash and its
    # size in bytes.
    class MovieHash

      # server - object responding to #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Queries the server with the movie's hash, its byte size (sent as a
      # String) and the requested language; returns the server's answer as is.
      def search_subs_for(movie, language)
        query = {
          :moviehash     => movie.hash,
          :moviebytesize => movie.size.to_s,
          :sublanguageid => language
        }
        @server.search_subtitles(query)
      end

    end

  end
end
@@ -0,0 +1,28 @@
1
module Opensubtitles
  module Search

    # Search strategy that queries the server with the movie name and keeps
    # only the subtitles whose file name contains that name.
    class Name

      # server - object responding to #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Returns the array obtained from the server, filtered in place: a
      # subtitle is kept when its normalized file name contains the normalized
      # movie name.
      def search_subs_for(movie, language)
        candidates = @server.search_subtitles(:sublanguageid => language, :query => movie.name)
        wanted = normalize_name(movie.name)
        # keep_if mutates the receiver and always returns it.
        candidates.keep_if do |candidate|
          normalize_name(candidate.filename).include?(wanted) # MAYBE: Levenshtein ?
        end
      end

      protected

      # Lowercases the name and collapses every run of whitespace, dots,
      # dashes and underscores into a single space.
      def normalize_name(name)
        lowered = name.downcase
        lowered.gsub(/[\s\.\-\_]+/, ' ')
      end

    end

  end
end
@@ -0,0 +1,20 @@
1
module Opensubtitles
  module Search

    # Search strategy that sends the movie's path to the server as the
    # :tag criterion.
    class Path

      # server - object responding to #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Queries the server with the requested language and the movie path;
      # returns the server's answer as is.
      def search_subs_for(movie, language)
        query = { :sublanguageid => language, :tag => movie.path }
        @server.search_subtitles(query)
      end

    end

  end
end
@@ -0,0 +1,7 @@
1
module Opensubtitles
  # Namespace of the subtitle selectors. Each selector lives in its own file
  # under the "selector" directory and is registered for autoloading.
  module Selector
    selector_dir = File.expand_path('selector', File.dirname(__FILE__))

    # constant name => file name (without extension) inside selector_dir
    { :Format => 'format', :Movie => 'movie' }.each do |const_name, file_name|
      autoload const_name, "#{selector_dir}/#{file_name}"
    end
  end
end
@@ -0,0 +1,17 @@
1
module Opensubtitles
  module Selector

    # Selector that keeps only the subtitles whose format is one of the
    # accepted formats.
    class Format

      # formats - collection of accepted formats; must respond to #include?.
      def initialize(formats)
        @formats = formats
      end

      # Returns a new collection with the subtitles of an accepted format.
      # The movie argument is part of the selector interface and is unused.
      def select(subs, movie)
        accepted = @formats
        subs.select do |candidate|
          accepted.include?(candidate.format)
        end
      end

    end

  end
end
@@ -0,0 +1,29 @@
1
module Opensubtitles
  module Selector

    # Selector used when search results span several movies: the subtitles
    # are grouped by movie name and a finder picks which movie to keep.
    class Movie

      # movie_finder - object responding to #chose(names); it receives the
      #                list of movie names and returns the one to keep.
      def initialize(movie_finder=Finder::First.new)
        @movie_finder = movie_finder
      end

      # Returns subs untouched when they belong to at most one movie.
      # Otherwise returns the subtitles of the movie name chosen by the
      # finder, or an empty array when the chosen name matches no group.
      # The movie argument is part of the selector interface and is unused.
      def select(subs, movie)
        groups = group_by_movie_name(subs)
        return subs unless groups.length > 1

        chosen_name = @movie_finder.chose(groups.keys)
        groups.fetch(chosen_name) { [] }
      end

      # Builds a Hash mapping each movie name to the list of its subtitles,
      # keeping the original order of the subtitles inside every list.
      def group_by_movie_name(subs)
        subs.group_by { |sub| sub.movie_name }
      end

    end

  end
end
@@ -0,0 +1,70 @@
1
+ require 'ostruct'
2
+
3
module Opensubtitles

  # Raised by Server#login when the server answers with a status other than
  # '200 OK'. Inherits from StandardError (rather than Exception) so that a
  # plain `rescue` handles it like any other application error.
  class LoginFailed < ::StandardError
  end

  # Thin wrapper around the OpenSubtitles XML-RPC API.
  class Server

    attr_reader :username, :password, :language, :useragent, :client

    # Option keys forwarded, in this order, as positional arguments to
    # XMLRPC::Client.new.
    CLIENT_ARGS = [:host, :path, :port, :proxy_host, :proxy_port, :http_user, :http_password, :use_ssl, :timeout]

    DEFAULT_OPTIONS = {
      :host => 'api.opensubtitles.org',
      :path => '/xml-rpc',
      :timeout => 10
    }.freeze

    # options - Hash with the optional keys :username, :password, :language,
    #           :useragent and any of CLIENT_ARGS. Missing client options fall
    #           back to DEFAULT_OPTIONS.
    def initialize(options={})
      @username = options[:username] || ''
      @password = options[:password] || ''
      @language = options[:language] || 'eng'
      @useragent = options[:useragent] || 'opensubtitles v0.1'
      options = DEFAULT_OPTIONS.merge(options)
      @client = ::XMLRPC::Client.new(*options.values_at(*CLIENT_ARGS))
    end

    # Session token, obtained lazily through #login and then memoized.
    def token
      @token ||= login
    end

    # Calls LogIn and returns the session token.
    # Raises LoginFailed when the response status is not '200 OK'. The
    # password is deliberately kept out of the message so that it does not
    # end up in logs or backtraces.
    def login
      response = client.call('LogIn', username, password, language, useragent)
      if response['status'] != '200 OK'
        raise LoginFailed, "Failed to login with #{username}. Server return code: #{response['status']}"
      end
      response['token']
    end

    # Calls LogOut for the current token and forgets it.
    def logout
      client.call('LogOut', token)
      @token = nil
    end

    # Calls CheckMovieHash with the given hashes; returns the raw response.
    def check_movie_hash(*hashes)
      client.call('CheckMovieHash', token, hashes)
    end

    # Calls SearchSubtitles with the given query Hashes. Returns an Array of
    # Sub, empty when the response carries no (falsy) 'data'.
    def search_subtitles(*queries)
      subs = client.call('SearchSubtitles', token, queries)['data']
      subs ? subs.map{ |s| Sub.new(s) } : []
    end

    # Calls SearchMoviesOnIMDB with options[:query]. Returns an Array of
    # OpenStruct (imdbid, title), empty when the response carries no 'data'.
    # The options Hash passed by the caller is not modified.
    def search_imdb(options={})
      query = options[:query]
      imdb = client.call('SearchMoviesOnIMDB', token, query)['data']
      return [] unless imdb

      imdb.map{ |i| OpenStruct.new(:imdbid => i['id'], :title => i['title']) }
    end

    # Calls ServerInfo (no token involved); returns the raw response.
    def info
      client.call('ServerInfo')
    end

    # Calls GetIMDBMovieDetails and wraps the response 'data' in a Movie.
    def get_imdb_movie_details(id)
      Movie.new(client.call('GetIMDBMovieDetails', token, id)['data'])
    end

  end
end
@@ -0,0 +1,55 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+ require 'zlib'
4
+ require 'stringio'
5
+
6
module Opensubtitles

  # One subtitle entry built from a search result Hash.
  class Sub

    attr_reader :url, :format, :language, :rating, :user_ranks, :movie_name,
                :filename, :raw_data, :downloads_count, :bad_reports_count

    # data - Hash of one search result. The keys read here are
    #        'SubDownloadLink', 'SubFormat', 'SubLanguageID', 'SubRating',
    #        'UserRank', 'MovieName', 'SubFileName', 'SubDownloadsCnt' and
    #        'SubBad'. The untouched Hash stays available as #raw_data.
    def initialize(data)
      @url = URI.parse(data['SubDownloadLink'])
      @format = data['SubFormat']
      @language = Language.from_iso639_2b(data['SubLanguageID'])
      @rating = data['SubRating'].to_f
      @user_ranks = data['UserRank']
      @movie_name = data['MovieName']
      @filename = data['SubFileName']
      @downloads_count = data['SubDownloadsCnt'].to_i
      @bad_reports_count = data['SubBad'].to_i
      @raw_data = data
    end

    # Orders subtitles by rating.
    def <=>(other)
      rating <=> other.rating
    end

    # Totaly subjective formula to evaluate subtitle quality
    # Originaly developed by runa (https://github.com/runa)
    # https://github.com/byroot/opensubtitles/commit/9d71775#L0R122
    #
    # NOTE(review): bad_reports_count / downloads_count.next divides two
    # Integers, so the penalty is truncated (0 unless there are more bad
    # reports than downloads). Kept as is; confirm whether that is intended.
    def score
      uploader_score * downloads_count.next * (rating + 1) - bad_reports_count / downloads_count.next
    end

    # 2 when the uploader has a rank, 1 when the rank is empty or missing
    # (a result without 'UserRank' leaves user_ranks nil).
    def uploader_score
      (user_ranks.nil? || user_ranks.empty?) ? 1 : 2
    end

    # Memoized, decompressed subtitle content.
    def body
      @body ||= fetch_body
    end

    # Downloads the gzip-compressed subtitle from #url and returns the
    # inflated content. The reader is closed even when inflating fails.
    def fetch_body
      gzip = Zlib::GzipReader.new(StringIO.new(Net::HTTP.get(url)))
      begin
        gzip.read
      ensure
        gzip.close
      end
    end

  end

end
@@ -0,0 +1,30 @@
1
module Opensubtitles

  # Runs a pipeline to pick one subtitle for a movie: search engines produce
  # candidates, selectors narrow them down, finders pick the final subtitle.
  class SubtitleFinder

    # search_engines - objects responding to #search_subs_for(movie, language).
    # finders        - objects responding to #chose(subs).
    # selectors      - objects responding to #select(subs, movie); optional.
    def initialize(search_engines, finders, selectors=[])
      @search_engines = search_engines
      @finders = finders
      @selectors = selectors
    end

    # Tries every search engine in order. An engine answering nil is skipped;
    # otherwise its results go through all selectors, then each finder is
    # asked in turn. The first truthy pick is returned. Returns nil when no
    # engine/finder combination yields a subtitle.
    def find_sub_for(movie, language)
      @search_engines.each do |engine|
        found = engine.search_subs_for(movie, language)
        next if found.nil?

        narrowed = @selectors.inject(found) do |remaining, selector|
          selector.select(remaining, movie)
        end

        @finders.each do |finder|
          pick = finder.chose(narrowed)
          return pick if pick
        end
      end

      nil
    end

  end

end
@@ -0,0 +1,3 @@
1
module Opensubtitles
  # Version of the gem. Frozen so the constant cannot be mutated in place.
  VERSION = '0.0.1'.freeze
end
@@ -0,0 +1,88 @@
1
+ # OpenSubtitles.org returns an invalid Content-Length header, which makes the stdlib xmlrpc client raise.
2
+ # This is a dirty monkey patch to work around that. :'(
3
+
4
module XMLRPC
  class Client
    # Replacement for the stdlib implementation of do_rpc that tolerates a
    # response whose Content-Length header does not match the body size.
    #
    # Posts request to @path and returns the response body.
    #
    # request - the serialized call to send as the POST body.
    # async   - when true, a fresh HTTP connection is used for this call and
    #           "Connection: close" is sent; otherwise @http is reused.
    #
    # Raises RuntimeError on a non-2xx status, on a content type other than
    # text/xml, and on a missing or empty body.
    def do_rpc(request, async=false)
      header = {
        "User-Agent"     => USER_AGENT,
        "Content-Type"   => "text/xml; charset=utf-8",
        "Content-Length" => request.bytesize.to_s,
        "Connection"     => (async ? "close" : "keep-alive")
      }

      header["Cookie"] = @cookie if @cookie
      header.update(@http_header_extra) if @http_header_extra
      header["Authorization"] = @auth unless @auth.nil?

      resp = nil
      @http_last_response = nil

      if async
        # use a new HTTP object for each call
        http = net_http(@host, @port, @proxy_host, @proxy_port)
        http.use_ssl = @use_ssl if @use_ssl
        http.read_timeout = @timeout
        http.open_timeout = @timeout

        # post request
        http.start {
          resp = http.request_post(@path, request, header)
        }
      else
        # reuse the HTTP object for each call => connection alive is possible
        # we must start connection explicitly first time so that http.request
        # does not assume that we don't want keepalive
        @http.start unless @http.started?

        # post request
        resp = @http.request_post(@path, request, header)
      end

      @http_last_response = resp

      data = resp.body

      if resp.code == "401"
        # Authorization Required
        raise "Authorization failed.\nHTTP-Error: #{resp.code} #{resp.message}"
      elsif resp.code[0,1] != "2"
        raise "HTTP-Error: #{resp.code} #{resp.message}"
      end

      # assume text/xml on instances where Content-Type header is not set
      ct_expected = resp["Content-Type"] || 'text/xml'
      ct = parse_content_type(ct_expected).first
      if ct != "text/xml"
        if ct == "text/html"
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml'): \n#{data}"
        else
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml')"
        end
      end

      if data.nil? || data.bytesize == 0
        expected = resp["Content-Length"] || "<unknown>"
        # data may be nil here, so go through to_s to report a size of 0
        # instead of failing with NoMethodError while building the message.
        raise "Wrong size. Was #{data.to_s.bytesize}, should be #{expected}"
      end
      # HACK: this is the monkey patched part. A non-empty body whose size
      # differs from the Content-Length header is deliberately accepted.

      set_cookies = resp.get_fields("Set-Cookie")
      if set_cookies and !set_cookies.empty?
        # loaded lazily: only needed when the server actually sets cookies
        require 'webrick/cookie'
        @cookie = set_cookies.collect do |set_cookie|
          cookie = WEBrick::Cookie.parse_set_cookie(set_cookie)
          WEBrick::Cookie.new(cookie.name, cookie.value).to_s
        end.join("; ")
      end

      return data
    end
  end
end