opensubtitles 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (46) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +4 -0
  3. data/.rspec +1 -0
  4. data/.travis.yml +10 -0
  5. data/Gemfile +3 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +50 -0
  8. data/Rakefile +9 -0
  9. data/bin/getsub +157 -0
  10. data/lib/opensubtitles.rb +21 -0
  11. data/lib/opensubtitles/finder.rb +8 -0
  12. data/lib/opensubtitles/finder/first.rb +13 -0
  13. data/lib/opensubtitles/finder/interactive.rb +21 -0
  14. data/lib/opensubtitles/finder/score.rb +13 -0
  15. data/lib/opensubtitles/language.rb +84 -0
  16. data/lib/opensubtitles/movie.rb +16 -0
  17. data/lib/opensubtitles/movie_file.rb +67 -0
  18. data/lib/opensubtitles/search.rb +9 -0
  19. data/lib/opensubtitles/search/imdb.rb +27 -0
  20. data/lib/opensubtitles/search/movie_hash.rb +21 -0
  21. data/lib/opensubtitles/search/name.rb +28 -0
  22. data/lib/opensubtitles/search/path.rb +20 -0
  23. data/lib/opensubtitles/selector.rb +7 -0
  24. data/lib/opensubtitles/selector/format.rb +17 -0
  25. data/lib/opensubtitles/selector/movie.rb +29 -0
  26. data/lib/opensubtitles/server.rb +70 -0
  27. data/lib/opensubtitles/sub.rb +55 -0
  28. data/lib/opensubtitles/subtitle_finder.rb +30 -0
  29. data/lib/opensubtitles/version.rb +3 -0
  30. data/lib/opensubtitles/xmlrpc_monkey_patch.rb +88 -0
  31. data/opensubtitles.gemspec +26 -0
  32. data/spec/fixtures/http/check_movie_hash.yml +225 -0
  33. data/spec/fixtures/http/get_imdb_movie_details.yml +342 -0
  34. data/spec/fixtures/http/log_in.yml +86 -0
  35. data/spec/fixtures/http/log_out.yml +68 -0
  36. data/spec/fixtures/http/search_imdb.yml +761 -0
  37. data/spec/fixtures/http/search_subtitles_for_himym.yml +1189 -0
  38. data/spec/fixtures/http/search_subtitles_for_the_rock.yml +4124 -0
  39. data/spec/fixtures/http/server_info.yml +492 -0
  40. data/spec/fixtures/somemovie.avi +0 -0
  41. data/spec/opensubtitles/language_spec.rb +57 -0
  42. data/spec/opensubtitles/movie_file_spec.rb +18 -0
  43. data/spec/opensubtitles/server_spec.rb +123 -0
  44. data/spec/opensubtitles/sub_spec.rb +33 -0
  45. data/spec/spec_helper.rb +15 -0
  46. metadata +159 -0
@@ -0,0 +1,67 @@
1
module Opensubtitles
  # A movie file on disk. Knows where its subtitle files would live and how to
  # compute the identifiers (hash, byte size, base name) used by the searches.
  class MovieFile

    EXTENSIONS = %w(avi mpg m4v mkv mov ogv mp4)

    # Subtitle extensions probed by #has_sub?.
    SUB_EXTENSIONS = %w(srt sub)

    CHUNK_SIZE = 64 * 1024 # in bytes

    # Keeps the running sum within an unsigned 64-bit integer.
    HASH_MASK = 0xffffffffffffffff

    attr_reader :path, :language

    # @param path [String] path of the movie file
    # @param language [Object, nil] language inserted into subtitle file names
    #   (interpolated with to_s); defaults to Opensubtitles.default_language
    def initialize(path, language = Opensubtitles.default_language)
      @path = path
      @language = language
    end

    # @return [Boolean] true when a .srt or .sub file already exists next to
    #   the movie (see #sub_path for the exact name that is probed)
    def has_sub?
      SUB_EXTENSIONS.any? { |ext| File.exist?(sub_path(ext)) }
    end

    # Path of the subtitle file for the given format, located next to the movie:
    # "<dir>/<name>.<language>.<format>", or "<dir>/<name>.<format>" when no
    # language is set.
    #
    # @param format [String] subtitle extension without the dot, e.g. "srt"
    # @return [String]
    def sub_path(format)
      extension = @language ? ".#{@language}.#{format}" : ".#{format}"
      File.join(File.dirname(path), name + extension)
    end

    # Memoized result of MovieFile.compute_hash for this file.
    # NOTE: this intentionally shadows Object#hash and returns a String.
    #
    # @return [String] 16-character lowercase hex digest
    def hash
      @hash ||= self.class.compute_hash(path)
    end

    # @return [Integer] file size in bytes (memoized)
    def size
      @size ||= File.size(path)
    end

    # @return [String] file name without directory and extension (memoized)
    def name
      @name ||= File.basename(path, File.extname(path))
    end

    # Computes the hash used to identify a movie file:
    # file size + sum of the 64-bit words of the first 64 KiB + sum of the
    # 64-bit words of the last 64 KiB, truncated to 64 bits.
    # For files shorter than 64 KiB both reads start at offset 0.
    #
    # from http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    #
    # @param path [String] path of the file to hash
    # @return [String] 16-character lowercase hex digest
    def self.compute_hash(path)
      filesize = File.size(path)
      hash = filesize

      File.open(path, 'rb') do |f|
        hash = fold_chunk(hash, f.read(CHUNK_SIZE))

        f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)

        # And again for the end of the file
        hash = fold_chunk(hash, f.read(CHUNK_SIZE))
      end

      sprintf("%016x", hash)
    end

    # Adds every complete little-endian 64-bit word of +chunk+ to +sum+,
    # wrapping at 64 bits. A nil chunk (read at EOF, i.e. an empty file)
    # leaves the sum untouched instead of raising.
    def self.fold_chunk(sum, chunk)
      return sum if chunk.nil?

      # Q< = unsigned 64-bit, little-endian regardless of the host byte order
      chunk.unpack('Q<*').inject(sum) { |acc, n| (acc + n) & HASH_MASK }
    end
    private_class_method :fold_chunk

  end
end
@@ -0,0 +1,9 @@
1
module Opensubtitles
  # Namespace of the subtitle search strategies. Each strategy class is
  # registered for autoloading from the sibling "search" directory.
  module Search
    search_dir = File.expand_path('search', File.dirname(__FILE__))

    {
      :IMDB      => 'imdb',
      :MovieHash => 'movie_hash',
      :Name      => 'name',
      :Path      => 'path'
    }.each do |const_name, file_name|
      autoload const_name, File.join(search_dir, file_name)
    end
  end
end
@@ -0,0 +1,27 @@
1
module Opensubtitles
  module Search

    # Search strategy that first looks the movie up on IMDB by name, lets a
    # selector pick one of the IMDB matches, then searches subtitles for the
    # chosen IMDB id.
    class IMDB

      # @param server [#search_imdb, #search_subtitles] API client
      # @param selector [#chose] picks one entry among the IMDB results
      def initialize(server, selector=Finder::First.new)
        @server = server
        @selector = selector
      end

      # @param movie [#name] movie whose name is used as the IMDB query
      # @param language [String] value sent as :sublanguageid
      # @return [Object, nil] whatever server.search_subtitles returns, or nil
      #   when IMDB yields no result or the selector picks nothing
      def search_subs_for(movie, language)
        imdb_results = @server.search_imdb(:query => movie.name)
        # search_imdb returns a list of results (possibly empty). The previous
        # Hash-only guard rejected every list, so this search could never succeed.
        return if imdb_results.nil? || imdb_results.empty?

        imdb_result = @selector.chose(imdb_results)
        return unless imdb_result

        @server.search_subtitles(
          :sublanguageid => language,
          :imdbid => imdb_result.imdbid
        )
      end

    end

  end
end
@@ -0,0 +1,21 @@
1
module Opensubtitles
  module Search

    # Search strategy that looks subtitles up by the movie file's hash and
    # byte size.
    class MovieHash

      # @param server [#search_subtitles] API client
      def initialize(server)
        @server = server
      end

      # @param movie [#hash, #size] movie file to look up
      # @param language [String] value sent as :sublanguageid
      # @return [Object] whatever server.search_subtitles returns
      def search_subs_for(movie, language)
        criteria = {
          :moviehash     => movie.hash,
          :moviebytesize => movie.size.to_s,
          :sublanguageid => language
        }
        @server.search_subtitles(criteria)
      end

    end

  end
end
@@ -0,0 +1,28 @@
1
module Opensubtitles
  module Search

    # Search strategy that queries subtitles by movie name and keeps only
    # those whose normalized file name contains the normalized movie name.
    class Name

      # @param server [#search_subtitles] API client
      def initialize(server)
        @server = server
      end

      # @param movie [#name] movie to search subtitles for
      # @param language [String] value sent as :sublanguageid
      # @return [Array] the list returned by the server, filtered in place
      def search_subs_for(movie, language)
        wanted = normalize_name(movie.name)
        found = @server.search_subtitles(:sublanguageid => language, :query => movie.name)
        # MAYBE: Levenshtein ?
        found.keep_if { |candidate| normalize_name(candidate.filename).include?(wanted) }
      end

      protected

      # Lowercases the name and collapses runs of whitespace, dots, dashes
      # and underscores into a single space.
      def normalize_name(name)
        lowered = name.downcase
        lowered.gsub(/[\s\.\-\_]+/, ' ')
      end

    end

  end
end
@@ -0,0 +1,20 @@
1
module Opensubtitles
  module Search

    # Search strategy that sends the movie's path as the :tag search field.
    class Path

      # @param server [#search_subtitles] API client
      def initialize(server)
        @server = server
      end

      # @param movie [#path] movie to search subtitles for
      # @param language [String] value sent as :sublanguageid
      # @return [Object] whatever server.search_subtitles returns
      def search_subs_for(movie, language)
        criteria = { :sublanguageid => language, :tag => movie.path }
        @server.search_subtitles(criteria)
      end

    end

  end
end
@@ -0,0 +1,7 @@
1
module Opensubtitles
  # Namespace of the subtitle selectors. Each selector class is registered
  # for autoloading from the sibling "selector" directory.
  module Selector
    selector_dir = File.expand_path('selector', File.dirname(__FILE__))

    { :Format => 'format', :Movie => 'movie' }.each do |const_name, file_name|
      autoload const_name, File.join(selector_dir, file_name)
    end
  end
end
@@ -0,0 +1,17 @@
1
module Opensubtitles
  module Selector

    # Keeps only the subtitles whose format is one of the accepted formats.
    class Format

      # @param formats [#include?] accepted subtitle formats
      def initialize(formats)
        @formats = formats
      end

      # @param subs [Enumerable] subtitles responding to #format
      # @param movie [Object] unused; present so all selectors share a signature
      # @return [Array] the subtitles in an accepted format
      def select(subs, movie)
        subs.reject { |candidate| !@formats.include?(candidate.format) }
      end

    end

  end
end
@@ -0,0 +1,29 @@
1
module Opensubtitles
  module Selector

    # When the subtitles belong to several different movies, asks a finder to
    # pick one movie name and keeps only the subtitles of that movie.
    class Movie

      # @param movie_finder [#chose] picks one name among the movie names
      def initialize(movie_finder=Finder::First.new)
        @movie_finder = movie_finder
      end

      # @param subs [Enumerable] subtitles responding to #movie_name
      # @param movie [Object] unused; present so all selectors share a signature
      # @return [Enumerable, Array] +subs+ itself when at most one movie is
      #   represented, otherwise the subtitles of the chosen movie (or [] when
      #   the finder picks an unknown name)
      def select(subs, movie)
        grouped = group_by_movie_name(subs)
        return subs unless grouped.length > 1

        chosen_name = @movie_finder.chose(grouped.keys)
        grouped.fetch(chosen_name, nil) || []
      end

      # @return [Hash] movie name => subtitles of that movie, in first-seen order
      def group_by_movie_name(subs)
        subs.group_by { |entry| entry.movie_name }
      end

    end

  end
end
@@ -0,0 +1,70 @@
1
+ require 'ostruct'
2
+
3
module Opensubtitles

  # Raised by Server#login when the server answers with a status other
  # than '200 OK'.
  # NOTE(review): this inherits from ::Exception, so a bare `rescue` does not
  # catch it. Kept as-is for backward compatibility; StandardError would be
  # the conventional superclass.
  class LoginFailed < ::Exception
  end

  # Thin client for the OpenSubtitles XML-RPC API. Logs in lazily: the first
  # call that needs a token triggers #login.
  class Server

    attr_reader :username, :password, :language, :useragent, :client

    # Options forwarded, in this order, as positional arguments to
    # XMLRPC::Client.new.
    CLIENT_ARGS = [:host, :path, :port, :proxy_host, :proxy_port, :http_user, :http_password, :use_ssl, :timeout]

    DEFAULT_OPTIONS = {
      :host => 'api.opensubtitles.org',
      :path => '/xml-rpc',
      :timeout => 10
    }.freeze

    # @param options [Hash] :username, :password, :language, :useragent plus
    #   any of CLIENT_ARGS; missing client options fall back to DEFAULT_OPTIONS
    def initialize(options={})
      @username = options[:username] || ''
      @password = options[:password] || ''
      @language = options[:language] || 'eng'
      @useragent = options[:useragent] || 'opensubtitles v0.1'
      options = DEFAULT_OPTIONS.merge(options)
      @client = ::XMLRPC::Client.new(*options.values_at(*CLIENT_ARGS))
    end

    # @return [String] the session token, logging in on first use
    def token
      @token ||= login
    end

    # Calls LogIn and returns the session token.
    #
    # @return [String] the token from the response
    # @raise [LoginFailed] when the response status is not '200 OK'
    def login
      response = client.call('LogIn', username, password, language, useragent)
      if response['status'] != '200 OK'
        # The password is deliberately left out of the message so it cannot
        # end up in logs or backtraces.
        raise LoginFailed.new("Failed to login with #{username}. Server return code: #{response['status']}")
      end
      response['token']
    end

    # Calls LogOut with the current token and forgets it.
    def logout
      client.call('LogOut', token)
      @token = nil
    end

    # @param hashes [Array<String>] movie hashes to check
    # @return [Object] the raw CheckMovieHash response
    def check_movie_hash(*hashes)
      client.call('CheckMovieHash', token, hashes)
    end

    # @param queries [Array<Hash>] search criteria
    # @return [Array<Sub>] matching subtitles, [] when the response carries
    #   no data
    def search_subtitles(*queries)
      subs = client.call('SearchSubtitles', token, queries)['data']
      subs ? subs.map{ |s| Sub.new(s) } : []
    end

    # @param options [Hash] :query is the movie name to look up; the hash is
    #   only read, never modified
    # @return [Array<OpenStruct>] entries with #imdbid and #title, [] when the
    #   response carries no data
    def search_imdb(options={})
      query = options[:query]
      imdb = client.call('SearchMoviesOnIMDB', token, query)['data']
      return [] unless imdb

      imdb.map{ |i| OpenStruct.new(:imdbid => i['id'], :title => i['title']) }
    end

    # @return [Object] the raw ServerInfo response (no login required)
    def info
      client.call('ServerInfo')
    end

    # @param id [String, Integer] IMDB id
    # @return [Movie] built from the 'data' part of the response
    def get_imdb_movie_details(id)
      Movie.new(client.call('GetIMDBMovieDetails', token, id)['data'])
    end

  end
end
@@ -0,0 +1,55 @@
1
+ require 'uri'
2
+ require 'net/http'
3
+ require 'zlib'
4
+ require 'stringio'
5
+
6
module Opensubtitles

  # One subtitle entry from a search response, with accessors for the fields
  # used for ranking and a lazy download of the subtitle body.
  class Sub

    attr_reader :url, :format, :language, :rating, :user_ranks, :movie_name,
                :filename, :raw_data, :downloads_count, :bad_reports_count

    # @param data [Hash] one raw entry of the search response, keyed by the
    #   API field names ('SubDownloadLink', 'SubFormat', ...)
    def initialize(data)
      @url = URI.parse(data['SubDownloadLink'])
      @format = data['SubFormat']
      @language = Language.from_iso639_2b(data['SubLanguageID'])
      @rating = data['SubRating'].to_f
      @user_ranks = data['UserRank']
      @movie_name = data['MovieName']
      @filename = data['SubFileName']
      @downloads_count = data['SubDownloadsCnt'].to_i
      @bad_reports_count = data['SubBad'].to_i
      @raw_data = data
    end

    # Orders subtitles by rating.
    def <=>(other)
      rating <=> other.rating
    end

    # Totally subjective formula to evaluate subtitle quality
    # Originally developed by runa (https://github.com/runa)
    # https://github.com/byroot/opensubtitles/commit/9d71775#L0R122
    #
    # The bad-report penalty uses float division: with integer division it
    # was truncated to 0 whenever there were fewer bad reports than
    # downloads + 1, so bad reports never lowered the score.
    #
    # @return [Float]
    def score
      uploader_score * downloads_count.next * (rating + 1) - bad_reports_count.fdiv(downloads_count.next)
    end

    # @return [Integer] 2 when the uploader has a rank, 1 when the rank is
    #   missing (nil) or empty
    def uploader_score
      user_ranks.nil? || user_ranks.empty? ? 1 : 2
    end

    # @return [String] the decompressed subtitle content (downloaded once,
    #   then memoized)
    def body
      @body ||= fetch_body
    end

    # Downloads the gzipped subtitle from #url and returns it decompressed.
    # The gzip reader is closed once the content has been read.
    #
    # @return [String]
    def fetch_body
      compressed = StringIO.new(Net::HTTP.get(url))
      Zlib::GzipReader.wrap(compressed) { |gz| gz.read }
    end

  end

end
@@ -0,0 +1,30 @@
1
module Opensubtitles

  # Runs the search engines in order and returns the first subtitle that one
  # of the finders accepts, after narrowing each engine's results with the
  # selectors.
  class SubtitleFinder

    # @param search_engines [Array<#search_subs_for>] tried in order
    # @param finders [Array<#chose>] each may pick one subtitle from a list
    # @param selectors [Array<#select>] applied in order to filter results
    def initialize(search_engines, finders, selectors=[])
      @search_engines = search_engines
      @finders = finders
      @selectors = selectors
    end

    # @param movie [Object] passed to the engines and selectors
    # @param language [Object] passed to the engines
    # @return [Object, nil] the first subtitle chosen by a finder, or nil when
    #   no engine/finder combination yields one
    def find_sub_for(movie, language)
      @search_engines.each do |engine|
        candidates = engine.search_subs_for(movie, language)
        next if candidates.nil?

        narrowed = @selectors.reduce(candidates) do |remaining, selector|
          selector.select(remaining, movie)
        end

        @finders.each do |finder|
          picked = finder.chose(narrowed)
          return picked if picked
        end
      end

      nil
    end

  end

end
@@ -0,0 +1,3 @@
1
module Opensubtitles
  # Version string of the gem.
  VERSION = '0.0.1'
end
@@ -0,0 +1,88 @@
1
+ # OpenSubtitles.org returns an invalid Content-Length, which makes the stdlib xmlrpc client raise.
2
+ # This is a dirty monkey patch to work around this. :'(
3
+
4
module XMLRPC
  class Client
    # Replacement for the stdlib XMLRPC::Client#do_rpc that tolerates a
    # response whose Content-Length header does not match the body size.
    #
    # Posts +request+ to @path, validates the HTTP status and content type,
    # remembers any cookies set by the server, and returns the response body.
    #
    # @param request [String] serialized XML-RPC request
    # @param async [Boolean] when true, a fresh HTTP connection is opened for
    #   this call and closed afterwards instead of reusing @http
    # @return [String] the raw response body
    # @raise [RuntimeError] on a non-2xx status, an unexpected content type,
    #   or a missing/empty body
    def do_rpc(request, async=false)
      header = {
        "User-Agent"     => USER_AGENT,
        "Content-Type"   => "text/xml; charset=utf-8",
        "Content-Length" => request.bytesize.to_s,
        "Connection"     => (async ? "close" : "keep-alive")
      }

      header["Cookie"] = @cookie if @cookie
      header.update(@http_header_extra) if @http_header_extra

      # add authorization header
      header["Authorization"] = @auth unless @auth.nil?

      resp = nil
      @http_last_response = nil

      if async
        # use a new HTTP object for each call
        http = net_http(@host, @port, @proxy_host, @proxy_port)
        http.use_ssl = @use_ssl if @use_ssl
        http.read_timeout = @timeout
        http.open_timeout = @timeout

        # post request
        http.start {
          resp = http.request_post(@path, request, header)
        }
      else
        # reuse the HTTP object for each call => keep-alive is possible.
        # The connection must be started explicitly the first time so that
        # http.request does not assume that we don't want keep-alive.
        @http.start if not @http.started?

        # post request
        resp = @http.request_post(@path, request, header)
      end

      @http_last_response = resp

      data = resp.body

      if resp.code == "401"
        # Authorization Required
        raise "Authorization failed.\nHTTP-Error: #{resp.code} #{resp.message}"
      elsif resp.code[0,1] != "2"
        raise "HTTP-Error: #{resp.code} #{resp.message}"
      end

      # assume text/xml on instances where Content-Type header is not set
      ct_expected = resp["Content-Type"] || 'text/xml'
      ct = parse_content_type(ct_expected).first
      if ct != "text/xml"
        if ct == "text/html"
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml'): \n#{data}"
        else
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml')"
        end
      end

      expected = resp["Content-Length"] || "<unknown>"
      if data.nil? || data.bytesize == 0
        # A nil body has no bytesize; report it as 0 bytes rather than
        # failing with NoMethodError while building the message.
        raise "Wrong size. Was 0, should be #{expected}"
      end
      # HACK: this is the monkey-patched part. The stdlib raises here when
      # Content-Length disagrees with the actual body size; that check is
      # intentionally omitted.

      set_cookies = resp.get_fields("Set-Cookie")
      if set_cookies && !set_cookies.empty?
        require 'webrick/cookie'
        @cookie = set_cookies.collect do |set_cookie|
          cookie = WEBrick::Cookie.parse_set_cookie(set_cookie)
          WEBrick::Cookie.new(cookie.name, cookie.value).to_s
        end.join("; ")
      end

      return data
    end
  end
end