opensubtitles 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rspec +1 -0
- data/.travis.yml +10 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +50 -0
- data/Rakefile +9 -0
- data/bin/getsub +157 -0
- data/lib/opensubtitles.rb +21 -0
- data/lib/opensubtitles/finder.rb +8 -0
- data/lib/opensubtitles/finder/first.rb +13 -0
- data/lib/opensubtitles/finder/interactive.rb +21 -0
- data/lib/opensubtitles/finder/score.rb +13 -0
- data/lib/opensubtitles/language.rb +84 -0
- data/lib/opensubtitles/movie.rb +16 -0
- data/lib/opensubtitles/movie_file.rb +67 -0
- data/lib/opensubtitles/search.rb +9 -0
- data/lib/opensubtitles/search/imdb.rb +27 -0
- data/lib/opensubtitles/search/movie_hash.rb +21 -0
- data/lib/opensubtitles/search/name.rb +28 -0
- data/lib/opensubtitles/search/path.rb +20 -0
- data/lib/opensubtitles/selector.rb +7 -0
- data/lib/opensubtitles/selector/format.rb +17 -0
- data/lib/opensubtitles/selector/movie.rb +29 -0
- data/lib/opensubtitles/server.rb +70 -0
- data/lib/opensubtitles/sub.rb +55 -0
- data/lib/opensubtitles/subtitle_finder.rb +30 -0
- data/lib/opensubtitles/version.rb +3 -0
- data/lib/opensubtitles/xmlrpc_monkey_patch.rb +88 -0
- data/opensubtitles.gemspec +26 -0
- data/spec/fixtures/http/check_movie_hash.yml +225 -0
- data/spec/fixtures/http/get_imdb_movie_details.yml +342 -0
- data/spec/fixtures/http/log_in.yml +86 -0
- data/spec/fixtures/http/log_out.yml +68 -0
- data/spec/fixtures/http/search_imdb.yml +761 -0
- data/spec/fixtures/http/search_subtitles_for_himym.yml +1189 -0
- data/spec/fixtures/http/search_subtitles_for_the_rock.yml +4124 -0
- data/spec/fixtures/http/server_info.yml +492 -0
- data/spec/fixtures/somemovie.avi +0 -0
- data/spec/opensubtitles/language_spec.rb +57 -0
- data/spec/opensubtitles/movie_file_spec.rb +18 -0
- data/spec/opensubtitles/server_spec.rb +123 -0
- data/spec/opensubtitles/sub_spec.rb +33 -0
- data/spec/spec_helper.rb +15 -0
- metadata +159 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
module Opensubtitles
  # A movie file on disk, paired with the language wanted for its subtitles.
  #
  # Exposes the values used to look the movie up (name, byte size and
  # OpenSubtitles content hash) and the path a subtitle should be saved to.
  class MovieFile

    EXTENSIONS = %w(avi mpg m4v mkv mov ogv mp4)

    attr_reader :path, :language

    # path     - location of the movie file.
    # language - language inserted into subtitle file names; when nil/false
    #            the subtitle path carries no language part.
    def initialize(path, language = Opensubtitles.default_language)
      @path = path
      @language = language
    end

    # Returns true when a .srt or .sub subtitle (as named by #sub_path)
    # already exists next to the movie.
    def has_sub?
      %w(srt sub).any? { |ext| File.exist?(sub_path(ext)) }
    end

    # Builds the subtitle path for the given format: same directory and base
    # name as the movie, with ".<language>.<format>" (or just ".<format>"
    # when no language is set) as extension.
    def sub_path(format)
      extension = @language ? ".#{@language}.#{format}" : ".#{format}"
      File.join(File.dirname(path), name + extension)
    end

    # OpenSubtitles hash of the file, computed once and memoized.
    # NOTE(review): this replaces Object#hash, which normally returns an
    # Integer used for Hash keys; instances are probably unsafe as Hash keys.
    def hash
      @hash ||= self.class.compute_hash(path)
    end

    # Size of the file in bytes (memoized).
    def size
      @size ||= File.size(path)
    end

    # File name without directory and without extension (memoized).
    def name
      @name ||= File.basename(path, File.extname(path))
    end

    CHUNK_SIZE = 64 * 1024 # in bytes

    # Computes the OpenSubtitles movie hash of the file at +path+.
    #
    # The hash is the file size plus the sum of the 64-bit words found in the
    # first and in the last CHUNK_SIZE bytes, truncated to 64 bits and
    # rendered as 16 lowercase hex digits.
    #
    # from http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    def self.compute_hash(path)
      filesize = File.size(path)
      hash = filesize

      File.open(path, 'rb') do |f|
        # Beginning of the file...
        hash = sum_words(f, hash)

        # ...and again for the end of the file (or from the start when the
        # file is shorter than one chunk).
        f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)
        hash = sum_words(f, hash)
      end

      sprintf("%016x", hash)
    end

    # Reads up to CHUNK_SIZE bytes from +io+ and adds every complete 64-bit
    # word to +hash+, wrapping around at 64 bits. Returns the new hash.
    def self.sum_words(io, hash)
      # IO#read(n) returns nil at EOF (e.g. an empty file): treat it as no data.
      chunk = io.read(CHUNK_SIZE) || ''
      # Q< = unsigned 64-bit, little-endian, whatever the host byte order is.
      chunk.unpack('Q<*').inject(hash) do |sum, word|
        (sum + word) & 0xffffffffffffffff # to remain as 64 bit number
      end
    end
    private_class_method :sum_words

  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module Opensubtitles
  # Namespace of the subtitle search strategies. Each constant is registered
  # for autoload from the `search` directory located next to this file.
  module Search
    search_dir = File.expand_path('search', File.dirname(__FILE__))

    [
      [:IMDB,      'imdb'],
      [:MovieHash, 'movie_hash'],
      [:Name,      'name'],
      [:Path,      'path']
    ].each do |const_name, file_name|
      autoload const_name, "#{search_dir}/#{file_name}"
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Search strategy that first resolves the movie name to an IMDB entry and
    # then asks for the subtitles attached to that IMDB id.
    class IMDB

      # server   - object responding to #search_imdb and #search_subtitles.
      # selector - object responding to #chose; picks one entry among the
      #            IMDB matches (may return nil to pick none).
      def initialize(server, selector=Finder::First.new)
        @server = server
        @selector = selector
      end

      # Returns the result of server.search_subtitles for the chosen IMDB
      # entry, or nil when IMDB has no match or the selector picks nothing.
      def search_subs_for(movie, language)
        imdb_results = @server.search_imdb(:query => movie.name)
        # Server#search_imdb answers with an Array of entries exposing
        # #imdbid, so only a missing or empty list means "no match".
        return if imdb_results.nil? || imdb_results.empty?

        imdb_result = @selector.chose(imdb_results)
        return unless imdb_result

        @server.search_subtitles(
          :sublanguageid => language,
          :imdbid        => imdb_result.imdbid
        )
      end

    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Search strategy that identifies the movie by its content hash and its
    # size in bytes.
    class MovieHash

      # server - object responding to #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Returns whatever server.search_subtitles answers for the hash query.
      def search_subs_for(movie, language)
        query = {
          :moviehash     => movie.hash,
          :moviebytesize => movie.size.to_s,
          :sublanguageid => language
        }
        @server.search_subtitles(query)
      end

    end

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Search strategy that queries by movie name and keeps only the subtitles
    # whose file name contains that name once both are normalized.
    class Name

      # server - object responding to #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Returns the list answered by the server, filtered in place.
      def search_subs_for(movie, language)
        candidates = @server.search_subtitles(:sublanguageid => language, :query => movie.name)
        wanted = normalize_name(movie.name)
        # MAYBE: Levenshtein ?
        candidates.keep_if { |sub| normalize_name(sub.filename).include?(wanted) }
      end

      protected

      # Lowercases the name and collapses each run of whitespace, dots,
      # dashes and underscores into a single space.
      def normalize_name(name)
        lowered = name.downcase
        lowered.gsub(/[\s\.\-\_]+/, ' ')
      end

    end

  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Search strategy that sends the movie path to the server as a `tag`.
    class Path

      # server - object responding to #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Returns whatever server.search_subtitles answers for the tag query.
      def search_subs_for(movie, language)
        query = { :sublanguageid => language, :tag => movie.path }
        @server.search_subtitles(query)
      end

    end

  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Opensubtitles
  module Selector

    # Narrows a list of subtitles down to those belonging to a single movie
    # when the list mixes subtitles of several movies.
    class Movie

      # movie_finder - object responding to #chose; receives the list of
      #                movie names and returns the one to keep.
      def initialize(movie_finder=Finder::First.new)
        @movie_finder = movie_finder
      end

      # Returns +subs+ untouched when they all share one movie name (or when
      # there are none); otherwise the subtitles of the chosen movie, or an
      # empty list when the chosen name matches no group.
      def select(subs, movie)
        grouped = group_by_movie_name(subs)
        if grouped.length > 1
          picked = @movie_finder.chose(grouped.keys)
          grouped.fetch(picked) { [] }
        else
          subs
        end
      end

      # Builds a Hash mapping each movie name to the subtitles carrying it.
      def group_by_movie_name(subs)
        subs.each_with_object({}) do |sub, groups|
          (groups[sub.movie_name] ||= []) << sub
        end
      end

    end

  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Opensubtitles

  # Raised by Server#login when the server status is not '200 OK'.
  # NOTE(review): inherits from Exception, so a bare `rescue` does not catch it.
  class LoginFailed < ::Exception
  end

  # Client for the OpenSubtitles XML-RPC API. The session token is obtained
  # lazily: the first call that needs it triggers #login.
  class Server

    attr_reader :username, :password, :language, :useragent, :client

    # Option keys forwarded, in this order, to XMLRPC::Client.new.
    CLIENT_ARGS = [:host, :path, :port, :proxy_host, :proxy_port, :http_user, :http_password, :use_ssl, :timeout]

    DEFAULT_OPTIONS = {
      :host    => 'api.opensubtitles.org',
      :path    => '/xml-rpc',
      :timeout => 10
    }.freeze

    # options - Hash accepting :username, :password, :language, :useragent
    #           plus any of CLIENT_ARGS; missing values fall back to the
    #           defaults below and to DEFAULT_OPTIONS.
    def initialize(options={})
      @username = options[:username] || ''
      @password = options[:password] || ''
      @language = options[:language] || 'eng'
      @useragent = options[:useragent] || 'opensubtitles v0.1'
      options = DEFAULT_OPTIONS.merge(options)
      @client = ::XMLRPC::Client.new(*options.values_at(*CLIENT_ARGS))
    end

    # Session token, logging in on first use.
    def token
      @token ||= login
    end

    # Calls LogIn and returns the token of the response.
    # Raises LoginFailed when the status is not '200 OK'.
    def login
      response = client.call('LogIn', username, password, language, useragent)
      if response['status'] != '200 OK'
        # The password is deliberately kept out of the message so that it
        # cannot end up in logs or backtraces.
        raise LoginFailed, "Failed to login with #{username}. Server return code: #{response['status']}"
      end
      response['token']
    end

    # Calls LogOut with the current token and forgets it.
    def logout
      client.call('LogOut', token)
      @token = nil
    end

    # Calls CheckMovieHash with the given hashes and returns the raw response.
    def check_movie_hash(*hashes)
      client.call('CheckMovieHash', token, hashes)
    end

    # Calls SearchSubtitles with the given query hashes.
    # Returns an Array of Sub, empty when the response carries no data.
    def search_subtitles(*queries)
      subs = client.call('SearchSubtitles', token, queries)['data']
      subs ? subs.map{ |s| Sub.new(s) } : []
    end

    # Calls SearchMoviesOnIMDB with options[:query].
    # Returns an Array of OpenStruct(:imdbid, :title), empty when the
    # response carries no data. The options hash is left untouched.
    def search_imdb(options={})
      query = options[:query]
      imdb = client.call('SearchMoviesOnIMDB', token, query)['data']
      (imdb || []).map { |i| OpenStruct.new(:imdbid => i['id'], :title => i['title']) }
    end

    # Calls ServerInfo (no token needed) and returns the raw response.
    def info
      client.call('ServerInfo')
    end

    # Calls GetIMDBMovieDetails for +id+ and wraps the data in a Movie.
    def get_imdb_movie_details(id)
      Movie.new(client.call('GetIMDBMovieDetails', token, id)['data'])
    end

  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'net/http'
|
3
|
+
require 'zlib'
|
4
|
+
require 'stringio'
|
5
|
+
|
6
|
+
module Opensubtitles

  # One subtitle entry as described by a SearchSubtitles result.
  class Sub

    attr_reader :url, :format, :language, :rating, :user_ranks, :movie_name,
                :filename, :raw_data, :downloads_count, :bad_reports_count

    # data - Hash of one search result; the keys read are SubDownloadLink,
    #        SubFormat, SubLanguageID, SubRating, UserRank, MovieName,
    #        SubFileName, SubDownloadsCnt and SubBad. The whole hash stays
    #        available through #raw_data.
    def initialize(data)
      @url = URI.parse(data['SubDownloadLink'])
      @format = data['SubFormat']
      @language = Language.from_iso639_2b(data['SubLanguageID'])
      @rating = data['SubRating'].to_f
      @user_ranks = data['UserRank']
      @movie_name = data['MovieName']
      @filename = data['SubFileName']
      @downloads_count = data['SubDownloadsCnt'].to_i
      @bad_reports_count = data['SubBad'].to_i
      @raw_data = data
    end

    # Orders subtitles by rating.
    def <=>(other)
      rating <=> other.rating
    end

    # Totally subjective formula to evaluate subtitle quality
    # Originally developed by runa (https://github.com/runa)
    # https://github.com/byroot/opensubtitles/commit/9d71775#L0R122
    #
    # NOTE(review): both operands of the division are Integers, so the
    # bad-report penalty is truncated — confirm whether that is intended.
    def score
      uploader_score * downloads_count.next * (rating + 1) - bad_reports_count / downloads_count.next
    end

    # 2 when the uploader has a rank, 1 when the rank is empty or missing.
    def uploader_score
      user_ranks.to_s.empty? ? 1 : 2
    end

    # Uncompressed subtitle content, downloaded on first access.
    def body
      @body ||= fetch_body
    end

    # Downloads #url and returns the gunzipped payload.
    def fetch_body
      reader = Zlib::GzipReader.new(StringIO.new(Net::HTTP.get(url)))
      begin
        reader.read
      ensure
        # Releases the inflate stream and the underlying buffer.
        reader.close
      end
    end

  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Opensubtitles

  # Runs the search engines one after the other until one of them yields a
  # subtitle accepted by a finder.
  class SubtitleFinder

    # search_engines - objects responding to #search_subs_for(movie, language).
    # finders        - objects responding to #chose(subs).
    # selectors      - objects responding to #select(subs, movie), applied
    #                  in order to narrow each engine's results.
    def initialize(search_engines, finders, selectors=[])
      @search_engines = search_engines
      @finders = finders
      @selectors = selectors
    end

    # Returns the first truthy subtitle chosen by a finder, or nil when no
    # engine/finder combination produces one. Engines answering nil are
    # skipped.
    def find_sub_for(movie, language)
      @search_engines.each do |engine|
        found = engine.search_subs_for(movie, language)
        next if found.nil?

        narrowed = @selectors.inject(found) do |remaining, selector|
          selector.select(remaining, movie)
        end

        @finders.each do |finder|
          choice = finder.chose(narrowed)
          return choice if choice
        end
      end

      nil
    end

  end

end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# OpenSubtitles.org returns an invalid Content-Length, which makes the stdlib xmlrpc client raise.
|
2
|
+
# This is a dirty monkey patch to workaround this. :'(
|
3
|
+
|
4
|
+
module XMLRPC
  class Client
    # Posts +request+ (the XML-RPC payload) and returns the response body.
    #
    # This redefines the client's do_rpc so that a response whose
    # Content-Length header disagrees with the size of the body is accepted
    # instead of raising.
    #
    # request - String holding the XML request.
    # async   - when true a fresh HTTP object is used for this single call
    #           ("Connection: close"); otherwise @http is reused with
    #           keep-alive.
    #
    # Raises RuntimeError on a 401, on any non-2xx status, on a content type
    # other than text/xml and on a missing or empty body.
    def do_rpc(request, async=false)
      header = {
        "User-Agent"     =>  USER_AGENT,
        "Content-Type"   => "text/xml; charset=utf-8",
        "Content-Length" => request.bytesize.to_s,
        "Connection"     => (async ? "close" : "keep-alive")
      }

      header["Cookie"] = @cookie        if @cookie
      header.update(@http_header_extra) if @http_header_extra

      if @auth != nil
        # add authorization header
        header["Authorization"] = @auth
      end

      resp = nil
      @http_last_response = nil

      if async
        # use a new HTTP object for each call
        http = net_http(@host, @port, @proxy_host, @proxy_port)
        http.use_ssl = @use_ssl if @use_ssl
        http.read_timeout = @timeout
        http.open_timeout = @timeout

        # post request
        http.start {
          resp = http.request_post(@path, request, header)
        }
      else
        # reuse the HTTP object for each call => connection alive is possible
        # we must start connection explicitly first time so that http.request
        # does not assume that we don't want keepalive
        @http.start if not @http.started?

        # post request
        resp = @http.request_post(@path, request, header)
      end

      @http_last_response = resp

      data = resp.body

      if resp.code == "401"
        # Authorization Required
        raise "Authorization failed.\nHTTP-Error: #{resp.code} #{resp.message}"
      elsif resp.code[0,1] != "2"
        raise "HTTP-Error: #{resp.code} #{resp.message}"
      end

      # assume text/xml on instances where Content-Type header is not set
      ct_expected = resp["Content-Type"] || 'text/xml'
      ct = parse_content_type(ct_expected).first
      if ct != "text/xml"
        if ct == "text/html"
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml'): \n#{data}"
        else
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml')"
        end
      end

      expected = resp["Content-Length"] || "<unknown>"
      if data.nil? or data.bytesize == 0
        # data may be nil here, hence to_s before measuring it.
        raise "Wrong size. Was #{data.to_s.bytesize}, should be #{expected}"
      end
      # HACK: a Content-Length that differs from the actual body size is
      # deliberately tolerated; this is the whole point of the monkey patch.

      set_cookies = resp.get_fields("Set-Cookie")
      if set_cookies and !set_cookies.empty?
        require 'webrick/cookie'
        # Keep only name=value of every cookie for the next request.
        @cookie = set_cookies.collect do |set_cookie|
          cookie = WEBrick::Cookie.parse_set_cookie(set_cookie)
          WEBrick::Cookie.new(cookie.name, cookie.value).to_s
        end.join("; ")
      end

      return data
    end
  end
end
|