opensubtitles 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +4 -0
- data/.rspec +1 -0
- data/.travis.yml +10 -0
- data/Gemfile +3 -0
- data/LICENSE.txt +22 -0
- data/README.md +50 -0
- data/Rakefile +9 -0
- data/bin/getsub +157 -0
- data/lib/opensubtitles.rb +21 -0
- data/lib/opensubtitles/finder.rb +8 -0
- data/lib/opensubtitles/finder/first.rb +13 -0
- data/lib/opensubtitles/finder/interactive.rb +21 -0
- data/lib/opensubtitles/finder/score.rb +13 -0
- data/lib/opensubtitles/language.rb +84 -0
- data/lib/opensubtitles/movie.rb +16 -0
- data/lib/opensubtitles/movie_file.rb +67 -0
- data/lib/opensubtitles/search.rb +9 -0
- data/lib/opensubtitles/search/imdb.rb +27 -0
- data/lib/opensubtitles/search/movie_hash.rb +21 -0
- data/lib/opensubtitles/search/name.rb +28 -0
- data/lib/opensubtitles/search/path.rb +20 -0
- data/lib/opensubtitles/selector.rb +7 -0
- data/lib/opensubtitles/selector/format.rb +17 -0
- data/lib/opensubtitles/selector/movie.rb +29 -0
- data/lib/opensubtitles/server.rb +70 -0
- data/lib/opensubtitles/sub.rb +55 -0
- data/lib/opensubtitles/subtitle_finder.rb +30 -0
- data/lib/opensubtitles/version.rb +3 -0
- data/lib/opensubtitles/xmlrpc_monkey_patch.rb +88 -0
- data/opensubtitles.gemspec +26 -0
- data/spec/fixtures/http/check_movie_hash.yml +225 -0
- data/spec/fixtures/http/get_imdb_movie_details.yml +342 -0
- data/spec/fixtures/http/log_in.yml +86 -0
- data/spec/fixtures/http/log_out.yml +68 -0
- data/spec/fixtures/http/search_imdb.yml +761 -0
- data/spec/fixtures/http/search_subtitles_for_himym.yml +1189 -0
- data/spec/fixtures/http/search_subtitles_for_the_rock.yml +4124 -0
- data/spec/fixtures/http/server_info.yml +492 -0
- data/spec/fixtures/somemovie.avi +0 -0
- data/spec/opensubtitles/language_spec.rb +57 -0
- data/spec/opensubtitles/movie_file_spec.rb +18 -0
- data/spec/opensubtitles/server_spec.rb +123 -0
- data/spec/opensubtitles/sub_spec.rb +33 -0
- data/spec/spec_helper.rb +15 -0
- metadata +159 -0
@@ -0,0 +1,67 @@
|
|
1
|
+
module Opensubtitles
  # A movie file on disk, paired with the language subtitles are wanted in.
  class MovieFile

    # Movie file extensions, without the leading dot.
    EXTENSIONS = %w(avi mpg m4v mkv mov ogv mp4)

    attr_reader :path, :language

    # path     - location of the movie file.
    # language - value interpolated into subtitle file names by #sub_path;
    #            defaults to Opensubtitles.default_language.
    def initialize(path, language = Opensubtitles.default_language)
      @path = path
      @language = language
    end

    # Returns true when a .srt or .sub file (see #sub_path) already exists
    # next to the movie, false otherwise.
    def has_sub?
      %w(srt sub).any? { |ext| File.exist?(sub_path(ext)) }
    end

    # Builds the subtitle path for the given format: same directory and base
    # name as the movie, with ".<language>.<format>" as suffix, or just
    # ".<format>" when no language is set.
    def sub_path(format)
      extension = @language ? ".#{@language}.#{format}" : ".#{format}"
      File.join(File.dirname(path), File.basename(path, File.extname(path)) + extension)
    end

    # Memoized result of MovieFile.compute_hash for this file.
    # NOTE(review): this replaces Object#hash with a String-returning method,
    # so instances should not be used as Hash keys.
    def hash
      @hash ||= self.class.compute_hash(path)
    end

    # Memoized file size in bytes.
    def size
      @size ||= File.size(path)
    end

    # Memoized file name without directory and extension.
    def name
      @name ||= File.basename(path, File.extname(path))
    end

    CHUNK_SIZE = 64 * 1024 # in bytes

    # from http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
    #
    # Starts from the file size, then adds every 64-bit word found in the
    # first and in the last CHUNK_SIZE bytes of the file, wrapping at 64 bits.
    # For files shorter than CHUNK_SIZE both reads cover the same bytes, which
    # are therefore counted twice.
    #
    # Returns the sum as a 16 character, zero padded, lowercase hex String.
    def self.compute_hash(path)
      filesize = File.size(path)
      hash = filesize

      File.open(path, 'rb') do |f|
        hash = add_chunk(hash, f.read(CHUNK_SIZE))

        # And again for the end of the file
        f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)
        hash = add_chunk(hash, f.read(CHUNK_SIZE))
      end

      sprintf("%016x", hash)
    end

    # Adds every complete 64-bit little-endian word of chunk to hash, keeping
    # the running total within 64 bits. A nil chunk (IO#read at end of file,
    # i.e. an empty file) contributes nothing.
    def self.add_chunk(hash, chunk)
      # Q< = unsigned 64 bit, little-endian regardless of the host byte order
      (chunk || '').unpack('Q<*').inject(hash) do |sum, n|
        (sum + n) & 0xffffffffffffffff
      end
    end
    private_class_method :add_chunk

  end
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
module Opensubtitles
  # Namespace of the search strategies. Each constant is registered for
  # autoloading from the "search" directory located next to this file.
  module Search
    search_dir = File.expand_path('search', File.dirname(__FILE__))

    {
      :IMDB      => 'imdb',
      :MovieHash => 'movie_hash',
      :Name      => 'name',
      :Path      => 'path'
    }.each do |const_name, file_name|
      autoload const_name, "#{search_dir}/#{file_name}"
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Searches subtitles by first resolving the movie name to an IMDB entry.
    class IMDB

      # server   - object answering #search_imdb and #search_subtitles.
      # selector - object answering #chose(candidates); picks one IMDB
      #            candidate out of the list. Defaults to Finder::First.new.
      def initialize(server, selector=Finder::First.new)
        @server = server
        @selector = selector
      end

      # Looks up movie.name on IMDB, lets the selector pick one candidate and
      # searches subtitles in the given language for its imdbid.
      #
      # Returns whatever @server.search_subtitles returns, or nil when the
      # IMDB lookup yields nothing or the selector picks nothing.
      def search_subs_for(movie, language)
        imdb_results = @server.search_imdb(:query => movie.name)
        # The candidates are a list (one entry per IMDB match), not a Hash:
        # only bail out when there is nothing to choose from.
        return if imdb_results.nil? || imdb_results.empty?

        imdb_result = @selector.chose(imdb_results)
        return unless imdb_result

        @server.search_subtitles(
          :sublanguageid => language,
          :imdbid => imdb_result.imdbid
        )
      end

    end

  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Searches subtitles using the movie's hash and byte size.
    class MovieHash

      # server - object answering #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Sends one query made of movie.hash, movie.size (as a String) and the
      # requested language; returns the server's answer untouched.
      def search_subs_for(movie, language)
        criteria = {
          :moviehash     => movie.hash,
          :moviebytesize => movie.size.to_s,
          :sublanguageid => language
        }
        @server.search_subtitles(criteria)
      end

    end

  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Searches subtitles by movie name and keeps only the results whose file
    # name contains that name.
    class Name

      # server - object answering #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Queries the server with movie.name, then filters the returned list in
      # place: a sub is kept when its normalized filename contains the
      # normalized movie name. Returns that same (filtered) list.
      def search_subs_for(movie, language)
        candidates = @server.search_subtitles(:sublanguageid => language, :query => movie.name)
        wanted = normalize_name(movie.name)
        # MAYBE: Levenshtein ?
        candidates.keep_if { |sub| normalize_name(sub.filename).include?(wanted) }
      end

      protected

      # Lowercases the name and collapses every run of whitespace, dots,
      # dashes and underscores into a single space.
      def normalize_name(name)
        lowered = name.downcase
        lowered.gsub(/[\s\.\-\_]+/, ' ')
      end

    end

  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Opensubtitles
  module Search

    # Searches subtitles using the movie's path as the "tag" criterion.
    class Path

      # server - object answering #search_subtitles.
      def initialize(server)
        @server = server
      end

      # Sends one query made of the requested language and movie.path;
      # returns the server's answer untouched.
      def search_subs_for(movie, language)
        criteria = { :sublanguageid => language, :tag => movie.path }
        @server.search_subtitles(criteria)
      end

    end

  end
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
module Opensubtitles
  module Selector

    # Narrows a list of subs down to those belonging to a single movie name.
    class Movie

      # movie_finder - object answering #chose(names); picks one movie name
      #                out of several. Defaults to Finder::First.new.
      def initialize(movie_finder=Finder::First.new)
        @movie_finder = movie_finder
      end

      # When the subs cover at most one movie name they are returned as is.
      # Otherwise the finder picks a name and only that name's subs are
      # returned ([] if the picked name matches no group).
      # The movie argument is not used here.
      def select(subs, movie)
        grouped = group_by_movie_name(subs)
        return subs unless grouped.length > 1

        picked_name = @movie_finder.chose(grouped.keys)
        grouped[picked_name] || []
      end

      # Returns a Hash mapping each movie name to the subs carrying it, in
      # order of first appearance.
      def group_by_movie_name(subs)
        subs.group_by { |sub| sub.movie_name }
      end

    end

  end
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
require 'ostruct'
|
2
|
+
|
3
|
+
module Opensubtitles

  # Raised by Server#login when the server answers with a status other than
  # '200 OK'. Derives from StandardError so that a plain `rescue` catches it.
  class LoginFailed < ::StandardError
  end

  # Thin wrapper around the XML-RPC API: holds the credentials, logs in
  # lazily and exposes the remote calls used by the rest of the library.
  class Server

    attr_reader :username, :password, :language, :useragent, :client

    # Option keys forwarded, in this order, as positional arguments to
    # XMLRPC::Client.new.
    CLIENT_ARGS = [:host, :path, :port, :proxy_host, :proxy_port, :http_user, :http_password, :use_ssl, :timeout]

    DEFAULT_OPTIONS = {
      :host    => 'api.opensubtitles.org',
      :path    => '/xml-rpc',
      :timeout => 10
    }.freeze

    # options - Hash; :username, :password, :language and :useragent configure
    #           the session, any key listed in CLIENT_ARGS configures the
    #           XML-RPC client (DEFAULT_OPTIONS fill the gaps).
    def initialize(options={})
      @username = options[:username] || ''
      @password = options[:password] || ''
      @language = options[:language] || 'eng'
      @useragent = options[:useragent] || 'opensubtitles v0.1'
      options = DEFAULT_OPTIONS.merge(options)
      @client = ::XMLRPC::Client.new(*options.values_at(*CLIENT_ARGS))
    end

    # Session token; logs in on first use and is memoized until #logout.
    def token
      @token ||= login
    end

    # Calls 'LogIn' and returns the token from the response.
    # Raises LoginFailed when the response status is not '200 OK'.
    def login
      response = client.call('LogIn', username, password, language, useragent)
      if response['status'] != '200 OK'
        # The password is deliberately left out: exception messages end up in
        # logs and backtraces.
        raise LoginFailed.new("Failed to login with #{username}. Server return code: #{response['status']}")
      end
      response['token']
    end

    # Calls 'LogOut' with the current token (which triggers a login if there
    # is none yet) and forgets the token.
    def logout
      client.call('LogOut', token)
      @token = nil
    end

    # Calls 'CheckMovieHash' with the given hashes; returns the raw response.
    def check_movie_hash(*hashes)
      client.call('CheckMovieHash', token, hashes)
    end

    # Calls 'SearchSubtitles' with the given queries and wraps each entry of
    # the response's 'data' in a Sub. Returns [] when 'data' is nil or false.
    def search_subtitles(*queries)
      subs = client.call('SearchSubtitles', token, queries)['data']
      subs ? subs.map{ |s| Sub.new(s) } : []
    end

    # Calls 'SearchMoviesOnIMDB' with options[:query].
    # Returns an Array of OpenStructs answering #imdbid and #title, or []
    # when the response's 'data' is nil, false or empty.
    # The options Hash passed by the caller is left untouched.
    def search_imdb(options={})
      query = options[:query]
      imdb = client.call('SearchMoviesOnIMDB', token, query)['data']
      return [] unless imdb && imdb.size > 0

      imdb.map{ |i| OpenStruct.new(:imdbid => i['id'], :title => i['title']) }
    end

    # Calls 'ServerInfo' (no token needed); returns the raw response.
    def info
      client.call('ServerInfo')
    end

    # Calls 'GetIMDBMovieDetails' for the given id and wraps the response's
    # 'data' in a Movie.
    def get_imdb_movie_details(id)
      Movie.new(client.call('GetIMDBMovieDetails', token, id)['data'])
    end

  end
end
|
@@ -0,0 +1,55 @@
|
|
1
|
+
require 'uri'
|
2
|
+
require 'net/http'
|
3
|
+
require 'zlib'
|
4
|
+
require 'stringio'
|
5
|
+
|
6
|
+
module Opensubtitles

  # One subtitle search result, built from a raw result Hash.
  class Sub

    attr_reader :url, :format, :language, :rating, :user_ranks, :movie_name,
      :filename, :raw_data, :downloads_count, :bad_reports_count

    # data - Hash with the String keys read below ('SubDownloadLink',
    #        'SubFormat', 'SubLanguageID', 'SubRating', 'UserRank',
    #        'MovieName', 'SubFileName', 'SubDownloadsCnt', 'SubBad').
    #        It is kept as is in #raw_data.
    def initialize(data)
      @url = URI.parse(data['SubDownloadLink'])
      @format = data['SubFormat']
      @language = Language.from_iso639_2b(data['SubLanguageID'])
      @rating = data['SubRating'].to_f
      @user_ranks = data['UserRank']
      @movie_name = data['MovieName']
      @filename = data['SubFileName']
      @downloads_count = data['SubDownloadsCnt'].to_i
      @bad_reports_count = data['SubBad'].to_i
      @raw_data = data
    end

    # Orders subs by rating only.
    def <=>(other)
      rating <=> other.rating
    end

    # Totally subjective formula to evaluate subtitle quality
    # Originally developed by runa (https://github.com/runa)
    # https://github.com/byroot/opensubtitles/commit/9d71775#L0R122
    #
    # NOTE(review): the last term divides two Integers, so it is 0 whenever
    # there are no more bad reports than downloads. Confirm that this is
    # intended before changing it.
    def score
      uploader_score * downloads_count.next * (rating + 1) - bad_reports_count / downloads_count.next
    end

    # 2 when the uploader has a rank, 1 when the rank is empty or missing.
    def uploader_score
      (user_ranks.nil? || user_ranks.empty?) ? 1 : 2
    end

    # Subtitle content, downloaded on first access and memoized.
    def body
      @body ||= fetch_body
    end

    # Downloads #url and returns the gunzipped payload.
    def fetch_body
      gzip = Zlib::GzipReader.new(StringIO.new(Net::HTTP.get(url)))
      begin
        gzip.read
      ensure
        gzip.close
      end
    end

  end

end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Opensubtitles

  # Chains search engines, selectors and finders to pick one subtitle.
  class SubtitleFinder

    # search_engines - objects answering #search_subs_for(movie, language).
    # finders        - objects answering #chose(subs).
    # selectors      - objects answering #select(subs, movie); optional.
    def initialize(search_engines, finders, selectors=[])
      @search_engines = search_engines
      @finders = finders
      @selectors = selectors
    end

    # Tries each search engine in turn. An engine's non-nil result is run
    # through every selector, then offered to each finder; the first truthy
    # pick is returned. Returns nil when no engine/finder yields a pick.
    def find_sub_for(movie, language)
      @search_engines.each do |engine|
        found = engine.search_subs_for(movie, language)
        next if found.nil?

        narrowed = @selectors.inject(found) do |remaining, selector|
          selector.select(remaining, movie)
        end

        @finders.each do |finder|
          pick = finder.chose(narrowed)
          return pick if pick
        end
      end

      nil
    end

  end

end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
# OpenSubtitles.org returns an invalid Content-Length that makes the stdlib xmlrpc client raise.
|
2
|
+
# This is a dirty monkey patch to workaround this. :'(
|
3
|
+
|
4
|
+
module XMLRPC
  class Client
    # Replacement for the xmlrpc client's do_rpc in which a Content-Length
    # header that disagrees with the received body size is tolerated instead
    # of raising.
    #
    # request - the request body to POST.
    # async   - when true a fresh HTTP connection is opened (and closed) for
    #           this call; otherwise @http is reused with keep-alive.
    #
    # Returns the response body.
    # Raises RuntimeError on a non 2xx status, on a content type other than
    # text/xml, or on a missing/empty body.
    def do_rpc(request, async=false)
      header = {
        "User-Agent"     => USER_AGENT,
        "Content-Type"   => "text/xml; charset=utf-8",
        "Content-Length" => request.bytesize.to_s,
        "Connection"     => (async ? "close" : "keep-alive")
      }

      header["Cookie"] = @cookie if @cookie
      header.update(@http_header_extra) if @http_header_extra

      if @auth != nil
        # add authorization header
        header["Authorization"] = @auth
      end

      resp = nil
      @http_last_response = nil

      if async
        # use a new HTTP object for each call
        http = net_http(@host, @port, @proxy_host, @proxy_port)
        http.use_ssl = @use_ssl if @use_ssl
        http.read_timeout = @timeout
        http.open_timeout = @timeout

        # post request
        http.start {
          resp = http.request_post(@path, request, header)
        }
      else
        # reuse the HTTP object for each call => connection alive is possible
        # we must start connection explicitly first time so that http.request
        # does not assume that we don't want keepalive
        @http.start if not @http.started?

        # post request
        resp = @http.request_post(@path, request, header)
      end

      @http_last_response = resp

      data = resp.body

      if resp.code == "401"
        # Authorization Required
        raise "Authorization failed.\nHTTP-Error: #{resp.code} #{resp.message}"
      elsif resp.code[0,1] != "2"
        raise "HTTP-Error: #{resp.code} #{resp.message}"
      end

      # assume text/xml on instances where Content-Type header is not set
      ct_expected = resp["Content-Type"] || 'text/xml'
      ct = parse_content_type(ct_expected).first
      if ct != "text/xml"
        if ct == "text/html"
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml'): \n#{data}"
        else
          raise "Wrong content-type (received '#{ct}' but expected 'text/xml')"
        end
      end

      expected = resp["Content-Length"] || "<unknown>"
      # A missing body counts as zero bytes, so the error below can be built
      # without calling #bytesize on nil.
      received = data.nil? ? 0 : data.bytesize
      if received == 0
        raise "Wrong size. Was #{received}, should be #{expected}"
      end
      # HACK: here is the monkey patched part. A body whose size differs from
      # the announced Content-Length (and no Transfer-Encoding) is accepted
      # instead of raising "Wrong size".

      set_cookies = resp.get_fields("Set-Cookie")
      if set_cookies and !set_cookies.empty?
        require 'webrick/cookie'
        @cookie = set_cookies.collect do |set_cookie|
          cookie = WEBrick::Cookie.parse_set_cookie(set_cookie)
          WEBrick::Cookie.new(cookie.name, cookie.value).to_s
        end.join("; ")
      end

      return data
    end
  end
end
|