suby 0.3.1 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
data/bin/suby CHANGED
@@ -13,6 +13,10 @@ option_parser = OptionParser.new do |opts|
13
13
  options[:lang] = lang
14
14
  end
15
15
 
16
# -f/--force is a plain switch without an argument, so the block takes no
# parameter; it only turns the :force option on.
opts.on '-f', '--force', 'Force subtitles download even if already exists' do
  options[:force] = true
end
19
+
16
20
  opts.on '-h', '--help', 'Show usage' do
17
21
  puts opts
18
22
  exit
@@ -2,6 +2,8 @@ module Suby
2
2
  class Downloader::Addic7ed < Downloader
3
3
  SITE = 'www.addic7ed.com'
4
4
  FORMAT = :file
5
+ SUBTITLE_TYPES = [:tvshow]
6
+
5
7
  LANG_IDS = {
6
8
  en: 1, es: 5, it: 7, fr: 8, pt: 10, de: 11, ca: 12, eu: 13, cs: 14,
7
9
  gl: 15, tr: 16, nl: 17, sv: 18, ru: 19, hu: 20, pl: 21, sl: 22, he: 23,
@@ -29,7 +31,7 @@ module Suby
29
31
  body.strip!
30
32
  raise NotFoundError, "show/season/episode not found" if body.empty?
31
33
  if body.include? FILTER_IGNORED
32
- raise NotFoundError, "no subtitle available"
34
+ raise NotFoundError, "no subtitles available"
33
35
  end
34
36
  body
35
37
  end
@@ -0,0 +1,83 @@
1
module Suby
  # Downloader that looks subtitles up through the OpenSubtitles XML-RPC API.
  #
  # Based on https://github.com/byroot/ruby-osdb/blob/master/lib/osdb/server.rb
  class Downloader::OpenSubtitles < Downloader
    SITE = 'api.opensubtitles.org'
    FORMAT = :gz
    XMLRPC_PATH = '/xml-rpc'
    SUBTITLE_TYPES = [:tvshow, :movie, :unknown]

    USERNAME = ''
    PASSWORD = ''
    LOGIN_LANGUAGE = 'eng'
    USER_AGENT = 'Suby v0.4'

    # Search strategies, tried in this order by #download_url.
    # There is also a search by IMDb id (see #search_query_by_imdbid), but it
    # is not considered useful here as it returns subtitles for many different
    # versions.
    SEARCH_QUERIES_ORDER = [:hash, :name]

    # OpenSubtitles needs ISO 639-2/B language codes for subtitles search
    # See http://www.opensubtitles.org/addons/export_languages.php
    # and http://en.wikipedia.org/wiki/List_of_ISO_639-2_codes
    LANG_MAPPING = {
      ar: "ara", bg: "bul", bn: "ben", br: "bre", bs: "bos", ca: "cat", cs: "cze", da: "dan", de: "ger", el: "ell",
      en: "eng", eo: "epo", es: "spa", et: "est", eu: "baq", fa: "per", fi: "fin", fr: "fre", gl: "glg", he: "heb",
      hi: "hin", hr: "hrv", hu: "hun", hy: "arm", id: "ind", is: "ice", it: "ita", ja: "jpn", ka: "geo", kk: "kaz",
      km: "khm", ko: "kor", lb: "ltz", lt: "lit", lv: "lav", mk: "mac", mn: "mon", ms: "may", nl: "dut", no: "nor",
      oc: "oci", pb: "pob", pl: "pol", pt: "por", ro: "rum", ru: "rus", si: "sin", sk: "slo", sl: "slv", sq: "alb",
      sr: "scc", sv: "swe", sw: "swa", th: "tha", tl: "tgl", tr: "tur", uk: "ukr", ur: "urd", vi: "vie", zh: "chi"
    }
    # Languages missing from the table are searched with 'all'.
    LANG_MAPPING.default = 'all'

    # Returns the 'SubDownloadLink' of the first subtitle returned by the first
    # search strategy (in SEARCH_QUERIES_ORDER) that yields any result, and
    # remembers that strategy in @type for #success_message.
    #
    # Raises NotFoundError when no strategy returns a subtitle.
    def download_url
      SEARCH_QUERIES_ORDER.each do |type|
        subs = search_subtitles(search_query(type))['data']
        # 'data' may be missing/false, or an empty list: neither is a hit.
        next if !subs || subs.empty?

        @type = type
        return subs.first['SubDownloadLink']
      end
      raise NotFoundError, "no subtitles available"
    end

    # Calls the 'SearchSubtitles' XML-RPC method with the session token.
    # query may be a single query Hash or an Array of them; a nil query
    # short-circuits to an empty Hash without contacting the server.
    def search_subtitles(query)
      return {} unless query
      query = [query] unless query.kind_of? Array
      xmlrpc.call('SearchSubtitles', token, query)
    end

    # Session token, obtained by logging in on first use and then cached.
    def token
      @token ||= login
    end

    # Calls the 'LogIn' XML-RPC method and returns the token of the response.
    #
    # Raises DownloaderError unless the response status is '200 OK'.
    def login
      response = xmlrpc.call('LogIn', USERNAME, PASSWORD, LOGIN_LANGUAGE, USER_AGENT)
      unless response['status'] == '200 OK'
        # The password is deliberately kept out of the error message.
        raise DownloaderError, "Failed to login as #{USERNAME.inspect}. " +
          "Server return code: #{response['status']}"
      end
      response['token']
    end

    # Builds the query for the given strategy by dispatching to
    # search_query_by_<type> and adding the subtitle language.
    # Returns nil when the strategy cannot build a query.
    def search_query(type = :hash)
      query = send("search_query_by_#{type}")
      query && query.merge(sublanguageid: language(lang))
    end

    # Query by movie hash and byte size; nil when the video file does not exist.
    def search_query_by_hash
      { moviehash: MovieHasher.compute_hash(file), moviebytesize: file.size.to_s } if file.exist?
    end

    # Query by show/season/episode when both season and episode are known,
    # otherwise by the file's base name.
    def search_query_by_name
      season && episode ? { query: show, season: season, episode: episode } : { query: file.base.to_s }
    end

    # Query by IMDb id; nil when no id is available.
    def search_query_by_imdbid
      { imdbid: imdbid } if imdbid
    end

    # Maps a language code to the code used for the search
    # (falls back to LANG_MAPPING's default).
    def language(lang)
      LANG_MAPPING[lang.to_sym]
    end

    # Success message mentioning which search strategy found the subtitles.
    def success_message
      "Found by #{@type}"
    end
  end
end
@@ -3,6 +3,7 @@ module Suby
3
3
  SITE = 'www.tvsubtitles.net'
4
4
  FORMAT = :zip
5
5
  SEARCH_URL = '/search.php'
6
+ SUBTITLE_TYPES = [:tvshow]
6
7
 
7
8
  # cache
8
9
  SHOW_URLS = {}
@@ -75,7 +76,7 @@ module Suby
75
76
  a = subtitles.css('div.left_articles a').find { |a|
76
77
  a.name == 'a' and a[:href].start_with?('/subtitle')
77
78
  }
78
- raise NotFoundError, "no subtitle available" unless a
79
+ raise NotFoundError, "no subtitles available" unless a
79
80
  url = a[:href]
80
81
  raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
81
82
  url
@@ -83,8 +84,7 @@ module Suby
83
84
  end
84
85
 
85
86
  def download_url
86
- @download_url ||= URI.escape '/' +
87
- get_redirection(subtitles_url.sub('subtitle', 'download'))
87
+ URI.escape '/' + get_redirection(subtitles_url.sub('subtitle', 'download'))
88
88
  end
89
89
  end
90
90
  end
@@ -1,6 +1,9 @@
1
1
  require 'net/http'
2
2
  require 'cgi/util'
3
3
  require 'nokogiri'
4
+ require 'xmlrpc/client'
5
+ require 'zlib'
6
+ require 'stringio'
4
7
 
5
8
  module Suby
6
9
  class Downloader
@@ -9,22 +12,21 @@ module Suby
9
12
  DOWNLOADERS << downloader
10
13
  end
11
14
 
12
- attr_reader :show, :season, :episode, :file, :lang
15
+ attr_reader :show, :season, :episode, :video_data, :file, :lang
13
16
 
14
17
  def initialize(file, *args)
15
18
  @file = file
16
19
  @lang = (args.last || 'en').to_sym
17
- case args.size
18
- when 0..1
19
- @show, @season, @episode = FilenameParser.parse(file)
20
- when 3..4
21
- @show, @season, @episode = args
22
- else
23
- raise ArgumentError, "wrong number of arguments: #{args.size+1} for " +
24
- "(file, [show, season, episode], [lang])"
20
+ @video_data = FilenameParser.parse(file)
21
+ if video_data[:type] == :tvshow
22
+ @show, @season, @episode = video_data.values_at(:show, :season, :episode)
25
23
  end
26
24
  end
27
25
 
26
# Tells whether the parsed video type (video_data[:type]) is listed in the
# SUBTITLE_TYPES constant of this downloader's class.
def support_video_type?
  supported_types = self.class::SUBTITLE_TYPES
  supported_types.include?(video_data[:type])
end
29
+
28
30
  def to_s
29
31
  self.class.name.sub(/^.+::/, '')
30
32
  end
@@ -33,6 +35,10 @@ module Suby
33
35
  @http ||= Net::HTTP.new(self.class::SITE).start
34
36
  end
35
37
 
38
# XML-RPC client for this downloader's SITE and XMLRPC_PATH constants,
# created on first use and cached in @xmlrpc.
def xmlrpc
  return @xmlrpc if @xmlrpc

  downloader = self.class
  @xmlrpc = XMLRPC::Client.new(downloader::SITE, downloader::XMLRPC_PATH)
end
41
+
36
42
  def get(path, initheader = {}, parse_response = true)
37
43
  response = http.get(path, initheader)
38
44
  if parse_response
@@ -85,13 +91,22 @@ module Suby
85
91
  format = self.class::FORMAT
86
92
  case format
87
93
  when :file
88
- sub_name(contents).write contents
94
+ # nothing special to do
95
+ when :gz
96
+ begin
97
+ gz = Zlib::GzipReader.new(StringIO.new(contents))
98
+ contents = gz.read
99
+ ensure
100
+ gz.close if gz
101
+ end
89
102
  when :zip
90
103
  TEMP_ARCHIVE.write contents
91
- Suby.extract_sub_from_archive(TEMP_ARCHIVE, format, file)
104
+ Suby.extract_sub_from_archive(TEMP_ARCHIVE, format, TEMP_SUBTITLES)
105
+ contents = TEMP_SUBTITLES.read
92
106
  else
93
107
  raise "unknown subtitles format: #{format}"
94
108
  end
109
+ sub_name(contents).write encode contents
95
110
  end
96
111
 
97
112
  def sub_name(contents)
@@ -105,11 +120,54 @@ module Suby
105
120
  'sub'
106
121
  end
107
122
  end
123
+
124
# IMDb id (the digits following "tt") taken from the first imdb title URL
# found in the .nfo file returned by find_nfo_file, or nil when there is no
# .nfo file or no such URL. A found id is cached in @imdbid.
def imdbid
  @imdbid ||= begin
    nfo = find_nfo_file
    if nfo
      text = convert_to_utf8_from_latin1(nfo.read)
      text[%r!imdb\.[^/]+/title/tt(\d+)!i, 1]
    end
  end
end
130
+
131
# First entry of the video file's directory whose extension is "nfo",
# or nil when there is none.
def find_nfo_file
  siblings = @file.dir.children
  siblings.find { |entry| entry.ext == "nfo" }
end
134
+
135
# Returns content unchanged when its bytes are valid in its current encoding.
# Otherwise the string is re-tagged (in place) as ISO-8859-1 and a UTF-8
# transcoded copy is returned; the optional block is called just before
# transcoding so that callers can react to the conversion.
def convert_to_utf8_from_latin1(content)
  return content if content.valid_encoding?

  original_encoding = content.encoding
  if content.force_encoding("ISO-8859-1").valid_encoding?
    yield if block_given?
    content.encode("UTF-8")
  else
    # restore original encoding (on the argument itself, not on an
    # unrelated `subtitles` name as before)
    content.force_encoding(original_encoding)
  end
end
149
+
150
# Message reported when this downloader found subtitles.
def success_message
  'Found'
end
153
+
154
# Returns the subtitles to write to disk. Only when @lang is :fr are they
# passed through convert_to_utf8_from_latin1; if that conversion happens,
# this instance's success_message is extended to mention the transcoding.
# For any other language the subtitles are returned untouched.
def encode(subtitles)
  return subtitles unless @lang == :fr

  convert_to_utf8_from_latin1(subtitles) do
    def self.success_message
      "#{super} (transcoded from ISO-8859-1)"
    end
  end
end
108
165
  end
109
166
  end
110
167
 
111
168
  # Defines downloader order
112
169
  %w[
170
+ opensubtitles
113
171
  tvsubtitles
114
172
  addic7ed
115
173
  ].each { |downloader| require_relative "downloader/#{downloader}" }
@@ -4,7 +4,7 @@ module Suby
4
4
 
5
5
  # from tvnamer @ ab2c6c, with author's agreement, adapted
6
6
  # See https://github.com/dbr/tvnamer/blob/master/tvnamer/config_defaults.py
7
- FILENAME_PATTERNS = [
7
+ TVSHOW_PATTERNS = [
8
8
  # foo.s0101
9
9
  /^(?<show>.+?)
10
10
  [ \._\-]
@@ -68,22 +68,19 @@ module Suby
68
68
  (?<season>[0-9]{1})
69
69
  (?<episode>[0-9]{2})
70
70
  [\._ -][^\/]*$/x,
71
-
72
- # foo.0103*
73
- /^(?<show>.+)
74
- [ \._\-]
75
- (?<season>[0-9]{2})
76
- (?<episode>[0-9]{2,3})
77
- [\._ -][^\/]*$/x
78
71
  ]
72
+ MOVIE_PATTERN = /^(?<movie>.*)[.\[( ](?<year>(?:19|20)\d{2})/
79
73
 
80
74
  def parse(file)
81
75
  filename = file.basename.to_s
82
- found = FILENAME_PATTERNS.find { |pattern|
83
- pattern =~ filename
84
- }
85
- raise "Wrong file format (#{file})" unless found
86
- [clean_show_name($~[:show]), $~[:season].to_i, $~[:episode].to_i]
76
+ if TVSHOW_PATTERNS.find { |pattern| pattern.match(filename) }
77
+ m = $~
78
+ { type: :tvshow, show: clean_show_name(m[:show]), season: m[:season].to_i, episode: m[:episode].to_i }
79
+ elsif m = MOVIE_PATTERN.match(filename)
80
+ { type: :movie, name: clean_show_name(m[:movie]), year: m[:year].to_i }
81
+ else
82
+ { type: :unknown, name: filename }
83
+ end
87
84
  end
88
85
 
89
86
  # from https://github.com/dbr/tvnamer/blob/master/tvnamer/utils.py#L78-95
@@ -0,0 +1,31 @@
1
module Suby
  # Computes the movie hash used to look a video file up on OpenSubtitles.
  # from http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
  module MovieHasher

    CHUNK_SIZE = 64 * 1024 # in bytes
    MASK64 = 0xffffffffffffffff # 2^64 - 1

    # Returns the hash of file as a 16-digit lowercase hexadecimal String.
    #
    # The hash is the file size plus the sum of the 64-bit words of the first
    # and of the last CHUNK_SIZE bytes of the file, kept on 64 bits.
    #
    # file must respond to #size and to #open(mode) yielding an IO
    # (e.g. a Path or a Pathname).
    def self.compute_hash(file)
      filesize = file.size
      hash = filesize

      file.open('rb') do |f|
        # beginning of the file
        hash = add_chunk(f, hash)

        # And again for the end of the file
        f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)
        hash = add_chunk(f, hash)
      end

      "%016x" % hash
    end

    # Reads up to CHUNK_SIZE bytes from io and adds each complete 64-bit word
    # to hash (modulo 2^64). Returns the updated hash.
    def self.add_chunk(io, hash)
      # IO#read(length) returns nil at end of file (e.g. for an empty file).
      chunk = io.read(CHUNK_SIZE) || ''
      # Q< = unsigned 64-bit, explicitly little-endian so that the result
      # does not depend on the byte order of the host.
      chunk.unpack("Q<*").reduce(hash) { |sum, n| (sum + n) & MASK64 }
    end
    private_class_method :add_chunk
  end
end
data/lib/suby.rb CHANGED
@@ -9,24 +9,29 @@ module Suby
9
9
  DownloaderError = Class.new StandardError
10
10
 
11
11
  SUB_EXTENSIONS = %w[srt sub]
12
- TEMP_ARCHIVE = Path('__archive__')
12
+ TMPDIR = Path.tmpdir
13
+ TEMP_ARCHIVE = TMPDIR / 'archive'
14
+ TEMP_SUBTITLES = TMPDIR / 'subtitles'
13
15
 
14
16
  class << self
15
17
  include Interface
16
18
 
17
19
  def download_subtitles(files, options = {})
20
+ Zip.options[:on_exists_proc] = options[:force]
18
21
  files.each { |file|
19
22
  file = Path(file)
20
23
  if file.dir?
21
24
  download_subtitles(file.children, options)
22
25
  elsif SUB_EXTENSIONS.include?(file.ext)
23
26
  # ignore already downloaded subtitles
24
- elsif SUB_EXTENSIONS.any? { |ext| f = file.sub_ext(ext) and f.exist? and !f.empty? }
27
+ elsif !options[:force] and SUB_EXTENSIONS.any? { |ext| f = file.sub_ext(ext) and f.exist? and !f.empty? }
25
28
  puts "Skipping: #{file}"
26
29
  elsif !file.exist? or video?(file)
27
30
  download_subtitles_for_file(file, options)
28
31
  end
29
32
  }
33
+ ensure
34
+ TMPDIR.rm_rf
30
35
  end
31
36
 
32
37
  def video?(file)
@@ -35,10 +40,9 @@ module Suby
35
40
 
36
41
  def download_subtitles_for_file(file, options)
37
42
  begin
38
- show, season, episode = FilenameParser.parse(file)
39
43
  puts file
40
44
  success = Downloader::DOWNLOADERS.find { |downloader_class|
41
- try_downloader(downloader_class.new(file, show, season, episode, options[:lang]))
45
+ try_downloader(downloader_class.new(file, options[:lang]))
42
46
  }
43
47
  error "\nNo downloader could find subtitles for #{file}" unless success
44
48
  rescue
@@ -49,6 +53,7 @@ module Suby
49
53
  end
50
54
 
51
55
  def try_downloader(downloader)
56
+ return false unless downloader.support_video_type?
52
57
  begin
53
58
  print " #{downloader.to_s.ljust(20)}"
54
59
  downloader.download
@@ -59,7 +64,7 @@ module Suby
59
64
  error "Error: #{error.message}"
60
65
  false
61
66
  else
62
- success "Found"
67
+ success downloader.success_message
63
68
  true
64
69
  end
65
70
  end
@@ -72,8 +77,7 @@ module Suby
72
77
  entry.to_s =~ /\.#{Regexp.union SUB_EXTENSIONS}$/
73
78
  }
74
79
  raise "no subtitles in #{archive}" unless sub
75
- name = file.sub_ext(Path(sub).ext)
76
- sub.extract(name.to_s)
80
+ sub.extract(file.to_s)
77
81
  }
78
82
  else
79
83
  raise "unknown archive type (#{archive})"
data/suby.gemspec CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |s|
16
16
  s.add_dependency 'term-ansicolor'
17
17
  s.add_dependency 'mime-types', '>= 1.19'
18
18
 
19
- s.version = '0.3.1'
19
+ s.version = '0.4.0'
20
20
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-08 00:00:00.000000000 Z
12
+ date: 2012-10-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: path
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &2156184680 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,15 +21,10 @@ dependencies:
21
21
  version: 1.3.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ! '>='
28
- - !ruby/object:Gem::Version
29
- version: 1.3.0
24
+ version_requirements: *2156184680
30
25
  - !ruby/object:Gem::Dependency
31
26
  name: nokogiri
32
- requirement: !ruby/object:Gem::Requirement
27
+ requirement: &2156184300 !ruby/object:Gem::Requirement
33
28
  none: false
34
29
  requirements:
35
30
  - - ! '>='
@@ -37,15 +32,10 @@ dependencies:
37
32
  version: '0'
38
33
  type: :runtime
39
34
  prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
35
+ version_requirements: *2156184300
46
36
  - !ruby/object:Gem::Dependency
47
37
  name: rubyzip
48
- requirement: !ruby/object:Gem::Requirement
38
+ requirement: &2156183760 !ruby/object:Gem::Requirement
49
39
  none: false
50
40
  requirements:
51
41
  - - ! '>='
@@ -53,15 +43,10 @@ dependencies:
53
43
  version: '0'
54
44
  type: :runtime
55
45
  prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
- requirements:
59
- - - ! '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
46
+ version_requirements: *2156183760
62
47
  - !ruby/object:Gem::Dependency
63
48
  name: term-ansicolor
64
- requirement: !ruby/object:Gem::Requirement
49
+ requirement: &2156183140 !ruby/object:Gem::Requirement
65
50
  none: false
66
51
  requirements:
67
52
  - - ! '>='
@@ -69,15 +54,10 @@ dependencies:
69
54
  version: '0'
70
55
  type: :runtime
71
56
  prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
- requirements:
75
- - - ! '>='
76
- - !ruby/object:Gem::Version
77
- version: '0'
57
+ version_requirements: *2156183140
78
58
  - !ruby/object:Gem::Dependency
79
59
  name: mime-types
80
- requirement: !ruby/object:Gem::Requirement
60
+ requirement: &2156182480 !ruby/object:Gem::Requirement
81
61
  none: false
82
62
  requirements:
83
63
  - - ! '>='
@@ -85,12 +65,7 @@ dependencies:
85
65
  version: '1.19'
86
66
  type: :runtime
87
67
  prerelease: false
88
- version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
- requirements:
91
- - - ! '>='
92
- - !ruby/object:Gem::Version
93
- version: '1.19'
68
+ version_requirements: *2156182480
94
69
  description: Find and download subtitles
95
70
  email: eregontp@gmail.com
96
71
  executables:
@@ -100,10 +75,12 @@ extra_rdoc_files: []
100
75
  files:
101
76
  - bin/suby
102
77
  - lib/suby/downloader/addic7ed.rb
78
+ - lib/suby/downloader/opensubtitles.rb
103
79
  - lib/suby/downloader/tvsubtitles.rb
104
80
  - lib/suby/downloader.rb
105
81
  - lib/suby/filename_parser.rb
106
82
  - lib/suby/interface.rb
83
+ - lib/suby/movie_hasher.rb
107
84
  - lib/suby.rb
108
85
  - .gitignore
109
86
  - README.md
@@ -128,9 +105,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
105
  version: '0'
129
106
  requirements: []
130
107
  rubyforge_project:
131
- rubygems_version: 1.8.23
108
+ rubygems_version: 1.8.11
132
109
  signing_key:
133
110
  specification_version: 3
134
111
  summary: Subtitles' downloader
135
112
  test_files: []
136
- has_rdoc: