suby 0.3.1 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/suby CHANGED
@@ -13,6 +13,10 @@ option_parser = OptionParser.new do |opts|
13
13
  options[:lang] = lang
14
14
  end
15
15
 
16
+ opts.on '-f', '--force', 'Force subtitles download even if already exists' do |lang|
17
+ options[:force] = true
18
+ end
19
+
16
20
  opts.on '-h', '--help', 'Show usage' do
17
21
  puts opts
18
22
  exit
@@ -2,6 +2,8 @@ module Suby
2
2
  class Downloader::Addic7ed < Downloader
3
3
  SITE = 'www.addic7ed.com'
4
4
  FORMAT = :file
5
+ SUBTITLE_TYPES = [:tvshow]
6
+
5
7
  LANG_IDS = {
6
8
  en: 1, es: 5, it: 7, fr: 8, pt: 10, de: 11, ca: 12, eu: 13, cs: 14,
7
9
  gl: 15, tr: 16, nl: 17, sv: 18, ru: 19, hu: 20, pl: 21, sl: 22, he: 23,
@@ -29,7 +31,7 @@ module Suby
29
31
  body.strip!
30
32
  raise NotFoundError, "show/season/episode not found" if body.empty?
31
33
  if body.include? FILTER_IGNORED
32
- raise NotFoundError, "no subtitle available"
34
+ raise NotFoundError, "no subtitles available"
33
35
  end
34
36
  body
35
37
  end
@@ -0,0 +1,83 @@
1
module Suby
  # Downloader that looks subtitles up through the OpenSubtitles XML-RPC API.
  #
  # Based on https://github.com/byroot/ruby-osdb/blob/master/lib/osdb/server.rb
  class Downloader::OpenSubtitles < Downloader
    SITE = 'api.opensubtitles.org'
    FORMAT = :gz
    XMLRPC_PATH = '/xml-rpc'
    SUBTITLE_TYPES = [:tvshow, :movie, :unknown]

    USERNAME = ''
    PASSWORD = ''
    LOGIN_LANGUAGE = 'eng'
    USER_AGENT = 'Suby v0.4'

    # Search strategies, tried in this order by #download_url.
    # Searching by imdbid is also implemented (see #search_query_by_imdbid)
    # but is not used here, as it returns subtitles for many different
    # versions of the same video.
    SEARCH_QUERIES_ORDER = [:hash, :name]

    # OpenSubtitles needs ISO 639-2/B language codes for subtitles search
    # See http://www.opensubtitles.org/addons/export_languages.php
    # and http://en.wikipedia.org/wiki/List_of_ISO_639-2_codes
    LANG_MAPPING = {
      ar: "ara", bg: "bul", bn: "ben", br: "bre", bs: "bos", ca: "cat", cs: "cze", da: "dan", de: "ger", el: "ell",
      en: "eng", eo: "epo", es: "spa", et: "est", eu: "baq", fa: "per", fi: "fin", fr: "fre", gl: "glg", he: "heb",
      hi: "hin", hr: "hrv", hu: "hun", hy: "arm", id: "ind", is: "ice", it: "ita", ja: "jpn", ka: "geo", kk: "kaz",
      km: "khm", ko: "kor", lb: "ltz", lt: "lit", lv: "lav", mk: "mac", mn: "mon", ms: "may", nl: "dut", no: "nor",
      oc: "oci", pb: "pob", pl: "pol", pt: "por", ro: "rum", ru: "rus", si: "sin", sk: "slo", sl: "slv", sq: "alb",
      sr: "scc", sv: "swe", sw: "swa", th: "tha", tl: "tgl", tr: "tur", uk: "ukr", ur: "urd", vi: "vie", zh: "chi"
    }
    # Languages without an explicit mapping are searched with 'all'
    LANG_MAPPING.default = 'all'

    # Runs each search strategy of SEARCH_QUERIES_ORDER in turn and returns
    # the 'SubDownloadLink' of the first subtitle of the first strategy that
    # yields results. The winning strategy is remembered in @type so that
    # #success_message can report it.
    #
    # Raises NotFoundError when no strategy returns any subtitle.
    def download_url
      SEARCH_QUERIES_ORDER.each do |type|
        subs = search_subtitles(search_query(type))['data']
        # 'data' may be missing/false, or present but empty: both mean
        # "nothing found with this strategy", so try the next one.
        next if !subs || subs.empty?
        @type = type
        return subs.first['SubDownloadLink']
      end
      raise NotFoundError, "no subtitles available"
    end

    # Calls SearchSubtitles with the given query (a Hash, or an Array of
    # Hashes) and returns the raw response.
    # Returns an empty Hash, without contacting the server, when query is nil.
    def search_subtitles(query)
      return {} unless query
      query = [query] unless query.kind_of? Array
      xmlrpc.call('SearchSubtitles', token, query)
    end

    # Session token, obtained by logging in on first use and then cached.
    def token
      @token ||= login
    end

    # Logs in and returns the session token from the response.
    #
    # Raises DownloaderError unless the response status is '200 OK'.
    def login
      response = xmlrpc.call('LogIn', USERNAME, PASSWORD, LOGIN_LANGUAGE, USER_AGENT)
      unless response['status'] == '200 OK'
        # NOTE(review): this message embeds PASSWORD; harmless while the
        # credentials are empty, but consider dropping it if they are ever set.
        raise DownloaderError, "Failed to login with #{USERNAME}:#{PASSWORD}. " +
          "Server return code: #{response['status']}"
      end
      response['token']
    end

    # Builds the query Hash for the given strategy (:hash, :name or :imdbid),
    # with the subtitle language added.
    # Returns nil when the strategy cannot build a query for this file.
    def search_query(type = :hash)
      return nil unless query = send("search_query_by_#{type}")
      query.merge(sublanguageid: language(lang))
    end

    # Query by content hash and byte size; nil when the file does not exist.
    def search_query_by_hash
      { moviehash: MovieHasher.compute_hash(file), moviebytesize: file.size.to_s } if file.exist?
    end

    # Query by show/season/episode when both season and episode are known,
    # otherwise by the file's base name.
    def search_query_by_name
      season && episode ? { query: show, season: season, episode: episode } : { query: file.base.to_s }
    end

    # Query by IMDb id; nil when no id is available.
    def search_query_by_imdbid
      { imdbid: imdbid } if imdbid
    end

    # Maps a language code such as :fr or "fr" to the code used for the
    # search, falling back to LANG_MAPPING's default for unknown languages.
    def language(lang)
      LANG_MAPPING[lang.to_sym]
    end

    # Message shown on success, naming the search strategy that worked.
    def success_message
      "Found by #{@type}"
    end
  end
end
@@ -3,6 +3,7 @@ module Suby
3
3
  SITE = 'www.tvsubtitles.net'
4
4
  FORMAT = :zip
5
5
  SEARCH_URL = '/search.php'
6
+ SUBTITLE_TYPES = [:tvshow]
6
7
 
7
8
  # cache
8
9
  SHOW_URLS = {}
@@ -75,7 +76,7 @@ module Suby
75
76
  a = subtitles.css('div.left_articles a').find { |a|
76
77
  a.name == 'a' and a[:href].start_with?('/subtitle')
77
78
  }
78
- raise NotFoundError, "no subtitle available" unless a
79
+ raise NotFoundError, "no subtitles available" unless a
79
80
  url = a[:href]
80
81
  raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
81
82
  url
@@ -83,8 +84,7 @@ module Suby
83
84
  end
84
85
 
85
86
  def download_url
86
- @download_url ||= URI.escape '/' +
87
- get_redirection(subtitles_url.sub('subtitle', 'download'))
87
+ URI.escape '/' + get_redirection(subtitles_url.sub('subtitle', 'download'))
88
88
  end
89
89
  end
90
90
  end
@@ -1,6 +1,9 @@
1
1
  require 'net/http'
2
2
  require 'cgi/util'
3
3
  require 'nokogiri'
4
+ require 'xmlrpc/client'
5
+ require 'zlib'
6
+ require 'stringio'
4
7
 
5
8
  module Suby
6
9
  class Downloader
@@ -9,22 +12,21 @@ module Suby
9
12
  DOWNLOADERS << downloader
10
13
  end
11
14
 
12
- attr_reader :show, :season, :episode, :file, :lang
15
+ attr_reader :show, :season, :episode, :video_data, :file, :lang
13
16
 
14
17
  def initialize(file, *args)
15
18
  @file = file
16
19
  @lang = (args.last || 'en').to_sym
17
- case args.size
18
- when 0..1
19
- @show, @season, @episode = FilenameParser.parse(file)
20
- when 3..4
21
- @show, @season, @episode = args
22
- else
23
- raise ArgumentError, "wrong number of arguments: #{args.size+1} for " +
24
- "(file, [show, season, episode], [lang])"
20
+ @video_data = FilenameParser.parse(file)
21
+ if video_data[:type] == :tvshow
22
+ @show, @season, @episode = video_data.values_at(:show, :season, :episode)
25
23
  end
26
24
  end
27
25
 
26
+ def support_video_type?
27
+ self.class::SUBTITLE_TYPES.include? video_data[:type]
28
+ end
29
+
28
30
  def to_s
29
31
  self.class.name.sub(/^.+::/, '')
30
32
  end
@@ -33,6 +35,10 @@ module Suby
33
35
  @http ||= Net::HTTP.new(self.class::SITE).start
34
36
  end
35
37
 
38
+ def xmlrpc
39
+ @xmlrpc ||= XMLRPC::Client.new(self.class::SITE, self.class::XMLRPC_PATH)
40
+ end
41
+
36
42
  def get(path, initheader = {}, parse_response = true)
37
43
  response = http.get(path, initheader)
38
44
  if parse_response
@@ -85,13 +91,22 @@ module Suby
85
91
  format = self.class::FORMAT
86
92
  case format
87
93
  when :file
88
- sub_name(contents).write contents
94
+ # nothing special to do
95
+ when :gz
96
+ begin
97
+ gz = Zlib::GzipReader.new(StringIO.new(contents))
98
+ contents = gz.read
99
+ ensure
100
+ gz.close if gz
101
+ end
89
102
  when :zip
90
103
  TEMP_ARCHIVE.write contents
91
- Suby.extract_sub_from_archive(TEMP_ARCHIVE, format, file)
104
+ Suby.extract_sub_from_archive(TEMP_ARCHIVE, format, TEMP_SUBTITLES)
105
+ contents = TEMP_SUBTITLES.read
92
106
  else
93
107
  raise "unknown subtitles format: #{format}"
94
108
  end
109
+ sub_name(contents).write encode contents
95
110
  end
96
111
 
97
112
  def sub_name(contents)
@@ -105,11 +120,54 @@ module Suby
105
120
  'sub'
106
121
  end
107
122
  end
123
+
124
+ def imdbid
125
+ @imdbid ||= begin
126
+ nfo_file = find_nfo_file
127
+ convert_to_utf8_from_latin1(nfo_file.read)[%r!imdb\.[^/]+/title/tt(\d+)!i, 1] if nfo_file
128
+ end
129
+ end
130
+
131
+ def find_nfo_file
132
+ @file.dir.children.find { |file| file.ext == "nfo" }
133
+ end
134
+
135
# Returns content as valid text, transcoding it from ISO-8859-1 to UTF-8
# when it is not valid in its current encoding.
#
# content - the String to check. It is never modified.
#
# Yields (with no arguments) only when a transcoding actually takes place,
# so the caller can record that fact.
#
# Returns content itself when it is already valid (or cannot be read as
# ISO-8859-1), otherwise a new UTF-8 String.
def convert_to_utf8_from_latin1(content)
  return content if content.valid_encoding?

  # Work on a copy so the caller's string keeps its original encoding tag
  # and no "restore" step is needed afterwards.
  latin1 = content.dup.force_encoding("ISO-8859-1")
  return content unless latin1.valid_encoding?

  yield if block_given?
  latin1.encode("UTF-8")
end
149
+
150
+ def success_message
151
+ "Found"
152
+ end
153
+
154
+ def encode(subtitles)
155
+ if @lang == :fr
156
+ convert_to_utf8_from_latin1(subtitles) do
157
+ def self.success_message
158
+ "#{super} (transcoded from ISO-8859-1)"
159
+ end
160
+ end
161
+ else
162
+ subtitles
163
+ end
164
+ end
108
165
  end
109
166
  end
110
167
 
111
168
  # Defines downloader order
112
169
  %w[
170
+ opensubtitles
113
171
  tvsubtitles
114
172
  addic7ed
115
173
  ].each { |downloader| require_relative "downloader/#{downloader}" }
@@ -4,7 +4,7 @@ module Suby
4
4
 
5
5
  # from tvnamer @ ab2c6c, with author's agreement, adapted
6
6
  # See https://github.com/dbr/tvnamer/blob/master/tvnamer/config_defaults.py
7
- FILENAME_PATTERNS = [
7
+ TVSHOW_PATTERNS = [
8
8
  # foo.s0101
9
9
  /^(?<show>.+?)
10
10
  [ \._\-]
@@ -68,22 +68,19 @@ module Suby
68
68
  (?<season>[0-9]{1})
69
69
  (?<episode>[0-9]{2})
70
70
  [\._ -][^\/]*$/x,
71
-
72
- # foo.0103*
73
- /^(?<show>.+)
74
- [ \._\-]
75
- (?<season>[0-9]{2})
76
- (?<episode>[0-9]{2,3})
77
- [\._ -][^\/]*$/x
78
71
  ]
72
+ MOVIE_PATTERN = /^(?<movie>.*)[.\[( ](?<year>(?:19|20)\d{2})/
79
73
 
80
74
  def parse(file)
81
75
  filename = file.basename.to_s
82
- found = FILENAME_PATTERNS.find { |pattern|
83
- pattern =~ filename
84
- }
85
- raise "Wrong file format (#{file})" unless found
86
- [clean_show_name($~[:show]), $~[:season].to_i, $~[:episode].to_i]
76
+ if TVSHOW_PATTERNS.find { |pattern| pattern.match(filename) }
77
+ m = $~
78
+ { type: :tvshow, show: clean_show_name(m[:show]), season: m[:season].to_i, episode: m[:episode].to_i }
79
+ elsif m = MOVIE_PATTERN.match(filename)
80
+ { type: :movie, name: clean_show_name(m[:movie]), year: m[:year].to_i }
81
+ else
82
+ { type: :unknown, name: filename }
83
+ end
87
84
  end
88
85
 
89
86
  # from https://github.com/dbr/tvnamer/blob/master/tvnamer/utils.py#L78-95
@@ -0,0 +1,31 @@
1
module Suby
  # Computes the OpenSubtitles hash of a video file.
  #
  # from http://trac.opensubtitles.org/projects/opensubtitles/wiki/HashSourceCodes
  module MovieHasher

    CHUNK_SIZE = 64 * 1024 # in bytes
    MASK64 = 0xffffffffffffffff # 2^64 - 1

    # Returns the hash of file as a 16-digit lowercase hexadecimal String.
    #
    # The hash is the file size plus the sum of the 64-bit words found in
    # the first and in the last CHUNK_SIZE bytes of the file, modulo 2^64.
    # For files shorter than CHUNK_SIZE both chunks start at offset 0.
    #
    # file - an object responding to #size and #open(mode) { |io| ... }
    def self.compute_hash(file)
      filesize = file.size
      hash = filesize

      file.open('rb') do |f|
        # Beginning of the file
        hash = (hash + sum_chunk(f)) & MASK64

        f.seek([0, filesize - CHUNK_SIZE].max, IO::SEEK_SET)

        # And again for the end of the file
        hash = (hash + sum_chunk(f)) & MASK64
      end

      "%016x" % hash
    end

    # Reads up to CHUNK_SIZE bytes from io and returns the sum, modulo 2^64,
    # of the complete 64-bit words it contains.
    def self.sum_chunk(io)
      # read returns nil at end of file (e.g. for an empty file), hence to_s.
      # Q< = unsigned 64-bit little-endian, so the result does not depend on
      # the byte order of the machine computing it.
      io.read(CHUNK_SIZE).to_s.unpack("Q<*").inject(0) { |sum, word| (sum + word) & MASK64 }
    end
    private_class_method :sum_chunk
  end
end
data/lib/suby.rb CHANGED
@@ -9,24 +9,29 @@ module Suby
9
9
  DownloaderError = Class.new StandardError
10
10
 
11
11
  SUB_EXTENSIONS = %w[srt sub]
12
- TEMP_ARCHIVE = Path('__archive__')
12
+ TMPDIR = Path.tmpdir
13
+ TEMP_ARCHIVE = TMPDIR / 'archive'
14
+ TEMP_SUBTITLES = TMPDIR / 'subtitles'
13
15
 
14
16
  class << self
15
17
  include Interface
16
18
 
17
19
  def download_subtitles(files, options = {})
20
+ Zip.options[:on_exists_proc] = options[:force]
18
21
  files.each { |file|
19
22
  file = Path(file)
20
23
  if file.dir?
21
24
  download_subtitles(file.children, options)
22
25
  elsif SUB_EXTENSIONS.include?(file.ext)
23
26
  # ignore already downloaded subtitles
24
- elsif SUB_EXTENSIONS.any? { |ext| f = file.sub_ext(ext) and f.exist? and !f.empty? }
27
+ elsif !options[:force] and SUB_EXTENSIONS.any? { |ext| f = file.sub_ext(ext) and f.exist? and !f.empty? }
25
28
  puts "Skipping: #{file}"
26
29
  elsif !file.exist? or video?(file)
27
30
  download_subtitles_for_file(file, options)
28
31
  end
29
32
  }
33
+ ensure
34
+ TMPDIR.rm_rf
30
35
  end
31
36
 
32
37
  def video?(file)
@@ -35,10 +40,9 @@ module Suby
35
40
 
36
41
  def download_subtitles_for_file(file, options)
37
42
  begin
38
- show, season, episode = FilenameParser.parse(file)
39
43
  puts file
40
44
  success = Downloader::DOWNLOADERS.find { |downloader_class|
41
- try_downloader(downloader_class.new(file, show, season, episode, options[:lang]))
45
+ try_downloader(downloader_class.new(file, options[:lang]))
42
46
  }
43
47
  error "\nNo downloader could find subtitles for #{file}" unless success
44
48
  rescue
@@ -49,6 +53,7 @@ module Suby
49
53
  end
50
54
 
51
55
  def try_downloader(downloader)
56
+ return false unless downloader.support_video_type?
52
57
  begin
53
58
  print " #{downloader.to_s.ljust(20)}"
54
59
  downloader.download
@@ -59,7 +64,7 @@ module Suby
59
64
  error "Error: #{error.message}"
60
65
  false
61
66
  else
62
- success "Found"
67
+ success downloader.success_message
63
68
  true
64
69
  end
65
70
  end
@@ -72,8 +77,7 @@ module Suby
72
77
  entry.to_s =~ /\.#{Regexp.union SUB_EXTENSIONS}$/
73
78
  }
74
79
  raise "no subtitles in #{archive}" unless sub
75
- name = file.sub_ext(Path(sub).ext)
76
- sub.extract(name.to_s)
80
+ sub.extract(file.to_s)
77
81
  }
78
82
  else
79
83
  raise "unknown archive type (#{archive})"
data/suby.gemspec CHANGED
@@ -16,5 +16,5 @@ Gem::Specification.new do |s|
16
16
  s.add_dependency 'term-ansicolor'
17
17
  s.add_dependency 'mime-types', '>= 1.19'
18
18
 
19
- s.version = '0.3.1'
19
+ s.version = '0.4.0'
20
20
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.1
4
+ version: 0.4.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-08-08 00:00:00.000000000 Z
12
+ date: 2012-10-05 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: path
16
- requirement: !ruby/object:Gem::Requirement
16
+ requirement: &2156184680 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,15 +21,10 @@ dependencies:
21
21
  version: 1.3.0
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: !ruby/object:Gem::Requirement
25
- none: false
26
- requirements:
27
- - - ! '>='
28
- - !ruby/object:Gem::Version
29
- version: 1.3.0
24
+ version_requirements: *2156184680
30
25
  - !ruby/object:Gem::Dependency
31
26
  name: nokogiri
32
- requirement: !ruby/object:Gem::Requirement
27
+ requirement: &2156184300 !ruby/object:Gem::Requirement
33
28
  none: false
34
29
  requirements:
35
30
  - - ! '>='
@@ -37,15 +32,10 @@ dependencies:
37
32
  version: '0'
38
33
  type: :runtime
39
34
  prerelease: false
40
- version_requirements: !ruby/object:Gem::Requirement
41
- none: false
42
- requirements:
43
- - - ! '>='
44
- - !ruby/object:Gem::Version
45
- version: '0'
35
+ version_requirements: *2156184300
46
36
  - !ruby/object:Gem::Dependency
47
37
  name: rubyzip
48
- requirement: !ruby/object:Gem::Requirement
38
+ requirement: &2156183760 !ruby/object:Gem::Requirement
49
39
  none: false
50
40
  requirements:
51
41
  - - ! '>='
@@ -53,15 +43,10 @@ dependencies:
53
43
  version: '0'
54
44
  type: :runtime
55
45
  prerelease: false
56
- version_requirements: !ruby/object:Gem::Requirement
57
- none: false
58
- requirements:
59
- - - ! '>='
60
- - !ruby/object:Gem::Version
61
- version: '0'
46
+ version_requirements: *2156183760
62
47
  - !ruby/object:Gem::Dependency
63
48
  name: term-ansicolor
64
- requirement: !ruby/object:Gem::Requirement
49
+ requirement: &2156183140 !ruby/object:Gem::Requirement
65
50
  none: false
66
51
  requirements:
67
52
  - - ! '>='
@@ -69,15 +54,10 @@ dependencies:
69
54
  version: '0'
70
55
  type: :runtime
71
56
  prerelease: false
72
- version_requirements: !ruby/object:Gem::Requirement
73
- none: false
74
- requirements:
75
- - - ! '>='
76
- - !ruby/object:Gem::Version
77
- version: '0'
57
+ version_requirements: *2156183140
78
58
  - !ruby/object:Gem::Dependency
79
59
  name: mime-types
80
- requirement: !ruby/object:Gem::Requirement
60
+ requirement: &2156182480 !ruby/object:Gem::Requirement
81
61
  none: false
82
62
  requirements:
83
63
  - - ! '>='
@@ -85,12 +65,7 @@ dependencies:
85
65
  version: '1.19'
86
66
  type: :runtime
87
67
  prerelease: false
88
- version_requirements: !ruby/object:Gem::Requirement
89
- none: false
90
- requirements:
91
- - - ! '>='
92
- - !ruby/object:Gem::Version
93
- version: '1.19'
68
+ version_requirements: *2156182480
94
69
  description: Find and download subtitles
95
70
  email: eregontp@gmail.com
96
71
  executables:
@@ -100,10 +75,12 @@ extra_rdoc_files: []
100
75
  files:
101
76
  - bin/suby
102
77
  - lib/suby/downloader/addic7ed.rb
78
+ - lib/suby/downloader/opensubtitles.rb
103
79
  - lib/suby/downloader/tvsubtitles.rb
104
80
  - lib/suby/downloader.rb
105
81
  - lib/suby/filename_parser.rb
106
82
  - lib/suby/interface.rb
83
+ - lib/suby/movie_hasher.rb
107
84
  - lib/suby.rb
108
85
  - .gitignore
109
86
  - README.md
@@ -128,9 +105,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
128
105
  version: '0'
129
106
  requirements: []
130
107
  rubyforge_project:
131
- rubygems_version: 1.8.23
108
+ rubygems_version: 1.8.11
132
109
  signing_key:
133
110
  specification_version: 3
134
111
  summary: Subtitles' downloader
135
112
  test_files: []
136
- has_rdoc: