suby 0.0.6 → 0.0.8

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/bin/suby CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'suby'
3
+ require_relative '../lib/suby'
4
4
  require 'optparse'
5
5
 
6
6
  options = {}
data/lib/suby.rb CHANGED
@@ -13,7 +13,19 @@ module Suby
13
13
  File.exist? File.basename(file, File.extname(file)) + ".#{ext}" }
14
14
 
15
15
  begin
16
- Downloader.new(file, options[:lang]).download
16
+ success = Downloader::DOWNLOADERS.find do |downloader|
17
+ error = catch :downloader do
18
+ downloader.new(file, options[:lang]).download
19
+ :success
20
+ end
21
+ if error == :success
22
+ puts "#{downloader} found subtitles for #{file}"
23
+ else
24
+ puts "#{downloader} did not find subtitles for #{file} (#{error})"
25
+ end
26
+ error == :success
27
+ end
28
+ STDERR.puts "No downloader could find subtitles for #{file}" unless success
17
29
  rescue
18
30
  puts " The download of the subtitles failed for #{file}:"
19
31
  puts " #{$!.class}: #{$!.message}"
@@ -22,20 +34,20 @@ module Suby
22
34
  }
23
35
  end
24
36
 
25
- def extract_subs_from_archive(archive)
26
- case `file #{archive}`
27
- when /Zip archive data/
28
- subs = `unzip -qql #{archive}`.scan(/\d{2}:\d{2} (.+?(?:#{SUB_EXTENSIONS.join '|'}))$/).map(&:first)
29
- raise "no subtitles in #{archive}" if subs.empty?
30
- subs_for_unzip = subs.map { |sub| sub.gsub(/(\[|\])/) { "\\#{$1}" } }
31
- system 'unzip', archive, *subs_for_unzip, 1 => :close
32
- puts "found subtitles: #{subs.join(', ')}"
37
+ def extract_sub_from_archive(archive, format)
38
+ case format
39
+ when :zip
40
+ sub = `unzip -qql #{archive}`.scan(/\d{2}:\d{2} (.+?(?:#{SUB_EXTENSIONS.join '|'}))$/).map(&:first).first
41
+ raise "no subtitles in #{archive}" unless sub
42
+ sub_for_unzip = sub.gsub(/(\[|\])/) { "\\#{$1}" }
43
+ system 'unzip', archive, sub_for_unzip, 1 => :close
44
+ puts "found subtitle: #{sub}" if $VERBOSE
33
45
  else
34
46
  raise "unknown archive type (#{archive})"
35
47
  end
36
48
 
37
49
  # Cleaning
38
50
  File.unlink archive
39
- subs
51
+ sub
40
52
  end
41
53
  end
@@ -1,91 +1,63 @@
1
1
  require 'net/http'
2
+ require 'cgi/util'
2
3
  require 'nokogiri'
4
+ require_relative 'filename_parser'
3
5
 
4
6
  module Suby
5
7
  class Downloader
6
- SITE = 'www.tvsubtitles.net'
7
- SEARCH_URL = '/search.php'
8
-
9
- # cache
10
- SHOW_URLS = {}
11
- SHOW_PAGES = {}
8
+ DOWNLOADERS = []
9
+ def self.inherited(subclass)
10
+ DOWNLOADERS << subclass
11
+ end
12
12
 
13
13
  attr_reader :show, :season, :episode, :file, :lang
14
14
 
15
- def initialize file, lang = nil
16
- @file, @lang = file, lang || 'en'
17
- unless /^(?<show>.+) (?<season>\d{1,2})x(?<episode>\d{1,2})(?: - .+)?\.[a-z]+?$/ =~ file
18
- raise "wrong file format (#{file}). Must be:\n<show> <season>x<episode>[ - <title>].<ext>"
19
- end
20
- @show, @season, @episode = show, season.to_i, episode.to_i
15
+ def initialize(file, lang = nil)
16
+ @file, @lang = file, (lang || 'en').to_sym
17
+ @show, @season, @episode = FilenameParser.parse(file)
21
18
  end
22
19
 
23
20
  def http
24
- @http ||= Net::HTTP.new(SITE).start
21
+ @http ||= Net::HTTP.new(self.class::SITE).start
25
22
  end
26
23
 
27
- def show_url
28
- SHOW_URLS[show] ||= begin
29
- post = Net::HTTP::Post.new(SEARCH_URL)
30
- post.form_data = { 'q' => show }
31
- results = Nokogiri http.request(post).body
32
- url = results.css('ul li div a').first[:href]
33
-
34
- raise 'could not find the show' unless /^\/tvshow-(\d+)\.html$/ =~ url
35
- "/tvshow-#{$1}-#{season}.html"
36
- end
24
+ def get(path, initheader = {})
25
+ response = http.get(path, initheader)
26
+ raise "Invalid response for #{path}: #{response}" unless Net::HTTPSuccess === response
27
+ response.body
37
28
  end
38
29
 
39
- def episode_url
40
- @episode_url ||= begin
41
- SHOW_PAGES[show] ||= Nokogiri http.get(show_url).body
42
-
43
- url = nil
44
- SHOW_PAGES[show].css('div.left_articles table tr').find { |tr|
45
- tr.children.find { |td| td.name == 'td' && td.text =~ /\A#{season}x0?#{episode}\z/ }
46
- }.children.find { |td|
47
- td.children.find { |a|
48
- a.name == 'a' && a[:href].start_with?('episode') && url = a[:href]
49
- }
50
- }
51
- raise "invalid episode url: #{episode_url}" unless url =~ /^episode-(\d+)\.html$/
52
- "/episode-#{$1}-#{lang}.html"
30
+ def get_redirection(path, initheader = {})
31
+ response = http.get(path, initheader)
32
+ location = response['Location']
33
+ unless (Net::HTTPFound === response or Net::HTTPSuccess === response) and location
34
+ raise "Invalid response for #{path}: #{response}: location: #{location.inspect}"
53
35
  end
54
- end
55
-
56
- def subtitles_url
57
- @subtitles_url ||= begin
58
- subtitles = Nokogiri http.get(episode_url).body
59
-
60
- # TODO: choose 720p or most downloaded instead of first found
61
- url = subtitles.css('div.left_articles a').find { |a| a.name == 'a' && a[:href].start_with?('/subtitle') }[:href]
62
- raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
63
- url
64
- end
65
- end
66
-
67
- def download_url
68
- @download_url ||= URI.escape '/' + http.get(subtitles_url.sub('subtitle', 'download'))['Location']
36
+ location
69
37
  end
70
38
 
71
39
  def download
72
- puts "Searching subtitles for #{file}:"
73
- puts "Show: #{show}, Season: #{season}, Episode: #{episode}"
74
-
75
- puts "show url: #{show_url}"
76
- puts "episode url: #{episode_url}"
77
- puts "subtitle url: #{subtitles_url}"
78
- puts "download url: #{download_url}"
40
+ extract download_url
41
+ end
79
42
 
80
- # extract
81
- zip = http.get(download_url).body
43
+ def extract(url)
44
+ contents = get(url)
82
45
  http.finish
83
- open(TEMP_ARCHIVE_NAME, 'wb') { |f| f.write zip }
84
- subs = Suby.extract_subs_from_archive(TEMP_ARCHIVE_NAME)
46
+ format = self.class::FORMAT
47
+ if format == :file
48
+ open(sub_name(url), 'wb') { |f| f.write contents }
49
+ else
50
+ open(TEMP_ARCHIVE_NAME, 'wb') { |f| f.write contents }
51
+ sub = Suby.extract_sub_from_archive(TEMP_ARCHIVE_NAME, format)
52
+ File.rename sub, sub_name(sub)
53
+ end
54
+ end
85
55
 
86
- new_name = File.basename(file, File.extname(file))+File.extname(subs.first)
87
- File.rename subs.first, new_name
88
- puts "Renaming to #{new_name}"
56
+ def sub_name(sub)
57
+ File.basename(file, File.extname(file)) + File.extname(sub)
89
58
  end
90
59
  end
91
60
  end
61
+
62
+ require_relative 'downloader/tvsubtitles'
63
+ require_relative 'downloader/addic7ed'
@@ -0,0 +1,27 @@
module Suby
  # Downloader for www.addic7ed.com.
  #
  # FORMAT is :file, i.e. the downloaded contents are the subtitle itself
  # rather than an archive to unpack.
  class Downloader::Addic7ed < Downloader
    SITE = 'www.addic7ed.com'
    FORMAT = :file
    # Language symbol => numeric language id used in the site's URLs.
    LANG_IDS = {
      en: 1,
      es: 5,
      fr: 8
    }
    # Text present in the page when no subtitle matched the requested language.
    FILTER_IGNORED = "Couldn't find any subs with the specified language. Filter ignored"

    # Builds the (escaped) URL of the subtitle file for the current
    # show/season/episode/lang.
    #
    # Every "this site cannot serve the request" condition is reported with
    # `throw :downloader, reason` so the caller can move on to another
    # downloader; it must therefore be called inside `catch :downloader`.
    #
    # Returns the escaped location String the site redirects to.
    def download_url
      # Without an id the URL would silently lose its language filter.
      lang_id = LANG_IDS[lang]
      throw :downloader, "language not supported" unless lang_id

      subtitles_url = "/serie/#{CGI.escape show}/#{season}/#{episode}/#{lang_id}"
      response = http.get(subtitles_url)
      throw :downloader, "show/season/episode not found" unless Net::HTTPSuccess === response
      body = response.body
      throw :downloader, "no subtitle available" if body.include? FILTER_IGNORED

      # Anchors without an href yield nil, hence the to_s before start_with?.
      link = Nokogiri(body).css('a').find { |a|
        a[:href].to_s.start_with?('/original/', '/updated/')
      }
      throw :downloader, "no subtitle available" unless link

      location = get_redirection link[:href], 'Referer' => "http://#{SITE}#{subtitles_url}" # They check Referer
      throw :downloader, "download exceeded" if location == '/downloadexceeded.php'
      # URI.escape was removed in Ruby 3.0; the default parser offers the same escaping.
      URI::DEFAULT_PARSER.escape location
    end
  end
end
@@ -0,0 +1,73 @@
module Suby
  # Downloader for www.tvsubtitles.net.
  #
  # FORMAT is :zip, i.e. the downloaded contents are a zip archive.
  #
  # "Not available on this site" conditions are reported with
  # `throw :downloader, reason` (the methods must run inside
  # `catch :downloader`); unexpected page layouts raise a RuntimeError.
  class Downloader::TVSubtitles < Downloader
    SITE = 'www.tvsubtitles.net'
    FORMAT = :zip
    SEARCH_URL = '/search.php'

    # cache
    SHOW_URLS = {}  # show name => show page path
    SHOW_PAGES = {} # [show name, season] => parsed season page

    # Path of the show's page ("/tvshow-<id>.html"), found through the site
    # search and cached per show name.
    def show_url
      SHOW_URLS[show] ||= begin
        post = Net::HTTP::Post.new(SEARCH_URL)
        post.form_data = { 'q' => show }
        results = Nokogiri http.request(post).body
        link = results.css('ul li div a').find { |a|
          # "Show (2009-2011)" => "Show"
          a.text.sub(/ \(\d{4}-\d{4}\)$/, '').casecmp(show) == 0
        }
        throw :downloader, "show not found" unless link
        url = link[:href]

        raise 'could not find the show' unless /^\/tvshow-(\d+)\.html$/ =~ url
        "/tvshow-#{$1}.html"
      end
    end

    # Path of the page listing the episodes of the current season.
    def season_url
      show_url.sub(/\.html$/, "-#{season}.html")
    end

    # Path of the episode's page for the requested language
    # ("/episode-<id>-<lang>.html").
    def episode_url
      @episode_url ||= begin
        # The page is season-specific, so the cache key must include the
        # season; keying by show alone would reuse another season's page.
        page = SHOW_PAGES[[show, season]] ||= Nokogiri(get(season_url))

        season_text = /^Season #{season}$/
        page.css('div.left_articles p.description b').find { |b|
          b.text =~ season_text
        } or throw :downloader, "season not found"

        row = page.css('div.left_articles table tr').find { |tr|
          tr.children.find { |td| td.name == 'td' && td.text =~ /\A#{season}x0?#{episode}\z/ }
        }
        throw :downloader, "episode not found" unless row

        # First link to an episode page among the cells of the row.
        # Anchors without an href yield nil, hence the to_s.
        url = nil
        row.children.find { |td|
          td.children.find { |a|
            a.name == 'a' && a[:href].to_s.start_with?('episode') && url = a[:href]
          }
        }
        # Report the offending value; calling episode_url here would recurse
        # forever since @episode_url is not assigned yet.
        raise "invalid episode url: #{url.inspect}" unless url =~ /^episode-(\d+)\.html$/
        "/episode-#{$1}-#{lang}.html"
      end
    end

    # Path of the first subtitle page listed for the episode.
    def subtitles_url
      @subtitles_url ||= begin
        subtitles = Nokogiri(get(episode_url))

        # TODO: choose 720p or most downloaded instead of first found
        link = subtitles.css('div.left_articles a').find { |a|
          a[:href].to_s.start_with?('/subtitle')
        }
        throw :downloader, "no subtitle available" unless link
        url = link[:href]
        raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
        url
      end
    end

    # Escaped path of the archive, taken from the redirection of the
    # "download" counterpart of the subtitle page.
    def download_url
      # URI.escape was removed in Ruby 3.0; the default parser offers the same escaping.
      @download_url ||= URI::DEFAULT_PARSER.escape('/' + get_redirection(subtitles_url.sub('subtitle', 'download')))
    end
  end
end
@@ -0,0 +1,105 @@
module Suby
  # Extracts the show name, season and episode numbers from a video file name.
  module FilenameParser
    extend self

    # from tvnamer @ ab2c6c, with author's agreement, adapted
    # See https://github.com/dbr/tvnamer/blob/master/tvnamer/config_defaults.py
    #
    # Patterns are tried in order; each one defines the named groups
    # show, season and episode.
    FILENAME_PATTERNS = [
      # foo.s0101
      /^(?<show>.+?)
      [ \._\-]
      [Ss](?<season>[0-9]{2})
      [\.\- ]?
      (?<episode>[0-9]{2})
      [^0-9]*$/x,

      # foo.1x09*
      /^(?<show>.+?)
      [ \._\-]
      \[?
      (?<season>[0-9]+)
      [xX]
      (?<episode>[0-9]+)
      \]?
      [^\/]*$/x,

      # foo.s01.e01, foo.s01_e01
      /^(?<show>.+?)
      [ \._\-]
      \[?
      [Ss](?<season>[0-9]+)[\. _-]?
      [Ee]?(?<episode>[0-9]+)
      \]?
      [^\/]*$/x,

      # foo - [01.09]
      /^(?<show>.+?)
      [ \._\-]?
      \[
      (?<season>[0-9]+?)
      [.]
      (?<episode>[0-9]+?)
      \]
      [ \._\-]?
      [^\/]*$/x,

      # Foo - S2 E 02 - etc
      /^(?<show>.+?)
      [ ]?[ \._\-][ ]?
      [Ss](?<season>[0-9]+)[\.\- ]?
      [Ee]?[ ]?(?<episode>[0-9]+)
      [^\/]*$/x,

      # Show - Episode 9999 [S 12 - Ep 131] - etc
      /(?<show>.+)
      [ ]-[ ]
      [Ee]pisode[ ]\d+
      [ ]
      \[
      [sS][ ]?(?<season>\d+)
      ([ ]|[ ]-[ ]|-)
      ([eE]|[eE]p)[ ]?(?<episode>\d+)
      \]
      .*$/x,

      # foo.103*
      /^(?<show>.+)
      [ \._\-]
      (?<season>[0-9]{1})
      (?<episode>[0-9]{2})
      [\._ -][^\/]*$/x,

      # foo.0103*
      /^(?<show>.+)
      [ \._\-]
      (?<season>[0-9]{2})
      (?<episode>[0-9]{2,3})
      [\._ -][^\/]*$/x
    ]

    # Parses the base name of +file+ with the first matching pattern.
    #
    #   parse("The.Big.Bang.Theory.S04E01.avi") # => ["The Big Bang Theory", 4, 1]
    #
    # Returns [show (cleaned String), season (Integer), episode (Integer)].
    # Raises RuntimeError when no pattern matches.
    def parse(file)
      filename = File.basename(file)
      FILENAME_PATTERNS.each do |pattern|
        # Use the MatchData explicitly rather than relying on the implicit $~.
        match = pattern.match(filename)
        next unless match
        return [clean_show_name(match[:show]), match[:season].to_i, match[:episode].to_i]
      end
      raise "wrong file format (#{file})"
    end

    # from https://github.com/dbr/tvnamer/blob/master/tvnamer/utils.py#L78-95
    # Cleans up series name by removing any . and _
    # characters, along with any trailing hyphens.
    #
    # Is basically equivalent to replacing all _ and . with a
    # space, but handles decimal numbers in string.
    #
    # The argument is left untouched (it may be frozen); a new String is returned.
    #
    #   clean_show_name("an.example.1.0.test") # => "an example 1.0 test"
    #   clean_show_name("an_example_1.0_test") # => "an example 1.0 test"
    def clean_show_name(show)
      show.gsub(/(?<!\d)[.]|[.](?!\d)/, ' ') # dots, except those between two digits
          .tr('_', ' ')
          .chomp('-')
          .strip
    end
  end
end
data/suby.gemspec CHANGED
@@ -10,5 +10,5 @@ Gem::Specification.new do |s|
10
10
  s.required_ruby_version = '>= 1.9.2'
11
11
  s.add_dependency 'nokogiri'
12
12
 
13
- s.version = '0.0.6'
13
+ s.version = '0.0.8'
14
14
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-07-13 00:00:00.000000000 Z
12
+ date: 2011-07-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2153880860 !ruby/object:Gem::Requirement
16
+ requirement: &2157439960 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2153880860
24
+ version_requirements: *2157439960
25
25
  description: Find and download subtitles
26
26
  email:
27
27
  executables:
@@ -30,7 +30,10 @@ extensions: []
30
30
  extra_rdoc_files: []
31
31
  files:
32
32
  - bin/suby
33
+ - lib/suby/downloader/addic7ed.rb
34
+ - lib/suby/downloader/tvsubtitles.rb
33
35
  - lib/suby/downloader.rb
36
+ - lib/suby/filename_parser.rb
34
37
  - lib/suby.rb
35
38
  - .gitignore
36
39
  - README.md