suby 0.0.6 → 0.0.8

Sign up to get free protection for your applications and to get access to all the features.
data/bin/suby CHANGED
@@ -1,6 +1,6 @@
1
1
  #!/usr/bin/env ruby
2
2
 
3
- require 'suby'
3
+ require_relative '../lib/suby'
4
4
  require 'optparse'
5
5
 
6
6
  options = {}
data/lib/suby.rb CHANGED
@@ -13,7 +13,19 @@ module Suby
13
13
  File.exist? File.basename(file, File.extname(file)) + ".#{ext}" }
14
14
 
15
15
  begin
16
- Downloader.new(file, options[:lang]).download
16
+ success = Downloader::DOWNLOADERS.find do |downloader|
17
+ error = catch :downloader do
18
+ downloader.new(file, options[:lang]).download
19
+ :success
20
+ end
21
+ if error == :success
22
+ puts "#{downloader} found subtitles for #{file}"
23
+ else
24
+ puts "#{downloader} did not find subtitles for #{file} (#{error})"
25
+ end
26
+ error == :success
27
+ end
28
+ STDERR.puts "No downloader could find subtitles for #{file}" unless success
17
29
  rescue
18
30
  puts " The download of the subtitles failed for #{file}:"
19
31
  puts " #{$!.class}: #{$!.message}"
@@ -22,20 +34,20 @@ module Suby
22
34
  }
23
35
  end
24
36
 
25
- def extract_subs_from_archive(archive)
26
- case `file #{archive}`
27
- when /Zip archive data/
28
- subs = `unzip -qql #{archive}`.scan(/\d{2}:\d{2} (.+?(?:#{SUB_EXTENSIONS.join '|'}))$/).map(&:first)
29
- raise "no subtitles in #{archive}" if subs.empty?
30
- subs_for_unzip = subs.map { |sub| sub.gsub(/(\[|\])/) { "\\#{$1}" } }
31
- system 'unzip', archive, *subs_for_unzip, 1 => :close
32
- puts "found subtitles: #{subs.join(', ')}"
37
+ def extract_sub_from_archive(archive, format)
38
+ case format
39
+ when :zip
40
+ sub = `unzip -qql #{archive}`.scan(/\d{2}:\d{2} (.+?(?:#{SUB_EXTENSIONS.join '|'}))$/).map(&:first).first
41
+ raise "no subtitles in #{archive}" unless sub
42
+ sub_for_unzip = sub.gsub(/(\[|\])/) { "\\#{$1}" }
43
+ system 'unzip', archive, sub_for_unzip, 1 => :close
44
+ puts "found subtitle: #{sub}" if $VERBOSE
33
45
  else
34
46
  raise "unknown archive type (#{archive})"
35
47
  end
36
48
 
37
49
  # Cleaning
38
50
  File.unlink archive
39
- subs
51
+ sub
40
52
  end
41
53
  end
@@ -1,91 +1,63 @@
1
1
  require 'net/http'
2
+ require 'cgi/util'
2
3
  require 'nokogiri'
4
+ require_relative 'filename_parser'
3
5
 
4
6
  module Suby
5
7
  class Downloader
6
- SITE = 'www.tvsubtitles.net'
7
- SEARCH_URL = '/search.php'
8
-
9
- # cache
10
- SHOW_URLS = {}
11
- SHOW_PAGES = {}
8
+ DOWNLOADERS = []
9
+ def self.inherited(subclass)
10
+ DOWNLOADERS << subclass
11
+ end
12
12
 
13
13
  attr_reader :show, :season, :episode, :file, :lang
14
14
 
15
- def initialize file, lang = nil
16
- @file, @lang = file, lang || 'en'
17
- unless /^(?<show>.+) (?<season>\d{1,2})x(?<episode>\d{1,2})(?: - .+)?\.[a-z]+?$/ =~ file
18
- raise "wrong file format (#{file}). Must be:\n<show> <season>x<episode>[ - <title>].<ext>"
19
- end
20
- @show, @season, @episode = show, season.to_i, episode.to_i
15
+ def initialize(file, lang = nil)
16
+ @file, @lang = file, (lang || 'en').to_sym
17
+ @show, @season, @episode = FilenameParser.parse(file)
21
18
  end
22
19
 
23
20
  def http
24
- @http ||= Net::HTTP.new(SITE).start
21
+ @http ||= Net::HTTP.new(self.class::SITE).start
25
22
  end
26
23
 
27
- def show_url
28
- SHOW_URLS[show] ||= begin
29
- post = Net::HTTP::Post.new(SEARCH_URL)
30
- post.form_data = { 'q' => show }
31
- results = Nokogiri http.request(post).body
32
- url = results.css('ul li div a').first[:href]
33
-
34
- raise 'could not find the show' unless /^\/tvshow-(\d+)\.html$/ =~ url
35
- "/tvshow-#{$1}-#{season}.html"
36
- end
24
+ def get(path, initheader = {})
25
+ response = http.get(path, initheader)
26
+ raise "Invalid response for #{path}: #{response}" unless Net::HTTPSuccess === response
27
+ response.body
37
28
  end
38
29
 
39
- def episode_url
40
- @episode_url ||= begin
41
- SHOW_PAGES[show] ||= Nokogiri http.get(show_url).body
42
-
43
- url = nil
44
- SHOW_PAGES[show].css('div.left_articles table tr').find { |tr|
45
- tr.children.find { |td| td.name == 'td' && td.text =~ /\A#{season}x0?#{episode}\z/ }
46
- }.children.find { |td|
47
- td.children.find { |a|
48
- a.name == 'a' && a[:href].start_with?('episode') && url = a[:href]
49
- }
50
- }
51
- raise "invalid episode url: #{episode_url}" unless url =~ /^episode-(\d+)\.html$/
52
- "/episode-#{$1}-#{lang}.html"
30
+ def get_redirection(path, initheader = {})
31
+ response = http.get(path, initheader)
32
+ location = response['Location']
33
+ unless (Net::HTTPFound === response or Net::HTTPSuccess === response) and location
34
+ raise "Invalid response for #{path}: #{response}: location: #{location.inspect}"
53
35
  end
54
- end
55
-
56
- def subtitles_url
57
- @subtitles_url ||= begin
58
- subtitles = Nokogiri http.get(episode_url).body
59
-
60
- # TODO: choose 720p or most downloaded instead of first found
61
- url = subtitles.css('div.left_articles a').find { |a| a.name == 'a' && a[:href].start_with?('/subtitle') }[:href]
62
- raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
63
- url
64
- end
65
- end
66
-
67
- def download_url
68
- @download_url ||= URI.escape '/' + http.get(subtitles_url.sub('subtitle', 'download'))['Location']
36
+ location
69
37
  end
70
38
 
71
39
  def download
72
- puts "Searching subtitles for #{file}:"
73
- puts "Show: #{show}, Season: #{season}, Episode: #{episode}"
74
-
75
- puts "show url: #{show_url}"
76
- puts "episode url: #{episode_url}"
77
- puts "subtitle url: #{subtitles_url}"
78
- puts "download url: #{download_url}"
40
+ extract download_url
41
+ end
79
42
 
80
- # extract
81
- zip = http.get(download_url).body
43
+ def extract(url)
44
+ contents = get(url)
82
45
  http.finish
83
- open(TEMP_ARCHIVE_NAME, 'wb') { |f| f.write zip }
84
- subs = Suby.extract_subs_from_archive(TEMP_ARCHIVE_NAME)
46
+ format = self.class::FORMAT
47
+ if format == :file
48
+ open(sub_name(url), 'wb') { |f| f.write contents }
49
+ else
50
+ open(TEMP_ARCHIVE_NAME, 'wb') { |f| f.write contents }
51
+ sub = Suby.extract_sub_from_archive(TEMP_ARCHIVE_NAME, format)
52
+ File.rename sub, sub_name(sub)
53
+ end
54
+ end
85
55
 
86
- new_name = File.basename(file, File.extname(file))+File.extname(subs.first)
87
- File.rename subs.first, new_name
88
- puts "Renaming to #{new_name}"
56
+ def sub_name(sub)
57
+ File.basename(file, File.extname(file)) + File.extname(sub)
89
58
  end
90
59
  end
91
60
  end
61
+
62
+ require_relative 'downloader/tvsubtitles'
63
+ require_relative 'downloader/addic7ed'
@@ -0,0 +1,27 @@
1
+ module Suby
2
+ class Downloader::Addic7ed < Downloader
3
+ SITE = 'www.addic7ed.com'
4
+ FORMAT = :file
5
+ LANG_IDS = {
6
+ en: 1,
7
+ es: 5,
8
+ fr: 8
9
+ }
10
+ FILTER_IGNORED = "Couldn't find any subs with the specified language. Filter ignored"
11
+
12
+ def download_url
13
+ subtitles_url = "/serie/#{CGI.escape show}/#{season}/#{episode}/#{LANG_IDS[lang]}"
14
+ response = http.get(subtitles_url)
15
+ throw :downloader, "show/season/episode not found" unless Net::HTTPSuccess === response
16
+ body = response.body
17
+ throw :downloader, "no subtitle available" if body.include? FILTER_IGNORED
18
+ download_url = Nokogiri(body).css('a').find { |a|
19
+ a[:href].start_with? '/original/' or
20
+ a[:href].start_with? '/updated/'
21
+ }[:href]
22
+ location = get_redirection download_url, 'Referer' => "http://#{SITE}#{subtitles_url}" # They check Referer
23
+ throw :downloader, "download exceeded" if location == '/downloadexceeded.php'
24
+ URI.escape location
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,73 @@
1
+ module Suby
2
+ class Downloader::TVSubtitles < Downloader
3
+ SITE = 'www.tvsubtitles.net'
4
+ FORMAT = :zip
5
+ SEARCH_URL = '/search.php'
6
+
7
+ # cache
8
+ SHOW_URLS = {}
9
+ SHOW_PAGES = {}
10
+
11
+ def show_url
12
+ SHOW_URLS[show] ||= begin
13
+ post = Net::HTTP::Post.new(SEARCH_URL)
14
+ post.form_data = { 'q' => show }
15
+ results = Nokogiri http.request(post).body
16
+ a = results.css('ul li div a').find { |a|
17
+ # "Show (2009-2011)" => "Show"
18
+ a.text.sub(/ \(\d{4}-\d{4}\)$/, '').casecmp(show) == 0
19
+ }
20
+ throw :downloader, "show not found" unless a
21
+ url = a[:href]
22
+
23
+ raise 'could not find the show' unless /^\/tvshow-(\d+)\.html$/ =~ url
24
+ "/tvshow-#{$1}.html"
25
+ end
26
+ end
27
+
28
+ def season_url
29
+ show_url.sub(/\.html$/, "-#{season}.html")
30
+ end
31
+
32
+ def episode_url
33
+ @episode_url ||= begin
34
+ SHOW_PAGES[show] ||= Nokogiri get season_url
35
+
36
+ season_text = /^Season #{season}$/
37
+ SHOW_PAGES[show].css('div.left_articles p.description b').find { |b|
38
+ b.text =~ season_text
39
+ } or throw :downloader, "season not found"
40
+
41
+ url = nil
42
+ SHOW_PAGES[show].css('div.left_articles table tr').find { |tr|
43
+ tr.children.find { |td| td.name == 'td' && td.text =~ /\A#{season}x0?#{episode}\z/ }
44
+ }.tap { |tr|
45
+ throw :downloader, "episode not found" unless tr
46
+ }.children.find { |td|
47
+ td.children.find { |a|
48
+ a.name == 'a' && a[:href].start_with?('episode') && url = a[:href]
49
+ }
50
+ }
51
+ raise "invalid episode url: #{episode_url}" unless url =~ /^episode-(\d+)\.html$/
52
+ "/episode-#{$1}-#{lang}.html"
53
+ end
54
+ end
55
+
56
+ def subtitles_url
57
+ @subtitles_url ||= begin
58
+ subtitles = Nokogiri get episode_url
59
+
60
+ # TODO: choose 720p or most downloaded instead of first found
61
+ a = subtitles.css('div.left_articles a').find { |a| a.name == 'a' && a[:href].start_with?('/subtitle') }
62
+ throw :downloader, "no subtitle available" unless a
63
+ url = a[:href]
64
+ raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
65
+ url
66
+ end
67
+ end
68
+
69
+ def download_url
70
+ @download_url ||= URI.escape '/' + get_redirection(subtitles_url.sub('subtitle', 'download'))
71
+ end
72
+ end
73
+ end
@@ -0,0 +1,105 @@
1
+ module Suby
2
+ module FilenameParser
3
+ extend self
4
+
5
+ # from tvnamer @ ab2c6c, with author's agreement, adapted
6
+ # See https://github.com/dbr/tvnamer/blob/master/tvnamer/config_defaults.py
7
+ FILENAME_PATTERNS = [
8
+ # foo.s0101
9
+ /^(?<show>.+?)
10
+ [ \._\-]
11
+ [Ss](?<season>[0-9]{2})
12
+ [\.\- ]?
13
+ (?<episode>[0-9]{2})
14
+ [^0-9]*$/x,
15
+
16
+ # foo.1x09*
17
+ /^(?<show>.+?)
18
+ [ \._\-]
19
+ \[?
20
+ (?<season>[0-9]+)
21
+ [xX]
22
+ (?<episode>[0-9]+)
23
+ \]?
24
+ [^\/]*$/x,
25
+
26
+ # foo.s01.e01, foo.s01_e01
27
+ /^(?<show>.+?)
28
+ [ \._\-]
29
+ \[?
30
+ [Ss](?<season>[0-9]+)[\. _-]?
31
+ [Ee]?(?<episode>[0-9]+)
32
+ \]?
33
+ [^\/]*$/x,
34
+
35
+ # foo - [01.09]
36
+ /^(?<show>.+?)
37
+ [ \._\-]?
38
+ \[
39
+ (?<season>[0-9]+?)
40
+ [.]
41
+ (?<episode>[0-9]+?)
42
+ \]
43
+ [ \._\-]?
44
+ [^\/]*$/x,
45
+
46
+ # Foo - S2 E 02 - etc
47
+ /^(?<show>.+?)
48
+ [ ]?[ \._\-][ ]?
49
+ [Ss](?<season>[0-9]+)[\.\- ]?
50
+ [Ee]?[ ]?(?<episode>[0-9]+)
51
+ [^\/]*$/x,
52
+
53
+ # Show - Episode 9999 [S 12 - Ep 131] - etc
54
+ /(?<show>.+)
55
+ [ ]-[ ]
56
+ [Ee]pisode[ ]\d+
57
+ [ ]
58
+ \[
59
+ [sS][ ]?(?<season>\d+)
60
+ ([ ]|[ ]-[ ]|-)
61
+ ([eE]|[eE]p)[ ]?(?<episode>\d+)
62
+ \]
63
+ .*$/x,
64
+
65
+ # foo.103*
66
+ /^(?<show>.+)
67
+ [ \._\-]
68
+ (?<season>[0-9]{1})
69
+ (?<episode>[0-9]{2})
70
+ [\._ -][^\/]*$/x,
71
+
72
+ # foo.0103*
73
+ /^(?<show>.+)
74
+ [ \._\-]
75
+ (?<season>[0-9]{2})
76
+ (?<episode>[0-9]{2,3})
77
+ [\._ -][^\/]*$/x
78
+ ]
79
+
80
+ def parse(file)
81
+ filename = File.basename(file)
82
+ FILENAME_PATTERNS.find { |pattern|
83
+ pattern =~ filename
84
+ } or raise "wrong file format (#{file})"
85
+ [clean_show_name($~[:show]), $~[:season].to_i, $~[:episode].to_i]
86
+ end
87
+
88
+ # from https://github.com/dbr/tvnamer/blob/master/tvnamer/utils.py#L78-95
89
+ # Cleans up series name by removing any . and _
90
+ # characters, along with any trailing hyphens.
91
+ #
92
+ # Is basically equivalent to replacing all _ and . with a
93
+ # space, but handles decimal numbers in string.
94
+ #
95
+ # clean_show_name("an.example.1.0.test") # => "an example 1.0 test"
96
+ # clean_show_name("an_example_1.0_test") # => "an example 1.0 test"
97
+ def clean_show_name show
98
+ show.gsub! /(?<!\d)[.]|[.](?!\d)/, ' '
99
+ show.tr! '_', ' '
100
+ show.chomp! '-'
101
+ show.strip!
102
+ show
103
+ end
104
+ end
105
+ end
data/suby.gemspec CHANGED
@@ -10,5 +10,5 @@ Gem::Specification.new do |s|
10
10
  s.required_ruby_version = '>= 1.9.2'
11
11
  s.add_dependency 'nokogiri'
12
12
 
13
- s.version = '0.0.6'
13
+ s.version = '0.0.8'
14
14
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: suby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.8
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,11 +9,11 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2011-07-13 00:00:00.000000000 Z
12
+ date: 2011-07-27 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
16
- requirement: &2153880860 !ruby/object:Gem::Requirement
16
+ requirement: &2157439960 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :runtime
23
23
  prerelease: false
24
- version_requirements: *2153880860
24
+ version_requirements: *2157439960
25
25
  description: Find and download subtitles
26
26
  email:
27
27
  executables:
@@ -30,7 +30,10 @@ extensions: []
30
30
  extra_rdoc_files: []
31
31
  files:
32
32
  - bin/suby
33
+ - lib/suby/downloader/addic7ed.rb
34
+ - lib/suby/downloader/tvsubtitles.rb
33
35
  - lib/suby/downloader.rb
36
+ - lib/suby/filename_parser.rb
34
37
  - lib/suby.rb
35
38
  - .gitignore
36
39
  - README.md