suby 0.0.6 → 0.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/bin/suby +1 -1
- data/lib/suby.rb +22 -10
- data/lib/suby/downloader.rb +38 -66
- data/lib/suby/downloader/addic7ed.rb +27 -0
- data/lib/suby/downloader/tvsubtitles.rb +73 -0
- data/lib/suby/filename_parser.rb +105 -0
- data/suby.gemspec +1 -1
- metadata +7 -4
data/bin/suby
CHANGED
data/lib/suby.rb
CHANGED
@@ -13,7 +13,19 @@ module Suby
|
|
13
13
|
File.exist? File.basename(file, File.extname(file)) + ".#{ext}" }
|
14
14
|
|
15
15
|
begin
|
16
|
-
Downloader.
|
16
|
+
success = Downloader::DOWNLOADERS.find do |downloader|
|
17
|
+
error = catch :downloader do
|
18
|
+
downloader.new(file, options[:lang]).download
|
19
|
+
:success
|
20
|
+
end
|
21
|
+
if error == :success
|
22
|
+
puts "#{downloader} found subtitles for #{file}"
|
23
|
+
else
|
24
|
+
puts "#{downloader} did not find subtitles for #{file} (#{error})"
|
25
|
+
end
|
26
|
+
error == :success
|
27
|
+
end
|
28
|
+
STDERR.puts "No downloader could find subtitles for #{file}" unless success
|
17
29
|
rescue
|
18
30
|
puts " The download of the subtitles failed for #{file}:"
|
19
31
|
puts " #{$!.class}: #{$!.message}"
|
@@ -22,20 +34,20 @@ module Suby
|
|
22
34
|
}
|
23
35
|
end
|
24
36
|
|
25
|
-
def
|
26
|
-
case
|
27
|
-
when
|
28
|
-
|
29
|
-
raise "no subtitles in #{archive}"
|
30
|
-
|
31
|
-
system 'unzip', archive,
|
32
|
-
puts "found
|
37
|
+
# Extracts the first subtitle found in +archive+ into the current directory,
# deletes the archive and returns the name of the extracted subtitle.
#
# archive - path of the downloaded archive.
# format  - archive type; only :zip is handled.
#
# Raises RuntimeError when the archive holds no subtitle or when +format+
# is not a known archive type (in both cases the archive is left on disk).
def extract_sub_from_archive(archive, format)
  case format
  when :zip
    # The array form of IO.popen runs unzip without going through a shell, so
    # an archive path containing spaces or shell metacharacters is passed
    # through verbatim instead of being word-split or interpreted.
    listing = IO.popen(['unzip', '-qql', archive], &:read)
    # Each listing line ends with "HH:MM <name>"; keep the first name ending
    # in one of SUB_EXTENSIONS (defined elsewhere in this module).
    sub = listing[/\d{2}:\d{2} (.+?(?:#{SUB_EXTENSIONS.join '|'}))$/, 1]
    raise "no subtitles in #{archive}" unless sub
    # Backslash-escape [ and ] in the member name handed to unzip.
    sub_for_unzip = sub.gsub(/(\[|\])/) { "\\#{$1}" }
    # 1 => :close closes unzip's stdout so its progress output is not shown.
    system 'unzip', archive, sub_for_unzip, 1 => :close
    puts "found subtitle: #{sub}" if $VERBOSE
  else
    raise "unknown archive type (#{archive})"
  end

  # Cleaning
  File.unlink archive
  sub
end
|
41
53
|
end
|
data/lib/suby/downloader.rb
CHANGED
@@ -1,91 +1,63 @@
|
|
1
1
|
require 'net/http'
|
2
|
+
require 'cgi/util'
|
2
3
|
require 'nokogiri'
|
4
|
+
require_relative 'filename_parser'
|
3
5
|
|
4
6
|
module Suby
  # Base class of the subtitle downloaders.
  #
  # Every subclass is registered in DOWNLOADERS when it is defined. This class
  # reads self.class::SITE (host name) and self.class::FORMAT (:file or an
  # archive type) and calls #download_url, none of which it defines itself, so
  # subclasses are expected to provide them.
  class Downloader
    # Registry of downloader classes, filled by .inherited in definition order.
    DOWNLOADERS = []

    def self.inherited(subclass)
      DOWNLOADERS << subclass
    end

    attr_reader :show, :season, :episode, :file, :lang

    # file - name of the video file; show, season and episode are parsed from it.
    # lang - language code (String or Symbol), 'en' when nil; stored as a Symbol.
    def initialize(file, lang = nil)
      @file, @lang = file, (lang || 'en').to_sym
      @show, @season, @episode = FilenameParser.parse(file)
    end

    # Started HTTP connection to the downloader's SITE, opened on first use.
    def http
      @http ||= Net::HTTP.new(self.class::SITE).start
    end

    # Returns the body of a GET on +path+.
    # Raises RuntimeError unless the response is a Net::HTTPSuccess.
    def get(path, initheader = {})
      response = http.get(path, initheader)
      raise "Invalid response for #{path}: #{response}" unless Net::HTTPSuccess === response
      response.body
    end

    # Returns the Location header of a GET on +path+.
    # Raises RuntimeError unless the response is a 302 or a success AND carries
    # a Location header.
    def get_redirection(path, initheader = {})
      response = http.get(path, initheader)
      location = response['Location']
      unless (Net::HTTPFound === response || Net::HTTPSuccess === response) && location
        raise "Invalid response for #{path}: #{response}: location: #{location.inspect}"
      end
      location
    end

    # Resolves the download URL and saves the subtitle next to the video name.
    def download
      extract download_url
    ensure
      # Runs on raise and on throw too, so a failed lookup no longer leaves
      # the connection open.
      finish_http
    end

    # Fetches +url+ and writes the subtitle to disk: directly when FORMAT is
    # :file, otherwise through a temporary archive that is then unpacked.
    def extract(url)
      contents = get(url)
      finish_http
      format = self.class::FORMAT
      # File.open rather than Kernel#open: the target name is derived from the
      # video file name, and Kernel#open would run a name starting with "|" as
      # a command.
      if format == :file
        File.open(sub_name(url), 'wb') { |f| f.write contents }
      else
        File.open(TEMP_ARCHIVE_NAME, 'wb') { |f| f.write contents }
        sub = Suby.extract_sub_from_archive(TEMP_ARCHIVE_NAME, format)
        File.rename sub, sub_name(sub)
      end
    end

    # Base name of the video file (directory and extension dropped) followed
    # by the extension of +sub+.
    def sub_name(sub)
      File.basename(file, File.extname(file)) + File.extname(sub)
    end

    private

    # Closes the connection if one is open and forgets it, so a later call to
    # #http starts a fresh connection instead of reusing a finished one.
    def finish_http
      @http.finish if @http && @http.started?
    ensure
      @http = nil
    end
  end
end
|
61
|
+
|
62
|
+
require_relative 'downloader/tvsubtitles'
|
63
|
+
require_relative 'downloader/addic7ed'
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Suby
  # Downloader for www.addic7ed.com, which serves plain subtitle files.
  class Downloader::Addic7ed < Downloader
    SITE = 'www.addic7ed.com'
    FORMAT = :file
    # Language code => numeric id used as the last segment of the episode URL.
    LANG_IDS = {
      en: 1,
      es: 5,
      fr: 8
    }.freeze
    # Text found in the page body when no subtitle exists for the language.
    FILTER_IGNORED = "Couldn't find any subs with the specified language. Filter ignored"

    # Returns the escaped path of the subtitle file to download.
    #
    # Throws :downloader with a reason (String) when the language has no id in
    # LANG_IDS, the episode page is not found, no subtitle is offered, or the
    # site redirects to its download-limit page.
    def download_url
      lang_id = LANG_IDS[lang]
      # Without an id the URL would end in an empty language segment.
      throw :downloader, "language not supported" unless lang_id

      subtitles_url = "/serie/#{CGI.escape show}/#{season}/#{episode}/#{lang_id}"
      response = http.get(subtitles_url)
      throw :downloader, "show/season/episode not found" unless Net::HTTPSuccess === response
      body = response.body
      throw :downloader, "no subtitle available" if body.include? FILTER_IGNORED

      # Anchors without an href yield nil, hence to_s.
      link = Nokogiri(body).css('a').find { |a|
        a[:href].to_s.start_with?('/original/', '/updated/')
      }
      throw :downloader, "no subtitle available" unless link

      location = get_redirection link[:href], 'Referer' => "http://#{SITE}#{subtitles_url}" # They check Referer
      throw :downloader, "download exceeded" if location == '/downloadexceeded.php'
      # URI.escape was removed in Ruby 3.0; the parser method still escapes
      # the unsafe characters.
      URI::DEFAULT_PARSER.escape location
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Suby
  # Downloader for www.tvsubtitles.net, which serves subtitles as zip archives.
  #
  # The lookup walks: search result -> show page -> season page -> episode
  # page -> subtitle page -> download redirection. Expected "not found"
  # outcomes are reported with throw :downloader, <reason>; pages that do not
  # have the expected shape raise RuntimeError.
  class Downloader::TVSubtitles < Downloader
    SITE = 'www.tvsubtitles.net'
    FORMAT = :zip
    SEARCH_URL = '/search.php'

    # cache
    SHOW_URLS = {}  # show name => show page path
    SHOW_PAGES = {} # [show name, season] => parsed season page

    # Path of the show page ("/tvshow-<id>.html"), found through the site search.
    def show_url
      SHOW_URLS[show] ||= begin
        post = Net::HTTP::Post.new(SEARCH_URL)
        post.form_data = { 'q' => show }
        results = Nokogiri(http.request(post).body)
        link = results.css('ul li div a').find { |a|
          # "Show (2009-2011)" => "Show"
          a.text.sub(/ \(\d{4}-\d{4}\)$/, '').casecmp(show) == 0
        }
        throw :downloader, "show not found" unless link

        match = /^\/tvshow-(\d+)\.html$/.match(link[:href].to_s)
        raise 'could not find the show' unless match
        "/tvshow-#{match[1]}.html"
      end
    end

    # Path of the page listing the episodes of the requested season.
    def season_url
      show_url.sub(/\.html$/, "-#{season}.html")
    end

    # Path of the episode page for the requested language
    # ("/episode-<id>-<lang>.html").
    def episode_url
      @episode_url ||= begin
        # The page fetched is season-specific, so the season is part of the
        # cache key; keying on the show alone handed another season's page to
        # the next file of the same show.
        page = (SHOW_PAGES[[show, season]] ||= Nokogiri(get(season_url)))

        season_text = /^Season #{season}$/
        unless page.css('div.left_articles p.description b').any? { |b| b.text =~ season_text }
          throw :downloader, "season not found"
        end

        episode_text = /\A#{season}x0?#{episode}\z/
        row = page.css('div.left_articles table tr').find { |tr|
          tr.children.any? { |td| td.name == 'td' && td.text =~ episode_text }
        }
        throw :downloader, "episode not found" unless row

        # First link, among the children of the row's cells, pointing to an
        # episode page. Anchors without an href yield nil, hence to_s.
        link = row.children.flat_map { |cell| cell.children.to_a }.find { |node|
          node.name == 'a' && node[:href].to_s.start_with?('episode')
        }
        url = link && link[:href]

        match = /^episode-(\d+)\.html$/.match(url.to_s)
        # Report the offending value; calling episode_url here would recurse
        # forever because the memoized value is not set yet.
        raise "invalid episode url: #{url.inspect}" unless match
        "/episode-#{match[1]}-#{lang}.html"
      end
    end

    # Path of the subtitle page ("/subtitle-<id>.html...") for the episode.
    def subtitles_url
      @subtitles_url ||= begin
        subtitles = Nokogiri(get(episode_url))

        # TODO: choose 720p or most downloaded instead of first found
        link = subtitles.css('div.left_articles a').find { |a|
          a[:href].to_s.start_with?('/subtitle')
        }
        throw :downloader, "no subtitle available" unless link
        url = link[:href]
        raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
        url
      end
    end

    # Escaped path of the zip archive, taken from the redirection returned by
    # the "download" counterpart of the subtitle page.
    def download_url
      # URI.escape was removed in Ruby 3.0; the parser method still escapes
      # the unsafe characters.
      @download_url ||= URI::DEFAULT_PARSER.escape('/' + get_redirection(subtitles_url.sub('subtitle', 'download')))
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Suby
  # Extracts the show name, season number and episode number from the name
  # of a video file.
  module FilenameParser
    extend self

    # from tvnamer @ ab2c6c, with author's agreement, adapted
    # See https://github.com/dbr/tvnamer/blob/master/tvnamer/config_defaults.py
    #
    # Tried in order; the first pattern that matches wins.
    FILENAME_PATTERNS = [
      # foo.s0101
      /^(?<show>.+?)
      [ \._\-]
      [Ss](?<season>[0-9]{2})
      [\.\- ]?
      (?<episode>[0-9]{2})
      [^0-9]*$/x,

      # foo.1x09*
      /^(?<show>.+?)
      [ \._\-]
      \[?
      (?<season>[0-9]+)
      [xX]
      (?<episode>[0-9]+)
      \]?
      [^\/]*$/x,

      # foo.s01.e01, foo.s01_e01
      /^(?<show>.+?)
      [ \._\-]
      \[?
      [Ss](?<season>[0-9]+)[\. _-]?
      [Ee]?(?<episode>[0-9]+)
      \]?
      [^\/]*$/x,

      # foo - [01.09]
      /^(?<show>.+?)
      [ \._\-]?
      \[
      (?<season>[0-9]+?)
      [.]
      (?<episode>[0-9]+?)
      \]
      [ \._\-]?
      [^\/]*$/x,

      # Foo - S2 E 02 - etc
      /^(?<show>.+?)
      [ ]?[ \._\-][ ]?
      [Ss](?<season>[0-9]+)[\.\- ]?
      [Ee]?[ ]?(?<episode>[0-9]+)
      [^\/]*$/x,

      # Show - Episode 9999 [S 12 - Ep 131] - etc
      /(?<show>.+)
      [ ]-[ ]
      [Ee]pisode[ ]\d+
      [ ]
      \[
      [sS][ ]?(?<season>\d+)
      ([ ]|[ ]-[ ]|-)
      ([eE]|[eE]p)[ ]?(?<episode>\d+)
      \]
      .*$/x,

      # foo.103*
      /^(?<show>.+)
      [ \._\-]
      (?<season>[0-9]{1})
      (?<episode>[0-9]{2})
      [\._ -][^\/]*$/x,

      # foo.0103*
      /^(?<show>.+)
      [ \._\-]
      (?<season>[0-9]{2})
      (?<episode>[0-9]{2,3})
      [\._ -][^\/]*$/x
    ].freeze

    # Parses the base name of +file+ (any directory part is ignored).
    #
    # Returns [show, season, episode]: the cleaned show name (String) and the
    # season and episode numbers (Integers).
    # Raises RuntimeError when no pattern of FILENAME_PATTERNS matches.
    def parse(file)
      filename = File.basename(file)
      match = nil
      # Keep the MatchData explicitly instead of relying on $~ still holding
      # the match made inside the block.
      FILENAME_PATTERNS.find { |pattern| match = pattern.match(filename) }
      raise "wrong file format (#{file})" unless match
      [clean_show_name(match[:show]), match[:season].to_i, match[:episode].to_i]
    end

    # from https://github.com/dbr/tvnamer/blob/master/tvnamer/utils.py#L78-95
    # Cleans up series name by removing any . and _
    # characters, along with a trailing hyphen.
    #
    # Is basically equivalent to replacing all _ and . with a
    # space, but handles decimal numbers in string.
    #
    # Returns a new String; +show+ itself is left untouched, so frozen
    # strings are accepted.
    #
    #   clean_show_name("an.example.1.0.test") # => "an example 1.0 test"
    #   clean_show_name("an_example_1.0_test") # => "an example 1.0 test"
    def clean_show_name(show)
      show.gsub(/(?<!\d)[.]|[.](?!\d)/, ' ') # dots, except between two digits
          .tr('_', ' ')
          .chomp('-')
          .strip
    end
  end
end
|
data/suby.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: suby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-07-
|
12
|
+
date: 2011-07-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157439960 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157439960
|
25
25
|
description: Find and download subtitles
|
26
26
|
email:
|
27
27
|
executables:
|
@@ -30,7 +30,10 @@ extensions: []
|
|
30
30
|
extra_rdoc_files: []
|
31
31
|
files:
|
32
32
|
- bin/suby
|
33
|
+
- lib/suby/downloader/addic7ed.rb
|
34
|
+
- lib/suby/downloader/tvsubtitles.rb
|
33
35
|
- lib/suby/downloader.rb
|
36
|
+
- lib/suby/filename_parser.rb
|
34
37
|
- lib/suby.rb
|
35
38
|
- .gitignore
|
36
39
|
- README.md
|