suby 0.0.6 → 0.0.8
Sign up to get free protection for your applications and to get access to all the features.
- data/bin/suby +1 -1
- data/lib/suby.rb +22 -10
- data/lib/suby/downloader.rb +38 -66
- data/lib/suby/downloader/addic7ed.rb +27 -0
- data/lib/suby/downloader/tvsubtitles.rb +73 -0
- data/lib/suby/filename_parser.rb +105 -0
- data/suby.gemspec +1 -1
- metadata +7 -4
data/bin/suby
CHANGED
data/lib/suby.rb
CHANGED
@@ -13,7 +13,19 @@ module Suby
|
|
13
13
|
File.exist? File.basename(file, File.extname(file)) + ".#{ext}" }
|
14
14
|
|
15
15
|
begin
|
16
|
-
Downloader.
|
16
|
+
success = Downloader::DOWNLOADERS.find do |downloader|
|
17
|
+
error = catch :downloader do
|
18
|
+
downloader.new(file, options[:lang]).download
|
19
|
+
:success
|
20
|
+
end
|
21
|
+
if error == :success
|
22
|
+
puts "#{downloader} found subtitles for #{file}"
|
23
|
+
else
|
24
|
+
puts "#{downloader} did not find subtitles for #{file} (#{error})"
|
25
|
+
end
|
26
|
+
error == :success
|
27
|
+
end
|
28
|
+
STDERR.puts "No downloader could find subtitles for #{file}" unless success
|
17
29
|
rescue
|
18
30
|
puts " The download of the subtitles failed for #{file}:"
|
19
31
|
puts " #{$!.class}: #{$!.message}"
|
@@ -22,20 +34,20 @@ module Suby
|
|
22
34
|
}
|
23
35
|
end
|
24
36
|
|
25
|
-
def
|
26
|
-
case
|
27
|
-
when
|
28
|
-
|
29
|
-
raise "no subtitles in #{archive}"
|
30
|
-
|
31
|
-
system 'unzip', archive,
|
32
|
-
puts "found
|
37
|
+
# Extracts the first subtitle listed in +archive+ with the external `unzip`
# tool, then deletes the archive.
#
# archive - path of the downloaded archive file
# format  - archive type; only :zip is handled
#
# Returns the subtitle's name as listed in the archive.
# Raises RuntimeError if the archive lists no subtitle or the format is unknown.
def extract_sub_from_archive(archive, format)
  case format
  when :zip
    # Argument-array form: no shell is involved, so an archive name containing
    # spaces or shell metacharacters is passed to unzip untouched.
    listing = IO.popen(['unzip', '-qql', archive], &:read)
    # First entry whose name ends with one of the known subtitle extensions.
    sub = listing[/\d{2}:\d{2} (.+?(?:#{SUB_EXTENSIONS.join '|'}))$/, 1]
    raise "no subtitles in #{archive}" unless sub
    # Escape brackets in the member name (presumably because unzip treats
    # them as wildcard characters).
    sub_for_unzip = sub.gsub(/(\[|\])/) { "\\#{$1}" }
    # 1 => :close closes unzip's standard output.
    system 'unzip', archive, sub_for_unzip, 1 => :close
    puts "found subtitle: #{sub}" if $VERBOSE
  else
    raise "unknown archive type (#{archive})"
  end

  sub
ensure
  # Cleaning: remove the archive even when extraction failed, so that no
  # temporary file is left behind.
  File.unlink archive if File.exist?(archive)
end
|
41
53
|
end
|
data/lib/suby/downloader.rb
CHANGED
@@ -1,91 +1,63 @@
|
|
1
1
|
require 'net/http'
|
2
|
+
require 'cgi/util'
|
2
3
|
require 'nokogiri'
|
4
|
+
require_relative 'filename_parser'
|
3
5
|
|
4
6
|
module Suby
  # Base class for subtitle downloaders.
  #
  # Subclasses are expected to define the constants SITE (host name) and
  # FORMAT (:file, or an archive type understood by
  # Suby.extract_sub_from_archive) and to implement #download_url.
  class Downloader
    # Every subclass, in the order the subclasses were defined.
    DOWNLOADERS = []

    # Registers each new subclass in DOWNLOADERS.
    def self.inherited(subclass)
      super
      DOWNLOADERS << subclass
    end

    attr_reader :show, :season, :episode, :file, :lang

    # file - name of the video file to find subtitles for
    # lang - language code (String or Symbol); 'en' when nil
    #
    # Raises whatever FilenameParser.parse raises for an unrecognised name.
    def initialize(file, lang = nil)
      @file = file
      @lang = (lang || 'en').to_sym
      @show, @season, @episode = FilenameParser.parse(file)
    end

    # Memoized, started connection to the subclass's SITE.
    def http
      @http ||= Net::HTTP.new(self.class::SITE).start
    end

    # Fetches +path+ and returns the response body.
    # Raises RuntimeError unless the response is a Net::HTTPSuccess.
    def get(path, initheader = {})
      response = http.get(path, initheader)
      raise "Invalid response for #{path}: #{response}" unless Net::HTTPSuccess === response
      response.body
    end

    # Fetches +path+ and returns the value of its Location header.
    # Raises RuntimeError unless the response is a Net::HTTPFound or
    # Net::HTTPSuccess that carries a Location header.
    def get_redirection(path, initheader = {})
      response = http.get(path, initheader)
      location = response['Location']
      acceptable = Net::HTTPFound === response || Net::HTTPSuccess === response
      unless acceptable && location
        raise "Invalid response for #{path}: #{response}: location: #{location.inspect}"
      end
      location
    end

    # Downloads the subtitle located by the subclass's #download_url.
    def download
      extract download_url
    end

    # Fetches +url+, closes the connection and stores the subtitle under the
    # name given by #sub_name. For archive formats the payload is first
    # written to TEMP_ARCHIVE_NAME and unpacked by
    # Suby.extract_sub_from_archive.
    def extract(url)
      contents = get(url)
      http.finish
      format = self.class::FORMAT
      # File.open rather than Kernel#open: the target name derives from the
      # video file name, and Kernel#open would run a command for a name
      # starting with "|".
      if format == :file
        File.open(sub_name(url), 'wb') { |f| f.write contents }
      else
        File.open(TEMP_ARCHIVE_NAME, 'wb') { |f| f.write contents }
        sub = Suby.extract_sub_from_archive(TEMP_ARCHIVE_NAME, format)
        File.rename sub, sub_name(sub)
      end
    end

    # Base name of the video file (directory and extension dropped) combined
    # with the extension of +sub+.
    def sub_name(sub)
      File.basename(file, File.extname(file)) + File.extname(sub)
    end
  end
end
|
61
|
+
|
62
|
+
require_relative 'downloader/tvsubtitles'
|
63
|
+
require_relative 'downloader/addic7ed'
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Suby
  # Downloader for www.addic7ed.com; FORMAT = :file means the downloaded
  # payload is stored directly as the subtitle.
  class Downloader::Addic7ed < Downloader
    SITE = 'www.addic7ed.com'
    FORMAT = :file
    # Numeric language identifiers used in the site's URLs.
    LANG_IDS = {
      en: 1,
      es: 5,
      fr: 8
    }
    # Text found in the page when no subtitle exists in the requested language.
    FILTER_IGNORED = "Couldn't find any subs with the specified language. Filter ignored"

    # Returns the escaped path of the subtitle file to download.
    #
    # Throws :downloader with a reason when the language is not in LANG_IDS,
    # the episode page is not found, no subtitle or download link is
    # available, or the site answers that the download limit is exceeded.
    # Raises RuntimeError (from get_redirection) on an unexpected response.
    def download_url
      lang_id = LANG_IDS[lang]
      # Without an id the URL would end with an empty language segment.
      throw :downloader, "language not supported" unless lang_id

      subtitles_url = "/serie/#{CGI.escape show}/#{season}/#{episode}/#{lang_id}"
      response = http.get(subtitles_url)
      throw :downloader, "show/season/episode not found" unless Net::HTTPSuccess === response

      body = response.body
      throw :downloader, "no subtitle available" if body.include? FILTER_IGNORED

      # to_s: anchors without an href attribute yield nil.
      link = Nokogiri(body).css('a').find { |a|
        a[:href].to_s.start_with?('/original/', '/updated/')
      }
      throw :downloader, "no subtitle available" unless link

      location = get_redirection link[:href], 'Referer' => "http://#{SITE}#{subtitles_url}" # They check Referer
      throw :downloader, "download exceeded" if location == '/downloadexceeded.php'

      # URI.escape was removed in Ruby 3.0; the parser object offers the same escaping.
      URI::DEFAULT_PARSER.escape location
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
module Suby
  # Downloader for www.tvsubtitles.net; FORMAT = :zip means the downloaded
  # payload is an archive the subtitle has to be extracted from.
  class Downloader::TVSubtitles < Downloader
    SITE = 'www.tvsubtitles.net'
    FORMAT = :zip
    SEARCH_URL = '/search.php'

    # cache
    SHOW_URLS = {}  # show name  => show page path
    SHOW_PAGES = {} # season url => parsed season page

    # Path of the show's page, found through the site's search form.
    #
    # Throws :downloader, "show not found" when no search result carries the
    # show's name. Raises RuntimeError when the result link has an
    # unexpected form.
    def show_url
      SHOW_URLS[show] ||= begin
        post = Net::HTTP::Post.new(SEARCH_URL)
        post.form_data = { 'q' => show }
        results = Nokogiri http.request(post).body
        link = results.css('ul li div a').find { |candidate|
          # "Show (2009-2011)" => "Show"
          candidate.text.sub(/ \(\d{4}-\d{4}\)$/, '').casecmp(show) == 0
        }
        throw :downloader, "show not found" unless link

        show_id = link[:href].to_s[/^\/tvshow-(\d+)\.html$/, 1]
        raise 'could not find the show' unless show_id
        "/tvshow-#{show_id}.html"
      end
    end

    # Path of the page listing the episodes of the requested season.
    def season_url
      show_url.sub(/\.html$/, "-#{season}.html")
    end

    # Path of the episode's subtitle listing for the requested language.
    #
    # Throws :downloader when the season or the episode is not on the page.
    # Raises RuntimeError when the episode link has an unexpected form.
    def episode_url
      @episode_url ||= begin
        # Keyed by season url: the page differs per season, so caching it
        # under the show name alone would reuse the wrong season's page.
        page = SHOW_PAGES[season_url] ||= Nokogiri(get(season_url))

        season_text = /^Season #{season}$/
        season_listed = page.css('div.left_articles p.description b').any? { |b|
          b.text =~ season_text
        }
        throw :downloader, "season not found" unless season_listed

        episode_text = /\A#{season}x0?#{episode}\z/
        row = page.css('div.left_articles table tr').find { |tr|
          tr.children.any? { |td| td.name == 'td' && td.text =~ episode_text }
        }
        throw :downloader, "episode not found" unless row

        # First link in the row pointing to an episode page; to_s guards
        # against anchors without an href attribute.
        link = row.children.flat_map { |td| td.children.to_a }.find { |node|
          node.name == 'a' && node[:href].to_s.start_with?('episode')
        }
        url = link && link[:href]
        # Interpolate the local url: calling episode_url here would recurse
        # endlessly because @episode_url is not assigned yet.
        raise "invalid episode url: #{url.inspect}" unless url =~ /^episode-(\d+)\.html$/
        "/episode-#{$1}-#{lang}.html"
      end
    end

    # Path of the first subtitle page listed for the episode.
    #
    # Throws :downloader, "no subtitle available" when none is listed.
    # Raises RuntimeError when the link has an unexpected form.
    def subtitles_url
      @subtitles_url ||= begin
        subtitles = Nokogiri get episode_url

        # TODO: choose 720p or most downloaded instead of first found
        link = subtitles.css('div.left_articles a').find { |node|
          node.name == 'a' && node[:href].to_s.start_with?('/subtitle')
        }
        throw :downloader, "no subtitle available" unless link

        url = link[:href]
        raise 'invalid subtitle url' unless url =~ /^\/subtitle-(\d+)\.html/
        url
      end
    end

    # Escaped path of the archive, taken from the redirection answered by the
    # "download" counterpart of the subtitle page.
    def download_url
      # URI.escape was removed in Ruby 3.0; the parser object offers the same escaping.
      @download_url ||= URI::DEFAULT_PARSER.escape(
        '/' + get_redirection(subtitles_url.sub('subtitle', 'download'))
      )
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
module Suby
  # Extracts the show name, season number and episode number from the file
  # name of a TV show episode.
  module FilenameParser
    extend self

    # from tvnamer @ ab2c6c, with author's agreement, adapted
    # See https://github.com/dbr/tvnamer/blob/master/tvnamer/config_defaults.py
    #
    # Patterns are tried in order; each defines the named groups
    # show, season and episode.
    FILENAME_PATTERNS = [
      # foo.s0101
      /^(?<show>.+?)
      [ \._\-]
      [Ss](?<season>[0-9]{2})
      [\.\- ]?
      (?<episode>[0-9]{2})
      [^0-9]*$/x,

      # foo.1x09*
      /^(?<show>.+?)
      [ \._\-]
      \[?
      (?<season>[0-9]+)
      [xX]
      (?<episode>[0-9]+)
      \]?
      [^\/]*$/x,

      # foo.s01.e01, foo.s01_e01
      /^(?<show>.+?)
      [ \._\-]
      \[?
      [Ss](?<season>[0-9]+)[\. _-]?
      [Ee]?(?<episode>[0-9]+)
      \]?
      [^\/]*$/x,

      # foo - [01.09]
      /^(?<show>.+?)
      [ \._\-]?
      \[
      (?<season>[0-9]+?)
      [.]
      (?<episode>[0-9]+?)
      \]
      [ \._\-]?
      [^\/]*$/x,

      # Foo - S2 E 02 - etc
      /^(?<show>.+?)
      [ ]?[ \._\-][ ]?
      [Ss](?<season>[0-9]+)[\.\- ]?
      [Ee]?[ ]?(?<episode>[0-9]+)
      [^\/]*$/x,

      # Show - Episode 9999 [S 12 - Ep 131] - etc
      /(?<show>.+)
      [ ]-[ ]
      [Ee]pisode[ ]\d+
      [ ]
      \[
      [sS][ ]?(?<season>\d+)
      ([ ]|[ ]-[ ]|-)
      ([eE]|[eE]p)[ ]?(?<episode>\d+)
      \]
      .*$/x,

      # foo.103*
      /^(?<show>.+)
      [ \._\-]
      (?<season>[0-9]{1})
      (?<episode>[0-9]{2})
      [\._ -][^\/]*$/x,

      # foo.0103*
      /^(?<show>.+)
      [ \._\-]
      (?<season>[0-9]{2})
      (?<episode>[0-9]{2,3})
      [\._ -][^\/]*$/x
    ]

    # Parses the base name of +file+ with the first matching pattern.
    #
    # Returns [show, season, episode]: the cleaned show name (String) and the
    # season and episode numbers (Integers).
    # Raises RuntimeError when no pattern matches.
    def parse(file)
      filename = File.basename(file)
      FILENAME_PATTERNS.each do |pattern|
        match = pattern.match(filename)
        next unless match

        return [clean_show_name(match[:show]), match[:season].to_i, match[:episode].to_i]
      end
      raise "wrong file format (#{file})"
    end

    # from https://github.com/dbr/tvnamer/blob/master/tvnamer/utils.py#L78-95
    # Cleans up series name by removing any . and _
    # characters, along with any trailing hyphens.
    #
    # Is basically equivalent to replacing all _ and . with a
    # space, but handles decimal numbers in string.
    #
    # The argument is left untouched (so frozen strings are accepted);
    # a new String is returned.
    #
    #   clean_show_name("an.example.1.0.test") # => "an example 1.0 test"
    #   clean_show_name("an_example_1.0_test") # => "an example 1.0 test"
    def clean_show_name(show)
      show.gsub(/(?<!\d)[.]|[.](?!\d)/, ' ') # dots, except those between two digits
          .tr('_', ' ')
          .chomp('-')
          .strip
    end
  end
end
|
data/suby.gemspec
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: suby
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.
|
4
|
+
version: 0.0.8
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,11 +9,11 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2011-07-
|
12
|
+
date: 2011-07-27 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
16
|
-
requirement: &
|
16
|
+
requirement: &2157439960 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :runtime
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *2157439960
|
25
25
|
description: Find and download subtitles
|
26
26
|
email:
|
27
27
|
executables:
|
@@ -30,7 +30,10 @@ extensions: []
|
|
30
30
|
extra_rdoc_files: []
|
31
31
|
files:
|
32
32
|
- bin/suby
|
33
|
+
- lib/suby/downloader/addic7ed.rb
|
34
|
+
- lib/suby/downloader/tvsubtitles.rb
|
33
35
|
- lib/suby/downloader.rb
|
36
|
+
- lib/suby/filename_parser.rb
|
34
37
|
- lib/suby.rb
|
35
38
|
- .gitignore
|
36
39
|
- README.md
|