undertexter 0.1.4 → 0.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +3 -3
- data/README.markdown +6 -6
- data/lib/subtitle.rb +50 -48
- data/lib/undertexter.rb +14 -29
- data/spec/subtitle_spec.rb +2 -0
- data/spec/undertexter_spec.rb +4 -0
- data/undertexter.gemspec +3 -7
- metadata +5 -33
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
undertexter (0.1.
|
5
|
-
hpricot (= 0.8.2)
|
4
|
+
undertexter (0.1.4)
|
6
5
|
levenshteinish
|
7
6
|
mimer_plus
|
7
|
+
nokogiri
|
8
8
|
rest-client
|
9
9
|
|
10
10
|
GEM
|
@@ -12,11 +12,11 @@ GEM
|
|
12
12
|
specs:
|
13
13
|
diff-lcs (1.1.2)
|
14
14
|
hintable_levenshtein (0.0.3)
|
15
|
-
hpricot (0.8.2)
|
16
15
|
levenshteinish (0.0.1)
|
17
16
|
hintable_levenshtein
|
18
17
|
mime-types (1.16)
|
19
18
|
mimer_plus (0.0.4)
|
19
|
+
nokogiri (1.4.4)
|
20
20
|
rest-client (1.6.1)
|
21
21
|
mime-types (>= 1.16)
|
22
22
|
rspec (2.4.0)
|
data/README.markdown
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
Undertexter provides a basic search client that makes it possible to search for swedish and english subtitles on [Undertexter.se](http://undertexter.se)
|
5
5
|
|
6
|
-
*Support for other subtitles sites will be added in the future. Follow this project to know when
|
6
|
+
*Support for other subtitles sites will be added in the future. Follow this project to know when.*
|
7
7
|
|
8
8
|
## How to use
|
9
9
|
|
@@ -16,7 +16,7 @@ This is how to use it in `irb`.
|
|
16
16
|
# => true
|
17
17
|
|
18
18
|
$ subtite = Undertexter.find("tt0840361").first
|
19
|
-
=> #<Subtitle:0x1020fff98 @downloads=8328, @movie_title="The Town", @title="The.Town.2010....BRRip", @url="http://www.undertexter.se/?p=undertext&id=23711", @cds=1>
|
19
|
+
=> #<Container::Subtitle:0x1020fff98 @downloads=8328, @movie_title="The Town", @title="The.Town.2010....BRRip", @url="http://www.undertexter.se/?p=undertext&id=23711", @cds=1>
|
20
20
|
$ subtitle.downloads
|
21
21
|
=> 8328
|
22
22
|
$ subtitle.movie_title
|
@@ -58,7 +58,7 @@ This is how to use it in `irb`.
|
|
58
58
|
### Find the right subtitle based on the **release name** of the movie
|
59
59
|
|
60
60
|
$ Undertexter.find("tt0840361").based_on("The Town EXTENDED 2010 480p BRRip XviD AC3 FLAWL3SS")
|
61
|
-
=> #<Subtitle:0x00000101b739d0 @cds=1, @downloads=1644, @title="The.Town.EXTENDED.2010.480p.BRRip.XviD.AC3-FLAWL3SS", @details="http://www.undertexter.se/?p=undertext&id=23752", @movie_title="The Town", @language=:swedish>
|
61
|
+
=> #<Container::Subtitle:0x00000101b739d0 @cds=1, @downloads=1644, @title="The.Town.EXTENDED.2010.480p.BRRip.XviD.AC3-FLAWL3SS", @details="http://www.undertexter.se/?p=undertext&id=23752", @movie_title="The Town", @language=:swedish>
|
62
62
|
|
63
63
|
### Specify how sensitive the `based_on` method should be, from `0.0` to `1.0`
|
64
64
|
|
@@ -66,14 +66,14 @@ This is how to use it in `irb`.
|
|
66
66
|
=> nil
|
67
67
|
|
68
68
|
$ Undertexter.find("tt0840361").based_on("The Town EXTENDED 2010 480p BRRip XviD AC3 FLAWL3SS", limit: 0.4)
|
69
|
-
=> #<Subtitle:0x00000101b8d808 @cds=1, @downloads=1644, @title="The.Town.EXTENDED.2010.480p.BRRip.XviD.AC3-FLAWL3SS", @details="http://www.undertexter.se/?p=undertext&id=23752", @movie_title="The Town", @language=:swedish>
|
69
|
+
=> #<Container::Subtitle:0x00000101b8d808 @cds=1, @downloads=1644, @title="The.Town.EXTENDED.2010.480p.BRRip.XviD.AC3-FLAWL3SS", @details="http://www.undertexter.se/?p=undertext&id=23752", @movie_title="The Town", @language=:swedish>
|
70
70
|
|
71
71
|
$ Undertexter.find("tt0840361").based_on("The.Town.EXTENDED.2010.480p.BRRip.XviD.AC3-FLAWL3SS", limit: 0.0)
|
72
|
-
=> #<Subtitle:0x00000101b8d718 @cds=1, @downloads=1644, @title="The.Town.EXTENDED.2010.480p.BRRip.XviD.AC3-FLAWL3SS", @details="http://www.undertexter.se/?p=undertext&id=23752", @movie_title="The Town", @language=:swedish>
|
72
|
+
=> #<Container::Subtitle:0x00000101b8d718 @cds=1, @downloads=1644, @title="The.Town.EXTENDED.2010.480p.BRRip.XviD.AC3-FLAWL3SS", @details="http://www.undertexter.se/?p=undertext&id=23752", @movie_title="The Town", @language=:swedish>
|
73
73
|
|
74
74
|
## What is being returned?
|
75
75
|
|
76
|
-
The find method returns an `Array` with zero or more
|
76
|
+
The find method returns an `Array` with zero or more `Container::Subtitle` instances. Every object provides some basic accessors.
|
77
77
|
|
78
78
|
- `movie_title` (String) The official name of the movie.
|
79
79
|
- `cds` (Integer) The amount of cds that the release should contain.
|
data/lib/subtitle.rb
CHANGED
@@ -1,65 +1,67 @@
|
|
1
1
|
require 'fileutils'
|
2
2
|
require 'mimer_plus'
|
3
3
|
|
4
|
-
|
5
|
-
|
4
|
+
module Container
|
5
|
+
class Subtitle
|
6
|
+
attr_accessor :details, :downloads, :cds, :title, :movie_title, :url
|
6
7
|
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
def url
|
12
|
-
@language == :english ? "http://eng.undertexter.se/subtitle.php?id=#{id}" : "http://undertexter.se/laddatext.php?id=#{id}"
|
13
|
-
end
|
8
|
+
def initialize(args)
|
9
|
+
args.keys.each { |name| instance_variable_set "@" + name.to_s, args[name] }
|
10
|
+
end
|
14
11
|
|
15
|
-
|
16
|
-
|
17
|
-
if args[:to].nil?
|
18
|
-
dir = "/tmp"
|
19
|
-
file_name = "#{dir}/#{generate_file_name}"
|
20
|
-
else
|
21
|
-
dir = generate_custom_file_path(args)
|
22
|
-
file_name = "#{dir}/#{generate_file_name}"
|
12
|
+
def url
|
13
|
+
@language == :english ? "http://eng.undertexter.se/subtitle.php?id=#{id}" : "http://undertexter.se/laddatext.php?id=#{id}"
|
23
14
|
end
|
15
|
+
|
16
|
+
# Downloading the file and saves it disk
|
17
|
+
def download!(args = {})
|
18
|
+
if args[:to].nil?
|
19
|
+
dir = "/tmp"
|
20
|
+
file_name = "#{dir}/#{generate_file_name}"
|
21
|
+
else
|
22
|
+
dir = generate_custom_file_path(args)
|
23
|
+
file_name = "#{dir}/#{generate_file_name}"
|
24
|
+
end
|
24
25
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
26
|
+
data = RestClient.get(self.url, :timeout => 10) rescue nil
|
27
|
+
file = File.new(file_name, 'w')
|
28
|
+
file.write(data)
|
29
|
+
file.close
|
29
30
|
|
30
|
-
|
31
|
+
type = Mimer.identify(file_name)
|
31
32
|
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
33
|
+
if type.zip?
|
34
|
+
file_ending = ".zip"
|
35
|
+
elsif type.rar?
|
36
|
+
file_ending = ".rar"
|
37
|
+
else
|
38
|
+
file_ending = ""
|
39
|
+
end
|
39
40
|
|
40
|
-
|
41
|
+
new_file_name = "#{dir}/#{title.gsub(/\s+/, '.')}#{file_ending}"
|
41
42
|
|
42
|
-
|
43
|
-
|
43
|
+
# Changing the name on the file
|
44
|
+
FileUtils.mv(file_name, new_file_name)
|
44
45
|
|
45
|
-
|
46
|
-
|
47
|
-
end
|
48
|
-
|
49
|
-
private
|
50
|
-
def id
|
51
|
-
@details.match(/id=(\d+)/)[1]
|
46
|
+
# I like return :)
|
47
|
+
return new_file_name
|
52
48
|
end
|
49
|
+
|
50
|
+
private
|
51
|
+
def id
|
52
|
+
@details.match(/id=(\d+)/)[1]
|
53
|
+
end
|
53
54
|
|
54
|
-
|
55
|
-
|
56
|
-
|
55
|
+
def generate_file_name
|
56
|
+
(0...30).map{65.+(rand(25)).chr}.join.downcase
|
57
|
+
end
|
57
58
|
|
58
|
-
|
59
|
-
|
60
|
-
|
59
|
+
def generate_custom_file_path(args)
|
60
|
+
# If the path is relative
|
61
|
+
args[:to] = File.expand_path(args[:to]) unless args[:to].match(/^\//)
|
61
62
|
|
62
|
-
|
63
|
-
|
64
|
-
|
63
|
+
# Makes sure that every directory structure looks the same
|
64
|
+
Dir.new(args[:to]).path
|
65
|
+
end
|
66
|
+
end
|
65
67
|
end
|
data/lib/undertexter.rb
CHANGED
@@ -2,22 +2,10 @@
|
|
2
2
|
|
3
3
|
require 'rest-client'
|
4
4
|
require 'subtitle'
|
5
|
-
require 'hpricot'
|
5
|
+
require 'nokogiri'
|
6
6
|
require 'iconv'
|
7
7
|
require 'undertexter/array'
|
8
8
|
|
9
|
-
module Hpricot
|
10
|
-
|
11
|
-
# Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
|
12
|
-
# incorrectly.
|
13
|
-
def self.uxs(str)
|
14
|
-
str.to_s.
|
15
|
-
gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
|
16
|
-
gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
|
17
|
-
end
|
18
|
-
|
19
|
-
end
|
20
|
-
|
21
9
|
class Undertexter
|
22
10
|
attr_accessor :raw_data, :base_details, :subtitles
|
23
11
|
|
@@ -57,43 +45,40 @@ class Undertexter
|
|
57
45
|
# Example output
|
58
46
|
# [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751", "Avatar"]]
|
59
47
|
|
60
|
-
doc =
|
48
|
+
doc = Nokogiri::HTML(@raw_data)
|
61
49
|
@block = []
|
62
50
|
|
63
51
|
# Trying to find the {tbody} that does not contain any tbody's
|
64
|
-
tbody = doc.
|
65
|
-
not inner.
|
52
|
+
tbody = doc.css("tbody").to_a.reject do |inner, index|
|
53
|
+
not inner.content.match(/Nedladdningar/i)
|
66
54
|
end.sort_by do |inner|
|
67
|
-
inner.
|
55
|
+
inner.css('tbody').count
|
68
56
|
end.first
|
69
57
|
|
70
58
|
# Nothing found, okey!
|
71
59
|
return if tbody.nil?
|
72
60
|
|
73
|
-
tbody = tbody.
|
61
|
+
tbody = tbody.css('tr').drop(3)
|
74
62
|
|
75
63
|
tbody.each_with_index do |value, index|
|
76
64
|
next unless index % 3 == 0
|
77
65
|
length = @block.length
|
78
66
|
@block[length] = [] if @block[length].nil?
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
@block[length] << line[0]
|
84
|
-
@block[length] << line[2]
|
67
|
+
line = tbody[index + 1].content.split(/\n/).map(&:strip)
|
68
|
+
value = value.css('a')
|
69
|
+
@block[length] << line[1]
|
70
|
+
@block[length] << line[3]
|
85
71
|
@block[length] << line[4]
|
86
|
-
@block[length] << value.last.
|
87
|
-
@block[length] << value.last.
|
88
|
-
|
89
|
-
@block[length].map! {|i| i.gsub(/<\/?[^>]*>/, "").strip}
|
72
|
+
@block[length] << value.last.attr('href')
|
73
|
+
@block[length] << value.last.attr('title')
|
74
|
+
@block[length].map!(&:strip)
|
90
75
|
end
|
91
76
|
end
|
92
77
|
|
93
78
|
def build!
|
94
79
|
@block.each do |movie|
|
95
80
|
next unless movie.count == 5
|
96
|
-
@subtitles << Subtitle.new({
|
81
|
+
@subtitles << Container::Subtitle.new({
|
97
82
|
:cds => movie[0].match(/\d+/)[0].to_i,
|
98
83
|
:downloads => movie[1].match(/\d+$/)[0].to_i,
|
99
84
|
:title => movie[2],
|
data/spec/subtitle_spec.rb
CHANGED
data/spec/undertexter_spec.rb
CHANGED
@@ -84,6 +84,10 @@ describe Undertexter, "trying to search for a movie using a title" do
|
|
84
84
|
end
|
85
85
|
end
|
86
86
|
|
87
|
+
it "should only contain Container::Subtitle instances" do
|
88
|
+
@use.each { |subtitle| subtitle.should be_instance_of(Container::Subtitle) }
|
89
|
+
end
|
90
|
+
|
87
91
|
it "should not contain any attributes that contain any html tags" do
|
88
92
|
@use.each do |subtitle|
|
89
93
|
[:details, :downloads, :cds, :title, :movie_title, :url].each do |method|
|
data/undertexter.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "undertexter"
|
6
|
-
s.version = "0.1.4"
|
6
|
+
s.version = "0.1.5"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ["Linus Oleander"]
|
9
9
|
s.email = ["linus@oleander.nu"]
|
@@ -18,12 +18,8 @@ Gem::Specification.new do |s|
|
|
18
18
|
s.executables = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
|
19
19
|
s.require_paths = ["lib"]
|
20
20
|
|
21
|
-
s.add_dependency('rest-client')
|
22
|
-
|
23
|
-
# Using hpricot 0.8.2 due to a bug
|
24
|
-
# https://github.com/hpricot/hpricot/issues/#issue/33
|
25
|
-
|
26
|
-
s.add_dependency('hpricot', '0.8.2')
|
21
|
+
s.add_dependency('rest-client')
|
22
|
+
s.add_dependency('nokogiri')
|
27
23
|
s.add_dependency('mimer_plus')
|
28
24
|
s.add_dependency('levenshteinish')
|
29
25
|
s.add_development_dependency('rspec')
|
metadata
CHANGED
@@ -1,13 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: undertexter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 19
|
5
4
|
prerelease:
|
6
|
-
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 4
|
10
|
-
version: 0.1.4
|
5
|
+
version: 0.1.5
|
11
6
|
platform: ruby
|
12
7
|
authors:
|
13
8
|
- Linus Oleander
|
@@ -15,7 +10,7 @@ autorequire:
|
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
12
|
|
18
|
-
date: 2011-02-
|
13
|
+
date: 2011-02-14 00:00:00 +01:00
|
19
14
|
default_executable:
|
20
15
|
dependencies:
|
21
16
|
- !ruby/object:Gem::Dependency
|
@@ -26,26 +21,18 @@ dependencies:
|
|
26
21
|
requirements:
|
27
22
|
- - ">="
|
28
23
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 3
|
30
|
-
segments:
|
31
|
-
- 0
|
32
24
|
version: "0"
|
33
25
|
type: :runtime
|
34
26
|
version_requirements: *id001
|
35
27
|
- !ruby/object:Gem::Dependency
|
36
|
-
name: hpricot
|
28
|
+
name: nokogiri
|
37
29
|
prerelease: false
|
38
30
|
requirement: &id002 !ruby/object:Gem::Requirement
|
39
31
|
none: false
|
40
32
|
requirements:
|
41
|
-
- - "="
|
33
|
+
- - ">="
|
42
34
|
- !ruby/object:Gem::Version
|
43
|
-
|
44
|
-
segments:
|
45
|
-
- 0
|
46
|
-
- 8
|
47
|
-
- 2
|
48
|
-
version: 0.8.2
|
35
|
+
version: "0"
|
49
36
|
type: :runtime
|
50
37
|
version_requirements: *id002
|
51
38
|
- !ruby/object:Gem::Dependency
|
@@ -56,9 +43,6 @@ dependencies:
|
|
56
43
|
requirements:
|
57
44
|
- - ">="
|
58
45
|
- !ruby/object:Gem::Version
|
59
|
-
hash: 3
|
60
|
-
segments:
|
61
|
-
- 0
|
62
46
|
version: "0"
|
63
47
|
type: :runtime
|
64
48
|
version_requirements: *id003
|
@@ -70,9 +54,6 @@ dependencies:
|
|
70
54
|
requirements:
|
71
55
|
- - ">="
|
72
56
|
- !ruby/object:Gem::Version
|
73
|
-
hash: 3
|
74
|
-
segments:
|
75
|
-
- 0
|
76
57
|
version: "0"
|
77
58
|
type: :runtime
|
78
59
|
version_requirements: *id004
|
@@ -84,9 +65,6 @@ dependencies:
|
|
84
65
|
requirements:
|
85
66
|
- - ">="
|
86
67
|
- !ruby/object:Gem::Version
|
87
|
-
hash: 3
|
88
|
-
segments:
|
89
|
-
- 0
|
90
68
|
version: "0"
|
91
69
|
type: :development
|
92
70
|
version_requirements: *id005
|
@@ -129,18 +107,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
129
107
|
requirements:
|
130
108
|
- - ">="
|
131
109
|
- !ruby/object:Gem::Version
|
132
|
-
hash: 3
|
133
|
-
segments:
|
134
|
-
- 0
|
135
110
|
version: "0"
|
136
111
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
137
112
|
none: false
|
138
113
|
requirements:
|
139
114
|
- - ">="
|
140
115
|
- !ruby/object:Gem::Version
|
141
|
-
hash: 3
|
142
|
-
segments:
|
143
|
-
- 0
|
144
116
|
version: "0"
|
145
117
|
requirements: []
|
146
118
|
|