undertexter 0.1.3 → 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +3 -7
- data/lib/undertexter.rb +43 -29
- data/spec/undertexter_spec.rb +10 -2
- data/undertexter.gemspec +6 -2
- metadata +18 -9
- data/lib/.undertexter.rb.swp +0 -0
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
undertexter (0.1.
|
4
|
+
undertexter (0.1.3)
|
5
|
+
hpricot (= 0.8.2)
|
5
6
|
levenshteinish
|
6
7
|
mimer_plus
|
7
|
-
nokogiri
|
8
8
|
rest-client
|
9
9
|
|
10
10
|
GEM
|
@@ -12,11 +12,11 @@ GEM
|
|
12
12
|
specs:
|
13
13
|
diff-lcs (1.1.2)
|
14
14
|
hintable_levenshtein (0.0.3)
|
15
|
+
hpricot (0.8.2)
|
15
16
|
levenshteinish (0.0.1)
|
16
17
|
hintable_levenshtein
|
17
18
|
mime-types (1.16)
|
18
19
|
mimer_plus (0.0.4)
|
19
|
-
nokogiri (1.4.4)
|
20
20
|
rest-client (1.6.1)
|
21
21
|
mime-types (>= 1.16)
|
22
22
|
rspec (2.4.0)
|
@@ -32,9 +32,5 @@ PLATFORMS
|
|
32
32
|
ruby
|
33
33
|
|
34
34
|
DEPENDENCIES
|
35
|
-
levenshteinish
|
36
|
-
mimer_plus
|
37
|
-
nokogiri
|
38
|
-
rest-client
|
39
35
|
rspec
|
40
36
|
undertexter!
|
data/lib/undertexter.rb
CHANGED
@@ -2,10 +2,22 @@
|
|
2
2
|
|
3
3
|
require 'rest-client'
|
4
4
|
require 'subtitle'
|
5
|
-
require 'nokogiri'
|
5
|
+
require 'hpricot'
|
6
6
|
require 'iconv'
|
7
7
|
require 'undertexter/array'
|
8
8
|
|
9
|
+
module Hpricot
|
10
|
+
|
11
|
+
# Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
|
12
|
+
# incorrectly.
|
13
|
+
def self.uxs(str)
|
14
|
+
str.to_s.
|
15
|
+
gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
|
16
|
+
gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
9
21
|
class Undertexter
|
10
22
|
attr_accessor :raw_data, :base_details, :subtitles
|
11
23
|
|
@@ -42,37 +54,39 @@ class Undertexter
|
|
42
54
|
end
|
43
55
|
|
44
56
|
def parse!
|
45
|
-
noko = Nokogiri::HTML(@raw_data)
|
46
|
-
|
47
57
|
# Example output
|
48
|
-
# [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751"]]
|
58
|
+
# [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751", "Avatar"]]
|
59
|
+
|
60
|
+
doc = Hpricot(@raw_data)
|
61
|
+
@block = []
|
49
62
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
next if @block.nil?
|
60
|
-
|
61
|
-
noko.css("table:nth-child(#{id}) a").to_a.reject do |inner|
|
62
|
-
details = inner.attr('href')
|
63
|
-
inner.content.empty? or details.nil? or ! details.match(/(p=undertext&id=\d+)|(p=subtitle&id=\d+)/i)
|
64
|
-
end.map do |y|
|
65
|
-
[y.attr('href'), y.content.strip]
|
66
|
-
end.reject do |list|
|
67
|
-
list.last.empty?
|
68
|
-
end.each_with_index do |value, index|
|
69
|
-
@block[index] << value.first
|
70
|
-
@block[index] << value.last
|
71
|
-
end
|
63
|
+
# Trying to find the {tbody} that does not contain any tbody's
|
64
|
+
tbody = doc.search("tbody").to_a.reject do |inner, index|
|
65
|
+
not inner.inner_html.match(/Nedladdningar/i)
|
66
|
+
end.sort_by do |inner|
|
67
|
+
inner.search('tbody').count
|
68
|
+
end.first
|
72
69
|
|
73
|
-
|
74
|
-
|
75
|
-
|
70
|
+
# Nothing found, okey!
|
71
|
+
return if tbody.nil?
|
72
|
+
|
73
|
+
tbody = tbody.search('tr').drop(3)
|
74
|
+
|
75
|
+
tbody.each_with_index do |value, index|
|
76
|
+
next unless index % 3 == 0
|
77
|
+
length = @block.length
|
78
|
+
@block[length] = [] if @block[length].nil?
|
79
|
+
|
80
|
+
line = tbody[index + 1].inner_html.split('<br />').map(&:strip)
|
81
|
+
value = value.search('a')
|
82
|
+
|
83
|
+
@block[length] << line[0]
|
84
|
+
@block[length] << line[2]
|
85
|
+
@block[length] << line[4]
|
86
|
+
@block[length] << value.last.attributes['href']
|
87
|
+
@block[length] << value.last.attributes['title']
|
88
|
+
|
89
|
+
@block[length].map! {|i| i.gsub(/<\/?[^>]*>/, "").strip}
|
76
90
|
end
|
77
91
|
end
|
78
92
|
|
data/spec/undertexter_spec.rb
CHANGED
@@ -58,8 +58,8 @@ describe Undertexter, "trying to search for a movie using a title" do
|
|
58
58
|
Undertexter.should have_at_least(41).find("die hard")
|
59
59
|
end
|
60
60
|
|
61
|
-
it "should
|
62
|
-
@use.
|
61
|
+
it "should have 6 die hard movies that does not contain any title" do
|
62
|
+
@use.reject{|subtitle| ! subtitle.title.empty?}.count.should be(6)
|
63
63
|
end
|
64
64
|
|
65
65
|
it "should contain the right details, again" do
|
@@ -83,6 +83,14 @@ describe Undertexter, "trying to search for a movie using a title" do
|
|
83
83
|
subtitle.url.match(/id=(\d+)/)[1].should eq(@use[index].details.match(/id=(\d+)/)[1])
|
84
84
|
end
|
85
85
|
end
|
86
|
+
|
87
|
+
it "should not contain any attributes that contain any html tags" do
|
88
|
+
@use.each do |subtitle|
|
89
|
+
[:details, :downloads, :cds, :title, :movie_title, :url].each do |method|
|
90
|
+
subtitle.send(method).to_s.should_not match(/<\/?[^>]*>/)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
86
94
|
end
|
87
95
|
|
88
96
|
describe Undertexter, "should work when trying to fetch some english subtitles" do
|
data/undertexter.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "undertexter"
|
6
|
-
s.version = "0.1.3"
|
6
|
+
s.version = "0.1.4"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ["Linus Oleander"]
|
9
9
|
s.email = ["linus@oleander.nu"]
|
@@ -19,7 +19,11 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.require_paths = ["lib"]
|
20
20
|
|
21
21
|
s.add_dependency('rest-client')
|
22
|
-
|
22
|
+
|
23
|
+
# Using hpricot 0.8.2 due to a bug
|
24
|
+
# https://github.com/hpricot/hpricot/issues/#issue/33
|
25
|
+
|
26
|
+
s.add_dependency('hpricot', '0.8.2')
|
23
27
|
s.add_dependency('mimer_plus')
|
24
28
|
s.add_dependency('levenshteinish')
|
25
29
|
s.add_development_dependency('rspec')
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: undertexter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 1
|
8
|
-
- 3
|
9
|
-
version: 0.1.3
|
9
|
+
- 4
|
10
|
+
version: 0.1.4
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Linus Oleander
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2011-02-
|
18
|
+
date: 2011-02-06 00:00:00 +01:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
@@ -25,22 +26,26 @@ dependencies:
|
|
25
26
|
requirements:
|
26
27
|
- - ">="
|
27
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
28
30
|
segments:
|
29
31
|
- 0
|
30
32
|
version: "0"
|
31
33
|
type: :runtime
|
32
34
|
version_requirements: *id001
|
33
35
|
- !ruby/object:Gem::Dependency
|
34
|
-
name: nokogiri
|
36
|
+
name: hpricot
|
35
37
|
prerelease: false
|
36
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
37
39
|
none: false
|
38
40
|
requirements:
|
39
|
-
- - ">="
|
41
|
+
- - "="
|
40
42
|
- !ruby/object:Gem::Version
|
43
|
+
hash: 59
|
41
44
|
segments:
|
42
45
|
- 0
|
43
|
-
|
46
|
+
- 8
|
47
|
+
- 2
|
48
|
+
version: 0.8.2
|
44
49
|
type: :runtime
|
45
50
|
version_requirements: *id002
|
46
51
|
- !ruby/object:Gem::Dependency
|
@@ -51,6 +56,7 @@ dependencies:
|
|
51
56
|
requirements:
|
52
57
|
- - ">="
|
53
58
|
- !ruby/object:Gem::Version
|
59
|
+
hash: 3
|
54
60
|
segments:
|
55
61
|
- 0
|
56
62
|
version: "0"
|
@@ -64,6 +70,7 @@ dependencies:
|
|
64
70
|
requirements:
|
65
71
|
- - ">="
|
66
72
|
- !ruby/object:Gem::Version
|
73
|
+
hash: 3
|
67
74
|
segments:
|
68
75
|
- 0
|
69
76
|
version: "0"
|
@@ -77,6 +84,7 @@ dependencies:
|
|
77
84
|
requirements:
|
78
85
|
- - ">="
|
79
86
|
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
80
88
|
segments:
|
81
89
|
- 0
|
82
90
|
version: "0"
|
@@ -98,7 +106,6 @@ files:
|
|
98
106
|
- Gemfile.lock
|
99
107
|
- README.markdown
|
100
108
|
- Rakefile
|
101
|
-
- lib/.undertexter.rb.swp
|
102
109
|
- lib/subtitle.rb
|
103
110
|
- lib/undertexter.rb
|
104
111
|
- lib/undertexter/array.rb
|
@@ -122,6 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
129
|
requirements:
|
123
130
|
- - ">="
|
124
131
|
- !ruby/object:Gem::Version
|
132
|
+
hash: 3
|
125
133
|
segments:
|
126
134
|
- 0
|
127
135
|
version: "0"
|
@@ -130,13 +138,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
130
138
|
requirements:
|
131
139
|
- - ">="
|
132
140
|
- !ruby/object:Gem::Version
|
141
|
+
hash: 3
|
133
142
|
segments:
|
134
143
|
- 0
|
135
144
|
version: "0"
|
136
145
|
requirements: []
|
137
146
|
|
138
147
|
rubyforge_project: undertexter
|
139
|
-
rubygems_version: 1.
|
148
|
+
rubygems_version: 1.5.0
|
140
149
|
signing_key:
|
141
150
|
specification_version: 3
|
142
151
|
summary: A subtitle search client for undertexter.se
|
data/lib/.undertexter.rb.swp
DELETED
Binary file
|