undertexter 0.1.3 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +3 -7
- data/lib/undertexter.rb +43 -29
- data/spec/undertexter_spec.rb +10 -2
- data/undertexter.gemspec +6 -2
- metadata +18 -9
- data/lib/.undertexter.rb.swp +0 -0
data/Gemfile.lock
CHANGED
@@ -1,10 +1,10 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
undertexter (0.1.
|
4
|
+
undertexter (0.1.3)
|
5
|
+
hpricot (= 0.8.2)
|
5
6
|
levenshteinish
|
6
7
|
mimer_plus
|
7
|
-
nokogiri
|
8
8
|
rest-client
|
9
9
|
|
10
10
|
GEM
|
@@ -12,11 +12,11 @@ GEM
|
|
12
12
|
specs:
|
13
13
|
diff-lcs (1.1.2)
|
14
14
|
hintable_levenshtein (0.0.3)
|
15
|
+
hpricot (0.8.2)
|
15
16
|
levenshteinish (0.0.1)
|
16
17
|
hintable_levenshtein
|
17
18
|
mime-types (1.16)
|
18
19
|
mimer_plus (0.0.4)
|
19
|
-
nokogiri (1.4.4)
|
20
20
|
rest-client (1.6.1)
|
21
21
|
mime-types (>= 1.16)
|
22
22
|
rspec (2.4.0)
|
@@ -32,9 +32,5 @@ PLATFORMS
|
|
32
32
|
ruby
|
33
33
|
|
34
34
|
DEPENDENCIES
|
35
|
-
levenshteinish
|
36
|
-
mimer_plus
|
37
|
-
nokogiri
|
38
|
-
rest-client
|
39
35
|
rspec
|
40
36
|
undertexter!
|
data/lib/undertexter.rb
CHANGED
@@ -2,10 +2,22 @@
|
|
2
2
|
|
3
3
|
require 'rest-client'
|
4
4
|
require 'subtitle'
|
5
|
-
require 'nokogiri'
|
5
|
+
require 'hpricot'
|
6
6
|
require 'iconv'
|
7
7
|
require 'undertexter/array'
|
8
8
|
|
9
|
+
module Hpricot
|
10
|
+
|
11
|
+
# Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
|
12
|
+
# incorrectly.
|
13
|
+
def self.uxs(str)
|
14
|
+
str.to_s.
|
15
|
+
gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
|
16
|
+
gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
20
|
+
|
9
21
|
class Undertexter
|
10
22
|
attr_accessor :raw_data, :base_details, :subtitles
|
11
23
|
|
@@ -42,37 +54,39 @@ class Undertexter
|
|
42
54
|
end
|
43
55
|
|
44
56
|
def parse!
|
45
|
-
noko = Nokogiri::HTML(@raw_data)
|
46
|
-
|
47
57
|
# Example output
|
48
|
-
# [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751"]]
|
58
|
+
# [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751", "Avatar"]]
|
59
|
+
|
60
|
+
doc = Hpricot(@raw_data)
|
61
|
+
@block = []
|
49
62
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
end
|
57
|
-
end
|
58
|
-
|
59
|
-
next if @block.nil?
|
60
|
-
|
61
|
-
noko.css("table:nth-child(#{id}) a").to_a.reject do |inner|
|
62
|
-
details = inner.attr('href')
|
63
|
-
inner.content.empty? or details.nil? or ! details.match(/(p=undertext&id=\d+)|(p=subtitle&id=\d+)/i)
|
64
|
-
end.map do |y|
|
65
|
-
[y.attr('href'), y.content.strip]
|
66
|
-
end.reject do |list|
|
67
|
-
list.last.empty?
|
68
|
-
end.each_with_index do |value, index|
|
69
|
-
@block[index] << value.first
|
70
|
-
@block[index] << value.last
|
71
|
-
end
|
63
|
+
# Find the innermost <tbody> that mentions "Nedladdningar", i.e. the matching one with the fewest nested tbody elements
|
64
|
+
tbody = doc.search("tbody").to_a.reject do |inner, index|
|
65
|
+
not inner.inner_html.match(/Nedladdningar/i)
|
66
|
+
end.sort_by do |inner|
|
67
|
+
inner.search('tbody').count
|
68
|
+
end.first
|
72
69
|
|
73
|
-
|
74
|
-
|
75
|
-
|
70
|
+
# Nothing found, so there is nothing to parse
|
71
|
+
return if tbody.nil?
|
72
|
+
|
73
|
+
tbody = tbody.search('tr').drop(3)
|
74
|
+
|
75
|
+
tbody.each_with_index do |value, index|
|
76
|
+
next unless index % 3 == 0
|
77
|
+
length = @block.length
|
78
|
+
@block[length] = [] if @block[length].nil?
|
79
|
+
|
80
|
+
line = tbody[index + 1].inner_html.split('<br />').map(&:strip)
|
81
|
+
value = value.search('a')
|
82
|
+
|
83
|
+
@block[length] << line[0]
|
84
|
+
@block[length] << line[2]
|
85
|
+
@block[length] << line[4]
|
86
|
+
@block[length] << value.last.attributes['href']
|
87
|
+
@block[length] << value.last.attributes['title']
|
88
|
+
|
89
|
+
@block[length].map! {|i| i.gsub(/<\/?[^>]*>/, "").strip}
|
76
90
|
end
|
77
91
|
end
|
78
92
|
|
data/spec/undertexter_spec.rb
CHANGED
@@ -58,8 +58,8 @@ describe Undertexter, "trying to search for a movie using a title" do
|
|
58
58
|
Undertexter.should have_at_least(41).find("die hard")
|
59
59
|
end
|
60
60
|
|
61
|
-
it "should
|
62
|
-
@use.
|
61
|
+
it "should have 6 die hard movies that does not contain any title" do
|
62
|
+
@use.reject{|subtitle| ! subtitle.title.empty?}.count.should be(6)
|
63
63
|
end
|
64
64
|
|
65
65
|
it "should contain the right details, again" do
|
@@ -83,6 +83,14 @@ describe Undertexter, "trying to search for a movie using a title" do
|
|
83
83
|
subtitle.url.match(/id=(\d+)/)[1].should eq(@use[index].details.match(/id=(\d+)/)[1])
|
84
84
|
end
|
85
85
|
end
|
86
|
+
|
87
|
+
it "should not contain any attributes that contain any html tags" do
|
88
|
+
@use.each do |subtitle|
|
89
|
+
[:details, :downloads, :cds, :title, :movie_title, :url].each do |method|
|
90
|
+
subtitle.send(method).to_s.should_not match(/<\/?[^>]*>/)
|
91
|
+
end
|
92
|
+
end
|
93
|
+
end
|
86
94
|
end
|
87
95
|
|
88
96
|
describe Undertexter, "should work when trying to fetch some english subtitles" do
|
data/undertexter.gemspec
CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
|
|
3
3
|
|
4
4
|
Gem::Specification.new do |s|
|
5
5
|
s.name = "undertexter"
|
6
|
-
s.version = "0.1.3"
|
6
|
+
s.version = "0.1.4"
|
7
7
|
s.platform = Gem::Platform::RUBY
|
8
8
|
s.authors = ["Linus Oleander"]
|
9
9
|
s.email = ["linus@oleander.nu"]
|
@@ -19,7 +19,11 @@ Gem::Specification.new do |s|
|
|
19
19
|
s.require_paths = ["lib"]
|
20
20
|
|
21
21
|
s.add_dependency('rest-client')
|
22
|
-
|
22
|
+
|
23
|
+
# Using hpricot 0.8.2 due to a bug
|
24
|
+
# https://github.com/hpricot/hpricot/issues/#issue/33
|
25
|
+
|
26
|
+
s.add_dependency('hpricot', '0.8.2')
|
23
27
|
s.add_dependency('mimer_plus')
|
24
28
|
s.add_dependency('levenshteinish')
|
25
29
|
s.add_development_dependency('rspec')
|
metadata
CHANGED
@@ -1,12 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: undertexter
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 19
|
5
|
+
prerelease:
|
5
6
|
segments:
|
6
7
|
- 0
|
7
8
|
- 1
|
8
|
-
-
|
9
|
-
version: 0.1.3
|
9
|
+
- 4
|
10
|
+
version: 0.1.4
|
10
11
|
platform: ruby
|
11
12
|
authors:
|
12
13
|
- Linus Oleander
|
@@ -14,7 +15,7 @@ autorequire:
|
|
14
15
|
bindir: bin
|
15
16
|
cert_chain: []
|
16
17
|
|
17
|
-
date: 2011-02-
|
18
|
+
date: 2011-02-06 00:00:00 +01:00
|
18
19
|
default_executable:
|
19
20
|
dependencies:
|
20
21
|
- !ruby/object:Gem::Dependency
|
@@ -25,22 +26,26 @@ dependencies:
|
|
25
26
|
requirements:
|
26
27
|
- - ">="
|
27
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
28
30
|
segments:
|
29
31
|
- 0
|
30
32
|
version: "0"
|
31
33
|
type: :runtime
|
32
34
|
version_requirements: *id001
|
33
35
|
- !ruby/object:Gem::Dependency
|
34
|
-
name: nokogiri
|
36
|
+
name: hpricot
|
35
37
|
prerelease: false
|
36
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
37
39
|
none: false
|
38
40
|
requirements:
|
39
|
-
- - "
|
41
|
+
- - "="
|
40
42
|
- !ruby/object:Gem::Version
|
43
|
+
hash: 59
|
41
44
|
segments:
|
42
45
|
- 0
|
43
|
-
|
46
|
+
- 8
|
47
|
+
- 2
|
48
|
+
version: 0.8.2
|
44
49
|
type: :runtime
|
45
50
|
version_requirements: *id002
|
46
51
|
- !ruby/object:Gem::Dependency
|
@@ -51,6 +56,7 @@ dependencies:
|
|
51
56
|
requirements:
|
52
57
|
- - ">="
|
53
58
|
- !ruby/object:Gem::Version
|
59
|
+
hash: 3
|
54
60
|
segments:
|
55
61
|
- 0
|
56
62
|
version: "0"
|
@@ -64,6 +70,7 @@ dependencies:
|
|
64
70
|
requirements:
|
65
71
|
- - ">="
|
66
72
|
- !ruby/object:Gem::Version
|
73
|
+
hash: 3
|
67
74
|
segments:
|
68
75
|
- 0
|
69
76
|
version: "0"
|
@@ -77,6 +84,7 @@ dependencies:
|
|
77
84
|
requirements:
|
78
85
|
- - ">="
|
79
86
|
- !ruby/object:Gem::Version
|
87
|
+
hash: 3
|
80
88
|
segments:
|
81
89
|
- 0
|
82
90
|
version: "0"
|
@@ -98,7 +106,6 @@ files:
|
|
98
106
|
- Gemfile.lock
|
99
107
|
- README.markdown
|
100
108
|
- Rakefile
|
101
|
-
- lib/.undertexter.rb.swp
|
102
109
|
- lib/subtitle.rb
|
103
110
|
- lib/undertexter.rb
|
104
111
|
- lib/undertexter/array.rb
|
@@ -122,6 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
122
129
|
requirements:
|
123
130
|
- - ">="
|
124
131
|
- !ruby/object:Gem::Version
|
132
|
+
hash: 3
|
125
133
|
segments:
|
126
134
|
- 0
|
127
135
|
version: "0"
|
@@ -130,13 +138,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
130
138
|
requirements:
|
131
139
|
- - ">="
|
132
140
|
- !ruby/object:Gem::Version
|
141
|
+
hash: 3
|
133
142
|
segments:
|
134
143
|
- 0
|
135
144
|
version: "0"
|
136
145
|
requirements: []
|
137
146
|
|
138
147
|
rubyforge_project: undertexter
|
139
|
-
rubygems_version: 1.
|
148
|
+
rubygems_version: 1.5.0
|
140
149
|
signing_key:
|
141
150
|
specification_version: 3
|
142
151
|
summary: A subtitle search client for undertexter.se
|
data/lib/.undertexter.rb.swp
DELETED
Binary file
|