undertexter 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- undertexter (0.1.2)
4
+ undertexter (0.1.3)
5
+ hpricot (= 0.8.2)
5
6
  levenshteinish
6
7
  mimer_plus
7
- nokogiri
8
8
  rest-client
9
9
 
10
10
  GEM
@@ -12,11 +12,11 @@ GEM
12
12
  specs:
13
13
  diff-lcs (1.1.2)
14
14
  hintable_levenshtein (0.0.3)
15
+ hpricot (0.8.2)
15
16
  levenshteinish (0.0.1)
16
17
  hintable_levenshtein
17
18
  mime-types (1.16)
18
19
  mimer_plus (0.0.4)
19
- nokogiri (1.4.4)
20
20
  rest-client (1.6.1)
21
21
  mime-types (>= 1.16)
22
22
  rspec (2.4.0)
@@ -32,9 +32,5 @@ PLATFORMS
32
32
  ruby
33
33
 
34
34
  DEPENDENCIES
35
- levenshteinish
36
- mimer_plus
37
- nokogiri
38
- rest-client
39
35
  rspec
40
36
  undertexter!
data/lib/undertexter.rb CHANGED
@@ -2,10 +2,22 @@
2
2
 
3
3
  require 'rest-client'
4
4
  require 'subtitle'
5
- require 'nokogiri'
5
+ require 'hpricot'
6
6
  require 'iconv'
7
7
  require 'undertexter/array'
8
8
 
9
+ module Hpricot
10
+
11
+ # Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
12
+ # incorrectly.
13
+ def self.uxs(str)
14
+ str.to_s.
15
+ gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
16
+ gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
17
+ end
18
+
19
+ end
20
+
9
21
  class Undertexter
10
22
  attr_accessor :raw_data, :base_details, :subtitles
11
23
 
@@ -42,37 +54,39 @@ class Undertexter
42
54
  end
43
55
 
44
56
  def parse!
45
- noko = Nokogiri::HTML(@raw_data)
46
-
47
57
  # Example output
48
- # [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751"]]
58
+ # [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751", "Avatar"]]
59
+
60
+ doc = Hpricot(@raw_data)
61
+ @block = []
49
62
 
50
- [12,15].each do |id|
51
- @block = noko.css("table:nth-child(#{id}) td").to_a.reject do |inner|
52
- inner.content.empty? or ! inner.content.match(/Nedladdningar/i)
53
- end.map do |inner|
54
- inner.content.split(/\n/).map do |i|
55
- i.gsub(/"/, "").strip
56
- end
57
- end
58
-
59
- next if @block.nil?
60
-
61
- noko.css("table:nth-child(#{id}) a").to_a.reject do |inner|
62
- details = inner.attr('href')
63
- inner.content.empty? or details.nil? or ! details.match(/(p=undertext&id=\d+)|(p=subtitle&id=\d+)/i)
64
- end.map do |y|
65
- [y.attr('href'), y.content.strip]
66
- end.reject do |list|
67
- list.last.empty?
68
- end.each_with_index do |value, index|
69
- @block[index] << value.first
70
- @block[index] << value.last
71
- end
63
+ # Trying to find the {tbody} that does not contain any tbody's
64
+ tbody = doc.search("tbody").to_a.reject do |inner, index|
65
+ not inner.inner_html.match(/Nedladdningar/i)
66
+ end.sort_by do |inner|
67
+ inner.search('tbody').count
68
+ end.first
72
69
 
73
- @block.map!{|value| value.reject(&:empty?)}
74
-
75
- break if @block.any?
70
+ # Nothing found, okey!
71
+ return if tbody.nil?
72
+
73
+ tbody = tbody.search('tr').drop(3)
74
+
75
+ tbody.each_with_index do |value, index|
76
+ next unless index % 3 == 0
77
+ length = @block.length
78
+ @block[length] = [] if @block[length].nil?
79
+
80
+ line = tbody[index + 1].inner_html.split('<br />').map(&:strip)
81
+ value = value.search('a')
82
+
83
+ @block[length] << line[0]
84
+ @block[length] << line[2]
85
+ @block[length] << line[4]
86
+ @block[length] << value.last.attributes['href']
87
+ @block[length] << value.last.attributes['title']
88
+
89
+ @block[length].map! {|i| i.gsub(/<\/?[^>]*>/, "").strip}
76
90
  end
77
91
  end
78
92
 
@@ -58,8 +58,8 @@ describe Undertexter, "trying to search for a movie using a title" do
58
58
  Undertexter.should have_at_least(41).find("die hard")
59
59
  end
60
60
 
61
- it "should return the right title, again" do
62
- @use.each{|subtitle| subtitle.title.should match(/die.*hard/i)}
61
+ it "should have 6 die hard movies that does not contain any title" do
62
+ @use.reject{|subtitle| ! subtitle.title.empty?}.count.should be(6)
63
63
  end
64
64
 
65
65
  it "should contain the right details, again" do
@@ -83,6 +83,14 @@ describe Undertexter, "trying to search for a movie using a title" do
83
83
  subtitle.url.match(/id=(\d+)/)[1].should eq(@use[index].details.match(/id=(\d+)/)[1])
84
84
  end
85
85
  end
86
+
87
+ it "should not contain any attributes that contain any html tags" do
88
+ @use.each do |subtitle|
89
+ [:details, :downloads, :cds, :title, :movie_title, :url].each do |method|
90
+ subtitle.send(method).to_s.should_not match(/<\/?[^>]*>/)
91
+ end
92
+ end
93
+ end
86
94
  end
87
95
 
88
96
  describe Undertexter, "should work when trying to fetch some english subtitles" do
data/undertexter.gemspec CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "undertexter"
6
- s.version = "0.1.3"
6
+ s.version = "0.1.4"
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = ["Linus Oleander"]
9
9
  s.email = ["linus@oleander.nu"]
@@ -19,7 +19,11 @@ Gem::Specification.new do |s|
19
19
  s.require_paths = ["lib"]
20
20
 
21
21
  s.add_dependency('rest-client')
22
- s.add_dependency('nokogiri')
22
+
23
+ # Using hpricot 0.8.2 due to a bug
24
+ # https://github.com/hpricot/hpricot/issues/#issue/33
25
+
26
+ s.add_dependency('hpricot', '0.8.2')
23
27
  s.add_dependency('mimer_plus')
24
28
  s.add_dependency('levenshteinish')
25
29
  s.add_development_dependency('rspec')
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: undertexter
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 19
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
8
  - 1
8
- - 3
9
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
10
11
  platform: ruby
11
12
  authors:
12
13
  - Linus Oleander
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-02-04 00:00:00 +01:00
18
+ date: 2011-02-06 00:00:00 +01:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -25,22 +26,26 @@ dependencies:
25
26
  requirements:
26
27
  - - ">="
27
28
  - !ruby/object:Gem::Version
29
+ hash: 3
28
30
  segments:
29
31
  - 0
30
32
  version: "0"
31
33
  type: :runtime
32
34
  version_requirements: *id001
33
35
  - !ruby/object:Gem::Dependency
34
- name: nokogiri
36
+ name: hpricot
35
37
  prerelease: false
36
38
  requirement: &id002 !ruby/object:Gem::Requirement
37
39
  none: false
38
40
  requirements:
39
- - - ">="
41
+ - - "="
40
42
  - !ruby/object:Gem::Version
43
+ hash: 59
41
44
  segments:
42
45
  - 0
43
- version: "0"
46
+ - 8
47
+ - 2
48
+ version: 0.8.2
44
49
  type: :runtime
45
50
  version_requirements: *id002
46
51
  - !ruby/object:Gem::Dependency
@@ -51,6 +56,7 @@ dependencies:
51
56
  requirements:
52
57
  - - ">="
53
58
  - !ruby/object:Gem::Version
59
+ hash: 3
54
60
  segments:
55
61
  - 0
56
62
  version: "0"
@@ -64,6 +70,7 @@ dependencies:
64
70
  requirements:
65
71
  - - ">="
66
72
  - !ruby/object:Gem::Version
73
+ hash: 3
67
74
  segments:
68
75
  - 0
69
76
  version: "0"
@@ -77,6 +84,7 @@ dependencies:
77
84
  requirements:
78
85
  - - ">="
79
86
  - !ruby/object:Gem::Version
87
+ hash: 3
80
88
  segments:
81
89
  - 0
82
90
  version: "0"
@@ -98,7 +106,6 @@ files:
98
106
  - Gemfile.lock
99
107
  - README.markdown
100
108
  - Rakefile
101
- - lib/.undertexter.rb.swp
102
109
  - lib/subtitle.rb
103
110
  - lib/undertexter.rb
104
111
  - lib/undertexter/array.rb
@@ -122,6 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
122
129
  requirements:
123
130
  - - ">="
124
131
  - !ruby/object:Gem::Version
132
+ hash: 3
125
133
  segments:
126
134
  - 0
127
135
  version: "0"
@@ -130,13 +138,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
130
138
  requirements:
131
139
  - - ">="
132
140
  - !ruby/object:Gem::Version
141
+ hash: 3
133
142
  segments:
134
143
  - 0
135
144
  version: "0"
136
145
  requirements: []
137
146
 
138
147
  rubyforge_project: undertexter
139
- rubygems_version: 1.3.7
148
+ rubygems_version: 1.5.0
140
149
  signing_key:
141
150
  specification_version: 3
142
151
  summary: A subtitle search client for undertexter.se
Binary file