undertexter 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- undertexter (0.1.2)
4
+ undertexter (0.1.3)
5
+ hpricot (= 0.8.2)
5
6
  levenshteinish
6
7
  mimer_plus
7
- nokogiri
8
8
  rest-client
9
9
 
10
10
  GEM
@@ -12,11 +12,11 @@ GEM
12
12
  specs:
13
13
  diff-lcs (1.1.2)
14
14
  hintable_levenshtein (0.0.3)
15
+ hpricot (0.8.2)
15
16
  levenshteinish (0.0.1)
16
17
  hintable_levenshtein
17
18
  mime-types (1.16)
18
19
  mimer_plus (0.0.4)
19
- nokogiri (1.4.4)
20
20
  rest-client (1.6.1)
21
21
  mime-types (>= 1.16)
22
22
  rspec (2.4.0)
@@ -32,9 +32,5 @@ PLATFORMS
32
32
  ruby
33
33
 
34
34
  DEPENDENCIES
35
- levenshteinish
36
- mimer_plus
37
- nokogiri
38
- rest-client
39
35
  rspec
40
36
  undertexter!
data/lib/undertexter.rb CHANGED
@@ -2,10 +2,22 @@
2
2
 
3
3
  require 'rest-client'
4
4
  require 'subtitle'
5
- require 'nokogiri'
5
+ require 'hpricot'
6
6
  require 'iconv'
7
7
  require 'undertexter/array'
8
8
 
9
+ module Hpricot
10
+
11
+ # Monkeypatch to fix an Hpricot bug that causes HTML entities to be decoded
12
+ # incorrectly.
13
+ def self.uxs(str)
14
+ str.to_s.
15
+ gsub(/&(\w+);/) { [Hpricot::NamedCharacters[$1] || ??].pack("U*") }.
16
+ gsub(/\&\#(\d+);/) { [$1.to_i].pack("U*") }
17
+ end
18
+
19
+ end
20
+
9
21
  class Undertexter
10
22
  attr_accessor :raw_data, :base_details, :subtitles
11
23
 
@@ -42,37 +54,39 @@ class Undertexter
42
54
  end
43
55
 
44
56
  def parse!
45
- noko = Nokogiri::HTML(@raw_data)
46
-
47
57
  # Example output
48
- # [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751"]]
58
+ # [["(1 cd)", "Nedladdningar: 11891", "Avatar (2009) PROPER DVDSCR XviD-MAXSPEED", "http://www.undertexter.se/?p=undertext&id=19751", "Avatar"]]
59
+
60
+ doc = Hpricot(@raw_data)
61
+ @block = []
49
62
 
50
- [12,15].each do |id|
51
- @block = noko.css("table:nth-child(#{id}) td").to_a.reject do |inner|
52
- inner.content.empty? or ! inner.content.match(/Nedladdningar/i)
53
- end.map do |inner|
54
- inner.content.split(/\n/).map do |i|
55
- i.gsub(/"/, "").strip
56
- end
57
- end
58
-
59
- next if @block.nil?
60
-
61
- noko.css("table:nth-child(#{id}) a").to_a.reject do |inner|
62
- details = inner.attr('href')
63
- inner.content.empty? or details.nil? or ! details.match(/(p=undertext&id=\d+)|(p=subtitle&id=\d+)/i)
64
- end.map do |y|
65
- [y.attr('href'), y.content.strip]
66
- end.reject do |list|
67
- list.last.empty?
68
- end.each_with_index do |value, index|
69
- @block[index] << value.first
70
- @block[index] << value.last
71
- end
63
+ # Trying to find the {tbody} that does not contain any tbody's
64
+ tbody = doc.search("tbody").to_a.reject do |inner, index|
65
+ not inner.inner_html.match(/Nedladdningar/i)
66
+ end.sort_by do |inner|
67
+ inner.search('tbody').count
68
+ end.first
72
69
 
73
- @block.map!{|value| value.reject(&:empty?)}
74
-
75
- break if @block.any?
70
+ # Nothing found, okey!
71
+ return if tbody.nil?
72
+
73
+ tbody = tbody.search('tr').drop(3)
74
+
75
+ tbody.each_with_index do |value, index|
76
+ next unless index % 3 == 0
77
+ length = @block.length
78
+ @block[length] = [] if @block[length].nil?
79
+
80
+ line = tbody[index + 1].inner_html.split('<br />').map(&:strip)
81
+ value = value.search('a')
82
+
83
+ @block[length] << line[0]
84
+ @block[length] << line[2]
85
+ @block[length] << line[4]
86
+ @block[length] << value.last.attributes['href']
87
+ @block[length] << value.last.attributes['title']
88
+
89
+ @block[length].map! {|i| i.gsub(/<\/?[^>]*>/, "").strip}
76
90
  end
77
91
  end
78
92
 
@@ -58,8 +58,8 @@ describe Undertexter, "trying to search for a movie using a title" do
58
58
  Undertexter.should have_at_least(41).find("die hard")
59
59
  end
60
60
 
61
- it "should return the right title, again" do
62
- @use.each{|subtitle| subtitle.title.should match(/die.*hard/i)}
61
+ it "should have 6 die hard movies that does not contain any title" do
62
+ @use.reject{|subtitle| ! subtitle.title.empty?}.count.should be(6)
63
63
  end
64
64
 
65
65
  it "should contain the right details, again" do
@@ -83,6 +83,14 @@ describe Undertexter, "trying to search for a movie using a title" do
83
83
  subtitle.url.match(/id=(\d+)/)[1].should eq(@use[index].details.match(/id=(\d+)/)[1])
84
84
  end
85
85
  end
86
+
87
+ it "should not contain any attributes that contain any html tags" do
88
+ @use.each do |subtitle|
89
+ [:details, :downloads, :cds, :title, :movie_title, :url].each do |method|
90
+ subtitle.send(method).to_s.should_not match(/<\/?[^>]*>/)
91
+ end
92
+ end
93
+ end
86
94
  end
87
95
 
88
96
  describe Undertexter, "should work when trying to fetch some english subtitles" do
data/undertexter.gemspec CHANGED
@@ -3,7 +3,7 @@ $:.push File.expand_path("../lib", __FILE__)
3
3
 
4
4
  Gem::Specification.new do |s|
5
5
  s.name = "undertexter"
6
- s.version = "0.1.3"
6
+ s.version = "0.1.4"
7
7
  s.platform = Gem::Platform::RUBY
8
8
  s.authors = ["Linus Oleander"]
9
9
  s.email = ["linus@oleander.nu"]
@@ -19,7 +19,11 @@ Gem::Specification.new do |s|
19
19
  s.require_paths = ["lib"]
20
20
 
21
21
  s.add_dependency('rest-client')
22
- s.add_dependency('nokogiri')
22
+
23
+ # Using hpricot 0.8.2 due to a bug
24
+ # https://github.com/hpricot/hpricot/issues/#issue/33
25
+
26
+ s.add_dependency('hpricot', '0.8.2')
23
27
  s.add_dependency('mimer_plus')
24
28
  s.add_dependency('levenshteinish')
25
29
  s.add_development_dependency('rspec')
metadata CHANGED
@@ -1,12 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: undertexter
3
3
  version: !ruby/object:Gem::Version
4
- prerelease: false
4
+ hash: 19
5
+ prerelease:
5
6
  segments:
6
7
  - 0
7
8
  - 1
8
- - 3
9
- version: 0.1.3
9
+ - 4
10
+ version: 0.1.4
10
11
  platform: ruby
11
12
  authors:
12
13
  - Linus Oleander
@@ -14,7 +15,7 @@ autorequire:
14
15
  bindir: bin
15
16
  cert_chain: []
16
17
 
17
- date: 2011-02-04 00:00:00 +01:00
18
+ date: 2011-02-06 00:00:00 +01:00
18
19
  default_executable:
19
20
  dependencies:
20
21
  - !ruby/object:Gem::Dependency
@@ -25,22 +26,26 @@ dependencies:
25
26
  requirements:
26
27
  - - ">="
27
28
  - !ruby/object:Gem::Version
29
+ hash: 3
28
30
  segments:
29
31
  - 0
30
32
  version: "0"
31
33
  type: :runtime
32
34
  version_requirements: *id001
33
35
  - !ruby/object:Gem::Dependency
34
- name: nokogiri
36
+ name: hpricot
35
37
  prerelease: false
36
38
  requirement: &id002 !ruby/object:Gem::Requirement
37
39
  none: false
38
40
  requirements:
39
- - - ">="
41
+ - - "="
40
42
  - !ruby/object:Gem::Version
43
+ hash: 59
41
44
  segments:
42
45
  - 0
43
- version: "0"
46
+ - 8
47
+ - 2
48
+ version: 0.8.2
44
49
  type: :runtime
45
50
  version_requirements: *id002
46
51
  - !ruby/object:Gem::Dependency
@@ -51,6 +56,7 @@ dependencies:
51
56
  requirements:
52
57
  - - ">="
53
58
  - !ruby/object:Gem::Version
59
+ hash: 3
54
60
  segments:
55
61
  - 0
56
62
  version: "0"
@@ -64,6 +70,7 @@ dependencies:
64
70
  requirements:
65
71
  - - ">="
66
72
  - !ruby/object:Gem::Version
73
+ hash: 3
67
74
  segments:
68
75
  - 0
69
76
  version: "0"
@@ -77,6 +84,7 @@ dependencies:
77
84
  requirements:
78
85
  - - ">="
79
86
  - !ruby/object:Gem::Version
87
+ hash: 3
80
88
  segments:
81
89
  - 0
82
90
  version: "0"
@@ -98,7 +106,6 @@ files:
98
106
  - Gemfile.lock
99
107
  - README.markdown
100
108
  - Rakefile
101
- - lib/.undertexter.rb.swp
102
109
  - lib/subtitle.rb
103
110
  - lib/undertexter.rb
104
111
  - lib/undertexter/array.rb
@@ -122,6 +129,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
122
129
  requirements:
123
130
  - - ">="
124
131
  - !ruby/object:Gem::Version
132
+ hash: 3
125
133
  segments:
126
134
  - 0
127
135
  version: "0"
@@ -130,13 +138,14 @@ required_rubygems_version: !ruby/object:Gem::Requirement
130
138
  requirements:
131
139
  - - ">="
132
140
  - !ruby/object:Gem::Version
141
+ hash: 3
133
142
  segments:
134
143
  - 0
135
144
  version: "0"
136
145
  requirements: []
137
146
 
138
147
  rubyforge_project: undertexter
139
- rubygems_version: 1.3.7
148
+ rubygems_version: 1.5.0
140
149
  signing_key:
141
150
  specification_version: 3
142
151
  summary: A subtitle search client for undertexter.se
Binary file