busk-ruby-readability 1.2.5 → 1.2.6
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/lib/readability.rb +20 -18
- data/ruby-readability.gemspec +1 -1
- metadata +4 -4
data/Gemfile.lock
CHANGED
data/lib/readability.rb
CHANGED
@@ -152,26 +152,28 @@ module Readability
|
|
152
152
|
sibling_score_threshold = [10, best_candidate[:content_score] * 0.2].max
|
153
153
|
output = Nokogiri::XML::Node.new('div', @document)
|
154
154
|
begin
|
155
|
-
best_candidate[:elem].
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
155
|
+
if best_candidate[:elem].try(:parent)
|
156
|
+
best_candidate[:elem].parent.try(:children).each do |sibling|
|
157
|
+
append = false
|
158
|
+
append = true if sibling == best_candidate[:elem]
|
159
|
+
append = true if candidates[sibling] && candidates[sibling][:content_score] >= sibling_score_threshold
|
160
|
+
|
161
|
+
if sibling.name.downcase == "p"
|
162
|
+
link_density = get_link_density(sibling)
|
163
|
+
node_content = sibling.text
|
164
|
+
node_length = node_content.length
|
165
|
+
|
166
|
+
if node_length > 80 && link_density < 0.25
|
167
|
+
append = true
|
168
|
+
elsif node_length < 80 && link_density == 0 && node_content =~ /\.( |$)/
|
169
|
+
append = true
|
170
|
+
end
|
169
171
|
end
|
170
|
-
end
|
171
172
|
|
172
|
-
|
173
|
-
|
174
|
-
|
173
|
+
if append
|
174
|
+
sibling.name = "div" unless %w[div p].include?(sibling.name.downcase)
|
175
|
+
output << sibling
|
176
|
+
end
|
175
177
|
end
|
176
178
|
end
|
177
179
|
end
|
data/ruby-readability.gemspec
CHANGED
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
|
|
2
2
|
s.authors = ["Fabio Mont Alegre", "Rodrigo Flores"]
|
3
3
|
s.email = "it-team@busk.com"
|
4
4
|
s.homepage = "http://github.com/busk/ruby-readability"
|
5
|
-
s.version = "1.2.5"
|
5
|
+
s.version = "1.2.6"
|
6
6
|
s.name = "busk-ruby-readability"
|
7
7
|
s.summary = "A rewrite of original ruby-readability"
|
8
8
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: busk-ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 2
|
9
|
-
- 5
|
10
|
-
version: 1.2.5
|
9
|
+
- 6
|
10
|
+
version: 1.2.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Fabio Mont Alegre
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-03-01 00:00:00 -03:00
|
20
20
|
default_executable:
|
21
21
|
dependencies: []
|
22
22
|
|