busk-ruby-readability 1.2.5 → 1.2.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/lib/readability.rb +20 -18
- data/ruby-readability.gemspec +1 -1
- metadata +4 -4
data/Gemfile.lock
CHANGED
data/lib/readability.rb
CHANGED
@@ -152,26 +152,28 @@ module Readability
|
|
152
152
|
sibling_score_threshold = [10, best_candidate[:content_score] * 0.2].max
|
153
153
|
output = Nokogiri::XML::Node.new('div', @document)
|
154
154
|
begin
|
155
|
-
best_candidate[:elem].
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
155
|
+
if best_candidate[:elem].try(:parent)
|
156
|
+
best_candidate[:elem].parent.try(:children).each do |sibling|
|
157
|
+
append = false
|
158
|
+
append = true if sibling == best_candidate[:elem]
|
159
|
+
append = true if candidates[sibling] && candidates[sibling][:content_score] >= sibling_score_threshold
|
160
|
+
|
161
|
+
if sibling.name.downcase == "p"
|
162
|
+
link_density = get_link_density(sibling)
|
163
|
+
node_content = sibling.text
|
164
|
+
node_length = node_content.length
|
165
|
+
|
166
|
+
if node_length > 80 && link_density < 0.25
|
167
|
+
append = true
|
168
|
+
elsif node_length < 80 && link_density == 0 && node_content =~ /\.( |$)/
|
169
|
+
append = true
|
170
|
+
end
|
169
171
|
end
|
170
|
-
end
|
171
172
|
|
172
|
-
|
173
|
-
|
174
|
-
|
173
|
+
if append
|
174
|
+
sibling.name = "div" unless %w[div p].include?(sibling.name.downcase)
|
175
|
+
output << sibling
|
176
|
+
end
|
175
177
|
end
|
176
178
|
end
|
177
179
|
end
|
data/ruby-readability.gemspec
CHANGED
@@ -2,7 +2,7 @@ Gem::Specification.new do |s|
|
|
2
2
|
s.authors = ["Fabio Mont Alegre", "Rodrigo Flores"]
|
3
3
|
s.email = "it-team@busk.com"
|
4
4
|
s.homepage = "http://github.com/busk/ruby-readability"
|
5
|
-
s.version = "1.2.5"
|
5
|
+
s.version = "1.2.6"
|
6
6
|
s.name = "busk-ruby-readability"
|
7
7
|
s.summary = "A rewrite of original ruby-readability"
|
8
8
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: busk-ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 19
|
5
5
|
prerelease:
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 2
|
9
|
-
- 5
|
10
|
-
version: 1.2.5
|
9
|
+
- 6
|
10
|
+
version: 1.2.6
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Fabio Mont Alegre
|
@@ -16,7 +16,7 @@ autorequire:
|
|
16
16
|
bindir: bin
|
17
17
|
cert_chain: []
|
18
18
|
|
19
|
-
date: 2011-
|
19
|
+
date: 2011-03-01 00:00:00 -03:00
|
20
20
|
default_executable:
|
21
21
|
dependencies: []
|
22
22
|
|