busk-ruby-readability 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/readability.rb +13 -11
- metadata +17 -5
data/lib/readability.rb
CHANGED
@@ -6,7 +6,7 @@ module Readability
|
|
6
6
|
TEXT_LENGTH_THRESHOLD = 25
|
7
7
|
RETRY_LENGTH = 250
|
8
8
|
|
9
|
-
attr_accessor :options, :html
|
9
|
+
attr_accessor :options, :html, :best_candidate
|
10
10
|
|
11
11
|
def initialize(input, options = {})
|
12
12
|
@input = input
|
@@ -83,17 +83,18 @@ module Readability
|
|
83
83
|
end
|
84
84
|
|
85
85
|
def select_best_candidate(candidates)
|
86
|
-
|
86
|
+
@best_candidate ||= begin
|
87
|
+
sorted_candidates = candidates.values.sort { |a, b| b[:content_score] <=> a[:content_score] }
|
87
88
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
best_candidate = sorted_candidates.first || { :elem => @html.css("body").first, :content_score => 0 }
|
94
|
-
debug("Best candidate #{best_candidate[:elem].name}##{best_candidate[:elem][:id]}.#{best_candidate[:elem][:class]} with score #{best_candidate[:content_score]}")
|
89
|
+
debug("Top 5 candidates:")
|
90
|
+
sorted_candidates[0...5].each do |candidate|
|
91
|
+
debug("Candidate #{candidate[:elem].andand.name}##{candidate[:elem][:id]}.#{candidate[:elem][:class]} with score #{candidate[:content_score]}")
|
92
|
+
end
|
95
93
|
|
96
|
-
|
94
|
+
best_candidate = sorted_candidates.first || { :elem => @html.css("body").first, :content_score => 0 }
|
95
|
+
#debug("Best candidate #{best_candidate[:elem].andand.name} with score #{best_candidate[:content_score]}")
|
96
|
+
best_candidate
|
97
|
+
end
|
97
98
|
end
|
98
99
|
|
99
100
|
def get_link_density(elem)
|
@@ -281,7 +282,8 @@ module Readability
|
|
281
282
|
el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
|
282
283
|
# Otherwise, replace the element with its contents
|
283
284
|
else
|
284
|
-
|
285
|
+
# keep getting whiny nils with nokogiri
|
286
|
+
el.swap(el.text) rescue nil
|
285
287
|
end
|
286
288
|
|
287
289
|
end
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: busk-ruby-readability
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 21
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 1.0.1
|
5
11
|
platform: ruby
|
6
12
|
authors: []
|
7
13
|
|
@@ -9,7 +15,7 @@ autorequire:
|
|
9
15
|
bindir: bin
|
10
16
|
cert_chain: []
|
11
17
|
|
12
|
-
date: 2010-09-
|
18
|
+
date: 2010-09-20 00:00:00 -03:00
|
13
19
|
default_executable:
|
14
20
|
dependencies: []
|
15
21
|
|
@@ -33,21 +39,27 @@ rdoc_options: []
|
|
33
39
|
require_paths:
|
34
40
|
- lib
|
35
41
|
required_ruby_version: !ruby/object:Gem::Requirement
|
42
|
+
none: false
|
36
43
|
requirements:
|
37
44
|
- - ">="
|
38
45
|
- !ruby/object:Gem::Version
|
46
|
+
hash: 3
|
47
|
+
segments:
|
48
|
+
- 0
|
39
49
|
version: "0"
|
40
|
-
version:
|
41
50
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
none: false
|
42
52
|
requirements:
|
43
53
|
- - ">="
|
44
54
|
- !ruby/object:Gem::Version
|
55
|
+
hash: 3
|
56
|
+
segments:
|
57
|
+
- 0
|
45
58
|
version: "0"
|
46
|
-
version:
|
47
59
|
requirements: []
|
48
60
|
|
49
61
|
rubyforge_project:
|
50
|
-
rubygems_version: 1.3.
|
62
|
+
rubygems_version: 1.3.7
|
51
63
|
signing_key:
|
52
64
|
specification_version: 3
|
53
65
|
summary: A rewrite of original ruby-readability
|