busk-ruby-readability 1.0.0 → 1.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/readability.rb +13 -11
  2. metadata +17 -5
data/lib/readability.rb CHANGED
@@ -6,7 +6,7 @@ module Readability
6
6
  TEXT_LENGTH_THRESHOLD = 25
7
7
  RETRY_LENGTH = 250
8
8
 
9
- attr_accessor :options, :html
9
+ attr_accessor :options, :html, :best_candidate
10
10
 
11
11
  def initialize(input, options = {})
12
12
  @input = input
@@ -83,17 +83,18 @@ module Readability
83
83
  end
84
84
 
85
85
  def select_best_candidate(candidates)
86
- sorted_candidates = candidates.values.sort { |a, b| b[:content_score] <=> a[:content_score] }
86
+ @best_candidate ||= begin
87
+ sorted_candidates = candidates.values.sort { |a, b| b[:content_score] <=> a[:content_score] }
87
88
 
88
- debug("Top 5 canidates:")
89
- sorted_candidates[0...5].each do |candidate|
90
- debug("Candidate #{candidate[:elem].name}##{candidate[:elem][:id]}.#{candidate[:elem][:class]} with score #{candidate[:content_score]}")
91
- end
92
-
93
- best_candidate = sorted_candidates.first || { :elem => @html.css("body").first, :content_score => 0 }
94
- debug("Best candidate #{best_candidate[:elem].name}##{best_candidate[:elem][:id]}.#{best_candidate[:elem][:class]} with score #{best_candidate[:content_score]}")
89
+ debug("Top 5 candidates:")
90
+ sorted_candidates[0...5].each do |candidate|
91
+ debug("Candidate #{candidate[:elem].andand.name}##{candidate[:elem][:id]}.#{candidate[:elem][:class]} with score #{candidate[:content_score]}")
92
+ end
95
93
 
96
- best_candidate
94
+ best_candidate = sorted_candidates.first || { :elem => @html.css("body").first, :content_score => 0 }
95
+ #debug("Best candidate #{best_candidate[:elem].andand.name} with score #{best_candidate[:content_score]}")
96
+ best_candidate
97
+ end
97
98
  end
98
99
 
99
100
  def get_link_density(elem)
@@ -281,7 +282,8 @@ module Readability
281
282
  el.attributes.each { |a, x| el.delete(a) unless @options[:attributes] && @options[:attributes].include?(a.to_s) }
282
283
  # Otherwise, replace the element with its contents
283
284
  else
284
- el.swap(el.text)
285
+ # keep getting whiny nils with nokogiri
286
+ el.swap(el.text) rescue nil
285
287
  end
286
288
 
287
289
  end
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: busk-ruby-readability
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ hash: 21
5
+ prerelease: false
6
+ segments:
7
+ - 1
8
+ - 0
9
+ - 1
10
+ version: 1.0.1
5
11
  platform: ruby
6
12
  authors: []
7
13
 
@@ -9,7 +15,7 @@ autorequire:
9
15
  bindir: bin
10
16
  cert_chain: []
11
17
 
12
- date: 2010-09-16 00:00:00 -03:00
18
+ date: 2010-09-20 00:00:00 -03:00
13
19
  default_executable:
14
20
  dependencies: []
15
21
 
@@ -33,21 +39,27 @@ rdoc_options: []
33
39
  require_paths:
34
40
  - lib
35
41
  required_ruby_version: !ruby/object:Gem::Requirement
42
+ none: false
36
43
  requirements:
37
44
  - - ">="
38
45
  - !ruby/object:Gem::Version
46
+ hash: 3
47
+ segments:
48
+ - 0
39
49
  version: "0"
40
- version:
41
50
  required_rubygems_version: !ruby/object:Gem::Requirement
51
+ none: false
42
52
  requirements:
43
53
  - - ">="
44
54
  - !ruby/object:Gem::Version
55
+ hash: 3
56
+ segments:
57
+ - 0
45
58
  version: "0"
46
- version:
47
59
  requirements: []
48
60
 
49
61
  rubyforge_project:
50
- rubygems_version: 1.3.5
62
+ rubygems_version: 1.3.7
51
63
  signing_key:
52
64
  specification_version: 3
53
65
  summary: A rewrite of original ruby-readability