boilerpipe-ruby 0.2.0 → 0.4.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50)
  1. checksums.yaml +5 -5
  2. data/.circleci/config.yml +6 -24
  3. data/.dockerignore +7 -0
  4. data/CHANGELOG.md +34 -1
  5. data/Dockerfile +14 -0
  6. data/README.md +32 -7
  7. data/Rakefile +3 -4
  8. data/bin/console +3 -3
  9. data/boilerpipe-ruby.gemspec +9 -9
  10. data/boilerpipe_flow.md +40 -0
  11. data/lib/boilerpipe.rb +14 -0
  12. data/lib/boilerpipe/document/text_block.rb +10 -12
  13. data/lib/boilerpipe/document/text_document.rb +4 -5
  14. data/lib/boilerpipe/extractors/article_sentence_extractor.rb +17 -0
  15. data/lib/boilerpipe/extractors/canola_extractor.rb +15 -0
  16. data/lib/boilerpipe/extractors/default_extractor.rb +0 -1
  17. data/lib/boilerpipe/extractors/keep_everything_extractor.rb +16 -0
  18. data/lib/boilerpipe/extractors/keep_everything_with_k_min_words_extractor.rb +20 -0
  19. data/lib/boilerpipe/extractors/largest_content_extractor.rb +18 -0
  20. data/lib/boilerpipe/extractors/num_words_rules_extractor.rb +14 -0
  21. data/lib/boilerpipe/filters/block_proximity_fusion.rb +7 -12
  22. data/lib/boilerpipe/filters/boilerplate_block_filter.rb +1 -4
  23. data/lib/boilerpipe/filters/canola_classifier.rb +27 -0
  24. data/lib/boilerpipe/filters/density_rules_classifier.rb +3 -2
  25. data/lib/boilerpipe/filters/document_title_match_classifier.rb +17 -19
  26. data/lib/boilerpipe/filters/expand_title_to_content_filter.rb +0 -3
  27. data/lib/boilerpipe/filters/heuristic_filter_base.rb +1 -1
  28. data/lib/boilerpipe/filters/ignore_blocks_after_content_filter.rb +5 -7
  29. data/lib/boilerpipe/filters/keep_largest_block_filter.rb +1 -4
  30. data/lib/boilerpipe/filters/large_block_same_tag_level_to_content_filter.rb +2 -4
  31. data/lib/boilerpipe/filters/list_at_end_filter.rb +1 -2
  32. data/lib/boilerpipe/filters/mark_everything_content_filter.rb +12 -0
  33. data/lib/boilerpipe/filters/min_clause_words_filter.rb +34 -0
  34. data/lib/boilerpipe/filters/min_words_filter.rb +14 -0
  35. data/lib/boilerpipe/filters/num_words_rules_classifier.rb +0 -4
  36. data/lib/boilerpipe/filters/simple_block_fusion_processor.rb +2 -2
  37. data/lib/boilerpipe/filters/split_paragraph_blocks_filter.rb +37 -0
  38. data/lib/boilerpipe/filters/terminating_blocks_finder.rb +6 -8
  39. data/lib/boilerpipe/filters/trailing_headline_to_boilerplate_filter.rb +0 -3
  40. data/lib/boilerpipe/labels/label_action.rb +1 -1
  41. data/lib/boilerpipe/sax/boilerpipe_html_parser.rb +3 -7
  42. data/lib/boilerpipe/sax/html_content_handler.rb +13 -16
  43. data/lib/boilerpipe/sax/tag_action_map.rb +0 -1
  44. data/lib/boilerpipe/sax/tag_actions/anchor_text.rb +2 -2
  45. data/lib/boilerpipe/sax/tag_actions/block_level.rb +2 -2
  46. data/lib/boilerpipe/sax/tag_actions/block_tag_label.rb +2 -2
  47. data/lib/boilerpipe/sax/tag_actions/body.rb +2 -2
  48. data/lib/boilerpipe/sax/tag_actions/font.rb +2 -2
  49. data/lib/boilerpipe/version.rb +1 -1
  50. metadata +38 -25
@@ -0,0 +1,14 @@
1
+ # Keeps only those content blocks which contain at least k words.
2
+
3
+ module Boilerpipe::Filters
4
+ class MinWordsFilter
5
+ def self.process(min_words, doc)
6
+ doc.text_blocks.each do |tb|
7
+ next if tb.is_not_content?
8
+
9
+ tb.content = false if tb.num_words < min_words
10
+ end
11
+ doc
12
+ end
13
+ end
14
+ end
@@ -1,5 +1,3 @@
1
- # encoding: utf-8
2
-
3
1
  # Classifies TextBlocks as content/not-content through rules that have been determined
4
2
  # using the C4.8 machine learning algorithm, as described in the paper
5
3
  # "Boilerplate Detection using Shallow Text Features" (WSDM 2010), particularly
@@ -7,7 +5,6 @@
7
5
 
8
6
  module Boilerpipe::Filters
9
7
  class NumWordsRulesClassifier
10
-
11
8
  def self.process(doc)
12
9
  empty = Boilerpipe::Document::TextBlock.empty_start
13
10
  text_blocks = [empty] + doc.text_blocks + [empty]
@@ -37,6 +34,5 @@ module Boilerpipe::Filters
37
34
 
38
35
  false
39
36
  end
40
-
41
37
  end
42
38
  end
@@ -1,4 +1,4 @@
1
- # Merges two subsequent blocks if their text densities are equal.
1
+ # Merges two subsequent blocks if their text densities are equal.
2
2
 
3
3
  module Boilerpipe::Filters
4
4
  class SimpleBlockFusionProcessor
@@ -17,7 +17,7 @@ module Boilerpipe::Filters
17
17
  end
18
18
  end
19
19
 
20
- doc.replace_text_blocks!( tbs - blocks_to_remove )
20
+ doc.replace_text_blocks!(tbs - blocks_to_remove)
21
21
  doc
22
22
  end
23
23
  end
@@ -0,0 +1,37 @@
1
+ # Splits TextBlocks at paragraph boundaries.
2
+ #
3
+ # NOTE: This is not fully supported (i.e., it will break highlighting support via
4
+ # #getContainedTextElements()), but this one probably is necessary for some other filters.
5
+ #
6
+ # see MinClauseWordsFilter
7
+
8
+ module Boilerpipe::Filters
9
+ class SplitParagraphBlocksFilter
10
+ def self.process(doc)
11
+ tbs = doc.text_blocks
12
+ new_blocks = []
13
+ changes = false
14
+ tbs.each do |tb|
15
+ paragraphs = tb.text.split(/[\n\r]+/)
16
+
17
+ if paragraphs.size < 2
18
+ new_blocks << tb
19
+ next
20
+ end
21
+
22
+ is_content = tb.is_content?
23
+ labels = tb.labels
24
+ paragraphs.each do |paragraph|
25
+ tbP = ::Boilerpipe::Document::TextBlock.new(paragraph)
26
+ tbP.content = is_content
27
+ tbP.add_labels(labels)
28
+ new_blocks << tbP
29
+ changes = true
30
+ end
31
+ end
32
+
33
+ doc.replace_text_blocks!(new_blocks) if changes
34
+ doc
35
+ end
36
+ end
37
+ end
@@ -1,15 +1,13 @@
1
- # encoding: utf-8
2
-
3
1
  # Finds blocks which are potentially indicating the end of an article
4
2
  # text and marks them with INDICATES_END_OF_TEXT. This can be used
5
3
  # in conjunction with a downstream IgnoreBlocksAfterContentFilter.
6
4
 
7
-
8
5
  module Boilerpipe::Filters
9
6
  class TerminatingBlocksFinder
10
7
  def self.process(doc)
11
8
  doc.text_blocks.each do |tb|
12
9
  next unless tb.num_words < 15
10
+
13
11
  if tb.text.length >= 8 && finds_match?(tb.text.downcase)
14
12
  tb.labels << :INDICATES_END_OF_TEXT
15
13
  elsif tb.link_density == 1.0 && tb.text == 'comment'
@@ -29,11 +27,11 @@ module Boilerpipe::Filters
29
27
  text.include?('what you think...') ||
30
28
  text.include?('add your comment') ||
31
29
  text.include?('add comment') ||
32
- #TODO add this and test
33
- #text.include?('leave a reply') ||
34
- #text.include?('leave a comment') ||
35
- #text.include?('show comments') ||
36
- #text.include?('Share this:') ||
30
+ # TODO add this and test
31
+ # text.include?('leave a reply') ||
32
+ # text.include?('leave a comment') ||
33
+ # text.include?('show comments') ||
34
+ # text.include?('Share this:') ||
37
35
  text.include?('reader views') ||
38
36
  text.include?('have your say') ||
39
37
  text.include?('reader comments') ||
@@ -1,4 +1,3 @@
1
-
2
1
  # Marks trailing headlines TextBlocks that have the label :#HEADING
3
2
  # as boilerplate. Trailing means they are marked content and are
4
3
  # below any other content block.
@@ -6,7 +5,6 @@
6
5
  module Boilerpipe::Filters
7
6
  class TrailingHeadlineToBoilerplateFilter
8
7
  def self.process(doc)
9
-
10
8
  doc.text_blocks.each do |tb|
11
9
  next unless tb.is_content?
12
10
 
@@ -19,6 +17,5 @@ module Boilerpipe::Filters
19
17
 
20
18
  doc
21
19
  end
22
-
23
20
  end
24
21
  end
@@ -2,7 +2,7 @@ module Boilerpipe::Labels
2
2
  class LabelAction
3
3
  attr_reader :labels
4
4
 
5
- def initialize(labels=[])
5
+ def initialize(labels = [])
6
6
  @labels = labels
7
7
  end
8
8
 
@@ -1,20 +1,16 @@
1
- require 'nokogiri'
2
1
  module Boilerpipe::SAX
3
2
  class BoilerpipeHTMLParser
4
3
  def self.parse(text)
5
-
6
- #script bug - delete script tags
7
- text = text.gsub(/\<script>.+?<\/script>/i, '')
4
+ # script bug - delete script tags
5
+ text.gsub!(/\<script>.+?<\/script>/i, '')
8
6
 
9
7
  # nokogiri uses libxml for mri and nekohtml for jruby
10
8
  # mri doesn't remove &nbsp; when missing the semicolon
11
- text = text.gsub(/(&nbsp) /, '\1; ')
12
-
9
+ text.gsub!(/(&nbsp) /, '\1; ')
13
10
 
14
11
  # use nokogiri to fix any bad tags, errors - keep experimenting with this
15
12
  text = Nokogiri::HTML(text).to_html
16
13
 
17
-
18
14
  handler = HTMLContentHandler.new
19
15
  noko_parser = Nokogiri::HTML::SAX::Parser.new(handler)
20
16
  noko_parser.parse(text)
@@ -1,11 +1,8 @@
1
- require 'nokogiri'
2
- require 'set'
3
-
4
1
  module Boilerpipe::SAX
5
2
  class HTMLContentHandler < Nokogiri::XML::SAX::Document
6
3
  attr_reader :in_ignorable_element, :label_stacks, :last_start_tag
7
4
 
8
- attr_accessor :in_anchor_tag, :token_buffer ,:font_size_stack
5
+ attr_accessor :in_anchor_tag, :token_buffer, :font_size_stack
9
6
  ANCHOR_TEXT_START = "$\ue00a<"
10
7
  ANCHOR_TEXT_END = ">\ue00a$"
11
8
 
@@ -34,7 +31,6 @@ module Boilerpipe::SAX
34
31
  @label_stacks << nil
35
32
  tag = name.upcase.intern
36
33
 
37
-
38
34
  tag_action = @tag_actions[tag]
39
35
  if tag_action
40
36
  @tag_level += 1 if tag_action.changes_tag_level?
@@ -51,15 +47,15 @@ module Boilerpipe::SAX
51
47
  def characters(text)
52
48
  flush_block if @flush
53
49
 
54
- return if @in_ignorable_element != 0
50
+ return if in_ignorable_element?
55
51
  return if text.empty?
56
52
 
57
53
  # replace all whitespace with simple space
58
54
  text.gsub!(/\s+/, ' ')
59
55
 
60
56
  # trim whitespace
61
- started_with_whitespace = text =~ /^\s/
62
- ended_with_whitespace = text =~ /\s$/
57
+ started_with_whitespace = text =~ /^\s/
58
+ ended_with_whitespace = text =~ /\s$/
63
59
  text.strip!
64
60
 
65
61
  # add a single space if the block was only whitespace
@@ -158,10 +154,10 @@ module Boilerpipe::SAX
158
154
  end
159
155
 
160
156
  text_block = ::Boilerpipe::Document::TextBlock.new(@text_buffer.strip,
161
- num_words,
162
- num_linked_words,
163
- num_words_in_wrapped_lines,
164
- num_wrapped_lines, @offset_blocks)
157
+ num_words,
158
+ num_linked_words,
159
+ num_words_in_wrapped_lines,
160
+ num_wrapped_lines, @offset_blocks)
165
161
 
166
162
  @offset_blocks += 1
167
163
  clear_buffers
@@ -187,10 +183,10 @@ module Boilerpipe::SAX
187
183
  # \p{No} -- a numeric character of other type
188
184
 
189
185
  def is_word?(word)
190
- word =~ VALID_WORD_CHARACTER
186
+ word =~ VALID_WORD_CHARACTER
191
187
  end
192
188
 
193
- #public void flushBlock() {
189
+ # public void flushBlock() {
194
190
  # int numWords = 0;
195
191
  # int numLinkedWords = 0;
196
192
  # int numWrappedLines = 0;
@@ -198,12 +194,13 @@ module Boilerpipe::SAX
198
194
  # final int maxLineLength = 80;
199
195
  # int numTokens = 0;
200
196
  # int numWordsCurrentLine = 0;
201
- #}
197
+ # }
202
198
 
203
199
  def increase_in_ignorable_element!
204
200
  @in_ignorable_element += 1
205
201
  end
206
202
 
203
+ # should we prevent less than zero here?
207
204
  def decrease_in_ignorable_element!
208
205
  @in_ignorable_element -= 1
209
206
  end
@@ -224,7 +221,6 @@ module Boilerpipe::SAX
224
221
  @in_anchor_tag > 0
225
222
  end
226
223
 
227
-
228
224
  def add_text_block(text_block)
229
225
  @label_stacks.each do |stack|
230
226
  next unless stack
@@ -239,6 +235,7 @@ module Boilerpipe::SAX
239
235
  # append space if last character wasn't already one
240
236
  def append_space
241
237
  return if @sb_last_was_whitespace
238
+
242
239
  @sb_last_was_whitespace = true
243
240
 
244
241
  @text_buffer << ' '
@@ -48,4 +48,3 @@ module Boilerpipe::SAX
48
48
  end
49
49
  end
50
50
  end
51
-
@@ -2,7 +2,7 @@ module Boilerpipe::SAX::TagActions
2
2
  class AnchorText
3
3
  # Marks this tag as "anchor" (this should usually only be set for the <A> tag). Anchor tags may not be nested.
4
4
  # There is a bug in certain versions of NekoHTML which still allows nested tags. If boilerpipe
5
- #* encounters such nestings, a SAXException is thrown.
5
+ # * encounters such nestings, a SAXException is thrown.
6
6
  def start(handler, name, attrs)
7
7
  if handler.in_anchor_tag?
8
8
  handler.in_anchor_tag += 1
@@ -42,7 +42,7 @@ module Boilerpipe::SAX::TagActions
42
42
  # - dunno about nokogiri???????
43
43
  # as nested A elements are not allowed per specification, we
44
44
  # are probably reaching this branch due to a bug in the XML parser
45
- #puts "Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."
45
+ # puts "Warning: SAX input contains nested A elements -- You have probably hit a bug in your HTML parser (e.g., NekoHTML bug #2909310). Please clean the HTML externally and feed it to boilerpipe again. Trying to recover somehow..."
46
46
  end_tag(handler, name)
47
47
  end
48
48
  end
@@ -1,6 +1,6 @@
1
1
  module Boilerpipe::SAX::TagActions
2
- # Explicitly marks this tag a simple "block-level" element,
3
- # which always generates whitespace
2
+ # Explicitly marks this tag a simple "block-level" element,
3
+ # which always generates whitespace
4
4
  class BlockLevel
5
5
  def start(handler, name, attrs)
6
6
  true
@@ -1,6 +1,6 @@
1
1
  module Boilerpipe::SAX::TagActions
2
- # for block-level elements, which triggers some LabelAction on
3
- # the generated TextBlock.
2
+ # for block-level elements, which triggers some LabelAction on
3
+ # the generated TextBlock.
4
4
  class BlockTagLabel
5
5
  def initialize(label_action)
6
6
  @label_action = label_action
@@ -1,6 +1,6 @@
1
1
  module Boilerpipe::SAX::TagActions
2
- # Marks this tag the body element (this should usually only
3
- # be set for the <BODY> tag).
2
+ # Marks this tag the body element (this should usually only
3
+ # be set for the <BODY> tag).
4
4
  class Body
5
5
  def start(handler, name, attrs)
6
6
  handler.flush_block
@@ -10,7 +10,7 @@ module Boilerpipe::SAX::TagActions
10
10
  rel = m[1]
11
11
  val = m[2].to_i # absolute
12
12
  size = rel.empty? ? val : relative(handler.font_size_stack, rel, val)
13
- handler.font_size_stack << size
13
+ handler.font_size_stack << size
14
14
  else
15
15
  handler.font_size_stack << nil
16
16
  end
@@ -27,7 +27,7 @@ module Boilerpipe::SAX::TagActions
27
27
  end
28
28
 
29
29
  def relative(font_size_stack, rel, val)
30
- prev_size = font_size_stack.reverse_each.find{|s| s != nil}
30
+ prev_size = font_size_stack.reverse_each.find { |s| !s.nil? }
31
31
  prev_size = 3 if prev_size.nil?
32
32
 
33
33
  size = if rel == '+'
@@ -1,3 +1,3 @@
1
1
  module Boilerpipe
2
- VERSION = '0.2.0'
2
+ VERSION = '0.4.3'
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: boilerpipe-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregory Ostermayr
8
- autorequire:
8
+ autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-11 00:00:00.000000000 Z
11
+ date: 2020-07-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -16,71 +16,71 @@ dependencies:
16
16
  requirements:
17
17
  - - "~>"
18
18
  - !ruby/object:Gem::Version
19
- version: '1.11'
19
+ version: '2.0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
24
  - - "~>"
25
25
  - !ruby/object:Gem::Version
26
- version: '1.11'
26
+ version: '2.0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rake
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: 12.3.3
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: 12.3.3
41
41
  - !ruby/object:Gem::Dependency
42
- name: rspec
42
+ name: rickshaw
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
45
  - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '3.0'
47
+ version: 0.5.0
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
52
  - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '3.0'
54
+ version: 0.5.0
55
55
  - !ruby/object:Gem::Dependency
56
- name: rickshaw
56
+ name: rspec
57
57
  requirement: !ruby/object:Gem::Requirement
58
58
  requirements:
59
59
  - - "~>"
60
60
  - !ruby/object:Gem::Version
61
- version: 0.4.0
61
+ version: '3.9'
62
62
  type: :development
63
63
  prerelease: false
64
64
  version_requirements: !ruby/object:Gem::Requirement
65
65
  requirements:
66
66
  - - "~>"
67
67
  - !ruby/object:Gem::Version
68
- version: 0.4.0
68
+ version: '3.9'
69
69
  - !ruby/object:Gem::Dependency
70
70
  name: nokogiri
71
71
  requirement: !ruby/object:Gem::Requirement
72
72
  requirements:
73
- - - ">="
73
+ - - "~>"
74
74
  - !ruby/object:Gem::Version
75
- version: 1.6.6.2
75
+ version: '1.10'
76
76
  type: :runtime
77
77
  prerelease: false
78
78
  version_requirements: !ruby/object:Gem::Requirement
79
79
  requirements:
80
- - - ">="
80
+ - - "~>"
81
81
  - !ruby/object:Gem::Version
82
- version: 1.6.6.2
83
- description: A pure ruby implementation of the boilerpipe algorithm
82
+ version: '1.10'
83
+ description: A pure ruby implementation of the boilerpipe web content extraction algorithm
84
84
  email:
85
85
  - "<gregory.ostermayr@gmail.com>"
86
86
  executables: []
@@ -88,9 +88,11 @@ extensions: []
88
88
  extra_rdoc_files: []
89
89
  files:
90
90
  - ".circleci/config.yml"
91
+ - ".dockerignore"
91
92
  - ".gitignore"
92
93
  - ".rspec"
93
94
  - CHANGELOG.md
95
+ - Dockerfile
94
96
  - Gemfile
95
97
  - LICENSE.txt
96
98
  - README.md
@@ -98,14 +100,22 @@ files:
98
100
  - bin/console
99
101
  - bin/setup
100
102
  - boilerpipe-ruby.gemspec
103
+ - boilerpipe_flow.md
101
104
  - lib/boilerpipe.rb
102
105
  - lib/boilerpipe/document/text_block.rb
103
106
  - lib/boilerpipe/document/text_document.rb
104
107
  - lib/boilerpipe/errors.rb
105
108
  - lib/boilerpipe/extractors/article_extractor.rb
109
+ - lib/boilerpipe/extractors/article_sentence_extractor.rb
110
+ - lib/boilerpipe/extractors/canola_extractor.rb
106
111
  - lib/boilerpipe/extractors/default_extractor.rb
112
+ - lib/boilerpipe/extractors/keep_everything_extractor.rb
113
+ - lib/boilerpipe/extractors/keep_everything_with_k_min_words_extractor.rb
114
+ - lib/boilerpipe/extractors/largest_content_extractor.rb
115
+ - lib/boilerpipe/extractors/num_words_rules_extractor.rb
107
116
  - lib/boilerpipe/filters/block_proximity_fusion.rb
108
117
  - lib/boilerpipe/filters/boilerplate_block_filter.rb
118
+ - lib/boilerpipe/filters/canola_classifier.rb
109
119
  - lib/boilerpipe/filters/density_rules_classifier.rb
110
120
  - lib/boilerpipe/filters/document_title_match_classifier.rb
111
121
  - lib/boilerpipe/filters/expand_title_to_content_filter.rb
@@ -114,8 +124,12 @@ files:
114
124
  - lib/boilerpipe/filters/keep_largest_block_filter.rb
115
125
  - lib/boilerpipe/filters/large_block_same_tag_level_to_content_filter.rb
116
126
  - lib/boilerpipe/filters/list_at_end_filter.rb
127
+ - lib/boilerpipe/filters/mark_everything_content_filter.rb
128
+ - lib/boilerpipe/filters/min_clause_words_filter.rb
129
+ - lib/boilerpipe/filters/min_words_filter.rb
117
130
  - lib/boilerpipe/filters/num_words_rules_classifier.rb
118
131
  - lib/boilerpipe/filters/simple_block_fusion_processor.rb
132
+ - lib/boilerpipe/filters/split_paragraph_blocks_filter.rb
119
133
  - lib/boilerpipe/filters/terminating_blocks_finder.rb
120
134
  - lib/boilerpipe/filters/trailing_headline_to_boilerplate_filter.rb
121
135
  - lib/boilerpipe/labels/default.rb
@@ -140,7 +154,7 @@ homepage: https://github.com/gregors/boilerpipe-ruby
140
154
  licenses:
141
155
  - Apache 2.0
142
156
  metadata: {}
143
- post_install_message:
157
+ post_install_message:
144
158
  rdoc_options: []
145
159
  require_paths:
146
160
  - lib
@@ -155,9 +169,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
155
169
  - !ruby/object:Gem::Version
156
170
  version: '0'
157
171
  requirements: []
158
- rubyforge_project:
159
- rubygems_version: 2.6.12
160
- signing_key:
172
+ rubygems_version: 3.0.8
173
+ signing_key:
161
174
  specification_version: 4
162
- summary: A pure ruby implemenation of the boilerpipe algorithm
175
+ summary: A pure ruby implementation of the boilerpipe web content extraction algorithm
163
176
  test_files: []