jruby-boilerpipe 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e5f4a7aa89527c4a4ff728af27d20fbb3a70a63b1dfa143edfc176c1aa22f701
4
- data.tar.gz: b70726c8ee26c021ea0bb65ef7f8835398b3017f1829d97697e39b449acb66e1
3
+ metadata.gz: db2755efc4de0b6e6e62af2576813629fb45e2c0aaecb1a74ea6f0ae86dd6f71
4
+ data.tar.gz: d188580dc9d398cba9d29105a67075670614c654e61d5dadc47fd02df5a50f73
5
5
  SHA512:
6
- metadata.gz: 56e28d6ec541be25cb7b2c3895388cf5884808ec3fc0efdf777a9aa39efcd98eca7515d535868c9c4d815247ccaf0e40a09c3bce071a59294d67235d698dc29f
7
- data.tar.gz: 3c0126c84617f75027f770bf48a424a1c3f308d289c2ce5e1e1eea1e06bebd7e7206e15053904bc2b7aed8eed47430ea5b9551da61795a06504aa201489b8547
6
+ metadata.gz: c3567f79f5e6a8b53ce6a9f9f4c41088c522c2dc2b7f9e0a629bc40809a26c7f0f50c92f496291da24366c1192118e1a95c1ca6e76996312de4b3a25e9325c2e
7
+ data.tar.gz: e211fb08628a7f26d5427c37631027fd515389004a845bd0394209b1910eb6a53ee19b8e6d6375157c7d61776ef720a5e776157b30f049fc2d6667f1cfd51f7b
data/lib/boilerpipe/document/document.rb CHANGED
@@ -2,5 +2,15 @@ module Boilerpipe
2
2
  module Document
3
3
  java_import 'com.kohlschutter.boilerpipe.document.TextDocument'
4
4
  java_import 'com.kohlschutter.boilerpipe.document.TextBlock'
5
+
6
+ class TextBlock
7
+ # Adding a mapping from ruby symbols to the format string used on the java side
8
+ # e.g. de.l3s.boilerpipe/INDICATES_END_OF_TEXT is not the same as INDICATES_END_OF_TEXT
9
+ # This is only for when we do TextBlock#has_label? from jruby
10
+ def has_label?(l)
11
+ l = "de.l3s.boilerpipe/#{l.to_s}" if l.is_a?(Symbol)
12
+ self.hasLabel(l)
13
+ end
14
+ end
5
15
  end
6
16
  end
data/lib/boilerpipe/filters/filters.rb CHANGED
@@ -4,7 +4,9 @@ module Boilerpipe
4
4
  java_import 'com.kohlschutter.boilerpipe.filters.english.TerminatingBlocksFinder'
5
5
  java_import 'com.kohlschutter.boilerpipe.filters.english.NumWordsRulesClassifier'
6
6
  java_import 'com.kohlschutter.boilerpipe.filters.english.HeuristicFilterBase'
7
+ java_import 'com.kohlschutter.boilerpipe.filters.heuristics.BlockProximityFusion'
7
8
  java_import 'com.kohlschutter.boilerpipe.filters.heuristics.DocumentTitleMatchClassifier'
9
+ java_import 'com.kohlschutter.boilerpipe.filters.heuristics.TrailingHeadlineToBoilerplateFilter'
8
10
 
9
11
  class IgnoreBlocksAfterContentFilter
10
12
  def self.process(doc)
@@ -18,7 +20,13 @@ module Boilerpipe
18
20
  end
19
21
  end
20
22
 
21
- class NumWordsRulesClassifier
23
+ class TrailingHeadlineToBoilerplateFilter
24
+ def self.process(doc)
25
+ new.process(doc)
26
+ end
27
+ end
28
+
29
+ class NumWordsRulesClassifier
22
30
  def self.process(doc)
23
31
  new.process(doc)
24
32
  end
data/lib/boilerpipe/labels/labels.rb ADDED
@@ -0,0 +1,3 @@
1
+ module Boilerpipe::Labels
2
+ java_import 'com.kohlschutter.boilerpipe.labels.DefaultLabels'
3
+ end
data/lib/boilerpipe/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Boilerpipe
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
data/lib/boilerpipe.rb CHANGED
@@ -4,3 +4,4 @@ require 'boilerpipe/sax/boilerpipe_html_parser'
4
4
  require 'boilerpipe/document/document'
5
5
  require 'boilerpipe/extractors/article_extractor'
6
6
  require 'boilerpipe/filters/filters'
7
+ require 'boilerpipe/labels/labels'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jruby-boilerpipe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregory Ostermayr
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-04 00:00:00.000000000 Z
11
+ date: 2017-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -72,6 +72,7 @@ files:
72
72
  - lib/boilerpipe/document/document.rb
73
73
  - lib/boilerpipe/extractors/article_extractor.rb
74
74
  - lib/boilerpipe/filters/filters.rb
75
+ - lib/boilerpipe/labels/labels.rb
75
76
  - lib/boilerpipe/sax/boilerpipe_html_parser.rb
76
77
  - lib/boilerpipe/version.rb
77
78
  homepage: https://github.com/gregors/jruby-boilerpipe