jruby-boilerpipe 0.0.4 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e5f4a7aa89527c4a4ff728af27d20fbb3a70a63b1dfa143edfc176c1aa22f701
4
- data.tar.gz: b70726c8ee26c021ea0bb65ef7f8835398b3017f1829d97697e39b449acb66e1
3
+ metadata.gz: db2755efc4de0b6e6e62af2576813629fb45e2c0aaecb1a74ea6f0ae86dd6f71
4
+ data.tar.gz: d188580dc9d398cba9d29105a67075670614c654e61d5dadc47fd02df5a50f73
5
5
  SHA512:
6
- metadata.gz: 56e28d6ec541be25cb7b2c3895388cf5884808ec3fc0efdf777a9aa39efcd98eca7515d535868c9c4d815247ccaf0e40a09c3bce071a59294d67235d698dc29f
7
- data.tar.gz: 3c0126c84617f75027f770bf48a424a1c3f308d289c2ce5e1e1eea1e06bebd7e7206e15053904bc2b7aed8eed47430ea5b9551da61795a06504aa201489b8547
6
+ metadata.gz: c3567f79f5e6a8b53ce6a9f9f4c41088c522c2dc2b7f9e0a629bc40809a26c7f0f50c92f496291da24366c1192118e1a95c1ca6e76996312de4b3a25e9325c2e
7
+ data.tar.gz: e211fb08628a7f26d5427c37631027fd515389004a845bd0394209b1910eb6a53ee19b8e6d6375157c7d61776ef720a5e776157b30f049fc2d6667f1cfd51f7b
@@ -2,5 +2,15 @@ module Boilerpipe
2
2
  module Document
3
3
  java_import 'com.kohlschutter.boilerpipe.document.TextDocument'
4
4
  java_import 'com.kohlschutter.boilerpipe.document.TextBlock'
5
+
6
+ class TextBlock
7
+ # Adding a mapping from ruby symbols to the format string used on the java side
8
+ # e.g. de.l3s.boilerpipe/INDICATES_END_OF_TEXT is not the same as INDICATES_END_OF_TEXT
9
+ # This is only for when we do TextBlock#has_label? from jruby
10
+ def has_label?(l)
11
+ l = "de.l3s.boilerpipe/#{l.to_s}" if l.is_a?(Symbol)
12
+ self.hasLabel(l)
13
+ end
14
+ end
5
15
  end
6
16
  end
@@ -4,7 +4,9 @@ module Boilerpipe
4
4
  java_import 'com.kohlschutter.boilerpipe.filters.english.TerminatingBlocksFinder'
5
5
  java_import 'com.kohlschutter.boilerpipe.filters.english.NumWordsRulesClassifier'
6
6
  java_import 'com.kohlschutter.boilerpipe.filters.english.HeuristicFilterBase'
7
+ java_import 'com.kohlschutter.boilerpipe.filters.heuristics.BlockProximityFusion'
7
8
  java_import 'com.kohlschutter.boilerpipe.filters.heuristics.DocumentTitleMatchClassifier'
9
+ java_import 'com.kohlschutter.boilerpipe.filters.heuristics.TrailingHeadlineToBoilerplateFilter'
8
10
 
9
11
  class IgnoreBlocksAfterContentFilter
10
12
  def self.process(doc)
@@ -18,7 +20,13 @@ module Boilerpipe
18
20
  end
19
21
  end
20
22
 
21
- class NumWordsRulesClassifier
23
+ class TrailingHeadlineToBoilerplateFilter
24
+ def self.process(doc)
25
+ new.process(doc)
26
+ end
27
+ end
28
+
29
+ class NumWordsRulesClassifier
22
30
  def self.process(doc)
23
31
  new.process(doc)
24
32
  end
@@ -0,0 +1,3 @@
1
+ module Boilerpipe::Labels
2
+ java_import 'com.kohlschutter.boilerpipe.labels.DefaultLabels'
3
+ end
@@ -1,3 +1,3 @@
1
1
  module Boilerpipe
2
- VERSION = '0.0.4'
2
+ VERSION = '0.0.5'
3
3
  end
data/lib/boilerpipe.rb CHANGED
@@ -4,3 +4,4 @@ require 'boilerpipe/sax/boilerpipe_html_parser'
4
4
  require 'boilerpipe/document/document'
5
5
  require 'boilerpipe/extractors/article_extractor'
6
6
  require 'boilerpipe/filters/filters'
7
+ require 'boilerpipe/labels/labels'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jruby-boilerpipe
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - Gregory Ostermayr
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-09-04 00:00:00.000000000 Z
11
+ date: 2017-09-05 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  requirement: !ruby/object:Gem::Requirement
@@ -72,6 +72,7 @@ files:
72
72
  - lib/boilerpipe/document/document.rb
73
73
  - lib/boilerpipe/extractors/article_extractor.rb
74
74
  - lib/boilerpipe/filters/filters.rb
75
+ - lib/boilerpipe/labels/labels.rb
75
76
  - lib/boilerpipe/sax/boilerpipe_html_parser.rb
76
77
  - lib/boilerpipe/version.rb
77
78
  homepage: https://github.com/gregors/jruby-boilerpipe