jruby-boilerpipe 0.0.4 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/boilerpipe/document/document.rb +10 -0
- data/lib/boilerpipe/filters/filters.rb +9 -1
- data/lib/boilerpipe/labels/labels.rb +3 -0
- data/lib/boilerpipe/version.rb +1 -1
- data/lib/boilerpipe.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db2755efc4de0b6e6e62af2576813629fb45e2c0aaecb1a74ea6f0ae86dd6f71
|
4
|
+
data.tar.gz: d188580dc9d398cba9d29105a67075670614c654e61d5dadc47fd02df5a50f73
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3567f79f5e6a8b53ce6a9f9f4c41088c522c2dc2b7f9e0a629bc40809a26c7f0f50c92f496291da24366c1192118e1a95c1ca6e76996312de4b3a25e9325c2e
|
7
|
+
data.tar.gz: e211fb08628a7f26d5427c37631027fd515389004a845bd0394209b1910eb6a53ee19b8e6d6375157c7d61776ef720a5e776157b30f049fc2d6667f1cfd51f7b
|
@@ -2,5 +2,15 @@ module Boilerpipe
|
|
2
2
|
module Document
|
3
3
|
java_import 'com.kohlschutter.boilerpipe.document.TextDocument'
|
4
4
|
java_import 'com.kohlschutter.boilerpipe.document.TextBlock'
|
5
|
+
|
6
|
+
class TextBlock
|
7
|
+
# Adding a mapping from ruby symbols to the format string used on the java side
|
8
|
+
# e.g. de.l3s.boilerpipe/INDICATES_END_OF_TEXT is not the same as INDICATES_END_OF_TEXT
|
9
|
+
# This is only for when we do TextBlock#has_label? from jruby
|
10
|
+
def has_label?(l)
|
11
|
+
l = "de.l3s.boilerpipe/#{l.to_s}" if l.is_a?(Symbol)
|
12
|
+
self.hasLabel(l)
|
13
|
+
end
|
14
|
+
end
|
5
15
|
end
|
6
16
|
end
|
@@ -4,7 +4,9 @@ module Boilerpipe
|
|
4
4
|
java_import 'com.kohlschutter.boilerpipe.filters.english.TerminatingBlocksFinder'
|
5
5
|
java_import 'com.kohlschutter.boilerpipe.filters.english.NumWordsRulesClassifier'
|
6
6
|
java_import 'com.kohlschutter.boilerpipe.filters.english.HeuristicFilterBase'
|
7
|
+
java_import 'com.kohlschutter.boilerpipe.filters.heuristics.BlockProximityFusion'
|
7
8
|
java_import 'com.kohlschutter.boilerpipe.filters.heuristics.DocumentTitleMatchClassifier'
|
9
|
+
java_import 'com.kohlschutter.boilerpipe.filters.heuristics.TrailingHeadlineToBoilerplateFilter'
|
8
10
|
|
9
11
|
class IgnoreBlocksAfterContentFilter
|
10
12
|
def self.process(doc)
|
@@ -18,7 +20,13 @@ module Boilerpipe
|
|
18
20
|
end
|
19
21
|
end
|
20
22
|
|
21
|
-
class NumWordsRulesClassifier
|
23
|
+
class TrailingHeadlineToBoilerplateFilter
|
24
|
+
def self.process(doc)
|
25
|
+
new.process(doc)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class NumWordsRulesClassifier
|
22
30
|
def self.process(doc)
|
23
31
|
new.process(doc)
|
24
32
|
end
|
data/lib/boilerpipe/version.rb
CHANGED
data/lib/boilerpipe.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jruby-boilerpipe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregory Ostermayr
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- lib/boilerpipe/document/document.rb
|
73
73
|
- lib/boilerpipe/extractors/article_extractor.rb
|
74
74
|
- lib/boilerpipe/filters/filters.rb
|
75
|
+
- lib/boilerpipe/labels/labels.rb
|
75
76
|
- lib/boilerpipe/sax/boilerpipe_html_parser.rb
|
76
77
|
- lib/boilerpipe/version.rb
|
77
78
|
homepage: https://github.com/gregors/jruby-boilerpipe
|