jruby-boilerpipe 0.0.4 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/boilerpipe/document/document.rb +10 -0
- data/lib/boilerpipe/filters/filters.rb +9 -1
- data/lib/boilerpipe/labels/labels.rb +3 -0
- data/lib/boilerpipe/version.rb +1 -1
- data/lib/boilerpipe.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: db2755efc4de0b6e6e62af2576813629fb45e2c0aaecb1a74ea6f0ae86dd6f71
|
4
|
+
data.tar.gz: d188580dc9d398cba9d29105a67075670614c654e61d5dadc47fd02df5a50f73
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c3567f79f5e6a8b53ce6a9f9f4c41088c522c2dc2b7f9e0a629bc40809a26c7f0f50c92f496291da24366c1192118e1a95c1ca6e76996312de4b3a25e9325c2e
|
7
|
+
data.tar.gz: e211fb08628a7f26d5427c37631027fd515389004a845bd0394209b1910eb6a53ee19b8e6d6375157c7d61776ef720a5e776157b30f049fc2d6667f1cfd51f7b
|
@@ -2,5 +2,15 @@ module Boilerpipe
|
|
2
2
|
module Document
|
3
3
|
java_import 'com.kohlschutter.boilerpipe.document.TextDocument'
|
4
4
|
java_import 'com.kohlschutter.boilerpipe.document.TextBlock'
|
5
|
+
|
6
|
+
class TextBlock
|
7
|
+
# Adding a mapping from ruby symbols to the format string used on the java side
|
8
|
+
# e.g. de.l3s.boilerpipe/INDICATES_END_OF_TEXT is not the same as INDICATES_END_OF_TEXT
|
9
|
+
# This is only for when we do TextBlock#has_label? from jruby
|
10
|
+
def has_label?(l)
|
11
|
+
l = "de.l3s.boilerpipe/#{l.to_s}" if l.is_a?(Symbol)
|
12
|
+
self.hasLabel(l)
|
13
|
+
end
|
14
|
+
end
|
5
15
|
end
|
6
16
|
end
|
@@ -4,7 +4,9 @@ module Boilerpipe
|
|
4
4
|
java_import 'com.kohlschutter.boilerpipe.filters.english.TerminatingBlocksFinder'
|
5
5
|
java_import 'com.kohlschutter.boilerpipe.filters.english.NumWordsRulesClassifier'
|
6
6
|
java_import 'com.kohlschutter.boilerpipe.filters.english.HeuristicFilterBase'
|
7
|
+
java_import 'com.kohlschutter.boilerpipe.filters.heuristics.BlockProximityFusion'
|
7
8
|
java_import 'com.kohlschutter.boilerpipe.filters.heuristics.DocumentTitleMatchClassifier'
|
9
|
+
java_import 'com.kohlschutter.boilerpipe.filters.heuristics.TrailingHeadlineToBoilerplateFilter'
|
8
10
|
|
9
11
|
class IgnoreBlocksAfterContentFilter
|
10
12
|
def self.process(doc)
|
@@ -18,7 +20,13 @@ module Boilerpipe
|
|
18
20
|
end
|
19
21
|
end
|
20
22
|
|
21
|
-
class NumWordsRulesClassifier
|
23
|
+
class TrailingHeadlineToBoilerplateFilter
|
24
|
+
def self.process(doc)
|
25
|
+
new.process(doc)
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
class NumWordsRulesClassifier
|
22
30
|
def self.process(doc)
|
23
31
|
new.process(doc)
|
24
32
|
end
|
data/lib/boilerpipe/version.rb
CHANGED
data/lib/boilerpipe.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: jruby-boilerpipe
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.4
|
4
|
+
version: 0.0.5
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Gregory Ostermayr
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-09-
|
11
|
+
date: 2017-09-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,6 +72,7 @@ files:
|
|
72
72
|
- lib/boilerpipe/document/document.rb
|
73
73
|
- lib/boilerpipe/extractors/article_extractor.rb
|
74
74
|
- lib/boilerpipe/filters/filters.rb
|
75
|
+
- lib/boilerpipe/labels/labels.rb
|
75
76
|
- lib/boilerpipe/sax/boilerpipe_html_parser.rb
|
76
77
|
- lib/boilerpipe/version.rb
|
77
78
|
homepage: https://github.com/gregors/jruby-boilerpipe
|