open_nlp 0.2.0-java → 0.3.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/.gitignore +1 -0
- data/.rspec +1 -0
- data/.rubocop.yml +24 -0
- data/.ruby-version +1 -1
- data/.travis.yml +1 -3
- data/Gemfile +2 -1
- data/Gemfile.lock +32 -13
- data/Rakefile +1 -1
- data/lib/open_nlp/categorizer.rb +2 -2
- data/lib/open_nlp/chunker.rb +21 -20
- data/lib/open_nlp/java_class.rb +2 -8
- data/lib/open_nlp/model.rb +4 -1
- data/lib/open_nlp/model/categorizer.rb +1 -1
- data/lib/open_nlp/model/chunker.rb +1 -1
- data/lib/open_nlp/model/detokenizer.rb +1 -1
- data/lib/open_nlp/model/parser.rb +1 -1
- data/lib/open_nlp/model/pos_tagger.rb +1 -1
- data/lib/open_nlp/model/sentence_detector.rb +1 -1
- data/lib/open_nlp/model/tokenizer.rb +1 -1
- data/lib/open_nlp/named_entity_detector.rb +2 -1
- data/lib/open_nlp/parser.rb +13 -12
- data/lib/open_nlp/parser/parse.rb +28 -7
- data/lib/open_nlp/pos_tagger.rb +6 -3
- data/lib/open_nlp/sentence_detector.rb +5 -3
- data/lib/open_nlp/tokenizer.rb +3 -2
- data/lib/open_nlp/tool.rb +6 -2
- data/lib/open_nlp/util/span.rb +25 -9
- data/lib/open_nlp/version.rb +1 -1
- data/open_nlp.gemspec +10 -11
- data/spec/model/tokenizer_spec.rb +1 -1
- data/spec/named_entity_detector_spec.rb +1 -1
- data/spec/parser/parse_spec.rb +40 -40
- data/spec/parser_spec.rb +2 -2
- data/spec/pos_tagger_spec.rb +2 -2
- data/spec/spec_helper.rb +25 -0
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
 ---
-…
-  metadata.gz: …
-  data.tar.gz: …
+SHA256:
+  metadata.gz: 611fa39e5357a43ac259dc113b299aede41e5379a1912dcd0bb32120da05356d
+  data.tar.gz: 0ef94231098429a66a11f8d3ab5dc5708156fcc425caa5ab4e29278f5233ab1f
 SHA512:
-  metadata.gz: …
-  data.tar.gz: …
+  metadata.gz: 6e5fc1bbbd6059d818bbe3abc9408f497b845dd6e4a314b19a9d47a8e85a3435cdbe8d6c9fd2610f0a643a26974d4b20a0fc995c6660549149f9acaa688d3f52
+  data.tar.gz: c416e14a29512f1935a00289625ea162fac34f8dad4d320a8c8cf940253a6f9ac5c845e07bda12deca699ec5da809bbc733d9e392795e2895faa78e0d826a76c
data/.gitignore
CHANGED
data/.rspec
CHANGED
data/.rubocop.yml
ADDED
@@ -0,0 +1,24 @@
+Metrics/AbcSize:
+  Max: 41
+
+Metrics/BlockLength:
+  Exclude:
+    - spec/**/*.rb
+
+Metrics/LineLength:
+  Max: 153
+
+Metrics/MethodLength:
+  Max: 15
+
+Metrics/ParameterLists:
+  Max: 6
+
+Style/ClassAndModuleChildren:
+  Enabled: false
+
+Style/ColonMethodCall:
+  Enabled: false
+
+Style/Documentation:
+  Enabled: false
data/.ruby-version
CHANGED
@@ -1 +1 @@
-jruby-9.…
+jruby-9.2.4.0
data/.travis.yml
CHANGED
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,24 +1,42 @@
 PATH
   remote: .
   specs:
+    open_nlp (0.3.0-java)
 
 GEM
   remote: https://rubygems.org/
   specs:
-    …
+    ast (2.4.0)
+    diff-lcs (1.3)
+    jaro_winkler (1.5.1-java)
+    parallel (1.12.1)
+    parser (2.5.3.0)
+      ast (~> 2.4.0)
+    powerpack (0.1.2)
+    rainbow (3.0.0)
+    rspec (3.8.0)
+      rspec-core (~> 3.8.0)
+      rspec-expectations (~> 3.8.0)
+      rspec-mocks (~> 3.8.0)
+    rspec-core (3.8.0)
+      rspec-support (~> 3.8.0)
+    rspec-expectations (3.8.2)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.…
-    rspec-mocks (3.…
+      rspec-support (~> 3.8.0)
+    rspec-mocks (3.8.0)
       diff-lcs (>= 1.2.0, < 2.0)
-      rspec-support (~> 3.…
-    rspec-support (3.…
+      rspec-support (~> 3.8.0)
+    rspec-support (3.8.0)
+    rubocop (0.60.0)
+      jaro_winkler (~> 1.5.1)
+      parallel (~> 1.10)
+      parser (>= 2.5, != 2.5.1.1)
+      powerpack (~> 0.1)
+      rainbow (>= 2.2.2, < 4.0)
+      ruby-progressbar (~> 1.7)
+      unicode-display_width (~> 1.4.0)
+    ruby-progressbar (1.10.0)
+    unicode-display_width (1.4.0)
 
 PLATFORMS
   java

@@ -26,6 +44,7 @@ PLATFORMS
 DEPENDENCIES
   open_nlp!
   rspec
+  rubocop
 
 BUNDLED WITH
-   1.…
+   1.17.1
data/Rakefile
CHANGED
@@ -1 +1 @@
-require …
+require 'bundler/gem_tasks'
data/lib/open_nlp/categorizer.rb
CHANGED
@@ -7,10 +7,10 @@ module OpenNlp
     # @param [String] str string to be categorized
     # @return [String] category
     def categorize(str)
-      …
+      raise ArgumentError, 'str param must be a String' unless str.is_a?(String)
 
       outcomes = j_instance.categorize(str)
       j_instance.getBestCategory(outcomes)
     end
   end
-end
+end
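For orientation, a minimal usage sketch of the stricter categorize call; the document-categorizer model filename below is an assumption, since no categorizer model is named anywhere in this diff:

    require 'open_nlp'

    # Hypothetical model path; a doccat model must be trained or downloaded separately.
    model       = OpenNlp::Model::Categorizer.new('en-doccat.bin')
    categorizer = OpenNlp::Categorizer.new(model)

    categorizer.categorize('Some text to classify') # => best category as a String
    categorizer.categorize(123)                     # => ArgumentError, 'str param must be a String'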
data/lib/open_nlp/chunker.rb
CHANGED
@@ -2,16 +2,19 @@ module OpenNlp
   class Chunker < Tool
     self.java_class = Java::opennlp.tools.chunker.ChunkerME
 
+    # Initializes new instance of Chunker
+    #
+    # @param [OpenNlp::Model] model chunker model
+    # @param [Model::Tokenizer] token_model tokenizer model
+    # @param [Model::POSTagger] pos_model part-of-speech tagging model
     def initialize(model, token_model, pos_model)
       super(model)
 
-      …
-      …
-      end
+      token_model.is_a?(Model::Tokenizer) ||
+        raise(ArgumentError, 'token model must be an OpenNlp::Tokenizer::Model')
 
-      …
-      …
-      end
+      pos_model.is_a?(Model::POSTagger) ||
+        raise(ArgumentError, 'pos model must be an OpenNlp::POSTagger::Model')
 
       @tokenizer = Tokenizer.new(token_model)
       @pos_tagger = POSTagger.new(pos_model)

@@ -22,7 +25,7 @@ module OpenNlp
     # @param [String] str string to chunk
     # @return [Array] array of chunks with part-of-sentence information
     def chunk(str)
-      …
+      raise ArgumentError, 'str must be a String' unless str.is_a?(String)
 
       tokens = tokenizer.tokenize(str)
       pos_tags = pos_tagger.tag(tokens).to_ary

@@ -39,24 +42,22 @@ module OpenNlp
     def build_chunks(chunks, tokens, pos_tags)
       data = tokens.zip(pos_tags, chunks)
 
-      data.…
+      data.each_with_object([]) do |val, acc|
         chunk = val[2]
-        acc << [{val[0] => val[1]}] if chunk[0] == 'B' # add token to chunk if it is a start of chunk
-
-        if chunk[0]…
-          if acc.last
-            acc.last << {val[0] => val[1]} # add token to chunk if it is a continuation of chunk
-          else
-            acc << [{val[0] => val[1]}] # add token to new chunk if no chunks exists
-          end
-        end
+        acc << [{ val[0] => val[1] }] if chunk[0] == 'B' # add token to chunk if it is a start of chunk
+
+        next if chunk[0] != 'I'
 
-        acc…
+        if acc.last
+          acc.last << { val[0] => val[1] } # add token to chunk if it is a continuation of chunk
+        else
+          acc << [{ val[0] => val[1] }] # add token to new chunk if no chunks exists
+        end
       end
     end
 
-    def …
+    def last_probabilities
       j_instance.probs.to_ary
     end
   end
-end
+end
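A sketch of how the reworked constructor and chunk are meant to be called; only en-token.bin is named in this diff's specs, so the chunker and POS model filenames below are assumptions:

    require 'open_nlp'

    chunker_model = OpenNlp::Model::Chunker.new('en-chunker.bin')      # filename assumed
    token_model   = OpenNlp::Model::Tokenizer.new('en-token.bin')
    pos_model     = OpenNlp::Model::POSTagger.new('en-pos-maxent.bin') # filename assumed

    chunker = OpenNlp::Chunker.new(chunker_model, token_model, pos_model)
    chunker.chunk('The red fox sleeps soundly .')
    # => array of chunks, each an array of { token => pos_tag } hashes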
data/lib/open_nlp/java_class.rb
CHANGED
data/lib/open_nlp/model.rb
CHANGED
@@ -4,6 +4,9 @@ module OpenNlp
 
     attr_reader :j_model
 
+    # Initializes new instance of Model
+    #
+    # @param [String, java.io.FileInputStream] model
     def initialize(model)
       @j_model = self.class.java_class.new(model_stream(model))
     end

@@ -17,7 +20,7 @@ module OpenNlp
       when String
         java.io.FileInputStream.new(model)
       else
-        …
+        raise ArgumentError, 'Model must be either a string or a java.io.FileInputStream'
       end
     end
   end
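Per the model_stream branch above, a model can be built from a file path or an already-open Java stream; a small sketch, assuming the model file is present locally:

    require 'open_nlp'

    # From a file path (wrapped in a java.io.FileInputStream internally)
    model = OpenNlp::Model::Tokenizer.new('en-token.bin')

    # From an existing Java stream
    stream = java.io.FileInputStream.new('en-token.bin')
    model  = OpenNlp::Model::Tokenizer.new(stream)

    OpenNlp::Model::Tokenizer.new(42)
    # => ArgumentError, 'Model must be either a string or a java.io.FileInputStream'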
data/lib/open_nlp/named_entity_detector.rb
CHANGED
@@ -7,7 +7,8 @@ module OpenNlp
     # @param [Array<String>] tokens tokens to run name detection on
     # @return [Array<Java::opennlp.tools.util.Span>] names detected
     def detect(tokens)
-      …
+      raise ArgumentError, 'tokens must be an instance of Array' unless tokens.is_a?(Array)
+
       j_instance.find(tokens.to_java(:String)).to_ary
     end
   end
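The spec for this class (further down, under data/spec/named_entity_detector_spec.rb) builds the detector from en-ner-time.bin; a minimal sketch along the same lines:

    require 'open_nlp'

    model    = OpenNlp::Model::NamedEntityDetector.new('en-ner-time.bin')
    detector = OpenNlp::NamedEntityDetector.new(model)

    tokens = %w[The meeting starts at 5 pm .]
    spans  = detector.detect(tokens) # => Array of Java::opennlp.tools.util.Span
    spans.each { |span| puts tokens[span.getStart...span.getEnd].join(' ') }

    detector.detect('not an array') # => ArgumentError, 'tokens must be an instance of Array'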
data/lib/open_nlp/parser.rb
CHANGED
@@ -1,13 +1,15 @@
 module OpenNlp
   class Parser < Tool
+    # Initializes new instance of Parser
+    #
+    # @param [OpenNlp::Model::Parser] parser_model
+    # @param [OpenNlp::Model::Tokenizer] token_model
     def initialize(parser_model, token_model)
-      …
-      …
-      end
+      parser_model.is_a?(OpenNlp::Model::Parser) ||
+        raise(ArgumentError, 'parser_model must be an OpenNlp::Model')
 
-      …
-      …
-      end
+      token_model.is_a?(Model::Tokenizer) ||
+        raise(ArgumentError, 'token_model must be an OpenNlp::Tokenizer::Model')
 
       @j_instance = Java::opennlp.tools.parser.ParserFactory.create(parser_model.j_model)
       @tokenizer = Tokenizer.new(token_model)

@@ -19,6 +21,7 @@ module OpenNlp
     # @return [OpenNlp::Parser::Parse]
     def parse(text)
       raise ArgumentError, 'passed text must be a String' unless text.is_a?(String)
+
       text.empty? ? {} : parse_tokens(tokenizer.tokenize(text), text)
     end
 

@@ -27,16 +30,14 @@ module OpenNlp
     attr_reader :tokenizer
 
     def get_token_offset(text, tokens, index)
-      …
-      return offset unless index > 0
+      return 0 if index.zero?
 
-      …
-      …
+      (1..index).inject(0) do |offset, i|
+        text.index(tokens[i], offset + tokens[i - 1].size)
       end
-      offset
     end
 
-    def build_parse_obj(text, span_start, span_end, type=Java::opennlp.tools.parser.AbstractBottomUpParser::INC_NODE, probability=1, token_index=0)
+    def build_parse_obj(text, span_start, span_end, type = Java::opennlp.tools.parser.AbstractBottomUpParser::INC_NODE, probability = 1, token_index = 0)
       Java::opennlp.tools.parser.Parse.new(
         text.to_java(:String),
         Java::opennlp.tools.util.Span.new(span_start, span_end),
data/lib/open_nlp/parser/parse.rb
CHANGED
@@ -6,36 +6,57 @@ module OpenNlp
 
     self.java_class = Java::opennlp.tools.parser.Parse
 
+    # Initializes instance of Parser::Parse
+    #
+    # @param [Java::opennlp.tools.parser.Parse] java_instance
     def initialize(java_instance)
-      …
+      java_instance.is_a?(self.class.java_class) ||
+        raise(ArgumentError, "java_instance must be an instance of #{self.class.java_class.name}")
 
       @j_instance = java_instance
     end
 
+    # Composes tree bank string, nested string representation of sentence parts, parts-of-speech and words,
+    # for example:
+    # '(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))'
+    #
+    # @return [String]
     def tree_bank_string
-      span…
-      …
+      span  = j_instance.getSpan
+      text  = j_instance.getText
+      type  = j_instance.getType
+      res   = ''
+      start = span.getStart
 
       res << "(#{type} " if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
 
       j_instance.getChildren.each do |child|
         child_span = child.span
-        res << text[start..child_span.getStart-1] if start < child_span.getStart
+        res << text[start..child_span.getStart - 1] if start < child_span.getStart
         res << self.class.new(child).tree_bank_string
         start = child_span.getEnd
       end
 
-      res << text[start..span.getEnd-1] if start < span.getEnd
-      res << …
+      res << text[start..span.getEnd - 1] if start < span.getEnd
+      res << ')' if type != Java::opennlp.tools.parser.AbstractBottomUpParser::TOK_NODE
 
       res
     end
 
+    # Composes array representation of sentence tree where
+    # each hash has following fields:
+    #
+    #   :type => <[String] node type>,
+    #   :parent_type => <[String] type of parent node>,
+    #   :token => <[String] current token>,
+    #   :children => <Array[Hash] array of child nodes hashes>
+    #
+    # @return [Array<Hash>]
     def code_tree
       kids = j_instance.getChildren
 
       kids.each_with_object([]) do |kid, acc|
-        data…
+        data = { type: kid.getType, parent_type: j_instance.getType, token: kid.toString }
         subtree = self.class.new(kid).code_tree
         data[:children] = subtree unless subtree.empty?
         acc << data
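Given a parse returned by OpenNlp::Parser#parse (see the parser sketch above), the two readers documented here can be used like this:

    parse = parser.parse('The red fox sleeps soundly .')

    parse.tree_bank_string
    # => "(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))"

    parse.code_tree
    # => nested Array of Hashes with :type, :parent_type, :token and (optionally) :children keys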
data/lib/open_nlp/pos_tagger.rb
CHANGED
@@ -2,10 +2,13 @@ module OpenNlp
   class POSTagger < Tool
     self.java_class = Java::opennlp.tools.postag.POSTaggerME
 
+    # Adds tags to tokens passed as argument
+    #
+    # @param [Array<String>, String] tokens tokens to tag
+    # @return [Array<String>, String] array of part-of-speech tags or string with added part-of-speech tags
     def tag(tokens)
-      …
-      …
-      end
+      !tokens.is_a?(Array) && !tokens.is_a?(String) &&
+        raise(ArgumentError, 'tokens must be an instance of String or Array')
 
       j_instance.tag(tokens.to_java(:String))
     end
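The array form is exercised in data/spec/pos_tagger_spec.rb below; the POS model filename here is an assumption, since the spec's model path is not visible in this diff:

    require 'open_nlp'

    model  = OpenNlp::Model::POSTagger.new('en-pos-maxent.bin') # filename assumed
    tagger = OpenNlp::POSTagger.new(model)

    tagger.tag(%w[The quick brown fox jumps over the lazy dog .]).to_ary
    # => ["DT", "JJ", "JJ", "NN", "NNS", "IN", "DT", "JJ", "NN", "."]

    tagger.tag(42) # => ArgumentError, 'tokens must be an instance of String or Array'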
data/lib/open_nlp/sentence_detector.rb
CHANGED
@@ -7,7 +7,8 @@ module OpenNlp
     # @param [String] string string to detect sentences in
     # @return [Array<String>] array of detected sentences
     def detect(str)
-      …
+      raise ArgumentError, 'str must be a String' unless str.is_a?(String)
+
       j_instance.sentDetect(str).to_ary
     end
 

@@ -16,10 +17,11 @@ module OpenNlp
     # @param [String] str
     # @return [Array<OpenNlp::Util::Span>] array of spans for detected sentences
     def pos_detect(str)
-      …
+      raise ArgumentError, 'str must be a String' unless str.is_a?(String)
+
       j_instance.sentPosDetect(str).map do |span|
         OpenNlp::Util::Span.new(span.getStart, span.getEnd)
       end
     end
   end
-end
+end
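A sketch of both detection calls; the sentence model filename is an assumption, as no sentence model is named in this diff:

    require 'open_nlp'

    model    = OpenNlp::Model::SentenceDetector.new('en-sent.bin') # filename assumed
    detector = OpenNlp::SentenceDetector.new(model)

    detector.detect('The fox sleeps. The dog barks.')
    # => ["The fox sleeps.", "The dog barks."]

    detector.pos_detect('The fox sleeps. The dog barks.')
    # => [OpenNlp::Util::Span, ...] with character offsets of each detected sentence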
data/lib/open_nlp/tokenizer.rb
CHANGED
@@ -7,13 +7,14 @@ module OpenNlp
     # @param [String] str string to tokenize
     # @return [Array] array of string tokens
     def tokenize(str)
-      …
+      raise ArgumentError, 'str must be a String' unless str.is_a?(String)
+
       j_instance.tokenize(str).to_ary
     end
 
     private
 
-    def …
+    def last_probabilities
       j_instance.getTokenProbabilities.to_ary
     end
   end
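Tokenization with the en-token.bin model used throughout the specs (assumed to be present locally):

    require 'open_nlp'

    model     = OpenNlp::Model::Tokenizer.new('en-token.bin')
    tokenizer = OpenNlp::Tokenizer.new(model)

    tokenizer.tokenize('The red fox sleeps soundly .')
    # => ["The", "red", "fox", "sleeps", "soundly", "."]

    tokenizer.tokenize(:not_a_string) # => ArgumentError, 'str must be a String'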
data/lib/open_nlp/tool.rb
CHANGED
@@ -4,9 +4,13 @@ module OpenNlp
 
     attr_reader :j_instance
 
+    # Initializes instance of Tool
+    #
+    # @param [OpenNlp::Model] model instance of model class to initialize a tool object
     def initialize(model)
-      …
+      raise ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
+
       @j_instance = self.class.java_class.new(model.j_model)
     end
   end
-end
+end
data/lib/open_nlp/util/span.rb
CHANGED
@@ -5,34 +5,50 @@ class OpenNlp::Util::Span
 
   attr_reader :j_instance
 
-  …
-  …
-  …
-  …
-  …
+  # Initializes new instance of Util::Span
+  #
+  # @param [Integer] start start index of the span
+  # @param [Integer] end end index of the span
+  def initialize(start_pos, end_pos)
+    raise ArgumentError, 'start should be an integer' unless start_pos.is_a?(Integer)
+    raise ArgumentError, 'end should be an integer' unless end_pos.is_a?(Integer)
+
+    @j_instance = self.class.java_class.new(start_pos, end_pos)
   end
 
+  # Returns end index of the span
+  #
+  # @return [Integer]
   def start
     j_instance.getStart
   end
 
+  # Returns end index of the span
+  #
+  # @return [Integer]
   def end
     j_instance.getEnd
   end
 
+  # Returns type of the span
+  #
+  # @return [String]
   def type
     j_instance.getType
   end
 
+  # Returns length of the span
+  #
+  # @return [Integer]
   def length
     j_instance.length
   end
 
-  def ==(…
-    return false unless …
+  def ==(other)
+    return false unless other.is_a?(self.class)
 
-    […
-      acc…
+    %i[start end type].inject(true) do |acc, method|
+      acc && public_send(method) == other.public_send(method)
     end
   end
 end
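Spans are thin value objects over the Java class; equality compares start, end and type, as the rewritten #== above shows:

    require 'open_nlp'

    a = OpenNlp::Util::Span.new(0, 3)
    b = OpenNlp::Util::Span.new(0, 3)

    a.start  # => 0
    a.end    # => 3
    a.length # => 3
    a == b   # => true

    OpenNlp::Util::Span.new('0', 3) # => ArgumentError, 'start should be an integer'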
data/lib/open_nlp/version.rb
CHANGED
data/open_nlp.gemspec
CHANGED
@@ -1,20 +1,19 @@
-…
-lib = File.expand_path('../lib', __FILE__)
+lib = File.expand_path('lib', __dir__)
 $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
 require 'open_nlp/version'
 
 Gem::Specification.new do |gem|
-  gem.name = …
+  gem.name = 'open_nlp'
   gem.version = OpenNlp::VERSION
-  gem.authors = […
-  gem.description = …
-  gem.summary = …
-  gem.homepage = …
+  gem.authors = ['Hck']
+  gem.description = 'JRuby tools wrapper for Apache OpenNLP'
+  gem.summary = 'A JRuby wrapper for the Apache OpenNLP tools library'
+  gem.homepage = 'http://github.com/hck/open_nlp'
 
-  gem.files = `git ls-files`.split(…
-  gem.executables = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
+  gem.files = `git ls-files`.split($INPUT_RECORD_SEPARATOR)
+  gem.executables = gem.files.grep(%r{^bin/}).map { |f| File.basename(f) }
   gem.test_files = gem.files.grep(%r{^(test|spec|features)/})
-  gem.require_paths = […
+  gem.require_paths = ['lib']
 
-  gem.platform = …
+  gem.platform = 'java'
 end
data/spec/named_entity_detector_spec.rb
CHANGED
@@ -3,7 +3,7 @@ require 'spec_helper'
 RSpec.describe OpenNlp::NamedEntityDetector do
   let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, 'en-ner-time.bin')) }
   let(:ne_detector) { described_class.new(model) }
-…
+
   describe 'initialization' do
     it 'initializes with a valid model' do
       expect(ne_detector.j_instance).to be_a(described_class.java_class)
data/spec/parser/parse_spec.rb
CHANGED
@@ -40,66 +40,66 @@ RSpec.describe OpenNlp::Parser::Parse do
   let(:expected_code_tree) do
     [
       {
-        …
+        type: 'S',
+        parent_type: 'TOP',
+        token: 'The red fox sleeps soundly .',
+        children: [
           {
-            …
+            type: 'NP',
+            parent_type: 'S',
+            token: 'The red fox',
+            children: [
               {
-                …
+                type: 'DT',
+                parent_type: 'NP',
+                token: 'The',
+                children: [{ type: 'TK', parent_type: 'DT', token: 'The' }]
               },
               {
-                …
+                type: 'JJ',
+                parent_type: 'NP',
+                token: 'red',
+                children: [{ type: 'TK', parent_type: 'JJ', token: 'red' }]
               },
               {
-                …
+                type: 'NN',
+                parent_type: 'NP',
+                token: 'fox',
+                children: [{ type: 'TK', parent_type: 'NN', token: 'fox' }]
              }
            ]
          },
          {
-            …
+            type: 'VP',
+            parent_type: 'S',
+            token: 'sleeps soundly',
+            children: [
              {
-                …
+                type: 'VBZ',
+                parent_type: 'VP',
+                token: 'sleeps',
+                children: [{ type: 'TK', parent_type: 'VBZ', token: 'sleeps' }]
              },
              {
-                …
+                type: 'ADVP',
+                parent_type: 'VP',
+                token: 'soundly',
+                children: [
                  {
-                    …
+                    type: 'RB',
+                    parent_type: 'ADVP',
+                    token: 'soundly',
+                    children: [{ type: 'TK', parent_type: 'RB', token: 'soundly' }]
                  }
                ]
              }
            ]
          },
          {
-            …
+            type: '.',
+            parent_type: 'S',
+            token: '.',
+            children: [{ type: 'TK', parent_type: '.', token: '.' }]
          }
        ]
      }
data/spec/parser_spec.rb
CHANGED
@@ -1,8 +1,8 @@
 require 'spec_helper'
 
 RSpec.describe OpenNlp::Parser do
-  let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, …
-  let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, …
+  let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, 'en-parser-chunking.bin')) }
+  let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
   let(:parser) { described_class.new(model, token_model) }
 
   describe 'initialization' do
data/spec/pos_tagger_spec.rb
CHANGED
@@ -21,8 +21,8 @@ RSpec.describe OpenNlp::POSTagger do
   end
 
   it 'tags provided tokens' do
-    tagged = pos_tagger.tag(%w…
-    expect(tagged.to_ary).to eq(%w…
+    tagged = pos_tagger.tag(%w[The quick brown fox jumps over the lazy dog .])
+    expect(tagged.to_ary).to eq(%w[DT JJ JJ NN NNS IN DT JJ NN .])
   end
 
   it 'raises an ArgumentError when nil is passed as an argument' do
data/spec/spec_helper.rb
CHANGED
@@ -3,3 +3,28 @@ require 'java'
 require 'open_nlp'
 
 FIXTURES_DIR = File.join(File.dirname(__FILE__), 'fixtures')
+
+RSpec.configure do |config|
+  config.expect_with :rspec do |expectations|
+    expectations.include_chain_clauses_in_custom_matcher_descriptions = true
+  end
+
+  config.mock_with :rspec do |mocks|
+    mocks.verify_partial_doubles = true
+  end
+
+  config.filter_run :focus
+  config.run_all_when_everything_filtered = true
+
+  config.example_status_persistence_file_path = 'spec/examples.txt'
+
+  config.disable_monkey_patching!
+
+  config.warnings = true
+
+  config.profile_examples = 10
+
+  config.order = :random
+
+  Kernel.srand config.seed
+end
metadata
CHANGED
@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: open_nlp
 version: !ruby/object:Gem::Version
-  version: 0.…
+  version: 0.3.0
 platform: java
 authors:
 - Hck
 autorequire:
 bindir: bin
 cert_chain: []
-date: …
+date: 2018-11-28 00:00:00.000000000 Z
 dependencies: []
 description: JRuby tools wrapper for Apache OpenNLP
 email:

@@ -18,6 +18,7 @@ extra_rdoc_files: []
 files:
 - ".gitignore"
 - ".rspec"
+- ".rubocop.yml"
 - ".ruby-version"
 - ".travis.yml"
 - Gemfile

@@ -92,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     version: '0'
 requirements: []
 rubyforge_project:
-rubygems_version: 2.…
+rubygems_version: 2.7.6
 signing_key:
 specification_version: 4
 summary: A JRuby wrapper for the Apache OpenNLP tools library