open_nlp 0.0.7-java → 0.1.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +2 -2
- data/.ruby-version +1 -0
- data/.travis.yml +6 -0
- data/Gemfile.lock +31 -0
- data/README.md +8 -1
- data/lib/open_nlp.rb +3 -3
- data/lib/open_nlp/categorizer.rb +7 -3
- data/lib/open_nlp/chunker.rb +19 -8
- data/lib/open_nlp/model.rb +13 -9
- data/lib/open_nlp/named_entity_detector.rb +6 -2
- data/lib/open_nlp/opennlp-maxent-3.0.3.jar +0 -0
- data/lib/open_nlp/opennlp-tools-1.5.3.jar +0 -0
- data/lib/open_nlp/parser.rb +43 -33
- data/lib/open_nlp/parser/parse.rb +12 -21
- data/lib/open_nlp/pos_tagger.rb +5 -2
- data/lib/open_nlp/sentence_detector.rb +16 -6
- data/lib/open_nlp/tokenizer.rb +8 -3
- data/lib/open_nlp/tool.rb +1 -1
- data/lib/open_nlp/util.rb +1 -2
- data/lib/open_nlp/util/span.rb +5 -5
- data/lib/open_nlp/version.rb +1 -1
- data/spec/categorizer_spec.rb +24 -22
- data/spec/chunker_spec.rb +29 -28
- data/spec/model/chunker_spec.rb +12 -15
- data/spec/model/detokenizer_spec.rb +11 -14
- data/spec/model/named_entity_detector_spec.rb +11 -14
- data/spec/model/pos_tagger_spec.rb +12 -15
- data/spec/model/sentence_detector_spec.rb +11 -14
- data/spec/model/tokenizer_spec.rb +11 -14
- data/spec/named_entity_detector_spec.rb +28 -27
- data/spec/parser/parse_spec.rb +64 -56
- data/spec/parser_spec.rb +26 -21
- data/spec/pos_tagger_spec.rb +22 -23
- data/spec/sentence_detector_spec.rb +39 -30
- data/spec/spec_helper.rb +1 -1
- data/spec/tokenizer_spec.rb +26 -22
- metadata +16 -17
- data/lib/open_nlp/opennlp-maxent-3.0.2-incubating.jar +0 -0
- data/lib/open_nlp/opennlp-tools-1.5.2-incubating.jar +0 -0
data/lib/open_nlp/tool.rb
CHANGED
@@ -5,7 +5,7 @@ module OpenNlp
|
|
5
5
|
attr_reader :j_instance
|
6
6
|
|
7
7
|
def initialize(model)
|
8
|
-
|
8
|
+
fail ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
|
9
9
|
@j_instance = self.class.java_class.new(model.j_model)
|
10
10
|
end
|
11
11
|
end
|
data/lib/open_nlp/util.rb
CHANGED
data/lib/open_nlp/util/span.rb
CHANGED
@@ -6,8 +6,8 @@ class OpenNlp::Util::Span
|
|
6
6
|
attr_reader :j_instance
|
7
7
|
|
8
8
|
def initialize(s, e)
|
9
|
-
|
10
|
-
|
9
|
+
fail ArgumentError, 's should be an integer' unless s.is_a?(Fixnum)
|
10
|
+
fail ArgumentError, 'e should be an integer' unless e.is_a?(Fixnum)
|
11
11
|
|
12
12
|
@j_instance = self.class.java_class.new(s, e)
|
13
13
|
end
|
@@ -31,8 +31,8 @@ class OpenNlp::Util::Span
|
|
31
31
|
def ==(obj)
|
32
32
|
return false unless obj.is_a?(self.class)
|
33
33
|
|
34
|
-
[:start, :end, :type].each_with_object(true) do |
|
35
|
-
|
34
|
+
[:start, :end, :type].each_with_object(true) do |method, acc|
|
35
|
+
acc = acc && self.public_send(method) == obj.public_send(method)
|
36
36
|
end
|
37
37
|
end
|
38
|
-
end
|
38
|
+
end
|
data/lib/open_nlp/version.rb
CHANGED
data/spec/categorizer_spec.rb
CHANGED
@@ -1,36 +1,38 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::Categorizer do
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
categorizer
|
10
|
-
categorizer.should be_a(subject)
|
11
|
-
categorizer.j_instance.should be_a(subject.java_class)
|
3
|
+
RSpec.describe OpenNlp::Categorizer do
|
4
|
+
let(:model) { OpenNlp::Model::Categorizer.new(File.join(FIXTURES_DIR, 'en-doccat.bin')) }
|
5
|
+
|
6
|
+
describe 'initialization' do
|
7
|
+
it 'is initialized with a valid model' do
|
8
|
+
categorizer = described_class.new(model)
|
9
|
+
expect(categorizer.j_instance).to be_a(described_class.java_class)
|
12
10
|
end
|
13
11
|
|
14
|
-
it
|
15
|
-
|
12
|
+
it 'raises an ArgumentError without a valid model' do
|
13
|
+
expect { described_class.new(nil) }.to raise_error(ArgumentError)
|
16
14
|
end
|
17
15
|
end
|
18
16
|
|
19
|
-
describe
|
20
|
-
let(:categorizer) {
|
17
|
+
describe '#categorize' do
|
18
|
+
let(:categorizer) { described_class.new(model) }
|
19
|
+
|
20
|
+
it 'categorizes a provided document to positive' do
|
21
|
+
category = categorizer.categorize('The fox is a good worker.')
|
22
|
+
expect(category).to eq('Positive')
|
23
|
+
end
|
21
24
|
|
22
|
-
it
|
23
|
-
category = categorizer.categorize(
|
24
|
-
category.
|
25
|
+
it 'categorizes a provided document to negative' do
|
26
|
+
category = categorizer.categorize('Quick brown fox jumps very bad.')
|
27
|
+
expect(category).to eq('Negative')
|
25
28
|
end
|
26
29
|
|
27
|
-
it
|
28
|
-
|
29
|
-
category.should == "Negative"
|
30
|
+
it 'raises an ArgumentError when nil is passed as a param' do
|
31
|
+
expect { categorizer.categorize(nil) }.to raise_error(ArgumentError)
|
30
32
|
end
|
31
33
|
|
32
|
-
it
|
33
|
-
|
34
|
+
it 'raises an ArgumentError when Fixnum is passed a param' do
|
35
|
+
expect { categorizer.categorize(123) }.to raise_error(ArgumentError)
|
34
36
|
end
|
35
37
|
end
|
36
|
-
end
|
38
|
+
end
|
data/spec/chunker_spec.rb
CHANGED
@@ -1,46 +1,47 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::Chunker do
|
4
|
-
|
3
|
+
RSpec.describe OpenNlp::Chunker do
|
4
|
+
let(:model) { OpenNlp::Model::Chunker.new(File.join(FIXTURES_DIR, 'en-chunker.bin')) }
|
5
|
+
let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
|
6
|
+
let(:pos_model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, 'en-pos-maxent.bin')) }
|
7
|
+
let(:chunker) { described_class.new(model, token_model, pos_model) }
|
5
8
|
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
describe "initialization" do
|
11
|
-
it "should initialize a new chunker" do
|
12
|
-
chunker = subject.new(model, token_model, pos_model)
|
13
|
-
chunker.should be_a(subject)
|
9
|
+
describe 'initialization' do
|
10
|
+
it 'initializes a new chunker' do
|
11
|
+
expect(chunker).to be_a(described_class)
|
14
12
|
end
|
15
13
|
|
16
|
-
it
|
17
|
-
|
14
|
+
it 'raises an argument error when no model is specified' do
|
15
|
+
expect { subject.new(nil, nil, nil) }.to raise_error(ArgumentError)
|
18
16
|
end
|
19
17
|
|
20
|
-
it
|
21
|
-
|
18
|
+
it 'raises an argument error when no token_model is specified' do
|
19
|
+
expect { subject.new(model, nil, nil) }.to raise_error(ArgumentError)
|
22
20
|
end
|
23
21
|
|
24
|
-
it
|
25
|
-
|
22
|
+
it 'raises an argument error when no pos_model is specified' do
|
23
|
+
expect { subject.new(model, token_model, nil) }.to raise_error(ArgumentError)
|
26
24
|
end
|
27
25
|
end
|
28
26
|
|
29
|
-
describe
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
chunks = chunker.chunk("")
|
34
|
-
chunks.should == []
|
27
|
+
describe 'chunking a string' do
|
28
|
+
it 'chunks an empty string' do
|
29
|
+
chunks = chunker.chunk('')
|
30
|
+
expect(chunks).to eq([])
|
35
31
|
end
|
36
32
|
|
37
|
-
it
|
38
|
-
chunks = chunker.chunk(
|
39
|
-
chunks.
|
33
|
+
it 'chunks a sentence' do
|
34
|
+
chunks = chunker.chunk('The red fox sleeps soundly.')
|
35
|
+
expect(chunks).to eq(
|
36
|
+
[
|
37
|
+
[{ 'The' => 'DT' }, { 'red' => 'JJ' }, { 'fox' => 'NN' }, { 'sleeps' => 'NNS' }],
|
38
|
+
[{ 'soundly' => 'RB' }]
|
39
|
+
]
|
40
|
+
)
|
40
41
|
end
|
41
42
|
|
42
|
-
it
|
43
|
-
|
43
|
+
it 'raises an error when not passed a string' do
|
44
|
+
expect { chunker.chunk(nil) }.to raise_error(ArgumentError)
|
44
45
|
end
|
45
46
|
end
|
46
47
|
end
|
data/spec/model/chunker_spec.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::Model::Chunker do
|
4
|
-
|
5
|
-
let(:model_file_name) { File.join(FIXTURES_DIR, "en-chunker.bin") }
|
3
|
+
RSpec.describe OpenNlp::Model::Chunker do
|
4
|
+
let(:model_file_name) { File.join(FIXTURES_DIR, 'en-chunker.bin') }
|
6
5
|
|
7
|
-
it
|
8
|
-
chunker_model =
|
9
|
-
chunker_model.
|
10
|
-
chunker_model.j_model.should be_a(subject.java_class)
|
6
|
+
it 'accepts a string filename parameter' do
|
7
|
+
chunker_model = described_class.new(model_file_name)
|
8
|
+
expect(chunker_model.j_model).to be_a(described_class.java_class)
|
11
9
|
end
|
12
10
|
|
13
|
-
it
|
11
|
+
it 'accepts a java.io.FileInputStream object' do
|
14
12
|
file_input_stream = java.io.FileInputStream.new(model_file_name)
|
15
|
-
chunker_model =
|
16
|
-
chunker_model.
|
17
|
-
chunker_model.j_model.should be_a(subject.java_class)
|
13
|
+
chunker_model = described_class.new(file_input_stream)
|
14
|
+
expect(chunker_model.j_model).to be_a(described_class.java_class)
|
18
15
|
end
|
19
16
|
|
20
|
-
it
|
21
|
-
|
17
|
+
it 'raises an argument error when nil is passed as a model' do
|
18
|
+
expect { described_class.new(nil) }.to raise_error(ArgumentError)
|
22
19
|
end
|
23
|
-
end
|
20
|
+
end
|
data/spec/model/detokenizer_spec.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::Model::Detokenizer do
|
4
|
-
|
5
|
-
let(:model_file_name) { File.join(FIXTURES_DIR, "en-detokenizer.xml") }
|
3
|
+
RSpec.describe OpenNlp::Model::Detokenizer do
|
4
|
+
let(:model_file_name) { File.join(FIXTURES_DIR, 'en-detokenizer.xml') }
|
6
5
|
|
7
|
-
it
|
8
|
-
model =
|
9
|
-
model.
|
10
|
-
model.j_model.should be_a(subject.java_class)
|
6
|
+
it 'accepts a string filename parameter' do
|
7
|
+
model = described_class.new(model_file_name)
|
8
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
11
9
|
end
|
12
10
|
|
13
|
-
it
|
11
|
+
it 'accepts a java.io.FileInputStream object' do
|
14
12
|
file_input_stream = java.io.FileInputStream.new(model_file_name)
|
15
|
-
model =
|
16
|
-
model.
|
17
|
-
model.j_model.should be_a(subject.java_class)
|
13
|
+
model = described_class.new(file_input_stream)
|
14
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
18
15
|
end
|
19
16
|
|
20
|
-
it
|
21
|
-
|
17
|
+
it 'raises an argument error when nil is passed as a model' do
|
18
|
+
expect { described_class.new(nil) }.to raise_error(ArgumentError)
|
22
19
|
end
|
23
20
|
end
|
data/spec/model/named_entity_detector_spec.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::Model::NamedEntityDetector do
|
4
|
-
|
5
|
-
let(:model_file_name) { File.join(FIXTURES_DIR, "en-ner-time.bin") }
|
3
|
+
RSpec.describe OpenNlp::Model::NamedEntityDetector do
|
4
|
+
let(:model_file_name) { File.join(FIXTURES_DIR, 'en-ner-time.bin') }
|
6
5
|
|
7
|
-
it
|
8
|
-
model =
|
9
|
-
model.
|
10
|
-
model.j_model.should be_a(subject.java_class)
|
6
|
+
it 'accepts a string filename parameter' do
|
7
|
+
model = described_class.new(model_file_name)
|
8
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
11
9
|
end
|
12
10
|
|
13
|
-
it
|
11
|
+
it 'should accept a java.io.FileInputStream object' do
|
14
12
|
file_input_stream = java.io.FileInputStream.new(model_file_name)
|
15
|
-
model =
|
16
|
-
model.
|
17
|
-
model.j_model.should be_a(subject.java_class)
|
13
|
+
model = described_class.new(file_input_stream)
|
14
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
18
15
|
end
|
19
16
|
|
20
|
-
it
|
21
|
-
|
17
|
+
it 'raises an argument error when nil is passed as a model' do
|
18
|
+
expect { described_class.new(nil) }.to raise_error(ArgumentError)
|
22
19
|
end
|
23
20
|
end
|
data/spec/model/pos_tagger_spec.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::Model::POSTagger do
|
4
|
-
|
5
|
-
let(:model_file_name) { File.join(FIXTURES_DIR, "en-pos-maxent.bin") }
|
3
|
+
RSpec.describe OpenNlp::Model::POSTagger do
|
4
|
+
let(:model_file_name) { File.join(FIXTURES_DIR, 'en-pos-maxent.bin') }
|
6
5
|
|
7
|
-
it
|
8
|
-
model =
|
9
|
-
model.
|
10
|
-
model.j_model.should be_a(subject.java_class)
|
6
|
+
it 'accepts a string filename parameter' do
|
7
|
+
model = described_class.new(model_file_name)
|
8
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
11
9
|
end
|
12
10
|
|
13
|
-
it
|
11
|
+
it 'accepts a java.io.FileInputStream object' do
|
14
12
|
file_input_stream = java.io.FileInputStream.new(model_file_name)
|
15
|
-
model =
|
16
|
-
model.
|
17
|
-
model.j_model.should be_a(subject.java_class)
|
13
|
+
model = described_class.new(file_input_stream)
|
14
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
18
15
|
end
|
19
16
|
|
20
|
-
it
|
21
|
-
|
17
|
+
it 'raises an argument error when nil is passed as a model' do
|
18
|
+
expect { described_class.new(nil) }.to raise_error(ArgumentError)
|
22
19
|
end
|
23
|
-
end
|
20
|
+
end
|
data/spec/model/sentence_detector_spec.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::Model::SentenceDetector do
|
4
|
-
|
5
|
-
let(:model_file_name) { File.join(FIXTURES_DIR, "en-sent.bin") }
|
3
|
+
RSpec.describe OpenNlp::Model::SentenceDetector do
|
4
|
+
let(:model_file_name) { File.join(FIXTURES_DIR, 'en-sent.bin') }
|
6
5
|
|
7
|
-
it
|
8
|
-
model =
|
9
|
-
model.
|
10
|
-
model.j_model.should be_a(subject.java_class)
|
6
|
+
it 'accepts a string filename parameter' do
|
7
|
+
model = described_class.new(model_file_name)
|
8
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
11
9
|
end
|
12
10
|
|
13
|
-
it
|
11
|
+
it 'accepts a java.io.FileInputStream object' do
|
14
12
|
file_input_stream = java.io.FileInputStream.new(model_file_name)
|
15
|
-
model =
|
16
|
-
model.
|
17
|
-
model.j_model.should be_a(subject.java_class)
|
13
|
+
model = described_class.new(file_input_stream)
|
14
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
18
15
|
end
|
19
16
|
|
20
|
-
it
|
21
|
-
|
17
|
+
it 'raises an argument error when nil is passed as a model' do
|
18
|
+
expect { described_class.new(nil) }.to raise_error(ArgumentError)
|
22
19
|
end
|
23
20
|
end
|
data/spec/model/tokenizer_spec.rb
CHANGED
@@ -1,23 +1,20 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
3
|
describe OpenNlp::Model::Tokenizer do
|
4
|
-
|
5
|
-
let(:model_file_name) { File.join(FIXTURES_DIR, "en-token.bin") }
|
4
|
+
let(:model_file_name) { File.join(FIXTURES_DIR, 'en-token.bin') }
|
6
5
|
|
7
|
-
it
|
8
|
-
model =
|
9
|
-
model.
|
10
|
-
model.j_model.should be_a(subject.java_class)
|
6
|
+
it 'accept a string filename parameter' do
|
7
|
+
model = described_class.new(model_file_name)
|
8
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
11
9
|
end
|
12
10
|
|
13
|
-
it
|
11
|
+
it 'should accept a java.io.FileInputStream object' do
|
14
12
|
file_input_stream = java.io.FileInputStream.new(model_file_name)
|
15
|
-
model =
|
16
|
-
model.
|
17
|
-
model.j_model.should be_a(subject.java_class)
|
13
|
+
model = described_class.new(file_input_stream)
|
14
|
+
expect(model.j_model).to be_a(described_class.java_class)
|
18
15
|
end
|
19
16
|
|
20
|
-
it
|
21
|
-
|
17
|
+
it 'raises an argument error when nil is passed as a model' do
|
18
|
+
expect { described_class.new(nil) }.to raise_error(ArgumentError)
|
22
19
|
end
|
23
|
-
end
|
20
|
+
end
|
data/spec/named_entity_detector_spec.rb
CHANGED
@@ -1,42 +1,43 @@
|
|
1
|
-
require
|
1
|
+
require 'spec_helper'
|
2
2
|
|
3
|
-
describe OpenNlp::NamedEntityDetector do
|
4
|
-
|
5
|
-
|
6
|
-
let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, "en-ner-time.bin")) }
|
3
|
+
RSpec.describe OpenNlp::NamedEntityDetector do
|
4
|
+
let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, 'en-ner-time.bin')) }
|
5
|
+
let(:ne_detector) { described_class.new(model) }
|
7
6
|
|
8
|
-
describe
|
9
|
-
it
|
10
|
-
ne_detector
|
11
|
-
ne_detector.should be_a(subject)
|
7
|
+
describe 'initialization' do
|
8
|
+
it 'initializes with a valid model' do
|
9
|
+
expect(ne_detector.j_instance).to be_a(described_class.java_class)
|
12
10
|
end
|
13
11
|
|
14
|
-
it
|
15
|
-
|
12
|
+
it 'raises an ArgumentError otherwise' do
|
13
|
+
expect { subject.new(nil) }.to raise_error(ArgumentError)
|
16
14
|
end
|
17
15
|
end
|
18
16
|
|
19
|
-
describe
|
20
|
-
|
21
|
-
|
22
|
-
it "should detect nothing in an empty sentence" do
|
17
|
+
describe '#detect' do
|
18
|
+
it 'detects nothing for empty sentence' do
|
23
19
|
spans = ne_detector.detect([])
|
24
|
-
spans.
|
25
|
-
|
20
|
+
expect(spans).to eq([])
|
21
|
+
end
|
22
|
+
|
23
|
+
it 'detects the named entities' do
|
24
|
+
spans = ne_detector.detect(['The', 'time', 'is', '10', ':', '23', 'am'])
|
25
|
+
expect(spans.size).to eq(1)
|
26
|
+
expect(spans.first).to be_a(Java::opennlp.tools.util.Span)
|
27
|
+
expect(spans.first.getStart).to eq(3)
|
28
|
+
expect(spans.first.getEnd).to eq(7)
|
29
|
+
end
|
30
|
+
|
31
|
+
it 'raises an error if nil is passed as an argument' do
|
32
|
+
expect { ne_detector.detect(nil) }.to raise_error(ArgumentError)
|
26
33
|
end
|
27
34
|
|
28
|
-
it
|
29
|
-
|
30
|
-
spans.should be_a(Array)
|
31
|
-
spans[0].should be_a(Java::opennlp.tools.util.Span)
|
32
|
-
spans[0].getStart.should == 3
|
33
|
-
spans[0].getEnd.should == 7
|
35
|
+
it 'raises an error if string is passed as an argument' do
|
36
|
+
expect { ne_detector.detect('str') }.to raise_error(ArgumentError)
|
34
37
|
end
|
35
38
|
|
36
|
-
it
|
37
|
-
|
38
|
-
lambda { ne_detector.detect('str') }.should raise_error(ArgumentError)
|
39
|
-
lambda { ne_detector.detect(111) }.should raise_error(ArgumentError)
|
39
|
+
it 'raises an error if fixnum is passed as an argument' do
|
40
|
+
expect { ne_detector.detect(111) }.to raise_error(ArgumentError)
|
40
41
|
end
|
41
42
|
end
|
42
43
|
end
|