open_nlp 0.0.7-java → 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -2
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile.lock +31 -0
  6. data/README.md +8 -1
  7. data/lib/open_nlp.rb +3 -3
  8. data/lib/open_nlp/categorizer.rb +7 -3
  9. data/lib/open_nlp/chunker.rb +19 -8
  10. data/lib/open_nlp/model.rb +13 -9
  11. data/lib/open_nlp/named_entity_detector.rb +6 -2
  12. data/lib/open_nlp/opennlp-maxent-3.0.3.jar +0 -0
  13. data/lib/open_nlp/opennlp-tools-1.5.3.jar +0 -0
  14. data/lib/open_nlp/parser.rb +43 -33
  15. data/lib/open_nlp/parser/parse.rb +12 -21
  16. data/lib/open_nlp/pos_tagger.rb +5 -2
  17. data/lib/open_nlp/sentence_detector.rb +16 -6
  18. data/lib/open_nlp/tokenizer.rb +8 -3
  19. data/lib/open_nlp/tool.rb +1 -1
  20. data/lib/open_nlp/util.rb +1 -2
  21. data/lib/open_nlp/util/span.rb +5 -5
  22. data/lib/open_nlp/version.rb +1 -1
  23. data/spec/categorizer_spec.rb +24 -22
  24. data/spec/chunker_spec.rb +29 -28
  25. data/spec/model/chunker_spec.rb +12 -15
  26. data/spec/model/detokenizer_spec.rb +11 -14
  27. data/spec/model/named_entity_detector_spec.rb +11 -14
  28. data/spec/model/pos_tagger_spec.rb +12 -15
  29. data/spec/model/sentence_detector_spec.rb +11 -14
  30. data/spec/model/tokenizer_spec.rb +11 -14
  31. data/spec/named_entity_detector_spec.rb +28 -27
  32. data/spec/parser/parse_spec.rb +64 -56
  33. data/spec/parser_spec.rb +26 -21
  34. data/spec/pos_tagger_spec.rb +22 -23
  35. data/spec/sentence_detector_spec.rb +39 -30
  36. data/spec/spec_helper.rb +1 -1
  37. data/spec/tokenizer_spec.rb +26 -22
  38. metadata +16 -17
  39. data/lib/open_nlp/opennlp-maxent-3.0.2-incubating.jar +0 -0
  40. data/lib/open_nlp/opennlp-tools-1.5.2-incubating.jar +0 -0
data/lib/open_nlp/tool.rb CHANGED
@@ -5,7 +5,7 @@ module OpenNlp
5
5
  attr_reader :j_instance
6
6
 
7
7
  def initialize(model)
8
- raise ArgumentError, "model must be an OpenNlp::Model" unless model.is_a?(OpenNlp::Model)
8
+ fail ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
9
9
  @j_instance = self.class.java_class.new(model.j_model)
10
10
  end
11
11
  end
data/lib/open_nlp/util.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module OpenNlp
2
2
  module Util
3
-
4
3
  end
5
- end
4
+ end
data/lib/open_nlp/util/span.rb CHANGED
@@ -6,8 +6,8 @@ class OpenNlp::Util::Span
6
6
  attr_reader :j_instance
7
7
 
8
8
  def initialize(s, e)
9
- raise ArgumentError, "s should be an integer" unless s.is_a?(Fixnum)
10
- raise ArgumentError, "e should be an integer" unless e.is_a?(Fixnum)
9
+ fail ArgumentError, 's should be an integer' unless s.is_a?(Fixnum)
10
+ fail ArgumentError, 'e should be an integer' unless e.is_a?(Fixnum)
11
11
 
12
12
  @j_instance = self.class.java_class.new(s, e)
13
13
  end
@@ -31,8 +31,8 @@ class OpenNlp::Util::Span
31
31
  def ==(obj)
32
32
  return false unless obj.is_a?(self.class)
33
33
 
34
- [:start, :end, :type].each_with_object(true) do |m,res|
35
- res = res && self.public_send(m) == obj.public_send(m)
34
+ [:start, :end, :type].each_with_object(true) do |method, acc|
35
+ acc = acc && self.public_send(method) == obj.public_send(method)
36
36
  end
37
37
  end
38
- end
38
+ end
data/lib/open_nlp/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OpenNlp
2
- VERSION = '0.0.7'
2
+ VERSION = '0.1.0'
3
3
  end
data/spec/categorizer_spec.rb CHANGED
@@ -1,36 +1,38 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe OpenNlp::Categorizer do
4
- subject { OpenNlp::Categorizer }
5
- let(:model){ OpenNlp::Model::Categorizer.new(File.join(FIXTURES_DIR, "en-doccat.bin")) }
6
-
7
- describe "initialization" do
8
- it "should initialize with a valid model" do
9
- categorizer = subject.new(model)
10
- categorizer.should be_a(subject)
11
- categorizer.j_instance.should be_a(subject.java_class)
3
+ RSpec.describe OpenNlp::Categorizer do
4
+ let(:model) { OpenNlp::Model::Categorizer.new(File.join(FIXTURES_DIR, 'en-doccat.bin')) }
5
+
6
+ describe 'initialization' do
7
+ it 'is initialized with a valid model' do
8
+ categorizer = described_class.new(model)
9
+ expect(categorizer.j_instance).to be_a(described_class.java_class)
12
10
  end
13
11
 
14
- it "should raise an ArgumentError without a valid model" do
15
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError without a valid model' do
13
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
16
14
  end
17
15
  end
18
16
 
19
- describe "categorizing a string" do
20
- let(:categorizer) { subject.new(model) }
17
+ describe '#categorize' do
18
+ let(:categorizer) { described_class.new(model) }
19
+
20
+ it 'categorizes a provided document to positive' do
21
+ category = categorizer.categorize('The fox is a good worker.')
22
+ expect(category).to eq('Positive')
23
+ end
21
24
 
22
- it "should categorize a provided document to positive" do
23
- category = categorizer.categorize("The fox is a good worker.")
24
- category.should == "Positive"
25
+ it 'categorizes a provided document to negative' do
26
+ category = categorizer.categorize('Quick brown fox jumps very bad.')
27
+ expect(category).to eq('Negative')
25
28
  end
26
29
 
27
- it "should categorize a provided document to negative" do
28
- category = categorizer.categorize("Quick brown fox jumps very bad.")
29
- category.should == "Negative"
30
+ it 'raises an ArgumentError when nil is passed as a param' do
31
+ expect { categorizer.categorize(nil) }.to raise_error(ArgumentError)
30
32
  end
31
33
 
32
- it "should raise an ArgumentError for a non-string" do
33
- lambda { categorizer.categorize(nil) }.should raise_error(ArgumentError)
34
+ it 'raises an ArgumentError when Fixnum is passed a param' do
35
+ expect { categorizer.categorize(123) }.to raise_error(ArgumentError)
34
36
  end
35
37
  end
36
- end
38
+ end
data/spec/chunker_spec.rb CHANGED
@@ -1,46 +1,47 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Chunker do
4
- subject { OpenNlp::Chunker }
3
+ RSpec.describe OpenNlp::Chunker do
4
+ let(:model) { OpenNlp::Model::Chunker.new(File.join(FIXTURES_DIR, 'en-chunker.bin')) }
5
+ let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
6
+ let(:pos_model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, 'en-pos-maxent.bin')) }
7
+ let(:chunker) { described_class.new(model, token_model, pos_model) }
5
8
 
6
- let(:model) { OpenNlp::Model::Chunker.new(File.join(FIXTURES_DIR, "en-chunker.bin")) }
7
- let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
8
- let(:pos_model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, "en-pos-maxent.bin")) }
9
-
10
- describe "initialization" do
11
- it "should initialize a new chunker" do
12
- chunker = subject.new(model, token_model, pos_model)
13
- chunker.should be_a(subject)
9
+ describe 'initialization' do
10
+ it 'initializes a new chunker' do
11
+ expect(chunker).to be_a(described_class)
14
12
  end
15
13
 
16
- it "should raise an argument error when no model is supplied" do
17
- lambda { subject.new(nil, nil, nil) }.should raise_error(ArgumentError)
14
+ it 'raises an argument error when no model is specified' do
15
+ expect { subject.new(nil, nil, nil) }.to raise_error(ArgumentError)
18
16
  end
19
17
 
20
- it "should raise an argument error when no token_model is supplied" do
21
- lambda { subject.new(model, nil, nil) }.should raise_error(ArgumentError)
18
+ it 'raises an argument error when no token_model is specified' do
19
+ expect { subject.new(model, nil, nil) }.to raise_error(ArgumentError)
22
20
  end
23
21
 
24
- it "should raise an argument error when no pos_model is supplied" do
25
- lambda { subject.new(model, token_model, nil) }.should raise_error(ArgumentError)
22
+ it 'raises an argument error when no pos_model is specified' do
23
+ expect { subject.new(model, token_model, nil) }.to raise_error(ArgumentError)
26
24
  end
27
25
  end
28
26
 
29
- describe "chunking a string" do
30
- let(:chunker) { subject.new(model, token_model, pos_model) }
31
-
32
- it "should chunk an empty string" do
33
- chunks = chunker.chunk("")
34
- chunks.should == []
27
+ describe 'chunking a string' do
28
+ it 'chunks an empty string' do
29
+ chunks = chunker.chunk('')
30
+ expect(chunks).to eq([])
35
31
  end
36
32
 
37
- it "should chunk a sentence" do
38
- chunks = chunker.chunk("The red fox sleeps soundly.")
39
- chunks.should == [[{"The"=>"DT"}, {"red"=>"JJ"}, {"fox"=>"NN"}, {"sleeps"=>"NNS"}], [{"soundly"=>"RB"}]]
33
+ it 'chunks a sentence' do
34
+ chunks = chunker.chunk('The red fox sleeps soundly.')
35
+ expect(chunks).to eq(
36
+ [
37
+ [{ 'The' => 'DT' }, { 'red' => 'JJ' }, { 'fox' => 'NN' }, { 'sleeps' => 'NNS' }],
38
+ [{ 'soundly' => 'RB' }]
39
+ ]
40
+ )
40
41
  end
41
42
 
42
- it "should raise an error when not passed a string" do
43
- lambda { chunker.chunk(nil) }.should raise_error(ArgumentError)
43
+ it 'raises an error when not passed a string' do
44
+ expect { chunker.chunk(nil) }.to raise_error(ArgumentError)
44
45
  end
45
46
  end
46
47
  end
data/spec/model/chunker_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::Chunker do
4
- subject { OpenNlp::Model::Chunker }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-chunker.bin") }
3
+ RSpec.describe OpenNlp::Model::Chunker do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-chunker.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- chunker_model = subject.new(model_file_name)
9
- chunker_model.should be_a(subject)
10
- chunker_model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ chunker_model = described_class.new(model_file_name)
8
+ expect(chunker_model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- chunker_model = subject.new(file_input_stream)
16
- chunker_model.should be_a(subject)
17
- chunker_model.j_model.should be_a(subject.java_class)
13
+ chunker_model = described_class.new(file_input_stream)
14
+ expect(chunker_model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
- end
20
+ end
data/spec/model/detokenizer_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::Detokenizer do
4
- subject { OpenNlp::Model::Detokenizer }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-detokenizer.xml") }
3
+ RSpec.describe OpenNlp::Model::Detokenizer do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-detokenizer.xml') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
20
  end
data/spec/model/named_entity_detector_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::NamedEntityDetector do
4
- subject { OpenNlp::Model::NamedEntityDetector }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-ner-time.bin") }
3
+ RSpec.describe OpenNlp::Model::NamedEntityDetector do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-ner-time.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'should accept a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
20
  end
data/spec/model/pos_tagger_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::POSTagger do
4
- subject { OpenNlp::Model::POSTagger }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-pos-maxent.bin") }
3
+ RSpec.describe OpenNlp::Model::POSTagger do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-pos-maxent.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
- end
20
+ end
data/spec/model/sentence_detector_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::SentenceDetector do
4
- subject { OpenNlp::Model::SentenceDetector }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-sent.bin") }
3
+ RSpec.describe OpenNlp::Model::SentenceDetector do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-sent.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
20
  end
data/spec/model/tokenizer_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe OpenNlp::Model::Tokenizer do
4
- subject { OpenNlp::Model::Tokenizer }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-token.bin") }
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-token.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accept a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'should accept a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
- end
20
+ end
data/spec/named_entity_detector_spec.rb CHANGED
@@ -1,42 +1,43 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::NamedEntityDetector do
4
- subject { OpenNlp::NamedEntityDetector }
5
-
6
- let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, "en-ner-time.bin")) }
3
+ RSpec.describe OpenNlp::NamedEntityDetector do
4
+ let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, 'en-ner-time.bin')) }
5
+ let(:ne_detector) { described_class.new(model) }
7
6
 
8
- describe "initialization" do
9
- it "should initialize with a valid model" do
10
- ne_detector = subject.new(model)
11
- ne_detector.should be_a(subject)
7
+ describe 'initialization' do
8
+ it 'initializes with a valid model' do
9
+ expect(ne_detector.j_instance).to be_a(described_class.java_class)
12
10
  end
13
11
 
14
- it "should raise an ArgumentError otherwise" do
15
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError otherwise' do
13
+ expect { subject.new(nil) }.to raise_error(ArgumentError)
16
14
  end
17
15
  end
18
16
 
19
- describe "detection" do
20
- let(:ne_detector) { subject.new(model) }
21
-
22
- it "should detect nothing in an empty sentence" do
17
+ describe '#detect' do
18
+ it 'detects nothing for empty sentence' do
23
19
  spans = ne_detector.detect([])
24
- spans.should be_a(Array)
25
- spans.length.should == 0
20
+ expect(spans).to eq([])
21
+ end
22
+
23
+ it 'detects the named entities' do
24
+ spans = ne_detector.detect(['The', 'time', 'is', '10', ':', '23', 'am'])
25
+ expect(spans.size).to eq(1)
26
+ expect(spans.first).to be_a(Java::opennlp.tools.util.Span)
27
+ expect(spans.first.getStart).to eq(3)
28
+ expect(spans.first.getEnd).to eq(7)
29
+ end
30
+
31
+ it 'raises an error if nil is passed as an argument' do
32
+ expect { ne_detector.detect(nil) }.to raise_error(ArgumentError)
26
33
  end
27
34
 
28
- it "should detect the named entities" do
29
- spans = ne_detector.detect(["The", "time", "is", "10", ":", "23", "am"])
30
- spans.should be_a(Array)
31
- spans[0].should be_a(Java::opennlp.tools.util.Span)
32
- spans[0].getStart.should == 3
33
- spans[0].getEnd.should == 7
35
+ it 'raises an error if string is passed as an argument' do
36
+ expect { ne_detector.detect('str') }.to raise_error(ArgumentError)
34
37
  end
35
38
 
36
- it "should raise an error if anything but an array is passed" do
37
- lambda { ne_detector.detect(nil) }.should raise_error(ArgumentError)
38
- lambda { ne_detector.detect('str') }.should raise_error(ArgumentError)
39
- lambda { ne_detector.detect(111) }.should raise_error(ArgumentError)
39
+ it 'raises an error if fixnum is passed as an argument' do
40
+ expect { ne_detector.detect(111) }.to raise_error(ArgumentError)
40
41
  end
41
42
  end
42
43
  end