open_nlp 0.0.7-java → 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -2
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile.lock +31 -0
  6. data/README.md +8 -1
  7. data/lib/open_nlp.rb +3 -3
  8. data/lib/open_nlp/categorizer.rb +7 -3
  9. data/lib/open_nlp/chunker.rb +19 -8
  10. data/lib/open_nlp/model.rb +13 -9
  11. data/lib/open_nlp/named_entity_detector.rb +6 -2
  12. data/lib/open_nlp/opennlp-maxent-3.0.3.jar +0 -0
  13. data/lib/open_nlp/opennlp-tools-1.5.3.jar +0 -0
  14. data/lib/open_nlp/parser.rb +43 -33
  15. data/lib/open_nlp/parser/parse.rb +12 -21
  16. data/lib/open_nlp/pos_tagger.rb +5 -2
  17. data/lib/open_nlp/sentence_detector.rb +16 -6
  18. data/lib/open_nlp/tokenizer.rb +8 -3
  19. data/lib/open_nlp/tool.rb +1 -1
  20. data/lib/open_nlp/util.rb +1 -2
  21. data/lib/open_nlp/util/span.rb +5 -5
  22. data/lib/open_nlp/version.rb +1 -1
  23. data/spec/categorizer_spec.rb +24 -22
  24. data/spec/chunker_spec.rb +29 -28
  25. data/spec/model/chunker_spec.rb +12 -15
  26. data/spec/model/detokenizer_spec.rb +11 -14
  27. data/spec/model/named_entity_detector_spec.rb +11 -14
  28. data/spec/model/pos_tagger_spec.rb +12 -15
  29. data/spec/model/sentence_detector_spec.rb +11 -14
  30. data/spec/model/tokenizer_spec.rb +11 -14
  31. data/spec/named_entity_detector_spec.rb +28 -27
  32. data/spec/parser/parse_spec.rb +64 -56
  33. data/spec/parser_spec.rb +26 -21
  34. data/spec/pos_tagger_spec.rb +22 -23
  35. data/spec/sentence_detector_spec.rb +39 -30
  36. data/spec/spec_helper.rb +1 -1
  37. data/spec/tokenizer_spec.rb +26 -22
  38. metadata +16 -17
  39. data/lib/open_nlp/opennlp-maxent-3.0.2-incubating.jar +0 -0
  40. data/lib/open_nlp/opennlp-tools-1.5.2-incubating.jar +0 -0
data/lib/open_nlp/tool.rb CHANGED
@@ -5,7 +5,7 @@ module OpenNlp
5
5
  attr_reader :j_instance
6
6
 
7
7
  def initialize(model)
8
- raise ArgumentError, "model must be an OpenNlp::Model" unless model.is_a?(OpenNlp::Model)
8
+ fail ArgumentError, 'model must be an OpenNlp::Model' unless model.is_a?(OpenNlp::Model)
9
9
  @j_instance = self.class.java_class.new(model.j_model)
10
10
  end
11
11
  end
data/lib/open_nlp/util.rb CHANGED
@@ -1,5 +1,4 @@
1
1
  module OpenNlp
2
2
  module Util
3
-
4
3
  end
5
- end
4
+ end
data/lib/open_nlp/util/span.rb CHANGED
@@ -6,8 +6,8 @@ class OpenNlp::Util::Span
6
6
  attr_reader :j_instance
7
7
 
8
8
  def initialize(s, e)
9
- raise ArgumentError, "s should be an integer" unless s.is_a?(Fixnum)
10
- raise ArgumentError, "e should be an integer" unless e.is_a?(Fixnum)
9
+ fail ArgumentError, 's should be an integer' unless s.is_a?(Fixnum)
10
+ fail ArgumentError, 'e should be an integer' unless e.is_a?(Fixnum)
11
11
 
12
12
  @j_instance = self.class.java_class.new(s, e)
13
13
  end
@@ -31,8 +31,8 @@ class OpenNlp::Util::Span
31
31
  def ==(obj)
32
32
  return false unless obj.is_a?(self.class)
33
33
 
34
- [:start, :end, :type].each_with_object(true) do |m,res|
35
- res = res && self.public_send(m) == obj.public_send(m)
34
+ [:start, :end, :type].each_with_object(true) do |method, acc|
35
+ acc = acc && self.public_send(method) == obj.public_send(method)
36
36
  end
37
37
  end
38
- end
38
+ end
data/lib/open_nlp/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module OpenNlp
2
- VERSION = '0.0.7'
2
+ VERSION = '0.1.0'
3
3
  end
data/spec/categorizer_spec.rb CHANGED
@@ -1,36 +1,38 @@
1
1
  require 'spec_helper'
2
2
 
3
- describe OpenNlp::Categorizer do
4
- subject { OpenNlp::Categorizer }
5
- let(:model){ OpenNlp::Model::Categorizer.new(File.join(FIXTURES_DIR, "en-doccat.bin")) }
6
-
7
- describe "initialization" do
8
- it "should initialize with a valid model" do
9
- categorizer = subject.new(model)
10
- categorizer.should be_a(subject)
11
- categorizer.j_instance.should be_a(subject.java_class)
3
+ RSpec.describe OpenNlp::Categorizer do
4
+ let(:model) { OpenNlp::Model::Categorizer.new(File.join(FIXTURES_DIR, 'en-doccat.bin')) }
5
+
6
+ describe 'initialization' do
7
+ it 'is initialized with a valid model' do
8
+ categorizer = described_class.new(model)
9
+ expect(categorizer.j_instance).to be_a(described_class.java_class)
12
10
  end
13
11
 
14
- it "should raise an ArgumentError without a valid model" do
15
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError without a valid model' do
13
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
16
14
  end
17
15
  end
18
16
 
19
- describe "categorizing a string" do
20
- let(:categorizer) { subject.new(model) }
17
+ describe '#categorize' do
18
+ let(:categorizer) { described_class.new(model) }
19
+
20
+ it 'categorizes a provided document to positive' do
21
+ category = categorizer.categorize('The fox is a good worker.')
22
+ expect(category).to eq('Positive')
23
+ end
21
24
 
22
- it "should categorize a provided document to positive" do
23
- category = categorizer.categorize("The fox is a good worker.")
24
- category.should == "Positive"
25
+ it 'categorizes a provided document to negative' do
26
+ category = categorizer.categorize('Quick brown fox jumps very bad.')
27
+ expect(category).to eq('Negative')
25
28
  end
26
29
 
27
- it "should categorize a provided document to negative" do
28
- category = categorizer.categorize("Quick brown fox jumps very bad.")
29
- category.should == "Negative"
30
+ it 'raises an ArgumentError when nil is passed as a param' do
31
+ expect { categorizer.categorize(nil) }.to raise_error(ArgumentError)
30
32
  end
31
33
 
32
- it "should raise an ArgumentError for a non-string" do
33
- lambda { categorizer.categorize(nil) }.should raise_error(ArgumentError)
34
+ it 'raises an ArgumentError when Fixnum is passed a param' do
35
+ expect { categorizer.categorize(123) }.to raise_error(ArgumentError)
34
36
  end
35
37
  end
36
- end
38
+ end
data/spec/chunker_spec.rb CHANGED
@@ -1,46 +1,47 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Chunker do
4
- subject { OpenNlp::Chunker }
3
+ RSpec.describe OpenNlp::Chunker do
4
+ let(:model) { OpenNlp::Model::Chunker.new(File.join(FIXTURES_DIR, 'en-chunker.bin')) }
5
+ let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
6
+ let(:pos_model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, 'en-pos-maxent.bin')) }
7
+ let(:chunker) { described_class.new(model, token_model, pos_model) }
5
8
 
6
- let(:model) { OpenNlp::Model::Chunker.new(File.join(FIXTURES_DIR, "en-chunker.bin")) }
7
- let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
8
- let(:pos_model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, "en-pos-maxent.bin")) }
9
-
10
- describe "initialization" do
11
- it "should initialize a new chunker" do
12
- chunker = subject.new(model, token_model, pos_model)
13
- chunker.should be_a(subject)
9
+ describe 'initialization' do
10
+ it 'initializes a new chunker' do
11
+ expect(chunker).to be_a(described_class)
14
12
  end
15
13
 
16
- it "should raise an argument error when no model is supplied" do
17
- lambda { subject.new(nil, nil, nil) }.should raise_error(ArgumentError)
14
+ it 'raises an argument error when no model is specified' do
15
+ expect { subject.new(nil, nil, nil) }.to raise_error(ArgumentError)
18
16
  end
19
17
 
20
- it "should raise an argument error when no token_model is supplied" do
21
- lambda { subject.new(model, nil, nil) }.should raise_error(ArgumentError)
18
+ it 'raises an argument error when no token_model is specified' do
19
+ expect { subject.new(model, nil, nil) }.to raise_error(ArgumentError)
22
20
  end
23
21
 
24
- it "should raise an argument error when no pos_model is supplied" do
25
- lambda { subject.new(model, token_model, nil) }.should raise_error(ArgumentError)
22
+ it 'raises an argument error when no pos_model is specified' do
23
+ expect { subject.new(model, token_model, nil) }.to raise_error(ArgumentError)
26
24
  end
27
25
  end
28
26
 
29
- describe "chunking a string" do
30
- let(:chunker) { subject.new(model, token_model, pos_model) }
31
-
32
- it "should chunk an empty string" do
33
- chunks = chunker.chunk("")
34
- chunks.should == []
27
+ describe 'chunking a string' do
28
+ it 'chunks an empty string' do
29
+ chunks = chunker.chunk('')
30
+ expect(chunks).to eq([])
35
31
  end
36
32
 
37
- it "should chunk a sentence" do
38
- chunks = chunker.chunk("The red fox sleeps soundly.")
39
- chunks.should == [[{"The"=>"DT"}, {"red"=>"JJ"}, {"fox"=>"NN"}, {"sleeps"=>"NNS"}], [{"soundly"=>"RB"}]]
33
+ it 'chunks a sentence' do
34
+ chunks = chunker.chunk('The red fox sleeps soundly.')
35
+ expect(chunks).to eq(
36
+ [
37
+ [{ 'The' => 'DT' }, { 'red' => 'JJ' }, { 'fox' => 'NN' }, { 'sleeps' => 'NNS' }],
38
+ [{ 'soundly' => 'RB' }]
39
+ ]
40
+ )
40
41
  end
41
42
 
42
- it "should raise an error when not passed a string" do
43
- lambda { chunker.chunk(nil) }.should raise_error(ArgumentError)
43
+ it 'raises an error when not passed a string' do
44
+ expect { chunker.chunk(nil) }.to raise_error(ArgumentError)
44
45
  end
45
46
  end
46
47
  end
data/spec/model/chunker_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::Chunker do
4
- subject { OpenNlp::Model::Chunker }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-chunker.bin") }
3
+ RSpec.describe OpenNlp::Model::Chunker do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-chunker.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- chunker_model = subject.new(model_file_name)
9
- chunker_model.should be_a(subject)
10
- chunker_model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ chunker_model = described_class.new(model_file_name)
8
+ expect(chunker_model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- chunker_model = subject.new(file_input_stream)
16
- chunker_model.should be_a(subject)
17
- chunker_model.j_model.should be_a(subject.java_class)
13
+ chunker_model = described_class.new(file_input_stream)
14
+ expect(chunker_model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
- end
20
+ end
data/spec/model/detokenizer_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::Detokenizer do
4
- subject { OpenNlp::Model::Detokenizer }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-detokenizer.xml") }
3
+ RSpec.describe OpenNlp::Model::Detokenizer do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-detokenizer.xml') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
20
  end
data/spec/model/named_entity_detector_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::NamedEntityDetector do
4
- subject { OpenNlp::Model::NamedEntityDetector }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-ner-time.bin") }
3
+ RSpec.describe OpenNlp::Model::NamedEntityDetector do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-ner-time.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'should accept a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
20
  end
data/spec/model/pos_tagger_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::POSTagger do
4
- subject { OpenNlp::Model::POSTagger }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-pos-maxent.bin") }
3
+ RSpec.describe OpenNlp::Model::POSTagger do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-pos-maxent.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
- end
20
+ end
data/spec/model/sentence_detector_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Model::SentenceDetector do
4
- subject { OpenNlp::Model::SentenceDetector }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-sent.bin") }
3
+ RSpec.describe OpenNlp::Model::SentenceDetector do
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-sent.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accepts a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'accepts a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
20
  end
data/spec/model/tokenizer_spec.rb CHANGED
@@ -1,23 +1,20 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
3
  describe OpenNlp::Model::Tokenizer do
4
- subject { OpenNlp::Model::Tokenizer }
5
- let(:model_file_name) { File.join(FIXTURES_DIR, "en-token.bin") }
4
+ let(:model_file_name) { File.join(FIXTURES_DIR, 'en-token.bin') }
6
5
 
7
- it "should accept a string filename parameter" do
8
- model = subject.new(model_file_name)
9
- model.should be_a(subject)
10
- model.j_model.should be_a(subject.java_class)
6
+ it 'accept a string filename parameter' do
7
+ model = described_class.new(model_file_name)
8
+ expect(model.j_model).to be_a(described_class.java_class)
11
9
  end
12
10
 
13
- it "should accept a java.io.FileInputStream object" do
11
+ it 'should accept a java.io.FileInputStream object' do
14
12
  file_input_stream = java.io.FileInputStream.new(model_file_name)
15
- model = subject.new(file_input_stream)
16
- model.should be_a(subject)
17
- model.j_model.should be_a(subject.java_class)
13
+ model = described_class.new(file_input_stream)
14
+ expect(model.j_model).to be_a(described_class.java_class)
18
15
  end
19
16
 
20
- it "should raise an argument error otherwise" do
21
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when nil is passed as a model' do
18
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
22
19
  end
23
- end
20
+ end
data/spec/named_entity_detector_spec.rb CHANGED
@@ -1,42 +1,43 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::NamedEntityDetector do
4
- subject { OpenNlp::NamedEntityDetector }
5
-
6
- let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, "en-ner-time.bin")) }
3
+ RSpec.describe OpenNlp::NamedEntityDetector do
4
+ let(:model) { OpenNlp::Model::NamedEntityDetector.new(File.join(FIXTURES_DIR, 'en-ner-time.bin')) }
5
+ let(:ne_detector) { described_class.new(model) }
7
6
 
8
- describe "initialization" do
9
- it "should initialize with a valid model" do
10
- ne_detector = subject.new(model)
11
- ne_detector.should be_a(subject)
7
+ describe 'initialization' do
8
+ it 'initializes with a valid model' do
9
+ expect(ne_detector.j_instance).to be_a(described_class.java_class)
12
10
  end
13
11
 
14
- it "should raise an ArgumentError otherwise" do
15
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError otherwise' do
13
+ expect { subject.new(nil) }.to raise_error(ArgumentError)
16
14
  end
17
15
  end
18
16
 
19
- describe "detection" do
20
- let(:ne_detector) { subject.new(model) }
21
-
22
- it "should detect nothing in an empty sentence" do
17
+ describe '#detect' do
18
+ it 'detects nothing for empty sentence' do
23
19
  spans = ne_detector.detect([])
24
- spans.should be_a(Array)
25
- spans.length.should == 0
20
+ expect(spans).to eq([])
21
+ end
22
+
23
+ it 'detects the named entities' do
24
+ spans = ne_detector.detect(['The', 'time', 'is', '10', ':', '23', 'am'])
25
+ expect(spans.size).to eq(1)
26
+ expect(spans.first).to be_a(Java::opennlp.tools.util.Span)
27
+ expect(spans.first.getStart).to eq(3)
28
+ expect(spans.first.getEnd).to eq(7)
29
+ end
30
+
31
+ it 'raises an error if nil is passed as an argument' do
32
+ expect { ne_detector.detect(nil) }.to raise_error(ArgumentError)
26
33
  end
27
34
 
28
- it "should detect the named entities" do
29
- spans = ne_detector.detect(["The", "time", "is", "10", ":", "23", "am"])
30
- spans.should be_a(Array)
31
- spans[0].should be_a(Java::opennlp.tools.util.Span)
32
- spans[0].getStart.should == 3
33
- spans[0].getEnd.should == 7
35
+ it 'raises an error if string is passed as an argument' do
36
+ expect { ne_detector.detect('str') }.to raise_error(ArgumentError)
34
37
  end
35
38
 
36
- it "should raise an error if anything but an array is passed" do
37
- lambda { ne_detector.detect(nil) }.should raise_error(ArgumentError)
38
- lambda { ne_detector.detect('str') }.should raise_error(ArgumentError)
39
- lambda { ne_detector.detect(111) }.should raise_error(ArgumentError)
39
+ it 'raises an error if fixnum is passed as an argument' do
40
+ expect { ne_detector.detect(111) }.to raise_error(ArgumentError)
40
41
  end
41
42
  end
42
43
  end