open_nlp 0.0.7-java → 0.1.0-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -2
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile.lock +31 -0
  6. data/README.md +8 -1
  7. data/lib/open_nlp.rb +3 -3
  8. data/lib/open_nlp/categorizer.rb +7 -3
  9. data/lib/open_nlp/chunker.rb +19 -8
  10. data/lib/open_nlp/model.rb +13 -9
  11. data/lib/open_nlp/named_entity_detector.rb +6 -2
  12. data/lib/open_nlp/opennlp-maxent-3.0.3.jar +0 -0
  13. data/lib/open_nlp/opennlp-tools-1.5.3.jar +0 -0
  14. data/lib/open_nlp/parser.rb +43 -33
  15. data/lib/open_nlp/parser/parse.rb +12 -21
  16. data/lib/open_nlp/pos_tagger.rb +5 -2
  17. data/lib/open_nlp/sentence_detector.rb +16 -6
  18. data/lib/open_nlp/tokenizer.rb +8 -3
  19. data/lib/open_nlp/tool.rb +1 -1
  20. data/lib/open_nlp/util.rb +1 -2
  21. data/lib/open_nlp/util/span.rb +5 -5
  22. data/lib/open_nlp/version.rb +1 -1
  23. data/spec/categorizer_spec.rb +24 -22
  24. data/spec/chunker_spec.rb +29 -28
  25. data/spec/model/chunker_spec.rb +12 -15
  26. data/spec/model/detokenizer_spec.rb +11 -14
  27. data/spec/model/named_entity_detector_spec.rb +11 -14
  28. data/spec/model/pos_tagger_spec.rb +12 -15
  29. data/spec/model/sentence_detector_spec.rb +11 -14
  30. data/spec/model/tokenizer_spec.rb +11 -14
  31. data/spec/named_entity_detector_spec.rb +28 -27
  32. data/spec/parser/parse_spec.rb +64 -56
  33. data/spec/parser_spec.rb +26 -21
  34. data/spec/pos_tagger_spec.rb +22 -23
  35. data/spec/sentence_detector_spec.rb +39 -30
  36. data/spec/spec_helper.rb +1 -1
  37. data/spec/tokenizer_spec.rb +26 -22
  38. metadata +16 -17
  39. data/lib/open_nlp/opennlp-maxent-3.0.2-incubating.jar +0 -0
  40. data/lib/open_nlp/opennlp-tools-1.5.2-incubating.jar +0 -0
data/spec/parser/parse_spec.rb CHANGED
@@ -1,13 +1,13 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Parser::Parse do
4
- subject { OpenNlp::Parser::Parse }
3
+ RSpec.describe OpenNlp::Parser::Parse do
5
4
  let(:text) { 'The red fox sleeps soundly .' }
6
- let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, "en-parser-chunking.bin")) }
7
- let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
5
+ let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, 'en-parser-chunking.bin')) }
6
+ let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
7
+ let(:parser) { OpenNlp::Parser.new(model, token_model) }
8
8
 
9
- describe "initialization" do
10
- it "should initialize a new parse object" do
9
+ describe 'initialization' do
10
+ it 'initializes a new parse object' do
11
11
  j_parse = Java::opennlp.tools.parser.Parse.new(
12
12
  text.to_java(:String),
13
13
  Java::opennlp.tools.util.Span.new(0, text.size),
@@ -16,91 +16,99 @@ describe OpenNlp::Parser::Parse do
16
16
  0.to_java(:Integer)
17
17
  )
18
18
 
19
- subject.new(j_parse).should be_a(subject)
19
+ parse = described_class.new(j_parse)
20
+ expect(parse.j_instance).to be_a(described_class.java_class)
20
21
  end
21
22
 
22
- it "should raise an argument error when no model is supplied" do
23
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
23
+ it 'raises an argument error when nil is passed as an argumenr' do
24
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
24
25
  end
25
26
  end
26
27
 
27
- describe "#tree_bank_string" do
28
- it "returns proper string value for parsed text" do
29
- parser = OpenNlp::Parser.new(model, token_model)
30
- expected = parser.parse(text).tree_bank_string
31
- expected.should == "(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))"
28
+ describe '#tree_bank_string' do
29
+ let(:expected_tree_bank_str) do
30
+ '(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))'
31
+ end
32
+
33
+ it 'returns proper string value for parsed text' do
34
+ tree_bank_string = parser.parse(text).tree_bank_string
35
+ expect(tree_bank_string).to eq(expected_tree_bank_str)
32
36
  end
33
37
  end
34
38
 
35
- describe "#code_tree" do
36
- it "returns proper structure for parsed text" do
37
- parser = OpenNlp::Parser.new(model, token_model)
38
- parser.parse(text).code_tree.should == [
39
+ describe '#code_tree' do
40
+ let(:expected_code_tree) do
41
+ [
39
42
  {
40
- :type => "S",
41
- :parent_type => "TOP",
42
- :token => "The red fox sleeps soundly .",
43
+ :type => 'S',
44
+ :parent_type => 'TOP',
45
+ :token => 'The red fox sleeps soundly .',
43
46
  :children => [
44
47
  {
45
- :type => "NP",
46
- :parent_type => "S",
47
- :token => "The red fox",
48
+ :type => 'NP',
49
+ :parent_type => 'S',
50
+ :token => 'The red fox',
48
51
  :children => [
49
52
  {
50
- :type => "DT",
51
- :parent_type => "NP",
52
- :token => "The",
53
- :children => [{:type => "TK", :parent_type => "DT", :token => "The"}]
53
+ :type => 'DT',
54
+ :parent_type => 'NP',
55
+ :token => 'The',
56
+ :children => [{:type => 'TK', :parent_type => 'DT', :token => 'The'}]
54
57
  },
55
58
  {
56
- :type => "JJ",
57
- :parent_type => "NP",
58
- :token => "red",
59
- :children => [{:type => "TK", :parent_type => "JJ", :token => "red"}]
59
+ :type => 'JJ',
60
+ :parent_type => 'NP',
61
+ :token => 'red',
62
+ :children => [{:type => 'TK', :parent_type => 'JJ', :token => 'red'}]
60
63
  },
61
64
  {
62
- :type => "NN",
63
- :parent_type => "NP",
64
- :token => "fox",
65
- :children => [{:type => "TK", :parent_type => "NN", :token => "fox"}]
65
+ :type => 'NN',
66
+ :parent_type => 'NP',
67
+ :token => 'fox',
68
+ :children => [{:type => 'TK', :parent_type => 'NN', :token => 'fox'}]
66
69
  }
67
70
  ]
68
71
  },
69
72
  {
70
- :type => "VP",
71
- :parent_type => "S",
72
- :token => "sleeps soundly",
73
+ :type => 'VP',
74
+ :parent_type => 'S',
75
+ :token => 'sleeps soundly',
73
76
  :children => [
74
77
  {
75
- :type => "VBZ",
76
- :parent_type => "VP",
77
- :token => "sleeps",
78
- :children => [{:type => "TK", :parent_type => "VBZ", :token => "sleeps"}]
78
+ :type => 'VBZ',
79
+ :parent_type => 'VP',
80
+ :token => 'sleeps',
81
+ :children => [{:type => 'TK', :parent_type => 'VBZ', :token => 'sleeps'}]
79
82
  },
80
83
  {
81
- :type => "ADVP",
82
- :parent_type => "VP",
83
- :token => "soundly",
84
+ :type => 'ADVP',
85
+ :parent_type => 'VP',
86
+ :token => 'soundly',
84
87
  :children => [
85
88
  {
86
- :type => "RB",
87
- :parent_type => "ADVP",
88
- :token => "soundly",
89
- :children => [{:type => "TK", :parent_type => "RB", :token => "soundly"}]
89
+ :type => 'RB',
90
+ :parent_type => 'ADVP',
91
+ :token => 'soundly',
92
+ :children => [{:type => 'TK', :parent_type => 'RB', :token => 'soundly'}]
90
93
  }
91
94
  ]
92
95
  }
93
96
  ]
94
97
  },
95
98
  {
96
- :type => ".",
97
- :parent_type => "S",
98
- :token => ".",
99
- :children => [{:type => "TK", :parent_type => ".", :token => "."}]
99
+ :type => '.',
100
+ :parent_type => 'S',
101
+ :token => '.',
102
+ :children => [{:type => 'TK', :parent_type => '.', :token => '.'}]
100
103
  }
101
104
  ]
102
105
  }
103
106
  ]
104
107
  end
108
+
109
+ it 'returns proper structure for parsed text' do
110
+ code_tree = parser.parse(text).code_tree
111
+ expect(code_tree).to eq(expected_code_tree)
112
+ end
105
113
  end
106
- end
114
+ end
data/spec/parser_spec.rb CHANGED
@@ -1,39 +1,44 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Parser do
4
- subject { OpenNlp::Parser }
3
+ RSpec.describe OpenNlp::Parser do
5
4
  let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, "en-parser-chunking.bin")) }
6
5
  let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
6
+ let(:parser) { described_class.new(model, token_model) }
7
7
 
8
- describe "initialization" do
9
- it "should initialize a new parser" do
10
- parser = subject.new(model, token_model)
11
- parser.should be_a(subject)
8
+ describe 'initialization' do
9
+ it 'initializes a new parser' do
10
+ expect(parser.j_instance).to be_a(Java::opennlp.tools.parser.chunking.Parser)
12
11
  end
13
12
 
14
- it "should raise an argument error when no model is supplied" do
15
- lambda { subject.new(nil, nil) }.should raise_error(ArgumentError)
13
+ it 'raises an argument error when no model specified' do
14
+ expect { described_class.new(nil, nil) }.to raise_error(ArgumentError)
16
15
  end
17
16
 
18
- it "should raise an argument error when no token_model is supplied" do
19
- lambda { subject.new(model, nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when no token_model is specified' do
18
+ expect { described_class.new(model, nil) }.to raise_error(ArgumentError)
20
19
  end
21
20
  end
22
21
 
23
- describe "parsing a string" do
24
- let(:parser) { subject.new(model, token_model) }
22
+ describe '#parse' do
23
+ it 'parses an empty string' do
24
+ expect(parser.parse('')).to eq({})
25
+ end
26
+
27
+ it 'parses a sentence' do
28
+ res = parser.parse('The red fox sleeps soundly .')
29
+ expect(res).to be_a(OpenNlp::Parser::Parse)
30
+ end
25
31
 
26
- it "should parse an empty string" do
27
- parser.parse("").should == {}
32
+ it 'raises an error when nil is passed as an argument' do
33
+ expect { parser.parse(nil) }.to raise_error(ArgumentError)
28
34
  end
29
35
 
30
- it "should parse a sentence" do
31
- res = parser.parse("The red fox sleeps soundly .")
32
- res.class.should == OpenNlp::Parser::Parse
36
+ it 'raises an error when fixnum is passed as an argument' do
37
+ expect { parser.parse(111) }.to raise_error(ArgumentError)
33
38
  end
34
39
 
35
- it "should raise an error when not passed a string" do
36
- lambda { parser.parse(nil) }.should raise_error(ArgumentError)
40
+ it 'raises an error when array is passed as an argument' do
41
+ expect { parser.parse([1, 2]) }.to raise_error(ArgumentError)
37
42
  end
38
43
  end
39
- end
44
+ end
data/spec/pos_tagger_spec.rb CHANGED
@@ -1,37 +1,36 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::POSTagger do
4
- subject { OpenNlp::POSTagger }
3
+ RSpec.describe OpenNlp::POSTagger do
4
+ let(:model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, 'en-pos-maxent.bin')) }
5
+ let(:pos_tagger) { described_class.new(model) }
5
6
 
6
- let(:model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, "en-pos-maxent.bin")) }
7
-
8
- describe "initialization" do
9
- it "should initialize with a valid model" do
10
- tagger = subject.new(model)
11
- tagger.should be_a(subject)
12
- tagger.j_instance.should be_a(subject.java_class)
7
+ describe 'initialization' do
8
+ it 'initialize with a valid model' do
9
+ expect(pos_tagger.j_instance).to be_a(described_class.java_class)
13
10
  end
14
11
 
15
- it "should raise an ArgumentError without a valid model" do
16
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError without a valid model' do
13
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
17
14
  end
18
15
  end
19
16
 
20
- describe "pos tagging" do
21
- let(:pos_tagger) { subject.new(model) }
22
-
23
- it "should tag parts of a provided document" do
24
- tagged = pos_tagger.tag("The quick brown fox jumps over the lazy dog.")
25
- tagged.should == "The/DT quick/JJ brown/JJ fox/NN jumps/NNS over/IN the/DT lazy/JJ dog./NN"
17
+ describe '#tag' do
18
+ it 'tags parts of a provided document' do
19
+ tagged = pos_tagger.tag('The quick brown fox jumps over the lazy dog.')
20
+ expect(tagged).to eq('The/DT quick/JJ brown/JJ fox/NN jumps/NNS over/IN the/DT lazy/JJ dog./NN')
26
21
  end
27
22
 
28
- it "should tag provided tokens" do
23
+ it 'tags provided tokens' do
29
24
  tagged = pos_tagger.tag(%w(The quick brown fox jumps over the lazy dog .))
30
- tagged.to_ary.should == %w(DT JJ JJ NN NNS IN DT JJ NN .)
25
+ expect(tagged.to_ary).to eq(%w(DT JJ JJ NN NNS IN DT JJ NN .))
26
+ end
27
+
28
+ it 'raises an ArgumentError when nil is passed as an argument' do
29
+ expect { pos_tagger.tag(nil) }.to raise_error(ArgumentError)
31
30
  end
32
31
 
33
- it "should raise an ArgumentError for a non-string" do
34
- lambda { pos_tagger.tag(nil) }.should raise_error(ArgumentError)
32
+ it 'raises an ArgumentError when fixnum is passed as an argument' do
33
+ expect { pos_tagger.tag(111) }.to raise_error(ArgumentError)
35
34
  end
36
35
  end
37
- end
36
+ end
data/spec/sentence_detector_spec.rb CHANGED
@@ -1,50 +1,59 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::SentenceDetector do
4
- subject { OpenNlp::SentenceDetector }
3
+ RSpec.describe OpenNlp::SentenceDetector do
4
+ let(:model) { OpenNlp::Model::SentenceDetector.new(File.join(FIXTURES_DIR, 'en-sent.bin')) }
5
+ let(:sentence_detector) { described_class.new(model) }
5
6
 
6
- let(:model) { OpenNlp::Model::SentenceDetector.new(File.join(FIXTURES_DIR, "en-sent.bin")) }
7
-
8
- describe "initialization" do
9
- it "should initialize with a valid model" do
10
- sent_detector = subject.new(model)
11
- sent_detector.should be_a(subject)
12
- sent_detector.j_instance.should be_a(subject.java_class)
7
+ describe 'initialization' do
8
+ it 'initializes with a valid model' do
9
+ expect(sentence_detector.j_instance).to be_a(described_class.java_class)
13
10
  end
14
11
 
15
- it "should raise an ArgumentError without a valid model" do
16
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError without a valid model' do
13
+ expect { subject.new(nil) }.to raise_error(ArgumentError)
17
14
  end
18
15
  end
19
16
 
20
- describe "#detect" do
21
- let(:sent_detector) { subject.new(model) }
17
+ describe '#detect' do
18
+ it 'detects no sentences in an empty string' do
19
+ sentences = sentence_detector.detect('')
20
+ expect(sentences).to eq([])
21
+ end
22
+
23
+ it 'detects sentences in a string' do
24
+ sentences = sentence_detector.detect('The sky is blue. The Grass is green.')
25
+ expect(sentences).to eq(['The sky is blue.', 'The Grass is green.'])
26
+ end
22
27
 
23
- it "should detect no sentences in an empty string" do
24
- sentences = sent_detector.detect("")
25
- sentences.should == []
28
+ it 'raises an ArgumentError when nil is passed as an argument' do
29
+ expect { sentence_detector.detect(nil) }.to raise_error(ArgumentError)
26
30
  end
27
31
 
28
- it "should detect sentences in a string" do
29
- sentences = sent_detector.detect("The sky is blue. The Grass is green.")
30
- sentences.should == ["The sky is blue.", "The Grass is green."]
32
+ it 'raises an ArgumentError when fixnum is passed as an argument' do
33
+ expect { sentence_detector.detect(111) }.to raise_error(ArgumentError)
31
34
  end
32
35
 
33
- it "should raise an ArgumentError for a non-string" do
34
- lambda { sent_detector.detect(nil) }.should raise_error(ArgumentError)
36
+ it 'raises an ArgumentError when array is passed as an argument' do
37
+ expect { sentence_detector.detect([1, 2]) }.to raise_error(ArgumentError)
35
38
  end
36
39
  end
37
40
 
38
- describe "#pos_detect" do
39
- let(:sent_detector) { subject.new(model) }
41
+ describe '#pos_detect' do
42
+ it 'detects sentences in a string' do
43
+ sentences = sentence_detector.pos_detect('The sky is blue. The Grass is green.')
44
+ expect(sentences).to eq([OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)])
45
+ end
46
+
47
+ it 'raises an ArgumentError when nil is passed as an argument' do
48
+ expect { sentence_detector.pos_detect(nil) }.to raise_error(ArgumentError)
49
+ end
40
50
 
41
- it "should detect sentences in a string" do
42
- sentences = sent_detector.pos_detect("The sky is blue. The Grass is green.")
43
- sentences.should == [OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)]
51
+ it 'raises an ArgumentError when fixnum is passed as an argument' do
52
+ expect { sentence_detector.pos_detect(111) }.to raise_error(ArgumentError)
44
53
  end
45
54
 
46
- it "should raise an ArgumentError for a non-string" do
47
- expect { sent_detector.detect(nil) }.to raise_error(ArgumentError)
55
+ it 'raises an ArgumentError when array is passed as an argument' do
56
+ expect { sentence_detector.pos_detect([1, 2]) }.to raise_error(ArgumentError)
48
57
  end
49
58
  end
50
- end
59
+ end
data/spec/spec_helper.rb CHANGED
@@ -2,4 +2,4 @@ require 'rubygems'
2
2
  require 'java'
3
3
  require 'open_nlp'
4
4
 
5
- FIXTURES_DIR = File.join(File.dirname(__FILE__), "fixtures")
5
+ FIXTURES_DIR = File.join(File.dirname(__FILE__), 'fixtures')
data/spec/tokenizer_spec.rb CHANGED
@@ -1,36 +1,40 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Tokenizer do
4
- subject { OpenNlp::Tokenizer }
3
+ RSpec.describe OpenNlp::Tokenizer do
4
+ let(:model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
5
+ let(:tokenizer) { described_class.new(model) }
5
6
 
6
- let(:model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
7
-
8
- describe "initialization" do
9
- it "should initialize a new tokenizer" do
10
- tokenizer = subject.new(model)
11
- tokenizer.should be_a(subject)
7
+ describe 'initialization' do
8
+ it 'initialize a new tokenizer' do
9
+ expect(tokenizer.j_instance).to be_a(described_class.java_class)
12
10
  end
13
11
 
14
- it "should raise an argument error when no model is supplied" do
15
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an argument error when no model is specified' do
13
+ expect { subject.new(nil) }.to raise_error(ArgumentError)
16
14
  end
17
15
  end
18
16
 
19
- describe "tokenize a string" do
20
- let(:tokenizer) { subject.new(model) }
17
+ describe 'tokenize a string' do
18
+ it 'tokenizes an empty string' do
19
+ tokens = tokenizer.tokenize('')
20
+ expect(tokens).to eq([])
21
+ end
22
+
23
+ it 'tokenizes a sentence' do
24
+ tokens = tokenizer.tokenize('The red fox sleeps soundly.')
25
+ expect(tokens).to eq(['The', 'red', 'fox', 'sleeps', 'soundly', '.'])
26
+ end
21
27
 
22
- it "should tokenize an empty string" do
23
- tokens = tokenizer.tokenize("")
24
- tokens.should == []
28
+ it 'raises an error when nil is passed as an argument' do
29
+ expect { tokenizer.tokenize(nil) }.to raise_error(ArgumentError)
25
30
  end
26
31
 
27
- it "should tokenize a sentence" do
28
- tokens = tokenizer.tokenize("The red fox sleeps soundly.")
29
- tokens.should == ["The", "red", "fox", "sleeps", "soundly", "."]
32
+ it 'raises an error when fixnum is passed as an argument' do
33
+ expect { tokenizer.tokenize(111) }.to raise_error(ArgumentError)
30
34
  end
31
35
 
32
- it "should raise an error when not passed a string" do
33
- lambda { tokenizer.tokenize(nil) }.should raise_error(ArgumentError)
36
+ it 'raises an error when array is passed as an argument' do
37
+ expect { tokenizer.tokenize([1, 2]) }.to raise_error(ArgumentError)
34
38
  end
35
39
  end
36
- end
40
+ end