open_nlp 0.0.7-java → 0.1.0-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40)
  1. checksums.yaml +7 -0
  2. data/.gitignore +2 -2
  3. data/.ruby-version +1 -0
  4. data/.travis.yml +6 -0
  5. data/Gemfile.lock +31 -0
  6. data/README.md +8 -1
  7. data/lib/open_nlp.rb +3 -3
  8. data/lib/open_nlp/categorizer.rb +7 -3
  9. data/lib/open_nlp/chunker.rb +19 -8
  10. data/lib/open_nlp/model.rb +13 -9
  11. data/lib/open_nlp/named_entity_detector.rb +6 -2
  12. data/lib/open_nlp/opennlp-maxent-3.0.3.jar +0 -0
  13. data/lib/open_nlp/opennlp-tools-1.5.3.jar +0 -0
  14. data/lib/open_nlp/parser.rb +43 -33
  15. data/lib/open_nlp/parser/parse.rb +12 -21
  16. data/lib/open_nlp/pos_tagger.rb +5 -2
  17. data/lib/open_nlp/sentence_detector.rb +16 -6
  18. data/lib/open_nlp/tokenizer.rb +8 -3
  19. data/lib/open_nlp/tool.rb +1 -1
  20. data/lib/open_nlp/util.rb +1 -2
  21. data/lib/open_nlp/util/span.rb +5 -5
  22. data/lib/open_nlp/version.rb +1 -1
  23. data/spec/categorizer_spec.rb +24 -22
  24. data/spec/chunker_spec.rb +29 -28
  25. data/spec/model/chunker_spec.rb +12 -15
  26. data/spec/model/detokenizer_spec.rb +11 -14
  27. data/spec/model/named_entity_detector_spec.rb +11 -14
  28. data/spec/model/pos_tagger_spec.rb +12 -15
  29. data/spec/model/sentence_detector_spec.rb +11 -14
  30. data/spec/model/tokenizer_spec.rb +11 -14
  31. data/spec/named_entity_detector_spec.rb +28 -27
  32. data/spec/parser/parse_spec.rb +64 -56
  33. data/spec/parser_spec.rb +26 -21
  34. data/spec/pos_tagger_spec.rb +22 -23
  35. data/spec/sentence_detector_spec.rb +39 -30
  36. data/spec/spec_helper.rb +1 -1
  37. data/spec/tokenizer_spec.rb +26 -22
  38. metadata +16 -17
  39. data/lib/open_nlp/opennlp-maxent-3.0.2-incubating.jar +0 -0
  40. data/lib/open_nlp/opennlp-tools-1.5.2-incubating.jar +0 -0
data/spec/parser/parse_spec.rb CHANGED
@@ -1,13 +1,13 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Parser::Parse do
4
- subject { OpenNlp::Parser::Parse }
3
+ RSpec.describe OpenNlp::Parser::Parse do
5
4
  let(:text) { 'The red fox sleeps soundly .' }
6
- let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, "en-parser-chunking.bin")) }
7
- let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
5
+ let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, 'en-parser-chunking.bin')) }
6
+ let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
7
+ let(:parser) { OpenNlp::Parser.new(model, token_model) }
8
8
 
9
- describe "initialization" do
10
- it "should initialize a new parse object" do
9
+ describe 'initialization' do
10
+ it 'initializes a new parse object' do
11
11
  j_parse = Java::opennlp.tools.parser.Parse.new(
12
12
  text.to_java(:String),
13
13
  Java::opennlp.tools.util.Span.new(0, text.size),
@@ -16,91 +16,99 @@ describe OpenNlp::Parser::Parse do
16
16
  0.to_java(:Integer)
17
17
  )
18
18
 
19
- subject.new(j_parse).should be_a(subject)
19
+ parse = described_class.new(j_parse)
20
+ expect(parse.j_instance).to be_a(described_class.java_class)
20
21
  end
21
22
 
22
- it "should raise an argument error when no model is supplied" do
23
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
23
+ it 'raises an argument error when nil is passed as an argument' do
24
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
24
25
  end
25
26
  end
26
27
 
27
- describe "#tree_bank_string" do
28
- it "returns proper string value for parsed text" do
29
- parser = OpenNlp::Parser.new(model, token_model)
30
- expected = parser.parse(text).tree_bank_string
31
- expected.should == "(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))"
28
+ describe '#tree_bank_string' do
29
+ let(:expected_tree_bank_str) do
30
+ '(TOP (S (NP (DT The) (JJ red) (NN fox)) (VP (VBZ sleeps) (ADVP (RB soundly))) (. .)))'
31
+ end
32
+
33
+ it 'returns proper string value for parsed text' do
34
+ tree_bank_string = parser.parse(text).tree_bank_string
35
+ expect(tree_bank_string).to eq(expected_tree_bank_str)
32
36
  end
33
37
  end
34
38
 
35
- describe "#code_tree" do
36
- it "returns proper structure for parsed text" do
37
- parser = OpenNlp::Parser.new(model, token_model)
38
- parser.parse(text).code_tree.should == [
39
+ describe '#code_tree' do
40
+ let(:expected_code_tree) do
41
+ [
39
42
  {
40
- :type => "S",
41
- :parent_type => "TOP",
42
- :token => "The red fox sleeps soundly .",
43
+ :type => 'S',
44
+ :parent_type => 'TOP',
45
+ :token => 'The red fox sleeps soundly .',
43
46
  :children => [
44
47
  {
45
- :type => "NP",
46
- :parent_type => "S",
47
- :token => "The red fox",
48
+ :type => 'NP',
49
+ :parent_type => 'S',
50
+ :token => 'The red fox',
48
51
  :children => [
49
52
  {
50
- :type => "DT",
51
- :parent_type => "NP",
52
- :token => "The",
53
- :children => [{:type => "TK", :parent_type => "DT", :token => "The"}]
53
+ :type => 'DT',
54
+ :parent_type => 'NP',
55
+ :token => 'The',
56
+ :children => [{:type => 'TK', :parent_type => 'DT', :token => 'The'}]
54
57
  },
55
58
  {
56
- :type => "JJ",
57
- :parent_type => "NP",
58
- :token => "red",
59
- :children => [{:type => "TK", :parent_type => "JJ", :token => "red"}]
59
+ :type => 'JJ',
60
+ :parent_type => 'NP',
61
+ :token => 'red',
62
+ :children => [{:type => 'TK', :parent_type => 'JJ', :token => 'red'}]
60
63
  },
61
64
  {
62
- :type => "NN",
63
- :parent_type => "NP",
64
- :token => "fox",
65
- :children => [{:type => "TK", :parent_type => "NN", :token => "fox"}]
65
+ :type => 'NN',
66
+ :parent_type => 'NP',
67
+ :token => 'fox',
68
+ :children => [{:type => 'TK', :parent_type => 'NN', :token => 'fox'}]
66
69
  }
67
70
  ]
68
71
  },
69
72
  {
70
- :type => "VP",
71
- :parent_type => "S",
72
- :token => "sleeps soundly",
73
+ :type => 'VP',
74
+ :parent_type => 'S',
75
+ :token => 'sleeps soundly',
73
76
  :children => [
74
77
  {
75
- :type => "VBZ",
76
- :parent_type => "VP",
77
- :token => "sleeps",
78
- :children => [{:type => "TK", :parent_type => "VBZ", :token => "sleeps"}]
78
+ :type => 'VBZ',
79
+ :parent_type => 'VP',
80
+ :token => 'sleeps',
81
+ :children => [{:type => 'TK', :parent_type => 'VBZ', :token => 'sleeps'}]
79
82
  },
80
83
  {
81
- :type => "ADVP",
82
- :parent_type => "VP",
83
- :token => "soundly",
84
+ :type => 'ADVP',
85
+ :parent_type => 'VP',
86
+ :token => 'soundly',
84
87
  :children => [
85
88
  {
86
- :type => "RB",
87
- :parent_type => "ADVP",
88
- :token => "soundly",
89
- :children => [{:type => "TK", :parent_type => "RB", :token => "soundly"}]
89
+ :type => 'RB',
90
+ :parent_type => 'ADVP',
91
+ :token => 'soundly',
92
+ :children => [{:type => 'TK', :parent_type => 'RB', :token => 'soundly'}]
90
93
  }
91
94
  ]
92
95
  }
93
96
  ]
94
97
  },
95
98
  {
96
- :type => ".",
97
- :parent_type => "S",
98
- :token => ".",
99
- :children => [{:type => "TK", :parent_type => ".", :token => "."}]
99
+ :type => '.',
100
+ :parent_type => 'S',
101
+ :token => '.',
102
+ :children => [{:type => 'TK', :parent_type => '.', :token => '.'}]
100
103
  }
101
104
  ]
102
105
  }
103
106
  ]
104
107
  end
108
+
109
+ it 'returns proper structure for parsed text' do
110
+ code_tree = parser.parse(text).code_tree
111
+ expect(code_tree).to eq(expected_code_tree)
112
+ end
105
113
  end
106
- end
114
+ end
data/spec/parser_spec.rb CHANGED
@@ -1,39 +1,44 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Parser do
4
- subject { OpenNlp::Parser }
3
+ RSpec.describe OpenNlp::Parser do
5
4
  let(:model) { OpenNlp::Model::Parser.new(File.join(FIXTURES_DIR, "en-parser-chunking.bin")) }
6
5
  let(:token_model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
6
+ let(:parser) { described_class.new(model, token_model) }
7
7
 
8
- describe "initialization" do
9
- it "should initialize a new parser" do
10
- parser = subject.new(model, token_model)
11
- parser.should be_a(subject)
8
+ describe 'initialization' do
9
+ it 'initializes a new parser' do
10
+ expect(parser.j_instance).to be_a(Java::opennlp.tools.parser.chunking.Parser)
12
11
  end
13
12
 
14
- it "should raise an argument error when no model is supplied" do
15
- lambda { subject.new(nil, nil) }.should raise_error(ArgumentError)
13
+ it 'raises an argument error when no model specified' do
14
+ expect { described_class.new(nil, nil) }.to raise_error(ArgumentError)
16
15
  end
17
16
 
18
- it "should raise an argument error when no token_model is supplied" do
19
- lambda { subject.new(model, nil) }.should raise_error(ArgumentError)
17
+ it 'raises an argument error when no token_model is specified' do
18
+ expect { described_class.new(model, nil) }.to raise_error(ArgumentError)
20
19
  end
21
20
  end
22
21
 
23
- describe "parsing a string" do
24
- let(:parser) { subject.new(model, token_model) }
22
+ describe '#parse' do
23
+ it 'parses an empty string' do
24
+ expect(parser.parse('')).to eq({})
25
+ end
26
+
27
+ it 'parses a sentence' do
28
+ res = parser.parse('The red fox sleeps soundly .')
29
+ expect(res).to be_a(OpenNlp::Parser::Parse)
30
+ end
25
31
 
26
- it "should parse an empty string" do
27
- parser.parse("").should == {}
32
+ it 'raises an error when nil is passed as an argument' do
33
+ expect { parser.parse(nil) }.to raise_error(ArgumentError)
28
34
  end
29
35
 
30
- it "should parse a sentence" do
31
- res = parser.parse("The red fox sleeps soundly .")
32
- res.class.should == OpenNlp::Parser::Parse
36
+ it 'raises an error when fixnum is passed as an argument' do
37
+ expect { parser.parse(111) }.to raise_error(ArgumentError)
33
38
  end
34
39
 
35
- it "should raise an error when not passed a string" do
36
- lambda { parser.parse(nil) }.should raise_error(ArgumentError)
40
+ it 'raises an error when array is passed as an argument' do
41
+ expect { parser.parse([1, 2]) }.to raise_error(ArgumentError)
37
42
  end
38
43
  end
39
- end
44
+ end
data/spec/pos_tagger_spec.rb CHANGED
@@ -1,37 +1,36 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::POSTagger do
4
- subject { OpenNlp::POSTagger }
3
+ RSpec.describe OpenNlp::POSTagger do
4
+ let(:model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, 'en-pos-maxent.bin')) }
5
+ let(:pos_tagger) { described_class.new(model) }
5
6
 
6
- let(:model) { OpenNlp::Model::POSTagger.new(File.join(FIXTURES_DIR, "en-pos-maxent.bin")) }
7
-
8
- describe "initialization" do
9
- it "should initialize with a valid model" do
10
- tagger = subject.new(model)
11
- tagger.should be_a(subject)
12
- tagger.j_instance.should be_a(subject.java_class)
7
+ describe 'initialization' do
8
+ it 'initialize with a valid model' do
9
+ expect(pos_tagger.j_instance).to be_a(described_class.java_class)
13
10
  end
14
11
 
15
- it "should raise an ArgumentError without a valid model" do
16
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError without a valid model' do
13
+ expect { described_class.new(nil) }.to raise_error(ArgumentError)
17
14
  end
18
15
  end
19
16
 
20
- describe "pos tagging" do
21
- let(:pos_tagger) { subject.new(model) }
22
-
23
- it "should tag parts of a provided document" do
24
- tagged = pos_tagger.tag("The quick brown fox jumps over the lazy dog.")
25
- tagged.should == "The/DT quick/JJ brown/JJ fox/NN jumps/NNS over/IN the/DT lazy/JJ dog./NN"
17
+ describe '#tag' do
18
+ it 'tags parts of a provided document' do
19
+ tagged = pos_tagger.tag('The quick brown fox jumps over the lazy dog.')
20
+ expect(tagged).to eq('The/DT quick/JJ brown/JJ fox/NN jumps/NNS over/IN the/DT lazy/JJ dog./NN')
26
21
  end
27
22
 
28
- it "should tag provided tokens" do
23
+ it 'tags provided tokens' do
29
24
  tagged = pos_tagger.tag(%w(The quick brown fox jumps over the lazy dog .))
30
- tagged.to_ary.should == %w(DT JJ JJ NN NNS IN DT JJ NN .)
25
+ expect(tagged.to_ary).to eq(%w(DT JJ JJ NN NNS IN DT JJ NN .))
26
+ end
27
+
28
+ it 'raises an ArgumentError when nil is passed as an argument' do
29
+ expect { pos_tagger.tag(nil) }.to raise_error(ArgumentError)
31
30
  end
32
31
 
33
- it "should raise an ArgumentError for a non-string" do
34
- lambda { pos_tagger.tag(nil) }.should raise_error(ArgumentError)
32
+ it 'raises an ArgumentError when fixnum is passed as an argument' do
33
+ expect { pos_tagger.tag(111) }.to raise_error(ArgumentError)
35
34
  end
36
35
  end
37
- end
36
+ end
data/spec/sentence_detector_spec.rb CHANGED
@@ -1,50 +1,59 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::SentenceDetector do
4
- subject { OpenNlp::SentenceDetector }
3
+ RSpec.describe OpenNlp::SentenceDetector do
4
+ let(:model) { OpenNlp::Model::SentenceDetector.new(File.join(FIXTURES_DIR, 'en-sent.bin')) }
5
+ let(:sentence_detector) { described_class.new(model) }
5
6
 
6
- let(:model) { OpenNlp::Model::SentenceDetector.new(File.join(FIXTURES_DIR, "en-sent.bin")) }
7
-
8
- describe "initialization" do
9
- it "should initialize with a valid model" do
10
- sent_detector = subject.new(model)
11
- sent_detector.should be_a(subject)
12
- sent_detector.j_instance.should be_a(subject.java_class)
7
+ describe 'initialization' do
8
+ it 'initializes with a valid model' do
9
+ expect(sentence_detector.j_instance).to be_a(described_class.java_class)
13
10
  end
14
11
 
15
- it "should raise an ArgumentError without a valid model" do
16
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an ArgumentError without a valid model' do
13
+ expect { subject.new(nil) }.to raise_error(ArgumentError)
17
14
  end
18
15
  end
19
16
 
20
- describe "#detect" do
21
- let(:sent_detector) { subject.new(model) }
17
+ describe '#detect' do
18
+ it 'detects no sentences in an empty string' do
19
+ sentences = sentence_detector.detect('')
20
+ expect(sentences).to eq([])
21
+ end
22
+
23
+ it 'detects sentences in a string' do
24
+ sentences = sentence_detector.detect('The sky is blue. The Grass is green.')
25
+ expect(sentences).to eq(['The sky is blue.', 'The Grass is green.'])
26
+ end
22
27
 
23
- it "should detect no sentences in an empty string" do
24
- sentences = sent_detector.detect("")
25
- sentences.should == []
28
+ it 'raises an ArgumentError when nil is passed as an argument' do
29
+ expect { sentence_detector.detect(nil) }.to raise_error(ArgumentError)
26
30
  end
27
31
 
28
- it "should detect sentences in a string" do
29
- sentences = sent_detector.detect("The sky is blue. The Grass is green.")
30
- sentences.should == ["The sky is blue.", "The Grass is green."]
32
+ it 'raises an ArgumentError when fixnum is passed as an argument' do
33
+ expect { sentence_detector.detect(111) }.to raise_error(ArgumentError)
31
34
  end
32
35
 
33
- it "should raise an ArgumentError for a non-string" do
34
- lambda { sent_detector.detect(nil) }.should raise_error(ArgumentError)
36
+ it 'raises an ArgumentError when array is passed as an argument' do
37
+ expect { sentence_detector.detect([1, 2]) }.to raise_error(ArgumentError)
35
38
  end
36
39
  end
37
40
 
38
- describe "#pos_detect" do
39
- let(:sent_detector) { subject.new(model) }
41
+ describe '#pos_detect' do
42
+ it 'detects sentences in a string' do
43
+ sentences = sentence_detector.pos_detect('The sky is blue. The Grass is green.')
44
+ expect(sentences).to eq([OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)])
45
+ end
46
+
47
+ it 'raises an ArgumentError when nil is passed as an argument' do
48
+ expect { sentence_detector.pos_detect(nil) }.to raise_error(ArgumentError)
49
+ end
40
50
 
41
- it "should detect sentences in a string" do
42
- sentences = sent_detector.pos_detect("The sky is blue. The Grass is green.")
43
- sentences.should == [OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)]
51
+ it 'raises an ArgumentError when fixnum is passed as an argument' do
52
+ expect { sentence_detector.pos_detect(111) }.to raise_error(ArgumentError)
44
53
  end
45
54
 
46
- it "should raise an ArgumentError for a non-string" do
47
- expect { sent_detector.detect(nil) }.to raise_error(ArgumentError)
55
+ it 'raises an ArgumentError when array is passed as an argument' do
56
+ expect { sentence_detector.pos_detect([1, 2]) }.to raise_error(ArgumentError)
48
57
  end
49
58
  end
50
- end
59
+ end
data/spec/spec_helper.rb CHANGED
@@ -2,4 +2,4 @@ require 'rubygems'
2
2
  require 'java'
3
3
  require 'open_nlp'
4
4
 
5
- FIXTURES_DIR = File.join(File.dirname(__FILE__), "fixtures")
5
+ FIXTURES_DIR = File.join(File.dirname(__FILE__), 'fixtures')
data/spec/tokenizer_spec.rb CHANGED
@@ -1,36 +1,40 @@
1
- require "spec_helper"
1
+ require 'spec_helper'
2
2
 
3
- describe OpenNlp::Tokenizer do
4
- subject { OpenNlp::Tokenizer }
3
+ RSpec.describe OpenNlp::Tokenizer do
4
+ let(:model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, 'en-token.bin')) }
5
+ let(:tokenizer) { described_class.new(model) }
5
6
 
6
- let(:model) { OpenNlp::Model::Tokenizer.new(File.join(FIXTURES_DIR, "en-token.bin")) }
7
-
8
- describe "initialization" do
9
- it "should initialize a new tokenizer" do
10
- tokenizer = subject.new(model)
11
- tokenizer.should be_a(subject)
7
+ describe 'initialization' do
8
+ it 'initialize a new tokenizer' do
9
+ expect(tokenizer.j_instance).to be_a(described_class.java_class)
12
10
  end
13
11
 
14
- it "should raise an argument error when no model is supplied" do
15
- lambda { subject.new(nil) }.should raise_error(ArgumentError)
12
+ it 'raises an argument error when no model is specified' do
13
+ expect { subject.new(nil) }.to raise_error(ArgumentError)
16
14
  end
17
15
  end
18
16
 
19
- describe "tokenize a string" do
20
- let(:tokenizer) { subject.new(model) }
17
+ describe 'tokenize a string' do
18
+ it 'tokenizes an empty string' do
19
+ tokens = tokenizer.tokenize('')
20
+ expect(tokens).to eq([])
21
+ end
22
+
23
+ it 'tokenizes a sentence' do
24
+ tokens = tokenizer.tokenize('The red fox sleeps soundly.')
25
+ expect(tokens).to eq(['The', 'red', 'fox', 'sleeps', 'soundly', '.'])
26
+ end
21
27
 
22
- it "should tokenize an empty string" do
23
- tokens = tokenizer.tokenize("")
24
- tokens.should == []
28
+ it 'raises an error when nil is passed as an argument' do
29
+ expect { tokenizer.tokenize(nil) }.to raise_error(ArgumentError)
25
30
  end
26
31
 
27
- it "should tokenize a sentence" do
28
- tokens = tokenizer.tokenize("The red fox sleeps soundly.")
29
- tokens.should == ["The", "red", "fox", "sleeps", "soundly", "."]
32
+ it 'raises an error when fixnum is passed as an argument' do
33
+ expect { tokenizer.tokenize(111) }.to raise_error(ArgumentError)
30
34
  end
31
35
 
32
- it "should raise an error when not passed a string" do
33
- lambda { tokenizer.tokenize(nil) }.should raise_error(ArgumentError)
36
+ it 'raises an error when array is passed as an argument' do
37
+ expect { tokenizer.tokenize([1, 2]) }.to raise_error(ArgumentError)
34
38
  end
35
39
  end
36
- end
40
+ end