open_nlp 0.0.6-java → 0.0.7-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +1 -0
- data/lib/open_nlp.rb +3 -0
- data/lib/open_nlp/sentence_detector.rb +5 -0
- data/lib/open_nlp/util.rb +5 -0
- data/lib/open_nlp/util/span.rb +38 -0
- data/lib/open_nlp/version.rb +1 -1
- data/spec/sentence_detector_spec.rb +15 -2
- data/spec/tokenizer_spec.rb +1 -1
- metadata +4 -2
data/.gitignore
CHANGED
data/lib/open_nlp.rb
CHANGED
@@ -17,6 +17,9 @@ require 'open_nlp/model/sentence_detector'
|
|
17
17
|
require 'open_nlp/model/tokenizer'
|
18
18
|
require 'open_nlp/model/parser'
|
19
19
|
|
20
|
+
require 'open_nlp/util'
|
21
|
+
require 'open_nlp/util/span'
|
22
|
+
|
20
23
|
require 'open_nlp/tool'
|
21
24
|
require 'open_nlp/categorizer'
|
22
25
|
require 'open_nlp/named_entity_detector'
|
@@ -6,5 +6,10 @@ module OpenNlp
|
|
6
6
|
raise ArgumentError, "string must be a String" unless string.is_a?(String)
|
7
7
|
@j_instance.sentDetect(string).to_ary
|
8
8
|
end
|
9
|
+
|
10
|
+
# Detects sentences in +string+ and reports them as position spans rather
# than as substrings.
#
# @param string [String] the text to split into sentences
# @return [Array<OpenNlp::Util::Span>] one span per entry returned by the
#   underlying detector's sentPosDetect, built from its getStart/getEnd
# @raise [ArgumentError] if +string+ is not a String
def pos_detect(string)
  unless string.is_a?(String)
    raise ArgumentError, "string must be a String"
  end

  # Wrap each span returned by the Java detector in the Ruby-side Span
  # class; only the start and end offsets are carried over.
  java_spans = @j_instance.sentPosDetect(string)
  java_spans.map do |java_span|
    OpenNlp::Util::Span.new(java_span.getStart, java_span.getEnd)
  end
end
|
9
14
|
end
|
10
15
|
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# Ruby wrapper around the Java class opennlp.tools.util.Span.
#
# Each instance owns a Java Span (exposed as +j_instance+) and forwards its
# readers to it. Two wrappers are equal when their start, end and type all
# match.
class OpenNlp::Util::Span
  include OpenNlp::JavaClass

  # Java class instantiated by #initialize.
  # NOTE(review): the java_class accessor presumably comes from
  # OpenNlp::JavaClass, which is not visible here.
  self.java_class = Java::opennlp.tools.util.Span

  # @return [Java::opennlp.tools.util.Span] the wrapped Java object
  attr_reader :j_instance

  # @param s [Integer] start offset passed to the Java constructor
  # @param e [Integer] end offset passed to the Java constructor
  # @raise [ArgumentError] if either argument is not an integer
  def initialize(s, e)
    # Integer instead of Fixnum: Fixnum was folded into Integer in Ruby 2.4
    # and the constant has since been removed, so referencing it raises
    # NameError on current Rubies. Integer is available on every version.
    # NOTE(review): values outside the Java int range are left to the Java
    # constructor to reject.
    raise ArgumentError, "s should be an integer" unless s.is_a?(Integer)
    raise ArgumentError, "e should be an integer" unless e.is_a?(Integer)

    @j_instance = self.class.java_class.new(s, e)
  end

  # @return the wrapped span's getStart value
  def start
    j_instance.getStart
  end

  # @return the wrapped span's getEnd value
  def end
    j_instance.getEnd
  end

  # @return the wrapped span's getType value
  def type
    j_instance.getType
  end

  # @return the wrapped span's length
  def length
    j_instance.length
  end

  # Value equality: +obj+ must be a Span (or subclass) whose start, end and
  # type all equal this span's.
  #
  # The earlier implementation folded the comparisons with
  # each_with_object(true) and reassigned the block parameter. That method
  # always returns the original memo object, so every comparison between two
  # spans returned true. all? returns the actual conjunction and stops at the
  # first mismatch.
  #
  # @param obj [Object] the object to compare against
  # @return [Boolean]
  def ==(obj)
    return false unless obj.is_a?(self.class)

    [:start, :end, :type].all? { |m| public_send(m) == obj.public_send(m) }
  end
end
|
data/lib/open_nlp/version.rb
CHANGED
@@ -17,7 +17,7 @@ describe OpenNlp::SentenceDetector do
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
describe "
|
20
|
+
describe "#detect" do
|
21
21
|
let(:sent_detector) { subject.new(model) }
|
22
22
|
|
23
23
|
it "should detect no sentences in an empty string" do
|
@@ -34,4 +34,17 @@ describe OpenNlp::SentenceDetector do
|
|
34
34
|
lambda { sent_detector.detect(nil) }.should raise_error(ArgumentError)
|
35
35
|
end
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
|
+
# Examples for SentenceDetector#pos_detect, which reports sentences as
# OpenNlp::Util::Span offsets instead of substrings.
describe "#pos_detect" do
  let(:sent_detector) { subject.new(model) }

  it "should detect sentences in a string" do
    sentences = sent_detector.pos_detect("The sky is blue. The Grass is green.")
    sentences.should == [OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)]
  end

  it "should raise an ArgumentError for a non-string" do
    # Must call pos_detect, the method under test in this group. The example
    # previously called detect, so pos_detect's argument check was never run.
    expect { sent_detector.pos_detect(nil) }.to raise_error(ArgumentError)
  end
end
|
50
|
+
end
|
data/spec/tokenizer_spec.rb
CHANGED
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: open_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.6
|
5
|
+
version: 0.0.7
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Hck
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-02-22 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: JRuby tools wrapper for Apache OpenNLP
|
15
15
|
email:
|
@@ -45,6 +45,8 @@ files:
|
|
45
45
|
- lib/open_nlp/sentence_detector.rb
|
46
46
|
- lib/open_nlp/tokenizer.rb
|
47
47
|
- lib/open_nlp/tool.rb
|
48
|
+
- lib/open_nlp/util.rb
|
49
|
+
- lib/open_nlp/util/span.rb
|
48
50
|
- lib/open_nlp/version.rb
|
49
51
|
- open_nlp.gemspec
|
50
52
|
- spec/categorizer_spec.rb
|