open_nlp 0.0.6-java → 0.0.7-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +1 -0
- data/lib/open_nlp.rb +3 -0
- data/lib/open_nlp/sentence_detector.rb +5 -0
- data/lib/open_nlp/util.rb +5 -0
- data/lib/open_nlp/util/span.rb +38 -0
- data/lib/open_nlp/version.rb +1 -1
- data/spec/sentence_detector_spec.rb +15 -2
- data/spec/tokenizer_spec.rb +1 -1
- metadata +4 -2
data/.gitignore
CHANGED
data/lib/open_nlp.rb
CHANGED
@@ -17,6 +17,9 @@ require 'open_nlp/model/sentence_detector'
|
|
17
17
|
require 'open_nlp/model/tokenizer'
|
18
18
|
require 'open_nlp/model/parser'
|
19
19
|
|
20
|
+
require 'open_nlp/util'
|
21
|
+
require 'open_nlp/util/span'
|
22
|
+
|
20
23
|
require 'open_nlp/tool'
|
21
24
|
require 'open_nlp/categorizer'
|
22
25
|
require 'open_nlp/named_entity_detector'
|
data/lib/open_nlp/sentence_detector.rb
CHANGED
@@ -6,5 +6,10 @@ module OpenNlp
|
|
6
6
|
raise ArgumentError, "string must be a String" unless string.is_a?(String)
|
7
7
|
@j_instance.sentDetect(string).to_ary
|
8
8
|
end
|
9
|
+
|
10
|
+
def pos_detect(string)
|
11
|
+
raise ArgumentError, "string must be a String" unless string.is_a?(String)
|
12
|
+
@j_instance.sentPosDetect(string).map{|span| OpenNlp::Util::Span.new(span.getStart, span.getEnd)}
|
13
|
+
end
|
9
14
|
end
|
10
15
|
end
|
data/lib/open_nlp/util/span.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
class OpenNlp::Util::Span
|
2
|
+
include OpenNlp::JavaClass
|
3
|
+
|
4
|
+
self.java_class = Java::opennlp.tools.util.Span
|
5
|
+
|
6
|
+
attr_reader :j_instance
|
7
|
+
|
8
|
+
def initialize(s, e)
|
9
|
+
raise ArgumentError, "s should be an integer" unless s.is_a?(Fixnum)
|
10
|
+
raise ArgumentError, "e should be an integer" unless e.is_a?(Fixnum)
|
11
|
+
|
12
|
+
@j_instance = self.class.java_class.new(s, e)
|
13
|
+
end
|
14
|
+
|
15
|
+
def start
|
16
|
+
j_instance.getStart
|
17
|
+
end
|
18
|
+
|
19
|
+
def end
|
20
|
+
j_instance.getEnd
|
21
|
+
end
|
22
|
+
|
23
|
+
def type
|
24
|
+
j_instance.getType
|
25
|
+
end
|
26
|
+
|
27
|
+
def length
|
28
|
+
j_instance.length
|
29
|
+
end
|
30
|
+
|
31
|
+
def ==(obj)
|
32
|
+
return false unless obj.is_a?(self.class)
|
33
|
+
|
34
|
+
[:start, :end, :type].each_with_object(true) do |m,res|
|
35
|
+
res = res && self.public_send(m) == obj.public_send(m)
|
36
|
+
end
|
37
|
+
end
|
38
|
+
end
|
data/lib/open_nlp/version.rb
CHANGED
data/spec/sentence_detector_spec.rb
CHANGED
@@ -17,7 +17,7 @@ describe OpenNlp::SentenceDetector do
|
|
17
17
|
end
|
18
18
|
end
|
19
19
|
|
20
|
-
describe "
|
20
|
+
describe "#detect" do
|
21
21
|
let(:sent_detector) { subject.new(model) }
|
22
22
|
|
23
23
|
it "should detect no sentences in an empty string" do
|
@@ -34,4 +34,17 @@ describe OpenNlp::SentenceDetector do
|
|
34
34
|
lambda { sent_detector.detect(nil) }.should raise_error(ArgumentError)
|
35
35
|
end
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
|
+
describe "#pos_detect" do
|
39
|
+
let(:sent_detector) { subject.new(model) }
|
40
|
+
|
41
|
+
it "should detect sentences in a string" do
|
42
|
+
sentences = sent_detector.pos_detect("The sky is blue. The Grass is green.")
|
43
|
+
sentences.should == [OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)]
|
44
|
+
end
|
45
|
+
|
46
|
+
it "should raise an ArgumentError for a non-string" do
|
47
|
+
expect { sent_detector.detect(nil) }.to raise_error(ArgumentError)
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
data/spec/tokenizer_spec.rb
CHANGED
metadata
CHANGED
@@ -2,14 +2,14 @@
|
|
2
2
|
name: open_nlp
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 0.0.6
|
5
|
+
version: 0.0.7
|
6
6
|
platform: java
|
7
7
|
authors:
|
8
8
|
- Hck
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2013-02-22 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: JRuby tools wrapper for Apache OpenNLP
|
15
15
|
email:
|
@@ -45,6 +45,8 @@ files:
|
|
45
45
|
- lib/open_nlp/sentence_detector.rb
|
46
46
|
- lib/open_nlp/tokenizer.rb
|
47
47
|
- lib/open_nlp/tool.rb
|
48
|
+
- lib/open_nlp/util.rb
|
49
|
+
- lib/open_nlp/util/span.rb
|
48
50
|
- lib/open_nlp/version.rb
|
49
51
|
- open_nlp.gemspec
|
50
52
|
- spec/categorizer_spec.rb
|