open_nlp 0.0.6-java → 0.0.7-java

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .idea
data/lib/open_nlp.rb CHANGED
@@ -17,6 +17,9 @@ require 'open_nlp/model/sentence_detector'
17
17
  require 'open_nlp/model/tokenizer'
18
18
  require 'open_nlp/model/parser'
19
19
 
20
+ require 'open_nlp/util'
21
+ require 'open_nlp/util/span'
22
+
20
23
  require 'open_nlp/tool'
21
24
  require 'open_nlp/categorizer'
22
25
  require 'open_nlp/named_entity_detector'
@@ -6,5 +6,10 @@ module OpenNlp
6
6
  raise ArgumentError, "string must be a String" unless string.is_a?(String)
7
7
  @j_instance.sentDetect(string).to_ary
8
8
  end
9
+
10
# Detects sentences in +string+ and reports them as spans rather than as
# substrings.
#
# @param string [String] text handed to the underlying sentence detector
# @return [Array<OpenNlp::Util::Span>] one wrapper span per span returned by
#   the Java detector's sentPosDetect, rebuilt from its start and end values
# @raise [ArgumentError] if +string+ is not a String
def pos_detect(string)
  unless string.is_a?(String)
    raise ArgumentError, "string must be a String"
  end

  java_spans = @j_instance.sentPosDetect(string)
  java_spans.map do |java_span|
    # Only the bounds are carried over into the Ruby-side wrapper.
    OpenNlp::Util::Span.new(java_span.getStart, java_span.getEnd)
  end
end
9
14
  end
10
15
  end
@@ -0,0 +1,5 @@
1
# Namespace module under OpenNlp. It is intentionally empty here; it only
# exists so that constants can be nested beneath OpenNlp::Util.
module OpenNlp
  module Util
  end
end
@@ -0,0 +1,38 @@
1
# Ruby wrapper around the Java class opennlp.tools.util.Span. Every reader
# below delegates to the wrapped Java instance.
class OpenNlp::Util::Span
  include OpenNlp::JavaClass

  self.java_class = Java::opennlp.tools.util.Span

  # The wrapped Java Span object.
  attr_reader :j_instance

  # Builds the underlying Java span from two integer bounds.
  #
  # @param s [Integer] start value passed to the Java constructor
  # @param e [Integer] end value passed to the Java constructor
  # @raise [ArgumentError] if either bound is not an integer
  def initialize(s, e)
    # Integer instead of Fixnum: Fixnum is deprecated and later removed from
    # Ruby, while Integer accepts every value Fixnum did.
    raise ArgumentError, "s should be an integer" unless s.is_a?(Integer)
    raise ArgumentError, "e should be an integer" unless e.is_a?(Integer)

    @j_instance = self.class.java_class.new(s, e)
  end

  # @return the value of getStart on the wrapped Java span
  def start
    j_instance.getStart
  end

  # @return the value of getEnd on the wrapped Java span
  def end
    j_instance.getEnd
  end

  # @return the value of getType on the wrapped Java span
  def type
    j_instance.getType
  end

  # @return the value of length on the wrapped Java span
  def length
    j_instance.length
  end

  # Two spans are equal when the other object is a Span (or subclass) and
  # start, end and type all match.
  #
  # @param obj [Object] object to compare against
  # @return [Boolean]
  def ==(obj)
    return false unless obj.is_a?(self.class)

    # all? yields the real conjunction. The previous each_with_object(true)
    # always returned its initial memo (true), so any two spans compared equal.
    [:start, :end, :type].all? { |m| public_send(m) == obj.public_send(m) }
  end
end
@@ -1,3 +1,3 @@
1
1
  module OpenNlp
2
- VERSION = '0.0.6'
2
+ VERSION = '0.0.7'
3
3
  end
@@ -17,7 +17,7 @@ describe OpenNlp::SentenceDetector do
17
17
  end
18
18
  end
19
19
 
20
- describe "sentence detection" do
20
+ describe "#detect" do
21
21
  let(:sent_detector) { subject.new(model) }
22
22
 
23
23
  it "should detect no sentences in an empty string" do
@@ -34,4 +34,17 @@ describe OpenNlp::SentenceDetector do
34
34
  lambda { sent_detector.detect(nil) }.should raise_error(ArgumentError)
35
35
  end
36
36
  end
37
- end
37
+
38
# Examples for SentenceDetector#pos_detect, which reports sentences as spans.
describe "#pos_detect" do
  let(:sent_detector) { subject.new(model) }

  it "should detect sentences in a string" do
    sentences = sent_detector.pos_detect("The sky is blue. The Grass is green.")
    sentences.should == [OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)]
  end

  it "should raise an ArgumentError for a non-string" do
    # Must call pos_detect here; calling detect would only re-test the other
    # method's guard and leave this one unverified.
    expect { sent_detector.pos_detect(nil) }.to raise_error(ArgumentError)
  end
end
50
+ end
@@ -33,4 +33,4 @@ describe OpenNlp::Tokenizer do
33
33
  lambda { tokenizer.tokenize(nil) }.should raise_error(ArgumentError)
34
34
  end
35
35
  end
36
- end
36
+ end
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: open_nlp
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.6
5
+ version: 0.0.7
6
6
  platform: java
7
7
  authors:
8
8
  - Hck
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-31 00:00:00.000000000 Z
12
+ date: 2013-02-22 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: JRuby tools wrapper for Apache OpenNLP
15
15
  email:
@@ -45,6 +45,8 @@ files:
45
45
  - lib/open_nlp/sentence_detector.rb
46
46
  - lib/open_nlp/tokenizer.rb
47
47
  - lib/open_nlp/tool.rb
48
+ - lib/open_nlp/util.rb
49
+ - lib/open_nlp/util/span.rb
48
50
  - lib/open_nlp/version.rb
49
51
  - open_nlp.gemspec
50
52
  - spec/categorizer_spec.rb