open_nlp 0.0.6-java → 0.0.7-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ .idea
data/lib/open_nlp.rb CHANGED
@@ -17,6 +17,9 @@ require 'open_nlp/model/sentence_detector'
17
17
  require 'open_nlp/model/tokenizer'
18
18
  require 'open_nlp/model/parser'
19
19
 
20
+ require 'open_nlp/util'
21
+ require 'open_nlp/util/span'
22
+
20
23
  require 'open_nlp/tool'
21
24
  require 'open_nlp/categorizer'
22
25
  require 'open_nlp/named_entity_detector'
@@ -6,5 +6,10 @@ module OpenNlp
6
6
  raise ArgumentError, "string must be a String" unless string.is_a?(String)
7
7
  @j_instance.sentDetect(string).to_ary
8
8
  end
9
+
10
+ def pos_detect(string) # returns one OpenNlp::Util::Span per span yielded by sentPosDetect
11
+ raise ArgumentError, "string must be a String" unless string.is_a?(String) # rejects non-String input, nil included
12
+ @j_instance.sentPosDetect(string).map{|span| OpenNlp::Util::Span.new(span.getStart, span.getEnd)} # only start/end are copied into the wrapper
13
+ end
9
14
  end
10
15
  end
@@ -0,0 +1,5 @@
1
+ module OpenNlp
2
+ module Util # empty namespace; OpenNlp::Util::Span is defined under it
3
+
4
+ end
5
+ end
@@ -0,0 +1,38 @@
1
+ class OpenNlp::Util::Span
2
+ include OpenNlp::JavaClass
3
+
4
+ self.java_class = Java::opennlp.tools.util.Span
5
+
6
+ attr_reader :j_instance
7
+
8
+ def initialize(s, e)
9
+ raise ArgumentError, "s should be an integer" unless s.is_a?(Fixnum)
10
+ raise ArgumentError, "e should be an integer" unless e.is_a?(Fixnum)
11
+
12
+ @j_instance = self.class.java_class.new(s, e)
13
+ end
14
+
15
+ def start
16
+ j_instance.getStart
17
+ end
18
+
19
+ def end
20
+ j_instance.getEnd
21
+ end
22
+
23
+ def type
24
+ j_instance.getType
25
+ end
26
+
27
+ def length
28
+ j_instance.length
29
+ end
30
+
31
+ def ==(obj)
32
+ return false unless obj.is_a?(self.class)
33
+
34
+ [:start, :end, :type].each_with_object(true) do |m,res|
35
+ res = res && self.public_send(m) == obj.public_send(m)
36
+ end
37
+ end
38
+ end
@@ -1,3 +1,3 @@
1
1
  module OpenNlp
2
- VERSION = '0.0.6'
2
+ VERSION = '0.0.7'
3
3
  end
@@ -17,7 +17,7 @@ describe OpenNlp::SentenceDetector do
17
17
  end
18
18
  end
19
19
 
20
- describe "sentence detection" do
20
+ describe "#detect" do
21
21
  let(:sent_detector) { subject.new(model) }
22
22
 
23
23
  it "should detect no sentences in an empty string" do
@@ -34,4 +34,17 @@ describe OpenNlp::SentenceDetector do
34
34
  lambda { sent_detector.detect(nil) }.should raise_error(ArgumentError)
35
35
  end
36
36
  end
37
- end
37
+
38
+ describe "#pos_detect" do
39
+ let(:sent_detector) { subject.new(model) }
40
+
41
+ it "should detect sentences in a string" do
42
+ sentences = sent_detector.pos_detect("The sky is blue. The Grass is green.")
43
+ sentences.should == [OpenNlp::Util::Span.new(0, 16), OpenNlp::Util::Span.new(17, 36)]
44
+ end
45
+
46
+ it "should raise an ArgumentError for a non-string" do
47
+ expect { sent_detector.detect(nil) }.to raise_error(ArgumentError)
48
+ end
49
+ end
50
+ end
@@ -33,4 +33,4 @@ describe OpenNlp::Tokenizer do
33
33
  lambda { tokenizer.tokenize(nil) }.should raise_error(ArgumentError)
34
34
  end
35
35
  end
36
- end
36
+ end
metadata CHANGED
@@ -2,14 +2,14 @@
2
2
  name: open_nlp
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.0.6
5
+ version: 0.0.7
6
6
  platform: java
7
7
  authors:
8
8
  - Hck
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-31 00:00:00.000000000 Z
12
+ date: 2013-02-22 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: JRuby tools wrapper for Apache OpenNLP
15
15
  email:
@@ -45,6 +45,8 @@ files:
45
45
  - lib/open_nlp/sentence_detector.rb
46
46
  - lib/open_nlp/tokenizer.rb
47
47
  - lib/open_nlp/tool.rb
48
+ - lib/open_nlp/util.rb
49
+ - lib/open_nlp/util/span.rb
48
50
  - lib/open_nlp/version.rb
49
51
  - open_nlp.gemspec
50
52
  - spec/categorizer_spec.rb