RubyGems - open_nlp - Versions diffs - 0.0.1-java → 0.0.2-java - Mend

open_nlp 0.0.1-java → 0.0.2-java

Files changed (5) hide show

data/README.md CHANGED

@@ -1,6 +1,11 @@
 # OpenNlp
-TODO: Write a gem description
+A JRuby wrapper for the Apache OpenNLP tools library that allows you to execute common natural language processing tasks, such as:
+ * sentence detection
+ * tokenization
+ * part-of-speech tagging
+ * named entity extraction
+ * chunk detection
 ## Installation
@@ -18,7 +23,37 @@ Or install it yourself as:
 ## Usage
-TODO: Write usage instructions here
+To use the open_nlp classes, require the gem in your source files:
+    require 'open_nlp'
+Then you can create instances of the open_nlp classes and use them for your NLP tasks:
+    # sentence detection
+    sentence_detect_model = OpenNlp::Model::SentenceDetector.new("nlp_models/en-sent.bin")
+    sentence_detector = OpenNlp::SentenceDetector.new(sentence_detect_model)
+    sentence_detector.detect('The red fox sleeps soundly.')
+    # tokenize
+    token_model = OpenNlp::Model::Tokenizer.new("nlp_models/en-token.bin")
+    tokenizer = OpenNlp::Tokenizer.new(token_model)
+    tokenizer.tokenize('The red fox sleeps soundly.')
+    # part-of-speech tagging
+    pos_model = OpenNlp::Model::POSTagger.new(File.join("nlp_models/en-pos-maxent.bin"))
+    pos_tagger = OpenNlp::POSTagger.new(pos_model)
+    # to tag a string, call OpenNlp::POSTagger#tag with a String argument
+    pos_tagger.tag('The red fox sleeps soundly.')
+    # to tag an array of tokens, call OpenNlp::POSTagger#tag with an Array argument
+    pos_tagger.tag(%w|The red fox sleeps soundly .|)
+    # chunk detection (the chunker also needs the tokenizer and POS-tagger models because it tokenizes and POS-tags the input as part of the chunking task)
+    chunk_model = OpenNlp::Model::Chunker.new(File.join("nlp_models/en-chunker.bin"))
+    token_model = OpenNlp::Model::Tokenizer.new("nlp_models/en-token.bin")
+    pos_model = OpenNlp::Model::POSTagger.new(File.join("nlp_models/en-pos-maxent.bin"))
+    chunker = OpenNlp::Chunker.new(chunk_model, token_model, pos_model)
 ## Contributing
@@ -26,4 +61,4 @@ TODO: Write usage instructions here
 2. Create your feature branch (`git checkout -b my-new-feature`)
 3. Commit your changes (`git commit -am 'Add some feature'`)
 4. Push to the branch (`git push origin my-new-feature`)
-5. Create new Pull Request
+5. Create new Pull Request

data/lib/open_nlp/chunker.rb CHANGED

@@ -3,7 +3,6 @@ module OpenNlp
     self.java_class = Java::opennlp.tools.chunker.ChunkerME
     def initialize(model, token_model, pos_model)
-      #raise ArgumentError, "model must be an OpenNlp::Chunker::Model" unless model.is_a?(Chunker::Model)
       super(model)
       raise ArgumentError, "model must be an OpenNlp::Tokenizer::Model" unless token_model.is_a?(Model::Tokenizer)
@@ -11,7 +10,6 @@ module OpenNlp
       @tokenizer = Tokenizer.new(token_model)
       @pos_tagger = POSTagger.new(pos_model)
-      #@j_instance = self.java_class.new(model.j_model)
     end
     def chunk(str)
@@ -32,8 +30,8 @@ module OpenNlp
       data.inject([]) do |acc, val|
         chunk = val[2]
-        acc << [val[0]] if chunk[0] == 'B'
-        acc.last << val[0] if chunk[0] == 'I'
+        acc << [{val[0] => val[1]}] if chunk[0] == 'B'
+        acc.last << {val[0] => val[1]} if chunk[0] == 'I'
         acc
       end

data/lib/open_nlp/version.rb CHANGED

@@ -1,3 +1,3 @@
 module OpenNlp
-  VERSION = '0.0.1'
+  VERSION = '0.0.2'
 end

data/spec/chunker_spec.rb CHANGED

@@ -28,7 +28,7 @@ describe OpenNlp::Chunker do
     it "should chunk a sentence" do
       chunks = chunker.chunk("The red fox sleeps soundly.")
-      chunks.should == [["The", "red", "fox", "sleeps"], ["soundly"]]
+      chunks.should == [[{"The"=>"DT"}, {"red"=>"JJ"}, {"fox"=>"NN"}, {"sleeps"=>"NNS"}], [{"soundly"=>"RB"}]]
     end
     it "should raise an error when not passed a string" do

metadata CHANGED

@@ -2,14 +2,14 @@
 name: open_nlp
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 0.0.1
+  version: 0.0.2
 platform: java
 authors:
 - Hck
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-09-21 00:00:00.000000000 Z
+date: 2012-09-24 00:00:00.000000000 Z
 dependencies: []
 description: JRuby tools wrapper for Apache OpenNLP
 email: