RubyGems - open_nlp - Versions diffs - 0.0.1-java → 0.0.2-java - Mend

open_nlp 0.0.1-java → 0.0.2-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

data/README.md CHANGED

@@ -1,6 +1,11 @@
 # OpenNlp
-TODO: Write a gem description
+A JRuby wrapper for the Apache OpenNLP tools library that allows you to execute common natural language processing tasks, such as
+ * sentence detection
+ * tokenize
+ * part-of-speech tagging
+ * named entity extraction
+ * chunks detection
 ## Installation
@@ -18,7 +23,37 @@ Or install it yourself as:
 ## Usage
-TODO: Write usage instructions here
+To use open_nlp classes, you need to require the gem in your source files
+    require 'open_nlp'
+Then you can create instances of open_nlp classes and use them for your NLP tasks
+    # sentence detection
+    sentence_detect_model = OpenNlp::Model::SentenceDetector.new("nlp_models/en-sent.bin")
+    sentence_detector = OpenNlp::SentenceDetector.new(sentence_detect_model)
+    sentence_detector.detect('The red fox sleeps soundly.')
+    # tokenize
+    token_model = OpenNlp::Model::Tokenizer.new("nlp_models/en-token.bin")
+    tokenizer = OpenNlp::Tokenizer.new(token_model)
+    tokenizer.tokenize('The red fox sleeps soundly.')
+    # part-of-speech tagging
+    pos_model = OpenNlp::Model::POSTagger.new(File.join("nlp_models/en-pos-maxent.bin"))
+    pos_tagger = OpenNlp::POSTagger.new(pos_model)
+    # to tag a string, call OpenNlp::POSTagger#tag with a String argument
+    pos_tagger.tag('The red fox sleeps soundly.')
+    # to tag an array of tokens, call OpenNlp::POSTagger#tag with an Array argument
+    pos_tagger.tag(%w|The red fox sleeps soundly .|)
+    # chunks detection (the chunker also needs the tokenizer and pos-tagger models because it performs tokenizing and pos-tagging internally as part of the chunking task)
+    chunk_model = OpenNlp::Model::Chunker.new(File.join("nlp_models/en-chunker.bin"))
+    token_model = OpenNlp::Model::Tokenizer.new("nlp_models/en-token.bin")
+    pos_model = OpenNlp::Model::POSTagger.new(File.join("nlp_models/en-pos-maxent.bin"))
+    chunker = OpenNlp::Chunker.new(chunk_model, token_model, pos_model)
 ## Contributing
@@ -26,4 +61,4 @@ TODO: Write usage instructions here
 2. Create your feature branch (`git checkout -b my-new-feature`)
 3. Commit your changes (`git commit -am 'Add some feature'`)
 4. Push to the branch (`git push origin my-new-feature`)
-5. Create new Pull Request
+5. Create new Pull Request

data/lib/open_nlp/chunker.rb CHANGED

@@ -3,7 +3,6 @@ module OpenNlp
     self.java_class = Java::opennlp.tools.chunker.ChunkerME
     def initialize(model, token_model, pos_model)
-      #raise ArgumentError, "model must be an OpenNlp::Chunker::Model" unless model.is_a?(Chunker::Model)
       super(model)
       raise ArgumentError, "model must be an OpenNlp::Tokenizer::Model" unless token_model.is_a?(Model::Tokenizer)
@@ -11,7 +10,6 @@ module OpenNlp
       @tokenizer = Tokenizer.new(token_model)
       @pos_tagger = POSTagger.new(pos_model)
-      #@j_instance = self.java_class.new(model.j_model)
     end
     def chunk(str)
@@ -32,8 +30,8 @@ module OpenNlp
       data.inject([]) do |acc, val|
         chunk = val[2]
-        acc << [val[0]] if chunk[0] == 'B'
-        acc.last << val[0] if chunk[0] == 'I'
+        acc << [{val[0] => val[1]}] if chunk[0] == 'B'
+        acc.last << {val[0] => val[1]} if chunk[0] == 'I'
         acc
       end

data/lib/open_nlp/version.rb CHANGED

@@ -1,3 +1,3 @@
 module OpenNlp
-  VERSION = '0.0.1'
+  VERSION = '0.0.2'
 end

data/spec/chunker_spec.rb CHANGED

@@ -28,7 +28,7 @@ describe OpenNlp::Chunker do
     it "should chunk a sentence" do
       chunks = chunker.chunk("The red fox sleeps soundly.")
-      chunks.should == [["The", "red", "fox", "sleeps"], ["soundly"]]
+      chunks.should == [[{"The"=>"DT"}, {"red"=>"JJ"}, {"fox"=>"NN"}, {"sleeps"=>"NNS"}], [{"soundly"=>"RB"}]]
     end
     it "should raise an error when not passed a string" do

metadata CHANGED

@@ -2,14 +2,14 @@
 name: open_nlp
 version: !ruby/object:Gem::Version
   prerelease:
-  version: 0.0.1
+  version: 0.0.2
 platform: java
 authors:
 - Hck
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2012-09-21 00:00:00.000000000 Z
+date: 2012-09-24 00:00:00.000000000 Z
 dependencies: []
 description: JRuby tools wrapper for Apache OpenNLP
 email: