RubyGems - part_of_speech - Versions diffs - 0.0.0 → 0.0.1 - Mend

part_of_speech 0.0.0 → 0.0.1

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (8)

data/README.rdoc +3 -1
data/VERSION +1 -1
data/lib/part_of_speech.rb +9 -9
data/part_of_speech.gemspec +3 -3
data/{lib/corpus → spec/files}/lexicon.txt +0 -0
data/spec/part_of_speech_spec.rb +2 -2
data/spec/spec_helper.rb +9 -0
metadata +3 -3

data/README.rdoc CHANGED Viewed

@@ -9,6 +9,8 @@ I've just cleaned it up a little bit and packaged it into a gem.
   gem sources -a http://gemcutter.org
   sudo gem install part_of_speech
+You can download a lexicon list from http://github.com/downloads/reddavis/Part-Of-Speech/lexicon.txt
 == How To Use
   require 'rubygems'
@@ -16,7 +18,7 @@ I've just cleaned it up a little bit and packaged it into a gem.
   text = "This is some text that I want analyzing"
-  PartOfSpeechTagger.analyze(text)
+  PartOfSpeechTagger.analyze(lexicon_path, text)
     #=> [["This", "DT"], ["is", "VBZ"], ["some", "DT"], ["text", "NN"], ["that", "IN"], ["I", "PRP"], ["want", "VBP"], ["analyzing", "VBG"]]
 == What Do These Letters Mean?

data/VERSION CHANGED Viewed

	@@ -1 +1 @@
1	- 0.0.0
1	+ 0.0.1

data/lib/part_of_speech.rb CHANGED Viewed

@@ -1,22 +1,22 @@
 class PartOfSpeech
   class << self
-    def analyze(text)
-      new.tag(text)
+    def analyze(lexicon_path, text)
+      new(lexicon_path).tag(text)
     end
   end
   # Place corpus into memory
-  def initialize
-    @lexicons = {}
-    File.open(corpus_path).each do |line|
+  def initialize(lexicon_path)
+    @lexicons = Hash.new {|hash, k| hash[k] = []}
+    File.open(lexicon_path, 'r').each do |line|
       line = line.split
       @lexicons[line.shift] = line
     end
   end
   def tag(text)
-    @text = text.split(/\s|\.|,|\:|\;|\'/)
+    @text = text.split(/ |,|\.|\:|\;|\'/)
     @pos = []
     @text.each do |word|
@@ -114,11 +114,11 @@ class PartOfSpeech
     ## rule 9: <noun> <noun 2> --> <noun> <verb> if <noun 2> can also be a verb
     return unless index > 0
-    if @pos[index-1] =~ /^NN/  && @pos[index] =~ /^NN/
-      if @lexicon[@text[index]].include?("VBN")
+    if @pos[index-1] =~ /^NN/  && @pos[index] =~ /^NN/
+      if @lexicons[@text[index]].include?("VBN")
         @pos[index] = "VBN"
       end
-      if @lexicon[@text[index]].include?("VBZ")
+      if @lexicons[@text[index]].include?("VBZ")
         @pos[index] = "VBZ"
       end
     end

data/part_of_speech.gemspec CHANGED Viewed

@@ -5,11 +5,11 @@
 Gem::Specification.new do |s|
   s.name = %q{part_of_speech}
-  s.version = "0.0.0"
+  s.version = "0.0.1"
   s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
   s.authors = ["reddavis"]
-  s.date = %q{2010-03-01}
+  s.date = %q{2010-03-02}
   s.description = %q{Part of speech tagger based off Mark Watsons code}
   s.email = %q{reddavis@gmail.com}
   s.extra_rdoc_files = [
@@ -23,9 +23,9 @@ Gem::Specification.new do |s|
      "README.rdoc",
      "Rakefile",
      "VERSION",
-     "lib/corpus/lexicon.txt",
      "lib/part_of_speech.rb",
      "part_of_speech.gemspec",
+     "spec/files/lexicon.txt",
      "spec/part_of_speech_spec.rb",
      "spec/spec.opts",
      "spec/spec_helper.rb"

data/{lib/corpus → spec/files}/lexicon.txt RENAMED Viewed

File without changes

data/spec/part_of_speech_spec.rb CHANGED Viewed

@@ -2,9 +2,9 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
 describe "PartOfSpeech" do
   it "should properly tag 'the fast fox'" do
-    a = PartOfSpeech.analyze('the fast fox')
+    a = PartOfSpeech.analyze(lexicon_path, text_sample)
     a[0][1].should == "DT"
-    a[1][1].should == "RB"
+    a[1][1].should == "JJ"
     a[2][1].should == "NN"
   end
 end

data/spec/spec_helper.rb CHANGED Viewed

@@ -4,6 +4,15 @@ require 'part_of_speech'
 require 'spec'
 require 'spec/autorun'
+def text_sample
+  "This regular expression is read in the following manner: Zero or more adjectives or nouns, followed by an option group of a noun and a preposition, followed again by zero or more adjectives or nouns, followed by a single noun. A sequence of tags matching this pattern ensures that the corresponding words make up a noun phrase.
+  In addition to simply pulling out the phrases, it is common to do some simple post processing to link variants together (For example, unpluralizing plural variants)."
+end
+def lexicon_path
+  File.expand_path(File.dirname(__FILE__) + '/files/lexicon.txt')
+end
 Spec::Runner.configure do |config|
 end

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: part_of_speech
 version: !ruby/object:Gem::Version
-  version: 0.0.0
+  version: 0.0.1
 platform: ruby
 authors:
 - reddavis
@@ -9,7 +9,7 @@ autorequire:
 bindir: bin
 cert_chain: []
-date: 2010-03-01 00:00:00 +00:00
+date: 2010-03-02 00:00:00 +00:00
 default_executable:
 dependencies:
 - !ruby/object:Gem::Dependency
@@ -38,9 +38,9 @@ files:
 - README.rdoc
 - Rakefile
 - VERSION
-- lib/corpus/lexicon.txt
 - lib/part_of_speech.rb
 - part_of_speech.gemspec
+- spec/files/lexicon.txt
 - spec/part_of_speech_spec.rb
 - spec/spec.opts
 - spec/spec_helper.rb