RubyGems - nlp_backpack - Versions diffs - 0.0.0 - Mend

nlp_backpack 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (51)

data/.document +5 -0
data/.gitignore +21 -0
data/LICENSE +20 -0
data/README.rdoc +22 -0
data/Rakefile +45 -0
data/VERSION +1 -0
data/lib/nlp_backpack.rb +10 -0
data/lib/nlp_backpack/chunker.rb +5 -0
data/lib/nlp_backpack/chunker/regex_chunker.rb +107 -0
data/lib/nlp_backpack/chunker/tag_pattern.rb +31 -0
data/lib/nlp_backpack/classifier.rb +5 -0
data/lib/nlp_backpack/classifier/base.rb +28 -0
data/lib/nlp_backpack/classifier/naive_bayes.rb +83 -0
data/lib/nlp_backpack/evaluation.rb +6 -0
data/lib/nlp_backpack/evaluation/accuracy.rb +46 -0
data/lib/nlp_backpack/evaluation/base.rb +12 -0
data/lib/nlp_backpack/evaluation/confusion_matrix.rb +66 -0
data/lib/nlp_backpack/frequency_distribution.rb +47 -0
data/lib/nlp_backpack/pos.rb +5 -0
data/lib/nlp_backpack/pos/brill_tagger.rb +142 -0
data/lib/nlp_backpack/pos/brill_tagger/lexicon.txt +93696 -0
data/lib/nlp_backpack/pos/pos_array.rb +32 -0
data/lib/nlp_backpack/stop_words.rb +17 -0
data/lib/nlp_backpack/stop_words/stop_words.txt +429 -0
data/lib/nlp_backpack/tokenizers/custom.rb +13 -0
data/lib/nlp_backpack/tokenizers/line.rb +13 -0
data/lib/nlp_backpack/tokenizers/space.rb +13 -0
data/lib/nlp_backpack/tokenizers/tab.rb +13 -0
data/lib/nlp_backpack/tokenizers/whitespace.rb +13 -0
data/lib/nlp_backpack/tokenizers/word.rb +13 -0
data/nlp_backpack.gemspec +109 -0
data/spec/chunkers/regex_chunker_spec.rb +46 -0
data/spec/chunkers/tag_pattern_spec.rb +40 -0
data/spec/classifiers/naive_bayes_spec.rb +68 -0
data/spec/evaluation/accuracy_spec.rb +29 -0
data/spec/evaluation/confusion_matrix_spec.rb +29 -0
data/spec/frequency_distribution_spec.rb +53 -0
data/spec/nlp_backpack_spec.rb +4 -0
data/spec/pos/brill_tagger_spec.rb +24 -0
data/spec/pos/pos_array_spec.rb +45 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +18 -0
data/spec/stop_words_spec.rb +15 -0
data/spec/test_saves/naive.nb +1 -0
data/spec/tokenizers/custom_spec.rb +24 -0
data/spec/tokenizers/line_spec.rb +15 -0
data/spec/tokenizers/space_spec.rb +15 -0
data/spec/tokenizers/tab_spec.rb +15 -0
data/spec/tokenizers/whitespace_spec.rb +16 -0
data/spec/tokenizers/word_spec.rb +15 -0
metadata +141 -0

data/spec/nlp_backpack_spec.rb ADDED

@@ -0,0 +1,4 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+# Placeholder top-level spec group for the gem; it currently defines no examples.
+describe "NlpBackpack" do
+end

data/spec/pos/brill_tagger_spec.rb ADDED

@@ -0,0 +1,24 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+include NLPBackpack
+describe POS::BrillTagger do
+  before(:all) do
+    @pos = POS::BrillTagger.analyze(text)
+  end
+  it "should properly tag 'the fast fox'" do
+    @pos[0][1].should == "DT"
+    @pos[1][1].should == "JJ"
+    @pos[2][1].should == "NN"
+  end
+  it "should return a POSData object" do
+    @pos.should be_a(POS::POSArray)
+  end
+  def text
+    "This regular expression is read in the following manner: Zero or more adjectives or nouns, followed by an option group of a noun and a preposition, followed again by zero or more adjectives or nouns, followed by a single noun. A sequence of tags matching this pattern ensures that the corresponding words make up a noun phrase.
+    In addition to simply pulling out the phrases, it is common to do some simple post processing to link variants together (For example, unpluralizing plural variants)."
+  end
+end

data/spec/pos/pos_array_spec.rb ADDED

@@ -0,0 +1,45 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'nlp_backpack/pos/pos_array'
+include NLPBackpack
+describe POS::POSArray do
+  before do
+    @pos_a = POS::POSArray.new
+  end
+  describe "Appending words" do
+    describe "Using <<" do
+      it "should raise an error if we insert too much data" do
+        lambda do
+          @pos_a << ["hello", :NN, "LOL"]
+        end.should raise_error
+      end
+      it "size should be 2" do
+        2.times { @pos_a << word }
+        @pos_a.size.should == 2
+      end
+    end
+    describe "Using #append" do
+      it "size should be 2" do
+        2.times { @pos_a.append(*word) }
+        @pos_a.size.should == 2
+      end
+    end
+  end
+  describe "to_s" do
+    it "should return properly formed string for ChunkGrammer" do
+      @pos_a << word
+      @pos_a << word
+      @pos_a.to_s.should == "hello/NN hello/NN"
+    end
+  end
+  def word
+    ["hello", :NN]
+  end
+end

data/spec/spec.opts ADDED

	@@ -0,0 +1 @@
1	+ --color

data/spec/spec_helper.rb ADDED

@@ -0,0 +1,18 @@
+# Shared setup required by every spec file.
+# Put the spec directory and the gem's lib directory at the front of the
+# load path so `require 'nlp_backpack'` resolves to this checkout.
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+require 'rubygems'
+require 'nlp_backpack'
+# 'spec' and 'spec/autorun' are the RSpec 1.x entry points.
+require 'spec'
+require 'spec/autorun'
+# No runner configuration is applied; the block is intentionally empty.
+Spec::Runner.configure do |config|
+end
+# Fixture of ten labels; presumably the expected labels for the evaluation
+# specs (verify against callers). Differs from test_results only at index 2.
+def correct_results
+  [1,1,2,1,1,1,1,1,1,1]
+end
+# Fixture of ten labels; presumably the predicted labels for the evaluation
+# specs (verify against callers). Index 2 holds 3 where correct_results has 2.
+def test_results
+  [1,1,3,1,1,1,1,1,1,1]
+end

data/spec/stop_words_spec.rb ADDED

@@ -0,0 +1,15 @@
+require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
+include NLPBackpack
+# Specs for StopWords.all: checks the first and last entries of the list.
+describe StopWords do
+  # Fetch the list once for the whole group; the examples only read it.
+  before(:all) { @stop_words = StopWords.all }
+  # The last entry is expected to be "z".
+  it "should return z" do
+    @stop_words.last.should == "z"
+  end
+  # The first entry is expected to be "a".
+  it "should return a" do
+    @stop_words.first.should == "a"
+  end
+end

data/spec/test_saves/naive.nb ADDED

	@@ -0,0 +1 @@
1	+ o:(NLPBackpack::Classifier::NaiveBayes :@features_count{: spam}" wordf1"badf1f0:ham}"wef1"badf1@:@db_filepath"M/Users/reddavis/Documents/projects/nlp_backpack/spec/test_saves/naive.nb:

data/spec/tokenizers/custom_spec.rb ADDED

@@ -0,0 +1,24 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'nlp_backpack/tokenizers/custom'
+include NLPBackpack
+# Specs for Tokenizer::Custom.tokenize(text, delimiter), where the delimiter
+# is given as a String in one example and as a Regexp in the other.
+describe Tokenizer::Custom do
+  describe "split by ands" do
+    # "hello and there and this" contains two "and" delimiters, so three
+    # tokens are expected.
+    it "should return 3" do
+      a = Tokenizer::Custom.tokenize(text, "and")
+      a.size.should == 3
+    end
+  end
+  describe "split by any character" do
+    # An empty regexp is used as the delimiter on the three-character input
+    # "and"; three tokens are expected (presumably one per character).
+    it "should return 3" do
+      a = Tokenizer::Custom.tokenize("and", //)
+      a.size.should == 3
+    end
+  end
+  # Input for the string-delimiter example.
+  def text
+    "hello and there and this"
+  end
+end

data/spec/tokenizers/line_spec.rb ADDED

@@ -0,0 +1,15 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'nlp_backpack/tokenizers/line'
+include NLPBackpack
+describe Tokenizer::Line do
+  it "should split text by whitespace" do
+    a = Tokenizer::Line.tokenize(text)
+    a.size.should == 3
+  end
+  def text
+    "hello\n\nthere this"
+  end
+end

data/spec/tokenizers/space_spec.rb ADDED

@@ -0,0 +1,15 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'nlp_backpack/tokenizers/space'
+include NLPBackpack
+# Specs for Tokenizer::Space.tokenize.
+describe Tokenizer::Space do
+  # The input has a double space after "hello"; four tokens are expected,
+  # presumably because the doubled space yields an extra empty token.
+  it "should split text by single spaces" do
+    a = Tokenizer::Space.tokenize(text)
+    a.size.should == 4
+  end
+  # Input with two spaces between "hello" and "there".
+  def text
+    "hello  there this"
+  end
+end

data/spec/tokenizers/tab_spec.rb ADDED

@@ -0,0 +1,15 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'nlp_backpack/tokenizers/tab'
+include NLPBackpack
+# Specs for Tokenizer::Tab.tokenize.
+describe Tokenizer::Tab do
+  # The input has a double tab before "this"; four tokens are expected,
+  # presumably because the doubled tab yields an extra empty token.
+  it "should split text by single tabs" do
+    a = Tokenizer::Tab.tokenize(text)
+    a.size.should == 4
+  end
+  # Input with one tab after "hello" and two tabs after "there".
+  def text
+    "hello\tthere\t\tthis"
+  end
+end

data/spec/tokenizers/whitespace_spec.rb ADDED

@@ -0,0 +1,16 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'nlp_backpack/tokenizers/whitespace'
+include NLPBackpack
+# Specs for Tokenizer::Whitespace.tokenize.
+describe Tokenizer::Whitespace do
+  # Three space-separated words in, three tokens out; the first token must
+  # be "hello".
+  it "should split text by whitespace" do
+    a = Tokenizer::Whitespace.tokenize(text)
+    a.size.should == 3
+    a[0].should == "hello"
+  end
+  # Input of three words separated by single spaces.
+  def text
+    "hello there this"
+  end
+end

data/spec/tokenizers/word_spec.rb ADDED

@@ -0,0 +1,15 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'nlp_backpack/tokenizers/word'
+include NLPBackpack
+# Specs for Tokenizer::Word.tokenize.
+describe Tokenizer::Word do
+  # The input mixes punctuation with five space-separated words; five
+  # tokens are expected, so "you're" presumably counts as a single word.
+  it "should split text by words" do
+    a = Tokenizer::Word.tokenize(text)
+    a.size.should == 5
+  end
+  # Input containing ";", ",", "." and an apostrophe.
+  def text
+    "hello; there, this. that you're"
+  end
+end

metadata ADDED

@@ -0,0 +1,141 @@
+--- !ruby/object:Gem::Specification
+name: nlp_backpack
+version: !ruby/object:Gem::Version
+  prerelease: false
+  segments:
+  - 0
+  - 0
+  - 0
+  version: 0.0.0
+platform: ruby
+authors:
+- reddavis
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2010-06-21 00:00:00 +01:00
+default_executable:
+dependencies:
+- !ruby/object:Gem::Dependency
+  name: rspec
+  prerelease: false
+  requirement: &id001 !ruby/object:Gem::Requirement
+    requirements:
+    - - ">="
+      - !ruby/object:Gem::Version
+        segments:
+        - 1
+        - 2
+        - 9
+        version: 1.2.9
+  type: :development
+  version_requirements: *id001
+description: A backpack full of useful toys
+email: reddavis@gmail.com
+executables: []
+extensions: []
+extra_rdoc_files:
+- LICENSE
+- README.rdoc
+files:
+- .document
+- .gitignore
+- LICENSE
+- README.rdoc
+- Rakefile
+- VERSION
+- lib/nlp_backpack.rb
+- lib/nlp_backpack/chunker.rb
+- lib/nlp_backpack/chunker/regex_chunker.rb
+- lib/nlp_backpack/chunker/tag_pattern.rb
+- lib/nlp_backpack/classifier.rb
+- lib/nlp_backpack/classifier/base.rb
+- lib/nlp_backpack/classifier/naive_bayes.rb
+- lib/nlp_backpack/evaluation.rb
+- lib/nlp_backpack/evaluation/accuracy.rb
+- lib/nlp_backpack/evaluation/base.rb
+- lib/nlp_backpack/evaluation/confusion_matrix.rb
+- lib/nlp_backpack/frequency_distribution.rb
+- lib/nlp_backpack/pos.rb
+- lib/nlp_backpack/pos/brill_tagger.rb
+- lib/nlp_backpack/pos/brill_tagger/lexicon.txt
+- lib/nlp_backpack/pos/pos_array.rb
+- lib/nlp_backpack/stop_words.rb
+- lib/nlp_backpack/stop_words/stop_words.txt
+- lib/nlp_backpack/tokenizers/custom.rb
+- lib/nlp_backpack/tokenizers/line.rb
+- lib/nlp_backpack/tokenizers/space.rb
+- lib/nlp_backpack/tokenizers/tab.rb
+- lib/nlp_backpack/tokenizers/whitespace.rb
+- lib/nlp_backpack/tokenizers/word.rb
+- nlp_backpack.gemspec
+- spec/chunkers/regex_chunker_spec.rb
+- spec/chunkers/tag_pattern_spec.rb
+- spec/classifiers/naive_bayes_spec.rb
+- spec/evaluation/accuracy_spec.rb
+- spec/evaluation/confusion_matrix_spec.rb
+- spec/frequency_distribution_spec.rb
+- spec/nlp_backpack_spec.rb
+- spec/pos/brill_tagger_spec.rb
+- spec/pos/pos_array_spec.rb
+- spec/spec.opts
+- spec/spec_helper.rb
+- spec/stop_words_spec.rb
+- spec/test_saves/naive.nb
+- spec/tokenizers/custom_spec.rb
+- spec/tokenizers/line_spec.rb
+- spec/tokenizers/space_spec.rb
+- spec/tokenizers/tab_spec.rb
+- spec/tokenizers/whitespace_spec.rb
+- spec/tokenizers/word_spec.rb
+has_rdoc: true
+homepage: http://github.com/reddavis/NLP-Backpack
+licenses: []
+post_install_message:
+rdoc_options:
+- --charset=UTF-8
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      segments:
+      - 0
+      version: "0"
+requirements: []
+rubyforge_project:
+rubygems_version: 1.3.6
+signing_key:
+specification_version: 3
+summary: A backpack full of useful toys
+test_files:
+- spec/chunkers/regex_chunker_spec.rb
+- spec/chunkers/tag_pattern_spec.rb
+- spec/classifiers/naive_bayes_spec.rb
+- spec/evaluation/accuracy_spec.rb
+- spec/evaluation/confusion_matrix_spec.rb
+- spec/frequency_distribution_spec.rb
+- spec/nlp_backpack_spec.rb
+- spec/pos/brill_tagger_spec.rb
+- spec/pos/pos_array_spec.rb
+- spec/spec_helper.rb
+- spec/stop_words_spec.rb
+- spec/tokenizers/custom_spec.rb
+- spec/tokenizers/line_spec.rb
+- spec/tokenizers/space_spec.rb
+- spec/tokenizers/tab_spec.rb
+- spec/tokenizers/whitespace_spec.rb
+- spec/tokenizers/word_spec.rb