part_of_speech 0.0.0 → 0.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -9,6 +9,8 @@ I've just cleaned it up a little bit and packaged it into a gem.
9
9
  gem sources -a http://gemcutter.org
10
10
  sudo gem install part_of_speech
11
11
 
12
+ You can download a lexicon list from http://github.com/downloads/reddavis/Part-Of-Speech/lexicon.txt
13
+
12
14
  == How To Use
13
15
 
14
16
  require 'rubygems'
@@ -16,7 +18,7 @@ I've just cleaned it up a little bit and packaged it into a gem.
16
18
 
17
19
  text = "This is some text that I want analyzing"
18
20
 
19
- PartOfSpeechTagger.analyze(text)
21
+ PartOfSpeechTagger.analyze(lexicon_path, text)
20
22
  #=> [["This", "DT"], ["is", "VBZ"], ["some", "DT"], ["text", "NN"], ["that", "IN"], ["I", "PRP"], ["want", "VBP"], ["analyzing", "VBG"]]
21
23
 
22
24
  == What Do These Letters Mean?
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.1
@@ -1,22 +1,22 @@
1
1
  class PartOfSpeech
2
2
 
3
3
  class << self
4
- def analyze(text)
5
- new.tag(text)
4
+ def analyze(lexicon_path, text)
5
+ new(lexicon_path).tag(text)
6
6
  end
7
7
  end
8
8
 
9
9
  # Place corpus into memory
10
- def initialize
11
- @lexicons = {}
12
- File.open(corpus_path).each do |line|
10
+ def initialize(lexicon_path)
11
+ @lexicons = Hash.new {|hash, k| hash[k] = []}
12
+ File.open(lexicon_path, 'r').each do |line|
13
13
  line = line.split
14
14
  @lexicons[line.shift] = line
15
15
  end
16
16
  end
17
17
 
18
18
  def tag(text)
19
- @text = text.split(/\s|\.|,|\:|\;|\'/)
19
+ @text = text.split(/ |,|\.|\:|\;|\'/)
20
20
 
21
21
  @pos = []
22
22
  @text.each do |word|
@@ -114,11 +114,11 @@ class PartOfSpeech
114
114
  ## rule 9: <noun> <noun 2> --> <noun> <verb> if <noun 2> can also be a verb
115
115
  return unless index > 0
116
116
 
117
- if @pos[index-1] =~ /^NN/ && @pos[index] =~ /^NN/
118
- if @lexicon[@text[index]].include?("VBN")
117
+ if @pos[index-1] =~ /^NN/ && @pos[index] =~ /^NN/
118
+ if @lexicons[@text[index]].include?("VBN")
119
119
  @pos[index] = "VBN"
120
120
  end
121
- if @lexicon[@text[index]].include?("VBZ")
121
+ if @lexicons[@text[index]].include?("VBZ")
122
122
  @pos[index] = "VBZ"
123
123
  end
124
124
  end
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{part_of_speech}
8
- s.version = "0.0.0"
8
+ s.version = "0.0.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2010-03-01}
12
+ s.date = %q{2010-03-02}
13
13
  s.description = %q{Part of speech tagger based off Mark Watsons code}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -23,9 +23,9 @@ Gem::Specification.new do |s|
23
23
  "README.rdoc",
24
24
  "Rakefile",
25
25
  "VERSION",
26
- "lib/corpus/lexicon.txt",
27
26
  "lib/part_of_speech.rb",
28
27
  "part_of_speech.gemspec",
28
+ "spec/files/lexicon.txt",
29
29
  "spec/part_of_speech_spec.rb",
30
30
  "spec/spec.opts",
31
31
  "spec/spec_helper.rb"
File without changes
@@ -2,9 +2,9 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "PartOfSpeech" do
4
4
  it "should properly tag 'the fast fox'" do
5
- a = PartOfSpeech.analyze('the fast fox')
5
+ a = PartOfSpeech.analyze(lexicon_path, text_sample)
6
6
  a[0][1].should == "DT"
7
- a[1][1].should == "RB"
7
+ a[1][1].should == "JJ"
8
8
  a[2][1].should == "NN"
9
9
  end
10
10
  end
data/spec/spec_helper.rb CHANGED
@@ -4,6 +4,15 @@ require 'part_of_speech'
4
4
  require 'spec'
5
5
  require 'spec/autorun'
6
6
 
7
+ def text_sample
8
+ "This regular expression is read in the following manner: Zero or more adjectives or nouns, followed by an option group of a noun and a preposition, followed again by zero or more adjectives or nouns, followed by a single noun. A sequence of tags matching this pattern ensures that the corresponding words make up a noun phrase.
9
+ In addition to simply pulling out the phrases, it is common to do some simple post processing to link variants together (For example, unpluralizing plural variants)."
10
+ end
11
+
12
+ def lexicon_path
13
+ File.expand_path(File.dirname(__FILE__) + '/files/lexicon.txt')
14
+ end
15
+
7
16
  Spec::Runner.configure do |config|
8
17
 
9
18
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: part_of_speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-03-01 00:00:00 +00:00
12
+ date: 2010-03-02 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -38,9 +38,9 @@ files:
38
38
  - README.rdoc
39
39
  - Rakefile
40
40
  - VERSION
41
- - lib/corpus/lexicon.txt
42
41
  - lib/part_of_speech.rb
43
42
  - part_of_speech.gemspec
43
+ - spec/files/lexicon.txt
44
44
  - spec/part_of_speech_spec.rb
45
45
  - spec/spec.opts
46
46
  - spec/spec_helper.rb