part_of_speech 0.0.0 → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -9,6 +9,8 @@ I've just cleaned it up a little bit and packaged it into a gem.
9
9
  gem sources -a http://gemcutter.org
10
10
  sudo gem install part_of_speech
11
11
 
12
+ You can download a lexicon list from http://github.com/downloads/reddavis/Part-Of-Speech/lexicon.txt
13
+
12
14
  == How To Use
13
15
 
14
16
  require 'rubygems'
@@ -16,7 +18,7 @@ I've just cleaned it up a little bit and packaged it into a gem.
16
18
 
17
19
  text = "This is some text that I want analyzing"
18
20
 
19
- PartOfSpeechTagger.analyze(text)
21
+ PartOfSpeech.analyze(lexicon_path, text)
20
22
  #=> [["This", "DT"], ["is", "VBZ"], ["some", "DT"], ["text", "NN"], ["that", "IN"], ["I", "PRP"], ["want", "VBP"], ["analyzing", "VBG"]]
21
23
 
22
24
  == What Do These Letters Mean?
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.0
1
+ 0.0.1
data/lib/part_of_speech.rb CHANGED
@@ -1,22 +1,22 @@
1
1
  class PartOfSpeech
2
2
 
3
3
  class << self
4
- def analyze(text)
5
- new.tag(text)
4
+ def analyze(lexicon_path, text)
5
+ new(lexicon_path).tag(text)
6
6
  end
7
7
  end
8
8
 
9
9
  # Place corpus into memory
10
- def initialize
11
- @lexicons = {}
12
- File.open(corpus_path).each do |line|
10
+ def initialize(lexicon_path)
11
+ @lexicons = Hash.new {|hash, k| hash[k] = []}
12
+ File.open(lexicon_path, 'r').each do |line|
13
13
  line = line.split
14
14
  @lexicons[line.shift] = line
15
15
  end
16
16
  end
17
17
 
18
18
  def tag(text)
19
- @text = text.split(/\s|\.|,|\:|\;|\'/)
19
+ @text = text.split(/ |,|\.|\:|\;|\'/)
20
20
 
21
21
  @pos = []
22
22
  @text.each do |word|
@@ -114,11 +114,11 @@ class PartOfSpeech
114
114
  ## rule 9: <noun> <noun 2> --> <noun> <verb> if <noun 2> can also be a verb
115
115
  return unless index > 0
116
116
 
117
- if @pos[index-1] =~ /^NN/ && @pos[index] =~ /^NN/
118
- if @lexicon[@text[index]].include?("VBN")
117
+ if @pos[index-1] =~ /^NN/ && @pos[index] =~ /^NN/
118
+ if @lexicons[@text[index]].include?("VBN")
119
119
  @pos[index] = "VBN"
120
120
  end
121
- if @lexicon[@text[index]].include?("VBZ")
121
+ if @lexicons[@text[index]].include?("VBZ")
122
122
  @pos[index] = "VBZ"
123
123
  end
124
124
  end
data/part_of_speech.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{part_of_speech}
8
- s.version = "0.0.0"
8
+ s.version = "0.0.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["reddavis"]
12
- s.date = %q{2010-03-01}
12
+ s.date = %q{2010-03-02}
13
13
  s.description = %q{Part of speech tagger based off Mark Watsons code}
14
14
  s.email = %q{reddavis@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -23,9 +23,9 @@ Gem::Specification.new do |s|
23
23
  "README.rdoc",
24
24
  "Rakefile",
25
25
  "VERSION",
26
- "lib/corpus/lexicon.txt",
27
26
  "lib/part_of_speech.rb",
28
27
  "part_of_speech.gemspec",
28
+ "spec/files/lexicon.txt",
29
29
  "spec/part_of_speech_spec.rb",
30
30
  "spec/spec.opts",
31
31
  "spec/spec_helper.rb"
File without changes
data/spec/part_of_speech_spec.rb CHANGED
@@ -2,9 +2,9 @@ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
2
 
3
3
  describe "PartOfSpeech" do
4
4
  it "should properly tag 'the fast fox'" do
5
- a = PartOfSpeech.analyze('the fast fox')
5
+ a = PartOfSpeech.analyze(lexicon_path, text_sample)
6
6
  a[0][1].should == "DT"
7
- a[1][1].should == "RB"
7
+ a[1][1].should == "JJ"
8
8
  a[2][1].should == "NN"
9
9
  end
10
10
  end
data/spec/spec_helper.rb CHANGED
@@ -4,6 +4,15 @@ require 'part_of_speech'
4
4
  require 'spec'
5
5
  require 'spec/autorun'
6
6
 
7
+ def text_sample
8
+ "This regular expression is read in the following manner: Zero or more adjectives or nouns, followed by an option group of a noun and a preposition, followed again by zero or more adjectives or nouns, followed by a single noun. A sequence of tags matching this pattern ensures that the corresponding words make up a noun phrase.
9
+ In addition to simply pulling out the phrases, it is common to do some simple post processing to link variants together (For example, unpluralizing plural variants)."
10
+ end
11
+
12
+ def lexicon_path
13
+ File.expand_path(File.dirname(__FILE__) + '/files/lexicon.txt')
14
+ end
15
+
7
16
  Spec::Runner.configure do |config|
8
17
 
9
18
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: part_of_speech
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - reddavis
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2010-03-01 00:00:00 +00:00
12
+ date: 2010-03-02 00:00:00 +00:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -38,9 +38,9 @@ files:
38
38
  - README.rdoc
39
39
  - Rakefile
40
40
  - VERSION
41
- - lib/corpus/lexicon.txt
42
41
  - lib/part_of_speech.rb
43
42
  - part_of_speech.gemspec
43
+ - spec/files/lexicon.txt
44
44
  - spec/part_of_speech_spec.rb
45
45
  - spec/spec.opts
46
46
  - spec/spec_helper.rb