part_of_speech 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,131 @@
1
# Rule-based part-of-speech tagger.
#
# Every token is first given the first tag listed for it in the lexicon file
# (tokens without an entry default to "NN"); nine transformation rules are
# then applied, in order, to each position of the sentence.
class PartOfSpeech

  # Characters that separate tokens in #tag: whitespace . , : ; and '
  TOKEN_DELIMITERS = /[\s.,:;']/

  # Tag given to tokens that have no usable lexicon entry.
  DEFAULT_TAG = "NN"

  class << self
    # Convenience entry point: loads the lexicon and tags +text+.
    # Returns the same array of [word, tag] pairs as #tag.
    def analyze(text)
      new.tag(text)
    end
  end

  # Loads the lexicon into memory as { word => [tag, tag, ...] }.
  # Each non-blank line of the corpus file is whitespace separated: the first
  # field is the word, the remaining fields are its tags.
  def initialize
    @lexicons = {}
    # File.foreach closes the file once iteration finishes.
    File.foreach(corpus_path) do |line|
      word, *tags = line.split
      next if word.nil? # blank line: nothing to store
      @lexicons[word] = tags
    end
  end

  # Tags every token of +text+.
  #
  # text - String to tokenize and tag.
  #
  # Returns an Array of [word, tag] pairs, one per non-empty token.
  def tag(text)
    # Adjacent delimiters (e.g. ", ") produce empty strings; they are not words.
    @text = text.split(TOKEN_DELIMITERS).reject { |token| token.empty? }

    # Initial guess: first tag from the lexicon, trying the word as written
    # and then its lowercase form.
    @pos = @text.map do |word|
      entry = lexicon_entry(word)
      (entry && entry.first) || DEFAULT_TAG
    end

    # Apply transformational rules. They run position by position, so a rule
    # looking at index - 1 sees the already-transformed tag.
    @pos.each_index do |index|
      rule_one(index)
      rule_two(index)
      rule_three(index)
      rule_four(index)
      rule_five(index)
      rule_six(index)
      rule_seven(index)
      rule_eight(index)
      rule_nine(index)
    end

    # Organize as [word, pos]
    @text.zip(@pos)
  end

  private

  # Tag list for +word+, falling back to its lowercase form; nil when the
  # lexicon has neither.
  def lexicon_entry(word)
    @lexicons[word] || @lexicons[word.downcase]
  end

  def rule_one(index)
    ## rule 1: DT, {VBD | VBP | VB} --> DT, NN
    return unless index > 0
    if @pos[index - 1] == "DT" && ["VBD", "VBP", "VB"].include?(@pos[index])
      @pos[index] = "NN"
    end
  end

  def rule_two(index)
    ## rule 2: convert a noun to a number (CD) if "." appears in the word
    ## (tokens produced by #tag are split on ".", so they never contain one)
    if @pos[index] =~ /^N/ && @text[index] =~ /\./
      @pos[index] = "CD"
    end
  end

  def rule_three(index)
    ## rule 3: convert a noun to a past participle if the word ends with "ed"
    if @pos[index] =~ /^N/ && @text[index] =~ /ed$/
      @pos[index] = "VBN"
    end
  end

  def rule_four(index)
    ## rule 4: convert any type to adverb if it ends in "ly"
    if @text[index] =~ /ly$/
      @pos[index] = "RB"
    end
  end

  def rule_five(index)
    ## rule 5: convert a common noun (NN or NNS) to an adjective if it ends with "al"
    if @pos[index] =~ /^NN/ && @text[index] =~ /al$/
      @pos[index] = "JJ"
    end
  end

  def rule_six(index)
    ## rule 6: convert a noun to a verb if the preceding word is "would"
    return unless index > 0
    if @pos[index] =~ /^NN/ && @text[index - 1].downcase == "would"
      @pos[index] = "VB"
    end
  end

  def rule_seven(index)
    ## rule 7: if a word has been categorized as a common noun and
    ## it ends with "s", then set its type to plural common noun (NNS)
    if @pos[index] == "NN" && @text[index] =~ /s$/
      @pos[index] = "NNS"
    end
  end

  def rule_eight(index)
    ## rule 8: convert a common noun to a present participle verb (i.e., a gerund)
    if @pos[index] =~ /^NN/ && @text[index] =~ /ing$/
      @pos[index] = "VBG"
    end
  end

  def rule_nine(index)
    ## rule 9: <noun> <noun 2> --> <noun> <verb> if <noun 2> can also be a verb
    return unless index > 0
    return unless @pos[index - 1] =~ /^NN/ && @pos[index] =~ /^NN/

    # Words tagged by default have no lexicon entry and so no alternatives.
    candidates = lexicon_entry(@text[index])
    return unless candidates

    # VBZ is checked last, so it wins when both tags are listed.
    @pos[index] = "VBN" if candidates.include?("VBN")
    @pos[index] = "VBZ" if candidates.include?("VBZ")
  end

  # Absolute path of the lexicon file that sits next to this source file.
  def corpus_path
    File.expand_path(File.dirname(__FILE__) + '/corpus/lexicon.txt')
  end

end
@@ -0,0 +1,56 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# Gem specification for part_of_speech: package metadata, shipped files and
# the rspec dependency.
Gem::Specification.new do |s|
  s.name = %q{part_of_speech}
  s.version = "0.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["reddavis"]
  s.date = %q{2010-03-01}
  s.description = %q{Part of speech tagger based off Mark Watsons code}
  s.email = %q{reddavis@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".document",
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/corpus/lexicon.txt",
    "lib/part_of_speech.rb",
    "part_of_speech.gemspec",
    "spec/part_of_speech_spec.rb",
    "spec/spec.opts",
    "spec/spec_helper.rb"
  ]
  s.homepage = %q{http://github.com/reddavis/Part-Of-Speech}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Part of speech tagger based off Mark Watsons code}
  s.test_files = [
    "spec/part_of_speech_spec.rb",
    "spec/spec_helper.rb"
  ]

  s.specification_version = 3 if s.respond_to? :specification_version

  # rspec is only needed to run the specs. Detect development-dependency
  # support by feature rather than by comparing Gem::RubyGemsVersion against
  # '1.2.0': that constant is not defined by current RubyGems releases, so
  # referencing it raises NameError while the gemspec is being loaded.
  # RubyGems without development dependencies falls back to a runtime one.
  if s.respond_to?(:specification_version) && s.respond_to?(:add_development_dependency)
    s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
  else
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
56
+
@@ -0,0 +1,10 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/spec_helper')
2
+
3
# Smoke test for the public entry point: checks the tag of each of the
# three words in a short phrase.
describe "PartOfSpeech" do
  it "should properly tag 'the fast fox'" do
    # analyze returns [word, tag] pairs; the tag is the last element of each.
    first, second, third = PartOfSpeech.analyze('the fast fox')

    first.last.should == "DT"
    second.last.should == "RB"
    third.last.should == "NN"
  end
end
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,9 @@
1
# Spec bootstrap: put the gem's lib directory and this spec directory at the
# front of the load path (lib first), then load the library and RSpec.
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'), spec_dir)

require 'part_of_speech'
require 'spec'
require 'spec/autorun'

# No custom runner configuration.
Spec::Runner.configure do |config|
end
metadata ADDED
@@ -0,0 +1,77 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: part_of_speech
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - reddavis
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2010-03-01 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: rspec
17
+ type: :development
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.2.9
24
+ version:
25
+ description: Part of speech tagger based off Mark Watsons code
26
+ email: reddavis@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files:
32
+ - LICENSE
33
+ - README.rdoc
34
+ files:
35
+ - .document
36
+ - .gitignore
37
+ - LICENSE
38
+ - README.rdoc
39
+ - Rakefile
40
+ - VERSION
41
+ - lib/corpus/lexicon.txt
42
+ - lib/part_of_speech.rb
43
+ - part_of_speech.gemspec
44
+ - spec/part_of_speech_spec.rb
45
+ - spec/spec.opts
46
+ - spec/spec_helper.rb
47
+ has_rdoc: true
48
+ homepage: http://github.com/reddavis/Part-Of-Speech
49
+ licenses: []
50
+
51
+ post_install_message:
52
+ rdoc_options:
53
+ - --charset=UTF-8
54
+ require_paths:
55
+ - lib
56
+ required_ruby_version: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: "0"
61
+ version:
62
+ required_rubygems_version: !ruby/object:Gem::Requirement
63
+ requirements:
64
+ - - ">="
65
+ - !ruby/object:Gem::Version
66
+ version: "0"
67
+ version:
68
+ requirements: []
69
+
70
+ rubyforge_project:
71
+ rubygems_version: 1.3.5
72
+ signing_key:
73
+ specification_version: 3
74
+ summary: Part of speech tagger based off Mark Watsons code
75
+ test_files:
76
+ - spec/part_of_speech_spec.rb
77
+ - spec/spec_helper.rb