rbtagger 0.3.0 → 0.3.1
Sign up to get free protection for your applications and to get access to all the features.
- data/README.txt +1 -3
- data/lib/brill/tagger.rb +3 -0
- data/lib/rbtagger/version.rb +1 -1
- data/test/test_rule_tagger.rb +5 -0
- data/website/index.html +6 -5
- data/website/index.txt +4 -3
- metadata +1 -1
data/README.txt
CHANGED
@@ -19,9 +19,7 @@ This software is made available under the MIT License, see LICENSE
|
|
19
19
|
|
20
20
|
== SYNOPSIS:
|
21
21
|
|
22
|
-
tagger = Brill::Tagger.new(
|
23
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
24
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
22
|
+
tagger = Brill::Tagger.new
|
25
23
|
|
26
24
|
== INSTALL:
|
27
25
|
|
data/lib/brill/tagger.rb
CHANGED
@@ -18,10 +18,12 @@ module Brill
|
|
18
18
|
|
19
19
|
# given a body of text return a list of adjectives
|
20
20
|
def adjectives( text )
|
21
|
+
tag(text).select{|t| t.last == 'JJ' }
|
21
22
|
end
|
22
23
|
|
23
24
|
# given a body of text return a list of nouns
|
24
25
|
def nouns( text )
|
26
|
+
tag(text).select{|t| t.last.match(/NN/) }
|
25
27
|
end
|
26
28
|
|
27
29
|
# returns similar results as tag, but further reduced by only selecting nouns
|
@@ -92,6 +94,7 @@ module Brill
|
|
92
94
|
# returns an array like [[token,tag],[token,tag]...[token,tag]]
|
93
95
|
#
|
94
96
|
def tag( text )
|
97
|
+
# XXX: the list of contractions is much larger than this... find 'em
|
95
98
|
text = text.gsub(/dont/,"don't").gsub(/Dont/,"Don't")
|
96
99
|
text = text.gsub(/youre/,"you're")
|
97
100
|
tokens = Brill::Tagger.tokenize( text )
|
data/lib/rbtagger/version.rb
CHANGED
data/test/test_rule_tagger.rb
CHANGED
@@ -139,6 +139,11 @@ TMZ.com: Britney celebrated getting overnights with her kids by going on a wild
|
|
139
139
|
puts results.inspect
|
140
140
|
end
|
141
141
|
|
142
|
+
def test_adjectives
|
143
|
+
results = tagger.adjectives("So happy i get to bring my baby boy home tomorrow. Hospital tv is horrible, ten channels no one watches")
|
144
|
+
assert_equal [["happy", "JJ"], ["horrible", "JJ"]], results
|
145
|
+
end
|
146
|
+
|
142
147
|
private
|
143
148
|
def tagger
|
144
149
|
$rtagger
|
data/website/index.html
CHANGED
@@ -160,7 +160,7 @@
|
|
160
160
|
<h1>rbtagger</h1>
|
161
161
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
|
162
162
|
<p>Get Version</p>
|
163
|
-
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
|
163
|
+
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.1</a>
|
164
164
|
</div>
|
165
165
|
<h4 style="float:right;padding-right:10px;"> &#x2192; ‘rbtagger’</h4>
|
166
166
|
<h2>What</h2>
|
@@ -175,15 +175,16 @@ gem install rbtagger
|
|
175
175
|
<p><pre class='syntax'>
|
176
176
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
|
177
177
|
|
178
|
-
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span>
|
179
|
-
<span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">LEXICALRULEFILE</span><span class="punct">"),</span>
|
180
|
-
<span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">CONTEXTUALRULEFILE</span><span class="punct">")</span> <span class="punct">)</span>
|
178
|
+
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span>
|
181
179
|
<span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
|
182
180
|
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
|
183
181
|
<span class="keyword">end</span>
|
184
182
|
|
185
183
|
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">sample.txt</span><span class="punct">")</span> <span class="punct">)</span>
|
186
184
|
<span class="punct">=></span> <span class="punct">[["</span><span class="string">doctor</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">["</span><span class="string">treatment</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">5</span><span class="punct">]]</span>
|
185
|
+
|
186
|
+
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">nouns</span>
|
187
|
+
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">adjectives</span>
|
187
188
|
</pre></p>
|
188
189
|
<h4>Using the word tagger</h4>
|
189
190
|
<p><pre class='syntax'>
|
@@ -210,7 +211,7 @@ rake install_gem</pre>
|
|
210
211
|
<h2>Contact</h2>
|
211
212
|
<p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> or post via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a>.</p>
|
212
213
|
<p class="coda">
|
213
|
-
<a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>,
|
214
|
+
<a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 21st May 2009<br>
|
214
215
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
215
216
|
</p>
|
216
217
|
</div>
|
data/website/index.txt
CHANGED
@@ -21,15 +21,16 @@ h2. The basics
|
|
21
21
|
<pre syntax="ruby">
|
22
22
|
require 'rbtagger'
|
23
23
|
|
24
|
-
tagger = Brill::Tagger.new(
|
25
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
26
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
24
|
+
tagger = Brill::Tagger.new
|
27
25
|
docs.each do|doc|
|
28
26
|
tagger.tag( File.read( doc ) )
|
29
27
|
end
|
30
28
|
|
31
29
|
tagger.suggest( File.read("sample.txt") )
|
32
30
|
=> [["doctor", "NN", 3], ["treatment", "NN", 5]]
|
31
|
+
|
32
|
+
tagger.nouns
|
33
|
+
tagger.adjectives
|
33
34
|
</pre>
|
34
35
|
|
35
36
|
<h4>Using the word tagger</h4>
|