rbtagger 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +1 -3
- data/lib/brill/tagger.rb +3 -0
- data/lib/rbtagger/version.rb +1 -1
- data/test/test_rule_tagger.rb +5 -0
- data/website/index.html +6 -5
- data/website/index.txt +4 -3
- metadata +1 -1
data/README.txt
CHANGED
@@ -19,9 +19,7 @@ This software is made available under the MIT License, see LICENSE
|
|
19
19
|
|
20
20
|
== SYNOPSIS:
|
21
21
|
|
22
|
-
tagger = Brill::Tagger.new(
|
23
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
24
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
22
|
+
tagger = Brill::Tagger.new
|
25
23
|
|
26
24
|
== INSTALL:
|
27
25
|
|
data/lib/brill/tagger.rb
CHANGED
@@ -18,10 +18,12 @@ module Brill
|
|
18
18
|
|
19
19
|
# given a body of text return a list of adjectives
|
20
20
|
def adjectives( text )
|
21
|
+
tag(text).select{|t| t.last == 'JJ' }
|
21
22
|
end
|
22
23
|
|
23
24
|
# given a body of text return a list of nouns
|
24
25
|
def nouns( text )
|
26
|
+
tag(text).select{|t| t.last.match(/NN/) }
|
25
27
|
end
|
26
28
|
|
27
29
|
# returns similar results as tag, but further reduced by only selecting nouns
|
@@ -92,6 +94,7 @@ module Brill
|
|
92
94
|
# returns an array like [[token,tag],[token,tag]...[token,tag]]
|
93
95
|
#
|
94
96
|
def tag( text )
|
97
|
+
# XXX: the list of contractions is much larger than this... find'em
|
95
98
|
text = text.gsub(/dont/,"don't").gsub(/Dont/,"Don't")
|
96
99
|
text = text.gsub(/youre/,"you're")
|
97
100
|
tokens = Brill::Tagger.tokenize( text )
|
data/lib/rbtagger/version.rb
CHANGED
data/test/test_rule_tagger.rb
CHANGED
@@ -139,6 +139,11 @@ TMZ.com: Britney celebrated getting overnights with her kids by going on a wild
|
|
139
139
|
puts results.inspect
|
140
140
|
end
|
141
141
|
|
142
|
+
def test_adjectives
|
143
|
+
results = tagger.adjectives("So happy i get to bring my baby boy home tomorrow. Hospital tv is horrible, ten channels no one watches")
|
144
|
+
assert_equal [["happy", "JJ"], ["horrible", "JJ"]], results
|
145
|
+
end
|
146
|
+
|
142
147
|
private
|
143
148
|
def tagger
|
144
149
|
$rtagger
|
data/website/index.html
CHANGED
@@ -160,7 +160,7 @@
|
|
160
160
|
<h1>rbtagger</h1>
|
161
161
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
|
162
162
|
<p>Get Version</p>
|
163
|
-
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
|
163
|
+
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.1</a>
|
164
164
|
</div>
|
165
165
|
<h4 style="float:right;padding-right:10px;"> &#x2192; ‘rbtagger’</h4>
|
166
166
|
<h2>What</h2>
|
@@ -175,15 +175,16 @@ gem install rbtagger
|
|
175
175
|
<p><pre class='syntax'>
|
176
176
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
|
177
177
|
|
178
|
-
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span>
|
179
|
-
<span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">LEXICALRULEFILE</span><span class="punct">"),</span>
|
180
|
-
<span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">CONTEXTUALRULEFILE</span><span class="punct">")</span> <span class="punct">)</span>
|
178
|
+
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span>
|
181
179
|
<span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
|
182
180
|
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
|
183
181
|
<span class="keyword">end</span>
|
184
182
|
|
185
183
|
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">sample.txt</span><span class="punct">")</span> <span class="punct">)</span>
|
186
184
|
<span class="punct">=></span> <span class="punct">[["</span><span class="string">doctor</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">["</span><span class="string">treatment</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">5</span><span class="punct">]]</span>
|
185
|
+
|
186
|
+
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">nouns</span>
|
187
|
+
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">adjectives</span>
|
187
188
|
</pre></p>
|
188
189
|
<h4>Using the word tagger</h4>
|
189
190
|
<p><pre class='syntax'>
|
@@ -210,7 +211,7 @@ rake install_gem</pre>
|
|
210
211
|
<h2>Contact</h2>
|
211
212
|
<p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> or post via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
|
212
213
|
<p class="coda">
|
213
|
-
<a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>,
|
214
|
+
<a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 21st May 2009<br>
|
214
215
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
215
216
|
</p>
|
216
217
|
</div>
|
data/website/index.txt
CHANGED
@@ -21,15 +21,16 @@ h2. The basics
|
|
21
21
|
<pre syntax="ruby">
|
22
22
|
require 'rbtagger'
|
23
23
|
|
24
|
-
tagger = Brill::Tagger.new(
|
25
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
26
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
24
|
+
tagger = Brill::Tagger.new
|
27
25
|
docs.each do|doc|
|
28
26
|
tagger.tag( File.read( doc ) )
|
29
27
|
end
|
30
28
|
|
31
29
|
tagger.suggest( File.read("sample.txt") )
|
32
30
|
=> [["doctor", "NN", 3], ["treatment", "NN", 5]]
|
31
|
+
|
32
|
+
tagger.nouns
|
33
|
+
tagger.adjectives
|
33
34
|
</pre>
|
34
35
|
|
35
36
|
<h4>Using the word tagger</h4>
|