rbtagger 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
data/README.txt CHANGED
@@ -19,9 +19,7 @@ This software is made available under the MIT License, see LICENSE
19
19
 
20
20
  == SYNOPSIS:
21
21
 
22
- tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
23
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
24
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
22
+ tagger = Brill::Tagger.new
25
23
 
26
24
  == INSTALL:
27
25
 
data/lib/brill/tagger.rb CHANGED
@@ -18,10 +18,12 @@ module Brill
18
18
 
19
19
  # given a body of text return a list of adjectives
20
20
  def adjectives( text )
21
+ tag(text).select{|t| t.last == 'JJ' }
21
22
  end
22
23
 
23
24
  # given a body of text return a list of nouns
24
25
  def nouns( text )
26
+ tag(text).select{|t| t.last.match(/NN/) }
25
27
  end
26
28
 
27
29
  # returns similar results as tag, but further reduced by only selecting nouns
@@ -92,6 +94,7 @@ module Brill
92
94
  # returns an array like [[token,tag],[token,tag]...[token,tag]]
93
95
  #
94
96
  def tag( text )
97
+ # XXX: the list of contractions is much larger than this... find'em
95
98
  text = text.gsub(/dont/,"don't").gsub(/Dont/,"Don't")
96
99
  text = text.gsub(/youre/,"you're")
97
100
  tokens = Brill::Tagger.tokenize( text )
@@ -2,7 +2,7 @@ module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -139,6 +139,11 @@ TMZ.com: Britney celebrated getting overnights with her kids by going on a wild
139
139
  puts results.inspect
140
140
  end
141
141
 
142
+ def test_adjectives
143
+ results = tagger.adjectives("So happy i get to bring my baby boy home tomorrow. Hospital tv is horrible, ten channels no one watches")
144
+ assert_equal [["happy", "JJ"], ["horrible", "JJ"]], results
145
+ end
146
+
142
147
  private
143
148
  def tagger
144
149
  $rtagger
data/website/index.html CHANGED
@@ -160,7 +160,7 @@
160
160
  <h1>rbtagger</h1>
161
161
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
162
162
  <p>Get Version</p>
163
- <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
163
+ <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.1</a>
164
164
  </div>
165
165
  <h4 style="float:right;padding-right:10px;"> &#x2192; &#8216;rbtagger&#8217;</h4>
166
166
  <h2>What</h2>
@@ -175,15 +175,16 @@ gem install rbtagger
175
175
  <p><pre class='syntax'>
176
176
  <span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
177
177
 
178
- <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">LEXICON</span><span class="punct">&quot;),</span>
179
- <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">LEXICALRULEFILE</span><span class="punct">&quot;),</span>
180
- <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">CONTEXTUALRULEFILE</span><span class="punct">&quot;)</span> <span class="punct">)</span>
178
+ <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span>
181
179
  <span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
182
180
  <span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
183
181
  <span class="keyword">end</span>
184
182
 
185
183
  <span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(&quot;</span><span class="string">sample.txt</span><span class="punct">&quot;)</span> <span class="punct">)</span>
186
184
  <span class="punct">=&gt;</span> <span class="punct">[[&quot;</span><span class="string">doctor</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">[&quot;</span><span class="string">treatment</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">5</span><span class="punct">]]</span>
185
+
186
+ <span class="ident">tagger</span><span class="punct">.</span><span class="ident">nouns</span>
187
+ <span class="ident">tagger</span><span class="punct">.</span><span class="ident">adjectives</span>
187
188
  </pre></p>
188
189
  <h4>Using the word tagger</h4>
189
190
  <p><pre class='syntax'>
@@ -210,7 +211,7 @@ rake install_gem</pre>
210
211
  <h2>Contact</h2>
211
212
  <p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a>.</p>
212
213
  <p class="coda">
213
- <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 23rd June 2008<br>
214
+ <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 21st May 2009<br>
214
215
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
215
216
  </p>
216
217
  </div>
data/website/index.txt CHANGED
@@ -21,15 +21,16 @@ h2. The basics
21
21
  <pre syntax="ruby">
22
22
  require 'rbtagger'
23
23
 
24
- tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
25
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
26
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
24
+ tagger = Brill::Tagger.new
27
25
  docs.each do|doc|
28
26
  tagger.tag( File.read( doc ) )
29
27
  end
30
28
 
31
29
  tagger.suggest( File.read("sample.txt") )
32
30
  => [["doctor", "NN", 3], ["treatment", "NN", 5]]
31
+
32
+ tagger.nouns
33
+ tagger.adjectives
33
34
  </pre>
34
35
 
35
36
  <h4>Using the word tagger</h4>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbtagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher