rbtagger 0.3.0 → 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.txt CHANGED
@@ -19,9 +19,7 @@ This software is made available under the MIT License, see LICENSE
19
19
 
20
20
  == SYNOPSIS:
21
21
 
22
- tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
23
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
24
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
22
+ tagger = Brill::Tagger.new
25
23
 
26
24
  == INSTALL:
27
25
 
data/lib/brill/tagger.rb CHANGED
@@ -18,10 +18,12 @@ module Brill
18
18
 
19
19
  # given a body of text return a list of adjectives
20
20
  def adjectives( text )
21
+ tag(text).select{|t| t.last == 'JJ' }
21
22
  end
22
23
 
23
24
  # given a body of text return a list of nouns
24
25
  def nouns( text )
26
+ tag(text).select{|t| t.last.match(/NN/) }
25
27
  end
26
28
 
27
29
  # returns similar results as tag, but further reduced by only selecting nouns
@@ -92,6 +94,7 @@ module Brill
92
94
  # returns an array like [[token,tag],[token,tag]...[token,tag]]
93
95
  #
94
96
  def tag( text )
97
+ # XXX: the list of contractions is much larger than this... find 'em
95
98
  text = text.gsub(/dont/,"don't").gsub(/Dont/,"Don't")
96
99
  text = text.gsub(/youre/,"you're")
97
100
  tokens = Brill::Tagger.tokenize( text )
@@ -2,7 +2,7 @@ module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 3
5
- TINY = 0
5
+ TINY = 1
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -139,6 +139,11 @@ TMZ.com: Britney celebrated getting overnights with her kids by going on a wild
139
139
  puts results.inspect
140
140
  end
141
141
 
142
+ def test_adjectives
143
+ results = tagger.adjectives("So happy i get to bring my baby boy home tomorrow. Hospital tv is horrible, ten channels no one watches")
144
+ assert_equal [["happy", "JJ"], ["horrible", "JJ"]], results
145
+ end
146
+
142
147
  private
143
148
  def tagger
144
149
  $rtagger
data/website/index.html CHANGED
@@ -160,7 +160,7 @@
160
160
  <h1>rbtagger</h1>
161
161
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
162
162
  <p>Get Version</p>
163
- <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
163
+ <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.1</a>
164
164
  </div>
165
165
  <h4 style="float:right;padding-right:10px;"> &#x2192; &#8216;rbtagger&#8217;</h4>
166
166
  <h2>What</h2>
@@ -175,15 +175,16 @@ gem install rbtagger
175
175
  <p><pre class='syntax'>
176
176
  <span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
177
177
 
178
- <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">LEXICON</span><span class="punct">&quot;),</span>
179
- <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">LEXICALRULEFILE</span><span class="punct">&quot;),</span>
180
- <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">CONTEXTUALRULEFILE</span><span class="punct">&quot;)</span> <span class="punct">)</span>
178
+ <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span>
181
179
  <span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
182
180
  <span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
183
181
  <span class="keyword">end</span>
184
182
 
185
183
  <span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(&quot;</span><span class="string">sample.txt</span><span class="punct">&quot;)</span> <span class="punct">)</span>
186
184
  <span class="punct">=&gt;</span> <span class="punct">[[&quot;</span><span class="string">doctor</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">[&quot;</span><span class="string">treatment</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">5</span><span class="punct">]]</span>
185
+
186
+ <span class="ident">tagger</span><span class="punct">.</span><span class="ident">nouns</span>
187
+ <span class="ident">tagger</span><span class="punct">.</span><span class="ident">adjectives</span>
187
188
  </pre></p>
188
189
  <h4>Using the word tagger</h4>
189
190
  <p><pre class='syntax'>
@@ -210,7 +211,7 @@ rake install_gem</pre>
210
211
  <h2>Contact</h2>
211
212
  <p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> or post via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a>.</p>
212
213
  <p class="coda">
213
- <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 23rd June 2008<br>
214
+ <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 21st May 2009<br>
214
215
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
215
216
  </p>
216
217
  </div>
data/website/index.txt CHANGED
@@ -21,15 +21,16 @@ h2. The basics
21
21
  <pre syntax="ruby">
22
22
  require 'rbtagger'
23
23
 
24
- tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
25
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
26
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
24
+ tagger = Brill::Tagger.new
27
25
  docs.each do|doc|
28
26
  tagger.tag( File.read( doc ) )
29
27
  end
30
28
 
31
29
  tagger.suggest( File.read("sample.txt") )
32
30
  => [["doctor", "NN", 3], ["treatment", "NN", 5]]
31
+
32
+ tagger.nouns
33
+ tagger.adjectives
33
34
  </pre>
34
35
 
35
36
  <h4>Using the word tagger</h4>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbtagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher