rbtagger 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.txt +1 -3
 - data/lib/brill/tagger.rb +3 -0
 - data/lib/rbtagger/version.rb +1 -1
 - data/test/test_rule_tagger.rb +5 -0
 - data/website/index.html +6 -5
 - data/website/index.txt +4 -3
 - metadata +1 -1
 
    
        data/README.txt
    CHANGED
    
    | 
         @@ -19,9 +19,7 @@ This software is made available under the MIT License, see LICENSE 
     | 
|
| 
       19 
19 
     | 
    
         | 
| 
       20 
20 
     | 
    
         
             
            == SYNOPSIS:
         
     | 
| 
       21 
21 
     | 
    
         | 
| 
       22 
     | 
    
         
            -
             tagger = Brill::Tagger.new(
     | 
| 
       23 
     | 
    
         
            -
                                         File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
         
     | 
| 
       24 
     | 
    
         
            -
                                         File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
         
     | 
| 
      
 22 
     | 
    
         
            +
             tagger = Brill::Tagger.new
         
     | 
| 
       25 
23 
     | 
    
         | 
| 
       26 
24 
     | 
    
         
             
            == INSTALL:
         
     | 
| 
       27 
25 
     | 
    
         | 
    
        data/lib/brill/tagger.rb
    CHANGED
    
    | 
         @@ -18,10 +18,12 @@ module Brill 
     | 
|
| 
       18 
18 
     | 
    
         | 
| 
       19 
19 
     | 
    
         
             
                # given a body of text return a list of adjectives
         
     | 
| 
       20 
20 
     | 
    
         
             
                def adjectives( text )
         
     | 
| 
      
 21 
     | 
    
         
            +
                  tag(text).select{|t| t.last == 'JJ' }
         
     | 
| 
       21 
22 
     | 
    
         
             
                end
         
     | 
| 
       22 
23 
     | 
    
         | 
| 
       23 
24 
     | 
    
         
             
                # given a body of text return a list of nouns
         
     | 
| 
       24 
25 
     | 
    
         
             
                def nouns( text )
         
     | 
| 
      
 26 
     | 
    
         
            +
                  tag(text).select{|t| t.last.match(/NN/) }
         
     | 
| 
       25 
27 
     | 
    
         
             
                end
         
     | 
| 
       26 
28 
     | 
    
         | 
| 
       27 
29 
     | 
    
         
             
                # returns similar results as tag, but further reduced by only selecting nouns
         
     | 
| 
         @@ -92,6 +94,7 @@ module Brill 
     | 
|
| 
       92 
94 
     | 
    
         
             
                # returns an array like [[token,tag],[token,tag]...[token,tag]] 
         
     | 
| 
       93 
95 
     | 
    
         
             
                #
         
     | 
| 
       94 
96 
     | 
    
         
             
                def tag( text )
         
     | 
| 
      
 97 
     | 
    
         
            +
                  # XXX: the list of contractions is much larger than this... find'em
         
     | 
| 
       95 
98 
     | 
    
         
             
                  text = text.gsub(/dont/,"don't").gsub(/Dont/,"Don't")
         
     | 
| 
       96 
99 
     | 
    
         
             
                  text = text.gsub(/youre/,"you're")
         
     | 
| 
       97 
100 
     | 
    
         
             
                  tokens = Brill::Tagger.tokenize( text )
         
     | 
    
        data/lib/rbtagger/version.rb
    CHANGED
    
    
    
        data/test/test_rule_tagger.rb
    CHANGED
    
    | 
         @@ -139,6 +139,11 @@ TMZ.com: Britney celebrated getting overnights with her kids by going on a wild 
     | 
|
| 
       139 
139 
     | 
    
         
             
                puts results.inspect
         
     | 
| 
       140 
140 
     | 
    
         
             
              end
         
     | 
| 
       141 
141 
     | 
    
         | 
| 
      
 142 
     | 
    
         
            +
              def test_adjectives
         
     | 
| 
      
 143 
     | 
    
         
            +
                results = tagger.adjectives("So happy i get to bring my baby boy home tomorrow. Hospital tv is horrible, ten channels no one watches")
         
     | 
| 
      
 144 
     | 
    
         
            +
                assert_equal [["happy", "JJ"], ["horrible", "JJ"]], results
         
     | 
| 
      
 145 
     | 
    
         
            +
              end
         
     | 
| 
      
 146 
     | 
    
         
            +
             
     | 
| 
       142 
147 
     | 
    
         
             
            private
         
     | 
| 
       143 
148 
     | 
    
         
             
              def tagger
         
     | 
| 
       144 
149 
     | 
    
         
             
                $rtagger
         
     | 
    
        data/website/index.html
    CHANGED
    
    | 
         @@ -160,7 +160,7 @@ 
     | 
|
| 
       160 
160 
     | 
    
         
             
                <h1>rbtagger</h1>
         
     | 
| 
       161 
161 
     | 
    
         
             
                <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
         
     | 
| 
       162 
162 
     | 
    
         
             
                  <p>Get Version</p>
         
     | 
| 
       163 
     | 
    
         
            -
                  <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
     | 
| 
      
 163 
     | 
    
         
            +
                  <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.1</a>
         
     | 
| 
       164 
164 
     | 
    
         
             
                </div>
         
     | 
| 
       165 
165 
     | 
    
         
             
                <h4 style="float:right;padding-right:10px;"> &#x2192; ‘rbtagger’</h4>
         
     | 
| 
       166 
166 
     | 
    
         
             
            <h2>What</h2>
         
     | 
| 
         @@ -175,15 +175,16 @@ gem install rbtagger 
     | 
|
| 
       175 
175 
     | 
    
         
             
            <p><pre class='syntax'>
         
     | 
| 
       176 
176 
     | 
    
         
             
            <span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
         
     | 
| 
       177 
177 
     | 
    
         | 
| 
       178 
     | 
    
         
            -
            <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span>
     | 
| 
       179 
     | 
    
         
            -
                                        <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">LEXICALRULEFILE</span><span class="punct">"),</span>
         
     | 
| 
       180 
     | 
    
         
            -
                                        <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">CONTEXTUALRULEFILE</span><span class="punct">")</span> <span class="punct">)</span>
         
     | 
| 
      
 178 
     | 
    
         
            +
            <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span>
         
     | 
| 
       181 
179 
     | 
    
         
             
            <span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
         
     | 
| 
       182 
180 
     | 
    
         
             
              <span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
         
     | 
| 
       183 
181 
     | 
    
         
             
            <span class="keyword">end</span>
         
     | 
| 
       184 
182 
     | 
    
         | 
| 
       185 
183 
     | 
    
         
             
            <span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">sample.txt</span><span class="punct">")</span> <span class="punct">)</span>
         
     | 
| 
       186 
184 
     | 
    
         
             
            <span class="punct">=></span> <span class="punct">[["</span><span class="string">doctor</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">["</span><span class="string">treatment</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">5</span><span class="punct">]]</span>
         
     | 
| 
      
 185 
     | 
    
         
            +
             
     | 
| 
      
 186 
     | 
    
         
            +
            <span class="ident">tagger</span><span class="punct">.</span><span class="ident">nouns</span>
         
     | 
| 
      
 187 
     | 
    
         
            +
            <span class="ident">tagger</span><span class="punct">.</span><span class="ident">adjectives</span>
         
     | 
| 
       187 
188 
     | 
    
         
             
            </pre></p>
         
     | 
| 
       188 
189 
     | 
    
         
             
            <h4>Using the word tagger</h4>
         
     | 
| 
       189 
190 
     | 
    
         
             
            <p><pre class='syntax'>
         
     | 
| 
         @@ -210,7 +211,7 @@ rake install_gem</pre> 
     | 
|
| 
       210 
211 
     | 
    
         
             
            <h2>Contact</h2>
         
     | 
| 
       211 
212 
     | 
    
         
             
            <p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> email via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
         
     | 
| 
       212 
213 
     | 
    
         
             
                <p class="coda">
         
     | 
| 
       213 
     | 
    
         
            -
                  <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>,  
     | 
| 
      
 214 
     | 
    
         
            +
                  <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 21st May 2009<br>
         
     | 
| 
       214 
215 
     | 
    
         
             
                  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
         
     | 
| 
       215 
216 
     | 
    
         
             
                </p>
         
     | 
| 
       216 
217 
     | 
    
         
             
            </div>
         
     | 
    
        data/website/index.txt
    CHANGED
    
    | 
         @@ -21,15 +21,16 @@ h2. The basics 
     | 
|
| 
       21 
21 
     | 
    
         
             
            <pre syntax="ruby">
         
     | 
| 
       22 
22 
     | 
    
         
             
            require 'rbtagger'
         
     | 
| 
       23 
23 
     | 
    
         | 
| 
       24 
     | 
    
         
            -
            tagger = Brill::Tagger.new(
     | 
| 
       25 
     | 
    
         
            -
                                        File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
         
     | 
| 
       26 
     | 
    
         
            -
                                        File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
         
     | 
| 
      
 24 
     | 
    
         
            +
            tagger = Brill::Tagger.new
         
     | 
| 
       27 
25 
     | 
    
         
             
            docs.each do|doc|
         
     | 
| 
       28 
26 
     | 
    
         
             
              tagger.tag( File.read( doc ) )
         
     | 
| 
       29 
27 
     | 
    
         
             
            end
         
     | 
| 
       30 
28 
     | 
    
         | 
| 
       31 
29 
     | 
    
         
             
            tagger.suggest( File.read("sample.txt") )
         
     | 
| 
       32 
30 
     | 
    
         
             
            => [["doctor", "NN", 3], ["treatment", "NN", 5]]
         
     | 
| 
      
 31 
     | 
    
         
            +
             
     | 
| 
      
 32 
     | 
    
         
            +
            tagger.nouns
         
     | 
| 
      
 33 
     | 
    
         
            +
            tagger.adjectives
         
     | 
| 
       33 
34 
     | 
    
         
             
            </pre>
         
     | 
| 
       34 
35 
     | 
    
         | 
| 
       35 
36 
     | 
    
         
             
            <h4>Using the word tagger</h4>
         
     |