rbtagger 0.0.1 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +9 -2
 - data/Rakefile +0 -29
 - data/config/hoe.rb +2 -2
 - data/ext/rule_tagger/mkmf.log +46 -0
 - data/ext/word_tagger/mkmf.log +24 -0
 - data/ext/word_tagger/rtagger.cc +2 -2
 - data/ext/word_tagger/tagger.cc +14 -4
 - data/lib/rbtagger/version.rb +2 -2
 - data/lib/rbtagger.rb +8 -1
 - data/lib/word/tagger.rb +18 -0
 - data/script/txt2html +1 -1
 - data/tasks/extconf/rule_tagger.rake +43 -0
 - data/tasks/extconf/word_tagger.rake +43 -0
 - data/tasks/extconf.rake +18 -0
 - data/test/fixtures/tags.txt +976 -0
 - data/test/{tagger_test.rb → test_rule_tagger.rb} +15 -14
 - data/test/test_word_tagger.rb +33 -0
 - data/website/index.html +12 -2
 - data/website/index.txt +11 -1
 - metadata +16 -6
 - data/ext/word_tagger/tagger.rb +0 -8
 
| 
         @@ -1,16 +1,7 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            require ' 
     | 
| 
       2 
     | 
    
         
            -
            $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
         
     | 
| 
       3 
     | 
    
         
            -
            $:.unshift File.join(File.dirname(__FILE__), "..", "ext", "rule_tagger")
         
     | 
| 
      
 1 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/test_helper'
         
     | 
| 
       4 
2 
     | 
    
         | 
| 
       5 
     | 
    
         
            -
            require 'brill/tagger'
         
     | 
| 
       6 
3 
     | 
    
         | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
            $tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
         
     | 
| 
       9 
     | 
    
         
            -
                                         File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
         
     | 
| 
       10 
     | 
    
         
            -
                                         File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
         
     | 
| 
       11 
     | 
    
         
            -
            puts "tagger loaded!"
         
     | 
| 
       12 
     | 
    
         
            -
             
     | 
| 
       13 
     | 
    
         
            -
            class TaggerTest < Test::Unit::TestCase
         
     | 
| 
      
 4 
     | 
    
         
            +
            class TestRuleTagger< Test::Unit::TestCase
         
     | 
| 
       14 
5 
     | 
    
         
             
            SAMPLE_DOC=%q(
         
     | 
| 
       15 
6 
     | 
    
         
             
            Take an active role in your care
         
     | 
| 
       16 
7 
     | 
    
         
             
            When it comes to making decisions about the goals and direction of treatment, don't sit back. Work closely and actively with your oncologist and the rest of your medical team.
         
     | 
| 
         @@ -33,13 +24,23 @@ Allow yourself time to discuss the emotional consequences of your illness and tr 
     | 
|
| 
       33 
24 
     | 
    
         
             
            Stay connected
         
     | 
| 
       34 
25 
     | 
    
         
             
            Although many newly diagnosed patients fear they will not be able to keep working during treatment, this is usually not the case. Working, even at a reduced schedule, helps you maintain valuable social connections and weekly structure.
         
     | 
| 
       35 
26 
     | 
    
         
             
            )
         
     | 
| 
      
 27 
     | 
    
         
            +
              def setup
         
     | 
| 
      
 28 
     | 
    
         
            +
                if !defined?($tagger)
         
     | 
| 
      
 29 
     | 
    
         
            +
                  puts "loading tagger..."
         
     | 
| 
      
 30 
     | 
    
         
            +
                  $rtagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
         
     | 
| 
      
 31 
     | 
    
         
            +
                                               File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
         
     | 
| 
      
 32 
     | 
    
         
            +
                                               File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
         
     | 
| 
      
 33 
     | 
    
         
            +
                  puts "tagger loaded!"
         
     | 
| 
      
 34 
     | 
    
         
            +
                end
         
     | 
| 
      
 35 
     | 
    
         
            +
              end
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
       36 
37 
     | 
    
         
             
              def test_simple_tagger
         
     | 
| 
       37 
38 
     | 
    
         
             
                pairs = tagger.tag( SAMPLE_DOC )
         
     | 
| 
       38 
39 
     | 
    
         
             
                assert_equal [["", ")"], ["", ")"], ["Take", "VB"], ["an", "DT"], ["active", "JJ"], ["role", "NN"], ["in", "IN"], ["your", "PRP$"], ["care", "NN"], ["When", "WRB"], ["it", "PRP"], ["comes", "VBZ"], ["to", "TO"], ["making", "VBG"], ["decisions", "NNS"], ["about", "IN"], ["the", "DT"], ["goals", "NNS"], ["and", "CC"], ["direction", "NN"], ["of", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["do", "VBP"], ["", ")"], ["n't", "RB"], ["sit", "VB"], ["back.", "CD"], ["Work", "NN"], ["closely", "RB"], ["and", "CC"], ["actively", "RB"], ["with", "IN"], ["your", "PRP$"], ["oncologist", "NN"], ["and", "CC"], ["the", "DT"], ["rest", "NN"], ["of", "IN"], ["your", "PRP$"], ["medical", "JJ"], ["team.", "NNP"], ["Dont", "NNP"], ["overlook", "VB"], ["clinical", "JJ"], ["trials", "NNS"], ["If", "IN"], ["youre", "NN"], ["eligible", "JJ"], ["to", "TO"], ["enroll", "VB"], ["in", "IN"], ["clinical", "JJ"], ["trials", "NNS"], [",", ","], ["", ")"], ["select", "VB"], ["an", "DT"], ["oncologist", "NN"], ["who", "WP"], ["participates", "VBZ"], ["in", "IN"], ["them.", "JJ"], ["Patients", "NNS"], ["who", "WP"], ["enroll", "VBP"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], ["receive", "VBP"], ["closer", "JJR"], ["follow-up", "NN"], [",", ","], ["", ")"], ["the", "DT"], ["highest", "JJS"], ["standard-of-care", "JJ"], ["treatment", "NN"], ["and", "CC"], ["access", "NN"], ["to", "TO"], ["experimental", "JJ"], ["therapies", "NNS"], ["at", "IN"], ["no", "DT"], ["extra", "JJ"], ["cost.", "NNP"], ["Maximize", "NNP"], ["your", "PRP$"], ["nutrition", "NN"], ["strategy", "NN"], ["Doing", "NNP"], ["your", "PRP$"], ["best", "JJS"], ["to", "TO"], ["eat", "VB"], ["a", "DT"], ["healthy", "JJ"], [",", ","], ["", ")"], ["well-balanced", "JJ"], ["diet", "NN"], ["is", "VBZ"], ["vital", "JJ"], ["to", "TO"], ["prompt", "VB"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["for", "IN"], ["recovery", "NN"], ["from", "IN"], ["radiation", "NN"], ["or", "CC"], 
["chemotherapy.", "JJ"], ["Many", "JJ"], ["oncology", "NN"], ["practices", "NNS"], ["employ", "VBP"], ["registered", "VBN"], ["dieticians", "NNS"], ["who", "WP"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["optimize", "VB"], ["your", "PRP$"], ["nutrition.", "JJ"], ["Steer", "VB"], ["clear", "JJ"], ["of", "IN"], ["", ")"], ["``", "``"], ["natural", "JJ"], ["cures", "NNS"], ["''", "''"], ["", ")"], ["Before", "IN"], ["trying", "VBG"], ["nutritional", "JJ"], ["supplements", "NNS"], ["or", "CC"], ["herbal", "JJ"], ["remedies", "NNS"], [",", ","], ["", ")"], ["be", "VB"], ["sure", "JJ"], ["to", "TO"], ["discuss", "VB"], ["your", "PRP$"], ["plans", "NNS"], ["with", "IN"], ["a", "DT"], ["doctor.", "JJ"], ["Most", "JJS"], ["have", "VBP"], ["not", "RB"], ["been", "VBN"], ["tested", "VBN"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], [",", ","], ["", ")"], ["and", "CC"], ["some", "DT"], ["may", "MD"], ["actually", "RB"], ["interfere", "VB"], ["with", "IN"], ["your", "PRP$"], ["treatment.", "JJ"], ["Build", "VB"], ["a", "DT"], ["stronger", "JJR"], ["body", "NN"], ["Even", "RB"], ["walking", "VBG"], ["regularly", "RB"], ["is", "VBZ"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["minimize", "VB"], ["long-term", "JJ"], ["muscle", "NN"], ["weakness", "NN"], ["caused", "VBN"], ["by", "IN"], ["illness", "NN"], ["or", "CC"], ["de-conditioning.", "NNP"], ["Focus", "NNP"], ["on", "IN"], ["overall", "JJ"], ["health", "NN"], ["Patients", "NNS"], ["may", "MD"], ["be", "VB"], ["cured", "VBN"], ["of", "IN"], ["cancer", "NN"], ["but", "CC"], ["still", "JJ"], ["face", "NN"], ["life-threatening", "JJ"], ["medical", "JJ"], ["problems", "NNS"], ["that", "WDT"], ["are", "VBP"], ["underemphasized", "JJ"], ["during", "IN"], ["cancer", "NN"], ["treatments", "NNS"], [",", ","], ["", ")"], ["such", "JJ"], ["as", "IN"], ["diabetes", "NN"], [",", ","], ["", ")"], ["high", "JJ"], ["blood", "NN"], ["pressure", "NN"], ["and", "CC"], ["heart", "NN"], ["disease.", "JJ"], ["Continue", 
"VB"], ["to", "TO"], ["monitor", "VB"], ["your", "PRP$"], ["overall", "JJ"], ["health.", "JJ"], ["Put", "NN"], ["the", "DT"], ["fire", "NN"], ["out", "IN"], ["for", "IN"], ["good", "JJ"], ["Smoking", "NNP"], ["impairs", "NNS"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["radiation", "NN"], ["and", "CC"], ["increases", "NNS"], ["your", "PRP$"], ["risk", "NN"], ["of", "IN"], ["cardiovascular", "JJ"], ["disease", "NN"], ["and", "CC"], ["many", "JJ"], ["types", "NNS"], ["of", "IN"], ["cancers.", "CD"], ["Ask", "VB"], ["your", "PRP$"], ["doctor", "NN"], ["for", "IN"], ["help", "NN"], ["identifying", "VBG"], ["and", "CC"], ["obtaining", "VBG"], ["the", "DT"], ["most", "RBS"], ["appropriate", "JJ"], ["cessation", "NN"], ["aids.", "NNP"], ["Map", "NNP"], ["a", "DT"], ["healthy", "JJ"], ["future", "NN"], ["Once", "RB"], ["youve", "VBP"], ["completed", "VBN"], ["treatment", "NN"], [",", ","], ["", ")"], ["discuss", "VB"], ["appropriate", "JJ"], ["follow-up", "NN"], ["plans", "NNS"], ["with", "IN"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], ["keep", "VB"], ["track", "NN"], ["of", "IN"], ["them", "PRP"], ["yourself.", "CD"], ["Intensified", "JJ"], ["screening", "NN"], ["over", "IN"], ["many", "JJ"], ["years", "NNS"], ["is", "VBZ"], ["frequently", "RB"], ["recommended", "VBN"], ["to", "TO"], ["identify", "VB"], ["and", "CC"], ["treat", "VB"], ["a", "DT"], ["recurrence", "NN"], ["early", "JJ"], ["on.", "CD"], ["Share", "VB"], ["your", "PRP$"], ["feelings", "NNS"], ["Allow", "VB"], ["yourself", "PRP"], ["time", "NN"], ["to", "TO"], ["discuss", "VB"], ["the", "DT"], ["emotional", "JJ"], ["consequences", "NNS"], ["of", "IN"], ["your", "PRP$"], ["illness", "NN"], ["and", "CC"], ["treatment", "NN"], ["with", "IN"], ["family", "NN"], [",", ","], ["", ")"], ["friends", "NNS"], [",", ","], ["", ")"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], [",", ","], ["", ")"], ["if", "IN"], ["necessary", "JJ"], [",", ","], ["", ")"], ["a", "DT"], 
["professional", "JJ"], ["therapist.", "JJ"], ["Many", "JJ"], ["patients", "NNS"], ["also", "RB"], ["find", "VBP"], ["antidepressants", "NNS"], ["helpful", "JJ"], ["during", "IN"], ["treatment.", "JJ"], ["Stay", "VB"], ["connected", "VBN"], ["Although", "IN"], ["many", "JJ"], ["newly", "RB"], ["diagnosed", "VBN"], ["patients", "NNS"], ["fear", "VBP"], ["they", "PRP"], ["will", "MD"], ["not", "RB"], ["be", "VB"], ["able", "JJ"], ["to", "TO"], ["keep", "VB"], ["working", "VBG"], ["during", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["this", "DT"], ["is", "VBZ"], ["usually", "RB"], ["not", "RB"], ["the", "DT"], ["case.", "CD"], ["Working", "NNP"], [",", ","], ["", ")"], ["even", "RB"], ["at", "IN"], ["a", "DT"], ["reduced", "VBN"], ["schedule", "NN"], [",", ","], ["", ")"], ["helps", "VBZ"], ["you", "PRP"], ["maintain", "VBP"], ["valuable", "JJ"], ["social", "JJ"], ["connections", "NNS"], ["and", "CC"], ["weekly", "JJ"], ["structure", "NN"], [".", "."]], pairs 
         
     | 
| 
       39 
40 
     | 
    
         
             
                #puts pairs.inspect
         
     | 
| 
       40 
41 
     | 
    
         
             
                # enable these lines for memory leak testing
         
     | 
| 
       41 
     | 
    
         
            -
                 
     | 
| 
       42 
     | 
    
         
            -
                ObjectSpace.garbage_collect
         
     | 
| 
      
 42 
     | 
    
         
            +
                #$tagger = nil
         
     | 
| 
      
 43 
     | 
    
         
            +
                #ObjectSpace.garbage_collect
         
     | 
| 
       43 
44 
     | 
    
         
             
              end
         
     | 
| 
       44 
45 
     | 
    
         | 
| 
       45 
46 
     | 
    
         
             
              def test_multiple_docs
         
     | 
| 
         @@ -55,6 +56,6 @@ Although many newly diagnosed patients fear they will not be able to keep workin 
     | 
|
| 
       55 
56 
     | 
    
         | 
| 
       56 
57 
     | 
    
         
             
            private
         
     | 
| 
       57 
58 
     | 
    
         
             
              def tagger
         
     | 
| 
       58 
     | 
    
         
            -
                $tagger
     | 
| 
      
 59 
     | 
    
         
            +
                $rtagger
         
     | 
| 
       59 
60 
     | 
    
         
             
              end
         
     | 
| 
       60 
61 
     | 
    
         
             
            end
         
     | 
| 
         @@ -0,0 +1,33 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require File.dirname(__FILE__) + '/test_helper'
         
     | 
| 
      
 2 
     | 
    
         
            +
             
     | 
| 
      
 3 
     | 
    
         
            +
            class TestWordTagger < Test::Unit::TestCase
         
     | 
| 
      
 4 
     | 
    
         
            +
              
         
     | 
| 
      
 5 
     | 
    
         
            +
              def setup
         
     | 
| 
      
 6 
     | 
    
         
            +
                if !defined?($wtagger)
         
     | 
| 
      
 7 
     | 
    
         
            +
                  $wtagger = Word::Tagger.new( File.join(File.dirname(__FILE__),'fixtures','tags.txt'), :words => 4 )
         
     | 
| 
      
 8 
     | 
    
         
            +
                end
         
     | 
| 
      
 9 
     | 
    
         
            +
              end
         
     | 
| 
      
 10 
     | 
    
         
            +
             
     | 
| 
      
 11 
     | 
    
         
            +
              def test_basic
         
     | 
| 
      
 12 
     | 
    
         
            +
                timer = Time.now
         
     | 
| 
      
 13 
     | 
    
         
            +
                text = "This is a sa'mple doc[]ument lets see how cancer ngrams 4 works out for this interesting text!"
         
     | 
| 
      
 14 
     | 
    
         
            +
                tags = $wtagger.execute( text )
         
     | 
| 
      
 15 
     | 
    
         
            +
                assert_equal ['cancer','work'], tags
         
     | 
| 
      
 16 
     | 
    
         
            +
                puts "Duration: #{Time.now - timer} sec"
         
     | 
| 
      
 17 
     | 
    
         
            +
              end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
      
 19 
     | 
    
         
            +
              def test_ngram_size3
         
     | 
| 
      
 20 
     | 
    
         
            +
                timer = Time.now
         
     | 
| 
      
 21 
     | 
    
         
            +
                text = "This body of text contains something like ventricular septal defect"
         
     | 
| 
      
 22 
     | 
    
         
            +
                tags = $wtagger.execute( text )
         
     | 
| 
      
 23 
     | 
    
         
            +
                assert_equal ['ventricular septal defect'], tags
         
     | 
| 
      
 24 
     | 
    
         
            +
                puts "Duration: #{Time.now - timer} sec"
         
     | 
| 
      
 25 
     | 
    
         
            +
              end
         
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
      
 27 
     | 
    
         
            +
              def test_cat_and_the_hat
         
     | 
| 
      
 28 
     | 
    
         
            +
                tagger = Word::Tagger.new( ['Cat','hat'], :words => 4 )
         
     | 
| 
      
 29 
     | 
    
         
            +
                tags = tagger.execute( 'the cAt and the hat' )
         
     | 
| 
      
 30 
     | 
    
         
            +
                assert_equal( ["Cat", "hat"], tags )
         
     | 
| 
      
 31 
     | 
    
         
            +
              end
         
     | 
| 
      
 32 
     | 
    
         
            +
             
     | 
| 
      
 33 
     | 
    
         
            +
            end
         
     | 
    
        data/website/index.html
    CHANGED
    
    | 
         @@ -33,7 +33,7 @@ 
     | 
|
| 
       33 
33 
     | 
    
         
             
                <h1>rbtagger</h1>
         
     | 
| 
       34 
34 
     | 
    
         
             
                <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/ruletagger"; return false'>
         
     | 
| 
       35 
35 
     | 
    
         
             
                  <p>Get Version</p>
         
     | 
| 
       36 
     | 
    
         
            -
                  <a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.0.1</a>
     | 
| 
      
 36 
     | 
    
         
            +
                  <a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.2.0</a>
         
     | 
| 
       37 
37 
     | 
    
         
             
                </div>
         
     | 
| 
       38 
38 
     | 
    
         
             
                <h4 style="float:right;padding-right:10px;"> → ‘rb-brill-tagger’</h4>
         
     | 
| 
       39 
39 
     | 
    
         | 
| 
         @@ -49,19 +49,29 @@ 
     | 
|
| 
       49 
49 
     | 
    
         
             
            	<h2>Installing</h2>
         
     | 
| 
       50 
50 
     | 
    
         | 
| 
       51 
51 
     | 
    
         | 
| 
       52 
     | 
    
         
            -
            	<p><pre class='syntax'>sudo gem install  
     | 
| 
      
 52 
     | 
    
         
            +
            	<p><pre class='syntax'>sudo gem install ruletagger</pre></p>
         
     | 
| 
       53 
53 
     | 
    
         | 
| 
       54 
54 
     | 
    
         | 
| 
       55 
55 
     | 
    
         
             
            	<h2>The basics</h2>
         
     | 
| 
       56 
56 
     | 
    
         | 
| 
       57 
57 
     | 
    
         | 
| 
       58 
58 
     | 
    
         
             
            	<p><pre class='syntax'>
         
     | 
| 
      
 59 
     | 
    
         
            +
            <span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
         
     | 
| 
      
 60 
     | 
    
         
            +
             
     | 
| 
      
 61 
     | 
    
         
            +
            <span class="comment"># Using the rule tagger</span>
         
     | 
| 
       59 
62 
     | 
    
         
             
            <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">LEXICON</span><span class="punct">"),</span>
         
     | 
| 
       60 
63 
     | 
    
         
             
                                        <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">LEXICALRULEFILE</span><span class="punct">"),</span>
         
     | 
| 
       61 
64 
     | 
    
         
             
                                        <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">CONTEXTUALRULEFILE</span><span class="punct">")</span> <span class="punct">)</span>
         
     | 
| 
       62 
65 
     | 
    
         
             
            <span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
         
     | 
| 
       63 
66 
     | 
    
         
             
              <span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
         
     | 
| 
       64 
67 
     | 
    
         
             
            <span class="keyword">end</span>
         
     | 
| 
      
 68 
     | 
    
         
            +
             
     | 
| 
      
 69 
     | 
    
         
            +
            <span class="comment"># Using the word tagger</span>
         
     | 
| 
      
 70 
     | 
    
         
            +
            <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Word</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="punct">['</span><span class="string">cat</span><span class="punct">','</span><span class="string">hat</span><span class="punct">'],</span> <span class="symbol">:words</span> <span class="punct">=></span> <span class="number">4</span> <span class="punct">)</span>
         
     | 
| 
      
 71 
     | 
    
         
            +
            <span class="ident">tags</span> <span class="punct">=</span> <span class="ident">tagger</span><span class="punct">.</span><span class="ident">execute</span><span class="punct">(</span> <span class="punct">'</span><span class="string">the cat and the hat</span><span class="punct">'</span> <span class="punct">)</span>
         
     | 
| 
      
 72 
     | 
    
         
            +
            <span class="ident">assert_equal</span><span class="punct">(</span> <span class="punct">["</span><span class="string">cat</span><span class="punct">",</span> <span class="punct">"</span><span class="string">hat</span><span class="punct">"],</span> <span class="ident">tags</span> <span class="punct">)</span>
         
     | 
| 
      
 73 
     | 
    
         
            +
             
     | 
| 
      
 74 
     | 
    
         
            +
             
     | 
| 
       65 
75 
     | 
    
         
             
            </pre></p>
         
     | 
| 
       66 
76 
     | 
    
         | 
| 
       67 
77 
     | 
    
         | 
    
        data/website/index.txt
    CHANGED
    
    | 
         @@ -11,17 +11,27 @@ This work is based on the work of Eric Brill 
     | 
|
| 
       11 
11 
     | 
    
         | 
| 
       12 
12 
     | 
    
         
             
            h2. Installing
         
     | 
| 
       13 
13 
     | 
    
         | 
| 
       14 
     | 
    
         
            -
            <pre syntax="bash">sudo gem install  
     | 
| 
      
 14 
     | 
    
         
            +
            <pre syntax="bash">sudo gem install ruletagger</pre>
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
       16 
16 
     | 
    
         
             
            h2. The basics
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
18 
     | 
    
         
             
            <pre syntax="ruby">
         
     | 
| 
      
 19 
     | 
    
         
            +
            require 'rbtagger'
         
     | 
| 
      
 20 
     | 
    
         
            +
             
     | 
| 
      
 21 
     | 
    
         
            +
            # Using the rule tagger
         
     | 
| 
       19 
22 
     | 
    
         
             
            tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
         
     | 
| 
       20 
23 
     | 
    
         
             
                                        File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
         
     | 
| 
       21 
24 
     | 
    
         
             
                                        File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
         
     | 
| 
       22 
25 
     | 
    
         
             
            docs.each do|doc|
         
     | 
| 
       23 
26 
     | 
    
         
             
              tagger.tag( File.read( doc ) )
         
     | 
| 
       24 
27 
     | 
    
         
             
            end
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
      
 29 
     | 
    
         
            +
            # Using the word tagger
         
     | 
| 
      
 30 
     | 
    
         
            +
            tagger = Word::Tagger.new( ['cat','hat'], :words => 4 )
         
     | 
| 
      
 31 
     | 
    
         
            +
            tags = tagger.execute( 'the cat and the hat' )
         
     | 
| 
      
 32 
     | 
    
         
            +
            assert_equal( ["cat", "hat"], tags )
         
     | 
| 
      
 33 
     | 
    
         
            +
             
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
       25 
35 
     | 
    
         
             
            </pre>
         
     | 
| 
       26 
36 
     | 
    
         | 
| 
       27 
37 
     | 
    
         
             
            h2. Forum
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: rbtagger
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.0.1
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.2.0
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors: 
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Todd A. Fisher
         
     | 
| 
         @@ -9,7 +9,7 @@ autorequire: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
11 
     | 
    
         | 
| 
       12 
     | 
    
         
            -
            date: 2008-05- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2008-05-15 00:00:00 -04:00
         
     | 
| 
       13 
13 
     | 
    
         
             
            default_executable: 
         
     | 
| 
       14 
14 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       15 
15 
     | 
    
         | 
| 
         @@ -38,6 +38,7 @@ extra_rdoc_files: 
     | 
|
| 
       38 
38 
     | 
    
         
             
            - test/docs/doc7.txt
         
     | 
| 
       39 
39 
     | 
    
         
             
            - test/docs/doc8.txt
         
     | 
| 
       40 
40 
     | 
    
         
             
            - test/docs/doc9.txt
         
     | 
| 
      
 41 
     | 
    
         
            +
            - test/fixtures/tags.txt
         
     | 
| 
       41 
42 
     | 
    
         
             
            - website/index.txt
         
     | 
| 
       42 
43 
     | 
    
         
             
            files: 
         
     | 
| 
       43 
44 
     | 
    
         
             
            - COPYING
         
     | 
| 
         @@ -60,6 +61,7 @@ files: 
     | 
|
| 
       60 
61 
     | 
    
         
             
            - ext/rule_tagger/lex.h
         
     | 
| 
       61 
62 
     | 
    
         
             
            - ext/rule_tagger/memory.c
         
     | 
| 
       62 
63 
     | 
    
         
             
            - ext/rule_tagger/memory.h
         
     | 
| 
      
 64 
     | 
    
         
            +
            - ext/rule_tagger/mkmf.log
         
     | 
| 
       63 
65 
     | 
    
         
             
            - ext/rule_tagger/rbtagger.c
         
     | 
| 
       64 
66 
     | 
    
         
             
            - ext/rule_tagger/registry.c
         
     | 
| 
       65 
67 
     | 
    
         
             
            - ext/rule_tagger/registry.h
         
     | 
| 
         @@ -73,12 +75,12 @@ files: 
     | 
|
| 
       73 
75 
     | 
    
         
             
            - ext/rule_tagger/useful.c
         
     | 
| 
       74 
76 
     | 
    
         
             
            - ext/rule_tagger/useful.h
         
     | 
| 
       75 
77 
     | 
    
         
             
            - ext/word_tagger/extconf.rb
         
     | 
| 
      
 78 
     | 
    
         
            +
            - ext/word_tagger/mkmf.log
         
     | 
| 
       76 
79 
     | 
    
         
             
            - ext/word_tagger/porter_stemmer.c
         
     | 
| 
       77 
80 
     | 
    
         
             
            - ext/word_tagger/porter_stemmer.h
         
     | 
| 
       78 
81 
     | 
    
         
             
            - ext/word_tagger/rtagger.cc
         
     | 
| 
       79 
82 
     | 
    
         
             
            - ext/word_tagger/tagger.cc
         
     | 
| 
       80 
83 
     | 
    
         
             
            - ext/word_tagger/tagger.h
         
     | 
| 
       81 
     | 
    
         
            -
            - ext/word_tagger/tagger.rb
         
     | 
| 
       82 
84 
     | 
    
         
             
            - ext/word_tagger/test.rb
         
     | 
| 
       83 
85 
     | 
    
         
             
            - ext/word_tagger/test/Makefile
         
     | 
| 
       84 
86 
     | 
    
         
             
            - ext/word_tagger/test/doc.txt
         
     | 
| 
         @@ -86,6 +88,7 @@ files: 
     | 
|
| 
       86 
88 
     | 
    
         
             
            - lib/brill/tagger.rb
         
     | 
| 
       87 
89 
     | 
    
         
             
            - lib/rbtagger.rb
         
     | 
| 
       88 
90 
     | 
    
         
             
            - lib/rbtagger/version.rb
         
     | 
| 
      
 91 
     | 
    
         
            +
            - lib/word/tagger.rb
         
     | 
| 
       89 
92 
     | 
    
         
             
            - script/console
         
     | 
| 
       90 
93 
     | 
    
         
             
            - script/destroy
         
     | 
| 
       91 
94 
     | 
    
         
             
            - script/generate
         
     | 
| 
         @@ -93,6 +96,9 @@ files: 
     | 
|
| 
       93 
96 
     | 
    
         
             
            - setup.rb
         
     | 
| 
       94 
97 
     | 
    
         
             
            - tasks/deployment.rake
         
     | 
| 
       95 
98 
     | 
    
         
             
            - tasks/environment.rake
         
     | 
| 
      
 99 
     | 
    
         
            +
            - tasks/extconf.rake
         
     | 
| 
      
 100 
     | 
    
         
            +
            - tasks/extconf/rule_tagger.rake
         
     | 
| 
      
 101 
     | 
    
         
            +
            - tasks/extconf/word_tagger.rake
         
     | 
| 
       96 
102 
     | 
    
         
             
            - tasks/website.rake
         
     | 
| 
       97 
103 
     | 
    
         
             
            - test/CONTEXTUALRULEFILE
         
     | 
| 
       98 
104 
     | 
    
         
             
            - test/LEXICALRULEFILE
         
     | 
| 
         @@ -107,8 +113,10 @@ files: 
     | 
|
| 
       107 
113 
     | 
    
         
             
            - test/docs/doc7.txt
         
     | 
| 
       108 
114 
     | 
    
         
             
            - test/docs/doc8.txt
         
     | 
| 
       109 
115 
     | 
    
         
             
            - test/docs/doc9.txt
         
     | 
| 
       110 
     | 
    
         
            -
            - test/ 
     | 
| 
      
 116 
     | 
    
         
            +
            - test/fixtures/tags.txt
         
     | 
| 
       111 
117 
     | 
    
         
             
            - test/test_helper.rb
         
     | 
| 
      
 118 
     | 
    
         
            +
            - test/test_rule_tagger.rb
         
     | 
| 
      
 119 
     | 
    
         
            +
            - test/test_word_tagger.rb
         
     | 
| 
       112 
120 
     | 
    
         
             
            - tools/rakehelp.rb
         
     | 
| 
       113 
121 
     | 
    
         
             
            - website/index.html
         
     | 
| 
       114 
122 
     | 
    
         
             
            - website/index.txt
         
     | 
| 
         @@ -116,7 +124,7 @@ files: 
     | 
|
| 
       116 
124 
     | 
    
         
             
            - website/stylesheets/screen.css
         
     | 
| 
       117 
125 
     | 
    
         
             
            - website/template.html.erb
         
     | 
| 
       118 
126 
     | 
    
         
             
            has_rdoc: true
         
     | 
| 
       119 
     | 
    
         
            -
            homepage: http:// 
     | 
| 
      
 127 
     | 
    
         
            +
            homepage: http://rbtagger.rubyforge.org
         
     | 
| 
       120 
128 
     | 
    
         
             
            post_install_message: |+
         
     | 
| 
       121 
129 
     | 
    
         | 
| 
       122 
130 
     | 
    
         
             
              For more information on rb-brill-tagger, see http://rb-brill-tagger.rubyforge.org
         
     | 
| 
         @@ -146,10 +154,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement 
     | 
|
| 
       146 
154 
     | 
    
         
             
              version: 
         
     | 
| 
       147 
155 
     | 
    
         
             
            requirements: []
         
     | 
| 
       148 
156 
     | 
    
         | 
| 
       149 
     | 
    
         
            -
            rubyforge_project:  
     | 
| 
      
 157 
     | 
    
         
            +
            rubyforge_project: rbtagger
         
     | 
| 
       150 
158 
     | 
    
         
             
            rubygems_version: 1.1.1
         
     | 
| 
       151 
159 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       152 
160 
     | 
    
         
             
            specification_version: 2
         
     | 
| 
       153 
161 
     | 
    
         
             
            summary: A Simple Ruby Rule-Based Part of Speech Tagger
         
     | 
| 
       154 
162 
     | 
    
         
             
            test_files: 
         
     | 
| 
       155 
163 
     | 
    
         
             
            - test/test_helper.rb
         
     | 
| 
      
 164 
     | 
    
         
            +
            - test/test_rule_tagger.rb
         
     | 
| 
      
 165 
     | 
    
         
            +
            - test/test_word_tagger.rb
         
     |