rbtagger 0.0.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,16 +1,7 @@
1
- require 'test/unit'
2
- $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
3
- $:.unshift File.join(File.dirname(__FILE__), "..", "ext", "rule_tagger")
1
+ require File.dirname(__FILE__) + '/test_helper'
4
2
 
5
- require 'brill/tagger'
6
3
 
7
- puts "loading tagger..."
8
- $tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
9
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
10
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
11
- puts "tagger loaded!"
12
-
13
- class TaggerTest < Test::Unit::TestCase
4
+ class TestRuleTagger< Test::Unit::TestCase
14
5
  SAMPLE_DOC=%q(
15
6
  Take an active role in your care
16
7
  When it comes to making decisions about the goals and direction of treatment, don't sit back. Work closely and actively with your oncologist and the rest of your medical team.
@@ -33,13 +24,23 @@ Allow yourself time to discuss the emotional consequences of your illness and tr
33
24
  Stay connected
34
25
  Although many newly diagnosed patients fear they will not be able to keep working during treatment, this is usually not the case. Working, even at a reduced schedule, helps you maintain valuable social connections and weekly structure.
35
26
  )
27
+ def setup # runs before every test; loads the shared Brill tagger only once
28
+ if !defined?($rtagger) # guard on the global assigned below so later tests reuse the loaded tagger
29
+ puts "loading tagger..."
30
+ $rtagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
31
+ File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
32
+ File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
33
+ puts "tagger loaded!"
34
+ end
35
+ end
36
+
36
37
  def test_simple_tagger
37
38
  pairs = tagger.tag( SAMPLE_DOC )
38
39
  assert_equal [["", ")"], ["", ")"], ["Take", "VB"], ["an", "DT"], ["active", "JJ"], ["role", "NN"], ["in", "IN"], ["your", "PRP$"], ["care", "NN"], ["When", "WRB"], ["it", "PRP"], ["comes", "VBZ"], ["to", "TO"], ["making", "VBG"], ["decisions", "NNS"], ["about", "IN"], ["the", "DT"], ["goals", "NNS"], ["and", "CC"], ["direction", "NN"], ["of", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["do", "VBP"], ["", ")"], ["n't", "RB"], ["sit", "VB"], ["back.", "CD"], ["Work", "NN"], ["closely", "RB"], ["and", "CC"], ["actively", "RB"], ["with", "IN"], ["your", "PRP$"], ["oncologist", "NN"], ["and", "CC"], ["the", "DT"], ["rest", "NN"], ["of", "IN"], ["your", "PRP$"], ["medical", "JJ"], ["team.", "NNP"], ["Dont", "NNP"], ["overlook", "VB"], ["clinical", "JJ"], ["trials", "NNS"], ["If", "IN"], ["youre", "NN"], ["eligible", "JJ"], ["to", "TO"], ["enroll", "VB"], ["in", "IN"], ["clinical", "JJ"], ["trials", "NNS"], [",", ","], ["", ")"], ["select", "VB"], ["an", "DT"], ["oncologist", "NN"], ["who", "WP"], ["participates", "VBZ"], ["in", "IN"], ["them.", "JJ"], ["Patients", "NNS"], ["who", "WP"], ["enroll", "VBP"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], ["receive", "VBP"], ["closer", "JJR"], ["follow-up", "NN"], [",", ","], ["", ")"], ["the", "DT"], ["highest", "JJS"], ["standard-of-care", "JJ"], ["treatment", "NN"], ["and", "CC"], ["access", "NN"], ["to", "TO"], ["experimental", "JJ"], ["therapies", "NNS"], ["at", "IN"], ["no", "DT"], ["extra", "JJ"], ["cost.", "NNP"], ["Maximize", "NNP"], ["your", "PRP$"], ["nutrition", "NN"], ["strategy", "NN"], ["Doing", "NNP"], ["your", "PRP$"], ["best", "JJS"], ["to", "TO"], ["eat", "VB"], ["a", "DT"], ["healthy", "JJ"], [",", ","], ["", ")"], ["well-balanced", "JJ"], ["diet", "NN"], ["is", "VBZ"], ["vital", "JJ"], ["to", "TO"], ["prompt", "VB"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["for", "IN"], ["recovery", "NN"], ["from", "IN"], ["radiation", "NN"], ["or", "CC"], 
["chemotherapy.", "JJ"], ["Many", "JJ"], ["oncology", "NN"], ["practices", "NNS"], ["employ", "VBP"], ["registered", "VBN"], ["dieticians", "NNS"], ["who", "WP"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["optimize", "VB"], ["your", "PRP$"], ["nutrition.", "JJ"], ["Steer", "VB"], ["clear", "JJ"], ["of", "IN"], ["", ")"], ["``", "``"], ["natural", "JJ"], ["cures", "NNS"], ["''", "''"], ["", ")"], ["Before", "IN"], ["trying", "VBG"], ["nutritional", "JJ"], ["supplements", "NNS"], ["or", "CC"], ["herbal", "JJ"], ["remedies", "NNS"], [",", ","], ["", ")"], ["be", "VB"], ["sure", "JJ"], ["to", "TO"], ["discuss", "VB"], ["your", "PRP$"], ["plans", "NNS"], ["with", "IN"], ["a", "DT"], ["doctor.", "JJ"], ["Most", "JJS"], ["have", "VBP"], ["not", "RB"], ["been", "VBN"], ["tested", "VBN"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], [",", ","], ["", ")"], ["and", "CC"], ["some", "DT"], ["may", "MD"], ["actually", "RB"], ["interfere", "VB"], ["with", "IN"], ["your", "PRP$"], ["treatment.", "JJ"], ["Build", "VB"], ["a", "DT"], ["stronger", "JJR"], ["body", "NN"], ["Even", "RB"], ["walking", "VBG"], ["regularly", "RB"], ["is", "VBZ"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["minimize", "VB"], ["long-term", "JJ"], ["muscle", "NN"], ["weakness", "NN"], ["caused", "VBN"], ["by", "IN"], ["illness", "NN"], ["or", "CC"], ["de-conditioning.", "NNP"], ["Focus", "NNP"], ["on", "IN"], ["overall", "JJ"], ["health", "NN"], ["Patients", "NNS"], ["may", "MD"], ["be", "VB"], ["cured", "VBN"], ["of", "IN"], ["cancer", "NN"], ["but", "CC"], ["still", "JJ"], ["face", "NN"], ["life-threatening", "JJ"], ["medical", "JJ"], ["problems", "NNS"], ["that", "WDT"], ["are", "VBP"], ["underemphasized", "JJ"], ["during", "IN"], ["cancer", "NN"], ["treatments", "NNS"], [",", ","], ["", ")"], ["such", "JJ"], ["as", "IN"], ["diabetes", "NN"], [",", ","], ["", ")"], ["high", "JJ"], ["blood", "NN"], ["pressure", "NN"], ["and", "CC"], ["heart", "NN"], ["disease.", "JJ"], ["Continue", 
"VB"], ["to", "TO"], ["monitor", "VB"], ["your", "PRP$"], ["overall", "JJ"], ["health.", "JJ"], ["Put", "NN"], ["the", "DT"], ["fire", "NN"], ["out", "IN"], ["for", "IN"], ["good", "JJ"], ["Smoking", "NNP"], ["impairs", "NNS"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["radiation", "NN"], ["and", "CC"], ["increases", "NNS"], ["your", "PRP$"], ["risk", "NN"], ["of", "IN"], ["cardiovascular", "JJ"], ["disease", "NN"], ["and", "CC"], ["many", "JJ"], ["types", "NNS"], ["of", "IN"], ["cancers.", "CD"], ["Ask", "VB"], ["your", "PRP$"], ["doctor", "NN"], ["for", "IN"], ["help", "NN"], ["identifying", "VBG"], ["and", "CC"], ["obtaining", "VBG"], ["the", "DT"], ["most", "RBS"], ["appropriate", "JJ"], ["cessation", "NN"], ["aids.", "NNP"], ["Map", "NNP"], ["a", "DT"], ["healthy", "JJ"], ["future", "NN"], ["Once", "RB"], ["youve", "VBP"], ["completed", "VBN"], ["treatment", "NN"], [",", ","], ["", ")"], ["discuss", "VB"], ["appropriate", "JJ"], ["follow-up", "NN"], ["plans", "NNS"], ["with", "IN"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], ["keep", "VB"], ["track", "NN"], ["of", "IN"], ["them", "PRP"], ["yourself.", "CD"], ["Intensified", "JJ"], ["screening", "NN"], ["over", "IN"], ["many", "JJ"], ["years", "NNS"], ["is", "VBZ"], ["frequently", "RB"], ["recommended", "VBN"], ["to", "TO"], ["identify", "VB"], ["and", "CC"], ["treat", "VB"], ["a", "DT"], ["recurrence", "NN"], ["early", "JJ"], ["on.", "CD"], ["Share", "VB"], ["your", "PRP$"], ["feelings", "NNS"], ["Allow", "VB"], ["yourself", "PRP"], ["time", "NN"], ["to", "TO"], ["discuss", "VB"], ["the", "DT"], ["emotional", "JJ"], ["consequences", "NNS"], ["of", "IN"], ["your", "PRP$"], ["illness", "NN"], ["and", "CC"], ["treatment", "NN"], ["with", "IN"], ["family", "NN"], [",", ","], ["", ")"], ["friends", "NNS"], [",", ","], ["", ")"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], [",", ","], ["", ")"], ["if", "IN"], ["necessary", "JJ"], [",", ","], ["", ")"], ["a", "DT"], 
["professional", "JJ"], ["therapist.", "JJ"], ["Many", "JJ"], ["patients", "NNS"], ["also", "RB"], ["find", "VBP"], ["antidepressants", "NNS"], ["helpful", "JJ"], ["during", "IN"], ["treatment.", "JJ"], ["Stay", "VB"], ["connected", "VBN"], ["Although", "IN"], ["many", "JJ"], ["newly", "RB"], ["diagnosed", "VBN"], ["patients", "NNS"], ["fear", "VBP"], ["they", "PRP"], ["will", "MD"], ["not", "RB"], ["be", "VB"], ["able", "JJ"], ["to", "TO"], ["keep", "VB"], ["working", "VBG"], ["during", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["this", "DT"], ["is", "VBZ"], ["usually", "RB"], ["not", "RB"], ["the", "DT"], ["case.", "CD"], ["Working", "NNP"], [",", ","], ["", ")"], ["even", "RB"], ["at", "IN"], ["a", "DT"], ["reduced", "VBN"], ["schedule", "NN"], [",", ","], ["", ")"], ["helps", "VBZ"], ["you", "PRP"], ["maintain", "VBP"], ["valuable", "JJ"], ["social", "JJ"], ["connections", "NNS"], ["and", "CC"], ["weekly", "JJ"], ["structure", "NN"], [".", "."]], pairs
39
40
  #puts pairs.inspect
40
41
  # enable these lines for memory leak testing
41
- $tagger = nil
42
- ObjectSpace.garbage_collect
42
+ #$rtagger = nil
43
+ #ObjectSpace.garbage_collect
43
44
  end
44
45
 
45
46
  def test_multiple_docs
@@ -55,6 +56,6 @@ Although many newly diagnosed patients fear they will not be able to keep workin
55
56
 
56
57
  private
57
58
  def tagger
58
- $tagger
59
+ $rtagger
59
60
  end
60
61
  end
@@ -0,0 +1,33 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
+ class TestWordTagger < Test::Unit::TestCase
4
+
5
+ def setup
6
+ if !defined?($wtagger)
7
+ $wtagger = Word::Tagger.new( File.join(File.dirname(__FILE__),'fixtures','tags.txt'), :words => 4 )
8
+ end
9
+ end
10
+
11
+ def test_basic
12
+ timer = Time.now
13
+ text = "This is a sa'mple doc[]ument lets see how cancer ngrams 4 works out for this interesting text!"
14
+ tags = $wtagger.execute( text )
15
+ assert_equal ['cancer','work'], tags
16
+ puts "Duration: #{Time.now - timer} sec"
17
+ end
18
+
19
+ def test_ngram_size3
20
+ timer = Time.now
21
+ text = "This body of text contains something like ventricular septal defect"
22
+ tags = $wtagger.execute( text )
23
+ assert_equal ['ventricular septal defect'], tags
24
+ puts "Duration: #{Time.now - timer} sec"
25
+ end
26
+
27
+ def test_cat_and_the_hat
28
+ tagger = Word::Tagger.new( ['Cat','hat'], :words => 4 )
29
+ tags = tagger.execute( 'the cAt and the hat' )
30
+ assert_equal( ["Cat", "hat"], tags )
31
+ end
32
+
33
+ end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>rbtagger</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/ruletagger"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.0.1</a>
36
+ <a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.2.0</a>
37
37
  </div>
38
38
  <h4 style="float:right;padding-right:10px;"> &#x2192; &#8216;rb-brill-tagger&#8217;</h4>
39
39
 
@@ -49,19 +49,29 @@
49
49
  <h2>Installing</h2>
50
50
 
51
51
 
52
- <p><pre class='syntax'>sudo gem install rbtagger</pre></p>
52
+ <p><pre class='syntax'>sudo gem install ruletagger</pre></p>
53
53
 
54
54
 
55
55
  <h2>The basics</h2>
56
56
 
57
57
 
58
58
  <p><pre class='syntax'>
59
+ <span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
60
+
61
+ <span class="comment"># Using the rule tagger</span>
59
62
  <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">LEXICON</span><span class="punct">&quot;),</span>
60
63
  <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">LEXICALRULEFILE</span><span class="punct">&quot;),</span>
61
64
  <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">CONTEXTUALRULEFILE</span><span class="punct">&quot;)</span> <span class="punct">)</span>
62
65
  <span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
63
66
  <span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
64
67
  <span class="keyword">end</span>
68
+
69
+ <span class="comment"># Using the word tagger</span>
70
+ <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Word</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="punct">['</span><span class="string">cat</span><span class="punct">','</span><span class="string">hat</span><span class="punct">'],</span> <span class="symbol">:words</span> <span class="punct">=&gt;</span> <span class="number">4</span> <span class="punct">)</span>
71
+ <span class="ident">tags</span> <span class="punct">=</span> <span class="ident">tagger</span><span class="punct">.</span><span class="ident">execute</span><span class="punct">(</span> <span class="punct">'</span><span class="string">the cat and the hat</span><span class="punct">'</span> <span class="punct">)</span>
72
+ <span class="ident">assert_equal</span><span class="punct">(</span> <span class="punct">[&quot;</span><span class="string">cat</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">hat</span><span class="punct">&quot;],</span> <span class="ident">tags</span> <span class="punct">)</span>
73
+
74
+
65
75
  </pre></p>
66
76
 
67
77
 
data/website/index.txt CHANGED
@@ -11,17 +11,27 @@ This work is based on the work of Eric Brill
11
11
 
12
12
  h2. Installing
13
13
 
14
- <pre syntax="bash">sudo gem install rbtagger</pre>
14
+ <pre syntax="bash">sudo gem install ruletagger</pre>
15
15
 
16
16
  h2. The basics
17
17
 
18
18
  <pre syntax="ruby">
19
+ require 'rbtagger'
20
+
21
+ # Using the rule tagger
19
22
  tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
20
23
  File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
21
24
  File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
22
25
  docs.each do|doc|
23
26
  tagger.tag( File.read( doc ) )
24
27
  end
28
+
29
+ # Using the word tagger
30
+ tagger = Word::Tagger.new( ['cat','hat'], :words => 4 )
31
+ tags = tagger.execute( 'the cat and the hat' )
32
+ assert_equal( ["cat", "hat"], tags )
33
+
34
+
25
35
  </pre>
26
36
 
27
37
  h2. Forum
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbtagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-05-14 00:00:00 -04:00
12
+ date: 2008-05-15 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies: []
15
15
 
@@ -38,6 +38,7 @@ extra_rdoc_files:
38
38
  - test/docs/doc7.txt
39
39
  - test/docs/doc8.txt
40
40
  - test/docs/doc9.txt
41
+ - test/fixtures/tags.txt
41
42
  - website/index.txt
42
43
  files:
43
44
  - COPYING
@@ -60,6 +61,7 @@ files:
60
61
  - ext/rule_tagger/lex.h
61
62
  - ext/rule_tagger/memory.c
62
63
  - ext/rule_tagger/memory.h
64
+ - ext/rule_tagger/mkmf.log
63
65
  - ext/rule_tagger/rbtagger.c
64
66
  - ext/rule_tagger/registry.c
65
67
  - ext/rule_tagger/registry.h
@@ -73,12 +75,12 @@ files:
73
75
  - ext/rule_tagger/useful.c
74
76
  - ext/rule_tagger/useful.h
75
77
  - ext/word_tagger/extconf.rb
78
+ - ext/word_tagger/mkmf.log
76
79
  - ext/word_tagger/porter_stemmer.c
77
80
  - ext/word_tagger/porter_stemmer.h
78
81
  - ext/word_tagger/rtagger.cc
79
82
  - ext/word_tagger/tagger.cc
80
83
  - ext/word_tagger/tagger.h
81
- - ext/word_tagger/tagger.rb
82
84
  - ext/word_tagger/test.rb
83
85
  - ext/word_tagger/test/Makefile
84
86
  - ext/word_tagger/test/doc.txt
@@ -86,6 +88,7 @@ files:
86
88
  - lib/brill/tagger.rb
87
89
  - lib/rbtagger.rb
88
90
  - lib/rbtagger/version.rb
91
+ - lib/word/tagger.rb
89
92
  - script/console
90
93
  - script/destroy
91
94
  - script/generate
@@ -93,6 +96,9 @@ files:
93
96
  - setup.rb
94
97
  - tasks/deployment.rake
95
98
  - tasks/environment.rake
99
+ - tasks/extconf.rake
100
+ - tasks/extconf/rule_tagger.rake
101
+ - tasks/extconf/word_tagger.rake
96
102
  - tasks/website.rake
97
103
  - test/CONTEXTUALRULEFILE
98
104
  - test/LEXICALRULEFILE
@@ -107,8 +113,10 @@ files:
107
113
  - test/docs/doc7.txt
108
114
  - test/docs/doc8.txt
109
115
  - test/docs/doc9.txt
110
- - test/tagger_test.rb
116
+ - test/fixtures/tags.txt
111
117
  - test/test_helper.rb
118
+ - test/test_rule_tagger.rb
119
+ - test/test_word_tagger.rb
112
120
  - tools/rakehelp.rb
113
121
  - website/index.html
114
122
  - website/index.txt
@@ -116,7 +124,7 @@ files:
116
124
  - website/stylesheets/screen.css
117
125
  - website/template.html.erb
118
126
  has_rdoc: true
119
- homepage: http://ruletagger.rubyforge.org
127
+ homepage: http://rbtagger.rubyforge.org
120
128
  post_install_message: |+
121
129
 
122
130
  For more information on rb-brill-tagger, see http://rb-brill-tagger.rubyforge.org
@@ -146,10 +154,12 @@ required_rubygems_version: !ruby/object:Gem::Requirement
146
154
  version:
147
155
  requirements: []
148
156
 
149
- rubyforge_project: ruletagger/rbtagger
157
+ rubyforge_project: rbtagger
150
158
  rubygems_version: 1.1.1
151
159
  signing_key:
152
160
  specification_version: 2
153
161
  summary: A Simple Ruby Rule-Based Part of Speech Tagger
154
162
  test_files:
155
163
  - test/test_helper.rb
164
+ - test/test_rule_tagger.rb
165
+ - test/test_word_tagger.rb
@@ -1,8 +0,0 @@
1
- module Tagger
2
- require 'rtagger'
3
- class SimpleTagger < Tagger::NWordTagger
4
- def execute( text )
5
- super( text.gsub(/[^\w]/,' ') )
6
- end
7
- end
8
- end