ruletagger 0.0.1 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,16 +1,7 @@
1
- require 'test/unit'
2
- $:.unshift File.join(File.dirname(__FILE__), "..", "lib")
3
- $:.unshift File.join(File.dirname(__FILE__), "..", "ext", "rule_tagger")
1
+ require File.dirname(__FILE__) + '/test_helper'
4
2
 
5
- require 'brill/tagger'
6
3
 
7
- puts "loading tagger..."
8
- $tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
9
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
10
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
11
- puts "tagger loaded!"
12
-
13
- class TaggerTest < Test::Unit::TestCase
4
+ class TestRuleTagger< Test::Unit::TestCase
14
5
  SAMPLE_DOC=%q(
15
6
  Take an active role in your care
16
7
  When it comes to making decisions about the goals and direction of treatment, don't sit back. Work closely and actively with your oncologist and the rest of your medical team.
@@ -33,6 +24,16 @@ Allow yourself time to discuss the emotional consequences of your illness and tr
33
24
  Stay connected
34
25
  Although many newly diagnosed patients fear they will not be able to keep working during treatment, this is usually not the case. Working, even at a reduced schedule, helps you maintain valuable social connections and weekly structure.
35
26
  )
27
# Builds the Brill rule tagger once and caches it in the global $rtagger,
# which the private #tagger helper returns to the tests.
#
# The model files (LEXICON, LEXICALRULEFILE, CONTEXTUALRULEFILE) are looked up
# in the same directory as this test file.
def setup
  # Guard on $rtagger, the global this method actually assigns. The previous
  # guard tested $tagger, which nothing sets any more, so the check never
  # short-circuited and the tagger was rebuilt before every test.
  return if defined?($rtagger)

  model_dir = File.dirname(__FILE__)
  puts "loading tagger..."
  $rtagger = Brill::Tagger.new(File.join(model_dir, "LEXICON"),
                               File.join(model_dir, "LEXICALRULEFILE"),
                               File.join(model_dir, "CONTEXTUALRULEFILE"))
  puts "tagger loaded!"
end
36
+
36
37
  def test_simple_tagger
37
38
  pairs = tagger.tag( SAMPLE_DOC )
38
39
  assert_equal [["", ")"], ["", ")"], ["Take", "VB"], ["an", "DT"], ["active", "JJ"], ["role", "NN"], ["in", "IN"], ["your", "PRP$"], ["care", "NN"], ["When", "WRB"], ["it", "PRP"], ["comes", "VBZ"], ["to", "TO"], ["making", "VBG"], ["decisions", "NNS"], ["about", "IN"], ["the", "DT"], ["goals", "NNS"], ["and", "CC"], ["direction", "NN"], ["of", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["do", "VBP"], ["", ")"], ["n't", "RB"], ["sit", "VB"], ["back.", "CD"], ["Work", "NN"], ["closely", "RB"], ["and", "CC"], ["actively", "RB"], ["with", "IN"], ["your", "PRP$"], ["oncologist", "NN"], ["and", "CC"], ["the", "DT"], ["rest", "NN"], ["of", "IN"], ["your", "PRP$"], ["medical", "JJ"], ["team.", "NNP"], ["Dont", "NNP"], ["overlook", "VB"], ["clinical", "JJ"], ["trials", "NNS"], ["If", "IN"], ["youre", "NN"], ["eligible", "JJ"], ["to", "TO"], ["enroll", "VB"], ["in", "IN"], ["clinical", "JJ"], ["trials", "NNS"], [",", ","], ["", ")"], ["select", "VB"], ["an", "DT"], ["oncologist", "NN"], ["who", "WP"], ["participates", "VBZ"], ["in", "IN"], ["them.", "JJ"], ["Patients", "NNS"], ["who", "WP"], ["enroll", "VBP"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], ["receive", "VBP"], ["closer", "JJR"], ["follow-up", "NN"], [",", ","], ["", ")"], ["the", "DT"], ["highest", "JJS"], ["standard-of-care", "JJ"], ["treatment", "NN"], ["and", "CC"], ["access", "NN"], ["to", "TO"], ["experimental", "JJ"], ["therapies", "NNS"], ["at", "IN"], ["no", "DT"], ["extra", "JJ"], ["cost.", "NNP"], ["Maximize", "NNP"], ["your", "PRP$"], ["nutrition", "NN"], ["strategy", "NN"], ["Doing", "NNP"], ["your", "PRP$"], ["best", "JJS"], ["to", "TO"], ["eat", "VB"], ["a", "DT"], ["healthy", "JJ"], [",", ","], ["", ")"], ["well-balanced", "JJ"], ["diet", "NN"], ["is", "VBZ"], ["vital", "JJ"], ["to", "TO"], ["prompt", "VB"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["for", "IN"], ["recovery", "NN"], ["from", "IN"], ["radiation", "NN"], ["or", "CC"], 
["chemotherapy.", "JJ"], ["Many", "JJ"], ["oncology", "NN"], ["practices", "NNS"], ["employ", "VBP"], ["registered", "VBN"], ["dieticians", "NNS"], ["who", "WP"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["optimize", "VB"], ["your", "PRP$"], ["nutrition.", "JJ"], ["Steer", "VB"], ["clear", "JJ"], ["of", "IN"], ["", ")"], ["``", "``"], ["natural", "JJ"], ["cures", "NNS"], ["''", "''"], ["", ")"], ["Before", "IN"], ["trying", "VBG"], ["nutritional", "JJ"], ["supplements", "NNS"], ["or", "CC"], ["herbal", "JJ"], ["remedies", "NNS"], [",", ","], ["", ")"], ["be", "VB"], ["sure", "JJ"], ["to", "TO"], ["discuss", "VB"], ["your", "PRP$"], ["plans", "NNS"], ["with", "IN"], ["a", "DT"], ["doctor.", "JJ"], ["Most", "JJS"], ["have", "VBP"], ["not", "RB"], ["been", "VBN"], ["tested", "VBN"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], [",", ","], ["", ")"], ["and", "CC"], ["some", "DT"], ["may", "MD"], ["actually", "RB"], ["interfere", "VB"], ["with", "IN"], ["your", "PRP$"], ["treatment.", "JJ"], ["Build", "VB"], ["a", "DT"], ["stronger", "JJR"], ["body", "NN"], ["Even", "RB"], ["walking", "VBG"], ["regularly", "RB"], ["is", "VBZ"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["minimize", "VB"], ["long-term", "JJ"], ["muscle", "NN"], ["weakness", "NN"], ["caused", "VBN"], ["by", "IN"], ["illness", "NN"], ["or", "CC"], ["de-conditioning.", "NNP"], ["Focus", "NNP"], ["on", "IN"], ["overall", "JJ"], ["health", "NN"], ["Patients", "NNS"], ["may", "MD"], ["be", "VB"], ["cured", "VBN"], ["of", "IN"], ["cancer", "NN"], ["but", "CC"], ["still", "JJ"], ["face", "NN"], ["life-threatening", "JJ"], ["medical", "JJ"], ["problems", "NNS"], ["that", "WDT"], ["are", "VBP"], ["underemphasized", "JJ"], ["during", "IN"], ["cancer", "NN"], ["treatments", "NNS"], [",", ","], ["", ")"], ["such", "JJ"], ["as", "IN"], ["diabetes", "NN"], [",", ","], ["", ")"], ["high", "JJ"], ["blood", "NN"], ["pressure", "NN"], ["and", "CC"], ["heart", "NN"], ["disease.", "JJ"], ["Continue", 
"VB"], ["to", "TO"], ["monitor", "VB"], ["your", "PRP$"], ["overall", "JJ"], ["health.", "JJ"], ["Put", "NN"], ["the", "DT"], ["fire", "NN"], ["out", "IN"], ["for", "IN"], ["good", "JJ"], ["Smoking", "NNP"], ["impairs", "NNS"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["radiation", "NN"], ["and", "CC"], ["increases", "NNS"], ["your", "PRP$"], ["risk", "NN"], ["of", "IN"], ["cardiovascular", "JJ"], ["disease", "NN"], ["and", "CC"], ["many", "JJ"], ["types", "NNS"], ["of", "IN"], ["cancers.", "CD"], ["Ask", "VB"], ["your", "PRP$"], ["doctor", "NN"], ["for", "IN"], ["help", "NN"], ["identifying", "VBG"], ["and", "CC"], ["obtaining", "VBG"], ["the", "DT"], ["most", "RBS"], ["appropriate", "JJ"], ["cessation", "NN"], ["aids.", "NNP"], ["Map", "NNP"], ["a", "DT"], ["healthy", "JJ"], ["future", "NN"], ["Once", "RB"], ["youve", "VBP"], ["completed", "VBN"], ["treatment", "NN"], [",", ","], ["", ")"], ["discuss", "VB"], ["appropriate", "JJ"], ["follow-up", "NN"], ["plans", "NNS"], ["with", "IN"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], ["keep", "VB"], ["track", "NN"], ["of", "IN"], ["them", "PRP"], ["yourself.", "CD"], ["Intensified", "JJ"], ["screening", "NN"], ["over", "IN"], ["many", "JJ"], ["years", "NNS"], ["is", "VBZ"], ["frequently", "RB"], ["recommended", "VBN"], ["to", "TO"], ["identify", "VB"], ["and", "CC"], ["treat", "VB"], ["a", "DT"], ["recurrence", "NN"], ["early", "JJ"], ["on.", "CD"], ["Share", "VB"], ["your", "PRP$"], ["feelings", "NNS"], ["Allow", "VB"], ["yourself", "PRP"], ["time", "NN"], ["to", "TO"], ["discuss", "VB"], ["the", "DT"], ["emotional", "JJ"], ["consequences", "NNS"], ["of", "IN"], ["your", "PRP$"], ["illness", "NN"], ["and", "CC"], ["treatment", "NN"], ["with", "IN"], ["family", "NN"], [",", ","], ["", ")"], ["friends", "NNS"], [",", ","], ["", ")"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], [",", ","], ["", ")"], ["if", "IN"], ["necessary", "JJ"], [",", ","], ["", ")"], ["a", "DT"], 
["professional", "JJ"], ["therapist.", "JJ"], ["Many", "JJ"], ["patients", "NNS"], ["also", "RB"], ["find", "VBP"], ["antidepressants", "NNS"], ["helpful", "JJ"], ["during", "IN"], ["treatment.", "JJ"], ["Stay", "VB"], ["connected", "VBN"], ["Although", "IN"], ["many", "JJ"], ["newly", "RB"], ["diagnosed", "VBN"], ["patients", "NNS"], ["fear", "VBP"], ["they", "PRP"], ["will", "MD"], ["not", "RB"], ["be", "VB"], ["able", "JJ"], ["to", "TO"], ["keep", "VB"], ["working", "VBG"], ["during", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["this", "DT"], ["is", "VBZ"], ["usually", "RB"], ["not", "RB"], ["the", "DT"], ["case.", "CD"], ["Working", "NNP"], [",", ","], ["", ")"], ["even", "RB"], ["at", "IN"], ["a", "DT"], ["reduced", "VBN"], ["schedule", "NN"], [",", ","], ["", ")"], ["helps", "VBZ"], ["you", "PRP"], ["maintain", "VBP"], ["valuable", "JJ"], ["social", "JJ"], ["connections", "NNS"], ["and", "CC"], ["weekly", "JJ"], ["structure", "NN"], [".", "."]], pairs
@@ -55,6 +56,6 @@ Although many newly diagnosed patients fear they will not be able to keep workin
55
56
 
56
57
  private
57
58
  def tagger
58
- $tagger
59
+ $rtagger
59
60
  end
60
61
  end
@@ -0,0 +1,28 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Tests for Word::Tagger, driven by the tag list in fixtures/tags.txt.
class TestWordTagger < Test::Unit::TestCase

  # Creates the shared tagger on first use and keeps it in $wtagger so later
  # tests reuse the same instance.
  def setup
    unless defined?($wtagger)
      tags_path = File.join(File.dirname(__FILE__), 'fixtures', 'tags.txt')
      $wtagger = Word::Tagger.new
      # One tag per line; surrounding whitespace is stripped before loading.
      tag_list = File.read(tags_path).split("\n").map { |line| line.strip }
      $wtagger.load_tags(tag_list)
      # NOTE(review): 4 is presumably the maximum n-gram word count -- confirm
      # against Word::Tagger#set_words.
      $wtagger.set_words(4)
    end
  end

  # Tags a noisy sentence and expects exactly 'cancer' and 'work' back;
  # prints the elapsed wall-clock time.
  def test_basic
    started_at = Time.now
    sample = "This is a sa'mple doc[]ument lets see how cancer ngrams 4 works out for this interesting text!"
    found = $wtagger.execute(sample)
    assert_equal ['cancer', 'work'], found
    puts "Duration: #{Time.now - started_at} sec"
  end

  # Expects a three-word tag to be returned as a single entry;
  # prints the elapsed wall-clock time.
  def test_ngram_size3
    started_at = Time.now
    sample = "This body of text contains something like ventricular septal defect"
    found = $wtagger.execute(sample)
    assert_equal ['ventricular septal defect'], found
    puts "Duration: #{Time.now - started_at} sec"
  end
end
data/website/index.html CHANGED
@@ -33,7 +33,7 @@
33
33
  <h1>rbtagger</h1>
34
34
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/ruletagger"; return false'>
35
35
  <p>Get Version</p>
36
- <a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.0.1</a>
36
+ <a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.1.0</a>
37
37
  </div>
38
38
  <h4 style="float:right;padding-right:10px;"> &#x2192; &#8216;rb-brill-tagger&#8217;</h4>
39
39
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ruletagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher
@@ -38,6 +38,7 @@ extra_rdoc_files:
38
38
  - test/docs/doc7.txt
39
39
  - test/docs/doc8.txt
40
40
  - test/docs/doc9.txt
41
+ - test/fixtures/tags.txt
41
42
  - website/index.txt
42
43
  files:
43
44
  - COPYING
@@ -60,6 +61,7 @@ files:
60
61
  - ext/rule_tagger/lex.h
61
62
  - ext/rule_tagger/memory.c
62
63
  - ext/rule_tagger/memory.h
64
+ - ext/rule_tagger/mkmf.log
63
65
  - ext/rule_tagger/rbtagger.c
64
66
  - ext/rule_tagger/registry.c
65
67
  - ext/rule_tagger/registry.h
@@ -73,12 +75,12 @@ files:
73
75
  - ext/rule_tagger/useful.c
74
76
  - ext/rule_tagger/useful.h
75
77
  - ext/word_tagger/extconf.rb
78
+ - ext/word_tagger/mkmf.log
76
79
  - ext/word_tagger/porter_stemmer.c
77
80
  - ext/word_tagger/porter_stemmer.h
78
81
  - ext/word_tagger/rtagger.cc
79
82
  - ext/word_tagger/tagger.cc
80
83
  - ext/word_tagger/tagger.h
81
- - ext/word_tagger/tagger.rb
82
84
  - ext/word_tagger/test.rb
83
85
  - ext/word_tagger/test/Makefile
84
86
  - ext/word_tagger/test/doc.txt
@@ -86,6 +88,7 @@ files:
86
88
  - lib/brill/tagger.rb
87
89
  - lib/rbtagger.rb
88
90
  - lib/rbtagger/version.rb
91
+ - lib/word/tagger.rb
89
92
  - script/console
90
93
  - script/destroy
91
94
  - script/generate
@@ -93,6 +96,9 @@ files:
93
96
  - setup.rb
94
97
  - tasks/deployment.rake
95
98
  - tasks/environment.rake
99
+ - tasks/extconf.rake
100
+ - tasks/extconf/rule_tagger.rake
101
+ - tasks/extconf/word_tagger.rake
96
102
  - tasks/website.rake
97
103
  - test/CONTEXTUALRULEFILE
98
104
  - test/LEXICALRULEFILE
@@ -107,8 +113,10 @@ files:
107
113
  - test/docs/doc7.txt
108
114
  - test/docs/doc8.txt
109
115
  - test/docs/doc9.txt
110
- - test/tagger_test.rb
116
+ - test/fixtures/tags.txt
111
117
  - test/test_helper.rb
118
+ - test/test_rule_tagger.rb
119
+ - test/test_word_tagger.rb
112
120
  - tools/rakehelp.rb
113
121
  - website/index.html
114
122
  - website/index.txt
@@ -153,3 +161,5 @@ specification_version: 2
153
161
  summary: A Simple Ruby Rule-Based Part of Speech Tagger
154
162
  test_files:
155
163
  - test/test_helper.rb
164
+ - test/test_rule_tagger.rb
165
+ - test/test_word_tagger.rb