ruletagger 0.0.1 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +9 -2
- data/Rakefile +0 -29
- data/config/hoe.rb +1 -2
- data/ext/rule_tagger/mkmf.log +46 -0
- data/ext/word_tagger/mkmf.log +24 -0
- data/ext/word_tagger/rtagger.cc +2 -2
- data/lib/rbtagger.rb +3 -0
- data/lib/rbtagger/version.rb +2 -2
- data/{ext/word_tagger → lib/word}/tagger.rb +3 -3
- data/tasks/extconf.rake +18 -0
- data/tasks/extconf/rule_tagger.rake +43 -0
- data/tasks/extconf/word_tagger.rake +43 -0
- data/test/fixtures/tags.txt +976 -0
- data/test/{tagger_test.rb → test_rule_tagger.rb} +13 -12
- data/test/test_word_tagger.rb +28 -0
- data/website/index.html +1 -1
- metadata +13 -3
@@ -1,16 +1,7 @@
|
|
1
|
-
require '
|
2
|
-
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
3
|
-
$:.unshift File.join(File.dirname(__FILE__), "..", "ext", "rule_tagger")
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
4
2
|
|
5
|
-
require 'brill/tagger'
|
6
3
|
|
7
|
-
|
8
|
-
$tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
|
9
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
10
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
11
|
-
puts "tagger loaded!"
|
12
|
-
|
13
|
-
class TaggerTest < Test::Unit::TestCase
|
4
|
+
class TestRuleTagger< Test::Unit::TestCase
|
14
5
|
SAMPLE_DOC=%q(
|
15
6
|
Take an active role in your care
|
16
7
|
When it comes to making decisions about the goals and direction of treatment, don't sit back. Work closely and actively with your oncologist and the rest of your medical team.
|
@@ -33,6 +24,16 @@ Allow yourself time to discuss the emotional consequences of your illness and tr
|
|
33
24
|
Stay connected
|
34
25
|
Although many newly diagnosed patients fear they will not be able to keep working during treatment, this is usually not the case. Working, even at a reduced schedule, helps you maintain valuable social connections and weekly structure.
|
35
26
|
)
|
27
|
+
def setup
|
28
|
+
if !defined?($tagger)
|
29
|
+
puts "loading tagger..."
|
30
|
+
$rtagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
|
31
|
+
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
32
|
+
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
33
|
+
puts "tagger loaded!"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
36
37
|
def test_simple_tagger
|
37
38
|
pairs = tagger.tag( SAMPLE_DOC )
|
38
39
|
assert_equal [["", ")"], ["", ")"], ["Take", "VB"], ["an", "DT"], ["active", "JJ"], ["role", "NN"], ["in", "IN"], ["your", "PRP$"], ["care", "NN"], ["When", "WRB"], ["it", "PRP"], ["comes", "VBZ"], ["to", "TO"], ["making", "VBG"], ["decisions", "NNS"], ["about", "IN"], ["the", "DT"], ["goals", "NNS"], ["and", "CC"], ["direction", "NN"], ["of", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["do", "VBP"], ["", ")"], ["n't", "RB"], ["sit", "VB"], ["back.", "CD"], ["Work", "NN"], ["closely", "RB"], ["and", "CC"], ["actively", "RB"], ["with", "IN"], ["your", "PRP$"], ["oncologist", "NN"], ["and", "CC"], ["the", "DT"], ["rest", "NN"], ["of", "IN"], ["your", "PRP$"], ["medical", "JJ"], ["team.", "NNP"], ["Dont", "NNP"], ["overlook", "VB"], ["clinical", "JJ"], ["trials", "NNS"], ["If", "IN"], ["youre", "NN"], ["eligible", "JJ"], ["to", "TO"], ["enroll", "VB"], ["in", "IN"], ["clinical", "JJ"], ["trials", "NNS"], [",", ","], ["", ")"], ["select", "VB"], ["an", "DT"], ["oncologist", "NN"], ["who", "WP"], ["participates", "VBZ"], ["in", "IN"], ["them.", "JJ"], ["Patients", "NNS"], ["who", "WP"], ["enroll", "VBP"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], ["receive", "VBP"], ["closer", "JJR"], ["follow-up", "NN"], [",", ","], ["", ")"], ["the", "DT"], ["highest", "JJS"], ["standard-of-care", "JJ"], ["treatment", "NN"], ["and", "CC"], ["access", "NN"], ["to", "TO"], ["experimental", "JJ"], ["therapies", "NNS"], ["at", "IN"], ["no", "DT"], ["extra", "JJ"], ["cost.", "NNP"], ["Maximize", "NNP"], ["your", "PRP$"], ["nutrition", "NN"], ["strategy", "NN"], ["Doing", "NNP"], ["your", "PRP$"], ["best", "JJS"], ["to", "TO"], ["eat", "VB"], ["a", "DT"], ["healthy", "JJ"], [",", ","], ["", ")"], ["well-balanced", "JJ"], ["diet", "NN"], ["is", "VBZ"], ["vital", "JJ"], ["to", "TO"], ["prompt", "VB"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["for", "IN"], ["recovery", "NN"], ["from", "IN"], ["radiation", "NN"], ["or", "CC"], ["chemotherapy.", 
"JJ"], ["Many", "JJ"], ["oncology", "NN"], ["practices", "NNS"], ["employ", "VBP"], ["registered", "VBN"], ["dieticians", "NNS"], ["who", "WP"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["optimize", "VB"], ["your", "PRP$"], ["nutrition.", "JJ"], ["Steer", "VB"], ["clear", "JJ"], ["of", "IN"], ["", ")"], ["``", "``"], ["natural", "JJ"], ["cures", "NNS"], ["''", "''"], ["", ")"], ["Before", "IN"], ["trying", "VBG"], ["nutritional", "JJ"], ["supplements", "NNS"], ["or", "CC"], ["herbal", "JJ"], ["remedies", "NNS"], [",", ","], ["", ")"], ["be", "VB"], ["sure", "JJ"], ["to", "TO"], ["discuss", "VB"], ["your", "PRP$"], ["plans", "NNS"], ["with", "IN"], ["a", "DT"], ["doctor.", "JJ"], ["Most", "JJS"], ["have", "VBP"], ["not", "RB"], ["been", "VBN"], ["tested", "VBN"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], [",", ","], ["", ")"], ["and", "CC"], ["some", "DT"], ["may", "MD"], ["actually", "RB"], ["interfere", "VB"], ["with", "IN"], ["your", "PRP$"], ["treatment.", "JJ"], ["Build", "VB"], ["a", "DT"], ["stronger", "JJR"], ["body", "NN"], ["Even", "RB"], ["walking", "VBG"], ["regularly", "RB"], ["is", "VBZ"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["minimize", "VB"], ["long-term", "JJ"], ["muscle", "NN"], ["weakness", "NN"], ["caused", "VBN"], ["by", "IN"], ["illness", "NN"], ["or", "CC"], ["de-conditioning.", "NNP"], ["Focus", "NNP"], ["on", "IN"], ["overall", "JJ"], ["health", "NN"], ["Patients", "NNS"], ["may", "MD"], ["be", "VB"], ["cured", "VBN"], ["of", "IN"], ["cancer", "NN"], ["but", "CC"], ["still", "JJ"], ["face", "NN"], ["life-threatening", "JJ"], ["medical", "JJ"], ["problems", "NNS"], ["that", "WDT"], ["are", "VBP"], ["underemphasized", "JJ"], ["during", "IN"], ["cancer", "NN"], ["treatments", "NNS"], [",", ","], ["", ")"], ["such", "JJ"], ["as", "IN"], ["diabetes", "NN"], [",", ","], ["", ")"], ["high", "JJ"], ["blood", "NN"], ["pressure", "NN"], ["and", "CC"], ["heart", "NN"], ["disease.", "JJ"], ["Continue", "VB"], ["to", "TO"], 
["monitor", "VB"], ["your", "PRP$"], ["overall", "JJ"], ["health.", "JJ"], ["Put", "NN"], ["the", "DT"], ["fire", "NN"], ["out", "IN"], ["for", "IN"], ["good", "JJ"], ["Smoking", "NNP"], ["impairs", "NNS"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["radiation", "NN"], ["and", "CC"], ["increases", "NNS"], ["your", "PRP$"], ["risk", "NN"], ["of", "IN"], ["cardiovascular", "JJ"], ["disease", "NN"], ["and", "CC"], ["many", "JJ"], ["types", "NNS"], ["of", "IN"], ["cancers.", "CD"], ["Ask", "VB"], ["your", "PRP$"], ["doctor", "NN"], ["for", "IN"], ["help", "NN"], ["identifying", "VBG"], ["and", "CC"], ["obtaining", "VBG"], ["the", "DT"], ["most", "RBS"], ["appropriate", "JJ"], ["cessation", "NN"], ["aids.", "NNP"], ["Map", "NNP"], ["a", "DT"], ["healthy", "JJ"], ["future", "NN"], ["Once", "RB"], ["youve", "VBP"], ["completed", "VBN"], ["treatment", "NN"], [",", ","], ["", ")"], ["discuss", "VB"], ["appropriate", "JJ"], ["follow-up", "NN"], ["plans", "NNS"], ["with", "IN"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], ["keep", "VB"], ["track", "NN"], ["of", "IN"], ["them", "PRP"], ["yourself.", "CD"], ["Intensified", "JJ"], ["screening", "NN"], ["over", "IN"], ["many", "JJ"], ["years", "NNS"], ["is", "VBZ"], ["frequently", "RB"], ["recommended", "VBN"], ["to", "TO"], ["identify", "VB"], ["and", "CC"], ["treat", "VB"], ["a", "DT"], ["recurrence", "NN"], ["early", "JJ"], ["on.", "CD"], ["Share", "VB"], ["your", "PRP$"], ["feelings", "NNS"], ["Allow", "VB"], ["yourself", "PRP"], ["time", "NN"], ["to", "TO"], ["discuss", "VB"], ["the", "DT"], ["emotional", "JJ"], ["consequences", "NNS"], ["of", "IN"], ["your", "PRP$"], ["illness", "NN"], ["and", "CC"], ["treatment", "NN"], ["with", "IN"], ["family", "NN"], [",", ","], ["", ")"], ["friends", "NNS"], [",", ","], ["", ")"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], [",", ","], ["", ")"], ["if", "IN"], ["necessary", "JJ"], [",", ","], ["", ")"], ["a", "DT"], ["professional", "JJ"], 
["therapist.", "JJ"], ["Many", "JJ"], ["patients", "NNS"], ["also", "RB"], ["find", "VBP"], ["antidepressants", "NNS"], ["helpful", "JJ"], ["during", "IN"], ["treatment.", "JJ"], ["Stay", "VB"], ["connected", "VBN"], ["Although", "IN"], ["many", "JJ"], ["newly", "RB"], ["diagnosed", "VBN"], ["patients", "NNS"], ["fear", "VBP"], ["they", "PRP"], ["will", "MD"], ["not", "RB"], ["be", "VB"], ["able", "JJ"], ["to", "TO"], ["keep", "VB"], ["working", "VBG"], ["during", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["this", "DT"], ["is", "VBZ"], ["usually", "RB"], ["not", "RB"], ["the", "DT"], ["case.", "CD"], ["Working", "NNP"], [",", ","], ["", ")"], ["even", "RB"], ["at", "IN"], ["a", "DT"], ["reduced", "VBN"], ["schedule", "NN"], [",", ","], ["", ")"], ["helps", "VBZ"], ["you", "PRP"], ["maintain", "VBP"], ["valuable", "JJ"], ["social", "JJ"], ["connections", "NNS"], ["and", "CC"], ["weekly", "JJ"], ["structure", "NN"], [".", "."]], pairs
|
@@ -55,6 +56,6 @@ Although many newly diagnosed patients fear they will not be able to keep workin
|
|
55
56
|
|
56
57
|
private
|
57
58
|
def tagger
|
58
|
-
$tagger
|
59
|
+
$rtagger
|
59
60
|
end
|
60
61
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class TestWordTagger < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
if !defined?($wtagger)
|
7
|
+
$wtagger = Word::Tagger.new
|
8
|
+
$wtagger.load_tags( File.read(File.join(File.dirname(__FILE__),'fixtures','tags.txt') ).split("\n").map{|t| t.strip} )
|
9
|
+
$wtagger.set_words( 4 )
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_basic
|
14
|
+
timer = Time.now
|
15
|
+
text = "This is a sa'mple doc[]ument lets see how cancer ngrams 4 works out for this interesting text!"
|
16
|
+
tags = $wtagger.execute( text )
|
17
|
+
assert_equal ['cancer','work'], tags
|
18
|
+
puts "Duration: #{Time.now - timer} sec"
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_ngram_size3
|
22
|
+
timer = Time.now
|
23
|
+
text = "This body of text contains something like ventricular septal defect"
|
24
|
+
tags = $wtagger.execute( text )
|
25
|
+
assert_equal ['ventricular septal defect'], tags
|
26
|
+
puts "Duration: #{Time.now - timer} sec"
|
27
|
+
end
|
28
|
+
end
|
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>rbtagger</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/ruletagger"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.0.1</a>
|
36
|
+
<a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.1.0</a>
|
37
37
|
</div>
|
38
38
|
<h4 style="float:right;padding-right:10px;"> → ‘rb-brill-tagger’</h4>
|
39
39
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruletagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Todd A. Fisher
|
@@ -38,6 +38,7 @@ extra_rdoc_files:
|
|
38
38
|
- test/docs/doc7.txt
|
39
39
|
- test/docs/doc8.txt
|
40
40
|
- test/docs/doc9.txt
|
41
|
+
- test/fixtures/tags.txt
|
41
42
|
- website/index.txt
|
42
43
|
files:
|
43
44
|
- COPYING
|
@@ -60,6 +61,7 @@ files:
|
|
60
61
|
- ext/rule_tagger/lex.h
|
61
62
|
- ext/rule_tagger/memory.c
|
62
63
|
- ext/rule_tagger/memory.h
|
64
|
+
- ext/rule_tagger/mkmf.log
|
63
65
|
- ext/rule_tagger/rbtagger.c
|
64
66
|
- ext/rule_tagger/registry.c
|
65
67
|
- ext/rule_tagger/registry.h
|
@@ -73,12 +75,12 @@ files:
|
|
73
75
|
- ext/rule_tagger/useful.c
|
74
76
|
- ext/rule_tagger/useful.h
|
75
77
|
- ext/word_tagger/extconf.rb
|
78
|
+
- ext/word_tagger/mkmf.log
|
76
79
|
- ext/word_tagger/porter_stemmer.c
|
77
80
|
- ext/word_tagger/porter_stemmer.h
|
78
81
|
- ext/word_tagger/rtagger.cc
|
79
82
|
- ext/word_tagger/tagger.cc
|
80
83
|
- ext/word_tagger/tagger.h
|
81
|
-
- ext/word_tagger/tagger.rb
|
82
84
|
- ext/word_tagger/test.rb
|
83
85
|
- ext/word_tagger/test/Makefile
|
84
86
|
- ext/word_tagger/test/doc.txt
|
@@ -86,6 +88,7 @@ files:
|
|
86
88
|
- lib/brill/tagger.rb
|
87
89
|
- lib/rbtagger.rb
|
88
90
|
- lib/rbtagger/version.rb
|
91
|
+
- lib/word/tagger.rb
|
89
92
|
- script/console
|
90
93
|
- script/destroy
|
91
94
|
- script/generate
|
@@ -93,6 +96,9 @@ files:
|
|
93
96
|
- setup.rb
|
94
97
|
- tasks/deployment.rake
|
95
98
|
- tasks/environment.rake
|
99
|
+
- tasks/extconf.rake
|
100
|
+
- tasks/extconf/rule_tagger.rake
|
101
|
+
- tasks/extconf/word_tagger.rake
|
96
102
|
- tasks/website.rake
|
97
103
|
- test/CONTEXTUALRULEFILE
|
98
104
|
- test/LEXICALRULEFILE
|
@@ -107,8 +113,10 @@ files:
|
|
107
113
|
- test/docs/doc7.txt
|
108
114
|
- test/docs/doc8.txt
|
109
115
|
- test/docs/doc9.txt
|
110
|
-
- test/tagger_test.rb
|
116
|
+
- test/fixtures/tags.txt
|
111
117
|
- test/test_helper.rb
|
118
|
+
- test/test_rule_tagger.rb
|
119
|
+
- test/test_word_tagger.rb
|
112
120
|
- tools/rakehelp.rb
|
113
121
|
- website/index.html
|
114
122
|
- website/index.txt
|
@@ -153,3 +161,5 @@ specification_version: 2
|
|
153
161
|
summary: A Simple Ruby Rule-Based Part of Speech Tagger
|
154
162
|
test_files:
|
155
163
|
- test/test_helper.rb
|
164
|
+
- test/test_rule_tagger.rb
|
165
|
+
- test/test_word_tagger.rb
|