ruletagger 0.0.1 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Manifest.txt +9 -2
- data/Rakefile +0 -29
- data/config/hoe.rb +1 -2
- data/ext/rule_tagger/mkmf.log +46 -0
- data/ext/word_tagger/mkmf.log +24 -0
- data/ext/word_tagger/rtagger.cc +2 -2
- data/lib/rbtagger.rb +3 -0
- data/lib/rbtagger/version.rb +2 -2
- data/{ext/word_tagger → lib/word}/tagger.rb +3 -3
- data/tasks/extconf.rake +18 -0
- data/tasks/extconf/rule_tagger.rake +43 -0
- data/tasks/extconf/word_tagger.rake +43 -0
- data/test/fixtures/tags.txt +976 -0
- data/test/{tagger_test.rb → test_rule_tagger.rb} +13 -12
- data/test/test_word_tagger.rb +28 -0
- data/website/index.html +1 -1
- metadata +13 -3
@@ -1,16 +1,7 @@
|
|
1
|
-
require 'test/unit'
|
2
|
-
$:.unshift File.join(File.dirname(__FILE__), "..", "lib")
|
3
|
-
$:.unshift File.join(File.dirname(__FILE__), "..", "ext", "rule_tagger")
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
4
2
|
|
5
|
-
require 'brill/tagger'
|
6
3
|
|
7
|
-
|
8
|
-
$tagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
|
9
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
10
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
11
|
-
puts "tagger loaded!"
|
12
|
-
|
13
|
-
class TaggerTest < Test::Unit::TestCase
|
4
|
+
class TestRuleTagger< Test::Unit::TestCase
|
14
5
|
SAMPLE_DOC=%q(
|
15
6
|
Take an active role in your care
|
16
7
|
When it comes to making decisions about the goals and direction of treatment, don't sit back. Work closely and actively with your oncologist and the rest of your medical team.
|
@@ -33,6 +24,16 @@ Allow yourself time to discuss the emotional consequences of your illness and tr
|
|
33
24
|
Stay connected
|
34
25
|
Although many newly diagnosed patients fear they will not be able to keep working during treatment, this is usually not the case. Working, even at a reduced schedule, helps you maintain valuable social connections and weekly structure.
|
35
26
|
)
|
27
|
+
def setup
|
28
|
+
if !defined?($tagger)
|
29
|
+
puts "loading tagger..."
|
30
|
+
$rtagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
|
31
|
+
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
32
|
+
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
33
|
+
puts "tagger loaded!"
|
34
|
+
end
|
35
|
+
end
|
36
|
+
|
36
37
|
def test_simple_tagger
|
37
38
|
pairs = tagger.tag( SAMPLE_DOC )
|
38
39
|
assert_equal [["", ")"], ["", ")"], ["Take", "VB"], ["an", "DT"], ["active", "JJ"], ["role", "NN"], ["in", "IN"], ["your", "PRP$"], ["care", "NN"], ["When", "WRB"], ["it", "PRP"], ["comes", "VBZ"], ["to", "TO"], ["making", "VBG"], ["decisions", "NNS"], ["about", "IN"], ["the", "DT"], ["goals", "NNS"], ["and", "CC"], ["direction", "NN"], ["of", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["do", "VBP"], ["", ")"], ["n't", "RB"], ["sit", "VB"], ["back.", "CD"], ["Work", "NN"], ["closely", "RB"], ["and", "CC"], ["actively", "RB"], ["with", "IN"], ["your", "PRP$"], ["oncologist", "NN"], ["and", "CC"], ["the", "DT"], ["rest", "NN"], ["of", "IN"], ["your", "PRP$"], ["medical", "JJ"], ["team.", "NNP"], ["Dont", "NNP"], ["overlook", "VB"], ["clinical", "JJ"], ["trials", "NNS"], ["If", "IN"], ["youre", "NN"], ["eligible", "JJ"], ["to", "TO"], ["enroll", "VB"], ["in", "IN"], ["clinical", "JJ"], ["trials", "NNS"], [",", ","], ["", ")"], ["select", "VB"], ["an", "DT"], ["oncologist", "NN"], ["who", "WP"], ["participates", "VBZ"], ["in", "IN"], ["them.", "JJ"], ["Patients", "NNS"], ["who", "WP"], ["enroll", "VBP"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], ["receive", "VBP"], ["closer", "JJR"], ["follow-up", "NN"], [",", ","], ["", ")"], ["the", "DT"], ["highest", "JJS"], ["standard-of-care", "JJ"], ["treatment", "NN"], ["and", "CC"], ["access", "NN"], ["to", "TO"], ["experimental", "JJ"], ["therapies", "NNS"], ["at", "IN"], ["no", "DT"], ["extra", "JJ"], ["cost.", "NNP"], ["Maximize", "NNP"], ["your", "PRP$"], ["nutrition", "NN"], ["strategy", "NN"], ["Doing", "NNP"], ["your", "PRP$"], ["best", "JJS"], ["to", "TO"], ["eat", "VB"], ["a", "DT"], ["healthy", "JJ"], [",", ","], ["", ")"], ["well-balanced", "JJ"], ["diet", "NN"], ["is", "VBZ"], ["vital", "JJ"], ["to", "TO"], ["prompt", "VB"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["for", "IN"], ["recovery", "NN"], ["from", "IN"], ["radiation", "NN"], ["or", "CC"], ["chemotherapy.", 
"JJ"], ["Many", "JJ"], ["oncology", "NN"], ["practices", "NNS"], ["employ", "VBP"], ["registered", "VBN"], ["dieticians", "NNS"], ["who", "WP"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["optimize", "VB"], ["your", "PRP$"], ["nutrition.", "JJ"], ["Steer", "VB"], ["clear", "JJ"], ["of", "IN"], ["", ")"], ["``", "``"], ["natural", "JJ"], ["cures", "NNS"], ["''", "''"], ["", ")"], ["Before", "IN"], ["trying", "VBG"], ["nutritional", "JJ"], ["supplements", "NNS"], ["or", "CC"], ["herbal", "JJ"], ["remedies", "NNS"], [",", ","], ["", ")"], ["be", "VB"], ["sure", "JJ"], ["to", "TO"], ["discuss", "VB"], ["your", "PRP$"], ["plans", "NNS"], ["with", "IN"], ["a", "DT"], ["doctor.", "JJ"], ["Most", "JJS"], ["have", "VBP"], ["not", "RB"], ["been", "VBN"], ["tested", "VBN"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], [",", ","], ["", ")"], ["and", "CC"], ["some", "DT"], ["may", "MD"], ["actually", "RB"], ["interfere", "VB"], ["with", "IN"], ["your", "PRP$"], ["treatment.", "JJ"], ["Build", "VB"], ["a", "DT"], ["stronger", "JJR"], ["body", "NN"], ["Even", "RB"], ["walking", "VBG"], ["regularly", "RB"], ["is", "VBZ"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["minimize", "VB"], ["long-term", "JJ"], ["muscle", "NN"], ["weakness", "NN"], ["caused", "VBN"], ["by", "IN"], ["illness", "NN"], ["or", "CC"], ["de-conditioning.", "NNP"], ["Focus", "NNP"], ["on", "IN"], ["overall", "JJ"], ["health", "NN"], ["Patients", "NNS"], ["may", "MD"], ["be", "VB"], ["cured", "VBN"], ["of", "IN"], ["cancer", "NN"], ["but", "CC"], ["still", "JJ"], ["face", "NN"], ["life-threatening", "JJ"], ["medical", "JJ"], ["problems", "NNS"], ["that", "WDT"], ["are", "VBP"], ["underemphasized", "JJ"], ["during", "IN"], ["cancer", "NN"], ["treatments", "NNS"], [",", ","], ["", ")"], ["such", "JJ"], ["as", "IN"], ["diabetes", "NN"], [",", ","], ["", ")"], ["high", "JJ"], ["blood", "NN"], ["pressure", "NN"], ["and", "CC"], ["heart", "NN"], ["disease.", "JJ"], ["Continue", "VB"], ["to", "TO"], 
["monitor", "VB"], ["your", "PRP$"], ["overall", "JJ"], ["health.", "JJ"], ["Put", "NN"], ["the", "DT"], ["fire", "NN"], ["out", "IN"], ["for", "IN"], ["good", "JJ"], ["Smoking", "NNP"], ["impairs", "NNS"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["radiation", "NN"], ["and", "CC"], ["increases", "NNS"], ["your", "PRP$"], ["risk", "NN"], ["of", "IN"], ["cardiovascular", "JJ"], ["disease", "NN"], ["and", "CC"], ["many", "JJ"], ["types", "NNS"], ["of", "IN"], ["cancers.", "CD"], ["Ask", "VB"], ["your", "PRP$"], ["doctor", "NN"], ["for", "IN"], ["help", "NN"], ["identifying", "VBG"], ["and", "CC"], ["obtaining", "VBG"], ["the", "DT"], ["most", "RBS"], ["appropriate", "JJ"], ["cessation", "NN"], ["aids.", "NNP"], ["Map", "NNP"], ["a", "DT"], ["healthy", "JJ"], ["future", "NN"], ["Once", "RB"], ["youve", "VBP"], ["completed", "VBN"], ["treatment", "NN"], [",", ","], ["", ")"], ["discuss", "VB"], ["appropriate", "JJ"], ["follow-up", "NN"], ["plans", "NNS"], ["with", "IN"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], ["keep", "VB"], ["track", "NN"], ["of", "IN"], ["them", "PRP"], ["yourself.", "CD"], ["Intensified", "JJ"], ["screening", "NN"], ["over", "IN"], ["many", "JJ"], ["years", "NNS"], ["is", "VBZ"], ["frequently", "RB"], ["recommended", "VBN"], ["to", "TO"], ["identify", "VB"], ["and", "CC"], ["treat", "VB"], ["a", "DT"], ["recurrence", "NN"], ["early", "JJ"], ["on.", "CD"], ["Share", "VB"], ["your", "PRP$"], ["feelings", "NNS"], ["Allow", "VB"], ["yourself", "PRP"], ["time", "NN"], ["to", "TO"], ["discuss", "VB"], ["the", "DT"], ["emotional", "JJ"], ["consequences", "NNS"], ["of", "IN"], ["your", "PRP$"], ["illness", "NN"], ["and", "CC"], ["treatment", "NN"], ["with", "IN"], ["family", "NN"], [",", ","], ["", ")"], ["friends", "NNS"], [",", ","], ["", ")"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], [",", ","], ["", ")"], ["if", "IN"], ["necessary", "JJ"], [",", ","], ["", ")"], ["a", "DT"], ["professional", "JJ"], 
["therapist.", "JJ"], ["Many", "JJ"], ["patients", "NNS"], ["also", "RB"], ["find", "VBP"], ["antidepressants", "NNS"], ["helpful", "JJ"], ["during", "IN"], ["treatment.", "JJ"], ["Stay", "VB"], ["connected", "VBN"], ["Although", "IN"], ["many", "JJ"], ["newly", "RB"], ["diagnosed", "VBN"], ["patients", "NNS"], ["fear", "VBP"], ["they", "PRP"], ["will", "MD"], ["not", "RB"], ["be", "VB"], ["able", "JJ"], ["to", "TO"], ["keep", "VB"], ["working", "VBG"], ["during", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["this", "DT"], ["is", "VBZ"], ["usually", "RB"], ["not", "RB"], ["the", "DT"], ["case.", "CD"], ["Working", "NNP"], [",", ","], ["", ")"], ["even", "RB"], ["at", "IN"], ["a", "DT"], ["reduced", "VBN"], ["schedule", "NN"], [",", ","], ["", ")"], ["helps", "VBZ"], ["you", "PRP"], ["maintain", "VBP"], ["valuable", "JJ"], ["social", "JJ"], ["connections", "NNS"], ["and", "CC"], ["weekly", "JJ"], ["structure", "NN"], [".", "."]], pairs
|
@@ -55,6 +56,6 @@ Although many newly diagnosed patients fear they will not be able to keep workin
|
|
55
56
|
|
56
57
|
private
|
57
58
|
def tagger
|
58
|
-
$tagger
|
59
|
+
$rtagger
|
59
60
|
end
|
60
61
|
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
class TestWordTagger < Test::Unit::TestCase
|
4
|
+
|
5
|
+
def setup
|
6
|
+
if !defined?($wtagger)
|
7
|
+
$wtagger = Word::Tagger.new
|
8
|
+
$wtagger.load_tags( File.read(File.join(File.dirname(__FILE__),'fixtures','tags.txt') ).split("\n").map{|t| t.strip} )
|
9
|
+
$wtagger.set_words( 4 )
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
def test_basic
|
14
|
+
timer = Time.now
|
15
|
+
text = "This is a sa'mple doc[]ument lets see how cancer ngrams 4 works out for this interesting text!"
|
16
|
+
tags = $wtagger.execute( text )
|
17
|
+
assert_equal ['cancer','work'], tags
|
18
|
+
puts "Duration: #{Time.now - timer} sec"
|
19
|
+
end
|
20
|
+
|
21
|
+
def test_ngram_size3
|
22
|
+
timer = Time.now
|
23
|
+
text = "This body of text contains something like ventricular septal defect"
|
24
|
+
tags = $wtagger.execute( text )
|
25
|
+
assert_equal ['ventricular septal defect'], tags
|
26
|
+
puts "Duration: #{Time.now - timer} sec"
|
27
|
+
end
|
28
|
+
end
|
data/website/index.html
CHANGED
@@ -33,7 +33,7 @@
|
|
33
33
|
<h1>rbtagger</h1>
|
34
34
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/ruletagger"; return false'>
|
35
35
|
<p>Get Version</p>
|
36
|
-
<a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.0.1</a>
|
36
|
+
<a href="http://rubyforge.org/projects/ruletagger" class="numbers">0.1.0</a>
|
37
37
|
</div>
|
38
38
|
<h4 style="float:right;padding-right:10px;"> → ‘rb-brill-tagger’</h4>
|
39
39
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ruletagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Todd A. Fisher
|
@@ -38,6 +38,7 @@ extra_rdoc_files:
|
|
38
38
|
- test/docs/doc7.txt
|
39
39
|
- test/docs/doc8.txt
|
40
40
|
- test/docs/doc9.txt
|
41
|
+
- test/fixtures/tags.txt
|
41
42
|
- website/index.txt
|
42
43
|
files:
|
43
44
|
- COPYING
|
@@ -60,6 +61,7 @@ files:
|
|
60
61
|
- ext/rule_tagger/lex.h
|
61
62
|
- ext/rule_tagger/memory.c
|
62
63
|
- ext/rule_tagger/memory.h
|
64
|
+
- ext/rule_tagger/mkmf.log
|
63
65
|
- ext/rule_tagger/rbtagger.c
|
64
66
|
- ext/rule_tagger/registry.c
|
65
67
|
- ext/rule_tagger/registry.h
|
@@ -73,12 +75,12 @@ files:
|
|
73
75
|
- ext/rule_tagger/useful.c
|
74
76
|
- ext/rule_tagger/useful.h
|
75
77
|
- ext/word_tagger/extconf.rb
|
78
|
+
- ext/word_tagger/mkmf.log
|
76
79
|
- ext/word_tagger/porter_stemmer.c
|
77
80
|
- ext/word_tagger/porter_stemmer.h
|
78
81
|
- ext/word_tagger/rtagger.cc
|
79
82
|
- ext/word_tagger/tagger.cc
|
80
83
|
- ext/word_tagger/tagger.h
|
81
|
-
- ext/word_tagger/tagger.rb
|
82
84
|
- ext/word_tagger/test.rb
|
83
85
|
- ext/word_tagger/test/Makefile
|
84
86
|
- ext/word_tagger/test/doc.txt
|
@@ -86,6 +88,7 @@ files:
|
|
86
88
|
- lib/brill/tagger.rb
|
87
89
|
- lib/rbtagger.rb
|
88
90
|
- lib/rbtagger/version.rb
|
91
|
+
- lib/word/tagger.rb
|
89
92
|
- script/console
|
90
93
|
- script/destroy
|
91
94
|
- script/generate
|
@@ -93,6 +96,9 @@ files:
|
|
93
96
|
- setup.rb
|
94
97
|
- tasks/deployment.rake
|
95
98
|
- tasks/environment.rake
|
99
|
+
- tasks/extconf.rake
|
100
|
+
- tasks/extconf/rule_tagger.rake
|
101
|
+
- tasks/extconf/word_tagger.rake
|
96
102
|
- tasks/website.rake
|
97
103
|
- test/CONTEXTUALRULEFILE
|
98
104
|
- test/LEXICALRULEFILE
|
@@ -107,8 +113,10 @@ files:
|
|
107
113
|
- test/docs/doc7.txt
|
108
114
|
- test/docs/doc8.txt
|
109
115
|
- test/docs/doc9.txt
|
110
|
-
- test/tagger_test.rb
|
116
|
+
- test/fixtures/tags.txt
|
111
117
|
- test/test_helper.rb
|
118
|
+
- test/test_rule_tagger.rb
|
119
|
+
- test/test_word_tagger.rb
|
112
120
|
- tools/rakehelp.rb
|
113
121
|
- website/index.html
|
114
122
|
- website/index.txt
|
@@ -153,3 +161,5 @@ specification_version: 2
|
|
153
161
|
summary: A Simple Ruby Rule-Based Part of Speech Tagger
|
154
162
|
test_files:
|
155
163
|
- test/test_helper.rb
|
164
|
+
- test/test_rule_tagger.rb
|
165
|
+
- test/test_word_tagger.rb
|