rbtagger 0.2.5 → 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
data/lib/brill/tagger.rb CHANGED
@@ -12,14 +12,50 @@ module Brill
12
12
  # returns similar results as tag, but further reduced by only selecting nouns
13
13
  def suggest( text, max = 10 )
14
14
  tags = tag(text)
15
+ #puts tags.inspect
16
+ ptag = [nil,nil]
17
+ # join NNP's together for names
18
+ reduced_tags = []
19
+ mappings = {} # keep a mapping of the joined words to expand
20
+ tags.each{|tag|
21
+ if ptag.last == 'NNP' and tag.last == 'NNP' and !ptag.first.match(/\.$/)
22
+ ptag[0] += " " + tag.first
23
+ # before combining these two create a mapping for each word to each word
24
+ words = ptag.first.split(/\s/)
25
+ i = 0
26
+ #puts words.inspect
27
+ until (i + 1) == words.size
28
+ mappings[words[i]] = ptag.first
29
+ mappings[words[i+1]] = ptag.first
30
+ i += 1
31
+ end
32
+ #puts mappings.inspect
33
+ elsif tag.last == 'NNP'
34
+ ptag = tag
35
+ elsif tag.last != 'NNP' and ptag.first != nil
36
+ reduced_tags << ptag
37
+ reduced_tags << tag if tag.last.match( /^\w+$/ ) and tag.first.match(/^\w+$/)
38
+ ptag = [nil,nil]
39
+ elsif tag.last.match( /^\w+$/ ) and tag.first.match(/^\w+$/)
40
+ reduced_tags << tag
41
+ end
42
+ }
43
+ # now expand any NNP that appear
44
+ tags = reduced_tags.map{|tag|
45
+ if tag.last == 'NNP'
46
+ #puts "#{tag.first} => #{mappings[tag.first]}"
47
+ tag[0] = mappings[tag.first] if mappings.key?(tag.first)
48
+ end
49
+ tag
50
+ }
15
51
  results = tags.select{|tag| tag.last.match(/NN/) }
16
52
  if results.size > max
17
53
  counts = {}
18
54
  tags = []
19
55
  results.each {|tag| counts[tag.first] = 0 }
20
56
  results.each do |tag|
21
- counts[tag.first] += 1
22
- tags << tag if counts[tag.first] == 1
57
+ tags << tag if counts[tag.first] == 0
58
+ counts[tag.first] += tag.last == 'NNP' ? 3 : (tag.last == 'NNS' ? 2 : 1)
23
59
  end
24
60
  tags.map!{|tag| [tag.first, tag.last,counts[tag.first]]}
25
61
  t = 1
@@ -41,6 +77,8 @@ module Brill
41
77
  # returns an array like [[token,tag],[token,tag]...[token,tag]]
42
78
  #
43
79
  def tag( text )
80
+ text = text.gsub(/dont/,"don't").gsub(/Dont/,"Don't")
81
+ text = text.gsub(/youre/,"you're")
44
82
  tokens = Brill::Tagger.tokenize( text )
45
83
  tags = Brill::Tagger.tag_start( tokens )
46
84
 
@@ -2,7 +2,7 @@ module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
4
  MINOR = 2
5
- TINY = 5
5
+ TINY = 6
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -23,24 +23,96 @@ Share your feelings
23
23
  Allow yourself time to discuss the emotional consequences of your illness and treatment with family, friends, your doctor and, if necessary, a professional therapist. Many patients also find antidepressants helpful during treatment.
24
24
  Stay connected
25
25
  Although many newly diagnosed patients fear they will not be able to keep working during treatment, this is usually not the case. Working, even at a reduced schedule, helps you maintain valuable social connections and weekly structure.
26
+ )
27
+ SAMPLE_DOC2=%q(
28
+ Britney Spears was granted a change in her visitation schedule with her sons Sean Preston and Jayden James at a hearing Tuesday.
29
+ "There was a change in visitation status that was ordered by Commissioner Gordon this morning," Los Angeles Superior Court spokesperson Alan Parachini confirmed after the hearing, which both Kevin Federline and her father (and co-conservator) Jamie Spears attended. (Britney and Kevin did not address each other during the hearing.)
30
+ The details of her visitation, however, are unclear.
31
+ "I'm not at liberty to answer any questions about the nature of that change," Parachini said. (TMZ.com had reported that Spears wanted overnight visits.)
32
+ Asked by Us if she were happy with the court outcome, Spears (clutching an Ed Hardy purse) smiled and told Us, "Yes."
33
+ Next up: A status hearing set for July 15.
34
+ The couple last appeared in court May 6. Spears was granted extended visitation — three days a week from 9 a.m. to 5 p.m. — of Sean Preston, 2, and Jayden James, 20 months.
26
35
  )
27
36
  def setup
28
37
  if !defined?($tagger)
29
- puts "loading tagger..."
30
38
  $rtagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
31
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
32
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
33
- puts "tagger loaded!"
39
+ File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
40
+ File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
34
41
  end
35
42
  end
36
43
 
37
44
  def test_simple_tagger
38
45
  pairs = tagger.tag( SAMPLE_DOC )
39
- assert_equal [["", ")"], ["", ")"], ["Take", "VB"], ["an", "DT"], ["active", "JJ"], ["role", "NN"], ["in", "IN"], ["your", "PRP$"], ["care", "NN"], ["When", "WRB"], ["it", "PRP"], ["comes", "VBZ"], ["to", "TO"], ["making", "VBG"], ["decisions", "NNS"], ["about", "IN"], ["the", "DT"], ["goals", "NNS"], ["and", "CC"], ["direction", "NN"], ["of", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["do", "VBP"], ["", ")"], ["n't", "RB"], ["sit", "VB"], ["back.", "CD"], ["Work", "NN"], ["closely", "RB"], ["and", "CC"], ["actively", "RB"], ["with", "IN"], ["your", "PRP$"], ["oncologist", "NN"], ["and", "CC"], ["the", "DT"], ["rest", "NN"], ["of", "IN"], ["your", "PRP$"], ["medical", "JJ"], ["team.", "NNP"], ["Dont", "NNP"], ["overlook", "VB"], ["clinical", "JJ"], ["trials", "NNS"], ["If", "IN"], ["youre", "NN"], ["eligible", "JJ"], ["to", "TO"], ["enroll", "VB"], ["in", "IN"], ["clinical", "JJ"], ["trials", "NNS"], [",", ","], ["", ")"], ["select", "VB"], ["an", "DT"], ["oncologist", "NN"], ["who", "WP"], ["participates", "VBZ"], ["in", "IN"], ["them.", "JJ"], ["Patients", "NNS"], ["who", "WP"], ["enroll", "VBP"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], ["receive", "VBP"], ["closer", "JJR"], ["follow-up", "NN"], [",", ","], ["", ")"], ["the", "DT"], ["highest", "JJS"], ["standard-of-care", "JJ"], ["treatment", "NN"], ["and", "CC"], ["access", "NN"], ["to", "TO"], ["experimental", "JJ"], ["therapies", "NNS"], ["at", "IN"], ["no", "DT"], ["extra", "JJ"], ["cost.", "NNP"], ["Maximize", "NNP"], ["your", "PRP$"], ["nutrition", "NN"], ["strategy", "NN"], ["Doing", "NNP"], ["your", "PRP$"], ["best", "JJS"], ["to", "TO"], ["eat", "VB"], ["a", "DT"], ["healthy", "JJ"], [",", ","], ["", ")"], ["well-balanced", "JJ"], ["diet", "NN"], ["is", "VBZ"], ["vital", "JJ"], ["to", "TO"], ["prompt", "VB"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["for", "IN"], ["recovery", "NN"], ["from", "IN"], ["radiation", "NN"], ["or", "CC"], 
["chemotherapy.", "JJ"], ["Many", "JJ"], ["oncology", "NN"], ["practices", "NNS"], ["employ", "VBP"], ["registered", "VBN"], ["dieticians", "NNS"], ["who", "WP"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["optimize", "VB"], ["your", "PRP$"], ["nutrition.", "JJ"], ["Steer", "VB"], ["clear", "JJ"], ["of", "IN"], ["", ")"], ["``", "``"], ["natural", "JJ"], ["cures", "NNS"], ["''", "''"], ["", ")"], ["Before", "IN"], ["trying", "VBG"], ["nutritional", "JJ"], ["supplements", "NNS"], ["or", "CC"], ["herbal", "JJ"], ["remedies", "NNS"], [",", ","], ["", ")"], ["be", "VB"], ["sure", "JJ"], ["to", "TO"], ["discuss", "VB"], ["your", "PRP$"], ["plans", "NNS"], ["with", "IN"], ["a", "DT"], ["doctor.", "JJ"], ["Most", "JJS"], ["have", "VBP"], ["not", "RB"], ["been", "VBN"], ["tested", "VBN"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], [",", ","], ["", ")"], ["and", "CC"], ["some", "DT"], ["may", "MD"], ["actually", "RB"], ["interfere", "VB"], ["with", "IN"], ["your", "PRP$"], ["treatment.", "JJ"], ["Build", "VB"], ["a", "DT"], ["stronger", "JJR"], ["body", "NN"], ["Even", "RB"], ["walking", "VBG"], ["regularly", "RB"], ["is", "VBZ"], ["can", "MD"], ["help", "VB"], ["you", "PRP"], ["minimize", "VB"], ["long-term", "JJ"], ["muscle", "NN"], ["weakness", "NN"], ["caused", "VBN"], ["by", "IN"], ["illness", "NN"], ["or", "CC"], ["de-conditioning.", "NNP"], ["Focus", "NNP"], ["on", "IN"], ["overall", "JJ"], ["health", "NN"], ["Patients", "NNS"], ["may", "MD"], ["be", "VB"], ["cured", "VBN"], ["of", "IN"], ["cancer", "NN"], ["but", "CC"], ["still", "JJ"], ["face", "NN"], ["life-threatening", "JJ"], ["medical", "JJ"], ["problems", "NNS"], ["that", "WDT"], ["are", "VBP"], ["underemphasized", "JJ"], ["during", "IN"], ["cancer", "NN"], ["treatments", "NNS"], [",", ","], ["", ")"], ["such", "JJ"], ["as", "IN"], ["diabetes", "NN"], [",", ","], ["", ")"], ["high", "JJ"], ["blood", "NN"], ["pressure", "NN"], ["and", "CC"], ["heart", "NN"], ["disease.", "JJ"], ["Continue", 
"VB"], ["to", "TO"], ["monitor", "VB"], ["your", "PRP$"], ["overall", "JJ"], ["health.", "JJ"], ["Put", "NN"], ["the", "DT"], ["fire", "NN"], ["out", "IN"], ["for", "IN"], ["good", "JJ"], ["Smoking", "NNP"], ["impairs", "NNS"], ["healing", "NN"], ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["radiation", "NN"], ["and", "CC"], ["increases", "NNS"], ["your", "PRP$"], ["risk", "NN"], ["of", "IN"], ["cardiovascular", "JJ"], ["disease", "NN"], ["and", "CC"], ["many", "JJ"], ["types", "NNS"], ["of", "IN"], ["cancers.", "CD"], ["Ask", "VB"], ["your", "PRP$"], ["doctor", "NN"], ["for", "IN"], ["help", "NN"], ["identifying", "VBG"], ["and", "CC"], ["obtaining", "VBG"], ["the", "DT"], ["most", "RBS"], ["appropriate", "JJ"], ["cessation", "NN"], ["aids.", "NNP"], ["Map", "NNP"], ["a", "DT"], ["healthy", "JJ"], ["future", "NN"], ["Once", "RB"], ["youve", "VBP"], ["completed", "VBN"], ["treatment", "NN"], [",", ","], ["", ")"], ["discuss", "VB"], ["appropriate", "JJ"], ["follow-up", "NN"], ["plans", "NNS"], ["with", "IN"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], ["keep", "VB"], ["track", "NN"], ["of", "IN"], ["them", "PRP"], ["yourself.", "CD"], ["Intensified", "JJ"], ["screening", "NN"], ["over", "IN"], ["many", "JJ"], ["years", "NNS"], ["is", "VBZ"], ["frequently", "RB"], ["recommended", "VBN"], ["to", "TO"], ["identify", "VB"], ["and", "CC"], ["treat", "VB"], ["a", "DT"], ["recurrence", "NN"], ["early", "JJ"], ["on.", "CD"], ["Share", "VB"], ["your", "PRP$"], ["feelings", "NNS"], ["Allow", "VB"], ["yourself", "PRP"], ["time", "NN"], ["to", "TO"], ["discuss", "VB"], ["the", "DT"], ["emotional", "JJ"], ["consequences", "NNS"], ["of", "IN"], ["your", "PRP$"], ["illness", "NN"], ["and", "CC"], ["treatment", "NN"], ["with", "IN"], ["family", "NN"], [",", ","], ["", ")"], ["friends", "NNS"], [",", ","], ["", ")"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], [",", ","], ["", ")"], ["if", "IN"], ["necessary", "JJ"], [",", ","], ["", ")"], ["a", "DT"], 
["professional", "JJ"], ["therapist.", "JJ"], ["Many", "JJ"], ["patients", "NNS"], ["also", "RB"], ["find", "VBP"], ["antidepressants", "NNS"], ["helpful", "JJ"], ["during", "IN"], ["treatment.", "JJ"], ["Stay", "VB"], ["connected", "VBN"], ["Although", "IN"], ["many", "JJ"], ["newly", "RB"], ["diagnosed", "VBN"], ["patients", "NNS"], ["fear", "VBP"], ["they", "PRP"], ["will", "MD"], ["not", "RB"], ["be", "VB"], ["able", "JJ"], ["to", "TO"], ["keep", "VB"], ["working", "VBG"], ["during", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["this", "DT"], ["is", "VBZ"], ["usually", "RB"], ["not", "RB"], ["the", "DT"], ["case.", "CD"], ["Working", "NNP"], [",", ","], ["", ")"], ["even", "RB"], ["at", "IN"], ["a", "DT"], ["reduced", "VBN"], ["schedule", "NN"], [",", ","], ["", ")"], ["helps", "VBZ"], ["you", "PRP"], ["maintain", "VBP"], ["valuable", "JJ"], ["social", "JJ"], ["connections", "NNS"], ["and", "CC"], ["weekly", "JJ"], ["structure", "NN"], [".", "."]], pairs
40
- #puts pairs.inspect
41
- # enable these lines for memory leak testing
42
- #$tagger = nil
43
- #ObjectSpace.garbage_collect
46
+ tags = [["", ")"], ["", ")"], ["Take", "VB"], ["an", "DT"], ["active", "JJ"], ["role", "NN"], ["in", "IN"],
47
+ ["your", "PRP$"], ["care", "NN"], ["When", "WRB"], ["it", "PRP"], ["comes", "VBZ"], ["to", "TO"],
48
+ ["making", "VBG"], ["decisions", "NNS"], ["about", "IN"], ["the", "DT"], ["goals", "NNS"], ["and", "CC"],
49
+ ["direction", "NN"], ["of", "IN"], ["treatment", "NN"], [",", ","], ["", ")"], ["do", "VBP"], ["", ")"],
50
+ ["n't", "RB"], ["sit", "VB"], ["back.", "CD"], ["Work", "NN"], ["closely", "RB"], ["and", "CC"],
51
+ ["actively", "RB"], ["with", "IN"], ["your", "PRP$"], ["oncologist", "NN"], ["and", "CC"], ["the", "DT"],
52
+ ["rest", "NN"], ["of", "IN"], ["your", "PRP$"], ["medical", "JJ"], ["team.", "JJ"], ["Do", "VBP"],
53
+ ["", ")"], ["n't", "RB"], ["overlook", "VB"], ["clinical", "JJ"], ["trials", "NNS"], ["If", "IN"],
54
+ ["you", "PRP"], ["'re", "VBP"], ["eligible", "JJ"], ["to", "TO"], ["enroll", "VB"], ["in", "IN"],
55
+ ["clinical", "JJ"], ["trials", "NNS"], [",", ","], ["", ")"], ["select", "VB"], ["an", "DT"],
56
+ ["oncologist", "NN"], ["who", "WP"], ["participates", "VBZ"], ["in", "IN"], ["them.", "JJ"],
57
+ ["Patients", "NNS"], ["who", "WP"], ["enroll", "VBP"], ["in", "IN"], ["clinical", "JJ"],
58
+ ["studies", "NNS"], ["receive", "VBP"], ["closer", "JJR"], ["follow-up", "NN"], [",", ","], ["", ")"],
59
+ ["the", "DT"], ["highest", "JJS"], ["standard-of-care", "JJ"], ["treatment", "NN"], ["and", "CC"],
60
+ ["access", "NN"], ["to", "TO"], ["experimental", "JJ"], ["therapies", "NNS"], ["at", "IN"], ["no", "DT"],
61
+ ["extra", "JJ"], ["cost.", "NNP"], ["Maximize", "NNP"], ["your", "PRP$"], ["nutrition", "NN"],
62
+ ["strategy", "NN"], ["Doing", "NNP"], ["your", "PRP$"], ["best", "JJS"], ["to", "TO"], ["eat", "VB"],
63
+ ["a", "DT"], ["healthy", "JJ"], [",", ","], ["", ")"], ["well-balanced", "JJ"], ["diet", "NN"],
64
+ ["is", "VBZ"], ["vital", "JJ"], ["to", "TO"], ["prompt", "VB"], ["healing", "NN"], ["after", "IN"],
65
+ ["surgery", "NN"], ["and", "CC"], ["for", "IN"], ["recovery", "NN"], ["from", "IN"], ["radiation", "NN"],
66
+ ["or", "CC"], ["chemotherapy.", "JJ"], ["Many", "JJ"], ["oncology", "NN"], ["practices", "NNS"],
67
+ ["employ", "VBP"], ["registered", "VBN"], ["dieticians", "NNS"], ["who", "WP"], ["can", "MD"],
68
+ ["help", "VB"], ["you", "PRP"], ["optimize", "VB"], ["your", "PRP$"], ["nutrition.", "JJ"],
69
+ ["Steer", "VB"], ["clear", "JJ"], ["of", "IN"], ["", ")"], ["``", "``"], ["natural", "JJ"],
70
+ ["cures", "NNS"], ["''", "''"], ["", ")"], ["Before", "IN"], ["trying", "VBG"], ["nutritional", "JJ"],
71
+ ["supplements", "NNS"], ["or", "CC"], ["herbal", "JJ"], ["remedies", "NNS"], [",", ","], ["", ")"],
72
+ ["be", "VB"], ["sure", "JJ"], ["to", "TO"], ["discuss", "VB"], ["your", "PRP$"], ["plans", "NNS"],
73
+ ["with", "IN"], ["a", "DT"], ["doctor.", "JJ"], ["Most", "JJS"], ["have", "VBP"], ["not", "RB"],
74
+ ["been", "VBN"], ["tested", "VBN"], ["in", "IN"], ["clinical", "JJ"], ["studies", "NNS"], [",", ","],
75
+ ["", ")"], ["and", "CC"], ["some", "DT"], ["may", "MD"], ["actually", "RB"], ["interfere", "VB"],
76
+ ["with", "IN"], ["your", "PRP$"], ["treatment.", "JJ"], ["Build", "VB"], ["a", "DT"], ["stronger", "JJR"],
77
+ ["body", "NN"], ["Even", "RB"], ["walking", "VBG"], ["regularly", "RB"], ["is", "VBZ"], ["can", "MD"],
78
+ ["help", "VB"], ["you", "PRP"], ["minimize", "VB"], ["long-term", "JJ"], ["muscle", "NN"],
79
+ ["weakness", "NN"], ["caused", "VBN"], ["by", "IN"], ["illness", "NN"], ["or", "CC"],
80
+ ["de-conditioning.", "NNP"], ["Focus", "NNP"], ["on", "IN"], ["overall", "JJ"], ["health", "NN"],
81
+ ["Patients", "NNS"], ["may", "MD"], ["be", "VB"], ["cured", "VBN"], ["of", "IN"], ["cancer", "NN"],
82
+ ["but", "CC"], ["still", "JJ"], ["face", "NN"], ["life-threatening", "JJ"], ["medical", "JJ"],
83
+ ["problems", "NNS"], ["that", "WDT"], ["are", "VBP"], ["underemphasized", "JJ"], ["during", "IN"],
84
+ ["cancer", "NN"], ["treatments", "NNS"], [",", ","], ["", ")"], ["such", "JJ"], ["as", "IN"],
85
+ ["diabetes", "NN"], [",", ","], ["", ")"], ["high", "JJ"], ["blood", "NN"], ["pressure", "NN"],
86
+ ["and", "CC"], ["heart", "NN"], ["disease.", "JJ"], ["Continue", "VB"], ["to", "TO"], ["monitor", "VB"],
87
+ ["your", "PRP$"], ["overall", "JJ"], ["health.", "JJ"], ["Put", "NN"], ["the", "DT"], ["fire", "NN"],
88
+ ["out", "IN"], ["for", "IN"], ["good", "JJ"], ["Smoking", "NNP"], ["impairs", "NNS"], ["healing", "NN"],
89
+ ["after", "IN"], ["surgery", "NN"], ["and", "CC"], ["radiation", "NN"], ["and", "CC"], ["increases", "NNS"],
90
+ ["your", "PRP$"], ["risk", "NN"], ["of", "IN"], ["cardiovascular", "JJ"], ["disease", "NN"], ["and", "CC"],
91
+ ["many", "JJ"], ["types", "NNS"], ["of", "IN"], ["cancers.", "CD"], ["Ask", "VB"], ["your", "PRP$"],
92
+ ["doctor", "NN"], ["for", "IN"], ["help", "NN"], ["identifying", "VBG"], ["and", "CC"], ["obtaining", "VBG"],
93
+ ["the", "DT"], ["most", "RBS"], ["appropriate", "JJ"], ["cessation", "NN"], ["aids.", "NNP"], ["Map", "NNP"],
94
+ ["a", "DT"], ["healthy", "JJ"], ["future", "NN"], ["Once", "RB"], ["youve", "VBP"], ["completed", "VBN"],
95
+ ["treatment", "NN"], [",", ","], ["", ")"], ["discuss", "VB"], ["appropriate", "JJ"], ["follow-up", "NN"],
96
+ ["plans", "NNS"], ["with", "IN"], ["your", "PRP$"], ["doctor", "NN"], ["and", "CC"], ["keep", "VB"],
97
+ ["track", "NN"], ["of", "IN"], ["them", "PRP"], ["yourself.", "CD"], ["Intensified", "JJ"], ["screening", "NN"],
98
+ ["over", "IN"], ["many", "JJ"], ["years", "NNS"], ["is", "VBZ"], ["frequently", "RB"], ["recommended", "VBN"],
99
+ ["to", "TO"], ["identify", "VB"], ["and", "CC"], ["treat", "VB"], ["a", "DT"], ["recurrence", "NN"], ["early", "JJ"],
100
+ ["on.", "CD"], ["Share", "VB"], ["your", "PRP$"], ["feelings", "NNS"], ["Allow", "VB"], ["yourself", "PRP"],
101
+ ["time", "NN"], ["to", "TO"], ["discuss", "VB"], ["the", "DT"], ["emotional", "JJ"], ["consequences", "NNS"],
102
+ ["of", "IN"], ["your", "PRP$"], ["illness", "NN"], ["and", "CC"], ["treatment", "NN"], ["with", "IN"],
103
+ ["family", "NN"], [",", ","], ["", ")"], ["friends", "NNS"], [",", ","], ["", ")"], ["your", "PRP$"],
104
+ ["doctor", "NN"], ["and", "CC"], [",", ","], ["", ")"], ["if", "IN"], ["necessary", "JJ"], [",", ","],
105
+ ["", ")"], ["a", "DT"], ["professional", "JJ"], ["therapist.", "JJ"], ["Many", "JJ"], ["patients", "NNS"],
106
+ ["also", "RB"], ["find", "VBP"], ["antidepressants", "NNS"], ["helpful", "JJ"], ["during", "IN"],
107
+ ["treatment.", "JJ"], ["Stay", "VB"], ["connected", "VBN"], ["Although", "IN"], ["many", "JJ"],
108
+ ["newly", "RB"], ["diagnosed", "VBN"], ["patients", "NNS"], ["fear", "VBP"], ["they", "PRP"], ["will", "MD"],
109
+ ["not", "RB"], ["be", "VB"], ["able", "JJ"], ["to", "TO"], ["keep", "VB"], ["working", "VBG"], ["during", "IN"],
110
+ ["treatment", "NN"], [",", ","], ["", ")"], ["this", "DT"], ["is", "VBZ"], ["usually", "RB"], ["not", "RB"],
111
+ ["the", "DT"], ["case.", "CD"], ["Working", "NNP"], [",", ","], ["", ")"], ["even", "RB"], ["at", "IN"],
112
+ ["a", "DT"], ["reduced", "VBN"], ["schedule", "NN"], [",", ","], ["", ")"], ["helps", "VBZ"], ["you", "PRP"],
113
+ ["maintain", "VBP"], ["valuable", "JJ"], ["social", "JJ"], ["connections", "NNS"], ["and", "CC"],
114
+ ["weekly", "JJ"], ["structure", "NN"], [".", "."]]
115
+ assert_equal tags, pairs
44
116
  end
45
117
 
46
118
  def test_multiple_docs
@@ -56,7 +128,12 @@ Although many newly diagnosed patients fear they will not be able to keep workin
56
128
 
57
129
  def test_suggest
58
130
  results = tagger.suggest( SAMPLE_DOC )
59
- assert_equal [["doctor", "NN", 3], ["treatment", "NN", 5]], results
131
+ # puts results.inspect
132
+ assert results.include?(["treatment", "NN", 5])
133
+ results = tagger.suggest( SAMPLE_DOC2 )
134
+ assert results.include?(["Britney Spears", "NNP", 6])
135
+ assert results.include?(["Jamie Spears", "NNP", 12])
136
+ # puts results.inspect
60
137
  end
61
138
 
62
139
  private
data/website/index.html CHANGED
@@ -160,7 +160,7 @@
160
160
  <h1>rbtagger</h1>
161
161
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
162
162
  <p>Get Version</p>
163
- <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.2.4</a>
163
+ <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.2.5</a>
164
164
  </div>
165
165
  <h4 style="float:right;padding-right:10px;"> &#x2192; &#8216;rbtagger&#8217;</h4>
166
166
 
@@ -194,6 +194,9 @@ gem install rbtagger
194
194
  <span class="ident">docs</span><span class="punct">.</span><span class="ident">each</span> <span class="keyword">do</span><span class="punct">|</span><span class="ident">doc</span><span class="punct">|</span>
195
195
  <span class="ident">tagger</span><span class="punct">.</span><span class="ident">tag</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(</span> <span class="ident">doc</span> <span class="punct">)</span> <span class="punct">)</span>
196
196
  <span class="keyword">end</span>
197
+
198
+ <span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(&quot;</span><span class="string">sample.txt</span><span class="punct">&quot;)</span> <span class="punct">)</span>
199
+ <span class="punct">=&gt;</span> <span class="punct">[[&quot;</span><span class="string">doctor</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">[&quot;</span><span class="string">treatment</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">5</span><span class="punct">]]</span>
197
200
  </pre>
198
201
 
199
202
  <h4>Using the word tagger</h4>
@@ -242,7 +245,7 @@ rake install_gem</pre>
242
245
 
243
246
  <p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
244
247
  <p class="coda">
245
- <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 22nd June 2008<br>
248
+ <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 23rd June 2008<br>
246
249
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
247
250
  </p>
248
251
  </div>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbtagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.5
4
+ version: 0.2.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-06-23 00:00:00 -04:00
12
+ date: 2008-06-25 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies: []
15
15