rbtagger 0.2.9 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
data/Manifest.txt CHANGED
@@ -46,6 +46,9 @@ lib/brill/tagger.rb
46
46
  lib/rbtagger.rb
47
47
  lib/rbtagger/version.rb
48
48
  lib/word/tagger.rb
49
+ lib/brill/brown/CONTEXTUALRULEFILE
50
+ lib/brill/brown/LEXICALRULEFILE
51
+ lib/brill/brown/LEXICON
49
52
  script/console
50
53
  script/destroy
51
54
  script/generate
@@ -57,9 +60,6 @@ tasks/extconf.rake
57
60
  tasks/extconf/rule_tagger.rake
58
61
  tasks/extconf/word_tagger.rake
59
62
  tasks/website.rake
60
- test/CONTEXTUALRULEFILE
61
- test/LEXICALRULEFILE
62
- test/LEXICON
63
63
  test/docs/doc0.txt
64
64
  test/docs/doc1.txt
65
65
  test/docs/doc2.txt
@@ -1,6 +1,6 @@
1
1
  have_header: checking for stdlib.h... -------------------- yes
2
2
 
3
- "gcc -E -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -o conftest.i"
3
+ "/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
4
4
  checked program was:
5
5
  /* begin */
6
6
  1: #include <stdlib.h>
@@ -10,7 +10,7 @@ checked program was:
10
10
 
11
11
  have_header: checking for string.h... -------------------- yes
12
12
 
13
- "gcc -E -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -o conftest.i"
13
+ "/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
14
14
  checked program was:
15
15
  /* begin */
16
16
  1: #include <string.h>
@@ -20,7 +20,7 @@ checked program was:
20
20
 
21
21
  have_library: checking for main() in -lc... -------------------- yes
22
22
 
23
- "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lruby-static -lc -lpthread -ldl -lcrypt -lm -lc"
23
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lruby-static -lc -lpthread -ldl -lobjc "
24
24
  checked program was:
25
25
  /* begin */
26
26
  1: /*top*/
@@ -32,7 +32,7 @@ checked program was:
32
32
 
33
33
  have_func: checking for snprintf() in stdio.h... -------------------- yes
34
34
 
35
- "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lc -lruby-static -lc -lpthread -ldl -lcrypt -lm -lc"
35
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lc -lruby-static -lc -lpthread -ldl -lobjc "
36
36
  checked program was:
37
37
  /* begin */
38
38
  1: #include <stdio.h>
@@ -1,6 +1,6 @@
1
1
  have_library: checking for main() in -lc... -------------------- yes
2
2
 
3
- "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lruby-static -lc -lpthread -ldl -lcrypt -lm -lc"
3
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lruby-static -lc -lpthread -ldl -lobjc "
4
4
  checked program was:
5
5
  /* begin */
6
6
  1: /*top*/
@@ -12,7 +12,7 @@ checked program was:
12
12
 
13
13
  have_library: checking for main() in -lstdc++... -------------------- yes
14
14
 
15
- "gcc -o conftest -I. -I/usr/lib64/ruby/1.8/x86_64-linux -I. -O2 -g -pipe -Wall -Wp,-D_FORTIFY_SOURCE=2 -fexceptions -fstack-protector --param=ssp-buffer-size=4 -m64 -mtune=generic -Wall -fPIC conftest.c -L. -L/usr/lib64 -L. -rdynamic -Wl,-export-dynamic -lc -lruby-static -lstdc++ -lc -lpthread -ldl -lcrypt -lm -lc"
15
+ "/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lc -lruby-static -lstdc++ -lc -lpthread -ldl -lobjc "
16
16
  checked program was:
17
17
  /* begin */
18
18
  1: /*top*/
File without changes
File without changes
File without changes
data/lib/brill/tagger.rb CHANGED
@@ -2,11 +2,26 @@ require 'rule_tagger'
2
2
 
3
3
  module Brill
4
4
  class Tagger
5
- def initialize( lexicon, lexical_rules, contextual_rules )
5
+ #
6
+ # will use the brown corpus as the default
7
+ #
8
+ def initialize( lexicon = nil, lexical_rules = nil, contextual_rules = nil)
6
9
  @tagger = ::Tagger::BrillTagger.new
7
- Brill::Tagger.load_lexicon(@tagger,lexicon)
8
- Brill::Tagger.load_lexical_rules(@tagger,lexical_rules)
9
- Brill::Tagger.load_contextual_rules(@tagger,contextual_rules)
10
+ lexicon ||= File.join(File.dirname(__FILE__),"brown","LEXICON")
11
+ lexical_rules ||= File.join(File.dirname(__FILE__),"brown","LEXICALRULEFILE")
12
+ contextual_rules ||= File.join(File.dirname(__FILE__),"brown","CONTEXTUALRULEFILE")
13
+
14
+ Brill::Tagger.load_lexicon(@tagger, lexicon )
15
+ Brill::Tagger.load_lexical_rules(@tagger, lexical_rules )
16
+ Brill::Tagger.load_contextual_rules(@tagger, contextual_rules )
17
+ end
18
+
19
+ # given a body of text return a list of adjectives
20
+ def adjectives( text )
21
+ end
22
+
23
+ # given a body of text return a list of nouns
24
+ def nouns( text )
10
25
  end
11
26
 
12
27
  # returns similar results as tag, but further reduced by only selecting nouns
@@ -1,8 +1,8 @@
1
1
  module RbTagger #:nodoc:
2
2
  module VERSION #:nodoc:
3
3
  MAJOR = 0
4
- MINOR = 2
5
- TINY = 9
4
+ MINOR = 3
5
+ TINY = 0
6
6
 
7
7
  STRING = [MAJOR, MINOR, TINY].join('.')
8
8
  end
@@ -32,12 +32,13 @@ The details of her visitation, however, are unclear.
32
32
  Asked by Us if she were happy with the court outcome, Spears (clutching an Ed Hardy purse) smiled and told Us, "Yes."
33
33
  Next up: A status hearing set for July 15.
34
34
  The couple last appeared in court May 6. Spears was granted extended visitation — three days a week from 9 a.m. to 5 p.m. — of Sean Preston, 2, and Jayden James, 20 months.
35
+ )
36
+ SAMPLE_DOC3=%q(
37
+ TMZ.com: Britney celebrated getting overnights with her kids by going on a wild shopping trip for herself.With L.A.'s finest at her service, it was a total clusterf**k outside of Fred Segal as Brit Brit made her way out. The scene was crazy -- and it was all... Read more
35
38
  )
36
39
  def setup
37
40
  if !defined?($tagger)
38
- $rtagger = Brill::Tagger.new( File.join(File.dirname(__FILE__),"LEXICON"),
39
- File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
40
- File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
41
+ $rtagger = Brill::Tagger.new
41
42
  end
42
43
  end
43
44
 
@@ -134,6 +135,8 @@ The couple last appeared in court May 6. Spears was granted extended visitation
134
135
  assert results.include?(["Britney Spears", "NNP", 6])
135
136
  assert results.include?(["Jamie Spears", "NNP", 12])
136
137
  # puts results.inspect
138
+ results = tagger.suggest( SAMPLE_DOC3, 5 )
139
+ puts results.inspect
137
140
  end
138
141
 
139
142
  private
data/website/index.html CHANGED
@@ -160,32 +160,19 @@
160
160
  <h1>rbtagger</h1>
161
161
  <div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
162
162
  <p>Get Version</p>
163
- <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.2.6</a>
163
+ <a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
164
164
  </div>
165
- <h4 style="float:right;padding-right:10px;"> &#x2192; &#8216;rbtagger&#8217;</h4>
166
-
167
- <h2>What</h2>
168
-
169
-
170
- <p>A Simple Ruby Rule-Based Part of Speech Tagger</p>
171
-
172
-
173
- <p>This work is based on the work of Eric Brill</p>
174
-
175
-
176
- <h2>Installing</h2>
177
-
178
-
179
- <p><pre class='syntax'>
165
+ <h4 style="float:right;padding-right:10px;"> &amp;#x2192; &#8216;rbtagger&#8217;</h4>
166
+ <h2>What</h2>
167
+ <p>A Simple Ruby Rule-Based Part of Speech Tagger</p>
168
+ <p>This work is based on the work of Eric Brill</p>
169
+ <h2>Installing</h2>
170
+ <p><pre class='syntax'>
180
171
  gem install rbtagger
181
172
  </pre></p>
182
-
183
-
184
- <h2>The basics</h2>
185
-
186
-
173
+ <h2>The basics</h2>
187
174
  <h4>Using the rule tagger</h4>
188
- <pre class='syntax'>
175
+ <p><pre class='syntax'>
189
176
  <span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
190
177
 
191
178
  <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),&quot;</span><span class="string">LEXICON</span><span class="punct">&quot;),</span>
@@ -197,53 +184,31 @@ gem install rbtagger
197
184
 
198
185
  <span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">(&quot;</span><span class="string">sample.txt</span><span class="punct">&quot;)</span> <span class="punct">)</span>
199
186
  <span class="punct">=&gt;</span> <span class="punct">[[&quot;</span><span class="string">doctor</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">[&quot;</span><span class="string">treatment</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">NN</span><span class="punct">&quot;,</span> <span class="number">5</span><span class="punct">]]</span>
200
- </pre>
201
-
187
+ </pre></p>
202
188
  <h4>Using the word tagger</h4>
203
- <pre class='syntax'>
189
+ <p><pre class='syntax'>
204
190
  <span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
205
191
 
206
192
  <span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Word</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="punct">['</span><span class="string">cat</span><span class="punct">','</span><span class="string">hat</span><span class="punct">'],</span> <span class="symbol">:words</span> <span class="punct">=&gt;</span> <span class="number">4</span> <span class="punct">)</span>
207
193
  <span class="ident">tags</span> <span class="punct">=</span> <span class="ident">tagger</span><span class="punct">.</span><span class="ident">execute</span><span class="punct">(</span> <span class="punct">'</span><span class="string">the cat and the hat</span><span class="punct">'</span> <span class="punct">)</span>
208
194
  <span class="ident">assert_equal</span><span class="punct">(</span> <span class="punct">[&quot;</span><span class="string">cat</span><span class="punct">&quot;,</span> <span class="punct">&quot;</span><span class="string">hat</span><span class="punct">&quot;],</span> <span class="ident">tags</span> <span class="punct">)</span>
209
- </pre>
210
-
211
- <h2>Forum</h2>
212
-
213
-
214
- <p><a href="http://groups.google.com/group/rb-brill-tagger">http://groups.google.com/group/rb-brill-tagger</a></p>
215
-
216
-
217
- <h2>How to submit patches</h2>
218
-
219
-
220
- <p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people&#8217;s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
221
-
222
-
223
- <ul>
195
+ </pre></p>
196
+ <h2>Forum</h2>
197
+ <p><a href="http://groups.google.com/group/rb-brill-tagger">http://groups.google.com/group/rb-brill-tagger</a></p>
198
+ <h2>How to submit patches</h2>
199
+ <p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people&#8217;s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
200
+ <ul>
224
201
  <li>github: <a href="http://github.com/taf2/rb-brill-tagger/tree/master">http://github.com/taf2/rb-brill-tagger/tree/master</a></li>
225
- </ul>
226
-
227
-
202
+ </ul>
228
203
  <pre>git clone git://github.com/taf2/rb-brill-tagger.git</pre>
229
-
230
- <h3>Build and test instructions</h3>
231
-
232
-
204
+ <h3>Build and test instructions</h3>
233
205
  <pre>cd rb-brill-tagger
234
206
  rake test
235
207
  rake install_gem</pre>
236
-
237
- <h2>License</h2>
238
-
239
-
240
- <p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
241
-
242
-
243
- <h2>Contact</h2>
244
-
245
-
246
- <p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> email via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
208
+ <h2>License</h2>
209
+ <p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
210
+ <h2>Contact</h2>
211
+ <p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> email via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
247
212
  <p class="coda">
248
213
  <a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 23rd June 2008<br>
249
214
  Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rbtagger
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.9
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Todd A. Fisher
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2008-08-28 00:00:00 -04:00
12
+ date: 2009-05-21 00:00:00 -04:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - ">="
22
22
  - !ruby/object:Gem::Version
23
- version: 1.7.0
23
+ version: 1.8.0
24
24
  version:
25
25
  description: A Simple Ruby Rule-Based Part of Speech Tagger
26
26
  email:
@@ -28,8 +28,8 @@ email:
28
28
  executables: []
29
29
 
30
30
  extensions:
31
- - ext/word_tagger/extconf.rb
32
31
  - ext/rule_tagger/extconf.rb
32
+ - ext/word_tagger/extconf.rb
33
33
  extra_rdoc_files:
34
34
  - History.txt
35
35
  - License.txt
@@ -98,6 +98,9 @@ files:
98
98
  - lib/rbtagger.rb
99
99
  - lib/rbtagger/version.rb
100
100
  - lib/word/tagger.rb
101
+ - lib/brill/brown/CONTEXTUALRULEFILE
102
+ - lib/brill/brown/LEXICALRULEFILE
103
+ - lib/brill/brown/LEXICON
101
104
  - script/console
102
105
  - script/destroy
103
106
  - script/generate
@@ -109,9 +112,6 @@ files:
109
112
  - tasks/extconf/rule_tagger.rake
110
113
  - tasks/extconf/word_tagger.rake
111
114
  - tasks/website.rake
112
- - test/CONTEXTUALRULEFILE
113
- - test/LEXICALRULEFILE
114
- - test/LEXICON
115
115
  - test/docs/doc0.txt
116
116
  - test/docs/doc1.txt
117
117
  - test/docs/doc2.txt
@@ -134,16 +134,18 @@ files:
134
134
  - website/template.html.erb
135
135
  has_rdoc: true
136
136
  homepage: http://rbtagger.rubyforge.org
137
+ licenses: []
138
+
137
139
  post_install_message: |
138
140
  For more information on rbtagger, see http://rbtagger.rubyforge.org
139
141
 
140
142
  rdoc_options:
141
143
  - --main
142
- - README.txt
144
+ - README
143
145
  require_paths:
144
146
  - lib
145
- - ext/word_tagger
146
147
  - ext/rule_tagger
148
+ - ext/word_tagger
147
149
  required_ruby_version: !ruby/object:Gem::Requirement
148
150
  requirements:
149
151
  - - ">="
@@ -159,11 +161,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
159
161
  requirements: []
160
162
 
161
163
  rubyforge_project: rbtagger
162
- rubygems_version: 1.2.0
164
+ rubygems_version: 1.3.2
163
165
  signing_key:
164
- specification_version: 2
166
+ specification_version: 3
165
167
  summary: A Simple Ruby Rule-Based Part of Speech Tagger
166
168
  test_files:
167
- - test/test_rule_tagger.rb
168
169
  - test/test_helper.rb
170
+ - test/test_rule_tagger.rb
169
171
  - test/test_word_tagger.rb