rbtagger 0.2.9 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Manifest.txt +3 -3
- data/ext/rule_tagger/mkmf.log +4 -4
- data/ext/word_tagger/mkmf.log +2 -2
- data/{test → lib/brill/brown}/CONTEXTUALRULEFILE +0 -0
- data/{test → lib/brill/brown}/LEXICALRULEFILE +0 -0
- data/{test → lib/brill/brown}/LEXICON +0 -0
- data/lib/brill/tagger.rb +19 -4
- data/lib/rbtagger/version.rb +2 -2
- data/test/test_rule_tagger.rb +6 -3
- data/website/index.html +23 -58
- metadata +14 -12
data/Manifest.txt
CHANGED
@@ -46,6 +46,9 @@ lib/brill/tagger.rb
|
|
46
46
|
lib/rbtagger.rb
|
47
47
|
lib/rbtagger/version.rb
|
48
48
|
lib/word/tagger.rb
|
49
|
+
lib/brill/brown/CONTEXTUALRULEFILE
|
50
|
+
lib/brill/brown/LEXICALRULEFILE
|
51
|
+
lib/brill/brown/LEXICON
|
49
52
|
script/console
|
50
53
|
script/destroy
|
51
54
|
script/generate
|
@@ -57,9 +60,6 @@ tasks/extconf.rake
|
|
57
60
|
tasks/extconf/rule_tagger.rake
|
58
61
|
tasks/extconf/word_tagger.rake
|
59
62
|
tasks/website.rake
|
60
|
-
test/CONTEXTUALRULEFILE
|
61
|
-
test/LEXICALRULEFILE
|
62
|
-
test/LEXICON
|
63
63
|
test/docs/doc0.txt
|
64
64
|
test/docs/doc1.txt
|
65
65
|
test/docs/doc2.txt
|
data/ext/rule_tagger/mkmf.log
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
have_header: checking for stdlib.h... -------------------- yes
|
2
2
|
|
3
|
-
"
|
3
|
+
"/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
|
4
4
|
checked program was:
|
5
5
|
/* begin */
|
6
6
|
1: #include <stdlib.h>
|
@@ -10,7 +10,7 @@ checked program was:
|
|
10
10
|
|
11
11
|
have_header: checking for string.h... -------------------- yes
|
12
12
|
|
13
|
-
"
|
13
|
+
"/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
|
14
14
|
checked program was:
|
15
15
|
/* begin */
|
16
16
|
1: #include <string.h>
|
@@ -20,7 +20,7 @@ checked program was:
|
|
20
20
|
|
21
21
|
have_library: checking for main() in -lc... -------------------- yes
|
22
22
|
|
23
|
-
"gcc -o conftest -I. -I/
|
23
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lruby-static -lc -lpthread -ldl -lobjc "
|
24
24
|
checked program was:
|
25
25
|
/* begin */
|
26
26
|
1: /*top*/
|
@@ -32,7 +32,7 @@ checked program was:
|
|
32
32
|
|
33
33
|
have_func: checking for snprintf() in stdio.h... -------------------- yes
|
34
34
|
|
35
|
-
"gcc -o conftest -I. -I/
|
35
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lc -lruby-static -lc -lpthread -ldl -lobjc "
|
36
36
|
checked program was:
|
37
37
|
/* begin */
|
38
38
|
1: #include <stdio.h>
|
data/ext/word_tagger/mkmf.log
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
have_library: checking for main() in -lc... -------------------- yes
|
2
2
|
|
3
|
-
"gcc -o conftest -I. -I/
|
3
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lruby-static -lc -lpthread -ldl -lobjc "
|
4
4
|
checked program was:
|
5
5
|
/* begin */
|
6
6
|
1: /*top*/
|
@@ -12,7 +12,7 @@ checked program was:
|
|
12
12
|
|
13
13
|
have_library: checking for main() in -lstdc++... -------------------- yes
|
14
14
|
|
15
|
-
"gcc -o conftest -I. -I/
|
15
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lc -lruby-static -lstdc++ -lc -lpthread -ldl -lobjc "
|
16
16
|
checked program was:
|
17
17
|
/* begin */
|
18
18
|
1: /*top*/
|
File without changes
|
File without changes
|
File without changes
|
data/lib/brill/tagger.rb
CHANGED
@@ -2,11 +2,26 @@ require 'rule_tagger'
|
|
2
2
|
|
3
3
|
module Brill
|
4
4
|
class Tagger
|
5
|
-
|
5
|
+
#
|
6
|
+
# will use the brown corpus as the default
|
7
|
+
#
|
8
|
+
def initialize( lexicon = nil, lexical_rules = nil, contextual_rules = nil)
|
6
9
|
@tagger = ::Tagger::BrillTagger.new
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
+
lexicon ||= File.join(File.dirname(__FILE__),"brown","LEXICON")
|
11
|
+
lexical_rules ||= File.join(File.dirname(__FILE__),"brown","LEXICALRULEFILE")
|
12
|
+
contextual_rules ||= File.join(File.dirname(__FILE__),"brown","CONTEXTUALRULEFILE")
|
13
|
+
|
14
|
+
Brill::Tagger.load_lexicon(@tagger, lexicon )
|
15
|
+
Brill::Tagger.load_lexical_rules(@tagger, lexical_rules )
|
16
|
+
Brill::Tagger.load_contextual_rules(@tagger, contextual_rules )
|
17
|
+
end
|
18
|
+
|
19
|
+
# given a body of text return a list of adjectives
|
20
|
+
def adjectives( text )
|
21
|
+
end
|
22
|
+
|
23
|
+
# given a body of text return a list of nouns
|
24
|
+
def nouns( text )
|
10
25
|
end
|
11
26
|
|
12
27
|
# returns similar results as tag, but further reduced by only selecting nouns
|
data/lib/rbtagger/version.rb
CHANGED
data/test/test_rule_tagger.rb
CHANGED
@@ -32,12 +32,13 @@ The details of her visitation, however, are unclear.
|
|
32
32
|
Asked by Us if she were happy with the court outcome, Spears (clutching an Ed Hardy purse) smiled and told Us, "Yes."
|
33
33
|
Next up: A status hearing set for July 15.
|
34
34
|
The couple last appeared in court May 6. Spears was granted extended visitation — three days a week from 9 a.m. to 5 p.m. — of Sean Preston, 2, and Jayden James, 20 months.
|
35
|
+
)
|
36
|
+
SAMPLE_DOC3=%q(
|
37
|
+
TMZ.com: Britney celebrated getting overnights with her kids by going on a wild shopping trip for herself.With L.A.'s finest at her service, it was a total clusterf**k outside of Fred Segal as Brit Brit made her way out. The scene was crazy -- and it was all... Read more
|
35
38
|
)
|
36
39
|
def setup
|
37
40
|
if !defined?($tagger)
|
38
|
-
$rtagger = Brill::Tagger.new
|
39
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
40
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
41
|
+
$rtagger = Brill::Tagger.new
|
41
42
|
end
|
42
43
|
end
|
43
44
|
|
@@ -134,6 +135,8 @@ The couple last appeared in court May 6. Spears was granted extended visitation
|
|
134
135
|
assert results.include?(["Britney Spears", "NNP", 6])
|
135
136
|
assert results.include?(["Jamie Spears", "NNP", 12])
|
136
137
|
# puts results.inspect
|
138
|
+
results = tagger.suggest( SAMPLE_DOC3, 5 )
|
139
|
+
puts results.inspect
|
137
140
|
end
|
138
141
|
|
139
142
|
private
|
data/website/index.html
CHANGED
@@ -160,32 +160,19 @@
|
|
160
160
|
<h1>rbtagger</h1>
|
161
161
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
|
162
162
|
<p>Get Version</p>
|
163
|
-
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.
|
163
|
+
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
|
164
164
|
</div>
|
165
|
-
<h4 style="float:right;padding-right:10px;">
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
<p>This work is based on the work of Eric Brill</p>
|
174
|
-
|
175
|
-
|
176
|
-
<h2>Installing</h2>
|
177
|
-
|
178
|
-
|
179
|
-
<p><pre class='syntax'>
|
165
|
+
<h4 style="float:right;padding-right:10px;"> &#x2192; ‘rbtagger’</h4>
|
166
|
+
<h2>What</h2>
|
167
|
+
<p>A Simple Ruby Rule-Based Part of Speech Tagger</p>
|
168
|
+
<p>This work is based on the work of Eric Brill</p>
|
169
|
+
<h2>Installing</h2>
|
170
|
+
<p><pre class='syntax'>
|
180
171
|
gem install rbtagger
|
181
172
|
</pre></p>
|
182
|
-
|
183
|
-
|
184
|
-
<h2>The basics</h2>
|
185
|
-
|
186
|
-
|
173
|
+
<h2>The basics</h2>
|
187
174
|
<h4>Using the rule tagger</h4>
|
188
|
-
<pre class='syntax'>
|
175
|
+
<p><pre class='syntax'>
|
189
176
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
|
190
177
|
|
191
178
|
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">LEXICON</span><span class="punct">"),</span>
|
@@ -197,53 +184,31 @@ gem install rbtagger
|
|
197
184
|
|
198
185
|
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">sample.txt</span><span class="punct">")</span> <span class="punct">)</span>
|
199
186
|
<span class="punct">=></span> <span class="punct">[["</span><span class="string">doctor</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">["</span><span class="string">treatment</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">5</span><span class="punct">]]</span>
|
200
|
-
</pre>
|
201
|
-
|
187
|
+
</pre></p>
|
202
188
|
<h4>Using the word tagger</h4>
|
203
|
-
<pre class='syntax'>
|
189
|
+
<p><pre class='syntax'>
|
204
190
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
|
205
191
|
|
206
192
|
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Word</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="punct">['</span><span class="string">cat</span><span class="punct">','</span><span class="string">hat</span><span class="punct">'],</span> <span class="symbol">:words</span> <span class="punct">=></span> <span class="number">4</span> <span class="punct">)</span>
|
207
193
|
<span class="ident">tags</span> <span class="punct">=</span> <span class="ident">tagger</span><span class="punct">.</span><span class="ident">execute</span><span class="punct">(</span> <span class="punct">'</span><span class="string">the cat and the hat</span><span class="punct">'</span> <span class="punct">)</span>
|
208
194
|
<span class="ident">assert_equal</span><span class="punct">(</span> <span class="punct">["</span><span class="string">cat</span><span class="punct">",</span> <span class="punct">"</span><span class="string">hat</span><span class="punct">"],</span> <span class="ident">tags</span> <span class="punct">)</span>
|
209
|
-
</pre>
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
<h2>How to submit patches</h2>
|
218
|
-
|
219
|
-
|
220
|
-
<p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people’s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
|
221
|
-
|
222
|
-
|
223
|
-
<ul>
|
195
|
+
</pre></p>
|
196
|
+
<h2>Forum</h2>
|
197
|
+
<p><a href="http://groups.google.com/group/rb-brill-tagger">http://groups.google.com/group/rb-brill-tagger</a></p>
|
198
|
+
<h2>How to submit patches</h2>
|
199
|
+
<p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people’s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
|
200
|
+
<ul>
|
224
201
|
<li>github: <a href="http://github.com/taf2/rb-brill-tagger/tree/master">http://github.com/taf2/rb-brill-tagger/tree/master</a></li>
|
225
|
-
|
226
|
-
|
227
|
-
|
202
|
+
</ul>
|
228
203
|
<pre>git clone git://github.com/taf2/rb-brill-tagger.git</pre>
|
229
|
-
|
230
|
-
<h3>Build and test instructions</h3>
|
231
|
-
|
232
|
-
|
204
|
+
<h3>Build and test instructions</h3>
|
233
205
|
<pre>cd rb-brill-tagger
|
234
206
|
rake test
|
235
207
|
rake install_gem</pre>
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
241
|
-
|
242
|
-
|
243
|
-
<h2>Contact</h2>
|
244
|
-
|
245
|
-
|
246
|
-
<p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> email via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
|
208
|
+
<h2>License</h2>
|
209
|
+
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
210
|
+
<h2>Contact</h2>
|
211
|
+
<p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> email via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
|
247
212
|
<p class="coda">
|
248
213
|
<a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 23rd June 2008<br>
|
249
214
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rbtagger
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Todd A. Fisher
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date:
|
12
|
+
date: 2009-05-21 00:00:00 -04:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - ">="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: 1.
|
23
|
+
version: 1.8.0
|
24
24
|
version:
|
25
25
|
description: A Simple Ruby Rule-Based Part of Speech Tagger
|
26
26
|
email:
|
@@ -28,8 +28,8 @@ email:
|
|
28
28
|
executables: []
|
29
29
|
|
30
30
|
extensions:
|
31
|
-
- ext/word_tagger/extconf.rb
|
32
31
|
- ext/rule_tagger/extconf.rb
|
32
|
+
- ext/word_tagger/extconf.rb
|
33
33
|
extra_rdoc_files:
|
34
34
|
- History.txt
|
35
35
|
- License.txt
|
@@ -98,6 +98,9 @@ files:
|
|
98
98
|
- lib/rbtagger.rb
|
99
99
|
- lib/rbtagger/version.rb
|
100
100
|
- lib/word/tagger.rb
|
101
|
+
- lib/brill/brown/CONTEXTUALRULEFILE
|
102
|
+
- lib/brill/brown/LEXICALRULEFILE
|
103
|
+
- lib/brill/brown/LEXICON
|
101
104
|
- script/console
|
102
105
|
- script/destroy
|
103
106
|
- script/generate
|
@@ -109,9 +112,6 @@ files:
|
|
109
112
|
- tasks/extconf/rule_tagger.rake
|
110
113
|
- tasks/extconf/word_tagger.rake
|
111
114
|
- tasks/website.rake
|
112
|
-
- test/CONTEXTUALRULEFILE
|
113
|
-
- test/LEXICALRULEFILE
|
114
|
-
- test/LEXICON
|
115
115
|
- test/docs/doc0.txt
|
116
116
|
- test/docs/doc1.txt
|
117
117
|
- test/docs/doc2.txt
|
@@ -134,16 +134,18 @@ files:
|
|
134
134
|
- website/template.html.erb
|
135
135
|
has_rdoc: true
|
136
136
|
homepage: http://rbtagger.rubyforge.org
|
137
|
+
licenses: []
|
138
|
+
|
137
139
|
post_install_message: |
|
138
140
|
For more information on rbtagger, see http://rbtagger.rubyforge.org
|
139
141
|
|
140
142
|
rdoc_options:
|
141
143
|
- --main
|
142
|
-
- README
|
144
|
+
- README
|
143
145
|
require_paths:
|
144
146
|
- lib
|
145
|
-
- ext/word_tagger
|
146
147
|
- ext/rule_tagger
|
148
|
+
- ext/word_tagger
|
147
149
|
required_ruby_version: !ruby/object:Gem::Requirement
|
148
150
|
requirements:
|
149
151
|
- - ">="
|
@@ -159,11 +161,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
159
161
|
requirements: []
|
160
162
|
|
161
163
|
rubyforge_project: rbtagger
|
162
|
-
rubygems_version: 1.2
|
164
|
+
rubygems_version: 1.3.2
|
163
165
|
signing_key:
|
164
|
-
specification_version:
|
166
|
+
specification_version: 3
|
165
167
|
summary: A Simple Ruby Rule-Based Part of Speech Tagger
|
166
168
|
test_files:
|
167
|
-
- test/test_rule_tagger.rb
|
168
169
|
- test/test_helper.rb
|
170
|
+
- test/test_rule_tagger.rb
|
169
171
|
- test/test_word_tagger.rb
|