rbtagger 0.2.9 → 0.3.0
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- data/Manifest.txt +3 -3
- data/ext/rule_tagger/mkmf.log +4 -4
- data/ext/word_tagger/mkmf.log +2 -2
- data/{test → lib/brill/brown}/CONTEXTUALRULEFILE +0 -0
- data/{test → lib/brill/brown}/LEXICALRULEFILE +0 -0
- data/{test → lib/brill/brown}/LEXICON +0 -0
- data/lib/brill/tagger.rb +19 -4
- data/lib/rbtagger/version.rb +2 -2
- data/test/test_rule_tagger.rb +6 -3
- data/website/index.html +23 -58
- metadata +14 -12
data/Manifest.txt
CHANGED
|
@@ -46,6 +46,9 @@ lib/brill/tagger.rb
|
|
|
46
46
|
lib/rbtagger.rb
|
|
47
47
|
lib/rbtagger/version.rb
|
|
48
48
|
lib/word/tagger.rb
|
|
49
|
+
lib/brill/brown/CONTEXTUALRULEFILE
|
|
50
|
+
lib/brill/brown/LEXICALRULEFILE
|
|
51
|
+
lib/brill/brown/LEXICON
|
|
49
52
|
script/console
|
|
50
53
|
script/destroy
|
|
51
54
|
script/generate
|
|
@@ -57,9 +60,6 @@ tasks/extconf.rake
|
|
|
57
60
|
tasks/extconf/rule_tagger.rake
|
|
58
61
|
tasks/extconf/word_tagger.rake
|
|
59
62
|
tasks/website.rake
|
|
60
|
-
test/CONTEXTUALRULEFILE
|
|
61
|
-
test/LEXICALRULEFILE
|
|
62
|
-
test/LEXICON
|
|
63
63
|
test/docs/doc0.txt
|
|
64
64
|
test/docs/doc1.txt
|
|
65
65
|
test/docs/doc2.txt
|
data/ext/rule_tagger/mkmf.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
have_header: checking for stdlib.h... -------------------- yes
|
|
2
2
|
|
|
3
|
-
"
|
|
3
|
+
"/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
|
|
4
4
|
checked program was:
|
|
5
5
|
/* begin */
|
|
6
6
|
1: #include <stdlib.h>
|
|
@@ -10,7 +10,7 @@ checked program was:
|
|
|
10
10
|
|
|
11
11
|
have_header: checking for string.h... -------------------- yes
|
|
12
12
|
|
|
13
|
-
"
|
|
13
|
+
"/usr/bin/cpp-4.0 -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -o conftest.i"
|
|
14
14
|
checked program was:
|
|
15
15
|
/* begin */
|
|
16
16
|
1: #include <string.h>
|
|
@@ -20,7 +20,7 @@ checked program was:
|
|
|
20
20
|
|
|
21
21
|
have_library: checking for main() in -lc... -------------------- yes
|
|
22
22
|
|
|
23
|
-
"gcc -o conftest -I. -I/
|
|
23
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lruby-static -lc -lpthread -ldl -lobjc "
|
|
24
24
|
checked program was:
|
|
25
25
|
/* begin */
|
|
26
26
|
1: /*top*/
|
|
@@ -32,7 +32,7 @@ checked program was:
|
|
|
32
32
|
|
|
33
33
|
have_func: checking for snprintf() in stdio.h... -------------------- yes
|
|
34
34
|
|
|
35
|
-
"gcc -o conftest -I. -I/
|
|
35
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lc -lruby-static -lc -lpthread -ldl -lobjc "
|
|
36
36
|
checked program was:
|
|
37
37
|
/* begin */
|
|
38
38
|
1: #include <stdio.h>
|
data/ext/word_tagger/mkmf.log
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
have_library: checking for main() in -lc... -------------------- yes
|
|
2
2
|
|
|
3
|
-
"gcc -o conftest -I. -I/
|
|
3
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lruby-static -lc -lpthread -ldl -lobjc "
|
|
4
4
|
checked program was:
|
|
5
5
|
/* begin */
|
|
6
6
|
1: /*top*/
|
|
@@ -12,7 +12,7 @@ checked program was:
|
|
|
12
12
|
|
|
13
13
|
have_library: checking for main() in -lstdc++... -------------------- yes
|
|
14
14
|
|
|
15
|
-
"gcc -o conftest -I. -I/
|
|
15
|
+
"/usr/bin/gcc-4.0 -o conftest -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.6.0 -I. -I/opt/local/include -D_XOPEN_SOURCE -D_DARWIN_C_SOURCE -I/opt/local/include -O2 -fno-common -pipe -fno-common conftest.c -L. -L/opt/local/lib -L. -L/opt/local/lib -lc -lruby-static -lstdc++ -lc -lpthread -ldl -lobjc "
|
|
16
16
|
checked program was:
|
|
17
17
|
/* begin */
|
|
18
18
|
1: /*top*/
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
data/lib/brill/tagger.rb
CHANGED
|
@@ -2,11 +2,26 @@ require 'rule_tagger'
|
|
|
2
2
|
|
|
3
3
|
module Brill
|
|
4
4
|
class Tagger
|
|
5
|
-
|
|
5
|
+
#
|
|
6
|
+
# will use the brown corpus as the default
|
|
7
|
+
#
|
|
8
|
+
def initialize( lexicon = nil, lexical_rules = nil, contextual_rules = nil)
|
|
6
9
|
@tagger = ::Tagger::BrillTagger.new
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
+
lexicon ||= File.join(File.dirname(__FILE__),"brown","LEXICON")
|
|
11
|
+
lexical_rules ||= File.join(File.dirname(__FILE__),"brown","LEXICALRULEFILE")
|
|
12
|
+
contextual_rules ||= File.join(File.dirname(__FILE__),"brown","CONTEXTUALRULEFILE")
|
|
13
|
+
|
|
14
|
+
Brill::Tagger.load_lexicon(@tagger, lexicon )
|
|
15
|
+
Brill::Tagger.load_lexical_rules(@tagger, lexical_rules )
|
|
16
|
+
Brill::Tagger.load_contextual_rules(@tagger, contextual_rules )
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
# given a body of text return a list of adjectives
|
|
20
|
+
def adjectives( text )
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# given a body of text return a list of nouns
|
|
24
|
+
def nouns( text )
|
|
10
25
|
end
|
|
11
26
|
|
|
12
27
|
# returns similar results as tag, but further reduced by only selecting nouns
|
data/lib/rbtagger/version.rb
CHANGED
data/test/test_rule_tagger.rb
CHANGED
|
@@ -32,12 +32,13 @@ The details of her visitation, however, are unclear.
|
|
|
32
32
|
Asked by Us if she were happy with the court outcome, Spears (clutching an Ed Hardy purse) smiled and told Us, "Yes."
|
|
33
33
|
Next up: A status hearing set for July 15.
|
|
34
34
|
The couple last appeared in court May 6. Spears was granted extended visitation — three days a week from 9 a.m. to 5 p.m. — of Sean Preston, 2, and Jayden James, 20 months.
|
|
35
|
+
)
|
|
36
|
+
SAMPLE_DOC3=%q(
|
|
37
|
+
TMZ.com: Britney celebrated getting overnights with her kids by going on a wild shopping trip for herself.With L.A.'s finest at her service, it was a total clusterf**k outside of Fred Segal as Brit Brit made her way out. The scene was crazy -- and it was all... Read more
|
|
35
38
|
)
|
|
36
39
|
def setup
|
|
37
40
|
if !defined?($tagger)
|
|
38
|
-
$rtagger = Brill::Tagger.new
|
|
39
|
-
File.join(File.dirname(__FILE__),"LEXICALRULEFILE"),
|
|
40
|
-
File.join(File.dirname(__FILE__),"CONTEXTUALRULEFILE") )
|
|
41
|
+
$rtagger = Brill::Tagger.new
|
|
41
42
|
end
|
|
42
43
|
end
|
|
43
44
|
|
|
@@ -134,6 +135,8 @@ The couple last appeared in court May 6. Spears was granted extended visitation
|
|
|
134
135
|
assert results.include?(["Britney Spears", "NNP", 6])
|
|
135
136
|
assert results.include?(["Jamie Spears", "NNP", 12])
|
|
136
137
|
# puts results.inspect
|
|
138
|
+
results = tagger.suggest( SAMPLE_DOC3, 5 )
|
|
139
|
+
puts results.inspect
|
|
137
140
|
end
|
|
138
141
|
|
|
139
142
|
private
|
data/website/index.html
CHANGED
|
@@ -160,32 +160,19 @@
|
|
|
160
160
|
<h1>rbtagger</h1>
|
|
161
161
|
<div id="version" class="clickable" onclick='document.location = "http://rubyforge.org/projects/rbtagger"; return false'>
|
|
162
162
|
<p>Get Version</p>
|
|
163
|
-
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.
|
|
163
|
+
<a href="http://rubyforge.org/projects/rbtagger" class="numbers">0.3.0</a>
|
|
164
164
|
</div>
|
|
165
|
-
<h4 style="float:right;padding-right:10px;">
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
<p>This work is based on the work of Eric Brill</p>
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
<h2>Installing</h2>
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
<p><pre class='syntax'>
|
|
165
|
+
<h4 style="float:right;padding-right:10px;"> &#x2192; ‘rbtagger’</h4>
|
|
166
|
+
<h2>What</h2>
|
|
167
|
+
<p>A Simple Ruby Rule-Based Part of Speech Tagger</p>
|
|
168
|
+
<p>This work is based on the work of Eric Brill</p>
|
|
169
|
+
<h2>Installing</h2>
|
|
170
|
+
<p><pre class='syntax'>
|
|
180
171
|
gem install rbtagger
|
|
181
172
|
</pre></p>
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
<h2>The basics</h2>
|
|
185
|
-
|
|
186
|
-
|
|
173
|
+
<h2>The basics</h2>
|
|
187
174
|
<h4>Using the rule tagger</h4>
|
|
188
|
-
<pre class='syntax'>
|
|
175
|
+
<p><pre class='syntax'>
|
|
189
176
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
|
|
190
177
|
|
|
191
178
|
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Brill</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">join</span><span class="punct">(</span><span class="constant">File</span><span class="punct">.</span><span class="ident">dirname</span><span class="punct">(</span><span class="constant">__FILE__</span><span class="punct">),"</span><span class="string">LEXICON</span><span class="punct">"),</span>
|
|
@@ -197,53 +184,31 @@ gem install rbtagger
|
|
|
197
184
|
|
|
198
185
|
<span class="ident">tagger</span><span class="punct">.</span><span class="ident">suggest</span><span class="punct">(</span> <span class="constant">File</span><span class="punct">.</span><span class="ident">read</span><span class="punct">("</span><span class="string">sample.txt</span><span class="punct">")</span> <span class="punct">)</span>
|
|
199
186
|
<span class="punct">=></span> <span class="punct">[["</span><span class="string">doctor</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">3</span><span class="punct">],</span> <span class="punct">["</span><span class="string">treatment</span><span class="punct">",</span> <span class="punct">"</span><span class="string">NN</span><span class="punct">",</span> <span class="number">5</span><span class="punct">]]</span>
|
|
200
|
-
</pre>
|
|
201
|
-
|
|
187
|
+
</pre></p>
|
|
202
188
|
<h4>Using the word tagger</h4>
|
|
203
|
-
<pre class='syntax'>
|
|
189
|
+
<p><pre class='syntax'>
|
|
204
190
|
<span class="ident">require</span> <span class="punct">'</span><span class="string">rbtagger</span><span class="punct">'</span>
|
|
205
191
|
|
|
206
192
|
<span class="ident">tagger</span> <span class="punct">=</span> <span class="constant">Word</span><span class="punct">::</span><span class="constant">Tagger</span><span class="punct">.</span><span class="ident">new</span><span class="punct">(</span> <span class="punct">['</span><span class="string">cat</span><span class="punct">','</span><span class="string">hat</span><span class="punct">'],</span> <span class="symbol">:words</span> <span class="punct">=></span> <span class="number">4</span> <span class="punct">)</span>
|
|
207
193
|
<span class="ident">tags</span> <span class="punct">=</span> <span class="ident">tagger</span><span class="punct">.</span><span class="ident">execute</span><span class="punct">(</span> <span class="punct">'</span><span class="string">the cat and the hat</span><span class="punct">'</span> <span class="punct">)</span>
|
|
208
194
|
<span class="ident">assert_equal</span><span class="punct">(</span> <span class="punct">["</span><span class="string">cat</span><span class="punct">",</span> <span class="punct">"</span><span class="string">hat</span><span class="punct">"],</span> <span class="ident">tags</span> <span class="punct">)</span>
|
|
209
|
-
</pre>
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
<h2>How to submit patches</h2>
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
<p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people’s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
<ul>
|
|
195
|
+
</pre></p>
|
|
196
|
+
<h2>Forum</h2>
|
|
197
|
+
<p><a href="http://groups.google.com/group/rb-brill-tagger">http://groups.google.com/group/rb-brill-tagger</a></p>
|
|
198
|
+
<h2>How to submit patches</h2>
|
|
199
|
+
<p>Read the <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/">8 steps for fixing other people’s code</a> and for section <a href="http://drnicwilliams.com/2007/06/01/8-steps-for-fixing-other-peoples-code/#8b-google-groups">8b: Submit patch to Google Groups</a>, use the Google Group above.</p>
|
|
200
|
+
<ul>
|
|
224
201
|
<li>github: <a href="http://github.com/taf2/rb-brill-tagger/tree/master">http://github.com/taf2/rb-brill-tagger/tree/master</a></li>
|
|
225
|
-
|
|
226
|
-
|
|
227
|
-
|
|
202
|
+
</ul>
|
|
228
203
|
<pre>git clone git://github.com/taf2/rb-brill-tagger.git</pre>
|
|
229
|
-
|
|
230
|
-
<h3>Build and test instructions</h3>
|
|
231
|
-
|
|
232
|
-
|
|
204
|
+
<h3>Build and test instructions</h3>
|
|
233
205
|
<pre>cd rb-brill-tagger
|
|
234
206
|
rake test
|
|
235
207
|
rake install_gem</pre>
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
<h2>Contact</h2>
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
<p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> email via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
|
|
208
|
+
<h2>License</h2>
|
|
209
|
+
<p>This code is free to use under the terms of the <span class="caps">MIT</span> license.</p>
|
|
210
|
+
<h2>Contact</h2>
|
|
211
|
+
<p>Comments are welcome. Send an email to <a href="mailto:rb-brill-tagger@googlegroups.com">Todd A. Fisher</a> email via the <a href="http://groups.google.com/group/rb-brill-tagger">forum</a></p>
|
|
247
212
|
<p class="coda">
|
|
248
213
|
<a href="http://xullicious.blogspot.com/">Todd A. Fisher</a>, 23rd June 2008<br>
|
|
249
214
|
Theme extended from <a href="http://rb2js.rubyforge.org/">Paul Battley</a>
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rbtagger
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Todd A. Fisher
|
|
@@ -9,7 +9,7 @@ autorequire:
|
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
11
|
|
|
12
|
-
date:
|
|
12
|
+
date: 2009-05-21 00:00:00 -04:00
|
|
13
13
|
default_executable:
|
|
14
14
|
dependencies:
|
|
15
15
|
- !ruby/object:Gem::Dependency
|
|
@@ -20,7 +20,7 @@ dependencies:
|
|
|
20
20
|
requirements:
|
|
21
21
|
- - ">="
|
|
22
22
|
- !ruby/object:Gem::Version
|
|
23
|
-
version: 1.
|
|
23
|
+
version: 1.8.0
|
|
24
24
|
version:
|
|
25
25
|
description: A Simple Ruby Rule-Based Part of Speech Tagger
|
|
26
26
|
email:
|
|
@@ -28,8 +28,8 @@ email:
|
|
|
28
28
|
executables: []
|
|
29
29
|
|
|
30
30
|
extensions:
|
|
31
|
-
- ext/word_tagger/extconf.rb
|
|
32
31
|
- ext/rule_tagger/extconf.rb
|
|
32
|
+
- ext/word_tagger/extconf.rb
|
|
33
33
|
extra_rdoc_files:
|
|
34
34
|
- History.txt
|
|
35
35
|
- License.txt
|
|
@@ -98,6 +98,9 @@ files:
|
|
|
98
98
|
- lib/rbtagger.rb
|
|
99
99
|
- lib/rbtagger/version.rb
|
|
100
100
|
- lib/word/tagger.rb
|
|
101
|
+
- lib/brill/brown/CONTEXTUALRULEFILE
|
|
102
|
+
- lib/brill/brown/LEXICALRULEFILE
|
|
103
|
+
- lib/brill/brown/LEXICON
|
|
101
104
|
- script/console
|
|
102
105
|
- script/destroy
|
|
103
106
|
- script/generate
|
|
@@ -109,9 +112,6 @@ files:
|
|
|
109
112
|
- tasks/extconf/rule_tagger.rake
|
|
110
113
|
- tasks/extconf/word_tagger.rake
|
|
111
114
|
- tasks/website.rake
|
|
112
|
-
- test/CONTEXTUALRULEFILE
|
|
113
|
-
- test/LEXICALRULEFILE
|
|
114
|
-
- test/LEXICON
|
|
115
115
|
- test/docs/doc0.txt
|
|
116
116
|
- test/docs/doc1.txt
|
|
117
117
|
- test/docs/doc2.txt
|
|
@@ -134,16 +134,18 @@ files:
|
|
|
134
134
|
- website/template.html.erb
|
|
135
135
|
has_rdoc: true
|
|
136
136
|
homepage: http://rbtagger.rubyforge.org
|
|
137
|
+
licenses: []
|
|
138
|
+
|
|
137
139
|
post_install_message: |
|
|
138
140
|
For more information on rbtagger, see http://rbtagger.rubyforge.org
|
|
139
141
|
|
|
140
142
|
rdoc_options:
|
|
141
143
|
- --main
|
|
142
|
-
- README
|
|
144
|
+
- README
|
|
143
145
|
require_paths:
|
|
144
146
|
- lib
|
|
145
|
-
- ext/word_tagger
|
|
146
147
|
- ext/rule_tagger
|
|
148
|
+
- ext/word_tagger
|
|
147
149
|
required_ruby_version: !ruby/object:Gem::Requirement
|
|
148
150
|
requirements:
|
|
149
151
|
- - ">="
|
|
@@ -159,11 +161,11 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
159
161
|
requirements: []
|
|
160
162
|
|
|
161
163
|
rubyforge_project: rbtagger
|
|
162
|
-
rubygems_version: 1.2
|
|
164
|
+
rubygems_version: 1.3.2
|
|
163
165
|
signing_key:
|
|
164
|
-
specification_version:
|
|
166
|
+
specification_version: 3
|
|
165
167
|
summary: A Simple Ruby Rule-Based Part of Speech Tagger
|
|
166
168
|
test_files:
|
|
167
|
-
- test/test_rule_tagger.rb
|
|
168
169
|
- test/test_helper.rb
|
|
170
|
+
- test/test_rule_tagger.rb
|
|
169
171
|
- test/test_word_tagger.rb
|