rbtagger 0.4.0 → 0.4.1
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/rule_tagger/rbtagger.c +6 -6
- data/ext/rule_tagger/ruby-compat.h +8 -0
- data/ext/word_tagger/rtagger.cc +10 -1
- data/lib/brill/tagger.rb +4 -4
- data/lib/rbtagger/version.rb +1 -1
- metadata +1 -1
data/ext/rule_tagger/rbtagger.c
CHANGED
@@ -82,8 +82,8 @@ BrillTagger_apply_lexical_rules( VALUE self, VALUE tokens, VALUE tags, VALUE wor
|
|
82
82
|
{
|
83
83
|
TaggerContext *tc;
|
84
84
|
int i = 0;
|
85
|
-
int token_length =
|
86
|
-
int tags_length =
|
85
|
+
int token_length = RARRAY_LEN(tokens);
|
86
|
+
int tags_length = RARRAY_LEN(tags);
|
87
87
|
int rules_length;
|
88
88
|
VALUE fetched;
|
89
89
|
int EXTRAWDS = NUM2INT( extrawds );
|
@@ -150,8 +150,8 @@ BrillTagger_default_tag_finish( VALUE self, VALUE tokens, VALUE tags )
|
|
150
150
|
int i;
|
151
151
|
VALUE fetched, word;
|
152
152
|
char *tempstr;
|
153
|
-
int token_length =
|
154
|
-
int tags_length =
|
153
|
+
int token_length = RARRAY_LEN(tokens);
|
154
|
+
int tags_length = RARRAY_LEN(tags);
|
155
155
|
TaggerContext *tc;
|
156
156
|
|
157
157
|
Data_Get_Struct( self, TaggerContext, tc );
|
@@ -182,8 +182,8 @@ static VALUE
|
|
182
182
|
BrillTagger_apply_contextual_rules( VALUE self, VALUE tokens, VALUE tags, VALUE rmove )
|
183
183
|
{
|
184
184
|
int i;
|
185
|
-
int token_length =
|
186
|
-
int tags_length =
|
185
|
+
int token_length = RARRAY_LEN(tokens);
|
186
|
+
int tags_length = RARRAY_LEN(tags);
|
187
187
|
int rules_length;
|
188
188
|
int restrict_move = NUM2INT( rmove );
|
189
189
|
char **text_tags, **text_tokens;
|
data/ext/word_tagger/rtagger.cc
CHANGED
@@ -21,6 +21,15 @@
|
|
21
21
|
#define RSTRING_LEN(str) RSTRING(str)->len
|
22
22
|
#endif
|
23
23
|
|
24
|
+
#ifndef RARRAY_LEN
|
25
|
+
#define RARRAY_LEN(ar) RARRAY(ar)->len
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#ifndef RARRAY_PTR
|
29
|
+
#define RARRAY_PTR(ar) RARRAY(ar)->ptr
|
30
|
+
#endif
|
31
|
+
|
32
|
+
|
24
33
|
static VALUE rb_Tagger;
|
25
34
|
static VALUE rb_NWordTagger;
|
26
35
|
|
@@ -83,7 +92,7 @@ VALUE Tagger_load_tags( VALUE self, VALUE tagarr )
|
|
83
92
|
NWordTagger *tagger;
|
84
93
|
Data_Get_Struct( self, NWordTagger, tagger );
|
85
94
|
std::set<std::string> tags;
|
86
|
-
int len =
|
95
|
+
int len = RARRAY_LEN(tagarr);
|
87
96
|
for( int i = 0; i < len; ++i ){
|
88
97
|
std::string tag = RSTRING_PTR( rb_ary_entry( tagarr, i ) );
|
89
98
|
tags.insert(tag);
|
data/lib/brill/tagger.rb
CHANGED
@@ -222,10 +222,10 @@ module Brill
|
|
222
222
|
text = text.gsub(/\s+/,' ')
|
223
223
|
|
224
224
|
# translate some common extended ascii characters to quotes
|
225
|
-
text.gsub!(
|
226
|
-
text.gsub!(
|
227
|
-
text.gsub!(
|
228
|
-
text.gsub!(
|
225
|
+
text.gsub!(/‘/,'`')
|
226
|
+
text.gsub!(/’/,"'")
|
227
|
+
text.gsub!(/“/,"``")
|
228
|
+
text.gsub!(/”/,"''")
|
229
229
|
|
230
230
|
# Attempt to get correct directional quotes
|
231
231
|
# s{\"\b} { `` }g;
|
data/lib/rbtagger/version.rb
CHANGED