rbtagger 0.4.0 → 0.4.1
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/ext/rule_tagger/rbtagger.c +6 -6
- data/ext/rule_tagger/ruby-compat.h +8 -0
- data/ext/word_tagger/rtagger.cc +10 -1
- data/lib/brill/tagger.rb +4 -4
- data/lib/rbtagger/version.rb +1 -1
- metadata +1 -1
data/ext/rule_tagger/rbtagger.c
CHANGED
@@ -82,8 +82,8 @@ BrillTagger_apply_lexical_rules( VALUE self, VALUE tokens, VALUE tags, VALUE wor
|
|
82
82
|
{
|
83
83
|
TaggerContext *tc;
|
84
84
|
int i = 0;
|
85
|
-
int token_length =
|
86
|
-
int tags_length =
|
85
|
+
int token_length = RARRAY_LEN(tokens);
|
86
|
+
int tags_length = RARRAY_LEN(tags);
|
87
87
|
int rules_length;
|
88
88
|
VALUE fetched;
|
89
89
|
int EXTRAWDS = NUM2INT( extrawds );
|
@@ -150,8 +150,8 @@ BrillTagger_default_tag_finish( VALUE self, VALUE tokens, VALUE tags )
|
|
150
150
|
int i;
|
151
151
|
VALUE fetched, word;
|
152
152
|
char *tempstr;
|
153
|
-
int token_length =
|
154
|
-
int tags_length =
|
153
|
+
int token_length = RARRAY_LEN(tokens);
|
154
|
+
int tags_length = RARRAY_LEN(tags);
|
155
155
|
TaggerContext *tc;
|
156
156
|
|
157
157
|
Data_Get_Struct( self, TaggerContext, tc );
|
@@ -182,8 +182,8 @@ static VALUE
|
|
182
182
|
BrillTagger_apply_contextual_rules( VALUE self, VALUE tokens, VALUE tags, VALUE rmove )
|
183
183
|
{
|
184
184
|
int i;
|
185
|
-
int token_length =
|
186
|
-
int tags_length =
|
185
|
+
int token_length = RARRAY_LEN(tokens);
|
186
|
+
int tags_length = RARRAY_LEN(tags);
|
187
187
|
int rules_length;
|
188
188
|
int restrict_move = NUM2INT( rmove );
|
189
189
|
char **text_tags, **text_tokens;
|
data/ext/word_tagger/rtagger.cc
CHANGED
@@ -21,6 +21,15 @@
|
|
21
21
|
#define RSTRING_LEN(str) RSTRING(str)->len
|
22
22
|
#endif
|
23
23
|
|
24
|
+
#ifndef RARRAY_LEN
|
25
|
+
#define RARRAY_LEN(ar) RARRAY(ar)->len
|
26
|
+
#endif
|
27
|
+
|
28
|
+
#ifndef RARRAY_PTR
|
29
|
+
#define RARRAY_PTR(ar) RARRAY(ar)->ptr
|
30
|
+
#endif
|
31
|
+
|
32
|
+
|
24
33
|
static VALUE rb_Tagger;
|
25
34
|
static VALUE rb_NWordTagger;
|
26
35
|
|
@@ -83,7 +92,7 @@ VALUE Tagger_load_tags( VALUE self, VALUE tagarr )
|
|
83
92
|
NWordTagger *tagger;
|
84
93
|
Data_Get_Struct( self, NWordTagger, tagger );
|
85
94
|
std::set<std::string> tags;
|
86
|
-
int len =
|
95
|
+
int len = RARRAY_LEN(tagarr);
|
87
96
|
for( int i = 0; i < len; ++i ){
|
88
97
|
std::string tag = RSTRING_PTR( rb_ary_entry( tagarr, i ) );
|
89
98
|
tags.insert(tag);
|
data/lib/brill/tagger.rb
CHANGED
@@ -222,10 +222,10 @@ module Brill
|
|
222
222
|
text = text.gsub(/\s+/,' ')
|
223
223
|
|
224
224
|
# translate some common extended ascii characters to quotes
|
225
|
-
text.gsub!(
|
226
|
-
text.gsub!(
|
227
|
-
text.gsub!(
|
228
|
-
text.gsub!(
|
225
|
+
text.gsub!(/‘/,'`')
|
226
|
+
text.gsub!(/’/,"'")
|
227
|
+
text.gsub!(/“/,"``")
|
228
|
+
text.gsub!(/”/,"''")
|
229
229
|
|
230
230
|
# Attempt to get correct directional quotes
|
231
231
|
# s{\"\b} { `` }g;
|
data/lib/rbtagger/version.rb
CHANGED