isomorfeus-ferret 0.14.1 → 0.14.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5818fce6d84b9bd4814be3bbed270127e05297dcf85adeebc495c8f334430d88
4
- data.tar.gz: 77c9c3246c7777947084b47620d3aeeeb9eb76d7b0a17a4d30a37a38547a54da
3
+ metadata.gz: 2edd7c4bc27bf507a9003988136eaaa475fed8943eb4fe6a083995d02a9015c1
4
+ data.tar.gz: d779442eb4d5c2ce83490c0a0a425230701d21fd535f4f9b30970752557888ac
5
5
  SHA512:
6
- metadata.gz: 59632a0b46b9bd247da0f8b3908654a8027fbcef2aadc897f7681d25b03d4404191d037be323f666ef9bae679c72b135318aa853158e6bf0205b754ec3b2b18f
7
- data.tar.gz: 2a037003347c6bca0900bf80410e83f43d397400f37e22f112e6ef6893a568dba29561b12594f803f3b28baee9f5f1ae67595c244d91b7dffa9d06e4e493c891
6
+ metadata.gz: a3ac194cc68b44fe0600ed79a9016e1dbab3b511d6b341a48c8cee27610d95683488fc5dc6ee10ebe5ed537a7eba4a3f18aaa31c8d27b84d16efc51e4525724d
7
+ data.tar.gz: 5fc5ffae4692e5f710ff7c01ab8a5b0f465db7d03b6a1ce2bd5d0f6db6c6699fcdb52034c5143cb91699234fc1028140f3bd4d4b07d1f41de80ebf9836f1b4e2
@@ -19,57 +19,62 @@ static VALUE sym_with_positions_offsets;
19
19
 
20
20
  extern VALUE sym_boost;
21
21
 
22
- void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost) {
22
+ void frb_fi_get_params(VALUE roptions, unsigned int *bits, float *boost) {
23
23
  VALUE v;
24
24
  Check_Type(roptions, T_HASH);
25
25
  v = rb_hash_aref(roptions, sym_boost);
26
- if (Qnil != v) {
27
- *boost = (float)NUM2DBL(v);
28
- } else {
29
- *boost = 1.0f;
30
- }
26
+ if (Qnil != v) *boost = (float)NUM2DBL(v);
27
+ else *boost = 1.0f;
28
+
31
29
  v = rb_hash_aref(roptions, sym_store);
32
30
  if (Qnil != v) Check_Type(v, T_SYMBOL);
33
31
  if (v == sym_no || v == sym_false || v == Qfalse) {
34
- *store = FRT_STORE_NO;
32
+ *bits &= ~FRT_FI_IS_STORED_BM;
35
33
  } else if (v == sym_yes || v == sym_true || v == Qtrue) {
36
- *store = FRT_STORE_YES;
34
+ *bits |= FRT_FI_IS_STORED_BM;
37
35
  } else if (v == Qnil) {
38
36
  /* leave as default */
39
37
  } else {
40
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
41
- rb_id2name(SYM2ID(v)));
38
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]", rb_id2name(SYM2ID(v)));
42
39
  }
43
40
 
44
41
  v = rb_hash_aref(roptions, sym_compression);
45
42
  if (Qnil != v) Check_Type(v, T_SYMBOL);
46
43
  if (v == sym_no || v == sym_false || v == Qfalse) {
47
- *compression = FRT_COMPRESSION_NONE;
44
+ *bits &= ~FRT_FI_IS_COMPRESSED_BM;
45
+ *bits &= ~FRT_FI_COMPRESSION_BROTLI_BM;
46
+ *bits &= ~FRT_FI_COMPRESSION_BZ2_BM;
47
+ *bits &= ~FRT_FI_COMPRESSION_LZ4_BM;
48
48
  } else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
49
- *compression = FRT_COMPRESSION_BROTLI;
49
+ *bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
50
50
  } else if (v == sym_bz2) {
51
- *compression = FRT_COMPRESSION_BZ2;
51
+ *bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
52
52
  } else if (v == sym_lz4) {
53
- *compression = FRT_COMPRESSION_LZ4;
53
+ *bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
54
54
  } else if (v == Qnil) {
55
55
  /* leave as default */
56
56
  } else {
57
- rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
58
- rb_id2name(SYM2ID(v)));
57
+ rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]", rb_id2name(SYM2ID(v)));
59
58
  }
60
59
 
61
60
  v = rb_hash_aref(roptions, sym_index);
62
61
  if (Qnil != v) Check_Type(v, T_SYMBOL);
63
62
  if (v == sym_no || v == sym_false || v == Qfalse) {
64
- *index = FRT_INDEX_NO;
63
+ *bits &= ~FRT_FI_IS_INDEXED_BM;
64
+ *bits &= ~FRT_FI_IS_TOKENIZED_BM;
65
+ *bits &= ~FRT_FI_OMIT_NORMS_BM;
65
66
  } else if (v == sym_yes || v == sym_true || v == Qtrue) {
66
- *index = FRT_INDEX_YES;
67
+ *bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM;
68
+ *bits &= ~FRT_FI_OMIT_NORMS_BM;
67
69
  } else if (v == sym_untokenized) {
68
- *index = FRT_INDEX_UNTOKENIZED;
70
+ *bits |= FRT_FI_IS_INDEXED_BM;
71
+ *bits &= ~FRT_FI_IS_TOKENIZED_BM;
72
+ *bits &= ~FRT_FI_OMIT_NORMS_BM;
69
73
  } else if (v == sym_omit_norms) {
70
- *index = FRT_INDEX_YES_OMIT_NORMS;
74
+ *bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_OMIT_NORMS_BM;
71
75
  } else if (v == sym_untokenized_omit_norms) {
72
- *index = FRT_INDEX_UNTOKENIZED_OMIT_NORMS;
76
+ *bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_OMIT_NORMS_BM;
77
+ *bits &= ~FRT_FI_IS_TOKENIZED_BM;
73
78
  } else if (v == Qnil) {
74
79
  /* leave as default */
75
80
  } else {
@@ -80,18 +85,28 @@ void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType
80
85
  v = rb_hash_aref(roptions, sym_term_vector);
81
86
  if (Qnil != v) Check_Type(v, T_SYMBOL);
82
87
  if (v == sym_no || v == sym_false || v == Qfalse) {
83
- *term_vector = FRT_TERM_VECTOR_NO;
88
+ *bits &= ~FRT_FI_STORE_TERM_VECTOR_BM;
89
+ *bits &= ~FRT_FI_STORE_POSITIONS_BM;
90
+ *bits &= ~FRT_FI_STORE_OFFSETS_BM;
84
91
  } else if (v == sym_yes || v == sym_true || v == Qtrue) {
85
- *term_vector = FRT_TERM_VECTOR_YES;
92
+ *bits |= FRT_FI_STORE_TERM_VECTOR_BM;
93
+ *bits &= ~FRT_FI_STORE_POSITIONS_BM;
94
+ *bits &= ~FRT_FI_STORE_OFFSETS_BM;
86
95
  } else if (v == sym_with_positions) {
87
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS;
96
+ *bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM;
97
+ *bits &= ~FRT_FI_STORE_OFFSETS_BM;
88
98
  } else if (v == sym_with_offsets) {
89
- *term_vector = FRT_TERM_VECTOR_WITH_OFFSETS;
99
+ *bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_OFFSETS_BM;
100
+ *bits &= ~FRT_FI_STORE_POSITIONS_BM;
90
101
  } else if (v == sym_with_positions_offsets) {
91
- *term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
102
+ *bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM;
92
103
  } else if (v == Qnil) {
93
104
  /* leave as default */
94
- if (*index == FRT_INDEX_NO) *term_vector = FRT_TERM_VECTOR_NO;
105
+ if ((*bits & FRT_FI_IS_INDEXED_BM) == 0) {
106
+ *bits &= ~FRT_FI_STORE_TERM_VECTOR_BM;
107
+ *bits &= ~FRT_FI_STORE_POSITIONS_BM;
108
+ *bits &= ~FRT_FI_STORE_OFFSETS_BM;
109
+ }
95
110
  } else {
96
111
  rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
97
112
  ":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
@@ -150,17 +165,12 @@ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
150
165
  VALUE roptions, rname;
151
166
  FrtFieldInfo *fi;
152
167
  TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
153
- FrtStoreValue store = FRT_STORE_YES;
154
- FrtCompressionType compression = FRT_COMPRESSION_NONE;
155
- FrtIndexValue index = FRT_INDEX_YES;
156
- FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
168
+ unsigned int bits = FRT_FI_DEFAULTS_BM;
157
169
  float boost = 1.0f;
158
170
 
159
171
  rb_scan_args(argc, argv, "11", &rname, &roptions);
160
- if (argc > 1) {
161
- frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
162
- }
163
- fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
172
+ if (argc > 1) frb_fi_get_params(roptions, &bits, &boost);
173
+ fi = frt_fi_init(fi, frb_field(rname), bits);
164
174
  fi->boost = boost;
165
175
  fi->rfi = self;
166
176
  return self;
@@ -185,7 +195,7 @@ static VALUE frb_fi_name(VALUE self) {
185
195
  */
186
196
  static VALUE frb_fi_is_stored(VALUE self) {
187
197
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
188
- return fi_is_stored(fi) ? Qtrue : Qfalse;
198
+ return bits_is_stored(fi->bits) ? Qtrue : Qfalse;
189
199
  }
190
200
 
191
201
  /*
@@ -196,7 +206,7 @@ static VALUE frb_fi_is_stored(VALUE self) {
196
206
  */
197
207
  static VALUE frb_fi_is_compressed(VALUE self) {
198
208
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
199
- return fi_is_compressed(fi) ? Qtrue : Qfalse;
209
+ return bits_is_compressed(fi->bits) ? Qtrue : Qfalse;
200
210
  }
201
211
 
202
212
  /*
@@ -207,7 +217,7 @@ static VALUE frb_fi_is_compressed(VALUE self) {
207
217
  */
208
218
  static VALUE frb_fi_is_indexed(VALUE self) {
209
219
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
210
- return fi_is_indexed(fi) ? Qtrue : Qfalse;
220
+ return bits_is_indexed(fi->bits) ? Qtrue : Qfalse;
211
221
  }
212
222
 
213
223
  /*
@@ -223,7 +233,7 @@ static VALUE frb_fi_is_indexed(VALUE self) {
223
233
  */
224
234
  static VALUE frb_fi_is_tokenized(VALUE self) {
225
235
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
226
- return fi_is_tokenized(fi) ? Qtrue : Qfalse;
236
+ return bits_is_tokenized(fi->bits) ? Qtrue : Qfalse;
227
237
  }
228
238
 
229
239
  /*
@@ -239,7 +249,7 @@ static VALUE frb_fi_is_tokenized(VALUE self) {
239
249
  */
240
250
  static VALUE frb_fi_omit_norms(VALUE self) {
241
251
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
242
- return fi_omit_norms(fi) ? Qtrue : Qfalse;
252
+ return bits_omit_norms(fi->bits) ? Qtrue : Qfalse;
243
253
  }
244
254
 
245
255
  /*
@@ -250,7 +260,7 @@ static VALUE frb_fi_omit_norms(VALUE self) {
250
260
  */
251
261
  static VALUE frb_fi_store_term_vector(VALUE self) {
252
262
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
253
- return fi_store_term_vector(fi) ? Qtrue : Qfalse;
263
+ return bits_store_term_vector(fi->bits) ? Qtrue : Qfalse;
254
264
  }
255
265
 
256
266
  /*
@@ -261,7 +271,7 @@ static VALUE frb_fi_store_term_vector(VALUE self) {
261
271
  */
262
272
  static VALUE frb_fi_store_positions(VALUE self) {
263
273
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
264
- return fi_store_positions(fi) ? Qtrue : Qfalse;
274
+ return bits_store_positions(fi->bits) ? Qtrue : Qfalse;
265
275
  }
266
276
 
267
277
  /*
@@ -272,7 +282,7 @@ static VALUE frb_fi_store_positions(VALUE self) {
272
282
  */
273
283
  static VALUE frb_fi_store_offsets(VALUE self) {
274
284
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
275
- return fi_store_offsets(fi) ? Qtrue : Qfalse;
285
+ return bits_store_offsets(fi->bits) ? Qtrue : Qfalse;
276
286
  }
277
287
 
278
288
  /*
@@ -285,7 +295,7 @@ static VALUE frb_fi_store_offsets(VALUE self) {
285
295
  */
286
296
  static VALUE frb_fi_has_norms(VALUE self) {
287
297
  FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
288
- return fi_has_norms(fi) ? Qtrue : Qfalse;
298
+ return bits_has_norms(fi->bits) ? Qtrue : Qfalse;
289
299
  }
290
300
 
291
301
  /*
@@ -326,10 +336,10 @@ static VALUE frb_fi_to_h(VALUE self) {
326
336
  bool o;
327
337
 
328
338
  // :index
329
- if (!fi_is_indexed(fi)) val = sym_no;
339
+ if (!bits_is_indexed(fi->bits)) val = sym_no;
330
340
  else {
331
- bool t = fi_is_tokenized(fi);
332
- o = fi_omit_norms(fi);
341
+ bool t = bits_is_tokenized(fi->bits);
342
+ o = bits_omit_norms(fi->bits);
333
343
  if (!t && o) val = sym_untokenized_omit_norms;
334
344
  else if (t && o) val = sym_omit_norms;
335
345
  else if (!t && !o) val = sym_untokenized;
@@ -338,23 +348,23 @@ static VALUE frb_fi_to_h(VALUE self) {
338
348
  rb_hash_aset(hash, sym_index, val);
339
349
 
340
350
  // :store
341
- rb_hash_aset(hash, sym_store, fi_is_stored(fi) ? sym_yes : sym_no);
351
+ rb_hash_aset(hash, sym_store, bits_is_stored(fi->bits) ? sym_yes : sym_no);
342
352
 
343
353
  // :compress
344
- if (!fi_is_compressed(fi)) val = sym_no;
354
+ if (!bits_is_compressed(fi->bits)) val = sym_no;
345
355
  else {
346
- if (fi_is_compressed_brotli(fi)) val = sym_brotli;
347
- else if (fi_is_compressed_bz2(fi)) val = sym_bz2;
348
- else if (fi_is_compressed_lz4(fi)) val = sym_lz4;
356
+ if (bits_is_compressed_brotli(fi->bits)) val = sym_brotli;
357
+ else if (bits_is_compressed_bz2(fi->bits)) val = sym_bz2;
358
+ else if (bits_is_compressed_lz4(fi->bits)) val = sym_lz4;
349
359
  else val = sym_yes;
350
360
  }
351
361
  rb_hash_aset(hash, sym_compression, val);
352
362
 
353
363
  // :term_vector
354
- if (!fi_store_term_vector(fi)) val = sym_no;
364
+ if (!bits_store_term_vector(fi->bits)) val = sym_no;
355
365
  else {
356
- bool p = fi_store_positions(fi);
357
- o = fi_store_offsets(fi);
366
+ bool p = bits_store_positions(fi->bits);
367
+ o = bits_store_offsets(fi->bits);
358
368
  if (p && o) val = sym_with_positions_offsets;
359
369
  else if (o) val = sym_with_offsets;
360
370
  else if (p) val = sym_with_positions;
@@ -40,7 +40,7 @@ static ID id_boost;
40
40
 
41
41
  extern VALUE sym_each;
42
42
  extern rb_encoding *utf8_encoding;
43
- extern void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType *compression, FrtIndexValue *index, FrtTermVectorValue *term_vector, float *boost);
43
+ extern void frb_fi_get_params(VALUE roptions, unsigned int *bits, float *boost);
44
44
  extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
45
45
  extern VALUE frb_get_analyzer(FrtAnalyzer *a);
46
46
  extern VALUE frb_get_field_info(FrtFieldInfo *fi);
@@ -119,17 +119,12 @@ static VALUE frb_fis_init(int argc, VALUE *argv, VALUE self) {
119
119
  VALUE roptions;
120
120
  FrtFieldInfos *fis;
121
121
  TypedData_Get_Struct(self, FrtFieldInfos, &frb_field_infos_t, fis);
122
- FrtStoreValue store = FRT_STORE_YES;
123
- FrtCompressionType compression = FRT_COMPRESSION_NONE;
124
- FrtIndexValue index = FRT_INDEX_YES;
125
- FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
122
+ unsigned int bits = FRT_FI_DEFAULTS_BM;
126
123
  float boost;
127
124
 
128
125
  rb_scan_args(argc, argv, "01", &roptions);
129
- if (argc > 0) {
130
- frb_fi_get_params(roptions, &store, &compression, &index, &term_vector, &boost);
131
- }
132
- fis = frt_fis_init(fis, store, compression, index, term_vector);
126
+ if (argc > 0) frb_fi_get_params(roptions, &bits, &boost);
127
+ fis = frt_fis_init(fis, bits);
133
128
  fis->rfis = self;
134
129
  return self;
135
130
  }
@@ -218,18 +213,15 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
218
213
  {
219
214
  FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
220
215
  FrtFieldInfo *fi;
221
- FrtStoreValue store_val = fis->store_val;
222
- FrtCompressionType compression = fis->compression;
223
- FrtIndexValue index = fis->index;
224
- FrtTermVectorValue term_vector = fis->term_vector;
216
+ unsigned int bits = fis->bits;
225
217
  float boost = 1.0f;
226
218
  VALUE rname, roptions;
227
219
 
228
220
  rb_scan_args(argc, argv, "11", &rname, &roptions);
229
221
  if (argc > 1) {
230
- frb_fi_get_params(roptions, &store_val, &compression, &index, &term_vector, &boost);
222
+ frb_fi_get_params(roptions, &bits, &boost);
231
223
  }
232
- fi = frt_fi_new(frb_field(rname), store_val, compression, index, term_vector);
224
+ fi = frt_fi_new(frb_field(rname), bits);
233
225
  fi->boost = boost;
234
226
  frt_fis_add_field(fis, fi);
235
227
  return self;
@@ -340,7 +332,7 @@ frb_fis_get_tk_fields(VALUE self)
340
332
  VALUE rfield_names = rb_ary_new();
341
333
  int i;
342
334
  for (i = 0; i < fis->size; i++) {
343
- if (!fi_is_tokenized(fis->fields[i])) continue;
335
+ if (!bits_is_tokenized(fis->fields[i]->bits)) continue;
344
336
  rb_ary_push(rfield_names, ID2SYM(fis->fields[i]->name));
345
337
  }
346
338
  return rfield_names;
@@ -1082,7 +1074,7 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
1082
1074
  TypedData_Get_Struct(rval, FrtFieldInfos, &frb_field_infos_t, fis);
1083
1075
  frt_index_create(store, fis);
1084
1076
  } else {
1085
- fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
1077
+ fis = frt_fis_new(FRT_FI_DEFAULTS_BM);
1086
1078
  frt_index_create(store, fis);
1087
1079
  frt_fis_deref(fis);
1088
1080
  }
@@ -1132,6 +1124,8 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1132
1124
  if (key == Qundef) {
1133
1125
  return ST_CONTINUE;
1134
1126
  } else {
1127
+ int ex_code = 0;
1128
+ const char *msg = NULL;
1135
1129
  FrtDocument *doc = (FrtDocument *)arg;
1136
1130
  ID field = frb_field(key);
1137
1131
  VALUE val;
@@ -1162,7 +1156,15 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
1162
1156
  frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
1163
1157
  break;
1164
1158
  }
1165
- frt_doc_add_field(doc, df);
1159
+ FRT_TRY
1160
+ frt_doc_add_field(doc, df);
1161
+ FRT_XCATCHALL
1162
+ ex_code = xcontext.excode;
1163
+ msg = xcontext.msg;
1164
+ FRT_HANDLED();
1165
+ FRT_XENDTRY
1166
+
1167
+ if (ex_code && msg) { frb_raise(ex_code, msg); }
1166
1168
  }
1167
1169
  return ST_CONTINUE;
1168
1170
  }
@@ -2293,7 +2295,7 @@ frb_ir_tk_fields(VALUE self)
2293
2295
  VALUE rfield_names = rb_ary_new();
2294
2296
  int i;
2295
2297
  for (i = 0; i < fis->size; i++) {
2296
- if (!fi_is_tokenized(fis->fields[i])) continue;
2298
+ if (!bits_is_tokenized(fis->fields[i]->bits)) continue;
2297
2299
  rb_ary_push(rfield_names, rb_str_new_cstr(rb_id2name(fis->fields[i]->name)));
2298
2300
  }
2299
2301
  return rfield_names;
@@ -2548,7 +2550,7 @@ static void Init_TVTerm(void) {
2548
2550
  * == Example
2549
2551
  *
2550
2552
  * tv = index_reader.term_vector(doc_id, :content)
2551
- * tv_term = tv.find {|tvt| tvt.term = "fox"}
2553
+ * tv_term = tv.find {|tvt| tvt.term == "fox"}
2552
2554
  *
2553
2555
  * # get the term frequency
2554
2556
  * term_freq = tv_term.positions.size
@@ -66,7 +66,8 @@ static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc)
66
66
 
67
67
  FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
68
68
  if (tlen >= FRT_MAX_WORD_SIZE) {
69
- tlen = FRT_MAX_WORD_SIZE - 1; // TODO: this may invalidate mbc's
69
+ char *head_last = rb_enc_left_char_head(text, text + FRT_MAX_WORD_SIZE - 1, text + tlen, encoding);
70
+ tlen = head_last - text;
70
71
  }
71
72
 
72
73
  if (encoding == utf8_encoding) {
@@ -1031,9 +1032,9 @@ static FrtToken *stemf_next(FrtTokenStream *ts) {
1031
1032
  stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, tk->len);
1032
1033
  len = sb_stemmer_length(stemmer);
1033
1034
  if (len >= FRT_MAX_WORD_SIZE) {
1034
- len = FRT_MAX_WORD_SIZE - 1;
1035
+ char *head_last = rb_enc_left_char_head(tk->text, tk->text + FRT_MAX_WORD_SIZE - 1, tk->text + len, utf8_encoding);
1036
+ len = head_last - tk->text;
1035
1037
  }
1036
-
1037
1038
  memcpy(tk->text, stemmed, len);
1038
1039
  tk->text[len] = '\0';
1039
1040
  tk->len = len;
@@ -261,30 +261,30 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
261
261
  void frt_init(int argc, const char *const argv[]) {
262
262
  atexit(&frt_hash_finalize);
263
263
 
264
- utf8_encoding = rb_enc_find("UTF-8");
264
+ utf8_encoding = rb_utf8_encoding();
265
265
  utf8_mbmaxlen = rb_enc_mbmaxlen(utf8_encoding);
266
266
  char *p = "'";
267
267
  cp_apostrophe = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
268
- p = ".";
269
- cp_dot = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
270
- p = ",";
271
- cp_comma = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
272
- p = "\\";
273
- cp_backslash = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
274
- p = "/";
275
- cp_slash = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
276
- p = "_";
277
- cp_underscore = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
278
- p = "-";
279
- cp_dash = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
280
- p = "\u2010";
281
- cp_hyphen = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
282
- p = "@";
283
- cp_at = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
284
- p = "&";
285
- cp_ampersand = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
286
- p = ":";
287
- cp_colon = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
268
+ char *q = ".";
269
+ cp_dot = rb_enc_mbc_to_codepoint(q, q + 1, utf8_encoding);
270
+ char *r = ",";
271
+ cp_comma = rb_enc_mbc_to_codepoint(r, r + 1, utf8_encoding);
272
+ char *s = "\\";
273
+ cp_backslash = rb_enc_mbc_to_codepoint(s, s + 1, utf8_encoding);
274
+ char *t = "/";
275
+ cp_slash = rb_enc_mbc_to_codepoint(t, t + 1, utf8_encoding);
276
+ char *u = "_";
277
+ cp_underscore = rb_enc_mbc_to_codepoint(u, u + 1, utf8_encoding);
278
+ char *v = "-";
279
+ cp_dash = rb_enc_mbc_to_codepoint(v, v + 1, utf8_encoding);
280
+ char *w = "\u2010";
281
+ cp_hyphen = rb_enc_mbc_to_codepoint(w, w + 1, utf8_encoding);
282
+ char *x = "@";
283
+ cp_at = rb_enc_mbc_to_codepoint(x, x + 1, utf8_encoding);
284
+ char *y = "&";
285
+ cp_ampersand = rb_enc_mbc_to_codepoint(y, y + 1, utf8_encoding);
286
+ char *z = ":";
287
+ cp_colon = rb_enc_mbc_to_codepoint(z, z + 1, utf8_encoding);
288
288
 
289
289
  FRT_SORT_FIELD_SCORE = frt_sort_field_alloc();
290
290
  FRT_SORT_FIELD_SCORE->field_index_class = NULL; /* field_index_class */
@@ -13,7 +13,7 @@
13
13
 
14
14
  #define FRT_MAX_WORD_SIZE 255
15
15
  #define FRT_MAX_FILE_PATH 1024
16
- #define FRT_BUFFER_SIZE 16384
16
+ #define FRT_BUFFER_SIZE 4096
17
17
 
18
18
  typedef enum {
19
19
  FRT_COMPRESSION_NONE = 0,
@@ -54,7 +54,7 @@ FrtIndex *frt_index_new(FrtStore *store, FrtAnalyzer *analyzer, FrtHashSet *def_
54
54
  }
55
55
 
56
56
  if (create) {
57
- FrtFieldInfos *fis = frt_fis_new(FRT_STORE_YES, FRT_COMPRESSION_NONE, FRT_INDEX_YES, FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS);
57
+ FrtFieldInfos *fis = frt_fis_new(FRT_FI_DEFAULTS_BM);
58
58
  frt_index_create(self->store, fis);
59
59
  frt_fis_deref(fis);
60
60
  }