isomorfeus-ferret 0.14.1 → 0.14.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +65 -55
- data/ext/isomorfeus_ferret_ext/frb_index.c +22 -20
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +4 -3
- data/ext/isomorfeus_ferret_ext/frt_global.c +21 -21
- data/ext/isomorfeus_ferret_ext/frt_global.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_ind.c +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +77 -156
- data/ext/isomorfeus_ferret_ext/frt_index.h +24 -47
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_1710.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_fields.c +39 -39
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_filter.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_index.c +36 -36
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_q_span.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_search.c +4 -4
- data/ext/isomorfeus_ferret_ext/test_segments.c +1 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +6 -6
- data/ext/isomorfeus_ferret_ext/test_threading.c +2 -2
- data/lib/isomorfeus/ferret/index/index.rb +28 -0
- data/lib/isomorfeus/ferret/version.rb +1 -1
- data/lib/isomorfeus-ferret.rb +2 -1
- metadata +8 -15
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2edd7c4bc27bf507a9003988136eaaa475fed8943eb4fe6a083995d02a9015c1
|
4
|
+
data.tar.gz: d779442eb4d5c2ce83490c0a0a425230701d21fd535f4f9b30970752557888ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a3ac194cc68b44fe0600ed79a9016e1dbab3b511d6b341a48c8cee27610d95683488fc5dc6ee10ebe5ed537a7eba4a3f18aaa31c8d27b84d16efc51e4525724d
|
7
|
+
data.tar.gz: 5fc5ffae4692e5f710ff7c01ab8a5b0f465db7d03b6a1ce2bd5d0f6db6c6699fcdb52034c5143cb91699234fc1028140f3bd4d4b07d1f41de80ebf9836f1b4e2
|
@@ -19,57 +19,62 @@ static VALUE sym_with_positions_offsets;
|
|
19
19
|
|
20
20
|
extern VALUE sym_boost;
|
21
21
|
|
22
|
-
void frb_fi_get_params(VALUE roptions,
|
22
|
+
void frb_fi_get_params(VALUE roptions, unsigned int *bits, float *boost) {
|
23
23
|
VALUE v;
|
24
24
|
Check_Type(roptions, T_HASH);
|
25
25
|
v = rb_hash_aref(roptions, sym_boost);
|
26
|
-
if (Qnil != v)
|
27
|
-
|
28
|
-
|
29
|
-
*boost = 1.0f;
|
30
|
-
}
|
26
|
+
if (Qnil != v) *boost = (float)NUM2DBL(v);
|
27
|
+
else *boost = 1.0f;
|
28
|
+
|
31
29
|
v = rb_hash_aref(roptions, sym_store);
|
32
30
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
33
31
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
34
|
-
*
|
32
|
+
*bits &= ~FRT_FI_IS_STORED_BM;
|
35
33
|
} else if (v == sym_yes || v == sym_true || v == Qtrue) {
|
36
|
-
*
|
34
|
+
*bits |= FRT_FI_IS_STORED_BM;
|
37
35
|
} else if (v == Qnil) {
|
38
36
|
/* leave as default */
|
39
37
|
} else {
|
40
|
-
rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]",
|
41
|
-
rb_id2name(SYM2ID(v)));
|
38
|
+
rb_raise(rb_eArgError, ":%s isn't a valid argument for :store. Please choose from [:yes, :no]", rb_id2name(SYM2ID(v)));
|
42
39
|
}
|
43
40
|
|
44
41
|
v = rb_hash_aref(roptions, sym_compression);
|
45
42
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
46
43
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
47
|
-
*
|
44
|
+
*bits &= ~FRT_FI_IS_COMPRESSED_BM;
|
45
|
+
*bits &= ~FRT_FI_COMPRESSION_BROTLI_BM;
|
46
|
+
*bits &= ~FRT_FI_COMPRESSION_BZ2_BM;
|
47
|
+
*bits &= ~FRT_FI_COMPRESSION_LZ4_BM;
|
48
48
|
} else if (v == sym_yes || v == sym_true || v == Qtrue || v == sym_brotli) {
|
49
|
-
*
|
49
|
+
*bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BROTLI_BM;
|
50
50
|
} else if (v == sym_bz2) {
|
51
|
-
*
|
51
|
+
*bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_BZ2_BM;
|
52
52
|
} else if (v == sym_lz4) {
|
53
|
-
*
|
53
|
+
*bits |= FRT_FI_IS_COMPRESSED_BM | FRT_FI_COMPRESSION_LZ4_BM;
|
54
54
|
} else if (v == Qnil) {
|
55
55
|
/* leave as default */
|
56
56
|
} else {
|
57
|
-
rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]",
|
58
|
-
rb_id2name(SYM2ID(v)));
|
57
|
+
rb_raise(rb_eArgError, ":%s isn't a valid argument for :compression. Please choose from [:yes, :no, :brotli, :bz2, :lz4]", rb_id2name(SYM2ID(v)));
|
59
58
|
}
|
60
59
|
|
61
60
|
v = rb_hash_aref(roptions, sym_index);
|
62
61
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
63
62
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
64
|
-
*
|
63
|
+
*bits &= ~FRT_FI_IS_INDEXED_BM;
|
64
|
+
*bits &= ~FRT_FI_IS_TOKENIZED_BM;
|
65
|
+
*bits &= ~FRT_FI_OMIT_NORMS_BM;
|
65
66
|
} else if (v == sym_yes || v == sym_true || v == Qtrue) {
|
66
|
-
*
|
67
|
+
*bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM;
|
68
|
+
*bits &= ~FRT_FI_OMIT_NORMS_BM;
|
67
69
|
} else if (v == sym_untokenized) {
|
68
|
-
*
|
70
|
+
*bits |= FRT_FI_IS_INDEXED_BM;
|
71
|
+
*bits &= ~FRT_FI_IS_TOKENIZED_BM;
|
72
|
+
*bits &= ~FRT_FI_OMIT_NORMS_BM;
|
69
73
|
} else if (v == sym_omit_norms) {
|
70
|
-
*
|
74
|
+
*bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_IS_TOKENIZED_BM | FRT_FI_OMIT_NORMS_BM;
|
71
75
|
} else if (v == sym_untokenized_omit_norms) {
|
72
|
-
*
|
76
|
+
*bits |= FRT_FI_IS_INDEXED_BM | FRT_FI_OMIT_NORMS_BM;
|
77
|
+
*bits &= ~FRT_FI_IS_TOKENIZED_BM;
|
73
78
|
} else if (v == Qnil) {
|
74
79
|
/* leave as default */
|
75
80
|
} else {
|
@@ -80,18 +85,28 @@ void frb_fi_get_params(VALUE roptions, FrtStoreValue *store, FrtCompressionType
|
|
80
85
|
v = rb_hash_aref(roptions, sym_term_vector);
|
81
86
|
if (Qnil != v) Check_Type(v, T_SYMBOL);
|
82
87
|
if (v == sym_no || v == sym_false || v == Qfalse) {
|
83
|
-
*
|
88
|
+
*bits &= ~FRT_FI_STORE_TERM_VECTOR_BM;
|
89
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
90
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
84
91
|
} else if (v == sym_yes || v == sym_true || v == Qtrue) {
|
85
|
-
*
|
92
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM;
|
93
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
94
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
86
95
|
} else if (v == sym_with_positions) {
|
87
|
-
*
|
96
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM;
|
97
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
88
98
|
} else if (v == sym_with_offsets) {
|
89
|
-
*
|
99
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_OFFSETS_BM;
|
100
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
90
101
|
} else if (v == sym_with_positions_offsets) {
|
91
|
-
*
|
102
|
+
*bits |= FRT_FI_STORE_TERM_VECTOR_BM | FRT_FI_STORE_POSITIONS_BM | FRT_FI_STORE_OFFSETS_BM;
|
92
103
|
} else if (v == Qnil) {
|
93
104
|
/* leave as default */
|
94
|
-
if (*
|
105
|
+
if ((*bits & FRT_FI_IS_INDEXED_BM) == 0) {
|
106
|
+
*bits &= ~FRT_FI_STORE_TERM_VECTOR_BM;
|
107
|
+
*bits &= ~FRT_FI_STORE_POSITIONS_BM;
|
108
|
+
*bits &= ~FRT_FI_STORE_OFFSETS_BM;
|
109
|
+
}
|
95
110
|
} else {
|
96
111
|
rb_raise(rb_eArgError, ":%s isn't a valid argument for :term_vector. Please choose from [:no, :yes, "
|
97
112
|
":with_positions, :with_offsets, :with_positions_offsets]", rb_id2name(SYM2ID(v)));
|
@@ -150,17 +165,12 @@ static VALUE frb_fi_init(int argc, VALUE *argv, VALUE self) {
|
|
150
165
|
VALUE roptions, rname;
|
151
166
|
FrtFieldInfo *fi;
|
152
167
|
TypedData_Get_Struct(self, FrtFieldInfo, &frb_field_info_t, fi);
|
153
|
-
|
154
|
-
FrtCompressionType compression = FRT_COMPRESSION_NONE;
|
155
|
-
FrtIndexValue index = FRT_INDEX_YES;
|
156
|
-
FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
|
168
|
+
unsigned int bits = FRT_FI_DEFAULTS_BM;
|
157
169
|
float boost = 1.0f;
|
158
170
|
|
159
171
|
rb_scan_args(argc, argv, "11", &rname, &roptions);
|
160
|
-
if (argc > 1)
|
161
|
-
|
162
|
-
}
|
163
|
-
fi = frt_fi_init(fi, frb_field(rname), store, compression, index, term_vector);
|
172
|
+
if (argc > 1) frb_fi_get_params(roptions, &bits, &boost);
|
173
|
+
fi = frt_fi_init(fi, frb_field(rname), bits);
|
164
174
|
fi->boost = boost;
|
165
175
|
fi->rfi = self;
|
166
176
|
return self;
|
@@ -185,7 +195,7 @@ static VALUE frb_fi_name(VALUE self) {
|
|
185
195
|
*/
|
186
196
|
static VALUE frb_fi_is_stored(VALUE self) {
|
187
197
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
188
|
-
return
|
198
|
+
return bits_is_stored(fi->bits) ? Qtrue : Qfalse;
|
189
199
|
}
|
190
200
|
|
191
201
|
/*
|
@@ -196,7 +206,7 @@ static VALUE frb_fi_is_stored(VALUE self) {
|
|
196
206
|
*/
|
197
207
|
static VALUE frb_fi_is_compressed(VALUE self) {
|
198
208
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
199
|
-
return
|
209
|
+
return bits_is_compressed(fi->bits) ? Qtrue : Qfalse;
|
200
210
|
}
|
201
211
|
|
202
212
|
/*
|
@@ -207,7 +217,7 @@ static VALUE frb_fi_is_compressed(VALUE self) {
|
|
207
217
|
*/
|
208
218
|
static VALUE frb_fi_is_indexed(VALUE self) {
|
209
219
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
210
|
-
return
|
220
|
+
return bits_is_indexed(fi->bits) ? Qtrue : Qfalse;
|
211
221
|
}
|
212
222
|
|
213
223
|
/*
|
@@ -223,7 +233,7 @@ static VALUE frb_fi_is_indexed(VALUE self) {
|
|
223
233
|
*/
|
224
234
|
static VALUE frb_fi_is_tokenized(VALUE self) {
|
225
235
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
226
|
-
return
|
236
|
+
return bits_is_tokenized(fi->bits) ? Qtrue : Qfalse;
|
227
237
|
}
|
228
238
|
|
229
239
|
/*
|
@@ -239,7 +249,7 @@ static VALUE frb_fi_is_tokenized(VALUE self) {
|
|
239
249
|
*/
|
240
250
|
static VALUE frb_fi_omit_norms(VALUE self) {
|
241
251
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
242
|
-
return
|
252
|
+
return bits_omit_norms(fi->bits) ? Qtrue : Qfalse;
|
243
253
|
}
|
244
254
|
|
245
255
|
/*
|
@@ -250,7 +260,7 @@ static VALUE frb_fi_omit_norms(VALUE self) {
|
|
250
260
|
*/
|
251
261
|
static VALUE frb_fi_store_term_vector(VALUE self) {
|
252
262
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
253
|
-
return
|
263
|
+
return bits_store_term_vector(fi->bits) ? Qtrue : Qfalse;
|
254
264
|
}
|
255
265
|
|
256
266
|
/*
|
@@ -261,7 +271,7 @@ static VALUE frb_fi_store_term_vector(VALUE self) {
|
|
261
271
|
*/
|
262
272
|
static VALUE frb_fi_store_positions(VALUE self) {
|
263
273
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
264
|
-
return
|
274
|
+
return bits_store_positions(fi->bits) ? Qtrue : Qfalse;
|
265
275
|
}
|
266
276
|
|
267
277
|
/*
|
@@ -272,7 +282,7 @@ static VALUE frb_fi_store_positions(VALUE self) {
|
|
272
282
|
*/
|
273
283
|
static VALUE frb_fi_store_offsets(VALUE self) {
|
274
284
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
275
|
-
return
|
285
|
+
return bits_store_offsets(fi->bits) ? Qtrue : Qfalse;
|
276
286
|
}
|
277
287
|
|
278
288
|
/*
|
@@ -285,7 +295,7 @@ static VALUE frb_fi_store_offsets(VALUE self) {
|
|
285
295
|
*/
|
286
296
|
static VALUE frb_fi_has_norms(VALUE self) {
|
287
297
|
FrtFieldInfo *fi = (FrtFieldInfo *)DATA_PTR(self);
|
288
|
-
return
|
298
|
+
return bits_has_norms(fi->bits) ? Qtrue : Qfalse;
|
289
299
|
}
|
290
300
|
|
291
301
|
/*
|
@@ -326,10 +336,10 @@ static VALUE frb_fi_to_h(VALUE self) {
|
|
326
336
|
bool o;
|
327
337
|
|
328
338
|
// :index
|
329
|
-
if (!
|
339
|
+
if (!bits_is_indexed(fi->bits)) val = sym_no;
|
330
340
|
else {
|
331
|
-
bool t =
|
332
|
-
o =
|
341
|
+
bool t = bits_is_tokenized(fi->bits);
|
342
|
+
o = bits_omit_norms(fi->bits);
|
333
343
|
if (!t && o) val = sym_untokenized_omit_norms;
|
334
344
|
else if (t && o) val = sym_omit_norms;
|
335
345
|
else if (!t && !o) val = sym_untokenized;
|
@@ -338,23 +348,23 @@ static VALUE frb_fi_to_h(VALUE self) {
|
|
338
348
|
rb_hash_aset(hash, sym_index, val);
|
339
349
|
|
340
350
|
// :store
|
341
|
-
rb_hash_aset(hash, sym_store,
|
351
|
+
rb_hash_aset(hash, sym_store, bits_is_stored(fi->bits) ? sym_yes : sym_no);
|
342
352
|
|
343
353
|
// :compress
|
344
|
-
if (!
|
354
|
+
if (!bits_is_compressed(fi->bits)) val = sym_no;
|
345
355
|
else {
|
346
|
-
if (
|
347
|
-
else if (
|
348
|
-
else if (
|
356
|
+
if (bits_is_compressed_brotli(fi->bits)) val = sym_brotli;
|
357
|
+
else if (bits_is_compressed_bz2(fi->bits)) val = sym_bz2;
|
358
|
+
else if (bits_is_compressed_lz4(fi->bits)) val = sym_lz4;
|
349
359
|
else val = sym_yes;
|
350
360
|
}
|
351
361
|
rb_hash_aset(hash, sym_compression, val);
|
352
362
|
|
353
363
|
// :term_vector
|
354
|
-
if (!
|
364
|
+
if (!bits_store_term_vector(fi->bits)) val = sym_no;
|
355
365
|
else {
|
356
|
-
bool p =
|
357
|
-
o =
|
366
|
+
bool p = bits_store_positions(fi->bits);
|
367
|
+
o = bits_store_offsets(fi->bits);
|
358
368
|
if (p && o) val = sym_with_positions_offsets;
|
359
369
|
else if (o) val = sym_with_offsets;
|
360
370
|
else if (p) val = sym_with_positions;
|
@@ -40,7 +40,7 @@ static ID id_boost;
|
|
40
40
|
|
41
41
|
extern VALUE sym_each;
|
42
42
|
extern rb_encoding *utf8_encoding;
|
43
|
-
extern void frb_fi_get_params(VALUE roptions,
|
43
|
+
extern void frb_fi_get_params(VALUE roptions, unsigned int *bits, float *boost);
|
44
44
|
extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
|
45
45
|
extern VALUE frb_get_analyzer(FrtAnalyzer *a);
|
46
46
|
extern VALUE frb_get_field_info(FrtFieldInfo *fi);
|
@@ -119,17 +119,12 @@ static VALUE frb_fis_init(int argc, VALUE *argv, VALUE self) {
|
|
119
119
|
VALUE roptions;
|
120
120
|
FrtFieldInfos *fis;
|
121
121
|
TypedData_Get_Struct(self, FrtFieldInfos, &frb_field_infos_t, fis);
|
122
|
-
|
123
|
-
FrtCompressionType compression = FRT_COMPRESSION_NONE;
|
124
|
-
FrtIndexValue index = FRT_INDEX_YES;
|
125
|
-
FrtTermVectorValue term_vector = FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS;
|
122
|
+
unsigned int bits = FRT_FI_DEFAULTS_BM;
|
126
123
|
float boost;
|
127
124
|
|
128
125
|
rb_scan_args(argc, argv, "01", &roptions);
|
129
|
-
if (argc > 0)
|
130
|
-
|
131
|
-
}
|
132
|
-
fis = frt_fis_init(fis, store, compression, index, term_vector);
|
126
|
+
if (argc > 0) frb_fi_get_params(roptions, &bits, &boost);
|
127
|
+
fis = frt_fis_init(fis, bits);
|
133
128
|
fis->rfis = self;
|
134
129
|
return self;
|
135
130
|
}
|
@@ -218,18 +213,15 @@ frb_fis_add_field(int argc, VALUE *argv, VALUE self)
|
|
218
213
|
{
|
219
214
|
FrtFieldInfos *fis = (FrtFieldInfos *)DATA_PTR(self);
|
220
215
|
FrtFieldInfo *fi;
|
221
|
-
|
222
|
-
FrtCompressionType compression = fis->compression;
|
223
|
-
FrtIndexValue index = fis->index;
|
224
|
-
FrtTermVectorValue term_vector = fis->term_vector;
|
216
|
+
unsigned int bits = fis->bits;
|
225
217
|
float boost = 1.0f;
|
226
218
|
VALUE rname, roptions;
|
227
219
|
|
228
220
|
rb_scan_args(argc, argv, "11", &rname, &roptions);
|
229
221
|
if (argc > 1) {
|
230
|
-
frb_fi_get_params(roptions, &
|
222
|
+
frb_fi_get_params(roptions, &bits, &boost);
|
231
223
|
}
|
232
|
-
fi = frt_fi_new(frb_field(rname),
|
224
|
+
fi = frt_fi_new(frb_field(rname), bits);
|
233
225
|
fi->boost = boost;
|
234
226
|
frt_fis_add_field(fis, fi);
|
235
227
|
return self;
|
@@ -340,7 +332,7 @@ frb_fis_get_tk_fields(VALUE self)
|
|
340
332
|
VALUE rfield_names = rb_ary_new();
|
341
333
|
int i;
|
342
334
|
for (i = 0; i < fis->size; i++) {
|
343
|
-
if (!
|
335
|
+
if (!bits_is_tokenized(fis->fields[i]->bits)) continue;
|
344
336
|
rb_ary_push(rfield_names, ID2SYM(fis->fields[i]->name));
|
345
337
|
}
|
346
338
|
return rfield_names;
|
@@ -1082,7 +1074,7 @@ static VALUE frb_iw_init(int argc, VALUE *argv, VALUE self) {
|
|
1082
1074
|
TypedData_Get_Struct(rval, FrtFieldInfos, &frb_field_infos_t, fis);
|
1083
1075
|
frt_index_create(store, fis);
|
1084
1076
|
} else {
|
1085
|
-
fis = frt_fis_new(
|
1077
|
+
fis = frt_fis_new(FRT_FI_DEFAULTS_BM);
|
1086
1078
|
frt_index_create(store, fis);
|
1087
1079
|
frt_fis_deref(fis);
|
1088
1080
|
}
|
@@ -1132,6 +1124,8 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
|
1132
1124
|
if (key == Qundef) {
|
1133
1125
|
return ST_CONTINUE;
|
1134
1126
|
} else {
|
1127
|
+
int ex_code = 0;
|
1128
|
+
const char *msg = NULL;
|
1135
1129
|
FrtDocument *doc = (FrtDocument *)arg;
|
1136
1130
|
ID field = frb_field(key);
|
1137
1131
|
VALUE val;
|
@@ -1162,7 +1156,15 @@ frb_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
|
1162
1156
|
frt_df_add_data_len(df, rstrdup(val), RSTRING_LEN(val), rb_enc_get(val));
|
1163
1157
|
break;
|
1164
1158
|
}
|
1165
|
-
|
1159
|
+
FRT_TRY
|
1160
|
+
frt_doc_add_field(doc, df);
|
1161
|
+
FRT_XCATCHALL
|
1162
|
+
ex_code = xcontext.excode;
|
1163
|
+
msg = xcontext.msg;
|
1164
|
+
FRT_HANDLED();
|
1165
|
+
FRT_XENDTRY
|
1166
|
+
|
1167
|
+
if (ex_code && msg) { frb_raise(ex_code, msg); }
|
1166
1168
|
}
|
1167
1169
|
return ST_CONTINUE;
|
1168
1170
|
}
|
@@ -2293,7 +2295,7 @@ frb_ir_tk_fields(VALUE self)
|
|
2293
2295
|
VALUE rfield_names = rb_ary_new();
|
2294
2296
|
int i;
|
2295
2297
|
for (i = 0; i < fis->size; i++) {
|
2296
|
-
if (!
|
2298
|
+
if (!bits_is_tokenized(fis->fields[i]->bits)) continue;
|
2297
2299
|
rb_ary_push(rfield_names, rb_str_new_cstr(rb_id2name(fis->fields[i]->name)));
|
2298
2300
|
}
|
2299
2301
|
return rfield_names;
|
@@ -2548,7 +2550,7 @@ static void Init_TVTerm(void) {
|
|
2548
2550
|
* == Example
|
2549
2551
|
*
|
2550
2552
|
* tv = index_reader.term_vector(doc_id, :content)
|
2551
|
-
* tv_term = tv.find {|tvt| tvt.term
|
2553
|
+
* tv_term = tv.find {|tvt| tvt.term == "fox"}
|
2552
2554
|
*
|
2553
2555
|
* # get the term frequency
|
2554
2556
|
* term_freq = tv_term.positions.size
|
@@ -66,7 +66,8 @@ static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc)
|
|
66
66
|
|
67
67
|
FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
68
68
|
if (tlen >= FRT_MAX_WORD_SIZE) {
|
69
|
-
|
69
|
+
char *head_last = rb_enc_left_char_head(text, text + FRT_MAX_WORD_SIZE - 1, text + tlen, encoding);
|
70
|
+
tlen = head_last - text;
|
70
71
|
}
|
71
72
|
|
72
73
|
if (encoding == utf8_encoding) {
|
@@ -1031,9 +1032,9 @@ static FrtToken *stemf_next(FrtTokenStream *ts) {
|
|
1031
1032
|
stemmed = sb_stemmer_stem(stemmer, (sb_symbol *)tk->text, tk->len);
|
1032
1033
|
len = sb_stemmer_length(stemmer);
|
1033
1034
|
if (len >= FRT_MAX_WORD_SIZE) {
|
1034
|
-
|
1035
|
+
char *head_last = rb_enc_left_char_head(tk->text, tk->text + FRT_MAX_WORD_SIZE - 1, tk->text + len, utf8_encoding);
|
1036
|
+
len = head_last - tk->text;
|
1035
1037
|
}
|
1036
|
-
|
1037
1038
|
memcpy(tk->text, stemmed, len);
|
1038
1039
|
tk->text[len] = '\0';
|
1039
1040
|
tk->len = len;
|
@@ -261,30 +261,30 @@ void frt_register_for_cleanup(void *p, frt_free_ft free_func) {
|
|
261
261
|
void frt_init(int argc, const char *const argv[]) {
|
262
262
|
atexit(&frt_hash_finalize);
|
263
263
|
|
264
|
-
utf8_encoding =
|
264
|
+
utf8_encoding = rb_utf8_encoding();
|
265
265
|
utf8_mbmaxlen = rb_enc_mbmaxlen(utf8_encoding);
|
266
266
|
char *p = "'";
|
267
267
|
cp_apostrophe = rb_enc_mbc_to_codepoint(p, p + 1, utf8_encoding);
|
268
|
-
|
269
|
-
cp_dot = rb_enc_mbc_to_codepoint(
|
270
|
-
|
271
|
-
cp_comma = rb_enc_mbc_to_codepoint(
|
272
|
-
|
273
|
-
cp_backslash = rb_enc_mbc_to_codepoint(
|
274
|
-
|
275
|
-
cp_slash = rb_enc_mbc_to_codepoint(
|
276
|
-
|
277
|
-
cp_underscore = rb_enc_mbc_to_codepoint(
|
278
|
-
|
279
|
-
cp_dash = rb_enc_mbc_to_codepoint(
|
280
|
-
|
281
|
-
cp_hyphen = rb_enc_mbc_to_codepoint(
|
282
|
-
|
283
|
-
cp_at = rb_enc_mbc_to_codepoint(
|
284
|
-
|
285
|
-
cp_ampersand = rb_enc_mbc_to_codepoint(
|
286
|
-
|
287
|
-
cp_colon = rb_enc_mbc_to_codepoint(
|
268
|
+
char *q = ".";
|
269
|
+
cp_dot = rb_enc_mbc_to_codepoint(q, q + 1, utf8_encoding);
|
270
|
+
char *r = ",";
|
271
|
+
cp_comma = rb_enc_mbc_to_codepoint(r, r + 1, utf8_encoding);
|
272
|
+
char *s = "\\";
|
273
|
+
cp_backslash = rb_enc_mbc_to_codepoint(s, s + 1, utf8_encoding);
|
274
|
+
char *t = "/";
|
275
|
+
cp_slash = rb_enc_mbc_to_codepoint(t, t + 1, utf8_encoding);
|
276
|
+
char *u = "_";
|
277
|
+
cp_underscore = rb_enc_mbc_to_codepoint(u, u + 1, utf8_encoding);
|
278
|
+
char *v = "-";
|
279
|
+
cp_dash = rb_enc_mbc_to_codepoint(v, v + 1, utf8_encoding);
|
280
|
+
char *w = "\u2010";
|
281
|
+
cp_hyphen = rb_enc_mbc_to_codepoint(w, w + 1, utf8_encoding);
|
282
|
+
char *x = "@";
|
283
|
+
cp_at = rb_enc_mbc_to_codepoint(x, x + 1, utf8_encoding);
|
284
|
+
char *y = "&";
|
285
|
+
cp_ampersand = rb_enc_mbc_to_codepoint(y, y + 1, utf8_encoding);
|
286
|
+
char *z = ":";
|
287
|
+
cp_colon = rb_enc_mbc_to_codepoint(z, z + 1, utf8_encoding);
|
288
288
|
|
289
289
|
FRT_SORT_FIELD_SCORE = frt_sort_field_alloc();
|
290
290
|
FRT_SORT_FIELD_SCORE->field_index_class = NULL; /* field_index_class */
|
@@ -54,7 +54,7 @@ FrtIndex *frt_index_new(FrtStore *store, FrtAnalyzer *analyzer, FrtHashSet *def_
|
|
54
54
|
}
|
55
55
|
|
56
56
|
if (create) {
|
57
|
-
FrtFieldInfos *fis = frt_fis_new(
|
57
|
+
FrtFieldInfos *fis = frt_fis_new(FRT_FI_DEFAULTS_BM);
|
58
58
|
frt_index_create(self->store, fis);
|
59
59
|
frt_fis_deref(fis);
|
60
60
|
}
|