ferret 0.10.14 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO +3 -0
- data/ext/analysis.c +5 -0
- data/ext/compound_io.c +46 -24
- data/ext/except.c +14 -0
- data/ext/except.h +29 -17
- data/ext/ferret.c +22 -1
- data/ext/ferret.h +2 -1
- data/ext/fs_store.c +9 -12
- data/ext/global.c +80 -0
- data/ext/global.h +10 -0
- data/ext/hash.c +0 -7
- data/ext/hash.h +0 -8
- data/ext/index.c +1289 -625
- data/ext/index.h +59 -14
- data/ext/q_boolean.c +12 -5
- data/ext/q_parser.c +570 -372
- data/ext/r_analysis.c +16 -16
- data/ext/r_index.c +41 -43
- data/ext/r_qparser.c +37 -36
- data/ext/r_search.c +10 -10
- data/ext/r_store.c +7 -7
- data/ext/ram_store.c +4 -3
- data/ext/search.c +3 -2
- data/ext/store.c +35 -19
- data/ext/store.h +3 -5
- data/lib/ferret/index.rb +4 -4
- data/lib/ferret_version.rb +1 -1
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +17 -21
- data/test/unit/index/tc_index.rb +6 -2
- data/test/unit/index/tc_index_writer.rb +2 -2
- data/test/unit/query_parser/tc_query_parser.rb +20 -5
- data/test/unit/search/tc_index_searcher.rb +3 -1
- data/test/unit/search/tm_searcher.rb +3 -1
- metadata +3 -2
data/ext/r_analysis.c
CHANGED
@@ -73,7 +73,7 @@ get_stopwords(VALUE rstop_words)
|
|
73
73
|
stop_words[len] = NULL;
|
74
74
|
for (i = 0; i < len; i++) {
|
75
75
|
rstr = rb_obj_as_string(RARRAY(rstop_words)->ptr[i]);
|
76
|
-
stop_words[i] =
|
76
|
+
stop_words[i] = rs2s(rstr);
|
77
77
|
}
|
78
78
|
return stop_words;
|
79
79
|
}
|
@@ -131,7 +131,7 @@ frt_set_token(Token *tk, VALUE rt)
|
|
131
131
|
if (rt == Qnil) return NULL;
|
132
132
|
|
133
133
|
Data_Get_Struct(rt, RToken, rtk);
|
134
|
-
tk_set(tk,
|
134
|
+
tk_set(tk, rs2s(rtk->text), RSTRING(rtk->text)->len,
|
135
135
|
rtk->start, rtk->end, rtk->pos_inc);
|
136
136
|
return tk;
|
137
137
|
}
|
@@ -216,7 +216,7 @@ frt_token_cmp(VALUE self, VALUE rother)
|
|
216
216
|
} else if (token->end < other->end) {
|
217
217
|
cmp = -1;
|
218
218
|
} else {
|
219
|
-
cmp = strcmp(
|
219
|
+
cmp = strcmp(rs2s(token->text), rs2s(other->text));
|
220
220
|
}
|
221
221
|
}
|
222
222
|
return INT2FIX(cmp);
|
@@ -372,7 +372,7 @@ frt_token_to_s(VALUE self)
|
|
372
372
|
char *buf;
|
373
373
|
GET_TK(token, self);
|
374
374
|
buf = alloca(RSTRING(token->text)->len + 80);
|
375
|
-
sprintf(buf, "token[\"%s\":%d:%d:%d]",
|
375
|
+
sprintf(buf, "token[\"%s\":%d:%d:%d]", rs2s(token->text),
|
376
376
|
token->start, token->end, token->pos_inc);
|
377
377
|
return rb_str_new2(buf);
|
378
378
|
}
|
@@ -427,7 +427,7 @@ static inline VALUE
|
|
427
427
|
get_wrapped_ts(VALUE self, VALUE rstr, TokenStream *ts)
|
428
428
|
{
|
429
429
|
StringValue(rstr);
|
430
|
-
ts->reset(ts,
|
430
|
+
ts->reset(ts, rs2s(rstr));
|
431
431
|
Frt_Wrap_Struct(self, &frt_ts_mark, &frt_ts_free, ts);
|
432
432
|
object_add(&ts->text, rstr);
|
433
433
|
object_add(ts, self);
|
@@ -449,7 +449,7 @@ frt_ts_set_text(VALUE self, VALUE rtext)
|
|
449
449
|
TokenStream *ts;
|
450
450
|
Data_Get_Struct(self, TokenStream, ts);
|
451
451
|
StringValue(rtext);
|
452
|
-
ts->reset(ts,
|
452
|
+
ts->reset(ts, rs2s(rtext));
|
453
453
|
object_set(&ts->text, rtext);
|
454
454
|
|
455
455
|
return rtext;
|
@@ -703,7 +703,7 @@ rets_next(TokenStream *ts)
|
|
703
703
|
} else {
|
704
704
|
VALUE rtok = rb_str_new(rtext->ptr + beg, end - beg);
|
705
705
|
rtok = rb_funcall(RETS(ts)->proc, id_call, 1, rtok);
|
706
|
-
return tk_set(&(CachedTS(ts)->token),
|
706
|
+
return tk_set(&(CachedTS(ts)->token), rs2s(rtok),
|
707
707
|
RSTRING(rtok)->len, beg, end, 1);
|
708
708
|
}
|
709
709
|
}
|
@@ -977,7 +977,7 @@ static __inline void frt_add_mapping_i(TokenStream *mf, VALUE from, char *to)
|
|
977
977
|
{
|
978
978
|
switch (TYPE(from)) {
|
979
979
|
case T_STRING:
|
980
|
-
mapping_filter_add(mf,
|
980
|
+
mapping_filter_add(mf, rs2s(from), to);
|
981
981
|
break;
|
982
982
|
case T_SYMBOL:
|
983
983
|
mapping_filter_add(mf, rb_id2name(SYM2ID(from)), to);
|
@@ -985,7 +985,7 @@ static __inline void frt_add_mapping_i(TokenStream *mf, VALUE from, char *to)
|
|
985
985
|
default:
|
986
986
|
rb_raise(rb_eArgError,
|
987
987
|
"cannot map from %s with MappingFilter",
|
988
|
-
|
988
|
+
rs2s(rb_obj_as_string(from)));
|
989
989
|
break;
|
990
990
|
}
|
991
991
|
}
|
@@ -999,7 +999,7 @@ static int frt_add_mappings_i(VALUE key, VALUE value, VALUE arg)
|
|
999
999
|
char *to;
|
1000
1000
|
switch (TYPE(value)) {
|
1001
1001
|
case T_STRING:
|
1002
|
-
to =
|
1002
|
+
to = rs2s(value);
|
1003
1003
|
break;
|
1004
1004
|
case T_SYMBOL:
|
1005
1005
|
to = rb_id2name(SYM2ID(value));
|
@@ -1007,7 +1007,7 @@ static int frt_add_mappings_i(VALUE key, VALUE value, VALUE arg)
|
|
1007
1007
|
default:
|
1008
1008
|
rb_raise(rb_eArgError,
|
1009
1009
|
"cannot map to %s with MappingFilter",
|
1010
|
-
|
1010
|
+
rs2s(rb_obj_as_string(key)));
|
1011
1011
|
break;
|
1012
1012
|
}
|
1013
1013
|
if (TYPE(key) == T_ARRAY) {
|
@@ -1088,8 +1088,8 @@ frt_stem_filter_init(int argc, VALUE *argv, VALUE self)
|
|
1088
1088
|
rb_scan_args(argc, argv, "12", &rsub_ts, &ralgorithm, &rcharenc);
|
1089
1089
|
ts = frt_get_cwrapped_rts(rsub_ts);
|
1090
1090
|
switch (argc) {
|
1091
|
-
case 3: charenc =
|
1092
|
-
case 2: algorithm =
|
1091
|
+
case 3: charenc = rs2s(rb_obj_as_string(rcharenc));
|
1092
|
+
case 2: algorithm = rs2s(rb_obj_as_string(ralgorithm));
|
1093
1093
|
}
|
1094
1094
|
ts = stem_filter_new(ts, algorithm, charenc);
|
1095
1095
|
object_add(&(TkFilt(ts)->sub_ts), rsub_ts);
|
@@ -1198,7 +1198,7 @@ frt_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rstring)
|
|
1198
1198
|
|
1199
1199
|
StringValue(rstring);
|
1200
1200
|
|
1201
|
-
ts = a_get_ts(a, frt_field(rfield),
|
1201
|
+
ts = a_get_ts(a, frt_field(rfield), rs2s(rstring));
|
1202
1202
|
|
1203
1203
|
/* Make sure that there is no entry already */
|
1204
1204
|
object_set(&ts->text, rstring);
|
@@ -1503,7 +1503,7 @@ frt_re_analyzer_token_stream(VALUE self, VALUE rfield, VALUE rtext)
|
|
1503
1503
|
|
1504
1504
|
StringValue(rtext);
|
1505
1505
|
|
1506
|
-
ts = a_get_ts(a, frt_field(rfield),
|
1506
|
+
ts = a_get_ts(a, frt_field(rfield), rs2s(rtext));
|
1507
1507
|
|
1508
1508
|
/* Make sure that there is no entry already */
|
1509
1509
|
object_set(&ts->text, rtext);
|
@@ -1546,7 +1546,7 @@ static VALUE frt_get_locale(VALUE self, VALUE locale)
|
|
1546
1546
|
*/
|
1547
1547
|
static VALUE frt_set_locale(VALUE self, VALUE locale)
|
1548
1548
|
{
|
1549
|
-
char *l = ((locale == Qnil) ? NULL :
|
1549
|
+
char *l = ((locale == Qnil) ? NULL : rs2s(rb_obj_as_string(locale)));
|
1550
1550
|
frt_locale = setlocale(LC_CTYPE, l);
|
1551
1551
|
return frt_locale ? rb_str_new2(frt_locale) : Qnil;
|
1552
1552
|
}
|
data/ext/r_index.c
CHANGED
@@ -479,7 +479,7 @@ frt_fis_get(VALUE self, VALUE ridx)
|
|
479
479
|
break;
|
480
480
|
default:
|
481
481
|
rb_raise(rb_eArgError, "Can't index FieldInfos with %s",
|
482
|
-
|
482
|
+
rs2s(rb_obj_as_string(ridx)));
|
483
483
|
break;
|
484
484
|
}
|
485
485
|
return rfi;
|
@@ -599,7 +599,7 @@ frt_fis_create_index(VALUE self, VALUE rdir)
|
|
599
599
|
} else {
|
600
600
|
StringValue(rdir);
|
601
601
|
frt_create_dir(rdir);
|
602
|
-
store = open_fs_store(
|
602
|
+
store = open_fs_store(rs2s(rdir));
|
603
603
|
}
|
604
604
|
index_create(store, fis);
|
605
605
|
store_deref(store);
|
@@ -753,6 +753,7 @@ frt_te_each(VALUE self)
|
|
753
753
|
int term_cnt = 0;
|
754
754
|
VALUE vals = rb_ary_new2(2);
|
755
755
|
RARRAY(vals)->len = 2;
|
756
|
+
rb_mem_clear(RARRAY(vals)->ptr, 2);
|
756
757
|
|
757
758
|
|
758
759
|
/* each is being called so there will be no current term */
|
@@ -947,6 +948,7 @@ frt_tde_each(VALUE self)
|
|
947
948
|
TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self);
|
948
949
|
VALUE vals = rb_ary_new2(2);
|
949
950
|
RARRAY(vals)->len = 2;
|
951
|
+
rb_mem_clear(RARRAY(vals)->ptr, 2);
|
950
952
|
|
951
953
|
while (tde->next(tde)) {
|
952
954
|
doc_cnt++;
|
@@ -1038,10 +1040,10 @@ frt_get_tv_term(TVTerm *tv_term)
|
|
1038
1040
|
int *positions = tv_term->positions;
|
1039
1041
|
rpositions = rb_ary_new2(freq);
|
1040
1042
|
rpos = RARRAY(rpositions)->ptr;
|
1041
|
-
RARRAY(rpositions)->len = freq;
|
1042
1043
|
for (i = 0; i < freq; i++) {
|
1043
1044
|
rpos[i] = INT2FIX(positions[i]);
|
1044
1045
|
}
|
1046
|
+
RARRAY(rpositions)->len = freq;
|
1045
1047
|
}
|
1046
1048
|
return rb_struct_new(cTVTerm, rtext, rpositions, NULL);
|
1047
1049
|
}
|
@@ -1064,10 +1066,10 @@ frt_get_tv(TermVector *tv)
|
|
1064
1066
|
rfield = ID2SYM(rb_intern(tv->field));
|
1065
1067
|
|
1066
1068
|
rterms = rb_ary_new2(t_cnt);
|
1067
|
-
RARRAY(rterms)->len = t_cnt;
|
1068
1069
|
rts = RARRAY(rterms)->ptr;
|
1069
1070
|
for (i = 0; i < t_cnt; i++) {
|
1070
1071
|
rts[i] = frt_get_tv_term(&terms[i]);
|
1072
|
+
RARRAY(rterms)->len++;
|
1071
1073
|
}
|
1072
1074
|
|
1073
1075
|
if (tv->offsets) {
|
@@ -1075,9 +1077,9 @@ frt_get_tv(TermVector *tv)
|
|
1075
1077
|
Offset *offsets = tv->offsets;
|
1076
1078
|
roffsets = rb_ary_new2(o_cnt);
|
1077
1079
|
ros = RARRAY(roffsets)->ptr;
|
1078
|
-
RARRAY(roffsets)->len = o_cnt;
|
1079
1080
|
for (i = 0; i < o_cnt; i++) {
|
1080
1081
|
ros[i] = frt_get_tv_offsets(&offsets[i]);
|
1082
|
+
RARRAY(roffsets)->len++;
|
1081
1083
|
}
|
1082
1084
|
}
|
1083
1085
|
|
@@ -1167,7 +1169,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
1167
1169
|
} else if ((rval = rb_hash_aref(roptions, sym_path)) != Qnil) {
|
1168
1170
|
StringValue(rval);
|
1169
1171
|
frt_create_dir(rval);
|
1170
|
-
store = open_fs_store(
|
1172
|
+
store = open_fs_store(rs2s(rval));
|
1171
1173
|
DEREF(store);
|
1172
1174
|
}
|
1173
1175
|
|
@@ -1203,7 +1205,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
1203
1205
|
store = open_ram_store();
|
1204
1206
|
DEREF(store);
|
1205
1207
|
}
|
1206
|
-
if (!create && create_if_missing && !store->exists(store, "segments")) {
|
1208
|
+
if (!create && create_if_missing && !store->exists(store, "segments.gen")) {
|
1207
1209
|
create = true;
|
1208
1210
|
}
|
1209
1211
|
if (create) {
|
@@ -1246,14 +1248,6 @@ frt_iw_get_doc_count(VALUE self)
|
|
1246
1248
|
return INT2FIX(iw_doc_count(iw));
|
1247
1249
|
}
|
1248
1250
|
|
1249
|
-
static char *
|
1250
|
-
nstrdup(const char *str, int len)
|
1251
|
-
{
|
1252
|
-
char *new = ALLOC_N(char, len + 1);
|
1253
|
-
memcpy(new, str, len + 1);
|
1254
|
-
return new;
|
1255
|
-
}
|
1256
|
-
|
1257
1251
|
static int
|
1258
1252
|
frt_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
1259
1253
|
{
|
@@ -1266,7 +1260,7 @@ frt_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
|
1266
1260
|
DocField *df;
|
1267
1261
|
switch (TYPE(key)) {
|
1268
1262
|
case T_STRING:
|
1269
|
-
field =
|
1263
|
+
field = rs2s(key);
|
1270
1264
|
break;
|
1271
1265
|
case T_SYMBOL:
|
1272
1266
|
field = rb_id2name(SYM2ID(key));
|
@@ -1274,7 +1268,7 @@ frt_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
|
1274
1268
|
default:
|
1275
1269
|
rb_raise(rb_eArgError,
|
1276
1270
|
"%s cannot be a key to a field. Field keys must "
|
1277
|
-
" be symbols.",
|
1271
|
+
" be symbols.", rs2s(rb_obj_as_string(key)));
|
1278
1272
|
break;
|
1279
1273
|
}
|
1280
1274
|
if (NULL == (df = doc_get_field(doc, field))) {
|
@@ -1290,22 +1284,17 @@ frt_hash_to_doc_i(VALUE key, VALUE value, VALUE arg)
|
|
1290
1284
|
df->destroy_data = true;
|
1291
1285
|
for (i = 0; i < RARRAY(value)->len; i++) {
|
1292
1286
|
val = rb_obj_as_string(RARRAY(value)->ptr[i]);
|
1293
|
-
df_add_data_len(df,
|
1294
|
-
nstrdup(RSTRING(val)->ptr,
|
1295
|
-
RSTRING(val)->len),
|
1296
|
-
RSTRING(val)->len);
|
1287
|
+
df_add_data_len(df, nstrdup(val), RSTRING(val)->len);
|
1297
1288
|
}
|
1298
1289
|
}
|
1299
1290
|
break;
|
1300
1291
|
case T_STRING:
|
1301
|
-
df_add_data_len(df,
|
1292
|
+
df_add_data_len(df, rs2s(value), RSTRING(value)->len);
|
1302
1293
|
break;
|
1303
1294
|
default:
|
1304
1295
|
val = rb_obj_as_string(value);
|
1305
1296
|
df->destroy_data = true;
|
1306
|
-
df_add_data_len(df,
|
1307
|
-
nstrdup(RSTRING(val)->ptr, RSTRING(val)->len),
|
1308
|
-
RSTRING(val)->len);
|
1297
|
+
df_add_data_len(df, nstrdup(val), RSTRING(val)->len);
|
1309
1298
|
break;
|
1310
1299
|
}
|
1311
1300
|
doc_add_field(doc, df);
|
@@ -1335,10 +1324,7 @@ frt_get_doc(VALUE rdoc)
|
|
1335
1324
|
df->destroy_data = true;
|
1336
1325
|
for (i = 0; i < RARRAY(rdoc)->len; i++) {
|
1337
1326
|
val = rb_obj_as_string(RARRAY(rdoc)->ptr[i]);
|
1338
|
-
df_add_data_len(df,
|
1339
|
-
nstrdup(RSTRING(val)->ptr,
|
1340
|
-
RSTRING(val)->len),
|
1341
|
-
RSTRING(val)->len);
|
1327
|
+
df_add_data_len(df, nstrdup(val), RSTRING(val)->len);
|
1342
1328
|
}
|
1343
1329
|
doc_add_field(doc, df);
|
1344
1330
|
}
|
@@ -1348,15 +1334,13 @@ frt_get_doc(VALUE rdoc)
|
|
1348
1334
|
doc_add_field(doc, df);
|
1349
1335
|
break;
|
1350
1336
|
case T_STRING:
|
1351
|
-
df = df_add_data_len(df_new("content"),
|
1337
|
+
df = df_add_data_len(df_new("content"), rs2s(rdoc),
|
1352
1338
|
RSTRING(rdoc)->len);
|
1353
1339
|
doc_add_field(doc, df);
|
1354
1340
|
break;
|
1355
1341
|
default:
|
1356
1342
|
val = rb_obj_as_string(rdoc);
|
1357
|
-
df = df_add_data_len(df_new("content"),
|
1358
|
-
nstrdup(RSTRING(val)->ptr,
|
1359
|
-
RSTRING(val)->len),
|
1343
|
+
df = df_add_data_len(df_new("content"), nstrdup(val),
|
1360
1344
|
RSTRING(val)->len);
|
1361
1345
|
df->destroy_data = true;
|
1362
1346
|
doc_add_field(doc, df);
|
@@ -1512,6 +1496,19 @@ frt_iw_set_analyzer(VALUE self, VALUE ranalyzer)
|
|
1512
1496
|
return ranalyzer;
|
1513
1497
|
}
|
1514
1498
|
|
1499
|
+
/*
|
1500
|
+
* call-seq:
|
1501
|
+
* index_writer.version -> int
|
1502
|
+
*
|
1503
|
+
* Returns the current version of the index writer.
|
1504
|
+
*/
|
1505
|
+
static VALUE
|
1506
|
+
frt_iw_version(VALUE self)
|
1507
|
+
{
|
1508
|
+
IndexWriter *iw = (IndexWriter *)DATA_PTR(self);
|
1509
|
+
return ULL2NUM(iw->sis->version);
|
1510
|
+
}
|
1511
|
+
|
1515
1512
|
/*
|
1516
1513
|
* call-seq:
|
1517
1514
|
* iw.chunk_size -> number
|
@@ -1804,7 +1801,7 @@ frt_lzd_default(VALUE self, VALUE rkey)
|
|
1804
1801
|
char *field = NULL;
|
1805
1802
|
switch (TYPE(rkey)) {
|
1806
1803
|
case T_STRING:
|
1807
|
-
field =
|
1804
|
+
field = rs2s(rkey);
|
1808
1805
|
rkey = ID2SYM(rb_intern(field));
|
1809
1806
|
break;
|
1810
1807
|
case T_SYMBOL:
|
@@ -1813,7 +1810,7 @@ frt_lzd_default(VALUE self, VALUE rkey)
|
|
1813
1810
|
default:
|
1814
1811
|
rb_raise(rb_eArgError,
|
1815
1812
|
"%s cannot be a key to a field. Field keys must "
|
1816
|
-
" be symbols.",
|
1813
|
+
" be symbols.", rs2s(rb_obj_as_string(rkey)));
|
1817
1814
|
break;
|
1818
1815
|
}
|
1819
1816
|
return frt_lazy_df_load(self, rkey, h_get(lazy_doc->field_dict, field));
|
@@ -1960,19 +1957,19 @@ frt_ir_init(VALUE self, VALUE rdir)
|
|
1960
1957
|
"be created from other IndexReaders, "
|
1961
1958
|
"Directory objects or file-system paths. "
|
1962
1959
|
"Not %s",
|
1963
|
-
|
1960
|
+
rs2s(rb_obj_as_string(rdir)));
|
1964
1961
|
}
|
1965
1962
|
break;
|
1966
1963
|
case T_STRING:
|
1967
1964
|
frt_create_dir(rdir);
|
1968
|
-
store = open_fs_store(
|
1965
|
+
store = open_fs_store(rs2s(rdir));
|
1969
1966
|
DEREF(store);
|
1970
1967
|
break;
|
1971
1968
|
default:
|
1972
1969
|
rb_raise(rb_eArgError, "%s isn't a valid directory "
|
1973
1970
|
"argument. You should use either a String or "
|
1974
1971
|
"a Directory",
|
1975
|
-
|
1972
|
+
rs2s(rb_obj_as_string(rdir)));
|
1976
1973
|
break;
|
1977
1974
|
}
|
1978
1975
|
sub_readers[i] = ir_open(store);
|
@@ -1986,13 +1983,13 @@ frt_ir_init(VALUE self, VALUE rdir)
|
|
1986
1983
|
break;
|
1987
1984
|
case T_STRING:
|
1988
1985
|
frt_create_dir(rdir);
|
1989
|
-
store = open_fs_store(
|
1986
|
+
store = open_fs_store(rs2s(rdir));
|
1990
1987
|
DEREF(store);
|
1991
1988
|
break;
|
1992
1989
|
default:
|
1993
1990
|
rb_raise(rb_eArgError, "%s isn't a valid directory argument. "
|
1994
1991
|
"You should use either a String or a Directory",
|
1995
|
-
|
1992
|
+
rs2s(rb_obj_as_string(rdir)));
|
1996
1993
|
break;
|
1997
1994
|
}
|
1998
1995
|
ir = ir_open(store);
|
@@ -2070,7 +2067,7 @@ frt_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset)
|
|
2070
2067
|
}
|
2071
2068
|
|
2072
2069
|
ir_get_norms_into(ir, frt_field(rfield),
|
2073
|
-
(uchar *)
|
2070
|
+
(uchar *)rs2s(rnorms) + offset);
|
2074
2071
|
return rnorms;
|
2075
2072
|
}
|
2076
2073
|
|
@@ -2210,8 +2207,8 @@ frt_get_doc_range(IndexReader *ir, int pos, int len, int max)
|
|
2210
2207
|
ary = rb_ary_new2(len);
|
2211
2208
|
for (i = 0; i < len; i++) {
|
2212
2209
|
RARRAY(ary)->ptr[i] = frt_get_lazy_doc(ir->get_lazy_doc(ir, i + pos));
|
2210
|
+
RARRAY(ary)->len++;
|
2213
2211
|
}
|
2214
|
-
RARRAY(ary)->len = len;
|
2215
2212
|
return ary;
|
2216
2213
|
}
|
2217
2214
|
|
@@ -2525,7 +2522,7 @@ static VALUE
|
|
2525
2522
|
frt_ir_version(VALUE self)
|
2526
2523
|
{
|
2527
2524
|
IndexReader *ir = (IndexReader *)DATA_PTR(self);
|
2528
|
-
return
|
2525
|
+
return ULL2NUM(ir->sis->version);
|
2529
2526
|
}
|
2530
2527
|
|
2531
2528
|
/****************************************************************************
|
@@ -3126,6 +3123,7 @@ Init_IndexWriter(void)
|
|
3126
3123
|
rb_define_method(cIndexWriter, "field_infos", frt_iw_field_infos, 0);
|
3127
3124
|
rb_define_method(cIndexWriter, "analyzer", frt_iw_get_analyzer, 0);
|
3128
3125
|
rb_define_method(cIndexWriter, "analyzer=", frt_iw_set_analyzer, 1);
|
3126
|
+
rb_define_method(cIndexWriter, "version", frt_iw_version, 0);
|
3129
3127
|
|
3130
3128
|
rb_define_method(cIndexWriter, "chunk_size",
|
3131
3129
|
frt_iw_get_chunk_size, 0);
|
data/ext/r_qparser.c
CHANGED
@@ -54,15 +54,15 @@ frt_get_fields(VALUE rfields)
|
|
54
54
|
int i;
|
55
55
|
for (i = 0; i < RARRAY(rfields)->len; i++) {
|
56
56
|
rval = rb_obj_as_string(RARRAY(rfields)->ptr[i]);
|
57
|
-
hs_add(fields,
|
57
|
+
hs_add(fields, nstrdup(rval));
|
58
58
|
}
|
59
59
|
} else {
|
60
60
|
rval = rb_obj_as_string(rfields);
|
61
|
-
if (strcmp("*",
|
61
|
+
if (strcmp("*", rs2s(rval)) == 0) {
|
62
62
|
hs_destroy(fields);
|
63
63
|
fields = NULL;
|
64
64
|
} else {
|
65
|
-
s = str =
|
65
|
+
s = str = nstrdup(rval);
|
66
66
|
while ((p = strchr(s, '|')) != '\0') {
|
67
67
|
*p = '\0';
|
68
68
|
hs_add(fields, estrdup(s));
|
@@ -84,38 +84,38 @@ frt_get_fields(VALUE rfields)
|
|
84
84
|
*
|
85
85
|
* === Options
|
86
86
|
*
|
87
|
-
* :default_field::
|
88
|
-
*
|
89
|
-
*
|
90
|
-
* :analyzer::
|
91
|
-
*
|
92
|
-
* :wild_card_downcase::
|
93
|
-
*
|
94
|
-
*
|
95
|
-
* :fields::
|
96
|
-
*
|
97
|
-
*
|
98
|
-
* :tokenized_fields::
|
99
|
-
*
|
100
|
-
*
|
101
|
-
* :validate_fields::
|
102
|
-
*
|
103
|
-
*
|
104
|
-
* :or_default::
|
105
|
-
*
|
106
|
-
* :default_slop::
|
107
|
-
* :
|
108
|
-
*
|
109
|
-
*
|
110
|
-
* :clean_string::
|
111
|
-
*
|
112
|
-
*
|
113
|
-
*
|
114
|
-
* :max_clauses::
|
115
|
-
*
|
116
|
-
*
|
117
|
-
*
|
118
|
-
*
|
87
|
+
* :default_field:: Default: "*" (all fields). The default field to
|
88
|
+
* search when no field is specified in the search
|
89
|
+
* string. It can also be an array of fields.
|
90
|
+
* :analyzer:: Default: StandardAnalyzer. Analyzer used by the
|
91
|
+
* query parser to parse query terms
|
92
|
+
* :wild_card_downcase:: Default: true. Specifies whether wild-card queries
|
93
|
+
* and range queries should be downcased or not since
|
94
|
+
* they are not passed through the parser
|
95
|
+
* :fields:: Default: []. Lets the query parser know what
|
96
|
+
* fields are available for searching, particularly
|
97
|
+
* when the "*" is specified as the search field
|
98
|
+
* :tokenized_fields:: Default: :fields. Lets the query parser know which
|
99
|
+
* fields are tokenized so it knows which fields to
|
100
|
+
* run the analyzer over.
|
101
|
+
* :validate_fields:: Default: false. Set to true if you want an
|
102
|
+
* exception to be raised if there is an attempt to
|
103
|
+
* search a non-existent field
|
104
|
+
* :or_default:: Default: true. Use "OR" as the default boolean
|
105
|
+
* operator
|
106
|
+
* :default_slop:: Default: 0. Default slop to use in PhraseQuery
|
107
|
+
* :handle_parse_errors:: Default: true. QueryParser will quietly handle all
|
108
|
+
* parsing errors internally. If you'd like to handle
|
109
|
+
* them yourself, set this parameter to false.
|
110
|
+
* :clean_string:: Default: true. QueryParser will do a quick
|
111
|
+
* once-over the query string to make sure that quotes
|
112
|
+
* and brackets match up and special characters are
|
113
|
+
* escaped
|
114
|
+
* :max_clauses:: Default: 512. The maximum number of clauses
|
115
|
+
* allowed in boolean queries and the maximum number
|
116
|
+
* of terms allowed in multi, prefix, wild-card or
|
117
|
+
* fuzzy queries when those queries are generated by
|
118
|
+
* rewriting other queries
|
119
119
|
*/
|
120
120
|
static VALUE
|
121
121
|
frt_qp_init(int argc, VALUE *argv, VALUE self)
|
@@ -163,6 +163,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
163
163
|
qp = qp_new(all_fields, def_fields, tkz_fields, analyzer);
|
164
164
|
qp->allow_any_fields = true;
|
165
165
|
qp->clean_str = true;
|
166
|
+
qp->handle_parse_errors = true;
|
166
167
|
/* handle options */
|
167
168
|
if (argc > 0) {
|
168
169
|
if (Qnil != (rval = rb_hash_aref(roptions, sym_handle_parse_errors))) {
|
@@ -208,7 +209,7 @@ frt_qp_parse(VALUE self, VALUE rstr)
|
|
208
209
|
GET_QP;
|
209
210
|
rstr = rb_obj_as_string(rstr);
|
210
211
|
TRY
|
211
|
-
rq = frt_get_q(qp_parse(qp,
|
212
|
+
rq = frt_get_q(qp_parse(qp, rs2s(rstr)));
|
212
213
|
break;
|
213
214
|
default:
|
214
215
|
msg = xcontext.msg;
|