isomorfeus-ferret 0.17.2 → 0.17.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/benchmark.c +9 -20
- data/ext/isomorfeus_ferret_ext/benchmarks_all.h +1 -2
- data/ext/isomorfeus_ferret_ext/bm_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +4 -2
- data/ext/isomorfeus_ferret_ext/brotli_enc_encode.c +3 -2
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +4 -5
- data/ext/isomorfeus_ferret_ext/frb_field_info.c +3 -4
- data/ext/isomorfeus_ferret_ext/frb_index.c +161 -187
- data/ext/isomorfeus_ferret_ext/frb_lazy_doc.c +14 -16
- data/ext/isomorfeus_ferret_ext/frb_search.c +77 -69
- data/ext/isomorfeus_ferret_ext/frb_store.c +27 -13
- data/ext/isomorfeus_ferret_ext/frb_utils.c +3 -6
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +39 -46
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_array.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +3 -6
- data/ext/isomorfeus_ferret_ext/frt_doc_field.c +87 -0
- data/ext/isomorfeus_ferret_ext/frt_doc_field.h +26 -0
- data/ext/isomorfeus_ferret_ext/frt_document.c +4 -97
- data/ext/isomorfeus_ferret_ext/frt_document.h +5 -33
- data/ext/isomorfeus_ferret_ext/frt_except.c +8 -6
- data/ext/isomorfeus_ferret_ext/frt_except.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +14 -33
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +0 -6
- data/ext/isomorfeus_ferret_ext/frt_field_info.c +69 -0
- data/ext/isomorfeus_ferret_ext/frt_field_info.h +49 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.c +196 -0
- data/ext/isomorfeus_ferret_ext/frt_field_infos.h +35 -0
- data/ext/isomorfeus_ferret_ext/frt_filter.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_global.c +10 -4
- data/ext/isomorfeus_ferret_ext/frt_global.h +11 -15
- data/ext/isomorfeus_ferret_ext/frt_hash.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_hash.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_hashset.c +20 -40
- data/ext/isomorfeus_ferret_ext/frt_hashset.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_helper.c +7 -15
- data/ext/isomorfeus_ferret_ext/frt_in_stream.c +35 -45
- data/ext/isomorfeus_ferret_ext/frt_in_stream.h +3 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +21 -39
- data/ext/isomorfeus_ferret_ext/frt_ind.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_index.c +334 -848
- data/ext/isomorfeus_ferret_ext/frt_index.h +4 -105
- data/ext/isomorfeus_ferret_ext/frt_lang.c +5 -10
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc.h +1 -1
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.c +18 -25
- data/ext/isomorfeus_ferret_ext/frt_lazy_doc_field.h +5 -5
- data/ext/isomorfeus_ferret_ext/frt_mdbx_store.c +102 -70
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +23 -46
- data/ext/isomorfeus_ferret_ext/frt_multimapper.h +4 -8
- data/ext/isomorfeus_ferret_ext/frt_out_stream.c +31 -43
- data/ext/isomorfeus_ferret_ext/frt_out_stream.h +2 -2
- data/ext/isomorfeus_ferret_ext/frt_posh.c +6 -819
- data/ext/isomorfeus_ferret_ext/frt_posh.h +0 -57
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +11 -22
- data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +131 -217
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +4 -4
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +18 -26
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +27 -28
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +49 -98
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +64 -116
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +8 -14
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +251 -365
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +9 -9
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +134 -85
- data/ext/isomorfeus_ferret_ext/frt_search.c +109 -191
- data/ext/isomorfeus_ferret_ext/frt_search.h +6 -6
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +12 -23
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +1 -2
- data/ext/isomorfeus_ferret_ext/frt_sort.c +20 -20
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -25
- data/ext/isomorfeus_ferret_ext/frt_store.h +86 -52
- data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +8 -16
- data/ext/isomorfeus_ferret_ext/frt_win32.h +5 -10
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +12 -11
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +11 -13
- data/ext/isomorfeus_ferret_ext/lz4.c +422 -195
- data/ext/isomorfeus_ferret_ext/lz4.h +114 -46
- data/ext/isomorfeus_ferret_ext/lz4frame.c +421 -242
- data/ext/isomorfeus_ferret_ext/lz4frame.h +122 -53
- data/ext/isomorfeus_ferret_ext/lz4hc.c +127 -111
- data/ext/isomorfeus_ferret_ext/lz4hc.h +14 -14
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +1 -1
- data/ext/isomorfeus_ferret_ext/mdbx.c +3762 -2526
- data/ext/isomorfeus_ferret_ext/mdbx.h +115 -70
- data/ext/isomorfeus_ferret_ext/test.c +41 -88
- data/ext/isomorfeus_ferret_ext/test.h +3 -6
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -13
- data/ext/isomorfeus_ferret_ext/test_analysis.c +32 -64
- data/ext/isomorfeus_ferret_ext/test_array.c +6 -12
- data/ext/isomorfeus_ferret_ext/test_bitvector.c +12 -24
- data/ext/isomorfeus_ferret_ext/test_document.c +23 -33
- data/ext/isomorfeus_ferret_ext/test_except.c +10 -21
- data/ext/isomorfeus_ferret_ext/test_fields.c +62 -68
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +16 -25
- data/ext/isomorfeus_ferret_ext/test_filter.c +22 -33
- data/ext/isomorfeus_ferret_ext/test_global.c +14 -29
- data/ext/isomorfeus_ferret_ext/test_hash.c +19 -38
- data/ext/isomorfeus_ferret_ext/test_hashset.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_helper.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +16 -28
- data/ext/isomorfeus_ferret_ext/test_index.c +307 -519
- data/ext/isomorfeus_ferret_ext/test_lang.c +7 -14
- data/ext/isomorfeus_ferret_ext/test_mdbx_store.c +2 -5
- data/ext/isomorfeus_ferret_ext/test_mempool.c +5 -10
- data/ext/isomorfeus_ferret_ext/test_multimapper.c +3 -6
- data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +9 -18
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +4 -6
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +3 -4
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +8 -16
- data/ext/isomorfeus_ferret_ext/test_q_span.c +19 -35
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +14 -13
- data/ext/isomorfeus_ferret_ext/test_search.c +66 -115
- data/ext/isomorfeus_ferret_ext/test_segments.c +8 -13
- data/ext/isomorfeus_ferret_ext/test_similarity.c +2 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -27
- data/ext/isomorfeus_ferret_ext/test_store.c +96 -115
- data/ext/isomorfeus_ferret_ext/test_term.c +9 -15
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -14
- data/ext/isomorfeus_ferret_ext/test_test.c +4 -8
- data/ext/isomorfeus_ferret_ext/test_threading.c +15 -21
- data/ext/isomorfeus_ferret_ext/testhelper.c +11 -21
- data/ext/isomorfeus_ferret_ext/testhelper.h +1 -1
- data/ext/isomorfeus_ferret_ext/tests_all.h +1 -2
- data/lib/isomorfeus/ferret/index/index.rb +8 -8
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +32 -6
@@ -1,8 +1,6 @@
|
|
1
1
|
#include "frt_index.h"
|
2
2
|
#include "isomorfeus_ferret.h"
|
3
3
|
|
4
|
-
extern VALUE rb_hash_update(int argc, VALUE *argv, VALUE self);
|
5
|
-
|
6
4
|
extern VALUE sym_each;
|
7
5
|
extern ID id_eql;
|
8
6
|
|
@@ -97,7 +95,7 @@ static VALUE frb_ld_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
|
|
97
95
|
rLazyDoc *rld = DATA_PTR(self);
|
98
96
|
VALUE rdata;
|
99
97
|
if (lazy_df->size == 1) {
|
100
|
-
char *data = frt_lazy_df_get_data(lazy_df, 0);
|
98
|
+
const char *data = frt_lazy_df_get_data(lazy_df, 0);
|
101
99
|
rdata = rb_str_new(data, lazy_df->data[0].length);
|
102
100
|
rb_enc_associate(rdata, lazy_df->data[0].encoding);
|
103
101
|
} else {
|
@@ -105,7 +103,7 @@ static VALUE frb_ld_df_load(VALUE self, VALUE rkey, FrtLazyDocField *lazy_df) {
|
|
105
103
|
VALUE rstr;
|
106
104
|
rdata = rb_ary_new2(lazy_df->size);
|
107
105
|
for (i = 0; i < lazy_df->size; i++) {
|
108
|
-
char *data = frt_lazy_df_get_data(lazy_df, i);
|
106
|
+
const char *data = frt_lazy_df_get_data(lazy_df, i);
|
109
107
|
rstr = rb_str_new(data, lazy_df->data[i].length);
|
110
108
|
rb_enc_associate(rstr, lazy_df->data[i].encoding);
|
111
109
|
rb_ary_store(rdata, i, rstr);
|
@@ -127,7 +125,7 @@ static VALUE frb_ld_load(VALUE self) {
|
|
127
125
|
if (ld->loaded) return self;
|
128
126
|
int i;
|
129
127
|
FrtLazyDocField *lazy_df;
|
130
|
-
for (i = 0; i < ld->
|
128
|
+
for (i = 0; i < ld->field_count; i++) {
|
131
129
|
lazy_df = ld->fields[i];
|
132
130
|
if (!(lazy_df->loaded)) frb_ld_df_load(self, ID2SYM(lazy_df->name), lazy_df);
|
133
131
|
}
|
@@ -148,8 +146,8 @@ static VALUE frb_ld_fields(VALUE self) {
|
|
148
146
|
VALUE rfields = rb_ivar_get(self, id_fields);
|
149
147
|
if (rfields == Qnil) {
|
150
148
|
int i;
|
151
|
-
rfields = rb_ary_new2(ld->
|
152
|
-
for (i = 0; i < ld->
|
149
|
+
rfields = rb_ary_new2(ld->field_count);
|
150
|
+
for (i = 0; i < ld->field_count; i++) {
|
153
151
|
rb_ary_store(rfields, i, ID2SYM(ld->fields[i]->name));
|
154
152
|
}
|
155
153
|
rb_ivar_set(self, id_fields, rfields);
|
@@ -208,9 +206,9 @@ static VALUE frb_ld_equal(VALUE self, VALUE other) {
|
|
208
206
|
rLazyDoc *other_rld;
|
209
207
|
TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
|
210
208
|
other_h = frb_ld_to_h(other);
|
211
|
-
other_size = other_rld->doc->
|
209
|
+
other_size = other_rld->doc->field_count;
|
212
210
|
}
|
213
|
-
if (ld->
|
211
|
+
if (ld->field_count == other_size) {
|
214
212
|
VALUE self_h = frb_ld_to_h(self);
|
215
213
|
return rb_funcall(self_h, id_equal, 1, other_h);
|
216
214
|
}
|
@@ -278,7 +276,7 @@ static VALUE frb_ld_any(int argc, VALUE *argv, VALUE self) {
|
|
278
276
|
FrtLazyDoc *ld = rld->doc;
|
279
277
|
if (argc == 0) {
|
280
278
|
if (!rb_block_given_p()) {
|
281
|
-
return (ld->
|
279
|
+
return (ld->field_count > 0) ? Qtrue : Qfalse;
|
282
280
|
} else {
|
283
281
|
if (!ld->loaded) frb_ld_load(self);
|
284
282
|
VALUE res = Qnil;
|
@@ -375,7 +373,7 @@ static VALUE frb_ld_each_value(VALUE self) {
|
|
375
373
|
|
376
374
|
static VALUE frb_ld_empty(VALUE self) {
|
377
375
|
FrtLazyDoc *ld = ((rLazyDoc *)DATA_PTR(self))->doc;
|
378
|
-
return (ld->
|
376
|
+
return (ld->field_count == 0) ? Qtrue : Qfalse;
|
379
377
|
}
|
380
378
|
|
381
379
|
static VALUE frb_ld_eql(VALUE self, VALUE other) {
|
@@ -389,9 +387,9 @@ static VALUE frb_ld_eql(VALUE self, VALUE other) {
|
|
389
387
|
} else {
|
390
388
|
TypedData_Get_Struct(other, rLazyDoc, &frb_ld_t, other_rld);
|
391
389
|
other_h = frb_ld_to_h(other);
|
392
|
-
other_size = other_rld->doc->
|
390
|
+
other_size = other_rld->doc->field_count;
|
393
391
|
}
|
394
|
-
if (ld->
|
392
|
+
if (ld->field_count == other_size) {
|
395
393
|
VALUE self_h = frb_ld_to_h(self);
|
396
394
|
return rb_funcall(self_h, id_eql, 1, other_h);
|
397
395
|
}
|
@@ -475,7 +473,7 @@ static VALUE frb_ld_has_value(VALUE self, VALUE value) {
|
|
475
473
|
if (!ld->loaded) frb_ld_load(self);
|
476
474
|
int i;
|
477
475
|
VALUE hvalue;
|
478
|
-
for (i=0; i<ld->
|
476
|
+
for (i=0; i<ld->field_count; i++) {
|
479
477
|
hvalue = (VALUE)frt_h_get(rld->hash, (void *)ID2SYM(ld->fields[i]->name));
|
480
478
|
hvalue = rb_funcall(hvalue, id_equal, 1, value);
|
481
479
|
if (hvalue == Qtrue) return Qtrue;
|
@@ -499,7 +497,7 @@ static VALUE frb_ld_key(VALUE self, VALUE value) {
|
|
499
497
|
if (!ld->loaded) frb_ld_load(self);
|
500
498
|
int i;
|
501
499
|
VALUE hvalue;
|
502
|
-
for (i=0; i<ld->
|
500
|
+
for (i=0; i<ld->field_count; i++) {
|
503
501
|
hvalue = (VALUE)frt_h_get(rld->hash, (void *)ID2SYM(ld->fields[i]->name));
|
504
502
|
hvalue = rb_funcall(hvalue, id_equal, 1, value);
|
505
503
|
if (hvalue == Qtrue) return ID2SYM(ld->fields[i]->name);
|
@@ -509,7 +507,7 @@ static VALUE frb_ld_key(VALUE self, VALUE value) {
|
|
509
507
|
|
510
508
|
static VALUE frb_ld_length(VALUE self) {
|
511
509
|
FrtLazyDoc *ld = ((rLazyDoc *)DATA_PTR(self))->doc;
|
512
|
-
return INT2FIX(ld->
|
510
|
+
return INT2FIX(ld->field_count);
|
513
511
|
}
|
514
512
|
|
515
513
|
static VALUE frb_ld_merge(int argc, VALUE *argv, VALUE self) {
|
@@ -6,7 +6,8 @@
|
|
6
6
|
|
7
7
|
// #undef close
|
8
8
|
|
9
|
-
VALUE mSearch;
|
9
|
+
static VALUE mSearch;
|
10
|
+
static VALUE mSpans;
|
10
11
|
|
11
12
|
static VALUE cHit;
|
12
13
|
static VALUE cTopDocs;
|
@@ -92,7 +93,7 @@ static VALUE sym_integer;
|
|
92
93
|
static VALUE sym_float;
|
93
94
|
static VALUE sym_string;
|
94
95
|
static VALUE sym_auto;
|
95
|
-
static VALUE
|
96
|
+
static VALUE sym_doc_num;
|
96
97
|
static VALUE sym_score;
|
97
98
|
static VALUE sym_byte;
|
98
99
|
|
@@ -145,7 +146,7 @@ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
|
|
145
146
|
****************************************************************************/
|
146
147
|
|
147
148
|
static VALUE frb_get_hit(FrtHit *hit) {
|
148
|
-
return rb_struct_new(cHit, INT2FIX(hit->
|
149
|
+
return rb_struct_new(cHit, INT2FIX(hit->doc_num), rb_float_new((double)hit->score), NULL);
|
149
150
|
}
|
150
151
|
|
151
152
|
/****************************************************************************
|
@@ -197,10 +198,10 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
|
|
197
198
|
|
198
199
|
for (i = 0; i < len; i++) {
|
199
200
|
VALUE rhit = RARRAY_PTR(rhits)[i];
|
200
|
-
int
|
201
|
+
int doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
|
201
202
|
const char *value = "";
|
202
203
|
size_t value_len = 0;
|
203
|
-
FrtLazyDoc *lzd = sea->get_lazy_doc(sea,
|
204
|
+
FrtLazyDoc *lzd = sea->get_lazy_doc(sea, doc_num);
|
204
205
|
FrtLazyDocField *lzdf = frt_lazy_doc_get(lzd, field);
|
205
206
|
if (NULL != lzdf) {
|
206
207
|
value = frt_lazy_df_get_data(lzdf, 0);
|
@@ -211,7 +212,7 @@ static VALUE frb_td_to_s(int argc, VALUE *argv, VALUE self) {
|
|
211
212
|
FRT_REALLOC_N(str, char, capa);
|
212
213
|
}
|
213
214
|
|
214
|
-
sprintf(str + p, "\t%d \"%s\": %0.5f\n",
|
215
|
+
sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_num, value,
|
215
216
|
NUM2DBL(rb_funcall(rhit, id_score, 0)));
|
216
217
|
p += strlen(str + p);
|
217
218
|
frt_lazy_doc_close(lzd);
|
@@ -229,7 +230,7 @@ static char *frb_lzd_load_to_json(FrtLazyDoc *lzd, char **str, char *s, int *sle
|
|
229
230
|
int len = diff, l;
|
230
231
|
FrtLazyDocField *f;
|
231
232
|
|
232
|
-
for (i = 0; i < lzd->
|
233
|
+
for (i = 0; i < lzd->field_count; i++) {
|
233
234
|
f = lzd->fields[i];
|
234
235
|
/* 3 times length of field to make space for quoted quotes ('"') and
|
235
236
|
* 4 times field elements to make space for '"' around fields and ','
|
@@ -244,7 +245,7 @@ static char *frb_lzd_load_to_json(FrtLazyDoc *lzd, char **str, char *s, int *sle
|
|
244
245
|
s = *str + diff;
|
245
246
|
}
|
246
247
|
|
247
|
-
for (i = 0; i < lzd->
|
248
|
+
for (i = 0; i < lzd->field_count; i++) {
|
248
249
|
const char *field_name;
|
249
250
|
f = lzd->fields[i];
|
250
251
|
field_name = rb_id2name(f->name);
|
@@ -278,7 +279,7 @@ static VALUE frb_td_to_json(VALUE self) {
|
|
278
279
|
FrtLazyDoc *lzd;
|
279
280
|
FrtSearcher *sea = (FrtSearcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
|
280
281
|
const int num_hits = RARRAY_LEN(rhits);
|
281
|
-
int
|
282
|
+
int doc_num;
|
282
283
|
int len = 32768;
|
283
284
|
char *str = FRT_ALLOC_N(char, len);
|
284
285
|
char *s = str;
|
@@ -289,8 +290,8 @@ static VALUE frb_td_to_json(VALUE self) {
|
|
289
290
|
if (i) *(s++) = ',';
|
290
291
|
*(s++) = '{';
|
291
292
|
rhit = RARRAY_PTR(rhits)[i];
|
292
|
-
|
293
|
-
lzd = sea->get_lazy_doc(sea,
|
293
|
+
doc_num = FIX2INT(rb_funcall(rhit, id_doc, 0));
|
294
|
+
lzd = sea->get_lazy_doc(sea, doc_num);
|
294
295
|
s = frb_lzd_load_to_json(lzd, &str, s, &len);
|
295
296
|
frt_lazy_doc_close(lzd);
|
296
297
|
*(s++) = '}';
|
@@ -1292,8 +1293,7 @@ static VALUE frb_phq_init(int argc, VALUE *argv, VALUE self) {
|
|
1292
1293
|
* # doesn't match => "big house"
|
1293
1294
|
*/
|
1294
1295
|
static VALUE
|
1295
|
-
frb_phq_add(int argc, VALUE *argv, VALUE self)
|
1296
|
-
{
|
1296
|
+
frb_phq_add(int argc, VALUE *argv, VALUE self) {
|
1297
1297
|
VALUE rterm, rpos_inc;
|
1298
1298
|
int pos_inc = 1;
|
1299
1299
|
FrtQuery *q = (FrtQuery *)DATA_PTR(self);
|
@@ -1688,8 +1688,7 @@ extern float frt_qp_default_fuzzy_min_sim;
|
|
1688
1688
|
* Set the default value for +:min_similarity+
|
1689
1689
|
*/
|
1690
1690
|
static VALUE
|
1691
|
-
frb_fq_set_dms(VALUE self, VALUE val)
|
1692
|
-
{
|
1691
|
+
frb_fq_set_dms(VALUE self, VALUE val) {
|
1693
1692
|
double min_sim = NUM2DBL(val);
|
1694
1693
|
if (min_sim >= 1.0) {
|
1695
1694
|
rb_raise(rb_eArgError,
|
@@ -1710,8 +1709,7 @@ frb_fq_set_dms(VALUE self, VALUE val)
|
|
1710
1709
|
* Get the default value for +:prefix_length+
|
1711
1710
|
*/
|
1712
1711
|
static VALUE
|
1713
|
-
frb_fq_get_dpl(VALUE self)
|
1714
|
-
{
|
1712
|
+
frb_fq_get_dpl(VALUE self) {
|
1715
1713
|
return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
|
1716
1714
|
}
|
1717
1715
|
|
@@ -1723,8 +1721,7 @@ extern int frt_qp_default_fuzzy_pre_len;
|
|
1723
1721
|
* Set the default value for +:prefix_length+
|
1724
1722
|
*/
|
1725
1723
|
static VALUE
|
1726
|
-
frb_fq_set_dpl(VALUE self, VALUE val)
|
1727
|
-
{
|
1724
|
+
frb_fq_set_dpl(VALUE self, VALUE val) {
|
1728
1725
|
int pre_len = FIX2INT(val);
|
1729
1726
|
if (pre_len < 0) {
|
1730
1727
|
rb_raise(rb_eArgError,
|
@@ -2695,7 +2692,7 @@ static int get_sort_type(VALUE rtype) {
|
|
2695
2692
|
return FRT_SORT_TYPE_STRING;
|
2696
2693
|
} else if (rtype == sym_score) {
|
2697
2694
|
return FRT_SORT_TYPE_SCORE;
|
2698
|
-
} else if (rtype ==
|
2695
|
+
} else if (rtype == sym_doc_num) {
|
2699
2696
|
return FRT_SORT_TYPE_DOC;
|
2700
2697
|
} else if (rtype == sym_float) {
|
2701
2698
|
return FRT_SORT_TYPE_FLOAT;
|
@@ -2703,7 +2700,7 @@ static int get_sort_type(VALUE rtype) {
|
|
2703
2700
|
return FRT_SORT_TYPE_AUTO;
|
2704
2701
|
} else {
|
2705
2702
|
rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
|
2706
|
-
"from [:integer, :float, :string, :auto, :score, :
|
2703
|
+
"from [:integer, :float, :string, :auto, :score, :doc_num]",
|
2707
2704
|
rb_id2name(SYM2ID(rtype)));
|
2708
2705
|
}
|
2709
2706
|
return FRT_SORT_TYPE_DOC;
|
@@ -2720,7 +2717,7 @@ static int get_sort_type(VALUE rtype) {
|
|
2720
2717
|
*
|
2721
2718
|
* :type:: Default: +:auto+. Specifies how a field should be sorted.
|
2722
2719
|
* Choose from one of; +:auto+, +:integer+, +:float+,
|
2723
|
-
* +:string+, +:byte+, +:
|
2720
|
+
* +:string+, +:byte+, +:doc_num+ or +:score+. +:auto+ will
|
2724
2721
|
* check the datatype of the field by trying to parse it into
|
2725
2722
|
* either a number or a float before settling on a string
|
2726
2723
|
* sort. String sort is locale dependent and works for
|
@@ -2789,7 +2786,7 @@ static VALUE frb_sf_get_name(VALUE self) {
|
|
2789
2786
|
* sort_field.type -> symbol
|
2790
2787
|
*
|
2791
2788
|
* Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
|
2792
|
-
* +:string+, +:byte+, +:
|
2789
|
+
* +:string+, +:byte+, +:doc_num+ or +:score+.
|
2793
2790
|
*/
|
2794
2791
|
static VALUE frb_sf_get_type(VALUE self) {
|
2795
2792
|
GET_SF();
|
@@ -2799,7 +2796,7 @@ static VALUE frb_sf_get_type(VALUE self) {
|
|
2799
2796
|
case FRT_SORT_TYPE_FLOAT: return sym_float;
|
2800
2797
|
case FRT_SORT_TYPE_STRING: return sym_string;
|
2801
2798
|
case FRT_SORT_TYPE_AUTO: return sym_auto;
|
2802
|
-
case FRT_SORT_TYPE_DOC: return
|
2799
|
+
case FRT_SORT_TYPE_DOC: return sym_doc_num;
|
2803
2800
|
case FRT_SORT_TYPE_SCORE: return sym_score;
|
2804
2801
|
}
|
2805
2802
|
return Qnil;
|
@@ -2903,7 +2900,7 @@ static void frb_parse_sort_str(FrtSort *sort, char *xsort_str) {
|
|
2903
2900
|
|
2904
2901
|
if (strcmp("SCORE", s) == 0) {
|
2905
2902
|
sf = frt_sort_field_score_new(reverse);
|
2906
|
-
} else if (strcmp("
|
2903
|
+
} else if (strcmp("DOC_NUM", s) == 0) {
|
2907
2904
|
sf = frt_sort_field_doc_new(reverse);
|
2908
2905
|
} else {
|
2909
2906
|
sf = frt_sort_field_auto_new(rb_intern(s), reverse);
|
@@ -2942,7 +2939,7 @@ static void frb_sort_add(FrtSort *sort, VALUE rsf, bool reverse) {
|
|
2942
2939
|
#define GET_SORT() FrtSort *sort = (FrtSort *)DATA_PTR(self)
|
2943
2940
|
/*
|
2944
2941
|
* call-seq:
|
2945
|
-
* Sort.new(sort_fields = [SortField::SCORE, SortField::
|
2942
|
+
* Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_NUM], reverse = false) -> Sort
|
2946
2943
|
*
|
2947
2944
|
* Create a new Sort object. If +reverse+ is true, all sort_fields will be
|
2948
2945
|
* reversed so if any of them are already reversed the will be turned back
|
@@ -3064,16 +3061,32 @@ static VALUE frb_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm) {
|
|
3064
3061
|
|
3065
3062
|
/*
|
3066
3063
|
* call-seq:
|
3067
|
-
* searcher.get_document(
|
3068
|
-
* searcher[
|
3064
|
+
* searcher.get_document(doc_num) -> LazyDoc
|
3065
|
+
* searcher[doc_num] -> LazyDoc
|
3069
3066
|
*
|
3070
3067
|
* Retrieve a document from the index. See LazyDoc for more details on the
|
3071
3068
|
* document returned. Documents are referenced internally by document ids
|
3072
3069
|
* which are returned by the Searchers search methods.
|
3073
3070
|
*/
|
3074
|
-
static VALUE frb_sea_doc(VALUE self, VALUE
|
3071
|
+
static VALUE frb_sea_doc(VALUE self, VALUE rdoc_num) {
|
3072
|
+
int ex_code = 0;
|
3073
|
+
const char *msg = NULL;
|
3075
3074
|
GET_SEA();
|
3076
|
-
|
3075
|
+
VALUE ld = Qnil;
|
3076
|
+
|
3077
|
+
FRT_TRY
|
3078
|
+
ld = frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_num)));
|
3079
|
+
FRT_XCATCHALL
|
3080
|
+
ex_code = xcontext.excode;
|
3081
|
+
msg = xcontext.msg;
|
3082
|
+
FRT_HANDLED();
|
3083
|
+
FRT_XENDTRY
|
3084
|
+
|
3085
|
+
if (ex_code && msg) {
|
3086
|
+
frb_raise(ex_code, msg);
|
3087
|
+
}
|
3088
|
+
|
3089
|
+
return ld;
|
3077
3090
|
}
|
3078
3091
|
|
3079
3092
|
/*
|
@@ -3085,13 +3098,13 @@ static VALUE frb_sea_doc(VALUE self, VALUE rdoc_id) {
|
|
3085
3098
|
* there are no deletions, this number also refers to the number of documents
|
3086
3099
|
* in the index.
|
3087
3100
|
*/
|
3088
|
-
static VALUE
|
3101
|
+
static VALUE frb_sea_max_doc_num(VALUE self) {
|
3089
3102
|
GET_SEA();
|
3090
|
-
return INT2FIX(sea->
|
3103
|
+
return INT2FIX(sea->max_doc_num(sea));
|
3091
3104
|
}
|
3092
3105
|
|
3093
|
-
static float call_filter_proc(int
|
3094
|
-
VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(
|
3106
|
+
static float call_filter_proc(int doc_num, float score, FrtSearcher *sea, void *arg) {
|
3107
|
+
VALUE val = rb_funcall((VALUE)arg, id_call, 3, INT2FIX(doc_num), rb_float_new((double)score), sea->rsea);
|
3095
3108
|
switch (TYPE(val)) {
|
3096
3109
|
case T_NIL:
|
3097
3110
|
case T_FALSE:
|
@@ -3192,8 +3205,7 @@ static FrtTopDocs *frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtS
|
|
3192
3205
|
post_filter_holder.filter_func = &call_filter_proc;
|
3193
3206
|
post_filter_holder.arg = (void *)rval;
|
3194
3207
|
post_filter = &post_filter_holder;
|
3195
|
-
}
|
3196
|
-
else {
|
3208
|
+
} else {
|
3197
3209
|
post_filter = DATA_PTR(rval);
|
3198
3210
|
}
|
3199
3211
|
}
|
@@ -3249,7 +3261,7 @@ static FrtTopDocs *frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtS
|
|
3249
3261
|
* to specify a fields type to sort it correctly. For more
|
3250
3262
|
* on this, see the documentation for SortField
|
3251
3263
|
* :filter:: a Filter object to filter the search results with
|
3252
|
-
* :filter_proc:: a filter Proc is a Proc which takes the
|
3264
|
+
* :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
|
3253
3265
|
* and the Searcher object as its parameters and returns
|
3254
3266
|
* either a Boolean value specifying whether the result
|
3255
3267
|
* should be included in the result set, or a Float between 0
|
@@ -3268,13 +3280,13 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
|
|
3268
3280
|
|
3269
3281
|
/*
|
3270
3282
|
* call-seq:
|
3271
|
-
* searcher.search_each(query, options = {}) {|
|
3283
|
+
* searcher.search_each(query, options = {}) {|doc_num, score| do_something}
|
3272
3284
|
* -> total_hits
|
3273
3285
|
*
|
3274
3286
|
* Run a query through the Searcher on the index. A TopDocs object is
|
3275
3287
|
* returned with the relevant results. The +query+ is a Query object. The
|
3276
3288
|
* Searcher#search_each method yields the internal document id (used to
|
3277
|
-
* reference documents in the Searcher object like this; +searcher[
|
3289
|
+
* reference documents in the Searcher object like this; +searcher[doc_num]+)
|
3278
3290
|
* and the search score for that document. It is possible for the score to be
|
3279
3291
|
* greater than 1.0 for some queries and taking boosts into account. This
|
3280
3292
|
* method will also normalize scores to the range 0.0..1.0 when the max-score
|
@@ -3302,7 +3314,7 @@ static VALUE frb_sea_search(int argc, VALUE *argv, VALUE self) {
|
|
3302
3314
|
* to specify a fields type to sort it correctly. For more
|
3303
3315
|
* on this, see the documentation for SortField
|
3304
3316
|
* :filter:: a Filter object to filter the search results with
|
3305
|
-
* :filter_proc:: a filter Proc is a Proc which takes the
|
3317
|
+
* :filter_proc:: a filter Proc is a Proc which takes the doc_num, the score
|
3306
3318
|
* and the Searcher object as its parameters and returns a
|
3307
3319
|
* Boolean value specifying whether the result should be
|
3308
3320
|
* included in the result set.
|
@@ -3323,7 +3335,7 @@ static VALUE frb_sea_search_each(int argc, VALUE *argv, VALUE self) {
|
|
3323
3335
|
|
3324
3336
|
/* yield normalized scores */
|
3325
3337
|
for (i = 0; i < td->size; i++) {
|
3326
|
-
rb_yield_values(2, INT2FIX(td->hits[i]->
|
3338
|
+
rb_yield_values(2, INT2FIX(td->hits[i]->doc_num), rb_float_new((double)(td->hits[i]->score/max_score)));
|
3327
3339
|
}
|
3328
3340
|
|
3329
3341
|
rtotal_hits = INT2FIX(td->total_hits);
|
@@ -3396,11 +3408,9 @@ static VALUE frb_sea_scan(int argc, VALUE *argv, VALUE self) {
|
|
3396
3408
|
if (limit <= 0) {
|
3397
3409
|
rb_raise(rb_eArgError, ":limit must be > 0");
|
3398
3410
|
}
|
3399
|
-
}
|
3400
|
-
else if (rval == sym_all) {
|
3411
|
+
} else if (rval == sym_all) {
|
3401
3412
|
limit = INT_MAX;
|
3402
|
-
}
|
3403
|
-
else {
|
3413
|
+
} else {
|
3404
3414
|
rb_raise(rb_eArgError, "%s is not a sensible :limit value "
|
3405
3415
|
"Please use a positive integer or :all",
|
3406
3416
|
rs2s(rb_obj_as_string(rval)));
|
@@ -3420,14 +3430,14 @@ static VALUE frb_sea_scan(int argc, VALUE *argv, VALUE self) {
|
|
3420
3430
|
|
3421
3431
|
/*
|
3422
3432
|
* call-seq:
|
3423
|
-
* searcher.explain(query,
|
3433
|
+
* searcher.explain(query, doc_num) -> Explanation
|
3424
3434
|
*
|
3425
3435
|
* Create an explanation object to explain the score returned for a
|
3426
|
-
* particular document at +
|
3436
|
+
* particular document at +doc_num+ in the index for the query +query+.
|
3427
3437
|
*
|
3428
3438
|
* Usually used like this;
|
3429
3439
|
*
|
3430
|
-
* puts searcher.explain(query,
|
3440
|
+
* puts searcher.explain(query, doc_num).to_s
|
3431
3441
|
*/
|
3432
3442
|
|
3433
3443
|
static size_t frb_explanation_size(const void *p) {
|
@@ -3458,17 +3468,17 @@ static VALUE frb_expl_alloc(VALUE rclass) {
|
|
3458
3468
|
return TypedData_Wrap_Struct(rclass, &frb_explanation_t, e);
|
3459
3469
|
}
|
3460
3470
|
|
3461
|
-
static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE
|
3471
|
+
static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_num) {
|
3462
3472
|
GET_SEA();
|
3463
3473
|
FrtQuery *query = DATA_PTR(rquery);
|
3464
3474
|
FrtExplanation *expl;
|
3465
|
-
expl = sea->explain(sea, query, FIX2INT(
|
3475
|
+
expl = sea->explain(sea, query, FIX2INT(rdoc_num));
|
3466
3476
|
return TypedData_Wrap_Struct(cExplanation, &frb_explanation_t, expl);
|
3467
3477
|
}
|
3468
3478
|
|
3469
3479
|
/*
|
3470
3480
|
* call-seq:
|
3471
|
-
* searcher.highlight(query,
|
3481
|
+
* searcher.highlight(query, doc_num, field, options = {}) -> Array
|
3472
3482
|
*
|
3473
3483
|
* Returns an array of strings with the matches highlighted.
|
3474
3484
|
*
|
@@ -3490,7 +3500,7 @@ static VALUE frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id) {
|
|
3490
3500
|
*/
|
3491
3501
|
static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
|
3492
3502
|
GET_SEA();
|
3493
|
-
VALUE rquery,
|
3503
|
+
VALUE rquery, rdoc_num, rfield, roptions, v;
|
3494
3504
|
int excerpt_length = 150;
|
3495
3505
|
int num_excerpts = 2;
|
3496
3506
|
const char *pre_tag = "<b>";
|
@@ -3498,7 +3508,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
|
|
3498
3508
|
const char *ellipsis = "...";
|
3499
3509
|
char **excerpts;
|
3500
3510
|
|
3501
|
-
rb_scan_args(argc, argv, "31", &rquery, &
|
3511
|
+
rb_scan_args(argc, argv, "31", &rquery, &rdoc_num, &rfield, &roptions);
|
3502
3512
|
FrtQuery *query = DATA_PTR(rquery);
|
3503
3513
|
if (argc > 3) {
|
3504
3514
|
if (TYPE(roptions) != T_HASH) {
|
@@ -3511,8 +3521,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
|
|
3511
3521
|
if (v == sym_all) {
|
3512
3522
|
num_excerpts = 1;
|
3513
3523
|
excerpt_length = INT_MAX/2;
|
3514
|
-
}
|
3515
|
-
else {
|
3524
|
+
} else {
|
3516
3525
|
excerpt_length = FIX2INT(v);
|
3517
3526
|
}
|
3518
3527
|
}
|
@@ -3529,7 +3538,7 @@ static VALUE frb_sea_highlight(int argc, VALUE *argv, VALUE self) {
|
|
3529
3538
|
|
3530
3539
|
if ((excerpts = frt_searcher_highlight(sea,
|
3531
3540
|
query,
|
3532
|
-
FIX2INT(
|
3541
|
+
FIX2INT(rdoc_num),
|
3533
3542
|
frb_field(rfield),
|
3534
3543
|
excerpt_length,
|
3535
3544
|
num_excerpts,
|
@@ -3605,6 +3614,7 @@ static VALUE frb_sea_init(VALUE self, VALUE obj) {
|
|
3605
3614
|
if (TYPE(obj) == T_STRING) {
|
3606
3615
|
frb_create_dir(obj);
|
3607
3616
|
store = frt_open_mdbx_store(rs2s(obj));
|
3617
|
+
store->create_folder(store, segm_idx_name);
|
3608
3618
|
ir = frt_ir_open(NULL, store);
|
3609
3619
|
ir->rir = TypedData_Wrap_Struct(cIndexReader, &frb_index_reader_t, ir);
|
3610
3620
|
} else {
|
@@ -3859,7 +3869,7 @@ static void Init_TopDocs(void) {
|
|
3859
3869
|
*
|
3860
3870
|
* == Example
|
3861
3871
|
*
|
3862
|
-
* puts searcher.explain(query,
|
3872
|
+
* puts searcher.explain(query, doc_num).to_s
|
3863
3873
|
*/
|
3864
3874
|
static void Init_Explanation(void) {
|
3865
3875
|
cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
|
@@ -4738,7 +4748,7 @@ static void Init_Filter(void) {
|
|
4738
4748
|
* * :float
|
4739
4749
|
* * :string
|
4740
4750
|
* * :byte
|
4741
|
-
* * :
|
4751
|
+
* * :doc_num
|
4742
4752
|
* * :score
|
4743
4753
|
*
|
4744
4754
|
* The type of the SortField is set by passing it as a parameter to the
|
@@ -4774,7 +4784,7 @@ static void Init_SortField(void) {
|
|
4774
4784
|
sym_float = ID2SYM(rb_intern("float"));
|
4775
4785
|
sym_string = ID2SYM(rb_intern("string"));
|
4776
4786
|
sym_auto = ID2SYM(rb_intern("auto"));
|
4777
|
-
|
4787
|
+
sym_doc_num = ID2SYM(rb_intern("doc_num"));
|
4778
4788
|
sym_score = ID2SYM(rb_intern("score"));
|
4779
4789
|
sym_byte = ID2SYM(rb_intern("byte"));
|
4780
4790
|
|
@@ -4794,12 +4804,12 @@ static void Init_SortField(void) {
|
|
4794
4804
|
rb_define_const(cSortField, "SCORE_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_SCORE_REV));
|
4795
4805
|
FRT_SORT_FIELD_SCORE_REV->rfield = rb_const_get(cSortField, rb_intern("SCORE_REV"));
|
4796
4806
|
|
4797
|
-
rb_define_const(cSortField, "
|
4798
|
-
oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("
|
4807
|
+
rb_define_const(cSortField, "DOC_NUM", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC));
|
4808
|
+
oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_NUM"));
|
4799
4809
|
FRT_SORT_FIELD_DOC->rfield = oSORT_FIELD_DOC;
|
4800
4810
|
|
4801
|
-
rb_define_const(cSortField, "
|
4802
|
-
FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("
|
4811
|
+
rb_define_const(cSortField, "DOC_NUM_REV", TypedData_Wrap_Struct(cSortField, &frb_sort_field_t, FRT_SORT_FIELD_DOC_REV));
|
4812
|
+
FRT_SORT_FIELD_DOC_REV->rfield = rb_const_get(cSortField, rb_intern("DOC_NUM_REV"));
|
4803
4813
|
}
|
4804
4814
|
|
4805
4815
|
/*
|
@@ -4861,8 +4871,8 @@ static void Init_Sort(void) {
|
|
4861
4871
|
*
|
4862
4872
|
* searcher.search_each(TermQuery.new(:content, "ferret")
|
4863
4873
|
* :filter => RangeFilter.new(:date, :< => "2006"),
|
4864
|
-
* :sort => "date DESC, title") do |
|
4865
|
-
* puts "#{searcher[
|
4874
|
+
* :sort => "date DESC, title") do |doc_num, score|
|
4875
|
+
* puts "#{searcher[doc_num][title] scored #{score}"
|
4866
4876
|
* end
|
4867
4877
|
*/
|
4868
4878
|
static void Init_Searcher(void) {
|
@@ -4892,7 +4902,7 @@ static void Init_Searcher(void) {
|
|
4892
4902
|
rb_define_method(cSearcher, "doc_freq", frb_sea_doc_freq, 2);
|
4893
4903
|
rb_define_method(cSearcher, "get_document", frb_sea_doc, 1);
|
4894
4904
|
rb_define_method(cSearcher, "[]", frb_sea_doc, 1);
|
4895
|
-
rb_define_method(cSearcher, "
|
4905
|
+
rb_define_method(cSearcher, "max_doc_num", frb_sea_max_doc_num, 0);
|
4896
4906
|
rb_define_method(cSearcher, "search", frb_sea_search, -1);
|
4897
4907
|
rb_define_method(cSearcher, "search_each", frb_sea_search_each, -1);
|
4898
4908
|
rb_define_method(cSearcher, "scan", frb_sea_scan, -1);
|
@@ -4934,9 +4944,7 @@ static void Init_MultiSearcher(void) {
|
|
4934
4944
|
*
|
4935
4945
|
* Happy Ferreting!!
|
4936
4946
|
*/
|
4937
|
-
void
|
4938
|
-
Init_Search(void)
|
4939
|
-
{
|
4947
|
+
void Init_Search(void) {
|
4940
4948
|
mSearch = rb_define_module_under(mFerret, "Search");
|
4941
4949
|
|
4942
4950
|
fsym_id = rb_intern("id");
|