isomorfeus-ferret 0.12.7 → 0.13.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/LICENSE +101 -19
- data/README.md +85 -13
- data/ext/isomorfeus_ferret_ext/bm_bitvector.c +22 -30
- data/ext/isomorfeus_ferret_ext/bm_hash.c +6 -12
- data/ext/isomorfeus_ferret_ext/bm_micro_string.c +3 -6
- data/ext/isomorfeus_ferret_ext/bm_store.c +11 -22
- data/ext/isomorfeus_ferret_ext/brotli_common_dictionary.c +1 -1
- data/ext/isomorfeus_ferret_ext/brotli_dec_decode.c +1 -1
- data/ext/isomorfeus_ferret_ext/bzlib.c +1572 -0
- data/ext/isomorfeus_ferret_ext/bzlib.h +282 -0
- data/ext/isomorfeus_ferret_ext/bzlib_blocksort.c +1094 -0
- data/ext/isomorfeus_ferret_ext/bzlib_compress.c +672 -0
- data/ext/isomorfeus_ferret_ext/bzlib_crctable.c +104 -0
- data/ext/isomorfeus_ferret_ext/bzlib_decompress.c +652 -0
- data/ext/isomorfeus_ferret_ext/bzlib_huffman.c +205 -0
- data/ext/isomorfeus_ferret_ext/bzlib_private.h +509 -0
- data/ext/isomorfeus_ferret_ext/bzlib_randtable.c +84 -0
- data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -53
- data/ext/isomorfeus_ferret_ext/frb_analysis.c +785 -1192
- data/ext/isomorfeus_ferret_ext/frb_index.c +497 -495
- data/ext/isomorfeus_ferret_ext/frb_qparser.c +48 -60
- data/ext/isomorfeus_ferret_ext/frb_search.c +1520 -1002
- data/ext/isomorfeus_ferret_ext/frb_store.c +96 -96
- data/ext/isomorfeus_ferret_ext/frb_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/frb_utils.c +147 -196
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +695 -1090
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +174 -170
- data/ext/isomorfeus_ferret_ext/frt_array.c +2 -4
- data/ext/isomorfeus_ferret_ext/frt_bitvector.c +9 -16
- data/ext/isomorfeus_ferret_ext/frt_bitvector.h +32 -81
- data/ext/isomorfeus_ferret_ext/frt_document.c +15 -20
- data/ext/isomorfeus_ferret_ext/frt_document.h +10 -10
- data/ext/isomorfeus_ferret_ext/frt_except.c +5 -12
- data/ext/isomorfeus_ferret_ext/frt_field_index.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_field_index.h +6 -7
- data/ext/isomorfeus_ferret_ext/frt_filter.c +35 -46
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +1 -0
- data/ext/isomorfeus_ferret_ext/frt_global.c +91 -200
- data/ext/isomorfeus_ferret_ext/frt_global.h +7 -18
- data/ext/isomorfeus_ferret_ext/frt_hash.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_ind.c +32 -35
- data/ext/isomorfeus_ferret_ext/frt_ind.h +9 -9
- data/ext/isomorfeus_ferret_ext/frt_index.c +603 -410
- data/ext/isomorfeus_ferret_ext/frt_index.h +272 -291
- data/ext/isomorfeus_ferret_ext/frt_lang.c +0 -2
- data/ext/isomorfeus_ferret_ext/frt_mempool.c +1 -2
- data/ext/isomorfeus_ferret_ext/frt_multimapper.c +4 -7
- data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +68 -91
- data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +35 -38
- data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +53 -72
- data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +25 -32
- data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +21 -23
- data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +66 -103
- data/ext/isomorfeus_ferret_ext/frt_q_parser.c +207 -195
- data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +20 -16
- data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +17 -14
- data/ext/isomorfeus_ferret_ext/frt_q_range.c +102 -131
- data/ext/isomorfeus_ferret_ext/frt_q_span.c +179 -178
- data/ext/isomorfeus_ferret_ext/frt_q_term.c +47 -60
- data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +18 -16
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +45 -84
- data/ext/isomorfeus_ferret_ext/frt_search.c +105 -146
- data/ext/isomorfeus_ferret_ext/frt_search.h +331 -320
- data/ext/isomorfeus_ferret_ext/frt_similarity.c +5 -13
- data/ext/isomorfeus_ferret_ext/frt_similarity.h +7 -12
- data/ext/isomorfeus_ferret_ext/frt_sort.c +105 -149
- data/ext/isomorfeus_ferret_ext/frt_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_store.h +10 -2
- data/ext/isomorfeus_ferret_ext/frt_threading.h +0 -1
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +22 -112
- data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +2 -32
- data/ext/isomorfeus_ferret_ext/lz4.c +2495 -0
- data/ext/isomorfeus_ferret_ext/lz4.h +774 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.c +1899 -0
- data/ext/isomorfeus_ferret_ext/lz4frame.h +623 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.c +1615 -0
- data/ext/isomorfeus_ferret_ext/lz4hc.h +413 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.c +1030 -0
- data/ext/isomorfeus_ferret_ext/lz4xxhash.h +328 -0
- data/ext/isomorfeus_ferret_ext/stem_modules.h +0 -86
- data/ext/isomorfeus_ferret_ext/test.c +0 -17
- data/ext/isomorfeus_ferret_ext/test_1710.c +11 -12
- data/ext/isomorfeus_ferret_ext/test_analysis.c +590 -583
- data/ext/isomorfeus_ferret_ext/test_compound_io.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_document.c +19 -15
- data/ext/isomorfeus_ferret_ext/test_except.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_fields.c +59 -60
- data/ext/isomorfeus_ferret_ext/test_file_deleter.c +10 -27
- data/ext/isomorfeus_ferret_ext/test_filter.c +11 -8
- data/ext/isomorfeus_ferret_ext/test_global.c +0 -46
- data/ext/isomorfeus_ferret_ext/test_hash.c +2 -2
- data/ext/isomorfeus_ferret_ext/test_hashset.c +1 -1
- data/ext/isomorfeus_ferret_ext/test_highlighter.c +15 -11
- data/ext/isomorfeus_ferret_ext/test_index.c +372 -365
- data/ext/isomorfeus_ferret_ext/test_q_const_score.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_filtered.c +5 -3
- data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +13 -10
- data/ext/isomorfeus_ferret_ext/test_q_parser.c +45 -7
- data/ext/isomorfeus_ferret_ext/test_q_span.c +15 -12
- data/ext/isomorfeus_ferret_ext/test_ram_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/test_search.c +60 -64
- data/ext/isomorfeus_ferret_ext/test_segments.c +5 -4
- data/ext/isomorfeus_ferret_ext/test_sort.c +17 -14
- data/ext/isomorfeus_ferret_ext/test_store.c +2 -0
- data/ext/isomorfeus_ferret_ext/test_term.c +3 -1
- data/ext/isomorfeus_ferret_ext/test_term_vectors.c +9 -10
- data/ext/isomorfeus_ferret_ext/test_test.c +1 -2
- data/ext/isomorfeus_ferret_ext/test_threading.c +9 -10
- data/ext/isomorfeus_ferret_ext/testhelper.c +1 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +27 -57
- data/ext/isomorfeus_ferret_ext/email.rl +0 -21
- data/ext/isomorfeus_ferret_ext/frt_scanner.c +0 -900
- data/ext/isomorfeus_ferret_ext/frt_scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +0 -6706
- data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +0 -4420
- data/ext/isomorfeus_ferret_ext/scanner.h +0 -28
- data/ext/isomorfeus_ferret_ext/scanner.in +0 -43
- data/ext/isomorfeus_ferret_ext/scanner.rl +0 -84
- data/ext/isomorfeus_ferret_ext/scanner_mb.rl +0 -200
- data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +0 -85
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.c +0 -1167
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_basque.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.c +0 -1433
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_catalan.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +0 -301
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +0 -590
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +0 -1049
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +0 -705
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +0 -1239
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +0 -477
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +0 -1217
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +0 -7
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.c +0 -394
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_indonesian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.c +0 -457
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_irish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +0 -1009
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +0 -259
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +0 -704
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +0 -948
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +0 -1028
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +0 -275
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.c +0 -849
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_hungarian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +0 -952
- data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +0 -669
- data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +0 -6
- data/ext/isomorfeus_ferret_ext/stem_modules.txt +0 -63
- data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +0 -1854
- data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +0 -1999
- data/ext/isomorfeus_ferret_ext/url.rl +0 -27
@@ -3,6 +3,8 @@
|
|
3
3
|
#include "frt_search.h"
|
4
4
|
#include "frt_array.h"
|
5
5
|
|
6
|
+
#undef close
|
7
|
+
|
6
8
|
/***************************************************************************
|
7
9
|
*
|
8
10
|
* FrtExplanation - Used to give details for query scores
|
@@ -305,47 +307,39 @@ static FrtQuery *q_rewrite(FrtQuery *self, FrtIndexReader *ir)
|
|
305
307
|
return self;
|
306
308
|
}
|
307
309
|
|
308
|
-
static void q_extract_terms(FrtQuery *self, FrtHashSet *terms)
|
309
|
-
{
|
310
|
+
static void q_extract_terms(FrtQuery *self, FrtHashSet *terms) {
|
310
311
|
/* do nothing by default */
|
311
312
|
(void)self;
|
312
313
|
(void)terms;
|
313
314
|
}
|
314
315
|
|
315
|
-
FrtSimilarity *frt_q_get_similarity_i(FrtQuery *self, FrtSearcher *searcher)
|
316
|
-
{
|
316
|
+
FrtSimilarity *frt_q_get_similarity_i(FrtQuery *self, FrtSearcher *searcher) {
|
317
317
|
(void)self;
|
318
318
|
return searcher->get_similarity(searcher);
|
319
319
|
}
|
320
320
|
|
321
|
-
void frt_q_destroy_i(FrtQuery *self)
|
322
|
-
{
|
321
|
+
void frt_q_destroy_i(FrtQuery *self) {
|
323
322
|
free(self);
|
324
323
|
}
|
325
324
|
|
326
|
-
void frt_q_deref(FrtQuery *self)
|
327
|
-
|
328
|
-
if (--(self->ref_cnt) == 0) {
|
325
|
+
void frt_q_deref(FrtQuery *self) {
|
326
|
+
if (--(self->ref_cnt) == 0)
|
329
327
|
self->destroy_i(self);
|
330
|
-
}
|
331
328
|
}
|
332
329
|
|
333
|
-
FrtWeight *frt_q_create_weight_unsup(FrtQuery *self, FrtSearcher *searcher)
|
334
|
-
{
|
330
|
+
FrtWeight *frt_q_create_weight_unsup(FrtQuery *self, FrtSearcher *searcher) {
|
335
331
|
(void)self;
|
336
332
|
(void)searcher;
|
337
|
-
FRT_RAISE(FRT_UNSUPPORTED_ERROR,
|
338
|
-
"Create weight is unsupported for this type of query");
|
333
|
+
FRT_RAISE(FRT_UNSUPPORTED_ERROR, "Create weight is unsupported for this type of query");
|
339
334
|
return NULL;
|
340
335
|
}
|
341
336
|
|
342
|
-
FrtWeight *frt_q_weight(FrtQuery *self, FrtSearcher *searcher)
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
|
348
|
-
float norm = frt_sim_query_norm(sim, sum);
|
337
|
+
FrtWeight *frt_q_weight(FrtQuery *self, FrtSearcher *searcher) {
|
338
|
+
FrtQuery *query = searcher->rewrite(searcher, self);
|
339
|
+
FrtWeight *weight = query->create_weight_i(query, searcher);
|
340
|
+
float sum = weight->sum_of_squared_weights(weight);
|
341
|
+
FrtSimilarity *sim = query->get_similarity(query, searcher);
|
342
|
+
float norm = frt_sim_query_norm(sim, sum);
|
349
343
|
frt_q_deref(query);
|
350
344
|
|
351
345
|
weight->normalize(weight, norm);
|
@@ -353,8 +347,8 @@ FrtWeight *frt_q_weight(FrtQuery *self, FrtSearcher *searcher)
|
|
353
347
|
}
|
354
348
|
|
355
349
|
#define BQ(query) ((FrtBooleanQuery *)(query))
|
356
|
-
|
357
|
-
{
|
350
|
+
|
351
|
+
FrtQuery *frt_q_combine(FrtQuery **queries, int q_cnt) {
|
358
352
|
int i;
|
359
353
|
FrtQuery *q, *ret_q;
|
360
354
|
FrtHashSet *uniques = frt_hs_new((frt_hash_ft)&frt_q_hash, (frt_eq_ft)&frt_q_eq, NULL);
|
@@ -402,28 +396,24 @@ FrtQuery *frt_q_combine(FrtQuery **queries, int q_cnt)
|
|
402
396
|
return ret_q;
|
403
397
|
}
|
404
398
|
|
405
|
-
unsigned long long frt_q_hash(FrtQuery *self)
|
406
|
-
{
|
399
|
+
unsigned long long frt_q_hash(FrtQuery *self) {
|
407
400
|
return (self->hash(self) << 5) | self->type;
|
408
401
|
}
|
409
402
|
|
410
|
-
int frt_q_eq(FrtQuery *self, FrtQuery *o)
|
411
|
-
{
|
403
|
+
int frt_q_eq(FrtQuery *self, FrtQuery *o) {
|
412
404
|
return (self == o)
|
413
405
|
|| ((self->type == o->type)
|
414
406
|
&& (self->boost == o->boost)
|
415
407
|
&& self->eq(self, o));
|
416
408
|
}
|
417
409
|
|
418
|
-
static FrtMatchVector *q_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv)
|
419
|
-
{
|
410
|
+
static FrtMatchVector *q_get_matchv_i(FrtQuery *self, FrtMatchVector *mv, FrtTermVector *tv) {
|
420
411
|
/* be default we don't add any matches */
|
421
412
|
(void)self; (void)tv;
|
422
413
|
return mv;
|
423
414
|
}
|
424
415
|
|
425
|
-
FrtQuery *frt_q_create(size_t size)
|
426
|
-
{
|
416
|
+
FrtQuery *frt_q_create(size_t size) {
|
427
417
|
FrtQuery *self = (FrtQuery *)frt_ecalloc(size);
|
428
418
|
#ifdef DEBUG
|
429
419
|
if (size < sizeof(FrtQuery)) {
|
@@ -431,13 +421,14 @@ FrtQuery *frt_q_create(size_t size)
|
|
431
421
|
"the size of a Query struct <%d>", (int)size, (int)sizeof(FrtQuery));
|
432
422
|
}
|
433
423
|
#endif
|
434
|
-
self->boost
|
435
|
-
self->rewrite
|
436
|
-
self->get_similarity
|
437
|
-
self->extract_terms
|
438
|
-
self->get_matchv_i
|
439
|
-
self->weight
|
440
|
-
self->ref_cnt
|
424
|
+
self->boost = 1.0f;
|
425
|
+
self->rewrite = &q_rewrite;
|
426
|
+
self->get_similarity = &frt_q_get_similarity_i;
|
427
|
+
self->extract_terms = &q_extract_terms;
|
428
|
+
self->get_matchv_i = &q_get_matchv_i;
|
429
|
+
self->weight = NULL;
|
430
|
+
self->ref_cnt = 1;
|
431
|
+
self->rquery = Qnil;
|
441
432
|
return self;
|
442
433
|
}
|
443
434
|
|
@@ -447,14 +438,12 @@ FrtQuery *frt_q_create(size_t size)
|
|
447
438
|
*
|
448
439
|
***************************************************************************/
|
449
440
|
|
450
|
-
void frt_scorer_destroy_i(FrtScorer *scorer)
|
451
|
-
{
|
441
|
+
void frt_scorer_destroy_i(FrtScorer *scorer) {
|
452
442
|
free(scorer);
|
453
443
|
}
|
454
444
|
|
455
|
-
FrtScorer *frt_scorer_create(size_t size, FrtSimilarity *similarity)
|
456
|
-
|
457
|
-
FrtScorer *self = (FrtScorer *)frt_ecalloc(size);
|
445
|
+
FrtScorer *frt_scorer_create(size_t size, FrtSimilarity *similarity) {
|
446
|
+
FrtScorer *self = (FrtScorer *)frt_ecalloc(size);
|
458
447
|
#ifdef DEBUG
|
459
448
|
if (size < sizeof(FrtScorer)) {
|
460
449
|
FRT_RAISE(FRT_ARG_ERROR, "size of scorer <%d> should be at least <%d>",
|
@@ -466,13 +455,11 @@ FrtScorer *frt_scorer_create(size_t size, FrtSimilarity *similarity)
|
|
466
455
|
return self;
|
467
456
|
}
|
468
457
|
|
469
|
-
bool frt_scorer_doc_less_than(const FrtScorer *s1, const FrtScorer *s2)
|
470
|
-
{
|
458
|
+
bool frt_scorer_doc_less_than(const FrtScorer *s1, const FrtScorer *s2) {
|
471
459
|
return s1->doc < s2->doc;
|
472
460
|
}
|
473
461
|
|
474
|
-
int frt_scorer_doc_cmp(const void *p1, const void *p2)
|
475
|
-
{
|
462
|
+
int frt_scorer_doc_cmp(const void *p1, const void *p2) {
|
476
463
|
return (*(FrtScorer **)p1)->doc - (*(FrtScorer **)p2)->doc;
|
477
464
|
}
|
478
465
|
|
@@ -497,8 +484,7 @@ static int match_range_cmp(const void *p1, const void *p2)
|
|
497
484
|
|
498
485
|
|
499
486
|
/* ** FrtMatchVector ** */
|
500
|
-
FrtMatchVector *frt_matchv_new()
|
501
|
-
{
|
487
|
+
FrtMatchVector *frt_matchv_new(void) {
|
502
488
|
FrtMatchVector *matchv = FRT_ALLOC(FrtMatchVector);
|
503
489
|
|
504
490
|
matchv->size = 0;
|
@@ -597,11 +583,7 @@ void frt_matchv_destroy(FrtMatchVector *self)
|
|
597
583
|
*
|
598
584
|
***************************************************************************/
|
599
585
|
|
600
|
-
FrtMatchVector *frt_searcher_get_match_vector(FrtSearcher *self,
|
601
|
-
FrtQuery *query,
|
602
|
-
const int doc_num,
|
603
|
-
FrtSymbol field)
|
604
|
-
{
|
586
|
+
FrtMatchVector *frt_searcher_get_match_vector(FrtSearcher *self, FrtQuery *query, const int doc_num, ID field) {
|
605
587
|
FrtMatchVector *mv = frt_matchv_new();
|
606
588
|
bool rewrite = query->get_matchv_i == q_get_matchv_i;
|
607
589
|
FrtTermVector *tv = self->get_term_vector(self, doc_num, field);
|
@@ -618,8 +600,7 @@ FrtMatchVector *frt_searcher_get_match_vector(FrtSearcher *self,
|
|
618
600
|
return mv;
|
619
601
|
}
|
620
602
|
|
621
|
-
typedef struct Excerpt
|
622
|
-
{
|
603
|
+
typedef struct Excerpt {
|
623
604
|
int start;
|
624
605
|
int end;
|
625
606
|
int start_pos;
|
@@ -812,13 +793,12 @@ static char *highlight_field(FrtMatchVector *mv,
|
|
812
793
|
char **frt_searcher_highlight(FrtSearcher *self,
|
813
794
|
FrtQuery *query,
|
814
795
|
const int doc_num,
|
815
|
-
|
796
|
+
ID field,
|
816
797
|
const int excerpt_len,
|
817
798
|
const int num_excerpts,
|
818
799
|
const char *pre_tag,
|
819
800
|
const char *post_tag,
|
820
|
-
const char *ellipsis)
|
821
|
-
{
|
801
|
+
const char *ellipsis) {
|
822
802
|
char **excerpt_strs = NULL;
|
823
803
|
FrtTermVector *tv = self->get_term_vector(self, doc_num, field);
|
824
804
|
FrtLazyDoc *lazy_doc = self->get_lazy_doc(self, doc_num);
|
@@ -970,8 +950,7 @@ static FrtSimilarity *sea_get_similarity(FrtSearcher *self)
|
|
970
950
|
|
971
951
|
#define ISEA(searcher) ((FrtIndexSearcher *)(searcher))
|
972
952
|
|
973
|
-
int frt_isea_doc_freq(FrtSearcher *self,
|
974
|
-
{
|
953
|
+
int frt_isea_doc_freq(FrtSearcher *self, ID field, const char *term) {
|
975
954
|
return frt_ir_doc_freq(ISEA(self)->ir, field, term);
|
976
955
|
}
|
977
956
|
|
@@ -1041,9 +1020,9 @@ static FrtTopDocs *isea_search_w(FrtSearcher *self,
|
|
1041
1020
|
}
|
1042
1021
|
} else {
|
1043
1022
|
hq = frt_pq_new(max_size, (frt_lt_ft)&hit_lt, &free);
|
1044
|
-
hq_pop = &hit_pq_pop;
|
1045
1023
|
hq_insert = &hit_pq_insert;
|
1046
1024
|
hq_destroy = &frt_pq_destroy;
|
1025
|
+
hq_pop = &hit_pq_pop;
|
1047
1026
|
}
|
1048
1027
|
|
1049
1028
|
scorer = weight->scorer(weight, ISEA(self)->ir);
|
@@ -1080,7 +1059,6 @@ static FrtTopDocs *isea_search_w(FrtSearcher *self,
|
|
1080
1059
|
} else {
|
1081
1060
|
num_docs = 0;
|
1082
1061
|
}
|
1083
|
-
frt_pq_clear(hq);
|
1084
1062
|
hq_destroy(hq);
|
1085
1063
|
return frt_td_new(total_hits, num_docs, score_docs, max_score);
|
1086
1064
|
}
|
@@ -1151,12 +1129,7 @@ static void isea_search_each(FrtSearcher *self, FrtQuery *query, FrtFilter *filt
|
|
1151
1129
|
* Note: Unlike the offset_docnum in other search methods, this offset_docnum
|
1152
1130
|
* refers to document number and not hit.
|
1153
1131
|
*/
|
1154
|
-
static int isea_search_unscored_w(FrtSearcher *self,
|
1155
|
-
FrtWeight *weight,
|
1156
|
-
int *buf,
|
1157
|
-
int limit,
|
1158
|
-
int offset_docnum)
|
1159
|
-
{
|
1132
|
+
static int isea_search_unscored_w(FrtSearcher *self, FrtWeight *weight, int *buf, int limit, int offset_docnum) {
|
1160
1133
|
int count = 0;
|
1161
1134
|
FrtScorer *scorer = weight->scorer(weight, ISEA(self)->ir);
|
1162
1135
|
if (scorer) {
|
@@ -1170,12 +1143,7 @@ static int isea_search_unscored_w(FrtSearcher *self,
|
|
1170
1143
|
return count;
|
1171
1144
|
}
|
1172
1145
|
|
1173
|
-
static int isea_search_unscored(FrtSearcher *self,
|
1174
|
-
FrtQuery *query,
|
1175
|
-
int *buf,
|
1176
|
-
int limit,
|
1177
|
-
int offset_docnum)
|
1178
|
-
{
|
1146
|
+
static int isea_search_unscored(FrtSearcher *self, FrtQuery *query, int *buf, int limit, int offset_docnum) {
|
1179
1147
|
int count;
|
1180
1148
|
FrtWeight *weight = frt_q_weight(query, self);
|
1181
1149
|
count = isea_search_unscored_w(self, weight, buf, limit, offset_docnum);
|
@@ -1183,8 +1151,7 @@ static int isea_search_unscored(FrtSearcher *self,
|
|
1183
1151
|
return count;
|
1184
1152
|
}
|
1185
1153
|
|
1186
|
-
static FrtQuery *isea_rewrite(FrtSearcher *self, FrtQuery *original)
|
1187
|
-
{
|
1154
|
+
static FrtQuery *isea_rewrite(FrtSearcher *self, FrtQuery *original) {
|
1188
1155
|
int q_is_destroyed = false;
|
1189
1156
|
FrtQuery *query = original;
|
1190
1157
|
FrtQuery *rewritten_query = query->rewrite(query, ISEA(self)->ir);
|
@@ -1197,41 +1164,34 @@ static FrtQuery *isea_rewrite(FrtSearcher *self, FrtQuery *original)
|
|
1197
1164
|
return query;
|
1198
1165
|
}
|
1199
1166
|
|
1200
|
-
static FrtExplanation *isea_explain(FrtSearcher *self,
|
1201
|
-
FrtQuery *query,
|
1202
|
-
int doc_num)
|
1203
|
-
{
|
1167
|
+
static FrtExplanation *isea_explain(FrtSearcher *self, FrtQuery *query, int doc_num) {
|
1204
1168
|
FrtWeight *weight = frt_q_weight(query, self);
|
1205
1169
|
FrtExplanation *e = weight->explain(weight, ISEA(self)->ir, doc_num);
|
1206
1170
|
weight->destroy(weight);
|
1207
1171
|
return e;
|
1208
1172
|
}
|
1209
1173
|
|
1210
|
-
static FrtExplanation *isea_explain_w(FrtSearcher *self, FrtWeight *w, int doc_num)
|
1211
|
-
{
|
1174
|
+
static FrtExplanation *isea_explain_w(FrtSearcher *self, FrtWeight *w, int doc_num) {
|
1212
1175
|
return w->explain(w, ISEA(self)->ir, doc_num);
|
1213
1176
|
}
|
1214
1177
|
|
1215
|
-
static FrtTermVector *isea_get_term_vector(FrtSearcher *self,
|
1216
|
-
const int doc_num,
|
1217
|
-
FrtSymbol field)
|
1218
|
-
{
|
1178
|
+
static FrtTermVector *isea_get_term_vector(FrtSearcher *self, const int doc_num, ID field) {
|
1219
1179
|
FrtIndexReader *ir = ISEA(self)->ir;
|
1220
1180
|
return ir->term_vector(ir, doc_num, field);
|
1221
1181
|
}
|
1222
1182
|
|
1223
|
-
static void isea_close(FrtSearcher *self)
|
1224
|
-
{
|
1183
|
+
static void isea_close(FrtSearcher *self) {
|
1225
1184
|
if (ISEA(self)->ir && ISEA(self)->close_ir) {
|
1226
1185
|
frt_ir_close(ISEA(self)->ir);
|
1227
1186
|
}
|
1228
1187
|
free(self);
|
1229
1188
|
}
|
1230
1189
|
|
1231
|
-
FrtSearcher *
|
1232
|
-
|
1233
|
-
|
1190
|
+
FrtSearcher *frt_isea_alloc(void) {
|
1191
|
+
return (FrtSearcher *)FRT_ALLOC(FrtIndexSearcher);
|
1192
|
+
}
|
1234
1193
|
|
1194
|
+
FrtSearcher *frt_isea_init(FrtSearcher *self, FrtIndexReader *ir) {
|
1235
1195
|
ISEA(self)->ir = ir;
|
1236
1196
|
ISEA(self)->close_ir = true;
|
1237
1197
|
|
@@ -1253,10 +1213,15 @@ FrtSearcher *frt_isea_new(FrtIndexReader *ir)
|
|
1253
1213
|
self->get_term_vector = &isea_get_term_vector;
|
1254
1214
|
self->get_similarity = &sea_get_similarity;
|
1255
1215
|
self->close = &isea_close;
|
1256
|
-
|
1216
|
+
self->rsea = Qnil;
|
1257
1217
|
return self;
|
1258
1218
|
}
|
1259
1219
|
|
1220
|
+
FrtSearcher *frt_isea_new(FrtIndexReader *ir) {
|
1221
|
+
FrtSearcher *self = frt_isea_alloc();
|
1222
|
+
return frt_isea_init(self, ir);
|
1223
|
+
}
|
1224
|
+
|
1260
1225
|
/***************************************************************************
|
1261
1226
|
*
|
1262
1227
|
* CachedDFSearcher
|
@@ -1271,8 +1236,7 @@ typedef struct CachedDFSearcher
|
|
1271
1236
|
int max_doc;
|
1272
1237
|
} CachedDFSearcher;
|
1273
1238
|
|
1274
|
-
static int cdfsea_doc_freq(FrtSearcher *self,
|
1275
|
-
{
|
1239
|
+
static int cdfsea_doc_freq(FrtSearcher *self, ID field, const char *text) {
|
1276
1240
|
FrtTerm term;
|
1277
1241
|
int *df;
|
1278
1242
|
term.field = field;
|
@@ -1358,9 +1322,7 @@ static FrtExplanation *cdfsea_explain_w(FrtSearcher *self, FrtWeight *w, int doc
|
|
1358
1322
|
return NULL;
|
1359
1323
|
}
|
1360
1324
|
|
1361
|
-
static FrtTermVector *cdfsea_get_term_vector(FrtSearcher *self, const int doc_num,
|
1362
|
-
FrtSymbol field)
|
1363
|
-
{
|
1325
|
+
static FrtTermVector *cdfsea_get_term_vector(FrtSearcher *self, const int doc_num, ID field) {
|
1364
1326
|
(void)self; (void)doc_num; (void)field;
|
1365
1327
|
FRT_RAISE(FRT_UNSUPPORTED_ERROR, "%s", FRT_UNSUPPORTED_ERROR_MSG);
|
1366
1328
|
return NULL;
|
@@ -1436,8 +1398,7 @@ static int msea_get_searcher_index(FrtSearcher *self, int n)
|
|
1436
1398
|
return hi;
|
1437
1399
|
}
|
1438
1400
|
|
1439
|
-
static int msea_doc_freq(FrtSearcher *self,
|
1440
|
-
{
|
1401
|
+
static int msea_doc_freq(FrtSearcher *self, ID field, const char *term) {
|
1441
1402
|
int i;
|
1442
1403
|
int doc_freq = 0;
|
1443
1404
|
FrtMultiSearcher *msea = MSEA(self);
|
@@ -1599,8 +1560,7 @@ static int msea_search_unscored(FrtSearcher *self,
|
|
1599
1560
|
FrtQuery *query,
|
1600
1561
|
int *buf,
|
1601
1562
|
int limit,
|
1602
|
-
int offset_docnum)
|
1603
|
-
{
|
1563
|
+
int offset_docnum) {
|
1604
1564
|
int count;
|
1605
1565
|
FrtWeight *weight = frt_q_weight(query, self);
|
1606
1566
|
count = msea_search_unscored_w(self, weight, buf, limit, offset_docnum);
|
@@ -1636,8 +1596,7 @@ static FrtTopDocs *msea_search_w(FrtSearcher *self,
|
|
1636
1596
|
FrtFilter *filter,
|
1637
1597
|
FrtSort *sort,
|
1638
1598
|
FrtPostFilter *post_filter,
|
1639
|
-
bool load_fields)
|
1640
|
-
{
|
1599
|
+
bool load_fields) {
|
1641
1600
|
int max_size = num_docs + (num_docs == INT_MAX ? 0 : first_doc);
|
1642
1601
|
int i;
|
1643
1602
|
int total_hits = 0;
|
@@ -1705,8 +1664,7 @@ static FrtTopDocs *msea_search(FrtSearcher *self,
|
|
1705
1664
|
FrtFilter *filter,
|
1706
1665
|
FrtSort *sort,
|
1707
1666
|
FrtPostFilter *post_filter,
|
1708
|
-
bool load_fields)
|
1709
|
-
{
|
1667
|
+
bool load_fields) {
|
1710
1668
|
FrtTopDocs *td;
|
1711
1669
|
FrtWeight *weight = frt_q_weight(query, self);
|
1712
1670
|
td = msea_search_w(self, weight, first_doc, num_docs, filter,
|
@@ -1715,8 +1673,7 @@ static FrtTopDocs *msea_search(FrtSearcher *self,
|
|
1715
1673
|
return td;
|
1716
1674
|
}
|
1717
1675
|
|
1718
|
-
static FrtQuery *msea_rewrite(FrtSearcher *self, FrtQuery *original)
|
1719
|
-
{
|
1676
|
+
static FrtQuery *msea_rewrite(FrtSearcher *self, FrtQuery *original) {
|
1720
1677
|
int i;
|
1721
1678
|
FrtSearcher *s;
|
1722
1679
|
FrtMultiSearcher *msea = MSEA(self);
|
@@ -1735,8 +1692,7 @@ static FrtQuery *msea_rewrite(FrtSearcher *self, FrtQuery *original)
|
|
1735
1692
|
return rewritten;
|
1736
1693
|
}
|
1737
1694
|
|
1738
|
-
static FrtExplanation *msea_explain(FrtSearcher *self, FrtQuery *query, int doc_num)
|
1739
|
-
{
|
1695
|
+
static FrtExplanation *msea_explain(FrtSearcher *self, FrtQuery *query, int doc_num) {
|
1740
1696
|
FrtMultiSearcher *msea = MSEA(self);
|
1741
1697
|
int i = msea_get_searcher_index(self, doc_num);
|
1742
1698
|
FrtWeight *w = frt_q_weight(query, self);
|
@@ -1746,8 +1702,7 @@ static FrtExplanation *msea_explain(FrtSearcher *self, FrtQuery *query, int doc_
|
|
1746
1702
|
return e;
|
1747
1703
|
}
|
1748
1704
|
|
1749
|
-
static FrtExplanation *msea_explain_w(FrtSearcher *self, FrtWeight *w, int doc_num)
|
1750
|
-
{
|
1705
|
+
static FrtExplanation *msea_explain_w(FrtSearcher *self, FrtWeight *w, int doc_num) {
|
1751
1706
|
FrtMultiSearcher *msea = MSEA(self);
|
1752
1707
|
int i = msea_get_searcher_index(self, doc_num);
|
1753
1708
|
FrtSearcher *s = msea->searchers[i];
|
@@ -1755,22 +1710,18 @@ static FrtExplanation *msea_explain_w(FrtSearcher *self, FrtWeight *w, int doc_n
|
|
1755
1710
|
return e;
|
1756
1711
|
}
|
1757
1712
|
|
1758
|
-
static FrtTermVector *msea_get_term_vector(FrtSearcher *self, const int doc_num,
|
1759
|
-
FrtSymbol field)
|
1760
|
-
{
|
1713
|
+
static FrtTermVector *msea_get_term_vector(FrtSearcher *self, const int doc_num, ID field) {
|
1761
1714
|
FrtMultiSearcher *msea = MSEA(self);
|
1762
1715
|
int i = msea_get_searcher_index(self, doc_num);
|
1763
1716
|
FrtSearcher *s = msea->searchers[i];
|
1764
1717
|
return s->get_term_vector(s, doc_num - msea->starts[i], field);
|
1765
1718
|
}
|
1766
1719
|
|
1767
|
-
static FrtSimilarity *msea_get_similarity(FrtSearcher *self)
|
1768
|
-
{
|
1720
|
+
static FrtSimilarity *msea_get_similarity(FrtSearcher *self) {
|
1769
1721
|
return self->similarity;
|
1770
1722
|
}
|
1771
1723
|
|
1772
|
-
static void msea_close(FrtSearcher *self)
|
1773
|
-
{
|
1724
|
+
static void msea_close(FrtSearcher *self) {
|
1774
1725
|
int i;
|
1775
1726
|
FrtSearcher *s;
|
1776
1727
|
FrtMultiSearcher *msea = MSEA(self);
|
@@ -1785,10 +1736,12 @@ static void msea_close(FrtSearcher *self)
|
|
1785
1736
|
free(self);
|
1786
1737
|
}
|
1787
1738
|
|
1788
|
-
FrtSearcher *
|
1789
|
-
|
1739
|
+
FrtSearcher *frt_msea_alloc(void) {
|
1740
|
+
return (FrtSearcher *)FRT_ALLOC(FrtMultiSearcher);
|
1741
|
+
}
|
1742
|
+
|
1743
|
+
FrtSearcher *frt_msea_init(FrtSearcher *self, FrtSearcher **searchers, int s_cnt, bool close_subs) {
|
1790
1744
|
int i, max_doc = 0;
|
1791
|
-
FrtSearcher *self = (FrtSearcher *)FRT_ALLOC(FrtMultiSearcher);
|
1792
1745
|
int *starts = FRT_ALLOC_N(int, s_cnt + 1);
|
1793
1746
|
for (i = 0; i < s_cnt; i++) {
|
1794
1747
|
starts[i] = max_doc;
|
@@ -1796,29 +1749,35 @@ FrtSearcher *frt_msea_new(FrtSearcher **searchers, int s_cnt, bool close_subs)
|
|
1796
1749
|
}
|
1797
1750
|
starts[i] = max_doc;
|
1798
1751
|
|
1799
|
-
MSEA(self)->s_cnt
|
1800
|
-
MSEA(self)->searchers
|
1801
|
-
MSEA(self)->starts
|
1802
|
-
MSEA(self)->max_doc
|
1803
|
-
MSEA(self)->close_subs
|
1804
|
-
|
1805
|
-
self->similarity
|
1806
|
-
self->doc_freq
|
1807
|
-
self->get_doc
|
1808
|
-
self->get_lazy_doc
|
1809
|
-
self->max_doc
|
1810
|
-
self->create_weight
|
1811
|
-
self->search
|
1812
|
-
self->search_w
|
1813
|
-
self->search_each
|
1814
|
-
self->search_each_w
|
1815
|
-
self->search_unscored
|
1816
|
-
self->search_unscored_w
|
1817
|
-
self->rewrite
|
1818
|
-
self->explain
|
1819
|
-
self->explain_w
|
1820
|
-
self->get_term_vector
|
1821
|
-
self->get_similarity
|
1822
|
-
self->close
|
1752
|
+
MSEA(self)->s_cnt = s_cnt;
|
1753
|
+
MSEA(self)->searchers = searchers;
|
1754
|
+
MSEA(self)->starts = starts;
|
1755
|
+
MSEA(self)->max_doc = max_doc;
|
1756
|
+
MSEA(self)->close_subs = close_subs;
|
1757
|
+
|
1758
|
+
self->similarity = frt_sim_create_default();
|
1759
|
+
self->doc_freq = &msea_doc_freq;
|
1760
|
+
self->get_doc = &msea_get_doc;
|
1761
|
+
self->get_lazy_doc = &msea_get_lazy_doc;
|
1762
|
+
self->max_doc = &msea_max_doc;
|
1763
|
+
self->create_weight = &msea_create_weight;
|
1764
|
+
self->search = &msea_search;
|
1765
|
+
self->search_w = &msea_search_w;
|
1766
|
+
self->search_each = &msea_search_each;
|
1767
|
+
self->search_each_w = &msea_search_each_w;
|
1768
|
+
self->search_unscored = &msea_search_unscored;
|
1769
|
+
self->search_unscored_w = &msea_search_unscored_w;
|
1770
|
+
self->rewrite = &msea_rewrite;
|
1771
|
+
self->explain = &msea_explain;
|
1772
|
+
self->explain_w = &msea_explain_w;
|
1773
|
+
self->get_term_vector = &msea_get_term_vector;
|
1774
|
+
self->get_similarity = &msea_get_similarity;
|
1775
|
+
self->close = &msea_close;
|
1776
|
+
self->rsea = Qnil;
|
1823
1777
|
return self;
|
1824
1778
|
}
|
1779
|
+
|
1780
|
+
FrtSearcher *frt_msea_new(FrtSearcher **searchers, int s_cnt, bool close_subs) {
|
1781
|
+
FrtSearcher *self = frt_msea_alloc();
|
1782
|
+
return frt_msea_init(self, searchers, s_cnt, close_subs);
|
1783
|
+
}
|