ferret 0.10.14 → 0.11.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +3 -0
- data/ext/analysis.c +5 -0
- data/ext/compound_io.c +46 -24
- data/ext/except.c +14 -0
- data/ext/except.h +29 -17
- data/ext/ferret.c +22 -1
- data/ext/ferret.h +2 -1
- data/ext/fs_store.c +9 -12
- data/ext/global.c +80 -0
- data/ext/global.h +10 -0
- data/ext/hash.c +0 -7
- data/ext/hash.h +0 -8
- data/ext/index.c +1289 -625
- data/ext/index.h +59 -14
- data/ext/q_boolean.c +12 -5
- data/ext/q_parser.c +570 -372
- data/ext/r_analysis.c +16 -16
- data/ext/r_index.c +41 -43
- data/ext/r_qparser.c +37 -36
- data/ext/r_search.c +10 -10
- data/ext/r_store.c +7 -7
- data/ext/ram_store.c +4 -3
- data/ext/search.c +3 -2
- data/ext/store.c +35 -19
- data/ext/store.h +3 -5
- data/lib/ferret/index.rb +4 -4
- data/lib/ferret_version.rb +1 -1
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +17 -21
- data/test/unit/index/tc_index.rb +6 -2
- data/test/unit/index/tc_index_writer.rb +2 -2
- data/test/unit/query_parser/tc_query_parser.rb +20 -5
- data/test/unit/search/tc_index_searcher.rb +3 -1
- data/test/unit/search/tm_searcher.rb +3 -1
- metadata +3 -2
data/ext/hash.c
CHANGED
@@ -18,13 +18,6 @@ static char *dummy_key = "";
|
|
18
18
|
static HashTable *free_hts[MAX_FREE_HASH_TABLES];
|
19
19
|
static int num_free_hts = 0;
|
20
20
|
|
21
|
-
unsigned long *imalloc(unsigned long value)
|
22
|
-
{
|
23
|
-
unsigned long *p = ALLOC(unsigned long);
|
24
|
-
*p = value;
|
25
|
-
return p;
|
26
|
-
}
|
27
|
-
|
28
21
|
unsigned long str_hash(const char *const str)
|
29
22
|
{
|
30
23
|
register unsigned long h = 0;
|
data/ext/hash.h
CHANGED
@@ -81,14 +81,6 @@ typedef unsigned long (*hash_ft)(const void *key);
|
|
81
81
|
*/
|
82
82
|
typedef int (*eq_ft)(const void *key1, const void *key2);
|
83
83
|
|
84
|
-
|
85
|
-
/**
|
86
|
-
* Create a pointer to an allocated U32 integer. This function is a utility
|
87
|
-
* function used to add integers to a HashTable, either as the key or the
|
88
|
-
* value.
|
89
|
-
*/
|
90
|
-
extern unsigned long *imalloc(unsigned long value);
|
91
|
-
|
92
84
|
/**
|
93
85
|
* Determine a hash value for a string. The string must be null terminated
|
94
86
|
*
|
data/ext/index.c
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#include "priorityqueue.h"
|
6
6
|
#include <string.h>
|
7
7
|
#include <limits.h>
|
8
|
+
#include <ctype.h>
|
8
9
|
|
9
10
|
#define GET_LOCK(lock, name, store, err_msg) do {\
|
10
11
|
lock = store->open_lock(store, name);\
|
@@ -18,14 +19,6 @@
|
|
18
19
|
store->close_lock(lock);\
|
19
20
|
} while (0)
|
20
21
|
|
21
|
-
const char *INDEX_EXTENSIONS[] = {
|
22
|
-
"fdx", "fdt", "tfx", "tix", "tis", "frq", "prx", "del"
|
23
|
-
};
|
24
|
-
|
25
|
-
const char *COMPOUND_EXTENSIONS[] = {
|
26
|
-
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
|
27
|
-
};
|
28
|
-
|
29
22
|
const Config default_config = {
|
30
23
|
0x100000, /* chunk size is 1Mb */
|
31
24
|
0x1000000, /* Max memory used for buffer is 16 Mb */
|
@@ -41,6 +34,128 @@ const Config default_config = {
|
|
41
34
|
static void ste_reset(TermEnum *te);
|
42
35
|
static char *ste_next(TermEnum *te);
|
43
36
|
|
37
|
+
#define FORMAT 0
|
38
|
+
#define SEGMENTS_GEN_FILE_NAME "segments.gen"
|
39
|
+
#define MAX_EXT_LEN 10
|
40
|
+
|
41
|
+
/* *** Must be three characters *** */
|
42
|
+
const char *INDEX_EXTENSIONS[] = {
|
43
|
+
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen", "cfs"
|
44
|
+
};
|
45
|
+
|
46
|
+
/* *** Must be three characters *** */
|
47
|
+
const char *COMPOUND_EXTENSIONS[] = {
|
48
|
+
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
|
49
|
+
};
|
50
|
+
|
51
|
+
|
52
|
+
static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
53
|
+
|
54
|
+
static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
|
55
|
+
{
|
56
|
+
int i = buf_size--;
|
57
|
+
buf[i] = '\0';
|
58
|
+
for (i--; i >= 0; i--) {
|
59
|
+
buf[i] = BASE36_DIGITMAP[u % 36];
|
60
|
+
u /= 36;
|
61
|
+
if (0 == u) {
|
62
|
+
break;
|
63
|
+
}
|
64
|
+
}
|
65
|
+
if (0 < u) {
|
66
|
+
RAISE(EXCEPTION, "Max length of segment filename has been reached. "
|
67
|
+
"Perhaps it's time to re-index.\n");
|
68
|
+
}
|
69
|
+
return buf + i;
|
70
|
+
}
|
71
|
+
|
72
|
+
static f_u64 str36_to_u64(char *p)
|
73
|
+
{
|
74
|
+
f_u64 u = 0;
|
75
|
+
while (true) {
|
76
|
+
if ('0' <= *p && '9' >= *p) {
|
77
|
+
u = u * 36 + *p - '0';
|
78
|
+
}
|
79
|
+
else if ('a' <= *p && 'z' >= *p) {
|
80
|
+
u = u * 36 + *p - 'a' + 10;
|
81
|
+
}
|
82
|
+
else {
|
83
|
+
break;
|
84
|
+
}
|
85
|
+
p++;
|
86
|
+
}
|
87
|
+
return u;
|
88
|
+
}
|
89
|
+
|
90
|
+
/*
|
91
|
+
* Computes the full file name from base, extension and generation. If the
|
92
|
+
* generation is -1, NULL is returned. Otherwise the file name is
|
93
|
+
* <base>_<generation>.<extension>, or <base>_<generation> when ext is NULL,
|
94
|
+
* with the generation written in base 36.
|
95
|
+
*
|
96
|
+
* @param buf buffer to write filename to
|
97
|
+
* @param base main part of the file name
|
98
|
+
* @param ext extension of the filename (without the leading '.'), or NULL
|
99
|
+
* @param gen generation
|
100
|
+
*/
|
101
|
+
char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
|
102
|
+
{
|
103
|
+
if (-1 == gen) {
|
104
|
+
return NULL;
|
105
|
+
}
|
106
|
+
else {
|
107
|
+
char b[SEGMENT_NAME_MAX_LENGTH];
|
108
|
+
char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen);
|
109
|
+
if (ext == NULL) {
|
110
|
+
sprintf(buf, "%s_%s", base, u);
|
111
|
+
}
|
112
|
+
else {
|
113
|
+
sprintf(buf, "%s_%s.%s", base, u, ext);
|
114
|
+
}
|
115
|
+
return buf;
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
119
|
+
char *segfn_for_generation(char *buf, int generation)
|
120
|
+
{
|
121
|
+
char b[SEGMENT_NAME_MAX_LENGTH];
|
122
|
+
char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)generation);
|
123
|
+
sprintf(buf, SEGMENTS_FILE_NAME"_%s", u);
|
124
|
+
return buf;
|
125
|
+
}
|
126
|
+
|
127
|
+
/*
|
128
|
+
* Computes the field specific file name from base, extension, generation and
|
129
|
+
* field number. If the generation is -1, NULL is returned. Otherwise the
|
130
|
+
* file name is <base>_<generation>.<extension><field_num>, with the
|
131
|
+
* generation written in base 36.
|
132
|
+
*
|
133
|
+
* @param buf buffer to write filename to
|
134
|
+
* @param base main part of the file name
|
135
|
+
* @param ext extension of the filename (without the leading '.')
|
136
|
+
* @param gen generation
|
137
|
+
* @param field_num field number
|
138
|
+
*/
|
139
|
+
static char *fn_for_gen_field(char *buf,
|
140
|
+
char *base,
|
141
|
+
char *ext,
|
142
|
+
f_i64 gen,
|
143
|
+
int field_num)
|
144
|
+
{
|
145
|
+
if (-1 == gen) {
|
146
|
+
return NULL;
|
147
|
+
}
|
148
|
+
else {
|
149
|
+
char b[SEGMENT_NAME_MAX_LENGTH];
|
150
|
+
sprintf(buf, "%s_%s.%s%d",
|
151
|
+
base,
|
152
|
+
u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen),
|
153
|
+
ext,
|
154
|
+
field_num);
|
155
|
+
return buf;
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
44
159
|
/***************************************************************************
|
45
160
|
*
|
46
161
|
* CacheObject
|
@@ -175,7 +290,7 @@ FieldInfo *fi_new(const char *name,
|
|
175
290
|
|
176
291
|
void fi_deref(FieldInfo *fi)
|
177
292
|
{
|
178
|
-
if (--(fi->ref_cnt)
|
293
|
+
if (0 == --(fi->ref_cnt)) {
|
179
294
|
free(fi->name);
|
180
295
|
free(fi);
|
181
296
|
}
|
@@ -208,9 +323,6 @@ char *fi_to_s(FieldInfo *fi)
|
|
208
323
|
*
|
209
324
|
****************************************************************************/
|
210
325
|
|
211
|
-
#define FIELDS_FILENAME "fields"
|
212
|
-
#define TEMPORARY_FIELDS_FILENAME "fields.new"
|
213
|
-
|
214
326
|
FieldInfos *fis_new(int store, int index, int term_vector)
|
215
327
|
{
|
216
328
|
FieldInfos *fis = ALLOC(FieldInfos);
|
@@ -278,14 +390,13 @@ FieldInfo *fis_by_number(FieldInfos *fis, int num)
|
|
278
390
|
}
|
279
391
|
}
|
280
392
|
|
281
|
-
FieldInfos *fis_read(
|
393
|
+
FieldInfos *fis_read(InStream *is)
|
282
394
|
{
|
283
395
|
int store_val, index_val, term_vector_val;
|
284
396
|
int i;
|
285
397
|
union { f_u32 i; float f; } tmp;
|
286
398
|
FieldInfo *fi;
|
287
399
|
FieldInfos *fis;
|
288
|
-
InStream *is = store->open_input(store, FIELDS_FILENAME);
|
289
400
|
|
290
401
|
store_val = is_read_vint(is);
|
291
402
|
index_val = is_read_vint(is);
|
@@ -300,17 +411,15 @@ FieldInfos *fis_read(Store *store)
|
|
300
411
|
fis_add_field(fis, fi);
|
301
412
|
fi->ref_cnt = 1;
|
302
413
|
}
|
303
|
-
is_close(is);
|
304
414
|
|
305
415
|
return fis;
|
306
416
|
}
|
307
417
|
|
308
|
-
void fis_write(FieldInfos *fis,
|
418
|
+
void fis_write(FieldInfos *fis, OutStream *os)
|
309
419
|
{
|
310
420
|
int i;
|
311
421
|
union { f_u32 i; float f; } tmp;
|
312
422
|
FieldInfo *fi;
|
313
|
-
OutStream *os = store->new_output(store, TEMPORARY_FIELDS_FILENAME);
|
314
423
|
const int fis_size = fis->size;
|
315
424
|
|
316
425
|
os_write_vint(os, fis->store);
|
@@ -324,9 +433,6 @@ void fis_write(FieldInfos *fis, Store *store)
|
|
324
433
|
os_write_u32(os, tmp.i);
|
325
434
|
os_write_vint(os, fi->bits);
|
326
435
|
}
|
327
|
-
os_close(os);
|
328
|
-
|
329
|
-
store->rename(store, TEMPORARY_FIELDS_FILENAME, FIELDS_FILENAME);
|
330
436
|
}
|
331
437
|
|
332
438
|
static const char *store_str[] = {
|
@@ -408,7 +514,7 @@ char *fis_to_s(FieldInfos *fis)
|
|
408
514
|
|
409
515
|
void fis_deref(FieldInfos *fis)
|
410
516
|
{
|
411
|
-
if (--(fis->ref_cnt)
|
517
|
+
if (0 == --(fis->ref_cnt)) {
|
412
518
|
h_destroy(fis->field_dict);
|
413
519
|
free(fis->fields);
|
414
520
|
free(fis);
|
@@ -440,54 +546,144 @@ SegmentInfo *si_new(char *name, int doc_cnt, Store *store)
|
|
440
546
|
si->name = name;
|
441
547
|
si->doc_cnt = doc_cnt;
|
442
548
|
si->store = store;
|
549
|
+
si->del_gen = -1;
|
550
|
+
si->norm_gens = NULL;
|
551
|
+
si->norm_gens_size = 0;
|
552
|
+
si->ref_cnt = 1;
|
553
|
+
si->use_compound_file = false;
|
443
554
|
return si;
|
444
555
|
}
|
445
556
|
|
446
|
-
|
557
|
+
SegmentInfo *si_read(Store *store, InStream *is)
|
447
558
|
{
|
448
|
-
|
449
|
-
|
559
|
+
SegmentInfo *si = ALLOC_AND_ZERO(SegmentInfo);
|
560
|
+
si->store = store;
|
561
|
+
si->name = is_read_string(is);
|
562
|
+
si->doc_cnt = is_read_vint(is);
|
563
|
+
si->del_gen = is_read_vint(is);
|
564
|
+
si->norm_gens_size = is_read_vint(is);
|
565
|
+
si->ref_cnt = 1;
|
566
|
+
if (0 < si->norm_gens_size) {
|
567
|
+
int i;
|
568
|
+
si->norm_gens = ALLOC_N(int, si->norm_gens_size);
|
569
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
570
|
+
si->norm_gens[i] = is_read_vint(is);
|
571
|
+
}
|
572
|
+
}
|
573
|
+
si->use_compound_file = (bool)is_read_byte(is);
|
574
|
+
return si;
|
450
575
|
}
|
451
576
|
|
452
|
-
|
577
|
+
void si_write(SegmentInfo *si, OutStream *os)
|
453
578
|
{
|
454
|
-
|
455
|
-
|
456
|
-
|
579
|
+
os_write_string(os, si->name);
|
580
|
+
os_write_vint(os, si->doc_cnt);
|
581
|
+
os_write_vint(os, si->del_gen);
|
582
|
+
os_write_vint(os, si->norm_gens_size);
|
583
|
+
if (0 < si->norm_gens_size) {
|
584
|
+
int i;
|
585
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
586
|
+
os_write_vint(os, si->norm_gens[i]);
|
587
|
+
}
|
588
|
+
}
|
589
|
+
os_write_byte(os, (uchar)si->use_compound_file);
|
457
590
|
}
|
458
591
|
|
459
|
-
|
592
|
+
void si_deref(SegmentInfo *si)
|
460
593
|
{
|
461
|
-
|
462
|
-
|
463
|
-
|
594
|
+
if (--si->ref_cnt <= 0) {
|
595
|
+
free(si->name);
|
596
|
+
free(si->norm_gens);
|
597
|
+
free(si);
|
598
|
+
}
|
464
599
|
}
|
465
600
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
};
|
601
|
+
bool si_has_deletions(SegmentInfo *si)
|
602
|
+
{
|
603
|
+
return si->del_gen >= 0;
|
604
|
+
}
|
471
605
|
|
472
|
-
|
606
|
+
char *si_del_file_name(SegmentInfo *si, char *buf)
|
473
607
|
{
|
474
|
-
if (
|
475
|
-
|
476
|
-
|
608
|
+
if (si->del_gen < 0) {
|
609
|
+
return NULL;
|
610
|
+
}
|
611
|
+
else {
|
612
|
+
return fn_for_generation(buf, si->name, ".del", si->del_gen);
|
477
613
|
}
|
478
614
|
}
|
479
615
|
|
480
616
|
bool si_has_separate_norms(SegmentInfo *si)
|
481
617
|
{
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
618
|
+
if (si->use_compound_file && si->norm_gens) {
|
619
|
+
int i;
|
620
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
621
|
+
if (si->norm_gens[i] > 0) return true;
|
622
|
+
}
|
623
|
+
}
|
624
|
+
return false;
|
625
|
+
}
|
487
626
|
|
488
|
-
|
627
|
+
void si_advance_norm_gen(SegmentInfo *si, int field_num)
|
628
|
+
{
|
629
|
+
if (field_num >= si->norm_gens_size) {
|
630
|
+
int i;
|
631
|
+
REALLOC_N(si->norm_gens, int, field_num + 1);
|
632
|
+
for (i = si->norm_gens_size; i <= field_num; i++) {
|
633
|
+
si->norm_gens[i] = -1;
|
634
|
+
}
|
635
|
+
si->norm_gens_size = field_num + 1;
|
636
|
+
}
|
637
|
+
si->norm_gens[field_num]++;
|
489
638
|
}
|
490
639
|
|
640
|
+
char *si_norm_file_name(SegmentInfo *si, char *buf, int field_num)
|
641
|
+
{
|
642
|
+
int norm_gen;
|
643
|
+
if (field_num >= si->norm_gens_size
|
644
|
+
|| 0 > (norm_gen = si->norm_gens[field_num])) {
|
645
|
+
return NULL;
|
646
|
+
}
|
647
|
+
else {
|
648
|
+
char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
|
649
|
+
return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
|
650
|
+
}
|
651
|
+
}
|
652
|
+
|
653
|
+
void deleter_queue_file(Deleter *dlr, char *file_name);
|
654
|
+
#define DEL(file_name) deleter_queue_file(dlr, file_name)
|
655
|
+
|
656
|
+
static void si_delete_files(SegmentInfo *si, FieldInfos *fis, Deleter *dlr)
|
657
|
+
{
|
658
|
+
int i;
|
659
|
+
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
660
|
+
size_t seg_len = strlen(si->name);
|
661
|
+
char *ext;
|
662
|
+
|
663
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
664
|
+
if (0 <= si->norm_gens[i]) {
|
665
|
+
DEL(si_norm_file_name(si, file_name, fis->fields[i]->number));
|
666
|
+
}
|
667
|
+
}
|
668
|
+
|
669
|
+
memcpy(file_name, si->name, seg_len);
|
670
|
+
file_name[seg_len] = '.';
|
671
|
+
ext = file_name + seg_len + 1;
|
672
|
+
|
673
|
+
if (si->use_compound_file) {
|
674
|
+
memcpy(ext, "cfs", 4);
|
675
|
+
DEL(file_name);
|
676
|
+
if (0 <= si->del_gen) {
|
677
|
+
DEL(fn_for_generation(file_name, si->name, "del", si->del_gen));
|
678
|
+
}
|
679
|
+
}
|
680
|
+
else {
|
681
|
+
for (i = NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
|
682
|
+
memcpy(ext, INDEX_EXTENSIONS[i], 4);
|
683
|
+
DEL(file_name);
|
684
|
+
}
|
685
|
+
}
|
686
|
+
}
|
491
687
|
|
492
688
|
/****************************************************************************
|
493
689
|
*
|
@@ -496,42 +692,266 @@ bool si_has_separate_norms(SegmentInfo *si)
|
|
496
692
|
****************************************************************************/
|
497
693
|
|
498
694
|
#include <time.h>
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
695
|
+
static char *new_segment(f_i64 generation)
|
696
|
+
{
|
697
|
+
char buf[SEGMENT_NAME_MAX_LENGTH];
|
698
|
+
char *fn_p = u64_to_str36(buf, SEGMENT_NAME_MAX_LENGTH - 1,
|
699
|
+
(f_u64)generation);
|
700
|
+
*(--fn_p) = '_';
|
701
|
+
return estrdup(fn_p);
|
702
|
+
}
|
703
|
+
|
704
|
+
/****************************************************************************
|
705
|
+
* FindSegmentsFile
|
706
|
+
****************************************************************************/
|
503
707
|
|
504
|
-
|
708
|
+
typedef struct FindSegmentsFile {
|
709
|
+
f_i64 generation;
|
710
|
+
f_u64 u64_return;
|
711
|
+
void *p_return;
|
712
|
+
} FindSegmentsFile;
|
505
713
|
|
506
|
-
static char *
|
714
|
+
static void which_gen_i(char *file_name, void *arg)
|
715
|
+
{
|
716
|
+
f_i64 *max_generation = (f_i64 *)arg;
|
717
|
+
if (0 == strncmp(SEGMENTS_FILE_NAME"_", file_name,
|
718
|
+
sizeof(SEGMENTS_FILE_NAME))) {
|
719
|
+
char *p = strrchr(file_name, '_') + 1;
|
720
|
+
f_i64 generation = (f_i64)str36_to_u64(p);
|
721
|
+
if (generation > *max_generation) *max_generation = generation;
|
722
|
+
}
|
723
|
+
}
|
724
|
+
|
725
|
+
static void si_put(SegmentInfo *si, FILE *stream)
|
507
726
|
{
|
508
|
-
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
509
727
|
int i;
|
728
|
+
fprintf(stream, "\tSegmentInfo {\n");
|
729
|
+
fprintf(stream, "\t\tname = %s\n", si->name);
|
730
|
+
fprintf(stream, "\t\tdoc_cnt = %d\n", si->doc_cnt);
|
731
|
+
fprintf(stream, "\t\tdel_gen = %d\n", si->del_gen);
|
732
|
+
fprintf(stream, "\t\tnorm_gens_size = %d\n", si->norm_gens_size);
|
733
|
+
fprintf(stream, "\t\tnorm_gens {\n");
|
734
|
+
for (i = 0; i < si->norm_gens_size; i++) {
|
735
|
+
fprintf(stream, "\t\t\t%d\n", si->norm_gens[i]);
|
736
|
+
}
|
737
|
+
fprintf(stream, "\t\t}\n");
|
738
|
+
fprintf(stream, "\t\tref_cnt = %d\n", si->ref_cnt);
|
739
|
+
fprintf(stream, "\t}\n");
|
740
|
+
}
|
510
741
|
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
742
|
+
void sis_put(SegmentInfos *sis, FILE *stream)
|
743
|
+
{
|
744
|
+
int i;
|
745
|
+
fprintf(stream, "SegmentInfos {\n");
|
746
|
+
fprintf(stream, "\tcounter = %"POSH_I64_PRINTF_PREFIX"d\n", sis->counter);
|
747
|
+
fprintf(stream, "\tversion = %"POSH_I64_PRINTF_PREFIX"d\n", sis->version);
|
748
|
+
fprintf(stream, "\tgeneration = %"POSH_I64_PRINTF_PREFIX"d\n", sis->generation);
|
749
|
+
fprintf(stream, "\tformat = %d\n", sis->format);
|
750
|
+
fprintf(stream, "\tsize = %d\n", sis->size);
|
751
|
+
fprintf(stream, "\tcapa = %d\n", sis->capa);
|
752
|
+
for (i = 0; i < sis->size; i++) {
|
753
|
+
si_put(sis->segs[i], stream);
|
518
754
|
}
|
519
|
-
|
520
|
-
|
521
|
-
|
755
|
+
fprintf(stream, "}\n");
|
756
|
+
}
|
757
|
+
|
758
|
+
/*
|
759
|
+
* Get the generation (N) of the current segments_N file from a list of files.
|
760
|
+
*
|
761
|
+
* @param store - the Store to look in
|
762
|
+
*/
|
763
|
+
f_i64 sis_current_segment_generation(Store *store)
|
764
|
+
{
|
765
|
+
f_i64 current_generation = -1;
|
766
|
+
store->each(store, &which_gen_i, ¤t_generation);
|
767
|
+
return current_generation;
|
768
|
+
}
|
769
|
+
|
770
|
+
/*
|
771
|
+
* Get the current generation filename.
|
772
|
+
*
|
773
|
+
* @param buf - buffer to write filename to
|
774
|
+
* @param store - the Store to look in
|
775
|
+
* @return segments_N where N is the current generation
|
776
|
+
*/
|
777
|
+
char *sis_curr_seg_file_name(char *buf, Store *store)
|
778
|
+
{
|
779
|
+
return segfn_for_generation(buf, sis_current_segment_generation(store));
|
780
|
+
}
|
781
|
+
|
782
|
+
/*
|
783
|
+
* Get the next generation filename.
|
784
|
+
*
|
785
|
+
* @param buf - buffer to write filename to
|
786
|
+
* @param store - the Store to look in
|
787
|
+
* @return segments_N where N is the +next+ generation
|
788
|
+
*/
|
789
|
+
char *sis_next_seg_file_name(char *buf, Store *store)
|
790
|
+
{
|
791
|
+
return segfn_for_generation(buf, sis_current_segment_generation(store) + 1);
|
792
|
+
}
|
793
|
+
|
794
|
+
#define GEN_FILE_RETRY_COUNT 10
|
795
|
+
#define GEN_LOOK_AHEAD_COUNT 10
|
796
|
+
void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
797
|
+
void (*run)(Store *store, FindSegmentsFile *fsf))
|
798
|
+
{
|
799
|
+
int i;
|
800
|
+
int gen_look_ahead_count = 0;
|
801
|
+
bool retry = false;
|
802
|
+
int method = 0;
|
803
|
+
f_i64 last_gen = -1;
|
804
|
+
f_i64 gen = 0;
|
805
|
+
|
806
|
+
/* Loop until we succeed in calling run() without hitting an
|
807
|
+
* IOException. An IOException most likely means a commit was in process
|
808
|
+
* and has finished, in the time it took us to load the now-old infos
|
809
|
+
* files (and segments files). It's also possible it's a true error
|
810
|
+
* (corrupt index). To distinguish these, on each retry we must see
|
811
|
+
* "forward progress" on which generation we are trying to load. If we
|
812
|
+
* don't, then the original error is real and we throw it.
|
813
|
+
*
|
814
|
+
* We have three methods for determining the current generation. We try
|
815
|
+
* each in sequence. */
|
816
|
+
while (true) {
|
817
|
+
/* Method 1: list the directory and use the highest segments_N file.
|
818
|
+
* This method works well as long as there is no stale caching on the
|
819
|
+
* directory contents: */
|
820
|
+
if (0 == method) {
|
821
|
+
gen = sis_current_segment_generation(store);
|
822
|
+
if (gen == -1) {
|
823
|
+
/*fprintf(stderr, ">>\n%s\n>>\n", store_to_s(store));*/
|
824
|
+
RAISE(FILE_NOT_FOUND_ERROR, "couldn't find segments file");
|
825
|
+
}
|
826
|
+
}
|
827
|
+
|
828
|
+
/* Method 2 (fallback if Method 1 isn't reliable): if the directory
|
829
|
+
* listing seems to be stale, try loading the "segments.gen" file. */
|
830
|
+
if (1 == method || (0 == method && last_gen == gen && retry)) {
|
831
|
+
method = 1;
|
832
|
+
for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
|
833
|
+
InStream *gen_is = NULL;
|
834
|
+
TRY
|
835
|
+
gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
|
836
|
+
XCATCHALL
|
837
|
+
HANDLED();
|
838
|
+
/* TODO:LOG "segments.gen open: IO_ERROR"*/
|
839
|
+
XENDTRY
|
840
|
+
|
841
|
+
if (NULL != gen_is) {
|
842
|
+
f_i64 gen0 = -1, gen1 = -1;
|
843
|
+
|
844
|
+
TRY
|
845
|
+
gen0 = is_read_u64(gen_is);
|
846
|
+
gen1 = is_read_u64(gen_is);
|
847
|
+
XFINALLY
|
848
|
+
/* if there is an error we'll simply try again */
|
849
|
+
HANDLED();
|
850
|
+
is_close(gen_is);
|
851
|
+
XENDTRY
|
852
|
+
/* TODO:LOG "fallback check: " + gen0 + "; " + gen1 */
|
853
|
+
if (gen0 == gen1) {
|
854
|
+
/* The file is consistent. */
|
855
|
+
if (gen0 > gen) {
|
856
|
+
/* TODO:LOG "fallback to '" +
|
857
|
+
* IndexFileNames.SEGMENTS_GEN + "' check: now
|
858
|
+
* try generation " + gen0 + " > " + gen */
|
859
|
+
gen = gen0;
|
860
|
+
}
|
861
|
+
goto method_two_loop_end;
|
862
|
+
}
|
863
|
+
break;
|
864
|
+
}
|
865
|
+
/* sleep for 50 milliseconds */
|
866
|
+
micro_sleep(50000);
|
867
|
+
}
|
868
|
+
}
|
869
|
+
method_two_loop_end:
|
870
|
+
|
871
|
+
/* Method 3 (fallback if Methods 1 & 2 are not reliable): since both
|
872
|
+
* directory cache and file contents cache seem to be stale, just
|
873
|
+
* advance the generation. */
|
874
|
+
if (2 == method || (1 == method && last_gen == gen && retry)) {
|
875
|
+
method = 2;
|
876
|
+
if (gen_look_ahead_count < GEN_LOOK_AHEAD_COUNT) {
|
877
|
+
gen++;
|
878
|
+
gen_look_ahead_count++;
|
879
|
+
/* TODO:LOG "look ahead increment gen to " + gen */
|
880
|
+
}
|
881
|
+
}
|
882
|
+
|
883
|
+
if (last_gen == gen) {
|
884
|
+
/* This means we're about to try the same segments_N last tried.
|
885
|
+
* This is allowed, exactly once, because writer could have been
|
886
|
+
* in the process of writing segments_N last time. */
|
887
|
+
if (retry) {
|
888
|
+
/* OK, we've tried the same segments_N file twice in a row, so
|
889
|
+
* this must be a real error. The original exception is not kept,
|
890
|
+
* so a fresh IO_ERROR is raised instead. */
|
891
|
+
RAISE(IO_ERROR, "Error reading the segment infos");
|
892
|
+
}
|
893
|
+
else {
|
894
|
+
retry = true;
|
895
|
+
}
|
896
|
+
}
|
897
|
+
else {
|
898
|
+
/* Segment file has advanced since our last loop, so reset retry: */
|
899
|
+
retry = false;
|
900
|
+
}
|
901
|
+
last_gen = gen;
|
902
|
+
|
903
|
+
TRY
|
904
|
+
fsf->generation = gen;
|
905
|
+
run(store, fsf);
|
906
|
+
RETURN_EARLY();
|
907
|
+
return;
|
908
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
909
|
+
HANDLED();
|
910
|
+
/* Save the original root cause: */
|
911
|
+
/* TODO:LOG "primary Exception on '" + segmentFileName + "': " +
|
912
|
+
* err + "'; will retry: retry=" + retry + "; gen = " + gen */
|
913
|
+
|
914
|
+
if (!retry && gen > 1) {
|
915
|
+
/* This is our first time trying this segments file (because
|
916
|
+
* retry is false), and, there is possibly a segments_(N-1)
|
917
|
+
* (because gen > 1). So, check if the segments_(N-1) exists
|
918
|
+
* and try it if so: */
|
919
|
+
char prev_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
920
|
+
segfn_for_generation(prev_seg_file_name, gen - 1);
|
921
|
+
if (store->exists(store, prev_seg_file_name)) {
|
922
|
+
/* TODO:LOG "fallback to prior segment file '" +
|
923
|
+
* prevSegmentFileName + "'" */
|
924
|
+
TRY
|
925
|
+
fsf->generation = gen - 1;
|
926
|
+
run(store, fsf);
|
927
|
+
/* TODO:LOG "success on fallback " +
|
928
|
+
* prev_seg_file_name */
|
929
|
+
|
930
|
+
/* pop two contexts as we are in nested try blocks */
|
931
|
+
RETURN_EARLY();
|
932
|
+
RETURN_EARLY();
|
933
|
+
return;
|
934
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
935
|
+
HANDLED();
|
936
|
+
/* TODO:LOG "secondary Exception on '" +
|
937
|
+
* prev_seg_file_name + "': " + err2 + "'; will retry"*/
|
938
|
+
XENDTRY
|
939
|
+
}
|
940
|
+
}
|
941
|
+
XENDTRY
|
522
942
|
}
|
523
|
-
i--;
|
524
|
-
file_name[i] = '_';
|
525
|
-
return estrdup(&file_name[i]);
|
526
943
|
}
|
527
944
|
|
528
|
-
SegmentInfos *sis_new()
|
945
|
+
SegmentInfos *sis_new(FieldInfos *fis)
|
529
946
|
{
|
530
|
-
SegmentInfos *sis =
|
947
|
+
SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
|
948
|
+
REF(fis);
|
949
|
+
sis->fis = fis;
|
531
950
|
sis->format = FORMAT;
|
532
951
|
sis->version = (f_u64)time(NULL);
|
533
952
|
sis->size = 0;
|
534
953
|
sis->counter = 0;
|
954
|
+
sis->generation = -1;
|
535
955
|
sis->capa = 4;
|
536
956
|
sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
|
537
957
|
return sis;
|
@@ -539,8 +959,7 @@ SegmentInfos *sis_new()
|
|
539
959
|
|
540
960
|
SegmentInfo *sis_new_segment(SegmentInfos *sis, int doc_cnt, Store *store)
|
541
961
|
{
|
542
|
-
return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt,
|
543
|
-
store));
|
962
|
+
return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt, store));
|
544
963
|
}
|
545
964
|
|
546
965
|
void sis_destroy(SegmentInfos *sis)
|
@@ -548,8 +967,9 @@ void sis_destroy(SegmentInfos *sis)
|
|
548
967
|
int i;
|
549
968
|
const int sis_size = sis->size;
|
550
969
|
for (i = 0; i < sis_size; i++) {
|
551
|
-
|
970
|
+
si_deref(sis->segs[i]);
|
552
971
|
}
|
972
|
+
if (sis->fis) fis_deref(sis->fis);
|
553
973
|
free(sis->segs);
|
554
974
|
free(sis);
|
555
975
|
}
|
@@ -557,11 +977,10 @@ void sis_destroy(SegmentInfos *sis)
|
|
557
977
|
SegmentInfo *sis_add_si(SegmentInfos *sis, SegmentInfo *si)
|
558
978
|
{
|
559
979
|
if (sis->size >= sis->capa) {
|
560
|
-
sis->capa
|
980
|
+
sis->capa <<= 1;
|
561
981
|
REALLOC_N(sis->segs, SegmentInfo *, sis->capa);
|
562
982
|
}
|
563
|
-
sis->segs[sis->size] = si;
|
564
|
-
sis->size++;
|
983
|
+
sis->segs[sis->size++] = si;
|
565
984
|
return si;
|
566
985
|
}
|
567
986
|
|
@@ -569,7 +988,7 @@ void sis_del_at(SegmentInfos *sis, int at)
|
|
569
988
|
{
|
570
989
|
int i;
|
571
990
|
const int sis_size = --(sis->size);
|
572
|
-
|
991
|
+
si_deref(sis->segs[at]);
|
573
992
|
for (i = at; i < sis_size; i++) {
|
574
993
|
sis->segs[i] = sis->segs[i+1];
|
575
994
|
}
|
@@ -580,7 +999,7 @@ void sis_del_from_to(SegmentInfos *sis, int from, int to)
|
|
580
999
|
int i, num_to_del = to - from;
|
581
1000
|
const int sis_size = sis->size -= num_to_del;
|
582
1001
|
for (i = from; i < to; i++) {
|
583
|
-
|
1002
|
+
si_deref(sis->segs[i]);
|
584
1003
|
}
|
585
1004
|
for (i = from; i < sis_size; i++) {
|
586
1005
|
sis->segs[i] = sis->segs[i+num_to_del];
|
@@ -592,74 +1011,106 @@ void sis_clear(SegmentInfos *sis)
|
|
592
1011
|
int i;
|
593
1012
|
const int sis_size = sis->size;
|
594
1013
|
for (i = 0; i < sis_size; i++) {
|
595
|
-
|
1014
|
+
si_deref(sis->segs[i]);
|
596
1015
|
}
|
597
1016
|
sis->size = 0;
|
598
1017
|
}
|
599
1018
|
|
600
|
-
|
1019
|
+
void sis_read_i(Store *store, FindSegmentsFile *fsf)
|
601
1020
|
{
|
602
|
-
int doc_cnt;
|
603
1021
|
int seg_cnt;
|
604
1022
|
int i;
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
sis
|
1023
|
+
bool success = false;
|
1024
|
+
char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
1025
|
+
InStream *is = NULL;
|
1026
|
+
SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
|
1027
|
+
segfn_for_generation(seg_file_name, fsf->generation);
|
1028
|
+
TRY
|
1029
|
+
is = store->open_input(store, seg_file_name);
|
1030
|
+
sis->store = store;
|
609
1031
|
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
1032
|
+
sis->generation = fsf->generation;
|
1033
|
+
sis->format = is_read_u32(is); /* do nothing. it's the first version */
|
1034
|
+
sis->version = is_read_u64(is);
|
1035
|
+
sis->counter = is_read_u64(is);
|
1036
|
+
seg_cnt = is_read_vint(is);
|
614
1037
|
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
1038
|
+
/* allocate space for segments */
|
1039
|
+
for (sis->capa = 4; sis->capa < seg_cnt; sis->capa <<= 1) {
|
1040
|
+
}
|
1041
|
+
sis->size = 0;
|
1042
|
+
sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
|
620
1043
|
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
1044
|
+
for (i = 0; i < seg_cnt; i++) {
|
1045
|
+
sis_add_si(sis, si_read(store, is));
|
1046
|
+
}
|
1047
|
+
sis->fis = fis_read(is);
|
1048
|
+
success = true;
|
1049
|
+
XFINALLY
|
1050
|
+
if (is) is_close(is);
|
1051
|
+
if (!success) {
|
1052
|
+
sis_destroy(sis);
|
1053
|
+
}
|
1054
|
+
XENDTRY
|
1055
|
+
fsf->p_return = sis;
|
1056
|
+
}
|
627
1057
|
|
628
|
-
|
1058
|
+
SegmentInfos *sis_read(Store *store)
|
1059
|
+
{
|
1060
|
+
FindSegmentsFile fsf;
|
1061
|
+
sis_find_segments_file(store, &fsf, &sis_read_i);
|
1062
|
+
return fsf.p_return;
|
629
1063
|
}
|
630
1064
|
|
631
|
-
void sis_write(SegmentInfos *sis, Store *store)
|
1065
|
+
void sis_write(SegmentInfos *sis, Store *store, Deleter *deleter)
|
632
1066
|
{
|
633
1067
|
int i;
|
634
|
-
|
635
|
-
OutStream *os = store->new_output(store, TEMPORARY_SEGMENTS_FILENAME);
|
1068
|
+
OutStream *os = NULL;
|
636
1069
|
const int sis_size = sis->size;
|
1070
|
+
char buf[SEGMENT_NAME_MAX_LENGTH];
|
1071
|
+
sis->generation++;
|
637
1072
|
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
1073
|
+
TRY
|
1074
|
+
os = store->new_output(store,
|
1075
|
+
segfn_for_generation(buf, sis->generation));
|
1076
|
+
os_write_u32(os, FORMAT);
|
1077
|
+
os_write_u64(os, ++(sis->version)); /* every write changes the index */
|
1078
|
+
os_write_u64(os, sis->counter);
|
1079
|
+
os_write_vint(os, sis->size);
|
1080
|
+
for (i = 0; i < sis_size; i++) {
|
1081
|
+
si_write(sis->segs[i], os);
|
1082
|
+
}
|
1083
|
+
fis_write(sis->fis, os);
|
1084
|
+
XFINALLY
|
1085
|
+
os_close(os);
|
1086
|
+
XENDTRY
|
1087
|
+
|
1088
|
+
TRY
|
1089
|
+
os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
|
1090
|
+
os_write_u64(os, sis->generation);
|
1091
|
+
os_write_u64(os, sis->generation);
|
1092
|
+
XFINALLY
|
1093
|
+
/* It's OK if we fail to write this file since it's
|
1094
|
+
* used only as one of the retry fallbacks. */
|
1095
|
+
HANDLED();
|
1096
|
+
os_close(os);
|
1097
|
+
XENDTRY
|
648
1098
|
|
649
|
-
|
650
|
-
|
1099
|
+
if (deleter && sis->generation > 0) {
|
1100
|
+
deleter_delete_file(deleter,
|
1101
|
+
segfn_for_generation(buf, sis->generation - 1));
|
1102
|
+
}
|
651
1103
|
}
|
652
1104
|
|
653
|
-
|
1105
|
+
void sis_read_ver_i(Store *store, FindSegmentsFile *fsf)
|
654
1106
|
{
|
655
1107
|
InStream *is;
|
656
1108
|
f_u32 format = 0;
|
657
1109
|
f_u64 version = 0;
|
1110
|
+
char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
658
1111
|
|
659
|
-
|
660
|
-
|
661
|
-
}
|
662
|
-
is = store->open_input(store, SEGMENTS_FILENAME);
|
1112
|
+
segfn_for_generation(seg_file_name, (f_u64)fsf->generation);
|
1113
|
+
is = store->open_input(store, seg_file_name);
|
663
1114
|
|
664
1115
|
TRY
|
665
1116
|
format = is_read_u32(is);
|
@@ -668,7 +1119,14 @@ f_u64 sis_read_current_version(Store *store)
|
|
668
1119
|
is_close(is);
|
669
1120
|
XENDTRY
|
670
1121
|
|
671
|
-
|
1122
|
+
fsf->u64_return = version;
|
1123
|
+
}
|
1124
|
+
|
1125
|
+
f_u64 sis_read_current_version(Store *store)
|
1126
|
+
{
|
1127
|
+
FindSegmentsFile fsf;
|
1128
|
+
sis_find_segments_file(store, &fsf, &sis_read_ver_i);
|
1129
|
+
return fsf.u64_return;
|
672
1130
|
}
|
673
1131
|
|
674
1132
|
/****************************************************************************
|
@@ -704,7 +1162,7 @@ char *lazy_df_get_data(LazyDocField *self, int i)
|
|
704
1162
|
char *text = NULL;
|
705
1163
|
if (i < self->size && i >= 0) {
|
706
1164
|
text = self->data[i].text;
|
707
|
-
if (
|
1165
|
+
if (NULL == text) {
|
708
1166
|
const int read_len = self->data[i].length + 1;
|
709
1167
|
self->data[i].text = text = ALLOC_N(char, read_len);
|
710
1168
|
is_seek(self->doc->fields_in, self->data[i].start);
|
@@ -1220,8 +1678,8 @@ char *te_skip_to(TermEnum *te, const char *term)
|
|
1220
1678
|
{
|
1221
1679
|
char *curr_term = te->curr_term;
|
1222
1680
|
if (strcmp(curr_term, term) < 0) {
|
1223
|
-
while (((curr_term = te->next(te))
|
1224
|
-
(strcmp(curr_term, term) < 0)) {
|
1681
|
+
while (NULL != ((curr_term = te->next(te)))
|
1682
|
+
&& (strcmp(curr_term, term) < 0)) {
|
1225
1683
|
}
|
1226
1684
|
}
|
1227
1685
|
return curr_term;
|
@@ -1258,7 +1716,7 @@ static void sti_destroy(SegmentTermIndex *sti)
|
|
1258
1716
|
static void sti_ensure_index_is_read(SegmentTermIndex *sti,
|
1259
1717
|
TermEnum *index_te)
|
1260
1718
|
{
|
1261
|
-
if (sti->index_terms
|
1719
|
+
if (NULL == sti->index_terms) {
|
1262
1720
|
int i;
|
1263
1721
|
int index_size = sti->index_size;
|
1264
1722
|
off_t index_ptr = 0;
|
@@ -1314,7 +1772,7 @@ static int sti_get_index_offset(SegmentTermIndex *sti, const char *term)
|
|
1314
1772
|
****************************************************************************/
|
1315
1773
|
|
1316
1774
|
#define SFI_ENSURE_INDEX_IS_READ(sfi, sti) do {\
|
1317
|
-
if (sti->index_terms
|
1775
|
+
if (NULL == sti->index_terms) {\
|
1318
1776
|
mutex_lock(&sfi->mutex);\
|
1319
1777
|
sti_ensure_index_is_read(sti, sfi->index_te);\
|
1320
1778
|
mutex_unlock(&sfi->mutex);\
|
@@ -1351,7 +1809,7 @@ SegmentFieldIndex *sfi_open(Store *store, const char *segment)
|
|
1351
1809
|
|
1352
1810
|
sprintf(file_name, "%s.tix", segment);
|
1353
1811
|
is = store->open_input(store, file_name);
|
1354
|
-
sfi->index_te = ste_new(is,
|
1812
|
+
sfi->index_te = ste_new(is, sfi);
|
1355
1813
|
return sfi;
|
1356
1814
|
}
|
1357
1815
|
|
@@ -1394,8 +1852,8 @@ static char *ste_next(TermEnum *te)
|
|
1394
1852
|
|
1395
1853
|
ti = &(te->curr_ti);
|
1396
1854
|
ti->doc_freq = is_read_vint(is); /* read doc freq */
|
1397
|
-
ti->frq_ptr += is_read_voff_t(is)
|
1398
|
-
ti->prx_ptr += is_read_voff_t(is)
|
1855
|
+
ti->frq_ptr += is_read_voff_t(is); /* read freq ptr */
|
1856
|
+
ti->prx_ptr += is_read_voff_t(is); /* read prox ptr */
|
1399
1857
|
if (ti->doc_freq >= STE(te)->skip_interval) {
|
1400
1858
|
ti->skip_offset = is_read_voff_t(is);
|
1401
1859
|
}
|
@@ -1497,7 +1955,7 @@ static TermInfo *ste_scan_for_term_info(SegmentTermEnum *ste, const char *term)
|
|
1497
1955
|
{
|
1498
1956
|
ste_scan_to(ste, term);
|
1499
1957
|
|
1500
|
-
if (strcmp(TE(ste)->curr_term, term)
|
1958
|
+
if (0 == strcmp(TE(ste)->curr_term, term)) {
|
1501
1959
|
return te_get_ti((TermEnum *)ste);
|
1502
1960
|
}
|
1503
1961
|
else {
|
@@ -1521,7 +1979,7 @@ static char *ste_get_term(TermEnum *te, int pos)
|
|
1521
1979
|
ste_index_seek(te, sti, pos / idx_int);
|
1522
1980
|
}
|
1523
1981
|
while (ste->pos < pos) {
|
1524
|
-
if (ste_next(te)
|
1982
|
+
if (NULL == ste_next(te)) {
|
1525
1983
|
return NULL;
|
1526
1984
|
}
|
1527
1985
|
}
|
@@ -1575,7 +2033,7 @@ typedef struct MultiTermEnum
|
|
1575
2033
|
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
|
1576
2034
|
{
|
1577
2035
|
int cmpres = strcmp(tew1->term, tew2->term);
|
1578
|
-
if (
|
2036
|
+
if (0 == cmpres) {
|
1579
2037
|
return tew1->index < tew2->index;
|
1580
2038
|
}
|
1581
2039
|
else {
|
@@ -1637,7 +2095,7 @@ static char *mte_next(TermEnum *te)
|
|
1637
2095
|
TermEnumWrapper *top =
|
1638
2096
|
(TermEnumWrapper *)pq_top(MTE(te)->tew_queue);
|
1639
2097
|
|
1640
|
-
if (
|
2098
|
+
if (NULL == top) {
|
1641
2099
|
te->curr_term[0] = '\0';
|
1642
2100
|
te->curr_term_len = 0;
|
1643
2101
|
return false;
|
@@ -1650,7 +2108,7 @@ static char *mte_next(TermEnum *te)
|
|
1650
2108
|
te->curr_ti.doc_freq = 0;
|
1651
2109
|
|
1652
2110
|
MTE(te)->ti_cnt = 0;
|
1653
|
-
while ((
|
2111
|
+
while ((NULL != top) && (0 == strcmp(te->curr_term, top->term))) {
|
1654
2112
|
pq_pop(MTE(te)->tew_queue);
|
1655
2113
|
te->curr_ti.doc_freq += top->te->curr_ti.doc_freq;/* increment freq */
|
1656
2114
|
MTE(te)->ti_indexes[MTE(te)->ti_cnt] = top->index;
|
@@ -1752,7 +2210,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1752
2210
|
if (fnum >= 0) {
|
1753
2211
|
TermEnumWrapper *tew;
|
1754
2212
|
|
1755
|
-
if (
|
2213
|
+
if (NULL != term) {
|
1756
2214
|
sub_te = reader->terms_from(reader, fnum, term);
|
1757
2215
|
}
|
1758
2216
|
else {
|
@@ -1760,7 +2218,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1760
2218
|
}
|
1761
2219
|
|
1762
2220
|
tew = tew_setup(&(mte->tews[i]), i, sub_te, reader);
|
1763
|
-
if (((
|
2221
|
+
if (((NULL == term) && tew_next(tew))
|
1764
2222
|
|| (tew->term && (tew->term[0] != '\0'))) {
|
1765
2223
|
pq_push(mte->tew_queue, tew); /* initialize queue */
|
1766
2224
|
}
|
@@ -1772,7 +2230,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1772
2230
|
}
|
1773
2231
|
}
|
1774
2232
|
|
1775
|
-
if ((
|
2233
|
+
if ((NULL != term) && (0 < mte->tew_queue->size)) {
|
1776
2234
|
mte_next(TE(mte));
|
1777
2235
|
}
|
1778
2236
|
|
@@ -1804,7 +2262,7 @@ TermInfosReader *tir_open(Store *store,
|
|
1804
2262
|
static __inline TermEnum *tir_enum(TermInfosReader *tir)
|
1805
2263
|
{
|
1806
2264
|
TermEnum *te;
|
1807
|
-
if ((te = thread_getspecific(tir->thread_te))
|
2265
|
+
if (NULL == (te = thread_getspecific(tir->thread_te))) {
|
1808
2266
|
te = ste_clone(tir->orig_te);
|
1809
2267
|
ste_set_field(te, tir->field_num);
|
1810
2268
|
ary_push(tir->te_bucket, te);
|
@@ -1827,8 +2285,8 @@ TermInfo *tir_get_ti(TermInfosReader *tir, const char *term)
|
|
1827
2285
|
TermEnum *te = tir_enum(tir);
|
1828
2286
|
char *match;
|
1829
2287
|
|
1830
|
-
if ((match = ste_scan_to(te, term))
|
1831
|
-
strcmp(match, term)
|
2288
|
+
if (NULL != (match = ste_scan_to(te, term))
|
2289
|
+
&& 0 == strcmp(match, term)) {
|
1832
2290
|
return &(te->curr_ti);
|
1833
2291
|
}
|
1834
2292
|
return NULL;
|
@@ -1845,8 +2303,8 @@ TermInfo *tir_get_ti_field(TermInfosReader *tir, int field_num,
|
|
1845
2303
|
tir->field_num = field_num;
|
1846
2304
|
}
|
1847
2305
|
|
1848
|
-
if ((match = ste_scan_to(te, term))
|
1849
|
-
strcmp(match, term)
|
2306
|
+
if (NULL != (match = ste_scan_to(te, term))
|
2307
|
+
&& 0 == strcmp(match, term)) {
|
1850
2308
|
return &(te->curr_ti);
|
1851
2309
|
}
|
1852
2310
|
return NULL;
|
@@ -1937,7 +2395,7 @@ static __inline void tw_write_term(TermWriter *tw,
|
|
1937
2395
|
|
1938
2396
|
os_write_vint(os, start); /* write shared prefix length */
|
1939
2397
|
os_write_vint(os, length); /* write delta length */
|
1940
|
-
os_write_bytes(os, (uchar *)(term + start), length);
|
2398
|
+
os_write_bytes(os, (uchar *)(term + start), length); /* write delta chars */
|
1941
2399
|
|
1942
2400
|
tw->last_term = term;
|
1943
2401
|
}
|
@@ -1945,13 +2403,15 @@ static __inline void tw_write_term(TermWriter *tw,
|
|
1945
2403
|
static void tw_add(TermWriter *tw,
|
1946
2404
|
const char *term,
|
1947
2405
|
int term_len,
|
1948
|
-
TermInfo *ti
|
2406
|
+
TermInfo *ti,
|
2407
|
+
int skip_interval)
|
1949
2408
|
{
|
1950
2409
|
OutStream *os = tw->os;
|
1951
2410
|
|
1952
2411
|
#ifdef DEBUG
|
1953
2412
|
if (strcmp(tw->last_term, term) > 0) {
|
1954
|
-
RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d",
|
2413
|
+
RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d",
|
2414
|
+
tw->last_term, term, *tw->last_term, *term);
|
1955
2415
|
}
|
1956
2416
|
if (ti->frq_ptr < tw->last_term_info.frq_ptr) {
|
1957
2417
|
RAISE(STATE_ERROR, "%"F_OFF_T_PFX"d > %"F_OFF_T_PFX"d", ti->frq_ptr,
|
@@ -1967,6 +2427,9 @@ static void tw_add(TermWriter *tw,
|
|
1967
2427
|
os_write_vint(os, ti->doc_freq); /* write doc freq */
|
1968
2428
|
os_write_voff_t(os, ti->frq_ptr - tw->last_term_info.frq_ptr);
|
1969
2429
|
os_write_voff_t(os, ti->prx_ptr - tw->last_term_info.prx_ptr);
|
2430
|
+
if (ti->doc_freq >= skip_interval) {
|
2431
|
+
os_write_voff_t(os, ti->skip_offset);
|
2432
|
+
}
|
1970
2433
|
|
1971
2434
|
tw->last_term_info = *ti;
|
1972
2435
|
tw->counter++;
|
@@ -1983,22 +2446,19 @@ void tiw_add(TermInfosWriter *tiw,
|
|
1983
2446
|
printf("%s:%d:%d:%d:%d\n", term, term_len, ti->doc_freq,
|
1984
2447
|
ti->frq_ptr, ti->prx_ptr);
|
1985
2448
|
*/
|
1986
|
-
if ((tiw->tis_writer->counter % tiw->index_interval)
|
2449
|
+
if (0 == (tiw->tis_writer->counter % tiw->index_interval)) {
|
1987
2450
|
/* add an index term */
|
1988
2451
|
tw_add(tiw->tix_writer,
|
1989
2452
|
tiw->tis_writer->last_term,
|
1990
2453
|
strlen(tiw->tis_writer->last_term),
|
1991
|
-
&(tiw->tis_writer->last_term_info)
|
2454
|
+
&(tiw->tis_writer->last_term_info),
|
2455
|
+
tiw->skip_interval);
|
1992
2456
|
tis_pos = os_pos(tiw->tis_writer->os);
|
1993
2457
|
os_write_voff_t(tiw->tix_writer->os, tis_pos - tiw->last_index_ptr);
|
1994
2458
|
tiw->last_index_ptr = tis_pos; /* write ptr */
|
1995
2459
|
}
|
1996
2460
|
|
1997
|
-
tw_add(tiw->tis_writer, term, term_len, ti);
|
1998
|
-
|
1999
|
-
if (ti->doc_freq >= tiw->skip_interval) {
|
2000
|
-
os_write_voff_t(tiw->tis_writer->os, ti->skip_offset);
|
2001
|
-
}
|
2461
|
+
tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
|
2002
2462
|
}
|
2003
2463
|
|
2004
2464
|
static __inline void tw_reset(TermWriter *tw)
|
@@ -2051,7 +2511,7 @@ void tiw_close(TermInfosWriter *tiw)
|
|
2051
2511
|
#define TDE(stde) ((TermDocEnum *)(stde))
|
2052
2512
|
|
2053
2513
|
#define CHECK_STATE(method) do {\
|
2054
|
-
if (STDE(tde)->count
|
2514
|
+
if (0 == STDE(tde)->count) {\
|
2055
2515
|
RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
|
2056
2516
|
"before you call #"method);\
|
2057
2517
|
}\
|
@@ -2059,7 +2519,7 @@ void tiw_close(TermInfosWriter *tiw)
|
|
2059
2519
|
|
2060
2520
|
static void stde_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
|
2061
2521
|
{
|
2062
|
-
if (
|
2522
|
+
if (NULL == ti) {
|
2063
2523
|
stde->doc_freq = 0;
|
2064
2524
|
}
|
2065
2525
|
else {
|
@@ -2117,7 +2577,7 @@ static bool stde_next(TermDocEnum *tde)
|
|
2117
2577
|
|
2118
2578
|
doc_code = is_read_vint(stde->frq_in);
|
2119
2579
|
stde->doc_num += doc_code >> 1; /* shift off low bit */
|
2120
|
-
if ((doc_code & 1)
|
2580
|
+
if (0 != (doc_code & 1)) { /* if low bit is set */
|
2121
2581
|
stde->freq = 1; /* freq is one */
|
2122
2582
|
}
|
2123
2583
|
else {
|
@@ -2126,8 +2586,8 @@ static bool stde_next(TermDocEnum *tde)
|
|
2126
2586
|
|
2127
2587
|
stde->count++;
|
2128
2588
|
|
2129
|
-
if (stde->deleted_docs
|
2130
|
-
bv_get(stde->deleted_docs, stde->doc_num)
|
2589
|
+
if (NULL == stde->deleted_docs
|
2590
|
+
|| 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
|
2131
2591
|
break; /* We found an undeleted doc so return */
|
2132
2592
|
}
|
2133
2593
|
|
@@ -2146,7 +2606,7 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2146
2606
|
/* manually inlined call to next() for speed */
|
2147
2607
|
doc_code = is_read_vint(stde->frq_in);
|
2148
2608
|
stde->doc_num += (doc_code >> 1); /* shift off low bit */
|
2149
|
-
if ((doc_code & 1)
|
2609
|
+
if (0 != (doc_code & 1)) { /* if low bit is set */
|
2150
2610
|
stde->freq = 1; /* freq is one */
|
2151
2611
|
}
|
2152
2612
|
else {
|
@@ -2155,8 +2615,8 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2155
2615
|
|
2156
2616
|
stde->count++;
|
2157
2617
|
|
2158
|
-
if (stde->deleted_docs
|
2159
|
-
bv_get(stde->deleted_docs, stde->doc_num)
|
2618
|
+
if (NULL == stde->deleted_docs
|
2619
|
+
|| 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
|
2160
2620
|
docs[i] = stde->doc_num;
|
2161
2621
|
freqs[i] = stde->freq;
|
2162
2622
|
i++;
|
@@ -2169,16 +2629,18 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2169
2629
|
{
|
2170
2630
|
SegmentTermDocEnum *stde = STDE(tde);
|
2171
2631
|
|
2172
|
-
if (stde->doc_freq >= stde->skip_interval
|
2632
|
+
if (stde->doc_freq >= stde->skip_interval
|
2633
|
+
&& target_doc_num > stde->doc_num) { /* optimized case */
|
2173
2634
|
int last_skip_doc;
|
2174
|
-
|
2175
|
-
|
2635
|
+
off_t last_frq_ptr;
|
2636
|
+
off_t last_prx_ptr;
|
2176
2637
|
int num_skipped;
|
2177
2638
|
|
2178
|
-
if (stde->skip_in
|
2179
|
-
stde->skip_in = is_clone(stde->frq_in)
|
2639
|
+
if (NULL == stde->skip_in) {
|
2640
|
+
stde->skip_in = is_clone(stde->frq_in);/* lazily clone */
|
2180
2641
|
}
|
2181
2642
|
|
2643
|
+
//printf("skip_ptr = %lld\n", stde->skip_ptr);
|
2182
2644
|
if (!stde->have_skipped) { /* lazily seek skip stream */
|
2183
2645
|
is_seek(stde->skip_in, stde->skip_ptr);
|
2184
2646
|
stde->have_skipped = true;
|
@@ -2189,13 +2651,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2189
2651
|
last_frq_ptr = is_pos(stde->frq_in);
|
2190
2652
|
last_prx_ptr = -1;
|
2191
2653
|
num_skipped = -1 - (stde->count % stde->skip_interval);
|
2654
|
+
//printf("%d, %d, %d, %d\n", last_skip_doc, last_frq_ptr, last_prx_ptr, num_skipped);
|
2192
2655
|
|
2193
2656
|
while (target_doc_num > stde->skip_doc) {
|
2194
2657
|
last_skip_doc = stde->skip_doc;
|
2195
2658
|
last_frq_ptr = stde->frq_ptr;
|
2196
2659
|
last_prx_ptr = stde->prx_ptr;
|
2197
2660
|
|
2198
|
-
if (stde->skip_doc
|
2661
|
+
if (0 != stde->skip_doc && stde->skip_doc >= stde->doc_num) {
|
2199
2662
|
num_skipped += stde->skip_interval;
|
2200
2663
|
}
|
2201
2664
|
|
@@ -2204,13 +2667,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2204
2667
|
}
|
2205
2668
|
|
2206
2669
|
stde->skip_doc += is_read_vint(stde->skip_in);
|
2207
|
-
stde->frq_ptr
|
2208
|
-
stde->prx_ptr
|
2670
|
+
stde->frq_ptr += is_read_vint(stde->skip_in);
|
2671
|
+
stde->prx_ptr += is_read_vint(stde->skip_in);
|
2672
|
+
//printf("inner-> skip_doc:%d, frq_ptr:%d, prx_ptr:%d\n", stde->skip_doc, stde->frq_ptr, stde->prx_ptr);
|
2209
2673
|
|
2210
2674
|
stde->skip_count++;
|
2211
2675
|
}
|
2212
2676
|
|
2213
|
-
/* if we found something to skip,
|
2677
|
+
/* if we found something to skip, skip it */
|
2214
2678
|
if (last_frq_ptr > is_pos(stde->frq_in)) {
|
2215
2679
|
is_seek(stde->frq_in, last_frq_ptr);
|
2216
2680
|
stde->seek_prox(stde, last_prx_ptr);
|
@@ -2233,7 +2697,7 @@ static void stde_close(TermDocEnum *tde)
|
|
2233
2697
|
{
|
2234
2698
|
is_close(STDE(tde)->frq_in);
|
2235
2699
|
|
2236
|
-
if (STDE(tde)->skip_in
|
2700
|
+
if (NULL != STDE(tde)->skip_in) {
|
2237
2701
|
is_close(STDE(tde)->skip_in);
|
2238
2702
|
}
|
2239
2703
|
|
@@ -2245,7 +2709,7 @@ static void stde_skip_prox(SegmentTermDocEnum *stde)
|
|
2245
2709
|
(void)stde;
|
2246
2710
|
}
|
2247
2711
|
|
2248
|
-
static void stde_seek_prox(SegmentTermDocEnum *stde,
|
2712
|
+
static void stde_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
|
2249
2713
|
{
|
2250
2714
|
(void)stde;
|
2251
2715
|
(void)prx_ptr;
|
@@ -2290,7 +2754,7 @@ TermDocEnum *stde_new(TermInfosReader *tir,
|
|
2290
2754
|
|
2291
2755
|
static void stpe_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
|
2292
2756
|
{
|
2293
|
-
if (
|
2757
|
+
if (NULL == ti) {
|
2294
2758
|
stde->doc_freq = 0;
|
2295
2759
|
}
|
2296
2760
|
else {
|
@@ -2351,7 +2815,7 @@ static void stpe_skip_prox(SegmentTermDocEnum *stde)
|
|
2351
2815
|
is_skip_vints(stde->prx_in, stde->freq);
|
2352
2816
|
}
|
2353
2817
|
|
2354
|
-
static void stpe_seek_prox(SegmentTermDocEnum *stde,
|
2818
|
+
static void stpe_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
|
2355
2819
|
{
|
2356
2820
|
is_seek(stde->prx_in, prx_ptr);
|
2357
2821
|
stde->prx_cnt = 0;
|
@@ -2422,7 +2886,7 @@ static TermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
|
2422
2886
|
}
|
2423
2887
|
|
2424
2888
|
#define CHECK_CURR_TDE(method) do {\
|
2425
|
-
if (MTDE(tde)->curr_tde
|
2889
|
+
if (NULL == MTDE(tde)->curr_tde) {\
|
2426
2890
|
RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
|
2427
2891
|
"before you call #"method);\
|
2428
2892
|
}\
|
@@ -2456,7 +2920,7 @@ static void mtde_seek(TermDocEnum *tde, int field_num, const char *term)
|
|
2456
2920
|
TermEnum *te = mtde->te;
|
2457
2921
|
char *t;
|
2458
2922
|
te->set_field(te, field_num);
|
2459
|
-
if ((t = te->skip_to(te, term))
|
2923
|
+
if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
|
2460
2924
|
mtde_seek_te(tde, te);
|
2461
2925
|
} else {
|
2462
2926
|
memset(mtde->state, 0, mtde->ir_cnt);
|
@@ -2478,7 +2942,7 @@ static int mtde_freq(TermDocEnum *tde)
|
|
2478
2942
|
static bool mtde_next(TermDocEnum *tde)
|
2479
2943
|
{
|
2480
2944
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2481
|
-
if (mtde->curr_tde
|
2945
|
+
if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
|
2482
2946
|
return true;
|
2483
2947
|
}
|
2484
2948
|
else if (mtde_next_tde(mtde)) {
|
@@ -2494,7 +2958,7 @@ static int mtde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2494
2958
|
int i, end = 0, last_end = 0, b;
|
2495
2959
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2496
2960
|
while (true) {
|
2497
|
-
if (mtde->curr_tde
|
2961
|
+
if (NULL == mtde->curr_tde) return end;
|
2498
2962
|
end += mtde->curr_tde->read(mtde->curr_tde, docs + last_end,
|
2499
2963
|
freqs + last_end, req_num - last_end);
|
2500
2964
|
if (end == last_end) { /* none left in segment */
|
@@ -2527,13 +2991,7 @@ static bool mtde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2527
2991
|
|
2528
2992
|
mtde_next_tde(mtde);
|
2529
2993
|
}
|
2530
|
-
|
2531
|
-
if (curr_tde) {
|
2532
|
-
return curr_tde->skip_to(curr_tde, target_doc_num - mtde->base);
|
2533
|
-
}
|
2534
|
-
else {
|
2535
|
-
return false;
|
2536
|
-
}
|
2994
|
+
return false;
|
2537
2995
|
}
|
2538
2996
|
|
2539
2997
|
static void mtde_close(TermDocEnum *tde)
|
@@ -2660,7 +3118,7 @@ static bool mtdpe_next(TermDocEnum *tde)
|
|
2660
3118
|
int doc;
|
2661
3119
|
MultipleTermDocPosEnum *mtdpe = MTDPE(tde);
|
2662
3120
|
|
2663
|
-
if (mtdpe->pq->size
|
3121
|
+
if (0 == mtdpe->pq->size) {
|
2664
3122
|
return false;
|
2665
3123
|
}
|
2666
3124
|
|
@@ -2710,7 +3168,7 @@ bool mtdpe_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2710
3168
|
TermDocEnum *sub_tde;
|
2711
3169
|
PriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
|
2712
3170
|
|
2713
|
-
while ((sub_tde = (TermDocEnum *)pq_top(mtdpe_pq))
|
3171
|
+
while (NULL != (sub_tde = (TermDocEnum *)pq_top(mtdpe_pq))
|
2714
3172
|
&& (target_doc_num > sub_tde->doc_num(sub_tde))) {
|
2715
3173
|
if (sub_tde->skip_to(sub_tde, target_doc_num)) {
|
2716
3174
|
pq_down(mtdpe_pq);
|
@@ -2779,6 +3237,256 @@ TermDocEnum *mtdpe_new(IndexReader *ir, int field_num, char **terms, int t_cnt)
|
|
2779
3237
|
return tde;
|
2780
3238
|
}
|
2781
3239
|
|
3240
|
+
/****************************************************************************
|
3241
|
+
*
|
3242
|
+
* FileNameFilter
|
3243
|
+
*
|
3244
|
+
****************************************************************************/
|
3245
|
+
|
3246
|
+
static HashTable *fn_extensions = NULL;
|
3247
|
+
static void file_name_filter_init()
|
3248
|
+
{
|
3249
|
+
if (NULL == fn_extensions) {
|
3250
|
+
int i;
|
3251
|
+
fn_extensions = h_new_str((free_ft)NULL, (free_ft)NULL);
|
3252
|
+
for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
|
3253
|
+
h_set(fn_extensions, INDEX_EXTENSIONS[i], (char *)INDEX_EXTENSIONS[i]);
|
3254
|
+
}
|
3255
|
+
register_for_cleanup(fn_extensions, (free_ft)&h_destroy);
|
3256
|
+
}
|
3257
|
+
}
|
3258
|
+
|
3259
|
+
static bool file_name_filter_accept(char *file_name)
|
3260
|
+
{
|
3261
|
+
char *p = strrchr(file_name, '.');
|
3262
|
+
if (NULL != p) {
|
3263
|
+
char *extension = p + 1;
|
3264
|
+
if (NULL != h_get(fn_extensions, extension)) {
|
3265
|
+
return true;
|
3266
|
+
}
|
3267
|
+
else if ((*extension == 'f' || *extension == 's')
|
3268
|
+
&& *(extension + 1) >= '0'
|
3269
|
+
&& *(extension + 1) <= '9') {
|
3270
|
+
return true;
|
3271
|
+
}
|
3272
|
+
}
|
3273
|
+
else if (0 == strncmp(SEGMENTS_FILE_NAME, file_name,
|
3274
|
+
sizeof(SEGMENTS_FILE_NAME) - 1)) {
|
3275
|
+
return true;
|
3276
|
+
}
|
3277
|
+
return false;
|
3278
|
+
}
|
3279
|
+
|
3280
|
+
/*
|
3281
|
+
* Returns true if this is a file that would be contained in a CFS file. This
|
3282
|
+
* function should only be called on files that pass the above "accept" (ie,
|
3283
|
+
* are already known to be a Ferret index file).
|
3284
|
+
*/
|
3285
|
+
static bool file_name_filter_is_cfs_file(char *file_name) {
|
3286
|
+
char *p = strrchr(file_name, '.');
|
3287
|
+
if (NULL != p) {
|
3288
|
+
char *extension = p + 1;
|
3289
|
+
if (NULL != h_get(fn_extensions, extension)
|
3290
|
+
&& 0 != strcmp(extension, "del")
|
3291
|
+
&& 0 != strcmp(extension, "gen")
|
3292
|
+
&& 0 != strcmp(extension, "cfs")) {
|
3293
|
+
return true;
|
3294
|
+
}
|
3295
|
+
else if ('f' == *extension
|
3296
|
+
&& '0' <= *(extension + 1)
|
3297
|
+
&& '9' >= *(extension + 1)) {
|
3298
|
+
return true;
|
3299
|
+
}
|
3300
|
+
}
|
3301
|
+
return false;
|
3302
|
+
}
|
3303
|
+
|
3304
|
+
/****************************************************************************
|
3305
|
+
*
|
3306
|
+
* Deleter
|
3307
|
+
*
|
3308
|
+
****************************************************************************/
|
3309
|
+
|
3310
|
+
#define DELETABLE_START_CAPA 8
|
3311
|
+
Deleter *deleter_new(SegmentInfos *sis, Store *store)
|
3312
|
+
{
|
3313
|
+
Deleter *dlr = ALLOC(Deleter);
|
3314
|
+
dlr->sis = sis;
|
3315
|
+
dlr->store = store;
|
3316
|
+
dlr->pending = hs_new_str(&free);
|
3317
|
+
return dlr;
|
3318
|
+
}
|
3319
|
+
|
3320
|
+
void deleter_destroy(Deleter *dlr)
|
3321
|
+
{
|
3322
|
+
hs_destroy(dlr->pending);
|
3323
|
+
free(dlr);
|
3324
|
+
}
|
3325
|
+
|
3326
|
+
void deleter_queue_file(Deleter *dlr, char *file_name)
|
3327
|
+
{
|
3328
|
+
hs_add(dlr->pending, estrdup(file_name));
|
3329
|
+
}
|
3330
|
+
|
3331
|
+
void deleter_delete_file(Deleter *dlr, char *file_name)
|
3332
|
+
{
|
3333
|
+
Store *store = dlr->store;
|
3334
|
+
TRY
|
3335
|
+
if (store->exists(store, file_name)) {
|
3336
|
+
store->remove(store, file_name);
|
3337
|
+
}
|
3338
|
+
hs_del(dlr->pending, file_name);
|
3339
|
+
XCATCHALL
|
3340
|
+
hs_add(dlr->pending, estrdup(file_name));
|
3341
|
+
XENDTRY
|
3342
|
+
}
|
3343
|
+
|
3344
|
+
void deleter_commit_pending_deletions(Deleter *dlr)
|
3345
|
+
{
|
3346
|
+
int i;
|
3347
|
+
char **pending = (char **)dlr->pending->elems;
|
3348
|
+
for (i = dlr->pending->size - 1; i >= 0; i--) {
|
3349
|
+
deleter_delete_file(dlr, pending[i]);
|
3350
|
+
}
|
3351
|
+
}
|
3352
|
+
|
3353
|
+
void deleter_delete_files(Deleter *dlr, char **files, int file_cnt)
|
3354
|
+
{
|
3355
|
+
int i;
|
3356
|
+
for (i = file_cnt - 1; i >= 0; i--) {
|
3357
|
+
deleter_queue_file(dlr, files[i]);
|
3358
|
+
}
|
3359
|
+
deleter_commit_pending_deletions(dlr);
|
3360
|
+
}
|
3361
|
+
|
3362
|
+
struct DelFilesArg {
|
3363
|
+
char curr_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3364
|
+
Deleter *dlr;
|
3365
|
+
HashTable *current;
|
3366
|
+
};
|
3367
|
+
|
3368
|
+
static void deleter_find_deletable_files_i(char *file_name, void *arg)
|
3369
|
+
{
|
3370
|
+
struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
|
3371
|
+
Deleter *dlr = dfa->dlr;
|
3372
|
+
|
3373
|
+
if (file_name_filter_accept(file_name)
|
3374
|
+
&& 0 != strcmp(file_name, dfa->curr_seg_file_name)
|
3375
|
+
&& 0 != strcmp(file_name, SEGMENTS_GEN_FILE_NAME)) {
|
3376
|
+
|
3377
|
+
bool do_delete = false;
|
3378
|
+
SegmentInfo *si;
|
3379
|
+
char segment_name[SEGMENT_NAME_MAX_LENGTH];
|
3380
|
+
char *extension, *p;
|
3381
|
+
strcpy(segment_name, file_name);
|
3382
|
+
|
3383
|
+
p = strrchr(segment_name, '.');
|
3384
|
+
|
3385
|
+
/* First remove any extension: */
|
3386
|
+
if (NULL != p) {
|
3387
|
+
*p = '\0';
|
3388
|
+
extension = p + 1;
|
3389
|
+
} else {
|
3390
|
+
extension = NULL;
|
3391
|
+
}
|
3392
|
+
|
3393
|
+
/* Then, remove any generation count: */
|
3394
|
+
p = strrchr(segment_name + 1, '_');
|
3395
|
+
if (NULL != p) {
|
3396
|
+
*p = '\0';
|
3397
|
+
}
|
3398
|
+
|
3399
|
+
/* Delete this file if it's not a "current" segment, or, it is a
|
3400
|
+
* single index file but there is now a corresponding compound file: */
|
3401
|
+
if (NULL == (si = h_get(dfa->current, segment_name))) {
|
3402
|
+
/* Delete if segment is not referenced: */
|
3403
|
+
do_delete = true;
|
3404
|
+
}
|
3405
|
+
else {
|
3406
|
+
char tmp_fn[SEGMENT_NAME_MAX_LENGTH];
|
3407
|
+
/* OK, segment is referenced, but file may still be orphaned: */
|
3408
|
+
if (file_name_filter_is_cfs_file(file_name)
|
3409
|
+
&& si->use_compound_file) {
|
3410
|
+
/* This file is stored in a CFS file for this segment: */
|
3411
|
+
do_delete = true;
|
3412
|
+
}
|
3413
|
+
else if (0 == strcmp("del", extension)) {
|
3414
|
+
/* This is a _segmentName_N.del file: */
|
3415
|
+
if (!fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
|
3416
|
+
|| 0 != strcmp(file_name, tmp_fn)) {
|
3417
|
+
/* If this is a separate .del file, but it
|
3418
|
+
* doesn't match the current del file name for
|
3419
|
+
* this segment, then delete it: */
|
3420
|
+
do_delete = true;
|
3421
|
+
}
|
3422
|
+
}
|
3423
|
+
else if (NULL != extension
|
3424
|
+
&& ('s' == *extension || 'f' == *extension)
|
3425
|
+
&& isdigit(extension[1])) {
|
3426
|
+
si_norm_file_name(si, tmp_fn, atoi(extension + 1));
|
3427
|
+
/* This is a _segmentName_N.sX file: */
|
3428
|
+
if (0 != strcmp(tmp_fn, file_name)) {
|
3429
|
+
/* This is an orphaned norms file: */
|
3430
|
+
do_delete = true;
|
3431
|
+
}
|
3432
|
+
}
|
3433
|
+
else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
|
3434
|
+
/* This is a partially written _segmentName.cfs: */
|
3435
|
+
do_delete = true;
|
3436
|
+
}
|
3437
|
+
}
|
3438
|
+
|
3439
|
+
if (do_delete) {
|
3440
|
+
deleter_queue_file(dlr, file_name);
|
3441
|
+
}
|
3442
|
+
}
|
3443
|
+
}
|
3444
|
+
|
3445
|
+
/*
|
3446
|
+
* Determine index files that are no longer referenced and therefore should be
|
3447
|
+
* deleted. This is called once (by the writer), and then subsequently we add
|
3448
|
+
* onto deletable any files that are no longer needed at the point that we
|
3449
|
+
* create the unused file (eg when merging segments), and we only remove from
|
3450
|
+
* deletable when a file is successfully deleted.
|
3451
|
+
*/
|
3452
|
+
void deleter_find_deletable_files(Deleter *dlr)
|
3453
|
+
{
|
3454
|
+
/* Gather all "current" segments: */
|
3455
|
+
int i;
|
3456
|
+
SegmentInfos *sis = dlr->sis;
|
3457
|
+
Store *store = dlr->store;
|
3458
|
+
struct DelFilesArg dfa;
|
3459
|
+
HashTable *current = dfa.current
|
3460
|
+
= h_new_str((free_ft)NULL, (free_ft)si_deref);
|
3461
|
+
dfa.dlr = dlr;
|
3462
|
+
|
3463
|
+
for(i = 0; i < sis->size; i++) {
|
3464
|
+
SegmentInfo *si = (SegmentInfo *)sis->segs[i];
|
3465
|
+
REF(si);
|
3466
|
+
h_set(current, si->name, si);
|
3467
|
+
}
|
3468
|
+
|
3469
|
+
/* Then go through all files in the Directory that are Ferret index files,
|
3470
|
+
* and add to deletable if they are not referenced by the current segments
|
3471
|
+
* info: */
|
3472
|
+
sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
|
3473
|
+
file_name_filter_init();
|
3474
|
+
|
3475
|
+
store->each(store, &deleter_find_deletable_files_i, &dfa);
|
3476
|
+
h_destroy(dfa.current);
|
3477
|
+
}
|
3478
|
+
|
3479
|
+
void deleter_delete_deletable_files(Deleter *dlr)
|
3480
|
+
{
|
3481
|
+
deleter_find_deletable_files(dlr);
|
3482
|
+
deleter_commit_pending_deletions(dlr);
|
3483
|
+
}
|
3484
|
+
|
3485
|
+
void deleter_clear_pending_deletions(Deleter *dlr)
|
3486
|
+
{
|
3487
|
+
hs_clear(dlr->pending);
|
3488
|
+
}
|
3489
|
+
|
2782
3490
|
/****************************************************************************
|
2783
3491
|
*
|
2784
3492
|
* IndexReader
|
@@ -2800,7 +3508,7 @@ void ir_acquire_write_lock(IndexReader *ir)
|
|
2800
3508
|
"need to close and reopen the index");
|
2801
3509
|
}
|
2802
3510
|
|
2803
|
-
if (ir->write_lock
|
3511
|
+
if (NULL == ir->write_lock) {
|
2804
3512
|
ir->write_lock = open_lock(ir->store, WRITE_LOCK_NAME);
|
2805
3513
|
if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
|
2806
3514
|
RAISE(LOCK_ERROR, "Could not obtain write lock when trying to "
|
@@ -2811,8 +3519,8 @@ void ir_acquire_write_lock(IndexReader *ir)
|
|
2811
3519
|
"you can safely delete these files.");
|
2812
3520
|
}
|
2813
3521
|
|
2814
|
-
/* we have to check whether index has changed since this reader was
|
2815
|
-
* if so, this reader is no longer valid for deletion */
|
3522
|
+
/* we have to check whether index has changed since this reader was
|
3523
|
+
* opened. If so, this reader is no longer valid for deletion */
|
2816
3524
|
if (sis_read_current_version(ir->store) > ir->sis->version) {
|
2817
3525
|
ir->is_stale = true;
|
2818
3526
|
ir->write_lock->release(ir->write_lock);
|
@@ -2856,7 +3564,7 @@ IndexReader *ir_setup(IndexReader *ir, Store *store, SegmentInfos *sis,
|
|
2856
3564
|
|
2857
3565
|
bool ir_index_exists(Store *store)
|
2858
3566
|
{
|
2859
|
-
return
|
3567
|
+
return sis_current_segment_generation(store) != 1;
|
2860
3568
|
}
|
2861
3569
|
|
2862
3570
|
int ir_get_field_num(IndexReader *ir, const char *field)
|
@@ -2903,7 +3611,7 @@ uchar *ir_get_norms_i(IndexReader *ir, int field_num)
|
|
2903
3611
|
norms = ir->get_norms(ir, field_num);
|
2904
3612
|
}
|
2905
3613
|
if (!norms) {
|
2906
|
-
if (ir->fake_norms
|
3614
|
+
if (NULL == ir->fake_norms) {
|
2907
3615
|
ir->fake_norms = (uchar *)ecalloc(ir->max_doc(ir));
|
2908
3616
|
}
|
2909
3617
|
norms = ir->fake_norms;
|
@@ -3009,34 +3717,41 @@ TermDocEnum *ir_term_positions_for(IndexReader *ir, const char *field,
|
|
3009
3717
|
|
3010
3718
|
void ir_commit_i(IndexReader *ir)
|
3011
3719
|
{
|
3012
|
-
if (ir->has_changes
|
3013
|
-
|
3014
|
-
|
3015
|
-
|
3016
|
-
|
3017
|
-
if (!commit_lock->obtain(commit_lock)) { /* obtain write lock */
|
3018
|
-
RAISE(LOCK_ERROR, "Error trying to commit the index. Commit "
|
3019
|
-
"lock already obtained");
|
3720
|
+
if (ir->has_changes) {
|
3721
|
+
if (NULL == ir->deleter && NULL != ir->store) {
|
3722
|
+
/* In the MultiReader case, we share this deleter across all
|
3723
|
+
* SegmentReaders: */
|
3724
|
+
ir->set_deleter_i(ir, deleter_new(ir->sis, ir->store));
|
3020
3725
|
}
|
3726
|
+
if (ir->is_owner) {
|
3727
|
+
char curr_seg_fn[MAX_FILE_PATH];
|
3728
|
+
mutex_lock(&ir->store->mutex);
|
3021
3729
|
|
3022
|
-
|
3023
|
-
|
3730
|
+
/* Should not be necessary: no prior commit should have left
|
3731
|
+
* pending files, so just defensive: */
|
3732
|
+
if (ir->deleter) deleter_clear_pending_deletions(ir->deleter);
|
3024
3733
|
|
3025
|
-
|
3026
|
-
|
3027
|
-
|
3734
|
+
sis_curr_seg_file_name(curr_seg_fn, ir->store);
|
3735
|
+
|
3736
|
+
ir->commit_i(ir);
|
3737
|
+
sis_write(ir->sis, ir->store, ir->deleter);
|
3028
3738
|
|
3029
|
-
|
3030
|
-
|
3031
|
-
ir->
|
3032
|
-
|
3033
|
-
ir->write_lock
|
3739
|
+
if (ir->deleter) deleter_delete_file(ir->deleter, curr_seg_fn);
|
3740
|
+
|
3741
|
+
mutex_unlock(&ir->store->mutex);
|
3742
|
+
|
3743
|
+
if (NULL != ir->write_lock) {
|
3744
|
+
/* release write lock */
|
3745
|
+
ir->write_lock->release(ir->write_lock);
|
3746
|
+
close_lock(ir->write_lock);
|
3747
|
+
ir->write_lock = NULL;
|
3748
|
+
}
|
3749
|
+
}
|
3750
|
+
else {
|
3751
|
+
ir->commit_i(ir);
|
3034
3752
|
}
|
3035
|
-
ir->has_changes = false;
|
3036
|
-
}
|
3037
|
-
else {
|
3038
|
-
ir->commit_i(ir);
|
3039
3753
|
}
|
3754
|
+
ir->has_changes = false;
|
3040
3755
|
}
|
3041
3756
|
|
3042
3757
|
void ir_commit(IndexReader *ir)
|
@@ -3049,15 +3764,14 @@ void ir_commit(IndexReader *ir)
|
|
3049
3764
|
void ir_close(IndexReader *ir)
|
3050
3765
|
{
|
3051
3766
|
mutex_lock(&ir->mutex);
|
3052
|
-
if (--(ir->ref_cnt)
|
3767
|
+
if (0 == --(ir->ref_cnt)) {
|
3053
3768
|
ir_commit_i(ir);
|
3054
3769
|
ir->close_i(ir);
|
3055
3770
|
if (ir->store) {
|
3056
3771
|
store_deref(ir->store);
|
3057
3772
|
}
|
3058
|
-
if (ir->is_owner) {
|
3773
|
+
if (ir->is_owner && ir->sis) {
|
3059
3774
|
sis_destroy(ir->sis);
|
3060
|
-
fis_deref(ir->fis);
|
3061
3775
|
}
|
3062
3776
|
if (ir->cache) {
|
3063
3777
|
h_destroy(ir->cache);
|
@@ -3065,6 +3779,9 @@ void ir_close(IndexReader *ir)
|
|
3065
3779
|
if (ir->sort_cache) {
|
3066
3780
|
h_destroy(ir->sort_cache);
|
3067
3781
|
}
|
3782
|
+
if (ir->deleter && ir->is_owner) {
|
3783
|
+
deleter_destroy(ir->deleter);
|
3784
|
+
}
|
3068
3785
|
free(ir->fake_norms);
|
3069
3786
|
|
3070
3787
|
mutex_destroy(&ir->mutex);
|
@@ -3080,26 +3797,14 @@ void ir_close(IndexReader *ir)
|
|
3080
3797
|
**/
|
3081
3798
|
void ir_add_cache(IndexReader *ir)
|
3082
3799
|
{
|
3083
|
-
if (ir->cache
|
3800
|
+
if (NULL == ir->cache) {
|
3084
3801
|
ir->cache = co_hash_create();
|
3085
3802
|
}
|
3086
3803
|
}
|
3087
3804
|
|
3088
3805
|
bool ir_is_latest(IndexReader *ir)
|
3089
3806
|
{
|
3090
|
-
|
3091
|
-
|
3092
|
-
Lock *commit_lock = open_lock(ir->store, COMMIT_LOCK_NAME);
|
3093
|
-
if (!commit_lock->obtain(commit_lock)) {
|
3094
|
-
close_lock(commit_lock);
|
3095
|
-
RAISE(LOCK_ERROR, "Error detecting if the current index is latest "
|
3096
|
-
"version. Commit lock currently obtained");
|
3097
|
-
}
|
3098
|
-
is_latest = (sis_read_current_version(ir->store) == ir->sis->version);
|
3099
|
-
commit_lock->release(commit_lock);
|
3100
|
-
close_lock(commit_lock);
|
3101
|
-
|
3102
|
-
return is_latest;
|
3807
|
+
return (sis_read_current_version(ir->store) == ir->sis->version);
|
3103
3808
|
}
|
3104
3809
|
|
3105
3810
|
/****************************************************************************
|
@@ -3128,35 +3833,27 @@ static Norm *norm_create(InStream *is, int field_num)
|
|
3128
3833
|
static void norm_destroy(Norm *norm)
|
3129
3834
|
{
|
3130
3835
|
is_close(norm->is);
|
3131
|
-
if (norm->bytes
|
3836
|
+
if (NULL != norm->bytes) {
|
3132
3837
|
free(norm->bytes);
|
3133
3838
|
}
|
3134
3839
|
free(norm);
|
3135
3840
|
}
|
3136
3841
|
|
3137
|
-
static void norm_rewrite(Norm *norm, Store *store,
|
3138
|
-
|
3842
|
+
static void norm_rewrite(Norm *norm, Store *store, Deleter *dlr,
|
3843
|
+
SegmentInfo *si, int doc_count)
|
3139
3844
|
{
|
3140
3845
|
OutStream *os;
|
3141
|
-
char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3142
3846
|
char norm_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3847
|
+
const int field_num = norm->field_num;
|
3143
3848
|
|
3144
|
-
if (
|
3145
|
-
|
3849
|
+
if (si_norm_file_name(si, norm_file_name, field_num)) {
|
3850
|
+
deleter_queue_file(dlr, norm_file_name);
|
3146
3851
|
}
|
3147
|
-
|
3148
|
-
|
3149
|
-
os = store->new_output(store,
|
3852
|
+
si_advance_norm_gen(si, field_num);
|
3853
|
+
si_norm_file_name(si, norm_file_name, field_num);
|
3854
|
+
os = store->new_output(store, norm_file_name);
|
3150
3855
|
os_write_bytes(os, norm->bytes, doc_count);
|
3151
3856
|
os_close(os);
|
3152
|
-
|
3153
|
-
if (cfs_store) {
|
3154
|
-
sprintf(norm_file_name, "%s.s%d", segment, norm->field_num);
|
3155
|
-
}
|
3156
|
-
else {
|
3157
|
-
sprintf(norm_file_name, "%s.f%d", segment, norm->field_num);
|
3158
|
-
}
|
3159
|
-
store->rename(store, tmp_file_name, norm_file_name);
|
3160
3857
|
norm->is_dirty = false;
|
3161
3858
|
}
|
3162
3859
|
|
@@ -3166,6 +3863,7 @@ static void norm_rewrite(Norm *norm, Store *store, char *segment,
|
|
3166
3863
|
|
3167
3864
|
typedef struct SegmentReader {
|
3168
3865
|
IndexReader ir;
|
3866
|
+
SegmentInfo *si;
|
3169
3867
|
char *segment;
|
3170
3868
|
FieldsReader *fr;
|
3171
3869
|
BitVector *deleted_docs;
|
@@ -3191,7 +3889,7 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3191
3889
|
{
|
3192
3890
|
FieldsReader *fr;
|
3193
3891
|
|
3194
|
-
if ((fr = thread_getspecific(sr->thread_fr))
|
3892
|
+
if (NULL == (fr = thread_getspecific(sr->thread_fr))) {
|
3195
3893
|
fr = fr_clone(sr->fr);
|
3196
3894
|
ary_push(sr->fr_bucket, fr);
|
3197
3895
|
thread_setspecific(sr->thread_fr, fr);
|
@@ -3201,17 +3899,17 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3201
3899
|
|
3202
3900
|
static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
|
3203
3901
|
{
|
3204
|
-
return (sr->deleted_docs
|
3902
|
+
return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
|
3205
3903
|
}
|
3206
3904
|
|
3207
3905
|
static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3208
3906
|
uchar *buf)
|
3209
3907
|
{
|
3210
3908
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3211
|
-
if (
|
3909
|
+
if (NULL == norm) {
|
3212
3910
|
memset(buf, 0, SR_SIZE(sr));
|
3213
3911
|
}
|
3214
|
-
else if (norm->bytes
|
3912
|
+
else if (NULL != norm->bytes) { /* can copy from cache */
|
3215
3913
|
memcpy(buf, norm->bytes, SR_SIZE(sr));
|
3216
3914
|
}
|
3217
3915
|
else {
|
@@ -3226,11 +3924,11 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
|
3226
3924
|
static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
3227
3925
|
{
|
3228
3926
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3229
|
-
if (
|
3927
|
+
if (NULL == norm) { /* not an indexed field */
|
3230
3928
|
return NULL;
|
3231
3929
|
}
|
3232
3930
|
|
3233
|
-
if (norm->bytes
|
3931
|
+
if (NULL == norm->bytes) { /* value not yet read */
|
3234
3932
|
uchar *bytes = ALLOC_N(uchar, SR_SIZE(sr));
|
3235
3933
|
sr_get_norms_into_i(sr, field_num, bytes);
|
3236
3934
|
norm->bytes = bytes; /* cache it */
|
@@ -3241,7 +3939,8 @@ static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
|
3241
3939
|
static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
|
3242
3940
|
{
|
3243
3941
|
Norm *norm = h_get_int(SR(ir)->norms, field_num);
|
3244
|
-
if (
|
3942
|
+
if (NULL != norm) { /* has_norms */
|
3943
|
+
ir->has_changes = true;
|
3245
3944
|
norm->is_dirty = true; /* mark it dirty */
|
3246
3945
|
SR(ir)->norms_dirty = true;
|
3247
3946
|
sr_get_norms_i(SR(ir), field_num)[doc_num] = b;
|
@@ -3250,12 +3949,13 @@ static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
|
|
3250
3949
|
|
3251
3950
|
static void sr_delete_doc_i(IndexReader *ir, int doc_num)
|
3252
3951
|
{
|
3253
|
-
if (SR(ir)->deleted_docs
|
3952
|
+
if (NULL == SR(ir)->deleted_docs) {
|
3254
3953
|
SR(ir)->deleted_docs = bv_new();
|
3255
3954
|
}
|
3256
3955
|
|
3257
3956
|
SR(ir)->deleted_docs_dirty = true;
|
3258
3957
|
SR(ir)->undelete_all = false;
|
3958
|
+
ir->has_changes = true;
|
3259
3959
|
bv_set(SR(ir)->deleted_docs, doc_num);
|
3260
3960
|
}
|
3261
3961
|
|
@@ -3263,12 +3963,18 @@ static void sr_undelete_all_i(IndexReader *ir)
|
|
3263
3963
|
{
|
3264
3964
|
SR(ir)->undelete_all = true;
|
3265
3965
|
SR(ir)->deleted_docs_dirty = false;
|
3266
|
-
|
3966
|
+
ir->has_changes = true;
|
3967
|
+
if (NULL != SR(ir)->deleted_docs) {
|
3267
3968
|
bv_destroy(SR(ir)->deleted_docs);
|
3268
3969
|
}
|
3269
3970
|
SR(ir)->deleted_docs = NULL;
|
3270
3971
|
}
|
3271
3972
|
|
3973
|
+
static void sr_set_deleter_i(IndexReader *ir, Deleter *deleter)
|
3974
|
+
{
|
3975
|
+
ir->deleter = deleter;
|
3976
|
+
}
|
3977
|
+
|
3272
3978
|
static void bv_write(BitVector *bv, Store *store, char *name)
|
3273
3979
|
{
|
3274
3980
|
int i;
|
@@ -3299,64 +4005,61 @@ static BitVector *bv_read(Store *store, char *name)
|
|
3299
4005
|
|
3300
4006
|
static void sr_commit_i(IndexReader *ir)
|
3301
4007
|
{
|
4008
|
+
SegmentInfo *si = SR(ir)->si;
|
4009
|
+
char *segment = SR(ir)->si->name;
|
3302
4010
|
char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3303
|
-
char del_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3304
|
-
|
3305
|
-
sprintf(del_file_name, "%s.del", SR(ir)->segment);
|
3306
4011
|
|
3307
|
-
if (SR(ir)->deleted_docs_dirty) {
|
3308
|
-
|
3309
|
-
|
3310
|
-
|
3311
|
-
|
3312
|
-
|
3313
|
-
|
4012
|
+
if (SR(ir)->undelete_all || SR(ir)->deleted_docs_dirty) {
|
4013
|
+
if (si->del_gen >= 0) {
|
4014
|
+
fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
4015
|
+
deleter_queue_file(ir->deleter, tmp_file_name);
|
4016
|
+
}
|
4017
|
+
if (SR(ir)->undelete_all) {
|
4018
|
+
si->del_gen = -1;
|
4019
|
+
SR(ir)->undelete_all = false;
|
4020
|
+
} else {
|
4021
|
+
/* (SR(ir)->deleted_docs_dirty) re-write deleted */
|
4022
|
+
si->del_gen++;
|
4023
|
+
fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
4024
|
+
bv_write(SR(ir)->deleted_docs, ir->store, tmp_file_name);
|
4025
|
+
SR(ir)->deleted_docs_dirty = false;
|
4026
|
+
}
|
3314
4027
|
}
|
3315
4028
|
if (SR(ir)->norms_dirty) { /* re-write norms */
|
3316
4029
|
int i;
|
3317
4030
|
const int field_cnt = ir->fis->size;
|
3318
4031
|
FieldInfo *fi;
|
3319
|
-
for (i =
|
4032
|
+
for (i = field_cnt - 1; i >= 0; i--) {
|
3320
4033
|
fi = ir->fis->fields[i];
|
3321
4034
|
if (fi_is_indexed(fi)) {
|
3322
|
-
|
3323
|
-
|
4035
|
+
Norm *norm = h_get_int(SR(ir)->norms, fi->number);
|
4036
|
+
if (norm && norm->is_dirty) {
|
4037
|
+
norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
|
4038
|
+
SR_SIZE(ir));
|
4039
|
+
}
|
3324
4040
|
}
|
3325
4041
|
}
|
4042
|
+
SR(ir)->norms_dirty = false;
|
3326
4043
|
}
|
3327
|
-
SR(ir)->deleted_docs_dirty = false;
|
3328
|
-
SR(ir)->norms_dirty = false;
|
3329
|
-
SR(ir)->undelete_all = false;
|
3330
4044
|
}
|
3331
4045
|
|
3332
4046
|
static void sr_close_i(IndexReader *ir)
|
3333
4047
|
{
|
3334
4048
|
SegmentReader *sr = SR(ir);
|
3335
4049
|
|
3336
|
-
fr_close(sr->fr);
|
3337
|
-
tir_close(sr->tir);
|
3338
|
-
sfi_close(sr->sfi);
|
3339
|
-
|
3340
|
-
if (sr->
|
3341
|
-
|
3342
|
-
|
3343
|
-
if (sr->
|
3344
|
-
is_close(sr->prx_in);
|
3345
|
-
}
|
3346
|
-
|
3347
|
-
h_destroy(sr->norms);
|
3348
|
-
|
4050
|
+
if (sr->fr) fr_close(sr->fr);
|
4051
|
+
if (sr->tir) tir_close(sr->tir);
|
4052
|
+
if (sr->sfi) sfi_close(sr->sfi);
|
4053
|
+
if (sr->frq_in) is_close(sr->frq_in);
|
4054
|
+
if (sr->prx_in) is_close(sr->prx_in);
|
4055
|
+
if (sr->norms) h_destroy(sr->norms);
|
4056
|
+
if (sr->deleted_docs) bv_destroy(sr->deleted_docs);
|
4057
|
+
if (sr->cfs_store) store_deref(sr->cfs_store);
|
3349
4058
|
if (sr->fr_bucket) {
|
3350
4059
|
thread_setspecific(sr->thread_fr, NULL);
|
3351
4060
|
thread_key_delete(sr->thread_fr);
|
3352
4061
|
ary_destroy(sr->fr_bucket, (free_ft)&fr_close);
|
3353
4062
|
}
|
3354
|
-
if (sr->deleted_docs) {
|
3355
|
-
bv_destroy(sr->deleted_docs);
|
3356
|
-
}
|
3357
|
-
if (sr->cfs_store) {
|
3358
|
-
store_deref(sr->cfs_store);
|
3359
|
-
}
|
3360
4063
|
}
|
3361
4064
|
|
3362
4065
|
static int sr_num_docs(IndexReader *ir)
|
@@ -3365,7 +4068,7 @@ static int sr_num_docs(IndexReader *ir)
|
|
3365
4068
|
|
3366
4069
|
mutex_lock(&ir->mutex);
|
3367
4070
|
num_docs = SR(ir)->fr->size;
|
3368
|
-
if (SR(ir)->deleted_docs
|
4071
|
+
if (NULL != SR(ir)->deleted_docs) {
|
3369
4072
|
num_docs -= SR(ir)->deleted_docs->count;
|
3370
4073
|
}
|
3371
4074
|
mutex_unlock(&ir->mutex);
|
@@ -3473,7 +4176,7 @@ static TermVector *sr_term_vector(IndexReader *ir, int doc_num,
|
|
3473
4176
|
static HashTable *sr_term_vectors(IndexReader *ir, int doc_num)
|
3474
4177
|
{
|
3475
4178
|
FieldsReader *fr;
|
3476
|
-
if (!SR(ir)->fr || (fr = sr_fr(SR(ir)))
|
4179
|
+
if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
|
3477
4180
|
return NULL;
|
3478
4181
|
}
|
3479
4182
|
|
@@ -3493,42 +4196,32 @@ static bool sr_is_deleted(IndexReader *ir, int doc_num)
|
|
3493
4196
|
|
3494
4197
|
static bool sr_has_deletions(IndexReader *ir)
|
3495
4198
|
{
|
3496
|
-
return
|
4199
|
+
return NULL != SR(ir)->deleted_docs;
|
3497
4200
|
}
|
3498
4201
|
|
3499
4202
|
static void sr_open_norms(IndexReader *ir, Store *cfs_store)
|
3500
4203
|
{
|
3501
4204
|
int i;
|
3502
|
-
|
4205
|
+
SegmentInfo *si = SR(ir)->si;
|
3503
4206
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
3504
|
-
FieldInfos *fis = ir->fis;
|
3505
|
-
char *ext_ptr;
|
3506
|
-
const int field_cnt = fis->size;
|
3507
|
-
|
3508
|
-
sprintf(file_name, "%s.", SR(ir)->segment);
|
3509
|
-
ext_ptr = file_name + strlen(file_name);
|
3510
4207
|
|
3511
|
-
for (i =
|
3512
|
-
|
3513
|
-
|
3514
|
-
|
3515
|
-
|
3516
|
-
|
3517
|
-
}
|
3518
|
-
if (store->exists(store, file_name)) {
|
3519
|
-
h_set_int(SR(ir)->norms, i,
|
3520
|
-
norm_create(store->open_input(store, file_name), i));
|
3521
|
-
}
|
4208
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
4209
|
+
Store *store = (si->use_compound_file && si->norm_gens[i] == 0) ?
|
4210
|
+
cfs_store : ir->store;
|
4211
|
+
if (si_norm_file_name(si, file_name, i)) {
|
4212
|
+
h_set_int(SR(ir)->norms, i,
|
4213
|
+
norm_create(store->open_input(store, file_name), i));
|
3522
4214
|
}
|
3523
4215
|
}
|
3524
4216
|
SR(ir)->norms_dirty = false;
|
3525
4217
|
}
|
3526
4218
|
|
3527
|
-
static IndexReader *sr_setup_i(SegmentReader *sr
|
4219
|
+
static IndexReader *sr_setup_i(SegmentReader *sr)
|
3528
4220
|
{
|
3529
|
-
Store *store = si->store;
|
4221
|
+
Store *store = sr->si->store;
|
3530
4222
|
IndexReader *ir = IR(sr);
|
3531
4223
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4224
|
+
char *sr_segment = sr->si->name;
|
3532
4225
|
|
3533
4226
|
ir->num_docs = &sr_num_docs;
|
3534
4227
|
ir->max_doc = &sr_max_doc;
|
@@ -3549,51 +4242,56 @@ static IndexReader *sr_setup_i(SegmentReader *sr, SegmentInfo *si)
|
|
3549
4242
|
ir->set_norm_i = &sr_set_norm_i;
|
3550
4243
|
ir->delete_doc_i = &sr_delete_doc_i;
|
3551
4244
|
ir->undelete_all_i = &sr_undelete_all_i;
|
4245
|
+
ir->set_deleter_i = &sr_set_deleter_i;
|
3552
4246
|
ir->commit_i = &sr_commit_i;
|
3553
4247
|
ir->close_i = &sr_close_i;
|
3554
4248
|
|
3555
|
-
sr->segment = si->name;
|
3556
4249
|
sr->cfs_store = NULL;
|
3557
4250
|
|
3558
|
-
|
3559
|
-
|
3560
|
-
|
3561
|
-
|
3562
|
-
|
4251
|
+
TRY
|
4252
|
+
if (sr->si->use_compound_file) {
|
4253
|
+
sprintf(file_name, "%s.cfs", sr_segment);
|
4254
|
+
sr->cfs_store = open_cmpd_store(store, file_name);
|
4255
|
+
store = sr->cfs_store;
|
4256
|
+
}
|
3563
4257
|
|
3564
|
-
|
3565
|
-
|
3566
|
-
|
4258
|
+
sr->fr = fr_open(store, sr_segment, ir->fis);
|
4259
|
+
sr->sfi = sfi_open(store, sr_segment);
|
4260
|
+
sr->tir = tir_open(store, sr->sfi, sr_segment);
|
3567
4261
|
|
3568
|
-
|
3569
|
-
|
3570
|
-
|
3571
|
-
|
3572
|
-
|
3573
|
-
|
3574
|
-
|
4262
|
+
sr->deleted_docs = NULL;
|
4263
|
+
sr->deleted_docs_dirty = false;
|
4264
|
+
sr->undelete_all = false;
|
4265
|
+
if (si_has_deletions(sr->si)) {
|
4266
|
+
fn_for_generation(file_name, sr_segment, "del", sr->si->del_gen);
|
4267
|
+
sr->deleted_docs = bv_read(sr->si->store, file_name);
|
4268
|
+
}
|
3575
4269
|
|
3576
|
-
|
3577
|
-
|
3578
|
-
|
3579
|
-
|
3580
|
-
|
3581
|
-
|
4270
|
+
sprintf(file_name, "%s.frq", sr_segment);
|
4271
|
+
sr->frq_in = store->open_input(store, file_name);
|
4272
|
+
sprintf(file_name, "%s.prx", sr_segment);
|
4273
|
+
sr->prx_in = store->open_input(store, file_name);
|
4274
|
+
sr->norms = h_new_int((free_ft)&norm_destroy);
|
4275
|
+
sr_open_norms(ir, store);
|
4276
|
+
if (fis_has_vectors(ir->fis)) {
|
4277
|
+
thread_key_create(&sr->thread_fr, NULL);
|
4278
|
+
sr->fr_bucket = ary_new();
|
4279
|
+
}
|
4280
|
+
XCATCHALL
|
4281
|
+
ir_close(ir);
|
4282
|
+
XENDTRY
|
3582
4283
|
|
3583
|
-
if (fis_has_vectors(ir->fis)) {
|
3584
|
-
thread_key_create(&sr->thread_fr, NULL);
|
3585
|
-
sr->fr_bucket = ary_new();
|
3586
|
-
}
|
3587
4284
|
return ir;
|
3588
4285
|
}
|
3589
4286
|
|
3590
4287
|
static IndexReader *sr_open(SegmentInfos *sis, FieldInfos *fis, int si_num,
|
3591
4288
|
bool is_owner)
|
3592
4289
|
{
|
4290
|
+
IndexReader *ir;
|
3593
4291
|
SegmentReader *sr = ALLOC_AND_ZERO(SegmentReader);
|
3594
|
-
|
3595
|
-
|
3596
|
-
return sr_setup_i(
|
4292
|
+
sr->si = sis->segs[si_num];
|
4293
|
+
ir = ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
|
4294
|
+
return sr_setup_i(sr);
|
3597
4295
|
}
|
3598
4296
|
|
3599
4297
|
/****************************************************************************
|
@@ -3683,7 +4381,7 @@ static uchar *mr_get_norms(IndexReader *ir, int field_num)
|
|
3683
4381
|
|
3684
4382
|
mutex_lock(&ir->mutex);
|
3685
4383
|
bytes = h_get_int(MR(ir)->norms_cache, field_num);
|
3686
|
-
if (
|
4384
|
+
if (NULL == bytes) {
|
3687
4385
|
int i;
|
3688
4386
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
3689
4387
|
|
@@ -3709,7 +4407,7 @@ static uchar *mr_get_norms_into(IndexReader *ir, int field_num, uchar *buf)
|
|
3709
4407
|
|
3710
4408
|
mutex_lock(&ir->mutex);
|
3711
4409
|
bytes = h_get_int(MR(ir)->norms_cache, field_num);
|
3712
|
-
if (
|
4410
|
+
if (NULL != bytes) {
|
3713
4411
|
memcpy(buf, bytes, MR(ir)->max_doc);
|
3714
4412
|
}
|
3715
4413
|
else {
|
@@ -3791,6 +4489,7 @@ static void mr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar val
|
|
3791
4489
|
int fnum = mr_get_field_num(MR(ir), i, field_num);
|
3792
4490
|
if (fnum >= 0) {
|
3793
4491
|
IndexReader *reader = MR(ir)->sub_readers[i];
|
4492
|
+
ir->has_changes = true;
|
3794
4493
|
h_del_int(MR(ir)->norms_cache, fnum);/* clear cache */
|
3795
4494
|
ir_set_norm_i(reader, doc_num - MR(ir)->starts[i], fnum, val);
|
3796
4495
|
}
|
@@ -3804,6 +4503,7 @@ static void mr_delete_doc_i(IndexReader *ir, int doc_num)
|
|
3804
4503
|
/* dispatch to segment reader */
|
3805
4504
|
reader->delete_doc_i(reader, doc_num - MR(ir)->starts[i]);
|
3806
4505
|
MR(ir)->has_deletions = true;
|
4506
|
+
ir->has_changes = true;
|
3807
4507
|
}
|
3808
4508
|
|
3809
4509
|
static void mr_undelete_all_i(IndexReader *ir)
|
@@ -3817,6 +4517,17 @@ static void mr_undelete_all_i(IndexReader *ir)
|
|
3817
4517
|
reader->undelete_all_i(reader);
|
3818
4518
|
}
|
3819
4519
|
MR(ir)->has_deletions = false;
|
4520
|
+
ir->has_changes = true;
|
4521
|
+
}
|
4522
|
+
|
4523
|
+
static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
|
4524
|
+
{
|
4525
|
+
int i;
|
4526
|
+
ir->deleter = deleter;
|
4527
|
+
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
4528
|
+
IndexReader *reader = MR(ir)->sub_readers[i];
|
4529
|
+
reader->set_deleter_i(reader, deleter);
|
4530
|
+
}
|
3820
4531
|
}
|
3821
4532
|
|
3822
4533
|
static void mr_commit_i(IndexReader *ir)
|
@@ -3825,7 +4536,7 @@ static void mr_commit_i(IndexReader *ir)
|
|
3825
4536
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
3826
4537
|
for (i = 0; i < mr_reader_cnt; i++) {
|
3827
4538
|
IndexReader *reader = MR(ir)->sub_readers[i];
|
3828
|
-
|
4539
|
+
ir_commit_i(reader);
|
3829
4540
|
}
|
3830
4541
|
}
|
3831
4542
|
|
@@ -3887,6 +4598,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
|
|
3887
4598
|
ir->set_norm_i = &mr_set_norm_i;
|
3888
4599
|
ir->delete_doc_i = &mr_delete_doc_i;
|
3889
4600
|
ir->undelete_all_i = &mr_undelete_all_i;
|
4601
|
+
ir->set_deleter_i = &mr_set_deleter_i;
|
3890
4602
|
ir->commit_i = &mr_commit_i;
|
3891
4603
|
ir->close_i = &mr_close_i;
|
3892
4604
|
|
@@ -3980,33 +4692,65 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
|
|
3980
4692
|
* IndexReader
|
3981
4693
|
****************************************************************************/
|
3982
4694
|
|
4695
|
+
|
4696
|
+
static void ir_open_i(Store *store, FindSegmentsFile *fsf)
|
4697
|
+
{
|
4698
|
+
volatile bool success = false;
|
4699
|
+
IndexReader *ir = NULL;
|
4700
|
+
SegmentInfos *sis = NULL;
|
4701
|
+
TRY
|
4702
|
+
do {
|
4703
|
+
FieldInfos *fis;
|
4704
|
+
|
4705
|
+
mutex_lock(&store->mutex);
|
4706
|
+
sis_read_i(store, fsf);
|
4707
|
+
sis = fsf->p_return;
|
4708
|
+
fis = sis->fis;
|
4709
|
+
|
4710
|
+
if (sis->size == 1) {
|
4711
|
+
ir = sr_open(sis, fis, 0, true);
|
4712
|
+
}
|
4713
|
+
else {
|
4714
|
+
int i;
|
4715
|
+
IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
|
4716
|
+
int num_segments = sis->size;
|
4717
|
+
for (i = num_segments - 1; i >= 0; i--) {
|
4718
|
+
TRY
|
4719
|
+
readers[i] = sr_open(sis, fis, i, false);
|
4720
|
+
XCATCHALL
|
4721
|
+
for (i++; i < num_segments; i++) {
|
4722
|
+
ir_close(readers[i]);
|
4723
|
+
}
|
4724
|
+
free(readers);
|
4725
|
+
XENDTRY
|
4726
|
+
}
|
4727
|
+
ir = mr_open_i(store, sis, fis, readers, sis->size);
|
4728
|
+
}
|
4729
|
+
fsf->p_return = ir;
|
4730
|
+
success = true;
|
4731
|
+
} while (0);
|
4732
|
+
XFINALLY
|
4733
|
+
if (!success) {
|
4734
|
+
if (ir) {
|
4735
|
+
ir_close(ir);
|
4736
|
+
}
|
4737
|
+
else if (sis) {
|
4738
|
+
sis_destroy(sis);
|
4739
|
+
}
|
4740
|
+
}
|
4741
|
+
mutex_unlock(&store->mutex);
|
4742
|
+
XENDTRY
|
4743
|
+
}
|
4744
|
+
|
3983
4745
|
/**
|
3984
4746
|
* Will keep a reference to the store. To let this method delete the store
|
3985
4747
|
* make sure you deref the store that you pass to it
|
3986
4748
|
*/
|
3987
4749
|
IndexReader *ir_open(Store *store)
|
3988
4750
|
{
|
3989
|
-
|
3990
|
-
|
3991
|
-
|
3992
|
-
FieldInfos *fis;
|
3993
|
-
|
3994
|
-
mutex_lock(&store->mutex);
|
3995
|
-
sis = sis_read(store);
|
3996
|
-
fis = fis_read(store);
|
3997
|
-
if (sis->size == 1) {
|
3998
|
-
ir = sr_open(sis, fis, 0, true);
|
3999
|
-
}
|
4000
|
-
else {
|
4001
|
-
IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
|
4002
|
-
for (i = sis->size; i > 0;) {
|
4003
|
-
i--;
|
4004
|
-
readers[i] = sr_open(sis, fis, i, false);
|
4005
|
-
}
|
4006
|
-
ir = mr_open_i(store, sis, fis, readers, sis->size);
|
4007
|
-
}
|
4008
|
-
mutex_unlock(&store->mutex);
|
4009
|
-
return ir;
|
4751
|
+
FindSegmentsFile fsf;
|
4752
|
+
sis_find_segments_file(store, &fsf, &ir_open_i);
|
4753
|
+
return (IndexReader *)fsf.p_return;
|
4010
4754
|
}
|
4011
4755
|
|
4012
4756
|
/****************************************************************************
|
@@ -4126,8 +4870,8 @@ typedef struct SkipBuffer
|
|
4126
4870
|
OutStream *frq_out;
|
4127
4871
|
OutStream *prx_out;
|
4128
4872
|
int last_doc;
|
4129
|
-
|
4130
|
-
|
4873
|
+
off_t last_frq_ptr;
|
4874
|
+
off_t last_prx_ptr;
|
4131
4875
|
} SkipBuffer;
|
4132
4876
|
|
4133
4877
|
static void skip_buf_reset(SkipBuffer *skip_buf)
|
@@ -4149,8 +4893,8 @@ static SkipBuffer *skip_buf_new(OutStream *frq_out, OutStream *prx_out)
|
|
4149
4893
|
|
4150
4894
|
static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
4151
4895
|
{
|
4152
|
-
|
4153
|
-
|
4896
|
+
off_t frq_ptr = os_pos(skip_buf->frq_out);
|
4897
|
+
off_t prx_ptr = os_pos(skip_buf->prx_out);
|
4154
4898
|
|
4155
4899
|
os_write_vint(skip_buf->buf, doc - skip_buf->last_doc);
|
4156
4900
|
os_write_vint(skip_buf->buf, frq_ptr - skip_buf->last_frq_ptr);
|
@@ -4161,9 +4905,9 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
|
4161
4905
|
skip_buf->last_prx_ptr = prx_ptr;
|
4162
4906
|
}
|
4163
4907
|
|
4164
|
-
static
|
4908
|
+
static off_t skip_buf_write(SkipBuffer *skip_buf)
|
4165
4909
|
{
|
4166
|
-
|
4910
|
+
off_t skip_ptr = os_pos(skip_buf->frq_out);
|
4167
4911
|
ramo_write_to(skip_buf->buf, skip_buf->frq_out);
|
4168
4912
|
return skip_ptr;
|
4169
4913
|
}
|
@@ -4184,7 +4928,8 @@ static void dw_write_norms(DocWriter *dw, FieldInverter *fld_inv)
|
|
4184
4928
|
{
|
4185
4929
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4186
4930
|
OutStream *norms_out;
|
4187
|
-
|
4931
|
+
si_advance_norm_gen(dw->si, fld_inv->fi->number);
|
4932
|
+
si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
|
4188
4933
|
norms_out = dw->store->new_output(dw->store, file_name);
|
4189
4934
|
os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
|
4190
4935
|
os_close(norms_out);
|
@@ -4232,23 +4977,23 @@ static void dw_flush(DocWriter *dw)
|
|
4232
4977
|
Posting *p;
|
4233
4978
|
Occurence *occ;
|
4234
4979
|
Store *store = dw->store;
|
4235
|
-
TermInfosWriter *tiw = tiw_open(store, dw->
|
4980
|
+
TermInfosWriter *tiw = tiw_open(store, dw->si->name,
|
4236
4981
|
dw->index_interval, skip_interval);
|
4237
4982
|
TermInfo ti;
|
4238
4983
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4239
4984
|
OutStream *frq_out, *prx_out;
|
4240
4985
|
SkipBuffer *skip_buf;
|
4241
4986
|
|
4242
|
-
sprintf(file_name, "%s.frq", dw->
|
4987
|
+
sprintf(file_name, "%s.frq", dw->si->name);
|
4243
4988
|
frq_out = store->new_output(store, file_name);
|
4244
|
-
sprintf(file_name, "%s.prx", dw->
|
4989
|
+
sprintf(file_name, "%s.prx", dw->si->name);
|
4245
4990
|
prx_out = store->new_output(store, file_name);
|
4246
4991
|
skip_buf = skip_buf_new(frq_out, prx_out);
|
4247
4992
|
|
4248
4993
|
for (i = 0; i < fields_count; i++) {
|
4249
4994
|
fi = fis->fields[i];
|
4250
4995
|
if (!fi_is_indexed(fi)
|
4251
|
-
|| (fld_inv = h_get_int(dw->fields, fi->number))
|
4996
|
+
|| NULL == (fld_inv = h_get_int(dw->fields, fi->number))) {
|
4252
4997
|
continue;
|
4253
4998
|
}
|
4254
4999
|
if (!fi_omit_norms(fi)) {
|
@@ -4265,9 +5010,9 @@ static void dw_flush(DocWriter *dw)
|
|
4265
5010
|
last_doc = 0;
|
4266
5011
|
doc_freq = 0;
|
4267
5012
|
skip_buf_reset(skip_buf);
|
4268
|
-
for (p = pl->first;
|
5013
|
+
for (p = pl->first; NULL != p; p = p->next) {
|
4269
5014
|
doc_freq++;
|
4270
|
-
if ((doc_freq % dw->skip_interval)
|
5015
|
+
if (0 == (doc_freq % dw->skip_interval)) {
|
4271
5016
|
skip_buf_add(skip_buf, last_doc);
|
4272
5017
|
}
|
4273
5018
|
|
@@ -4283,7 +5028,7 @@ static void dw_flush(DocWriter *dw)
|
|
4283
5028
|
}
|
4284
5029
|
|
4285
5030
|
last_pos = 0;
|
4286
|
-
for (occ = p->first_occ;
|
5031
|
+
for (occ = p->first_occ; NULL != occ; occ = occ->next) {
|
4287
5032
|
os_write_vint(prx_out, occ->pos - last_pos);
|
4288
5033
|
last_pos = occ->pos;
|
4289
5034
|
}
|
@@ -4300,7 +5045,7 @@ static void dw_flush(DocWriter *dw)
|
|
4300
5045
|
dw_flush_streams(dw);
|
4301
5046
|
}
|
4302
5047
|
|
4303
|
-
DocWriter *dw_open(IndexWriter *iw,
|
5048
|
+
DocWriter *dw_open(IndexWriter *iw, SegmentInfo *si)
|
4304
5049
|
{
|
4305
5050
|
Store *store = iw->store;
|
4306
5051
|
MemoryPool *mp = mp_new_capa(iw->config.chunk_size,
|
@@ -4308,34 +5053,34 @@ DocWriter *dw_open(IndexWriter *iw, const char *segment)
|
|
4308
5053
|
|
4309
5054
|
DocWriter *dw = ALLOC(DocWriter);
|
4310
5055
|
|
4311
|
-
dw->mp
|
4312
|
-
dw->analyzer
|
4313
|
-
dw->fis
|
4314
|
-
dw->store
|
4315
|
-
dw->fw
|
4316
|
-
dw->
|
5056
|
+
dw->mp = mp;
|
5057
|
+
dw->analyzer = iw->analyzer;
|
5058
|
+
dw->fis = iw->fis;
|
5059
|
+
dw->store = store;
|
5060
|
+
dw->fw = fw_open(store, si->name, iw->fis);
|
5061
|
+
dw->si = si;
|
4317
5062
|
|
4318
5063
|
dw->curr_plists = h_new_str(NULL, NULL);
|
4319
|
-
dw->fields
|
4320
|
-
dw->doc_num
|
5064
|
+
dw->fields = h_new_int((free_ft)fld_inv_destroy);
|
5065
|
+
dw->doc_num = 0;
|
4321
5066
|
|
4322
|
-
dw->index_interval
|
4323
|
-
dw->skip_interval
|
4324
|
-
dw->max_field_length
|
4325
|
-
dw->max_buffered_docs
|
5067
|
+
dw->index_interval = iw->config.index_interval;
|
5068
|
+
dw->skip_interval = iw->config.skip_interval;
|
5069
|
+
dw->max_field_length = iw->config.max_field_length;
|
5070
|
+
dw->max_buffered_docs = iw->config.max_buffered_docs;
|
4326
5071
|
|
4327
|
-
dw->offsets
|
4328
|
-
dw->offsets_size
|
4329
|
-
dw->offsets_capa
|
5072
|
+
dw->offsets = ALLOC_AND_ZERO_N(Offset, DW_OFFSET_INIT_CAPA);
|
5073
|
+
dw->offsets_size = 0;
|
5074
|
+
dw->offsets_capa = DW_OFFSET_INIT_CAPA;
|
4330
5075
|
|
4331
|
-
dw->similarity
|
5076
|
+
dw->similarity = iw->similarity;
|
4332
5077
|
return dw;
|
4333
5078
|
}
|
4334
5079
|
|
4335
|
-
void dw_new_segment(DocWriter *dw,
|
5080
|
+
void dw_new_segment(DocWriter *dw, SegmentInfo *si)
|
4336
5081
|
{
|
4337
|
-
dw->fw = fw_open(dw->store,
|
4338
|
-
dw->
|
5082
|
+
dw->fw = fw_open(dw->store, si->name, dw->fis);
|
5083
|
+
dw->si = si;
|
4339
5084
|
}
|
4340
5085
|
|
4341
5086
|
void dw_close(DocWriter *dw)
|
@@ -4536,7 +5281,7 @@ typedef struct SegmentMergeInfo {
|
|
4536
5281
|
int base;
|
4537
5282
|
int max_doc;
|
4538
5283
|
int doc_cnt;
|
4539
|
-
|
5284
|
+
SegmentInfo *si;
|
4540
5285
|
Store *store;
|
4541
5286
|
Store *orig_store;
|
4542
5287
|
BitVector *deleted_docs;
|
@@ -4552,7 +5297,7 @@ typedef struct SegmentMergeInfo {
|
|
4552
5297
|
static bool smi_lt(const SegmentMergeInfo *smi1, const SegmentMergeInfo *smi2)
|
4553
5298
|
{
|
4554
5299
|
int cmpres = strcmp(smi1->term, smi2->term);
|
4555
|
-
if (
|
5300
|
+
if (0 == cmpres) {
|
4556
5301
|
return smi1->base < smi2->base;
|
4557
5302
|
}
|
4558
5303
|
else {
|
@@ -4578,12 +5323,13 @@ static void smi_load_doc_map(SegmentMergeInfo *smi)
|
|
4578
5323
|
smi->doc_cnt = j;
|
4579
5324
|
}
|
4580
5325
|
|
4581
|
-
static SegmentMergeInfo *smi_new(int base, Store *store,
|
5326
|
+
static SegmentMergeInfo *smi_new(int base, Store *store, SegmentInfo *si)
|
4582
5327
|
{
|
4583
5328
|
SegmentMergeInfo *smi = ALLOC_AND_ZERO(SegmentMergeInfo);
|
4584
5329
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
5330
|
+
char *segment = si->name;
|
4585
5331
|
smi->base = base;
|
4586
|
-
smi->
|
5332
|
+
smi->si = si;
|
4587
5333
|
smi->orig_store = smi->store = store;
|
4588
5334
|
sprintf(file_name, "%s.cfs", segment);
|
4589
5335
|
if (store->exists(store, file_name)) {
|
@@ -4595,8 +5341,8 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
|
|
4595
5341
|
smi->doc_cnt = smi->max_doc
|
4596
5342
|
= smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
|
4597
5343
|
|
4598
|
-
|
4599
|
-
|
5344
|
+
if (si->del_gen >= 0) {
|
5345
|
+
fn_for_generation(file_name, segment, "del", si->del_gen);
|
4600
5346
|
smi->deleted_docs = bv_read(store, file_name);
|
4601
5347
|
smi_load_doc_map(smi);
|
4602
5348
|
}
|
@@ -4606,13 +5352,14 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
|
|
4606
5352
|
static void smi_load_term_input(SegmentMergeInfo *smi)
|
4607
5353
|
{
|
4608
5354
|
Store *store = smi->store;
|
5355
|
+
char *segment = smi->si->name;
|
4609
5356
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4610
|
-
smi->sfi = sfi_open(store,
|
4611
|
-
sprintf(file_name, "%s.tis",
|
5357
|
+
smi->sfi = sfi_open(store, segment);
|
5358
|
+
sprintf(file_name, "%s.tis", segment);
|
4612
5359
|
smi->te = TE(ste_new(store->open_input(store, file_name), smi->sfi));
|
4613
|
-
sprintf(file_name, "%s.frq",
|
5360
|
+
sprintf(file_name, "%s.frq", segment);
|
4614
5361
|
smi->frq_in = store->open_input(store, file_name);
|
4615
|
-
sprintf(file_name, "%s.prx",
|
5362
|
+
sprintf(file_name, "%s.prx", segment);
|
4616
5363
|
smi->prx_in = store->open_input(store, file_name);
|
4617
5364
|
smi->tde = stpe_new(NULL, smi->frq_in, smi->prx_in, smi->deleted_docs,
|
4618
5365
|
STE(smi->te)->skip_interval);
|
@@ -4652,7 +5399,7 @@ typedef struct SegmentMerger {
|
|
4652
5399
|
TermInfo ti;
|
4653
5400
|
Store *store;
|
4654
5401
|
FieldInfos *fis;
|
4655
|
-
|
5402
|
+
SegmentInfo *si;
|
4656
5403
|
SegmentMergeInfo **smis;
|
4657
5404
|
int seg_cnt;
|
4658
5405
|
int doc_cnt;
|
@@ -4667,19 +5414,19 @@ typedef struct SegmentMerger {
|
|
4667
5414
|
OutStream *prx_out;
|
4668
5415
|
} SegmentMerger;
|
4669
5416
|
|
4670
|
-
static SegmentMerger *sm_create(IndexWriter *iw,
|
5417
|
+
static SegmentMerger *sm_create(IndexWriter *iw, SegmentInfo *si,
|
4671
5418
|
SegmentInfo **seg_infos, const int seg_cnt)
|
4672
5419
|
{
|
4673
5420
|
int i;
|
4674
5421
|
SegmentMerger *sm = ALLOC_AND_ZERO_N(SegmentMerger, seg_cnt);
|
4675
5422
|
sm->store = iw->store;
|
4676
5423
|
sm->fis = iw->fis;
|
4677
|
-
sm->
|
5424
|
+
sm->si = si;
|
4678
5425
|
sm->doc_cnt = 0;
|
4679
5426
|
sm->smis = ALLOC_N(SegmentMergeInfo *, seg_cnt);
|
4680
5427
|
for (i = 0; i < seg_cnt; i++) {
|
4681
5428
|
sm->smis[i] = smi_new(sm->doc_cnt, seg_infos[i]->store,
|
4682
|
-
seg_infos[i]
|
5429
|
+
seg_infos[i]);
|
4683
5430
|
sm->doc_cnt += sm->smis[i]->doc_cnt;
|
4684
5431
|
}
|
4685
5432
|
sm->seg_cnt = seg_cnt;
|
@@ -4695,7 +5442,6 @@ static void sm_destroy(SegmentMerger *sm)
|
|
4695
5442
|
smi_destroy(sm->smis[i]);
|
4696
5443
|
}
|
4697
5444
|
free(sm->smis);
|
4698
|
-
free(sm->segment);
|
4699
5445
|
free(sm);
|
4700
5446
|
}
|
4701
5447
|
|
@@ -4708,20 +5454,21 @@ static void sm_merge_fields(SegmentMerger *sm)
|
|
4708
5454
|
Store *store = sm->store;
|
4709
5455
|
const int seg_cnt = sm->seg_cnt;
|
4710
5456
|
|
4711
|
-
sprintf(file_name, "%s.fdt", sm->
|
5457
|
+
sprintf(file_name, "%s.fdt", sm->si->name);
|
4712
5458
|
fdt_out = store->new_output(store, file_name);
|
4713
5459
|
|
4714
|
-
sprintf(file_name, "%s.fdx", sm->
|
5460
|
+
sprintf(file_name, "%s.fdx", sm->si->name);
|
4715
5461
|
fdx_out = store->new_output(store, file_name);
|
4716
5462
|
|
4717
5463
|
for (i = 0; i < seg_cnt; i++) {
|
4718
5464
|
SegmentMergeInfo *smi = sm->smis[i];
|
4719
5465
|
const int max_doc = smi->max_doc;
|
4720
5466
|
InStream *fdt_in, *fdx_in;
|
5467
|
+
char *segment = smi->si->name;
|
4721
5468
|
store = smi->store;
|
4722
|
-
sprintf(file_name, "%s.fdt",
|
5469
|
+
sprintf(file_name, "%s.fdt", segment);
|
4723
5470
|
fdt_in = store->open_input(store, file_name);
|
4724
|
-
sprintf(file_name, "%s.fdx",
|
5471
|
+
sprintf(file_name, "%s.fdx", segment);
|
4725
5472
|
fdx_in = store->open_input(store, file_name);
|
4726
5473
|
|
4727
5474
|
if (max_doc > 0) {
|
@@ -4775,7 +5522,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
|
|
4775
5522
|
* stde_next rather than stpe_next here */
|
4776
5523
|
while (stde_next(tde)) {
|
4777
5524
|
doc = stde_doc_num(tde);
|
4778
|
-
if (
|
5525
|
+
if (NULL != doc_map) {
|
4779
5526
|
doc = doc_map[doc]; /* work around deletions */
|
4780
5527
|
}
|
4781
5528
|
doc += base; /* convert to merged space */
|
@@ -4787,7 +5534,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
|
|
4787
5534
|
#endif
|
4788
5535
|
df++;
|
4789
5536
|
|
4790
|
-
if ((df % skip_interval)
|
5537
|
+
if (0 == (df % skip_interval)) {
|
4791
5538
|
skip_buf_add(skip_buf, last_doc);
|
4792
5539
|
}
|
4793
5540
|
|
@@ -4823,12 +5570,12 @@ static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
|
|
4823
5570
|
static void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **matches,
|
4824
5571
|
int match_size)
|
4825
5572
|
{
|
4826
|
-
|
4827
|
-
|
5573
|
+
off_t frq_ptr = os_pos(sm->frq_out);
|
5574
|
+
off_t prx_ptr = os_pos(sm->prx_out);
|
4828
5575
|
|
4829
5576
|
int df = sm_append_postings(sm, matches, match_size); /* append posting data */
|
4830
5577
|
|
4831
|
-
|
5578
|
+
off_t skip_ptr = skip_buf_write(sm->skip_buf);
|
4832
5579
|
|
4833
5580
|
if (df > 0) {
|
4834
5581
|
/* add an entry to the dictionary with ptrs to prox and freq files */
|
@@ -4861,7 +5608,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
|
|
4861
5608
|
for (j = 0; j < seg_cnt; j++) {
|
4862
5609
|
smi = sm->smis[j];
|
4863
5610
|
ste_set_field(smi->te, i);
|
4864
|
-
if (smi_next(smi)
|
5611
|
+
if (NULL != smi_next(smi)) {
|
4865
5612
|
pq_push(sm->queue, smi); /* initialize @queue */
|
4866
5613
|
}
|
4867
5614
|
}
|
@@ -4877,7 +5624,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
|
|
4877
5624
|
match_size++;
|
4878
5625
|
term = matches[0]->term;
|
4879
5626
|
top = pq_top(sm->queue);
|
4880
|
-
while ((
|
5627
|
+
while ((NULL != top) && (0 == strcmp(term, top->term))) {
|
4881
5628
|
matches[match_size] = pq_pop(sm->queue);
|
4882
5629
|
match_size++;
|
4883
5630
|
top = pq_top(sm->queue);
|
@@ -4889,7 +5636,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
|
|
4889
5636
|
while (match_size > 0) {
|
4890
5637
|
match_size--;
|
4891
5638
|
smi = matches[match_size];
|
4892
|
-
if (smi_next(smi)
|
5639
|
+
if (NULL != smi_next(smi)) {
|
4893
5640
|
pq_push(sm->queue, smi); /* restore queue */
|
4894
5641
|
}
|
4895
5642
|
}
|
@@ -4905,12 +5652,12 @@ static void sm_merge_terms(SegmentMerger *sm)
|
|
4905
5652
|
{
|
4906
5653
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4907
5654
|
|
4908
|
-
sprintf(file_name, "%s.frq", sm->
|
5655
|
+
sprintf(file_name, "%s.frq", sm->si->name);
|
4909
5656
|
sm->frq_out = sm->store->new_output(sm->store, file_name);
|
4910
|
-
sprintf(file_name, "%s.prx", sm->
|
5657
|
+
sprintf(file_name, "%s.prx", sm->si->name);
|
4911
5658
|
sm->prx_out = sm->store->new_output(sm->store, file_name);
|
4912
5659
|
|
4913
|
-
sm->tiw = tiw_open(sm->store, sm->
|
5660
|
+
sm->tiw = tiw_open(sm->store, sm->si->name, sm->config->index_interval,
|
4914
5661
|
sm->config->skip_interval);
|
4915
5662
|
sm->skip_buf = skip_buf_new(sm->frq_out, sm->prx_out);
|
4916
5663
|
|
@@ -4936,6 +5683,7 @@ static void sm_merge_terms(SegmentMerger *sm)
|
|
4936
5683
|
|
4937
5684
|
static void sm_merge_norms(SegmentMerger *sm)
|
4938
5685
|
{
|
5686
|
+
SegmentInfo *si;
|
4939
5687
|
int i, j, k;
|
4940
5688
|
Store *store;
|
4941
5689
|
uchar byte;
|
@@ -4945,23 +5693,21 @@ static void sm_merge_norms(SegmentMerger *sm)
|
|
4945
5693
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4946
5694
|
SegmentMergeInfo *smi;
|
4947
5695
|
const int seg_cnt = sm->seg_cnt;
|
4948
|
-
|
4949
|
-
for (i = 0; i < fis_size; i++) {
|
5696
|
+
for (i = sm->fis->size - 1; i >= 0; i--) {
|
4950
5697
|
fi = sm->fis->fields[i];
|
4951
5698
|
if (fi_has_norms(fi)) {
|
4952
|
-
|
5699
|
+
si = sm->si;
|
5700
|
+
si_advance_norm_gen(si, i);
|
5701
|
+
si_norm_file_name(si, file_name, i);
|
4953
5702
|
os = sm->store->new_output(sm->store, file_name);
|
4954
5703
|
for (j = 0; j < seg_cnt; j++) {
|
4955
5704
|
smi = sm->smis[j];
|
4956
|
-
|
4957
|
-
|
4958
|
-
if (!store->exists(store, file_name)) {
|
4959
|
-
sprintf(file_name, "%s.f%d", smi->segment, i);
|
4960
|
-
store = smi->store;
|
4961
|
-
}
|
4962
|
-
if (store->exists(store, file_name)) {
|
5705
|
+
si = smi->si;
|
5706
|
+
if (si_norm_file_name(si, file_name, i)) {
|
4963
5707
|
const int max_doc = smi->max_doc;
|
4964
5708
|
BitVector *deleted_docs = smi->deleted_docs;
|
5709
|
+
store = (si->use_compound_file && si->norm_gens[i])
|
5710
|
+
? smi->orig_store : smi->store;
|
4965
5711
|
is = store->open_input(store, file_name);
|
4966
5712
|
if (deleted_docs) {
|
4967
5713
|
for (k = 0; k < max_doc; k++) {
|
@@ -5004,11 +5750,18 @@ static int sm_merge(SegmentMerger *sm)
|
|
5004
5750
|
/* prepare an index ready for writing */
|
5005
5751
|
void index_create(Store *store, FieldInfos *fis)
|
5006
5752
|
{
|
5007
|
-
SegmentInfos *sis = sis_new();
|
5753
|
+
SegmentInfos *sis = sis_new(fis);
|
5008
5754
|
store->clear_all(store);
|
5009
|
-
sis_write(sis, store);
|
5755
|
+
sis_write(sis, store, NULL);
|
5010
5756
|
sis_destroy(sis);
|
5011
|
-
|
5757
|
+
}
|
5758
|
+
|
5759
|
+
bool index_is_locked(Store *store)
|
5760
|
+
{
|
5761
|
+
Lock *write_lock = open_lock(store, WRITE_LOCK_NAME);
|
5762
|
+
bool is_locked = write_lock->is_locked(write_lock);
|
5763
|
+
close_lock(write_lock);
|
5764
|
+
return is_locked;
|
5012
5765
|
}
|
5013
5766
|
|
5014
5767
|
int iw_doc_count(IndexWriter *iw)
|
@@ -5025,158 +5778,79 @@ int iw_doc_count(IndexWriter *iw)
|
|
5025
5778
|
return doc_cnt;
|
5026
5779
|
}
|
5027
5780
|
|
5028
|
-
|
5781
|
+
#define MOVE_TO_COMPOUND_DIR(file_name)\
|
5782
|
+
deleter_queue_file(dlr, file_name);\
|
5783
|
+
cw_add_file(cw, file_name)
|
5784
|
+
|
5785
|
+
static void iw_create_compound_file(Store *store, FieldInfos *fis,
|
5786
|
+
SegmentInfo *si, char *cfs_file_name,
|
5787
|
+
Deleter *dlr)
|
5029
5788
|
{
|
5030
5789
|
int i;
|
5031
|
-
for (i = ary_size(file_names) - 1; i >= 0; i--) {
|
5032
|
-
store->remove(store, file_names[i]);
|
5033
|
-
}
|
5034
|
-
ary_destroy((void **)file_names, &free);
|
5035
|
-
}
|
5036
|
-
|
5037
|
-
static char **iw_create_compound_file(Store *store, FieldInfos *fis,
|
5038
|
-
char *segment, char *cfs_file_name)
|
5039
|
-
{
|
5040
|
-
char **file_names = (char **)ary_new_capa(16);
|
5041
5790
|
CompoundWriter *cw;
|
5042
|
-
FieldInfo *fi;
|
5043
|
-
int i;
|
5044
5791
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
5045
|
-
|
5046
|
-
int
|
5792
|
+
char *ext;
|
5793
|
+
int seg_len = strlen(si->name);
|
5794
|
+
|
5795
|
+
memcpy(file_name, si->name, seg_len);
|
5796
|
+
file_name[seg_len] = '.';
|
5797
|
+
ext = file_name + seg_len + 1;
|
5047
5798
|
|
5048
5799
|
cw = open_cw(store, cfs_file_name);
|
5049
5800
|
for (i = 0; i < NELEMS(COMPOUND_EXTENSIONS); i++) {
|
5050
|
-
|
5051
|
-
|
5052
|
-
ary_push(file_names, estrdup(file_name));
|
5801
|
+
memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
|
5802
|
+
MOVE_TO_COMPOUND_DIR(file_name);
|
5053
5803
|
}
|
5054
5804
|
|
5055
5805
|
/* Field norm file_names */
|
5056
|
-
for (i =
|
5057
|
-
|
5058
|
-
|
5059
|
-
|
5060
|
-
if (!store->exists(store, file_name)) {
|
5061
|
-
continue;
|
5062
|
-
}
|
5063
|
-
ary_push(file_names, estrdup(file_name));
|
5806
|
+
for (i = fis->size - 1; i >= 0; i--) {
|
5807
|
+
if (fi_has_norms(fis->fields[i])
|
5808
|
+
&& si_norm_file_name(si, file_name, i)) {
|
5809
|
+
MOVE_TO_COMPOUND_DIR(file_name);
|
5064
5810
|
}
|
5065
5811
|
}
|
5066
5812
|
|
5067
|
-
/* Now merge all added file_names */
|
5068
|
-
file_names_size = ary_size(file_names);
|
5069
|
-
for (i = 0; i < file_names_size; i++) {
|
5070
|
-
cw_add_file(cw, file_names[i]);
|
5071
|
-
}
|
5072
|
-
|
5073
5813
|
/* Perform the merge */
|
5074
5814
|
cw_close(cw);
|
5075
|
-
|
5076
|
-
return file_names;
|
5077
5815
|
}
|
5078
5816
|
|
5079
|
-
static void iw_commit_compound_file(IndexWriter *iw,
|
5080
|
-
Lock *commit_lock)
|
5817
|
+
static void iw_commit_compound_file(IndexWriter *iw, SegmentInfo *si)
|
5081
5818
|
{
|
5082
|
-
char tmp_name[SEGMENT_NAME_MAX_LENGTH];
|
5083
5819
|
char cfs_name[SEGMENT_NAME_MAX_LENGTH];
|
5084
|
-
|
5085
|
-
sprintf(tmp_name, "%s.tmp", segment);
|
5086
|
-
sprintf(cfs_name, "%s.cfs", segment);
|
5087
|
-
|
5088
|
-
files_to_delete =
|
5089
|
-
iw_create_compound_file(iw->store, iw->fis, segment, tmp_name);
|
5090
|
-
if (!commit_lock->obtain(commit_lock)) {
|
5091
|
-
RAISE(LOCK_ERROR,
|
5092
|
-
"Couldn't obtain commit lock to write compound file");
|
5093
|
-
}
|
5820
|
+
sprintf(cfs_name, "%s.cfs", si->name);
|
5094
5821
|
|
5095
|
-
|
5096
|
-
|
5097
|
-
|
5098
|
-
commit_lock->release(commit_lock);
|
5099
|
-
}
|
5100
|
-
|
5101
|
-
#define ADD_IF_EXISTS_FMT(fmt, ext) do {\
|
5102
|
-
sprintf(file_name, fmt, segment, ext);\
|
5103
|
-
if (store->exists(store, file_name)) {\
|
5104
|
-
ary_push(file_names, estrdup(file_name));\
|
5105
|
-
}\
|
5106
|
-
} while (0)
|
5107
|
-
|
5108
|
-
#define ADD_IF_EXISTS(ext) ADD_IF_EXISTS_FMT("%s.%s", ext)
|
5109
|
-
|
5110
|
-
static char **iw_seg_file_names(FieldInfos *fis, Store *store, char *segment)
|
5111
|
-
{
|
5112
|
-
char **file_names = (char **)ary_new_capa(16);
|
5113
|
-
int i;
|
5114
|
-
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
5115
|
-
const int fis_size = fis->size;
|
5116
|
-
|
5117
|
-
|
5118
|
-
sprintf(file_name, "%s.cfs", segment);
|
5119
|
-
if (store->exists(store, file_name)) {
|
5120
|
-
ary_push(file_names, estrdup(file_name));
|
5121
|
-
ADD_IF_EXISTS("del");
|
5122
|
-
for (i = 0; i < fis_size; i++) {
|
5123
|
-
if (fi_has_norms(fis->fields[i])) {
|
5124
|
-
ADD_IF_EXISTS_FMT("%s.s%d", i);
|
5125
|
-
}
|
5126
|
-
}
|
5127
|
-
}
|
5128
|
-
else {
|
5129
|
-
for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
|
5130
|
-
ADD_IF_EXISTS(INDEX_EXTENSIONS[i]);
|
5131
|
-
}
|
5132
|
-
for (i = 0; i < fis_size; i++) {
|
5133
|
-
if (fi_has_norms(fis->fields[i])) {
|
5134
|
-
ADD_IF_EXISTS_FMT("%s.f%d", i);
|
5135
|
-
}
|
5136
|
-
}
|
5137
|
-
}
|
5138
|
-
return file_names;
|
5822
|
+
iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
|
5823
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5139
5824
|
}
|
5140
5825
|
|
5141
5826
|
static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
5142
5827
|
const int max_seg)
|
5143
5828
|
{
|
5144
5829
|
int i;
|
5145
|
-
Lock *commit_lock;
|
5146
5830
|
SegmentInfos *sis = iw->sis;
|
5147
5831
|
SegmentInfo *si = sis_new_segment(sis, 0, iw->store);
|
5148
5832
|
|
5149
|
-
SegmentMerger *merger = sm_create(iw, si
|
5833
|
+
SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg],
|
5150
5834
|
max_seg - min_seg);
|
5151
5835
|
|
5152
5836
|
/* This is where all the action happens. */
|
5153
5837
|
si->doc_cnt = sm_merge(merger);
|
5154
5838
|
|
5155
5839
|
mutex_lock(&iw->store->mutex);
|
5156
|
-
commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
|
5157
|
-
|
5158
|
-
/* *** OBTAIN COMMIT LOCK *** */
|
5159
|
-
if (!commit_lock->obtain(commit_lock)) {
|
5160
|
-
RAISE(LOCK_ERROR, "Couldn't obtain commit lock to commit merged segment "
|
5161
|
-
"%s", si->name);
|
5162
|
-
}
|
5163
5840
|
/* delete merged segments */
|
5164
5841
|
for (i = min_seg; i < max_seg; i++) {
|
5165
|
-
|
5166
|
-
iw_seg_file_names(iw->fis, sis->segs[i]->store, sis->segs[i]->name),
|
5167
|
-
iw->store);
|
5842
|
+
si_delete_files(sis->segs[i], iw->fis, iw->deleter);
|
5168
5843
|
}
|
5844
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5845
|
+
|
5169
5846
|
sis_del_from_to(sis, min_seg, max_seg);
|
5170
|
-
/* commit the segments file */
|
5171
|
-
sis_write(sis, iw->store);
|
5172
|
-
commit_lock->release(commit_lock);
|
5173
|
-
/* RELEASE COMMIT LOCK */
|
5174
5847
|
|
5175
5848
|
if (iw->config.use_compound_file) {
|
5176
|
-
iw_commit_compound_file(iw, si
|
5849
|
+
iw_commit_compound_file(iw, si);
|
5850
|
+
si->use_compound_file = true;
|
5177
5851
|
}
|
5178
5852
|
|
5179
|
-
|
5853
|
+
sis_write(sis, iw->store, iw->deleter);
|
5180
5854
|
|
5181
5855
|
mutex_unlock(&iw->store->mutex);
|
5182
5856
|
|
@@ -5223,28 +5897,20 @@ static void iw_flush_ram_segment(IndexWriter *iw)
|
|
5223
5897
|
{
|
5224
5898
|
SegmentInfos *sis = iw->sis;
|
5225
5899
|
SegmentInfo *si;
|
5226
|
-
Lock *commit_lock;
|
5227
5900
|
|
5228
5901
|
si = sis->segs[sis->size - 1];
|
5229
5902
|
si->doc_cnt = iw->dw->doc_num;
|
5230
5903
|
dw_flush(iw->dw);
|
5231
5904
|
|
5232
5905
|
mutex_lock(&iw->store->mutex);
|
5233
|
-
commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
|
5234
5906
|
|
5235
|
-
if (
|
5236
|
-
|
5907
|
+
if (iw->config.use_compound_file) {
|
5908
|
+
iw_commit_compound_file(iw, si);
|
5909
|
+
si->use_compound_file = true;
|
5237
5910
|
}
|
5238
5911
|
/* commit the segments file and the fields file */
|
5239
|
-
|
5240
|
-
sis_write(iw->sis, iw->store);
|
5241
|
-
commit_lock->release(commit_lock);
|
5912
|
+
sis_write(iw->sis, iw->store, iw->deleter);
|
5242
5913
|
|
5243
|
-
|
5244
|
-
if (iw->config.use_compound_file) {
|
5245
|
-
iw_commit_compound_file(iw, si->name, commit_lock);
|
5246
|
-
}
|
5247
|
-
close_lock(commit_lock);
|
5248
5914
|
mutex_unlock(&iw->store->mutex);
|
5249
5915
|
|
5250
5916
|
iw_maybe_merge_segments(iw);
|
@@ -5253,11 +5919,11 @@ static void iw_flush_ram_segment(IndexWriter *iw)
|
|
5253
5919
|
void iw_add_doc(IndexWriter *iw, Document *doc)
|
5254
5920
|
{
|
5255
5921
|
mutex_lock(&iw->mutex);
|
5256
|
-
if (
|
5257
|
-
iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store)
|
5922
|
+
if (NULL == iw->dw) {
|
5923
|
+
iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store));
|
5258
5924
|
}
|
5259
|
-
else if (iw->dw->fw
|
5260
|
-
dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store)
|
5925
|
+
else if (NULL == iw->dw->fw) {
|
5926
|
+
dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store));
|
5261
5927
|
}
|
5262
5928
|
dw_add_doc(iw->dw, doc);
|
5263
5929
|
if (mp_used(iw->dw->mp) > iw->config.max_buffer_memory
|
@@ -5291,17 +5957,25 @@ void iw_delete_term(IndexWriter *iw, const char *field, const char *term)
|
|
5291
5957
|
do {
|
5292
5958
|
SegmentInfos *sis = iw->sis;
|
5293
5959
|
const int seg_cnt = sis->size;
|
5960
|
+
bool did_delete = false;
|
5294
5961
|
for (i = 0; i < seg_cnt; i++) {
|
5295
5962
|
IndexReader *ir = sr_open(sis, iw->fis, i, false);
|
5296
5963
|
TermDocEnum *tde = ir->term_docs(ir);
|
5964
|
+
ir->deleter = iw->deleter;
|
5297
5965
|
stde_seek(tde, field_num, term);
|
5298
5966
|
while (tde->next(tde)) {
|
5967
|
+
did_delete = true;
|
5299
5968
|
sr_delete_doc_i(ir, STDE(tde)->doc_num);
|
5300
5969
|
}
|
5301
5970
|
tde_destroy(tde);
|
5302
5971
|
sr_commit_i(ir);
|
5303
5972
|
ir_close(ir);
|
5304
5973
|
}
|
5974
|
+
if (did_delete) {
|
5975
|
+
mutex_lock(&iw->store->mutex);
|
5976
|
+
sis_write(iw->sis, iw->store, iw->deleter);
|
5977
|
+
mutex_unlock(&iw->store->mutex);
|
5978
|
+
}
|
5305
5979
|
} while (0);
|
5306
5980
|
mutex_unlock(&iw->mutex);
|
5307
5981
|
}
|
@@ -5316,7 +5990,7 @@ static void iw_optimize_i(IndexWriter *iw)
|
|
5316
5990
|
&& (si_has_deletions(iw->sis->segs[0])
|
5317
5991
|
|| (iw->sis->segs[0]->store != iw->store)
|
5318
5992
|
|| (iw->config.use_compound_file
|
5319
|
-
&& (!
|
5993
|
+
&& (!iw->sis->segs[0]->use_compound_file
|
5320
5994
|
|| si_has_separate_norms(iw->sis->segs[0])))))) {
|
5321
5995
|
min_segment = iw->sis->size - iw->config.merge_factor;
|
5322
5996
|
iw_merge_segments_from(iw, min_segment < 0 ? 0 : min_segment);
|
@@ -5344,7 +6018,9 @@ void iw_close(IndexWriter *iw)
|
|
5344
6018
|
|
5345
6019
|
iw->write_lock->release(iw->write_lock);
|
5346
6020
|
close_lock(iw->write_lock);
|
6021
|
+
iw->write_lock = NULL;
|
5347
6022
|
store_deref(iw->store);
|
6023
|
+
deleter_destroy(iw->deleter);
|
5348
6024
|
|
5349
6025
|
mutex_destroy(&iw->mutex);
|
5350
6026
|
free(iw);
|
@@ -5368,16 +6044,17 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
|
|
5368
6044
|
"Couldn't obtain write lock when opening IndexWriter");
|
5369
6045
|
}
|
5370
6046
|
|
5371
|
-
|
5372
6047
|
iw->sis = sis_read(store);
|
5373
|
-
iw->fis =
|
6048
|
+
iw->fis = iw->sis->fis;
|
6049
|
+
REF(iw->fis);
|
5374
6050
|
XCATCHALL
|
5375
6051
|
if (iw->write_lock) {
|
5376
6052
|
iw->write_lock->release(iw->write_lock);
|
5377
6053
|
close_lock(iw->write_lock);
|
6054
|
+
iw->write_lock = NULL;
|
5378
6055
|
}
|
5379
6056
|
if (iw->sis) sis_destroy(iw->sis);
|
5380
|
-
if (
|
6057
|
+
if (analyzer) a_deref((Analyzer *)analyzer);
|
5381
6058
|
free(iw);
|
5382
6059
|
XENDTRY
|
5383
6060
|
|
@@ -5385,6 +6062,9 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
|
|
5385
6062
|
iw->analyzer = analyzer ? (Analyzer *)analyzer
|
5386
6063
|
: mb_standard_analyzer_new(true);
|
5387
6064
|
|
6065
|
+
iw->deleter = deleter_new(iw->sis, store);
|
6066
|
+
deleter_delete_deletable_files(iw->deleter);
|
6067
|
+
|
5388
6068
|
REF(store);
|
5389
6069
|
return iw;
|
5390
6070
|
}
|
@@ -5400,18 +6080,19 @@ static void iw_cp_fields(IndexWriter *iw, SegmentReader *sr,
|
|
5400
6080
|
InStream *fdt_in, *fdx_in;
|
5401
6081
|
Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
|
5402
6082
|
Store *store_out = iw->store;
|
6083
|
+
char *sr_segment = sr->si->name;
|
5403
6084
|
|
5404
6085
|
sprintf(file_name, "%s.fdt", segment);
|
5405
6086
|
fdt_out = store_out->new_output(store_out, file_name);
|
5406
6087
|
sprintf(file_name, "%s.fdx", segment);
|
5407
6088
|
fdx_out = store_out->new_output(store_out, file_name);
|
5408
6089
|
|
5409
|
-
sprintf(file_name, "%s.fdt",
|
6090
|
+
sprintf(file_name, "%s.fdt", sr_segment);
|
5410
6091
|
fdt_in = store_in->open_input(store_in, file_name);
|
5411
|
-
sprintf(file_name, "%s.fdx",
|
6092
|
+
sprintf(file_name, "%s.fdx", sr_segment);
|
5412
6093
|
fdx_in = store_in->open_input(store_in, file_name);
|
5413
6094
|
|
5414
|
-
sprintf(file_name, "%s.del",
|
6095
|
+
sprintf(file_name, "%s.del", sr_segment);
|
5415
6096
|
if (store_in->exists(store_in, file_name)) {
|
5416
6097
|
OutStream *del_out;
|
5417
6098
|
InStream *del_in = store_in->open_input(store_in, file_name);
|
@@ -5487,30 +6168,31 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
|
|
5487
6168
|
InStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
|
5488
6169
|
Store *store_out = iw->store;
|
5489
6170
|
Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
|
6171
|
+
char *sr_segment = sr->si->name;
|
5490
6172
|
|
5491
6173
|
sprintf(file_name, "%s.tix", segment);
|
5492
6174
|
tix_out = store_out->new_output(store_out, file_name);
|
5493
|
-
sprintf(file_name, "%s.tix",
|
6175
|
+
sprintf(file_name, "%s.tix", sr_segment);
|
5494
6176
|
tix_in = store_in->open_input(store_in, file_name);
|
5495
6177
|
|
5496
6178
|
sprintf(file_name, "%s.tis", segment);
|
5497
6179
|
tis_out = store_out->new_output(store_out, file_name);
|
5498
|
-
sprintf(file_name, "%s.tis",
|
6180
|
+
sprintf(file_name, "%s.tis", sr_segment);
|
5499
6181
|
tis_in = store_in->open_input(store_in, file_name);
|
5500
6182
|
|
5501
6183
|
sprintf(file_name, "%s.tfx", segment);
|
5502
6184
|
tfx_out = store_out->new_output(store_out, file_name);
|
5503
|
-
sprintf(file_name, "%s.tfx",
|
6185
|
+
sprintf(file_name, "%s.tfx", sr_segment);
|
5504
6186
|
tfx_in = store_in->open_input(store_in, file_name);
|
5505
6187
|
|
5506
6188
|
sprintf(file_name, "%s.frq", segment);
|
5507
6189
|
frq_out = store_out->new_output(store_out, file_name);
|
5508
|
-
sprintf(file_name, "%s.frq",
|
6190
|
+
sprintf(file_name, "%s.frq", sr_segment);
|
5509
6191
|
frq_in = store_in->open_input(store_in, file_name);
|
5510
6192
|
|
5511
6193
|
sprintf(file_name, "%s.prx", segment);
|
5512
6194
|
prx_out = store_out->new_output(store_out, file_name);
|
5513
|
-
sprintf(file_name, "%s.prx",
|
6195
|
+
sprintf(file_name, "%s.prx", sr_segment);
|
5514
6196
|
prx_in = store_in->open_input(store_in, file_name);
|
5515
6197
|
|
5516
6198
|
if (map) {
|
@@ -5548,47 +6230,38 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
|
|
5548
6230
|
}
|
5549
6231
|
|
5550
6232
|
static void iw_cp_norms(IndexWriter *iw, SegmentReader *sr,
|
5551
|
-
|
6233
|
+
SegmentInfo *si, int *map)
|
5552
6234
|
{
|
5553
6235
|
int i;
|
5554
6236
|
FieldInfos *fis = IR(sr)->fis;
|
5555
6237
|
const int field_cnt = fis->size;
|
5556
6238
|
InStream *norms_in;
|
5557
6239
|
OutStream *norms_out;
|
5558
|
-
Store *store_in = sr->ir.store;
|
5559
|
-
Store *cfs_store_in = sr->cfs_store;
|
5560
6240
|
Store *store_out = iw->store;
|
5561
6241
|
char file_name_in[SEGMENT_NAME_MAX_LENGTH];
|
5562
|
-
char *ext_ptr_in;
|
5563
6242
|
char file_name_out[SEGMENT_NAME_MAX_LENGTH];
|
5564
|
-
char *ext_ptr_out;
|
5565
|
-
sprintf(file_name_in, "%s.", sr->segment);
|
5566
|
-
ext_ptr_in = file_name_in + strlen(file_name_in);
|
5567
|
-
sprintf(file_name_out, "%s.", segment);
|
5568
|
-
ext_ptr_out = file_name_out + strlen(file_name_out);
|
5569
6243
|
|
5570
6244
|
for (i = 0; i < field_cnt; i++) {
|
5571
|
-
if (fi_has_norms(fis->fields[i])
|
5572
|
-
|
5573
|
-
|
5574
|
-
|
5575
|
-
|
5576
|
-
|
5577
|
-
|
5578
|
-
|
5579
|
-
|
5580
|
-
|
5581
|
-
|
5582
|
-
|
5583
|
-
|
5584
|
-
|
5585
|
-
}
|
6245
|
+
if (fi_has_norms(fis->fields[i])
|
6246
|
+
&& si_norm_file_name(sr->si, file_name_in, i)) {
|
6247
|
+
Store *store = (sr->si->use_compound_file
|
6248
|
+
&& sr->si->norm_gens[i] == 0) ? sr->cfs_store
|
6249
|
+
: IR(sr)->store;
|
6250
|
+
int field_num = map ? map[i] : i;
|
6251
|
+
|
6252
|
+
norms_in = store->open_input(store, file_name_in);
|
6253
|
+
si_advance_norm_gen(si, field_num);
|
6254
|
+
si_norm_file_name(si, file_name_out, field_num);
|
6255
|
+
norms_out = store_out->new_output(store_out, file_name_out);
|
6256
|
+
is2os_copy_bytes(norms_in, norms_out, is_length(norms_in));
|
6257
|
+
os_close(norms_out);
|
6258
|
+
is_close(norms_in);
|
5586
6259
|
}
|
5587
6260
|
}
|
5588
6261
|
}
|
5589
6262
|
|
5590
6263
|
static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
|
5591
|
-
|
6264
|
+
SegmentInfo *si)
|
5592
6265
|
{
|
5593
6266
|
int i;
|
5594
6267
|
FieldInfos *from_fis = IR(sr)->fis;
|
@@ -5600,19 +6273,19 @@ static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
|
|
5600
6273
|
field_map[i] = fis_get_field_num(to_fis, from_fis->fields[i]->name);
|
5601
6274
|
}
|
5602
6275
|
|
5603
|
-
iw_cp_fields(iw, sr,
|
5604
|
-
iw_cp_terms(iw, sr,
|
5605
|
-
iw_cp_norms(iw, sr,
|
6276
|
+
iw_cp_fields(iw, sr, si->name, field_map);
|
6277
|
+
iw_cp_terms( iw, sr, si->name, field_map);
|
6278
|
+
iw_cp_norms( iw, sr, si, field_map);
|
5606
6279
|
|
5607
6280
|
free(field_map);
|
5608
6281
|
}
|
5609
6282
|
|
5610
6283
|
static void iw_cp_files(IndexWriter *iw, SegmentReader *sr,
|
5611
|
-
|
6284
|
+
SegmentInfo *si)
|
5612
6285
|
{
|
5613
|
-
iw_cp_fields(iw, sr,
|
5614
|
-
iw_cp_terms(iw, sr,
|
5615
|
-
iw_cp_norms(iw, sr,
|
6286
|
+
iw_cp_fields(iw, sr, si->name, NULL);
|
6287
|
+
iw_cp_terms( iw, sr, si->name, NULL);
|
6288
|
+
iw_cp_norms( iw, sr, si, NULL);
|
5616
6289
|
}
|
5617
6290
|
|
5618
6291
|
static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
|
@@ -5641,10 +6314,10 @@ static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
|
|
5641
6314
|
}
|
5642
6315
|
|
5643
6316
|
if (must_map_fields) {
|
5644
|
-
iw_cp_map_files(iw, sr, si
|
6317
|
+
iw_cp_map_files(iw, sr, si);
|
5645
6318
|
}
|
5646
6319
|
else {
|
5647
|
-
iw_cp_files(iw, sr, si
|
6320
|
+
iw_cp_files(iw, sr, si);
|
5648
6321
|
}
|
5649
6322
|
}
|
5650
6323
|
|
@@ -5666,8 +6339,6 @@ static void iw_add_segments(IndexWriter *iw, IndexReader *ir)
|
|
5666
6339
|
void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
|
5667
6340
|
{
|
5668
6341
|
int i;
|
5669
|
-
Lock *commit_lock;
|
5670
|
-
|
5671
6342
|
mutex_lock(&iw->mutex);
|
5672
6343
|
iw_optimize_i(iw);
|
5673
6344
|
|
@@ -5676,16 +6347,9 @@ void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
|
|
5676
6347
|
}
|
5677
6348
|
|
5678
6349
|
mutex_lock(&iw->store->mutex);
|
5679
|
-
commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
|
5680
6350
|
|
5681
|
-
if (!commit_lock->obtain(commit_lock)) {
|
5682
|
-
RAISE(LOCK_ERROR, "Couldn't obtain commit lock to write segments file");
|
5683
|
-
}
|
5684
6351
|
/* commit the segments file and the fields file */
|
5685
|
-
|
5686
|
-
sis_write(iw->sis, iw->store);
|
5687
|
-
commit_lock->release(commit_lock);
|
5688
|
-
close_lock(commit_lock);
|
6352
|
+
sis_write(iw->sis, iw->store, iw->deleter);
|
5689
6353
|
mutex_unlock(&iw->store->mutex);
|
5690
6354
|
|
5691
6355
|
iw_optimize_i(iw);
|