ferret 0.10.14 → 0.11.0
Sign up to get free protection for your applications and to get access to all the features.
- data/TODO +3 -0
- data/ext/analysis.c +5 -0
- data/ext/compound_io.c +46 -24
- data/ext/except.c +14 -0
- data/ext/except.h +29 -17
- data/ext/ferret.c +22 -1
- data/ext/ferret.h +2 -1
- data/ext/fs_store.c +9 -12
- data/ext/global.c +80 -0
- data/ext/global.h +10 -0
- data/ext/hash.c +0 -7
- data/ext/hash.h +0 -8
- data/ext/index.c +1289 -625
- data/ext/index.h +59 -14
- data/ext/q_boolean.c +12 -5
- data/ext/q_parser.c +570 -372
- data/ext/r_analysis.c +16 -16
- data/ext/r_index.c +41 -43
- data/ext/r_qparser.c +37 -36
- data/ext/r_search.c +10 -10
- data/ext/r_store.c +7 -7
- data/ext/ram_store.c +4 -3
- data/ext/search.c +3 -2
- data/ext/store.c +35 -19
- data/ext/store.h +3 -5
- data/lib/ferret/index.rb +4 -4
- data/lib/ferret_version.rb +1 -1
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +17 -21
- data/test/unit/index/tc_index.rb +6 -2
- data/test/unit/index/tc_index_writer.rb +2 -2
- data/test/unit/query_parser/tc_query_parser.rb +20 -5
- data/test/unit/search/tc_index_searcher.rb +3 -1
- data/test/unit/search/tm_searcher.rb +3 -1
- metadata +3 -2
data/ext/hash.c
CHANGED
@@ -18,13 +18,6 @@ static char *dummy_key = "";
|
|
18
18
|
static HashTable *free_hts[MAX_FREE_HASH_TABLES];
|
19
19
|
static int num_free_hts = 0;
|
20
20
|
|
21
|
-
unsigned long *imalloc(unsigned long value)
|
22
|
-
{
|
23
|
-
unsigned long *p = ALLOC(unsigned long);
|
24
|
-
*p = value;
|
25
|
-
return p;
|
26
|
-
}
|
27
|
-
|
28
21
|
unsigned long str_hash(const char *const str)
|
29
22
|
{
|
30
23
|
register unsigned long h = 0;
|
data/ext/hash.h
CHANGED
@@ -81,14 +81,6 @@ typedef unsigned long (*hash_ft)(const void *key);
|
|
81
81
|
*/
|
82
82
|
typedef int (*eq_ft)(const void *key1, const void *key2);
|
83
83
|
|
84
|
-
|
85
|
-
/**
|
86
|
-
* Create a pointer to an allocated U32 integer. This function is a utility
|
87
|
-
* function used to add integers to a HashTable, either as the key or the
|
88
|
-
* value.
|
89
|
-
*/
|
90
|
-
extern unsigned long *imalloc(unsigned long value);
|
91
|
-
|
92
84
|
/**
|
93
85
|
* Determine a hash value for a string. The string must be null terminated
|
94
86
|
*
|
data/ext/index.c
CHANGED
@@ -5,6 +5,7 @@
|
|
5
5
|
#include "priorityqueue.h"
|
6
6
|
#include <string.h>
|
7
7
|
#include <limits.h>
|
8
|
+
#include <ctype.h>
|
8
9
|
|
9
10
|
#define GET_LOCK(lock, name, store, err_msg) do {\
|
10
11
|
lock = store->open_lock(store, name);\
|
@@ -18,14 +19,6 @@
|
|
18
19
|
store->close_lock(lock);\
|
19
20
|
} while (0)
|
20
21
|
|
21
|
-
const char *INDEX_EXTENSIONS[] = {
|
22
|
-
"fdx", "fdt", "tfx", "tix", "tis", "frq", "prx", "del"
|
23
|
-
};
|
24
|
-
|
25
|
-
const char *COMPOUND_EXTENSIONS[] = {
|
26
|
-
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
|
27
|
-
};
|
28
|
-
|
29
22
|
const Config default_config = {
|
30
23
|
0x100000, /* chunk size is 1Mb */
|
31
24
|
0x1000000, /* Max memory used for buffer is 16 Mb */
|
@@ -41,6 +34,128 @@ const Config default_config = {
|
|
41
34
|
static void ste_reset(TermEnum *te);
|
42
35
|
static char *ste_next(TermEnum *te);
|
43
36
|
|
37
|
+
#define FORMAT 0
|
38
|
+
#define SEGMENTS_GEN_FILE_NAME "segments.gen"
|
39
|
+
#define MAX_EXT_LEN 10
|
40
|
+
|
41
|
+
/* *** Must be three characters *** */
|
42
|
+
const char *INDEX_EXTENSIONS[] = {
|
43
|
+
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen", "cfs"
|
44
|
+
};
|
45
|
+
|
46
|
+
/* *** Must be three characters *** */
|
47
|
+
const char *COMPOUND_EXTENSIONS[] = {
|
48
|
+
"frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
|
49
|
+
};
|
50
|
+
|
51
|
+
|
52
|
+
static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
|
53
|
+
|
54
|
+
static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
|
55
|
+
{
|
56
|
+
int i = buf_size--;
|
57
|
+
buf[i] = '\0';
|
58
|
+
for (i--; i >= 0; i--) {
|
59
|
+
buf[i] = BASE36_DIGITMAP[u % 36];
|
60
|
+
u /= 36;
|
61
|
+
if (0 == u) {
|
62
|
+
break;
|
63
|
+
}
|
64
|
+
}
|
65
|
+
if (0 < u) {
|
66
|
+
RAISE(EXCEPTION, "Max length of segment filename has been reached. "
|
67
|
+
"Perhaps it's time to re-index.\n");
|
68
|
+
}
|
69
|
+
return buf + i;
|
70
|
+
}
|
71
|
+
|
72
|
+
static f_u64 str36_to_u64(char *p)
|
73
|
+
{
|
74
|
+
f_u64 u = 0;
|
75
|
+
while (true) {
|
76
|
+
if ('0' <= *p && '9' >= *p) {
|
77
|
+
u = u * 36 + *p - '0';
|
78
|
+
}
|
79
|
+
else if ('a' <= *p && 'z' >= *p) {
|
80
|
+
u = u * 36 + *p - 'a' + 10;
|
81
|
+
}
|
82
|
+
else {
|
83
|
+
break;
|
84
|
+
}
|
85
|
+
p++;
|
86
|
+
}
|
87
|
+
return u;
|
88
|
+
}
|
89
|
+
|
90
|
+
/*
|
91
|
+
* Computes the full file name from base, extension and generation. If the
|
92
|
+
* generation is -1, the file name is NULL. Otherwise the file name is
|
93
|
+
* <base>_<generation>.<extension>, or <base>_<generation> when the
|
94
|
+
* extension is NULL. The generation is written in base 36.
|
95
|
+
*
|
96
|
+
* @param buf buffer to write filename to
|
97
|
+
* @param base main part of the file name
|
98
|
+
* @param ext extension of the filename (without the leading '.'; may be NULL)
|
99
|
+
* @param gen generation
|
100
|
+
*/
|
101
|
+
char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
|
102
|
+
{
|
103
|
+
if (-1 == gen) {
|
104
|
+
return NULL;
|
105
|
+
}
|
106
|
+
else {
|
107
|
+
char b[SEGMENT_NAME_MAX_LENGTH];
|
108
|
+
char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen);
|
109
|
+
if (ext == NULL) {
|
110
|
+
sprintf(buf, "%s_%s", base, u);
|
111
|
+
}
|
112
|
+
else {
|
113
|
+
sprintf(buf, "%s_%s.%s", base, u, ext);
|
114
|
+
}
|
115
|
+
return buf;
|
116
|
+
}
|
117
|
+
}
|
118
|
+
|
119
|
+
char *segfn_for_generation(char *buf, int generation)
|
120
|
+
{
|
121
|
+
char b[SEGMENT_NAME_MAX_LENGTH];
|
122
|
+
char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)generation);
|
123
|
+
sprintf(buf, SEGMENTS_FILE_NAME"_%s", u);
|
124
|
+
return buf;
|
125
|
+
}
|
126
|
+
|
127
|
+
/*
|
128
|
+
* Computes the field specific file name from base, extension, generation and
|
129
|
+
* field number. If the generation is -1, the file name is NULL. Otherwise
|
130
|
+
* the file name is <base>_<generation>.<extension><field_num>, with the
|
131
|
+
* generation written in base 36.
|
132
|
+
*
|
133
|
+
* @param buf buffer to write filename to
|
134
|
+
* @param base main part of the file name
|
135
|
+
* @param ext extension of the filename (without the leading '.')
|
136
|
+
* @param gen generation
|
137
|
+
* @param field_num field number
|
138
|
+
*/
|
139
|
+
static char *fn_for_gen_field(char *buf,
|
140
|
+
char *base,
|
141
|
+
char *ext,
|
142
|
+
f_i64 gen,
|
143
|
+
int field_num)
|
144
|
+
{
|
145
|
+
if (-1 == gen) {
|
146
|
+
return NULL;
|
147
|
+
}
|
148
|
+
else {
|
149
|
+
char b[SEGMENT_NAME_MAX_LENGTH];
|
150
|
+
sprintf(buf, "%s_%s.%s%d",
|
151
|
+
base,
|
152
|
+
u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen),
|
153
|
+
ext,
|
154
|
+
field_num);
|
155
|
+
return buf;
|
156
|
+
}
|
157
|
+
}
|
158
|
+
|
44
159
|
/***************************************************************************
|
45
160
|
*
|
46
161
|
* CacheObject
|
@@ -175,7 +290,7 @@ FieldInfo *fi_new(const char *name,
|
|
175
290
|
|
176
291
|
void fi_deref(FieldInfo *fi)
|
177
292
|
{
|
178
|
-
if (--(fi->ref_cnt)
|
293
|
+
if (0 == --(fi->ref_cnt)) {
|
179
294
|
free(fi->name);
|
180
295
|
free(fi);
|
181
296
|
}
|
@@ -208,9 +323,6 @@ char *fi_to_s(FieldInfo *fi)
|
|
208
323
|
*
|
209
324
|
****************************************************************************/
|
210
325
|
|
211
|
-
#define FIELDS_FILENAME "fields"
|
212
|
-
#define TEMPORARY_FIELDS_FILENAME "fields.new"
|
213
|
-
|
214
326
|
FieldInfos *fis_new(int store, int index, int term_vector)
|
215
327
|
{
|
216
328
|
FieldInfos *fis = ALLOC(FieldInfos);
|
@@ -278,14 +390,13 @@ FieldInfo *fis_by_number(FieldInfos *fis, int num)
|
|
278
390
|
}
|
279
391
|
}
|
280
392
|
|
281
|
-
FieldInfos *fis_read(
|
393
|
+
FieldInfos *fis_read(InStream *is)
|
282
394
|
{
|
283
395
|
int store_val, index_val, term_vector_val;
|
284
396
|
int i;
|
285
397
|
union { f_u32 i; float f; } tmp;
|
286
398
|
FieldInfo *fi;
|
287
399
|
FieldInfos *fis;
|
288
|
-
InStream *is = store->open_input(store, FIELDS_FILENAME);
|
289
400
|
|
290
401
|
store_val = is_read_vint(is);
|
291
402
|
index_val = is_read_vint(is);
|
@@ -300,17 +411,15 @@ FieldInfos *fis_read(Store *store)
|
|
300
411
|
fis_add_field(fis, fi);
|
301
412
|
fi->ref_cnt = 1;
|
302
413
|
}
|
303
|
-
is_close(is);
|
304
414
|
|
305
415
|
return fis;
|
306
416
|
}
|
307
417
|
|
308
|
-
void fis_write(FieldInfos *fis,
|
418
|
+
void fis_write(FieldInfos *fis, OutStream *os)
|
309
419
|
{
|
310
420
|
int i;
|
311
421
|
union { f_u32 i; float f; } tmp;
|
312
422
|
FieldInfo *fi;
|
313
|
-
OutStream *os = store->new_output(store, TEMPORARY_FIELDS_FILENAME);
|
314
423
|
const int fis_size = fis->size;
|
315
424
|
|
316
425
|
os_write_vint(os, fis->store);
|
@@ -324,9 +433,6 @@ void fis_write(FieldInfos *fis, Store *store)
|
|
324
433
|
os_write_u32(os, tmp.i);
|
325
434
|
os_write_vint(os, fi->bits);
|
326
435
|
}
|
327
|
-
os_close(os);
|
328
|
-
|
329
|
-
store->rename(store, TEMPORARY_FIELDS_FILENAME, FIELDS_FILENAME);
|
330
436
|
}
|
331
437
|
|
332
438
|
static const char *store_str[] = {
|
@@ -408,7 +514,7 @@ char *fis_to_s(FieldInfos *fis)
|
|
408
514
|
|
409
515
|
void fis_deref(FieldInfos *fis)
|
410
516
|
{
|
411
|
-
if (--(fis->ref_cnt)
|
517
|
+
if (0 == --(fis->ref_cnt)) {
|
412
518
|
h_destroy(fis->field_dict);
|
413
519
|
free(fis->fields);
|
414
520
|
free(fis);
|
@@ -440,54 +546,144 @@ SegmentInfo *si_new(char *name, int doc_cnt, Store *store)
|
|
440
546
|
si->name = name;
|
441
547
|
si->doc_cnt = doc_cnt;
|
442
548
|
si->store = store;
|
549
|
+
si->del_gen = -1;
|
550
|
+
si->norm_gens = NULL;
|
551
|
+
si->norm_gens_size = 0;
|
552
|
+
si->ref_cnt = 1;
|
553
|
+
si->use_compound_file = false;
|
443
554
|
return si;
|
444
555
|
}
|
445
556
|
|
446
|
-
|
557
|
+
SegmentInfo *si_read(Store *store, InStream *is)
|
447
558
|
{
|
448
|
-
|
449
|
-
|
559
|
+
SegmentInfo *si = ALLOC_AND_ZERO(SegmentInfo);
|
560
|
+
si->store = store;
|
561
|
+
si->name = is_read_string(is);
|
562
|
+
si->doc_cnt = is_read_vint(is);
|
563
|
+
si->del_gen = is_read_vint(is);
|
564
|
+
si->norm_gens_size = is_read_vint(is);
|
565
|
+
si->ref_cnt = 1;
|
566
|
+
if (0 < si->norm_gens_size) {
|
567
|
+
int i;
|
568
|
+
si->norm_gens = ALLOC_N(int, si->norm_gens_size);
|
569
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
570
|
+
si->norm_gens[i] = is_read_vint(is);
|
571
|
+
}
|
572
|
+
}
|
573
|
+
si->use_compound_file = (bool)is_read_byte(is);
|
574
|
+
return si;
|
450
575
|
}
|
451
576
|
|
452
|
-
|
577
|
+
void si_write(SegmentInfo *si, OutStream *os)
|
453
578
|
{
|
454
|
-
|
455
|
-
|
456
|
-
|
579
|
+
os_write_string(os, si->name);
|
580
|
+
os_write_vint(os, si->doc_cnt);
|
581
|
+
os_write_vint(os, si->del_gen);
|
582
|
+
os_write_vint(os, si->norm_gens_size);
|
583
|
+
if (0 < si->norm_gens_size) {
|
584
|
+
int i;
|
585
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
586
|
+
os_write_vint(os, si->norm_gens[i]);
|
587
|
+
}
|
588
|
+
}
|
589
|
+
os_write_byte(os, (uchar)si->use_compound_file);
|
457
590
|
}
|
458
591
|
|
459
|
-
|
592
|
+
void si_deref(SegmentInfo *si)
|
460
593
|
{
|
461
|
-
|
462
|
-
|
463
|
-
|
594
|
+
if (--si->ref_cnt <= 0) {
|
595
|
+
free(si->name);
|
596
|
+
free(si->norm_gens);
|
597
|
+
free(si);
|
598
|
+
}
|
464
599
|
}
|
465
600
|
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
};
|
601
|
+
bool si_has_deletions(SegmentInfo *si)
|
602
|
+
{
|
603
|
+
return si->del_gen >= 0;
|
604
|
+
}
|
471
605
|
|
472
|
-
|
606
|
+
char *si_del_file_name(SegmentInfo *si, char *buf)
|
473
607
|
{
|
474
|
-
if (
|
475
|
-
|
476
|
-
|
608
|
+
if (si->del_gen < 0) {
|
609
|
+
return NULL;
|
610
|
+
}
|
611
|
+
else {
|
612
|
+
return fn_for_generation(buf, si->name, ".del", si->del_gen);
|
477
613
|
}
|
478
614
|
}
|
479
615
|
|
480
616
|
bool si_has_separate_norms(SegmentInfo *si)
|
481
617
|
{
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
618
|
+
if (si->use_compound_file && si->norm_gens) {
|
619
|
+
int i;
|
620
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
621
|
+
if (si->norm_gens[i] > 0) return true;
|
622
|
+
}
|
623
|
+
}
|
624
|
+
return false;
|
625
|
+
}
|
487
626
|
|
488
|
-
|
627
|
+
void si_advance_norm_gen(SegmentInfo *si, int field_num)
|
628
|
+
{
|
629
|
+
if (field_num >= si->norm_gens_size) {
|
630
|
+
int i;
|
631
|
+
REALLOC_N(si->norm_gens, int, field_num + 1);
|
632
|
+
for (i = si->norm_gens_size; i <= field_num; i++) {
|
633
|
+
si->norm_gens[i] = -1;
|
634
|
+
}
|
635
|
+
si->norm_gens_size = field_num + 1;
|
636
|
+
}
|
637
|
+
si->norm_gens[field_num]++;
|
489
638
|
}
|
490
639
|
|
640
|
+
char *si_norm_file_name(SegmentInfo *si, char *buf, int field_num)
|
641
|
+
{
|
642
|
+
int norm_gen;
|
643
|
+
if (field_num >= si->norm_gens_size
|
644
|
+
|| 0 > (norm_gen = si->norm_gens[field_num])) {
|
645
|
+
return NULL;
|
646
|
+
}
|
647
|
+
else {
|
648
|
+
char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
|
649
|
+
return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
|
650
|
+
}
|
651
|
+
}
|
652
|
+
|
653
|
+
void deleter_queue_file(Deleter *dlr, char *file_name);
|
654
|
+
#define DEL(file_name) deleter_queue_file(dlr, file_name)
|
655
|
+
|
656
|
+
static void si_delete_files(SegmentInfo *si, FieldInfos *fis, Deleter *dlr)
|
657
|
+
{
|
658
|
+
int i;
|
659
|
+
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
660
|
+
size_t seg_len = strlen(si->name);
|
661
|
+
char *ext;
|
662
|
+
|
663
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
664
|
+
if (0 <= si->norm_gens[i]) {
|
665
|
+
DEL(si_norm_file_name(si, file_name, fis->fields[i]->number));
|
666
|
+
}
|
667
|
+
}
|
668
|
+
|
669
|
+
memcpy(file_name, si->name, seg_len);
|
670
|
+
file_name[seg_len] = '.';
|
671
|
+
ext = file_name + seg_len + 1;
|
672
|
+
|
673
|
+
if (si->use_compound_file) {
|
674
|
+
memcpy(ext, "cfs", 4);
|
675
|
+
DEL(file_name);
|
676
|
+
if (0 <= si->del_gen) {
|
677
|
+
DEL(fn_for_generation(file_name, si->name, "del", si->del_gen));
|
678
|
+
}
|
679
|
+
}
|
680
|
+
else {
|
681
|
+
for (i = NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
|
682
|
+
memcpy(ext, INDEX_EXTENSIONS[i], 4);
|
683
|
+
DEL(file_name);
|
684
|
+
}
|
685
|
+
}
|
686
|
+
}
|
491
687
|
|
492
688
|
/****************************************************************************
|
493
689
|
*
|
@@ -496,42 +692,266 @@ bool si_has_separate_norms(SegmentInfo *si)
|
|
496
692
|
****************************************************************************/
|
497
693
|
|
498
694
|
#include <time.h>
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
695
|
+
static char *new_segment(f_i64 generation)
|
696
|
+
{
|
697
|
+
char buf[SEGMENT_NAME_MAX_LENGTH];
|
698
|
+
char *fn_p = u64_to_str36(buf, SEGMENT_NAME_MAX_LENGTH - 1,
|
699
|
+
(f_u64)generation);
|
700
|
+
*(--fn_p) = '_';
|
701
|
+
return estrdup(fn_p);
|
702
|
+
}
|
703
|
+
|
704
|
+
/****************************************************************************
|
705
|
+
* FindSegmentsFile
|
706
|
+
****************************************************************************/
|
503
707
|
|
504
|
-
|
708
|
+
typedef struct FindSegmentsFile {
|
709
|
+
f_i64 generation;
|
710
|
+
f_u64 u64_return;
|
711
|
+
void *p_return;
|
712
|
+
} FindSegmentsFile;
|
505
713
|
|
506
|
-
static char *
|
714
|
+
static void which_gen_i(char *file_name, void *arg)
|
715
|
+
{
|
716
|
+
f_i64 *max_generation = (f_i64 *)arg;
|
717
|
+
if (0 == strncmp(SEGMENTS_FILE_NAME"_", file_name,
|
718
|
+
sizeof(SEGMENTS_FILE_NAME))) {
|
719
|
+
char *p = strrchr(file_name, '_') + 1;
|
720
|
+
f_i64 generation = (f_i64)str36_to_u64(p);
|
721
|
+
if (generation > *max_generation) *max_generation = generation;
|
722
|
+
}
|
723
|
+
}
|
724
|
+
|
725
|
+
static void si_put(SegmentInfo *si, FILE *stream)
|
507
726
|
{
|
508
|
-
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
509
727
|
int i;
|
728
|
+
fprintf(stream, "\tSegmentInfo {\n");
|
729
|
+
fprintf(stream, "\t\tname = %s\n", si->name);
|
730
|
+
fprintf(stream, "\t\tdoc_cnt = %d\n", si->doc_cnt);
|
731
|
+
fprintf(stream, "\t\tdel_gen = %d\n", si->del_gen);
|
732
|
+
fprintf(stream, "\t\tnorm_gens_size = %d\n", si->norm_gens_size);
|
733
|
+
fprintf(stream, "\t\tnorm_gens {\n");
|
734
|
+
for (i = 0; i < si->norm_gens_size; i++) {
|
735
|
+
fprintf(stream, "\t\t\t%d\n", si->norm_gens[i]);
|
736
|
+
}
|
737
|
+
fprintf(stream, "\t\t}\n");
|
738
|
+
fprintf(stream, "\t\tref_cnt = %d\n", si->ref_cnt);
|
739
|
+
fprintf(stream, "\t}\n");
|
740
|
+
}
|
510
741
|
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
|
517
|
-
|
742
|
+
void sis_put(SegmentInfos *sis, FILE *stream)
|
743
|
+
{
|
744
|
+
int i;
|
745
|
+
fprintf(stream, "SegmentInfos {\n");
|
746
|
+
fprintf(stream, "\tcounter = %"POSH_I64_PRINTF_PREFIX"d\n", sis->counter);
|
747
|
+
fprintf(stream, "\tversion = %"POSH_I64_PRINTF_PREFIX"d\n", sis->version);
|
748
|
+
fprintf(stream, "\tgeneration = %"POSH_I64_PRINTF_PREFIX"d\n", sis->generation);
|
749
|
+
fprintf(stream, "\tformat = %d\n", sis->format);
|
750
|
+
fprintf(stream, "\tsize = %d\n", sis->size);
|
751
|
+
fprintf(stream, "\tcapa = %d\n", sis->capa);
|
752
|
+
for (i = 0; i < sis->size; i++) {
|
753
|
+
si_put(sis->segs[i], stream);
|
518
754
|
}
|
519
|
-
|
520
|
-
|
521
|
-
|
755
|
+
fprintf(stream, "}\n");
|
756
|
+
}
|
757
|
+
|
758
|
+
/*
|
759
|
+
* Get the generation (N) of the current segments_N file from a list of files.
|
760
|
+
*
|
761
|
+
* @param store - the Store to look in
|
762
|
+
*/
|
763
|
+
f_i64 sis_current_segment_generation(Store *store)
|
764
|
+
{
|
765
|
+
f_i64 current_generation = -1;
|
766
|
+
store->each(store, &which_gen_i, &current_generation);
|
767
|
+
return current_generation;
|
768
|
+
}
|
769
|
+
|
770
|
+
/*
|
771
|
+
* Get the current generation filename.
|
772
|
+
*
|
773
|
+
* @param buf - buffer to write filename to
|
774
|
+
* @param store - the Store to look in
|
775
|
+
* @return segments_N where N is the current generation
|
776
|
+
*/
|
777
|
+
char *sis_curr_seg_file_name(char *buf, Store *store)
|
778
|
+
{
|
779
|
+
return segfn_for_generation(buf, sis_current_segment_generation(store));
|
780
|
+
}
|
781
|
+
|
782
|
+
/*
|
783
|
+
* Get the next generation filename.
|
784
|
+
*
|
785
|
+
* @param buf - buffer to write filename to
|
786
|
+
* @param store - the Store to look in
|
787
|
+
* @return segments_N where N is the +next+ generation
|
788
|
+
*/
|
789
|
+
char *sis_next_seg_file_name(char *buf, Store *store)
|
790
|
+
{
|
791
|
+
return segfn_for_generation(buf, sis_current_segment_generation(store) + 1);
|
792
|
+
}
|
793
|
+
|
794
|
+
#define GEN_FILE_RETRY_COUNT 10
|
795
|
+
#define GEN_LOOK_AHEAD_COUNT 10
|
796
|
+
void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
|
797
|
+
void (*run)(Store *store, FindSegmentsFile *fsf))
|
798
|
+
{
|
799
|
+
int i;
|
800
|
+
int gen_look_ahead_count = 0;
|
801
|
+
bool retry = false;
|
802
|
+
int method = 0;
|
803
|
+
f_i64 last_gen = -1;
|
804
|
+
f_i64 gen = 0;
|
805
|
+
|
806
|
+
/* Loop until we succeed in calling run() without hitting an
|
807
|
+
* IO error. An IO error most likely means a commit was in progress
|
808
|
+
* and has finished, in the time it took us to load the now-old infos
|
809
|
+
* files (and segments files). It's also possible it's a true error
|
810
|
+
* (corrupt index). To distinguish these, on each retry we must see
|
811
|
+
* "forward progress" on which generation we are trying to load. If we
|
812
|
+
* don't, then the original error is real and we throw it.
|
813
|
+
*
|
814
|
+
* We have three methods for determining the current generation. We try
|
815
|
+
* each in sequence. */
|
816
|
+
while (true) {
|
817
|
+
/* Method 1: list the directory and use the highest segments_N file.
|
818
|
+
* This method works well as long as there is no stale caching on the
|
819
|
+
* directory contents: */
|
820
|
+
if (0 == method) {
|
821
|
+
gen = sis_current_segment_generation(store);
|
822
|
+
if (gen == -1) {
|
823
|
+
/*fprintf(stderr, ">>\n%s\n>>\n", store_to_s(store));*/
|
824
|
+
RAISE(FILE_NOT_FOUND_ERROR, "couldn't find segments file");
|
825
|
+
}
|
826
|
+
}
|
827
|
+
|
828
|
+
/* Method 2 (fallback if Method 1 isn't reliable): if the directory
|
829
|
+
* listing seems to be stale, try loading the "segments.gen" file. */
|
830
|
+
if (1 == method || (0 == method && last_gen == gen && retry)) {
|
831
|
+
method = 1;
|
832
|
+
for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
|
833
|
+
InStream *gen_is = NULL;
|
834
|
+
TRY
|
835
|
+
gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
|
836
|
+
XCATCHALL
|
837
|
+
HANDLED();
|
838
|
+
/* TODO:LOG "segments.gen open: IO_ERROR"*/
|
839
|
+
XENDTRY
|
840
|
+
|
841
|
+
if (NULL != gen_is) {
|
842
|
+
f_i64 gen0 = -1, gen1 = -1;
|
843
|
+
|
844
|
+
TRY
|
845
|
+
gen0 = is_read_u64(gen_is);
|
846
|
+
gen1 = is_read_u64(gen_is);
|
847
|
+
XFINALLY
|
848
|
+
/* if there is an error we'll simply try again */
|
849
|
+
HANDLED();
|
850
|
+
is_close(gen_is);
|
851
|
+
XENDTRY
|
852
|
+
/* TODO:LOG "fallback check: " + gen0 + "; " + gen1 */
|
853
|
+
if (gen0 == gen1) {
|
854
|
+
/* The file is consistent. */
|
855
|
+
if (gen0 > gen) {
|
856
|
+
/* TODO:LOG "fallback to '" +
|
857
|
+
* IndexFileNames.SEGMENTS_GEN + "' check: now
|
858
|
+
* try generation " + gen0 + " > " + gen */
|
859
|
+
gen = gen0;
|
860
|
+
}
|
861
|
+
goto method_two_loop_end;
|
862
|
+
}
|
863
|
+
break;
|
864
|
+
}
|
865
|
+
/* sleep for 50 milliseconds */
|
866
|
+
micro_sleep(50000);
|
867
|
+
}
|
868
|
+
}
|
869
|
+
method_two_loop_end:
|
870
|
+
|
871
|
+
/* Method 3 (fallback if Methods 1 & 2 are not reliable): since both
|
872
|
+
* directory cache and file contents cache seem to be stale, just
|
873
|
+
* advance the generation. */
|
874
|
+
if (2 == method || (1 == method && last_gen == gen && retry)) {
|
875
|
+
method = 2;
|
876
|
+
if (gen_look_ahead_count < GEN_LOOK_AHEAD_COUNT) {
|
877
|
+
gen++;
|
878
|
+
gen_look_ahead_count++;
|
879
|
+
/* TODO:LOG "look ahead increment gen to " + gen */
|
880
|
+
}
|
881
|
+
}
|
882
|
+
|
883
|
+
if (last_gen == gen) {
|
884
|
+
/* This means we're about to try the same segments_N last tried.
|
885
|
+
* This is allowed, exactly once, because writer could have been
|
886
|
+
* in the process of writing segments_N last time. */
|
887
|
+
if (retry) {
|
888
|
+
/* OK, we've tried the same segments_N file twice in a row, so
|
889
|
+
* this must be a real error. We throw the original exception
|
890
|
+
* we got. */
|
891
|
+
RAISE(IO_ERROR, "Error reading the segment infos");
|
892
|
+
}
|
893
|
+
else {
|
894
|
+
retry = true;
|
895
|
+
}
|
896
|
+
}
|
897
|
+
else {
|
898
|
+
/* Segment file has advanced since our last loop, so reset retry: */
|
899
|
+
retry = false;
|
900
|
+
}
|
901
|
+
last_gen = gen;
|
902
|
+
|
903
|
+
TRY
|
904
|
+
fsf->generation = gen;
|
905
|
+
run(store, fsf);
|
906
|
+
RETURN_EARLY();
|
907
|
+
return;
|
908
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
909
|
+
HANDLED();
|
910
|
+
/* Save the original root cause: */
|
911
|
+
/* TODO:LOG "primary Exception on '" + segmentFileName + "': " +
|
912
|
+
* err + "'; will retry: retry=" + retry + "; gen = " + gen */
|
913
|
+
|
914
|
+
if (!retry && gen > 1) {
|
915
|
+
/* This is our first time trying this segments file (because
|
916
|
+
* retry is false), and, there is possibly a segments_(N-1)
|
917
|
+
* (because gen > 1). So, check if the segments_(N-1) exists
|
918
|
+
* and try it if so: */
|
919
|
+
char prev_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
920
|
+
segfn_for_generation(prev_seg_file_name, gen - 1);
|
921
|
+
if (store->exists(store, prev_seg_file_name)) {
|
922
|
+
/* TODO:LOG "fallback to prior segment file '" +
|
923
|
+
* prevSegmentFileName + "'" */
|
924
|
+
TRY
|
925
|
+
fsf->generation = gen - 1;
|
926
|
+
run(store, fsf);
|
927
|
+
/* TODO:LOG "success on fallback " +
|
928
|
+
* prev_seg_file_name */
|
929
|
+
|
930
|
+
/* pop two contexts as we are in nested try blocks */
|
931
|
+
RETURN_EARLY();
|
932
|
+
RETURN_EARLY();
|
933
|
+
return;
|
934
|
+
case IO_ERROR: case FILE_NOT_FOUND_ERROR:
|
935
|
+
HANDLED();
|
936
|
+
/* TODO:LOG "secondary Exception on '" +
|
937
|
+
* prev_seg_file_name + "': " + err2 + "'; will retry"*/
|
938
|
+
XENDTRY
|
939
|
+
}
|
940
|
+
}
|
941
|
+
XENDTRY
|
522
942
|
}
|
523
|
-
i--;
|
524
|
-
file_name[i] = '_';
|
525
|
-
return estrdup(&file_name[i]);
|
526
943
|
}
|
527
944
|
|
528
|
-
SegmentInfos *sis_new()
|
945
|
+
SegmentInfos *sis_new(FieldInfos *fis)
|
529
946
|
{
|
530
|
-
SegmentInfos *sis =
|
947
|
+
SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
|
948
|
+
REF(fis);
|
949
|
+
sis->fis = fis;
|
531
950
|
sis->format = FORMAT;
|
532
951
|
sis->version = (f_u64)time(NULL);
|
533
952
|
sis->size = 0;
|
534
953
|
sis->counter = 0;
|
954
|
+
sis->generation = -1;
|
535
955
|
sis->capa = 4;
|
536
956
|
sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
|
537
957
|
return sis;
|
@@ -539,8 +959,7 @@ SegmentInfos *sis_new()
|
|
539
959
|
|
540
960
|
SegmentInfo *sis_new_segment(SegmentInfos *sis, int doc_cnt, Store *store)
|
541
961
|
{
|
542
|
-
return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt,
|
543
|
-
store));
|
962
|
+
return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt, store));
|
544
963
|
}
|
545
964
|
|
546
965
|
void sis_destroy(SegmentInfos *sis)
|
@@ -548,8 +967,9 @@ void sis_destroy(SegmentInfos *sis)
|
|
548
967
|
int i;
|
549
968
|
const int sis_size = sis->size;
|
550
969
|
for (i = 0; i < sis_size; i++) {
|
551
|
-
|
970
|
+
si_deref(sis->segs[i]);
|
552
971
|
}
|
972
|
+
if (sis->fis) fis_deref(sis->fis);
|
553
973
|
free(sis->segs);
|
554
974
|
free(sis);
|
555
975
|
}
|
@@ -557,11 +977,10 @@ void sis_destroy(SegmentInfos *sis)
|
|
557
977
|
SegmentInfo *sis_add_si(SegmentInfos *sis, SegmentInfo *si)
|
558
978
|
{
|
559
979
|
if (sis->size >= sis->capa) {
|
560
|
-
sis->capa
|
980
|
+
sis->capa <<= 1;
|
561
981
|
REALLOC_N(sis->segs, SegmentInfo *, sis->capa);
|
562
982
|
}
|
563
|
-
sis->segs[sis->size] = si;
|
564
|
-
sis->size++;
|
983
|
+
sis->segs[sis->size++] = si;
|
565
984
|
return si;
|
566
985
|
}
|
567
986
|
|
@@ -569,7 +988,7 @@ void sis_del_at(SegmentInfos *sis, int at)
|
|
569
988
|
{
|
570
989
|
int i;
|
571
990
|
const int sis_size = --(sis->size);
|
572
|
-
|
991
|
+
si_deref(sis->segs[at]);
|
573
992
|
for (i = at; i < sis_size; i++) {
|
574
993
|
sis->segs[i] = sis->segs[i+1];
|
575
994
|
}
|
@@ -580,7 +999,7 @@ void sis_del_from_to(SegmentInfos *sis, int from, int to)
|
|
580
999
|
int i, num_to_del = to - from;
|
581
1000
|
const int sis_size = sis->size -= num_to_del;
|
582
1001
|
for (i = from; i < to; i++) {
|
583
|
-
|
1002
|
+
si_deref(sis->segs[i]);
|
584
1003
|
}
|
585
1004
|
for (i = from; i < sis_size; i++) {
|
586
1005
|
sis->segs[i] = sis->segs[i+num_to_del];
|
@@ -592,74 +1011,106 @@ void sis_clear(SegmentInfos *sis)
|
|
592
1011
|
int i;
|
593
1012
|
const int sis_size = sis->size;
|
594
1013
|
for (i = 0; i < sis_size; i++) {
|
595
|
-
|
1014
|
+
si_deref(sis->segs[i]);
|
596
1015
|
}
|
597
1016
|
sis->size = 0;
|
598
1017
|
}
|
599
1018
|
|
600
|
-
|
1019
|
+
void sis_read_i(Store *store, FindSegmentsFile *fsf)
|
601
1020
|
{
|
602
|
-
int doc_cnt;
|
603
1021
|
int seg_cnt;
|
604
1022
|
int i;
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
sis
|
1023
|
+
bool success = false;
|
1024
|
+
char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
1025
|
+
InStream *is = NULL;
|
1026
|
+
SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
|
1027
|
+
segfn_for_generation(seg_file_name, fsf->generation);
|
1028
|
+
TRY
|
1029
|
+
is = store->open_input(store, seg_file_name);
|
1030
|
+
sis->store = store;
|
609
1031
|
|
610
|
-
|
611
|
-
|
612
|
-
|
613
|
-
|
1032
|
+
sis->generation = fsf->generation;
|
1033
|
+
sis->format = is_read_u32(is); /* do nothing. it's the first version */
|
1034
|
+
sis->version = is_read_u64(is);
|
1035
|
+
sis->counter = is_read_u64(is);
|
1036
|
+
seg_cnt = is_read_vint(is);
|
614
1037
|
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
1038
|
+
/* allocate space for segments */
|
1039
|
+
for (sis->capa = 4; sis->capa < seg_cnt; sis->capa <<= 1) {
|
1040
|
+
}
|
1041
|
+
sis->size = 0;
|
1042
|
+
sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
|
620
1043
|
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
|
1044
|
+
for (i = 0; i < seg_cnt; i++) {
|
1045
|
+
sis_add_si(sis, si_read(store, is));
|
1046
|
+
}
|
1047
|
+
sis->fis = fis_read(is);
|
1048
|
+
success = true;
|
1049
|
+
XFINALLY
|
1050
|
+
if (is) is_close(is);
|
1051
|
+
if (!success) {
|
1052
|
+
sis_destroy(sis);
|
1053
|
+
}
|
1054
|
+
XENDTRY
|
1055
|
+
fsf->p_return = sis;
|
1056
|
+
}
|
627
1057
|
|
628
|
-
|
1058
|
+
SegmentInfos *sis_read(Store *store)
|
1059
|
+
{
|
1060
|
+
FindSegmentsFile fsf;
|
1061
|
+
sis_find_segments_file(store, &fsf, &sis_read_i);
|
1062
|
+
return fsf.p_return;
|
629
1063
|
}
|
630
1064
|
|
631
|
-
void sis_write(SegmentInfos *sis, Store *store)
|
1065
|
+
void sis_write(SegmentInfos *sis, Store *store, Deleter *deleter)
|
632
1066
|
{
|
633
1067
|
int i;
|
634
|
-
|
635
|
-
OutStream *os = store->new_output(store, TEMPORARY_SEGMENTS_FILENAME);
|
1068
|
+
OutStream *os = NULL;
|
636
1069
|
const int sis_size = sis->size;
|
1070
|
+
char buf[SEGMENT_NAME_MAX_LENGTH];
|
1071
|
+
sis->generation++;
|
637
1072
|
|
638
|
-
|
639
|
-
|
640
|
-
|
641
|
-
|
642
|
-
|
643
|
-
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
|
1073
|
+
TRY
|
1074
|
+
os = store->new_output(store,
|
1075
|
+
segfn_for_generation(buf, sis->generation));
|
1076
|
+
os_write_u32(os, FORMAT);
|
1077
|
+
os_write_u64(os, ++(sis->version)); /* every write changes the index */
|
1078
|
+
os_write_u64(os, sis->counter);
|
1079
|
+
os_write_vint(os, sis->size);
|
1080
|
+
for (i = 0; i < sis_size; i++) {
|
1081
|
+
si_write(sis->segs[i], os);
|
1082
|
+
}
|
1083
|
+
fis_write(sis->fis, os);
|
1084
|
+
XFINALLY
|
1085
|
+
os_close(os);
|
1086
|
+
XENDTRY
|
1087
|
+
|
1088
|
+
TRY
|
1089
|
+
os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
|
1090
|
+
os_write_u64(os, sis->generation);
|
1091
|
+
os_write_u64(os, sis->generation);
|
1092
|
+
XFINALLY
|
1093
|
+
/* It's OK if we fail to write this file since it's
|
1094
|
+
* used only as one of the retry fallbacks. */
|
1095
|
+
HANDLED();
|
1096
|
+
os_close(os);
|
1097
|
+
XENDTRY
|
648
1098
|
|
649
|
-
|
650
|
-
|
1099
|
+
if (deleter && sis->generation > 0) {
|
1100
|
+
deleter_delete_file(deleter,
|
1101
|
+
segfn_for_generation(buf, sis->generation - 1));
|
1102
|
+
}
|
651
1103
|
}
|
652
1104
|
|
653
|
-
|
1105
|
+
void sis_read_ver_i(Store *store, FindSegmentsFile *fsf)
|
654
1106
|
{
|
655
1107
|
InStream *is;
|
656
1108
|
f_u32 format = 0;
|
657
1109
|
f_u64 version = 0;
|
1110
|
+
char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
658
1111
|
|
659
|
-
|
660
|
-
|
661
|
-
}
|
662
|
-
is = store->open_input(store, SEGMENTS_FILENAME);
|
1112
|
+
segfn_for_generation(seg_file_name, (f_u64)fsf->generation);
|
1113
|
+
is = store->open_input(store, seg_file_name);
|
663
1114
|
|
664
1115
|
TRY
|
665
1116
|
format = is_read_u32(is);
|
@@ -668,7 +1119,14 @@ f_u64 sis_read_current_version(Store *store)
|
|
668
1119
|
is_close(is);
|
669
1120
|
XENDTRY
|
670
1121
|
|
671
|
-
|
1122
|
+
fsf->u64_return = version;
|
1123
|
+
}
|
1124
|
+
|
1125
|
+
f_u64 sis_read_current_version(Store *store)
|
1126
|
+
{
|
1127
|
+
FindSegmentsFile fsf;
|
1128
|
+
sis_find_segments_file(store, &fsf, &sis_read_ver_i);
|
1129
|
+
return fsf.u64_return;
|
672
1130
|
}
|
673
1131
|
|
674
1132
|
/****************************************************************************
|
@@ -704,7 +1162,7 @@ char *lazy_df_get_data(LazyDocField *self, int i)
|
|
704
1162
|
char *text = NULL;
|
705
1163
|
if (i < self->size && i >= 0) {
|
706
1164
|
text = self->data[i].text;
|
707
|
-
if (
|
1165
|
+
if (NULL == text) {
|
708
1166
|
const int read_len = self->data[i].length + 1;
|
709
1167
|
self->data[i].text = text = ALLOC_N(char, read_len);
|
710
1168
|
is_seek(self->doc->fields_in, self->data[i].start);
|
@@ -1220,8 +1678,8 @@ char *te_skip_to(TermEnum *te, const char *term)
|
|
1220
1678
|
{
|
1221
1679
|
char *curr_term = te->curr_term;
|
1222
1680
|
if (strcmp(curr_term, term) < 0) {
|
1223
|
-
while (((curr_term = te->next(te))
|
1224
|
-
(strcmp(curr_term, term) < 0)) {
|
1681
|
+
while (NULL != ((curr_term = te->next(te)))
|
1682
|
+
&& (strcmp(curr_term, term) < 0)) {
|
1225
1683
|
}
|
1226
1684
|
}
|
1227
1685
|
return curr_term;
|
@@ -1258,7 +1716,7 @@ static void sti_destroy(SegmentTermIndex *sti)
|
|
1258
1716
|
static void sti_ensure_index_is_read(SegmentTermIndex *sti,
|
1259
1717
|
TermEnum *index_te)
|
1260
1718
|
{
|
1261
|
-
if (sti->index_terms
|
1719
|
+
if (NULL == sti->index_terms) {
|
1262
1720
|
int i;
|
1263
1721
|
int index_size = sti->index_size;
|
1264
1722
|
off_t index_ptr = 0;
|
@@ -1314,7 +1772,7 @@ static int sti_get_index_offset(SegmentTermIndex *sti, const char *term)
|
|
1314
1772
|
****************************************************************************/
|
1315
1773
|
|
1316
1774
|
#define SFI_ENSURE_INDEX_IS_READ(sfi, sti) do {\
|
1317
|
-
if (sti->index_terms
|
1775
|
+
if (NULL == sti->index_terms) {\
|
1318
1776
|
mutex_lock(&sfi->mutex);\
|
1319
1777
|
sti_ensure_index_is_read(sti, sfi->index_te);\
|
1320
1778
|
mutex_unlock(&sfi->mutex);\
|
@@ -1351,7 +1809,7 @@ SegmentFieldIndex *sfi_open(Store *store, const char *segment)
|
|
1351
1809
|
|
1352
1810
|
sprintf(file_name, "%s.tix", segment);
|
1353
1811
|
is = store->open_input(store, file_name);
|
1354
|
-
sfi->index_te = ste_new(is,
|
1812
|
+
sfi->index_te = ste_new(is, sfi);
|
1355
1813
|
return sfi;
|
1356
1814
|
}
|
1357
1815
|
|
@@ -1394,8 +1852,8 @@ static char *ste_next(TermEnum *te)
|
|
1394
1852
|
|
1395
1853
|
ti = &(te->curr_ti);
|
1396
1854
|
ti->doc_freq = is_read_vint(is); /* read doc freq */
|
1397
|
-
ti->frq_ptr += is_read_voff_t(is)
|
1398
|
-
ti->prx_ptr += is_read_voff_t(is)
|
1855
|
+
ti->frq_ptr += is_read_voff_t(is); /* read freq ptr */
|
1856
|
+
ti->prx_ptr += is_read_voff_t(is); /* read prox ptr */
|
1399
1857
|
if (ti->doc_freq >= STE(te)->skip_interval) {
|
1400
1858
|
ti->skip_offset = is_read_voff_t(is);
|
1401
1859
|
}
|
@@ -1497,7 +1955,7 @@ static TermInfo *ste_scan_for_term_info(SegmentTermEnum *ste, const char *term)
|
|
1497
1955
|
{
|
1498
1956
|
ste_scan_to(ste, term);
|
1499
1957
|
|
1500
|
-
if (strcmp(TE(ste)->curr_term, term)
|
1958
|
+
if (0 == strcmp(TE(ste)->curr_term, term)) {
|
1501
1959
|
return te_get_ti((TermEnum *)ste);
|
1502
1960
|
}
|
1503
1961
|
else {
|
@@ -1521,7 +1979,7 @@ static char *ste_get_term(TermEnum *te, int pos)
|
|
1521
1979
|
ste_index_seek(te, sti, pos / idx_int);
|
1522
1980
|
}
|
1523
1981
|
while (ste->pos < pos) {
|
1524
|
-
if (ste_next(te)
|
1982
|
+
if (NULL == ste_next(te)) {
|
1525
1983
|
return NULL;
|
1526
1984
|
}
|
1527
1985
|
}
|
@@ -1575,7 +2033,7 @@ typedef struct MultiTermEnum
|
|
1575
2033
|
static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
|
1576
2034
|
{
|
1577
2035
|
int cmpres = strcmp(tew1->term, tew2->term);
|
1578
|
-
if (
|
2036
|
+
if (0 == cmpres) {
|
1579
2037
|
return tew1->index < tew2->index;
|
1580
2038
|
}
|
1581
2039
|
else {
|
@@ -1637,7 +2095,7 @@ static char *mte_next(TermEnum *te)
|
|
1637
2095
|
TermEnumWrapper *top =
|
1638
2096
|
(TermEnumWrapper *)pq_top(MTE(te)->tew_queue);
|
1639
2097
|
|
1640
|
-
if (
|
2098
|
+
if (NULL == top) {
|
1641
2099
|
te->curr_term[0] = '\0';
|
1642
2100
|
te->curr_term_len = 0;
|
1643
2101
|
return false;
|
@@ -1650,7 +2108,7 @@ static char *mte_next(TermEnum *te)
|
|
1650
2108
|
te->curr_ti.doc_freq = 0;
|
1651
2109
|
|
1652
2110
|
MTE(te)->ti_cnt = 0;
|
1653
|
-
while ((
|
2111
|
+
while ((NULL != top) && (0 == strcmp(te->curr_term, top->term))) {
|
1654
2112
|
pq_pop(MTE(te)->tew_queue);
|
1655
2113
|
te->curr_ti.doc_freq += top->te->curr_ti.doc_freq;/* increment freq */
|
1656
2114
|
MTE(te)->ti_indexes[MTE(te)->ti_cnt] = top->index;
|
@@ -1752,7 +2210,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1752
2210
|
if (fnum >= 0) {
|
1753
2211
|
TermEnumWrapper *tew;
|
1754
2212
|
|
1755
|
-
if (
|
2213
|
+
if (NULL != term) {
|
1756
2214
|
sub_te = reader->terms_from(reader, fnum, term);
|
1757
2215
|
}
|
1758
2216
|
else {
|
@@ -1760,7 +2218,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1760
2218
|
}
|
1761
2219
|
|
1762
2220
|
tew = tew_setup(&(mte->tews[i]), i, sub_te, reader);
|
1763
|
-
if (((
|
2221
|
+
if (((NULL == term) && tew_next(tew))
|
1764
2222
|
|| (tew->term && (tew->term[0] != '\0'))) {
|
1765
2223
|
pq_push(mte->tew_queue, tew); /* initialize queue */
|
1766
2224
|
}
|
@@ -1772,7 +2230,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1772
2230
|
}
|
1773
2231
|
}
|
1774
2232
|
|
1775
|
-
if ((
|
2233
|
+
if ((NULL != term) && (0 < mte->tew_queue->size)) {
|
1776
2234
|
mte_next(TE(mte));
|
1777
2235
|
}
|
1778
2236
|
|
@@ -1804,7 +2262,7 @@ TermInfosReader *tir_open(Store *store,
|
|
1804
2262
|
static __inline TermEnum *tir_enum(TermInfosReader *tir)
|
1805
2263
|
{
|
1806
2264
|
TermEnum *te;
|
1807
|
-
if ((te = thread_getspecific(tir->thread_te))
|
2265
|
+
if (NULL == (te = thread_getspecific(tir->thread_te))) {
|
1808
2266
|
te = ste_clone(tir->orig_te);
|
1809
2267
|
ste_set_field(te, tir->field_num);
|
1810
2268
|
ary_push(tir->te_bucket, te);
|
@@ -1827,8 +2285,8 @@ TermInfo *tir_get_ti(TermInfosReader *tir, const char *term)
|
|
1827
2285
|
TermEnum *te = tir_enum(tir);
|
1828
2286
|
char *match;
|
1829
2287
|
|
1830
|
-
if ((match = ste_scan_to(te, term))
|
1831
|
-
strcmp(match, term)
|
2288
|
+
if (NULL != (match = ste_scan_to(te, term))
|
2289
|
+
&& 0 == strcmp(match, term)) {
|
1832
2290
|
return &(te->curr_ti);
|
1833
2291
|
}
|
1834
2292
|
return NULL;
|
@@ -1845,8 +2303,8 @@ TermInfo *tir_get_ti_field(TermInfosReader *tir, int field_num,
|
|
1845
2303
|
tir->field_num = field_num;
|
1846
2304
|
}
|
1847
2305
|
|
1848
|
-
if ((match = ste_scan_to(te, term))
|
1849
|
-
strcmp(match, term)
|
2306
|
+
if (NULL != (match = ste_scan_to(te, term))
|
2307
|
+
&& 0 == strcmp(match, term)) {
|
1850
2308
|
return &(te->curr_ti);
|
1851
2309
|
}
|
1852
2310
|
return NULL;
|
@@ -1937,7 +2395,7 @@ static __inline void tw_write_term(TermWriter *tw,
|
|
1937
2395
|
|
1938
2396
|
os_write_vint(os, start); /* write shared prefix length */
|
1939
2397
|
os_write_vint(os, length); /* write delta length */
|
1940
|
-
os_write_bytes(os, (uchar *)(term + start), length);
|
2398
|
+
os_write_bytes(os, (uchar *)(term + start), length); /* write delta chars */
|
1941
2399
|
|
1942
2400
|
tw->last_term = term;
|
1943
2401
|
}
|
@@ -1945,13 +2403,15 @@ static __inline void tw_write_term(TermWriter *tw,
|
|
1945
2403
|
static void tw_add(TermWriter *tw,
|
1946
2404
|
const char *term,
|
1947
2405
|
int term_len,
|
1948
|
-
TermInfo *ti
|
2406
|
+
TermInfo *ti,
|
2407
|
+
int skip_interval)
|
1949
2408
|
{
|
1950
2409
|
OutStream *os = tw->os;
|
1951
2410
|
|
1952
2411
|
#ifdef DEBUG
|
1953
2412
|
if (strcmp(tw->last_term, term) > 0) {
|
1954
|
-
RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d",
|
2413
|
+
RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d",
|
2414
|
+
tw->last_term, term, *tw->last_term, *term);
|
1955
2415
|
}
|
1956
2416
|
if (ti->frq_ptr < tw->last_term_info.frq_ptr) {
|
1957
2417
|
RAISE(STATE_ERROR, "%"F_OFF_T_PFX"d > %"F_OFF_T_PFX"d", ti->frq_ptr,
|
@@ -1967,6 +2427,9 @@ static void tw_add(TermWriter *tw,
|
|
1967
2427
|
os_write_vint(os, ti->doc_freq); /* write doc freq */
|
1968
2428
|
os_write_voff_t(os, ti->frq_ptr - tw->last_term_info.frq_ptr);
|
1969
2429
|
os_write_voff_t(os, ti->prx_ptr - tw->last_term_info.prx_ptr);
|
2430
|
+
if (ti->doc_freq >= skip_interval) {
|
2431
|
+
os_write_voff_t(os, ti->skip_offset);
|
2432
|
+
}
|
1970
2433
|
|
1971
2434
|
tw->last_term_info = *ti;
|
1972
2435
|
tw->counter++;
|
@@ -1983,22 +2446,19 @@ void tiw_add(TermInfosWriter *tiw,
|
|
1983
2446
|
printf("%s:%d:%d:%d:%d\n", term, term_len, ti->doc_freq,
|
1984
2447
|
ti->frq_ptr, ti->prx_ptr);
|
1985
2448
|
*/
|
1986
|
-
if ((tiw->tis_writer->counter % tiw->index_interval)
|
2449
|
+
if (0 == (tiw->tis_writer->counter % tiw->index_interval)) {
|
1987
2450
|
/* add an index term */
|
1988
2451
|
tw_add(tiw->tix_writer,
|
1989
2452
|
tiw->tis_writer->last_term,
|
1990
2453
|
strlen(tiw->tis_writer->last_term),
|
1991
|
-
&(tiw->tis_writer->last_term_info)
|
2454
|
+
&(tiw->tis_writer->last_term_info),
|
2455
|
+
tiw->skip_interval);
|
1992
2456
|
tis_pos = os_pos(tiw->tis_writer->os);
|
1993
2457
|
os_write_voff_t(tiw->tix_writer->os, tis_pos - tiw->last_index_ptr);
|
1994
2458
|
tiw->last_index_ptr = tis_pos; /* write ptr */
|
1995
2459
|
}
|
1996
2460
|
|
1997
|
-
tw_add(tiw->tis_writer, term, term_len, ti);
|
1998
|
-
|
1999
|
-
if (ti->doc_freq >= tiw->skip_interval) {
|
2000
|
-
os_write_voff_t(tiw->tis_writer->os, ti->skip_offset);
|
2001
|
-
}
|
2461
|
+
tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
|
2002
2462
|
}
|
2003
2463
|
|
2004
2464
|
static __inline void tw_reset(TermWriter *tw)
|
@@ -2051,7 +2511,7 @@ void tiw_close(TermInfosWriter *tiw)
|
|
2051
2511
|
#define TDE(stde) ((TermDocEnum *)(stde))
|
2052
2512
|
|
2053
2513
|
#define CHECK_STATE(method) do {\
|
2054
|
-
if (STDE(tde)->count
|
2514
|
+
if (0 == STDE(tde)->count) {\
|
2055
2515
|
RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
|
2056
2516
|
"before you call #"method);\
|
2057
2517
|
}\
|
@@ -2059,7 +2519,7 @@ void tiw_close(TermInfosWriter *tiw)
|
|
2059
2519
|
|
2060
2520
|
static void stde_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
|
2061
2521
|
{
|
2062
|
-
if (
|
2522
|
+
if (NULL == ti) {
|
2063
2523
|
stde->doc_freq = 0;
|
2064
2524
|
}
|
2065
2525
|
else {
|
@@ -2117,7 +2577,7 @@ static bool stde_next(TermDocEnum *tde)
|
|
2117
2577
|
|
2118
2578
|
doc_code = is_read_vint(stde->frq_in);
|
2119
2579
|
stde->doc_num += doc_code >> 1; /* shift off low bit */
|
2120
|
-
if ((doc_code & 1)
|
2580
|
+
if (0 != (doc_code & 1)) { /* if low bit is set */
|
2121
2581
|
stde->freq = 1; /* freq is one */
|
2122
2582
|
}
|
2123
2583
|
else {
|
@@ -2126,8 +2586,8 @@ static bool stde_next(TermDocEnum *tde)
|
|
2126
2586
|
|
2127
2587
|
stde->count++;
|
2128
2588
|
|
2129
|
-
if (stde->deleted_docs
|
2130
|
-
bv_get(stde->deleted_docs, stde->doc_num)
|
2589
|
+
if (NULL == stde->deleted_docs
|
2590
|
+
|| 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
|
2131
2591
|
break; /* We found an undeleted doc so return */
|
2132
2592
|
}
|
2133
2593
|
|
@@ -2146,7 +2606,7 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2146
2606
|
/* manually inlined call to next() for speed */
|
2147
2607
|
doc_code = is_read_vint(stde->frq_in);
|
2148
2608
|
stde->doc_num += (doc_code >> 1); /* shift off low bit */
|
2149
|
-
if ((doc_code & 1)
|
2609
|
+
if (0 != (doc_code & 1)) { /* if low bit is set */
|
2150
2610
|
stde->freq = 1; /* freq is one */
|
2151
2611
|
}
|
2152
2612
|
else {
|
@@ -2155,8 +2615,8 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2155
2615
|
|
2156
2616
|
stde->count++;
|
2157
2617
|
|
2158
|
-
if (stde->deleted_docs
|
2159
|
-
bv_get(stde->deleted_docs, stde->doc_num)
|
2618
|
+
if (NULL == stde->deleted_docs
|
2619
|
+
|| 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
|
2160
2620
|
docs[i] = stde->doc_num;
|
2161
2621
|
freqs[i] = stde->freq;
|
2162
2622
|
i++;
|
@@ -2169,16 +2629,18 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2169
2629
|
{
|
2170
2630
|
SegmentTermDocEnum *stde = STDE(tde);
|
2171
2631
|
|
2172
|
-
if (stde->doc_freq >= stde->skip_interval
|
2632
|
+
if (stde->doc_freq >= stde->skip_interval
|
2633
|
+
&& target_doc_num > stde->doc_num) { /* optimized case */
|
2173
2634
|
int last_skip_doc;
|
2174
|
-
|
2175
|
-
|
2635
|
+
off_t last_frq_ptr;
|
2636
|
+
off_t last_prx_ptr;
|
2176
2637
|
int num_skipped;
|
2177
2638
|
|
2178
|
-
if (stde->skip_in
|
2179
|
-
stde->skip_in = is_clone(stde->frq_in)
|
2639
|
+
if (NULL == stde->skip_in) {
|
2640
|
+
stde->skip_in = is_clone(stde->frq_in);/* lazily clone */
|
2180
2641
|
}
|
2181
2642
|
|
2643
|
+
//printf("skip_ptr = %lld\n", stde->skip_ptr);
|
2182
2644
|
if (!stde->have_skipped) { /* lazily seek skip stream */
|
2183
2645
|
is_seek(stde->skip_in, stde->skip_ptr);
|
2184
2646
|
stde->have_skipped = true;
|
@@ -2189,13 +2651,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2189
2651
|
last_frq_ptr = is_pos(stde->frq_in);
|
2190
2652
|
last_prx_ptr = -1;
|
2191
2653
|
num_skipped = -1 - (stde->count % stde->skip_interval);
|
2654
|
+
//printf("%d, %d, %d, %d\n", last_skip_doc, last_frq_ptr, last_prx_ptr, num_skipped);
|
2192
2655
|
|
2193
2656
|
while (target_doc_num > stde->skip_doc) {
|
2194
2657
|
last_skip_doc = stde->skip_doc;
|
2195
2658
|
last_frq_ptr = stde->frq_ptr;
|
2196
2659
|
last_prx_ptr = stde->prx_ptr;
|
2197
2660
|
|
2198
|
-
if (stde->skip_doc
|
2661
|
+
if (0 != stde->skip_doc && stde->skip_doc >= stde->doc_num) {
|
2199
2662
|
num_skipped += stde->skip_interval;
|
2200
2663
|
}
|
2201
2664
|
|
@@ -2204,13 +2667,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2204
2667
|
}
|
2205
2668
|
|
2206
2669
|
stde->skip_doc += is_read_vint(stde->skip_in);
|
2207
|
-
stde->frq_ptr
|
2208
|
-
stde->prx_ptr
|
2670
|
+
stde->frq_ptr += is_read_vint(stde->skip_in);
|
2671
|
+
stde->prx_ptr += is_read_vint(stde->skip_in);
|
2672
|
+
//printf("inner-> skip_doc:%d, frq_ptr:%d, prx_ptr:%d\n", stde->skip_doc, stde->frq_ptr, stde->prx_ptr);
|
2209
2673
|
|
2210
2674
|
stde->skip_count++;
|
2211
2675
|
}
|
2212
2676
|
|
2213
|
-
/* if we found something to skip,
|
2677
|
+
/* if we found something to skip, skip it */
|
2214
2678
|
if (last_frq_ptr > is_pos(stde->frq_in)) {
|
2215
2679
|
is_seek(stde->frq_in, last_frq_ptr);
|
2216
2680
|
stde->seek_prox(stde, last_prx_ptr);
|
@@ -2233,7 +2697,7 @@ static void stde_close(TermDocEnum *tde)
|
|
2233
2697
|
{
|
2234
2698
|
is_close(STDE(tde)->frq_in);
|
2235
2699
|
|
2236
|
-
if (STDE(tde)->skip_in
|
2700
|
+
if (NULL != STDE(tde)->skip_in) {
|
2237
2701
|
is_close(STDE(tde)->skip_in);
|
2238
2702
|
}
|
2239
2703
|
|
@@ -2245,7 +2709,7 @@ static void stde_skip_prox(SegmentTermDocEnum *stde)
|
|
2245
2709
|
(void)stde;
|
2246
2710
|
}
|
2247
2711
|
|
2248
|
-
static void stde_seek_prox(SegmentTermDocEnum *stde,
|
2712
|
+
static void stde_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
|
2249
2713
|
{
|
2250
2714
|
(void)stde;
|
2251
2715
|
(void)prx_ptr;
|
@@ -2290,7 +2754,7 @@ TermDocEnum *stde_new(TermInfosReader *tir,
|
|
2290
2754
|
|
2291
2755
|
static void stpe_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
|
2292
2756
|
{
|
2293
|
-
if (
|
2757
|
+
if (NULL == ti) {
|
2294
2758
|
stde->doc_freq = 0;
|
2295
2759
|
}
|
2296
2760
|
else {
|
@@ -2351,7 +2815,7 @@ static void stpe_skip_prox(SegmentTermDocEnum *stde)
|
|
2351
2815
|
is_skip_vints(stde->prx_in, stde->freq);
|
2352
2816
|
}
|
2353
2817
|
|
2354
|
-
static void stpe_seek_prox(SegmentTermDocEnum *stde,
|
2818
|
+
static void stpe_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
|
2355
2819
|
{
|
2356
2820
|
is_seek(stde->prx_in, prx_ptr);
|
2357
2821
|
stde->prx_cnt = 0;
|
@@ -2422,7 +2886,7 @@ static TermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
|
|
2422
2886
|
}
|
2423
2887
|
|
2424
2888
|
#define CHECK_CURR_TDE(method) do {\
|
2425
|
-
if (MTDE(tde)->curr_tde
|
2889
|
+
if (NULL == MTDE(tde)->curr_tde) {\
|
2426
2890
|
RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
|
2427
2891
|
"before you call #"method);\
|
2428
2892
|
}\
|
@@ -2456,7 +2920,7 @@ static void mtde_seek(TermDocEnum *tde, int field_num, const char *term)
|
|
2456
2920
|
TermEnum *te = mtde->te;
|
2457
2921
|
char *t;
|
2458
2922
|
te->set_field(te, field_num);
|
2459
|
-
if ((t = te->skip_to(te, term))
|
2923
|
+
if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
|
2460
2924
|
mtde_seek_te(tde, te);
|
2461
2925
|
} else {
|
2462
2926
|
memset(mtde->state, 0, mtde->ir_cnt);
|
@@ -2478,7 +2942,7 @@ static int mtde_freq(TermDocEnum *tde)
|
|
2478
2942
|
static bool mtde_next(TermDocEnum *tde)
|
2479
2943
|
{
|
2480
2944
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2481
|
-
if (mtde->curr_tde
|
2945
|
+
if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
|
2482
2946
|
return true;
|
2483
2947
|
}
|
2484
2948
|
else if (mtde_next_tde(mtde)) {
|
@@ -2494,7 +2958,7 @@ static int mtde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
|
|
2494
2958
|
int i, end = 0, last_end = 0, b;
|
2495
2959
|
MultiTermDocEnum *mtde = MTDE(tde);
|
2496
2960
|
while (true) {
|
2497
|
-
if (mtde->curr_tde
|
2961
|
+
if (NULL == mtde->curr_tde) return end;
|
2498
2962
|
end += mtde->curr_tde->read(mtde->curr_tde, docs + last_end,
|
2499
2963
|
freqs + last_end, req_num - last_end);
|
2500
2964
|
if (end == last_end) { /* none left in segment */
|
@@ -2527,13 +2991,7 @@ static bool mtde_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2527
2991
|
|
2528
2992
|
mtde_next_tde(mtde);
|
2529
2993
|
}
|
2530
|
-
|
2531
|
-
if (curr_tde) {
|
2532
|
-
return curr_tde->skip_to(curr_tde, target_doc_num - mtde->base);
|
2533
|
-
}
|
2534
|
-
else {
|
2535
|
-
return false;
|
2536
|
-
}
|
2994
|
+
return false;
|
2537
2995
|
}
|
2538
2996
|
|
2539
2997
|
static void mtde_close(TermDocEnum *tde)
|
@@ -2660,7 +3118,7 @@ static bool mtdpe_next(TermDocEnum *tde)
|
|
2660
3118
|
int doc;
|
2661
3119
|
MultipleTermDocPosEnum *mtdpe = MTDPE(tde);
|
2662
3120
|
|
2663
|
-
if (mtdpe->pq->size
|
3121
|
+
if (0 == mtdpe->pq->size) {
|
2664
3122
|
return false;
|
2665
3123
|
}
|
2666
3124
|
|
@@ -2710,7 +3168,7 @@ bool mtdpe_skip_to(TermDocEnum *tde, int target_doc_num)
|
|
2710
3168
|
TermDocEnum *sub_tde;
|
2711
3169
|
PriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
|
2712
3170
|
|
2713
|
-
while ((sub_tde = (TermDocEnum *)pq_top(mtdpe_pq))
|
3171
|
+
while (NULL != (sub_tde = (TermDocEnum *)pq_top(mtdpe_pq))
|
2714
3172
|
&& (target_doc_num > sub_tde->doc_num(sub_tde))) {
|
2715
3173
|
if (sub_tde->skip_to(sub_tde, target_doc_num)) {
|
2716
3174
|
pq_down(mtdpe_pq);
|
@@ -2779,6 +3237,256 @@ TermDocEnum *mtdpe_new(IndexReader *ir, int field_num, char **terms, int t_cnt)
|
|
2779
3237
|
return tde;
|
2780
3238
|
}
|
2781
3239
|
|
3240
|
+
/****************************************************************************
|
3241
|
+
*
|
3242
|
+
* FileNameFilter
|
3243
|
+
*
|
3244
|
+
****************************************************************************/
|
3245
|
+
|
3246
|
+
static HashTable *fn_extensions = NULL;
|
3247
|
+
static void file_name_filter_init()
|
3248
|
+
{
|
3249
|
+
if (NULL == fn_extensions) {
|
3250
|
+
int i;
|
3251
|
+
fn_extensions = h_new_str((free_ft)NULL, (free_ft)NULL);
|
3252
|
+
for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
|
3253
|
+
h_set(fn_extensions, INDEX_EXTENSIONS[i], (char *)INDEX_EXTENSIONS[i]);
|
3254
|
+
}
|
3255
|
+
register_for_cleanup(fn_extensions, (free_ft)&h_destroy);
|
3256
|
+
}
|
3257
|
+
}
|
3258
|
+
|
3259
|
+
static bool file_name_filter_accept(char *file_name)
|
3260
|
+
{
|
3261
|
+
char *p = strrchr(file_name, '.');
|
3262
|
+
if (NULL != p) {
|
3263
|
+
char *extension = p + 1;
|
3264
|
+
if (NULL != h_get(fn_extensions, extension)) {
|
3265
|
+
return true;
|
3266
|
+
}
|
3267
|
+
else if ((*extension == 'f' || *extension == 's')
|
3268
|
+
&& *(extension + 1) >= '0'
|
3269
|
+
&& *(extension + 1) <= '9') {
|
3270
|
+
return true;
|
3271
|
+
}
|
3272
|
+
}
|
3273
|
+
else if (0 == strncmp(SEGMENTS_FILE_NAME, file_name,
|
3274
|
+
sizeof(SEGMENTS_FILE_NAME) - 1)) {
|
3275
|
+
return true;
|
3276
|
+
}
|
3277
|
+
return false;
|
3278
|
+
}
|
3279
|
+
|
3280
|
+
/*
|
3281
|
+
* Returns true if this is a file that would be contained in a CFS file. This
|
3282
|
+
* function should only be called on files that pass the above "accept" (ie,
|
3283
|
+
* are already known to be a Lucene index file).
|
3284
|
+
*/
|
3285
|
+
static bool file_name_filter_is_cfs_file(char *file_name) {
|
3286
|
+
char *p = strrchr(file_name, '.');
|
3287
|
+
if (NULL != p) {
|
3288
|
+
char *extension = p + 1;
|
3289
|
+
if (NULL != h_get(fn_extensions, extension)
|
3290
|
+
&& 0 != strcmp(extension, "del")
|
3291
|
+
&& 0 != strcmp(extension, "gen")
|
3292
|
+
&& 0 != strcmp(extension, "cfs")) {
|
3293
|
+
return true;
|
3294
|
+
}
|
3295
|
+
else if ('f' == *extension
|
3296
|
+
&& '0' <= *(extension + 1)
|
3297
|
+
&& '9' >= *(extension + 1)) {
|
3298
|
+
return true;
|
3299
|
+
}
|
3300
|
+
}
|
3301
|
+
return false;
|
3302
|
+
}
|
3303
|
+
|
3304
|
+
/****************************************************************************
|
3305
|
+
*
|
3306
|
+
* Deleter
|
3307
|
+
*
|
3308
|
+
****************************************************************************/
|
3309
|
+
|
3310
|
+
#define DELETABLE_START_CAPA 8
|
3311
|
+
Deleter *deleter_new(SegmentInfos *sis, Store *store)
|
3312
|
+
{
|
3313
|
+
Deleter *dlr = ALLOC(Deleter);
|
3314
|
+
dlr->sis = sis;
|
3315
|
+
dlr->store = store;
|
3316
|
+
dlr->pending = hs_new_str(&free);
|
3317
|
+
return dlr;
|
3318
|
+
}
|
3319
|
+
|
3320
|
+
void deleter_destroy(Deleter *dlr)
|
3321
|
+
{
|
3322
|
+
hs_destroy(dlr->pending);
|
3323
|
+
free(dlr);
|
3324
|
+
}
|
3325
|
+
|
3326
|
+
void deleter_queue_file(Deleter *dlr, char *file_name)
|
3327
|
+
{
|
3328
|
+
hs_add(dlr->pending, estrdup(file_name));
|
3329
|
+
}
|
3330
|
+
|
3331
|
+
void deleter_delete_file(Deleter *dlr, char *file_name)
|
3332
|
+
{
|
3333
|
+
Store *store = dlr->store;
|
3334
|
+
TRY
|
3335
|
+
if (store->exists(store, file_name)) {
|
3336
|
+
store->remove(store, file_name);
|
3337
|
+
}
|
3338
|
+
hs_del(dlr->pending, file_name);
|
3339
|
+
XCATCHALL
|
3340
|
+
hs_add(dlr->pending, estrdup(file_name));
|
3341
|
+
XENDTRY
|
3342
|
+
}
|
3343
|
+
|
3344
|
+
void deleter_commit_pending_deletions(Deleter *dlr)
|
3345
|
+
{
|
3346
|
+
int i;
|
3347
|
+
char **pending = (char **)dlr->pending->elems;
|
3348
|
+
for (i = dlr->pending->size - 1; i >= 0; i--) {
|
3349
|
+
deleter_delete_file(dlr, pending[i]);
|
3350
|
+
}
|
3351
|
+
}
|
3352
|
+
|
3353
|
+
void deleter_delete_files(Deleter *dlr, char **files, int file_cnt)
|
3354
|
+
{
|
3355
|
+
int i;
|
3356
|
+
for (i = file_cnt - 1; i >= 0; i--) {
|
3357
|
+
deleter_queue_file(dlr, files[i]);
|
3358
|
+
}
|
3359
|
+
deleter_commit_pending_deletions(dlr);
|
3360
|
+
}
|
3361
|
+
|
3362
|
+
struct DelFilesArg {
|
3363
|
+
char curr_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3364
|
+
Deleter *dlr;
|
3365
|
+
HashTable *current;
|
3366
|
+
};
|
3367
|
+
|
3368
|
+
static void deleter_find_deletable_files_i(char *file_name, void *arg)
|
3369
|
+
{
|
3370
|
+
struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
|
3371
|
+
Deleter *dlr = dfa->dlr;
|
3372
|
+
|
3373
|
+
if (file_name_filter_accept(file_name)
|
3374
|
+
&& 0 != strcmp(file_name, dfa->curr_seg_file_name)
|
3375
|
+
&& 0 != strcmp(file_name, SEGMENTS_GEN_FILE_NAME)) {
|
3376
|
+
|
3377
|
+
bool do_delete = false;
|
3378
|
+
SegmentInfo *si;
|
3379
|
+
char segment_name[SEGMENT_NAME_MAX_LENGTH];
|
3380
|
+
char *extension, *p;
|
3381
|
+
strcpy(segment_name, file_name);
|
3382
|
+
|
3383
|
+
p = strrchr(segment_name, '.');
|
3384
|
+
|
3385
|
+
/* First remove any extension: */
|
3386
|
+
if (NULL != p) {
|
3387
|
+
*p = '\0';
|
3388
|
+
extension = p + 1;
|
3389
|
+
} else {
|
3390
|
+
extension = NULL;
|
3391
|
+
}
|
3392
|
+
|
3393
|
+
/* Then, remove any generation count: */
|
3394
|
+
p = strrchr(segment_name + 1, '_');
|
3395
|
+
if (NULL != p) {
|
3396
|
+
*p = '\0';
|
3397
|
+
}
|
3398
|
+
|
3399
|
+
/* Delete this file if it's not a "current" segment, or, it is a
|
3400
|
+
* single index file but there is now a corresponding compound file: */
|
3401
|
+
if (NULL == (si = h_get(dfa->current, segment_name))) {
|
3402
|
+
/* Delete if segment is not referenced: */
|
3403
|
+
do_delete = true;
|
3404
|
+
}
|
3405
|
+
else {
|
3406
|
+
char tmp_fn[SEGMENT_NAME_MAX_LENGTH];
|
3407
|
+
/* OK, segment is referenced, but file may still be orphan'd: */
|
3408
|
+
if (file_name_filter_is_cfs_file(file_name)
|
3409
|
+
&& si->use_compound_file) {
|
3410
|
+
/* This file is stored in a CFS file for this segment: */
|
3411
|
+
do_delete = true;
|
3412
|
+
}
|
3413
|
+
else if (0 == strcmp("del", extension)) {
|
3414
|
+
/* This is a _segmentName_N.del file: */
|
3415
|
+
if (!fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
|
3416
|
+
|| 0 != strcmp(file_name, tmp_fn)) {
|
3417
|
+
/* If this is a seperate .del file, but it
|
3418
|
+
* doesn't match the current del file name for
|
3419
|
+
* this segment, then delete it: */
|
3420
|
+
do_delete = true;
|
3421
|
+
}
|
3422
|
+
}
|
3423
|
+
else if (NULL != extension
|
3424
|
+
&& ('s' == *extension || 'f' == *extension)
|
3425
|
+
&& isdigit(extension[1])) {
|
3426
|
+
si_norm_file_name(si, tmp_fn, atoi(extension + 1));
|
3427
|
+
/* This is a _segmentName_N.sX file: */
|
3428
|
+
if (0 != strcmp(tmp_fn, file_name)) {
|
3429
|
+
/* This is an orphan'd norms file: */
|
3430
|
+
do_delete = true;
|
3431
|
+
}
|
3432
|
+
}
|
3433
|
+
else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
|
3434
|
+
/* This is a partially written _segmentName.cfs: */
|
3435
|
+
do_delete = true;
|
3436
|
+
}
|
3437
|
+
}
|
3438
|
+
|
3439
|
+
if (do_delete) {
|
3440
|
+
deleter_queue_file(dlr, file_name);
|
3441
|
+
}
|
3442
|
+
}
|
3443
|
+
}
|
3444
|
+
|
3445
|
+
/*
|
3446
|
+
* Determine index files that are no longer referenced and therefore should be
|
3447
|
+
* deleted. This is called once (by the writer), and then subsequently we add
|
3448
|
+
* onto deletable any files that are no longer needed at the point that we
|
3449
|
+
* create the unused file (eg when merging segments), and we only remove from
|
3450
|
+
* deletable when a file is successfully deleted.
|
3451
|
+
*/
|
3452
|
+
void deleter_find_deletable_files(Deleter *dlr)
|
3453
|
+
{
|
3454
|
+
/* Gather all "current" segments: */
|
3455
|
+
int i;
|
3456
|
+
SegmentInfos *sis = dlr->sis;
|
3457
|
+
Store *store = dlr->store;
|
3458
|
+
struct DelFilesArg dfa;
|
3459
|
+
HashTable *current = dfa.current
|
3460
|
+
= h_new_str((free_ft)NULL, (free_ft)si_deref);
|
3461
|
+
dfa.dlr = dlr;
|
3462
|
+
|
3463
|
+
for(i = 0; i < sis->size; i++) {
|
3464
|
+
SegmentInfo *si = (SegmentInfo *)sis->segs[i];
|
3465
|
+
REF(si);
|
3466
|
+
h_set(current, si->name, si);
|
3467
|
+
}
|
3468
|
+
|
3469
|
+
/* Then go through all files in the Directory that are Ferret index files,
|
3470
|
+
* and add to deletable if they are not referenced by the current segments
|
3471
|
+
* info: */
|
3472
|
+
sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
|
3473
|
+
file_name_filter_init();
|
3474
|
+
|
3475
|
+
store->each(store, &deleter_find_deletable_files_i, &dfa);
|
3476
|
+
h_destroy(dfa.current);
|
3477
|
+
}
|
3478
|
+
|
3479
|
+
void deleter_delete_deletable_files(Deleter *dlr)
|
3480
|
+
{
|
3481
|
+
deleter_find_deletable_files(dlr);
|
3482
|
+
deleter_commit_pending_deletions(dlr);
|
3483
|
+
}
|
3484
|
+
|
3485
|
+
void deleter_clear_pending_deletions(Deleter *dlr)
|
3486
|
+
{
|
3487
|
+
hs_clear(dlr->pending);
|
3488
|
+
}
|
3489
|
+
|
2782
3490
|
/****************************************************************************
|
2783
3491
|
*
|
2784
3492
|
* IndexReader
|
@@ -2800,7 +3508,7 @@ void ir_acquire_write_lock(IndexReader *ir)
|
|
2800
3508
|
"need to close and reopen the index");
|
2801
3509
|
}
|
2802
3510
|
|
2803
|
-
if (ir->write_lock
|
3511
|
+
if (NULL == ir->write_lock) {
|
2804
3512
|
ir->write_lock = open_lock(ir->store, WRITE_LOCK_NAME);
|
2805
3513
|
if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
|
2806
3514
|
RAISE(LOCK_ERROR, "Could not obtain write lock when trying to "
|
@@ -2811,8 +3519,8 @@ void ir_acquire_write_lock(IndexReader *ir)
|
|
2811
3519
|
"you can safely delete these files.");
|
2812
3520
|
}
|
2813
3521
|
|
2814
|
-
/* we have to check whether index has changed since this reader was
|
2815
|
-
* if so, this reader is no longer valid for deletion */
|
3522
|
+
/* we have to check whether index has changed since this reader was
|
3523
|
+
* opened. if so, this reader is no longer valid for deletion */
|
2816
3524
|
if (sis_read_current_version(ir->store) > ir->sis->version) {
|
2817
3525
|
ir->is_stale = true;
|
2818
3526
|
ir->write_lock->release(ir->write_lock);
|
@@ -2856,7 +3564,7 @@ IndexReader *ir_setup(IndexReader *ir, Store *store, SegmentInfos *sis,
|
|
2856
3564
|
|
2857
3565
|
bool ir_index_exists(Store *store)
|
2858
3566
|
{
|
2859
|
-
return
|
3567
|
+
return sis_current_segment_generation(store) != 1;
|
2860
3568
|
}
|
2861
3569
|
|
2862
3570
|
int ir_get_field_num(IndexReader *ir, const char *field)
|
@@ -2903,7 +3611,7 @@ uchar *ir_get_norms_i(IndexReader *ir, int field_num)
|
|
2903
3611
|
norms = ir->get_norms(ir, field_num);
|
2904
3612
|
}
|
2905
3613
|
if (!norms) {
|
2906
|
-
if (ir->fake_norms
|
3614
|
+
if (NULL == ir->fake_norms) {
|
2907
3615
|
ir->fake_norms = (uchar *)ecalloc(ir->max_doc(ir));
|
2908
3616
|
}
|
2909
3617
|
norms = ir->fake_norms;
|
@@ -3009,34 +3717,41 @@ TermDocEnum *ir_term_positions_for(IndexReader *ir, const char *field,
|
|
3009
3717
|
|
3010
3718
|
void ir_commit_i(IndexReader *ir)
|
3011
3719
|
{
|
3012
|
-
if (ir->has_changes
|
3013
|
-
|
3014
|
-
|
3015
|
-
|
3016
|
-
|
3017
|
-
if (!commit_lock->obtain(commit_lock)) { /* obtain write lock */
|
3018
|
-
RAISE(LOCK_ERROR, "Error trying to commit the index. Commit "
|
3019
|
-
"lock already obtained");
|
3720
|
+
if (ir->has_changes) {
|
3721
|
+
if (NULL == ir->deleter && NULL != ir->store) {
|
3722
|
+
/* In the MultiReader case, we share this deleter across all
|
3723
|
+
* SegmentReaders: */
|
3724
|
+
ir->set_deleter_i(ir, deleter_new(ir->sis, ir->store));
|
3020
3725
|
}
|
3726
|
+
if (ir->is_owner) {
|
3727
|
+
char curr_seg_fn[MAX_FILE_PATH];
|
3728
|
+
mutex_lock(&ir->store->mutex);
|
3021
3729
|
|
3022
|
-
|
3023
|
-
|
3730
|
+
/* Should not be necessary: no prior commit should have left
|
3731
|
+
* pending files, so just defensive: */
|
3732
|
+
if (ir->deleter) deleter_clear_pending_deletions(ir->deleter);
|
3024
3733
|
|
3025
|
-
|
3026
|
-
|
3027
|
-
|
3734
|
+
sis_curr_seg_file_name(curr_seg_fn, ir->store);
|
3735
|
+
|
3736
|
+
ir->commit_i(ir);
|
3737
|
+
sis_write(ir->sis, ir->store, ir->deleter);
|
3028
3738
|
|
3029
|
-
|
3030
|
-
|
3031
|
-
ir->
|
3032
|
-
|
3033
|
-
ir->write_lock
|
3739
|
+
if (ir->deleter) deleter_delete_file(ir->deleter, curr_seg_fn);
|
3740
|
+
|
3741
|
+
mutex_unlock(&ir->store->mutex);
|
3742
|
+
|
3743
|
+
if (NULL != ir->write_lock) {
|
3744
|
+
/* release write lock */
|
3745
|
+
ir->write_lock->release(ir->write_lock);
|
3746
|
+
close_lock(ir->write_lock);
|
3747
|
+
ir->write_lock = NULL;
|
3748
|
+
}
|
3749
|
+
}
|
3750
|
+
else {
|
3751
|
+
ir->commit_i(ir);
|
3034
3752
|
}
|
3035
|
-
ir->has_changes = false;
|
3036
|
-
}
|
3037
|
-
else {
|
3038
|
-
ir->commit_i(ir);
|
3039
3753
|
}
|
3754
|
+
ir->has_changes = false;
|
3040
3755
|
}
|
3041
3756
|
|
3042
3757
|
void ir_commit(IndexReader *ir)
|
@@ -3049,15 +3764,14 @@ void ir_commit(IndexReader *ir)
|
|
3049
3764
|
void ir_close(IndexReader *ir)
|
3050
3765
|
{
|
3051
3766
|
mutex_lock(&ir->mutex);
|
3052
|
-
if (--(ir->ref_cnt)
|
3767
|
+
if (0 == --(ir->ref_cnt)) {
|
3053
3768
|
ir_commit_i(ir);
|
3054
3769
|
ir->close_i(ir);
|
3055
3770
|
if (ir->store) {
|
3056
3771
|
store_deref(ir->store);
|
3057
3772
|
}
|
3058
|
-
if (ir->is_owner) {
|
3773
|
+
if (ir->is_owner && ir->sis) {
|
3059
3774
|
sis_destroy(ir->sis);
|
3060
|
-
fis_deref(ir->fis);
|
3061
3775
|
}
|
3062
3776
|
if (ir->cache) {
|
3063
3777
|
h_destroy(ir->cache);
|
@@ -3065,6 +3779,9 @@ void ir_close(IndexReader *ir)
|
|
3065
3779
|
if (ir->sort_cache) {
|
3066
3780
|
h_destroy(ir->sort_cache);
|
3067
3781
|
}
|
3782
|
+
if (ir->deleter && ir->is_owner) {
|
3783
|
+
deleter_destroy(ir->deleter);
|
3784
|
+
}
|
3068
3785
|
free(ir->fake_norms);
|
3069
3786
|
|
3070
3787
|
mutex_destroy(&ir->mutex);
|
@@ -3080,26 +3797,14 @@ void ir_close(IndexReader *ir)
|
|
3080
3797
|
**/
|
3081
3798
|
void ir_add_cache(IndexReader *ir)
|
3082
3799
|
{
|
3083
|
-
if (ir->cache
|
3800
|
+
if (NULL == ir->cache) {
|
3084
3801
|
ir->cache = co_hash_create();
|
3085
3802
|
}
|
3086
3803
|
}
|
3087
3804
|
|
3088
3805
|
bool ir_is_latest(IndexReader *ir)
|
3089
3806
|
{
|
3090
|
-
|
3091
|
-
|
3092
|
-
Lock *commit_lock = open_lock(ir->store, COMMIT_LOCK_NAME);
|
3093
|
-
if (!commit_lock->obtain(commit_lock)) {
|
3094
|
-
close_lock(commit_lock);
|
3095
|
-
RAISE(LOCK_ERROR, "Error detecting if the current index is latest "
|
3096
|
-
"version. Commit lock currently obtained");
|
3097
|
-
}
|
3098
|
-
is_latest = (sis_read_current_version(ir->store) == ir->sis->version);
|
3099
|
-
commit_lock->release(commit_lock);
|
3100
|
-
close_lock(commit_lock);
|
3101
|
-
|
3102
|
-
return is_latest;
|
3807
|
+
return (sis_read_current_version(ir->store) == ir->sis->version);
|
3103
3808
|
}
|
3104
3809
|
|
3105
3810
|
/****************************************************************************
|
@@ -3128,35 +3833,27 @@ static Norm *norm_create(InStream *is, int field_num)
|
|
3128
3833
|
static void norm_destroy(Norm *norm)
|
3129
3834
|
{
|
3130
3835
|
is_close(norm->is);
|
3131
|
-
if (norm->bytes
|
3836
|
+
if (NULL != norm->bytes) {
|
3132
3837
|
free(norm->bytes);
|
3133
3838
|
}
|
3134
3839
|
free(norm);
|
3135
3840
|
}
|
3136
3841
|
|
3137
|
-
static void norm_rewrite(Norm *norm, Store *store,
|
3138
|
-
|
3842
|
+
static void norm_rewrite(Norm *norm, Store *store, Deleter *dlr,
|
3843
|
+
SegmentInfo *si, int doc_count)
|
3139
3844
|
{
|
3140
3845
|
OutStream *os;
|
3141
|
-
char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3142
3846
|
char norm_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3847
|
+
const int field_num = norm->field_num;
|
3143
3848
|
|
3144
|
-
if (
|
3145
|
-
|
3849
|
+
if (si_norm_file_name(si, norm_file_name, field_num)) {
|
3850
|
+
deleter_queue_file(dlr, norm_file_name);
|
3146
3851
|
}
|
3147
|
-
|
3148
|
-
|
3149
|
-
os = store->new_output(store,
|
3852
|
+
si_advance_norm_gen(si, field_num);
|
3853
|
+
si_norm_file_name(si, norm_file_name, field_num);
|
3854
|
+
os = store->new_output(store, norm_file_name);
|
3150
3855
|
os_write_bytes(os, norm->bytes, doc_count);
|
3151
3856
|
os_close(os);
|
3152
|
-
|
3153
|
-
if (cfs_store) {
|
3154
|
-
sprintf(norm_file_name, "%s.s%d", segment, norm->field_num);
|
3155
|
-
}
|
3156
|
-
else {
|
3157
|
-
sprintf(norm_file_name, "%s.f%d", segment, norm->field_num);
|
3158
|
-
}
|
3159
|
-
store->rename(store, tmp_file_name, norm_file_name);
|
3160
3857
|
norm->is_dirty = false;
|
3161
3858
|
}
|
3162
3859
|
|
@@ -3166,6 +3863,7 @@ static void norm_rewrite(Norm *norm, Store *store, char *segment,
|
|
3166
3863
|
|
3167
3864
|
typedef struct SegmentReader {
|
3168
3865
|
IndexReader ir;
|
3866
|
+
SegmentInfo *si;
|
3169
3867
|
char *segment;
|
3170
3868
|
FieldsReader *fr;
|
3171
3869
|
BitVector *deleted_docs;
|
@@ -3191,7 +3889,7 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3191
3889
|
{
|
3192
3890
|
FieldsReader *fr;
|
3193
3891
|
|
3194
|
-
if ((fr = thread_getspecific(sr->thread_fr))
|
3892
|
+
if (NULL == (fr = thread_getspecific(sr->thread_fr))) {
|
3195
3893
|
fr = fr_clone(sr->fr);
|
3196
3894
|
ary_push(sr->fr_bucket, fr);
|
3197
3895
|
thread_setspecific(sr->thread_fr, fr);
|
@@ -3201,17 +3899,17 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3201
3899
|
|
3202
3900
|
static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
|
3203
3901
|
{
|
3204
|
-
return (sr->deleted_docs
|
3902
|
+
return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
|
3205
3903
|
}
|
3206
3904
|
|
3207
3905
|
static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3208
3906
|
uchar *buf)
|
3209
3907
|
{
|
3210
3908
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3211
|
-
if (
|
3909
|
+
if (NULL == norm) {
|
3212
3910
|
memset(buf, 0, SR_SIZE(sr));
|
3213
3911
|
}
|
3214
|
-
else if (norm->bytes
|
3912
|
+
else if (NULL != norm->bytes) { /* can copy from cache */
|
3215
3913
|
memcpy(buf, norm->bytes, SR_SIZE(sr));
|
3216
3914
|
}
|
3217
3915
|
else {
|
@@ -3226,11 +3924,11 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
|
3226
3924
|
static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
3227
3925
|
{
|
3228
3926
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3229
|
-
if (
|
3927
|
+
if (NULL == norm) { /* not an indexed field */
|
3230
3928
|
return NULL;
|
3231
3929
|
}
|
3232
3930
|
|
3233
|
-
if (norm->bytes
|
3931
|
+
if (NULL == norm->bytes) { /* value not yet read */
|
3234
3932
|
uchar *bytes = ALLOC_N(uchar, SR_SIZE(sr));
|
3235
3933
|
sr_get_norms_into_i(sr, field_num, bytes);
|
3236
3934
|
norm->bytes = bytes; /* cache it */
|
@@ -3241,7 +3939,8 @@ static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
|
3241
3939
|
static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
|
3242
3940
|
{
|
3243
3941
|
Norm *norm = h_get_int(SR(ir)->norms, field_num);
|
3244
|
-
if (
|
3942
|
+
if (NULL != norm) { /* has_norms */
|
3943
|
+
ir->has_changes = true;
|
3245
3944
|
norm->is_dirty = true; /* mark it dirty */
|
3246
3945
|
SR(ir)->norms_dirty = true;
|
3247
3946
|
sr_get_norms_i(SR(ir), field_num)[doc_num] = b;
|
@@ -3250,12 +3949,13 @@ static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
|
|
3250
3949
|
|
3251
3950
|
static void sr_delete_doc_i(IndexReader *ir, int doc_num)
|
3252
3951
|
{
|
3253
|
-
if (SR(ir)->deleted_docs
|
3952
|
+
if (NULL == SR(ir)->deleted_docs) {
|
3254
3953
|
SR(ir)->deleted_docs = bv_new();
|
3255
3954
|
}
|
3256
3955
|
|
3257
3956
|
SR(ir)->deleted_docs_dirty = true;
|
3258
3957
|
SR(ir)->undelete_all = false;
|
3958
|
+
ir->has_changes = true;
|
3259
3959
|
bv_set(SR(ir)->deleted_docs, doc_num);
|
3260
3960
|
}
|
3261
3961
|
|
@@ -3263,12 +3963,18 @@ static void sr_undelete_all_i(IndexReader *ir)
|
|
3263
3963
|
{
|
3264
3964
|
SR(ir)->undelete_all = true;
|
3265
3965
|
SR(ir)->deleted_docs_dirty = false;
|
3266
|
-
|
3966
|
+
ir->has_changes = true;
|
3967
|
+
if (NULL != SR(ir)->deleted_docs) {
|
3267
3968
|
bv_destroy(SR(ir)->deleted_docs);
|
3268
3969
|
}
|
3269
3970
|
SR(ir)->deleted_docs = NULL;
|
3270
3971
|
}
|
3271
3972
|
|
3973
|
+
static void sr_set_deleter_i(IndexReader *ir, Deleter *deleter)
|
3974
|
+
{
|
3975
|
+
ir->deleter = deleter;
|
3976
|
+
}
|
3977
|
+
|
3272
3978
|
static void bv_write(BitVector *bv, Store *store, char *name)
|
3273
3979
|
{
|
3274
3980
|
int i;
|
@@ -3299,64 +4005,61 @@ static BitVector *bv_read(Store *store, char *name)
|
|
3299
4005
|
|
3300
4006
|
static void sr_commit_i(IndexReader *ir)
|
3301
4007
|
{
|
4008
|
+
SegmentInfo *si = SR(ir)->si;
|
4009
|
+
char *segment = SR(ir)->si->name;
|
3302
4010
|
char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3303
|
-
char del_file_name[SEGMENT_NAME_MAX_LENGTH];
|
3304
|
-
|
3305
|
-
sprintf(del_file_name, "%s.del", SR(ir)->segment);
|
3306
4011
|
|
3307
|
-
if (SR(ir)->deleted_docs_dirty) {
|
3308
|
-
|
3309
|
-
|
3310
|
-
|
3311
|
-
|
3312
|
-
|
3313
|
-
|
4012
|
+
if (SR(ir)->undelete_all || SR(ir)->deleted_docs_dirty) {
|
4013
|
+
if (si->del_gen >= 0) {
|
4014
|
+
fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
4015
|
+
deleter_queue_file(ir->deleter, tmp_file_name);
|
4016
|
+
}
|
4017
|
+
if (SR(ir)->undelete_all) {
|
4018
|
+
si->del_gen = -1;
|
4019
|
+
SR(ir)->undelete_all = false;
|
4020
|
+
} else {
|
4021
|
+
/* (SR(ir)->deleted_docs_dirty) re-write deleted */
|
4022
|
+
si->del_gen++;
|
4023
|
+
fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
|
4024
|
+
bv_write(SR(ir)->deleted_docs, ir->store, tmp_file_name);
|
4025
|
+
SR(ir)->deleted_docs_dirty = false;
|
4026
|
+
}
|
3314
4027
|
}
|
3315
4028
|
if (SR(ir)->norms_dirty) { /* re-write norms */
|
3316
4029
|
int i;
|
3317
4030
|
const int field_cnt = ir->fis->size;
|
3318
4031
|
FieldInfo *fi;
|
3319
|
-
for (i =
|
4032
|
+
for (i = field_cnt - 1; i >= 0; i--) {
|
3320
4033
|
fi = ir->fis->fields[i];
|
3321
4034
|
if (fi_is_indexed(fi)) {
|
3322
|
-
|
3323
|
-
|
4035
|
+
Norm *norm = h_get_int(SR(ir)->norms, fi->number);
|
4036
|
+
if (norm && norm->is_dirty) {
|
4037
|
+
norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
|
4038
|
+
SR_SIZE(ir));
|
4039
|
+
}
|
3324
4040
|
}
|
3325
4041
|
}
|
4042
|
+
SR(ir)->norms_dirty = false;
|
3326
4043
|
}
|
3327
|
-
SR(ir)->deleted_docs_dirty = false;
|
3328
|
-
SR(ir)->norms_dirty = false;
|
3329
|
-
SR(ir)->undelete_all = false;
|
3330
4044
|
}
|
3331
4045
|
|
3332
4046
|
static void sr_close_i(IndexReader *ir)
|
3333
4047
|
{
|
3334
4048
|
SegmentReader *sr = SR(ir);
|
3335
4049
|
|
3336
|
-
fr_close(sr->fr);
|
3337
|
-
tir_close(sr->tir);
|
3338
|
-
sfi_close(sr->sfi);
|
3339
|
-
|
3340
|
-
if (sr->
|
3341
|
-
|
3342
|
-
|
3343
|
-
if (sr->
|
3344
|
-
is_close(sr->prx_in);
|
3345
|
-
}
|
3346
|
-
|
3347
|
-
h_destroy(sr->norms);
|
3348
|
-
|
4050
|
+
if (sr->fr) fr_close(sr->fr);
|
4051
|
+
if (sr->tir) tir_close(sr->tir);
|
4052
|
+
if (sr->sfi) sfi_close(sr->sfi);
|
4053
|
+
if (sr->frq_in) is_close(sr->frq_in);
|
4054
|
+
if (sr->prx_in) is_close(sr->prx_in);
|
4055
|
+
if (sr->norms) h_destroy(sr->norms);
|
4056
|
+
if (sr->deleted_docs) bv_destroy(sr->deleted_docs);
|
4057
|
+
if (sr->cfs_store) store_deref(sr->cfs_store);
|
3349
4058
|
if (sr->fr_bucket) {
|
3350
4059
|
thread_setspecific(sr->thread_fr, NULL);
|
3351
4060
|
thread_key_delete(sr->thread_fr);
|
3352
4061
|
ary_destroy(sr->fr_bucket, (free_ft)&fr_close);
|
3353
4062
|
}
|
3354
|
-
if (sr->deleted_docs) {
|
3355
|
-
bv_destroy(sr->deleted_docs);
|
3356
|
-
}
|
3357
|
-
if (sr->cfs_store) {
|
3358
|
-
store_deref(sr->cfs_store);
|
3359
|
-
}
|
3360
4063
|
}
|
3361
4064
|
|
3362
4065
|
static int sr_num_docs(IndexReader *ir)
|
@@ -3365,7 +4068,7 @@ static int sr_num_docs(IndexReader *ir)
|
|
3365
4068
|
|
3366
4069
|
mutex_lock(&ir->mutex);
|
3367
4070
|
num_docs = SR(ir)->fr->size;
|
3368
|
-
if (SR(ir)->deleted_docs
|
4071
|
+
if (NULL != SR(ir)->deleted_docs) {
|
3369
4072
|
num_docs -= SR(ir)->deleted_docs->count;
|
3370
4073
|
}
|
3371
4074
|
mutex_unlock(&ir->mutex);
|
@@ -3473,7 +4176,7 @@ static TermVector *sr_term_vector(IndexReader *ir, int doc_num,
|
|
3473
4176
|
static HashTable *sr_term_vectors(IndexReader *ir, int doc_num)
|
3474
4177
|
{
|
3475
4178
|
FieldsReader *fr;
|
3476
|
-
if (!SR(ir)->fr || (fr = sr_fr(SR(ir)))
|
4179
|
+
if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
|
3477
4180
|
return NULL;
|
3478
4181
|
}
|
3479
4182
|
|
@@ -3493,42 +4196,32 @@ static bool sr_is_deleted(IndexReader *ir, int doc_num)
|
|
3493
4196
|
|
3494
4197
|
static bool sr_has_deletions(IndexReader *ir)
|
3495
4198
|
{
|
3496
|
-
return
|
4199
|
+
return NULL != SR(ir)->deleted_docs;
|
3497
4200
|
}
|
3498
4201
|
|
3499
4202
|
static void sr_open_norms(IndexReader *ir, Store *cfs_store)
|
3500
4203
|
{
|
3501
4204
|
int i;
|
3502
|
-
|
4205
|
+
SegmentInfo *si = SR(ir)->si;
|
3503
4206
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
3504
|
-
FieldInfos *fis = ir->fis;
|
3505
|
-
char *ext_ptr;
|
3506
|
-
const int field_cnt = fis->size;
|
3507
|
-
|
3508
|
-
sprintf(file_name, "%s.", SR(ir)->segment);
|
3509
|
-
ext_ptr = file_name + strlen(file_name);
|
3510
4207
|
|
3511
|
-
for (i =
|
3512
|
-
|
3513
|
-
|
3514
|
-
|
3515
|
-
|
3516
|
-
|
3517
|
-
}
|
3518
|
-
if (store->exists(store, file_name)) {
|
3519
|
-
h_set_int(SR(ir)->norms, i,
|
3520
|
-
norm_create(store->open_input(store, file_name), i));
|
3521
|
-
}
|
4208
|
+
for (i = si->norm_gens_size - 1; i >= 0; i--) {
|
4209
|
+
Store *store = (si->use_compound_file && si->norm_gens[i] == 0) ?
|
4210
|
+
cfs_store : ir->store;
|
4211
|
+
if (si_norm_file_name(si, file_name, i)) {
|
4212
|
+
h_set_int(SR(ir)->norms, i,
|
4213
|
+
norm_create(store->open_input(store, file_name), i));
|
3522
4214
|
}
|
3523
4215
|
}
|
3524
4216
|
SR(ir)->norms_dirty = false;
|
3525
4217
|
}
|
3526
4218
|
|
3527
|
-
static IndexReader *sr_setup_i(SegmentReader *sr
|
4219
|
+
static IndexReader *sr_setup_i(SegmentReader *sr)
|
3528
4220
|
{
|
3529
|
-
Store *store = si->store;
|
4221
|
+
Store *store = sr->si->store;
|
3530
4222
|
IndexReader *ir = IR(sr);
|
3531
4223
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4224
|
+
char *sr_segment = sr->si->name;
|
3532
4225
|
|
3533
4226
|
ir->num_docs = &sr_num_docs;
|
3534
4227
|
ir->max_doc = &sr_max_doc;
|
@@ -3549,51 +4242,56 @@ static IndexReader *sr_setup_i(SegmentReader *sr, SegmentInfo *si)
|
|
3549
4242
|
ir->set_norm_i = &sr_set_norm_i;
|
3550
4243
|
ir->delete_doc_i = &sr_delete_doc_i;
|
3551
4244
|
ir->undelete_all_i = &sr_undelete_all_i;
|
4245
|
+
ir->set_deleter_i = &sr_set_deleter_i;
|
3552
4246
|
ir->commit_i = &sr_commit_i;
|
3553
4247
|
ir->close_i = &sr_close_i;
|
3554
4248
|
|
3555
|
-
sr->segment = si->name;
|
3556
4249
|
sr->cfs_store = NULL;
|
3557
4250
|
|
3558
|
-
|
3559
|
-
|
3560
|
-
|
3561
|
-
|
3562
|
-
|
4251
|
+
TRY
|
4252
|
+
if (sr->si->use_compound_file) {
|
4253
|
+
sprintf(file_name, "%s.cfs", sr_segment);
|
4254
|
+
sr->cfs_store = open_cmpd_store(store, file_name);
|
4255
|
+
store = sr->cfs_store;
|
4256
|
+
}
|
3563
4257
|
|
3564
|
-
|
3565
|
-
|
3566
|
-
|
4258
|
+
sr->fr = fr_open(store, sr_segment, ir->fis);
|
4259
|
+
sr->sfi = sfi_open(store, sr_segment);
|
4260
|
+
sr->tir = tir_open(store, sr->sfi, sr_segment);
|
3567
4261
|
|
3568
|
-
|
3569
|
-
|
3570
|
-
|
3571
|
-
|
3572
|
-
|
3573
|
-
|
3574
|
-
|
4262
|
+
sr->deleted_docs = NULL;
|
4263
|
+
sr->deleted_docs_dirty = false;
|
4264
|
+
sr->undelete_all = false;
|
4265
|
+
if (si_has_deletions(sr->si)) {
|
4266
|
+
fn_for_generation(file_name, sr_segment, "del", sr->si->del_gen);
|
4267
|
+
sr->deleted_docs = bv_read(sr->si->store, file_name);
|
4268
|
+
}
|
3575
4269
|
|
3576
|
-
|
3577
|
-
|
3578
|
-
|
3579
|
-
|
3580
|
-
|
3581
|
-
|
4270
|
+
sprintf(file_name, "%s.frq", sr_segment);
|
4271
|
+
sr->frq_in = store->open_input(store, file_name);
|
4272
|
+
sprintf(file_name, "%s.prx", sr_segment);
|
4273
|
+
sr->prx_in = store->open_input(store, file_name);
|
4274
|
+
sr->norms = h_new_int((free_ft)&norm_destroy);
|
4275
|
+
sr_open_norms(ir, store);
|
4276
|
+
if (fis_has_vectors(ir->fis)) {
|
4277
|
+
thread_key_create(&sr->thread_fr, NULL);
|
4278
|
+
sr->fr_bucket = ary_new();
|
4279
|
+
}
|
4280
|
+
XCATCHALL
|
4281
|
+
ir_close(ir);
|
4282
|
+
XENDTRY
|
3582
4283
|
|
3583
|
-
if (fis_has_vectors(ir->fis)) {
|
3584
|
-
thread_key_create(&sr->thread_fr, NULL);
|
3585
|
-
sr->fr_bucket = ary_new();
|
3586
|
-
}
|
3587
4284
|
return ir;
|
3588
4285
|
}
|
3589
4286
|
|
3590
4287
|
static IndexReader *sr_open(SegmentInfos *sis, FieldInfos *fis, int si_num,
|
3591
4288
|
bool is_owner)
|
3592
4289
|
{
|
4290
|
+
IndexReader *ir;
|
3593
4291
|
SegmentReader *sr = ALLOC_AND_ZERO(SegmentReader);
|
3594
|
-
|
3595
|
-
|
3596
|
-
return sr_setup_i(
|
4292
|
+
sr->si = sis->segs[si_num];
|
4293
|
+
ir = ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
|
4294
|
+
return sr_setup_i(sr);
|
3597
4295
|
}
|
3598
4296
|
|
3599
4297
|
/****************************************************************************
|
@@ -3683,7 +4381,7 @@ static uchar *mr_get_norms(IndexReader *ir, int field_num)
|
|
3683
4381
|
|
3684
4382
|
mutex_lock(&ir->mutex);
|
3685
4383
|
bytes = h_get_int(MR(ir)->norms_cache, field_num);
|
3686
|
-
if (
|
4384
|
+
if (NULL == bytes) {
|
3687
4385
|
int i;
|
3688
4386
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
3689
4387
|
|
@@ -3709,7 +4407,7 @@ static uchar *mr_get_norms_into(IndexReader *ir, int field_num, uchar *buf)
|
|
3709
4407
|
|
3710
4408
|
mutex_lock(&ir->mutex);
|
3711
4409
|
bytes = h_get_int(MR(ir)->norms_cache, field_num);
|
3712
|
-
if (
|
4410
|
+
if (NULL != bytes) {
|
3713
4411
|
memcpy(buf, bytes, MR(ir)->max_doc);
|
3714
4412
|
}
|
3715
4413
|
else {
|
@@ -3791,6 +4489,7 @@ static void mr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar val
|
|
3791
4489
|
int fnum = mr_get_field_num(MR(ir), i, field_num);
|
3792
4490
|
if (fnum >= 0) {
|
3793
4491
|
IndexReader *reader = MR(ir)->sub_readers[i];
|
4492
|
+
ir->has_changes = true;
|
3794
4493
|
h_del_int(MR(ir)->norms_cache, fnum);/* clear cache */
|
3795
4494
|
ir_set_norm_i(reader, doc_num - MR(ir)->starts[i], fnum, val);
|
3796
4495
|
}
|
@@ -3804,6 +4503,7 @@ static void mr_delete_doc_i(IndexReader *ir, int doc_num)
|
|
3804
4503
|
/* dispatch to segment reader */
|
3805
4504
|
reader->delete_doc_i(reader, doc_num - MR(ir)->starts[i]);
|
3806
4505
|
MR(ir)->has_deletions = true;
|
4506
|
+
ir->has_changes = true;
|
3807
4507
|
}
|
3808
4508
|
|
3809
4509
|
static void mr_undelete_all_i(IndexReader *ir)
|
@@ -3817,6 +4517,17 @@ static void mr_undelete_all_i(IndexReader *ir)
|
|
3817
4517
|
reader->undelete_all_i(reader);
|
3818
4518
|
}
|
3819
4519
|
MR(ir)->has_deletions = false;
|
4520
|
+
ir->has_changes = true;
|
4521
|
+
}
|
4522
|
+
|
4523
|
+
static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
|
4524
|
+
{
|
4525
|
+
int i;
|
4526
|
+
ir->deleter = deleter;
|
4527
|
+
for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
|
4528
|
+
IndexReader *reader = MR(ir)->sub_readers[i];
|
4529
|
+
reader->set_deleter_i(reader, deleter);
|
4530
|
+
}
|
3820
4531
|
}
|
3821
4532
|
|
3822
4533
|
static void mr_commit_i(IndexReader *ir)
|
@@ -3825,7 +4536,7 @@ static void mr_commit_i(IndexReader *ir)
|
|
3825
4536
|
const int mr_reader_cnt = MR(ir)->r_cnt;
|
3826
4537
|
for (i = 0; i < mr_reader_cnt; i++) {
|
3827
4538
|
IndexReader *reader = MR(ir)->sub_readers[i];
|
3828
|
-
|
4539
|
+
ir_commit_i(reader);
|
3829
4540
|
}
|
3830
4541
|
}
|
3831
4542
|
|
@@ -3887,6 +4598,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
|
|
3887
4598
|
ir->set_norm_i = &mr_set_norm_i;
|
3888
4599
|
ir->delete_doc_i = &mr_delete_doc_i;
|
3889
4600
|
ir->undelete_all_i = &mr_undelete_all_i;
|
4601
|
+
ir->set_deleter_i = &mr_set_deleter_i;
|
3890
4602
|
ir->commit_i = &mr_commit_i;
|
3891
4603
|
ir->close_i = &mr_close_i;
|
3892
4604
|
|
@@ -3980,33 +4692,65 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
|
|
3980
4692
|
* IndexReader
|
3981
4693
|
****************************************************************************/
|
3982
4694
|
|
4695
|
+
|
4696
|
+
static void ir_open_i(Store *store, FindSegmentsFile *fsf)
|
4697
|
+
{
|
4698
|
+
volatile bool success = false;
|
4699
|
+
IndexReader *ir = NULL;
|
4700
|
+
SegmentInfos *sis = NULL;
|
4701
|
+
TRY
|
4702
|
+
do {
|
4703
|
+
FieldInfos *fis;
|
4704
|
+
|
4705
|
+
mutex_lock(&store->mutex);
|
4706
|
+
sis_read_i(store, fsf);
|
4707
|
+
sis = fsf->p_return;
|
4708
|
+
fis = sis->fis;
|
4709
|
+
|
4710
|
+
if (sis->size == 1) {
|
4711
|
+
ir = sr_open(sis, fis, 0, true);
|
4712
|
+
}
|
4713
|
+
else {
|
4714
|
+
int i;
|
4715
|
+
IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
|
4716
|
+
int num_segments = sis->size;
|
4717
|
+
for (i = num_segments - 1; i >= 0; i--) {
|
4718
|
+
TRY
|
4719
|
+
readers[i] = sr_open(sis, fis, i, false);
|
4720
|
+
XCATCHALL
|
4721
|
+
for (i++; i < num_segments; i++) {
|
4722
|
+
ir_close(readers[i]);
|
4723
|
+
}
|
4724
|
+
free(readers);
|
4725
|
+
XENDTRY
|
4726
|
+
}
|
4727
|
+
ir = mr_open_i(store, sis, fis, readers, sis->size);
|
4728
|
+
}
|
4729
|
+
fsf->p_return = ir;
|
4730
|
+
success = true;
|
4731
|
+
} while (0);
|
4732
|
+
XFINALLY
|
4733
|
+
if (!success) {
|
4734
|
+
if (ir) {
|
4735
|
+
ir_close(ir);
|
4736
|
+
}
|
4737
|
+
else if (sis) {
|
4738
|
+
sis_destroy(sis);
|
4739
|
+
}
|
4740
|
+
}
|
4741
|
+
mutex_unlock(&store->mutex);
|
4742
|
+
XENDTRY
|
4743
|
+
}
|
4744
|
+
|
3983
4745
|
/**
|
3984
4746
|
* Will keep a reference to the store. To let this method delete the store
|
3985
4747
|
* make sure you deref the store that you pass to it
|
3986
4748
|
*/
|
3987
4749
|
IndexReader *ir_open(Store *store)
|
3988
4750
|
{
|
3989
|
-
|
3990
|
-
|
3991
|
-
|
3992
|
-
FieldInfos *fis;
|
3993
|
-
|
3994
|
-
mutex_lock(&store->mutex);
|
3995
|
-
sis = sis_read(store);
|
3996
|
-
fis = fis_read(store);
|
3997
|
-
if (sis->size == 1) {
|
3998
|
-
ir = sr_open(sis, fis, 0, true);
|
3999
|
-
}
|
4000
|
-
else {
|
4001
|
-
IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
|
4002
|
-
for (i = sis->size; i > 0;) {
|
4003
|
-
i--;
|
4004
|
-
readers[i] = sr_open(sis, fis, i, false);
|
4005
|
-
}
|
4006
|
-
ir = mr_open_i(store, sis, fis, readers, sis->size);
|
4007
|
-
}
|
4008
|
-
mutex_unlock(&store->mutex);
|
4009
|
-
return ir;
|
4751
|
+
FindSegmentsFile fsf;
|
4752
|
+
sis_find_segments_file(store, &fsf, &ir_open_i);
|
4753
|
+
return (IndexReader *)fsf.p_return;
|
4010
4754
|
}
|
4011
4755
|
|
4012
4756
|
/****************************************************************************
|
@@ -4126,8 +4870,8 @@ typedef struct SkipBuffer
|
|
4126
4870
|
OutStream *frq_out;
|
4127
4871
|
OutStream *prx_out;
|
4128
4872
|
int last_doc;
|
4129
|
-
|
4130
|
-
|
4873
|
+
off_t last_frq_ptr;
|
4874
|
+
off_t last_prx_ptr;
|
4131
4875
|
} SkipBuffer;
|
4132
4876
|
|
4133
4877
|
static void skip_buf_reset(SkipBuffer *skip_buf)
|
@@ -4149,8 +4893,8 @@ static SkipBuffer *skip_buf_new(OutStream *frq_out, OutStream *prx_out)
|
|
4149
4893
|
|
4150
4894
|
static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
4151
4895
|
{
|
4152
|
-
|
4153
|
-
|
4896
|
+
off_t frq_ptr = os_pos(skip_buf->frq_out);
|
4897
|
+
off_t prx_ptr = os_pos(skip_buf->prx_out);
|
4154
4898
|
|
4155
4899
|
os_write_vint(skip_buf->buf, doc - skip_buf->last_doc);
|
4156
4900
|
os_write_vint(skip_buf->buf, frq_ptr - skip_buf->last_frq_ptr);
|
@@ -4161,9 +4905,9 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
|
4161
4905
|
skip_buf->last_prx_ptr = prx_ptr;
|
4162
4906
|
}
|
4163
4907
|
|
4164
|
-
static
|
4908
|
+
static off_t skip_buf_write(SkipBuffer *skip_buf)
|
4165
4909
|
{
|
4166
|
-
|
4910
|
+
off_t skip_ptr = os_pos(skip_buf->frq_out);
|
4167
4911
|
ramo_write_to(skip_buf->buf, skip_buf->frq_out);
|
4168
4912
|
return skip_ptr;
|
4169
4913
|
}
|
@@ -4184,7 +4928,8 @@ static void dw_write_norms(DocWriter *dw, FieldInverter *fld_inv)
|
|
4184
4928
|
{
|
4185
4929
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4186
4930
|
OutStream *norms_out;
|
4187
|
-
|
4931
|
+
si_advance_norm_gen(dw->si, fld_inv->fi->number);
|
4932
|
+
si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
|
4188
4933
|
norms_out = dw->store->new_output(dw->store, file_name);
|
4189
4934
|
os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
|
4190
4935
|
os_close(norms_out);
|
@@ -4232,23 +4977,23 @@ static void dw_flush(DocWriter *dw)
|
|
4232
4977
|
Posting *p;
|
4233
4978
|
Occurence *occ;
|
4234
4979
|
Store *store = dw->store;
|
4235
|
-
TermInfosWriter *tiw = tiw_open(store, dw->
|
4980
|
+
TermInfosWriter *tiw = tiw_open(store, dw->si->name,
|
4236
4981
|
dw->index_interval, skip_interval);
|
4237
4982
|
TermInfo ti;
|
4238
4983
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4239
4984
|
OutStream *frq_out, *prx_out;
|
4240
4985
|
SkipBuffer *skip_buf;
|
4241
4986
|
|
4242
|
-
sprintf(file_name, "%s.frq", dw->
|
4987
|
+
sprintf(file_name, "%s.frq", dw->si->name);
|
4243
4988
|
frq_out = store->new_output(store, file_name);
|
4244
|
-
sprintf(file_name, "%s.prx", dw->
|
4989
|
+
sprintf(file_name, "%s.prx", dw->si->name);
|
4245
4990
|
prx_out = store->new_output(store, file_name);
|
4246
4991
|
skip_buf = skip_buf_new(frq_out, prx_out);
|
4247
4992
|
|
4248
4993
|
for (i = 0; i < fields_count; i++) {
|
4249
4994
|
fi = fis->fields[i];
|
4250
4995
|
if (!fi_is_indexed(fi)
|
4251
|
-
|| (fld_inv = h_get_int(dw->fields, fi->number))
|
4996
|
+
|| NULL == (fld_inv = h_get_int(dw->fields, fi->number))) {
|
4252
4997
|
continue;
|
4253
4998
|
}
|
4254
4999
|
if (!fi_omit_norms(fi)) {
|
@@ -4265,9 +5010,9 @@ static void dw_flush(DocWriter *dw)
|
|
4265
5010
|
last_doc = 0;
|
4266
5011
|
doc_freq = 0;
|
4267
5012
|
skip_buf_reset(skip_buf);
|
4268
|
-
for (p = pl->first;
|
5013
|
+
for (p = pl->first; NULL != p; p = p->next) {
|
4269
5014
|
doc_freq++;
|
4270
|
-
if ((doc_freq % dw->skip_interval)
|
5015
|
+
if (0 == (doc_freq % dw->skip_interval)) {
|
4271
5016
|
skip_buf_add(skip_buf, last_doc);
|
4272
5017
|
}
|
4273
5018
|
|
@@ -4283,7 +5028,7 @@ static void dw_flush(DocWriter *dw)
|
|
4283
5028
|
}
|
4284
5029
|
|
4285
5030
|
last_pos = 0;
|
4286
|
-
for (occ = p->first_occ;
|
5031
|
+
for (occ = p->first_occ; NULL != occ; occ = occ->next) {
|
4287
5032
|
os_write_vint(prx_out, occ->pos - last_pos);
|
4288
5033
|
last_pos = occ->pos;
|
4289
5034
|
}
|
@@ -4300,7 +5045,7 @@ static void dw_flush(DocWriter *dw)
|
|
4300
5045
|
dw_flush_streams(dw);
|
4301
5046
|
}
|
4302
5047
|
|
4303
|
-
DocWriter *dw_open(IndexWriter *iw,
|
5048
|
+
DocWriter *dw_open(IndexWriter *iw, SegmentInfo *si)
|
4304
5049
|
{
|
4305
5050
|
Store *store = iw->store;
|
4306
5051
|
MemoryPool *mp = mp_new_capa(iw->config.chunk_size,
|
@@ -4308,34 +5053,34 @@ DocWriter *dw_open(IndexWriter *iw, const char *segment)
|
|
4308
5053
|
|
4309
5054
|
DocWriter *dw = ALLOC(DocWriter);
|
4310
5055
|
|
4311
|
-
dw->mp
|
4312
|
-
dw->analyzer
|
4313
|
-
dw->fis
|
4314
|
-
dw->store
|
4315
|
-
dw->fw
|
4316
|
-
dw->
|
5056
|
+
dw->mp = mp;
|
5057
|
+
dw->analyzer = iw->analyzer;
|
5058
|
+
dw->fis = iw->fis;
|
5059
|
+
dw->store = store;
|
5060
|
+
dw->fw = fw_open(store, si->name, iw->fis);
|
5061
|
+
dw->si = si;
|
4317
5062
|
|
4318
5063
|
dw->curr_plists = h_new_str(NULL, NULL);
|
4319
|
-
dw->fields
|
4320
|
-
dw->doc_num
|
5064
|
+
dw->fields = h_new_int((free_ft)fld_inv_destroy);
|
5065
|
+
dw->doc_num = 0;
|
4321
5066
|
|
4322
|
-
dw->index_interval
|
4323
|
-
dw->skip_interval
|
4324
|
-
dw->max_field_length
|
4325
|
-
dw->max_buffered_docs
|
5067
|
+
dw->index_interval = iw->config.index_interval;
|
5068
|
+
dw->skip_interval = iw->config.skip_interval;
|
5069
|
+
dw->max_field_length = iw->config.max_field_length;
|
5070
|
+
dw->max_buffered_docs = iw->config.max_buffered_docs;
|
4326
5071
|
|
4327
|
-
dw->offsets
|
4328
|
-
dw->offsets_size
|
4329
|
-
dw->offsets_capa
|
5072
|
+
dw->offsets = ALLOC_AND_ZERO_N(Offset, DW_OFFSET_INIT_CAPA);
|
5073
|
+
dw->offsets_size = 0;
|
5074
|
+
dw->offsets_capa = DW_OFFSET_INIT_CAPA;
|
4330
5075
|
|
4331
|
-
dw->similarity
|
5076
|
+
dw->similarity = iw->similarity;
|
4332
5077
|
return dw;
|
4333
5078
|
}
|
4334
5079
|
|
4335
|
-
void dw_new_segment(DocWriter *dw,
|
5080
|
+
void dw_new_segment(DocWriter *dw, SegmentInfo *si)
|
4336
5081
|
{
|
4337
|
-
dw->fw = fw_open(dw->store,
|
4338
|
-
dw->
|
5082
|
+
dw->fw = fw_open(dw->store, si->name, dw->fis);
|
5083
|
+
dw->si = si;
|
4339
5084
|
}
|
4340
5085
|
|
4341
5086
|
void dw_close(DocWriter *dw)
|
@@ -4536,7 +5281,7 @@ typedef struct SegmentMergeInfo {
|
|
4536
5281
|
int base;
|
4537
5282
|
int max_doc;
|
4538
5283
|
int doc_cnt;
|
4539
|
-
|
5284
|
+
SegmentInfo *si;
|
4540
5285
|
Store *store;
|
4541
5286
|
Store *orig_store;
|
4542
5287
|
BitVector *deleted_docs;
|
@@ -4552,7 +5297,7 @@ typedef struct SegmentMergeInfo {
|
|
4552
5297
|
static bool smi_lt(const SegmentMergeInfo *smi1, const SegmentMergeInfo *smi2)
|
4553
5298
|
{
|
4554
5299
|
int cmpres = strcmp(smi1->term, smi2->term);
|
4555
|
-
if (
|
5300
|
+
if (0 == cmpres) {
|
4556
5301
|
return smi1->base < smi2->base;
|
4557
5302
|
}
|
4558
5303
|
else {
|
@@ -4578,12 +5323,13 @@ static void smi_load_doc_map(SegmentMergeInfo *smi)
|
|
4578
5323
|
smi->doc_cnt = j;
|
4579
5324
|
}
|
4580
5325
|
|
4581
|
-
static SegmentMergeInfo *smi_new(int base, Store *store,
|
5326
|
+
static SegmentMergeInfo *smi_new(int base, Store *store, SegmentInfo *si)
|
4582
5327
|
{
|
4583
5328
|
SegmentMergeInfo *smi = ALLOC_AND_ZERO(SegmentMergeInfo);
|
4584
5329
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
5330
|
+
char *segment = si->name;
|
4585
5331
|
smi->base = base;
|
4586
|
-
smi->
|
5332
|
+
smi->si = si;
|
4587
5333
|
smi->orig_store = smi->store = store;
|
4588
5334
|
sprintf(file_name, "%s.cfs", segment);
|
4589
5335
|
if (store->exists(store, file_name)) {
|
@@ -4595,8 +5341,8 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
|
|
4595
5341
|
smi->doc_cnt = smi->max_doc
|
4596
5342
|
= smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
|
4597
5343
|
|
4598
|
-
|
4599
|
-
|
5344
|
+
if (si->del_gen >= 0) {
|
5345
|
+
fn_for_generation(file_name, segment, "del", si->del_gen);
|
4600
5346
|
smi->deleted_docs = bv_read(store, file_name);
|
4601
5347
|
smi_load_doc_map(smi);
|
4602
5348
|
}
|
@@ -4606,13 +5352,14 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
|
|
4606
5352
|
static void smi_load_term_input(SegmentMergeInfo *smi)
|
4607
5353
|
{
|
4608
5354
|
Store *store = smi->store;
|
5355
|
+
char *segment = smi->si->name;
|
4609
5356
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4610
|
-
smi->sfi = sfi_open(store,
|
4611
|
-
sprintf(file_name, "%s.tis",
|
5357
|
+
smi->sfi = sfi_open(store, segment);
|
5358
|
+
sprintf(file_name, "%s.tis", segment);
|
4612
5359
|
smi->te = TE(ste_new(store->open_input(store, file_name), smi->sfi));
|
4613
|
-
sprintf(file_name, "%s.frq",
|
5360
|
+
sprintf(file_name, "%s.frq", segment);
|
4614
5361
|
smi->frq_in = store->open_input(store, file_name);
|
4615
|
-
sprintf(file_name, "%s.prx",
|
5362
|
+
sprintf(file_name, "%s.prx", segment);
|
4616
5363
|
smi->prx_in = store->open_input(store, file_name);
|
4617
5364
|
smi->tde = stpe_new(NULL, smi->frq_in, smi->prx_in, smi->deleted_docs,
|
4618
5365
|
STE(smi->te)->skip_interval);
|
@@ -4652,7 +5399,7 @@ typedef struct SegmentMerger {
|
|
4652
5399
|
TermInfo ti;
|
4653
5400
|
Store *store;
|
4654
5401
|
FieldInfos *fis;
|
4655
|
-
|
5402
|
+
SegmentInfo *si;
|
4656
5403
|
SegmentMergeInfo **smis;
|
4657
5404
|
int seg_cnt;
|
4658
5405
|
int doc_cnt;
|
@@ -4667,19 +5414,19 @@ typedef struct SegmentMerger {
|
|
4667
5414
|
OutStream *prx_out;
|
4668
5415
|
} SegmentMerger;
|
4669
5416
|
|
4670
|
-
static SegmentMerger *sm_create(IndexWriter *iw,
|
5417
|
+
static SegmentMerger *sm_create(IndexWriter *iw, SegmentInfo *si,
|
4671
5418
|
SegmentInfo **seg_infos, const int seg_cnt)
|
4672
5419
|
{
|
4673
5420
|
int i;
|
4674
5421
|
SegmentMerger *sm = ALLOC_AND_ZERO_N(SegmentMerger, seg_cnt);
|
4675
5422
|
sm->store = iw->store;
|
4676
5423
|
sm->fis = iw->fis;
|
4677
|
-
sm->
|
5424
|
+
sm->si = si;
|
4678
5425
|
sm->doc_cnt = 0;
|
4679
5426
|
sm->smis = ALLOC_N(SegmentMergeInfo *, seg_cnt);
|
4680
5427
|
for (i = 0; i < seg_cnt; i++) {
|
4681
5428
|
sm->smis[i] = smi_new(sm->doc_cnt, seg_infos[i]->store,
|
4682
|
-
seg_infos[i]
|
5429
|
+
seg_infos[i]);
|
4683
5430
|
sm->doc_cnt += sm->smis[i]->doc_cnt;
|
4684
5431
|
}
|
4685
5432
|
sm->seg_cnt = seg_cnt;
|
@@ -4695,7 +5442,6 @@ static void sm_destroy(SegmentMerger *sm)
|
|
4695
5442
|
smi_destroy(sm->smis[i]);
|
4696
5443
|
}
|
4697
5444
|
free(sm->smis);
|
4698
|
-
free(sm->segment);
|
4699
5445
|
free(sm);
|
4700
5446
|
}
|
4701
5447
|
|
@@ -4708,20 +5454,21 @@ static void sm_merge_fields(SegmentMerger *sm)
|
|
4708
5454
|
Store *store = sm->store;
|
4709
5455
|
const int seg_cnt = sm->seg_cnt;
|
4710
5456
|
|
4711
|
-
sprintf(file_name, "%s.fdt", sm->
|
5457
|
+
sprintf(file_name, "%s.fdt", sm->si->name);
|
4712
5458
|
fdt_out = store->new_output(store, file_name);
|
4713
5459
|
|
4714
|
-
sprintf(file_name, "%s.fdx", sm->
|
5460
|
+
sprintf(file_name, "%s.fdx", sm->si->name);
|
4715
5461
|
fdx_out = store->new_output(store, file_name);
|
4716
5462
|
|
4717
5463
|
for (i = 0; i < seg_cnt; i++) {
|
4718
5464
|
SegmentMergeInfo *smi = sm->smis[i];
|
4719
5465
|
const int max_doc = smi->max_doc;
|
4720
5466
|
InStream *fdt_in, *fdx_in;
|
5467
|
+
char *segment = smi->si->name;
|
4721
5468
|
store = smi->store;
|
4722
|
-
sprintf(file_name, "%s.fdt",
|
5469
|
+
sprintf(file_name, "%s.fdt", segment);
|
4723
5470
|
fdt_in = store->open_input(store, file_name);
|
4724
|
-
sprintf(file_name, "%s.fdx",
|
5471
|
+
sprintf(file_name, "%s.fdx", segment);
|
4725
5472
|
fdx_in = store->open_input(store, file_name);
|
4726
5473
|
|
4727
5474
|
if (max_doc > 0) {
|
@@ -4775,7 +5522,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
|
|
4775
5522
|
* stde_next rather than stpe_next here */
|
4776
5523
|
while (stde_next(tde)) {
|
4777
5524
|
doc = stde_doc_num(tde);
|
4778
|
-
if (
|
5525
|
+
if (NULL != doc_map) {
|
4779
5526
|
doc = doc_map[doc]; /* work around deletions */
|
4780
5527
|
}
|
4781
5528
|
doc += base; /* convert to merged space */
|
@@ -4787,7 +5534,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
|
|
4787
5534
|
#endif
|
4788
5535
|
df++;
|
4789
5536
|
|
4790
|
-
if ((df % skip_interval)
|
5537
|
+
if (0 == (df % skip_interval)) {
|
4791
5538
|
skip_buf_add(skip_buf, last_doc);
|
4792
5539
|
}
|
4793
5540
|
|
@@ -4823,12 +5570,12 @@ static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
|
|
4823
5570
|
static void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **matches,
|
4824
5571
|
int match_size)
|
4825
5572
|
{
|
4826
|
-
|
4827
|
-
|
5573
|
+
off_t frq_ptr = os_pos(sm->frq_out);
|
5574
|
+
off_t prx_ptr = os_pos(sm->prx_out);
|
4828
5575
|
|
4829
5576
|
int df = sm_append_postings(sm, matches, match_size); /* append posting data */
|
4830
5577
|
|
4831
|
-
|
5578
|
+
off_t skip_ptr = skip_buf_write(sm->skip_buf);
|
4832
5579
|
|
4833
5580
|
if (df > 0) {
|
4834
5581
|
/* add an entry to the dictionary with ptrs to prox and freq files */
|
@@ -4861,7 +5608,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
|
|
4861
5608
|
for (j = 0; j < seg_cnt; j++) {
|
4862
5609
|
smi = sm->smis[j];
|
4863
5610
|
ste_set_field(smi->te, i);
|
4864
|
-
if (smi_next(smi)
|
5611
|
+
if (NULL != smi_next(smi)) {
|
4865
5612
|
pq_push(sm->queue, smi); /* initialize @queue */
|
4866
5613
|
}
|
4867
5614
|
}
|
@@ -4877,7 +5624,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
|
|
4877
5624
|
match_size++;
|
4878
5625
|
term = matches[0]->term;
|
4879
5626
|
top = pq_top(sm->queue);
|
4880
|
-
while ((
|
5627
|
+
while ((NULL != top) && (0 == strcmp(term, top->term))) {
|
4881
5628
|
matches[match_size] = pq_pop(sm->queue);
|
4882
5629
|
match_size++;
|
4883
5630
|
top = pq_top(sm->queue);
|
@@ -4889,7 +5636,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
|
|
4889
5636
|
while (match_size > 0) {
|
4890
5637
|
match_size--;
|
4891
5638
|
smi = matches[match_size];
|
4892
|
-
if (smi_next(smi)
|
5639
|
+
if (NULL != smi_next(smi)) {
|
4893
5640
|
pq_push(sm->queue, smi); /* restore queue */
|
4894
5641
|
}
|
4895
5642
|
}
|
@@ -4905,12 +5652,12 @@ static void sm_merge_terms(SegmentMerger *sm)
|
|
4905
5652
|
{
|
4906
5653
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4907
5654
|
|
4908
|
-
sprintf(file_name, "%s.frq", sm->
|
5655
|
+
sprintf(file_name, "%s.frq", sm->si->name);
|
4909
5656
|
sm->frq_out = sm->store->new_output(sm->store, file_name);
|
4910
|
-
sprintf(file_name, "%s.prx", sm->
|
5657
|
+
sprintf(file_name, "%s.prx", sm->si->name);
|
4911
5658
|
sm->prx_out = sm->store->new_output(sm->store, file_name);
|
4912
5659
|
|
4913
|
-
sm->tiw = tiw_open(sm->store, sm->
|
5660
|
+
sm->tiw = tiw_open(sm->store, sm->si->name, sm->config->index_interval,
|
4914
5661
|
sm->config->skip_interval);
|
4915
5662
|
sm->skip_buf = skip_buf_new(sm->frq_out, sm->prx_out);
|
4916
5663
|
|
@@ -4936,6 +5683,7 @@ static void sm_merge_terms(SegmentMerger *sm)
|
|
4936
5683
|
|
4937
5684
|
static void sm_merge_norms(SegmentMerger *sm)
|
4938
5685
|
{
|
5686
|
+
SegmentInfo *si;
|
4939
5687
|
int i, j, k;
|
4940
5688
|
Store *store;
|
4941
5689
|
uchar byte;
|
@@ -4945,23 +5693,21 @@ static void sm_merge_norms(SegmentMerger *sm)
|
|
4945
5693
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
4946
5694
|
SegmentMergeInfo *smi;
|
4947
5695
|
const int seg_cnt = sm->seg_cnt;
|
4948
|
-
|
4949
|
-
for (i = 0; i < fis_size; i++) {
|
5696
|
+
for (i = sm->fis->size - 1; i >= 0; i--) {
|
4950
5697
|
fi = sm->fis->fields[i];
|
4951
5698
|
if (fi_has_norms(fi)) {
|
4952
|
-
|
5699
|
+
si = sm->si;
|
5700
|
+
si_advance_norm_gen(si, i);
|
5701
|
+
si_norm_file_name(si, file_name, i);
|
4953
5702
|
os = sm->store->new_output(sm->store, file_name);
|
4954
5703
|
for (j = 0; j < seg_cnt; j++) {
|
4955
5704
|
smi = sm->smis[j];
|
4956
|
-
|
4957
|
-
|
4958
|
-
if (!store->exists(store, file_name)) {
|
4959
|
-
sprintf(file_name, "%s.f%d", smi->segment, i);
|
4960
|
-
store = smi->store;
|
4961
|
-
}
|
4962
|
-
if (store->exists(store, file_name)) {
|
5705
|
+
si = smi->si;
|
5706
|
+
if (si_norm_file_name(si, file_name, i)) {
|
4963
5707
|
const int max_doc = smi->max_doc;
|
4964
5708
|
BitVector *deleted_docs = smi->deleted_docs;
|
5709
|
+
store = (si->use_compound_file && si->norm_gens[i])
|
5710
|
+
? smi->orig_store : smi->store;
|
4965
5711
|
is = store->open_input(store, file_name);
|
4966
5712
|
if (deleted_docs) {
|
4967
5713
|
for (k = 0; k < max_doc; k++) {
|
@@ -5004,11 +5750,18 @@ static int sm_merge(SegmentMerger *sm)
|
|
5004
5750
|
/* prepare an index ready for writing */
|
5005
5751
|
void index_create(Store *store, FieldInfos *fis)
|
5006
5752
|
{
|
5007
|
-
SegmentInfos *sis = sis_new();
|
5753
|
+
SegmentInfos *sis = sis_new(fis);
|
5008
5754
|
store->clear_all(store);
|
5009
|
-
sis_write(sis, store);
|
5755
|
+
sis_write(sis, store, NULL);
|
5010
5756
|
sis_destroy(sis);
|
5011
|
-
|
5757
|
+
}
|
5758
|
+
|
5759
|
+
bool index_is_locked(Store *store)
|
5760
|
+
{
|
5761
|
+
Lock *write_lock = open_lock(store, WRITE_LOCK_NAME);
|
5762
|
+
bool is_locked = write_lock->is_locked(write_lock);
|
5763
|
+
close_lock(write_lock);
|
5764
|
+
return is_locked;
|
5012
5765
|
}
|
5013
5766
|
|
5014
5767
|
int iw_doc_count(IndexWriter *iw)
|
@@ -5025,158 +5778,79 @@ int iw_doc_count(IndexWriter *iw)
|
|
5025
5778
|
return doc_cnt;
|
5026
5779
|
}
|
5027
5780
|
|
5028
|
-
|
5781
|
+
#define MOVE_TO_COMPOUND_DIR(file_name)\
|
5782
|
+
deleter_queue_file(dlr, file_name);\
|
5783
|
+
cw_add_file(cw, file_name)
|
5784
|
+
|
5785
|
+
static void iw_create_compound_file(Store *store, FieldInfos *fis,
|
5786
|
+
SegmentInfo *si, char *cfs_file_name,
|
5787
|
+
Deleter *dlr)
|
5029
5788
|
{
|
5030
5789
|
int i;
|
5031
|
-
for (i = ary_size(file_names) - 1; i >= 0; i--) {
|
5032
|
-
store->remove(store, file_names[i]);
|
5033
|
-
}
|
5034
|
-
ary_destroy((void **)file_names, &free);
|
5035
|
-
}
|
5036
|
-
|
5037
|
-
static char **iw_create_compound_file(Store *store, FieldInfos *fis,
|
5038
|
-
char *segment, char *cfs_file_name)
|
5039
|
-
{
|
5040
|
-
char **file_names = (char **)ary_new_capa(16);
|
5041
5790
|
CompoundWriter *cw;
|
5042
|
-
FieldInfo *fi;
|
5043
|
-
int i;
|
5044
5791
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
5045
|
-
|
5046
|
-
int
|
5792
|
+
char *ext;
|
5793
|
+
int seg_len = strlen(si->name);
|
5794
|
+
|
5795
|
+
memcpy(file_name, si->name, seg_len);
|
5796
|
+
file_name[seg_len] = '.';
|
5797
|
+
ext = file_name + seg_len + 1;
|
5047
5798
|
|
5048
5799
|
cw = open_cw(store, cfs_file_name);
|
5049
5800
|
for (i = 0; i < NELEMS(COMPOUND_EXTENSIONS); i++) {
|
5050
|
-
|
5051
|
-
|
5052
|
-
ary_push(file_names, estrdup(file_name));
|
5801
|
+
memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
|
5802
|
+
MOVE_TO_COMPOUND_DIR(file_name);
|
5053
5803
|
}
|
5054
5804
|
|
5055
5805
|
/* Field norm file_names */
|
5056
|
-
for (i =
|
5057
|
-
|
5058
|
-
|
5059
|
-
|
5060
|
-
if (!store->exists(store, file_name)) {
|
5061
|
-
continue;
|
5062
|
-
}
|
5063
|
-
ary_push(file_names, estrdup(file_name));
|
5806
|
+
for (i = fis->size - 1; i >= 0; i--) {
|
5807
|
+
if (fi_has_norms(fis->fields[i])
|
5808
|
+
&& si_norm_file_name(si, file_name, i)) {
|
5809
|
+
MOVE_TO_COMPOUND_DIR(file_name);
|
5064
5810
|
}
|
5065
5811
|
}
|
5066
5812
|
|
5067
|
-
/* Now merge all added file_names */
|
5068
|
-
file_names_size = ary_size(file_names);
|
5069
|
-
for (i = 0; i < file_names_size; i++) {
|
5070
|
-
cw_add_file(cw, file_names[i]);
|
5071
|
-
}
|
5072
|
-
|
5073
5813
|
/* Perform the merge */
|
5074
5814
|
cw_close(cw);
|
5075
|
-
|
5076
|
-
return file_names;
|
5077
5815
|
}
|
5078
5816
|
|
5079
|
-
static void iw_commit_compound_file(IndexWriter *iw,
|
5080
|
-
Lock *commit_lock)
|
5817
|
+
static void iw_commit_compound_file(IndexWriter *iw, SegmentInfo *si)
|
5081
5818
|
{
|
5082
|
-
char tmp_name[SEGMENT_NAME_MAX_LENGTH];
|
5083
5819
|
char cfs_name[SEGMENT_NAME_MAX_LENGTH];
|
5084
|
-
|
5085
|
-
sprintf(tmp_name, "%s.tmp", segment);
|
5086
|
-
sprintf(cfs_name, "%s.cfs", segment);
|
5087
|
-
|
5088
|
-
files_to_delete =
|
5089
|
-
iw_create_compound_file(iw->store, iw->fis, segment, tmp_name);
|
5090
|
-
if (!commit_lock->obtain(commit_lock)) {
|
5091
|
-
RAISE(LOCK_ERROR,
|
5092
|
-
"Couldn't obtain commit lock to write compound file");
|
5093
|
-
}
|
5820
|
+
sprintf(cfs_name, "%s.cfs", si->name);
|
5094
5821
|
|
5095
|
-
|
5096
|
-
|
5097
|
-
|
5098
|
-
commit_lock->release(commit_lock);
|
5099
|
-
}
|
5100
|
-
|
5101
|
-
#define ADD_IF_EXISTS_FMT(fmt, ext) do {\
|
5102
|
-
sprintf(file_name, fmt, segment, ext);\
|
5103
|
-
if (store->exists(store, file_name)) {\
|
5104
|
-
ary_push(file_names, estrdup(file_name));\
|
5105
|
-
}\
|
5106
|
-
} while (0)
|
5107
|
-
|
5108
|
-
#define ADD_IF_EXISTS(ext) ADD_IF_EXISTS_FMT("%s.%s", ext)
|
5109
|
-
|
5110
|
-
static char **iw_seg_file_names(FieldInfos *fis, Store *store, char *segment)
|
5111
|
-
{
|
5112
|
-
char **file_names = (char **)ary_new_capa(16);
|
5113
|
-
int i;
|
5114
|
-
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
5115
|
-
const int fis_size = fis->size;
|
5116
|
-
|
5117
|
-
|
5118
|
-
sprintf(file_name, "%s.cfs", segment);
|
5119
|
-
if (store->exists(store, file_name)) {
|
5120
|
-
ary_push(file_names, estrdup(file_name));
|
5121
|
-
ADD_IF_EXISTS("del");
|
5122
|
-
for (i = 0; i < fis_size; i++) {
|
5123
|
-
if (fi_has_norms(fis->fields[i])) {
|
5124
|
-
ADD_IF_EXISTS_FMT("%s.s%d", i);
|
5125
|
-
}
|
5126
|
-
}
|
5127
|
-
}
|
5128
|
-
else {
|
5129
|
-
for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
|
5130
|
-
ADD_IF_EXISTS(INDEX_EXTENSIONS[i]);
|
5131
|
-
}
|
5132
|
-
for (i = 0; i < fis_size; i++) {
|
5133
|
-
if (fi_has_norms(fis->fields[i])) {
|
5134
|
-
ADD_IF_EXISTS_FMT("%s.f%d", i);
|
5135
|
-
}
|
5136
|
-
}
|
5137
|
-
}
|
5138
|
-
return file_names;
|
5822
|
+
iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
|
5823
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5139
5824
|
}
|
5140
5825
|
|
5141
5826
|
static void iw_merge_segments(IndexWriter *iw, const int min_seg,
|
5142
5827
|
const int max_seg)
|
5143
5828
|
{
|
5144
5829
|
int i;
|
5145
|
-
Lock *commit_lock;
|
5146
5830
|
SegmentInfos *sis = iw->sis;
|
5147
5831
|
SegmentInfo *si = sis_new_segment(sis, 0, iw->store);
|
5148
5832
|
|
5149
|
-
SegmentMerger *merger = sm_create(iw, si
|
5833
|
+
SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg],
|
5150
5834
|
max_seg - min_seg);
|
5151
5835
|
|
5152
5836
|
/* This is where all the action happens. */
|
5153
5837
|
si->doc_cnt = sm_merge(merger);
|
5154
5838
|
|
5155
5839
|
mutex_lock(&iw->store->mutex);
|
5156
|
-
commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
|
5157
|
-
|
5158
|
-
/* *** OBTAIN COMMIT LOCK *** */
|
5159
|
-
if (!commit_lock->obtain(commit_lock)) {
|
5160
|
-
RAISE(LOCK_ERROR, "Couldn't obtain commit lock to commit merged segment "
|
5161
|
-
"%s", si->name);
|
5162
|
-
}
|
5163
5840
|
/* delete merged segments */
|
5164
5841
|
for (i = min_seg; i < max_seg; i++) {
|
5165
|
-
|
5166
|
-
iw_seg_file_names(iw->fis, sis->segs[i]->store, sis->segs[i]->name),
|
5167
|
-
iw->store);
|
5842
|
+
si_delete_files(sis->segs[i], iw->fis, iw->deleter);
|
5168
5843
|
}
|
5844
|
+
deleter_commit_pending_deletions(iw->deleter);
|
5845
|
+
|
5169
5846
|
sis_del_from_to(sis, min_seg, max_seg);
|
5170
|
-
/* commit the segments file */
|
5171
|
-
sis_write(sis, iw->store);
|
5172
|
-
commit_lock->release(commit_lock);
|
5173
|
-
/* RELEASE COMMIT LOCK */
|
5174
5847
|
|
5175
5848
|
if (iw->config.use_compound_file) {
|
5176
|
-
iw_commit_compound_file(iw, si
|
5849
|
+
iw_commit_compound_file(iw, si);
|
5850
|
+
si->use_compound_file = true;
|
5177
5851
|
}
|
5178
5852
|
|
5179
|
-
|
5853
|
+
sis_write(sis, iw->store, iw->deleter);
|
5180
5854
|
|
5181
5855
|
mutex_unlock(&iw->store->mutex);
|
5182
5856
|
|
@@ -5223,28 +5897,20 @@ static void iw_flush_ram_segment(IndexWriter *iw)
|
|
5223
5897
|
{
|
5224
5898
|
SegmentInfos *sis = iw->sis;
|
5225
5899
|
SegmentInfo *si;
|
5226
|
-
Lock *commit_lock;
|
5227
5900
|
|
5228
5901
|
si = sis->segs[sis->size - 1];
|
5229
5902
|
si->doc_cnt = iw->dw->doc_num;
|
5230
5903
|
dw_flush(iw->dw);
|
5231
5904
|
|
5232
5905
|
mutex_lock(&iw->store->mutex);
|
5233
|
-
commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
|
5234
5906
|
|
5235
|
-
if (
|
5236
|
-
|
5907
|
+
if (iw->config.use_compound_file) {
|
5908
|
+
iw_commit_compound_file(iw, si);
|
5909
|
+
si->use_compound_file = true;
|
5237
5910
|
}
|
5238
5911
|
/* commit the segments file and the fields file */
|
5239
|
-
|
5240
|
-
sis_write(iw->sis, iw->store);
|
5241
|
-
commit_lock->release(commit_lock);
|
5912
|
+
sis_write(iw->sis, iw->store, iw->deleter);
|
5242
5913
|
|
5243
|
-
|
5244
|
-
if (iw->config.use_compound_file) {
|
5245
|
-
iw_commit_compound_file(iw, si->name, commit_lock);
|
5246
|
-
}
|
5247
|
-
close_lock(commit_lock);
|
5248
5914
|
mutex_unlock(&iw->store->mutex);
|
5249
5915
|
|
5250
5916
|
iw_maybe_merge_segments(iw);
|
@@ -5253,11 +5919,11 @@ static void iw_flush_ram_segment(IndexWriter *iw)
|
|
5253
5919
|
void iw_add_doc(IndexWriter *iw, Document *doc)
|
5254
5920
|
{
|
5255
5921
|
mutex_lock(&iw->mutex);
|
5256
|
-
if (
|
5257
|
-
iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store)
|
5922
|
+
if (NULL == iw->dw) {
|
5923
|
+
iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store));
|
5258
5924
|
}
|
5259
|
-
else if (iw->dw->fw
|
5260
|
-
dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store)
|
5925
|
+
else if (NULL == iw->dw->fw) {
|
5926
|
+
dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store));
|
5261
5927
|
}
|
5262
5928
|
dw_add_doc(iw->dw, doc);
|
5263
5929
|
if (mp_used(iw->dw->mp) > iw->config.max_buffer_memory
|
@@ -5291,17 +5957,25 @@ void iw_delete_term(IndexWriter *iw, const char *field, const char *term)
|
|
5291
5957
|
do {
|
5292
5958
|
SegmentInfos *sis = iw->sis;
|
5293
5959
|
const int seg_cnt = sis->size;
|
5960
|
+
bool did_delete = false;
|
5294
5961
|
for (i = 0; i < seg_cnt; i++) {
|
5295
5962
|
IndexReader *ir = sr_open(sis, iw->fis, i, false);
|
5296
5963
|
TermDocEnum *tde = ir->term_docs(ir);
|
5964
|
+
ir->deleter = iw->deleter;
|
5297
5965
|
stde_seek(tde, field_num, term);
|
5298
5966
|
while (tde->next(tde)) {
|
5967
|
+
did_delete = true;
|
5299
5968
|
sr_delete_doc_i(ir, STDE(tde)->doc_num);
|
5300
5969
|
}
|
5301
5970
|
tde_destroy(tde);
|
5302
5971
|
sr_commit_i(ir);
|
5303
5972
|
ir_close(ir);
|
5304
5973
|
}
|
5974
|
+
if (did_delete) {
|
5975
|
+
mutex_lock(&iw->store->mutex);
|
5976
|
+
sis_write(iw->sis, iw->store, iw->deleter);
|
5977
|
+
mutex_unlock(&iw->store->mutex);
|
5978
|
+
}
|
5305
5979
|
} while (0);
|
5306
5980
|
mutex_unlock(&iw->mutex);
|
5307
5981
|
}
|
@@ -5316,7 +5990,7 @@ static void iw_optimize_i(IndexWriter *iw)
|
|
5316
5990
|
&& (si_has_deletions(iw->sis->segs[0])
|
5317
5991
|
|| (iw->sis->segs[0]->store != iw->store)
|
5318
5992
|
|| (iw->config.use_compound_file
|
5319
|
-
&& (!
|
5993
|
+
&& (!iw->sis->segs[0]->use_compound_file
|
5320
5994
|
|| si_has_separate_norms(iw->sis->segs[0])))))) {
|
5321
5995
|
min_segment = iw->sis->size - iw->config.merge_factor;
|
5322
5996
|
iw_merge_segments_from(iw, min_segment < 0 ? 0 : min_segment);
|
@@ -5344,7 +6018,9 @@ void iw_close(IndexWriter *iw)
|
|
5344
6018
|
|
5345
6019
|
iw->write_lock->release(iw->write_lock);
|
5346
6020
|
close_lock(iw->write_lock);
|
6021
|
+
iw->write_lock = NULL;
|
5347
6022
|
store_deref(iw->store);
|
6023
|
+
deleter_destroy(iw->deleter);
|
5348
6024
|
|
5349
6025
|
mutex_destroy(&iw->mutex);
|
5350
6026
|
free(iw);
|
@@ -5368,16 +6044,17 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
|
|
5368
6044
|
"Couldn't obtain write lock when opening IndexWriter");
|
5369
6045
|
}
|
5370
6046
|
|
5371
|
-
|
5372
6047
|
iw->sis = sis_read(store);
|
5373
|
-
iw->fis =
|
6048
|
+
iw->fis = iw->sis->fis;
|
6049
|
+
REF(iw->fis);
|
5374
6050
|
XCATCHALL
|
5375
6051
|
if (iw->write_lock) {
|
5376
6052
|
iw->write_lock->release(iw->write_lock);
|
5377
6053
|
close_lock(iw->write_lock);
|
6054
|
+
iw->write_lock = NULL;
|
5378
6055
|
}
|
5379
6056
|
if (iw->sis) sis_destroy(iw->sis);
|
5380
|
-
if (
|
6057
|
+
if (analyzer) a_deref((Analyzer *)analyzer);
|
5381
6058
|
free(iw);
|
5382
6059
|
XENDTRY
|
5383
6060
|
|
@@ -5385,6 +6062,9 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
|
|
5385
6062
|
iw->analyzer = analyzer ? (Analyzer *)analyzer
|
5386
6063
|
: mb_standard_analyzer_new(true);
|
5387
6064
|
|
6065
|
+
iw->deleter = deleter_new(iw->sis, store);
|
6066
|
+
deleter_delete_deletable_files(iw->deleter);
|
6067
|
+
|
5388
6068
|
REF(store);
|
5389
6069
|
return iw;
|
5390
6070
|
}
|
@@ -5400,18 +6080,19 @@ static void iw_cp_fields(IndexWriter *iw, SegmentReader *sr,
|
|
5400
6080
|
InStream *fdt_in, *fdx_in;
|
5401
6081
|
Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
|
5402
6082
|
Store *store_out = iw->store;
|
6083
|
+
char *sr_segment = sr->si->name;
|
5403
6084
|
|
5404
6085
|
sprintf(file_name, "%s.fdt", segment);
|
5405
6086
|
fdt_out = store_out->new_output(store_out, file_name);
|
5406
6087
|
sprintf(file_name, "%s.fdx", segment);
|
5407
6088
|
fdx_out = store_out->new_output(store_out, file_name);
|
5408
6089
|
|
5409
|
-
sprintf(file_name, "%s.fdt",
|
6090
|
+
sprintf(file_name, "%s.fdt", sr_segment);
|
5410
6091
|
fdt_in = store_in->open_input(store_in, file_name);
|
5411
|
-
sprintf(file_name, "%s.fdx",
|
6092
|
+
sprintf(file_name, "%s.fdx", sr_segment);
|
5412
6093
|
fdx_in = store_in->open_input(store_in, file_name);
|
5413
6094
|
|
5414
|
-
sprintf(file_name, "%s.del",
|
6095
|
+
sprintf(file_name, "%s.del", sr_segment);
|
5415
6096
|
if (store_in->exists(store_in, file_name)) {
|
5416
6097
|
OutStream *del_out;
|
5417
6098
|
InStream *del_in = store_in->open_input(store_in, file_name);
|
@@ -5487,30 +6168,31 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
|
|
5487
6168
|
InStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
|
5488
6169
|
Store *store_out = iw->store;
|
5489
6170
|
Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
|
6171
|
+
char *sr_segment = sr->si->name;
|
5490
6172
|
|
5491
6173
|
sprintf(file_name, "%s.tix", segment);
|
5492
6174
|
tix_out = store_out->new_output(store_out, file_name);
|
5493
|
-
sprintf(file_name, "%s.tix",
|
6175
|
+
sprintf(file_name, "%s.tix", sr_segment);
|
5494
6176
|
tix_in = store_in->open_input(store_in, file_name);
|
5495
6177
|
|
5496
6178
|
sprintf(file_name, "%s.tis", segment);
|
5497
6179
|
tis_out = store_out->new_output(store_out, file_name);
|
5498
|
-
sprintf(file_name, "%s.tis",
|
6180
|
+
sprintf(file_name, "%s.tis", sr_segment);
|
5499
6181
|
tis_in = store_in->open_input(store_in, file_name);
|
5500
6182
|
|
5501
6183
|
sprintf(file_name, "%s.tfx", segment);
|
5502
6184
|
tfx_out = store_out->new_output(store_out, file_name);
|
5503
|
-
sprintf(file_name, "%s.tfx",
|
6185
|
+
sprintf(file_name, "%s.tfx", sr_segment);
|
5504
6186
|
tfx_in = store_in->open_input(store_in, file_name);
|
5505
6187
|
|
5506
6188
|
sprintf(file_name, "%s.frq", segment);
|
5507
6189
|
frq_out = store_out->new_output(store_out, file_name);
|
5508
|
-
sprintf(file_name, "%s.frq",
|
6190
|
+
sprintf(file_name, "%s.frq", sr_segment);
|
5509
6191
|
frq_in = store_in->open_input(store_in, file_name);
|
5510
6192
|
|
5511
6193
|
sprintf(file_name, "%s.prx", segment);
|
5512
6194
|
prx_out = store_out->new_output(store_out, file_name);
|
5513
|
-
sprintf(file_name, "%s.prx",
|
6195
|
+
sprintf(file_name, "%s.prx", sr_segment);
|
5514
6196
|
prx_in = store_in->open_input(store_in, file_name);
|
5515
6197
|
|
5516
6198
|
if (map) {
|
@@ -5548,47 +6230,38 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
|
|
5548
6230
|
}
|
5549
6231
|
|
5550
6232
|
static void iw_cp_norms(IndexWriter *iw, SegmentReader *sr,
|
5551
|
-
|
6233
|
+
SegmentInfo *si, int *map)
|
5552
6234
|
{
|
5553
6235
|
int i;
|
5554
6236
|
FieldInfos *fis = IR(sr)->fis;
|
5555
6237
|
const int field_cnt = fis->size;
|
5556
6238
|
InStream *norms_in;
|
5557
6239
|
OutStream *norms_out;
|
5558
|
-
Store *store_in = sr->ir.store;
|
5559
|
-
Store *cfs_store_in = sr->cfs_store;
|
5560
6240
|
Store *store_out = iw->store;
|
5561
6241
|
char file_name_in[SEGMENT_NAME_MAX_LENGTH];
|
5562
|
-
char *ext_ptr_in;
|
5563
6242
|
char file_name_out[SEGMENT_NAME_MAX_LENGTH];
|
5564
|
-
char *ext_ptr_out;
|
5565
|
-
sprintf(file_name_in, "%s.", sr->segment);
|
5566
|
-
ext_ptr_in = file_name_in + strlen(file_name_in);
|
5567
|
-
sprintf(file_name_out, "%s.", segment);
|
5568
|
-
ext_ptr_out = file_name_out + strlen(file_name_out);
|
5569
6243
|
|
5570
6244
|
for (i = 0; i < field_cnt; i++) {
|
5571
|
-
if (fi_has_norms(fis->fields[i])
|
5572
|
-
|
5573
|
-
|
5574
|
-
|
5575
|
-
|
5576
|
-
|
5577
|
-
|
5578
|
-
|
5579
|
-
|
5580
|
-
|
5581
|
-
|
5582
|
-
|
5583
|
-
|
5584
|
-
|
5585
|
-
}
|
6245
|
+
if (fi_has_norms(fis->fields[i])
|
6246
|
+
&& si_norm_file_name(sr->si, file_name_in, i)) {
|
6247
|
+
Store *store = (sr->si->use_compound_file
|
6248
|
+
&& sr->si->norm_gens[i] == 0) ? sr->cfs_store
|
6249
|
+
: IR(sr)->store;
|
6250
|
+
int field_num = map ? map[i] : i;
|
6251
|
+
|
6252
|
+
norms_in = store->open_input(store, file_name_in);
|
6253
|
+
si_advance_norm_gen(si, field_num);
|
6254
|
+
si_norm_file_name(si, file_name_out, field_num);
|
6255
|
+
norms_out = store_out->new_output(store_out, file_name_out);
|
6256
|
+
is2os_copy_bytes(norms_in, norms_out, is_length(norms_in));
|
6257
|
+
os_close(norms_out);
|
6258
|
+
is_close(norms_in);
|
5586
6259
|
}
|
5587
6260
|
}
|
5588
6261
|
}
|
5589
6262
|
|
5590
6263
|
static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
|
5591
|
-
|
6264
|
+
SegmentInfo *si)
|
5592
6265
|
{
|
5593
6266
|
int i;
|
5594
6267
|
FieldInfos *from_fis = IR(sr)->fis;
|
@@ -5600,19 +6273,19 @@ static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
|
|
5600
6273
|
field_map[i] = fis_get_field_num(to_fis, from_fis->fields[i]->name);
|
5601
6274
|
}
|
5602
6275
|
|
5603
|
-
iw_cp_fields(iw, sr,
|
5604
|
-
iw_cp_terms(iw, sr,
|
5605
|
-
iw_cp_norms(iw, sr,
|
6276
|
+
iw_cp_fields(iw, sr, si->name, field_map);
|
6277
|
+
iw_cp_terms( iw, sr, si->name, field_map);
|
6278
|
+
iw_cp_norms( iw, sr, si, field_map);
|
5606
6279
|
|
5607
6280
|
free(field_map);
|
5608
6281
|
}
|
5609
6282
|
|
5610
6283
|
static void iw_cp_files(IndexWriter *iw, SegmentReader *sr,
|
5611
|
-
|
6284
|
+
SegmentInfo *si)
|
5612
6285
|
{
|
5613
|
-
iw_cp_fields(iw, sr,
|
5614
|
-
iw_cp_terms(iw, sr,
|
5615
|
-
iw_cp_norms(iw, sr,
|
6286
|
+
iw_cp_fields(iw, sr, si->name, NULL);
|
6287
|
+
iw_cp_terms( iw, sr, si->name, NULL);
|
6288
|
+
iw_cp_norms( iw, sr, si, NULL);
|
5616
6289
|
}
|
5617
6290
|
|
5618
6291
|
static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
|
@@ -5641,10 +6314,10 @@ static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
|
|
5641
6314
|
}
|
5642
6315
|
|
5643
6316
|
if (must_map_fields) {
|
5644
|
-
iw_cp_map_files(iw, sr, si
|
6317
|
+
iw_cp_map_files(iw, sr, si);
|
5645
6318
|
}
|
5646
6319
|
else {
|
5647
|
-
iw_cp_files(iw, sr, si
|
6320
|
+
iw_cp_files(iw, sr, si);
|
5648
6321
|
}
|
5649
6322
|
}
|
5650
6323
|
|
@@ -5666,8 +6339,6 @@ static void iw_add_segments(IndexWriter *iw, IndexReader *ir)
|
|
5666
6339
|
void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
|
5667
6340
|
{
|
5668
6341
|
int i;
|
5669
|
-
Lock *commit_lock;
|
5670
|
-
|
5671
6342
|
mutex_lock(&iw->mutex);
|
5672
6343
|
iw_optimize_i(iw);
|
5673
6344
|
|
@@ -5676,16 +6347,9 @@ void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
|
|
5676
6347
|
}
|
5677
6348
|
|
5678
6349
|
mutex_lock(&iw->store->mutex);
|
5679
|
-
commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
|
5680
6350
|
|
5681
|
-
if (!commit_lock->obtain(commit_lock)) {
|
5682
|
-
RAISE(LOCK_ERROR, "Couldn't obtain commit lock to write segments file");
|
5683
|
-
}
|
5684
6351
|
/* commit the segments file and the fields file */
|
5685
|
-
|
5686
|
-
sis_write(iw->sis, iw->store);
|
5687
|
-
commit_lock->release(commit_lock);
|
5688
|
-
close_lock(commit_lock);
|
6352
|
+
sis_write(iw->sis, iw->store, iw->deleter);
|
5689
6353
|
mutex_unlock(&iw->store->mutex);
|
5690
6354
|
|
5691
6355
|
iw_optimize_i(iw);
|