ferret 0.10.14 → 0.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/hash.c CHANGED
@@ -18,13 +18,6 @@ static char *dummy_key = "";
18
18
  static HashTable *free_hts[MAX_FREE_HASH_TABLES];
19
19
  static int num_free_hts = 0;
20
20
 
21
- unsigned long *imalloc(unsigned long value)
22
- {
23
- unsigned long *p = ALLOC(unsigned long);
24
- *p = value;
25
- return p;
26
- }
27
-
28
21
  unsigned long str_hash(const char *const str)
29
22
  {
30
23
  register unsigned long h = 0;
data/ext/hash.h CHANGED
@@ -81,14 +81,6 @@ typedef unsigned long (*hash_ft)(const void *key);
81
81
  */
82
82
  typedef int (*eq_ft)(const void *key1, const void *key2);
83
83
 
84
-
85
- /**
86
- * Create a pointer to an allocated U32 integer. This function is a utility
87
- * function used to add integers to a HashTable, either as the key or the
88
- * value.
89
- */
90
- extern unsigned long *imalloc(unsigned long value);
91
-
92
84
  /**
93
85
  * Determine a hash value for a string. The string must be null terminated
94
86
  *
data/ext/index.c CHANGED
@@ -5,6 +5,7 @@
5
5
  #include "priorityqueue.h"
6
6
  #include <string.h>
7
7
  #include <limits.h>
8
+ #include <ctype.h>
8
9
 
9
10
  #define GET_LOCK(lock, name, store, err_msg) do {\
10
11
  lock = store->open_lock(store, name);\
@@ -18,14 +19,6 @@
18
19
  store->close_lock(lock);\
19
20
  } while (0)
20
21
 
21
- const char *INDEX_EXTENSIONS[] = {
22
- "fdx", "fdt", "tfx", "tix", "tis", "frq", "prx", "del"
23
- };
24
-
25
- const char *COMPOUND_EXTENSIONS[] = {
26
- "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
27
- };
28
-
29
22
  const Config default_config = {
30
23
  0x100000, /* chunk size is 1Mb */
31
24
  0x1000000, /* Max memory used for buffer is 16 Mb */
@@ -41,6 +34,128 @@ const Config default_config = {
41
34
  static void ste_reset(TermEnum *te);
42
35
  static char *ste_next(TermEnum *te);
43
36
 
37
+ #define FORMAT 0
38
+ #define SEGMENTS_GEN_FILE_NAME "segments.gen"
39
+ #define MAX_EXT_LEN 10
40
+
41
+ /* *** Must be three characters *** */
42
+ const char *INDEX_EXTENSIONS[] = {
43
+ "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen", "cfs"
44
+ };
45
+
46
+ /* *** Must be three characters *** */
47
+ const char *COMPOUND_EXTENSIONS[] = {
48
+ "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
49
+ };
50
+
51
+
52
+ static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
53
+
54
+ static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
55
+ {
56
+ int i = buf_size--;
57
+ buf[i] = '\0';
58
+ for (i--; i >= 0; i--) {
59
+ buf[i] = BASE36_DIGITMAP[u % 36];
60
+ u /= 36;
61
+ if (0 == u) {
62
+ break;
63
+ }
64
+ }
65
+ if (0 < u) {
66
+ RAISE(EXCEPTION, "Max length of segment filename has been reached. "
67
+ "Perhaps it's time to re-index.\n");
68
+ }
69
+ return buf + i;
70
+ }
71
+
72
+ static f_u64 str36_to_u64(char *p)
73
+ {
74
+ f_u64 u = 0;
75
+ while (true) {
76
+ if ('0' <= *p && '9' >= *p) {
77
+ u = u * 36 + *p - '0';
78
+ }
79
+ else if ('a' <= *p && 'z' >= *p) {
80
+ u = u * 36 + *p - 'a' + 10;
81
+ }
82
+ else {
83
+ break;
84
+ }
85
+ p++;
86
+ }
87
+ return u;
88
+ }
89
+
90
+ /*
91
+ * Computes the full file name from base, extension and generation. If the
92
+ * generation is -1, the file name is NULL. Otherwise the file name is
93
+ * <base>_<generation>.<extension> (or <base>_<generation> when ext is NULL),
94
+ * with the generation written in base 36.
95
+ *
96
+ * @param buf buffer to write filename to
97
+ * @param base main part of the file name
98
+ * @param ext extension of the filename (the '.' is inserted here; may be NULL)
99
+ * @param gen generation
100
+ */
101
+ char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
102
+ {
103
+ if (-1 == gen) {
104
+ return NULL;
105
+ }
106
+ else {
107
+ char b[SEGMENT_NAME_MAX_LENGTH];
108
+ char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen);
109
+ if (ext == NULL) {
110
+ sprintf(buf, "%s_%s", base, u);
111
+ }
112
+ else {
113
+ sprintf(buf, "%s_%s.%s", base, u, ext);
114
+ }
115
+ return buf;
116
+ }
117
+ }
118
+
119
+ char *segfn_for_generation(char *buf, int generation)
120
+ {
121
+ char b[SEGMENT_NAME_MAX_LENGTH];
122
+ char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)generation);
123
+ sprintf(buf, SEGMENTS_FILE_NAME"_%s", u);
124
+ return buf;
125
+ }
126
+
127
+ /*
128
+ * Computes the field specific file name from base, extension, generation and
129
+ * field number. If the generation is -1, the file name is NULL. Otherwise
129
+ * the file name is <base>_<generation>.<extension><field_num>, with the
130
+ * generation written in base 36.
132
+ *
133
+ * @param buf buffer to write filename to
134
+ * @param base main part of the file name
135
+ * @param ext extension of the filename (the '.' is inserted here)
136
+ * @param gen generation
137
+ * @param field_num field number
138
+ */
139
+ static char *fn_for_gen_field(char *buf,
140
+ char *base,
141
+ char *ext,
142
+ f_i64 gen,
143
+ int field_num)
144
+ {
145
+ if (-1 == gen) {
146
+ return NULL;
147
+ }
148
+ else {
149
+ char b[SEGMENT_NAME_MAX_LENGTH];
150
+ sprintf(buf, "%s_%s.%s%d",
151
+ base,
152
+ u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen),
153
+ ext,
154
+ field_num);
155
+ return buf;
156
+ }
157
+ }
158
+
44
159
  /***************************************************************************
45
160
  *
46
161
  * CacheObject
@@ -175,7 +290,7 @@ FieldInfo *fi_new(const char *name,
175
290
 
176
291
  void fi_deref(FieldInfo *fi)
177
292
  {
178
- if (--(fi->ref_cnt) == 0) {
293
+ if (0 == --(fi->ref_cnt)) {
179
294
  free(fi->name);
180
295
  free(fi);
181
296
  }
@@ -208,9 +323,6 @@ char *fi_to_s(FieldInfo *fi)
208
323
  *
209
324
  ****************************************************************************/
210
325
 
211
- #define FIELDS_FILENAME "fields"
212
- #define TEMPORARY_FIELDS_FILENAME "fields.new"
213
-
214
326
  FieldInfos *fis_new(int store, int index, int term_vector)
215
327
  {
216
328
  FieldInfos *fis = ALLOC(FieldInfos);
@@ -278,14 +390,13 @@ FieldInfo *fis_by_number(FieldInfos *fis, int num)
278
390
  }
279
391
  }
280
392
 
281
- FieldInfos *fis_read(Store *store)
393
+ FieldInfos *fis_read(InStream *is)
282
394
  {
283
395
  int store_val, index_val, term_vector_val;
284
396
  int i;
285
397
  union { f_u32 i; float f; } tmp;
286
398
  FieldInfo *fi;
287
399
  FieldInfos *fis;
288
- InStream *is = store->open_input(store, FIELDS_FILENAME);
289
400
 
290
401
  store_val = is_read_vint(is);
291
402
  index_val = is_read_vint(is);
@@ -300,17 +411,15 @@ FieldInfos *fis_read(Store *store)
300
411
  fis_add_field(fis, fi);
301
412
  fi->ref_cnt = 1;
302
413
  }
303
- is_close(is);
304
414
 
305
415
  return fis;
306
416
  }
307
417
 
308
- void fis_write(FieldInfos *fis, Store *store)
418
+ void fis_write(FieldInfos *fis, OutStream *os)
309
419
  {
310
420
  int i;
311
421
  union { f_u32 i; float f; } tmp;
312
422
  FieldInfo *fi;
313
- OutStream *os = store->new_output(store, TEMPORARY_FIELDS_FILENAME);
314
423
  const int fis_size = fis->size;
315
424
 
316
425
  os_write_vint(os, fis->store);
@@ -324,9 +433,6 @@ void fis_write(FieldInfos *fis, Store *store)
324
433
  os_write_u32(os, tmp.i);
325
434
  os_write_vint(os, fi->bits);
326
435
  }
327
- os_close(os);
328
-
329
- store->rename(store, TEMPORARY_FIELDS_FILENAME, FIELDS_FILENAME);
330
436
  }
331
437
 
332
438
  static const char *store_str[] = {
@@ -408,7 +514,7 @@ char *fis_to_s(FieldInfos *fis)
408
514
 
409
515
  void fis_deref(FieldInfos *fis)
410
516
  {
411
- if (--(fis->ref_cnt) == 0) {
517
+ if (0 == --(fis->ref_cnt)) {
412
518
  h_destroy(fis->field_dict);
413
519
  free(fis->fields);
414
520
  free(fis);
@@ -440,54 +546,144 @@ SegmentInfo *si_new(char *name, int doc_cnt, Store *store)
440
546
  si->name = name;
441
547
  si->doc_cnt = doc_cnt;
442
548
  si->store = store;
549
+ si->del_gen = -1;
550
+ si->norm_gens = NULL;
551
+ si->norm_gens_size = 0;
552
+ si->ref_cnt = 1;
553
+ si->use_compound_file = false;
443
554
  return si;
444
555
  }
445
556
 
446
- void si_destroy(SegmentInfo *si)
557
+ SegmentInfo *si_read(Store *store, InStream *is)
447
558
  {
448
- free(si->name);
449
- free(si);
559
+ SegmentInfo *si = ALLOC_AND_ZERO(SegmentInfo);
560
+ si->store = store;
561
+ si->name = is_read_string(is);
562
+ si->doc_cnt = is_read_vint(is);
563
+ si->del_gen = is_read_vint(is);
564
+ si->norm_gens_size = is_read_vint(is);
565
+ si->ref_cnt = 1;
566
+ if (0 < si->norm_gens_size) {
567
+ int i;
568
+ si->norm_gens = ALLOC_N(int, si->norm_gens_size);
569
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
570
+ si->norm_gens[i] = is_read_vint(is);
571
+ }
572
+ }
573
+ si->use_compound_file = (bool)is_read_byte(is);
574
+ return si;
450
575
  }
451
576
 
452
- bool si_has_deletions(SegmentInfo *si)
577
+ void si_write(SegmentInfo *si, OutStream *os)
453
578
  {
454
- char del_file_name[SEGMENT_NAME_MAX_LENGTH];
455
- sprintf(del_file_name, "%s.del", si->name);
456
- return si->store->exists(si->store, del_file_name);
579
+ os_write_string(os, si->name);
580
+ os_write_vint(os, si->doc_cnt);
581
+ os_write_vint(os, si->del_gen);
582
+ os_write_vint(os, si->norm_gens_size);
583
+ if (0 < si->norm_gens_size) {
584
+ int i;
585
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
586
+ os_write_vint(os, si->norm_gens[i]);
587
+ }
588
+ }
589
+ os_write_byte(os, (uchar)si->use_compound_file);
457
590
  }
458
591
 
459
- bool si_uses_compound_file(SegmentInfo *si)
592
+ void si_deref(SegmentInfo *si)
460
593
  {
461
- char compound_file_name[SEGMENT_NAME_MAX_LENGTH];
462
- sprintf(compound_file_name, "%s.cfs", si->name);
463
- return si->store->exists(si->store, compound_file_name);
594
+ if (--si->ref_cnt <= 0) {
595
+ free(si->name);
596
+ free(si->norm_gens);
597
+ free(si);
598
+ }
464
599
  }
465
600
 
466
- struct NormTester {
467
- bool has_norm_file;
468
- int norm_file_pattern_len;
469
- char norm_file_pattern[SEGMENT_NAME_MAX_LENGTH];
470
- };
601
+ bool si_has_deletions(SegmentInfo *si)
602
+ {
603
+ return si->del_gen >= 0;
604
+ }
471
605
 
472
- static void is_norm_file(char *file_name, struct NormTester *nt)
606
+ char *si_del_file_name(SegmentInfo *si, char *buf)
473
607
  {
474
- if (strncmp(file_name, nt->norm_file_pattern,
475
- nt->norm_file_pattern_len) == 0) {
476
- nt->has_norm_file = true;
608
+ if (si->del_gen < 0) {
609
+ return NULL;
610
+ }
611
+ else {
612
+ return fn_for_generation(buf, si->name, ".del", si->del_gen);
477
613
  }
478
614
  }
479
615
 
480
616
  bool si_has_separate_norms(SegmentInfo *si)
481
617
  {
482
- struct NormTester nt;
483
- sprintf(nt.norm_file_pattern, "%s.s", si->name);
484
- nt.norm_file_pattern_len = strlen(nt.norm_file_pattern);
485
- nt.has_norm_file = false;
486
- si->store->each(si->store, (void (*)(char *file_name, void *arg))&is_norm_file, &nt);
618
+ if (si->use_compound_file && si->norm_gens) {
619
+ int i;
620
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
621
+ if (si->norm_gens[i] > 0) return true;
622
+ }
623
+ }
624
+ return false;
625
+ }
487
626
 
488
- return nt.has_norm_file;
627
+ void si_advance_norm_gen(SegmentInfo *si, int field_num)
628
+ {
629
+ if (field_num >= si->norm_gens_size) {
630
+ int i;
631
+ REALLOC_N(si->norm_gens, int, field_num + 1);
632
+ for (i = si->norm_gens_size; i <= field_num; i++) {
633
+ si->norm_gens[i] = -1;
634
+ }
635
+ si->norm_gens_size = field_num + 1;
636
+ }
637
+ si->norm_gens[field_num]++;
489
638
  }
490
639
 
640
+ char *si_norm_file_name(SegmentInfo *si, char *buf, int field_num)
641
+ {
642
+ int norm_gen;
643
+ if (field_num >= si->norm_gens_size
644
+ || 0 > (norm_gen = si->norm_gens[field_num])) {
645
+ return NULL;
646
+ }
647
+ else {
648
+ char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
649
+ return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
650
+ }
651
+ }
652
+
653
+ void deleter_queue_file(Deleter *dlr, char *file_name);
654
+ #define DEL(file_name) deleter_queue_file(dlr, file_name)
655
+
656
+ static void si_delete_files(SegmentInfo *si, FieldInfos *fis, Deleter *dlr)
657
+ {
658
+ int i;
659
+ char file_name[SEGMENT_NAME_MAX_LENGTH];
660
+ size_t seg_len = strlen(si->name);
661
+ char *ext;
662
+
663
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
664
+ if (0 <= si->norm_gens[i]) {
665
+ DEL(si_norm_file_name(si, file_name, fis->fields[i]->number));
666
+ }
667
+ }
668
+
669
+ memcpy(file_name, si->name, seg_len);
670
+ file_name[seg_len] = '.';
671
+ ext = file_name + seg_len + 1;
672
+
673
+ if (si->use_compound_file) {
674
+ memcpy(ext, "cfs", 4);
675
+ DEL(file_name);
676
+ if (0 <= si->del_gen) {
677
+ DEL(fn_for_generation(file_name, si->name, "del", si->del_gen));
678
+ }
679
+ }
680
+ else {
681
+ for (i = NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
682
+ memcpy(ext, INDEX_EXTENSIONS[i], 4);
683
+ DEL(file_name);
684
+ }
685
+ }
686
+ }
491
687
 
492
688
  /****************************************************************************
493
689
  *
@@ -496,42 +692,266 @@ bool si_has_separate_norms(SegmentInfo *si)
496
692
  ****************************************************************************/
497
693
 
498
694
  #include <time.h>
499
- #define FORMAT 0
500
- #define SEGMENTS_FILENAME "segments"
501
- #define TEMPORARY_SEGMENTS_FILENAME "segments.new"
502
- #define MAX_EXT_LEN 10
695
+ static char *new_segment(f_i64 generation)
696
+ {
697
+ char buf[SEGMENT_NAME_MAX_LENGTH];
698
+ char *fn_p = u64_to_str36(buf, SEGMENT_NAME_MAX_LENGTH - 1,
699
+ (f_u64)generation);
700
+ *(--fn_p) = '_';
701
+ return estrdup(fn_p);
702
+ }
703
+
704
+ /****************************************************************************
705
+ * FindSegmentsFile
706
+ ****************************************************************************/
503
707
 
504
- static const char base36_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
708
+ typedef struct FindSegmentsFile {
709
+ f_i64 generation;
710
+ f_u64 u64_return;
711
+ void *p_return;
712
+ } FindSegmentsFile;
505
713
 
506
- static char *new_segment(f_u64 counter)
714
+ static void which_gen_i(char *file_name, void *arg)
715
+ {
716
+ f_i64 *max_generation = (f_i64 *)arg;
717
+ if (0 == strncmp(SEGMENTS_FILE_NAME"_", file_name,
718
+ sizeof(SEGMENTS_FILE_NAME))) {
719
+ char *p = strrchr(file_name, '_') + 1;
720
+ f_i64 generation = (f_i64)str36_to_u64(p);
721
+ if (generation > *max_generation) *max_generation = generation;
722
+ }
723
+ }
724
+
725
+ static void si_put(SegmentInfo *si, FILE *stream)
507
726
  {
508
- char file_name[SEGMENT_NAME_MAX_LENGTH];
509
727
  int i;
728
+ fprintf(stream, "\tSegmentInfo {\n");
729
+ fprintf(stream, "\t\tname = %s\n", si->name);
730
+ fprintf(stream, "\t\tdoc_cnt = %d\n", si->doc_cnt);
731
+ fprintf(stream, "\t\tdel_gen = %d\n", si->del_gen);
732
+ fprintf(stream, "\t\tnorm_gens_size = %d\n", si->norm_gens_size);
733
+ fprintf(stream, "\t\tnorm_gens {\n");
734
+ for (i = 0; i < si->norm_gens_size; i++) {
735
+ fprintf(stream, "\t\t\t%d\n", si->norm_gens[i]);
736
+ }
737
+ fprintf(stream, "\t\t}\n");
738
+ fprintf(stream, "\t\tref_cnt = %d\n", si->ref_cnt);
739
+ fprintf(stream, "\t}\n");
740
+ }
510
741
 
511
- file_name[SEGMENT_NAME_MAX_LENGTH - 1] = '\0';
512
- for (i = SEGMENT_NAME_MAX_LENGTH - 2; i > MAX_EXT_LEN; i--) {
513
- file_name[i] = base36_digitmap[counter%36];
514
- counter /= 36;
515
- if (counter == 0) {
516
- break;
517
- }
742
+ void sis_put(SegmentInfos *sis, FILE *stream)
743
+ {
744
+ int i;
745
+ fprintf(stream, "SegmentInfos {\n");
746
+ fprintf(stream, "\tcounter = %"POSH_I64_PRINTF_PREFIX"d\n", sis->counter);
747
+ fprintf(stream, "\tversion = %"POSH_I64_PRINTF_PREFIX"d\n", sis->version);
748
+ fprintf(stream, "\tgeneration = %"POSH_I64_PRINTF_PREFIX"d\n", sis->generation);
749
+ fprintf(stream, "\tformat = %d\n", sis->format);
750
+ fprintf(stream, "\tsize = %d\n", sis->size);
751
+ fprintf(stream, "\tcapa = %d\n", sis->capa);
752
+ for (i = 0; i < sis->size; i++) {
753
+ si_put(sis->segs[i], stream);
518
754
  }
519
- if (i == MAX_EXT_LEN) {
520
- RAISE(EXCEPTION, "Max length of segment filename has been reached. "
521
- "Time to re-index.\n");
755
+ fprintf(stream, "}\n");
756
+ }
757
+
758
+ /*
759
+ * Get the generation (N) of the current segments_N file from a list of files.
760
+ *
761
+ * @param store - the Store to look in
762
+ */
763
+ f_i64 sis_current_segment_generation(Store *store)
764
+ {
765
+ f_i64 current_generation = -1;
766
+ store->each(store, &which_gen_i, &current_generation);
767
+ return current_generation;
768
+ }
769
+
770
+ /*
771
+ * Get the current generation filename.
772
+ *
773
+ * @param buf - buffer to write filename to
774
+ * @param store - the Store to look in
775
+ * @return segments_N where N is the current generation
776
+ */
777
+ char *sis_curr_seg_file_name(char *buf, Store *store)
778
+ {
779
+ return segfn_for_generation(buf, sis_current_segment_generation(store));
780
+ }
781
+
782
+ /*
783
+ * Get the next generation filename.
784
+ *
785
+ * @param buf - buffer to write filename to
786
+ * @param store - the Store to look in
787
+ * @return segments_N where N is the +next+ generation
788
+ */
789
+ char *sis_next_seg_file_name(char *buf, Store *store)
790
+ {
791
+ return segfn_for_generation(buf, sis_current_segment_generation(store) + 1);
792
+ }
793
+
794
+ #define GEN_FILE_RETRY_COUNT 10
795
+ #define GEN_LOOK_AHEAD_COUNT 10
796
+ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
797
+ void (*run)(Store *store, FindSegmentsFile *fsf))
798
+ {
799
+ int i;
800
+ int gen_look_ahead_count = 0;
801
+ bool retry = false;
802
+ int method = 0;
803
+ f_i64 last_gen = -1;
804
+ f_i64 gen = 0;
805
+
806
+ /* Loop until we succeed in calling run() without it raising IO_ERROR or
807
+ * FILE_NOT_FOUND_ERROR. Such an error most likely means a commit was in progress
808
+ * and has finished, in the time it took us to load the now-old infos
809
+ * files (and segments files). It's also possible it's a true error
810
+ * (corrupt index). To distinguish these, on each retry we must see
811
+ * "forward progress" on which generation we are trying to load. If we
812
+ * don't, then the original error is real and we throw it.
813
+ *
814
+ * We have three methods for determining the current generation. We try
815
+ * each in sequence. */
816
+ while (true) {
817
+ /* Method 1: list the directory and use the highest segments_N file.
818
+ * This method works well as long as there is no stale caching on the
819
+ * directory contents: */
820
+ if (0 == method) {
821
+ gen = sis_current_segment_generation(store);
822
+ if (gen == -1) {
823
+ /*fprintf(stderr, ">>\n%s\n>>\n", store_to_s(store));*/
824
+ RAISE(FILE_NOT_FOUND_ERROR, "couldn't find segments file");
825
+ }
826
+ }
827
+
828
+ /* Method 2 (fallback if Method 1 isn't reliable): if the directory
829
+ * listing seems to be stale, try loading the "segments.gen" file. */
830
+ if (1 == method || (0 == method && last_gen == gen && retry)) {
831
+ method = 1;
832
+ for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
833
+ InStream *gen_is = NULL;
834
+ TRY
835
+ gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
836
+ XCATCHALL
837
+ HANDLED();
838
+ /* TODO:LOG "segments.gen open: IO_ERROR"*/
839
+ XENDTRY
840
+
841
+ if (NULL != gen_is) {
842
+ f_i64 gen0 = -1, gen1 = -1;
843
+
844
+ TRY
845
+ gen0 = is_read_u64(gen_is);
846
+ gen1 = is_read_u64(gen_is);
847
+ XFINALLY
848
+ /* if there is an error we'll simply try again */
849
+ HANDLED();
850
+ is_close(gen_is);
851
+ XENDTRY
852
+ /* TODO:LOG "fallback check: " + gen0 + "; " + gen1 */
853
+ if (gen0 == gen1) {
854
+ /* The file is consistent. */
855
+ if (gen0 > gen) {
856
+ /* TODO:LOG "fallback to '" +
857
+ * IndexFileNames.SEGMENTS_GEN + "' check: now
858
+ * try generation " + gen0 + " > " + gen */
859
+ gen = gen0;
860
+ }
861
+ goto method_two_loop_end;
862
+ }
863
+ break;
864
+ }
865
+ /* sleep for 50 milliseconds */
866
+ micro_sleep(50000);
867
+ }
868
+ }
869
+ method_two_loop_end:
870
+
871
+ /* Method 3 (fallback if Methods 1 & 2 are not reliable): since both
872
+ * directory cache and file contents cache seem to be stale, just
873
+ * advance the generation. */
874
+ if (2 == method || (1 == method && last_gen == gen && retry)) {
875
+ method = 2;
876
+ if (gen_look_ahead_count < GEN_LOOK_AHEAD_COUNT) {
877
+ gen++;
878
+ gen_look_ahead_count++;
879
+ /* TODO:LOG "look ahead increment gen to " + gen */
880
+ }
881
+ }
882
+
883
+ if (last_gen == gen) {
884
+ /* This means we're about to try the same segments_N last tried.
885
+ * This is allowed, exactly once, because writer could have been
886
+ * in the process of writing segments_N last time. */
887
+ if (retry) {
888
+ /* OK, we've tried the same segments_N file twice in a row, so
889
+ * this must be a real error. We throw the original exception
890
+ * we got. */
891
+ RAISE(IO_ERROR, "Error reading the segment infos");
892
+ }
893
+ else {
894
+ retry = true;
895
+ }
896
+ }
897
+ else {
898
+ /* Segment file has advanced since our last loop, so reset retry: */
899
+ retry = false;
900
+ }
901
+ last_gen = gen;
902
+
903
+ TRY
904
+ fsf->generation = gen;
905
+ run(store, fsf);
906
+ RETURN_EARLY();
907
+ return;
908
+ case IO_ERROR: case FILE_NOT_FOUND_ERROR:
909
+ HANDLED();
910
+ /* Save the original root cause: */
911
+ /* TODO:LOG "primary Exception on '" + segmentFileName + "': " +
912
+ * err + "'; will retry: retry=" + retry + "; gen = " + gen */
913
+
914
+ if (!retry && gen > 1) {
915
+ /* This is our first time trying this segments file (because
916
+ * retry is false), and, there is possibly a segments_(N-1)
917
+ * (because gen > 1). So, check if the segments_(N-1) exists
918
+ * and try it if so: */
919
+ char prev_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
920
+ segfn_for_generation(prev_seg_file_name, gen - 1);
921
+ if (store->exists(store, prev_seg_file_name)) {
922
+ /* TODO:LOG "fallback to prior segment file '" +
923
+ * prevSegmentFileName + "'" */
924
+ TRY
925
+ fsf->generation = gen - 1;
926
+ run(store, fsf);
927
+ /* TODO:LOG "success on fallback " +
928
+ * prev_seg_file_name */
929
+
930
+ /* pop two contexts as we are in nested try blocks */
931
+ RETURN_EARLY();
932
+ RETURN_EARLY();
933
+ return;
934
+ case IO_ERROR: case FILE_NOT_FOUND_ERROR:
935
+ HANDLED();
936
+ /* TODO:LOG "secondary Exception on '" +
937
+ * prev_seg_file_name + "': " + err2 + "'; will retry"*/
938
+ XENDTRY
939
+ }
940
+ }
941
+ XENDTRY
522
942
  }
523
- i--;
524
- file_name[i] = '_';
525
- return estrdup(&file_name[i]);
526
943
  }
527
944
 
528
- SegmentInfos *sis_new()
945
+ SegmentInfos *sis_new(FieldInfos *fis)
529
946
  {
530
- SegmentInfos *sis = ALLOC(SegmentInfos);
947
+ SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
948
+ REF(fis);
949
+ sis->fis = fis;
531
950
  sis->format = FORMAT;
532
951
  sis->version = (f_u64)time(NULL);
533
952
  sis->size = 0;
534
953
  sis->counter = 0;
954
+ sis->generation = -1;
535
955
  sis->capa = 4;
536
956
  sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
537
957
  return sis;
@@ -539,8 +959,7 @@ SegmentInfos *sis_new()
539
959
 
540
960
  SegmentInfo *sis_new_segment(SegmentInfos *sis, int doc_cnt, Store *store)
541
961
  {
542
- return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt,
543
- store));
962
+ return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt, store));
544
963
  }
545
964
 
546
965
  void sis_destroy(SegmentInfos *sis)
@@ -548,8 +967,9 @@ void sis_destroy(SegmentInfos *sis)
548
967
  int i;
549
968
  const int sis_size = sis->size;
550
969
  for (i = 0; i < sis_size; i++) {
551
- si_destroy(sis->segs[i]);
970
+ si_deref(sis->segs[i]);
552
971
  }
972
+ if (sis->fis) fis_deref(sis->fis);
553
973
  free(sis->segs);
554
974
  free(sis);
555
975
  }
@@ -557,11 +977,10 @@ void sis_destroy(SegmentInfos *sis)
557
977
  SegmentInfo *sis_add_si(SegmentInfos *sis, SegmentInfo *si)
558
978
  {
559
979
  if (sis->size >= sis->capa) {
560
- sis->capa = sis->size * 2;
980
+ sis->capa <<= 1;
561
981
  REALLOC_N(sis->segs, SegmentInfo *, sis->capa);
562
982
  }
563
- sis->segs[sis->size] = si;
564
- sis->size++;
983
+ sis->segs[sis->size++] = si;
565
984
  return si;
566
985
  }
567
986
 
@@ -569,7 +988,7 @@ void sis_del_at(SegmentInfos *sis, int at)
569
988
  {
570
989
  int i;
571
990
  const int sis_size = --(sis->size);
572
- si_destroy(sis->segs[at]);
991
+ si_deref(sis->segs[at]);
573
992
  for (i = at; i < sis_size; i++) {
574
993
  sis->segs[i] = sis->segs[i+1];
575
994
  }
@@ -580,7 +999,7 @@ void sis_del_from_to(SegmentInfos *sis, int from, int to)
580
999
  int i, num_to_del = to - from;
581
1000
  const int sis_size = sis->size -= num_to_del;
582
1001
  for (i = from; i < to; i++) {
583
- si_destroy(sis->segs[i]);
1002
+ si_deref(sis->segs[i]);
584
1003
  }
585
1004
  for (i = from; i < sis_size; i++) {
586
1005
  sis->segs[i] = sis->segs[i+num_to_del];
@@ -592,74 +1011,106 @@ void sis_clear(SegmentInfos *sis)
592
1011
  int i;
593
1012
  const int sis_size = sis->size;
594
1013
  for (i = 0; i < sis_size; i++) {
595
- si_destroy(sis->segs[i]);
1014
+ si_deref(sis->segs[i]);
596
1015
  }
597
1016
  sis->size = 0;
598
1017
  }
599
1018
 
600
- SegmentInfos *sis_read(Store *store)
1019
+ void sis_read_i(Store *store, FindSegmentsFile *fsf)
601
1020
  {
602
- int doc_cnt;
603
1021
  int seg_cnt;
604
1022
  int i;
605
- char *name;
606
- InStream *is = store->open_input(store, SEGMENTS_FILENAME);
607
- SegmentInfos *sis = ALLOC(SegmentInfos);
608
- sis->store = store;
1023
+ bool success = false;
1024
+ char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
1025
+ InStream *is = NULL;
1026
+ SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
1027
+ segfn_for_generation(seg_file_name, fsf->generation);
1028
+ TRY
1029
+ is = store->open_input(store, seg_file_name);
1030
+ sis->store = store;
609
1031
 
610
- sis->format = is_read_u32(is); /* do nothing. it's the first version */
611
- sis->version = is_read_u64(is);
612
- sis->counter = is_read_u64(is);
613
- seg_cnt = is_read_vint(is);
1032
+ sis->generation = fsf->generation;
1033
+ sis->format = is_read_u32(is); /* do nothing. it's the first version */
1034
+ sis->version = is_read_u64(is);
1035
+ sis->counter = is_read_u64(is);
1036
+ seg_cnt = is_read_vint(is);
614
1037
 
615
- /* allocate space for segments */
616
- for (sis->capa = 4; sis->capa < seg_cnt; sis->capa <<= 1) {
617
- }
618
- sis->size = 0;
619
- sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
1038
+ /* allocate space for segments */
1039
+ for (sis->capa = 4; sis->capa < seg_cnt; sis->capa <<= 1) {
1040
+ }
1041
+ sis->size = 0;
1042
+ sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
620
1043
 
621
- for (i = 0; i < seg_cnt; i++) {
622
- name = is_read_string(is);
623
- doc_cnt = is_read_vint(is);
624
- sis_add_si(sis, si_new(name, doc_cnt, store));
625
- }
626
- is_close(is);
1044
+ for (i = 0; i < seg_cnt; i++) {
1045
+ sis_add_si(sis, si_read(store, is));
1046
+ }
1047
+ sis->fis = fis_read(is);
1048
+ success = true;
1049
+ XFINALLY
1050
+ if (is) is_close(is);
1051
+ if (!success) {
1052
+ sis_destroy(sis);
1053
+ }
1054
+ XENDTRY
1055
+ fsf->p_return = sis;
1056
+ }
627
1057
 
628
- return sis;
1058
+ SegmentInfos *sis_read(Store *store)
1059
+ {
1060
+ FindSegmentsFile fsf;
1061
+ sis_find_segments_file(store, &fsf, &sis_read_i);
1062
+ return fsf.p_return;
629
1063
  }
630
1064
 
631
- void sis_write(SegmentInfos *sis, Store *store)
1065
+ void sis_write(SegmentInfos *sis, Store *store, Deleter *deleter)
632
1066
  {
633
1067
  int i;
634
- SegmentInfo *si;
635
- OutStream *os = store->new_output(store, TEMPORARY_SEGMENTS_FILENAME);
1068
+ OutStream *os = NULL;
636
1069
  const int sis_size = sis->size;
1070
+ char buf[SEGMENT_NAME_MAX_LENGTH];
1071
+ sis->generation++;
637
1072
 
638
- os_write_u32(os, FORMAT);
639
- os_write_u64(os, ++(sis->version)); /* every write changes the index */
640
- os_write_u64(os, sis->counter);
641
- os_write_vint(os, sis->size);
642
- for (i = 0; i < sis_size; i++) {
643
- si = sis->segs[i];
644
- os_write_string(os, si->name);
645
- os_write_vint(os, si->doc_cnt);
646
- }
647
- os_close(os);
1073
+ TRY
1074
+ os = store->new_output(store,
1075
+ segfn_for_generation(buf, sis->generation));
1076
+ os_write_u32(os, FORMAT);
1077
+ os_write_u64(os, ++(sis->version)); /* every write changes the index */
1078
+ os_write_u64(os, sis->counter);
1079
+ os_write_vint(os, sis->size);
1080
+ for (i = 0; i < sis_size; i++) {
1081
+ si_write(sis->segs[i], os);
1082
+ }
1083
+ fis_write(sis->fis, os);
1084
+ XFINALLY
1085
+ os_close(os);
1086
+ XENDTRY
1087
+
1088
+ TRY
1089
+ os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
1090
+ os_write_u64(os, sis->generation);
1091
+ os_write_u64(os, sis->generation);
1092
+ XFINALLY
1093
+ /* It's OK if we fail to write this file since it's
1094
+ * used only as one of the retry fallbacks. */
1095
+ HANDLED();
1096
+ os_close(os);
1097
+ XENDTRY
648
1098
 
649
- /* install new segment info */
650
- store->rename(store, TEMPORARY_SEGMENTS_FILENAME, SEGMENTS_FILENAME);
1099
+ if (deleter && sis->generation > 0) {
1100
+ deleter_delete_file(deleter,
1101
+ segfn_for_generation(buf, sis->generation - 1));
1102
+ }
651
1103
  }
652
1104
 
653
- f_u64 sis_read_current_version(Store *store)
1105
+ void sis_read_ver_i(Store *store, FindSegmentsFile *fsf)
654
1106
  {
655
1107
  InStream *is;
656
1108
  f_u32 format = 0;
657
1109
  f_u64 version = 0;
1110
+ char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
658
1111
 
659
- if (!store->exists(store, SEGMENTS_FILENAME)) {
660
- return 0;
661
- }
662
- is = store->open_input(store, SEGMENTS_FILENAME);
1112
+ segfn_for_generation(seg_file_name, (f_u64)fsf->generation);
1113
+ is = store->open_input(store, seg_file_name);
663
1114
 
664
1115
  TRY
665
1116
  format = is_read_u32(is);
@@ -668,7 +1119,14 @@ f_u64 sis_read_current_version(Store *store)
668
1119
  is_close(is);
669
1120
  XENDTRY
670
1121
 
671
- return version;
1122
+ fsf->u64_return = version;
1123
+ }
1124
+
1125
+ f_u64 sis_read_current_version(Store *store)
1126
+ {
1127
+ FindSegmentsFile fsf;
1128
+ sis_find_segments_file(store, &fsf, &sis_read_ver_i);
1129
+ return fsf.u64_return;
672
1130
  }
673
1131
 
674
1132
  /****************************************************************************
@@ -704,7 +1162,7 @@ char *lazy_df_get_data(LazyDocField *self, int i)
704
1162
  char *text = NULL;
705
1163
  if (i < self->size && i >= 0) {
706
1164
  text = self->data[i].text;
707
- if (text == NULL) {
1165
+ if (NULL == text) {
708
1166
  const int read_len = self->data[i].length + 1;
709
1167
  self->data[i].text = text = ALLOC_N(char, read_len);
710
1168
  is_seek(self->doc->fields_in, self->data[i].start);
@@ -1220,8 +1678,8 @@ char *te_skip_to(TermEnum *te, const char *term)
1220
1678
  {
1221
1679
  char *curr_term = te->curr_term;
1222
1680
  if (strcmp(curr_term, term) < 0) {
1223
- while (((curr_term = te->next(te)) != NULL) &&
1224
- (strcmp(curr_term, term) < 0)) {
1681
+ while (NULL != ((curr_term = te->next(te)))
1682
+ && (strcmp(curr_term, term) < 0)) {
1225
1683
  }
1226
1684
  }
1227
1685
  return curr_term;
@@ -1258,7 +1716,7 @@ static void sti_destroy(SegmentTermIndex *sti)
1258
1716
  static void sti_ensure_index_is_read(SegmentTermIndex *sti,
1259
1717
  TermEnum *index_te)
1260
1718
  {
1261
- if (sti->index_terms == NULL) {
1719
+ if (NULL == sti->index_terms) {
1262
1720
  int i;
1263
1721
  int index_size = sti->index_size;
1264
1722
  off_t index_ptr = 0;
@@ -1314,7 +1772,7 @@ static int sti_get_index_offset(SegmentTermIndex *sti, const char *term)
1314
1772
  ****************************************************************************/
1315
1773
 
1316
1774
  #define SFI_ENSURE_INDEX_IS_READ(sfi, sti) do {\
1317
- if (sti->index_terms == NULL) {\
1775
+ if (NULL == sti->index_terms) {\
1318
1776
  mutex_lock(&sfi->mutex);\
1319
1777
  sti_ensure_index_is_read(sti, sfi->index_te);\
1320
1778
  mutex_unlock(&sfi->mutex);\
@@ -1351,7 +1809,7 @@ SegmentFieldIndex *sfi_open(Store *store, const char *segment)
1351
1809
 
1352
1810
  sprintf(file_name, "%s.tix", segment);
1353
1811
  is = store->open_input(store, file_name);
1354
- sfi->index_te = ste_new(is, NULL);
1812
+ sfi->index_te = ste_new(is, sfi);
1355
1813
  return sfi;
1356
1814
  }
1357
1815
 
@@ -1394,8 +1852,8 @@ static char *ste_next(TermEnum *te)
1394
1852
 
1395
1853
  ti = &(te->curr_ti);
1396
1854
  ti->doc_freq = is_read_vint(is); /* read doc freq */
1397
- ti->frq_ptr += is_read_voff_t(is);/* read freq ptr */
1398
- ti->prx_ptr += is_read_voff_t(is);/* read prox ptr */
1855
+ ti->frq_ptr += is_read_voff_t(is); /* read freq ptr */
1856
+ ti->prx_ptr += is_read_voff_t(is); /* read prox ptr */
1399
1857
  if (ti->doc_freq >= STE(te)->skip_interval) {
1400
1858
  ti->skip_offset = is_read_voff_t(is);
1401
1859
  }
@@ -1497,7 +1955,7 @@ static TermInfo *ste_scan_for_term_info(SegmentTermEnum *ste, const char *term)
1497
1955
  {
1498
1956
  ste_scan_to(ste, term);
1499
1957
 
1500
- if (strcmp(TE(ste)->curr_term, term) == 0) {
1958
+ if (0 == strcmp(TE(ste)->curr_term, term)) {
1501
1959
  return te_get_ti((TermEnum *)ste);
1502
1960
  }
1503
1961
  else {
@@ -1521,7 +1979,7 @@ static char *ste_get_term(TermEnum *te, int pos)
1521
1979
  ste_index_seek(te, sti, pos / idx_int);
1522
1980
  }
1523
1981
  while (ste->pos < pos) {
1524
- if (ste_next(te) == NULL) {
1982
+ if (NULL == ste_next(te)) {
1525
1983
  return NULL;
1526
1984
  }
1527
1985
  }
@@ -1575,7 +2033,7 @@ typedef struct MultiTermEnum
1575
2033
  static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
1576
2034
  {
1577
2035
  int cmpres = strcmp(tew1->term, tew2->term);
1578
- if (cmpres == 0) {
2036
+ if (0 == cmpres) {
1579
2037
  return tew1->index < tew2->index;
1580
2038
  }
1581
2039
  else {
@@ -1637,7 +2095,7 @@ static char *mte_next(TermEnum *te)
1637
2095
  TermEnumWrapper *top =
1638
2096
  (TermEnumWrapper *)pq_top(MTE(te)->tew_queue);
1639
2097
 
1640
- if (top == NULL) {
2098
+ if (NULL == top) {
1641
2099
  te->curr_term[0] = '\0';
1642
2100
  te->curr_term_len = 0;
1643
2101
  return false;
@@ -1650,7 +2108,7 @@ static char *mte_next(TermEnum *te)
1650
2108
  te->curr_ti.doc_freq = 0;
1651
2109
 
1652
2110
  MTE(te)->ti_cnt = 0;
1653
- while ((top != NULL) && (strcmp(te->curr_term, top->term) == 0)) {
2111
+ while ((NULL != top) && (0 == strcmp(te->curr_term, top->term))) {
1654
2112
  pq_pop(MTE(te)->tew_queue);
1655
2113
  te->curr_ti.doc_freq += top->te->curr_ti.doc_freq;/* increment freq */
1656
2114
  MTE(te)->ti_indexes[MTE(te)->ti_cnt] = top->index;
@@ -1752,7 +2210,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
1752
2210
  if (fnum >= 0) {
1753
2211
  TermEnumWrapper *tew;
1754
2212
 
1755
- if (term != NULL) {
2213
+ if (NULL != term) {
1756
2214
  sub_te = reader->terms_from(reader, fnum, term);
1757
2215
  }
1758
2216
  else {
@@ -1760,7 +2218,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
1760
2218
  }
1761
2219
 
1762
2220
  tew = tew_setup(&(mte->tews[i]), i, sub_te, reader);
1763
- if (((term == NULL) && tew_next(tew))
2221
+ if (((NULL == term) && tew_next(tew))
1764
2222
  || (tew->term && (tew->term[0] != '\0'))) {
1765
2223
  pq_push(mte->tew_queue, tew); /* initialize queue */
1766
2224
  }
@@ -1772,7 +2230,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
1772
2230
  }
1773
2231
  }
1774
2232
 
1775
- if ((term != NULL) && (mte->tew_queue->size > 0)) {
2233
+ if ((NULL != term) && (0 < mte->tew_queue->size)) {
1776
2234
  mte_next(TE(mte));
1777
2235
  }
1778
2236
 
@@ -1804,7 +2262,7 @@ TermInfosReader *tir_open(Store *store,
1804
2262
  static __inline TermEnum *tir_enum(TermInfosReader *tir)
1805
2263
  {
1806
2264
  TermEnum *te;
1807
- if ((te = thread_getspecific(tir->thread_te)) == NULL) {
2265
+ if (NULL == (te = thread_getspecific(tir->thread_te))) {
1808
2266
  te = ste_clone(tir->orig_te);
1809
2267
  ste_set_field(te, tir->field_num);
1810
2268
  ary_push(tir->te_bucket, te);
@@ -1827,8 +2285,8 @@ TermInfo *tir_get_ti(TermInfosReader *tir, const char *term)
1827
2285
  TermEnum *te = tir_enum(tir);
1828
2286
  char *match;
1829
2287
 
1830
- if ((match = ste_scan_to(te, term)) != NULL &&
1831
- strcmp(match, term) == 0) {
2288
+ if (NULL != (match = ste_scan_to(te, term))
2289
+ && 0 == strcmp(match, term)) {
1832
2290
  return &(te->curr_ti);
1833
2291
  }
1834
2292
  return NULL;
@@ -1845,8 +2303,8 @@ TermInfo *tir_get_ti_field(TermInfosReader *tir, int field_num,
1845
2303
  tir->field_num = field_num;
1846
2304
  }
1847
2305
 
1848
- if ((match = ste_scan_to(te, term)) != NULL &&
1849
- strcmp(match, term) == 0) {
2306
+ if (NULL != (match = ste_scan_to(te, term))
2307
+ && 0 == strcmp(match, term)) {
1850
2308
  return &(te->curr_ti);
1851
2309
  }
1852
2310
  return NULL;
@@ -1937,7 +2395,7 @@ static __inline void tw_write_term(TermWriter *tw,
1937
2395
 
1938
2396
  os_write_vint(os, start); /* write shared prefix length */
1939
2397
  os_write_vint(os, length); /* write delta length */
1940
- os_write_bytes(os, (uchar *)(term + start), length); /* write delta chars */
2398
+ os_write_bytes(os, (uchar *)(term + start), length); /* write delta chars */
1941
2399
 
1942
2400
  tw->last_term = term;
1943
2401
  }
@@ -1945,13 +2403,15 @@ static __inline void tw_write_term(TermWriter *tw,
1945
2403
  static void tw_add(TermWriter *tw,
1946
2404
  const char *term,
1947
2405
  int term_len,
1948
- TermInfo *ti)
2406
+ TermInfo *ti,
2407
+ int skip_interval)
1949
2408
  {
1950
2409
  OutStream *os = tw->os;
1951
2410
 
1952
2411
  #ifdef DEBUG
1953
2412
  if (strcmp(tw->last_term, term) > 0) {
1954
- RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d", tw->last_term, term, *tw->last_term, *term);
2413
+ RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d",
2414
+ tw->last_term, term, *tw->last_term, *term);
1955
2415
  }
1956
2416
  if (ti->frq_ptr < tw->last_term_info.frq_ptr) {
1957
2417
  RAISE(STATE_ERROR, "%"F_OFF_T_PFX"d > %"F_OFF_T_PFX"d", ti->frq_ptr,
@@ -1967,6 +2427,9 @@ static void tw_add(TermWriter *tw,
1967
2427
  os_write_vint(os, ti->doc_freq); /* write doc freq */
1968
2428
  os_write_voff_t(os, ti->frq_ptr - tw->last_term_info.frq_ptr);
1969
2429
  os_write_voff_t(os, ti->prx_ptr - tw->last_term_info.prx_ptr);
2430
+ if (ti->doc_freq >= skip_interval) {
2431
+ os_write_voff_t(os, ti->skip_offset);
2432
+ }
1970
2433
 
1971
2434
  tw->last_term_info = *ti;
1972
2435
  tw->counter++;
@@ -1983,22 +2446,19 @@ void tiw_add(TermInfosWriter *tiw,
1983
2446
  printf("%s:%d:%d:%d:%d\n", term, term_len, ti->doc_freq,
1984
2447
  ti->frq_ptr, ti->prx_ptr);
1985
2448
  */
1986
- if ((tiw->tis_writer->counter % tiw->index_interval) == 0) {
2449
+ if (0 == (tiw->tis_writer->counter % tiw->index_interval)) {
1987
2450
  /* add an index term */
1988
2451
  tw_add(tiw->tix_writer,
1989
2452
  tiw->tis_writer->last_term,
1990
2453
  strlen(tiw->tis_writer->last_term),
1991
- &(tiw->tis_writer->last_term_info));
2454
+ &(tiw->tis_writer->last_term_info),
2455
+ tiw->skip_interval);
1992
2456
  tis_pos = os_pos(tiw->tis_writer->os);
1993
2457
  os_write_voff_t(tiw->tix_writer->os, tis_pos - tiw->last_index_ptr);
1994
2458
  tiw->last_index_ptr = tis_pos; /* write ptr */
1995
2459
  }
1996
2460
 
1997
- tw_add(tiw->tis_writer, term, term_len, ti);
1998
-
1999
- if (ti->doc_freq >= tiw->skip_interval) {
2000
- os_write_voff_t(tiw->tis_writer->os, ti->skip_offset);
2001
- }
2461
+ tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
2002
2462
  }
2003
2463
 
2004
2464
  static __inline void tw_reset(TermWriter *tw)
@@ -2051,7 +2511,7 @@ void tiw_close(TermInfosWriter *tiw)
2051
2511
  #define TDE(stde) ((TermDocEnum *)(stde))
2052
2512
 
2053
2513
  #define CHECK_STATE(method) do {\
2054
- if (STDE(tde)->count == 0) {\
2514
+ if (0 == STDE(tde)->count) {\
2055
2515
  RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
2056
2516
  "before you call #"method);\
2057
2517
  }\
@@ -2059,7 +2519,7 @@ void tiw_close(TermInfosWriter *tiw)
2059
2519
 
2060
2520
  static void stde_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
2061
2521
  {
2062
- if (ti == NULL) {
2522
+ if (NULL == ti) {
2063
2523
  stde->doc_freq = 0;
2064
2524
  }
2065
2525
  else {
@@ -2117,7 +2577,7 @@ static bool stde_next(TermDocEnum *tde)
2117
2577
 
2118
2578
  doc_code = is_read_vint(stde->frq_in);
2119
2579
  stde->doc_num += doc_code >> 1; /* shift off low bit */
2120
- if ((doc_code & 1) != 0) { /* if low bit is set */
2580
+ if (0 != (doc_code & 1)) { /* if low bit is set */
2121
2581
  stde->freq = 1; /* freq is one */
2122
2582
  }
2123
2583
  else {
@@ -2126,8 +2586,8 @@ static bool stde_next(TermDocEnum *tde)
2126
2586
 
2127
2587
  stde->count++;
2128
2588
 
2129
- if (stde->deleted_docs == NULL ||
2130
- bv_get(stde->deleted_docs, stde->doc_num) == 0) {
2589
+ if (NULL == stde->deleted_docs
2590
+ || 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
2131
2591
  break; /* We found an undeleted doc so return */
2132
2592
  }
2133
2593
 
@@ -2146,7 +2606,7 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
2146
2606
  /* manually inlined call to next() for speed */
2147
2607
  doc_code = is_read_vint(stde->frq_in);
2148
2608
  stde->doc_num += (doc_code >> 1); /* shift off low bit */
2149
- if ((doc_code & 1) != 0) { /* if low bit is set */
2609
+ if (0 != (doc_code & 1)) { /* if low bit is set */
2150
2610
  stde->freq = 1; /* freq is one */
2151
2611
  }
2152
2612
  else {
@@ -2155,8 +2615,8 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
2155
2615
 
2156
2616
  stde->count++;
2157
2617
 
2158
- if (stde->deleted_docs == NULL ||
2159
- bv_get(stde->deleted_docs, stde->doc_num) == 0) {
2618
+ if (NULL == stde->deleted_docs
2619
+ || 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
2160
2620
  docs[i] = stde->doc_num;
2161
2621
  freqs[i] = stde->freq;
2162
2622
  i++;
@@ -2169,16 +2629,18 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
2169
2629
  {
2170
2630
  SegmentTermDocEnum *stde = STDE(tde);
2171
2631
 
2172
- if (stde->doc_freq >= stde->skip_interval) { /* optimized case */
2632
+ if (stde->doc_freq >= stde->skip_interval
2633
+ && target_doc_num > stde->doc_num) { /* optimized case */
2173
2634
  int last_skip_doc;
2174
- int last_frq_ptr;
2175
- int last_prx_ptr;
2635
+ off_t last_frq_ptr;
2636
+ off_t last_prx_ptr;
2176
2637
  int num_skipped;
2177
2638
 
2178
- if (stde->skip_in == NULL) {
2179
- stde->skip_in = is_clone(stde->frq_in); /* lazily clone */
2639
+ if (NULL == stde->skip_in) {
2640
+ stde->skip_in = is_clone(stde->frq_in);/* lazily clone */
2180
2641
  }
2181
2642
 
2643
+ //printf("skip_ptr = %lld\n", stde->skip_ptr);
2182
2644
  if (!stde->have_skipped) { /* lazily seek skip stream */
2183
2645
  is_seek(stde->skip_in, stde->skip_ptr);
2184
2646
  stde->have_skipped = true;
@@ -2189,13 +2651,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
2189
2651
  last_frq_ptr = is_pos(stde->frq_in);
2190
2652
  last_prx_ptr = -1;
2191
2653
  num_skipped = -1 - (stde->count % stde->skip_interval);
2654
+ //printf("%d, %d, %d, %d\n", last_skip_doc, last_frq_ptr, last_prx_ptr, num_skipped);
2192
2655
 
2193
2656
  while (target_doc_num > stde->skip_doc) {
2194
2657
  last_skip_doc = stde->skip_doc;
2195
2658
  last_frq_ptr = stde->frq_ptr;
2196
2659
  last_prx_ptr = stde->prx_ptr;
2197
2660
 
2198
- if (stde->skip_doc != 0 && stde->skip_doc >= stde->doc_num) {
2661
+ if (0 != stde->skip_doc && stde->skip_doc >= stde->doc_num) {
2199
2662
  num_skipped += stde->skip_interval;
2200
2663
  }
2201
2664
 
@@ -2204,13 +2667,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
2204
2667
  }
2205
2668
 
2206
2669
  stde->skip_doc += is_read_vint(stde->skip_in);
2207
- stde->frq_ptr += is_read_vint(stde->skip_in);
2208
- stde->prx_ptr += is_read_vint(stde->skip_in);
2670
+ stde->frq_ptr += is_read_vint(stde->skip_in);
2671
+ stde->prx_ptr += is_read_vint(stde->skip_in);
2672
+ //printf("inner-> skip_doc:%d, frq_ptr:%d, prx_ptr:%d\n", stde->skip_doc, stde->frq_ptr, stde->prx_ptr);
2209
2673
 
2210
2674
  stde->skip_count++;
2211
2675
  }
2212
2676
 
2213
- /* if we found something to skip, so skip it */
2677
+ /* if we found something to skip, skip it */
2214
2678
  if (last_frq_ptr > is_pos(stde->frq_in)) {
2215
2679
  is_seek(stde->frq_in, last_frq_ptr);
2216
2680
  stde->seek_prox(stde, last_prx_ptr);
@@ -2233,7 +2697,7 @@ static void stde_close(TermDocEnum *tde)
2233
2697
  {
2234
2698
  is_close(STDE(tde)->frq_in);
2235
2699
 
2236
- if (STDE(tde)->skip_in != NULL) {
2700
+ if (NULL != STDE(tde)->skip_in) {
2237
2701
  is_close(STDE(tde)->skip_in);
2238
2702
  }
2239
2703
 
@@ -2245,7 +2709,7 @@ static void stde_skip_prox(SegmentTermDocEnum *stde)
2245
2709
  (void)stde;
2246
2710
  }
2247
2711
 
2248
- static void stde_seek_prox(SegmentTermDocEnum *stde, int prx_ptr)
2712
+ static void stde_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
2249
2713
  {
2250
2714
  (void)stde;
2251
2715
  (void)prx_ptr;
@@ -2290,7 +2754,7 @@ TermDocEnum *stde_new(TermInfosReader *tir,
2290
2754
 
2291
2755
  static void stpe_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
2292
2756
  {
2293
- if (ti == NULL) {
2757
+ if (NULL == ti) {
2294
2758
  stde->doc_freq = 0;
2295
2759
  }
2296
2760
  else {
@@ -2351,7 +2815,7 @@ static void stpe_skip_prox(SegmentTermDocEnum *stde)
2351
2815
  is_skip_vints(stde->prx_in, stde->freq);
2352
2816
  }
2353
2817
 
2354
- static void stpe_seek_prox(SegmentTermDocEnum *stde, int prx_ptr)
2818
+ static void stpe_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
2355
2819
  {
2356
2820
  is_seek(stde->prx_in, prx_ptr);
2357
2821
  stde->prx_cnt = 0;
@@ -2422,7 +2886,7 @@ static TermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
2422
2886
  }
2423
2887
 
2424
2888
  #define CHECK_CURR_TDE(method) do {\
2425
- if (MTDE(tde)->curr_tde == NULL) {\
2889
+ if (NULL == MTDE(tde)->curr_tde) {\
2426
2890
  RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
2427
2891
  "before you call #"method);\
2428
2892
  }\
@@ -2456,7 +2920,7 @@ static void mtde_seek(TermDocEnum *tde, int field_num, const char *term)
2456
2920
  TermEnum *te = mtde->te;
2457
2921
  char *t;
2458
2922
  te->set_field(te, field_num);
2459
- if ((t = te->skip_to(te, term)) != NULL && strcmp(term, t) == 0) {
2923
+ if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
2460
2924
  mtde_seek_te(tde, te);
2461
2925
  } else {
2462
2926
  memset(mtde->state, 0, mtde->ir_cnt);
@@ -2478,7 +2942,7 @@ static int mtde_freq(TermDocEnum *tde)
2478
2942
  static bool mtde_next(TermDocEnum *tde)
2479
2943
  {
2480
2944
  MultiTermDocEnum *mtde = MTDE(tde);
2481
- if (mtde->curr_tde != NULL && mtde->curr_tde->next(mtde->curr_tde)) {
2945
+ if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
2482
2946
  return true;
2483
2947
  }
2484
2948
  else if (mtde_next_tde(mtde)) {
@@ -2494,7 +2958,7 @@ static int mtde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
2494
2958
  int i, end = 0, last_end = 0, b;
2495
2959
  MultiTermDocEnum *mtde = MTDE(tde);
2496
2960
  while (true) {
2497
- if (mtde->curr_tde == NULL) return end;
2961
+ if (NULL == mtde->curr_tde) return end;
2498
2962
  end += mtde->curr_tde->read(mtde->curr_tde, docs + last_end,
2499
2963
  freqs + last_end, req_num - last_end);
2500
2964
  if (end == last_end) { /* none left in segment */
@@ -2527,13 +2991,7 @@ static bool mtde_skip_to(TermDocEnum *tde, int target_doc_num)
2527
2991
 
2528
2992
  mtde_next_tde(mtde);
2529
2993
  }
2530
-
2531
- if (curr_tde) {
2532
- return curr_tde->skip_to(curr_tde, target_doc_num - mtde->base);
2533
- }
2534
- else {
2535
- return false;
2536
- }
2994
+ return false;
2537
2995
  }
2538
2996
 
2539
2997
  static void mtde_close(TermDocEnum *tde)
@@ -2660,7 +3118,7 @@ static bool mtdpe_next(TermDocEnum *tde)
2660
3118
  int doc;
2661
3119
  MultipleTermDocPosEnum *mtdpe = MTDPE(tde);
2662
3120
 
2663
- if (mtdpe->pq->size == 0) {
3121
+ if (0 == mtdpe->pq->size) {
2664
3122
  return false;
2665
3123
  }
2666
3124
 
@@ -2710,7 +3168,7 @@ bool mtdpe_skip_to(TermDocEnum *tde, int target_doc_num)
2710
3168
  TermDocEnum *sub_tde;
2711
3169
  PriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
2712
3170
 
2713
- while ((sub_tde = (TermDocEnum *)pq_top(mtdpe_pq)) != NULL
3171
+ while (NULL != (sub_tde = (TermDocEnum *)pq_top(mtdpe_pq))
2714
3172
  && (target_doc_num > sub_tde->doc_num(sub_tde))) {
2715
3173
  if (sub_tde->skip_to(sub_tde, target_doc_num)) {
2716
3174
  pq_down(mtdpe_pq);
@@ -2779,6 +3237,256 @@ TermDocEnum *mtdpe_new(IndexReader *ir, int field_num, char **terms, int t_cnt)
2779
3237
  return tde;
2780
3238
  }
2781
3239
 
3240
+ /****************************************************************************
3241
+ *
3242
+ * FileNameFilter
3243
+ *
3244
+ ****************************************************************************/
3245
+
3246
+ static HashTable *fn_extensions = NULL;
3247
+ static void file_name_filter_init()
3248
+ {
3249
+ if (NULL == fn_extensions) {
3250
+ int i;
3251
+ fn_extensions = h_new_str((free_ft)NULL, (free_ft)NULL);
3252
+ for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
3253
+ h_set(fn_extensions, INDEX_EXTENSIONS[i], (char *)INDEX_EXTENSIONS[i]);
3254
+ }
3255
+ register_for_cleanup(fn_extensions, (free_ft)&h_destroy);
3256
+ }
3257
+ }
3258
+
3259
+ static bool file_name_filter_accept(char *file_name)
3260
+ {
3261
+ char *p = strrchr(file_name, '.');
3262
+ if (NULL != p) {
3263
+ char *extension = p + 1;
3264
+ if (NULL != h_get(fn_extensions, extension)) {
3265
+ return true;
3266
+ }
3267
+ else if ((*extension == 'f' || *extension == 's')
3268
+ && *(extension + 1) >= '0'
3269
+ && *(extension + 1) <= '9') {
3270
+ return true;
3271
+ }
3272
+ }
3273
+ else if (0 == strncmp(SEGMENTS_FILE_NAME, file_name,
3274
+ sizeof(SEGMENTS_FILE_NAME) - 1)) {
3275
+ return true;
3276
+ }
3277
+ return false;
3278
+ }
3279
+
3280
+ /*
3281
+ * Returns true if this is a file that would be contained in a CFS file. This
3282
+ * function should only be called on files that pass the above "accept" (ie,
3283
+ * are already known to be a Lucene index file).
3284
+ */
3285
+ static bool file_name_filter_is_cfs_file(char *file_name) {
3286
+ char *p = strrchr(file_name, '.');
3287
+ if (NULL != p) {
3288
+ char *extension = p + 1;
3289
+ if (NULL != h_get(fn_extensions, extension)
3290
+ && 0 != strcmp(extension, "del")
3291
+ && 0 != strcmp(extension, "gen")
3292
+ && 0 != strcmp(extension, "cfs")) {
3293
+ return true;
3294
+ }
3295
+ else if ('f' == *extension
3296
+ && '0' <= *(extension + 1)
3297
+ && '9' >= *(extension + 1)) {
3298
+ return true;
3299
+ }
3300
+ }
3301
+ return false;
3302
+ }
3303
+
3304
+ /****************************************************************************
3305
+ *
3306
+ * Deleter
3307
+ *
3308
+ ****************************************************************************/
3309
+
3310
+ #define DELETABLE_START_CAPA 8
3311
+ Deleter *deleter_new(SegmentInfos *sis, Store *store)
3312
+ {
3313
+ Deleter *dlr = ALLOC(Deleter);
3314
+ dlr->sis = sis;
3315
+ dlr->store = store;
3316
+ dlr->pending = hs_new_str(&free);
3317
+ return dlr;
3318
+ }
3319
+
3320
+ void deleter_destroy(Deleter *dlr)
3321
+ {
3322
+ hs_destroy(dlr->pending);
3323
+ free(dlr);
3324
+ }
3325
+
3326
+ void deleter_queue_file(Deleter *dlr, char *file_name)
3327
+ {
3328
+ hs_add(dlr->pending, estrdup(file_name));
3329
+ }
3330
+
3331
+ void deleter_delete_file(Deleter *dlr, char *file_name)
3332
+ {
3333
+ Store *store = dlr->store;
3334
+ TRY
3335
+ if (store->exists(store, file_name)) {
3336
+ store->remove(store, file_name);
3337
+ }
3338
+ hs_del(dlr->pending, file_name);
3339
+ XCATCHALL
3340
+ hs_add(dlr->pending, estrdup(file_name));
3341
+ XENDTRY
3342
+ }
3343
+
3344
+ void deleter_commit_pending_deletions(Deleter *dlr)
3345
+ {
3346
+ int i;
3347
+ char **pending = (char **)dlr->pending->elems;
3348
+ for (i = dlr->pending->size - 1; i >= 0; i--) {
3349
+ deleter_delete_file(dlr, pending[i]);
3350
+ }
3351
+ }
3352
+
3353
+ void deleter_delete_files(Deleter *dlr, char **files, int file_cnt)
3354
+ {
3355
+ int i;
3356
+ for (i = file_cnt - 1; i >= 0; i--) {
3357
+ deleter_queue_file(dlr, files[i]);
3358
+ }
3359
+ deleter_commit_pending_deletions(dlr);
3360
+ }
3361
+
3362
+ struct DelFilesArg {
3363
+ char curr_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
3364
+ Deleter *dlr;
3365
+ HashTable *current;
3366
+ };
3367
+
3368
+ static void deleter_find_deletable_files_i(char *file_name, void *arg)
3369
+ {
3370
+ struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
3371
+ Deleter *dlr = dfa->dlr;
3372
+
3373
+ if (file_name_filter_accept(file_name)
3374
+ && 0 != strcmp(file_name, dfa->curr_seg_file_name)
3375
+ && 0 != strcmp(file_name, SEGMENTS_GEN_FILE_NAME)) {
3376
+
3377
+ bool do_delete = false;
3378
+ SegmentInfo *si;
3379
+ char segment_name[SEGMENT_NAME_MAX_LENGTH];
3380
+ char *extension, *p;
3381
+ strcpy(segment_name, file_name);
3382
+
3383
+ p = strrchr(segment_name, '.');
3384
+
3385
+ /* First remove any extension: */
3386
+ if (NULL != p) {
3387
+ *p = '\0';
3388
+ extension = p + 1;
3389
+ } else {
3390
+ extension = NULL;
3391
+ }
3392
+
3393
+ /* Then, remove any generation count: */
3394
+ p = strrchr(segment_name + 1, '_');
3395
+ if (NULL != p) {
3396
+ *p = '\0';
3397
+ }
3398
+
3399
+ /* Delete this file if it's not a "current" segment, or, it is a
3400
+ * single index file but there is now a corresponding compound file: */
3401
+ if (NULL == (si = h_get(dfa->current, segment_name))) {
3402
+ /* Delete if segment is not referenced: */
3403
+ do_delete = true;
3404
+ }
3405
+ else {
3406
+ char tmp_fn[SEGMENT_NAME_MAX_LENGTH];
3407
+ /* OK, segment is referenced, but file may still be orphan'd: */
3408
+ if (file_name_filter_is_cfs_file(file_name)
3409
+ && si->use_compound_file) {
3410
+ /* This file is stored in a CFS file for this segment: */
3411
+ do_delete = true;
3412
+ }
3413
+ else if (0 == strcmp("del", extension)) {
3414
+ /* This is a _segmentName_N.del file: */
3415
+ if (!fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
3416
+ || 0 != strcmp(file_name, tmp_fn)) {
3417
+ /* If this is a seperate .del file, but it
3418
+ * doesn't match the current del file name for
3419
+ * this segment, then delete it: */
3420
+ do_delete = true;
3421
+ }
3422
+ }
3423
+ else if (NULL != extension
3424
+ && ('s' == *extension || 'f' == *extension)
3425
+ && isdigit(extension[1])) {
3426
+ si_norm_file_name(si, tmp_fn, atoi(extension + 1));
3427
+ /* This is a _segmentName_N.sX file: */
3428
+ if (0 != strcmp(tmp_fn, file_name)) {
3429
+ /* This is an orphan'd norms file: */
3430
+ do_delete = true;
3431
+ }
3432
+ }
3433
+ else if (0 == strcmp("cfs", extension) && !si->use_compound_file) {
3434
+ /* This is a partially written _segmentName.cfs: */
3435
+ do_delete = true;
3436
+ }
3437
+ }
3438
+
3439
+ if (do_delete) {
3440
+ deleter_queue_file(dlr, file_name);
3441
+ }
3442
+ }
3443
+ }
3444
+
3445
+ /*
3446
+ * Determine index files that are no longer referenced and therefore should be
3447
+ * deleted. This is called once (by the writer), and then subsequently we add
3448
+ * onto deletable any files that are no longer needed at the point that we
3449
+ * create the unused file (eg when merging segments), and we only remove from
3450
+ * deletable when a file is successfully deleted.
3451
+ */
3452
+ void deleter_find_deletable_files(Deleter *dlr)
3453
+ {
3454
+ /* Gather all "current" segments: */
3455
+ int i;
3456
+ SegmentInfos *sis = dlr->sis;
3457
+ Store *store = dlr->store;
3458
+ struct DelFilesArg dfa;
3459
+ HashTable *current = dfa.current
3460
+ = h_new_str((free_ft)NULL, (free_ft)si_deref);
3461
+ dfa.dlr = dlr;
3462
+
3463
+ for(i = 0; i < sis->size; i++) {
3464
+ SegmentInfo *si = (SegmentInfo *)sis->segs[i];
3465
+ REF(si);
3466
+ h_set(current, si->name, si);
3467
+ }
3468
+
3469
+ /* Then go through all files in the Directory that are Ferret index files,
3470
+ * and add to deletable if they are not referenced by the current segments
3471
+ * info: */
3472
+ sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
3473
+ file_name_filter_init();
3474
+
3475
+ store->each(store, &deleter_find_deletable_files_i, &dfa);
3476
+ h_destroy(dfa.current);
3477
+ }
3478
+
3479
+ void deleter_delete_deletable_files(Deleter *dlr)
3480
+ {
3481
+ deleter_find_deletable_files(dlr);
3482
+ deleter_commit_pending_deletions(dlr);
3483
+ }
3484
+
3485
+ void deleter_clear_pending_deletions(Deleter *dlr)
3486
+ {
3487
+ hs_clear(dlr->pending);
3488
+ }
3489
+
2782
3490
  /****************************************************************************
2783
3491
  *
2784
3492
  * IndexReader
@@ -2800,7 +3508,7 @@ void ir_acquire_write_lock(IndexReader *ir)
2800
3508
  "need to close and reopen the index");
2801
3509
  }
2802
3510
 
2803
- if (ir->write_lock == NULL) {
3511
+ if (NULL == ir->write_lock) {
2804
3512
  ir->write_lock = open_lock(ir->store, WRITE_LOCK_NAME);
2805
3513
  if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
2806
3514
  RAISE(LOCK_ERROR, "Could not obtain write lock when trying to "
@@ -2811,8 +3519,8 @@ void ir_acquire_write_lock(IndexReader *ir)
2811
3519
  "you can safely delete these files.");
2812
3520
  }
2813
3521
 
2814
- /* we have to check whether index has changed since this reader was opened.
2815
- * if so, this reader is no longer valid for deletion */
3522
+ /* we have to check whether index has changed since this reader was
3523
+ * opened. if so, this reader is no longer valid for deletion */
2816
3524
  if (sis_read_current_version(ir->store) > ir->sis->version) {
2817
3525
  ir->is_stale = true;
2818
3526
  ir->write_lock->release(ir->write_lock);
@@ -2856,7 +3564,7 @@ IndexReader *ir_setup(IndexReader *ir, Store *store, SegmentInfos *sis,
2856
3564
 
2857
3565
  bool ir_index_exists(Store *store)
2858
3566
  {
2859
- return store->exists(store, "segments");
3567
+ return sis_current_segment_generation(store) != 1;
2860
3568
  }
2861
3569
 
2862
3570
  int ir_get_field_num(IndexReader *ir, const char *field)
@@ -2903,7 +3611,7 @@ uchar *ir_get_norms_i(IndexReader *ir, int field_num)
2903
3611
  norms = ir->get_norms(ir, field_num);
2904
3612
  }
2905
3613
  if (!norms) {
2906
- if (ir->fake_norms == NULL) {
3614
+ if (NULL == ir->fake_norms) {
2907
3615
  ir->fake_norms = (uchar *)ecalloc(ir->max_doc(ir));
2908
3616
  }
2909
3617
  norms = ir->fake_norms;
@@ -3009,34 +3717,41 @@ TermDocEnum *ir_term_positions_for(IndexReader *ir, const char *field,
3009
3717
 
3010
3718
  void ir_commit_i(IndexReader *ir)
3011
3719
  {
3012
- if (ir->has_changes && ir->is_owner) {
3013
- Lock *commit_lock;
3014
-
3015
- mutex_lock(&ir->store->mutex);
3016
- commit_lock = open_lock(ir->store, COMMIT_LOCK_NAME);
3017
- if (!commit_lock->obtain(commit_lock)) { /* obtain write lock */
3018
- RAISE(LOCK_ERROR, "Error trying to commit the index. Commit "
3019
- "lock already obtained");
3720
+ if (ir->has_changes) {
3721
+ if (NULL == ir->deleter && NULL != ir->store) {
3722
+ /* In the MultiReader case, we share this deleter across all
3723
+ * SegmentReaders: */
3724
+ ir->set_deleter_i(ir, deleter_new(ir->sis, ir->store));
3020
3725
  }
3726
+ if (ir->is_owner) {
3727
+ char curr_seg_fn[MAX_FILE_PATH];
3728
+ mutex_lock(&ir->store->mutex);
3021
3729
 
3022
- ir->commit_i(ir);
3023
- sis_write(ir->sis, ir->store);
3730
+ /* Should not be necessary: no prior commit should have left
3731
+ * pending files, so just defensive: */
3732
+ if (ir->deleter) deleter_clear_pending_deletions(ir->deleter);
3024
3733
 
3025
- commit_lock->release(commit_lock);
3026
- close_lock(commit_lock);
3027
- mutex_unlock(&ir->store->mutex);
3734
+ sis_curr_seg_file_name(curr_seg_fn, ir->store);
3735
+
3736
+ ir->commit_i(ir);
3737
+ sis_write(ir->sis, ir->store, ir->deleter);
3028
3738
 
3029
- if (ir->write_lock != NULL) {
3030
- /* release write lock */
3031
- ir->write_lock->release(ir->write_lock);
3032
- close_lock(ir->write_lock);
3033
- ir->write_lock = NULL;
3739
+ if (ir->deleter) deleter_delete_file(ir->deleter, curr_seg_fn);
3740
+
3741
+ mutex_unlock(&ir->store->mutex);
3742
+
3743
+ if (NULL != ir->write_lock) {
3744
+ /* release write lock */
3745
+ ir->write_lock->release(ir->write_lock);
3746
+ close_lock(ir->write_lock);
3747
+ ir->write_lock = NULL;
3748
+ }
3749
+ }
3750
+ else {
3751
+ ir->commit_i(ir);
3034
3752
  }
3035
- ir->has_changes = false;
3036
- }
3037
- else {
3038
- ir->commit_i(ir);
3039
3753
  }
3754
+ ir->has_changes = false;
3040
3755
  }
3041
3756
 
3042
3757
  void ir_commit(IndexReader *ir)
@@ -3049,15 +3764,14 @@ void ir_commit(IndexReader *ir)
3049
3764
  void ir_close(IndexReader *ir)
3050
3765
  {
3051
3766
  mutex_lock(&ir->mutex);
3052
- if (--(ir->ref_cnt) == 0) {
3767
+ if (0 == --(ir->ref_cnt)) {
3053
3768
  ir_commit_i(ir);
3054
3769
  ir->close_i(ir);
3055
3770
  if (ir->store) {
3056
3771
  store_deref(ir->store);
3057
3772
  }
3058
- if (ir->is_owner) {
3773
+ if (ir->is_owner && ir->sis) {
3059
3774
  sis_destroy(ir->sis);
3060
- fis_deref(ir->fis);
3061
3775
  }
3062
3776
  if (ir->cache) {
3063
3777
  h_destroy(ir->cache);
@@ -3065,6 +3779,9 @@ void ir_close(IndexReader *ir)
3065
3779
  if (ir->sort_cache) {
3066
3780
  h_destroy(ir->sort_cache);
3067
3781
  }
3782
+ if (ir->deleter && ir->is_owner) {
3783
+ deleter_destroy(ir->deleter);
3784
+ }
3068
3785
  free(ir->fake_norms);
3069
3786
 
3070
3787
  mutex_destroy(&ir->mutex);
@@ -3080,26 +3797,14 @@ void ir_close(IndexReader *ir)
3080
3797
  **/
3081
3798
  void ir_add_cache(IndexReader *ir)
3082
3799
  {
3083
- if (ir->cache == NULL) {
3800
+ if (NULL == ir->cache) {
3084
3801
  ir->cache = co_hash_create();
3085
3802
  }
3086
3803
  }
3087
3804
 
3088
3805
  bool ir_is_latest(IndexReader *ir)
3089
3806
  {
3090
- volatile bool is_latest = false;
3091
-
3092
- Lock *commit_lock = open_lock(ir->store, COMMIT_LOCK_NAME);
3093
- if (!commit_lock->obtain(commit_lock)) {
3094
- close_lock(commit_lock);
3095
- RAISE(LOCK_ERROR, "Error detecting if the current index is latest "
3096
- "version. Commit lock currently obtained");
3097
- }
3098
- is_latest = (sis_read_current_version(ir->store) == ir->sis->version);
3099
- commit_lock->release(commit_lock);
3100
- close_lock(commit_lock);
3101
-
3102
- return is_latest;
3807
+ return (sis_read_current_version(ir->store) == ir->sis->version);
3103
3808
  }
3104
3809
 
3105
3810
  /****************************************************************************
@@ -3128,35 +3833,27 @@ static Norm *norm_create(InStream *is, int field_num)
3128
3833
  static void norm_destroy(Norm *norm)
3129
3834
  {
3130
3835
  is_close(norm->is);
3131
- if (norm->bytes != NULL) {
3836
+ if (NULL != norm->bytes) {
3132
3837
  free(norm->bytes);
3133
3838
  }
3134
3839
  free(norm);
3135
3840
  }
3136
3841
 
3137
- static void norm_rewrite(Norm *norm, Store *store, char *segment,
3138
- int doc_count, Store *cfs_store)
3842
+ static void norm_rewrite(Norm *norm, Store *store, Deleter *dlr,
3843
+ SegmentInfo *si, int doc_count)
3139
3844
  {
3140
3845
  OutStream *os;
3141
- char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
3142
3846
  char norm_file_name[SEGMENT_NAME_MAX_LENGTH];
3847
+ const int field_num = norm->field_num;
3143
3848
 
3144
- if (norm == NULL || norm->bytes == NULL) {
3145
- return; /* These norms do not need to be rewritten */
3849
+ if (si_norm_file_name(si, norm_file_name, field_num)) {
3850
+ deleter_queue_file(dlr, norm_file_name);
3146
3851
  }
3147
-
3148
- sprintf(tmp_file_name, "%s.tmp", segment);
3149
- os = store->new_output(store, tmp_file_name);
3852
+ si_advance_norm_gen(si, field_num);
3853
+ si_norm_file_name(si, norm_file_name, field_num);
3854
+ os = store->new_output(store, norm_file_name);
3150
3855
  os_write_bytes(os, norm->bytes, doc_count);
3151
3856
  os_close(os);
3152
-
3153
- if (cfs_store) {
3154
- sprintf(norm_file_name, "%s.s%d", segment, norm->field_num);
3155
- }
3156
- else {
3157
- sprintf(norm_file_name, "%s.f%d", segment, norm->field_num);
3158
- }
3159
- store->rename(store, tmp_file_name, norm_file_name);
3160
3857
  norm->is_dirty = false;
3161
3858
  }
3162
3859
 
@@ -3166,6 +3863,7 @@ static void norm_rewrite(Norm *norm, Store *store, char *segment,
3166
3863
 
3167
3864
  typedef struct SegmentReader {
3168
3865
  IndexReader ir;
3866
+ SegmentInfo *si;
3169
3867
  char *segment;
3170
3868
  FieldsReader *fr;
3171
3869
  BitVector *deleted_docs;
@@ -3191,7 +3889,7 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
3191
3889
  {
3192
3890
  FieldsReader *fr;
3193
3891
 
3194
- if ((fr = thread_getspecific(sr->thread_fr)) == NULL) {
3892
+ if (NULL == (fr = thread_getspecific(sr->thread_fr))) {
3195
3893
  fr = fr_clone(sr->fr);
3196
3894
  ary_push(sr->fr_bucket, fr);
3197
3895
  thread_setspecific(sr->thread_fr, fr);
@@ -3201,17 +3899,17 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
3201
3899
 
3202
3900
  static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
3203
3901
  {
3204
- return (sr->deleted_docs != NULL && bv_get(sr->deleted_docs, doc_num));
3902
+ return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
3205
3903
  }
3206
3904
 
3207
3905
  static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3208
3906
  uchar *buf)
3209
3907
  {
3210
3908
  Norm *norm = h_get_int(sr->norms, field_num);
3211
- if (norm == NULL) {
3909
+ if (NULL == norm) {
3212
3910
  memset(buf, 0, SR_SIZE(sr));
3213
3911
  }
3214
- else if (norm->bytes != NULL) { /* can copy from cache */
3912
+ else if (NULL != norm->bytes) { /* can copy from cache */
3215
3913
  memcpy(buf, norm->bytes, SR_SIZE(sr));
3216
3914
  }
3217
3915
  else {
@@ -3226,11 +3924,11 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3226
3924
  static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3227
3925
  {
3228
3926
  Norm *norm = h_get_int(sr->norms, field_num);
3229
- if (norm == NULL) { /* not an indexed field */
3927
+ if (NULL == norm) { /* not an indexed field */
3230
3928
  return NULL;
3231
3929
  }
3232
3930
 
3233
- if (norm->bytes == NULL) { /* value not yet read */
3931
+ if (NULL == norm->bytes) { /* value not yet read */
3234
3932
  uchar *bytes = ALLOC_N(uchar, SR_SIZE(sr));
3235
3933
  sr_get_norms_into_i(sr, field_num, bytes);
3236
3934
  norm->bytes = bytes; /* cache it */
@@ -3241,7 +3939,8 @@ static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3241
3939
  static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
3242
3940
  {
3243
3941
  Norm *norm = h_get_int(SR(ir)->norms, field_num);
3244
- if (norm != NULL) { /* has_norms */
3942
+ if (NULL != norm) { /* has_norms */
3943
+ ir->has_changes = true;
3245
3944
  norm->is_dirty = true; /* mark it dirty */
3246
3945
  SR(ir)->norms_dirty = true;
3247
3946
  sr_get_norms_i(SR(ir), field_num)[doc_num] = b;
@@ -3250,12 +3949,13 @@ static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
3250
3949
 
3251
3950
  static void sr_delete_doc_i(IndexReader *ir, int doc_num)
3252
3951
  {
3253
- if (SR(ir)->deleted_docs == NULL) {
3952
+ if (NULL == SR(ir)->deleted_docs) {
3254
3953
  SR(ir)->deleted_docs = bv_new();
3255
3954
  }
3256
3955
 
3257
3956
  SR(ir)->deleted_docs_dirty = true;
3258
3957
  SR(ir)->undelete_all = false;
3958
+ ir->has_changes = true;
3259
3959
  bv_set(SR(ir)->deleted_docs, doc_num);
3260
3960
  }
3261
3961
 
@@ -3263,12 +3963,18 @@ static void sr_undelete_all_i(IndexReader *ir)
3263
3963
  {
3264
3964
  SR(ir)->undelete_all = true;
3265
3965
  SR(ir)->deleted_docs_dirty = false;
3266
- if (SR(ir)->deleted_docs != NULL) {
3966
+ ir->has_changes = true;
3967
+ if (NULL != SR(ir)->deleted_docs) {
3267
3968
  bv_destroy(SR(ir)->deleted_docs);
3268
3969
  }
3269
3970
  SR(ir)->deleted_docs = NULL;
3270
3971
  }
3271
3972
 
3973
+ static void sr_set_deleter_i(IndexReader *ir, Deleter *deleter)
3974
+ {
3975
+ ir->deleter = deleter;
3976
+ }
3977
+
3272
3978
  static void bv_write(BitVector *bv, Store *store, char *name)
3273
3979
  {
3274
3980
  int i;
@@ -3299,64 +4005,61 @@ static BitVector *bv_read(Store *store, char *name)
3299
4005
 
3300
4006
  static void sr_commit_i(IndexReader *ir)
3301
4007
  {
4008
+ SegmentInfo *si = SR(ir)->si;
4009
+ char *segment = SR(ir)->si->name;
3302
4010
  char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
3303
- char del_file_name[SEGMENT_NAME_MAX_LENGTH];
3304
-
3305
- sprintf(del_file_name, "%s.del", SR(ir)->segment);
3306
4011
 
3307
- if (SR(ir)->deleted_docs_dirty) { /* re-write deleted */
3308
- sprintf(tmp_file_name, "%s.tmp", SR(ir)->segment);
3309
- bv_write(SR(ir)->deleted_docs, ir->store, tmp_file_name);
3310
- ir->store->rename(ir->store, tmp_file_name, del_file_name);
3311
- }
3312
- if (SR(ir)->undelete_all && ir->store->exists(ir->store, del_file_name)) {
3313
- ir->store->remove(ir->store, del_file_name);
4012
+ if (SR(ir)->undelete_all || SR(ir)->deleted_docs_dirty) {
4013
+ if (si->del_gen >= 0) {
4014
+ fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
4015
+ deleter_queue_file(ir->deleter, tmp_file_name);
4016
+ }
4017
+ if (SR(ir)->undelete_all) {
4018
+ si->del_gen = -1;
4019
+ SR(ir)->undelete_all = false;
4020
+ } else {
4021
+ /* (SR(ir)->deleted_docs_dirty) re-write deleted */
4022
+ si->del_gen++;
4023
+ fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
4024
+ bv_write(SR(ir)->deleted_docs, ir->store, tmp_file_name);
4025
+ SR(ir)->deleted_docs_dirty = false;
4026
+ }
3314
4027
  }
3315
4028
  if (SR(ir)->norms_dirty) { /* re-write norms */
3316
4029
  int i;
3317
4030
  const int field_cnt = ir->fis->size;
3318
4031
  FieldInfo *fi;
3319
- for (i = 0; i < field_cnt; i++) {
4032
+ for (i = field_cnt - 1; i >= 0; i--) {
3320
4033
  fi = ir->fis->fields[i];
3321
4034
  if (fi_is_indexed(fi)) {
3322
- norm_rewrite(h_get_int(SR(ir)->norms, fi->number), ir->store,
3323
- SR(ir)->segment, SR_SIZE(ir), SR(ir)->cfs_store);
4035
+ Norm *norm = h_get_int(SR(ir)->norms, fi->number);
4036
+ if (norm && norm->is_dirty) {
4037
+ norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
4038
+ SR_SIZE(ir));
4039
+ }
3324
4040
  }
3325
4041
  }
4042
+ SR(ir)->norms_dirty = false;
3326
4043
  }
3327
- SR(ir)->deleted_docs_dirty = false;
3328
- SR(ir)->norms_dirty = false;
3329
- SR(ir)->undelete_all = false;
3330
4044
  }
3331
4045
 
3332
4046
  static void sr_close_i(IndexReader *ir)
3333
4047
  {
3334
4048
  SegmentReader *sr = SR(ir);
3335
4049
 
3336
- fr_close(sr->fr);
3337
- tir_close(sr->tir);
3338
- sfi_close(sr->sfi);
3339
-
3340
- if (sr->frq_in) {
3341
- is_close(sr->frq_in);
3342
- }
3343
- if (sr->prx_in) {
3344
- is_close(sr->prx_in);
3345
- }
3346
-
3347
- h_destroy(sr->norms);
3348
-
4050
+ if (sr->fr) fr_close(sr->fr);
4051
+ if (sr->tir) tir_close(sr->tir);
4052
+ if (sr->sfi) sfi_close(sr->sfi);
4053
+ if (sr->frq_in) is_close(sr->frq_in);
4054
+ if (sr->prx_in) is_close(sr->prx_in);
4055
+ if (sr->norms) h_destroy(sr->norms);
4056
+ if (sr->deleted_docs) bv_destroy(sr->deleted_docs);
4057
+ if (sr->cfs_store) store_deref(sr->cfs_store);
3349
4058
  if (sr->fr_bucket) {
3350
4059
  thread_setspecific(sr->thread_fr, NULL);
3351
4060
  thread_key_delete(sr->thread_fr);
3352
4061
  ary_destroy(sr->fr_bucket, (free_ft)&fr_close);
3353
4062
  }
3354
- if (sr->deleted_docs) {
3355
- bv_destroy(sr->deleted_docs);
3356
- }
3357
- if (sr->cfs_store) {
3358
- store_deref(sr->cfs_store);
3359
- }
3360
4063
  }
3361
4064
 
3362
4065
  static int sr_num_docs(IndexReader *ir)
@@ -3365,7 +4068,7 @@ static int sr_num_docs(IndexReader *ir)
3365
4068
 
3366
4069
  mutex_lock(&ir->mutex);
3367
4070
  num_docs = SR(ir)->fr->size;
3368
- if (SR(ir)->deleted_docs != NULL) {
4071
+ if (NULL != SR(ir)->deleted_docs) {
3369
4072
  num_docs -= SR(ir)->deleted_docs->count;
3370
4073
  }
3371
4074
  mutex_unlock(&ir->mutex);
@@ -3473,7 +4176,7 @@ static TermVector *sr_term_vector(IndexReader *ir, int doc_num,
3473
4176
  static HashTable *sr_term_vectors(IndexReader *ir, int doc_num)
3474
4177
  {
3475
4178
  FieldsReader *fr;
3476
- if (!SR(ir)->fr || (fr = sr_fr(SR(ir))) == NULL) {
4179
+ if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
3477
4180
  return NULL;
3478
4181
  }
3479
4182
 
@@ -3493,42 +4196,32 @@ static bool sr_is_deleted(IndexReader *ir, int doc_num)
3493
4196
 
3494
4197
  static bool sr_has_deletions(IndexReader *ir)
3495
4198
  {
3496
- return (SR(ir)->deleted_docs != NULL);
4199
+ return NULL != SR(ir)->deleted_docs;
3497
4200
  }
3498
4201
 
3499
4202
  static void sr_open_norms(IndexReader *ir, Store *cfs_store)
3500
4203
  {
3501
4204
  int i;
3502
- Store *store = ir->store;
4205
+ SegmentInfo *si = SR(ir)->si;
3503
4206
  char file_name[SEGMENT_NAME_MAX_LENGTH];
3504
- FieldInfos *fis = ir->fis;
3505
- char *ext_ptr;
3506
- const int field_cnt = fis->size;
3507
-
3508
- sprintf(file_name, "%s.", SR(ir)->segment);
3509
- ext_ptr = file_name + strlen(file_name);
3510
4207
 
3511
- for (i = 0; i < field_cnt; i++) {
3512
- if (fi_has_norms(fis->fields[i])) {
3513
- sprintf(ext_ptr, "s%d", i);
3514
- if (!store->exists(store, file_name)) {
3515
- sprintf(ext_ptr, "f%d", i);
3516
- store = cfs_store;
3517
- }
3518
- if (store->exists(store, file_name)) {
3519
- h_set_int(SR(ir)->norms, i,
3520
- norm_create(store->open_input(store, file_name), i));
3521
- }
4208
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
4209
+ Store *store = (si->use_compound_file && si->norm_gens[i] == 0) ?
4210
+ cfs_store : ir->store;
4211
+ if (si_norm_file_name(si, file_name, i)) {
4212
+ h_set_int(SR(ir)->norms, i,
4213
+ norm_create(store->open_input(store, file_name), i));
3522
4214
  }
3523
4215
  }
3524
4216
  SR(ir)->norms_dirty = false;
3525
4217
  }
3526
4218
 
3527
- static IndexReader *sr_setup_i(SegmentReader *sr, SegmentInfo *si)
4219
+ static IndexReader *sr_setup_i(SegmentReader *sr)
3528
4220
  {
3529
- Store *store = si->store;
4221
+ Store *store = sr->si->store;
3530
4222
  IndexReader *ir = IR(sr);
3531
4223
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4224
+ char *sr_segment = sr->si->name;
3532
4225
 
3533
4226
  ir->num_docs = &sr_num_docs;
3534
4227
  ir->max_doc = &sr_max_doc;
@@ -3549,51 +4242,56 @@ static IndexReader *sr_setup_i(SegmentReader *sr, SegmentInfo *si)
3549
4242
  ir->set_norm_i = &sr_set_norm_i;
3550
4243
  ir->delete_doc_i = &sr_delete_doc_i;
3551
4244
  ir->undelete_all_i = &sr_undelete_all_i;
4245
+ ir->set_deleter_i = &sr_set_deleter_i;
3552
4246
  ir->commit_i = &sr_commit_i;
3553
4247
  ir->close_i = &sr_close_i;
3554
4248
 
3555
- sr->segment = si->name;
3556
4249
  sr->cfs_store = NULL;
3557
4250
 
3558
- sprintf(file_name, "%s.cfs", sr->segment);
3559
- if (store->exists(store, file_name)) {
3560
- sr->cfs_store = open_cmpd_store(store, file_name);
3561
- store = sr->cfs_store;
3562
- }
4251
+ TRY
4252
+ if (sr->si->use_compound_file) {
4253
+ sprintf(file_name, "%s.cfs", sr_segment);
4254
+ sr->cfs_store = open_cmpd_store(store, file_name);
4255
+ store = sr->cfs_store;
4256
+ }
3563
4257
 
3564
- sr->fr = fr_open(store, sr->segment, ir->fis);
3565
- sr->sfi = sfi_open(store, sr->segment);
3566
- sr->tir = tir_open(store, sr->sfi, sr->segment);
4258
+ sr->fr = fr_open(store, sr_segment, ir->fis);
4259
+ sr->sfi = sfi_open(store, sr_segment);
4260
+ sr->tir = tir_open(store, sr->sfi, sr_segment);
3567
4261
 
3568
- sr->deleted_docs = NULL;
3569
- sr->deleted_docs_dirty = false;
3570
- sr->undelete_all = false;
3571
- if (si_has_deletions(si)) {
3572
- sprintf(file_name, "%s.del", sr->segment);
3573
- sr->deleted_docs = bv_read(si->store, file_name);
3574
- }
4262
+ sr->deleted_docs = NULL;
4263
+ sr->deleted_docs_dirty = false;
4264
+ sr->undelete_all = false;
4265
+ if (si_has_deletions(sr->si)) {
4266
+ fn_for_generation(file_name, sr_segment, "del", sr->si->del_gen);
4267
+ sr->deleted_docs = bv_read(sr->si->store, file_name);
4268
+ }
3575
4269
 
3576
- sprintf(file_name, "%s.frq", sr->segment);
3577
- sr->frq_in = store->open_input(store, file_name);
3578
- sprintf(file_name, "%s.prx", sr->segment);
3579
- sr->prx_in = store->open_input(store, file_name);
3580
- sr->norms = h_new_int((free_ft)&norm_destroy);
3581
- sr_open_norms(ir, store);
4270
+ sprintf(file_name, "%s.frq", sr_segment);
4271
+ sr->frq_in = store->open_input(store, file_name);
4272
+ sprintf(file_name, "%s.prx", sr_segment);
4273
+ sr->prx_in = store->open_input(store, file_name);
4274
+ sr->norms = h_new_int((free_ft)&norm_destroy);
4275
+ sr_open_norms(ir, store);
4276
+ if (fis_has_vectors(ir->fis)) {
4277
+ thread_key_create(&sr->thread_fr, NULL);
4278
+ sr->fr_bucket = ary_new();
4279
+ }
4280
+ XCATCHALL
4281
+ ir_close(ir);
4282
+ XENDTRY
3582
4283
 
3583
- if (fis_has_vectors(ir->fis)) {
3584
- thread_key_create(&sr->thread_fr, NULL);
3585
- sr->fr_bucket = ary_new();
3586
- }
3587
4284
  return ir;
3588
4285
  }
3589
4286
 
3590
4287
  static IndexReader *sr_open(SegmentInfos *sis, FieldInfos *fis, int si_num,
3591
4288
  bool is_owner)
3592
4289
  {
4290
+ IndexReader *ir;
3593
4291
  SegmentReader *sr = ALLOC_AND_ZERO(SegmentReader);
3594
- SegmentInfo *si = sis->segs[si_num];
3595
- IndexReader *ir = ir_setup(IR(sr), si->store, sis, fis, is_owner);
3596
- return sr_setup_i(SR(ir), si);
4292
+ sr->si = sis->segs[si_num];
4293
+ ir = ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
4294
+ return sr_setup_i(sr);
3597
4295
  }
3598
4296
 
3599
4297
  /****************************************************************************
@@ -3683,7 +4381,7 @@ static uchar *mr_get_norms(IndexReader *ir, int field_num)
3683
4381
 
3684
4382
  mutex_lock(&ir->mutex);
3685
4383
  bytes = h_get_int(MR(ir)->norms_cache, field_num);
3686
- if (bytes == NULL) {
4384
+ if (NULL == bytes) {
3687
4385
  int i;
3688
4386
  const int mr_reader_cnt = MR(ir)->r_cnt;
3689
4387
 
@@ -3709,7 +4407,7 @@ static uchar *mr_get_norms_into(IndexReader *ir, int field_num, uchar *buf)
3709
4407
 
3710
4408
  mutex_lock(&ir->mutex);
3711
4409
  bytes = h_get_int(MR(ir)->norms_cache, field_num);
3712
- if (bytes != NULL) {
4410
+ if (NULL != bytes) {
3713
4411
  memcpy(buf, bytes, MR(ir)->max_doc);
3714
4412
  }
3715
4413
  else {
@@ -3791,6 +4489,7 @@ static void mr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar val
3791
4489
  int fnum = mr_get_field_num(MR(ir), i, field_num);
3792
4490
  if (fnum >= 0) {
3793
4491
  IndexReader *reader = MR(ir)->sub_readers[i];
4492
+ ir->has_changes = true;
3794
4493
  h_del_int(MR(ir)->norms_cache, fnum);/* clear cache */
3795
4494
  ir_set_norm_i(reader, doc_num - MR(ir)->starts[i], fnum, val);
3796
4495
  }
@@ -3804,6 +4503,7 @@ static void mr_delete_doc_i(IndexReader *ir, int doc_num)
3804
4503
  /* dispatch to segment reader */
3805
4504
  reader->delete_doc_i(reader, doc_num - MR(ir)->starts[i]);
3806
4505
  MR(ir)->has_deletions = true;
4506
+ ir->has_changes = true;
3807
4507
  }
3808
4508
 
3809
4509
  static void mr_undelete_all_i(IndexReader *ir)
@@ -3817,6 +4517,17 @@ static void mr_undelete_all_i(IndexReader *ir)
3817
4517
  reader->undelete_all_i(reader);
3818
4518
  }
3819
4519
  MR(ir)->has_deletions = false;
4520
+ ir->has_changes = true;
4521
+ }
4522
+
4523
+ static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
4524
+ {
4525
+ int i;
4526
+ ir->deleter = deleter;
4527
+ for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
4528
+ IndexReader *reader = MR(ir)->sub_readers[i];
4529
+ reader->set_deleter_i(reader, deleter);
4530
+ }
3820
4531
  }
3821
4532
 
3822
4533
  static void mr_commit_i(IndexReader *ir)
@@ -3825,7 +4536,7 @@ static void mr_commit_i(IndexReader *ir)
3825
4536
  const int mr_reader_cnt = MR(ir)->r_cnt;
3826
4537
  for (i = 0; i < mr_reader_cnt; i++) {
3827
4538
  IndexReader *reader = MR(ir)->sub_readers[i];
3828
- ir_commit(reader);
4539
+ ir_commit_i(reader);
3829
4540
  }
3830
4541
  }
3831
4542
 
@@ -3887,6 +4598,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
3887
4598
  ir->set_norm_i = &mr_set_norm_i;
3888
4599
  ir->delete_doc_i = &mr_delete_doc_i;
3889
4600
  ir->undelete_all_i = &mr_undelete_all_i;
4601
+ ir->set_deleter_i = &mr_set_deleter_i;
3890
4602
  ir->commit_i = &mr_commit_i;
3891
4603
  ir->close_i = &mr_close_i;
3892
4604
 
@@ -3980,33 +4692,65 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
3980
4692
  * IndexReader
3981
4693
  ****************************************************************************/
3982
4694
 
4695
+
4696
+ static void ir_open_i(Store *store, FindSegmentsFile *fsf)
4697
+ {
4698
+ volatile bool success = false;
4699
+ IndexReader *ir = NULL;
4700
+ SegmentInfos *sis = NULL;
4701
+ TRY
4702
+ do {
4703
+ FieldInfos *fis;
4704
+
4705
+ mutex_lock(&store->mutex);
4706
+ sis_read_i(store, fsf);
4707
+ sis = fsf->p_return;
4708
+ fis = sis->fis;
4709
+
4710
+ if (sis->size == 1) {
4711
+ ir = sr_open(sis, fis, 0, true);
4712
+ }
4713
+ else {
4714
+ int i;
4715
+ IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
4716
+ int num_segments = sis->size;
4717
+ for (i = num_segments - 1; i >= 0; i--) {
4718
+ TRY
4719
+ readers[i] = sr_open(sis, fis, i, false);
4720
+ XCATCHALL
4721
+ for (i++; i < num_segments; i++) {
4722
+ ir_close(readers[i]);
4723
+ }
4724
+ free(readers);
4725
+ XENDTRY
4726
+ }
4727
+ ir = mr_open_i(store, sis, fis, readers, sis->size);
4728
+ }
4729
+ fsf->p_return = ir;
4730
+ success = true;
4731
+ } while (0);
4732
+ XFINALLY
4733
+ if (!success) {
4734
+ if (ir) {
4735
+ ir_close(ir);
4736
+ }
4737
+ else if (sis) {
4738
+ sis_destroy(sis);
4739
+ }
4740
+ }
4741
+ mutex_unlock(&store->mutex);
4742
+ XENDTRY
4743
+ }
4744
+
3983
4745
  /**
3984
4746
  * Will keep a reference to the store. To let this method delete the store
3985
4747
  * make sure you deref the store that you pass to it
3986
4748
  */
3987
4749
  IndexReader *ir_open(Store *store)
3988
4750
  {
3989
- int i;
3990
- IndexReader *ir;
3991
- SegmentInfos *sis;
3992
- FieldInfos *fis;
3993
-
3994
- mutex_lock(&store->mutex);
3995
- sis = sis_read(store);
3996
- fis = fis_read(store);
3997
- if (sis->size == 1) {
3998
- ir = sr_open(sis, fis, 0, true);
3999
- }
4000
- else {
4001
- IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
4002
- for (i = sis->size; i > 0;) {
4003
- i--;
4004
- readers[i] = sr_open(sis, fis, i, false);
4005
- }
4006
- ir = mr_open_i(store, sis, fis, readers, sis->size);
4007
- }
4008
- mutex_unlock(&store->mutex);
4009
- return ir;
4751
+ FindSegmentsFile fsf;
4752
+ sis_find_segments_file(store, &fsf, &ir_open_i);
4753
+ return (IndexReader *)fsf.p_return;
4010
4754
  }
4011
4755
 
4012
4756
  /****************************************************************************
@@ -4126,8 +4870,8 @@ typedef struct SkipBuffer
4126
4870
  OutStream *frq_out;
4127
4871
  OutStream *prx_out;
4128
4872
  int last_doc;
4129
- int last_frq_ptr;
4130
- int last_prx_ptr;
4873
+ off_t last_frq_ptr;
4874
+ off_t last_prx_ptr;
4131
4875
  } SkipBuffer;
4132
4876
 
4133
4877
  static void skip_buf_reset(SkipBuffer *skip_buf)
@@ -4149,8 +4893,8 @@ static SkipBuffer *skip_buf_new(OutStream *frq_out, OutStream *prx_out)
4149
4893
 
4150
4894
  static void skip_buf_add(SkipBuffer *skip_buf, int doc)
4151
4895
  {
4152
- int frq_ptr = os_pos(skip_buf->frq_out);
4153
- int prx_ptr = os_pos(skip_buf->prx_out);
4896
+ off_t frq_ptr = os_pos(skip_buf->frq_out);
4897
+ off_t prx_ptr = os_pos(skip_buf->prx_out);
4154
4898
 
4155
4899
  os_write_vint(skip_buf->buf, doc - skip_buf->last_doc);
4156
4900
  os_write_vint(skip_buf->buf, frq_ptr - skip_buf->last_frq_ptr);
@@ -4161,9 +4905,9 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
4161
4905
  skip_buf->last_prx_ptr = prx_ptr;
4162
4906
  }
4163
4907
 
4164
- static int skip_buf_write(SkipBuffer *skip_buf)
4908
+ static off_t skip_buf_write(SkipBuffer *skip_buf)
4165
4909
  {
4166
- int skip_ptr = os_pos(skip_buf->frq_out);
4910
+ off_t skip_ptr = os_pos(skip_buf->frq_out);
4167
4911
  ramo_write_to(skip_buf->buf, skip_buf->frq_out);
4168
4912
  return skip_ptr;
4169
4913
  }
@@ -4184,7 +4928,8 @@ static void dw_write_norms(DocWriter *dw, FieldInverter *fld_inv)
4184
4928
  {
4185
4929
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4186
4930
  OutStream *norms_out;
4187
- sprintf(file_name, "%s.f%d", dw->segment, fld_inv->fi->number);
4931
+ si_advance_norm_gen(dw->si, fld_inv->fi->number);
4932
+ si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
4188
4933
  norms_out = dw->store->new_output(dw->store, file_name);
4189
4934
  os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
4190
4935
  os_close(norms_out);
@@ -4232,23 +4977,23 @@ static void dw_flush(DocWriter *dw)
4232
4977
  Posting *p;
4233
4978
  Occurence *occ;
4234
4979
  Store *store = dw->store;
4235
- TermInfosWriter *tiw = tiw_open(store, dw->segment,
4980
+ TermInfosWriter *tiw = tiw_open(store, dw->si->name,
4236
4981
  dw->index_interval, skip_interval);
4237
4982
  TermInfo ti;
4238
4983
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4239
4984
  OutStream *frq_out, *prx_out;
4240
4985
  SkipBuffer *skip_buf;
4241
4986
 
4242
- sprintf(file_name, "%s.frq", dw->segment);
4987
+ sprintf(file_name, "%s.frq", dw->si->name);
4243
4988
  frq_out = store->new_output(store, file_name);
4244
- sprintf(file_name, "%s.prx", dw->segment);
4989
+ sprintf(file_name, "%s.prx", dw->si->name);
4245
4990
  prx_out = store->new_output(store, file_name);
4246
4991
  skip_buf = skip_buf_new(frq_out, prx_out);
4247
4992
 
4248
4993
  for (i = 0; i < fields_count; i++) {
4249
4994
  fi = fis->fields[i];
4250
4995
  if (!fi_is_indexed(fi)
4251
- || (fld_inv = h_get_int(dw->fields, fi->number)) == NULL) {
4996
+ || NULL == (fld_inv = h_get_int(dw->fields, fi->number))) {
4252
4997
  continue;
4253
4998
  }
4254
4999
  if (!fi_omit_norms(fi)) {
@@ -4265,9 +5010,9 @@ static void dw_flush(DocWriter *dw)
4265
5010
  last_doc = 0;
4266
5011
  doc_freq = 0;
4267
5012
  skip_buf_reset(skip_buf);
4268
- for (p = pl->first; p != NULL; p = p->next) {
5013
+ for (p = pl->first; NULL != p; p = p->next) {
4269
5014
  doc_freq++;
4270
- if ((doc_freq % dw->skip_interval) == 0) {
5015
+ if (0 == (doc_freq % dw->skip_interval)) {
4271
5016
  skip_buf_add(skip_buf, last_doc);
4272
5017
  }
4273
5018
 
@@ -4283,7 +5028,7 @@ static void dw_flush(DocWriter *dw)
4283
5028
  }
4284
5029
 
4285
5030
  last_pos = 0;
4286
- for (occ = p->first_occ; occ != NULL; occ = occ->next) {
5031
+ for (occ = p->first_occ; NULL != occ; occ = occ->next) {
4287
5032
  os_write_vint(prx_out, occ->pos - last_pos);
4288
5033
  last_pos = occ->pos;
4289
5034
  }
@@ -4300,7 +5045,7 @@ static void dw_flush(DocWriter *dw)
4300
5045
  dw_flush_streams(dw);
4301
5046
  }
4302
5047
 
4303
- DocWriter *dw_open(IndexWriter *iw, const char *segment)
5048
+ DocWriter *dw_open(IndexWriter *iw, SegmentInfo *si)
4304
5049
  {
4305
5050
  Store *store = iw->store;
4306
5051
  MemoryPool *mp = mp_new_capa(iw->config.chunk_size,
@@ -4308,34 +5053,34 @@ DocWriter *dw_open(IndexWriter *iw, const char *segment)
4308
5053
 
4309
5054
  DocWriter *dw = ALLOC(DocWriter);
4310
5055
 
4311
- dw->mp = mp;
4312
- dw->analyzer = iw->analyzer;
4313
- dw->fis = iw->fis;
4314
- dw->store = store;
4315
- dw->fw = fw_open(store, segment, iw->fis);
4316
- dw->segment = segment;
5056
+ dw->mp = mp;
5057
+ dw->analyzer = iw->analyzer;
5058
+ dw->fis = iw->fis;
5059
+ dw->store = store;
5060
+ dw->fw = fw_open(store, si->name, iw->fis);
5061
+ dw->si = si;
4317
5062
 
4318
5063
  dw->curr_plists = h_new_str(NULL, NULL);
4319
- dw->fields = h_new_int((free_ft)fld_inv_destroy);
4320
- dw->doc_num = 0;
5064
+ dw->fields = h_new_int((free_ft)fld_inv_destroy);
5065
+ dw->doc_num = 0;
4321
5066
 
4322
- dw->index_interval = iw->config.index_interval;
4323
- dw->skip_interval = iw->config.skip_interval;
4324
- dw->max_field_length = iw->config.max_field_length;
4325
- dw->max_buffered_docs = iw->config.max_buffered_docs;
5067
+ dw->index_interval = iw->config.index_interval;
5068
+ dw->skip_interval = iw->config.skip_interval;
5069
+ dw->max_field_length = iw->config.max_field_length;
5070
+ dw->max_buffered_docs = iw->config.max_buffered_docs;
4326
5071
 
4327
- dw->offsets = ALLOC_AND_ZERO_N(Offset, DW_OFFSET_INIT_CAPA);
4328
- dw->offsets_size = 0;
4329
- dw->offsets_capa = DW_OFFSET_INIT_CAPA;
5072
+ dw->offsets = ALLOC_AND_ZERO_N(Offset, DW_OFFSET_INIT_CAPA);
5073
+ dw->offsets_size = 0;
5074
+ dw->offsets_capa = DW_OFFSET_INIT_CAPA;
4330
5075
 
4331
- dw->similarity = iw->similarity;
5076
+ dw->similarity = iw->similarity;
4332
5077
  return dw;
4333
5078
  }
4334
5079
 
4335
- void dw_new_segment(DocWriter *dw, char *segment)
5080
+ void dw_new_segment(DocWriter *dw, SegmentInfo *si)
4336
5081
  {
4337
- dw->fw = fw_open(dw->store, segment, dw->fis);
4338
- dw->segment = segment;
5082
+ dw->fw = fw_open(dw->store, si->name, dw->fis);
5083
+ dw->si = si;
4339
5084
  }
4340
5085
 
4341
5086
  void dw_close(DocWriter *dw)
@@ -4536,7 +5281,7 @@ typedef struct SegmentMergeInfo {
4536
5281
  int base;
4537
5282
  int max_doc;
4538
5283
  int doc_cnt;
4539
- char *segment;
5284
+ SegmentInfo *si;
4540
5285
  Store *store;
4541
5286
  Store *orig_store;
4542
5287
  BitVector *deleted_docs;
@@ -4552,7 +5297,7 @@ typedef struct SegmentMergeInfo {
4552
5297
  static bool smi_lt(const SegmentMergeInfo *smi1, const SegmentMergeInfo *smi2)
4553
5298
  {
4554
5299
  int cmpres = strcmp(smi1->term, smi2->term);
4555
- if (cmpres == 0) {
5300
+ if (0 == cmpres) {
4556
5301
  return smi1->base < smi2->base;
4557
5302
  }
4558
5303
  else {
@@ -4578,12 +5323,13 @@ static void smi_load_doc_map(SegmentMergeInfo *smi)
4578
5323
  smi->doc_cnt = j;
4579
5324
  }
4580
5325
 
4581
- static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
5326
+ static SegmentMergeInfo *smi_new(int base, Store *store, SegmentInfo *si)
4582
5327
  {
4583
5328
  SegmentMergeInfo *smi = ALLOC_AND_ZERO(SegmentMergeInfo);
4584
5329
  char file_name[SEGMENT_NAME_MAX_LENGTH];
5330
+ char *segment = si->name;
4585
5331
  smi->base = base;
4586
- smi->segment = segment;
5332
+ smi->si = si;
4587
5333
  smi->orig_store = smi->store = store;
4588
5334
  sprintf(file_name, "%s.cfs", segment);
4589
5335
  if (store->exists(store, file_name)) {
@@ -4595,8 +5341,8 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
4595
5341
  smi->doc_cnt = smi->max_doc
4596
5342
  = smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
4597
5343
 
4598
- sprintf(file_name, "%s.del", segment);
4599
- if (store->exists(store, file_name)) {
5344
+ if (si->del_gen >= 0) {
5345
+ fn_for_generation(file_name, segment, "del", si->del_gen);
4600
5346
  smi->deleted_docs = bv_read(store, file_name);
4601
5347
  smi_load_doc_map(smi);
4602
5348
  }
@@ -4606,13 +5352,14 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
4606
5352
  static void smi_load_term_input(SegmentMergeInfo *smi)
4607
5353
  {
4608
5354
  Store *store = smi->store;
5355
+ char *segment = smi->si->name;
4609
5356
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4610
- smi->sfi = sfi_open(store, smi->segment);
4611
- sprintf(file_name, "%s.tis", smi->segment);
5357
+ smi->sfi = sfi_open(store, segment);
5358
+ sprintf(file_name, "%s.tis", segment);
4612
5359
  smi->te = TE(ste_new(store->open_input(store, file_name), smi->sfi));
4613
- sprintf(file_name, "%s.frq", smi->segment);
5360
+ sprintf(file_name, "%s.frq", segment);
4614
5361
  smi->frq_in = store->open_input(store, file_name);
4615
- sprintf(file_name, "%s.prx", smi->segment);
5362
+ sprintf(file_name, "%s.prx", segment);
4616
5363
  smi->prx_in = store->open_input(store, file_name);
4617
5364
  smi->tde = stpe_new(NULL, smi->frq_in, smi->prx_in, smi->deleted_docs,
4618
5365
  STE(smi->te)->skip_interval);
@@ -4652,7 +5399,7 @@ typedef struct SegmentMerger {
4652
5399
  TermInfo ti;
4653
5400
  Store *store;
4654
5401
  FieldInfos *fis;
4655
- char *segment;
5402
+ SegmentInfo *si;
4656
5403
  SegmentMergeInfo **smis;
4657
5404
  int seg_cnt;
4658
5405
  int doc_cnt;
@@ -4667,19 +5414,19 @@ typedef struct SegmentMerger {
4667
5414
  OutStream *prx_out;
4668
5415
  } SegmentMerger;
4669
5416
 
4670
- static SegmentMerger *sm_create(IndexWriter *iw, char *segment,
5417
+ static SegmentMerger *sm_create(IndexWriter *iw, SegmentInfo *si,
4671
5418
  SegmentInfo **seg_infos, const int seg_cnt)
4672
5419
  {
4673
5420
  int i;
4674
5421
  SegmentMerger *sm = ALLOC_AND_ZERO_N(SegmentMerger, seg_cnt);
4675
5422
  sm->store = iw->store;
4676
5423
  sm->fis = iw->fis;
4677
- sm->segment = estrdup(segment);
5424
+ sm->si = si;
4678
5425
  sm->doc_cnt = 0;
4679
5426
  sm->smis = ALLOC_N(SegmentMergeInfo *, seg_cnt);
4680
5427
  for (i = 0; i < seg_cnt; i++) {
4681
5428
  sm->smis[i] = smi_new(sm->doc_cnt, seg_infos[i]->store,
4682
- seg_infos[i]->name);
5429
+ seg_infos[i]);
4683
5430
  sm->doc_cnt += sm->smis[i]->doc_cnt;
4684
5431
  }
4685
5432
  sm->seg_cnt = seg_cnt;
@@ -4695,7 +5442,6 @@ static void sm_destroy(SegmentMerger *sm)
4695
5442
  smi_destroy(sm->smis[i]);
4696
5443
  }
4697
5444
  free(sm->smis);
4698
- free(sm->segment);
4699
5445
  free(sm);
4700
5446
  }
4701
5447
 
@@ -4708,20 +5454,21 @@ static void sm_merge_fields(SegmentMerger *sm)
4708
5454
  Store *store = sm->store;
4709
5455
  const int seg_cnt = sm->seg_cnt;
4710
5456
 
4711
- sprintf(file_name, "%s.fdt", sm->segment);
5457
+ sprintf(file_name, "%s.fdt", sm->si->name);
4712
5458
  fdt_out = store->new_output(store, file_name);
4713
5459
 
4714
- sprintf(file_name, "%s.fdx", sm->segment);
5460
+ sprintf(file_name, "%s.fdx", sm->si->name);
4715
5461
  fdx_out = store->new_output(store, file_name);
4716
5462
 
4717
5463
  for (i = 0; i < seg_cnt; i++) {
4718
5464
  SegmentMergeInfo *smi = sm->smis[i];
4719
5465
  const int max_doc = smi->max_doc;
4720
5466
  InStream *fdt_in, *fdx_in;
5467
+ char *segment = smi->si->name;
4721
5468
  store = smi->store;
4722
- sprintf(file_name, "%s.fdt", smi->segment);
5469
+ sprintf(file_name, "%s.fdt", segment);
4723
5470
  fdt_in = store->open_input(store, file_name);
4724
- sprintf(file_name, "%s.fdx", smi->segment);
5471
+ sprintf(file_name, "%s.fdx", segment);
4725
5472
  fdx_in = store->open_input(store, file_name);
4726
5473
 
4727
5474
  if (max_doc > 0) {
@@ -4775,7 +5522,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
4775
5522
  * stde_next rather than stpe_next here */
4776
5523
  while (stde_next(tde)) {
4777
5524
  doc = stde_doc_num(tde);
4778
- if (doc_map != NULL) {
5525
+ if (NULL != doc_map) {
4779
5526
  doc = doc_map[doc]; /* work around deletions */
4780
5527
  }
4781
5528
  doc += base; /* convert to merged space */
@@ -4787,7 +5534,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
4787
5534
  #endif
4788
5535
  df++;
4789
5536
 
4790
- if ((df % skip_interval) == 0) {
5537
+ if (0 == (df % skip_interval)) {
4791
5538
  skip_buf_add(skip_buf, last_doc);
4792
5539
  }
4793
5540
 
@@ -4823,12 +5570,12 @@ static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
4823
5570
  static void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **matches,
4824
5571
  int match_size)
4825
5572
  {
4826
- int frq_ptr = os_pos(sm->frq_out);
4827
- int prx_ptr = os_pos(sm->prx_out);
5573
+ off_t frq_ptr = os_pos(sm->frq_out);
5574
+ off_t prx_ptr = os_pos(sm->prx_out);
4828
5575
 
4829
5576
  int df = sm_append_postings(sm, matches, match_size); /* append posting data */
4830
5577
 
4831
- int skip_ptr = skip_buf_write(sm->skip_buf);
5578
+ off_t skip_ptr = skip_buf_write(sm->skip_buf);
4832
5579
 
4833
5580
  if (df > 0) {
4834
5581
  /* add an entry to the dictionary with ptrs to prox and freq files */
@@ -4861,7 +5608,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
4861
5608
  for (j = 0; j < seg_cnt; j++) {
4862
5609
  smi = sm->smis[j];
4863
5610
  ste_set_field(smi->te, i);
4864
- if (smi_next(smi) != NULL) {
5611
+ if (NULL != smi_next(smi)) {
4865
5612
  pq_push(sm->queue, smi); /* initialize @queue */
4866
5613
  }
4867
5614
  }
@@ -4877,7 +5624,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
4877
5624
  match_size++;
4878
5625
  term = matches[0]->term;
4879
5626
  top = pq_top(sm->queue);
4880
- while ((top != NULL) && (strcmp(term, top->term) == 0)) {
5627
+ while ((NULL != top) && (0 == strcmp(term, top->term))) {
4881
5628
  matches[match_size] = pq_pop(sm->queue);
4882
5629
  match_size++;
4883
5630
  top = pq_top(sm->queue);
@@ -4889,7 +5636,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
4889
5636
  while (match_size > 0) {
4890
5637
  match_size--;
4891
5638
  smi = matches[match_size];
4892
- if (smi_next(smi) != NULL) {
5639
+ if (NULL != smi_next(smi)) {
4893
5640
  pq_push(sm->queue, smi); /* restore queue */
4894
5641
  }
4895
5642
  }
@@ -4905,12 +5652,12 @@ static void sm_merge_terms(SegmentMerger *sm)
4905
5652
  {
4906
5653
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4907
5654
 
4908
- sprintf(file_name, "%s.frq", sm->segment);
5655
+ sprintf(file_name, "%s.frq", sm->si->name);
4909
5656
  sm->frq_out = sm->store->new_output(sm->store, file_name);
4910
- sprintf(file_name, "%s.prx", sm->segment);
5657
+ sprintf(file_name, "%s.prx", sm->si->name);
4911
5658
  sm->prx_out = sm->store->new_output(sm->store, file_name);
4912
5659
 
4913
- sm->tiw = tiw_open(sm->store, sm->segment, sm->config->index_interval,
5660
+ sm->tiw = tiw_open(sm->store, sm->si->name, sm->config->index_interval,
4914
5661
  sm->config->skip_interval);
4915
5662
  sm->skip_buf = skip_buf_new(sm->frq_out, sm->prx_out);
4916
5663
 
@@ -4936,6 +5683,7 @@ static void sm_merge_terms(SegmentMerger *sm)
4936
5683
 
4937
5684
  static void sm_merge_norms(SegmentMerger *sm)
4938
5685
  {
5686
+ SegmentInfo *si;
4939
5687
  int i, j, k;
4940
5688
  Store *store;
4941
5689
  uchar byte;
@@ -4945,23 +5693,21 @@ static void sm_merge_norms(SegmentMerger *sm)
4945
5693
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4946
5694
  SegmentMergeInfo *smi;
4947
5695
  const int seg_cnt = sm->seg_cnt;
4948
- const int fis_size = sm->fis->size;
4949
- for (i = 0; i < fis_size; i++) {
5696
+ for (i = sm->fis->size - 1; i >= 0; i--) {
4950
5697
  fi = sm->fis->fields[i];
4951
5698
  if (fi_has_norms(fi)) {
4952
- sprintf(file_name, "%s.f%d", sm->segment, i);
5699
+ si = sm->si;
5700
+ si_advance_norm_gen(si, i);
5701
+ si_norm_file_name(si, file_name, i);
4953
5702
  os = sm->store->new_output(sm->store, file_name);
4954
5703
  for (j = 0; j < seg_cnt; j++) {
4955
5704
  smi = sm->smis[j];
4956
- store = smi->orig_store;
4957
- sprintf(file_name, "%s.s%d", smi->segment, i);
4958
- if (!store->exists(store, file_name)) {
4959
- sprintf(file_name, "%s.f%d", smi->segment, i);
4960
- store = smi->store;
4961
- }
4962
- if (store->exists(store, file_name)) {
5705
+ si = smi->si;
5706
+ if (si_norm_file_name(si, file_name, i)) {
4963
5707
  const int max_doc = smi->max_doc;
4964
5708
  BitVector *deleted_docs = smi->deleted_docs;
5709
+ store = (si->use_compound_file && si->norm_gens[i])
5710
+ ? smi->orig_store : smi->store;
4965
5711
  is = store->open_input(store, file_name);
4966
5712
  if (deleted_docs) {
4967
5713
  for (k = 0; k < max_doc; k++) {
@@ -5004,11 +5750,18 @@ static int sm_merge(SegmentMerger *sm)
5004
5750
  /* prepare an index ready for writing */
5005
5751
  void index_create(Store *store, FieldInfos *fis)
5006
5752
  {
5007
- SegmentInfos *sis = sis_new();
5753
+ SegmentInfos *sis = sis_new(fis);
5008
5754
  store->clear_all(store);
5009
- sis_write(sis, store);
5755
+ sis_write(sis, store, NULL);
5010
5756
  sis_destroy(sis);
5011
- fis_write(fis, store);
5757
+ }
5758
+
5759
+ bool index_is_locked(Store *store)
5760
+ {
5761
+ Lock *write_lock = open_lock(store, WRITE_LOCK_NAME);
5762
+ bool is_locked = write_lock->is_locked(write_lock);
5763
+ close_lock(write_lock);
5764
+ return is_locked;
5012
5765
  }
5013
5766
 
5014
5767
  int iw_doc_count(IndexWriter *iw)
@@ -5025,158 +5778,79 @@ int iw_doc_count(IndexWriter *iw)
5025
5778
  return doc_cnt;
5026
5779
  }
5027
5780
 
5028
- static void delete_files(char **file_names, Store *store)
5781
+ #define MOVE_TO_COMPOUND_DIR(file_name)\
5782
+ deleter_queue_file(dlr, file_name);\
5783
+ cw_add_file(cw, file_name)
5784
+
5785
+ static void iw_create_compound_file(Store *store, FieldInfos *fis,
5786
+ SegmentInfo *si, char *cfs_file_name,
5787
+ Deleter *dlr)
5029
5788
  {
5030
5789
  int i;
5031
- for (i = ary_size(file_names) - 1; i >= 0; i--) {
5032
- store->remove(store, file_names[i]);
5033
- }
5034
- ary_destroy((void **)file_names, &free);
5035
- }
5036
-
5037
- static char **iw_create_compound_file(Store *store, FieldInfos *fis,
5038
- char *segment, char *cfs_file_name)
5039
- {
5040
- char **file_names = (char **)ary_new_capa(16);
5041
5790
  CompoundWriter *cw;
5042
- FieldInfo *fi;
5043
- int i;
5044
5791
  char file_name[SEGMENT_NAME_MAX_LENGTH];
5045
- const int fis_size = fis->size;
5046
- int file_names_size;
5792
+ char *ext;
5793
+ int seg_len = strlen(si->name);
5794
+
5795
+ memcpy(file_name, si->name, seg_len);
5796
+ file_name[seg_len] = '.';
5797
+ ext = file_name + seg_len + 1;
5047
5798
 
5048
5799
  cw = open_cw(store, cfs_file_name);
5049
5800
  for (i = 0; i < NELEMS(COMPOUND_EXTENSIONS); i++) {
5050
- sprintf(file_name, "%s.%s",
5051
- segment, COMPOUND_EXTENSIONS[i]);
5052
- ary_push(file_names, estrdup(file_name));
5801
+ memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
5802
+ MOVE_TO_COMPOUND_DIR(file_name);
5053
5803
  }
5054
5804
 
5055
5805
  /* Field norm file_names */
5056
- for (i = 0; i < fis_size; i++) {
5057
- fi = fis->fields[i];
5058
- if (fi_has_norms(fi)) {
5059
- sprintf(file_name, "%s.f%d", segment, i);
5060
- if (!store->exists(store, file_name)) {
5061
- continue;
5062
- }
5063
- ary_push(file_names, estrdup(file_name));
5806
+ for (i = fis->size - 1; i >= 0; i--) {
5807
+ if (fi_has_norms(fis->fields[i])
5808
+ && si_norm_file_name(si, file_name, i)) {
5809
+ MOVE_TO_COMPOUND_DIR(file_name);
5064
5810
  }
5065
5811
  }
5066
5812
 
5067
- /* Now merge all added file_names */
5068
- file_names_size = ary_size(file_names);
5069
- for (i = 0; i < file_names_size; i++) {
5070
- cw_add_file(cw, file_names[i]);
5071
- }
5072
-
5073
5813
  /* Perform the merge */
5074
5814
  cw_close(cw);
5075
-
5076
- return file_names;
5077
5815
  }
5078
5816
 
5079
- static void iw_commit_compound_file(IndexWriter *iw, char *segment,
5080
- Lock *commit_lock)
5817
+ static void iw_commit_compound_file(IndexWriter *iw, SegmentInfo *si)
5081
5818
  {
5082
- char tmp_name[SEGMENT_NAME_MAX_LENGTH];
5083
5819
  char cfs_name[SEGMENT_NAME_MAX_LENGTH];
5084
- char **files_to_delete;
5085
- sprintf(tmp_name, "%s.tmp", segment);
5086
- sprintf(cfs_name, "%s.cfs", segment);
5087
-
5088
- files_to_delete =
5089
- iw_create_compound_file(iw->store, iw->fis, segment, tmp_name);
5090
- if (!commit_lock->obtain(commit_lock)) {
5091
- RAISE(LOCK_ERROR,
5092
- "Couldn't obtain commit lock to write compound file");
5093
- }
5820
+ sprintf(cfs_name, "%s.cfs", si->name);
5094
5821
 
5095
- delete_files(files_to_delete, iw->store);
5096
- iw->store->rename(iw->store, tmp_name, cfs_name);
5097
-
5098
- commit_lock->release(commit_lock);
5099
- }
5100
-
5101
- #define ADD_IF_EXISTS_FMT(fmt, ext) do {\
5102
- sprintf(file_name, fmt, segment, ext);\
5103
- if (store->exists(store, file_name)) {\
5104
- ary_push(file_names, estrdup(file_name));\
5105
- }\
5106
- } while (0)
5107
-
5108
- #define ADD_IF_EXISTS(ext) ADD_IF_EXISTS_FMT("%s.%s", ext)
5109
-
5110
- static char **iw_seg_file_names(FieldInfos *fis, Store *store, char *segment)
5111
- {
5112
- char **file_names = (char **)ary_new_capa(16);
5113
- int i;
5114
- char file_name[SEGMENT_NAME_MAX_LENGTH];
5115
- const int fis_size = fis->size;
5116
-
5117
-
5118
- sprintf(file_name, "%s.cfs", segment);
5119
- if (store->exists(store, file_name)) {
5120
- ary_push(file_names, estrdup(file_name));
5121
- ADD_IF_EXISTS("del");
5122
- for (i = 0; i < fis_size; i++) {
5123
- if (fi_has_norms(fis->fields[i])) {
5124
- ADD_IF_EXISTS_FMT("%s.s%d", i);
5125
- }
5126
- }
5127
- }
5128
- else {
5129
- for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
5130
- ADD_IF_EXISTS(INDEX_EXTENSIONS[i]);
5131
- }
5132
- for (i = 0; i < fis_size; i++) {
5133
- if (fi_has_norms(fis->fields[i])) {
5134
- ADD_IF_EXISTS_FMT("%s.f%d", i);
5135
- }
5136
- }
5137
- }
5138
- return file_names;
5822
+ iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
5823
+ deleter_commit_pending_deletions(iw->deleter);
5139
5824
  }
5140
5825
 
5141
5826
  static void iw_merge_segments(IndexWriter *iw, const int min_seg,
5142
5827
  const int max_seg)
5143
5828
  {
5144
5829
  int i;
5145
- Lock *commit_lock;
5146
5830
  SegmentInfos *sis = iw->sis;
5147
5831
  SegmentInfo *si = sis_new_segment(sis, 0, iw->store);
5148
5832
 
5149
- SegmentMerger *merger = sm_create(iw, si->name, &sis->segs[min_seg],
5833
+ SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg],
5150
5834
  max_seg - min_seg);
5151
5835
 
5152
5836
  /* This is where all the action happens. */
5153
5837
  si->doc_cnt = sm_merge(merger);
5154
5838
 
5155
5839
  mutex_lock(&iw->store->mutex);
5156
- commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
5157
-
5158
- /* *** OBTAIN COMMIT LOCK *** */
5159
- if (!commit_lock->obtain(commit_lock)) {
5160
- RAISE(LOCK_ERROR, "Couldn't obtain commit lock to commit merged segment "
5161
- "%s", si->name);
5162
- }
5163
5840
  /* delete merged segments */
5164
5841
  for (i = min_seg; i < max_seg; i++) {
5165
- delete_files(
5166
- iw_seg_file_names(iw->fis, sis->segs[i]->store, sis->segs[i]->name),
5167
- iw->store);
5842
+ si_delete_files(sis->segs[i], iw->fis, iw->deleter);
5168
5843
  }
5844
+ deleter_commit_pending_deletions(iw->deleter);
5845
+
5169
5846
  sis_del_from_to(sis, min_seg, max_seg);
5170
- /* commit the segments file */
5171
- sis_write(sis, iw->store);
5172
- commit_lock->release(commit_lock);
5173
- /* RELEASE COMMIT LOCK */
5174
5847
 
5175
5848
  if (iw->config.use_compound_file) {
5176
- iw_commit_compound_file(iw, si->name, commit_lock);
5849
+ iw_commit_compound_file(iw, si);
5850
+ si->use_compound_file = true;
5177
5851
  }
5178
5852
 
5179
- close_lock(commit_lock);
5853
+ sis_write(sis, iw->store, iw->deleter);
5180
5854
 
5181
5855
  mutex_unlock(&iw->store->mutex);
5182
5856
 
@@ -5223,28 +5897,20 @@ static void iw_flush_ram_segment(IndexWriter *iw)
5223
5897
  {
5224
5898
  SegmentInfos *sis = iw->sis;
5225
5899
  SegmentInfo *si;
5226
- Lock *commit_lock;
5227
5900
 
5228
5901
  si = sis->segs[sis->size - 1];
5229
5902
  si->doc_cnt = iw->dw->doc_num;
5230
5903
  dw_flush(iw->dw);
5231
5904
 
5232
5905
  mutex_lock(&iw->store->mutex);
5233
- commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
5234
5906
 
5235
- if (!commit_lock->obtain(commit_lock)) {
5236
- RAISE(LOCK_ERROR, "Couldn't obtain commit lock to write segments file");
5907
+ if (iw->config.use_compound_file) {
5908
+ iw_commit_compound_file(iw, si);
5909
+ si->use_compound_file = true;
5237
5910
  }
5238
5911
  /* commit the segments file and the fields file */
5239
- fis_write(iw->fis, iw->store);
5240
- sis_write(iw->sis, iw->store);
5241
- commit_lock->release(commit_lock);
5912
+ sis_write(iw->sis, iw->store, iw->deleter);
5242
5913
 
5243
-
5244
- if (iw->config.use_compound_file) {
5245
- iw_commit_compound_file(iw, si->name, commit_lock);
5246
- }
5247
- close_lock(commit_lock);
5248
5914
  mutex_unlock(&iw->store->mutex);
5249
5915
 
5250
5916
  iw_maybe_merge_segments(iw);
@@ -5253,11 +5919,11 @@ static void iw_flush_ram_segment(IndexWriter *iw)
5253
5919
  void iw_add_doc(IndexWriter *iw, Document *doc)
5254
5920
  {
5255
5921
  mutex_lock(&iw->mutex);
5256
- if (!iw->dw) {
5257
- iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store)->name);
5922
+ if (NULL == iw->dw) {
5923
+ iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store));
5258
5924
  }
5259
- else if (iw->dw->fw == NULL) {
5260
- dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store)->name);
5925
+ else if (NULL == iw->dw->fw) {
5926
+ dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store));
5261
5927
  }
5262
5928
  dw_add_doc(iw->dw, doc);
5263
5929
  if (mp_used(iw->dw->mp) > iw->config.max_buffer_memory
@@ -5291,17 +5957,25 @@ void iw_delete_term(IndexWriter *iw, const char *field, const char *term)
5291
5957
  do {
5292
5958
  SegmentInfos *sis = iw->sis;
5293
5959
  const int seg_cnt = sis->size;
5960
+ bool did_delete = false;
5294
5961
  for (i = 0; i < seg_cnt; i++) {
5295
5962
  IndexReader *ir = sr_open(sis, iw->fis, i, false);
5296
5963
  TermDocEnum *tde = ir->term_docs(ir);
5964
+ ir->deleter = iw->deleter;
5297
5965
  stde_seek(tde, field_num, term);
5298
5966
  while (tde->next(tde)) {
5967
+ did_delete = true;
5299
5968
  sr_delete_doc_i(ir, STDE(tde)->doc_num);
5300
5969
  }
5301
5970
  tde_destroy(tde);
5302
5971
  sr_commit_i(ir);
5303
5972
  ir_close(ir);
5304
5973
  }
5974
+ if (did_delete) {
5975
+ mutex_lock(&iw->store->mutex);
5976
+ sis_write(iw->sis, iw->store, iw->deleter);
5977
+ mutex_unlock(&iw->store->mutex);
5978
+ }
5305
5979
  } while (0);
5306
5980
  mutex_unlock(&iw->mutex);
5307
5981
  }
@@ -5316,7 +5990,7 @@ static void iw_optimize_i(IndexWriter *iw)
5316
5990
  && (si_has_deletions(iw->sis->segs[0])
5317
5991
  || (iw->sis->segs[0]->store != iw->store)
5318
5992
  || (iw->config.use_compound_file
5319
- && (!si_uses_compound_file(iw->sis->segs[0])
5993
+ && (!iw->sis->segs[0]->use_compound_file
5320
5994
  || si_has_separate_norms(iw->sis->segs[0])))))) {
5321
5995
  min_segment = iw->sis->size - iw->config.merge_factor;
5322
5996
  iw_merge_segments_from(iw, min_segment < 0 ? 0 : min_segment);
@@ -5344,7 +6018,9 @@ void iw_close(IndexWriter *iw)
5344
6018
 
5345
6019
  iw->write_lock->release(iw->write_lock);
5346
6020
  close_lock(iw->write_lock);
6021
+ iw->write_lock = NULL;
5347
6022
  store_deref(iw->store);
6023
+ deleter_destroy(iw->deleter);
5348
6024
 
5349
6025
  mutex_destroy(&iw->mutex);
5350
6026
  free(iw);
@@ -5368,16 +6044,17 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
5368
6044
  "Couldn't obtain write lock when opening IndexWriter");
5369
6045
  }
5370
6046
 
5371
-
5372
6047
  iw->sis = sis_read(store);
5373
- iw->fis = fis_read(store);
6048
+ iw->fis = iw->sis->fis;
6049
+ REF(iw->fis);
5374
6050
  XCATCHALL
5375
6051
  if (iw->write_lock) {
5376
6052
  iw->write_lock->release(iw->write_lock);
5377
6053
  close_lock(iw->write_lock);
6054
+ iw->write_lock = NULL;
5378
6055
  }
5379
6056
  if (iw->sis) sis_destroy(iw->sis);
5380
- if (iw->fis) fis_deref(iw->fis);
6057
+ if (analyzer) a_deref((Analyzer *)analyzer);
5381
6058
  free(iw);
5382
6059
  XENDTRY
5383
6060
 
@@ -5385,6 +6062,9 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
5385
6062
  iw->analyzer = analyzer ? (Analyzer *)analyzer
5386
6063
  : mb_standard_analyzer_new(true);
5387
6064
 
6065
+ iw->deleter = deleter_new(iw->sis, store);
6066
+ deleter_delete_deletable_files(iw->deleter);
6067
+
5388
6068
  REF(store);
5389
6069
  return iw;
5390
6070
  }
@@ -5400,18 +6080,19 @@ static void iw_cp_fields(IndexWriter *iw, SegmentReader *sr,
5400
6080
  InStream *fdt_in, *fdx_in;
5401
6081
  Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
5402
6082
  Store *store_out = iw->store;
6083
+ char *sr_segment = sr->si->name;
5403
6084
 
5404
6085
  sprintf(file_name, "%s.fdt", segment);
5405
6086
  fdt_out = store_out->new_output(store_out, file_name);
5406
6087
  sprintf(file_name, "%s.fdx", segment);
5407
6088
  fdx_out = store_out->new_output(store_out, file_name);
5408
6089
 
5409
- sprintf(file_name, "%s.fdt", sr->segment);
6090
+ sprintf(file_name, "%s.fdt", sr_segment);
5410
6091
  fdt_in = store_in->open_input(store_in, file_name);
5411
- sprintf(file_name, "%s.fdx", sr->segment);
6092
+ sprintf(file_name, "%s.fdx", sr_segment);
5412
6093
  fdx_in = store_in->open_input(store_in, file_name);
5413
6094
 
5414
- sprintf(file_name, "%s.del", sr->segment);
6095
+ sprintf(file_name, "%s.del", sr_segment);
5415
6096
  if (store_in->exists(store_in, file_name)) {
5416
6097
  OutStream *del_out;
5417
6098
  InStream *del_in = store_in->open_input(store_in, file_name);
@@ -5487,30 +6168,31 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
5487
6168
  InStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
5488
6169
  Store *store_out = iw->store;
5489
6170
  Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
6171
+ char *sr_segment = sr->si->name;
5490
6172
 
5491
6173
  sprintf(file_name, "%s.tix", segment);
5492
6174
  tix_out = store_out->new_output(store_out, file_name);
5493
- sprintf(file_name, "%s.tix", sr->segment);
6175
+ sprintf(file_name, "%s.tix", sr_segment);
5494
6176
  tix_in = store_in->open_input(store_in, file_name);
5495
6177
 
5496
6178
  sprintf(file_name, "%s.tis", segment);
5497
6179
  tis_out = store_out->new_output(store_out, file_name);
5498
- sprintf(file_name, "%s.tis", sr->segment);
6180
+ sprintf(file_name, "%s.tis", sr_segment);
5499
6181
  tis_in = store_in->open_input(store_in, file_name);
5500
6182
 
5501
6183
  sprintf(file_name, "%s.tfx", segment);
5502
6184
  tfx_out = store_out->new_output(store_out, file_name);
5503
- sprintf(file_name, "%s.tfx", sr->segment);
6185
+ sprintf(file_name, "%s.tfx", sr_segment);
5504
6186
  tfx_in = store_in->open_input(store_in, file_name);
5505
6187
 
5506
6188
  sprintf(file_name, "%s.frq", segment);
5507
6189
  frq_out = store_out->new_output(store_out, file_name);
5508
- sprintf(file_name, "%s.frq", sr->segment);
6190
+ sprintf(file_name, "%s.frq", sr_segment);
5509
6191
  frq_in = store_in->open_input(store_in, file_name);
5510
6192
 
5511
6193
  sprintf(file_name, "%s.prx", segment);
5512
6194
  prx_out = store_out->new_output(store_out, file_name);
5513
- sprintf(file_name, "%s.prx", sr->segment);
6195
+ sprintf(file_name, "%s.prx", sr_segment);
5514
6196
  prx_in = store_in->open_input(store_in, file_name);
5515
6197
 
5516
6198
  if (map) {
@@ -5548,47 +6230,38 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
5548
6230
  }
5549
6231
 
5550
6232
  static void iw_cp_norms(IndexWriter *iw, SegmentReader *sr,
5551
- const char *segment, int *map)
6233
+ SegmentInfo *si, int *map)
5552
6234
  {
5553
6235
  int i;
5554
6236
  FieldInfos *fis = IR(sr)->fis;
5555
6237
  const int field_cnt = fis->size;
5556
6238
  InStream *norms_in;
5557
6239
  OutStream *norms_out;
5558
- Store *store_in = sr->ir.store;
5559
- Store *cfs_store_in = sr->cfs_store;
5560
6240
  Store *store_out = iw->store;
5561
6241
  char file_name_in[SEGMENT_NAME_MAX_LENGTH];
5562
- char *ext_ptr_in;
5563
6242
  char file_name_out[SEGMENT_NAME_MAX_LENGTH];
5564
- char *ext_ptr_out;
5565
- sprintf(file_name_in, "%s.", sr->segment);
5566
- ext_ptr_in = file_name_in + strlen(file_name_in);
5567
- sprintf(file_name_out, "%s.", segment);
5568
- ext_ptr_out = file_name_out + strlen(file_name_out);
5569
6243
 
5570
6244
  for (i = 0; i < field_cnt; i++) {
5571
- if (fi_has_norms(fis->fields[i])) {
5572
- Store *store = store_in;
5573
- sprintf(ext_ptr_in, "s%d", i);
5574
- if (!store->exists(store, file_name_in)) {
5575
- sprintf(ext_ptr_in, "f%d", i);
5576
- store = cfs_store_in;
5577
- }
5578
- if (store->exists(store, file_name_in)) {
5579
- norms_in = store->open_input(store, file_name_in);
5580
- sprintf(ext_ptr_out, "f%d", map ? map[i] : i);
5581
- norms_out = store_out->new_output(store_out, file_name_out);
5582
- is2os_copy_bytes(norms_in, norms_out, is_length(norms_in));
5583
- os_close(norms_out);
5584
- is_close(norms_in);
5585
- }
6245
+ if (fi_has_norms(fis->fields[i])
6246
+ && si_norm_file_name(sr->si, file_name_in, i)) {
6247
+ Store *store = (sr->si->use_compound_file
6248
+ && sr->si->norm_gens[i] == 0) ? sr->cfs_store
6249
+ : IR(sr)->store;
6250
+ int field_num = map ? map[i] : i;
6251
+
6252
+ norms_in = store->open_input(store, file_name_in);
6253
+ si_advance_norm_gen(si, field_num);
6254
+ si_norm_file_name(si, file_name_out, field_num);
6255
+ norms_out = store_out->new_output(store_out, file_name_out);
6256
+ is2os_copy_bytes(norms_in, norms_out, is_length(norms_in));
6257
+ os_close(norms_out);
6258
+ is_close(norms_in);
5586
6259
  }
5587
6260
  }
5588
6261
  }
5589
6262
 
5590
6263
  static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
5591
- const char *segment)
6264
+ SegmentInfo *si)
5592
6265
  {
5593
6266
  int i;
5594
6267
  FieldInfos *from_fis = IR(sr)->fis;
@@ -5600,19 +6273,19 @@ static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
5600
6273
  field_map[i] = fis_get_field_num(to_fis, from_fis->fields[i]->name);
5601
6274
  }
5602
6275
 
5603
- iw_cp_fields(iw, sr, segment, field_map);
5604
- iw_cp_terms(iw, sr, segment, field_map);
5605
- iw_cp_norms(iw, sr, segment, field_map);
6276
+ iw_cp_fields(iw, sr, si->name, field_map);
6277
+ iw_cp_terms( iw, sr, si->name, field_map);
6278
+ iw_cp_norms( iw, sr, si, field_map);
5606
6279
 
5607
6280
  free(field_map);
5608
6281
  }
5609
6282
 
5610
6283
  static void iw_cp_files(IndexWriter *iw, SegmentReader *sr,
5611
- const char *segment)
6284
+ SegmentInfo *si)
5612
6285
  {
5613
- iw_cp_fields(iw, sr, segment, NULL);
5614
- iw_cp_terms(iw, sr, segment, NULL);
5615
- iw_cp_norms(iw, sr, segment, NULL);
6286
+ iw_cp_fields(iw, sr, si->name, NULL);
6287
+ iw_cp_terms( iw, sr, si->name, NULL);
6288
+ iw_cp_norms( iw, sr, si, NULL);
5616
6289
  }
5617
6290
 
5618
6291
  static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
@@ -5641,10 +6314,10 @@ static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
5641
6314
  }
5642
6315
 
5643
6316
  if (must_map_fields) {
5644
- iw_cp_map_files(iw, sr, si->name);
6317
+ iw_cp_map_files(iw, sr, si);
5645
6318
  }
5646
6319
  else {
5647
- iw_cp_files(iw, sr, si->name);
6320
+ iw_cp_files(iw, sr, si);
5648
6321
  }
5649
6322
  }
5650
6323
 
@@ -5666,8 +6339,6 @@ static void iw_add_segments(IndexWriter *iw, IndexReader *ir)
5666
6339
  void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
5667
6340
  {
5668
6341
  int i;
5669
- Lock *commit_lock;
5670
-
5671
6342
  mutex_lock(&iw->mutex);
5672
6343
  iw_optimize_i(iw);
5673
6344
 
@@ -5676,16 +6347,9 @@ void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
5676
6347
  }
5677
6348
 
5678
6349
  mutex_lock(&iw->store->mutex);
5679
- commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
5680
6350
 
5681
- if (!commit_lock->obtain(commit_lock)) {
5682
- RAISE(LOCK_ERROR, "Couldn't obtain commit lock to write segments file");
5683
- }
5684
6351
  /* commit the segments file and the fields file */
5685
- fis_write(iw->fis, iw->store);
5686
- sis_write(iw->sis, iw->store);
5687
- commit_lock->release(commit_lock);
5688
- close_lock(commit_lock);
6352
+ sis_write(iw->sis, iw->store, iw->deleter);
5689
6353
  mutex_unlock(&iw->store->mutex);
5690
6354
 
5691
6355
  iw_optimize_i(iw);