ferret 0.10.14 → 0.11.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/hash.c CHANGED
@@ -18,13 +18,6 @@ static char *dummy_key = "";
18
18
  static HashTable *free_hts[MAX_FREE_HASH_TABLES];
19
19
  static int num_free_hts = 0;
20
20
 
21
- unsigned long *imalloc(unsigned long value)
22
- {
23
- unsigned long *p = ALLOC(unsigned long);
24
- *p = value;
25
- return p;
26
- }
27
-
28
21
  unsigned long str_hash(const char *const str)
29
22
  {
30
23
  register unsigned long h = 0;
data/ext/hash.h CHANGED
@@ -81,14 +81,6 @@ typedef unsigned long (*hash_ft)(const void *key);
81
81
  */
82
82
  typedef int (*eq_ft)(const void *key1, const void *key2);
83
83
 
84
-
85
- /**
86
- * Create a pointer to an allocated U32 integer. This function is a utility
87
- * function used to add integers to a HashTable, either as the key or the
88
- * value.
89
- */
90
- extern unsigned long *imalloc(unsigned long value);
91
-
92
84
  /**
93
85
  * Determine a hash value for a string. The string must be null terminated
94
86
  *
data/ext/index.c CHANGED
@@ -5,6 +5,7 @@
5
5
  #include "priorityqueue.h"
6
6
  #include <string.h>
7
7
  #include <limits.h>
8
+ #include <ctype.h>
8
9
 
9
10
  #define GET_LOCK(lock, name, store, err_msg) do {\
10
11
  lock = store->open_lock(store, name);\
@@ -18,14 +19,6 @@
18
19
  store->close_lock(lock);\
19
20
  } while (0)
20
21
 
21
- const char *INDEX_EXTENSIONS[] = {
22
- "fdx", "fdt", "tfx", "tix", "tis", "frq", "prx", "del"
23
- };
24
-
25
- const char *COMPOUND_EXTENSIONS[] = {
26
- "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
27
- };
28
-
29
22
  const Config default_config = {
30
23
  0x100000, /* chunk size is 1Mb */
31
24
  0x1000000, /* Max memory used for buffer is 16 Mb */
@@ -41,6 +34,128 @@ const Config default_config = {
41
34
  static void ste_reset(TermEnum *te);
42
35
  static char *ste_next(TermEnum *te);
43
36
 
37
+ #define FORMAT 0
38
+ #define SEGMENTS_GEN_FILE_NAME "segments.gen"
39
+ #define MAX_EXT_LEN 10
40
+
41
+ /* *** Must be three characters *** */
42
+ const char *INDEX_EXTENSIONS[] = {
43
+ "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen", "cfs"
44
+ };
45
+
46
+ /* *** Must be three characters *** */
47
+ const char *COMPOUND_EXTENSIONS[] = {
48
+ "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
49
+ };
50
+
51
+
52
+ static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
53
+
54
+ static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
55
+ {
56
+ int i = buf_size--;
57
+ buf[i] = '\0';
58
+ for (i--; i >= 0; i--) {
59
+ buf[i] = BASE36_DIGITMAP[u % 36];
60
+ u /= 36;
61
+ if (0 == u) {
62
+ break;
63
+ }
64
+ }
65
+ if (0 < u) {
66
+ RAISE(EXCEPTION, "Max length of segment filename has been reached. "
67
+ "Perhaps it's time to re-index.\n");
68
+ }
69
+ return buf + i;
70
+ }
71
+
72
+ static f_u64 str36_to_u64(char *p)
73
+ {
74
+ f_u64 u = 0;
75
+ while (true) {
76
+ if ('0' <= *p && '9' >= *p) {
77
+ u = u * 36 + *p - '0';
78
+ }
79
+ else if ('a' <= *p && 'z' >= *p) {
80
+ u = u * 36 + *p - 'a' + 10;
81
+ }
82
+ else {
83
+ break;
84
+ }
85
+ p++;
86
+ }
87
+ return u;
88
+ }
89
+
90
+ /*
91
+ * Computes the full file name from base, extension and generation. If the
92
+ * generation is -1, the file name is NULL. Otherwise the file name is
93
+ * <base>_<generation>.<extension>, or <base>_<generation> when ext is NULL;
94
+ * the generation is written in base 36.
95
+ *
96
+ * @param buf buffer to write filename to
97
+ * @param base main part of the file name
98
+ * @param ext extension of the filename (without the leading '.', which is added here); may be NULL
99
+ * @param gen generation
100
+ */
101
+ char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
102
+ {
103
+ if (-1 == gen) {
104
+ return NULL;
105
+ }
106
+ else {
107
+ char b[SEGMENT_NAME_MAX_LENGTH];
108
+ char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen);
109
+ if (ext == NULL) {
110
+ sprintf(buf, "%s_%s", base, u);
111
+ }
112
+ else {
113
+ sprintf(buf, "%s_%s.%s", base, u, ext);
114
+ }
115
+ return buf;
116
+ }
117
+ }
118
+
119
+ char *segfn_for_generation(char *buf, int generation)
120
+ {
121
+ char b[SEGMENT_NAME_MAX_LENGTH];
122
+ char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)generation);
123
+ sprintf(buf, SEGMENTS_FILE_NAME"_%s", u);
124
+ return buf;
125
+ }
126
+
127
+ /*
128
+ * Computes the field specific file name from base, extension, generation and
129
+ * field number. If the generation is -1, the file name is NULL. Otherwise
130
+ * the file name is <base>_<generation>.<extension><field_num>, with the
131
+ * generation written in base 36.
132
+ *
133
+ * @param buf buffer to write filename to
134
+ * @param base main part of the file name
135
+ * @param ext extension of the filename (without the leading '.', which is added here)
136
+ * @param gen generation
137
+ * @param field_num field number
138
+ */
139
+ static char *fn_for_gen_field(char *buf,
140
+ char *base,
141
+ char *ext,
142
+ f_i64 gen,
143
+ int field_num)
144
+ {
145
+ if (-1 == gen) {
146
+ return NULL;
147
+ }
148
+ else {
149
+ char b[SEGMENT_NAME_MAX_LENGTH];
150
+ sprintf(buf, "%s_%s.%s%d",
151
+ base,
152
+ u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen),
153
+ ext,
154
+ field_num);
155
+ return buf;
156
+ }
157
+ }
158
+
44
159
  /***************************************************************************
45
160
  *
46
161
  * CacheObject
@@ -175,7 +290,7 @@ FieldInfo *fi_new(const char *name,
175
290
 
176
291
  void fi_deref(FieldInfo *fi)
177
292
  {
178
- if (--(fi->ref_cnt) == 0) {
293
+ if (0 == --(fi->ref_cnt)) {
179
294
  free(fi->name);
180
295
  free(fi);
181
296
  }
@@ -208,9 +323,6 @@ char *fi_to_s(FieldInfo *fi)
208
323
  *
209
324
  ****************************************************************************/
210
325
 
211
- #define FIELDS_FILENAME "fields"
212
- #define TEMPORARY_FIELDS_FILENAME "fields.new"
213
-
214
326
  FieldInfos *fis_new(int store, int index, int term_vector)
215
327
  {
216
328
  FieldInfos *fis = ALLOC(FieldInfos);
@@ -278,14 +390,13 @@ FieldInfo *fis_by_number(FieldInfos *fis, int num)
278
390
  }
279
391
  }
280
392
 
281
- FieldInfos *fis_read(Store *store)
393
+ FieldInfos *fis_read(InStream *is)
282
394
  {
283
395
  int store_val, index_val, term_vector_val;
284
396
  int i;
285
397
  union { f_u32 i; float f; } tmp;
286
398
  FieldInfo *fi;
287
399
  FieldInfos *fis;
288
- InStream *is = store->open_input(store, FIELDS_FILENAME);
289
400
 
290
401
  store_val = is_read_vint(is);
291
402
  index_val = is_read_vint(is);
@@ -300,17 +411,15 @@ FieldInfos *fis_read(Store *store)
300
411
  fis_add_field(fis, fi);
301
412
  fi->ref_cnt = 1;
302
413
  }
303
- is_close(is);
304
414
 
305
415
  return fis;
306
416
  }
307
417
 
308
- void fis_write(FieldInfos *fis, Store *store)
418
+ void fis_write(FieldInfos *fis, OutStream *os)
309
419
  {
310
420
  int i;
311
421
  union { f_u32 i; float f; } tmp;
312
422
  FieldInfo *fi;
313
- OutStream *os = store->new_output(store, TEMPORARY_FIELDS_FILENAME);
314
423
  const int fis_size = fis->size;
315
424
 
316
425
  os_write_vint(os, fis->store);
@@ -324,9 +433,6 @@ void fis_write(FieldInfos *fis, Store *store)
324
433
  os_write_u32(os, tmp.i);
325
434
  os_write_vint(os, fi->bits);
326
435
  }
327
- os_close(os);
328
-
329
- store->rename(store, TEMPORARY_FIELDS_FILENAME, FIELDS_FILENAME);
330
436
  }
331
437
 
332
438
  static const char *store_str[] = {
@@ -408,7 +514,7 @@ char *fis_to_s(FieldInfos *fis)
408
514
 
409
515
  void fis_deref(FieldInfos *fis)
410
516
  {
411
- if (--(fis->ref_cnt) == 0) {
517
+ if (0 == --(fis->ref_cnt)) {
412
518
  h_destroy(fis->field_dict);
413
519
  free(fis->fields);
414
520
  free(fis);
@@ -440,54 +546,144 @@ SegmentInfo *si_new(char *name, int doc_cnt, Store *store)
440
546
  si->name = name;
441
547
  si->doc_cnt = doc_cnt;
442
548
  si->store = store;
549
+ si->del_gen = -1;
550
+ si->norm_gens = NULL;
551
+ si->norm_gens_size = 0;
552
+ si->ref_cnt = 1;
553
+ si->use_compound_file = false;
443
554
  return si;
444
555
  }
445
556
 
446
- void si_destroy(SegmentInfo *si)
557
+ SegmentInfo *si_read(Store *store, InStream *is)
447
558
  {
448
- free(si->name);
449
- free(si);
559
+ SegmentInfo *si = ALLOC_AND_ZERO(SegmentInfo);
560
+ si->store = store;
561
+ si->name = is_read_string(is);
562
+ si->doc_cnt = is_read_vint(is);
563
+ si->del_gen = is_read_vint(is);
564
+ si->norm_gens_size = is_read_vint(is);
565
+ si->ref_cnt = 1;
566
+ if (0 < si->norm_gens_size) {
567
+ int i;
568
+ si->norm_gens = ALLOC_N(int, si->norm_gens_size);
569
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
570
+ si->norm_gens[i] = is_read_vint(is);
571
+ }
572
+ }
573
+ si->use_compound_file = (bool)is_read_byte(is);
574
+ return si;
450
575
  }
451
576
 
452
- bool si_has_deletions(SegmentInfo *si)
577
+ void si_write(SegmentInfo *si, OutStream *os)
453
578
  {
454
- char del_file_name[SEGMENT_NAME_MAX_LENGTH];
455
- sprintf(del_file_name, "%s.del", si->name);
456
- return si->store->exists(si->store, del_file_name);
579
+ os_write_string(os, si->name);
580
+ os_write_vint(os, si->doc_cnt);
581
+ os_write_vint(os, si->del_gen);
582
+ os_write_vint(os, si->norm_gens_size);
583
+ if (0 < si->norm_gens_size) {
584
+ int i;
585
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
586
+ os_write_vint(os, si->norm_gens[i]);
587
+ }
588
+ }
589
+ os_write_byte(os, (uchar)si->use_compound_file);
457
590
  }
458
591
 
459
- bool si_uses_compound_file(SegmentInfo *si)
592
+ void si_deref(SegmentInfo *si)
460
593
  {
461
- char compound_file_name[SEGMENT_NAME_MAX_LENGTH];
462
- sprintf(compound_file_name, "%s.cfs", si->name);
463
- return si->store->exists(si->store, compound_file_name);
594
+ if (--si->ref_cnt <= 0) {
595
+ free(si->name);
596
+ free(si->norm_gens);
597
+ free(si);
598
+ }
464
599
  }
465
600
 
466
- struct NormTester {
467
- bool has_norm_file;
468
- int norm_file_pattern_len;
469
- char norm_file_pattern[SEGMENT_NAME_MAX_LENGTH];
470
- };
601
+ bool si_has_deletions(SegmentInfo *si)
602
+ {
603
+ return si->del_gen >= 0;
604
+ }
471
605
 
472
- static void is_norm_file(char *file_name, struct NormTester *nt)
606
+ char *si_del_file_name(SegmentInfo *si, char *buf)
473
607
  {
474
- if (strncmp(file_name, nt->norm_file_pattern,
475
- nt->norm_file_pattern_len) == 0) {
476
- nt->has_norm_file = true;
608
+ if (si->del_gen < 0) {
609
+ return NULL;
610
+ }
611
+ else {
612
+ return fn_for_generation(buf, si->name, ".del", si->del_gen);
477
613
  }
478
614
  }
479
615
 
480
616
  bool si_has_separate_norms(SegmentInfo *si)
481
617
  {
482
- struct NormTester nt;
483
- sprintf(nt.norm_file_pattern, "%s.s", si->name);
484
- nt.norm_file_pattern_len = strlen(nt.norm_file_pattern);
485
- nt.has_norm_file = false;
486
- si->store->each(si->store, (void (*)(char *file_name, void *arg))&is_norm_file, &nt);
618
+ if (si->use_compound_file && si->norm_gens) {
619
+ int i;
620
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
621
+ if (si->norm_gens[i] > 0) return true;
622
+ }
623
+ }
624
+ return false;
625
+ }
487
626
 
488
- return nt.has_norm_file;
627
+ void si_advance_norm_gen(SegmentInfo *si, int field_num)
628
+ {
629
+ if (field_num >= si->norm_gens_size) {
630
+ int i;
631
+ REALLOC_N(si->norm_gens, int, field_num + 1);
632
+ for (i = si->norm_gens_size; i <= field_num; i++) {
633
+ si->norm_gens[i] = -1;
634
+ }
635
+ si->norm_gens_size = field_num + 1;
636
+ }
637
+ si->norm_gens[field_num]++;
489
638
  }
490
639
 
640
+ char *si_norm_file_name(SegmentInfo *si, char *buf, int field_num)
641
+ {
642
+ int norm_gen;
643
+ if (field_num >= si->norm_gens_size
644
+ || 0 > (norm_gen = si->norm_gens[field_num])) {
645
+ return NULL;
646
+ }
647
+ else {
648
+ char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
649
+ return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
650
+ }
651
+ }
652
+
653
+ void deleter_queue_file(Deleter *dlr, char *file_name);
654
+ #define DEL(file_name) deleter_queue_file(dlr, file_name)
655
+
656
+ static void si_delete_files(SegmentInfo *si, FieldInfos *fis, Deleter *dlr)
657
+ {
658
+ int i;
659
+ char file_name[SEGMENT_NAME_MAX_LENGTH];
660
+ size_t seg_len = strlen(si->name);
661
+ char *ext;
662
+
663
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
664
+ if (0 <= si->norm_gens[i]) {
665
+ DEL(si_norm_file_name(si, file_name, fis->fields[i]->number));
666
+ }
667
+ }
668
+
669
+ memcpy(file_name, si->name, seg_len);
670
+ file_name[seg_len] = '.';
671
+ ext = file_name + seg_len + 1;
672
+
673
+ if (si->use_compound_file) {
674
+ memcpy(ext, "cfs", 4);
675
+ DEL(file_name);
676
+ if (0 <= si->del_gen) {
677
+ DEL(fn_for_generation(file_name, si->name, "del", si->del_gen));
678
+ }
679
+ }
680
+ else {
681
+ for (i = NELEMS(INDEX_EXTENSIONS) - 1; i >= 0; i--) {
682
+ memcpy(ext, INDEX_EXTENSIONS[i], 4);
683
+ DEL(file_name);
684
+ }
685
+ }
686
+ }
491
687
 
492
688
  /****************************************************************************
493
689
  *
@@ -496,42 +692,266 @@ bool si_has_separate_norms(SegmentInfo *si)
496
692
  ****************************************************************************/
497
693
 
498
694
  #include <time.h>
499
- #define FORMAT 0
500
- #define SEGMENTS_FILENAME "segments"
501
- #define TEMPORARY_SEGMENTS_FILENAME "segments.new"
502
- #define MAX_EXT_LEN 10
695
+ static char *new_segment(f_i64 generation)
696
+ {
697
+ char buf[SEGMENT_NAME_MAX_LENGTH];
698
+ char *fn_p = u64_to_str36(buf, SEGMENT_NAME_MAX_LENGTH - 1,
699
+ (f_u64)generation);
700
+ *(--fn_p) = '_';
701
+ return estrdup(fn_p);
702
+ }
703
+
704
+ /****************************************************************************
705
+ * FindSegmentsFile
706
+ ****************************************************************************/
503
707
 
504
- static const char base36_digitmap[] = "0123456789abcdefghijklmnopqrstuvwxyz";
708
+ typedef struct FindSegmentsFile {
709
+ f_i64 generation;
710
+ f_u64 u64_return;
711
+ void *p_return;
712
+ } FindSegmentsFile;
505
713
 
506
- static char *new_segment(f_u64 counter)
714
+ static void which_gen_i(char *file_name, void *arg)
715
+ {
716
+ f_i64 *max_generation = (f_i64 *)arg;
717
+ if (0 == strncmp(SEGMENTS_FILE_NAME"_", file_name,
718
+ sizeof(SEGMENTS_FILE_NAME))) {
719
+ char *p = strrchr(file_name, '_') + 1;
720
+ f_i64 generation = (f_i64)str36_to_u64(p);
721
+ if (generation > *max_generation) *max_generation = generation;
722
+ }
723
+ }
724
+
725
+ static void si_put(SegmentInfo *si, FILE *stream)
507
726
  {
508
- char file_name[SEGMENT_NAME_MAX_LENGTH];
509
727
  int i;
728
+ fprintf(stream, "\tSegmentInfo {\n");
729
+ fprintf(stream, "\t\tname = %s\n", si->name);
730
+ fprintf(stream, "\t\tdoc_cnt = %d\n", si->doc_cnt);
731
+ fprintf(stream, "\t\tdel_gen = %d\n", si->del_gen);
732
+ fprintf(stream, "\t\tnorm_gens_size = %d\n", si->norm_gens_size);
733
+ fprintf(stream, "\t\tnorm_gens {\n");
734
+ for (i = 0; i < si->norm_gens_size; i++) {
735
+ fprintf(stream, "\t\t\t%d\n", si->norm_gens[i]);
736
+ }
737
+ fprintf(stream, "\t\t}\n");
738
+ fprintf(stream, "\t\tref_cnt = %d\n", si->ref_cnt);
739
+ fprintf(stream, "\t}\n");
740
+ }
510
741
 
511
- file_name[SEGMENT_NAME_MAX_LENGTH - 1] = '\0';
512
- for (i = SEGMENT_NAME_MAX_LENGTH - 2; i > MAX_EXT_LEN; i--) {
513
- file_name[i] = base36_digitmap[counter%36];
514
- counter /= 36;
515
- if (counter == 0) {
516
- break;
517
- }
742
+ void sis_put(SegmentInfos *sis, FILE *stream)
743
+ {
744
+ int i;
745
+ fprintf(stream, "SegmentInfos {\n");
746
+ fprintf(stream, "\tcounter = %"POSH_I64_PRINTF_PREFIX"d\n", sis->counter);
747
+ fprintf(stream, "\tversion = %"POSH_I64_PRINTF_PREFIX"d\n", sis->version);
748
+ fprintf(stream, "\tgeneration = %"POSH_I64_PRINTF_PREFIX"d\n", sis->generation);
749
+ fprintf(stream, "\tformat = %d\n", sis->format);
750
+ fprintf(stream, "\tsize = %d\n", sis->size);
751
+ fprintf(stream, "\tcapa = %d\n", sis->capa);
752
+ for (i = 0; i < sis->size; i++) {
753
+ si_put(sis->segs[i], stream);
518
754
  }
519
- if (i == MAX_EXT_LEN) {
520
- RAISE(EXCEPTION, "Max length of segment filename has been reached. "
521
- "Time to re-index.\n");
755
+ fprintf(stream, "}\n");
756
+ }
757
+
758
+ /*
759
+ * Get the generation (N) of the current segments_N file from a list of files.
760
+ *
761
+ * @param store - the Store to look in
762
+ */
763
+ f_i64 sis_current_segment_generation(Store *store)
764
+ {
765
+ f_i64 current_generation = -1;
766
+ store->each(store, &which_gen_i, &current_generation);
767
+ return current_generation;
768
+ }
769
+
770
+ /*
771
+ * Get the current generation filename.
772
+ *
773
+ * @param buf - buffer to write filename to
774
+ * @param store - the Store to look in
775
+ * @return segments_N where N is the current generation
776
+ */
777
+ char *sis_curr_seg_file_name(char *buf, Store *store)
778
+ {
779
+ return segfn_for_generation(buf, sis_current_segment_generation(store));
780
+ }
781
+
782
+ /*
783
+ * Get the next generation filename.
784
+ *
785
+ * @param buf - buffer to write filename to
786
+ * @param store - the Store to look in
787
+ * @return segments_N where N is the +next+ generation
788
+ */
789
+ char *sis_next_seg_file_name(char *buf, Store *store)
790
+ {
791
+ return segfn_for_generation(buf, sis_current_segment_generation(store) + 1);
792
+ }
793
+
794
+ #define GEN_FILE_RETRY_COUNT 10
795
+ #define GEN_LOOK_AHEAD_COUNT 10
796
+ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
797
+ void (*run)(Store *store, FindSegmentsFile *fsf))
798
+ {
799
+ int i;
800
+ int gen_look_ahead_count = 0;
801
+ bool retry = false;
802
+ int method = 0;
803
+ f_i64 last_gen = -1;
804
+ f_i64 gen = 0;
805
+
806
+ /* Loop until we succeed in calling run() without hitting an IO_ERROR or
807
+ * FILE_NOT_FOUND_ERROR. Such an error most likely means a commit was in process
808
+ * and has finished, in the time it took us to load the now-old infos
809
+ * files (and segments files). It's also possible it's a true error
810
+ * (corrupt index). To distinguish these, on each retry we must see
811
+ * "forward progress" on which generation we are trying to load. If we
812
+ * don't, then the original error is real and we throw it.
813
+ *
814
+ * We have three methods for determining the current generation. We try
815
+ * each in sequence. */
816
+ while (true) {
817
+ /* Method 1: list the directory and use the highest segments_N file.
818
+ * This method works well as long as there is no stale caching on the
819
+ * directory contents: */
820
+ if (0 == method) {
821
+ gen = sis_current_segment_generation(store);
822
+ if (gen == -1) {
823
+ /*fprintf(stderr, ">>\n%s\n>>\n", store_to_s(store));*/
824
+ RAISE(FILE_NOT_FOUND_ERROR, "couldn't find segments file");
825
+ }
826
+ }
827
+
828
+ /* Method 2 (fallback if Method 1 isn't reliable): if the directory
829
+ * listing seems to be stale, try loading the "segments.gen" file. */
830
+ if (1 == method || (0 == method && last_gen == gen && retry)) {
831
+ method = 1;
832
+ for (i = 0; i < GEN_FILE_RETRY_COUNT; i++) {
833
+ InStream *gen_is = NULL;
834
+ TRY
835
+ gen_is = store->open_input(store, SEGMENTS_GEN_FILE_NAME);
836
+ XCATCHALL
837
+ HANDLED();
838
+ /* TODO:LOG "segments.gen open: IO_ERROR"*/
839
+ XENDTRY
840
+
841
+ if (NULL != gen_is) {
842
+ f_i64 gen0 = -1, gen1 = -1;
843
+
844
+ TRY
845
+ gen0 = is_read_u64(gen_is);
846
+ gen1 = is_read_u64(gen_is);
847
+ XFINALLY
848
+ /* if there is an error we'll simply try again */
849
+ HANDLED();
850
+ is_close(gen_is);
851
+ XENDTRY
852
+ /* TODO:LOG "fallback check: " + gen0 + "; " + gen1 */
853
+ if (gen0 == gen1) {
854
+ /* The file is consistent. */
855
+ if (gen0 > gen) {
856
+ /* TODO:LOG "fallback to '" +
857
+ * IndexFileNames.SEGMENTS_GEN + "' check: now
858
+ * try generation " + gen0 + " > " + gen */
859
+ gen = gen0;
860
+ }
861
+ goto method_two_loop_end;
862
+ }
863
+ break;
864
+ }
865
+ /* sleep for 50 milliseconds */
866
+ micro_sleep(50000);
867
+ }
868
+ }
869
+ method_two_loop_end:
870
+
871
+ /* Method 3 (fallback if Methods 1 & 2 are not reliable): since both
872
+ * directory cache and file contents cache seem to be stale, just
873
+ * advance the generation. */
874
+ if (2 == method || (1 == method && last_gen == gen && retry)) {
875
+ method = 2;
876
+ if (gen_look_ahead_count < GEN_LOOK_AHEAD_COUNT) {
877
+ gen++;
878
+ gen_look_ahead_count++;
879
+ /* TODO:LOG "look ahead increment gen to " + gen */
880
+ }
881
+ }
882
+
883
+ if (last_gen == gen) {
884
+ /* This means we're about to try the same segments_N last tried.
885
+ * This is allowed, exactly once, because writer could have been
886
+ * in the process of writing segments_N last time. */
887
+ if (retry) {
888
+ /* OK, we've tried the same segments_N file twice in a row, so
889
+ * this must be a real error. The original exception was not
890
+ * saved, so raise a generic IO_ERROR instead. */
891
+ RAISE(IO_ERROR, "Error reading the segment infos");
892
+ }
893
+ else {
894
+ retry = true;
895
+ }
896
+ }
897
+ else {
898
+ /* Segment file has advanced since our last loop, so reset retry: */
899
+ retry = false;
900
+ }
901
+ last_gen = gen;
902
+
903
+ TRY
904
+ fsf->generation = gen;
905
+ run(store, fsf);
906
+ RETURN_EARLY();
907
+ return;
908
+ case IO_ERROR: case FILE_NOT_FOUND_ERROR:
909
+ HANDLED();
910
+ /* The original root cause is not saved here; we only decide whether to retry: */
911
+ /* TODO:LOG "primary Exception on '" + segmentFileName + "': " +
912
+ * err + "'; will retry: retry=" + retry + "; gen = " + gen */
913
+
914
+ if (!retry && gen > 1) {
915
+ /* This is our first time trying this segments file (because
916
+ * retry is false), and, there is possibly a segments_(N-1)
917
+ * (because gen > 1). So, check if the segments_(N-1) exists
918
+ * and try it if so: */
919
+ char prev_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
920
+ segfn_for_generation(prev_seg_file_name, gen - 1);
921
+ if (store->exists(store, prev_seg_file_name)) {
922
+ /* TODO:LOG "fallback to prior segment file '" +
923
+ * prevSegmentFileName + "'" */
924
+ TRY
925
+ fsf->generation = gen - 1;
926
+ run(store, fsf);
927
+ /* TODO:LOG "success on fallback " +
928
+ * prev_seg_file_name */
929
+
930
+ /* pop two contexts as we are in nested try blocks */
931
+ RETURN_EARLY();
932
+ RETURN_EARLY();
933
+ return;
934
+ case IO_ERROR: case FILE_NOT_FOUND_ERROR:
935
+ HANDLED();
936
+ /* TODO:LOG "secondary Exception on '" +
937
+ * prev_seg_file_name + "': " + err2 + "'; will retry"*/
938
+ XENDTRY
939
+ }
940
+ }
941
+ XENDTRY
522
942
  }
523
- i--;
524
- file_name[i] = '_';
525
- return estrdup(&file_name[i]);
526
943
  }
527
944
 
528
- SegmentInfos *sis_new()
945
+ SegmentInfos *sis_new(FieldInfos *fis)
529
946
  {
530
- SegmentInfos *sis = ALLOC(SegmentInfos);
947
+ SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
948
+ REF(fis);
949
+ sis->fis = fis;
531
950
  sis->format = FORMAT;
532
951
  sis->version = (f_u64)time(NULL);
533
952
  sis->size = 0;
534
953
  sis->counter = 0;
954
+ sis->generation = -1;
535
955
  sis->capa = 4;
536
956
  sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
537
957
  return sis;
@@ -539,8 +959,7 @@ SegmentInfos *sis_new()
539
959
 
540
960
  SegmentInfo *sis_new_segment(SegmentInfos *sis, int doc_cnt, Store *store)
541
961
  {
542
- return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt,
543
- store));
962
+ return sis_add_si(sis, si_new(new_segment(sis->counter++), doc_cnt, store));
544
963
  }
545
964
 
546
965
  void sis_destroy(SegmentInfos *sis)
@@ -548,8 +967,9 @@ void sis_destroy(SegmentInfos *sis)
548
967
  int i;
549
968
  const int sis_size = sis->size;
550
969
  for (i = 0; i < sis_size; i++) {
551
- si_destroy(sis->segs[i]);
970
+ si_deref(sis->segs[i]);
552
971
  }
972
+ if (sis->fis) fis_deref(sis->fis);
553
973
  free(sis->segs);
554
974
  free(sis);
555
975
  }
@@ -557,11 +977,10 @@ void sis_destroy(SegmentInfos *sis)
557
977
  SegmentInfo *sis_add_si(SegmentInfos *sis, SegmentInfo *si)
558
978
  {
559
979
  if (sis->size >= sis->capa) {
560
- sis->capa = sis->size * 2;
980
+ sis->capa <<= 1;
561
981
  REALLOC_N(sis->segs, SegmentInfo *, sis->capa);
562
982
  }
563
- sis->segs[sis->size] = si;
564
- sis->size++;
983
+ sis->segs[sis->size++] = si;
565
984
  return si;
566
985
  }
567
986
 
@@ -569,7 +988,7 @@ void sis_del_at(SegmentInfos *sis, int at)
569
988
  {
570
989
  int i;
571
990
  const int sis_size = --(sis->size);
572
- si_destroy(sis->segs[at]);
991
+ si_deref(sis->segs[at]);
573
992
  for (i = at; i < sis_size; i++) {
574
993
  sis->segs[i] = sis->segs[i+1];
575
994
  }
@@ -580,7 +999,7 @@ void sis_del_from_to(SegmentInfos *sis, int from, int to)
580
999
  int i, num_to_del = to - from;
581
1000
  const int sis_size = sis->size -= num_to_del;
582
1001
  for (i = from; i < to; i++) {
583
- si_destroy(sis->segs[i]);
1002
+ si_deref(sis->segs[i]);
584
1003
  }
585
1004
  for (i = from; i < sis_size; i++) {
586
1005
  sis->segs[i] = sis->segs[i+num_to_del];
@@ -592,74 +1011,106 @@ void sis_clear(SegmentInfos *sis)
592
1011
  int i;
593
1012
  const int sis_size = sis->size;
594
1013
  for (i = 0; i < sis_size; i++) {
595
- si_destroy(sis->segs[i]);
1014
+ si_deref(sis->segs[i]);
596
1015
  }
597
1016
  sis->size = 0;
598
1017
  }
599
1018
 
600
- SegmentInfos *sis_read(Store *store)
1019
+ void sis_read_i(Store *store, FindSegmentsFile *fsf)
601
1020
  {
602
- int doc_cnt;
603
1021
  int seg_cnt;
604
1022
  int i;
605
- char *name;
606
- InStream *is = store->open_input(store, SEGMENTS_FILENAME);
607
- SegmentInfos *sis = ALLOC(SegmentInfos);
608
- sis->store = store;
1023
+ bool success = false;
1024
+ char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
1025
+ InStream *is = NULL;
1026
+ SegmentInfos *sis = ALLOC_AND_ZERO(SegmentInfos);
1027
+ segfn_for_generation(seg_file_name, fsf->generation);
1028
+ TRY
1029
+ is = store->open_input(store, seg_file_name);
1030
+ sis->store = store;
609
1031
 
610
- sis->format = is_read_u32(is); /* do nothing. it's the first version */
611
- sis->version = is_read_u64(is);
612
- sis->counter = is_read_u64(is);
613
- seg_cnt = is_read_vint(is);
1032
+ sis->generation = fsf->generation;
1033
+ sis->format = is_read_u32(is); /* do nothing. it's the first version */
1034
+ sis->version = is_read_u64(is);
1035
+ sis->counter = is_read_u64(is);
1036
+ seg_cnt = is_read_vint(is);
614
1037
 
615
- /* allocate space for segments */
616
- for (sis->capa = 4; sis->capa < seg_cnt; sis->capa <<= 1) {
617
- }
618
- sis->size = 0;
619
- sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
1038
+ /* allocate space for segments */
1039
+ for (sis->capa = 4; sis->capa < seg_cnt; sis->capa <<= 1) {
1040
+ }
1041
+ sis->size = 0;
1042
+ sis->segs = ALLOC_N(SegmentInfo *, sis->capa);
620
1043
 
621
- for (i = 0; i < seg_cnt; i++) {
622
- name = is_read_string(is);
623
- doc_cnt = is_read_vint(is);
624
- sis_add_si(sis, si_new(name, doc_cnt, store));
625
- }
626
- is_close(is);
1044
+ for (i = 0; i < seg_cnt; i++) {
1045
+ sis_add_si(sis, si_read(store, is));
1046
+ }
1047
+ sis->fis = fis_read(is);
1048
+ success = true;
1049
+ XFINALLY
1050
+ if (is) is_close(is);
1051
+ if (!success) {
1052
+ sis_destroy(sis);
1053
+ }
1054
+ XENDTRY
1055
+ fsf->p_return = sis;
1056
+ }
627
1057
 
628
- return sis;
1058
+ SegmentInfos *sis_read(Store *store)
1059
+ {
1060
+ FindSegmentsFile fsf;
1061
+ sis_find_segments_file(store, &fsf, &sis_read_i);
1062
+ return fsf.p_return;
629
1063
  }
630
1064
 
631
- void sis_write(SegmentInfos *sis, Store *store)
1065
+ void sis_write(SegmentInfos *sis, Store *store, Deleter *deleter)
632
1066
  {
633
1067
  int i;
634
- SegmentInfo *si;
635
- OutStream *os = store->new_output(store, TEMPORARY_SEGMENTS_FILENAME);
1068
+ OutStream *os = NULL;
636
1069
  const int sis_size = sis->size;
1070
+ char buf[SEGMENT_NAME_MAX_LENGTH];
1071
+ sis->generation++;
637
1072
 
638
- os_write_u32(os, FORMAT);
639
- os_write_u64(os, ++(sis->version)); /* every write changes the index */
640
- os_write_u64(os, sis->counter);
641
- os_write_vint(os, sis->size);
642
- for (i = 0; i < sis_size; i++) {
643
- si = sis->segs[i];
644
- os_write_string(os, si->name);
645
- os_write_vint(os, si->doc_cnt);
646
- }
647
- os_close(os);
1073
+ TRY
1074
+ os = store->new_output(store,
1075
+ segfn_for_generation(buf, sis->generation));
1076
+ os_write_u32(os, FORMAT);
1077
+ os_write_u64(os, ++(sis->version)); /* every write changes the index */
1078
+ os_write_u64(os, sis->counter);
1079
+ os_write_vint(os, sis->size);
1080
+ for (i = 0; i < sis_size; i++) {
1081
+ si_write(sis->segs[i], os);
1082
+ }
1083
+ fis_write(sis->fis, os);
1084
+ XFINALLY
1085
+ os_close(os);
1086
+ XENDTRY
1087
+
1088
+ TRY
1089
+ os = store->new_output(store, SEGMENTS_GEN_FILE_NAME);
1090
+ os_write_u64(os, sis->generation);
1091
+ os_write_u64(os, sis->generation);
1092
+ XFINALLY
1093
+ /* It's OK if we fail to write this file since it's
1094
+ * used only as one of the retry fallbacks. */
1095
+ HANDLED();
1096
+ os_close(os);
1097
+ XENDTRY
648
1098
 
649
- /* install new segment info */
650
- store->rename(store, TEMPORARY_SEGMENTS_FILENAME, SEGMENTS_FILENAME);
1099
+ if (deleter && sis->generation > 0) {
1100
+ deleter_delete_file(deleter,
1101
+ segfn_for_generation(buf, sis->generation - 1));
1102
+ }
651
1103
  }
652
1104
 
653
- f_u64 sis_read_current_version(Store *store)
1105
+ void sis_read_ver_i(Store *store, FindSegmentsFile *fsf)
654
1106
  {
655
1107
  InStream *is;
656
1108
  f_u32 format = 0;
657
1109
  f_u64 version = 0;
1110
+ char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
658
1111
 
659
- if (!store->exists(store, SEGMENTS_FILENAME)) {
660
- return 0;
661
- }
662
- is = store->open_input(store, SEGMENTS_FILENAME);
1112
+ segfn_for_generation(seg_file_name, (f_u64)fsf->generation);
1113
+ is = store->open_input(store, seg_file_name);
663
1114
 
664
1115
  TRY
665
1116
  format = is_read_u32(is);
@@ -668,7 +1119,14 @@ f_u64 sis_read_current_version(Store *store)
668
1119
  is_close(is);
669
1120
  XENDTRY
670
1121
 
671
- return version;
1122
+ fsf->u64_return = version;
1123
+ }
1124
+
1125
+ f_u64 sis_read_current_version(Store *store)
1126
+ {
1127
+ FindSegmentsFile fsf;
1128
+ sis_find_segments_file(store, &fsf, &sis_read_ver_i);
1129
+ return fsf.u64_return;
672
1130
  }
673
1131
 
674
1132
  /****************************************************************************
@@ -704,7 +1162,7 @@ char *lazy_df_get_data(LazyDocField *self, int i)
704
1162
  char *text = NULL;
705
1163
  if (i < self->size && i >= 0) {
706
1164
  text = self->data[i].text;
707
- if (text == NULL) {
1165
+ if (NULL == text) {
708
1166
  const int read_len = self->data[i].length + 1;
709
1167
  self->data[i].text = text = ALLOC_N(char, read_len);
710
1168
  is_seek(self->doc->fields_in, self->data[i].start);
@@ -1220,8 +1678,8 @@ char *te_skip_to(TermEnum *te, const char *term)
1220
1678
  {
1221
1679
  char *curr_term = te->curr_term;
1222
1680
  if (strcmp(curr_term, term) < 0) {
1223
- while (((curr_term = te->next(te)) != NULL) &&
1224
- (strcmp(curr_term, term) < 0)) {
1681
+ while (NULL != ((curr_term = te->next(te)))
1682
+ && (strcmp(curr_term, term) < 0)) {
1225
1683
  }
1226
1684
  }
1227
1685
  return curr_term;
@@ -1258,7 +1716,7 @@ static void sti_destroy(SegmentTermIndex *sti)
1258
1716
  static void sti_ensure_index_is_read(SegmentTermIndex *sti,
1259
1717
  TermEnum *index_te)
1260
1718
  {
1261
- if (sti->index_terms == NULL) {
1719
+ if (NULL == sti->index_terms) {
1262
1720
  int i;
1263
1721
  int index_size = sti->index_size;
1264
1722
  off_t index_ptr = 0;
@@ -1314,7 +1772,7 @@ static int sti_get_index_offset(SegmentTermIndex *sti, const char *term)
1314
1772
  ****************************************************************************/
1315
1773
 
1316
1774
  #define SFI_ENSURE_INDEX_IS_READ(sfi, sti) do {\
1317
- if (sti->index_terms == NULL) {\
1775
+ if (NULL == sti->index_terms) {\
1318
1776
  mutex_lock(&sfi->mutex);\
1319
1777
  sti_ensure_index_is_read(sti, sfi->index_te);\
1320
1778
  mutex_unlock(&sfi->mutex);\
@@ -1351,7 +1809,7 @@ SegmentFieldIndex *sfi_open(Store *store, const char *segment)
1351
1809
 
1352
1810
  sprintf(file_name, "%s.tix", segment);
1353
1811
  is = store->open_input(store, file_name);
1354
- sfi->index_te = ste_new(is, NULL);
1812
+ sfi->index_te = ste_new(is, sfi);
1355
1813
  return sfi;
1356
1814
  }
1357
1815
 
@@ -1394,8 +1852,8 @@ static char *ste_next(TermEnum *te)
1394
1852
 
1395
1853
  ti = &(te->curr_ti);
1396
1854
  ti->doc_freq = is_read_vint(is); /* read doc freq */
1397
- ti->frq_ptr += is_read_voff_t(is);/* read freq ptr */
1398
- ti->prx_ptr += is_read_voff_t(is);/* read prox ptr */
1855
+ ti->frq_ptr += is_read_voff_t(is); /* read freq ptr */
1856
+ ti->prx_ptr += is_read_voff_t(is); /* read prox ptr */
1399
1857
  if (ti->doc_freq >= STE(te)->skip_interval) {
1400
1858
  ti->skip_offset = is_read_voff_t(is);
1401
1859
  }
@@ -1497,7 +1955,7 @@ static TermInfo *ste_scan_for_term_info(SegmentTermEnum *ste, const char *term)
1497
1955
  {
1498
1956
  ste_scan_to(ste, term);
1499
1957
 
1500
- if (strcmp(TE(ste)->curr_term, term) == 0) {
1958
+ if (0 == strcmp(TE(ste)->curr_term, term)) {
1501
1959
  return te_get_ti((TermEnum *)ste);
1502
1960
  }
1503
1961
  else {
@@ -1521,7 +1979,7 @@ static char *ste_get_term(TermEnum *te, int pos)
1521
1979
  ste_index_seek(te, sti, pos / idx_int);
1522
1980
  }
1523
1981
  while (ste->pos < pos) {
1524
- if (ste_next(te) == NULL) {
1982
+ if (NULL == ste_next(te)) {
1525
1983
  return NULL;
1526
1984
  }
1527
1985
  }
@@ -1575,7 +2033,7 @@ typedef struct MultiTermEnum
1575
2033
  static bool tew_lt(const TermEnumWrapper *tew1, const TermEnumWrapper *tew2)
1576
2034
  {
1577
2035
  int cmpres = strcmp(tew1->term, tew2->term);
1578
- if (cmpres == 0) {
2036
+ if (0 == cmpres) {
1579
2037
  return tew1->index < tew2->index;
1580
2038
  }
1581
2039
  else {
@@ -1637,7 +2095,7 @@ static char *mte_next(TermEnum *te)
1637
2095
  TermEnumWrapper *top =
1638
2096
  (TermEnumWrapper *)pq_top(MTE(te)->tew_queue);
1639
2097
 
1640
- if (top == NULL) {
2098
+ if (NULL == top) {
1641
2099
  te->curr_term[0] = '\0';
1642
2100
  te->curr_term_len = 0;
1643
2101
  return false;
@@ -1650,7 +2108,7 @@ static char *mte_next(TermEnum *te)
1650
2108
  te->curr_ti.doc_freq = 0;
1651
2109
 
1652
2110
  MTE(te)->ti_cnt = 0;
1653
- while ((top != NULL) && (strcmp(te->curr_term, top->term) == 0)) {
2111
+ while ((NULL != top) && (0 == strcmp(te->curr_term, top->term))) {
1654
2112
  pq_pop(MTE(te)->tew_queue);
1655
2113
  te->curr_ti.doc_freq += top->te->curr_ti.doc_freq;/* increment freq */
1656
2114
  MTE(te)->ti_indexes[MTE(te)->ti_cnt] = top->index;
@@ -1752,7 +2210,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
1752
2210
  if (fnum >= 0) {
1753
2211
  TermEnumWrapper *tew;
1754
2212
 
1755
- if (term != NULL) {
2213
+ if (NULL != term) {
1756
2214
  sub_te = reader->terms_from(reader, fnum, term);
1757
2215
  }
1758
2216
  else {
@@ -1760,7 +2218,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
1760
2218
  }
1761
2219
 
1762
2220
  tew = tew_setup(&(mte->tews[i]), i, sub_te, reader);
1763
- if (((term == NULL) && tew_next(tew))
2221
+ if (((NULL == term) && tew_next(tew))
1764
2222
  || (tew->term && (tew->term[0] != '\0'))) {
1765
2223
  pq_push(mte->tew_queue, tew); /* initialize queue */
1766
2224
  }
@@ -1772,7 +2230,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
1772
2230
  }
1773
2231
  }
1774
2232
 
1775
- if ((term != NULL) && (mte->tew_queue->size > 0)) {
2233
+ if ((NULL != term) && (0 < mte->tew_queue->size)) {
1776
2234
  mte_next(TE(mte));
1777
2235
  }
1778
2236
 
@@ -1804,7 +2262,7 @@ TermInfosReader *tir_open(Store *store,
1804
2262
  static __inline TermEnum *tir_enum(TermInfosReader *tir)
1805
2263
  {
1806
2264
  TermEnum *te;
1807
- if ((te = thread_getspecific(tir->thread_te)) == NULL) {
2265
+ if (NULL == (te = thread_getspecific(tir->thread_te))) {
1808
2266
  te = ste_clone(tir->orig_te);
1809
2267
  ste_set_field(te, tir->field_num);
1810
2268
  ary_push(tir->te_bucket, te);
@@ -1827,8 +2285,8 @@ TermInfo *tir_get_ti(TermInfosReader *tir, const char *term)
1827
2285
  TermEnum *te = tir_enum(tir);
1828
2286
  char *match;
1829
2287
 
1830
- if ((match = ste_scan_to(te, term)) != NULL &&
1831
- strcmp(match, term) == 0) {
2288
+ if (NULL != (match = ste_scan_to(te, term))
2289
+ && 0 == strcmp(match, term)) {
1832
2290
  return &(te->curr_ti);
1833
2291
  }
1834
2292
  return NULL;
@@ -1845,8 +2303,8 @@ TermInfo *tir_get_ti_field(TermInfosReader *tir, int field_num,
1845
2303
  tir->field_num = field_num;
1846
2304
  }
1847
2305
 
1848
- if ((match = ste_scan_to(te, term)) != NULL &&
1849
- strcmp(match, term) == 0) {
2306
+ if (NULL != (match = ste_scan_to(te, term))
2307
+ && 0 == strcmp(match, term)) {
1850
2308
  return &(te->curr_ti);
1851
2309
  }
1852
2310
  return NULL;
@@ -1937,7 +2395,7 @@ static __inline void tw_write_term(TermWriter *tw,
1937
2395
 
1938
2396
  os_write_vint(os, start); /* write shared prefix length */
1939
2397
  os_write_vint(os, length); /* write delta length */
1940
- os_write_bytes(os, (uchar *)(term + start), length); /* write delta chars */
2398
+ os_write_bytes(os, (uchar *)(term + start), length); /* write delta chars */
1941
2399
 
1942
2400
  tw->last_term = term;
1943
2401
  }
@@ -1945,13 +2403,15 @@ static __inline void tw_write_term(TermWriter *tw,
1945
2403
  static void tw_add(TermWriter *tw,
1946
2404
  const char *term,
1947
2405
  int term_len,
1948
- TermInfo *ti)
2406
+ TermInfo *ti,
2407
+ int skip_interval)
1949
2408
  {
1950
2409
  OutStream *os = tw->os;
1951
2410
 
1952
2411
  #ifdef DEBUG
1953
2412
  if (strcmp(tw->last_term, term) > 0) {
1954
- RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d", tw->last_term, term, *tw->last_term, *term);
2413
+ RAISE(STATE_ERROR, "\"%s\" > \"%s\" %d > %d",
2414
+ tw->last_term, term, *tw->last_term, *term);
1955
2415
  }
1956
2416
  if (ti->frq_ptr < tw->last_term_info.frq_ptr) {
1957
2417
  RAISE(STATE_ERROR, "%"F_OFF_T_PFX"d > %"F_OFF_T_PFX"d", ti->frq_ptr,
@@ -1967,6 +2427,9 @@ static void tw_add(TermWriter *tw,
1967
2427
  os_write_vint(os, ti->doc_freq); /* write doc freq */
1968
2428
  os_write_voff_t(os, ti->frq_ptr - tw->last_term_info.frq_ptr);
1969
2429
  os_write_voff_t(os, ti->prx_ptr - tw->last_term_info.prx_ptr);
2430
+ if (ti->doc_freq >= skip_interval) {
2431
+ os_write_voff_t(os, ti->skip_offset);
2432
+ }
1970
2433
 
1971
2434
  tw->last_term_info = *ti;
1972
2435
  tw->counter++;
@@ -1983,22 +2446,19 @@ void tiw_add(TermInfosWriter *tiw,
1983
2446
  printf("%s:%d:%d:%d:%d\n", term, term_len, ti->doc_freq,
1984
2447
  ti->frq_ptr, ti->prx_ptr);
1985
2448
  */
1986
- if ((tiw->tis_writer->counter % tiw->index_interval) == 0) {
2449
+ if (0 == (tiw->tis_writer->counter % tiw->index_interval)) {
1987
2450
  /* add an index term */
1988
2451
  tw_add(tiw->tix_writer,
1989
2452
  tiw->tis_writer->last_term,
1990
2453
  strlen(tiw->tis_writer->last_term),
1991
- &(tiw->tis_writer->last_term_info));
2454
+ &(tiw->tis_writer->last_term_info),
2455
+ tiw->skip_interval);
1992
2456
  tis_pos = os_pos(tiw->tis_writer->os);
1993
2457
  os_write_voff_t(tiw->tix_writer->os, tis_pos - tiw->last_index_ptr);
1994
2458
  tiw->last_index_ptr = tis_pos; /* write ptr */
1995
2459
  }
1996
2460
 
1997
- tw_add(tiw->tis_writer, term, term_len, ti);
1998
-
1999
- if (ti->doc_freq >= tiw->skip_interval) {
2000
- os_write_voff_t(tiw->tis_writer->os, ti->skip_offset);
2001
- }
2461
+ tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
2002
2462
  }
2003
2463
 
2004
2464
  static __inline void tw_reset(TermWriter *tw)
@@ -2051,7 +2511,7 @@ void tiw_close(TermInfosWriter *tiw)
2051
2511
  #define TDE(stde) ((TermDocEnum *)(stde))
2052
2512
 
2053
2513
  #define CHECK_STATE(method) do {\
2054
- if (STDE(tde)->count == 0) {\
2514
+ if (0 == STDE(tde)->count) {\
2055
2515
  RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
2056
2516
  "before you call #"method);\
2057
2517
  }\
@@ -2059,7 +2519,7 @@ void tiw_close(TermInfosWriter *tiw)
2059
2519
 
2060
2520
  static void stde_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
2061
2521
  {
2062
- if (ti == NULL) {
2522
+ if (NULL == ti) {
2063
2523
  stde->doc_freq = 0;
2064
2524
  }
2065
2525
  else {
@@ -2117,7 +2577,7 @@ static bool stde_next(TermDocEnum *tde)
2117
2577
 
2118
2578
  doc_code = is_read_vint(stde->frq_in);
2119
2579
  stde->doc_num += doc_code >> 1; /* shift off low bit */
2120
- if ((doc_code & 1) != 0) { /* if low bit is set */
2580
+ if (0 != (doc_code & 1)) { /* if low bit is set */
2121
2581
  stde->freq = 1; /* freq is one */
2122
2582
  }
2123
2583
  else {
@@ -2126,8 +2586,8 @@ static bool stde_next(TermDocEnum *tde)
2126
2586
 
2127
2587
  stde->count++;
2128
2588
 
2129
- if (stde->deleted_docs == NULL ||
2130
- bv_get(stde->deleted_docs, stde->doc_num) == 0) {
2589
+ if (NULL == stde->deleted_docs
2590
+ || 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
2131
2591
  break; /* We found an undeleted doc so return */
2132
2592
  }
2133
2593
 
@@ -2146,7 +2606,7 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
2146
2606
  /* manually inlined call to next() for speed */
2147
2607
  doc_code = is_read_vint(stde->frq_in);
2148
2608
  stde->doc_num += (doc_code >> 1); /* shift off low bit */
2149
- if ((doc_code & 1) != 0) { /* if low bit is set */
2609
+ if (0 != (doc_code & 1)) { /* if low bit is set */
2150
2610
  stde->freq = 1; /* freq is one */
2151
2611
  }
2152
2612
  else {
@@ -2155,8 +2615,8 @@ static int stde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
2155
2615
 
2156
2616
  stde->count++;
2157
2617
 
2158
- if (stde->deleted_docs == NULL ||
2159
- bv_get(stde->deleted_docs, stde->doc_num) == 0) {
2618
+ if (NULL == stde->deleted_docs
2619
+ || 0 == bv_get(stde->deleted_docs, stde->doc_num)) {
2160
2620
  docs[i] = stde->doc_num;
2161
2621
  freqs[i] = stde->freq;
2162
2622
  i++;
@@ -2169,16 +2629,18 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
2169
2629
  {
2170
2630
  SegmentTermDocEnum *stde = STDE(tde);
2171
2631
 
2172
- if (stde->doc_freq >= stde->skip_interval) { /* optimized case */
2632
+ if (stde->doc_freq >= stde->skip_interval
2633
+ && target_doc_num > stde->doc_num) { /* optimized case */
2173
2634
  int last_skip_doc;
2174
- int last_frq_ptr;
2175
- int last_prx_ptr;
2635
+ off_t last_frq_ptr;
2636
+ off_t last_prx_ptr;
2176
2637
  int num_skipped;
2177
2638
 
2178
- if (stde->skip_in == NULL) {
2179
- stde->skip_in = is_clone(stde->frq_in); /* lazily clone */
2639
+ if (NULL == stde->skip_in) {
2640
+ stde->skip_in = is_clone(stde->frq_in);/* lazily clone */
2180
2641
  }
2181
2642
 
2643
+ //printf("skip_ptr = %lld\n", stde->skip_ptr);
2182
2644
  if (!stde->have_skipped) { /* lazily seek skip stream */
2183
2645
  is_seek(stde->skip_in, stde->skip_ptr);
2184
2646
  stde->have_skipped = true;
@@ -2189,13 +2651,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
2189
2651
  last_frq_ptr = is_pos(stde->frq_in);
2190
2652
  last_prx_ptr = -1;
2191
2653
  num_skipped = -1 - (stde->count % stde->skip_interval);
2654
+ //printf("%d, %d, %d, %d\n", last_skip_doc, last_frq_ptr, last_prx_ptr, num_skipped);
2192
2655
 
2193
2656
  while (target_doc_num > stde->skip_doc) {
2194
2657
  last_skip_doc = stde->skip_doc;
2195
2658
  last_frq_ptr = stde->frq_ptr;
2196
2659
  last_prx_ptr = stde->prx_ptr;
2197
2660
 
2198
- if (stde->skip_doc != 0 && stde->skip_doc >= stde->doc_num) {
2661
+ if (0 != stde->skip_doc && stde->skip_doc >= stde->doc_num) {
2199
2662
  num_skipped += stde->skip_interval;
2200
2663
  }
2201
2664
 
@@ -2204,13 +2667,14 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
2204
2667
  }
2205
2668
 
2206
2669
  stde->skip_doc += is_read_vint(stde->skip_in);
2207
- stde->frq_ptr += is_read_vint(stde->skip_in);
2208
- stde->prx_ptr += is_read_vint(stde->skip_in);
2670
+ stde->frq_ptr += is_read_vint(stde->skip_in);
2671
+ stde->prx_ptr += is_read_vint(stde->skip_in);
2672
+ //printf("inner-> skip_doc:%d, frq_ptr:%d, prx_ptr:%d\n", stde->skip_doc, stde->frq_ptr, stde->prx_ptr);
2209
2673
 
2210
2674
  stde->skip_count++;
2211
2675
  }
2212
2676
 
2213
- /* if we found something to skip, so skip it */
2677
+ /* if we found something to skip, skip it */
2214
2678
  if (last_frq_ptr > is_pos(stde->frq_in)) {
2215
2679
  is_seek(stde->frq_in, last_frq_ptr);
2216
2680
  stde->seek_prox(stde, last_prx_ptr);
@@ -2233,7 +2697,7 @@ static void stde_close(TermDocEnum *tde)
2233
2697
  {
2234
2698
  is_close(STDE(tde)->frq_in);
2235
2699
 
2236
- if (STDE(tde)->skip_in != NULL) {
2700
+ if (NULL != STDE(tde)->skip_in) {
2237
2701
  is_close(STDE(tde)->skip_in);
2238
2702
  }
2239
2703
 
@@ -2245,7 +2709,7 @@ static void stde_skip_prox(SegmentTermDocEnum *stde)
2245
2709
  (void)stde;
2246
2710
  }
2247
2711
 
2248
- static void stde_seek_prox(SegmentTermDocEnum *stde, int prx_ptr)
2712
+ static void stde_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
2249
2713
  {
2250
2714
  (void)stde;
2251
2715
  (void)prx_ptr;
@@ -2290,7 +2754,7 @@ TermDocEnum *stde_new(TermInfosReader *tir,
2290
2754
 
2291
2755
  static void stpe_seek_ti(SegmentTermDocEnum *stde, TermInfo *ti)
2292
2756
  {
2293
- if (ti == NULL) {
2757
+ if (NULL == ti) {
2294
2758
  stde->doc_freq = 0;
2295
2759
  }
2296
2760
  else {
@@ -2351,7 +2815,7 @@ static void stpe_skip_prox(SegmentTermDocEnum *stde)
2351
2815
  is_skip_vints(stde->prx_in, stde->freq);
2352
2816
  }
2353
2817
 
2354
- static void stpe_seek_prox(SegmentTermDocEnum *stde, int prx_ptr)
2818
+ static void stpe_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
2355
2819
  {
2356
2820
  is_seek(stde->prx_in, prx_ptr);
2357
2821
  stde->prx_cnt = 0;
@@ -2422,7 +2886,7 @@ static TermDocEnum *mtde_next_tde(MultiTermDocEnum *mtde)
2422
2886
  }
2423
2887
 
2424
2888
  #define CHECK_CURR_TDE(method) do {\
2425
- if (MTDE(tde)->curr_tde == NULL) {\
2889
+ if (NULL == MTDE(tde)->curr_tde) {\
2426
2890
  RAISE(STATE_ERROR, "Illegal state of TermDocEnum. You must call #next "\
2427
2891
  "before you call #"method);\
2428
2892
  }\
@@ -2456,7 +2920,7 @@ static void mtde_seek(TermDocEnum *tde, int field_num, const char *term)
2456
2920
  TermEnum *te = mtde->te;
2457
2921
  char *t;
2458
2922
  te->set_field(te, field_num);
2459
- if ((t = te->skip_to(te, term)) != NULL && strcmp(term, t) == 0) {
2923
+ if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
2460
2924
  mtde_seek_te(tde, te);
2461
2925
  } else {
2462
2926
  memset(mtde->state, 0, mtde->ir_cnt);
@@ -2478,7 +2942,7 @@ static int mtde_freq(TermDocEnum *tde)
2478
2942
  static bool mtde_next(TermDocEnum *tde)
2479
2943
  {
2480
2944
  MultiTermDocEnum *mtde = MTDE(tde);
2481
- if (mtde->curr_tde != NULL && mtde->curr_tde->next(mtde->curr_tde)) {
2945
+ if (NULL != mtde->curr_tde && mtde->curr_tde->next(mtde->curr_tde)) {
2482
2946
  return true;
2483
2947
  }
2484
2948
  else if (mtde_next_tde(mtde)) {
@@ -2494,7 +2958,7 @@ static int mtde_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
2494
2958
  int i, end = 0, last_end = 0, b;
2495
2959
  MultiTermDocEnum *mtde = MTDE(tde);
2496
2960
  while (true) {
2497
- if (mtde->curr_tde == NULL) return end;
2961
+ if (NULL == mtde->curr_tde) return end;
2498
2962
  end += mtde->curr_tde->read(mtde->curr_tde, docs + last_end,
2499
2963
  freqs + last_end, req_num - last_end);
2500
2964
  if (end == last_end) { /* none left in segment */
@@ -2527,13 +2991,7 @@ static bool mtde_skip_to(TermDocEnum *tde, int target_doc_num)
2527
2991
 
2528
2992
  mtde_next_tde(mtde);
2529
2993
  }
2530
-
2531
- if (curr_tde) {
2532
- return curr_tde->skip_to(curr_tde, target_doc_num - mtde->base);
2533
- }
2534
- else {
2535
- return false;
2536
- }
2994
+ return false;
2537
2995
  }
2538
2996
 
2539
2997
  static void mtde_close(TermDocEnum *tde)
@@ -2660,7 +3118,7 @@ static bool mtdpe_next(TermDocEnum *tde)
2660
3118
  int doc;
2661
3119
  MultipleTermDocPosEnum *mtdpe = MTDPE(tde);
2662
3120
 
2663
- if (mtdpe->pq->size == 0) {
3121
+ if (0 == mtdpe->pq->size) {
2664
3122
  return false;
2665
3123
  }
2666
3124
 
@@ -2710,7 +3168,7 @@ bool mtdpe_skip_to(TermDocEnum *tde, int target_doc_num)
2710
3168
  TermDocEnum *sub_tde;
2711
3169
  PriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
2712
3170
 
2713
- while ((sub_tde = (TermDocEnum *)pq_top(mtdpe_pq)) != NULL
3171
+ while (NULL != (sub_tde = (TermDocEnum *)pq_top(mtdpe_pq))
2714
3172
  && (target_doc_num > sub_tde->doc_num(sub_tde))) {
2715
3173
  if (sub_tde->skip_to(sub_tde, target_doc_num)) {
2716
3174
  pq_down(mtdpe_pq);
@@ -2779,6 +3237,256 @@ TermDocEnum *mtdpe_new(IndexReader *ir, int field_num, char **terms, int t_cnt)
2779
3237
  return tde;
2780
3238
  }
2781
3239
 
3240
+ /****************************************************************************
3241
+ *
3242
+ * FileNameFilter
3243
+ *
3244
+ ****************************************************************************/
3245
+
3246
+ static HashTable *fn_extensions = NULL;
3247
+ static void file_name_filter_init(void)
3248
+ {
3249
+ if (NULL == fn_extensions) { /* build the extension lookup table only once */
3250
+ int i;
3251
+ fn_extensions = h_new_str((free_ft)NULL, (free_ft)NULL);
3252
+ for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
3253
+ h_set(fn_extensions, INDEX_EXTENSIONS[i], (char *)INDEX_EXTENSIONS[i]);
3254
+ }
3255
+ register_for_cleanup(fn_extensions, (free_ft)&h_destroy);
3256
+ }
3257
+ }
3258
+
3259
+ static bool file_name_filter_accept(char *file_name)
3260
+ {
3261
+ char *p = strrchr(file_name, '.');
3262
+ if (NULL != p) {
3263
+ char *extension = p + 1;
3264
+ if (NULL != h_get(fn_extensions, extension)) {
3265
+ return true;
3266
+ }
3267
+ else if ((*extension == 'f' || *extension == 's')
3268
+ && *(extension + 1) >= '0'
3269
+ && *(extension + 1) <= '9') {
3270
+ return true;
3271
+ }
3272
+ }
3273
+ else if (0 == strncmp(SEGMENTS_FILE_NAME, file_name,
3274
+ sizeof(SEGMENTS_FILE_NAME) - 1)) {
3275
+ return true;
3276
+ }
3277
+ return false;
3278
+ }
3279
+
3280
+ /*
3281
+ * Returns true if this is a file that would be contained in a CFS file. This
3282
+ * function should only be called on files that pass the above "accept" (i.e.,
3283
+ * are already known to be a Ferret index file).
3284
+ */
3285
+ static bool file_name_filter_is_cfs_file(char *file_name) {
3286
+ char *p = strrchr(file_name, '.');
3287
+ if (NULL != p) {
3288
+ char *extension = p + 1;
3289
+ if (NULL != h_get(fn_extensions, extension)
3290
+ && 0 != strcmp(extension, "del")
3291
+ && 0 != strcmp(extension, "gen")
3292
+ && 0 != strcmp(extension, "cfs")) {
3293
+ return true;
3294
+ }
3295
+ else if ('f' == *extension
3296
+ && '0' <= *(extension + 1)
3297
+ && '9' >= *(extension + 1)) {
3298
+ return true;
3299
+ }
3300
+ }
3301
+ return false;
3302
+ }
3303
+
3304
+ /****************************************************************************
3305
+ *
3306
+ * Deleter
3307
+ *
3308
+ ****************************************************************************/
3309
+
3310
+ #define DELETABLE_START_CAPA 8
3311
+ Deleter *deleter_new(SegmentInfos *sis, Store *store)
3312
+ {
3313
+ Deleter *dlr = ALLOC(Deleter);
3314
+ dlr->sis = sis;
3315
+ dlr->store = store;
3316
+ dlr->pending = hs_new_str(&free);
3317
+ return dlr;
3318
+ }
3319
+
3320
+ void deleter_destroy(Deleter *dlr)
3321
+ {
3322
+ hs_destroy(dlr->pending);
3323
+ free(dlr);
3324
+ }
3325
+
3326
+ void deleter_queue_file(Deleter *dlr, char *file_name)
3327
+ {
3328
+ hs_add(dlr->pending, estrdup(file_name));
3329
+ }
3330
+
3331
+ void deleter_delete_file(Deleter *dlr, char *file_name)
3332
+ {
3333
+ Store *store = dlr->store;
3334
+ TRY
3335
+ if (store->exists(store, file_name)) {
3336
+ store->remove(store, file_name);
3337
+ }
3338
+ hs_del(dlr->pending, file_name);
3339
+ XCATCHALL
3340
+ hs_add(dlr->pending, estrdup(file_name));
3341
+ XENDTRY
3342
+ }
3343
+
3344
+ void deleter_commit_pending_deletions(Deleter *dlr)
3345
+ {
3346
+ int i;
3347
+ char **pending = (char **)dlr->pending->elems;
3348
+ for (i = dlr->pending->size - 1; i >= 0; i--) {
3349
+ deleter_delete_file(dlr, pending[i]);
3350
+ }
3351
+ }
3352
+
3353
+ void deleter_delete_files(Deleter *dlr, char **files, int file_cnt)
3354
+ {
3355
+ int i;
3356
+ for (i = file_cnt - 1; i >= 0; i--) {
3357
+ deleter_queue_file(dlr, files[i]);
3358
+ }
3359
+ deleter_commit_pending_deletions(dlr);
3360
+ }
3361
+
3362
+ struct DelFilesArg {
3363
+ char curr_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
3364
+ Deleter *dlr;
3365
+ HashTable *current;
3366
+ };
3367
+
3368
+ static void deleter_find_deletable_files_i(char *file_name, void *arg)
3369
+ {
3370
+ struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
3371
+ Deleter *dlr = dfa->dlr;
3372
+ /* Names too long for segment_name are skipped; the strcpy below is unbounded */
3373
+ if (strlen(file_name) < SEGMENT_NAME_MAX_LENGTH
3374
+ && file_name_filter_accept(file_name)
3375
+ && 0 != strcmp(file_name, dfa->curr_seg_file_name)
3376
+ && 0 != strcmp(file_name, SEGMENTS_GEN_FILE_NAME)) {
3377
+ bool do_delete = false;
3378
+ SegmentInfo *si;
3379
+ char segment_name[SEGMENT_NAME_MAX_LENGTH];
3380
+ char *extension, *p;
3381
+ strcpy(segment_name, file_name);
3382
+
3383
+ p = strrchr(segment_name, '.');
3384
+
3385
+ /* First remove any extension (extension stays NULL if there is none): */
3386
+ if (NULL != p) {
3387
+ *p = '\0';
3388
+ extension = p + 1;
3389
+ } else {
3390
+ extension = NULL;
3391
+ }
3392
+
3393
+ /* Then, remove any generation count: */
3394
+ p = strrchr(segment_name + 1, '_');
3395
+ if (NULL != p) {
3396
+ *p = '\0';
3397
+ }
3398
+
3399
+ /* Delete this file if it's not a "current" segment, or, it is a
3400
+ * single index file but there is now a corresponding compound file: */
3401
+ if (NULL == (si = h_get(dfa->current, segment_name))) {
3402
+ /* Delete if segment is not referenced: */
3403
+ do_delete = true;
3404
+ }
3405
+ else {
3406
+ char tmp_fn[SEGMENT_NAME_MAX_LENGTH];
3407
+ /* OK, segment is referenced, but file may still be orphan'd: */
3408
+ if (file_name_filter_is_cfs_file(file_name)
3409
+ && si->use_compound_file) {
3410
+ /* This file is stored in a CFS file for this segment: */
3411
+ do_delete = true;
3412
+ }
3413
+ else if (NULL != extension && 0 == strcmp("del", extension)) {
3414
+ /* This is a _segmentName_N.del file: */
3415
+ if (!fn_for_generation(tmp_fn, segment_name, "del", si->del_gen)
3416
+ || 0 != strcmp(file_name, tmp_fn)) {
3417
+ /* If this is a separate .del file, but it
3418
+ * doesn't match the current del file name for
3419
+ * this segment, then delete it: */
3420
+ do_delete = true;
3421
+ }
3422
+ }
3423
+ else if (NULL != extension
3424
+ && ('s' == *extension || 'f' == *extension)
3425
+ && isdigit((unsigned char)extension[1])) {
3426
+ si_norm_file_name(si, tmp_fn, atoi(extension + 1));
3427
+ /* This is a _segmentName_N.sX file: */
3428
+ if (0 != strcmp(tmp_fn, file_name)) {
3429
+ /* This is an orphan'd norms file: */
3430
+ do_delete = true;
3431
+ }
3432
+ }
3433
+ else if (NULL != extension && 0 == strcmp("cfs", extension) && !si->use_compound_file) {
3434
+ /* This is a partially written _segmentName.cfs: */
3435
+ do_delete = true;
3436
+ }
3437
+ }
3438
+
3439
+ if (do_delete) {
3440
+ deleter_queue_file(dlr, file_name);
3441
+ }
3442
+ }
3443
+ }
3444
+
3445
+ /*
3446
+ * Determine index files that are no longer referenced and therefore should be
3447
+ * deleted. This is called once (by the writer), and then subsequently we add
3448
+ * onto deletable any files that are no longer needed at the point that we
3449
+ * create the unused file (eg when merging segments), and we only remove from
3450
+ * deletable when a file is successfully deleted.
3451
+ */
3452
+ void deleter_find_deletable_files(Deleter *dlr)
3453
+ {
3454
+ /* Gather all "current" segments: */
3455
+ int i;
3456
+ SegmentInfos *sis = dlr->sis;
3457
+ Store *store = dlr->store;
3458
+ struct DelFilesArg dfa;
3459
+ HashTable *current = dfa.current
3460
+ = h_new_str((free_ft)NULL, (free_ft)si_deref);
3461
+ dfa.dlr = dlr;
3462
+
3463
+ for(i = 0; i < sis->size; i++) {
3464
+ SegmentInfo *si = (SegmentInfo *)sis->segs[i];
3465
+ REF(si);
3466
+ h_set(current, si->name, si);
3467
+ }
3468
+
3469
+ /* Then go through all files in the Directory that are Ferret index files,
3470
+ * and add to deletable if they are not referenced by the current segments
3471
+ * info: */
3472
+ sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
3473
+ file_name_filter_init();
3474
+
3475
+ store->each(store, &deleter_find_deletable_files_i, &dfa);
3476
+ h_destroy(dfa.current);
3477
+ }
3478
+
3479
+ void deleter_delete_deletable_files(Deleter *dlr)
3480
+ {
3481
+ deleter_find_deletable_files(dlr);
3482
+ deleter_commit_pending_deletions(dlr);
3483
+ }
3484
+
3485
+ void deleter_clear_pending_deletions(Deleter *dlr)
3486
+ {
3487
+ hs_clear(dlr->pending);
3488
+ }
3489
+
2782
3490
  /****************************************************************************
2783
3491
  *
2784
3492
  * IndexReader
@@ -2800,7 +3508,7 @@ void ir_acquire_write_lock(IndexReader *ir)
2800
3508
  "need to close and reopen the index");
2801
3509
  }
2802
3510
 
2803
- if (ir->write_lock == NULL) {
3511
+ if (NULL == ir->write_lock) {
2804
3512
  ir->write_lock = open_lock(ir->store, WRITE_LOCK_NAME);
2805
3513
  if (!ir->write_lock->obtain(ir->write_lock)) {/* obtain write lock */
2806
3514
  RAISE(LOCK_ERROR, "Could not obtain write lock when trying to "
@@ -2811,8 +3519,8 @@ void ir_acquire_write_lock(IndexReader *ir)
2811
3519
  "you can safely delete these files.");
2812
3520
  }
2813
3521
 
2814
- /* we have to check whether index has changed since this reader was opened.
2815
- * if so, this reader is no longer valid for deletion */
3522
+ /* we have to check whether index has changed since this reader was
3523
+ * opened. if so, this reader is no longer valid for deletion */
2816
3524
  if (sis_read_current_version(ir->store) > ir->sis->version) {
2817
3525
  ir->is_stale = true;
2818
3526
  ir->write_lock->release(ir->write_lock);
@@ -2856,7 +3564,7 @@ IndexReader *ir_setup(IndexReader *ir, Store *store, SegmentInfos *sis,
2856
3564
 
2857
3565
  bool ir_index_exists(Store *store)
2858
3566
  {
2859
- return store->exists(store, "segments");
3567
+ return sis_current_segment_generation(store) != 1;
2860
3568
  }
2861
3569
 
2862
3570
  int ir_get_field_num(IndexReader *ir, const char *field)
@@ -2903,7 +3611,7 @@ uchar *ir_get_norms_i(IndexReader *ir, int field_num)
2903
3611
  norms = ir->get_norms(ir, field_num);
2904
3612
  }
2905
3613
  if (!norms) {
2906
- if (ir->fake_norms == NULL) {
3614
+ if (NULL == ir->fake_norms) {
2907
3615
  ir->fake_norms = (uchar *)ecalloc(ir->max_doc(ir));
2908
3616
  }
2909
3617
  norms = ir->fake_norms;
@@ -3009,34 +3717,41 @@ TermDocEnum *ir_term_positions_for(IndexReader *ir, const char *field,
3009
3717
 
3010
3718
  void ir_commit_i(IndexReader *ir)
3011
3719
  {
3012
- if (ir->has_changes && ir->is_owner) {
3013
- Lock *commit_lock;
3014
-
3015
- mutex_lock(&ir->store->mutex);
3016
- commit_lock = open_lock(ir->store, COMMIT_LOCK_NAME);
3017
- if (!commit_lock->obtain(commit_lock)) { /* obtain write lock */
3018
- RAISE(LOCK_ERROR, "Error trying to commit the index. Commit "
3019
- "lock already obtained");
3720
+ if (ir->has_changes) {
3721
+ if (NULL == ir->deleter && NULL != ir->store) {
3722
+ /* In the MultiReader case, we share this deleter across all
3723
+ * SegmentReaders: */
3724
+ ir->set_deleter_i(ir, deleter_new(ir->sis, ir->store));
3020
3725
  }
3726
+ if (ir->is_owner) {
3727
+ char curr_seg_fn[MAX_FILE_PATH];
3728
+ mutex_lock(&ir->store->mutex);
3021
3729
 
3022
- ir->commit_i(ir);
3023
- sis_write(ir->sis, ir->store);
3730
+ /* Should not be necessary: no prior commit should have left
3731
+ * pending files, so just defensive: */
3732
+ if (ir->deleter) deleter_clear_pending_deletions(ir->deleter);
3024
3733
 
3025
- commit_lock->release(commit_lock);
3026
- close_lock(commit_lock);
3027
- mutex_unlock(&ir->store->mutex);
3734
+ sis_curr_seg_file_name(curr_seg_fn, ir->store);
3735
+
3736
+ ir->commit_i(ir);
3737
+ sis_write(ir->sis, ir->store, ir->deleter);
3028
3738
 
3029
- if (ir->write_lock != NULL) {
3030
- /* release write lock */
3031
- ir->write_lock->release(ir->write_lock);
3032
- close_lock(ir->write_lock);
3033
- ir->write_lock = NULL;
3739
+ if (ir->deleter) deleter_delete_file(ir->deleter, curr_seg_fn);
3740
+
3741
+ mutex_unlock(&ir->store->mutex);
3742
+
3743
+ if (NULL != ir->write_lock) {
3744
+ /* release write lock */
3745
+ ir->write_lock->release(ir->write_lock);
3746
+ close_lock(ir->write_lock);
3747
+ ir->write_lock = NULL;
3748
+ }
3749
+ }
3750
+ else {
3751
+ ir->commit_i(ir);
3034
3752
  }
3035
- ir->has_changes = false;
3036
- }
3037
- else {
3038
- ir->commit_i(ir);
3039
3753
  }
3754
+ ir->has_changes = false;
3040
3755
  }
3041
3756
 
3042
3757
  void ir_commit(IndexReader *ir)
@@ -3049,15 +3764,14 @@ void ir_commit(IndexReader *ir)
3049
3764
  void ir_close(IndexReader *ir)
3050
3765
  {
3051
3766
  mutex_lock(&ir->mutex);
3052
- if (--(ir->ref_cnt) == 0) {
3767
+ if (0 == --(ir->ref_cnt)) {
3053
3768
  ir_commit_i(ir);
3054
3769
  ir->close_i(ir);
3055
3770
  if (ir->store) {
3056
3771
  store_deref(ir->store);
3057
3772
  }
3058
- if (ir->is_owner) {
3773
+ if (ir->is_owner && ir->sis) {
3059
3774
  sis_destroy(ir->sis);
3060
- fis_deref(ir->fis);
3061
3775
  }
3062
3776
  if (ir->cache) {
3063
3777
  h_destroy(ir->cache);
@@ -3065,6 +3779,9 @@ void ir_close(IndexReader *ir)
3065
3779
  if (ir->sort_cache) {
3066
3780
  h_destroy(ir->sort_cache);
3067
3781
  }
3782
+ if (ir->deleter && ir->is_owner) {
3783
+ deleter_destroy(ir->deleter);
3784
+ }
3068
3785
  free(ir->fake_norms);
3069
3786
 
3070
3787
  mutex_destroy(&ir->mutex);
@@ -3080,26 +3797,14 @@ void ir_close(IndexReader *ir)
3080
3797
  **/
3081
3798
  void ir_add_cache(IndexReader *ir)
3082
3799
  {
3083
- if (ir->cache == NULL) {
3800
+ if (NULL == ir->cache) {
3084
3801
  ir->cache = co_hash_create();
3085
3802
  }
3086
3803
  }
3087
3804
 
3088
3805
  bool ir_is_latest(IndexReader *ir)
3089
3806
  {
3090
- volatile bool is_latest = false;
3091
-
3092
- Lock *commit_lock = open_lock(ir->store, COMMIT_LOCK_NAME);
3093
- if (!commit_lock->obtain(commit_lock)) {
3094
- close_lock(commit_lock);
3095
- RAISE(LOCK_ERROR, "Error detecting if the current index is latest "
3096
- "version. Commit lock currently obtained");
3097
- }
3098
- is_latest = (sis_read_current_version(ir->store) == ir->sis->version);
3099
- commit_lock->release(commit_lock);
3100
- close_lock(commit_lock);
3101
-
3102
- return is_latest;
3807
+ return (sis_read_current_version(ir->store) == ir->sis->version);
3103
3808
  }
3104
3809
 
3105
3810
  /****************************************************************************
@@ -3128,35 +3833,27 @@ static Norm *norm_create(InStream *is, int field_num)
3128
3833
  static void norm_destroy(Norm *norm)
3129
3834
  {
3130
3835
  is_close(norm->is);
3131
- if (norm->bytes != NULL) {
3836
+ if (NULL != norm->bytes) {
3132
3837
  free(norm->bytes);
3133
3838
  }
3134
3839
  free(norm);
3135
3840
  }
3136
3841
 
3137
- static void norm_rewrite(Norm *norm, Store *store, char *segment,
3138
- int doc_count, Store *cfs_store)
3842
+ static void norm_rewrite(Norm *norm, Store *store, Deleter *dlr,
3843
+ SegmentInfo *si, int doc_count)
3139
3844
  {
3140
3845
  OutStream *os;
3141
- char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
3142
3846
  char norm_file_name[SEGMENT_NAME_MAX_LENGTH];
3847
+ const int field_num = norm->field_num;
3143
3848
 
3144
- if (norm == NULL || norm->bytes == NULL) {
3145
- return; /* These norms do not need to be rewritten */
3849
+ if (si_norm_file_name(si, norm_file_name, field_num)) {
3850
+ deleter_queue_file(dlr, norm_file_name);
3146
3851
  }
3147
-
3148
- sprintf(tmp_file_name, "%s.tmp", segment);
3149
- os = store->new_output(store, tmp_file_name);
3852
+ si_advance_norm_gen(si, field_num);
3853
+ si_norm_file_name(si, norm_file_name, field_num);
3854
+ os = store->new_output(store, norm_file_name);
3150
3855
  os_write_bytes(os, norm->bytes, doc_count);
3151
3856
  os_close(os);
3152
-
3153
- if (cfs_store) {
3154
- sprintf(norm_file_name, "%s.s%d", segment, norm->field_num);
3155
- }
3156
- else {
3157
- sprintf(norm_file_name, "%s.f%d", segment, norm->field_num);
3158
- }
3159
- store->rename(store, tmp_file_name, norm_file_name);
3160
3857
  norm->is_dirty = false;
3161
3858
  }
3162
3859
 
@@ -3166,6 +3863,7 @@ static void norm_rewrite(Norm *norm, Store *store, char *segment,
3166
3863
 
3167
3864
  typedef struct SegmentReader {
3168
3865
  IndexReader ir;
3866
+ SegmentInfo *si;
3169
3867
  char *segment;
3170
3868
  FieldsReader *fr;
3171
3869
  BitVector *deleted_docs;
@@ -3191,7 +3889,7 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
3191
3889
  {
3192
3890
  FieldsReader *fr;
3193
3891
 
3194
- if ((fr = thread_getspecific(sr->thread_fr)) == NULL) {
3892
+ if (NULL == (fr = thread_getspecific(sr->thread_fr))) {
3195
3893
  fr = fr_clone(sr->fr);
3196
3894
  ary_push(sr->fr_bucket, fr);
3197
3895
  thread_setspecific(sr->thread_fr, fr);
@@ -3201,17 +3899,17 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
3201
3899
 
3202
3900
  static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
3203
3901
  {
3204
- return (sr->deleted_docs != NULL && bv_get(sr->deleted_docs, doc_num));
3902
+ return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
3205
3903
  }
3206
3904
 
3207
3905
  static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3208
3906
  uchar *buf)
3209
3907
  {
3210
3908
  Norm *norm = h_get_int(sr->norms, field_num);
3211
- if (norm == NULL) {
3909
+ if (NULL == norm) {
3212
3910
  memset(buf, 0, SR_SIZE(sr));
3213
3911
  }
3214
- else if (norm->bytes != NULL) { /* can copy from cache */
3912
+ else if (NULL != norm->bytes) { /* can copy from cache */
3215
3913
  memcpy(buf, norm->bytes, SR_SIZE(sr));
3216
3914
  }
3217
3915
  else {
@@ -3226,11 +3924,11 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3226
3924
  static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3227
3925
  {
3228
3926
  Norm *norm = h_get_int(sr->norms, field_num);
3229
- if (norm == NULL) { /* not an indexed field */
3927
+ if (NULL == norm) { /* not an indexed field */
3230
3928
  return NULL;
3231
3929
  }
3232
3930
 
3233
- if (norm->bytes == NULL) { /* value not yet read */
3931
+ if (NULL == norm->bytes) { /* value not yet read */
3234
3932
  uchar *bytes = ALLOC_N(uchar, SR_SIZE(sr));
3235
3933
  sr_get_norms_into_i(sr, field_num, bytes);
3236
3934
  norm->bytes = bytes; /* cache it */
@@ -3241,7 +3939,8 @@ static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3241
3939
  static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
3242
3940
  {
3243
3941
  Norm *norm = h_get_int(SR(ir)->norms, field_num);
3244
- if (norm != NULL) { /* has_norms */
3942
+ if (NULL != norm) { /* has_norms */
3943
+ ir->has_changes = true;
3245
3944
  norm->is_dirty = true; /* mark it dirty */
3246
3945
  SR(ir)->norms_dirty = true;
3247
3946
  sr_get_norms_i(SR(ir), field_num)[doc_num] = b;
@@ -3250,12 +3949,13 @@ static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
3250
3949
 
3251
3950
  static void sr_delete_doc_i(IndexReader *ir, int doc_num)
3252
3951
  {
3253
- if (SR(ir)->deleted_docs == NULL) {
3952
+ if (NULL == SR(ir)->deleted_docs) {
3254
3953
  SR(ir)->deleted_docs = bv_new();
3255
3954
  }
3256
3955
 
3257
3956
  SR(ir)->deleted_docs_dirty = true;
3258
3957
  SR(ir)->undelete_all = false;
3958
+ ir->has_changes = true;
3259
3959
  bv_set(SR(ir)->deleted_docs, doc_num);
3260
3960
  }
3261
3961
 
@@ -3263,12 +3963,18 @@ static void sr_undelete_all_i(IndexReader *ir)
3263
3963
  {
3264
3964
  SR(ir)->undelete_all = true;
3265
3965
  SR(ir)->deleted_docs_dirty = false;
3266
- if (SR(ir)->deleted_docs != NULL) {
3966
+ ir->has_changes = true;
3967
+ if (NULL != SR(ir)->deleted_docs) {
3267
3968
  bv_destroy(SR(ir)->deleted_docs);
3268
3969
  }
3269
3970
  SR(ir)->deleted_docs = NULL;
3270
3971
  }
3271
3972
 
3973
+ static void sr_set_deleter_i(IndexReader *ir, Deleter *deleter)
3974
+ {
3975
+ ir->deleter = deleter;
3976
+ }
3977
+
3272
3978
  static void bv_write(BitVector *bv, Store *store, char *name)
3273
3979
  {
3274
3980
  int i;
@@ -3299,64 +4005,61 @@ static BitVector *bv_read(Store *store, char *name)
3299
4005
 
3300
4006
  static void sr_commit_i(IndexReader *ir)
3301
4007
  {
4008
+ SegmentInfo *si = SR(ir)->si;
4009
+ char *segment = SR(ir)->si->name;
3302
4010
  char tmp_file_name[SEGMENT_NAME_MAX_LENGTH];
3303
- char del_file_name[SEGMENT_NAME_MAX_LENGTH];
3304
-
3305
- sprintf(del_file_name, "%s.del", SR(ir)->segment);
3306
4011
 
3307
- if (SR(ir)->deleted_docs_dirty) { /* re-write deleted */
3308
- sprintf(tmp_file_name, "%s.tmp", SR(ir)->segment);
3309
- bv_write(SR(ir)->deleted_docs, ir->store, tmp_file_name);
3310
- ir->store->rename(ir->store, tmp_file_name, del_file_name);
3311
- }
3312
- if (SR(ir)->undelete_all && ir->store->exists(ir->store, del_file_name)) {
3313
- ir->store->remove(ir->store, del_file_name);
4012
+ if (SR(ir)->undelete_all || SR(ir)->deleted_docs_dirty) {
4013
+ if (si->del_gen >= 0) {
4014
+ fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
4015
+ deleter_queue_file(ir->deleter, tmp_file_name);
4016
+ }
4017
+ if (SR(ir)->undelete_all) {
4018
+ si->del_gen = -1;
4019
+ SR(ir)->undelete_all = false;
4020
+ } else {
4021
+ /* (SR(ir)->deleted_docs_dirty) re-write deleted */
4022
+ si->del_gen++;
4023
+ fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
4024
+ bv_write(SR(ir)->deleted_docs, ir->store, tmp_file_name);
4025
+ SR(ir)->deleted_docs_dirty = false;
4026
+ }
3314
4027
  }
3315
4028
  if (SR(ir)->norms_dirty) { /* re-write norms */
3316
4029
  int i;
3317
4030
  const int field_cnt = ir->fis->size;
3318
4031
  FieldInfo *fi;
3319
- for (i = 0; i < field_cnt; i++) {
4032
+ for (i = field_cnt - 1; i >= 0; i--) {
3320
4033
  fi = ir->fis->fields[i];
3321
4034
  if (fi_is_indexed(fi)) {
3322
- norm_rewrite(h_get_int(SR(ir)->norms, fi->number), ir->store,
3323
- SR(ir)->segment, SR_SIZE(ir), SR(ir)->cfs_store);
4035
+ Norm *norm = h_get_int(SR(ir)->norms, fi->number);
4036
+ if (norm && norm->is_dirty) {
4037
+ norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
4038
+ SR_SIZE(ir));
4039
+ }
3324
4040
  }
3325
4041
  }
4042
+ SR(ir)->norms_dirty = false;
3326
4043
  }
3327
- SR(ir)->deleted_docs_dirty = false;
3328
- SR(ir)->norms_dirty = false;
3329
- SR(ir)->undelete_all = false;
3330
4044
  }
3331
4045
 
3332
4046
  static void sr_close_i(IndexReader *ir)
3333
4047
  {
3334
4048
  SegmentReader *sr = SR(ir);
3335
4049
 
3336
- fr_close(sr->fr);
3337
- tir_close(sr->tir);
3338
- sfi_close(sr->sfi);
3339
-
3340
- if (sr->frq_in) {
3341
- is_close(sr->frq_in);
3342
- }
3343
- if (sr->prx_in) {
3344
- is_close(sr->prx_in);
3345
- }
3346
-
3347
- h_destroy(sr->norms);
3348
-
4050
+ if (sr->fr) fr_close(sr->fr);
4051
+ if (sr->tir) tir_close(sr->tir);
4052
+ if (sr->sfi) sfi_close(sr->sfi);
4053
+ if (sr->frq_in) is_close(sr->frq_in);
4054
+ if (sr->prx_in) is_close(sr->prx_in);
4055
+ if (sr->norms) h_destroy(sr->norms);
4056
+ if (sr->deleted_docs) bv_destroy(sr->deleted_docs);
4057
+ if (sr->cfs_store) store_deref(sr->cfs_store);
3349
4058
  if (sr->fr_bucket) {
3350
4059
  thread_setspecific(sr->thread_fr, NULL);
3351
4060
  thread_key_delete(sr->thread_fr);
3352
4061
  ary_destroy(sr->fr_bucket, (free_ft)&fr_close);
3353
4062
  }
3354
- if (sr->deleted_docs) {
3355
- bv_destroy(sr->deleted_docs);
3356
- }
3357
- if (sr->cfs_store) {
3358
- store_deref(sr->cfs_store);
3359
- }
3360
4063
  }
3361
4064
 
3362
4065
  static int sr_num_docs(IndexReader *ir)
@@ -3365,7 +4068,7 @@ static int sr_num_docs(IndexReader *ir)
3365
4068
 
3366
4069
  mutex_lock(&ir->mutex);
3367
4070
  num_docs = SR(ir)->fr->size;
3368
- if (SR(ir)->deleted_docs != NULL) {
4071
+ if (NULL != SR(ir)->deleted_docs) {
3369
4072
  num_docs -= SR(ir)->deleted_docs->count;
3370
4073
  }
3371
4074
  mutex_unlock(&ir->mutex);
@@ -3473,7 +4176,7 @@ static TermVector *sr_term_vector(IndexReader *ir, int doc_num,
3473
4176
  static HashTable *sr_term_vectors(IndexReader *ir, int doc_num)
3474
4177
  {
3475
4178
  FieldsReader *fr;
3476
- if (!SR(ir)->fr || (fr = sr_fr(SR(ir))) == NULL) {
4179
+ if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
3477
4180
  return NULL;
3478
4181
  }
3479
4182
 
@@ -3493,42 +4196,32 @@ static bool sr_is_deleted(IndexReader *ir, int doc_num)
3493
4196
 
3494
4197
  static bool sr_has_deletions(IndexReader *ir)
3495
4198
  {
3496
- return (SR(ir)->deleted_docs != NULL);
4199
+ return NULL != SR(ir)->deleted_docs;
3497
4200
  }
3498
4201
 
3499
4202
  static void sr_open_norms(IndexReader *ir, Store *cfs_store)
3500
4203
  {
3501
4204
  int i;
3502
- Store *store = ir->store;
4205
+ SegmentInfo *si = SR(ir)->si;
3503
4206
  char file_name[SEGMENT_NAME_MAX_LENGTH];
3504
- FieldInfos *fis = ir->fis;
3505
- char *ext_ptr;
3506
- const int field_cnt = fis->size;
3507
-
3508
- sprintf(file_name, "%s.", SR(ir)->segment);
3509
- ext_ptr = file_name + strlen(file_name);
3510
4207
 
3511
- for (i = 0; i < field_cnt; i++) {
3512
- if (fi_has_norms(fis->fields[i])) {
3513
- sprintf(ext_ptr, "s%d", i);
3514
- if (!store->exists(store, file_name)) {
3515
- sprintf(ext_ptr, "f%d", i);
3516
- store = cfs_store;
3517
- }
3518
- if (store->exists(store, file_name)) {
3519
- h_set_int(SR(ir)->norms, i,
3520
- norm_create(store->open_input(store, file_name), i));
3521
- }
4208
+ for (i = si->norm_gens_size - 1; i >= 0; i--) {
4209
+ Store *store = (si->use_compound_file && si->norm_gens[i] == 0) ?
4210
+ cfs_store : ir->store;
4211
+ if (si_norm_file_name(si, file_name, i)) {
4212
+ h_set_int(SR(ir)->norms, i,
4213
+ norm_create(store->open_input(store, file_name), i));
3522
4214
  }
3523
4215
  }
3524
4216
  SR(ir)->norms_dirty = false;
3525
4217
  }
3526
4218
 
3527
- static IndexReader *sr_setup_i(SegmentReader *sr, SegmentInfo *si)
4219
+ static IndexReader *sr_setup_i(SegmentReader *sr)
3528
4220
  {
3529
- Store *store = si->store;
4221
+ Store *store = sr->si->store;
3530
4222
  IndexReader *ir = IR(sr);
3531
4223
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4224
+ char *sr_segment = sr->si->name;
3532
4225
 
3533
4226
  ir->num_docs = &sr_num_docs;
3534
4227
  ir->max_doc = &sr_max_doc;
@@ -3549,51 +4242,56 @@ static IndexReader *sr_setup_i(SegmentReader *sr, SegmentInfo *si)
3549
4242
  ir->set_norm_i = &sr_set_norm_i;
3550
4243
  ir->delete_doc_i = &sr_delete_doc_i;
3551
4244
  ir->undelete_all_i = &sr_undelete_all_i;
4245
+ ir->set_deleter_i = &sr_set_deleter_i;
3552
4246
  ir->commit_i = &sr_commit_i;
3553
4247
  ir->close_i = &sr_close_i;
3554
4248
 
3555
- sr->segment = si->name;
3556
4249
  sr->cfs_store = NULL;
3557
4250
 
3558
- sprintf(file_name, "%s.cfs", sr->segment);
3559
- if (store->exists(store, file_name)) {
3560
- sr->cfs_store = open_cmpd_store(store, file_name);
3561
- store = sr->cfs_store;
3562
- }
4251
+ TRY
4252
+ if (sr->si->use_compound_file) {
4253
+ sprintf(file_name, "%s.cfs", sr_segment);
4254
+ sr->cfs_store = open_cmpd_store(store, file_name);
4255
+ store = sr->cfs_store;
4256
+ }
3563
4257
 
3564
- sr->fr = fr_open(store, sr->segment, ir->fis);
3565
- sr->sfi = sfi_open(store, sr->segment);
3566
- sr->tir = tir_open(store, sr->sfi, sr->segment);
4258
+ sr->fr = fr_open(store, sr_segment, ir->fis);
4259
+ sr->sfi = sfi_open(store, sr_segment);
4260
+ sr->tir = tir_open(store, sr->sfi, sr_segment);
3567
4261
 
3568
- sr->deleted_docs = NULL;
3569
- sr->deleted_docs_dirty = false;
3570
- sr->undelete_all = false;
3571
- if (si_has_deletions(si)) {
3572
- sprintf(file_name, "%s.del", sr->segment);
3573
- sr->deleted_docs = bv_read(si->store, file_name);
3574
- }
4262
+ sr->deleted_docs = NULL;
4263
+ sr->deleted_docs_dirty = false;
4264
+ sr->undelete_all = false;
4265
+ if (si_has_deletions(sr->si)) {
4266
+ fn_for_generation(file_name, sr_segment, "del", sr->si->del_gen);
4267
+ sr->deleted_docs = bv_read(sr->si->store, file_name);
4268
+ }
3575
4269
 
3576
- sprintf(file_name, "%s.frq", sr->segment);
3577
- sr->frq_in = store->open_input(store, file_name);
3578
- sprintf(file_name, "%s.prx", sr->segment);
3579
- sr->prx_in = store->open_input(store, file_name);
3580
- sr->norms = h_new_int((free_ft)&norm_destroy);
3581
- sr_open_norms(ir, store);
4270
+ sprintf(file_name, "%s.frq", sr_segment);
4271
+ sr->frq_in = store->open_input(store, file_name);
4272
+ sprintf(file_name, "%s.prx", sr_segment);
4273
+ sr->prx_in = store->open_input(store, file_name);
4274
+ sr->norms = h_new_int((free_ft)&norm_destroy);
4275
+ sr_open_norms(ir, store);
4276
+ if (fis_has_vectors(ir->fis)) {
4277
+ thread_key_create(&sr->thread_fr, NULL);
4278
+ sr->fr_bucket = ary_new();
4279
+ }
4280
+ XCATCHALL
4281
+ ir_close(ir);
4282
+ XENDTRY
3582
4283
 
3583
- if (fis_has_vectors(ir->fis)) {
3584
- thread_key_create(&sr->thread_fr, NULL);
3585
- sr->fr_bucket = ary_new();
3586
- }
3587
4284
  return ir;
3588
4285
  }
3589
4286
 
3590
4287
  static IndexReader *sr_open(SegmentInfos *sis, FieldInfos *fis, int si_num,
3591
4288
  bool is_owner)
3592
4289
  {
4290
+ IndexReader *ir;
3593
4291
  SegmentReader *sr = ALLOC_AND_ZERO(SegmentReader);
3594
- SegmentInfo *si = sis->segs[si_num];
3595
- IndexReader *ir = ir_setup(IR(sr), si->store, sis, fis, is_owner);
3596
- return sr_setup_i(SR(ir), si);
4292
+ sr->si = sis->segs[si_num];
4293
+ ir = ir_setup(IR(sr), sr->si->store, sis, fis, is_owner);
4294
+ return sr_setup_i(sr);
3597
4295
  }
3598
4296
 
3599
4297
  /****************************************************************************
@@ -3683,7 +4381,7 @@ static uchar *mr_get_norms(IndexReader *ir, int field_num)
3683
4381
 
3684
4382
  mutex_lock(&ir->mutex);
3685
4383
  bytes = h_get_int(MR(ir)->norms_cache, field_num);
3686
- if (bytes == NULL) {
4384
+ if (NULL == bytes) {
3687
4385
  int i;
3688
4386
  const int mr_reader_cnt = MR(ir)->r_cnt;
3689
4387
 
@@ -3709,7 +4407,7 @@ static uchar *mr_get_norms_into(IndexReader *ir, int field_num, uchar *buf)
3709
4407
 
3710
4408
  mutex_lock(&ir->mutex);
3711
4409
  bytes = h_get_int(MR(ir)->norms_cache, field_num);
3712
- if (bytes != NULL) {
4410
+ if (NULL != bytes) {
3713
4411
  memcpy(buf, bytes, MR(ir)->max_doc);
3714
4412
  }
3715
4413
  else {
@@ -3791,6 +4489,7 @@ static void mr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar val
3791
4489
  int fnum = mr_get_field_num(MR(ir), i, field_num);
3792
4490
  if (fnum >= 0) {
3793
4491
  IndexReader *reader = MR(ir)->sub_readers[i];
4492
+ ir->has_changes = true;
3794
4493
  h_del_int(MR(ir)->norms_cache, fnum);/* clear cache */
3795
4494
  ir_set_norm_i(reader, doc_num - MR(ir)->starts[i], fnum, val);
3796
4495
  }
@@ -3804,6 +4503,7 @@ static void mr_delete_doc_i(IndexReader *ir, int doc_num)
3804
4503
  /* dispatch to segment reader */
3805
4504
  reader->delete_doc_i(reader, doc_num - MR(ir)->starts[i]);
3806
4505
  MR(ir)->has_deletions = true;
4506
+ ir->has_changes = true;
3807
4507
  }
3808
4508
 
3809
4509
  static void mr_undelete_all_i(IndexReader *ir)
@@ -3817,6 +4517,17 @@ static void mr_undelete_all_i(IndexReader *ir)
3817
4517
  reader->undelete_all_i(reader);
3818
4518
  }
3819
4519
  MR(ir)->has_deletions = false;
4520
+ ir->has_changes = true;
4521
+ }
4522
+
4523
+ static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
4524
+ {
4525
+ int i;
4526
+ ir->deleter = deleter;
4527
+ for (i = MR(ir)->r_cnt - 1; i >= 0; i--) {
4528
+ IndexReader *reader = MR(ir)->sub_readers[i];
4529
+ reader->set_deleter_i(reader, deleter);
4530
+ }
3820
4531
  }
3821
4532
 
3822
4533
  static void mr_commit_i(IndexReader *ir)
@@ -3825,7 +4536,7 @@ static void mr_commit_i(IndexReader *ir)
3825
4536
  const int mr_reader_cnt = MR(ir)->r_cnt;
3826
4537
  for (i = 0; i < mr_reader_cnt; i++) {
3827
4538
  IndexReader *reader = MR(ir)->sub_readers[i];
3828
- ir_commit(reader);
4539
+ ir_commit_i(reader);
3829
4540
  }
3830
4541
  }
3831
4542
 
@@ -3887,6 +4598,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
3887
4598
  ir->set_norm_i = &mr_set_norm_i;
3888
4599
  ir->delete_doc_i = &mr_delete_doc_i;
3889
4600
  ir->undelete_all_i = &mr_undelete_all_i;
4601
+ ir->set_deleter_i = &mr_set_deleter_i;
3890
4602
  ir->commit_i = &mr_commit_i;
3891
4603
  ir->close_i = &mr_close_i;
3892
4604
 
@@ -3980,33 +4692,65 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
3980
4692
  * IndexReader
3981
4693
  ****************************************************************************/
3982
4694
 
4695
+
4696
+ static void ir_open_i(Store *store, FindSegmentsFile *fsf)
4697
+ {
4698
+ volatile bool success = false;
4699
+ IndexReader *ir = NULL;
4700
+ SegmentInfos *sis = NULL;
4701
+ TRY
4702
+ do {
4703
+ FieldInfos *fis;
4704
+
4705
+ mutex_lock(&store->mutex);
4706
+ sis_read_i(store, fsf);
4707
+ sis = fsf->p_return;
4708
+ fis = sis->fis;
4709
+
4710
+ if (sis->size == 1) {
4711
+ ir = sr_open(sis, fis, 0, true);
4712
+ }
4713
+ else {
4714
+ int i;
4715
+ IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
4716
+ int num_segments = sis->size;
4717
+ for (i = num_segments - 1; i >= 0; i--) {
4718
+ TRY
4719
+ readers[i] = sr_open(sis, fis, i, false);
4720
+ XCATCHALL
4721
+ for (i++; i < num_segments; i++) {
4722
+ ir_close(readers[i]);
4723
+ }
4724
+ free(readers);
4725
+ XENDTRY
4726
+ }
4727
+ ir = mr_open_i(store, sis, fis, readers, sis->size);
4728
+ }
4729
+ fsf->p_return = ir;
4730
+ success = true;
4731
+ } while (0);
4732
+ XFINALLY
4733
+ if (!success) {
4734
+ if (ir) {
4735
+ ir_close(ir);
4736
+ }
4737
+ else if (sis) {
4738
+ sis_destroy(sis);
4739
+ }
4740
+ }
4741
+ mutex_unlock(&store->mutex);
4742
+ XENDTRY
4743
+ }
4744
+
3983
4745
  /**
3984
4746
  * Will keep a reference to the store. To let this method delete the store
3985
4747
  * make sure you deref the store that you pass to it
3986
4748
  */
3987
4749
  IndexReader *ir_open(Store *store)
3988
4750
  {
3989
- int i;
3990
- IndexReader *ir;
3991
- SegmentInfos *sis;
3992
- FieldInfos *fis;
3993
-
3994
- mutex_lock(&store->mutex);
3995
- sis = sis_read(store);
3996
- fis = fis_read(store);
3997
- if (sis->size == 1) {
3998
- ir = sr_open(sis, fis, 0, true);
3999
- }
4000
- else {
4001
- IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
4002
- for (i = sis->size; i > 0;) {
4003
- i--;
4004
- readers[i] = sr_open(sis, fis, i, false);
4005
- }
4006
- ir = mr_open_i(store, sis, fis, readers, sis->size);
4007
- }
4008
- mutex_unlock(&store->mutex);
4009
- return ir;
4751
+ FindSegmentsFile fsf;
4752
+ sis_find_segments_file(store, &fsf, &ir_open_i);
4753
+ return (IndexReader *)fsf.p_return;
4010
4754
  }
4011
4755
 
4012
4756
  /****************************************************************************
@@ -4126,8 +4870,8 @@ typedef struct SkipBuffer
4126
4870
  OutStream *frq_out;
4127
4871
  OutStream *prx_out;
4128
4872
  int last_doc;
4129
- int last_frq_ptr;
4130
- int last_prx_ptr;
4873
+ off_t last_frq_ptr;
4874
+ off_t last_prx_ptr;
4131
4875
  } SkipBuffer;
4132
4876
 
4133
4877
  static void skip_buf_reset(SkipBuffer *skip_buf)
@@ -4149,8 +4893,8 @@ static SkipBuffer *skip_buf_new(OutStream *frq_out, OutStream *prx_out)
4149
4893
 
4150
4894
  static void skip_buf_add(SkipBuffer *skip_buf, int doc)
4151
4895
  {
4152
- int frq_ptr = os_pos(skip_buf->frq_out);
4153
- int prx_ptr = os_pos(skip_buf->prx_out);
4896
+ off_t frq_ptr = os_pos(skip_buf->frq_out);
4897
+ off_t prx_ptr = os_pos(skip_buf->prx_out);
4154
4898
 
4155
4899
  os_write_vint(skip_buf->buf, doc - skip_buf->last_doc);
4156
4900
  os_write_vint(skip_buf->buf, frq_ptr - skip_buf->last_frq_ptr);
@@ -4161,9 +4905,9 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
4161
4905
  skip_buf->last_prx_ptr = prx_ptr;
4162
4906
  }
4163
4907
 
4164
- static int skip_buf_write(SkipBuffer *skip_buf)
4908
+ static off_t skip_buf_write(SkipBuffer *skip_buf)
4165
4909
  {
4166
- int skip_ptr = os_pos(skip_buf->frq_out);
4910
+ off_t skip_ptr = os_pos(skip_buf->frq_out);
4167
4911
  ramo_write_to(skip_buf->buf, skip_buf->frq_out);
4168
4912
  return skip_ptr;
4169
4913
  }
@@ -4184,7 +4928,8 @@ static void dw_write_norms(DocWriter *dw, FieldInverter *fld_inv)
4184
4928
  {
4185
4929
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4186
4930
  OutStream *norms_out;
4187
- sprintf(file_name, "%s.f%d", dw->segment, fld_inv->fi->number);
4931
+ si_advance_norm_gen(dw->si, fld_inv->fi->number);
4932
+ si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
4188
4933
  norms_out = dw->store->new_output(dw->store, file_name);
4189
4934
  os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
4190
4935
  os_close(norms_out);
@@ -4232,23 +4977,23 @@ static void dw_flush(DocWriter *dw)
4232
4977
  Posting *p;
4233
4978
  Occurence *occ;
4234
4979
  Store *store = dw->store;
4235
- TermInfosWriter *tiw = tiw_open(store, dw->segment,
4980
+ TermInfosWriter *tiw = tiw_open(store, dw->si->name,
4236
4981
  dw->index_interval, skip_interval);
4237
4982
  TermInfo ti;
4238
4983
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4239
4984
  OutStream *frq_out, *prx_out;
4240
4985
  SkipBuffer *skip_buf;
4241
4986
 
4242
- sprintf(file_name, "%s.frq", dw->segment);
4987
+ sprintf(file_name, "%s.frq", dw->si->name);
4243
4988
  frq_out = store->new_output(store, file_name);
4244
- sprintf(file_name, "%s.prx", dw->segment);
4989
+ sprintf(file_name, "%s.prx", dw->si->name);
4245
4990
  prx_out = store->new_output(store, file_name);
4246
4991
  skip_buf = skip_buf_new(frq_out, prx_out);
4247
4992
 
4248
4993
  for (i = 0; i < fields_count; i++) {
4249
4994
  fi = fis->fields[i];
4250
4995
  if (!fi_is_indexed(fi)
4251
- || (fld_inv = h_get_int(dw->fields, fi->number)) == NULL) {
4996
+ || NULL == (fld_inv = h_get_int(dw->fields, fi->number))) {
4252
4997
  continue;
4253
4998
  }
4254
4999
  if (!fi_omit_norms(fi)) {
@@ -4265,9 +5010,9 @@ static void dw_flush(DocWriter *dw)
4265
5010
  last_doc = 0;
4266
5011
  doc_freq = 0;
4267
5012
  skip_buf_reset(skip_buf);
4268
- for (p = pl->first; p != NULL; p = p->next) {
5013
+ for (p = pl->first; NULL != p; p = p->next) {
4269
5014
  doc_freq++;
4270
- if ((doc_freq % dw->skip_interval) == 0) {
5015
+ if (0 == (doc_freq % dw->skip_interval)) {
4271
5016
  skip_buf_add(skip_buf, last_doc);
4272
5017
  }
4273
5018
 
@@ -4283,7 +5028,7 @@ static void dw_flush(DocWriter *dw)
4283
5028
  }
4284
5029
 
4285
5030
  last_pos = 0;
4286
- for (occ = p->first_occ; occ != NULL; occ = occ->next) {
5031
+ for (occ = p->first_occ; NULL != occ; occ = occ->next) {
4287
5032
  os_write_vint(prx_out, occ->pos - last_pos);
4288
5033
  last_pos = occ->pos;
4289
5034
  }
@@ -4300,7 +5045,7 @@ static void dw_flush(DocWriter *dw)
4300
5045
  dw_flush_streams(dw);
4301
5046
  }
4302
5047
 
4303
- DocWriter *dw_open(IndexWriter *iw, const char *segment)
5048
+ DocWriter *dw_open(IndexWriter *iw, SegmentInfo *si)
4304
5049
  {
4305
5050
  Store *store = iw->store;
4306
5051
  MemoryPool *mp = mp_new_capa(iw->config.chunk_size,
@@ -4308,34 +5053,34 @@ DocWriter *dw_open(IndexWriter *iw, const char *segment)
4308
5053
 
4309
5054
  DocWriter *dw = ALLOC(DocWriter);
4310
5055
 
4311
- dw->mp = mp;
4312
- dw->analyzer = iw->analyzer;
4313
- dw->fis = iw->fis;
4314
- dw->store = store;
4315
- dw->fw = fw_open(store, segment, iw->fis);
4316
- dw->segment = segment;
5056
+ dw->mp = mp;
5057
+ dw->analyzer = iw->analyzer;
5058
+ dw->fis = iw->fis;
5059
+ dw->store = store;
5060
+ dw->fw = fw_open(store, si->name, iw->fis);
5061
+ dw->si = si;
4317
5062
 
4318
5063
  dw->curr_plists = h_new_str(NULL, NULL);
4319
- dw->fields = h_new_int((free_ft)fld_inv_destroy);
4320
- dw->doc_num = 0;
5064
+ dw->fields = h_new_int((free_ft)fld_inv_destroy);
5065
+ dw->doc_num = 0;
4321
5066
 
4322
- dw->index_interval = iw->config.index_interval;
4323
- dw->skip_interval = iw->config.skip_interval;
4324
- dw->max_field_length = iw->config.max_field_length;
4325
- dw->max_buffered_docs = iw->config.max_buffered_docs;
5067
+ dw->index_interval = iw->config.index_interval;
5068
+ dw->skip_interval = iw->config.skip_interval;
5069
+ dw->max_field_length = iw->config.max_field_length;
5070
+ dw->max_buffered_docs = iw->config.max_buffered_docs;
4326
5071
 
4327
- dw->offsets = ALLOC_AND_ZERO_N(Offset, DW_OFFSET_INIT_CAPA);
4328
- dw->offsets_size = 0;
4329
- dw->offsets_capa = DW_OFFSET_INIT_CAPA;
5072
+ dw->offsets = ALLOC_AND_ZERO_N(Offset, DW_OFFSET_INIT_CAPA);
5073
+ dw->offsets_size = 0;
5074
+ dw->offsets_capa = DW_OFFSET_INIT_CAPA;
4330
5075
 
4331
- dw->similarity = iw->similarity;
5076
+ dw->similarity = iw->similarity;
4332
5077
  return dw;
4333
5078
  }
4334
5079
 
4335
- void dw_new_segment(DocWriter *dw, char *segment)
5080
+ void dw_new_segment(DocWriter *dw, SegmentInfo *si)
4336
5081
  {
4337
- dw->fw = fw_open(dw->store, segment, dw->fis);
4338
- dw->segment = segment;
5082
+ dw->fw = fw_open(dw->store, si->name, dw->fis);
5083
+ dw->si = si;
4339
5084
  }
4340
5085
 
4341
5086
  void dw_close(DocWriter *dw)
@@ -4536,7 +5281,7 @@ typedef struct SegmentMergeInfo {
4536
5281
  int base;
4537
5282
  int max_doc;
4538
5283
  int doc_cnt;
4539
- char *segment;
5284
+ SegmentInfo *si;
4540
5285
  Store *store;
4541
5286
  Store *orig_store;
4542
5287
  BitVector *deleted_docs;
@@ -4552,7 +5297,7 @@ typedef struct SegmentMergeInfo {
4552
5297
  static bool smi_lt(const SegmentMergeInfo *smi1, const SegmentMergeInfo *smi2)
4553
5298
  {
4554
5299
  int cmpres = strcmp(smi1->term, smi2->term);
4555
- if (cmpres == 0) {
5300
+ if (0 == cmpres) {
4556
5301
  return smi1->base < smi2->base;
4557
5302
  }
4558
5303
  else {
@@ -4578,12 +5323,13 @@ static void smi_load_doc_map(SegmentMergeInfo *smi)
4578
5323
  smi->doc_cnt = j;
4579
5324
  }
4580
5325
 
4581
- static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
5326
+ static SegmentMergeInfo *smi_new(int base, Store *store, SegmentInfo *si)
4582
5327
  {
4583
5328
  SegmentMergeInfo *smi = ALLOC_AND_ZERO(SegmentMergeInfo);
4584
5329
  char file_name[SEGMENT_NAME_MAX_LENGTH];
5330
+ char *segment = si->name;
4585
5331
  smi->base = base;
4586
- smi->segment = segment;
5332
+ smi->si = si;
4587
5333
  smi->orig_store = smi->store = store;
4588
5334
  sprintf(file_name, "%s.cfs", segment);
4589
5335
  if (store->exists(store, file_name)) {
@@ -4595,8 +5341,8 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
4595
5341
  smi->doc_cnt = smi->max_doc
4596
5342
  = smi->store->length(smi->store, file_name) / FIELDS_IDX_PTR_SIZE;
4597
5343
 
4598
- sprintf(file_name, "%s.del", segment);
4599
- if (store->exists(store, file_name)) {
5344
+ if (si->del_gen >= 0) {
5345
+ fn_for_generation(file_name, segment, "del", si->del_gen);
4600
5346
  smi->deleted_docs = bv_read(store, file_name);
4601
5347
  smi_load_doc_map(smi);
4602
5348
  }
@@ -4606,13 +5352,14 @@ static SegmentMergeInfo *smi_new(int base, Store *store, char *segment)
4606
5352
  static void smi_load_term_input(SegmentMergeInfo *smi)
4607
5353
  {
4608
5354
  Store *store = smi->store;
5355
+ char *segment = smi->si->name;
4609
5356
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4610
- smi->sfi = sfi_open(store, smi->segment);
4611
- sprintf(file_name, "%s.tis", smi->segment);
5357
+ smi->sfi = sfi_open(store, segment);
5358
+ sprintf(file_name, "%s.tis", segment);
4612
5359
  smi->te = TE(ste_new(store->open_input(store, file_name), smi->sfi));
4613
- sprintf(file_name, "%s.frq", smi->segment);
5360
+ sprintf(file_name, "%s.frq", segment);
4614
5361
  smi->frq_in = store->open_input(store, file_name);
4615
- sprintf(file_name, "%s.prx", smi->segment);
5362
+ sprintf(file_name, "%s.prx", segment);
4616
5363
  smi->prx_in = store->open_input(store, file_name);
4617
5364
  smi->tde = stpe_new(NULL, smi->frq_in, smi->prx_in, smi->deleted_docs,
4618
5365
  STE(smi->te)->skip_interval);
@@ -4652,7 +5399,7 @@ typedef struct SegmentMerger {
4652
5399
  TermInfo ti;
4653
5400
  Store *store;
4654
5401
  FieldInfos *fis;
4655
- char *segment;
5402
+ SegmentInfo *si;
4656
5403
  SegmentMergeInfo **smis;
4657
5404
  int seg_cnt;
4658
5405
  int doc_cnt;
@@ -4667,19 +5414,19 @@ typedef struct SegmentMerger {
4667
5414
  OutStream *prx_out;
4668
5415
  } SegmentMerger;
4669
5416
 
4670
- static SegmentMerger *sm_create(IndexWriter *iw, char *segment,
5417
+ static SegmentMerger *sm_create(IndexWriter *iw, SegmentInfo *si,
4671
5418
  SegmentInfo **seg_infos, const int seg_cnt)
4672
5419
  {
4673
5420
  int i;
4674
5421
  SegmentMerger *sm = ALLOC_AND_ZERO_N(SegmentMerger, seg_cnt);
4675
5422
  sm->store = iw->store;
4676
5423
  sm->fis = iw->fis;
4677
- sm->segment = estrdup(segment);
5424
+ sm->si = si;
4678
5425
  sm->doc_cnt = 0;
4679
5426
  sm->smis = ALLOC_N(SegmentMergeInfo *, seg_cnt);
4680
5427
  for (i = 0; i < seg_cnt; i++) {
4681
5428
  sm->smis[i] = smi_new(sm->doc_cnt, seg_infos[i]->store,
4682
- seg_infos[i]->name);
5429
+ seg_infos[i]);
4683
5430
  sm->doc_cnt += sm->smis[i]->doc_cnt;
4684
5431
  }
4685
5432
  sm->seg_cnt = seg_cnt;
@@ -4695,7 +5442,6 @@ static void sm_destroy(SegmentMerger *sm)
4695
5442
  smi_destroy(sm->smis[i]);
4696
5443
  }
4697
5444
  free(sm->smis);
4698
- free(sm->segment);
4699
5445
  free(sm);
4700
5446
  }
4701
5447
 
@@ -4708,20 +5454,21 @@ static void sm_merge_fields(SegmentMerger *sm)
4708
5454
  Store *store = sm->store;
4709
5455
  const int seg_cnt = sm->seg_cnt;
4710
5456
 
4711
- sprintf(file_name, "%s.fdt", sm->segment);
5457
+ sprintf(file_name, "%s.fdt", sm->si->name);
4712
5458
  fdt_out = store->new_output(store, file_name);
4713
5459
 
4714
- sprintf(file_name, "%s.fdx", sm->segment);
5460
+ sprintf(file_name, "%s.fdx", sm->si->name);
4715
5461
  fdx_out = store->new_output(store, file_name);
4716
5462
 
4717
5463
  for (i = 0; i < seg_cnt; i++) {
4718
5464
  SegmentMergeInfo *smi = sm->smis[i];
4719
5465
  const int max_doc = smi->max_doc;
4720
5466
  InStream *fdt_in, *fdx_in;
5467
+ char *segment = smi->si->name;
4721
5468
  store = smi->store;
4722
- sprintf(file_name, "%s.fdt", smi->segment);
5469
+ sprintf(file_name, "%s.fdt", segment);
4723
5470
  fdt_in = store->open_input(store, file_name);
4724
- sprintf(file_name, "%s.fdx", smi->segment);
5471
+ sprintf(file_name, "%s.fdx", segment);
4725
5472
  fdx_in = store->open_input(store, file_name);
4726
5473
 
4727
5474
  if (max_doc > 0) {
@@ -4775,7 +5522,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
4775
5522
  * stde_next rather than stpe_next here */
4776
5523
  while (stde_next(tde)) {
4777
5524
  doc = stde_doc_num(tde);
4778
- if (doc_map != NULL) {
5525
+ if (NULL != doc_map) {
4779
5526
  doc = doc_map[doc]; /* work around deletions */
4780
5527
  }
4781
5528
  doc += base; /* convert to merged space */
@@ -4787,7 +5534,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
4787
5534
  #endif
4788
5535
  df++;
4789
5536
 
4790
- if ((df % skip_interval) == 0) {
5537
+ if (0 == (df % skip_interval)) {
4791
5538
  skip_buf_add(skip_buf, last_doc);
4792
5539
  }
4793
5540
 
@@ -4823,12 +5570,12 @@ static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
4823
5570
  static void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **matches,
4824
5571
  int match_size)
4825
5572
  {
4826
- int frq_ptr = os_pos(sm->frq_out);
4827
- int prx_ptr = os_pos(sm->prx_out);
5573
+ off_t frq_ptr = os_pos(sm->frq_out);
5574
+ off_t prx_ptr = os_pos(sm->prx_out);
4828
5575
 
4829
5576
  int df = sm_append_postings(sm, matches, match_size); /* append posting data */
4830
5577
 
4831
- int skip_ptr = skip_buf_write(sm->skip_buf);
5578
+ off_t skip_ptr = skip_buf_write(sm->skip_buf);
4832
5579
 
4833
5580
  if (df > 0) {
4834
5581
  /* add an entry to the dictionary with ptrs to prox and freq files */
@@ -4861,7 +5608,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
4861
5608
  for (j = 0; j < seg_cnt; j++) {
4862
5609
  smi = sm->smis[j];
4863
5610
  ste_set_field(smi->te, i);
4864
- if (smi_next(smi) != NULL) {
5611
+ if (NULL != smi_next(smi)) {
4865
5612
  pq_push(sm->queue, smi); /* initialize @queue */
4866
5613
  }
4867
5614
  }
@@ -4877,7 +5624,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
4877
5624
  match_size++;
4878
5625
  term = matches[0]->term;
4879
5626
  top = pq_top(sm->queue);
4880
- while ((top != NULL) && (strcmp(term, top->term) == 0)) {
5627
+ while ((NULL != top) && (0 == strcmp(term, top->term))) {
4881
5628
  matches[match_size] = pq_pop(sm->queue);
4882
5629
  match_size++;
4883
5630
  top = pq_top(sm->queue);
@@ -4889,7 +5636,7 @@ static void sm_merge_term_infos(SegmentMerger *sm)
4889
5636
  while (match_size > 0) {
4890
5637
  match_size--;
4891
5638
  smi = matches[match_size];
4892
- if (smi_next(smi) != NULL) {
5639
+ if (NULL != smi_next(smi)) {
4893
5640
  pq_push(sm->queue, smi); /* restore queue */
4894
5641
  }
4895
5642
  }
@@ -4905,12 +5652,12 @@ static void sm_merge_terms(SegmentMerger *sm)
4905
5652
  {
4906
5653
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4907
5654
 
4908
- sprintf(file_name, "%s.frq", sm->segment);
5655
+ sprintf(file_name, "%s.frq", sm->si->name);
4909
5656
  sm->frq_out = sm->store->new_output(sm->store, file_name);
4910
- sprintf(file_name, "%s.prx", sm->segment);
5657
+ sprintf(file_name, "%s.prx", sm->si->name);
4911
5658
  sm->prx_out = sm->store->new_output(sm->store, file_name);
4912
5659
 
4913
- sm->tiw = tiw_open(sm->store, sm->segment, sm->config->index_interval,
5660
+ sm->tiw = tiw_open(sm->store, sm->si->name, sm->config->index_interval,
4914
5661
  sm->config->skip_interval);
4915
5662
  sm->skip_buf = skip_buf_new(sm->frq_out, sm->prx_out);
4916
5663
 
@@ -4936,6 +5683,7 @@ static void sm_merge_terms(SegmentMerger *sm)
4936
5683
 
4937
5684
  static void sm_merge_norms(SegmentMerger *sm)
4938
5685
  {
5686
+ SegmentInfo *si;
4939
5687
  int i, j, k;
4940
5688
  Store *store;
4941
5689
  uchar byte;
@@ -4945,23 +5693,21 @@ static void sm_merge_norms(SegmentMerger *sm)
4945
5693
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4946
5694
  SegmentMergeInfo *smi;
4947
5695
  const int seg_cnt = sm->seg_cnt;
4948
- const int fis_size = sm->fis->size;
4949
- for (i = 0; i < fis_size; i++) {
5696
+ for (i = sm->fis->size - 1; i >= 0; i--) {
4950
5697
  fi = sm->fis->fields[i];
4951
5698
  if (fi_has_norms(fi)) {
4952
- sprintf(file_name, "%s.f%d", sm->segment, i);
5699
+ si = sm->si;
5700
+ si_advance_norm_gen(si, i);
5701
+ si_norm_file_name(si, file_name, i);
4953
5702
  os = sm->store->new_output(sm->store, file_name);
4954
5703
  for (j = 0; j < seg_cnt; j++) {
4955
5704
  smi = sm->smis[j];
4956
- store = smi->orig_store;
4957
- sprintf(file_name, "%s.s%d", smi->segment, i);
4958
- if (!store->exists(store, file_name)) {
4959
- sprintf(file_name, "%s.f%d", smi->segment, i);
4960
- store = smi->store;
4961
- }
4962
- if (store->exists(store, file_name)) {
5705
+ si = smi->si;
5706
+ if (si_norm_file_name(si, file_name, i)) {
4963
5707
  const int max_doc = smi->max_doc;
4964
5708
  BitVector *deleted_docs = smi->deleted_docs;
5709
+ store = (si->use_compound_file && si->norm_gens[i])
5710
+ ? smi->orig_store : smi->store;
4965
5711
  is = store->open_input(store, file_name);
4966
5712
  if (deleted_docs) {
4967
5713
  for (k = 0; k < max_doc; k++) {
@@ -5004,11 +5750,18 @@ static int sm_merge(SegmentMerger *sm)
5004
5750
  /* prepare an index ready for writing */
5005
5751
  void index_create(Store *store, FieldInfos *fis)
5006
5752
  {
5007
- SegmentInfos *sis = sis_new();
5753
+ SegmentInfos *sis = sis_new(fis);
5008
5754
  store->clear_all(store);
5009
- sis_write(sis, store);
5755
+ sis_write(sis, store, NULL);
5010
5756
  sis_destroy(sis);
5011
- fis_write(fis, store);
5757
+ }
5758
+
5759
+ bool index_is_locked(Store *store)
5760
+ {
5761
+ Lock *write_lock = open_lock(store, WRITE_LOCK_NAME);
5762
+ bool is_locked = write_lock->is_locked(write_lock);
5763
+ close_lock(write_lock);
5764
+ return is_locked;
5012
5765
  }
5013
5766
 
5014
5767
  int iw_doc_count(IndexWriter *iw)
@@ -5025,158 +5778,79 @@ int iw_doc_count(IndexWriter *iw)
5025
5778
  return doc_cnt;
5026
5779
  }
5027
5780
 
5028
- static void delete_files(char **file_names, Store *store)
5781
+ #define MOVE_TO_COMPOUND_DIR(file_name)\
5782
+ deleter_queue_file(dlr, file_name);\
5783
+ cw_add_file(cw, file_name)
5784
+
5785
+ static void iw_create_compound_file(Store *store, FieldInfos *fis,
5786
+ SegmentInfo *si, char *cfs_file_name,
5787
+ Deleter *dlr)
5029
5788
  {
5030
5789
  int i;
5031
- for (i = ary_size(file_names) - 1; i >= 0; i--) {
5032
- store->remove(store, file_names[i]);
5033
- }
5034
- ary_destroy((void **)file_names, &free);
5035
- }
5036
-
5037
- static char **iw_create_compound_file(Store *store, FieldInfos *fis,
5038
- char *segment, char *cfs_file_name)
5039
- {
5040
- char **file_names = (char **)ary_new_capa(16);
5041
5790
  CompoundWriter *cw;
5042
- FieldInfo *fi;
5043
- int i;
5044
5791
  char file_name[SEGMENT_NAME_MAX_LENGTH];
5045
- const int fis_size = fis->size;
5046
- int file_names_size;
5792
+ char *ext;
5793
+ int seg_len = strlen(si->name);
5794
+
5795
+ memcpy(file_name, si->name, seg_len);
5796
+ file_name[seg_len] = '.';
5797
+ ext = file_name + seg_len + 1;
5047
5798
 
5048
5799
  cw = open_cw(store, cfs_file_name);
5049
5800
  for (i = 0; i < NELEMS(COMPOUND_EXTENSIONS); i++) {
5050
- sprintf(file_name, "%s.%s",
5051
- segment, COMPOUND_EXTENSIONS[i]);
5052
- ary_push(file_names, estrdup(file_name));
5801
+ memcpy(ext, COMPOUND_EXTENSIONS[i], 4);
5802
+ MOVE_TO_COMPOUND_DIR(file_name);
5053
5803
  }
5054
5804
 
5055
5805
  /* Field norm file_names */
5056
- for (i = 0; i < fis_size; i++) {
5057
- fi = fis->fields[i];
5058
- if (fi_has_norms(fi)) {
5059
- sprintf(file_name, "%s.f%d", segment, i);
5060
- if (!store->exists(store, file_name)) {
5061
- continue;
5062
- }
5063
- ary_push(file_names, estrdup(file_name));
5806
+ for (i = fis->size - 1; i >= 0; i--) {
5807
+ if (fi_has_norms(fis->fields[i])
5808
+ && si_norm_file_name(si, file_name, i)) {
5809
+ MOVE_TO_COMPOUND_DIR(file_name);
5064
5810
  }
5065
5811
  }
5066
5812
 
5067
- /* Now merge all added file_names */
5068
- file_names_size = ary_size(file_names);
5069
- for (i = 0; i < file_names_size; i++) {
5070
- cw_add_file(cw, file_names[i]);
5071
- }
5072
-
5073
5813
  /* Perform the merge */
5074
5814
  cw_close(cw);
5075
-
5076
- return file_names;
5077
5815
  }
5078
5816
 
5079
- static void iw_commit_compound_file(IndexWriter *iw, char *segment,
5080
- Lock *commit_lock)
5817
+ static void iw_commit_compound_file(IndexWriter *iw, SegmentInfo *si)
5081
5818
  {
5082
- char tmp_name[SEGMENT_NAME_MAX_LENGTH];
5083
5819
  char cfs_name[SEGMENT_NAME_MAX_LENGTH];
5084
- char **files_to_delete;
5085
- sprintf(tmp_name, "%s.tmp", segment);
5086
- sprintf(cfs_name, "%s.cfs", segment);
5087
-
5088
- files_to_delete =
5089
- iw_create_compound_file(iw->store, iw->fis, segment, tmp_name);
5090
- if (!commit_lock->obtain(commit_lock)) {
5091
- RAISE(LOCK_ERROR,
5092
- "Couldn't obtain commit lock to write compound file");
5093
- }
5820
+ sprintf(cfs_name, "%s.cfs", si->name);
5094
5821
 
5095
- delete_files(files_to_delete, iw->store);
5096
- iw->store->rename(iw->store, tmp_name, cfs_name);
5097
-
5098
- commit_lock->release(commit_lock);
5099
- }
5100
-
5101
- #define ADD_IF_EXISTS_FMT(fmt, ext) do {\
5102
- sprintf(file_name, fmt, segment, ext);\
5103
- if (store->exists(store, file_name)) {\
5104
- ary_push(file_names, estrdup(file_name));\
5105
- }\
5106
- } while (0)
5107
-
5108
- #define ADD_IF_EXISTS(ext) ADD_IF_EXISTS_FMT("%s.%s", ext)
5109
-
5110
- static char **iw_seg_file_names(FieldInfos *fis, Store *store, char *segment)
5111
- {
5112
- char **file_names = (char **)ary_new_capa(16);
5113
- int i;
5114
- char file_name[SEGMENT_NAME_MAX_LENGTH];
5115
- const int fis_size = fis->size;
5116
-
5117
-
5118
- sprintf(file_name, "%s.cfs", segment);
5119
- if (store->exists(store, file_name)) {
5120
- ary_push(file_names, estrdup(file_name));
5121
- ADD_IF_EXISTS("del");
5122
- for (i = 0; i < fis_size; i++) {
5123
- if (fi_has_norms(fis->fields[i])) {
5124
- ADD_IF_EXISTS_FMT("%s.s%d", i);
5125
- }
5126
- }
5127
- }
5128
- else {
5129
- for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
5130
- ADD_IF_EXISTS(INDEX_EXTENSIONS[i]);
5131
- }
5132
- for (i = 0; i < fis_size; i++) {
5133
- if (fi_has_norms(fis->fields[i])) {
5134
- ADD_IF_EXISTS_FMT("%s.f%d", i);
5135
- }
5136
- }
5137
- }
5138
- return file_names;
5822
+ iw_create_compound_file(iw->store, iw->fis, si, cfs_name, iw->deleter);
5823
+ deleter_commit_pending_deletions(iw->deleter);
5139
5824
  }
5140
5825
 
5141
5826
  static void iw_merge_segments(IndexWriter *iw, const int min_seg,
5142
5827
  const int max_seg)
5143
5828
  {
5144
5829
  int i;
5145
- Lock *commit_lock;
5146
5830
  SegmentInfos *sis = iw->sis;
5147
5831
  SegmentInfo *si = sis_new_segment(sis, 0, iw->store);
5148
5832
 
5149
- SegmentMerger *merger = sm_create(iw, si->name, &sis->segs[min_seg],
5833
+ SegmentMerger *merger = sm_create(iw, si, &sis->segs[min_seg],
5150
5834
  max_seg - min_seg);
5151
5835
 
5152
5836
  /* This is where all the action happens. */
5153
5837
  si->doc_cnt = sm_merge(merger);
5154
5838
 
5155
5839
  mutex_lock(&iw->store->mutex);
5156
- commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
5157
-
5158
- /* *** OBTAIN COMMIT LOCK *** */
5159
- if (!commit_lock->obtain(commit_lock)) {
5160
- RAISE(LOCK_ERROR, "Couldn't obtain commit lock to commit merged segment "
5161
- "%s", si->name);
5162
- }
5163
5840
  /* delete merged segments */
5164
5841
  for (i = min_seg; i < max_seg; i++) {
5165
- delete_files(
5166
- iw_seg_file_names(iw->fis, sis->segs[i]->store, sis->segs[i]->name),
5167
- iw->store);
5842
+ si_delete_files(sis->segs[i], iw->fis, iw->deleter);
5168
5843
  }
5844
+ deleter_commit_pending_deletions(iw->deleter);
5845
+
5169
5846
  sis_del_from_to(sis, min_seg, max_seg);
5170
- /* commit the segments file */
5171
- sis_write(sis, iw->store);
5172
- commit_lock->release(commit_lock);
5173
- /* RELEASE COMMIT LOCK */
5174
5847
 
5175
5848
  if (iw->config.use_compound_file) {
5176
- iw_commit_compound_file(iw, si->name, commit_lock);
5849
+ iw_commit_compound_file(iw, si);
5850
+ si->use_compound_file = true;
5177
5851
  }
5178
5852
 
5179
- close_lock(commit_lock);
5853
+ sis_write(sis, iw->store, iw->deleter);
5180
5854
 
5181
5855
  mutex_unlock(&iw->store->mutex);
5182
5856
 
@@ -5223,28 +5897,20 @@ static void iw_flush_ram_segment(IndexWriter *iw)
5223
5897
  {
5224
5898
  SegmentInfos *sis = iw->sis;
5225
5899
  SegmentInfo *si;
5226
- Lock *commit_lock;
5227
5900
 
5228
5901
  si = sis->segs[sis->size - 1];
5229
5902
  si->doc_cnt = iw->dw->doc_num;
5230
5903
  dw_flush(iw->dw);
5231
5904
 
5232
5905
  mutex_lock(&iw->store->mutex);
5233
- commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
5234
5906
 
5235
- if (!commit_lock->obtain(commit_lock)) {
5236
- RAISE(LOCK_ERROR, "Couldn't obtain commit lock to write segments file");
5907
+ if (iw->config.use_compound_file) {
5908
+ iw_commit_compound_file(iw, si);
5909
+ si->use_compound_file = true;
5237
5910
  }
5238
5911
  /* commit the segments file and the fields file */
5239
- fis_write(iw->fis, iw->store);
5240
- sis_write(iw->sis, iw->store);
5241
- commit_lock->release(commit_lock);
5912
+ sis_write(iw->sis, iw->store, iw->deleter);
5242
5913
 
5243
-
5244
- if (iw->config.use_compound_file) {
5245
- iw_commit_compound_file(iw, si->name, commit_lock);
5246
- }
5247
- close_lock(commit_lock);
5248
5914
  mutex_unlock(&iw->store->mutex);
5249
5915
 
5250
5916
  iw_maybe_merge_segments(iw);
@@ -5253,11 +5919,11 @@ static void iw_flush_ram_segment(IndexWriter *iw)
5253
5919
  void iw_add_doc(IndexWriter *iw, Document *doc)
5254
5920
  {
5255
5921
  mutex_lock(&iw->mutex);
5256
- if (!iw->dw) {
5257
- iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store)->name);
5922
+ if (NULL == iw->dw) {
5923
+ iw->dw = dw_open(iw, sis_new_segment(iw->sis, 0, iw->store));
5258
5924
  }
5259
- else if (iw->dw->fw == NULL) {
5260
- dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store)->name);
5925
+ else if (NULL == iw->dw->fw) {
5926
+ dw_new_segment(iw->dw, sis_new_segment(iw->sis, 0, iw->store));
5261
5927
  }
5262
5928
  dw_add_doc(iw->dw, doc);
5263
5929
  if (mp_used(iw->dw->mp) > iw->config.max_buffer_memory
@@ -5291,17 +5957,25 @@ void iw_delete_term(IndexWriter *iw, const char *field, const char *term)
5291
5957
  do {
5292
5958
  SegmentInfos *sis = iw->sis;
5293
5959
  const int seg_cnt = sis->size;
5960
+ bool did_delete = false;
5294
5961
  for (i = 0; i < seg_cnt; i++) {
5295
5962
  IndexReader *ir = sr_open(sis, iw->fis, i, false);
5296
5963
  TermDocEnum *tde = ir->term_docs(ir);
5964
+ ir->deleter = iw->deleter;
5297
5965
  stde_seek(tde, field_num, term);
5298
5966
  while (tde->next(tde)) {
5967
+ did_delete = true;
5299
5968
  sr_delete_doc_i(ir, STDE(tde)->doc_num);
5300
5969
  }
5301
5970
  tde_destroy(tde);
5302
5971
  sr_commit_i(ir);
5303
5972
  ir_close(ir);
5304
5973
  }
5974
+ if (did_delete) {
5975
+ mutex_lock(&iw->store->mutex);
5976
+ sis_write(iw->sis, iw->store, iw->deleter);
5977
+ mutex_unlock(&iw->store->mutex);
5978
+ }
5305
5979
  } while (0);
5306
5980
  mutex_unlock(&iw->mutex);
5307
5981
  }
@@ -5316,7 +5990,7 @@ static void iw_optimize_i(IndexWriter *iw)
5316
5990
  && (si_has_deletions(iw->sis->segs[0])
5317
5991
  || (iw->sis->segs[0]->store != iw->store)
5318
5992
  || (iw->config.use_compound_file
5319
- && (!si_uses_compound_file(iw->sis->segs[0])
5993
+ && (!iw->sis->segs[0]->use_compound_file
5320
5994
  || si_has_separate_norms(iw->sis->segs[0])))))) {
5321
5995
  min_segment = iw->sis->size - iw->config.merge_factor;
5322
5996
  iw_merge_segments_from(iw, min_segment < 0 ? 0 : min_segment);
@@ -5344,7 +6018,9 @@ void iw_close(IndexWriter *iw)
5344
6018
 
5345
6019
  iw->write_lock->release(iw->write_lock);
5346
6020
  close_lock(iw->write_lock);
6021
+ iw->write_lock = NULL;
5347
6022
  store_deref(iw->store);
6023
+ deleter_destroy(iw->deleter);
5348
6024
 
5349
6025
  mutex_destroy(&iw->mutex);
5350
6026
  free(iw);
@@ -5368,16 +6044,17 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
5368
6044
  "Couldn't obtain write lock when opening IndexWriter");
5369
6045
  }
5370
6046
 
5371
-
5372
6047
  iw->sis = sis_read(store);
5373
- iw->fis = fis_read(store);
6048
+ iw->fis = iw->sis->fis;
6049
+ REF(iw->fis);
5374
6050
  XCATCHALL
5375
6051
  if (iw->write_lock) {
5376
6052
  iw->write_lock->release(iw->write_lock);
5377
6053
  close_lock(iw->write_lock);
6054
+ iw->write_lock = NULL;
5378
6055
  }
5379
6056
  if (iw->sis) sis_destroy(iw->sis);
5380
- if (iw->fis) fis_deref(iw->fis);
6057
+ if (analyzer) a_deref((Analyzer *)analyzer);
5381
6058
  free(iw);
5382
6059
  XENDTRY
5383
6060
 
@@ -5385,6 +6062,9 @@ IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
5385
6062
  iw->analyzer = analyzer ? (Analyzer *)analyzer
5386
6063
  : mb_standard_analyzer_new(true);
5387
6064
 
6065
+ iw->deleter = deleter_new(iw->sis, store);
6066
+ deleter_delete_deletable_files(iw->deleter);
6067
+
5388
6068
  REF(store);
5389
6069
  return iw;
5390
6070
  }
@@ -5400,18 +6080,19 @@ static void iw_cp_fields(IndexWriter *iw, SegmentReader *sr,
5400
6080
  InStream *fdt_in, *fdx_in;
5401
6081
  Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
5402
6082
  Store *store_out = iw->store;
6083
+ char *sr_segment = sr->si->name;
5403
6084
 
5404
6085
  sprintf(file_name, "%s.fdt", segment);
5405
6086
  fdt_out = store_out->new_output(store_out, file_name);
5406
6087
  sprintf(file_name, "%s.fdx", segment);
5407
6088
  fdx_out = store_out->new_output(store_out, file_name);
5408
6089
 
5409
- sprintf(file_name, "%s.fdt", sr->segment);
6090
+ sprintf(file_name, "%s.fdt", sr_segment);
5410
6091
  fdt_in = store_in->open_input(store_in, file_name);
5411
- sprintf(file_name, "%s.fdx", sr->segment);
6092
+ sprintf(file_name, "%s.fdx", sr_segment);
5412
6093
  fdx_in = store_in->open_input(store_in, file_name);
5413
6094
 
5414
- sprintf(file_name, "%s.del", sr->segment);
6095
+ sprintf(file_name, "%s.del", sr_segment);
5415
6096
  if (store_in->exists(store_in, file_name)) {
5416
6097
  OutStream *del_out;
5417
6098
  InStream *del_in = store_in->open_input(store_in, file_name);
@@ -5487,30 +6168,31 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
5487
6168
  InStream *tix_in, *tis_in, *tfx_in, *frq_in, *prx_in;
5488
6169
  Store *store_out = iw->store;
5489
6170
  Store *store_in = sr->cfs_store ? sr->cfs_store : sr->ir.store;
6171
+ char *sr_segment = sr->si->name;
5490
6172
 
5491
6173
  sprintf(file_name, "%s.tix", segment);
5492
6174
  tix_out = store_out->new_output(store_out, file_name);
5493
- sprintf(file_name, "%s.tix", sr->segment);
6175
+ sprintf(file_name, "%s.tix", sr_segment);
5494
6176
  tix_in = store_in->open_input(store_in, file_name);
5495
6177
 
5496
6178
  sprintf(file_name, "%s.tis", segment);
5497
6179
  tis_out = store_out->new_output(store_out, file_name);
5498
- sprintf(file_name, "%s.tis", sr->segment);
6180
+ sprintf(file_name, "%s.tis", sr_segment);
5499
6181
  tis_in = store_in->open_input(store_in, file_name);
5500
6182
 
5501
6183
  sprintf(file_name, "%s.tfx", segment);
5502
6184
  tfx_out = store_out->new_output(store_out, file_name);
5503
- sprintf(file_name, "%s.tfx", sr->segment);
6185
+ sprintf(file_name, "%s.tfx", sr_segment);
5504
6186
  tfx_in = store_in->open_input(store_in, file_name);
5505
6187
 
5506
6188
  sprintf(file_name, "%s.frq", segment);
5507
6189
  frq_out = store_out->new_output(store_out, file_name);
5508
- sprintf(file_name, "%s.frq", sr->segment);
6190
+ sprintf(file_name, "%s.frq", sr_segment);
5509
6191
  frq_in = store_in->open_input(store_in, file_name);
5510
6192
 
5511
6193
  sprintf(file_name, "%s.prx", segment);
5512
6194
  prx_out = store_out->new_output(store_out, file_name);
5513
- sprintf(file_name, "%s.prx", sr->segment);
6195
+ sprintf(file_name, "%s.prx", sr_segment);
5514
6196
  prx_in = store_in->open_input(store_in, file_name);
5515
6197
 
5516
6198
  if (map) {
@@ -5548,47 +6230,38 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
5548
6230
  }
5549
6231
 
5550
6232
  static void iw_cp_norms(IndexWriter *iw, SegmentReader *sr,
5551
- const char *segment, int *map)
6233
+ SegmentInfo *si, int *map)
5552
6234
  {
5553
6235
  int i;
5554
6236
  FieldInfos *fis = IR(sr)->fis;
5555
6237
  const int field_cnt = fis->size;
5556
6238
  InStream *norms_in;
5557
6239
  OutStream *norms_out;
5558
- Store *store_in = sr->ir.store;
5559
- Store *cfs_store_in = sr->cfs_store;
5560
6240
  Store *store_out = iw->store;
5561
6241
  char file_name_in[SEGMENT_NAME_MAX_LENGTH];
5562
- char *ext_ptr_in;
5563
6242
  char file_name_out[SEGMENT_NAME_MAX_LENGTH];
5564
- char *ext_ptr_out;
5565
- sprintf(file_name_in, "%s.", sr->segment);
5566
- ext_ptr_in = file_name_in + strlen(file_name_in);
5567
- sprintf(file_name_out, "%s.", segment);
5568
- ext_ptr_out = file_name_out + strlen(file_name_out);
5569
6243
 
5570
6244
  for (i = 0; i < field_cnt; i++) {
5571
- if (fi_has_norms(fis->fields[i])) {
5572
- Store *store = store_in;
5573
- sprintf(ext_ptr_in, "s%d", i);
5574
- if (!store->exists(store, file_name_in)) {
5575
- sprintf(ext_ptr_in, "f%d", i);
5576
- store = cfs_store_in;
5577
- }
5578
- if (store->exists(store, file_name_in)) {
5579
- norms_in = store->open_input(store, file_name_in);
5580
- sprintf(ext_ptr_out, "f%d", map ? map[i] : i);
5581
- norms_out = store_out->new_output(store_out, file_name_out);
5582
- is2os_copy_bytes(norms_in, norms_out, is_length(norms_in));
5583
- os_close(norms_out);
5584
- is_close(norms_in);
5585
- }
6245
+ if (fi_has_norms(fis->fields[i])
6246
+ && si_norm_file_name(sr->si, file_name_in, i)) {
6247
+ Store *store = (sr->si->use_compound_file
6248
+ && sr->si->norm_gens[i] == 0) ? sr->cfs_store
6249
+ : IR(sr)->store;
6250
+ int field_num = map ? map[i] : i;
6251
+
6252
+ norms_in = store->open_input(store, file_name_in);
6253
+ si_advance_norm_gen(si, field_num);
6254
+ si_norm_file_name(si, file_name_out, field_num);
6255
+ norms_out = store_out->new_output(store_out, file_name_out);
6256
+ is2os_copy_bytes(norms_in, norms_out, is_length(norms_in));
6257
+ os_close(norms_out);
6258
+ is_close(norms_in);
5586
6259
  }
5587
6260
  }
5588
6261
  }
5589
6262
 
5590
6263
  static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
5591
- const char *segment)
6264
+ SegmentInfo *si)
5592
6265
  {
5593
6266
  int i;
5594
6267
  FieldInfos *from_fis = IR(sr)->fis;
@@ -5600,19 +6273,19 @@ static void iw_cp_map_files(IndexWriter *iw, SegmentReader *sr,
5600
6273
  field_map[i] = fis_get_field_num(to_fis, from_fis->fields[i]->name);
5601
6274
  }
5602
6275
 
5603
- iw_cp_fields(iw, sr, segment, field_map);
5604
- iw_cp_terms(iw, sr, segment, field_map);
5605
- iw_cp_norms(iw, sr, segment, field_map);
6276
+ iw_cp_fields(iw, sr, si->name, field_map);
6277
+ iw_cp_terms( iw, sr, si->name, field_map);
6278
+ iw_cp_norms( iw, sr, si, field_map);
5606
6279
 
5607
6280
  free(field_map);
5608
6281
  }
5609
6282
 
5610
6283
  static void iw_cp_files(IndexWriter *iw, SegmentReader *sr,
5611
- const char *segment)
6284
+ SegmentInfo *si)
5612
6285
  {
5613
- iw_cp_fields(iw, sr, segment, NULL);
5614
- iw_cp_terms(iw, sr, segment, NULL);
5615
- iw_cp_norms(iw, sr, segment, NULL);
6286
+ iw_cp_fields(iw, sr, si->name, NULL);
6287
+ iw_cp_terms( iw, sr, si->name, NULL);
6288
+ iw_cp_norms( iw, sr, si, NULL);
5616
6289
  }
5617
6290
 
5618
6291
  static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
@@ -5641,10 +6314,10 @@ static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
5641
6314
  }
5642
6315
 
5643
6316
  if (must_map_fields) {
5644
- iw_cp_map_files(iw, sr, si->name);
6317
+ iw_cp_map_files(iw, sr, si);
5645
6318
  }
5646
6319
  else {
5647
- iw_cp_files(iw, sr, si->name);
6320
+ iw_cp_files(iw, sr, si);
5648
6321
  }
5649
6322
  }
5650
6323
 
@@ -5666,8 +6339,6 @@ static void iw_add_segments(IndexWriter *iw, IndexReader *ir)
5666
6339
  void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
5667
6340
  {
5668
6341
  int i;
5669
- Lock *commit_lock;
5670
-
5671
6342
  mutex_lock(&iw->mutex);
5672
6343
  iw_optimize_i(iw);
5673
6344
 
@@ -5676,16 +6347,9 @@ void iw_add_readers(IndexWriter *iw, IndexReader **readers, const int r_cnt)
5676
6347
  }
5677
6348
 
5678
6349
  mutex_lock(&iw->store->mutex);
5679
- commit_lock = open_lock(iw->store, COMMIT_LOCK_NAME);
5680
6350
 
5681
- if (!commit_lock->obtain(commit_lock)) {
5682
- RAISE(LOCK_ERROR, "Couldn't obtain commit lock to write segments file");
5683
- }
5684
6351
  /* commit the segments file and the fields file */
5685
- fis_write(iw->fis, iw->store);
5686
- sis_write(iw->sis, iw->store);
5687
- commit_lock->release(commit_lock);
5688
- close_lock(commit_lock);
6352
+ sis_write(iw->sis, iw->store, iw->deleter);
5689
6353
  mutex_unlock(&iw->store->mutex);
5690
6354
 
5691
6355
  iw_optimize_i(iw);