ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/ind.c CHANGED
@@ -1,5 +1,6 @@
1
- #include <string.h>
2
1
  #include "search.h"
2
+ #include <string.h>
3
+
3
4
 
4
5
  static char * const NON_UNIQUE_KEY_ERROR_MSG = "Tried to use a key that was not unique";
5
6
 
@@ -45,23 +46,22 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
45
46
  self->has_writes = false;
46
47
  if (store) {
47
48
  self->store = store;
48
- self->close_store = false;
49
+ ref(store);
49
50
  } else {
50
51
  self->store = open_ram_store();
51
52
  create = true;
52
- self->close_store = true;
53
53
  }
54
54
  if (analyzer) {
55
55
  self->analyzer = analyzer;
56
- self->close_analyzer = false;
56
+ ref(analyzer);
57
57
  } else {
58
58
  self->analyzer = mb_standard_analyzer_create(true);
59
- self->close_analyzer = true;
60
59
  }
61
60
  self->use_compound_file = true;
62
61
 
63
62
  if (create) {
64
- self->iw = iw_open(self->store, self->analyzer, create, false, false);
63
+ ref(self->analyzer);
64
+ self->iw = iw_open(self->store, self->analyzer, create);
65
65
  iw_close(self->iw);
66
66
  self->iw = NULL;
67
67
  }
@@ -73,6 +73,7 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
73
73
  self->auto_flush = false;
74
74
  self->check_latest = true;
75
75
 
76
+ ref(self->analyzer);
76
77
  self->qp = qp_create(all_fields, def_fields, self->analyzer);
77
78
  /* Index is a convenience class so set qp convenience options */
78
79
  self->qp->allow_any_fields = true;
@@ -87,8 +88,8 @@ void index_destroy(Index *self)
87
88
  mutex_destroy(&self->mutex);
88
89
  INDEX_CLOSE_READER(self);
89
90
  if (self->iw) iw_close(self->iw);
90
- if (self->close_store) self->store->close(self->store);
91
- if (self->close_analyzer) a_destroy(self->analyzer);
91
+ store_deref(self->store);
92
+ a_deref(self->analyzer);
92
93
  if (self->qp) qp_destroy(self->qp);
93
94
  if (self->id_field != ((char *)ID_STRING)) free(self->id_field);
94
95
  if (self->def_field != ((char *)ID_STRING)) free(self->def_field);
@@ -106,13 +107,19 @@ void index_flush(Index *self)
106
107
  }
107
108
  self->has_writes = false;
108
109
  }
110
+
109
111
  inline void ensure_writer_open(Index *self)
110
112
  {
111
113
  if (!self->iw) {
112
114
  INDEX_CLOSE_READER(self);
113
- self->iw = iw_open(self->store, self->analyzer, false, false, false);
115
+
116
+ /* make sure the analzyer isn't deleted by the IndexWriter */
117
+ ref(self->analyzer);
118
+ self->iw = iw_open(self->store, self->analyzer, false);
114
119
  self->iw->use_compound_file = self->use_compound_file;
115
- } else {
120
+ } else if (self->analyzer != self->iw->analyzer) {
121
+ a_deref(self->iw->analyzer);
122
+ ref(self->analyzer);
116
123
  self->iw->analyzer = self->analyzer; /* in case it has changed */
117
124
  }
118
125
  }
@@ -122,14 +129,14 @@ inline void ensure_reader_open(Index *self)
122
129
  if (self->ir) {
123
130
  if (self->check_latest && !ir_is_latest(self->ir)) {
124
131
  INDEX_CLOSE_READER(self);
125
- self->ir = ir_open(self->store, false);
132
+ self->ir = ir_open(self->store);
126
133
  }
127
134
  } else {
128
135
  if (self->iw) {
129
136
  iw_close(self->iw);
130
137
  self->iw = NULL;
131
138
  }
132
- self->ir = ir_open(self->store, false);
139
+ self->ir = ir_open(self->store);
133
140
  }
134
141
  }
135
142
 
@@ -203,7 +210,7 @@ static void inline index_add_doc_i(Index *self, Document *doc)
203
210
  } else if (td->total_hits == 1) {
204
211
  ir_delete_doc(self->ir, td->hits[0]->doc);
205
212
  }
206
- q->destroy(q);
213
+ q_deref(q);
207
214
  td_destroy(td);
208
215
  }
209
216
  ensure_writer_open(self);
@@ -215,11 +222,16 @@ void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
215
222
  {
216
223
  Analyzer *tmp_analyzer;
217
224
  mutex_lock(&self->store->ext_mutex);
218
- tmp_analyzer = self->analyzer;
219
- self->analyzer = analyzer;
220
- iw_add_doc(self->iw, doc);
221
- index_add_doc_i(self, doc);
222
- self->analyzer = tmp_analyzer;
225
+ if (analyzer != self->analyzer) {
226
+ ref(analyzer);
227
+ tmp_analyzer = self->analyzer;
228
+ self->analyzer = analyzer;
229
+ index_add_doc_i(self, doc);
230
+ self->analyzer = tmp_analyzer;
231
+ a_deref(analyzer);
232
+ } else {
233
+ index_add_doc_i(self, doc);
234
+ }
223
235
  mutex_unlock(&self->store->ext_mutex);
224
236
  }
225
237
 
@@ -272,7 +284,7 @@ TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
272
284
  TopDocs *td;
273
285
  query = index_get_query(self, qstr); /* will ensure_searcher is open */
274
286
  td = sea_search(self->sea, query, first_doc, num_docs, filter, sort);
275
- query->destroy(query);
287
+ q_deref(query);
276
288
  return td;
277
289
  }
278
290
 
@@ -363,7 +375,7 @@ void index_delete_id(Index *self, char *id)
363
375
  index_delete_term(self, &t);
364
376
  }
365
377
 
366
- static void index_qdel_i(Searcher *sea, int doc_num, void *arg)
378
+ static void index_qdel_i(Searcher *sea, int doc_num, float score, void *arg)
367
379
  {
368
380
  ir_delete_doc(sea->ir, doc_num);
369
381
  }
@@ -381,7 +393,7 @@ void index_delete_query_str(Index *self, char *qstr, Filter *f)
381
393
  {
382
394
  Query *q = index_get_query(self, qstr);
383
395
  index_delete_query(self, q, f);
384
- q->destroy(q);
396
+ q_deref(q);
385
397
  }
386
398
 
387
399
  Explanation *index_explain(Index *self, Query *q, int doc_num)
data/ext/index.h CHANGED
@@ -15,6 +15,7 @@
15
15
 
16
16
 
17
17
  #define SEGMENT_NAME_MAX_LENGTH 100
18
+ #define NOT_A_FIELD 0xFFFFFFFF
18
19
 
19
20
  typedef struct Config {
20
21
  int merge_factor;
@@ -75,7 +76,7 @@ FieldInfo *fi_create(char *name,
75
76
  bool store_pos,
76
77
  bool store_offset,
77
78
  bool omit_norms);
78
- void fi_destroy(void *p);
79
+ void fi_destroy(FieldInfo *fi);
79
80
 
80
81
  /****************************************************************************
81
82
  *
@@ -91,7 +92,7 @@ typedef struct FieldInfos {
91
92
 
92
93
  FieldInfos *fis_create();
93
94
  FieldInfos *fis_open(Store *store, char *filename);
94
- void fis_destroy(void *p);
95
+ void fis_destroy(FieldInfos *fis);
95
96
  FieldInfo *fis_add(FieldInfos *fis,
96
97
  char *name,
97
98
  bool is_indexed,
@@ -111,8 +112,9 @@ bool fis_has_vectors(FieldInfos *fis);
111
112
  void fis_write(FieldInfos *fis, Store *store, char *segment, char *ext);
112
113
  FieldInfos *fis_read(FieldInfos *fis, InStream *is);
113
114
  FieldInfos *fis_add_doc(FieldInfos *fis, Document *doc);
114
- unsigned long long fis_get_number(FieldInfos *fis, char *name);
115
+ ullong fis_get_number(FieldInfos *fis, char *name);
115
116
  FieldInfo *fis_get_fi(FieldInfos *fis, char *name);
117
+ bool fis_reorder_required(FieldInfos *fis, Document *doc);
116
118
 
117
119
  /****************************************************************************
118
120
  *
@@ -126,7 +128,7 @@ typedef struct TermBuffer {
126
128
  } TermBuffer;
127
129
 
128
130
  TermBuffer *tb_create();
129
- void tb_destroy(void *p);
131
+ void tb_destroy(TermBuffer *tb);
130
132
  TermBuffer *tb_set_term(TermBuffer *tb, Term *t);
131
133
  Term *tb_get_term(TermBuffer *tb);
132
134
  int tb_cmp(TermBuffer *tb1, TermBuffer *tb2);
@@ -149,7 +151,7 @@ typedef struct TermInfo {
149
151
 
150
152
  TermInfo *ti_create(int doc_freq, int freq_pointer, int prox_pointer, int skip_offset);
151
153
  TermInfo *ti_set(TermInfo *ti, int df, int fp, int pp, int so);
152
- void ti_destroy(void *p);
154
+ void ti_destroy(TermInfo *ti);
153
155
  TermInfo *ti_cpy(TermInfo *ti1, TermInfo *ti2);
154
156
  TermInfo *ti_clone(TermInfo *other);
155
157
  int ti_eq(TermInfo *ti1, TermInfo *ti2);
@@ -226,7 +228,7 @@ typedef struct TermInfosWriter {
226
228
  TermInfo *last_term_info;
227
229
  FieldInfos *fis;
228
230
  char *curr_field;
229
- int curr_field_num;
231
+ ullong curr_field_num;
230
232
  } TermInfosWriter;
231
233
 
232
234
  TermInfosWriter *tiw_open(Store *store,
@@ -332,7 +334,7 @@ TermVector *tv_create(const char *field,
332
334
  int *freqs,
333
335
  int **positions,
334
336
  TVOffsetInfo ***offsets);
335
- void tv_destroy(void *p);
337
+ void tv_destroy(TermVector *tv);
336
338
 
337
339
  /****************************************************************************
338
340
  *
@@ -441,11 +443,12 @@ struct TermDocEnum {
441
443
  /* * SegmentTermDocEnum * */
442
444
 
443
445
  typedef struct SegmentTermDocEnum SegmentTermDocEnum;
446
+
444
447
  struct SegmentTermDocEnum {
445
448
  SegmentReader *parent;
446
449
  InStream *freq_in;
447
- int count; // the number of docs for this term that we have skipped
448
- int doc_freq; // the number of doc this term appears in
450
+ int count; /* number of docs for this term skipped */
451
+ int doc_freq; /* number of doc this term appears in */
449
452
  BitVector *deleted_docs;
450
453
  int doc_num;
451
454
  int freq;
@@ -538,8 +541,8 @@ typedef struct Posting {
538
541
  } Posting;
539
542
 
540
543
  Posting *p_create(Term *term, int position, TVOffsetInfo *offset);
541
- void p_destroy(void *p);
542
- void p_add_occurance(Posting *p, int position, TVOffsetInfo *offset);
544
+ void p_destroy(Posting *self);
545
+ void p_add_occurance(Posting *self, int position, TVOffsetInfo *offset);
543
546
 
544
547
 
545
548
  /****************************************************************************
@@ -581,7 +584,7 @@ typedef struct SegmentInfo {
581
584
  } SegmentInfo;
582
585
 
583
586
  SegmentInfo *si_create(char *name, int doc_cnt, Store *store);
584
- void si_destroy(void *p);
587
+ void si_destroy(SegmentInfo *si);
585
588
  bool si_has_deletions(SegmentInfo *si);
586
589
  bool si_uses_compound_file(SegmentInfo *si);
587
590
  bool si_has_separate_norms(SegmentInfo *si);
@@ -598,12 +601,12 @@ typedef struct SegmentInfos {
598
601
  int scnt;
599
602
  int size;
600
603
  int counter;
601
- unsigned int version;
604
+ int version;
602
605
  int format;
603
606
  } SegmentInfos;
604
607
 
605
608
  SegmentInfos *sis_create();
606
- void sis_destroy(void *p);
609
+ void sis_destroy(SegmentInfos *sis);
607
610
  void sis_add_si(SegmentInfos *sis, SegmentInfo *si);
608
611
  void sis_del_at(SegmentInfos *sis, int at);
609
612
  void sis_del_from_to(SegmentInfos *sis, int from, int to);
@@ -619,24 +622,24 @@ int sis_read_current_version(Store *store);
619
622
  ****************************************************************************/
620
623
 
621
624
  enum FIELD_TYPE {
622
- // all fields
625
+ /* all fields */
623
626
  IR_ALL,
624
- // all indexed fields
627
+ /* all indexed fields */
625
628
  IR_INDEXED,
626
- // all fields which are not indexed
629
+ /* all fields which are not indexed */
627
630
  IR_UNINDEXED,
628
- // all fields which are indexed with termvectors enables
631
+ /* all fields which are indexed with termvectors enables */
629
632
  IR_INDEXED_WITH_TERM_VECTOR,
630
- // all fields which are indexed but don't have termvectors enabled
633
+ /* all fields which are indexed but don't have termvectors enabled */
631
634
  IR_INDEXED_NO_TERM_VECTOR,
632
- // all fields where termvectors are enabled. Please note that only standard
633
- // termvector fields are returned
635
+ /* all fields where termvectors are enabled. Please note that only standard */
636
+ /* termvector fields are returned */
634
637
  IR_TERM_VECTOR,
635
- // all field with termvectors wiht positions enabled
638
+ /* all field with termvectors wiht positions enabled */
636
639
  IR_TERM_VECTOR_WITH_POSITION,
637
- // all fields where termvectors with offset position are set
640
+ /* all fields where termvectors with offset position are set */
638
641
  IR_TERM_VECTOR_WITH_OFFSET,
639
- // all fields where termvectors with offset and position values set
642
+ /* all fields where termvectors with offset and position values set */
640
643
  IR_TERM_VECTOR_WITH_POSITION_OFFSET
641
644
  };
642
645
 
@@ -651,7 +654,6 @@ struct IndexReader {
651
654
  bool has_changes : 1;
652
655
  bool is_stale : 1;
653
656
  bool is_owner : 1;
654
- bool close_store : 1;
655
657
  TermVector *(*get_term_vector)(IndexReader *ir, int doc_num, char *field);
656
658
  Array *(*get_term_vectors)(IndexReader *ir, int doc_num);
657
659
  int (*num_docs)(IndexReader *ir);
@@ -659,8 +661,10 @@ struct IndexReader {
659
661
  Document *(*get_doc)(IndexReader *ir, int doc_num);
660
662
  uchar *(*get_norms)(IndexReader *ir, char *field);
661
663
  uchar *(*get_norms_always)(IndexReader *ir, char *field);
662
- void (*do_set_norm)(IndexReader *ir, int doc_num, char *field, uchar val);
663
- void (*get_norms_into)(IndexReader *ir, char *field, uchar *buf, int offset);
664
+ void (*do_set_norm)(IndexReader *ir, int doc_num, char *field,
665
+ uchar val);
666
+ void (*get_norms_into)(IndexReader *ir, char *field, uchar *buf,
667
+ int offset);
664
668
  TermEnum *(*terms)(IndexReader *ir);
665
669
  TermEnum *(*terms_from)(IndexReader *ir, Term *term);
666
670
  int (*doc_freq)(IndexReader *ir, Term *t);
@@ -675,17 +679,19 @@ struct IndexReader {
675
679
  void (*do_commit)(IndexReader *ir);
676
680
  void (*do_close)(IndexReader *ir);
677
681
  void (*acquire_write_lock)(IndexReader *ir);
682
+ int (*write_fields_i)(IndexReader *ir, OutStream *fdt_out,
683
+ OutStream *fdx_out);
678
684
  };
679
685
 
680
- IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_store);
681
- IndexReader *ir_open(Store *store, int close_store);
686
+ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner);
687
+ IndexReader *ir_open(Store *store);
682
688
  bool ir_index_exists(Store *store);
683
689
  void ir_close(IndexReader *ir);
684
690
  void ir_commit(IndexReader *ir);
685
691
  void ir_delete_doc(IndexReader *ir, int doc_num);
686
692
  void ir_undelete_all(IndexReader *ir);
687
693
  void ir_set_norm(IndexReader *ir, int doc_num, char *field, uchar val);
688
- void ir_destroy(void *p);
694
+ void ir_destroy(IndexReader *self);
689
695
  Document *ir_get_doc_with_term(IndexReader *ir, Term *term);
690
696
  TermDocEnum *ir_term_docs_for(IndexReader *ir, Term *term);
691
697
  TermDocEnum *ir_term_positions_for(IndexReader *ir, Term *term);
@@ -730,9 +736,8 @@ struct SegmentReader {
730
736
  uchar *fake_norms;
731
737
  };
732
738
 
733
- IndexReader *sr_open(SegmentInfos *sis, int si_num, int is_owner, int close_store);
739
+ IndexReader *sr_open(SegmentInfos *sis, int si_num, bool is_owner);
734
740
  IndexReader *sr_open_si(SegmentInfo *si);
735
- //int sr_has_deletions(IndexReader *ir);
736
741
 
737
742
  /****************************************************************************
738
743
  *
@@ -751,7 +756,7 @@ typedef struct MultiReader {
751
756
  } MultiReader;
752
757
 
753
758
  IndexReader *mr_open(Store *store, SegmentInfos *sis, IndexReader **readers,
754
- int rcnt, int close_store);
759
+ int rcnt);
755
760
 
756
761
  /****************************************************************************
757
762
  *
@@ -769,9 +774,9 @@ typedef struct SegmentMergeInfo {
769
774
  } SegmentMergeInfo;
770
775
 
771
776
  SegmentMergeInfo *smi_create(int base, TermEnum *te, IndexReader *ir);
772
- void smi_destroy(void *p);
777
+ void smi_destroy(SegmentMergeInfo *smi);
773
778
  TermBuffer *smi_next(SegmentMergeInfo *smi);
774
- bool smi_lt(void *p1, void *p2);
779
+ bool smi_lt(SegmentMergeInfo *smi1, SegmentMergeInfo *smi2);
775
780
 
776
781
  /****************************************************************************
777
782
  *
@@ -801,7 +806,7 @@ typedef struct SegmentMerger {
801
806
  } SegmentMerger;
802
807
 
803
808
  SegmentMerger *sm_create(Store *store, char *name, int term_index_interval);
804
- void sm_destroy(void *p);
809
+ void sm_destroy(SegmentMerger *sm);
805
810
  void sm_add(SegmentMerger *sm, IndexReader *ir);
806
811
  int sm_merge(SegmentMerger *sm);
807
812
  Array *sm_create_compound_file(SegmentMerger *sm, char *fname);
@@ -817,6 +822,8 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *fname);
817
822
  #define COMMIT_LOCK_NAME "commit"
818
823
  struct IndexWriter {
819
824
  mutex_t mutex;
825
+ HshTable *postings;
826
+ FieldInfos *fis;
820
827
  int merge_factor;
821
828
  int min_merge_docs;
822
829
  int max_merge_docs;
@@ -828,13 +835,11 @@ struct IndexWriter {
828
835
  SegmentInfos *sis;
829
836
  Store *ram_store;
830
837
  Lock *write_lock;
831
- bool close_store : 1;
832
- bool close_analyzer : 1;
833
838
  bool use_compound_file : 1;
834
839
  };
835
840
 
836
841
  IndexWriter *iw_open(Store *store, Analyzer *analyzer,
837
- bool create, bool close_store, bool close_analyzer);
842
+ bool create);
838
843
  void iw_flush_ram_segments(IndexWriter *iw);
839
844
  void iw_close(IndexWriter *iw);
840
845
  int iw_doc_count(IndexWriter *iw);
data/ext/index_io.c CHANGED
@@ -56,15 +56,17 @@ void os_seek(OutStream *os, int new_pos)
56
56
 
57
57
  inline void os_write_byte(OutStream *os, uchar b)
58
58
  {
59
- if (os->buf.pos >= BUFFER_SIZE)
59
+ if (os->buf.pos >= BUFFER_SIZE) {
60
60
  os_flush(os);
61
+ }
61
62
  write_byte(os, b);
62
63
  }
63
64
 
64
65
  void os_write_bytes(OutStream *os, uchar *b, int len)
65
66
  {
66
- if (os->buf.pos > 0) // flush buffer
67
+ if (os->buf.pos > 0) { /* flush buffer */
67
68
  os_flush(os);
69
+ }
68
70
 
69
71
  if (len < BUFFER_SIZE) {
70
72
  os->flush_internal(os, b, len);
@@ -99,8 +101,9 @@ void is_refill(InStream *is)
99
101
  int start = is->buf.start + is->buf.pos;
100
102
  int last = start + BUFFER_SIZE;
101
103
  int flen = is->length_internal(is);
102
- if (last > flen) // don't read past EOF
104
+ if (last > flen) { /* don't read past EOF */
103
105
  last = flen;
106
+ }
104
107
 
105
108
  is->buf.len = last - start;
106
109
  if (is->buf.len <= 0) {
@@ -116,8 +119,9 @@ void is_refill(InStream *is)
116
119
  #define read_byte(is) is->buf.buf[is->buf.pos++]
117
120
  inline uchar is_read_byte(InStream *is)
118
121
  {
119
- if (is->buf.pos >= is->buf.len)
122
+ if (is->buf.pos >= is->buf.len) {
120
123
  is_refill(is);
124
+ }
121
125
 
122
126
  return read_byte(is);
123
127
  }
@@ -182,17 +186,17 @@ is_read_int(InStream *is)
182
186
  (int)is_read_byte(is);
183
187
  }
184
188
 
185
- long long
189
+ llong
186
190
  is_read_long(InStream *is)
187
191
  {
188
- return ((long long)is_read_byte(is) << 56) |
189
- ((long long)is_read_byte(is) << 48) |
190
- ((long long)is_read_byte(is) << 40) |
191
- ((long long)is_read_byte(is) << 32) |
192
- ((long long)is_read_byte(is) << 24) |
193
- ((long long)is_read_byte(is) << 16) |
194
- ((long long)is_read_byte(is) << 8) |
195
- (long long)is_read_byte(is);
192
+ return ((llong)is_read_byte(is) << 56) |
193
+ ((llong)is_read_byte(is) << 48) |
194
+ ((llong)is_read_byte(is) << 40) |
195
+ ((llong)is_read_byte(is) << 32) |
196
+ ((llong)is_read_byte(is) << 24) |
197
+ ((llong)is_read_byte(is) << 16) |
198
+ ((llong)is_read_byte(is) << 8) |
199
+ (llong)is_read_byte(is);
196
200
  }
197
201
 
198
202
  unsigned int
@@ -204,24 +208,24 @@ is_read_uint(InStream *is)
204
208
  (unsigned int)is_read_byte(is);
205
209
  }
206
210
 
207
- unsigned long long
211
+ ullong
208
212
  is_read_ulong(InStream *is)
209
213
  {
210
- return ((unsigned long long)is_read_byte(is) << 56) |
211
- ((unsigned long long)is_read_byte(is) << 48) |
212
- ((unsigned long long)is_read_byte(is) << 40) |
213
- ((unsigned long long)is_read_byte(is) << 32) |
214
- ((unsigned long long)is_read_byte(is) << 24) |
215
- ((unsigned long long)is_read_byte(is) << 16) |
216
- ((unsigned long long)is_read_byte(is) << 8) |
217
- (unsigned long long)is_read_byte(is);
214
+ return ((ullong)is_read_byte(is) << 56) |
215
+ ((ullong)is_read_byte(is) << 48) |
216
+ ((ullong)is_read_byte(is) << 40) |
217
+ ((ullong)is_read_byte(is) << 32) |
218
+ ((ullong)is_read_byte(is) << 24) |
219
+ ((ullong)is_read_byte(is) << 16) |
220
+ ((ullong)is_read_byte(is) << 8) |
221
+ (ullong)is_read_byte(is);
218
222
  }
219
223
 
220
224
  /* optimized to use unchecked read_byte if there is definitely space */
221
- inline unsigned long long
225
+ inline ullong
222
226
  is_read_vint(InStream *is)
223
227
  {
224
- register unsigned long long res, b;
228
+ register ullong res, b;
225
229
  register int shift = 7;
226
230
 
227
231
  if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
@@ -283,63 +287,63 @@ is_read_string(InStream *is)
283
287
  void
284
288
  os_write_int(OutStream *os, int l)
285
289
  {
286
- os_write_byte(os, (l >> 24) & 0xFF);
287
- os_write_byte(os, (l >> 16) & 0xFF);
288
- os_write_byte(os, (l >> 8) & 0xFF);
289
- os_write_byte(os, l & 0xFF);
290
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
291
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
292
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
293
+ os_write_byte(os, (uchar)(l & 0xFF));
290
294
  }
291
295
 
292
296
  void
293
- os_write_long(OutStream *os, long long l)
297
+ os_write_long(OutStream *os, llong l)
294
298
  {
295
- os_write_byte(os, (l >> 56) & 0xFF);
296
- os_write_byte(os, (l >> 48) & 0xFF);
297
- os_write_byte(os, (l >> 40) & 0xFF);
298
- os_write_byte(os, (l >> 32) & 0xFF);
299
- os_write_byte(os, (l >> 24) & 0xFF);
300
- os_write_byte(os, (l >> 16) & 0xFF);
301
- os_write_byte(os, (l >> 8) & 0xFF);
302
- os_write_byte(os, l & 0xFF);
299
+ os_write_byte(os, (uchar)((l >> 56) & 0xFF));
300
+ os_write_byte(os, (uchar)((l >> 48) & 0xFF));
301
+ os_write_byte(os, (uchar)((l >> 40) & 0xFF));
302
+ os_write_byte(os, (uchar)((l >> 32) & 0xFF));
303
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
304
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
305
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
306
+ os_write_byte(os, (uchar)(l & 0xFF));
303
307
  }
304
308
 
305
309
  void
306
310
  os_write_uint(OutStream *os, unsigned int l)
307
311
  {
308
- os_write_byte(os, (l >> 24) & 0xFF);
309
- os_write_byte(os, (l >> 16) & 0xFF);
310
- os_write_byte(os, (l >> 8) & 0xFF);
311
- os_write_byte(os, l & 0xFF);
312
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
313
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
314
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
315
+ os_write_byte(os, (uchar)(l & 0xFF));
312
316
  }
313
317
 
314
318
  void
315
- os_write_ulong(OutStream *os, unsigned long long l)
319
+ os_write_ulong(OutStream *os, ullong l)
316
320
  {
317
- os_write_byte(os, (l >> 56) & 0xFF);
318
- os_write_byte(os, (l >> 48) & 0xFF);
319
- os_write_byte(os, (l >> 40) & 0xFF);
320
- os_write_byte(os, (l >> 32) & 0xFF);
321
- os_write_byte(os, (l >> 24) & 0xFF);
322
- os_write_byte(os, (l >> 16) & 0xFF);
323
- os_write_byte(os, (l >> 8) & 0xFF);
324
- os_write_byte(os, l & 0xFF);
321
+ os_write_byte(os, (uchar)((l >> 56) & 0xFF));
322
+ os_write_byte(os, (uchar)((l >> 48) & 0xFF));
323
+ os_write_byte(os, (uchar)((l >> 40) & 0xFF));
324
+ os_write_byte(os, (uchar)((l >> 32) & 0xFF));
325
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
326
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
327
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
328
+ os_write_byte(os, (uchar)(l & 0xFF));
325
329
  }
326
330
 
327
331
  /* optimized to use an unchecked write if there is space */
328
332
  inline void
329
- os_write_vint(OutStream *os, register unsigned long long i)
333
+ os_write_vint(OutStream *os, register ullong i)
330
334
  {
331
335
  if (os->buf.pos > VINT_END) {
332
336
  while (i > 127) {
333
- os_write_byte(os, (i & 0x7f) | 0x80);
337
+ os_write_byte(os, (uchar)((i & 0x7f) | 0x80));
334
338
  i >>= 7;
335
339
  }
336
- os_write_byte(os, i);
340
+ os_write_byte(os, (uchar)(i));
337
341
  } else {
338
342
  while (i > 127) {
339
- write_byte(os, (i & 0x7f) | 0x80);
343
+ write_byte(os, (uchar)((i & 0x7f) | 0x80));
340
344
  i >>= 7;
341
345
  }
342
- write_byte(os, i);
346
+ write_byte(os, (uchar)(i));
343
347
  }
344
348
  }
345
349
 
@@ -356,7 +360,7 @@ os_write_chars(OutStream *os, char *buf, int start, int length)
356
360
  void
357
361
  os_write_string(OutStream *os, char *str)
358
362
  {
359
- int len = strlen(str);
363
+ int len = (int)strlen(str);
360
364
  os_write_vint(os, len);
361
365
 
362
366
  os_write_chars(os, str, 0, len);
@@ -364,6 +368,6 @@ os_write_string(OutStream *os, char *str)
364
368
 
365
369
  int file_is_lock(char *filename)
366
370
  {
367
- int start = strlen(filename) - 4;
371
+ int start = (int)strlen(filename) - 4;
368
372
  return ((start > 0) && (strcmp(".lck", &filename[start]) == 0));
369
373
  }