ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/ind.c CHANGED
@@ -1,5 +1,6 @@
1
- #include <string.h>
2
1
  #include "search.h"
2
+ #include <string.h>
3
+
3
4
 
4
5
  static char * const NON_UNIQUE_KEY_ERROR_MSG = "Tried to use a key that was not unique";
5
6
 
@@ -45,23 +46,22 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
45
46
  self->has_writes = false;
46
47
  if (store) {
47
48
  self->store = store;
48
- self->close_store = false;
49
+ ref(store);
49
50
  } else {
50
51
  self->store = open_ram_store();
51
52
  create = true;
52
- self->close_store = true;
53
53
  }
54
54
  if (analyzer) {
55
55
  self->analyzer = analyzer;
56
- self->close_analyzer = false;
56
+ ref(analyzer);
57
57
  } else {
58
58
  self->analyzer = mb_standard_analyzer_create(true);
59
- self->close_analyzer = true;
60
59
  }
61
60
  self->use_compound_file = true;
62
61
 
63
62
  if (create) {
64
- self->iw = iw_open(self->store, self->analyzer, create, false, false);
63
+ ref(self->analyzer);
64
+ self->iw = iw_open(self->store, self->analyzer, create);
65
65
  iw_close(self->iw);
66
66
  self->iw = NULL;
67
67
  }
@@ -73,6 +73,7 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
73
73
  self->auto_flush = false;
74
74
  self->check_latest = true;
75
75
 
76
+ ref(self->analyzer);
76
77
  self->qp = qp_create(all_fields, def_fields, self->analyzer);
77
78
  /* Index is a convenience class so set qp convenience options */
78
79
  self->qp->allow_any_fields = true;
@@ -87,8 +88,8 @@ void index_destroy(Index *self)
87
88
  mutex_destroy(&self->mutex);
88
89
  INDEX_CLOSE_READER(self);
89
90
  if (self->iw) iw_close(self->iw);
90
- if (self->close_store) self->store->close(self->store);
91
- if (self->close_analyzer) a_destroy(self->analyzer);
91
+ store_deref(self->store);
92
+ a_deref(self->analyzer);
92
93
  if (self->qp) qp_destroy(self->qp);
93
94
  if (self->id_field != ((char *)ID_STRING)) free(self->id_field);
94
95
  if (self->def_field != ((char *)ID_STRING)) free(self->def_field);
@@ -106,13 +107,19 @@ void index_flush(Index *self)
106
107
  }
107
108
  self->has_writes = false;
108
109
  }
110
+
109
111
  inline void ensure_writer_open(Index *self)
110
112
  {
111
113
  if (!self->iw) {
112
114
  INDEX_CLOSE_READER(self);
113
- self->iw = iw_open(self->store, self->analyzer, false, false, false);
115
+
116
 /* make sure the analyzer isn't deleted by the IndexWriter */
117
+ ref(self->analyzer);
118
+ self->iw = iw_open(self->store, self->analyzer, false);
114
119
  self->iw->use_compound_file = self->use_compound_file;
115
- } else {
120
+ } else if (self->analyzer != self->iw->analyzer) {
121
+ a_deref(self->iw->analyzer);
122
+ ref(self->analyzer);
116
123
  self->iw->analyzer = self->analyzer; /* in case it has changed */
117
124
  }
118
125
  }
@@ -122,14 +129,14 @@ inline void ensure_reader_open(Index *self)
122
129
  if (self->ir) {
123
130
  if (self->check_latest && !ir_is_latest(self->ir)) {
124
131
  INDEX_CLOSE_READER(self);
125
- self->ir = ir_open(self->store, false);
132
+ self->ir = ir_open(self->store);
126
133
  }
127
134
  } else {
128
135
  if (self->iw) {
129
136
  iw_close(self->iw);
130
137
  self->iw = NULL;
131
138
  }
132
- self->ir = ir_open(self->store, false);
139
+ self->ir = ir_open(self->store);
133
140
  }
134
141
  }
135
142
 
@@ -203,7 +210,7 @@ static void inline index_add_doc_i(Index *self, Document *doc)
203
210
  } else if (td->total_hits == 1) {
204
211
  ir_delete_doc(self->ir, td->hits[0]->doc);
205
212
  }
206
- q->destroy(q);
213
+ q_deref(q);
207
214
  td_destroy(td);
208
215
  }
209
216
  ensure_writer_open(self);
@@ -215,11 +222,16 @@ void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
215
222
  {
216
223
  Analyzer *tmp_analyzer;
217
224
  mutex_lock(&self->store->ext_mutex);
218
- tmp_analyzer = self->analyzer;
219
- self->analyzer = analyzer;
220
- iw_add_doc(self->iw, doc);
221
- index_add_doc_i(self, doc);
222
- self->analyzer = tmp_analyzer;
225
+ if (analyzer != self->analyzer) {
226
+ ref(analyzer);
227
+ tmp_analyzer = self->analyzer;
228
+ self->analyzer = analyzer;
229
+ index_add_doc_i(self, doc);
230
+ self->analyzer = tmp_analyzer;
231
+ a_deref(analyzer);
232
+ } else {
233
+ index_add_doc_i(self, doc);
234
+ }
223
235
  mutex_unlock(&self->store->ext_mutex);
224
236
  }
225
237
 
@@ -272,7 +284,7 @@ TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
272
284
  TopDocs *td;
273
285
  query = index_get_query(self, qstr); /* will ensure_searcher is open */
274
286
  td = sea_search(self->sea, query, first_doc, num_docs, filter, sort);
275
- query->destroy(query);
287
+ q_deref(query);
276
288
  return td;
277
289
  }
278
290
 
@@ -363,7 +375,7 @@ void index_delete_id(Index *self, char *id)
363
375
  index_delete_term(self, &t);
364
376
  }
365
377
 
366
- static void index_qdel_i(Searcher *sea, int doc_num, void *arg)
378
+ static void index_qdel_i(Searcher *sea, int doc_num, float score, void *arg)
367
379
  {
368
380
  ir_delete_doc(sea->ir, doc_num);
369
381
  }
@@ -381,7 +393,7 @@ void index_delete_query_str(Index *self, char *qstr, Filter *f)
381
393
  {
382
394
  Query *q = index_get_query(self, qstr);
383
395
  index_delete_query(self, q, f);
384
- q->destroy(q);
396
+ q_deref(q);
385
397
  }
386
398
 
387
399
  Explanation *index_explain(Index *self, Query *q, int doc_num)
data/ext/index.h CHANGED
@@ -15,6 +15,7 @@
15
15
 
16
16
 
17
17
  #define SEGMENT_NAME_MAX_LENGTH 100
18
+ #define NOT_A_FIELD 0xFFFFFFFF
18
19
 
19
20
  typedef struct Config {
20
21
  int merge_factor;
@@ -75,7 +76,7 @@ FieldInfo *fi_create(char *name,
75
76
  bool store_pos,
76
77
  bool store_offset,
77
78
  bool omit_norms);
78
- void fi_destroy(void *p);
79
+ void fi_destroy(FieldInfo *fi);
79
80
 
80
81
  /****************************************************************************
81
82
  *
@@ -91,7 +92,7 @@ typedef struct FieldInfos {
91
92
 
92
93
  FieldInfos *fis_create();
93
94
  FieldInfos *fis_open(Store *store, char *filename);
94
- void fis_destroy(void *p);
95
+ void fis_destroy(FieldInfos *fis);
95
96
  FieldInfo *fis_add(FieldInfos *fis,
96
97
  char *name,
97
98
  bool is_indexed,
@@ -111,8 +112,9 @@ bool fis_has_vectors(FieldInfos *fis);
111
112
  void fis_write(FieldInfos *fis, Store *store, char *segment, char *ext);
112
113
  FieldInfos *fis_read(FieldInfos *fis, InStream *is);
113
114
  FieldInfos *fis_add_doc(FieldInfos *fis, Document *doc);
114
- unsigned long long fis_get_number(FieldInfos *fis, char *name);
115
+ ullong fis_get_number(FieldInfos *fis, char *name);
115
116
  FieldInfo *fis_get_fi(FieldInfos *fis, char *name);
117
+ bool fis_reorder_required(FieldInfos *fis, Document *doc);
116
118
 
117
119
  /****************************************************************************
118
120
  *
@@ -126,7 +128,7 @@ typedef struct TermBuffer {
126
128
  } TermBuffer;
127
129
 
128
130
  TermBuffer *tb_create();
129
- void tb_destroy(void *p);
131
+ void tb_destroy(TermBuffer *tb);
130
132
  TermBuffer *tb_set_term(TermBuffer *tb, Term *t);
131
133
  Term *tb_get_term(TermBuffer *tb);
132
134
  int tb_cmp(TermBuffer *tb1, TermBuffer *tb2);
@@ -149,7 +151,7 @@ typedef struct TermInfo {
149
151
 
150
152
  TermInfo *ti_create(int doc_freq, int freq_pointer, int prox_pointer, int skip_offset);
151
153
  TermInfo *ti_set(TermInfo *ti, int df, int fp, int pp, int so);
152
- void ti_destroy(void *p);
154
+ void ti_destroy(TermInfo *ti);
153
155
  TermInfo *ti_cpy(TermInfo *ti1, TermInfo *ti2);
154
156
  TermInfo *ti_clone(TermInfo *other);
155
157
  int ti_eq(TermInfo *ti1, TermInfo *ti2);
@@ -226,7 +228,7 @@ typedef struct TermInfosWriter {
226
228
  TermInfo *last_term_info;
227
229
  FieldInfos *fis;
228
230
  char *curr_field;
229
- int curr_field_num;
231
+ ullong curr_field_num;
230
232
  } TermInfosWriter;
231
233
 
232
234
  TermInfosWriter *tiw_open(Store *store,
@@ -332,7 +334,7 @@ TermVector *tv_create(const char *field,
332
334
  int *freqs,
333
335
  int **positions,
334
336
  TVOffsetInfo ***offsets);
335
- void tv_destroy(void *p);
337
+ void tv_destroy(TermVector *tv);
336
338
 
337
339
  /****************************************************************************
338
340
  *
@@ -441,11 +443,12 @@ struct TermDocEnum {
441
443
  /* * SegmentTermDocEnum * */
442
444
 
443
445
  typedef struct SegmentTermDocEnum SegmentTermDocEnum;
446
+
444
447
  struct SegmentTermDocEnum {
445
448
  SegmentReader *parent;
446
449
  InStream *freq_in;
447
- int count; // the number of docs for this term that we have skipped
448
- int doc_freq; // the number of doc this term appears in
450
+ int count; /* number of docs for this term skipped */
451
+ int doc_freq; /* number of doc this term appears in */
449
452
  BitVector *deleted_docs;
450
453
  int doc_num;
451
454
  int freq;
@@ -538,8 +541,8 @@ typedef struct Posting {
538
541
  } Posting;
539
542
 
540
543
  Posting *p_create(Term *term, int position, TVOffsetInfo *offset);
541
- void p_destroy(void *p);
542
- void p_add_occurance(Posting *p, int position, TVOffsetInfo *offset);
544
+ void p_destroy(Posting *self);
545
+ void p_add_occurance(Posting *self, int position, TVOffsetInfo *offset);
543
546
 
544
547
 
545
548
  /****************************************************************************
@@ -581,7 +584,7 @@ typedef struct SegmentInfo {
581
584
  } SegmentInfo;
582
585
 
583
586
  SegmentInfo *si_create(char *name, int doc_cnt, Store *store);
584
- void si_destroy(void *p);
587
+ void si_destroy(SegmentInfo *si);
585
588
  bool si_has_deletions(SegmentInfo *si);
586
589
  bool si_uses_compound_file(SegmentInfo *si);
587
590
  bool si_has_separate_norms(SegmentInfo *si);
@@ -598,12 +601,12 @@ typedef struct SegmentInfos {
598
601
  int scnt;
599
602
  int size;
600
603
  int counter;
601
- unsigned int version;
604
+ int version;
602
605
  int format;
603
606
  } SegmentInfos;
604
607
 
605
608
  SegmentInfos *sis_create();
606
- void sis_destroy(void *p);
609
+ void sis_destroy(SegmentInfos *sis);
607
610
  void sis_add_si(SegmentInfos *sis, SegmentInfo *si);
608
611
  void sis_del_at(SegmentInfos *sis, int at);
609
612
  void sis_del_from_to(SegmentInfos *sis, int from, int to);
@@ -619,24 +622,24 @@ int sis_read_current_version(Store *store);
619
622
  ****************************************************************************/
620
623
 
621
624
  enum FIELD_TYPE {
622
- // all fields
625
+ /* all fields */
623
626
  IR_ALL,
624
- // all indexed fields
627
+ /* all indexed fields */
625
628
  IR_INDEXED,
626
- // all fields which are not indexed
629
+ /* all fields which are not indexed */
627
630
  IR_UNINDEXED,
628
- // all fields which are indexed with termvectors enables
631
+ /* all fields which are indexed with termvectors enabled */
629
632
  IR_INDEXED_WITH_TERM_VECTOR,
630
- // all fields which are indexed but don't have termvectors enabled
633
+ /* all fields which are indexed but don't have termvectors enabled */
631
634
  IR_INDEXED_NO_TERM_VECTOR,
632
- // all fields where termvectors are enabled. Please note that only standard
633
- // termvector fields are returned
635
+ /* all fields where termvectors are enabled. Please note that only standard */
636
+ /* termvector fields are returned */
634
637
  IR_TERM_VECTOR,
635
- // all field with termvectors wiht positions enabled
638
+ /* all fields with termvectors with positions enabled */
636
639
  IR_TERM_VECTOR_WITH_POSITION,
637
- // all fields where termvectors with offset position are set
640
+ /* all fields where termvectors with offset position are set */
638
641
  IR_TERM_VECTOR_WITH_OFFSET,
639
- // all fields where termvectors with offset and position values set
642
+ /* all fields where termvectors with offset and position values set */
640
643
  IR_TERM_VECTOR_WITH_POSITION_OFFSET
641
644
  };
642
645
 
@@ -651,7 +654,6 @@ struct IndexReader {
651
654
  bool has_changes : 1;
652
655
  bool is_stale : 1;
653
656
  bool is_owner : 1;
654
- bool close_store : 1;
655
657
  TermVector *(*get_term_vector)(IndexReader *ir, int doc_num, char *field);
656
658
  Array *(*get_term_vectors)(IndexReader *ir, int doc_num);
657
659
  int (*num_docs)(IndexReader *ir);
@@ -659,8 +661,10 @@ struct IndexReader {
659
661
  Document *(*get_doc)(IndexReader *ir, int doc_num);
660
662
  uchar *(*get_norms)(IndexReader *ir, char *field);
661
663
  uchar *(*get_norms_always)(IndexReader *ir, char *field);
662
- void (*do_set_norm)(IndexReader *ir, int doc_num, char *field, uchar val);
663
- void (*get_norms_into)(IndexReader *ir, char *field, uchar *buf, int offset);
664
+ void (*do_set_norm)(IndexReader *ir, int doc_num, char *field,
665
+ uchar val);
666
+ void (*get_norms_into)(IndexReader *ir, char *field, uchar *buf,
667
+ int offset);
664
668
  TermEnum *(*terms)(IndexReader *ir);
665
669
  TermEnum *(*terms_from)(IndexReader *ir, Term *term);
666
670
  int (*doc_freq)(IndexReader *ir, Term *t);
@@ -675,17 +679,19 @@ struct IndexReader {
675
679
  void (*do_commit)(IndexReader *ir);
676
680
  void (*do_close)(IndexReader *ir);
677
681
  void (*acquire_write_lock)(IndexReader *ir);
682
+ int (*write_fields_i)(IndexReader *ir, OutStream *fdt_out,
683
+ OutStream *fdx_out);
678
684
  };
679
685
 
680
- IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner, int close_store);
681
- IndexReader *ir_open(Store *store, int close_store);
686
+ IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner);
687
+ IndexReader *ir_open(Store *store);
682
688
  bool ir_index_exists(Store *store);
683
689
  void ir_close(IndexReader *ir);
684
690
  void ir_commit(IndexReader *ir);
685
691
  void ir_delete_doc(IndexReader *ir, int doc_num);
686
692
  void ir_undelete_all(IndexReader *ir);
687
693
  void ir_set_norm(IndexReader *ir, int doc_num, char *field, uchar val);
688
- void ir_destroy(void *p);
694
+ void ir_destroy(IndexReader *self);
689
695
  Document *ir_get_doc_with_term(IndexReader *ir, Term *term);
690
696
  TermDocEnum *ir_term_docs_for(IndexReader *ir, Term *term);
691
697
  TermDocEnum *ir_term_positions_for(IndexReader *ir, Term *term);
@@ -730,9 +736,8 @@ struct SegmentReader {
730
736
  uchar *fake_norms;
731
737
  };
732
738
 
733
- IndexReader *sr_open(SegmentInfos *sis, int si_num, int is_owner, int close_store);
739
+ IndexReader *sr_open(SegmentInfos *sis, int si_num, bool is_owner);
734
740
  IndexReader *sr_open_si(SegmentInfo *si);
735
- //int sr_has_deletions(IndexReader *ir);
736
741
 
737
742
  /****************************************************************************
738
743
  *
@@ -751,7 +756,7 @@ typedef struct MultiReader {
751
756
  } MultiReader;
752
757
 
753
758
  IndexReader *mr_open(Store *store, SegmentInfos *sis, IndexReader **readers,
754
- int rcnt, int close_store);
759
+ int rcnt);
755
760
 
756
761
  /****************************************************************************
757
762
  *
@@ -769,9 +774,9 @@ typedef struct SegmentMergeInfo {
769
774
  } SegmentMergeInfo;
770
775
 
771
776
  SegmentMergeInfo *smi_create(int base, TermEnum *te, IndexReader *ir);
772
- void smi_destroy(void *p);
777
+ void smi_destroy(SegmentMergeInfo *smi);
773
778
  TermBuffer *smi_next(SegmentMergeInfo *smi);
774
- bool smi_lt(void *p1, void *p2);
779
+ bool smi_lt(SegmentMergeInfo *smi1, SegmentMergeInfo *smi2);
775
780
 
776
781
  /****************************************************************************
777
782
  *
@@ -801,7 +806,7 @@ typedef struct SegmentMerger {
801
806
  } SegmentMerger;
802
807
 
803
808
  SegmentMerger *sm_create(Store *store, char *name, int term_index_interval);
804
- void sm_destroy(void *p);
809
+ void sm_destroy(SegmentMerger *sm);
805
810
  void sm_add(SegmentMerger *sm, IndexReader *ir);
806
811
  int sm_merge(SegmentMerger *sm);
807
812
  Array *sm_create_compound_file(SegmentMerger *sm, char *fname);
@@ -817,6 +822,8 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *fname);
817
822
  #define COMMIT_LOCK_NAME "commit"
818
823
  struct IndexWriter {
819
824
  mutex_t mutex;
825
+ HshTable *postings;
826
+ FieldInfos *fis;
820
827
  int merge_factor;
821
828
  int min_merge_docs;
822
829
  int max_merge_docs;
@@ -828,13 +835,11 @@ struct IndexWriter {
828
835
  SegmentInfos *sis;
829
836
  Store *ram_store;
830
837
  Lock *write_lock;
831
- bool close_store : 1;
832
- bool close_analyzer : 1;
833
838
  bool use_compound_file : 1;
834
839
  };
835
840
 
836
841
  IndexWriter *iw_open(Store *store, Analyzer *analyzer,
837
- bool create, bool close_store, bool close_analyzer);
842
+ bool create);
838
843
  void iw_flush_ram_segments(IndexWriter *iw);
839
844
  void iw_close(IndexWriter *iw);
840
845
  int iw_doc_count(IndexWriter *iw);
data/ext/index_io.c CHANGED
@@ -56,15 +56,17 @@ void os_seek(OutStream *os, int new_pos)
56
56
 
57
57
  inline void os_write_byte(OutStream *os, uchar b)
58
58
  {
59
- if (os->buf.pos >= BUFFER_SIZE)
59
+ if (os->buf.pos >= BUFFER_SIZE) {
60
60
  os_flush(os);
61
+ }
61
62
  write_byte(os, b);
62
63
  }
63
64
 
64
65
  void os_write_bytes(OutStream *os, uchar *b, int len)
65
66
  {
66
- if (os->buf.pos > 0) // flush buffer
67
+ if (os->buf.pos > 0) { /* flush buffer */
67
68
  os_flush(os);
69
+ }
68
70
 
69
71
  if (len < BUFFER_SIZE) {
70
72
  os->flush_internal(os, b, len);
@@ -99,8 +101,9 @@ void is_refill(InStream *is)
99
101
  int start = is->buf.start + is->buf.pos;
100
102
  int last = start + BUFFER_SIZE;
101
103
  int flen = is->length_internal(is);
102
- if (last > flen) // don't read past EOF
104
+ if (last > flen) { /* don't read past EOF */
103
105
  last = flen;
106
+ }
104
107
 
105
108
  is->buf.len = last - start;
106
109
  if (is->buf.len <= 0) {
@@ -116,8 +119,9 @@ void is_refill(InStream *is)
116
119
  #define read_byte(is) is->buf.buf[is->buf.pos++]
117
120
  inline uchar is_read_byte(InStream *is)
118
121
  {
119
- if (is->buf.pos >= is->buf.len)
122
+ if (is->buf.pos >= is->buf.len) {
120
123
  is_refill(is);
124
+ }
121
125
 
122
126
  return read_byte(is);
123
127
  }
@@ -182,17 +186,17 @@ is_read_int(InStream *is)
182
186
  (int)is_read_byte(is);
183
187
  }
184
188
 
185
- long long
189
+ llong
186
190
  is_read_long(InStream *is)
187
191
  {
188
- return ((long long)is_read_byte(is) << 56) |
189
- ((long long)is_read_byte(is) << 48) |
190
- ((long long)is_read_byte(is) << 40) |
191
- ((long long)is_read_byte(is) << 32) |
192
- ((long long)is_read_byte(is) << 24) |
193
- ((long long)is_read_byte(is) << 16) |
194
- ((long long)is_read_byte(is) << 8) |
195
- (long long)is_read_byte(is);
192
+ return ((llong)is_read_byte(is) << 56) |
193
+ ((llong)is_read_byte(is) << 48) |
194
+ ((llong)is_read_byte(is) << 40) |
195
+ ((llong)is_read_byte(is) << 32) |
196
+ ((llong)is_read_byte(is) << 24) |
197
+ ((llong)is_read_byte(is) << 16) |
198
+ ((llong)is_read_byte(is) << 8) |
199
+ (llong)is_read_byte(is);
196
200
  }
197
201
 
198
202
  unsigned int
@@ -204,24 +208,24 @@ is_read_uint(InStream *is)
204
208
  (unsigned int)is_read_byte(is);
205
209
  }
206
210
 
207
- unsigned long long
211
+ ullong
208
212
  is_read_ulong(InStream *is)
209
213
  {
210
- return ((unsigned long long)is_read_byte(is) << 56) |
211
- ((unsigned long long)is_read_byte(is) << 48) |
212
- ((unsigned long long)is_read_byte(is) << 40) |
213
- ((unsigned long long)is_read_byte(is) << 32) |
214
- ((unsigned long long)is_read_byte(is) << 24) |
215
- ((unsigned long long)is_read_byte(is) << 16) |
216
- ((unsigned long long)is_read_byte(is) << 8) |
217
- (unsigned long long)is_read_byte(is);
214
+ return ((ullong)is_read_byte(is) << 56) |
215
+ ((ullong)is_read_byte(is) << 48) |
216
+ ((ullong)is_read_byte(is) << 40) |
217
+ ((ullong)is_read_byte(is) << 32) |
218
+ ((ullong)is_read_byte(is) << 24) |
219
+ ((ullong)is_read_byte(is) << 16) |
220
+ ((ullong)is_read_byte(is) << 8) |
221
+ (ullong)is_read_byte(is);
218
222
  }
219
223
 
220
224
  /* optimized to use unchecked read_byte if there is definitely space */
221
- inline unsigned long long
225
+ inline ullong
222
226
  is_read_vint(InStream *is)
223
227
  {
224
- register unsigned long long res, b;
228
+ register ullong res, b;
225
229
  register int shift = 7;
226
230
 
227
231
  if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
@@ -283,63 +287,63 @@ is_read_string(InStream *is)
283
287
  void
284
288
  os_write_int(OutStream *os, int l)
285
289
  {
286
- os_write_byte(os, (l >> 24) & 0xFF);
287
- os_write_byte(os, (l >> 16) & 0xFF);
288
- os_write_byte(os, (l >> 8) & 0xFF);
289
- os_write_byte(os, l & 0xFF);
290
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
291
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
292
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
293
+ os_write_byte(os, (uchar)(l & 0xFF));
290
294
  }
291
295
 
292
296
  void
293
- os_write_long(OutStream *os, long long l)
297
+ os_write_long(OutStream *os, llong l)
294
298
  {
295
- os_write_byte(os, (l >> 56) & 0xFF);
296
- os_write_byte(os, (l >> 48) & 0xFF);
297
- os_write_byte(os, (l >> 40) & 0xFF);
298
- os_write_byte(os, (l >> 32) & 0xFF);
299
- os_write_byte(os, (l >> 24) & 0xFF);
300
- os_write_byte(os, (l >> 16) & 0xFF);
301
- os_write_byte(os, (l >> 8) & 0xFF);
302
- os_write_byte(os, l & 0xFF);
299
+ os_write_byte(os, (uchar)((l >> 56) & 0xFF));
300
+ os_write_byte(os, (uchar)((l >> 48) & 0xFF));
301
+ os_write_byte(os, (uchar)((l >> 40) & 0xFF));
302
+ os_write_byte(os, (uchar)((l >> 32) & 0xFF));
303
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
304
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
305
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
306
+ os_write_byte(os, (uchar)(l & 0xFF));
303
307
  }
304
308
 
305
309
  void
306
310
  os_write_uint(OutStream *os, unsigned int l)
307
311
  {
308
- os_write_byte(os, (l >> 24) & 0xFF);
309
- os_write_byte(os, (l >> 16) & 0xFF);
310
- os_write_byte(os, (l >> 8) & 0xFF);
311
- os_write_byte(os, l & 0xFF);
312
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
313
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
314
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
315
+ os_write_byte(os, (uchar)(l & 0xFF));
312
316
  }
313
317
 
314
318
  void
315
- os_write_ulong(OutStream *os, unsigned long long l)
319
+ os_write_ulong(OutStream *os, ullong l)
316
320
  {
317
- os_write_byte(os, (l >> 56) & 0xFF);
318
- os_write_byte(os, (l >> 48) & 0xFF);
319
- os_write_byte(os, (l >> 40) & 0xFF);
320
- os_write_byte(os, (l >> 32) & 0xFF);
321
- os_write_byte(os, (l >> 24) & 0xFF);
322
- os_write_byte(os, (l >> 16) & 0xFF);
323
- os_write_byte(os, (l >> 8) & 0xFF);
324
- os_write_byte(os, l & 0xFF);
321
+ os_write_byte(os, (uchar)((l >> 56) & 0xFF));
322
+ os_write_byte(os, (uchar)((l >> 48) & 0xFF));
323
+ os_write_byte(os, (uchar)((l >> 40) & 0xFF));
324
+ os_write_byte(os, (uchar)((l >> 32) & 0xFF));
325
+ os_write_byte(os, (uchar)((l >> 24) & 0xFF));
326
+ os_write_byte(os, (uchar)((l >> 16) & 0xFF));
327
+ os_write_byte(os, (uchar)((l >> 8) & 0xFF));
328
+ os_write_byte(os, (uchar)(l & 0xFF));
325
329
  }
326
330
 
327
331
  /* optimized to use an unchecked write if there is space */
328
332
  inline void
329
- os_write_vint(OutStream *os, register unsigned long long i)
333
+ os_write_vint(OutStream *os, register ullong i)
330
334
  {
331
335
  if (os->buf.pos > VINT_END) {
332
336
  while (i > 127) {
333
- os_write_byte(os, (i & 0x7f) | 0x80);
337
+ os_write_byte(os, (uchar)((i & 0x7f) | 0x80));
334
338
  i >>= 7;
335
339
  }
336
- os_write_byte(os, i);
340
+ os_write_byte(os, (uchar)(i));
337
341
  } else {
338
342
  while (i > 127) {
339
- write_byte(os, (i & 0x7f) | 0x80);
343
+ write_byte(os, (uchar)((i & 0x7f) | 0x80));
340
344
  i >>= 7;
341
345
  }
342
- write_byte(os, i);
346
+ write_byte(os, (uchar)(i));
343
347
  }
344
348
  }
345
349
 
@@ -356,7 +360,7 @@ os_write_chars(OutStream *os, char *buf, int start, int length)
356
360
  void
357
361
  os_write_string(OutStream *os, char *str)
358
362
  {
359
- int len = strlen(str);
363
+ int len = (int)strlen(str);
360
364
  os_write_vint(os, len);
361
365
 
362
366
  os_write_chars(os, str, 0, len);
@@ -364,6 +368,6 @@ os_write_string(OutStream *os, char *str)
364
368
 
365
369
  int file_is_lock(char *filename)
366
370
  {
367
- int start = strlen(filename) - 4;
371
+ int start = (int)strlen(filename) - 4;
368
372
  return ((start > 0) && (strcmp(".lck", &filename[start]) == 0));
369
373
  }