ferret 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/ind.c
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
#include <string.h>
|
2
1
|
#include "search.h"
|
2
|
+
#include <string.h>
|
3
|
+
|
3
4
|
|
4
5
|
static char * const NON_UNIQUE_KEY_ERROR_MSG = "Tried to use a key that was not unique";
|
5
6
|
|
@@ -45,23 +46,22 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
|
45
46
|
self->has_writes = false;
|
46
47
|
if (store) {
|
47
48
|
self->store = store;
|
48
|
-
|
49
|
+
ref(store);
|
49
50
|
} else {
|
50
51
|
self->store = open_ram_store();
|
51
52
|
create = true;
|
52
|
-
self->close_store = true;
|
53
53
|
}
|
54
54
|
if (analyzer) {
|
55
55
|
self->analyzer = analyzer;
|
56
|
-
|
56
|
+
ref(analyzer);
|
57
57
|
} else {
|
58
58
|
self->analyzer = mb_standard_analyzer_create(true);
|
59
|
-
self->close_analyzer = true;
|
60
59
|
}
|
61
60
|
self->use_compound_file = true;
|
62
61
|
|
63
62
|
if (create) {
|
64
|
-
|
63
|
+
ref(self->analyzer);
|
64
|
+
self->iw = iw_open(self->store, self->analyzer, create);
|
65
65
|
iw_close(self->iw);
|
66
66
|
self->iw = NULL;
|
67
67
|
}
|
@@ -73,6 +73,7 @@ Index *index_create(Store *store, Analyzer *analyzer, HashSet *def_fields,
|
|
73
73
|
self->auto_flush = false;
|
74
74
|
self->check_latest = true;
|
75
75
|
|
76
|
+
ref(self->analyzer);
|
76
77
|
self->qp = qp_create(all_fields, def_fields, self->analyzer);
|
77
78
|
/* Index is a convenience class so set qp convenience options */
|
78
79
|
self->qp->allow_any_fields = true;
|
@@ -87,8 +88,8 @@ void index_destroy(Index *self)
|
|
87
88
|
mutex_destroy(&self->mutex);
|
88
89
|
INDEX_CLOSE_READER(self);
|
89
90
|
if (self->iw) iw_close(self->iw);
|
90
|
-
|
91
|
-
|
91
|
+
store_deref(self->store);
|
92
|
+
a_deref(self->analyzer);
|
92
93
|
if (self->qp) qp_destroy(self->qp);
|
93
94
|
if (self->id_field != ((char *)ID_STRING)) free(self->id_field);
|
94
95
|
if (self->def_field != ((char *)ID_STRING)) free(self->def_field);
|
@@ -106,13 +107,19 @@ void index_flush(Index *self)
|
|
106
107
|
}
|
107
108
|
self->has_writes = false;
|
108
109
|
}
|
110
|
+
|
109
111
|
inline void ensure_writer_open(Index *self)
|
110
112
|
{
|
111
113
|
if (!self->iw) {
|
112
114
|
INDEX_CLOSE_READER(self);
|
113
|
-
|
115
|
+
|
116
|
+
/* make sure the analyzer isn't deleted by the IndexWriter */
|
117
|
+
ref(self->analyzer);
|
118
|
+
self->iw = iw_open(self->store, self->analyzer, false);
|
114
119
|
self->iw->use_compound_file = self->use_compound_file;
|
115
|
-
} else {
|
120
|
+
} else if (self->analyzer != self->iw->analyzer) {
|
121
|
+
a_deref(self->iw->analyzer);
|
122
|
+
ref(self->analyzer);
|
116
123
|
self->iw->analyzer = self->analyzer; /* in case it has changed */
|
117
124
|
}
|
118
125
|
}
|
@@ -122,14 +129,14 @@ inline void ensure_reader_open(Index *self)
|
|
122
129
|
if (self->ir) {
|
123
130
|
if (self->check_latest && !ir_is_latest(self->ir)) {
|
124
131
|
INDEX_CLOSE_READER(self);
|
125
|
-
self->ir = ir_open(self->store
|
132
|
+
self->ir = ir_open(self->store);
|
126
133
|
}
|
127
134
|
} else {
|
128
135
|
if (self->iw) {
|
129
136
|
iw_close(self->iw);
|
130
137
|
self->iw = NULL;
|
131
138
|
}
|
132
|
-
self->ir = ir_open(self->store
|
139
|
+
self->ir = ir_open(self->store);
|
133
140
|
}
|
134
141
|
}
|
135
142
|
|
@@ -203,7 +210,7 @@ static void inline index_add_doc_i(Index *self, Document *doc)
|
|
203
210
|
} else if (td->total_hits == 1) {
|
204
211
|
ir_delete_doc(self->ir, td->hits[0]->doc);
|
205
212
|
}
|
206
|
-
|
213
|
+
q_deref(q);
|
207
214
|
td_destroy(td);
|
208
215
|
}
|
209
216
|
ensure_writer_open(self);
|
@@ -215,11 +222,16 @@ void index_add_doc_a(Index *self, Document *doc, Analyzer *analyzer)
|
|
215
222
|
{
|
216
223
|
Analyzer *tmp_analyzer;
|
217
224
|
mutex_lock(&self->store->ext_mutex);
|
218
|
-
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
225
|
+
if (analyzer != self->analyzer) {
|
226
|
+
ref(analyzer);
|
227
|
+
tmp_analyzer = self->analyzer;
|
228
|
+
self->analyzer = analyzer;
|
229
|
+
index_add_doc_i(self, doc);
|
230
|
+
self->analyzer = tmp_analyzer;
|
231
|
+
a_deref(analyzer);
|
232
|
+
} else {
|
233
|
+
index_add_doc_i(self, doc);
|
234
|
+
}
|
223
235
|
mutex_unlock(&self->store->ext_mutex);
|
224
236
|
}
|
225
237
|
|
@@ -272,7 +284,7 @@ TopDocs *index_search_str(Index *self, char *qstr, int first_doc,
|
|
272
284
|
TopDocs *td;
|
273
285
|
query = index_get_query(self, qstr); /* will ensure_searcher is open */
|
274
286
|
td = sea_search(self->sea, query, first_doc, num_docs, filter, sort);
|
275
|
-
|
287
|
+
q_deref(query);
|
276
288
|
return td;
|
277
289
|
}
|
278
290
|
|
@@ -363,7 +375,7 @@ void index_delete_id(Index *self, char *id)
|
|
363
375
|
index_delete_term(self, &t);
|
364
376
|
}
|
365
377
|
|
366
|
-
static void index_qdel_i(Searcher *sea, int doc_num, void *arg)
|
378
|
+
static void index_qdel_i(Searcher *sea, int doc_num, float score, void *arg)
|
367
379
|
{
|
368
380
|
ir_delete_doc(sea->ir, doc_num);
|
369
381
|
}
|
@@ -381,7 +393,7 @@ void index_delete_query_str(Index *self, char *qstr, Filter *f)
|
|
381
393
|
{
|
382
394
|
Query *q = index_get_query(self, qstr);
|
383
395
|
index_delete_query(self, q, f);
|
384
|
-
|
396
|
+
q_deref(q);
|
385
397
|
}
|
386
398
|
|
387
399
|
Explanation *index_explain(Index *self, Query *q, int doc_num)
|
data/ext/index.h
CHANGED
@@ -15,6 +15,7 @@
|
|
15
15
|
|
16
16
|
|
17
17
|
#define SEGMENT_NAME_MAX_LENGTH 100
|
18
|
+
#define NOT_A_FIELD 0xFFFFFFFF
|
18
19
|
|
19
20
|
typedef struct Config {
|
20
21
|
int merge_factor;
|
@@ -75,7 +76,7 @@ FieldInfo *fi_create(char *name,
|
|
75
76
|
bool store_pos,
|
76
77
|
bool store_offset,
|
77
78
|
bool omit_norms);
|
78
|
-
void fi_destroy(
|
79
|
+
void fi_destroy(FieldInfo *fi);
|
79
80
|
|
80
81
|
/****************************************************************************
|
81
82
|
*
|
@@ -91,7 +92,7 @@ typedef struct FieldInfos {
|
|
91
92
|
|
92
93
|
FieldInfos *fis_create();
|
93
94
|
FieldInfos *fis_open(Store *store, char *filename);
|
94
|
-
void fis_destroy(
|
95
|
+
void fis_destroy(FieldInfos *fis);
|
95
96
|
FieldInfo *fis_add(FieldInfos *fis,
|
96
97
|
char *name,
|
97
98
|
bool is_indexed,
|
@@ -111,8 +112,9 @@ bool fis_has_vectors(FieldInfos *fis);
|
|
111
112
|
void fis_write(FieldInfos *fis, Store *store, char *segment, char *ext);
|
112
113
|
FieldInfos *fis_read(FieldInfos *fis, InStream *is);
|
113
114
|
FieldInfos *fis_add_doc(FieldInfos *fis, Document *doc);
|
114
|
-
|
115
|
+
ullong fis_get_number(FieldInfos *fis, char *name);
|
115
116
|
FieldInfo *fis_get_fi(FieldInfos *fis, char *name);
|
117
|
+
bool fis_reorder_required(FieldInfos *fis, Document *doc);
|
116
118
|
|
117
119
|
/****************************************************************************
|
118
120
|
*
|
@@ -126,7 +128,7 @@ typedef struct TermBuffer {
|
|
126
128
|
} TermBuffer;
|
127
129
|
|
128
130
|
TermBuffer *tb_create();
|
129
|
-
void tb_destroy(
|
131
|
+
void tb_destroy(TermBuffer *tb);
|
130
132
|
TermBuffer *tb_set_term(TermBuffer *tb, Term *t);
|
131
133
|
Term *tb_get_term(TermBuffer *tb);
|
132
134
|
int tb_cmp(TermBuffer *tb1, TermBuffer *tb2);
|
@@ -149,7 +151,7 @@ typedef struct TermInfo {
|
|
149
151
|
|
150
152
|
TermInfo *ti_create(int doc_freq, int freq_pointer, int prox_pointer, int skip_offset);
|
151
153
|
TermInfo *ti_set(TermInfo *ti, int df, int fp, int pp, int so);
|
152
|
-
void ti_destroy(
|
154
|
+
void ti_destroy(TermInfo *ti);
|
153
155
|
TermInfo *ti_cpy(TermInfo *ti1, TermInfo *ti2);
|
154
156
|
TermInfo *ti_clone(TermInfo *other);
|
155
157
|
int ti_eq(TermInfo *ti1, TermInfo *ti2);
|
@@ -226,7 +228,7 @@ typedef struct TermInfosWriter {
|
|
226
228
|
TermInfo *last_term_info;
|
227
229
|
FieldInfos *fis;
|
228
230
|
char *curr_field;
|
229
|
-
|
231
|
+
ullong curr_field_num;
|
230
232
|
} TermInfosWriter;
|
231
233
|
|
232
234
|
TermInfosWriter *tiw_open(Store *store,
|
@@ -332,7 +334,7 @@ TermVector *tv_create(const char *field,
|
|
332
334
|
int *freqs,
|
333
335
|
int **positions,
|
334
336
|
TVOffsetInfo ***offsets);
|
335
|
-
void tv_destroy(
|
337
|
+
void tv_destroy(TermVector *tv);
|
336
338
|
|
337
339
|
/****************************************************************************
|
338
340
|
*
|
@@ -441,11 +443,12 @@ struct TermDocEnum {
|
|
441
443
|
/* * SegmentTermDocEnum * */
|
442
444
|
|
443
445
|
typedef struct SegmentTermDocEnum SegmentTermDocEnum;
|
446
|
+
|
444
447
|
struct SegmentTermDocEnum {
|
445
448
|
SegmentReader *parent;
|
446
449
|
InStream *freq_in;
|
447
|
-
int count;
|
448
|
-
int doc_freq;
|
450
|
+
int count; /* number of docs for this term skipped */
|
451
|
+
int doc_freq; /* number of doc this term appears in */
|
449
452
|
BitVector *deleted_docs;
|
450
453
|
int doc_num;
|
451
454
|
int freq;
|
@@ -538,8 +541,8 @@ typedef struct Posting {
|
|
538
541
|
} Posting;
|
539
542
|
|
540
543
|
Posting *p_create(Term *term, int position, TVOffsetInfo *offset);
|
541
|
-
void p_destroy(
|
542
|
-
void p_add_occurance(Posting *
|
544
|
+
void p_destroy(Posting *self);
|
545
|
+
void p_add_occurance(Posting *self, int position, TVOffsetInfo *offset);
|
543
546
|
|
544
547
|
|
545
548
|
/****************************************************************************
|
@@ -581,7 +584,7 @@ typedef struct SegmentInfo {
|
|
581
584
|
} SegmentInfo;
|
582
585
|
|
583
586
|
SegmentInfo *si_create(char *name, int doc_cnt, Store *store);
|
584
|
-
void si_destroy(
|
587
|
+
void si_destroy(SegmentInfo *si);
|
585
588
|
bool si_has_deletions(SegmentInfo *si);
|
586
589
|
bool si_uses_compound_file(SegmentInfo *si);
|
587
590
|
bool si_has_separate_norms(SegmentInfo *si);
|
@@ -598,12 +601,12 @@ typedef struct SegmentInfos {
|
|
598
601
|
int scnt;
|
599
602
|
int size;
|
600
603
|
int counter;
|
601
|
-
|
604
|
+
int version;
|
602
605
|
int format;
|
603
606
|
} SegmentInfos;
|
604
607
|
|
605
608
|
SegmentInfos *sis_create();
|
606
|
-
void sis_destroy(
|
609
|
+
void sis_destroy(SegmentInfos *sis);
|
607
610
|
void sis_add_si(SegmentInfos *sis, SegmentInfo *si);
|
608
611
|
void sis_del_at(SegmentInfos *sis, int at);
|
609
612
|
void sis_del_from_to(SegmentInfos *sis, int from, int to);
|
@@ -619,24 +622,24 @@ int sis_read_current_version(Store *store);
|
|
619
622
|
****************************************************************************/
|
620
623
|
|
621
624
|
enum FIELD_TYPE {
|
622
|
-
|
625
|
+
/* all fields */
|
623
626
|
IR_ALL,
|
624
|
-
|
627
|
+
/* all indexed fields */
|
625
628
|
IR_INDEXED,
|
626
|
-
|
629
|
+
/* all fields which are not indexed */
|
627
630
|
IR_UNINDEXED,
|
628
|
-
|
631
|
+
/* all fields which are indexed with termvectors enabled */
|
629
632
|
IR_INDEXED_WITH_TERM_VECTOR,
|
630
|
-
|
633
|
+
/* all fields which are indexed but don't have termvectors enabled */
|
631
634
|
IR_INDEXED_NO_TERM_VECTOR,
|
632
|
-
|
633
|
-
|
635
|
+
/* all fields where termvectors are enabled. Please note that only standard */
|
636
|
+
/* termvector fields are returned */
|
634
637
|
IR_TERM_VECTOR,
|
635
|
-
|
638
|
+
/* all fields with termvectors with positions enabled */
|
636
639
|
IR_TERM_VECTOR_WITH_POSITION,
|
637
|
-
|
640
|
+
/* all fields where termvectors with offset position are set */
|
638
641
|
IR_TERM_VECTOR_WITH_OFFSET,
|
639
|
-
|
642
|
+
/* all fields where termvectors with offset and position values set */
|
640
643
|
IR_TERM_VECTOR_WITH_POSITION_OFFSET
|
641
644
|
};
|
642
645
|
|
@@ -651,7 +654,6 @@ struct IndexReader {
|
|
651
654
|
bool has_changes : 1;
|
652
655
|
bool is_stale : 1;
|
653
656
|
bool is_owner : 1;
|
654
|
-
bool close_store : 1;
|
655
657
|
TermVector *(*get_term_vector)(IndexReader *ir, int doc_num, char *field);
|
656
658
|
Array *(*get_term_vectors)(IndexReader *ir, int doc_num);
|
657
659
|
int (*num_docs)(IndexReader *ir);
|
@@ -659,8 +661,10 @@ struct IndexReader {
|
|
659
661
|
Document *(*get_doc)(IndexReader *ir, int doc_num);
|
660
662
|
uchar *(*get_norms)(IndexReader *ir, char *field);
|
661
663
|
uchar *(*get_norms_always)(IndexReader *ir, char *field);
|
662
|
-
void (*do_set_norm)(IndexReader *ir, int doc_num, char *field,
|
663
|
-
|
664
|
+
void (*do_set_norm)(IndexReader *ir, int doc_num, char *field,
|
665
|
+
uchar val);
|
666
|
+
void (*get_norms_into)(IndexReader *ir, char *field, uchar *buf,
|
667
|
+
int offset);
|
664
668
|
TermEnum *(*terms)(IndexReader *ir);
|
665
669
|
TermEnum *(*terms_from)(IndexReader *ir, Term *term);
|
666
670
|
int (*doc_freq)(IndexReader *ir, Term *t);
|
@@ -675,17 +679,19 @@ struct IndexReader {
|
|
675
679
|
void (*do_commit)(IndexReader *ir);
|
676
680
|
void (*do_close)(IndexReader *ir);
|
677
681
|
void (*acquire_write_lock)(IndexReader *ir);
|
682
|
+
int (*write_fields_i)(IndexReader *ir, OutStream *fdt_out,
|
683
|
+
OutStream *fdx_out);
|
678
684
|
};
|
679
685
|
|
680
|
-
IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner
|
681
|
-
IndexReader *ir_open(Store *store
|
686
|
+
IndexReader *ir_create(Store *store, SegmentInfos *sis, int is_owner);
|
687
|
+
IndexReader *ir_open(Store *store);
|
682
688
|
bool ir_index_exists(Store *store);
|
683
689
|
void ir_close(IndexReader *ir);
|
684
690
|
void ir_commit(IndexReader *ir);
|
685
691
|
void ir_delete_doc(IndexReader *ir, int doc_num);
|
686
692
|
void ir_undelete_all(IndexReader *ir);
|
687
693
|
void ir_set_norm(IndexReader *ir, int doc_num, char *field, uchar val);
|
688
|
-
void ir_destroy(
|
694
|
+
void ir_destroy(IndexReader *self);
|
689
695
|
Document *ir_get_doc_with_term(IndexReader *ir, Term *term);
|
690
696
|
TermDocEnum *ir_term_docs_for(IndexReader *ir, Term *term);
|
691
697
|
TermDocEnum *ir_term_positions_for(IndexReader *ir, Term *term);
|
@@ -730,9 +736,8 @@ struct SegmentReader {
|
|
730
736
|
uchar *fake_norms;
|
731
737
|
};
|
732
738
|
|
733
|
-
IndexReader *sr_open(SegmentInfos *sis, int si_num,
|
739
|
+
IndexReader *sr_open(SegmentInfos *sis, int si_num, bool is_owner);
|
734
740
|
IndexReader *sr_open_si(SegmentInfo *si);
|
735
|
-
//int sr_has_deletions(IndexReader *ir);
|
736
741
|
|
737
742
|
/****************************************************************************
|
738
743
|
*
|
@@ -751,7 +756,7 @@ typedef struct MultiReader {
|
|
751
756
|
} MultiReader;
|
752
757
|
|
753
758
|
IndexReader *mr_open(Store *store, SegmentInfos *sis, IndexReader **readers,
|
754
|
-
int rcnt
|
759
|
+
int rcnt);
|
755
760
|
|
756
761
|
/****************************************************************************
|
757
762
|
*
|
@@ -769,9 +774,9 @@ typedef struct SegmentMergeInfo {
|
|
769
774
|
} SegmentMergeInfo;
|
770
775
|
|
771
776
|
SegmentMergeInfo *smi_create(int base, TermEnum *te, IndexReader *ir);
|
772
|
-
void smi_destroy(
|
777
|
+
void smi_destroy(SegmentMergeInfo *smi);
|
773
778
|
TermBuffer *smi_next(SegmentMergeInfo *smi);
|
774
|
-
bool smi_lt(
|
779
|
+
bool smi_lt(SegmentMergeInfo *smi1, SegmentMergeInfo *smi2);
|
775
780
|
|
776
781
|
/****************************************************************************
|
777
782
|
*
|
@@ -801,7 +806,7 @@ typedef struct SegmentMerger {
|
|
801
806
|
} SegmentMerger;
|
802
807
|
|
803
808
|
SegmentMerger *sm_create(Store *store, char *name, int term_index_interval);
|
804
|
-
void sm_destroy(
|
809
|
+
void sm_destroy(SegmentMerger *sm);
|
805
810
|
void sm_add(SegmentMerger *sm, IndexReader *ir);
|
806
811
|
int sm_merge(SegmentMerger *sm);
|
807
812
|
Array *sm_create_compound_file(SegmentMerger *sm, char *fname);
|
@@ -817,6 +822,8 @@ Array *sm_create_compound_file(SegmentMerger *sm, char *fname);
|
|
817
822
|
#define COMMIT_LOCK_NAME "commit"
|
818
823
|
struct IndexWriter {
|
819
824
|
mutex_t mutex;
|
825
|
+
HshTable *postings;
|
826
|
+
FieldInfos *fis;
|
820
827
|
int merge_factor;
|
821
828
|
int min_merge_docs;
|
822
829
|
int max_merge_docs;
|
@@ -828,13 +835,11 @@ struct IndexWriter {
|
|
828
835
|
SegmentInfos *sis;
|
829
836
|
Store *ram_store;
|
830
837
|
Lock *write_lock;
|
831
|
-
bool close_store : 1;
|
832
|
-
bool close_analyzer : 1;
|
833
838
|
bool use_compound_file : 1;
|
834
839
|
};
|
835
840
|
|
836
841
|
IndexWriter *iw_open(Store *store, Analyzer *analyzer,
|
837
|
-
bool create
|
842
|
+
bool create);
|
838
843
|
void iw_flush_ram_segments(IndexWriter *iw);
|
839
844
|
void iw_close(IndexWriter *iw);
|
840
845
|
int iw_doc_count(IndexWriter *iw);
|
data/ext/index_io.c
CHANGED
@@ -56,15 +56,17 @@ void os_seek(OutStream *os, int new_pos)
|
|
56
56
|
|
57
57
|
inline void os_write_byte(OutStream *os, uchar b)
|
58
58
|
{
|
59
|
-
if (os->buf.pos >= BUFFER_SIZE)
|
59
|
+
if (os->buf.pos >= BUFFER_SIZE) {
|
60
60
|
os_flush(os);
|
61
|
+
}
|
61
62
|
write_byte(os, b);
|
62
63
|
}
|
63
64
|
|
64
65
|
void os_write_bytes(OutStream *os, uchar *b, int len)
|
65
66
|
{
|
66
|
-
if (os->buf.pos > 0)
|
67
|
+
if (os->buf.pos > 0) { /* flush buffer */
|
67
68
|
os_flush(os);
|
69
|
+
}
|
68
70
|
|
69
71
|
if (len < BUFFER_SIZE) {
|
70
72
|
os->flush_internal(os, b, len);
|
@@ -99,8 +101,9 @@ void is_refill(InStream *is)
|
|
99
101
|
int start = is->buf.start + is->buf.pos;
|
100
102
|
int last = start + BUFFER_SIZE;
|
101
103
|
int flen = is->length_internal(is);
|
102
|
-
if (last > flen)
|
104
|
+
if (last > flen) { /* don't read past EOF */
|
103
105
|
last = flen;
|
106
|
+
}
|
104
107
|
|
105
108
|
is->buf.len = last - start;
|
106
109
|
if (is->buf.len <= 0) {
|
@@ -116,8 +119,9 @@ void is_refill(InStream *is)
|
|
116
119
|
#define read_byte(is) is->buf.buf[is->buf.pos++]
|
117
120
|
inline uchar is_read_byte(InStream *is)
|
118
121
|
{
|
119
|
-
if (is->buf.pos >= is->buf.len)
|
122
|
+
if (is->buf.pos >= is->buf.len) {
|
120
123
|
is_refill(is);
|
124
|
+
}
|
121
125
|
|
122
126
|
return read_byte(is);
|
123
127
|
}
|
@@ -182,17 +186,17 @@ is_read_int(InStream *is)
|
|
182
186
|
(int)is_read_byte(is);
|
183
187
|
}
|
184
188
|
|
185
|
-
|
189
|
+
llong
|
186
190
|
is_read_long(InStream *is)
|
187
191
|
{
|
188
|
-
return ((
|
189
|
-
((
|
190
|
-
((
|
191
|
-
((
|
192
|
-
((
|
193
|
-
((
|
194
|
-
((
|
195
|
-
(
|
192
|
+
return ((llong)is_read_byte(is) << 56) |
|
193
|
+
((llong)is_read_byte(is) << 48) |
|
194
|
+
((llong)is_read_byte(is) << 40) |
|
195
|
+
((llong)is_read_byte(is) << 32) |
|
196
|
+
((llong)is_read_byte(is) << 24) |
|
197
|
+
((llong)is_read_byte(is) << 16) |
|
198
|
+
((llong)is_read_byte(is) << 8) |
|
199
|
+
(llong)is_read_byte(is);
|
196
200
|
}
|
197
201
|
|
198
202
|
unsigned int
|
@@ -204,24 +208,24 @@ is_read_uint(InStream *is)
|
|
204
208
|
(unsigned int)is_read_byte(is);
|
205
209
|
}
|
206
210
|
|
207
|
-
|
211
|
+
ullong
|
208
212
|
is_read_ulong(InStream *is)
|
209
213
|
{
|
210
|
-
return ((
|
211
|
-
((
|
212
|
-
((
|
213
|
-
((
|
214
|
-
((
|
215
|
-
((
|
216
|
-
((
|
217
|
-
(
|
214
|
+
return ((ullong)is_read_byte(is) << 56) |
|
215
|
+
((ullong)is_read_byte(is) << 48) |
|
216
|
+
((ullong)is_read_byte(is) << 40) |
|
217
|
+
((ullong)is_read_byte(is) << 32) |
|
218
|
+
((ullong)is_read_byte(is) << 24) |
|
219
|
+
((ullong)is_read_byte(is) << 16) |
|
220
|
+
((ullong)is_read_byte(is) << 8) |
|
221
|
+
(ullong)is_read_byte(is);
|
218
222
|
}
|
219
223
|
|
220
224
|
/* optimized to use unchecked read_byte if there is definitely space */
|
221
|
-
inline
|
225
|
+
inline ullong
|
222
226
|
is_read_vint(InStream *is)
|
223
227
|
{
|
224
|
-
register
|
228
|
+
register ullong res, b;
|
225
229
|
register int shift = 7;
|
226
230
|
|
227
231
|
if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
|
@@ -283,63 +287,63 @@ is_read_string(InStream *is)
|
|
283
287
|
void
|
284
288
|
os_write_int(OutStream *os, int l)
|
285
289
|
{
|
286
|
-
os_write_byte(os, (l >> 24) & 0xFF);
|
287
|
-
os_write_byte(os, (l >> 16) & 0xFF);
|
288
|
-
os_write_byte(os, (l >> 8) & 0xFF);
|
289
|
-
os_write_byte(os, l & 0xFF);
|
290
|
+
os_write_byte(os, (uchar)((l >> 24) & 0xFF));
|
291
|
+
os_write_byte(os, (uchar)((l >> 16) & 0xFF));
|
292
|
+
os_write_byte(os, (uchar)((l >> 8) & 0xFF));
|
293
|
+
os_write_byte(os, (uchar)(l & 0xFF));
|
290
294
|
}
|
291
295
|
|
292
296
|
void
|
293
|
-
os_write_long(OutStream *os,
|
297
|
+
os_write_long(OutStream *os, llong l)
|
294
298
|
{
|
295
|
-
os_write_byte(os, (l >> 56) & 0xFF);
|
296
|
-
os_write_byte(os, (l >> 48) & 0xFF);
|
297
|
-
os_write_byte(os, (l >> 40) & 0xFF);
|
298
|
-
os_write_byte(os, (l >> 32) & 0xFF);
|
299
|
-
os_write_byte(os, (l >> 24) & 0xFF);
|
300
|
-
os_write_byte(os, (l >> 16) & 0xFF);
|
301
|
-
os_write_byte(os, (l >> 8) & 0xFF);
|
302
|
-
os_write_byte(os, l & 0xFF);
|
299
|
+
os_write_byte(os, (uchar)((l >> 56) & 0xFF));
|
300
|
+
os_write_byte(os, (uchar)((l >> 48) & 0xFF));
|
301
|
+
os_write_byte(os, (uchar)((l >> 40) & 0xFF));
|
302
|
+
os_write_byte(os, (uchar)((l >> 32) & 0xFF));
|
303
|
+
os_write_byte(os, (uchar)((l >> 24) & 0xFF));
|
304
|
+
os_write_byte(os, (uchar)((l >> 16) & 0xFF));
|
305
|
+
os_write_byte(os, (uchar)((l >> 8) & 0xFF));
|
306
|
+
os_write_byte(os, (uchar)(l & 0xFF));
|
303
307
|
}
|
304
308
|
|
305
309
|
void
|
306
310
|
os_write_uint(OutStream *os, unsigned int l)
|
307
311
|
{
|
308
|
-
os_write_byte(os, (l >> 24) & 0xFF);
|
309
|
-
os_write_byte(os, (l >> 16) & 0xFF);
|
310
|
-
os_write_byte(os, (l >> 8) & 0xFF);
|
311
|
-
os_write_byte(os, l & 0xFF);
|
312
|
+
os_write_byte(os, (uchar)((l >> 24) & 0xFF));
|
313
|
+
os_write_byte(os, (uchar)((l >> 16) & 0xFF));
|
314
|
+
os_write_byte(os, (uchar)((l >> 8) & 0xFF));
|
315
|
+
os_write_byte(os, (uchar)(l & 0xFF));
|
312
316
|
}
|
313
317
|
|
314
318
|
void
|
315
|
-
os_write_ulong(OutStream *os,
|
319
|
+
os_write_ulong(OutStream *os, ullong l)
|
316
320
|
{
|
317
|
-
os_write_byte(os, (l >> 56) & 0xFF);
|
318
|
-
os_write_byte(os, (l >> 48) & 0xFF);
|
319
|
-
os_write_byte(os, (l >> 40) & 0xFF);
|
320
|
-
os_write_byte(os, (l >> 32) & 0xFF);
|
321
|
-
os_write_byte(os, (l >> 24) & 0xFF);
|
322
|
-
os_write_byte(os, (l >> 16) & 0xFF);
|
323
|
-
os_write_byte(os, (l >> 8) & 0xFF);
|
324
|
-
os_write_byte(os, l & 0xFF);
|
321
|
+
os_write_byte(os, (uchar)((l >> 56) & 0xFF));
|
322
|
+
os_write_byte(os, (uchar)((l >> 48) & 0xFF));
|
323
|
+
os_write_byte(os, (uchar)((l >> 40) & 0xFF));
|
324
|
+
os_write_byte(os, (uchar)((l >> 32) & 0xFF));
|
325
|
+
os_write_byte(os, (uchar)((l >> 24) & 0xFF));
|
326
|
+
os_write_byte(os, (uchar)((l >> 16) & 0xFF));
|
327
|
+
os_write_byte(os, (uchar)((l >> 8) & 0xFF));
|
328
|
+
os_write_byte(os, (uchar)(l & 0xFF));
|
325
329
|
}
|
326
330
|
|
327
331
|
/* optimized to use an unchecked write if there is space */
|
328
332
|
inline void
|
329
|
-
os_write_vint(OutStream *os, register
|
333
|
+
os_write_vint(OutStream *os, register ullong i)
|
330
334
|
{
|
331
335
|
if (os->buf.pos > VINT_END) {
|
332
336
|
while (i > 127) {
|
333
|
-
os_write_byte(os, (i & 0x7f) | 0x80);
|
337
|
+
os_write_byte(os, (uchar)((i & 0x7f) | 0x80));
|
334
338
|
i >>= 7;
|
335
339
|
}
|
336
|
-
os_write_byte(os, i);
|
340
|
+
os_write_byte(os, (uchar)(i));
|
337
341
|
} else {
|
338
342
|
while (i > 127) {
|
339
|
-
write_byte(os, (i & 0x7f) | 0x80);
|
343
|
+
write_byte(os, (uchar)((i & 0x7f) | 0x80));
|
340
344
|
i >>= 7;
|
341
345
|
}
|
342
|
-
write_byte(os, i);
|
346
|
+
write_byte(os, (uchar)(i));
|
343
347
|
}
|
344
348
|
}
|
345
349
|
|
@@ -356,7 +360,7 @@ os_write_chars(OutStream *os, char *buf, int start, int length)
|
|
356
360
|
void
|
357
361
|
os_write_string(OutStream *os, char *str)
|
358
362
|
{
|
359
|
-
int len = strlen(str);
|
363
|
+
int len = (int)strlen(str);
|
360
364
|
os_write_vint(os, len);
|
361
365
|
|
362
366
|
os_write_chars(os, str, 0, len);
|
@@ -364,6 +368,6 @@ os_write_string(OutStream *os, char *str)
|
|
364
368
|
|
365
369
|
int file_is_lock(char *filename)
|
366
370
|
{
|
367
|
-
int start = strlen(filename) - 4;
|
371
|
+
int start = (int)strlen(filename) - 4;
|
368
372
|
return ((start > 0) && (strcmp(".lck", &filename[start]) == 0));
|
369
373
|
}
|