ferret 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/analysis.h
CHANGED
@@ -40,13 +40,14 @@ struct TokenStream {
|
|
40
40
|
Token *(*next)(TokenStream *ts);
|
41
41
|
void (*reset)(TokenStream *ts, char *text);
|
42
42
|
void (*clone_i)(TokenStream *orig_ts, TokenStream *new_ts);
|
43
|
-
void (*destroy)(
|
43
|
+
void (*destroy)(TokenStream *ts);
|
44
44
|
TokenStream *sub_ts; /* used by filters */
|
45
|
-
|
45
|
+
int ref_cnt;
|
46
46
|
};
|
47
47
|
|
48
48
|
#define ts_next(mts) mts->next(mts)
|
49
|
-
|
49
|
+
|
50
|
+
void ts_deref(void *p);
|
50
51
|
|
51
52
|
TokenStream *whitespace_tokenizer_create();
|
52
53
|
TokenStream *mb_whitespace_tokenizer_create(bool lowercase);
|
@@ -93,13 +94,19 @@ typedef struct Analyzer {
|
|
93
94
|
void *data;
|
94
95
|
TokenStream *current_ts;
|
95
96
|
TokenStream *(*get_ts)(struct Analyzer *a, char *field, char *text);
|
96
|
-
void (*destroy)(
|
97
|
+
void (*destroy)(struct Analyzer *a);
|
98
|
+
int ref_cnt;
|
97
99
|
} Analyzer;
|
98
100
|
|
99
|
-
|
101
|
+
void a_deref(void *p);
|
102
|
+
|
100
103
|
#define a_get_ts(ma, field, text) ma->get_ts(ma, field, text)
|
101
104
|
#define a_get_new_ts(ma, field, text) ts_clone(ma->get_ts(ma, field, text))
|
102
105
|
|
106
|
+
Analyzer *analyzer_create(void *data, TokenStream *ts,
|
107
|
+
void (*destroy)(Analyzer *),
|
108
|
+
TokenStream *(*get_ts)(Analyzer *a, char *field, char *text));
|
109
|
+
void a_standard_destroy(Analyzer *a);
|
103
110
|
Analyzer *whitespace_analyzer_create(bool lowercase);
|
104
111
|
Analyzer *mb_whitespace_analyzer_create(bool lowercase);
|
105
112
|
|
@@ -118,7 +125,12 @@ Analyzer *mb_standard_analyzer_create_with_words(
|
|
118
125
|
Analyzer *mb_standard_analyzer_create_with_words_len(
|
119
126
|
const char **words, int len, bool lowercase);
|
120
127
|
|
121
|
-
|
128
|
+
typedef struct PerFieldAnalyzer {
|
129
|
+
HshTable *dict;
|
130
|
+
Analyzer *def;
|
131
|
+
} PerFieldAnalyzer;
|
132
|
+
|
133
|
+
Analyzer *per_field_analyzer_create(Analyzer *def);
|
122
134
|
void pfa_add_field(Analyzer *self, char *field, Analyzer *analyzer);
|
123
135
|
|
124
136
|
#endif
|
data/ext/array.c
CHANGED
data/ext/array.h
CHANGED
@@ -9,7 +9,7 @@ typedef struct Array {
|
|
9
9
|
} Array;
|
10
10
|
|
11
11
|
Array *ary_create(int size, void (*free_elem)(void *p));
|
12
|
-
void ary_destroy(
|
12
|
+
void ary_destroy(Array *ary);
|
13
13
|
void ary_set(Array *ary, int index, void *value);
|
14
14
|
void ary_append(Array *ary, void *value);
|
15
15
|
void *ary_get(Array *ary, int index);
|
data/ext/bitvector.c
CHANGED
@@ -20,15 +20,15 @@ BitVector *bv_create()
|
|
20
20
|
return bv_create_size(BV_INIT_CAPA);
|
21
21
|
}
|
22
22
|
|
23
|
-
void bv_destroy(
|
23
|
+
void bv_destroy(BitVector *bv)
|
24
24
|
{
|
25
|
-
BitVector *bv = (BitVector *)p;
|
26
25
|
free(bv->bits);
|
27
26
|
free(bv);
|
28
27
|
}
|
29
28
|
|
30
29
|
void bv_set(BitVector *bv, int bit)
|
31
30
|
{
|
31
|
+
uchar *byte_p;
|
32
32
|
int byte = bit>>3;
|
33
33
|
uchar bitmask = 1<<(bit&7);
|
34
34
|
if (bv->size <= byte) {
|
@@ -41,7 +41,7 @@ void bv_set(BitVector *bv, int bit)
|
|
41
41
|
bv->capa = capa;
|
42
42
|
}
|
43
43
|
}
|
44
|
-
|
44
|
+
byte_p = &(bv->bits[byte]);
|
45
45
|
if ((bitmask & *byte_p) == 0) {
|
46
46
|
bv->count++;
|
47
47
|
*byte_p |= bitmask;
|
@@ -63,10 +63,14 @@ void bv_clear(BitVector *bv)
|
|
63
63
|
|
64
64
|
void bv_unset(BitVector *bv, int bit)
|
65
65
|
{
|
66
|
+
uchar *byte_p;
|
67
|
+
uchar bitmask;
|
66
68
|
int byte = bit>>3;
|
69
|
+
|
67
70
|
if (byte >= bv->size) return;
|
68
|
-
|
69
|
-
|
71
|
+
|
72
|
+
byte_p = &(bv->bits[byte]);
|
73
|
+
bitmask = 1<<(bit&7);
|
70
74
|
if ((bitmask & *byte_p) > 0) {
|
71
75
|
bv->count--;
|
72
76
|
*byte_p &= ~bitmask;
|
@@ -115,7 +119,7 @@ BitVector *bv_read(Store *store, char *name)
|
|
115
119
|
{
|
116
120
|
BitVector *bv = ALLOC(BitVector);
|
117
121
|
InStream *is = store->open_input(store, name);
|
118
|
-
bv->capa = bv->size = is_read_vint(is);
|
122
|
+
bv->capa = bv->size = (int)is_read_vint(is);
|
119
123
|
bv->bits = ALLOC_N(uchar, bv->capa);
|
120
124
|
is_read_bytes(is, bv->bits, 0, bv->size);
|
121
125
|
is_close(is);
|
data/ext/bitvector.h
CHANGED
@@ -15,7 +15,7 @@ typedef struct BitVector {
|
|
15
15
|
|
16
16
|
BitVector *bv_create();;
|
17
17
|
BitVector *bv_create_size(int size);
|
18
|
-
void bv_destroy(
|
18
|
+
void bv_destroy(BitVector *bv);
|
19
19
|
void bv_set(BitVector *bv, int bit);
|
20
20
|
int bv_get(BitVector *bv, int bit);
|
21
21
|
void bv_clear(BitVector *bv);
|
@@ -24,6 +24,6 @@ void bv_write(BitVector *bv, Store *store, char *name);
|
|
24
24
|
BitVector *bv_read(Store *store, char *name);
|
25
25
|
void bv_scan_reset(BitVector *bv);
|
26
26
|
int bv_scan_next(BitVector *bv);
|
27
|
-
int bv_scan_next_from(BitVector *bv, int from);
|
27
|
+
int bv_scan_next_from(BitVector *bv, register const int from);
|
28
28
|
|
29
29
|
#endif
|
data/ext/compound_io.c
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
#include "index.h"
|
2
|
+
|
2
3
|
static char * const ALREADY_CLOSED_MSG = "Already closed";
|
3
4
|
static char * const STREAM_CLOSED_MSG = "Stream closed";
|
4
5
|
static char * const MISSING_FILE_MSG = "No sub-file found";
|
@@ -8,6 +9,8 @@ static char * const FILE_OFFSET_MSG = "Difference in the output file offsets"
|
|
8
9
|
" does not match the original file length";
|
9
10
|
static char * const NO_FILES_TO_MERGE_MSG = "No Files to merge into the compound file";
|
10
11
|
|
12
|
+
extern void store_destroy(Store *store);
|
13
|
+
|
11
14
|
/****************************************************************************
|
12
15
|
*
|
13
16
|
* CompoundStore
|
@@ -26,10 +29,11 @@ void cmpd_touch(Store *store, char *filename)
|
|
26
29
|
|
27
30
|
int cmpd_exists(Store *store, char *filename)
|
28
31
|
{
|
29
|
-
if (h_get(store->dir.cmpd->entries, filename) != NULL)
|
32
|
+
if (h_get(store->dir.cmpd->entries, filename) != NULL) {
|
30
33
|
return true;
|
31
|
-
else
|
34
|
+
} else {
|
32
35
|
return false;
|
36
|
+
}
|
33
37
|
}
|
34
38
|
|
35
39
|
/**
|
@@ -63,9 +67,8 @@ void cmpd_clear(Store *store)
|
|
63
67
|
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
64
68
|
}
|
65
69
|
|
66
|
-
void
|
70
|
+
void cmpd_close_i(Store *store)
|
67
71
|
{
|
68
|
-
mutex_lock(&store->mutex);
|
69
72
|
CompoundStore *cmpd = store->dir.cmpd;
|
70
73
|
if (cmpd->stream == NULL)
|
71
74
|
RAISE(IO_ERROR, ALREADY_CLOSED_MSG);
|
@@ -143,6 +146,7 @@ InStream *cmpd_create_input(InStream *sub_is, int offset, int length)
|
|
143
146
|
|
144
147
|
InStream *cmpd_open_input(Store *store, const char *filename)
|
145
148
|
{
|
149
|
+
FileEntry *entry;
|
146
150
|
CompoundStore *cmpd = store->dir.cmpd;
|
147
151
|
InStream *is;
|
148
152
|
|
@@ -152,7 +156,7 @@ InStream *cmpd_open_input(Store *store, const char *filename)
|
|
152
156
|
RAISE(IO_ERROR, STREAM_CLOSED_MSG);
|
153
157
|
}
|
154
158
|
|
155
|
-
|
159
|
+
entry = (FileEntry *)h_get(cmpd->entries, filename);
|
156
160
|
if (entry == NULL) {
|
157
161
|
mutex_unlock(&store->mutex);
|
158
162
|
RAISE(IO_ERROR, MISSING_FILE_MSG);
|
@@ -183,6 +187,9 @@ void cmpd_close_lock(Lock *lock)
|
|
183
187
|
|
184
188
|
Store *open_cmpd_store(Store *store, const char *name)
|
185
189
|
{
|
190
|
+
int count, i, offset;
|
191
|
+
char *fname;
|
192
|
+
FileEntry *entry;
|
186
193
|
Store * volatile new_store = NULL;
|
187
194
|
CompoundStore * volatile cmpd = NULL;
|
188
195
|
InStream * volatile is = NULL;
|
@@ -193,16 +200,14 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
193
200
|
|
194
201
|
cmpd->store = store;
|
195
202
|
cmpd->name = name;
|
196
|
-
cmpd->entries = h_new_str(&
|
203
|
+
cmpd->entries = h_new_str(&free, &free);
|
197
204
|
is = cmpd->stream = store->open_input(store, cmpd->name);
|
198
205
|
|
199
206
|
// read the directory and init files
|
200
|
-
|
201
|
-
|
202
|
-
int i, offset;
|
203
|
-
char *fname;
|
207
|
+
count = (int)is_read_vint(is);
|
208
|
+
entry = NULL;
|
204
209
|
for (i = 0; i < count; i++) {
|
205
|
-
offset = is_read_long(is);
|
210
|
+
offset = (int)is_read_long(is);
|
206
211
|
fname = is_read_string(is);
|
207
212
|
|
208
213
|
if (entry != NULL) {
|
@@ -231,9 +236,9 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
231
236
|
new_store->remove = &cmpd_remove;
|
232
237
|
new_store->rename = &cmpd_rename;
|
233
238
|
new_store->count = &cmpd_count;
|
234
|
-
new_store->close = &cmpd_close;
|
235
239
|
new_store->clear = &cmpd_clear;
|
236
240
|
new_store->length = &cmpd_length;
|
241
|
+
new_store->close_i = &cmpd_close_i;
|
237
242
|
new_store->create_output = &cmpd_create_output;
|
238
243
|
new_store->open_input = &cmpd_open_input;
|
239
244
|
new_store->open_lock = &cmpd_open_lock;
|
@@ -260,19 +265,13 @@ WFileEntry *wfe_create(char *name)
|
|
260
265
|
return wfe;
|
261
266
|
}
|
262
267
|
|
263
|
-
void wfe_destroy(void *p)
|
264
|
-
{
|
265
|
-
WFileEntry *wfe = (WFileEntry *)p;
|
266
|
-
efree(wfe);
|
267
|
-
}
|
268
|
-
|
269
268
|
CompoundWriter *open_cw(Store *store, char *name)
|
270
269
|
{
|
271
270
|
CompoundWriter *cw = ALLOC(CompoundWriter);
|
272
271
|
cw->store = store;
|
273
272
|
cw->name = name;
|
274
273
|
cw->ids = hs_str_create(NULL);
|
275
|
-
cw->file_entries = ary_create(1, &
|
274
|
+
cw->file_entries = ary_create(1, &free);
|
276
275
|
cw->merged = false;
|
277
276
|
return cw;
|
278
277
|
}
|
@@ -290,14 +289,16 @@ void cw_add_file(CompoundWriter *cw, char *id)
|
|
290
289
|
void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
|
291
290
|
{
|
292
291
|
int start_ptr = os_pos(os);
|
292
|
+
int end_ptr;
|
293
293
|
int remainder, length, len;
|
294
|
-
|
294
|
+
uchar buffer[BUFFER_SIZE];
|
295
|
+
|
295
296
|
InStream *is = cw->store->open_input(cw->store, src->name);
|
296
297
|
|
297
298
|
TRY
|
298
299
|
remainder = length = is_length(is);
|
299
300
|
|
300
|
-
|
301
|
+
|
301
302
|
while (remainder > 0) {
|
302
303
|
len = MIN(remainder, BUFFER_SIZE);
|
303
304
|
is_read_bytes(is, buffer, 0, len);
|
@@ -310,9 +311,9 @@ void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
|
|
310
311
|
RAISE(IO_ERROR, REMAINDER_ERROR_MSG);
|
311
312
|
|
312
313
|
// Verify that the output length diff is equal to original file
|
313
|
-
|
314
|
-
|
315
|
-
if (
|
314
|
+
end_ptr = os_pos(os);
|
315
|
+
len = end_ptr - start_ptr;
|
316
|
+
if (len != length)
|
316
317
|
RAISE(IO_ERROR, FILE_OFFSET_MSG);
|
317
318
|
|
318
319
|
XFINALLY
|
@@ -322,13 +323,16 @@ void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
|
|
322
323
|
|
323
324
|
void cw_close(CompoundWriter *cw)
|
324
325
|
{
|
326
|
+
OutStream * volatile os = NULL;
|
327
|
+
int i;
|
328
|
+
WFileEntry *wfe;
|
329
|
+
|
325
330
|
if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
|
326
331
|
if (cw->ids->size <= 0)
|
327
332
|
RAISE(STATE_ERROR, NO_FILES_TO_MERGE_MSG);
|
328
333
|
|
329
334
|
cw->merged = true;
|
330
335
|
|
331
|
-
OutStream * volatile os = NULL;
|
332
336
|
TRY
|
333
337
|
os = cw->store->create_output(cw->store, cw->name);
|
334
338
|
os_write_vint(os, cw->file_entries->size);
|
@@ -336,8 +340,7 @@ void cw_close(CompoundWriter *cw)
|
|
336
340
|
/* Write the directory with all offsets at 0.
|
337
341
|
* Remember the positions of directory entries so that we can adjust the
|
338
342
|
* offsets later */
|
339
|
-
|
340
|
-
WFileEntry *wfe;
|
343
|
+
|
341
344
|
for (i = 0; i < cw->file_entries->size; i++) {
|
342
345
|
wfe = (WFileEntry *)cw->file_entries->elems[i];
|
343
346
|
wfe->dir_offset = os_pos(os);
|
data/ext/document.c
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
#include
|
1
|
+
#include "document.h"
|
2
2
|
#include <string.h>
|
3
3
|
|
4
4
|
/****************************************************************************
|
@@ -26,7 +26,7 @@ inline void df_set(DocField *df, const char *name,
|
|
26
26
|
RAISE(ARG_ERROR, INDEX_NO_TV_YES_MSG);
|
27
27
|
df->name = estrdup(name);
|
28
28
|
df->data = data;
|
29
|
-
df->blen = strlen(data);
|
29
|
+
df->blen = (int)strlen(data);
|
30
30
|
df_set_store(df, store);
|
31
31
|
df_set_index(df, index);
|
32
32
|
df_set_term_vector(df, tv);
|
@@ -53,19 +53,17 @@ DocField *df_clone(DocField *self)
|
|
53
53
|
return clone;
|
54
54
|
}
|
55
55
|
|
56
|
-
void df_destroy(
|
56
|
+
void df_destroy(DocField *df)
|
57
57
|
{
|
58
|
-
DocField *df = (DocField *)p;
|
59
58
|
free(df->name);
|
60
|
-
free(
|
59
|
+
free(df);
|
61
60
|
}
|
62
61
|
|
63
|
-
void df_destroy_data(
|
62
|
+
void df_destroy_data(DocField *df)
|
64
63
|
{
|
65
|
-
DocField *df = (DocField *)p;
|
66
64
|
free(df->data);
|
67
65
|
free(df->name);
|
68
|
-
free(
|
66
|
+
free(df);
|
69
67
|
}
|
70
68
|
|
71
69
|
/*
|
@@ -161,10 +159,13 @@ void df_set_term_vector(DocField *df, int tv)
|
|
161
159
|
*/
|
162
160
|
DocField *df_create_binary(char *name, char *data, int blen, int store)
|
163
161
|
{
|
162
|
+
DocField *df;
|
163
|
+
|
164
164
|
if (store == DF_STORE_NO) {
|
165
165
|
RAISE(ARG_ERROR, BIN_FIELD_STORE_NO_MSG);
|
166
166
|
}
|
167
|
-
|
167
|
+
|
168
|
+
df = df_create(name, data, store, DF_INDEX_NO, DF_TERM_VECTOR_NO);
|
168
169
|
df->is_binary = true;
|
169
170
|
df->blen = blen;
|
170
171
|
return df;
|
@@ -203,27 +204,26 @@ char *df_to_s(DocField *self)
|
|
203
204
|
Document *doc_create()
|
204
205
|
{
|
205
206
|
Document *doc = ALLOC(Document);
|
206
|
-
doc->fields = h_new_str(&free, &ary_destroy);
|
207
|
+
doc->fields = h_new_str(&free, (free_ft)&ary_destroy);
|
207
208
|
doc->fcnt = 0;
|
208
209
|
doc->dfcnt = 0;
|
209
210
|
doc->field_arr = NULL;
|
210
211
|
doc->df_arr = NULL;
|
211
212
|
doc->boost = 1.0;
|
212
|
-
doc->free_data = &df_destroy_data;
|
213
|
+
doc->free_data = (free_ft)&df_destroy_data;
|
213
214
|
return doc;
|
214
215
|
}
|
215
216
|
|
216
217
|
Document *doc_create_keep_data()
|
217
218
|
{
|
218
219
|
Document *doc = doc_create();
|
219
|
-
doc->free_data = df_destroy;
|
220
|
+
doc->free_data = (free_ft)&df_destroy;
|
220
221
|
return doc;
|
221
222
|
}
|
222
223
|
|
223
|
-
void doc_destroy(
|
224
|
+
void doc_destroy(Document *doc)
|
224
225
|
{
|
225
226
|
int i;
|
226
|
-
Document *doc = (Document *)p;
|
227
227
|
if (doc->free_data) {
|
228
228
|
for (i = 0; i < doc->dfcnt; i++) {
|
229
229
|
doc->free_data(doc->df_arr[i]);
|
@@ -349,7 +349,7 @@ char *doc_to_s(Document *doc)
|
|
349
349
|
char **df_strs = ALLOC_N(char *, doc->dfcnt);
|
350
350
|
for (i = 0; i < doc->dfcnt; i++) {
|
351
351
|
df_strs[i] = df_to_s(doc->df_arr[i]);
|
352
|
-
len += strlen(df_strs[i]) + 3;
|
352
|
+
len += (int)strlen(df_strs[i]) + 3;
|
353
353
|
}
|
354
354
|
str_ptr = str = ALLOC_N(char, len);
|
355
355
|
sprintf(str_ptr, "Document {\n");
|
data/ext/document.h
CHANGED
@@ -34,7 +34,7 @@ enum {
|
|
34
34
|
typedef struct DocField {
|
35
35
|
char *name;
|
36
36
|
char *data;
|
37
|
-
int blen;
|
37
|
+
int blen; /* used for binary fields to store data length */
|
38
38
|
float boost;
|
39
39
|
bool is_stored : 1;
|
40
40
|
bool is_compressed : 1;
|
@@ -50,8 +50,8 @@ typedef struct DocField {
|
|
50
50
|
DocField *df_create(const char *name, char *data, int store, int index, int tv);
|
51
51
|
DocField *df_clone(DocField *self);
|
52
52
|
void df_set(DocField *df, const char *name, char *data, int store, int index, int tv);
|
53
|
-
void df_destroy(
|
54
|
-
void df_destroy_data(
|
53
|
+
void df_destroy(DocField *df);
|
54
|
+
void df_destroy_data(DocField *df);
|
55
55
|
void df_set_store(DocField *df, int store);
|
56
56
|
void df_set_index(DocField *df, int index);
|
57
57
|
void df_set_term_vector(DocField *df, int tv);
|
@@ -71,12 +71,12 @@ typedef struct Document {
|
|
71
71
|
DocField **df_arr;
|
72
72
|
int dfcnt;
|
73
73
|
float boost;
|
74
|
-
|
74
|
+
free_ft free_data;
|
75
75
|
} Document;
|
76
76
|
|
77
77
|
Document *doc_create();
|
78
78
|
Document *doc_create_keep_data();
|
79
|
-
void doc_destroy(
|
79
|
+
void doc_destroy(Document *doc);
|
80
80
|
void doc_add_field(Document *doc, DocField *df);
|
81
81
|
DocField *doc_get_field(Document *doc, const char *fname);
|
82
82
|
Array *doc_get_fields(Document *doc, const char *fname);
|