isomorfeus-ferret 0.13.9 → 0.13.10
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/isomorfeus_ferret_ext/bm_store.c +3 -3
- data/ext/isomorfeus_ferret_ext/frt_analysis.c +2 -2
- data/ext/isomorfeus_ferret_ext/frt_analysis.h +4 -4
- data/ext/isomorfeus_ferret_ext/frt_compound_io.c +11 -11
- data/ext/isomorfeus_ferret_ext/frt_config.h +8 -0
- data/ext/isomorfeus_ferret_ext/frt_fs_store.c +13 -7
- data/ext/isomorfeus_ferret_ext/frt_index.c +25 -25
- data/ext/isomorfeus_ferret_ext/frt_index.h +18 -18
- data/ext/isomorfeus_ferret_ext/frt_ram_store.c +6 -6
- data/ext/isomorfeus_ferret_ext/frt_store.c +8 -8
- data/ext/isomorfeus_ferret_ext/frt_store.h +22 -22
- data/ext/isomorfeus_ferret_ext/test_index.c +2 -2
- data/lib/isomorfeus/ferret/version.rb +1 -1
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0625b49341ee2fc35e80e673e368789532c06ce6c93e779072cacc1206847a4b
|
4
|
+
data.tar.gz: 7e81488d430471a37a872f80319efdcf8378e55dc4a583e33ff12bea942b9416
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 38cf613eac98374898aa9a5c998cc3534d18d6badba3560cd1660413acd189cb6e618eba38f3bf828cef23fcafb9e44b2de349363b55585af9ea79927ff9b2cf
|
7
|
+
data.tar.gz: 88564c30711737b48bec8879df788b46e03b06a041a98aaf3147c7375ef0d313d48f789805303774b4d0ed4531603ee8baaa46935b81abafb6df8ba7ccdb56a6
|
@@ -6,7 +6,7 @@
|
|
6
6
|
#define N 10
|
7
7
|
#define write_byte(os, b) os->buf.buf[os->buf.pos++] = (frt_uchar)b
|
8
8
|
|
9
|
-
void my_os_write_voff_t(FrtOutStream *os, register off_t num) {
|
9
|
+
void my_os_write_voff_t(FrtOutStream *os, register frt_off_t num) {
|
10
10
|
if (!(num&0x7f)) {
|
11
11
|
if (os->buf.pos >= FRT_BUFFER_SIZE) {
|
12
12
|
frt_os_write_byte(os, (frt_uchar)num);
|
@@ -48,7 +48,7 @@ void my_os_write_voff_t(FrtOutStream *os, register off_t num) {
|
|
48
48
|
|
49
49
|
static void vint_out(void) {
|
50
50
|
int n;
|
51
|
-
|
51
|
+
frt_off_t i;
|
52
52
|
FrtOutStream *os;
|
53
53
|
|
54
54
|
for (n = 0; n < N; n++) {
|
@@ -63,7 +63,7 @@ static void vint_out(void) {
|
|
63
63
|
|
64
64
|
static void unrolled_vint_out(void) {
|
65
65
|
int n;
|
66
|
-
|
66
|
+
frt_off_t i;
|
67
67
|
FrtOutStream *os;
|
68
68
|
|
69
69
|
for (n = 0; n < N; n++) {
|
@@ -64,7 +64,7 @@ static inline int get_cp(char *start, char *end, int *cp_len, rb_encoding *enc)
|
|
64
64
|
/*** FrtToken ****************************************************************/
|
65
65
|
/*****************************************************************************/
|
66
66
|
|
67
|
-
FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen,
|
67
|
+
FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
68
68
|
if (tlen >= FRT_MAX_WORD_SIZE) {
|
69
69
|
tlen = FRT_MAX_WORD_SIZE - 1; // TODO: this may invalidate mbc's
|
70
70
|
}
|
@@ -92,7 +92,7 @@ static FrtToken *frt_tk_set_ts(FrtToken *tk, char *start, char *end, char *text,
|
|
92
92
|
return frt_tk_set(tk, start, (int)(end - start), (off_t)(start - text), (off_t)(end - text), pos_inc, encoding);
|
93
93
|
}
|
94
94
|
|
95
|
-
FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text,
|
95
|
+
FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding) {
|
96
96
|
return frt_tk_set(tk, text, (int)strlen(text), start, end, pos_inc, encoding);
|
97
97
|
}
|
98
98
|
|
@@ -13,15 +13,15 @@
|
|
13
13
|
typedef struct FrtToken {
|
14
14
|
char text[FRT_MAX_WORD_SIZE];
|
15
15
|
int len;
|
16
|
-
|
17
|
-
|
16
|
+
frt_off_t start;
|
17
|
+
frt_off_t end;
|
18
18
|
int pos_inc;
|
19
19
|
} FrtToken;
|
20
20
|
|
21
21
|
extern FrtToken *frt_tk_new();
|
22
22
|
extern void frt_tk_destroy(void *p);
|
23
|
-
extern FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen,
|
24
|
-
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text,
|
23
|
+
extern FrtToken *frt_tk_set(FrtToken *tk, char *text, int tlen, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
24
|
+
extern FrtToken *frt_tk_set_no_len(FrtToken *tk, char *text, frt_off_t start, frt_off_t end, int pos_inc, rb_encoding *encoding);
|
25
25
|
extern int frt_tk_eq(FrtToken *tk1, FrtToken *tk2);
|
26
26
|
extern int frt_tk_cmp(FrtToken *tk1, FrtToken *tk2);
|
27
27
|
|
@@ -12,8 +12,8 @@ extern FrtStore *frt_store_new();
|
|
12
12
|
****************************************************************************/
|
13
13
|
|
14
14
|
typedef struct FileEntry {
|
15
|
-
|
16
|
-
|
15
|
+
frt_off_t offset;
|
16
|
+
frt_off_t length;
|
17
17
|
} FileEntry;
|
18
18
|
|
19
19
|
static void cmpd_touch(FrtStore *store, const char *file_name) {
|
@@ -76,7 +76,7 @@ static void cmpd_close_i(FrtStore *store) {
|
|
76
76
|
free(store->dir.cmpd);
|
77
77
|
}
|
78
78
|
|
79
|
-
static off_t cmpd_length(FrtStore *store, const char *file_name) {
|
79
|
+
static frt_off_t cmpd_length(FrtStore *store, const char *file_name) {
|
80
80
|
FileEntry *fe = (FileEntry *)frt_h_get(store->dir.cmpd->entries, file_name);
|
81
81
|
if (fe != NULL) {
|
82
82
|
return fe->length;
|
@@ -85,7 +85,7 @@ static off_t cmpd_length(FrtStore *store, const char *file_name) {
|
|
85
85
|
}
|
86
86
|
}
|
87
87
|
|
88
|
-
static void cmpdi_seek_i(FrtInStream *is,
|
88
|
+
static void cmpdi_seek_i(FrtInStream *is, frt_off_t pos) {
|
89
89
|
(void)is;
|
90
90
|
(void)pos;
|
91
91
|
}
|
@@ -95,7 +95,7 @@ static void cmpdi_close_i(FrtInStream *is) {
|
|
95
95
|
free(is->d.cis);
|
96
96
|
}
|
97
97
|
|
98
|
-
static off_t cmpdi_length_i(FrtInStream *is) {
|
98
|
+
static frt_off_t cmpdi_length_i(FrtInStream *is) {
|
99
99
|
return (is->d.cis->length);
|
100
100
|
}
|
101
101
|
|
@@ -104,7 +104,7 @@ static off_t cmpdi_length_i(FrtInStream *is) {
|
|
104
104
|
*/
|
105
105
|
static void cmpdi_read_i(FrtInStream *is, frt_uchar *b, int len) {
|
106
106
|
FrtCompoundInStream *cis = is->d.cis;
|
107
|
-
|
107
|
+
frt_off_t start = frt_is_pos(is);
|
108
108
|
|
109
109
|
if ((start + len) > cis->length) {
|
110
110
|
FRT_RAISE(FRT_EOF_ERROR, "Tried to read past end of file. File length is "
|
@@ -123,7 +123,7 @@ static const struct FrtInStreamMethods CMPD_IN_STREAM_METHODS = {
|
|
123
123
|
cmpdi_close_i
|
124
124
|
};
|
125
125
|
|
126
|
-
static FrtInStream *cmpd_create_input(FrtInStream *sub_is,
|
126
|
+
static FrtInStream *cmpd_create_input(FrtInStream *sub_is, frt_off_t offset, frt_off_t length) {
|
127
127
|
FrtInStream *is = frt_is_new();
|
128
128
|
FrtCompoundInStream *cis = FRT_ALLOC(FrtCompoundInStream);
|
129
129
|
|
@@ -182,7 +182,7 @@ static void cmpd_close_lock_i(FrtLock *lock) {
|
|
182
182
|
|
183
183
|
FrtStore *frt_open_cmpd_store(FrtStore *store, const char *name) {
|
184
184
|
int count, i;
|
185
|
-
|
185
|
+
frt_off_t offset;
|
186
186
|
char *fname;
|
187
187
|
FileEntry *volatile entry = NULL;
|
188
188
|
FrtStore *new_store = NULL;
|
@@ -273,9 +273,9 @@ void frt_cw_add_file(FrtCompoundWriter *cw, char *id) {
|
|
273
273
|
}
|
274
274
|
|
275
275
|
static void cw_copy_file(FrtCompoundWriter *cw, FrtCWFileEntry *src, FrtOutStream *os) {
|
276
|
-
|
277
|
-
|
278
|
-
|
276
|
+
frt_off_t start_ptr = frt_os_pos(os);
|
277
|
+
frt_off_t end_ptr;
|
278
|
+
frt_off_t remainder, length, len;
|
279
279
|
frt_uchar buffer[FRT_BUFFER_SIZE];
|
280
280
|
|
281
281
|
FrtInStream *is = cw->store->open_input(cw->store, src->name);
|
@@ -1,6 +1,8 @@
|
|
1
1
|
#ifndef FRT_DEFINES_H
|
2
2
|
#define FRT_DEFINES_H
|
3
3
|
|
4
|
+
#define _FILE_OFFSET_BITS 64
|
5
|
+
|
4
6
|
#include <sys/types.h>
|
5
7
|
#include <limits.h>
|
6
8
|
#include "frt_posh.h"
|
@@ -24,6 +26,12 @@ typedef posh_i32_t frt_i32;
|
|
24
26
|
typedef posh_u64_t frt_u64;
|
25
27
|
typedef posh_i64_t frt_i64;
|
26
28
|
|
29
|
+
#if defined POSH_OS_WIN64
|
30
|
+
typedef off64_t frt_off_t;
|
31
|
+
#else
|
32
|
+
typedef off_t frt_off_t;
|
33
|
+
#endif
|
34
|
+
|
27
35
|
#if ( LONG_MAX == 2147483647 ) && defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
|
28
36
|
#define FRT_OFF_T_PFX "ll"
|
29
37
|
#else
|
@@ -225,7 +225,7 @@ static void fs_destroy(FrtStore *store)
|
|
225
225
|
free(store->dir.path);
|
226
226
|
}
|
227
227
|
|
228
|
-
static
|
228
|
+
static frt_off_t fs_length(FrtStore *store, const char *filename)
|
229
229
|
{
|
230
230
|
char path[FRT_MAX_FILE_PATH];
|
231
231
|
struct stat stt;
|
@@ -247,9 +247,12 @@ static void fso_flush_i(FrtOutStream *os, const frt_uchar *src, int len)
|
|
247
247
|
}
|
248
248
|
}
|
249
249
|
|
250
|
-
static void fso_seek_i(FrtOutStream *os,
|
251
|
-
|
250
|
+
static void fso_seek_i(FrtOutStream *os, frt_off_t pos) {
|
251
|
+
#if (defined POSH_OS_WIN32 || defined POSH_OS_WIN64)
|
252
|
+
if (_lseeki64(os->file.fd, pos, SEEK_SET) < 0) {
|
253
|
+
#else
|
252
254
|
if (lseek(os->file.fd, pos, SEEK_SET) < 0) {
|
255
|
+
#endif
|
253
256
|
FRT_RAISE(FRT_IO_ERROR, "seeking position %"FRT_OFF_T_PFX"d: <%s>",
|
254
257
|
pos, strerror(errno));
|
255
258
|
}
|
@@ -286,7 +289,7 @@ static FrtOutStream *fs_new_output(FrtStore *store, const char *filename)
|
|
286
289
|
static void fsi_read_i(FrtInStream *is, frt_uchar *path, int len)
|
287
290
|
{
|
288
291
|
int fd = is->f->file.fd;
|
289
|
-
|
292
|
+
frt_off_t pos = frt_is_pos(is);
|
290
293
|
if (pos != lseek(fd, 0, SEEK_CUR)) {
|
291
294
|
lseek(fd, pos, SEEK_SET);
|
292
295
|
}
|
@@ -299,9 +302,12 @@ static void fsi_read_i(FrtInStream *is, frt_uchar *path, int len)
|
|
299
302
|
}
|
300
303
|
}
|
301
304
|
|
302
|
-
static void fsi_seek_i(FrtInStream *is,
|
303
|
-
|
305
|
+
static void fsi_seek_i(FrtInStream *is, frt_off_t pos) {
|
306
|
+
#if (defined POSH_OS_WIN32 || defined POSH_OS_WIN64)
|
307
|
+
if (_lseeki64(is->f->file.fd, pos, SEEK_SET) < 0) {
|
308
|
+
#else
|
304
309
|
if (lseek(is->f->file.fd, pos, SEEK_SET) < 0) {
|
310
|
+
#endif
|
305
311
|
FRT_RAISE(FRT_IO_ERROR, "seeking pos %"FRT_OFF_T_PFX"d: <%s>",
|
306
312
|
pos, strerror(errno));
|
307
313
|
}
|
@@ -315,7 +321,7 @@ static void fsi_close_i(FrtInStream *is)
|
|
315
321
|
if (is->d.path) free(is->d.path);
|
316
322
|
}
|
317
323
|
|
318
|
-
static
|
324
|
+
static frt_off_t fsi_length_i(FrtInStream *is)
|
319
325
|
{
|
320
326
|
struct stat stt;
|
321
327
|
if (fstat(is->f->file.fd, &stt)) {
|
@@ -1565,7 +1565,7 @@ static void frt_fr_read_compressed_fields(FrtFieldsReader *fr, FrtDocField *df,
|
|
1565
1565
|
FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num)
|
1566
1566
|
{
|
1567
1567
|
int i, j;
|
1568
|
-
|
1568
|
+
frt_off_t pos;
|
1569
1569
|
int stored_cnt;
|
1570
1570
|
FrtDocument *doc = frt_doc_new();
|
1571
1571
|
FrtInStream *fdx_in = fr->fdx_in;
|
@@ -1612,7 +1612,7 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1612
1612
|
{
|
1613
1613
|
int start = 0;
|
1614
1614
|
int i, j;
|
1615
|
-
|
1615
|
+
frt_off_t pos;
|
1616
1616
|
int stored_cnt;
|
1617
1617
|
FrtLazyDoc *lazy_doc;
|
1618
1618
|
FrtInStream *fdx_in = fr->fdx_in;
|
@@ -1642,7 +1642,7 @@ FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num)
|
|
1642
1642
|
lazy_doc_add_field(lazy_doc, lazy_df, i);
|
1643
1643
|
}
|
1644
1644
|
/* correct the starts to their correct absolute positions */
|
1645
|
-
const
|
1645
|
+
const frt_off_t abs_start = frt_is_pos(fdt_in);
|
1646
1646
|
for (i = 0; i < stored_cnt; i++) {
|
1647
1647
|
FrtLazyDocField *lazy_df = lazy_doc->fields[i];
|
1648
1648
|
const int df_size = lazy_df->size;
|
@@ -1719,7 +1719,7 @@ FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num)
|
|
1719
1719
|
int i;
|
1720
1720
|
FrtInStream *fdx_in = fr->fdx_in;
|
1721
1721
|
FrtInStream *fdt_in = fr->fdt_in;
|
1722
|
-
|
1722
|
+
frt_off_t data_ptr, field_index_ptr;
|
1723
1723
|
int field_cnt;
|
1724
1724
|
int *field_nums;
|
1725
1725
|
|
@@ -1757,7 +1757,7 @@ FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num, int field_n
|
|
1757
1757
|
|
1758
1758
|
if (doc_num >= 0 && doc_num < fr->size) {
|
1759
1759
|
int i, fnum = -1;
|
1760
|
-
|
1760
|
+
frt_off_t field_index_ptr;
|
1761
1761
|
int field_cnt;
|
1762
1762
|
int offset = 0;
|
1763
1763
|
FrtInStream *fdx_in = fr->fdx_in;
|
@@ -2038,7 +2038,7 @@ void frt_fw_add_postings(FrtFieldsWriter *fw,
|
|
2038
2038
|
int i, delta_start, delta_length;
|
2039
2039
|
const char *last_term = FRT_EMPTY_STRING;
|
2040
2040
|
FrtOutStream *fdt_out = fw->fdt_out;
|
2041
|
-
|
2041
|
+
frt_off_t fdt_start_pos = frt_os_pos(fdt_out);
|
2042
2042
|
FrtPostingList *plist;
|
2043
2043
|
FrtPosting *posting;
|
2044
2044
|
FrtOccurence *occ;
|
@@ -2152,7 +2152,7 @@ static void sti_ensure_index_is_read(FrtSegmentTermIndex *sti, FrtTermEnum *inde
|
|
2152
2152
|
if (NULL == sti->index_terms) {
|
2153
2153
|
int i;
|
2154
2154
|
int index_cnt = sti->index_cnt;
|
2155
|
-
|
2155
|
+
frt_off_t index_ptr = 0;
|
2156
2156
|
ste_reset(index_te);
|
2157
2157
|
frt_is_seek(STE(index_te)->is, sti->index_ptr);
|
2158
2158
|
STE(index_te)->size = sti->index_cnt;
|
@@ -2789,7 +2789,7 @@ static void tw_add(FrtTermWriter *tw, const char *term, int term_len, FrtTermInf
|
|
2789
2789
|
}
|
2790
2790
|
|
2791
2791
|
void frt_tiw_add(FrtTermInfosWriter *tiw, const char *term, int term_len, FrtTermInfo *ti) {
|
2792
|
-
|
2792
|
+
frt_off_t tis_pos;
|
2793
2793
|
|
2794
2794
|
if (0 == (tiw->tis_writer->counter % tiw->index_interval)) {
|
2795
2795
|
/* add an index term */
|
@@ -2962,8 +2962,8 @@ static bool stde_skip_to(FrtTermDocEnum *tde, int target_doc_num) {
|
|
2962
2962
|
if (stde->doc_freq >= stde->skip_interval
|
2963
2963
|
&& target_doc_num > stde->doc_num) { /* optimized case */
|
2964
2964
|
int last_skip_doc;
|
2965
|
-
|
2966
|
-
|
2965
|
+
frt_off_t last_frq_ptr;
|
2966
|
+
frt_off_t last_prx_ptr;
|
2967
2967
|
int num_skipped;
|
2968
2968
|
|
2969
2969
|
if (NULL == stde->skip_in) {
|
@@ -3034,7 +3034,7 @@ static void stde_skip_prox(FrtSegmentTermDocEnum *stde) {
|
|
3034
3034
|
(void)stde;
|
3035
3035
|
}
|
3036
3036
|
|
3037
|
-
static void stde_seek_prox(FrtSegmentTermDocEnum *stde,
|
3037
|
+
static void stde_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr) {
|
3038
3038
|
(void)stde;
|
3039
3039
|
(void)prx_ptr;
|
3040
3040
|
}
|
@@ -3130,7 +3130,7 @@ static void stpe_skip_prox(FrtSegmentTermDocEnum *stde)
|
|
3130
3130
|
frt_is_skip_vints(stde->prx_in, stde->freq);
|
3131
3131
|
}
|
3132
3132
|
|
3133
|
-
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde,
|
3133
|
+
static void stpe_seek_prox(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr)
|
3134
3134
|
{
|
3135
3135
|
frt_is_seek(stde->prx_in, prx_ptr);
|
3136
3136
|
stde->prx_cnt = 0;
|
@@ -5108,8 +5108,8 @@ typedef struct SkipBuffer
|
|
5108
5108
|
FrtOutStream *frq_out;
|
5109
5109
|
FrtOutStream *prx_out;
|
5110
5110
|
int last_doc;
|
5111
|
-
|
5112
|
-
|
5111
|
+
frt_off_t last_frq_ptr;
|
5112
|
+
frt_off_t last_prx_ptr;
|
5113
5113
|
} SkipBuffer;
|
5114
5114
|
|
5115
5115
|
static void skip_buf_reset(SkipBuffer *skip_buf)
|
@@ -5131,8 +5131,8 @@ static SkipBuffer *skip_buf_new(FrtOutStream *frq_out, FrtOutStream *prx_out)
|
|
5131
5131
|
|
5132
5132
|
static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
5133
5133
|
{
|
5134
|
-
|
5135
|
-
|
5134
|
+
frt_off_t frq_ptr = frt_os_pos(skip_buf->frq_out);
|
5135
|
+
frt_off_t prx_ptr = frt_os_pos(skip_buf->prx_out);
|
5136
5136
|
|
5137
5137
|
frt_os_write_vint(skip_buf->buf, doc - skip_buf->last_doc);
|
5138
5138
|
frt_os_write_vint(skip_buf->buf, frq_ptr - skip_buf->last_frq_ptr);
|
@@ -5143,9 +5143,9 @@ static void skip_buf_add(SkipBuffer *skip_buf, int doc)
|
|
5143
5143
|
skip_buf->last_prx_ptr = prx_ptr;
|
5144
5144
|
}
|
5145
5145
|
|
5146
|
-
static
|
5146
|
+
static frt_off_t skip_buf_write(SkipBuffer *skip_buf)
|
5147
5147
|
{
|
5148
|
-
|
5148
|
+
frt_off_t skip_ptr = frt_os_pos(skip_buf->frq_out);
|
5149
5149
|
frt_ramo_write_to(skip_buf->buf, skip_buf->frq_out);
|
5150
5150
|
return skip_ptr;
|
5151
5151
|
}
|
@@ -5378,7 +5378,7 @@ static void dw_add_posting(FrtMemoryPool *mp,
|
|
5378
5378
|
}
|
5379
5379
|
}
|
5380
5380
|
|
5381
|
-
static void dw_add_offsets(FrtDocWriter *dw, int pos,
|
5381
|
+
static void dw_add_offsets(FrtDocWriter *dw, int pos, frt_off_t start, frt_off_t end)
|
5382
5382
|
{
|
5383
5383
|
if (pos >= dw->offsets_capa) {
|
5384
5384
|
int old_capa = dw->offsets_capa;
|
@@ -5402,7 +5402,7 @@ FrtHash *frt_dw_invert_field(FrtDocWriter *dw, FrtFieldInverter *fld_inv, FrtDoc
|
|
5402
5402
|
int doc_num = dw->doc_num;
|
5403
5403
|
int i;
|
5404
5404
|
const int df_size = df->size;
|
5405
|
-
|
5405
|
+
frt_off_t start_offset = 0;
|
5406
5406
|
|
5407
5407
|
if (fld_inv->is_tokenized) {
|
5408
5408
|
FrtToken *tk;
|
@@ -5701,7 +5701,7 @@ static void sm_destroy(SegmentMerger *sm)
|
|
5701
5701
|
static void sm_merge_fields(SegmentMerger *sm)
|
5702
5702
|
{
|
5703
5703
|
int i, j;
|
5704
|
-
|
5704
|
+
frt_off_t start, end = 0;
|
5705
5705
|
char file_name[FRT_SEGMENT_NAME_MAX_LENGTH];
|
5706
5706
|
FrtOutStream *fdt_out, *fdx_out;
|
5707
5707
|
FrtStore *store = sm->store;
|
@@ -5818,12 +5818,12 @@ static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
|
|
5818
5818
|
static void sm_merge_term_info(SegmentMerger *sm, SegmentMergeInfo **matches,
|
5819
5819
|
int match_size)
|
5820
5820
|
{
|
5821
|
-
|
5822
|
-
|
5821
|
+
frt_off_t frq_ptr = frt_os_pos(sm->frq_out);
|
5822
|
+
frt_off_t prx_ptr = frt_os_pos(sm->prx_out);
|
5823
5823
|
|
5824
5824
|
int df = sm_append_postings(sm, matches, match_size); /* append posting data */
|
5825
5825
|
|
5826
|
-
|
5826
|
+
frt_off_t skip_ptr = skip_buf_write(sm->skip_buf);
|
5827
5827
|
|
5828
5828
|
if (df > 0) {
|
5829
5829
|
/* add an entry to the dictionary with ptrs to prox and freq files */
|
@@ -6370,7 +6370,7 @@ static void iw_cp_fields(FrtIndexWriter *iw, FrtSegmentReader *sr, const char *s
|
|
6370
6370
|
int j, data_len = 0;
|
6371
6371
|
const int field_cnt = frt_is_read_vint(fdt_in);
|
6372
6372
|
int tv_cnt;
|
6373
|
-
|
6373
|
+
frt_off_t doc_start_ptr = frt_os_pos(fdt_out);
|
6374
6374
|
|
6375
6375
|
frt_os_write_u64(fdx_out, doc_start_ptr);
|
6376
6376
|
frt_os_write_vint(fdt_out, field_cnt);
|
@@ -224,9 +224,9 @@ extern void frt_sis_put(FrtSegmentInfos *sis, FILE *stream);
|
|
224
224
|
|
225
225
|
typedef struct FrtTermInfo {
|
226
226
|
int doc_freq;
|
227
|
-
|
228
|
-
|
229
|
-
|
227
|
+
frt_off_t frq_ptr;
|
228
|
+
frt_off_t prx_ptr;
|
229
|
+
frt_off_t skip_offset;
|
230
230
|
} FrtTermInfo;
|
231
231
|
|
232
232
|
#define frt_ti_set(ti, mdf, mfp, mpp, mso) do {\
|
@@ -265,14 +265,14 @@ FrtTermInfo *frt_te_get_ti(struct FrtTermEnum *te);
|
|
265
265
|
/* FrtSegmentTermIndex */
|
266
266
|
|
267
267
|
typedef struct FrtSegmentTermIndex {
|
268
|
-
|
269
|
-
|
268
|
+
frt_off_t index_ptr;
|
269
|
+
frt_off_t ptr;
|
270
270
|
int index_cnt;
|
271
271
|
int size;
|
272
272
|
char **index_terms;
|
273
273
|
int *index_term_lens;
|
274
274
|
FrtTermInfo *index_term_infos;
|
275
|
-
|
275
|
+
frt_off_t *index_ptrs;
|
276
276
|
} FrtSegmentTermIndex;
|
277
277
|
|
278
278
|
/* FrtSegmentFieldIndex */
|
@@ -281,7 +281,7 @@ typedef struct FrtSegmentFieldIndex {
|
|
281
281
|
frt_mutex_t mutex;
|
282
282
|
int skip_interval;
|
283
283
|
int index_interval;
|
284
|
-
|
284
|
+
frt_off_t index_ptr;
|
285
285
|
FrtTermEnum *index_te;
|
286
286
|
FrtHash *field_dict;
|
287
287
|
} FrtSegmentFieldIndex;
|
@@ -349,7 +349,7 @@ typedef struct FrtTermInfosWriter {
|
|
349
349
|
int field_count;
|
350
350
|
int index_interval;
|
351
351
|
int skip_interval;
|
352
|
-
|
352
|
+
frt_off_t last_index_ptr;
|
353
353
|
FrtOutStream *tfx_out;
|
354
354
|
FrtTermWriter *tix_writer;
|
355
355
|
FrtTermWriter *tis_writer;
|
@@ -385,7 +385,7 @@ struct FrtTermDocEnum {
|
|
385
385
|
typedef struct FrtSegmentTermDocEnum FrtSegmentTermDocEnum;
|
386
386
|
struct FrtSegmentTermDocEnum {
|
387
387
|
FrtTermDocEnum tde;
|
388
|
-
void (*seek_prox)(FrtSegmentTermDocEnum *stde,
|
388
|
+
void (*seek_prox)(FrtSegmentTermDocEnum *stde, frt_off_t prx_ptr);
|
389
389
|
void (*skip_prox)(FrtSegmentTermDocEnum *stde);
|
390
390
|
FrtTermInfosReader *tir;
|
391
391
|
FrtInStream *frq_in;
|
@@ -402,9 +402,9 @@ struct FrtSegmentTermDocEnum {
|
|
402
402
|
int skip_doc;
|
403
403
|
int prx_cnt;
|
404
404
|
int position;
|
405
|
-
|
406
|
-
|
407
|
-
|
405
|
+
frt_off_t frq_ptr;
|
406
|
+
frt_off_t prx_ptr;
|
407
|
+
frt_off_t skip_ptr;
|
408
408
|
bool have_skipped : 1;
|
409
409
|
};
|
410
410
|
|
@@ -429,8 +429,8 @@ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **t
|
|
429
429
|
****************************************************************************/
|
430
430
|
|
431
431
|
typedef struct FrtOffset {
|
432
|
-
|
433
|
-
|
432
|
+
frt_off_t start;
|
433
|
+
frt_off_t end;
|
434
434
|
} FrtOffset;
|
435
435
|
|
436
436
|
/****************************************************************************
|
@@ -529,7 +529,7 @@ extern FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term);
|
|
529
529
|
|
530
530
|
/* * * FrtLazyDocField * * */
|
531
531
|
typedef struct FrtLazyDocFieldData {
|
532
|
-
|
532
|
+
frt_off_t start;
|
533
533
|
int length;
|
534
534
|
rb_encoding *encoding;
|
535
535
|
FrtCompressionType compression; /* as stored */
|
@@ -595,7 +595,7 @@ typedef struct FrtFieldsWriter {
|
|
595
595
|
FrtOutStream *fdx_out;
|
596
596
|
FrtOutStream *buffer;
|
597
597
|
FrtTVField *tv_fields;
|
598
|
-
|
598
|
+
frt_off_t start_ptr;
|
599
599
|
} FrtFieldsWriter;
|
600
600
|
|
601
601
|
extern FrtFieldsWriter *frt_fw_open(FrtStore *store, const char *segment, FrtFieldInfos *fis);
|
@@ -846,8 +846,8 @@ extern void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers, con
|
|
846
846
|
#define FRT_CW_INIT_CAPA 16
|
847
847
|
typedef struct FrtCWFileEntry {
|
848
848
|
char *name;
|
849
|
-
|
850
|
-
|
849
|
+
frt_off_t dir_offset;
|
850
|
+
frt_off_t data_offset;
|
851
851
|
} FrtCWFileEntry;
|
852
852
|
|
853
853
|
typedef struct FrtCompoundWriter {
|
@@ -136,7 +136,7 @@ static void ram_clear_all(FrtStore *store) {
|
|
136
136
|
}
|
137
137
|
}
|
138
138
|
|
139
|
-
static
|
139
|
+
static frt_off_t ram_length(FrtStore *store, const char *filename) {
|
140
140
|
FrtRAMFile *rf = (FrtRAMFile *)frt_h_get(store->dir.ht, filename);
|
141
141
|
if (rf != NULL) {
|
142
142
|
return rf->len;
|
@@ -151,7 +151,7 @@ static void ramo_flush_i(FrtOutStream *os, const frt_uchar *src, int len) {
|
|
151
151
|
FrtRAMFile *rf = os->file.rf;
|
152
152
|
int buffer_number, buffer_offset, bytes_in_buffer, bytes_to_copy;
|
153
153
|
int src_offset;
|
154
|
-
|
154
|
+
frt_off_t pointer = os->pointer;
|
155
155
|
|
156
156
|
buffer_number = (int)(pointer / FRT_BUFFER_SIZE);
|
157
157
|
buffer_offset = pointer % FRT_BUFFER_SIZE;
|
@@ -179,7 +179,7 @@ static void ramo_flush_i(FrtOutStream *os, const frt_uchar *src, int len) {
|
|
179
179
|
}
|
180
180
|
}
|
181
181
|
|
182
|
-
static void ramo_seek_i(FrtOutStream *os,
|
182
|
+
static void ramo_seek_i(FrtOutStream *os, frt_off_t pos) {
|
183
183
|
os->pointer = pos;
|
184
184
|
}
|
185
185
|
|
@@ -248,7 +248,7 @@ static void rami_read_i(FrtInStream *is, frt_uchar *b, int len) {
|
|
248
248
|
int offset = 0;
|
249
249
|
int buffer_number, buffer_offset, bytes_in_buffer, bytes_to_copy;
|
250
250
|
int remainder = len;
|
251
|
-
|
251
|
+
frt_off_t start = is->d.pointer;
|
252
252
|
frt_uchar *buffer;
|
253
253
|
|
254
254
|
while (remainder > 0) {
|
@@ -271,11 +271,11 @@ static void rami_read_i(FrtInStream *is, frt_uchar *b, int len) {
|
|
271
271
|
is->d.pointer += len;
|
272
272
|
}
|
273
273
|
|
274
|
-
static
|
274
|
+
static frt_off_t rami_length_i(FrtInStream *is) {
|
275
275
|
return is->f->file.rf->len;
|
276
276
|
}
|
277
277
|
|
278
|
-
static void rami_seek_i(FrtInStream *is,
|
278
|
+
static void rami_seek_i(FrtInStream *is, frt_off_t pos) {
|
279
279
|
is->d.pointer = pos;
|
280
280
|
}
|
281
281
|
|
@@ -125,7 +125,7 @@ off_t frt_os_pos(FrtOutStream *os)
|
|
125
125
|
return os->buf.start + os->buf.pos;
|
126
126
|
}
|
127
127
|
|
128
|
-
void frt_os_seek(FrtOutStream *os,
|
128
|
+
void frt_os_seek(FrtOutStream *os, frt_off_t new_pos)
|
129
129
|
{
|
130
130
|
frt_os_flush(os);
|
131
131
|
os->buf.start = new_pos;
|
@@ -202,9 +202,9 @@ FrtInStream *frt_is_new(void) {
|
|
202
202
|
*/
|
203
203
|
static void is_refill(FrtInStream *is)
|
204
204
|
{
|
205
|
-
|
206
|
-
|
207
|
-
|
205
|
+
frt_off_t start = is->buf.start + is->buf.pos;
|
206
|
+
frt_off_t last = start + FRT_BUFFER_SIZE;
|
207
|
+
frt_off_t flen = is->m->length_i(is);
|
208
208
|
|
209
209
|
if (last > flen) { /* don't read past EOF */
|
210
210
|
last = flen;
|
@@ -254,7 +254,7 @@ off_t frt_is_pos(FrtInStream *is)
|
|
254
254
|
frt_uchar *frt_is_read_bytes(FrtInStream *is, frt_uchar *buf, int len)
|
255
255
|
{
|
256
256
|
int i;
|
257
|
-
|
257
|
+
frt_off_t start;
|
258
258
|
|
259
259
|
if ((is->buf.pos + len) < is->buf.len) {
|
260
260
|
for (i = 0; i < len; i++) {
|
@@ -273,7 +273,7 @@ frt_uchar *frt_is_read_bytes(FrtInStream *is, frt_uchar *buf, int len)
|
|
273
273
|
return buf;
|
274
274
|
}
|
275
275
|
|
276
|
-
void frt_is_seek(FrtInStream *is,
|
276
|
+
void frt_is_seek(FrtInStream *is, frt_off_t pos) {
|
277
277
|
if (pos >= is->buf.start && pos < (is->buf.start + is->buf.len)) {
|
278
278
|
is->buf.pos = pos - is->buf.start; /* seek within buffer */
|
279
279
|
} else {
|
@@ -384,7 +384,7 @@ unsigned int frt_is_read_vint(FrtInStream *is)
|
|
384
384
|
/* optimized to use unchecked read_byte if there is definitely space */
|
385
385
|
off_t frt_is_read_voff_t(FrtInStream *is)
|
386
386
|
{
|
387
|
-
register
|
387
|
+
register frt_off_t res, b;
|
388
388
|
register int shift = 7;
|
389
389
|
|
390
390
|
if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
|
@@ -553,7 +553,7 @@ void frt_os_write_vint(FrtOutStream *os, register unsigned int num)
|
|
553
553
|
}
|
554
554
|
|
555
555
|
/* optimized to use an unchecked write if there is space */
|
556
|
-
void frt_os_write_voff_t(FrtOutStream *os, register
|
556
|
+
void frt_os_write_voff_t(FrtOutStream *os, register frt_off_t num)
|
557
557
|
{
|
558
558
|
if (os->buf.pos > VINT_END) {
|
559
559
|
while (num > 127) {
|
@@ -13,9 +13,9 @@
|
|
13
13
|
typedef struct FrtBuffer
|
14
14
|
{
|
15
15
|
frt_uchar buf[FRT_BUFFER_SIZE];
|
16
|
-
|
17
|
-
|
18
|
-
|
16
|
+
frt_off_t start;
|
17
|
+
frt_off_t pos;
|
18
|
+
frt_off_t len;
|
19
19
|
} FrtBuffer;
|
20
20
|
|
21
21
|
typedef struct FrtOutStream FrtOutStream;
|
@@ -38,7 +38,7 @@ struct FrtOutStreamMethods {
|
|
38
38
|
* @param pos the position to seek in the stream
|
39
39
|
* @raise FRT_IO_ERROR if there is an error seeking in the output stream
|
40
40
|
*/
|
41
|
-
void (*seek_i)(struct FrtOutStream *os,
|
41
|
+
void (*seek_i)(struct FrtOutStream *os, frt_off_t pos);
|
42
42
|
|
43
43
|
/**
|
44
44
|
* Close any resources used by the output stream +os+
|
@@ -54,7 +54,7 @@ typedef struct FrtRAMFile
|
|
54
54
|
char *name;
|
55
55
|
frt_uchar **buffers;
|
56
56
|
int bufcnt;
|
57
|
-
|
57
|
+
frt_off_t len;
|
58
58
|
_Atomic unsigned int ref_cnt;
|
59
59
|
} FrtRAMFile;
|
60
60
|
|
@@ -66,7 +66,7 @@ struct FrtOutStream
|
|
66
66
|
int fd;
|
67
67
|
FrtRAMFile *rf;
|
68
68
|
} file;
|
69
|
-
|
69
|
+
frt_off_t pointer; /* only used by RAMOut */
|
70
70
|
const struct FrtOutStreamMethods *m;
|
71
71
|
};
|
72
72
|
|
@@ -95,7 +95,7 @@ struct FrtInStreamMethods
|
|
95
95
|
* @param pos the position to seek
|
96
96
|
* @raise FRT_IO_ERROR if the seek fails
|
97
97
|
*/
|
98
|
-
void (*seek_i)(struct FrtInStream *is,
|
98
|
+
void (*seek_i)(struct FrtInStream *is, frt_off_t pos);
|
99
99
|
|
100
100
|
/**
|
101
101
|
* Returns the length of the input stream +is+
|
@@ -103,7 +103,7 @@ struct FrtInStreamMethods
|
|
103
103
|
* @param is self
|
104
104
|
* @raise FRT_IO_ERROR if there is an error getting the file length
|
105
105
|
*/
|
106
|
-
|
106
|
+
frt_off_t (*length_i)(struct FrtInStream *is);
|
107
107
|
|
108
108
|
/**
|
109
109
|
* Close the resources allocated to the inputstream +is+
|
@@ -126,7 +126,7 @@ struct FrtInStream {
|
|
126
126
|
FrtBuffer buf;
|
127
127
|
struct FrtInStreamFile *f;
|
128
128
|
union {
|
129
|
-
|
129
|
+
frt_off_t pointer; /* only used by RAMIn */
|
130
130
|
char *path; /* only used by FSIn */
|
131
131
|
FrtCompoundInStream *cis;
|
132
132
|
} d;
|
@@ -137,8 +137,8 @@ struct FrtInStream {
|
|
137
137
|
struct FrtCompoundInStream
|
138
138
|
{
|
139
139
|
FrtInStream *sub;
|
140
|
-
|
141
|
-
|
140
|
+
frt_off_t offset;
|
141
|
+
frt_off_t length;
|
142
142
|
};
|
143
143
|
|
144
144
|
#define frt_is_length(mis) mis->m->length_i(mis)
|
@@ -274,7 +274,7 @@ struct FrtStore {
|
|
274
274
|
* @return the length of the file in bytes
|
275
275
|
* @raise FRT_IO_ERROR if there is an error checking the file length
|
276
276
|
*/
|
277
|
-
|
277
|
+
frt_off_t (*length)(FrtStore *store, const char *filename);
|
278
278
|
|
279
279
|
/**
|
280
280
|
* Allocate the resources needed for the output stream in the +store+ with
|
@@ -465,7 +465,7 @@ extern void frt_os_close(FrtOutStream *os);
|
|
465
465
|
* @param os the FrtOutStream to get the position from
|
466
466
|
* @return the current position in FrtOutStream +os+
|
467
467
|
*/
|
468
|
-
extern off_t frt_os_pos(FrtOutStream *os);
|
468
|
+
extern frt_off_t frt_os_pos(FrtOutStream *os);
|
469
469
|
|
470
470
|
/**
|
471
471
|
* Set the current position in FrtOutStream +os+.
|
@@ -474,7 +474,7 @@ extern off_t frt_os_pos(FrtOutStream *os);
|
|
474
474
|
* @param pos the new position in the FrtOutStream
|
475
475
|
* @raise FRT_IO_ERROR if there is a file-system IO error seeking the file
|
476
476
|
*/
|
477
|
-
extern void frt_os_seek(FrtOutStream *os,
|
477
|
+
extern void frt_os_seek(FrtOutStream *os, frt_off_t new_pos);
|
478
478
|
|
479
479
|
/**
|
480
480
|
* Write a single byte +b+ to the FrtOutStream +os+
|
@@ -541,14 +541,14 @@ extern void frt_os_write_u64(FrtOutStream *os, frt_u64 num);
|
|
541
541
|
extern void frt_os_write_vint(FrtOutStream *os, register unsigned int num);
|
542
542
|
|
543
543
|
/**
|
544
|
-
* Write an unsigned
|
544
|
+
* Write an unsigned frt_off_t to FrtOutStream in compressed VINT format.
|
545
545
|
* TODO: describe VINT format
|
546
546
|
*
|
547
547
|
* @param os FrtOutStream to write to
|
548
|
-
* @param num the
|
548
|
+
* @param num the frt_off_t to write
|
549
549
|
* @raise FRT_IO_ERROR if there is an error writing to the file-system
|
550
550
|
*/
|
551
|
-
extern void frt_os_write_voff_t(FrtOutStream *os, register
|
551
|
+
extern void frt_os_write_voff_t(FrtOutStream *os, register frt_off_t num);
|
552
552
|
|
553
553
|
/**
|
554
554
|
* Write an unsigned 64bit int to FrtOutStream in compressed VINT format.
|
@@ -591,7 +591,7 @@ extern void frt_os_write_string(FrtOutStream *os, const char *str);
|
|
591
591
|
* @param is the FrtInStream to get the current position from
|
592
592
|
* @return the current position within the FrtInStream +is+
|
593
593
|
*/
|
594
|
-
extern off_t frt_is_pos(FrtInStream *is);
|
594
|
+
extern frt_off_t frt_is_pos(FrtInStream *is);
|
595
595
|
|
596
596
|
/**
|
597
597
|
* Set the current position in FrtInStream +is+ to +pos+.
|
@@ -601,7 +601,7 @@ extern off_t frt_is_pos(FrtInStream *is);
|
|
601
601
|
* @raise FRT_IO_ERROR if there is a error seeking from the file-system
|
602
602
|
* @raise FRT_EOF_ERROR if there is an attempt to seek past the end of the file
|
603
603
|
*/
|
604
|
-
extern void frt_is_seek(FrtInStream *is,
|
604
|
+
extern void frt_is_seek(FrtInStream *is, frt_off_t pos);
|
605
605
|
|
606
606
|
/**
|
607
607
|
* Close the FrtInStream freeing all allocated resources.
|
@@ -705,15 +705,15 @@ extern unsigned int frt_is_read_vint(FrtInStream *is);
|
|
705
705
|
extern void frt_is_skip_vints(FrtInStream *is, register int cnt);
|
706
706
|
|
707
707
|
/**
|
708
|
-
* Read a compressed (VINT) unsigned
|
708
|
+
* Read a compressed (VINT) unsigned frt_off_t from the FrtInStream.
|
709
709
|
* TODO: describe VINT format
|
710
710
|
*
|
711
711
|
* @param is the FrtInStream to read from
|
712
|
-
* @return a
|
712
|
+
* @return a frt_off_t
|
713
713
|
* @raise FRT_IO_ERROR if there is a error reading from the file-system
|
714
714
|
* @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
|
715
715
|
*/
|
716
|
-
extern
|
716
|
+
extern frt_off_t frt_is_read_voff_t(FrtInStream *is);
|
717
717
|
|
718
718
|
/**
|
719
719
|
* Read a compressed (VINT) unsigned 64bit int from the FrtInStream.
|
@@ -998,7 +998,7 @@ static void test_simulated_crashed_writer(TestCase *tc, void *data)
|
|
998
998
|
{
|
999
999
|
int i;
|
1000
1000
|
long gen;
|
1001
|
-
|
1001
|
+
frt_off_t length;
|
1002
1002
|
FrtStore *store = (FrtStore *)data;
|
1003
1003
|
FrtIndexWriter *iw;
|
1004
1004
|
FrtIndexReader *ir;
|
@@ -1059,7 +1059,7 @@ static void test_simulated_corrupt_index1(TestCase *tc, void *data)
|
|
1059
1059
|
{
|
1060
1060
|
int i;
|
1061
1061
|
long gen;
|
1062
|
-
|
1062
|
+
frt_off_t length;
|
1063
1063
|
FrtStore *store = (FrtStore *)data;
|
1064
1064
|
FrtIndexWriter *iw;
|
1065
1065
|
FrtIndexReader *ir;
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: isomorfeus-ferret
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.13.9
|
4
|
+
version: 0.13.10
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jan Biedermann
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-05-
|
11
|
+
date: 2022-05-07 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: oj
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - ">="
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '0'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - ">="
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: rake
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|