ferret 0.11.4 → 0.11.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/TUTORIAL +3 -3
- data/ext/analysis.c +12 -9
- data/ext/array.c +10 -10
- data/ext/array.h +8 -1
- data/ext/bitvector.c +2 -2
- data/ext/except.c +1 -1
- data/ext/ferret.c +2 -2
- data/ext/ferret.h +1 -1
- data/ext/fs_store.c +13 -2
- data/ext/global.c +4 -4
- data/ext/global.h +6 -0
- data/ext/hash.c +1 -1
- data/ext/helper.c +1 -1
- data/ext/helper.h +1 -1
- data/ext/index.c +48 -22
- data/ext/index.h +17 -16
- data/ext/mempool.c +4 -1
- data/ext/mempool.h +1 -1
- data/ext/multimapper.c +2 -2
- data/ext/q_fuzzy.c +2 -2
- data/ext/q_multi_term.c +2 -2
- data/ext/q_parser.c +39 -8
- data/ext/q_range.c +32 -1
- data/ext/r_analysis.c +66 -28
- data/ext/r_index.c +18 -19
- data/ext/r_qparser.c +21 -6
- data/ext/r_search.c +74 -49
- data/ext/r_store.c +1 -1
- data/ext/r_utils.c +17 -17
- data/ext/search.c +10 -5
- data/ext/search.h +3 -1
- data/ext/sort.c +2 -2
- data/ext/stopwords.c +23 -34
- data/ext/store.c +9 -9
- data/ext/store.h +5 -4
- data/lib/ferret/document.rb +2 -2
- data/lib/ferret/field_infos.rb +37 -35
- data/lib/ferret/index.rb +16 -6
- data/lib/ferret/number_tools.rb +2 -2
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +40 -0
- data/test/unit/index/tc_index.rb +64 -101
- data/test/unit/index/tc_index_reader.rb +13 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +17 -1
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tm_searcher.rb +27 -1
- data/test/unit/ts_largefile.rb +4 -0
- metadata +147 -144
data/Rakefile
CHANGED
@@ -55,6 +55,7 @@ task :valgrind do
|
|
55
55
|
"--leak-check=yes --show-reachable=yes -v ruby test/test_all.rb"
|
56
56
|
#sh "valgrind --suppressions=ferret_valgrind.supp " +
|
57
57
|
# "--leak-check=yes --show-reachable=yes -v ruby test/unit/index/tc_index_reader.rb"
|
58
|
+
#valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp --leak-check=yes --show-reachable=yes -v ruby test/test_all.rb
|
58
59
|
end
|
59
60
|
|
60
61
|
task :default => :test_all
|
data/TUTORIAL
CHANGED
@@ -116,7 +116,7 @@ when printing to the console:
|
|
116
116
|
puts highlights
|
117
117
|
end
|
118
118
|
|
119
|
-
And if you want to highlight a whole document, set :
|
119
|
+
And if you want to highlight a whole document, set :excerpt_length to :all:
|
120
120
|
|
121
121
|
puts index.highlight(query, doc_id,
|
122
122
|
:field => :content,
|
@@ -175,7 +175,7 @@ you change the data once it is in the index. But you can delete documents so
|
|
175
175
|
the standard way to modify data is to delete it and re-add it again with the
|
176
176
|
modifications made. It is important to note that when doing this the documents
|
177
177
|
will get a new document number so you should be careful not to use a document
|
178
|
-
number after the document has been deleted. Here is an
|
178
|
+
number after the document has been deleted. Here is an example of modifying a
|
179
179
|
document;
|
180
180
|
|
181
181
|
index << {:title => "Programing Rbuy", :content => "blah blah blah"}
|
@@ -185,7 +185,7 @@ document;
|
|
185
185
|
doc = index[doc_id]
|
186
186
|
index.delete(doc_id)
|
187
187
|
|
188
|
-
# modify doc. It is just a Hash
|
188
|
+
# modify doc. It is just a Hash after all
|
189
189
|
doc[:title] = "Programming Ruby"
|
190
190
|
|
191
191
|
index << doc
|
data/ext/analysis.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#include "analysis.h"
|
2
2
|
#include "hash.h"
|
3
|
-
#include
|
3
|
+
#include "libstemmer.h"
|
4
4
|
#include <string.h>
|
5
5
|
#include <ctype.h>
|
6
6
|
#include <wctype.h>
|
@@ -12,7 +12,7 @@
|
|
12
12
|
*
|
13
13
|
****************************************************************************/
|
14
14
|
|
15
|
-
|
15
|
+
INLINE Token *tk_set(Token *tk,
|
16
16
|
char *text, int tlen, int start, int end, int pos_inc)
|
17
17
|
{
|
18
18
|
if (tlen >= MAX_WORD_SIZE) {
|
@@ -27,20 +27,20 @@ __inline Token *tk_set(Token *tk,
|
|
27
27
|
return tk;
|
28
28
|
}
|
29
29
|
|
30
|
-
|
30
|
+
INLINE Token *tk_set_ts(Token *tk,
|
31
31
|
char *start, char *end, char *text, int pos_inc)
|
32
32
|
{
|
33
33
|
return tk_set(tk, start, (int)(end - start),
|
34
34
|
(int)(start - text), (int)(end - text), pos_inc);
|
35
35
|
}
|
36
36
|
|
37
|
-
|
37
|
+
INLINE Token *tk_set_no_len(Token *tk,
|
38
38
|
char *text, int start, int end, int pos_inc)
|
39
39
|
{
|
40
40
|
return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
|
41
41
|
}
|
42
42
|
|
43
|
-
|
43
|
+
INLINE Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
|
44
44
|
int pos_inc)
|
45
45
|
{
|
46
46
|
int len = wcstombs(tk->text, text, MAX_WORD_SIZE - 1);
|
@@ -152,7 +152,7 @@ static TokenStream *cts_new()
|
|
152
152
|
|
153
153
|
#define MBTS(token_stream) ((MultiByteTokenStream *)(token_stream))
|
154
154
|
|
155
|
-
|
155
|
+
INLINE int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
|
156
156
|
{
|
157
157
|
int num_bytes;
|
158
158
|
if ((num_bytes = (int)mbrtowc(wchr, s, MB_CUR_MAX, state)) < 0) {
|
@@ -830,7 +830,7 @@ static bool mb_std_advance_to_start(TokenStream *ts)
|
|
830
830
|
|
831
831
|
i = mb_next_char(&wchr, ts->t, &state);
|
832
832
|
|
833
|
-
while (wchr != 0 && !
|
833
|
+
while (wchr != 0 && !iswalnum(wchr)) {
|
834
834
|
if (isnumpunc(*ts->t) && isdigit(ts->t[1])) break;
|
835
835
|
ts->t += i;
|
836
836
|
i = mb_next_char(&wchr, ts->t, &state);
|
@@ -950,11 +950,14 @@ static Token *std_next(TokenStream *ts)
|
|
950
950
|
}
|
951
951
|
t++;
|
952
952
|
}
|
953
|
-
while (isurlxatpunc(t[-1])) {
|
953
|
+
while (isurlxatpunc(t[-1]) && t > ts->t) {
|
954
954
|
t--; /* strip trailing punctuation */
|
955
955
|
}
|
956
956
|
|
957
|
-
if (num_end
|
957
|
+
if (t < ts->t || (num_end != NULL && num_end < ts->t)) {
|
958
|
+
fprintf(stderr, "Warning: encoding error. Please check that you are using the correct locale for your input");
|
959
|
+
return NULL;
|
960
|
+
} else if (num_end == NULL || t > num_end) {
|
958
961
|
ts->t = t;
|
959
962
|
|
960
963
|
if (is_acronym) { /* check it is one letter followed by one '.' */
|
data/ext/array.c
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
#include "array.h"
|
2
|
-
#include "global.h"
|
3
2
|
#include <string.h>
|
4
3
|
|
5
|
-
#define
|
4
|
+
#define META_CNT ARY_META_CNT
|
5
|
+
#define DATA_SZ sizeof(int) * META_CNT
|
6
6
|
|
7
7
|
void **ary_new_i(int type_size, int init_capa)
|
8
8
|
{
|
9
|
-
|
9
|
+
void **ary;
|
10
10
|
if (init_capa <= 0) {
|
11
11
|
init_capa = ARY_INIT_CAPA;
|
12
12
|
}
|
13
|
-
ary = ((int *)ecalloc(DATA_SZ + init_capa * type_size));
|
14
|
-
ary
|
15
|
-
ary
|
16
|
-
return
|
13
|
+
ary = (void **)&(((int *)ecalloc(DATA_SZ + init_capa * type_size))[META_CNT]);
|
14
|
+
ary_type_size(ary) = type_size;
|
15
|
+
ary_capa(ary) = init_capa;
|
16
|
+
return ary;
|
17
17
|
}
|
18
18
|
|
19
|
-
|
19
|
+
INLINE void ary_resize_i(void ***ary, int size)
|
20
20
|
{
|
21
21
|
size++;
|
22
22
|
if (size >= ary_sz(*ary)) {
|
23
23
|
int capa = ary_capa(*ary);
|
24
24
|
if (size >= capa) {
|
25
|
-
int *ary_start = &((int *)*ary)[-
|
25
|
+
int *ary_start = &((int *)*ary)[-META_CNT];
|
26
26
|
while (size >= capa) {
|
27
27
|
capa <<= 1;
|
28
28
|
}
|
29
29
|
|
30
30
|
ary_start = (int *)erealloc(ary_start,
|
31
31
|
DATA_SZ + capa * ary_type_size(*ary));
|
32
|
-
*ary = (void **)&(ary_start[
|
32
|
+
*ary = (void **)&(ary_start[META_CNT]);
|
33
33
|
memset(((char *)*ary) + ary_type_size(*ary) * ary_sz(*ary), 0,
|
34
34
|
(capa - ary_sz(*ary)) * ary_type_size(*ary));
|
35
35
|
ary_capa(*ary) = capa;
|
data/ext/array.h
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
#ifndef FRT_ARRAY_H
|
2
2
|
#define FRT_ARRAY_H
|
3
|
+
#include "global.h"
|
4
|
+
|
5
|
+
#if defined POSH_OS_SOLARIS || defined POSH_OS_SUNOS
|
6
|
+
# define ARY_META_CNT 4
|
7
|
+
#else
|
8
|
+
# define ARY_META_CNT 3
|
9
|
+
#endif
|
3
10
|
|
4
11
|
#define ARY_INIT_CAPA 8
|
5
12
|
#define ary_size(ary) ary_sz(ary)
|
6
13
|
#define ary_sz(ary) (((int *)ary)[-1])
|
7
14
|
#define ary_capa(ary) (((int *)ary)[-2])
|
8
15
|
#define ary_type_size(ary) (((int *)ary)[-3])
|
9
|
-
#define ary_start(ary) ((void **)&(((int *)ary)[-
|
16
|
+
#define ary_start(ary) ((void **)&(((int *)ary)[-ARY_META_CNT]))
|
10
17
|
#define ary_free(ary) free(ary_start(ary))
|
11
18
|
|
12
19
|
#define ary_new_type_capa(type, init_capa)\
|
data/ext/bitvector.c
CHANGED
@@ -193,7 +193,7 @@ const int NUM_TRAILING_ZEROS[] = {
|
|
193
193
|
/*
|
194
194
|
* This method is highly optimized, hence the loop unrolling
|
195
195
|
*/
|
196
|
-
static
|
196
|
+
static INLINE int bv_get_1_offset(f_u32 word)
|
197
197
|
{
|
198
198
|
if (word & 0xff) {
|
199
199
|
return NUM_TRAILING_ZEROS[word & 0xff];
|
@@ -360,7 +360,7 @@ unsigned long bv_hash(BitVector *bv)
|
|
360
360
|
return hash;
|
361
361
|
}
|
362
362
|
|
363
|
-
static
|
363
|
+
static INLINE void bv_recapa(BitVector *bv, int new_capa)
|
364
364
|
{
|
365
365
|
if (bv->capa < new_capa) {
|
366
366
|
REALLOC_N(bv->bits, f_u32, new_capa);
|
data/ext/except.c
CHANGED
data/ext/ferret.c
CHANGED
@@ -192,11 +192,11 @@ frt_field(VALUE rfield)
|
|
192
192
|
/*
|
193
193
|
* Json Exportation - Loading each LazyDoc and formatting them into json
|
194
194
|
* This code is designed to get a VERY FAST json string, the goal was speed,
|
195
|
-
* not
|
195
|
+
* not sexiness.
|
196
196
|
* Jeremie 'ahFeel' BORDIER
|
197
197
|
* ahFeel@rift.Fr
|
198
198
|
*/
|
199
|
-
|
199
|
+
char *
|
200
200
|
json_concat_string(char *s, char *field)
|
201
201
|
{
|
202
202
|
*(s++) = '"';
|
data/ext/ferret.h
CHANGED
@@ -65,7 +65,7 @@ extern VALUE frt_hs_to_rb_ary(HashSet *hs);
|
|
65
65
|
extern void *frt_rb_data_ptr(VALUE val);
|
66
66
|
extern char * frt_field(VALUE rfield);
|
67
67
|
extern VALUE frt_get_term(const char *field, const char *term);
|
68
|
-
extern
|
68
|
+
extern char *json_concat_string(char *s, char *field);
|
69
69
|
extern char *rs2s(VALUE rstr);
|
70
70
|
extern char *nstrdup(VALUE rstr);
|
71
71
|
#define Frt_Make_Struct(klass)\
|
data/ext/fs_store.c
CHANGED
@@ -51,7 +51,7 @@ static void fs_touch(Store *store, char *filename)
|
|
51
51
|
int f;
|
52
52
|
char path[MAX_FILE_PATH];
|
53
53
|
join_path(path, store->dir.path, filename);
|
54
|
-
if ((f = creat(path,
|
54
|
+
if ((f = creat(path, store->file_mode)) == 0) {
|
55
55
|
RAISE(IO_ERROR, "couldn't create file %s: <%s>", path,
|
56
56
|
strerror(errno));
|
57
57
|
}
|
@@ -257,7 +257,7 @@ static OutStream *fs_new_output(Store *store, const char *filename)
|
|
257
257
|
{
|
258
258
|
char path[MAX_FILE_PATH];
|
259
259
|
int fd = open(join_path(path, store->dir.path, filename),
|
260
|
-
O_WRONLY | O_CREAT | O_BINARY,
|
260
|
+
O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
|
261
261
|
OutStream *os;
|
262
262
|
if (fd < 0) {
|
263
263
|
RAISE(IO_ERROR, "couldn't create OutStream %s: <%s>",
|
@@ -430,8 +430,19 @@ static void fs_close_i(Store *store)
|
|
430
430
|
|
431
431
|
static Store *fs_store_new(const char *pathname)
|
432
432
|
{
|
433
|
+
struct stat stt;
|
433
434
|
Store *new_store = store_new();
|
434
435
|
|
436
|
+
new_store->file_mode = S_IRUSR | S_IWUSR;
|
437
|
+
#ifndef POSH_OS_WIN32
|
438
|
+
if (!stat(pathname, &stt) && stt.st_gid == getgid()) {
|
439
|
+
if (stt.st_mode & S_IWGRP) {
|
440
|
+
umask(S_IWOTH);
|
441
|
+
}
|
442
|
+
new_store->file_mode |= stt.st_mode & (S_IRGRP | S_IWGRP);
|
443
|
+
}
|
444
|
+
#endif
|
445
|
+
|
435
446
|
new_store->dir.path = estrdup(pathname);
|
436
447
|
new_store->touch = &fs_touch;
|
437
448
|
new_store->exists = &fs_exists;
|
data/ext/global.c
CHANGED
@@ -11,22 +11,22 @@ const char *EMPTY_STRING = "";
|
|
11
11
|
|
12
12
|
bool x_do_logging = false;
|
13
13
|
|
14
|
-
|
14
|
+
INLINE int min3(int a, int b, int c)
|
15
15
|
{
|
16
16
|
return MIN3(a, b, c);
|
17
17
|
}
|
18
18
|
|
19
|
-
|
19
|
+
INLINE int min2(int a, int b)
|
20
20
|
{
|
21
21
|
return MIN(a, b);
|
22
22
|
}
|
23
23
|
|
24
|
-
|
24
|
+
INLINE int max3(int a, int b, int c)
|
25
25
|
{
|
26
26
|
return MAX3(a, b, c);
|
27
27
|
}
|
28
28
|
|
29
|
-
|
29
|
+
INLINE int max2(int a, int b)
|
30
30
|
{
|
31
31
|
return MAX(a, b);
|
32
32
|
}
|
data/ext/global.h
CHANGED
@@ -11,6 +11,12 @@
|
|
11
11
|
#define MAX_WORD_SIZE 255
|
12
12
|
#define MAX_FILE_PATH 1024
|
13
13
|
|
14
|
+
#if defined(__GNUC__)
|
15
|
+
# define INLINE __inline__
|
16
|
+
#else
|
17
|
+
# define INLINE
|
18
|
+
#endif
|
19
|
+
|
14
20
|
typedef void (*free_ft)(void *key);
|
15
21
|
|
16
22
|
#define NELEMS(array) ((int)(sizeof(array)/sizeof(array[0])))
|
data/ext/hash.c
CHANGED
@@ -61,7 +61,7 @@ typedef HashEntry *(*lookup_ft)(struct HashTable *ht, register const void *key);
|
|
61
61
|
* @param ht the HashTable to do the fast lookup in
|
62
62
|
* @param the hashkey we are looking for
|
63
63
|
*/
|
64
|
-
static
|
64
|
+
static INLINE HashEntry *h_resize_lookup(HashTable *ht,
|
65
65
|
register const unsigned long hash)
|
66
66
|
{
|
67
67
|
register unsigned long perturb;
|
data/ext/helper.c
CHANGED
data/ext/helper.h
CHANGED
data/ext/index.c
CHANGED
@@ -206,7 +206,7 @@ HashTable *co_hash_create()
|
|
206
206
|
*
|
207
207
|
****************************************************************************/
|
208
208
|
|
209
|
-
|
209
|
+
INLINE void fi_set_store(FieldInfo *fi, int store)
|
210
210
|
{
|
211
211
|
switch (store) {
|
212
212
|
case STORE_NO:
|
@@ -220,7 +220,7 @@ __inline void fi_set_store(FieldInfo *fi, int store)
|
|
220
220
|
}
|
221
221
|
}
|
222
222
|
|
223
|
-
|
223
|
+
INLINE void fi_set_index(FieldInfo *fi, int index)
|
224
224
|
{
|
225
225
|
switch (index) {
|
226
226
|
case INDEX_NO:
|
@@ -241,7 +241,7 @@ __inline void fi_set_index(FieldInfo *fi, int index)
|
|
241
241
|
}
|
242
242
|
}
|
243
243
|
|
244
|
-
|
244
|
+
INLINE void fi_set_term_vector(FieldInfo *fi, int term_vector)
|
245
245
|
{
|
246
246
|
switch (term_vector) {
|
247
247
|
case TERM_VECTOR_NO:
|
@@ -466,7 +466,7 @@ static const char *index_str[] = {
|
|
466
466
|
"",
|
467
467
|
":untokenized_omit_norms",
|
468
468
|
"",
|
469
|
-
":
|
469
|
+
":omit_norms"
|
470
470
|
};
|
471
471
|
|
472
472
|
static const char *fi_index_str(FieldInfo *fi)
|
@@ -1375,7 +1375,8 @@ LazyDoc *fr_get_lazy_doc(FieldsReader *fr, int doc_num)
|
|
1375
1375
|
lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
|
1376
1376
|
|
1377
1377
|
for (i = 0; i < stored_cnt; i++) {
|
1378
|
-
|
1378
|
+
off_t start = 0, end;
|
1379
|
+
int data_cnt;
|
1379
1380
|
field_num = is_read_vint(fdt_in);
|
1380
1381
|
fi = fr->fis->fields[field_num];
|
1381
1382
|
data_cnt = is_read_vint(fdt_in);
|
@@ -1449,7 +1450,7 @@ TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
|
|
1449
1450
|
if (store_offsets) {
|
1450
1451
|
int num_positions = tv->offset_cnt = is_read_vint(fdt_in);
|
1451
1452
|
Offset *offsets = tv->offsets = ALLOC_N(Offset, num_positions);
|
1452
|
-
|
1453
|
+
off_t offset = 0;
|
1453
1454
|
for (i = 0; i < num_positions; i++) {
|
1454
1455
|
offsets[i].start = offset += is_read_vint(fdt_in);
|
1455
1456
|
offsets[i].end = offset += is_read_vint(fdt_in);
|
@@ -1567,7 +1568,7 @@ void fw_close(FieldsWriter *fw)
|
|
1567
1568
|
free(fw);
|
1568
1569
|
}
|
1569
1570
|
|
1570
|
-
static
|
1571
|
+
static INLINE void save_data(OutStream *fdt_out, char *data, int dlen)
|
1571
1572
|
{
|
1572
1573
|
os_write_vint(fdt_out, dlen);
|
1573
1574
|
os_write_bytes(fdt_out, (uchar *)data, dlen);
|
@@ -1683,8 +1684,8 @@ void fw_add_postings(FieldsWriter *fw,
|
|
1683
1684
|
int last_end = 0;
|
1684
1685
|
os_write_vint(fdt_out, offset_count); /* write shared prefix length */
|
1685
1686
|
for (i = 0; i < offset_count; i++) {
|
1686
|
-
|
1687
|
-
|
1687
|
+
off_t start = offsets[i].start;
|
1688
|
+
off_t end = offsets[i].end;
|
1688
1689
|
os_write_vint(fdt_out, start - last_end);
|
1689
1690
|
os_write_vint(fdt_out, end - start);
|
1690
1691
|
last_end = end;
|
@@ -1863,7 +1864,7 @@ void sfi_close(SegmentFieldIndex *sfi)
|
|
1863
1864
|
* SegmentTermEnum
|
1864
1865
|
****************************************************************************/
|
1865
1866
|
|
1866
|
-
static
|
1867
|
+
static INLINE int term_read(char *buf, InStream *is)
|
1867
1868
|
{
|
1868
1869
|
int start = (int)is_read_vint(is);
|
1869
1870
|
int length = (int)is_read_vint(is);
|
@@ -2297,7 +2298,7 @@ TermInfosReader *tir_open(Store *store,
|
|
2297
2298
|
return tir;
|
2298
2299
|
}
|
2299
2300
|
|
2300
|
-
static
|
2301
|
+
static INLINE TermEnum *tir_enum(TermInfosReader *tir)
|
2301
2302
|
{
|
2302
2303
|
TermEnum *te;
|
2303
2304
|
if (NULL == (te = thread_getspecific(tir->thread_te))) {
|
@@ -2423,7 +2424,7 @@ TermInfosWriter *tiw_open(Store *store,
|
|
2423
2424
|
return tiw;
|
2424
2425
|
}
|
2425
2426
|
|
2426
|
-
static
|
2427
|
+
static INLINE void tw_write_term(TermWriter *tw,
|
2427
2428
|
OutStream *os,
|
2428
2429
|
const char *term,
|
2429
2430
|
int term_len)
|
@@ -2499,7 +2500,7 @@ void tiw_add(TermInfosWriter *tiw,
|
|
2499
2500
|
tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
|
2500
2501
|
}
|
2501
2502
|
|
2502
|
-
static
|
2503
|
+
static INLINE void tw_reset(TermWriter *tw)
|
2503
2504
|
{
|
2504
2505
|
tw->counter = 0;
|
2505
2506
|
tw->last_term = EMPTY_STRING;
|
@@ -3838,7 +3839,7 @@ void ir_add_cache(IndexReader *ir)
|
|
3838
3839
|
|
3839
3840
|
bool ir_is_latest(IndexReader *ir)
|
3840
3841
|
{
|
3841
|
-
return
|
3842
|
+
return ir->is_latest_i(ir);
|
3842
3843
|
}
|
3843
3844
|
|
3844
3845
|
/****************************************************************************
|
@@ -3919,7 +3920,7 @@ typedef struct SegmentReader {
|
|
3919
3920
|
#define SR(ir) ((SegmentReader *)(ir))
|
3920
3921
|
#define SR_SIZE(ir) (SR(ir)->fr->size)
|
3921
3922
|
|
3922
|
-
static
|
3923
|
+
static INLINE FieldsReader *sr_fr(SegmentReader *sr)
|
3923
3924
|
{
|
3924
3925
|
FieldsReader *fr;
|
3925
3926
|
|
@@ -3931,12 +3932,12 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3931
3932
|
return fr;
|
3932
3933
|
}
|
3933
3934
|
|
3934
|
-
static
|
3935
|
+
static INLINE bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
|
3935
3936
|
{
|
3936
3937
|
return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
|
3937
3938
|
}
|
3938
3939
|
|
3939
|
-
static
|
3940
|
+
static INLINE void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3940
3941
|
uchar *buf)
|
3941
3942
|
{
|
3942
3943
|
Norm *norm = h_get_int(sr->norms, field_num);
|
@@ -3955,7 +3956,7 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
|
3955
3956
|
}
|
3956
3957
|
}
|
3957
3958
|
|
3958
|
-
static
|
3959
|
+
static INLINE uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
3959
3960
|
{
|
3960
3961
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3961
3962
|
if (NULL == norm) { /* not an indexed field */
|
@@ -4043,6 +4044,11 @@ static BitVector *bv_read(Store *store, char *name)
|
|
4043
4044
|
return bv;
|
4044
4045
|
}
|
4045
4046
|
|
4047
|
+
static bool sr_is_latest_i(IndexReader *ir)
|
4048
|
+
{
|
4049
|
+
return (sis_read_current_version(ir->store) == ir->sis->version);
|
4050
|
+
}
|
4051
|
+
|
4046
4052
|
static void sr_commit_i(IndexReader *ir)
|
4047
4053
|
{
|
4048
4054
|
SegmentInfo *si = SR(ir)->si;
|
@@ -4283,6 +4289,7 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
|
|
4283
4289
|
ir->delete_doc_i = &sr_delete_doc_i;
|
4284
4290
|
ir->undelete_all_i = &sr_undelete_all_i;
|
4285
4291
|
ir->set_deleter_i = &sr_set_deleter_i;
|
4292
|
+
ir->is_latest_i = &sr_is_latest_i;
|
4286
4293
|
ir->commit_i = &sr_commit_i;
|
4287
4294
|
ir->close_i = &sr_close_i;
|
4288
4295
|
|
@@ -4570,6 +4577,18 @@ static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
|
|
4570
4577
|
}
|
4571
4578
|
}
|
4572
4579
|
|
4580
|
+
static bool mr_is_latest_i(IndexReader *ir)
|
4581
|
+
{
|
4582
|
+
int i;
|
4583
|
+
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4584
|
+
for (i = 0; i < mr_reader_cnt; i++) {
|
4585
|
+
if (!ir_is_latest(MR(ir)->sub_readers[i])) {
|
4586
|
+
return false;
|
4587
|
+
}
|
4588
|
+
}
|
4589
|
+
return true;
|
4590
|
+
}
|
4591
|
+
|
4573
4592
|
static void mr_commit_i(IndexReader *ir)
|
4574
4593
|
{
|
4575
4594
|
int i;
|
@@ -4639,6 +4658,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
|
|
4639
4658
|
ir->delete_doc_i = &mr_delete_doc_i;
|
4640
4659
|
ir->undelete_all_i = &mr_undelete_all_i;
|
4641
4660
|
ir->set_deleter_i = &mr_set_deleter_i;
|
4661
|
+
ir->is_latest_i = &mr_is_latest_i;
|
4642
4662
|
ir->commit_i = &mr_commit_i;
|
4643
4663
|
ir->close_i = &mr_close_i;
|
4644
4664
|
|
@@ -4799,7 +4819,7 @@ IndexReader *ir_open(Store *store)
|
|
4799
4819
|
*
|
4800
4820
|
****************************************************************************/
|
4801
4821
|
|
4802
|
-
Offset *offset_new(
|
4822
|
+
Offset *offset_new(off_t start, off_t end)
|
4803
4823
|
{
|
4804
4824
|
Offset *offset = ALLOC(Offset);
|
4805
4825
|
offset->start = start;
|
@@ -5177,7 +5197,7 @@ static void dw_add_posting(MemoryPool *mp,
|
|
5177
5197
|
}
|
5178
5198
|
}
|
5179
5199
|
|
5180
|
-
static
|
5200
|
+
static INLINE void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
|
5181
5201
|
{
|
5182
5202
|
if (pos >= dw->offsets_capa) {
|
5183
5203
|
int old_capa = dw->offsets_capa;
|
@@ -5204,6 +5224,7 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5204
5224
|
int doc_num = dw->doc_num;
|
5205
5225
|
int i;
|
5206
5226
|
const int df_size = df->size;
|
5227
|
+
off_t start_offset = 0;
|
5207
5228
|
|
5208
5229
|
if (fld_inv->is_tokenized) {
|
5209
5230
|
Token *tk;
|
@@ -5217,7 +5238,9 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5217
5238
|
pos += tk->pos_inc;
|
5218
5239
|
dw_add_posting(mp, curr_plists, fld_plists, doc_num,
|
5219
5240
|
tk->text, tk->len, pos);
|
5220
|
-
dw_add_offsets(dw, pos,
|
5241
|
+
dw_add_offsets(dw, pos,
|
5242
|
+
start_offset + tk->start,
|
5243
|
+
start_offset + tk->end);
|
5221
5244
|
if (num_terms++ >= dw->max_field_length) {
|
5222
5245
|
break;
|
5223
5246
|
}
|
@@ -5234,6 +5257,7 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5234
5257
|
}
|
5235
5258
|
}
|
5236
5259
|
ts_deref(ts);
|
5260
|
+
start_offset += df->lengths[i] + 1;
|
5237
5261
|
}
|
5238
5262
|
fld_inv->length = num_terms;
|
5239
5263
|
}
|
@@ -5250,8 +5274,10 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5250
5274
|
dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr,
|
5251
5275
|
len, i);
|
5252
5276
|
if (store_offsets) {
|
5253
|
-
dw_add_offsets(dw, i,
|
5277
|
+
dw_add_offsets(dw, i, start_offset,
|
5278
|
+
start_offset + df->lengths[i]);
|
5254
5279
|
}
|
5280
|
+
start_offset += df->lengths[i] + 1;
|
5255
5281
|
}
|
5256
5282
|
fld_inv->length = i;
|
5257
5283
|
}
|