ferret 0.11.4 → 0.11.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -0
- data/TUTORIAL +3 -3
- data/ext/analysis.c +12 -9
- data/ext/array.c +10 -10
- data/ext/array.h +8 -1
- data/ext/bitvector.c +2 -2
- data/ext/except.c +1 -1
- data/ext/ferret.c +2 -2
- data/ext/ferret.h +1 -1
- data/ext/fs_store.c +13 -2
- data/ext/global.c +4 -4
- data/ext/global.h +6 -0
- data/ext/hash.c +1 -1
- data/ext/helper.c +1 -1
- data/ext/helper.h +1 -1
- data/ext/index.c +48 -22
- data/ext/index.h +17 -16
- data/ext/mempool.c +4 -1
- data/ext/mempool.h +1 -1
- data/ext/multimapper.c +2 -2
- data/ext/q_fuzzy.c +2 -2
- data/ext/q_multi_term.c +2 -2
- data/ext/q_parser.c +39 -8
- data/ext/q_range.c +32 -1
- data/ext/r_analysis.c +66 -28
- data/ext/r_index.c +18 -19
- data/ext/r_qparser.c +21 -6
- data/ext/r_search.c +74 -49
- data/ext/r_store.c +1 -1
- data/ext/r_utils.c +17 -17
- data/ext/search.c +10 -5
- data/ext/search.h +3 -1
- data/ext/sort.c +2 -2
- data/ext/stopwords.c +23 -34
- data/ext/store.c +9 -9
- data/ext/store.h +5 -4
- data/lib/ferret/document.rb +2 -2
- data/lib/ferret/field_infos.rb +37 -35
- data/lib/ferret/index.rb +16 -6
- data/lib/ferret/number_tools.rb +2 -2
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +40 -0
- data/test/unit/index/tc_index.rb +64 -101
- data/test/unit/index/tc_index_reader.rb +13 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +17 -1
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tm_searcher.rb +27 -1
- data/test/unit/ts_largefile.rb +4 -0
- metadata +147 -144
data/Rakefile
CHANGED
@@ -55,6 +55,7 @@ task :valgrind do
|
|
55
55
|
"--leak-check=yes --show-reachable=yes -v ruby test/test_all.rb"
|
56
56
|
#sh "valgrind --suppressions=ferret_valgrind.supp " +
|
57
57
|
# "--leak-check=yes --show-reachable=yes -v ruby test/unit/index/tc_index_reader.rb"
|
58
|
+
#valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp --leak-check=yes --show-reachable=yes -v ruby test/test_all.rb
|
58
59
|
end
|
59
60
|
|
60
61
|
task :default => :test_all
|
data/TUTORIAL
CHANGED
@@ -116,7 +116,7 @@ when printing to the console:
|
|
116
116
|
puts highlights
|
117
117
|
end
|
118
118
|
|
119
|
-
And if you want to highlight a whole document, set :
|
119
|
+
And if you want to highlight a whole document, set :excerpt_length to :all:
|
120
120
|
|
121
121
|
puts index.highlight(query, doc_id,
|
122
122
|
:field => :content,
|
@@ -175,7 +175,7 @@ you change the data once it is in the index. But you can delete documents so
|
|
175
175
|
the standard way to modify data is to delete it and re-add it again with the
|
176
176
|
modifications made. It is important to note that when doing this the documents
|
177
177
|
will get a new document number so you should be careful not to use a document
|
178
|
-
number after the document has been deleted. Here is an
|
178
|
+
number after the document has been deleted. Here is an example of modifying a
|
179
179
|
document;
|
180
180
|
|
181
181
|
index << {:title => "Programing Rbuy", :content => "blah blah blah"}
|
@@ -185,7 +185,7 @@ document;
|
|
185
185
|
doc = index[doc_id]
|
186
186
|
index.delete(doc_id)
|
187
187
|
|
188
|
-
# modify doc. It is just a Hash
|
188
|
+
# modify doc. It is just a Hash after all
|
189
189
|
doc[:title] = "Programming Ruby"
|
190
190
|
|
191
191
|
index << doc
|
data/ext/analysis.c
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
#include "analysis.h"
|
2
2
|
#include "hash.h"
|
3
|
-
#include
|
3
|
+
#include "libstemmer.h"
|
4
4
|
#include <string.h>
|
5
5
|
#include <ctype.h>
|
6
6
|
#include <wctype.h>
|
@@ -12,7 +12,7 @@
|
|
12
12
|
*
|
13
13
|
****************************************************************************/
|
14
14
|
|
15
|
-
__inline Token *tk_set(Token *tk,
|
15
|
+
INLINE Token *tk_set(Token *tk,
|
16
16
|
char *text, int tlen, int start, int end, int pos_inc)
|
17
17
|
{
|
18
18
|
if (tlen >= MAX_WORD_SIZE) {
|
@@ -27,20 +27,20 @@ __inline Token *tk_set(Token *tk,
|
|
27
27
|
return tk;
|
28
28
|
}
|
29
29
|
|
30
|
-
|
30
|
+
INLINE Token *tk_set_ts(Token *tk,
|
31
31
|
char *start, char *end, char *text, int pos_inc)
|
32
32
|
{
|
33
33
|
return tk_set(tk, start, (int)(end - start),
|
34
34
|
(int)(start - text), (int)(end - text), pos_inc);
|
35
35
|
}
|
36
36
|
|
37
|
-
|
37
|
+
INLINE Token *tk_set_no_len(Token *tk,
|
38
38
|
char *text, int start, int end, int pos_inc)
|
39
39
|
{
|
40
40
|
return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
|
41
41
|
}
|
42
42
|
|
43
|
-
|
43
|
+
INLINE Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
|
44
44
|
int pos_inc)
|
45
45
|
{
|
46
46
|
int len = wcstombs(tk->text, text, MAX_WORD_SIZE - 1);
|
@@ -152,7 +152,7 @@ static TokenStream *cts_new()
|
|
152
152
|
|
153
153
|
#define MBTS(token_stream) ((MultiByteTokenStream *)(token_stream))
|
154
154
|
|
155
|
-
|
155
|
+
INLINE int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
|
156
156
|
{
|
157
157
|
int num_bytes;
|
158
158
|
if ((num_bytes = (int)mbrtowc(wchr, s, MB_CUR_MAX, state)) < 0) {
|
@@ -830,7 +830,7 @@ static bool mb_std_advance_to_start(TokenStream *ts)
|
|
830
830
|
|
831
831
|
i = mb_next_char(&wchr, ts->t, &state);
|
832
832
|
|
833
|
-
while (wchr != 0 && !
|
833
|
+
while (wchr != 0 && !iswalnum(wchr)) {
|
834
834
|
if (isnumpunc(*ts->t) && isdigit(ts->t[1])) break;
|
835
835
|
ts->t += i;
|
836
836
|
i = mb_next_char(&wchr, ts->t, &state);
|
@@ -950,11 +950,14 @@ static Token *std_next(TokenStream *ts)
|
|
950
950
|
}
|
951
951
|
t++;
|
952
952
|
}
|
953
|
-
while (isurlxatpunc(t[-1])) {
|
953
|
+
while (isurlxatpunc(t[-1]) && t > ts->t) {
|
954
954
|
t--; /* strip trailing punctuation */
|
955
955
|
}
|
956
956
|
|
957
|
-
if (num_end
|
957
|
+
if (t < ts->t || (num_end != NULL && num_end < ts->t)) {
|
958
|
+
fprintf(stderr, "Warning: encoding error. Please check that you are using the correct locale for your input");
|
959
|
+
return NULL;
|
960
|
+
} else if (num_end == NULL || t > num_end) {
|
958
961
|
ts->t = t;
|
959
962
|
|
960
963
|
if (is_acronym) { /* check it is one letter followed by one '.' */
|
data/ext/array.c
CHANGED
@@ -1,35 +1,35 @@
|
|
1
1
|
#include "array.h"
|
2
|
-
#include "global.h"
|
3
2
|
#include <string.h>
|
4
3
|
|
5
|
-
#define
|
4
|
+
#define META_CNT ARY_META_CNT
|
5
|
+
#define DATA_SZ sizeof(int) * META_CNT
|
6
6
|
|
7
7
|
void **ary_new_i(int type_size, int init_capa)
|
8
8
|
{
|
9
|
-
|
9
|
+
void **ary;
|
10
10
|
if (init_capa <= 0) {
|
11
11
|
init_capa = ARY_INIT_CAPA;
|
12
12
|
}
|
13
|
-
ary = ((int *)ecalloc(DATA_SZ + init_capa * type_size));
|
14
|
-
ary
|
15
|
-
ary
|
16
|
-
return
|
13
|
+
ary = (void **)&(((int *)ecalloc(DATA_SZ + init_capa * type_size))[META_CNT]);
|
14
|
+
ary_type_size(ary) = type_size;
|
15
|
+
ary_capa(ary) = init_capa;
|
16
|
+
return ary;
|
17
17
|
}
|
18
18
|
|
19
|
-
|
19
|
+
INLINE void ary_resize_i(void ***ary, int size)
|
20
20
|
{
|
21
21
|
size++;
|
22
22
|
if (size >= ary_sz(*ary)) {
|
23
23
|
int capa = ary_capa(*ary);
|
24
24
|
if (size >= capa) {
|
25
|
-
int *ary_start = &((int *)*ary)[-
|
25
|
+
int *ary_start = &((int *)*ary)[-META_CNT];
|
26
26
|
while (size >= capa) {
|
27
27
|
capa <<= 1;
|
28
28
|
}
|
29
29
|
|
30
30
|
ary_start = (int *)erealloc(ary_start,
|
31
31
|
DATA_SZ + capa * ary_type_size(*ary));
|
32
|
-
*ary = (void **)&(ary_start[
|
32
|
+
*ary = (void **)&(ary_start[META_CNT]);
|
33
33
|
memset(((char *)*ary) + ary_type_size(*ary) * ary_sz(*ary), 0,
|
34
34
|
(capa - ary_sz(*ary)) * ary_type_size(*ary));
|
35
35
|
ary_capa(*ary) = capa;
|
data/ext/array.h
CHANGED
@@ -1,12 +1,19 @@
|
|
1
1
|
#ifndef FRT_ARRAY_H
|
2
2
|
#define FRT_ARRAY_H
|
3
|
+
#include "global.h"
|
4
|
+
|
5
|
+
#if defined POSH_OS_SOLARIS || defined POSH_OS_SUNOS
|
6
|
+
# define ARY_META_CNT 4
|
7
|
+
#else
|
8
|
+
# define ARY_META_CNT 3
|
9
|
+
#endif
|
3
10
|
|
4
11
|
#define ARY_INIT_CAPA 8
|
5
12
|
#define ary_size(ary) ary_sz(ary)
|
6
13
|
#define ary_sz(ary) (((int *)ary)[-1])
|
7
14
|
#define ary_capa(ary) (((int *)ary)[-2])
|
8
15
|
#define ary_type_size(ary) (((int *)ary)[-3])
|
9
|
-
#define ary_start(ary) ((void **)&(((int *)ary)[-
|
16
|
+
#define ary_start(ary) ((void **)&(((int *)ary)[-ARY_META_CNT]))
|
10
17
|
#define ary_free(ary) free(ary_start(ary))
|
11
18
|
|
12
19
|
#define ary_new_type_capa(type, init_capa)\
|
data/ext/bitvector.c
CHANGED
@@ -193,7 +193,7 @@ const int NUM_TRAILING_ZEROS[] = {
|
|
193
193
|
/*
|
194
194
|
* This method is highly optimized, hence the loop unrolling
|
195
195
|
*/
|
196
|
-
static
|
196
|
+
static INLINE int bv_get_1_offset(f_u32 word)
|
197
197
|
{
|
198
198
|
if (word & 0xff) {
|
199
199
|
return NUM_TRAILING_ZEROS[word & 0xff];
|
@@ -360,7 +360,7 @@ unsigned long bv_hash(BitVector *bv)
|
|
360
360
|
return hash;
|
361
361
|
}
|
362
362
|
|
363
|
-
static
|
363
|
+
static INLINE void bv_recapa(BitVector *bv, int new_capa)
|
364
364
|
{
|
365
365
|
if (bv->capa < new_capa) {
|
366
366
|
REALLOC_N(bv->bits, f_u32, new_capa);
|
data/ext/except.c
CHANGED
data/ext/ferret.c
CHANGED
@@ -192,11 +192,11 @@ frt_field(VALUE rfield)
|
|
192
192
|
/*
|
193
193
|
* Json Exportation - Loading each LazyDoc and formatting them into json
|
194
194
|
* This code is designed to get a VERY FAST json string, the goal was speed,
|
195
|
-
* not
|
195
|
+
* not sexiness.
|
196
196
|
* Jeremie 'ahFeel' BORDIER
|
197
197
|
* ahFeel@rift.Fr
|
198
198
|
*/
|
199
|
-
|
199
|
+
char *
|
200
200
|
json_concat_string(char *s, char *field)
|
201
201
|
{
|
202
202
|
*(s++) = '"';
|
data/ext/ferret.h
CHANGED
@@ -65,7 +65,7 @@ extern VALUE frt_hs_to_rb_ary(HashSet *hs);
|
|
65
65
|
extern void *frt_rb_data_ptr(VALUE val);
|
66
66
|
extern char * frt_field(VALUE rfield);
|
67
67
|
extern VALUE frt_get_term(const char *field, const char *term);
|
68
|
-
extern
|
68
|
+
extern char *json_concat_string(char *s, char *field);
|
69
69
|
extern char *rs2s(VALUE rstr);
|
70
70
|
extern char *nstrdup(VALUE rstr);
|
71
71
|
#define Frt_Make_Struct(klass)\
|
data/ext/fs_store.c
CHANGED
@@ -51,7 +51,7 @@ static void fs_touch(Store *store, char *filename)
|
|
51
51
|
int f;
|
52
52
|
char path[MAX_FILE_PATH];
|
53
53
|
join_path(path, store->dir.path, filename);
|
54
|
-
if ((f = creat(path,
|
54
|
+
if ((f = creat(path, store->file_mode)) == 0) {
|
55
55
|
RAISE(IO_ERROR, "couldn't create file %s: <%s>", path,
|
56
56
|
strerror(errno));
|
57
57
|
}
|
@@ -257,7 +257,7 @@ static OutStream *fs_new_output(Store *store, const char *filename)
|
|
257
257
|
{
|
258
258
|
char path[MAX_FILE_PATH];
|
259
259
|
int fd = open(join_path(path, store->dir.path, filename),
|
260
|
-
O_WRONLY | O_CREAT | O_BINARY,
|
260
|
+
O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
|
261
261
|
OutStream *os;
|
262
262
|
if (fd < 0) {
|
263
263
|
RAISE(IO_ERROR, "couldn't create OutStream %s: <%s>",
|
@@ -430,8 +430,19 @@ static void fs_close_i(Store *store)
|
|
430
430
|
|
431
431
|
static Store *fs_store_new(const char *pathname)
|
432
432
|
{
|
433
|
+
struct stat stt;
|
433
434
|
Store *new_store = store_new();
|
434
435
|
|
436
|
+
new_store->file_mode = S_IRUSR | S_IWUSR;
|
437
|
+
#ifndef POSH_OS_WIN32
|
438
|
+
if (!stat(pathname, &stt) && stt.st_gid == getgid()) {
|
439
|
+
if (stt.st_mode & S_IWGRP) {
|
440
|
+
umask(S_IWOTH);
|
441
|
+
}
|
442
|
+
new_store->file_mode |= stt.st_mode & (S_IRGRP | S_IWGRP);
|
443
|
+
}
|
444
|
+
#endif
|
445
|
+
|
435
446
|
new_store->dir.path = estrdup(pathname);
|
436
447
|
new_store->touch = &fs_touch;
|
437
448
|
new_store->exists = &fs_exists;
|
data/ext/global.c
CHANGED
@@ -11,22 +11,22 @@ const char *EMPTY_STRING = "";
|
|
11
11
|
|
12
12
|
bool x_do_logging = false;
|
13
13
|
|
14
|
-
|
14
|
+
INLINE int min3(int a, int b, int c)
|
15
15
|
{
|
16
16
|
return MIN3(a, b, c);
|
17
17
|
}
|
18
18
|
|
19
|
-
|
19
|
+
INLINE int min2(int a, int b)
|
20
20
|
{
|
21
21
|
return MIN(a, b);
|
22
22
|
}
|
23
23
|
|
24
|
-
|
24
|
+
INLINE int max3(int a, int b, int c)
|
25
25
|
{
|
26
26
|
return MAX3(a, b, c);
|
27
27
|
}
|
28
28
|
|
29
|
-
|
29
|
+
INLINE int max2(int a, int b)
|
30
30
|
{
|
31
31
|
return MAX(a, b);
|
32
32
|
}
|
data/ext/global.h
CHANGED
@@ -11,6 +11,12 @@
|
|
11
11
|
#define MAX_WORD_SIZE 255
|
12
12
|
#define MAX_FILE_PATH 1024
|
13
13
|
|
14
|
+
#if defined(__GNUC__)
|
15
|
+
# define INLINE __inline__
|
16
|
+
#else
|
17
|
+
# define INLINE
|
18
|
+
#endif
|
19
|
+
|
14
20
|
typedef void (*free_ft)(void *key);
|
15
21
|
|
16
22
|
#define NELEMS(array) ((int)(sizeof(array)/sizeof(array[0])))
|
data/ext/hash.c
CHANGED
@@ -61,7 +61,7 @@ typedef HashEntry *(*lookup_ft)(struct HashTable *ht, register const void *key);
|
|
61
61
|
* @param ht the HashTable to do the fast lookup in
|
62
62
|
* @param the hashkey we are looking for
|
63
63
|
*/
|
64
|
-
static
|
64
|
+
static INLINE HashEntry *h_resize_lookup(HashTable *ht,
|
65
65
|
register const unsigned long hash)
|
66
66
|
{
|
67
67
|
register unsigned long perturb;
|
data/ext/helper.c
CHANGED
data/ext/helper.h
CHANGED
data/ext/index.c
CHANGED
@@ -206,7 +206,7 @@ HashTable *co_hash_create()
|
|
206
206
|
*
|
207
207
|
****************************************************************************/
|
208
208
|
|
209
|
-
__inline void fi_set_store(FieldInfo *fi, int store)
|
209
|
+
INLINE void fi_set_store(FieldInfo *fi, int store)
|
210
210
|
{
|
211
211
|
switch (store) {
|
212
212
|
case STORE_NO:
|
@@ -220,7 +220,7 @@ __inline void fi_set_store(FieldInfo *fi, int store)
|
|
220
220
|
}
|
221
221
|
}
|
222
222
|
|
223
|
-
__inline void fi_set_index(FieldInfo *fi, int index)
|
223
|
+
INLINE void fi_set_index(FieldInfo *fi, int index)
|
224
224
|
{
|
225
225
|
switch (index) {
|
226
226
|
case INDEX_NO:
|
@@ -241,7 +241,7 @@ __inline void fi_set_index(FieldInfo *fi, int index)
|
|
241
241
|
}
|
242
242
|
}
|
243
243
|
|
244
|
-
|
244
|
+
INLINE void fi_set_term_vector(FieldInfo *fi, int term_vector)
|
245
245
|
{
|
246
246
|
switch (term_vector) {
|
247
247
|
case TERM_VECTOR_NO:
|
@@ -466,7 +466,7 @@ static const char *index_str[] = {
|
|
466
466
|
"",
|
467
467
|
":untokenized_omit_norms",
|
468
468
|
"",
|
469
|
-
":
|
469
|
+
":omit_norms"
|
470
470
|
};
|
471
471
|
|
472
472
|
static const char *fi_index_str(FieldInfo *fi)
|
@@ -1375,7 +1375,8 @@ LazyDoc *fr_get_lazy_doc(FieldsReader *fr, int doc_num)
|
|
1375
1375
|
lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
|
1376
1376
|
|
1377
1377
|
for (i = 0; i < stored_cnt; i++) {
|
1378
|
-
|
1378
|
+
off_t start = 0, end;
|
1379
|
+
int data_cnt;
|
1379
1380
|
field_num = is_read_vint(fdt_in);
|
1380
1381
|
fi = fr->fis->fields[field_num];
|
1381
1382
|
data_cnt = is_read_vint(fdt_in);
|
@@ -1449,7 +1450,7 @@ TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
|
|
1449
1450
|
if (store_offsets) {
|
1450
1451
|
int num_positions = tv->offset_cnt = is_read_vint(fdt_in);
|
1451
1452
|
Offset *offsets = tv->offsets = ALLOC_N(Offset, num_positions);
|
1452
|
-
|
1453
|
+
off_t offset = 0;
|
1453
1454
|
for (i = 0; i < num_positions; i++) {
|
1454
1455
|
offsets[i].start = offset += is_read_vint(fdt_in);
|
1455
1456
|
offsets[i].end = offset += is_read_vint(fdt_in);
|
@@ -1567,7 +1568,7 @@ void fw_close(FieldsWriter *fw)
|
|
1567
1568
|
free(fw);
|
1568
1569
|
}
|
1569
1570
|
|
1570
|
-
static
|
1571
|
+
static INLINE void save_data(OutStream *fdt_out, char *data, int dlen)
|
1571
1572
|
{
|
1572
1573
|
os_write_vint(fdt_out, dlen);
|
1573
1574
|
os_write_bytes(fdt_out, (uchar *)data, dlen);
|
@@ -1683,8 +1684,8 @@ void fw_add_postings(FieldsWriter *fw,
|
|
1683
1684
|
int last_end = 0;
|
1684
1685
|
os_write_vint(fdt_out, offset_count); /* write shared prefix length */
|
1685
1686
|
for (i = 0; i < offset_count; i++) {
|
1686
|
-
|
1687
|
-
|
1687
|
+
off_t start = offsets[i].start;
|
1688
|
+
off_t end = offsets[i].end;
|
1688
1689
|
os_write_vint(fdt_out, start - last_end);
|
1689
1690
|
os_write_vint(fdt_out, end - start);
|
1690
1691
|
last_end = end;
|
@@ -1863,7 +1864,7 @@ void sfi_close(SegmentFieldIndex *sfi)
|
|
1863
1864
|
* SegmentTermEnum
|
1864
1865
|
****************************************************************************/
|
1865
1866
|
|
1866
|
-
static
|
1867
|
+
static INLINE int term_read(char *buf, InStream *is)
|
1867
1868
|
{
|
1868
1869
|
int start = (int)is_read_vint(is);
|
1869
1870
|
int length = (int)is_read_vint(is);
|
@@ -2297,7 +2298,7 @@ TermInfosReader *tir_open(Store *store,
|
|
2297
2298
|
return tir;
|
2298
2299
|
}
|
2299
2300
|
|
2300
|
-
static
|
2301
|
+
static INLINE TermEnum *tir_enum(TermInfosReader *tir)
|
2301
2302
|
{
|
2302
2303
|
TermEnum *te;
|
2303
2304
|
if (NULL == (te = thread_getspecific(tir->thread_te))) {
|
@@ -2423,7 +2424,7 @@ TermInfosWriter *tiw_open(Store *store,
|
|
2423
2424
|
return tiw;
|
2424
2425
|
}
|
2425
2426
|
|
2426
|
-
static
|
2427
|
+
static INLINE void tw_write_term(TermWriter *tw,
|
2427
2428
|
OutStream *os,
|
2428
2429
|
const char *term,
|
2429
2430
|
int term_len)
|
@@ -2499,7 +2500,7 @@ void tiw_add(TermInfosWriter *tiw,
|
|
2499
2500
|
tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
|
2500
2501
|
}
|
2501
2502
|
|
2502
|
-
static
|
2503
|
+
static INLINE void tw_reset(TermWriter *tw)
|
2503
2504
|
{
|
2504
2505
|
tw->counter = 0;
|
2505
2506
|
tw->last_term = EMPTY_STRING;
|
@@ -3838,7 +3839,7 @@ void ir_add_cache(IndexReader *ir)
|
|
3838
3839
|
|
3839
3840
|
bool ir_is_latest(IndexReader *ir)
|
3840
3841
|
{
|
3841
|
-
return
|
3842
|
+
return ir->is_latest_i(ir);
|
3842
3843
|
}
|
3843
3844
|
|
3844
3845
|
/****************************************************************************
|
@@ -3919,7 +3920,7 @@ typedef struct SegmentReader {
|
|
3919
3920
|
#define SR(ir) ((SegmentReader *)(ir))
|
3920
3921
|
#define SR_SIZE(ir) (SR(ir)->fr->size)
|
3921
3922
|
|
3922
|
-
static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
3923
|
+
static INLINE FieldsReader *sr_fr(SegmentReader *sr)
|
3923
3924
|
{
|
3924
3925
|
FieldsReader *fr;
|
3925
3926
|
|
@@ -3931,12 +3932,12 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3931
3932
|
return fr;
|
3932
3933
|
}
|
3933
3934
|
|
3934
|
-
static
|
3935
|
+
static INLINE bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
|
3935
3936
|
{
|
3936
3937
|
return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
|
3937
3938
|
}
|
3938
3939
|
|
3939
|
-
static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3940
|
+
static INLINE void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3940
3941
|
uchar *buf)
|
3941
3942
|
{
|
3942
3943
|
Norm *norm = h_get_int(sr->norms, field_num);
|
@@ -3955,7 +3956,7 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
|
3955
3956
|
}
|
3956
3957
|
}
|
3957
3958
|
|
3958
|
-
static
|
3959
|
+
static INLINE uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
3959
3960
|
{
|
3960
3961
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3961
3962
|
if (NULL == norm) { /* not an indexed field */
|
@@ -4043,6 +4044,11 @@ static BitVector *bv_read(Store *store, char *name)
|
|
4043
4044
|
return bv;
|
4044
4045
|
}
|
4045
4046
|
|
4047
|
+
static bool sr_is_latest_i(IndexReader *ir)
|
4048
|
+
{
|
4049
|
+
return (sis_read_current_version(ir->store) == ir->sis->version);
|
4050
|
+
}
|
4051
|
+
|
4046
4052
|
static void sr_commit_i(IndexReader *ir)
|
4047
4053
|
{
|
4048
4054
|
SegmentInfo *si = SR(ir)->si;
|
@@ -4283,6 +4289,7 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
|
|
4283
4289
|
ir->delete_doc_i = &sr_delete_doc_i;
|
4284
4290
|
ir->undelete_all_i = &sr_undelete_all_i;
|
4285
4291
|
ir->set_deleter_i = &sr_set_deleter_i;
|
4292
|
+
ir->is_latest_i = &sr_is_latest_i;
|
4286
4293
|
ir->commit_i = &sr_commit_i;
|
4287
4294
|
ir->close_i = &sr_close_i;
|
4288
4295
|
|
@@ -4570,6 +4577,18 @@ static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
|
|
4570
4577
|
}
|
4571
4578
|
}
|
4572
4579
|
|
4580
|
+
static bool mr_is_latest_i(IndexReader *ir)
|
4581
|
+
{
|
4582
|
+
int i;
|
4583
|
+
const int mr_reader_cnt = MR(ir)->r_cnt;
|
4584
|
+
for (i = 0; i < mr_reader_cnt; i++) {
|
4585
|
+
if (!ir_is_latest(MR(ir)->sub_readers[i])) {
|
4586
|
+
return false;
|
4587
|
+
}
|
4588
|
+
}
|
4589
|
+
return true;
|
4590
|
+
}
|
4591
|
+
|
4573
4592
|
static void mr_commit_i(IndexReader *ir)
|
4574
4593
|
{
|
4575
4594
|
int i;
|
@@ -4639,6 +4658,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
|
|
4639
4658
|
ir->delete_doc_i = &mr_delete_doc_i;
|
4640
4659
|
ir->undelete_all_i = &mr_undelete_all_i;
|
4641
4660
|
ir->set_deleter_i = &mr_set_deleter_i;
|
4661
|
+
ir->is_latest_i = &mr_is_latest_i;
|
4642
4662
|
ir->commit_i = &mr_commit_i;
|
4643
4663
|
ir->close_i = &mr_close_i;
|
4644
4664
|
|
@@ -4799,7 +4819,7 @@ IndexReader *ir_open(Store *store)
|
|
4799
4819
|
*
|
4800
4820
|
****************************************************************************/
|
4801
4821
|
|
4802
|
-
Offset *offset_new(
|
4822
|
+
Offset *offset_new(off_t start, off_t end)
|
4803
4823
|
{
|
4804
4824
|
Offset *offset = ALLOC(Offset);
|
4805
4825
|
offset->start = start;
|
@@ -5177,7 +5197,7 @@ static void dw_add_posting(MemoryPool *mp,
|
|
5177
5197
|
}
|
5178
5198
|
}
|
5179
5199
|
|
5180
|
-
static
|
5200
|
+
static INLINE void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
|
5181
5201
|
{
|
5182
5202
|
if (pos >= dw->offsets_capa) {
|
5183
5203
|
int old_capa = dw->offsets_capa;
|
@@ -5204,6 +5224,7 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5204
5224
|
int doc_num = dw->doc_num;
|
5205
5225
|
int i;
|
5206
5226
|
const int df_size = df->size;
|
5227
|
+
off_t start_offset = 0;
|
5207
5228
|
|
5208
5229
|
if (fld_inv->is_tokenized) {
|
5209
5230
|
Token *tk;
|
@@ -5217,7 +5238,9 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5217
5238
|
pos += tk->pos_inc;
|
5218
5239
|
dw_add_posting(mp, curr_plists, fld_plists, doc_num,
|
5219
5240
|
tk->text, tk->len, pos);
|
5220
|
-
dw_add_offsets(dw, pos,
|
5241
|
+
dw_add_offsets(dw, pos,
|
5242
|
+
start_offset + tk->start,
|
5243
|
+
start_offset + tk->end);
|
5221
5244
|
if (num_terms++ >= dw->max_field_length) {
|
5222
5245
|
break;
|
5223
5246
|
}
|
@@ -5234,6 +5257,7 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5234
5257
|
}
|
5235
5258
|
}
|
5236
5259
|
ts_deref(ts);
|
5260
|
+
start_offset += df->lengths[i] + 1;
|
5237
5261
|
}
|
5238
5262
|
fld_inv->length = num_terms;
|
5239
5263
|
}
|
@@ -5250,8 +5274,10 @@ HashTable *dw_invert_field(DocWriter *dw,
|
|
5250
5274
|
dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr,
|
5251
5275
|
len, i);
|
5252
5276
|
if (store_offsets) {
|
5253
|
-
dw_add_offsets(dw, i,
|
5277
|
+
dw_add_offsets(dw, i, start_offset,
|
5278
|
+
start_offset + df->lengths[i]);
|
5254
5279
|
}
|
5280
|
+
start_offset += df->lengths[i] + 1;
|
5255
5281
|
}
|
5256
5282
|
fld_inv->length = i;
|
5257
5283
|
}
|