ferret 0.11.4 → 0.11.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. data/Rakefile +1 -0
  2. data/TUTORIAL +3 -3
  3. data/ext/analysis.c +12 -9
  4. data/ext/array.c +10 -10
  5. data/ext/array.h +8 -1
  6. data/ext/bitvector.c +2 -2
  7. data/ext/except.c +1 -1
  8. data/ext/ferret.c +2 -2
  9. data/ext/ferret.h +1 -1
  10. data/ext/fs_store.c +13 -2
  11. data/ext/global.c +4 -4
  12. data/ext/global.h +6 -0
  13. data/ext/hash.c +1 -1
  14. data/ext/helper.c +1 -1
  15. data/ext/helper.h +1 -1
  16. data/ext/index.c +48 -22
  17. data/ext/index.h +17 -16
  18. data/ext/mempool.c +4 -1
  19. data/ext/mempool.h +1 -1
  20. data/ext/multimapper.c +2 -2
  21. data/ext/q_fuzzy.c +2 -2
  22. data/ext/q_multi_term.c +2 -2
  23. data/ext/q_parser.c +39 -8
  24. data/ext/q_range.c +32 -1
  25. data/ext/r_analysis.c +66 -28
  26. data/ext/r_index.c +18 -19
  27. data/ext/r_qparser.c +21 -6
  28. data/ext/r_search.c +74 -49
  29. data/ext/r_store.c +1 -1
  30. data/ext/r_utils.c +17 -17
  31. data/ext/search.c +10 -5
  32. data/ext/search.h +3 -1
  33. data/ext/sort.c +2 -2
  34. data/ext/stopwords.c +23 -34
  35. data/ext/store.c +9 -9
  36. data/ext/store.h +5 -4
  37. data/lib/ferret/document.rb +2 -2
  38. data/lib/ferret/field_infos.rb +37 -35
  39. data/lib/ferret/index.rb +16 -6
  40. data/lib/ferret/number_tools.rb +2 -2
  41. data/lib/ferret_version.rb +1 -1
  42. data/test/unit/analysis/tc_token_stream.rb +40 -0
  43. data/test/unit/index/tc_index.rb +64 -101
  44. data/test/unit/index/tc_index_reader.rb +13 -0
  45. data/test/unit/largefile/tc_largefile.rb +46 -0
  46. data/test/unit/query_parser/tc_query_parser.rb +17 -1
  47. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  48. data/test/unit/search/tm_searcher.rb +27 -1
  49. data/test/unit/ts_largefile.rb +4 -0
  50. metadata +147 -144
data/Rakefile CHANGED
@@ -55,6 +55,7 @@ task :valgrind do
55
55
  "--leak-check=yes --show-reachable=yes -v ruby test/test_all.rb"
56
56
  #sh "valgrind --suppressions=ferret_valgrind.supp " +
57
57
  # "--leak-check=yes --show-reachable=yes -v ruby test/unit/index/tc_index_reader.rb"
58
+ #valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp --leak-check=yes --show-reachable=yes -v ruby test/test_all.rb
58
59
  end
59
60
 
60
61
  task :default => :test_all
data/TUTORIAL CHANGED
@@ -116,7 +116,7 @@ when printing to the console:
116
116
  puts highlights
117
117
  end
118
118
 
119
- And if you want to highlight a whole document, set :excert_length to :all:
119
+ And if you want to highlight a whole document, set :excerpt_length to :all:
120
120
 
121
121
  puts index.highlight(query, doc_id,
122
122
  :field => :content,
@@ -175,7 +175,7 @@ you change the data once it is in the index. But you can delete documents so
175
175
  the standard way to modify data is to delete it and re-add it again with the
176
176
  modifications made. It is important to note that when doing this the documents
177
177
  will get a new document number so you should be careful not to use a document
178
- number after the document has been deleted. Here is an examle of modifying a
178
+ number after the document has been deleted. Here is an example of modifying a
179
179
  document;
180
180
 
181
181
  index << {:title => "Programing Rbuy", :content => "blah blah blah"}
@@ -185,7 +185,7 @@ document;
185
185
  doc = index[doc_id]
186
186
  index.delete(doc_id)
187
187
 
188
- # modify doc. It is just a Hash afterall
188
+ # modify doc. It is just a Hash after all
189
189
  doc[:title] = "Programming Ruby"
190
190
 
191
191
  index << doc
data/ext/analysis.c CHANGED
@@ -1,6 +1,6 @@
1
1
  #include "analysis.h"
2
2
  #include "hash.h"
3
- #include <libstemmer.h>
3
+ #include "libstemmer.h"
4
4
  #include <string.h>
5
5
  #include <ctype.h>
6
6
  #include <wctype.h>
@@ -12,7 +12,7 @@
12
12
  *
13
13
  ****************************************************************************/
14
14
 
15
- __inline Token *tk_set(Token *tk,
15
+ INLINE Token *tk_set(Token *tk,
16
16
  char *text, int tlen, int start, int end, int pos_inc)
17
17
  {
18
18
  if (tlen >= MAX_WORD_SIZE) {
@@ -27,20 +27,20 @@ __inline Token *tk_set(Token *tk,
27
27
  return tk;
28
28
  }
29
29
 
30
- __inline Token *tk_set_ts(Token *tk,
30
+ INLINE Token *tk_set_ts(Token *tk,
31
31
  char *start, char *end, char *text, int pos_inc)
32
32
  {
33
33
  return tk_set(tk, start, (int)(end - start),
34
34
  (int)(start - text), (int)(end - text), pos_inc);
35
35
  }
36
36
 
37
- __inline Token *tk_set_no_len(Token *tk,
37
+ INLINE Token *tk_set_no_len(Token *tk,
38
38
  char *text, int start, int end, int pos_inc)
39
39
  {
40
40
  return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
41
41
  }
42
42
 
43
- __inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
43
+ INLINE Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
44
44
  int pos_inc)
45
45
  {
46
46
  int len = wcstombs(tk->text, text, MAX_WORD_SIZE - 1);
@@ -152,7 +152,7 @@ static TokenStream *cts_new()
152
152
 
153
153
  #define MBTS(token_stream) ((MultiByteTokenStream *)(token_stream))
154
154
 
155
- __inline int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
155
+ INLINE int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
156
156
  {
157
157
  int num_bytes;
158
158
  if ((num_bytes = (int)mbrtowc(wchr, s, MB_CUR_MAX, state)) < 0) {
@@ -830,7 +830,7 @@ static bool mb_std_advance_to_start(TokenStream *ts)
830
830
 
831
831
  i = mb_next_char(&wchr, ts->t, &state);
832
832
 
833
- while (wchr != 0 && !iswalpha(wchr) && !isdigit(*(ts->t))) {
833
+ while (wchr != 0 && !iswalnum(wchr)) {
834
834
  if (isnumpunc(*ts->t) && isdigit(ts->t[1])) break;
835
835
  ts->t += i;
836
836
  i = mb_next_char(&wchr, ts->t, &state);
@@ -950,11 +950,14 @@ static Token *std_next(TokenStream *ts)
950
950
  }
951
951
  t++;
952
952
  }
953
- while (isurlxatpunc(t[-1])) {
953
+ while (isurlxatpunc(t[-1]) && t > ts->t) {
954
954
  t--; /* strip trailing punctuation */
955
955
  }
956
956
 
957
- if (num_end == NULL || t > num_end) {
957
+ if (t < ts->t || (num_end != NULL && num_end < ts->t)) {
958
+ fprintf(stderr, "Warning: encoding error. Please check that you are using the correct locale for your input");
959
+ return NULL;
960
+ } else if (num_end == NULL || t > num_end) {
958
961
  ts->t = t;
959
962
 
960
963
  if (is_acronym) { /* check it is one letter followed by one '.' */
data/ext/array.c CHANGED
@@ -1,35 +1,35 @@
1
1
  #include "array.h"
2
- #include "global.h"
3
2
  #include <string.h>
4
3
 
5
- #define DATA_SZ sizeof(int) * 3
4
+ #define META_CNT ARY_META_CNT
5
+ #define DATA_SZ sizeof(int) * META_CNT
6
6
 
7
7
  void **ary_new_i(int type_size, int init_capa)
8
8
  {
9
- int *ary;
9
+ void **ary;
10
10
  if (init_capa <= 0) {
11
11
  init_capa = ARY_INIT_CAPA;
12
12
  }
13
- ary = ((int *)ecalloc(DATA_SZ + init_capa * type_size));
14
- ary[0] = type_size;
15
- ary[1] = init_capa;
16
- return (void **)&ary[3];
13
+ ary = (void **)&(((int *)ecalloc(DATA_SZ + init_capa * type_size))[META_CNT]);
14
+ ary_type_size(ary) = type_size;
15
+ ary_capa(ary) = init_capa;
16
+ return ary;
17
17
  }
18
18
 
19
- __inline void ary_resize_i(void ***ary, int size)
19
+ INLINE void ary_resize_i(void ***ary, int size)
20
20
  {
21
21
  size++;
22
22
  if (size >= ary_sz(*ary)) {
23
23
  int capa = ary_capa(*ary);
24
24
  if (size >= capa) {
25
- int *ary_start = &((int *)*ary)[-3];
25
+ int *ary_start = &((int *)*ary)[-META_CNT];
26
26
  while (size >= capa) {
27
27
  capa <<= 1;
28
28
  }
29
29
 
30
30
  ary_start = (int *)erealloc(ary_start,
31
31
  DATA_SZ + capa * ary_type_size(*ary));
32
- *ary = (void **)&(ary_start[3]);
32
+ *ary = (void **)&(ary_start[META_CNT]);
33
33
  memset(((char *)*ary) + ary_type_size(*ary) * ary_sz(*ary), 0,
34
34
  (capa - ary_sz(*ary)) * ary_type_size(*ary));
35
35
  ary_capa(*ary) = capa;
data/ext/array.h CHANGED
@@ -1,12 +1,19 @@
1
1
  #ifndef FRT_ARRAY_H
2
2
  #define FRT_ARRAY_H
3
+ #include "global.h"
4
+
5
+ #if defined POSH_OS_SOLARIS || defined POSH_OS_SUNOS
6
+ # define ARY_META_CNT 4
7
+ #else
8
+ # define ARY_META_CNT 3
9
+ #endif
3
10
 
4
11
  #define ARY_INIT_CAPA 8
5
12
  #define ary_size(ary) ary_sz(ary)
6
13
  #define ary_sz(ary) (((int *)ary)[-1])
7
14
  #define ary_capa(ary) (((int *)ary)[-2])
8
15
  #define ary_type_size(ary) (((int *)ary)[-3])
9
- #define ary_start(ary) ((void **)&(((int *)ary)[-3]))
16
+ #define ary_start(ary) ((void **)&(((int *)ary)[-ARY_META_CNT]))
10
17
  #define ary_free(ary) free(ary_start(ary))
11
18
 
12
19
  #define ary_new_type_capa(type, init_capa)\
data/ext/bitvector.c CHANGED
@@ -193,7 +193,7 @@ const int NUM_TRAILING_ZEROS[] = {
193
193
  /*
194
194
  * This method is highly optimized, hence the loop unrolling
195
195
  */
196
- static __inline int bv_get_1_offset(f_u32 word)
196
+ static INLINE int bv_get_1_offset(f_u32 word)
197
197
  {
198
198
  if (word & 0xff) {
199
199
  return NUM_TRAILING_ZEROS[word & 0xff];
@@ -360,7 +360,7 @@ unsigned long bv_hash(BitVector *bv)
360
360
  return hash;
361
361
  }
362
362
 
363
- static __inline void bv_recapa(BitVector *bv, int new_capa)
363
+ static INLINE void bv_recapa(BitVector *bv, int new_capa)
364
364
  {
365
365
  if (bv->capa < new_capa) {
366
366
  REALLOC_N(bv->bits, f_u32, new_capa);
data/ext/except.c CHANGED
@@ -62,7 +62,7 @@ void xpush_context(xcontext_t *context)
62
62
  context->in_finally = false;
63
63
  }
64
64
 
65
- static __inline void xraise_context(xcontext_t *context,
65
+ static INLINE void xraise_context(xcontext_t *context,
66
66
  volatile int excode,
67
67
  const char *const msg)
68
68
  {
data/ext/ferret.c CHANGED
@@ -192,11 +192,11 @@ frt_field(VALUE rfield)
192
192
  /*
193
193
  * Json Exportation - Loading each LazyDoc and formatting them into json
194
194
  * This code is designed to get a VERY FAST json string, the goal was speed,
195
- * not sexyness.
195
+ * not sexiness.
196
196
  * Jeremie 'ahFeel' BORDIER
197
197
  * ahFeel@rift.Fr
198
198
  */
199
- __inline char *
199
+ char *
200
200
  json_concat_string(char *s, char *field)
201
201
  {
202
202
  *(s++) = '"';
data/ext/ferret.h CHANGED
@@ -65,7 +65,7 @@ extern VALUE frt_hs_to_rb_ary(HashSet *hs);
65
65
  extern void *frt_rb_data_ptr(VALUE val);
66
66
  extern char * frt_field(VALUE rfield);
67
67
  extern VALUE frt_get_term(const char *field, const char *term);
68
- extern __inline char *json_concat_string(char *s, char *field);
68
+ extern char *json_concat_string(char *s, char *field);
69
69
  extern char *rs2s(VALUE rstr);
70
70
  extern char *nstrdup(VALUE rstr);
71
71
  #define Frt_Make_Struct(klass)\
data/ext/fs_store.c CHANGED
@@ -51,7 +51,7 @@ static void fs_touch(Store *store, char *filename)
51
51
  int f;
52
52
  char path[MAX_FILE_PATH];
53
53
  join_path(path, store->dir.path, filename);
54
- if ((f = creat(path, S_IRUSR | S_IWUSR)) == 0) {
54
+ if ((f = creat(path, store->file_mode)) == 0) {
55
55
  RAISE(IO_ERROR, "couldn't create file %s: <%s>", path,
56
56
  strerror(errno));
57
57
  }
@@ -257,7 +257,7 @@ static OutStream *fs_new_output(Store *store, const char *filename)
257
257
  {
258
258
  char path[MAX_FILE_PATH];
259
259
  int fd = open(join_path(path, store->dir.path, filename),
260
- O_WRONLY | O_CREAT | O_BINARY, S_IRUSR | S_IWUSR);
260
+ O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
261
261
  OutStream *os;
262
262
  if (fd < 0) {
263
263
  RAISE(IO_ERROR, "couldn't create OutStream %s: <%s>",
@@ -430,8 +430,19 @@ static void fs_close_i(Store *store)
430
430
 
431
431
  static Store *fs_store_new(const char *pathname)
432
432
  {
433
+ struct stat stt;
433
434
  Store *new_store = store_new();
434
435
 
436
+ new_store->file_mode = S_IRUSR | S_IWUSR;
437
+ #ifndef POSH_OS_WIN32
438
+ if (!stat(pathname, &stt) && stt.st_gid == getgid()) {
439
+ if (stt.st_mode & S_IWGRP) {
440
+ umask(S_IWOTH);
441
+ }
442
+ new_store->file_mode |= stt.st_mode & (S_IRGRP | S_IWGRP);
443
+ }
444
+ #endif
445
+
435
446
  new_store->dir.path = estrdup(pathname);
436
447
  new_store->touch = &fs_touch;
437
448
  new_store->exists = &fs_exists;
data/ext/global.c CHANGED
@@ -11,22 +11,22 @@ const char *EMPTY_STRING = "";
11
11
 
12
12
  bool x_do_logging = false;
13
13
 
14
- __inline int min3(int a, int b, int c)
14
+ INLINE int min3(int a, int b, int c)
15
15
  {
16
16
  return MIN3(a, b, c);
17
17
  }
18
18
 
19
- __inline int min2(int a, int b)
19
+ INLINE int min2(int a, int b)
20
20
  {
21
21
  return MIN(a, b);
22
22
  }
23
23
 
24
- __inline int max3(int a, int b, int c)
24
+ INLINE int max3(int a, int b, int c)
25
25
  {
26
26
  return MAX3(a, b, c);
27
27
  }
28
28
 
29
- __inline int max2(int a, int b)
29
+ INLINE int max2(int a, int b)
30
30
  {
31
31
  return MAX(a, b);
32
32
  }
data/ext/global.h CHANGED
@@ -11,6 +11,12 @@
11
11
  #define MAX_WORD_SIZE 255
12
12
  #define MAX_FILE_PATH 1024
13
13
 
14
+ #if defined(__GNUC__)
15
+ # define INLINE __inline__
16
+ #else
17
+ # define INLINE
18
+ #endif
19
+
14
20
  typedef void (*free_ft)(void *key);
15
21
 
16
22
  #define NELEMS(array) ((int)(sizeof(array)/sizeof(array[0])))
data/ext/hash.c CHANGED
@@ -61,7 +61,7 @@ typedef HashEntry *(*lookup_ft)(struct HashTable *ht, register const void *key);
61
61
  * @param ht the HashTable to do the fast lookup in
62
62
  * @param the hashkey we are looking for
63
63
  */
64
- static __inline HashEntry *h_resize_lookup(HashTable *ht,
64
+ static INLINE HashEntry *h_resize_lookup(HashTable *ht,
65
65
  register const unsigned long hash)
66
66
  {
67
67
  register unsigned long perturb;
data/ext/helper.c CHANGED
@@ -1,6 +1,6 @@
1
1
  #include "helper.h"
2
2
 
3
- __inline int hlp_string_diff(register const char *const s1,
3
+ int hlp_string_diff(register const char *const s1,
4
4
  register const char *const s2)
5
5
  {
6
6
  register int i = 0;
data/ext/helper.h CHANGED
@@ -3,7 +3,7 @@
3
3
 
4
4
  #include "config.h"
5
5
 
6
- extern __inline int hlp_string_diff(register const char *const s1,
6
+ extern int hlp_string_diff(register const char *const s1,
7
7
  register const char *const s2);
8
8
  extern f_i32 float2int(float f);
9
9
  extern float int2float(f_i32 i32);
data/ext/index.c CHANGED
@@ -206,7 +206,7 @@ HashTable *co_hash_create()
206
206
  *
207
207
  ****************************************************************************/
208
208
 
209
- __inline void fi_set_store(FieldInfo *fi, int store)
209
+ INLINE void fi_set_store(FieldInfo *fi, int store)
210
210
  {
211
211
  switch (store) {
212
212
  case STORE_NO:
@@ -220,7 +220,7 @@ __inline void fi_set_store(FieldInfo *fi, int store)
220
220
  }
221
221
  }
222
222
 
223
- __inline void fi_set_index(FieldInfo *fi, int index)
223
+ INLINE void fi_set_index(FieldInfo *fi, int index)
224
224
  {
225
225
  switch (index) {
226
226
  case INDEX_NO:
@@ -241,7 +241,7 @@ __inline void fi_set_index(FieldInfo *fi, int index)
241
241
  }
242
242
  }
243
243
 
244
- __inline void fi_set_term_vector(FieldInfo *fi, int term_vector)
244
+ INLINE void fi_set_term_vector(FieldInfo *fi, int term_vector)
245
245
  {
246
246
  switch (term_vector) {
247
247
  case TERM_VECTOR_NO:
@@ -466,7 +466,7 @@ static const char *index_str[] = {
466
466
  "",
467
467
  ":untokenized_omit_norms",
468
468
  "",
469
- ":yes_omit_norms"
469
+ ":omit_norms"
470
470
  };
471
471
 
472
472
  static const char *fi_index_str(FieldInfo *fi)
@@ -1375,7 +1375,8 @@ LazyDoc *fr_get_lazy_doc(FieldsReader *fr, int doc_num)
1375
1375
  lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
1376
1376
 
1377
1377
  for (i = 0; i < stored_cnt; i++) {
1378
- int start = 0, end, data_cnt;
1378
+ off_t start = 0, end;
1379
+ int data_cnt;
1379
1380
  field_num = is_read_vint(fdt_in);
1380
1381
  fi = fr->fis->fields[field_num];
1381
1382
  data_cnt = is_read_vint(fdt_in);
@@ -1449,7 +1450,7 @@ TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
1449
1450
  if (store_offsets) {
1450
1451
  int num_positions = tv->offset_cnt = is_read_vint(fdt_in);
1451
1452
  Offset *offsets = tv->offsets = ALLOC_N(Offset, num_positions);
1452
- int offset = 0;
1453
+ off_t offset = 0;
1453
1454
  for (i = 0; i < num_positions; i++) {
1454
1455
  offsets[i].start = offset += is_read_vint(fdt_in);
1455
1456
  offsets[i].end = offset += is_read_vint(fdt_in);
@@ -1567,7 +1568,7 @@ void fw_close(FieldsWriter *fw)
1567
1568
  free(fw);
1568
1569
  }
1569
1570
 
1570
- static __inline void save_data(OutStream *fdt_out, char *data, int dlen)
1571
+ static INLINE void save_data(OutStream *fdt_out, char *data, int dlen)
1571
1572
  {
1572
1573
  os_write_vint(fdt_out, dlen);
1573
1574
  os_write_bytes(fdt_out, (uchar *)data, dlen);
@@ -1683,8 +1684,8 @@ void fw_add_postings(FieldsWriter *fw,
1683
1684
  int last_end = 0;
1684
1685
  os_write_vint(fdt_out, offset_count); /* write shared prefix length */
1685
1686
  for (i = 0; i < offset_count; i++) {
1686
- int start = offsets[i].start;
1687
- int end = offsets[i].end;
1687
+ off_t start = offsets[i].start;
1688
+ off_t end = offsets[i].end;
1688
1689
  os_write_vint(fdt_out, start - last_end);
1689
1690
  os_write_vint(fdt_out, end - start);
1690
1691
  last_end = end;
@@ -1863,7 +1864,7 @@ void sfi_close(SegmentFieldIndex *sfi)
1863
1864
  * SegmentTermEnum
1864
1865
  ****************************************************************************/
1865
1866
 
1866
- static __inline int term_read(char *buf, InStream *is)
1867
+ static INLINE int term_read(char *buf, InStream *is)
1867
1868
  {
1868
1869
  int start = (int)is_read_vint(is);
1869
1870
  int length = (int)is_read_vint(is);
@@ -2297,7 +2298,7 @@ TermInfosReader *tir_open(Store *store,
2297
2298
  return tir;
2298
2299
  }
2299
2300
 
2300
- static __inline TermEnum *tir_enum(TermInfosReader *tir)
2301
+ static INLINE TermEnum *tir_enum(TermInfosReader *tir)
2301
2302
  {
2302
2303
  TermEnum *te;
2303
2304
  if (NULL == (te = thread_getspecific(tir->thread_te))) {
@@ -2423,7 +2424,7 @@ TermInfosWriter *tiw_open(Store *store,
2423
2424
  return tiw;
2424
2425
  }
2425
2426
 
2426
- static __inline void tw_write_term(TermWriter *tw,
2427
+ static INLINE void tw_write_term(TermWriter *tw,
2427
2428
  OutStream *os,
2428
2429
  const char *term,
2429
2430
  int term_len)
@@ -2499,7 +2500,7 @@ void tiw_add(TermInfosWriter *tiw,
2499
2500
  tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
2500
2501
  }
2501
2502
 
2502
- static __inline void tw_reset(TermWriter *tw)
2503
+ static INLINE void tw_reset(TermWriter *tw)
2503
2504
  {
2504
2505
  tw->counter = 0;
2505
2506
  tw->last_term = EMPTY_STRING;
@@ -3838,7 +3839,7 @@ void ir_add_cache(IndexReader *ir)
3838
3839
 
3839
3840
  bool ir_is_latest(IndexReader *ir)
3840
3841
  {
3841
- return (sis_read_current_version(ir->store) == ir->sis->version);
3842
+ return ir->is_latest_i(ir);
3842
3843
  }
3843
3844
 
3844
3845
  /****************************************************************************
@@ -3919,7 +3920,7 @@ typedef struct SegmentReader {
3919
3920
  #define SR(ir) ((SegmentReader *)(ir))
3920
3921
  #define SR_SIZE(ir) (SR(ir)->fr->size)
3921
3922
 
3922
- static __inline FieldsReader *sr_fr(SegmentReader *sr)
3923
+ static INLINE FieldsReader *sr_fr(SegmentReader *sr)
3923
3924
  {
3924
3925
  FieldsReader *fr;
3925
3926
 
@@ -3931,12 +3932,12 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
3931
3932
  return fr;
3932
3933
  }
3933
3934
 
3934
- static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
3935
+ static INLINE bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
3935
3936
  {
3936
3937
  return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
3937
3938
  }
3938
3939
 
3939
- static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3940
+ static INLINE void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3940
3941
  uchar *buf)
3941
3942
  {
3942
3943
  Norm *norm = h_get_int(sr->norms, field_num);
@@ -3955,7 +3956,7 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3955
3956
  }
3956
3957
  }
3957
3958
 
3958
- static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3959
+ static INLINE uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3959
3960
  {
3960
3961
  Norm *norm = h_get_int(sr->norms, field_num);
3961
3962
  if (NULL == norm) { /* not an indexed field */
@@ -4043,6 +4044,11 @@ static BitVector *bv_read(Store *store, char *name)
4043
4044
  return bv;
4044
4045
  }
4045
4046
 
4047
+ static bool sr_is_latest_i(IndexReader *ir)
4048
+ {
4049
+ return (sis_read_current_version(ir->store) == ir->sis->version);
4050
+ }
4051
+
4046
4052
  static void sr_commit_i(IndexReader *ir)
4047
4053
  {
4048
4054
  SegmentInfo *si = SR(ir)->si;
@@ -4283,6 +4289,7 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
4283
4289
  ir->delete_doc_i = &sr_delete_doc_i;
4284
4290
  ir->undelete_all_i = &sr_undelete_all_i;
4285
4291
  ir->set_deleter_i = &sr_set_deleter_i;
4292
+ ir->is_latest_i = &sr_is_latest_i;
4286
4293
  ir->commit_i = &sr_commit_i;
4287
4294
  ir->close_i = &sr_close_i;
4288
4295
 
@@ -4570,6 +4577,18 @@ static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
4570
4577
  }
4571
4578
  }
4572
4579
 
4580
+ static bool mr_is_latest_i(IndexReader *ir)
4581
+ {
4582
+ int i;
4583
+ const int mr_reader_cnt = MR(ir)->r_cnt;
4584
+ for (i = 0; i < mr_reader_cnt; i++) {
4585
+ if (!ir_is_latest(MR(ir)->sub_readers[i])) {
4586
+ return false;
4587
+ }
4588
+ }
4589
+ return true;
4590
+ }
4591
+
4573
4592
  static void mr_commit_i(IndexReader *ir)
4574
4593
  {
4575
4594
  int i;
@@ -4639,6 +4658,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
4639
4658
  ir->delete_doc_i = &mr_delete_doc_i;
4640
4659
  ir->undelete_all_i = &mr_undelete_all_i;
4641
4660
  ir->set_deleter_i = &mr_set_deleter_i;
4661
+ ir->is_latest_i = &mr_is_latest_i;
4642
4662
  ir->commit_i = &mr_commit_i;
4643
4663
  ir->close_i = &mr_close_i;
4644
4664
 
@@ -4799,7 +4819,7 @@ IndexReader *ir_open(Store *store)
4799
4819
  *
4800
4820
  ****************************************************************************/
4801
4821
 
4802
- Offset *offset_new(int start, int end)
4822
+ Offset *offset_new(off_t start, off_t end)
4803
4823
  {
4804
4824
  Offset *offset = ALLOC(Offset);
4805
4825
  offset->start = start;
@@ -5177,7 +5197,7 @@ static void dw_add_posting(MemoryPool *mp,
5177
5197
  }
5178
5198
  }
5179
5199
 
5180
- static __inline void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
5200
+ static INLINE void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
5181
5201
  {
5182
5202
  if (pos >= dw->offsets_capa) {
5183
5203
  int old_capa = dw->offsets_capa;
@@ -5204,6 +5224,7 @@ HashTable *dw_invert_field(DocWriter *dw,
5204
5224
  int doc_num = dw->doc_num;
5205
5225
  int i;
5206
5226
  const int df_size = df->size;
5227
+ off_t start_offset = 0;
5207
5228
 
5208
5229
  if (fld_inv->is_tokenized) {
5209
5230
  Token *tk;
@@ -5217,7 +5238,9 @@ HashTable *dw_invert_field(DocWriter *dw,
5217
5238
  pos += tk->pos_inc;
5218
5239
  dw_add_posting(mp, curr_plists, fld_plists, doc_num,
5219
5240
  tk->text, tk->len, pos);
5220
- dw_add_offsets(dw, pos, tk->start, tk->end);
5241
+ dw_add_offsets(dw, pos,
5242
+ start_offset + tk->start,
5243
+ start_offset + tk->end);
5221
5244
  if (num_terms++ >= dw->max_field_length) {
5222
5245
  break;
5223
5246
  }
@@ -5234,6 +5257,7 @@ HashTable *dw_invert_field(DocWriter *dw,
5234
5257
  }
5235
5258
  }
5236
5259
  ts_deref(ts);
5260
+ start_offset += df->lengths[i] + 1;
5237
5261
  }
5238
5262
  fld_inv->length = num_terms;
5239
5263
  }
@@ -5250,8 +5274,10 @@ HashTable *dw_invert_field(DocWriter *dw,
5250
5274
  dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr,
5251
5275
  len, i);
5252
5276
  if (store_offsets) {
5253
- dw_add_offsets(dw, i, 0, df->lengths[i]);
5277
+ dw_add_offsets(dw, i, start_offset,
5278
+ start_offset + df->lengths[i]);
5254
5279
  }
5280
+ start_offset += df->lengths[i] + 1;
5255
5281
  }
5256
5282
  fld_inv->length = i;
5257
5283
  }