ferret 0.11.4 → 0.11.5

Files changed (50)
  1. data/Rakefile +1 -0
  2. data/TUTORIAL +3 -3
  3. data/ext/analysis.c +12 -9
  4. data/ext/array.c +10 -10
  5. data/ext/array.h +8 -1
  6. data/ext/bitvector.c +2 -2
  7. data/ext/except.c +1 -1
  8. data/ext/ferret.c +2 -2
  9. data/ext/ferret.h +1 -1
  10. data/ext/fs_store.c +13 -2
  11. data/ext/global.c +4 -4
  12. data/ext/global.h +6 -0
  13. data/ext/hash.c +1 -1
  14. data/ext/helper.c +1 -1
  15. data/ext/helper.h +1 -1
  16. data/ext/index.c +48 -22
  17. data/ext/index.h +17 -16
  18. data/ext/mempool.c +4 -1
  19. data/ext/mempool.h +1 -1
  20. data/ext/multimapper.c +2 -2
  21. data/ext/q_fuzzy.c +2 -2
  22. data/ext/q_multi_term.c +2 -2
  23. data/ext/q_parser.c +39 -8
  24. data/ext/q_range.c +32 -1
  25. data/ext/r_analysis.c +66 -28
  26. data/ext/r_index.c +18 -19
  27. data/ext/r_qparser.c +21 -6
  28. data/ext/r_search.c +74 -49
  29. data/ext/r_store.c +1 -1
  30. data/ext/r_utils.c +17 -17
  31. data/ext/search.c +10 -5
  32. data/ext/search.h +3 -1
  33. data/ext/sort.c +2 -2
  34. data/ext/stopwords.c +23 -34
  35. data/ext/store.c +9 -9
  36. data/ext/store.h +5 -4
  37. data/lib/ferret/document.rb +2 -2
  38. data/lib/ferret/field_infos.rb +37 -35
  39. data/lib/ferret/index.rb +16 -6
  40. data/lib/ferret/number_tools.rb +2 -2
  41. data/lib/ferret_version.rb +1 -1
  42. data/test/unit/analysis/tc_token_stream.rb +40 -0
  43. data/test/unit/index/tc_index.rb +64 -101
  44. data/test/unit/index/tc_index_reader.rb +13 -0
  45. data/test/unit/largefile/tc_largefile.rb +46 -0
  46. data/test/unit/query_parser/tc_query_parser.rb +17 -1
  47. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  48. data/test/unit/search/tm_searcher.rb +27 -1
  49. data/test/unit/ts_largefile.rb +4 -0
  50. metadata +147 -144
data/Rakefile CHANGED
@@ -55,6 +55,7 @@ task :valgrind do
      "--leak-check=yes --show-reachable=yes -v ruby test/test_all.rb"
   #sh "valgrind --suppressions=ferret_valgrind.supp " +
   #  "--leak-check=yes --show-reachable=yes -v ruby test/unit/index/tc_index_reader.rb"
+  #valgrind --gen-suppressions=yes --suppressions=ferret_valgrind.supp --leak-check=yes --show-reachable=yes -v ruby test/test_all.rb
 end
 
 task :default => :test_all
data/TUTORIAL CHANGED
@@ -116,7 +116,7 @@ when printing to the console:
     puts highlights
   end
 
-And if you want to highlight a whole document, set :excert_length to :all:
+And if you want to highlight a whole document, set :excerpt_length to :all:
 
   puts index.highlight(query, doc_id,
                        :field => :content,
@@ -175,7 +175,7 @@ you change the data once it is in the index. But you can delete documents so
 the standard way to modify data is to delete it and re-add it again with the
 modifications made. It is important to note that when doing this the documents
 will get a new document number so you should be careful not to use a document
-number after the document has been deleted. Here is an examle of modifying a
+number after the document has been deleted. Here is an example of modifying a
 document;
 
   index << {:title => "Programing Rbuy", :content => "blah blah blah"}
@@ -185,7 +185,7 @@ document;
   doc = index[doc_id]
   index.delete(doc_id)
 
-  # modify doc. It is just a Hash afterall
+  # modify doc. It is just a Hash after all
   doc[:title] = "Programming Ruby"
 
   index << doc
data/ext/analysis.c CHANGED
@@ -1,6 +1,6 @@
 #include "analysis.h"
 #include "hash.h"
-#include <libstemmer.h>
+#include "libstemmer.h"
 #include <string.h>
 #include <ctype.h>
 #include <wctype.h>
@@ -12,7 +12,7 @@
  *
  ****************************************************************************/
 
-__inline Token *tk_set(Token *tk,
+INLINE Token *tk_set(Token *tk,
                        char *text, int tlen, int start, int end, int pos_inc)
 {
     if (tlen >= MAX_WORD_SIZE) {
@@ -27,20 +27,20 @@ __inline Token *tk_set(Token *tk,
     return tk;
 }
 
-__inline Token *tk_set_ts(Token *tk,
+INLINE Token *tk_set_ts(Token *tk,
                           char *start, char *end, char *text, int pos_inc)
 {
     return tk_set(tk, start, (int)(end - start),
                   (int)(start - text), (int)(end - text), pos_inc);
 }
 
-__inline Token *tk_set_no_len(Token *tk,
+INLINE Token *tk_set_no_len(Token *tk,
                               char *text, int start, int end, int pos_inc)
 {
     return tk_set(tk, text, (int)strlen(text), start, end, pos_inc);
 }
 
-__inline Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
+INLINE Token *w_tk_set(Token *tk, wchar_t *text, int start, int end,
                          int pos_inc)
 {
     int len = wcstombs(tk->text, text, MAX_WORD_SIZE - 1);
@@ -152,7 +152,7 @@ static TokenStream *cts_new()
 
 #define MBTS(token_stream) ((MultiByteTokenStream *)(token_stream))
 
-__inline int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
+INLINE int mb_next_char(wchar_t *wchr, const char *s, mbstate_t *state)
 {
     int num_bytes;
     if ((num_bytes = (int)mbrtowc(wchr, s, MB_CUR_MAX, state)) < 0) {
@@ -830,7 +830,7 @@ static bool mb_std_advance_to_start(TokenStream *ts)
 
     i = mb_next_char(&wchr, ts->t, &state);
 
-    while (wchr != 0 && !iswalpha(wchr) && !isdigit(*(ts->t))) {
+    while (wchr != 0 && !iswalnum(wchr)) {
         if (isnumpunc(*ts->t) && isdigit(ts->t[1])) break;
         ts->t += i;
         i = mb_next_char(&wchr, ts->t, &state);
@@ -950,11 +950,14 @@ static Token *std_next(TokenStream *ts)
         }
         t++;
     }
-    while (isurlxatpunc(t[-1])) {
+    while (isurlxatpunc(t[-1]) && t > ts->t) {
        t--; /* strip trailing punctuation */
     }
 
-    if (num_end == NULL || t > num_end) {
+    if (t < ts->t || (num_end != NULL && num_end < ts->t)) {
+        fprintf(stderr, "Warning: encoding error. Please check that you are using the correct locale for your input");
+        return NULL;
+    } else if (num_end == NULL || t > num_end) {
         ts->t = t;
 
         if (is_acronym) { /* check it is one letter followed by one '.' */
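The std_next change above guards against multibyte decoding errors: when the locale does not match the input, mbrtowc can fail and the token pointer can end up before the start of the buffer, which is what the new t < ts->t check catches before printing the locale warning. Below is a minimal sketch of that decode step; decode_char is an illustrative helper in the spirit of mb_next_char, not Ferret's actual function.

#include <stdlib.h>     /* MB_CUR_MAX */
#include <wchar.h>      /* mbrtowc, mbstate_t */

/* Illustrative only: mirrors the shape of mb_next_char above. */
static int decode_char(wchar_t *wchr, const char *s, mbstate_t *state)
{
    int num_bytes = (int)mbrtowc(wchr, s, MB_CUR_MAX, state);
    if (num_bytes < 0) {    /* invalid byte sequence for the current locale */
        *wchr = 0;
        return -1;          /* callers treat this as an encoding error */
    }
    return num_bytes;       /* bytes consumed; 0 at the terminating NUL */
}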
data/ext/array.c CHANGED
@@ -1,35 +1,35 @@
 #include "array.h"
-#include "global.h"
 #include <string.h>
 
-#define DATA_SZ sizeof(int) * 3
+#define META_CNT ARY_META_CNT
+#define DATA_SZ sizeof(int) * META_CNT
 
 void **ary_new_i(int type_size, int init_capa)
 {
-    int *ary;
+    void **ary;
     if (init_capa <= 0) {
         init_capa = ARY_INIT_CAPA;
     }
-    ary = ((int *)ecalloc(DATA_SZ + init_capa * type_size));
-    ary[0] = type_size;
-    ary[1] = init_capa;
-    return (void **)&ary[3];
+    ary = (void **)&(((int *)ecalloc(DATA_SZ + init_capa * type_size))[META_CNT]);
+    ary_type_size(ary) = type_size;
+    ary_capa(ary) = init_capa;
+    return ary;
 }
 
-__inline void ary_resize_i(void ***ary, int size)
+INLINE void ary_resize_i(void ***ary, int size)
 {
     size++;
     if (size >= ary_sz(*ary)) {
         int capa = ary_capa(*ary);
         if (size >= capa) {
-            int *ary_start = &((int *)*ary)[-3];
+            int *ary_start = &((int *)*ary)[-META_CNT];
             while (size >= capa) {
                 capa <<= 1;
             }
 
             ary_start = (int *)erealloc(ary_start,
                                         DATA_SZ + capa * ary_type_size(*ary));
-            *ary = (void **)&(ary_start[3]);
+            *ary = (void **)&(ary_start[META_CNT]);
             memset(((char *)*ary) + ary_type_size(*ary) * ary_sz(*ary), 0,
                    (capa - ary_sz(*ary)) * ary_type_size(*ary));
             ary_capa(*ary) = capa;
data/ext/array.h CHANGED
@@ -1,12 +1,19 @@
 #ifndef FRT_ARRAY_H
 #define FRT_ARRAY_H
+#include "global.h"
+
+#if defined POSH_OS_SOLARIS || defined POSH_OS_SUNOS
+# define ARY_META_CNT 4
+#else
+# define ARY_META_CNT 3
+#endif
 
 #define ARY_INIT_CAPA 8
 #define ary_size(ary) ary_sz(ary)
 #define ary_sz(ary) (((int *)ary)[-1])
 #define ary_capa(ary) (((int *)ary)[-2])
 #define ary_type_size(ary) (((int *)ary)[-3])
-#define ary_start(ary) ((void **)&(((int *)ary)[-3]))
+#define ary_start(ary) ((void **)&(((int *)ary)[-ARY_META_CNT]))
 #define ary_free(ary) free(ary_start(ary))
 
 #define ary_new_type_capa(type, init_capa)\
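For context on the two changes above: Ferret's dynamic arrays keep their metadata (type size, capacity, size) in int slots placed immediately before the pointer handed back to callers, which is why the macros index with negative offsets. On Solaris/SunOS a fourth slot is now reserved, presumably so the data area stays 8-byte aligned. The following is a minimal sketch of the same layout with made-up names (meta_ary_new, meta_ary_free), not Ferret's API.

#include <stdlib.h>

#define META_CNT 3  /* assumption: the non-Solaris case shown above */

static void *meta_ary_new(int type_size, int capa)
{
    /* header of META_CNT ints, followed by the element storage */
    int *start = calloc(1, sizeof(int) * META_CNT + (size_t)type_size * capa);
    int *ary = start + META_CNT;   /* callers only ever see this pointer */
    ary[-3] = type_size;           /* ary_type_size(ary) */
    ary[-2] = capa;                /* ary_capa(ary)      */
    ary[-1] = 0;                   /* ary_sz(ary)        */
    return ary;
}

static void meta_ary_free(void *ary)
{
    free((int *)ary - META_CNT);   /* free from the real allocation start */
}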
data/ext/bitvector.c CHANGED
@@ -193,7 +193,7 @@ const int NUM_TRAILING_ZEROS[] = {
 /*
  * This method is highly optimized, hence the loop unrolling
  */
-static __inline int bv_get_1_offset(f_u32 word)
+static INLINE int bv_get_1_offset(f_u32 word)
 {
     if (word & 0xff) {
         return NUM_TRAILING_ZEROS[word & 0xff];
@@ -360,7 +360,7 @@ unsigned long bv_hash(BitVector *bv)
     return hash;
 }
 
-static __inline void bv_recapa(BitVector *bv, int new_capa)
+static INLINE void bv_recapa(BitVector *bv, int new_capa)
 {
     if (bv->capa < new_capa) {
         REALLOC_N(bv->bits, f_u32, new_capa);
data/ext/except.c CHANGED
@@ -62,7 +62,7 @@ void xpush_context(xcontext_t *context)
     context->in_finally = false;
 }
 
-static __inline void xraise_context(xcontext_t *context,
+static INLINE void xraise_context(xcontext_t *context,
                                     volatile int excode,
                                     const char *const msg)
 {
data/ext/ferret.c CHANGED
@@ -192,11 +192,11 @@ frt_field(VALUE rfield)
 /*
  * Json Exportation - Loading each LazyDoc and formatting them into json
  * This code is designed to get a VERY FAST json string, the goal was speed,
- * not sexyness.
+ * not sexiness.
  * Jeremie 'ahFeel' BORDIER
  * ahFeel@rift.Fr
  */
-__inline char *
+char *
 json_concat_string(char *s, char *field)
 {
     *(s++) = '"';
data/ext/ferret.h CHANGED
@@ -65,7 +65,7 @@ extern VALUE frt_hs_to_rb_ary(HashSet *hs);
 extern void *frt_rb_data_ptr(VALUE val);
 extern char * frt_field(VALUE rfield);
 extern VALUE frt_get_term(const char *field, const char *term);
-extern __inline char *json_concat_string(char *s, char *field);
+extern char *json_concat_string(char *s, char *field);
 extern char *rs2s(VALUE rstr);
 extern char *nstrdup(VALUE rstr);
 #define Frt_Make_Struct(klass)\
data/ext/fs_store.c CHANGED
@@ -51,7 +51,7 @@ static void fs_touch(Store *store, char *filename)
     int f;
     char path[MAX_FILE_PATH];
     join_path(path, store->dir.path, filename);
-    if ((f = creat(path, S_IRUSR | S_IWUSR)) == 0) {
+    if ((f = creat(path, store->file_mode)) == 0) {
         RAISE(IO_ERROR, "couldn't create file %s: <%s>", path,
               strerror(errno));
     }
@@ -257,7 +257,7 @@ static OutStream *fs_new_output(Store *store, const char *filename)
 {
     char path[MAX_FILE_PATH];
     int fd = open(join_path(path, store->dir.path, filename),
-                  O_WRONLY | O_CREAT | O_BINARY, S_IRUSR | S_IWUSR);
+                  O_WRONLY | O_CREAT | O_BINARY, store->file_mode);
     OutStream *os;
     if (fd < 0) {
         RAISE(IO_ERROR, "couldn't create OutStream %s: <%s>",
@@ -430,8 +430,19 @@ static void fs_close_i(Store *store)
 
 static Store *fs_store_new(const char *pathname)
 {
+    struct stat stt;
     Store *new_store = store_new();
 
+    new_store->file_mode = S_IRUSR | S_IWUSR;
+#ifndef POSH_OS_WIN32
+    if (!stat(pathname, &stt) && stt.st_gid == getgid()) {
+        if (stt.st_mode & S_IWGRP) {
+            umask(S_IWOTH);
+        }
+        new_store->file_mode |= stt.st_mode & (S_IRGRP | S_IWGRP);
+    }
+#endif
+
     new_store->dir.path = estrdup(pathname);
     new_store->touch = &fs_touch;
     new_store->exists = &fs_exists;
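The fs_store changes above replace the hard-coded S_IRUSR | S_IWUSR with a per-store file_mode that, on non-Windows platforms, inherits the group read/write bits of the index directory when the directory belongs to the caller's group, so a group-shared index stays writable by the whole group. The same logic in isolation, as a hedged sketch (derive_file_mode is a hypothetical helper, not part of Ferret):

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>

static mode_t derive_file_mode(const char *dir_path)
{
    struct stat stt;
    mode_t mode = S_IRUSR | S_IWUSR;                /* owner rw by default */

    if (!stat(dir_path, &stt) && stt.st_gid == getgid()) {
        if (stt.st_mode & S_IWGRP) {
            umask(S_IWOTH);                         /* never let "other" write */
        }
        mode |= stt.st_mode & (S_IRGRP | S_IWGRP);  /* inherit group rw bits */
    }
    return mode;
}

Index files are then created with that mode via creat() and open(), as in the first two hunks of this file.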
data/ext/global.c CHANGED
@@ -11,22 +11,22 @@ const char *EMPTY_STRING = "";
 
 bool x_do_logging = false;
 
-__inline int min3(int a, int b, int c)
+INLINE int min3(int a, int b, int c)
 {
     return MIN3(a, b, c);
 }
 
-__inline int min2(int a, int b)
+INLINE int min2(int a, int b)
 {
     return MIN(a, b);
 }
 
-__inline int max3(int a, int b, int c)
+INLINE int max3(int a, int b, int c)
 {
     return MAX3(a, b, c);
 }
 
-__inline int max2(int a, int b)
+INLINE int max2(int a, int b)
 {
     return MAX(a, b);
 }
data/ext/global.h CHANGED
@@ -11,6 +11,12 @@
 #define MAX_WORD_SIZE 255
 #define MAX_FILE_PATH 1024
 
+#if defined(__GNUC__)
+# define INLINE __inline__
+#else
+# define INLINE
+#endif
+
 typedef void (*free_ft)(void *key);
 
 #define NELEMS(array) ((int)(sizeof(array)/sizeof(array[0])))
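The __inline keyword that the rest of this release removes is a compiler-specific extension, so global.h now defines a portable INLINE macro: __inline__ under GCC, nothing otherwise. A small illustration of how declarations elsewhere in this diff use it (frt_clamp is a made-up example, not a Ferret function):

#include "global.h"   /* provides INLINE as defined above */

static INLINE int frt_clamp(int x, int lo, int hi)
{
    /* expands to "static __inline__ ..." under GCC,
     * and to a plain static function on other compilers */
    return (x < lo) ? lo : ((x > hi) ? hi : x);
}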
data/ext/hash.c CHANGED
@@ -61,7 +61,7 @@ typedef HashEntry *(*lookup_ft)(struct HashTable *ht, register const void *key);
  * @param ht the HashTable to do the fast lookup in
  * @param the hashkey we are looking for
  */
-static __inline HashEntry *h_resize_lookup(HashTable *ht,
+static INLINE HashEntry *h_resize_lookup(HashTable *ht,
                                            register const unsigned long hash)
 {
     register unsigned long perturb;
data/ext/helper.c CHANGED
@@ -1,6 +1,6 @@
 #include "helper.h"
 
-__inline int hlp_string_diff(register const char *const s1,
+int hlp_string_diff(register const char *const s1,
                              register const char *const s2)
 {
     register int i = 0;
data/ext/helper.h CHANGED
@@ -3,7 +3,7 @@
 
 #include "config.h"
 
-extern __inline int hlp_string_diff(register const char *const s1,
+extern int hlp_string_diff(register const char *const s1,
                                     register const char *const s2);
 extern f_i32 float2int(float f);
 extern float int2float(f_i32 i32);
data/ext/index.c CHANGED
@@ -206,7 +206,7 @@ HashTable *co_hash_create()
  *
  ****************************************************************************/
 
-__inline void fi_set_store(FieldInfo *fi, int store)
+INLINE void fi_set_store(FieldInfo *fi, int store)
 {
     switch (store) {
         case STORE_NO:
@@ -220,7 +220,7 @@ __inline void fi_set_store(FieldInfo *fi, int store)
     }
 }
 
-__inline void fi_set_index(FieldInfo *fi, int index)
+INLINE void fi_set_index(FieldInfo *fi, int index)
 {
     switch (index) {
         case INDEX_NO:
@@ -241,7 +241,7 @@ __inline void fi_set_index(FieldInfo *fi, int index)
     }
 }
 
-__inline void fi_set_term_vector(FieldInfo *fi, int term_vector)
+INLINE void fi_set_term_vector(FieldInfo *fi, int term_vector)
 {
     switch (term_vector) {
         case TERM_VECTOR_NO:
@@ -466,7 +466,7 @@ static const char *index_str[] = {
     "",
     ":untokenized_omit_norms",
     "",
-    ":yes_omit_norms"
+    ":omit_norms"
 };
 
 static const char *fi_index_str(FieldInfo *fi)
@@ -1375,7 +1375,8 @@ LazyDoc *fr_get_lazy_doc(FieldsReader *fr, int doc_num)
     lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
 
     for (i = 0; i < stored_cnt; i++) {
-        int start = 0, end, data_cnt;
+        off_t start = 0, end;
+        int data_cnt;
         field_num = is_read_vint(fdt_in);
         fi = fr->fis->fields[field_num];
         data_cnt = is_read_vint(fdt_in);
@@ -1449,7 +1450,7 @@ TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
     if (store_offsets) {
         int num_positions = tv->offset_cnt = is_read_vint(fdt_in);
         Offset *offsets = tv->offsets = ALLOC_N(Offset, num_positions);
-        int offset = 0;
+        off_t offset = 0;
         for (i = 0; i < num_positions; i++) {
             offsets[i].start = offset += is_read_vint(fdt_in);
             offsets[i].end = offset += is_read_vint(fdt_in);
@@ -1567,7 +1568,7 @@ void fw_close(FieldsWriter *fw)
     free(fw);
 }
 
-static __inline void save_data(OutStream *fdt_out, char *data, int dlen)
+static INLINE void save_data(OutStream *fdt_out, char *data, int dlen)
 {
     os_write_vint(fdt_out, dlen);
     os_write_bytes(fdt_out, (uchar *)data, dlen);
@@ -1683,8 +1684,8 @@ void fw_add_postings(FieldsWriter *fw,
         int last_end = 0;
         os_write_vint(fdt_out, offset_count); /* write shared prefix length */
         for (i = 0; i < offset_count; i++) {
-            int start = offsets[i].start;
-            int end = offsets[i].end;
+            off_t start = offsets[i].start;
+            off_t end = offsets[i].end;
             os_write_vint(fdt_out, start - last_end);
             os_write_vint(fdt_out, end - start);
             last_end = end;
@@ -1863,7 +1864,7 @@ void sfi_close(SegmentFieldIndex *sfi)
  * SegmentTermEnum
  ****************************************************************************/
 
-static __inline int term_read(char *buf, InStream *is)
+static INLINE int term_read(char *buf, InStream *is)
 {
     int start = (int)is_read_vint(is);
     int length = (int)is_read_vint(is);
@@ -2297,7 +2298,7 @@ TermInfosReader *tir_open(Store *store,
     return tir;
 }
 
-static __inline TermEnum *tir_enum(TermInfosReader *tir)
+static INLINE TermEnum *tir_enum(TermInfosReader *tir)
 {
     TermEnum *te;
     if (NULL == (te = thread_getspecific(tir->thread_te))) {
@@ -2423,7 +2424,7 @@ TermInfosWriter *tiw_open(Store *store,
     return tiw;
 }
 
-static __inline void tw_write_term(TermWriter *tw,
+static INLINE void tw_write_term(TermWriter *tw,
                                    OutStream *os,
                                    const char *term,
                                    int term_len)
@@ -2499,7 +2500,7 @@ void tiw_add(TermInfosWriter *tiw,
     tw_add(tiw->tis_writer, term, term_len, ti, tiw->skip_interval);
 }
 
-static __inline void tw_reset(TermWriter *tw)
+static INLINE void tw_reset(TermWriter *tw)
 {
     tw->counter = 0;
     tw->last_term = EMPTY_STRING;
@@ -3838,7 +3839,7 @@ void ir_add_cache(IndexReader *ir)
 
 bool ir_is_latest(IndexReader *ir)
 {
-    return (sis_read_current_version(ir->store) == ir->sis->version);
+    return ir->is_latest_i(ir);
 }
 
 /****************************************************************************
@@ -3919,7 +3920,7 @@ typedef struct SegmentReader {
 #define SR(ir) ((SegmentReader *)(ir))
 #define SR_SIZE(ir) (SR(ir)->fr->size)
 
-static __inline FieldsReader *sr_fr(SegmentReader *sr)
+static INLINE FieldsReader *sr_fr(SegmentReader *sr)
 {
     FieldsReader *fr;
 
@@ -3931,12 +3932,12 @@ static __inline FieldsReader *sr_fr(SegmentReader *sr)
     return fr;
 }
 
-static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
+static INLINE bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
 {
     return (NULL != sr->deleted_docs && bv_get(sr->deleted_docs, doc_num));
 }
 
-static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
+static INLINE void sr_get_norms_into_i(SegmentReader *sr, int field_num,
                                          uchar *buf)
 {
     Norm *norm = h_get_int(sr->norms, field_num);
@@ -3955,7 +3956,7 @@ static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
     }
 }
 
-static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
+static INLINE uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
 {
     Norm *norm = h_get_int(sr->norms, field_num);
     if (NULL == norm) { /* not an indexed field */
@@ -4043,6 +4044,11 @@ static BitVector *bv_read(Store *store, char *name)
     return bv;
 }
 
+static bool sr_is_latest_i(IndexReader *ir)
+{
+    return (sis_read_current_version(ir->store) == ir->sis->version);
+}
+
 static void sr_commit_i(IndexReader *ir)
 {
     SegmentInfo *si = SR(ir)->si;
@@ -4283,6 +4289,7 @@ static IndexReader *sr_setup_i(SegmentReader *sr)
     ir->delete_doc_i = &sr_delete_doc_i;
     ir->undelete_all_i = &sr_undelete_all_i;
     ir->set_deleter_i = &sr_set_deleter_i;
+    ir->is_latest_i = &sr_is_latest_i;
     ir->commit_i = &sr_commit_i;
     ir->close_i = &sr_close_i;
 
@@ -4570,6 +4577,18 @@ static void mr_set_deleter_i(IndexReader *ir, Deleter *deleter)
     }
 }
 
+static bool mr_is_latest_i(IndexReader *ir)
+{
+    int i;
+    const int mr_reader_cnt = MR(ir)->r_cnt;
+    for (i = 0; i < mr_reader_cnt; i++) {
+        if (!ir_is_latest(MR(ir)->sub_readers[i])) {
+            return false;
+        }
+    }
+    return true;
+}
+
 static void mr_commit_i(IndexReader *ir)
 {
     int i;
@@ -4639,6 +4658,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
     ir->delete_doc_i = &mr_delete_doc_i;
     ir->undelete_all_i = &mr_undelete_all_i;
     ir->set_deleter_i = &mr_set_deleter_i;
+    ir->is_latest_i = &mr_is_latest_i;
     ir->commit_i = &mr_commit_i;
     ir->close_i = &mr_close_i;
 
@@ -4799,7 +4819,7 @@ IndexReader *ir_open(Store *store)
  *
  ****************************************************************************/
 
-Offset *offset_new(int start, int end)
+Offset *offset_new(off_t start, off_t end)
 {
     Offset *offset = ALLOC(Offset);
     offset->start = start;
@@ -5177,7 +5197,7 @@ static void dw_add_posting(MemoryPool *mp,
     }
 }
 
-static __inline void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
+static INLINE void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
 {
     if (pos >= dw->offsets_capa) {
         int old_capa = dw->offsets_capa;
@@ -5204,6 +5224,7 @@ HashTable *dw_invert_field(DocWriter *dw,
     int doc_num = dw->doc_num;
     int i;
     const int df_size = df->size;
+    off_t start_offset = 0;
 
     if (fld_inv->is_tokenized) {
         Token *tk;
@@ -5217,7 +5238,9 @@ HashTable *dw_invert_field(DocWriter *dw,
                 pos += tk->pos_inc;
                 dw_add_posting(mp, curr_plists, fld_plists, doc_num,
                                tk->text, tk->len, pos);
-                dw_add_offsets(dw, pos, tk->start, tk->end);
+                dw_add_offsets(dw, pos,
+                               start_offset + tk->start,
+                               start_offset + tk->end);
                 if (num_terms++ >= dw->max_field_length) {
                     break;
                 }
@@ -5234,6 +5257,7 @@ HashTable *dw_invert_field(DocWriter *dw,
                 }
             }
             ts_deref(ts);
+            start_offset += df->lengths[i] + 1;
         }
         fld_inv->length = num_terms;
     }
@@ -5250,8 +5274,10 @@ HashTable *dw_invert_field(DocWriter *dw,
             dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr,
                            len, i);
             if (store_offsets) {
-                dw_add_offsets(dw, i, 0, df->lengths[i]);
+                dw_add_offsets(dw, i, start_offset,
+                               start_offset + df->lengths[i]);
             }
+            start_offset += df->lengths[i] + 1;
         }
         fld_inv->length = i;
     }
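The is_latest changes in this file turn ir_is_latest into a virtual call: each reader type installs its own is_latest_i function pointer (sr_is_latest_i compares the segment-info version, mr_is_latest_i asks every sub-reader). Below is a stripped-down sketch of that dispatch pattern, using a simplified stand-in struct rather than Ferret's IndexReader.

#include <stdbool.h>

typedef struct Reader Reader;
struct Reader {
    bool (*is_latest_i)(Reader *r);   /* installed per reader type at setup */
};

/* public entry point: delegates to whichever check the reader installed */
static bool reader_is_latest(Reader *r)
{
    return r->is_latest_i(r);
}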