ferret 0.11.6 → 0.11.8.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (185)
  1. data/README +10 -22
  2. data/RELEASE_CHANGES +137 -0
  3. data/RELEASE_NOTES +60 -0
  4. data/Rakefile +379 -274
  5. data/TODO +100 -8
  6. data/bin/ferret-browser +0 -0
  7. data/ext/BZLIB_blocksort.c +1094 -0
  8. data/ext/BZLIB_bzlib.c +1578 -0
  9. data/ext/BZLIB_compress.c +672 -0
  10. data/ext/BZLIB_crctable.c +104 -0
  11. data/ext/BZLIB_decompress.c +626 -0
  12. data/ext/BZLIB_huffman.c +205 -0
  13. data/ext/BZLIB_randtable.c +84 -0
  14. data/ext/{api.c → STEMMER_api.c} +7 -10
  15. data/ext/{libstemmer.c → STEMMER_libstemmer.c} +3 -2
  16. data/ext/{stem_ISO_8859_1_danish.c → STEMMER_stem_ISO_8859_1_danish.c} +123 -124
  17. data/ext/{stem_ISO_8859_1_dutch.c → STEMMER_stem_ISO_8859_1_dutch.c} +177 -188
  18. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  19. data/ext/{stem_ISO_8859_1_finnish.c → STEMMER_stem_ISO_8859_1_finnish.c} +276 -306
  20. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  21. data/ext/{stem_ISO_8859_1_german.c → STEMMER_stem_ISO_8859_1_german.c} +161 -170
  22. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  25. data/ext/{stem_ISO_8859_1_porter.c → STEMMER_stem_ISO_8859_1_porter.c} +263 -290
  26. data/ext/{stem_ISO_8859_1_portuguese.c → STEMMER_stem_ISO_8859_1_portuguese.c} +362 -380
  27. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  29. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  30. data/ext/{stem_KOI8_R_russian.c → STEMMER_stem_KOI8_R_russian.c} +244 -245
  31. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  32. data/ext/{stem_UTF_8_dutch.c → STEMMER_stem_UTF_8_dutch.c} +192 -211
  33. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  34. data/ext/{stem_UTF_8_finnish.c → STEMMER_stem_UTF_8_finnish.c} +284 -324
  35. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  36. data/ext/{stem_UTF_8_german.c → STEMMER_stem_UTF_8_german.c} +170 -187
  37. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  38. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  39. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  40. data/ext/{stem_UTF_8_porter.c → STEMMER_stem_UTF_8_porter.c} +271 -310
  41. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  42. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  43. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  44. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  45. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  46. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  47. data/ext/{utilities.c → STEMMER_utilities.c} +100 -68
  48. data/ext/analysis.c +276 -121
  49. data/ext/analysis.h +190 -143
  50. data/ext/api.h +3 -4
  51. data/ext/array.c +5 -3
  52. data/ext/array.h +52 -43
  53. data/ext/bitvector.c +38 -482
  54. data/ext/bitvector.h +446 -124
  55. data/ext/bzlib.h +282 -0
  56. data/ext/bzlib_private.h +503 -0
  57. data/ext/compound_io.c +23 -22
  58. data/ext/config.h +21 -11
  59. data/ext/document.c +43 -40
  60. data/ext/document.h +31 -21
  61. data/ext/except.c +20 -38
  62. data/ext/except.h +89 -76
  63. data/ext/extconf.rb +3 -2
  64. data/ext/ferret.c +49 -35
  65. data/ext/ferret.h +14 -11
  66. data/ext/field_index.c +262 -0
  67. data/ext/field_index.h +52 -0
  68. data/ext/filter.c +11 -10
  69. data/ext/fs_store.c +65 -47
  70. data/ext/global.c +245 -165
  71. data/ext/global.h +252 -54
  72. data/ext/hash.c +200 -243
  73. data/ext/hash.h +205 -163
  74. data/ext/hashset.c +118 -96
  75. data/ext/hashset.h +110 -82
  76. data/ext/header.h +19 -19
  77. data/ext/helper.c +11 -10
  78. data/ext/helper.h +14 -6
  79. data/ext/index.c +745 -366
  80. data/ext/index.h +503 -529
  81. data/ext/internal.h +1020 -0
  82. data/ext/lang.c +10 -0
  83. data/ext/lang.h +35 -15
  84. data/ext/mempool.c +5 -4
  85. data/ext/mempool.h +30 -22
  86. data/ext/modules.h +35 -7
  87. data/ext/multimapper.c +43 -2
  88. data/ext/multimapper.h +32 -23
  89. data/ext/posh.c +0 -0
  90. data/ext/posh.h +4 -38
  91. data/ext/priorityqueue.c +10 -12
  92. data/ext/priorityqueue.h +33 -21
  93. data/ext/q_boolean.c +22 -9
  94. data/ext/q_const_score.c +3 -2
  95. data/ext/q_filtered_query.c +15 -12
  96. data/ext/q_fuzzy.c +147 -135
  97. data/ext/q_match_all.c +3 -2
  98. data/ext/q_multi_term.c +28 -32
  99. data/ext/q_parser.c +451 -173
  100. data/ext/q_phrase.c +158 -79
  101. data/ext/q_prefix.c +16 -18
  102. data/ext/q_range.c +363 -31
  103. data/ext/q_span.c +130 -141
  104. data/ext/q_term.c +21 -21
  105. data/ext/q_wildcard.c +19 -23
  106. data/ext/r_analysis.c +369 -242
  107. data/ext/r_index.c +421 -434
  108. data/ext/r_qparser.c +142 -92
  109. data/ext/r_search.c +790 -407
  110. data/ext/r_store.c +44 -44
  111. data/ext/r_utils.c +264 -96
  112. data/ext/ram_store.c +29 -23
  113. data/ext/scanner.c +895 -0
  114. data/ext/scanner.h +36 -0
  115. data/ext/scanner_mb.c +6701 -0
  116. data/ext/scanner_utf8.c +4415 -0
  117. data/ext/search.c +210 -87
  118. data/ext/search.h +556 -488
  119. data/ext/similarity.c +17 -16
  120. data/ext/similarity.h +51 -44
  121. data/ext/sort.c +157 -354
  122. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  123. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  124. data/ext/stem_UTF_8_hungarian.h +16 -0
  125. data/ext/stem_UTF_8_romanian.h +16 -0
  126. data/ext/stem_UTF_8_turkish.h +16 -0
  127. data/ext/stopwords.c +287 -278
  128. data/ext/store.c +57 -51
  129. data/ext/store.h +308 -286
  130. data/ext/symbol.c +10 -0
  131. data/ext/symbol.h +23 -0
  132. data/ext/term_vectors.c +14 -293
  133. data/ext/threading.h +22 -22
  134. data/ext/win32.h +12 -4
  135. data/lib/ferret.rb +2 -1
  136. data/lib/ferret/browser.rb +1 -1
  137. data/lib/ferret/field_symbol.rb +94 -0
  138. data/lib/ferret/index.rb +221 -34
  139. data/lib/ferret/number_tools.rb +6 -6
  140. data/lib/ferret/version.rb +3 -0
  141. data/test/{unit → long_running}/largefile/tc_largefile.rb +1 -1
  142. data/test/test_helper.rb +7 -2
  143. data/test/test_installed.rb +1 -0
  144. data/test/threading/thread_safety_index_test.rb +10 -1
  145. data/test/threading/thread_safety_read_write_test.rb +4 -7
  146. data/test/threading/thread_safety_test.rb +0 -0
  147. data/test/unit/analysis/tc_analyzer.rb +29 -27
  148. data/test/unit/analysis/tc_token_stream.rb +23 -16
  149. data/test/unit/index/tc_index.rb +116 -11
  150. data/test/unit/index/tc_index_reader.rb +27 -27
  151. data/test/unit/index/tc_index_writer.rb +10 -0
  152. data/test/unit/index/th_doc.rb +38 -21
  153. data/test/unit/search/tc_filter.rb +31 -10
  154. data/test/unit/search/tc_index_searcher.rb +6 -0
  155. data/test/unit/search/tm_searcher.rb +53 -1
  156. data/test/unit/store/tc_fs_store.rb +40 -2
  157. data/test/unit/store/tc_ram_store.rb +0 -0
  158. data/test/unit/store/tm_store.rb +0 -0
  159. data/test/unit/store/tm_store_lock.rb +7 -6
  160. data/test/unit/tc_field_symbol.rb +26 -0
  161. data/test/unit/ts_analysis.rb +0 -0
  162. data/test/unit/ts_index.rb +0 -0
  163. data/test/unit/ts_store.rb +0 -0
  164. data/test/unit/ts_utils.rb +0 -0
  165. data/test/unit/utils/tc_number_tools.rb +0 -0
  166. data/test/utils/content_generator.rb +226 -0
  167. metadata +262 -221
  168. data/ext/inc/lang.h +0 -48
  169. data/ext/inc/threading.h +0 -31
  170. data/ext/stem_ISO_8859_1_english.c +0 -1156
  171. data/ext/stem_ISO_8859_1_french.c +0 -1276
  172. data/ext/stem_ISO_8859_1_italian.c +0 -1091
  173. data/ext/stem_ISO_8859_1_norwegian.c +0 -296
  174. data/ext/stem_ISO_8859_1_spanish.c +0 -1119
  175. data/ext/stem_ISO_8859_1_swedish.c +0 -307
  176. data/ext/stem_UTF_8_danish.c +0 -344
  177. data/ext/stem_UTF_8_english.c +0 -1176
  178. data/ext/stem_UTF_8_french.c +0 -1296
  179. data/ext/stem_UTF_8_italian.c +0 -1113
  180. data/ext/stem_UTF_8_norwegian.c +0 -302
  181. data/ext/stem_UTF_8_portuguese.c +0 -1055
  182. data/ext/stem_UTF_8_russian.c +0 -709
  183. data/ext/stem_UTF_8_spanish.c +0 -1137
  184. data/ext/stem_UTF_8_swedish.c +0 -313
  185. data/lib/ferret_version.rb +0 -3
@@ -14,7 +14,7 @@
14
14
 
15
15
  struct among
16
16
  { int s_size; /* number of chars in string */
17
- symbol * s; /* search string */
17
+ const symbol * s; /* search string */
18
18
  int substring_i;/* index to longest matching substring */
19
19
  int result; /* result of the lookup */
20
20
  int (* function)(struct SN_env *);
@@ -25,31 +25,31 @@ extern void lose_s(symbol * p);
25
25
 
26
26
  extern int skip_utf8(const symbol * p, int c, int lb, int l, int n);
27
27
 
28
- extern int in_grouping_U(struct SN_env * z, unsigned char * s, int min, int max);
29
- extern int in_grouping_b_U(struct SN_env * z, unsigned char * s, int min, int max);
30
- extern int out_grouping_U(struct SN_env * z, unsigned char * s, int min, int max);
31
- extern int out_grouping_b_U(struct SN_env * z, unsigned char * s, int min, int max);
28
+ extern int in_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
29
+ extern int in_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
30
+ extern int out_grouping_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
31
+ extern int out_grouping_b_U(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
32
32
 
33
- extern int in_grouping(struct SN_env * z, unsigned char * s, int min, int max);
34
- extern int in_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
35
- extern int out_grouping(struct SN_env * z, unsigned char * s, int min, int max);
36
- extern int out_grouping_b(struct SN_env * z, unsigned char * s, int min, int max);
33
+ extern int in_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
34
+ extern int in_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
35
+ extern int out_grouping(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
36
+ extern int out_grouping_b(struct SN_env * z, const unsigned char * s, int min, int max, int repeat);
37
37
 
38
- extern int eq_s(struct SN_env * z, int s_size, symbol * s);
39
- extern int eq_s_b(struct SN_env * z, int s_size, symbol * s);
40
- extern int eq_v(struct SN_env * z, symbol * p);
41
- extern int eq_v_b(struct SN_env * z, symbol * p);
38
+ extern int eq_s(struct SN_env * z, int s_size, const symbol * s);
39
+ extern int eq_s_b(struct SN_env * z, int s_size, const symbol * s);
40
+ extern int eq_v(struct SN_env * z, const symbol * p);
41
+ extern int eq_v_b(struct SN_env * z, const symbol * p);
42
42
 
43
- extern int find_among(struct SN_env * z, struct among * v, int v_size);
44
- extern int find_among_b(struct SN_env * z, struct among * v, int v_size);
43
+ extern int find_among(struct SN_env * z, const struct among * v, int v_size);
44
+ extern int find_among_b(struct SN_env * z, const struct among * v, int v_size);
45
45
 
46
46
  extern int replace_s(struct SN_env * z, int c_bra, int c_ket, int s_size, const symbol * s, int * adjustment);
47
- extern int slice_from_s(struct SN_env * z, int s_size, symbol * s);
48
- extern int slice_from_v(struct SN_env * z, symbol * p);
47
+ extern int slice_from_s(struct SN_env * z, int s_size, const symbol * s);
48
+ extern int slice_from_v(struct SN_env * z, const symbol * p);
49
49
  extern int slice_del(struct SN_env * z);
50
50
 
51
- extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, symbol * s);
52
- extern int insert_v(struct SN_env * z, int bra, int ket, symbol * p);
51
+ extern int insert_s(struct SN_env * z, int bra, int ket, int s_size, const symbol * s);
52
+ extern int insert_v(struct SN_env * z, int bra, int ket, const symbol * p);
53
53
 
54
54
  extern symbol * slice_to(struct SN_env * z, symbol * p);
55
55
  extern symbol * assign_to(struct SN_env * z, symbol * p);
@@ -1,4 +1,5 @@
1
1
  #include "helper.h"
2
+ #include "internal.h"
2
3
 
3
4
  int hlp_string_diff(register const char *const s1,
4
5
  register const char *const s2)
@@ -10,17 +11,17 @@ int hlp_string_diff(register const char *const s1,
10
11
  return i;
11
12
  }
12
13
 
13
- f_i32 float2int(float f)
14
+ i32 float2int(float f)
14
15
  {
15
- union { f_i32 i; float f; } tmp;
16
+ union { i32 i; float f; } tmp;
16
17
  tmp.f = f;
17
18
  return tmp.i;
18
19
  }
19
20
 
20
- float int2float(f_i32 i32)
21
+ float int2float(i32 v)
21
22
  {
22
- union { f_i32 i; float f; } tmp;
23
- tmp.i = i32;
23
+ union { i32 i; float f; } tmp;
24
+ tmp.i = v;
24
25
  return tmp.f;
25
26
  }
26
27
 
@@ -30,8 +31,8 @@ float byte2float(unsigned char b)
30
31
  return 0.0;
31
32
  }
32
33
  else {
33
- f_u32 mantissa = b & 0x07;
34
- f_u32 exponent = (b >> 3) & 0x1f;
34
+ u32 mantissa = b & 0x07;
35
+ u32 exponent = (b >> 3) & 0x1f;
35
36
 
36
37
  return int2float((mantissa << 21) | ((exponent + 48) << 24));
37
38
  }
@@ -44,9 +45,9 @@ unsigned char float2byte(float f)
44
45
  }
45
46
  else {
46
47
  /* correctly order the bytes for encoding */
47
- f_u32 i32 = float2int(f);
48
- int mantissa = (i32 & 0xEf0000) >> 21;
49
- int exponent = ((i32 >> 24) - 48);
48
+ u32 i = float2int(f);
49
+ int mantissa = (i & 0xEf0000) >> 21;
50
+ int exponent = ((i >> 24) - 48);
50
51
 
51
52
  if (exponent > 0x1f) {
52
53
  exponent = 0x1f; /* 0x1f = 31 = 0b00011111 */
@@ -1,13 +1,21 @@
1
1
  #ifndef FRT_HELPER_H
2
2
  #define FRT_HELPER_H
3
3
 
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
4
8
  #include "config.h"
5
9
 
6
- extern int hlp_string_diff(register const char *const s1,
7
- register const char *const s2);
8
- extern f_i32 float2int(float f);
9
- extern float int2float(f_i32 i32);
10
- extern float byte2float(unsigned char b);
11
- extern unsigned char float2byte(float f);
10
+ extern int frt_hlp_string_diff(register const char *const s1,
11
+ register const char *const s2);
12
+ extern frt_i32 frt_float2int(float f);
13
+ extern float frt_int2float(frt_i32 i32);
14
+ extern float frt_byte2float(unsigned char b);
15
+ extern unsigned char frt_float2byte(float f);
16
+
17
+ #ifdef __cplusplus
18
+ } // extern "C"
19
+ #endif
12
20
 
13
21
  #endif
@@ -1,11 +1,17 @@
1
1
  #include "index.h"
2
+ #include "symbol.h"
2
3
  #include "similarity.h"
3
4
  #include "helper.h"
4
5
  #include "array.h"
5
- #include "priorityqueue.h"
6
6
  #include <string.h>
7
7
  #include <limits.h>
8
8
  #include <ctype.h>
9
+ #ifdef USE_ZLIB
10
+ # include <zlib.h>
11
+ #else
12
+ # include "bzlib.h"
13
+ #endif
14
+ #include "internal.h"
9
15
 
10
16
  #define GET_LOCK(lock, name, store, err_msg) do {\
11
17
  lock = store->open_lock(store, name);\
@@ -26,7 +32,7 @@ const Config default_config = {
26
32
  SKIP_INTERVAL, /* skip interval */
27
33
  10, /* default merge factor */
28
34
  10000, /* max_buffered_docs */
29
- INT_MAX, /* max_merged_docs */
35
+ INT_MAX, /* max_merge_docs */
30
36
  10000, /* maximum field length (number of terms) */
31
37
  true /* use compound file by default */
32
38
  };
@@ -37,21 +43,22 @@ static char *ste_next(TermEnum *te);
37
43
  #define FORMAT 0
38
44
  #define SEGMENTS_GEN_FILE_NAME "segments"
39
45
  #define MAX_EXT_LEN 10
46
+ #define ZIP_BUFFER_SIZE 16348
47
+ #define ZIP_LEVEL 9
40
48
 
41
49
  /* *** Must be three characters *** */
42
- const char *INDEX_EXTENSIONS[] = {
50
+ static const char *INDEX_EXTENSIONS[] = {
43
51
  "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis", "del", "gen", "cfs"
44
52
  };
45
53
 
46
54
  /* *** Must be three characters *** */
47
- const char *COMPOUND_EXTENSIONS[] = {
55
+ static const char *COMPOUND_EXTENSIONS[] = {
48
56
  "frq", "prx", "fdx", "fdt", "tfx", "tix", "tis"
49
57
  };
50
58
 
51
-
52
59
  static const char BASE36_DIGITMAP[] = "0123456789abcdefghijklmnopqrstuvwxyz";
53
60
 
54
- static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
61
+ static char *u64_to_str36(char *buf, int buf_size, u64 u)
55
62
  {
56
63
  int i = buf_size - 1;
57
64
  buf[i] = '\0';
@@ -69,9 +76,9 @@ static char *u64_to_str36(char *buf, int buf_size, f_u64 u)
69
76
  return buf + i;
70
77
  }
71
78
 
72
- static f_u64 str36_to_u64(char *p)
79
+ static u64 str36_to_u64(char *p)
73
80
  {
74
- f_u64 u = 0;
81
+ u64 u = 0;
75
82
  while (true) {
76
83
  if ('0' <= *p && '9' >= *p) {
77
84
  u = u * 36 + *p - '0';
@@ -98,14 +105,14 @@ static f_u64 str36_to_u64(char *p)
98
105
  * @param ext extension of the filename (including .)
99
106
  * @param gen generation
100
107
  */
101
- char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
108
+ char *fn_for_generation(char *buf, char *base, char *ext, i64 gen)
102
109
  {
103
110
  if (-1 == gen) {
104
111
  return NULL;
105
112
  }
106
113
  else {
107
114
  char b[SEGMENT_NAME_MAX_LENGTH];
108
- char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen);
115
+ char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (u64)gen);
109
116
  if (ext == NULL) {
110
117
  sprintf(buf, "%s_%s", base, u);
111
118
  }
@@ -116,7 +123,7 @@ char *fn_for_generation(char *buf, char *base, char *ext, f_i64 gen)
116
123
  }
117
124
  }
118
125
 
119
- char *segfn_for_generation(char *buf, f_u64 generation)
126
+ static char *segfn_for_generation(char *buf, u64 generation)
120
127
  {
121
128
  char b[SEGMENT_NAME_MAX_LENGTH];
122
129
  char *u = u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, generation);
@@ -137,9 +144,9 @@ char *segfn_for_generation(char *buf, f_u64 generation)
137
144
  * @param field_num field number
138
145
  */
139
146
  static char *fn_for_gen_field(char *buf,
140
- char *base,
141
- char *ext,
142
- f_i64 gen,
147
+ const char *base,
148
+ const char *ext,
149
+ i64 gen,
143
150
  int field_num)
144
151
  {
145
152
  if (-1 == gen) {
@@ -149,7 +156,7 @@ static char *fn_for_gen_field(char *buf,
149
156
  char b[SEGMENT_NAME_MAX_LENGTH];
150
157
  sprintf(buf, "%s_%s.%s%d",
151
158
  base,
152
- u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (f_u64)gen),
159
+ u64_to_str36(b, SEGMENT_NAME_MAX_LENGTH, (u64)gen),
153
160
  ext,
154
161
  field_num);
155
162
  return buf;
@@ -172,7 +179,7 @@ static int co_eq(const void *key1, const void *key2)
172
179
  return (key1 == key2);
173
180
  }
174
181
 
175
- void co_destroy(CacheObject *self)
182
+ static void co_destroy(CacheObject *self)
176
183
  {
177
184
  h_rem(self->ref_tab1, self->ref2, false);
178
185
  h_rem(self->ref_tab2, self->ref1, false);
@@ -180,7 +187,7 @@ void co_destroy(CacheObject *self)
180
187
  free(self);
181
188
  }
182
189
 
183
- CacheObject *co_create(HashTable *ref_tab1, HashTable *ref_tab2,
190
+ CacheObject *co_create(Hash *ref_tab1, Hash *ref_tab2,
184
191
  void *ref1, void *ref2, free_ft destroy, void *obj)
185
192
  {
186
193
  CacheObject *self = ALLOC(CacheObject);
@@ -195,7 +202,7 @@ CacheObject *co_create(HashTable *ref_tab1, HashTable *ref_tab2,
195
202
  return self;
196
203
  }
197
204
 
198
- HashTable *co_hash_create()
205
+ Hash *co_hash_create()
199
206
  {
200
207
  return h_new(&co_hash, &co_eq, (free_ft)NULL, (free_ft)&co_destroy);
201
208
  }
@@ -206,7 +213,7 @@ HashTable *co_hash_create()
206
213
  *
207
214
  ****************************************************************************/
208
215
 
209
- INLINE void fi_set_store(FieldInfo *fi, int store)
216
+ static INLINE void fi_set_store(FieldInfo *fi, int store)
210
217
  {
211
218
  switch (store) {
212
219
  case STORE_NO:
@@ -220,7 +227,7 @@ INLINE void fi_set_store(FieldInfo *fi, int store)
220
227
  }
221
228
  }
222
229
 
223
- INLINE void fi_set_index(FieldInfo *fi, int index)
230
+ static INLINE void fi_set_index(FieldInfo *fi, int index)
224
231
  {
225
232
  switch (index) {
226
233
  case INDEX_NO:
@@ -241,7 +248,7 @@ INLINE void fi_set_index(FieldInfo *fi, int index)
241
248
  }
242
249
  }
243
250
 
244
- INLINE void fi_set_term_vector(FieldInfo *fi, int term_vector)
251
+ static INLINE void fi_set_term_vector(FieldInfo *fi, int term_vector)
245
252
  {
246
253
  switch (term_vector) {
247
254
  case TERM_VECTOR_NO:
@@ -271,14 +278,15 @@ static void fi_check_params(int store, int index, int term_vector)
271
278
  }
272
279
  }
273
280
 
274
- FieldInfo *fi_new(const char *name,
275
- enum StoreValues store,
276
- enum IndexValues index,
277
- enum TermVectorValues term_vector)
281
+ FieldInfo *fi_new(Symbol name,
282
+ StoreValue store,
283
+ IndexValue index,
284
+ TermVectorValue term_vector)
278
285
  {
279
286
  FieldInfo *fi = ALLOC(FieldInfo);
287
+ assert(NULL != name);
280
288
  fi_check_params(store, index, term_vector);
281
- fi->name = estrdup(name);
289
+ fi->name = name;
282
290
  fi->boost = 1.0;
283
291
  fi->bits = 0;
284
292
  fi_set_store(fi, store);
@@ -291,28 +299,28 @@ FieldInfo *fi_new(const char *name,
291
299
  void fi_deref(FieldInfo *fi)
292
300
  {
293
301
  if (0 == --(fi->ref_cnt)) {
294
- free(fi->name);
295
302
  free(fi);
296
303
  }
297
304
  }
298
305
 
299
306
  char *fi_to_s(FieldInfo *fi)
300
307
  {
301
- char *str = ALLOC_N(char, strlen(fi->name) + 200);
308
+ char *str = ALLOC_N(char, strlen((char *)fi->name) + 200);
302
309
  char *s = str;
303
- sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", fi->name,
304
- fi_is_stored(fi) ? "is_stored, " : "",
305
- fi_is_compressed(fi) ? "is_compressed, " : "",
306
- fi_is_indexed(fi) ? "is_indexed, " : "",
307
- fi_is_tokenized(fi) ? "is_tokenized, " : "",
308
- fi_omit_norms(fi) ? "omit_norms, " : "",
309
- fi_store_term_vector(fi) ? "store_term_vector, " : "",
310
- fi_store_positions(fi) ? "store_positions, " : "",
311
- fi_store_offsets(fi) ? "store_offsets, " : "");
312
- s += (int)strlen(str) - 2;
310
+ s += sprintf(str, "[\"%s\":(%s%s%s%s%s%s%s%s", (char *)fi->name,
311
+ fi_is_stored(fi) ? "is_stored, " : "",
312
+ fi_is_compressed(fi) ? "is_compressed, " : "",
313
+ fi_is_indexed(fi) ? "is_indexed, " : "",
314
+ fi_is_tokenized(fi) ? "is_tokenized, " : "",
315
+ fi_omit_norms(fi) ? "omit_norms, " : "",
316
+ fi_store_term_vector(fi) ? "store_term_vector, " : "",
317
+ fi_store_positions(fi) ? "store_positions, " : "",
318
+ fi_store_offsets(fi) ? "store_offsets, " : "");
319
+ s -= 2;
313
320
  if (*s != ',') {
314
321
  s += 2;
315
322
  }
323
+
316
324
  sprintf(s, ")]");
317
325
  return str;
318
326
  }
@@ -323,11 +331,12 @@ char *fi_to_s(FieldInfo *fi)
323
331
  *
324
332
  ****************************************************************************/
325
333
 
326
- FieldInfos *fis_new(int store, int index, int term_vector)
334
+ FieldInfos *fis_new(StoreValue store, IndexValue index,
335
+ TermVectorValue term_vector)
327
336
  {
328
337
  FieldInfos *fis = ALLOC(FieldInfos);
329
338
  fi_check_params(store, index, term_vector);
330
- fis->field_dict = h_new_str((free_ft)NULL, (free_ft)&fi_deref);
339
+ fis->field_dict = h_new_ptr((free_ft)&fi_deref);
331
340
  fis->size = 0;
332
341
  fis->capa = FIELD_INFOS_INIT_CAPA;
333
342
  fis->fields = ALLOC_N(FieldInfo *, fis->capa);
@@ -346,7 +355,7 @@ FieldInfo *fis_add_field(FieldInfos *fis, FieldInfo *fi)
346
355
  }
347
356
  if (!h_set_safe(fis->field_dict, fi->name, fi)) {
348
357
  RAISE(ARG_ERROR,
349
- "Field :%s already exists", fi->name);
358
+ "Field :%s already exists", (char *)fi->name);
350
359
  }
351
360
  fi->number = fis->size;
352
361
  fis->fields[fis->size] = fi;
@@ -354,14 +363,14 @@ FieldInfo *fis_add_field(FieldInfos *fis, FieldInfo *fi)
354
363
  return fi;
355
364
  }
356
365
 
357
- FieldInfo *fis_get_field(FieldInfos *fis, const char *name)
366
+ FieldInfo *fis_get_field(FieldInfos *fis, Symbol name)
358
367
  {
359
- return h_get(fis->field_dict, name);
368
+ return (FieldInfo *)h_get(fis->field_dict, name);
360
369
  }
361
370
 
362
- int fis_get_field_num(FieldInfos *fis, const char *name)
371
+ int fis_get_field_num(FieldInfos *fis, Symbol name)
363
372
  {
364
- FieldInfo *fi = h_get(fis->field_dict, name);
373
+ FieldInfo *fi = (FieldInfo *)h_get(fis->field_dict, name);
365
374
  if (fi) {
366
375
  return fi->number;
367
376
  }
@@ -370,11 +379,11 @@ int fis_get_field_num(FieldInfos *fis, const char *name)
370
379
  }
371
380
  }
372
381
 
373
- FieldInfo *fis_get_or_add_field(FieldInfos *fis, const char *name)
382
+ FieldInfo *fis_get_or_add_field(FieldInfos *fis, Symbol name)
374
383
  {
375
- FieldInfo *fi = h_get(fis->field_dict, name);
384
+ FieldInfo *fi = (FieldInfo *)h_get(fis->field_dict, name);
376
385
  if (!fi) {
377
- fi = fi_new(name, fis->store, fis->index, fis->term_vector);
386
+ fi = (FieldInfo*)fi_new(name, fis->store, fis->index, fis->term_vector);
378
387
  fis_add_field(fis, fi);
379
388
  }
380
389
  return fi;
@@ -392,27 +401,28 @@ FieldInfo *fis_by_number(FieldInfos *fis, int num)
392
401
 
393
402
  FieldInfos *fis_read(InStream *is)
394
403
  {
395
- FieldInfos *volatile fis;
404
+ FieldInfos *volatile fis = NULL;
396
405
  TRY
397
406
  do {
398
- int store_val, index_val, term_vector_val;
399
- int i;
400
- union { f_u32 i; float f; } tmp;
407
+ StoreValue store_val;
408
+ IndexValue index_val;
409
+ TermVectorValue term_vector_val;
410
+ volatile int i;
411
+ union { u32 i; float f; } tmp;
401
412
  FieldInfo *volatile fi;
402
413
 
403
- store_val = is_read_vint(is);
404
- index_val = is_read_vint(is);
405
- term_vector_val = is_read_vint(is);
414
+ store_val = (StoreValue)is_read_vint(is);
415
+ index_val = (IndexValue)is_read_vint(is);
416
+ term_vector_val = (TermVectorValue)is_read_vint(is);
406
417
  fis = fis_new(store_val, index_val, term_vector_val);
407
418
  for (i = is_read_vint(is); i > 0; i--) {
408
419
  fi = ALLOC_AND_ZERO(FieldInfo);
409
420
  TRY
410
- fi->name = is_read_string_safe(is);
421
+ fi->name = intern_and_free(is_read_string_safe(is));
411
422
  tmp.i = is_read_u32(is);
412
423
  fi->boost = tmp.f;
413
424
  fi->bits = is_read_vint(is);
414
425
  XCATCHALL
415
- free(fi->name);
416
426
  free(fi);
417
427
  XENDTRY
418
428
  fis_add_field(fis, fi);
@@ -423,13 +433,13 @@ FieldInfos *fis_read(InStream *is)
423
433
  fis_deref(fis);
424
434
  XENDTRY
425
435
 
426
- return fis;
436
+ return fis;
427
437
  }
428
438
 
429
439
  void fis_write(FieldInfos *fis, OutStream *os)
430
440
  {
431
441
  int i;
432
- union { f_u32 i; float f; } tmp;
442
+ union { u32 i; float f; } tmp;
433
443
  FieldInfo *fi;
434
444
  const int fis_size = fis->size;
435
445
 
@@ -439,7 +449,7 @@ void fis_write(FieldInfos *fis, OutStream *os)
439
449
  os_write_vint(os, fis->size);
440
450
  for (i = 0; i < fis_size; i++) {
441
451
  fi = fis->fields[i];
442
- os_write_string(os, fi->name);
452
+ os_write_string(os, S(fi->name));
443
453
  tmp.f = fi->boost;
444
454
  os_write_u32(os, tmp.i);
445
455
  os_write_vint(os, fi->bits);
@@ -497,27 +507,25 @@ char *fis_to_s(FieldInfos *fis)
497
507
  FieldInfo *fi;
498
508
  const int fis_size = fis->size;
499
509
 
500
- sprintf(buf,
501
- "default:\n"
502
- " store: %s\n"
503
- " index: %s\n"
504
- " term_vector: %s\n"
505
- "fields:\n",
506
- store_str[fis->store], index_str[fis->index],
507
- term_vector_str[fis->term_vector]);
508
- pos = (int)strlen(buf);
510
+ pos = sprintf(buf,
511
+ "default:\n"
512
+ " store: %s\n"
513
+ " index: %s\n"
514
+ " term_vector: %s\n"
515
+ "fields:\n",
516
+ store_str[fis->store],
517
+ index_str[fis->index],
518
+ term_vector_str[fis->term_vector]);
509
519
  for (i = 0; i < fis_size; i++) {
510
520
  fi = fis->fields[i];
511
- sprintf(buf + pos,
512
- " %s:\n"
513
- " boost: %f\n"
514
- " store: %s\n"
515
- " index: %s\n"
516
- " term_vector: %s\n",
517
- fi->name, fi->boost, fi_store_str(fi),
518
- fi_index_str(fi), fi_term_vector_str(fi));
519
-
520
- pos += strlen(buf + pos);
521
+ pos += sprintf(buf + pos,
522
+ " %s:\n"
523
+ " boost: %f\n"
524
+ " store: %s\n"
525
+ " index: %s\n"
526
+ " term_vector: %s\n",
527
+ (char *)fi->name, fi->boost, fi_store_str(fi),
528
+ fi_index_str(fi), fi_term_vector_str(fi));
521
529
  }
522
530
 
523
531
  return buf;
@@ -565,7 +573,7 @@ SegmentInfo *si_new(char *name, int doc_cnt, Store *store)
565
573
  return si;
566
574
  }
567
575
 
568
- SegmentInfo *si_read(Store *store, InStream *is)
576
+ static SegmentInfo *si_read(Store *store, InStream *is)
569
577
  {
570
578
  SegmentInfo *volatile si = ALLOC_AND_ZERO(SegmentInfo);
571
579
  TRY
@@ -590,7 +598,7 @@ SegmentInfo *si_read(Store *store, InStream *is)
590
598
  return si;
591
599
  }
592
600
 
593
- void si_write(SegmentInfo *si, OutStream *os)
601
+ static void si_write(SegmentInfo *si, OutStream *os)
594
602
  {
595
603
  os_write_string(os, si->name);
596
604
  os_write_vint(os, si->doc_cnt);
@@ -619,7 +627,9 @@ bool si_has_deletions(SegmentInfo *si)
619
627
  return si->del_gen >= 0;
620
628
  }
621
629
 
622
- char *si_del_file_name(SegmentInfo *si, char *buf)
630
+ /*
631
+ FIXME: not used
632
+ static char *si_del_file_name(SegmentInfo *si, char *buf)
623
633
  {
624
634
  if (si->del_gen < 0) {
625
635
  return NULL;
@@ -628,6 +638,7 @@ char *si_del_file_name(SegmentInfo *si, char *buf)
628
638
  return fn_for_generation(buf, si->name, ".del", si->del_gen);
629
639
  }
630
640
  }
641
+ */
631
642
 
632
643
  bool si_has_separate_norms(SegmentInfo *si)
633
644
  {
@@ -653,7 +664,7 @@ void si_advance_norm_gen(SegmentInfo *si, int field_num)
653
664
  si->norm_gens[field_num]++;
654
665
  }
655
666
 
656
- char *si_norm_file_name(SegmentInfo *si, char *buf, int field_num)
667
+ static char *si_norm_file_name(SegmentInfo *si, char *buf, int field_num)
657
668
  {
658
669
  int norm_gen;
659
670
  if (field_num >= si->norm_gens_size
@@ -661,12 +672,12 @@ char *si_norm_file_name(SegmentInfo *si, char *buf, int field_num)
661
672
  return NULL;
662
673
  }
663
674
  else {
664
- char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
675
+ const char *ext = (si->use_compound_file && norm_gen > 0) ? "s" : "f";
665
676
  return fn_for_gen_field(buf, si->name, ext, norm_gen, field_num);
666
677
  }
667
678
  }
668
679
 
669
- void deleter_queue_file(Deleter *dlr, char *file_name);
680
+ static void deleter_queue_file(Deleter *dlr, const char *file_name);
670
681
  #define DEL(file_name) deleter_queue_file(dlr, file_name)
671
682
 
672
683
  static void si_delete_files(SegmentInfo *si, FieldInfos *fis, Deleter *dlr)
@@ -708,11 +719,11 @@ static void si_delete_files(SegmentInfo *si, FieldInfos *fis, Deleter *dlr)
708
719
  ****************************************************************************/
709
720
 
710
721
  #include <time.h>
711
- static char *new_segment(f_i64 generation)
722
+ static char *new_segment(i64 generation)
712
723
  {
713
724
  char buf[SEGMENT_NAME_MAX_LENGTH];
714
725
  char *fn_p = u64_to_str36(buf, SEGMENT_NAME_MAX_LENGTH - 1,
715
- (f_u64)generation);
726
+ (u64)generation);
716
727
  *(--fn_p) = '_';
717
728
  return estrdup(fn_p);
718
729
  }
@@ -722,18 +733,21 @@ static char *new_segment(f_i64 generation)
722
733
  ****************************************************************************/
723
734
 
724
735
  typedef struct FindSegmentsFile {
725
- f_i64 generation;
726
- f_u64 u64_return;
727
- void *p_return;
736
+ i64 generation;
737
+ union {
738
+ SegmentInfos *sis;
739
+ IndexReader *ir;
740
+ u64 uint64;
741
+ } ret;
728
742
  } FindSegmentsFile;
729
743
 
730
- static void which_gen_i(char *file_name, void *arg)
744
+ static void which_gen_i(const char *file_name, void *arg)
731
745
  {
732
- f_i64 *max_generation = (f_i64 *)arg;
746
+ i64 *max_generation = (i64 *)arg;
733
747
  if (0 == strncmp(SEGMENTS_FILE_NAME"_", file_name,
734
748
  sizeof(SEGMENTS_FILE_NAME))) {
735
749
  char *p = strrchr(file_name, '_') + 1;
736
- f_i64 generation = (f_i64)str36_to_u64(p);
750
+ i64 generation = (i64)str36_to_u64(p);
737
751
  if (generation > *max_generation) *max_generation = generation;
738
752
  }
739
753
  }
@@ -776,9 +790,9 @@ void sis_put(SegmentInfos *sis, FILE *stream)
776
790
  *
777
791
  * @param store - the Store to look in
778
792
  */
779
- f_i64 sis_current_segment_generation(Store *store)
793
+ i64 sis_current_segment_generation(Store *store)
780
794
  {
781
- f_i64 current_generation = -1;
795
+ i64 current_generation = -1;
782
796
  store->each(store, &which_gen_i, &current_generation);
783
797
  return current_generation;
784
798
  }
@@ -802,22 +816,25 @@ char *sis_curr_seg_file_name(char *buf, Store *store)
802
816
  * @param store - the Store to look in
803
817
  * @return segments_N where N is the +next+ generation
804
818
  */
805
- char *sis_next_seg_file_name(char *buf, Store *store)
819
+ /*
820
+ FIXME: not used
821
+ static char *sis_next_seg_file_name(char *buf, Store *store)
806
822
  {
807
823
  return segfn_for_generation(buf, sis_current_segment_generation(store) + 1);
808
824
  }
825
+ */
809
826
 
810
827
  #define GEN_FILE_RETRY_COUNT 10
811
828
  #define GEN_LOOK_AHEAD_COUNT 10
812
- void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
829
+ static void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
813
830
  void (*run)(Store *store, FindSegmentsFile *fsf))
814
831
  {
815
- int i;
816
- int gen_look_ahead_count = 0;
817
- bool retry = false;
818
- int method = 0;
819
- f_i64 last_gen = -1;
820
- f_i64 gen = 0;
832
+ volatile int i;
833
+ volatile int gen_look_ahead_count = 0;
834
+ volatile bool retry = false;
835
+ volatile int method = 0;
836
+ volatile i64 last_gen = -1;
837
+ volatile i64 gen = 0;
821
838
 
822
839
  /* Loop until we succeed in calling doBody() without hitting an
823
840
  * IOException. An IOException most likely means a commit was in process
@@ -855,7 +872,7 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
855
872
  XENDTRY
856
873
 
857
874
  if (NULL != gen_is) {
858
- f_i64 gen0 = -1, gen1 = -1;
875
+ i64 gen0 = -1, gen1 = -1;
859
876
 
860
877
  TRY
861
878
  gen0 = is_read_u64(gen_is);
@@ -897,8 +914,14 @@ void sis_find_segments_file(Store *store, FindSegmentsFile *fsf,
897
914
  /* OK, we've tried the same segments_N file twice in a row, so
898
915
  * this must be a real error. We throw the original exception
899
916
  * we got. */
917
+ char *listing, listing_buffer[1024];
918
+ listing = store_to_s(store);
919
+ strncpy(listing_buffer, listing, 1023);
920
+ listing_buffer[1023] = '\0';
921
+ free(listing);
900
922
  RAISE(IO_ERROR,
901
- "Error reading the segment infos. Store listing was\n");
923
+ "Error reading the segment infos. Store:\n %s\n",
924
+ listing_buffer);
902
925
  }
903
926
  else {
904
927
  micro_sleep(50000);
@@ -985,7 +1008,7 @@ SegmentInfos *sis_new(FieldInfos *fis)
985
1008
  REF(fis);
986
1009
  sis->fis = fis;
987
1010
  sis->format = FORMAT;
988
- sis->version = (f_u64)time(NULL);
1011
+ sis->version = (u64)time(NULL);
989
1012
  sis->size = 0;
990
1013
  sis->counter = 0;
991
1014
  sis->generation = -1;
@@ -1053,7 +1076,7 @@ void sis_clear(SegmentInfos *sis)
1053
1076
  sis->size = 0;
1054
1077
  }
1055
1078
 
1056
- void sis_read_i(Store *store, FindSegmentsFile *fsf)
1079
+ static void sis_read_i(Store *store, FindSegmentsFile *fsf)
1057
1080
  {
1058
1081
  int seg_cnt;
1059
1082
  int i;
@@ -1062,7 +1085,7 @@ void sis_read_i(Store *store, FindSegmentsFile *fsf)
1062
1085
  InStream *volatile is = NULL;
1063
1086
  SegmentInfos *volatile sis = ALLOC_AND_ZERO(SegmentInfos);
1064
1087
  segfn_for_generation(seg_file_name, fsf->generation);
1065
- fsf->p_return = NULL;
1088
+ fsf->ret.sis = NULL;
1066
1089
  TRY
1067
1090
  is = store->open_input(store, seg_file_name);
1068
1091
  sis->store = store;
@@ -1090,20 +1113,20 @@ void sis_read_i(Store *store, FindSegmentsFile *fsf)
1090
1113
  sis_destroy(sis);
1091
1114
  }
1092
1115
  XENDTRY
1093
- fsf->p_return = sis;
1116
+ fsf->ret.sis = sis;
1094
1117
  }
1095
1118
 
1096
1119
  SegmentInfos *sis_read(Store *store)
1097
1120
  {
1098
1121
  FindSegmentsFile fsf;
1099
1122
  sis_find_segments_file(store, &fsf, &sis_read_i);
1100
- return fsf.p_return;
1123
+ return fsf.ret.sis;
1101
1124
  }
1102
1125
 
1103
1126
  void sis_write(SegmentInfos *sis, Store *store, Deleter *deleter)
1104
1127
  {
1105
1128
  int i;
1106
- OutStream *os = NULL;
1129
+ OutStream *volatile os = NULL;
1107
1130
  const int sis_size = sis->size;
1108
1131
  char buf[SEGMENT_NAME_MAX_LENGTH];
1109
1132
  sis->generation++;
@@ -1114,7 +1137,7 @@ void sis_write(SegmentInfos *sis, Store *store, Deleter *deleter)
1114
1137
  os_write_u32(os, FORMAT);
1115
1138
  os_write_u64(os, ++(sis->version)); /* every write changes the index */
1116
1139
  os_write_u64(os, sis->counter);
1117
- os_write_vint(os, sis->size);
1140
+ os_write_vint(os, sis->size);
1118
1141
  for (i = 0; i < sis_size; i++) {
1119
1142
  si_write(sis->segs[i], os);
1120
1143
  }
@@ -1140,14 +1163,14 @@ void sis_write(SegmentInfos *sis, Store *store, Deleter *deleter)
1140
1163
  }
1141
1164
  }
1142
1165
 
1143
- void sis_read_ver_i(Store *store, FindSegmentsFile *fsf)
1166
+ static void sis_read_ver_i(Store *store, FindSegmentsFile *fsf)
1144
1167
  {
1145
1168
  InStream *is;
1146
- f_u32 format = 0;
1147
- f_u64 version = 0;
1169
+ u32 format = 0;
1170
+ u64 version = 0;
1148
1171
  char seg_file_name[SEGMENT_NAME_MAX_LENGTH];
1149
1172
 
1150
- segfn_for_generation(seg_file_name, (f_u64)fsf->generation);
1173
+ segfn_for_generation(seg_file_name, (u64)fsf->generation);
1151
1174
  is = store->open_input(store, seg_file_name);
1152
1175
 
1153
1176
  TRY
@@ -1157,14 +1180,14 @@ void sis_read_ver_i(Store *store, FindSegmentsFile *fsf)
1157
1180
  is_close(is);
1158
1181
  XENDTRY
1159
1182
 
1160
- fsf->u64_return = version;
1183
+ fsf->ret.uint64 = version;
1161
1184
  }
1162
1185
 
1163
- f_u64 sis_read_current_version(Store *store)
1186
+ u64 sis_read_current_version(Store *store)
1164
1187
  {
1165
1188
  FindSegmentsFile fsf;
1166
1189
  sis_find_segments_file(store, &fsf, &sis_read_ver_i);
1167
- return fsf.u64_return;
1190
+ return fsf.ret.uint64;
1168
1191
  }
1169
1192
 
1170
1193
  /****************************************************************************
@@ -1173,12 +1196,14 @@ f_u64 sis_read_current_version(Store *store)
1173
1196
  *
1174
1197
  ****************************************************************************/
1175
1198
 
1176
- static LazyDocField *lazy_df_new(const char *name, const int size)
1199
+ static LazyDocField *lazy_df_new(Symbol name, const int size,
1200
+ bool is_compressed)
1177
1201
  {
1178
1202
  LazyDocField *self = ALLOC(LazyDocField);
1179
- self->name = estrdup(name);
1203
+ self->name = name;
1180
1204
  self->size = size;
1181
1205
  self->data = ALLOC_AND_ZERO_N(LazyDocFieldData, size);
1206
+ self->is_compressed = is_compressed;
1182
1207
  return self;
1183
1208
  }
1184
1209
 
@@ -1190,11 +1215,171 @@ static void lazy_df_destroy(LazyDocField *self)
1190
1215
  free(self->data[i].text);
1191
1216
  }
1192
1217
  }
1193
- free(self->name);
1194
1218
  free(self->data);
1195
1219
  free(self);
1196
1220
  }
1197
1221
 
1222
+ #ifdef USE_ZLIB
1223
+ /* good zlib example at http://www.zlib.net/zlib_how.html */
1224
+
1225
+ /* report a zlib or i/o error */
1226
+ static void zraise(int ret)
1227
+ {
1228
+ switch (ret) {
1229
+ case Z_ERRNO:
1230
+ if (ferror(stdin))
1231
+ RAISE(IO_ERROR, "zlib: error reading stdin");
1232
+ if (ferror(stdout))
1233
+ RAISE(IO_ERROR, "zlib: error writing stdout");
1234
+ break;
1235
+ case Z_STREAM_ERROR:
1236
+ RAISE(IO_ERROR, "zlib: invalid compression level");
1237
+ break;
1238
+ case Z_DATA_ERROR:
1239
+ RAISE(IO_ERROR, "zlib: invalid or incomplete deflate data");
1240
+ break;
1241
+ case Z_MEM_ERROR:
1242
+ RAISE(IO_ERROR, "zlib: out of memory");
1243
+ break;
1244
+ case Z_VERSION_ERROR:
1245
+ RAISE(IO_ERROR, "zlib: version mismatch!");
1246
+ break;
1247
+ default:
1248
+ RAISE(EXCEPTION, "zlib: unknown error");
1249
+ }
1250
+ }
1251
+
1252
+ static char *is_read_zipped_bytes(InStream *is, int zip_len, int *len)
1253
+ {
1254
+ int buf_out_idx = 0, ret, read_len;
1255
+ uchar *buf_out = NULL;
1256
+ uchar buf_in[ZIP_BUFFER_SIZE];
1257
+ z_stream zstrm;
1258
+ zstrm.zalloc = Z_NULL;
1259
+ zstrm.zfree = Z_NULL;
1260
+ zstrm.opaque = Z_NULL;
1261
+ zstrm.next_in = Z_NULL;
1262
+ zstrm.avail_in = 0;
1263
+ if ((ret = inflateInit(&zstrm)) != Z_OK) zraise(ret);
1264
+
1265
+ do {
1266
+ read_len = zip_len > ZIP_BUFFER_SIZE ? ZIP_BUFFER_SIZE : zip_len;
1267
+ is_read_bytes(is, buf_in, zip_len);
1268
+ zip_len -= read_len;
1269
+ zstrm.avail_in = read_len;
1270
+ zstrm.next_in = buf_in;
1271
+ zstrm.avail_out = ZIP_BUFFER_SIZE;
1272
+
1273
+ do {
1274
+ REALLOC_N(buf_out, uchar, buf_out_idx + ZIP_BUFFER_SIZE);
1275
+ zstrm.next_out = buf_out + buf_out_idx;
1276
+ ret = inflate(&zstrm, Z_NO_FLUSH);
1277
+ assert(ret != Z_STREAM_ERROR); /* state not clobbered */
1278
+ switch(ret) {
1279
+ case Z_NEED_DICT:
1280
+ ret = Z_DATA_ERROR; /* and fall through */
1281
+ case Z_DATA_ERROR:
1282
+ case Z_MEM_ERROR:
1283
+ (void)inflateEnd(&zstrm);
1284
+ zraise(ret);
1285
+ }
1286
+ buf_out_idx += ZIP_BUFFER_SIZE - zstrm.avail_out;
1287
+ } while (zstrm.avail_out == 0);
1288
+ } while (ret != Z_STREAM_END && zip_len != 0);
1289
+
1290
+ /* clean up */
1291
+ (void)inflateEnd(&zstrm);
1292
+
1293
+ buf_out[buf_out_idx] = '\0';
1294
+ REALLOC_N(buf_out, uchar, buf_out_idx + 1);
1295
+ *len = buf_out_idx;
1296
+ return (char *)buf_out;
1297
+ }
1298
+ #else /* use bzlib */
1299
+ static void zraise(int ret)
1300
+ {
1301
+ switch (ret) {
1302
+ case BZ_IO_ERROR:
1303
+ if (ferror(stdin))
1304
+ RAISE(IO_ERROR, "bzlib: error reading stdin");
1305
+ if (ferror(stdout))
1306
+ RAISE(IO_ERROR, "bzlib: error writing stdout");
1307
+ break;
1308
+ case BZ_CONFIG_ERROR:
1309
+ RAISE(IO_ERROR, "bzlib: system configuration error");
1310
+ break;
1311
+ case BZ_SEQUENCE_ERROR: /* shouldn't occur if code is correct */
1312
+ RAISE(IO_ERROR, "bzlib: !!BUG!! sequence error");
1313
+ break;
1314
+ case BZ_PARAM_ERROR: /* shouldn't occur if code is correct */
1315
+ RAISE(IO_ERROR, "bzlib: !!BUG!! parameter error");
1316
+ break;
1317
+ case BZ_MEM_ERROR:
1318
+ RAISE(IO_ERROR, "bzlib: memory error");
1319
+ break;
1320
+ case BZ_DATA_ERROR:
1321
+ RAISE(IO_ERROR, "bzlib: data integrity check error");
1322
+ break;
1323
+ case BZ_DATA_ERROR_MAGIC:
1324
+ RAISE(IO_ERROR, "bzlib: data integrity check - non-matching magic");
1325
+ break;
1326
+ case BZ_UNEXPECTED_EOF:
1327
+ RAISE(IO_ERROR, "bzlib: unexpected end-of-file");
1328
+ break;
1329
+ case BZ_OUTBUFF_FULL:
1330
+ RAISE(IO_ERROR, "bzlib: output buffer full");
1331
+ break;
1332
+ default:
1333
+ RAISE(EXCEPTION, "bzlib: unknown error");
1334
+ }
1335
+ }
1336
+
1337
+ static char *is_read_zipped_bytes(InStream *is, int zip_len, int *len)
1338
+ {
1339
+ int buf_out_idx = 0, ret, read_len;
1340
+ char *buf_out = NULL;
1341
+ char buf_in[ZIP_BUFFER_SIZE];
1342
+ bz_stream zstrm;
1343
+ zstrm.bzalloc = NULL;
1344
+ zstrm.bzfree = NULL;
1345
+ zstrm.opaque = NULL;
1346
+ zstrm.next_in = NULL;
1347
+ zstrm.avail_in = 0;
1348
+ if ((ret = BZ2_bzDecompressInit(&zstrm, 0, 0)) != BZ_OK) zraise(ret);
1349
+
1350
+ do {
1351
+ read_len = zip_len > ZIP_BUFFER_SIZE ? ZIP_BUFFER_SIZE : zip_len;
1352
+ is_read_bytes(is, (uchar *)buf_in, zip_len);
1353
+ zip_len -= read_len;
1354
+ zstrm.avail_in = read_len;
1355
+ zstrm.next_in = buf_in;
1356
+ zstrm.avail_out = ZIP_BUFFER_SIZE;
1357
+
1358
+ do {
1359
+ REALLOC_N(buf_out, char, buf_out_idx + ZIP_BUFFER_SIZE);
1360
+ zstrm.next_out = buf_out + buf_out_idx;
1361
+ ret = BZ2_bzDecompress(&zstrm);
1362
+ assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
1363
+ if (ret != BZ_OK && ret != BZ_STREAM_END) {
1364
+ (void)BZ2_bzDecompressEnd(&zstrm);
1365
+ zraise(ret);
1366
+ }
1367
+ buf_out_idx += ZIP_BUFFER_SIZE - zstrm.avail_out;
1368
+ } while (zstrm.avail_out == 0);
1369
+ } while (ret != BZ_STREAM_END && zip_len != 0);
1370
+
1371
+ /* clean up */
1372
+ (void)BZ2_bzDecompressEnd(&zstrm);
1373
+
1374
+ buf_out[buf_out_idx] = '\0';
1375
+ REALLOC_N(buf_out, char, buf_out_idx + 1);
1376
+ *len = buf_out_idx;
1377
+ return (char *)buf_out;
1378
+ }
1379
+
1380
+ #endif
1381
+
1382
+
1198
1383
  char *lazy_df_get_data(LazyDocField *self, int i)
1199
1384
  {
1200
1385
  char *text = NULL;
@@ -1202,10 +1387,17 @@ char *lazy_df_get_data(LazyDocField *self, int i)
1202
1387
  text = self->data[i].text;
1203
1388
  if (NULL == text) {
1204
1389
  const int read_len = self->data[i].length + 1;
1205
- self->data[i].text = text = ALLOC_N(char, read_len);
1206
1390
  is_seek(self->doc->fields_in, self->data[i].start);
1207
- is_read_bytes(self->doc->fields_in, (uchar *)text, read_len);
1208
- text[read_len - 1] = '\0';
1391
+ if (self->is_compressed) {
1392
+ text = self->data[i].text =
1393
+ is_read_zipped_bytes(self->doc->fields_in, read_len,
1394
+ &(self->data[i].length));
1395
+ }
1396
+ else {
1397
+ self->data[i].text = text = ALLOC_N(char, read_len);
1398
+ is_read_bytes(self->doc->fields_in, (uchar *)text, read_len);
1399
+ text[read_len - 1] = '\0';
1400
+ }
1209
1401
  }
1210
1402
  }
1211
1403
 
@@ -1214,6 +1406,16 @@ char *lazy_df_get_data(LazyDocField *self, int i)
1214
1406
 
1215
1407
  void lazy_df_get_bytes(LazyDocField *self, char *buf, int start, int len)
1216
1408
  {
1409
+ if (self->is_compressed == 1) {
1410
+ int i;
1411
+ self->len = 0;
1412
+ for (i = self->size-1; i >= 0; i--) {
1413
+ (void)lazy_df_get_data(self, i);
1414
+ self->len += self->data[i].length + 1;
1415
+ }
1416
+ self->len--; /* each field separated by ' ' but no need to add to end */
1417
+ self->is_compressed = 2;
1418
+ }
1217
1419
  if (start < 0 || start >= self->len) {
1218
1420
  RAISE(IO_ERROR, "start out of range in LazyDocField#get_bytes. %d "
1219
1421
  "is not between 0 and %d", start, self->len);
@@ -1225,8 +1427,37 @@ void lazy_df_get_bytes(LazyDocField *self, char *buf, int start, int len)
1225
1427
  RAISE(IO_ERROR, "Tried to read past end of field. Field is only %d "
1226
1428
  "bytes long but tried to read to %d", self->len, start + len);
1227
1429
  }
1228
- is_seek(self->doc->fields_in, self->data[0].start + start);
1229
- is_read_bytes(self->doc->fields_in, (uchar *)buf, len);
1430
+ if (self->is_compressed) {
1431
+ int cur_start = 0, buf_start = 0, cur_end, i, copy_start, copy_len;
1432
+ for (i = 0; i < self->size; i++) {
1433
+ cur_end = cur_start + self->data[i].length;
1434
+ if (start < cur_end) {
1435
+ copy_start = start > cur_start ? start - cur_start : 0;
1436
+ copy_len = cur_end - cur_start - copy_start;
1437
+ if (copy_len >= len) {
1438
+ copy_len = len;
1439
+ len = 0;
1440
+ }
1441
+ else {
1442
+ len -= copy_len;
1443
+ }
1444
+ memcpy(buf + buf_start,
1445
+ self->data[i].text + copy_start,
1446
+ copy_len);
1447
+ buf_start += copy_len;
1448
+ if (len > 0) {
1449
+ buf[buf_start++] = ' ';
1450
+ len--;
1451
+ }
1452
+ if (len == 0) break;
1453
+ }
1454
+ cur_start = cur_end + 1;
1455
+ }
1456
+ }
1457
+ else {
1458
+ is_seek(self->doc->fields_in, self->data[0].start + start);
1459
+ is_read_bytes(self->doc->fields_in, (uchar *)buf, len);
1460
+ }
1230
1461
  }
1231
1462
 
1232
1463
  /****************************************************************************
@@ -1238,7 +1469,7 @@ void lazy_df_get_bytes(LazyDocField *self, char *buf, int start, int len)
1238
1469
  static LazyDoc *lazy_doc_new(int size, InStream *fdt_in)
1239
1470
  {
1240
1471
  LazyDoc *self = ALLOC(LazyDoc);
1241
- self->field_dict = h_new_str(NULL, (free_ft)&lazy_df_destroy);
1472
+ self->field_dictionary = h_new_ptr((free_ft)&lazy_df_destroy);
1242
1473
  self->size = size;
1243
1474
  self->fields = ALLOC_AND_ZERO_N(LazyDocField *, size);
1244
1475
  self->fields_in = is_clone(fdt_in);
@@ -1247,7 +1478,7 @@ static LazyDoc *lazy_doc_new(int size, InStream *fdt_in)
1247
1478
 
1248
1479
  void lazy_doc_close(LazyDoc *self)
1249
1480
  {
1250
- h_destroy(self->field_dict);
1481
+ h_destroy(self->field_dictionary);
1251
1482
  is_close(self->fields_in);
1252
1483
  free(self->fields);
1253
1484
  free(self);
@@ -1256,10 +1487,15 @@ void lazy_doc_close(LazyDoc *self)
1256
1487
  static void lazy_doc_add_field(LazyDoc *self, LazyDocField *lazy_df, int i)
1257
1488
  {
1258
1489
  self->fields[i] = lazy_df;
1259
- h_set(self->field_dict, lazy_df->name, lazy_df);
1490
+ h_set(self->field_dictionary, lazy_df->name, lazy_df);
1260
1491
  lazy_df->doc = self;
1261
1492
  }
1262
1493
 
1494
+ LazyDocField *frt_lazy_doc_get(LazyDoc *self, Symbol field)
1495
+ {
1496
+ return (LazyDocField *)h_get(self->field_dictionary, field);
1497
+ }
1498
+
1263
1499
  /****************************************************************************
1264
1500
  *
1265
1501
  * FieldsReader
@@ -1296,7 +1532,7 @@ FieldsReader *fr_clone(FieldsReader *orig)
1296
1532
  memcpy(fr, orig, sizeof(FieldsReader));
1297
1533
  fr->fdx_in = is_clone(orig->fdx_in);
1298
1534
  fr->fdt_in = is_clone(orig->fdt_in);
1299
-
1535
+
1300
1536
  return fr;
1301
1537
  }
1302
1538
 
@@ -1307,25 +1543,36 @@ void fr_close(FieldsReader *fr)
1307
1543
  free(fr);
1308
1544
  }
1309
1545
 
1310
- static DocField *fr_df_new(char *name, int size)
1546
+ static DocField *fr_df_new(Symbol name, int size, bool is_compressed)
1311
1547
  {
1312
1548
  DocField *df = ALLOC(DocField);
1313
- df->name = estrdup(name);
1549
+ df->name = name;
1314
1550
  df->capa = df->size = size;
1315
1551
  df->data = ALLOC_N(char *, df->capa);
1316
1552
  df->lengths = ALLOC_N(int, df->capa);
1317
1553
  df->destroy_data = true;
1318
1554
  df->boost = 1.0;
1555
+ df->is_compressed = is_compressed;
1319
1556
  return df;
1320
1557
  }
1321
1558
 
1559
+ static void fr_read_zipped_fields(FieldsReader *fr, DocField *df)
1560
+ {
1561
+ int i;
1562
+ const int df_size = df->size;
1563
+ InStream *fdt_in = fr->fdt_in;
1564
+
1565
+ for (i = 0; i < df_size; i++) {
1566
+ const int zip_len = df->lengths[i] + 1;
1567
+ df->data[i] = is_read_zipped_bytes(fdt_in, zip_len, &(df->lengths[i]));
1568
+ }
1569
+ }
1570
+
1322
1571
  Document *fr_get_doc(FieldsReader *fr, int doc_num)
1323
1572
  {
1324
1573
  int i, j;
1325
- FieldInfo *fi;
1326
1574
  off_t pos;
1327
- int stored_cnt, field_num, df_size;
1328
- DocField *df;
1575
+ int stored_cnt;
1329
1576
  Document *doc = doc_new();
1330
1577
  InStream *fdx_in = fr->fdx_in;
1331
1578
  InStream *fdt_in = fr->fdt_in;
@@ -1336,34 +1583,42 @@ Document *fr_get_doc(FieldsReader *fr, int doc_num)
1336
1583
  stored_cnt = is_read_vint(fdt_in);
1337
1584
 
1338
1585
  for (i = 0; i < stored_cnt; i++) {
1339
- field_num = is_read_vint(fdt_in);
1340
- fi = fr->fis->fields[field_num];
1341
- df_size = is_read_vint(fdt_in);
1342
- df = fr_df_new(fi->name, df_size);
1586
+ const int field_num = is_read_vint(fdt_in);
1587
+ FieldInfo *fi = fr->fis->fields[field_num];
1588
+ const int df_size = is_read_vint(fdt_in);
1589
+ DocField *df = fr_df_new(fi->name, df_size, fi_is_compressed(fi));
1343
1590
 
1344
1591
  for (j = 0; j < df_size; j++) {
1345
1592
  df->lengths[j] = is_read_vint(fdt_in);
1346
1593
  }
1347
1594
 
1348
- for (j = 0; j < df_size; j++) {
1349
- const int read_len = df->lengths[j] + 1;
1350
- df->data[j] = ALLOC_N(char, read_len);
1351
- is_read_bytes(fdt_in, (uchar *)df->data[j], read_len);
1352
- df->data[j][read_len - 1] = '\0';
1353
- }
1354
1595
  doc_add_field(doc, df);
1355
1596
  }
1597
+ for (i = 0; i < stored_cnt; i++) {
1598
+ DocField *df = doc->fields[i];
1599
+ if (df->is_compressed) {
1600
+ fr_read_zipped_fields(fr, df);
1601
+ }
1602
+ else {
1603
+ const int df_size = df->size;
1604
+ for (j = 0; j < df_size; j++) {
1605
+ const int read_len = df->lengths[j] + 1;
1606
+ df->data[j] = ALLOC_N(char, read_len);
1607
+ is_read_bytes(fdt_in, (uchar *)df->data[j], read_len);
1608
+ df->data[j][read_len - 1] = '\0';
1609
+ }
1610
+ }
1611
+ }
1356
1612
 
1357
1613
  return doc;
1358
1614
  }
1359
1615
 
1360
1616
  LazyDoc *fr_get_lazy_doc(FieldsReader *fr, int doc_num)
1361
1617
  {
1618
+ int start = 0;
1362
1619
  int i, j;
1363
- FieldInfo *fi;
1364
1620
  off_t pos;
1365
- int stored_cnt, field_num;
1366
- LazyDocField *lazy_df;
1621
+ int stored_cnt;
1367
1622
  LazyDoc *lazy_doc;
1368
1623
  InStream *fdx_in = fr->fdx_in;
1369
1624
  InStream *fdt_in = fr->fdt_in;
@@ -1375,43 +1630,43 @@ LazyDoc *fr_get_lazy_doc(FieldsReader *fr, int doc_num)
1375
1630
  lazy_doc = lazy_doc_new(stored_cnt, fdt_in);
1376
1631
 
1377
1632
  for (i = 0; i < stored_cnt; i++) {
1378
- off_t start = 0, end;
1379
- int data_cnt;
1380
- field_num = is_read_vint(fdt_in);
1381
- fi = fr->fis->fields[field_num];
1382
- data_cnt = is_read_vint(fdt_in);
1383
- lazy_df = lazy_df_new(fi->name, data_cnt);
1633
+ FieldInfo *fi = fr->fis->fields[is_read_vint(fdt_in)];
1634
+ const int data_cnt = is_read_vint(fdt_in);
1635
+ LazyDocField *lazy_df = lazy_df_new(fi->name, data_cnt,
1636
+ fi_is_compressed(fi));
1637
+ const int field_start = start;
1384
1638
 
1385
1639
  /* get the starts relative positions this time around */
1386
1640
  for (j = 0; j < data_cnt; j++) {
1387
1641
  lazy_df->data[j].start = start;
1388
1642
  start += 1 + (lazy_df->data[j].length = is_read_vint(fdt_in));
1389
1643
  }
1390
- end = is_pos(fdt_in) + start;
1391
- lazy_df->len = start - 1;
1644
+ lazy_df->len = start - field_start - 1;
1392
1645
 
1393
- /* correct the starts to their correct absolute positions */
1394
- start = is_pos(fdt_in);
1646
+ lazy_doc_add_field(lazy_doc, lazy_df, i);
1647
+ }
1648
+ /* correct the starts to their correct absolute positions */
1649
+ for (i = 0; i < stored_cnt; i++) {
1650
+ LazyDocField *lazy_df = lazy_doc->fields[i];
1651
+ const int data_cnt = lazy_df->size;
1652
+ const int start = is_pos(fdt_in);
1395
1653
  for (j = 0; j < data_cnt; j++) {
1396
1654
  lazy_df->data[j].start += start;
1397
1655
  }
1398
-
1399
- lazy_doc_add_field(lazy_doc, lazy_df, i);
1400
- is_seek(fdt_in, end);
1401
1656
  }
1402
1657
 
1403
1658
  return lazy_doc;
1404
1659
  }
1405
1660
 
1406
- TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
1661
+ static TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
1407
1662
  {
1408
1663
  TermVector *tv = ALLOC_AND_ZERO(TermVector);
1409
1664
  InStream *fdt_in = fr->fdt_in;
1410
1665
  FieldInfo *fi = fr->fis->fields[field_num];
1411
1666
  const int num_terms = is_read_vint(fdt_in);
1412
-
1667
+
1413
1668
  tv->field_num = field_num;
1414
- tv->field = estrdup(fi->name);
1669
+ tv->field = fi->name;
1415
1670
 
1416
1671
  if (num_terms > 0) {
1417
1672
  int i, j, delta_start, delta_len, total_len, freq;
@@ -1431,7 +1686,8 @@ TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
1431
1686
  total_len = delta_start + delta_len;
1432
1687
  is_read_bytes(fdt_in, buffer + delta_start, delta_len);
1433
1688
  buffer[total_len++] = '\0';
1434
- term->text = memcpy(ALLOC_N(char, total_len), buffer, total_len);
1689
+ term->text = (char *)memcpy(ALLOC_N(char, total_len),
1690
+ buffer, total_len);
1435
1691
 
1436
1692
  /* read freq */
1437
1693
  freq = term->freq = is_read_vint(fdt_in);
@@ -1450,21 +1706,21 @@ TermVector *fr_read_term_vector(FieldsReader *fr, int field_num)
1450
1706
  if (store_offsets) {
1451
1707
  int num_positions = tv->offset_cnt = is_read_vint(fdt_in);
1452
1708
  Offset *offsets = tv->offsets = ALLOC_N(Offset, num_positions);
1453
- long long offset = 0;
1709
+ i64 offset = 0;
1454
1710
  for (i = 0; i < num_positions; i++) {
1455
1711
  offsets[i].start =
1456
- (off_t)(offset += (long long)is_read_vll(fdt_in));
1712
+ (off_t)(offset += (i64)is_read_vll(fdt_in));
1457
1713
  offsets[i].end =
1458
- (off_t)(offset += (long long)is_read_vll(fdt_in));
1714
+ (off_t)(offset += (i64)is_read_vll(fdt_in));
1459
1715
  }
1460
1716
  }
1461
1717
  }
1462
1718
  return tv;
1463
1719
  }
1464
1720
 
1465
- HashTable *fr_get_tv(FieldsReader *fr, int doc_num)
1721
+ Hash *fr_get_tv(FieldsReader *fr, int doc_num)
1466
1722
  {
1467
- HashTable *term_vectors = h_new_str((free_ft)NULL, (free_ft)&tv_destroy);
1723
+ Hash *term_vectors = h_new_ptr((free_ft)&tv_destroy);
1468
1724
  int i;
1469
1725
  InStream *fdx_in = fr->fdx_in;
1470
1726
  InStream *fdt_in = fr->fdt_in;
@@ -1556,6 +1812,8 @@ FieldsWriter *fw_open(Store *store, const char *segment, FieldInfos *fis)
1556
1812
  strcpy(file_name + segment_len, ".fdx");
1557
1813
  fw->fdx_out = store->new_output(store, file_name);
1558
1814
 
1815
+ fw->buffer = ram_new_buffer();
1816
+
1559
1817
  fw->fis = fis;
1560
1818
  fw->tv_fields = ary_new_type_capa(TVField, TV_FIELD_INIT_CAPA);
1561
1819
 
@@ -1566,15 +1824,68 @@ void fw_close(FieldsWriter *fw)
1566
1824
  {
1567
1825
  os_close(fw->fdt_out);
1568
1826
  os_close(fw->fdx_out);
1827
+ ram_destroy_buffer(fw->buffer);
1569
1828
  ary_free(fw->tv_fields);
1570
1829
  free(fw);
1571
1830
  }
1572
1831
 
1573
- static INLINE void save_data(OutStream *fdt_out, char *data, int dlen)
1832
+ #ifdef USE_ZLIB
1833
+ static int os_write_zipped_bytes(OutStream* out_stream, uchar *data, int length)
1574
1834
  {
1575
- os_write_vint(fdt_out, dlen);
1576
- os_write_bytes(fdt_out, (uchar *)data, dlen);
1835
+ int ret, buf_size, zip_len = 0;
1836
+ uchar out_buffer[ZIP_BUFFER_SIZE];
1837
+ z_stream zstrm;
1838
+ zstrm.zalloc = Z_NULL;
1839
+ zstrm.zfree = Z_NULL;
1840
+ zstrm.opaque = Z_NULL;
1841
+ if ((ret = deflateInit(&zstrm, ZIP_LEVEL)) != Z_OK) zraise(ret);
1842
+
1843
+ zstrm.avail_in = length;
1844
+ zstrm.next_in = data;
1845
+ zstrm.avail_out = ZIP_BUFFER_SIZE;
1846
+ zstrm.next_out = out_buffer;
1847
+
1848
+ do {
1849
+ ret = deflate(&zstrm, Z_FINISH); /* no bad return value */
1850
+ assert(ret != Z_STREAM_ERROR) ; /* state not clobbered */
1851
+ zip_len += buf_size = ZIP_BUFFER_SIZE - zstrm.avail_out;
1852
+ os_write_bytes(out_stream, out_buffer, buf_size);
1853
+ } while (zstrm.avail_out == 0);
1854
+ assert(zstrm.avail_in == 0); /* all input will be used */
1855
+
1856
+ /* clean up */
1857
+ (void)deflateEnd(&zstrm);
1858
+ return zip_len;
1859
+ }
1860
+ #else
1861
+ static int os_write_zipped_bytes(OutStream* out_stream, uchar *data, int length)
1862
+ {
1863
+ int ret, buf_size, zip_len = 0;
1864
+ char out_buffer[ZIP_BUFFER_SIZE];
1865
+ bz_stream zstrm;
1866
+ zstrm.bzalloc = NULL;
1867
+ zstrm.bzfree = NULL;
1868
+ zstrm.opaque = NULL;
1869
+ if ((ret = BZ2_bzCompressInit(&zstrm, ZIP_LEVEL, 0, 0)) != BZ_OK) zraise(ret);
1870
+
1871
+ zstrm.avail_in = length;
1872
+ zstrm.next_in = (char *)data;
1873
+ zstrm.avail_out = ZIP_BUFFER_SIZE;
1874
+ zstrm.next_out = out_buffer;
1875
+
1876
+ do {
1877
+ ret = BZ2_bzCompress(&zstrm, BZ_FINISH); /* no bad return value */
1878
+ assert(ret != BZ_SEQUENCE_ERROR); /* state not clobbered */
1879
+ zip_len += buf_size = ZIP_BUFFER_SIZE - zstrm.avail_out;
1880
+ os_write_bytes(out_stream, (uchar *)out_buffer, buf_size);
1881
+ } while (zstrm.avail_out == 0);
1882
+ assert(zstrm.avail_in == 0); /* all input will be used */
1883
+
1884
+ /* clean up */
1885
+ (void)BZ2_bzCompressEnd(&zstrm);
1886
+ return zip_len;
1577
1887
  }
1888
+ #endif
1578
1889
 
1579
1890
  void fw_add_doc(FieldsWriter *fw, Document *doc)
1580
1891
  {
@@ -1595,6 +1906,7 @@ void fw_add_doc(FieldsWriter *fw, Document *doc)
1595
1906
  ary_size(fw->tv_fields) = 0;
1596
1907
  os_write_u64(fdx_out, fw->start_ptr);
1597
1908
  os_write_vint(fdt_out, stored_cnt);
1909
+ ramo_reset(fw->buffer);
1598
1910
 
1599
1911
  for (i = 0; i < doc_size; i++) {
1600
1912
  df = doc->fields[i];
@@ -1602,21 +1914,29 @@ void fw_add_doc(FieldsWriter *fw, Document *doc)
1602
1914
  if (fi_is_stored(fi)) {
1603
1915
  const int df_size = df->size;
1604
1916
  os_write_vint(fdt_out, fi->number);
1605
- os_write_vint(fdt_out, df->size);
1606
- /**
1607
- * TODO: add compression
1608
- */
1609
- for (j = 0; j < df_size; j++) {
1610
- os_write_vint(fdt_out, df->lengths[j]);
1917
+ os_write_vint(fdt_out, df_size);
1918
+ if (fi_is_compressed(fi)) {
1919
+ for (j = 0; j < df_size; j++) {
1920
+ const int length = df->lengths[j];
1921
+ int zip_len = os_write_zipped_bytes(fw->buffer,
1922
+ (uchar*)df->data[j],
1923
+ length);
1924
+ os_write_vint(fdt_out, zip_len - 1);
1925
+ }
1611
1926
  }
1612
- for (j = 0; j < df_size; j++) {
1613
- os_write_bytes(fdt_out, (uchar *)df->data[j], df->lengths[j]);
1614
- /* leave a space between fields as that is how they are
1615
- * analyzed */
1616
- os_write_byte(fdt_out, ' ');
1927
+ else {
1928
+ for (j = 0; j < df_size; j++) {
1929
+ const int length = df->lengths[j];
1930
+ os_write_vint(fdt_out, length);
1931
+ os_write_bytes(fw->buffer, (uchar*)df->data[j], length);
1932
+ /* leave a space between fields as that is how they are
1933
+ * analyzed */
1934
+ os_write_byte(fw->buffer, ' ');
1935
+ }
1617
1936
  }
1618
1937
  }
1619
1938
  }
1939
+ ramo_write_to(fw->buffer, fdt_out);
1620
1940
  }
1621
1941
 
1622
1942
  void fw_write_tv_index(FieldsWriter *fw)
@@ -1624,7 +1944,7 @@ void fw_write_tv_index(FieldsWriter *fw)
1624
1944
  int i;
1625
1945
  const int tv_cnt = ary_size(fw->tv_fields);
1626
1946
  OutStream *fdt_out = fw->fdt_out;
1627
- os_write_u32(fw->fdx_out, (f_u32)(os_pos(fdt_out) - fw->start_ptr));
1947
+ os_write_u32(fw->fdx_out, (u32)(os_pos(fdt_out) - fw->start_ptr));
1628
1948
  os_write_vint(fdt_out, tv_cnt);
1629
1949
  /* write in reverse order so we can count back from the start position to
1630
1950
  * the beginning of the TermVector's data */
@@ -1683,13 +2003,13 @@ void fw_add_postings(FieldsWriter *fw,
1683
2003
 
1684
2004
  if (fi_store_offsets(fi)) {
1685
2005
  /* use delta encoding for offsets */
1686
- long long last_end = 0;
2006
+ i64 last_end = 0;
1687
2007
  os_write_vint(fdt_out, offset_count); /* write shared prefix length */
1688
2008
  for (i = 0; i < offset_count; i++) {
1689
- long long start = (long long)offsets[i].start;
1690
- long long end = (long long)offsets[i].end;
1691
- os_write_vll(fdt_out, (unsigned long long)(start - last_end));
1692
- os_write_vll(fdt_out, (unsigned long long)(end - start));
2009
+ i64 start = (i64)offsets[i].start;
2010
+ i64 end = (i64)offsets[i].end;
2011
+ os_write_vll(fdt_out, (u64)(start - last_end));
2012
+ os_write_vll(fdt_out, (u64)(end - start));
1693
2013
  last_end = end;
1694
2014
  }
1695
2015
  }
@@ -1706,16 +2026,16 @@ void fw_add_postings(FieldsWriter *fw,
1706
2026
 
1707
2027
  char *te_get_term(TermEnum *te)
1708
2028
  {
1709
- return memcpy(ALLOC_N(char, te->curr_term_len + 1),
1710
- te->curr_term, te->curr_term_len + 1);
2029
+ return (char *)memcpy(ALLOC_N(char, te->curr_term_len + 1),
2030
+ te->curr_term, te->curr_term_len + 1);
1711
2031
  }
1712
2032
 
1713
2033
  TermInfo *te_get_ti(TermEnum *te)
1714
2034
  {
1715
- return memcpy(ALLOC(TermInfo), &(te->curr_ti), sizeof(TermInfo));
2035
+ return (TermInfo*)memcpy(ALLOC(TermInfo), &(te->curr_ti), sizeof(TermInfo));
1716
2036
  }
1717
2037
 
1718
- char *te_skip_to(TermEnum *te, const char *term)
2038
+ static char *te_skip_to(TermEnum *te, const char *term)
1719
2039
  {
1720
2040
  char *curr_term = te->curr_term;
1721
2041
  if (strcmp(curr_term, term) < 0) {
@@ -1742,8 +2062,8 @@ static void sti_destroy(SegmentTermIndex *sti)
1742
2062
  {
1743
2063
  if (sti->index_terms) {
1744
2064
  int i;
1745
- const int sti_index_size = sti->index_size;
1746
- for (i = 0; i < sti_index_size; i++) {
2065
+ const int sti_index_cnt = sti->index_cnt;
2066
+ for (i = 0; i < sti_index_cnt; i++) {
1747
2067
  free(sti->index_terms[i]);
1748
2068
  }
1749
2069
  free(sti->index_terms);
@@ -1759,20 +2079,20 @@ static void sti_ensure_index_is_read(SegmentTermIndex *sti,
1759
2079
  {
1760
2080
  if (NULL == sti->index_terms) {
1761
2081
  int i;
1762
- int index_size = sti->index_size;
2082
+ int index_cnt = sti->index_cnt;
1763
2083
  off_t index_ptr = 0;
1764
2084
  ste_reset(index_te);
1765
2085
  is_seek(STE(index_te)->is, sti->index_ptr);
1766
- STE(index_te)->size = sti->index_size;
1767
-
1768
- sti->index_terms = ALLOC_N(char *, index_size);
1769
- sti->index_term_lens = ALLOC_N(int, index_size);
1770
- sti->index_term_infos = ALLOC_N(TermInfo, index_size);
1771
- sti->index_ptrs = ALLOC_N(off_t, index_size);
1772
-
2086
+ STE(index_te)->size = sti->index_cnt;
2087
+
2088
+ sti->index_terms = ALLOC_N(char *, index_cnt);
2089
+ sti->index_term_lens = ALLOC_N(int, index_cnt);
2090
+ sti->index_term_infos = ALLOC_N(TermInfo, index_cnt);
2091
+ sti->index_ptrs = ALLOC_N(off_t, index_cnt);
2092
+
1773
2093
  for (i = 0; NULL != ste_next(index_te); i++) {
1774
2094
  #ifdef DEBUG
1775
- if (i >= index_size) {
2095
+ if (i >= index_cnt) {
1776
2096
  RAISE(FERRET_ERROR, "index term enum read too many terms");
1777
2097
  }
1778
2098
  #endif
@@ -1788,7 +2108,7 @@ static void sti_ensure_index_is_read(SegmentTermIndex *sti,
1788
2108
  static int sti_get_index_offset(SegmentTermIndex *sti, const char *term)
1789
2109
  {
1790
2110
  int lo = 0;
1791
- int hi = sti->index_size - 1;
2111
+ int hi = sti->index_cnt - 1;
1792
2112
  int mid, delta;
1793
2113
  char **index_terms = sti->index_terms;
1794
2114
 
@@ -1842,7 +2162,7 @@ SegmentFieldIndex *sfi_open(Store *store, const char *segment)
1842
2162
  SegmentTermIndex *sti = ALLOC_AND_ZERO(SegmentTermIndex);
1843
2163
  sti->index_ptr = is_read_voff_t(is);
1844
2164
  sti->ptr = is_read_voff_t(is);
1845
- sti->index_size = is_read_vint(is);
2165
+ sti->index_cnt = is_read_vint(is);
1846
2166
  sti->size = is_read_vint(is);
1847
2167
  h_set_int(sfi->field_dict, field_num, sti);
1848
2168
  }
@@ -1912,7 +2232,8 @@ static void ste_reset(TermEnum *te)
1912
2232
 
1913
2233
  static TermEnum *ste_set_field(TermEnum *te, int field_num)
1914
2234
  {
1915
- SegmentTermIndex *sti = h_get_int(STE(te)->sfi->field_dict, field_num);
2235
+ SegmentTermIndex *sti
2236
+ = (SegmentTermIndex *)h_get_int(STE(te)->sfi->field_dict, field_num);
1916
2237
  ste_reset(te);
1917
2238
  te->field_num = field_num;
1918
2239
  if (sti) {
@@ -1940,7 +2261,8 @@ static void ste_index_seek(TermEnum *te, SegmentTermIndex *sti, int idx_offset)
1940
2261
  static char *ste_scan_to(TermEnum *te, const char *term)
1941
2262
  {
1942
2263
  SegmentFieldIndex *sfi = STE(te)->sfi;
1943
- SegmentTermIndex *sti = h_get_int(sfi->field_dict, te->field_num);
2264
+ SegmentTermIndex *sti
2265
+ = (SegmentTermIndex *)h_get_int(sfi->field_dict, te->field_num);
1944
2266
  if (sti && sti->size > 0) {
1945
2267
  SFI_ENSURE_INDEX_IS_READ(sfi, sti);
1946
2268
  if (term[0] == '\0') {
@@ -1952,8 +2274,8 @@ static char *ste_scan_to(TermEnum *te, const char *term)
1952
2274
  int enum_offset = (int)(STE(te)->pos / sfi->index_interval) + 1;
1953
2275
  /* if we are at the end of the index or before the next index
1954
2276
  * ptr then a simple scan suffices */
1955
- if (sti->index_size == enum_offset ||
1956
- strcmp(term, sti->index_terms[enum_offset]) < 0) {
2277
+ if (sti->index_cnt == enum_offset ||
2278
+ strcmp(term, sti->index_terms[enum_offset]) < 0) {
1957
2279
  return te_skip_to(te, term);
1958
2280
  }
1959
2281
  }
@@ -2014,8 +2336,8 @@ static char *ste_get_term(TermEnum *te, int pos)
2014
2336
  else if (pos != ste->pos) {
2015
2337
  int idx_int = ste->sfi->index_interval;
2016
2338
  if ((pos < ste->pos) || pos > (1 + ste->pos / idx_int) * idx_int) {
2017
- SegmentTermIndex *sti = h_get_int(ste->sfi->field_dict,
2018
- te->field_num);
2339
+ SegmentTermIndex *sti = (SegmentTermIndex *)h_get_int(
2340
+ ste->sfi->field_dict, te->field_num);
2019
2341
  SFI_ENSURE_INDEX_IS_READ(ste->sfi, sti);
2020
2342
  ste_index_seek(te, sti, pos / idx_int);
2021
2343
  }
@@ -2119,8 +2441,8 @@ static void tew_destroy(TermEnumWrapper *tew)
2119
2441
  tew->te->close(tew->te);
2120
2442
  }
2121
2443
 
2122
- TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, TermEnum *te,
2123
- IndexReader *ir)
2444
+ static TermEnumWrapper *tew_setup(TermEnumWrapper *tew, int index, TermEnum *te,
2445
+ IndexReader *ir)
2124
2446
  {
2125
2447
  tew->index = index;
2126
2448
  tew->ir = ir;
@@ -2263,7 +2585,8 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
2263
2585
  || (tew->term && (tew->term[0] != '\0'))) {
2264
2586
  pq_push(mte->tew_queue, tew); /* initialize queue */
2265
2587
  }
2266
- } else {
2588
+ }
2589
+ else {
2267
2590
  /* add the term_enum_wrapper just in case */
2268
2591
  sub_te = reader->terms(reader, 0);
2269
2592
  sub_te->field_num = -1;
@@ -2303,7 +2626,7 @@ TermInfosReader *tir_open(Store *store,
2303
2626
  static INLINE TermEnum *tir_enum(TermInfosReader *tir)
2304
2627
  {
2305
2628
  TermEnum *te;
2306
- if (NULL == (te = thread_getspecific(tir->thread_te))) {
2629
+ if (NULL == (te = (TermEnum *)thread_getspecific(tir->thread_te))) {
2307
2630
  te = ste_clone(tir->orig_te);
2308
2631
  ste_set_field(te, tir->field_num);
2309
2632
  ary_push(tir->te_bucket, te);
@@ -2333,8 +2656,8 @@ TermInfo *tir_get_ti(TermInfosReader *tir, const char *term)
2333
2656
  return NULL;
2334
2657
  }
2335
2658
 
2336
- TermInfo *tir_get_ti_field(TermInfosReader *tir, int field_num,
2337
- const char *term)
2659
+ static TermInfo *tir_get_ti_field(TermInfosReader *tir, int field_num,
2660
+ const char *term)
2338
2661
  {
2339
2662
  TermEnum *te = tir_enum(tir);
2340
2663
  char *match;
@@ -2352,7 +2675,7 @@ TermInfo *tir_get_ti_field(TermInfosReader *tir, int field_num,
2352
2675
  }
2353
2676
 
2354
2677
  char *tir_get_term(TermInfosReader *tir, int pos)
2355
- {
2678
+ {
2356
2679
  if (pos < 0) {
2357
2680
  return NULL;
2358
2681
  }
@@ -2455,11 +2778,11 @@ static void tw_add(TermWriter *tw,
2455
2778
  tw->last_term, term, *tw->last_term, *term);
2456
2779
  }
2457
2780
  if (ti->frq_ptr < tw->last_term_info.frq_ptr) {
2458
- RAISE(STATE_ERROR, "%"F_OFF_T_PFX"d > %"F_OFF_T_PFX"d", ti->frq_ptr,
2781
+ RAISE(STATE_ERROR, "%"OFF_T_PFX"d > %"OFF_T_PFX"d", ti->frq_ptr,
2459
2782
  tw->last_term_info.frq_ptr);
2460
2783
  }
2461
2784
  if (ti->prx_ptr < tw->last_term_info.prx_ptr) {
2462
- RAISE(STATE_ERROR, "%"F_OFF_T_PFX"d > %"F_OFF_T_PFX"d", ti->prx_ptr,
2785
+ RAISE(STATE_ERROR, "%"OFF_T_PFX"d > %"OFF_T_PFX"d", ti->prx_ptr,
2463
2786
  tw->last_term_info.prx_ptr);
2464
2787
  }
2465
2788
  #endif
@@ -2611,7 +2934,7 @@ static bool stde_next(TermDocEnum *tde)
2611
2934
  int doc_code;
2612
2935
  SegmentTermDocEnum *stde = STDE(tde);
2613
2936
 
2614
- while (true) {
2937
+ while (true) {
2615
2938
  if (stde->count >= stde->doc_freq) {
2616
2939
  return false;
2617
2940
  }
@@ -2726,7 +3049,7 @@ static bool stde_skip_to(TermDocEnum *tde, int target_doc_num)
2726
3049
  }
2727
3050
 
2728
3051
  /* done skipping, now just scan */
2729
- do {
3052
+ do {
2730
3053
  if (!tde->next(tde)) {
2731
3054
  return false;
2732
3055
  }
@@ -2746,12 +3069,12 @@ static void stde_close(TermDocEnum *tde)
2746
3069
  }
2747
3070
 
2748
3071
  static void stde_skip_prox(SegmentTermDocEnum *stde)
2749
- {
3072
+ {
2750
3073
  (void)stde;
2751
3074
  }
2752
3075
 
2753
3076
  static void stde_seek_prox(SegmentTermDocEnum *stde, off_t prx_ptr)
2754
- {
3077
+ {
2755
3078
  (void)stde;
2756
3079
  (void)prx_ptr;
2757
3080
  }
@@ -2812,7 +3135,7 @@ static void stpe_seek(TermDocEnum *tde, int field_num, const char *term)
2812
3135
  stde->prx_cnt = 0;
2813
3136
  }
2814
3137
 
2815
- bool stpe_next(TermDocEnum *tde)
3138
+ static bool stpe_next(TermDocEnum *tde)
2816
3139
  {
2817
3140
  SegmentTermDocEnum *stde = STDE(tde);
2818
3141
  is_skip_vints(stde->prx_in, stde->prx_cnt);
@@ -2829,7 +3152,7 @@ bool stpe_next(TermDocEnum *tde)
2829
3152
  }
2830
3153
  }
2831
3154
 
2832
- int stpe_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
3155
+ static int stpe_read(TermDocEnum *tde, int *docs, int *freqs, int req_num)
2833
3156
  {
2834
3157
  (void)tde; (void)docs; (void)freqs; (void)req_num;
2835
3158
  RAISE(ARG_ERROR, "TermPosEnum does not handle processing multiple documents"
@@ -2944,9 +3267,11 @@ static void mtde_seek_te(TermDocEnum *tde, TermEnum *te)
2944
3267
  mtde->state[index] = 1;
2945
3268
  if (tde->close == stde_close) {
2946
3269
  stde_seek_ti(STDE(tde), MTE(te)->tis + i);
2947
- } else if (tde->close == stpe_close) {
3270
+ }
3271
+ else if (tde->close == stpe_close) {
2948
3272
  stpe_seek_ti(STDE(tde), MTE(te)->tis + i);
2949
- } else {
3273
+ }
3274
+ else {
2950
3275
  tde->seek(tde, MTE(te)->tews[index].te->field_num, te->curr_term);
2951
3276
  }
2952
3277
  }
@@ -2963,7 +3288,8 @@ static void mtde_seek(TermDocEnum *tde, int field_num, const char *term)
2963
3288
  te->set_field(te, field_num);
2964
3289
  if (NULL != (t = te->skip_to(te, term)) && 0 == strcmp(term, t)) {
2965
3290
  mtde_seek_te(tde, te);
2966
- } else {
3291
+ }
3292
+ else {
2967
3293
  memset(mtde->state, 0, mtde->ir_cnt);
2968
3294
  }
2969
3295
  }
@@ -3051,7 +3377,7 @@ static void mtde_close(TermDocEnum *tde)
3051
3377
  free(tde);
3052
3378
  }
3053
3379
 
3054
- TermDocEnum *mtxe_new(MultiReader *mr)
3380
+ static TermDocEnum *mtxe_new(MultiReader *mr)
3055
3381
  {
3056
3382
  MultiTermDocEnum *mtde = ALLOC_AND_ZERO(MultiTermDocEnum);
3057
3383
  TermDocEnum *tde = TDE(mtde);
@@ -3074,7 +3400,7 @@ TermDocEnum *mtxe_new(MultiReader *mr)
3074
3400
  return tde;
3075
3401
  }
3076
3402
 
3077
- TermDocEnum *mtde_new(MultiReader *mr)
3403
+ static TermDocEnum *mtde_new(MultiReader *mr)
3078
3404
  {
3079
3405
  int i;
3080
3406
  TermDocEnum *tde = mtxe_new(mr);
@@ -3090,13 +3416,13 @@ TermDocEnum *mtde_new(MultiReader *mr)
3090
3416
  * MultiTermPosEnum
3091
3417
  ****************************************************************************/
3092
3418
 
3093
- int mtpe_next_position(TermDocEnum *tde)
3419
+ static int mtpe_next_position(TermDocEnum *tde)
3094
3420
  {
3095
3421
  CHECK_CURR_TDE("next_position");
3096
3422
  return MTDE(tde)->curr_tde->next_position(MTDE(tde)->curr_tde);
3097
3423
  }
3098
3424
 
3099
- TermDocEnum *mtpe_new(MultiReader *mr)
3425
+ static TermDocEnum *mtpe_new(MultiReader *mr)
3100
3426
  {
3101
3427
  int i;
3102
3428
  TermDocEnum *tde = mtxe_new(mr);
@@ -3184,7 +3510,7 @@ static bool mtdpe_next(TermDocEnum *tde)
3184
3510
  pq_down(mtdpe->pq);
3185
3511
  }
3186
3512
  else {
3187
- sub_tde = pq_pop(mtdpe->pq);
3513
+ sub_tde = (TermDocEnum *)pq_pop(mtdpe->pq);
3188
3514
  sub_tde->close(sub_tde);
3189
3515
  }
3190
3516
  sub_tde = (TermDocEnum *)pq_top(mtdpe->pq);
@@ -3199,12 +3525,12 @@ static bool mtdpe_next(TermDocEnum *tde)
3199
3525
  return true;
3200
3526
  }
3201
3527
 
3202
- bool tdpe_less_than(TermDocEnum *p1, TermDocEnum *p2)
3528
+ static bool tdpe_less_than(TermDocEnum *p1, TermDocEnum *p2)
3203
3529
  {
3204
3530
  return p1->doc_num(p1) < p2->doc_num(p2);
3205
3531
  }
3206
3532
 
3207
- bool mtdpe_skip_to(TermDocEnum *tde, int target_doc_num)
3533
+ static bool mtdpe_skip_to(TermDocEnum *tde, int target_doc_num)
3208
3534
  {
3209
3535
  TermDocEnum *sub_tde;
3210
3536
  PriorityQueue *mtdpe_pq = MTDPE(tde)->pq;
@@ -3215,7 +3541,7 @@ bool mtdpe_skip_to(TermDocEnum *tde, int target_doc_num)
3215
3541
  pq_down(mtdpe_pq);
3216
3542
  }
3217
3543
  else {
3218
- sub_tde = pq_pop(mtdpe_pq);
3544
+ sub_tde = (TermDocEnum *)pq_pop(mtdpe_pq);
3219
3545
  sub_tde->close(sub_tde);
3220
3546
  }
3221
3547
  }
@@ -3284,22 +3610,21 @@ TermDocEnum *mtdpe_new(IndexReader *ir, int field_num, char **terms, int t_cnt)
3284
3610
  *
3285
3611
  ****************************************************************************/
3286
3612
 
3287
- static HashTable *fn_extensions = NULL;
3613
+ static Hash *fn_extensions = NULL;
3288
3614
  static void file_name_filter_init()
3289
3615
  {
3290
- if (NULL == fn_extensions) {
3291
- int i;
3292
- fn_extensions = h_new_str((free_ft)NULL, (free_ft)NULL);
3293
- for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
3294
- h_set(fn_extensions, INDEX_EXTENSIONS[i], (char *)INDEX_EXTENSIONS[i]);
3295
- }
3296
- register_for_cleanup(fn_extensions, (free_ft)&h_destroy);
3616
+ int i;
3617
+ fn_extensions = h_new_str((free_ft)NULL, (free_ft)NULL);
3618
+ for (i = 0; i < NELEMS(INDEX_EXTENSIONS); i++) {
3619
+ h_set(fn_extensions, INDEX_EXTENSIONS[i], (char *)INDEX_EXTENSIONS[i]);
3297
3620
  }
3621
+ register_for_cleanup(fn_extensions, (free_ft)&h_destroy);
3298
3622
  }
3299
3623
 
3300
- static bool file_name_filter_accept(char *file_name)
3624
+ bool file_name_filter_is_index_file(const char *file_name, bool include_locks)
3301
3625
  {
3302
3626
  char *p = strrchr(file_name, '.');
3627
+ if (NULL == fn_extensions) file_name_filter_init();
3303
3628
  if (NULL != p) {
3304
3629
  char *extension = p + 1;
3305
3630
  if (NULL != h_get(fn_extensions, extension)) {
@@ -3310,6 +3635,10 @@ static bool file_name_filter_accept(char *file_name)
3310
3635
  && *(extension + 1) <= '9') {
3311
3636
  return true;
3312
3637
  }
3638
+ else if (include_locks && (strcmp(extension, "lck") == 0)
3639
+ && (strncmp(file_name, "ferret", 6) == 0)) {
3640
+ return true;
3641
+ }
3313
3642
  }
3314
3643
  else if (0 == strncmp(SEGMENTS_FILE_NAME, file_name,
3315
3644
  sizeof(SEGMENTS_FILE_NAME) - 1)) {
@@ -3323,7 +3652,7 @@ static bool file_name_filter_accept(char *file_name)
3323
3652
  * function should only be called on files that pass the above "accept" (ie,
3324
3653
  * are already known to be a Lucene index file).
3325
3654
  */
3326
- static bool file_name_filter_is_cfs_file(char *file_name) {
3655
+ static bool file_name_filter_is_cfs_file(const char *file_name) {
3327
3656
  char *p = strrchr(file_name, '.');
3328
3657
  if (NULL != p) {
3329
3658
  char *extension = p + 1;
@@ -3364,7 +3693,7 @@ void deleter_destroy(Deleter *dlr)
3364
3693
  free(dlr);
3365
3694
  }
3366
3695
 
3367
- void deleter_queue_file(Deleter *dlr, char *file_name)
3696
+ static void deleter_queue_file(Deleter *dlr, const char *file_name)
3368
3697
  {
3369
3698
  hs_add(dlr->pending, estrdup(file_name));
3370
3699
  }
@@ -3382,12 +3711,12 @@ void deleter_delete_file(Deleter *dlr, char *file_name)
3382
3711
  XENDTRY
3383
3712
  }
3384
3713
 
3385
- void deleter_commit_pending_deletions(Deleter *dlr)
3714
+ static void deleter_commit_pending_deletions(Deleter *dlr)
3386
3715
  {
3387
- int i;
3388
- char **pending = (char **)dlr->pending->elems;
3389
- for (i = dlr->pending->size - 1; i >= 0; i--) {
3390
- deleter_delete_file(dlr, pending[i]);
3716
+ HashSetEntry *hse, *hse_next = dlr->pending->first;
3717
+ while ((hse = hse_next) != NULL) {
3718
+ hse_next = hse->next;
3719
+ deleter_delete_file(dlr, (char *)hse->elem);
3391
3720
  }
3392
3721
  }
3393
3722
 
@@ -3403,15 +3732,15 @@ void deleter_delete_files(Deleter *dlr, char **files, int file_cnt)
3403
3732
  struct DelFilesArg {
3404
3733
  char curr_seg_file_name[SEGMENT_NAME_MAX_LENGTH];
3405
3734
  Deleter *dlr;
3406
- HashTable *current;
3735
+ Hash *current;
3407
3736
  };
3408
3737
 
3409
- static void deleter_find_deletable_files_i(char *file_name, void *arg)
3738
+ static void deleter_find_deletable_files_i(const char *file_name, void *arg)
3410
3739
  {
3411
3740
  struct DelFilesArg *dfa = (struct DelFilesArg *)arg;
3412
3741
  Deleter *dlr = dfa->dlr;
3413
3742
 
3414
- if (file_name_filter_accept(file_name)
3743
+ if (file_name_filter_is_index_file(file_name, false)
3415
3744
  && 0 != strcmp(file_name, dfa->curr_seg_file_name)
3416
3745
  && 0 != strcmp(file_name, SEGMENTS_GEN_FILE_NAME)) {
3417
3746
 
@@ -3427,7 +3756,8 @@ static void deleter_find_deletable_files_i(char *file_name, void *arg)
3427
3756
  if (NULL != p) {
3428
3757
  *p = '\0';
3429
3758
  extension = p + 1;
3430
- } else {
3759
+ }
3760
+ else {
3431
3761
  extension = NULL;
3432
3762
  }
3433
3763
 
@@ -3439,7 +3769,7 @@ static void deleter_find_deletable_files_i(char *file_name, void *arg)
3439
3769
 
3440
3770
  /* Delete this file if it's not a "current" segment, or, it is a
3441
3771
  * single index file but there is now a corresponding compound file: */
3442
- if (NULL == (si = h_get(dfa->current, segment_name))) {
3772
+ if (NULL == (si = (SegmentInfo *)h_get(dfa->current, segment_name))) {
3443
3773
  /* Delete if segment is not referenced: */
3444
3774
  do_delete = true;
3445
3775
  }
@@ -3497,7 +3827,7 @@ void deleter_find_deletable_files(Deleter *dlr)
3497
3827
  SegmentInfos *sis = dlr->sis;
3498
3828
  Store *store = dlr->store;
3499
3829
  struct DelFilesArg dfa;
3500
- HashTable *current = dfa.current
3830
+ Hash *current = dfa.current
3501
3831
  = h_new_str((free_ft)NULL, (free_ft)si_deref);
3502
3832
  dfa.dlr = dlr;
3503
3833
 
@@ -3511,22 +3841,24 @@ void deleter_find_deletable_files(Deleter *dlr)
3511
3841
  * and add to deletable if they are not referenced by the current segments
3512
3842
  * info: */
3513
3843
  sis_curr_seg_file_name(dfa.curr_seg_file_name, store);
3514
- file_name_filter_init();
3515
3844
 
3516
3845
  store->each(store, &deleter_find_deletable_files_i, &dfa);
3517
3846
  h_destroy(dfa.current);
3518
3847
  }
3519
3848
 
3520
- void deleter_delete_deletable_files(Deleter *dlr)
3849
+ static void deleter_delete_deletable_files(Deleter *dlr)
3521
3850
  {
3522
3851
  deleter_find_deletable_files(dlr);
3523
3852
  deleter_commit_pending_deletions(dlr);
3524
3853
  }
3525
3854
 
3526
- void deleter_clear_pending_deletions(Deleter *dlr)
3855
+ /*
3856
+ TODO: currently not used. Why not?
3857
+ static void deleter_clear_pending_deletions(Deleter *dlr)
3527
3858
  {
3528
3859
  hs_clear(dlr->pending);
3529
3860
  }
3861
+ */
3530
3862
 
3531
3863
  /****************************************************************************
3532
3864
  *
@@ -3534,13 +3866,13 @@ void deleter_clear_pending_deletions(Deleter *dlr)
3534
3866
  *
3535
3867
  ****************************************************************************/
3536
3868
 
3537
- void ir_acquire_not_necessary(IndexReader *ir)
3869
+ static void ir_acquire_not_necessary(IndexReader *ir)
3538
3870
  {
3539
3871
  (void)ir;
3540
3872
  }
3541
3873
 
3542
3874
  #define I64_PFX POSH_I64_PRINTF_PREFIX
3543
- void ir_acquire_write_lock(IndexReader *ir)
3875
+ static void ir_acquire_write_lock(IndexReader *ir)
3544
3876
  {
3545
3877
  if (ir->is_stale) {
3546
3878
  RAISE(STATE_ERROR, "IndexReader out of date and no longer valid for "
@@ -3579,10 +3911,11 @@ void ir_acquire_write_lock(IndexReader *ir)
3579
3911
  }
3580
3912
  }
3581
3913
 
3582
- IndexReader *ir_setup(IndexReader *ir, Store *store, SegmentInfos *sis,
3914
+ static IndexReader *ir_setup(IndexReader *ir, Store *store, SegmentInfos *sis,
3583
3915
  FieldInfos *fis, int is_owner)
3584
3916
  {
3585
3917
  mutex_init(&ir->mutex, NULL);
3918
+ mutex_init(&ir->field_index_mutex, NULL);
3586
3919
 
3587
3920
  if (store) {
3588
3921
  ir->store = store;
@@ -3608,16 +3941,17 @@ bool ir_index_exists(Store *store)
3608
3941
  return sis_current_segment_generation(store) != 1;
3609
3942
  }
3610
3943
 
3611
- int ir_get_field_num(IndexReader *ir, const char *field)
3944
+ int ir_get_field_num(IndexReader *ir, Symbol field)
3612
3945
  {
3613
3946
  int field_num = fis_get_field_num(ir->fis, field);
3614
3947
  if (field_num < 0) {
3615
- RAISE(ARG_ERROR, "Field :%s does not exist in this index", field);
3948
+ RAISE(ARG_ERROR,
3949
+ "Field :%s does not exist in this index", (char *)field);
3616
3950
  }
3617
3951
  return field_num;
3618
3952
  }
3619
3953
 
3620
- int ir_doc_freq(IndexReader *ir, const char *field, const char *term)
3954
+ int ir_doc_freq(IndexReader *ir, Symbol field, const char *term)
3621
3955
  {
3622
3956
  int field_num = fis_get_field_num(ir->fis, field);
3623
3957
  if (field_num >= 0) {
@@ -3637,7 +3971,7 @@ static void ir_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar val
3637
3971
  mutex_unlock(&ir->mutex);
3638
3972
  }
3639
3973
 
3640
- void ir_set_norm(IndexReader *ir, int doc_num, const char *field, uchar val)
3974
+ void ir_set_norm(IndexReader *ir, int doc_num, Symbol field, uchar val)
3641
3975
  {
3642
3976
  int field_num = fis_get_field_num(ir->fis, field);
3643
3977
  if (field_num >= 0) {
@@ -3653,20 +3987,20 @@ uchar *ir_get_norms_i(IndexReader *ir, int field_num)
3653
3987
  }
3654
3988
  if (!norms) {
3655
3989
  if (NULL == ir->fake_norms) {
3656
- ir->fake_norms = (uchar *)ecalloc(ir->max_doc(ir));
3990
+ ir->fake_norms = ALLOC_AND_ZERO_N(uchar, ir->max_doc(ir));
3657
3991
  }
3658
3992
  norms = ir->fake_norms;
3659
3993
  }
3660
3994
  return norms;
3661
3995
  }
3662
3996
 
3663
- uchar *ir_get_norms(IndexReader *ir, const char *field)
3997
+ uchar *ir_get_norms(IndexReader *ir, Symbol field)
3664
3998
  {
3665
3999
  int field_num = fis_get_field_num(ir->fis, field);
3666
4000
  return ir_get_norms_i(ir, field_num);
3667
4001
  }
3668
4002
 
3669
- uchar *ir_get_norms_into(IndexReader *ir, const char *field, uchar *buf)
4003
+ uchar *ir_get_norms_into(IndexReader *ir, Symbol field, uchar *buf)
3670
4004
  {
3671
4005
  int field_num = fis_get_field_num(ir->fis, field);
3672
4006
  if (field_num >= 0) {
@@ -3698,7 +4032,7 @@ void ir_delete_doc(IndexReader *ir, int doc_num)
3698
4032
  }
3699
4033
  }
3700
4034
 
3701
- Document *ir_get_doc_with_term(IndexReader *ir, const char *field,
4035
+ Document *ir_get_doc_with_term(IndexReader *ir, Symbol field,
3702
4036
  const char *term)
3703
4037
  {
3704
4038
  TermDocEnum *tde = ir_term_docs_for(ir, field, term);
@@ -3713,7 +4047,7 @@ Document *ir_get_doc_with_term(IndexReader *ir, const char *field,
3713
4047
  return doc;
3714
4048
  }
3715
4049
 
3716
- TermEnum *ir_terms(IndexReader *ir, const char *field)
4050
+ TermEnum *ir_terms(IndexReader *ir, Symbol field)
3717
4051
  {
3718
4052
  TermEnum *te = NULL;
3719
4053
  int field_num = fis_get_field_num(ir->fis, field);
@@ -3723,7 +4057,7 @@ TermEnum *ir_terms(IndexReader *ir, const char *field)
3723
4057
  return te;
3724
4058
  }
3725
4059
 
3726
- TermEnum *ir_terms_from(IndexReader *ir, const char *field,
4060
+ TermEnum *ir_terms_from(IndexReader *ir, Symbol field,
3727
4061
  const char *term)
3728
4062
  {
3729
4063
  TermEnum *te = NULL;
@@ -3734,7 +4068,7 @@ TermEnum *ir_terms_from(IndexReader *ir, const char *field,
3734
4068
  return te;
3735
4069
  }
3736
4070
 
3737
- TermDocEnum *ir_term_docs_for(IndexReader *ir, const char *field,
4071
+ TermDocEnum *ir_term_docs_for(IndexReader *ir, Symbol field,
3738
4072
  const char *term)
3739
4073
  {
3740
4074
  int field_num = fis_get_field_num(ir->fis, field);
@@ -3745,7 +4079,7 @@ TermDocEnum *ir_term_docs_for(IndexReader *ir, const char *field,
3745
4079
  return tde;
3746
4080
  }
3747
4081
 
3748
- TermDocEnum *ir_term_positions_for(IndexReader *ir, const char *field,
4082
+ TermDocEnum *ir_term_positions_for(IndexReader *ir, Symbol field,
3749
4083
  const char *term)
3750
4084
  {
3751
4085
  int field_num = fis_get_field_num(ir->fis, field);
@@ -3756,7 +4090,7 @@ TermDocEnum *ir_term_positions_for(IndexReader *ir, const char *field,
3756
4090
  return tde;
3757
4091
  }
3758
4092
 
3759
- void ir_commit_i(IndexReader *ir)
4093
+ static void ir_commit_i(IndexReader *ir)
3760
4094
  {
3761
4095
  if (ir->has_changes) {
3762
4096
  if (NULL == ir->deleter && NULL != ir->store) {
@@ -3769,7 +4103,7 @@ void ir_commit_i(IndexReader *ir)
3769
4103
  mutex_lock(&ir->store->mutex);
3770
4104
 
3771
4105
  sis_curr_seg_file_name(curr_seg_fn, ir->store);
3772
-
4106
+
3773
4107
  ir->commit_i(ir);
3774
4108
  sis_write(ir->sis, ir->store, ir->deleter);
3775
4109
 
@@ -3813,8 +4147,8 @@ void ir_close(IndexReader *ir)
3813
4147
  if (ir->cache) {
3814
4148
  h_destroy(ir->cache);
3815
4149
  }
3816
- if (ir->sort_cache) {
3817
- h_destroy(ir->sort_cache);
4150
+ if (ir->field_index_cache) {
4151
+ h_destroy(ir->field_index_cache);
3818
4152
  }
3819
4153
  if (ir->deleter && ir->is_owner) {
3820
4154
  deleter_destroy(ir->deleter);
@@ -3822,8 +4156,10 @@ void ir_close(IndexReader *ir)
3822
4156
  free(ir->fake_norms);
3823
4157
 
3824
4158
  mutex_destroy(&ir->mutex);
4159
+ mutex_destroy(&ir->field_index_mutex);
3825
4160
  free(ir);
3826
- } else {
4161
+ }
4162
+ else {
3827
4163
  mutex_unlock(&ir->mutex);
3828
4164
  }
3829
4165
 
@@ -3910,7 +4246,7 @@ typedef struct SegmentReader {
3910
4246
  TermInfosReader *tir;
3911
4247
  thread_key_t thread_fr;
3912
4248
  void **fr_bucket;
3913
- HashTable *norms;
4249
+ Hash *norms;
3914
4250
  Store *cfs_store;
3915
4251
  bool deleted_docs_dirty : 1;
3916
4252
  bool undelete_all : 1;
@@ -3926,7 +4262,7 @@ static INLINE FieldsReader *sr_fr(SegmentReader *sr)
3926
4262
  {
3927
4263
  FieldsReader *fr;
3928
4264
 
3929
- if (NULL == (fr = thread_getspecific(sr->thread_fr))) {
4265
+ if (NULL == (fr = (FieldsReader *)thread_getspecific(sr->thread_fr))) {
3930
4266
  fr = fr_clone(sr->fr);
3931
4267
  ary_push(sr->fr_bucket, fr);
3932
4268
  thread_setspecific(sr->thread_fr, fr);
@@ -3942,7 +4278,7 @@ static INLINE bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
3942
4278
  static INLINE void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3943
4279
  uchar *buf)
3944
4280
  {
3945
- Norm *norm = h_get_int(sr->norms, field_num);
4281
+ Norm *norm = (Norm *)h_get_int(sr->norms, field_num);
3946
4282
  if (NULL == norm) {
3947
4283
  memset(buf, 0, SR_SIZE(sr));
3948
4284
  }
@@ -3960,7 +4296,7 @@ static INLINE void sr_get_norms_into_i(SegmentReader *sr, int field_num,
3960
4296
 
3961
4297
  static INLINE uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3962
4298
  {
3963
- Norm *norm = h_get_int(sr->norms, field_num);
4299
+ Norm *norm = (Norm *)h_get_int(sr->norms, field_num);
3964
4300
  if (NULL == norm) { /* not an indexed field */
3965
4301
  return NULL;
3966
4302
  }
@@ -3975,7 +4311,7 @@ static INLINE uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
3975
4311
 
3976
4312
  static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
3977
4313
  {
3978
- Norm *norm = h_get_int(SR(ir)->norms, field_num);
4314
+ Norm *norm = (Norm *)h_get_int(SR(ir)->norms, field_num);
3979
4315
  if (NULL != norm) { /* has_norms */
3980
4316
  ir->has_changes = true;
3981
4317
  norm->is_dirty = true; /* mark it dirty */
@@ -3984,7 +4320,7 @@ static void sr_set_norm_i(IndexReader *ir, int doc_num, int field_num, uchar b)
3984
4320
  }
3985
4321
  }
3986
4322
 
3987
- static void sr_delete_doc_i(IndexReader *ir, int doc_num)
4323
+ static void sr_delete_doc_i(IndexReader *ir, int doc_num)
3988
4324
  {
3989
4325
  if (NULL == SR(ir)->deleted_docs) {
3990
4326
  SR(ir)->deleted_docs = bv_new();
@@ -4017,7 +4353,7 @@ static void bv_write(BitVector *bv, Store *store, char *name)
4017
4353
  int i;
4018
4354
  OutStream *os = store->new_output(store, name);
4019
4355
  os_write_vint(os, bv->size);
4020
- for (i = (bv->size >> 5); i >= 0; i--) {
4356
+ for (i = ((bv->size-1) >> 5); i >= 0; i--) {
4021
4357
  os_write_u32(os, bv->bits[i]);
4022
4358
  }
4023
4359
  os_close(os);
@@ -4031,10 +4367,10 @@ static BitVector *bv_read(Store *store, char *name)
4031
4367
  BitVector *volatile bv = ALLOC_AND_ZERO(BitVector);
4032
4368
  bv->size = (int)is_read_vint(is);
4033
4369
  bv->capa = (bv->size >> 5) + 1;
4034
- bv->bits = ALLOC_AND_ZERO_N(f_u32, bv->capa);
4370
+ bv->bits = ALLOC_AND_ZERO_N(u32, bv->capa);
4035
4371
  bv->ref_cnt = 1;
4036
4372
  TRY
4037
- for (i = (bv->size >> 5); i >= 0; i--) {
4373
+ for (i = ((bv->size-1) >> 5); i >= 0; i--) {
4038
4374
  bv->bits[i] = is_read_u32(is);
4039
4375
  }
4040
4376
  bv_recount(bv);
@@ -4065,7 +4401,8 @@ static void sr_commit_i(IndexReader *ir)
4065
4401
  if (SR(ir)->undelete_all) {
4066
4402
  si->del_gen = -1;
4067
4403
  SR(ir)->undelete_all = false;
4068
- } else {
4404
+ }
4405
+ else {
4069
4406
  /* (SR(ir)->deleted_docs_dirty) re-write deleted */
4070
4407
  si->del_gen++;
4071
4408
  fn_for_generation(tmp_file_name, segment, "del", si->del_gen);
@@ -4080,7 +4417,7 @@ static void sr_commit_i(IndexReader *ir)
4080
4417
  for (i = field_cnt - 1; i >= 0; i--) {
4081
4418
  fi = ir->fis->fields[i];
4082
4419
  if (fi_is_indexed(fi)) {
4083
- Norm *norm = h_get_int(SR(ir)->norms, fi->number);
4420
+ Norm *norm = (Norm *)h_get_int(SR(ir)->norms, fi->number);
4084
4421
  if (norm && norm->is_dirty) {
4085
4422
  norm_rewrite(norm, ir->store, ir->deleter, SR(ir)->si,
4086
4423
  SR_SIZE(ir));
@@ -4208,9 +4545,9 @@ static TermDocEnum *sr_term_positions(IndexReader *ir)
4208
4545
  }
4209
4546
 
4210
4547
  static TermVector *sr_term_vector(IndexReader *ir, int doc_num,
4211
- const char *field)
4548
+ Symbol field)
4212
4549
  {
4213
- FieldInfo *fi = h_get(ir->fis->field_dict, field);
4550
+ FieldInfo *fi = (FieldInfo *)h_get(ir->fis->field_dict, field);
4214
4551
  FieldsReader *fr;
4215
4552
 
4216
4553
  if (!fi || !fi_store_term_vector(fi) || !SR(ir)->fr ||
@@ -4221,7 +4558,7 @@ static TermVector *sr_term_vector(IndexReader *ir, int doc_num,
4221
4558
  return fr_get_field_tv(fr, doc_num, fi->number);
4222
4559
  }
4223
4560
 
4224
- static HashTable *sr_term_vectors(IndexReader *ir, int doc_num)
4561
+ static Hash *sr_term_vectors(IndexReader *ir, int doc_num)
4225
4562
  {
4226
4563
  FieldsReader *fr;
4227
4564
  if (!SR(ir)->fr || NULL == (fr = sr_fr(SR(ir)))) {
@@ -4266,7 +4603,7 @@ static void sr_open_norms(IndexReader *ir, Store *cfs_store)
4266
4603
 
4267
4604
  static IndexReader *sr_setup_i(SegmentReader *sr)
4268
4605
  {
4269
- Store *store = sr->si->store;
4606
+ Store *volatile store = sr->si->store;
4270
4607
  IndexReader *ir = IR(sr);
4271
4608
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4272
4609
  char *sr_segment = sr->si->name;
@@ -4375,7 +4712,7 @@ static int mr_reader_index_i(MultiReader *mr, int doc_num)
4375
4712
  return hi;
4376
4713
  }
4377
4714
 
4378
- int mr_num_docs(IndexReader *ir)
4715
+ static int mr_num_docs(IndexReader *ir)
4379
4716
  {
4380
4717
  int i, num_docs;
4381
4718
  mutex_lock(&ir->mutex);
@@ -4429,7 +4766,7 @@ static uchar *mr_get_norms(IndexReader *ir, int field_num)
4429
4766
  uchar *bytes;
4430
4767
 
4431
4768
  mutex_lock(&ir->mutex);
4432
- bytes = h_get_int(MR(ir)->norms_cache, field_num);
4769
+ bytes = (uchar *)h_get_int(MR(ir)->norms_cache, field_num);
4433
4770
  if (NULL == bytes) {
4434
4771
  int i;
4435
4772
  const int mr_reader_cnt = MR(ir)->r_cnt;
@@ -4455,7 +4792,7 @@ static uchar *mr_get_norms_into(IndexReader *ir, int field_num, uchar *buf)
4455
4792
  uchar *bytes;
4456
4793
 
4457
4794
  mutex_lock(&ir->mutex);
4458
- bytes = h_get_int(MR(ir)->norms_cache, field_num);
4795
+ bytes = (uchar *)h_get_int(MR(ir)->norms_cache, field_num);
4459
4796
  if (NULL != bytes) {
4460
4797
  memcpy(buf, bytes, MR(ir)->max_doc);
4461
4798
  }
@@ -4509,13 +4846,13 @@ static TermDocEnum *mr_term_positions(IndexReader *ir)
4509
4846
  }
4510
4847
 
4511
4848
  static TermVector *mr_term_vector(IndexReader *ir, int doc_num,
4512
- const char *field)
4849
+ Symbol field)
4513
4850
  {
4514
4851
  GET_READER();
4515
4852
  return reader->term_vector(reader, doc_num - MR(ir)->starts[i], field);
4516
4853
  }
4517
4854
 
4518
- static HashTable *mr_term_vectors(IndexReader *ir, int doc_num)
4855
+ static Hash *mr_term_vectors(IndexReader *ir, int doc_num)
4519
4856
  {
4520
4857
  GET_READER();
4521
4858
  return reader->term_vectors(reader, doc_num - MR(ir)->starts[i]);
@@ -4667,7 +5004,7 @@ static IndexReader *mr_new(IndexReader **sub_readers, const int r_cnt)
4667
5004
  return ir;
4668
5005
  }
4669
5006
 
4670
- IndexReader *mr_open_i(Store *store,
5007
+ static IndexReader *mr_open_i(Store *store,
4671
5008
  SegmentInfos *sis,
4672
5009
  FieldInfos *fis,
4673
5010
  IndexReader **sub_readers,
@@ -4696,7 +5033,7 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
4696
5033
  IndexReader *ir = mr_new(sub_readers, r_cnt);
4697
5034
  MultiReader *mr = MR(ir);
4698
5035
  /* defaults don't matter, this is just for reading fields, not adding */
4699
- FieldInfos *fis = fis_new(0, 0, 0);
5036
+ FieldInfos *fis = fis_new(STORE_NO, INDEX_NO, TERM_VECTOR_NO);
4700
5037
  int i, j;
4701
5038
  bool need_field_map = false;
4702
5039
 
@@ -4731,7 +5068,7 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
4731
5068
  mr->field_num_map[i][j] = fi_sub ? fi_sub->number : -1;
4732
5069
  }
4733
5070
  }
4734
- /* print out the field map
5071
+ /* print out the field map
4735
5072
  for (i = 0; i < r_cnt; i++) {
4736
5073
  for (j = 0; j < fis->size; j++) {
4737
5074
  printf("%d ", mr->field_num_map[i][j]);
@@ -4766,14 +5103,14 @@ static void ir_open_i(Store *store, FindSegmentsFile *fsf)
4766
5103
 
4767
5104
  mutex_lock(&store->mutex);
4768
5105
  sis_read_i(store, fsf);
4769
- sis = fsf->p_return;
5106
+ sis = fsf->ret.sis;
4770
5107
  fis = sis->fis;
4771
5108
 
4772
5109
  if (sis->size == 1) {
4773
5110
  ir = sr_open(sis, fis, 0, true);
4774
5111
  }
4775
5112
  else {
4776
- int i;
5113
+ volatile int i;
4777
5114
  IndexReader **readers = ALLOC_N(IndexReader *, sis->size);
4778
5115
  int num_segments = sis->size;
4779
5116
  for (i = num_segments - 1; i >= 0; i--) {
@@ -4788,7 +5125,7 @@ static void ir_open_i(Store *store, FindSegmentsFile *fsf)
4788
5125
  }
4789
5126
  ir = mr_open_i(store, sis, fis, readers, sis->size);
4790
5127
  }
4791
- fsf->p_return = ir;
5128
+ fsf->ret.ir = ir;
4792
5129
  success = true;
4793
5130
  } while (0);
4794
5131
  XFINALLY
@@ -4812,7 +5149,7 @@ IndexReader *ir_open(Store *store)
4812
5149
  {
4813
5150
  FindSegmentsFile fsf;
4814
5151
  sis_find_segments_file(store, &fsf, &ir_open_i);
4815
- return (IndexReader *)fsf.p_return;
5152
+ return fsf.ret.ir;
4816
5153
  }
4817
5154
 
4818
5155
  /****************************************************************************
@@ -4865,10 +5202,11 @@ Posting *p_new(MemoryPool *mp, int doc_num, int pos)
4865
5202
  *
4866
5203
  ****************************************************************************/
4867
5204
 
4868
- PostingList *pl_new(MemoryPool *mp, const char *term, int term_len, Posting *p)
5205
+ PostingList *pl_new(MemoryPool *mp, const char *term,
5206
+ int term_len, Posting *p)
4869
5207
  {
4870
5208
  PostingList *pl = MP_ALLOC(mp, PostingList);
4871
- pl->term = mp_memdup(mp, term, term_len + 1);
5209
+ pl->term = (char *)mp_memdup(mp, term, term_len + 1);
4872
5210
  pl->term_len = term_len;
4873
5211
  pl->first = pl->last = p;
4874
5212
  pl->last_occ = p->first_occ;
@@ -4881,7 +5219,7 @@ void pl_add_occ(MemoryPool *mp, PostingList *pl, int pos)
4881
5219
  pl->last->freq++;
4882
5220
  }
4883
5221
 
4884
- void pl_add_posting(PostingList *pl, Posting *p)
5222
+ static void pl_add_posting(PostingList *pl, Posting *p)
4885
5223
  {
4886
5224
  pl->last = pl->last->next = p;
4887
5225
  pl->last_occ = p->first_occ;
@@ -4905,7 +5243,8 @@ static FieldInverter *fld_inv_new(DocWriter *dw, FieldInfo *fi)
4905
5243
  fld_inv->store_term_vector = fi_store_term_vector(fi);
4906
5244
  fld_inv->store_offsets = fi_store_offsets(fi);
4907
5245
  if ((fld_inv->has_norms = fi_has_norms(fi)) == true) {
4908
- fld_inv->norms = MP_ALLOC_AND_ZERO_N(dw->mp, uchar, dw->max_buffered_docs);
5246
+ fld_inv->norms = MP_ALLOC_AND_ZERO_N(dw->mp, uchar,
5247
+ dw->max_buffered_docs);
4909
5248
  }
4910
5249
  fld_inv->fi = fi;
4911
5250
 
@@ -4991,15 +5330,15 @@ static void dw_write_norms(DocWriter *dw, FieldInverter *fld_inv)
4991
5330
  char file_name[SEGMENT_NAME_MAX_LENGTH];
4992
5331
  OutStream *norms_out;
4993
5332
  si_advance_norm_gen(dw->si, fld_inv->fi->number);
4994
- si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
5333
+ si_norm_file_name(dw->si, file_name, fld_inv->fi->number);
4995
5334
  norms_out = dw->store->new_output(dw->store, file_name);
4996
5335
  os_write_bytes(norms_out, fld_inv->norms, dw->doc_num);
4997
5336
  os_close(norms_out);
4998
5337
  }
4999
5338
 
5000
- /* we'll use the postings HashTable's table area to sort the postings as it is
5339
+ /* we'll use the postings Hash's table area to sort the postings as it is
5001
5340
  * going to be zeroset soon anyway */
5002
- static PostingList **dw_sort_postings(HashTable *plists_ht)
5341
+ static PostingList **dw_sort_postings(Hash *plists_ht)
5003
5342
  {
5004
5343
  int i, j;
5005
5344
  HashEntry *he;
@@ -5054,8 +5393,8 @@ static void dw_flush(DocWriter *dw)
5054
5393
 
5055
5394
  for (i = 0; i < fields_count; i++) {
5056
5395
  fi = fis->fields[i];
5057
- if (!fi_is_indexed(fi)
5058
- || NULL == (fld_inv = h_get_int(dw->fields, fi->number))) {
5396
+ if (!fi_is_indexed(fi) || NULL ==
5397
+ (fld_inv = (FieldInverter*)h_get_int(dw->fields, fi->number))) {
5059
5398
  continue;
5060
5399
  }
5061
5400
  if (!fi_omit_norms(fi)) {
@@ -5105,7 +5444,7 @@ static void dw_flush(DocWriter *dw)
5105
5444
  tiw_close(tiw);
5106
5445
  skip_buf_destroy(skip_buf);
5107
5446
  dw_flush_streams(dw);
5108
- }
5447
+ }
5109
5448
 
5110
5449
  DocWriter *dw_open(IndexWriter *iw, SegmentInfo *si)
5111
5450
  {
@@ -5130,7 +5469,7 @@ DocWriter *dw_open(IndexWriter *iw, SegmentInfo *si)
5130
5469
  dw->skip_interval = iw->config.skip_interval;
5131
5470
  dw->max_field_length = iw->config.max_field_length;
5132
5471
  dw->max_buffered_docs = iw->config.max_buffered_docs;
5133
-
5472
+
5134
5473
  dw->offsets = ALLOC_AND_ZERO_N(Offset, DW_OFFSET_INIT_CAPA);
5135
5474
  dw->offsets_size = 0;
5136
5475
  dw->offsets_capa = DW_OFFSET_INIT_CAPA;
@@ -5147,7 +5486,7 @@ void dw_new_segment(DocWriter *dw, SegmentInfo *si)
5147
5486
 
5148
5487
  void dw_close(DocWriter *dw)
5149
5488
  {
5150
- if (dw->doc_num) {
5489
+ if (dw->doc_num) {
5151
5490
  dw_flush(dw);
5152
5491
  }
5153
5492
  if (dw->fw) {
@@ -5162,7 +5501,7 @@ void dw_close(DocWriter *dw)
5162
5501
 
5163
5502
  FieldInverter *dw_get_fld_inv(DocWriter *dw, FieldInfo *fi)
5164
5503
  {
5165
- FieldInverter *fld_inv = h_get_int(dw->fields, fi->number);
5504
+ FieldInverter *fld_inv = (FieldInverter*)h_get_int(dw->fields, fi->number);
5166
5505
 
5167
5506
  if (!fld_inv) {
5168
5507
  fld_inv = fld_inv_new(dw, fi);
@@ -5172,31 +5511,33 @@ FieldInverter *dw_get_fld_inv(DocWriter *dw, FieldInfo *fi)
5172
5511
  }
5173
5512
 
5174
5513
  static void dw_add_posting(MemoryPool *mp,
5175
- HashTable *curr_plists,
5176
- HashTable *fld_plists,
5514
+ Hash *curr_plists,
5515
+ Hash *fld_plists,
5177
5516
  int doc_num,
5178
5517
  const char *text,
5179
5518
  int len,
5180
5519
  int pos)
5181
5520
  {
5182
- HashEntry *pl_he = h_set_ext(curr_plists, text);
5183
- if (pl_he->value) {
5184
- pl_add_occ(mp, pl_he->value, pos);
5185
- }
5186
- else {
5187
- HashEntry *fld_pl_he = h_set_ext(fld_plists, text);
5188
- PostingList *pl = fld_pl_he->value;
5521
+ HashEntry *pl_he;
5522
+ if (h_set_ext(curr_plists, text, &pl_he)) {
5189
5523
  Posting *p = p_new(mp, doc_num, pos);
5190
- if (!pl) {
5191
- pl = fld_pl_he->value = pl_new(mp, text, len, p);
5524
+ HashEntry *fld_pl_he;
5525
+ PostingList *pl;
5526
+
5527
+ if (h_set_ext(fld_plists, text, &fld_pl_he)) {
5528
+ fld_pl_he->value = pl = pl_new(mp, text, len, p);
5192
5529
  pl_he->key = fld_pl_he->key = (char *)pl->term;
5193
5530
  }
5194
5531
  else {
5532
+ pl = (PostingList *)fld_pl_he->value;
5195
5533
  pl_add_posting(pl, p);
5196
5534
  pl_he->key = (char *)pl->term;
5197
5535
  }
5198
5536
  pl_he->value = pl;
5199
5537
  }
5538
+ else {
5539
+ pl_add_occ(mp, (PostingList *)pl_he->value, pos);
5540
+ }
5200
5541
  }
5201
5542
 
5202
5543
  static INLINE void dw_add_offsets(DocWriter *dw, int pos, off_t start, off_t end)
@@ -5214,14 +5555,14 @@ static INLINE void dw_add_offsets(DocWriter *dw, int pos, off_t start, off_t end
5214
5555
  dw->offsets_size = pos + 1;
5215
5556
  }
5216
5557
 
5217
- HashTable *dw_invert_field(DocWriter *dw,
5558
+ Hash *dw_invert_field(DocWriter *dw,
5218
5559
  FieldInverter *fld_inv,
5219
5560
  DocField *df)
5220
5561
  {
5221
5562
  MemoryPool *mp = dw->mp;
5222
5563
  Analyzer *a = dw->analyzer;
5223
- HashTable *curr_plists = dw->curr_plists;
5224
- HashTable *fld_plists = fld_inv->plists;
5564
+ Hash *curr_plists = dw->curr_plists;
5565
+ Hash *fld_plists = fld_inv->plists;
5225
5566
  const bool store_offsets = fld_inv->store_offsets;
5226
5567
  int doc_num = dw->doc_num;
5227
5568
  int i;
@@ -5238,6 +5579,11 @@ HashTable *dw_invert_field(DocWriter *dw,
5238
5579
  if (store_offsets) {
5239
5580
  while (NULL != (tk = ts->next(ts))) {
5240
5581
  pos += tk->pos_inc;
5582
+ /* if for some reason pos gets set to some number less
5583
+ * than 0 then we'll start pos at 0 */
5584
+ if (pos < 0) {
5585
+ pos = 0;
5586
+ }
5241
5587
  dw_add_posting(mp, curr_plists, fld_plists, doc_num,
5242
5588
  tk->text, tk->len, pos);
5243
5589
  dw_add_offsets(dw, pos,
@@ -5271,7 +5617,7 @@ HashTable *dw_invert_field(DocWriter *dw,
5271
5617
  char *data_ptr = df->data[i];
5272
5618
  if (len > MAX_WORD_SIZE) {
5273
5619
  len = MAX_WORD_SIZE - 1;
5274
- data_ptr = memcpy(buf, df->data[i], len);
5620
+ data_ptr = (char *)memcpy(buf, df->data[i], len);
5275
5621
  }
5276
5622
  dw_add_posting(mp, curr_plists, fld_plists, doc_num, data_ptr,
5277
5623
  len, i);
@@ -5286,7 +5632,7 @@ HashTable *dw_invert_field(DocWriter *dw,
5286
5632
  return curr_plists;
5287
5633
  }
5288
5634
 
5289
- void dw_reset_postings(HashTable *postings)
5635
+ void dw_reset_postings(Hash *postings)
5290
5636
  {
5291
5637
  ZEROSET_N(postings->table, HashEntry, postings->mask + 1);
5292
5638
  postings->fill = postings->size = 0;
@@ -5298,7 +5644,7 @@ void dw_add_doc(DocWriter *dw, Document *doc)
5298
5644
  float boost;
5299
5645
  DocField *df;
5300
5646
  FieldInverter *fld_inv;
5301
- HashTable *postings;
5647
+ Hash *postings;
5302
5648
  FieldInfo *fi;
5303
5649
  const int doc_size = doc->size;
5304
5650
 
@@ -5543,7 +5889,7 @@ static void sm_merge_fields(SegmentMerger *sm)
5543
5889
  end = (off_t)is_read_u64(fdx_in);
5544
5890
  }
5545
5891
  for (j = 0; j < max_doc; j++) {
5546
- f_u32 tv_idx_offset = is_read_u32(fdx_in);
5892
+ u32 tv_idx_offset = is_read_u32(fdx_in);
5547
5893
  start = end;
5548
5894
  if (j == max_doc - 1) {
5549
5895
  end = is_length(fdt_in);
@@ -5594,14 +5940,9 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
5594
5940
  doc = doc_map[doc]; /* work around deletions */
5595
5941
  }
5596
5942
  doc += base; /* convert to merged space */
5943
+ assert(doc == 0 || doc > last_doc);
5597
5944
 
5598
- #ifdef DEBUG
5599
- if (doc && doc <= last_doc) {
5600
- RAISE(STATE_ERROR, "Docs not ordered, %d < %d", doc, last_doc);
5601
- }
5602
- #endif
5603
5945
  df++;
5604
-
5605
5946
  if (0 == (df % skip_interval)) {
5606
5947
  skip_buf_add(skip_buf, last_doc);
5607
5948
  }
@@ -5627,7 +5968,7 @@ static int sm_append_postings(SegmentMerger *sm, SegmentMergeInfo **matches,
5627
5968
 
5628
5969
  static char *sm_cache_term(SegmentMerger *sm, char *term, int term_len)
5629
5970
  {
5630
- term = memcpy(sm->term_buf + sm->term_buf_ptr, term, term_len + 1);
5971
+ term = (char *)memcpy(sm->term_buf + sm->term_buf_ptr, term, term_len + 1);
5631
5972
  sm->term_buf_ptr += term_len + 1;
5632
5973
  if (sm->term_buf_ptr > sm->term_buf_size) {
5633
5974
  sm->term_buf_ptr = 0;
@@ -5688,14 +6029,14 @@ static void sm_merge_term_infos(SegmentMerger *sm)
5688
6029
  }printf("\n\n");
5689
6030
  */
5690
6031
  match_size = 0; /* pop matching terms */
5691
- matches[0] = pq_pop(sm->queue);
6032
+ matches[0] = (SegmentMergeInfo *)pq_pop(sm->queue);
5692
6033
  match_size++;
5693
6034
  term = matches[0]->term;
5694
- top = pq_top(sm->queue);
6035
+ top = (SegmentMergeInfo *)pq_top(sm->queue);
5695
6036
  while ((NULL != top) && (0 == strcmp(term, top->term))) {
5696
- matches[match_size] = pq_pop(sm->queue);
6037
+ matches[match_size] = (SegmentMergeInfo *)pq_pop(sm->queue);
5697
6038
  match_size++;
5698
- top = pq_top(sm->queue);
6039
+ top = (SegmentMergeInfo *)pq_top(sm->queue);
5699
6040
  }
5700
6041
 
5701
6042
  /* printf(">%s:%s<\n", matches[0]->tb->field, matches[0]->tb->text); */
@@ -5849,7 +6190,7 @@ int iw_doc_count(IndexWriter *iw)
5849
6190
  #define MOVE_TO_COMPOUND_DIR(file_name)\
5850
6191
  deleter_queue_file(dlr, file_name);\
5851
6192
  cw_add_file(cw, file_name)
5852
-
6193
+
5853
6194
  static void iw_create_compound_file(Store *store, FieldInfos *fis,
5854
6195
  SegmentInfo *si, char *cfs_file_name,
5855
6196
  Deleter *dlr)
@@ -6015,7 +6356,7 @@ void iw_commit(IndexWriter *iw)
6015
6356
  mutex_unlock(&iw->mutex);
6016
6357
  }
6017
6358
 
6018
- void iw_delete_term(IndexWriter *iw, const char *field, const char *term)
6359
+ void iw_delete_term(IndexWriter *iw, Symbol field, const char *term)
6019
6360
  {
6020
6361
  int field_num = fis_get_field_num(iw->fis, field);
6021
6362
  if (field_num >= 0) {
@@ -6049,6 +6390,45 @@ void iw_delete_term(IndexWriter *iw, const char *field, const char *term)
6049
6390
  }
6050
6391
  }
6051
6392
 
6393
+ void iw_delete_terms(IndexWriter *iw, Symbol field,
6394
+ char **terms, const int term_cnt)
6395
+ {
6396
+ int field_num = fis_get_field_num(iw->fis, field);
6397
+ if (field_num >= 0) {
6398
+ int i;
6399
+ mutex_lock(&iw->mutex);
6400
+ iw_commit_i(iw);
6401
+ do {
6402
+ SegmentInfos *sis = iw->sis;
6403
+ const int seg_cnt = sis->size;
6404
+ bool did_delete = false;
6405
+ for (i = 0; i < seg_cnt; i++) {
6406
+ IndexReader *ir = sr_open(sis, iw->fis, i, false);
6407
+ TermDocEnum *tde = ir->term_docs(ir);
6408
+ int j;
6409
+ for (j = 0 ; j < term_cnt; j++) {
6410
+ const char *term = terms[j];
6411
+ ir->deleter = iw->deleter;
6412
+ stde_seek(tde, field_num, term);
6413
+ while (tde->next(tde)) {
6414
+ did_delete = true;
6415
+ sr_delete_doc_i(ir, STDE(tde)->doc_num);
6416
+ }
6417
+ }
6418
+ tde_destroy(tde);
6419
+ sr_commit_i(ir);
6420
+ ir_close(ir);
6421
+ }
6422
+ if (did_delete) {
6423
+ mutex_lock(&iw->store->mutex);
6424
+ sis_write(iw->sis, iw->store, iw->deleter);
6425
+ mutex_unlock(&iw->store->mutex);
6426
+ }
6427
+ } while (0);
6428
+ mutex_unlock(&iw->mutex);
6429
+ }
6430
+ }
6431
+
6052
6432
  static void iw_optimize_i(IndexWriter *iw)
6053
6433
  {
6054
6434
  int min_segment;
@@ -6070,7 +6450,7 @@ void iw_optimize(IndexWriter *iw)
6070
6450
  mutex_lock(&iw->mutex);
6071
6451
  iw_optimize_i(iw);
6072
6452
  mutex_unlock(&iw->mutex);
6073
- }
6453
+ }
6074
6454
 
6075
6455
  void iw_close(IndexWriter *iw)
6076
6456
  {
@@ -6094,7 +6474,7 @@ void iw_close(IndexWriter *iw)
6094
6474
  free(iw);
6095
6475
  }
6096
6476
 
6097
- IndexWriter *iw_open(Store *store, volatile Analyzer *analyzer,
6477
+ IndexWriter *iw_open(Store *store, Analyzer *volatile analyzer,
6098
6478
  const Config *config)
6099
6479
  {
6100
6480
  IndexWriter *iw = ALLOC_AND_ZERO(IndexWriter);
@@ -6174,7 +6554,7 @@ static void iw_cp_fields(IndexWriter *iw, SegmentReader *sr,
6174
6554
  int i;
6175
6555
  const int max_doc = sr_max_doc(IR(sr));
6176
6556
  for (i = 0; i < max_doc; i++) {
6177
- int j;
6557
+ int j, data_len = 0;
6178
6558
  const int field_cnt = is_read_vint(fdt_in);
6179
6559
  int tv_cnt;
6180
6560
  off_t doc_start_ptr = os_pos(fdt_out);
@@ -6186,7 +6566,6 @@ static void iw_cp_fields(IndexWriter *iw, SegmentReader *sr,
6186
6566
  int k;
6187
6567
  const int field_num = map[is_read_vint(fdt_in)];
6188
6568
  const int df_size = is_read_vint(fdt_in);
6189
- int data_len = 0;
6190
6569
  os_write_vint(fdt_out, field_num);
6191
6570
  os_write_vint(fdt_out, df_size);
6192
6571
  /* sum total lengths of DocField */
@@ -6196,18 +6575,18 @@ static void iw_cp_fields(IndexWriter *iw, SegmentReader *sr,
6196
6575
  os_write_vint(fdt_out, flen);
6197
6576
  data_len += flen + 1;
6198
6577
  }
6199
- is2os_copy_bytes(fdt_in, fdt_out, data_len);
6200
6578
  }
6579
+ is2os_copy_bytes(fdt_in, fdt_out, data_len);
6201
6580
 
6202
6581
  /* Write TermVectors */
6203
6582
  /* write TVs up to TV index */
6204
6583
  is2os_copy_bytes(fdt_in, fdt_out,
6205
6584
  (int)(is_read_u64(fdx_in)
6206
- + (f_u64)is_read_u32(fdx_in)
6207
- - (f_u64)is_pos(fdt_in)));
6585
+ + (u64)is_read_u32(fdx_in)
6586
+ - (u64)is_pos(fdt_in)));
6208
6587
 
6209
6588
  /* Write TV index pos */
6210
- os_write_u32(fdx_out, (f_u32)(os_pos(fdt_out) - doc_start_ptr));
6589
+ os_write_u32(fdx_out, (u32)(os_pos(fdt_out) - doc_start_ptr));
6211
6590
  tv_cnt = is_read_vint(fdt_in);
6212
6591
  os_write_vint(fdt_out, tv_cnt);
6213
6592
  for (j = 0; j < tv_cnt; j++) {
@@ -6242,7 +6621,7 @@ static void iw_cp_terms(IndexWriter *iw, SegmentReader *sr,
6242
6621
  tix_out = store_out->new_output(store_out, file_name);
6243
6622
  sprintf(file_name, "%s.tix", sr_segment);
6244
6623
  tix_in = store_in->open_input(store_in, file_name);
6245
-
6624
+
6246
6625
  sprintf(file_name, "%s.tis", segment);
6247
6626
  tis_out = store_out->new_output(store_out, file_name);
6248
6627
  sprintf(file_name, "%s.tis", sr_segment);
@@ -6371,7 +6750,7 @@ static void iw_add_segment(IndexWriter *iw, SegmentReader *sr)
6371
6750
  FieldInfo *fi = sub_fis->fields[j];
6372
6751
  FieldInfo *new_fi = fis_get_field(fis, fi->name);
6373
6752
  if (NULL == new_fi) {
6374
- new_fi = fi_new(fi->name, 0, 0, 0);
6753
+ new_fi = fi_new(fi->name, STORE_NO, INDEX_NO, TERM_VECTOR_NO);
6375
6754
  new_fi->bits = fi->bits;
6376
6755
  fis_add_field(fis, new_fi);
6377
6756
  }