jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/index.h ADDED
@@ -0,0 +1,935 @@
1
+ #ifndef FRT_INDEX_H
2
+ #define FRT_INDEX_H
3
+
4
+ #ifdef __cplusplus
5
+ extern "C" {
6
+ #endif
7
+
8
+ #include "global.h"
9
+ #include "document.h"
10
+ #include "analysis.h"
11
+ #include "hash.h"
12
+ #include "hashset.h"
13
+ #include "store.h"
14
+ #include "mempool.h"
15
+ #include "similarity.h"
16
+ #include "bitvector.h"
17
+ #include "priorityqueue.h"
18
+
19
+ typedef struct FrtIndexReader FrtIndexReader;
20
+ typedef struct FrtMultiReader FrtMultiReader;
21
+ typedef struct FrtDeleter FrtDeleter;
22
+
23
+ extern bool frt_file_name_filter_is_index_file(const char *file_name, bool include_locks);
24
+
25
+ /****************************************************************************
26
+ *
27
+ * FrtConfig
28
+ *
29
+ ****************************************************************************/
30
+
31
+ typedef struct FrtConfig
32
+ {
33
+ int chunk_size;
34
+ int max_buffer_memory;
35
+ int index_interval;
36
+ int skip_interval;
37
+ int merge_factor;
38
+ int max_buffered_docs;
39
+ int max_merge_docs;
40
+ int max_field_length;
41
+ bool use_compound_file;
42
+ } FrtConfig;
43
+
44
+ extern const FrtConfig frt_default_config;
45
+
46
+ /***************************************************************************
47
+ *
48
+ * FrtCacheObject
49
+ *
50
+ ***************************************************************************/
51
+
52
+ typedef struct FrtCacheObject {
53
+ FrtHash *ref_tab1;
54
+ FrtHash *ref_tab2;
55
+ void *ref1;
56
+ void *ref2;
57
+ void *obj;
58
+ void (*destroy)(void *p);
59
+ } FrtCacheObject;
60
+
61
+ extern void frt_cache_destroy(FrtCacheObject *co);
62
+ extern FrtCacheObject *frt_co_create(FrtHash *ref_tab1,
63
+ FrtHash *ref_tab2,
64
+ void *ref1, void *ref2, frt_free_ft destroy, void *obj);
65
+ extern FrtHash *frt_co_hash_create(void); /* (void): takes no arguments; an empty () would leave the parameter list unspecified in C */
66
+
67
+ /****************************************************************************
68
+ *
69
+ * FrtFieldInfo
70
+ *
71
+ ****************************************************************************/
72
+
73
+ typedef enum
74
+ {
75
+ FRT_STORE_NO = 0,
76
+ FRT_STORE_YES = 1,
77
+ FRT_STORE_COMPRESS = 2
78
+ } FrtStoreValue;
79
+
80
+ typedef enum /* NOTE(review): values look like bit flags (1 = indexed, 2 = tokenized, 4 = omit norms), inferred from the constants below; confirm against frt_fi_new */
81
+ {
82
+ FRT_INDEX_NO = 0,
83
+ FRT_INDEX_UNTOKENIZED = 1,
84
+ FRT_INDEX_YES = 3, /* 1 | 2 */
85
+ FRT_INDEX_UNTOKENIZED_OMIT_NORMS = 5, /* 1 | 4 */
86
+ FRT_INDEX_YES_OMIT_NORMS = 7 /* 1 | 2 | 4 */
87
+ } FrtIndexValue;
88
+
89
+ typedef enum /* NOTE(review): values look like bit flags (1 = store term vector, 2 = positions, 4 = offsets), inferred from the constants below; confirm against frt_fi_new */
90
+ {
91
+ FRT_TERM_VECTOR_NO = 0,
92
+ FRT_TERM_VECTOR_YES = 1,
93
+ FRT_TERM_VECTOR_WITH_POSITIONS = 3, /* 1 | 2 */
94
+ FRT_TERM_VECTOR_WITH_OFFSETS = 5, /* 1 | 4 */
95
+ FRT_TERM_VECTOR_WITH_POSITIONS_OFFSETS = 7 /* 1 | 2 | 4 */
96
+ } FrtTermVectorValue;
97
+
98
+ #define FRT_FI_IS_STORED_BM 0x001 /* single-bit masks for the `bits` member; this one is tested by fi_is_stored() */
99
+ #define FRT_FI_IS_COMPRESSED_BM 0x002 /* tested by fi_is_compressed() */
100
+ #define FRT_FI_IS_INDEXED_BM 0x004 /* tested by fi_is_indexed() and fi_has_norms() */
101
+ #define FRT_FI_IS_TOKENIZED_BM 0x008 /* tested by fi_is_tokenized() */
102
+ #define FRT_FI_OMIT_NORMS_BM 0x010 /* tested by fi_omit_norms() and fi_has_norms() */
103
+ #define FRT_FI_STORE_TERM_VECTOR_BM 0x020 /* tested by fi_store_term_vector() */
104
+ #define FRT_FI_STORE_POSITIONS_BM 0x040 /* tested by fi_store_positions() */
105
+ #define FRT_FI_STORE_OFFSETS_BM 0x080 /* tested by fi_store_offsets() */
106
+
107
+ typedef struct FrtFieldInfo
108
+ {
109
+ FrtSymbol name;
110
+ float boost;
111
+ unsigned int bits;
112
+ int number;
113
+ int ref_cnt;
114
+ } FrtFieldInfo;
115
+
116
+ extern FrtFieldInfo *frt_fi_new(FrtSymbol name,
117
+ FrtStoreValue store,
118
+ FrtIndexValue index,
119
+ FrtTermVectorValue term_vector);
120
+ extern char *frt_fi_to_s(FrtFieldInfo *fi);
121
+ extern void frt_fi_deref(FrtFieldInfo *fi);
122
+
123
+ #define fi_is_stored(fi) (((fi)->bits & FRT_FI_IS_STORED_BM) != 0) /* every fi_* predicate takes a pointer to a struct with a `bits` member and evaluates to 0 or 1 */
124
+ #define fi_is_compressed(fi) (((fi)->bits & FRT_FI_IS_COMPRESSED_BM) != 0)
125
+ #define fi_is_indexed(fi) (((fi)->bits & FRT_FI_IS_INDEXED_BM) != 0)
126
+ #define fi_is_tokenized(fi) (((fi)->bits & FRT_FI_IS_TOKENIZED_BM) != 0)
127
+ #define fi_omit_norms(fi) (((fi)->bits & FRT_FI_OMIT_NORMS_BM) != 0)
128
+ #define fi_store_term_vector(fi) (((fi)->bits & FRT_FI_STORE_TERM_VECTOR_BM) != 0)
129
+ #define fi_store_positions(fi) (((fi)->bits & FRT_FI_STORE_POSITIONS_BM) != 0)
130
+ #define fi_store_offsets(fi) (((fi)->bits & FRT_FI_STORE_OFFSETS_BM) != 0)
131
+ #define fi_has_norms(fi)\
132
+ (((fi)->bits & (FRT_FI_OMIT_NORMS_BM|FRT_FI_IS_INDEXED_BM)) == FRT_FI_IS_INDEXED_BM) /* true only when the indexed bit is set and the omit-norms bit is clear */
133
+
134
+ /****************************************************************************
135
+ *
136
+ * FrtFieldInfos
137
+ *
138
+ ****************************************************************************/
139
+
140
+ #define FIELD_INFOS_INIT_CAPA 4
141
+ /* carry changes over to dummy_fis in test/test_segments.c */
142
+ typedef struct FrtFieldInfos
143
+ {
144
+ FrtStoreValue store;
145
+ FrtIndexValue index;
146
+ FrtTermVectorValue term_vector;
147
+ int size;
148
+ int capa;
149
+ FrtFieldInfo **fields;
150
+ FrtHash *field_dict;
151
+ int ref_cnt;
152
+ } FrtFieldInfos;
153
+
154
+ extern FrtFieldInfos *frt_fis_new(FrtStoreValue store, FrtIndexValue index, /* `extern` added to match the sibling prototypes; linkage is unchanged */
155
+ FrtTermVectorValue term_vector); /* NOTE(review): presumably these seed FrtFieldInfos.store/index/term_vector; confirm in index.c */
156
+ extern FrtFieldInfo *frt_fis_add_field(FrtFieldInfos *fis, FrtFieldInfo *fi);
157
+ extern FrtFieldInfo *frt_fis_get_field(FrtFieldInfos *fis, FrtSymbol name);
158
+ extern int frt_fis_get_field_num(FrtFieldInfos *fis, FrtSymbol name);
159
+ extern FrtFieldInfo *frt_fis_by_number(FrtFieldInfos *fis, int num);
160
+ extern FrtFieldInfo *frt_fis_get_or_add_field(FrtFieldInfos *fis,
161
+ FrtSymbol name);
162
+ extern void frt_fis_write(FrtFieldInfos *fis, FrtOutStream *os);
163
+ extern FrtFieldInfos *frt_fis_read(FrtInStream *is);
164
+ extern char *frt_fis_to_s(FrtFieldInfos *fis);
165
+ extern void frt_fis_deref(FrtFieldInfos *fis);
166
+
167
+ /****************************************************************************
168
+ *
169
+ * FrtSegmentInfo
170
+ *
171
+ ****************************************************************************/
172
+
173
+ #define FRT_SEGMENT_NAME_MAX_LENGTH 100
174
+ #define FRT_SEGMENTS_FILE_NAME "segments"
175
+
176
+ typedef struct FrtSegmentInfo
177
+ {
178
+ int ref_cnt;
179
+ char *name;
180
+ FrtStore *store;
181
+ int doc_cnt;
182
+ int del_gen;
183
+ int *norm_gens;
184
+ int norm_gens_size;
185
+ bool use_compound_file;
186
+ } FrtSegmentInfo;
187
+
188
+ extern FrtSegmentInfo *frt_si_new(char *name, int doc_cnt, FrtStore *store);
189
+ extern void frt_si_deref(FrtSegmentInfo *si);
190
+ extern bool frt_si_has_deletions(FrtSegmentInfo *si);
191
+ extern bool frt_si_uses_compound_file(FrtSegmentInfo *si);
192
+ extern bool frt_si_has_separate_norms(FrtSegmentInfo *si);
193
+ extern void frt_si_advance_norm_gen(FrtSegmentInfo *si, int field_num);
194
+
195
+ /****************************************************************************
196
+ *
197
+ * FrtSegmentInfos
198
+ *
199
+ ****************************************************************************/
200
+
201
+ typedef struct FrtSegmentInfos
202
+ {
203
+ FrtFieldInfos *fis;
204
+ frt_u64 counter;
205
+ frt_u64 version;
206
+ frt_i64 generation;
207
+ frt_i32 format;
208
+ FrtStore *store;
209
+ FrtSegmentInfo **segs;
210
+ int size;
211
+ int capa;
212
+ } FrtSegmentInfos;
213
+
214
+ extern char *frt_fn_for_generation(char *buf, char *base, char *ext, frt_i64 gen);
215
+
216
+ extern FrtSegmentInfos *frt_sis_new(FrtFieldInfos *fis);
217
+ extern FrtSegmentInfo *frt_sis_new_segment(FrtSegmentInfos *sis, int dcnt, FrtStore *store);
218
+ extern FrtSegmentInfo *frt_sis_add_si(FrtSegmentInfos *sis, FrtSegmentInfo *si);
219
+ extern void frt_sis_del_at(FrtSegmentInfos *sis, int at);
220
+ extern void frt_sis_del_from_to(FrtSegmentInfos *sis, int from, int to);
221
+ extern void frt_sis_clear(FrtSegmentInfos *sis);
222
+ extern FrtSegmentInfos *frt_sis_read(FrtStore *store);
223
+ extern void frt_sis_write(FrtSegmentInfos *sis, FrtStore *store, FrtDeleter *deleter);
224
+ extern frt_u64 frt_sis_read_current_version(FrtStore *store);
225
+ extern void frt_sis_destroy(FrtSegmentInfos *sis);
226
+ extern frt_i64 frt_sis_current_segment_generation(FrtStore *store);
227
+ extern char *frt_sis_curr_seg_file_name(char *buf, FrtStore *store);
228
+ extern void frt_sis_put(FrtSegmentInfos *sis, FILE *stream);
229
+
230
+ /****************************************************************************
231
+ *
232
+ * FrtTermInfo
233
+ *
234
+ ****************************************************************************/
235
+
236
+ typedef struct FrtTermInfo
237
+ {
238
+ int doc_freq;
239
+ off_t frq_ptr;
240
+ off_t prx_ptr;
241
+ off_t skip_offset;
242
+ } FrtTermInfo;
243
+
244
+ #define frt_ti_set(ti, mdf, mfp, mpp, mso) do {\
245
+ (ti).doc_freq = mdf;\
246
+ (ti).frq_ptr = mfp;\
247
+ (ti).prx_ptr = mpp;\
248
+ (ti).skip_offset = mso;\
249
+ } while (0) /* assigns the four members in one statement; `ti` must be an object (accessed with '.'), not a pointer, and is evaluated four times, so pass a side-effect-free lvalue */
250
+
251
+ /****************************************************************************
252
+ *
253
+ * FrtTermEnum
254
+ *
255
+ ****************************************************************************/
256
+
257
+ typedef struct FrtTermEnum FrtTermEnum;
258
+
259
+ struct FrtTermEnum /* term enumerator: inline term buffers plus a table of operations supplied through function pointers */
260
+ {
261
+ char curr_term[FRT_MAX_WORD_SIZE]; /* fixed-size buffer stored inside the struct */
262
+ char prev_term[FRT_MAX_WORD_SIZE]; /* same capacity as curr_term */
263
+ FrtTermInfo curr_ti; /* held by value, not by pointer */
264
+ int curr_term_len; /* presumably the length of the string in curr_term; confirm in index.c */
265
+ int field_num;
266
+ FrtTermEnum *(*set_field)(FrtTermEnum *te, int field_num); /* each operation below receives the enumerator itself as `te` */
267
+ char *(*next)(FrtTermEnum *te);
268
+ char *(*skip_to)(FrtTermEnum *te, const char *term);
269
+ void (*close)(FrtTermEnum *te); /* frt_ste_close declared in this header has this signature */
270
+ FrtTermEnum *(*clone)(FrtTermEnum *te); /* frt_ste_clone declared in this header has this signature */
271
+ };
272
+
273
+ extern char *frt_te_get_term(struct FrtTermEnum *te); /* `extern` added to match the other prototypes in this header; linkage is unchanged */
274
+ extern FrtTermInfo *frt_te_get_ti(struct FrtTermEnum *te); /* NOTE(review): presumably accessors for curr_term / curr_ti; confirm in index.c */
275
+
276
+ /****************************************************************************
277
+ *
278
+ * FrtSegmentTermEnum
279
+ *
280
+ ****************************************************************************/
281
+
282
+ /* * FrtSegmentTermIndex * */
283
+
284
+ typedef struct FrtSegmentTermIndex
285
+ {
286
+ off_t index_ptr;
287
+ off_t ptr;
288
+ int index_cnt;
289
+ int size;
290
+ char **index_terms;
291
+ int *index_term_lens;
292
+ FrtTermInfo *index_term_infos;
293
+ off_t *index_ptrs;
294
+ } FrtSegmentTermIndex;
295
+
296
+ /* * FrtSegmentFieldIndex * */
297
+
298
+ typedef struct FrtSegmentTermEnum FrtSegmentTermEnum;
299
+
300
+ typedef struct FrtSegmentFieldIndex
301
+ {
302
+ frt_mutex_t mutex;
303
+ int skip_interval;
304
+ int index_interval;
305
+ off_t index_ptr;
306
+ FrtTermEnum *index_te;
307
+ FrtHash *field_dict;
308
+ } FrtSegmentFieldIndex;
309
+
310
+ extern FrtSegmentFieldIndex *frt_sfi_open(FrtStore *store, const char *segment);
311
+ extern void frt_sfi_close(FrtSegmentFieldIndex *sfi);
312
+
313
+
314
+ /* * FrtSegmentTermEnum * */
315
+ struct FrtSegmentTermEnum
316
+ {
317
+ FrtTermEnum te; /* embedded by value as the first member, so a pointer to this struct converts to a valid FrtTermEnum pointer */
318
+ FrtInStream *is; /* same parameter type and name as the stream passed to frt_ste_new() */
319
+ int size;
320
+ int pos;
321
+ int skip_interval;
322
+ FrtSegmentFieldIndex *sfi; /* same parameter type and name as the index passed to frt_ste_new() */
323
+ };
324
+
325
+ extern void frt_ste_close(FrtTermEnum *te);
326
+ extern FrtTermEnum *frt_ste_clone(FrtTermEnum *te);
327
+ extern FrtTermEnum *frt_ste_new(FrtInStream *is, FrtSegmentFieldIndex *sfi);
328
+
329
+ /* * MultiTermEnum * */
330
+
331
+ extern FrtTermEnum *frt_mte_new(FrtMultiReader *mr, int field_num, const char *term);
332
+
333
+ /****************************************************************************
334
+ *
335
+ * FrtTermInfosReader
336
+ *
337
+ ****************************************************************************/
338
+
339
+ #define FRT_TE_BUCKET_INIT_CAPA 1
340
+
341
+ typedef struct FrtTermInfosReader
342
+ {
343
+ frt_thread_key_t thread_te;
344
+ void **te_bucket;
345
+ FrtTermEnum *orig_te;
346
+ int field_num;
347
+ } FrtTermInfosReader;
348
+
349
+ extern FrtTermInfosReader *frt_tir_open(FrtStore *store,
350
+ FrtSegmentFieldIndex *sfi,
351
+ const char *segment);
352
+ extern FrtTermInfosReader *frt_tir_set_field(FrtTermInfosReader *tir, int field_num);
353
+ extern FrtTermInfo *frt_tir_get_ti(FrtTermInfosReader *tir, const char *term);
354
+ extern char *frt_tir_get_term(FrtTermInfosReader *tir, int pos);
355
+ extern void frt_tir_close(FrtTermInfosReader *tir);
356
+
357
+ /****************************************************************************
358
+ *
359
+ * FrtTermInfosWriter
360
+ *
361
+ ****************************************************************************/
362
+
363
+ #define FRT_INDEX_INTERVAL 128
364
+ #define FRT_SKIP_INTERVAL 16
365
+
366
+ typedef struct FrtTermWriter
367
+ {
368
+ int counter;
369
+ const char *last_term;
370
+ FrtTermInfo last_term_info;
371
+ FrtOutStream *os;
372
+ } FrtTermWriter;
373
+
374
+ typedef struct FrtTermInfosWriter
375
+ {
376
+ int field_count;
377
+ int index_interval;
378
+ int skip_interval;
379
+ off_t last_index_ptr;
380
+ FrtOutStream *tfx_out;
381
+ FrtTermWriter *tix_writer;
382
+ FrtTermWriter *tis_writer;
383
+ } FrtTermInfosWriter;
384
+
385
+ extern FrtTermInfosWriter *frt_tiw_open(FrtStore *store,
386
+ const char *segment,
387
+ int index_interval,
388
+ int skip_interval);
389
+ extern void frt_tiw_start_field(FrtTermInfosWriter *tiw, int field_num);
390
+ extern void frt_tiw_add(FrtTermInfosWriter *tiw,
391
+ const char *term,
392
+ int t_len,
393
+ FrtTermInfo *ti);
394
+ extern void frt_tiw_close(FrtTermInfosWriter *tiw);
395
+
396
+ /****************************************************************************
397
+ *
398
+ * FrtTermDocEnum
399
+ *
400
+ ****************************************************************************/
401
+
402
+ typedef struct FrtTermDocEnum FrtTermDocEnum;
403
+ struct FrtTermDocEnum /* holds only function pointers; FrtSegmentTermDocEnum embeds it as its `tde` member */
404
+ {
405
+ void (*seek)(FrtTermDocEnum *tde, int field_num, const char *term); /* three seek variants: by field number and term text, ... */
406
+ void (*seek_te)(FrtTermDocEnum *tde, FrtTermEnum *te); /* ... by term enumerator, ... */
407
+ void (*seek_ti)(FrtTermDocEnum *tde, FrtTermInfo *ti); /* ... or by term info */
408
+ int (*doc_num)(FrtTermDocEnum *tde);
409
+ int (*freq)(FrtTermDocEnum *tde);
410
+ bool (*next)(FrtTermDocEnum *tde);
411
+ int (*read)(FrtTermDocEnum *tde, int *docs, int *freqs, int req_num); /* presumably fills docs/freqs with up to req_num entries and returns the count; confirm in index.c */
412
+ bool (*skip_to)(FrtTermDocEnum *tde, int target);
413
+ int (*next_position)(FrtTermDocEnum *tde);
414
+ void (*close)(FrtTermDocEnum *tde);
415
+ };
416
+
417
+ /* * FrtSegmentTermDocEnum * */
418
+
419
+ typedef struct FrtSegmentTermDocEnum FrtSegmentTermDocEnum;
420
+ struct FrtSegmentTermDocEnum
421
+ {
422
+ FrtTermDocEnum tde; /* embedded by value as the first member, so a pointer to this struct converts to a valid FrtTermDocEnum pointer */
423
+ void (*seek_prox)(FrtSegmentTermDocEnum *stde, off_t prx_ptr); /* extra operations that take the segment-level type directly */
424
+ void (*skip_prox)(FrtSegmentTermDocEnum *stde);
425
+ FrtTermInfosReader *tir;
426
+ FrtInStream *frq_in;
427
+ FrtInStream *prx_in; /* frt_stde_new() takes no prx_in argument, frt_stpe_new() does */
428
+ FrtInStream *skip_in;
429
+ FrtBitVector *deleted_docs;
430
+ int count; /* number of docs for this term skipped */
431
+ int doc_freq; /* number of docs this term appears in */
432
+ int doc_num;
433
+ int freq;
434
+ int num_skips;
435
+ int skip_interval;
436
+ int skip_count;
437
+ int skip_doc;
438
+ int prx_cnt;
439
+ int position;
440
+ off_t frq_ptr;
441
+ off_t prx_ptr;
442
+ off_t skip_ptr;
443
+ bool have_skipped : 1; /* one-bit flag */
444
+ };
445
+
446
+ extern FrtTermDocEnum *frt_stde_new(FrtTermInfosReader *tir, FrtInStream *frq_in,
447
+ FrtBitVector *deleted_docs, int skip_interval);
448
+
449
+ /* * FrtSegmentTermPosEnum * */
450
+ extern FrtTermDocEnum *frt_stpe_new(FrtTermInfosReader *tir, FrtInStream *frq_in,
451
+ FrtInStream *prx_in, FrtBitVector *deleted_docs,
452
+ int skip_interval);
453
+
454
+ /****************************************************************************
455
+ * MultipleTermDocPosEnum
456
+ ****************************************************************************/
457
+
458
+ extern FrtTermDocEnum *frt_mtdpe_new(FrtIndexReader *ir, int field_num, char **terms,
459
+ int t_cnt);
460
+
461
+ /****************************************************************************
462
+ *
463
+ * FrtOffset
464
+ *
465
+ ****************************************************************************/
466
+
467
+ typedef struct FrtOffset
468
+ {
469
+ off_t start;
470
+ off_t end;
471
+ } FrtOffset;
472
+
473
+ extern FrtOffset *frt_offset_new(off_t start, off_t end);
474
+
475
+ /****************************************************************************
476
+ *
477
+ * FrtOccurence
478
+ *
479
+ ****************************************************************************/
480
+
481
+ typedef struct FrtOccurence
482
+ {
483
+ struct FrtOccurence *next;
484
+ int pos;
485
+ } FrtOccurence;
486
+
487
+ /****************************************************************************
488
+ *
489
+ * FrtPosting
490
+ *
491
+ ****************************************************************************/
492
+
493
+ typedef struct FrtPosting
494
+ {
495
+ int freq;
496
+ int doc_num;
497
+ FrtOccurence *first_occ;
498
+ struct FrtPosting *next;
499
+ } FrtPosting;
500
+
501
+ extern FrtPosting *frt_p_new(FrtMemoryPool *mp, int doc_num, int pos);
502
+
503
+ /****************************************************************************
504
+ *
505
+ * FrtPostingList
506
+ *
507
+ ****************************************************************************/
508
+
509
+ typedef struct FrtPostingList
510
+ {
511
+ const char *term;
512
+ int term_len;
513
+ FrtPosting *first;
514
+ FrtPosting *last;
515
+ FrtOccurence *last_occ;
516
+ } FrtPostingList;
517
+
518
+ extern FrtPostingList *frt_pl_new(FrtMemoryPool *mp, const char *term,
519
+ int term_len, FrtPosting *p);
520
+ extern void frt_pl_add_occ(FrtMemoryPool *mp, FrtPostingList *pl, int pos);
521
+ extern int frt_pl_cmp(const FrtPostingList **pl1, const FrtPostingList **pl2);
522
+
523
+ /****************************************************************************
524
+ *
525
+ * FrtTVField
526
+ *
527
+ ****************************************************************************/
528
+
529
+ typedef struct FrtTVField
530
+ {
531
+ int field_num;
532
+ int size;
533
+ } FrtTVField;
534
+
535
+ /****************************************************************************
536
+ *
537
+ * FrtTVTerm
538
+ *
539
+ ****************************************************************************/
540
+
541
+ typedef struct FrtTVTerm
542
+ {
543
+ char *text;
544
+ int freq;
545
+ int *positions;
546
+ } FrtTVTerm;
547
+
548
+ /****************************************************************************
549
+ *
550
+ * FrtTermVector
551
+ *
552
+ ****************************************************************************/
553
+
554
+ #define FRT_TV_FIELD_INIT_CAPA 8
555
+ typedef struct FrtTermVector
556
+ {
557
+ int field_num;
558
+ FrtSymbol field;
559
+ int term_cnt;
560
+ FrtTVTerm *terms;
561
+ int offset_cnt;
562
+ FrtOffset *offsets;
563
+ } FrtTermVector;
564
+
565
+ extern void frt_tv_destroy(FrtTermVector *tv);
566
+ extern int frt_tv_get_term_index(FrtTermVector *tv, const char *term);
567
+ extern int frt_tv_scan_to_term_index(FrtTermVector *tv, const char *term);
568
+ extern FrtTVTerm *frt_tv_get_tv_term(FrtTermVector *tv, const char *term);
569
+
570
+ /****************************************************************************
571
+ *
572
+ * FrtLazyDoc
573
+ *
574
+ ****************************************************************************/
575
+
576
+ /* * * FrtLazyDocField * * */
577
+ typedef struct FrtLazyDocFieldData
578
+ {
579
+ off_t start;
580
+ int length;
581
+ char *text;
582
+ } FrtLazyDocFieldData;
583
+
584
+ typedef struct FrtLazyDoc FrtLazyDoc;
585
+ typedef struct FrtLazyDocField
586
+ {
587
+ FrtSymbol name;
588
+ FrtLazyDocFieldData *data; /* array of `size` elements, per the comment on `size` */
589
+ FrtLazyDoc *doc; /* pointer to a FrtLazyDoc; that struct in turn holds an array of FrtLazyDocField pointers */
590
+ int size; /* number of data elements */
591
+ int len; /* length of data elements concatenated */
592
+ bool is_compressed : 2; /* set to 2 after all data is loaded. NOTE(review): the field can only hold 2 if `bool` is an integer typedef in this project; a C99 _Bool would store 1 instead. Confirm in global.h */
593
+ } FrtLazyDocField;
594
+
595
+ extern char *frt_lazy_df_get_data(FrtLazyDocField *self, int i);
596
+ extern void frt_lazy_df_get_bytes(FrtLazyDocField *self, char *buf,
597
+ int start, int len);
598
+
599
+ /* * * FrtLazyDoc * * */
600
+ struct FrtLazyDoc
601
+ {
602
+ FrtHash *field_dictionary;
603
+ int size;
604
+ FrtLazyDocField **fields;
605
+ FrtInStream *fields_in;
606
+ };
607
+
608
+ extern void frt_lazy_doc_close(FrtLazyDoc *self);
609
+ extern FrtLazyDocField *frt_lazy_doc_get(FrtLazyDoc *self, FrtSymbol field);
610
+
611
+ /****************************************************************************
612
+ *
613
+ * FrtFieldsReader
614
+ *
615
+ ****************************************************************************/
616
+
617
+ typedef struct FrtFieldsReader
618
+ {
619
+ int size;
620
+ FrtFieldInfos *fis;
621
+ FrtStore *store;
622
+ FrtInStream *fdx_in;
623
+ FrtInStream *fdt_in;
624
+ } FrtFieldsReader;
625
+
626
+ extern FrtFieldsReader *frt_fr_open(FrtStore *store,
627
+ const char *segment, FrtFieldInfos *fis);
628
+ extern FrtFieldsReader *frt_fr_clone(FrtFieldsReader *orig);
629
+ extern void frt_fr_close(FrtFieldsReader *fr);
630
+ extern FrtDocument *frt_fr_get_doc(FrtFieldsReader *fr, int doc_num);
631
+ extern FrtLazyDoc *frt_fr_get_lazy_doc(FrtFieldsReader *fr, int doc_num);
632
+ extern FrtHash *frt_fr_get_tv(FrtFieldsReader *fr, int doc_num);
633
+ extern FrtTermVector *frt_fr_get_field_tv(FrtFieldsReader *fr, int doc_num,
634
+ int field_num);
635
+
636
+ /****************************************************************************
637
+ *
638
+ * FrtFieldsWriter
639
+ *
640
+ ****************************************************************************/
641
+
642
+ typedef struct FrtFieldsWriter
643
+ {
644
+ FrtFieldInfos *fis;
645
+ FrtOutStream *fdt_out;
646
+ FrtOutStream *fdx_out;
647
+ FrtOutStream *buffer;
648
+ FrtTVField *tv_fields;
649
+ off_t start_ptr;
650
+ } FrtFieldsWriter;
651
+
652
+ extern FrtFieldsWriter *frt_fw_open(FrtStore *store,
653
+ const char *segment, FrtFieldInfos *fis);
654
+ extern void frt_fw_close(FrtFieldsWriter *fw);
655
+ extern void frt_fw_add_doc(FrtFieldsWriter *fw, FrtDocument *doc);
656
+ extern void frt_fw_add_postings(FrtFieldsWriter *fw,
657
+ int field_num,
658
+ FrtPostingList **plists,
659
+ int posting_count,
660
+ FrtOffset *offsets,
661
+ int offset_count);
662
+ extern void frt_fw_write_tv_index(FrtFieldsWriter *fw);
663
+
664
+ /****************************************************************************
665
+ *
666
+ * FrtDeleter
667
+ *
668
+ * A utility class (used by both FrtIndexReader and FrtIndexWriter) to keep track of
669
+ * files that need to be deleted because they are no longer referenced by the
670
+ * index.
671
+ *
672
+ ****************************************************************************/
673
+
674
+ struct FrtDeleter /* struct body only; the FrtDeleter type name used by the prototypes after it has to come from an earlier declaration */
675
+ {
676
+ FrtStore *store; /* store; frt_deleter_new() receives a FrtStore* argument of the same name */
677
+ FrtSegmentInfos *sis; /* segment infos; frt_deleter_new() receives a FrtSegmentInfos* argument of the same name */
678
+ FrtHashSet *pending; /* NOTE(review): presumably the files handled by the frt_deleter_*_pending_files() functions -- confirm */
679
+ };
680
+
681
+ extern FrtDeleter *frt_deleter_new(FrtSegmentInfos *sis, FrtStore *store); /* takes segment infos and a store; returns a FrtDeleter* */
682
+ extern void frt_deleter_destroy(FrtDeleter *dlr); /* NOTE(review): presumably frees dlr; whether sis/store are also released is not visible here */
683
+ extern void frt_deleter_clear_pending_files(FrtDeleter *dlr);
684
+ extern void frt_deleter_delete_file(FrtDeleter *dlr, char *file_name); /* file_name is a non-const char*; NOTE(review): whether it is modified or retained is not visible here */
685
+ extern void frt_deleter_find_deletable_files(FrtDeleter *dlr);
686
+ extern void frt_deleter_commit_pending_files(FrtDeleter *dlr);
687
+ extern void frt_deleter_delete_files(FrtDeleter *dlr, char **files, int file_cnt); /* NOTE(review): file_cnt is presumably the number of entries in files -- confirm */
688
+
689
+ /****************************************************************************
690
+ *
691
+ * FrtIndexReader
692
+ * A struct made of function-pointer members (each taking the reader itself as its first argument) followed by data members.
693
+ ****************************************************************************/
694
+
695
+ #define FRT_WRITE_LOCK_NAME "write" /* NOTE(review): presumably the lock name behind the write_lock members -- confirm */
696
+ #define FRT_COMMIT_LOCK_NAME "commit" /* NOTE(review): presumably the name of a lock taken while committing -- confirm */
697
+
698
+ struct FrtIndexReader
699
+ {
700
+ int (*num_docs)(FrtIndexReader *ir); /* function-pointer members start here */
701
+ int (*max_doc)(FrtIndexReader *ir);
702
+ FrtDocument *(*get_doc)(FrtIndexReader *ir, int doc_num);
703
+ FrtLazyDoc *(*get_lazy_doc)(FrtIndexReader *ir, int doc_num);
704
+ frt_uchar *(*get_norms)(FrtIndexReader *ir, int field_num); /* takes an integer field_num, unlike frt_ir_get_norms() which takes a FrtSymbol */
705
+ frt_uchar *(*get_norms_into)(FrtIndexReader *ir, int field_num,
706
+ frt_uchar *buf); /* variant that additionally receives a caller-supplied buf */
707
+ FrtTermEnum *(*terms)(FrtIndexReader *ir, int field_num);
708
+ FrtTermEnum *(*terms_from)(FrtIndexReader *ir, int field_num,
709
+ const char *term);
710
+ int (*doc_freq)(FrtIndexReader *ir, int field_num,
711
+ const char *term);
712
+ FrtTermDocEnum *(*term_docs)(FrtIndexReader *ir);
713
+ FrtTermDocEnum *(*term_positions)(FrtIndexReader *ir);
714
+ FrtTermVector *(*term_vector)(FrtIndexReader *ir, int doc_num,
715
+ FrtSymbol field); /* the one function pointer here that takes a FrtSymbol field rather than an int field_num */
716
+ FrtHash *(*term_vectors)(FrtIndexReader *ir, int doc_num);
717
+ bool (*is_deleted)(FrtIndexReader *ir, int doc_num);
718
+ bool (*has_deletions)(FrtIndexReader *ir);
719
+ void (*acquire_write_lock)(FrtIndexReader *ir);
720
+ void (*set_norm_i)(FrtIndexReader *ir, int doc_num,
721
+ int field_num, frt_uchar val); /* NOTE(review): the _i members presumably back the like-named frt_ir_* functions (e.g. frt_ir_set_norm) -- confirm */
722
+ void (*delete_doc_i)(FrtIndexReader *ir, int doc_num);
723
+ void (*undelete_all_i)(FrtIndexReader *ir);
724
+ void (*set_deleter_i)(FrtIndexReader *ir, FrtDeleter *dlr);
725
+ bool (*is_latest_i)(FrtIndexReader *ir);
726
+ void (*commit_i)(FrtIndexReader *ir);
727
+ void (*close_i)(FrtIndexReader *ir);
728
+ int ref_cnt; /* data members start here; NOTE(review): presumably a reference count -- confirm who updates it */
729
+ FrtDeleter *deleter;
730
+ FrtStore *store;
731
+ FrtLock *write_lock;
732
+ FrtSegmentInfos *sis;
733
+ FrtFieldInfos *fis;
734
+ FrtHash *cache;
735
+ FrtHash *field_index_cache;
736
+ frt_mutex_t field_index_mutex; /* NOTE(review): presumably guards field_index_cache -- confirm */
737
+ frt_uchar *fake_norms;
738
+ frt_mutex_t mutex; /* NOTE(review): what this mutex protects is not visible here */
739
+ bool has_changes : 1; /* one-bit flag */
740
+ bool is_stale : 1; /* one-bit flag */
741
+ bool is_owner : 1; /* one-bit flag; frt_ir_create() takes an int argument of the same name */
742
+ };
743
+
744
+ extern FrtIndexReader *frt_ir_create(FrtStore *store, FrtSegmentInfos *sis, int is_owner); /* is_owner is an int here, while the struct member of that name is a one-bit bool */
745
+ extern FrtIndexReader *frt_ir_open(FrtStore *store); /* needs only a store; returns a FrtIndexReader* */
746
+ extern int frt_ir_get_field_num(FrtIndexReader *ir, FrtSymbol field); /* FrtSymbol in, int out; NOTE(review): presumably the field_num the function-pointer members expect -- confirm, including the result for an unknown field */
747
+ extern bool frt_ir_index_exists(FrtStore *store);
748
+ extern void frt_ir_close(FrtIndexReader *ir);
749
+ extern void frt_ir_commit(FrtIndexReader *ir);
750
+ extern void frt_ir_delete_doc(FrtIndexReader *ir, int doc_num);
751
+ extern void frt_ir_undelete_all(FrtIndexReader *ir);
752
+ extern int frt_ir_doc_freq(FrtIndexReader *ir, FrtSymbol field, const char *term); /* FrtSymbol-based counterpart of the doc_freq member, which takes an int field_num */
753
+ extern void frt_ir_set_norm(FrtIndexReader *ir, int doc_num, FrtSymbol field,
754
+ frt_uchar val);
755
+ extern frt_uchar *frt_ir_get_norms_i(FrtIndexReader *ir, int field_num); /* the only norms accessor in this list that takes an int field_num */
756
+ extern frt_uchar *frt_ir_get_norms(FrtIndexReader *ir, FrtSymbol field);
757
+ extern frt_uchar *frt_ir_get_norms_into(FrtIndexReader *ir, FrtSymbol field, frt_uchar *buf); /* additionally receives a caller-supplied buf; NOTE(review): required buffer size is not visible here */
758
+ extern void frt_ir_destroy(FrtIndexReader *self); /* parameter is named self rather than ir; NOTE(review): how this differs from frt_ir_close() is not visible here */
759
+ extern FrtDocument *frt_ir_get_doc_with_term(FrtIndexReader *ir, FrtSymbol field,
760
+ const char *term); /* NOTE(review): result when no document matches is not visible here */
761
+ extern FrtTermEnum *frt_ir_terms(FrtIndexReader *ir, FrtSymbol field);
762
+ extern FrtTermEnum *frt_ir_terms_from(FrtIndexReader *ir, FrtSymbol field,
763
+ const char *t);
764
+ extern FrtTermDocEnum *frt_ir_term_docs_for(FrtIndexReader *ir, FrtSymbol field,
765
+ const char *term);
766
+ extern FrtTermDocEnum *frt_ir_term_positions_for(FrtIndexReader *ir,
767
+ FrtSymbol field,
768
+ const char *t);
769
+ extern void frt_ir_add_cache(FrtIndexReader *ir);
770
+ extern bool frt_ir_is_latest(FrtIndexReader *ir);
771
+
772
+ /****************************************************************************
773
+ * FrtMultiReader
774
+ ****************************************************************************/
775
+
776
+ struct FrtMultiReader { /* a FrtIndexReader extended with fields describing a set of sub-readers */
777
+ FrtIndexReader ir; /* embedded by value as the first member; NOTE(review): presumably so the struct can be handed out as a FrtIndexReader*, as frt_mr_open() returns -- confirm */
778
+ int max_doc;
779
+ int num_docs_cache; /* NOTE(review): presumably a cached result for the num_docs function pointer -- confirm how "not cached" is encoded */
780
+ int r_cnt; /* frt_mr_open() receives an argument of the same name next to sub_readers */
781
+ int *starts; /* NOTE(review): presumably the first document number of each sub-reader -- confirm */
782
+ FrtIndexReader **sub_readers;
783
+ FrtHash *norms_cache;
784
+ bool has_deletions : 1; /* one-bit flag */
785
+ int **field_num_map; /* NOTE(review): presumably indexed by sub-reader and field number, cf. frt_mr_get_field_num() -- confirm */
786
+ };
787
+
788
+ extern int frt_mr_get_field_num(FrtMultiReader *mr, int ir_num, int f_num); /* takes a sub-reader number and a field number, returns an int; NOTE(review): direction of the mapping is not visible here */
789
+ extern FrtIndexReader *frt_mr_open(FrtIndexReader **sub_readers, const int r_cnt); /* returns a FrtIndexReader*, not a FrtMultiReader*; NOTE(review): ownership of sub_readers after the call is not visible here */
790
+
791
+
792
+ /****************************************************************************
793
+ *
794
+ * FrtBoost
795
+ * List node pairing a float value with a document number; nodes chain through the next pointer.
796
+ ****************************************************************************/
797
+
798
+ typedef struct FrtBoost
799
+ {
800
+ float val; /* the boost value */
801
+ int doc_num; /* the document number stored with val */
802
+ struct FrtBoost *next; /* link to another FrtBoost node */
803
+ } FrtBoost;
804
+
805
+ /****************************************************************************
806
+ *
807
+ * FrtFieldInverter
808
+ * Holds a FrtFieldInfo pointer together with a FrtHash, a norms buffer, a length and four one-bit flags.
809
+ ****************************************************************************/
810
+
811
+ typedef struct FrtFieldInverter
812
+ {
813
+ FrtHash *plists; /* NOTE(review): presumably posting lists keyed by term -- confirm */
814
+ frt_uchar *norms;
815
+ FrtFieldInfo *fi; /* frt_dw_get_fld_inv() takes a FrtFieldInfo* and returns a FrtFieldInverter* */
816
+ int length; /* NOTE(review): unit (tokens, bytes, ...) is not visible here */
817
+ bool is_tokenized : 1; /* one-bit flag */
818
+ bool store_term_vector : 1; /* one-bit flag */
819
+ bool store_offsets : 1; /* one-bit flag */
820
+ bool has_norms : 1; /* one-bit flag */
821
+ } FrtFieldInverter;
822
+
823
+ /****************************************************************************
824
+ *
825
+ * FrtDocWriter
826
+ * Bundles the store, segment info, field infos, fields writer, analyzer and related state that the frt_dw_* functions take.
827
+ ****************************************************************************/
828
+
829
+ #define DW_OFFSET_INIT_CAPA 512 /* NOTE(review): presumably the initial offsets_capa; unlike FRT_CW_INIT_CAPA this macro has no FRT_ prefix */
830
+ typedef struct FrtIndexWriter FrtIndexWriter; /* forward declaration; the struct body appears further down in this header */
831
+
832
+ typedef struct FrtDocWriter
833
+ {
834
+ FrtStore *store;
835
+ FrtSegmentInfo *si; /* frt_dw_open() and frt_dw_new_segment() each take a FrtSegmentInfo* */
836
+ FrtFieldInfos *fis;
837
+ FrtFieldsWriter *fw;
838
+ FrtMemoryPool *mp;
839
+ FrtAnalyzer *analyzer;
840
+ FrtHash *curr_plists;
841
+ FrtHash *fields; /* NOTE(review): presumably the FrtFieldInverter objects returned by frt_dw_get_fld_inv() -- confirm */
842
+ FrtSimilarity *similarity;
843
+ FrtOffset *offsets; /* NOTE(review): presumably a growable array described by offsets_size / offsets_capa -- confirm */
844
+ int offsets_size;
845
+ int offsets_capa;
846
+ int doc_num;
847
+ int index_interval; /* NOTE(review): this and the three ints after it presumably mirror FrtConfig settings -- confirm */
848
+ int skip_interval;
849
+ int max_field_length;
850
+ int max_buffered_docs;
851
+ } FrtDocWriter;
852
+
853
+ extern FrtDocWriter *frt_dw_open(FrtIndexWriter *iw, FrtSegmentInfo *si); /* takes an index writer and a segment info; returns a FrtDocWriter* */
854
+ extern void frt_dw_close(FrtDocWriter *dw); /* NOTE(review): presumably releases a writer obtained from frt_dw_open() -- confirm */
855
+ extern void frt_dw_add_doc(FrtDocWriter *dw, FrtDocument *doc); /* takes one FrtDocument; nothing is returned */
856
+ extern void frt_dw_new_segment(FrtDocWriter *dw, FrtSegmentInfo *si); /* hands an existing writer another FrtSegmentInfo */
857
+ /* FIXME: exposed for testing only; should be removed from this public header once the tests no longer need it. */
858
+ extern FrtHash *frt_dw_invert_field(FrtDocWriter *dw,
859
+ FrtFieldInverter *fld_inv,
860
+ FrtDocField *df); /* takes a field inverter and a document field; returns a FrtHash* */
861
+ extern FrtFieldInverter *frt_dw_get_fld_inv(FrtDocWriter *dw, FrtFieldInfo *fi); /* returns the FrtFieldInverter* for a FrtFieldInfo; NOTE(review): whether it is created on demand is not visible here */
862
+ extern void frt_dw_reset_postings(FrtHash *postings); /* takes only a FrtHash*, no FrtDocWriter */
863
+
864
+ /****************************************************************************
865
+ *
866
+ * FrtIndexWriter
867
+ * Defines FrtDelTerm (a field number plus a term string) and the FrtIndexWriter struct that the frt_iw_* functions take.
868
+ ****************************************************************************/
869
+
870
+ typedef struct FrtDelTerm
871
+ {
872
+ int field_num; /* integer field number */
873
+ char *term; /* term text; NOTE(review): ownership of the string is not visible here */
874
+ } FrtDelTerm;
875
+
876
+ struct FrtIndexWriter
877
+ {
878
+ FrtConfig config; /* held by value, whereas frt_iw_open() receives a const FrtConfig* */
879
+ frt_mutex_t mutex; /* NOTE(review): what this mutex protects is not visible here */
880
+ FrtStore *store;
881
+ FrtAnalyzer *analyzer;
882
+ FrtSegmentInfos *sis;
883
+ FrtFieldInfos *fis;
884
+ FrtDocWriter *dw;
885
+ FrtSimilarity *similarity;
886
+ FrtLock *write_lock;
887
+ FrtDeleter *deleter;
888
+ };
889
+
890
+ extern void frt_index_create(FrtStore *store, FrtFieldInfos *fis); /* takes a store and field infos; no writer object is involved */
890
+ extern bool frt_index_is_locked(FrtStore *store);
891
+ extern FrtIndexWriter *frt_iw_open(FrtStore *store, FrtAnalyzer *analyzer,
892
+ const FrtConfig *config); /* config is const-qualified; NOTE(review): whether NULL is accepted is not visible here */
893
+ extern void frt_iw_delete_term(FrtIndexWriter *iw, FrtSymbol field,
894
+ const char *term);
895
+ extern void frt_iw_delete_terms(FrtIndexWriter *iw, FrtSymbol field,
896
+ char **terms, const int term_cnt); /* NOTE(review): term_cnt is presumably the number of entries in terms -- confirm */
897
+ extern void frt_iw_close(FrtIndexWriter *iw);
898
+ extern void frt_iw_add_doc(FrtIndexWriter *iw, FrtDocument *doc);
899
+ extern int frt_iw_doc_count(FrtIndexWriter *iw);
900
+ extern void frt_iw_commit(FrtIndexWriter *iw);
901
+ extern void frt_iw_optimize(FrtIndexWriter *iw);
902
+ extern void frt_iw_add_readers(FrtIndexWriter *iw, FrtIndexReader **readers,
903
+ const int r_cnt); /* NOTE(review): r_cnt is presumably the number of entries in readers -- confirm */
905
+
906
+ /****************************************************************************
907
+ *
908
+ * FrtCompoundWriter
909
+ * Defines FrtCWFileEntry (a name plus two off_t offsets) and the FrtCompoundWriter struct that frt_cw_add_file() and frt_cw_close() take.
910
+ ****************************************************************************/
911
+
912
+ #define FRT_CW_INIT_CAPA 16 /* NOTE(review): presumably the initial capacity of file_entries -- confirm */
913
+ typedef struct FrtCWFileEntry
914
+ {
915
+ char *name;
916
+ off_t dir_offset; /* NOTE(review): presumably a position in the compound file's directory -- confirm */
917
+ off_t data_offset; /* NOTE(review): presumably the position of the entry's data -- confirm */
918
+ } FrtCWFileEntry;
919
+
920
+ typedef struct FrtCompoundWriter {
921
+ FrtStore *store;
922
+ const char *name; /* const here, although frt_open_cw() takes a non-const char *name */
923
+ FrtHashSet *ids; /* NOTE(review): presumably the ids passed to frt_cw_add_file() -- confirm */
924
+ FrtCWFileEntry *file_entries;
925
+ } FrtCompoundWriter;
926
+
927
+ extern FrtCompoundWriter *frt_open_cw(FrtStore *store, char *name); /* named frt_open_cw, not frt_cw_open like its siblings; returns a FrtCompoundWriter* */
928
+ extern void frt_cw_add_file(FrtCompoundWriter *cw, char *id); /* NOTE(review): whether id is copied or retained is not visible here */
929
+ extern void frt_cw_close(FrtCompoundWriter *cw); /* NOTE(review): presumably writes out and releases cw -- confirm */
930
+
931
+ #ifdef __cplusplus
932
+ } // extern "C"
933
+ #endif
934
+
935
+ #endif