jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/store.h ADDED
@@ -0,0 +1,799 @@
1
+ #ifndef FRT_STORE_H
2
+ #define FRT_STORE_H
3
+
4
+ #include <sys/types.h>
5
+ #include "global.h"
6
+ #include "hash.h"
7
+ #include "hashset.h"
8
+ #include "threading.h"
9
+
10
+ #define FRT_LOCK_PREFIX "ferret-"
11
+ #define FRT_LOCK_EXT ".lck"
12
+
13
+ typedef struct FrtBuffer
14
+ {
15
+ frt_uchar buf[FRT_BUFFER_SIZE];
16
+ off_t start;
17
+ off_t pos;
18
+ off_t len;
19
+ } FrtBuffer;
20
+
21
+ typedef struct FrtOutStream FrtOutStream;
22
+ struct FrtOutStreamMethods {
23
+ /* internal functions for the FrtOutStream */
24
+ /**
25
+ * Flush +len+ characters from +src+ to the output stream +os+
26
+ *
27
+ * @param os self
28
+ * @param src the characters to write to the output stream
29
+ * @param len the number of characters to write
30
+ * @raise FRT_IO_ERROR if there is an error writing the characters
31
+ */
32
+ void (*flush_i)(struct FrtOutStream *os, const frt_uchar *buf, int len);
33
+
34
+ /**
35
+ * Seek +pos+ in the output stream
36
+ *
37
+ * @param os self
38
+ * @param pos the position to seek in the stream
39
+ * @raise FRT_IO_ERROR if there is an error seeking in the output stream
40
+ */
41
+ void (*seek_i)(struct FrtOutStream *os, off_t pos);
42
+
43
+ /**
44
+ * Close any resources used by the output stream +os+
45
+ *
46
+ * @param os self
47
+ * @raise FRT_IO_ERROR if there is an error closing the file
48
+ */
49
+ void (*close_i)(struct FrtOutStream *os);
50
+ };
51
+
52
+ typedef struct FrtRAMFile
53
+ {
54
+ char *name;
55
+ frt_uchar **buffers;
56
+ int bufcnt;
57
+ off_t len;
58
+ int ref_cnt;
59
+ } FrtRAMFile;
60
+
61
+ struct FrtOutStream
62
+ {
63
+ FrtBuffer buf;
64
+ union
65
+ {
66
+ int fd;
67
+ FrtRAMFile *rf;
68
+ } file;
69
+ off_t pointer; /* only used by RAMOut */
70
+ const struct FrtOutStreamMethods *m;
71
+ };
72
+
73
+ typedef struct FrtCompoundInStream FrtCompoundInStream;
74
+
75
+ typedef struct FrtInStream FrtInStream;
76
+
77
+ struct FrtInStreamMethods
78
+ {
79
+ /**
80
+ * Read +len+ characters from the input stream into +buf+, an array of
81
+ * unsigned characters.
82
+ *
83
+ * @param is self
84
+ * @param buf an array of characters which must be allocated with at least
85
+ * +len+ bytes
86
+ * @param len the number of bytes to read
87
+ * @raise FRT_IO_ERROR if there is an error reading from the input stream
88
+ */
89
+ void (*read_i)(struct FrtInStream *is, frt_uchar *buf, int len);
90
+
91
+ /**
92
+ * Seek position +pos+ in input stream +is+
93
+ *
94
+ * @param is self
95
+ * @param pos the position to seek
96
+ * @raise FRT_IO_ERROR if the seek fails
97
+ */
98
+ void (*seek_i)(struct FrtInStream *is, off_t pos);
99
+
100
+ /**
101
+ * Returns the length of the input stream +is+
102
+ *
103
+ * @param is self
104
+ * @raise FRT_IO_ERROR if there is an error getting the file length
105
+ */
106
+ off_t (*length_i)(struct FrtInStream *is);
107
+
108
+ /**
109
+ * Close the resources allocated to the inputstream +is+
110
+ *
111
+ * @param is self
112
+ * @raise FRT_IO_ERROR if the close fails
113
+ */
114
+ void (*close_i)(struct FrtInStream *is);
115
+ };
116
+
117
+ struct FrtInStream
118
+ {
119
+ FrtBuffer buf;
120
+ union
121
+ {
122
+ int fd;
123
+ FrtRAMFile *rf;
124
+ } file;
125
+ union
126
+ {
127
+ off_t pointer; /* only used by RAMIn */
128
+ char *path; /* only used by FSIn */
129
+ FrtCompoundInStream *cis;
130
+ } d;
131
+ int *ref_cnt_ptr;
132
+ const struct FrtInStreamMethods *m;
133
+ };
134
+
135
+ struct FrtCompoundInStream
136
+ {
137
+ FrtInStream *sub;
138
+ off_t offset;
139
+ off_t length;
140
+ };
141
+
142
+ #define is_length(mis) ((mis)->m->length_i(mis)) /* length of FrtInStream +mis+; +mis+ is evaluated twice */
143
+
144
+ typedef struct FrtStore FrtStore;
145
+ typedef struct FrtLock FrtLock;
146
+ struct FrtLock
147
+ {
148
+ char *name;
149
+ FrtStore *store;
150
+ int (*obtain)(FrtLock *lock);
151
+ int (*is_locked)(FrtLock *lock);
152
+ void (*release)(FrtLock *lock);
153
+ };
154
+
155
+ typedef struct FrtCompoundStore
156
+ {
157
+ FrtStore *store;
158
+ const char *name;
159
+ FrtHash *entries;
160
+ FrtInStream *stream;
161
+ } FrtCompoundStore;
162
+
163
+ struct FrtStore
164
+ {
165
+ int ref_cnt; /* for fs_store only */
166
+ frt_mutex_t mutex_i; /* for internal use only */
167
+ frt_mutex_t mutex; /* external mutex for use outside */
168
+ union
169
+ {
170
+ char *path; /* for fs_store only */
171
+ FrtHash *ht; /* for ram_store only */
172
+ FrtCompoundStore *cmpd; /* for compound_store only */
173
+ } dir;
174
+
175
+ #ifdef POSH_OS_WIN32
176
+ int file_mode;
177
+ #else
178
+ mode_t file_mode;
179
+ #endif
180
+ FrtHashSet *locks;
181
+
182
+ /**
183
+ * Create the file +filename+ in the +store+.
184
+ *
185
+ * @param store self
186
+ * @param filename the name of the file to create
187
+ * @raise FRT_IO_ERROR if the file cannot be created
188
+ */
189
+ void (*touch)(FrtStore *store, const char *filename);
190
+
191
+ /**
192
+ * Return true if a file of name +filename+ exists in +store+.
193
+ *
194
+ * @param store self
195
+ * @param filename the name of the file to check for
196
+ * @returns true if the file exists
197
+ * @raise FRT_IO_ERROR if there is an error checking for the file's existence
198
+ */
199
+ int (*exists)(FrtStore *store, const char *filename);
200
+
201
+ /**
202
+ * Remove the file +filename+ from the +store+
203
+ *
204
+ * @param store self
205
+ * @param filename the name of the file to remove
206
+ * @returns On success, zero is returned. On error, -1 is returned, and errno
207
+ * is set appropriately.
208
+ */
209
+ int (*remove)(FrtStore *store, const char *filename);
210
+
211
+ /**
212
+ * Rename the file in the +store+ from the name +from+ to the name +to+.
213
+ *
214
+ * @param store self
215
+ * @param from the name of the file to rename
216
+ * @param to the new name of the file
217
+ * @raise FRT_IO_ERROR if there is an error renaming the file
218
+ */
219
+ void (*rename)(FrtStore *store, const char *from, const char *to);
220
+
221
+ /**
222
+ * Returns the number of files in the store.
223
+ *
224
+ * @param store self
225
+ * @return the number of files in the store
226
+ * @raise FRT_IO_ERROR if there is an error opening the directory
227
+ */
228
+ int (*count)(FrtStore *store);
229
+
230
+ /**
231
+ * Call the function +func+ with each filename in the store and the arg
232
+ * that you passed. If you need to open the file you should pass the store
233
+ * as the argument. If you need to pass more than one argument, you should
234
+ * pass a struct.
235
+ *
236
+ * @param store self
237
+ * @param func the function to call with each files name and the +arg+
238
+ * passed
239
+ * @param arg the argument to pass to the function
240
+ * @raise FRT_IO_ERROR if there is an error opening the directory
241
+ */
242
+ void (*each)(FrtStore *store, void (*func)(const char *fname, void *arg),
243
+ void *arg);
244
+
245
+ /**
246
+ * Clear all the locks in the store.
247
+ *
248
+ * @param store self
249
+ * @raise FRT_IO_ERROR if there is an error opening the directory
250
+ */
251
+ void (*clear_locks)(FrtStore *store);
252
+
253
+ /**
254
+ * Clear all files from the store except the lock files.
255
+ *
256
+ * @param store self
257
+ * @raise FRT_IO_ERROR if there is an error deleting the files
258
+ */
259
+ void (*clear)(FrtStore *store);
260
+
261
+ /**
262
+ * Clear all files from the store including the lock files.
263
+ *
264
+ * @param store self
265
+ * @raise FRT_IO_ERROR if there is an error deleting the files
266
+ */
267
+ void (*clear_all)(FrtStore *store);
268
+
269
+ /**
270
+ * Return the length of the file +filename+ in +store+
271
+ *
272
+ * @param store self
273
+ * @param filename the name of the file to check the length of
274
+ * @return the length of the file in bytes
275
+ * @raise FRT_IO_ERROR if there is an error checking the file length
276
+ */
277
+ off_t (*length)(FrtStore *store, const char *filename);
278
+
279
+ /**
280
+ * Allocate the resources needed for the output stream in the +store+ with
281
+ * the name +filename+
282
+ *
283
+ * @param store self
284
+ * @param filename the name of the output stream
285
+ * @return a newly allocated filestream
286
+ * @raise FRT_IO_ERROR if there is an error opening the output stream
287
+ * resources
288
+ */
289
+ FrtOutStream *(*new_output)(FrtStore *store, const char *filename);
290
+
291
+ /**
292
+ * Open an input stream in the +store+ with the name +filename+
293
+ *
294
+ * @param store self
295
+ * @param filename the name of the input stream
296
+ * @raise FRT_FILE_NOT_FOUND_ERROR if the input stream cannot be opened
297
+ */
298
+ FrtInStream *(*open_input)(FrtStore *store, const char *filename);
299
+
300
+ /**
301
+ * Internal function to open the lock named +lockname+ in the +store+
302
+ *
303
+ * @param store self
304
+ * @param lockname the name of the lock to open
305
+ */
306
+ FrtLock *(*open_lock_i)(FrtStore *store, const char *lockname);
307
+
308
+ /**
309
+ * Internal function to close +lock+, the counterpart of open_lock_i.
310
+ *
311
+ * @param lock the lock to close
312
+ * @raise FRT_IO_ERROR if there is an error detecting the lock status
313
+ */
314
+ void (*close_lock_i)(FrtLock *lock);
315
+
316
+ /**
317
+ * Internal function to close the store freeing implementation specific
318
+ * resources.
319
+ *
320
+ * @param store self
321
+ */
322
+ void (*close_i)(FrtStore *store);
323
+ };
324
+
325
+ /**
326
+ * Create a newly allocated file-system FrtStore at the pathname designated. The
327
+ * pathname must be the name of an existing directory.
328
+ *
329
+ * @param pathname the pathname of the directory to be used by the index
330
+ * @return a newly allocated file-system FrtStore.
331
+ */
332
+ extern FrtStore *frt_open_fs_store(const char *pathname);
333
+
334
+ /**
335
+ * Create a newly allocated in-memory or RAM FrtStore.
336
+ *
337
+ * @return a newly allocated RAM FrtStore.
338
+ */
339
+ extern FrtStore *frt_open_ram_store(void);
340
+
341
+ /**
342
+ * Create a newly allocated in-memory or RAM FrtStore. Copy the contents of
343
+ * another store into this store. Then close the other store if required. This
344
+ * method would be used for example to read an index into memory for faster
345
+ * searching.
346
+ *
347
+ * @param store the store whose contents will be copied into the newly allocated RAM
348
+ * store
349
+ * @param close_store close the store whose contents were copied
350
+ * @return a newly allocated RAM FrtStore.
351
+ */
352
+ extern FrtStore *frt_open_ram_store_and_copy(FrtStore *store, bool close_store);
353
+
354
+ /**
355
+ * Open a compound store. This is basically a store which is stored within a
356
+ * single file and can in turn be stored within either a FileSystem or RAM
357
+ * store.
358
+ *
359
+ * @param store the store within which this compound store will be stored
360
+ * @param filename the name of the file in which to store the compound store
361
+ * @return a newly allocated Compound FrtStore.
362
+ */
363
+ extern FrtStore *frt_open_cmpd_store(FrtStore *store, const char *filename);
364
+
365
+ /*
366
+ * == RamStore functions ==
367
+ *
368
+ * These functions are optimizations to be used when you know you are using a
369
+ * Ram FrtOutStream.
370
+ */
371
+
372
+ /**
373
+ * Return the length of the FrtOutStream in bytes.
374
+ *
375
+ * @param os the FrtOutStream whose length you want
376
+ * @return the length of +os+ in bytes
377
+ */
378
+ extern off_t frt_ramo_length(FrtOutStream *os);
379
+
380
+ /**
381
+ * Reset the FrtOutStream removing any data written to it. Since it is a RAM
382
+ * file, all that needs to be done is set the length to 0.
383
+ *
384
+ * @param os the FrtOutStream to reset
385
+ */
386
+ extern void frt_ramo_reset(FrtOutStream *os);
387
+
388
+ /**
389
+ * Write the contents of a RAM FrtOutStream to another FrtOutStream.
390
+ *
391
+ * @param from_os the FrtOutStream to write from
392
+ * @param to_os the FrtOutStream to write to
393
+ */
394
+ extern void frt_ramo_write_to(FrtOutStream *from_os, FrtOutStream *to_os);
395
+
396
+ /**
397
+ * Create a buffer RAM FrtOutStream which is unassociated with any RAM FrtStore.
398
+ * This FrtOutStream can be used to write temporary data to. When the time
399
+ * comes, this data can be written to another FrtOutStream (which might possibly
400
+ * be a file-system FrtOutStream) using frt_ramo_write_to.
401
+ *
402
+ * @return A newly allocated RAM FrtOutStream
403
+ */
404
+ extern FrtOutStream *frt_ram_new_buffer(void);
405
+
406
+ /**
407
+ * Destroy a RAM FrtOutStream which is unassociated with any RAM FrtStore, freeing
408
+ * all resources allocated to it.
409
+ *
410
+ * @param os the FrtOutStream to destroy
411
+ */
412
+ extern void frt_ram_destroy_buffer(FrtOutStream *os);
413
+
414
+ /**
415
+ * Call the function +func+ with the +lock+ locked. The argument +arg+ will be
416
+ * passed to +func+. If you need to pass more than one argument you should use
417
+ * a struct. When the function is finished, release the lock.
418
+ *
419
+ * @param lock lock to be locked while func is called
420
+ * @param func function to call with the lock locked
421
+ * @param arg argument to pass to the function
422
+ * @raise FRT_IO_ERROR if the lock is already locked
423
+ * @see frt_with_lock_name
424
+ */
425
+ extern void frt_with_lock(FrtLock *lock, void (*func)(void *arg), void *arg);
426
+
427
+ /**
428
+ * Create a lock in the +store+ with the name +lock_name+. Call the function
429
+ * +func+ with the lock locked. The argument +arg+ will be passed to +func+.
430
+ * If you need to pass more than one argument you should use a struct. When
431
+ * the function is finished, release and destroy the lock.
432
+ *
433
+ * @param store store to open the lock in
434
+ * @param lock_name name of the lock to open
435
+ * @param func function to call with the lock locked
436
+ * @param arg argument to pass to the function
437
+ * @raise FRT_IO_ERROR if the lock is already locked
438
+ * @see frt_with_lock
439
+ */
440
+ extern void frt_with_lock_name(FrtStore *store, const char *lock_name,
441
+ void (*func)(void *arg), void *arg);
442
+
443
+ /**
444
+ * Remove a reference to the store. If the reference count gets to zero free
445
+ * all resources used by the store.
446
+ *
447
+ * @param store the store to be dereferenced
448
+ */
449
+ extern void frt_store_deref(FrtStore *store);
450
+
451
+ /**
452
+ * Flush the buffered contents of the FrtOutStream to the store.
453
+ *
454
+ * @param os the FrtOutStream to flush
455
+ */
456
+ extern void frt_os_flush(FrtOutStream *os);
457
+
458
+ /**
459
+ * Close the FrtOutStream after flushing the buffers, also freeing all allocated
460
+ * resources.
461
+ *
462
+ * @param os the FrtOutStream to close
463
+ */
464
+ extern void frt_os_close(FrtOutStream *os);
465
+
466
+ /**
467
+ * Return the current position of FrtOutStream +os+.
468
+ *
469
+ * @param os the FrtOutStream to get the position from
470
+ * @return the current position in FrtOutStream +os+
471
+ */
472
+ extern off_t frt_os_pos(FrtOutStream *os);
473
+
474
+ /**
475
+ * Set the current position in FrtOutStream +os+.
476
+ *
477
+ * @param os the FrtOutStream to set the position in
478
+ * @param new_pos the new position in the FrtOutStream
479
+ * @raise FRT_IO_ERROR if there is a file-system IO error seeking the file
480
+ */
481
+ extern void frt_os_seek(FrtOutStream *os, off_t new_pos);
482
+
483
+ /**
484
+ * Write a single byte +b+ to the FrtOutStream +os+
485
+ *
486
+ * @param os the FrtOutStream to write to @param b the byte to write @raise
487
+ * FRT_IO_ERROR if there is an IO error writing to the file-system
488
+ */
489
+ extern void frt_os_write_byte(FrtOutStream *os, frt_uchar b);
490
+ /**
491
+ * Write +len+ bytes from buffer +buf+ to the FrtOutStream +os+.
492
+ *
493
+ * @param os the FrtOutStream to write to
494
+ * @param len the number of bytes to write
495
+ * @param buf the buffer from which to get the bytes to write.
496
+ * @raise FRT_IO_ERROR if there is an IO error writing to the file-system
497
+ */
498
+ extern void frt_os_write_bytes(FrtOutStream *os, const frt_uchar *buf, int len);
499
+
500
+ /**
501
+ * Write a 32-bit signed integer to the FrtOutStream
502
+ *
503
+ * @param os FrtOutStream to write to
504
+ * @param num the 32-bit signed integer to write
505
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
506
+ */
507
+ extern void frt_os_write_i32(FrtOutStream *os, frt_i32 num);
508
+
509
+ /**
510
+ * Write a 64-bit signed integer to the FrtOutStream
511
+ *
512
+ *
513
+ * @param os FrtOutStream to write to
514
+ * @param num the 64-bit signed integer to write
515
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
516
+ */
517
+ extern void frt_os_write_i64(FrtOutStream *os, frt_i64 num);
518
+
519
+ /**
520
+ * Write a 32-bit unsigned integer to the FrtOutStream
521
+ *
522
+ * @param os FrtOutStream to write to
523
+ * @param num the 32-bit unsigned integer to write
524
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
525
+ */
526
+ extern void frt_os_write_u32(FrtOutStream *os, frt_u32 num);
527
+
528
+ /**
529
+ * Write a 64-bit unsigned integer to the FrtOutStream
530
+ *
531
+ * @param os FrtOutStream to write to
532
+ * @param num the 64-bit unsigned integer to write
533
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
534
+ */
535
+ extern void frt_os_write_u64(FrtOutStream *os, frt_u64 num);
536
+
537
+ /**
538
+ * Write an unsigned integer to FrtOutStream in compressed VINT format.
539
+ * TODO: describe VINT format
540
+ *
541
+ * @param os FrtOutStream to write to
542
+ * @param num the integer to write
543
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
544
+ */
545
+ extern void frt_os_write_vint(FrtOutStream *os, register unsigned int num);
546
+
547
+ /**
548
+ * Write an unsigned off_t to FrtOutStream in compressed VINT format.
549
+ * TODO: describe VINT format
550
+ *
551
+ * @param os FrtOutStream to write to
552
+ * @param num the off_t to write
553
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
554
+ */
555
+ extern void frt_os_write_voff_t(FrtOutStream *os, register off_t num);
556
+
557
+ /**
558
+ * Write an unsigned 64bit int to FrtOutStream in compressed VINT format.
559
+ * TODO: describe VINT format
560
+ *
561
+ * @param os FrtOutStream to write to
562
+ * @param num the 64bit int to write
563
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
564
+ */
565
+ extern void frt_os_write_vll(FrtOutStream *os, register frt_u64 num);
566
+
567
+ /**
568
+ * Write a string with known length to the FrtOutStream. A string is an
569
+ * integer +length+ in VINT format (see frt_os_write_vint) followed by
570
+ * +length+ bytes. The string can then be read using frt_is_read_string.
571
+ *
572
+ * @param os FrtOutStream to write to
573
+ * @param str the string to write
574
+ * @param len the length of the string to write
575
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
576
+ */
577
+ extern FRT_INLINE void frt_os_write_string_len(FrtOutStream *os,
578
+ const char *str,
579
+ int len);
580
+
581
+ /**
582
+ * Write a string to the FrtOutStream. A string is an integer +length+ in VINT
583
+ * format (see frt_os_write_vint) followed by +length+ bytes. The string can then
584
+ * be read using frt_is_read_string.
585
+ *
586
+ * @param os FrtOutStream to write to
587
+ * @param str the string to write
588
+ * @raise FRT_IO_ERROR if there is an error writing to the file-system
589
+ */
590
+ extern void frt_os_write_string(FrtOutStream *os, const char *str);
591
+
592
+ /**
593
+ * Get the current position within an FrtInStream.
594
+ *
595
+ * @param is the FrtInStream to get the current position from
596
+ * @return the current position within the FrtInStream +is+
597
+ */
598
+ extern off_t frt_is_pos(FrtInStream *is);
599
+
600
+ /**
601
+ * Set the current position in FrtInStream +is+ to +pos+.
602
+ *
603
+ * @param is the FrtInStream to set the current position in
604
+ * @param pos the position in FrtInStream to seek
605
+ * @raise FRT_IO_ERROR if there is an error seeking from the file-system
606
+ * @raise FRT_EOF_ERROR if there is an attempt to seek past the end of the file
607
+ */
608
+ extern void frt_is_seek(FrtInStream *is, off_t pos);
609
+
610
+ /**
611
+ * Close the FrtInStream freeing all allocated resources.
612
+ *
613
+ * @param is the FrtInStream to close
614
+ * @raise FRT_IO_ERROR if there is an error closing the associated file
615
+ */
616
+ extern void frt_is_close(FrtInStream *is);
617
+
618
+ /**
619
+ * Clone the FrtInStream allocating a new FrtInStream structure
620
+ *
621
+ * @param is the FrtInStream to clone
622
+ * @return a newly allocated FrtInStream which is a clone of +is+
623
+ */
624
+ extern FrtInStream *frt_is_clone(FrtInStream *is);
625
+
626
+ /**
627
+ * Read a single byte (unsigned char) from the FrtInStream +is+.
628
+ *
629
+ * @param is the FrtInStream to read from
630
+ * @return a single unsigned char read from the FrtInStream +is+
631
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
632
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
633
+ */
634
+ extern FRT_INLINE frt_uchar frt_is_read_byte(FrtInStream *is);
635
+
636
+ /**
637
+ * Read +len+ bytes from FrtInStream +is+ and write them to buffer +buf+
638
+ *
639
+ * @param is the FrtInStream to read from
640
+ * @param buf the buffer to read into, that is copy the bytes read to
641
+ * @param len the number of bytes to read
642
+ * @return the resultant buffer +buf+
643
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
644
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
645
+ */
646
+ extern frt_uchar *frt_is_read_bytes(FrtInStream *is, frt_uchar *buf, int len);
647
+
648
+ /**
649
+ * Read a 32-bit unsigned integer from the FrtInStream.
650
+ *
651
+ * @param is the FrtInStream to read from
652
+ * @return a 32-bit signed integer
653
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
654
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
655
+ */
656
+ extern frt_i32 frt_is_read_i32(FrtInStream *is);
657
+
658
+ /**
659
+ * Read a 64-bit signed integer from the FrtInStream.
660
+ *
661
+ * @param is the FrtInStream to read from
662
+ * @return a 64-bit signed integer
663
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
664
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
665
+ */
666
+ extern frt_i64 frt_is_read_i64(FrtInStream *is);
667
+
668
+ /**
669
+ * Read a 32-bit unsigned integer from the FrtInStream.
670
+ *
671
+ * @param is the FrtInStream to read from
672
+ * @return a 32-bit unsigned integer
673
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
674
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
675
+ */
676
+ extern frt_u32 frt_is_read_u32(FrtInStream *is);
677
+
678
+ /**
679
+ * Read a 64-bit unsigned integer from the FrtInStream.
680
+ *
681
+ * @param is the FrtInStream to read from
682
+ * @return a 64-bit unsigned integer
683
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
684
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
685
+ */
686
+ extern frt_u64 frt_is_read_u64(FrtInStream *is);
687
+
688
+ /**
689
+ * Read a compressed (VINT) unsigned integer from the FrtInStream.
690
+ * TODO: describe VINT format
691
+ *
692
+ * @param is the FrtInStream to read from
693
+ * @return an unsigned int
694
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
695
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
696
+ */
697
+ extern FRT_INLINE unsigned int frt_is_read_vint(FrtInStream *is);
698
+
699
+ /**
700
+ * Skip _cnt_ vints. This is a convenience method used for performance reasons
701
+ * to skip large numbers of vints. It is mostly used by TermDocEnums when
702
+ * skipping positions in the proximity index file.
703
+ *
704
+ * @param is the FrtInStream to read from
705
+ * @param cnt the number of vints to skip
706
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
707
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
708
+ */
709
+ extern FRT_INLINE void frt_is_skip_vints(FrtInStream *is, register int cnt);
710
+
711
+ /**
712
+ * Read a compressed (VINT) unsigned off_t from the FrtInStream.
713
+ * TODO: describe VINT format
714
+ *
715
+ * @param is the FrtInStream to read from
716
+ * @return an off_t
717
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
718
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
719
+ */
720
+ extern FRT_INLINE off_t frt_is_read_voff_t(FrtInStream *is);
721
+
722
+ /**
723
+ * Read a compressed (VINT) unsigned 64bit int from the FrtInStream.
724
+ * TODO: describe VINT format
725
+ *
726
+ * @param is the FrtInStream to read from
727
+ * @return a 64bit int
728
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
729
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
730
+ */
731
+ extern FRT_INLINE frt_u64 frt_is_read_vll(FrtInStream *is);
732
+
733
+ /**
734
+ * Read a string from the FrtInStream. A string is an integer +length+ in vint
735
+ * format (see frt_is_read_vint) followed by +length+ bytes. This is the format
736
+ * used by frt_os_write_string.
737
+ *
738
+ * @param is the FrtInStream to read from
739
+ * @return a null-terminated string
740
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
741
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
742
+ */
743
+ extern char *frt_is_read_string(FrtInStream *is);
744
+
745
+ /**
746
+ * Read a string from the FrtInStream. A string is an integer +length+ in vint
747
+ * format (see frt_is_read_vint) followed by +length+ bytes. This is the format
748
+ * used by frt_os_write_string. This method is similar to +frt_is_read_string+ except
749
+ * that it will safely free all memory if there is an error reading the
750
+ * string.
751
+ *
752
+ * @param is the FrtInStream to read from
753
+ * @return a null-terminated string
754
+ * @raise FRT_IO_ERROR if there is an error reading from the file-system
755
+ * @raise FRT_EOF_ERROR if there is an attempt to read past the end of the file
756
+ */
757
+ extern char *frt_is_read_string_safe(FrtInStream *is);
758
+
759
+ /**
760
+ * Copy _cnt_ bytes from FrtInStream _is_ to FrtOutStream _os_.
761
+ *
762
+ * @param is the FrtInStream to read from
763
+ * @param os the FrtOutStream to write to
764
+ * @raise FRT_IO_ERROR
765
+ * @raise FRT_EOF_ERROR
766
+ */
767
+ extern void frt_is2os_copy_bytes(FrtInStream *is, FrtOutStream *os, int cnt);
768
+
769
+ /**
770
+ * Copy _cnt_ vints from FrtInStream _is_ to FrtOutStream _os_.
771
+ *
772
+ * @param is the FrtInStream to read from
773
+ * @param os the FrtOutStream to write to
774
+ * @raise FRT_IO_ERROR
775
+ * @raise FRT_EOF_ERROR
776
+ */
777
+ extern void frt_is2os_copy_vints(FrtInStream *is, FrtOutStream *os, int cnt);
778
+
779
+ /**
780
+ * Print the filenames in a store to a buffer.
781
+ *
782
+ * @param store the store to get the filenames from
783
+ */
784
+ extern char *frt_store_to_s(FrtStore *store);
785
+
786
+ extern FrtLock *frt_open_lock(FrtStore *store, const char *lockname);
787
+ extern void frt_close_lock(FrtLock *lock);
788
+
789
+ /* required by submodules
790
+ * FIXME document. Perhaps include in different header?? */
791
+ extern FrtStore *frt_store_new();
792
+ extern void frt_store_destroy(FrtStore *store);
793
+ extern FrtOutStream *frt_os_new();
794
+ extern FrtInStream *frt_is_new();
795
+ extern int frt_file_is_lock(const char *filename);
796
+ extern bool frt_file_name_filter_is_index_file(const char *file_name,
797
+ bool include_locks);
798
+
799
+ #endif