jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/compound_io.c ADDED
@@ -0,0 +1,384 @@
1
+ #include "index.h"
2
+ #include "array.h"
3
+ #include "internal.h"
4
+
5
+ extern void store_destroy(Store *store);
6
+ extern InStream *is_new();
7
+ extern Store *store_new();
8
+
9
+ /****************************************************************************
10
+ *
11
+ * CompoundStore
12
+ *
13
+ ****************************************************************************/
14
+
15
/* Directory record for one file stored inside a compound file. */
typedef struct FileEntry {
    off_t offset; /* position of the file's data within the compound stream */
    off_t length; /* number of bytes that belong to the file */
} FileEntry;
19
+
20
+ static void cmpd_touch(Store *store, const char *file_name)
21
+ {
22
+ store->dir.cmpd->store->touch(store->dir.cmpd->store, file_name);
23
+ }
24
+
25
+ static int cmpd_exists(Store *store, const char *file_name)
26
+ {
27
+ if (h_get(store->dir.cmpd->entries, file_name) != NULL) {
28
+ return true;
29
+ }
30
+ else {
31
+ return false;
32
+ }
33
+ }
34
+
35
+ /**
36
+ * @throws UNSUPPORTED_ERROR
37
+ */
38
+ static int cmpd_remove(Store *store, const char *file_name)
39
+ {
40
+ (void)store;
41
+ (void)file_name;
42
+ RAISE(UNSUPPORTED_ERROR, "%s", UNSUPPORTED_ERROR_MSG);
43
+ return 0;
44
+ }
45
+
46
+ /**
47
+ * @throws UNSUPPORTED_ERROR
48
+ */
49
+ static void cmpd_rename(Store *store, const char *from, const char *to)
50
+ {
51
+ (void)store;
52
+ (void)from;
53
+ (void)to;
54
+ RAISE(UNSUPPORTED_ERROR, "%s", UNSUPPORTED_ERROR_MSG);
55
+ }
56
+
57
+ static int cmpd_count(Store *store)
58
+ {
59
+ return store->dir.cmpd->entries->size;
60
+ }
61
+
62
+ static void cmpd_each(Store *store,
63
+ void (*func)(const char *fname, void *arg), void *arg)
64
+ {
65
+ Hash *ht = store->dir.cmpd->entries;
66
+ int i;
67
+ for (i = 0; i <= ht->mask; i++) {
68
+ char *fn = (char *)ht->table[i].key;
69
+ if (fn) {
70
+ func(fn, arg);
71
+ }
72
+ }
73
+ }
74
+
75
+
76
+ /**
77
+ * @throws UNSUPPORTED_ERROR
78
+ */
79
+ static void cmpd_clear(Store *store)
80
+ {
81
+ (void)store;
82
+ RAISE(UNSUPPORTED_ERROR, "%s", UNSUPPORTED_ERROR_MSG);
83
+ }
84
+
85
+ static void cmpd_close_i(Store *store)
86
+ {
87
+ CompoundStore *cmpd = store->dir.cmpd;
88
+ if (cmpd->stream == NULL) {
89
+ RAISE(IO_ERROR, "Tried to close already closed compound store");
90
+ }
91
+
92
+ h_destroy(cmpd->entries);
93
+
94
+ is_close(cmpd->stream);
95
+ cmpd->stream = NULL;
96
+ free(store->dir.cmpd);
97
+ store_destroy(store);
98
+ }
99
+
100
+ static off_t cmpd_length(Store *store, const char *file_name)
101
+ {
102
+ FileEntry *fe = (FileEntry *)h_get(store->dir.cmpd->entries, file_name);
103
+ if (fe != NULL) {
104
+ return fe->length;
105
+ }
106
+ else {
107
+ return 0;
108
+ }
109
+ }
110
+
111
+ static void cmpdi_seek_i(InStream *is, off_t pos)
112
+ {
113
+ (void)is;
114
+ (void)pos;
115
+ }
116
+
117
+ static void cmpdi_close_i(InStream *is)
118
+ {
119
+ free(is->d.cis);
120
+ }
121
+
122
+ static off_t cmpdi_length_i(InStream *is)
123
+ {
124
+ return (is->d.cis->length);
125
+ }
126
+
127
+ /*
128
+ * raises: EOF_ERROR
129
+ */
130
+ static void cmpdi_read_i(InStream *is, uchar *b, int len)
131
+ {
132
+ CompoundInStream *cis = is->d.cis;
133
+ off_t start = is_pos(is);
134
+
135
+ if ((start + len) > cis->length) {
136
+ RAISE(EOF_ERROR, "Tried to read past end of file. File length is "
137
+ "<%"OFF_T_PFX"d> and tried to read to <%"OFF_T_PFX"d>",
138
+ cis->length, start + len);
139
+ }
140
+
141
+ is_seek(cis->sub, cis->offset + start);
142
+ is_read_bytes(cis->sub, b, len);
143
+ }
144
+
145
/*
 * Method table installed on every compound input stream (see
 * cmpd_create_input). The initializer is positional, so the order here must
 * match the field order of struct InStreamMethods, which is declared
 * elsewhere.
 */
static const struct InStreamMethods CMPD_IN_STREAM_METHODS = {
    cmpdi_read_i,
    cmpdi_seek_i,
    cmpdi_length_i,
    cmpdi_close_i
};
151
+
152
+ static InStream *cmpd_create_input(InStream *sub_is, off_t offset, off_t length)
153
+ {
154
+ InStream *is = is_new();
155
+ CompoundInStream *cis = ALLOC(CompoundInStream);
156
+
157
+ cis->sub = sub_is;
158
+ cis->offset = offset;
159
+ cis->length = length;
160
+ is->d.cis = cis;
161
+ is->m = &CMPD_IN_STREAM_METHODS;
162
+
163
+ return is;
164
+ }
165
+
166
+ static InStream *cmpd_open_input(Store *store, const char *file_name)
167
+ {
168
+ FileEntry *entry;
169
+ CompoundStore *cmpd = store->dir.cmpd;
170
+ InStream *is;
171
+
172
+ mutex_lock(&store->mutex);
173
+ if (cmpd->stream == NULL) {
174
+ mutex_unlock(&store->mutex);
175
+ RAISE(IO_ERROR, "Can't open compound file input stream. Parent "
176
+ "stream is closed.");
177
+ }
178
+
179
+ entry = (FileEntry *)h_get(cmpd->entries, file_name);
180
+ if (entry == NULL) {
181
+ mutex_unlock(&store->mutex);
182
+ RAISE(IO_ERROR, "File %s does not exist: ", file_name);
183
+ }
184
+
185
+ is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
186
+ mutex_unlock(&store->mutex);
187
+
188
+ return is;
189
+ }
190
+
191
+ static OutStream *cmpd_new_output(Store *store, const char *file_name)
192
+ {
193
+ (void)store;
194
+ (void)file_name;
195
+ RAISE(UNSUPPORTED_ERROR, "%s", UNSUPPORTED_ERROR_MSG);
196
+ return NULL;
197
+ }
198
+
199
+ static Lock *cmpd_open_lock_i(Store *store, const char *lock_name)
200
+ {
201
+ (void)store;
202
+ (void)lock_name;
203
+ RAISE(UNSUPPORTED_ERROR, "%s", UNSUPPORTED_ERROR_MSG);
204
+ return NULL;
205
+ }
206
+
207
+ static void cmpd_close_lock_i(Lock *lock)
208
+ {
209
+ (void)lock;
210
+ RAISE(UNSUPPORTED_ERROR, "%s", UNSUPPORTED_ERROR_MSG);
211
+ }
212
+
213
+ Store *open_cmpd_store(Store *store, const char *name)
214
+ {
215
+ int count, i;
216
+ off_t offset;
217
+ char *fname;
218
+ FileEntry *entry = NULL;
219
+ Store *new_store = NULL;
220
+ CompoundStore *volatile cmpd = NULL;
221
+ InStream *volatile is = NULL;
222
+
223
+ TRY
224
+ cmpd = ALLOC_AND_ZERO(CompoundStore);
225
+
226
+ cmpd->store = store;
227
+ cmpd->name = name;
228
+ cmpd->entries = h_new_str(&free, &free);
229
+ is = cmpd->stream = store->open_input(store, cmpd->name);
230
+
231
+ /* read the directory and init files */
232
+ count = is_read_vint(is);
233
+ entry = NULL;
234
+ for (i = 0; i < count; i++) {
235
+ offset = (off_t)is_read_i64(is);
236
+ fname = is_read_string(is);
237
+
238
+ if (entry != NULL) {
239
+ /* set length of the previous entry */
240
+ entry->length = offset - entry->offset;
241
+ }
242
+
243
+ entry = ALLOC(FileEntry);
244
+ entry->offset = offset;
245
+ h_set(cmpd->entries, fname, entry);
246
+ }
247
+ XCATCHALL
248
+ if (is) is_close(is);
249
+ if (cmpd->entries) h_destroy(cmpd->entries);
250
+ free(cmpd);
251
+ XENDTRY
252
+
253
+ /* set the length of the final entry */
254
+ if (entry != NULL) {
255
+ entry->length = is_length(is) - entry->offset;
256
+ }
257
+
258
+ new_store = store_new();
259
+ new_store->dir.cmpd = cmpd;
260
+ new_store->touch = &cmpd_touch;
261
+ new_store->exists = &cmpd_exists;
262
+ new_store->remove = &cmpd_remove;
263
+ new_store->rename = &cmpd_rename;
264
+ new_store->count = &cmpd_count;
265
+ new_store->clear = &cmpd_clear;
266
+ new_store->length = &cmpd_length;
267
+ new_store->each = &cmpd_each;
268
+ new_store->close_i = &cmpd_close_i;
269
+ new_store->new_output = &cmpd_new_output;
270
+ new_store->open_input = &cmpd_open_input;
271
+ new_store->open_lock_i = &cmpd_open_lock_i;
272
+ new_store->close_lock_i = &cmpd_close_lock_i;
273
+
274
+ return new_store;
275
+ }
276
+
277
+ /****************************************************************************
278
+ *
279
+ * CompoundWriter
280
+ *
281
+ ****************************************************************************/
282
+
283
+ CompoundWriter *open_cw(Store *store, char *name)
284
+ {
285
+ CompoundWriter *cw = ALLOC(CompoundWriter);
286
+ cw->store = store;
287
+ cw->name = name;
288
+ cw->ids = hs_new_str(&free);
289
+ cw->file_entries = ary_new_type_capa(CWFileEntry, CW_INIT_CAPA);
290
+ return cw;
291
+ }
292
+
293
+ void cw_add_file(CompoundWriter *cw, char *id)
294
+ {
295
+ id = estrdup(id);
296
+ if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST) {
297
+ RAISE(IO_ERROR, "Tried to add file \"%s\" which has already been "
298
+ "added to the compound store", id);
299
+ }
300
+
301
+ ary_grow(cw->file_entries);
302
+ ary_last(cw->file_entries).name = id;
303
+ }
304
+
305
+ static void cw_copy_file(CompoundWriter *cw, CWFileEntry *src, OutStream *os)
306
+ {
307
+ off_t start_ptr = os_pos(os);
308
+ off_t end_ptr;
309
+ off_t remainder, length, len;
310
+ uchar buffer[BUFFER_SIZE];
311
+
312
+ InStream *is = cw->store->open_input(cw->store, src->name);
313
+
314
+ remainder = length = is_length(is);
315
+
316
+ while (remainder > 0) {
317
+ len = MIN(remainder, BUFFER_SIZE);
318
+ is_read_bytes(is, buffer, len);
319
+ os_write_bytes(os, buffer, len);
320
+ remainder -= len;
321
+ }
322
+
323
+ /* Verify that remainder is 0 */
324
+ if (remainder != 0) {
325
+ RAISE(IO_ERROR, "There seems to be an error in the compound file "
326
+ "should have read to the end but there are <%"OFF_T_PFX"d> "
327
+ "bytes left", remainder);
328
+ }
329
+
330
+ /* Verify that the output length diff is equal to original file */
331
+ end_ptr = os_pos(os);
332
+ len = end_ptr - start_ptr;
333
+ if (len != length) {
334
+ RAISE(IO_ERROR, "Difference in compound file output file offsets "
335
+ "<%"OFF_T_PFX"d> does not match the original file lenght "
336
+ "<%"OFF_T_PFX"d>", len, length);
337
+ }
338
+
339
+ is_close(is);
340
+ }
341
+
342
+ void cw_close(CompoundWriter *cw)
343
+ {
344
+ OutStream *os = NULL;
345
+ int i;
346
+
347
+ if (cw->ids->size <= 0) {
348
+ RAISE(STATE_ERROR, "Tried to merge compound file with no entries");
349
+ }
350
+
351
+ os = cw->store->new_output(cw->store, cw->name);
352
+
353
+ os_write_vint(os, ary_size(cw->file_entries));
354
+
355
+ /* Write the directory with all offsets at 0.
356
+ * Remember the positions of directory entries so that we can adjust the
357
+ * offsets later */
358
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
359
+ cw->file_entries[i].dir_offset = os_pos(os);
360
+ os_write_u64(os, 0); /* for now */
361
+ os_write_string(os, cw->file_entries[i].name);
362
+ }
363
+
364
+ /* Open the files and copy their data into the stream. Remember the
365
+ * locations of each file's data section. */
366
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
367
+ cw->file_entries[i].data_offset = os_pos(os);
368
+ cw_copy_file(cw, &cw->file_entries[i], os);
369
+ }
370
+
371
+ /* Write the data offsets into the directory of the compound stream */
372
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
373
+ os_seek(os, cw->file_entries[i].dir_offset);
374
+ os_write_u64(os, cw->file_entries[i].data_offset);
375
+ }
376
+
377
+ if (os) {
378
+ os_close(os);
379
+ }
380
+
381
+ hs_destroy(cw->ids);
382
+ ary_free(cw->file_entries);
383
+ free(cw);
384
+ }
data/ext/config.h ADDED
@@ -0,0 +1,52 @@
1
/*
 * Basic compile-time definitions: boolean constants, a bool type for C,
 * fixed-width integer aliases, the printf length modifier for off_t and
 * macros describing which variadic-macro syntaxes the compiler accepts.
 */
#ifndef FRT_DEFINES_H
#define FRT_DEFINES_H

#ifdef __cplusplus
extern "C" {
#endif

#include <sys/types.h>
#include <limits.h>
#include "posh.h"

/* Boolean constants, defined only when nothing earlier provides them. */
#ifndef false
#define false 0
#endif
#ifndef true
#define true 1
#endif

/* C++ has its own bool, so the typedef is only made when compiling as C. */
#ifndef __cplusplus
typedef unsigned int bool;
#endif
typedef unsigned char frt_uchar;

/* Fixed-width integer aliases built on the types from posh.h. */
typedef posh_u16_t frt_u16;
typedef posh_i16_t frt_i16;
typedef posh_u32_t frt_u32;
typedef posh_i32_t frt_i32;
typedef posh_u64_t frt_u64;
typedef posh_i64_t frt_i64;

/*
 * printf length modifier used when formatting off_t values: "ll" when long
 * is 32 bits wide and _FILE_OFFSET_BITS is defined as 64, "l" otherwise.
 */
#if ( LONG_MAX == 2147483647 ) && defined(_FILE_OFFSET_BITS) && (_FILE_OFFSET_BITS == 64)
#define FRT_OFF_T_PFX "ll"
#else
#define FRT_OFF_T_PFX "l"
#endif

/* Compiling as C99 or later (and not as C++). */
#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) && !defined(__cplusplus)
#define FRT_IS_C99
#define FRT_HAS_ISO_VARARGS
#define FRT_HAS_VARARGS
#endif

/* Compiler defines __GNUC__ and is not in strict ANSI mode (and not C++). */
#if defined(__GNUC__) && !defined(__STRICT_ANSI__) && !defined(__cplusplus)
#define FRT_HAS_GNUC_VARARGS
#define FRT_HAS_VARARGS
#endif

#ifdef __cplusplus
} // extern "C"
#endif

#endif
data/ext/document.c ADDED
@@ -0,0 +1,159 @@
1
+ #include "document.h"
2
+ #include "symbol.h"
3
+ #include <string.h>
4
+ #include "internal.h"
5
+
6
+ /****************************************************************************
7
+ *
8
+ * DocField
9
+ *
10
+ ****************************************************************************/
11
+
12
+ DocField *df_new(Symbol name)
13
+ {
14
+ DocField *df = ALLOC(DocField);
15
+ df->name = name;
16
+ df->size = 0;
17
+ df->capa = DF_INIT_CAPA;
18
+ df->data = ALLOC_N(char *, df->capa);
19
+ df->lengths = ALLOC_N(int, df->capa);
20
+ df->destroy_data = false;
21
+ df->boost = 1.0;
22
+ return df;
23
+ }
24
+
25
+ DocField *df_add_data_len(DocField *df, char *data, int len)
26
+ {
27
+ if (df->size >= df->capa) {
28
+ df->capa <<= 2;
29
+ REALLOC_N(df->data, char *, df->capa);
30
+ REALLOC_N(df->lengths, int, df->capa);
31
+ }
32
+ df->data[df->size] = data;
33
+ df->lengths[df->size] = len;
34
+ df->size++;
35
+ return df;
36
+ }
37
+
38
+ DocField *df_add_data(DocField *df, char *data)
39
+ {
40
+ return df_add_data_len(df, data, strlen(data));
41
+ }
42
+
43
+ void df_destroy(DocField *df)
44
+ {
45
+ if (df->destroy_data) {
46
+ int i;
47
+ for (i = 0; i < df->size; i++) {
48
+ free(df->data[i]);
49
+ }
50
+ }
51
+ free(df->data);
52
+ free(df->lengths);
53
+ free(df);
54
+ }
55
+
56
+ /*
57
+ * Format for one item is: name: "data"
58
+ * for more items : name: ["data", "data", "data"]
59
+ */
60
+ char *df_to_s(DocField *df)
61
+ {
62
+ #define APPEND(dst, src) ((dst)[0] = (src)[0], 1)
63
+ #define APPEND2(dst, src) (APPEND(dst, src), APPEND(dst+1, src+1), 2)
64
+
65
+ int i, len = 0, namelen = sym_len(df->name);
66
+ char *str, *s;
67
+ for (i = 0; i < df->size; i++) {
68
+ len += df->lengths[i] + 4;
69
+ }
70
+ s = str = ALLOC_N(char, namelen + len + 5);
71
+ memcpy(s, df->name, namelen);
72
+ s += namelen;
73
+ s += APPEND2(s, ": ");
74
+
75
+ if (df->size > 1) {
76
+ s += APPEND(s, "[");
77
+ }
78
+ for (i = 0; i < df->size; i++) {
79
+ if (i != 0) {
80
+ s += APPEND2(s, ", ");
81
+ }
82
+ s += APPEND(s, "\"");
83
+ memcpy(s, df->data[i], df->lengths[i]);
84
+ s += df->lengths[i];
85
+ s += APPEND(s, "\"");
86
+ }
87
+
88
+ if (df->size > 1) {
89
+ s += APPEND(s, "]");
90
+ }
91
+ *s = 0;
92
+ return str;
93
+ }
94
+
95
+ /****************************************************************************
96
+ *
97
+ * Document
98
+ *
99
+ ****************************************************************************/
100
+
101
+ Document *doc_new()
102
+ {
103
+ Document *doc = ALLOC(Document);
104
+ doc->field_dict = h_new_ptr((free_ft)&df_destroy);
105
+ doc->size = 0;
106
+ doc->capa = DOC_INIT_CAPA;
107
+ doc->fields = ALLOC_N(DocField *, doc->capa);
108
+ doc->boost = 1.0;
109
+ return doc;
110
+ }
111
+
112
+ DocField *doc_add_field(Document *doc, DocField *df)
113
+ {
114
+ if (!h_set_safe(doc->field_dict, df->name, df)) {
115
+ RAISE(EXCEPTION, "tried to add %s field which alread existed\n",
116
+ S(df->name));
117
+ }
118
+ if (doc->size >= doc->capa) {
119
+ doc->capa <<= 1;
120
+ REALLOC_N(doc->fields, DocField *, doc->capa);
121
+ }
122
+ doc->fields[doc->size] = df;
123
+ doc->size++;
124
+ return df;
125
+ }
126
+
127
+ DocField *doc_get_field(Document *doc, Symbol name)
128
+ {
129
+ return (DocField *)h_get(doc->field_dict, name);
130
+ }
131
+
132
+ char *doc_to_s(Document *doc)
133
+ {
134
+ int i;
135
+ int len = 0;
136
+ char **fields = ALLOC_N(char *, doc->size);
137
+ char *buf, *s;
138
+
139
+ for (i = 0; i < doc->size; i++) {
140
+ fields[i] = df_to_s(doc->fields[i]);
141
+ len += strlen(fields[i]) + 5;
142
+ }
143
+ s = buf = ALLOC_N(char, len + 12);
144
+ s += sprintf(buf, "Document [\n");
145
+ for (i = 0; i < doc->size; i++) {
146
+ s += sprintf(s, " =>%s\n", fields[i]);
147
+ free(fields[i]);
148
+ }
149
+ free(fields);
150
+ return buf;
151
+ }
152
+
153
+ void doc_destroy(Document *doc)
154
+ {
155
+ h_destroy(doc->field_dict);
156
+ free(doc->fields);
157
+ free(doc);
158
+ }
159
+