sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,272 @@
1
+ #ifndef FRT_BIT_VECTOR_H
2
+ #define FRT_BIT_VECTOR_H
3
+
4
+ #include "global.h"
5
+
6
+ #define BV_INIT_CAPA 256
7
+ typedef struct BitVector
8
+ {
9
+ /** The bits are held in an array of 32-bit integers */
10
+ f_u32 *bits;
11
+
12
+ /** size is equal to 1 + the highest order bit set */
13
+ int size;
14
+
15
+ /** capa is the number of words (U32) allocated for the bits */
16
+ int capa;
17
+
18
+ /** count is the running count of bits set. This is kept up to date by
19
+ * bv_set and bv_unset. You can reset this value by calling bv_recount */
20
+ int count;
21
+
22
+ /** curr_bit is used by scan_next to record the previously scanned bit */
23
+ int curr_bit;
24
+
25
+ bool extends_as_ones : 1; /* NOTE(review): presumably bits beyond size read as 1 when set - confirm in bitvector.c */
26
+ int ref_cnt; /* NOTE(review): looks like a reference count for shared ownership - confirm */
27
+ } BitVector;
28
+
29
+ /**
30
+ * Create a new BitVector with a capacity of +BV_INIT_CAPA+. Note that the
31
+ * BitVector is growable and will adjust its capacity when you use bv_set.
32
+ *
33
+ * @return BitVector with a capacity of +BV_INIT_CAPA+.
34
+ */
35
+ extern BitVector *bv_new();
36
+
37
+ /**
38
+ * Create a new BitVector with a capacity of +capa+. Note that the BitVector
39
+ * is growable and will adjust its capacity when you use bv_set.
40
+ *
41
+ * @param capa the initial capacity of the BitVector
42
+ * @return BitVector with a capacity of +capa+.
43
+ */
44
+ extern BitVector *bv_new_capa(int capa);
45
+
46
+ /**
47
+ * Destroy a BitVector, freeing all memory allocated to that BitVector
48
+ *
49
+ * @param bv BitVector to destroy
50
+ */
51
+ extern void bv_destroy(BitVector *bv);
52
+
53
+ /**
54
+ * Set the bit at position +index+. If +index+ is outside of the range of the
55
+ * BitVector, that is >= BitVector.size, BitVector.size will be set to +index+
56
+ * + 1. If it is greater than the capacity of the BitVector, the capacity will
57
+ * be expanded to accommodate.
58
+ *
59
+ * @param bv the BitVector to set the bit in
60
+ * @param index the index of the bit to set
61
+ */
62
+ extern void bv_set(BitVector *bv, int index);
63
+
64
+ /**
65
+ * Unsafely set the bit at position +index+. If you choose to use this
66
+ * function you must create the BitVector with a large enough capacity to
67
+ * accommodate all of the bv_set_fast operations. You must also set bits in
68
+ * order and only one time per bit. Otherwise, use the safe bv_set function.
69
+ *
70
+ * So this is ok;
71
+ * <pre>
72
+ * BitVector *bv = bv_new_capa(1000);
73
+ * bv_set_fast(bv, 900);
74
+ * bv_set_fast(bv, 920);
75
+ * bv_set_fast(bv, 999);
76
+ * </pre>
77
+ *
78
+ * While these are not ok;
79
+ * <pre>
80
+ * BitVector *bv = bv_new_capa(90);
81
+ * bv_set_fast(bv, 80);
82
+ * bv_set_fast(bv, 79); // <= Bad: Out of Order
83
+ * bv_set_fast(bv, 80); // <= Bad: Already set
84
+ * bv_set_fast(bv, 90); // <= Bad: Out of Range. index must be < capa
85
+ * </pre>
86
+ *
87
+ * @param bv the BitVector to set the bit in
88
+ * @param bit the index of the bit to set
89
+ */
90
+ extern void bv_set_fast(BitVector *bv, int bit);
91
+
92
+ /**
93
+ * Return 1 if the bit at +index+ was set or 0 otherwise. If +index+ is out of
94
+ * range, that is greater than the BitVector's capacity, it will also return 0.
95
+ *
96
+ * @param bv the BitVector to check in
97
+ * @param index the index of the bit to check
98
+ * @return 1 if the bit was set, 0 otherwise
99
+ */
100
+ extern int bv_get(BitVector *bv, int index);
101
+
102
+ /**
103
+ * Unset the bit at position +index+. If the +index+ was out of range, that is
104
+ * greater than the BitVector's capacity then do nothing. (bv_get will return 0
105
+ * in this case anyway).
106
+ *
107
+ * @param bv the BitVector to unset the bit in
108
+ * @param bit the index of the bit to unset
109
+ */
110
+ extern void bv_unset(BitVector *bv, int bit);
111
+
112
+ /**
113
+ * Clear all set bits. This function will set all set bits to 0.
114
+ *
115
+ * @param bv the BitVector to clear
116
+ */
117
+ extern void bv_clear(BitVector *bv);
118
+
119
+ /**
120
+ * Resets the set bit count by running through the whole BitVector and
121
+ * counting all set bits. A running count of the bits is kept by bv_set,
122
+ * bv_unset and bv_set_fast so this function is only necessary if the count could
123
+ * have been corrupted somehow or if the BitVector has been constructed in a
124
+ * different way (for example being read from the file_system).
125
+ *
126
+ * @param bv the BitVector to count the bits in
127
+ * @return the number of set bits in the BitVector. BitVector.count is also
128
+ * set
129
+ */
130
+ extern int bv_recount(BitVector *bv);
131
+
132
+ /**
133
+ * Reset the BitVector for scanning. This function should be called before
134
+ * using bv_scan_next to scan through all set bits in the BitVector. This is
135
+ * not necessary when using bv_scan_next_from.
136
+ *
137
+ * @param bv the BitVector to reset for scanning
138
+ */
139
+ extern void bv_scan_reset(BitVector *bv);
140
+
141
+ /**
142
+ * Scan the BitVector for the next set bit. Before using this function you
143
+ * should reset the BitVector for scanning using +bv_scan_reset+. You can then
144
+ * repeatedly call bv_scan_next to get each set bit until it finally returns
145
+ * -1.
146
+ *
147
+ * @param bv the BitVector to scan
148
+ * @return the next set bits index or -1 if no more bits are set
149
+ */
150
+ extern int bv_scan_next(BitVector *bv);
151
+
152
+ /**
153
+ * Scan the BitVector for the next set bit after +from+. If no more bits are
154
+ * set then return -1, otherwise return the index of the next set bit.
155
+ *
156
+ * @param bv the BitVector to scan
157
+ * @return the next set bit's index or -1 if no more bits are set
158
+ */
159
+
160
+ extern int bv_scan_next_from(BitVector *bv, register const int from);
161
+ /**
162
+ * Scan the BitVector for the next unset bit. Before using this function you
163
+ * should reset the BitVector for scanning using +bv_scan_reset+. You can then
164
+ * repeatedly call bv_scan_next_unset to get each unset bit until it finally returns
165
+ * -1.
166
+ *
167
+ * @param bv the BitVector to scan
168
+ * @return the next unset bits index or -1 if no more bits are unset
169
+ */
170
+ extern int bv_scan_next_unset(BitVector *bv);
171
+
172
+ /**
173
+ * Scan the BitVector for the next unset bit after +from+. If no more bits are
174
+ * unset then return -1, otherwise return the index of the next unset bit.
175
+ *
176
+ * @param bv the BitVector to scan
177
+ * @return the next unset bit's index or -1 if no more bits are unset
178
+ */
179
+ extern int bv_scan_next_unset_from(BitVector *bv, register const int from);
180
+
181
+ /**
182
+ * Check whether the two BitVectors have the same bits set.
183
+ *
184
+ * @param bv1 first BitVector to compare
185
+ * @param bv2 second BitVector to compare
186
+ * @return true if bv1 == bv2
187
+ */
188
+ extern int bv_eq(BitVector *bv1, BitVector *bv2);
189
+
190
+ /**
191
+ * Determines a hash value for the BitVector
192
+ *
193
+ * @param bv the BitVector to hash
194
+ * @return A hash value for the BitVector
195
+ */
196
+ extern unsigned long bv_hash(BitVector *bv);
197
+
198
+ /**
199
+ * ANDs two BitVectors (+bv1+ and +bv2+) together and return the resultant
200
+ * BitVector
201
+ *
202
+ * @param bv1 first BitVector to AND
203
+ * @param bv2 second BitVector to AND
204
+ * @return A BitVector with all bits set that are set in both bv1 and bv2
205
+ */
206
+ extern BitVector *bv_and(BitVector *bv1, BitVector *bv2);
207
+
208
+ /**
209
+ * ORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
210
+ * BitVector
211
+ *
212
+ * @param bv1 first BitVector to OR
213
+ * @param bv2 second BitVector to OR
214
+ * @return A BitVector with all bits set that are set in either bv1 or bv2
215
+ */
216
+ extern BitVector *bv_or(BitVector *bv1, BitVector *bv2);
217
+
218
+ /**
219
+ * XORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
220
+ * BitVector
221
+ *
222
+ * @param bv1 first BitVector to XOR
223
+ * @param bv2 second BitVector to XOR
224
+ * @return A BitVector with all bits set that are set in exactly one of bv1 and bv2
225
+ */
226
+ extern BitVector *bv_xor(BitVector *bv1, BitVector *bv2);
227
+
228
+ /**
229
+ * Returns BitVector with all of +bv+'s bits flipped
230
+ *
231
+ * @param bv BitVector to flip
232
+ * @return A BitVector with all of +bv+'s bits flipped
233
+ */
234
+ extern BitVector *bv_not(BitVector *bv);
235
+
236
+ /**
237
+ * ANDs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
238
+ *
239
+ * @param bv1 first BitVector to AND
240
+ * @param bv2 second BitVector to AND
241
+ * @return A BitVector
242
+ * @return bv1 with all bits set that were set in both bv1 and bv2
243
+ */
244
+ extern BitVector *bv_and_x(BitVector *bv1, BitVector *bv2);
245
+
246
+ /**
247
+ * ORs two BitVectors together
248
+ *
249
+ * @param bv1 first BitVector to OR
250
+ * @param bv2 second BitVector to OR
251
+ * @return bv1
252
+ */
253
+ extern BitVector *bv_or_x(BitVector *bv1, BitVector *bv2);
254
+
255
+ /**
256
+ * XORs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
257
+ *
258
+ * @param bv1 first BitVector to XOR
259
+ * @param bv2 second BitVector to XOR
260
+ * @return bv1
261
+ */
262
+ extern BitVector *bv_xor_x(BitVector *bv1, BitVector *bv2);
263
+
264
+ /**
265
+ * Flips all bits in the BitVector +bv+
266
+ *
267
+ * @param bv BitVector to flip
268
+ * @return +bv+ with all its bits flipped
269
+ */
270
+ extern BitVector *bv_not_x(BitVector *bv);
271
+
272
+ #endif
@@ -0,0 +1,383 @@
1
+ #include "index.h"
2
+ #include "array.h"
3
+
4
+ extern void store_destroy(Store *store);
5
+ extern InStream *is_new();
6
+ extern Store *store_new();
7
+
8
+ /****************************************************************************
9
+ *
10
+ * CompoundStore
11
+ *
12
+ ****************************************************************************/
13
+
14
+ typedef struct FileEntry { /* one directory entry of a compound file, keyed by file name in CompoundStore.entries */
15
+ off_t offset; /* position of the file's data within the compound stream, as read from the directory */
16
+ off_t length; /* computed on open: next entry's offset (or stream length for the last entry) minus offset */
17
+ } FileEntry;
18
+
19
+ static void cmpd_touch(Store *store, char *file_name)
20
+ {
21
+ store->dir.cmpd->store->touch(store->dir.cmpd->store, file_name);
22
+ }
23
+
24
+ static int cmpd_exists(Store *store, char *file_name)
25
+ {
26
+ if (h_get(store->dir.cmpd->entries, file_name) != NULL) {
27
+ return true;
28
+ }
29
+ else {
30
+ return false;
31
+ }
32
+ }
33
+
34
+ /**
35
+ * @throws UNSUPPORTED_ERROR
36
+ */
37
+ static int cmpd_remove(Store *store, char *file_name)
38
+ {
39
+ (void)store;
40
+ (void)file_name;
41
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
42
+ return 0;
43
+ }
44
+
45
+ /**
46
+ * @throws UNSUPPORTED_ERROR
47
+ */
48
+ static void cmpd_rename(Store *store, char *from, char *to)
49
+ {
50
+ (void)store;
51
+ (void)from;
52
+ (void)to;
53
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
54
+ }
55
+
56
+ static int cmpd_count(Store *store)
57
+ {
58
+ return store->dir.cmpd->entries->size;
59
+ }
60
+
61
+ static void cmpd_each(Store *store,
62
+ void (*func)(char *fname, void *arg), void *arg)
63
+ {
64
+ HashTable *ht = store->dir.cmpd->entries;
65
+ int i;
66
+ for (i = 0; i <= ht->mask; i++) {
67
+ char *fn = (char *)ht->table[i].key;
68
+ if (fn) {
69
+ func(fn, arg);
70
+ }
71
+ }
72
+ }
73
+
74
+
75
+ /**
76
+ * @throws UNSUPPORTED_ERROR
77
+ */
78
+ static void cmpd_clear(Store *store)
79
+ {
80
+ (void)store;
81
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
82
+ }
83
+
84
+ static void cmpd_close_i(Store *store)
85
+ {
86
+ CompoundStore *cmpd = store->dir.cmpd;
87
+ if (cmpd->stream == NULL) {
88
+ RAISE(IO_ERROR, "Tried to close already closed compound store");
89
+ }
90
+
91
+ h_destroy(cmpd->entries);
92
+
93
+ is_close(cmpd->stream);
94
+ cmpd->stream = NULL;
95
+ free(store->dir.cmpd);
96
+ store_destroy(store);
97
+ }
98
+
99
+ static off_t cmpd_length(Store *store, char *file_name)
100
+ {
101
+ FileEntry *fe = h_get(store->dir.cmpd->entries, file_name);
102
+ if (fe != NULL) {
103
+ return fe->length;
104
+ }
105
+ else {
106
+ return 0;
107
+ }
108
+ }
109
+
110
+ static void cmpdi_seek_i(InStream *is, off_t pos)
111
+ {
112
+ (void)is;
113
+ (void)pos;
114
+ }
115
+
116
+ static void cmpdi_close_i(InStream *is)
117
+ {
118
+ free(is->d.cis);
119
+ }
120
+
121
+ static off_t cmpdi_length_i(InStream *is)
122
+ {
123
+ return (is->d.cis->length);
124
+ }
125
+
126
+ /*
127
+ * raises: EOF_ERROR
128
+ */
129
+ static void cmpdi_read_i(InStream *is, uchar *b, int len)
130
+ {
131
+ CompoundInStream *cis = is->d.cis;
132
+ off_t start = is_pos(is);
133
+
134
+ if ((start + len) > cis->length) {
135
+ RAISE(EOF_ERROR, "Tried to read past end of file. File length is "
136
+ "<%"F_OFF_T_PFX"d> and tried to read to <%"F_OFF_T_PFX"d>",
137
+ cis->length, start + len);
138
+ }
139
+
140
+ is_seek(cis->sub, cis->offset + start);
141
+ is_read_bytes(cis->sub, b, len);
142
+ }
143
+
144
+ static const struct InStreamMethods CMPD_IN_STREAM_METHODS = { /* method table installed on streams by cmpd_create_input */
145
+ cmpdi_read_i, /* bounds-checked read through the shared sub stream */
146
+ cmpdi_seek_i, /* no-op */
147
+ cmpdi_length_i, /* returns the sub-file length */
148
+ cmpdi_close_i /* frees the CompoundInStream record */
149
+ };
150
+
151
+ static InStream *cmpd_create_input(InStream *sub_is, off_t offset, off_t length)
152
+ {
153
+ InStream *is = is_new();
154
+ CompoundInStream *cis = ALLOC(CompoundInStream);
155
+
156
+ cis->sub = sub_is;
157
+ cis->offset = offset;
158
+ cis->length = length;
159
+ is->d.cis = cis;
160
+ is->m = &CMPD_IN_STREAM_METHODS;
161
+
162
+ return is;
163
+ }
164
+
165
+ static InStream *cmpd_open_input(Store *store, const char *file_name)
166
+ {
167
+ FileEntry *entry;
168
+ CompoundStore *cmpd = store->dir.cmpd;
169
+ InStream *is;
170
+
171
+ mutex_lock(&store->mutex);
172
+ if (cmpd->stream == NULL) {
173
+ mutex_unlock(&store->mutex);
174
+ RAISE(IO_ERROR, "Can't open compound file input stream. Parent "
175
+ "stream is closed.");
176
+ }
177
+
178
+ entry = h_get(cmpd->entries, file_name);
179
+ if (entry == NULL) {
180
+ mutex_unlock(&store->mutex);
181
+ RAISE(IO_ERROR, "File %s does not exist: ", file_name);
182
+ }
183
+
184
+ is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
185
+ mutex_unlock(&store->mutex);
186
+
187
+ return is;
188
+ }
189
+
190
+ static OutStream *cmpd_new_output(Store *store, const char *file_name)
191
+ {
192
+ (void)store;
193
+ (void)file_name;
194
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
195
+ return NULL;
196
+ }
197
+
198
+ static Lock *cmpd_open_lock_i(Store *store, char *lock_name)
199
+ {
200
+ (void)store;
201
+ (void)lock_name;
202
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
203
+ return NULL;
204
+ }
205
+
206
+ static void cmpd_close_lock_i(Lock *lock)
207
+ {
208
+ (void)lock;
209
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
210
+ }
211
+
212
+ Store *open_cmpd_store(Store *store, const char *name)
213
+ {
214
+ int count, i;
215
+ off_t offset;
216
+ char *fname;
217
+ FileEntry *entry = NULL;
218
+ Store *new_store = NULL;
219
+ CompoundStore *volatile cmpd = NULL;
220
+ InStream *volatile is = NULL;
221
+
222
+ TRY
223
+ cmpd = ALLOC_AND_ZERO(CompoundStore);
224
+
225
+ cmpd->store = store;
226
+ cmpd->name = name;
227
+ cmpd->entries = h_new_str(&free, &free);
228
+ is = cmpd->stream = store->open_input(store, cmpd->name);
229
+
230
+ /* read the directory and init files */
231
+ count = is_read_vint(is);
232
+ entry = NULL;
233
+ for (i = 0; i < count; i++) {
234
+ offset = (off_t)is_read_i64(is);
235
+ fname = is_read_string(is);
236
+
237
+ if (entry != NULL) {
238
+ /* set length of the previous entry */
239
+ entry->length = offset - entry->offset;
240
+ }
241
+
242
+ entry = ALLOC(FileEntry);
243
+ entry->offset = offset;
244
+ h_set(cmpd->entries, fname, entry);
245
+ }
246
+ XCATCHALL
247
+ if (is) is_close(is);
248
+ if (cmpd->entries) h_destroy(cmpd->entries);
249
+ free(cmpd);
250
+ XENDTRY
251
+
252
+ /* set the length of the final entry */
253
+ if (entry != NULL) {
254
+ entry->length = is_length(is) - entry->offset;
255
+ }
256
+
257
+ new_store = store_new();
258
+ new_store->dir.cmpd = cmpd;
259
+ new_store->touch = &cmpd_touch;
260
+ new_store->exists = &cmpd_exists;
261
+ new_store->remove = &cmpd_remove;
262
+ new_store->rename = &cmpd_rename;
263
+ new_store->count = &cmpd_count;
264
+ new_store->clear = &cmpd_clear;
265
+ new_store->length = &cmpd_length;
266
+ new_store->each = &cmpd_each;
267
+ new_store->close_i = &cmpd_close_i;
268
+ new_store->new_output = &cmpd_new_output;
269
+ new_store->open_input = &cmpd_open_input;
270
+ new_store->open_lock_i = &cmpd_open_lock_i;
271
+ new_store->close_lock_i = &cmpd_close_lock_i;
272
+
273
+ return new_store;
274
+ }
275
+
276
+ /****************************************************************************
277
+ *
278
+ * CompoundWriter
279
+ *
280
+ ****************************************************************************/
281
+
282
+ CompoundWriter *open_cw(Store *store, char *name)
283
+ {
284
+ CompoundWriter *cw = ALLOC(CompoundWriter);
285
+ cw->store = store;
286
+ cw->name = name;
287
+ cw->ids = hs_new_str(&free);
288
+ cw->file_entries = ary_new_type_capa(CWFileEntry, CW_INIT_CAPA);
289
+ return cw;
290
+ }
291
+
292
+ void cw_add_file(CompoundWriter *cw, char *id)
293
+ {
294
+ id = estrdup(id);
295
+ if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST) {
296
+ RAISE(IO_ERROR, "Tried to add file \"%s\" which has already been "
297
+ "added to the compound store", id);
298
+ }
299
+
300
+ ary_grow(cw->file_entries);
301
+ ary_last(cw->file_entries).name = id;
302
+ }
303
+
304
+ static void cw_copy_file(CompoundWriter *cw, CWFileEntry *src, OutStream *os)
305
+ {
306
+ off_t start_ptr = os_pos(os);
307
+ off_t end_ptr;
308
+ off_t remainder, length, len;
309
+ uchar buffer[BUFFER_SIZE];
310
+
311
+ InStream *is = cw->store->open_input(cw->store, src->name);
312
+
313
+ remainder = length = is_length(is);
314
+
315
+ while (remainder > 0) {
316
+ len = MIN(remainder, BUFFER_SIZE);
317
+ is_read_bytes(is, buffer, len);
318
+ os_write_bytes(os, buffer, len);
319
+ remainder -= len;
320
+ }
321
+
322
+ /* Verify that remainder is 0 */
323
+ if (remainder != 0) {
324
+ RAISE(IO_ERROR, "There seems to be an error in the compound file "
325
+ "should have read to the end but there are <%"F_OFF_T_PFX"d> "
326
+ "bytes left", remainder);
327
+ }
328
+
329
+ /* Verify that the output length diff is equal to original file */
330
+ end_ptr = os_pos(os);
331
+ len = end_ptr - start_ptr;
332
+ if (len != length) {
333
+ RAISE(IO_ERROR, "Difference in compound file output file offsets "
334
+ "<%"F_OFF_T_PFX"d> does not match the original file lenght "
335
+ "<%"F_OFF_T_PFX"d>", len, length);
336
+ }
337
+
338
+ is_close(is);
339
+ }
340
+
341
+ void cw_close(CompoundWriter *cw)
342
+ {
343
+ OutStream *os = NULL;
344
+ int i;
345
+
346
+ if (cw->ids->size <= 0) {
347
+ RAISE(STATE_ERROR, "Tried to merge compound file with no entries");
348
+ }
349
+
350
+ os = cw->store->new_output(cw->store, cw->name);
351
+
352
+ os_write_vint(os, ary_size(cw->file_entries));
353
+
354
+ /* Write the directory with all offsets at 0.
355
+ * Remember the positions of directory entries so that we can adjust the
356
+ * offsets later */
357
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
358
+ cw->file_entries[i].dir_offset = os_pos(os);
359
+ os_write_u64(os, 0); /* for now */
360
+ os_write_string(os, cw->file_entries[i].name);
361
+ }
362
+
363
+ /* Open the files and copy their data into the stream. Remember the
364
+ * locations of each file's data section. */
365
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
366
+ cw->file_entries[i].data_offset = os_pos(os);
367
+ cw_copy_file(cw, &cw->file_entries[i], os);
368
+ }
369
+
370
+ /* Write the data offsets into the directory of the compound stream */
371
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
372
+ os_seek(os, cw->file_entries[i].dir_offset);
373
+ os_write_u64(os, cw->file_entries[i].data_offset);
374
+ }
375
+
376
+ if (os) {
377
+ os_close(os);
378
+ }
379
+
380
+ hs_destroy(cw->ids);
381
+ ary_free(cw->file_entries);
382
+ free(cw);
383
+ }