ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/bitvector.h CHANGED
@@ -1,29 +1,271 @@
1
1
  #ifndef FRT_BIT_VECTOR_H
2
2
  #define FRT_BIT_VECTOR_H
3
3
 
4
- #include <global.h>
5
- #include <store.h>
6
-
7
- #define BV_INIT_CAPA 256
8
- typedef struct BitVector {
9
- uchar *bits;
10
- int size;
11
- int capa;
12
- int count;
13
- int curr_bit;
4
+ #include "global.h"
5
+
6
+ #define BV_INIT_CAPA 256
7
+ typedef struct BitVector
8
+ {
9
+ /** The bits are held in an array of 32-bit integers */
10
+ f_u32 *bits;
11
+
12
+ /** size is equal to 1 + the highest order bit set */
13
+ int size;
14
+
15
+ /** capa is the number of words (U32) allocated for the bits */
16
+ int capa;
17
+
18
+ /** count is the running count of bits set. This is kept up to date by
19
+ * bv_set and bv_unset. You can reset this value by calling bv_recount */
20
+ int count;
21
+
22
+ /** curr_bit is used by scan_next to record the previously scanned bit */
23
+ int curr_bit;
24
+
25
+ bool extends_as_ones : 1;
14
26
  } BitVector;
15
27
 
16
- BitVector *bv_create();;
17
- BitVector *bv_create_size(int size);
18
- void bv_destroy(BitVector *bv);
19
- void bv_set(BitVector *bv, int bit);
20
- int bv_get(BitVector *bv, int bit);
21
- void bv_clear(BitVector *bv);
22
- void bv_unset(BitVector *bv, int bit);
23
- void bv_write(BitVector *bv, Store *store, char *name);
24
- BitVector *bv_read(Store *store, char *name);
25
- void bv_scan_reset(BitVector *bv);
26
- int bv_scan_next(BitVector *bv);
27
- int bv_scan_next_from(BitVector *bv, register const int from);
28
+ /**
29
+ * Create a new BitVector with a capacity of +BV_INIT_CAPA+. Note that the
30
+ * BitVector is growable and will adjust its capacity when you use bv_set.
31
+ *
32
+ * @return BitVector with a capacity of +BV_INIT_CAPA+.
33
+ */
34
+ extern BitVector *bv_new();
35
+
36
+ /**
37
+ * Create a new BitVector with a capacity of +capa+. Note that the BitVector
38
+ * is growable and will adjust its capacity when you use bv_set.
39
+ *
40
+ * @param capa the initial capacity of the BitVector
41
+ * @return BitVector with a capacity of +capa+.
42
+ */
43
+ extern BitVector *bv_new_capa(int capa);
44
+
45
+ /**
46
+ * Destroy a BitVector, freeing all memory allocated to that BitVector
47
+ *
48
+ * @param bv BitVector to destroy
49
+ */
50
+ extern void bv_destroy(BitVector *bv);
51
+
52
+ /**
53
+ * Set the bit at position +index+. If +index+ is outside of the range of the
54
+ * BitVector, that is >= BitVector.size, BitVector.size will be set to +index+
55
+ * + 1. If it is greater than the capacity of the BitVector, the capacity will
56
+ * be expanded to accommodate.
57
+ *
58
+ * @param bv the BitVector to set the bit in
59
+ * @param index the index of the bit to set
60
+ */
61
+ extern void bv_set(BitVector *bv, int index);
62
+
63
+ /**
64
+ * Unsafely set the bit at position +index+. If you choose to use this
65
+ * function you must create the BitVector with a large enough capacity to
66
+ * accommodate all of the bv_set_fast operations. You must also set bits in
67
+ * order and only one time per bit. Otherwise, use the safe bv_set function.
68
+ *
69
+ * So this is ok;
70
+ * <pre>
71
+ * BitVector *bv = bv_new_capa(1000);
72
+ * bv_set_fast(bv, 900);
73
+ * bv_set_fast(bv, 920);
74
+ * bv_set_fast(bv, 999);
75
+ * </pre>
76
+ *
77
+ * While these are not ok;
78
+ * <pre>
79
+ * BitVector *bv = bv_new_capa(90);
80
+ * bv_set_fast(bv, 80);
81
+ * bv_set_fast(bv, 79); // <= Bad: Out of Order
82
+ * bv_set_fast(bv, 80); // <= Bad: Already set
83
+ * bv_set_fast(bv, 90); // <= Bad: Out of Range. index must be < capa
84
+ * </pre>
85
+ *
86
+ * @param bv the BitVector to set the bit in
87
+ * @param index the index of the bit to set
88
+ */
89
+ extern void bv_set_fast(BitVector *bv, int bit);
90
+
91
+ /**
92
+ * Return 1 if the bit at +index+ was set or 0 otherwise. If +index+ is out of
93
+ * range, that is greater than the BitVector's capacity, it will also return 0.
94
+ *
95
+ * @param bv the BitVector to check in
96
+ * @param index the index of the bit to check
97
+ * @return 1 if the bit was set, 0 otherwise
98
+ */
99
+ extern int bv_get(BitVector *bv, int index);
100
+
101
+ /**
102
+ * Unset the bit at position +index+. If the +index+ was out of range, that is
103
+ * greater than the BitVector's capacity, then do nothing. (bv_get will return 0
104
+ * in this case anyway).
105
+ *
106
+ * @param bv the BitVector to unset the bit in
107
+ * @param index the index of the bit to unset
108
+ */
109
+ extern void bv_unset(BitVector *bv, int bit);
110
+
111
+ /**
112
+ * Clear all set bits. This function will set all set bits to 0.
113
+ *
114
+ * @param bv the BitVector to clear
115
+ */
116
+ extern void bv_clear(BitVector *bv);
117
+
118
+ /**
119
+ * Resets the set bit count by running through the whole BitVector and
120
+ * counting all set bits. A running count of the bits is kept by bv_set,
121
+ * bv_unset and bv_set_fast so this function is only necessary if the count could
122
+ * have been corrupted somehow or if the BitVector has been constructed in a
123
+ * different way (for example being read from the file_system).
124
+ *
125
+ * @param bv the BitVector to count the bits in
126
+ * @return the number of set bits in the BitVector. BitVector.count is also
127
+ * set
128
+ */
129
+ extern int bv_recount(BitVector *bv);
130
+
131
+ /**
132
+ * Reset the BitVector for scanning. This function should be called before
133
+ * using bv_scan_next to scan through all set bits in the BitVector. This is
134
+ * not necessary when using bv_scan_next_from.
135
+ *
136
+ * @param bv the BitVector to reset for scanning
137
+ */
138
+ extern void bv_scan_reset(BitVector *bv);
139
+
140
+ /**
141
+ * Scan the BitVector for the next set bit. Before using this function you
142
+ * should reset the BitVector for scanning using +bv_scan_reset+. You can then
143
+ * repeatedly call bv_scan_next to get each set bit until it finally returns
144
+ * -1.
145
+ *
146
+ * @param bv the BitVector to scan
147
+ * @return the next set bit's index or -1 if no more bits are set
148
+ */
149
+ extern int bv_scan_next(BitVector *bv);
150
+
151
+ /**
152
+ * Scan the BitVector for the next set bit after +from+. If no more bits are
153
+ * set then return -1, otherwise return the index of the next set bit.
154
+ *
155
+ * @param bv the BitVector to scan
156
+ * @return the next set bit's index or -1 if no more bits are set
157
+ */
158
+
159
+ extern int bv_scan_next_from(BitVector *bv, register const int from);
160
+ /**
161
+ * Scan the BitVector for the next unset bit. Before using this function you
162
+ * should reset the BitVector for scanning using +bv_scan_reset+. You can then
163
+ * repeatedly call bv_scan_next_unset to get each unset bit until it finally
164
+ * returns -1.
165
+ *
166
+ * @param bv the BitVector to scan
167
+ * @return the next unset bit's index or -1 if no more bits are unset
168
+ */
169
+ extern int bv_scan_next_unset(BitVector *bv);
170
+
171
+ /**
172
+ * Scan the BitVector for the next unset bit after +from+. If no more bits are
173
+ * unset then return -1, otherwise return the index of the next unset bit.
174
+ *
175
+ * @param bv the BitVector to scan
176
+ * @return the next unset bit's index or -1 if no more bits are unset
177
+ */
178
+ extern int bv_scan_next_unset_from(BitVector *bv, register const int from);
179
+
180
+ /**
181
+ * Check whether the two BitVectors have the same bits set.
182
+ *
183
+ * @param bv1 first BitVector to compare
184
+ * @param bv2 second BitVector to compare
185
+ * @return true if bv1 == bv2
186
+ */
187
+ extern int bv_eq(BitVector *bv1, BitVector *bv2);
188
+
189
+ /**
190
+ * Determines a hash value for the BitVector
191
+ *
192
+ * @param bv the BitVector to hash
193
+ * @return A hash value for the BitVector
194
+ */
195
+ extern ulong bv_hash(BitVector *bv);
196
+
197
+ /**
198
+ * ANDs two BitVectors (+bv1+ and +bv2+) together and return the resultant
199
+ * BitVector
200
+ *
201
+ * @param bv1 first BitVector to AND
202
+ * @param bv2 second BitVector to AND
203
+ * @return A BitVector with all bits set that are set in both bv1 and bv2
204
+ */
205
+ extern BitVector *bv_and(BitVector *bv1, BitVector *bv2);
206
+
207
+ /**
208
+ * ORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
209
+ * BitVector
210
+ *
211
+ * @param bv1 first BitVector to OR
212
+ * @param bv2 second BitVector to OR
213
+ * @return A BitVector with all bits set that are set in either bv1 or bv2
214
+ */
215
+ extern BitVector *bv_or(BitVector *bv1, BitVector *bv2);
216
+
217
+ /**
218
+ * XORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
219
+ * BitVector
220
+ *
221
+ * @param bv1 first BitVector to XOR
222
+ * @param bv2 second BitVector to XOR
223
+ * @return A BitVector with all bits set that differ between bv1 and bv2
224
+ */
225
+ extern BitVector *bv_xor(BitVector *bv1, BitVector *bv2);
226
+
227
+ /**
228
+ * Returns BitVector with all of +bv+'s bits flipped
229
+ *
230
+ * @param bv BitVector to flip
231
+ * @return A BitVector with all bits set that are not set in bv
232
+ */
233
+ extern BitVector *bv_not(BitVector *bv);
234
+
235
+ /**
236
+ * ANDs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
237
+ *
238
+ * @param bv1 first BitVector to AND
239
+ * @param bv2 second BitVector to AND
240
+ * @return A BitVector
241
+ * @return bv1 with all bits set that were set in both bv1 and bv2
242
+ */
243
+ extern BitVector *bv_and_x(BitVector *bv1, BitVector *bv2);
244
+
245
+ /**
246
+ * ORs two BitVectors together
247
+ *
248
+ * @param bv1 first BitVector to OR
249
+ * @param bv2 second BitVector to OR
250
+ * @return bv1
251
+ */
252
+ extern BitVector *bv_or_x(BitVector *bv1, BitVector *bv2);
253
+
254
+ /**
255
+ * XORs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
256
+ *
257
+ * @param bv1 first BitVector to XOR
258
+ * @param bv2 second BitVector to XOR
259
+ * @return bv1
260
+ */
261
+ extern BitVector *bv_xor_x(BitVector *bv1, BitVector *bv2);
262
+
263
+ /**
264
+ * Flips all bits in the BitVector +bv+
265
+ *
266
+ * @param bv BitVector to flip
267
+ * @return +bv+ with all its bits flipped
268
+ */
269
+ extern BitVector *bv_not_x(BitVector *bv);
28
270
 
29
271
  #endif
data/ext/compound_io.c CHANGED
@@ -1,15 +1,9 @@
1
1
  #include "index.h"
2
-
3
- static char * const ALREADY_CLOSED_MSG = "Already closed";
4
- static char * const STREAM_CLOSED_MSG = "Stream closed";
5
- static char * const MISSING_FILE_MSG = "No sub-file found";
6
- static char * const ALREADY_MERGED_MSG = "Already merged";
7
- static char * const REMAINDER_ERROR_MSG = "Non-zero remainder length after copying";
8
- static char * const FILE_OFFSET_MSG = "Difference in the output file offsets"
9
- " does not match the original file length";
10
- static char * const NO_FILES_TO_MERGE_MSG = "No Files to merge into the compound file";
2
+ #include "array.h"
11
3
 
12
4
  extern void store_destroy(Store *store);
5
+ extern InStream *is_new();
6
+ extern Store *store_new();
13
7
 
14
8
  /****************************************************************************
15
9
  *
@@ -18,232 +12,244 @@ extern void store_destroy(Store *store);
18
12
  ****************************************************************************/
19
13
 
20
14
  typedef struct FileEntry {
21
- int offset;
22
- int length;
15
+ off_t offset;
16
+ off_t length;
23
17
  } FileEntry;
24
18
 
25
- void cmpd_touch(Store *store, char *filename)
19
+ static void cmpd_touch(Store *store, char *file_name)
26
20
  {
27
- store->dir.cmpd->store->touch(store->dir.cmpd->store, filename);
21
+ store->dir.cmpd->store->touch(store->dir.cmpd->store, file_name);
28
22
  }
29
23
 
30
- int cmpd_exists(Store *store, char *filename)
24
+ static int cmpd_exists(Store *store, char *file_name)
31
25
  {
32
- if (h_get(store->dir.cmpd->entries, filename) != NULL) {
33
- return true;
34
- } else {
35
- return false;
36
- }
26
+ if (h_get(store->dir.cmpd->entries, file_name) != NULL) {
27
+ return true;
28
+ }
29
+ else {
30
+ return false;
31
+ }
37
32
  }
38
33
 
39
34
  /**
40
35
  * @throws UNSUPPORTED_ERROR
41
36
  */
42
- int cmpd_remove(Store *store, char *filename)
37
+ static int cmpd_remove(Store *store, char *file_name)
43
38
  {
44
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
45
- return 0;
39
+ (void)store;
40
+ (void)file_name;
41
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
42
+ return 0;
46
43
  }
47
44
 
48
45
  /**
49
46
  * @throws UNSUPPORTED_ERROR
50
47
  */
51
- int cmpd_rename(Store *store, char *from, char *to)
48
+ static void cmpd_rename(Store *store, char *from, char *to)
52
49
  {
53
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
54
- return 0;
50
+ (void)store;
51
+ (void)from;
52
+ (void)to;
53
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
55
54
  }
56
55
 
57
- int cmpd_count(Store *store)
56
+ static int cmpd_count(Store *store)
58
57
  {
59
- return store->dir.cmpd->entries->used;
58
+ return store->dir.cmpd->entries->size;
60
59
  }
61
60
 
62
61
  /**
63
62
  * @throws UNSUPPORTED_ERROR
64
63
  */
65
- void cmpd_clear(Store *store)
64
+ static void cmpd_clear(Store *store)
66
65
  {
67
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
66
+ (void)store;
67
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
68
68
  }
69
69
 
70
- void cmpd_close_i(Store *store)
70
+ static void cmpd_close_i(Store *store)
71
71
  {
72
- CompoundStore *cmpd = store->dir.cmpd;
73
- if (cmpd->stream == NULL)
74
- RAISE(IO_ERROR, ALREADY_CLOSED_MSG);
72
+ CompoundStore *cmpd = store->dir.cmpd;
73
+ if (cmpd->stream == NULL) {
74
+ RAISE(IO_ERROR, "Tried to close already closed compound store");
75
+ }
75
76
 
76
- h_destroy(cmpd->entries);
77
+ h_destroy(cmpd->entries);
77
78
 
78
- is_close(cmpd->stream);
79
- cmpd->stream = NULL;
80
- free(store->dir.cmpd);
81
- store_destroy(store);
79
+ is_close(cmpd->stream);
80
+ cmpd->stream = NULL;
81
+ free(store->dir.cmpd);
82
+ store_destroy(store);
82
83
  }
83
84
 
84
- int cmpd_length(Store *store, char *filename)
85
+ static off_t cmpd_length(Store *store, char *file_name)
85
86
  {
86
- FileEntry *fe = (FileEntry *)h_get(store->dir.cmpd->entries, filename);
87
- if (fe != NULL)
88
- return fe->length;
89
- else
90
- return 0;
87
+ FileEntry *fe = h_get(store->dir.cmpd->entries, file_name);
88
+ if (fe != NULL) {
89
+ return fe->length;
90
+ }
91
+ else {
92
+ return 0;
93
+ }
91
94
  }
92
95
 
93
- void cmpdi_seek_internal(InStream *is, int pos) {}
94
- void cmpdi_close_internal(InStream *is)
96
+ static void cmpdi_seek_i(InStream *is, off_t pos)
95
97
  {
96
- //is_close(is->d.cis->sub);
97
- free(is->d.cis);
98
+ (void)is;
99
+ (void)pos;
98
100
  }
99
101
 
100
- void cmpdi_clone_internal(InStream *is, InStream *new_is)
102
+ static void cmpdi_close_i(InStream *is)
101
103
  {
102
- CompoundInStream *cis = ALLOC(CompoundInStream);
103
- //cis->sub = is_clone(is->d.cis->sub);
104
- cis->sub = is->d.cis->sub;
105
- cis->offset = is->d.cis->offset;
106
- cis->length = is->d.cis->length;
107
- new_is->d.cis = cis;
104
+ free(is->d.cis);
108
105
  }
109
106
 
110
- int cmpdi_length_internal(InStream *is)
107
+ static off_t cmpdi_length_i(InStream *is)
111
108
  {
112
- return (is->d.cis->length);
109
+ return (is->d.cis->length);
113
110
  }
114
111
 
115
112
  /*
116
113
  * raises: EOF_ERROR
117
114
  */
118
- void cmpdi_read_internal(InStream *is, uchar *b, int offset, int len)
115
+ static void cmpdi_read_i(InStream *is, uchar *b, int len)
119
116
  {
120
- CompoundInStream *cis = is->d.cis;
121
- int start = is_pos(is);
122
- if ((start + len) > cis->length)
123
- RAISE(EOF_ERROR, EOF_ERROR_MSG);
124
- is_seek(cis->sub, cis->offset + start);
125
- is_read_bytes(cis->sub, b, offset, len);
117
+ CompoundInStream *cis = is->d.cis;
118
+ off_t start = is_pos(is);
119
+
120
+ if ((start + len) > cis->length) {
121
+ RAISE(EOF_ERROR, "Tried to read past end of file. File length is "
122
+ "<%"F_OFF_T_PFX"d> and tried to read to <%"F_OFF_T_PFX"d>",
123
+ cis->length, start + len);
124
+ }
125
+
126
+ is_seek(cis->sub, cis->offset + start);
127
+ is_read_bytes(cis->sub, b, len);
126
128
  }
127
129
 
128
- InStream *cmpd_create_input(InStream *sub_is, int offset, int length)
130
+ static const struct InStreamMethods CMPD_IN_STREAM_METHODS = {
131
+ cmpdi_read_i,
132
+ cmpdi_seek_i,
133
+ cmpdi_length_i,
134
+ cmpdi_close_i
135
+ };
136
+
137
+ static InStream *cmpd_create_input(InStream *sub_is, off_t offset, off_t length)
129
138
  {
130
- InStream *is = is_create();
131
- CompoundInStream *cis = ALLOC(CompoundInStream);
132
- //cis->sub = is_clone(sub_is);
133
- cis->sub = sub_is;
134
- cis->offset = offset;
135
- cis->length = length;
136
- is->d.cis = cis;
137
- is->file = NULL;
138
-
139
- is->read_internal = &cmpdi_read_internal;
140
- is->seek_internal = &cmpdi_seek_internal;
141
- is->close_internal = &cmpdi_close_internal;
142
- is->clone_internal = &cmpdi_clone_internal;
143
- is->length_internal = &cmpdi_length_internal;
144
- return is;
139
+ InStream *is = is_new();
140
+ CompoundInStream *cis = ALLOC(CompoundInStream);
141
+
142
+ cis->sub = sub_is;
143
+ cis->offset = offset;
144
+ cis->length = length;
145
+ is->d.cis = cis;
146
+ is->m = &CMPD_IN_STREAM_METHODS;
147
+
148
+ return is;
145
149
  }
146
150
 
147
- InStream *cmpd_open_input(Store *store, const char *filename)
151
+ static InStream *cmpd_open_input(Store *store, const char *file_name)
148
152
  {
149
- FileEntry *entry;
150
- CompoundStore *cmpd = store->dir.cmpd;
151
- InStream *is;
153
+ FileEntry *entry;
154
+ CompoundStore *cmpd = store->dir.cmpd;
155
+ InStream *is;
156
+
157
+ mutex_lock(&store->mutex);
158
+ if (cmpd->stream == NULL) {
159
+ mutex_unlock(&store->mutex);
160
+ RAISE(IO_ERROR, "Can't open compound file input stream. Parent "
161
+ "stream is closed.");
162
+ }
152
163
 
153
- mutex_lock(&store->mutex);
154
- if (cmpd->stream == NULL) {
155
- mutex_unlock(&store->mutex);
156
- RAISE(IO_ERROR, STREAM_CLOSED_MSG);
157
- }
164
+ entry = h_get(cmpd->entries, file_name);
165
+ if (entry == NULL) {
166
+ mutex_unlock(&store->mutex);
167
+ RAISE(IO_ERROR, "File %s does not exist: ", file_name);
168
+ }
158
169
 
159
- entry = (FileEntry *)h_get(cmpd->entries, filename);
160
- if (entry == NULL) {
170
+ is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
161
171
  mutex_unlock(&store->mutex);
162
- RAISE(IO_ERROR, MISSING_FILE_MSG);
163
- }
164
-
165
- is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
166
- mutex_unlock(&store->mutex);
167
172
 
168
- return is;
173
+ return is;
169
174
  }
170
175
 
171
- OutStream *cmpd_create_output(Store *store, const char *filename)
176
+ static OutStream *cmpd_new_output(Store *store, const char *file_name)
172
177
  {
173
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
174
- return NULL;
178
+ (void)store;
179
+ (void)file_name;
180
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
181
+ return NULL;
175
182
  }
176
183
 
177
- Lock *cmpd_open_lock(Store *store, char *lockname)
184
+ static Lock *cmpd_open_lock(Store *store, char *lock_name)
178
185
  {
179
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
180
- return NULL;
186
+ (void)store;
187
+ (void)lock_name;
188
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
189
+ return NULL;
181
190
  }
182
191
 
183
- void cmpd_close_lock(Lock *lock)
192
+ static void cmpd_close_lock(Lock *lock)
184
193
  {
185
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
194
+ (void)lock;
195
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
186
196
  }
187
197
 
188
198
  Store *open_cmpd_store(Store *store, const char *name)
189
199
  {
190
- int count, i, offset;
191
- char *fname;
192
- FileEntry *entry;
193
- Store * volatile new_store = NULL;
194
- CompoundStore * volatile cmpd = NULL;
195
- InStream * volatile is = NULL;
196
-
197
- TRY
198
- new_store = store_create();
200
+ int count, i;
201
+ off_t offset;
202
+ char *fname;
203
+ FileEntry *entry;
204
+ Store *new_store = NULL;
205
+ CompoundStore *cmpd = NULL;
206
+ InStream *is = NULL;
207
+
208
+ new_store = store_new();
199
209
  cmpd = ALLOC(CompoundStore);
200
210
 
201
- cmpd->store = store;
202
- cmpd->name = name;
203
- cmpd->entries = h_new_str(&free, &free);
211
+ cmpd->store = store;
212
+ cmpd->name = name;
213
+ cmpd->entries = h_new_str(&free, &free);
204
214
  is = cmpd->stream = store->open_input(store, cmpd->name);
205
215
 
206
- // read the directory and init files
207
- count = (int)is_read_vint(is);
216
+ /* read the directory and init files */
217
+ count = is_read_vint(is);
208
218
  entry = NULL;
209
219
  for (i = 0; i < count; i++) {
210
- offset = (int)is_read_long(is);
211
- fname = is_read_string(is);
220
+ offset = (off_t)is_read_i64(is);
221
+ fname = is_read_string(is);
212
222
 
213
- if (entry != NULL) {
214
- // set length of the previous entry
215
- entry->length = offset - entry->offset;
216
- }
223
+ if (entry != NULL) {
224
+ /* set length of the previous entry */
225
+ entry->length = offset - entry->offset;
226
+ }
217
227
 
218
- entry = ALLOC(FileEntry);
219
- entry->offset = offset;
220
- h_set(cmpd->entries, fname, entry);
228
+ entry = ALLOC(FileEntry);
229
+ entry->offset = offset;
230
+ h_set(cmpd->entries, fname, entry);
221
231
  }
222
232
 
223
- // set the length of the final entry
224
- if (entry != NULL)
225
- entry->length = is_length(is) - entry->offset;
226
- XCATCHALL
227
- free(new_store);
228
- free(cmpd);
229
- if (is) is_close(is);
230
- XENDTRY
231
-
232
-
233
- new_store->dir.cmpd = cmpd;
234
- new_store->touch = &cmpd_touch;
235
- new_store->exists = &cmpd_exists;
236
- new_store->remove = &cmpd_remove;
237
- new_store->rename = &cmpd_rename;
238
- new_store->count = &cmpd_count;
239
- new_store->clear = &cmpd_clear;
240
- new_store->length = &cmpd_length;
241
- new_store->close_i = &cmpd_close_i;
242
- new_store->create_output = &cmpd_create_output;
243
- new_store->open_input = &cmpd_open_input;
244
- new_store->open_lock = &cmpd_open_lock;
245
- new_store->close_lock = &cmpd_close_lock;
246
- return new_store;
233
+ /* set the length of the final entry */
234
+ if (entry != NULL) {
235
+ entry->length = is_length(is) - entry->offset;
236
+ }
237
+
238
+ new_store->dir.cmpd = cmpd;
239
+ new_store->touch = &cmpd_touch;
240
+ new_store->exists = &cmpd_exists;
241
+ new_store->remove = &cmpd_remove;
242
+ new_store->rename = &cmpd_rename;
243
+ new_store->count = &cmpd_count;
244
+ new_store->clear = &cmpd_clear;
245
+ new_store->length = &cmpd_length;
246
+ new_store->close_i = &cmpd_close_i;
247
+ new_store->new_output = &cmpd_new_output;
248
+ new_store->open_input = &cmpd_open_input;
249
+ new_store->open_lock = &cmpd_open_lock;
250
+ new_store->close_lock = &cmpd_close_lock;
251
+
252
+ return new_store;
247
253
  }
248
254
 
249
255
  /****************************************************************************
@@ -252,123 +258,105 @@ Store *open_cmpd_store(Store *store, const char *name)
252
258
  *
253
259
  ****************************************************************************/
254
260
 
255
- typedef struct WFileEntry {
256
- char *name;
257
- int dir_offset;
258
- int data_offset;
259
- } WFileEntry;
260
-
261
- WFileEntry *wfe_create(char *name)
262
- {
263
- WFileEntry *wfe = ALLOC(WFileEntry);
264
- wfe->name = name;
265
- return wfe;
266
- }
267
-
268
261
  CompoundWriter *open_cw(Store *store, char *name)
269
262
  {
270
- CompoundWriter *cw = ALLOC(CompoundWriter);
271
- cw->store = store;
272
- cw->name = name;
273
- cw->ids = hs_str_create(NULL);
274
- cw->file_entries = ary_create(1, &free);
275
- cw->merged = false;
276
- return cw;
263
+ CompoundWriter *cw = ALLOC(CompoundWriter);
264
+ cw->store = store;
265
+ cw->name = name;
266
+ cw->ids = hs_new_str(&free);
267
+ cw->file_entries = ary_new_type_capa(CWFileEntry, CW_INIT_CAPA);
268
+ return cw;
277
269
  }
278
270
 
279
271
  void cw_add_file(CompoundWriter *cw, char *id)
280
272
  {
281
- if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
282
- if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST)
283
- RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
273
+ id = estrdup(id);
274
+ if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST) {
275
+ RAISE(IO_ERROR, "Tried to add file \"%s\" which has already been "
276
+ "added to the compound store", id);
277
+ }
284
278
 
285
- hs_add(cw->ids, id);
286
- ary_append(cw->file_entries, wfe_create(id));
279
+ ary_grow(cw->file_entries);
280
+ ary_last(cw->file_entries).name = id;
287
281
  }
288
282
 
289
- void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
283
+ static void cw_copy_file(CompoundWriter *cw, CWFileEntry *src, OutStream *os)
290
284
  {
291
- int start_ptr = os_pos(os);
292
- int end_ptr;
293
- int remainder, length, len;
294
- uchar buffer[BUFFER_SIZE];
295
-
296
- InStream *is = cw->store->open_input(cw->store, src->name);
297
-
298
- TRY
299
- remainder = length = is_length(is);
285
+ off_t start_ptr = os_pos(os);
286
+ off_t end_ptr;
287
+ off_t remainder, length, len;
288
+ uchar buffer[BUFFER_SIZE];
300
289
 
290
+ InStream *is = cw->store->open_input(cw->store, src->name);
291
+
292
+ remainder = length = is_length(is);
301
293
 
302
294
  while (remainder > 0) {
303
- len = MIN(remainder, BUFFER_SIZE);
304
- is_read_bytes(is, buffer, 0, len);
305
- os_write_bytes(os, buffer, len);
306
- remainder -= len;
295
+ len = MIN(remainder, BUFFER_SIZE);
296
+ is_read_bytes(is, buffer, len);
297
+ os_write_bytes(os, buffer, len);
298
+ remainder -= len;
307
299
  }
308
300
 
309
- // Verify that remainder is 0
310
- if (remainder != 0)
311
- RAISE(IO_ERROR, REMAINDER_ERROR_MSG);
301
+ /* Verify that remainder is 0 */
302
+ if (remainder != 0) {
303
+ RAISE(IO_ERROR, "There seems to be an error in the compound file "
304
+ "should have read to the end but there are <%"F_OFF_T_PFX"d> "
305
+ "bytes left", remainder);
306
+ }
312
307
 
313
- // Verify that the output length diff is equal to original file
308
+ /* Verify that the output length diff is equal to original file */
314
309
  end_ptr = os_pos(os);
315
310
  len = end_ptr - start_ptr;
316
- if (len != length)
317
- RAISE(IO_ERROR, FILE_OFFSET_MSG);
311
+ if (len != length) {
312
+ RAISE(IO_ERROR, "Difference in compound file output file offsets "
313
+ "<%"F_OFF_T_PFX"d> does not match the original file lenght "
314
+ "<%"F_OFF_T_PFX"d>", len, length);
315
+ }
318
316
 
319
- XFINALLY
320
317
  is_close(is);
321
- XENDTRY
322
318
  }
323
319
 
324
320
  void cw_close(CompoundWriter *cw)
325
321
  {
326
- OutStream * volatile os = NULL;
327
- int i;
328
- WFileEntry *wfe;
322
+ OutStream *os = NULL;
323
+ int i;
329
324
 
330
- if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
331
- if (cw->ids->size <= 0)
332
- RAISE(STATE_ERROR, NO_FILES_TO_MERGE_MSG);
325
+ if (cw->ids->size <= 0) {
326
+ RAISE(STATE_ERROR, "Tried to merge compound file with no entries");
327
+ }
333
328
 
334
- cw->merged = true;
329
+ os = cw->store->new_output(cw->store, cw->name);
335
330
 
336
- TRY
337
- os = cw->store->create_output(cw->store, cw->name);
338
- os_write_vint(os, cw->file_entries->size);
331
+ os_write_vint(os, ary_size(cw->file_entries));
339
332
 
340
333
  /* Write the directory with all offsets at 0.
341
334
  * Remember the positions of directory entries so that we can adjust the
342
335
  * offsets later */
343
-
344
- for (i = 0; i < cw->file_entries->size; i++) {
345
- wfe = (WFileEntry *)cw->file_entries->elems[i];
346
- wfe->dir_offset = os_pos(os);
347
- os_write_long(os, 0); // for now
348
- os_write_string(os, wfe->name);
336
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
337
+ cw->file_entries[i].dir_offset = os_pos(os);
338
+ os_write_u64(os, 0); /* for now */
339
+ os_write_string(os, cw->file_entries[i].name);
349
340
  }
350
341
 
351
342
  /* Open the files and copy their data into the stream. Remember the
352
343
  * locations of each file's data section. */
353
- for (i = 0; i < cw->file_entries->size; i++) {
354
- wfe = (WFileEntry *)cw->file_entries->elems[i];
355
- wfe->data_offset = os_pos(os);
356
- cw_copy_file(cw, wfe, os);
344
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
345
+ cw->file_entries[i].data_offset = os_pos(os);
346
+ cw_copy_file(cw, &cw->file_entries[i], os);
357
347
  }
358
348
 
359
349
  /* Write the data offsets into the directory of the compound stream */
360
- for (i = 0; i < cw->file_entries->size; i++) {
361
- wfe = (WFileEntry *)cw->file_entries->elems[i];
362
- os_seek(os, wfe->dir_offset);
363
- os_write_long(os, wfe->data_offset);
350
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
351
+ os_seek(os, cw->file_entries[i].dir_offset);
352
+ os_write_u64(os, cw->file_entries[i].data_offset);
353
+ }
354
+
355
+ if (os) {
356
+ os_close(os);
364
357
  }
365
358
 
366
- XFINALLY
367
- if (os) os_close(os);
368
359
  hs_destroy(cw->ids);
369
- ary_destroy(cw->file_entries);
360
+ ary_free(cw->file_entries);
370
361
  free(cw);
371
- break;
372
- XENDTRY
373
362
  }
374
-