ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/bitvector.h CHANGED
@@ -1,29 +1,271 @@
1
1
  #ifndef FRT_BIT_VECTOR_H
2
2
  #define FRT_BIT_VECTOR_H
3
3
 
4
- #include <global.h>
5
- #include <store.h>
6
-
7
- #define BV_INIT_CAPA 256
8
- typedef struct BitVector {
9
- uchar *bits;
10
- int size;
11
- int capa;
12
- int count;
13
- int curr_bit;
4
+ #include "global.h"
5
+
6
+ #define BV_INIT_CAPA 256
7
+ typedef struct BitVector
8
+ {
9
+ /** The bits are held in an array of 32-bit integers */
10
+ f_u32 *bits;
11
+
12
+ /** size is equal to 1 + the highest order bit set */
13
+ int size;
14
+
15
+ /** capa is the number of words (U32) allocated for the bits */
16
+ int capa;
17
+
18
+ /** count is the running count of bits set. This is kept up to date by
19
+ *bv_set and bv_unset. You can reset this value by calling bv_recount */
20
+ int count;
21
+
22
+ /** curr_bit is used by scan_next to record the previously scanned bit */
23
+ int curr_bit;
24
+
25
+ bool extends_as_ones : 1;
14
26
  } BitVector;
15
27
 
16
- BitVector *bv_create();;
17
- BitVector *bv_create_size(int size);
18
- void bv_destroy(BitVector *bv);
19
- void bv_set(BitVector *bv, int bit);
20
- int bv_get(BitVector *bv, int bit);
21
- void bv_clear(BitVector *bv);
22
- void bv_unset(BitVector *bv, int bit);
23
- void bv_write(BitVector *bv, Store *store, char *name);
24
- BitVector *bv_read(Store *store, char *name);
25
- void bv_scan_reset(BitVector *bv);
26
- int bv_scan_next(BitVector *bv);
27
- int bv_scan_next_from(BitVector *bv, register const int from);
28
+ /**
29
+ * Create a new BitVector with a capacity of +BV_INIT_CAPA+. Note that the
30
+ * BitVector is growable and will adjust its capacity when you use bv_set.
31
+ *
32
+ * @return BitVector with a capacity of +BV_INIT_CAPA+.
33
+ */
34
+ extern BitVector *bv_new();
35
+
36
+ /**
37
+ * Create a new BitVector with a capacity of +capa+. Note that the BitVector
38
+ * is growable and will adjust its capacity when you use bv_set.
39
+ *
40
+ * @param capa the initial capacity of the BitVector
41
+ * @return BitVector with a capacity of +capa+.
42
+ */
43
+ extern BitVector *bv_new_capa(int capa);
44
+
45
+ /**
46
+ * Destroy a BitVector, freeing all memory allocated to that BitVector
47
+ *
48
+ * @param bv BitVector to destroy
49
+ */
50
+ extern void bv_destroy(BitVector *bv);
51
+
52
+ /**
53
+ * Set the bit at position +index+. If +index+ is outside of the range of the
54
+ * BitVector, that is >= BitVector.size, BitVector.size will be set to +index+
55
+ * + 1. If it is greater than the capacity of the BitVector, the capacity will
56
+ * be expanded to accommodate.
57
+ *
58
+ * @param bv the BitVector to set the bit in
59
+ * @param index the index of the bit to set
60
+ */
61
+ extern void bv_set(BitVector *bv, int index);
62
+
63
+ /**
64
+ * Unsafely set the bit at position +index+. If you choose to use this
65
+ * function you must create the BitVector with a large enough capacity to
66
+ * accommodate all of the bv_set_fast operations. You must also set bits in
67
+ * order and only one time per bit. Otherwise, use the safe bv_set function.
68
+ *
69
+ * So this is ok;
70
+ * <pre>
71
+ * BitVector *bv = bv_new_capa(1000);
72
+ * bv_set_fast(bv, 900);
73
+ * bv_set_fast(bv, 920);
74
+ * bv_set_fast(bv, 999);
75
+ * </pre>
76
+ *
77
+ * While these are not ok;
78
+ * <pre>
79
+ * BitVector *bv = bv_new_capa(90);
80
+ * bv_set_fast(bv, 80);
81
+ * bv_set_fast(bv, 79); // <= Bad: Out of Order
82
+ * bv_set_fast(bv, 80); // <= Bad: Already set
83
+ * bv_set_fast(bv, 90); // <= Bad: Out of Range. index must be < capa
84
+ * </pre>
85
+ *
86
+ * @param bv the BitVector to set the bit in
87
+ * @param index the index of the bit to set
88
+ */
89
+ extern void bv_set_fast(BitVector *bv, int bit);
90
+
91
+ /**
92
+ * Return 1 if the bit at +index+ was set or 0 otherwise. If +index+ is out of
93
+ * range, that is greater than the BitVector's capacity, it will also return 0.
94
+ *
95
+ * @param bv the BitVector to check in
96
+ * @param index the index of the bit to check
97
+ * @return 1 if the bit was set, 0 otherwise
98
+ */
99
+ extern int bv_get(BitVector *bv, int index);
100
+
101
+ /**
102
+ * Unset the bit at position +index+. If the +index+ was out of range, that is
103
+ * greater than the BitVector's capacity then do nothing. (bv_get will return 0
104
+ * in this case anyway).
105
+ *
106
+ * @param bv the BitVector to unset the bit in
107
+ * @param index the index of the bit to unset
108
+ */
109
+ extern void bv_unset(BitVector *bv, int bit);
110
+
111
+ /**
112
+ * Clear all set bits. This function will set all set bits to 0.
113
+ *
114
+ * @param bv the BitVector to clear
115
+ */
116
+ extern void bv_clear(BitVector *bv);
117
+
118
+ /**
119
+ * Resets the set bit count by running through the whole BitVector and
120
+ * counting all set bits. A running count of the bits is kept by bv_set,
121
+ * bv_unset and bv_set_fast so this function is only necessary if the count could
122
+ * have been corrupted somehow or if the BitVector has been constructed in a
123
+ * different way (for example being read from the file_system).
124
+ *
125
+ * @param bv the BitVector to count the bits in
126
+ * @return the number of set bits in the BitVector. BitVector.count is also
127
+ * set
128
+ */
129
+ extern int bv_recount(BitVector *bv);
130
+
131
+ /**
132
+ * Reset the BitVector for scanning. This function should be called before
133
+ * using bv_scan_next to scan through all set bits in the BitVector. This is
134
+ * not necessary when using bv_scan_next_from.
135
+ *
136
+ * @param bv the BitVector to reset for scanning
137
+ */
138
+ extern void bv_scan_reset(BitVector *bv);
139
+
140
+ /**
141
+ * Scan the BitVector for the next set bit. Before using this function you
142
+ * should reset the BitVector for scanning using +bv_scan_reset+. You can then
143
+ * repeatedly call bv_scan_next to get each set bit until it finally returns
144
+ * -1.
145
+ *
146
+ * @param bv the BitVector to scan
147
+ * @return the next set bits index or -1 if no more bits are set
148
+ */
149
+ extern int bv_scan_next(BitVector *bv);
150
+
151
+ /**
152
+ * Scan the BitVector for the next set bit after +from+. If no more bits are
153
+ * set then return -1, otherwise return the index of the next set bit.
154
+ *
155
+ * @param bv the BitVector to scan
156
+ * @return the next set bit's index or -1 if no more bits are set
157
+ */
158
+
159
+ extern int bv_scan_next_from(BitVector *bv, register const int from);
160
+ /**
161
+ * Scan the BitVector for the next unset bit. Before using this function you
162
+ * should reset the BitVector for scanning using +bv_scan_reset+. You can then
163
+ * repeatedly call bv_scan_next_unset to get each unset bit until it finally returns
164
+ * -1.
165
+ *
166
+ * @param bv the BitVector to scan
167
+ * @return the next unset bits index or -1 if no more bits are unset
168
+ */
169
+ extern int bv_scan_next_unset(BitVector *bv);
170
+
171
+ /**
172
+ * Scan the BitVector for the next unset bit after +from+. If no more bits are
173
+ * unset then return -1, otherwise return the index of the next unset bit.
174
+ *
175
+ * @param bv the BitVector to scan
176
+ * @return the next unset bit's index or -1 if no more bits are unset
177
+ */
178
+ extern int bv_scan_next_unset_from(BitVector *bv, register const int from);
179
+
180
+ /**
181
+ * Check whether the two BitVectors have the same bits set.
182
+ *
183
+ * @param bv1 first BitVector to compare
184
+ * @param bv2 second BitVector to compare
185
+ * @return true if bv1 == bv2
186
+ */
187
+ extern int bv_eq(BitVector *bv1, BitVector *bv2);
188
+
189
+ /**
190
+ * Determines a hash value for the BitVector
191
+ *
192
+ * @param bv the BitVector to hash
193
+ * @return A hash value for the BitVector
194
+ */
195
+ extern ulong bv_hash(BitVector *bv);
196
+
197
+ /**
198
+ * ANDs two BitVectors (+bv1+ and +bv2+) together and return the resultant
199
+ * BitVector
200
+ *
201
+ * @param bv1 first BitVector to AND
202
+ * @param bv2 second BitVector to AND
203
+ * @return A BitVector with all bits set that are set in both bv1 and bv2
204
+ */
205
+ extern BitVector *bv_and(BitVector *bv1, BitVector *bv2);
206
+
207
+ /**
208
+ * ORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
209
+ * BitVector
210
+ *
211
+ * @param bv1 first BitVector to OR
212
+ * @param bv2 second BitVector to OR
213
+ * @return A BitVector with all bits set that are set in either bv1 or bv2
214
+ */
215
+ extern BitVector *bv_or(BitVector *bv1, BitVector *bv2);
216
+
217
+ /**
218
+ * XORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
219
+ * BitVector
220
+ *
221
+ * @param bv1 first BitVector to XOR
222
+ * @param bv2 second BitVector to XOR
223
+ * @return A BitVector with all bits set that differ between bv1 and bv2
224
+ */
225
+ extern BitVector *bv_xor(BitVector *bv1, BitVector *bv2);
226
+
227
+ /**
228
+ * Returns BitVector with all of +bv+'s bits flipped
229
+ *
230
+ * @param bv BitVector to flip
231
+ * @return A BitVector with all of +bv+'s bits flipped
232
+ */
233
+ extern BitVector *bv_not(BitVector *bv);
234
+
235
+ /**
236
+ * ANDs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
237
+ *
238
+ * @param bv1 first BitVector to AND
239
+ * @param bv2 second BitVector to AND
240
+ * @return A BitVector
241
+ * @return bv1 with all bits set that were set in both bv1 and bv2
242
+ */
243
+ extern BitVector *bv_and_x(BitVector *bv1, BitVector *bv2);
244
+
245
+ /**
246
+ * ORs two BitVectors together
247
+ *
248
+ * @param bv1 first BitVector to OR
249
+ * @param bv2 second BitVector to OR
250
+ * @return bv1
251
+ */
252
+ extern BitVector *bv_or_x(BitVector *bv1, BitVector *bv2);
253
+
254
+ /**
255
+ * XORs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
256
+ *
257
+ * @param bv1 first BitVector to XOR
258
+ * @param bv2 second BitVector to XOR
259
+ * @return bv1
260
+ */
261
+ extern BitVector *bv_xor_x(BitVector *bv1, BitVector *bv2);
262
+
263
+ /**
264
+ * Flips all bits in the BitVector +bv+
265
+ *
266
+ * @param bv BitVector to flip
267
+ * @return +bv+ with all its bits flipped
268
+ */
269
+ extern BitVector *bv_not_x(BitVector *bv);
28
270
 
29
271
  #endif
data/ext/compound_io.c CHANGED
@@ -1,15 +1,9 @@
1
1
  #include "index.h"
2
-
3
- static char * const ALREADY_CLOSED_MSG = "Already closed";
4
- static char * const STREAM_CLOSED_MSG = "Stream closed";
5
- static char * const MISSING_FILE_MSG = "No sub-file found";
6
- static char * const ALREADY_MERGED_MSG = "Already merged";
7
- static char * const REMAINDER_ERROR_MSG = "Non-zero remainder length after copying";
8
- static char * const FILE_OFFSET_MSG = "Difference in the output file offsets"
9
- " does not match the original file length";
10
- static char * const NO_FILES_TO_MERGE_MSG = "No Files to merge into the compound file";
2
+ #include "array.h"
11
3
 
12
4
  extern void store_destroy(Store *store);
5
+ extern InStream *is_new();
6
+ extern Store *store_new();
13
7
 
14
8
  /****************************************************************************
15
9
  *
@@ -18,232 +12,244 @@ extern void store_destroy(Store *store);
18
12
  ****************************************************************************/
19
13
 
20
14
  typedef struct FileEntry {
21
- int offset;
22
- int length;
15
+ off_t offset;
16
+ off_t length;
23
17
  } FileEntry;
24
18
 
25
- void cmpd_touch(Store *store, char *filename)
19
+ static void cmpd_touch(Store *store, char *file_name)
26
20
  {
27
- store->dir.cmpd->store->touch(store->dir.cmpd->store, filename);
21
+ store->dir.cmpd->store->touch(store->dir.cmpd->store, file_name);
28
22
  }
29
23
 
30
- int cmpd_exists(Store *store, char *filename)
24
+ static int cmpd_exists(Store *store, char *file_name)
31
25
  {
32
- if (h_get(store->dir.cmpd->entries, filename) != NULL) {
33
- return true;
34
- } else {
35
- return false;
36
- }
26
+ if (h_get(store->dir.cmpd->entries, file_name) != NULL) {
27
+ return true;
28
+ }
29
+ else {
30
+ return false;
31
+ }
37
32
  }
38
33
 
39
34
  /**
40
35
  * @throws UNSUPPORTED_ERROR
41
36
  */
42
- int cmpd_remove(Store *store, char *filename)
37
+ static int cmpd_remove(Store *store, char *file_name)
43
38
  {
44
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
45
- return 0;
39
+ (void)store;
40
+ (void)file_name;
41
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
42
+ return 0;
46
43
  }
47
44
 
48
45
  /**
49
46
  * @throws UNSUPPORTED_ERROR
50
47
  */
51
- int cmpd_rename(Store *store, char *from, char *to)
48
+ static void cmpd_rename(Store *store, char *from, char *to)
52
49
  {
53
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
54
- return 0;
50
+ (void)store;
51
+ (void)from;
52
+ (void)to;
53
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
55
54
  }
56
55
 
57
- int cmpd_count(Store *store)
56
+ static int cmpd_count(Store *store)
58
57
  {
59
- return store->dir.cmpd->entries->used;
58
+ return store->dir.cmpd->entries->size;
60
59
  }
61
60
 
62
61
  /**
63
62
  * @throws UNSUPPORTED_ERROR
64
63
  */
65
- void cmpd_clear(Store *store)
64
+ static void cmpd_clear(Store *store)
66
65
  {
67
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
66
+ (void)store;
67
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
68
68
  }
69
69
 
70
- void cmpd_close_i(Store *store)
70
+ static void cmpd_close_i(Store *store)
71
71
  {
72
- CompoundStore *cmpd = store->dir.cmpd;
73
- if (cmpd->stream == NULL)
74
- RAISE(IO_ERROR, ALREADY_CLOSED_MSG);
72
+ CompoundStore *cmpd = store->dir.cmpd;
73
+ if (cmpd->stream == NULL) {
74
+ RAISE(IO_ERROR, "Tried to close already closed compound store");
75
+ }
75
76
 
76
- h_destroy(cmpd->entries);
77
+ h_destroy(cmpd->entries);
77
78
 
78
- is_close(cmpd->stream);
79
- cmpd->stream = NULL;
80
- free(store->dir.cmpd);
81
- store_destroy(store);
79
+ is_close(cmpd->stream);
80
+ cmpd->stream = NULL;
81
+ free(store->dir.cmpd);
82
+ store_destroy(store);
82
83
  }
83
84
 
84
- int cmpd_length(Store *store, char *filename)
85
+ static off_t cmpd_length(Store *store, char *file_name)
85
86
  {
86
- FileEntry *fe = (FileEntry *)h_get(store->dir.cmpd->entries, filename);
87
- if (fe != NULL)
88
- return fe->length;
89
- else
90
- return 0;
87
+ FileEntry *fe = h_get(store->dir.cmpd->entries, file_name);
88
+ if (fe != NULL) {
89
+ return fe->length;
90
+ }
91
+ else {
92
+ return 0;
93
+ }
91
94
  }
92
95
 
93
- void cmpdi_seek_internal(InStream *is, int pos) {}
94
- void cmpdi_close_internal(InStream *is)
96
+ static void cmpdi_seek_i(InStream *is, off_t pos)
95
97
  {
96
- //is_close(is->d.cis->sub);
97
- free(is->d.cis);
98
+ (void)is;
99
+ (void)pos;
98
100
  }
99
101
 
100
- void cmpdi_clone_internal(InStream *is, InStream *new_is)
102
+ static void cmpdi_close_i(InStream *is)
101
103
  {
102
- CompoundInStream *cis = ALLOC(CompoundInStream);
103
- //cis->sub = is_clone(is->d.cis->sub);
104
- cis->sub = is->d.cis->sub;
105
- cis->offset = is->d.cis->offset;
106
- cis->length = is->d.cis->length;
107
- new_is->d.cis = cis;
104
+ free(is->d.cis);
108
105
  }
109
106
 
110
- int cmpdi_length_internal(InStream *is)
107
+ static off_t cmpdi_length_i(InStream *is)
111
108
  {
112
- return (is->d.cis->length);
109
+ return (is->d.cis->length);
113
110
  }
114
111
 
115
112
  /*
116
113
  * raises: EOF_ERROR
117
114
  */
118
- void cmpdi_read_internal(InStream *is, uchar *b, int offset, int len)
115
+ static void cmpdi_read_i(InStream *is, uchar *b, int len)
119
116
  {
120
- CompoundInStream *cis = is->d.cis;
121
- int start = is_pos(is);
122
- if ((start + len) > cis->length)
123
- RAISE(EOF_ERROR, EOF_ERROR_MSG);
124
- is_seek(cis->sub, cis->offset + start);
125
- is_read_bytes(cis->sub, b, offset, len);
117
+ CompoundInStream *cis = is->d.cis;
118
+ off_t start = is_pos(is);
119
+
120
+ if ((start + len) > cis->length) {
121
+ RAISE(EOF_ERROR, "Tried to read past end of file. File length is "
122
+ "<%"F_OFF_T_PFX"d> and tried to read to <%"F_OFF_T_PFX"d>",
123
+ cis->length, start + len);
124
+ }
125
+
126
+ is_seek(cis->sub, cis->offset + start);
127
+ is_read_bytes(cis->sub, b, len);
126
128
  }
127
129
 
128
- InStream *cmpd_create_input(InStream *sub_is, int offset, int length)
130
+ static const struct InStreamMethods CMPD_IN_STREAM_METHODS = {
131
+ cmpdi_read_i,
132
+ cmpdi_seek_i,
133
+ cmpdi_length_i,
134
+ cmpdi_close_i
135
+ };
136
+
137
+ static InStream *cmpd_create_input(InStream *sub_is, off_t offset, off_t length)
129
138
  {
130
- InStream *is = is_create();
131
- CompoundInStream *cis = ALLOC(CompoundInStream);
132
- //cis->sub = is_clone(sub_is);
133
- cis->sub = sub_is;
134
- cis->offset = offset;
135
- cis->length = length;
136
- is->d.cis = cis;
137
- is->file = NULL;
138
-
139
- is->read_internal = &cmpdi_read_internal;
140
- is->seek_internal = &cmpdi_seek_internal;
141
- is->close_internal = &cmpdi_close_internal;
142
- is->clone_internal = &cmpdi_clone_internal;
143
- is->length_internal = &cmpdi_length_internal;
144
- return is;
139
+ InStream *is = is_new();
140
+ CompoundInStream *cis = ALLOC(CompoundInStream);
141
+
142
+ cis->sub = sub_is;
143
+ cis->offset = offset;
144
+ cis->length = length;
145
+ is->d.cis = cis;
146
+ is->m = &CMPD_IN_STREAM_METHODS;
147
+
148
+ return is;
145
149
  }
146
150
 
147
- InStream *cmpd_open_input(Store *store, const char *filename)
151
+ static InStream *cmpd_open_input(Store *store, const char *file_name)
148
152
  {
149
- FileEntry *entry;
150
- CompoundStore *cmpd = store->dir.cmpd;
151
- InStream *is;
153
+ FileEntry *entry;
154
+ CompoundStore *cmpd = store->dir.cmpd;
155
+ InStream *is;
156
+
157
+ mutex_lock(&store->mutex);
158
+ if (cmpd->stream == NULL) {
159
+ mutex_unlock(&store->mutex);
160
+ RAISE(IO_ERROR, "Can't open compound file input stream. Parent "
161
+ "stream is closed.");
162
+ }
152
163
 
153
- mutex_lock(&store->mutex);
154
- if (cmpd->stream == NULL) {
155
- mutex_unlock(&store->mutex);
156
- RAISE(IO_ERROR, STREAM_CLOSED_MSG);
157
- }
164
+ entry = h_get(cmpd->entries, file_name);
165
+ if (entry == NULL) {
166
+ mutex_unlock(&store->mutex);
167
+ RAISE(IO_ERROR, "File %s does not exist: ", file_name);
168
+ }
158
169
 
159
- entry = (FileEntry *)h_get(cmpd->entries, filename);
160
- if (entry == NULL) {
170
+ is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
161
171
  mutex_unlock(&store->mutex);
162
- RAISE(IO_ERROR, MISSING_FILE_MSG);
163
- }
164
-
165
- is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
166
- mutex_unlock(&store->mutex);
167
172
 
168
- return is;
173
+ return is;
169
174
  }
170
175
 
171
- OutStream *cmpd_create_output(Store *store, const char *filename)
176
+ static OutStream *cmpd_new_output(Store *store, const char *file_name)
172
177
  {
173
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
174
- return NULL;
178
+ (void)store;
179
+ (void)file_name;
180
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
181
+ return NULL;
175
182
  }
176
183
 
177
- Lock *cmpd_open_lock(Store *store, char *lockname)
184
+ static Lock *cmpd_open_lock(Store *store, char *lock_name)
178
185
  {
179
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
180
- return NULL;
186
+ (void)store;
187
+ (void)lock_name;
188
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
189
+ return NULL;
181
190
  }
182
191
 
183
- void cmpd_close_lock(Lock *lock)
192
+ static void cmpd_close_lock(Lock *lock)
184
193
  {
185
- RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
194
+ (void)lock;
195
+ RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
186
196
  }
187
197
 
188
198
  Store *open_cmpd_store(Store *store, const char *name)
189
199
  {
190
- int count, i, offset;
191
- char *fname;
192
- FileEntry *entry;
193
- Store * volatile new_store = NULL;
194
- CompoundStore * volatile cmpd = NULL;
195
- InStream * volatile is = NULL;
196
-
197
- TRY
198
- new_store = store_create();
200
+ int count, i;
201
+ off_t offset;
202
+ char *fname;
203
+ FileEntry *entry;
204
+ Store *new_store = NULL;
205
+ CompoundStore *cmpd = NULL;
206
+ InStream *is = NULL;
207
+
208
+ new_store = store_new();
199
209
  cmpd = ALLOC(CompoundStore);
200
210
 
201
- cmpd->store = store;
202
- cmpd->name = name;
203
- cmpd->entries = h_new_str(&free, &free);
211
+ cmpd->store = store;
212
+ cmpd->name = name;
213
+ cmpd->entries = h_new_str(&free, &free);
204
214
  is = cmpd->stream = store->open_input(store, cmpd->name);
205
215
 
206
- // read the directory and init files
207
- count = (int)is_read_vint(is);
216
+ /* read the directory and init files */
217
+ count = is_read_vint(is);
208
218
  entry = NULL;
209
219
  for (i = 0; i < count; i++) {
210
- offset = (int)is_read_long(is);
211
- fname = is_read_string(is);
220
+ offset = (off_t)is_read_i64(is);
221
+ fname = is_read_string(is);
212
222
 
213
- if (entry != NULL) {
214
- // set length of the previous entry
215
- entry->length = offset - entry->offset;
216
- }
223
+ if (entry != NULL) {
224
+ /* set length of the previous entry */
225
+ entry->length = offset - entry->offset;
226
+ }
217
227
 
218
- entry = ALLOC(FileEntry);
219
- entry->offset = offset;
220
- h_set(cmpd->entries, fname, entry);
228
+ entry = ALLOC(FileEntry);
229
+ entry->offset = offset;
230
+ h_set(cmpd->entries, fname, entry);
221
231
  }
222
232
 
223
- // set the length of the final entry
224
- if (entry != NULL)
225
- entry->length = is_length(is) - entry->offset;
226
- XCATCHALL
227
- free(new_store);
228
- free(cmpd);
229
- if (is) is_close(is);
230
- XENDTRY
231
-
232
-
233
- new_store->dir.cmpd = cmpd;
234
- new_store->touch = &cmpd_touch;
235
- new_store->exists = &cmpd_exists;
236
- new_store->remove = &cmpd_remove;
237
- new_store->rename = &cmpd_rename;
238
- new_store->count = &cmpd_count;
239
- new_store->clear = &cmpd_clear;
240
- new_store->length = &cmpd_length;
241
- new_store->close_i = &cmpd_close_i;
242
- new_store->create_output = &cmpd_create_output;
243
- new_store->open_input = &cmpd_open_input;
244
- new_store->open_lock = &cmpd_open_lock;
245
- new_store->close_lock = &cmpd_close_lock;
246
- return new_store;
233
+ /* set the length of the final entry */
234
+ if (entry != NULL) {
235
+ entry->length = is_length(is) - entry->offset;
236
+ }
237
+
238
+ new_store->dir.cmpd = cmpd;
239
+ new_store->touch = &cmpd_touch;
240
+ new_store->exists = &cmpd_exists;
241
+ new_store->remove = &cmpd_remove;
242
+ new_store->rename = &cmpd_rename;
243
+ new_store->count = &cmpd_count;
244
+ new_store->clear = &cmpd_clear;
245
+ new_store->length = &cmpd_length;
246
+ new_store->close_i = &cmpd_close_i;
247
+ new_store->new_output = &cmpd_new_output;
248
+ new_store->open_input = &cmpd_open_input;
249
+ new_store->open_lock = &cmpd_open_lock;
250
+ new_store->close_lock = &cmpd_close_lock;
251
+
252
+ return new_store;
247
253
  }
248
254
 
249
255
  /****************************************************************************
@@ -252,123 +258,105 @@ Store *open_cmpd_store(Store *store, const char *name)
252
258
  *
253
259
  ****************************************************************************/
254
260
 
255
- typedef struct WFileEntry {
256
- char *name;
257
- int dir_offset;
258
- int data_offset;
259
- } WFileEntry;
260
-
261
- WFileEntry *wfe_create(char *name)
262
- {
263
- WFileEntry *wfe = ALLOC(WFileEntry);
264
- wfe->name = name;
265
- return wfe;
266
- }
267
-
268
261
  CompoundWriter *open_cw(Store *store, char *name)
269
262
  {
270
- CompoundWriter *cw = ALLOC(CompoundWriter);
271
- cw->store = store;
272
- cw->name = name;
273
- cw->ids = hs_str_create(NULL);
274
- cw->file_entries = ary_create(1, &free);
275
- cw->merged = false;
276
- return cw;
263
+ CompoundWriter *cw = ALLOC(CompoundWriter);
264
+ cw->store = store;
265
+ cw->name = name;
266
+ cw->ids = hs_new_str(&free);
267
+ cw->file_entries = ary_new_type_capa(CWFileEntry, CW_INIT_CAPA);
268
+ return cw;
277
269
  }
278
270
 
279
271
  void cw_add_file(CompoundWriter *cw, char *id)
280
272
  {
281
- if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
282
- if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST)
283
- RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
273
+ id = estrdup(id);
274
+ if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST) {
275
+ RAISE(IO_ERROR, "Tried to add file \"%s\" which has already been "
276
+ "added to the compound store", id);
277
+ }
284
278
 
285
- hs_add(cw->ids, id);
286
- ary_append(cw->file_entries, wfe_create(id));
279
+ ary_grow(cw->file_entries);
280
+ ary_last(cw->file_entries).name = id;
287
281
  }
288
282
 
289
- void cw_copy_file(CompoundWriter *cw, WFileEntry *src, OutStream *os)
283
+ static void cw_copy_file(CompoundWriter *cw, CWFileEntry *src, OutStream *os)
290
284
  {
291
- int start_ptr = os_pos(os);
292
- int end_ptr;
293
- int remainder, length, len;
294
- uchar buffer[BUFFER_SIZE];
295
-
296
- InStream *is = cw->store->open_input(cw->store, src->name);
297
-
298
- TRY
299
- remainder = length = is_length(is);
285
+ off_t start_ptr = os_pos(os);
286
+ off_t end_ptr;
287
+ off_t remainder, length, len;
288
+ uchar buffer[BUFFER_SIZE];
300
289
 
290
+ InStream *is = cw->store->open_input(cw->store, src->name);
291
+
292
+ remainder = length = is_length(is);
301
293
 
302
294
  while (remainder > 0) {
303
- len = MIN(remainder, BUFFER_SIZE);
304
- is_read_bytes(is, buffer, 0, len);
305
- os_write_bytes(os, buffer, len);
306
- remainder -= len;
295
+ len = MIN(remainder, BUFFER_SIZE);
296
+ is_read_bytes(is, buffer, len);
297
+ os_write_bytes(os, buffer, len);
298
+ remainder -= len;
307
299
  }
308
300
 
309
- // Verify that remainder is 0
310
- if (remainder != 0)
311
- RAISE(IO_ERROR, REMAINDER_ERROR_MSG);
301
+ /* Verify that remainder is 0 */
302
+ if (remainder != 0) {
303
+ RAISE(IO_ERROR, "There seems to be an error in the compound file "
304
+ "should have read to the end but there are <%"F_OFF_T_PFX"d> "
305
+ "bytes left", remainder);
306
+ }
312
307
 
313
- // Verify that the output length diff is equal to original file
308
+ /* Verify that the output length diff is equal to original file */
314
309
  end_ptr = os_pos(os);
315
310
  len = end_ptr - start_ptr;
316
- if (len != length)
317
- RAISE(IO_ERROR, FILE_OFFSET_MSG);
311
+ if (len != length) {
312
+ RAISE(IO_ERROR, "Difference in compound file output file offsets "
313
+ "<%"F_OFF_T_PFX"d> does not match the original file lenght "
314
+ "<%"F_OFF_T_PFX"d>", len, length);
315
+ }
318
316
 
319
- XFINALLY
320
317
  is_close(is);
321
- XENDTRY
322
318
  }
323
319
 
324
320
  void cw_close(CompoundWriter *cw)
325
321
  {
326
- OutStream * volatile os = NULL;
327
- int i;
328
- WFileEntry *wfe;
322
+ OutStream *os = NULL;
323
+ int i;
329
324
 
330
- if (cw->merged) RAISE(STATE_ERROR, ALREADY_MERGED_MSG);
331
- if (cw->ids->size <= 0)
332
- RAISE(STATE_ERROR, NO_FILES_TO_MERGE_MSG);
325
+ if (cw->ids->size <= 0) {
326
+ RAISE(STATE_ERROR, "Tried to merge compound file with no entries");
327
+ }
333
328
 
334
- cw->merged = true;
329
+ os = cw->store->new_output(cw->store, cw->name);
335
330
 
336
- TRY
337
- os = cw->store->create_output(cw->store, cw->name);
338
- os_write_vint(os, cw->file_entries->size);
331
+ os_write_vint(os, ary_size(cw->file_entries));
339
332
 
340
333
  /* Write the directory with all offsets at 0.
341
334
  * Remember the positions of directory entries so that we can adjust the
342
335
  * offsets later */
343
-
344
- for (i = 0; i < cw->file_entries->size; i++) {
345
- wfe = (WFileEntry *)cw->file_entries->elems[i];
346
- wfe->dir_offset = os_pos(os);
347
- os_write_long(os, 0); // for now
348
- os_write_string(os, wfe->name);
336
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
337
+ cw->file_entries[i].dir_offset = os_pos(os);
338
+ os_write_u64(os, 0); /* for now */
339
+ os_write_string(os, cw->file_entries[i].name);
349
340
  }
350
341
 
351
342
  /* Open the files and copy their data into the stream. Remember the
352
343
  * locations of each file's data section. */
353
- for (i = 0; i < cw->file_entries->size; i++) {
354
- wfe = (WFileEntry *)cw->file_entries->elems[i];
355
- wfe->data_offset = os_pos(os);
356
- cw_copy_file(cw, wfe, os);
344
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
345
+ cw->file_entries[i].data_offset = os_pos(os);
346
+ cw_copy_file(cw, &cw->file_entries[i], os);
357
347
  }
358
348
 
359
349
  /* Write the data offsets into the directory of the compound stream */
360
- for (i = 0; i < cw->file_entries->size; i++) {
361
- wfe = (WFileEntry *)cw->file_entries->elems[i];
362
- os_seek(os, wfe->dir_offset);
363
- os_write_long(os, wfe->data_offset);
350
+ for (i = 0; i < ary_size(cw->file_entries); i++) {
351
+ os_seek(os, cw->file_entries[i].dir_offset);
352
+ os_write_u64(os, cw->file_entries[i].data_offset);
353
+ }
354
+
355
+ if (os) {
356
+ os_close(os);
364
357
  }
365
358
 
366
- XFINALLY
367
- if (os) os_close(os);
368
359
  hs_destroy(cw->ids);
369
- ary_destroy(cw->file_entries);
360
+ ary_free(cw->file_entries);
370
361
  free(cw);
371
- break;
372
- XENDTRY
373
362
  }
374
-