ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/bitvector.h
CHANGED
@@ -1,29 +1,271 @@
|
|
1
1
|
#ifndef FRT_BIT_VECTOR_H
|
2
2
|
#define FRT_BIT_VECTOR_H
|
3
3
|
|
4
|
-
#include
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
4
|
+
#include "global.h"
|
5
|
+
|
6
|
+
#define BV_INIT_CAPA 256
|
7
|
+
typedef struct BitVector
|
8
|
+
{
|
9
|
+
/** The bits are held in an array of 32-bit integers */
|
10
|
+
f_u32 *bits;
|
11
|
+
|
12
|
+
/** size is equal to 1 + the highest order bit set */
|
13
|
+
int size;
|
14
|
+
|
15
|
+
/** capa is the number of words (U32) allocated for the bits */
|
16
|
+
int capa;
|
17
|
+
|
18
|
+
/** count is the running count of bits set. This is kept up to date by
|
19
|
+
*bv_set and bv_unset. You can reset this value by calling bv_recount */
|
20
|
+
int count;
|
21
|
+
|
22
|
+
/** curr_bit is used by scan_next to record the previously scanned bit */
|
23
|
+
int curr_bit;
|
24
|
+
|
25
|
+
bool extends_as_ones : 1;
|
14
26
|
} BitVector;
|
15
27
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
+
/**
|
29
|
+
* Create a new BitVector with a capacity of +BV_INIT_CAPA+. Note that the
|
30
|
+
* BitVector is growable and will adjust its capacity when you use bv_set.
|
31
|
+
*
|
32
|
+
* @return BitVector with a capacity of +BV_INIT_CAPA+.
|
33
|
+
*/
|
34
|
+
extern BitVector *bv_new();
|
35
|
+
|
36
|
+
/**
|
37
|
+
* Create a new BitVector with a capacity of +capa+. Note that the BitVector
|
38
|
+
* is growable and will adjust its capacity when you use bv_set.
|
39
|
+
*
|
40
|
+
* @param capa the initial capacity of the BitVector
|
41
|
+
* @return BitVector with a capacity of +capa+.
|
42
|
+
*/
|
43
|
+
extern BitVector *bv_new_capa(int capa);
|
44
|
+
|
45
|
+
/**
|
46
|
+
* Destroy a BitVector, freeing all memory allocated to that BitVector
|
47
|
+
*
|
48
|
+
* @param bv BitVector to destroy
|
49
|
+
*/
|
50
|
+
extern void bv_destroy(BitVector *bv);
|
51
|
+
|
52
|
+
/**
|
53
|
+
* Set the bit at position +index+. If +index+ is outside of the range of the
|
54
|
+
* BitVector, that is >= BitVector.size, BitVector.size will be set to +index+
|
55
|
+
* + 1. If it is greater than the capacity of the BitVector, the capacity will
|
56
|
+
* be expanded to accommodate.
|
57
|
+
*
|
58
|
+
* @param bv the BitVector to set the bit in
|
59
|
+
* @param index the index of the bit to set
|
60
|
+
*/
|
61
|
+
extern void bv_set(BitVector *bv, int index);
|
62
|
+
|
63
|
+
/**
|
64
|
+
* Unsafely set the bit at position +index+. If you choose to use this
|
65
|
+
* function you must create the BitVector with a large enough capacity to
|
66
|
+
* accommodate all of the bv_set_fast operations. You must also set bits in
|
67
|
+
* order and only one time per bit. Otherwise, use the safe bv_set function.
|
68
|
+
*
|
69
|
+
* So this is ok;
|
70
|
+
* <pre>
|
71
|
+
* BitVector *bv = bv_new_capa(1000);
|
72
|
+
* bv_set_fast(bv, 900);
|
73
|
+
* bv_set_fast(bv, 920);
|
74
|
+
* bv_set_fast(bv, 999);
|
75
|
+
* </pre>
|
76
|
+
*
|
77
|
+
* While these are not ok;
|
78
|
+
* <pre>
|
79
|
+
* BitVector *bv = bv_new_capa(90);
|
80
|
+
* bv_set_fast(bv, 80);
|
81
|
+
* bv_set_fast(bv, 79); // <= Bad: Out of Order
|
82
|
+
* bv_set_fast(bv, 80); // <= Bad: Already set
|
83
|
+
* bv_set_fast(bv, 90); // <= Bad: Out of Range. index must be < capa
|
84
|
+
* </pre>
|
85
|
+
*
|
86
|
+
* @param bv the BitVector to set the bit in
|
87
|
+
* @param bit the index of the bit to set
|
88
|
+
*/
|
89
|
+
extern void bv_set_fast(BitVector *bv, int bit);
|
90
|
+
|
91
|
+
/**
|
92
|
+
* Return 1 if the bit at +index+ was set or 0 otherwise. If +index+ is out of
|
93
|
+
* range, that is greater than the BitVector's capacity, it will also return 0.
|
94
|
+
*
|
95
|
+
* @param bv the BitVector to check in
|
96
|
+
* @param index the index of the bit to check
|
97
|
+
* @return 1 if the bit was set, 0 otherwise
|
98
|
+
*/
|
99
|
+
extern int bv_get(BitVector *bv, int index);
|
100
|
+
|
101
|
+
/**
|
102
|
+
* Unset the bit at position +index+. If the +index+ was out of range, that is
|
103
|
+
* greater than the BitVectors capacity then do nothing. (bv_get will return 0
|
104
|
+
* in this case anyway).
|
105
|
+
*
|
106
|
+
* @param bv the BitVector to unset the bit in
|
107
|
+
* @param bit the index of the bit to unset
|
108
|
+
*/
|
109
|
+
extern void bv_unset(BitVector *bv, int bit);
|
110
|
+
|
111
|
+
/**
|
112
|
+
* Clear all set bits. This function will set all set bits to 0.
|
113
|
+
*
|
114
|
+
* @param bv the BitVector to clear
|
115
|
+
*/
|
116
|
+
extern void bv_clear(BitVector *bv);
|
117
|
+
|
118
|
+
/**
|
119
|
+
* Resets the set bit count by running through the whole BitVector and
|
120
|
+
* counting all set bits. A running count of the bits is kept by bv_set,
|
121
|
+
* bv_unset and bv_set_fast so this function is only necessary if the count could
|
122
|
+
* have been corrupted somehow or if the BitVector has been constructed in a
|
123
|
+
* different way (for example being read from the file_system).
|
124
|
+
*
|
125
|
+
* @param bv the BitVector to count the bits in
|
126
|
+
* @return the number of set bits in the BitVector. BitVector.count is also
|
127
|
+
* set
|
128
|
+
*/
|
129
|
+
extern int bv_recount(BitVector *bv);
|
130
|
+
|
131
|
+
/**
|
132
|
+
* Reset the BitVector for scanning. This function should be called before
|
133
|
+
* using bv_scan_next to scan through all set bits in the BitVector. This is
|
134
|
+
* not necessary when using bv_scan_next_from.
|
135
|
+
*
|
136
|
+
* @param bv the BitVector to reset for scanning
|
137
|
+
*/
|
138
|
+
extern void bv_scan_reset(BitVector *bv);
|
139
|
+
|
140
|
+
/**
|
141
|
+
* Scan the BitVector for the next set bit. Before using this function you
|
142
|
+
* should reset the BitVector for scanning using +bv_scan_reset+. You can then
|
143
|
+
* repeatedly call bv_scan_next to get each set bit until it finally returns
|
144
|
+
* -1.
|
145
|
+
*
|
146
|
+
* @param bv the BitVector to scan
|
147
|
+
* @return the next set bits index or -1 if no more bits are set
|
148
|
+
*/
|
149
|
+
extern int bv_scan_next(BitVector *bv);
|
150
|
+
|
151
|
+
/**
|
152
|
+
* Scan the BitVector for the next set bit after +from+. If no more bits are
|
153
|
+
* set then return -1, otherwise return the index of the next set bit.
|
154
|
+
*
|
155
|
+
* @param bv the BitVector to scan
|
156
|
+
* @return the next set bit's index or -1 if no more bits are set
|
157
|
+
*/
|
158
|
+
|
159
|
+
extern int bv_scan_next_from(BitVector *bv, register const int from);
|
160
|
+
/**
|
161
|
+
* Scan the BitVector for the next unset bit. Before using this function you
|
162
|
+
* should reset the BitVector for scanning using +bv_scan_reset+. You can then
|
163
|
+
* repeatedly call bv_scan_next_unset to get each unset bit until it finally returns
|
164
|
+
* -1.
|
165
|
+
*
|
166
|
+
* @param bv the BitVector to scan
|
167
|
+
* @return the next unset bits index or -1 if no more bits are unset
|
168
|
+
*/
|
169
|
+
extern int bv_scan_next_unset(BitVector *bv);
|
170
|
+
|
171
|
+
/**
|
172
|
+
* Scan the BitVector for the next unset bit after +from+. If no more bits are
|
173
|
+
* unset then return -1, otherwise return the index of the next unset bit.
|
174
|
+
*
|
175
|
+
* @param bv the BitVector to scan
|
176
|
+
* @return the next unset bit's index or -1 if no more bits are unset
|
177
|
+
*/
|
178
|
+
extern int bv_scan_next_unset_from(BitVector *bv, register const int from);
|
179
|
+
|
180
|
+
/**
|
181
|
+
* Check whether the two BitVectors have the same bits set.
|
182
|
+
*
|
183
|
+
* @param bv1 first BitVector to compare
|
184
|
+
* @param bv2 second BitVector to compare
|
185
|
+
* @return true if bv1 == bv2
|
186
|
+
*/
|
187
|
+
extern int bv_eq(BitVector *bv1, BitVector *bv2);
|
188
|
+
|
189
|
+
/**
|
190
|
+
* Determines a hash value for the BitVector
|
191
|
+
*
|
192
|
+
* @param bv the BitVector to hash
|
193
|
+
* @return A hash value for the BitVector
|
194
|
+
*/
|
195
|
+
extern ulong bv_hash(BitVector *bv);
|
196
|
+
|
197
|
+
/**
|
198
|
+
* ANDs two BitVectors (+bv1+ and +bv2+) together and return the resultant
|
199
|
+
* BitVector
|
200
|
+
*
|
201
|
+
* @param bv1 first BitVector to AND
|
202
|
+
* @param bv2 second BitVector to AND
|
203
|
+
* @return A BitVector with all bits set that are set in both bv1 and bv2
|
204
|
+
*/
|
205
|
+
extern BitVector *bv_and(BitVector *bv1, BitVector *bv2);
|
206
|
+
|
207
|
+
/**
|
208
|
+
* ORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
|
209
|
+
* BitVector
|
210
|
+
*
|
211
|
+
* @param bv1 first BitVector to OR
|
212
|
+
* @param bv2 second BitVector to OR
|
213
|
+
* @return A BitVector with all bits set that are set in either bv1 or bv2
|
214
|
+
*/
|
215
|
+
extern BitVector *bv_or(BitVector *bv1, BitVector *bv2);
|
216
|
+
|
217
|
+
/**
|
218
|
+
* XORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
|
219
|
+
* BitVector
|
220
|
+
*
|
221
|
+
* @param bv1 first BitVector to XOR
|
222
|
+
* @param bv2 second BitVector to XOR
|
223
|
+
* @return A BitVector with all bits set that are set in exactly one of bv1 and bv2
|
224
|
+
*/
|
225
|
+
extern BitVector *bv_xor(BitVector *bv1, BitVector *bv2);
|
226
|
+
|
227
|
+
/**
|
228
|
+
* Returns BitVector with all of +bv+'s bits flipped
|
229
|
+
*
|
230
|
+
* @param bv BitVector to flip
|
231
|
+
* @return A BitVector with all of +bv+'s bits flipped
|
232
|
+
*/
|
233
|
+
extern BitVector *bv_not(BitVector *bv);
|
234
|
+
|
235
|
+
/**
|
236
|
+
* ANDs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
|
237
|
+
*
|
238
|
+
* @param bv1 first BitVector to AND
|
239
|
+
* @param bv2 second BitVector to AND
|
240
|
+
* @return A BitVector
|
241
|
+
* @return bv1 with all bits set that were set in both bv1 and bv2
|
242
|
+
*/
|
243
|
+
extern BitVector *bv_and_x(BitVector *bv1, BitVector *bv2);
|
244
|
+
|
245
|
+
/**
|
246
|
+
* ORs two BitVectors together
|
247
|
+
*
|
248
|
+
* @param bv1 first BitVector to OR
|
249
|
+
* @param bv2 second BitVector to OR
|
250
|
+
* @return bv1
|
251
|
+
*/
|
252
|
+
extern BitVector *bv_or_x(BitVector *bv1, BitVector *bv2);
|
253
|
+
|
254
|
+
/**
|
255
|
+
* XORs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
|
256
|
+
*
|
257
|
+
* @param bv1 first BitVector to XOR
|
258
|
+
* @param bv2 second BitVector to XOR
|
259
|
+
* @return bv1
|
260
|
+
*/
|
261
|
+
extern BitVector *bv_xor_x(BitVector *bv1, BitVector *bv2);
|
262
|
+
|
263
|
+
/**
|
264
|
+
* Flips all bits in the BitVector +bv+
|
265
|
+
*
|
266
|
+
* @param bv BitVector to flip
|
267
|
+
* @return +bv+ with all its bits flipped
|
268
|
+
*/
|
269
|
+
extern BitVector *bv_not_x(BitVector *bv);
|
28
270
|
|
29
271
|
#endif
|
data/ext/compound_io.c
CHANGED
@@ -1,15 +1,9 @@
|
|
1
1
|
#include "index.h"
|
2
|
-
|
3
|
-
static char * const ALREADY_CLOSED_MSG = "Already closed";
|
4
|
-
static char * const STREAM_CLOSED_MSG = "Stream closed";
|
5
|
-
static char * const MISSING_FILE_MSG = "No sub-file found";
|
6
|
-
static char * const ALREADY_MERGED_MSG = "Already merged";
|
7
|
-
static char * const REMAINDER_ERROR_MSG = "Non-zero remainder length after copying";
|
8
|
-
static char * const FILE_OFFSET_MSG = "Difference in the output file offsets"
|
9
|
-
" does not match the original file length";
|
10
|
-
static char * const NO_FILES_TO_MERGE_MSG = "No Files to merge into the compound file";
|
2
|
+
#include "array.h"
|
11
3
|
|
12
4
|
extern void store_destroy(Store *store);
|
5
|
+
extern InStream *is_new();
|
6
|
+
extern Store *store_new();
|
13
7
|
|
14
8
|
/****************************************************************************
|
15
9
|
*
|
@@ -18,232 +12,244 @@ extern void store_destroy(Store *store);
|
|
18
12
|
****************************************************************************/
|
19
13
|
|
20
14
|
typedef struct FileEntry {
|
21
|
-
|
22
|
-
|
15
|
+
off_t offset;
|
16
|
+
off_t length;
|
23
17
|
} FileEntry;
|
24
18
|
|
25
|
-
void cmpd_touch(Store *store, char *
|
19
|
+
static void cmpd_touch(Store *store, char *file_name)
|
26
20
|
{
|
27
|
-
|
21
|
+
store->dir.cmpd->store->touch(store->dir.cmpd->store, file_name);
|
28
22
|
}
|
29
23
|
|
30
|
-
int cmpd_exists(Store *store, char *
|
24
|
+
static int cmpd_exists(Store *store, char *file_name)
|
31
25
|
{
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
26
|
+
if (h_get(store->dir.cmpd->entries, file_name) != NULL) {
|
27
|
+
return true;
|
28
|
+
}
|
29
|
+
else {
|
30
|
+
return false;
|
31
|
+
}
|
37
32
|
}
|
38
33
|
|
39
34
|
/**
|
40
35
|
* @throws UNSUPPORTED_ERROR
|
41
36
|
*/
|
42
|
-
int cmpd_remove(Store *store, char *
|
37
|
+
static int cmpd_remove(Store *store, char *file_name)
|
43
38
|
{
|
44
|
-
|
45
|
-
|
39
|
+
(void)store;
|
40
|
+
(void)file_name;
|
41
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
42
|
+
return 0;
|
46
43
|
}
|
47
44
|
|
48
45
|
/**
|
49
46
|
* @throws UNSUPPORTED_ERROR
|
50
47
|
*/
|
51
|
-
|
48
|
+
static void cmpd_rename(Store *store, char *from, char *to)
|
52
49
|
{
|
53
|
-
|
54
|
-
|
50
|
+
(void)store;
|
51
|
+
(void)from;
|
52
|
+
(void)to;
|
53
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
55
54
|
}
|
56
55
|
|
57
|
-
int cmpd_count(Store *store)
|
56
|
+
static int cmpd_count(Store *store)
|
58
57
|
{
|
59
|
-
|
58
|
+
return store->dir.cmpd->entries->size;
|
60
59
|
}
|
61
60
|
|
62
61
|
/**
|
63
62
|
* @throws UNSUPPORTED_ERROR
|
64
63
|
*/
|
65
|
-
void cmpd_clear(Store *store)
|
64
|
+
static void cmpd_clear(Store *store)
|
66
65
|
{
|
67
|
-
|
66
|
+
(void)store;
|
67
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
68
68
|
}
|
69
69
|
|
70
|
-
void cmpd_close_i(Store *store)
|
70
|
+
static void cmpd_close_i(Store *store)
|
71
71
|
{
|
72
|
-
|
73
|
-
|
74
|
-
|
72
|
+
CompoundStore *cmpd = store->dir.cmpd;
|
73
|
+
if (cmpd->stream == NULL) {
|
74
|
+
RAISE(IO_ERROR, "Tried to close already closed compound store");
|
75
|
+
}
|
75
76
|
|
76
|
-
|
77
|
+
h_destroy(cmpd->entries);
|
77
78
|
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
79
|
+
is_close(cmpd->stream);
|
80
|
+
cmpd->stream = NULL;
|
81
|
+
free(store->dir.cmpd);
|
82
|
+
store_destroy(store);
|
82
83
|
}
|
83
84
|
|
84
|
-
|
85
|
+
static off_t cmpd_length(Store *store, char *file_name)
|
85
86
|
{
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
87
|
+
FileEntry *fe = h_get(store->dir.cmpd->entries, file_name);
|
88
|
+
if (fe != NULL) {
|
89
|
+
return fe->length;
|
90
|
+
}
|
91
|
+
else {
|
92
|
+
return 0;
|
93
|
+
}
|
91
94
|
}
|
92
95
|
|
93
|
-
void
|
94
|
-
void cmpdi_close_internal(InStream *is)
|
96
|
+
static void cmpdi_seek_i(InStream *is, off_t pos)
|
95
97
|
{
|
96
|
-
|
97
|
-
|
98
|
+
(void)is;
|
99
|
+
(void)pos;
|
98
100
|
}
|
99
101
|
|
100
|
-
void
|
102
|
+
static void cmpdi_close_i(InStream *is)
|
101
103
|
{
|
102
|
-
|
103
|
-
//cis->sub = is_clone(is->d.cis->sub);
|
104
|
-
cis->sub = is->d.cis->sub;
|
105
|
-
cis->offset = is->d.cis->offset;
|
106
|
-
cis->length = is->d.cis->length;
|
107
|
-
new_is->d.cis = cis;
|
104
|
+
free(is->d.cis);
|
108
105
|
}
|
109
106
|
|
110
|
-
|
107
|
+
static off_t cmpdi_length_i(InStream *is)
|
111
108
|
{
|
112
|
-
|
109
|
+
return (is->d.cis->length);
|
113
110
|
}
|
114
111
|
|
115
112
|
/*
|
116
113
|
* raises: EOF_ERROR
|
117
114
|
*/
|
118
|
-
void
|
115
|
+
static void cmpdi_read_i(InStream *is, uchar *b, int len)
|
119
116
|
{
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
117
|
+
CompoundInStream *cis = is->d.cis;
|
118
|
+
off_t start = is_pos(is);
|
119
|
+
|
120
|
+
if ((start + len) > cis->length) {
|
121
|
+
RAISE(EOF_ERROR, "Tried to read past end of file. File length is "
|
122
|
+
"<%"F_OFF_T_PFX"d> and tried to read to <%"F_OFF_T_PFX"d>",
|
123
|
+
cis->length, start + len);
|
124
|
+
}
|
125
|
+
|
126
|
+
is_seek(cis->sub, cis->offset + start);
|
127
|
+
is_read_bytes(cis->sub, b, len);
|
126
128
|
}
|
127
129
|
|
128
|
-
|
130
|
+
static const struct InStreamMethods CMPD_IN_STREAM_METHODS = {
|
131
|
+
cmpdi_read_i,
|
132
|
+
cmpdi_seek_i,
|
133
|
+
cmpdi_length_i,
|
134
|
+
cmpdi_close_i
|
135
|
+
};
|
136
|
+
|
137
|
+
static InStream *cmpd_create_input(InStream *sub_is, off_t offset, off_t length)
|
129
138
|
{
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
is->seek_internal = &cmpdi_seek_internal;
|
141
|
-
is->close_internal = &cmpdi_close_internal;
|
142
|
-
is->clone_internal = &cmpdi_clone_internal;
|
143
|
-
is->length_internal = &cmpdi_length_internal;
|
144
|
-
return is;
|
139
|
+
InStream *is = is_new();
|
140
|
+
CompoundInStream *cis = ALLOC(CompoundInStream);
|
141
|
+
|
142
|
+
cis->sub = sub_is;
|
143
|
+
cis->offset = offset;
|
144
|
+
cis->length = length;
|
145
|
+
is->d.cis = cis;
|
146
|
+
is->m = &CMPD_IN_STREAM_METHODS;
|
147
|
+
|
148
|
+
return is;
|
145
149
|
}
|
146
150
|
|
147
|
-
InStream *cmpd_open_input(Store *store, const char *
|
151
|
+
static InStream *cmpd_open_input(Store *store, const char *file_name)
|
148
152
|
{
|
149
|
-
|
150
|
-
|
151
|
-
|
153
|
+
FileEntry *entry;
|
154
|
+
CompoundStore *cmpd = store->dir.cmpd;
|
155
|
+
InStream *is;
|
156
|
+
|
157
|
+
mutex_lock(&store->mutex);
|
158
|
+
if (cmpd->stream == NULL) {
|
159
|
+
mutex_unlock(&store->mutex);
|
160
|
+
RAISE(IO_ERROR, "Can't open compound file input stream. Parent "
|
161
|
+
"stream is closed.");
|
162
|
+
}
|
152
163
|
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
164
|
+
entry = h_get(cmpd->entries, file_name);
|
165
|
+
if (entry == NULL) {
|
166
|
+
mutex_unlock(&store->mutex);
|
167
|
+
RAISE(IO_ERROR, "File %s does not exist: ", file_name);
|
168
|
+
}
|
158
169
|
|
159
|
-
|
160
|
-
if (entry == NULL) {
|
170
|
+
is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
|
161
171
|
mutex_unlock(&store->mutex);
|
162
|
-
RAISE(IO_ERROR, MISSING_FILE_MSG);
|
163
|
-
}
|
164
|
-
|
165
|
-
is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
|
166
|
-
mutex_unlock(&store->mutex);
|
167
172
|
|
168
|
-
|
173
|
+
return is;
|
169
174
|
}
|
170
175
|
|
171
|
-
OutStream *
|
176
|
+
static OutStream *cmpd_new_output(Store *store, const char *file_name)
|
172
177
|
{
|
173
|
-
|
174
|
-
|
178
|
+
(void)store;
|
179
|
+
(void)file_name;
|
180
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
181
|
+
return NULL;
|
175
182
|
}
|
176
183
|
|
177
|
-
Lock *cmpd_open_lock(Store *store, char *
|
184
|
+
static Lock *cmpd_open_lock(Store *store, char *lock_name)
|
178
185
|
{
|
179
|
-
|
180
|
-
|
186
|
+
(void)store;
|
187
|
+
(void)lock_name;
|
188
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
189
|
+
return NULL;
|
181
190
|
}
|
182
191
|
|
183
|
-
void cmpd_close_lock(Lock *lock)
|
192
|
+
static void cmpd_close_lock(Lock *lock)
|
184
193
|
{
|
185
|
-
|
194
|
+
(void)lock;
|
195
|
+
RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
|
186
196
|
}
|
187
197
|
|
188
198
|
Store *open_cmpd_store(Store *store, const char *name)
|
189
199
|
{
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
new_store =
|
200
|
+
int count, i;
|
201
|
+
off_t offset;
|
202
|
+
char *fname;
|
203
|
+
FileEntry *entry;
|
204
|
+
Store *new_store = NULL;
|
205
|
+
CompoundStore *cmpd = NULL;
|
206
|
+
InStream *is = NULL;
|
207
|
+
|
208
|
+
new_store = store_new();
|
199
209
|
cmpd = ALLOC(CompoundStore);
|
200
210
|
|
201
|
-
cmpd->store
|
202
|
-
cmpd->name
|
203
|
-
cmpd->entries
|
211
|
+
cmpd->store = store;
|
212
|
+
cmpd->name = name;
|
213
|
+
cmpd->entries = h_new_str(&free, &free);
|
204
214
|
is = cmpd->stream = store->open_input(store, cmpd->name);
|
205
215
|
|
206
|
-
|
207
|
-
count =
|
216
|
+
/* read the directory and init files */
|
217
|
+
count = is_read_vint(is);
|
208
218
|
entry = NULL;
|
209
219
|
for (i = 0; i < count; i++) {
|
210
|
-
|
211
|
-
|
220
|
+
offset = (off_t)is_read_i64(is);
|
221
|
+
fname = is_read_string(is);
|
212
222
|
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
223
|
+
if (entry != NULL) {
|
224
|
+
/* set length of the previous entry */
|
225
|
+
entry->length = offset - entry->offset;
|
226
|
+
}
|
217
227
|
|
218
|
-
|
219
|
-
|
220
|
-
|
228
|
+
entry = ALLOC(FileEntry);
|
229
|
+
entry->offset = offset;
|
230
|
+
h_set(cmpd->entries, fname, entry);
|
221
231
|
}
|
222
232
|
|
223
|
-
|
224
|
-
if (entry != NULL)
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
new_store->open_input = &cmpd_open_input;
|
244
|
-
new_store->open_lock = &cmpd_open_lock;
|
245
|
-
new_store->close_lock = &cmpd_close_lock;
|
246
|
-
return new_store;
|
233
|
+
/* set the length of the final entry */
|
234
|
+
if (entry != NULL) {
|
235
|
+
entry->length = is_length(is) - entry->offset;
|
236
|
+
}
|
237
|
+
|
238
|
+
new_store->dir.cmpd = cmpd;
|
239
|
+
new_store->touch = &cmpd_touch;
|
240
|
+
new_store->exists = &cmpd_exists;
|
241
|
+
new_store->remove = &cmpd_remove;
|
242
|
+
new_store->rename = &cmpd_rename;
|
243
|
+
new_store->count = &cmpd_count;
|
244
|
+
new_store->clear = &cmpd_clear;
|
245
|
+
new_store->length = &cmpd_length;
|
246
|
+
new_store->close_i = &cmpd_close_i;
|
247
|
+
new_store->new_output = &cmpd_new_output;
|
248
|
+
new_store->open_input = &cmpd_open_input;
|
249
|
+
new_store->open_lock = &cmpd_open_lock;
|
250
|
+
new_store->close_lock = &cmpd_close_lock;
|
251
|
+
|
252
|
+
return new_store;
|
247
253
|
}
|
248
254
|
|
249
255
|
/****************************************************************************
|
@@ -252,123 +258,105 @@ Store *open_cmpd_store(Store *store, const char *name)
|
|
252
258
|
*
|
253
259
|
****************************************************************************/
|
254
260
|
|
255
|
-
typedef struct WFileEntry {
|
256
|
-
char *name;
|
257
|
-
int dir_offset;
|
258
|
-
int data_offset;
|
259
|
-
} WFileEntry;
|
260
|
-
|
261
|
-
WFileEntry *wfe_create(char *name)
|
262
|
-
{
|
263
|
-
WFileEntry *wfe = ALLOC(WFileEntry);
|
264
|
-
wfe->name = name;
|
265
|
-
return wfe;
|
266
|
-
}
|
267
|
-
|
268
261
|
CompoundWriter *open_cw(Store *store, char *name)
|
269
262
|
{
|
270
|
-
|
271
|
-
|
272
|
-
|
273
|
-
|
274
|
-
|
275
|
-
|
276
|
-
return cw;
|
263
|
+
CompoundWriter *cw = ALLOC(CompoundWriter);
|
264
|
+
cw->store = store;
|
265
|
+
cw->name = name;
|
266
|
+
cw->ids = hs_new_str(&free);
|
267
|
+
cw->file_entries = ary_new_type_capa(CWFileEntry, CW_INIT_CAPA);
|
268
|
+
return cw;
|
277
269
|
}
|
278
270
|
|
279
271
|
void cw_add_file(CompoundWriter *cw, char *id)
|
280
272
|
{
|
281
|
-
|
282
|
-
|
283
|
-
|
273
|
+
id = estrdup(id);
|
274
|
+
if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST) {
|
275
|
+
RAISE(IO_ERROR, "Tried to add file \"%s\" which has already been "
|
276
|
+
"added to the compound store", id);
|
277
|
+
}
|
284
278
|
|
285
|
-
|
286
|
-
|
279
|
+
ary_grow(cw->file_entries);
|
280
|
+
ary_last(cw->file_entries).name = id;
|
287
281
|
}
|
288
282
|
|
289
|
-
void cw_copy_file(CompoundWriter *cw,
|
283
|
+
static void cw_copy_file(CompoundWriter *cw, CWFileEntry *src, OutStream *os)
|
290
284
|
{
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
|
295
|
-
|
296
|
-
InStream *is = cw->store->open_input(cw->store, src->name);
|
297
|
-
|
298
|
-
TRY
|
299
|
-
remainder = length = is_length(is);
|
285
|
+
off_t start_ptr = os_pos(os);
|
286
|
+
off_t end_ptr;
|
287
|
+
off_t remainder, length, len;
|
288
|
+
uchar buffer[BUFFER_SIZE];
|
300
289
|
|
290
|
+
InStream *is = cw->store->open_input(cw->store, src->name);
|
291
|
+
|
292
|
+
remainder = length = is_length(is);
|
301
293
|
|
302
294
|
while (remainder > 0) {
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
295
|
+
len = MIN(remainder, BUFFER_SIZE);
|
296
|
+
is_read_bytes(is, buffer, len);
|
297
|
+
os_write_bytes(os, buffer, len);
|
298
|
+
remainder -= len;
|
307
299
|
}
|
308
300
|
|
309
|
-
|
310
|
-
if (remainder != 0)
|
311
|
-
|
301
|
+
/* Verify that remainder is 0 */
|
302
|
+
if (remainder != 0) {
|
303
|
+
RAISE(IO_ERROR, "There seems to be an error in the compound file "
|
304
|
+
"should have read to the end but there are <%"F_OFF_T_PFX"d> "
|
305
|
+
"bytes left", remainder);
|
306
|
+
}
|
312
307
|
|
313
|
-
|
308
|
+
/* Verify that the output length diff is equal to original file */
|
314
309
|
end_ptr = os_pos(os);
|
315
310
|
len = end_ptr - start_ptr;
|
316
|
-
if (len != length)
|
317
|
-
|
311
|
+
if (len != length) {
|
312
|
+
RAISE(IO_ERROR, "Difference in compound file output file offsets "
|
313
|
+
"<%"F_OFF_T_PFX"d> does not match the original file lenght "
|
314
|
+
"<%"F_OFF_T_PFX"d>", len, length);
|
315
|
+
}
|
318
316
|
|
319
|
-
XFINALLY
|
320
317
|
is_close(is);
|
321
|
-
XENDTRY
|
322
318
|
}
|
323
319
|
|
324
320
|
void cw_close(CompoundWriter *cw)
|
325
321
|
{
|
326
|
-
|
327
|
-
|
328
|
-
WFileEntry *wfe;
|
322
|
+
OutStream *os = NULL;
|
323
|
+
int i;
|
329
324
|
|
330
|
-
|
331
|
-
|
332
|
-
|
325
|
+
if (cw->ids->size <= 0) {
|
326
|
+
RAISE(STATE_ERROR, "Tried to merge compound file with no entries");
|
327
|
+
}
|
333
328
|
|
334
|
-
|
329
|
+
os = cw->store->new_output(cw->store, cw->name);
|
335
330
|
|
336
|
-
|
337
|
-
os = cw->store->create_output(cw->store, cw->name);
|
338
|
-
os_write_vint(os, cw->file_entries->size);
|
331
|
+
os_write_vint(os, ary_size(cw->file_entries));
|
339
332
|
|
340
333
|
/* Write the directory with all offsets at 0.
|
341
334
|
* Remember the positions of directory entries so that we can adjust the
|
342
335
|
* offsets later */
|
343
|
-
|
344
|
-
|
345
|
-
|
346
|
-
|
347
|
-
os_write_long(os, 0); // for now
|
348
|
-
os_write_string(os, wfe->name);
|
336
|
+
for (i = 0; i < ary_size(cw->file_entries); i++) {
|
337
|
+
cw->file_entries[i].dir_offset = os_pos(os);
|
338
|
+
os_write_u64(os, 0); /* for now */
|
339
|
+
os_write_string(os, cw->file_entries[i].name);
|
349
340
|
}
|
350
341
|
|
351
342
|
/* Open the files and copy their data into the stream. Remember the
|
352
343
|
* locations of each file's data section. */
|
353
|
-
for (i = 0; i < cw->file_entries
|
354
|
-
|
355
|
-
|
356
|
-
cw_copy_file(cw, wfe, os);
|
344
|
+
for (i = 0; i < ary_size(cw->file_entries); i++) {
|
345
|
+
cw->file_entries[i].data_offset = os_pos(os);
|
346
|
+
cw_copy_file(cw, &cw->file_entries[i], os);
|
357
347
|
}
|
358
348
|
|
359
349
|
/* Write the data offsets into the directory of the compound stream */
|
360
|
-
for (i = 0; i < cw->file_entries
|
361
|
-
|
362
|
-
|
363
|
-
|
350
|
+
for (i = 0; i < ary_size(cw->file_entries); i++) {
|
351
|
+
os_seek(os, cw->file_entries[i].dir_offset);
|
352
|
+
os_write_u64(os, cw->file_entries[i].data_offset);
|
353
|
+
}
|
354
|
+
|
355
|
+
if (os) {
|
356
|
+
os_close(os);
|
364
357
|
}
|
365
358
|
|
366
|
-
XFINALLY
|
367
|
-
if (os) os_close(os);
|
368
359
|
hs_destroy(cw->ids);
|
369
|
-
|
360
|
+
ary_free(cw->file_entries);
|
370
361
|
free(cw);
|
371
|
-
break;
|
372
|
-
XENDTRY
|
373
362
|
}
|
374
|
-
|