ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
    
        data/ext/bitvector.h
    CHANGED
    
    | @@ -1,29 +1,271 @@ | |
| 1 1 | 
             
            #ifndef FRT_BIT_VECTOR_H
         | 
| 2 2 | 
             
            #define FRT_BIT_VECTOR_H
         | 
| 3 3 |  | 
| 4 | 
            -
            #include  | 
| 5 | 
            -
             | 
| 6 | 
            -
             | 
| 7 | 
            -
             | 
| 8 | 
            -
             | 
| 9 | 
            -
             | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 12 | 
            -
             | 
| 13 | 
            -
             | 
| 4 | 
            +
            #include "global.h"
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            #define BV_INIT_CAPA 256
         | 
| 7 | 
            +
            typedef struct BitVector
         | 
| 8 | 
            +
            {
         | 
| 9 | 
            +
                /** The bits are held in an array of 32-bit integers */
         | 
| 10 | 
            +
                f_u32 *bits;
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                /** size is equal to 1 + the highest order bit set */
         | 
| 13 | 
            +
                int size;
         | 
| 14 | 
            +
             | 
| 15 | 
            +
                /** capa is the number of words (U32) allocated for the bits */
         | 
| 16 | 
            +
                int capa;
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                /** count is the running count of bits set. This is kept up to date by
         | 
| 19 | 
            +
                 * bv_set and bv_unset. You can reset this value by calling bv_recount */
         | 
| 20 | 
            +
                int count;
         | 
| 21 | 
            +
             | 
| 22 | 
            +
                /** curr_bit is used by bv_scan_next to record the previously scanned bit */
         | 
| 23 | 
            +
                int curr_bit;
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                bool extends_as_ones : 1;
         | 
| 14 26 | 
             
            } BitVector;
         | 
| 15 27 |  | 
| 16 | 
            -
             | 
| 17 | 
            -
             | 
| 18 | 
            -
             | 
| 19 | 
            -
             | 
| 20 | 
            -
             | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 23 | 
            -
             | 
| 24 | 
            -
             | 
| 25 | 
            -
             | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            +
            /**
         | 
| 29 | 
            +
             * Create a new BitVector with a capacity of +BV_INIT_CAPA+. Note that the
         | 
| 30 | 
            +
             * BitVector is growable and will adjust its capacity when you use bv_set.
         | 
| 31 | 
            +
             *
         | 
| 32 | 
            +
             * @return BitVector with a capacity of +BV_INIT_CAPA+.
         | 
| 33 | 
            +
             */
         | 
| 34 | 
            +
            extern BitVector *bv_new();
         | 
| 35 | 
            +
             | 
| 36 | 
            +
            /**
         | 
| 37 | 
            +
             * Create a new BitVector with a capacity of +capa+. Note that the BitVector
         | 
| 38 | 
            +
             * is growable and will adjust its capacity when you use bv_set.
         | 
| 39 | 
            +
             *
         | 
| 40 | 
            +
             * @param capa the initial capacity of the BitVector
         | 
| 41 | 
            +
             * @return BitVector with a capacity of +capa+.
         | 
| 42 | 
            +
             */
         | 
| 43 | 
            +
            extern BitVector *bv_new_capa(int capa);
         | 
| 44 | 
            +
             | 
| 45 | 
            +
            /**
         | 
| 46 | 
            +
             * Destroy a BitVector, freeing all memory allocated to that BitVector
         | 
| 47 | 
            +
             *
         | 
| 48 | 
            +
             * @param bv BitVector to destroy
         | 
| 49 | 
            +
             */
         | 
| 50 | 
            +
            extern void bv_destroy(BitVector *bv);
         | 
| 51 | 
            +
             | 
| 52 | 
            +
            /**
         | 
| 53 | 
            +
             * Set the bit at position +index+. If +index+ is outside of the range of the
         | 
| 54 | 
            +
             * BitVector, that is >= BitVector.size, BitVector.size will be set to +index+
         | 
| 55 | 
            +
             * + 1. If it is greater than the capacity of the BitVector, the capacity will
         | 
| 56 | 
            +
             * be expanded to accommodate.
         | 
| 57 | 
            +
             *
         | 
| 58 | 
            +
             * @param bv the BitVector to set the bit in
         | 
| 59 | 
            +
             * @param index the index of the bit to set
         | 
| 60 | 
            +
             */
         | 
| 61 | 
            +
            extern void bv_set(BitVector *bv, int index);
         | 
| 62 | 
            +
             | 
| 63 | 
            +
            /**
         | 
| 64 | 
            +
             * Unsafely set the bit at position +index+. If you choose to use this
         | 
| 65 | 
            +
             * function you must create the BitVector with a large enough capacity to
         | 
| 66 | 
            +
             * accommodate all of the bv_set_fast operations. You must also set bits in
         | 
| 67 | 
            +
             * order and only one time per bit. Otherwise, use the safe bv_set function.
         | 
| 68 | 
            +
             *
         | 
| 69 | 
            +
             * So this is ok;
         | 
| 70 | 
            +
             * <pre>
         | 
| 71 | 
            +
             *   BitVector *bv = bv_new_capa(1000);
         | 
| 72 | 
            +
             *   bv_set_fast(bv, 900);
         | 
| 73 | 
            +
             *   bv_set_fast(bv, 920);
         | 
| 74 | 
            +
             *   bv_set_fast(bv, 999);
         | 
| 75 | 
            +
             * </pre>
         | 
| 76 | 
            +
             *
         | 
| 77 | 
            +
             * While these are not ok;
         | 
| 78 | 
            +
             * <pre>
         | 
| 79 | 
            +
             *   BitVector *bv = bv_new_capa(90);
         | 
| 80 | 
            +
             *   bv_set_fast(bv, 80);
         | 
| 81 | 
            +
             *   bv_set_fast(bv, 79); // <= Bad: Out of Order
         | 
| 82 | 
            +
             *   bv_set_fast(bv, 80); // <= Bad: Already set
         | 
| 83 | 
            +
             *   bv_set_fast(bv, 90); // <= Bad: Out of Range. index must be < capa
         | 
| 84 | 
            +
             * </pre>
         | 
| 85 | 
            +
             *
         | 
| 86 | 
            +
             * @param bv the BitVector to set the bit in
         | 
| 87 | 
            +
             * @param index the index of the bit to set
         | 
| 88 | 
            +
             */
         | 
| 89 | 
            +
            extern void bv_set_fast(BitVector *bv, int bit);
         | 
| 90 | 
            +
             | 
| 91 | 
            +
            /**
         | 
| 92 | 
            +
             * Return 1 if the bit at +index+ was set or 0 otherwise. If +index+ is out of
         | 
| 93 | 
            +
             * range, that is greater than the BitVector's capacity, it will also return 0.
         | 
| 94 | 
            +
             *
         | 
| 95 | 
            +
             * @param bv the BitVector to check in
         | 
| 96 | 
            +
             * @param index the index of the bit to check
         | 
| 97 | 
            +
             * @return 1 if the bit was set, 0 otherwise
         | 
| 98 | 
            +
             */
         | 
| 99 | 
            +
            extern int bv_get(BitVector *bv, int index);
         | 
| 100 | 
            +
             | 
| 101 | 
            +
            /**
         | 
| 102 | 
            +
             * Unset the bit at position +index+. If the +index+ was out of range, that is
         | 
| 103 | 
            +
             * greater than the BitVector's capacity, then do nothing. (bv_get will return 0
         | 
| 104 | 
            +
             * in this case anyway).
         | 
| 105 | 
            +
             *
         | 
| 106 | 
            +
             * @param bv the BitVector to unset the bit in
         | 
| 107 | 
            +
             * @param index the index of the bit to unset
         | 
| 108 | 
            +
             */
         | 
| 109 | 
            +
            extern void bv_unset(BitVector *bv, int bit);
         | 
| 110 | 
            +
             | 
| 111 | 
            +
            /**
         | 
| 112 | 
            +
             * Clear all set bits. This function will set all set bits to 0.
         | 
| 113 | 
            +
             *
         | 
| 114 | 
            +
             * @param bv the BitVector to clear
         | 
| 115 | 
            +
             */
         | 
| 116 | 
            +
            extern void bv_clear(BitVector *bv);
         | 
| 117 | 
            +
             | 
| 118 | 
            +
            /**
         | 
| 119 | 
            +
             * Resets the set bit count by running through the whole BitVector and
         | 
| 120 | 
            +
             * counting all set bits. A running count of the bits is kept by bv_set,
         | 
| 121 | 
            +
             * bv_unset and bv_set_fast so this function is only necessary if the count could
         | 
| 122 | 
            +
             * have been corrupted somehow or if the BitVector has been constructed in a
         | 
| 123 | 
            +
             * different way (for example being read from the file_system).
         | 
| 124 | 
            +
             *
         | 
| 125 | 
            +
             * @param bv the BitVector to count the bits in
         | 
| 126 | 
            +
             * @return the number of set bits in the BitVector. BitVector.count is also
         | 
| 127 | 
            +
             *   set
         | 
| 128 | 
            +
             */
         | 
| 129 | 
            +
            extern int bv_recount(BitVector *bv);
         | 
| 130 | 
            +
             | 
| 131 | 
            +
            /**
         | 
| 132 | 
            +
             * Reset the BitVector for scanning. This function should be called before
         | 
| 133 | 
            +
             * using bv_scan_next to scan through all set bits in the BitVector. This is
         | 
| 134 | 
            +
             * not necessary when using bv_scan_next_from.
         | 
| 135 | 
            +
             *
         | 
| 136 | 
            +
             * @param bv the BitVector to reset for scanning
         | 
| 137 | 
            +
             */
         | 
| 138 | 
            +
            extern void bv_scan_reset(BitVector *bv);
         | 
| 139 | 
            +
             | 
| 140 | 
            +
            /**
         | 
| 141 | 
            +
             * Scan the BitVector for the next set bit. Before using this function you
         | 
| 142 | 
            +
             * should reset the BitVector for scanning using +bv_scan_reset+. You can then
         | 
| 143 | 
            +
             * repeatedly call bv_scan_next to get each set bit until it finally returns
         | 
| 144 | 
            +
             * -1.
         | 
| 145 | 
            +
             *
         | 
| 146 | 
            +
             * @param bv the BitVector to scan
         | 
| 147 | 
            +
             * @return the next set bit's index or -1 if no more bits are set
         | 
| 148 | 
            +
             */
         | 
| 149 | 
            +
            extern int bv_scan_next(BitVector *bv);
         | 
| 150 | 
            +
             | 
| 151 | 
            +
            /**
         | 
| 152 | 
            +
             * Scan the BitVector for the next set bit after +from+. If no more bits are
         | 
| 153 | 
            +
             * set then return -1, otherwise return the index of the next set bit.
         | 
| 154 | 
            +
             *
         | 
| 155 | 
            +
             * @param bv the BitVector to scan
         | 
| 156 | 
            +
             * @return the next set bit's index or -1 if no more bits are set
         | 
| 157 | 
            +
             */
         | 
| 158 | 
            +
             | 
| 159 | 
            +
            extern int bv_scan_next_from(BitVector *bv, register const int from);
         | 
| 160 | 
            +
            /**
         | 
| 161 | 
            +
             * Scan the BitVector for the next unset bit. Before using this function you
         | 
| 162 | 
            +
             * should reset the BitVector for scanning using +bv_scan_reset+. You can then
         | 
| 163 | 
            +
             * repeatedly call bv_scan_next_unset to get each unset bit until it finally returns
         | 
| 164 | 
            +
             * -1.
         | 
| 165 | 
            +
             *
         | 
| 166 | 
            +
             * @param bv the BitVector to scan
         | 
| 167 | 
            +
             * @return the next unset bit's index or -1 if no more bits are unset
         | 
| 168 | 
            +
             */
         | 
| 169 | 
            +
            extern int bv_scan_next_unset(BitVector *bv);
         | 
| 170 | 
            +
             | 
| 171 | 
            +
            /**
         | 
| 172 | 
            +
             * Scan the BitVector for the next unset bit after +from+. If no more bits are
         | 
| 173 | 
            +
             * unset then return -1, otherwise return the index of the next unset bit.
         | 
| 174 | 
            +
             *
         | 
| 175 | 
            +
             * @param bv the BitVector to scan
         | 
| 176 | 
            +
             * @return the next unset bit's index or -1 if no more bits are unset
         | 
| 177 | 
            +
             */
         | 
| 178 | 
            +
            extern int bv_scan_next_unset_from(BitVector *bv, register const int from);
         | 
| 179 | 
            +
             | 
| 180 | 
            +
            /**
         | 
| 181 | 
            +
             * Check whether the two BitVectors have the same bits set.
         | 
| 182 | 
            +
             *
         | 
| 183 | 
            +
             * @param bv1 first BitVector to compare
         | 
| 184 | 
            +
             * @param bv2 second BitVector to compare
         | 
| 185 | 
            +
             * @return true if bv1 == bv2
         | 
| 186 | 
            +
             */
         | 
| 187 | 
            +
            extern int bv_eq(BitVector *bv1, BitVector *bv2);
         | 
| 188 | 
            +
             | 
| 189 | 
            +
            /**
         | 
| 190 | 
            +
             * Determines a hash value for the BitVector
         | 
| 191 | 
            +
             *
         | 
| 192 | 
            +
             * @param bv the BitVector to hash
         | 
| 193 | 
            +
             * @return A hash value for the BitVector
         | 
| 194 | 
            +
             */
         | 
| 195 | 
            +
            extern ulong bv_hash(BitVector *bv);
         | 
| 196 | 
            +
             | 
| 197 | 
            +
            /**
         | 
| 198 | 
            +
             * ANDs two BitVectors (+bv1+ and +bv2+) together and return the resultant
         | 
| 199 | 
            +
             * BitVector
         | 
| 200 | 
            +
             *
         | 
| 201 | 
            +
             * @param bv1 first BitVector to AND
         | 
| 202 | 
            +
             * @param bv2 second BitVector to AND
         | 
| 203 | 
            +
             * @return A BitVector with all bits set that are set in both bv1 and bv2
         | 
| 204 | 
            +
             */
         | 
| 205 | 
            +
            extern BitVector *bv_and(BitVector *bv1, BitVector *bv2);
         | 
| 206 | 
            +
             | 
| 207 | 
            +
            /**
         | 
| 208 | 
            +
             * ORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
         | 
| 209 | 
            +
             * BitVector
         | 
| 210 | 
            +
             *
         | 
| 211 | 
            +
             * @param bv1 first BitVector to OR
         | 
| 212 | 
            +
             * @param bv2 second BitVector to OR
         | 
| 213 | 
            +
             * @return A BitVector with all bits set that are set in either bv1 or bv2
         | 
| 214 | 
            +
             */
         | 
| 215 | 
            +
            extern BitVector *bv_or(BitVector *bv1, BitVector *bv2);
         | 
| 216 | 
            +
             | 
| 217 | 
            +
            /**
         | 
| 218 | 
            +
             * XORs two BitVectors (+bv1+ and +bv2+) together and return the resultant
         | 
| 219 | 
            +
             * BitVector
         | 
| 220 | 
            +
             *
         | 
| 221 | 
            +
             * @param bv1 first BitVector to XOR
         | 
| 222 | 
            +
             * @param bv2 second BitVector to XOR
         | 
| 223 | 
            +
             * @return A BitVector with all bits set that differ between bv1 and bv2
         | 
| 224 | 
            +
             */
         | 
| 225 | 
            +
            extern BitVector *bv_xor(BitVector *bv1, BitVector *bv2);
         | 
| 226 | 
            +
             | 
| 227 | 
            +
            /**
         | 
| 228 | 
            +
             * Returns BitVector with all of +bv+'s bits flipped
         | 
| 229 | 
            +
             *
         | 
| 230 | 
            +
             * @param bv BitVector to flip
         | 
| 231 | 
            +
             * @return A BitVector with all bits set that are unset in bv
         | 
| 232 | 
            +
             */
         | 
| 233 | 
            +
            extern BitVector *bv_not(BitVector *bv);
         | 
| 234 | 
            +
             | 
| 235 | 
            +
            /**
         | 
| 236 | 
            +
             * ANDs two BitVectors together +bv1+ and +bv2+ in place of +bv1+
         | 
| 237 | 
            +
             *
         | 
| 238 | 
            +
             * @param bv1 first BitVector to AND
         | 
| 239 | 
            +
             * @param bv2 second BitVector to AND
         | 
| 240 | 
            +
             * @return A BitVector
         | 
| 241 | 
            +
             * @return bv1 with all bits set that were set in both bv1 and bv2
         | 
| 242 | 
            +
             */
         | 
| 243 | 
            +
            extern BitVector *bv_and_x(BitVector *bv1, BitVector *bv2);
         | 
| 244 | 
            +
             | 
| 245 | 
            +
            /**
         | 
| 246 | 
            +
             * ORs two BitVectors together
         | 
| 247 | 
            +
             *
         | 
| 248 | 
            +
             * @param bv1 first BitVector to OR
         | 
| 249 | 
            +
             * @param bv2 second BitVector to OR
         | 
| 250 | 
            +
             * @return bv1
         | 
| 251 | 
            +
             */
         | 
| 252 | 
            +
            extern BitVector *bv_or_x(BitVector *bv1, BitVector *bv2);
         | 
| 253 | 
            +
             | 
| 254 | 
            +
            /**
         | 
| 255 | 
            +
             * XORs two BitVectors (+bv1+ and +bv2+) together, storing the result in +bv1+
         | 
| 256 | 
            +
             *
         | 
| 257 | 
            +
             * @param bv1 first BitVector to XOR
         | 
| 258 | 
            +
             * @param bv2 second BitVector to XOR
         | 
| 259 | 
            +
             * @return bv1
         | 
| 260 | 
            +
             */
         | 
| 261 | 
            +
            extern BitVector *bv_xor_x(BitVector *bv1, BitVector *bv2);
         | 
| 262 | 
            +
             | 
| 263 | 
            +
            /**
         | 
| 264 | 
            +
             * Flips all bits in the BitVector +bv+
         | 
| 265 | 
            +
             *
         | 
| 266 | 
            +
             * @param bv BitVector to flip
         | 
| 267 | 
            +
             * @return +bv+ with all its bits flipped
         | 
| 268 | 
            +
             */
         | 
| 269 | 
            +
            extern BitVector *bv_not_x(BitVector *bv);
         | 
| 28 270 |  | 
| 29 271 | 
             
            #endif
         | 
    
        data/ext/compound_io.c
    CHANGED
    
    | @@ -1,15 +1,9 @@ | |
| 1 1 | 
             
            #include "index.h" 
         | 
| 2 | 
            -
             | 
| 3 | 
            -
            static char * const ALREADY_CLOSED_MSG = "Already closed";
         | 
| 4 | 
            -
            static char * const STREAM_CLOSED_MSG = "Stream closed";
         | 
| 5 | 
            -
            static char * const MISSING_FILE_MSG = "No sub-file found";
         | 
| 6 | 
            -
            static char * const ALREADY_MERGED_MSG = "Already merged";
         | 
| 7 | 
            -
            static char * const REMAINDER_ERROR_MSG = "Non-zero remainder length after copying";
         | 
| 8 | 
            -
            static char * const FILE_OFFSET_MSG =  "Difference in the output file offsets"
         | 
| 9 | 
            -
                    " does not match the original file length";
         | 
| 10 | 
            -
            static char * const NO_FILES_TO_MERGE_MSG = "No Files to merge into the compound file";
         | 
| 2 | 
            +
            #include "array.h" 
         | 
| 11 3 |  | 
| 12 4 | 
             
            extern void store_destroy(Store *store);
         | 
| 5 | 
            +
            extern InStream *is_new();
         | 
| 6 | 
            +
            extern Store *store_new();
         | 
| 13 7 |  | 
| 14 8 | 
             
            /****************************************************************************
         | 
| 15 9 | 
             
             *
         | 
| @@ -18,232 +12,244 @@ extern void store_destroy(Store *store); | |
| 18 12 | 
             
             ****************************************************************************/
         | 
| 19 13 |  | 
| 20 14 | 
             
            typedef struct FileEntry {
         | 
| 21 | 
            -
             | 
| 22 | 
            -
             | 
| 15 | 
            +
                off_t offset;
         | 
| 16 | 
            +
                off_t length;
         | 
| 23 17 | 
             
            } FileEntry;
         | 
| 24 18 |  | 
| 25 | 
            -
            void cmpd_touch(Store *store, char * | 
| 19 | 
            +
            static void cmpd_touch(Store *store, char *file_name)
         | 
| 26 20 | 
             
            {
         | 
| 27 | 
            -
             | 
| 21 | 
            +
                store->dir.cmpd->store->touch(store->dir.cmpd->store, file_name);
         | 
| 28 22 | 
             
            }
         | 
| 29 23 |  | 
| 30 | 
            -
            int cmpd_exists(Store *store, char * | 
| 24 | 
            +
            static int cmpd_exists(Store *store, char *file_name)
         | 
| 31 25 | 
             
            {
         | 
| 32 | 
            -
             | 
| 33 | 
            -
             | 
| 34 | 
            -
             | 
| 35 | 
            -
                 | 
| 36 | 
            -
             | 
| 26 | 
            +
                if (h_get(store->dir.cmpd->entries, file_name) != NULL) {
         | 
| 27 | 
            +
                    return true;
         | 
| 28 | 
            +
                }
         | 
| 29 | 
            +
                else {
         | 
| 30 | 
            +
                    return false;
         | 
| 31 | 
            +
                }
         | 
| 37 32 | 
             
            }
         | 
| 38 33 |  | 
| 39 34 | 
             
            /**
         | 
| 40 35 | 
             
             * @throws UNSUPPORTED_ERROR
         | 
| 41 36 | 
             
             */
         | 
| 42 | 
            -
            int cmpd_remove(Store *store, char * | 
| 37 | 
            +
            static int cmpd_remove(Store *store, char *file_name)
         | 
| 43 38 | 
             
            {
         | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 39 | 
            +
                (void)store;
         | 
| 40 | 
            +
                (void)file_name;
         | 
| 41 | 
            +
                RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
         | 
| 42 | 
            +
                return 0;
         | 
| 46 43 | 
             
            }
         | 
| 47 44 |  | 
| 48 45 | 
             
            /**
         | 
| 49 46 | 
             
             * @throws UNSUPPORTED_ERROR
         | 
| 50 47 | 
             
             */
         | 
| 51 | 
            -
             | 
| 48 | 
            +
            static void cmpd_rename(Store *store, char *from, char *to)
         | 
| 52 49 | 
             
            {
         | 
| 53 | 
            -
             | 
| 54 | 
            -
             | 
| 50 | 
            +
                (void)store;
         | 
| 51 | 
            +
                (void)from;
         | 
| 52 | 
            +
                (void)to;
         | 
| 53 | 
            +
                RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
         | 
| 55 54 | 
             
            }
         | 
| 56 55 |  | 
| 57 | 
            -
            int cmpd_count(Store *store)
         | 
| 56 | 
            +
            static int cmpd_count(Store *store)
         | 
| 58 57 | 
             
            {
         | 
| 59 | 
            -
             | 
| 58 | 
            +
                return store->dir.cmpd->entries->size;
         | 
| 60 59 | 
             
            }
         | 
| 61 60 |  | 
| 62 61 | 
             
            /**
         | 
| 63 62 | 
             
             * @throws UNSUPPORTED_ERROR
         | 
| 64 63 | 
             
             */
         | 
| 65 | 
            -
            void cmpd_clear(Store *store)
         | 
| 64 | 
            +
            static void cmpd_clear(Store *store)
         | 
| 66 65 | 
             
            {
         | 
| 67 | 
            -
             | 
| 66 | 
            +
                (void)store;
         | 
| 67 | 
            +
                RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
         | 
| 68 68 | 
             
            }
         | 
| 69 69 |  | 
| 70 | 
            -
            void cmpd_close_i(Store *store)
         | 
| 70 | 
            +
            static void cmpd_close_i(Store *store)
         | 
| 71 71 | 
             
            {
         | 
| 72 | 
            -
             | 
| 73 | 
            -
             | 
| 74 | 
            -
             | 
| 72 | 
            +
                CompoundStore *cmpd = store->dir.cmpd;
         | 
| 73 | 
            +
                if (cmpd->stream == NULL) {
         | 
| 74 | 
            +
                    RAISE(IO_ERROR, "Tried to close already closed compound store");
         | 
| 75 | 
            +
                }
         | 
| 75 76 |  | 
| 76 | 
            -
             | 
| 77 | 
            +
                h_destroy(cmpd->entries);
         | 
| 77 78 |  | 
| 78 | 
            -
             | 
| 79 | 
            -
             | 
| 80 | 
            -
             | 
| 81 | 
            -
             | 
| 79 | 
            +
                is_close(cmpd->stream);
         | 
| 80 | 
            +
                cmpd->stream = NULL;
         | 
| 81 | 
            +
                free(store->dir.cmpd);
         | 
| 82 | 
            +
                store_destroy(store);
         | 
| 82 83 | 
             
            }
         | 
| 83 84 |  | 
| 84 | 
            -
             | 
| 85 | 
            +
            static off_t cmpd_length(Store *store, char *file_name)
         | 
| 85 86 | 
             
            {
         | 
| 86 | 
            -
             | 
| 87 | 
            -
             | 
| 88 | 
            -
             | 
| 89 | 
            -
             | 
| 90 | 
            -
                 | 
| 87 | 
            +
                FileEntry *fe = h_get(store->dir.cmpd->entries, file_name);
         | 
| 88 | 
            +
                if (fe != NULL) {
         | 
| 89 | 
            +
                    return fe->length;
         | 
| 90 | 
            +
                }
         | 
| 91 | 
            +
                else {
         | 
| 92 | 
            +
                    return 0;
         | 
| 93 | 
            +
                }
         | 
| 91 94 | 
             
            }
         | 
| 92 95 |  | 
| 93 | 
            -
            void  | 
| 94 | 
            -
            void cmpdi_close_internal(InStream *is)
         | 
| 96 | 
            +
            static void cmpdi_seek_i(InStream *is, off_t pos)
         | 
| 95 97 | 
             
            {
         | 
| 96 | 
            -
             | 
| 97 | 
            -
             | 
| 98 | 
            +
                (void)is;
         | 
| 99 | 
            +
                (void)pos;
         | 
| 98 100 | 
             
            }
         | 
| 99 101 |  | 
| 100 | 
            -
            void  | 
| 102 | 
            +
            static void cmpdi_close_i(InStream *is)
         | 
| 101 103 | 
             
            {
         | 
| 102 | 
            -
             | 
| 103 | 
            -
              //cis->sub = is_clone(is->d.cis->sub);
         | 
| 104 | 
            -
              cis->sub = is->d.cis->sub;
         | 
| 105 | 
            -
              cis->offset = is->d.cis->offset;
         | 
| 106 | 
            -
              cis->length = is->d.cis->length;
         | 
| 107 | 
            -
              new_is->d.cis = cis;
         | 
| 104 | 
            +
                free(is->d.cis);
         | 
| 108 105 | 
             
            }
         | 
| 109 106 |  | 
| 110 | 
            -
             | 
| 107 | 
            +
            static off_t cmpdi_length_i(InStream *is)
         | 
| 111 108 | 
             
            {
         | 
| 112 | 
            -
             | 
| 109 | 
            +
                return (is->d.cis->length);
         | 
| 113 110 | 
             
            }
         | 
| 114 111 |  | 
| 115 112 | 
             
            /*
         | 
| 116 113 | 
             
             * raises: EOF_ERROR
         | 
| 117 114 | 
             
             */
         | 
| 118 | 
            -
            void  | 
| 115 | 
            +
            static void cmpdi_read_i(InStream *is, uchar *b, int len)
         | 
| 119 116 | 
             
            {
         | 
| 120 | 
            -
             | 
| 121 | 
            -
             | 
| 122 | 
            -
             | 
| 123 | 
            -
                 | 
| 124 | 
            -
             | 
| 125 | 
            -
             | 
| 117 | 
            +
                CompoundInStream *cis = is->d.cis;
         | 
| 118 | 
            +
                off_t start = is_pos(is);
         | 
| 119 | 
            +
             | 
| 120 | 
            +
                if ((start + len) > cis->length) {
         | 
| 121 | 
            +
                    RAISE(EOF_ERROR, "Tried to read past end of file. File length is "
         | 
| 122 | 
            +
                          "<%"F_OFF_T_PFX"d> and tried to read to <%"F_OFF_T_PFX"d>",
         | 
| 123 | 
            +
                          cis->length, start + len);
         | 
| 124 | 
            +
                }
         | 
| 125 | 
            +
             | 
| 126 | 
            +
                is_seek(cis->sub, cis->offset + start);
         | 
| 127 | 
            +
                is_read_bytes(cis->sub, b, len);
         | 
| 126 128 | 
             
            }
         | 
| 127 129 |  | 
| 128 | 
            -
             | 
| 130 | 
            +
            static const struct InStreamMethods CMPD_IN_STREAM_METHODS = {
         | 
| 131 | 
            +
                cmpdi_read_i,
         | 
| 132 | 
            +
                cmpdi_seek_i,
         | 
| 133 | 
            +
                cmpdi_length_i,
         | 
| 134 | 
            +
                cmpdi_close_i
         | 
| 135 | 
            +
            };
         | 
| 136 | 
            +
             | 
| 137 | 
            +
            static InStream *cmpd_create_input(InStream *sub_is, off_t offset, off_t length)
         | 
| 129 138 | 
             
            {
         | 
| 130 | 
            -
             | 
| 131 | 
            -
             | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 134 | 
            -
             | 
| 135 | 
            -
             | 
| 136 | 
            -
             | 
| 137 | 
            -
             | 
| 138 | 
            -
             | 
| 139 | 
            -
             | 
| 140 | 
            -
              is->seek_internal = &cmpdi_seek_internal;
         | 
| 141 | 
            -
              is->close_internal = &cmpdi_close_internal;
         | 
| 142 | 
            -
              is->clone_internal = &cmpdi_clone_internal;
         | 
| 143 | 
            -
              is->length_internal = &cmpdi_length_internal;
         | 
| 144 | 
            -
              return is;
         | 
| 139 | 
            +
                InStream *is = is_new();
         | 
| 140 | 
            +
                CompoundInStream *cis = ALLOC(CompoundInStream);
         | 
| 141 | 
            +
             | 
| 142 | 
            +
                cis->sub = sub_is;
         | 
| 143 | 
            +
                cis->offset = offset;
         | 
| 144 | 
            +
                cis->length = length;
         | 
| 145 | 
            +
                is->d.cis = cis;
         | 
| 146 | 
            +
                is->m = &CMPD_IN_STREAM_METHODS;
         | 
| 147 | 
            +
             | 
| 148 | 
            +
                return is;
         | 
| 145 149 | 
             
            }
         | 
| 146 150 |  | 
| 147 | 
            -
            InStream *cmpd_open_input(Store *store, const char * | 
| 151 | 
            +
            static InStream *cmpd_open_input(Store *store, const char *file_name)
         | 
| 148 152 | 
             
            {
         | 
| 149 | 
            -
             | 
| 150 | 
            -
             | 
| 151 | 
            -
             | 
| 153 | 
            +
                FileEntry *entry;
         | 
| 154 | 
            +
                CompoundStore *cmpd = store->dir.cmpd;
         | 
| 155 | 
            +
                InStream *is;
         | 
| 156 | 
            +
             | 
| 157 | 
            +
                mutex_lock(&store->mutex);
         | 
| 158 | 
            +
                if (cmpd->stream == NULL) {
         | 
| 159 | 
            +
                    mutex_unlock(&store->mutex);
         | 
| 160 | 
            +
                    RAISE(IO_ERROR, "Can't open compound file input stream. Parent "
         | 
| 161 | 
            +
                          "stream is closed.");
         | 
| 162 | 
            +
                }
         | 
| 152 163 |  | 
| 153 | 
            -
             | 
| 154 | 
            -
             | 
| 155 | 
            -
             | 
| 156 | 
            -
             | 
| 157 | 
            -
             | 
| 164 | 
            +
                entry = h_get(cmpd->entries, file_name);
         | 
| 165 | 
            +
                if (entry == NULL) {
         | 
| 166 | 
            +
                    mutex_unlock(&store->mutex);
         | 
| 167 | 
            +
                    RAISE(IO_ERROR, "File %s does not exist: ", file_name);
         | 
| 168 | 
            +
                }
         | 
| 158 169 |  | 
| 159 | 
            -
             | 
| 160 | 
            -
              if (entry == NULL) {
         | 
| 170 | 
            +
                is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
         | 
| 161 171 | 
             
                mutex_unlock(&store->mutex);
         | 
| 162 | 
            -
                RAISE(IO_ERROR, MISSING_FILE_MSG);
         | 
| 163 | 
            -
              }
         | 
| 164 | 
            -
             | 
| 165 | 
            -
              is = cmpd_create_input(cmpd->stream, entry->offset, entry->length);
         | 
| 166 | 
            -
              mutex_unlock(&store->mutex);
         | 
| 167 172 |  | 
| 168 | 
            -
             | 
| 173 | 
            +
                return is;
         | 
| 169 174 | 
             
            }
         | 
| 170 175 |  | 
| 171 | 
            -
            OutStream * | 
| 176 | 
            +
            static OutStream *cmpd_new_output(Store *store, const char *file_name)
         | 
| 172 177 | 
             
            {
         | 
| 173 | 
            -
             | 
| 174 | 
            -
             | 
| 178 | 
            +
                (void)store;
         | 
| 179 | 
            +
                (void)file_name;
         | 
| 180 | 
            +
                RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
         | 
| 181 | 
            +
                return NULL;
         | 
| 175 182 | 
             
            }
         | 
| 176 183 |  | 
| 177 | 
            -
            Lock *cmpd_open_lock(Store *store, char * | 
| 184 | 
            +
            static Lock *cmpd_open_lock(Store *store, char *lock_name)
         | 
| 178 185 | 
             
            {
         | 
| 179 | 
            -
             | 
| 180 | 
            -
             | 
| 186 | 
            +
                (void)store;
         | 
| 187 | 
            +
                (void)lock_name;
         | 
| 188 | 
            +
                RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
         | 
| 189 | 
            +
                return NULL;
         | 
| 181 190 | 
             
            }
         | 
| 182 191 |  | 
| 183 | 
            -
            void cmpd_close_lock(Lock *lock)
         | 
| 192 | 
            +
            static void cmpd_close_lock(Lock *lock)
         | 
| 184 193 | 
             
            {
         | 
| 185 | 
            -
             | 
| 194 | 
            +
                (void)lock;
         | 
| 195 | 
            +
                RAISE(UNSUPPORTED_ERROR, UNSUPPORTED_ERROR_MSG);
         | 
| 186 196 | 
             
            }
         | 
| 187 197 |  | 
| 188 198 | 
             
            Store *open_cmpd_store(Store *store, const char *name)
         | 
| 189 199 | 
             
            {
         | 
| 190 | 
            -
             | 
| 191 | 
            -
             | 
| 192 | 
            -
             | 
| 193 | 
            -
             | 
| 194 | 
            -
             | 
| 195 | 
            -
             | 
| 196 | 
            -
             | 
| 197 | 
            -
             | 
| 198 | 
            -
                new_store =  | 
| 200 | 
            +
                int count, i;
         | 
| 201 | 
            +
                off_t offset;
         | 
| 202 | 
            +
                char *fname;
         | 
| 203 | 
            +
                FileEntry *entry;
         | 
| 204 | 
            +
                Store *new_store = NULL;
         | 
| 205 | 
            +
                CompoundStore *cmpd = NULL;
         | 
| 206 | 
            +
                InStream *is = NULL;
         | 
| 207 | 
            +
             | 
| 208 | 
            +
                new_store = store_new();
         | 
| 199 209 | 
             
                cmpd = ALLOC(CompoundStore);
         | 
| 200 210 |  | 
| 201 | 
            -
                cmpd->store | 
| 202 | 
            -
                cmpd->name | 
| 203 | 
            -
                cmpd->entries | 
| 211 | 
            +
                cmpd->store       = store;
         | 
| 212 | 
            +
                cmpd->name        = name;
         | 
| 213 | 
            +
                cmpd->entries     = h_new_str(&free, &free);
         | 
| 204 214 | 
             
                is = cmpd->stream = store->open_input(store, cmpd->name);
         | 
| 205 215 |  | 
| 206 | 
            -
                 | 
| 207 | 
            -
                count =  | 
| 216 | 
            +
                /* read the directory and init files */
         | 
| 217 | 
            +
                count = is_read_vint(is);
         | 
| 208 218 | 
             
                entry = NULL;
         | 
| 209 219 | 
             
                for (i = 0; i < count; i++) {
         | 
| 210 | 
            -
             | 
| 211 | 
            -
             | 
| 220 | 
            +
                    offset = (off_t)is_read_i64(is);
         | 
| 221 | 
            +
                    fname = is_read_string(is);
         | 
| 212 222 |  | 
| 213 | 
            -
             | 
| 214 | 
            -
             | 
| 215 | 
            -
             | 
| 216 | 
            -
             | 
| 223 | 
            +
                    if (entry != NULL) {
         | 
| 224 | 
            +
                        /* set length of the previous entry */
         | 
| 225 | 
            +
                        entry->length = offset - entry->offset;
         | 
| 226 | 
            +
                    }
         | 
| 217 227 |  | 
| 218 | 
            -
             | 
| 219 | 
            -
             | 
| 220 | 
            -
             | 
| 228 | 
            +
                    entry = ALLOC(FileEntry);
         | 
| 229 | 
            +
                    entry->offset = offset;
         | 
| 230 | 
            +
                    h_set(cmpd->entries, fname, entry);
         | 
| 221 231 | 
             
                }
         | 
| 222 232 |  | 
| 223 | 
            -
                 | 
| 224 | 
            -
                if (entry != NULL)
         | 
| 225 | 
            -
             | 
| 226 | 
            -
             | 
| 227 | 
            -
             | 
| 228 | 
            -
                 | 
| 229 | 
            -
                 | 
| 230 | 
            -
             | 
| 231 | 
            -
             | 
| 232 | 
            -
             | 
| 233 | 
            -
             | 
| 234 | 
            -
             | 
| 235 | 
            -
             | 
| 236 | 
            -
             | 
| 237 | 
            -
             | 
| 238 | 
            -
             | 
| 239 | 
            -
             | 
| 240 | 
            -
             | 
| 241 | 
            -
             | 
| 242 | 
            -
             | 
| 243 | 
            -
              new_store->open_input    = &cmpd_open_input;
         | 
| 244 | 
            -
              new_store->open_lock     = &cmpd_open_lock;
         | 
| 245 | 
            -
              new_store->close_lock    = &cmpd_close_lock;
         | 
| 246 | 
            -
              return new_store;
         | 
| 233 | 
            +
                /* set the length of the final entry */
         | 
| 234 | 
            +
                if (entry != NULL) {
         | 
| 235 | 
            +
                    entry->length = is_length(is) - entry->offset;
         | 
| 236 | 
            +
                }
         | 
| 237 | 
            +
             | 
| 238 | 
            +
                new_store->dir.cmpd     = cmpd;
         | 
| 239 | 
            +
                new_store->touch        = &cmpd_touch;
         | 
| 240 | 
            +
                new_store->exists       = &cmpd_exists;
         | 
| 241 | 
            +
                new_store->remove       = &cmpd_remove;
         | 
| 242 | 
            +
                new_store->rename       = &cmpd_rename;
         | 
| 243 | 
            +
                new_store->count        = &cmpd_count;
         | 
| 244 | 
            +
                new_store->clear        = &cmpd_clear;
         | 
| 245 | 
            +
                new_store->length       = &cmpd_length;
         | 
| 246 | 
            +
                new_store->close_i      = &cmpd_close_i;
         | 
| 247 | 
            +
                new_store->new_output   = &cmpd_new_output;
         | 
| 248 | 
            +
                new_store->open_input   = &cmpd_open_input;
         | 
| 249 | 
            +
                new_store->open_lock    = &cmpd_open_lock;
         | 
| 250 | 
            +
                new_store->close_lock   = &cmpd_close_lock;
         | 
| 251 | 
            +
             | 
| 252 | 
            +
                return new_store;
         | 
| 247 253 | 
             
            }
         | 
| 248 254 |  | 
| 249 255 | 
             
            /****************************************************************************
         | 
| @@ -252,123 +258,105 @@ Store *open_cmpd_store(Store *store, const char *name) | |
| 252 258 | 
             
             *
         | 
| 253 259 | 
             
             ****************************************************************************/
         | 
| 254 260 |  | 
| 255 | 
            -
            typedef struct WFileEntry {
         | 
| 256 | 
            -
              char *name;
         | 
| 257 | 
            -
              int dir_offset;
         | 
| 258 | 
            -
              int data_offset;
         | 
| 259 | 
            -
            } WFileEntry;
         | 
| 260 | 
            -
             | 
| 261 | 
            -
            WFileEntry *wfe_create(char *name)
         | 
| 262 | 
            -
            {
         | 
| 263 | 
            -
              WFileEntry *wfe = ALLOC(WFileEntry);
         | 
| 264 | 
            -
              wfe->name = name;
         | 
| 265 | 
            -
              return wfe;
         | 
| 266 | 
            -
            }
         | 
| 267 | 
            -
             | 
| 268 261 | 
             
            CompoundWriter *open_cw(Store *store, char *name)
         | 
| 269 262 | 
             
            {
         | 
| 270 | 
            -
             | 
| 271 | 
            -
             | 
| 272 | 
            -
             | 
| 273 | 
            -
             | 
| 274 | 
            -
             | 
| 275 | 
            -
             | 
| 276 | 
            -
              return cw;
         | 
| 263 | 
            +
                CompoundWriter *cw = ALLOC(CompoundWriter);
         | 
| 264 | 
            +
                cw->store = store;
         | 
| 265 | 
            +
                cw->name = name;
         | 
| 266 | 
            +
                cw->ids = hs_new_str(&free);
         | 
| 267 | 
            +
                cw->file_entries = ary_new_type_capa(CWFileEntry, CW_INIT_CAPA);
         | 
| 268 | 
            +
                return cw;
         | 
| 277 269 | 
             
            }
         | 
| 278 270 |  | 
| 279 271 | 
             
            void cw_add_file(CompoundWriter *cw, char *id)
         | 
| 280 272 | 
             
            {
         | 
| 281 | 
            -
             | 
| 282 | 
            -
             | 
| 283 | 
            -
             | 
| 273 | 
            +
                id = estrdup(id);
         | 
| 274 | 
            +
                if (hs_add(cw->ids, id) != HASH_KEY_DOES_NOT_EXIST) {
         | 
| 275 | 
            +
                    RAISE(IO_ERROR, "Tried to add file \"%s\" which has already been "
         | 
| 276 | 
            +
                          "added to the compound store", id);
         | 
| 277 | 
            +
                }
         | 
| 284 278 |  | 
| 285 | 
            -
             | 
| 286 | 
            -
             | 
| 279 | 
            +
                ary_grow(cw->file_entries);
         | 
| 280 | 
            +
                ary_last(cw->file_entries).name = id;
         | 
| 287 281 | 
             
            }
         | 
| 288 282 |  | 
| 289 | 
            -
            void cw_copy_file(CompoundWriter *cw,  | 
| 283 | 
            +
            static void cw_copy_file(CompoundWriter *cw, CWFileEntry *src, OutStream *os)
         | 
| 290 284 | 
             
            {
         | 
| 291 | 
            -
             | 
| 292 | 
            -
             | 
| 293 | 
            -
             | 
| 294 | 
            -
             | 
| 295 | 
            -
              
         | 
| 296 | 
            -
              InStream *is = cw->store->open_input(cw->store, src->name);
         | 
| 297 | 
            -
              
         | 
| 298 | 
            -
              TRY
         | 
| 299 | 
            -
                remainder = length = is_length(is);
         | 
| 285 | 
            +
                off_t start_ptr = os_pos(os);
         | 
| 286 | 
            +
                off_t end_ptr;
         | 
| 287 | 
            +
                off_t remainder, length, len;
         | 
| 288 | 
            +
                uchar buffer[BUFFER_SIZE];
         | 
| 300 289 |  | 
| 290 | 
            +
                InStream *is = cw->store->open_input(cw->store, src->name);
         | 
| 291 | 
            +
             | 
| 292 | 
            +
                remainder = length = is_length(is);
         | 
| 301 293 |  | 
| 302 294 | 
             
                while (remainder > 0) {
         | 
| 303 | 
            -
             | 
| 304 | 
            -
             | 
| 305 | 
            -
             | 
| 306 | 
            -
             | 
| 295 | 
            +
                    len = MIN(remainder, BUFFER_SIZE);
         | 
| 296 | 
            +
                    is_read_bytes(is, buffer, len);
         | 
| 297 | 
            +
                    os_write_bytes(os, buffer, len);
         | 
| 298 | 
            +
                    remainder -= len;
         | 
| 307 299 | 
             
                }
         | 
| 308 300 |  | 
| 309 | 
            -
                 | 
| 310 | 
            -
                if (remainder != 0)
         | 
| 311 | 
            -
             | 
| 301 | 
            +
                /* Verify that remainder is 0 */
         | 
| 302 | 
            +
                if (remainder != 0) {
         | 
| 303 | 
            +
                    RAISE(IO_ERROR, "There seems to be an error in the compound file "
         | 
| 304 | 
            +
                          "should have read to the end but there are <%"F_OFF_T_PFX"d> "
         | 
| 305 | 
            +
                          "bytes left", remainder);
         | 
| 306 | 
            +
                }
         | 
| 312 307 |  | 
| 313 | 
            -
                 | 
| 308 | 
            +
                /* Verify that the output length diff is equal to original file */
         | 
| 314 309 | 
             
                end_ptr = os_pos(os);
         | 
| 315 310 | 
             
                len = end_ptr - start_ptr;
         | 
| 316 | 
            -
                if (len != length)
         | 
| 317 | 
            -
             | 
| 311 | 
            +
                if (len != length) {
         | 
| 312 | 
            +
                    RAISE(IO_ERROR, "Difference in compound file output file offsets "
         | 
| 313 | 
            +
                          "<%"F_OFF_T_PFX"d> does not match the original file lenght "
         | 
| 314 | 
            +
                          "<%"F_OFF_T_PFX"d>", len, length);
         | 
| 315 | 
            +
                }
         | 
| 318 316 |  | 
| 319 | 
            -
              XFINALLY
         | 
| 320 317 | 
             
                is_close(is);
         | 
| 321 | 
            -
              XENDTRY
         | 
| 322 318 | 
             
            }
         | 
| 323 319 |  | 
| 324 320 | 
             
            void cw_close(CompoundWriter *cw)
         | 
| 325 321 | 
             
            {
         | 
| 326 | 
            -
             | 
| 327 | 
            -
             | 
| 328 | 
            -
              WFileEntry *wfe;
         | 
| 322 | 
            +
                OutStream *os = NULL;
         | 
| 323 | 
            +
                int i;
         | 
| 329 324 |  | 
| 330 | 
            -
             | 
| 331 | 
            -
             | 
| 332 | 
            -
                 | 
| 325 | 
            +
                if (cw->ids->size <= 0) {
         | 
| 326 | 
            +
                    RAISE(STATE_ERROR, "Tried to merge compound file with no entries");
         | 
| 327 | 
            +
                }
         | 
| 333 328 |  | 
| 334 | 
            -
             | 
| 329 | 
            +
                os = cw->store->new_output(cw->store, cw->name);
         | 
| 335 330 |  | 
| 336 | 
            -
             | 
| 337 | 
            -
                os = cw->store->create_output(cw->store, cw->name);
         | 
| 338 | 
            -
                os_write_vint(os, cw->file_entries->size);
         | 
| 331 | 
            +
                os_write_vint(os, ary_size(cw->file_entries));
         | 
| 339 332 |  | 
| 340 333 | 
             
                /* Write the directory with all offsets at 0.
         | 
| 341 334 | 
             
                 * Remember the positions of directory entries so that we can adjust the
         | 
| 342 335 | 
             
                 * offsets later */
         | 
| 343 | 
            -
             | 
| 344 | 
            -
             | 
| 345 | 
            -
             | 
| 346 | 
            -
             | 
| 347 | 
            -
                  os_write_long(os, 0);  // for now
         | 
| 348 | 
            -
                  os_write_string(os, wfe->name);
         | 
| 336 | 
            +
                for (i = 0; i < ary_size(cw->file_entries); i++) {
         | 
| 337 | 
            +
                    cw->file_entries[i].dir_offset = os_pos(os);
         | 
| 338 | 
            +
                    os_write_u64(os, 0);  /* for now */
         | 
| 339 | 
            +
                    os_write_string(os, cw->file_entries[i].name);
         | 
| 349 340 | 
             
                }
         | 
| 350 341 |  | 
| 351 342 | 
             
                /* Open the files and copy their data into the stream.  Remember the
         | 
| 352 343 | 
             
                 * locations of each file's data section. */
         | 
| 353 | 
            -
                for (i = 0; i < cw->file_entries | 
| 354 | 
            -
             | 
| 355 | 
            -
             | 
| 356 | 
            -
                  cw_copy_file(cw, wfe, os);
         | 
| 344 | 
            +
                for (i = 0; i < ary_size(cw->file_entries); i++) {
         | 
| 345 | 
            +
                    cw->file_entries[i].data_offset = os_pos(os);
         | 
| 346 | 
            +
                    cw_copy_file(cw, &cw->file_entries[i], os);
         | 
| 357 347 | 
             
                }
         | 
| 358 348 |  | 
| 359 349 | 
             
                /* Write the data offsets into the directory of the compound stream */
         | 
| 360 | 
            -
                for (i = 0; i < cw->file_entries | 
| 361 | 
            -
             | 
| 362 | 
            -
             | 
| 363 | 
            -
             | 
| 350 | 
            +
                for (i = 0; i < ary_size(cw->file_entries); i++) {
         | 
| 351 | 
            +
                    os_seek(os, cw->file_entries[i].dir_offset);
         | 
| 352 | 
            +
                    os_write_u64(os, cw->file_entries[i].data_offset);
         | 
| 353 | 
            +
                }
         | 
| 354 | 
            +
             | 
| 355 | 
            +
                if (os) {
         | 
| 356 | 
            +
                    os_close(os);
         | 
| 364 357 | 
             
                }
         | 
| 365 358 |  | 
| 366 | 
            -
              XFINALLY
         | 
| 367 | 
            -
                if (os) os_close(os);
         | 
| 368 359 | 
             
                hs_destroy(cw->ids);
         | 
| 369 | 
            -
                 | 
| 360 | 
            +
                ary_free(cw->file_entries);
         | 
| 370 361 | 
             
                free(cw);
         | 
| 371 | 
            -
                break;
         | 
| 372 | 
            -
              XENDTRY
         | 
| 373 362 | 
             
            }
         | 
| 374 | 
            -
             |