ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/store.c
CHANGED
@@ -1,86 +1,584 @@
|
|
1
1
|
#include "store.h"
|
2
|
+
#include <string.h>
|
2
3
|
|
3
|
-
|
4
|
+
#define VINT_MAX_LEN 10
|
5
|
+
/*
 * Buffer position above which os_write_vint/os_write_voff_t switch to the
 * bounds-checked os_write_byte. The expansion is parenthesized so the macro
 * keeps its value when used inside a larger expression.
 */
#define VINT_END (BUFFER_SIZE - VINT_MAX_LEN)
|
4
6
|
|
5
|
-
|
6
|
-
*
|
7
|
-
* passed to +func+. If you need to pass more than one argument you should use
|
8
|
-
* a struct. When the function is finished, release the lock.
|
9
|
-
*
|
10
|
-
* @param lock lock to be locked while func is called
|
11
|
-
* @param func function to call with the lock locked
|
12
|
-
* @param arg argument to pass to the function
|
13
|
-
* @throws IO_ERROR if the lock is already locked
|
14
|
-
* @see with_lock_name
|
7
|
+
/*
|
8
|
+
* TODO: add try finally
|
15
9
|
*/
|
16
10
|
void with_lock(Lock *lock, void (*func)(void *arg), void *arg)
|
17
11
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
12
|
+
if (!lock->obtain(lock)) {
|
13
|
+
RAISE(IO_ERROR, "couldn't obtain lock \"%s\"", lock->name);
|
14
|
+
}
|
15
|
+
func(arg);
|
16
|
+
lock->release(lock);
|
23
17
|
}
|
24
18
|
|
25
|
-
|
26
|
-
*
|
27
|
-
* +func+ with the lock locked. The argument +arg+ will be passed to +func+.
|
28
|
-
* If you need to pass more than one argument you should use a struct. When
|
29
|
-
* the function is finished, release and destroy the lock.
|
30
|
-
*
|
31
|
-
* @param store store to open the lock in
|
32
|
-
* @param lock_name name of the lock to open
|
33
|
-
* @param func function to call with the lock locked
|
34
|
-
* @param arg argument to pass to the function
|
35
|
-
* @throws IO_ERROR if the lock is already locked
|
36
|
-
* @see with_lock
|
19
|
+
/*
|
20
|
+
* TODO: add try finally
|
37
21
|
*/
|
38
22
|
void with_lock_name(Store *store, char *lock_name,
|
39
|
-
|
23
|
+
void (*func)(void *arg), void *arg)
|
40
24
|
{
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
25
|
+
Lock *lock = store->open_lock(store, lock_name);
|
26
|
+
if (!lock->obtain(lock)) {
|
27
|
+
RAISE(LOCK_ERROR, "couldn't obtain lock \"%s\"", lock->name);
|
28
|
+
}
|
29
|
+
func(arg);
|
30
|
+
lock->release(lock);
|
31
|
+
store->close_lock(lock);
|
48
32
|
}
|
49
33
|
|
50
|
-
/**
|
51
|
-
* Remove a reference to the store. If the reference count gets to zero free
|
52
|
-
* all resources used by the store.
|
53
|
-
*
|
54
|
-
* @param store the store to be dereferenced
|
55
|
-
*/
|
56
34
|
void store_deref(Store *store)
|
57
35
|
{
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
36
|
+
mutex_lock(&store->mutex_i);
|
37
|
+
if (--store->ref_cnt == 0) {
|
38
|
+
store->close_i(store);
|
39
|
+
}
|
40
|
+
else {
|
41
|
+
mutex_unlock(&store->mutex_i);
|
42
|
+
}
|
64
43
|
}
|
65
44
|
|
66
45
|
/**
|
67
46
|
* Create a store struct initializing the mutex.
|
68
47
|
*/
|
69
|
-
Store *
|
48
|
+
Store *store_new()
|
70
49
|
{
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
50
|
+
Store *store = ALLOC(Store);
|
51
|
+
store->ref_cnt = 1;
|
52
|
+
mutex_init(&store->mutex_i, NULL);
|
53
|
+
mutex_init(&store->mutex, NULL);
|
54
|
+
return store;
|
76
55
|
}
|
77
56
|
|
78
57
|
/**
|
79
|
-
* Destroy the store
|
58
|
+
* Destroy the store freeing allocated resources
|
59
|
+
*
|
60
|
+
* @param store the store struct to free
|
80
61
|
*/
|
81
62
|
void store_destroy(Store *store)
|
82
63
|
{
|
83
|
-
|
84
|
-
|
85
|
-
|
64
|
+
mutex_destroy(&store->mutex_i);
|
65
|
+
mutex_destroy(&store->mutex);
|
66
|
+
free(store);
|
67
|
+
}
|
68
|
+
|
69
|
+
/**
|
70
|
+
* Create a newly allocated and initialized OutStream object
|
71
|
+
*
|
72
|
+
* @return a newly allocated and initialized OutStream object
|
73
|
+
*/
|
74
|
+
OutStream *os_new()
|
75
|
+
{
|
76
|
+
OutStream *os = ALLOC(OutStream);
|
77
|
+
os->buf.start = 0;
|
78
|
+
os->buf.pos = 0;
|
79
|
+
os->buf.len = 0;
|
80
|
+
return os;
|
81
|
+
}
|
82
|
+
|
83
|
+
/**
|
84
|
+
* Flush the contents of the OutStream's buffers
|
85
|
+
*
|
86
|
+
* @param os the OutStream to flush
|
87
|
+
*/
|
88
|
+
inline void os_flush(OutStream *os)
|
89
|
+
{
|
90
|
+
os->m->flush_i(os, os->buf.buf, os->buf.pos);
|
91
|
+
os->buf.start += os->buf.pos;
|
92
|
+
os->buf.pos = 0;
|
93
|
+
}
|
94
|
+
|
95
|
+
void os_close(OutStream *os)
|
96
|
+
{
|
97
|
+
os_flush(os);
|
98
|
+
os->m->close_i(os);
|
99
|
+
free(os);
|
100
|
+
}
|
101
|
+
|
102
|
+
off_t os_pos(OutStream *os)
|
103
|
+
{
|
104
|
+
return os->buf.start + os->buf.pos;
|
105
|
+
}
|
106
|
+
|
107
|
+
void os_seek(OutStream *os, off_t new_pos)
|
108
|
+
{
|
109
|
+
os_flush(os);
|
110
|
+
os->buf.start = new_pos;
|
111
|
+
os->m->seek_i(os, new_pos);
|
112
|
+
}
|
113
|
+
|
114
|
+
/**
|
115
|
+
* Unsafe alternative to os_write_byte. Only use this method if you know there
|
116
|
+
* is no chance of buffer overflow.
|
117
|
+
*/
|
118
|
+
/*
 * Both arguments and the whole expansion are parenthesized so the macro
 * works with any pointer expression and any byte expression. Note that +os+
 * is evaluated twice, so it must not have side effects.
 */
#define write_byte(os, b) ((os)->buf.buf[(os)->buf.pos++] = (uchar)(b))
|
119
|
+
|
120
|
+
/**
|
121
|
+
* Write a single byte +b+ to the OutStream +os+
|
122
|
+
*
|
123
|
+
* @param os the OutStream to write to
|
124
|
+
* @param b the byte to write
|
125
|
+
* @raise IO_ERROR if there is an IO error writing to the filesystem
|
126
|
+
*/
|
127
|
+
inline void os_write_byte(OutStream *os, uchar b)
|
128
|
+
{
|
129
|
+
if (os->buf.pos >= BUFFER_SIZE) {
|
130
|
+
os_flush(os);
|
131
|
+
}
|
132
|
+
write_byte(os, b);
|
133
|
+
}
|
134
|
+
|
135
|
+
void os_write_bytes(OutStream *os, uchar *buf, int len)
|
136
|
+
{
|
137
|
+
if (os->buf.pos > 0) { /* flush buffer */
|
138
|
+
os_flush(os);
|
139
|
+
}
|
140
|
+
|
141
|
+
if (len < BUFFER_SIZE) {
|
142
|
+
os->m->flush_i(os, buf, len);
|
143
|
+
os->buf.start += len;
|
144
|
+
}
|
145
|
+
else {
|
146
|
+
int pos = 0;
|
147
|
+
int size;
|
148
|
+
while (pos < len) {
|
149
|
+
if (len - pos < BUFFER_SIZE) {
|
150
|
+
size = len - pos;
|
151
|
+
}
|
152
|
+
else {
|
153
|
+
size = BUFFER_SIZE;
|
154
|
+
}
|
155
|
+
os->m->flush_i(os, buf + pos, size);
|
156
|
+
pos += size;
|
157
|
+
os->buf.start += size;
|
158
|
+
}
|
159
|
+
}
|
160
|
+
}
|
161
|
+
|
162
|
+
/**
|
163
|
+
* Create a newly allocated and initialized InStream
|
164
|
+
*
|
165
|
+
* @return a newly allocated and initialized InStream
|
166
|
+
*/
|
167
|
+
InStream *is_new()
|
168
|
+
{
|
169
|
+
InStream *is = ALLOC(InStream);
|
170
|
+
is->buf.start = 0;
|
171
|
+
is->buf.pos = 0;
|
172
|
+
is->buf.len = 0;
|
173
|
+
is->ref_cnt_ptr = ALLOC_AND_ZERO(int);
|
174
|
+
return is;
|
175
|
+
}
|
176
|
+
|
177
|
+
/**
|
178
|
+
* Refill the InStream's buffer from the store source (filesystem or memory).
|
179
|
+
*
|
180
|
+
* @param is the InStream to refill
|
181
|
+
* @raise IO_ERROR if there is an error reading from the filesystem
|
182
|
+
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
183
|
+
*/
|
184
|
+
void is_refill(InStream *is)
|
185
|
+
{
|
186
|
+
off_t start = is->buf.start + is->buf.pos;
|
187
|
+
off_t last = start + BUFFER_SIZE;
|
188
|
+
off_t flen = is->m->length_i(is);
|
189
|
+
|
190
|
+
if (last > flen) { /* don't read past EOF */
|
191
|
+
last = flen;
|
192
|
+
}
|
193
|
+
|
194
|
+
is->buf.len = last - start;
|
195
|
+
if (is->buf.len <= 0) {
|
196
|
+
RAISE(EOF_ERROR, "current pos = %"F_OFF_T_PFX"d, "
|
197
|
+
"file length = %"F_OFF_T_PFX"d", start, flen);
|
198
|
+
}
|
199
|
+
|
200
|
+
is->m->read_i(is, is->buf.buf, is->buf.len);
|
201
|
+
|
202
|
+
is->buf.start = start;
|
203
|
+
is->buf.pos = 0;
|
204
|
+
}
|
205
|
+
|
206
|
+
/**
|
207
|
+
* Unsafe alternative to is_read_byte. Only use this method when you know
|
208
|
+
* there is no chance that you will read past the end of the InStream's
|
209
|
+
* buffer.
|
210
|
+
*/
|
211
|
+
/*
 * The argument and the whole expansion are parenthesized so the macro works
 * with any pointer expression and inside larger expressions. Note that +is+
 * is evaluated twice, so it must not have side effects.
 */
#define read_byte(is) ((is)->buf.buf[(is)->buf.pos++])
|
212
|
+
|
213
|
+
/**
|
214
|
+
* Read a single byte (unsigned char) from the InStream +is+.
|
215
|
+
*
|
216
|
+
* @param is the InStream to read from
|
217
|
+
* @return a single unsigned char read from the InStream +is+
|
218
|
+
* @raise IO_ERROR if there is an error reading from the filesystem
|
219
|
+
* @raise EOF_ERROR if there is an attempt to read past the end of the file
|
220
|
+
*/
|
221
|
+
inline uchar is_read_byte(InStream *is)
|
222
|
+
{
|
223
|
+
if (is->buf.pos >= is->buf.len) {
|
224
|
+
is_refill(is);
|
225
|
+
}
|
226
|
+
|
227
|
+
return read_byte(is);
|
228
|
+
}
|
229
|
+
|
230
|
+
off_t is_pos(InStream *is)
|
231
|
+
{
|
232
|
+
return is->buf.start + is->buf.pos;
|
233
|
+
}
|
234
|
+
|
235
|
+
uchar *is_read_bytes(InStream *is, uchar *buf, int len)
|
236
|
+
{
|
237
|
+
int i;
|
238
|
+
off_t start;
|
239
|
+
|
240
|
+
if ((is->buf.pos + len) < is->buf.len) {
|
241
|
+
for (i = 0; i < len; i++) {
|
242
|
+
buf[i] = read_byte(is);
|
243
|
+
}
|
244
|
+
}
|
245
|
+
else { /* read all-at-once */
|
246
|
+
start = is_pos(is);
|
247
|
+
is->m->seek_i(is, start);
|
248
|
+
is->m->read_i(is, buf, len);
|
249
|
+
|
250
|
+
is->buf.start = start + len; /* adjust stream variables */
|
251
|
+
is->buf.pos = 0;
|
252
|
+
is->buf.len = 0; /* trigger refill on read */
|
253
|
+
}
|
254
|
+
return buf;
|
255
|
+
}
|
256
|
+
|
257
|
+
void is_seek(InStream *is, off_t pos)
|
258
|
+
{
|
259
|
+
if (pos >= is->buf.start && pos < (is->buf.start + is->buf.len)) {
|
260
|
+
is->buf.pos = pos - is->buf.start; /* seek within buffer */
|
261
|
+
}
|
262
|
+
else {
|
263
|
+
is->buf.start = pos;
|
264
|
+
is->buf.pos = 0;
|
265
|
+
is->buf.len = 0; /* trigger refill() on read() */
|
266
|
+
is->m->seek_i(is, pos);
|
267
|
+
}
|
268
|
+
}
|
269
|
+
|
270
|
+
void is_close(InStream *is)
|
271
|
+
{
|
272
|
+
if (--(*(is->ref_cnt_ptr)) < 0) {
|
273
|
+
is->m->close_i(is);
|
274
|
+
free(is->ref_cnt_ptr);
|
275
|
+
}
|
276
|
+
free(is);
|
277
|
+
}
|
278
|
+
|
279
|
+
InStream *is_clone(InStream *is)
|
280
|
+
{
|
281
|
+
InStream *new_index_i = ALLOC(InStream);
|
282
|
+
memcpy(new_index_i, is, sizeof(InStream));
|
283
|
+
(*(new_index_i->ref_cnt_ptr))++;
|
284
|
+
return new_index_i;
|
285
|
+
}
|
286
|
+
|
287
|
+
f_i32 is_read_i32(InStream *is)
|
288
|
+
{
|
289
|
+
return ((f_i32)is_read_byte(is) << 24) |
|
290
|
+
((f_i32)is_read_byte(is) << 16) |
|
291
|
+
((f_i32)is_read_byte(is) << 8) |
|
292
|
+
((f_i32)is_read_byte(is));
|
86
293
|
}
|
294
|
+
|
295
|
+
f_i64 is_read_i64(InStream *is)
|
296
|
+
{
|
297
|
+
return ((f_i64)is_read_byte(is) << 56) |
|
298
|
+
((f_i64)is_read_byte(is) << 48) |
|
299
|
+
((f_i64)is_read_byte(is) << 40) |
|
300
|
+
((f_i64)is_read_byte(is) << 32) |
|
301
|
+
((f_i64)is_read_byte(is) << 24) |
|
302
|
+
((f_i64)is_read_byte(is) << 16) |
|
303
|
+
((f_i64)is_read_byte(is) << 8) |
|
304
|
+
((f_i64)is_read_byte(is));
|
305
|
+
}
|
306
|
+
|
307
|
+
f_u32 is_read_u32(InStream *is)
|
308
|
+
{
|
309
|
+
return ((f_u32)is_read_byte(is) << 24) |
|
310
|
+
((f_u32)is_read_byte(is) << 16) |
|
311
|
+
((f_u32)is_read_byte(is) << 8) |
|
312
|
+
((f_u32)is_read_byte(is));
|
313
|
+
}
|
314
|
+
|
315
|
+
f_u64 is_read_u64(InStream *is)
|
316
|
+
{
|
317
|
+
return ((f_u64)is_read_byte(is) << 56) |
|
318
|
+
((f_u64)is_read_byte(is) << 48) |
|
319
|
+
((f_u64)is_read_byte(is) << 40) |
|
320
|
+
((f_u64)is_read_byte(is) << 32) |
|
321
|
+
((f_u64)is_read_byte(is) << 24) |
|
322
|
+
((f_u64)is_read_byte(is) << 16) |
|
323
|
+
((f_u64)is_read_byte(is) << 8) |
|
324
|
+
((f_u64)is_read_byte(is));
|
325
|
+
}
|
326
|
+
|
327
|
+
/* optimized to use unchecked read_byte if there is definitely space */
|
328
|
+
inline unsigned int is_read_vint(InStream *is)
|
329
|
+
{
|
330
|
+
register unsigned int res, b;
|
331
|
+
register int shift = 7;
|
332
|
+
|
333
|
+
if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
|
334
|
+
b = is_read_byte(is);
|
335
|
+
res = b & 0x7F; /* 0x7F = 0b01111111 */
|
336
|
+
|
337
|
+
while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
|
338
|
+
b = is_read_byte(is);
|
339
|
+
res |= (b & 0x7F) << shift;
|
340
|
+
shift += 7;
|
341
|
+
}
|
342
|
+
}
|
343
|
+
else { /* unchecked optimization */
|
344
|
+
b = read_byte(is);
|
345
|
+
res = b & 0x7F; /* 0x7F = 0b01111111 */
|
346
|
+
|
347
|
+
while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
|
348
|
+
b = read_byte(is);
|
349
|
+
res |= (b & 0x7F) << shift;
|
350
|
+
shift += 7;
|
351
|
+
}
|
352
|
+
}
|
353
|
+
|
354
|
+
return res;
|
355
|
+
}
|
356
|
+
|
357
|
+
/* optimized to use unchecked read_byte if there is definitely space */
|
358
|
+
inline off_t is_read_voff_t(InStream *is)
|
359
|
+
{
|
360
|
+
register off_t res, b;
|
361
|
+
register int shift = 7;
|
362
|
+
|
363
|
+
if (is->buf.pos > (is->buf.len - VINT_MAX_LEN)) {
|
364
|
+
b = is_read_byte(is);
|
365
|
+
res = b & 0x7F; /* 0x7F = 0b01111111 */
|
366
|
+
|
367
|
+
while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
|
368
|
+
b = is_read_byte(is);
|
369
|
+
res |= (b & 0x7F) << shift;
|
370
|
+
shift += 7;
|
371
|
+
}
|
372
|
+
}
|
373
|
+
else { /* unchecked optimization */
|
374
|
+
b = read_byte(is);
|
375
|
+
res = b & 0x7F; /* 0x7F = 0b01111111 */
|
376
|
+
|
377
|
+
while ((b & 0x80) != 0) { /* 0x80 = 0b10000000 */
|
378
|
+
b = read_byte(is);
|
379
|
+
res |= (b & 0x7F) << shift;
|
380
|
+
shift += 7;
|
381
|
+
}
|
382
|
+
}
|
383
|
+
|
384
|
+
return res;
|
385
|
+
}
|
386
|
+
|
387
|
+
inline void is_skip_vints(InStream *is, register int cnt)
|
388
|
+
{
|
389
|
+
for (; cnt > 0; cnt--) {
|
390
|
+
while ((is_read_byte(is) & 0x80) != 0) {
|
391
|
+
}
|
392
|
+
}
|
393
|
+
}
|
394
|
+
|
395
|
+
inline void is_read_chars(InStream *is, char *buffer,
|
396
|
+
int off, int len)
|
397
|
+
{
|
398
|
+
int end, i;
|
399
|
+
|
400
|
+
end = off + len;
|
401
|
+
|
402
|
+
for (i = off; i < end; i++) {
|
403
|
+
buffer[i] = is_read_byte(is);
|
404
|
+
}
|
405
|
+
}
|
406
|
+
|
407
|
+
char *is_read_string(InStream *is)
|
408
|
+
{
|
409
|
+
register int length = (int) is_read_vint(is);
|
410
|
+
char *str = ALLOC_N(char, length + 1);
|
411
|
+
str[length] = '\0';
|
412
|
+
|
413
|
+
if (is->buf.pos > (is->buf.len - length)) {
|
414
|
+
register int i;
|
415
|
+
for (i = 0; i < length; i++) {
|
416
|
+
str[i] = is_read_byte(is);
|
417
|
+
}
|
418
|
+
}
|
419
|
+
else { /* unchecked optimization */
|
420
|
+
memcpy(str, is->buf.buf + is->buf.pos, length);
|
421
|
+
is->buf.pos += length;
|
422
|
+
}
|
423
|
+
|
424
|
+
return str;
|
425
|
+
}
|
426
|
+
|
427
|
+
void os_write_i32(OutStream *os, f_i32 num)
|
428
|
+
{
|
429
|
+
os_write_byte(os, (uchar)((num >> 24) & 0xFF));
|
430
|
+
os_write_byte(os, (uchar)((num >> 16) & 0xFF));
|
431
|
+
os_write_byte(os, (uchar)((num >> 8) & 0xFF));
|
432
|
+
os_write_byte(os, (uchar)(num & 0xFF));
|
433
|
+
}
|
434
|
+
|
435
|
+
void os_write_i64(OutStream *os, f_i64 num)
|
436
|
+
{
|
437
|
+
os_write_byte(os, (uchar)((num >> 56) & 0xFF));
|
438
|
+
os_write_byte(os, (uchar)((num >> 48) & 0xFF));
|
439
|
+
os_write_byte(os, (uchar)((num >> 40) & 0xFF));
|
440
|
+
os_write_byte(os, (uchar)((num >> 32) & 0xFF));
|
441
|
+
os_write_byte(os, (uchar)((num >> 24) & 0xFF));
|
442
|
+
os_write_byte(os, (uchar)((num >> 16) & 0xFF));
|
443
|
+
os_write_byte(os, (uchar)((num >> 8) & 0xFF));
|
444
|
+
os_write_byte(os, (uchar)(num & 0xFF));
|
445
|
+
}
|
446
|
+
|
447
|
+
void os_write_u32(OutStream *os, f_u32 num)
|
448
|
+
{
|
449
|
+
os_write_byte(os, (uchar)((num >> 24) & 0xFF));
|
450
|
+
os_write_byte(os, (uchar)((num >> 16) & 0xFF));
|
451
|
+
os_write_byte(os, (uchar)((num >> 8) & 0xFF));
|
452
|
+
os_write_byte(os, (uchar)(num & 0xFF));
|
453
|
+
}
|
454
|
+
|
455
|
+
void os_write_u64(OutStream *os, f_u64 num)
|
456
|
+
{
|
457
|
+
os_write_byte(os, (uchar)((num >> 56) & 0xFF));
|
458
|
+
os_write_byte(os, (uchar)((num >> 48) & 0xFF));
|
459
|
+
os_write_byte(os, (uchar)((num >> 40) & 0xFF));
|
460
|
+
os_write_byte(os, (uchar)((num >> 32) & 0xFF));
|
461
|
+
os_write_byte(os, (uchar)((num >> 24) & 0xFF));
|
462
|
+
os_write_byte(os, (uchar)((num >> 16) & 0xFF));
|
463
|
+
os_write_byte(os, (uchar)((num >> 8) & 0xFF));
|
464
|
+
os_write_byte(os, (uchar)(num & 0xFF));
|
465
|
+
}
|
466
|
+
|
467
|
+
/* optimized to use an unchecked write if there is space */
|
468
|
+
inline void os_write_vint(OutStream *os, register unsigned int num)
|
469
|
+
{
|
470
|
+
if (os->buf.pos > VINT_END) {
|
471
|
+
while (num > 127) {
|
472
|
+
os_write_byte(os, (uchar)((num & 0x7f) | 0x80));
|
473
|
+
num >>= 7;
|
474
|
+
}
|
475
|
+
os_write_byte(os, (uchar)(num));
|
476
|
+
}
|
477
|
+
else {
|
478
|
+
while (num > 127) {
|
479
|
+
write_byte(os, (uchar)((num & 0x7f) | 0x80));
|
480
|
+
num >>= 7;
|
481
|
+
}
|
482
|
+
write_byte(os, (uchar)(num));
|
483
|
+
}
|
484
|
+
}
|
485
|
+
|
486
|
+
/* optimized to use an unchecked write if there is space */
|
487
|
+
inline void os_write_voff_t(OutStream *os, register off_t num)
|
488
|
+
{
|
489
|
+
if (os->buf.pos > VINT_END) {
|
490
|
+
while (num > 127) {
|
491
|
+
os_write_byte(os, (uchar)((num & 0x7f) | 0x80));
|
492
|
+
num >>= 7;
|
493
|
+
}
|
494
|
+
os_write_byte(os, (uchar)num);
|
495
|
+
}
|
496
|
+
else {
|
497
|
+
while (num > 127) {
|
498
|
+
write_byte(os, (uchar)((num & 0x7f) | 0x80));
|
499
|
+
num >>= 7;
|
500
|
+
}
|
501
|
+
write_byte(os, (uchar)num);
|
502
|
+
}
|
503
|
+
}
|
504
|
+
|
505
|
+
void os_write_string(OutStream *os, char *str)
|
506
|
+
{
|
507
|
+
int len = (int)strlen(str);
|
508
|
+
os_write_vint(os, len);
|
509
|
+
|
510
|
+
os_write_bytes(os, (uchar *)str, len);
|
511
|
+
}
|
512
|
+
|
513
|
+
/**
|
514
|
+
* Determine if the filename is the name of a lock file. Return 1 if it is, 0
|
515
|
+
* otherwise.
|
516
|
+
*
|
517
|
+
* @param filename the name of the file to check
|
518
|
+
* @return 1 (true) if the file is a lock file, 0 (false) otherwise
|
519
|
+
*/
|
520
|
+
int file_is_lock(char *filename)
|
521
|
+
{
|
522
|
+
int start = (int) strlen(filename) - 4;
|
523
|
+
return ((start > 0) && (strcmp(LOCK_EXT, &filename[start]) == 0));
|
524
|
+
}
|
525
|
+
|
526
|
+
void is2os_copy_bytes(InStream *is, OutStream *os, int cnt)
|
527
|
+
{
|
528
|
+
int len;
|
529
|
+
uchar buf[BUFFER_SIZE];
|
530
|
+
|
531
|
+
for (; cnt > 0; cnt -= BUFFER_SIZE) {
|
532
|
+
len = ((cnt > BUFFER_SIZE) ? BUFFER_SIZE : cnt);
|
533
|
+
is_read_bytes(is, buf, len);
|
534
|
+
os_write_bytes(os, buf, len);
|
535
|
+
}
|
536
|
+
}
|
537
|
+
|
538
|
+
void is2os_copy_vints(InStream *is, OutStream *os, int cnt)
|
539
|
+
{
|
540
|
+
uchar b;
|
541
|
+
for (; cnt > 0; cnt--) {
|
542
|
+
while (((b = is_read_byte(is)) & 0x80) != 0) {
|
543
|
+
os_write_byte(os, b);
|
544
|
+
}
|
545
|
+
os_write_byte(os, b);
|
546
|
+
}
|
547
|
+
}
|
548
|
+
|
549
|
+
/**
|
550
|
+
* Test argument used to test the store->each function
|
551
|
+
*/
|
552
|
+
struct FileNameConcatArg
|
553
|
+
{
|
554
|
+
char *p;
|
555
|
+
char *end;
|
556
|
+
};
|
557
|
+
|
558
|
+
/**
|
559
|
+
* Test function used to test store->each function
|
560
|
+
*/
|
561
|
+
static void concat_filenames(char *fname, void *arg)
|
562
|
+
{
|
563
|
+
struct FileNameConcatArg *fnca = (struct FileNameConcatArg *)arg;
|
564
|
+
if (fnca->p + strlen(fname) + 2 < fnca->end) {
|
565
|
+
strcpy(fnca->p, fname);
|
566
|
+
fnca->p += strlen(fname);
|
567
|
+
*(fnca->p++) = ',';
|
568
|
+
*(fnca->p++) = ' ';
|
569
|
+
}
|
570
|
+
}
|
571
|
+
|
572
|
+
/**
 * Build a ", "-separated list of the names the store passes to its +each+
 * callback and write it into +buf+ as a NUL-terminated string.
 *
 * Names that do not fit in the remaining space are skipped by
 * concat_filenames. If nothing is written at all (the callback is never
 * invoked, or no name fits), +buf+ is left holding the empty string instead
 * of uninitialized bytes.
 *
 * @param store    the store whose +each+ function is used to visit names
 * @param buf      caller-supplied buffer that receives the list
 * @param buf_size capacity of +buf+ in bytes
 * @return +buf+
 */
char *store_to_s(Store *store, char *buf, int buf_size)
{
    struct FileNameConcatArg fnca;

    /* Guarantee termination even when the callback never writes anything. */
    if (buf_size > 0) {
        buf[0] = '\0';
    }

    fnca.p = buf;
    fnca.end = buf + buf_size;
    store->each(store, &concat_filenames, &fnca);

    /* Each appended name is followed by ", "; cut the trailing separator. */
    if (fnca.p > buf + 2) {
        fnca.p[-2] = '\0';
    }
    return buf;
}
|
584
|
+
|