ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
@@ -1,245 +0,0 @@
|
|
1
|
-
module Ferret::Store
|
2
|
-
# Ferret's IO Input methods are defined here. The methods read_byte and
# read_bytes need to be defined before this class is of any use; every other
# reader below is built, directly or indirectly, on top of read_byte.
class IndexInput

  # Reads and returns a single byte. Must be implemented by subclasses.
  def read_byte
    raise NotImplementedError
  end

  # Reads a specified number of bytes into an array at the specified offset.
  # Must be implemented by subclasses.
  # buf::    the array to read bytes into
  # offset:: the offset in the array to start storing bytes
  # len::    the number of bytes to read
  def read_bytes(buf, offset, len)
    raise NotImplementedError
  end

  # Reads four bytes, most significant byte first, and returns a signed int.
  # read_uint should be used for unsigned integers for performance reasons.
  def read_int
    i1 = read_byte
    i2 = read_byte
    i3 = read_byte
    i4 = read_byte
    # Ruby integers are unbounded, so the 32-bit two's-complement sign has to
    # be applied by hand: when the top bit of the first byte is set, subtract
    # 2**32 from the unsigned value.
    sign_adjust = (i1 & 0x80) == 0 ? 0 : -0x100000000
    sign_adjust + ((i1 << 24) + (i2 << 16) + (i3 << 8) + i4)
  end

  # Reads eight bytes and returns a signed long. The first four bytes carry
  # the sign and the high word, the last four the low word.
  def read_long
    (read_int << 32) + (read_int & 0xFFFFFFFF)
  end

  # Reads four bytes, most significant byte first, and returns a positive
  # integer.
  def read_uint
    (read_byte << 24) | (read_byte << 16) | (read_byte << 8) | read_byte
  end

  # Reads eight bytes and returns a positive integer.
  def read_ulong
    (read_uint << 32) | (read_uint & 0xFFFFFFFF)
  end

  # Reads an int stored in variable-length format. Each byte contributes its
  # low seven bits, least significant group first; a set high bit means that
  # another byte follows. Smaller values take fewer bytes. Negative numbers
  # are not supported.
  def read_vint
    b = read_byte
    i = b & 0x7F # 0x7F = 0b01111111
    shift = 7

    while (b & 0x80) != 0 # 0x80 = 0b10000000, the continuation flag
      b = read_byte
      i |= (b & 0x7F) << shift
      shift += 7
    end

    i
  end
  alias :read_vlong :read_vint

  # Reads a string. A string is stored as a single vint which describes the
  # length of the string, followed by the actual string itself.
  #
  # Returns the string assembled from the bytes that were read.
  def read_string
    length = read_vint

    chars = Array.new(length, ' ')
    read_chars(chars, 0, length)

    # Array#to_s only concatenates the elements on Ruby 1.8; on later
    # versions it returns the inspect form (e.g. '["a", "b"]'). join gives
    # the intended result on every version.
    chars.join
  end

  # Reads characters into a buffer, one byte per character.
  # buf::    the array (or string) to read characters into
  # start::  the offset in the buffer to start storing characters
  # length:: the number of characters to read
  #
  # TODO: Test on some actual UTF-8 documents. Each byte is currently stored
  # as its own one-byte string; multi-byte sequences are not decoded here.
  def read_chars(buf, start, length)
    last = start + length
    missing = last - buf.length
    if missing > 0
      # make room for the characters to read
      if buf.is_a?(String)
        buf << " " * missing
      else
        # Pad arrays one element per slot. Appending a single multi-space
        # string would add just one element and leave a stray entry behind
        # whenever start > 0.
        buf.concat(Array.new(missing, " "))
      end
    end
    (start...last).each do |i|
      buf[i] = read_byte.chr
    end
  end

  # Closes the stream to further operations. Must be implemented by
  # subclasses.
  def close
    raise NotImplementedError
  end

  # Returns the current position in this file, where the next read will
  # occur. Must be implemented by subclasses.
  def pos
    raise NotImplementedError
  end

  # Sets current position in this file, where the next read will occur.
  # Must be implemented by subclasses.
  def seek(pos)
    raise NotImplementedError
  end

  # The number of bytes in the file. Must be implemented by subclasses.
  def length
    raise NotImplementedError
  end

  # Note on cloning (clone itself is deliberately not overridden here):
  #
  # Clones of a stream access the same data, and are positioned at the same
  # point as the stream they were cloned from.
  #
  # Expert:: Subclasses must ensure that clones may be positioned at
  # different points in the input from each other and from the stream they
  # were cloned from.

end
|
156
|
-
|
157
|
-
# Ferret's IO Output methods are defined here. The methods write_byte and
# write_bytes need to be defined before this class is of any use; every other
# writer below is built, directly or indirectly, on top of write_byte.
class IndexOutput

  # Writes a single byte. Must be implemented by subclasses.
  def write_byte(b)
    raise NotImplementedError
  end

  # Writes an array of bytes. Must be implemented by subclasses.
  # buf:: the bytes to write
  # len:: the number of bytes to write
  def write_bytes(buf, len)
    raise NotImplementedError
  end

  # Writes an int as four bytes, most significant byte first. Negative
  # values come out in 32-bit two's-complement form because each byte is
  # masked individually.
  def write_int(i)
    write_byte((i >> 24) & 0xFF)
    write_byte((i >> 16) & 0xFF)
    write_byte((i >>  8) & 0xFF)
    write_byte(i & 0xFF)
  end
  alias :write_uint :write_int

  # Writes an int in a variable-length format: seven bits per byte, least
  # significant group first, with the high bit set on every byte except the
  # last. Smaller values take fewer bytes. Negative numbers are not
  # supported.
  def write_vint(i)
    while i > 127
      write_byte((i & 0x7f) | 0x80)
      i >>= 7
    end
    write_byte(i)
  end
  alias :write_vlong :write_vint

  # Writes a long as eight bytes: the high 32 bits first, then the low 32.
  def write_long(i)
    write_int(i >> 32)
    write_int(i)
  end
  alias :write_ulong :write_long

  # Writes a string as a vint length prefix followed by the string's bytes.
  #
  # The prefix is the length in bytes, not in characters: the matching
  # reader consumes exactly that many bytes. String#length stopped being a
  # byte count in Ruby 1.9, so bytesize is used wherever it is available.
  def write_string(s)
    length = s.respond_to?(:bytesize) ? s.bytesize : s.length
    write_vint(length)
    write_chars(s, 0, length)
  end

  # Writes a sequence of bytes taken from a string.
  # buf::    the source of the characters
  # start::  the byte offset of the first byte in the sequence
  # length:: the number of bytes in the sequence
  def write_chars(buf, start, length)
    # String#[] with an integer index returned a byte on Ruby 1.8 but returns
    # a one-character String on later versions, which is not a valid argument
    # for write_byte. Unpacking yields integer bytes on every version. A
    # buffer that is not a string is indexed directly, as before.
    bytes = buf.respond_to?(:unpack) ? buf.unpack('C*') : buf
    last = start + length
    (start...last).each do |i|
      write_byte(bytes[i])
    end
  end

  # Forces any buffered output to be written. Must be implemented by
  # subclasses.
  def flush
    raise NotImplementedError
  end

  # Closes this stream to further operations. Must be implemented by
  # subclasses.
  def close
    raise NotImplementedError
  end

  # Returns the current position in this file, where the next write will
  # occur. Must be implemented by subclasses.
  def pos
    raise NotImplementedError
  end

  # Sets current position in this file, where the next write will occur.
  # Must be implemented by subclasses.
  def seek(pos)
    raise NotImplementedError
  end

  # The number of bytes in the file. Must be implemented by subclasses.
  def length
    raise NotImplementedError
  end
end
|
245
|
-
end
|
@@ -1,286 +0,0 @@
|
|
1
|
-
module Ferret::Store
|
2
|
-
require 'monitor'
|
3
|
-
|
4
|
-
# A Directory whose files are held in memory. Files are RAMFile objects kept
# in a hash keyed by file name; RAMIndexOutput and RAMIndexInput stream data
# into and out of a RAMFile's fixed-size buffers, and RAMLock implements
# locking by creating marker files inside the directory.
class RAMDirectory < Directory
  include MonitorMixin

  # Creates a new in-memory directory.
  # dir::       optional directory whose files are copied into this one
  # close_dir:: when true, +dir+ is closed once its files have been copied
  def initialize(dir = nil, close_dir = false)
    super()
    @files = Hash.new
    if dir
      buf = BUFFER.clone
      dir.each do |file|
        os = create_output(file)      # make a place on ram disk
        begin
          is = dir.open_input(file)   # read the current file
          begin
            len = is.length           # and copy the file to ram disk
            # grow the scratch buffer so that it can hold the whole file
            buf << " " * (len - buf.size) if len > buf.size
            is.read_bytes(buf, 0, len)
            os.write_bytes(buf, len)
          ensure
            # release the source stream even when the copy fails
            is.close()
          end
        ensure
          os.close()
        end
      end
      dir.close() if close_dir
    end
  end

  # Yields the name of each file in the directory, skipping lock files.
  #
  # The file names (the hash keys) are what gets tested and yielded. The
  # previous code tested and yielded the RAMFile values instead, so lock
  # files were never filtered out and callers such as #initialize, which
  # pass the yielded value on to open_input, received an object rather
  # than a name.
  def each()
    @files.each_key do |path|
      next if path.to_s.include?(LOCK_PREFIX)
      yield path
    end
  end

  # Returns true if a file with the given name exists.
  def exists?(name)
    @files.has_key?(name)
  end

  # Returns the time the named file was last modified.
  def modified(name)
    @files[name].mtime
  end

  # Set the modified time of a file to now, creating the file first if it
  # does not exist yet.
  def touch(name)
    @files[name] = RAMFile.new(name) if @files[name].nil?
    @files[name].mtime = Time.now
  end

  # Removes an existing file in the directory.
  def delete(name)
    @files.delete(name)
  end

  # Renames an existing file in the directory.
  # If a file already exists with the new name, then it is replaced.
  # This replacement should be atomic.
  def rename(from, to)
    # Remove the old entry before storing the new one, so that renaming a
    # file onto its own name keeps the file instead of deleting it.
    @files[to] = @files.delete(from)
  end

  # Returns the length of a file in the directory.
  def length(name)
    @files[name].length
  end

  # Creates a new, empty file in the directory with the given name. Any
  # existing file of that name is replaced.
  # Returns a stream writing this file.
  def create_output(name)
    file = RAMFile.new(name)
    @files[name] = file
    RAMIndexOutput.new(file)
  end

  # Returns a stream reading an existing file. Raises IOError if there is no
  # file with the given name.
  def open_input(name)
    raise IOError, "No file #{name}" if @files[name].nil?
    RAMIndexInput.new(@files[name])
  end

  # Prints the raw contents of the named file to standard output.
  def print_file(name)
    input = RAMIndexInput.new(@files[name])
    buf = " " * input.length
    input.read_internal(buf, 0, input.length)
    puts buf
  end

  # Construct a Lock. The lock is backed by a file in this directory called
  # LOCK_PREFIX + name + ".lck".
  def make_lock(name)
    RAMLock.new(LOCK_PREFIX + name + ".lck", self)
  end

  # Closes the store. Nothing needs to be released for a RAM directory.
  def close()
  end

  # Returns a listing of every file name in the directory with its size.
  def to_s
    str = "The files in this directory are: \n"
    @files.each do |path, file|
      str << path + " - " + file.size.to_s + "\n"
    end
    str
  end

  # Output stream that writes into the buffers of a RAMFile.
  class RAMIndexOutput < BufferedIndexOutput
    def initialize(f)
      @file = f
      @pointer = 0
      super()
    end

    # The number of bytes in the underlying file.
    def length
      return @file.length
    end

    # Copies +len+ bytes from +src+ into the file at the current pointer,
    # then advances the pointer and updates the file's length and mtime.
    #
    # Only a single spill into the following buffer is handled.
    # NOTE(review): this presumably relies on len never exceeding
    # BUFFER_SIZE - confirm against BufferedIndexOutput.
    def flush_buffer(src, len)
      buffer_number = (@pointer / BUFFER_SIZE).to_i
      buffer_offset = @pointer % BUFFER_SIZE
      bytes_in_buffer = BUFFER_SIZE - buffer_offset
      bytes_to_copy = [bytes_in_buffer, len].min

      extend_buffer_if_necessary(buffer_number)

      buffer = @file.buffers[buffer_number]
      buffer[buffer_offset, bytes_to_copy] = src[0, bytes_to_copy]

      if bytes_to_copy < len
        # the data did not fit; put the rest at the start of the next buffer
        src_offset = bytes_to_copy
        bytes_to_copy = len - bytes_to_copy
        buffer_number += 1
        extend_buffer_if_necessary(buffer_number)
        buffer = @file.buffers[buffer_number]
        buffer[0, bytes_to_copy] = src[src_offset, bytes_to_copy]
      end
      @pointer += len
      @file.length = @pointer unless @pointer < @file.length
      @file.mtime = Time.now
    end

    # Rewinds the stream to the start and truncates the file to zero length.
    def reset
      seek(0)
      @file.length = 0
    end

    # Sets the position in the file at which the next write will occur.
    def seek(pos)
      super(pos)
      @pointer = pos
    end

    # Closes the stream and stamps the file's modification time.
    def close
      super()
      @file.mtime = Time.new
    end

    # Flushes this stream and copies the whole file to +output+.
    def write_to(output)
      flush()
      last_buffer_number = (@file.length / BUFFER_SIZE).to_i
      last_buffer_offset = @file.length % BUFFER_SIZE

      (0..last_buffer_number).each do |i|
        len = (i == last_buffer_number ? last_buffer_offset : BUFFER_SIZE)
        # When the length is an exact multiple of BUFFER_SIZE (including an
        # empty file) the trailing buffer holds no data and may not even
        # exist, so there is nothing to write for it.
        next if len == 0
        output.write_bytes(@file.buffers[i], len)
      end
    end

    private

    # Appends a fresh buffer to the file when +buffer_number+ is one past
    # the last buffer currently allocated.
    def extend_buffer_if_necessary(buffer_number)
      if buffer_number == @file.buffers.size
        @file.buffers << RAMFile::BUFFER.clone
      end
    end

  end

  # Input stream that reads from the buffers of a RAMFile.
  class RAMIndexInput < BufferedIndexInput

    def initialize(f)
      @pointer = 0
      @file = f
      super()
    end

    # The number of bytes in the underlying file.
    def length
      return @file.length
    end

    # Copies +length+ bytes from the file, starting at the current pointer,
    # into +b+ starting at +offset+, then advances the pointer.
    def read_internal(b, offset, length)
      remainder = length
      start = @pointer

      # copy buffer by buffer until the request has been satisfied
      while remainder != 0
        buffer_number = (start / BUFFER_SIZE).to_i
        buffer_offset = start % BUFFER_SIZE
        bytes_in_buffer = BUFFER_SIZE - buffer_offset

        bytes_to_copy = [bytes_in_buffer, remainder].min
        buffer = @file.buffers[buffer_number]
        b[offset, bytes_to_copy] = buffer[buffer_offset, bytes_to_copy]
        offset += bytes_to_copy
        start += bytes_to_copy
        remainder -= bytes_to_copy
      end

      @pointer += length
    end

    # Sets the position in the file at which the next read will occur.
    def seek_internal(pos)
      @pointer = pos
    end

    # Nothing to release for an in-memory stream.
    def close
    end
  end

  # This class contains an array of byte arrays which act as buffers to
  # store the data in.
  class RAMFile
    # Template for a blank buffer; it is cloned whenever a buffer is needed.
    BUFFER = " " * BUFFER_SIZE

    attr_reader :buffers
    attr_accessor :mtime
    attr_accessor :length
    alias :size :length

    # name:: accepted for symmetry with the directory API; it is not stored
    def initialize(name)
      @buffers = Array.new
      @mtime = Time.now
      @length = 0
    end
  end

  # A Lock is used to lock a data source (in this case a file) so that
  # not more than one output stream can access a data source at one time.
  class RAMLock < Lock
    # pass the name of the file that we are going to lock
    # lock_file:: the name of the marker file representing the lock
    # dir::       the directory the marker file is created in
    def initialize(lock_file, dir)
      @lock_file = lock_file
      @dir = dir
    end

    # Obtain the lock on the data source. Tries up to MAX_ATTEMPTS times,
    # sleeping +lock_timeout+ seconds after each failed attempt. Returns true
    # on success and raises a RuntimeError if the lock is never obtained.
    #
    # NOTE(review): the check and the file creation are not performed under
    # the directory's monitor, so two threads could both see the lock as
    # free - confirm whether that is acceptable.
    def obtain(lock_timeout = 1)
      MAX_ATTEMPTS.times do
        # create a file if none exists. If one already exists
        # then someone beat us to the lock so keep waiting
        if (! locked?) then
          @dir.create_output(@lock_file)
          return true
        end
        # lock was not obtained so sleep for timeout then try again.
        sleep(lock_timeout)
      end
      # lock could not be obtained so raise an exception
      raise "could not obtain lock: " + @lock_file.to_s
    end

    # Release the lock on the data source. Returns true if successful.
    def release
      @dir.delete(@lock_file)
      return true
    end

    # returns true if there is a lock on the data source
    def locked?
      @dir.exists?(@lock_file)
    end
  end
end
|
286
|
-
end
|