ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202):
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,259 @@
1
+ module Ferret::Store
2
# Ferret's IO Input methods are defined here. The methods read_byte and
# read_bytes need to be defined before this class is of any use.
#
# Every composite reader below (ints, longs, vints, strings) is built on top
# of read_byte only, so a subclass that supplies read_byte gets all of them
# for free. close, pos, seek and length must also be supplied by subclasses.
class IndexInput

  # Reads and returns a single byte.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def read_byte()
    raise NotImplementedError
  end

  # Reads a specified number of bytes into an array at the specified offset.
  # buf:: the array to read bytes into
  # offset:: the offset in the array to start storing bytes
  # len:: the number of bytes to read
  #
  # Abstract: raises NotImplementedError unless overridden.
  def read_bytes(buf, offset, len)
    raise NotImplementedError
  end

  # Reads four bytes (most significant first) and returns a signed int.
  # read_uint should be used for unsigned integers for performance reasons.
  def read_int
    b1 = read_byte
    b2 = read_byte
    b3 = read_byte
    b4 = read_byte
    unsigned = (b1 << 24) + (b2 << 16) + (b3 << 8) + b4
    # Ruby integers are unbounded, so the two's-complement sign has to be
    # applied by hand: when the top bit is set, subtract 2**32.
    (b1 & 0x80) == 0 ? unsigned : unsigned - 0x100000000
  end

  # Reads eight bytes and returns a signed long. The first four bytes form
  # the (signed) high word, the last four the low word.
  def read_long
    high = read_int
    low = read_int
    # Mask the low word so that its own sign bit does not leak into the
    # high word when the two halves are combined.
    (high << 32) + (low & 0xFFFFFFFF)
  end

  # Reads four bytes (most significant first) and returns a positive integer.
  def read_uint
    b1 = read_byte
    b2 = read_byte
    b3 = read_byte
    b4 = read_byte
    (b1 << 24) | (b2 << 16) | (b3 << 8) | b4
  end

  # Reads eight bytes and returns a positive integer.
  def read_ulong
    high = read_uint
    low = read_uint
    (high << 32) | (low & 0xFFFFFFFF)
  end

  # Reads an int stored in variable-length format. Each byte carries seven
  # payload bits, least significant group first; a set high bit means
  # another byte follows. Smaller values take fewer bytes. Negative numbers
  # are not supported.
  def read_vint
    byte = read_byte
    value = byte & 0x7F # 0x7F = 0b01111111
    shift = 7

    until (byte & 0x80) == 0 # 0x80 = 0b10000000, the continuation flag
      byte = read_byte
      value |= (byte & 0x7F) << shift
      shift += 7
    end

    value
  end
  alias :read_vlong :read_vint

  # Reads a string. A string is stored as a single vint which describes
  # the length of the string, followed by the actual string itself.
  #
  # Returns the characters concatenated into one String.
  def read_string
    length = read_vint

    chars = Array.new(length, ' ')
    read_chars(chars, 0, length)

    # Array#to_s only concatenates on Ruby 1.8; from 1.9 on it returns the
    # inspect form (e.g. '["a", "b"]'). join concatenates on every version.
    chars.join
  end

  # Reads characters into an array (or string), one per byte read.
  # buf:: the array to read characters into
  # start:: the offset in the array to start storing characters
  # length:: the number of characters to read
  #
  # If buf is too short it is first padded with single spaces up to
  # start + length.
  #
  # NOTE: every byte is turned into its own one-byte string via Integer#chr;
  # no multi-byte UTF-8 decoding is performed.
  # TODO: Test on some actual UTF-8 documents.
  def read_chars(buf, start, length)
    last = start + length
    shortfall = last - buf.length
    if shortfall > 0
      # make room for the characters to read
      if buf.is_a?(String)
        buf << ' ' * shortfall
      else
        # An Array needs one element per missing slot; `buf << "   "` would
        # append a single multi-space element instead.
        buf.concat(Array.new(shortfall) { ' ' })
      end
    end
    (start...last).each do |i|
      buf[i] = read_byte.chr
    end
  end

  # Closes the stream to further operations.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def close
    raise NotImplementedError
  end

  # Returns the current position in this file, where the next read will
  # occur.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def pos
    raise NotImplementedError
  end

  # Sets current position in this file, where the next read will occur.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def seek(pos)
    raise NotImplementedError
  end

  # The number of bytes in the file.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def length
    raise NotImplementedError
  end

  # Returns a clone of this stream.
  #
  # Clones of a stream access the same data, and are positioned at the same
  # point as the stream they were cloned from.
  #
  # Expert:: Subclasses must ensure that clones may be positioned at
  # different points in the input from each other and from the stream they
  # were cloned from.
  # def clone
  #   raise NotImplementedError
  # end

end
156
+
157
# Ferret's IO Output methods are defined here. The methods write_byte and
# write_bytes need to be defined before this class is of any use.
#
# All composite writers below funnel into write_byte, so a subclass only has
# to provide the primitives plus flush, close, pos, seek and length.
class IndexOutput

  # Writes a single byte.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def write_byte(b)
    raise NotImplementedError
  end

  # Writes an array of bytes.
  # buf:: the bytes to write
  # len:: the number of bytes to write
  #
  # Abstract: raises NotImplementedError unless overridden.
  def write_bytes(buf, len)
    raise NotImplementedError
  end

  # Writes an int as four bytes, most significant byte first. Only the low
  # 32 bits of i are written.
  def write_int(i)
    [24, 16, 8].each { |shift| write_byte((i >> shift) & 0xFF) }
    write_byte(i & 0xFF)
  end
  alias_method :write_uint, :write_int

  # Writes an int in a variable-length format: seven payload bits per byte,
  # least significant group first, with the high bit set on every byte
  # except the last. Smaller values take fewer bytes. Negative numbers are
  # not supported.
  def write_vint(i)
    until i <= 127
      write_byte((i & 0x7f) | 0x80)
      i >>= 7
    end
    write_byte(i)
  end
  alias_method :write_vlong, :write_vint

  # Writes a long as eight bytes: the high 32-bit word first, then the low
  # 32-bit word.
  def write_long(i)
    high_word = i >> 32
    write_int(high_word)
    write_int(i)
  end
  alias_method :write_ulong, :write_long

  # Writes a string: its length as a vint, followed by its characters.
  def write_string(s)
    count = s.length()
    write_vint(count)
    write_chars(s, 0, count)
  end

  # Writes a sequence of characters from a string.
  # buf:: the source of the characters
  # start:: the first character in the sequence
  # length:: the number of characters in the sequence
  #
  # NOTE: each element buf[i] is handed to write_byte unchanged; no UTF-8
  # encoding step is applied here.
  def write_chars(buf, start, length)
    (start...(start + length)).each { |index| write_byte(buf[index]) }
  end

  # Forces any buffered output to be written.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def flush
    raise NotImplementedError
  end

  # Closes this stream to further operations.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def close
    raise NotImplementedError
  end

  # Returns the current position in this file, where the next write will
  # occur.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def pos
    raise NotImplementedError
  end

  # Sets current position in this file, where the next write will occur.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def seek(pos)
    raise NotImplementedError
  end

  # The number of bytes in the file.
  #
  # Abstract: raises NotImplementedError unless overridden.
  def length
    raise NotImplementedError
  end
end
259
+ end
@@ -0,0 +1,282 @@
1
+ module Ferret::Store
2
+ require 'monitor'
3
+
4
# A Directory that keeps its files in memory. Files are held in a Hash that
# maps each file name to a RAMFile. MonitorMixin is included so that RAMLock
# can wrap its check-and-create step in @dir.synchronize.
class RAMDirectory < Directory
  include MonitorMixin

  # Creates a RAM directory, optionally pre-loaded with a copy of every file
  # of another directory.
  # dir:: directory to copy from; it is asked for each, open_input and
  #       (optionally) close. nil creates an empty directory
  # close_dir:: when true, dir is closed once its files have been copied
  def initialize(dir = nil, close_dir = false)
    super()
    @files = Hash.new
    return if dir.nil?

    dir.each do |file|
      os = create_output(file)   # make a place on ram disk
      is = dir.open_input(file)  # read the current file
      len = is.length            # and copy the file to ram disk
      buf = Array.new(len)
      is.read_bytes(buf, 0, len)
      os.write_bytes(buf, len)
      is.close()
      os.close()
    end
    dir.close() if close_dir
  end

  # Yields the name of each file in the directory, skipping lock files
  # (names starting with "rubylock-", the prefix make_lock uses).
  #
  # The name (hash key) is what gets tested and yielded: the hash values are
  # RAMFile objects, which carry no name and cannot be passed to
  # create_output/open_input the way the copy constructor does.
  def each()
    @files.each_key do |path|
      next if path =~ /\Arubylock-/
      yield path
    end
  end

  # Returns true if a file with the given name exists.
  def exists?(name)
    @files.has_key?(name)
  end

  # Returns the time the named file was last modified.
  def modified(name)
    @files[name].mtime
  end

  # Set the modified time of a file to now, creating an empty file first if
  # none exists under that name.
  def touch(name)
    if @files[name].nil?
      @files[name] = RAMFile.new(name)
    end
    @files[name].mtime = Time.now
  end

  # Removes an existing file in the directory.
  def delete(name)
    @files.delete(name)
  end

  # Renames an existing file in the directory.
  # If a file already exists with the new name, then it is replaced.
  # This replacement should be atomic.
  def rename(from, to)
    # Remove first, then store: with the opposite order, renaming a file
    # onto its own name would delete it.
    @files[to] = @files.delete(from)
  end

  # Returns the length of a file in the directory.
  def length(name)
    @files[name].length
  end

  # Creates a new, empty file in the directory with the given name.
  # Returns a stream writing this file.
  def create_output(name)
    file = RAMFile.new(name)
    @files[name] = file
    RAMIndexOutput.new(file)
  end

  # Returns a stream reading an existing file.
  # Raises IOError if no file is stored under name.
  def open_input(name)
    raise IOError, "No file #{name}" if @files[name].nil?
    RAMIndexInput.new(@files[name])
  end

  # Debugging aid: reads the whole named file and prints it with puts.
  def print_file(name)
    input = RAMIndexInput.new(@files[name])
    buf = " " * input.length
    input.read_internal(buf, 0, input.length)
    puts buf
  end

  # Construct a Lock. The lock is represented by a file in this directory
  # named "rubylock-" + name.
  def make_lock(name)
    RAMLock.new("rubylock-" + name, self)
  end

  # Closes the store. Nothing needs releasing, so this does nothing.
  def close()
  end

  # Returns a listing with one "name - length" line per stored file.
  def to_s
    str = "The files in this directory are: \n"
    @files.each do |path, file|
      # RAMFile exposes its byte count as length; it has no size method.
      str << path + " - " + file.length.to_s + "\n"
    end
    str
  end

  # Output stream that stores what it is given in a RAMFile's buffers.
  class RAMIndexOutput < BufferedIndexOutput
    # f:: the RAMFile to write into
    def initialize(f)
      @file = f
      @pointer = 0
      super()
    end

    # The number of bytes in the underlying file.
    def length
      return @file.length
    end

    # Copies the first len bytes of src into the file at the current
    # pointer, then advances the pointer and updates the file's length and
    # modification time.
    #
    # The copy is split across at most two consecutive buffers, so len is
    # presumably never more than BUFFER_SIZE -- TODO confirm against
    # BufferedIndexOutput.
    def flush_buffer(src, len)
      buffer_number = (@pointer / BUFFER_SIZE).to_i
      buffer_offset = @pointer % BUFFER_SIZE
      bytes_in_buffer = BUFFER_SIZE - buffer_offset
      bytes_to_copy = [bytes_in_buffer, len].min

      extend_buffer_if_necessary(buffer_number)

      buffer = @file.buffers[buffer_number]
      buffer[buffer_offset, bytes_to_copy] = src[0, bytes_to_copy]

      if bytes_to_copy < len
        # the data straddles a buffer boundary; put the rest at the start
        # of the following buffer
        src_offset = bytes_to_copy
        bytes_to_copy = len - bytes_to_copy
        buffer_number += 1
        extend_buffer_if_necessary(buffer_number)
        buffer = @file.buffers[buffer_number]
        buffer[0, bytes_to_copy] = src[src_offset, bytes_to_copy]
      end
      @pointer += len
      # only ever grow the file; writing after a backwards seek must not
      # shrink it
      @file.length = @pointer unless @pointer < @file.length
      @file.mtime = Time.now
    end

    # Rewinds to the start and marks the file as empty. Already allocated
    # buffers are kept.
    def reset
      seek(0)
      @file.length = 0
    end

    # Sets the position at which the next flush_buffer will store data.
    def seek(pos)
      super(pos)
      @pointer = pos
    end

    # Closes the stream and stamps the file's modification time.
    def close
      super()
      @file.mtime = Time.new
    end

    # Flushes, then writes the file's content (exactly @file.length bytes)
    # to output via output.write_bytes, one buffer at a time.
    def write_to(output)
      flush()
      # Count down the live bytes instead of special-casing the last buffer
      # index: buffers beyond the file's length (left over after reset)
      # must not be written.
      remaining = @file.length
      @file.buffers.each do |buffer|
        len = [remaining, BUFFER_SIZE].min
        output.write_bytes(buffer, len) if len > 0
        remaining -= len
      end
    end

    private

    # Appends a fresh buffer when buffer_number is one past the last
    # allocated buffer.
    def extend_buffer_if_necessary(buffer_number)
      if buffer_number == @file.buffers.size
        @file.buffers << RAMFile::BUFFER.clone
      end
    end

  end

  # Input stream that reads from a RAMFile's buffers.
  class RAMIndexInput < BufferedIndexInput

    # f:: the RAMFile to read from
    def initialize(f)
      @pointer = 0
      @file = f
      super()
    end

    # The number of bytes in the underlying file.
    def length
      return @file.length
    end

    # Copies length bytes, starting at the current pointer, into b beginning
    # at offset, walking across buffer boundaries as needed. Advances the
    # pointer by length.
    def read_internal(b, offset, length)
      remainder = length
      start = @pointer

      while remainder != 0
        buffer_number = (start / BUFFER_SIZE).to_i
        buffer_offset = start % BUFFER_SIZE
        bytes_in_buffer = BUFFER_SIZE - buffer_offset

        # take what is left in this buffer, or less if that is enough
        bytes_to_copy = [bytes_in_buffer, remainder].min

        buffer = @file.buffers[buffer_number]
        b[offset, bytes_to_copy] = buffer[buffer_offset, bytes_to_copy]
        offset += bytes_to_copy
        start += bytes_to_copy
        remainder -= bytes_to_copy
      end

      @pointer += length
    end

    # Sets the position the next read_internal will start from.
    def seek_internal(pos)
      @pointer = pos
    end

    # Nothing to release for an in-memory stream.
    def close
    end
  end

  # This class contains an array of byte arrays which act as buffers to
  # store the data in.
  class RAMFile
    # Template for a new buffer: BUFFER_SIZE spaces. It is cloned for every
    # buffer that gets allocated.
    BUFFER = " " * BUFFER_SIZE

    attr_reader :buffers
    attr_accessor :mtime
    #attr_accessor :name
    attr_accessor :length


    # name:: accepted for interface purposes but currently not stored
    def initialize(name)
      @buffers = Array.new
      @mtime = Time.now
      #@name = name
      @length = 0
    end
  end

  # A Lock is used to lock a data source (in this case a file) so that
  # not more than one output stream can access a data source at one time.
  # The lock is held for as long as a file named lock_file exists in dir.
  class RAMLock < Lock
    # pass the name of the file that we are going to lock
    # lock_file:: name of the file whose existence represents the lock
    # dir:: the RAMDirectory in which that file is created
    def initialize(lock_file, dir)
      @lock_file = lock_file
      @dir = dir
    end

    # obtain the lock on the data source
    # lock_timeout:: seconds to sleep between attempts
    #
    # Returns true once the lock is obtained; raises a RuntimeError after
    # MAX_ATTEMPTS unsuccessful attempts.
    def obtain(lock_timeout = 1)
      MAX_ATTEMPTS.times do
        # check-and-create is done under the directory's monitor
        @dir.synchronize do
          # create a file if none exists. If one already exists
          # then someone beat us to the lock so try again later
          unless locked?
            @dir.create_output(@lock_file)
            return true
          end
        end
        # lock was not obtained so sleep for timeout then try again.
        sleep(lock_timeout)
      end
      # lock could not be obtained so raise an exception
      raise "could not obtain lock: " + @lock_file.to_s
    end

    # Release the lock on the data source. Returns true if successful.
    def release
      @dir.delete(@lock_file)
      return true
    end

    # returns true if there is a lock on the data source
    def locked?
      @dir.exists?(@lock_file)
    end
  end
end
282
+ end