ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,245 +0,0 @@
1
- module Ferret::Store
2
- # Ferret's IO Input methods are defined here. The methods read_byte and
3
- # read_bytes need to be defined before this class is of any use.
4
- class IndexInput
5
-
6
- # Reads and returns a single byte.
7
- def read_byte()
8
- raise NotImplementedError
9
- end
10
-
11
- # Reads a specified number of bytes into an array at the specified offset.
12
- # buf:: the array to read bytes into
13
- # offset:: the offset in the array to start storing bytes
14
- # len:: the number of bytes to read
15
- def read_bytes(buf, offset, len)
16
- raise NotImplementedError
17
- end
18
-
19
-
20
- # Reads four bytes and returns an int. read_uint should be used for
21
- # unsigned integers for performance reasons.
22
- def read_int
23
- # This may be slow. I'm not sure if this is the best way to get
24
- # integers from files but this is the only way I could find to get
25
- # signed integers.
26
- #i = read_byte
27
- #return (((i&0x80)==0 ? 0 : -1) << 32) |
28
- #(i << 24) |
29
- #((read_byte) << 16) |
30
- #((read_byte) << 8) |
31
- #(read_byte)
32
- i1 = read_byte
33
- i2 = read_byte
34
- i3 = read_byte
35
- i4 = read_byte
36
- res = (((i1&0x80) == 0 ? 0 : -0x100000000)) +
37
- ((i1 << 24) + (i2 << 16) + (i3 << 8) + (i4))
38
- return res
39
- end
40
-
41
- # Reads eight bytes and returns a long.
42
- def read_long
43
- return (read_int << 32) + (read_int & 0xFFFFFFFF)
44
- end
45
-
46
- # Reads four bytes and returns a positive integer
47
- def read_uint
48
- return ((read_byte) << 24) | ((read_byte) << 16) |
49
- ((read_byte) << 8) | (read_byte)
50
- end
51
-
52
- # Reads eight bytes and returns a positive integer.
53
- def read_ulong
54
- return (read_uint << 32) | (read_uint & 0xFFFFFFFF)
55
- end
56
-
57
- # Reads an int stored in variable-length format. Reads between one and
58
- # five bytes. Smaller values take fewer bytes. Negative numbers are not
59
- # supported.
60
- def read_vint
61
- b = read_byte
62
- i = b & 0x7F # 0x7F = 0b01111111
63
- shift = 7
64
-
65
- while b & 0x80 != 0 # 0x80 = 0b10000000
66
- b = read_byte
67
- i |= (b & 0x7F) << shift
68
- shift += 7
69
- end
70
-
71
- return i
72
- end
73
- alias :read_vlong :read_vint
74
-
75
- # Reads a string. A string is stored as a single vint which describes
76
- # the length of the string, followed by the actual string itself.
77
- def read_string
78
- length = read_vint
79
-
80
- chars = Array.new(length, ' ')
81
- read_chars(chars, 0, length)
82
-
83
- chars.to_s
84
- end
85
-
86
- # Reads UTF-8 encoded characters into an array.
87
- # buf:: the array to read characters into
88
- # start:: the offset in the array to start storing characters
89
- # length:: the number of characters to read
90
- #
91
- # TODO: Test on some actual UTF-8 documents.
92
- def read_chars(buf, start, length)
93
- if buf.length < (start + length)
94
- # make room for the characters to read
95
- buf << " " * (start + length - buf.length)
96
- end
97
- last = start + length
98
- (start...last).each do |i|
99
- buf[i] = read_byte.chr
100
- end
101
- # last = start + length
102
- #
103
- # (start...last).each do |i|
104
- # b = read_byte
105
- # if (b & 0x80) == 0
106
- # buf[i] = (b & 0x7F).chr # don't need to worry about UTF-8 here
107
- # else
108
- # if (b & 0xE0) != 0xE0
109
- # tmp_int = (((b & 0x1F) << 6) | (read_byte & 0x3F))
110
- # buf[i] = [tmp_int].pack("C") # pack into a UTF-8 string
111
- # else
112
- # buf[i] = [
113
- # ((b & 0x0F) << 12) |
114
- # ((read_byte & 0x3F) << 6) |
115
- # (read_byte & 0x3F)
116
- # ].pack("U") # pack into a UTF-8 string
117
- # end
118
- # end
119
- # end
120
- end
121
-
122
- # Closes the stream to further operations.
123
- def close
124
- raise NotImplementedError
125
- end
126
-
127
- # Returns the current position in this file, where the next read will
128
- # occur.
129
- def pos
130
- raise NotImplementedError
131
- end
132
-
133
- # Sets current position in this file, where the next read will occur.
134
- def seek(pos)
135
- raise NotImplementedError
136
- end
137
-
138
- # The number of bytes in the file.
139
- def length
140
- raise NotImplementedError
141
- end
142
-
143
- # Returns a clone of this stream.
144
- #
145
- # Clones of a stream access the same data, and are positioned at the same
146
- # point as the stream they were cloned from.
147
- #
148
- # Expert:: Subclasses must ensure that clones may be positioned at
149
- # different points in the input from each other and from the stream they
150
- # were cloned from.
151
- # def clone
152
- # raise NotImplementedError
153
- # end
154
-
155
- end
156
-
157
- # Ferret's IO Output methods are defined here. The methods write_byte and
158
- # write_bytes need to be defined before this class is of any use.
159
- class IndexOutput
160
-
161
- # Writes a single byte.
162
- def write_byte(b)
163
- raise NotImplementedError
164
- end
165
-
166
- # Writes an array of bytes.
167
- # buf:: the bytes to write
168
- # len:: the number of bytes to write
169
- def write_bytes(buf, len)
170
- raise NotImplementedError
171
- end
172
-
173
- # Writes an int as four bytes.
174
- def write_int(i)
175
- write_byte((i >> 24) & 0xFF)
176
- write_byte((i >> 16) & 0xFF)
177
- write_byte((i >> 8) & 0xFF)
178
- write_byte(i & 0xFF)
179
- end
180
- alias :write_uint :write_int
181
-
182
- # Writes an int in a variable-length format. Writes between one and
183
- # five bytes. Smaller values take fewer bytes. Negative numbers are not
184
- # supported.
185
- def write_vint(i)
186
- while i > 127
187
- write_byte((i & 0x7f) | 0x80)
188
- i >>= 7
189
- end
190
- write_byte(i)
191
- end
192
- alias :write_vlong :write_vint
193
-
194
- # Writes a long as eight bytes.
195
- def write_long(i)
196
- write_int(i >> 32)
197
- write_int(i)
198
- end
199
- alias :write_ulong :write_long
200
-
201
- # Writes a string.
202
- def write_string(s)
203
- length = s.length()
204
- write_vint(length)
205
- write_chars(s, 0, length)
206
- end
207
-
208
- # Writes a sequence of UTF-8 encoded characters from a string.
209
- # buf:: the source of the characters
210
- # start:: the first character in the sequence
211
- # length:: the number of characters in the sequence
212
- def write_chars(buf, start, length)
213
- last = start + length
214
- (start ... last).each do |i|
215
- write_byte(buf[i])
216
- end
217
- end
218
-
219
- # Forces any buffered output to be written.
220
- def flush
221
- raise NotImplementedError
222
- end
223
-
224
- # Closes this stream to further operations.
225
- def close
226
- raise NotImplementedError
227
- end
228
-
229
- # Returns the current position in this file, where the next write will
230
- # occur.
231
- def pos
232
- raise NotImplementedError
233
- end
234
-
235
- # Sets current position in this file, where the next write will occur.
236
- def seek(pos)
237
- raise NotImplementedError
238
- end
239
-
240
- # The number of bytes in the file.
241
- def length
242
- raise NotImplementedError
243
- end
244
- end
245
- end
@@ -1,286 +0,0 @@
1
- module Ferret::Store
2
- require 'monitor'
3
-
4
- class RAMDirectory < Directory
5
- include MonitorMixin
6
-
7
- def initialize(dir = nil, close_dir = false)
8
- super()
9
- @files = Hash.new
10
- if dir
11
- buf = BUFFER.clone
12
- dir.each do |file|
13
- os = create_output(file) # make a place on ram disk
14
- is = dir.open_input(file) # read the current file
15
- len = is.length # and copy the file to ram disk
16
- if len > buf.size
17
- buf << " " * (len - buf.size)
18
- end
19
- is.read_bytes(buf, 0, len)
20
- os.write_bytes(buf, len)
21
- is.close()
22
- os.close()
23
- end
24
- dir.close() if close_dir
25
- end
26
- end
27
-
28
- # returns an array of strings, one for each file in the directory
29
- def each()
30
- @files.each do |path, file|
31
- next if file =~ /#{LOCK_PREFIX}/
32
- yield file
33
- end
34
- end
35
-
36
- # Returns true if a file with the given name exists.
37
- def exists?(name)
38
- @files.has_key?(name)
39
- end
40
-
41
- # Returns the time the named file was last modified.
42
- def modified(name)
43
- @files[name].mtime
44
- end
45
-
46
- # Set the modified time of an existing file to now.
47
- def touch(name)
48
- if @files[name].nil?
49
- @files[name] = RAMFile.new(name)
50
- end
51
- @files[name].mtime = Time.now
52
- end
53
-
54
- # Removes an existing file in the directory.
55
- def delete(name)
56
- @files.delete(name)
57
- end
58
-
59
- # Renames an existing file in the directory.
60
- # If a file already exists with the new name, then it is replaced.
61
- # This replacement should be atomic.
62
- def rename(from, to)
63
- @files[to] = @files[from]
64
- @files.delete(from)
65
- end
66
-
67
- # Returns the length of a file in the directory.
68
- def length(name)
69
- @files[name].length
70
- end
71
-
72
- # Creates a new, empty file in the directory with the given name.
73
- # Returns a stream writing this file.
74
- def create_output(name)
75
- file = RAMFile.new(name)
76
- @files[name] = file
77
- RAMIndexOutput.new(file)
78
- end
79
-
80
- # Returns a stream reading an existing file.
81
- def open_input(name)
82
- raise IOError, "No file #{name}" if @files[name].nil?
83
- RAMIndexInput.new(@files[name])
84
- end
85
-
86
- def print_file(name)
87
- input = RAMIndexInput.new(@files[name])
88
- buf = " " * input.length
89
- input.read_internal(buf, 0, input.length)
90
- puts buf
91
- end
92
-
93
- # Construct a Lock.
94
- def make_lock(name)
95
- RAMLock.new(LOCK_PREFIX + name + ".lck", self)
96
- end
97
-
98
-
99
- # Closes the store.
100
- def close()
101
- end
102
-
103
- def to_s
104
- str = "The files in this directory are: \n"
105
- @files.each do |path, file|
106
- str << path + " - " + file.size.to_s + "\n"
107
- end
108
- str
109
- end
110
-
111
- class RAMIndexOutput < BufferedIndexOutput
112
- def initialize(f)
113
- @file = f
114
- @pointer = 0
115
- super()
116
- end
117
-
118
- def length
119
- return @file.length
120
- end
121
-
122
- def flush_buffer(src, len)
123
- buffer_number = (@pointer / BUFFER_SIZE).to_i
124
- buffer_offset = @pointer % BUFFER_SIZE
125
- bytes_in_buffer = BUFFER_SIZE - buffer_offset
126
- bytes_to_copy = [bytes_in_buffer, len].min
127
-
128
- extend_buffer_if_necessary(buffer_number)
129
-
130
- buffer = @file.buffers[buffer_number]
131
- buffer[buffer_offset, bytes_to_copy] = src[0, bytes_to_copy]
132
-
133
- if bytes_to_copy < len
134
- src_offset = bytes_to_copy
135
- bytes_to_copy = len - bytes_to_copy
136
- buffer_number += 1
137
- extend_buffer_if_necessary(buffer_number)
138
- buffer = @file.buffers[buffer_number]
139
- buffer[0, bytes_to_copy] = src[src_offset, bytes_to_copy]
140
- end
141
- @pointer += len
142
- @file.length = @pointer unless @pointer < @file.length
143
- @file.mtime = Time.now
144
- end
145
-
146
- def reset
147
- seek(0)
148
- @file.length = 0
149
- end
150
-
151
- def seek(pos)
152
- super(pos)
153
- @pointer = pos
154
- end
155
-
156
- def close
157
- super()
158
- @file.mtime = Time.new
159
- end
160
-
161
- def write_to(output)
162
- flush()
163
- last_buffer_number = (@file.length / BUFFER_SIZE).to_i
164
- last_buffer_offset = @file.length % BUFFER_SIZE
165
-
166
- (0..last_buffer_number).each do |i|
167
- len = (i == last_buffer_number ? last_buffer_offset : BUFFER_SIZE)
168
- output.write_bytes(@file.buffers[i], len)
169
- end
170
- end
171
-
172
- private
173
-
174
- def extend_buffer_if_necessary(buffer_number)
175
- if buffer_number == @file.buffers.size
176
- @file.buffers << RAMFile::BUFFER.clone
177
- end
178
- end
179
-
180
- end
181
-
182
- class RAMIndexInput < BufferedIndexInput
183
-
184
- def initialize(f)
185
- @pointer = 0
186
- @file = f
187
- super()
188
- end
189
-
190
- def length
191
- return @file.length
192
- end
193
-
194
- def read_internal(b, offset, length)
195
- remainder = length
196
- start = @pointer
197
-
198
- while remainder != 0
199
- buffer_number = (start / BUFFER_SIZE).to_i
200
- buffer_offset = start % BUFFER_SIZE
201
- bytes_in_buffer = BUFFER_SIZE - buffer_offset
202
-
203
- if bytes_in_buffer >= remainder
204
- bytes_to_copy = remainder
205
- else
206
- bytes_to_copy = bytes_in_buffer
207
- end
208
- buffer = @file.buffers[buffer_number]
209
- bo2 = buffer_offset
210
- do2 = offset
211
- b[do2, bytes_to_copy] = buffer[bo2, bytes_to_copy]
212
- offset += bytes_to_copy
213
- start += bytes_to_copy
214
- remainder -= bytes_to_copy
215
- end
216
-
217
- @pointer += length
218
- end
219
-
220
- def seek_internal(pos)
221
- @pointer = pos
222
- end
223
-
224
- def close
225
- end
226
- end
227
-
228
- # This class contains an array of byte arrays which act as buffers to
229
- # store the data in.
230
- class RAMFile
231
- BUFFER = " " * BUFFER_SIZE
232
-
233
- attr_reader :buffers
234
- attr_accessor :mtime
235
- #attr_accessor :name
236
- attr_accessor :length
237
- alias :size :length
238
-
239
-
240
- def initialize(name)
241
- @buffers = Array.new
242
- @mtime = Time.now
243
- @length = 0
244
- end
245
- end
246
-
247
- # A Lock is used to lock a data source (in this case a file) so that
248
- # not more than one output stream can access a data source at one time.
249
- class RAMLock < Lock
250
- # pass the name of the file that we are going to lock
251
- def initialize(lock_file, dir)
252
- @lock_file = lock_file
253
- @dir = dir
254
- end
255
-
256
- # obtain the lock on the data source
257
- def obtain(lock_timeout = 1)
258
- MAX_ATTEMPTS.times do
259
- #@dir.synchronize do
260
- # create a file if none exists. If one already exists
261
- # then someone beat us to the lock so return false
262
- if (! locked?) then
263
- @dir.create_output(@lock_file)
264
- return true
265
- end
266
- #end
267
- # lock was not obtained so sleep for timeout then try again.
268
- sleep(lock_timeout)
269
- end
270
- # lock could not be obtained so raise an exception
271
- raise "could not obtain lock: " + @lock_file.to_s
272
- end
273
-
274
- # Release the lock on the data source. Returns true if successful.
275
- def release
276
- @dir.delete(@lock_file)
277
- return true
278
- end
279
-
280
- # returns true if there is a lock on the data source
281
- def locked?
282
- @dir.exists?(@lock_file)
283
- end
284
- end
285
- end
286
- end