ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/lib/ferret/utils.rb DELETED
@@ -1,8 +0,0 @@
1
- require 'ferret/utils/string_helper'
2
- require 'ferret/utils/parameter'
3
- require 'ferret/utils/priority_queue'
4
- require 'ferret/utils/bit_vector'
5
- require 'ferret/utils/date_tools'
6
- require 'ferret/utils/number_tools'
7
- require 'ferret/utils/weak_key_hash'
8
- require 'ferret/utils/thread_local'
@@ -1,123 +0,0 @@
1
- module Ferret::Utils
2
- # Optimized implementation of a vector of bits.
3
- #
4
- # * a count() method, which efficiently computes the number of one bits
5
- # * optimized read from and write to disk
6
- # * inlinable get() method
7
- class BitVector
8
- attr_reader :size
9
- attr_accessor :bits
10
-
11
- def initialize
12
- @bits = 0
13
- @count = -1
14
- end
15
-
16
- # Sets the value of _bit_ to one.
17
- def set(bit)
18
- @bits |= 1 << bit
19
- @count = -1
20
- end
21
-
22
- # Sets the value of _bit_ to zero.
23
- def clear(bit)
24
- @bits &= ~(1 << bit)
25
- @count = -1
26
- end
27
-
28
- # Returns _true_ if _bit_ is one and
29
- # _false_ if it is zero.
30
- def get(bit)
31
- return (@bits & (1 << bit)) != 0
32
- end
33
- alias :[] :get
34
-
35
- # Returns the total number of one bits in this vector. This is
36
- # efficiently computed and cached, so that, if the vector is not
37
- # changed, no recomputation is done for repeated calls.
38
- def count()
39
- # if the vector has been modified
40
- if (@count == -1)
41
- c = 0
42
- tmp = @bits
43
- while tmp > 0
44
- c += BYTE_COUNTS[tmp & 0xFF] # sum bits per byte
45
- tmp >>= 8
46
- end
47
- @count = c
48
- end
49
- return @count
50
- end
51
-
52
- BYTE_COUNTS = [ # table of bits/byte
53
- 0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
54
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
55
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
56
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
57
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
58
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
59
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
60
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
61
- 1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
62
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
63
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
64
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
65
- 2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
66
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
67
- 3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
68
- 4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
69
- ]
70
-
71
-
72
- # Writes this vector to the file _name_ in Directory _d_, in a format
73
- # that can be read by the constructor
74
- def write(d, name)
75
- output = d.create_output(name)
76
- begin
77
- output.write_string(self.class.bignum_to_string(@bits))
78
- ensure
79
- output.close()
80
- end
81
- end
82
-
83
- # Constructs a bit vector from the file _name_ in Directory _d_, as
84
- # written by the @link #writeendmethod.
85
- def BitVector.read(d, name)
86
- bv = BitVector.new
87
- input = d.open_input(name)
88
- begin
89
- bv.bits = string_to_bignum(input.read_string())
90
- ensure
91
- input.close()
92
- end
93
- return bv
94
- end
95
-
96
- def to_s
97
- i = @bits
98
- while i > 0
99
- print(i&1)
100
- i >>= 1
101
- end
102
- puts ""
103
- end
104
-
105
- # converts a BigNum into a string
106
- def BitVector.bignum_to_string(num)
107
- str = []
108
- while (num > 0)
109
- str << (num & 0xff)
110
- num >>= 8
111
- end
112
- return str.pack("C*")
113
- end
114
-
115
- # converts a string into a bignum
116
- def BitVector.string_to_bignum(str)
117
- str = str.unpack("C*")
118
- num = 0
119
- str.reverse.each {|c| num = ((num << 8) | c) }
120
- return num
121
- end
122
- end
123
- end
@@ -1,138 +0,0 @@
1
- require 'date'
2
- module Ferret::Utils
3
- # Provides support for converting dates to strings and vice-versa. The
4
- # strings are structured so that lexicographic sorting orders them by
5
- # date, which makes them suitable for use as field values and search
6
- # terms.
7
- #
8
- # This class also helps you to limit the resolution of your dates. Do not
9
- # save dates with a finer resolution than you really need, as then
10
- # RangeQuery and PrefixQuery will require more memory and become slower.
11
- #
12
- # Compared to the serialize methods the strings generated by the to_s
13
- # methods in this class take slightly more space, unless your selected
14
- # resolution is set to _Resolution.DAY_ or lower.
15
-
16
- # Provides support for converting dates to strings and vice-versa. The
17
- # strings are structured so that lexicographic sorting orders by date,
18
- # which makes them suitable for use as field values and search terms.
19
- #
20
- # Note:: dates before 1970 cannot be used, and therefore cannot be indexed
21
- # when using this class.
22
- module DateTools
23
- # make date strings long enough to last a millenium
24
- SERIALIZED_DATE_LEN = (1000*365*24*60*60*1000).to_s(36).length
25
-
26
- # The latest date that can be stored in this format
27
- MAX_SERIALIZED_DATE_STRING = Array.new(SERIALIZED_DATE_LEN, "z").to_s.to_i(36)
28
-
29
- # Converts a Date to a string suitable for indexing. Throws Exception
30
- # if the date specified in the method argument is before 1970 This
31
- # method is unsupported. Please use Time instead of Date
32
- def DateTools.serialize_date(date)
33
- return serialize_time(Time.parse(date))
34
- end
35
-
36
- # Converts a millisecond time to a string suitable for indexing.
37
- # Accepts a Time object or a time in milliseconds.
38
- #
39
- # Throws Exception if the time specified in the method argument is
40
- # negative, that is, before 1970 It is recommended that you store the
41
- # date as a string if you don't need the time to the nearest
42
- # millisecond. That makes things a lot easier.
43
- def DateTools.serialize_time(time)
44
- if time.instance_of?(Time) then time = time.to_i end
45
-
46
- if (time < 0) then raise("time too early") end
47
-
48
- # convert to milliseconds before serialization
49
- s = (time*1000).to_s(36)
50
-
51
- if (s.length() > SERIALIZED_DATE_LEN) then raise("time too late") end
52
-
53
- # pad to 16 charactors
54
- s = "0" + s while (s.length() < SERIALIZED_DATE_LEN)
55
-
56
- return s
57
- end
58
-
59
- # The earliest date that can be stored in this format.
60
- MIN_SERIALIZED_DATE_STRING = DateTools.serialize_time(0)
61
-
62
- # Converts a string-encoded date into a millisecond time.
63
- def DateTools.deserialize_time(s)
64
- # remember to convert back to seconds
65
- return Time.at(s.to_i(36)/1000)
66
- end
67
-
68
- def DateTools.date_to_s(date, resolution = Resolution::MILLISECOND)
69
- return time_to_s(Time.parse(date), resolution)
70
- end
71
-
72
-
73
- # Converts a millisecond time to a string suitable for indexing.
74
- #
75
- # time:: the date expressed as milliseconds since January 1, 1970,
76
- # 00:00:00 GMT resolution:: the desired resolution, see
77
- # #round(long, DateTools.Resolution)
78
- # return:: a string in format _%Y%m%d%H%M%SSSS_ or shorter,
79
- # depending on _resolution_
80
- def DateTools.time_to_s(time, resolution = Resolution::MILLISECOND)
81
- if time.instance_of?(Date) then time = Time.parse(time) end
82
- suffix = ""
83
- if (resolution == Resolution::MILLISECOND)
84
- # the suffix is the number of milliseconds if needed.
85
- suffix = ((time.to_f-time.to_f.floor)*1000).round.to_s
86
- end
87
- return time.strftime(resolution.format) + suffix
88
- end
89
-
90
- # Converts a string produced by _time_to_s_ or _date_to_s_ back to a
91
- # time, represented as the number of milliseconds since January 1, 1970,
92
- # 00:00:00 GMT.
93
- #
94
- # str:: the date string to be converted
95
- # return:: the number of milliseconds since January 1, 1970, 00:00:00GMT
96
- def DateTools.s_to_time(str)
97
- year = str.size >= 4 ? str[ 0.. 3].to_i : nil
98
- month = str.size >= 6 ? str[ 4.. 5].to_i : nil
99
- day = str.size >= 8 ? str[ 6.. 7].to_i : nil
100
- hour = str.size >= 10 ? str[ 8.. 9].to_i : nil
101
- minute = str.size >= 12 ? str[10..11].to_i : nil
102
- second = str.size >= 14 ? str[12..13].to_i : nil
103
- microsecond = str.size >= 17 ? str[14..17].to_i*1000 : nil
104
- return Time.mktime(year, month, day, hour, minute, second, microsecond)
105
- end
106
-
107
- # Limit a date's resolution. For example, the date _2004-09-21 13:50:11_
108
- # will be changed to _2004-09-01 00:00:00_ when using
109
- # _Resolution.MONTH_.
110
- #
111
- # resolution:: The desired resolution of the date to be returned
112
- # return:: the date with all values more precise than _resolution_
113
- # set to 0 or 1
114
- def DateTools.round(time, resolution)
115
- return s_to_time(time_to_s(time, resolution))
116
- end
117
-
118
- class Resolution < Parameter
119
- attr_accessor :format
120
-
121
- private :initialize
122
-
123
- def initialize(name, format)
124
- super(name)
125
- @format = format
126
- end
127
-
128
- YEAR = Resolution.new("year", "%Y")
129
- MONTH = Resolution.new("month", "%Y%m")
130
- DAY = Resolution.new("day", "%Y%m%d")
131
- HOUR = Resolution.new("hour", "%Y%m%d%H")
132
- MINUTE = Resolution.new("minute", "%Y%m%d%H%M")
133
- SECOND = Resolution.new("second", "%Y%m%d%H%M%S")
134
- MILLISECOND = Resolution.new("millisecond", "%Y%m%d%H%M%S")
135
-
136
- end
137
- end
138
- end
@@ -1,91 +0,0 @@
1
- class Float
2
- def =~(o)
3
- return (1 - self/o).abs < 0.0000000001
4
- end
5
- end
6
-
7
- module Ferret::Utils
8
- # Provides support for converting longs to Strings, and back again. The
9
- # strings are structured so that lexicographic sorting order is preserved.
10
- #
11
- # That is, if long1 is less than long2 for any two longs long1 and long2,
12
- # then NumberTools.long_to_s(long1) is lexicographically less than
13
- # NumberTools.long_to_s(long2). (Similarly for "greater than" and "equals".)
14
- #
15
- # This class handles all long values
16
- module NumberTools
17
- RADIX = 36
18
- NEGATIVE_PREFIX = '-'
19
-
20
- # NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
21
- POSITIVE_PREFIX = '0'
22
-
23
- # The following constants are from Java
24
- LONG_MAX_VALUE = 9223372036854775807
25
- LONG_MIN_VALUE = -9223372036854775808
26
-
27
- # NB: This function is used to match the java equivalent. Actually
28
- # ruby allows much larger numbers than Java so this is just so that we
29
- # can read the Java Lucene created indexes.
30
- MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000"
31
- MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7"
32
-
33
- # The length of the long field
34
- STR_SIZE = MIN_STRING_VALUE.length()
35
-
36
- # Converts a long to a String suitable for indexing.
37
- def NumberTools.long_to_s(l)
38
- if (l == LONG_MIN_VALUE)
39
- # special case, because long is not symetric around zero
40
- return MIN_STRING_VALUE;
41
- end
42
-
43
- s = ""
44
- if (l < 0)
45
- s << NEGATIVE_PREFIX
46
- l = LONG_MAX_VALUE + l + 1
47
- else
48
- s << POSITIVE_PREFIX
49
- end
50
- num = l.to_s(RADIX)
51
-
52
- pad_len = STR_SIZE - num.length() - s.length()
53
- while ((pad_len -= 1) >= 0)
54
- s << '0'
55
- end
56
- s << num
57
-
58
- return s
59
- end
60
-
61
- # Converts a String that was returned by #long_to_s back to a long.
62
- #
63
- # Throws:: ArgumentError if the input is nil
64
- def NumberTools.s_to_long(s)
65
- if (s == nil)
66
- raise ArgumentError, "string cannot be nil"
67
- end
68
- if (s.length() != STR_SIZE)
69
- raise ArgumentError, "string is the wrong size"
70
- end
71
-
72
- if (s == MIN_STRING_VALUE)
73
- return LONG_MIN_VALUE
74
- end
75
-
76
- prefix = s[0,1]
77
- l = s[1..-1].to_i(36)
78
-
79
- if (prefix == POSITIVE_PREFIX)
80
- # nop
81
- elsif (prefix == NEGATIVE_PREFIX)
82
- l = l - LONG_MAX_VALUE - 1
83
- else
84
- raise ArgumentError, "string <" + prefix +
85
- "> does not begin with the correct prefix"
86
- end
87
-
88
- return l
89
- end
90
- end
91
- end
@@ -1,41 +0,0 @@
1
- module Ferret::Utils
2
- class Parameter
3
- def to_s() return @name end
4
-
5
- def _dump(arg)
6
- @name
7
- end
8
-
9
- def Parameter._load(var)
10
- name = var
11
- key = make_key(name)
12
- if (@@all_parameters.has_key?(key))
13
- return @@all_parameters[key]
14
- else
15
- return self.new(name)
16
- end
17
- end
18
-
19
- def hash
20
- return self.class.make_key(@name).hash
21
- end
22
-
23
- protected
24
- @@all_parameters = {}
25
-
26
- def initialize(name)
27
- @name = name
28
- key = self.class.make_key(name)
29
-
30
- if (@@all_parameters.has_key?(key))
31
- raise ArgumentError, "key already in use"
32
- end
33
-
34
- @@all_parameters[key] = self
35
- end
36
-
37
- def Parameter.make_key(name)
38
- return self.to_s + " " + name
39
- end
40
- end
41
- end
@@ -1,120 +0,0 @@
1
- module Ferret::Utils
2
- # A PriorityQueue maintains a partial ordering of its objects such that
3
- # the least object can always be found in constant time. push()'s and
4
- # pop()'s require log(size) time. The objects in this priority queue must
5
- # be Comparable
6
- class PriorityQueue
7
- attr_reader :size
8
-
9
- def less_than(a, b)
10
- a < b
11
- end
12
-
13
- # Subclass constructors must call this.
14
- def initialize(max_size)
15
- @size = 0
16
- @heap = Array.new(max_size + 1)
17
- @max_size = max_size
18
- end
19
-
20
- # Adds an Object to a PriorityQueue in log(size) time.
21
- #
22
- # If one tries to add more objects than max_size from initialize a
23
- # RuntimeException (ArrayIndexOutOfBound) is thrown.
24
- def push(object)
25
- @size += 1
26
- @heap[@size] = object
27
- up_heap()
28
- end
29
- alias :<< :push
30
-
31
- # Adds object to the PriorityQueue in log(size) time if either the
32
- # PriorityQueue is not full, or not less_than(object, top()).
33
- #
34
- # object:: the object to be inserted
35
- # return true if object is added, false otherwise.
36
- def insert(object)
37
- if(@size < @max_size)
38
- push(object)
39
- return true
40
- elsif (@size > 0 and less_than(top, object))
41
- @heap[1] = object
42
- down_heap()
43
- return true
44
- else
45
- return false
46
- end
47
- end
48
-
49
- # Returns the least object of the PriorityQueue in constant time.
50
- def top
51
- return @heap[1]
52
- end
53
-
54
- # Removes and returns the least object of the PriorityQueue in log(size)
55
- # time.
56
- def pop()
57
- if (@size > 0)
58
- result = @heap[1] # save first value
59
- @heap[1] = @heap[@size] # move last to first
60
- @heap[@size] = nil; # permit GC of objects
61
- @size -= 1
62
- down_heap() # adjust heap
63
- return result
64
- else
65
- return nil
66
- end
67
- end
68
-
69
- # Removes all entries from the PriorityQueue.
70
- def clear()
71
- (1..@size).each do |i|
72
- @heap[i] = nil
73
- end
74
- @size = 0
75
- end
76
-
77
- def put_heap
78
- puts @heap
79
- end
80
-
81
- # resets the queue after the top has been changed
82
- def adjust_top()
83
- down_heap()
84
- end
85
-
86
- private
87
-
88
- def up_heap()
89
- i = @size
90
- node = @heap[i] # save bottom node
91
- j = i >> 1
92
- while (j > 0 and less_than(node, @heap[j]))
93
- @heap[i] = @heap[j]; # shift parents down
94
- i = j
95
- j = j >> 1
96
- end
97
- @heap[i] = node; # install saved node
98
- end
99
-
100
- def down_heap()
101
- i = 1
102
- node = @heap[i] # save top node
103
- j = i << 1 # find smaller child
104
- k = j + 1
105
- if k <= @size and less_than(@heap[k], @heap[j])
106
- j = k
107
- end
108
- while (j <= @size and less_than(@heap[j], node))
109
- @heap[i] = @heap[j] # shift up child
110
- i = j
111
- j = i << 1
112
- k = j + 1
113
- if k <= @size and less_than(@heap[k], @heap[j])
114
- j = k
115
- end
116
- end
117
- @heap[i] = node; # install saved node
118
- end
119
- end
120
- end