ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,7 @@
1
+ require 'ferret/utils/string_helper'
2
+ require 'ferret/utils/parameter'
3
+ require 'ferret/utils/priority_queue'
4
+ require 'ferret/utils/bit_vector'
5
+ require 'ferret/utils/date_tools'
6
+ require 'ferret/utils/number_tools'
7
+ require 'ferret/utils/weak_key_hash'
@@ -0,0 +1,105 @@
1
+ module Ferret::Utils
2
# Optimized implementation of a vector of bits. The whole vector is kept in
# a single Integer, so bit _n_ of the vector is simply bit _n_ of that number.
#
# * a count() method, which computes (and caches) the number of one bits
# * read from and write to a directory as a single vint
# * a trivial get() method
class BitVector
  # Table of the number of one bits in each possible byte value; the index is
  # the byte value (0..255).
  BYTE_COUNTS = (0..255).map { |byte| byte.to_s(2).count("1") }.freeze

  # NOTE(review): @size is never assigned anywhere in this class, so this
  # reader always returns nil. It is kept only so existing callers of
  # #size keep working.
  attr_reader :size

  # The Integer that stores the bits.
  attr_reader :bits

  # Creates an empty vector (all bits zero).
  def initialize
    @bits = 0
    @count = -1 # -1 means "the cached count is stale"
  end

  # Replaces the underlying Integer and invalidates the cached count, so a
  # later call to #count reflects the new value.
  def bits=(value)
    @bits = value
    @count = -1
  end

  # Sets the value of _bit_ to one.
  def set(bit)
    @bits |= 1 << bit
    @count = -1
  end

  # Sets the value of _bit_ to zero.
  def clear(bit)
    @bits &= ~(1 << bit)
    @count = -1
  end

  # Returns _true_ if _bit_ is one and _false_ if it is zero.
  def get(bit)
    (@bits & (1 << bit)) != 0
  end
  alias :[] :get

  # Returns the total number of one bits in this vector. The result is
  # cached, so repeated calls on an unchanged vector do no recomputation.
  def count
    if @count == -1
      total = 0
      rest = @bits
      while rest > 0
        total += BYTE_COUNTS[rest & 0xFF] # sum bits per byte
        rest >>= 8
      end
      @count = total
    end
    @count
  end

  # Writes this vector to the file _name_ in Directory _d_, in a format that
  # can be read back by BitVector.read. The output is always closed, even if
  # the write raises.
  def write(d, name)
    output = d.create_output(name)
    begin
      output.write_vint(@bits)
    ensure
      output.close
    end
  end

  # Constructs a bit vector from the file _name_ in Directory _d_, as written
  # by the #write method. The input is always closed, even if the read raises.
  def BitVector.read(d, name)
    bv = BitVector.new
    input = d.open_input(name)
    begin
      bv.bits = input.read_vint
    ensure
      input.close
    end
    bv
  end

  # Returns the bits as a String of "0"/"1" characters, least significant bit
  # first, stopping at the highest set bit. An empty vector gives "".
  #
  # Earlier versions printed this text to stdout and returned nil; a to_s
  # method has to return the String so that interpolation and puts work.
  def to_s
    @bits > 0 ? @bits.to_s(2).reverse : ""
  end
end
105
+ end
@@ -0,0 +1,138 @@
1
+ require 'date'
2
+ module Ferret::Utils
3
+ # Provides support for converting dates to strings and vice-versa. The
4
+ # strings are structured so that lexicographic sorting orders them by
5
+ # date, which makes them suitable for use as field values and search
6
+ # terms.
7
+ #
8
+ # This class also helps you to limit the resolution of your dates. Do not
9
+ # save dates with a finer resolution than you really need, as then
10
+ # RangeQuery and PrefixQuery will require more memory and become slower.
11
+ #
12
+ # Compared to the serialize methods the strings generated by the to_s
13
+ # methods in this class take slightly more space, unless your selected
14
+ # resolution is set to _Resolution.DAY_ or lower.
15
+
16
# Provides support for converting dates and times to strings and back again.
# The strings are structured so that lexicographic sorting orders them by
# date, which makes them suitable for use as field values and search terms.
#
# Note:: the serialize_* methods reject times before 1970 (negative epoch
#        values), so such dates cannot be indexed with them.
module DateTools
  require 'time' # Time.parse is used when a String has to be turned into a Time

  # Length of a serialized time: enough base-36 digits to hold a millennium
  # (1000 years of 365 days) worth of milliseconds.
  SERIALIZED_DATE_LEN = (1000*365*24*60*60*1000).to_s(36).length

  # The latest time that fits in the serialized format, i.e. the numeric
  # value of SERIALIZED_DATE_LEN base-36 "z" digits.
  #
  # The string is built with String#* because Array#to_s stopped joining the
  # elements in Ruby 1.9, which made the old expression evaluate to 0.
  # NOTE(review): despite the name this is an Integer, as it was on Ruby 1.8;
  # MIN_SERIALIZED_DATE_STRING is a String. Confirm what callers expect.
  MAX_SERIALIZED_DATE_STRING = ("z" * SERIALIZED_DATE_LEN).to_i(36)

  # Turns a Time, a Date/DateTime (anything responding to to_time) or a
  # parseable date String into a Time.
  def DateTools.coerce_to_time(value)
    return value if value.is_a?(Time)
    return value.to_time if value.respond_to?(:to_time)
    Time.parse(value.to_s)
  end
  private_class_method :coerce_to_time

  # Converts a Date (or a date String) to a string suitable for indexing.
  #
  # date::   a Date, DateTime, Time or parseable String
  # return:: the serialized string, see #serialize_time
  # Raises RuntimeError if the date is before 1970.
  def DateTools.serialize_date(date)
    serialize_time(coerce_to_time(date))
  end

  # Converts a time to a fixed-width base-36 string suitable for indexing.
  #
  # time::   a Time object, or a number of seconds since January 1, 1970.
  #          A Time is reduced to whole seconds (Time#to_i) first; the value
  #          is then multiplied by 1000, so the encoded number is in
  #          milliseconds but only has second resolution.
  # return:: a String of exactly SERIALIZED_DATE_LEN characters
  # Raises RuntimeError ("time too early") for negative values and
  # RuntimeError ("time too late") when the value needs more than
  # SERIALIZED_DATE_LEN digits.
  def DateTools.serialize_time(time)
    time = time.to_i if time.is_a?(Time)

    raise("time too early") if time < 0

    # convert to milliseconds before serialization
    s = (time * 1000).to_s(36)

    raise("time too late") if s.length > SERIALIZED_DATE_LEN

    # left-pad with zeros so every serialized time has the same width
    s.rjust(SERIALIZED_DATE_LEN, "0")
  end

  # The earliest date that can be stored in this format.
  MIN_SERIALIZED_DATE_STRING = DateTools.serialize_time(0)

  # Converts a string produced by #serialize_time back into a Time.
  def DateTools.deserialize_time(s)
    # the string encodes milliseconds; Time.at wants seconds
    Time.at(s.to_i(36) / 1000)
  end

  # Same as #time_to_s, for a Date (or a date String).
  def DateTools.date_to_s(date, resolution = Resolution::MILLISECOND)
    time_to_s(coerce_to_time(date), resolution)
  end

  # Converts a time to a human readable, sortable string.
  #
  # time::       a Time (Date, DateTime and Strings are converted first)
  # resolution:: the desired resolution, one of the Resolution constants
  # return::     a string in format _%Y%m%d%H%M%S_ followed by three
  #              millisecond digits, or shorter, depending on _resolution_
  def DateTools.time_to_s(time, resolution = Resolution::MILLISECOND)
    time = coerce_to_time(time) unless time.is_a?(Time)
    stamp = time.strftime(resolution.format)
    if resolution == Resolution::MILLISECOND
      # Always three digits, truncated: an unpadded or rounded value ("5",
      # "1000") would break both the sort order and s_to_time.
      stamp += "%03d" % (time.usec / 1000)
    end
    stamp
  end

  # Converts a string produced by _time_to_s_ or _date_to_s_ back to a Time
  # (in the local time zone, via Time.mktime).
  #
  # str::    the date string to be converted; fields that are missing because
  #          of a coarser resolution are left to Time.mktime's defaults
  # return:: the corresponding Time
  def DateTools.s_to_time(str)
    # [offset, width] of year, month, day, hour, minute and second
    parts = [[0, 4], [4, 2], [6, 2], [8, 2], [10, 2], [12, 2]].map do |start, width|
      str.size >= start + width ? str[start, width].to_i : nil
    end
    # three millisecond digits, handed to Time.mktime as microseconds
    usec = str.size >= 17 ? str[14, 3].to_i * 1000 : nil
    Time.mktime(*parts, usec)
  end

  # Limit a date's resolution. For example, the date _2004-09-21 13:50:11_
  # will be changed to _2004-09-01 00:00:00_ when using _Resolution::MONTH_.
  #
  # resolution:: The desired resolution of the date to be returned
  # return::     the Time with all values more precise than _resolution_
  #              set to 0 or 1
  def DateTools.round(time, resolution)
    s_to_time(time_to_s(time, resolution))
  end

  # A named resolution together with the strftime format that renders a
  # time at that resolution.
  class Resolution < Parameter
    attr_accessor :format

    def initialize(name, format)
      super(name)
      @format = format
    end

    YEAR = Resolution.new("year", "%Y")
    MONTH = Resolution.new("month", "%Y%m")
    DAY = Resolution.new("day", "%Y%m%d")
    HOUR = Resolution.new("hour", "%Y%m%d%H")
    MINUTE = Resolution.new("minute", "%Y%m%d%H%M")
    SECOND = Resolution.new("second", "%Y%m%d%H%M%S")
    # same format as SECOND; time_to_s appends the millisecond digits itself
    MILLISECOND = Resolution.new("millisecond", "%Y%m%d%H%M%S")
  end
end
138
+ end
@@ -0,0 +1,91 @@
1
# Core extension: approximate equality for floats.
class Float
  # Returns true when this float and _o_ are equal to within a relative
  # error of 1e-10.
  #
  # Exactly equal values (including 0.0 compared with 0) always match; the
  # ratio below would be NaN for them. A non-zero float never matches zero,
  # and a non-numeric _o_ never matches.
  def =~(o)
    return false unless o.is_a?(Numeric)
    return true if self == o
    return false if o.zero? # relative error against zero is undefined

    (1 - self / o).abs < 0.0000000001
  end
end
6
+
7
+ module Ferret::Utils
8
# Provides support for converting longs to Strings, and back again. The
# strings are structured so that lexicographic sorting order is preserved.
#
# That is, if long1 is less than long2 for any two longs long1 and long2,
# then NumberTools.long_to_s(long1) is lexicographically less than
# NumberTools.long_to_s(long2). (Similarly for "greater than" and "equals".)
#
# Every value of a 64-bit signed long can be encoded.
module NumberTools
  RADIX = 36
  NEGATIVE_PREFIX = '-'

  # NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
  POSITIVE_PREFIX = '0'

  # The following constants are from Java
  LONG_MAX_VALUE = 9223372036854775807
  LONG_MIN_VALUE = -9223372036854775808

  # NB: These bounds match the Java equivalent. Ruby allows much larger
  # numbers than Java, so they exist only so that indexes created by Java
  # Lucene can be read.
  MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000"
  MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7"

  # The length of the long field
  STR_SIZE = MIN_STRING_VALUE.length

  # Converts a long to a String suitable for indexing.
  #
  # l::      the Integer to encode
  # return:: a String of exactly STR_SIZE characters: a one character sign
  #          prefix followed by zero-padded base-36 digits
  # Throws:: ArgumentError if _l_ cannot be represented in STR_SIZE
  #          characters (below LONG_MIN_VALUE, or too many digits)
  def NumberTools.long_to_s(l)
    if l < LONG_MIN_VALUE
      raise ArgumentError, "number is too small to be encoded"
    end

    if l < 0
      prefix = NEGATIVE_PREFIX
      # shift negatives into 0..LONG_MAX_VALUE so that a more negative number
      # gets smaller digits; LONG_MIN_VALUE maps to 0
      magnitude = LONG_MAX_VALUE + l + 1
    else
      prefix = POSITIVE_PREFIX
      magnitude = l
    end

    width = STR_SIZE - prefix.length
    digits = magnitude.to_s(RADIX)
    if digits.length > width
      raise ArgumentError, "number is too large to be encoded"
    end

    prefix + digits.rjust(width, '0')
  end

  # Converts a String that was returned by #long_to_s back to a long.
  #
  # Throws:: ArgumentError if the input is nil, is not STR_SIZE characters
  #          long, contains characters that are not base-36 digits, or does
  #          not start with one of the two sign prefixes
  def NumberTools.s_to_long(s)
    raise ArgumentError, "string cannot be nil" if s.nil?
    raise ArgumentError, "string is the wrong size" if s.length != STR_SIZE

    prefix = s[0, 1]
    digits = s[1..-1]
    # String#to_i would silently stop at the first invalid character
    if digits !~ /\A[0-9a-zA-Z]+\z/
      raise ArgumentError, "string <" + s + "> contains invalid digits"
    end
    magnitude = digits.to_i(RADIX)

    if prefix == POSITIVE_PREFIX
      magnitude
    elsif prefix == NEGATIVE_PREFIX
      # undo the shift applied by long_to_s
      magnitude - LONG_MAX_VALUE - 1
    else
      raise ArgumentError, "string <" + prefix +
        "> does not begin with the correct prefix"
    end
  end
end
91
+ end
@@ -0,0 +1,41 @@
1
+ module Ferret::Utils
2
# Base class for named, enumeration-like constants. Every instance registers
# itself under a key made of its class name and its name, and a second
# instance with the same key is refused.
class Parameter
  # Registry of every parameter created so far, indexed by make_key.
  @@all_parameters = {}

  # Builds the registry key for _name_: the receiving class' name, a space,
  # and the name.
  def Parameter.make_key(name)
    to_s + " " + name
  end

  # Marshal hook: returns the registered parameter for the dumped name, or
  # creates a new one when that name has not been registered yet.
  def Parameter._load(var)
    @@all_parameters.fetch(make_key(var)) { new(var) }
  end

  # The name this parameter was created with.
  def to_s
    @name
  end

  # Marshal hook: a parameter is dumped as just its name.
  def _dump(arg)
    @name
  end

  # Hash of the registry key, so it depends on both class and name.
  def hash
    registry_key = self.class.make_key(@name)
    registry_key.hash
  end

  protected

  # Stores the name and registers the new instance.
  #
  # Raises ArgumentError when the key is already registered.
  def initialize(name)
    @name = name
    registry_key = self.class.make_key(name)
    raise ArgumentError, "key already in use" if @@all_parameters.key?(registry_key)

    @@all_parameters[registry_key] = self
  end
end
41
+ end
@@ -0,0 +1,120 @@
1
+ module Ferret::Utils
2
# A PriorityQueue keeps its objects in a binary heap so that the least
# object is always available in constant time, while push and pop take
# log(size) time. Objects are ordered with #less_than, which uses < unless a
# subclass overrides it.
class PriorityQueue
  # Number of objects currently in the queue.
  attr_reader :size

  # Subclass constructors must call this.
  #
  # max_size:: the capacity that #insert enforces
  def initialize(max_size)
    @max_size = max_size
    @heap = Array.new(max_size + 1) # slot 0 is unused, the root lives in slot 1
    @size = 0
  end

  # Ordering used by the heap; true when _a_ sorts before _b_.
  def less_than(a, b)
    a < b
  end

  # Adds an object to the queue in log(size) time.
  #
  # max_size is not checked here: pushing more objects than max_size simply
  # makes the backing Array grow. Use #insert for a bounded queue.
  def push(object)
    @size += 1
    @heap[@size] = object
    up_heap
  end
  alias :<< :push

  # Adds _object_ if the queue is not full, or replaces the least object if
  # that one is less_than _object_.
  #
  # object:: the object to be inserted
  # return:: true if object was added, false otherwise
  def insert(object)
    if @size < @max_size
      push(object)
      return true
    end

    return false unless @size > 0 && less_than(top, object)

    @heap[1] = object
    down_heap
    true
  end

  # Returns the least object of the queue in constant time (nil when empty).
  def top
    @heap[1]
  end

  # Removes and returns the least object in log(size) time, or returns nil
  # when the queue is empty.
  def pop
    return nil unless @size > 0

    least = @heap[1]
    @heap[1] = @heap[@size] # the last leaf becomes the new root
    @heap[@size] = nil      # drop the reference so it can be collected
    @size -= 1
    down_heap               # sift the new root into place
    least
  end

  # Removes all entries from the queue.
  def clear
    @heap.fill(nil, 1, @size)
    @size = 0
  end

  # Debugging aid: prints the backing array.
  def put_heap
    puts @heap
  end

  # Restores heap order after the top object has been changed in place.
  def adjust_top
    down_heap
  end

  private

  # Sifts the most recently added leaf up towards the root.
  def up_heap
    slot = @size
    moving = @heap[slot] # the leaf being placed
    parent = slot >> 1
    while parent > 0 && less_than(moving, @heap[parent])
      @heap[slot] = @heap[parent] # pull the parent down
      slot = parent
      parent >>= 1
    end
    @heap[slot] = moving
  end

  # Sifts the root down until neither child is smaller than it.
  def down_heap
    slot = 1
    moving = @heap[slot] # the root being placed
    child = smaller_child_of(slot)
    while child <= @size && less_than(@heap[child], moving)
      @heap[slot] = @heap[child] # pull the child up
      slot = child
      child = smaller_child_of(slot)
    end
    @heap[slot] = moving
  end

  # Index of the smaller child of _slot_: the right child when it exists and
  # is less_than the left one, the left child otherwise. The result may lie
  # beyond @size; callers check that.
  def smaller_child_of(slot)
    left = slot << 1
    right = left + 1
    if right <= @size && less_than(@heap[right], @heap[left])
      right
    else
      left
    end
  end
end
120
+ end