ferret 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,7 @@
1
+ require 'ferret/utils/string_helper'
2
+ require 'ferret/utils/parameter'
3
+ require 'ferret/utils/priority_queue'
4
+ require 'ferret/utils/bit_vector'
5
+ require 'ferret/utils/date_tools'
6
+ require 'ferret/utils/number_tools'
7
+ require 'ferret/utils/weak_key_hash'
@@ -0,0 +1,105 @@
1
+ module Ferret::Utils
2
+ # Optimized implementation of a vector of bits.
3
+ #
4
+ # * a count() method, which efficiently computes the number of one bits
5
+ # * optimized read from and write to disk
6
+ # * inlinable get() method
7
class BitVector
  # NOTE(review): @size is never assigned anywhere in this class, so this
  # reader returns nil as the class stands. Kept for interface compatibility.
  attr_reader :size

  # The whole vector packed into one Integer: bit _n_ of the vector is bit
  # _n_ of this number.
  attr_reader :bits

  # Creates an empty vector (no bits set).
  def initialize
    @bits = 0
    @count = -1 # -1 marks the cached count as stale
  end

  # Replaces the packed bits wholesale. The cached count is invalidated so
  # that #count is recomputed for the new contents.
  def bits=(bits)
    @bits = bits
    @count = -1
  end

  # Sets the value of _bit_ to one.
  def set(bit)
    @bits |= 1 << bit
    @count = -1
  end

  # Sets the value of _bit_ to zero.
  def clear(bit)
    @bits &= ~(1 << bit)
    @count = -1
  end

  # Returns _true_ if _bit_ is one and _false_ if it is zero.
  def get(bit)
    return (@bits & (1 << bit)) != 0
  end
  alias :[] :get

  # Returns the total number of one bits in this vector. The result is
  # cached, so repeated calls on an unchanged vector do no recomputation.
  def count()
    # recompute only if the vector has been modified since the last call
    if (@count == -1)
      total = 0
      rest = @bits
      while rest > 0
        total += BYTE_COUNTS[rest & 0xFF] # sum bits per byte
        rest >>= 8
      end
      @count = total
    end
    return @count
  end

  # Number of one bits in each possible byte value (index = byte value).
  BYTE_COUNTS = [
    0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
    4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
  ].freeze

  # Writes this vector to the file _name_ in Directory _d_, in a format
  # that can be read back by BitVector.read. The output is closed even if
  # the write raises.
  def write(d, name)
    output = d.create_output(name)
    begin
      output.write_vint(@bits)
    ensure
      output.close()
    end
  end

  # Constructs a bit vector from the file _name_ in Directory _d_, as
  # written by the #write method. The input is closed even if the read
  # raises.
  def BitVector.read(d, name)
    bv = BitVector.new
    input = d.open_input(name)
    begin
      bv.bits = input.read_vint()
    ensure
      input.close()
    end
    return bv
  end

  # Returns the vector as a String of "0"/"1" characters, lowest bit first,
  # up to and including the highest set bit. An empty vector gives "".
  def to_s
    return "" unless @bits > 0
    return @bits.to_s(2).reverse
  end
end
105
+ end
@@ -0,0 +1,138 @@
1
+ require 'date'
2
+ module Ferret::Utils
3
+ # Provides support for converting dates to strings and vice-versa. The
4
+ # strings are structured so that lexicographic sorting orders them by
5
+ # date, which makes them suitable for use as field values and search
6
+ # terms.
7
+ #
8
+ # This class also helps you to limit the resolution of your dates. Do not
9
+ # save dates with a finer resolution than you really need, as then
10
+ # RangeQuery and PrefixQuery will require more memory and become slower.
11
+ #
12
+ # Compared to the serialize methods the strings generated by the to_s
13
+ # methods in this class take slightly more space, unless your selected
14
+ # resolution is set to _Resolution.DAY_ or lower.
15
+
16
+ # Provides support for converting dates to strings and vice-versa. The
17
+ # strings are structured so that lexicographic sorting orders by date,
18
+ # which makes them suitable for use as field values and search terms.
19
+ #
20
+ # Note:: dates before 1970 cannot be used, and therefore cannot be indexed
21
+ # when using this class.
22
module DateTools
  # Width of a serialized time: the number of base-36 digits needed for the
  # millisecond count of a millennium (1000 years of 365 days).
  SERIALIZED_DATE_LEN = (1000*365*24*60*60*1000).to_s(36).length

  # The largest number that fits in SERIALIZED_DATE_LEN base-36 digits,
  # i.e. the latest time (in milliseconds) that can be stored in this
  # format. Despite the name this is an Integer, not a String.
  #
  # Built from a String rather than Array#to_s: on Ruby 1.9 and later
  # Array#to_s is an alias for inspect, which would parse as 0 here.
  MAX_SERIALIZED_DATE_STRING = ("z" * SERIALIZED_DATE_LEN).to_i(36)

  # Converts a Date to a string suitable for indexing. Raises if the date
  # is before 1970. The date is taken as local midnight.
  def DateTools.serialize_date(date)
    return serialize_time(date_to_time(date))
  end

  # Converts a time to a string suitable for indexing. Accepts a Time
  # object or an Integer number of seconds since January 1, 1970; a Time is
  # truncated to whole seconds.
  #
  # Raises RuntimeError if the time is negative (before 1970) or too large
  # to fit in SERIALIZED_DATE_LEN digits. It is recommended that you store
  # the date as a string if you don't need the time to the nearest
  # millisecond. That makes things a lot easier.
  def DateTools.serialize_time(time)
    if time.instance_of?(Time) then time = time.to_i end

    if (time < 0) then raise("time too early") end

    # convert to milliseconds before serialization
    s = (time*1000).to_s(36)

    if (s.length() > SERIALIZED_DATE_LEN) then raise("time too late") end

    # left-pad with zeros to the fixed width so strings sort like numbers
    return s.rjust(SERIALIZED_DATE_LEN, "0")
  end

  # The earliest date that can be stored in this format.
  MIN_SERIALIZED_DATE_STRING = DateTools.serialize_time(0)

  # Converts a string produced by serialize_time back into a Time.
  def DateTools.deserialize_time(s)
    # the string holds milliseconds; Time.at wants seconds
    return Time.at(s.to_i(36)/1000)
  end

  # Converts a Date to a string suitable for indexing, see time_to_s.
  def DateTools.date_to_s(date, resolution = Resolution::MILLISECOND)
    return time_to_s(date_to_time(date), resolution)
  end

  # Converts a time to a string suitable for indexing.
  #
  # time::       a Time (a Date is also accepted and taken as local midnight)
  # resolution:: the desired resolution, one of the Resolution constants
  # return::     a string in format _%Y%m%d%H%M%S_ followed by three
  #              millisecond digits, or shorter, depending on _resolution_
  def DateTools.time_to_s(time, resolution = Resolution::MILLISECOND)
    if time.is_a?(Date) then time = date_to_time(time) end
    suffix = ""
    if (resolution == Resolution::MILLISECOND)
      # Always three digits, truncated rather than rounded, so the strings
      # keep a fixed width (and so sort correctly) and never show "1000".
      suffix = "%03d" % (time.usec / 1000)
    end
    return time.strftime(resolution.format) + suffix
  end

  # Converts a string produced by _time_to_s_ or _date_to_s_ back to a
  # local Time. Fields missing from a shortened string are passed to
  # Time.mktime as nil.
  #
  # str::    the date string to be converted
  # return:: the corresponding Time
  def DateTools.s_to_time(str)
    year        = str.size >=  4 ? str[ 0, 4].to_i : nil
    month       = str.size >=  6 ? str[ 4, 2].to_i : nil
    day         = str.size >=  8 ? str[ 6, 2].to_i : nil
    hour        = str.size >= 10 ? str[ 8, 2].to_i : nil
    minute      = str.size >= 12 ? str[10, 2].to_i : nil
    second      = str.size >= 14 ? str[12, 2].to_i : nil
    # three millisecond digits, scaled to the microseconds mktime expects
    microsecond = str.size >= 17 ? str[14, 3].to_i*1000 : nil
    return Time.mktime(year, month, day, hour, minute, second, microsecond)
  end

  # Limit a date's resolution. For example, the date _2004-09-21 13:50:11_
  # will be changed to _2004-09-01 00:00:00_ when using
  # _Resolution::MONTH_.
  #
  # resolution:: The desired resolution of the date to be returned
  # return::     the time with all values more precise than _resolution_
  #              set to 0 or 1
  def DateTools.round(time, resolution)
    return s_to_time(time_to_s(time, resolution))
  end

  # Turns a Date-like argument into a local Time. Time objects pass
  # through; DateTime keeps its clock fields; a plain Date becomes local
  # midnight. Anything else is handed to Time.parse as before.
  def DateTools.date_to_time(date)
    case date
    when Time
      return date
    when DateTime
      return Time.mktime(date.year, date.month, date.day,
                         date.hour, date.min, date.sec)
    when Date
      return Time.mktime(date.year, date.month, date.day)
    else
      return Time.parse(date)
    end
  end
  private_class_method :date_to_time

  # A named resolution together with the strftime format that renders a
  # time at that resolution.
  class Resolution < Parameter
    attr_accessor :format

    def initialize(name, format)
      super(name)
      @format = format
    end

    YEAR        = Resolution.new("year",        "%Y")
    MONTH       = Resolution.new("month",       "%Y%m")
    DAY         = Resolution.new("day",         "%Y%m%d")
    HOUR        = Resolution.new("hour",        "%Y%m%d%H")
    MINUTE      = Resolution.new("minute",      "%Y%m%d%H%M")
    SECOND      = Resolution.new("second",      "%Y%m%d%H%M%S")
    # same strftime format as SECOND; time_to_s appends the milliseconds
    MILLISECOND = Resolution.new("millisecond", "%Y%m%d%H%M%S")

  end
end
138
+ end
@@ -0,0 +1,91 @@
1
class Float
  # Approximate equality: true when _o_ equals this float or differs from
  # it by a relative error smaller than 1e-10.
  def =~(o)
    # Exact matches first. This also covers 0.0 =~ 0.0, where the ratio
    # below would be NaN and the comparison would wrongly be false.
    return true if self == o
    return (1 - self/o).abs < 0.0000000001
  end
end
6
+
7
+ module Ferret::Utils
8
+ # Provides support for converting longs to Strings, and back again. The
9
+ # strings are structured so that lexicographic sorting order is preserved.
10
+ #
11
+ # That is, if long1 is less than long2 for any two longs long1 and long2,
12
+ # then NumberTools.long_to_s(long1) is lexicographically less than
13
+ # NumberTools.long_to_s(long2). (Similarly for "greater than" and "equals".)
14
+ #
15
+ # This class handles all long values
16
module NumberTools
  RADIX = 36
  NEGATIVE_PREFIX = '-'

  # NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
  POSITIVE_PREFIX = '0'

  # The following constants are from Java
  LONG_MAX_VALUE = 9223372036854775807
  LONG_MIN_VALUE = -9223372036854775808

  # NB: These limits match the Java equivalent. Ruby allows much larger
  # numbers than Java, so they exist only so that indexes created by Java
  # Lucene can be read.
  MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000"
  MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7"

  # The length of the long field
  STR_SIZE = MIN_STRING_VALUE.length()

  # Converts a long to a String suitable for indexing. The result is
  # always STR_SIZE characters: a sign prefix followed by zero-padded
  # base-36 digits.
  #
  # Throws:: ArgumentError if _l_ is outside LONG_MIN_VALUE..LONG_MAX_VALUE,
  #          since such a value cannot be encoded in STR_SIZE characters
  def NumberTools.long_to_s(l)
    if (l < LONG_MIN_VALUE or l > LONG_MAX_VALUE)
      raise ArgumentError, "number is outside the range of a long"
    end

    if (l == LONG_MIN_VALUE)
      # special case, because long is not symmetric around zero
      return MIN_STRING_VALUE
    end

    if (l < 0)
      prefix = NEGATIVE_PREFIX
      # shift negatives into 0..LONG_MAX_VALUE so that a larger (less
      # negative) number also gets the larger digit string
      l = LONG_MAX_VALUE + l + 1
    else
      prefix = POSITIVE_PREFIX
    end

    return prefix + l.to_s(RADIX).rjust(STR_SIZE - prefix.length, '0')
  end

  # Converts a String that was returned by #long_to_s back to a long.
  #
  # Throws:: ArgumentError if the input is nil, is not STR_SIZE characters
  #          long, has an unknown prefix, or contains characters that are
  #          not base-36 digits
  def NumberTools.s_to_long(s)
    if (s.nil?)
      raise ArgumentError, "string cannot be nil"
    end
    if (s.length() != STR_SIZE)
      raise ArgumentError, "string is the wrong size"
    end

    if (s == MIN_STRING_VALUE)
      return LONG_MIN_VALUE
    end

    prefix = s[0,1]
    digits = s[1..-1]

    unless (prefix == POSITIVE_PREFIX or prefix == NEGATIVE_PREFIX)
      raise ArgumentError, "string <" + prefix +
                           "> does not begin with the correct prefix"
    end

    # String#to_i silently stops at the first invalid character, which
    # would turn a corrupt value into a wrong number; reject it instead.
    unless (digits =~ /\A[0-9a-z]+\z/i)
      raise ArgumentError, "string contains characters that are not digits"
    end

    l = digits.to_i(RADIX)
    # undo the shift applied to negatives by long_to_s
    l = l - LONG_MAX_VALUE - 1 if (prefix == NEGATIVE_PREFIX)

    return l
  end
end
91
+ end
@@ -0,0 +1,41 @@
1
+ module Ferret::Utils
2
class Parameter
  # Every instance created so far, keyed by the string built in make_key.
  # Shared with all subclasses.
  @@all_parameters = {}

  # Builds the registry key for _name_: the receiving class's name, a
  # space, then _name_. Subclasses therefore get their own key space.
  def Parameter.make_key(name)
    return to_s + " " + name
  end

  # Marshal hook: returns the already registered instance for the dumped
  # name if there is one, otherwise creates a new instance with that name.
  def Parameter._load(var)
    return @@all_parameters.fetch(make_key(var)) { new(var) }
  end

  # The name this parameter was created with.
  def to_s
    @name
  end

  # Marshal hook: only the name is dumped.
  def _dump(arg)
    return @name
  end

  # Hash of the registry key, so it depends on both class and name.
  def hash
    registry_key = self.class.make_key(@name)
    return registry_key.hash
  end

  protected

  # Registers the new instance under its class and _name_.
  #
  # Throws:: ArgumentError if that class/name pair is already registered
  def initialize(name)
    @name = name
    registry_key = self.class.make_key(name)
    raise ArgumentError, "key already in use" if @@all_parameters.key?(registry_key)
    @@all_parameters[registry_key] = self
  end
end
41
+ end
@@ -0,0 +1,120 @@
1
+ module Ferret::Utils
2
+ # A PriorityQueue maintains a partial ordering of its objects such that
3
+ # the least object can always be found in constant time. push()'s and
4
+ # pop()'s require log(size) time. The objects in this priority queue must
5
+ # be Comparable
6
class PriorityQueue
  # Number of objects currently held.
  attr_reader :size

  # Ordering used by the heap. Override in a subclass to order by
  # something other than the objects' own <.
  def less_than(a, b)
    a < b
  end

  # Subclass constructors must call this. Slot 0 of the backing array is
  # unused; the heap lives in slots 1..size.
  def initialize(max_size)
    @max_size = max_size
    @heap = Array.new(max_size + 1)
    @size = 0
  end

  # Adds an object to the queue and returns it. This method does not check
  # max_size; use #insert for a bounded queue.
  def push(object)
    @size += 1
    @heap[@size] = object
    up_heap
  end
  alias :<< :push

  # Adds object if the queue holds fewer than max_size objects, or if the
  # current top is less_than object, in which case object replaces the top.
  #
  # object:: the object to be inserted
  # return:: true if object was added, false otherwise
  def insert(object)
    if @size < @max_size
      push(object)
      return true
    end
    return false unless @size > 0 && less_than(top, object)

    @heap[1] = object
    down_heap
    true
  end

  # Returns the least object of the queue without removing it.
  def top
    @heap[1]
  end

  # Removes and returns the least object, or returns nil when the queue is
  # empty.
  def pop
    return nil if @size <= 0

    least = @heap[1]
    @heap[1] = @heap[@size] # the last leaf becomes the provisional root
    @heap[@size] = nil      # drop the reference so it can be collected
    @size -= 1
    down_heap
    least
  end

  # Removes all entries from the queue.
  def clear
    1.upto(@size) { |slot| @heap[slot] = nil }
    @size = 0
  end

  # Prints the backing array with puts.
  def put_heap
    puts @heap
  end

  # Restores heap order after the top object has been changed in place.
  def adjust_top
    down_heap
  end

  private

  # Moves the most recently added object (the last slot) up until its
  # parent is no longer greater than it.
  def up_heap
    pos = @size
    moving = @heap[pos]
    parent = pos >> 1
    while parent > 0 && less_than(moving, @heap[parent])
      @heap[pos] = @heap[parent] # pull the parent down one level
      pos = parent
      parent = pos >> 1
    end
    @heap[pos] = moving
  end

  # Moves the root object down until neither child is less than it.
  def down_heap
    pos = 1
    moving = @heap[pos]
    child = smaller_child(pos)
    while child <= @size && less_than(@heap[child], moving)
      @heap[pos] = @heap[child] # pull the smaller child up one level
      pos = child
      child = smaller_child(pos)
    end
    @heap[pos] = moving
  end

  # Index of the smaller child of slot _parent_. When the right child is
  # outside the heap the left index is returned, which may itself be past
  # size; callers check that.
  def smaller_child(parent)
    left = parent << 1
    right = left + 1
    (right <= @size && less_than(@heap[right], @heap[left])) ? right : left
  end
end
120
+ end