ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/lib/ferret/utils.rb
DELETED
@@ -1,8 +0,0 @@
|
|
1
|
-
require 'ferret/utils/string_helper'
|
2
|
-
require 'ferret/utils/parameter'
|
3
|
-
require 'ferret/utils/priority_queue'
|
4
|
-
require 'ferret/utils/bit_vector'
|
5
|
-
require 'ferret/utils/date_tools'
|
6
|
-
require 'ferret/utils/number_tools'
|
7
|
-
require 'ferret/utils/weak_key_hash'
|
8
|
-
require 'ferret/utils/thread_local'
|
@@ -1,123 +0,0 @@
|
|
1
|
-
module Ferret::Utils
|
2
|
-
# Optimized implementation of a vector of bits.
|
3
|
-
#
|
4
|
-
# * a count() method, which efficiently computes the number of one bits
|
5
|
-
# * optimized read from and write to disk
|
6
|
-
# * inlinable get() method
|
7
|
-
class BitVector
|
8
|
-
attr_reader :size
|
9
|
-
attr_accessor :bits
|
10
|
-
|
11
|
-
def initialize
|
12
|
-
@bits = 0
|
13
|
-
@count = -1
|
14
|
-
end
|
15
|
-
|
16
|
-
# Sets the value of _bit_ to one.
|
17
|
-
def set(bit)
|
18
|
-
@bits |= 1 << bit
|
19
|
-
@count = -1
|
20
|
-
end
|
21
|
-
|
22
|
-
# Sets the value of _bit_ to zero.
|
23
|
-
def clear(bit)
|
24
|
-
@bits &= ~(1 << bit)
|
25
|
-
@count = -1
|
26
|
-
end
|
27
|
-
|
28
|
-
# Returns _true_ if _bit_ is one and
|
29
|
-
# _false_ if it is zero.
|
30
|
-
def get(bit)
|
31
|
-
return (@bits & (1 << bit)) != 0
|
32
|
-
end
|
33
|
-
alias :[] :get
|
34
|
-
|
35
|
-
# Returns the total number of one bits in this vector. This is
|
36
|
-
# efficiently computed and cached, so that, if the vector is not
|
37
|
-
# changed, no recomputation is done for repeated calls.
|
38
|
-
def count()
|
39
|
-
# if the vector has been modified
|
40
|
-
if (@count == -1)
|
41
|
-
c = 0
|
42
|
-
tmp = @bits
|
43
|
-
while tmp > 0
|
44
|
-
c += BYTE_COUNTS[tmp & 0xFF] # sum bits per byte
|
45
|
-
tmp >>= 8
|
46
|
-
end
|
47
|
-
@count = c
|
48
|
-
end
|
49
|
-
return @count
|
50
|
-
end
|
51
|
-
|
52
|
-
BYTE_COUNTS = [ # table of bits/byte
|
53
|
-
0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
|
54
|
-
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
55
|
-
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
56
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
57
|
-
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
58
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
59
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
60
|
-
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
61
|
-
1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
|
62
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
63
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
64
|
-
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
65
|
-
2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
|
66
|
-
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
67
|
-
3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
|
68
|
-
4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8
|
69
|
-
]
|
70
|
-
|
71
|
-
|
72
|
-
# Writes this vector to the file _name_ in Directory _d_, in a format
|
73
|
-
# that can be read by the constructor
|
74
|
-
def write(d, name)
|
75
|
-
output = d.create_output(name)
|
76
|
-
begin
|
77
|
-
output.write_string(self.class.bignum_to_string(@bits))
|
78
|
-
ensure
|
79
|
-
output.close()
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
# Constructs a bit vector from the file _name_ in Directory _d_, as
|
84
|
-
# written by the @link #writeendmethod.
|
85
|
-
def BitVector.read(d, name)
|
86
|
-
bv = BitVector.new
|
87
|
-
input = d.open_input(name)
|
88
|
-
begin
|
89
|
-
bv.bits = string_to_bignum(input.read_string())
|
90
|
-
ensure
|
91
|
-
input.close()
|
92
|
-
end
|
93
|
-
return bv
|
94
|
-
end
|
95
|
-
|
96
|
-
def to_s
|
97
|
-
i = @bits
|
98
|
-
while i > 0
|
99
|
-
print(i&1)
|
100
|
-
i >>= 1
|
101
|
-
end
|
102
|
-
puts ""
|
103
|
-
end
|
104
|
-
|
105
|
-
# converts a BigNum into a string
|
106
|
-
def BitVector.bignum_to_string(num)
|
107
|
-
str = []
|
108
|
-
while (num > 0)
|
109
|
-
str << (num & 0xff)
|
110
|
-
num >>= 8
|
111
|
-
end
|
112
|
-
return str.pack("C*")
|
113
|
-
end
|
114
|
-
|
115
|
-
# converts a string into a bignum
|
116
|
-
def BitVector.string_to_bignum(str)
|
117
|
-
str = str.unpack("C*")
|
118
|
-
num = 0
|
119
|
-
str.reverse.each {|c| num = ((num << 8) | c) }
|
120
|
-
return num
|
121
|
-
end
|
122
|
-
end
|
123
|
-
end
|
@@ -1,138 +0,0 @@
|
|
1
|
-
require 'date'
require 'time'
|
2
|
-
module Ferret::Utils
|
3
|
-
# Provides support for converting dates to strings and vice-versa. The
|
4
|
-
# strings are structured so that lexicographic sorting orders them by
|
5
|
-
# date, which makes them suitable for use as field values and search
|
6
|
-
# terms.
|
7
|
-
#
|
8
|
-
# This class also helps you to limit the resolution of your dates. Do not
|
9
|
-
# save dates with a finer resolution than you really need, as then
|
10
|
-
# RangeQuery and PrefixQuery will require more memory and become slower.
|
11
|
-
#
|
12
|
-
# Compared to the serialize methods the strings generated by the to_s
|
13
|
-
# methods in this class take slightly more space, unless your selected
|
14
|
-
# resolution is set to _Resolution.DAY_ or lower.
|
15
|
-
|
16
|
-
# Provides support for converting dates to strings and vice-versa. The
|
17
|
-
# strings are structured so that lexicographic sorting orders by date,
|
18
|
-
# which makes them suitable for use as field values and search terms.
|
19
|
-
#
|
20
|
-
# Note:: dates before 1970 cannot be used, and therefore cannot be indexed
|
21
|
-
# when using this class.
|
22
|
-
module DateTools
|
23
|
-
# make date strings long enough to last a millenium
|
24
|
-
SERIALIZED_DATE_LEN = (1000*365*24*60*60*1000).to_s(36).length
|
25
|
-
|
26
|
-
# The latest date that can be stored in this format
|
27
|
-
MAX_SERIALIZED_DATE_STRING = Array.new(SERIALIZED_DATE_LEN, "z").to_s.to_i(36)
|
28
|
-
|
29
|
-
# Converts a Date to a string suitable for indexing. Throws Exception
|
30
|
-
# if the date specified in the method argument is before 1970 This
|
31
|
-
# method is unsupported. Please use Time instead of Date
|
32
|
-
def DateTools.serialize_date(date)
|
33
|
-
return serialize_time(Time.parse(date))
|
34
|
-
end
|
35
|
-
|
36
|
-
# Converts a millisecond time to a string suitable for indexing.
|
37
|
-
# Accepts a Time object or a time in milliseconds.
|
38
|
-
#
|
39
|
-
# Throws Exception if the time specified in the method argument is
|
40
|
-
# negative, that is, before 1970 It is recommended that you store the
|
41
|
-
# date as a string if you don't need the time to the nearest
|
42
|
-
# millisecond. That makes things a lot easier.
|
43
|
-
def DateTools.serialize_time(time)
|
44
|
-
if time.instance_of?(Time) then time = time.to_i end
|
45
|
-
|
46
|
-
if (time < 0) then raise("time too early") end
|
47
|
-
|
48
|
-
# convert to milliseconds before serialization
|
49
|
-
s = (time*1000).to_s(36)
|
50
|
-
|
51
|
-
if (s.length() > SERIALIZED_DATE_LEN) then raise("time too late") end
|
52
|
-
|
53
|
-
# pad to 16 charactors
|
54
|
-
s = "0" + s while (s.length() < SERIALIZED_DATE_LEN)
|
55
|
-
|
56
|
-
return s
|
57
|
-
end
|
58
|
-
|
59
|
-
# The earliest date that can be stored in this format.
|
60
|
-
MIN_SERIALIZED_DATE_STRING = DateTools.serialize_time(0)
|
61
|
-
|
62
|
-
# Converts a string-encoded date into a millisecond time.
|
63
|
-
def DateTools.deserialize_time(s)
|
64
|
-
# remember to convert back to seconds
|
65
|
-
return Time.at(s.to_i(36)/1000)
|
66
|
-
end
|
67
|
-
|
68
|
-
def DateTools.date_to_s(date, resolution = Resolution::MILLISECOND)
|
69
|
-
return time_to_s(Time.parse(date), resolution)
|
70
|
-
end
|
71
|
-
|
72
|
-
|
73
|
-
# Converts a millisecond time to a string suitable for indexing.
|
74
|
-
#
|
75
|
-
# time:: the date expressed as milliseconds since January 1, 1970,
|
76
|
-
# 00:00:00 GMT resolution:: the desired resolution, see
|
77
|
-
# #round(long, DateTools.Resolution)
|
78
|
-
# return:: a string in format _%Y%m%d%H%M%SSSS_ or shorter,
|
79
|
-
# depending on _resolution_
|
80
|
-
def DateTools.time_to_s(time, resolution = Resolution::MILLISECOND)
|
81
|
-
if time.instance_of?(Date) then time = Time.parse(time) end
|
82
|
-
suffix = ""
|
83
|
-
if (resolution == Resolution::MILLISECOND)
|
84
|
-
# the suffix is the number of milliseconds if needed.
|
85
|
-
suffix = ((time.to_f-time.to_f.floor)*1000).round.to_s
|
86
|
-
end
|
87
|
-
return time.strftime(resolution.format) + suffix
|
88
|
-
end
|
89
|
-
|
90
|
-
# Converts a string produced by _time_to_s_ or _date_to_s_ back to a
|
91
|
-
# time, represented as the number of milliseconds since January 1, 1970,
|
92
|
-
# 00:00:00 GMT.
|
93
|
-
#
|
94
|
-
# str:: the date string to be converted
|
95
|
-
# return:: the number of milliseconds since January 1, 1970, 00:00:00GMT
|
96
|
-
def DateTools.s_to_time(str)
|
97
|
-
year = str.size >= 4 ? str[ 0.. 3].to_i : nil
|
98
|
-
month = str.size >= 6 ? str[ 4.. 5].to_i : nil
|
99
|
-
day = str.size >= 8 ? str[ 6.. 7].to_i : nil
|
100
|
-
hour = str.size >= 10 ? str[ 8.. 9].to_i : nil
|
101
|
-
minute = str.size >= 12 ? str[10..11].to_i : nil
|
102
|
-
second = str.size >= 14 ? str[12..13].to_i : nil
|
103
|
-
microsecond = str.size >= 17 ? str[14..17].to_i*1000 : nil
|
104
|
-
return Time.mktime(year, month, day, hour, minute, second, microsecond)
|
105
|
-
end
|
106
|
-
|
107
|
-
# Limit a date's resolution. For example, the date _2004-09-21 13:50:11_
|
108
|
-
# will be changed to _2004-09-01 00:00:00_ when using
|
109
|
-
# _Resolution.MONTH_.
|
110
|
-
#
|
111
|
-
# resolution:: The desired resolution of the date to be returned
|
112
|
-
# return:: the date with all values more precise than _resolution_
|
113
|
-
# set to 0 or 1
|
114
|
-
def DateTools.round(time, resolution)
|
115
|
-
return s_to_time(time_to_s(time, resolution))
|
116
|
-
end
|
117
|
-
|
118
|
-
class Resolution < Parameter
|
119
|
-
attr_accessor :format
|
120
|
-
|
121
|
-
private :initialize
|
122
|
-
|
123
|
-
def initialize(name, format)
|
124
|
-
super(name)
|
125
|
-
@format = format
|
126
|
-
end
|
127
|
-
|
128
|
-
YEAR = Resolution.new("year", "%Y")
|
129
|
-
MONTH = Resolution.new("month", "%Y%m")
|
130
|
-
DAY = Resolution.new("day", "%Y%m%d")
|
131
|
-
HOUR = Resolution.new("hour", "%Y%m%d%H")
|
132
|
-
MINUTE = Resolution.new("minute", "%Y%m%d%H%M")
|
133
|
-
SECOND = Resolution.new("second", "%Y%m%d%H%M%S")
|
134
|
-
MILLISECOND = Resolution.new("millisecond", "%Y%m%d%H%M%S")
|
135
|
-
|
136
|
-
end
|
137
|
-
end
|
138
|
-
end
|
@@ -1,91 +0,0 @@
|
|
1
|
-
class Float
  # Approximate equality: true when +self+ and _o_ are equal, or when their
  # ratio differs from 1 by less than 1e-10.
  #
  # The equality shortcut matters for zero: 0.0 / 0.0 is NaN, so without it
  # two zeros would never compare as approximately equal.
  def =~(o)
    return true if self == o
    (1 - self / o).abs < 0.0000000001
  end
end
|
6
|
-
|
7
|
-
module Ferret::Utils
|
8
|
-
# Provides support for converting longs to Strings, and back again. The
|
9
|
-
# strings are structured so that lexicographic sorting order is preserved.
|
10
|
-
#
|
11
|
-
# That is, if long1 is less than long2 for any two longs long1 and long2,
|
12
|
-
# then NumberTools.long_to_s(long1) is lexicographically less than
|
13
|
-
# NumberTools.long_to_s(long2). (Similarly for "greater than" and "equals".)
|
14
|
-
#
|
15
|
-
# This class handles all long values
|
16
|
-
module NumberTools
|
17
|
-
RADIX = 36
|
18
|
-
NEGATIVE_PREFIX = '-'
|
19
|
-
|
20
|
-
# NB: NEGATIVE_PREFIX must be < POSITIVE_PREFIX
|
21
|
-
POSITIVE_PREFIX = '0'
|
22
|
-
|
23
|
-
# The following constants are from Java
|
24
|
-
LONG_MAX_VALUE = 9223372036854775807
|
25
|
-
LONG_MIN_VALUE = -9223372036854775808
|
26
|
-
|
27
|
-
# NB: This function is used to match the java equivalent. Actually
|
28
|
-
# ruby allows much larger numbers than Java so this is just so that we
|
29
|
-
# can read the Java Lucene created indexes.
|
30
|
-
MIN_STRING_VALUE = NEGATIVE_PREFIX + "0000000000000"
|
31
|
-
MAX_STRING_VALUE = POSITIVE_PREFIX + "1y2p0ij32e8e7"
|
32
|
-
|
33
|
-
# The length of the long field
|
34
|
-
STR_SIZE = MIN_STRING_VALUE.length()
|
35
|
-
|
36
|
-
# Converts a long to a String suitable for indexing.
|
37
|
-
def NumberTools.long_to_s(l)
|
38
|
-
if (l == LONG_MIN_VALUE)
|
39
|
-
# special case, because long is not symetric around zero
|
40
|
-
return MIN_STRING_VALUE;
|
41
|
-
end
|
42
|
-
|
43
|
-
s = ""
|
44
|
-
if (l < 0)
|
45
|
-
s << NEGATIVE_PREFIX
|
46
|
-
l = LONG_MAX_VALUE + l + 1
|
47
|
-
else
|
48
|
-
s << POSITIVE_PREFIX
|
49
|
-
end
|
50
|
-
num = l.to_s(RADIX)
|
51
|
-
|
52
|
-
pad_len = STR_SIZE - num.length() - s.length()
|
53
|
-
while ((pad_len -= 1) >= 0)
|
54
|
-
s << '0'
|
55
|
-
end
|
56
|
-
s << num
|
57
|
-
|
58
|
-
return s
|
59
|
-
end
|
60
|
-
|
61
|
-
# Converts a String that was returned by #long_to_s back to a long.
|
62
|
-
#
|
63
|
-
# Throws:: ArgumentError if the input is nil
|
64
|
-
def NumberTools.s_to_long(s)
|
65
|
-
if (s == nil)
|
66
|
-
raise ArgumentError, "string cannot be nil"
|
67
|
-
end
|
68
|
-
if (s.length() != STR_SIZE)
|
69
|
-
raise ArgumentError, "string is the wrong size"
|
70
|
-
end
|
71
|
-
|
72
|
-
if (s == MIN_STRING_VALUE)
|
73
|
-
return LONG_MIN_VALUE
|
74
|
-
end
|
75
|
-
|
76
|
-
prefix = s[0,1]
|
77
|
-
l = s[1..-1].to_i(36)
|
78
|
-
|
79
|
-
if (prefix == POSITIVE_PREFIX)
|
80
|
-
# nop
|
81
|
-
elsif (prefix == NEGATIVE_PREFIX)
|
82
|
-
l = l - LONG_MAX_VALUE - 1
|
83
|
-
else
|
84
|
-
raise ArgumentError, "string <" + prefix +
|
85
|
-
"> does not begin with the correct prefix"
|
86
|
-
end
|
87
|
-
|
88
|
-
return l
|
89
|
-
end
|
90
|
-
end
|
91
|
-
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
module Ferret::Utils
|
2
|
-
class Parameter
|
3
|
-
def to_s() return @name end
|
4
|
-
|
5
|
-
def _dump(arg)
|
6
|
-
@name
|
7
|
-
end
|
8
|
-
|
9
|
-
def Parameter._load(var)
|
10
|
-
name = var
|
11
|
-
key = make_key(name)
|
12
|
-
if (@@all_parameters.has_key?(key))
|
13
|
-
return @@all_parameters[key]
|
14
|
-
else
|
15
|
-
return self.new(name)
|
16
|
-
end
|
17
|
-
end
|
18
|
-
|
19
|
-
def hash
|
20
|
-
return self.class.make_key(@name).hash
|
21
|
-
end
|
22
|
-
|
23
|
-
protected
|
24
|
-
@@all_parameters = {}
|
25
|
-
|
26
|
-
def initialize(name)
|
27
|
-
@name = name
|
28
|
-
key = self.class.make_key(name)
|
29
|
-
|
30
|
-
if (@@all_parameters.has_key?(key))
|
31
|
-
raise ArgumentError, "key already in use"
|
32
|
-
end
|
33
|
-
|
34
|
-
@@all_parameters[key] = self
|
35
|
-
end
|
36
|
-
|
37
|
-
def Parameter.make_key(name)
|
38
|
-
return self.to_s + " " + name
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,120 +0,0 @@
|
|
1
|
-
module Ferret::Utils
|
2
|
-
# A PriorityQueue maintains a partial ordering of its objects such that
|
3
|
-
# the least object can always be found in constant time. push()'s and
|
4
|
-
# pop()'s require log(size) time. The objects in this priority queue must
|
5
|
-
# be Comparable
|
6
|
-
class PriorityQueue
|
7
|
-
attr_reader :size
|
8
|
-
|
9
|
-
def less_than(a, b)
|
10
|
-
a < b
|
11
|
-
end
|
12
|
-
|
13
|
-
# Subclass constructors must call this.
|
14
|
-
def initialize(max_size)
|
15
|
-
@size = 0
|
16
|
-
@heap = Array.new(max_size + 1)
|
17
|
-
@max_size = max_size
|
18
|
-
end
|
19
|
-
|
20
|
-
# Adds an Object to a PriorityQueue in log(size) time.
|
21
|
-
#
|
22
|
-
# If one tries to add more objects than max_size from initialize a
|
23
|
-
# RuntimeException (ArrayIndexOutOfBound) is thrown.
|
24
|
-
def push(object)
|
25
|
-
@size += 1
|
26
|
-
@heap[@size] = object
|
27
|
-
up_heap()
|
28
|
-
end
|
29
|
-
alias :<< :push
|
30
|
-
|
31
|
-
# Adds object to the PriorityQueue in log(size) time if either the
|
32
|
-
# PriorityQueue is not full, or not less_than(object, top()).
|
33
|
-
#
|
34
|
-
# object:: the object to be inserted
|
35
|
-
# return true if object is added, false otherwise.
|
36
|
-
def insert(object)
|
37
|
-
if(@size < @max_size)
|
38
|
-
push(object)
|
39
|
-
return true
|
40
|
-
elsif (@size > 0 and less_than(top, object))
|
41
|
-
@heap[1] = object
|
42
|
-
down_heap()
|
43
|
-
return true
|
44
|
-
else
|
45
|
-
return false
|
46
|
-
end
|
47
|
-
end
|
48
|
-
|
49
|
-
# Returns the least object of the PriorityQueue in constant time.
|
50
|
-
def top
|
51
|
-
return @heap[1]
|
52
|
-
end
|
53
|
-
|
54
|
-
# Removes and returns the least object of the PriorityQueue in log(size)
|
55
|
-
# time.
|
56
|
-
def pop()
|
57
|
-
if (@size > 0)
|
58
|
-
result = @heap[1] # save first value
|
59
|
-
@heap[1] = @heap[@size] # move last to first
|
60
|
-
@heap[@size] = nil; # permit GC of objects
|
61
|
-
@size -= 1
|
62
|
-
down_heap() # adjust heap
|
63
|
-
return result
|
64
|
-
else
|
65
|
-
return nil
|
66
|
-
end
|
67
|
-
end
|
68
|
-
|
69
|
-
# Removes all entries from the PriorityQueue.
|
70
|
-
def clear()
|
71
|
-
(1..@size).each do |i|
|
72
|
-
@heap[i] = nil
|
73
|
-
end
|
74
|
-
@size = 0
|
75
|
-
end
|
76
|
-
|
77
|
-
def put_heap
|
78
|
-
puts @heap
|
79
|
-
end
|
80
|
-
|
81
|
-
# resets the queue after the top has been changed
|
82
|
-
def adjust_top()
|
83
|
-
down_heap()
|
84
|
-
end
|
85
|
-
|
86
|
-
private
|
87
|
-
|
88
|
-
def up_heap()
|
89
|
-
i = @size
|
90
|
-
node = @heap[i] # save bottom node
|
91
|
-
j = i >> 1
|
92
|
-
while (j > 0 and less_than(node, @heap[j]))
|
93
|
-
@heap[i] = @heap[j]; # shift parents down
|
94
|
-
i = j
|
95
|
-
j = j >> 1
|
96
|
-
end
|
97
|
-
@heap[i] = node; # install saved node
|
98
|
-
end
|
99
|
-
|
100
|
-
def down_heap()
|
101
|
-
i = 1
|
102
|
-
node = @heap[i] # save top node
|
103
|
-
j = i << 1 # find smaller child
|
104
|
-
k = j + 1
|
105
|
-
if k <= @size and less_than(@heap[k], @heap[j])
|
106
|
-
j = k
|
107
|
-
end
|
108
|
-
while (j <= @size and less_than(@heap[j], node))
|
109
|
-
@heap[i] = @heap[j] # shift up child
|
110
|
-
i = j
|
111
|
-
j = i << 1
|
112
|
-
k = j + 1
|
113
|
-
if k <= @size and less_than(@heap[k], @heap[j])
|
114
|
-
j = k
|
115
|
-
end
|
116
|
-
end
|
117
|
-
@heap[i] = node; # install saved node
|
118
|
-
end
|
119
|
-
end
|
120
|
-
end
|