ferret 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README +109 -0
- data/Rakefile +275 -0
- data/TODO +9 -0
- data/TUTORIAL +197 -0
- data/ext/extconf.rb +3 -0
- data/ext/ferret.c +23 -0
- data/ext/ferret.h +85 -0
- data/ext/index_io.c +543 -0
- data/ext/priority_queue.c +227 -0
- data/ext/ram_directory.c +316 -0
- data/ext/segment_merge_queue.c +41 -0
- data/ext/string_helper.c +42 -0
- data/ext/tags +240 -0
- data/ext/term.c +261 -0
- data/ext/term_buffer.c +299 -0
- data/ext/util.c +12 -0
- data/lib/ferret.rb +41 -0
- data/lib/ferret/analysis.rb +11 -0
- data/lib/ferret/analysis/analyzers.rb +93 -0
- data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
- data/lib/ferret/analysis/token.rb +79 -0
- data/lib/ferret/analysis/token_filters.rb +86 -0
- data/lib/ferret/analysis/token_stream.rb +26 -0
- data/lib/ferret/analysis/tokenizers.rb +107 -0
- data/lib/ferret/analysis/word_list_loader.rb +27 -0
- data/lib/ferret/document.rb +2 -0
- data/lib/ferret/document/document.rb +152 -0
- data/lib/ferret/document/field.rb +304 -0
- data/lib/ferret/index.rb +26 -0
- data/lib/ferret/index/compound_file_io.rb +343 -0
- data/lib/ferret/index/document_writer.rb +288 -0
- data/lib/ferret/index/field_infos.rb +259 -0
- data/lib/ferret/index/fields_io.rb +175 -0
- data/lib/ferret/index/index.rb +228 -0
- data/lib/ferret/index/index_file_names.rb +33 -0
- data/lib/ferret/index/index_reader.rb +462 -0
- data/lib/ferret/index/index_writer.rb +488 -0
- data/lib/ferret/index/multi_reader.rb +363 -0
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
- data/lib/ferret/index/segment_infos.rb +130 -0
- data/lib/ferret/index/segment_merge_info.rb +47 -0
- data/lib/ferret/index/segment_merge_queue.rb +16 -0
- data/lib/ferret/index/segment_merger.rb +337 -0
- data/lib/ferret/index/segment_reader.rb +380 -0
- data/lib/ferret/index/segment_term_enum.rb +178 -0
- data/lib/ferret/index/segment_term_vector.rb +58 -0
- data/lib/ferret/index/term.rb +49 -0
- data/lib/ferret/index/term_buffer.rb +88 -0
- data/lib/ferret/index/term_doc_enum.rb +283 -0
- data/lib/ferret/index/term_enum.rb +52 -0
- data/lib/ferret/index/term_info.rb +41 -0
- data/lib/ferret/index/term_infos_io.rb +312 -0
- data/lib/ferret/index/term_vector_offset_info.rb +20 -0
- data/lib/ferret/index/term_vectors_io.rb +552 -0
- data/lib/ferret/query_parser.rb +274 -0
- data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
- data/lib/ferret/search.rb +49 -0
- data/lib/ferret/search/boolean_clause.rb +100 -0
- data/lib/ferret/search/boolean_query.rb +303 -0
- data/lib/ferret/search/boolean_scorer.rb +294 -0
- data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
- data/lib/ferret/search/conjunction_scorer.rb +99 -0
- data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
- data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
- data/lib/ferret/search/explanation.rb +41 -0
- data/lib/ferret/search/field_cache.rb +216 -0
- data/lib/ferret/search/field_doc.rb +31 -0
- data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
- data/lib/ferret/search/filter.rb +11 -0
- data/lib/ferret/search/filtered_query.rb +130 -0
- data/lib/ferret/search/filtered_term_enum.rb +79 -0
- data/lib/ferret/search/fuzzy_query.rb +153 -0
- data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
- data/lib/ferret/search/hit_collector.rb +34 -0
- data/lib/ferret/search/hit_queue.rb +11 -0
- data/lib/ferret/search/index_searcher.rb +173 -0
- data/lib/ferret/search/match_all_docs_query.rb +104 -0
- data/lib/ferret/search/multi_phrase_query.rb +204 -0
- data/lib/ferret/search/multi_term_query.rb +65 -0
- data/lib/ferret/search/non_matching_scorer.rb +22 -0
- data/lib/ferret/search/phrase_positions.rb +55 -0
- data/lib/ferret/search/phrase_query.rb +217 -0
- data/lib/ferret/search/phrase_scorer.rb +153 -0
- data/lib/ferret/search/prefix_query.rb +47 -0
- data/lib/ferret/search/query.rb +111 -0
- data/lib/ferret/search/query_filter.rb +51 -0
- data/lib/ferret/search/range_filter.rb +103 -0
- data/lib/ferret/search/range_query.rb +139 -0
- data/lib/ferret/search/req_excl_scorer.rb +125 -0
- data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
- data/lib/ferret/search/score_doc.rb +38 -0
- data/lib/ferret/search/score_doc_comparator.rb +114 -0
- data/lib/ferret/search/scorer.rb +91 -0
- data/lib/ferret/search/similarity.rb +278 -0
- data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
- data/lib/ferret/search/sort.rb +105 -0
- data/lib/ferret/search/sort_comparator.rb +60 -0
- data/lib/ferret/search/sort_field.rb +87 -0
- data/lib/ferret/search/spans.rb +12 -0
- data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
- data/lib/ferret/search/spans/span_first_query.rb +79 -0
- data/lib/ferret/search/spans/span_near_query.rb +108 -0
- data/lib/ferret/search/spans/span_not_query.rb +130 -0
- data/lib/ferret/search/spans/span_or_query.rb +176 -0
- data/lib/ferret/search/spans/span_query.rb +25 -0
- data/lib/ferret/search/spans/span_scorer.rb +74 -0
- data/lib/ferret/search/spans/span_term_query.rb +105 -0
- data/lib/ferret/search/spans/span_weight.rb +84 -0
- data/lib/ferret/search/spans/spans_enum.rb +44 -0
- data/lib/ferret/search/term_query.rb +128 -0
- data/lib/ferret/search/term_scorer.rb +181 -0
- data/lib/ferret/search/top_docs.rb +24 -0
- data/lib/ferret/search/top_field_docs.rb +17 -0
- data/lib/ferret/search/weight.rb +54 -0
- data/lib/ferret/search/wildcard_query.rb +26 -0
- data/lib/ferret/search/wildcard_term_enum.rb +61 -0
- data/lib/ferret/stemmers.rb +1 -0
- data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
- data/lib/ferret/store.rb +5 -0
- data/lib/ferret/store/buffered_index_io.rb +191 -0
- data/lib/ferret/store/directory.rb +139 -0
- data/lib/ferret/store/fs_store.rb +338 -0
- data/lib/ferret/store/index_io.rb +259 -0
- data/lib/ferret/store/ram_store.rb +282 -0
- data/lib/ferret/utils.rb +7 -0
- data/lib/ferret/utils/bit_vector.rb +105 -0
- data/lib/ferret/utils/date_tools.rb +138 -0
- data/lib/ferret/utils/number_tools.rb +91 -0
- data/lib/ferret/utils/parameter.rb +41 -0
- data/lib/ferret/utils/priority_queue.rb +120 -0
- data/lib/ferret/utils/string_helper.rb +47 -0
- data/lib/ferret/utils/weak_key_hash.rb +51 -0
- data/rake_utils/code_statistics.rb +106 -0
- data/setup.rb +1551 -0
- data/test/benchmark/tb_ram_store.rb +76 -0
- data/test/benchmark/tb_rw_vint.rb +26 -0
- data/test/longrunning/tc_numbertools.rb +60 -0
- data/test/longrunning/tm_store.rb +19 -0
- data/test/test_all.rb +9 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/analysis/tc_analyzer.rb +21 -0
- data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
- data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
- data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
- data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
- data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
- data/test/unit/analysis/tc_stop_filter.rb +14 -0
- data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
- data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_word_list_loader.rb +32 -0
- data/test/unit/document/tc_document.rb +47 -0
- data/test/unit/document/tc_field.rb +80 -0
- data/test/unit/index/tc_compound_file_io.rb +107 -0
- data/test/unit/index/tc_field_infos.rb +119 -0
- data/test/unit/index/tc_fields_io.rb +167 -0
- data/test/unit/index/tc_index.rb +140 -0
- data/test/unit/index/tc_index_reader.rb +622 -0
- data/test/unit/index/tc_index_writer.rb +57 -0
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
- data/test/unit/index/tc_segment_infos.rb +74 -0
- data/test/unit/index/tc_segment_term_docs.rb +17 -0
- data/test/unit/index/tc_segment_term_enum.rb +60 -0
- data/test/unit/index/tc_segment_term_vector.rb +71 -0
- data/test/unit/index/tc_term.rb +22 -0
- data/test/unit/index/tc_term_buffer.rb +57 -0
- data/test/unit/index/tc_term_info.rb +19 -0
- data/test/unit/index/tc_term_infos_io.rb +192 -0
- data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
- data/test/unit/index/tc_term_vectors_io.rb +108 -0
- data/test/unit/index/th_doc.rb +244 -0
- data/test/unit/query_parser/tc_query_parser.rb +84 -0
- data/test/unit/search/tc_filter.rb +113 -0
- data/test/unit/search/tc_fuzzy_query.rb +136 -0
- data/test/unit/search/tc_index_searcher.rb +188 -0
- data/test/unit/search/tc_search_and_sort.rb +98 -0
- data/test/unit/search/tc_similarity.rb +37 -0
- data/test/unit/search/tc_sort.rb +48 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +153 -0
- data/test/unit/store/tc_fs_store.rb +84 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +180 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/ts_analysis.rb +16 -0
- data/test/unit/ts_document.rb +4 -0
- data/test/unit/ts_index.rb +18 -0
- data/test/unit/ts_query_parser.rb +3 -0
- data/test/unit/ts_search.rb +10 -0
- data/test/unit/ts_store.rb +6 -0
- data/test/unit/ts_utils.rb +10 -0
- data/test/unit/utils/tc_bit_vector.rb +65 -0
- data/test/unit/utils/tc_date_tools.rb +50 -0
- data/test/unit/utils/tc_number_tools.rb +59 -0
- data/test/unit/utils/tc_parameter.rb +40 -0
- data/test/unit/utils/tc_priority_queue.rb +62 -0
- data/test/unit/utils/tc_string_helper.rb +21 -0
- data/test/unit/utils/tc_weak_key_hash.rb +25 -0
- metadata +251 -0
@@ -0,0 +1,35 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/tm_store"
|
3
|
+
require File.dirname(__FILE__) + "/tm_store_lock"
|
4
|
+
|
5
|
+
# Runs the shared StoreTest and StoreLockTest suites against an in-memory
# Ferret::Store::RAMDirectory, plus one RAM-specific lock test.
class RAMStoreTest < Test::Unit::TestCase
  include StoreTest
  include StoreLockTest

  # Every test gets a fresh, empty RAM directory in @dir.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  def teardown
    @dir.close
  end

  # Checks that the lock entry ("rubylock-" + lock name) only exists in the
  # directory while the lock is actually held.
  def test_ramlock
    name = "lfile"
    lfile = "rubylock-#{name}"

    assert(!@dir.exists?(lfile), "There should be no lock file")
    lock = @dir.make_lock(name)
    # merely creating the lock object must not create the lock entry
    assert(!@dir.exists?(lfile), "There should still be no lock file")
    assert(!lock.locked?, "lock shouldn't be locked yet")

    lock.obtain
    assert(lock.locked?, "lock should now be locked")
    assert(@dir.exists?(lfile), "A lock file should have been created")

    lock.release
    assert(!lock.locked?, "lock should be freed again")
    assert(!@dir.exists?(lfile), "The lock file should have been deleted")
  end
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
# Shared test cases for store (directory) implementations. Include this module
# in a test case whose +setup+ assigns the directory under test to @dir.
module StoreTest
  # NOTE: this only sets an instance variable on the module object itself; it
  # is not visible to including test cases, which must assign their own @dir.
  @dir = nil

  # test the basic file manipulation methods;
  # - exists?
  # - touch
  # - delete
  # - file_count
  def test_basic_file_ops
    assert_equal(0, @dir.file_count, "directory should be empty")
    assert(!@dir.exists?('filename'), "File should not exist")
    @dir.touch('tmpfile1')
    assert_equal(1, @dir.file_count, "directory should have one file")
    @dir.touch('tmpfile2')
    assert_equal(2, @dir.file_count, "directory should have two files")
    assert(@dir.exists?('tmpfile1'), "'tmpfile1' should exist")
    @dir.delete('tmpfile1')
    assert(!@dir.exists?('tmpfile1'), "'tmpfile1' should no longer exist")
    assert_equal(1, @dir.file_count, "directory should have one file")
  end

  # Renaming must move the entry without changing the number of files.
  def test_rename
    @dir.touch("from")
    assert(@dir.exists?('from'), "File should exist")
    assert(!@dir.exists?('to'), "File should not exist")
    cnt_before = @dir.file_count
    @dir.rename('from', 'to')
    cnt_after = @dir.file_count
    assert_equal(cnt_before, cnt_after, "the number of files shouldn't have changed")
    assert(@dir.exists?('to'), "File should now exist")
    assert(!@dir.exists?('from'), "File should no longer exist")
  end

  # The modification time can only be compared at whole-second resolution, so
  # accept anything within a few seconds of "now". The absolute difference is
  # used so that a far-too-old mtime fails as well as a far-too-new one.
  def test_modified
    now = Time.now.to_i
    @dir.touch('mtime.test')
    mtime = @dir.modified('mtime.test').to_i
    assert((mtime - now).abs <= 3,
           "test that mtime is approximately equal to the system time when the file was touched")
  end

  def test_rw_bytes
    bytes = [0x34, 0x87, 0xF9, 0xEA, 0x00, 0xFF]
    rw_test(bytes, "byte", 6)
  end

  def test_rw_ints
    ints = [-2147483648, 2147483647, -1, 0]
    rw_test(ints, "int", 16)
  end

  def test_rw_longs
    longs = [-9223372036854775808, 9223372036854775807, -1, 0]
    rw_test(longs, "long", 32)
  end

  def test_rw_uints
    uints = [0xffffffff, 100000, 0]
    rw_test(uints, "uint", 12)
  end

  def test_rw_ulongs
    ulongs = [0xffffffffffffffff, 100000000000000, 0]
    rw_test(ulongs, "ulong", 24)
  end

  def test_rw_vints
    vints = [9223372036854775807,
             0x00,
             0xFFFFFFFFFFFFFFFF]
    rw_test(vints, "vint", 20)
  end

  def test_rw_vlongs
    vlongs = [9223372036854775807,
              0x00,
              0xFFFFFFFFFFFFFFFF]
    rw_test(vlongs, "vlong", 20)
  end

  # Writes a short and a long (100x repeated) string and reads both back.
  def test_rw_strings
    text = 'This is a ruby ferret test string ~!@#$%^&*()`123456790-=\)_+|'
    ostream = @dir.create_output("rw_strings.test")
    ostream.write_string(text)
    ostream.write_string(text * 100)
    ostream.close
    istream = @dir.open_input("rw_strings.test")
    assert_equal(text, istream.read_string, "Short string test failed")
    assert_equal(text * 100, istream.read_string, "Long string test failed")
    istream.close
    assert_equal(6265, @dir.length('rw_strings.test'))
  end

  # Same round trip as test_rw_strings, but with non-ASCII text.
  def test_rw_utf8_strings
    text = '�� ������'
    ostream = @dir.create_output("rw_utf8_strings.test")
    ostream.write_string(text)
    ostream.write_string(text * 100)
    ostream.close
    istream = @dir.open_input("rw_utf8_strings.test")
    assert_equal(text, istream.read_string, "Short string test failed")
    assert_equal(text * 100, istream.read_string, "Long string test failed")
    istream.close
  end

  # this test fills up the output stream so that the buffer will have to be
  # written a few times. It then uses seek to make sure that it works
  # correctly
  def test_buffer_seek
    ostream = @dir.create_output("rw_seek.test")
    text = 'This is another long test string !@#$%#$%&%$*%^&*()(_'
    1000.times do |i|
      ostream.write_long(i)
      ostream.write_string(text)
    end
    ostream.seek(987)
    assert_equal(987, ostream.pos)
    ostream.write_vint(555)
    ostream.seek(56)
    assert_equal(56, ostream.pos)
    ostream.write_vint(1234567890)
    ostream.seek(4000)
    assert_equal(4000, ostream.pos)
    ostream.write_vint(9876543210)
    ostream.close
    istream = @dir.open_input("rw_seek.test")
    istream.seek(56)
    assert_equal(56, istream.pos)
    assert_equal(1234567890, istream.read_vint)
    istream.seek(4000)
    assert_equal(4000, istream.pos)
    assert_equal(9876543210, istream.read_vint)
    istream.seek(987)
    assert_equal(987, istream.pos)
    assert_equal(555, istream.read_vint)
    istream.close
  end

  # A cloned input stream starts at the original's position and then advances
  # independently of it; closing the clone must not affect the original.
  def test_clone
    ostream = @dir.create_output("clone_test")
    10.times { |i| ostream.write_long(i) }
    ostream.close
    istream = @dir.open_input("clone_test")
    istream.seek(24)
    alt_istream = istream.clone
    assert_equal(istream.pos, alt_istream.pos)
    (3...10).each { |i| assert_equal(i, alt_istream.read_long) }
    assert_equal(80, alt_istream.pos)
    assert_equal(24, istream.pos)
    alt_istream.close
    (3...10).each { |i| assert_equal(i, istream.read_long) }
    istream.close
  end

  # read_bytes(buffer, offset, length) overwrites part of an existing buffer
  # in place and continues from where the previous read stopped.
  def test_read_bytes
    str = "0000000000"
    ostream = @dir.create_output("rw_read_bytes")
    ostream.write_bytes("how are you doing?", 18)
    ostream.close
    istream = @dir.open_input("rw_read_bytes")
    istream.read_bytes(str, 2, 4)
    assert_equal("00how 0000", str)
    istream.read_bytes(str, 1, 8)
    assert_equal("0are you 0", str)
    istream.close
  end

  private

  # Writes every value with write_<type>, reads them back with read_<type>
  # and checks both the round-tripped values and the resulting file length.
  #
  # values::          the values to write, in order
  # type::            suffix of the write_/read_ method pair, e.g. "int"
  # expected_length:: expected length of the written file
  def rw_test(values, type, expected_length)
    file_name = "rw_#{type}.test"
    ostream = @dir.create_output(file_name)
    values.each { |value| ostream.__send__("write_" + type, value) }
    ostream.close
    istream = @dir.open_input(file_name)
    values.each do |value|
      assert_equal(value, istream.__send__("read_" + type), "#{type} should be equal")
    end
    istream.close
    assert_equal(expected_length, @dir.length(file_name))
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
# Shared lock test cases for store (directory) implementations. Include this
# module in a test case whose +setup+ assigns the directory under test to @dir.
module StoreLockTest
  # Counter used as a simple handshake between the test's main thread and its
  # worker thread. It is shared by every test case that includes this module.
  class Switch
    @counter = 0

    class << self
      attr_accessor :counter
    end
  end

  # Two lock objects created for the same name must exclude each other, both
  # within one thread and across threads.
  def test_locking
    lock_time_out = 0.001 # we want this test to run quickly
    # The counter outlives a single test run, so start the handshake from a
    # known state.
    Switch.counter = 0

    lock1 = @dir.make_lock("l.lck")
    lock2 = @dir.make_lock("l.lck")

    assert(!lock2.locked?)
    assert(lock1.obtain(lock_time_out))
    assert(lock2.locked?)

    assert(!obtain_lock_true_false(lock2))

    # Record what happened and assert afterwards: an assertion raised inside
    # the begin/rescue would itself be swallowed by the bare rescue.
    block_ran = false
    exception_thrown = false
    begin
      lock2.while_locked(lock_time_out) { block_ran = true }
    rescue
      exception_thrown = true
    end
    assert(!block_ran, "lock should not have been obtained")
    assert(exception_thrown)

    lock1.release
    assert(lock2.obtain(lock_time_out))
    lock2.release

    worker = Thread.new do
      lock1.while_locked(lock_time_out) do
        Switch.counter = 1
        # hold the lock until the main thread has run its obtain test
        Thread.pass while Switch.counter < 2
        Switch.counter = 3
      end
    end

    # make sure thread has started and lock been obtained (stop waiting if the
    # worker died, so a failure there cannot hang the test)
    Thread.pass while Switch.counter < 1 && worker.alive?

    assert(!obtain_lock_true_false(lock2),
           "lock held by the worker thread should not be obtainable")

    Switch.counter = 2
    # Wait for the worker to finish; this guarantees it has left while_locked
    # before the lock is requested again, and re-raises any error from it.
    worker.join
    assert_equal(3, Switch.counter)

    assert(lock2.obtain(lock_time_out))
    lock2.release
  end

  # Tries to obtain +lock+ with a very short timeout.
  # Returns true if obtain returned normally, false if it raised.
  def obtain_lock_true_false(lock)
    lock_time_out = 0.001 # we want this test to run quickly
    begin
      lock.obtain(lock_time_out)
      return true
    rescue
      return false
    end
  end
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
3
|
+
|
4
|
+
require 'analysis/tc_letter_tokenizer'
|
5
|
+
require 'analysis/tc_white_space_tokenizer'
|
6
|
+
require 'analysis/tc_lower_case_tokenizer'
|
7
|
+
require 'analysis/tc_word_list_loader'
|
8
|
+
require 'analysis/tc_lower_case_filter'
|
9
|
+
require 'analysis/tc_stop_filter'
|
10
|
+
require 'analysis/tc_porter_stem_filter'
|
11
|
+
require 'analysis/tc_analyzer'
|
12
|
+
require 'analysis/tc_stop_analyzer'
|
13
|
+
require 'analysis/tc_white_space_analyzer'
|
14
|
+
require 'analysis/tc_per_field_analyzer_wrapper'
|
15
|
+
require 'analysis/tc_standard_tokenizer'
|
16
|
+
require 'analysis/tc_standard_analyzer'
|
@@ -0,0 +1,18 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
|
3
|
+
require 'index/tc_compound_file_io.rb'
|
4
|
+
require 'index/tc_field_infos.rb'
|
5
|
+
require 'index/tc_fields_io.rb'
|
6
|
+
require 'index/tc_index_writer.rb'
|
7
|
+
require 'index/tc_index_reader.rb'
|
8
|
+
require 'index/tc_segment_infos.rb'
|
9
|
+
require 'index/tc_segment_term_docs.rb'
|
10
|
+
require 'index/tc_segment_term_enum.rb'
|
11
|
+
require 'index/tc_segment_term_vector.rb'
|
12
|
+
require 'index/tc_term.rb'
|
13
|
+
require 'index/tc_term_buffer.rb'
|
14
|
+
require 'index/tc_term_info.rb'
|
15
|
+
require 'index/tc_term_infos_io.rb'
|
16
|
+
require 'index/tc_term_vector_offset_info.rb'
|
17
|
+
require 'index/tc_term_vectors_io.rb'
|
18
|
+
require 'index/tc_index.rb'
|
@@ -0,0 +1,10 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
|
3
|
+
require 'search/tc_similarity.rb'
|
4
|
+
require 'search/tc_index_searcher.rb'
|
5
|
+
require 'search/tc_fuzzy_query.rb'
|
6
|
+
require 'search/tc_sort_field.rb'
|
7
|
+
require 'search/tc_sort.rb'
|
8
|
+
require 'search/tc_search_and_sort.rb'
|
9
|
+
require 'search/tc_filter.rb'
|
10
|
+
require 'search/tc_spans.rb'
|
@@ -0,0 +1,10 @@
|
|
1
|
+
$:.unshift File.dirname(__FILE__)
|
2
|
+
require File.join(File.dirname(__FILE__), "../test_helper.rb")
|
3
|
+
|
4
|
+
require 'utils/tc_string_helper'
|
5
|
+
require 'utils/tc_priority_queue'
|
6
|
+
require 'utils/tc_bit_vector'
|
7
|
+
require 'utils/tc_date_tools.rb'
|
8
|
+
require 'utils/tc_number_tools.rb'
|
9
|
+
require 'utils/tc_parameter.rb'
|
10
|
+
require 'utils/tc_weak_key_hash.rb'
|
@@ -0,0 +1,65 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Tests for Ferret::Utils::BitVector: setting, clearing and counting bits,
# and writing a vector to / reading it back from a directory.
class BitVectorTest < Test::Unit::TestCase
  include Ferret::Utils

  def test_bv
    bits = BitVector.new
    assert_equal(0, bits.count)

    # [bit to set, expected count afterwards]; setting bit 10 a second time
    # must leave the count unchanged
    [[10, 1], [10, 1], [20, 2], [21, 3]].each do |bit, expected_count|
      bits.set(bit)
      assert(bits.get(bit))
      assert_equal(expected_count, bits.count)
    end

    # [bit to clear, expected count afterwards]
    [[21, 2], [20, 1]].each do |bit, expected_count|
      bits.clear(bit)
      assert(!bits.get(bit))
      assert_equal(expected_count, bits.count)
    end

    assert(bits.get(10))
  end

  def test_bv_rw
    dir = Ferret::Store::RAMDirectory.new
    set_bits = [5, 8, 13, 21, 34]

    written = BitVector.new
    assert_equal(0, written.count)
    set_bits.each_with_index do |bit, index|
      written.set(bit)
      assert_equal(index + 1, written.count)
    end
    written.write(dir, "bv.test")

    restored = BitVector.read(dir, "bv.test")
    # each set bit must come back set, with both of its neighbours unset
    set_bits.each do |bit|
      assert(!restored.get(bit - 1))
      assert(restored.get(bit))
      assert(!restored.get(bit + 1))
    end
    assert_equal(5, restored.count)
  end
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Tests for Ferret::Utils::DateTools: time serialization and conversion
# between Time objects and resolution-limited date strings.
class DateToolsTest < Test::Unit::TestCase
  include Ferret::Utils

  # A time must survive a serialize/deserialize round trip unchanged.
  def test_serialization
    # Use the current time truncated to whole seconds (Time#to_i drops the
    # fractional part), so the round trip is compared at second resolution.
    t = Time.at(Time.now.to_i)

    s = DateTools.serialize_time(t)

    t_after = DateTools.deserialize_time(s)
    assert_equal(t, t_after, "date changed after serialization")
  end

  def test_serialization_constants
    # assert existence of these constants
    assert(DateTools::MAX_SERIALIZED_DATE_STRING)
    assert(DateTools::MIN_SERIALIZED_DATE_STRING)
  end

  # time_to_s must cut the formatted time off at the requested resolution.
  def test_time_to_s
    t = Time.mktime(2004, 9, 5, 22, 33, 44, 555000)

    [
      ["2004",              DateTools::Resolution::YEAR],
      ["200409",            DateTools::Resolution::MONTH],
      ["20040905",          DateTools::Resolution::DAY],
      ["2004090522",        DateTools::Resolution::HOUR],
      ["200409052233",      DateTools::Resolution::MINUTE],
      ["20040905223344",    DateTools::Resolution::SECOND],
      ["20040905223344555", DateTools::Resolution::MILLISECOND]
    ].each do |expected, resolution|
      assert_equal(expected, DateTools.time_to_s(t, resolution))
    end
  end

  # s_to_time must accept every string length that time_to_s can produce.
  def test_s_to_time
    [
      [[2004],                           "2004"],
      [[2004, 9],                        "200409"],
      [[2004, 9, 5],                     "20040905"],
      [[2004, 9, 5, 22],                 "2004090522"],
      [[2004, 9, 5, 22, 33],             "200409052233"],
      [[2004, 9, 5, 22, 33, 44],         "20040905223344"],
      [[2004, 9, 5, 22, 33, 44, 555000], "20040905223344555"]
    ].each do |time_args, string|
      assert_equal(Time.mktime(*time_args), DateTools.s_to_time(string))
    end
  end

  def test_round
    t = Time.mktime(2004, 9, 5, 22, 33, 44, 555000)
    assert_equal(Time.mktime(2004, 9, 5), DateTools.round(t, DateTools::Resolution::DAY))
  end
end
|