ferret 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (202)
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,35 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+ require File.dirname(__FILE__) + "/tm_store"
3
+ require File.dirname(__FILE__) + "/tm_store_lock"
4
+
5
# Runs the shared StoreTest and StoreLockTest cases against
# Ferret::Store::RAMDirectory and adds one RAM-specific lock test.
class RAMStoreTest < Test::Unit::TestCase
  include StoreTest
  include StoreLockTest

  # Every test gets its own, freshly created in-memory directory.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Closes the directory created in setup.
  def teardown
    @dir.close
  end

  # Follows a single lock through its life cycle and checks that the entry
  # "rubylock-" + name is visible through @dir.exists? only while the lock is
  # actually held.
  def test_ramlock
    name = "lfile"
    lfile = "rubylock-" + name

    assert(!@dir.exists?(lfile), "There should be no lock file")

    # Creating the lock object must not create the lock file yet.
    lock = @dir.make_lock(name)
    assert(!@dir.exists?(lfile), "There should still be no lock file")
    assert(!lock.locked?, "lock shouldn't be locked yet")

    lock.obtain
    assert(lock.locked?, "lock should now be locked")
    assert(@dir.exists?(lfile), "A lock file should have been created")

    lock.release
    assert(!lock.locked?, "lock should be freed again")
    assert(!@dir.exists?(lfile), "The lock file should have been deleted")
  end
end
@@ -0,0 +1,180 @@
1
# Shared test cases for directory ("store") implementations.
#
# Mix this module into a test case whose setup assigns the directory under
# test to @dir; every test below drives that object and reports through the
# including class's assert / assert_equal.
module StoreTest
  # NOTE: this sets an instance variable on the StoreTest module object itself,
  # not on the test cases that include the module. Including classes must
  # assign their own @dir (normally in setup).
  @dir = nil

  # test the basic file manipulation methods;
  # - exists?
  # - touch
  # - delete
  # - file_count
  def test_basic_file_ops
    assert_equal(0, @dir.file_count, "directory should be empty")
    assert(!@dir.exists?('filename'), "File should not exist")
    @dir.touch('tmpfile1')
    assert_equal(1, @dir.file_count, "directory should have one file")
    @dir.touch('tmpfile2')
    assert_equal(2, @dir.file_count, "directory should have two files")
    assert(@dir.exists?('tmpfile1'), "'tmpfile1' should exist")
    @dir.delete('tmpfile1')
    assert(!@dir.exists?('tmpfile1'), "'tmpfile1' should no longer exist")
    assert_equal(1, @dir.file_count, "directory should have one file")
  end

  # Renaming must move the entry without changing the number of files.
  def test_rename
    @dir.touch("from")
    assert(@dir.exists?('from'), "File should exist")
    assert(!@dir.exists?('to'), "File should not exist")
    cnt_before = @dir.file_count
    @dir.rename('from', 'to')
    cnt_after = @dir.file_count
    assert_equal(cnt_before, cnt_after, "the number of files shouldn't have changed")
    assert(@dir.exists?('to'), "File should now exist")
    assert(!@dir.exists?('from'), "File should no longer exist")
  end

  # Difficult to test precisely because file mtime is only stored to the
  # nearest second; we assume the touch happens within a few seconds of the
  # clock reading. The difference is checked in both directions so that a
  # wildly early mtime (e.g. 0) is rejected as well as a late one.
  def test_modified
    time = Time.new.to_i
    @dir.touch('mtime.test')
    mtime = @dir.modified('mtime.test').to_i
    assert((mtime - time).abs <= 3,
           "test that mtime is approximately equal to the system time when the file was touched")
  end

  def test_rw_bytes
    bytes = [0x34, 0x87, 0xF9, 0xEA, 0x00, 0xFF]
    rw_test(bytes, "byte", 6)
  end

  def test_rw_ints
    ints = [-2147483648, 2147483647, -1, 0]
    rw_test(ints, "int", 16)
  end

  def test_rw_longs
    longs = [-9223372036854775808, 9223372036854775807, -1, 0]
    rw_test(longs, "long", 32)
  end

  def test_rw_uints
    uints = [0xffffffff, 100000, 0]
    rw_test(uints, "uint", 12)
  end

  def test_rw_ulongs
    ulongs = [0xffffffffffffffff, 100000000000000, 0]
    rw_test(ulongs, "ulong", 24)
  end

  def test_rw_vints
    vints = [9223372036854775807,
             0x00,
             0xFFFFFFFFFFFFFFFF]
    rw_test(vints, "vint", 20)
  end

  def test_rw_vlongs
    vlongs = [9223372036854775807,
              0x00,
              0xFFFFFFFFFFFFFFFF]
    rw_test(vlongs, "vlong", 20)
  end

  # Writes a short and a long (100x repeated) string and reads both back.
  def test_rw_strings
    text = 'This is a ruby ferret test string ~!@#$%^&*()`123456790-=\)_+|'
    ostream = @dir.create_output("rw_strings.test")
    ostream.write_string(text)
    ostream.write_string(text*100)
    ostream.close
    istream = @dir.open_input("rw_strings.test")
    assert_equal(text, istream.read_string, "Short string test failed")
    assert_equal(text*100, istream.read_string, "Long string test failed")
    istream.close
    assert_equal(6265, @dir.length('rw_strings.test'))
  end

  # Same round trip as test_rw_strings but with non-ASCII text.
  def test_rw_utf8_strings
    # NOTE(review): the literal below shows up as replacement characters in
    # this copy of the file; restore the intended multi-byte text from the
    # upstream source before relying on this test.
    text = '�� ������'
    ostream = @dir.create_output("rw_utf8_strings.test")
    ostream.write_string(text)
    ostream.write_string(text*100)
    ostream.close
    istream = @dir.open_input("rw_utf8_strings.test")
    assert_equal(text, istream.read_string, "Short string test failed")
    assert_equal(text*100, istream.read_string, "Long string test failed")
    istream.close
  end

  # this test fills up the output stream so that the buffer will have to be
  # written a few times. It then uses seek to make sure that it works
  # correctly
  def test_buffer_seek
    ostream = @dir.create_output("rw_seek.test")
    text = 'This is another long test string !@#$%#$%&%$*%^&*()(_'
    1000.times { |i| ostream.write_long(i); ostream.write_string(text) }
    ostream.seek(987)
    assert_equal(987, ostream.pos)
    ostream.write_vint(555)
    ostream.seek(56)
    assert_equal(56, ostream.pos)
    ostream.write_vint(1234567890)
    ostream.seek(4000)
    assert_equal(4000, ostream.pos)
    ostream.write_vint(9876543210)
    ostream.close
    istream = @dir.open_input("rw_seek.test")
    istream.seek(56)
    assert_equal(56, istream.pos)
    assert_equal(1234567890, istream.read_vint)
    istream.seek(4000)
    assert_equal(4000, istream.pos)
    assert_equal(9876543210, istream.read_vint)
    istream.seek(987)
    assert_equal(987, istream.pos)
    assert_equal(555, istream.read_vint)
    istream.close
  end

  # A cloned input stream starts at the original's position and then moves
  # independently: reading from the clone must not advance the original, and
  # closing the clone must leave the original readable.
  def test_clone
    ostream = @dir.create_output("clone_test")
    10.times { |i| ostream.write_long(i) }
    ostream.close
    istream = @dir.open_input("clone_test")
    istream.seek(24)
    alt_istream = istream.clone
    assert_equal(istream.pos, alt_istream.pos)
    (3...10).each { |i| assert_equal(i, alt_istream.read_long) }
    assert_equal(80, alt_istream.pos)
    assert_equal(24, istream.pos)
    alt_istream.close
    (3...10).each { |i| assert_equal(i, istream.read_long) }
    istream.close
  end

  # read_bytes(buffer, offset, length) is expected to overwrite part of the
  # buffer in place, continuing from where the previous read stopped.
  def test_read_bytes
    str = "0000000000"
    ostream = @dir.create_output("rw_read_bytes")
    ostream.write_bytes("how are you doing?", 18)
    ostream.close
    istream = @dir.open_input("rw_read_bytes")
    istream.read_bytes(str, 2, 4)
    assert_equal("00how 0000", str)
    istream.read_bytes(str, 1, 8)
    assert_equal("0are you 0", str)
    istream.close
  end

  private

  # Writes every element of +values+ with write_<type>, reads them back with
  # read_<type> and finally checks the resulting file length.
  #
  # values::          the values to round-trip, in order
  # type::            suffix of the read_/write_ method pair, e.g. "vint"
  # expected_length:: expected size of the written file as reported by
  #                   @dir.length
  def rw_test(values, type, expected_length)
    ostream = @dir.create_output("rw_#{type}.test")
    values.each { |b| ostream.__send__("write_" + type, b) }
    ostream.close
    istream = @dir.open_input("rw_#{type}.test")
    values.each { |b| assert_equal(b, istream.__send__("read_" + type), "#{type} should be equal") }
    istream.close
    assert_equal(expected_length, @dir.length("rw_#{type}.test"))
  end

end
@@ -0,0 +1,68 @@
1
# Shared locking test cases for directory ("store") implementations.
#
# Mix this module into a test case whose setup assigns the directory under
# test to @dir.
module StoreLockTest
  # Counter shared by the main test thread and the helper thread in
  # test_locking; the two threads hand control back and forth by advancing it.
  class Switch
    @counter = 0

    class << self
      attr_accessor :counter
    end
  end

  # Two lock objects created for the same name must exclude each other, both
  # within one thread and across threads.
  def test_locking
    lock_time_out = 0.001 # we want this test to run quickly
    lock1 = @dir.make_lock("l.lck")
    lock2 = @dir.make_lock("l.lck")

    assert(!lock2.locked?)
    assert(lock1.obtain(lock_time_out))
    assert(lock2.locked?)

    assert(!obtain_lock_true_false(lock2))

    # While lock1 is held, while_locked on lock2 must raise without running
    # its block. The block only records that it ran; asserting inside it would
    # raise into the rescue below and be mistaken for the expected failure.
    block_ran = false
    exception_thrown = false
    begin
      lock2.while_locked(lock_time_out) { block_ran = true }
    rescue
      exception_thrown = true
    end
    assert(!block_ran, "lock should not have been obtained")
    assert(exception_thrown)

    lock1.release
    assert(lock2.obtain(lock_time_out))
    lock2.release

    # The counter outlives a single test run, so start every run from a known
    # state; a stale value would let the waits below fall straight through.
    Switch.counter = 0

    t = Thread.new do
      lock1.while_locked(lock_time_out) do
        Switch.counter = 1
        # hold the lock until the main thread has run its lock2 obtain test
        Thread.pass while Switch.counter < 2
        Switch.counter = 3
      end
    end

    # make sure the thread has started and the lock has been obtained; stop
    # waiting if the thread died so a failure there cannot hang the test
    Thread.pass while Switch.counter < 1 && t.alive?

    assert(!obtain_lock_true_false(lock2))

    Switch.counter = 2
    # Wait for while_locked to finish so that lock1 has been released before
    # lock2 is tried again; join also re-raises anything the thread raised.
    t.join

    assert(lock2.obtain(lock_time_out))
    lock2.release
  end

  # Tries to obtain +lock+ with a very short timeout.
  #
  # Returns true when obtain completes without raising and false when it
  # raises. The lock is not released here.
  def obtain_lock_true_false(lock)
    lock_time_out = 0.001 # we want this test to run quickly
    lock.obtain(lock_time_out)
    true
  rescue
    false
  end
end
@@ -0,0 +1,16 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require File.join(File.dirname(__FILE__), "../test_helper.rb")
3
+
4
+ require 'analysis/tc_letter_tokenizer'
5
+ require 'analysis/tc_white_space_tokenizer'
6
+ require 'analysis/tc_lower_case_tokenizer'
7
+ require 'analysis/tc_word_list_loader'
8
+ require 'analysis/tc_lower_case_filter'
9
+ require 'analysis/tc_stop_filter'
10
+ require 'analysis/tc_porter_stem_filter'
11
+ require 'analysis/tc_analyzer'
12
+ require 'analysis/tc_stop_analyzer'
13
+ require 'analysis/tc_white_space_analyzer'
14
+ require 'analysis/tc_per_field_analyzer_wrapper'
15
+ require 'analysis/tc_standard_tokenizer'
16
+ require 'analysis/tc_standard_analyzer'
@@ -0,0 +1,4 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'document/tc_field.rb'
4
+ require 'document/tc_document.rb'
@@ -0,0 +1,18 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'index/tc_compound_file_io.rb'
4
+ require 'index/tc_field_infos.rb'
5
+ require 'index/tc_fields_io.rb'
6
+ require 'index/tc_index_writer.rb'
7
+ require 'index/tc_index_reader.rb'
8
+ require 'index/tc_segment_infos.rb'
9
+ require 'index/tc_segment_term_docs.rb'
10
+ require 'index/tc_segment_term_enum.rb'
11
+ require 'index/tc_segment_term_vector.rb'
12
+ require 'index/tc_term.rb'
13
+ require 'index/tc_term_buffer.rb'
14
+ require 'index/tc_term_info.rb'
15
+ require 'index/tc_term_infos_io.rb'
16
+ require 'index/tc_term_vector_offset_info.rb'
17
+ require 'index/tc_term_vectors_io.rb'
18
+ require 'index/tc_index.rb'
@@ -0,0 +1,3 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'query_parser/tc_query_parser.rb'
@@ -0,0 +1,10 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+
3
+ require 'search/tc_similarity.rb'
4
+ require 'search/tc_index_searcher.rb'
5
+ require 'search/tc_fuzzy_query.rb'
6
+ require 'search/tc_sort_field.rb'
7
+ require 'search/tc_sort.rb'
8
+ require 'search/tc_search_and_sort.rb'
9
+ require 'search/tc_filter.rb'
10
+ require 'search/tc_spans.rb'
@@ -0,0 +1,6 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require File.join(File.dirname(__FILE__), "../test_helper.rb")
3
+
4
+
5
+ require 'store/tc_fs_store'
6
+ require 'store/tc_ram_store'
@@ -0,0 +1,10 @@
1
+ $:.unshift File.dirname(__FILE__)
2
+ require File.join(File.dirname(__FILE__), "../test_helper.rb")
3
+
4
+ require 'utils/tc_string_helper'
5
+ require 'utils/tc_priority_queue'
6
+ require 'utils/tc_bit_vector'
7
+ require 'utils/tc_date_tools.rb'
8
+ require 'utils/tc_number_tools.rb'
9
+ require 'utils/tc_parameter.rb'
10
+ require 'utils/tc_weak_key_hash.rb'
@@ -0,0 +1,65 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Unit tests for the BitVector class made available by Ferret::Utils.
class BitVectorTest < Test::Unit::TestCase
  include Ferret::Utils

  # Setting and clearing bits must keep get and count in step; setting a bit
  # that is already set must leave the count unchanged.
  def test_bv
    vector = BitVector.new
    assert_equal(0, vector.count)

    # [bit to set, count expected afterwards]; bit 10 is set twice on purpose.
    [[10, 1], [10, 1], [20, 2], [21, 3]].each do |bit, expected_count|
      vector.set(bit)
      assert(vector.get(bit))
      assert_equal(expected_count, vector.count)
    end

    # [bit to clear, count expected afterwards]
    [[21, 2], [20, 1]].each do |bit, expected_count|
      vector.clear(bit)
      assert(!vector.get(bit))
      assert_equal(expected_count, vector.count)
    end

    # the bit that was never cleared is still set
    assert(vector.get(10))
  end

  # A vector written to a RAMDirectory and read back must contain exactly the
  # bits that were set before writing.
  def test_bv_rw
    dir = Ferret::Store::RAMDirectory.new
    set_bits = [5, 8, 13, 21, 34]

    written = BitVector.new
    assert_equal(0, written.count)
    set_bits.each_with_index do |bit, index|
      written.set(bit)
      assert_equal(index + 1, written.count)
    end
    written.write(dir, "bv.test")

    restored = BitVector.read(dir, "bv.test")
    # each set bit must be present and both of its neighbours absent
    set_bits.each do |bit|
      assert(!restored.get(bit - 1))
      assert(restored.get(bit))
      assert(!restored.get(bit + 1))
    end
    assert_equal(5, restored.count)
  end
end
@@ -0,0 +1,50 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Unit tests for the DateTools helpers made available by Ferret::Utils.
class DateToolsTest < Test::Unit::TestCase
  include Ferret::Utils

  # A time must survive a serialize_time / deserialize_time round trip.
  def test_serialization
    # use the current time truncated to a whole second
    original = Time.at(Time.now.to_i)

    serialized = DateTools.serialize_time(original)
    restored = DateTools.deserialize_time(serialized)

    assert_equal(original, restored, "date changed after serialization")
  end

  def test_serialization_constants
    # assert existence of these constants
    assert(DateTools::MAX_SERIALIZED_DATE_STRING)
    assert(DateTools::MIN_SERIALIZED_DATE_STRING)
  end

  # time_to_s must cut the formatted string off at the requested resolution.
  def test_time_to_s
    t = Time.mktime(2004, 9, 5, 22, 33, 44, 555000)

    [
      ["2004",              DateTools::Resolution::YEAR],
      ["200409",            DateTools::Resolution::MONTH],
      ["20040905",          DateTools::Resolution::DAY],
      ["2004090522",        DateTools::Resolution::HOUR],
      ["200409052233",      DateTools::Resolution::MINUTE],
      ["20040905223344",    DateTools::Resolution::SECOND],
      ["20040905223344555", DateTools::Resolution::MILLISECOND]
    ].each do |expected, resolution|
      assert_equal(expected, DateTools.time_to_s(t, resolution))
    end
  end

  # s_to_time must accept strings of every resolution, year through
  # millisecond.
  def test_s_to_time
    [
      [[2004],                            "2004"],
      [[2004, 9],                         "200409"],
      [[2004, 9, 5],                      "20040905"],
      [[2004, 9, 5, 22],                  "2004090522"],
      [[2004, 9, 5, 22, 33],              "200409052233"],
      [[2004, 9, 5, 22, 33, 44],          "20040905223344"],
      [[2004, 9, 5, 22, 33, 44, 555000],  "20040905223344555"]
    ].each do |time_args, string|
      assert_equal(Time.mktime(*time_args), DateTools.s_to_time(string))
    end
  end

  # Rounding to DAY resolution must drop everything below the day.
  def test_round
    t = Time.mktime(2004, 9, 5, 22, 33, 44, 555000)
    midnight = Time.mktime(2004, 9, 5)
    assert_equal(midnight, DateTools.round(t, DateTools::Resolution::DAY))
  end

end