sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,762 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ class IndexTest < Test::Unit::TestCase
4
+ include Ferret::Index
5
+ include Ferret::Search
6
+ include Ferret::Analysis
7
+ include Ferret::Store
8
+
9
# Intentionally empty: each test in this class creates the index it needs.
def setup
end
11
+
12
# Intentionally empty: each test in this class closes the index it opened.
def teardown
end
14
+
15
# Runs +query+ against +index+ and asserts that every document number
# yielded by the search appears in +expected+, and that the number of hits
# equals +expected.length+.
#
# index::    object responding to +search_each(query)+, yielding doc and score
# query::    query passed straight through to +search_each+
# expected:: array of the document numbers that should be found
def check_results(index, query, expected)
  hit_count = 0
  index.search_each(query) do |doc_num, _score|
    position = expected.index(doc_num)
    assert_not_nil(position, "doc #{doc_num} found but not expected")
    hit_count += 1
  end
  assert_equal(expected.length, hit_count)
end
26
+
27
# Adds eight array-style documents to +index+ and checks simple term,
# AND and OR queries, then checks the value read back from field :xxx of
# document 7.
def do_test_index_with_array(index)
  documents = [
    ["one two"],
    ["one", "three"],
    ["two"],
    ["one", "four"],
    ["one two"],
    ["two", "three", "four"],
    ["one"],
    ["two", "three", "four", "five"]
  ]
  documents.each { |fields| index << fields }
  assert_equal(8, index.size)

  check_results(index, "one", [0, 1, 3, 4, 6])
  check_results(index, "one AND two", [0, 4])
  check_results(index, "one OR five", [0, 1, 3, 4, 6, 7])
  assert_equal(%w{two three four five}, index.doc(7)[:xxx])
end
47
+
48
# Adds eight hash-style documents to +index+ and checks boolean, field
# specific, wildcard and "*" (all fields) queries, plus stored field
# values read back through both index[] and index.doc.
def do_test_index_with_hash(index)
  [
    {:xxx => "one two"},
    {:xxx => "one", :field2 => "three"},
    {:xxx => "two"},
    {:xxx => "one", :field2 => "four"},
    {:xxx => "one two"},
    {:xxx => "two", :field2 => "three", :field3 => "four"},
    {:xxx => "one"},
    {:xxx => "two", :field2 => "three", :field3 => "five"}
  ].each { |fields| index << fields }

  check_results(index, "one AND two", [0, 4])
  check_results(index, "one OR five", [0, 1, 3, 4, 6])
  check_results(index, "one OR field3:five", [0, 1, 3, 4, 6, 7])
  assert_equal("four", index[5]["field3"])
  check_results(index, "field3:f*", [5, 7])
  check_results(index, "*:(one AND NOT three)", [0, 3, 4, 6])
  check_results(index, "*:(one AND (NOT three))", [0, 3, 4, 6])
  check_results(index, "two AND field3:f*", [5, 7])

  last_doc = index.doc(7)
  assert_equal("five", last_doc["field3"])
  assert_equal("two", index.doc(7)[:xxx])
end
78
+
79
# Adds eight documents (two of them carrying an :id) to +index+, then
# walks through searching, deleting by number, re-adding a modified
# document, optimizing, deleting by query and deleting by id, asserting the
# index state after each step. The order of the steps matters: later
# assertions depend on the document numbers produced by earlier ones.
def do_test_index_with_doc_array(index)
  wildcard_query = "two AND (field3:f*)"

  [
    {:xxx => "one two multi", :id => "myid"},
    {:xxx => "one", :field2 => "three multi"},
    {:xxx => "two"},
    {:xxx => "one", :field2 => "four"},
    {:xxx => "one two"},
    {:xxx => "two", :field2 => "three", :field3 => "four"},
    {:xxx => "one multi2", :id => "hello"},
    {:xxx => "two", :field2 => "this three multi2", :field3 => "five multi"}
  ].each { |fields| index << fields }

  check_results(index, "one AND two", [0, 4])
  check_results(index, "one OR five", [0, 1, 3, 4, 6])
  check_results(index, "one OR field3:five", [0, 1, 3, 4, 6, 7])
  check_results(index, wildcard_query, [5, 7])
  check_results(index, "*:(multi OR multi2)", [0, 1, 6, 7])
  check_results(index, "field2|field3:(multi OR multi2)", [1, 7])

  # Keep a handle on document 5 before it is deleted so it can be re-added.
  saved_doc = index[5]
  assert_equal("three", index[5]["field2"])
  assert(!index.has_deletions?)
  assert(!index.deleted?(5))
  assert_equal(8, index.size)

  index.delete(5)
  assert(index.has_deletions?)
  assert(index.deleted?(5))
  assert_equal(7, index.size)
  check_results(index, wildcard_query, [7])

  # Re-add the saved document with one field changed; it gets number 8.
  saved_doc.load
  saved_doc[:field2] = "dave"
  index << saved_doc
  check_results(index, wildcard_query, [7, 8])
  check_results(index, "*:this", [])
  assert_equal(8, index.size)
  assert_equal("dave", index[8][:field2])

  # After optimizing, the expected document numbers shift down by one.
  index.optimize
  check_results(index, wildcard_query, [6, 7])
  assert_equal("dave", index[7][:field2])

  index.query_delete("field2:three")
  assert(index.deleted?(1))
  assert(index.deleted?(6))
  assert(! index.deleted?(7))

  # Lookups and deletion by string use the document id.
  assert_equal("one multi2", index["hello"][:xxx])
  assert_equal("one two multi", index["myid"][:xxx])
  index.delete("myid")
  assert(index.deleted?(0))
end
134
+
135
# Runs the three shared index scenarios against indexes created without a
# :path option, closing each index before the next one is created.
def test_ram_index
  array_index = Ferret::I.new(:default_input_field => :xxx)
  do_test_index_with_array(array_index)
  array_index.close

  hash_index = Index.new(:default_field => :xxx)
  do_test_index_with_hash(hash_index)
  hash_index.close

  doc_index = Index.new(:default_field => :xxx, :id_field => :id)
  do_test_index_with_doc_array(doc_index)
  doc_index.close
end
148
+
149
# Runs the three shared index scenarios against indexes stored under
# temp/fsdir. The directory contents are removed before each scenario, and
# opening the emptied directory with :create_if_missing => false must raise
# Ferret::FileNotFoundError.
def test_fs_index
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))

  # Best-effort removal of everything directly inside fs_path; failures to
  # delete an individual entry are ignored.
  wipe_dir = lambda do
    Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
  end

  wipe_dir.call
  assert_raise(Ferret::FileNotFoundError) do
    Index.new(:path => fs_path,
              :create_if_missing => false,
              :default_field => :xxx)
  end

  array_index = Index.new(:path => fs_path, :default_input_field => :xxx)
  do_test_index_with_array(array_index)
  array_index.close

  wipe_dir.call
  hash_index = Index.new(:path => fs_path, :default_field => :xxx)
  do_test_index_with_hash(hash_index)
  hash_index.close

  wipe_dir.call
  doc_index = Index.new(:path => fs_path,
                        :default_field => :xxx,
                        :id_field => "id")
  do_test_index_with_doc_array(doc_index)
  doc_index.close
end
175
+
176
# Writes eight documents to a file-system index, closes it, reopens the
# same path with :create_if_missing => false and checks that the size and a
# stored field value are still there.
def test_fs_index_is_persistant
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  documents = [
    {:xxx => "one two", :id => "me"},
    {:xxx => "one", :field2 => "three"},
    {:xxx => "two"},
    {:xxx => "one", :field2 => "four"},
    {:xxx => "one two"},
    {:xxx => "two", :field2 => "three", :field3 => "four"},
    {:xxx => "one"},
    {:xxx => "two", :field2 => "three", :field3 => "five"}
  ]

  writer_index = Index.new(:path => fs_path, :default_field => :xxx, :create => true)
  documents.each { |fields| writer_index << fields }
  assert_equal(8, writer_index.size)
  writer_index.close

  reopened = Index.new(:path => fs_path, :create_if_missing => false)
  assert_equal(8, reopened.size)
  assert_equal("four", reopened[5]["field3"])
  reopened.close
end
198
+
199
# Creates an index with :key => :my_id, adds eight documents that share
# only three distinct :my_id values, and expects three documents to remain
# after optimizing, with the key value usable for lookup via index[].
def test_key_used_for_id_field
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  index = Index.new(:path => fs_path, :key => :my_id, :create => true)

  documents = [
    {:my_id => "three", :id => "me"},
    {:my_id => "one", :field2 => "three"},
    {:my_id => "two"},
    {:my_id => "one", :field2 => "four"},
    {:my_id => "three"},
    {:my_id => "two", :field2 => "three", :field3 => "four"},
    {:my_id => "one"},
    {:my_id => "two", :field2 => "three", :field3 => "five"}
  ]
  documents.each { |fields| index << fields }

  index.optimize
  assert_equal(3, index.size)
  assert_equal("three", index["two"][:field2])
  index.close
end
218
+
219
# Exercises Index#add_indexes in four forms: a single Index, an array of
# Index objects, a single RAMDirectory and an array of RAMDirectory
# objects. After each merge the target's size and selected documents are
# checked, including after the source indexes have been closed.
def test_merging_indexes
  index1 = Index.new(:default_field => :f)
  index2 = Index.new(:default_field => :f)
  index3 = Index.new(:default_field => :f)

  %w(zero one two).each { |word| index1 << {:f => word} }
  %w(three four five).each { |word| index2 << {:f => word} }
  %w(six seven eight).each { |word| index3 << {:f => word} }

  # Merge from Index objects.
  merged = Index.new(:default_field => :f)
  merged.add_indexes(index1)
  assert_equal(3, merged.size)
  assert_equal("zero", merged[0][:f])
  merged.add_indexes([index2, index3])
  assert_equal(9, merged.size)
  assert_equal("zero", merged[0][:f])
  assert_equal("eight", merged[8][:f])
  index1.close
  index2.close
  index3.close
  assert_equal("seven", merged[7][:f])

  # Build three more indexes on explicit RAM directories and flush them so
  # the directories can be merged directly.
  dir1 = RAMDirectory.new
  index1 = Index.new(:dir => dir1, :default_field => :f)
  %w(alpha beta charlie).each { |word| index1 << {:f => word} }
  index1.flush

  dir2 = RAMDirectory.new
  index2 = Index.new(:dir => dir2, :default_field => :f)
  %w(delta echo foxtrot).each { |word| index2 << {:f => word} }
  index2.flush

  dir3 = RAMDirectory.new
  index3 = Index.new(:dir => dir3, :default_field => :f)
  %w(golf india juliet).each { |word| index3 << {:f => word} }
  index3.flush

  # Merge from directories.
  merged.add_indexes(dir1)
  assert_equal(12, merged.size)
  assert_equal("alpha", merged[9][:f])
  merged.add_indexes([dir2, dir3])
  assert_equal(18, merged.size)
  assert_equal("juliet", merged[17][:f])
  index1.close
  dir1.close
  index2.close
  dir2.close
  index3.close
  dir3.close
  assert_equal("golf", merged[15][:f])
  merged.close
end
295
+
296
# Checks Index#persist in two forms: with a path plus +true+ as second
# argument, and with an FSDirectory opened on the same path. After each
# persist the index is checked, closed, reopened from disk and checked
# again; the second persist is expected to leave six documents.
def test_persist_index
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))

  first = Index.new(:default_field => :f)
  %w(zero one two).each { |word| first << {:f => word} }

  first.persist(fs_path, true)
  assert_equal(3, first.size)
  assert_equal("zero", first[0][:f])
  first.close

  reopened = Index.new(:path => fs_path)
  assert_equal(3, reopened.size)
  assert_equal("zero", reopened[0][:f])
  reopened.close

  second = Index.new(:default_field => :f)
  %w(romeo sierra tango).each { |word| second << {:f => word} }
  assert_equal(3, second.size)
  assert_equal("romeo", second[0][:f])

  # Persist onto the directory that already holds the first three documents.
  dir = FSDirectory.new(fs_path, false)
  second.persist(dir)
  assert_equal(6, second.size)
  assert_equal("zero", second[0][:f])
  assert_equal("romeo", second[3][:f])
  second.close

  reopened = Index.new(:path => fs_path)
  assert_equal(6, reopened.size)
  assert_equal("zero", reopened[0][:f])
  assert_equal("romeo", reopened[3][:f])
  reopened.close
end
339
+
340
# Opens two Index objects and one IndexWriter on the same path and checks
# that the first Index sees documents written through the other two.
def test_auto_update_when_externally_modified()
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  index = Index.new(:path => fs_path, :default_field => :f, :create => true)
  index << "document 1"
  assert_equal(1, index.size)

  # A second Index on the same path adds a document; both must report it.
  index2 = Index.new(:path => fs_path, :default_field => :f)
  assert_equal(1, index2.size)
  index2 << "document 2"
  assert_equal(2, index2.size)
  assert_equal(2, index.size)
  top_docs = index.search("content3")

  assert_equal(0, top_docs.hits.size)

  # A raw IndexWriter adds a third document behind the back of both indexes.
  iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new)
  # `{:f, "content3"}` was a Ruby 1.8-only hash literal and is a syntax
  # error on 1.9+; the `=>` form is valid on every Ruby version.
  iw << {:f => "content3"}
  iw.close()

  top_docs = index.search("content3")
  assert_equal(1, top_docs.hits.size)
  assert_equal(3, index.size)
  assert_equal("content3", index[2][:f])
  index2.close
  index.close
end
366
+
367
# Adds ten documents with ids 0..9 and checks deletion by integer, by
# string and by query, asserting the index size and the hit counts after
# each deletion.
def test_delete
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
  categories = %w(
    /cat1/subcat1 /cat1/subcat2 /cat1/subcat2 /cat1/subcat3 /cat1/subcat4
    /cat2/subcat1 /cat2/subcat2 /cat2/subcat3 /cat2/subcat4 /cat2/subcat5
  )
  # The id of each document is its position in the list above.
  categories.each_with_index { |cat, id| index << {:id => id, :cat => cat} }
  assert_equal(10, index.size)

  assert_equal(1, index.search("id:9").total_hits)
  index.delete(9)
  assert_equal(9, index.size)
  assert_equal(0, index.search("id:9").total_hits)

  assert_equal(1, index.search("id:8").total_hits)
  index.delete("8")
  assert_equal(8, index.size)
  assert_equal(0, index.search("id:8").total_hits)

  assert_equal(5, index.search("cat:/cat1*").total_hits)
  index.query_delete("cat:/cat1*")
  assert_equal(3, index.size)
  assert_equal(0, index.search("cat:/cat1*").total_hits)
  index.close
end
396
+
397
# Adds ten documents with ids 0..9 and checks Index#update (by string id
# and by document number) and Index#query_update (merging new field values
# into every document matched by a query).
def test_update
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
                    :default_input_field => :content,
                    :id_field => :id)
  categories = %w(
    /cat1/subcat1 /cat1/subcat2 /cat1/subcat2 /cat1/subcat3 /cat1/subcat4
    /cat2/subcat1 /cat2/subcat2 /cat2/subcat3 /cat2/subcat4 /cat2/subcat5
  )
  # Document n has id n and content "content<n>".
  categories.each_with_index do |cat, id|
    index << {:id => id, :cat => cat, :content => "content#{id}"}
  end
  assert_equal(10, index.size)

  # query_update on a single document selected by id.
  assert_equal("content5", index["5"][:content])
  index.query_update("id:5", {:content => "content five"})
  assert_equal("content five", index["5"][:content])
  assert_equal(nil, index["5"][:extra_content])

  # update with a complete replacement hash.
  replacement = {:id => "5",
                 :cat => "/cat1/subcat6",
                 :content => "high five",
                 :extra_content => "hello"}
  index.update("5", replacement)
  assert_equal("hello", index["5"][:extra_content])
  assert_equal("high five", index["5"][:content])
  assert_equal("/cat1/subcat6", index["5"][:cat])

  # query_update on a document selected by its content.
  assert_equal("content9", index["9"][:content])
  index.query_update("content:content9", {:content => "content nine"})
  assert_equal("content nine", index["9"][:content])

  # update by document number with a loaded, modified document.
  assert_equal("content0", index["0"][:content])
  assert_equal(nil, index["0"][:extra_content])
  document = index[0].load
  document[:content] = "content zero"
  document[:extra_content] = "extra content"
  index.update(0, document)
  assert_equal("content zero", index["0"][:content])
  assert_equal("extra content", index["0"][:extra_content])

  # query_update across several documents at once.
  tagged_ids = %w(1 2 3 4)
  tagged_ids.each { |id| assert_equal(nil, index[id][:tag]) }
  index.query_update("id:<5 AND cat:>=/cat1/subcat2", {:tag => "cool"})
  tagged_ids.each { |id| assert_equal("cool", index[id][:tag]) }
  assert_equal(4, index.search("tag:cool").total_hits)
  index.close
end
448
+
449
# With :key => :id, adding a document whose :id is already present is
# expected to replace the earlier one: four adds leave two documents, each
# holding the most recently added :val.
def test_index_key
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
                    :key => :id)
  [
    {:id => 0, :val => "one"},
    {:id => 0, :val => "two"},
    {:id => 1, :val => "three"},
    {:id => 1, :val => "four"},
  ].each { |fields| index << fields }

  assert_equal(2, index.size)
  assert_equal("two", index["0"][:val])
  assert_equal("four", index["1"][:val])
  index.close
end
464
+
465
# With a composite key of [:id, :table], nine adds covering four distinct
# key combinations are expected to leave four documents after optimizing,
# each holding the value from the most recent add for its key.
def test_index_multi_key
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
                    :key => [:id, :table])
  # Each row is [id, table, value]; the value is stored in a field named
  # after the table.
  rows = [
    [0, "product",  "tent"],
    [0, "location", "first floor"],
    [0, "product",  "super tent"],
    [0, "location", "second floor"],
    [1, "product",  "backback"],
    [1, "location", "second floor"],
    [1, "location", "first floor"],
    [1, "product",  "rucksack"],
    [1, "product",  "backpack"]
  ]
  rows.each do |id, table, value|
    index << {:id => id, :table => table, table.to_sym => value}
  end

  index.optimize
  assert_equal(4, index.size)
  assert_equal("super tent", index[0][:product])
  assert_equal("second floor", index[1][:location])
  assert_equal("backpack", index[3][:product])
  assert_equal("first floor", index[2][:location])
  index.close
end
487
+
488
# Same composite-key scenario as test_index_multi_key, but with both key
# fields declared :untokenized and a mixed-case table value ("Product").
# Here the size is asserted before the index is optimized.
def test_index_multi_key_untokenized
  field_infos = FieldInfos.new(:term_vector => :no)
  [:id, :table].each { |key_field| field_infos.add_field(key_field, :index => :untokenized) }

  index = Index.new(:analyzer => Analyzer.new,
                    :key => [:id, :table],
                    :field_infos => field_infos)
  # Each row is [id, table, value]; the value is stored in a field named
  # after the lower-cased table.
  rows = [
    [0, "Product",  "tent"],
    [0, "location", "first floor"],
    [0, "Product",  "super tent"],
    [0, "location", "second floor"],
    [1, "Product",  "backback"],
    [1, "location", "second floor"],
    [1, "location", "first floor"],
    [1, "Product",  "rucksack"],
    [1, "Product",  "backpack"]
  ]
  rows.each do |id, table, value|
    index << {:id => id, :table => table, table.downcase.to_sym => value}
  end

  assert_equal(4, index.size)
  index.optimize
  assert_equal("super tent", index[0][:product])
  assert_equal("second floor", index[1][:location])
  assert_equal("backpack", index[3][:product])
  assert_equal("first floor", index[2][:location])
  index.close
end
516
+
517
# Checks the ordering of search results when sorting by the :date field,
# using three ways of specifying the sort: a Sort object, an array of
# SortField objects, and an array of field-name symbols.
def test_sortby_date
  index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)

  [
    {:content => "one",        :date => "20051023"},
    {:content => "two",        :date => "19530315"},
    {:content => "three four", :date => "19390912"},
    {:content => "one",        :date => "19770905"},
    {:content => "two",        :date => "19810831"},
    {:content => "three",      :date => "19790531"},
    {:content => "one",        :date => "19770725"},
    {:content => "two",        :date => "19751226"},
    {:content => "four",       :date => "19390912"}
  ].each { |fields| index << fields }

  # Helpers reading a stored field of the hit at a given rank.
  date_at    = lambda { |results, rank| index[results.hits[rank].doc][:date] }
  content_at = lambda { |results, rank| index[results.hits[rank].doc][:content] }

  sf_date = SortField.new("date", {:type => :integer})

  results = index.search("one", :sort => Sort.new("date"))
  assert_equal(3, results.total_hits)
  assert_equal("19770725", date_at.call(results, 0))
  assert_equal("19770905", date_at.call(results, 1))
  assert_equal("20051023", date_at.call(results, 2))

  # Two documents share the earliest date; the second sort key decides
  # which comes first.
  results = index.search("one two three four",
                         :sort => [sf_date, SortField::SCORE])
  assert_equal("19390912", date_at.call(results, 0))
  assert_equal("three four", content_at.call(results, 0))
  assert_equal("19390912", date_at.call(results, 1))
  assert_equal("four", content_at.call(results, 1))
  assert_equal("19530315", date_at.call(results, 2))

  results = index.search("one two three four",
                         :sort => [:date, :content])
  assert_equal("19390912", date_at.call(results, 0))
  assert_equal("four", content_at.call(results, 0))
  assert_equal("19390912", date_at.call(results, 1))
  assert_equal("three four", content_at.call(results, 1))
  assert_equal("19530315", date_at.call(results, 2))

  index.close
end
558
+
559
# Opens two indexes with :auto_flush => true on the same path and
# interleaves adds, deletes and an optimize between them. None of these
# operations should raise.
def test_auto_flush
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}

  # NOTE(review): %q builds ONE string, not a list of twelve words, so the
  # loop below runs once. %w(...) was probably intended - confirm before
  # changing, since it alters which documents exist when deletes run.
  data = %q(one two three four five six seven eight nine ten eleven twelve)
  index1 = Index.new(:path => fs_path, :auto_flush => true, :key => :id)
  index1 << "zero"
  index2 = Index.new(:path => fs_path, :auto_flush => true)
  begin
    # String#each was removed in Ruby 1.9; each_line iterates the same way
    # and exists on 1.8 as well.
    data.each_line do |datum|
      index1 << {:id => datum[0], :content => datum}
      index2 << {:id => datum[0], :content => datum}
    end
    5.times do |i|
      index1.delete(i)
      index2.delete(i + 5)
    end
    index1.optimize
    index2 << "thirteen"
  rescue StandardError => e
    # Report what was raised instead of discarding it.
    assert(false, "This should not cause an error when auto flush has been set " +
                  "(#{e.class}: #{e.message})")
  ensure
    # Close even when the assertion above fails so later tests can reuse
    # the same directory.
    index1.close
    index2.close
  end
end
584
+
585
# Adds a single document while passing an analyzer as the second argument
# to Index#add_document, and checks that the document was added.
def test_doc_specific_analyzer
  index = Index.new
  index.add_document("abc", Ferret::Analysis::Analyzer.new)
  assert_equal(1, index.size)
  # Close the index like the other tests in this class do.
  index.close
end
590
+
591
# Adds documents whose only field is an empty string, with term vectors
# disabled, then flushes and closes. The test passes if nothing raises.
def test_adding_empty_term_vectors
  index = Index.new(:field_infos => FieldInfos.new(:term_vector => :no))

  # Note: Adding keywords to either field1 or field2 gets rid of the error

  # `{:field1, ''}` was a Ruby 1.8-only hash literal and is a syntax error
  # on 1.9+; the `=>` form is valid on every Ruby version.
  index << {:field1 => ''}
  index << {:field2 => ''}
  index << {:field3 => 'foo bar baz'}

  index.flush
  index.close
end
603
+
604
# Searches across all fields ('*') with :or_default => false for queries
# that contain the word "or", in a document that also has an :id field.
def test_stopwords
  # NOTE(review): field_infos is built but never passed to the index below.
  # Left untouched because passing it would change what the test exercises;
  # confirm whether :field_infos => field_infos was intended.
  field_infos = FieldInfos.new(:store => :no, :term_vector => :no)
  field_infos.add_field(:id, :store => :yes, :index => :untokenized)

  i = Ferret::Index::Index.new(:or_default => false, :default_search_field => '*')

  # adding this additional field to the document leads to failure below
  # comment out this statement and all tests pass:
  i << {:id => 1, :content => "Move or shake"}

  hits = i.search 'move nothere shake'
  assert_equal 0, hits.total_hits
  hits = i.search 'move shake'
  assert_equal 1, hits.total_hits
  hits = i.search 'move or shake'
  assert_equal 1, hits.total_hits # fails when id field is present
  # Close the index like the other tests in this class do.
  i.close
end
621
+
622
# Fills an index with 100 documents and searches it from four threads at
# once; every thread must find exactly one document with id 42.
def test_threading
  path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  index = Ferret::Index::Index.new(:path => path, :create => true)

  100.times do |i|
    index << {:id => i, :foo => "foo #{i}"}
  end

  # The block parameter has its own name so it does not shadow the outer
  # `index` local.
  threads = (1..4).map do
    Thread.new(index) do |shared_index|
      result = shared_index.search('id:42')
      assert_equal(1, result.total_hits)
    end
  end

  # join re-raises any exception (including a failed assertion) that
  # occurred inside the thread.
  threads.each { |t| t.join }
  # Close the index like the other tests in this class do.
  index.close
end
645
+
646
# Checks "*" and field wildcard queries using the block form of
# Ferret::I.new, then checks that closing the index again after the block
# raises (presumably because the block form has already closed it).
def test_wildcard
  index = nil
  Ferret::I.new do |i|
    # Capture the index explicitly. On Ruby 1.9+ block parameters are local
    # to the block, so an outer variable named `i` would stay nil and the
    # assert_raise below would pass merely because nil has no #close.
    index = i
    i << "one"
    assert_equal(1, i.search("*").total_hits)
    i << "two"
    assert_equal(2, i.search("*").total_hits)
    i << {:content => "three"}
    assert_equal(3, i.search("*").total_hits)
    assert_equal(3, i.search("id:*").total_hits)
    assert_equal(2, i.search('id:?*').total_hits)
  end
  assert_raise(StandardError) { index.close }
end
660
+
661
# Asserts that highlighting query +q+ yields +expected+ for document 0 and
# again for document 1, using the given excerpt settings and +field+.
#
# index::          object responding to +highlight(query, doc, options)+
# q::              query to highlight
# excerpt_length:: passed as the :excerpt_length option
# num_excerpts::   passed as the :num_excerpts option
# expected::       expected return value of +highlight+
# field::          passed as the :field option (defaults to :field)
def check_highlight(index, q, excerpt_length, num_excerpts, expected, field = :field)
  first_doc_highlights = index.highlight(q, 0,
                                         :excerpt_length => excerpt_length,
                                         :num_excerpts => num_excerpts,
                                         :field => field)
  assert_equal(expected, first_doc_highlights)

  second_doc_highlights = index.highlight(q, 1,
                                          :excerpt_length => excerpt_length,
                                          :num_excerpts => num_excerpts,
                                          :field => field)
  assert_equal(expected, second_doc_highlights)
end
673
+
674
# Indexes the same text twice - once as one string, once as an array of
# fragments - and checks the highlighter's output for a range of queries,
# excerpt lengths and excerpt counts via check_highlight.
def test_highlighter()
  index = Ferret::I.new(:default_field => :field,
                        :default_input_field => :field,
                        :analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new)

  whole_text = "the words we are searching for are one and two also " +
               "sometimes looking for them as a phrase like this; one " +
               "two lets see how it goes"
  fragments = [
    "the words we",
    "are searching",
    "for are one",
    "and two also",
    "sometimes looking",
    "for them as a",
    "phrase like this;",
    "one two lets see",
    "how it goes"
  ]
  index << whole_text
  index << fragments

  # Expected output when the excerpts cover the complete text.
  whole_text_highlighted =
    "the words we are searching for are <b>one</b> and two also " +
    "sometimes looking for them as a phrase like this; <b>one</b> " +
    "two lets see how it goes"
  phrase_query = '"the words" "for are one and two" words one two'

  # Each row: query, excerpt_length, num_excerpts, expected[, field].
  [
    ["one", 10, 1, ["...are <b>one</b>..."]],
    ["one", 10, 2,
      ["...are <b>one</b>...","...this; <b>one</b>..."]],
    ["one", 10, 3,
      ["the words...","...are <b>one</b>...","...this; <b>one</b>..."]],
    ["one", 10, 4,
      ["the words we are...","...are <b>one</b>...","...this; <b>one</b>..."]],
    ["one", 10, 5,
      ["the words we are searching for are <b>one</b>...","...this; <b>one</b>..."]],
    ["one", 10, 20, [whole_text_highlighted]],
    ["one", 200, 1, [whole_text_highlighted]],
    ["(one two)", 15, 2,
      ["...<b>one</b> and <b>two</b>...","...this; <b>one</b> <b>two</b>..."]],
    ['one two "one two"', 15, 2,
      ["...<b>one</b> and <b>two</b>...","...this; <b>one two</b>..."]],
    ['one two "one two"', 15, 1,
      ["...this; <b>one two</b>..."]],
    ['"one two"', 15, 1, nil, :not_a_field],
    ['wrong_field:one', 15, 1, nil, :wrong_field],
    [phrase_query, 10, 1,
      ["<b>the words</b>..."]],
    [phrase_query, 20, 2,
      ["<b>the words</b> we are...","...<b>for are one and two</b>..."]]
  ].each { |args| check_highlight(index, *args) }

  index.close
end
726
+
727
# Adds a document using a WhiteSpaceAnalyzer constructed with +false+ and
# checks that the first term of the :content term vector is "Capitals",
# i.e. the original capitalisation was kept.
def test_changing_analyzer
  index = Ferret::I.new
  analyzer = Ferret::Analysis::WhiteSpaceAnalyzer.new(false)
  index.add_document({:content => "Content With Capitals"}, analyzer)

  term_vector = index.reader.term_vector(0, :content)
  first_term = term_vector.terms[0]
  assert_equal("Capitals", first_term.text)
  index.close
end
735
+
736
# Checks the JSON produced by calling to_json on search results: values are
# rendered as strings, the 10,000 double quotes in :funny appear as
# 10,000 copies of '"' (quote wrapped in single quotes), and an empty
# result set renders as '[]'. Either ordering of the last document's two
# fields is accepted.
def test_top_doc_to_json
  index = Ferret::I.new
  [
    {:f1 => "one"},
    {:f2 => ["two",2,2.0]},
    {:f3 => 3},
    {:f4 => 4.0},
    {:f5 => "five", :funny => '"' * 10_000}
  ].each { |fields| index << fields }

  json_str = index.search("one two 3 4.0 five",
                          :sort => Ferret::Search::Sort::INDEX_ORDER).to_json

  leading_docs = '[{"f1":"one"},{"f2":["two","2","2.0"]},{"f3":"3"},{"f4":"4.0"},'
  quoted_quotes = '\'"\'' * 10_000
  f5_first    = leading_docs + '{"f5":"five","funny":"' + quoted_quotes + '"}]'
  funny_first = leading_docs + '{"funny":"' + quoted_quotes + '","f5":"five"}]'
  assert(json_str == f5_first || json_str == funny_first)

  assert_equal('[]', index.search("xxx").to_json)
  index.close
end
752
+
753
# Adds 20 documents with id 'one' and 20 with id 'two', deletes everything
# matching 'id:one' with a single query_delete, and expects 20 documents to
# remain.
def test_large_query_delete
  index = Ferret::I.new
  20.times do
    index << {:id => 'one'}
    index << {:id => 'two'}
  end
  index.query_delete('id:one')
  assert_equal(20, index.size)
  # Close the index like the other tests in this class do.
  index.close
end
762
+ end