sa-ferret 0.11.6.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (193) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1588 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/index.c +6425 -0
  37. data/ext/index.h +961 -0
  38. data/ext/lang.h +48 -0
  39. data/ext/libstemmer.c +92 -0
  40. data/ext/libstemmer.h +79 -0
  41. data/ext/mempool.c +87 -0
  42. data/ext/mempool.h +35 -0
  43. data/ext/modules.h +162 -0
  44. data/ext/multimapper.c +310 -0
  45. data/ext/multimapper.h +51 -0
  46. data/ext/posh.c +1006 -0
  47. data/ext/posh.h +1007 -0
  48. data/ext/priorityqueue.c +151 -0
  49. data/ext/priorityqueue.h +143 -0
  50. data/ext/q_boolean.c +1608 -0
  51. data/ext/q_const_score.c +161 -0
  52. data/ext/q_filtered_query.c +209 -0
  53. data/ext/q_fuzzy.c +268 -0
  54. data/ext/q_match_all.c +148 -0
  55. data/ext/q_multi_term.c +677 -0
  56. data/ext/q_parser.c +2825 -0
  57. data/ext/q_phrase.c +1126 -0
  58. data/ext/q_prefix.c +100 -0
  59. data/ext/q_range.c +350 -0
  60. data/ext/q_span.c +2402 -0
  61. data/ext/q_term.c +337 -0
  62. data/ext/q_wildcard.c +171 -0
  63. data/ext/r_analysis.c +2499 -0
  64. data/ext/r_index.c +3485 -0
  65. data/ext/r_qparser.c +585 -0
  66. data/ext/r_search.c +4107 -0
  67. data/ext/r_store.c +513 -0
  68. data/ext/r_utils.c +963 -0
  69. data/ext/ram_store.c +471 -0
  70. data/ext/search.c +1741 -0
  71. data/ext/search.h +885 -0
  72. data/ext/similarity.c +150 -0
  73. data/ext/similarity.h +82 -0
  74. data/ext/sort.c +983 -0
  75. data/ext/stem_ISO_8859_1_danish.c +338 -0
  76. data/ext/stem_ISO_8859_1_danish.h +16 -0
  77. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  78. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  79. data/ext/stem_ISO_8859_1_english.c +1156 -0
  80. data/ext/stem_ISO_8859_1_english.h +16 -0
  81. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  82. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  83. data/ext/stem_ISO_8859_1_french.c +1276 -0
  84. data/ext/stem_ISO_8859_1_french.h +16 -0
  85. data/ext/stem_ISO_8859_1_german.c +512 -0
  86. data/ext/stem_ISO_8859_1_german.h +16 -0
  87. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  88. data/ext/stem_ISO_8859_1_italian.h +16 -0
  89. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  90. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  91. data/ext/stem_ISO_8859_1_porter.c +776 -0
  92. data/ext/stem_ISO_8859_1_porter.h +16 -0
  93. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  94. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  95. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  96. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  97. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  98. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  99. data/ext/stem_KOI8_R_russian.c +701 -0
  100. data/ext/stem_KOI8_R_russian.h +16 -0
  101. data/ext/stem_UTF_8_danish.c +344 -0
  102. data/ext/stem_UTF_8_danish.h +16 -0
  103. data/ext/stem_UTF_8_dutch.c +653 -0
  104. data/ext/stem_UTF_8_dutch.h +16 -0
  105. data/ext/stem_UTF_8_english.c +1176 -0
  106. data/ext/stem_UTF_8_english.h +16 -0
  107. data/ext/stem_UTF_8_finnish.c +808 -0
  108. data/ext/stem_UTF_8_finnish.h +16 -0
  109. data/ext/stem_UTF_8_french.c +1296 -0
  110. data/ext/stem_UTF_8_french.h +16 -0
  111. data/ext/stem_UTF_8_german.c +526 -0
  112. data/ext/stem_UTF_8_german.h +16 -0
  113. data/ext/stem_UTF_8_italian.c +1113 -0
  114. data/ext/stem_UTF_8_italian.h +16 -0
  115. data/ext/stem_UTF_8_norwegian.c +302 -0
  116. data/ext/stem_UTF_8_norwegian.h +16 -0
  117. data/ext/stem_UTF_8_porter.c +794 -0
  118. data/ext/stem_UTF_8_porter.h +16 -0
  119. data/ext/stem_UTF_8_portuguese.c +1055 -0
  120. data/ext/stem_UTF_8_portuguese.h +16 -0
  121. data/ext/stem_UTF_8_russian.c +709 -0
  122. data/ext/stem_UTF_8_russian.h +16 -0
  123. data/ext/stem_UTF_8_spanish.c +1137 -0
  124. data/ext/stem_UTF_8_spanish.h +16 -0
  125. data/ext/stem_UTF_8_swedish.c +313 -0
  126. data/ext/stem_UTF_8_swedish.h +16 -0
  127. data/ext/stopwords.c +401 -0
  128. data/ext/store.c +692 -0
  129. data/ext/store.h +777 -0
  130. data/ext/term_vectors.c +352 -0
  131. data/ext/threading.h +31 -0
  132. data/ext/utilities.c +446 -0
  133. data/ext/win32.h +54 -0
  134. data/lib/ferret.rb +29 -0
  135. data/lib/ferret/browser.rb +246 -0
  136. data/lib/ferret/browser/s/global.js +192 -0
  137. data/lib/ferret/browser/s/style.css +148 -0
  138. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  139. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  140. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  141. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  142. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  143. data/lib/ferret/browser/views/layout.rhtml +22 -0
  144. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  145. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  146. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  147. data/lib/ferret/browser/webrick.rb +14 -0
  148. data/lib/ferret/document.rb +130 -0
  149. data/lib/ferret/field_infos.rb +44 -0
  150. data/lib/ferret/index.rb +786 -0
  151. data/lib/ferret/number_tools.rb +157 -0
  152. data/lib/ferret_version.rb +3 -0
  153. data/setup.rb +1555 -0
  154. data/test/test_all.rb +5 -0
  155. data/test/test_helper.rb +24 -0
  156. data/test/threading/number_to_spoken.rb +132 -0
  157. data/test/threading/thread_safety_index_test.rb +79 -0
  158. data/test/threading/thread_safety_read_write_test.rb +76 -0
  159. data/test/threading/thread_safety_test.rb +133 -0
  160. data/test/unit/analysis/tc_analyzer.rb +548 -0
  161. data/test/unit/analysis/tc_token_stream.rb +646 -0
  162. data/test/unit/index/tc_index.rb +762 -0
  163. data/test/unit/index/tc_index_reader.rb +699 -0
  164. data/test/unit/index/tc_index_writer.rb +437 -0
  165. data/test/unit/index/th_doc.rb +315 -0
  166. data/test/unit/largefile/tc_largefile.rb +46 -0
  167. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  168. data/test/unit/search/tc_filter.rb +135 -0
  169. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  170. data/test/unit/search/tc_index_searcher.rb +61 -0
  171. data/test/unit/search/tc_multi_searcher.rb +128 -0
  172. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  173. data/test/unit/search/tc_search_and_sort.rb +179 -0
  174. data/test/unit/search/tc_sort.rb +49 -0
  175. data/test/unit/search/tc_sort_field.rb +27 -0
  176. data/test/unit/search/tc_spans.rb +190 -0
  177. data/test/unit/search/tm_searcher.rb +384 -0
  178. data/test/unit/store/tc_fs_store.rb +77 -0
  179. data/test/unit/store/tc_ram_store.rb +35 -0
  180. data/test/unit/store/tm_store.rb +34 -0
  181. data/test/unit/store/tm_store_lock.rb +68 -0
  182. data/test/unit/tc_document.rb +81 -0
  183. data/test/unit/ts_analysis.rb +2 -0
  184. data/test/unit/ts_index.rb +2 -0
  185. data/test/unit/ts_largefile.rb +4 -0
  186. data/test/unit/ts_query_parser.rb +2 -0
  187. data/test/unit/ts_search.rb +2 -0
  188. data/test/unit/ts_store.rb +2 -0
  189. data/test/unit/ts_utils.rb +2 -0
  190. data/test/unit/utils/tc_bit_vector.rb +295 -0
  191. data/test/unit/utils/tc_number_tools.rb +117 -0
  192. data/test/unit/utils/tc_priority_queue.rb +106 -0
  193. metadata +269 -0
@@ -0,0 +1,699 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ module IndexReaderCommon
4
+
5
+ include Ferret::Index
6
+ include Ferret::Analysis
7
+
8
# Single Test::Unit entry point that runs the read-only reader checks
# one after another, in the order listed.
def test_index_reader
  %w[
    do_test_get_field_names
    do_test_term_enum
    do_test_term_doc_enum
    do_test_term_vectors
    do_test_get_doc
  ].each { |check| send(check) }
end
19
+
20
# Asserts that @ir.field_names includes each of the six expected fields.
def do_test_get_field_names
  reported = @ir.field_names

  [:body, :changing_field, :author, :title, :text, :year].each do |name|
    assert(reported.include?(name))
  end
end
30
+
31
# Exercises the term enumerators handed out by @ir: JSON output (default and
# :fast), stepping with next?, skip_to, each, and terms_from.
# The expected terms and document frequencies are hard-coded for the index
# the including test class builds in its setup.
def do_test_term_enum
  te = @ir.terms(:author)

  assert_equal('[{"term":"Leo","frequency":1},{"term":"Tolstoy","frequency":1}]', te.to_json)
  # The field is re-assigned before each new pass; presumably this rewinds the
  # enumerator after to_json has consumed it -- TODO confirm.
  te.field = :author
  assert_equal('[["Leo",1],["Tolstoy",1]]', te.to_json(:fast))
  te.field = :author

  assert(te.next?)
  assert_equal("Leo", te.term)
  assert_equal(1, te.doc_freq)
  assert(te.next?)
  assert_equal("Tolstoy", te.term)
  assert_equal(1, te.doc_freq)
  assert(!te.next?)

  te.field = :body
  assert(te.next?)
  assert_equal("And", te.term)
  assert_equal(1, te.doc_freq)

  assert(te.skip_to("Not"))
  assert_equal("Not", te.term)
  assert_equal(1, te.doc_freq)
  assert(te.next?)
  assert_equal("Random", te.term)
  assert_equal(16, te.doc_freq)

  te.field = :text
  assert(te.skip_to("which"))
  # This used to be assert("which", te.term), which always passes: the string
  # literal is truthy and te.term was only being used as the failure message.
  assert_equal("which", te.term)
  assert_equal(1, te.doc_freq)
  assert(!te.next?)

  te.field = :title
  assert(te.next?)
  assert_equal("War And Peace", te.term)
  assert_equal(1, te.doc_freq)
  assert(!te.next?)

  # Flat list of alternating term / doc_freq values, consumed pairwise below.
  expected = %w{is 1 more 1 not 1 skip 42 stored 1 text 1 which 1}
  te = @ir.terms(:text)
  te.each do |term, doc_freq|
    assert_equal(expected.shift, term)
    assert_equal(expected.shift.to_i, doc_freq)
  end

  te = @ir.terms_from(:body, "Not")
  assert_equal("Not", te.term)
  assert_equal(1, te.doc_freq)
  assert(te.next?)
  assert_equal("Random", te.term)
  assert_equal(16, te.doc_freq)
end
85
+
86
# Checks document counts, then the term-doc enumerator for "Wally" and the
# term-position enumerator for "read" in :body: stepping, skip_to, JSON output
# (default and :fast), and finally the shared skip_to scenario on each kind.
def do_test_term_doc_enum
  assert_equal(IndexTestHelper::INDEX_TEST_DOCS.size, @ir.num_docs)
  assert_equal(IndexTestHelper::INDEX_TEST_DOCS.size, @ir.max_doc)

  assert_equal(4, @ir.doc_freq(:body, "Wally"))

  # [doc, freq] for every document containing "Wally"
  wally_hits = [[0, 1], [5, 1], [18, 3], [20, 6]]

  docs = @ir.term_docs_for(:body, "Wally")
  wally_hits.each do |doc, freq|
    assert(docs.next?)
    assert_equal(doc, docs.doc)
    assert_equal(freq, docs.freq)
  end
  assert(!docs.next?)

  docs = @ir.term_docs_for(:body, "Wally")
  assert_equal('[{"document":0,"frequency":1},{"document":5,"frequency":1},{"document":18,"frequency":3},{"document":20,"frequency":6}]', docs.to_json)
  docs = @ir.term_docs_for(:body, "Wally")
  assert_equal('[[0,1],[5,1],[18,3],[20,6]]', docs.to_json(:fast))

  do_test_term_docpos_enum_skip_to(docs)

  # Term positions. Each row is [use skip_to?, doc, freq, positions to read];
  # only the listed positions are read, even where freq is larger.
  positions_enum = @ir.term_positions_for(:body, "read")
  [
    [false,  1, 1, [3]],
    [false,  2, 2, [1, 4]],
    [false,  6, 4, [3, 4]],
    [false,  9, 3, [0, 4]],
    [true,  16, 2, [2]],
    [true,  21, 6, [3, 4, 5, 8, 9, 10]]
  ].each do |skip, doc, freq, positions|
    advanced = skip ? positions_enum.skip_to(doc) : positions_enum.next?
    assert(advanced)
    assert_equal(doc, positions_enum.doc)
    assert_equal(freq, positions_enum.freq)
    positions.each { |pos| assert_equal(pos, positions_enum.next_position) }
  end

  assert_nil(positions_enum.next_position)
  assert(!positions_enum.next?)

  verbose_rows = [
    '{"document":1,"frequency":1,"positions":[3]}',
    '{"document":2,"frequency":2,"positions":[1,4]}',
    '{"document":6,"frequency":4,"positions":[3,4,5,6]}',
    '{"document":9,"frequency":3,"positions":[0,4,13]}',
    '{"document":10,"frequency":1,"positions":[1]}',
    '{"document":16,"frequency":2,"positions":[2,3]}',
    '{"document":17,"frequency":1,"positions":[2]}',
    '{"document":20,"frequency":1,"positions":[21]}',
    '{"document":21,"frequency":6,"positions":[3,4,5,8,9,10]}'
  ]
  positions_enum = @ir.term_positions_for(:body, "read")
  assert_equal('[' + verbose_rows.join(',') + ']', positions_enum.to_json)

  fast_rows = [
    '[1,1,[3]]',
    '[2,2,[1,4]]',
    '[6,4,[3,4,5,6]]',
    '[9,3,[0,4,13]]',
    '[10,1,[1]]',
    '[16,2,[2,3]]',
    '[17,1,[2]]',
    '[20,1,[21]]',
    '[21,6,[3,4,5,8,9,10]]'
  ]
  positions_enum = @ir.term_positions_for(:body, "read")
  assert_equal('[' + fast_rows.join(',') + ']', positions_enum.to_json(:fast))

  positions_enum = @ir.term_positions_for(:body, "read")

  do_test_term_docpos_enum_skip_to(positions_enum)
end
166
+
167
# Shared skip_to scenario for a term-doc or term-position enumerator.
# Seeks +tde+ to the term "skip" in :text, then checks where skip_to lands and
# that skipping to or past the document count returns false.
def do_test_term_docpos_enum_skip_to(tde)
  past_end = IndexTestHelper::INDEX_TEST_DOC_COUNT

  tde.seek(:text, "skip")

  # [target passed to skip_to, value expected from both doc and freq]
  [[10, 22], [44, 44], [60, 60], [62, 62], [63, 63]].each do |target, landed|
    assert(tde.skip_to(target))
    assert_equal(landed, tde.doc)
    assert_equal(landed, tde.freq)
  end

  # The first target is deliberately tried twice.
  [past_end, past_end, past_end + 100].each do |target|
    assert(!tde.skip_to(target))
  end

  tde.seek(:text, "skip")
  assert(!tde.skip_to(past_end))
end
190
+
191
# Compares the term vectors of document 3 (fetched singly and as a set)
# against hand-built expectations for :body, :author and :title.
def do_test_term_vectors
  body_terms = [
    TVTerm.new("word1", [2, 4, 7]),
    TVTerm.new("word2", [3]),
    TVTerm.new("word3", [0, 5, 8, 9]),
    TVTerm.new("word4", [1, 6])
  ]
  # Ten offset pairs: (0, 5), (6, 11), ... (54, 59)
  body_offsets = (0...10).map { |i| TVOffsets.new(6 * i, 6 * i + 5) }
  expected_tv = TermVector.new(:body, body_terms, body_offsets)

  assert_equal(expected_tv, @ir.term_vector(3, :body))

  tvs = @ir.term_vectors(3)
  assert_equal(3, tvs.size)
  assert_equal(expected_tv, tvs[:body])

  author_tv = tvs[:author]
  assert_equal(:author, author_tv.field)
  assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], author_tv.terms)
  assert(author_tv.offsets.nil?)

  title_tv = tvs[:title]
  assert_equal(:title, title_tv.field)
  assert_equal([TVTerm.new("War And Peace", nil)], title_tv.terms)
  assert_equal([TVOffsets.new(0, 13)], title_tv.offsets)
end
221
+
222
# Checks document 3 as returned by get_document: size and keys are asserted
# empty before any field is read and populated afterwards. Then checks
# @ir[] with an index, a (start, length) pair, a range and a negative index.
def do_test_get_doc
  stored_fields = [:author, :body, :title, :year]

  doc = @ir.get_document(3)
  stored_fields.each { |fn| assert(doc.fields.include?(fn)) }
  assert_equal(4, doc.fields.size)
  # Nothing has been read yet, so no values are present.
  assert_equal(0, doc.size)
  assert_equal([], doc.keys)

  assert_equal("Leo Tolstoy", doc[:author])
  assert_equal("word3 word4 word1 word2 word1 word3 word4 word1 word3 word3",
               doc[:body])
  assert_equal("War And Peace", doc[:title])
  assert_equal("1865", doc[:year])
  assert_nil(doc[:text])

  assert_equal(4, doc.size)
  stored_fields.each { |fn| assert(doc.keys.include?(fn)) }

  first_three = [0, 1, 2].map { |n| @ir[n].load }
  assert_equal(first_three, @ir[0, 3].map { |d| d.load })
  last_three = [61, 62, 63].map { |n| @ir[n].load }
  assert_equal(last_three, @ir[61, 100].map { |d| d.load })

  first_three = [0, 1, 2].map { |n| @ir[n].load }
  assert_equal(first_three, @ir[0..2].map { |d| d.load })
  last_three = [61, 62, 63].map { |n| @ir[n].load }
  assert_equal(last_three, @ir[61..100].map { |d| d.load })

  assert_equal(@ir[-60], @ir[4])
end
244
+
245
# Sets norms on several documents and fields, reads them back through
# norms and get_norms_into, then commits, optimizes and re-checks the :text
# norms through a freshly opened reader.
def test_ir_norms
  [
    [3,  :title,  1],
    [3,  :body,   12],
    [3,  :author, 145],
    [3,  :year,   31],
    [3,  :text,   202],
    [25, :text,   20],
    [50, :text,   200],
    [63, :text,   155]
  ].each { |doc_num, field, value| @ir.set_norm(doc_num, field, value) }

  text_norms = @ir.norms(:text)
  [[3, 202], [25, 20], [50, 200], [63, 155]].each do |doc_num, value|
    assert_equal(value, text_norms[doc_num])
  end

  assert_equal(1, @ir.norms(:title)[3])
  assert_equal(12, @ir.norms(:body)[3])
  assert_equal(145, @ir.norms(:author)[3])

  # TODO: this returns two possible results depending on whether it is a
  # multi reader or a segment reader. A multi reader always returns an empty
  # set of norms; otherwise nil is returned. It is not yet clear what to do
  # here, or whether this is even an issue, so the call is made but its
  # result is not asserted.
  @ir.norms(:year)

  # get_norms_into is called with offset 100, so each norm is expected at
  # index 100 + doc number of the buffer.
  shifted = [[103, 202], [125, 20], [150, 200], [163, 155]]

  buffer = " " * 164
  @ir.get_norms_into(:text, buffer, 100)
  shifted.each { |index, value| assert_equal(value, buffer[index]) }

  @ir.commit

  iw_optimize

  ir2 = ir_new

  buffer = " " * 164
  ir2.get_norms_into(:text, buffer, 100)
  shifted.each { |index, value| assert_equal(value, buffer[index]) }
  ir2.close
end
299
+
300
# Deletes and undeletes documents through @ir and checks has_deletions?,
# max_doc, num_docs and deleted? at each step. Also checks what a second
# reader sees before and after @ir commits, and the counts after optimizing.
def test_ir_delete
  doc_count = IndexTestHelper::INDEX_TEST_DOCS.size

  # Asserts that +reader+ reports no deletions and a full document count.
  assert_untouched = lambda do |reader|
    assert(!reader.has_deletions?)
    assert_equal(doc_count, reader.max_doc)
    assert_equal(doc_count, reader.num_docs)
  end

  @ir.delete(1000) # nonexistent doc_num
  assert_untouched.call(@ir)
  assert(!@ir.deleted?(10))

  # [doc to delete, num_docs expected afterwards]; 10 is deleted twice and
  # the expected count is the same both times.
  [
    [10, doc_count - 1],
    [10, doc_count - 1],
    [doc_count - 1, doc_count - 2],
    [doc_count - 2, doc_count - 3]
  ].each do |del_num, num_docs|
    @ir.delete(del_num)
    assert(@ir.has_deletions?)
    assert_equal(doc_count, @ir.max_doc)
    assert_equal(num_docs, @ir.num_docs)
    assert(@ir.deleted?(del_num))
  end

  @ir.undelete_all
  assert_untouched.call(@ir)
  [10, doc_count - 2, doc_count - 1].each do |doc_num|
    assert(!@ir.deleted?(doc_num))
  end

  del_list = [10, 20, 30, 40, 50, doc_count - 1]
  remaining = doc_count - del_list.size

  del_list.each { |doc_num| @ir.delete(doc_num) }
  assert(@ir.has_deletions?)
  assert_equal(doc_count, @ir.max_doc)
  assert_equal(remaining, @ir.num_docs)
  del_list.each { |doc_num| assert(@ir.deleted?(doc_num)) }

  # A reader opened before the commit, checked both before and after it.
  ir2 = ir_new
  assert_untouched.call(ir2)

  @ir.commit

  assert_untouched.call(ir2)

  # A reader opened after the commit.
  ir2.close
  ir2 = ir_new
  assert(ir2.has_deletions?)
  assert_equal(doc_count, ir2.max_doc)
  assert_equal(remaining, ir2.num_docs)
  del_list.each { |doc_num| assert(ir2.deleted?(doc_num)) }

  ir2.undelete_all
  assert_untouched.call(ir2)
  del_list.each { |doc_num| assert(!ir2.deleted?(doc_num)) }

  # @ir is checked again after ir2's undelete_all, and again after its commit.
  del_list.each { |doc_num| assert(@ir.deleted?(doc_num)) }

  ir2.commit

  del_list.each { |doc_num| assert(@ir.deleted?(doc_num)) }

  del_list.each { |doc_num| ir2.delete(doc_num) }
  ir2.commit

  iw_optimize

  ir3 = ir_new

  assert(!ir3.has_deletions?)
  assert_equal(remaining, ir3.max_doc)
  assert_equal(remaining, ir3.num_docs)

  ir2.close
  ir3.close
end
381
+
382
# Checks latest? on @ir and on a second reader, before and after the second
# reader deletes document 0 and commits.
def test_latest
  assert(@ir.latest?)

  other = ir_new
  assert(other.latest?)

  other.delete(0)
  other.commit
  assert(other.latest?)
  assert(!@ir.latest?)

  other.close
end
394
+ end
395
+
396
# Runs the shared IndexReaderCommon tests against a reader opened on one
# RAMDirectory that was written with :max_buffered_docs => 15 and never
# optimized.
class MultiReaderTest < Test::Unit::TestCase
  include IndexReaderCommon

  # Opens a new reader on the test directory.
  def ir_new
    IndexReader.new(@dir)
  end

  # Optimizes the test directory with a short-lived writer.
  def iw_optimize
    writer = IndexWriter.new(:dir => @dir, :analyzer => WhiteSpaceAnalyzer.new)
    writer.optimize
    writer.close
  end

  def setup
    @dir = Ferret::Store::RAMDirectory.new

    writer_options = {
      :dir => @dir,
      :analyzer => WhiteSpaceAnalyzer.new,
      :create => true,
      :field_infos => IndexTestHelper::INDEX_TEST_FIS,
      :max_buffered_docs => 15
    }
    writer = IndexWriter.new(writer_options)
    IndexTestHelper::INDEX_TEST_DOCS.each { |doc| writer << doc }

    # We mustn't optimize here so that MultiReader is used.
    # (Disabled: iw.optimize() unless self.class == MultiReaderTest)
    writer.close
    @ir = ir_new
  end

  def teardown
    @ir.close
    @dir.close
  end
end
430
+
431
# Re-runs every MultiReaderTest test under a second class name.
# NOTE(review): nothing is overridden here and the optimize call in the
# inherited setup is commented out, so this presumably never exercises a
# single-segment reader -- confirm whether that is intended.
class SegmentReaderTest < MultiReaderTest; end
433
+
434
# Runs the shared IndexReaderCommon tests against a reader built from three
# separately opened IndexReaders, one per RAMDirectory.
class MultiExternalReaderTest < Test::Unit::TestCase
  include IndexReaderCommon

  # Opens one reader per directory and combines them into a single reader.
  def ir_new
    sub_readers = @dirs.map { |dir| IndexReader.new(dir) }
    IndexReader.new(sub_readers)
  end

  # Optimizes every directory in turn.
  def iw_optimize
    @dirs.each do |dir|
      writer = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new)
      writer.optimize
      writer.close
    end
  end

  # Splits the fixture documents across three directories:
  # docs 0-9, 10-29 and 30 up to the last one.
  def setup
    @dirs = []

    slices = [0...10, 10...30, 30...IndexTestHelper::INDEX_TEST_DOCS.size]
    slices.each do |doc_ids|
      dir = Ferret::Store::RAMDirectory.new
      @dirs << dir

      writer = IndexWriter.new(:dir => dir,
                               :analyzer => WhiteSpaceAnalyzer.new,
                               :create => true,
                               :field_infos => IndexTestHelper::INDEX_TEST_FIS)
      doc_ids.each { |doc_id| writer << IndexTestHelper::INDEX_TEST_DOCS[doc_id] }
      writer.close
    end
    @ir = ir_new
  end

  def teardown
    @ir.close
    @dirs.each { |dir| dir.close }
  end
end
478
+
479
# Runs the shared IndexReaderCommon tests against a reader created directly
# from an array of three RAMDirectory objects.
class MultiExternalReaderDirTest < Test::Unit::TestCase
  include IndexReaderCommon

  # Hands the whole array of directories to IndexReader.new.
  def ir_new
    IndexReader.new(@dirs)
  end

  # Optimizes every directory in turn.
  def iw_optimize
    @dirs.each do |dir|
      writer = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new)
      writer.optimize
      writer.close
    end
  end

  # Splits the fixture documents across three directories:
  # docs 0-9, 10-29 and 30 up to the last one.
  def setup
    @dirs = []
    boundaries = [0, 10, 30, IndexTestHelper::INDEX_TEST_DOCS.size]

    boundaries.each_cons(2) do |start, finish|
      dir = Ferret::Store::RAMDirectory.new
      @dirs << dir

      writer = IndexWriter.new(:dir => dir,
                               :analyzer => WhiteSpaceAnalyzer.new,
                               :create => true,
                               :field_infos => IndexTestHelper::INDEX_TEST_FIS)
      start.upto(finish - 1) do |doc_id|
        writer << IndexTestHelper::INDEX_TEST_DOCS[doc_id]
      end
      writer.close
    end
    @ir = ir_new
  end

  def teardown
    @ir.close
    @dirs.each { |dir| dir.close }
  end
end
522
+
523
# Runs the shared IndexReaderCommon tests against a reader created from an
# array of three filesystem paths under temp/multidir.
class MultiExternalReaderPathTest < Test::Unit::TestCase
  include IndexReaderCommon

  # Hands the whole array of index paths to IndexReader.new.
  def ir_new
    IndexReader.new(@paths)
  end

  # Optimizes the index at every path in turn.
  def iw_optimize
    @paths.each do |path|
      writer = IndexWriter.new(:path => path, :analyzer => WhiteSpaceAnalyzer.new)
      writer.optimize
      writer.close
    end
  end

  # Creates indexes i1, i2 and i3 holding docs 0-9, 10-29 and 30 up to the
  # last one.
  def setup
    base_dir = File.expand_path(File.join(File.dirname(__FILE__),
                                          '../../temp/multidir'))
    FileUtils.mkdir_p(base_dir)
    @paths = %w[i1 i2 i3].map { |name| File.join(base_dir, name) }

    slices = [0...10, 10...30, 30...IndexTestHelper::INDEX_TEST_DOCS.size]
    @paths.zip(slices).each do |path, doc_ids|
      writer = IndexWriter.new(:path => path,
                               :analyzer => WhiteSpaceAnalyzer.new,
                               :create => true,
                               :field_infos => IndexTestHelper::INDEX_TEST_FIS)
      doc_ids.each { |doc_id| writer << IndexTestHelper::INDEX_TEST_DOCS[doc_id] }
      writer.close
    end
    @ir = ir_new
  end

  def teardown
    @ir.close
  end
end
571
+
572
+ class IndexReaderTest < Test::Unit::TestCase
573
+ include Ferret::Index
574
+ include Ferret::Analysis
575
+
576
# Gives every test a fresh RAMDirectory in @dir.
def setup
  @dir = Ferret::Store::RAMDirectory.new
end
579
+
580
# Closes whatever directory @dir refers to once the test has finished.
def teardown
  @dir.close
end
583
+
584
# Writes one document with a multi-valued :tag field to an on-disk index,
# copies that index into a RAMDirectory and checks that the document loads
# back equal to the original hash.
#
# The directories opened here are held in locals and closed explicitly.
# Previously the FSDirectory was never closed, and @dir was re-assigned to
# the RAM copy, so the RAMDirectory created in setup was never closed.
def test_ir_multivalue_fields
  fs_dpath = File.expand_path(File.join(File.dirname(__FILE__),
                                        '../../temp/fsdir'))
  fs_dir = Ferret::Store::FSDirectory.new(fs_dpath, true)

  iw = IndexWriter.new(:dir => fs_dir,
                       :analyzer => WhiteSpaceAnalyzer.new,
                       :create => true)
  doc = {
    :tag => ["Ruby", "C", "Lucene", "Ferret"],
    :body => "this is the body Document Field",
    :title => "this is the title DocField",
    :author => "this is the author field"
  }
  iw << doc
  iw.close

  # RAMDirectory.new(fs_dir) is constructed from the on-disk directory.
  ram_dir = Ferret::Store::RAMDirectory.new(fs_dir)
  ir = IndexReader.new(ram_dir)
  assert_equal(doc, ir.get_document(0).load)

  # Close the reader first, then the directories.
  ir.close
  ram_dir.close
  fs_dir.close
end
607
+
608
# Compares the term vectors of document 3 read through +ir+ (fetched singly
# and as a set) against hand-built expectations for :body, :author and :title.
def do_test_term_vectors(ir)
  # Ten offset pairs: (0, 5), (6, 11), ... (54, 59)
  offsets = (0...10).map { |i| TVOffsets.new(6 * i, 6 * i + 5) }
  terms = [
    ["word1", [2, 4, 7]],
    ["word2", [3]],
    ["word3", [0, 5, 8, 9]],
    ["word4", [1, 6]]
  ].map { |text, positions| TVTerm.new(text, positions) }
  expected_tv = TermVector.new(:body, terms, offsets)

  assert_equal(expected_tv, ir.term_vector(3, :body))

  all_vectors = ir.term_vectors(3)
  assert_equal(3, all_vectors.size)
  assert_equal(expected_tv, all_vectors[:body])

  author = all_vectors[:author]
  assert_equal(:author, author.field)
  assert_equal([TVTerm.new("Leo", [0]), TVTerm.new("Tolstoy", [1])], author.terms)
  assert(author.offsets.nil?)

  title = all_vectors[:title]
  assert_equal(:title, title.field)
  assert_equal([TVTerm.new("War And Peace", nil)], title.terms)
  assert_equal([TVOffsets.new(0, 13)], title.offsets)
end
638
+
639
# Builds the fixture index in +dir+, opens a reader, optimizes the index with
# a second writer, and runs the term-vector checks on that same reader both
# before and after the optimize.
def do_test_ir_read_while_optimizing(dir)
  builder = IndexWriter.new(:dir => dir,
                            :analyzer => WhiteSpaceAnalyzer.new,
                            :create => true,
                            :field_infos => IndexTestHelper::INDEX_TEST_FIS)
  IndexTestHelper::INDEX_TEST_DOCS.each { |doc| builder << doc }
  builder.close

  reader = IndexReader.new(dir)
  do_test_term_vectors(reader)

  optimizer = IndexWriter.new(:dir => dir, :analyzer => WhiteSpaceAnalyzer.new)
  optimizer.optimize
  optimizer.close

  # The reader was opened before the optimize and is reused afterwards.
  do_test_term_vectors(reader)

  reader.close
end
660
+
661
# Runs the read-while-optimizing scenario on the RAMDirectory from setup.
def test_ir_read_while_optimizing
  do_test_ir_read_while_optimizing(@dir)
end
664
+
665
# Runs the read-while-optimizing scenario on an FSDirectory under temp/fsdir.
def test_ir_read_while_optimizing_on_disk
  index_path = File.join(File.dirname(__FILE__), '../../temp/fsdir')
  disk_dir = Ferret::Store::FSDirectory.new(File.expand_path(index_path), true)

  do_test_ir_read_while_optimizing(disk_dir)
  disk_dir.close
end
672
+
673
# Checks latest? on an on-disk index: true right after opening, false once
# another writer has added a document, and true again for a newly opened
# reader.
#
# The FSDirectory is now closed at the end, matching
# test_ir_read_while_optimizing_on_disk; previously it was never closed.
def test_latest
  dpath = File.expand_path(File.join(File.dirname(__FILE__),
                                     '../../temp/fsdir'))
  fs_dir = Ferret::Store::FSDirectory.new(dpath, true)

  iw = IndexWriter.new(:dir => fs_dir,
                       :analyzer => WhiteSpaceAnalyzer.new,
                       :create => true)
  iw << {:field => "content"}
  iw.close

  ir = IndexReader.new(fs_dir)
  assert(ir.latest?)

  # A second writer adds a document while the reader stays open.
  iw = IndexWriter.new(:dir => fs_dir, :analyzer => WhiteSpaceAnalyzer.new)
  iw << {:field => "content2"}
  iw.close

  assert(!ir.latest?)

  ir.close
  ir = IndexReader.new(fs_dir)
  assert(ir.latest?)
  ir.close

  fs_dir.close
end
698
+ end
699
+