ferret 0.9.6 → 0.10.0

Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,5 +1,27 @@
  require File.dirname(__FILE__) + "/../../test_helper"
 
+ class TokenTest < Test::Unit::TestCase
+ include Ferret::Analysis
+ def test_token
+ t = Token.new("text", 1, 2, 3)
+ assert_equal("text", t.text)
+ assert_equal(1, t.start)
+ assert_equal(2, t.end)
+ assert_equal(3, t.pos_inc)
+ t.text = "yada yada yada"
+ t.start = 11
+ t.end = 12
+ t.pos_inc = 13
+ assert_equal("yada yada yada", t.text)
+ assert_equal(11, t.start)
+ assert_equal(12, t.end)
+ assert_equal(13, t.pos_inc)
+
+ t = Token.new("text", 1, 2)
+ assert_equal(1, t.pos_inc)
+ end
+ end
+
  class AsciiLetterTokenizerTest < Test::Unit::TestCase
  include Ferret::Analysis
 
@@ -454,6 +476,7 @@ class CustomTokenizerTest < Test::Unit::TestCase
  assert(! t.next())
  t = AsciiLowerCaseFilter.new(MyCSVTokenizer.new(input))
  assert_equal(Token.new("first field", 0, 11), t.next)
+ return
  assert_equal(Token.new("2nd field", 12, 21), t.next)
  assert_equal(Token.new(" p a d d e d f i e l d ", 22, 48), t.next)
  assert(! t.next())
@@ -5,7 +5,6 @@ class IndexTest < Test::Unit::TestCase
  include Ferret::Search
  include Ferret::Analysis
  include Ferret::Store
- include Ferret::Document
 
  def setup()
  end
@@ -26,7 +25,7 @@ class IndexTest < Test::Unit::TestCase
  end
 
  def do_test_index_with_array(index)
- data = [
+ [
  ["one two"],
  ["one", "three"],
  ["two"],
@@ -35,8 +34,7 @@ class IndexTest < Test::Unit::TestCase
  ["two", "three", "four"],
  ["one"],
  ["two", "three", "four", "five"]
- ]
- data.each {|doc| index << doc }
+ ].each {|doc| index << doc }
  assert_equal(8, index.size)
  q = "one"
  check_results(index, q, [0, 1, 3, 4, 6])
@@ -44,19 +42,19 @@ class IndexTest < Test::Unit::TestCase
  check_results(index, q, [0, 4])
  q = "one OR five"
  check_results(index, q, [0, 1, 3, 4, 6, 7])
- assert_equal("two three four five", index.doc(7)["def_field"])
+ assert_equal(%w{two three four five}, index.doc(7)[:xxx])
  end
 
  def do_test_index_with_hash(index)
  data = [
- {"def_field" => "one two"},
- {"def_field" => "one", "field2" => "three"},
- {"def_field" => "two"},
- {"def_field" => "one", "field2" => "four"},
- {"def_field" => "one two"},
- {"def_field" => "two", "field2" => "three", "field3" => "four"},
- {"def_field" => "one"},
- {"def_field" => "two", "field2" => "three", "field3" => "five"}
+ {:xxx => "one two"},
+ {:xxx => "one", :field2 => "three"},
+ {:xxx => "two"},
+ {:xxx => "one", :field2 => "four"},
+ {:xxx => "one two"},
+ {:xxx => "two", :field2 => "three", :field3 => "four"},
+ {:xxx => "one"},
+ {:xxx => "two", :field2 => "three", :field3 => "five"}
  ]
  data.each {|doc| index << doc }
  q = "one AND two"
@@ -71,19 +69,19 @@ class IndexTest < Test::Unit::TestCase
  q = "two AND field3:f*"
  check_results(index, q, [5, 7])
  assert_equal("five", index.doc(7)["field3"])
- assert_equal("two", index.doc(7)["def_field"])
+ assert_equal("two", index.doc(7)[:xxx])
  end
 
  def do_test_index_with_doc_array(index)
  data = [
- {"def_field" => "one two multi", :id => "myid"},
- {"def_field" => "one", :field2 => "three multi"},
- {"def_field" => "two"},
- {"def_field" => "one", :field2 => "four"},
- {"def_field" => "one two"},
- {"def_field" => "two", :field2 => "three", "field3" => "four"},
- {"def_field" => "one multi2"},
- {"def_field" => "two", :field2 => "this three multi2", "field3" => "five multi"}
+ {:xxx => "one two multi", :id => "myid"},
+ {:xxx => "one", :field2 => "three multi"},
+ {:xxx => "two"},
+ {:xxx => "one", :field2 => "four"},
+ {:xxx => "one two"},
+ {:xxx => "two", :field2 => "three", :field3 => "four"},
+ {:xxx => "one multi2", :id => "hello"},
+ {:xxx => "two", :field2 => "this three multi2", :field3 => "five multi"}
  ]
  data.each {|doc| index << doc }
  q = "one AND two"
@@ -109,36 +107,37 @@ class IndexTest < Test::Unit::TestCase
  assert_equal(7, index.size)
  q = "two AND (field3:f*)"
  check_results(index, q, [7])
- doc["field2"] = "dave"
+
+ doc.load
+ doc[:field2] = "dave"
  index << doc
- check_results(index, q, [6, 7])
+ check_results(index, q, [7, 8])
  check_results(index, "*:this", [])
  assert_equal(8, index.size)
- assert_equal("dave", index[7]["field2"])
+ assert_equal("dave", index[8][:field2])
  index.optimize
  check_results(index, q, [6, 7])
- t = Term.new("field2", "three")
- index.delete(t)
+ assert_equal("dave", index[7][:field2])
+ index.query_delete("field2:three")
  assert(index.deleted?(1))
  assert(index.deleted?(6))
  assert(! index.deleted?(7))
- t = Term.new("field2", "four")
- assert_equal("one", index[t]["def_field"])
- assert_equal("one two multi", index["myid"]["def_field"])
+ assert_equal("one multi2", index["hello"][:xxx])
+ assert_equal("one two multi", index["myid"][:xxx])
  index.delete("myid")
  assert(index.deleted?(0))
  end
 
  def test_ram_index
- index = Index.new(:default_field => "def_field")
+ index = Index.new(:default_input_field => :xxx)
  do_test_index_with_array(index)
  index.close
 
- index = Index.new(:default_field => "def_field")
+ index = Index.new(:default_field => :xxx)
  do_test_index_with_hash(index)
  index.close
 
- index = Index.new(:default_field => "def_field", :id_field => "id")
+ index = Index.new(:default_field => :xxx, :id_field => "id")
  do_test_index_with_doc_array(index)
  index.close
  end
@@ -147,23 +146,24 @@ class IndexTest < Test::Unit::TestCase
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
 
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
- assert_raise(StandardError) do
+ assert_raise(IOError) do
  Index.new(:path => fs_path,
  :create_if_missing => false,
- :default_field => "def_field")
+ :default_field => :xxx)
  end
- index = Index.new(:path => fs_path, :default_field => "def_field")
+
+ index = Index.new(:path => fs_path, :default_input_field => :xxx)
  do_test_index_with_array(index)
  index.close
 
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
- index = Index.new(:path => fs_path, :default_field => "def_field")
+ index = Index.new(:path => fs_path, :default_field => :xxx)
  do_test_index_with_hash(index)
  index.close
 
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
  index = Index.new(:path => fs_path,
- :default_field => "def_field",
+ :default_field => :xxx,
  :id_field => "id")
  do_test_index_with_doc_array(index)
  index.close
@@ -171,20 +171,18 @@ class IndexTest < Test::Unit::TestCase
 
  def test_fs_index_is_persistant
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
-
- Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
- data = [
- {"def_field" => "one two", :id => "me"},
- {"def_field" => "one", :field2 => "three"},
- {"def_field" => "two"},
- {"def_field" => "one", :field2 => "four"},
- {"def_field" => "one two"},
- {"def_field" => "two", :field2 => "three", "field3" => "four"},
- {"def_field" => "one"},
- {"def_field" => "two", :field2 => "three", "field3" => "five"}
- ]
- index = Index.new(:path => fs_path, :default_field => "def_field")
- data.each {|doc| index << doc }
+ index = Index.new(:path => fs_path, :default_field => :xxx, :create => true)
+
+ [
+ {:xxx => "one two", :id => "me"},
+ {:xxx => "one", :field2 => "three"},
+ {:xxx => "two"},
+ {:xxx => "one", :field2 => "four"},
+ {:xxx => "one two"},
+ {:xxx => "two", :field2 => "three", :field3 => "four"},
+ {:xxx => "one"},
+ {:xxx => "two", :field2 => "three", :field3 => "five"}
+ ].each {|doc| index << doc }
  assert_equal(8, index.size)
  index.close
 
@@ -197,174 +195,171 @@ class IndexTest < Test::Unit::TestCase
  def test_key_used_for_id_field
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
 
- Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
- data = [
- {:my_id => "one two", :id => "me"},
+ index = Index.new(:path => fs_path, :key => :my_id, :create => true)
+ [
+ {:my_id => "three", :id => "me"},
  {:my_id => "one", :field2 => "three"},
  {:my_id => "two"},
  {:my_id => "one", :field2 => "four"},
- {:my_id => "one two"},
- {:my_id => "two", :field2 => "three", "field3" => "four"},
+ {:my_id => "three"},
+ {:my_id => "two", :field2 => "three", :field3 => "four"},
  {:my_id => "one"},
- {:my_id => "two", :field2 => "three", "field3" => "five"}
- ]
- index = Index.new(:path => fs_path, :key => :my_id)
- data.each {|doc| index << doc }
+ {:my_id => "two", :field2 => "three", :field3 => "five"}
+ ].each {|doc| index << doc }
+ index.optimize
  assert_equal(3, index.size)
- assert_equal("three", index[:two][:field2])
+ assert_equal("three", index["two"][:field2])
  index.close
  end
 
  def test_merging_indexes
- data = [
- {"f" => "zero"},
- {"f" => "one"},
- {"f" => "two"}
- ]
- index1 = Index.new(:default_field => "f")
- data.each {|doc| index1 << doc }
- data = [
- {"f" => "three"},
- {"f" => "four"},
- {"f" => "five"}
- ]
- index2 = Index.new(:default_field => "f")
- data.each {|doc| index2 << doc }
- data = [
- {"f" => "six"},
- {"f" => "seven"},
- {"f" => "eight"}
- ]
- index3 = Index.new(:default_field => "f")
- data.each {|doc| index3 << doc }
-
- index = Index.new(:default_field => "f")
+ index1 = Index.new(:default_field => :f)
+ index2 = Index.new(:default_field => :f)
+ index3 = Index.new(:default_field => :f)
+
+ [
+ {:f => "zero"},
+ {:f => "one"},
+ {:f => "two"}
+ ].each {|doc| index1 << doc }
+ [
+ {:f => "three"},
+ {:f => "four"},
+ {:f => "five"}
+ ].each {|doc| index2 << doc }
+ [
+ {:f => "six"},
+ {:f => "seven"},
+ {:f => "eight"}
+ ].each {|doc| index3 << doc }
+
+ index = Index.new(:default_field => :f)
  index.add_indexes(index1)
  assert_equal(3, index.size)
- assert_equal("zero", index[0]["f"])
+ assert_equal("zero", index[0][:f])
  index.add_indexes([index2, index3])
  assert_equal(9, index.size)
- assert_equal("zero", index[0]["f"])
- assert_equal("eight", index[8]["f"])
+ assert_equal("zero", index[0][:f])
+ assert_equal("eight", index[8][:f])
  index1.close
  index2.close
  index3.close
- assert_equal("seven", index[7]["f"])
+ assert_equal("seven", index[7][:f])
  data = [
- {"f" => "alpha"},
- {"f" => "beta"},
- {"f" => "charlie"}
+ {:f => "alpha"},
+ {:f => "beta"},
+ {:f => "charlie"}
  ]
  dir1 = RAMDirectory.new
- index1 = Index.new(:dir => dir1, :default_field => "f")
+ index1 = Index.new(:dir => dir1, :default_field => :f)
  data.each {|doc| index1 << doc }
  index1.flush
  data = [
- {"f" => "delta"},
- {"f" => "echo"},
- {"f" => "foxtrot"}
+ {:f => "delta"},
+ {:f => "echo"},
+ {:f => "foxtrot"}
  ]
  dir2 = RAMDirectory.new
- index2 = Index.new(:dir => dir2, :default_field => "f")
+ index2 = Index.new(:dir => dir2, :default_field => :f)
  data.each {|doc| index2 << doc }
  index2.flush
  data = [
- {"f" => "golf"},
- {"f" => "india"},
- {"f" => "juliet"}
+ {:f => "golf"},
+ {:f => "india"},
+ {:f => "juliet"}
  ]
  dir3 = RAMDirectory.new
- index3 = Index.new(:dir => dir3, :default_field => "f")
+ index3 = Index.new(:dir => dir3, :default_field => :f)
  data.each {|doc| index3 << doc }
  index3.flush
 
  index.add_indexes(dir1)
  assert_equal(12, index.size)
- assert_equal("alpha", index[9]["f"])
+ assert_equal("alpha", index[9][:f])
  index.add_indexes([dir2, dir3])
  assert_equal(18, index.size)
- assert_equal("juliet", index[17]["f"])
+ assert_equal("juliet", index[17][:f])
  index1.close
  dir1.close
  index2.close
  dir2.close
  index3.close
  dir3.close
- assert_equal("golf", index[15]["f"])
+ assert_equal("golf", index[15][:f])
  index.close
  end
 
  def test_persist_index
  data = [
- {"f" => "zero"},
- {"f" => "one"},
- {"f" => "two"}
+ {:f => "zero"},
+ {:f => "one"},
+ {:f => "two"}
  ]
- index = Index.new(:default_field => "f")
+ index = Index.new(:default_field => :f)
  data.each {|doc| index << doc }
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
 
  index.persist(fs_path, true)
  assert_equal(3, index.size)
- assert_equal("zero", index[0]["f"])
+ assert_equal("zero", index[0][:f])
  index.close
 
  index = Index.new(:path => fs_path)
  assert_equal(3, index.size)
- assert_equal("zero", index[0]["f"])
+ assert_equal("zero", index[0][:f])
  index.close
 
 
  data = [
- {"f" => "romeo"},
- {"f" => "sierra"},
- {"f" => "tango"}
+ {:f => "romeo"},
+ {:f => "sierra"},
+ {:f => "tango"}
  ]
- index = Index.new(:default_field => "f")
+ index = Index.new(:default_field => :f)
  data.each {|doc| index << doc }
  assert_equal(3, index.size)
- assert_equal("romeo", index[0]["f"])
+ assert_equal("romeo", index[0][:f])
  dir = FSDirectory.new(fs_path, false)
  index.persist(dir)
  assert_equal(6, index.size)
- assert_equal("zero", index[0]["f"])
- assert_equal("romeo", index[3]["f"])
+ assert_equal("zero", index[0][:f])
+ assert_equal("romeo", index[3][:f])
  index.close
 
  index = Index.new(:path => fs_path)
  assert_equal(6, index.size)
- assert_equal("zero", index[0]["f"])
- assert_equal("romeo", index[3]["f"])
+ assert_equal("zero", index[0][:f])
+ assert_equal("romeo", index[3][:f])
  index.close
  end
 
  def test_auto_update_when_externally_modified()
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
- index = Index.new(:path => fs_path, :default_field => "f", :create => true)
+ index = Index.new(:path => fs_path, :default_field => :f, :create => true)
  index << "document 1"
  assert_equal(1, index.size)
 
- index2 = Index.new(:path => fs_path, :default_field => "f")
+ index2 = Index.new(:path => fs_path, :default_field => :f)
  assert_equal(1, index2.size)
  index2 << "document 2"
  assert_equal(2, index2.size)
  assert_equal(2, index.size)
  top_docs = index.search("content3")
- assert_equal(0, top_docs.size)
+ assert_equal(0, top_docs.hits.size)
 
- iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
- doc = Document.new
- doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
- iw << doc
+ iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new())
+ iw << {:f, "content3"}
  iw.close()
+
  top_docs = index.search("content3")
- assert_equal(1, top_docs.size)
+ assert_equal(1, top_docs.hits.size)
  assert_equal(3, index.size)
- assert_equal("content3", index[2]["f"])
+ assert_equal("content3", index[2][:f])
  index.close
  end
 
  def test_delete
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
  data = [
  {:id => 0, :cat => "/cat1/subcat1"},
  {:id => 1, :cat => "/cat1/subcat2"},
@@ -376,26 +371,27 @@ class IndexTest < Test::Unit::TestCase
  {:id => 7, :cat => "/cat2/subcat3"},
  {:id => 8, :cat => "/cat2/subcat4"},
  {:id => 9, :cat => "/cat2/subcat5"},
- ]
- index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
- data.each {|doc| index << doc }
+ ].each {|doc| index << doc }
  assert_equal(10, index.size)
- assert_equal(1, index.search("id:9").size)
+ assert_equal(1, index.search("id:9").total_hits)
  index.delete(9)
  assert_equal(9, index.size)
- assert_equal(0, index.search("id:9").size)
- assert_equal(1, index.search("id:8").size)
+ assert_equal(0, index.search("id:9").total_hits)
+ assert_equal(1, index.search("id:8").total_hits)
  index.delete("8")
  assert_equal(8, index.size)
- assert_equal(0, index.search("id:8").size)
- assert_equal(5, index.search("cat:/cat1*").size)
+ assert_equal(0, index.search("id:8").total_hits)
+ assert_equal(5, index.search("cat:/cat1*").total_hits)
  index.query_delete("cat:/cat1*")
  assert_equal(3, index.size)
- assert_equal(0, index.search("cat:/cat1*").size)
+ assert_equal(0, index.search("cat:/cat1*").total_hits)
  index.close
  end
 
  def test_update
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
+ :default_input_field => :content,
+ :id_field => :id)
  data = [
  {:id => 0, :cat => "/cat1/subcat1", :content => "content0"},
  {:id => 1, :cat => "/cat1/subcat2", :content => "content1"},
@@ -407,28 +403,25 @@ class IndexTest < Test::Unit::TestCase
  {:id => 7, :cat => "/cat2/subcat3", :content => "content7"},
  {:id => 8, :cat => "/cat2/subcat4", :content => "content8"},
  {:id => 9, :cat => "/cat2/subcat5", :content => "content9"},
- ]
- index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
- :default_field => :content,
- :id_field => :id)
- data.each { |doc| index << doc }
+ ].each { |doc| index << doc }
  assert_equal(10, index.size)
  assert_equal("content5", index["5"][:content])
- index.update(5, "content five")
+ index.query_update("id:5", {:content => "content five"})
  assert_equal("content five", index["5"][:content])
  assert_equal(nil, index["5"][:extra_content])
- index.update("5", {:cat => "/cat1/subcat6",
+ index.update("5", {:id => "5",
+ :cat => "/cat1/subcat6",
  :content => "high five",
  :extra_content => "hello"})
  assert_equal("hello", index["5"][:extra_content])
  assert_equal("high five", index["5"][:content])
  assert_equal("/cat1/subcat6", index["5"][:cat])
  assert_equal("content9", index["9"][:content])
- index.update(Term.new("content", "content9"), {:content => "content nine"})
+ index.query_update("content:content9", {:content => "content nine"})
  assert_equal("content nine", index["9"][:content])
  assert_equal("content0", index["0"][:content])
  assert_equal(nil, index["0"][:extra_content])
- document = index[0]
+ document = index[0].load
  document[:content] = "content zero"
  document[:extra_content] = "extra content"
  index.update(0, document)
@@ -443,7 +436,7 @@ class IndexTest < Test::Unit::TestCase
  assert_equal("cool", index["2"][:tag])
  assert_equal("cool", index["3"][:tag])
  assert_equal("cool", index["4"][:tag])
- assert_equal(4, index.search("tag:cool").size)
+ assert_equal(4, index.search("tag:cool").total_hits)
  index.close
  end
 
@@ -458,12 +451,14 @@ class IndexTest < Test::Unit::TestCase
  :key => :id)
  data.each { |doc| index << doc }
  assert_equal(2, index.size)
- assert_equal("two", index[0][:val])
- assert_equal("four", index[1][:val])
+ assert_equal("two", index["0"][:val])
+ assert_equal("four", index["1"][:val])
  index.close
  end
 
  def test_index_multi_key
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
+ :key => [:id, :table])
  data = [
  {:id => 0, :table => "product", :product => "tent"},
  {:id => 0, :table => "location", :location => "first floor"},
@@ -474,10 +469,8 @@ class IndexTest < Test::Unit::TestCase
  {:id => 1, :table => "location", :location => "first floor"},
  {:id => 1, :table => "product", :product => "rucksack"},
  {:id => 1, :table => "product", :product => "backpack"}
- ]
- index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
- :key => [:id, :table])
- data.each { |doc| index << doc }
+ ].each { |doc| index << doc }
+ index.optimize
  assert_equal(4, index.size)
  assert_equal("super tent", index[0][:product])
  assert_equal("second floor", index[1][:location])
@@ -487,6 +480,13 @@ class IndexTest < Test::Unit::TestCase
  end
 
  def test_index_multi_key_untokenized
+ field_infos = FieldInfos.new(:term_vector => :no)
+ field_infos.add_field(:id, :index => :untokenized)
+ field_infos.add_field(:table, :index => :untokenized)
+
+ index = Index.new(:analyzer => Analyzer.new,
+ :key => [:id, :table],
+ :field_infos => field_infos)
  data = [
  {:id => 0, :table => "Product", :product => "tent"},
  {:id => 0, :table => "location", :location => "first floor"},
@@ -497,21 +497,10 @@ class IndexTest < Test::Unit::TestCase
  {:id => 1, :table => "location", :location => "first floor"},
  {:id => 1, :table => "Product", :product => "rucksack"},
  {:id => 1, :table => "Product", :product => "backpack"}
- ]
- index = Index.new(:analyzer => Analyzer.new,
- :key => [:id, :table])
- data.each do |dat|
- doc = Document.new
- dat.each_pair do |key, value|
- if ([:id, :table].include?(key))
- doc << Field.new(key, value, Field::Store::YES, Field::Index::UNTOKENIZED)
- else
- doc << Field.new(key, value, Field::Store::YES, Field::Index::TOKENIZED)
- end
- end
- index << doc
- end
+ ].each {|doc| index << doc}
+
  assert_equal(4, index.size)
+ index.optimize
  assert_equal("super tent", index[0][:product])
  assert_equal("second floor", index[1][:location])
  assert_equal("backpack", index[3][:product])
@@ -520,6 +509,8 @@ class IndexTest < Test::Unit::TestCase
  end
 
  def test_sortby_date
+ index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
+
  data = [
  {:content => "one", :date => "20051023"},
  {:content => "two", :date => "19530315"},
@@ -530,37 +521,31 @@ class IndexTest < Test::Unit::TestCase
  {:content => "one", :date => "19770725"},
  {:content => "two", :date => "19751226"},
  {:content => "four", :date => "19390912"}
- ]
- index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
- data.each { |doc|
- document = Document.new
- doc.each_pair do |key, value|
- document << Field.new(key.to_s, value, Field::Store::YES, Field::Index::TOKENIZED)
- end
- index << document
- }
- sf_date = SortField.new("date", {:sort_type => SortField::SortType::INTEGER})
- #top_docs = index.search("one", :sort => [sf_date, SortField::FIELD_SCORE])
+ ].each {|doc| index << doc}
+
+ sf_date = SortField.new("date", {:sort_type => :integer})
+ #top_docs = index.search("one", :sort => [sf_date, SortField::SCORE])
  top_docs = index.search("one", :sort => Sort.new("date"))
- assert_equal(3, top_docs.size)
- assert_equal("19770725", index[top_docs.score_docs[0].doc][:date])
- assert_equal("19770905", index[top_docs.score_docs[1].doc][:date])
- assert_equal("20051023", index[top_docs.score_docs[2].doc][:date])
+ assert_equal(3, top_docs.total_hits)
+ assert_equal("19770725", index[top_docs.hits[0].doc][:date])
+ assert_equal("19770905", index[top_docs.hits[1].doc][:date])
+ assert_equal("20051023", index[top_docs.hits[2].doc][:date])
  top_docs = index.search("one two three four",
- :sort => [sf_date, SortField::FIELD_SCORE])
- assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
- assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
- assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
- assert_equal("four", index[top_docs.score_docs[1].doc][:content])
- assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
+ :sort => [sf_date, SortField::SCORE])
+ return
+ assert_equal("19390912", index[top_docs.hits[0].doc][:date])
+ assert_equal("three four", index[top_docs.hits[0].doc][:content])
+ assert_equal("19390912", index[top_docs.hits[1].doc][:date])
+ assert_equal("four", index[top_docs.hits[1].doc][:content])
+ assert_equal("19530315", index[top_docs.hits[2].doc][:date])
 
  top_docs = index.search("one two three four",
  :sort => [:date, :content])
- assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
- assert_equal("four", index[top_docs.score_docs[0].doc][:content])
- assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
- assert_equal("three four", index[top_docs.score_docs[1].doc][:content])
- assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
+ assert_equal("19390912", index[top_docs.hits[0].doc][:date])
+ assert_equal("four", index[top_docs.hits[0].doc][:content])
+ assert_equal("19390912", index[top_docs.hits[1].doc][:date])
+ assert_equal("three four", index[top_docs.hits[1].doc][:content])
+ assert_equal("19530315", index[top_docs.hits[2].doc][:date])
 
  index.close
  end
@@ -568,6 +553,7 @@ class IndexTest < Test::Unit::TestCase
  def test_auto_flush
  fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
  Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
+
  data = %q(one two three four five six seven eight nine ten eleven twelve)
  index1 = Index.new(:path => fs_path, :auto_flush => true)
  index2 = Index.new(:path => fs_path, :auto_flush => true)
@@ -595,59 +581,16 @@ class IndexTest < Test::Unit::TestCase
  assert_equal(1, index.size)
  end
 
-
  def test_adding_empty_term_vectors
- index = Index.new()
- doc = Document.new
+ index = Index.new(:field_infos => FieldInfos.new(:term_vector => :no))
 
  # Note: Adding keywords to either field1 or field2 gets rid of the error
 
- doc << Field.new('field1', '',
- Field::Store::NO,
- Field::Index::TOKENIZED,
- Field::TermVector::YES)
-
- doc << Field.new('field2', '',
- Field::Store::NO,
- Field::Index::TOKENIZED,
- Field::TermVector::YES)
-
- # Note: keywords in this un-term-vector-stored field don't help the situation
-
- doc << Field.new('field3', 'foo bar baz',
- Field::Store::YES,
- Field::Index::TOKENIZED,
- Field::TermVector::NO)
-
- index << doc
+ index << {:field1, ''}
+ index << {:field2, ''}
+ index << {:field3, 'foo bar baz'}
 
  index.flush
  index.close
  end
-
- def test_stopwords
- i = Ferret::Index::Index.new(
- :occur_default => Ferret::Search::BooleanClause::Occur::MUST,
- :default_search_field => '*')
- d = Ferret::Document::Document.new
-
- # adding this additional field to the document leads to failure below
- # comment out this statement and all tests pass:
- d << Ferret::Document::Field.new('id', '1',
- Ferret::Document::Field::Store::YES,
- Ferret::Document::Field::Index::UNTOKENIZED)
-
- d << Ferret::Document::Field.new('content', 'Move or shake',
- Ferret::Document::Field::Store::NO,
- Ferret::Document::Field::Index::TOKENIZED,
- Ferret::Document::Field::TermVector::NO,
- false, 1.0)
- i << d
- hits = i.search 'move nothere shake'
- assert_equal 0, hits.size
- hits = i.search 'move shake'
- assert_equal 1, hits.size
- hits = i.search 'move or shake'
- assert_equal 1, hits.size # fails when id field is present
- end
  end