ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295)
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -6,44 +6,44 @@ class SortTest < Test::Unit::TestCase
6
6
  def test_basic()
7
7
  s = Sort::RELEVANCE
8
8
  assert_equal(2, s.fields.size)
9
- assert_equal(SortField::FIELD_SCORE, s.fields[0])
10
- assert_equal(SortField::FIELD_DOC, s.fields[1])
9
+ assert_equal(SortField::SCORE, s.fields[0])
10
+ assert_equal(SortField::DOC_ID, s.fields[1])
11
11
 
12
12
  s = Sort::INDEX_ORDER
13
13
  assert_equal(1, s.fields.size)
14
- assert_equal(SortField::FIELD_DOC, s.fields[0])
14
+ assert_equal(SortField::DOC_ID, s.fields[0])
15
15
  end
16
16
 
17
17
  def test_string_init()
18
- s = Sort.new("field")
18
+ s = Sort.new(:field)
19
19
  assert_equal(2, s.fields.size)
20
- assert_equal(SortField::SortType::AUTO, s.fields[0].sort_type)
21
- assert_equal("field", s.fields[0].name)
22
- assert_equal(SortField::FIELD_DOC, s.fields[1])
20
+ assert_equal(:auto, s.fields[0].type)
21
+ assert_equal(:field, s.fields[0].name)
22
+ assert_equal(SortField::DOC_ID, s.fields[1])
23
23
 
24
- s = Sort.new(["field1", "field2", "field3"])
24
+ s = Sort.new([:field1, :field2, :field3])
25
25
  assert_equal(4, s.fields.size)
26
- assert_equal(SortField::SortType::AUTO, s.fields[0].sort_type)
27
- assert_equal("field1", s.fields[0].name)
28
- assert_equal(SortField::SortType::AUTO, s.fields[1].sort_type)
29
- assert_equal("field2", s.fields[1].name)
30
- assert_equal(SortField::SortType::AUTO, s.fields[2].sort_type)
31
- assert_equal("field3", s.fields[2].name)
32
- assert_equal(SortField::FIELD_DOC, s.fields[3])
26
+ assert_equal(:auto, s.fields[0].type)
27
+ assert_equal(:field1, s.fields[0].name)
28
+ assert_equal(:auto, s.fields[1].type)
29
+ assert_equal(:field2, s.fields[1].name)
30
+ assert_equal(:auto, s.fields[2].type)
31
+ assert_equal(:field3, s.fields[2].name)
32
+ assert_equal(SortField::DOC_ID, s.fields[3])
33
33
  end
34
34
 
35
35
  def test_multi_fields()
36
- sf1 = SortField.new("field", {:sort_type => SortField::SortType::INTEGER,
37
- :reverse => true})
38
- sf2 = SortField::FIELD_SCORE
39
- sf3 = SortField::FIELD_DOC
36
+ sf1 = SortField.new(:field, {:type => :integer,
37
+ :reverse => true})
38
+ sf2 = SortField::SCORE
39
+ sf3 = SortField::DOC_ID
40
40
  s = Sort.new([sf1, sf2, sf3])
41
41
 
42
42
  assert_equal(3, s.fields.size)
43
- assert_equal(SortField::SortType::INTEGER, s.fields[0].sort_type)
44
- assert_equal("field", s.fields[0].name)
43
+ assert_equal(:integer, s.fields[0].type)
44
+ assert_equal(:field, s.fields[0].name)
45
45
  assert(s.fields[0].reverse?)
46
- assert_equal(SortField::FIELD_SCORE, s.fields[1])
47
- assert_equal(SortField::FIELD_DOC, s.fields[2])
46
+ assert_equal(SortField::SCORE, s.fields[1])
47
+ assert_equal(SortField::DOC_ID, s.fields[2])
48
48
  end
49
49
  end
@@ -4,24 +4,24 @@ class SortFieldTest < Test::Unit::TestCase
4
4
  include Ferret::Search
5
5
 
6
6
  def test_field_score()
7
- fs = SortField::FIELD_SCORE
8
- assert_equal(SortField::SortType::SCORE, fs.sort_type)
7
+ fs = SortField::SCORE
8
+ assert_equal(:score, fs.type)
9
9
  assert_nil(fs.name)
10
- assert(!fs.reverse?, "FIELD_SCORE should not be reverse")
10
+ assert(!fs.reverse?, "SCORE_ID should not be reverse")
11
11
  assert_nil(fs.comparator)
12
12
  end
13
13
 
14
14
  def test_field_doc()
15
- fs = SortField::FIELD_DOC
16
- assert_equal(SortField::SortType::DOC, fs.sort_type)
15
+ fs = SortField::DOC_ID
16
+ assert_equal(:doc_id, fs.type)
17
17
  assert_nil(fs.name)
18
- assert(!fs.reverse?, "FIELD_DOC should not be reverse")
18
+ assert(!fs.reverse?, "DOC_ID should be reverse")
19
19
  assert_nil(fs.comparator)
20
20
  end
21
21
 
22
22
  def test_error_raised()
23
23
  assert_raise(ArgumentError) {
24
- fs = SortField.new(nil, {:sort_type => SortField::SortType::INTEGER})
24
+ fs = SortField.new(nil, :type => :integer)
25
25
  }
26
26
  end
27
27
  end
@@ -2,7 +2,6 @@ require File.dirname(__FILE__) + "/../../test_helper"
2
2
 
3
3
 
4
4
  class SpansBasicTest < Test::Unit::TestCase
5
- include Ferret::Document
6
5
  include Ferret::Search
7
6
  include Ferret::Store
8
7
  include Ferret::Index
@@ -10,7 +9,11 @@ class SpansBasicTest < Test::Unit::TestCase
10
9
  include Ferret::Analysis
11
10
 
12
11
  def setup()
13
- data = [
12
+ @dir = RAMDirectory.new
13
+ iw = IndexWriter.new(:dir => @dir,
14
+ :analyzer => WhiteSpaceAnalyzer.new(),
15
+ :create => true)
16
+ [
14
17
  "start finish one two three four five six seven",
15
18
  "start one finish two three four five six seven",
16
19
  "start one two finish three four five six seven",
@@ -42,22 +45,15 @@ class SpansBasicTest < Test::Unit::TestCase
42
45
  "finish one two start three four five six seven",
43
46
  "finish one start two three four five six seven",
44
47
  "finish start one two three four five six seven"
45
- ]
46
- @dir = RAMDirectory.new
47
- iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
48
- data.each do |line|
49
- doc = Document.new()
50
- doc << Field.new("field", line, Field::Store::NO, Field::Index::TOKENIZED)
51
- iw << doc
52
- end
48
+ ].each { |line| iw << {:field => line} }
53
49
 
54
50
  iw.close()
55
51
 
56
- @is = IndexSearcher.new(@dir)
52
+ @searcher = Searcher.new(@dir)
57
53
  end
58
54
 
59
55
  def tear_down()
60
- @iw.close
56
+ @searcher.close
61
57
  @dir.close
62
58
  end
63
59
 
@@ -72,65 +68,72 @@ class SpansBasicTest < Test::Unit::TestCase
72
68
  end
73
69
 
74
70
  def check_hits(query, expected, test_explain = false, top=nil)
75
- top_docs = @is.search(query, {:num_docs => expected.length})
76
- assert_equal(expected.length, top_docs.score_docs.size)
77
- assert_equal(top, top_docs.score_docs[0].doc) if top
71
+ top_docs = @searcher.search(query, {:limit => expected.length})
72
+ assert_equal(expected.length, top_docs.hits.size)
73
+ assert_equal(top, top_docs.hits[0].doc) if top
78
74
  assert_equal(expected.length, top_docs.total_hits)
79
- top_docs.score_docs.each do |score_doc|
80
- assert(expected.include?(score_doc.doc),
81
- "#{score_doc.doc} was found unexpectedly")
75
+ top_docs.hits.each do |hit|
76
+ assert(expected.include?(hit.doc),
77
+ "#{hit.doc} was found unexpectedly")
82
78
  if test_explain
83
- assert(score_doc.score =~ @is.explain(query, score_doc.doc).value,
84
- "Scores(#{score_doc.score} != #{@is.explain(query, score_doc.doc).value})")
79
+ assert(hit.score =~ @searcher.explain(query, hit.doc).score,
80
+ "Scores(#{hit.score} != " +
81
+ "#{@searcher.explain(query, hit.doc).score})")
85
82
  end
86
83
  end
87
84
  end
88
85
 
89
86
  def test_span_term_query()
90
- tq = SpanTermQuery.new(Term.new("field", "nine"))
87
+ tq = SpanTermQuery.new(:field, "nine")
91
88
  check_hits(tq, [7,23], true)
92
- tq = SpanTermQuery.new(Term.new("field", "eight"))
89
+ tq = SpanTermQuery.new(:field, "eight")
93
90
  check_hits(tq, [6,7,8,22,23,24])
94
91
  end
95
92
 
96
93
  def test_span_near_query()
97
- tq1 = SpanTermQuery.new(Term.new("field", "start"))
98
- tq2 = SpanTermQuery.new(Term.new("field", "finish"))
99
- q = SpanNearQuery.new([tq1, tq2], 0, true)
94
+ tq1 = SpanTermQuery.new(:field, "start")
95
+ tq2 = SpanTermQuery.new(:field, "finish")
96
+ q = SpanNearQuery.new(:clauses => [tq1, tq2], :in_order => true)
100
97
  check_hits(q, [0,14], true)
101
- q = SpanNearQuery.new([tq1, tq2], 0, false)
98
+ q = SpanNearQuery.new()
99
+ q << tq1 << tq2
102
100
  check_hits(q, [0,14,16,30], true)
103
- q = SpanNearQuery.new([tq1, tq2], 1, true)
101
+ q = SpanNearQuery.new(:clauses => [tq1, tq2],
102
+ :slop => 1, :in_order => true)
104
103
  check_hits(q, [0,1,13,14])
105
- q = SpanNearQuery.new([tq1, tq2], 1, false)
104
+ q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 1)
106
105
  check_hits(q, [0,1,13,14,16,17,29,30])
107
- q = SpanNearQuery.new([tq1, tq2], 4, true)
106
+ q = SpanNearQuery.new(:clauses => [tq1, tq2],
107
+ :slop => 4, :in_order => true)
108
108
  check_hits(q, [0,1,2,3,4,10,11,12,13,14])
109
- q = SpanNearQuery.new([tq1, tq2], 4, false)
109
+ q = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4)
110
110
  check_hits(q, [0,1,2,3,4,10,11,12,13,14,16,17,18,19,20,26,27,28,29,30])
111
111
  end
112
112
 
113
113
  def test_span_not_query()
114
- tq1 = SpanTermQuery.new(Term.new("field", "start"))
115
- tq2 = SpanTermQuery.new(Term.new("field", "finish"))
116
- tq3 = SpanTermQuery.new(Term.new("field", "two"))
117
- tq4 = SpanTermQuery.new(Term.new("field", "five"))
118
- nearq1 = SpanNearQuery.new([tq1, tq2], 4, true)
119
- nearq2 = SpanNearQuery.new([tq3, tq4], 4, true)
114
+ tq1 = SpanTermQuery.new(:field, "start")
115
+ tq2 = SpanTermQuery.new(:field, "finish")
116
+ tq3 = SpanTermQuery.new(:field, "two")
117
+ tq4 = SpanTermQuery.new(:field, "five")
118
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2],
119
+ :slop => 4, :in_order => true)
120
+ nearq2 = SpanNearQuery.new(:clauses => [tq3, tq4],
121
+ :slop => 4, :in_order => true)
120
122
  q = SpanNotQuery.new(nearq1, nearq2)
121
123
  check_hits(q, [0,1,13,14], true)
122
- nearq1 = SpanNearQuery.new([tq1, tq2], 4, false)
124
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 4)
123
125
  q = SpanNotQuery.new(nearq1, nearq2)
124
126
  check_hits(q, [0,1,13,14,16,17,29,30])
125
- nearq1 = SpanNearQuery.new([tq1, tq3], 4, true)
126
- nearq2 = SpanNearQuery.new([tq2, tq4], 8, false)
127
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq3],
128
+ :slop => 4, :in_order => true)
129
+ nearq2 = SpanNearQuery.new(:clauses => [tq2, tq4], :slop => 8)
127
130
  q = SpanNotQuery.new(nearq1, nearq2)
128
131
  check_hits(q, [2,3,4,5,6,7,8,9,10,11,12,15])
129
132
  end
130
133
 
131
134
  def test_span_first_query()
132
135
  finish_first = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
133
- tq = SpanTermQuery.new(Term.new("field", "finish"))
136
+ tq = SpanTermQuery.new(:field, "finish")
134
137
  q = SpanFirstQuery.new(tq, 1)
135
138
  check_hits(q, finish_first, true)
136
139
  q = SpanFirstQuery.new(tq, 5)
@@ -138,15 +141,16 @@ class SpansBasicTest < Test::Unit::TestCase
138
141
  end
139
142
 
140
143
  def test_span_or_query_query()
141
- tq1 = SpanTermQuery.new(Term.new("field", "start"))
142
- tq2 = SpanTermQuery.new(Term.new("field", "finish"))
143
- tq3 = SpanTermQuery.new(Term.new("field", "five"))
144
- nearq1 = SpanNearQuery.new([tq1, tq2], 1, true)
145
- nearq2 = SpanNearQuery.new([tq2, tq3], 0, false)
144
+ tq1 = SpanTermQuery.new(:field, "start")
145
+ tq2 = SpanTermQuery.new(:field, "finish")
146
+ tq3 = SpanTermQuery.new(:field, "five")
147
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 1,
148
+ :in_order => true)
149
+ nearq2 = SpanNearQuery.new(:clauses => [tq2, tq3], :slop => 0)
146
150
  q = SpanOrQuery.new([nearq1, nearq2])
147
151
  check_hits(q, [0,1,4,5,9,10,13,14], false)
148
- nearq1 = SpanNearQuery.new([tq1, tq2], 0, false)
149
- nearq2 = SpanNearQuery.new([tq2, tq3], 1, false)
152
+ nearq1 = SpanNearQuery.new(:clauses => [tq1, tq2], :slop => 0)
153
+ nearq2 = SpanNearQuery.new(:clauses => [tq2, tq3], :slop => 1)
150
154
  q = SpanOrQuery.new([nearq1, nearq2])
151
155
  check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false)
152
156
  end
@@ -0,0 +1,339 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+ module SearcherTests
4
+ include Ferret::Search
5
+
6
+ def test_term_query
7
+ tq = TermQuery.new(:field, "word2")
8
+ tq.boost = 100
9
+ check_hits(tq, [1,4,8])
10
+ #puts @searcher.explain(tq, 1)
11
+ #puts @searcher.explain(tq, 4)
12
+ #puts @searcher.explain(tq, 8)
13
+
14
+ tq = TermQuery.new(:field, "2342")
15
+ check_hits(tq, [])
16
+
17
+ tq = TermQuery.new(:field, "")
18
+ check_hits(tq, [])
19
+
20
+ tq = TermQuery.new(:field, "word1")
21
+ top_docs = @searcher.search(tq)
22
+ assert_equal(@searcher.max_doc, top_docs.total_hits)
23
+ assert_equal(10, top_docs.hits.size)
24
+ top_docs = @searcher.search(tq, {:limit => 20})
25
+ assert_equal(@searcher.max_doc, top_docs.hits.size)
26
+ end
27
+
28
+ def check_docs(query, options, expected=[])
29
+ top_docs = @searcher.search(query, options)
30
+ docs = top_docs.hits
31
+ assert_equal(expected.length, docs.length)
32
+ docs.length.times do |i|
33
+ assert_equal(expected[i], docs[i].doc)
34
+ end
35
+ end
36
+
37
+ def test_offset
38
+ tq = TermQuery.new(:field, "word1")
39
+ tq.boost = 100
40
+ top_docs = @searcher.search(tq, {:limit => 100})
41
+ expected = []
42
+ top_docs.hits.each do |sd|
43
+ expected << sd.doc
44
+ end
45
+
46
+ assert_raise(ArgumentError) { @searcher.search(tq, {:offset => -1}) }
47
+ assert_raise(ArgumentError) { @searcher.search(tq, {:limit => 0}) }
48
+ assert_raise(ArgumentError) { @searcher.search(tq, {:limit => -1}) }
49
+
50
+ check_docs(tq, {:limit => 8, :offset => 0}, expected[0,8])
51
+ check_docs(tq, {:limit => 3, :offset => 1}, expected[1,3])
52
+ check_docs(tq, {:limit => 6, :offset => 2}, expected[2,6])
53
+ check_docs(tq, {:limit => 2, :offset => expected.length}, [])
54
+ check_docs(tq, {:limit => 2, :offset => expected.length + 100}, [])
55
+ end
56
+
57
+ def test_multi_term_query
58
+ mtq = MultiTermQuery.new(:field, :max_terms => 4, :min_score => 0.5)
59
+ check_hits(mtq, [])
60
+ assert_equal("<>", mtq.to_s(:field))
61
+ assert_equal("field:<>", mtq.to_s())
62
+
63
+ [
64
+ ["brown", 1.0, "<brown>"],
65
+ ["fox", 0.1, "<brown>"],
66
+ ["fox", 0.6, "<fox^0.6|brown>"],
67
+ ["fast", 50.0, "<fox^0.6|brown|fast^50.0>"]
68
+ ].each do |term, boost, str|
69
+ mtq.add_term(term, boost)
70
+ assert_equal(str, mtq.to_s(:field))
71
+ assert_equal("field:#{str}", mtq.to_s())
72
+ end
73
+
74
+ mtq.boost = 80.1
75
+ assert_equal("field:<fox^0.6|brown|fast^50.0>^80.1", mtq.to_s())
76
+ mtq << "word1"
77
+ assert_equal("field:<fox^0.6|brown|word1|fast^50.0>^80.1", mtq.to_s())
78
+ mtq << "word2"
79
+ assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
80
+ mtq << "word3"
81
+ assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
82
+ end
83
+
84
+ def test_boolean_query
85
+ bq = BooleanQuery.new()
86
+ tq1 = TermQuery.new(:field, "word1")
87
+ tq2 = TermQuery.new(:field, "word3")
88
+ bq.add_query(tq1, :must)
89
+ bq.add_query(tq2, :must)
90
+ check_hits(bq, [2,3,6,8,11,14], 14)
91
+
92
+ tq3 = TermQuery.new(:field, "word2")
93
+ bq.add_query(tq3, :should)
94
+ check_hits(bq, [2,3,6,8,11,14], 8)
95
+
96
+ bq = BooleanQuery.new()
97
+ bq.add_query(tq2, :must)
98
+ bq.add_query(tq3, :must_not)
99
+ check_hits(bq, [2,3,6,11,14])
100
+
101
+ bq = BooleanQuery.new()
102
+ bq.add_query(tq2, :must_not)
103
+ check_hits(bq, [])
104
+
105
+ bq = BooleanQuery.new()
106
+ bq.add_query(tq2, :should)
107
+ bq.add_query(tq3, :should)
108
+ check_hits(bq, [1,2,3,4,6,8,11,14])
109
+
110
+ bq = BooleanQuery.new()
111
+ bc1 = BooleanQuery::BooleanClause.new(tq2, :should)
112
+ bc2 = BooleanQuery::BooleanClause.new(tq3, :should)
113
+ bq << bc1
114
+ bq << bc2
115
+ check_hits(bq, [1,2,3,4,6,8,11,14])
116
+ end
117
+
118
+ def test_phrase_query()
119
+ pq = PhraseQuery.new(:field)
120
+ pq << "quick" << "brown" << "fox"
121
+ check_hits(pq, [1])
122
+
123
+ pq = PhraseQuery.new(:field)
124
+ pq << "quick"
125
+ pq.add_term("fox", 2)
126
+ check_hits(pq, [1,11,14])
127
+
128
+ pq.slop = 1
129
+ check_hits(pq, [1,11,14,16])
130
+
131
+ pq.slop = 4
132
+ check_hits(pq, [1,11,14,16,17])
133
+ end
134
+
135
+ def test_range_query()
136
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010")
137
+ check_hits(rq, [6,7,8,9,10])
138
+
139
+ rq = RangeQuery.new(:date, :>= => "20051006", :<= => "20051010")
140
+ check_hits(rq, [6,7,8,9,10])
141
+
142
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010",
143
+ :include_lower => false)
144
+ check_hits(rq, [7,8,9,10])
145
+
146
+ rq = RangeQuery.new(:date, :> => "20051006", :<= => "20051010")
147
+ check_hits(rq, [7,8,9,10])
148
+
149
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010",
150
+ :include_upper => false)
151
+ check_hits(rq, [6,7,8,9])
152
+
153
+ rq = RangeQuery.new(:date, :>= => "20051006", :< => "20051010")
154
+ check_hits(rq, [6,7,8,9])
155
+
156
+ rq = RangeQuery.new(:date, :lower => "20051006", :upper => "20051010",
157
+ :include_lower => false, :include_upper => false)
158
+ check_hits(rq, [7,8,9])
159
+
160
+ rq = RangeQuery.new(:date, :> => "20051006", :< => "20051010")
161
+ check_hits(rq, [7,8,9])
162
+
163
+ rq = RangeQuery.new(:date, :upper => "20051003")
164
+ check_hits(rq, [0,1,2,3])
165
+
166
+ rq = RangeQuery.new(:date, :<= => "20051003")
167
+ check_hits(rq, [0,1,2,3])
168
+
169
+ rq = RangeQuery.new(:date, :upper => "20051003", :include_upper => false)
170
+ check_hits(rq, [0,1,2])
171
+
172
+ rq = RangeQuery.new(:date, :< => "20051003")
173
+ check_hits(rq, [0,1,2])
174
+
175
+ rq = RangeQuery.new(:date, :lower => "20051014")
176
+ check_hits(rq, [14,15,16,17])
177
+
178
+ rq = RangeQuery.new(:date, :>= => "20051014")
179
+ check_hits(rq, [14,15,16,17])
180
+
181
+ rq = RangeQuery.new(:date, :lower => "20051014", :include_lower => false)
182
+ check_hits(rq, [15,16,17])
183
+
184
+ rq = RangeQuery.new(:date, :> => "20051014")
185
+ check_hits(rq, [15,16,17])
186
+ end
187
+
188
+ def test_prefix_query()
189
+ pq = PrefixQuery.new(:category, "cat1")
190
+ check_hits(pq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17])
191
+
192
+ pq = PrefixQuery.new(:category, "cat1/sub2")
193
+ check_hits(pq, [3, 4, 13, 15])
194
+ end
195
+
196
+ def test_wildcard_query()
197
+ wq = WildcardQuery.new(:category, "cat1*")
198
+ check_hits(wq, [0, 1, 2, 3, 4, 13, 14, 15, 16, 17])
199
+
200
+ wq = WildcardQuery.new(:category, "cat1*/su??ub2")
201
+ check_hits(wq, [4, 16])
202
+ end
203
+
204
# Builds a three-position PhraseQuery on :field in which the first two
# positions each offer several alternative terms, then checks the hits
# before and after setting the slop to 4.
def test_multi_phrase_query
  positions = [
    ["quick", "fast"],
    ["brown", "red", "hairy"],
    "fox"
  ]

  mpq = PhraseQuery.new(:field)
  positions.each { |terms| mpq << terms }
  check_hits(mpq, [1, 8, 11, 14])

  # With a slop of 4 two further documents are expected to match.
  mpq.slop = 4
  check_hits(mpq, [1, 8, 11, 14, 16, 17])
end
214
+
215
# Exercises Searcher#highlight on a RAM index that holds a single document
# with one :field value, analyzed with WhiteSpaceAnalyzer.
#
# Each case requests excerpts for document 0 with a given :excerpt_length
# and :num_excerpts, then asserts the number of excerpts returned and the
# exact text of each one. Highlighting a field that is not in the document
# is expected to return nil.
def test_highlighter
  text = "the words we are searching for are one and two also " +
         "sometimes looking for them as a phrase like this; one " +
         "two lets see how it goes"
  # The whole field with every occurrence of "one" marked up.
  marked_text = "the words we are searching for are <b>one</b> and two also " +
                "sometimes looking for them as a phrase like this; <b>one</b> " +
                "two lets see how it goes"

  dir = Ferret::Store::RAMDirectory.new
  analyzer = Ferret::Analysis::WhiteSpaceAnalyzer.new
  iw = Ferret::Index::IndexWriter.new(:dir => dir, :analyzer => analyzer)
  doc = {:field => text}
  iw << doc
  iw.close

  searcher = Searcher.new(dir)

  # Requests excerpts for document 0.
  highlight = lambda do |query, field, length, count|
    searcher.highlight(query, 0, field,
                       :excerpt_length => length,
                       :num_excerpts => count)
  end

  # Asserts the excerpt count first, then each excerpt in order.
  verify = lambda do |expected, actual|
    assert_equal(expected.size, actual.size)
    expected.each_with_index do |excerpt, i|
      assert_equal(excerpt, actual[i])
    end
  end

  # Single term query: [excerpt_length, num_excerpts, expected excerpts].
  q = TermQuery.new(:field, "one")
  [
    [10, 1, ["...are <b>one</b>..."]],
    [10, 2, ["...are <b>one</b>...",
             "...this; <b>one</b>..."]],
    [10, 3, ["the words...",
             "...are <b>one</b>...",
             "...this; <b>one</b>..."]],
    [10, 4, ["the words we are...",
             "...are <b>one</b>...",
             "...this; <b>one</b>..."]],
    [10, 5, ["the words we are searching for are <b>one</b>...",
             "...this; <b>one</b>..."]],
    [10, 20, [marked_text]],
    [1000, 1, [marked_text]]
  ].each do |length, count, expected|
    verify.call(expected, highlight.call(q, :field, length, count))
  end

  # Boolean query over two separate terms.
  q = BooleanQuery.new(false)
  q << TermQuery.new(:field, "one")
  q << TermQuery.new(:field, "two")

  verify.call(["...<b>one</b> and <b>two</b>...",
               "...this; <b>one</b> <b>two</b>..."],
              highlight.call(q, :field, 15, 2))

  # Adding the phrase "one two" merges the adjacent matches into one tag.
  q << (PhraseQuery.new(:field) << "one" << "two")

  verify.call(["...<b>one</b> and <b>two</b>...",
               "...this; <b>one two</b>..."],
              highlight.call(q, :field, 15, 2))

  # should have a higher priority since it is the merger of three matches
  verify.call(["...this; <b>one two</b>..."],
              highlight.call(q, :field, 15, 1))

  # Fields that do not exist in the document yield nil.
  assert_nil(highlight.call(q, :not_a_field, 15, 1))

  q = TermQuery.new(:wrong_field, "one")
  assert_nil(highlight.call(q, :wrong_field, 15, 1))

  # Overlapping phrase and term matches.
  q = BooleanQuery.new(false)
  q << (PhraseQuery.new(:field) << "the" << "words")
  q << (PhraseQuery.new(:field) << "for" << "are" << "one" << "and" << "two")
  q << TermQuery.new(:field, "words")
  q << TermQuery.new(:field, "one")
  q << TermQuery.new(:field, "two")

  verify.call(["...<b>for are one and two</b>..."],
              highlight.call(q, :field, 10, 1))

  verify.call(["<b>the words</b>...",
               "...<b>for are one and two</b>..."],
              highlight.call(q, :field, 10, 2))
end
339
+ end