ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,108 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class TermVectorsIOTest < Test::Unit::TestCase
5
-
6
- include Ferret::Index
7
-
8
- def setup()
9
- @dir = Ferret::Store::RAMDirectory.new
10
- @fis = FieldInfos.new
11
- @fis.add("field1", true, true, true, true)
12
- @fis.add("field2", true, true)
13
- end
14
-
15
- def tear_down()
16
- @dir.close()
17
- end
18
-
19
- def test_tv_io_add_fields()
20
- tv_w = TermVectorsWriter.new(@dir, "_test", @fis)
21
- tv_w.open_document
22
- assert(tv_w.document_open?)
23
- tv_w.open_field("field1")
24
- tv_w.add_term("text1", 1, [1], [t(0,4)])
25
- tv_w.add_term("text2", 2, [3,4], [t(5,10), t(11,16)])
26
- tv_w.close_field()
27
- tv_w.close_document()
28
- tv_w.close()
29
-
30
- tv_r = TermVectorsReader.new(@dir, "_test", @fis)
31
- assert_equal(1, tv_r.size)
32
- tv = tv_r.get_field_tv(0, "field1")
33
-
34
- assert_equal(2, tv.size)
35
- assert_equal("text1", tv.terms[0])
36
- assert_equal(1, tv.freqs[0])
37
- assert_equal(1, tv.positions[0][0])
38
- assert_equal(t(0,4), tv.offsets[0][0])
39
-
40
- assert_equal("text2", tv.terms[1])
41
- assert_equal(2, tv.freqs[1])
42
- assert_equal(3, tv.positions[1][0])
43
- assert_equal(t(5,10), tv.offsets[1][0])
44
- assert_equal(4, tv.positions[1][1])
45
- assert_equal(t(11,16), tv.offsets[1][1])
46
- tv_r.close
47
- end
48
-
49
- def test_tv_io_add_documents()
50
- tvs1 = []
51
- tvs2 = []
52
- tv = SegmentTermVector.new("field1",
53
- ["word1", "word2"],
54
- [3, 2],
55
- [[1, 5, 8], [2, 9]],
56
- [[t(0,5), t(34,39), t(45,50)],[t(6,11), t(51,56)]])
57
- tvs1 << tv
58
- tv = SegmentTermVector.new("field2",
59
- ["word3", "word4"],
60
- [1, 5],
61
- [[8], [2, 9, 11, 34, 56]],
62
- [[t(45,50)], [t(6,10), t(51,56), t(64,69), t(103,108), t(183,188)]])
63
- tvs1 << tv
64
- tv_w = TermVectorsWriter.new(@dir, "_test", @fis)
65
- tv = SegmentTermVector.new("field1",
66
- ["word1", "word2"],
67
- [3, 2],
68
- [[1, 5, 8], [2, 9]],
69
- [[t(0,5), t(34,39), t(45,50)],[t(6,11), t(51,56)]])
70
- tvs2 << tv
71
- tv_w.add_all_doc_vectors(tvs1)
72
- tv_w.add_all_doc_vectors(tvs2)
73
- tv_w.close
74
- tv_r = TermVectorsReader.new(@dir, "_test", @fis)
75
- assert_equal(2, tv_r.size)
76
- tv = tv_r.get_field_tv(0, "field1")
77
-
78
- assert_equal(2, tv.size)
79
- assert_equal("word1", tv.terms[0])
80
- assert_equal(3, tv.freqs[0])
81
- assert_equal(1, tv.positions[0][0])
82
- assert_equal(5, tv.positions[0][1])
83
- assert_equal(8, tv.positions[0][2])
84
- assert_equal(t(0,5), tv.offsets[0][0])
85
- assert_equal(t(34,39), tv.offsets[0][1])
86
- assert_equal(t(45,50), tv.offsets[0][2])
87
-
88
- assert_equal("word2", tv.terms[1])
89
- assert_equal(2, tv.freqs[1])
90
- assert_equal(2, tv.positions[1][0])
91
- assert_equal(9, tv.positions[1][1])
92
- assert_equal(t(6,11), tv.offsets[1][0])
93
- assert_equal(t(51,56), tv.offsets[1][1])
94
-
95
- tv = tv_r.get_field_tv(0, "field2")
96
- assert_equal(2, tv.size)
97
- assert_equal("word3", tv.terms[0])
98
-
99
- tv = tv_r.get_field_tv(1, "field1")
100
- assert_equal(2, tv.size)
101
- assert_equal("word1", tv.terms[0])
102
- end
103
-
104
- private
105
- def t(start, finish)
106
- return TermVectorOffsetInfo.new(start, finish)
107
- end
108
- end
@@ -1,27 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- class TermTest < Test::Unit::TestCase
4
- include Ferret::Index
5
- def test_term()
6
- term1 = Term.new("bfield1", "athis is text1")
7
- assert_equal(term1.field, "bfield1")
8
- assert_equal(term1.text, "athis is text1")
9
- term2 = Term.new("afield2", "athis is text1")
10
- term3 = Term.new("bfield1", "bthis is text2")
11
- term4 = Term.new("bfield1", "athis is text1")
12
- assert(term1 > term2)
13
- assert(term1 < term3)
14
- assert(term1.between?(term2, term3))
15
- assert(term1 == term4)
16
- assert(term1.eql?(term4))
17
- term4.set!("field3", "text3")
18
- assert_not_equal(term1, term4)
19
- end
20
-
21
- def test_non_strings()
22
- t = Term.new(2345, 3)
23
- t = Term.new(:symbol, :symbol)
24
- t.set!(:symbol, :symbol)
25
- t.set!(234, 23462346)
26
- end
27
- end
@@ -1,18 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class TermVectorOffsetInfoTest < Test::Unit::TestCase
5
- include Ferret::Index
6
- def test_tvoi()
7
- t1 = TermVectorOffsetInfo.new(1, 3)
8
- assert_equal(t1.start, 1)
9
- assert_equal(t1.end, 3)
10
- t2 = TermVectorOffsetInfo.new(1, 3)
11
- assert(t1 == t2)
12
- t2.start = 2
13
- assert(t1 != t2)
14
- t2.start = 1
15
- t2.end = 1
16
- assert(t1 != t2)
17
- end
18
- end
@@ -1,37 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class SimilarityTest < Test::Unit::TestCase
5
- include Ferret::Search
6
- include Ferret::Index
7
-
8
- def test_byte_float_conversion()
9
- 256.times do |i|
10
- assert_equal(i, Similarity.float_to_byte(Similarity.byte_to_float(i)))
11
- assert_equal(Similarity.byte_to_float(i), Similarity::NORM_TABLE[i])
12
- assert_equal(i, Similarity.encode_norm(Similarity.decode_norm(i)))
13
- end
14
- end
15
-
16
- def test_default_similarity
17
- dsim = DefaultSimilarity.new()
18
- assert_equal(1.0/4, dsim.length_norm("field", 16))
19
- assert_equal(1.0/4, dsim.query_norm(16))
20
- assert_equal(3.0, dsim.tf(9))
21
- assert_equal(1.0/10, dsim.sloppy_freq(9))
22
- assert_equal(1.0, dsim.idf(9, 10))
23
- assert_equal(4.0, dsim.coord(12, 3))
24
- searcher = Object.new
25
- def searcher.doc_freq(term) 9 end
26
- def searcher.max_doc() 10 end
27
- term = Term.new("field", "text")
28
- assert_equal(1.0, dsim.idf_term(term, searcher))
29
- terms = [
30
- Term.new("field1", "text1"),
31
- Term.new("field1", "text2"),
32
- Term.new("field2", "text3"),
33
- Term.new("field2", "text4")
34
- ]
35
- assert_equal(4.0, dsim.idf_phrase(terms, searcher))
36
- end
37
- end
@@ -1,14 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- class SortFieldTest < Test::Unit::TestCase
4
- include Ferret::Search
5
-
6
- def test_params()
7
- assert_equal("SCORE", SortField::SortType::SCORE.to_s)
8
- assert_equal("DOC", SortField::SortType::DOC.to_s)
9
- assert_equal("auto", SortField::SortType::AUTO.to_s)
10
- assert_equal("string", SortField::SortType::STRING.to_s)
11
- assert_equal("integer", SortField::SortType::INTEGER.to_s)
12
- assert_equal("float", SortField::SortType::FLOAT.to_s)
13
- end
14
- end
@@ -1,126 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- # Tests the multisearcher by comparing it's results
4
- # with those returned by an IndexSearcher.
5
- # Taken from TestMultiSearcherRanking.java of Lucene
6
- class MultiSearcher2Test < Test::Unit::TestCase
7
- include Ferret::Document
8
- include Ferret::Search
9
- include Ferret::Store
10
- include Ferret::Analysis
11
- include Ferret::Index
12
-
13
- FIELD_NAME = 'body'
14
-
15
- def test_one_Term_query
16
- check_query 'three'
17
- end
18
-
19
- def test_two_term_query
20
- check_query 'three foo'
21
- # as of 2006/03/11 these fail in Java Lucene as
22
- # well, hits are returned in slightly different order.
23
- #check_query '+pizza +blue*', :body
24
- #check_query '+pizza blue*', :body
25
- #check_query 'pizza blue*', :body
26
- end
27
-
28
- def test_prefix_query
29
- check_query 'multi*'
30
- end
31
-
32
- def test_fuzzy_query
33
- check_query 'multiThree~'
34
- end
35
-
36
- def test_range_query
37
- check_query '{multiA multiP}'
38
- end
39
-
40
- # fails (query parse error)
41
- #def test_multi_phrase_query
42
- # check_query '"blueberry pi*"'
43
- #end
44
-
45
- def test_nomatch_query
46
- check_query '+three +nomatch'
47
- end
48
-
49
- # this yields differing scores, but doesn't work in
50
- # Java Lucene either
51
- #def test_term_repeated_query
52
- # check_query 'multi* multi* foo'
53
- #end
54
-
55
-
56
- def check_query(query_str, debug_field=nil)
57
- @parser ||= Ferret::QueryParser.new(FIELD_NAME, :analyzer => @analyzer)
58
- query = @parser.parse(query_str)
59
- puts "Query: #{query}" if debug_field
60
- IndexTestHelper.explain(query, @multi, debug_field) if debug_field
61
- IndexTestHelper.explain(query, @single, debug_field) if debug_field
62
- multi_hits = @multi.search(query)
63
- single_hits = @single.search(query)
64
- assert_equal single_hits.size, multi_hits.size, "hit count differs"
65
- multi_hits.score_docs.each_with_index { |multi_sd, i|
66
- single_sd = single_hits.score_docs[i]
67
- doc_multi = @multi.doc(multi_sd.doc)
68
- doc_single = @single.doc(single_sd.doc)
69
- assert_equal single_sd.score, multi_sd.score, "score differs in result #{i}"
70
- assert_equal doc_single[FIELD_NAME], doc_multi[FIELD_NAME], "field values differ in result #{i}"
71
- }
72
- end
73
-
74
- def setup()
75
- @analyzer = WhiteSpaceAnalyzer.new()
76
- # create MultiSearcher from two seperate searchers
77
- d1 = RAMDirectory.new()
78
- iw1 = IndexWriter.new(d1, :analyzer => @analyzer, :create => true)
79
- add_collection1(iw1)
80
- iw1.close()
81
-
82
- d2 = RAMDirectory.new()
83
- iw2 = IndexWriter.new(d2, :analyzer => @analyzer, :create => true)
84
- add_collection2(iw2)
85
- iw2.close()
86
- @multi = MultiSearcher.new([IndexSearcher.new(d1), IndexSearcher.new(d2)])
87
-
88
- # create IndexSearcher which contains all documents
89
- d = RAMDirectory.new()
90
- iw = IndexWriter.new(d, :analyzer => @analyzer, :create => true)
91
- add_collection1(iw)
92
- add_collection2(iw)
93
- iw.close()
94
- @single = IndexSearcher.new(d)
95
- end
96
-
97
- def tear_down()
98
- @multi.close
99
- @single.close
100
- end
101
-
102
- def add(value, iw)
103
- d = Document.new
104
- d << Field.new(FIELD_NAME, value, Field::Store::YES, Field::Index::TOKENIZED)
105
- iw << d
106
- end
107
-
108
- def add_collection1(iw)
109
- add("one blah three", iw)
110
- add("one foo three multiOne", iw)
111
- add("one foobar three multiThree", iw)
112
- add("blueberry pie", iw)
113
- add("blueberry strudel", iw)
114
- add("blueberry pizza", iw)
115
- end
116
- def add_collection2(iw)
117
- add("two blah three", iw)
118
- add("two foo xxx multiTwo", iw)
119
- add("two foobar xxx multiThreee", iw)
120
- add("blueberry chewing gum", iw)
121
- add("bluebird pizza", iw)
122
- add("bluebird foobar pizza", iw)
123
- add("piccadilly circus", iw)
124
- end
125
-
126
- end
@@ -1,62 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
- require File.dirname(__FILE__) + "/rtm_store"
3
- require File.dirname(__FILE__) + "/rtm_store_lock"
4
-
5
- module Ferret::Store
6
-
7
- class FSDirectory
8
- def FSDirectory.directory_cache
9
- @@Directories
10
- end
11
-
12
- def ref_count
13
- @ref_count
14
- end
15
-
16
- def get_lock_prefix
17
- lock_prefix
18
- end
19
- end
20
- end
21
-
22
- class FSStoreTest < Test::Unit::TestCase
23
- include Ferret::Store
24
- include StoreTest
25
- include StoreLockTest
26
- def setup
27
- @dpath = File.join(File.dirname(__FILE__),
28
- '../../temp/fsdir')
29
- @dir = FSDirectory.new(@dpath, true)
30
- end
31
-
32
- def teardown
33
- @dir.refresh()
34
- @dir.close()
35
- end
36
-
37
- def test_cache
38
- dir_path = File.join(File.dirname(__FILE__),
39
- '/../../temp/cachetest')
40
- assert(! FSDirectory.directory_cache[dir_path],
41
- "this directory should not be cached yet")
42
- @dir1 = FSDirectory.new(dir_path, true)
43
- assert(FSDirectory.directory_cache[dir_path],
44
- "this directory should now be cached")
45
- assert_equal(@dir1.ref_count, 1,
46
- "There is one reference so the refcount should now be 1")
47
- @dir2 = FSDirectory.new(dir_path, true)
48
- assert(@dir1 === @dir2,
49
- "The directory should be cached so the same directory object should have been returned")
50
- assert_equal(@dir1.ref_count, 2,
51
- "There are two references so the refcount should now be 2")
52
- @dir1.close
53
- assert(FSDirectory.directory_cache[dir_path],
54
- "this directory shouldn't have been removed yet")
55
- assert_equal(@dir2.ref_count, 1,
56
- "There is one reference so the refcount should now be 1")
57
- @dir2.close
58
- assert(! FSDirectory.directory_cache[dir_path],
59
- "this directory should have been removed from the cache")
60
- end
61
-
62
- end
@@ -1,15 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
- require File.dirname(__FILE__) + "/rtm_store"
3
- require File.dirname(__FILE__) + "/rtm_store_lock"
4
-
5
- class RAMStoreTest < Test::Unit::TestCase
6
- include StoreTest
7
- include StoreLockTest
8
- def setup
9
- @dir = Ferret::Store::RAMDirectory.new
10
- end
11
-
12
- def teardown
13
- @dir.close()
14
- end
15
- end
@@ -1,150 +0,0 @@
1
- module StoreTest
2
- # declare dir so inheritors can access it.
3
- @dir = nil
4
-
5
- def test_modified
6
- # difficult to test this one but as file mtime is only stored to the nearest second.
7
- # we can assume this test will happen in less than a few seconds. (I hope)
8
- time = Time.new.to_i
9
- @dir.touch('mtime.test')
10
- time_before = @dir.modified('mtime.test').to_i
11
- assert(time_before - time <= 3,
12
- "test that mtime is approximately equal to the system time when the file was touched")
13
- end
14
-
15
- def test_rw_bytes
16
- bytes = [0x34, 0x87, 0xF9, 0xEA, 0x00, 0xFF]
17
- rw_test(bytes, "byte", 6)
18
- end
19
-
20
- def test_rw_ints
21
- ints = [-2147483648, 2147483647, -1, 0]
22
- rw_test(ints, "int", 16)
23
- end
24
-
25
- def test_rw_longs
26
- longs = [-9223372036854775808, 9223372036854775807, -1, 0]
27
- rw_test(longs, "long", 32)
28
- end
29
-
30
- def test_rw_uints
31
- uints = [0xffffffff, 100000, 0]
32
- rw_test(uints, "uint", 12)
33
- end
34
-
35
- def test_rw_ulongs
36
- ulongs = [0xffffffffffffffff, 100000000000000, 0]
37
- rw_test(ulongs, "ulong", 24)
38
- end
39
-
40
- def test_rw_vints
41
- vints = [ 9223372036854775807,
42
- 0x00,
43
- 0xFFFFFFFFFFFFFFFF]
44
- rw_test(vints, "vint", 20)
45
- end
46
-
47
- def test_rw_vlongs
48
- vlongs = [ 9223372036854775807,
49
- 0x00,
50
- 0xFFFFFFFFFFFFFFFF]
51
- rw_test(vlongs, "vlong", 20)
52
- end
53
-
54
- def test_rw_strings
55
- text = 'This is a ruby ferret test string ~!@#$%^&*()`123456790-=\)_+|'
56
- ostream = @dir.create_output("rw_strings.test")
57
- ostream.write_string(text)
58
- ostream.write_string(text*100)
59
- ostream.close
60
- istream = @dir.open_input("rw_strings.test")
61
- assert_equal(text, istream.read_string, "Short string test failed")
62
- assert_equal(text*100, istream.read_string, "Short string test failed")
63
- istream.close
64
- assert_equal(6265, @dir.length('rw_strings.test'))
65
- end
66
-
67
- def test_rw_utf8_strings
68
- text = '³³ ÄÄÄÄÄÄ 道德經'
69
- ostream = @dir.create_output("rw_utf8_strings.test")
70
- ostream.write_string(text)
71
- ostream.write_string(text*100)
72
- ostream.close
73
- istream = @dir.open_input("rw_utf8_strings.test")
74
- assert_equal(text, x = istream.read_string, "Short string test failed")
75
- assert_equal(text*100, istream.read_string, "Short string test failed")
76
- istream.close
77
- end
78
-
79
- # this test fills up the output stream so that the buffer will have to be
80
- # written a few times. It then uses seek to make sure that it works
81
- # correctly
82
- def test_buffer_seek
83
- ostream = @dir.create_output("rw_seek.test")
84
- text = 'This is another long test string !@#$%#$%&%$*%^&*()(_'
85
- 1000.times {|i| ostream.write_long(i); ostream.write_string(text) }
86
- ostream.seek(987)
87
- assert_equal(987, ostream.pos)
88
- ostream.write_vint(555)
89
- ostream.seek(56)
90
- assert_equal(56, ostream.pos)
91
- ostream.write_vint(1234567890)
92
- ostream.seek(4000)
93
- assert_equal(4000, ostream.pos)
94
- ostream.write_vint(9876543210)
95
- ostream.close()
96
- istream = @dir.open_input("rw_seek.test")
97
- istream.seek(56)
98
- assert_equal(56, istream.pos)
99
- assert_equal(1234567890, istream.read_vint())
100
- istream.seek(4000)
101
- assert_equal(4000, istream.pos)
102
- assert_equal(9876543210, istream.read_vint())
103
- istream.seek(987)
104
- assert_equal(987, istream.pos)
105
- assert_equal(555, istream.read_vint())
106
- istream.close()
107
- end
108
-
109
- def test_clone
110
- ostream = @dir.create_output("clone_test")
111
- 10.times {|i| ostream.write_long(i) }
112
- ostream.close
113
- istream = @dir.open_input("clone_test")
114
- istream.seek(24)
115
- alt_istream = istream.clone
116
- assert_equal(istream.pos, alt_istream.pos)
117
- (3...10).each {|i| assert_equal(i, alt_istream.read_long) }
118
- assert_equal(80, alt_istream.pos)
119
- assert_equal(24, istream.pos)
120
- alt_istream.close
121
- (3...10).each {|i| assert_equal(i, istream.read_long) }
122
- istream.close
123
- end
124
-
125
- def test_read_bytes
126
- str = "0000000000"
127
- ostream = @dir.create_output("rw_read_bytes")
128
- ostream.write_bytes("how are you doing?", 18)
129
- ostream.close
130
- istream = @dir.open_input("rw_read_bytes")
131
- istream.read_bytes(str, 2, 4)
132
- assert_equal("00how 0000", str)
133
- istream.read_bytes(str, 1, 8)
134
- assert_equal("0are you 0", str)
135
- istream.close
136
- end
137
-
138
- private
139
-
140
- def rw_test(values, type, expected_length)
141
- ostream = @dir.create_output("rw_#{type}.test")
142
- values.each { |b| ostream.__send__("write_" + type, b) }
143
- ostream.close
144
- istream = @dir.open_input("rw_#{type}.test")
145
- values.each { |b| assert_equal(b, istream.__send__("read_" + type), "#{type} should be equal") }
146
- istream.close
147
- assert_equal(expected_length, @dir.length("rw_#{type}.test"))
148
- end
149
-
150
- end