ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
@@ -1,107 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
-
4
- class CompoundFileWriterTest < Test::Unit::TestCase
5
-
6
- include Ferret::Index
7
-
8
- def setup()
9
- @dir = Ferret::Store::RAMDirectory.new
10
- end
11
-
12
- def tear_down()
13
- @dir.close()
14
- end
15
-
16
- def test_writer
17
- file1 = @dir.create_output("file1")
18
- file2 = @dir.create_output("file2")
19
- file1.write_int(20)
20
- file2.write_string('this is file2')
21
- file1.close()
22
- file2.close()
23
- cfile_writer = CompoundFileWriter.new(@dir, "cfile")
24
- cfile_writer.add_file("file1")
25
- cfile_writer.add_file("file2")
26
- cfile_writer.close()
27
-
28
- cfile = @dir.open_input("cfile")
29
- assert_equal(2, cfile.read_vint())
30
- assert_equal(29, cfile.read_long(), "Offset is incorrect")
31
- assert_equal("file1", cfile.read_string(), "Filename is incorrect")
32
- assert_equal(33, cfile.read_long(), "Offset is incorrect")
33
- assert_equal("file2", cfile.read_string(), "Filename is incorrect")
34
- assert_equal(20, cfile.read_int(), "Content is incorrect")
35
- assert_equal('this is file2', cfile.read_string(), "Content is incorrect")
36
- end
37
- end
38
-
39
- class CompoundFileReaderTest < Test::Unit::TestCase
40
-
41
- include Ferret::Index
42
-
43
- def setup()
44
- @dir = Ferret::Store::RAMDirectory.new
45
- end
46
-
47
- def tear_down()
48
- @dir.close()
49
- end
50
-
51
- def test_reader
52
- cfile = @dir.create_output("cfile")
53
- cfile.write_vint(2)
54
- cfile.write_long(29)
55
- cfile.write_string('file1')
56
- cfile.write_long(33)
57
- cfile.write_string('file2')
58
- cfile.write_int(20)
59
- cfile.write_string("this is file 2")
60
- cfile.close()
61
-
62
- cfile_reader = CompoundFileReader.new(@dir, "cfile")
63
- assert_equal(4, cfile_reader.length('file1'))
64
- assert_equal(15, cfile_reader.length('file2'))
65
- file1 = cfile_reader.open_input('file1')
66
- file2 = cfile_reader.open_input('file2')
67
- assert_equal(20, file1.read_int())
68
- assert_equal('this is file 2', file2.read_string())
69
- file1.close()
70
- file2.close()
71
- end
72
- end
73
-
74
- class CompoundFileIOTest < Test::Unit::TestCase
75
-
76
- include Ferret::Index
77
-
78
- def setup()
79
- @dir = Ferret::Store::RAMDirectory.new
80
- end
81
-
82
- def tear_down()
83
- @dir.close()
84
- end
85
-
86
- def test_buffer
87
- file1 = @dir.create_output("file1")
88
- file2 = @dir.create_output("file2")
89
- file3 = @dir.create_output("file3")
90
- 20.times { file1.write_int(rand(10000)) }
91
- file2.write_string('this is file2' * 1000)
92
- file3.write_string('this is file2')
93
- file1.close()
94
- file2.close()
95
- file3.close()
96
- cfile_writer = CompoundFileWriter.new(@dir, "cfile")
97
- cfile_writer.add_file("file1")
98
- cfile_writer.add_file("file2")
99
- cfile_writer.add_file("file3")
100
- cfile_writer.close()
101
-
102
- cfile_reader = CompoundFileReader.new(@dir, "cfile")
103
- file2 = cfile_reader.open_input('file2')
104
- assert_equal('this is file2' * 1000, file2.read_string)
105
- file2.close
106
- end
107
- end
@@ -1,127 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- class FieldInfosTest < Test::Unit::TestCase
4
- include Ferret::Index
5
-
6
- def test_field_info()
7
- fi = FieldInfo.new("name", true, 1, true, true, true, true)
8
- assert_equal(fi.name, "name")
9
- assert_equal(fi.number, 1)
10
- assert(fi.indexed?)
11
- assert(fi.store_term_vector?)
12
- assert(fi.store_offsets?)
13
- assert(fi.store_positions?)
14
- assert(fi.omit_norms?)
15
-
16
- fi.name = "hello"
17
- fi.indexed = false
18
- fi.number = 2
19
- fi.store_term_vector = false
20
- fi.store_offset = false
21
- fi.store_position = false
22
- fi.omit_norms = false
23
-
24
- assert_equal(fi.name, "hello")
25
- assert_equal(fi.number, 2)
26
- assert(!fi.indexed?)
27
- assert(!fi.store_term_vector?)
28
- assert(!fi.store_offsets?)
29
- assert(!fi.store_positions?)
30
- assert(!fi.omit_norms?)
31
-
32
- fi.set!(true, true, true, true, true)
33
- assert(fi.indexed?)
34
- assert(fi.store_term_vector?)
35
- assert(fi.store_offsets?)
36
- assert(fi.store_positions?)
37
- assert(fi.omit_norms?)
38
-
39
- fi = FieldInfo.new("name", true, 1, true)
40
- assert(!fi.store_offsets?)
41
- assert(!fi.store_positions?)
42
- assert(!fi.omit_norms?)
43
- end
44
-
45
- def fi_test_attr(fi, name, number, indexed, store_tv, store_pos, store_off, omit_norms)
46
- assert_equal(name, fi.name)
47
- assert_equal(number, fi.number)
48
- assert_equal(indexed, fi.indexed?)
49
- assert_equal(store_tv, fi.store_term_vector?)
50
- assert_equal(store_pos, fi.store_positions?)
51
- assert_equal(store_off, fi.store_offsets?)
52
- assert_equal(omit_norms, fi.omit_norms?)
53
- end
54
-
55
- def test_fis_add()
56
- fis = FieldInfos.new()
57
- fi = fis.add("field1", false)
58
- fi_test_attr(fi, "field1", 0, false, false, false, false, false)
59
- assert_equal(1, fis.size)
60
-
61
- fi = fis.add("field1", true, true)
62
- fi_test_attr(fi, "field1", 0, true, true, false, false, false)
63
- assert_equal(1, fis.size)
64
-
65
- fi = fis.add("field2", false)
66
- fi_test_attr(fi, "field2", 1, false, false, false, false, false)
67
- assert_equal(2, fis.size)
68
-
69
- fi = fis.add("field1", true, true, true, true, true)
70
- assert_equal(fi, fis[fi.number])
71
- assert_equal(fi, fis["field1"])
72
- assert_equal(0, fis.field_number("field1"))
73
- assert_equal(1, fis.field_number("field2"))
74
- assert_equal(FieldInfos::NOT_A_FIELD, fis.field_number("field3"))
75
- assert_equal(nil, fis["field3"])
76
- fi_test_attr(fi, "field1", 0, true, true, true, true, false)
77
- assert_equal(2, fis.size)
78
- end
79
-
80
- def test_add_doc_fields
81
- doc = IndexTestHelper.prepare_document
82
- fis = FieldInfos.new()
83
- fis << doc
84
- dir = Ferret::Store::RAMDirectory.new
85
- fis.write_to_dir(dir, "_test")
86
- fis2 = FieldInfos.new(dir, "_test")
87
- assert_equal("text_field1", fis2["text_field1"].name)
88
- fn = fis2.field_number("text_field2")
89
- assert_equal("text_field2", fis2[fn].name)
90
- assert_equal(9, fis2.size)
91
- assert(fis.has_vectors?)
92
- end
93
-
94
- def test_fis_has_vectors
95
- fis = FieldInfos.new()
96
- assert(! fis.has_vectors?)
97
- fis.add("random_field")
98
- assert(! fis.has_vectors?)
99
- fis.add("store_term_vector_field", true, true, false, false, false)
100
- assert(fis.has_vectors?)
101
- end
102
-
103
-
104
- def test_fis_rw()
105
- fis = FieldInfos.new()
106
- dir = Ferret::Store::RAMDirectory.new()
107
- fis.add("field1", false, false, false, false, true)
108
- fis.add("field2", true, false, false, false, true)
109
- fis.add("field3", true, true, false, false, true)
110
- fis.add("field4", true, true, true, false, true)
111
- fis.add("field5", true, true, true, true, true)
112
- fis.add("field6", true, true, true, true, false)
113
- fis.write_to_dir(dir, "fis_rw.test")
114
- fis = nil
115
-
116
- fis = FieldInfos.new(dir, "fis_rw.test")
117
- fi_test_attr(fis[0], "field1", 0, false, false, false, false, true)
118
- fi_test_attr(fis[1], "field2", 1, true, false, false, false, true)
119
- fi_test_attr(fis[2], "field3", 2, true, true, false, false, true)
120
- fi_test_attr(fis[3], "field4", 3, true, true, true, false, true)
121
- fi_test_attr(fis[4], "field5", 4, true, true, true, true, true)
122
- fi_test_attr(fis[5], "field6", 5, true, true, true, true, false)
123
-
124
- assert_equal(6, fis.size)
125
- end
126
-
127
- end
@@ -1,167 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- class FieldsWriterTest < Test::Unit::TestCase
4
-
5
- include Ferret::Index
6
- include Ferret::Document
7
-
8
- def setup()
9
- @dir = Ferret::Store::RAMDirectory.new
10
- end
11
-
12
- def tear_down()
13
- @dir.close()
14
- end
15
-
16
- def test_writer
17
- doc = Document.new
18
- doc << Field.new("name", "daily news", Field::Store::YES)
19
- doc << Field.new("content", "Nothing happened today.", Field::Store::YES)
20
-
21
- infos = FieldInfos.new
22
- infos << doc
23
-
24
- writer = FieldsWriter.new(@dir, "fieldswritertest", infos)
25
- writer << doc
26
- writer.close
27
-
28
- fstream = @dir.open_input("fieldswritertest.fdt")
29
- istream = @dir.open_input("fieldswritertest.fdx")
30
-
31
- stored = fstream.read_vint
32
- field_num1 = fstream.read_vint
33
- byte1 = fstream.read_byte
34
- data1 = fstream.read_string
35
- assert( stored == 2 )
36
- assert( (byte1 |= FieldsWriter::FIELD_IS_TOKENIZED) != 0 )
37
- assert( data1 == "daily news" )
38
-
39
- field_num2 = fstream.read_vint
40
- byte2 = fstream.read_byte
41
- data2 = fstream.read_string
42
- assert( (byte2 |= FieldsWriter::FIELD_IS_TOKENIZED) != 0 )
43
- assert( data2 == "Nothing happened today." )
44
-
45
- end
46
- end
47
-
48
- class FieldsReaderTest < Test::Unit::TestCase
49
-
50
- include Ferret::Index
51
- include Ferret::Document
52
-
53
- def setup()
54
- @dir = Ferret::Store::RAMDirectory.new
55
- end
56
-
57
- def tear_down()
58
- @dir.close()
59
- end
60
-
61
- def test_doc
62
- doc = Document.new
63
- doc << Field.new("name", "daily news")
64
- doc << Field.new("content", "Nothing happened today.")
65
-
66
- infos = FieldInfos.new
67
- infos << doc
68
-
69
- fstream = @dir.create_output("fieldsreadertest.fdt")
70
- istream = @dir.create_output("fieldsreadertest.fdx")
71
-
72
- istream.write_long(0)
73
- istream.close
74
- fstream.write_vint(2)
75
- fstream.write_vint(0)
76
- fstream.write_byte(0)
77
- fstream.write_string("daily news")
78
- fstream.write_vint(1)
79
- fstream.write_byte(0)
80
- fstream.write_string("Nothing happened today.")
81
- fstream.close
82
-
83
- reader = FieldsReader.new(@dir, "fieldsreadertest", infos)
84
- docres = reader.doc(0)
85
-
86
- assert_equal(docres.field("name").data, "daily news")
87
- assert_equal(docres.field("content").data, "Nothing happened today.")
88
- end
89
- end
90
-
91
- class FieldsIOTest < Test::Unit::TestCase
92
-
93
- include Ferret::Index
94
- include Ferret::Document
95
-
96
- def setup()
97
- @dir = Ferret::Store::RAMDirectory.new
98
- doc = IndexTestHelper.prepare_document()
99
- infos = FieldInfos.new
100
- infos << doc
101
-
102
- writer = FieldsWriter.new(@dir, "field_types", infos)
103
- writer << doc
104
- writer.close
105
-
106
- reader = FieldsReader.new(@dir, "field_types", infos)
107
- @docres = reader.doc(0)
108
- end
109
-
110
- def tear_down()
111
- @dir.close()
112
- end
113
-
114
- def test_text_field_no_term_vector
115
- field = @docres.field("text_field1")
116
- check_field_values(field, "field one text", true, true, true, false, false)
117
- end
118
-
119
- def test_text_field_term_vector
120
- field = @docres.field("text_field2")
121
- check_field_values(field, "field field field two text", true, true, true, true, false)
122
- end
123
-
124
- def test_key_field
125
- field = @docres.field("key_field")
126
- check_field_values(field, "keyword", true, true, false, false, false)
127
- end
128
-
129
- def test_unindexed_field
130
- field = @docres.field("unindexed_field")
131
- check_field_values(field, "unindexed field text", true, false, false, false, false)
132
- end
133
-
134
- def test_unstored_field_no_term_vector
135
- field = @docres.field("unstored_field1")
136
- assert_equal(nil, field)
137
- end
138
-
139
- def test_compressed_field
140
- field = @docres.field("compressed_field")
141
- check_field_values(field, "compressed text", true, true, true, true, false)
142
- end
143
-
144
- def test_binary_field
145
- bin = IndexTestHelper::BINARY_DATA
146
- field = @docres.field("binary_field")
147
- check_field_values(field, bin, true, false, false, false, true)
148
- end
149
-
150
- def test_compressed_binary_field
151
- cbin = IndexTestHelper::COMPRESSED_BINARY_DATA
152
- field = @docres.field("compressed_binary_field")
153
- check_field_values(field, cbin, true, false, false, false, true)
154
- end
155
-
156
-
157
- private
158
-
159
- def check_field_values(field, value, stored, indexed, tokenized, term_vector, binary)
160
- assert_equal(value, field.data)
161
- assert_equal(stored, field.stored?)
162
- assert_equal(indexed, field.indexed?)
163
- assert_equal(tokenized, field.tokenized?)
164
- assert_equal(term_vector, field.store_term_vector?)
165
- assert_equal(binary, field.binary?)
166
- end
167
- end
@@ -1,83 +0,0 @@
1
- require File.dirname(__FILE__) + "/../../test_helper"
2
-
3
- class MultipleTermDocPosEnumTest < Test::Unit::TestCase
4
- include Ferret::Index
5
- include Ferret::Search
6
- include Ferret::Analysis
7
-
8
- def setup()
9
- @dir = Ferret::Store::RAMDirectory.new()
10
- iw = IndexWriter.new(@dir,
11
- :analyzer => WhiteSpaceAnalyzer.new(),
12
- :create_if_missing => true)
13
- @documents = IndexTestHelper.prepare_search_docs()
14
- @documents.each { |doc| iw << doc }
15
- iw.close()
16
- @ir = IndexReader.open(@dir, true)
17
- end
18
-
19
- def tear_down()
20
- @ir.close
21
- end
22
-
23
- def test_mtdpe()
24
- t1 = Term.new("field", "red")
25
- t2 = Term.new("field", "brown")
26
- t3 = Term.new("field", "hairy")
27
- mtdpe = MultipleTermDocPosEnum.new(@ir, [t1, t2, t3])
28
- assert(mtdpe.next?)
29
- assert_equal(1, mtdpe.doc)
30
- assert_equal(1, mtdpe.freq)
31
- assert_equal(4, mtdpe.next_position)
32
-
33
- assert(mtdpe.next?)
34
- assert_equal(8, mtdpe.doc)
35
- assert_equal(1, mtdpe.freq)
36
- assert_equal(5, mtdpe.next_position)
37
-
38
- assert(mtdpe.next?)
39
- assert_equal(11, mtdpe.doc)
40
- assert_equal(1, mtdpe.freq)
41
- assert_equal(4, mtdpe.next_position)
42
-
43
- assert(mtdpe.next?)
44
- assert_equal(14, mtdpe.doc)
45
- assert_equal(1, mtdpe.freq)
46
- assert_equal(4, mtdpe.next_position)
47
-
48
- assert(mtdpe.next?)
49
- assert_equal(16, mtdpe.doc)
50
- assert_equal(3, mtdpe.freq)
51
- assert_equal(5, mtdpe.next_position)
52
- assert_equal(7, mtdpe.next_position)
53
- assert_equal(11, mtdpe.next_position)
54
-
55
- assert(mtdpe.next?)
56
- assert_equal(17, mtdpe.doc)
57
- assert_equal(2, mtdpe.freq)
58
- assert_equal(2, mtdpe.next_position)
59
- assert_equal(7, mtdpe.next_position)
60
-
61
- assert(!mtdpe.next?)
62
- mtdpe.close()
63
- end
64
-
65
- def test_tp
66
- tp = @ir.term_positions_for(Term.new("field", "red"))
67
- assert(tp.next?)
68
- assert_equal(11, tp.doc)
69
- assert_equal(1, tp.freq)
70
- assert_equal(4, tp.next_position)
71
-
72
- assert(tp.next?)
73
- assert_equal(16, tp.doc)
74
- assert_equal(1, tp.freq)
75
- assert_equal(11, tp.next_position)
76
-
77
- assert(tp.next?)
78
- assert_equal(17, tp.doc)
79
- assert_equal(1, tp.freq)
80
- assert_equal(7, tp.next_position)
81
- tp.close()
82
- end
83
- end