ferret 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (202) hide show
  1. data/MIT-LICENSE +20 -0
  2. data/README +109 -0
  3. data/Rakefile +275 -0
  4. data/TODO +9 -0
  5. data/TUTORIAL +197 -0
  6. data/ext/extconf.rb +3 -0
  7. data/ext/ferret.c +23 -0
  8. data/ext/ferret.h +85 -0
  9. data/ext/index_io.c +543 -0
  10. data/ext/priority_queue.c +227 -0
  11. data/ext/ram_directory.c +316 -0
  12. data/ext/segment_merge_queue.c +41 -0
  13. data/ext/string_helper.c +42 -0
  14. data/ext/tags +240 -0
  15. data/ext/term.c +261 -0
  16. data/ext/term_buffer.c +299 -0
  17. data/ext/util.c +12 -0
  18. data/lib/ferret.rb +41 -0
  19. data/lib/ferret/analysis.rb +11 -0
  20. data/lib/ferret/analysis/analyzers.rb +93 -0
  21. data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
  22. data/lib/ferret/analysis/token.rb +79 -0
  23. data/lib/ferret/analysis/token_filters.rb +86 -0
  24. data/lib/ferret/analysis/token_stream.rb +26 -0
  25. data/lib/ferret/analysis/tokenizers.rb +107 -0
  26. data/lib/ferret/analysis/word_list_loader.rb +27 -0
  27. data/lib/ferret/document.rb +2 -0
  28. data/lib/ferret/document/document.rb +152 -0
  29. data/lib/ferret/document/field.rb +304 -0
  30. data/lib/ferret/index.rb +26 -0
  31. data/lib/ferret/index/compound_file_io.rb +343 -0
  32. data/lib/ferret/index/document_writer.rb +288 -0
  33. data/lib/ferret/index/field_infos.rb +259 -0
  34. data/lib/ferret/index/fields_io.rb +175 -0
  35. data/lib/ferret/index/index.rb +228 -0
  36. data/lib/ferret/index/index_file_names.rb +33 -0
  37. data/lib/ferret/index/index_reader.rb +462 -0
  38. data/lib/ferret/index/index_writer.rb +488 -0
  39. data/lib/ferret/index/multi_reader.rb +363 -0
  40. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
  41. data/lib/ferret/index/segment_infos.rb +130 -0
  42. data/lib/ferret/index/segment_merge_info.rb +47 -0
  43. data/lib/ferret/index/segment_merge_queue.rb +16 -0
  44. data/lib/ferret/index/segment_merger.rb +337 -0
  45. data/lib/ferret/index/segment_reader.rb +380 -0
  46. data/lib/ferret/index/segment_term_enum.rb +178 -0
  47. data/lib/ferret/index/segment_term_vector.rb +58 -0
  48. data/lib/ferret/index/term.rb +49 -0
  49. data/lib/ferret/index/term_buffer.rb +88 -0
  50. data/lib/ferret/index/term_doc_enum.rb +283 -0
  51. data/lib/ferret/index/term_enum.rb +52 -0
  52. data/lib/ferret/index/term_info.rb +41 -0
  53. data/lib/ferret/index/term_infos_io.rb +312 -0
  54. data/lib/ferret/index/term_vector_offset_info.rb +20 -0
  55. data/lib/ferret/index/term_vectors_io.rb +552 -0
  56. data/lib/ferret/query_parser.rb +274 -0
  57. data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
  58. data/lib/ferret/search.rb +49 -0
  59. data/lib/ferret/search/boolean_clause.rb +100 -0
  60. data/lib/ferret/search/boolean_query.rb +303 -0
  61. data/lib/ferret/search/boolean_scorer.rb +294 -0
  62. data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
  63. data/lib/ferret/search/conjunction_scorer.rb +99 -0
  64. data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
  65. data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
  66. data/lib/ferret/search/explanation.rb +41 -0
  67. data/lib/ferret/search/field_cache.rb +216 -0
  68. data/lib/ferret/search/field_doc.rb +31 -0
  69. data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
  70. data/lib/ferret/search/filter.rb +11 -0
  71. data/lib/ferret/search/filtered_query.rb +130 -0
  72. data/lib/ferret/search/filtered_term_enum.rb +79 -0
  73. data/lib/ferret/search/fuzzy_query.rb +153 -0
  74. data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
  75. data/lib/ferret/search/hit_collector.rb +34 -0
  76. data/lib/ferret/search/hit_queue.rb +11 -0
  77. data/lib/ferret/search/index_searcher.rb +173 -0
  78. data/lib/ferret/search/match_all_docs_query.rb +104 -0
  79. data/lib/ferret/search/multi_phrase_query.rb +204 -0
  80. data/lib/ferret/search/multi_term_query.rb +65 -0
  81. data/lib/ferret/search/non_matching_scorer.rb +22 -0
  82. data/lib/ferret/search/phrase_positions.rb +55 -0
  83. data/lib/ferret/search/phrase_query.rb +217 -0
  84. data/lib/ferret/search/phrase_scorer.rb +153 -0
  85. data/lib/ferret/search/prefix_query.rb +47 -0
  86. data/lib/ferret/search/query.rb +111 -0
  87. data/lib/ferret/search/query_filter.rb +51 -0
  88. data/lib/ferret/search/range_filter.rb +103 -0
  89. data/lib/ferret/search/range_query.rb +139 -0
  90. data/lib/ferret/search/req_excl_scorer.rb +125 -0
  91. data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
  92. data/lib/ferret/search/score_doc.rb +38 -0
  93. data/lib/ferret/search/score_doc_comparator.rb +114 -0
  94. data/lib/ferret/search/scorer.rb +91 -0
  95. data/lib/ferret/search/similarity.rb +278 -0
  96. data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
  97. data/lib/ferret/search/sort.rb +105 -0
  98. data/lib/ferret/search/sort_comparator.rb +60 -0
  99. data/lib/ferret/search/sort_field.rb +87 -0
  100. data/lib/ferret/search/spans.rb +12 -0
  101. data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
  102. data/lib/ferret/search/spans/span_first_query.rb +79 -0
  103. data/lib/ferret/search/spans/span_near_query.rb +108 -0
  104. data/lib/ferret/search/spans/span_not_query.rb +130 -0
  105. data/lib/ferret/search/spans/span_or_query.rb +176 -0
  106. data/lib/ferret/search/spans/span_query.rb +25 -0
  107. data/lib/ferret/search/spans/span_scorer.rb +74 -0
  108. data/lib/ferret/search/spans/span_term_query.rb +105 -0
  109. data/lib/ferret/search/spans/span_weight.rb +84 -0
  110. data/lib/ferret/search/spans/spans_enum.rb +44 -0
  111. data/lib/ferret/search/term_query.rb +128 -0
  112. data/lib/ferret/search/term_scorer.rb +181 -0
  113. data/lib/ferret/search/top_docs.rb +24 -0
  114. data/lib/ferret/search/top_field_docs.rb +17 -0
  115. data/lib/ferret/search/weight.rb +54 -0
  116. data/lib/ferret/search/wildcard_query.rb +26 -0
  117. data/lib/ferret/search/wildcard_term_enum.rb +61 -0
  118. data/lib/ferret/stemmers.rb +1 -0
  119. data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
  120. data/lib/ferret/store.rb +5 -0
  121. data/lib/ferret/store/buffered_index_io.rb +191 -0
  122. data/lib/ferret/store/directory.rb +139 -0
  123. data/lib/ferret/store/fs_store.rb +338 -0
  124. data/lib/ferret/store/index_io.rb +259 -0
  125. data/lib/ferret/store/ram_store.rb +282 -0
  126. data/lib/ferret/utils.rb +7 -0
  127. data/lib/ferret/utils/bit_vector.rb +105 -0
  128. data/lib/ferret/utils/date_tools.rb +138 -0
  129. data/lib/ferret/utils/number_tools.rb +91 -0
  130. data/lib/ferret/utils/parameter.rb +41 -0
  131. data/lib/ferret/utils/priority_queue.rb +120 -0
  132. data/lib/ferret/utils/string_helper.rb +47 -0
  133. data/lib/ferret/utils/weak_key_hash.rb +51 -0
  134. data/rake_utils/code_statistics.rb +106 -0
  135. data/setup.rb +1551 -0
  136. data/test/benchmark/tb_ram_store.rb +76 -0
  137. data/test/benchmark/tb_rw_vint.rb +26 -0
  138. data/test/longrunning/tc_numbertools.rb +60 -0
  139. data/test/longrunning/tm_store.rb +19 -0
  140. data/test/test_all.rb +9 -0
  141. data/test/test_helper.rb +6 -0
  142. data/test/unit/analysis/tc_analyzer.rb +21 -0
  143. data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
  144. data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
  145. data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
  146. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
  147. data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
  148. data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
  149. data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
  150. data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
  151. data/test/unit/analysis/tc_stop_filter.rb +14 -0
  152. data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
  153. data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
  154. data/test/unit/analysis/tc_word_list_loader.rb +32 -0
  155. data/test/unit/document/tc_document.rb +47 -0
  156. data/test/unit/document/tc_field.rb +80 -0
  157. data/test/unit/index/tc_compound_file_io.rb +107 -0
  158. data/test/unit/index/tc_field_infos.rb +119 -0
  159. data/test/unit/index/tc_fields_io.rb +167 -0
  160. data/test/unit/index/tc_index.rb +140 -0
  161. data/test/unit/index/tc_index_reader.rb +622 -0
  162. data/test/unit/index/tc_index_writer.rb +57 -0
  163. data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
  164. data/test/unit/index/tc_segment_infos.rb +74 -0
  165. data/test/unit/index/tc_segment_term_docs.rb +17 -0
  166. data/test/unit/index/tc_segment_term_enum.rb +60 -0
  167. data/test/unit/index/tc_segment_term_vector.rb +71 -0
  168. data/test/unit/index/tc_term.rb +22 -0
  169. data/test/unit/index/tc_term_buffer.rb +57 -0
  170. data/test/unit/index/tc_term_info.rb +19 -0
  171. data/test/unit/index/tc_term_infos_io.rb +192 -0
  172. data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
  173. data/test/unit/index/tc_term_vectors_io.rb +108 -0
  174. data/test/unit/index/th_doc.rb +244 -0
  175. data/test/unit/query_parser/tc_query_parser.rb +84 -0
  176. data/test/unit/search/tc_filter.rb +113 -0
  177. data/test/unit/search/tc_fuzzy_query.rb +136 -0
  178. data/test/unit/search/tc_index_searcher.rb +188 -0
  179. data/test/unit/search/tc_search_and_sort.rb +98 -0
  180. data/test/unit/search/tc_similarity.rb +37 -0
  181. data/test/unit/search/tc_sort.rb +48 -0
  182. data/test/unit/search/tc_sort_field.rb +27 -0
  183. data/test/unit/search/tc_spans.rb +153 -0
  184. data/test/unit/store/tc_fs_store.rb +84 -0
  185. data/test/unit/store/tc_ram_store.rb +35 -0
  186. data/test/unit/store/tm_store.rb +180 -0
  187. data/test/unit/store/tm_store_lock.rb +68 -0
  188. data/test/unit/ts_analysis.rb +16 -0
  189. data/test/unit/ts_document.rb +4 -0
  190. data/test/unit/ts_index.rb +18 -0
  191. data/test/unit/ts_query_parser.rb +3 -0
  192. data/test/unit/ts_search.rb +10 -0
  193. data/test/unit/ts_store.rb +6 -0
  194. data/test/unit/ts_utils.rb +10 -0
  195. data/test/unit/utils/tc_bit_vector.rb +65 -0
  196. data/test/unit/utils/tc_date_tools.rb +50 -0
  197. data/test/unit/utils/tc_number_tools.rb +59 -0
  198. data/test/unit/utils/tc_parameter.rb +40 -0
  199. data/test/unit/utils/tc_priority_queue.rb +62 -0
  200. data/test/unit/utils/tc_string_helper.rb +21 -0
  201. data/test/unit/utils/tc_weak_key_hash.rb +25 -0
  202. metadata +251 -0
@@ -0,0 +1,57 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Exercises IndexWriter against a RAMDirectory: lock handling around
# open/close, and the document count after adding documents.
class IndexWriterTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Analysis

  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Test::Unit only runs the hook named +teardown+. The former +tear_down+
  # spelling was never invoked, so the directory was never closed.
  def teardown
    @dir.close
  end

  # Opening a writer must create the "segments" file and hold the write
  # lock; closing it must release the lock and leave the file in place.
  def test_initialize
    wlock = @dir.make_lock(IndexWriter::WRITE_LOCK_NAME)
    clock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME)
    assert(!wlock.locked?)
    assert(!clock.locked?)

    iw = IndexWriter.new(@dir, :create => true)
    assert(@dir.exists?("segments"))
    assert(wlock.locked?)

    iw.close
    assert(@dir.exists?("segments"))
    assert(!wlock.locked?)
    assert(!clock.locked?)
  end

  # Adding a single document is reflected in doc_count.
  def test_add_document
    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new, :create => true)
    doc = IndexTestHelper.prepare_document
    # NOTE(review): +infos+ is populated but never asserted on; kept so the
    # test still exercises FieldInfos#<< exactly as before.
    infos = FieldInfos.new
    infos << doc
    iw.add_document(doc)
    assert_equal(1, iw.doc_count)
    iw.close
  end

  # Adds the whole book list with small merge settings (merge_factor and
  # min_merge_docs both 3) and checks the resulting document count.
  def test_add_documents
    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new, :create => true)
    # uncomment the following line to see logging
    #iw.info_stream = $stdout
    iw.merge_factor = 3
    iw.min_merge_docs = 3
    docs = IndexTestHelper.prepare_book_list
    # NOTE(review): as above, +infos+ is built but not otherwise used.
    infos = FieldInfos.new
    infos << docs[0]
    docs.each { |doc| iw.add_document(doc) }
    assert_equal(37, iw.doc_count)
    iw.close
  end
end
@@ -0,0 +1,80 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
# Checks MultipleTermDocPosEnum (and a plain term-positions enum) against the
# documents produced by IndexTestHelper.prepare_search_docs.
class MultipleTermDocPosEnumTest < Test::Unit::TestCase
  # IndexWriter, IndexReader, Term and MultipleTermDocPosEnum are used below;
  # the sibling index tests obtain these names by including Ferret::Index.
  include Ferret::Index
  include Ferret::Search
  include Ferret::Analysis

  def setup
    @dir = Ferret::Store::RAMDirectory.new
    # NOTE(review): tc_index_writer.rb builds the writer with an options hash
    # (:analyzer, :create); confirm this positional form is still supported.
    iw = IndexWriter.new(@dir, WhiteSpaceAnalyzer.new, true, false)
    @documents = IndexTestHelper.prepare_search_docs
    @documents.each { |doc| iw << doc }
    iw.close
    @ir = IndexReader.open(@dir, true)
  end

  # Test::Unit only runs the hook named +teardown+. The former +tear_down+
  # spelling was never invoked, so the reader was never closed.
  def teardown
    @ir.close
  end

  # Walks the merged enumeration for "red", "brown" and "hairy" and checks
  # every matching document, its frequency and its positions in order.
  def test_mtdpe
    t1 = Term.new("field", "red")
    t2 = Term.new("field", "brown")
    t3 = Term.new("field", "hairy")
    mtdpe = MultipleTermDocPosEnum.new(@ir, [t1, t2, t3])

    assert_next_doc(mtdpe, 1, 1, [4])
    assert_next_doc(mtdpe, 8, 1, [5])
    assert_next_doc(mtdpe, 11, 1, [4])
    assert_next_doc(mtdpe, 14, 1, [4])
    assert_next_doc(mtdpe, 16, 3, [5, 7, 11])
    assert_next_doc(mtdpe, 17, 2, [2, 7])

    assert(!mtdpe.next?)
    mtdpe.close
  end

  # Same walk for the single term "red" through the reader's own enum.
  def test_tp
    tp = @ir.term_positions_for(Term.new("field", "red"))

    assert_next_doc(tp, 11, 1, [4])
    assert_next_doc(tp, 16, 1, [11])
    assert_next_doc(tp, 17, 1, [7])
    tp.close
  end

  private

  # Advances +enum+ by one document and asserts its doc number, frequency
  # and each successive position, in that order.
  def assert_next_doc(enum, doc, freq, positions)
    assert(enum.next?)
    assert_equal(doc, enum.doc)
    assert_equal(freq, enum.freq)
    positions.each { |position| assert_equal(position, enum.next_position) }
  end
end
@@ -0,0 +1,74 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Round-trips a SegmentInfos collection through a RAMDirectory and checks
# that its contents and version counter survive write/read cycles.
class SegmentInfosTest < Test::Unit::TestCase
  include Ferret::Index

  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Test::Unit only runs the hook named +teardown+. The former +tear_down+
  # spelling was never invoked, so the directory was never closed.
  def teardown
    @dir.close
  end

  def test_read_write
    assert_equal(0, SegmentInfos.read_current_version(@dir))
    sis = SegmentInfos.new
    seg0 = SegmentInfo.new('seg0', 5, @dir)
    seg1 = SegmentInfo.new('seg1', 5, @dir)
    seg2 = SegmentInfo.new('seg2', 5, @dir)
    seg3 = SegmentInfo.new('seg3', 5, @dir)
    sis << seg0
    sis << seg1
    sis << seg2
    assert_equal(3, sis.size)
    assert_equal(seg0, sis[0])
    assert_equal(seg2, sis[2])
    sis.write(@dir)
    version = SegmentInfos.read_current_version(@dir)
    assert(@dir.exists?('segments'))

    sis2 = SegmentInfos.new
    sis2.read(@dir)
    assert_equal(3, sis2.size)
    assert_equal(seg0, sis2[0])
    assert_equal(seg2, sis2[2])
    sis2 << seg3
    sis2.write(@dir)
    assert_equal(version + 1, SegmentInfos.read_current_version(@dir))

    # Verify what was actually read back from the directory. The original
    # re-checked sis2 here, which never exercised the second read.
    sis3 = SegmentInfos.new
    sis3.read(@dir)
    assert_equal(4, sis3.size)
    assert_equal(seg0, sis3[0])
    assert_equal(seg3, sis3[3])
  end
end
46
+
47
# Covers the plain accessors of SegmentInfo (name, doc_count, directory).
class SegmentInfoTest < Test::Unit::TestCase
  include Ferret::Index

  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Test::Unit only runs the hook named +teardown+. The former +tear_down+
  # spelling was never invoked, so the FSDirectory that the test swaps into
  # @dir was never closed.
  def teardown
    @dir.close
  end

  # just test getters and setters. Nothing else.
  def test_segment_info
    si = SegmentInfo.new("seg1", 0, @dir)
    assert_equal(@dir, si.directory)
    assert_equal(0, si.doc_count)
    assert_equal("seg1", si.name)

    # Swap the RAM directory for a file-system one and update every field.
    @dir.close
    @dpath = File.dirname(__FILE__) + '/../../temp/fsdir'
    @dir = Ferret::Store::FSDirectory.get_directory(@dpath, true)
    si.name = "seg2"
    si.doc_count += 2
    si.directory = @dir
    assert_equal(@dir, si.directory)
    assert_equal(2, si.doc_count)
    assert_equal("seg2", si.name)
  end
end
@@ -0,0 +1,17 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
# Placeholder suite: the fixture writes one prepared document into a
# RAMDirectory, but no enum behaviour is asserted yet.
class SegmentTermDocEnumTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Analysis

  # Builds the fixture: a fresh directory holding a single prepared document.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
    @doc = IndexTestHelper.prepare_document
    IndexTestHelper.write_document(@dir, @doc)
  end

  # Trivial assertion; only proves that the fixture can be built.
  def test_something
    assert(true)
  end
end
@@ -0,0 +1,60 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Writes four terms with a TermInfosWriter and reads them back through
# SegmentTermEnum, from both the ".tis" file and the ".tii" file.
class SegmentTermEnumTest < Test::Unit::TestCase
  include Ferret::Index

  TEST_SEGMENT = "_test"

  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Release the directory after each test, as the sibling index tests do.
  def teardown
    @dir.close
  end

  def test_initialize
    fis = FieldInfos.new
    fis.add("author", true, true)
    fis.add("title", true, true)
    tiw = TermInfosWriter.new(@dir, TEST_SEGMENT, fis, 128)
    terms = [Term.new("author", "Martel"),
             Term.new("title", "Life of Pi"),
             Term.new("author", "Martin"),
             Term.new("title", "Life on the edge")].sort
    term_infos = (0...4).map { |i| TermInfo.new(i, i, i, 0) }
    terms.each_with_index { |term, i| tiw.add(term, term_infos[i]) }
    tiw.close

    tis_file = @dir.open_input(TEST_SEGMENT + ".tis")

    ste = SegmentTermEnum.new(tis_file, fis, false)
    assert_equal(128, ste.index_interval)
    assert_equal(16, ste.skip_interval)
    assert_equal(4, ste.size)
    assert(ste.next?)
    assert_equal(terms[0], ste.term)
    assert_equal(term_infos[0], ste.term_info)
    # Assigning a TermInfo is expected to fill it with the current values.
    ti = TermInfo.new
    ste.term_info = ti
    assert_equal(term_infos[0], ti)
    assert(ste.next?)
    assert_equal(terms[0], ste.prev)
    assert_equal(terms[1], ste.term)
    assert_equal(term_infos[1], ste.term_info)
    assert(ste.next?)
    assert_equal(terms[2], ste.term)
    assert_equal(term_infos[2], ste.term_info)
    assert(ste.next?)
    assert_equal(terms[3], ste.term)
    assert_equal(term_infos[3], ste.term_info)
    ste.close

    tii_file = @dir.open_input(TEST_SEGMENT + ".tii")

    ste = SegmentTermEnum.new(tii_file, fis, false)
    assert_equal(128, ste.index_interval)
    assert_equal(16, ste.skip_interval)
    assert_equal(1, ste.size)
    assert(ste.next?)
    # Was `assert(Term.new("", ""), ste.term)`, which always passed because
    # the second argument of assert is only the failure message.
    assert_equal(Term.new("", ""), ste.term)
    ste.close
  end
end
@@ -0,0 +1,71 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Tests SegmentTermVector built from terms and frequencies only.
class SegmentTermVectorTest < Test::Unit::TestCase
  include Ferret::Index

  # Fixture: five fruit terms with their frequencies under field "Fruits".
  def setup
    frequencies = [4, 2, 1, 12, 4]
    @terms = ["Apples", "Oranges", "Bananas", "Kiwis", "Mandarins"]
    @stv = SegmentTermVector.new("Fruits", @terms, frequencies)
  end

  # The vector reports as many entries as terms were supplied.
  def test_size
    expected = @terms.size
    assert_equal(expected, @stv.size)
  end

  # "Apples" sits at index 0 and carries frequency 4.
  def test_index_of
    assert_equal(0, @stv.index_of("Apples"))
    frequencies = @stv.term_frequencies
    assert_equal(4, frequencies[@stv.index_of("Apples")])
  end

  # indexes_of takes a term list plus two integers and returns the matching
  # indexes; the second call yields the indexes for the last two terms only.
  def test_indexes_of
    everything = @stv.indexes_of(["Bananas", "Apples", "Kiwis"], 0, 3)
    assert_equal([2, 0, 3], everything)

    last_two = @stv.indexes_of(["Bananas", "Apples", "Kiwis"], 1, 2)
    assert_equal([0, 3], last_two)
  end
end
28
+
29
# Tests SegmentTermVector built with per-term positions and offsets as well
# as terms and frequencies.
class SegmentTermVectorWithPosOffsetsTest < Test::Unit::TestCase
  include Ferret::Index

  # Fixture: five fruit terms; each has a frequency, a list of positions and
  # a list of [start, end] offset pairs with one entry per occurrence.
  def setup
    @terms = ["Apples", "Oranges", "Bananas", "Kiwis", "Mandarins"]
    frequencies = [4, 2, 1, 12, 4]
    positions = [
      [1, 3, 5, 7],
      [2, 4],
      [6],
      [8, 9, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22],
      [11, 15, 19, 23]
    ]
    offsets = [
      [[1, 4], [10, 14], [20, 24], [30, 34]],
      [[5, 9], [15, 19]],
      [[25, 29]],
      [[35, 39], [40, 44], [45, 49], [55, 59], [60, 64], [65, 69],
       [75, 79], [80, 84], [85, 89], [95, 99], [100, 104], [105, 109]],
      [[50, 54], [70, 74], [90, 94], [110, 114]]
    ]
    @stv = SegmentTermVector.new("Fruits", @terms, frequencies, positions, offsets)
  end

  # The vector reports as many entries as terms were supplied.
  def test_size
    expected = @terms.size
    assert_equal(expected, @stv.size)
  end

  # "Apples" sits at index 0 and carries frequency 4.
  def test_index_of
    assert_equal(0, @stv.index_of("Apples"))
    frequencies = @stv.term_frequencies
    assert_equal(4, frequencies[@stv.index_of("Apples")])
  end

  # indexes_of takes a term list plus two integers and returns the matching
  # indexes; the second call yields the indexes for the last two terms only.
  def test_indexes_of
    everything = @stv.indexes_of(["Bananas", "Apples", "Kiwis"], 0, 3)
    assert_equal([2, 0, 3], everything)

    last_two = @stv.indexes_of(["Bananas", "Apples", "Kiwis"], 1, 2)
    assert_equal([0, 3], last_two)
  end

  # Positions and offsets are retrievable by the index of their term.
  def test_positions_offsets
    apple_positions = @stv.positions[@stv.index_of("Apples")]
    assert_equal([1, 3, 5, 7], apple_positions)

    kiwi_offsets = [
      [35, 39], [40, 44], [45, 49], [55, 59], [60, 64], [65, 69],
      [75, 79], [80, 84], [85, 89], [95, 99], [100, 104], [105, 109]
    ]
    assert_equal(kiwi_offsets, @stv.offsets[@stv.index_of("Kiwis")])
  end
end
@@ -0,0 +1,22 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Covers Term accessors, ordering, equality and in-place reassignment.
class TermTest < Test::Unit::TestCase
  include Ferret::Index

  def test_term
    term1 = Term.new("bfield1", "athis is text1")
    # assert_equal takes (expected, actual); the original had them swapped,
    # which mislabels the values in failure output.
    assert_equal("bfield1", term1.field)
    assert_equal("athis is text1", term1.text)

    term2 = Term.new("afield2", "athis is text1")
    term3 = Term.new("bfield1", "bthis is text2")
    term4 = Term.new("bfield1", "athis is text1")
    # term2 differs from term1 only by field, term3 only by text.
    assert(term1 > term2)
    assert(term1 < term3)
    assert(term1.between?(term2, term3))
    assert(term1 == term4)
    assert(term1.eql?(term4))

    # After set! the term no longer compares equal to term1.
    term4.set!("field3", "text3")
    assert(term1 != term4)
  end
end
@@ -0,0 +1,57 @@
1
+ require File.dirname(__FILE__) + "/../../test_helper"
2
+
3
+
4
# Covers TermBuffer: assignment from a Term, copying from another buffer,
# ordering, and reading a term from an index input.
class TermBufferTest < Test::Unit::TestCase
  include Ferret::Index

  # Assigning a Term exposes its field, text and text length on the buffer.
  def test_term_set
    term = Term.new("title", "Ferret Tutorial")
    buffer = TermBuffer.new
    buffer.term = term

    assert_equal(term.field, buffer.field)
    assert_equal("Ferret Tutorial", buffer.text_str)
    assert_equal("Ferret Tutorial".length, buffer.text_length)
    assert_equal(term, buffer.term)
  end

  # set! copies the state of one buffer into another.
  def test_set
    source = TermBuffer.new
    source.term = Term.new("title", "Ferret Tutorial")
    copy = TermBuffer.new
    copy.set!(source)

    assert_equal(source.field, copy.field)
    assert_equal("Ferret Tutorial", copy.text_str)
    assert_equal("Ferret Tutorial".length, copy.text_length)
    assert_equal(source.term, copy.term)
  end

  # Buffers compare by field first; with equal fields, by text.
  def test_compare
    left = TermBuffer.new
    right = TermBuffer.new
    left.term = Term.new("alpha", "text")

    right.term = Term.new("bravo", "text")
    assert(left < right)

    right.term = Term.new("alpha", "text")
    assert(left == right)

    right.term = Term.new("alpha", "tex")
    assert(left > right)
  end

  # Reads a hand-written entry on top of an existing "Dave" term and expects
  # the text "Dave Balmain" under field "Writer".
  def test_read
    dir = Ferret::Store::RAMDirectory.new
    field_infos = FieldInfos.new
    buffer = TermBuffer.new
    buffer.term = Term.new("Author", "Dave")
    field_infos.add("Writer", true)

    output = dir.create_output("term_buffer_read_test")
    # Presumably: 4 = length of the prefix kept from "Dave", 8 = length of
    # the appended text. TODO confirm against TermBuffer#read.
    output.write_vint(4)
    output.write_vint(8)
    output.write_chars(" Balmain", 0, 8)
    output.write_vint(field_infos.field_number("Writer"))
    output.close

    input = dir.open_input("term_buffer_read_test")
    buffer.read(input, field_infos)

    assert_equal("Dave Balmain", buffer.text_str)
    assert_equal("Dave Balmain", buffer.term.text)
    assert_equal("Writer", buffer.field)
  end
end