ferret 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/MIT-LICENSE +20 -0
- data/README +109 -0
- data/Rakefile +275 -0
- data/TODO +9 -0
- data/TUTORIAL +197 -0
- data/ext/extconf.rb +3 -0
- data/ext/ferret.c +23 -0
- data/ext/ferret.h +85 -0
- data/ext/index_io.c +543 -0
- data/ext/priority_queue.c +227 -0
- data/ext/ram_directory.c +316 -0
- data/ext/segment_merge_queue.c +41 -0
- data/ext/string_helper.c +42 -0
- data/ext/tags +240 -0
- data/ext/term.c +261 -0
- data/ext/term_buffer.c +299 -0
- data/ext/util.c +12 -0
- data/lib/ferret.rb +41 -0
- data/lib/ferret/analysis.rb +11 -0
- data/lib/ferret/analysis/analyzers.rb +93 -0
- data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
- data/lib/ferret/analysis/token.rb +79 -0
- data/lib/ferret/analysis/token_filters.rb +86 -0
- data/lib/ferret/analysis/token_stream.rb +26 -0
- data/lib/ferret/analysis/tokenizers.rb +107 -0
- data/lib/ferret/analysis/word_list_loader.rb +27 -0
- data/lib/ferret/document.rb +2 -0
- data/lib/ferret/document/document.rb +152 -0
- data/lib/ferret/document/field.rb +304 -0
- data/lib/ferret/index.rb +26 -0
- data/lib/ferret/index/compound_file_io.rb +343 -0
- data/lib/ferret/index/document_writer.rb +288 -0
- data/lib/ferret/index/field_infos.rb +259 -0
- data/lib/ferret/index/fields_io.rb +175 -0
- data/lib/ferret/index/index.rb +228 -0
- data/lib/ferret/index/index_file_names.rb +33 -0
- data/lib/ferret/index/index_reader.rb +462 -0
- data/lib/ferret/index/index_writer.rb +488 -0
- data/lib/ferret/index/multi_reader.rb +363 -0
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
- data/lib/ferret/index/segment_infos.rb +130 -0
- data/lib/ferret/index/segment_merge_info.rb +47 -0
- data/lib/ferret/index/segment_merge_queue.rb +16 -0
- data/lib/ferret/index/segment_merger.rb +337 -0
- data/lib/ferret/index/segment_reader.rb +380 -0
- data/lib/ferret/index/segment_term_enum.rb +178 -0
- data/lib/ferret/index/segment_term_vector.rb +58 -0
- data/lib/ferret/index/term.rb +49 -0
- data/lib/ferret/index/term_buffer.rb +88 -0
- data/lib/ferret/index/term_doc_enum.rb +283 -0
- data/lib/ferret/index/term_enum.rb +52 -0
- data/lib/ferret/index/term_info.rb +41 -0
- data/lib/ferret/index/term_infos_io.rb +312 -0
- data/lib/ferret/index/term_vector_offset_info.rb +20 -0
- data/lib/ferret/index/term_vectors_io.rb +552 -0
- data/lib/ferret/query_parser.rb +274 -0
- data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
- data/lib/ferret/search.rb +49 -0
- data/lib/ferret/search/boolean_clause.rb +100 -0
- data/lib/ferret/search/boolean_query.rb +303 -0
- data/lib/ferret/search/boolean_scorer.rb +294 -0
- data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
- data/lib/ferret/search/conjunction_scorer.rb +99 -0
- data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
- data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
- data/lib/ferret/search/explanation.rb +41 -0
- data/lib/ferret/search/field_cache.rb +216 -0
- data/lib/ferret/search/field_doc.rb +31 -0
- data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
- data/lib/ferret/search/filter.rb +11 -0
- data/lib/ferret/search/filtered_query.rb +130 -0
- data/lib/ferret/search/filtered_term_enum.rb +79 -0
- data/lib/ferret/search/fuzzy_query.rb +153 -0
- data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
- data/lib/ferret/search/hit_collector.rb +34 -0
- data/lib/ferret/search/hit_queue.rb +11 -0
- data/lib/ferret/search/index_searcher.rb +173 -0
- data/lib/ferret/search/match_all_docs_query.rb +104 -0
- data/lib/ferret/search/multi_phrase_query.rb +204 -0
- data/lib/ferret/search/multi_term_query.rb +65 -0
- data/lib/ferret/search/non_matching_scorer.rb +22 -0
- data/lib/ferret/search/phrase_positions.rb +55 -0
- data/lib/ferret/search/phrase_query.rb +217 -0
- data/lib/ferret/search/phrase_scorer.rb +153 -0
- data/lib/ferret/search/prefix_query.rb +47 -0
- data/lib/ferret/search/query.rb +111 -0
- data/lib/ferret/search/query_filter.rb +51 -0
- data/lib/ferret/search/range_filter.rb +103 -0
- data/lib/ferret/search/range_query.rb +139 -0
- data/lib/ferret/search/req_excl_scorer.rb +125 -0
- data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
- data/lib/ferret/search/score_doc.rb +38 -0
- data/lib/ferret/search/score_doc_comparator.rb +114 -0
- data/lib/ferret/search/scorer.rb +91 -0
- data/lib/ferret/search/similarity.rb +278 -0
- data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
- data/lib/ferret/search/sort.rb +105 -0
- data/lib/ferret/search/sort_comparator.rb +60 -0
- data/lib/ferret/search/sort_field.rb +87 -0
- data/lib/ferret/search/spans.rb +12 -0
- data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
- data/lib/ferret/search/spans/span_first_query.rb +79 -0
- data/lib/ferret/search/spans/span_near_query.rb +108 -0
- data/lib/ferret/search/spans/span_not_query.rb +130 -0
- data/lib/ferret/search/spans/span_or_query.rb +176 -0
- data/lib/ferret/search/spans/span_query.rb +25 -0
- data/lib/ferret/search/spans/span_scorer.rb +74 -0
- data/lib/ferret/search/spans/span_term_query.rb +105 -0
- data/lib/ferret/search/spans/span_weight.rb +84 -0
- data/lib/ferret/search/spans/spans_enum.rb +44 -0
- data/lib/ferret/search/term_query.rb +128 -0
- data/lib/ferret/search/term_scorer.rb +181 -0
- data/lib/ferret/search/top_docs.rb +24 -0
- data/lib/ferret/search/top_field_docs.rb +17 -0
- data/lib/ferret/search/weight.rb +54 -0
- data/lib/ferret/search/wildcard_query.rb +26 -0
- data/lib/ferret/search/wildcard_term_enum.rb +61 -0
- data/lib/ferret/stemmers.rb +1 -0
- data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
- data/lib/ferret/store.rb +5 -0
- data/lib/ferret/store/buffered_index_io.rb +191 -0
- data/lib/ferret/store/directory.rb +139 -0
- data/lib/ferret/store/fs_store.rb +338 -0
- data/lib/ferret/store/index_io.rb +259 -0
- data/lib/ferret/store/ram_store.rb +282 -0
- data/lib/ferret/utils.rb +7 -0
- data/lib/ferret/utils/bit_vector.rb +105 -0
- data/lib/ferret/utils/date_tools.rb +138 -0
- data/lib/ferret/utils/number_tools.rb +91 -0
- data/lib/ferret/utils/parameter.rb +41 -0
- data/lib/ferret/utils/priority_queue.rb +120 -0
- data/lib/ferret/utils/string_helper.rb +47 -0
- data/lib/ferret/utils/weak_key_hash.rb +51 -0
- data/rake_utils/code_statistics.rb +106 -0
- data/setup.rb +1551 -0
- data/test/benchmark/tb_ram_store.rb +76 -0
- data/test/benchmark/tb_rw_vint.rb +26 -0
- data/test/longrunning/tc_numbertools.rb +60 -0
- data/test/longrunning/tm_store.rb +19 -0
- data/test/test_all.rb +9 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/analysis/tc_analyzer.rb +21 -0
- data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
- data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
- data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
- data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
- data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
- data/test/unit/analysis/tc_stop_filter.rb +14 -0
- data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
- data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_word_list_loader.rb +32 -0
- data/test/unit/document/tc_document.rb +47 -0
- data/test/unit/document/tc_field.rb +80 -0
- data/test/unit/index/tc_compound_file_io.rb +107 -0
- data/test/unit/index/tc_field_infos.rb +119 -0
- data/test/unit/index/tc_fields_io.rb +167 -0
- data/test/unit/index/tc_index.rb +140 -0
- data/test/unit/index/tc_index_reader.rb +622 -0
- data/test/unit/index/tc_index_writer.rb +57 -0
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
- data/test/unit/index/tc_segment_infos.rb +74 -0
- data/test/unit/index/tc_segment_term_docs.rb +17 -0
- data/test/unit/index/tc_segment_term_enum.rb +60 -0
- data/test/unit/index/tc_segment_term_vector.rb +71 -0
- data/test/unit/index/tc_term.rb +22 -0
- data/test/unit/index/tc_term_buffer.rb +57 -0
- data/test/unit/index/tc_term_info.rb +19 -0
- data/test/unit/index/tc_term_infos_io.rb +192 -0
- data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
- data/test/unit/index/tc_term_vectors_io.rb +108 -0
- data/test/unit/index/th_doc.rb +244 -0
- data/test/unit/query_parser/tc_query_parser.rb +84 -0
- data/test/unit/search/tc_filter.rb +113 -0
- data/test/unit/search/tc_fuzzy_query.rb +136 -0
- data/test/unit/search/tc_index_searcher.rb +188 -0
- data/test/unit/search/tc_search_and_sort.rb +98 -0
- data/test/unit/search/tc_similarity.rb +37 -0
- data/test/unit/search/tc_sort.rb +48 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +153 -0
- data/test/unit/store/tc_fs_store.rb +84 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +180 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/ts_analysis.rb +16 -0
- data/test/unit/ts_document.rb +4 -0
- data/test/unit/ts_index.rb +18 -0
- data/test/unit/ts_query_parser.rb +3 -0
- data/test/unit/ts_search.rb +10 -0
- data/test/unit/ts_store.rb +6 -0
- data/test/unit/ts_utils.rb +10 -0
- data/test/unit/utils/tc_bit_vector.rb +65 -0
- data/test/unit/utils/tc_date_tools.rb +50 -0
- data/test/unit/utils/tc_number_tools.rb +59 -0
- data/test/unit/utils/tc_parameter.rb +40 -0
- data/test/unit/utils/tc_priority_queue.rb +62 -0
- data/test/unit/utils/tc_string_helper.rb +21 -0
- data/test/unit/utils/tc_weak_key_hash.rb +25 -0
- metadata +251 -0
@@ -0,0 +1,57 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Exercises IndexWriter against an in-memory RAMDirectory: lock handling on
# open/close, and the document count after adding documents.
class IndexWriterTest < Test::Unit::TestCase
  include Ferret::Index
  include Ferret::Analysis

  # Gives every test its own fresh in-memory directory.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Must be spelled +teardown+ (not +tear_down+): Test::Unit only invokes the
  # hook under this name, otherwise the directory is never closed.
  def teardown
    @dir.close()
  end

  # Opening a writer with :create => true must produce a "segments" file and
  # hold the write lock; closing it must release the lock and keep the file.
  def test_initialize
    wlock = @dir.make_lock(IndexWriter::WRITE_LOCK_NAME)
    clock = @dir.make_lock(IndexWriter::COMMIT_LOCK_NAME)
    assert(!wlock.locked?)
    assert(!clock.locked?)

    iw = IndexWriter.new(@dir, :create => true)
    assert(@dir.exists?("segments"))
    assert(wlock.locked?)

    iw.close()
    assert(@dir.exists?("segments"))
    assert(!wlock.locked?)
    assert(!clock.locked?)
  end

  # Adding a single document must be reflected in doc_count.
  def test_add_document
    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
    doc = IndexTestHelper.prepare_document()

    # Nothing is asserted on +infos+; this only checks that FieldInfos accepts
    # the document without raising.
    infos = FieldInfos.new
    infos << doc

    iw.add_document(doc)
    assert_equal(1, iw.doc_count)
    iw.close()
  end

  # Adds the whole book list with small merge settings and checks the final
  # document count.
  def test_add_documents
    iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
    # uncomment the following line to see logging
    #iw.info_stream = $stdout
    iw.merge_factor = 3
    iw.min_merge_docs = 3
    docs = IndexTestHelper.prepare_book_list()

    # As above, +infos+ is exercised but not asserted on.
    infos = FieldInfos.new
    infos << docs[0]

    docs.each { |doc| iw.add_document(doc) }

    # 37 is the hard-coded expected count for the prepared book list.
    assert_equal(37, iw.doc_count)
    iw.close()
  end
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
# Checks the doc / freq / position stream produced by MultipleTermDocPosEnum
# (several terms merged into one enum) and by a single-term positions enum.
class MultipleTermDocPosEnumTest < Test::Unit::TestCase
  # The other index tests reach IndexWriter and Term through Ferret::Index.
  # It is included first so that any constant already resolved through
  # Ferret::Search or Ferret::Analysis keeps resolving the same way.
  include Ferret::Index
  include Ferret::Search
  include Ferret::Analysis

  # Builds an in-memory index from the prepared search documents and opens a
  # reader on it.
  def setup
    @dir = Ferret::Store::RAMDirectory.new()
    # NOTE(review): the other index tests build the writer with an options
    # hash (:analyzer => ..., :create => true). Confirm this positional form
    # is accepted by IndexWriter.new.
    iw = IndexWriter.new(@dir, WhiteSpaceAnalyzer.new(), true, false)
    @documents = IndexTestHelper.prepare_search_docs()
    @documents.each { |doc| iw << doc }
    iw.close()
    @ir = IndexReader.open(@dir, true)
  end

  # Must be spelled +teardown+ (not +tear_down+): Test::Unit only invokes the
  # hook under this name, otherwise the reader is never closed.
  def teardown
    @ir.close
  end

  # Walks the merged enum for "red", "brown" and "hairy" and checks each
  # document in turn, then that the enum is exhausted.
  def test_mtdpe
    terms = %w[red brown hairy].map { |text| Term.new("field", text) }
    mtdpe = MultipleTermDocPosEnum.new(@ir, terms)

    assert_postings(mtdpe, [
      [1,  [4]],
      [8,  [5]],
      [11, [4]],
      [14, [4]],
      [16, [5, 7, 11]],
      [17, [2, 7]]
    ])

    assert(!mtdpe.next?)
    mtdpe.close()
  end

  # Walks the plain positions enum for the single term "red".
  def test_tp
    tp = @ir.term_positions_for(Term.new("field", "red"))

    assert_postings(tp, [
      [11, [4]],
      [16, [11]],
      [17, [7]]
    ])

    tp.close()
  end

  private

  # Steps +enum+ through +expected+, a list of [doc, positions] pairs.
  # For each pair it asserts, in order: the enum advances, the document
  # number matches, freq equals the number of expected positions, and
  # next_position yields each expected position.
  def assert_postings(enum, expected)
    expected.each do |doc, positions|
      assert(enum.next?)
      assert_equal(doc, enum.doc)
      assert_equal(positions.size, enum.freq)
      positions.each { |pos| assert_equal(pos, enum.next_position) }
    end
  end
end
|
@@ -0,0 +1,74 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Round-trips a SegmentInfos collection through a RAMDirectory and checks
# that contents and version number survive write/read cycles.
class SegmentInfosTest < Test::Unit::TestCase
  include Ferret::Index

  # Gives every test its own fresh in-memory directory.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Must be spelled +teardown+ (not +tear_down+): Test::Unit only invokes the
  # hook under this name, otherwise the directory is never closed.
  def teardown
    @dir.close()
  end

  # Writes three segments, reads them back, appends a fourth, writes again
  # and verifies the second read as well as the version bump.
  def test_read_write
    assert_equal(0, SegmentInfos.read_current_version(@dir))

    sis = SegmentInfos.new()
    seg0 = SegmentInfo.new('seg0', 5, @dir)
    seg1 = SegmentInfo.new('seg1', 5, @dir)
    seg2 = SegmentInfo.new('seg2', 5, @dir)
    seg3 = SegmentInfo.new('seg3', 5, @dir)
    sis << seg0
    sis << seg1
    sis << seg2
    assert_equal(3, sis.size())
    assert_equal(seg0, sis[0])
    assert_equal(seg2, sis[2])

    sis.write(@dir)
    version = SegmentInfos.read_current_version(@dir)
    assert(@dir.exists?('segments'))

    sis2 = SegmentInfos.new()
    sis2.read(@dir)
    assert_equal(3, sis2.size())
    assert_equal(seg0, sis2[0])
    assert_equal(seg2, sis2[2])

    sis2 << seg3
    sis2.write(@dir)
    assert_equal(version + 1, SegmentInfos.read_current_version(@dir))

    # Check the freshly read collection (sis3), not sis2: asserting on sis2
    # here would only re-check what was appended in memory above.
    sis3 = SegmentInfos.new()
    sis3.read(@dir)
    assert_equal(4, sis3.size())
    assert_equal(seg0, sis3[0])
    assert_equal(seg3, sis3[3])
  end
end
|
46
|
+
|
47
|
+
# Covers the plain accessors of SegmentInfo (name, doc_count, directory).
class SegmentInfoTest < Test::Unit::TestCase
  include Ferret::Index

  # Gives every test its own fresh in-memory directory.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  # Must be spelled +teardown+ (not +tear_down+): Test::Unit only invokes the
  # hook under this name. By the time it runs, @dir refers to the FSDirectory
  # opened inside the test, so that is what gets closed here.
  def teardown
    @dir.close()
  end

  # just test getters and setters. Nothing else.
  def test_segment_info
    si = SegmentInfo.new("seg1", 0, @dir)
    assert_equal(@dir, si.directory)
    assert_equal(0, si.doc_count)
    assert_equal("seg1", si.name)

    # Swap the RAM directory for a file-system one to check that the
    # directory setter really replaces the reference.
    @dir.close()
    @dpath = File.dirname(__FILE__) + '/../../temp/fsdir'
    @dir = Ferret::Store::FSDirectory.get_directory(@dpath, true)

    si.name = "seg2"
    si.doc_count += 2
    si.directory = @dir
    assert_equal(@dir, si.directory)
    assert_equal(2, si.doc_count)
    assert_equal("seg2", si.name)
  end
end
|
@@ -0,0 +1,17 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
# Placeholder suite for SegmentTermDocEnum: the fixture writes one prepared
# document into a RAMDirectory, but no enum behaviour is asserted yet.
class SegmentTermDocEnumTest < Test::Unit::TestCase

  include Ferret::Index
  include Ferret::Analysis

  # Writes a single prepared document into a fresh in-memory directory.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
    @doc = IndexTestHelper.prepare_document()
    IndexTestHelper.write_document(@dir, @doc)
  end

  # Closes the directory opened in setup, as the other index tests do.
  def teardown
    @dir.close()
  end

  # TODO: replace with real assertions; currently this only proves that the
  # fixture in setup can be built without raising.
  def test_something
    assert true
  end
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Writes four terms with a TermInfosWriter and reads them back through
# SegmentTermEnum, from both the ".tis" file and the ".tii" file.
class SegmentTermEnumTest < Test::Unit::TestCase
  include Ferret::Index

  TEST_SEGMENT = "_test"

  # Gives every test its own fresh in-memory directory.
  def setup
    @dir = Ferret::Store::RAMDirectory.new
  end

  def test_initialize
    fis = FieldInfos.new
    fis.add("author", true, true)
    fis.add("title", true, true)

    tiw = TermInfosWriter.new(@dir, TEST_SEGMENT, fis, 128)
    # The terms are sorted before being handed to the writer.
    terms = [ Term.new("author", "Martel"),
              Term.new("title", "Life of Pi"),
              Term.new("author", "Martin"),
              Term.new("title", "Life on the edge") ].sort
    term_infos = (0...4).map { |i| TermInfo.new(i, i, i, 0) }
    terms.each_with_index { |term, i| tiw.add(term, term_infos[i]) }
    tiw.close()

    # --- ".tis" file: all four entries ---
    tis_file = @dir.open_input(TEST_SEGMENT + ".tis")
    ste = SegmentTermEnum.new(tis_file, fis, false)
    assert_equal(128, ste.index_interval)
    assert_equal(16, ste.skip_interval)
    assert_equal(4, ste.size)

    assert(ste.next?)
    assert_equal(terms[0], ste.term)
    assert_equal(term_infos[0], ste.term_info)

    # The assertion below expects that assigning +ti+ through term_info=
    # leaves +ti+ equal to the enum's current term info.
    ti = TermInfo.new
    ste.term_info = ti
    assert_equal(term_infos[0], ti)

    assert(ste.next?)
    assert_equal(terms[0], ste.prev)
    assert_equal(terms[1], ste.term)
    assert_equal(term_infos[1], ste.term_info)

    assert(ste.next?)
    assert_equal(terms[2], ste.term)
    assert_equal(term_infos[2], ste.term_info)

    assert(ste.next?)
    assert_equal(terms[3], ste.term)
    assert_equal(term_infos[3], ste.term_info)
    ste.close()

    # --- ".tii" file: a single entry is expected ---
    tii_file = @dir.open_input(TEST_SEGMENT + ".tii")
    ste = SegmentTermEnum.new(tii_file, fis, false)
    assert_equal(128, ste.index_interval)
    assert_equal(16, ste.skip_interval)
    assert_equal(1, ste.size)
    assert(ste.next?)
    # assert_equal, not assert: with plain assert the second argument is only
    # the failure message, so the term was never actually compared.
    assert_equal(Term.new("", ""), ste.term)
    ste.close()
  end
end
|
@@ -0,0 +1,71 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Tests SegmentTermVector when built from terms and frequencies only
# (no positions, no offsets).
class SegmentTermVectorTest < Test::Unit::TestCase
  include Ferret::Index

  # Builds a "Fruits" vector of five terms with their frequencies.
  def setup
    frequencies = [4, 2, 1, 12, 4]
    @terms = %w[Apples Oranges Bananas Kiwis Mandarins]
    @stv = SegmentTermVector.new("Fruits", @terms, frequencies)
  end

  # The vector reports as many entries as terms were supplied.
  def test_size
    expected_size = @terms.size
    assert_equal(expected_size, @stv.size)
  end

  # "Apples" is the first term, and its frequency is found at that index.
  def test_index_of
    assert_equal(0, @stv.index_of("Apples"))
    apples_at = @stv.index_of("Apples")
    assert_equal(4, @stv.term_frequencies[apples_at])
  end

  # indexes_of takes a term list plus two integers; the expected results
  # match treating them as a start offset and a length into that list.
  def test_indexes_of
    whole_list = @stv.indexes_of(%w[Bananas Apples Kiwis], 0, 3)
    assert_equal([2, 0, 3], whole_list)

    tail_only = @stv.indexes_of(%w[Bananas Apples Kiwis], 1, 2)
    assert_equal([0, 3], tail_only)
  end
end
|
28
|
+
|
29
|
+
# Tests SegmentTermVector when built with per-term positions and offsets in
# addition to terms and frequencies.
class SegmentTermVectorWithPosOffsetsTest < Test::Unit::TestCase
  include Ferret::Index

  # Builds a "Fruits" vector; the positions and offsets arrays are parallel
  # to @terms, with one inner list per term.
  def setup
    @terms = %w[Apples Oranges Bananas Kiwis Mandarins]
    frequencies = [4, 2, 1, 12, 4]

    positions_by_term = [
      [1, 3, 5, 7],
      [2, 4],
      [6],
      [8, 9, 10, 12, 13, 14, 16, 17, 18, 20, 21, 22],
      [11, 15, 19, 23]
    ]

    offsets_by_term = [
      [[1, 4], [10, 14], [20, 24], [30, 34]],
      [[5, 9], [15, 19]],
      [[25, 29]],
      [[35, 39], [40, 44], [45, 49], [55, 59], [60, 64], [65, 69],
       [75, 79], [80, 84], [85, 89], [95, 99], [100, 104], [105, 109]],
      [[50, 54], [70, 74], [90, 94], [110, 114]]
    ]

    @stv = SegmentTermVector.new("Fruits", @terms, frequencies,
                                 positions_by_term, offsets_by_term)
  end

  # The vector reports as many entries as terms were supplied.
  def test_size
    expected_size = @terms.size
    assert_equal(expected_size, @stv.size)
  end

  # "Apples" is the first term, and its frequency is found at that index.
  def test_index_of
    assert_equal(0, @stv.index_of("Apples"))
    apples_at = @stv.index_of("Apples")
    assert_equal(4, @stv.term_frequencies[apples_at])
  end

  # indexes_of takes a term list plus two integers; the expected results
  # match treating them as a start offset and a length into that list.
  def test_indexes_of
    whole_list = @stv.indexes_of(%w[Bananas Apples Kiwis], 0, 3)
    assert_equal([2, 0, 3], whole_list)

    tail_only = @stv.indexes_of(%w[Bananas Apples Kiwis], 1, 2)
    assert_equal([0, 3], tail_only)
  end

  # Positions and offsets come back per term, addressed via index_of.
  def test_positions_offsets
    apple_positions = @stv.positions[@stv.index_of("Apples")]
    assert_equal([1, 3, 5, 7], apple_positions)

    kiwi_offsets = [
      [35, 39], [40, 44], [45, 49], [55, 59], [60, 64], [65, 69],
      [75, 79], [80, 84], [85, 89], [95, 99], [100, 104], [105, 109]
    ]
    assert_equal(kiwi_offsets, @stv.offsets[@stv.index_of("Kiwis")])
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Covers Term's accessors, ordering/equality operators and the in-place
# set! mutator.
class TermTest < Test::Unit::TestCase
  include Ferret::Index

  def test_term
    term1 = Term.new("bfield1", "athis is text1")
    # assert_equal takes (expected, actual); keeping that order makes the
    # failure message report the right value as "expected".
    assert_equal("bfield1", term1.field)
    assert_equal("athis is text1", term1.text)

    term2 = Term.new("afield2", "athis is text1")  # differs from term1 in field
    term3 = Term.new("bfield1", "bthis is text2")  # same field, different text
    term4 = Term.new("bfield1", "athis is text1")  # same field and text as term1

    assert(term1 > term2)
    assert(term1 < term3)
    assert(term1.between?(term2, term3))
    assert(term1 == term4)
    assert(term1.eql?(term4))

    # After set! changes term4's field and text it must no longer equal term1.
    term4.set!("field3", "text3")
    assert_not_equal(term1, term4)
  end

end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Tests TermBuffer: loading from a Term, copying from another buffer,
# comparison operators, and reading a term entry from an input stream.
class TermBufferTest < Test::Unit::TestCase
  include Ferret::Index

  # Assigning a Term exposes its field and text through the buffer.
  def test_term_set
    source_term = Term.new("title", "Ferret Tutorial")
    buffer = TermBuffer.new
    buffer.term = source_term

    expected_text = "Ferret Tutorial"
    assert_equal(source_term.field, buffer.field)
    assert_equal(expected_text, buffer.text_str)
    assert_equal(expected_text.length, buffer.text_length)
    assert_equal(source_term, buffer.term)
  end

  # set! copies field and text from another buffer.
  def test_set
    original = TermBuffer.new
    original.term = Term.new("title", "Ferret Tutorial")
    copy = TermBuffer.new
    copy.set!(original)

    expected_text = "Ferret Tutorial"
    assert_equal(original.field, copy.field)
    assert_equal(expected_text, copy.text_str)
    assert_equal(expected_text.length, copy.text_length)
    assert_equal(original.term, copy.term)
  end

  # Expects buffers to order by field first, then by text.
  def test_compare
    left = TermBuffer.new
    right = TermBuffer.new
    left.term = Term.new("alpha", "text")

    right.term = Term.new("bravo", "text")
    assert(left < right)

    right.term = Term.new("alpha", "text")
    assert(left == right)

    right.term = Term.new("alpha", "tex")
    assert(left > right)
  end

  # Writes three vints (4, 8, then the field number of "Writer") and the
  # 8 characters " Balmain" to a file, then reads it into a buffer already
  # holding "Dave". The assertions expect the result "Dave Balmain" in field
  # "Writer" -- presumably 4 is the number of characters kept from the
  # existing text and 8 the number of new characters; confirm against
  # TermBuffer#read.
  def test_read
    ram_dir = Ferret::Store::RAMDirectory.new
    field_infos = FieldInfos.new
    buffer = TermBuffer.new
    buffer.term = Term.new("Author", "Dave")
    field_infos.add("Writer", true)

    file_name = "term_buffer_read_test"
    out = ram_dir.create_output(file_name)
    out.write_vint(4)
    out.write_vint(8)
    out.write_chars(" Balmain", 0, 8)
    out.write_vint(field_infos.field_number("Writer"))
    out.close

    stream = ram_dir.open_input(file_name)
    buffer.read(stream, field_infos)

    assert_equal("Dave Balmain", buffer.text_str)
    assert_equal("Dave Balmain", buffer.term.text)
    assert_equal("Writer", buffer.field)
  end
end
|