ferret 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +20 -0
- data/README +109 -0
- data/Rakefile +275 -0
- data/TODO +9 -0
- data/TUTORIAL +197 -0
- data/ext/extconf.rb +3 -0
- data/ext/ferret.c +23 -0
- data/ext/ferret.h +85 -0
- data/ext/index_io.c +543 -0
- data/ext/priority_queue.c +227 -0
- data/ext/ram_directory.c +316 -0
- data/ext/segment_merge_queue.c +41 -0
- data/ext/string_helper.c +42 -0
- data/ext/tags +240 -0
- data/ext/term.c +261 -0
- data/ext/term_buffer.c +299 -0
- data/ext/util.c +12 -0
- data/lib/ferret.rb +41 -0
- data/lib/ferret/analysis.rb +11 -0
- data/lib/ferret/analysis/analyzers.rb +93 -0
- data/lib/ferret/analysis/standard_tokenizer.rb +65 -0
- data/lib/ferret/analysis/token.rb +79 -0
- data/lib/ferret/analysis/token_filters.rb +86 -0
- data/lib/ferret/analysis/token_stream.rb +26 -0
- data/lib/ferret/analysis/tokenizers.rb +107 -0
- data/lib/ferret/analysis/word_list_loader.rb +27 -0
- data/lib/ferret/document.rb +2 -0
- data/lib/ferret/document/document.rb +152 -0
- data/lib/ferret/document/field.rb +304 -0
- data/lib/ferret/index.rb +26 -0
- data/lib/ferret/index/compound_file_io.rb +343 -0
- data/lib/ferret/index/document_writer.rb +288 -0
- data/lib/ferret/index/field_infos.rb +259 -0
- data/lib/ferret/index/fields_io.rb +175 -0
- data/lib/ferret/index/index.rb +228 -0
- data/lib/ferret/index/index_file_names.rb +33 -0
- data/lib/ferret/index/index_reader.rb +462 -0
- data/lib/ferret/index/index_writer.rb +488 -0
- data/lib/ferret/index/multi_reader.rb +363 -0
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +105 -0
- data/lib/ferret/index/segment_infos.rb +130 -0
- data/lib/ferret/index/segment_merge_info.rb +47 -0
- data/lib/ferret/index/segment_merge_queue.rb +16 -0
- data/lib/ferret/index/segment_merger.rb +337 -0
- data/lib/ferret/index/segment_reader.rb +380 -0
- data/lib/ferret/index/segment_term_enum.rb +178 -0
- data/lib/ferret/index/segment_term_vector.rb +58 -0
- data/lib/ferret/index/term.rb +49 -0
- data/lib/ferret/index/term_buffer.rb +88 -0
- data/lib/ferret/index/term_doc_enum.rb +283 -0
- data/lib/ferret/index/term_enum.rb +52 -0
- data/lib/ferret/index/term_info.rb +41 -0
- data/lib/ferret/index/term_infos_io.rb +312 -0
- data/lib/ferret/index/term_vector_offset_info.rb +20 -0
- data/lib/ferret/index/term_vectors_io.rb +552 -0
- data/lib/ferret/query_parser.rb +274 -0
- data/lib/ferret/query_parser/query_parser.tab.rb +819 -0
- data/lib/ferret/search.rb +49 -0
- data/lib/ferret/search/boolean_clause.rb +100 -0
- data/lib/ferret/search/boolean_query.rb +303 -0
- data/lib/ferret/search/boolean_scorer.rb +294 -0
- data/lib/ferret/search/caching_wrapper_filter.rb +40 -0
- data/lib/ferret/search/conjunction_scorer.rb +99 -0
- data/lib/ferret/search/disjunction_sum_scorer.rb +203 -0
- data/lib/ferret/search/exact_phrase_scorer.rb +32 -0
- data/lib/ferret/search/explanation.rb +41 -0
- data/lib/ferret/search/field_cache.rb +216 -0
- data/lib/ferret/search/field_doc.rb +31 -0
- data/lib/ferret/search/field_sorted_hit_queue.rb +184 -0
- data/lib/ferret/search/filter.rb +11 -0
- data/lib/ferret/search/filtered_query.rb +130 -0
- data/lib/ferret/search/filtered_term_enum.rb +79 -0
- data/lib/ferret/search/fuzzy_query.rb +153 -0
- data/lib/ferret/search/fuzzy_term_enum.rb +244 -0
- data/lib/ferret/search/hit_collector.rb +34 -0
- data/lib/ferret/search/hit_queue.rb +11 -0
- data/lib/ferret/search/index_searcher.rb +173 -0
- data/lib/ferret/search/match_all_docs_query.rb +104 -0
- data/lib/ferret/search/multi_phrase_query.rb +204 -0
- data/lib/ferret/search/multi_term_query.rb +65 -0
- data/lib/ferret/search/non_matching_scorer.rb +22 -0
- data/lib/ferret/search/phrase_positions.rb +55 -0
- data/lib/ferret/search/phrase_query.rb +217 -0
- data/lib/ferret/search/phrase_scorer.rb +153 -0
- data/lib/ferret/search/prefix_query.rb +47 -0
- data/lib/ferret/search/query.rb +111 -0
- data/lib/ferret/search/query_filter.rb +51 -0
- data/lib/ferret/search/range_filter.rb +103 -0
- data/lib/ferret/search/range_query.rb +139 -0
- data/lib/ferret/search/req_excl_scorer.rb +125 -0
- data/lib/ferret/search/req_opt_sum_scorer.rb +70 -0
- data/lib/ferret/search/score_doc.rb +38 -0
- data/lib/ferret/search/score_doc_comparator.rb +114 -0
- data/lib/ferret/search/scorer.rb +91 -0
- data/lib/ferret/search/similarity.rb +278 -0
- data/lib/ferret/search/sloppy_phrase_scorer.rb +47 -0
- data/lib/ferret/search/sort.rb +105 -0
- data/lib/ferret/search/sort_comparator.rb +60 -0
- data/lib/ferret/search/sort_field.rb +87 -0
- data/lib/ferret/search/spans.rb +12 -0
- data/lib/ferret/search/spans/near_spans_enum.rb +304 -0
- data/lib/ferret/search/spans/span_first_query.rb +79 -0
- data/lib/ferret/search/spans/span_near_query.rb +108 -0
- data/lib/ferret/search/spans/span_not_query.rb +130 -0
- data/lib/ferret/search/spans/span_or_query.rb +176 -0
- data/lib/ferret/search/spans/span_query.rb +25 -0
- data/lib/ferret/search/spans/span_scorer.rb +74 -0
- data/lib/ferret/search/spans/span_term_query.rb +105 -0
- data/lib/ferret/search/spans/span_weight.rb +84 -0
- data/lib/ferret/search/spans/spans_enum.rb +44 -0
- data/lib/ferret/search/term_query.rb +128 -0
- data/lib/ferret/search/term_scorer.rb +181 -0
- data/lib/ferret/search/top_docs.rb +24 -0
- data/lib/ferret/search/top_field_docs.rb +17 -0
- data/lib/ferret/search/weight.rb +54 -0
- data/lib/ferret/search/wildcard_query.rb +26 -0
- data/lib/ferret/search/wildcard_term_enum.rb +61 -0
- data/lib/ferret/stemmers.rb +1 -0
- data/lib/ferret/stemmers/porter_stemmer.rb +218 -0
- data/lib/ferret/store.rb +5 -0
- data/lib/ferret/store/buffered_index_io.rb +191 -0
- data/lib/ferret/store/directory.rb +139 -0
- data/lib/ferret/store/fs_store.rb +338 -0
- data/lib/ferret/store/index_io.rb +259 -0
- data/lib/ferret/store/ram_store.rb +282 -0
- data/lib/ferret/utils.rb +7 -0
- data/lib/ferret/utils/bit_vector.rb +105 -0
- data/lib/ferret/utils/date_tools.rb +138 -0
- data/lib/ferret/utils/number_tools.rb +91 -0
- data/lib/ferret/utils/parameter.rb +41 -0
- data/lib/ferret/utils/priority_queue.rb +120 -0
- data/lib/ferret/utils/string_helper.rb +47 -0
- data/lib/ferret/utils/weak_key_hash.rb +51 -0
- data/rake_utils/code_statistics.rb +106 -0
- data/setup.rb +1551 -0
- data/test/benchmark/tb_ram_store.rb +76 -0
- data/test/benchmark/tb_rw_vint.rb +26 -0
- data/test/longrunning/tc_numbertools.rb +60 -0
- data/test/longrunning/tm_store.rb +19 -0
- data/test/test_all.rb +9 -0
- data/test/test_helper.rb +6 -0
- data/test/unit/analysis/tc_analyzer.rb +21 -0
- data/test/unit/analysis/tc_letter_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_lower_case_filter.rb +20 -0
- data/test/unit/analysis/tc_lower_case_tokenizer.rb +27 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +39 -0
- data/test/unit/analysis/tc_porter_stem_filter.rb +16 -0
- data/test/unit/analysis/tc_standard_analyzer.rb +20 -0
- data/test/unit/analysis/tc_standard_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_stop_analyzer.rb +20 -0
- data/test/unit/analysis/tc_stop_filter.rb +14 -0
- data/test/unit/analysis/tc_white_space_analyzer.rb +21 -0
- data/test/unit/analysis/tc_white_space_tokenizer.rb +20 -0
- data/test/unit/analysis/tc_word_list_loader.rb +32 -0
- data/test/unit/document/tc_document.rb +47 -0
- data/test/unit/document/tc_field.rb +80 -0
- data/test/unit/index/tc_compound_file_io.rb +107 -0
- data/test/unit/index/tc_field_infos.rb +119 -0
- data/test/unit/index/tc_fields_io.rb +167 -0
- data/test/unit/index/tc_index.rb +140 -0
- data/test/unit/index/tc_index_reader.rb +622 -0
- data/test/unit/index/tc_index_writer.rb +57 -0
- data/test/unit/index/tc_multiple_term_doc_pos_enum.rb +80 -0
- data/test/unit/index/tc_segment_infos.rb +74 -0
- data/test/unit/index/tc_segment_term_docs.rb +17 -0
- data/test/unit/index/tc_segment_term_enum.rb +60 -0
- data/test/unit/index/tc_segment_term_vector.rb +71 -0
- data/test/unit/index/tc_term.rb +22 -0
- data/test/unit/index/tc_term_buffer.rb +57 -0
- data/test/unit/index/tc_term_info.rb +19 -0
- data/test/unit/index/tc_term_infos_io.rb +192 -0
- data/test/unit/index/tc_term_vector_offset_info.rb +18 -0
- data/test/unit/index/tc_term_vectors_io.rb +108 -0
- data/test/unit/index/th_doc.rb +244 -0
- data/test/unit/query_parser/tc_query_parser.rb +84 -0
- data/test/unit/search/tc_filter.rb +113 -0
- data/test/unit/search/tc_fuzzy_query.rb +136 -0
- data/test/unit/search/tc_index_searcher.rb +188 -0
- data/test/unit/search/tc_search_and_sort.rb +98 -0
- data/test/unit/search/tc_similarity.rb +37 -0
- data/test/unit/search/tc_sort.rb +48 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +153 -0
- data/test/unit/store/tc_fs_store.rb +84 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +180 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/ts_analysis.rb +16 -0
- data/test/unit/ts_document.rb +4 -0
- data/test/unit/ts_index.rb +18 -0
- data/test/unit/ts_query_parser.rb +3 -0
- data/test/unit/ts_search.rb +10 -0
- data/test/unit/ts_store.rb +6 -0
- data/test/unit/ts_utils.rb +10 -0
- data/test/unit/utils/tc_bit_vector.rb +65 -0
- data/test/unit/utils/tc_date_tools.rb +50 -0
- data/test/unit/utils/tc_number_tools.rb +59 -0
- data/test/unit/utils/tc_parameter.rb +40 -0
- data/test/unit/utils/tc_priority_queue.rb +62 -0
- data/test/unit/utils/tc_string_helper.rb +21 -0
- data/test/unit/utils/tc_weak_key_hash.rb +25 -0
- metadata +251 -0
@@ -0,0 +1,98 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
# Checks the document order IndexSearcher returns for relevance, index-order,
# typed-field, auto-detected-field and custom SortType/comparator sorts.
class SearchAndSortTest < Test::Unit::TestCase
  include Ferret::Document
  include Ferret::Search
  include Ferret::Store
  include Ferret::Analysis
  include Ferret::Index

  # Turns +hash+ into a Document with one unstored, untokenized Field per
  # key/value pair and appends it to +writer+. The document boost is the
  # numeric value of the "float" entry.
  def add_doc(hash, writer)
    doc = Document.new()
    hash.each_pair do |field, text|
      doc << Field.new(field, text, Field::Store::NO, Field::Index::UNTOKENIZED)
    end
    doc.boost = hash["float"].to_f
    writer << doc
  end

  # Indexes ten documents into a fresh RAMDirectory. The trailing comment
  # columns give the length of the "float" string and that length modulo 4,
  # which is what the custom sorts in test_special_sorts order by.
  def setup()
    @dir = RAMDirectory.new()
    iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
    docs = [                                                                 # len mod
      {"search"=>"findall","string"=>"a","int"=>"6","float"=>"0.01"},       #  4   0
      {"search"=>"findall","string"=>"c","int"=>"5","float"=>"0.1"},        #  3   3
      {"search"=>"findall","string"=>"e","int"=>"2","float"=>"0.001"},      #  5   1
      {"search"=>"findall","string"=>"g","int"=>"1","float"=>"1.0"},        #  3   3
      {"search"=>"findall","string"=>"i","int"=>"3","float"=>"0.0001"},     #  6   2
      {"search"=>"findall","string"=>"j","int"=>"4","float"=>"10.0"},       #  4   0
      {"search"=>"findall","string"=>"h","int"=>"5","float"=>"0.00001"},    #  7   3
      {"search"=>"findall","string"=>"f","int"=>"2","float"=>"100.0"},      #  5   1
      {"search"=>"findall","string"=>"d","int"=>"3","float"=>"1000.0"},     #  6   2
      {"search"=>"findall","string"=>"b","int"=>"4","float"=>"0.000001"}    #  8   0
    ]
    docs.each {|doc| add_doc(doc, iw)}
    iw.close
  end

  # Closes the directory opened in setup. Test::Unit calls +teardown+ after
  # every test; a method named +tear_down+ is never invoked by the framework.
  def teardown()
    @dir.close()
  end

  # Kept so anything that called the old, misspelled hook by name still works.
  alias tear_down teardown

  # Runs +query+ on the searcher +is+ with the given +sort+ (nil means the
  # searcher's default) and asserts that the hits come back as exactly the
  # document numbers in +expected+, in that order.
  def do_test_top_docs(is, query, expected, sort = nil)
    top_docs = is.search(query, {:sort => sort})
    # Without this check an empty or truncated result would pass silently,
    # because the per-document comparison only covers the hits returned.
    assert_equal(expected.length, top_docs.total_hits,
                 "unexpected number of hits")
    assert_equal(expected, top_docs.score_docs.collect {|score_doc| score_doc.doc})
  end

  def test_sorts()
    is = IndexSearcher.new(@dir)
    q = TermQuery.new(Term.new("search", "findall"))
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9])
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort::RELEVANCE)
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], [SortField::FIELD_SCORE])
    do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], Sort::INDEX_ORDER)
    do_test_top_docs(is, q, [0,1,2,3,4,5,6,7,8,9], [SortField::FIELD_DOC])

    ## int
    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT})
    do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], [sf_int])
    do_test_top_docs(is, q, [0,1,6,5,9,8,4,7,2,3], [sf_int, SortField::FIELD_SCORE])
    sf_int = SortField.new("int", {:sort_type => SortField::SortType::INT, :reverse => true})
    do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], [sf_int])

    ## float
    sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT})
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new([sf_float, SortField::FIELD_SCORE]))
    sf_float = SortField.new("float", {:sort_type => SortField::SortType::FLOAT, :reverse => true})
    do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new([sf_float, SortField::FIELD_SCORE]))

    ## str
    sf_str = SortField.new("string", {:sort_type => SortField::SortType::STRING})
    do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], [sf_str, SortField::FIELD_SCORE])

    ## auto
    do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
    do_test_top_docs(is, q, [0,1,6,5,9,4,8,2,7,3], Sort.new(["int"]))
    do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float"))
    do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float", true))
    do_test_top_docs(is, q, [0,1,6,9,5,8,4,2,7,3], Sort.new(["int", "string"]))
    do_test_top_docs(is, q, [3,7,2,4,8,5,9,6,1,0], Sort.new(["int", "string"], true))
  end

  # Custom sort type whose conversion lambda maps a field's string value to
  # its length.
  LENGTH = SortField::SortType.new("length", lambda{|str| str.length})

  # Same conversion as LENGTH, plus a comparison lambda that compares the
  # converted values modulo 4.
  LENGTH_MODULO = SortField::SortType.new("length_mod", lambda{|str| str.length},
                                          lambda{|i, j| (i%4) <=> (j%4)})

  def test_special_sorts
    is = IndexSearcher.new(@dir)
    q = TermQuery.new(Term.new("search", "findall"))
    sf = SortField.new("float", {:sort_type => LENGTH})
    do_test_top_docs(is, q, [9,6,4,8,2,7,0,5,1,3], [sf])
    sf = SortField.new("float", {:sort_type => LENGTH_MODULO})
    do_test_top_docs(is, q, [1,3,6,4,8,2,7,0,5,9], [sf])
    # A per-field :comparator is supplied here alongside the LENGTH sort type;
    # it compares modulo 4 with the operands swapped.
    sf = SortField.new("float", {:sort_type => LENGTH,
                                 :comparator => lambda{|i,j| (j%4) <=> (i%4)}})
    do_test_top_docs(is, q, [0,5,9,2,7,4,8,1,3,6], [sf])
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Unit tests for the norm byte/float encoding on Similarity and for the
# scoring factors returned by DefaultSimilarity.
class SimilarityTest < Test::Unit::TestCase
  include Ferret::Search
  include Ferret::Index

  # Every byte value must survive a round trip through the float encoding,
  # match the precomputed NORM_TABLE entry, and survive decode/encode of norms.
  def test_byte_float_conversion()
    (0...256).each do |byte|
      as_float = Similarity.byte_to_float(byte)
      assert_equal(byte, Similarity.float_to_byte(as_float))
      assert_equal(as_float, Similarity::NORM_TABLE[byte])
      assert_equal(byte, Similarity.encode_norm(Similarity.decode_norm(byte)))
    end
  end

  # Pins the values DefaultSimilarity returns for a handful of fixed inputs.
  def test_default_similarity
    similarity = DefaultSimilarity.new()

    assert_equal(0.25, similarity.length_norm("field", 16))
    assert_equal(0.25, similarity.query_norm(16))
    assert_equal(3.0, similarity.tf(9))
    assert_equal(1.0/10, similarity.sloppy_freq(9))
    assert_equal(1.0, similarity.idf(9, 10))
    assert_equal(4.0, similarity.coord(12, 3))

    # Minimal stand-in searcher: only the two methods the idf helpers are
    # given something to call, with the same numbers as the idf(9, 10) check.
    fake_searcher = Object.new
    class << fake_searcher
      def doc_freq(term)
        9
      end

      def max_doc()
        10
      end
    end

    single_term = Term.new("field", "text")
    assert_equal(1.0, similarity.idf_term(single_term, fake_searcher))

    phrase_terms = [
      ["field1", "text1"],
      ["field1", "text2"],
      ["field2", "text3"],
      ["field2", "text4"]
    ].collect {|field, text| Term.new(field, text)}
    assert_equal(4.0, similarity.idf_phrase(phrase_terms, fake_searcher))
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
# Unit tests for how Sort builds its list of SortFields from the predefined
# constants, from field names and from explicit SortField objects.
class SortTest < Test::Unit::TestCase
  include Ferret::Search

  # The predefined sorts expose a fixed field list.
  def test_basic()
    relevance_fields = Sort::RELEVANCE.fields
    assert_equal(2, relevance_fields.size)
    assert_equal(SortField::FIELD_SCORE, relevance_fields[0])
    assert_equal(SortField::FIELD_DOC, relevance_fields[1])

    index_order_fields = Sort::INDEX_ORDER.fields
    assert_equal(1, index_order_fields.size)
    assert_equal(SortField::FIELD_DOC, index_order_fields[0])
  end

  # Field names are turned into AUTO-typed SortFields. A single name gets
  # FIELD_DOC appended; the three-name case is expected to hold exactly three.
  def test_string_init()
    single = Sort.new("field").fields
    assert_equal(2, single.size)
    assert_equal(SortField::SortType::AUTO, single[0].sort_type)
    assert_equal("field", single[0].name)
    assert_equal(SortField::FIELD_DOC, single[1])

    names = ["field1", "field2", "field3"]
    several = Sort.new(names).fields
    assert_equal(3, several.size)
    names.each_with_index do |name, position|
      assert_equal(SortField::SortType::AUTO, several[position].sort_type)
      assert_equal(name, several[position].name)
    end
  end

  # Explicit SortField objects are kept in the order given.
  def test_multi_fields()
    reversed_int = SortField.new("field", {:sort_type => SortField::SortType::INT,
                                           :reverse => true})
    fields = Sort.new([reversed_int, SortField::FIELD_SCORE, SortField::FIELD_DOC]).fields

    assert_equal(3, fields.size)
    assert_equal(SortField::SortType::INT, fields[0].sort_type)
    assert_equal("field", fields[0].name)
    assert(fields[0].reverse?)
    assert_equal(SortField::FIELD_SCORE, fields[1])
    assert_equal(SortField::FIELD_DOC, fields[2])
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
# Unit tests for the predefined SortField constants and for argument
# validation in SortField.new.
class SortFieldTest < Test::Unit::TestCase
  include Ferret::Search

  def test_field_score()
    check_predefined_field(SortField::FIELD_SCORE,
                           SortField::SortType::SCORE,
                           "FIELD_SCORE should not be reverse")
  end

  def test_field_doc()
    check_predefined_field(SortField::FIELD_DOC,
                           SortField::SortType::DOC,
                           "FIELD_DOC should not be reverse")
  end

  # Building a typed SortField without a field name must be rejected.
  def test_error_raised()
    assert_raise(ArgumentError) do
      SortField.new(nil, {:sort_type => SortField::SortType::INT})
    end
  end

  private

  # Shared checks for a predefined SortField: it has the given sort type, no
  # field name, is not reversed and carries no comparator.
  def check_predefined_field(sort_field, expected_type, not_reverse_message)
    assert_equal(expected_type, sort_field.sort_type)
    assert_nil(sort_field.name)
    assert(!sort_field.reverse?, not_reverse_message)
    assert_nil(sort_field.comparator)
  end
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
|
3
|
+
|
4
|
+
# Tests the span queries (term, near, not, first, or) against a small
# in-memory index in which the words "start" and "finish" sit at varying
# distances from each other.
class SpansBasicTest < Test::Unit::TestCase
  include Ferret::Document
  include Ferret::Search
  include Ferret::Store
  include Ferret::Index
  include Ferret::Search::Spans
  include Ferret::Analysis

  # Indexes one tokenized, unstored "field" per line of +data+ (the line's
  # position in the array is its document number) and opens @is on the result.
  def setup()
    data = [
      "start finish one two three four five six seven",
      "start one finish two three four five six seven",
      "start one two finish three four five six seven",
      "start one two three finish four five six seven",
      "start one two three four finish five six seven",
      "start one two three four five finish six seven",
      "start one two three four five six finish seven eight",
      "start one two three four five six seven finish eight nine",
      "start one two three four five six finish seven eight",
      "start one two three four five finish six seven",
      "start one two three four finish five six seven",
      "start one two three finish four five six seven",
      "start one two finish three four five six seven",
      "start one finish two three four five six seven",
      "start finish one two three four five six seven",
      "start start  one two three four five six seven",
      "finish start one two three four five six seven",
      "finish one start two three four five six seven",
      "finish one two start three four five six seven",
      "finish one two three start four five six seven",
      "finish one two three four start five six seven",
      "finish one two three four five start six seven",
      "finish one two three four five six start seven eight",
      "finish one two three four five six seven start eight nine",
      "finish one two three four five six start seven eight",
      "finish one two three four five start six seven",
      "finish one two three four start five six seven",
      "finish one two three start four five six seven",
      "finish one two start three four five six seven",
      "finish one start two three four five six seven",
      "finish start one two three four five six seven"
    ]
    @dir = RAMDirectory.new
    iw = IndexWriter.new(@dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
    data.each do |line|
      doc = Document.new()
      doc << Field.new("field", line, Field::Store::NO, Field::Index::TOKENIZED)
      iw << doc
    end

    iw.close()

    @is = IndexSearcher.new(@dir)
  end

  # Closes the directory opened in setup. Test::Unit calls +teardown+ after
  # every test; the previous +tear_down+ was never invoked, and it also called
  # close on @iw, which is never assigned (setup closes its local writer).
  def teardown()
    @dir.close
  end

  # Kept so anything that called the old, misspelled hook by name still works.
  alias tear_down teardown

  # Renders +i+ as space-separated "<n>" tokens, one per decimal place:
  # below 10 a single token, below 100 tens and units, otherwise hundreds,
  # tens and units (e.g. 345 gives "<300> <40> <5>").
  def number_split(i)
    if (i < 10)
      return "<#{i}>"
    elsif (i < 100)
      return "<#{((i/10)*10)}> <#{i%10}>"
    else
      return "<#{((i/100)*100)}> <#{(((i%100)/10)*10)}> <#{i%10}>"
    end
  end

  # Searches @is for +query+ and asserts that the hits are exactly the
  # document numbers in +expected+ (in any order).
  #
  # test_explain:: when true, each hit's score must also match the value of
  #                the searcher's explanation for that document.
  # top::          when given, the document number expected in first place.
  def check_hits(query, expected, test_explain = false, top=nil)
    top_docs = @is.search(query, {:num_docs => expected.length})
    assert_equal(expected.length, top_docs.score_docs.size)
    assert_equal(top, top_docs.score_docs[0].doc) if top
    assert_equal(expected.length, top_docs.total_hits)
    top_docs.score_docs.each do |score_doc|
      assert(expected.include?(score_doc.doc),
             "#{score_doc.doc} was found unexpectedly")
      if test_explain
        # Explain once per hit and reuse the value in the failure message.
        explained = @is.explain(query, score_doc.doc).value
        # NOTE(review): =~ on the score is presumably Ferret's approximate
        # Float comparison -- confirm against the library.
        assert(score_doc.score =~ explained,
               "Scores(#{score_doc.score} != #{explained})")
      end
    end
  end

  def test_span_term_query()
    tq = SpanTermQuery.new(Term.new("field", "nine"))
    check_hits(tq, [7,23], true)
    tq = SpanTermQuery.new(Term.new("field", "eight"))
    check_hits(tq, [6,7,8,22,23,24])
  end

  def test_span_near_query()
    tq1 = SpanTermQuery.new(Term.new("field", "start"))
    tq2 = SpanTermQuery.new(Term.new("field", "finish"))
    q = SpanNearQuery.new([tq1, tq2], 0, true)
    check_hits(q, [0,14], true)
    q = SpanNearQuery.new([tq1, tq2], 0, false)
    check_hits(q, [0,14,16,30], true)
    q = SpanNearQuery.new([tq1, tq2], 1, true)
    check_hits(q, [0,1,13,14])
    q = SpanNearQuery.new([tq1, tq2], 1, false)
    check_hits(q, [0,1,13,14,16,17,29,30])
    q = SpanNearQuery.new([tq1, tq2], 4, true)
    check_hits(q, [0,1,2,3,4,10,11,12,13,14])
    q = SpanNearQuery.new([tq1, tq2], 4, false)
    check_hits(q, [0,1,2,3,4,10,11,12,13,14,16,17,18,19,20,26,27,28,29,30])
  end

  def test_span_not_query()
    tq1 = SpanTermQuery.new(Term.new("field", "start"))
    tq2 = SpanTermQuery.new(Term.new("field", "finish"))
    tq3 = SpanTermQuery.new(Term.new("field", "two"))
    tq4 = SpanTermQuery.new(Term.new("field", "five"))
    nearq1 = SpanNearQuery.new([tq1, tq2], 4, true)
    nearq2 = SpanNearQuery.new([tq3, tq4], 4, true)
    q = SpanNotQuery.new(nearq1, nearq2)
    check_hits(q, [0,1,13,14], true)
    nearq1 = SpanNearQuery.new([tq1, tq2], 4, false)
    q = SpanNotQuery.new(nearq1, nearq2)
    check_hits(q, [0,1,13,14,16,17,29,30])
    nearq1 = SpanNearQuery.new([tq1, tq3], 4, true)
    nearq2 = SpanNearQuery.new([tq2, tq4], 8, false)
    q = SpanNotQuery.new(nearq1, nearq2)
    check_hits(q, [2,3,4,5,6,7,8,9,10,11,12,15])
  end

  def test_span_first_query()
    # Documents 16 to 30 all begin with "finish".
    finish_first = [16,17,18,19,20,21,22,23,24,25,26,27,28,29,30]
    tq = SpanTermQuery.new(Term.new("field", "finish"))
    q = SpanFirstQuery.new(tq, 1)
    check_hits(q, finish_first, true)
    q = SpanFirstQuery.new(tq, 5)
    check_hits(q, [0,1,2,3,11,12,13,14]+finish_first, false)
  end

  def test_span_or_query_query()
    tq1 = SpanTermQuery.new(Term.new("field", "start"))
    tq2 = SpanTermQuery.new(Term.new("field", "finish"))
    tq3 = SpanTermQuery.new(Term.new("field", "five"))
    nearq1 = SpanNearQuery.new([tq1, tq2], 1, true)
    nearq2 = SpanNearQuery.new([tq2, tq3], 0, false)
    q = SpanOrQuery.new([nearq1, nearq2])
    check_hits(q, [0,1,4,5,9,10,13,14], false)
    nearq1 = SpanNearQuery.new([tq1, tq2], 0, false)
    nearq2 = SpanNearQuery.new([tq2, tq3], 1, false)
    q = SpanOrQuery.new([nearq1, nearq2])
    check_hits(q, [0,3,4,5,6,8,9,10,11,14,16,30], false)
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
+
require File.dirname(__FILE__) + "/tm_store"
|
3
|
+
require File.dirname(__FILE__) + "/tm_store_lock"
|
4
|
+
|
5
|
+
module Ferret::Store

  # Reopens FSDirectory to add read-only accessors that the tests in this
  # file use to inspect its internal state.
  class FSDirectory
    # Returns the class-level @@Directories collection.
    def self.directory_cache
      @@Directories
    end

    # Exposes the @ref_count instance variable.
    attr_reader :ref_count

    # Public wrapper that returns whatever +lock_prefix+ returns.
    def get_lock_prefix
      lock_prefix()
    end
  end
end
|
21
|
+
|
22
|
+
# Runs the shared StoreTest and StoreLockTest suites against an FSDirectory
# and adds checks for the directory cache and for on-disk lock files.
class FSStoreTest < Test::Unit::TestCase
  include Ferret::Store
  include StoreTest
  include StoreLockTest

  # Opens the directory under test at temp/fsdir, relative to this file.
  def setup
    @dpath = File.join(File.dirname(__FILE__),
                       '../../temp/fsdir')
    @dir = FSDirectory.get_directory(@dpath, true)
  end

  def teardown
    @dir.refresh()
    @dir.close()
  end

  # get_directory must hand back the same cached object for the same path,
  # counting references, and drop it from the cache once the last reference
  # is closed.
  def test_cache
    dir_path = File.join(File.dirname(__FILE__),
                         '/../../temp/cachetest')
    assert(! FSDirectory.directory_cache[dir_path],
           "this directory should not be cached yet")
    @dir1 = FSDirectory.get_directory(dir_path, true)
    assert(FSDirectory.directory_cache[dir_path],
           "this directory should now be cached")
    # assert_equal takes (expected, actual); the expected count goes first so
    # a failure reports the values the right way round.
    assert_equal(1, @dir1.ref_count,
                 "There is one reference so the refcount should now be 1")
    @dir2 = FSDirectory.get_directory(dir_path, true)
    assert(@dir1 === @dir2,
           "The directory should be cached so the same directory object should have been returned")
    assert_equal(2, @dir1.ref_count,
                 "There are two references so the refcount should now be 2")
    @dir1.close
    assert(FSDirectory.directory_cache[dir_path],
           "this directory shouldn't have been removed yet")
    assert_equal(1, @dir2.ref_count,
                 "There is one reference so the refcount should now be 1")
    @dir2.close
    assert(! FSDirectory.directory_cache[dir_path],
           "this directory should have been removed from the cache")
  end

  # A lock made by the directory must only create its file on obtain and
  # remove it again on release.
  def test_fslock
    name = "lfile"
    lock_file_path = @dpath + "/" + @dir.get_lock_prefix() + name
    # File.exist? is used throughout: the File.exists? alias was removed in
    # Ruby 3.2.
    if File.exist?(lock_file_path) then
      File.delete(lock_file_path)
    end
    assert(! File.exist?(lock_file_path),
           "There should be no lock file")
    lock = @dir.make_lock(name)
    assert(! File.exist?(lock_file_path),
           "There should still be no lock file")
    assert(! @dir.exists?(lock_file_path),
           "The lock should be hidden by the FSDirectories directory scan")
    assert(! lock.locked?, "lock shouldn't be locked yet")
    lock.obtain
    assert(lock.locked?, "lock should now be locked")
    assert(File.exist?(lock_file_path),
           "A lock file should have been created")
    lock.release
    assert(! lock.locked?, "lock should be freed again")
    assert(! File.exist?(lock_file_path), "The lock file should have been deleted")
  end
end
|