ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
@@ -1,108 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
|
3
|
-
|
4
|
-
class TermVectorsIOTest < Test::Unit::TestCase
|
5
|
-
|
6
|
-
include Ferret::Index
|
7
|
-
|
8
|
-
def setup()
|
9
|
-
@dir = Ferret::Store::RAMDirectory.new
|
10
|
-
@fis = FieldInfos.new
|
11
|
-
@fis.add("field1", true, true, true, true)
|
12
|
-
@fis.add("field2", true, true)
|
13
|
-
end
|
14
|
-
|
15
|
-
def tear_down()
|
16
|
-
@dir.close()
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_tv_io_add_fields()
|
20
|
-
tv_w = TermVectorsWriter.new(@dir, "_test", @fis)
|
21
|
-
tv_w.open_document
|
22
|
-
assert(tv_w.document_open?)
|
23
|
-
tv_w.open_field("field1")
|
24
|
-
tv_w.add_term("text1", 1, [1], [t(0,4)])
|
25
|
-
tv_w.add_term("text2", 2, [3,4], [t(5,10), t(11,16)])
|
26
|
-
tv_w.close_field()
|
27
|
-
tv_w.close_document()
|
28
|
-
tv_w.close()
|
29
|
-
|
30
|
-
tv_r = TermVectorsReader.new(@dir, "_test", @fis)
|
31
|
-
assert_equal(1, tv_r.size)
|
32
|
-
tv = tv_r.get_field_tv(0, "field1")
|
33
|
-
|
34
|
-
assert_equal(2, tv.size)
|
35
|
-
assert_equal("text1", tv.terms[0])
|
36
|
-
assert_equal(1, tv.freqs[0])
|
37
|
-
assert_equal(1, tv.positions[0][0])
|
38
|
-
assert_equal(t(0,4), tv.offsets[0][0])
|
39
|
-
|
40
|
-
assert_equal("text2", tv.terms[1])
|
41
|
-
assert_equal(2, tv.freqs[1])
|
42
|
-
assert_equal(3, tv.positions[1][0])
|
43
|
-
assert_equal(t(5,10), tv.offsets[1][0])
|
44
|
-
assert_equal(4, tv.positions[1][1])
|
45
|
-
assert_equal(t(11,16), tv.offsets[1][1])
|
46
|
-
tv_r.close
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_tv_io_add_documents()
|
50
|
-
tvs1 = []
|
51
|
-
tvs2 = []
|
52
|
-
tv = SegmentTermVector.new("field1",
|
53
|
-
["word1", "word2"],
|
54
|
-
[3, 2],
|
55
|
-
[[1, 5, 8], [2, 9]],
|
56
|
-
[[t(0,5), t(34,39), t(45,50)],[t(6,11), t(51,56)]])
|
57
|
-
tvs1 << tv
|
58
|
-
tv = SegmentTermVector.new("field2",
|
59
|
-
["word3", "word4"],
|
60
|
-
[1, 5],
|
61
|
-
[[8], [2, 9, 11, 34, 56]],
|
62
|
-
[[t(45,50)], [t(6,10), t(51,56), t(64,69), t(103,108), t(183,188)]])
|
63
|
-
tvs1 << tv
|
64
|
-
tv_w = TermVectorsWriter.new(@dir, "_test", @fis)
|
65
|
-
tv = SegmentTermVector.new("field1",
|
66
|
-
["word1", "word2"],
|
67
|
-
[3, 2],
|
68
|
-
[[1, 5, 8], [2, 9]],
|
69
|
-
[[t(0,5), t(34,39), t(45,50)],[t(6,11), t(51,56)]])
|
70
|
-
tvs2 << tv
|
71
|
-
tv_w.add_all_doc_vectors(tvs1)
|
72
|
-
tv_w.add_all_doc_vectors(tvs2)
|
73
|
-
tv_w.close
|
74
|
-
tv_r = TermVectorsReader.new(@dir, "_test", @fis)
|
75
|
-
assert_equal(2, tv_r.size)
|
76
|
-
tv = tv_r.get_field_tv(0, "field1")
|
77
|
-
|
78
|
-
assert_equal(2, tv.size)
|
79
|
-
assert_equal("word1", tv.terms[0])
|
80
|
-
assert_equal(3, tv.freqs[0])
|
81
|
-
assert_equal(1, tv.positions[0][0])
|
82
|
-
assert_equal(5, tv.positions[0][1])
|
83
|
-
assert_equal(8, tv.positions[0][2])
|
84
|
-
assert_equal(t(0,5), tv.offsets[0][0])
|
85
|
-
assert_equal(t(34,39), tv.offsets[0][1])
|
86
|
-
assert_equal(t(45,50), tv.offsets[0][2])
|
87
|
-
|
88
|
-
assert_equal("word2", tv.terms[1])
|
89
|
-
assert_equal(2, tv.freqs[1])
|
90
|
-
assert_equal(2, tv.positions[1][0])
|
91
|
-
assert_equal(9, tv.positions[1][1])
|
92
|
-
assert_equal(t(6,11), tv.offsets[1][0])
|
93
|
-
assert_equal(t(51,56), tv.offsets[1][1])
|
94
|
-
|
95
|
-
tv = tv_r.get_field_tv(0, "field2")
|
96
|
-
assert_equal(2, tv.size)
|
97
|
-
assert_equal("word3", tv.terms[0])
|
98
|
-
|
99
|
-
tv = tv_r.get_field_tv(1, "field1")
|
100
|
-
assert_equal(2, tv.size)
|
101
|
-
assert_equal("word1", tv.terms[0])
|
102
|
-
end
|
103
|
-
|
104
|
-
private
|
105
|
-
def t(start, finish)
|
106
|
-
return TermVectorOffsetInfo.new(start, finish)
|
107
|
-
end
|
108
|
-
end
|
data/test/unit/index/tc_term.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
|
3
|
-
class TermTest < Test::Unit::TestCase
|
4
|
-
include Ferret::Index
|
5
|
-
def test_term()
|
6
|
-
term1 = Term.new("bfield1", "athis is text1")
|
7
|
-
assert_equal(term1.field, "bfield1")
|
8
|
-
assert_equal(term1.text, "athis is text1")
|
9
|
-
term2 = Term.new("afield2", "athis is text1")
|
10
|
-
term3 = Term.new("bfield1", "bthis is text2")
|
11
|
-
term4 = Term.new("bfield1", "athis is text1")
|
12
|
-
assert(term1 > term2)
|
13
|
-
assert(term1 < term3)
|
14
|
-
assert(term1.between?(term2, term3))
|
15
|
-
assert(term1 == term4)
|
16
|
-
assert(term1.eql?(term4))
|
17
|
-
term4.set!("field3", "text3")
|
18
|
-
assert_not_equal(term1, term4)
|
19
|
-
end
|
20
|
-
|
21
|
-
def test_non_strings()
|
22
|
-
t = Term.new(2345, 3)
|
23
|
-
t = Term.new(:symbol, :symbol)
|
24
|
-
t.set!(:symbol, :symbol)
|
25
|
-
t.set!(234, 23462346)
|
26
|
-
end
|
27
|
-
end
|
@@ -1,18 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
|
3
|
-
|
4
|
-
class TermVectorOffsetInfoTest < Test::Unit::TestCase
|
5
|
-
include Ferret::Index
|
6
|
-
def test_tvoi()
|
7
|
-
t1 = TermVectorOffsetInfo.new(1, 3)
|
8
|
-
assert_equal(t1.start, 1)
|
9
|
-
assert_equal(t1.end, 3)
|
10
|
-
t2 = TermVectorOffsetInfo.new(1, 3)
|
11
|
-
assert(t1 == t2)
|
12
|
-
t2.start = 2
|
13
|
-
assert(t1 != t2)
|
14
|
-
t2.start = 1
|
15
|
-
t2.end = 1
|
16
|
-
assert(t1 != t2)
|
17
|
-
end
|
18
|
-
end
|
@@ -1,37 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
|
3
|
-
|
4
|
-
class SimilarityTest < Test::Unit::TestCase
|
5
|
-
include Ferret::Search
|
6
|
-
include Ferret::Index
|
7
|
-
|
8
|
-
def test_byte_float_conversion()
|
9
|
-
256.times do |i|
|
10
|
-
assert_equal(i, Similarity.float_to_byte(Similarity.byte_to_float(i)))
|
11
|
-
assert_equal(Similarity.byte_to_float(i), Similarity::NORM_TABLE[i])
|
12
|
-
assert_equal(i, Similarity.encode_norm(Similarity.decode_norm(i)))
|
13
|
-
end
|
14
|
-
end
|
15
|
-
|
16
|
-
def test_default_similarity
|
17
|
-
dsim = DefaultSimilarity.new()
|
18
|
-
assert_equal(1.0/4, dsim.length_norm("field", 16))
|
19
|
-
assert_equal(1.0/4, dsim.query_norm(16))
|
20
|
-
assert_equal(3.0, dsim.tf(9))
|
21
|
-
assert_equal(1.0/10, dsim.sloppy_freq(9))
|
22
|
-
assert_equal(1.0, dsim.idf(9, 10))
|
23
|
-
assert_equal(4.0, dsim.coord(12, 3))
|
24
|
-
searcher = Object.new
|
25
|
-
def searcher.doc_freq(term) 9 end
|
26
|
-
def searcher.max_doc() 10 end
|
27
|
-
term = Term.new("field", "text")
|
28
|
-
assert_equal(1.0, dsim.idf_term(term, searcher))
|
29
|
-
terms = [
|
30
|
-
Term.new("field1", "text1"),
|
31
|
-
Term.new("field1", "text2"),
|
32
|
-
Term.new("field2", "text3"),
|
33
|
-
Term.new("field2", "text4")
|
34
|
-
]
|
35
|
-
assert_equal(4.0, dsim.idf_phrase(terms, searcher))
|
36
|
-
end
|
37
|
-
end
|
@@ -1,14 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
|
3
|
-
class SortFieldTest < Test::Unit::TestCase
|
4
|
-
include Ferret::Search
|
5
|
-
|
6
|
-
def test_params()
|
7
|
-
assert_equal("SCORE", SortField::SortType::SCORE.to_s)
|
8
|
-
assert_equal("DOC", SortField::SortType::DOC.to_s)
|
9
|
-
assert_equal("auto", SortField::SortType::AUTO.to_s)
|
10
|
-
assert_equal("string", SortField::SortType::STRING.to_s)
|
11
|
-
assert_equal("integer", SortField::SortType::INTEGER.to_s)
|
12
|
-
assert_equal("float", SortField::SortType::FLOAT.to_s)
|
13
|
-
end
|
14
|
-
end
|
@@ -1,126 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
|
3
|
-
# Tests the multisearcher by comparing it's results
|
4
|
-
# with those returned by an IndexSearcher.
|
5
|
-
# Taken from TestMultiSearcherRanking.java of Lucene
|
6
|
-
class MultiSearcher2Test < Test::Unit::TestCase
|
7
|
-
include Ferret::Document
|
8
|
-
include Ferret::Search
|
9
|
-
include Ferret::Store
|
10
|
-
include Ferret::Analysis
|
11
|
-
include Ferret::Index
|
12
|
-
|
13
|
-
FIELD_NAME = 'body'
|
14
|
-
|
15
|
-
def test_one_Term_query
|
16
|
-
check_query 'three'
|
17
|
-
end
|
18
|
-
|
19
|
-
def test_two_term_query
|
20
|
-
check_query 'three foo'
|
21
|
-
# as of 2006/03/11 these fail in Java Lucene as
|
22
|
-
# well, hits are returned in slightly different order.
|
23
|
-
#check_query '+pizza +blue*', :body
|
24
|
-
#check_query '+pizza blue*', :body
|
25
|
-
#check_query 'pizza blue*', :body
|
26
|
-
end
|
27
|
-
|
28
|
-
def test_prefix_query
|
29
|
-
check_query 'multi*'
|
30
|
-
end
|
31
|
-
|
32
|
-
def test_fuzzy_query
|
33
|
-
check_query 'multiThree~'
|
34
|
-
end
|
35
|
-
|
36
|
-
def test_range_query
|
37
|
-
check_query '{multiA multiP}'
|
38
|
-
end
|
39
|
-
|
40
|
-
# fails (query parse error)
|
41
|
-
#def test_multi_phrase_query
|
42
|
-
# check_query '"blueberry pi*"'
|
43
|
-
#end
|
44
|
-
|
45
|
-
def test_nomatch_query
|
46
|
-
check_query '+three +nomatch'
|
47
|
-
end
|
48
|
-
|
49
|
-
# this yields differing scores, but doesn't work in
|
50
|
-
# Java Lucene either
|
51
|
-
#def test_term_repeated_query
|
52
|
-
# check_query 'multi* multi* foo'
|
53
|
-
#end
|
54
|
-
|
55
|
-
|
56
|
-
def check_query(query_str, debug_field=nil)
|
57
|
-
@parser ||= Ferret::QueryParser.new(FIELD_NAME, :analyzer => @analyzer)
|
58
|
-
query = @parser.parse(query_str)
|
59
|
-
puts "Query: #{query}" if debug_field
|
60
|
-
IndexTestHelper.explain(query, @multi, debug_field) if debug_field
|
61
|
-
IndexTestHelper.explain(query, @single, debug_field) if debug_field
|
62
|
-
multi_hits = @multi.search(query)
|
63
|
-
single_hits = @single.search(query)
|
64
|
-
assert_equal single_hits.size, multi_hits.size, "hit count differs"
|
65
|
-
multi_hits.score_docs.each_with_index { |multi_sd, i|
|
66
|
-
single_sd = single_hits.score_docs[i]
|
67
|
-
doc_multi = @multi.doc(multi_sd.doc)
|
68
|
-
doc_single = @single.doc(single_sd.doc)
|
69
|
-
assert_equal single_sd.score, multi_sd.score, "score differs in result #{i}"
|
70
|
-
assert_equal doc_single[FIELD_NAME], doc_multi[FIELD_NAME], "field values differ in result #{i}"
|
71
|
-
}
|
72
|
-
end
|
73
|
-
|
74
|
-
def setup()
|
75
|
-
@analyzer = WhiteSpaceAnalyzer.new()
|
76
|
-
# create MultiSearcher from two seperate searchers
|
77
|
-
d1 = RAMDirectory.new()
|
78
|
-
iw1 = IndexWriter.new(d1, :analyzer => @analyzer, :create => true)
|
79
|
-
add_collection1(iw1)
|
80
|
-
iw1.close()
|
81
|
-
|
82
|
-
d2 = RAMDirectory.new()
|
83
|
-
iw2 = IndexWriter.new(d2, :analyzer => @analyzer, :create => true)
|
84
|
-
add_collection2(iw2)
|
85
|
-
iw2.close()
|
86
|
-
@multi = MultiSearcher.new([IndexSearcher.new(d1), IndexSearcher.new(d2)])
|
87
|
-
|
88
|
-
# create IndexSearcher which contains all documents
|
89
|
-
d = RAMDirectory.new()
|
90
|
-
iw = IndexWriter.new(d, :analyzer => @analyzer, :create => true)
|
91
|
-
add_collection1(iw)
|
92
|
-
add_collection2(iw)
|
93
|
-
iw.close()
|
94
|
-
@single = IndexSearcher.new(d)
|
95
|
-
end
|
96
|
-
|
97
|
-
def tear_down()
|
98
|
-
@multi.close
|
99
|
-
@single.close
|
100
|
-
end
|
101
|
-
|
102
|
-
def add(value, iw)
|
103
|
-
d = Document.new
|
104
|
-
d << Field.new(FIELD_NAME, value, Field::Store::YES, Field::Index::TOKENIZED)
|
105
|
-
iw << d
|
106
|
-
end
|
107
|
-
|
108
|
-
def add_collection1(iw)
|
109
|
-
add("one blah three", iw)
|
110
|
-
add("one foo three multiOne", iw)
|
111
|
-
add("one foobar three multiThree", iw)
|
112
|
-
add("blueberry pie", iw)
|
113
|
-
add("blueberry strudel", iw)
|
114
|
-
add("blueberry pizza", iw)
|
115
|
-
end
|
116
|
-
def add_collection2(iw)
|
117
|
-
add("two blah three", iw)
|
118
|
-
add("two foo xxx multiTwo", iw)
|
119
|
-
add("two foobar xxx multiThreee", iw)
|
120
|
-
add("blueberry chewing gum", iw)
|
121
|
-
add("bluebird pizza", iw)
|
122
|
-
add("bluebird foobar pizza", iw)
|
123
|
-
add("piccadilly circus", iw)
|
124
|
-
end
|
125
|
-
|
126
|
-
end
|
@@ -1,62 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
require File.dirname(__FILE__) + "/rtm_store"
|
3
|
-
require File.dirname(__FILE__) + "/rtm_store_lock"
|
4
|
-
|
5
|
-
module Ferret::Store
|
6
|
-
|
7
|
-
class FSDirectory
|
8
|
-
def FSDirectory.directory_cache
|
9
|
-
@@Directories
|
10
|
-
end
|
11
|
-
|
12
|
-
def ref_count
|
13
|
-
@ref_count
|
14
|
-
end
|
15
|
-
|
16
|
-
def get_lock_prefix
|
17
|
-
lock_prefix
|
18
|
-
end
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
class FSStoreTest < Test::Unit::TestCase
|
23
|
-
include Ferret::Store
|
24
|
-
include StoreTest
|
25
|
-
include StoreLockTest
|
26
|
-
def setup
|
27
|
-
@dpath = File.join(File.dirname(__FILE__),
|
28
|
-
'../../temp/fsdir')
|
29
|
-
@dir = FSDirectory.new(@dpath, true)
|
30
|
-
end
|
31
|
-
|
32
|
-
def teardown
|
33
|
-
@dir.refresh()
|
34
|
-
@dir.close()
|
35
|
-
end
|
36
|
-
|
37
|
-
def test_cache
|
38
|
-
dir_path = File.join(File.dirname(__FILE__),
|
39
|
-
'/../../temp/cachetest')
|
40
|
-
assert(! FSDirectory.directory_cache[dir_path],
|
41
|
-
"this directory should not be cached yet")
|
42
|
-
@dir1 = FSDirectory.new(dir_path, true)
|
43
|
-
assert(FSDirectory.directory_cache[dir_path],
|
44
|
-
"this directory should now be cached")
|
45
|
-
assert_equal(@dir1.ref_count, 1,
|
46
|
-
"There is one reference so the refcount should now be 1")
|
47
|
-
@dir2 = FSDirectory.new(dir_path, true)
|
48
|
-
assert(@dir1 === @dir2,
|
49
|
-
"The directory should be cached so the same directory object should have been returned")
|
50
|
-
assert_equal(@dir1.ref_count, 2,
|
51
|
-
"There are two references so the refcount should now be 2")
|
52
|
-
@dir1.close
|
53
|
-
assert(FSDirectory.directory_cache[dir_path],
|
54
|
-
"this directory shouldn't have been removed yet")
|
55
|
-
assert_equal(@dir2.ref_count, 1,
|
56
|
-
"There is one reference so the refcount should now be 1")
|
57
|
-
@dir2.close
|
58
|
-
assert(! FSDirectory.directory_cache[dir_path],
|
59
|
-
"this directory should have been removed from the cache")
|
60
|
-
end
|
61
|
-
|
62
|
-
end
|
@@ -1,15 +0,0 @@
|
|
1
|
-
require File.dirname(__FILE__) + "/../../test_helper"
|
2
|
-
require File.dirname(__FILE__) + "/rtm_store"
|
3
|
-
require File.dirname(__FILE__) + "/rtm_store_lock"
|
4
|
-
|
5
|
-
class RAMStoreTest < Test::Unit::TestCase
|
6
|
-
include StoreTest
|
7
|
-
include StoreLockTest
|
8
|
-
def setup
|
9
|
-
@dir = Ferret::Store::RAMDirectory.new
|
10
|
-
end
|
11
|
-
|
12
|
-
def teardown
|
13
|
-
@dir.close()
|
14
|
-
end
|
15
|
-
end
|
@@ -1,150 +0,0 @@
|
|
1
|
-
module StoreTest
|
2
|
-
# declare dir so inheritors can access it.
|
3
|
-
@dir = nil
|
4
|
-
|
5
|
-
def test_modified
|
6
|
-
# difficult to test this one but as file mtime is only stored to the nearest second.
|
7
|
-
# we can assume this test will happen in less than a few seconds. (I hope)
|
8
|
-
time = Time.new.to_i
|
9
|
-
@dir.touch('mtime.test')
|
10
|
-
time_before = @dir.modified('mtime.test').to_i
|
11
|
-
assert(time_before - time <= 3,
|
12
|
-
"test that mtime is approximately equal to the system time when the file was touched")
|
13
|
-
end
|
14
|
-
|
15
|
-
def test_rw_bytes
|
16
|
-
bytes = [0x34, 0x87, 0xF9, 0xEA, 0x00, 0xFF]
|
17
|
-
rw_test(bytes, "byte", 6)
|
18
|
-
end
|
19
|
-
|
20
|
-
def test_rw_ints
|
21
|
-
ints = [-2147483648, 2147483647, -1, 0]
|
22
|
-
rw_test(ints, "int", 16)
|
23
|
-
end
|
24
|
-
|
25
|
-
def test_rw_longs
|
26
|
-
longs = [-9223372036854775808, 9223372036854775807, -1, 0]
|
27
|
-
rw_test(longs, "long", 32)
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_rw_uints
|
31
|
-
uints = [0xffffffff, 100000, 0]
|
32
|
-
rw_test(uints, "uint", 12)
|
33
|
-
end
|
34
|
-
|
35
|
-
def test_rw_ulongs
|
36
|
-
ulongs = [0xffffffffffffffff, 100000000000000, 0]
|
37
|
-
rw_test(ulongs, "ulong", 24)
|
38
|
-
end
|
39
|
-
|
40
|
-
def test_rw_vints
|
41
|
-
vints = [ 9223372036854775807,
|
42
|
-
0x00,
|
43
|
-
0xFFFFFFFFFFFFFFFF]
|
44
|
-
rw_test(vints, "vint", 20)
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_rw_vlongs
|
48
|
-
vlongs = [ 9223372036854775807,
|
49
|
-
0x00,
|
50
|
-
0xFFFFFFFFFFFFFFFF]
|
51
|
-
rw_test(vlongs, "vlong", 20)
|
52
|
-
end
|
53
|
-
|
54
|
-
def test_rw_strings
|
55
|
-
text = 'This is a ruby ferret test string ~!@#$%^&*()`123456790-=\)_+|'
|
56
|
-
ostream = @dir.create_output("rw_strings.test")
|
57
|
-
ostream.write_string(text)
|
58
|
-
ostream.write_string(text*100)
|
59
|
-
ostream.close
|
60
|
-
istream = @dir.open_input("rw_strings.test")
|
61
|
-
assert_equal(text, istream.read_string, "Short string test failed")
|
62
|
-
assert_equal(text*100, istream.read_string, "Short string test failed")
|
63
|
-
istream.close
|
64
|
-
assert_equal(6265, @dir.length('rw_strings.test'))
|
65
|
-
end
|
66
|
-
|
67
|
-
def test_rw_utf8_strings
|
68
|
-
text = '³³ ÄÄÄÄÄÄ 道德經'
|
69
|
-
ostream = @dir.create_output("rw_utf8_strings.test")
|
70
|
-
ostream.write_string(text)
|
71
|
-
ostream.write_string(text*100)
|
72
|
-
ostream.close
|
73
|
-
istream = @dir.open_input("rw_utf8_strings.test")
|
74
|
-
assert_equal(text, x = istream.read_string, "Short string test failed")
|
75
|
-
assert_equal(text*100, istream.read_string, "Short string test failed")
|
76
|
-
istream.close
|
77
|
-
end
|
78
|
-
|
79
|
-
# this test fills up the output stream so that the buffer will have to be
|
80
|
-
# written a few times. It then uses seek to make sure that it works
|
81
|
-
# correctly
|
82
|
-
def test_buffer_seek
|
83
|
-
ostream = @dir.create_output("rw_seek.test")
|
84
|
-
text = 'This is another long test string !@#$%#$%&%$*%^&*()(_'
|
85
|
-
1000.times {|i| ostream.write_long(i); ostream.write_string(text) }
|
86
|
-
ostream.seek(987)
|
87
|
-
assert_equal(987, ostream.pos)
|
88
|
-
ostream.write_vint(555)
|
89
|
-
ostream.seek(56)
|
90
|
-
assert_equal(56, ostream.pos)
|
91
|
-
ostream.write_vint(1234567890)
|
92
|
-
ostream.seek(4000)
|
93
|
-
assert_equal(4000, ostream.pos)
|
94
|
-
ostream.write_vint(9876543210)
|
95
|
-
ostream.close()
|
96
|
-
istream = @dir.open_input("rw_seek.test")
|
97
|
-
istream.seek(56)
|
98
|
-
assert_equal(56, istream.pos)
|
99
|
-
assert_equal(1234567890, istream.read_vint())
|
100
|
-
istream.seek(4000)
|
101
|
-
assert_equal(4000, istream.pos)
|
102
|
-
assert_equal(9876543210, istream.read_vint())
|
103
|
-
istream.seek(987)
|
104
|
-
assert_equal(987, istream.pos)
|
105
|
-
assert_equal(555, istream.read_vint())
|
106
|
-
istream.close()
|
107
|
-
end
|
108
|
-
|
109
|
-
def test_clone
|
110
|
-
ostream = @dir.create_output("clone_test")
|
111
|
-
10.times {|i| ostream.write_long(i) }
|
112
|
-
ostream.close
|
113
|
-
istream = @dir.open_input("clone_test")
|
114
|
-
istream.seek(24)
|
115
|
-
alt_istream = istream.clone
|
116
|
-
assert_equal(istream.pos, alt_istream.pos)
|
117
|
-
(3...10).each {|i| assert_equal(i, alt_istream.read_long) }
|
118
|
-
assert_equal(80, alt_istream.pos)
|
119
|
-
assert_equal(24, istream.pos)
|
120
|
-
alt_istream.close
|
121
|
-
(3...10).each {|i| assert_equal(i, istream.read_long) }
|
122
|
-
istream.close
|
123
|
-
end
|
124
|
-
|
125
|
-
def test_read_bytes
|
126
|
-
str = "0000000000"
|
127
|
-
ostream = @dir.create_output("rw_read_bytes")
|
128
|
-
ostream.write_bytes("how are you doing?", 18)
|
129
|
-
ostream.close
|
130
|
-
istream = @dir.open_input("rw_read_bytes")
|
131
|
-
istream.read_bytes(str, 2, 4)
|
132
|
-
assert_equal("00how 0000", str)
|
133
|
-
istream.read_bytes(str, 1, 8)
|
134
|
-
assert_equal("0are you 0", str)
|
135
|
-
istream.close
|
136
|
-
end
|
137
|
-
|
138
|
-
private
|
139
|
-
|
140
|
-
def rw_test(values, type, expected_length)
|
141
|
-
ostream = @dir.create_output("rw_#{type}.test")
|
142
|
-
values.each { |b| ostream.__send__("write_" + type, b) }
|
143
|
-
ostream.close
|
144
|
-
istream = @dir.open_input("rw_#{type}.test")
|
145
|
-
values.each { |b| assert_equal(b, istream.__send__("read_" + type), "#{type} should be equal") }
|
146
|
-
istream.close
|
147
|
-
assert_equal(expected_length, @dir.length("rw_#{type}.test"))
|
148
|
-
end
|
149
|
-
|
150
|
-
end
|