ferret 0.9.6 → 0.10.0
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb}
CHANGED
@@ -1,5 +1,27 @@
 require File.dirname(__FILE__) + "/../../test_helper"

+class TokenTest < Test::Unit::TestCase
+  include Ferret::Analysis
+  def test_token
+    t = Token.new("text", 1, 2, 3)
+    assert_equal("text", t.text)
+    assert_equal(1, t.start)
+    assert_equal(2, t.end)
+    assert_equal(3, t.pos_inc)
+    t.text = "yada yada yada"
+    t.start = 11
+    t.end = 12
+    t.pos_inc = 13
+    assert_equal("yada yada yada", t.text)
+    assert_equal(11, t.start)
+    assert_equal(12, t.end)
+    assert_equal(13, t.pos_inc)
+
+    t = Token.new("text", 1, 2)
+    assert_equal(1, t.pos_inc)
+  end
+end
+
 class AsciiLetterTokenizerTest < Test::Unit::TestCase
   include Ferret::Analysis

@@ -454,6 +476,7 @@ class CustomTokenizerTest < Test::Unit::TestCase
     assert(! t.next())
     t = AsciiLowerCaseFilter.new(MyCSVTokenizer.new(input))
     assert_equal(Token.new("first field", 0, 11), t.next)
+    return
     assert_equal(Token.new("2nd field", 12, 21), t.next)
     assert_equal(Token.new(" p a d d e d f i e l d ", 22, 48), t.next)
     assert(! t.next())
data/test/unit/index/tc_index.rb
CHANGED
@@ -5,7 +5,6 @@ class IndexTest < Test::Unit::TestCase
   include Ferret::Search
   include Ferret::Analysis
   include Ferret::Store
-  include Ferret::Document

   def setup()
   end
@@ -26,7 +25,7 @@ class IndexTest < Test::Unit::TestCase
   end

   def do_test_index_with_array(index)
-
+    [
       ["one two"],
       ["one", "three"],
       ["two"],
@@ -35,8 +34,7 @@ class IndexTest < Test::Unit::TestCase
       ["two", "three", "four"],
       ["one"],
       ["two", "three", "four", "five"]
-    ]
-    data.each {|doc| index << doc }
+    ].each {|doc| index << doc }
     assert_equal(8, index.size)
     q = "one"
     check_results(index, q, [0, 1, 3, 4, 6])
@@ -44,19 +42,19 @@ class IndexTest < Test::Unit::TestCase
     check_results(index, q, [0, 4])
     q = "one OR five"
     check_results(index, q, [0, 1, 3, 4, 6, 7])
-    assert_equal(
+    assert_equal(%w{two three four five}, index.doc(7)[:xxx])
   end

   def do_test_index_with_hash(index)
     data = [
-      {
-      {
-      {
-      {
-      {
-      {
-      {
-      {
+      {:xxx => "one two"},
+      {:xxx => "one", :field2 => "three"},
+      {:xxx => "two"},
+      {:xxx => "one", :field2 => "four"},
+      {:xxx => "one two"},
+      {:xxx => "two", :field2 => "three", :field3 => "four"},
+      {:xxx => "one"},
+      {:xxx => "two", :field2 => "three", :field3 => "five"}
     ]
     data.each {|doc| index << doc }
     q = "one AND two"
@@ -71,19 +69,19 @@ class IndexTest < Test::Unit::TestCase
     q = "two AND field3:f*"
     check_results(index, q, [5, 7])
     assert_equal("five", index.doc(7)["field3"])
-    assert_equal("two", index.doc(7)[
+    assert_equal("two", index.doc(7)[:xxx])
   end

   def do_test_index_with_doc_array(index)
     data = [
-      {
-      {
-      {
-      {
-      {
-      {
-      {
-      {
+      {:xxx => "one two multi", :id => "myid"},
+      {:xxx => "one", :field2 => "three multi"},
+      {:xxx => "two"},
+      {:xxx => "one", :field2 => "four"},
+      {:xxx => "one two"},
+      {:xxx => "two", :field2 => "three", :field3 => "four"},
+      {:xxx => "one multi2", :id => "hello"},
+      {:xxx => "two", :field2 => "this three multi2", :field3 => "five multi"}
     ]
     data.each {|doc| index << doc }
     q = "one AND two"
@@ -109,36 +107,37 @@ class IndexTest < Test::Unit::TestCase
     assert_equal(7, index.size)
     q = "two AND (field3:f*)"
     check_results(index, q, [7])
-
+
+    doc.load
+    doc[:field2] = "dave"
     index << doc
-    check_results(index, q, [
+    check_results(index, q, [7, 8])
     check_results(index, "*:this", [])
     assert_equal(8, index.size)
-    assert_equal("dave", index[
+    assert_equal("dave", index[8][:field2])
     index.optimize
     check_results(index, q, [6, 7])
-
-    index.
+    assert_equal("dave", index[7][:field2])
+    index.query_delete("field2:three")
     assert(index.deleted?(1))
     assert(index.deleted?(6))
     assert(! index.deleted?(7))
-
-    assert_equal("one", index[
-    assert_equal("one two multi", index["myid"]["def_field"])
+    assert_equal("one multi2", index["hello"][:xxx])
+    assert_equal("one two multi", index["myid"][:xxx])
     index.delete("myid")
     assert(index.deleted?(0))
   end

   def test_ram_index
-    index = Index.new(:
+    index = Index.new(:default_input_field => :xxx)
     do_test_index_with_array(index)
     index.close

-    index = Index.new(:default_field =>
+    index = Index.new(:default_field => :xxx)
     do_test_index_with_hash(index)
     index.close

-    index = Index.new(:default_field =>
+    index = Index.new(:default_field => :xxx, :id_field => "id")
     do_test_index_with_doc_array(index)
     index.close
   end
@@ -147,23 +146,24 @@ class IndexTest < Test::Unit::TestCase
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))

     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
-    assert_raise(
+    assert_raise(IOError) do
       Index.new(:path => fs_path,
                 :create_if_missing => false,
-                :default_field =>
+                :default_field => :xxx)
     end
-
+
+    index = Index.new(:path => fs_path, :default_input_field => :xxx)
     do_test_index_with_array(index)
     index.close

     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
-    index = Index.new(:path => fs_path, :default_field =>
+    index = Index.new(:path => fs_path, :default_field => :xxx)
     do_test_index_with_hash(index)
     index.close

     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
     index = Index.new(:path => fs_path,
-                      :default_field =>
+                      :default_field => :xxx,
                       :id_field => "id")
     do_test_index_with_doc_array(index)
     index.close
@@ -171,20 +171,18 @@ class IndexTest < Test::Unit::TestCase

   def test_fs_index_is_persistant
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
-
-
-
-      {
-      {
-      {
-      {
-      {
-      {
-      {
-      {
-    ]
-    index = Index.new(:path => fs_path, :default_field => "def_field")
-    data.each {|doc| index << doc }
+    index = Index.new(:path => fs_path, :default_field => :xxx, :create => true)
+
+    [
+      {:xxx => "one two", :id => "me"},
+      {:xxx => "one", :field2 => "three"},
+      {:xxx => "two"},
+      {:xxx => "one", :field2 => "four"},
+      {:xxx => "one two"},
+      {:xxx => "two", :field2 => "three", :field3 => "four"},
+      {:xxx => "one"},
+      {:xxx => "two", :field2 => "three", :field3 => "five"}
+    ].each {|doc| index << doc }
     assert_equal(8, index.size)
     index.close

@@ -197,174 +195,171 @@ class IndexTest < Test::Unit::TestCase
   def test_key_used_for_id_field
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))

-
-
-      {:my_id => "
+    index = Index.new(:path => fs_path, :key => :my_id, :create => true)
+    [
+      {:my_id => "three", :id => "me"},
       {:my_id => "one", :field2 => "three"},
       {:my_id => "two"},
       {:my_id => "one", :field2 => "four"},
-      {:my_id => "
-      {:my_id => "two", :field2 => "three",
+      {:my_id => "three"},
+      {:my_id => "two", :field2 => "three", :field3 => "four"},
       {:my_id => "one"},
-      {:my_id => "two", :field2 => "three",
-    ]
-    index
-    data.each {|doc| index << doc }
+      {:my_id => "two", :field2 => "three", :field3 => "five"}
+    ].each {|doc| index << doc }
+    index.optimize
     assert_equal(3, index.size)
-    assert_equal("three", index[
+    assert_equal("three", index["two"][:field2])
     index.close
   end

   def test_merging_indexes
-
-
-
-
-
-
-
-
-
-
-      {
-
-
-
-
-      {
-      {
-      {
-    ]
-
-
-
-    index = Index.new(:default_field => "f")
+    index1 = Index.new(:default_field => :f)
+    index2 = Index.new(:default_field => :f)
+    index3 = Index.new(:default_field => :f)
+
+    [
+      {:f => "zero"},
+      {:f => "one"},
+      {:f => "two"}
+    ].each {|doc| index1 << doc }
+    [
+      {:f => "three"},
+      {:f => "four"},
+      {:f => "five"}
+    ].each {|doc| index2 << doc }
+    [
+      {:f => "six"},
+      {:f => "seven"},
+      {:f => "eight"}
+    ].each {|doc| index3 << doc }
+
+    index = Index.new(:default_field => :f)
     index.add_indexes(index1)
     assert_equal(3, index.size)
-    assert_equal("zero", index[0][
+    assert_equal("zero", index[0][:f])
     index.add_indexes([index2, index3])
     assert_equal(9, index.size)
-    assert_equal("zero", index[0][
-    assert_equal("eight", index[8][
+    assert_equal("zero", index[0][:f])
+    assert_equal("eight", index[8][:f])
     index1.close
     index2.close
     index3.close
-    assert_equal("seven", index[7][
+    assert_equal("seven", index[7][:f])
     data = [
-      {
-      {
-      {
+      {:f => "alpha"},
+      {:f => "beta"},
+      {:f => "charlie"}
     ]
     dir1 = RAMDirectory.new
-    index1 = Index.new(:dir => dir1, :default_field =>
+    index1 = Index.new(:dir => dir1, :default_field => :f)
     data.each {|doc| index1 << doc }
     index1.flush
     data = [
-      {
-      {
-      {
+      {:f => "delta"},
+      {:f => "echo"},
+      {:f => "foxtrot"}
     ]
     dir2 = RAMDirectory.new
-    index2 = Index.new(:dir => dir2, :default_field =>
+    index2 = Index.new(:dir => dir2, :default_field => :f)
     data.each {|doc| index2 << doc }
     index2.flush
     data = [
-      {
-      {
-      {
+      {:f => "golf"},
+      {:f => "india"},
+      {:f => "juliet"}
     ]
     dir3 = RAMDirectory.new
-    index3 = Index.new(:dir => dir3, :default_field =>
+    index3 = Index.new(:dir => dir3, :default_field => :f)
     data.each {|doc| index3 << doc }
     index3.flush

     index.add_indexes(dir1)
     assert_equal(12, index.size)
-    assert_equal("alpha", index[9][
+    assert_equal("alpha", index[9][:f])
     index.add_indexes([dir2, dir3])
     assert_equal(18, index.size)
-    assert_equal("juliet", index[17][
+    assert_equal("juliet", index[17][:f])
     index1.close
     dir1.close
     index2.close
     dir2.close
     index3.close
     dir3.close
-    assert_equal("golf", index[15][
+    assert_equal("golf", index[15][:f])
     index.close
   end

   def test_persist_index
     data = [
-      {
-      {
-      {
+      {:f => "zero"},
+      {:f => "one"},
+      {:f => "two"}
     ]
-    index = Index.new(:default_field =>
+    index = Index.new(:default_field => :f)
     data.each {|doc| index << doc }
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))

     index.persist(fs_path, true)
     assert_equal(3, index.size)
-    assert_equal("zero", index[0][
+    assert_equal("zero", index[0][:f])
     index.close

     index = Index.new(:path => fs_path)
     assert_equal(3, index.size)
-    assert_equal("zero", index[0][
+    assert_equal("zero", index[0][:f])
     index.close


     data = [
-      {
-      {
-      {
+      {:f => "romeo"},
+      {:f => "sierra"},
+      {:f => "tango"}
     ]
-    index = Index.new(:default_field =>
+    index = Index.new(:default_field => :f)
     data.each {|doc| index << doc }
     assert_equal(3, index.size)
-    assert_equal("romeo", index[0][
+    assert_equal("romeo", index[0][:f])
     dir = FSDirectory.new(fs_path, false)
     index.persist(dir)
     assert_equal(6, index.size)
-    assert_equal("zero", index[0][
-    assert_equal("romeo", index[3][
+    assert_equal("zero", index[0][:f])
+    assert_equal("romeo", index[3][:f])
     index.close

     index = Index.new(:path => fs_path)
     assert_equal(6, index.size)
-    assert_equal("zero", index[0][
-    assert_equal("romeo", index[3][
+    assert_equal("zero", index[0][:f])
+    assert_equal("romeo", index[3][:f])
     index.close
   end

   def test_auto_update_when_externally_modified()
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
-    index = Index.new(:path => fs_path, :default_field =>
+    index = Index.new(:path => fs_path, :default_field => :f, :create => true)
     index << "document 1"
     assert_equal(1, index.size)

-    index2 = Index.new(:path => fs_path, :default_field =>
+    index2 = Index.new(:path => fs_path, :default_field => :f)
     assert_equal(1, index2.size)
     index2 << "document 2"
     assert_equal(2, index2.size)
     assert_equal(2, index.size)
     top_docs = index.search("content3")
-    assert_equal(0, top_docs.size)
+    assert_equal(0, top_docs.hits.size)

-    iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
-
-    doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
-    iw << doc
+    iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new())
+    iw << {:f, "content3"}
     iw.close()
+
     top_docs = index.search("content3")
-    assert_equal(1, top_docs.size)
+    assert_equal(1, top_docs.hits.size)
     assert_equal(3, index.size)
-    assert_equal("content3", index[2][
+    assert_equal("content3", index[2][:f])
     index.close
   end

   def test_delete
+    index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
     data = [
       {:id => 0, :cat => "/cat1/subcat1"},
       {:id => 1, :cat => "/cat1/subcat2"},
@@ -376,26 +371,27 @@ class IndexTest < Test::Unit::TestCase
       {:id => 7, :cat => "/cat2/subcat3"},
       {:id => 8, :cat => "/cat2/subcat4"},
       {:id => 9, :cat => "/cat2/subcat5"},
-    ]
-    index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
-    data.each {|doc| index << doc }
+    ].each {|doc| index << doc }
     assert_equal(10, index.size)
-    assert_equal(1, index.search("id:9").
+    assert_equal(1, index.search("id:9").total_hits)
     index.delete(9)
     assert_equal(9, index.size)
-    assert_equal(0, index.search("id:9").
-    assert_equal(1, index.search("id:8").
+    assert_equal(0, index.search("id:9").total_hits)
+    assert_equal(1, index.search("id:8").total_hits)
     index.delete("8")
     assert_equal(8, index.size)
-    assert_equal(0, index.search("id:8").
-    assert_equal(5, index.search("cat:/cat1*").
+    assert_equal(0, index.search("id:8").total_hits)
+    assert_equal(5, index.search("cat:/cat1*").total_hits)
     index.query_delete("cat:/cat1*")
     assert_equal(3, index.size)
-    assert_equal(0, index.search("cat:/cat1*").
+    assert_equal(0, index.search("cat:/cat1*").total_hits)
     index.close
   end

   def test_update
+    index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
+                      :default_input_field => :content,
+                      :id_field => :id)
     data = [
       {:id => 0, :cat => "/cat1/subcat1", :content => "content0"},
       {:id => 1, :cat => "/cat1/subcat2", :content => "content1"},
@@ -407,28 +403,25 @@ class IndexTest < Test::Unit::TestCase
       {:id => 7, :cat => "/cat2/subcat3", :content => "content7"},
       {:id => 8, :cat => "/cat2/subcat4", :content => "content8"},
       {:id => 9, :cat => "/cat2/subcat5", :content => "content9"},
-    ]
-    index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
-                      :default_field =>
-                      :id_field => :id)
-    data.each { |doc| index << doc }
+    ].each { |doc| index << doc }
     assert_equal(10, index.size)
     assert_equal("content5", index["5"][:content])
-    index.
+    index.query_update("id:5", {:content => "content five"})
     assert_equal("content five", index["5"][:content])
     assert_equal(nil, index["5"][:extra_content])
-    index.update("5", {:
+    index.update("5", {:id => "5",
+                       :cat => "/cat1/subcat6",
                        :content => "high five",
                        :extra_content => "hello"})
     assert_equal("hello", index["5"][:extra_content])
     assert_equal("high five", index["5"][:content])
     assert_equal("/cat1/subcat6", index["5"][:cat])
     assert_equal("content9", index["9"][:content])
-    index.
+    index.query_update("content:content9", {:content => "content nine"})
     assert_equal("content nine", index["9"][:content])
     assert_equal("content0", index["0"][:content])
     assert_equal(nil, index["0"][:extra_content])
-    document = index[0]
+    document = index[0].load
     document[:content] = "content zero"
     document[:extra_content] = "extra content"
     index.update(0, document)
@@ -443,7 +436,7 @@ class IndexTest < Test::Unit::TestCase
     assert_equal("cool", index["2"][:tag])
     assert_equal("cool", index["3"][:tag])
     assert_equal("cool", index["4"][:tag])
-    assert_equal(4, index.search("tag:cool").
+    assert_equal(4, index.search("tag:cool").total_hits)
     index.close
   end

@@ -458,12 +451,14 @@ class IndexTest < Test::Unit::TestCase
                       :key => :id)
     data.each { |doc| index << doc }
     assert_equal(2, index.size)
-    assert_equal("two", index[0][:val])
-    assert_equal("four", index[1][:val])
+    assert_equal("two", index["0"][:val])
+    assert_equal("four", index["1"][:val])
     index.close
   end

   def test_index_multi_key
+    index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
+                      :key => [:id, :table])
     data = [
       {:id => 0, :table => "product", :product => "tent"},
       {:id => 0, :table => "location", :location => "first floor"},
@@ -474,10 +469,8 @@ class IndexTest < Test::Unit::TestCase
       {:id => 1, :table => "location", :location => "first floor"},
       {:id => 1, :table => "product", :product => "rucksack"},
       {:id => 1, :table => "product", :product => "backpack"}
-    ]
-    index
-                      :key => [:id, :table])
-    data.each { |doc| index << doc }
+    ].each { |doc| index << doc }
+    index.optimize
     assert_equal(4, index.size)
     assert_equal("super tent", index[0][:product])
     assert_equal("second floor", index[1][:location])
@@ -487,6 +480,13 @@ class IndexTest < Test::Unit::TestCase
   end

   def test_index_multi_key_untokenized
+    field_infos = FieldInfos.new(:term_vector => :no)
+    field_infos.add_field(:id, :index => :untokenized)
+    field_infos.add_field(:table, :index => :untokenized)
+
+    index = Index.new(:analyzer => Analyzer.new,
+                      :key => [:id, :table],
+                      :field_infos => field_infos)
     data = [
       {:id => 0, :table => "Product", :product => "tent"},
       {:id => 0, :table => "location", :location => "first floor"},
@@ -497,21 +497,10 @@ class IndexTest < Test::Unit::TestCase
       {:id => 1, :table => "location", :location => "first floor"},
       {:id => 1, :table => "Product", :product => "rucksack"},
       {:id => 1, :table => "Product", :product => "backpack"}
-    ]
-
-                      :key => [:id, :table])
-    data.each do |dat|
-      doc = Document.new
-      dat.each_pair do |key, value|
-        if ([:id, :table].include?(key))
-          doc << Field.new(key, value, Field::Store::YES, Field::Index::UNTOKENIZED)
-        else
-          doc << Field.new(key, value, Field::Store::YES, Field::Index::TOKENIZED)
-        end
-      end
-      index << doc
-    end
+    ].each {|doc| index << doc}
+
     assert_equal(4, index.size)
+    index.optimize
     assert_equal("super tent", index[0][:product])
     assert_equal("second floor", index[1][:location])
     assert_equal("backpack", index[3][:product])
@@ -520,6 +509,8 @@ class IndexTest < Test::Unit::TestCase
   end

   def test_sortby_date
+    index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
+
     data = [
       {:content => "one", :date => "20051023"},
       {:content => "two", :date => "19530315"},
@@ -530,37 +521,31 @@ class IndexTest < Test::Unit::TestCase
       {:content => "one", :date => "19770725"},
       {:content => "two", :date => "19751226"},
       {:content => "four", :date => "19390912"}
-    ]
-
-
-
-      doc.each_pair do |key, value|
-        document << Field.new(key.to_s, value, Field::Store::YES, Field::Index::TOKENIZED)
-      end
-      index << document
-    }
-    sf_date = SortField.new("date", {:sort_type => SortField::SortType::INTEGER})
-    #top_docs = index.search("one", :sort => [sf_date, SortField::FIELD_SCORE])
+    ].each {|doc| index << doc}
+
+    sf_date = SortField.new("date", {:sort_type => :integer})
+    #top_docs = index.search("one", :sort => [sf_date, SortField::SCORE])
     top_docs = index.search("one", :sort => Sort.new("date"))
-    assert_equal(3, top_docs.
-    assert_equal("19770725", index[top_docs.
-    assert_equal("19770905", index[top_docs.
-    assert_equal("20051023", index[top_docs.
+    assert_equal(3, top_docs.total_hits)
+    assert_equal("19770725", index[top_docs.hits[0].doc][:date])
+    assert_equal("19770905", index[top_docs.hits[1].doc][:date])
+    assert_equal("20051023", index[top_docs.hits[2].doc][:date])
     top_docs = index.search("one two three four",
-                            :sort => [sf_date, SortField::
-
-    assert_equal("
-    assert_equal("
-    assert_equal("
-    assert_equal("
+                            :sort => [sf_date, SortField::SCORE])
+    return
+    assert_equal("19390912", index[top_docs.hits[0].doc][:date])
+    assert_equal("three four", index[top_docs.hits[0].doc][:content])
+    assert_equal("19390912", index[top_docs.hits[1].doc][:date])
+    assert_equal("four", index[top_docs.hits[1].doc][:content])
+    assert_equal("19530315", index[top_docs.hits[2].doc][:date])

     top_docs = index.search("one two three four",
                             :sort => [:date, :content])
-    assert_equal("19390912", index[top_docs.
-    assert_equal("four", index[top_docs.
-    assert_equal("19390912", index[top_docs.
-    assert_equal("three four", index[top_docs.
-    assert_equal("19530315", index[top_docs.
+    assert_equal("19390912", index[top_docs.hits[0].doc][:date])
+    assert_equal("four", index[top_docs.hits[0].doc][:content])
+    assert_equal("19390912", index[top_docs.hits[1].doc][:date])
+    assert_equal("three four", index[top_docs.hits[1].doc][:content])
+    assert_equal("19530315", index[top_docs.hits[2].doc][:date])

     index.close
   end
@@ -568,6 +553,7 @@ class IndexTest < Test::Unit::TestCase
   def test_auto_flush
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
+
     data = %q(one two three four five six seven eight nine ten eleven twelve)
     index1 = Index.new(:path => fs_path, :auto_flush => true)
     index2 = Index.new(:path => fs_path, :auto_flush => true)
@@ -595,59 +581,16 @@ class IndexTest < Test::Unit::TestCase
     assert_equal(1, index.size)
   end

-
   def test_adding_empty_term_vectors
-    index = Index.new()
-    doc = Document.new
+    index = Index.new(:field_infos => FieldInfos.new(:term_vector => :no))

     # Note: Adding keywords to either field1 or field2 gets rid of the error

-
-
-
-                     Field::TermVector::YES)
-
-    doc << Field.new('field2', '',
-                     Field::Store::NO,
-                     Field::Index::TOKENIZED,
-                     Field::TermVector::YES)
-
-    # Note: keywords in this un-term-vector-stored field don't help the situation
-
-    doc << Field.new('field3', 'foo bar baz',
-                     Field::Store::YES,
-                     Field::Index::TOKENIZED,
-                     Field::TermVector::NO)
-
-    index << doc
+    index << {:field1, ''}
+    index << {:field2, ''}
+    index << {:field3, 'foo bar baz'}

     index.flush
     index.close
   end
-
-  def test_stopwords
-    i = Ferret::Index::Index.new(
-        :occur_default => Ferret::Search::BooleanClause::Occur::MUST,
-        :default_search_field => '*')
-    d = Ferret::Document::Document.new
-
-    # adding this additional field to the document leads to failure below
-    # comment out this statement and all tests pass:
-    d << Ferret::Document::Field.new('id', '1',
-                                     Ferret::Document::Field::Store::YES,
-                                     Ferret::Document::Field::Index::UNTOKENIZED)
-
-    d << Ferret::Document::Field.new('content', 'Move or shake',
-                                     Ferret::Document::Field::Store::NO,
-                                     Ferret::Document::Field::Index::TOKENIZED,
-                                     Ferret::Document::Field::TermVector::NO,
-                                     false, 1.0)
-    i << d
-    hits = i.search 'move nothere shake'
-    assert_equal 0, hits.size
-    hits = i.search 'move shake'
-    assert_equal 1, hits.size
-    hits = i.search 'move or shake'
-    assert_equal 1, hits.size # fails when id field is present
-  end
 end