ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
@@ -1,5 +1,27 @@
|
|
1
1
|
require File.dirname(__FILE__) + "/../../test_helper"
|
2
2
|
|
3
|
+
class TokenTest < Test::Unit::TestCase
|
4
|
+
include Ferret::Analysis
|
5
|
+
def test_token
|
6
|
+
t = Token.new("text", 1, 2, 3)
|
7
|
+
assert_equal("text", t.text)
|
8
|
+
assert_equal(1, t.start)
|
9
|
+
assert_equal(2, t.end)
|
10
|
+
assert_equal(3, t.pos_inc)
|
11
|
+
t.text = "yada yada yada"
|
12
|
+
t.start = 11
|
13
|
+
t.end = 12
|
14
|
+
t.pos_inc = 13
|
15
|
+
assert_equal("yada yada yada", t.text)
|
16
|
+
assert_equal(11, t.start)
|
17
|
+
assert_equal(12, t.end)
|
18
|
+
assert_equal(13, t.pos_inc)
|
19
|
+
|
20
|
+
t = Token.new("text", 1, 2)
|
21
|
+
assert_equal(1, t.pos_inc)
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
3
25
|
class AsciiLetterTokenizerTest < Test::Unit::TestCase
|
4
26
|
include Ferret::Analysis
|
5
27
|
|
@@ -454,6 +476,7 @@ class CustomTokenizerTest < Test::Unit::TestCase
|
|
454
476
|
assert(! t.next())
|
455
477
|
t = AsciiLowerCaseFilter.new(MyCSVTokenizer.new(input))
|
456
478
|
assert_equal(Token.new("first field", 0, 11), t.next)
|
479
|
+
return
|
457
480
|
assert_equal(Token.new("2nd field", 12, 21), t.next)
|
458
481
|
assert_equal(Token.new(" p a d d e d f i e l d ", 22, 48), t.next)
|
459
482
|
assert(! t.next())
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -5,7 +5,6 @@ class IndexTest < Test::Unit::TestCase
|
|
5
5
|
include Ferret::Search
|
6
6
|
include Ferret::Analysis
|
7
7
|
include Ferret::Store
|
8
|
-
include Ferret::Document
|
9
8
|
|
10
9
|
def setup()
|
11
10
|
end
|
@@ -26,7 +25,7 @@ class IndexTest < Test::Unit::TestCase
|
|
26
25
|
end
|
27
26
|
|
28
27
|
def do_test_index_with_array(index)
|
29
|
-
|
28
|
+
[
|
30
29
|
["one two"],
|
31
30
|
["one", "three"],
|
32
31
|
["two"],
|
@@ -35,8 +34,7 @@ class IndexTest < Test::Unit::TestCase
|
|
35
34
|
["two", "three", "four"],
|
36
35
|
["one"],
|
37
36
|
["two", "three", "four", "five"]
|
38
|
-
]
|
39
|
-
data.each {|doc| index << doc }
|
37
|
+
].each {|doc| index << doc }
|
40
38
|
assert_equal(8, index.size)
|
41
39
|
q = "one"
|
42
40
|
check_results(index, q, [0, 1, 3, 4, 6])
|
@@ -44,19 +42,19 @@ class IndexTest < Test::Unit::TestCase
|
|
44
42
|
check_results(index, q, [0, 4])
|
45
43
|
q = "one OR five"
|
46
44
|
check_results(index, q, [0, 1, 3, 4, 6, 7])
|
47
|
-
assert_equal(
|
45
|
+
assert_equal(%w{two three four five}, index.doc(7)[:xxx])
|
48
46
|
end
|
49
47
|
|
50
48
|
def do_test_index_with_hash(index)
|
51
49
|
data = [
|
52
|
-
{
|
53
|
-
{
|
54
|
-
{
|
55
|
-
{
|
56
|
-
{
|
57
|
-
{
|
58
|
-
{
|
59
|
-
{
|
50
|
+
{:xxx => "one two"},
|
51
|
+
{:xxx => "one", :field2 => "three"},
|
52
|
+
{:xxx => "two"},
|
53
|
+
{:xxx => "one", :field2 => "four"},
|
54
|
+
{:xxx => "one two"},
|
55
|
+
{:xxx => "two", :field2 => "three", :field3 => "four"},
|
56
|
+
{:xxx => "one"},
|
57
|
+
{:xxx => "two", :field2 => "three", :field3 => "five"}
|
60
58
|
]
|
61
59
|
data.each {|doc| index << doc }
|
62
60
|
q = "one AND two"
|
@@ -71,19 +69,19 @@ class IndexTest < Test::Unit::TestCase
|
|
71
69
|
q = "two AND field3:f*"
|
72
70
|
check_results(index, q, [5, 7])
|
73
71
|
assert_equal("five", index.doc(7)["field3"])
|
74
|
-
assert_equal("two", index.doc(7)[
|
72
|
+
assert_equal("two", index.doc(7)[:xxx])
|
75
73
|
end
|
76
74
|
|
77
75
|
def do_test_index_with_doc_array(index)
|
78
76
|
data = [
|
79
|
-
{
|
80
|
-
{
|
81
|
-
{
|
82
|
-
{
|
83
|
-
{
|
84
|
-
{
|
85
|
-
{
|
86
|
-
{
|
77
|
+
{:xxx => "one two multi", :id => "myid"},
|
78
|
+
{:xxx => "one", :field2 => "three multi"},
|
79
|
+
{:xxx => "two"},
|
80
|
+
{:xxx => "one", :field2 => "four"},
|
81
|
+
{:xxx => "one two"},
|
82
|
+
{:xxx => "two", :field2 => "three", :field3 => "four"},
|
83
|
+
{:xxx => "one multi2", :id => "hello"},
|
84
|
+
{:xxx => "two", :field2 => "this three multi2", :field3 => "five multi"}
|
87
85
|
]
|
88
86
|
data.each {|doc| index << doc }
|
89
87
|
q = "one AND two"
|
@@ -109,36 +107,37 @@ class IndexTest < Test::Unit::TestCase
|
|
109
107
|
assert_equal(7, index.size)
|
110
108
|
q = "two AND (field3:f*)"
|
111
109
|
check_results(index, q, [7])
|
112
|
-
|
110
|
+
|
111
|
+
doc.load
|
112
|
+
doc[:field2] = "dave"
|
113
113
|
index << doc
|
114
|
-
check_results(index, q, [
|
114
|
+
check_results(index, q, [7, 8])
|
115
115
|
check_results(index, "*:this", [])
|
116
116
|
assert_equal(8, index.size)
|
117
|
-
assert_equal("dave", index[
|
117
|
+
assert_equal("dave", index[8][:field2])
|
118
118
|
index.optimize
|
119
119
|
check_results(index, q, [6, 7])
|
120
|
-
|
121
|
-
index.
|
120
|
+
assert_equal("dave", index[7][:field2])
|
121
|
+
index.query_delete("field2:three")
|
122
122
|
assert(index.deleted?(1))
|
123
123
|
assert(index.deleted?(6))
|
124
124
|
assert(! index.deleted?(7))
|
125
|
-
|
126
|
-
assert_equal("one", index[
|
127
|
-
assert_equal("one two multi", index["myid"]["def_field"])
|
125
|
+
assert_equal("one multi2", index["hello"][:xxx])
|
126
|
+
assert_equal("one two multi", index["myid"][:xxx])
|
128
127
|
index.delete("myid")
|
129
128
|
assert(index.deleted?(0))
|
130
129
|
end
|
131
130
|
|
132
131
|
def test_ram_index
|
133
|
-
index = Index.new(:
|
132
|
+
index = Index.new(:default_input_field => :xxx)
|
134
133
|
do_test_index_with_array(index)
|
135
134
|
index.close
|
136
135
|
|
137
|
-
index = Index.new(:default_field =>
|
136
|
+
index = Index.new(:default_field => :xxx)
|
138
137
|
do_test_index_with_hash(index)
|
139
138
|
index.close
|
140
139
|
|
141
|
-
index = Index.new(:default_field =>
|
140
|
+
index = Index.new(:default_field => :xxx, :id_field => "id")
|
142
141
|
do_test_index_with_doc_array(index)
|
143
142
|
index.close
|
144
143
|
end
|
@@ -147,23 +146,24 @@ class IndexTest < Test::Unit::TestCase
|
|
147
146
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
148
147
|
|
149
148
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
150
|
-
assert_raise(
|
149
|
+
assert_raise(IOError) do
|
151
150
|
Index.new(:path => fs_path,
|
152
151
|
:create_if_missing => false,
|
153
|
-
:default_field =>
|
152
|
+
:default_field => :xxx)
|
154
153
|
end
|
155
|
-
|
154
|
+
|
155
|
+
index = Index.new(:path => fs_path, :default_input_field => :xxx)
|
156
156
|
do_test_index_with_array(index)
|
157
157
|
index.close
|
158
158
|
|
159
159
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
160
|
-
index = Index.new(:path => fs_path, :default_field =>
|
160
|
+
index = Index.new(:path => fs_path, :default_field => :xxx)
|
161
161
|
do_test_index_with_hash(index)
|
162
162
|
index.close
|
163
163
|
|
164
164
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
165
165
|
index = Index.new(:path => fs_path,
|
166
|
-
:default_field =>
|
166
|
+
:default_field => :xxx,
|
167
167
|
:id_field => "id")
|
168
168
|
do_test_index_with_doc_array(index)
|
169
169
|
index.close
|
@@ -171,20 +171,18 @@ class IndexTest < Test::Unit::TestCase
|
|
171
171
|
|
172
172
|
def test_fs_index_is_persistant
|
173
173
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
{
|
178
|
-
{
|
179
|
-
{
|
180
|
-
{
|
181
|
-
{
|
182
|
-
{
|
183
|
-
{
|
184
|
-
{
|
185
|
-
]
|
186
|
-
index = Index.new(:path => fs_path, :default_field => "def_field")
|
187
|
-
data.each {|doc| index << doc }
|
174
|
+
index = Index.new(:path => fs_path, :default_field => :xxx, :create => true)
|
175
|
+
|
176
|
+
[
|
177
|
+
{:xxx => "one two", :id => "me"},
|
178
|
+
{:xxx => "one", :field2 => "three"},
|
179
|
+
{:xxx => "two"},
|
180
|
+
{:xxx => "one", :field2 => "four"},
|
181
|
+
{:xxx => "one two"},
|
182
|
+
{:xxx => "two", :field2 => "three", :field3 => "four"},
|
183
|
+
{:xxx => "one"},
|
184
|
+
{:xxx => "two", :field2 => "three", :field3 => "five"}
|
185
|
+
].each {|doc| index << doc }
|
188
186
|
assert_equal(8, index.size)
|
189
187
|
index.close
|
190
188
|
|
@@ -197,174 +195,171 @@ class IndexTest < Test::Unit::TestCase
|
|
197
195
|
def test_key_used_for_id_field
|
198
196
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
199
197
|
|
200
|
-
|
201
|
-
|
202
|
-
{:my_id => "
|
198
|
+
index = Index.new(:path => fs_path, :key => :my_id, :create => true)
|
199
|
+
[
|
200
|
+
{:my_id => "three", :id => "me"},
|
203
201
|
{:my_id => "one", :field2 => "three"},
|
204
202
|
{:my_id => "two"},
|
205
203
|
{:my_id => "one", :field2 => "four"},
|
206
|
-
{:my_id => "
|
207
|
-
{:my_id => "two", :field2 => "three",
|
204
|
+
{:my_id => "three"},
|
205
|
+
{:my_id => "two", :field2 => "three", :field3 => "four"},
|
208
206
|
{:my_id => "one"},
|
209
|
-
{:my_id => "two", :field2 => "three",
|
210
|
-
]
|
211
|
-
index
|
212
|
-
data.each {|doc| index << doc }
|
207
|
+
{:my_id => "two", :field2 => "three", :field3 => "five"}
|
208
|
+
].each {|doc| index << doc }
|
209
|
+
index.optimize
|
213
210
|
assert_equal(3, index.size)
|
214
|
-
assert_equal("three", index[
|
211
|
+
assert_equal("three", index["two"][:field2])
|
215
212
|
index.close
|
216
213
|
end
|
217
214
|
|
218
215
|
def test_merging_indexes
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
{
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
{
|
235
|
-
{
|
236
|
-
{
|
237
|
-
]
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
index = Index.new(:default_field => "f")
|
216
|
+
index1 = Index.new(:default_field => :f)
|
217
|
+
index2 = Index.new(:default_field => :f)
|
218
|
+
index3 = Index.new(:default_field => :f)
|
219
|
+
|
220
|
+
[
|
221
|
+
{:f => "zero"},
|
222
|
+
{:f => "one"},
|
223
|
+
{:f => "two"}
|
224
|
+
].each {|doc| index1 << doc }
|
225
|
+
[
|
226
|
+
{:f => "three"},
|
227
|
+
{:f => "four"},
|
228
|
+
{:f => "five"}
|
229
|
+
].each {|doc| index2 << doc }
|
230
|
+
[
|
231
|
+
{:f => "six"},
|
232
|
+
{:f => "seven"},
|
233
|
+
{:f => "eight"}
|
234
|
+
].each {|doc| index3 << doc }
|
235
|
+
|
236
|
+
index = Index.new(:default_field => :f)
|
242
237
|
index.add_indexes(index1)
|
243
238
|
assert_equal(3, index.size)
|
244
|
-
assert_equal("zero", index[0][
|
239
|
+
assert_equal("zero", index[0][:f])
|
245
240
|
index.add_indexes([index2, index3])
|
246
241
|
assert_equal(9, index.size)
|
247
|
-
assert_equal("zero", index[0][
|
248
|
-
assert_equal("eight", index[8][
|
242
|
+
assert_equal("zero", index[0][:f])
|
243
|
+
assert_equal("eight", index[8][:f])
|
249
244
|
index1.close
|
250
245
|
index2.close
|
251
246
|
index3.close
|
252
|
-
assert_equal("seven", index[7][
|
247
|
+
assert_equal("seven", index[7][:f])
|
253
248
|
data = [
|
254
|
-
{
|
255
|
-
{
|
256
|
-
{
|
249
|
+
{:f => "alpha"},
|
250
|
+
{:f => "beta"},
|
251
|
+
{:f => "charlie"}
|
257
252
|
]
|
258
253
|
dir1 = RAMDirectory.new
|
259
|
-
index1 = Index.new(:dir => dir1, :default_field =>
|
254
|
+
index1 = Index.new(:dir => dir1, :default_field => :f)
|
260
255
|
data.each {|doc| index1 << doc }
|
261
256
|
index1.flush
|
262
257
|
data = [
|
263
|
-
{
|
264
|
-
{
|
265
|
-
{
|
258
|
+
{:f => "delta"},
|
259
|
+
{:f => "echo"},
|
260
|
+
{:f => "foxtrot"}
|
266
261
|
]
|
267
262
|
dir2 = RAMDirectory.new
|
268
|
-
index2 = Index.new(:dir => dir2, :default_field =>
|
263
|
+
index2 = Index.new(:dir => dir2, :default_field => :f)
|
269
264
|
data.each {|doc| index2 << doc }
|
270
265
|
index2.flush
|
271
266
|
data = [
|
272
|
-
{
|
273
|
-
{
|
274
|
-
{
|
267
|
+
{:f => "golf"},
|
268
|
+
{:f => "india"},
|
269
|
+
{:f => "juliet"}
|
275
270
|
]
|
276
271
|
dir3 = RAMDirectory.new
|
277
|
-
index3 = Index.new(:dir => dir3, :default_field =>
|
272
|
+
index3 = Index.new(:dir => dir3, :default_field => :f)
|
278
273
|
data.each {|doc| index3 << doc }
|
279
274
|
index3.flush
|
280
275
|
|
281
276
|
index.add_indexes(dir1)
|
282
277
|
assert_equal(12, index.size)
|
283
|
-
assert_equal("alpha", index[9][
|
278
|
+
assert_equal("alpha", index[9][:f])
|
284
279
|
index.add_indexes([dir2, dir3])
|
285
280
|
assert_equal(18, index.size)
|
286
|
-
assert_equal("juliet", index[17][
|
281
|
+
assert_equal("juliet", index[17][:f])
|
287
282
|
index1.close
|
288
283
|
dir1.close
|
289
284
|
index2.close
|
290
285
|
dir2.close
|
291
286
|
index3.close
|
292
287
|
dir3.close
|
293
|
-
assert_equal("golf", index[15][
|
288
|
+
assert_equal("golf", index[15][:f])
|
294
289
|
index.close
|
295
290
|
end
|
296
291
|
|
297
292
|
def test_persist_index
|
298
293
|
data = [
|
299
|
-
{
|
300
|
-
{
|
301
|
-
{
|
294
|
+
{:f => "zero"},
|
295
|
+
{:f => "one"},
|
296
|
+
{:f => "two"}
|
302
297
|
]
|
303
|
-
index = Index.new(:default_field =>
|
298
|
+
index = Index.new(:default_field => :f)
|
304
299
|
data.each {|doc| index << doc }
|
305
300
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
306
301
|
|
307
302
|
index.persist(fs_path, true)
|
308
303
|
assert_equal(3, index.size)
|
309
|
-
assert_equal("zero", index[0][
|
304
|
+
assert_equal("zero", index[0][:f])
|
310
305
|
index.close
|
311
306
|
|
312
307
|
index = Index.new(:path => fs_path)
|
313
308
|
assert_equal(3, index.size)
|
314
|
-
assert_equal("zero", index[0][
|
309
|
+
assert_equal("zero", index[0][:f])
|
315
310
|
index.close
|
316
311
|
|
317
312
|
|
318
313
|
data = [
|
319
|
-
{
|
320
|
-
{
|
321
|
-
{
|
314
|
+
{:f => "romeo"},
|
315
|
+
{:f => "sierra"},
|
316
|
+
{:f => "tango"}
|
322
317
|
]
|
323
|
-
index = Index.new(:default_field =>
|
318
|
+
index = Index.new(:default_field => :f)
|
324
319
|
data.each {|doc| index << doc }
|
325
320
|
assert_equal(3, index.size)
|
326
|
-
assert_equal("romeo", index[0][
|
321
|
+
assert_equal("romeo", index[0][:f])
|
327
322
|
dir = FSDirectory.new(fs_path, false)
|
328
323
|
index.persist(dir)
|
329
324
|
assert_equal(6, index.size)
|
330
|
-
assert_equal("zero", index[0][
|
331
|
-
assert_equal("romeo", index[3][
|
325
|
+
assert_equal("zero", index[0][:f])
|
326
|
+
assert_equal("romeo", index[3][:f])
|
332
327
|
index.close
|
333
328
|
|
334
329
|
index = Index.new(:path => fs_path)
|
335
330
|
assert_equal(6, index.size)
|
336
|
-
assert_equal("zero", index[0][
|
337
|
-
assert_equal("romeo", index[3][
|
331
|
+
assert_equal("zero", index[0][:f])
|
332
|
+
assert_equal("romeo", index[3][:f])
|
338
333
|
index.close
|
339
334
|
end
|
340
335
|
|
341
336
|
def test_auto_update_when_externally_modified()
|
342
337
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
343
|
-
index = Index.new(:path => fs_path, :default_field =>
|
338
|
+
index = Index.new(:path => fs_path, :default_field => :f, :create => true)
|
344
339
|
index << "document 1"
|
345
340
|
assert_equal(1, index.size)
|
346
341
|
|
347
|
-
index2 = Index.new(:path => fs_path, :default_field =>
|
342
|
+
index2 = Index.new(:path => fs_path, :default_field => :f)
|
348
343
|
assert_equal(1, index2.size)
|
349
344
|
index2 << "document 2"
|
350
345
|
assert_equal(2, index2.size)
|
351
346
|
assert_equal(2, index.size)
|
352
347
|
top_docs = index.search("content3")
|
353
|
-
assert_equal(0, top_docs.size)
|
348
|
+
assert_equal(0, top_docs.hits.size)
|
354
349
|
|
355
|
-
iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
|
356
|
-
|
357
|
-
doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
|
358
|
-
iw << doc
|
350
|
+
iw = IndexWriter.new(:path => fs_path, :analyzer => WhiteSpaceAnalyzer.new())
|
351
|
+
iw << {:f, "content3"}
|
359
352
|
iw.close()
|
353
|
+
|
360
354
|
top_docs = index.search("content3")
|
361
|
-
assert_equal(1, top_docs.size)
|
355
|
+
assert_equal(1, top_docs.hits.size)
|
362
356
|
assert_equal(3, index.size)
|
363
|
-
assert_equal("content3", index[2][
|
357
|
+
assert_equal("content3", index[2][:f])
|
364
358
|
index.close
|
365
359
|
end
|
366
360
|
|
367
361
|
def test_delete
|
362
|
+
index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
|
368
363
|
data = [
|
369
364
|
{:id => 0, :cat => "/cat1/subcat1"},
|
370
365
|
{:id => 1, :cat => "/cat1/subcat2"},
|
@@ -376,26 +371,27 @@ class IndexTest < Test::Unit::TestCase
|
|
376
371
|
{:id => 7, :cat => "/cat2/subcat3"},
|
377
372
|
{:id => 8, :cat => "/cat2/subcat4"},
|
378
373
|
{:id => 9, :cat => "/cat2/subcat5"},
|
379
|
-
]
|
380
|
-
index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
|
381
|
-
data.each {|doc| index << doc }
|
374
|
+
].each {|doc| index << doc }
|
382
375
|
assert_equal(10, index.size)
|
383
|
-
assert_equal(1, index.search("id:9").
|
376
|
+
assert_equal(1, index.search("id:9").total_hits)
|
384
377
|
index.delete(9)
|
385
378
|
assert_equal(9, index.size)
|
386
|
-
assert_equal(0, index.search("id:9").
|
387
|
-
assert_equal(1, index.search("id:8").
|
379
|
+
assert_equal(0, index.search("id:9").total_hits)
|
380
|
+
assert_equal(1, index.search("id:8").total_hits)
|
388
381
|
index.delete("8")
|
389
382
|
assert_equal(8, index.size)
|
390
|
-
assert_equal(0, index.search("id:8").
|
391
|
-
assert_equal(5, index.search("cat:/cat1*").
|
383
|
+
assert_equal(0, index.search("id:8").total_hits)
|
384
|
+
assert_equal(5, index.search("cat:/cat1*").total_hits)
|
392
385
|
index.query_delete("cat:/cat1*")
|
393
386
|
assert_equal(3, index.size)
|
394
|
-
assert_equal(0, index.search("cat:/cat1*").
|
387
|
+
assert_equal(0, index.search("cat:/cat1*").total_hits)
|
395
388
|
index.close
|
396
389
|
end
|
397
390
|
|
398
391
|
def test_update
|
392
|
+
index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
|
393
|
+
:default_input_field => :content,
|
394
|
+
:id_field => :id)
|
399
395
|
data = [
|
400
396
|
{:id => 0, :cat => "/cat1/subcat1", :content => "content0"},
|
401
397
|
{:id => 1, :cat => "/cat1/subcat2", :content => "content1"},
|
@@ -407,28 +403,25 @@ class IndexTest < Test::Unit::TestCase
|
|
407
403
|
{:id => 7, :cat => "/cat2/subcat3", :content => "content7"},
|
408
404
|
{:id => 8, :cat => "/cat2/subcat4", :content => "content8"},
|
409
405
|
{:id => 9, :cat => "/cat2/subcat5", :content => "content9"},
|
410
|
-
]
|
411
|
-
index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
|
412
|
-
:default_field => :content,
|
413
|
-
:id_field => :id)
|
414
|
-
data.each { |doc| index << doc }
|
406
|
+
].each { |doc| index << doc }
|
415
407
|
assert_equal(10, index.size)
|
416
408
|
assert_equal("content5", index["5"][:content])
|
417
|
-
index.
|
409
|
+
index.query_update("id:5", {:content => "content five"})
|
418
410
|
assert_equal("content five", index["5"][:content])
|
419
411
|
assert_equal(nil, index["5"][:extra_content])
|
420
|
-
index.update("5", {:
|
412
|
+
index.update("5", {:id => "5",
|
413
|
+
:cat => "/cat1/subcat6",
|
421
414
|
:content => "high five",
|
422
415
|
:extra_content => "hello"})
|
423
416
|
assert_equal("hello", index["5"][:extra_content])
|
424
417
|
assert_equal("high five", index["5"][:content])
|
425
418
|
assert_equal("/cat1/subcat6", index["5"][:cat])
|
426
419
|
assert_equal("content9", index["9"][:content])
|
427
|
-
index.
|
420
|
+
index.query_update("content:content9", {:content => "content nine"})
|
428
421
|
assert_equal("content nine", index["9"][:content])
|
429
422
|
assert_equal("content0", index["0"][:content])
|
430
423
|
assert_equal(nil, index["0"][:extra_content])
|
431
|
-
document = index[0]
|
424
|
+
document = index[0].load
|
432
425
|
document[:content] = "content zero"
|
433
426
|
document[:extra_content] = "extra content"
|
434
427
|
index.update(0, document)
|
@@ -443,7 +436,7 @@ class IndexTest < Test::Unit::TestCase
|
|
443
436
|
assert_equal("cool", index["2"][:tag])
|
444
437
|
assert_equal("cool", index["3"][:tag])
|
445
438
|
assert_equal("cool", index["4"][:tag])
|
446
|
-
assert_equal(4, index.search("tag:cool").
|
439
|
+
assert_equal(4, index.search("tag:cool").total_hits)
|
447
440
|
index.close
|
448
441
|
end
|
449
442
|
|
@@ -458,12 +451,14 @@ class IndexTest < Test::Unit::TestCase
|
|
458
451
|
:key => :id)
|
459
452
|
data.each { |doc| index << doc }
|
460
453
|
assert_equal(2, index.size)
|
461
|
-
assert_equal("two", index[0][:val])
|
462
|
-
assert_equal("four", index[1][:val])
|
454
|
+
assert_equal("two", index["0"][:val])
|
455
|
+
assert_equal("four", index["1"][:val])
|
463
456
|
index.close
|
464
457
|
end
|
465
458
|
|
466
459
|
def test_index_multi_key
|
460
|
+
index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
|
461
|
+
:key => [:id, :table])
|
467
462
|
data = [
|
468
463
|
{:id => 0, :table => "product", :product => "tent"},
|
469
464
|
{:id => 0, :table => "location", :location => "first floor"},
|
@@ -474,10 +469,8 @@ class IndexTest < Test::Unit::TestCase
|
|
474
469
|
{:id => 1, :table => "location", :location => "first floor"},
|
475
470
|
{:id => 1, :table => "product", :product => "rucksack"},
|
476
471
|
{:id => 1, :table => "product", :product => "backpack"}
|
477
|
-
]
|
478
|
-
index
|
479
|
-
:key => [:id, :table])
|
480
|
-
data.each { |doc| index << doc }
|
472
|
+
].each { |doc| index << doc }
|
473
|
+
index.optimize
|
481
474
|
assert_equal(4, index.size)
|
482
475
|
assert_equal("super tent", index[0][:product])
|
483
476
|
assert_equal("second floor", index[1][:location])
|
@@ -487,6 +480,13 @@ class IndexTest < Test::Unit::TestCase
|
|
487
480
|
end
|
488
481
|
|
489
482
|
def test_index_multi_key_untokenized
|
483
|
+
field_infos = FieldInfos.new(:term_vector => :no)
|
484
|
+
field_infos.add_field(:id, :index => :untokenized)
|
485
|
+
field_infos.add_field(:table, :index => :untokenized)
|
486
|
+
|
487
|
+
index = Index.new(:analyzer => Analyzer.new,
|
488
|
+
:key => [:id, :table],
|
489
|
+
:field_infos => field_infos)
|
490
490
|
data = [
|
491
491
|
{:id => 0, :table => "Product", :product => "tent"},
|
492
492
|
{:id => 0, :table => "location", :location => "first floor"},
|
@@ -497,21 +497,10 @@ class IndexTest < Test::Unit::TestCase
|
|
497
497
|
{:id => 1, :table => "location", :location => "first floor"},
|
498
498
|
{:id => 1, :table => "Product", :product => "rucksack"},
|
499
499
|
{:id => 1, :table => "Product", :product => "backpack"}
|
500
|
-
]
|
501
|
-
|
502
|
-
:key => [:id, :table])
|
503
|
-
data.each do |dat|
|
504
|
-
doc = Document.new
|
505
|
-
dat.each_pair do |key, value|
|
506
|
-
if ([:id, :table].include?(key))
|
507
|
-
doc << Field.new(key, value, Field::Store::YES, Field::Index::UNTOKENIZED)
|
508
|
-
else
|
509
|
-
doc << Field.new(key, value, Field::Store::YES, Field::Index::TOKENIZED)
|
510
|
-
end
|
511
|
-
end
|
512
|
-
index << doc
|
513
|
-
end
|
500
|
+
].each {|doc| index << doc}
|
501
|
+
|
514
502
|
assert_equal(4, index.size)
|
503
|
+
index.optimize
|
515
504
|
assert_equal("super tent", index[0][:product])
|
516
505
|
assert_equal("second floor", index[1][:location])
|
517
506
|
assert_equal("backpack", index[3][:product])
|
@@ -520,6 +509,8 @@ class IndexTest < Test::Unit::TestCase
|
|
520
509
|
end
|
521
510
|
|
522
511
|
def test_sortby_date
|
512
|
+
index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
|
513
|
+
|
523
514
|
data = [
|
524
515
|
{:content => "one", :date => "20051023"},
|
525
516
|
{:content => "two", :date => "19530315"},
|
@@ -530,37 +521,31 @@ class IndexTest < Test::Unit::TestCase
|
|
530
521
|
{:content => "one", :date => "19770725"},
|
531
522
|
{:content => "two", :date => "19751226"},
|
532
523
|
{:content => "four", :date => "19390912"}
|
533
|
-
]
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
doc.each_pair do |key, value|
|
538
|
-
document << Field.new(key.to_s, value, Field::Store::YES, Field::Index::TOKENIZED)
|
539
|
-
end
|
540
|
-
index << document
|
541
|
-
}
|
542
|
-
sf_date = SortField.new("date", {:sort_type => SortField::SortType::INTEGER})
|
543
|
-
#top_docs = index.search("one", :sort => [sf_date, SortField::FIELD_SCORE])
|
524
|
+
].each {|doc| index << doc}
|
525
|
+
|
526
|
+
sf_date = SortField.new("date", {:sort_type => :integer})
|
527
|
+
#top_docs = index.search("one", :sort => [sf_date, SortField::SCORE])
|
544
528
|
top_docs = index.search("one", :sort => Sort.new("date"))
|
545
|
-
assert_equal(3, top_docs.
|
546
|
-
assert_equal("19770725", index[top_docs.
|
547
|
-
assert_equal("19770905", index[top_docs.
|
548
|
-
assert_equal("20051023", index[top_docs.
|
529
|
+
assert_equal(3, top_docs.total_hits)
|
530
|
+
assert_equal("19770725", index[top_docs.hits[0].doc][:date])
|
531
|
+
assert_equal("19770905", index[top_docs.hits[1].doc][:date])
|
532
|
+
assert_equal("20051023", index[top_docs.hits[2].doc][:date])
|
549
533
|
top_docs = index.search("one two three four",
|
550
|
-
:sort => [sf_date, SortField::
|
551
|
-
|
552
|
-
assert_equal("
|
553
|
-
assert_equal("
|
554
|
-
assert_equal("
|
555
|
-
assert_equal("
|
534
|
+
:sort => [sf_date, SortField::SCORE])
|
535
|
+
return
|
536
|
+
assert_equal("19390912", index[top_docs.hits[0].doc][:date])
|
537
|
+
assert_equal("three four", index[top_docs.hits[0].doc][:content])
|
538
|
+
assert_equal("19390912", index[top_docs.hits[1].doc][:date])
|
539
|
+
assert_equal("four", index[top_docs.hits[1].doc][:content])
|
540
|
+
assert_equal("19530315", index[top_docs.hits[2].doc][:date])
|
556
541
|
|
557
542
|
top_docs = index.search("one two three four",
|
558
543
|
:sort => [:date, :content])
|
559
|
-
assert_equal("19390912", index[top_docs.
|
560
|
-
assert_equal("four", index[top_docs.
|
561
|
-
assert_equal("19390912", index[top_docs.
|
562
|
-
assert_equal("three four", index[top_docs.
|
563
|
-
assert_equal("19530315", index[top_docs.
|
544
|
+
assert_equal("19390912", index[top_docs.hits[0].doc][:date])
|
545
|
+
assert_equal("four", index[top_docs.hits[0].doc][:content])
|
546
|
+
assert_equal("19390912", index[top_docs.hits[1].doc][:date])
|
547
|
+
assert_equal("three four", index[top_docs.hits[1].doc][:content])
|
548
|
+
assert_equal("19530315", index[top_docs.hits[2].doc][:date])
|
564
549
|
|
565
550
|
index.close
|
566
551
|
end
|
@@ -568,6 +553,7 @@ class IndexTest < Test::Unit::TestCase
|
|
568
553
|
def test_auto_flush
|
569
554
|
fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
|
570
555
|
Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
|
556
|
+
|
571
557
|
data = %q(one two three four five six seven eight nine ten eleven twelve)
|
572
558
|
index1 = Index.new(:path => fs_path, :auto_flush => true)
|
573
559
|
index2 = Index.new(:path => fs_path, :auto_flush => true)
|
@@ -595,59 +581,16 @@ class IndexTest < Test::Unit::TestCase
|
|
595
581
|
assert_equal(1, index.size)
|
596
582
|
end
|
597
583
|
|
598
|
-
|
599
584
|
def test_adding_empty_term_vectors
|
600
|
-
index = Index.new()
|
601
|
-
doc = Document.new
|
585
|
+
index = Index.new(:field_infos => FieldInfos.new(:term_vector => :no))
|
602
586
|
|
603
587
|
# Note: Adding keywords to either field1 or field2 gets rid of the error
|
604
588
|
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
Field::TermVector::YES)
|
609
|
-
|
610
|
-
doc << Field.new('field2', '',
|
611
|
-
Field::Store::NO,
|
612
|
-
Field::Index::TOKENIZED,
|
613
|
-
Field::TermVector::YES)
|
614
|
-
|
615
|
-
# Note: keywords in this un-term-vector-stored field don't help the situation
|
616
|
-
|
617
|
-
doc << Field.new('field3', 'foo bar baz',
|
618
|
-
Field::Store::YES,
|
619
|
-
Field::Index::TOKENIZED,
|
620
|
-
Field::TermVector::NO)
|
621
|
-
|
622
|
-
index << doc
|
589
|
+
index << {:field1, ''}
|
590
|
+
index << {:field2, ''}
|
591
|
+
index << {:field3, 'foo bar baz'}
|
623
592
|
|
624
593
|
index.flush
|
625
594
|
index.close
|
626
595
|
end
|
627
|
-
|
628
|
-
def test_stopwords
|
629
|
-
i = Ferret::Index::Index.new(
|
630
|
-
:occur_default => Ferret::Search::BooleanClause::Occur::MUST,
|
631
|
-
:default_search_field => '*')
|
632
|
-
d = Ferret::Document::Document.new
|
633
|
-
|
634
|
-
# adding this additional field to the document leads to failure below
|
635
|
-
# comment out this statement and all tests pass:
|
636
|
-
d << Ferret::Document::Field.new('id', '1',
|
637
|
-
Ferret::Document::Field::Store::YES,
|
638
|
-
Ferret::Document::Field::Index::UNTOKENIZED)
|
639
|
-
|
640
|
-
d << Ferret::Document::Field.new('content', 'Move or shake',
|
641
|
-
Ferret::Document::Field::Store::NO,
|
642
|
-
Ferret::Document::Field::Index::TOKENIZED,
|
643
|
-
Ferret::Document::Field::TermVector::NO,
|
644
|
-
false, 1.0)
|
645
|
-
i << d
|
646
|
-
hits = i.search 'move nothere shake'
|
647
|
-
assert_equal 0, hits.size
|
648
|
-
hits = i.search 'move shake'
|
649
|
-
assert_equal 1, hits.size
|
650
|
-
hits = i.search 'move or shake'
|
651
|
-
assert_equal 1, hits.size # fails when id field is present
|
652
|
-
end
|
653
596
|
end
|