ferret 0.3.2 → 0.9.0
This diff shows the changes between publicly released versions of this package, as published to their respective public registries. It is provided for informational purposes only.
- data/CHANGELOG +9 -0
- data/Rakefile +51 -25
- data/ext/analysis.c +553 -0
- data/ext/analysis.h +76 -0
- data/ext/array.c +83 -0
- data/ext/array.h +19 -0
- data/ext/bitvector.c +164 -0
- data/ext/bitvector.h +29 -0
- data/ext/compound_io.c +335 -0
- data/ext/document.c +336 -0
- data/ext/document.h +87 -0
- data/ext/ferret.c +88 -47
- data/ext/ferret.h +43 -109
- data/ext/field.c +395 -0
- data/ext/filter.c +103 -0
- data/ext/fs_store.c +352 -0
- data/ext/global.c +219 -0
- data/ext/global.h +73 -0
- data/ext/hash.c +446 -0
- data/ext/hash.h +80 -0
- data/ext/hashset.c +141 -0
- data/ext/hashset.h +37 -0
- data/ext/helper.c +11 -0
- data/ext/helper.h +5 -0
- data/ext/inc/lang.h +41 -0
- data/ext/ind.c +389 -0
- data/ext/index.h +884 -0
- data/ext/index_io.c +269 -415
- data/ext/index_rw.c +2543 -0
- data/ext/lang.c +31 -0
- data/ext/lang.h +41 -0
- data/ext/priorityqueue.c +228 -0
- data/ext/priorityqueue.h +44 -0
- data/ext/q_boolean.c +1331 -0
- data/ext/q_const_score.c +154 -0
- data/ext/q_fuzzy.c +287 -0
- data/ext/q_match_all.c +142 -0
- data/ext/q_multi_phrase.c +343 -0
- data/ext/q_parser.c +2180 -0
- data/ext/q_phrase.c +657 -0
- data/ext/q_prefix.c +75 -0
- data/ext/q_range.c +247 -0
- data/ext/q_span.c +1566 -0
- data/ext/q_term.c +308 -0
- data/ext/q_wildcard.c +146 -0
- data/ext/r_analysis.c +255 -0
- data/ext/r_doc.c +578 -0
- data/ext/r_index_io.c +996 -0
- data/ext/r_qparser.c +158 -0
- data/ext/r_search.c +2321 -0
- data/ext/r_store.c +263 -0
- data/ext/r_term.c +219 -0
- data/ext/ram_store.c +447 -0
- data/ext/search.c +524 -0
- data/ext/search.h +1065 -0
- data/ext/similarity.c +143 -39
- data/ext/sort.c +661 -0
- data/ext/store.c +35 -0
- data/ext/store.h +152 -0
- data/ext/term.c +704 -143
- data/ext/termdocs.c +599 -0
- data/ext/vector.c +594 -0
- data/lib/ferret.rb +9 -10
- data/lib/ferret/analysis/analyzers.rb +2 -2
- data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
- data/lib/ferret/analysis/token.rb +14 -14
- data/lib/ferret/analysis/token_filters.rb +3 -3
- data/lib/ferret/document/field.rb +16 -17
- data/lib/ferret/index/document_writer.rb +4 -4
- data/lib/ferret/index/index.rb +39 -23
- data/lib/ferret/index/index_writer.rb +2 -2
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
- data/lib/ferret/index/segment_term_vector.rb +4 -4
- data/lib/ferret/index/term.rb +5 -1
- data/lib/ferret/index/term_vector_offset_info.rb +6 -6
- data/lib/ferret/index/term_vectors_io.rb +5 -5
- data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
- data/lib/ferret/search.rb +1 -1
- data/lib/ferret/search/boolean_query.rb +2 -1
- data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
- data/lib/ferret/search/fuzzy_query.rb +2 -1
- data/lib/ferret/search/index_searcher.rb +3 -0
- data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
- data/lib/ferret/search/multi_phrase_query.rb +6 -5
- data/lib/ferret/search/phrase_query.rb +3 -6
- data/lib/ferret/search/prefix_query.rb +4 -4
- data/lib/ferret/search/sort.rb +3 -1
- data/lib/ferret/search/sort_field.rb +9 -9
- data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
- data/lib/ferret/search/spans/span_near_query.rb +1 -1
- data/lib/ferret/search/spans/span_weight.rb +1 -1
- data/lib/ferret/search/spans/spans_enum.rb +7 -7
- data/lib/ferret/store/fs_store.rb +10 -6
- data/lib/ferret/store/ram_store.rb +3 -3
- data/lib/rferret.rb +36 -0
- data/test/functional/thread_safety_index_test.rb +2 -2
- data/test/test_helper.rb +16 -2
- data/test/unit/analysis/c_token.rb +25 -0
- data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
- data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
- data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
- data/test/unit/document/c_field.rb +98 -0
- data/test/unit/document/tc_field.rb +0 -66
- data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
- data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
- data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
- data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
- data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
- data/test/unit/index/tc_segment_term_vector.rb +2 -2
- data/test/unit/index/tc_term_vectors_io.rb +4 -4
- data/test/unit/query_parser/c_query_parser.rb +138 -0
- data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
- data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
- data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
- data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
- data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
- data/test/unit/search/c_sort_field.rb +27 -0
- data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +7 -20
- data/test/unit/store/c_fs_store.rb +76 -0
- data/test/unit/store/c_ram_store.rb +35 -0
- data/test/unit/store/m_store.rb +34 -0
- data/test/unit/store/m_store_lock.rb +68 -0
- data/test/unit/store/tc_fs_store.rb +0 -53
- data/test/unit/store/tc_ram_store.rb +0 -20
- data/test/unit/store/tm_store.rb +0 -30
- data/test/unit/store/tm_store_lock.rb +0 -66
- metadata +84 -31
- data/ext/Makefile +0 -140
- data/ext/ferret_ext.so +0 -0
- data/ext/priority_queue.c +0 -232
- data/ext/ram_directory.c +0 -321
- data/ext/segment_merge_queue.c +0 -37
- data/ext/segment_term_enum.c +0 -326
- data/ext/string_helper.c +0 -42
- data/ext/tags +0 -344
- data/ext/term_buffer.c +0 -230
- data/ext/term_infos_reader.c +0 -54
- data/ext/terminfo.c +0 -160
- data/ext/token.c +0 -93
- data/ext/util.c +0 -12
@@ -25,70 +25,4 @@ class FieldTest < Test::Unit::TestCase
     assert_equal("WITH_OFFSETS", Field::TermVector::WITH_OFFSETS.to_s)
     assert_equal("WITH_POSITIONS_OFFSETS", Field::TermVector::WITH_POSITIONS_OFFSETS.to_s)
   end
-
-  def test_standard_field()
-    f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
-    assert_equal("name", f.name)
-    assert_equal("value", f.data)
-    assert_equal(true, f.stored?)
-    assert_equal(true, f.compressed?)
-    assert_equal(true, f.indexed?)
-    assert_equal(true, f.tokenized?)
-    assert_equal(false, f.store_term_vector?)
-    assert_equal(false, f.store_offsets?)
-    assert_equal(false, f.store_positions?)
-    assert_equal(false, f.omit_norms?)
-    assert_equal(false, f.binary?)
-    assert_equal("stored/compressed,indexed,tokenized,<name:value>", f.to_s)
-  end
-
-  def test_set_store()
-    f = Field.new("name", nil, Field::Store::COMPRESS, Field::Index::TOKENIZED)
-    f.stored = Field::Store::NO
-    assert_equal(false, f.stored?)
-    assert_equal(false, f.compressed?)
-    assert_equal("indexed,tokenized,<name:>", f.to_s)
-  end
-
-  def test_set_index()
-    f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
-    f.index = Field::Index::NO
-    assert_equal(false, f.indexed?)
-    assert_equal(false, f.tokenized?)
-    assert_equal(false, f.omit_norms?)
-    assert_equal("stored/compressed,<name:value>", f.to_s)
-    f.index = Field::Index::NO_NORMS
-    assert_equal(true, f.indexed?)
-    assert_equal(false, f.tokenized?)
-    assert_equal(true, f.omit_norms?)
-    assert_equal("stored/compressed,indexed,omit_norms,<name:value>", f.to_s)
-  end
-
-  def test_set_term_vector()
-    f = Field.new("name", "value", Field::Store::COMPRESS, Field::Index::TOKENIZED)
-    f.store_term_vector = Field::TermVector::WITH_POSITIONS_OFFSETS
-    assert_equal(true, f.store_term_vector?)
-    assert_equal(true, f.store_offsets?)
-    assert_equal(true, f.store_positions?)
-    assert_equal("stored/compressed,indexed,tokenized,store_term_vector,tv_offset,tv_position,<name:value>", f.to_s)
-  end
-
-  def test_new_binary_field()
-    tmp = []
-    256.times {|i| tmp[i] = i}
-    bin = tmp.pack("c*")
-    f = Field.new_binary_field("name", bin, Field::Store::YES)
-    assert_equal("name", f.name)
-    assert_equal(bin, f.data)
-    assert_equal(true, f.stored?)
-    assert_equal(false, f.compressed?)
-    assert_equal(false, f.indexed?)
-    assert_equal(false, f.tokenized?)
-    assert_equal(false, f.store_term_vector?)
-    assert_equal(false, f.store_offsets?)
-    assert_equal(false, f.store_positions?)
-    assert_equal(false, f.omit_norms?)
-    assert_equal(true, f.binary?)
-    assert_equal("stored/uncompressed,binary,<name:#{bin}>", f.to_s)
-  end
 end
@@ -16,7 +16,10 @@ class IndexTest < Test::Unit::TestCase
 
   def check_results(index, query, expected)
     cnt = 0
+    #puts "#{query} - #{expected.inspect}"
+    #puts index.size
    index.search_each(query) do |doc, score|
+      #puts "doc-#{doc} score=#{score}"
       assert(expected.index(doc))
       cnt += 1
     end
@@ -136,7 +139,7 @@ class IndexTest < Test::Unit::TestCase
     do_test_index_with_hash(index)
     index.close
 
-    index = Index.new(:default_field => "def_field")
+    index = Index.new(:default_field => "def_field", :id_field => "id")
     do_test_index_with_doc_array(index)
     index.close
   end
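
The :id_field option added here is new in 0.9.0; judging only from its use in these tests, it names the field whose value keys a document, so documents added as hashes can later be fetched by that value (as index["5"] does further down this file). A minimal sketch of that usage; the field names are illustrative:

    index = Index.new(:default_field => "def_field", :id_field => "id")
    index << {:id => "1", :def_field => "some searchable text"}
    index.search_each("searchable") { |doc, score| puts "doc #{doc}: #{score}" }
    index.close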
@@ -144,7 +147,11 @@ class IndexTest < Test::Unit::TestCase
   def test_fs_index
     fs_path = File.expand_path(File.join(File.dirname(__FILE__), '../../temp/fsdir'))
     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
-    assert_raise(
+    assert_raise(StandardError) do
+      Index.new(:path => fs_path,
+                :create_if_missing => false,
+                :default_field => "def_field")
+    end
     index = Index.new(:path => fs_path, :default_field => "def_field")
     do_test_index_with_array(index)
     index.close
@@ -155,7 +162,9 @@ class IndexTest < Test::Unit::TestCase
     index.close
 
     Dir[File.join(fs_path, "*")].each {|path| begin File.delete(path) rescue nil end}
-    index = Index.new(:path => fs_path,
+    index = Index.new(:path => fs_path,
+                      :default_field => "def_field",
+                      :id_field => "id")
     do_test_index_with_doc_array(index)
     index.close
   end
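
Together with the expanded assert_raise above, these hunks show the 0.9.0 behaviour that :create_if_missing => false makes Index.new raise a StandardError when no index exists at :path. A hedged sketch of guarding an open against a missing index (the path is illustrative):

    begin
      index = Index.new(:path => "/var/data/index",
                        :create_if_missing => false,
                        :default_field => "def_field")
    rescue StandardError
      # no index there yet; fall back to creating one
      index = Index.new(:path => "/var/data/index", :default_field => "def_field")
    end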
@@ -317,12 +326,16 @@ class IndexTest < Test::Unit::TestCase
     index2 << "document 2"
     assert_equal(2, index2.size)
     assert_equal(2, index.size)
+    top_docs = index.search("content3")
+    assert_equal(0, top_docs.size)
 
     iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
     doc = Document.new
     doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
     iw << doc
     iw.close()
+    top_docs = index.search("content3")
+    assert_equal(1, top_docs.size)
     assert_equal(3, index.size)
     assert_equal("content3", index[2]["f"])
     index.close
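
The four added lines pin down refresh behaviour: the same Index object returns zero hits for "content3" before a separate IndexWriter commits the document and one hit afterwards, with no explicit reopen in between. Condensed from the test (fs_path is a directory path; field names are illustrative):

    index = Index.new(:path => fs_path, :default_field => "f")
    index.search("content3").size        #=> 0

    iw = IndexWriter.new(fs_path, :analyzer => WhiteSpaceAnalyzer.new())
    doc = Document.new
    doc << Field.new("f", "content3", Field::Store::YES, Field::Index::TOKENIZED)
    iw << doc
    iw.close()

    index.search("content3").size        #=> 1, picked up without a reopen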
@@ -373,7 +386,8 @@ class IndexTest < Test::Unit::TestCase
       {:id => 9, :cat => "/cat2/subcat5", :content => "content9"},
     ]
     index = Index.new(:analyzer => WhiteSpaceAnalyzer.new,
-                      :default_field => :content
+                      :default_field => :content,
+                      :id_field => :id)
     data.each { |doc| index << doc }
     assert_equal(10, index.size)
     assert_equal("content5", index["5"][:content])
@@ -449,6 +463,39 @@ class IndexTest < Test::Unit::TestCase
     index.close
   end
 
+  def test_index_multi_key_untokenized
+    data = [
+      {:id => 0, :table => "Product", :product => "tent"},
+      {:id => 0, :table => "location", :location => "first floor"},
+      {:id => 0, :table => "Product", :product => "super tent"},
+      {:id => 0, :table => "location", :location => "second floor"},
+      {:id => 1, :table => "Product", :product => "backback"},
+      {:id => 1, :table => "location", :location => "second floor"},
+      {:id => 1, :table => "location", :location => "first floor"},
+      {:id => 1, :table => "Product", :product => "rucksack"},
+      {:id => 1, :table => "Product", :product => "backpack"}
+    ]
+    index = Index.new(:analyzer => Analyzer.new,
+                      :key => ["id", "table"])
+    data.each do |dat|
+      doc = Document.new
+      dat.each_pair do |key, value|
+        if ([:id, :table].include?(key))
+          doc << Field.new(key, value, Field::Store::YES, Field::Index::UNTOKENIZED)
+        else
+          doc << Field.new(key, value, Field::Store::YES, Field::Index::TOKENIZED)
+        end
+      end
+      index << doc
+    end
+    assert_equal(4, index.size)
+    assert_equal("super tent", index[0][:product])
+    assert_equal("second floor", index[1][:location])
+    assert_equal("backpack", index[3][:product])
+    assert_equal("first floor", index[2][:location])
+    index.close
+  end
+
   def test_sortby_date
     data = [
       {:content => "one", :date => "20051023"},
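
This new test fixes the semantics of compound keys: when :key lists several fields, adding a document whose key fields all match an existing document replaces it rather than appending, which is why nine additions leave only four live documents (and why the key fields are indexed UNTOKENIZED, so they compare exactly). The core of the pattern, trimmed down from the test above:

    index = Index.new(:analyzer => Analyzer.new, :key => ["id", "table"])

    doc = Document.new
    doc << Field.new(:id, 0, Field::Store::YES, Field::Index::UNTOKENIZED)
    doc << Field.new(:table, "Product", Field::Store::YES, Field::Index::UNTOKENIZED)
    doc << Field.new(:product, "tent", Field::Store::YES, Field::Index::TOKENIZED)
    index << doc
    # a later document with the same :id and :table values replaces this one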
@@ -459,7 +506,7 @@ class IndexTest < Test::Unit::TestCase
       {:content => "three", :date => "19790531"},
       {:content => "one", :date => "19770725"},
       {:content => "two", :date => "19751226"},
-      {:content => "
+      {:content => "four", :date => "19390912"}
     ]
     index = Index.new(:analyzer => WhiteSpaceAnalyzer.new)
     data.each { |doc|
@@ -481,8 +528,17 @@ class IndexTest < Test::Unit::TestCase
     assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
     assert_equal("three four", index[top_docs.score_docs[0].doc][:content])
     assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
-    assert_equal("
+    assert_equal("four", index[top_docs.score_docs[1].doc][:content])
     assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
+
+    top_docs = index.search("one two three four",
+                            :sort => [:date, :content])
+    assert_equal("19390912", index[top_docs.score_docs[0].doc][:date])
+    assert_equal("four", index[top_docs.score_docs[0].doc][:content])
+    assert_equal("19390912", index[top_docs.score_docs[1].doc][:date])
+    assert_equal("three four", index[top_docs.score_docs[1].doc][:content])
+    assert_equal("19530315", index[top_docs.score_docs[2].doc][:date])
+
     index.close
   end
 
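
The appended block exercises multi-field sorting: :sort => [:date, :content] orders by date first and breaks ties lexically on content, which is why the two 19390912 documents come back in the opposite order to the single-field sort checked just above it. In sketch form (assuming score_docs behaves like an Array):

    top_docs = index.search("one two three four", :sort => [:date, :content])
    top_docs.score_docs.each do |sd|
      puts "#{index[sd.doc][:date]}  #{index[sd.doc][:content]}"
    end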
@@ -15,6 +15,47 @@ module IndexReaderCommon
 
     do_test_get_doc()
 
+    do_test_term_enum()
+  end
+
+  def do_test_term_enum()
+    te = @ir.terms
+
+    assert(te.next?)
+    assert_equal(Term.new("author", "Leo"), te.term)
+    assert_equal(1, te.doc_freq)
+    assert(te.next?)
+    assert_equal(Term.new("author", "Tolstoy"), te.term)
+    assert_equal(1, te.doc_freq)
+    assert(te.next?)
+    assert_equal(Term.new("body", "And"), te.term)
+    assert_equal(1, te.doc_freq)
+
+
+    assert(te.skip_to(Term.new("body", "Not")))
+    assert_equal(Term.new("body", "Not"), te.term)
+    assert_equal(1, te.doc_freq)
+    assert(te.next?)
+    assert_equal(Term.new("body", "Random"), te.term)
+    assert_equal(16, te.doc_freq)
+
+    assert(te.skip_to(Term.new("text", "which")))
+    assert(Term.new("text", "which"), te.term)
+    assert_equal(1, te.doc_freq)
+    assert(te.next?)
+    assert_equal(Term.new("title", "War And Peace"), te.term)
+    assert_equal(1, te.doc_freq)
+    assert(!te.next?)
+
+    te.close
+
+    te = @ir.terms_from(Term.new("body", "Not"))
+    assert_equal(Term.new("body", "Not"), te.term)
+    assert_equal(1, te.doc_freq)
+    assert(te.next?)
+    assert_equal(Term.new("body", "Random"), te.term)
+    assert_equal(16, te.doc_freq)
+    te.close
   end
 
   def do_test_term_doc_enum()
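
The new do_test_term_enum doubles as a spec for the reader's term-enumeration API as exercised here: terms returns an enumerator over every term in index order, next? advances it and returns false at the end, skip_to positions it at the first term not less than its argument, doc_freq reports how many documents contain the current term, and terms_from opens an enumerator already positioned on a term. A minimal sketch of walking all terms, given an open index reader in reader:

    te = reader.terms
    while te.next?
      puts "#{te.term} occurs in #{te.doc_freq} document(s)"
    end
    te.close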
@@ -155,7 +196,7 @@ module IndexReaderCommon
 
     assert_equal("body", tv.field)
     assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
-    assert_equal([3, 1, 4, 2], tv.
+    assert_equal([3, 1, 4, 2], tv.freqs)
     assert_equal([[2, 4, 7], [3], [0, 5, 8, 9], [1,6]], tv.positions)
     assert_equal([[t(12,17), t(24,29), t(42,47)],
                   [t(18,23)],
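
This hunk and its twins below settle on freqs as the term-vector frequency accessor, an array parallel to terms (and to positions and offsets). Given a term vector tv obtained from a reader, the parallel arrays line up like this (a sketch, not part of the diff):

    tv.terms.each_with_index do |term, i|
      puts "#{term}: freq=#{tv.freqs[i]}, positions=#{tv.positions[i].inspect}"
    end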
@@ -489,15 +530,15 @@ class IndexReaderTest < Test::Unit::TestCase
     doc << Field.new("title", "this is the title DocField", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
     doc << Field.new("author", "this is the author field", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::WITH_POSITIONS_OFFSETS)
 
-    fis = FieldInfos.new()
-    fis << doc
-    assert_equal(4, fis.size)
+    #fis = FieldInfos.new()
+    #fis << doc
+    #assert_equal(4, fis.size)
 
-    fi = fis["tag"]
-    assert_equal(true, fi.indexed?)
-    assert_equal(true, fi.store_term_vector?)
-    assert_equal(true, fi.store_positions?)
-    assert_equal(true, fi.store_offsets?)
+    #fi = fis["tag"]
+    #assert_equal(true, fi.indexed?)
+    #assert_equal(true, fi.store_term_vector?)
+    #assert_equal(true, fi.store_positions?)
+    #assert_equal(true, fi.store_offsets?)
 
     iw << doc
     iw.close()
@@ -549,7 +590,7 @@ class IndexReaderTest < Test::Unit::TestCase
 
     assert_equal("body", tv.field)
     assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
-    assert_equal([3, 1, 4, 2], tv.
+    assert_equal([3, 1, 4, 2], tv.freqs)
     assert_equal([[2, 4, 7], [3], [0, 5, 8, 9], [1,6]], tv.positions)
     assert_equal([[t(12,17), t(24,29), t(42,47)],
                   [t(18,23)],
@@ -30,8 +30,6 @@ class IndexWriterTest < Test::Unit::TestCase
   def test_add_document
     iw = IndexWriter.new(@dir, :analyzer => StandardAnalyzer.new(), :create => true)
     doc = IndexTestHelper.prepare_document()
-    infos = FieldInfos.new
-    infos << doc
     iw.add_document(doc)
     assert_equal(1, iw.doc_count)
     iw.close()
@@ -44,8 +42,6 @@ class IndexWriterTest < Test::Unit::TestCase
     iw.merge_factor = 3
     iw.min_merge_docs = 3
     docs = IndexTestHelper.prepare_book_list()
-    infos = FieldInfos.new
-    infos << docs[0]
     docs.each_with_index do |doc, i|
       #puts "Index doc " + i.to_s
       iw.add_document(doc)
@@ -1,6 +1,5 @@
 require File.dirname(__FILE__) + "/../../test_helper"
 
-
 class TermTest < Test::Unit::TestCase
   include Ferret::Index
   def test_term()
@@ -16,7 +15,6 @@ class TermTest < Test::Unit::TestCase
     assert(term1 == term4)
     assert(term1.eql?(term4))
     term4.set!("field3", "text3")
-
+    assert_not_equal(term1, term4)
   end
-
 end
@@ -5,14 +5,14 @@ class TermVectorOffsetInfoTest < Test::Unit::TestCase
   include Ferret::Index
   def test_tvoi()
     t1 = TermVectorOffsetInfo.new(1, 3)
-    assert_equal(t1.
-    assert_equal(t1.
+    assert_equal(t1.start, 1)
+    assert_equal(t1.end, 3)
     t2 = TermVectorOffsetInfo.new(1, 3)
     assert(t1 == t2)
-    t2.
+    t2.start = 2
     assert(t1 != t2)
-    t2.
-    t2.
+    t2.start = 1
+    t2.end = 1
     assert(t1 != t2)
   end
 end
@@ -17,7 +17,7 @@ class SegmentTermVectorTest < Test::Unit::TestCase
 
   def test_index_of()
     assert_equal(0, @stv.index_of("Apples"))
-    assert_equal(4, @stv.
+    assert_equal(4, @stv.freqs[@stv.index_of("Apples")])
   end
 
   def test_indexes_of()
@@ -56,7 +56,7 @@ class SegmentTermVectorWithPosOffsetsTest < Test::Unit::TestCase
 
   def test_index_of()
     assert_equal(0, @stv.index_of("Apples"))
-    assert_equal(4, @stv.
+    assert_equal(4, @stv.freqs[@stv.index_of("Apples")])
   end
 
   def test_indexes_of()
@@ -33,12 +33,12 @@ class TermVectorsIOTest < Test::Unit::TestCase
 
     assert_equal(2, tv.size)
     assert_equal("text1", tv.terms[0])
-    assert_equal(1, tv.
+    assert_equal(1, tv.freqs[0])
     assert_equal(1, tv.positions[0][0])
     assert_equal(t(0,4), tv.offsets[0][0])
 
     assert_equal("text2", tv.terms[1])
-    assert_equal(2, tv.
+    assert_equal(2, tv.freqs[1])
     assert_equal(3, tv.positions[1][0])
     assert_equal(t(5,10), tv.offsets[1][0])
     assert_equal(4, tv.positions[1][1])
@@ -77,7 +77,7 @@ class TermVectorsIOTest < Test::Unit::TestCase
 
     assert_equal(2, tv.size)
     assert_equal("word1", tv.terms[0])
-    assert_equal(3, tv.
+    assert_equal(3, tv.freqs[0])
     assert_equal(1, tv.positions[0][0])
     assert_equal(5, tv.positions[0][1])
     assert_equal(8, tv.positions[0][2])
@@ -86,7 +86,7 @@ class TermVectorsIOTest < Test::Unit::TestCase
     assert_equal(t(45,50), tv.offsets[0][2])
 
     assert_equal("word2", tv.terms[1])
-    assert_equal(2, tv.
+    assert_equal(2, tv.freqs[1])
     assert_equal(2, tv.positions[1][0])
     assert_equal(9, tv.positions[1][1])
     assert_equal(t(6,11), tv.offsets[1][0])
@@ -0,0 +1,138 @@
+require File.dirname(__FILE__) + "/../../test_helper"
+
+class QueryParserTest < Test::Unit::TestCase
+
+  def test_strings()
+    parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "field", "f1", "f2"])
+    pairs = [
+      ['', ''],
+      ['word', 'word'],
+      ['field:word', 'field:word'],
+      ['"word1 word2 word3"', '"word word word"'],
+      ['"word1 2342 word3"', '"word word"'],
+      ['field:"one two three"', 'field:"one two three"'],
+      ['field:"one 222 three"', 'field:"one three"'],
+      ['field:"one <> three"', 'field:"one <> three"'],
+      ['field:"one <> three <>"', 'field:"one <> three"'],
+      ['field:"one <> <> <> three <>"', 'field:"one <> <> <> three"'],
+      ['field:"one <> <> <> three|four|five <>"', 'field:"one <> <> <> three|four|five"'],
+      ['field:"one|two three|four|five six|seven"', 'field:"one|two three|four|five six|seven"'],
+      ['field:"testing|trucks"', 'field:testing field:trucks'],
+      ['[aaa bbb]', '[aaa bbb]'],
+      ['{aaa bbb]', '{aaa bbb]'],
+      ['field:[aaa bbb}', 'field:[aaa bbb}'],
+      ['{aaa bbb}', '{aaa bbb}'],
+      ['{aaa>', '{aaa>'],
+      ['[aaa>', '[aaa>'],
+      ['field:<aaa}', 'field:<aaa}'],
+      ['<aaa]', '<aaa]'],
+      ['>aaa', '{aaa>'],
+      ['>=aaa', '[aaa>'],
+      ['<aaa', '<aaa}'],
+      ['field:<=aaa', 'field:<aaa]'],
+      ['REQ one REQ two', '+one +two'],
+      ['REQ one two', '+one two'],
+      ['one REQ two', 'one +two'],
+      ['+one +two', '+one +two'],
+      ['+one two', '+one two'],
+      ['one +two', 'one +two'],
+      ['-one -two', '-one -two'],
+      ['-one two', '-one two'],
+      ['one -two', 'one -two'],
+      ['!one !two', '-one -two'],
+      ['!one two', '-one two'],
+      ['one !two', 'one -two'],
+      ['NOT one NOT two', '-one -two'],
+      ['NOT one two', '-one two'],
+      ['one NOT two', 'one -two'],
+      ['one two', 'one two'],
+      ['one OR two', 'one two'],
+      ['one AND two', '+one +two'],
+      ['one two AND three', 'one two +three'],
+      ['one two OR three', 'one two three'],
+      ['one (two AND three)', 'one (+two +three)'],
+      ['one AND (two OR three)', '+one +(two three)'],
+      ['field:(one AND (two OR three))', '+field:one +(field:two field:three)'],
+      ['one AND (two OR [aaa vvv})', '+one +(two [aaa vvv})'],
+      ['one AND (f1:two OR f2:three) AND four', '+one +(f1:two f2:three) +four'],
+      ['one^1.23', 'one^1.23'],
+      ['(one AND two)^100.23', '(+one +two)^100.23'],
+      ['field:(one AND two)^100.23', '(+field:one +field:two)^100.23'],
+      ['field:(one AND [aaa bbb]^23.3)^100.23', '(+field:one +field:[aaa bbb]^23.3)^100.23'],
+      ['(REQ field:"one two three")^23', 'field:"one two three"^23.0'],
+      ['asdf~0.2', 'asdf~0.2'],
+      ['field:asdf~0.2', 'field:asdf~0.2'],
+      ['asdf~0.2^100.0', 'asdf~0.2^100.0'],
+      ['field:asdf~0.2^0.1', 'field:asdf~0.2^0.1'],
+      ['field:"asdf <> asdf|asdf"~4', 'field:"asdf <> asdf|asdf"~4'],
+      ['"one two three four five"~5', '"one two three four five"~5'],
+      ['ab?de', 'ab?de'],
+      ['ab*de', 'ab*de'],
+      ['asdf?*?asd*dsf?asfd*asdf?', 'asdf?*?asd*dsf?asfd*asdf?'],
+      ['field:a* AND field:(b*)', '+field:a* +field:b*'],
+      ['field:abc~ AND field:(b*)', '+field:abc~ +field:b*'],
+      ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
+
+      ['*:xxx', 'xxx field:xxx f1:xxx f2:xxx'],
+      ['f1|f2:xxx', 'f1:xxx f2:xxx'],
+
+      ['*:asd~0.2', 'asd~0.2 field:asd~0.2 f1:asd~0.2 f2:asd~0.2'],
+      ['f1|f2:asd~0.2', 'f1:asd~0.2 f2:asd~0.2'],
+
+      ['*:a?d*^20.0', '(a?d* field:a?d* f1:a?d* f2:a?d*)^20.0'],
+      ['f1|f2:a?d*^20.0', '(f1:a?d* f2:a?d*)^20.0'],
+
+      ['*:"asdf <> xxx|yyy"', '"asdf <> xxx|yyy" field:"asdf <> xxx|yyy" f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
+      ['f1|f2:"asdf <> xxx|yyy"', 'f1:"asdf <> xxx|yyy" f2:"asdf <> xxx|yyy"'],
+
+      ['*:[bbb xxx]', '[bbb xxx] field:[bbb xxx] f1:[bbb xxx] f2:[bbb xxx]'],
+      ['f1|f2:[bbb xxx]', 'f1:[bbb xxx] f2:[bbb xxx]'],
+
+      ['*:(xxx AND bbb)', '+(xxx field:xxx f1:xxx f2:xxx) +(bbb field:bbb f1:bbb f2:bbb)'],
+      ['f1|f2:(xxx AND bbb)', '+(f1:xxx f2:xxx) +(f1:bbb f2:bbb)'],
+      ['asdf?*?asd*dsf?asfd*asdf?^20.0', 'asdf?*?asd*dsf?asfd*asdf?^20.0'],
+      ['"onewordphrase"', 'onewordphrase']
+    ]
+
+    pairs.each do |query_str, expected|
+      assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+    end
+  end
+
+  def test_qp_with_standard_analyzer()
+    parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
+                                     :analyzer => Ferret::Analysis::StandardAnalyzer.new)
+    pairs = [
+      ['key:1234', 'key:1234'],
+      ['key:(1234)', 'key:1234']
+    ]
+
+    pairs.each do |query_str, expected|
+      assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+    end
+  end
+
+  def do_test_query_parse_exception_raised(str)
+    parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2", "f3"])
+    assert_raise(Ferret::QueryParser::QueryParseException) do
+      parser.parse(str)
+    end
+  end
+
+
+  def test_bad_queries
+    parser = Ferret::QueryParser.new("xxx", :fields => ["f1", "f2"],
+                                     :handle_parse_errors => true)
+
+    pairs = [
+      ['::*word', 'word'],
+      ['()*&)(*^&*(', ''],
+      ['()*&one)(*two(*&"', '"one two"']
+    ]
+
+    pairs.each do |query_str, expected|
+      do_test_query_parse_exception_raised(query_str)
+      assert_equal(expected, parser.parse(query_str).to_s("xxx"))
+    end
+  end
+end
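
Taken together, the new suite doubles as a reference for the parser's grammar: REQ, + and AND mark required clauses; NOT, ! and - mark prohibited ones; <> is a positional hole inside a phrase and | separates alternatives; [..], {..}, <= and >= build range terms; and *:word or f1|f2:word fans a term out across fields. A minimal sketch of driving the parser directly, with :handle_parse_errors => true downgrading malformed input instead of raising:

    parser = Ferret::QueryParser.new("xxx",
                                     :fields => ["xxx", "f1", "f2"],
                                     :handle_parse_errors => true)
    puts parser.parse('one AND (f1:two OR f2:three)').to_s("xxx")
    #=> +one +(f1:two f2:three)
    puts parser.parse('()*&)(*^&*(').to_s("xxx")
    #=> (empty query; the parse error was handled, not raised)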