ferret 0.10.4 → 0.10.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/ext/analysis.c +7 -1
- data/ext/bitvector.c +5 -2
- data/ext/bitvector.h +1 -0
- data/ext/ferret.c +55 -8
- data/ext/ferret.h +8 -2
- data/ext/index.c +34 -43
- data/ext/index.h +1 -1
- data/ext/q_boolean.c +1 -1
- data/ext/q_multi_term.c +13 -1
- data/ext/q_parser.c +33 -18
- data/ext/r_analysis.c +68 -45
- data/ext/r_index.c +64 -10
- data/ext/r_search.c +145 -10
- data/ext/search.c +71 -12
- data/lib/ferret/index.rb +42 -28
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_analyzer.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +0 -1
- data/test/unit/index/tc_index.rb +3 -3
- data/test/unit/index/tc_index_reader.rb +5 -0
- data/test/unit/search/tc_filter.rb +15 -0
- data/test/unit/search/tm_searcher.rb +13 -2
- metadata +2 -2
data/lib/ferret_version.rb
CHANGED
data/test/unit/analysis/tc_analyzer.rb
CHANGED
@@ -246,7 +246,7 @@ class AsciiStandardAnalyzerTest < Test::Unit::TestCase
|
|
246
246
|
assert_equal(Token.new('tnt', 86, 91), t2.next)
|
247
247
|
assert_equal(Token.new('123-1235-asd-1234', 93, 110), t2.next)
|
248
248
|
assert(! t2.next())
|
249
|
-
a = AsciiStandardAnalyzer.new(false)
|
249
|
+
a = AsciiStandardAnalyzer.new(ENGLISH_STOP_WORDS, false)
|
250
250
|
t = a.token_stream("fieldname", input)
|
251
251
|
t2 = a.token_stream("fieldname", input)
|
252
252
|
assert_equal(Token.new('DBalmain@gmail.com', 0, 18), t.next)
|
data/test/unit/analysis/tc_token_stream.rb
CHANGED
@@ -486,7 +486,6 @@ class CustomTokenizerTest < Test::Unit::TestCase
|
|
486
486
|
assert(! t.next())
|
487
487
|
t = AsciiLowerCaseFilter.new(MyCSVTokenizer.new(input))
|
488
488
|
assert_equal(Token.new("first field", 0, 11), t.next)
|
489
|
-
return
|
490
489
|
assert_equal(Token.new("2nd field", 12, 21), t.next)
|
491
490
|
assert_equal(Token.new(" p a d d e d f i e l d ", 22, 48), t.next)
|
492
491
|
assert(! t.next())
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -753,14 +753,14 @@ class IndexTest < Test::Unit::TestCase
|
|
753
753
|
:excerpt_length => 10,
|
754
754
|
:num_excerpts => 1)
|
755
755
|
assert_equal(1, highlights.size)
|
756
|
-
assert_equal("
|
756
|
+
assert_equal("<b>the words</b>...", highlights[0])
|
757
757
|
|
758
758
|
highlights = index.highlight('"the words" "for are one and two" ' +
|
759
759
|
'words one two', 0,
|
760
|
-
:excerpt_length =>
|
760
|
+
:excerpt_length => 20,
|
761
761
|
:num_excerpts => 2)
|
762
762
|
assert_equal(2, highlights.size)
|
763
|
-
assert_equal("<b>the words</b
|
763
|
+
assert_equal("<b>the words</b> we are...", highlights[0])
|
764
764
|
assert_equal("...<b>for are one and two</b>...", highlights[1])
|
765
765
|
|
766
766
|
|
data/test/unit/index/tc_index_reader.rb
CHANGED
@@ -198,6 +198,11 @@ module IndexReaderCommon
|
|
198
198
|
|
199
199
|
assert_equal(4, doc.size)
|
200
200
|
[:author, :body, :title, :year].each {|fn| assert(doc.keys.include?(fn))}
|
201
|
+
assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0, 3].collect {|d| d.load})
|
202
|
+
assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61, 100].collect {|d| d.load})
|
203
|
+
assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0..2].collect {|d| d.load})
|
204
|
+
assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61..100].collect {|d| d.load})
|
205
|
+
assert_equal(@ir[-60], @ir[4])
|
201
206
|
end
|
202
207
|
|
203
208
|
def test_ir_norms()
|
data/test/unit/search/tc_filter.rb
CHANGED
@@ -98,4 +98,19 @@ class FilterTest < Test::Unit::TestCase
|
|
98
98
|
rf2 = RangeFilter.new(:int, :>= => "3")
|
99
99
|
do_test_top_docs(searcher, query, [4,6], rf2)
|
100
100
|
end
|
101
|
+
|
102
|
+
class CustomFilter
|
103
|
+
def bits(ir)
|
104
|
+
bv = Ferret::Utils::BitVector.new
|
105
|
+
bv[0] = bv[2] = bv[4] = true
|
106
|
+
bv
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def test_custom_filter
|
111
|
+
searcher = Searcher.new(@dir)
|
112
|
+
q = MatchAllQuery.new
|
113
|
+
filt = CustomFilter.new
|
114
|
+
do_test_top_docs(searcher, q, [0, 2, 4], filt)
|
115
|
+
end
|
101
116
|
end
|
data/test/unit/search/tm_searcher.rb
CHANGED
@@ -23,6 +23,8 @@ module SearcherTests
|
|
23
23
|
assert_equal(10, top_docs.hits.size)
|
24
24
|
top_docs = @searcher.search(tq, {:limit => 20})
|
25
25
|
assert_equal(@searcher.max_doc, top_docs.hits.size)
|
26
|
+
|
27
|
+
assert_equal([Ferret::Term.new(:field, "word1")], tq.terms(@searcher))
|
26
28
|
end
|
27
29
|
|
28
30
|
def check_docs(query, options, expected=[])
|
@@ -79,6 +81,12 @@ module SearcherTests
|
|
79
81
|
assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
|
80
82
|
mtq << "word3"
|
81
83
|
assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
|
84
|
+
|
85
|
+
terms = mtq.terms(@searcher)
|
86
|
+
assert(terms.index(Ferret::Term.new(:field, "brown")))
|
87
|
+
assert(terms.index(Ferret::Term.new(:field, "word1")))
|
88
|
+
assert(terms.index(Ferret::Term.new(:field, "word2")))
|
89
|
+
assert(terms.index(Ferret::Term.new(:field, "fast")))
|
82
90
|
end
|
83
91
|
|
84
92
|
def test_boolean_query
|
@@ -199,6 +207,9 @@ module SearcherTests
|
|
199
207
|
|
200
208
|
wq = WildcardQuery.new(:category, "cat1*/su??ub2")
|
201
209
|
check_hits(wq, [4, 16])
|
210
|
+
|
211
|
+
wq = WildcardQuery.new(:category, "*/sub2*")
|
212
|
+
check_hits(wq, [3, 4, 13, 15])
|
202
213
|
end
|
203
214
|
|
204
215
|
def test_multi_phrase_query()
|
@@ -327,13 +338,13 @@ module SearcherTests
|
|
327
338
|
:excerpt_length => 10,
|
328
339
|
:num_excerpts => 1)
|
329
340
|
assert_equal(1, highlights.size)
|
330
|
-
assert_equal("
|
341
|
+
assert_equal("<b>the words</b>...", highlights[0])
|
331
342
|
|
332
343
|
highlights = searcher.highlight(q, 0, :field,
|
333
344
|
:excerpt_length => 10,
|
334
345
|
:num_excerpts => 2)
|
335
346
|
assert_equal(2, highlights.size)
|
336
347
|
assert_equal("<b>the words</b>...", highlights[0])
|
337
|
-
assert_equal("...<b>
|
348
|
+
assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
|
338
349
|
end
|
339
350
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.4
|
7
|
-
date: 2006-09-
|
6
|
+
version: 0.10.5
|
7
|
+
date: 2006-09-19 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|