ferret 0.10.4 → 0.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/ext/analysis.c +7 -1
- data/ext/bitvector.c +5 -2
- data/ext/bitvector.h +1 -0
- data/ext/ferret.c +55 -8
- data/ext/ferret.h +8 -2
- data/ext/index.c +34 -43
- data/ext/index.h +1 -1
- data/ext/q_boolean.c +1 -1
- data/ext/q_multi_term.c +13 -1
- data/ext/q_parser.c +33 -18
- data/ext/r_analysis.c +68 -45
- data/ext/r_index.c +64 -10
- data/ext/r_search.c +145 -10
- data/ext/search.c +71 -12
- data/lib/ferret/index.rb +42 -28
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_analyzer.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +0 -1
- data/test/unit/index/tc_index.rb +3 -3
- data/test/unit/index/tc_index_reader.rb +5 -0
- data/test/unit/search/tc_filter.rb +15 -0
- data/test/unit/search/tm_searcher.rb +13 -2
- metadata +2 -2
data/lib/ferret_version.rb
CHANGED
@@ -246,7 +246,7 @@ class AsciiStandardAnalyzerTest < Test::Unit::TestCase
|
|
246
246
|
assert_equal(Token.new('tnt', 86, 91), t2.next)
|
247
247
|
assert_equal(Token.new('123-1235-asd-1234', 93, 110), t2.next)
|
248
248
|
assert(! t2.next())
|
249
|
-
a = AsciiStandardAnalyzer.new(false)
|
249
|
+
a = AsciiStandardAnalyzer.new(ENGLISH_STOP_WORDS, false)
|
250
250
|
t = a.token_stream("fieldname", input)
|
251
251
|
t2 = a.token_stream("fieldname", input)
|
252
252
|
assert_equal(Token.new('DBalmain@gmail.com', 0, 18), t.next)
|
@@ -486,7 +486,6 @@ class CustomTokenizerTest < Test::Unit::TestCase
|
|
486
486
|
assert(! t.next())
|
487
487
|
t = AsciiLowerCaseFilter.new(MyCSVTokenizer.new(input))
|
488
488
|
assert_equal(Token.new("first field", 0, 11), t.next)
|
489
|
-
return
|
490
489
|
assert_equal(Token.new("2nd field", 12, 21), t.next)
|
491
490
|
assert_equal(Token.new(" p a d d e d f i e l d ", 22, 48), t.next)
|
492
491
|
assert(! t.next())
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -753,14 +753,14 @@ class IndexTest < Test::Unit::TestCase
|
|
753
753
|
:excerpt_length => 10,
|
754
754
|
:num_excerpts => 1)
|
755
755
|
assert_equal(1, highlights.size)
|
756
|
-
assert_equal("
|
756
|
+
assert_equal("<b>the words</b>...", highlights[0])
|
757
757
|
|
758
758
|
highlights = index.highlight('"the words" "for are one and two" ' +
|
759
759
|
'words one two', 0,
|
760
|
-
:excerpt_length =>
|
760
|
+
:excerpt_length => 20,
|
761
761
|
:num_excerpts => 2)
|
762
762
|
assert_equal(2, highlights.size)
|
763
|
-
assert_equal("<b>the words</b
|
763
|
+
assert_equal("<b>the words</b> we are...", highlights[0])
|
764
764
|
assert_equal("...<b>for are one and two</b>...", highlights[1])
|
765
765
|
|
766
766
|
|
@@ -198,6 +198,11 @@ module IndexReaderCommon
|
|
198
198
|
|
199
199
|
assert_equal(4, doc.size)
|
200
200
|
[:author, :body, :title, :year].each {|fn| assert(doc.keys.include?(fn))}
|
201
|
+
assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0, 3].collect {|d| d.load})
|
202
|
+
assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61, 100].collect {|d| d.load})
|
203
|
+
assert_equal([@ir[0].load, @ir[1].load, @ir[2].load], @ir[0..2].collect {|d| d.load})
|
204
|
+
assert_equal([@ir[61].load, @ir[62].load, @ir[63].load], @ir[61..100].collect {|d| d.load})
|
205
|
+
assert_equal(@ir[-60], @ir[4])
|
201
206
|
end
|
202
207
|
|
203
208
|
def test_ir_norms()
|
@@ -98,4 +98,19 @@ class FilterTest < Test::Unit::TestCase
|
|
98
98
|
rf2 = RangeFilter.new(:int, :>= => "3")
|
99
99
|
do_test_top_docs(searcher, query, [4,6], rf2)
|
100
100
|
end
|
101
|
+
|
102
|
+
class CustomFilter
|
103
|
+
def bits(ir)
|
104
|
+
bv = Ferret::Utils::BitVector.new
|
105
|
+
bv[0] = bv[2] = bv[4] = true
|
106
|
+
bv
|
107
|
+
end
|
108
|
+
end
|
109
|
+
|
110
|
+
def test_custom_filter
|
111
|
+
searcher = Searcher.new(@dir)
|
112
|
+
q = MatchAllQuery.new
|
113
|
+
filt = CustomFilter.new
|
114
|
+
do_test_top_docs(searcher, q, [0, 2, 4], filt)
|
115
|
+
end
|
101
116
|
end
|
@@ -23,6 +23,8 @@ module SearcherTests
|
|
23
23
|
assert_equal(10, top_docs.hits.size)
|
24
24
|
top_docs = @searcher.search(tq, {:limit => 20})
|
25
25
|
assert_equal(@searcher.max_doc, top_docs.hits.size)
|
26
|
+
|
27
|
+
assert_equal([Ferret::Term.new(:field, "word1")], tq.terms(@searcher))
|
26
28
|
end
|
27
29
|
|
28
30
|
def check_docs(query, options, expected=[])
|
@@ -79,6 +81,12 @@ module SearcherTests
|
|
79
81
|
assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
|
80
82
|
mtq << "word3"
|
81
83
|
assert_equal("field:<brown|word1|word2|fast^50.0>^80.1", mtq.to_s())
|
84
|
+
|
85
|
+
terms = mtq.terms(@searcher)
|
86
|
+
assert(terms.index(Ferret::Term.new(:field, "brown")))
|
87
|
+
assert(terms.index(Ferret::Term.new(:field, "word1")))
|
88
|
+
assert(terms.index(Ferret::Term.new(:field, "word2")))
|
89
|
+
assert(terms.index(Ferret::Term.new(:field, "fast")))
|
82
90
|
end
|
83
91
|
|
84
92
|
def test_boolean_query
|
@@ -199,6 +207,9 @@ module SearcherTests
|
|
199
207
|
|
200
208
|
wq = WildcardQuery.new(:category, "cat1*/su??ub2")
|
201
209
|
check_hits(wq, [4, 16])
|
210
|
+
|
211
|
+
wq = WildcardQuery.new(:category, "*/sub2*")
|
212
|
+
check_hits(wq, [3, 4, 13, 15])
|
202
213
|
end
|
203
214
|
|
204
215
|
def test_multi_phrase_query()
|
@@ -327,13 +338,13 @@ module SearcherTests
|
|
327
338
|
:excerpt_length => 10,
|
328
339
|
:num_excerpts => 1)
|
329
340
|
assert_equal(1, highlights.size)
|
330
|
-
assert_equal("
|
341
|
+
assert_equal("<b>the words</b>...", highlights[0])
|
331
342
|
|
332
343
|
highlights = searcher.highlight(q, 0, :field,
|
333
344
|
:excerpt_length => 10,
|
334
345
|
:num_excerpts => 2)
|
335
346
|
assert_equal(2, highlights.size)
|
336
347
|
assert_equal("<b>the words</b>...", highlights[0])
|
337
|
-
assert_equal("...<b>
|
348
|
+
assert_equal("...<b>one</b> <b>two</b>...", highlights[1])
|
338
349
|
end
|
339
350
|
end
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.10.4
|
7
|
-
date: 2006-09-
|
6
|
+
version: 0.10.5
|
7
|
+
date: 2006-09-19 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|