ferret 0.3.0 → 0.3.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/ferret_ext.so +0 -0
- data/ext/index_io.c +8 -8
- data/ext/segment_term_enum.c +3 -1
- data/lib/ferret.rb +1 -1
- data/lib/ferret/index/index.rb +6 -4
- data/lib/ferret/index/segment_merger.rb +1 -1
- data/lib/ferret/search/disjunction_sum_scorer.rb +1 -1
- data/test/unit/index/tc_index.rb +6 -0
- data/test/unit/search/tc_index_searcher.rb +3 -0
- metadata +3 -3
data/ext/ferret_ext.so
CHANGED
Binary file
|
data/ext/index_io.c
CHANGED
@@ -170,10 +170,10 @@ VALUE
|
|
170
170
|
frt_indexin_read_int(VALUE self)
|
171
171
|
{
|
172
172
|
GET_MY_BUF;
|
173
|
-
return LONG2NUM(((
|
174
|
-
((
|
175
|
-
((
|
176
|
-
(
|
173
|
+
return LONG2NUM(((int)frt_read_byte(self, my_buf) << 24) |
|
174
|
+
((int)frt_read_byte(self, my_buf) << 16) |
|
175
|
+
((int)frt_read_byte(self, my_buf) << 8) |
|
176
|
+
(int)frt_read_byte(self, my_buf));
|
177
177
|
}
|
178
178
|
|
179
179
|
VALUE
|
@@ -194,10 +194,10 @@ static VALUE
|
|
194
194
|
frt_indexin_read_uint(VALUE self)
|
195
195
|
{
|
196
196
|
GET_MY_BUF;
|
197
|
-
return ULONG2NUM(((unsigned
|
198
|
-
((unsigned
|
199
|
-
((unsigned
|
200
|
-
(unsigned
|
197
|
+
return ULONG2NUM(((unsigned int)frt_read_byte(self, my_buf) << 24) |
|
198
|
+
((unsigned int)frt_read_byte(self, my_buf) << 16) |
|
199
|
+
((unsigned int)frt_read_byte(self, my_buf) << 8) |
|
200
|
+
(unsigned int)frt_read_byte(self, my_buf));
|
201
201
|
}
|
202
202
|
|
203
203
|
static VALUE
|
data/ext/segment_term_enum.c
CHANGED
@@ -35,6 +35,7 @@ static VALUE
|
|
35
35
|
frt_ste_init(VALUE self, VALUE input, VALUE field_infos, VALUE is_index)
|
36
36
|
{
|
37
37
|
int first_int;
|
38
|
+
VALUE rsize;
|
38
39
|
GET_STE;
|
39
40
|
ste->is_index = RTEST(is_index);
|
40
41
|
ste->input = input;
|
@@ -63,7 +64,8 @@ frt_ste_init(VALUE self, VALUE input, VALUE field_infos, VALUE is_index)
|
|
63
64
|
} else {
|
64
65
|
// we have a format version number
|
65
66
|
ste->format = first_int;
|
66
|
-
|
67
|
+
rsize = frt_indexin_read_long(input);
|
68
|
+
ste->size = NUM2INT(rsize); // read the size
|
67
69
|
|
68
70
|
if (ste->format == -1) {
|
69
71
|
if (!ste->is_index) {
|
data/lib/ferret.rb
CHANGED
data/lib/ferret/index/index.rb
CHANGED
@@ -335,18 +335,20 @@ module Ferret::Index
|
|
335
335
|
# id:: The number of the document to delete
|
336
336
|
def delete(id)
|
337
337
|
@dir.synchronize do
|
338
|
+
cnt = 0
|
338
339
|
ensure_reader_open()
|
339
340
|
if id.is_a?(String)
|
340
341
|
t = Term.new("id", id.to_s)
|
341
|
-
|
342
|
+
cnt = @reader.delete_docs_with_term(t)
|
342
343
|
elsif id.is_a?(Term)
|
343
|
-
|
344
|
+
cnt = @reader.delete_docs_with_term(id)
|
344
345
|
elsif id.is_a?(Integer)
|
345
|
-
|
346
|
+
cnt = @reader.delete(id)
|
346
347
|
else
|
347
348
|
raise ArgumentError, "Cannot delete for id of type #{id.class}"
|
348
349
|
end
|
349
350
|
flush() if @auto_flush
|
351
|
+
return cnt
|
350
352
|
end
|
351
353
|
end
|
352
354
|
|
@@ -485,7 +487,7 @@ module Ferret::Index
|
|
485
487
|
@dir.synchronize do
|
486
488
|
ensure_writer_open()
|
487
489
|
@writer.optimize()
|
488
|
-
|
490
|
+
flush()
|
489
491
|
end
|
490
492
|
end
|
491
493
|
|
data/lib/ferret/index/segment_merger.rb
CHANGED
@@ -242,7 +242,7 @@ module Ferret::Index
|
|
242
242
|
skip_pointer = write_skip()
|
243
243
|
|
244
244
|
if (df > 0)
|
245
|
-
# add an
|
245
|
+
# add an entry to the dictionary with pointers to prox and freq files
|
246
246
|
@term_info.set_values!(df, freq_pointer, prox_pointer, (skip_pointer - freq_pointer))
|
247
247
|
@term_infos_writer.add(smis[0].term_buffer.term, @term_info)
|
248
248
|
end
|
data/lib/ferret/search/disjunction_sum_scorer.rb
CHANGED
@@ -93,7 +93,7 @@ module Ferret::Search
|
|
93
93
|
# Repeat until at least the minimum number of subscorers match on the same
|
94
94
|
# document and all subscorers are after that document or are exhausted.
|
95
95
|
#
|
96
|
-
# On
|
96
|
+
# On entry the +@scorer_queue+ has at least +@minimum_nr_matchers+
|
97
97
|
# available. At least the scorer with the minimum document number will be advanced.
|
98
98
|
# returns:: true iff there is a match.
|
99
99
|
#
|
data/test/unit/index/tc_index.rb
CHANGED
@@ -459,6 +459,12 @@ class IndexTest < Test::Unit::TestCase
|
|
459
459
|
index1 << doc
|
460
460
|
index2 << doc
|
461
461
|
end
|
462
|
+
5.times do |i|
|
463
|
+
index1.delete(i)
|
464
|
+
index2.delete(i + 5)
|
465
|
+
end
|
466
|
+
index1.optimize
|
467
|
+
index2 << "thirteen"
|
462
468
|
rescue Exception => e
|
463
469
|
assert(false, "This should not cause an error when auto flush has been set")
|
464
470
|
end
|
data/test/unit/search/tc_index_searcher.rb
CHANGED
@@ -65,6 +65,9 @@ class IndexSearcherTest < Test::Unit::TestCase
|
|
65
65
|
tq = TermQuery.new(Term.new("field", "word2"));
|
66
66
|
tq.boost = 100
|
67
67
|
check_hits(tq, [1,4,8])
|
68
|
+
#puts @is.explain(tq, 1)
|
69
|
+
#puts @is.explain(tq, 4)
|
70
|
+
#puts @is.explain(tq, 8)
|
68
71
|
|
69
72
|
tq = TermQuery.new(Term.new("field", "2342"));
|
70
73
|
check_hits(tq, [])
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.3.0
|
7
|
-
date: 2005-12-
|
6
|
+
version: 0.3.1
|
7
|
+
date: 2005-12-08 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -52,8 +52,8 @@ files:
|
|
52
52
|
- ext/dummy.exe
|
53
53
|
- ext/segment_term_enum.c
|
54
54
|
- ext/terminfo.c
|
55
|
-
- ext/term_infos_reader.c
|
56
55
|
- ext/ferret_ext.so
|
56
|
+
- ext/term_infos_reader.c
|
57
57
|
- ext/similarity.c
|
58
58
|
- lib/ferret.rb
|
59
59
|
- lib/ferret/analysis.rb
|