ferret 0.9.4 → 0.9.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +1 -1
- data/Rakefile +1 -0
- data/ext/field.c +87 -87
- data/ext/index.h +253 -255
- data/ext/index_io.c +15 -6
- data/ext/index_rw.c +6 -0
- data/ext/nix_io.c +4 -6
- data/ext/q_boolean.c +0 -6
- data/ext/q_fuzzy.c +10 -7
- data/ext/q_multi_phrase.c +2 -2
- data/ext/q_term.c +2 -2
- data/ext/q_wildcard.c +5 -4
- data/ext/search.c +3 -5
- data/ext/search.h +439 -400
- data/ext/store.h +1 -0
- data/ext/termdocs.c +3 -7
- data/ext/vector.c +1 -1
- data/lib/ferret.rb +1 -1
- data/lib/ferret/store/ram_store.rb +5 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/index/tc_index_reader.rb +6 -1
- data/test/unit/search/tc_search_and_sort.rb +1 -1
- data/test/unit/store/tc_fs_store.rb +1 -1
- metadata +4 -4
data/ext/store.h
CHANGED
@@ -121,6 +121,7 @@ llong is_read_long(InStream *is);
|
|
121
121
|
unsigned int is_read_uint(InStream *is);
|
122
122
|
ullong is_read_ulong(InStream *is);
|
123
123
|
ullong is_read_vint(InStream *is);
|
124
|
+
void is_skip_vints(InStream *is, register int cnt);
|
124
125
|
void is_read_chars(InStream *is, char* buffer, int off, int len) ;
|
125
126
|
char *is_read_string(InStream *is);
|
126
127
|
void os_write_int(OutStream *os, int l);
|
data/ext/termdocs.c
CHANGED
@@ -233,9 +233,7 @@ void stpe_close(TermDocEnum *tde)
|
|
233
233
|
|
234
234
|
void stpe_skip_prox(SegmentTermDocEnum *stde)
|
235
235
|
{
|
236
|
-
|
237
|
-
for (i = 0; i < stde->freq; i++)
|
238
|
-
is_read_vint(stde->prox_in);
|
236
|
+
is_skip_vints(stde->prox_in, stde->freq);
|
239
237
|
}
|
240
238
|
|
241
239
|
void stpe_seek_prox(SegmentTermDocEnum *stde, int prox_pointer)
|
@@ -247,11 +245,9 @@ void stpe_seek_prox(SegmentTermDocEnum *stde, int prox_pointer)
|
|
247
245
|
bool stpe_next(TermDocEnum *tde)
|
248
246
|
{
|
249
247
|
SegmentTermDocEnum *stde = (SegmentTermDocEnum *)tde->data;
|
250
|
-
|
251
|
-
for (i = 0; i < stde->prox_cnt; i++)
|
252
|
-
is_read_vint(stde->prox_in);
|
248
|
+
is_skip_vints(stde->prox_in, stde->prox_cnt);
|
253
249
|
|
254
|
-
|
250
|
+
/* if super */
|
255
251
|
if (stde_next(tde)) {
|
256
252
|
stde->prox_cnt = stde->freq;
|
257
253
|
stde->position = 0;
|
data/ext/vector.c
CHANGED
@@ -98,12 +98,12 @@ void tvw_write_field(TermVectorsWriter *tvw)
|
|
98
98
|
TVOffsetInfo *tmp_offset;
|
99
99
|
TVTerm **terms = tvw->terms;
|
100
100
|
TVTerm *term;
|
101
|
-
/* remember where this field is written */
|
102
101
|
OutStream *tvf = tvw->tvf;
|
103
102
|
int store_positions = tvw->curr_field->store_positions;
|
104
103
|
int store_offsets = tvw->curr_field->store_offsets;
|
105
104
|
uchar bits = 0x0;
|
106
105
|
|
106
|
+
/* remember where this field is written */
|
107
107
|
tvw->curr_field->tvf_pointer = os_pos(tvf);
|
108
108
|
|
109
109
|
/* write the number of terms */
|
data/lib/ferret/store/ram_store.rb
CHANGED
@@ -7,12 +7,15 @@ module Ferret::Store
|
|
7
7
|
def initialize(dir = nil, close_dir = false)
|
8
8
|
super()
|
9
9
|
@files = Hash.new
|
10
|
-
|
10
|
+
if dir
|
11
|
+
buf = BUFFER.clone
|
11
12
|
dir.each do |file|
|
12
13
|
os = create_output(file) # make a place on ram disk
|
13
14
|
is = dir.open_input(file) # read the current file
|
14
15
|
len = is.length # and copy the file to ram disk
|
15
|
-
|
16
|
+
if len > buf.size
|
17
|
+
buf << " " * (len - buf.size)
|
18
|
+
end
|
16
19
|
is.read_bytes(buf, 0, len)
|
17
20
|
os.write_bytes(buf, len)
|
18
21
|
is.close()
|
data/test/unit/index/tc_index_reader.rb
CHANGED
@@ -532,7 +532,11 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
532
532
|
end
|
533
533
|
|
534
534
|
def test_ir_multivalue_fields()
|
535
|
-
|
535
|
+
@fs_dpath = File.expand_path(File.join(File.dirname(__FILE__),
|
536
|
+
'../../temp/fsdir'))
|
537
|
+
@fs_dir = Ferret::Store::FSDirectory.new(@fs_dpath, true)
|
538
|
+
|
539
|
+
iw = IndexWriter.new(@fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
536
540
|
doc = Document.new()
|
537
541
|
doc << Field.new("tag", "Ruby", Field::Store::YES, Field::Index::NO, Field::TermVector::NO)
|
538
542
|
doc << Field.new("tag", "C", Field::Store::YES, Field::Index::UNTOKENIZED, Field::TermVector::NO)
|
@@ -555,6 +559,7 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
555
559
|
iw << doc
|
556
560
|
iw.close()
|
557
561
|
|
562
|
+
@dir = Ferret::Store::RAMDirectory.new(@fs_dir, true)
|
558
563
|
ir = IndexReader.open(@dir, false)
|
559
564
|
|
560
565
|
doc = ir.get_document(0)
|
data/test/unit/search/tc_search_and_sort.rb
CHANGED
@@ -146,7 +146,7 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
146
146
|
do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float", true))
|
147
147
|
do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new(["int", "string"], true))
|
148
148
|
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new(["int", "string"]))
|
149
|
-
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], [
|
149
|
+
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], [:int, "string"])
|
150
150
|
end
|
151
151
|
|
152
152
|
#LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.9.4
|
7
|
-
date: 2006-07
|
6
|
+
version: 0.9.5
|
7
|
+
date: 2006-08-07 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -12,7 +12,7 @@ email: dbalmain@gmail.com
|
|
12
12
|
homepage: http://ferret.davebalmain.com/trac
|
13
13
|
rubyforge_project: ferret
|
14
14
|
description: Ferret is a port of the Java Lucene project. It is a powerful indexing and search library.
|
15
|
-
autorequire:
|
15
|
+
autorequire: ferret
|
16
16
|
default_executable:
|
17
17
|
bindir: bin
|
18
18
|
has_rdoc: true
|
@@ -51,7 +51,6 @@ files:
|
|
51
51
|
- ext/document.c
|
52
52
|
- ext/compound_io.c
|
53
53
|
- ext/index_rw.c
|
54
|
-
- ext/termdocs.c
|
55
54
|
- ext/vector.c
|
56
55
|
- ext/field.c
|
57
56
|
- ext/term.c
|
@@ -369,6 +368,7 @@ files:
|
|
369
368
|
- test/utils/number_to_spoken.rb
|
370
369
|
- test/unit/analysis/data/wordfile
|
371
370
|
- rake_utils/code_statistics.rb
|
371
|
+
- ext/termdocs.c
|
372
372
|
test_files: []
|
373
373
|
|
374
374
|
rdoc_options:
|