ferret 0.9.3 → 0.9.4
Sign up to get free protection for your applications and to get access to all the features.
- data/ext/ferret.c +27 -0
- data/ext/ferret.h +4 -1
- data/ext/field.c +7 -4
- data/ext/hash.c +0 -4
- data/ext/inc/lang.h +1 -1
- data/ext/ind.c +1 -1
- data/ext/lang.h +1 -1
- data/ext/nix_io.c +6 -5
- data/ext/r_analysis.c +1 -0
- data/ext/r_doc.c +1 -1
- data/ext/r_index_io.c +14 -0
- data/ext/r_qparser.c +41 -1
- data/ext/r_search.c +5 -1
- data/ext/r_store.c +1 -11
- data/lib/ferret.rb +1 -1
- data/lib/ferret/index/field_infos.rb +2 -2
- data/lib/ferret/index/index_writer.rb +3 -2
- data/lib/ferret/index/segment_merger.rb +6 -3
- data/lib/ferret/index/term_vectors_io.rb +1 -0
- data/lib/rferret.rb +1 -1
- data/test/unit/index/tc_index_reader.rb +17 -5
- data/test/unit/query_parser/tc_query_parser.rb +35 -1
- data/test/unit/search/tc_search_and_sort.rb +2 -0
- data/test/unit/store/tc_fs_store.rb +2 -4
- metadata +3 -3
data/ext/ferret.c
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "ferret.h"
|
2
2
|
#include "except.h"
|
3
3
|
#include "hash.h"
|
4
|
+
#include "hashset.h"
|
4
5
|
|
5
6
|
/* Object Map */
|
6
7
|
static HshTable *object_map;
|
@@ -8,6 +9,9 @@ static HshTable *object_map;
|
|
8
9
|
/* IDs */
|
9
10
|
ID id_new;
|
10
11
|
ID id_call;
|
12
|
+
ID id_is_directory;
|
13
|
+
|
14
|
+
static ID id_mkdir_p;
|
11
15
|
|
12
16
|
/* Modules */
|
13
17
|
VALUE mFerret;
|
@@ -129,6 +133,26 @@ frt_thread_getspecific(thread_key_t key)
|
|
129
133
|
return h_get(key, (void *)rb_thread_current());
|
130
134
|
}
|
131
135
|
|
136
|
+
void
|
137
|
+
frt_create_dir(VALUE rpath)
|
138
|
+
{
|
139
|
+
VALUE mFileUtils;
|
140
|
+
rb_require("fileutils");
|
141
|
+
mFileUtils = rb_define_module("FileUtils");
|
142
|
+
rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
|
143
|
+
}
|
144
|
+
|
145
|
+
VALUE
|
146
|
+
frt_hs_to_rb_ary(HashSet *hs)
|
147
|
+
{
|
148
|
+
int i;
|
149
|
+
VALUE ary = rb_ary_new();
|
150
|
+
for (i = 0; i < hs->size; i++) {
|
151
|
+
rb_ary_push(ary, rb_str_new2(hs->elems[i]));
|
152
|
+
}
|
153
|
+
return ary;
|
154
|
+
}
|
155
|
+
|
132
156
|
void
|
133
157
|
Init_ferret_ext(void)
|
134
158
|
{
|
@@ -139,6 +163,9 @@ Init_ferret_ext(void)
|
|
139
163
|
id_new = rb_intern("new");
|
140
164
|
id_call = rb_intern("call");
|
141
165
|
|
166
|
+
id_mkdir_p = rb_intern("mkdir_p");
|
167
|
+
id_is_directory = rb_intern("directory?");
|
168
|
+
|
142
169
|
/* Modules */
|
143
170
|
mFerret = rb_define_module("Ferret");
|
144
171
|
mAnalysis = rb_define_module_under(mFerret, "Analysis");
|
data/ext/ferret.h
CHANGED
@@ -2,11 +2,13 @@
|
|
2
2
|
#define __FERRET_H_
|
3
3
|
|
4
4
|
#include "global.h"
|
5
|
+
#include "hashset.h"
|
5
6
|
#include "document.h"
|
6
7
|
|
7
8
|
/* IDs */
|
8
9
|
extern ID id_new;
|
9
10
|
extern ID id_call;
|
11
|
+
extern ID id_is_directory;
|
10
12
|
|
11
13
|
/* Modules */
|
12
14
|
extern VALUE mFerret;
|
@@ -44,7 +46,8 @@ extern VALUE object_get(void *key);
|
|
44
46
|
extern VALUE frt_data_alloc(VALUE klass);
|
45
47
|
extern VALUE frt_get_doc(Document *doc);
|
46
48
|
extern void frt_deref_free(void *p);
|
47
|
-
|
49
|
+
extern void frt_create_dir(VALUE rpath);
|
50
|
+
extern VALUE frt_hs_to_rb_ary(HashSet *hs);
|
48
51
|
|
49
52
|
#define Frt_Make_Struct(klass)\
|
50
53
|
rb_data_object_alloc(klass,NULL,(RUBY_DATA_FUNC)NULL,(RUBY_DATA_FUNC)NULL)
|
data/ext/field.c
CHANGED
@@ -270,17 +270,20 @@ void fw_add_doc(FieldsWriter *fw, Document *doc)
|
|
270
270
|
os_write_vint(fout, ((FieldInfo *)ht_get(fw->fis->by_name, df->name))->number);
|
271
271
|
|
272
272
|
bits = 0;
|
273
|
-
if (df->is_tokenized)
|
273
|
+
if (df->is_tokenized) {
|
274
274
|
bits |= FIELD_IS_TOKENIZED;
|
275
|
-
|
275
|
+
}
|
276
|
+
if (df->is_binary) {
|
276
277
|
bits |= FIELD_IS_BINARY;
|
277
|
-
|
278
|
+
}
|
279
|
+
if (df->is_compressed) {
|
278
280
|
bits |= FIELD_IS_COMPRESSED;
|
281
|
+
}
|
279
282
|
os_write_byte(fout, bits);
|
280
283
|
|
281
284
|
data = NULL;
|
282
285
|
if (df->is_compressed) {
|
283
|
-
|
286
|
+
/* Not compressing just yet but we'll save it anyway */
|
284
287
|
if (df->is_binary) {
|
285
288
|
save_data(fout, df->data, df->blen);
|
286
289
|
} else {
|
data/ext/hash.c
CHANGED
@@ -204,10 +204,6 @@ HshEntry *h_lookup_str(HshTable *ht, register const void *key_p)
|
|
204
204
|
if (he->key == dummy_key && freeslot == NULL)
|
205
205
|
freeslot = he;
|
206
206
|
}
|
207
|
-
if (he->key == NULL || he->key == key) {
|
208
|
-
he->hash = hash;
|
209
|
-
return he;
|
210
|
-
}
|
211
207
|
}
|
212
208
|
|
213
209
|
typedef int (*eq_func)(const void *key1, const void *key2);
|
data/ext/inc/lang.h
CHANGED
@@ -25,7 +25,7 @@ extern void setprogname(const char *str);
|
|
25
25
|
|
26
26
|
extern VALUE cQueryParseException;
|
27
27
|
|
28
|
-
#define EXCEPTION_CODE
|
28
|
+
#define EXCEPTION_CODE rb_eStandardError
|
29
29
|
//#define IO_ERROR rb_eIOError
|
30
30
|
//#define ARG_ERROR rb_eArgError
|
31
31
|
//#define EOF_ERROR rb_eEOFError
|
data/ext/ind.c
CHANGED
@@ -327,8 +327,8 @@ Document *index_get_doc_term(Index *self, Term *term)
|
|
327
327
|
tde = ir_term_docs_for(self->ir, term);
|
328
328
|
if (tde->next(tde)) {
|
329
329
|
doc = index_get_doc(self, tde->doc_num(tde));
|
330
|
-
tde->close(tde);
|
331
330
|
}
|
331
|
+
tde->close(tde);
|
332
332
|
mutex_unlock(&self->store->ext_mutex);
|
333
333
|
return doc;
|
334
334
|
}
|
data/ext/lang.h
CHANGED
@@ -25,7 +25,7 @@ extern void setprogname(const char *str);
|
|
25
25
|
|
26
26
|
extern VALUE cQueryParseException;
|
27
27
|
|
28
|
-
#define EXCEPTION_CODE
|
28
|
+
#define EXCEPTION_CODE rb_eStandardError
|
29
29
|
//#define IO_ERROR rb_eIOError
|
30
30
|
//#define ARG_ERROR rb_eArgError
|
31
31
|
//#define EOF_ERROR rb_eEOFError
|
data/ext/nix_io.c
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
#include "global.h"
|
4
4
|
#include "store.h"
|
5
|
-
#include <sys/dir.h>
|
6
5
|
#include <dirent.h>
|
7
6
|
#include <unistd.h>
|
8
7
|
#include <string.h>
|
@@ -41,7 +40,7 @@ int fcount(char *path)
|
|
41
40
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
42
41
|
|
43
42
|
while ((de = readdir(d)) != NULL) {
|
44
|
-
if (de->d_name[0] != '.') {
|
43
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')) {
|
45
44
|
cnt++;
|
46
45
|
}
|
47
46
|
}
|
@@ -58,7 +57,8 @@ void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
|
|
58
57
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
59
58
|
|
60
59
|
while ((de = readdir(d)) != NULL) {
|
61
|
-
if (de->d_name[0] != '
|
60
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')
|
61
|
+
&& !file_is_lock(de->d_name)) {
|
62
62
|
func(de->d_name, arg);
|
63
63
|
}
|
64
64
|
}
|
@@ -101,7 +101,8 @@ void fs_clear(Store *store)
|
|
101
101
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
102
102
|
|
103
103
|
while ((de = readdir(d)) != NULL) {
|
104
|
-
if (de->d_name[0] != '
|
104
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')
|
105
|
+
&& !file_is_lock(de->d_name)) {
|
105
106
|
char buf[MAX_FILE_PATH];
|
106
107
|
remove(join_path(buf, store->dir.path, de->d_name));
|
107
108
|
}
|
@@ -123,7 +124,7 @@ void fs_clear_all(Store *store)
|
|
123
124
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
124
125
|
|
125
126
|
while ((de = readdir(d)) != NULL) {
|
126
|
-
if (de->d_name[0] != '.') {
|
127
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')) {
|
127
128
|
char buf[MAX_FILE_PATH];
|
128
129
|
remove(join_path(buf, store->dir.path, de->d_name));
|
129
130
|
}
|
data/ext/r_analysis.c
CHANGED
@@ -1152,6 +1152,7 @@ Init_analysis(void)
|
|
1152
1152
|
frt_per_field_analyzer_add_field, 2);
|
1153
1153
|
rb_define_method(cPerFieldAnalyzer, "[]=",
|
1154
1154
|
frt_per_field_analyzer_add_field, 2);
|
1155
|
+
rb_define_class_under(mAnalysis, "PerFieldAnalyzerWrapper", cPerFieldAnalyzer);
|
1155
1156
|
|
1156
1157
|
/*** * * RegexAnalyzer * * ***/
|
1157
1158
|
cRegExpAnalyzer =
|
data/ext/r_doc.c
CHANGED
@@ -288,7 +288,7 @@ frt_get_doc(Document *doc)
|
|
288
288
|
if (!doc || (self = object_get(doc)) != Qnil) return self;
|
289
289
|
|
290
290
|
doc->free_data = NULL;
|
291
|
-
self = Data_Wrap_Struct(cDocument,
|
291
|
+
self = Data_Wrap_Struct(cDocument, frt_doc_mark, frt_doc_free, doc);
|
292
292
|
|
293
293
|
/* We add all the document's fields to the ruby object space so that they
|
294
294
|
* can be retrieved in ruby later. This code must come after the above
|
data/ext/r_index_io.c
CHANGED
@@ -442,6 +442,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
442
442
|
ref(store);
|
443
443
|
} else {
|
444
444
|
StringValue(rdir);
|
445
|
+
frt_create_dir(rdir);
|
445
446
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
446
447
|
}
|
447
448
|
} else {
|
@@ -645,6 +646,7 @@ frt_ir_init(int argc, VALUE *argv, VALUE self)
|
|
645
646
|
store = DATA_PTR(rdir);
|
646
647
|
} else {
|
647
648
|
rdir = rb_obj_as_string(rdir);
|
649
|
+
frt_create_dir(rdir);
|
648
650
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
649
651
|
deref(store);
|
650
652
|
}
|
@@ -871,6 +873,17 @@ frt_ir_terms_from(VALUE self, VALUE rterm)
|
|
871
873
|
return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
|
872
874
|
}
|
873
875
|
|
876
|
+
static VALUE
|
877
|
+
frt_ir_get_field_names(VALUE self)
|
878
|
+
{
|
879
|
+
GET_IR;
|
880
|
+
VALUE rfnames;
|
881
|
+
HashSet *fnames = ir->get_field_names(ir, IR_ALL);
|
882
|
+
rfnames = frt_hs_to_rb_ary(fnames);
|
883
|
+
hs_destroy(fnames);
|
884
|
+
return rfnames;
|
885
|
+
}
|
886
|
+
|
874
887
|
/****************************************************************************
|
875
888
|
*
|
876
889
|
* Init Function
|
@@ -1004,4 +1017,5 @@ Init_index_io(void)
|
|
1004
1017
|
rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 1);
|
1005
1018
|
rb_define_method(cIndexReader, "terms", frt_ir_terms, 0);
|
1006
1019
|
rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 1);
|
1020
|
+
rb_define_method(cIndexReader, "get_field_names", frt_ir_get_field_names, 0);
|
1007
1021
|
}
|
data/ext/r_qparser.c
CHANGED
@@ -10,6 +10,7 @@ VALUE rwild_lower_key;
|
|
10
10
|
VALUE roccur_default_key;
|
11
11
|
VALUE rdefault_slop_key;
|
12
12
|
VALUE rclean_str_key;
|
13
|
+
VALUE rfields_key;
|
13
14
|
extern VALUE ranalyzer_key;
|
14
15
|
|
15
16
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
@@ -90,6 +91,12 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
90
91
|
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
91
92
|
analyzer = frt_get_cwrapped_analyzer(rval);
|
92
93
|
}
|
94
|
+
if (Qnil != (rval = rb_hash_aref(roptions, rfields_key))) {
|
95
|
+
all_fields = frt_get_fields(rval);
|
96
|
+
}
|
97
|
+
}
|
98
|
+
if (all_fields == NULL) {
|
99
|
+
all_fields = hs_str_create(&free);
|
93
100
|
}
|
94
101
|
|
95
102
|
if (!analyzer) {
|
@@ -125,7 +132,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
125
132
|
return self;
|
126
133
|
}
|
127
134
|
|
128
|
-
#define GET_QP QParser *qp
|
135
|
+
#define GET_QP QParser *qp = (QParser *)DATA_PTR(self)
|
129
136
|
static VALUE
|
130
137
|
frt_qp_parse(VALUE self, VALUE rstr)
|
131
138
|
{
|
@@ -148,6 +155,36 @@ frt_qp_parse(VALUE self, VALUE rstr)
|
|
148
155
|
return rq;
|
149
156
|
}
|
150
157
|
|
158
|
+
static VALUE
|
159
|
+
frt_qp_get_fields(VALUE self)
|
160
|
+
{
|
161
|
+
GET_QP;
|
162
|
+
int i;
|
163
|
+
HashSet *fields = qp->all_fields;
|
164
|
+
VALUE rfields = rb_ary_new();
|
165
|
+
|
166
|
+
for (i = 0; i < fields->size; i++) {
|
167
|
+
rb_ary_push(rfields, rb_str_new2((char *)fields->elems[i]));
|
168
|
+
}
|
169
|
+
|
170
|
+
return rfields;
|
171
|
+
}
|
172
|
+
|
173
|
+
static VALUE
|
174
|
+
frt_qp_set_fields(VALUE self, VALUE rfields)
|
175
|
+
{
|
176
|
+
GET_QP;
|
177
|
+
HashSet *fields = frt_get_fields(rfields);
|
178
|
+
|
179
|
+
if (fields == NULL) {
|
180
|
+
fields = hs_str_create(&free);
|
181
|
+
}
|
182
|
+
hs_destroy(qp->all_fields);
|
183
|
+
qp->all_fields = fields;
|
184
|
+
|
185
|
+
return self;
|
186
|
+
}
|
187
|
+
|
151
188
|
/****************************************************************************
|
152
189
|
*
|
153
190
|
* Init function
|
@@ -164,6 +201,7 @@ Init_qparser(void)
|
|
164
201
|
roccur_default_key = ID2SYM(rb_intern("occur_default"));
|
165
202
|
rdefault_slop_key = ID2SYM(rb_intern("default_slop"));
|
166
203
|
rclean_str_key = ID2SYM(rb_intern("clean_string"));
|
204
|
+
rfields_key = ID2SYM(rb_intern("fields"));
|
167
205
|
|
168
206
|
/* QueryParser */
|
169
207
|
cQueryParser = rb_define_class_under(mFerret, "QueryParser", rb_cObject);
|
@@ -171,6 +209,8 @@ Init_qparser(void)
|
|
171
209
|
|
172
210
|
rb_define_method(cQueryParser, "initialize", frt_qp_init, -1);
|
173
211
|
rb_define_method(cQueryParser, "parse", frt_qp_parse, 1);
|
212
|
+
rb_define_method(cQueryParser, "fields", frt_qp_get_fields, 0);
|
213
|
+
rb_define_method(cQueryParser, "fields=", frt_qp_set_fields, 1);
|
174
214
|
|
175
215
|
/* QueryParseException */
|
176
216
|
cQueryParseException = rb_define_class_under(cQueryParser,
|
data/ext/r_search.c
CHANGED
@@ -110,7 +110,7 @@ frt_get_sd(Hit *hit)
|
|
110
110
|
return self;
|
111
111
|
}
|
112
112
|
|
113
|
-
#define GET_HIT Hit *hit
|
113
|
+
#define GET_HIT Hit *hit = (Hit *)DATA_PTR(self)
|
114
114
|
static VALUE
|
115
115
|
frt_sd_score(VALUE self)
|
116
116
|
{
|
@@ -1423,6 +1423,7 @@ frt_sea_close(VALUE self)
|
|
1423
1423
|
{
|
1424
1424
|
GET_SEA;
|
1425
1425
|
Frt_Unwrap_Struct(self);
|
1426
|
+
object_del(sea);
|
1426
1427
|
sea->close(sea);
|
1427
1428
|
return Qnil;
|
1428
1429
|
}
|
@@ -1561,6 +1562,7 @@ frt_is_init(VALUE self, VALUE obj)
|
|
1561
1562
|
IndexReader *ir = NULL;
|
1562
1563
|
Searcher *sea;
|
1563
1564
|
if (TYPE(obj) == T_STRING) {
|
1565
|
+
frt_create_dir(obj);
|
1564
1566
|
store = open_fs_store(StringValueCStr(obj));
|
1565
1567
|
ir = ir_open(store);
|
1566
1568
|
deref(store);
|
@@ -1698,6 +1700,7 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1698
1700
|
if (Qnil != (rval = rb_hash_aref(roptions, rpath_key))) {
|
1699
1701
|
rval = rb_obj_as_string(rval);
|
1700
1702
|
/* TODO: create the directory if it is missing */
|
1703
|
+
frt_create_dir(rval);
|
1701
1704
|
store = open_fs_store(RSTRING(rval)->ptr);
|
1702
1705
|
deref(store);
|
1703
1706
|
} else if (Qnil != (rval = rb_hash_aref(roptions, rdir_key))) {
|
@@ -2329,6 +2332,7 @@ frt_ind_persist(int argc, VALUE *argv, VALUE self)
|
|
2329
2332
|
ref(ind->store);
|
2330
2333
|
} else {
|
2331
2334
|
rdir = rb_obj_as_string(rdir);
|
2335
|
+
frt_create_dir(rdir);
|
2332
2336
|
ind->store = open_fs_store(RSTRING(rdir)->ptr);
|
2333
2337
|
}
|
2334
2338
|
|
data/ext/r_store.c
CHANGED
@@ -6,10 +6,6 @@ VALUE cDirectory;
|
|
6
6
|
VALUE cRAMDirectory;
|
7
7
|
VALUE cFSDirectory;
|
8
8
|
|
9
|
-
|
10
|
-
static ID id_mkdir_p;
|
11
|
-
static ID id_is_directory;
|
12
|
-
|
13
9
|
/****************************************************************************
|
14
10
|
*
|
15
11
|
* Lock Methods
|
@@ -217,10 +213,7 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
|
|
217
213
|
bool create = RTEST(rcreate);
|
218
214
|
rpath = rb_obj_as_string(rpath);
|
219
215
|
if (create) {
|
220
|
-
|
221
|
-
rb_require("fileutils");
|
222
|
-
mFileUtils = rb_define_module("FileUtils");
|
223
|
-
rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
|
216
|
+
frt_create_dir(rpath);
|
224
217
|
}
|
225
218
|
if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
|
226
219
|
rb_raise(rb_eIOError, "There is no directory: %s. Use create = true to "
|
@@ -246,9 +239,6 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
|
|
246
239
|
void
|
247
240
|
Init_dir(void)
|
248
241
|
{
|
249
|
-
id_mkdir_p = rb_intern("mkdir_p");
|
250
|
-
id_is_directory = rb_intern("directory?");
|
251
|
-
|
252
242
|
cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
|
253
243
|
rb_define_method(cLock, "obtain", frt_lock_obtain, -1);
|
254
244
|
rb_define_method(cLock, "while_locked", frt_lock_while_locked, -1);
|
data/lib/ferret.rb
CHANGED
@@ -97,7 +97,7 @@ module Ferret
|
|
97
97
|
# Returns the number of the field that goes by the field name that is
|
98
98
|
# passed. If there is no field of this name then -1 is returned
|
99
99
|
def field_number(name)
|
100
|
-
fi = @fi_hash[name]
|
100
|
+
fi = @fi_hash[name.to_s]
|
101
101
|
return fi ? fi.number : NOT_A_FIELD
|
102
102
|
end
|
103
103
|
|
@@ -109,7 +109,7 @@ module Ferret
|
|
109
109
|
end
|
110
110
|
return @fi_array[index]
|
111
111
|
else
|
112
|
-
return @fi_hash[index]
|
112
|
+
return @fi_hash[index.to_s]
|
113
113
|
end
|
114
114
|
end
|
115
115
|
|
@@ -356,7 +356,7 @@ module Index
|
|
356
356
|
target_merge_docs = @min_merge_docs
|
357
357
|
while (target_merge_docs <= @max_merge_docs)
|
358
358
|
# find segments smaller than current target size
|
359
|
-
min_segment = @segment_infos.size() -1
|
359
|
+
min_segment = @segment_infos.size() - 1
|
360
360
|
merge_docs = 0
|
361
361
|
while (min_segment >= 0)
|
362
362
|
si = @segment_infos[min_segment]
|
@@ -383,7 +383,8 @@ module Index
|
|
383
383
|
segments_to_delete = []
|
384
384
|
merged_name = new_segment_name()
|
385
385
|
if @info_stream != nil
|
386
|
-
@info_stream.print("merging segments from #{min_segment}
|
386
|
+
@info_stream.print("merging segments from #{min_segment} " +
|
387
|
+
"to #{(max_segment - 1)}\n")
|
387
388
|
end
|
388
389
|
merger = SegmentMerger.new(@directory, merged_name, @term_index_interval)
|
389
390
|
|
@@ -40,7 +40,9 @@ module Ferret::Index
|
|
40
40
|
return @readers[i]
|
41
41
|
end
|
42
42
|
|
43
|
-
# Merges the readers specified by the
|
43
|
+
# Merges the readers specified by the #add method into the directory
|
44
|
+
# passed to the constructor
|
45
|
+
#
|
44
46
|
# returns:: The number of documents that were merged
|
45
47
|
# raises:: IOError
|
46
48
|
def merge()
|
@@ -51,8 +53,9 @@ module Ferret::Index
|
|
51
53
|
return value
|
52
54
|
end
|
53
55
|
|
54
|
-
# close all IndexReaders that have been added.
|
55
|
-
#
|
56
|
+
# close all IndexReaders that have been added. Should not be called
|
57
|
+
# before merge().
|
58
|
+
#
|
56
59
|
# raises:: IOError
|
57
60
|
def close_readers()
|
58
61
|
@readers.each { |reader| reader.close }
|
@@ -341,6 +341,7 @@ module Ferret::Index
|
|
341
341
|
# raises:: IOException if there is an error reading the term vector files
|
342
342
|
def get_field_tv(doc_num, field)
|
343
343
|
# Check if no term vectors are available for this segment at all
|
344
|
+
field = field.to_s
|
344
345
|
field_number = @field_infos.field_number(field)
|
345
346
|
result = nil
|
346
347
|
if (@tvx != nil)
|
data/lib/rferret.rb
CHANGED
@@ -6,6 +6,7 @@ module IndexReaderCommon
|
|
6
6
|
include Ferret::Analysis
|
7
7
|
|
8
8
|
def test_index_reader
|
9
|
+
do_test_get_field_names()
|
9
10
|
|
10
11
|
do_test_term_doc_enum()
|
11
12
|
|
@@ -18,6 +19,17 @@ module IndexReaderCommon
|
|
18
19
|
do_test_term_enum()
|
19
20
|
end
|
20
21
|
|
22
|
+
def do_test_get_field_names()
|
23
|
+
field_names = @ir.get_field_names
|
24
|
+
|
25
|
+
assert(field_names.include?("body"))
|
26
|
+
assert(field_names.include?("changing_field"))
|
27
|
+
assert(field_names.include?("author"))
|
28
|
+
assert(field_names.include?("title"))
|
29
|
+
assert(field_names.include?("text"))
|
30
|
+
assert(field_names.include?("year"))
|
31
|
+
end
|
32
|
+
|
21
33
|
def do_test_term_enum()
|
22
34
|
te = @ir.terms
|
23
35
|
|
@@ -192,7 +204,7 @@ module IndexReaderCommon
|
|
192
204
|
end
|
193
205
|
|
194
206
|
def do_test_term_vectors()
|
195
|
-
tv = @ir.get_term_vector(3,
|
207
|
+
tv = @ir.get_term_vector(3, :body)
|
196
208
|
|
197
209
|
assert_equal("body", tv.field)
|
198
210
|
assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
|
@@ -635,8 +647,8 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
635
647
|
end
|
636
648
|
|
637
649
|
def test_ir_read_while_optimizing_on_disk()
|
638
|
-
dpath = File.join(File.dirname(__FILE__),
|
639
|
-
'../../temp/fsdir')
|
650
|
+
dpath = File.expand_path(File.join(File.dirname(__FILE__),
|
651
|
+
'../../temp/fsdir'))
|
640
652
|
fs_dir = Ferret::Store::FSDirectory.new(dpath, true)
|
641
653
|
|
642
654
|
iw = IndexWriter.new(fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
@@ -660,8 +672,8 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
660
672
|
end
|
661
673
|
|
662
674
|
def test_latest()
|
663
|
-
dpath = File.join(File.dirname(__FILE__),
|
664
|
-
'../../temp/fsdir')
|
675
|
+
dpath = File.expand_path(File.join(File.dirname(__FILE__),
|
676
|
+
'../../temp/fsdir'))
|
665
677
|
fs_dir = Ferret::Store::FSDirectory.new(dpath, true)
|
666
678
|
|
667
679
|
iw = IndexWriter.new(fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
@@ -6,6 +6,7 @@ class QueryParserTest < Test::Unit::TestCase
|
|
6
6
|
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "field", "f1", "f2"])
|
7
7
|
pairs = [
|
8
8
|
['', ''],
|
9
|
+
['*:word', 'word field:word f1:word f2:word'],
|
9
10
|
['word', 'word'],
|
10
11
|
['field:word', 'field:word'],
|
11
12
|
['"word1 word2 word#"', '"word1 word2 word"'],
|
@@ -112,6 +113,39 @@ class QueryParserTest < Test::Unit::TestCase
|
|
112
113
|
pairs.each do |query_str, expected|
|
113
114
|
assert_equal(expected, parser.parse(query_str).to_s("xxx"))
|
114
115
|
end
|
116
|
+
|
117
|
+
end
|
118
|
+
|
119
|
+
if not $ferret_pure_ruby
|
120
|
+
def test_qp_changing_fields()
|
121
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
|
122
|
+
:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new)
|
123
|
+
assert_equal('word key:word', parser.parse("*:word").to_s("xxx"))
|
124
|
+
|
125
|
+
parser.fields = ["xxx", "one", "two", "three"]
|
126
|
+
assert_equal('word one:word two:word three:word',
|
127
|
+
parser.parse("*:word").to_s("xxx"))
|
128
|
+
assert_equal('three:word four:word',
|
129
|
+
parser.parse("three:word four:word").to_s("xxx"))
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_qp_allow_any_field()
|
133
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
|
134
|
+
:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new,
|
135
|
+
:allow_any_fields => false)
|
136
|
+
|
137
|
+
assert_equal('key:word',
|
138
|
+
parser.parse("key:word song:word").to_s("xxx"))
|
139
|
+
assert_equal('word key:word', parser.parse("*:word").to_s("xxx"))
|
140
|
+
|
141
|
+
|
142
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
|
143
|
+
:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new)
|
144
|
+
|
145
|
+
assert_equal('key:word song:word',
|
146
|
+
parser.parse("key:word song:word").to_s("xxx"))
|
147
|
+
assert_equal('word key:word song:word', parser.parse("*:word").to_s("xxx"))
|
148
|
+
end
|
115
149
|
end
|
116
150
|
|
117
151
|
def do_test_query_parse_exception_raised(str)
|
@@ -123,7 +157,7 @@ class QueryParserTest < Test::Unit::TestCase
|
|
123
157
|
|
124
158
|
def test_prefix_query
|
125
159
|
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx"],
|
126
|
-
|
160
|
+
:analyzer => Ferret::Analysis::StandardAnalyzer.new)
|
127
161
|
assert_equal(Ferret::Search::PrefixQuery, parser.parse("asdg*").class)
|
128
162
|
assert_equal(Ferret::Search::WildcardQuery, parser.parse("a?dg*").class)
|
129
163
|
assert_equal(Ferret::Search::WildcardQuery, parser.parse("a*dg*").class)
|
@@ -142,9 +142,11 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
142
142
|
do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
|
143
143
|
do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], Sort.new(["int"]))
|
144
144
|
do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float"))
|
145
|
+
do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], "float")
|
145
146
|
do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float", true))
|
146
147
|
do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new(["int", "string"], true))
|
147
148
|
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new(["int", "string"]))
|
149
|
+
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], ["int", "string"])
|
148
150
|
end
|
149
151
|
|
150
152
|
#LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
|
@@ -7,11 +7,9 @@ class FSStoreTest < Test::Unit::TestCase
|
|
7
7
|
include StoreTest
|
8
8
|
include StoreLockTest
|
9
9
|
def setup
|
10
|
-
@dpath = File.join(File.dirname(__FILE__),
|
11
|
-
'../../temp/fsdir')
|
10
|
+
@dpath = File.expand_path(File.join(File.dirname(__FILE__),
|
11
|
+
'../../temp/fsdir'))
|
12
12
|
@dir = FSDirectory.new(@dpath, true)
|
13
|
-
@dir1 = FSDirectory.new(@dpath, true)
|
14
|
-
@dir2 = FSDirectory.new(@dpath, true)
|
15
13
|
end
|
16
14
|
|
17
15
|
def teardown
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.9.3
|
7
|
-
date: 2006-
|
6
|
+
version: 0.9.4
|
7
|
+
date: 2006-07-01 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- ext/document.c
|
52
52
|
- ext/compound_io.c
|
53
53
|
- ext/index_rw.c
|
54
|
+
- ext/termdocs.c
|
54
55
|
- ext/vector.c
|
55
56
|
- ext/field.c
|
56
57
|
- ext/term.c
|
@@ -368,7 +369,6 @@ files:
|
|
368
369
|
- test/utils/number_to_spoken.rb
|
369
370
|
- test/unit/analysis/data/wordfile
|
370
371
|
- rake_utils/code_statistics.rb
|
371
|
-
- ext/termdocs.c
|
372
372
|
test_files: []
|
373
373
|
|
374
374
|
rdoc_options:
|