ferret 0.9.3 → 0.9.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/ext/ferret.c +27 -0
- data/ext/ferret.h +4 -1
- data/ext/field.c +7 -4
- data/ext/hash.c +0 -4
- data/ext/inc/lang.h +1 -1
- data/ext/ind.c +1 -1
- data/ext/lang.h +1 -1
- data/ext/nix_io.c +6 -5
- data/ext/r_analysis.c +1 -0
- data/ext/r_doc.c +1 -1
- data/ext/r_index_io.c +14 -0
- data/ext/r_qparser.c +41 -1
- data/ext/r_search.c +5 -1
- data/ext/r_store.c +1 -11
- data/lib/ferret.rb +1 -1
- data/lib/ferret/index/field_infos.rb +2 -2
- data/lib/ferret/index/index_writer.rb +3 -2
- data/lib/ferret/index/segment_merger.rb +6 -3
- data/lib/ferret/index/term_vectors_io.rb +1 -0
- data/lib/rferret.rb +1 -1
- data/test/unit/index/tc_index_reader.rb +17 -5
- data/test/unit/query_parser/tc_query_parser.rb +35 -1
- data/test/unit/search/tc_search_and_sort.rb +2 -0
- data/test/unit/store/tc_fs_store.rb +2 -4
- metadata +3 -3
data/ext/ferret.c
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
#include "ferret.h"
|
2
2
|
#include "except.h"
|
3
3
|
#include "hash.h"
|
4
|
+
#include "hashset.h"
|
4
5
|
|
5
6
|
/* Object Map */
|
6
7
|
static HshTable *object_map;
|
@@ -8,6 +9,9 @@ static HshTable *object_map;
|
|
8
9
|
/* IDs */
|
9
10
|
ID id_new;
|
10
11
|
ID id_call;
|
12
|
+
ID id_is_directory;
|
13
|
+
|
14
|
+
static ID id_mkdir_p;
|
11
15
|
|
12
16
|
/* Modules */
|
13
17
|
VALUE mFerret;
|
@@ -129,6 +133,26 @@ frt_thread_getspecific(thread_key_t key)
|
|
129
133
|
return h_get(key, (void *)rb_thread_current());
|
130
134
|
}
|
131
135
|
|
136
|
+
void
|
137
|
+
frt_create_dir(VALUE rpath)
|
138
|
+
{
|
139
|
+
VALUE mFileUtils;
|
140
|
+
rb_require("fileutils");
|
141
|
+
mFileUtils = rb_define_module("FileUtils");
|
142
|
+
rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
|
143
|
+
}
|
144
|
+
|
145
|
+
VALUE
|
146
|
+
frt_hs_to_rb_ary(HashSet *hs)
|
147
|
+
{
|
148
|
+
int i;
|
149
|
+
VALUE ary = rb_ary_new();
|
150
|
+
for (i = 0; i < hs->size; i++) {
|
151
|
+
rb_ary_push(ary, rb_str_new2(hs->elems[i]));
|
152
|
+
}
|
153
|
+
return ary;
|
154
|
+
}
|
155
|
+
|
132
156
|
void
|
133
157
|
Init_ferret_ext(void)
|
134
158
|
{
|
@@ -139,6 +163,9 @@ Init_ferret_ext(void)
|
|
139
163
|
id_new = rb_intern("new");
|
140
164
|
id_call = rb_intern("call");
|
141
165
|
|
166
|
+
id_mkdir_p = rb_intern("mkdir_p");
|
167
|
+
id_is_directory = rb_intern("directory?");
|
168
|
+
|
142
169
|
/* Modules */
|
143
170
|
mFerret = rb_define_module("Ferret");
|
144
171
|
mAnalysis = rb_define_module_under(mFerret, "Analysis");
|
data/ext/ferret.h
CHANGED
@@ -2,11 +2,13 @@
|
|
2
2
|
#define __FERRET_H_
|
3
3
|
|
4
4
|
#include "global.h"
|
5
|
+
#include "hashset.h"
|
5
6
|
#include "document.h"
|
6
7
|
|
7
8
|
/* IDs */
|
8
9
|
extern ID id_new;
|
9
10
|
extern ID id_call;
|
11
|
+
extern ID id_is_directory;
|
10
12
|
|
11
13
|
/* Modules */
|
12
14
|
extern VALUE mFerret;
|
@@ -44,7 +46,8 @@ extern VALUE object_get(void *key);
|
|
44
46
|
extern VALUE frt_data_alloc(VALUE klass);
|
45
47
|
extern VALUE frt_get_doc(Document *doc);
|
46
48
|
extern void frt_deref_free(void *p);
|
47
|
-
|
49
|
+
extern void frt_create_dir(VALUE rpath);
|
50
|
+
extern VALUE frt_hs_to_rb_ary(HashSet *hs);
|
48
51
|
|
49
52
|
#define Frt_Make_Struct(klass)\
|
50
53
|
rb_data_object_alloc(klass,NULL,(RUBY_DATA_FUNC)NULL,(RUBY_DATA_FUNC)NULL)
|
data/ext/field.c
CHANGED
@@ -270,17 +270,20 @@ void fw_add_doc(FieldsWriter *fw, Document *doc)
|
|
270
270
|
os_write_vint(fout, ((FieldInfo *)ht_get(fw->fis->by_name, df->name))->number);
|
271
271
|
|
272
272
|
bits = 0;
|
273
|
-
if (df->is_tokenized)
|
273
|
+
if (df->is_tokenized) {
|
274
274
|
bits |= FIELD_IS_TOKENIZED;
|
275
|
-
|
275
|
+
}
|
276
|
+
if (df->is_binary) {
|
276
277
|
bits |= FIELD_IS_BINARY;
|
277
|
-
|
278
|
+
}
|
279
|
+
if (df->is_compressed) {
|
278
280
|
bits |= FIELD_IS_COMPRESSED;
|
281
|
+
}
|
279
282
|
os_write_byte(fout, bits);
|
280
283
|
|
281
284
|
data = NULL;
|
282
285
|
if (df->is_compressed) {
|
283
|
-
|
286
|
+
/* Not compressing just yet but we'll save it anyway */
|
284
287
|
if (df->is_binary) {
|
285
288
|
save_data(fout, df->data, df->blen);
|
286
289
|
} else {
|
data/ext/hash.c
CHANGED
@@ -204,10 +204,6 @@ HshEntry *h_lookup_str(HshTable *ht, register const void *key_p)
|
|
204
204
|
if (he->key == dummy_key && freeslot == NULL)
|
205
205
|
freeslot = he;
|
206
206
|
}
|
207
|
-
if (he->key == NULL || he->key == key) {
|
208
|
-
he->hash = hash;
|
209
|
-
return he;
|
210
|
-
}
|
211
207
|
}
|
212
208
|
|
213
209
|
typedef int (*eq_func)(const void *key1, const void *key2);
|
data/ext/inc/lang.h
CHANGED
@@ -25,7 +25,7 @@ extern void setprogname(const char *str);
|
|
25
25
|
|
26
26
|
extern VALUE cQueryParseException;
|
27
27
|
|
28
|
-
#define EXCEPTION_CODE
|
28
|
+
#define EXCEPTION_CODE rb_eStandardError
|
29
29
|
//#define IO_ERROR rb_eIOError
|
30
30
|
//#define ARG_ERROR rb_eArgError
|
31
31
|
//#define EOF_ERROR rb_eEOFError
|
data/ext/ind.c
CHANGED
@@ -327,8 +327,8 @@ Document *index_get_doc_term(Index *self, Term *term)
|
|
327
327
|
tde = ir_term_docs_for(self->ir, term);
|
328
328
|
if (tde->next(tde)) {
|
329
329
|
doc = index_get_doc(self, tde->doc_num(tde));
|
330
|
-
tde->close(tde);
|
331
330
|
}
|
331
|
+
tde->close(tde);
|
332
332
|
mutex_unlock(&self->store->ext_mutex);
|
333
333
|
return doc;
|
334
334
|
}
|
data/ext/lang.h
CHANGED
@@ -25,7 +25,7 @@ extern void setprogname(const char *str);
|
|
25
25
|
|
26
26
|
extern VALUE cQueryParseException;
|
27
27
|
|
28
|
-
#define EXCEPTION_CODE
|
28
|
+
#define EXCEPTION_CODE rb_eStandardError
|
29
29
|
//#define IO_ERROR rb_eIOError
|
30
30
|
//#define ARG_ERROR rb_eArgError
|
31
31
|
//#define EOF_ERROR rb_eEOFError
|
data/ext/nix_io.c
CHANGED
@@ -2,7 +2,6 @@
|
|
2
2
|
|
3
3
|
#include "global.h"
|
4
4
|
#include "store.h"
|
5
|
-
#include <sys/dir.h>
|
6
5
|
#include <dirent.h>
|
7
6
|
#include <unistd.h>
|
8
7
|
#include <string.h>
|
@@ -41,7 +40,7 @@ int fcount(char *path)
|
|
41
40
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
42
41
|
|
43
42
|
while ((de = readdir(d)) != NULL) {
|
44
|
-
if (de->d_name[0] != '.') {
|
43
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')) {
|
45
44
|
cnt++;
|
46
45
|
}
|
47
46
|
}
|
@@ -58,7 +57,8 @@ void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
|
|
58
57
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
59
58
|
|
60
59
|
while ((de = readdir(d)) != NULL) {
|
61
|
-
if (de->d_name[0] != '
|
60
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')
|
61
|
+
&& !file_is_lock(de->d_name)) {
|
62
62
|
func(de->d_name, arg);
|
63
63
|
}
|
64
64
|
}
|
@@ -101,7 +101,8 @@ void fs_clear(Store *store)
|
|
101
101
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
102
102
|
|
103
103
|
while ((de = readdir(d)) != NULL) {
|
104
|
-
if (de->d_name[0] != '
|
104
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')
|
105
|
+
&& !file_is_lock(de->d_name)) {
|
105
106
|
char buf[MAX_FILE_PATH];
|
106
107
|
remove(join_path(buf, store->dir.path, de->d_name));
|
107
108
|
}
|
@@ -123,7 +124,7 @@ void fs_clear_all(Store *store)
|
|
123
124
|
if (!d) RAISE(IO_ERROR, strerror(errno));
|
124
125
|
|
125
126
|
while ((de = readdir(d)) != NULL) {
|
126
|
-
if (de->d_name[0] != '.') {
|
127
|
+
if ((de->d_name[0] != '\0') && (de->d_name[0] != '.')) {
|
127
128
|
char buf[MAX_FILE_PATH];
|
128
129
|
remove(join_path(buf, store->dir.path, de->d_name));
|
129
130
|
}
|
data/ext/r_analysis.c
CHANGED
@@ -1152,6 +1152,7 @@ Init_analysis(void)
|
|
1152
1152
|
frt_per_field_analyzer_add_field, 2);
|
1153
1153
|
rb_define_method(cPerFieldAnalyzer, "[]=",
|
1154
1154
|
frt_per_field_analyzer_add_field, 2);
|
1155
|
+
rb_define_class_under(mAnalysis, "PerFieldAnalyzerWrapper", cPerFieldAnalyzer);
|
1155
1156
|
|
1156
1157
|
/*** * * RegexAnalyzer * * ***/
|
1157
1158
|
cRegExpAnalyzer =
|
data/ext/r_doc.c
CHANGED
@@ -288,7 +288,7 @@ frt_get_doc(Document *doc)
|
|
288
288
|
if (!doc || (self = object_get(doc)) != Qnil) return self;
|
289
289
|
|
290
290
|
doc->free_data = NULL;
|
291
|
-
self = Data_Wrap_Struct(cDocument,
|
291
|
+
self = Data_Wrap_Struct(cDocument, frt_doc_mark, frt_doc_free, doc);
|
292
292
|
|
293
293
|
/* We add all the document's fields to the ruby object space so that they
|
294
294
|
* can be retrieved in ruby later. This code must come after the above
|
data/ext/r_index_io.c
CHANGED
@@ -442,6 +442,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
442
442
|
ref(store);
|
443
443
|
} else {
|
444
444
|
StringValue(rdir);
|
445
|
+
frt_create_dir(rdir);
|
445
446
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
446
447
|
}
|
447
448
|
} else {
|
@@ -645,6 +646,7 @@ frt_ir_init(int argc, VALUE *argv, VALUE self)
|
|
645
646
|
store = DATA_PTR(rdir);
|
646
647
|
} else {
|
647
648
|
rdir = rb_obj_as_string(rdir);
|
649
|
+
frt_create_dir(rdir);
|
648
650
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
649
651
|
deref(store);
|
650
652
|
}
|
@@ -871,6 +873,17 @@ frt_ir_terms_from(VALUE self, VALUE rterm)
|
|
871
873
|
return Data_Wrap_Struct(cTermEnum, NULL, &frt_te_free, te);
|
872
874
|
}
|
873
875
|
|
876
|
+
static VALUE
|
877
|
+
frt_ir_get_field_names(VALUE self)
|
878
|
+
{
|
879
|
+
GET_IR;
|
880
|
+
VALUE rfnames;
|
881
|
+
HashSet *fnames = ir->get_field_names(ir, IR_ALL);
|
882
|
+
rfnames = frt_hs_to_rb_ary(fnames);
|
883
|
+
hs_destroy(fnames);
|
884
|
+
return rfnames;
|
885
|
+
}
|
886
|
+
|
874
887
|
/****************************************************************************
|
875
888
|
*
|
876
889
|
* Init Function
|
@@ -1004,4 +1017,5 @@ Init_index_io(void)
|
|
1004
1017
|
rb_define_method(cIndexReader, "doc_freq", frt_ir_doc_freq, 1);
|
1005
1018
|
rb_define_method(cIndexReader, "terms", frt_ir_terms, 0);
|
1006
1019
|
rb_define_method(cIndexReader, "terms_from", frt_ir_terms_from, 1);
|
1020
|
+
rb_define_method(cIndexReader, "get_field_names", frt_ir_get_field_names, 0);
|
1007
1021
|
}
|
data/ext/r_qparser.c
CHANGED
@@ -10,6 +10,7 @@ VALUE rwild_lower_key;
|
|
10
10
|
VALUE roccur_default_key;
|
11
11
|
VALUE rdefault_slop_key;
|
12
12
|
VALUE rclean_str_key;
|
13
|
+
VALUE rfields_key;
|
13
14
|
extern VALUE ranalyzer_key;
|
14
15
|
|
15
16
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
@@ -90,6 +91,12 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
90
91
|
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
91
92
|
analyzer = frt_get_cwrapped_analyzer(rval);
|
92
93
|
}
|
94
|
+
if (Qnil != (rval = rb_hash_aref(roptions, rfields_key))) {
|
95
|
+
all_fields = frt_get_fields(rval);
|
96
|
+
}
|
97
|
+
}
|
98
|
+
if (all_fields == NULL) {
|
99
|
+
all_fields = hs_str_create(&free);
|
93
100
|
}
|
94
101
|
|
95
102
|
if (!analyzer) {
|
@@ -125,7 +132,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
125
132
|
return self;
|
126
133
|
}
|
127
134
|
|
128
|
-
#define GET_QP QParser *qp
|
135
|
+
#define GET_QP QParser *qp = (QParser *)DATA_PTR(self)
|
129
136
|
static VALUE
|
130
137
|
frt_qp_parse(VALUE self, VALUE rstr)
|
131
138
|
{
|
@@ -148,6 +155,36 @@ frt_qp_parse(VALUE self, VALUE rstr)
|
|
148
155
|
return rq;
|
149
156
|
}
|
150
157
|
|
158
|
+
static VALUE
|
159
|
+
frt_qp_get_fields(VALUE self)
|
160
|
+
{
|
161
|
+
GET_QP;
|
162
|
+
int i;
|
163
|
+
HashSet *fields = qp->all_fields;
|
164
|
+
VALUE rfields = rb_ary_new();
|
165
|
+
|
166
|
+
for (i = 0; i < fields->size; i++) {
|
167
|
+
rb_ary_push(rfields, rb_str_new2((char *)fields->elems[i]));
|
168
|
+
}
|
169
|
+
|
170
|
+
return rfields;
|
171
|
+
}
|
172
|
+
|
173
|
+
static VALUE
|
174
|
+
frt_qp_set_fields(VALUE self, VALUE rfields)
|
175
|
+
{
|
176
|
+
GET_QP;
|
177
|
+
HashSet *fields = frt_get_fields(rfields);
|
178
|
+
|
179
|
+
if (fields == NULL) {
|
180
|
+
fields = hs_str_create(&free);
|
181
|
+
}
|
182
|
+
hs_destroy(qp->all_fields);
|
183
|
+
qp->all_fields = fields;
|
184
|
+
|
185
|
+
return self;
|
186
|
+
}
|
187
|
+
|
151
188
|
/****************************************************************************
|
152
189
|
*
|
153
190
|
* Init function
|
@@ -164,6 +201,7 @@ Init_qparser(void)
|
|
164
201
|
roccur_default_key = ID2SYM(rb_intern("occur_default"));
|
165
202
|
rdefault_slop_key = ID2SYM(rb_intern("default_slop"));
|
166
203
|
rclean_str_key = ID2SYM(rb_intern("clean_string"));
|
204
|
+
rfields_key = ID2SYM(rb_intern("fields"));
|
167
205
|
|
168
206
|
/* QueryParser */
|
169
207
|
cQueryParser = rb_define_class_under(mFerret, "QueryParser", rb_cObject);
|
@@ -171,6 +209,8 @@ Init_qparser(void)
|
|
171
209
|
|
172
210
|
rb_define_method(cQueryParser, "initialize", frt_qp_init, -1);
|
173
211
|
rb_define_method(cQueryParser, "parse", frt_qp_parse, 1);
|
212
|
+
rb_define_method(cQueryParser, "fields", frt_qp_get_fields, 0);
|
213
|
+
rb_define_method(cQueryParser, "fields=", frt_qp_set_fields, 1);
|
174
214
|
|
175
215
|
/* QueryParseException */
|
176
216
|
cQueryParseException = rb_define_class_under(cQueryParser,
|
data/ext/r_search.c
CHANGED
@@ -110,7 +110,7 @@ frt_get_sd(Hit *hit)
|
|
110
110
|
return self;
|
111
111
|
}
|
112
112
|
|
113
|
-
#define GET_HIT Hit *hit
|
113
|
+
#define GET_HIT Hit *hit = (Hit *)DATA_PTR(self)
|
114
114
|
static VALUE
|
115
115
|
frt_sd_score(VALUE self)
|
116
116
|
{
|
@@ -1423,6 +1423,7 @@ frt_sea_close(VALUE self)
|
|
1423
1423
|
{
|
1424
1424
|
GET_SEA;
|
1425
1425
|
Frt_Unwrap_Struct(self);
|
1426
|
+
object_del(sea);
|
1426
1427
|
sea->close(sea);
|
1427
1428
|
return Qnil;
|
1428
1429
|
}
|
@@ -1561,6 +1562,7 @@ frt_is_init(VALUE self, VALUE obj)
|
|
1561
1562
|
IndexReader *ir = NULL;
|
1562
1563
|
Searcher *sea;
|
1563
1564
|
if (TYPE(obj) == T_STRING) {
|
1565
|
+
frt_create_dir(obj);
|
1564
1566
|
store = open_fs_store(StringValueCStr(obj));
|
1565
1567
|
ir = ir_open(store);
|
1566
1568
|
deref(store);
|
@@ -1698,6 +1700,7 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1698
1700
|
if (Qnil != (rval = rb_hash_aref(roptions, rpath_key))) {
|
1699
1701
|
rval = rb_obj_as_string(rval);
|
1700
1702
|
/* TODO: create the directory if it is missing */
|
1703
|
+
frt_create_dir(rval);
|
1701
1704
|
store = open_fs_store(RSTRING(rval)->ptr);
|
1702
1705
|
deref(store);
|
1703
1706
|
} else if (Qnil != (rval = rb_hash_aref(roptions, rdir_key))) {
|
@@ -2329,6 +2332,7 @@ frt_ind_persist(int argc, VALUE *argv, VALUE self)
|
|
2329
2332
|
ref(ind->store);
|
2330
2333
|
} else {
|
2331
2334
|
rdir = rb_obj_as_string(rdir);
|
2335
|
+
frt_create_dir(rdir);
|
2332
2336
|
ind->store = open_fs_store(RSTRING(rdir)->ptr);
|
2333
2337
|
}
|
2334
2338
|
|
data/ext/r_store.c
CHANGED
@@ -6,10 +6,6 @@ VALUE cDirectory;
|
|
6
6
|
VALUE cRAMDirectory;
|
7
7
|
VALUE cFSDirectory;
|
8
8
|
|
9
|
-
|
10
|
-
static ID id_mkdir_p;
|
11
|
-
static ID id_is_directory;
|
12
|
-
|
13
9
|
/****************************************************************************
|
14
10
|
*
|
15
11
|
* Lock Methods
|
@@ -217,10 +213,7 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
|
|
217
213
|
bool create = RTEST(rcreate);
|
218
214
|
rpath = rb_obj_as_string(rpath);
|
219
215
|
if (create) {
|
220
|
-
|
221
|
-
rb_require("fileutils");
|
222
|
-
mFileUtils = rb_define_module("FileUtils");
|
223
|
-
rb_funcall(mFileUtils, id_mkdir_p, 1, rpath);
|
216
|
+
frt_create_dir(rpath);
|
224
217
|
}
|
225
218
|
if (!rb_funcall(rb_cFile, id_is_directory, 1, rpath)) {
|
226
219
|
rb_raise(rb_eIOError, "There is no directory: %s. Use create = true to "
|
@@ -246,9 +239,6 @@ frt_fsdir_new(VALUE klass, VALUE rpath, VALUE rcreate)
|
|
246
239
|
void
|
247
240
|
Init_dir(void)
|
248
241
|
{
|
249
|
-
id_mkdir_p = rb_intern("mkdir_p");
|
250
|
-
id_is_directory = rb_intern("directory?");
|
251
|
-
|
252
242
|
cLock = rb_define_class_under(mStore, "Lock", rb_cObject);
|
253
243
|
rb_define_method(cLock, "obtain", frt_lock_obtain, -1);
|
254
244
|
rb_define_method(cLock, "while_locked", frt_lock_while_locked, -1);
|
data/lib/ferret.rb
CHANGED
@@ -97,7 +97,7 @@ module Ferret
|
|
97
97
|
# Returns the number of the field that goes by the field name that is
|
98
98
|
# passed. If there is no field of this name then -1 is returned
|
99
99
|
def field_number(name)
|
100
|
-
fi = @fi_hash[name]
|
100
|
+
fi = @fi_hash[name.to_s]
|
101
101
|
return fi ? fi.number : NOT_A_FIELD
|
102
102
|
end
|
103
103
|
|
@@ -109,7 +109,7 @@ module Ferret
|
|
109
109
|
end
|
110
110
|
return @fi_array[index]
|
111
111
|
else
|
112
|
-
return @fi_hash[index]
|
112
|
+
return @fi_hash[index.to_s]
|
113
113
|
end
|
114
114
|
end
|
115
115
|
|
@@ -356,7 +356,7 @@ module Index
|
|
356
356
|
target_merge_docs = @min_merge_docs
|
357
357
|
while (target_merge_docs <= @max_merge_docs)
|
358
358
|
# find segments smaller than current target size
|
359
|
-
min_segment = @segment_infos.size() -1
|
359
|
+
min_segment = @segment_infos.size() - 1
|
360
360
|
merge_docs = 0
|
361
361
|
while (min_segment >= 0)
|
362
362
|
si = @segment_infos[min_segment]
|
@@ -383,7 +383,8 @@ module Index
|
|
383
383
|
segments_to_delete = []
|
384
384
|
merged_name = new_segment_name()
|
385
385
|
if @info_stream != nil
|
386
|
-
@info_stream.print("merging segments from #{min_segment}
|
386
|
+
@info_stream.print("merging segments from #{min_segment} " +
|
387
|
+
"to #{(max_segment - 1)}\n")
|
387
388
|
end
|
388
389
|
merger = SegmentMerger.new(@directory, merged_name, @term_index_interval)
|
389
390
|
|
@@ -40,7 +40,9 @@ module Ferret::Index
|
|
40
40
|
return @readers[i]
|
41
41
|
end
|
42
42
|
|
43
|
-
# Merges the readers specified by the
|
43
|
+
# Merges the readers specified by the #add method into the directory
|
44
|
+
# passed to the constructor
|
45
|
+
#
|
44
46
|
# returns:: The number of documents that were merged
|
45
47
|
# raises:: IOError
|
46
48
|
def merge()
|
@@ -51,8 +53,9 @@ module Ferret::Index
|
|
51
53
|
return value
|
52
54
|
end
|
53
55
|
|
54
|
-
# close all IndexReaders that have been added.
|
55
|
-
#
|
56
|
+
# close all IndexReaders that have been added. Should not be called
|
57
|
+
# before merge().
|
58
|
+
#
|
56
59
|
# raises:: IOError
|
57
60
|
def close_readers()
|
58
61
|
@readers.each { |reader| reader.close }
|
@@ -341,6 +341,7 @@ module Ferret::Index
|
|
341
341
|
# raises:: IOException if there is an error reading the term vector files
|
342
342
|
def get_field_tv(doc_num, field)
|
343
343
|
# Check if no term vectors are available for this segment at all
|
344
|
+
field = field.to_s
|
344
345
|
field_number = @field_infos.field_number(field)
|
345
346
|
result = nil
|
346
347
|
if (@tvx != nil)
|
data/lib/rferret.rb
CHANGED
@@ -6,6 +6,7 @@ module IndexReaderCommon
|
|
6
6
|
include Ferret::Analysis
|
7
7
|
|
8
8
|
def test_index_reader
|
9
|
+
do_test_get_field_names()
|
9
10
|
|
10
11
|
do_test_term_doc_enum()
|
11
12
|
|
@@ -18,6 +19,17 @@ module IndexReaderCommon
|
|
18
19
|
do_test_term_enum()
|
19
20
|
end
|
20
21
|
|
22
|
+
def do_test_get_field_names()
|
23
|
+
field_names = @ir.get_field_names
|
24
|
+
|
25
|
+
assert(field_names.include?("body"))
|
26
|
+
assert(field_names.include?("changing_field"))
|
27
|
+
assert(field_names.include?("author"))
|
28
|
+
assert(field_names.include?("title"))
|
29
|
+
assert(field_names.include?("text"))
|
30
|
+
assert(field_names.include?("year"))
|
31
|
+
end
|
32
|
+
|
21
33
|
def do_test_term_enum()
|
22
34
|
te = @ir.terms
|
23
35
|
|
@@ -192,7 +204,7 @@ module IndexReaderCommon
|
|
192
204
|
end
|
193
205
|
|
194
206
|
def do_test_term_vectors()
|
195
|
-
tv = @ir.get_term_vector(3, "body")
|
207
|
+
tv = @ir.get_term_vector(3, :body)
|
196
208
|
|
197
209
|
assert_equal("body", tv.field)
|
198
210
|
assert_equal(["word1", "word2", "word3", "word4"], tv.terms)
|
@@ -635,8 +647,8 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
635
647
|
end
|
636
648
|
|
637
649
|
def test_ir_read_while_optimizing_on_disk()
|
638
|
-
dpath = File.join(File.dirname(__FILE__),
|
639
|
-
'../../temp/fsdir')
|
650
|
+
dpath = File.expand_path(File.join(File.dirname(__FILE__),
|
651
|
+
'../../temp/fsdir'))
|
640
652
|
fs_dir = Ferret::Store::FSDirectory.new(dpath, true)
|
641
653
|
|
642
654
|
iw = IndexWriter.new(fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
@@ -660,8 +672,8 @@ class IndexReaderTest < Test::Unit::TestCase
|
|
660
672
|
end
|
661
673
|
|
662
674
|
def test_latest()
|
663
|
-
dpath = File.join(File.dirname(__FILE__),
|
664
|
-
'../../temp/fsdir')
|
675
|
+
dpath = File.expand_path(File.join(File.dirname(__FILE__),
|
676
|
+
'../../temp/fsdir'))
|
665
677
|
fs_dir = Ferret::Store::FSDirectory.new(dpath, true)
|
666
678
|
|
667
679
|
iw = IndexWriter.new(fs_dir, :analyzer => WhiteSpaceAnalyzer.new(), :create => true)
|
@@ -6,6 +6,7 @@ class QueryParserTest < Test::Unit::TestCase
|
|
6
6
|
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "field", "f1", "f2"])
|
7
7
|
pairs = [
|
8
8
|
['', ''],
|
9
|
+
['*:word', 'word field:word f1:word f2:word'],
|
9
10
|
['word', 'word'],
|
10
11
|
['field:word', 'field:word'],
|
11
12
|
['"word1 word2 word#"', '"word1 word2 word"'],
|
@@ -112,6 +113,39 @@ class QueryParserTest < Test::Unit::TestCase
|
|
112
113
|
pairs.each do |query_str, expected|
|
113
114
|
assert_equal(expected, parser.parse(query_str).to_s("xxx"))
|
114
115
|
end
|
116
|
+
|
117
|
+
end
|
118
|
+
|
119
|
+
if not $ferret_pure_ruby
|
120
|
+
def test_qp_changing_fields()
|
121
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
|
122
|
+
:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new)
|
123
|
+
assert_equal('word key:word', parser.parse("*:word").to_s("xxx"))
|
124
|
+
|
125
|
+
parser.fields = ["xxx", "one", "two", "three"]
|
126
|
+
assert_equal('word one:word two:word three:word',
|
127
|
+
parser.parse("*:word").to_s("xxx"))
|
128
|
+
assert_equal('three:word four:word',
|
129
|
+
parser.parse("three:word four:word").to_s("xxx"))
|
130
|
+
end
|
131
|
+
|
132
|
+
def test_qp_allow_any_field()
|
133
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
|
134
|
+
:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new,
|
135
|
+
:allow_any_fields => false)
|
136
|
+
|
137
|
+
assert_equal('key:word',
|
138
|
+
parser.parse("key:word song:word").to_s("xxx"))
|
139
|
+
assert_equal('word key:word', parser.parse("*:word").to_s("xxx"))
|
140
|
+
|
141
|
+
|
142
|
+
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx", "key"],
|
143
|
+
:analyzer => Ferret::Analysis::WhiteSpaceAnalyzer.new)
|
144
|
+
|
145
|
+
assert_equal('key:word song:word',
|
146
|
+
parser.parse("key:word song:word").to_s("xxx"))
|
147
|
+
assert_equal('word key:word song:word', parser.parse("*:word").to_s("xxx"))
|
148
|
+
end
|
115
149
|
end
|
116
150
|
|
117
151
|
def do_test_query_parse_exception_raised(str)
|
@@ -123,7 +157,7 @@ class QueryParserTest < Test::Unit::TestCase
|
|
123
157
|
|
124
158
|
def test_prefix_query
|
125
159
|
parser = Ferret::QueryParser.new("xxx", :fields => ["xxx"],
|
126
|
-
|
160
|
+
:analyzer => Ferret::Analysis::StandardAnalyzer.new)
|
127
161
|
assert_equal(Ferret::Search::PrefixQuery, parser.parse("asdg*").class)
|
128
162
|
assert_equal(Ferret::Search::WildcardQuery, parser.parse("a?dg*").class)
|
129
163
|
assert_equal(Ferret::Search::WildcardQuery, parser.parse("a*dg*").class)
|
@@ -142,9 +142,11 @@ class SearchAndSortTest < Test::Unit::TestCase
|
|
142
142
|
do_test_top_docs(is, q, [0,9,1,8,2,7,3,6,4,5], Sort.new("string"))
|
143
143
|
do_test_top_docs(is, q, [3,2,7,4,8,5,9,1,6,0], Sort.new(["int"]))
|
144
144
|
do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], Sort.new("float"))
|
145
|
+
do_test_top_docs(is, q, [9,6,4,2,0,1,3,5,7,8], "float")
|
145
146
|
do_test_top_docs(is, q, [8,7,5,3,1,0,2,4,6,9], Sort.new("float", true))
|
146
147
|
do_test_top_docs(is, q, [0,6,1,5,9,4,8,7,2,3], Sort.new(["int", "string"], true))
|
147
148
|
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], Sort.new(["int", "string"]))
|
149
|
+
do_test_top_docs(is, q, [3,2,7,8,4,9,5,1,6,0], ["int", "string"])
|
148
150
|
end
|
149
151
|
|
150
152
|
#LENGTH = SortField::SortType.new("length", lambda{|str| str.length})
|
@@ -7,11 +7,9 @@ class FSStoreTest < Test::Unit::TestCase
|
|
7
7
|
include StoreTest
|
8
8
|
include StoreLockTest
|
9
9
|
def setup
|
10
|
-
@dpath = File.join(File.dirname(__FILE__),
|
11
|
-
'../../temp/fsdir')
|
10
|
+
@dpath = File.expand_path(File.join(File.dirname(__FILE__),
|
11
|
+
'../../temp/fsdir'))
|
12
12
|
@dir = FSDirectory.new(@dpath, true)
|
13
|
-
@dir1 = FSDirectory.new(@dpath, true)
|
14
|
-
@dir2 = FSDirectory.new(@dpath, true)
|
15
13
|
end
|
16
14
|
|
17
15
|
def teardown
|
metadata
CHANGED
@@ -3,8 +3,8 @@ rubygems_version: 0.8.11
|
|
3
3
|
specification_version: 1
|
4
4
|
name: ferret
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 0.9.3
|
7
|
-
date: 2006-
|
6
|
+
version: 0.9.4
|
7
|
+
date: 2006-07-01 00:00:00 +09:00
|
8
8
|
summary: Ruby indexing library.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
@@ -51,6 +51,7 @@ files:
|
|
51
51
|
- ext/document.c
|
52
52
|
- ext/compound_io.c
|
53
53
|
- ext/index_rw.c
|
54
|
+
- ext/termdocs.c
|
54
55
|
- ext/vector.c
|
55
56
|
- ext/field.c
|
56
57
|
- ext/term.c
|
@@ -368,7 +369,6 @@ files:
|
|
368
369
|
- test/utils/number_to_spoken.rb
|
369
370
|
- test/unit/analysis/data/wordfile
|
370
371
|
- rake_utils/code_statistics.rb
|
371
|
-
- ext/termdocs.c
|
372
372
|
test_files: []
|
373
373
|
|
374
374
|
rdoc_options:
|