ferret 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/r_doc.c
CHANGED
@@ -32,14 +32,18 @@ frt_field_alloc(VALUE klass)
|
|
32
32
|
return self;
|
33
33
|
}
|
34
34
|
|
35
|
-
#define GET_DF DocField *df
|
35
|
+
#define GET_DF DocField *df = (DocField *)DATA_PTR(self)
|
36
|
+
|
36
37
|
static VALUE
|
37
38
|
frt_field_init(int argc, VALUE *argv, VALUE self)
|
38
39
|
{
|
39
40
|
GET_DF;
|
40
41
|
VALUE rname, rdata, rstored, rindexed, rstore_tv, rbinary, rboost;
|
42
|
+
char *name;
|
43
|
+
char *data;
|
41
44
|
float boost = 1.0;
|
42
45
|
int stored = 0, indexed = 0, store_tv = 0;
|
46
|
+
int len;
|
43
47
|
bool binary = false;
|
44
48
|
switch (rb_scan_args(argc, argv, "25", &rname, &rdata, &rstored,
|
45
49
|
&rindexed, &rstore_tv, &rbinary, &rboost)) {
|
@@ -53,9 +57,9 @@ frt_field_init(int argc, VALUE *argv, VALUE self)
|
|
53
57
|
rdata = rb_obj_as_string(rdata);
|
54
58
|
break;
|
55
59
|
}
|
56
|
-
|
57
|
-
|
58
|
-
|
60
|
+
name = RSTRING(rname)->ptr;
|
61
|
+
len = RSTRING(rdata)->len;
|
62
|
+
data = ALLOC_N(char, len + 1);
|
59
63
|
MEMCPY(data, RSTRING(rdata)->ptr, char, len);
|
60
64
|
data[len] = 0;
|
61
65
|
df_set(df, name, data, stored, indexed, store_tv);
|
@@ -268,8 +272,8 @@ static VALUE
|
|
268
272
|
frt_doc_alloc(VALUE klass)
|
269
273
|
{
|
270
274
|
Document *doc = doc_create();
|
271
|
-
doc->free_data = NULL;
|
272
275
|
VALUE self = Data_Wrap_Struct(klass, &frt_doc_mark, &frt_doc_free, doc);
|
276
|
+
doc->free_data = NULL;
|
273
277
|
object_add(doc, self);
|
274
278
|
return self;
|
275
279
|
}
|
@@ -300,7 +304,8 @@ frt_get_doc(Document *doc)
|
|
300
304
|
return self;
|
301
305
|
}
|
302
306
|
|
303
|
-
#define GET_DOC Document *doc
|
307
|
+
#define GET_DOC Document *doc = (Document *)DATA_PTR(self)
|
308
|
+
|
304
309
|
static VALUE
|
305
310
|
frt_doc_init(VALUE self)
|
306
311
|
{
|
data/ext/r_index_io.c
CHANGED
@@ -21,6 +21,7 @@ VALUE rterm_index_interval_key;
|
|
21
21
|
|
22
22
|
extern void frt_set_term(VALUE rterm, Term *t);
|
23
23
|
extern VALUE frt_get_rterm(char *field, char *text);
|
24
|
+
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
24
25
|
|
25
26
|
/****************************************************************************
|
26
27
|
*
|
@@ -35,7 +36,7 @@ frt_te_free(void *p)
|
|
35
36
|
te->close(te);
|
36
37
|
}
|
37
38
|
|
38
|
-
#define GET_TE TermEnum *te
|
39
|
+
#define GET_TE TermEnum *te = (TermEnum *)DATA_PTR(self)
|
39
40
|
static VALUE
|
40
41
|
frt_te_next(VALUE self)
|
41
42
|
{
|
@@ -100,8 +101,8 @@ frt_tvoi_init(VALUE self, VALUE rstart, VALUE rend)
|
|
100
101
|
return self;
|
101
102
|
}
|
102
103
|
|
103
|
-
#define GET_TVOI TVOffsetInfo *tvoi
|
104
|
-
|
104
|
+
#define GET_TVOI TVOffsetInfo *tvoi = (TVOffsetInfo *)DATA_PTR(self)
|
105
|
+
|
105
106
|
static VALUE
|
106
107
|
frt_tvoi_set_start(VALUE self, VALUE rstart)
|
107
108
|
{
|
@@ -135,9 +136,9 @@ frt_tvoi_get_end(VALUE self)
|
|
135
136
|
static VALUE
|
136
137
|
frt_tvoi_eql(VALUE self, VALUE rother)
|
137
138
|
{
|
138
|
-
if (TYPE(rother) != T_DATA) return Qfalse;
|
139
|
-
TVOffsetInfo *other;
|
140
139
|
GET_TVOI;
|
140
|
+
TVOffsetInfo *other;
|
141
|
+
if (TYPE(rother) != T_DATA) return Qfalse;
|
141
142
|
Data_Get_Struct(rother, TVOffsetInfo, other);
|
142
143
|
|
143
144
|
return ((tvoi->start == other->start) && (tvoi->end == other->end))
|
@@ -234,7 +235,8 @@ frt_get_tv(TermVector *tv)
|
|
234
235
|
return self;
|
235
236
|
}
|
236
237
|
|
237
|
-
#define GET_TV TermVector *tv
|
238
|
+
#define GET_TV TermVector *tv = (TermVector *)DATA_PTR(self)
|
239
|
+
|
238
240
|
static VALUE
|
239
241
|
frt_tv_get_field(VALUE self)
|
240
242
|
{
|
@@ -271,9 +273,10 @@ frt_tv_get_positions(VALUE self)
|
|
271
273
|
{
|
272
274
|
int i, j, freq;
|
273
275
|
GET_TV;
|
276
|
+
VALUE rpositions, rpositionss;
|
277
|
+
|
274
278
|
if (!tv->positions) return Qnil;
|
275
|
-
|
276
|
-
VALUE rpositionss = rb_ary_new2(tv->tcnt);
|
279
|
+
rpositionss = rb_ary_new2(tv->tcnt);
|
277
280
|
for (i = 0; i < tv->tcnt; i++) {
|
278
281
|
freq = tv->freqs[i];
|
279
282
|
rpositions = rb_ary_new2(freq);
|
@@ -290,9 +293,10 @@ frt_tv_get_offsets(VALUE self)
|
|
290
293
|
{
|
291
294
|
int i, j, freq;
|
292
295
|
GET_TV;
|
296
|
+
VALUE roffsetss, roffsets, roffset;
|
293
297
|
if (!tv->offsets) return Qnil;
|
294
|
-
|
295
|
-
|
298
|
+
roffsetss = rb_ary_new2(tv->tcnt);
|
299
|
+
|
296
300
|
for (i = 0; i < tv->tcnt; i++) {
|
297
301
|
freq = tv->freqs[i];
|
298
302
|
roffsets = rb_ary_new2(freq);
|
@@ -324,7 +328,8 @@ frt_get_tde(TermDocEnum *tde)
|
|
324
328
|
return Data_Wrap_Struct(cTermDocEnum, NULL, &frt_tde_free, tde);
|
325
329
|
}
|
326
330
|
|
327
|
-
#define GET_TDE TermDocEnum *tde
|
331
|
+
#define GET_TDE TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self)
|
332
|
+
|
328
333
|
static VALUE
|
329
334
|
frt_tde_close(VALUE self)
|
330
335
|
{
|
@@ -375,12 +380,12 @@ frt_tde_next_position(VALUE self)
|
|
375
380
|
static VALUE
|
376
381
|
frt_tde_read(VALUE self, VALUE rdocs, VALUE rfreqs)
|
377
382
|
{
|
378
|
-
int i;
|
383
|
+
int i, req_num, cnt;
|
379
384
|
GET_TDE;
|
380
385
|
Check_Type(rdocs, T_ARRAY);
|
381
386
|
Check_Type(rfreqs, T_ARRAY);
|
382
|
-
|
383
|
-
|
387
|
+
req_num = MIN(RARRAY(rdocs)->len, RARRAY(rfreqs)->len);
|
388
|
+
cnt = tde->read(tde, (int *)RARRAY(rdocs)->ptr,
|
384
389
|
(int *)RARRAY(rfreqs)->ptr, req_num);
|
385
390
|
for (i = 0; i < cnt; i++) {
|
386
391
|
RARRAY(rdocs)->ptr[i] = INT2FIX(RARRAY(rdocs)->ptr[i]);
|
@@ -425,8 +430,6 @@ static VALUE
|
|
425
430
|
frt_iw_init(int argc, VALUE *argv, VALUE self)
|
426
431
|
{
|
427
432
|
VALUE rdir, roptions, rval;
|
428
|
-
bool close_dir = false;
|
429
|
-
bool close_analyzer = true;
|
430
433
|
bool create = false;
|
431
434
|
bool use_compound_file = true;
|
432
435
|
Store *store;
|
@@ -436,20 +439,21 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
436
439
|
if (argc > 0) {
|
437
440
|
if (TYPE(rdir) == T_DATA) {
|
438
441
|
store = DATA_PTR(rdir);
|
442
|
+
ref(store);
|
439
443
|
} else {
|
440
|
-
|
444
|
+
StringValue(rdir);
|
441
445
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
442
|
-
close_dir = true;
|
443
446
|
}
|
444
447
|
} else {
|
445
448
|
store = open_ram_store();
|
446
|
-
close_dir = true;
|
447
449
|
}
|
448
450
|
if (argc == 2) {
|
449
451
|
Check_Type(roptions, T_HASH);
|
452
|
+
/* Let ruby's GC handle the closing of the store
|
450
453
|
if (!close_dir) {
|
451
454
|
close_dir = RTEST(rb_hash_aref(roptions, rclose_dir_key));
|
452
455
|
}
|
456
|
+
*/
|
453
457
|
/* use_compound_file defaults to true */
|
454
458
|
use_compound_file =
|
455
459
|
(rb_hash_aref(roptions, ruse_compound_file_key) == Qfalse) ? false : true;
|
@@ -458,8 +462,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
458
462
|
if (rval == Qnil) {
|
459
463
|
analyzer = mb_standard_analyzer_create(true);
|
460
464
|
} else {
|
461
|
-
|
462
|
-
close_analyzer = false;
|
465
|
+
analyzer = frt_get_cwrapped_analyzer(rval);
|
463
466
|
}
|
464
467
|
create = RTEST(rb_hash_aref(roptions, rcreate_key));
|
465
468
|
if (!create && RTEST(rb_hash_aref(roptions, rcreate_if_missing_key))) {
|
@@ -468,7 +471,8 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
468
471
|
}
|
469
472
|
}
|
470
473
|
}
|
471
|
-
iw = iw_open(store, analyzer, create
|
474
|
+
iw = iw_open(store, analyzer, create);
|
475
|
+
store_deref(store);
|
472
476
|
iw->use_compound_file = use_compound_file;
|
473
477
|
|
474
478
|
SET_INT_ATTR(merge_factor);
|
@@ -481,7 +485,8 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
481
485
|
return self;
|
482
486
|
}
|
483
487
|
|
484
|
-
#define GET_IW IndexWriter *iw
|
488
|
+
#define GET_IW IndexWriter *iw = (IndexWriter *)DATA_PTR(self)
|
489
|
+
|
485
490
|
static VALUE
|
486
491
|
frt_iw_close(VALUE self)
|
487
492
|
{
|
@@ -630,21 +635,21 @@ static VALUE
|
|
630
635
|
frt_ir_init(int argc, VALUE *argv, VALUE self)
|
631
636
|
{
|
632
637
|
VALUE rdir, rclose_dir;
|
633
|
-
bool close_dir =
|
638
|
+
//bool close_dir = false;
|
634
639
|
Store *store = NULL;
|
635
640
|
IndexReader *ir;
|
636
641
|
switch (rb_scan_args(argc, argv, "11", &rdir, &rclose_dir)) {
|
637
|
-
case 2: close_dir = RTEST(rclose_dir);
|
642
|
+
case 2: //close_dir = RTEST(rclose_dir);
|
638
643
|
case 1:
|
639
644
|
if (TYPE(rdir) == T_DATA) {
|
640
645
|
store = DATA_PTR(rdir);
|
641
646
|
} else {
|
642
647
|
rdir = rb_obj_as_string(rdir);
|
643
648
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
644
|
-
|
649
|
+
deref(store);
|
645
650
|
}
|
646
651
|
}
|
647
|
-
ir = ir_open(store
|
652
|
+
ir = ir_open(store);
|
648
653
|
Frt_Wrap_Struct(self, &frt_ir_mark, &frt_ir_free, ir);
|
649
654
|
object_add(ir, self);
|
650
655
|
return self;
|
@@ -657,7 +662,8 @@ frt_ir_open(int argc, VALUE *argv, VALUE klass)
|
|
657
662
|
return frt_ir_init(argc, argv, self);
|
658
663
|
}
|
659
664
|
|
660
|
-
#define GET_IR IndexReader *ir
|
665
|
+
#define GET_IR IndexReader *ir = (IndexReader *)DATA_PTR(self)
|
666
|
+
|
661
667
|
static VALUE
|
662
668
|
frt_ir_set_norm(VALUE self, VALUE rdoc_num, VALUE rfield, VALUE rval)
|
663
669
|
{
|
@@ -671,8 +677,9 @@ static VALUE
|
|
671
677
|
frt_ir_get_norms(VALUE self, VALUE rfield)
|
672
678
|
{
|
673
679
|
GET_IR;
|
680
|
+
uchar *norms;
|
674
681
|
rfield = rb_obj_as_string(rfield);
|
675
|
-
|
682
|
+
norms = ir->get_norms(ir, RSTRING(rfield)->ptr);
|
676
683
|
if (norms) {
|
677
684
|
return rb_str_new((char *)norms, ir->max_doc(ir));
|
678
685
|
} else {
|
@@ -684,8 +691,9 @@ static VALUE
|
|
684
691
|
frt_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset)
|
685
692
|
{
|
686
693
|
GET_IR;
|
694
|
+
int offset;
|
687
695
|
rfield = rb_obj_as_string(rfield);
|
688
|
-
|
696
|
+
offset = FIX2INT(roffset);
|
689
697
|
Check_Type(rnorms, T_STRING);
|
690
698
|
if (RSTRING(rnorms)->len < offset + ir->max_doc(ir)) {
|
691
699
|
rb_raise(rb_eArgError, "supplied a string of length:%d to IndexReader#get_norms_into but needed a string of length offset:%d + maxdoc:%d", RSTRING(rnorms)->len, offset, ir->max_doc(ir));
|
@@ -778,9 +786,9 @@ static VALUE
|
|
778
786
|
frt_ir_get_term_vector(VALUE self, VALUE rdoc_num, VALUE rfield)
|
779
787
|
{
|
780
788
|
GET_IR;
|
789
|
+
TermVector *tv;
|
781
790
|
rfield = rb_obj_as_string(rfield);
|
782
|
-
|
783
|
-
ir->get_term_vector(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr);
|
791
|
+
tv = ir->get_term_vector(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr);
|
784
792
|
return frt_get_tv(tv);
|
785
793
|
}
|
786
794
|
|
data/ext/r_qparser.c
CHANGED
@@ -10,10 +10,11 @@ VALUE rwild_lower_key;
|
|
10
10
|
VALUE roccur_default_key;
|
11
11
|
VALUE rdefault_slop_key;
|
12
12
|
VALUE rclean_str_key;
|
13
|
-
VALUE ranalyzer_key;
|
13
|
+
extern VALUE ranalyzer_key;
|
14
14
|
|
15
15
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
16
16
|
extern VALUE frt_get_q(Query *q);
|
17
|
+
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
17
18
|
|
18
19
|
/****************************************************************************
|
19
20
|
*
|
@@ -56,7 +57,7 @@ frt_get_fields(VALUE rfields)
|
|
56
57
|
fields = NULL;
|
57
58
|
} else {
|
58
59
|
s = str = estrdup(RSTRING(rval)->ptr);
|
59
|
-
while ((p =
|
60
|
+
while ((p = strchr(s, '|')) != '\0') {
|
60
61
|
*p = '\0';
|
61
62
|
hs_add(fields, estrdup(s));
|
62
63
|
s = p + 1;
|
@@ -84,7 +85,18 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
84
85
|
if (argc > 0) {
|
85
86
|
def_fields = frt_get_fields(rdef_field);
|
86
87
|
}
|
87
|
-
|
88
|
+
|
89
|
+
if (argc == 2) {
|
90
|
+
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
91
|
+
analyzer = frt_get_cwrapped_analyzer(rval);
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
if (!analyzer) {
|
96
|
+
analyzer = mb_standard_analyzer_create(true);
|
97
|
+
}
|
98
|
+
|
99
|
+
qp = qp_create(all_fields, def_fields, analyzer);
|
88
100
|
qp->allow_any_fields = true;
|
89
101
|
qp->clean_str = true;
|
90
102
|
/* handle options */
|
@@ -107,17 +119,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
107
119
|
if (Qnil != (rval = rb_hash_aref(roptions, rclean_str_key))) {
|
108
120
|
qp->clean_str = RTEST(rval);
|
109
121
|
}
|
110
|
-
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
111
|
-
Data_Get_Struct(rval, Analyzer, analyzer);
|
112
|
-
}
|
113
|
-
}
|
114
|
-
if (!analyzer) {
|
115
|
-
analyzer = letter_analyzer_create(true);
|
116
|
-
/* make sure the analyzer will be disposed of when the QueryParser
|
117
|
-
* is garbage collected. */
|
118
|
-
rval = frt_get_analyzer(analyzer);
|
119
122
|
}
|
120
|
-
qp->analyzer = analyzer;
|
121
123
|
Frt_Wrap_Struct(self, frt_qp_mark, frt_qp_free, qp);
|
122
124
|
object_add(qp, self);
|
123
125
|
return self;
|
@@ -162,7 +164,6 @@ Init_qparser(void)
|
|
162
164
|
roccur_default_key = ID2SYM(rb_intern("occur_default"));
|
163
165
|
rdefault_slop_key = ID2SYM(rb_intern("default_slop"));
|
164
166
|
rclean_str_key = ID2SYM(rb_intern("clean_string"));
|
165
|
-
ranalyzer_key = ID2SYM(rb_intern("analyzer"));
|
166
167
|
|
167
168
|
/* QueryParser */
|
168
169
|
cQueryParser = rb_define_class_under(mFerret, "QueryParser", rb_cObject);
|
data/ext/r_search.c
CHANGED
@@ -14,7 +14,9 @@ extern void frt_ir_mark(void *p);
|
|
14
14
|
static VALUE cScoreDoc;
|
15
15
|
static VALUE cTopDocs;
|
16
16
|
static VALUE cExplanation;
|
17
|
+
static VALUE cSearcher;
|
17
18
|
static VALUE cIndexSearcher;
|
19
|
+
static VALUE cMultiSearcher;
|
18
20
|
static VALUE cSortField;
|
19
21
|
static VALUE cSortType;
|
20
22
|
static VALUE cSort;
|
@@ -85,6 +87,7 @@ extern void frt_set_term(VALUE rterm, Term *t);
|
|
85
87
|
extern Term *frt_get_term(VALUE rterm);
|
86
88
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
87
89
|
extern HashSet *frt_get_fields(VALUE rfields);
|
90
|
+
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
88
91
|
|
89
92
|
/****************************************************************************
|
90
93
|
*
|
@@ -147,7 +150,8 @@ frt_td_mark(void *p)
|
|
147
150
|
frt_gc_mark(td->hits);
|
148
151
|
}
|
149
152
|
|
150
|
-
#define GET_TD TopDocs *td
|
153
|
+
#define GET_TD TopDocs *td = (TopDocs *)DATA_PTR(self)
|
154
|
+
|
151
155
|
static VALUE
|
152
156
|
frt_get_td(TopDocs *td)
|
153
157
|
{
|
@@ -192,7 +196,7 @@ frt_td_total_hits(VALUE self)
|
|
192
196
|
static VALUE
|
193
197
|
frt_td_fields(VALUE self)
|
194
198
|
{
|
195
|
-
|
199
|
+
rb_raise(rb_eNotImpError, "not implemented in the c extension version");
|
196
200
|
return Qnil;
|
197
201
|
}
|
198
202
|
|
@@ -215,7 +219,8 @@ frt_td_each(VALUE self)
|
|
215
219
|
*
|
216
220
|
****************************************************************************/
|
217
221
|
|
218
|
-
#define GET_EXPL Explanation *expl
|
222
|
+
#define GET_EXPL Explanation *expl = (Explanation *)DATA_PTR(self)
|
223
|
+
|
219
224
|
static VALUE
|
220
225
|
frt_expl_to_s(VALUE self)
|
221
226
|
{
|
@@ -252,12 +257,12 @@ frt_expl_value(VALUE self)
|
|
252
257
|
static void
|
253
258
|
frt_q_free(void *p)
|
254
259
|
{
|
255
|
-
Query *q = (Query *)p;
|
256
260
|
object_del(p);
|
257
|
-
|
261
|
+
q_deref((Query *)p);
|
258
262
|
}
|
259
263
|
|
260
|
-
#define GET_Q Query *q
|
264
|
+
#define GET_Q Query *q = (Query *)DATA_PTR(self)
|
265
|
+
|
261
266
|
|
262
267
|
static VALUE
|
263
268
|
frt_q_to_s(int argc, VALUE *argv, VALUE self)
|
@@ -387,15 +392,15 @@ frt_bc_mark(void *p)
|
|
387
392
|
static void
|
388
393
|
frt_bc_free(void *p)
|
389
394
|
{
|
390
|
-
|
391
|
-
|
392
|
-
free(bc);
|
395
|
+
object_del(p);
|
396
|
+
bc_deref((BooleanClause *)p);
|
393
397
|
}
|
394
398
|
|
395
399
|
static VALUE
|
396
400
|
frt_get_bc(BooleanClause *bc)
|
397
401
|
{
|
398
402
|
VALUE self = Data_Wrap_Struct(cBooleanClause, &frt_bc_mark, &frt_bc_free, bc);
|
403
|
+
ref(bc);
|
399
404
|
object_add(bc, self);
|
400
405
|
return self;
|
401
406
|
}
|
@@ -411,13 +416,14 @@ frt_bc_init(int argc, VALUE *argv, VALUE self)
|
|
411
416
|
occur = FIX2INT(roccur);
|
412
417
|
}
|
413
418
|
Data_Get_Struct(rquery, Query, sub_q);
|
419
|
+
ref(sub_q);
|
414
420
|
bc = bc_create(sub_q, occur);
|
415
421
|
Frt_Wrap_Struct(self, &frt_bc_mark, &frt_bc_free, bc);
|
416
422
|
object_add(bc, self);
|
417
423
|
return self;
|
418
424
|
}
|
419
425
|
|
420
|
-
#define GET_BC BooleanClause *bc
|
426
|
+
#define GET_BC BooleanClause *bc = (BooleanClause *)DATA_PTR(self)
|
421
427
|
static VALUE
|
422
428
|
frt_bc_get_query(VALUE self)
|
423
429
|
{
|
@@ -505,10 +511,11 @@ frt_bq_init(int argc, VALUE *argv, VALUE self)
|
|
505
511
|
{
|
506
512
|
VALUE rcoord_disabled;
|
507
513
|
bool coord_disabled = false;
|
514
|
+
Query *q;
|
508
515
|
if (rb_scan_args(argc, argv, "01", &rcoord_disabled)) {
|
509
516
|
coord_disabled = RTEST(rcoord_disabled);
|
510
517
|
}
|
511
|
-
|
518
|
+
q = bq_create(coord_disabled);
|
512
519
|
Frt_Wrap_Struct(self, &frt_bq_mark, &frt_q_free, q);
|
513
520
|
object_add(q, self);
|
514
521
|
|
@@ -575,10 +582,12 @@ frt_rq_new_more(VALUE klass, VALUE rfield, VALUE rlterm, VALUE rincl)
|
|
575
582
|
{
|
576
583
|
Query *q;
|
577
584
|
VALUE self;
|
578
|
-
rfield = rb_obj_as_string(rfield);
|
579
585
|
char *lterm = NIL_P(rlterm) ? NULL : RSTRING(rb_obj_as_string(rlterm))->ptr;
|
580
|
-
|
581
|
-
|
586
|
+
rfield = rb_obj_as_string(rfield);
|
587
|
+
if (!lterm) {
|
588
|
+
rb_raise(rb_eArgError, "The lower term must not be nil in a more "
|
589
|
+
"than query");
|
590
|
+
}
|
582
591
|
q = rq_create_more(RSTRING(rfield)->ptr, lterm, RTEST(rincl));
|
583
592
|
self = Data_Wrap_Struct(klass, NULL, &frt_q_free, q);
|
584
593
|
object_add(q, self);
|
@@ -590,10 +599,12 @@ frt_rq_new_less(VALUE klass, VALUE rfield, VALUE ruterm, VALUE rincu)
|
|
590
599
|
{
|
591
600
|
Query *q;
|
592
601
|
VALUE self;
|
593
|
-
rfield = rb_obj_as_string(rfield);
|
594
602
|
char *uterm = NIL_P(ruterm) ? NULL : RSTRING(rb_obj_as_string(ruterm))->ptr;
|
595
|
-
|
596
|
-
|
603
|
+
rfield = rb_obj_as_string(rfield);
|
604
|
+
if (!uterm) {
|
605
|
+
rb_raise(rb_eArgError, "The upper term must not be nil in a less "
|
606
|
+
"than query");
|
607
|
+
}
|
597
608
|
q = rq_create_less(RSTRING(rfield)->ptr, uterm, RTEST(rincu));
|
598
609
|
self = Data_Wrap_Struct(klass, NULL, &frt_q_free, q);
|
599
610
|
object_add(q, self);
|
@@ -734,7 +745,6 @@ frt_mphq_set_slop(VALUE self, VALUE rslop)
|
|
734
745
|
return self;
|
735
746
|
}
|
736
747
|
|
737
|
-
|
738
748
|
/****************************************************************************
|
739
749
|
*
|
740
750
|
* PrefixQuery Methods
|
@@ -1054,9 +1064,10 @@ frt_spanxq_mark(void *p)
|
|
1054
1064
|
static VALUE
|
1055
1065
|
frt_spanxq_init(VALUE self, VALUE rinc, VALUE rexc)
|
1056
1066
|
{
|
1067
|
+
Query *q;
|
1057
1068
|
Check_Type(rinc, T_DATA);
|
1058
1069
|
Check_Type(rexc, T_DATA);
|
1059
|
-
|
1070
|
+
q = spanxq_create(DATA_PTR(rinc), DATA_PTR(rexc));
|
1060
1071
|
q->destroy_all = false;
|
1061
1072
|
Frt_Wrap_Struct(self, &frt_spanxq_mark, &frt_q_free, q);
|
1062
1073
|
object_add(q, self);
|
@@ -1077,7 +1088,7 @@ frt_f_free(void *p)
|
|
1077
1088
|
f->destroy(f);
|
1078
1089
|
}
|
1079
1090
|
|
1080
|
-
#define GET_F Filter *f
|
1091
|
+
#define GET_F Filter *f = (Filter *)DATA_PTR(self)
|
1081
1092
|
|
1082
1093
|
static VALUE
|
1083
1094
|
frt_f_to_s(VALUE self)
|
@@ -1126,11 +1137,14 @@ frt_rf_new_more(int argc, VALUE *argv, VALUE klass)
|
|
1126
1137
|
Filter *f;
|
1127
1138
|
VALUE self;
|
1128
1139
|
VALUE rfield, rlterm, rincl;
|
1140
|
+
char *lterm;
|
1129
1141
|
rb_scan_args(argc, argv, "21", &rfield, &rlterm, &rincl);
|
1130
1142
|
rfield = rb_obj_as_string(rfield);
|
1131
|
-
|
1132
|
-
if (!lterm)
|
1133
|
-
rb_raise(rb_eArgError, "The lower term must not be nil in a more
|
1143
|
+
lterm = NIL_P(rlterm) ? NULL : RSTRING(rb_obj_as_string(rlterm))->ptr;
|
1144
|
+
if (!lterm) {
|
1145
|
+
rb_raise(rb_eArgError, "The lower term must not be nil in a more "
|
1146
|
+
"than filter");
|
1147
|
+
}
|
1134
1148
|
f = rfilt_create(RSTRING(rfield)->ptr, lterm, NULL, rincl != Qfalse, false);
|
1135
1149
|
self = Data_Wrap_Struct(klass, NULL, &frt_f_free, f);
|
1136
1150
|
object_add(f, self);
|
@@ -1143,11 +1157,14 @@ frt_rf_new_less(int argc, VALUE *argv, VALUE klass)
|
|
1143
1157
|
Filter *f;
|
1144
1158
|
VALUE self;
|
1145
1159
|
VALUE rfield, ruterm, rincu;
|
1160
|
+
char *uterm;
|
1146
1161
|
rb_scan_args(argc, argv, "21", &rfield, &ruterm, &rincu);
|
1147
1162
|
rfield = rb_obj_as_string(rfield);
|
1148
|
-
|
1149
|
-
if (!uterm)
|
1150
|
-
rb_raise(rb_eArgError, "The upper term must not be nil in a less
|
1163
|
+
uterm = NIL_P(ruterm) ? NULL : RSTRING(rb_obj_as_string(ruterm))->ptr;
|
1164
|
+
if (!uterm) {
|
1165
|
+
rb_raise(rb_eArgError, "The upper term must not be nil in a less "
|
1166
|
+
"than filter");
|
1167
|
+
}
|
1151
1168
|
f = rfilt_create(RSTRING(rfield)->ptr, NULL, uterm, false, rincu != Qfalse);
|
1152
1169
|
self = Data_Wrap_Struct(klass, NULL, &frt_f_free, f);
|
1153
1170
|
object_add(f, self);
|
@@ -1181,9 +1198,8 @@ frt_qf_init(VALUE self, VALUE rquery)
|
|
1181
1198
|
static void
|
1182
1199
|
frt_sf_free(void *p)
|
1183
1200
|
{
|
1184
|
-
|
1185
|
-
|
1186
|
-
sort_field_destroy(sf);
|
1201
|
+
object_del(p);
|
1202
|
+
sort_field_destroy((SortField *)p);
|
1187
1203
|
}
|
1188
1204
|
|
1189
1205
|
static VALUE
|
@@ -1220,13 +1236,16 @@ frt_sf_init(int argc, VALUE *argv, VALUE self)
|
|
1220
1236
|
rfield = rb_obj_as_string(rfield);
|
1221
1237
|
|
1222
1238
|
sf = sort_field_create(RSTRING(rfield)->ptr, sort_type, is_reverse);
|
1239
|
+
if (sf->field == NULL && RSTRING(rfield)->ptr != NULL) {
|
1240
|
+
sf->field = estrdup(RSTRING(rfield)->ptr);
|
1241
|
+
}
|
1223
1242
|
|
1224
1243
|
Frt_Wrap_Struct(self, NULL, &frt_sf_free, sf);
|
1225
1244
|
object_add(sf, self);
|
1226
1245
|
return self;
|
1227
1246
|
}
|
1228
1247
|
|
1229
|
-
#define GET_SF SortField *sf
|
1248
|
+
#define GET_SF SortField *sf = (SortField *)DATA_PTR(self)
|
1230
1249
|
static VALUE
|
1231
1250
|
frt_sf_is_reverse(VALUE self)
|
1232
1251
|
{
|
@@ -1254,9 +1273,19 @@ frt_sf_get_comparator(VALUE self)
|
|
1254
1273
|
return Qnil;
|
1255
1274
|
}
|
1256
1275
|
|
1276
|
+
static VALUE
|
1277
|
+
frt_sf_to_s(VALUE self)
|
1278
|
+
{
|
1279
|
+
GET_SF;
|
1280
|
+
char *str = sort_field_to_s(sf);
|
1281
|
+
VALUE rstr = rb_str_new2(str);
|
1282
|
+
free(str);
|
1283
|
+
return rstr;
|
1284
|
+
}
|
1285
|
+
|
1257
1286
|
/****************************************************************************
|
1258
1287
|
*
|
1259
|
-
*
|
1288
|
+
* Sort Methods
|
1260
1289
|
*
|
1261
1290
|
****************************************************************************/
|
1262
1291
|
|
@@ -1317,7 +1346,7 @@ frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
|
|
1317
1346
|
sort_add_sort_field(sort, sf);
|
1318
1347
|
}
|
1319
1348
|
|
1320
|
-
#define GET_SORT Sort *sort
|
1349
|
+
#define GET_SORT Sort *sort = (Sort *)DATA_PTR(self)
|
1321
1350
|
static VALUE
|
1322
1351
|
frt_sort_init(int argc, VALUE *argv, VALUE self)
|
1323
1352
|
{
|
@@ -1363,65 +1392,34 @@ frt_sort_get_fields(VALUE self)
|
|
1363
1392
|
return object_get(sort->sort_fields);
|
1364
1393
|
}
|
1365
1394
|
|
1395
|
+
|
1396
|
+
static VALUE
|
1397
|
+
frt_sort_to_s(VALUE self)
|
1398
|
+
{
|
1399
|
+
GET_SORT;
|
1400
|
+
char *str = sort_to_s(sort);
|
1401
|
+
VALUE rstr = rb_str_new2(str);
|
1402
|
+
free(str);
|
1403
|
+
return rstr;
|
1404
|
+
}
|
1366
1405
|
/****************************************************************************
|
1367
1406
|
*
|
1368
|
-
*
|
1407
|
+
* Searcher Methods
|
1369
1408
|
*
|
1370
1409
|
****************************************************************************/
|
1371
1410
|
|
1372
1411
|
static void
|
1373
|
-
|
1412
|
+
frt_sea_free(void *p)
|
1374
1413
|
{
|
1375
1414
|
Searcher *sea = (Searcher *)p;
|
1376
|
-
|
1415
|
+
object_del(sea);
|
1416
|
+
sea_close(sea);
|
1377
1417
|
}
|
1378
1418
|
|
1379
|
-
|
1380
|
-
frt_is_mark(void *p)
|
1381
|
-
{
|
1382
|
-
Searcher *sea = (Searcher *)p;
|
1383
|
-
frt_gc_mark(sea->ir);
|
1384
|
-
frt_gc_mark(sea->ir->store);
|
1385
|
-
}
|
1386
|
-
|
1387
|
-
#define FRT_GET_IR(rir, ir) do {\
|
1388
|
-
rir = Data_Wrap_Struct(cIndexReader, &frt_ir_mark, &frt_ir_free, ir);\
|
1389
|
-
object_add(ir, rir);\
|
1390
|
-
} while (0)
|
1391
|
-
|
1392
|
-
static VALUE
|
1393
|
-
frt_is_init(VALUE self, VALUE obj)
|
1394
|
-
{
|
1395
|
-
Store *store = NULL;
|
1396
|
-
IndexReader *ir = NULL;
|
1397
|
-
Searcher *sea;
|
1398
|
-
if (TYPE(obj) == T_STRING) {
|
1399
|
-
store = open_fs_store(StringValueCStr(obj));
|
1400
|
-
ir = ir_open(store, true);
|
1401
|
-
FRT_GET_IR(obj, ir);
|
1402
|
-
} else {
|
1403
|
-
Check_Type(obj, T_DATA);
|
1404
|
-
if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
|
1405
|
-
Data_Get_Struct(obj, Store, store);
|
1406
|
-
ir = ir_open(store, false);
|
1407
|
-
FRT_GET_IR(obj, ir);
|
1408
|
-
} else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
|
1409
|
-
Data_Get_Struct(obj, IndexReader, ir);
|
1410
|
-
} else {
|
1411
|
-
rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
|
1412
|
-
}
|
1413
|
-
}
|
1414
|
-
|
1415
|
-
sea = sea_create(ir);
|
1416
|
-
sea->close_ir = false;
|
1417
|
-
Frt_Wrap_Struct(self, &frt_is_mark, &frt_is_free, sea);
|
1418
|
-
return self;
|
1419
|
-
}
|
1420
|
-
|
1421
|
-
#define GET_SEA Searcher *sea; Data_Get_Struct(self, Searcher, sea)
|
1419
|
+
#define GET_SEA Searcher *sea = (Searcher *)DATA_PTR(self)
|
1422
1420
|
|
1423
1421
|
static VALUE
|
1424
|
-
|
1422
|
+
frt_sea_close(VALUE self)
|
1425
1423
|
{
|
1426
1424
|
GET_SEA;
|
1427
1425
|
Frt_Unwrap_Struct(self);
|
@@ -1430,14 +1428,14 @@ frt_is_close(VALUE self)
|
|
1430
1428
|
}
|
1431
1429
|
|
1432
1430
|
static VALUE
|
1433
|
-
|
1431
|
+
frt_sea_get_reader(VALUE self, VALUE rterm)
|
1434
1432
|
{
|
1435
1433
|
GET_SEA;
|
1436
1434
|
return object_get(sea->ir);
|
1437
1435
|
}
|
1438
1436
|
|
1439
1437
|
static VALUE
|
1440
|
-
|
1438
|
+
frt_sea_doc_freq(VALUE self, VALUE rterm)
|
1441
1439
|
{
|
1442
1440
|
GET_SEA;
|
1443
1441
|
Term t;
|
@@ -1446,13 +1444,15 @@ frt_is_doc_freq(VALUE self, VALUE rterm)
|
|
1446
1444
|
}
|
1447
1445
|
|
1448
1446
|
static VALUE
|
1449
|
-
|
1447
|
+
frt_sea_doc_freqs(VALUE self, VALUE rterms)
|
1450
1448
|
{
|
1451
1449
|
int i;
|
1452
1450
|
GET_SEA;
|
1453
1451
|
Term t;
|
1452
|
+
VALUE freqs;
|
1454
1453
|
Check_Type(rterms, T_ARRAY);
|
1455
|
-
|
1454
|
+
|
1455
|
+
freqs = rb_ary_new2(RARRAY(rterms)->len);
|
1456
1456
|
for (i = 0; i < RARRAY(rterms)->len; i++) {
|
1457
1457
|
frt_set_term(RARRAY(rterms)->ptr[i], &t);
|
1458
1458
|
rb_ary_store(freqs, i, INT2FIX(sea->doc_freq(sea, &t)));
|
@@ -1461,21 +1461,21 @@ frt_is_doc_freqs(VALUE self, VALUE rterms)
|
|
1461
1461
|
}
|
1462
1462
|
|
1463
1463
|
static VALUE
|
1464
|
-
|
1464
|
+
frt_sea_doc(VALUE self, VALUE rdoc_num)
|
1465
1465
|
{
|
1466
1466
|
GET_SEA;
|
1467
1467
|
return frt_get_doc(sea->get_doc(sea, FIX2INT(rdoc_num)));
|
1468
1468
|
}
|
1469
1469
|
|
1470
1470
|
static VALUE
|
1471
|
-
|
1471
|
+
frt_sea_max_doc(VALUE self)
|
1472
1472
|
{
|
1473
1473
|
GET_SEA;
|
1474
1474
|
return INT2FIX(sea->max_doc(sea));
|
1475
1475
|
}
|
1476
1476
|
|
1477
1477
|
static TopDocs *
|
1478
|
-
|
1478
|
+
frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
1479
1479
|
{
|
1480
1480
|
VALUE rval;
|
1481
1481
|
int first_doc = 0, num_docs = 10;
|
@@ -1508,68 +1508,179 @@ frt_is_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
1508
1508
|
}
|
1509
1509
|
|
1510
1510
|
static VALUE
|
1511
|
-
|
1511
|
+
frt_sea_search(int argc, VALUE *argv, VALUE self)
|
1512
1512
|
{
|
1513
1513
|
GET_SEA;
|
1514
1514
|
VALUE rquery, roptions;
|
1515
1515
|
Query *query;
|
1516
1516
|
rb_scan_args(argc, argv, "11", &rquery, &roptions);
|
1517
1517
|
Data_Get_Struct(rquery, Query, query);
|
1518
|
-
return frt_get_td(
|
1518
|
+
return frt_get_td(frt_sea_search_internal(query, roptions, sea));
|
1519
1519
|
}
|
1520
1520
|
|
1521
1521
|
static VALUE
|
1522
|
-
|
1522
|
+
frt_sea_search_each(VALUE self, VALUE rquery, VALUE roptions)
|
1523
1523
|
{
|
1524
1524
|
return Qnil;
|
1525
1525
|
}
|
1526
1526
|
|
1527
1527
|
static VALUE
|
1528
|
-
|
1528
|
+
frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_num)
|
1529
1529
|
{
|
1530
1530
|
GET_SEA;
|
1531
1531
|
Query *query;
|
1532
|
+
Explanation *expl;
|
1532
1533
|
Data_Get_Struct(rquery, Query, query);
|
1533
|
-
|
1534
|
+
expl = sea->explain(sea, query, FIX2INT(rdoc_num));
|
1534
1535
|
return Data_Wrap_Struct(cExplanation, NULL, &expl_destoy, expl);
|
1535
1536
|
}
|
1536
1537
|
|
1537
1538
|
/****************************************************************************
|
1538
1539
|
*
|
1539
|
-
*
|
1540
|
+
* IndexSearcher Methods
|
1540
1541
|
*
|
1541
1542
|
****************************************************************************/
|
1542
1543
|
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1544
|
+
static void
|
1545
|
+
frt_is_mark(void *p)
|
1546
|
+
{
|
1547
|
+
Searcher *sea = (Searcher *)p;
|
1548
|
+
frt_gc_mark(sea->ir);
|
1549
|
+
frt_gc_mark(sea->ir->store);
|
1550
|
+
}
|
1551
|
+
|
1552
|
+
#define FRT_GET_IR(rir, ir) do {\
|
1553
|
+
rir = Data_Wrap_Struct(cIndexReader, &frt_ir_mark, &frt_ir_free, ir);\
|
1554
|
+
object_add(ir, rir);\
|
1555
|
+
} while (0)
|
1556
|
+
|
1557
|
+
static VALUE
|
1558
|
+
frt_is_init(VALUE self, VALUE obj)
|
1559
|
+
{
|
1560
|
+
Store *store = NULL;
|
1561
|
+
IndexReader *ir = NULL;
|
1562
|
+
Searcher *sea;
|
1563
|
+
if (TYPE(obj) == T_STRING) {
|
1564
|
+
store = open_fs_store(StringValueCStr(obj));
|
1565
|
+
ir = ir_open(store);
|
1566
|
+
deref(store);
|
1567
|
+
FRT_GET_IR(obj, ir);
|
1568
|
+
} else {
|
1569
|
+
Check_Type(obj, T_DATA);
|
1570
|
+
if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
|
1571
|
+
Data_Get_Struct(obj, Store, store);
|
1572
|
+
ir = ir_open(store);
|
1573
|
+
FRT_GET_IR(obj, ir);
|
1574
|
+
} else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
|
1575
|
+
Data_Get_Struct(obj, IndexReader, ir);
|
1576
|
+
} else {
|
1577
|
+
rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
|
1578
|
+
}
|
1579
|
+
}
|
1580
|
+
|
1581
|
+
sea = sea_create(ir);
|
1582
|
+
sea->close_ir = false;
|
1583
|
+
Frt_Wrap_Struct(self, &frt_is_mark, &frt_sea_free, sea);
|
1584
|
+
object_add(sea, self);
|
1585
|
+
return self;
|
1586
|
+
}
|
1587
|
+
|
1588
|
+
/****************************************************************************
|
1589
|
+
*
|
1590
|
+
* MultiSearcher Methods
|
1591
|
+
*
|
1592
|
+
****************************************************************************/
|
1593
|
+
|
1594
|
+
static void
|
1595
|
+
frt_ms_free(void *p)
|
1596
|
+
{
|
1597
|
+
Searcher *sea = (Searcher *)p;
|
1598
|
+
MultiSearcher *msea = (MultiSearcher *)sea->data;
|
1599
|
+
free(msea->searchers);
|
1600
|
+
object_del(sea);
|
1601
|
+
sea_close(sea);
|
1602
|
+
}
|
1603
|
+
|
1604
|
+
static void
|
1605
|
+
frt_ms_mark(void *p)
|
1606
|
+
{
|
1607
|
+
int i;
|
1608
|
+
Searcher *sea = (Searcher *)p;
|
1609
|
+
MultiSearcher *msea = (MultiSearcher *)sea->data;
|
1610
|
+
for (i = 0; i < msea->s_cnt; i++) {
|
1611
|
+
frt_gc_mark(msea->searchers[i]);
|
1612
|
+
}
|
1613
|
+
}
|
1614
|
+
|
1615
|
+
static VALUE
|
1616
|
+
frt_ms_init(int argc, VALUE *argv, VALUE self)
|
1617
|
+
{
|
1618
|
+
int i, j;
|
1619
|
+
|
1620
|
+
VALUE rsearcher;
|
1621
|
+
Array *searchers = ary_create(argc, (free_ft)NULL);
|
1622
|
+
Searcher *s;
|
1623
|
+
|
1624
|
+
for (i = 0; i < argc; i++) {
|
1625
|
+
rsearcher = argv[i];
|
1626
|
+
switch (TYPE(rsearcher)) {
|
1627
|
+
case T_ARRAY:
|
1628
|
+
for (j = 0; j < RARRAY(rsearcher)->len; j++) {
|
1629
|
+
VALUE rs = RARRAY(rsearcher)->ptr[j];
|
1630
|
+
Data_Get_Struct(rs, Searcher, s);
|
1631
|
+
ary_append(searchers, s);
|
1632
|
+
}
|
1633
|
+
break;
|
1634
|
+
case T_DATA:
|
1635
|
+
Data_Get_Struct(rsearcher, Searcher, s);
|
1636
|
+
ary_append(searchers, s);
|
1637
|
+
break;
|
1638
|
+
default:
|
1639
|
+
rb_raise(rb_eArgError, "Can't add class %s to MultiSearcher",
|
1640
|
+
rb_obj_classname(rsearcher));
|
1641
|
+
break;
|
1642
|
+
}
|
1643
|
+
}
|
1644
|
+
s = msea_create((Searcher **)searchers->elems, searchers->size, false);
|
1645
|
+
free(searchers); /* only free the Array, not the elems array holding the searchers */
|
1646
|
+
Frt_Wrap_Struct(self, &frt_ms_mark, &frt_ms_free, s);
|
1647
|
+
object_add(s, self);
|
1648
|
+
return self;
|
1649
|
+
}
|
1546
1650
|
|
1651
|
+
/****************************************************************************
|
1652
|
+
*
|
1653
|
+
* Index Methods
|
1654
|
+
*
|
1655
|
+
****************************************************************************/
|
1656
|
+
|
1657
|
+
/*
|
1547
1658
|
static void
|
1548
1659
|
frt_ind_free_store_i(Index *self)
|
1549
1660
|
{
|
1550
1661
|
VALUE rval;
|
1551
1662
|
if (self->close_store && (Qnil != (rval = object_get(self->store)))) {
|
1552
|
-
|
1663
|
+
// user passed close_dir option so unwrap it
|
1553
1664
|
Frt_Unwrap_Struct(rval);
|
1554
1665
|
object_del(self->store);
|
1555
1666
|
}
|
1556
1667
|
}
|
1668
|
+
*/
|
1557
1669
|
|
1558
1670
|
static void
|
1559
1671
|
frt_ind_free(void *p)
|
1560
1672
|
{
|
1561
|
-
Index *
|
1562
|
-
|
1563
|
-
|
1564
|
-
index_destroy(self);
|
1673
|
+
Index *ind = (Index *)p;
|
1674
|
+
object_del(ind);
|
1675
|
+
index_destroy(ind);
|
1565
1676
|
}
|
1566
1677
|
|
1567
1678
|
static void
|
1568
1679
|
frt_ind_mark(void *p)
|
1569
1680
|
{
|
1570
|
-
Index *
|
1571
|
-
frt_gc_mark(
|
1572
|
-
frt_gc_mark(
|
1681
|
+
Index *ind = (Index *)p;
|
1682
|
+
frt_gc_mark(ind->store);
|
1683
|
+
frt_gc_mark(ind->analyzer);
|
1573
1684
|
}
|
1574
1685
|
|
1575
1686
|
static VALUE
|
@@ -1582,14 +1693,13 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1582
1693
|
Store *store = NULL;
|
1583
1694
|
Analyzer *analyzer = NULL;
|
1584
1695
|
bool create = false;
|
1585
|
-
bool close_store = false;
|
1586
1696
|
HashSet *def_fields = NULL;
|
1587
1697
|
|
1588
1698
|
if (Qnil != (rval = rb_hash_aref(roptions, rpath_key))) {
|
1589
1699
|
rval = rb_obj_as_string(rval);
|
1590
1700
|
/* TODO: create the directory if it is missing */
|
1591
1701
|
store = open_fs_store(RSTRING(rval)->ptr);
|
1592
|
-
|
1702
|
+
deref(store);
|
1593
1703
|
} else if (Qnil != (rval = rb_hash_aref(roptions, rdir_key))) {
|
1594
1704
|
Data_Get_Struct(rval, Store, store);
|
1595
1705
|
}
|
@@ -1611,19 +1721,21 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1611
1721
|
}
|
1612
1722
|
|
1613
1723
|
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
1614
|
-
|
1724
|
+
analyzer = frt_get_cwrapped_analyzer(rval);
|
1615
1725
|
}
|
1616
1726
|
if (Qnil != (rval = rb_hash_aref(roptions, rdefault_search_field_key))) {
|
1617
1727
|
def_fields = frt_get_fields(rval);
|
1618
1728
|
}
|
1619
1729
|
if (Qnil != (rval = rb_hash_aref(roptions, rclose_dir_key))) {
|
1620
|
-
|
1730
|
+
/* No need to do anything here. Let the GC do the work.
|
1731
|
+
* if (RTEST(rval) && !close_store) close_store = true;
|
1732
|
+
*/
|
1621
1733
|
}
|
1622
1734
|
if (Qnil != (rval = rb_hash_aref(roptions, rdefault_field_key))) {
|
1623
1735
|
if (!def_fields) def_fields = frt_get_fields(rval);
|
1624
1736
|
}
|
1625
1737
|
ind = index_create(store, analyzer, def_fields, create);
|
1626
|
-
if (
|
1738
|
+
if (analyzer) a_deref(analyzer);
|
1627
1739
|
|
1628
1740
|
/* QueryParser options */
|
1629
1741
|
if (Qnil != (rval = rb_hash_aref(roptions, rhandle_parse_errors_key))) {
|
@@ -1686,13 +1798,13 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1686
1798
|
return self;
|
1687
1799
|
}
|
1688
1800
|
|
1689
|
-
#define GET_IND Index *ind
|
1801
|
+
#define GET_IND Index *ind = (Index *)DATA_PTR(self);\
|
1690
1802
|
if (!ind) rb_raise(rb_eStandardError, "Called method on closed Index object")
|
1691
1803
|
static VALUE
|
1692
1804
|
frt_ind_close(VALUE self)
|
1693
1805
|
{
|
1694
1806
|
GET_IND;
|
1695
|
-
frt_ind_free_store_i(ind);
|
1807
|
+
//frt_ind_free_store_i(ind);
|
1696
1808
|
Frt_Unwrap_Struct(self);
|
1697
1809
|
object_del(ind);
|
1698
1810
|
index_destroy(ind);
|
@@ -1778,9 +1890,9 @@ frt_ind_add_doc(int argc, VALUE *argv, VALUE self)
|
|
1778
1890
|
doc = frt_rdoc_to_doc(ind, rdoc, &close_doc);
|
1779
1891
|
|
1780
1892
|
if (argc == 2) {
|
1781
|
-
Analyzer *analyzer;
|
1782
|
-
Data_Get_Struct(ranalyzer, Analyzer, analyzer);
|
1893
|
+
Analyzer *analyzer = frt_get_cwrapped_analyzer(ranalyzer);
|
1783
1894
|
index_add_doc_a(ind, doc, analyzer);
|
1895
|
+
a_deref(analyzer);
|
1784
1896
|
} else {
|
1785
1897
|
index_add_doc(ind, doc);
|
1786
1898
|
}
|
@@ -1789,7 +1901,7 @@ frt_ind_add_doc(int argc, VALUE *argv, VALUE self)
|
|
1789
1901
|
}
|
1790
1902
|
|
1791
1903
|
static Query *
|
1792
|
-
|
1904
|
+
frt_ind_get_query_i(Index *ind, VALUE rquery)
|
1793
1905
|
{
|
1794
1906
|
Query *q = NULL;
|
1795
1907
|
|
@@ -1798,10 +1910,10 @@ frt_get_query_i(Index *ind, VALUE rquery, bool *destroy_query)
|
|
1798
1910
|
rquery = rb_obj_as_string(rquery);
|
1799
1911
|
case T_STRING:
|
1800
1912
|
q = index_get_query(ind, RSTRING(rquery)->ptr);
|
1801
|
-
*destroy_query = true;
|
1802
1913
|
break;
|
1803
1914
|
case T_DATA:
|
1804
1915
|
Data_Get_Struct(rquery, Query, q);
|
1916
|
+
ref(q);
|
1805
1917
|
break;
|
1806
1918
|
default:
|
1807
1919
|
rb_raise(rb_eArgError, "Can only handle a String or a Query.");
|
@@ -1816,14 +1928,13 @@ frt_ind_search(int argc, VALUE *argv, VALUE self)
|
|
1816
1928
|
{
|
1817
1929
|
Query *q;
|
1818
1930
|
VALUE rquery, roptions, rtd;
|
1819
|
-
bool destroy_query = false;
|
1820
1931
|
GET_IND;
|
1821
1932
|
rb_scan_args(argc, argv, "11", &rquery, &roptions);
|
1822
1933
|
ensure_searcher_open(ind);
|
1823
1934
|
|
1824
|
-
q =
|
1825
|
-
rtd = frt_get_td(
|
1826
|
-
|
1935
|
+
q = frt_ind_get_query_i(ind, rquery);
|
1936
|
+
rtd = frt_get_td(frt_sea_search_internal(q, roptions, ind->sea));
|
1937
|
+
q_deref(q);
|
1827
1938
|
|
1828
1939
|
return rtd;
|
1829
1940
|
}
|
@@ -1835,7 +1946,6 @@ frt_ind_search_each(int argc, VALUE *argv, VALUE self)
|
|
1835
1946
|
Query *q;
|
1836
1947
|
TopDocs *td;
|
1837
1948
|
VALUE rquery, roptions, rtotal_hits;
|
1838
|
-
bool destroy_query = false;
|
1839
1949
|
GET_IND;
|
1840
1950
|
|
1841
1951
|
|
@@ -1845,10 +1955,10 @@ frt_ind_search_each(int argc, VALUE *argv, VALUE self)
|
|
1845
1955
|
|
1846
1956
|
ensure_searcher_open(ind);
|
1847
1957
|
|
1848
|
-
q =
|
1958
|
+
q = frt_ind_get_query_i(ind, rquery);
|
1849
1959
|
//printf(">>>>>%s<<<<<\n", q->to_s(q, "file_name"));
|
1850
|
-
td =
|
1851
|
-
|
1960
|
+
td = frt_sea_search_internal(q, roptions, ind->sea);
|
1961
|
+
q_deref(q);
|
1852
1962
|
|
1853
1963
|
rtotal_hits = INT2FIX(td->total_hits);
|
1854
1964
|
|
@@ -2061,7 +2171,7 @@ struct QueryUpdateArg {
|
|
2061
2171
|
Index *ind;
|
2062
2172
|
};
|
2063
2173
|
|
2064
|
-
static void frt_ind_qupd_i(Searcher *sea, int doc_num, void *arg)
|
2174
|
+
static void frt_ind_qupd_i(Searcher *sea, int doc_num, float score, void *arg)
|
2065
2175
|
{
|
2066
2176
|
struct QueryUpdateArg *qua = (struct QueryUpdateArg *)arg;
|
2067
2177
|
Document *doc = sea->ir->get_doc(sea->ir, doc_num);
|
@@ -2073,22 +2183,19 @@ static void frt_ind_qupd_i(Searcher *sea, int doc_num, void *arg)
|
|
2073
2183
|
static VALUE
|
2074
2184
|
frt_ind_query_update(VALUE self, VALUE rquery, VALUE rdoc)
|
2075
2185
|
{
|
2076
|
-
GET_IND;
|
2077
|
-
|
2078
2186
|
int i;
|
2079
2187
|
Query *q;
|
2080
|
-
bool destroy_query = false;
|
2081
2188
|
struct QueryUpdateArg qua;
|
2082
|
-
|
2189
|
+
GET_IND;
|
2083
2190
|
|
2084
2191
|
ensure_searcher_open(ind);
|
2085
2192
|
qua.rdoc = rdoc;
|
2086
|
-
qua.docs = ary_create(8, &doc_destroy);
|
2193
|
+
qua.docs = ary_create(8, (free_ft)&doc_destroy);
|
2087
2194
|
qua.ind = ind;
|
2088
2195
|
|
2089
|
-
q =
|
2196
|
+
q = frt_ind_get_query_i(ind, rquery);
|
2090
2197
|
sea_search_each(ind->sea, q, NULL, &frt_ind_qupd_i, &qua);
|
2091
|
-
|
2198
|
+
q_deref(q);
|
2092
2199
|
|
2093
2200
|
for (i = 0; i < qua.docs->size; i++) {
|
2094
2201
|
index_add_doc(ind, qua.docs->elems[i]);
|
@@ -2206,37 +2313,37 @@ static VALUE
|
|
2206
2313
|
frt_ind_persist(int argc, VALUE *argv, VALUE self)
|
2207
2314
|
{
|
2208
2315
|
VALUE rdir, rcreate;
|
2209
|
-
bool create
|
2316
|
+
bool create;
|
2210
2317
|
Store *old_store;
|
2211
2318
|
GET_IND;
|
2212
2319
|
|
2213
2320
|
index_flush(ind);
|
2214
|
-
frt_ind_free_store_i(ind);
|
2321
|
+
//frt_ind_free_store_i(ind);
|
2215
2322
|
old_store = ind->store;
|
2216
|
-
close_store = ind->close_store;
|
2217
2323
|
|
2218
2324
|
rb_scan_args(argc, argv, "11", &rdir, &rcreate);
|
2219
2325
|
create = RTEST(rcreate);
|
2220
2326
|
|
2221
2327
|
if (T_DATA == TYPE(rdir)) {
|
2222
2328
|
Data_Get_Struct(rdir, Store, ind->store);
|
2329
|
+
ref(ind->store);
|
2223
2330
|
} else {
|
2224
2331
|
rdir = rb_obj_as_string(rdir);
|
2225
2332
|
ind->store = open_fs_store(RSTRING(rdir)->ptr);
|
2226
|
-
ind->close_store = true;
|
2227
2333
|
}
|
2228
2334
|
|
2229
2335
|
if (!create && !ind->store->exists(ind->store, "segments")) create = true;
|
2230
2336
|
|
2231
2337
|
if (create) {
|
2232
|
-
ind->iw = iw_open(ind->store,
|
2338
|
+
ind->iw = iw_open(ind->store, ind->analyzer, create);
|
2339
|
+
ref(ind->analyzer);
|
2233
2340
|
ind->iw->use_compound_file = ind->use_compound_file;
|
2234
2341
|
}
|
2235
2342
|
|
2236
2343
|
ensure_writer_open(ind);
|
2237
2344
|
iw_add_indexes(ind->iw, &old_store, 1);
|
2238
2345
|
|
2239
|
-
|
2346
|
+
store_deref(old_store);
|
2240
2347
|
|
2241
2348
|
index_auto_flush_iw(ind);
|
2242
2349
|
|
@@ -2246,11 +2353,12 @@ frt_ind_persist(int argc, VALUE *argv, VALUE self)
|
|
2246
2353
|
static VALUE
|
2247
2354
|
frt_ind_explain(VALUE self, VALUE rquery, VALUE rdoc_num)
|
2248
2355
|
{
|
2356
|
+
Query *q;
|
2357
|
+
Explanation *expl;
|
2249
2358
|
GET_IND;
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
if (destroy_query) q->destroy(q);
|
2359
|
+
q = frt_ind_get_query_i(ind, rquery);
|
2360
|
+
expl = index_explain(ind, q, FIX2INT(rdoc_num));
|
2361
|
+
q_deref(q);
|
2254
2362
|
return Data_Wrap_Struct(cExplanation, NULL, &expl_destoy, expl);
|
2255
2363
|
}
|
2256
2364
|
|
@@ -2498,6 +2606,7 @@ Init_search(void)
|
|
2498
2606
|
rb_define_method(cSortField, "name", frt_sf_get_name, 0);
|
2499
2607
|
rb_define_method(cSortField, "sort_type", frt_sf_get_sort_type, 0);
|
2500
2608
|
rb_define_method(cSortField, "comparator", frt_sf_get_comparator, 0);
|
2609
|
+
rb_define_method(cSortField, "to_s", frt_sf_to_s, 0);
|
2501
2610
|
|
2502
2611
|
/* SortType */
|
2503
2612
|
cSortType = rb_define_class_under(cSortField, "SortType", rb_cObject);
|
@@ -2536,27 +2645,36 @@ Init_search(void)
|
|
2536
2645
|
|
2537
2646
|
rb_define_method(cSort, "initialize", frt_sort_init, -1);
|
2538
2647
|
rb_define_method(cSort, "fields", frt_sort_get_fields, 0);
|
2648
|
+
rb_define_method(cSort, "to_s", frt_sort_to_s, 0);
|
2539
2649
|
|
2540
2650
|
rb_define_const(cSort, "RELEVANCE",
|
2541
2651
|
frt_sort_init(0, NULL, frt_sort_alloc(cSort)));
|
2542
2652
|
rb_define_const(cSort, "INDEX_ORDER",
|
2543
2653
|
frt_sort_init(1, &oSORT_FIELD_DOC, frt_sort_alloc(cSort)));
|
2544
2654
|
|
2655
|
+
/* Searcher */
|
2656
|
+
cSearcher = rb_define_class_under(mSearch, "Searcher", rb_cObject);
|
2657
|
+
rb_define_method(cSearcher, "close", frt_sea_close, 0);
|
2658
|
+
rb_define_method(cSearcher, "reader", frt_sea_get_reader, 0);
|
2659
|
+
rb_define_method(cSearcher, "doc_freq", frt_sea_doc_freq, 1);
|
2660
|
+
rb_define_method(cSearcher, "doc_freqs", frt_sea_doc_freqs, 1);
|
2661
|
+
rb_define_method(cSearcher, "doc", frt_sea_doc, 1);
|
2662
|
+
rb_define_method(cSearcher, "[]", frt_sea_doc, 1);
|
2663
|
+
rb_define_method(cSearcher, "max_doc", frt_sea_max_doc, 0);
|
2664
|
+
rb_define_method(cSearcher, "search", frt_sea_search, -1);
|
2665
|
+
rb_define_method(cSearcher, "search_each", frt_sea_search_each, 2);
|
2666
|
+
rb_define_method(cSearcher, "explain", frt_sea_explain, 2);
|
2667
|
+
|
2545
2668
|
/* IndexSearcher */
|
2546
|
-
cIndexSearcher = rb_define_class_under(mSearch, "IndexSearcher",
|
2669
|
+
cIndexSearcher = rb_define_class_under(mSearch, "IndexSearcher", cSearcher);
|
2547
2670
|
rb_define_alloc_func(cIndexSearcher, frt_data_alloc);
|
2548
|
-
|
2549
2671
|
rb_define_method(cIndexSearcher, "initialize", frt_is_init, 1);
|
2550
|
-
|
2551
|
-
|
2552
|
-
|
2553
|
-
|
2554
|
-
rb_define_method(
|
2555
|
-
|
2556
|
-
rb_define_method(cIndexSearcher, "max_doc", frt_is_max_doc, 0);
|
2557
|
-
rb_define_method(cIndexSearcher, "search", frt_is_search, -1);
|
2558
|
-
rb_define_method(cIndexSearcher, "search_each", frt_is_search_each, 2);
|
2559
|
-
rb_define_method(cIndexSearcher, "explain", frt_is_explain, 2);
|
2672
|
+
|
2673
|
+
/* MultiSearcher */
|
2674
|
+
cMultiSearcher = rb_define_class_under(mSearch, "MultiSearcher", cSearcher);
|
2675
|
+
rb_define_alloc_func(cMultiSearcher, frt_data_alloc);
|
2676
|
+
rb_define_method(cMultiSearcher, "initialize", frt_ms_init, -1);
|
2677
|
+
|
2560
2678
|
|
2561
2679
|
/* Index */
|
2562
2680
|
cIndex = rb_define_class_under(mIndex, "Index", rb_cObject);
|