ferret 0.9.1 → 0.9.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/r_doc.c
CHANGED
@@ -32,14 +32,18 @@ frt_field_alloc(VALUE klass)
|
|
32
32
|
return self;
|
33
33
|
}
|
34
34
|
|
35
|
-
#define GET_DF DocField *df
|
35
|
+
#define GET_DF DocField *df = (DocField *)DATA_PTR(self)
|
36
|
+
|
36
37
|
static VALUE
|
37
38
|
frt_field_init(int argc, VALUE *argv, VALUE self)
|
38
39
|
{
|
39
40
|
GET_DF;
|
40
41
|
VALUE rname, rdata, rstored, rindexed, rstore_tv, rbinary, rboost;
|
42
|
+
char *name;
|
43
|
+
char *data;
|
41
44
|
float boost = 1.0;
|
42
45
|
int stored = 0, indexed = 0, store_tv = 0;
|
46
|
+
int len;
|
43
47
|
bool binary = false;
|
44
48
|
switch (rb_scan_args(argc, argv, "25", &rname, &rdata, &rstored,
|
45
49
|
&rindexed, &rstore_tv, &rbinary, &rboost)) {
|
@@ -53,9 +57,9 @@ frt_field_init(int argc, VALUE *argv, VALUE self)
|
|
53
57
|
rdata = rb_obj_as_string(rdata);
|
54
58
|
break;
|
55
59
|
}
|
56
|
-
|
57
|
-
|
58
|
-
|
60
|
+
name = RSTRING(rname)->ptr;
|
61
|
+
len = RSTRING(rdata)->len;
|
62
|
+
data = ALLOC_N(char, len + 1);
|
59
63
|
MEMCPY(data, RSTRING(rdata)->ptr, char, len);
|
60
64
|
data[len] = 0;
|
61
65
|
df_set(df, name, data, stored, indexed, store_tv);
|
@@ -268,8 +272,8 @@ static VALUE
|
|
268
272
|
frt_doc_alloc(VALUE klass)
|
269
273
|
{
|
270
274
|
Document *doc = doc_create();
|
271
|
-
doc->free_data = NULL;
|
272
275
|
VALUE self = Data_Wrap_Struct(klass, &frt_doc_mark, &frt_doc_free, doc);
|
276
|
+
doc->free_data = NULL;
|
273
277
|
object_add(doc, self);
|
274
278
|
return self;
|
275
279
|
}
|
@@ -300,7 +304,8 @@ frt_get_doc(Document *doc)
|
|
300
304
|
return self;
|
301
305
|
}
|
302
306
|
|
303
|
-
#define GET_DOC Document *doc
|
307
|
+
#define GET_DOC Document *doc = (Document *)DATA_PTR(self)
|
308
|
+
|
304
309
|
static VALUE
|
305
310
|
frt_doc_init(VALUE self)
|
306
311
|
{
|
data/ext/r_index_io.c
CHANGED
@@ -21,6 +21,7 @@ VALUE rterm_index_interval_key;
|
|
21
21
|
|
22
22
|
extern void frt_set_term(VALUE rterm, Term *t);
|
23
23
|
extern VALUE frt_get_rterm(char *field, char *text);
|
24
|
+
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
24
25
|
|
25
26
|
/****************************************************************************
|
26
27
|
*
|
@@ -35,7 +36,7 @@ frt_te_free(void *p)
|
|
35
36
|
te->close(te);
|
36
37
|
}
|
37
38
|
|
38
|
-
#define GET_TE TermEnum *te
|
39
|
+
#define GET_TE TermEnum *te = (TermEnum *)DATA_PTR(self)
|
39
40
|
static VALUE
|
40
41
|
frt_te_next(VALUE self)
|
41
42
|
{
|
@@ -100,8 +101,8 @@ frt_tvoi_init(VALUE self, VALUE rstart, VALUE rend)
|
|
100
101
|
return self;
|
101
102
|
}
|
102
103
|
|
103
|
-
#define GET_TVOI TVOffsetInfo *tvoi
|
104
|
-
|
104
|
+
#define GET_TVOI TVOffsetInfo *tvoi = (TVOffsetInfo *)DATA_PTR(self)
|
105
|
+
|
105
106
|
static VALUE
|
106
107
|
frt_tvoi_set_start(VALUE self, VALUE rstart)
|
107
108
|
{
|
@@ -135,9 +136,9 @@ frt_tvoi_get_end(VALUE self)
|
|
135
136
|
static VALUE
|
136
137
|
frt_tvoi_eql(VALUE self, VALUE rother)
|
137
138
|
{
|
138
|
-
if (TYPE(rother) != T_DATA) return Qfalse;
|
139
|
-
TVOffsetInfo *other;
|
140
139
|
GET_TVOI;
|
140
|
+
TVOffsetInfo *other;
|
141
|
+
if (TYPE(rother) != T_DATA) return Qfalse;
|
141
142
|
Data_Get_Struct(rother, TVOffsetInfo, other);
|
142
143
|
|
143
144
|
return ((tvoi->start == other->start) && (tvoi->end == other->end))
|
@@ -234,7 +235,8 @@ frt_get_tv(TermVector *tv)
|
|
234
235
|
return self;
|
235
236
|
}
|
236
237
|
|
237
|
-
#define GET_TV TermVector *tv
|
238
|
+
#define GET_TV TermVector *tv = (TermVector *)DATA_PTR(self)
|
239
|
+
|
238
240
|
static VALUE
|
239
241
|
frt_tv_get_field(VALUE self)
|
240
242
|
{
|
@@ -271,9 +273,10 @@ frt_tv_get_positions(VALUE self)
|
|
271
273
|
{
|
272
274
|
int i, j, freq;
|
273
275
|
GET_TV;
|
276
|
+
VALUE rpositions, rpositionss;
|
277
|
+
|
274
278
|
if (!tv->positions) return Qnil;
|
275
|
-
|
276
|
-
VALUE rpositionss = rb_ary_new2(tv->tcnt);
|
279
|
+
rpositionss = rb_ary_new2(tv->tcnt);
|
277
280
|
for (i = 0; i < tv->tcnt; i++) {
|
278
281
|
freq = tv->freqs[i];
|
279
282
|
rpositions = rb_ary_new2(freq);
|
@@ -290,9 +293,10 @@ frt_tv_get_offsets(VALUE self)
|
|
290
293
|
{
|
291
294
|
int i, j, freq;
|
292
295
|
GET_TV;
|
296
|
+
VALUE roffsetss, roffsets, roffset;
|
293
297
|
if (!tv->offsets) return Qnil;
|
294
|
-
|
295
|
-
|
298
|
+
roffsetss = rb_ary_new2(tv->tcnt);
|
299
|
+
|
296
300
|
for (i = 0; i < tv->tcnt; i++) {
|
297
301
|
freq = tv->freqs[i];
|
298
302
|
roffsets = rb_ary_new2(freq);
|
@@ -324,7 +328,8 @@ frt_get_tde(TermDocEnum *tde)
|
|
324
328
|
return Data_Wrap_Struct(cTermDocEnum, NULL, &frt_tde_free, tde);
|
325
329
|
}
|
326
330
|
|
327
|
-
#define GET_TDE TermDocEnum *tde
|
331
|
+
#define GET_TDE TermDocEnum *tde = (TermDocEnum *)DATA_PTR(self)
|
332
|
+
|
328
333
|
static VALUE
|
329
334
|
frt_tde_close(VALUE self)
|
330
335
|
{
|
@@ -375,12 +380,12 @@ frt_tde_next_position(VALUE self)
|
|
375
380
|
static VALUE
|
376
381
|
frt_tde_read(VALUE self, VALUE rdocs, VALUE rfreqs)
|
377
382
|
{
|
378
|
-
int i;
|
383
|
+
int i, req_num, cnt;
|
379
384
|
GET_TDE;
|
380
385
|
Check_Type(rdocs, T_ARRAY);
|
381
386
|
Check_Type(rfreqs, T_ARRAY);
|
382
|
-
|
383
|
-
|
387
|
+
req_num = MIN(RARRAY(rdocs)->len, RARRAY(rfreqs)->len);
|
388
|
+
cnt = tde->read(tde, (int *)RARRAY(rdocs)->ptr,
|
384
389
|
(int *)RARRAY(rfreqs)->ptr, req_num);
|
385
390
|
for (i = 0; i < cnt; i++) {
|
386
391
|
RARRAY(rdocs)->ptr[i] = INT2FIX(RARRAY(rdocs)->ptr[i]);
|
@@ -425,8 +430,6 @@ static VALUE
|
|
425
430
|
frt_iw_init(int argc, VALUE *argv, VALUE self)
|
426
431
|
{
|
427
432
|
VALUE rdir, roptions, rval;
|
428
|
-
bool close_dir = false;
|
429
|
-
bool close_analyzer = true;
|
430
433
|
bool create = false;
|
431
434
|
bool use_compound_file = true;
|
432
435
|
Store *store;
|
@@ -436,20 +439,21 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
436
439
|
if (argc > 0) {
|
437
440
|
if (TYPE(rdir) == T_DATA) {
|
438
441
|
store = DATA_PTR(rdir);
|
442
|
+
ref(store);
|
439
443
|
} else {
|
440
|
-
|
444
|
+
StringValue(rdir);
|
441
445
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
442
|
-
close_dir = true;
|
443
446
|
}
|
444
447
|
} else {
|
445
448
|
store = open_ram_store();
|
446
|
-
close_dir = true;
|
447
449
|
}
|
448
450
|
if (argc == 2) {
|
449
451
|
Check_Type(roptions, T_HASH);
|
452
|
+
/* Let ruby's GC handle the closing of the store
|
450
453
|
if (!close_dir) {
|
451
454
|
close_dir = RTEST(rb_hash_aref(roptions, rclose_dir_key));
|
452
455
|
}
|
456
|
+
*/
|
453
457
|
/* use_compound_file defaults to true */
|
454
458
|
use_compound_file =
|
455
459
|
(rb_hash_aref(roptions, ruse_compound_file_key) == Qfalse) ? false : true;
|
@@ -458,8 +462,7 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
458
462
|
if (rval == Qnil) {
|
459
463
|
analyzer = mb_standard_analyzer_create(true);
|
460
464
|
} else {
|
461
|
-
|
462
|
-
close_analyzer = false;
|
465
|
+
analyzer = frt_get_cwrapped_analyzer(rval);
|
463
466
|
}
|
464
467
|
create = RTEST(rb_hash_aref(roptions, rcreate_key));
|
465
468
|
if (!create && RTEST(rb_hash_aref(roptions, rcreate_if_missing_key))) {
|
@@ -468,7 +471,8 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
468
471
|
}
|
469
472
|
}
|
470
473
|
}
|
471
|
-
iw = iw_open(store, analyzer, create
|
474
|
+
iw = iw_open(store, analyzer, create);
|
475
|
+
store_deref(store);
|
472
476
|
iw->use_compound_file = use_compound_file;
|
473
477
|
|
474
478
|
SET_INT_ATTR(merge_factor);
|
@@ -481,7 +485,8 @@ frt_iw_init(int argc, VALUE *argv, VALUE self)
|
|
481
485
|
return self;
|
482
486
|
}
|
483
487
|
|
484
|
-
#define GET_IW IndexWriter *iw
|
488
|
+
#define GET_IW IndexWriter *iw = (IndexWriter *)DATA_PTR(self)
|
489
|
+
|
485
490
|
static VALUE
|
486
491
|
frt_iw_close(VALUE self)
|
487
492
|
{
|
@@ -630,21 +635,21 @@ static VALUE
|
|
630
635
|
frt_ir_init(int argc, VALUE *argv, VALUE self)
|
631
636
|
{
|
632
637
|
VALUE rdir, rclose_dir;
|
633
|
-
bool close_dir =
|
638
|
+
//bool close_dir = false;
|
634
639
|
Store *store = NULL;
|
635
640
|
IndexReader *ir;
|
636
641
|
switch (rb_scan_args(argc, argv, "11", &rdir, &rclose_dir)) {
|
637
|
-
case 2: close_dir = RTEST(rclose_dir);
|
642
|
+
case 2: //close_dir = RTEST(rclose_dir);
|
638
643
|
case 1:
|
639
644
|
if (TYPE(rdir) == T_DATA) {
|
640
645
|
store = DATA_PTR(rdir);
|
641
646
|
} else {
|
642
647
|
rdir = rb_obj_as_string(rdir);
|
643
648
|
store = open_fs_store(RSTRING(rdir)->ptr);
|
644
|
-
|
649
|
+
deref(store);
|
645
650
|
}
|
646
651
|
}
|
647
|
-
ir = ir_open(store
|
652
|
+
ir = ir_open(store);
|
648
653
|
Frt_Wrap_Struct(self, &frt_ir_mark, &frt_ir_free, ir);
|
649
654
|
object_add(ir, self);
|
650
655
|
return self;
|
@@ -657,7 +662,8 @@ frt_ir_open(int argc, VALUE *argv, VALUE klass)
|
|
657
662
|
return frt_ir_init(argc, argv, self);
|
658
663
|
}
|
659
664
|
|
660
|
-
#define GET_IR IndexReader *ir
|
665
|
+
#define GET_IR IndexReader *ir = (IndexReader *)DATA_PTR(self)
|
666
|
+
|
661
667
|
static VALUE
|
662
668
|
frt_ir_set_norm(VALUE self, VALUE rdoc_num, VALUE rfield, VALUE rval)
|
663
669
|
{
|
@@ -671,8 +677,9 @@ static VALUE
|
|
671
677
|
frt_ir_get_norms(VALUE self, VALUE rfield)
|
672
678
|
{
|
673
679
|
GET_IR;
|
680
|
+
uchar *norms;
|
674
681
|
rfield = rb_obj_as_string(rfield);
|
675
|
-
|
682
|
+
norms = ir->get_norms(ir, RSTRING(rfield)->ptr);
|
676
683
|
if (norms) {
|
677
684
|
return rb_str_new((char *)norms, ir->max_doc(ir));
|
678
685
|
} else {
|
@@ -684,8 +691,9 @@ static VALUE
|
|
684
691
|
frt_ir_get_norms_into(VALUE self, VALUE rfield, VALUE rnorms, VALUE roffset)
|
685
692
|
{
|
686
693
|
GET_IR;
|
694
|
+
int offset;
|
687
695
|
rfield = rb_obj_as_string(rfield);
|
688
|
-
|
696
|
+
offset = FIX2INT(roffset);
|
689
697
|
Check_Type(rnorms, T_STRING);
|
690
698
|
if (RSTRING(rnorms)->len < offset + ir->max_doc(ir)) {
|
691
699
|
rb_raise(rb_eArgError, "supplied a string of length:%d to IndexReader#get_norms_into but needed a string of length offset:%d + maxdoc:%d", RSTRING(rnorms)->len, offset, ir->max_doc(ir));
|
@@ -778,9 +786,9 @@ static VALUE
|
|
778
786
|
frt_ir_get_term_vector(VALUE self, VALUE rdoc_num, VALUE rfield)
|
779
787
|
{
|
780
788
|
GET_IR;
|
789
|
+
TermVector *tv;
|
781
790
|
rfield = rb_obj_as_string(rfield);
|
782
|
-
|
783
|
-
ir->get_term_vector(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr);
|
791
|
+
tv = ir->get_term_vector(ir, FIX2INT(rdoc_num), RSTRING(rfield)->ptr);
|
784
792
|
return frt_get_tv(tv);
|
785
793
|
}
|
786
794
|
|
data/ext/r_qparser.c
CHANGED
@@ -10,10 +10,11 @@ VALUE rwild_lower_key;
|
|
10
10
|
VALUE roccur_default_key;
|
11
11
|
VALUE rdefault_slop_key;
|
12
12
|
VALUE rclean_str_key;
|
13
|
-
VALUE ranalyzer_key;
|
13
|
+
extern VALUE ranalyzer_key;
|
14
14
|
|
15
15
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
16
16
|
extern VALUE frt_get_q(Query *q);
|
17
|
+
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
17
18
|
|
18
19
|
/****************************************************************************
|
19
20
|
*
|
@@ -56,7 +57,7 @@ frt_get_fields(VALUE rfields)
|
|
56
57
|
fields = NULL;
|
57
58
|
} else {
|
58
59
|
s = str = estrdup(RSTRING(rval)->ptr);
|
59
|
-
while ((p =
|
60
|
+
while ((p = strchr(s, '|')) != '\0') {
|
60
61
|
*p = '\0';
|
61
62
|
hs_add(fields, estrdup(s));
|
62
63
|
s = p + 1;
|
@@ -84,7 +85,18 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
84
85
|
if (argc > 0) {
|
85
86
|
def_fields = frt_get_fields(rdef_field);
|
86
87
|
}
|
87
|
-
|
88
|
+
|
89
|
+
if (argc == 2) {
|
90
|
+
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
91
|
+
analyzer = frt_get_cwrapped_analyzer(rval);
|
92
|
+
}
|
93
|
+
}
|
94
|
+
|
95
|
+
if (!analyzer) {
|
96
|
+
analyzer = mb_standard_analyzer_create(true);
|
97
|
+
}
|
98
|
+
|
99
|
+
qp = qp_create(all_fields, def_fields, analyzer);
|
88
100
|
qp->allow_any_fields = true;
|
89
101
|
qp->clean_str = true;
|
90
102
|
/* handle options */
|
@@ -107,17 +119,7 @@ frt_qp_init(int argc, VALUE *argv, VALUE self)
|
|
107
119
|
if (Qnil != (rval = rb_hash_aref(roptions, rclean_str_key))) {
|
108
120
|
qp->clean_str = RTEST(rval);
|
109
121
|
}
|
110
|
-
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
111
|
-
Data_Get_Struct(rval, Analyzer, analyzer);
|
112
|
-
}
|
113
|
-
}
|
114
|
-
if (!analyzer) {
|
115
|
-
analyzer = letter_analyzer_create(true);
|
116
|
-
/* make sure the analyzer will be disposed of when the QueryParser
|
117
|
-
* is garbage collected. */
|
118
|
-
rval = frt_get_analyzer(analyzer);
|
119
122
|
}
|
120
|
-
qp->analyzer = analyzer;
|
121
123
|
Frt_Wrap_Struct(self, frt_qp_mark, frt_qp_free, qp);
|
122
124
|
object_add(qp, self);
|
123
125
|
return self;
|
@@ -162,7 +164,6 @@ Init_qparser(void)
|
|
162
164
|
roccur_default_key = ID2SYM(rb_intern("occur_default"));
|
163
165
|
rdefault_slop_key = ID2SYM(rb_intern("default_slop"));
|
164
166
|
rclean_str_key = ID2SYM(rb_intern("clean_string"));
|
165
|
-
ranalyzer_key = ID2SYM(rb_intern("analyzer"));
|
166
167
|
|
167
168
|
/* QueryParser */
|
168
169
|
cQueryParser = rb_define_class_under(mFerret, "QueryParser", rb_cObject);
|
data/ext/r_search.c
CHANGED
@@ -14,7 +14,9 @@ extern void frt_ir_mark(void *p);
|
|
14
14
|
static VALUE cScoreDoc;
|
15
15
|
static VALUE cTopDocs;
|
16
16
|
static VALUE cExplanation;
|
17
|
+
static VALUE cSearcher;
|
17
18
|
static VALUE cIndexSearcher;
|
19
|
+
static VALUE cMultiSearcher;
|
18
20
|
static VALUE cSortField;
|
19
21
|
static VALUE cSortType;
|
20
22
|
static VALUE cSort;
|
@@ -85,6 +87,7 @@ extern void frt_set_term(VALUE rterm, Term *t);
|
|
85
87
|
extern Term *frt_get_term(VALUE rterm);
|
86
88
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
87
89
|
extern HashSet *frt_get_fields(VALUE rfields);
|
90
|
+
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
88
91
|
|
89
92
|
/****************************************************************************
|
90
93
|
*
|
@@ -147,7 +150,8 @@ frt_td_mark(void *p)
|
|
147
150
|
frt_gc_mark(td->hits);
|
148
151
|
}
|
149
152
|
|
150
|
-
#define GET_TD TopDocs *td
|
153
|
+
#define GET_TD TopDocs *td = (TopDocs *)DATA_PTR(self)
|
154
|
+
|
151
155
|
static VALUE
|
152
156
|
frt_get_td(TopDocs *td)
|
153
157
|
{
|
@@ -192,7 +196,7 @@ frt_td_total_hits(VALUE self)
|
|
192
196
|
static VALUE
|
193
197
|
frt_td_fields(VALUE self)
|
194
198
|
{
|
195
|
-
|
199
|
+
rb_raise(rb_eNotImpError, "not implemented in the c extension version");
|
196
200
|
return Qnil;
|
197
201
|
}
|
198
202
|
|
@@ -215,7 +219,8 @@ frt_td_each(VALUE self)
|
|
215
219
|
*
|
216
220
|
****************************************************************************/
|
217
221
|
|
218
|
-
#define GET_EXPL Explanation *expl
|
222
|
+
#define GET_EXPL Explanation *expl = (Explanation *)DATA_PTR(self)
|
223
|
+
|
219
224
|
static VALUE
|
220
225
|
frt_expl_to_s(VALUE self)
|
221
226
|
{
|
@@ -252,12 +257,12 @@ frt_expl_value(VALUE self)
|
|
252
257
|
static void
|
253
258
|
frt_q_free(void *p)
|
254
259
|
{
|
255
|
-
Query *q = (Query *)p;
|
256
260
|
object_del(p);
|
257
|
-
|
261
|
+
q_deref((Query *)p);
|
258
262
|
}
|
259
263
|
|
260
|
-
#define GET_Q Query *q
|
264
|
+
#define GET_Q Query *q = (Query *)DATA_PTR(self)
|
265
|
+
|
261
266
|
|
262
267
|
static VALUE
|
263
268
|
frt_q_to_s(int argc, VALUE *argv, VALUE self)
|
@@ -387,15 +392,15 @@ frt_bc_mark(void *p)
|
|
387
392
|
static void
|
388
393
|
frt_bc_free(void *p)
|
389
394
|
{
|
390
|
-
|
391
|
-
|
392
|
-
free(bc);
|
395
|
+
object_del(p);
|
396
|
+
bc_deref((BooleanClause *)p);
|
393
397
|
}
|
394
398
|
|
395
399
|
static VALUE
|
396
400
|
frt_get_bc(BooleanClause *bc)
|
397
401
|
{
|
398
402
|
VALUE self = Data_Wrap_Struct(cBooleanClause, &frt_bc_mark, &frt_bc_free, bc);
|
403
|
+
ref(bc);
|
399
404
|
object_add(bc, self);
|
400
405
|
return self;
|
401
406
|
}
|
@@ -411,13 +416,14 @@ frt_bc_init(int argc, VALUE *argv, VALUE self)
|
|
411
416
|
occur = FIX2INT(roccur);
|
412
417
|
}
|
413
418
|
Data_Get_Struct(rquery, Query, sub_q);
|
419
|
+
ref(sub_q);
|
414
420
|
bc = bc_create(sub_q, occur);
|
415
421
|
Frt_Wrap_Struct(self, &frt_bc_mark, &frt_bc_free, bc);
|
416
422
|
object_add(bc, self);
|
417
423
|
return self;
|
418
424
|
}
|
419
425
|
|
420
|
-
#define GET_BC BooleanClause *bc
|
426
|
+
#define GET_BC BooleanClause *bc = (BooleanClause *)DATA_PTR(self)
|
421
427
|
static VALUE
|
422
428
|
frt_bc_get_query(VALUE self)
|
423
429
|
{
|
@@ -505,10 +511,11 @@ frt_bq_init(int argc, VALUE *argv, VALUE self)
|
|
505
511
|
{
|
506
512
|
VALUE rcoord_disabled;
|
507
513
|
bool coord_disabled = false;
|
514
|
+
Query *q;
|
508
515
|
if (rb_scan_args(argc, argv, "01", &rcoord_disabled)) {
|
509
516
|
coord_disabled = RTEST(rcoord_disabled);
|
510
517
|
}
|
511
|
-
|
518
|
+
q = bq_create(coord_disabled);
|
512
519
|
Frt_Wrap_Struct(self, &frt_bq_mark, &frt_q_free, q);
|
513
520
|
object_add(q, self);
|
514
521
|
|
@@ -575,10 +582,12 @@ frt_rq_new_more(VALUE klass, VALUE rfield, VALUE rlterm, VALUE rincl)
|
|
575
582
|
{
|
576
583
|
Query *q;
|
577
584
|
VALUE self;
|
578
|
-
rfield = rb_obj_as_string(rfield);
|
579
585
|
char *lterm = NIL_P(rlterm) ? NULL : RSTRING(rb_obj_as_string(rlterm))->ptr;
|
580
|
-
|
581
|
-
|
586
|
+
rfield = rb_obj_as_string(rfield);
|
587
|
+
if (!lterm) {
|
588
|
+
rb_raise(rb_eArgError, "The lower term must not be nil in a more "
|
589
|
+
"than query");
|
590
|
+
}
|
582
591
|
q = rq_create_more(RSTRING(rfield)->ptr, lterm, RTEST(rincl));
|
583
592
|
self = Data_Wrap_Struct(klass, NULL, &frt_q_free, q);
|
584
593
|
object_add(q, self);
|
@@ -590,10 +599,12 @@ frt_rq_new_less(VALUE klass, VALUE rfield, VALUE ruterm, VALUE rincu)
|
|
590
599
|
{
|
591
600
|
Query *q;
|
592
601
|
VALUE self;
|
593
|
-
rfield = rb_obj_as_string(rfield);
|
594
602
|
char *uterm = NIL_P(ruterm) ? NULL : RSTRING(rb_obj_as_string(ruterm))->ptr;
|
595
|
-
|
596
|
-
|
603
|
+
rfield = rb_obj_as_string(rfield);
|
604
|
+
if (!uterm) {
|
605
|
+
rb_raise(rb_eArgError, "The upper term must not be nil in a less "
|
606
|
+
"than query");
|
607
|
+
}
|
597
608
|
q = rq_create_less(RSTRING(rfield)->ptr, uterm, RTEST(rincu));
|
598
609
|
self = Data_Wrap_Struct(klass, NULL, &frt_q_free, q);
|
599
610
|
object_add(q, self);
|
@@ -734,7 +745,6 @@ frt_mphq_set_slop(VALUE self, VALUE rslop)
|
|
734
745
|
return self;
|
735
746
|
}
|
736
747
|
|
737
|
-
|
738
748
|
/****************************************************************************
|
739
749
|
*
|
740
750
|
* PrefixQuery Methods
|
@@ -1054,9 +1064,10 @@ frt_spanxq_mark(void *p)
|
|
1054
1064
|
static VALUE
|
1055
1065
|
frt_spanxq_init(VALUE self, VALUE rinc, VALUE rexc)
|
1056
1066
|
{
|
1067
|
+
Query *q;
|
1057
1068
|
Check_Type(rinc, T_DATA);
|
1058
1069
|
Check_Type(rexc, T_DATA);
|
1059
|
-
|
1070
|
+
q = spanxq_create(DATA_PTR(rinc), DATA_PTR(rexc));
|
1060
1071
|
q->destroy_all = false;
|
1061
1072
|
Frt_Wrap_Struct(self, &frt_spanxq_mark, &frt_q_free, q);
|
1062
1073
|
object_add(q, self);
|
@@ -1077,7 +1088,7 @@ frt_f_free(void *p)
|
|
1077
1088
|
f->destroy(f);
|
1078
1089
|
}
|
1079
1090
|
|
1080
|
-
#define GET_F Filter *f
|
1091
|
+
#define GET_F Filter *f = (Filter *)DATA_PTR(self)
|
1081
1092
|
|
1082
1093
|
static VALUE
|
1083
1094
|
frt_f_to_s(VALUE self)
|
@@ -1126,11 +1137,14 @@ frt_rf_new_more(int argc, VALUE *argv, VALUE klass)
|
|
1126
1137
|
Filter *f;
|
1127
1138
|
VALUE self;
|
1128
1139
|
VALUE rfield, rlterm, rincl;
|
1140
|
+
char *lterm;
|
1129
1141
|
rb_scan_args(argc, argv, "21", &rfield, &rlterm, &rincl);
|
1130
1142
|
rfield = rb_obj_as_string(rfield);
|
1131
|
-
|
1132
|
-
if (!lterm)
|
1133
|
-
rb_raise(rb_eArgError, "The lower term must not be nil in a more
|
1143
|
+
lterm = NIL_P(rlterm) ? NULL : RSTRING(rb_obj_as_string(rlterm))->ptr;
|
1144
|
+
if (!lterm) {
|
1145
|
+
rb_raise(rb_eArgError, "The lower term must not be nil in a more "
|
1146
|
+
"than filter");
|
1147
|
+
}
|
1134
1148
|
f = rfilt_create(RSTRING(rfield)->ptr, lterm, NULL, rincl != Qfalse, false);
|
1135
1149
|
self = Data_Wrap_Struct(klass, NULL, &frt_f_free, f);
|
1136
1150
|
object_add(f, self);
|
@@ -1143,11 +1157,14 @@ frt_rf_new_less(int argc, VALUE *argv, VALUE klass)
|
|
1143
1157
|
Filter *f;
|
1144
1158
|
VALUE self;
|
1145
1159
|
VALUE rfield, ruterm, rincu;
|
1160
|
+
char *uterm;
|
1146
1161
|
rb_scan_args(argc, argv, "21", &rfield, &ruterm, &rincu);
|
1147
1162
|
rfield = rb_obj_as_string(rfield);
|
1148
|
-
|
1149
|
-
if (!uterm)
|
1150
|
-
rb_raise(rb_eArgError, "The upper term must not be nil in a less
|
1163
|
+
uterm = NIL_P(ruterm) ? NULL : RSTRING(rb_obj_as_string(ruterm))->ptr;
|
1164
|
+
if (!uterm) {
|
1165
|
+
rb_raise(rb_eArgError, "The upper term must not be nil in a less "
|
1166
|
+
"than filter");
|
1167
|
+
}
|
1151
1168
|
f = rfilt_create(RSTRING(rfield)->ptr, NULL, uterm, false, rincu != Qfalse);
|
1152
1169
|
self = Data_Wrap_Struct(klass, NULL, &frt_f_free, f);
|
1153
1170
|
object_add(f, self);
|
@@ -1181,9 +1198,8 @@ frt_qf_init(VALUE self, VALUE rquery)
|
|
1181
1198
|
static void
|
1182
1199
|
frt_sf_free(void *p)
|
1183
1200
|
{
|
1184
|
-
|
1185
|
-
|
1186
|
-
sort_field_destroy(sf);
|
1201
|
+
object_del(p);
|
1202
|
+
sort_field_destroy((SortField *)p);
|
1187
1203
|
}
|
1188
1204
|
|
1189
1205
|
static VALUE
|
@@ -1220,13 +1236,16 @@ frt_sf_init(int argc, VALUE *argv, VALUE self)
|
|
1220
1236
|
rfield = rb_obj_as_string(rfield);
|
1221
1237
|
|
1222
1238
|
sf = sort_field_create(RSTRING(rfield)->ptr, sort_type, is_reverse);
|
1239
|
+
if (sf->field == NULL && RSTRING(rfield)->ptr != NULL) {
|
1240
|
+
sf->field = estrdup(RSTRING(rfield)->ptr);
|
1241
|
+
}
|
1223
1242
|
|
1224
1243
|
Frt_Wrap_Struct(self, NULL, &frt_sf_free, sf);
|
1225
1244
|
object_add(sf, self);
|
1226
1245
|
return self;
|
1227
1246
|
}
|
1228
1247
|
|
1229
|
-
#define GET_SF SortField *sf
|
1248
|
+
#define GET_SF SortField *sf = (SortField *)DATA_PTR(self)
|
1230
1249
|
static VALUE
|
1231
1250
|
frt_sf_is_reverse(VALUE self)
|
1232
1251
|
{
|
@@ -1254,9 +1273,19 @@ frt_sf_get_comparator(VALUE self)
|
|
1254
1273
|
return Qnil;
|
1255
1274
|
}
|
1256
1275
|
|
1276
|
+
static VALUE
|
1277
|
+
frt_sf_to_s(VALUE self)
|
1278
|
+
{
|
1279
|
+
GET_SF;
|
1280
|
+
char *str = sort_field_to_s(sf);
|
1281
|
+
VALUE rstr = rb_str_new2(str);
|
1282
|
+
free(str);
|
1283
|
+
return rstr;
|
1284
|
+
}
|
1285
|
+
|
1257
1286
|
/****************************************************************************
|
1258
1287
|
*
|
1259
|
-
*
|
1288
|
+
* Sort Methods
|
1260
1289
|
*
|
1261
1290
|
****************************************************************************/
|
1262
1291
|
|
@@ -1317,7 +1346,7 @@ frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
|
|
1317
1346
|
sort_add_sort_field(sort, sf);
|
1318
1347
|
}
|
1319
1348
|
|
1320
|
-
#define GET_SORT Sort *sort
|
1349
|
+
#define GET_SORT Sort *sort = (Sort *)DATA_PTR(self)
|
1321
1350
|
static VALUE
|
1322
1351
|
frt_sort_init(int argc, VALUE *argv, VALUE self)
|
1323
1352
|
{
|
@@ -1363,65 +1392,34 @@ frt_sort_get_fields(VALUE self)
|
|
1363
1392
|
return object_get(sort->sort_fields);
|
1364
1393
|
}
|
1365
1394
|
|
1395
|
+
|
1396
|
+
static VALUE
|
1397
|
+
frt_sort_to_s(VALUE self)
|
1398
|
+
{
|
1399
|
+
GET_SORT;
|
1400
|
+
char *str = sort_to_s(sort);
|
1401
|
+
VALUE rstr = rb_str_new2(str);
|
1402
|
+
free(str);
|
1403
|
+
return rstr;
|
1404
|
+
}
|
1366
1405
|
/****************************************************************************
|
1367
1406
|
*
|
1368
|
-
*
|
1407
|
+
* Searcher Methods
|
1369
1408
|
*
|
1370
1409
|
****************************************************************************/
|
1371
1410
|
|
1372
1411
|
static void
|
1373
|
-
|
1412
|
+
frt_sea_free(void *p)
|
1374
1413
|
{
|
1375
1414
|
Searcher *sea = (Searcher *)p;
|
1376
|
-
|
1415
|
+
object_del(sea);
|
1416
|
+
sea_close(sea);
|
1377
1417
|
}
|
1378
1418
|
|
1379
|
-
|
1380
|
-
frt_is_mark(void *p)
|
1381
|
-
{
|
1382
|
-
Searcher *sea = (Searcher *)p;
|
1383
|
-
frt_gc_mark(sea->ir);
|
1384
|
-
frt_gc_mark(sea->ir->store);
|
1385
|
-
}
|
1386
|
-
|
1387
|
-
#define FRT_GET_IR(rir, ir) do {\
|
1388
|
-
rir = Data_Wrap_Struct(cIndexReader, &frt_ir_mark, &frt_ir_free, ir);\
|
1389
|
-
object_add(ir, rir);\
|
1390
|
-
} while (0)
|
1391
|
-
|
1392
|
-
static VALUE
|
1393
|
-
frt_is_init(VALUE self, VALUE obj)
|
1394
|
-
{
|
1395
|
-
Store *store = NULL;
|
1396
|
-
IndexReader *ir = NULL;
|
1397
|
-
Searcher *sea;
|
1398
|
-
if (TYPE(obj) == T_STRING) {
|
1399
|
-
store = open_fs_store(StringValueCStr(obj));
|
1400
|
-
ir = ir_open(store, true);
|
1401
|
-
FRT_GET_IR(obj, ir);
|
1402
|
-
} else {
|
1403
|
-
Check_Type(obj, T_DATA);
|
1404
|
-
if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
|
1405
|
-
Data_Get_Struct(obj, Store, store);
|
1406
|
-
ir = ir_open(store, false);
|
1407
|
-
FRT_GET_IR(obj, ir);
|
1408
|
-
} else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
|
1409
|
-
Data_Get_Struct(obj, IndexReader, ir);
|
1410
|
-
} else {
|
1411
|
-
rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
|
1412
|
-
}
|
1413
|
-
}
|
1414
|
-
|
1415
|
-
sea = sea_create(ir);
|
1416
|
-
sea->close_ir = false;
|
1417
|
-
Frt_Wrap_Struct(self, &frt_is_mark, &frt_is_free, sea);
|
1418
|
-
return self;
|
1419
|
-
}
|
1420
|
-
|
1421
|
-
#define GET_SEA Searcher *sea; Data_Get_Struct(self, Searcher, sea)
|
1419
|
+
#define GET_SEA Searcher *sea = (Searcher *)DATA_PTR(self)
|
1422
1420
|
|
1423
1421
|
static VALUE
|
1424
|
-
|
1422
|
+
frt_sea_close(VALUE self)
|
1425
1423
|
{
|
1426
1424
|
GET_SEA;
|
1427
1425
|
Frt_Unwrap_Struct(self);
|
@@ -1430,14 +1428,14 @@ frt_is_close(VALUE self)
|
|
1430
1428
|
}
|
1431
1429
|
|
1432
1430
|
static VALUE
|
1433
|
-
|
1431
|
+
frt_sea_get_reader(VALUE self, VALUE rterm)
|
1434
1432
|
{
|
1435
1433
|
GET_SEA;
|
1436
1434
|
return object_get(sea->ir);
|
1437
1435
|
}
|
1438
1436
|
|
1439
1437
|
static VALUE
|
1440
|
-
|
1438
|
+
frt_sea_doc_freq(VALUE self, VALUE rterm)
|
1441
1439
|
{
|
1442
1440
|
GET_SEA;
|
1443
1441
|
Term t;
|
@@ -1446,13 +1444,15 @@ frt_is_doc_freq(VALUE self, VALUE rterm)
|
|
1446
1444
|
}
|
1447
1445
|
|
1448
1446
|
static VALUE
|
1449
|
-
|
1447
|
+
frt_sea_doc_freqs(VALUE self, VALUE rterms)
|
1450
1448
|
{
|
1451
1449
|
int i;
|
1452
1450
|
GET_SEA;
|
1453
1451
|
Term t;
|
1452
|
+
VALUE freqs;
|
1454
1453
|
Check_Type(rterms, T_ARRAY);
|
1455
|
-
|
1454
|
+
|
1455
|
+
freqs = rb_ary_new2(RARRAY(rterms)->len);
|
1456
1456
|
for (i = 0; i < RARRAY(rterms)->len; i++) {
|
1457
1457
|
frt_set_term(RARRAY(rterms)->ptr[i], &t);
|
1458
1458
|
rb_ary_store(freqs, i, INT2FIX(sea->doc_freq(sea, &t)));
|
@@ -1461,21 +1461,21 @@ frt_is_doc_freqs(VALUE self, VALUE rterms)
|
|
1461
1461
|
}
|
1462
1462
|
|
1463
1463
|
static VALUE
|
1464
|
-
|
1464
|
+
frt_sea_doc(VALUE self, VALUE rdoc_num)
|
1465
1465
|
{
|
1466
1466
|
GET_SEA;
|
1467
1467
|
return frt_get_doc(sea->get_doc(sea, FIX2INT(rdoc_num)));
|
1468
1468
|
}
|
1469
1469
|
|
1470
1470
|
static VALUE
|
1471
|
-
|
1471
|
+
frt_sea_max_doc(VALUE self)
|
1472
1472
|
{
|
1473
1473
|
GET_SEA;
|
1474
1474
|
return INT2FIX(sea->max_doc(sea));
|
1475
1475
|
}
|
1476
1476
|
|
1477
1477
|
static TopDocs *
|
1478
|
-
|
1478
|
+
frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
1479
1479
|
{
|
1480
1480
|
VALUE rval;
|
1481
1481
|
int first_doc = 0, num_docs = 10;
|
@@ -1508,68 +1508,179 @@ frt_is_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
1508
1508
|
}
|
1509
1509
|
|
1510
1510
|
static VALUE
|
1511
|
-
|
1511
|
+
frt_sea_search(int argc, VALUE *argv, VALUE self)
|
1512
1512
|
{
|
1513
1513
|
GET_SEA;
|
1514
1514
|
VALUE rquery, roptions;
|
1515
1515
|
Query *query;
|
1516
1516
|
rb_scan_args(argc, argv, "11", &rquery, &roptions);
|
1517
1517
|
Data_Get_Struct(rquery, Query, query);
|
1518
|
-
return frt_get_td(
|
1518
|
+
return frt_get_td(frt_sea_search_internal(query, roptions, sea));
|
1519
1519
|
}
|
1520
1520
|
|
1521
1521
|
static VALUE
|
1522
|
-
|
1522
|
+
frt_sea_search_each(VALUE self, VALUE rquery, VALUE roptions)
|
1523
1523
|
{
|
1524
1524
|
return Qnil;
|
1525
1525
|
}
|
1526
1526
|
|
1527
1527
|
static VALUE
|
1528
|
-
|
1528
|
+
frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_num)
|
1529
1529
|
{
|
1530
1530
|
GET_SEA;
|
1531
1531
|
Query *query;
|
1532
|
+
Explanation *expl;
|
1532
1533
|
Data_Get_Struct(rquery, Query, query);
|
1533
|
-
|
1534
|
+
expl = sea->explain(sea, query, FIX2INT(rdoc_num));
|
1534
1535
|
return Data_Wrap_Struct(cExplanation, NULL, &expl_destoy, expl);
|
1535
1536
|
}
|
1536
1537
|
|
1537
1538
|
/****************************************************************************
|
1538
1539
|
*
|
1539
|
-
*
|
1540
|
+
* IndexSearcher Methods
|
1540
1541
|
*
|
1541
1542
|
****************************************************************************/
|
1542
1543
|
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1544
|
+
static void
|
1545
|
+
frt_is_mark(void *p)
|
1546
|
+
{
|
1547
|
+
Searcher *sea = (Searcher *)p;
|
1548
|
+
frt_gc_mark(sea->ir);
|
1549
|
+
frt_gc_mark(sea->ir->store);
|
1550
|
+
}
|
1551
|
+
|
1552
|
+
#define FRT_GET_IR(rir, ir) do {\
|
1553
|
+
rir = Data_Wrap_Struct(cIndexReader, &frt_ir_mark, &frt_ir_free, ir);\
|
1554
|
+
object_add(ir, rir);\
|
1555
|
+
} while (0)
|
1556
|
+
|
1557
|
+
static VALUE
|
1558
|
+
frt_is_init(VALUE self, VALUE obj)
|
1559
|
+
{
|
1560
|
+
Store *store = NULL;
|
1561
|
+
IndexReader *ir = NULL;
|
1562
|
+
Searcher *sea;
|
1563
|
+
if (TYPE(obj) == T_STRING) {
|
1564
|
+
store = open_fs_store(StringValueCStr(obj));
|
1565
|
+
ir = ir_open(store);
|
1566
|
+
deref(store);
|
1567
|
+
FRT_GET_IR(obj, ir);
|
1568
|
+
} else {
|
1569
|
+
Check_Type(obj, T_DATA);
|
1570
|
+
if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
|
1571
|
+
Data_Get_Struct(obj, Store, store);
|
1572
|
+
ir = ir_open(store);
|
1573
|
+
FRT_GET_IR(obj, ir);
|
1574
|
+
} else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
|
1575
|
+
Data_Get_Struct(obj, IndexReader, ir);
|
1576
|
+
} else {
|
1577
|
+
rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
|
1578
|
+
}
|
1579
|
+
}
|
1580
|
+
|
1581
|
+
sea = sea_create(ir);
|
1582
|
+
sea->close_ir = false;
|
1583
|
+
Frt_Wrap_Struct(self, &frt_is_mark, &frt_sea_free, sea);
|
1584
|
+
object_add(sea, self);
|
1585
|
+
return self;
|
1586
|
+
}
|
1587
|
+
|
1588
|
+
/****************************************************************************
|
1589
|
+
*
|
1590
|
+
* MultiSearcher Methods
|
1591
|
+
*
|
1592
|
+
****************************************************************************/
|
1593
|
+
|
1594
|
+
static void
|
1595
|
+
frt_ms_free(void *p)
|
1596
|
+
{
|
1597
|
+
Searcher *sea = (Searcher *)p;
|
1598
|
+
MultiSearcher *msea = (MultiSearcher *)sea->data;
|
1599
|
+
free(msea->searchers);
|
1600
|
+
object_del(sea);
|
1601
|
+
sea_close(sea);
|
1602
|
+
}
|
1603
|
+
|
1604
|
+
static void
|
1605
|
+
frt_ms_mark(void *p)
|
1606
|
+
{
|
1607
|
+
int i;
|
1608
|
+
Searcher *sea = (Searcher *)p;
|
1609
|
+
MultiSearcher *msea = (MultiSearcher *)sea->data;
|
1610
|
+
for (i = 0; i < msea->s_cnt; i++) {
|
1611
|
+
frt_gc_mark(msea->searchers[i]);
|
1612
|
+
}
|
1613
|
+
}
|
1614
|
+
|
1615
|
+
static VALUE
|
1616
|
+
frt_ms_init(int argc, VALUE *argv, VALUE self)
|
1617
|
+
{
|
1618
|
+
int i, j;
|
1619
|
+
|
1620
|
+
VALUE rsearcher;
|
1621
|
+
Array *searchers = ary_create(argc, (free_ft)NULL);
|
1622
|
+
Searcher *s;
|
1623
|
+
|
1624
|
+
for (i = 0; i < argc; i++) {
|
1625
|
+
rsearcher = argv[i];
|
1626
|
+
switch (TYPE(rsearcher)) {
|
1627
|
+
case T_ARRAY:
|
1628
|
+
for (j = 0; j < RARRAY(rsearcher)->len; j++) {
|
1629
|
+
VALUE rs = RARRAY(rsearcher)->ptr[j];
|
1630
|
+
Data_Get_Struct(rs, Searcher, s);
|
1631
|
+
ary_append(searchers, s);
|
1632
|
+
}
|
1633
|
+
break;
|
1634
|
+
case T_DATA:
|
1635
|
+
Data_Get_Struct(rsearcher, Searcher, s);
|
1636
|
+
ary_append(searchers, s);
|
1637
|
+
break;
|
1638
|
+
default:
|
1639
|
+
rb_raise(rb_eArgError, "Can't add class %s to MultiSearcher",
|
1640
|
+
rb_obj_classname(rsearcher));
|
1641
|
+
break;
|
1642
|
+
}
|
1643
|
+
}
|
1644
|
+
s = msea_create((Searcher **)searchers->elems, searchers->size, false);
|
1645
|
+
free(searchers); /* only free the Array, not the elems array holding the searchers */
|
1646
|
+
Frt_Wrap_Struct(self, &frt_ms_mark, &frt_ms_free, s);
|
1647
|
+
object_add(s, self);
|
1648
|
+
return self;
|
1649
|
+
}
|
1546
1650
|
|
1651
|
+
/****************************************************************************
|
1652
|
+
*
|
1653
|
+
* Index Methods
|
1654
|
+
*
|
1655
|
+
****************************************************************************/
|
1656
|
+
|
1657
|
+
/*
|
1547
1658
|
static void
|
1548
1659
|
frt_ind_free_store_i(Index *self)
|
1549
1660
|
{
|
1550
1661
|
VALUE rval;
|
1551
1662
|
if (self->close_store && (Qnil != (rval = object_get(self->store)))) {
|
1552
|
-
|
1663
|
+
// user passed close_dir option so unwrap it
|
1553
1664
|
Frt_Unwrap_Struct(rval);
|
1554
1665
|
object_del(self->store);
|
1555
1666
|
}
|
1556
1667
|
}
|
1668
|
+
*/
|
1557
1669
|
|
1558
1670
|
static void
|
1559
1671
|
frt_ind_free(void *p)
|
1560
1672
|
{
|
1561
|
-
Index *
|
1562
|
-
|
1563
|
-
|
1564
|
-
index_destroy(self);
|
1673
|
+
Index *ind = (Index *)p;
|
1674
|
+
object_del(ind);
|
1675
|
+
index_destroy(ind);
|
1565
1676
|
}
|
1566
1677
|
|
1567
1678
|
static void
|
1568
1679
|
frt_ind_mark(void *p)
|
1569
1680
|
{
|
1570
|
-
Index *
|
1571
|
-
frt_gc_mark(
|
1572
|
-
frt_gc_mark(
|
1681
|
+
Index *ind = (Index *)p;
|
1682
|
+
frt_gc_mark(ind->store);
|
1683
|
+
frt_gc_mark(ind->analyzer);
|
1573
1684
|
}
|
1574
1685
|
|
1575
1686
|
static VALUE
|
@@ -1582,14 +1693,13 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1582
1693
|
Store *store = NULL;
|
1583
1694
|
Analyzer *analyzer = NULL;
|
1584
1695
|
bool create = false;
|
1585
|
-
bool close_store = false;
|
1586
1696
|
HashSet *def_fields = NULL;
|
1587
1697
|
|
1588
1698
|
if (Qnil != (rval = rb_hash_aref(roptions, rpath_key))) {
|
1589
1699
|
rval = rb_obj_as_string(rval);
|
1590
1700
|
/* TODO: create the directory if it is missing */
|
1591
1701
|
store = open_fs_store(RSTRING(rval)->ptr);
|
1592
|
-
|
1702
|
+
deref(store);
|
1593
1703
|
} else if (Qnil != (rval = rb_hash_aref(roptions, rdir_key))) {
|
1594
1704
|
Data_Get_Struct(rval, Store, store);
|
1595
1705
|
}
|
@@ -1611,19 +1721,21 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1611
1721
|
}
|
1612
1722
|
|
1613
1723
|
if (Qnil != (rval = rb_hash_aref(roptions, ranalyzer_key))) {
|
1614
|
-
|
1724
|
+
analyzer = frt_get_cwrapped_analyzer(rval);
|
1615
1725
|
}
|
1616
1726
|
if (Qnil != (rval = rb_hash_aref(roptions, rdefault_search_field_key))) {
|
1617
1727
|
def_fields = frt_get_fields(rval);
|
1618
1728
|
}
|
1619
1729
|
if (Qnil != (rval = rb_hash_aref(roptions, rclose_dir_key))) {
|
1620
|
-
|
1730
|
+
/* No need to do anything here. Let the GC do the work.
|
1731
|
+
* if (RTEST(rval) && !close_store) close_store = true;
|
1732
|
+
*/
|
1621
1733
|
}
|
1622
1734
|
if (Qnil != (rval = rb_hash_aref(roptions, rdefault_field_key))) {
|
1623
1735
|
if (!def_fields) def_fields = frt_get_fields(rval);
|
1624
1736
|
}
|
1625
1737
|
ind = index_create(store, analyzer, def_fields, create);
|
1626
|
-
if (
|
1738
|
+
if (analyzer) a_deref(analyzer);
|
1627
1739
|
|
1628
1740
|
/* QueryParser options */
|
1629
1741
|
if (Qnil != (rval = rb_hash_aref(roptions, rhandle_parse_errors_key))) {
|
@@ -1686,13 +1798,13 @@ frt_ind_init(int argc, VALUE *argv, VALUE self)
|
|
1686
1798
|
return self;
|
1687
1799
|
}
|
1688
1800
|
|
1689
|
-
#define GET_IND Index *ind
|
1801
|
+
#define GET_IND Index *ind = (Index *)DATA_PTR(self);\
|
1690
1802
|
if (!ind) rb_raise(rb_eStandardError, "Called method on closed Index object")
|
1691
1803
|
static VALUE
|
1692
1804
|
frt_ind_close(VALUE self)
|
1693
1805
|
{
|
1694
1806
|
GET_IND;
|
1695
|
-
frt_ind_free_store_i(ind);
|
1807
|
+
//frt_ind_free_store_i(ind);
|
1696
1808
|
Frt_Unwrap_Struct(self);
|
1697
1809
|
object_del(ind);
|
1698
1810
|
index_destroy(ind);
|
@@ -1778,9 +1890,9 @@ frt_ind_add_doc(int argc, VALUE *argv, VALUE self)
|
|
1778
1890
|
doc = frt_rdoc_to_doc(ind, rdoc, &close_doc);
|
1779
1891
|
|
1780
1892
|
if (argc == 2) {
|
1781
|
-
Analyzer *analyzer;
|
1782
|
-
Data_Get_Struct(ranalyzer, Analyzer, analyzer);
|
1893
|
+
Analyzer *analyzer = frt_get_cwrapped_analyzer(ranalyzer);
|
1783
1894
|
index_add_doc_a(ind, doc, analyzer);
|
1895
|
+
a_deref(analyzer);
|
1784
1896
|
} else {
|
1785
1897
|
index_add_doc(ind, doc);
|
1786
1898
|
}
|
@@ -1789,7 +1901,7 @@ frt_ind_add_doc(int argc, VALUE *argv, VALUE self)
|
|
1789
1901
|
}
|
1790
1902
|
|
1791
1903
|
static Query *
|
1792
|
-
|
1904
|
+
frt_ind_get_query_i(Index *ind, VALUE rquery)
|
1793
1905
|
{
|
1794
1906
|
Query *q = NULL;
|
1795
1907
|
|
@@ -1798,10 +1910,10 @@ frt_get_query_i(Index *ind, VALUE rquery, bool *destroy_query)
|
|
1798
1910
|
rquery = rb_obj_as_string(rquery);
|
1799
1911
|
case T_STRING:
|
1800
1912
|
q = index_get_query(ind, RSTRING(rquery)->ptr);
|
1801
|
-
*destroy_query = true;
|
1802
1913
|
break;
|
1803
1914
|
case T_DATA:
|
1804
1915
|
Data_Get_Struct(rquery, Query, q);
|
1916
|
+
ref(q);
|
1805
1917
|
break;
|
1806
1918
|
default:
|
1807
1919
|
rb_raise(rb_eArgError, "Can only handle a String or a Query.");
|
@@ -1816,14 +1928,13 @@ frt_ind_search(int argc, VALUE *argv, VALUE self)
|
|
1816
1928
|
{
|
1817
1929
|
Query *q;
|
1818
1930
|
VALUE rquery, roptions, rtd;
|
1819
|
-
bool destroy_query = false;
|
1820
1931
|
GET_IND;
|
1821
1932
|
rb_scan_args(argc, argv, "11", &rquery, &roptions);
|
1822
1933
|
ensure_searcher_open(ind);
|
1823
1934
|
|
1824
|
-
q =
|
1825
|
-
rtd = frt_get_td(
|
1826
|
-
|
1935
|
+
q = frt_ind_get_query_i(ind, rquery);
|
1936
|
+
rtd = frt_get_td(frt_sea_search_internal(q, roptions, ind->sea));
|
1937
|
+
q_deref(q);
|
1827
1938
|
|
1828
1939
|
return rtd;
|
1829
1940
|
}
|
@@ -1835,7 +1946,6 @@ frt_ind_search_each(int argc, VALUE *argv, VALUE self)
|
|
1835
1946
|
Query *q;
|
1836
1947
|
TopDocs *td;
|
1837
1948
|
VALUE rquery, roptions, rtotal_hits;
|
1838
|
-
bool destroy_query = false;
|
1839
1949
|
GET_IND;
|
1840
1950
|
|
1841
1951
|
|
@@ -1845,10 +1955,10 @@ frt_ind_search_each(int argc, VALUE *argv, VALUE self)
|
|
1845
1955
|
|
1846
1956
|
ensure_searcher_open(ind);
|
1847
1957
|
|
1848
|
-
q =
|
1958
|
+
q = frt_ind_get_query_i(ind, rquery);
|
1849
1959
|
//printf(">>>>>%s<<<<<\n", q->to_s(q, "file_name"));
|
1850
|
-
td =
|
1851
|
-
|
1960
|
+
td = frt_sea_search_internal(q, roptions, ind->sea);
|
1961
|
+
q_deref(q);
|
1852
1962
|
|
1853
1963
|
rtotal_hits = INT2FIX(td->total_hits);
|
1854
1964
|
|
@@ -2061,7 +2171,7 @@ struct QueryUpdateArg {
|
|
2061
2171
|
Index *ind;
|
2062
2172
|
};
|
2063
2173
|
|
2064
|
-
static void frt_ind_qupd_i(Searcher *sea, int doc_num, void *arg)
|
2174
|
+
static void frt_ind_qupd_i(Searcher *sea, int doc_num, float score, void *arg)
|
2065
2175
|
{
|
2066
2176
|
struct QueryUpdateArg *qua = (struct QueryUpdateArg *)arg;
|
2067
2177
|
Document *doc = sea->ir->get_doc(sea->ir, doc_num);
|
@@ -2073,22 +2183,19 @@ static void frt_ind_qupd_i(Searcher *sea, int doc_num, void *arg)
|
|
2073
2183
|
static VALUE
|
2074
2184
|
frt_ind_query_update(VALUE self, VALUE rquery, VALUE rdoc)
|
2075
2185
|
{
|
2076
|
-
GET_IND;
|
2077
|
-
|
2078
2186
|
int i;
|
2079
2187
|
Query *q;
|
2080
|
-
bool destroy_query = false;
|
2081
2188
|
struct QueryUpdateArg qua;
|
2082
|
-
|
2189
|
+
GET_IND;
|
2083
2190
|
|
2084
2191
|
ensure_searcher_open(ind);
|
2085
2192
|
qua.rdoc = rdoc;
|
2086
|
-
qua.docs = ary_create(8, &doc_destroy);
|
2193
|
+
qua.docs = ary_create(8, (free_ft)&doc_destroy);
|
2087
2194
|
qua.ind = ind;
|
2088
2195
|
|
2089
|
-
q =
|
2196
|
+
q = frt_ind_get_query_i(ind, rquery);
|
2090
2197
|
sea_search_each(ind->sea, q, NULL, &frt_ind_qupd_i, &qua);
|
2091
|
-
|
2198
|
+
q_deref(q);
|
2092
2199
|
|
2093
2200
|
for (i = 0; i < qua.docs->size; i++) {
|
2094
2201
|
index_add_doc(ind, qua.docs->elems[i]);
|
@@ -2206,37 +2313,37 @@ static VALUE
|
|
2206
2313
|
frt_ind_persist(int argc, VALUE *argv, VALUE self)
|
2207
2314
|
{
|
2208
2315
|
VALUE rdir, rcreate;
|
2209
|
-
bool create
|
2316
|
+
bool create;
|
2210
2317
|
Store *old_store;
|
2211
2318
|
GET_IND;
|
2212
2319
|
|
2213
2320
|
index_flush(ind);
|
2214
|
-
frt_ind_free_store_i(ind);
|
2321
|
+
//frt_ind_free_store_i(ind);
|
2215
2322
|
old_store = ind->store;
|
2216
|
-
close_store = ind->close_store;
|
2217
2323
|
|
2218
2324
|
rb_scan_args(argc, argv, "11", &rdir, &rcreate);
|
2219
2325
|
create = RTEST(rcreate);
|
2220
2326
|
|
2221
2327
|
if (T_DATA == TYPE(rdir)) {
|
2222
2328
|
Data_Get_Struct(rdir, Store, ind->store);
|
2329
|
+
ref(ind->store);
|
2223
2330
|
} else {
|
2224
2331
|
rdir = rb_obj_as_string(rdir);
|
2225
2332
|
ind->store = open_fs_store(RSTRING(rdir)->ptr);
|
2226
|
-
ind->close_store = true;
|
2227
2333
|
}
|
2228
2334
|
|
2229
2335
|
if (!create && !ind->store->exists(ind->store, "segments")) create = true;
|
2230
2336
|
|
2231
2337
|
if (create) {
|
2232
|
-
ind->iw = iw_open(ind->store,
|
2338
|
+
ind->iw = iw_open(ind->store, ind->analyzer, create);
|
2339
|
+
ref(ind->analyzer);
|
2233
2340
|
ind->iw->use_compound_file = ind->use_compound_file;
|
2234
2341
|
}
|
2235
2342
|
|
2236
2343
|
ensure_writer_open(ind);
|
2237
2344
|
iw_add_indexes(ind->iw, &old_store, 1);
|
2238
2345
|
|
2239
|
-
|
2346
|
+
store_deref(old_store);
|
2240
2347
|
|
2241
2348
|
index_auto_flush_iw(ind);
|
2242
2349
|
|
@@ -2246,11 +2353,12 @@ frt_ind_persist(int argc, VALUE *argv, VALUE self)
|
|
2246
2353
|
static VALUE
|
2247
2354
|
frt_ind_explain(VALUE self, VALUE rquery, VALUE rdoc_num)
|
2248
2355
|
{
|
2356
|
+
Query *q;
|
2357
|
+
Explanation *expl;
|
2249
2358
|
GET_IND;
|
2250
|
-
|
2251
|
-
|
2252
|
-
|
2253
|
-
if (destroy_query) q->destroy(q);
|
2359
|
+
q = frt_ind_get_query_i(ind, rquery);
|
2360
|
+
expl = index_explain(ind, q, FIX2INT(rdoc_num));
|
2361
|
+
q_deref(q);
|
2254
2362
|
return Data_Wrap_Struct(cExplanation, NULL, &expl_destoy, expl);
|
2255
2363
|
}
|
2256
2364
|
|
@@ -2498,6 +2606,7 @@ Init_search(void)
|
|
2498
2606
|
rb_define_method(cSortField, "name", frt_sf_get_name, 0);
|
2499
2607
|
rb_define_method(cSortField, "sort_type", frt_sf_get_sort_type, 0);
|
2500
2608
|
rb_define_method(cSortField, "comparator", frt_sf_get_comparator, 0);
|
2609
|
+
rb_define_method(cSortField, "to_s", frt_sf_to_s, 0);
|
2501
2610
|
|
2502
2611
|
/* SortType */
|
2503
2612
|
cSortType = rb_define_class_under(cSortField, "SortType", rb_cObject);
|
@@ -2536,27 +2645,36 @@ Init_search(void)
|
|
2536
2645
|
|
2537
2646
|
rb_define_method(cSort, "initialize", frt_sort_init, -1);
|
2538
2647
|
rb_define_method(cSort, "fields", frt_sort_get_fields, 0);
|
2648
|
+
rb_define_method(cSort, "to_s", frt_sort_to_s, 0);
|
2539
2649
|
|
2540
2650
|
rb_define_const(cSort, "RELEVANCE",
|
2541
2651
|
frt_sort_init(0, NULL, frt_sort_alloc(cSort)));
|
2542
2652
|
rb_define_const(cSort, "INDEX_ORDER",
|
2543
2653
|
frt_sort_init(1, &oSORT_FIELD_DOC, frt_sort_alloc(cSort)));
|
2544
2654
|
|
2655
|
+
/* Searcher */
|
2656
|
+
cSearcher = rb_define_class_under(mSearch, "Searcher", rb_cObject);
|
2657
|
+
rb_define_method(cSearcher, "close", frt_sea_close, 0);
|
2658
|
+
rb_define_method(cSearcher, "reader", frt_sea_get_reader, 0);
|
2659
|
+
rb_define_method(cSearcher, "doc_freq", frt_sea_doc_freq, 1);
|
2660
|
+
rb_define_method(cSearcher, "doc_freqs", frt_sea_doc_freqs, 1);
|
2661
|
+
rb_define_method(cSearcher, "doc", frt_sea_doc, 1);
|
2662
|
+
rb_define_method(cSearcher, "[]", frt_sea_doc, 1);
|
2663
|
+
rb_define_method(cSearcher, "max_doc", frt_sea_max_doc, 0);
|
2664
|
+
rb_define_method(cSearcher, "search", frt_sea_search, -1);
|
2665
|
+
rb_define_method(cSearcher, "search_each", frt_sea_search_each, 2);
|
2666
|
+
rb_define_method(cSearcher, "explain", frt_sea_explain, 2);
|
2667
|
+
|
2545
2668
|
/* IndexSearcher */
|
2546
|
-
cIndexSearcher = rb_define_class_under(mSearch, "IndexSearcher",
|
2669
|
+
cIndexSearcher = rb_define_class_under(mSearch, "IndexSearcher", cSearcher);
|
2547
2670
|
rb_define_alloc_func(cIndexSearcher, frt_data_alloc);
|
2548
|
-
|
2549
2671
|
rb_define_method(cIndexSearcher, "initialize", frt_is_init, 1);
|
2550
|
-
|
2551
|
-
|
2552
|
-
|
2553
|
-
|
2554
|
-
rb_define_method(
|
2555
|
-
|
2556
|
-
rb_define_method(cIndexSearcher, "max_doc", frt_is_max_doc, 0);
|
2557
|
-
rb_define_method(cIndexSearcher, "search", frt_is_search, -1);
|
2558
|
-
rb_define_method(cIndexSearcher, "search_each", frt_is_search_each, 2);
|
2559
|
-
rb_define_method(cIndexSearcher, "explain", frt_is_explain, 2);
|
2672
|
+
|
2673
|
+
/* MultiSearcher */
|
2674
|
+
cMultiSearcher = rb_define_class_under(mSearch, "MultiSearcher", cSearcher);
|
2675
|
+
rb_define_alloc_func(cMultiSearcher, frt_data_alloc);
|
2676
|
+
rb_define_method(cMultiSearcher, "initialize", frt_ms_init, -1);
|
2677
|
+
|
2560
2678
|
|
2561
2679
|
/* Index */
|
2562
2680
|
cIndex = rb_define_class_under(mIndex, "Index", rb_cObject);
|