ferret 0.10.4 → 0.10.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/ext/analysis.c +7 -1
- data/ext/bitvector.c +5 -2
- data/ext/bitvector.h +1 -0
- data/ext/ferret.c +55 -8
- data/ext/ferret.h +8 -2
- data/ext/index.c +34 -43
- data/ext/index.h +1 -1
- data/ext/q_boolean.c +1 -1
- data/ext/q_multi_term.c +13 -1
- data/ext/q_parser.c +33 -18
- data/ext/r_analysis.c +68 -45
- data/ext/r_index.c +64 -10
- data/ext/r_search.c +145 -10
- data/ext/search.c +71 -12
- data/lib/ferret/index.rb +42 -28
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_analyzer.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +0 -1
- data/test/unit/index/tc_index.rb +3 -3
- data/test/unit/index/tc_index_reader.rb +5 -0
- data/test/unit/search/tc_filter.rb +15 -0
- data/test/unit/search/tm_searcher.rb +13 -2
- metadata +2 -2
data/ext/r_search.c
CHANGED
@@ -36,12 +36,13 @@ static VALUE cSpanOrQuery;
|
|
36
36
|
static VALUE cSpanNotQuery;
|
37
37
|
|
38
38
|
/* Filters */
|
39
|
+
static ID id_bits;
|
39
40
|
static VALUE cFilter;
|
40
41
|
static VALUE cRangeFilter;
|
41
42
|
static VALUE cQueryFilter;
|
42
43
|
|
43
44
|
/* MultiTermQuery */
|
44
|
-
static
|
45
|
+
static ID id_default_max_terms;
|
45
46
|
static VALUE sym_max_terms;
|
46
47
|
static VALUE sym_min_score;
|
47
48
|
|
@@ -72,8 +73,8 @@ static VALUE sym_in_order;
|
|
72
73
|
static VALUE sym_clauses;
|
73
74
|
|
74
75
|
/* Class variable ids */
|
75
|
-
static
|
76
|
-
static
|
76
|
+
static ID id_default_min_similarity;
|
77
|
+
static ID id_default_prefix_length;
|
77
78
|
|
78
79
|
|
79
80
|
/** Sort **/
|
@@ -93,6 +94,15 @@ static VALUE sym_type;
|
|
93
94
|
static VALUE sym_reverse;
|
94
95
|
static VALUE sym_comparator;
|
95
96
|
|
97
|
+
/* Hits */
|
98
|
+
static ID id_doc;
|
99
|
+
static ID id_score;
|
100
|
+
|
101
|
+
/* TopDocs */
|
102
|
+
static ID id_hits;
|
103
|
+
static ID id_total_hits;
|
104
|
+
static ID id_max_score;
|
105
|
+
|
96
106
|
/* Search */
|
97
107
|
static VALUE sym_offset;
|
98
108
|
static VALUE sym_limit;
|
@@ -113,7 +123,6 @@ extern void frt_ir_mark(void *p);
|
|
113
123
|
|
114
124
|
|
115
125
|
extern void frt_set_term(VALUE rterm, Term *t);
|
116
|
-
extern Term *frt_get_term(VALUE rterm);
|
117
126
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
118
127
|
extern HashSet *frt_get_fields(VALUE rfields);
|
119
128
|
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
@@ -161,6 +170,35 @@ frt_get_td(TopDocs *td)
|
|
161
170
|
return rtop_docs;
|
162
171
|
}
|
163
172
|
|
173
|
+
static VALUE
|
174
|
+
frt_td_to_s(VALUE self)
|
175
|
+
{
|
176
|
+
int i;
|
177
|
+
VALUE rhits = rb_funcall(self, id_hits, 0);
|
178
|
+
const int len = RARRAY(rhits)->len;
|
179
|
+
char *str = ALLOC_N(char, len * 64 + 100);
|
180
|
+
char *s = str;
|
181
|
+
VALUE rstr;
|
182
|
+
|
183
|
+
sprintf(s, "TopDocs: totalhits = %d, max_score = %f [\n",
|
184
|
+
FIX2INT(rb_funcall(self, id_total_hits, 0)),
|
185
|
+
NUM2DBL(rb_funcall(self, id_max_score, 0)));
|
186
|
+
s += strlen(s);
|
187
|
+
|
188
|
+
for (i = 0; i < len; i++) {
|
189
|
+
VALUE rhit = RARRAY(rhits)->ptr[i];
|
190
|
+
sprintf(s, "\t%d: %f\n",
|
191
|
+
FIX2INT(rb_funcall(rhit, id_doc, 0)),
|
192
|
+
NUM2DBL(rb_funcall(rhit, id_score, 0)));
|
193
|
+
s += strlen(s);
|
194
|
+
}
|
195
|
+
|
196
|
+
sprintf(s, "]\n");
|
197
|
+
rstr = rb_str_new2(str);
|
198
|
+
free(str);
|
199
|
+
return rstr;
|
200
|
+
}
|
201
|
+
|
164
202
|
/****************************************************************************
|
165
203
|
*
|
166
204
|
* Explanation Methods
|
@@ -319,6 +357,34 @@ frt_q_eql(VALUE self, VALUE other)
|
|
319
357
|
return q->eq(q, oq) ? Qtrue : Qfalse;
|
320
358
|
}
|
321
359
|
|
360
|
+
/*
|
361
|
+
* call-seq:
|
362
|
+
* query.terms(searcher) -> term_array
|
363
|
+
*
|
364
|
+
* Returns an array of terms searched for by this query. This can be used for
|
365
|
+
* implementing an external query highlighter for example. You must supply a
|
366
|
+
* searcher so that the query can be rewritten and optimized like it would be
|
367
|
+
* in a real search.
|
368
|
+
*/
|
369
|
+
static VALUE
|
370
|
+
frt_q_get_terms(VALUE self, VALUE searcher)
|
371
|
+
{
|
372
|
+
int i;
|
373
|
+
VALUE rterms = rb_ary_new();
|
374
|
+
HashSet *terms = term_set_new();
|
375
|
+
GET_Q();
|
376
|
+
Searcher *sea = (Searcher *)DATA_PTR(searcher);
|
377
|
+
Query *rq = sea->rewrite(sea, q);
|
378
|
+
rq->extract_terms(rq, terms);
|
379
|
+
q_deref(rq);
|
380
|
+
for (i = 0; i < terms->size; i++) {
|
381
|
+
Term *term = (Term *)terms->elems[i];
|
382
|
+
rb_ary_push(rterms, frt_get_term(term->field, term->text));
|
383
|
+
}
|
384
|
+
hs_destroy(terms);
|
385
|
+
return rterms;
|
386
|
+
}
|
387
|
+
|
322
388
|
#define MK_QUERY(klass, q) Data_Wrap_Struct(klass, NULL, &frt_q_free, q)
|
323
389
|
VALUE
|
324
390
|
frt_get_q(Query *q)
|
@@ -2130,6 +2196,53 @@ call_filter_proc(int doc_id, float score, Searcher *self)
|
|
2130
2196
|
object_get(self)));
|
2131
2197
|
}
|
2132
2198
|
|
2199
|
+
typedef struct CWrappedFilter
|
2200
|
+
{
|
2201
|
+
Filter super;
|
2202
|
+
VALUE rfilter;
|
2203
|
+
} CWrappedFilter;
|
2204
|
+
#define CWF(filt) ((CWrappedFilter *)(filt))
|
2205
|
+
|
2206
|
+
static ulong
|
2207
|
+
cwfilt_hash(Filter *filt)
|
2208
|
+
{
|
2209
|
+
return NUM2ULONG(rb_funcall(CWF(filt)->rfilter, id_hash, 0));
|
2210
|
+
}
|
2211
|
+
|
2212
|
+
static int
|
2213
|
+
cwfilt_eq(Filter *filt, Filter *o)
|
2214
|
+
{
|
2215
|
+
return RTEST(rb_funcall(CWF(filt)->rfilter, id_eql, 1, CWF(o)->rfilter));
|
2216
|
+
}
|
2217
|
+
|
2218
|
+
static BitVector *
|
2219
|
+
cwfilt_get_bv_i(Filter *filt, IndexReader *ir)
|
2220
|
+
{
|
2221
|
+
VALUE rbv = rb_funcall(CWF(filt)->rfilter, id_bits, 1, object_get(ir));
|
2222
|
+
BitVector *bv;
|
2223
|
+
Data_Get_Struct(rbv, BitVector, bv);
|
2224
|
+
REF(bv);
|
2225
|
+
return bv;
|
2226
|
+
}
|
2227
|
+
|
2228
|
+
Filter *
|
2229
|
+
frt_get_cwrapped_filter(VALUE rval)
|
2230
|
+
{
|
2231
|
+
Filter *filter;
|
2232
|
+
if (frt_is_cclass(rval) && DATA_PTR(rval)) {
|
2233
|
+
Data_Get_Struct(rval, Filter, filter);
|
2234
|
+
REF(filter);
|
2235
|
+
}
|
2236
|
+
else {
|
2237
|
+
filter = filt_create(sizeof(CWrappedFilter), "CWrappedFilter");
|
2238
|
+
filter->hash = &cwfilt_hash;
|
2239
|
+
filter->eq = &cwfilt_eq;
|
2240
|
+
filter->get_bv_i = &cwfilt_get_bv_i;
|
2241
|
+
CWF(filter)->rfilter = rval;
|
2242
|
+
}
|
2243
|
+
return filter;
|
2244
|
+
}
|
2245
|
+
|
2133
2246
|
static TopDocs *
|
2134
2247
|
frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
2135
2248
|
{
|
@@ -2137,6 +2250,8 @@ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
2137
2250
|
int offset = 0, limit = 10;
|
2138
2251
|
Filter *filter = NULL;
|
2139
2252
|
Sort *sort = NULL;
|
2253
|
+
TopDocs *td;
|
2254
|
+
|
2140
2255
|
filter_ft filter_func = NULL;
|
2141
2256
|
|
2142
2257
|
if (Qnil != roptions) {
|
@@ -2159,7 +2274,7 @@ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
2159
2274
|
}
|
2160
2275
|
}
|
2161
2276
|
if (Qnil != (rval = rb_hash_aref(roptions, sym_filter))) {
|
2162
|
-
|
2277
|
+
filter = frt_get_cwrapped_filter(rval);
|
2163
2278
|
}
|
2164
2279
|
if (Qnil != (rval = rb_hash_aref(roptions, sym_filter_proc))) {
|
2165
2280
|
filter_func = &call_filter_proc;
|
@@ -2173,7 +2288,9 @@ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
2173
2288
|
}
|
2174
2289
|
}
|
2175
2290
|
|
2176
|
-
|
2291
|
+
td = sea->search(sea, query, offset, limit, filter, sort, filter_func, 0);
|
2292
|
+
if (filter) filt_deref(filter);
|
2293
|
+
return td;
|
2177
2294
|
}
|
2178
2295
|
|
2179
2296
|
/*
|
@@ -2317,7 +2434,8 @@ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
|
|
2317
2434
|
* === Options
|
2318
2435
|
*
|
2319
2436
|
* :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
|
2320
|
-
* terms will be in the centre of the excerpt.
|
2437
|
+
* terms will be in the centre of the excerpt. Set to
|
2438
|
+
* :all to highlight the entire field.
|
2321
2439
|
* :num_excerpts:: Default: 2. Number of excerpts to return.
|
2322
2440
|
* :pre_tag:: Default: "<b>". Tag to place to the left of the match.
|
2323
2441
|
* You'll probably want to change this to a "<span>" tag
|
@@ -2344,12 +2462,18 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
|
|
2344
2462
|
|
2345
2463
|
rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
|
2346
2464
|
Data_Get_Struct(rquery, Query, query);
|
2347
|
-
if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
|
2348
|
-
excerpt_length = FIX2INT(v);
|
2349
|
-
}
|
2350
2465
|
if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
|
2351
2466
|
num_excerpts = FIX2INT(v);
|
2352
2467
|
}
|
2468
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
|
2469
|
+
if (v == sym_all) {
|
2470
|
+
num_excerpts = 1;
|
2471
|
+
excerpt_length = INT_MAX/2;
|
2472
|
+
}
|
2473
|
+
else {
|
2474
|
+
excerpt_length = FIX2INT(v);
|
2475
|
+
}
|
2476
|
+
}
|
2353
2477
|
if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
|
2354
2478
|
pre_tag = RSTRING(rb_obj_as_string(v))->ptr;
|
2355
2479
|
}
|
@@ -2539,6 +2663,8 @@ Init_Hit(void)
|
|
2539
2663
|
cHit = rb_struct_define(hit_class, "doc", "score", NULL);
|
2540
2664
|
rb_set_class_path(cHit, mSearch, hit_class);
|
2541
2665
|
rb_const_set(mSearch, rb_intern(hit_class), cHit);
|
2666
|
+
id_doc = rb_intern("doc");
|
2667
|
+
id_score = rb_intern("score");
|
2542
2668
|
}
|
2543
2669
|
|
2544
2670
|
/*
|
@@ -2570,6 +2696,10 @@ Init_TopDocs(void)
|
|
2570
2696
|
NULL);
|
2571
2697
|
rb_set_class_path(cTopDocs, mSearch, td_class);
|
2572
2698
|
rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
|
2699
|
+
rb_define_method(cTopDocs, "to_s", frt_td_to_s, 0);
|
2700
|
+
id_hits = rb_intern("hits");
|
2701
|
+
id_total_hits = rb_intern("total_hits");
|
2702
|
+
id_max_score = rb_intern("max_score");
|
2573
2703
|
}
|
2574
2704
|
|
2575
2705
|
/*
|
@@ -2646,6 +2776,7 @@ Init_Query(void)
|
|
2646
2776
|
rb_define_method(cQuery, "eql?", frt_q_eql, 1);
|
2647
2777
|
rb_define_method(cQuery, "==", frt_q_eql, 1);
|
2648
2778
|
rb_define_method(cQuery, "hash", frt_q_hash, 0);
|
2779
|
+
rb_define_method(cQuery, "terms", frt_q_get_terms, 1);
|
2649
2780
|
}
|
2650
2781
|
|
2651
2782
|
/*
|
@@ -3326,6 +3457,7 @@ static void
|
|
3326
3457
|
Init_RangeFilter(void)
|
3327
3458
|
{
|
3328
3459
|
cRangeFilter = rb_define_class_under(mSearch, "RangeFilter", cFilter);
|
3460
|
+
frt_mark_cclass(cRangeFilter);
|
3329
3461
|
rb_define_alloc_func(cRangeFilter, frt_data_alloc);
|
3330
3462
|
|
3331
3463
|
rb_define_method(cRangeFilter, "initialize", frt_rf_init, 2);
|
@@ -3360,6 +3492,7 @@ static void
|
|
3360
3492
|
Init_QueryFilter(void)
|
3361
3493
|
{
|
3362
3494
|
cQueryFilter = rb_define_class_under(mSearch, "QueryFilter", cFilter);
|
3495
|
+
frt_mark_cclass(cQueryFilter);
|
3363
3496
|
rb_define_alloc_func(cQueryFilter, frt_data_alloc);
|
3364
3497
|
|
3365
3498
|
rb_define_method(cQueryFilter, "initialize", frt_qf_init, 1);
|
@@ -3383,7 +3516,9 @@ Init_QueryFilter(void)
|
|
3383
3516
|
static void
|
3384
3517
|
Init_Filter(void)
|
3385
3518
|
{
|
3519
|
+
id_bits = rb_intern("bits");
|
3386
3520
|
cFilter = rb_define_class_under(mSearch, "Filter", rb_cObject);
|
3521
|
+
frt_mark_cclass(cFilter);
|
3387
3522
|
rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
|
3388
3523
|
|
3389
3524
|
rb_define_method(cFilter, "to_s", frt_f_to_s, 0);
|
data/ext/search.c
CHANGED
@@ -741,13 +741,17 @@ static char *excerpt_get_str(Excerpt *e, MatchVector *mv,
|
|
741
741
|
for (i = e->start; i <= e->end; i++) {
|
742
742
|
MatchRange *mr = mv->matches + i;
|
743
743
|
len = mr->start_offset - last_offset;
|
744
|
-
if (len)
|
745
|
-
|
744
|
+
if (len) {
|
745
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
746
|
+
e_ptr += len;
|
747
|
+
}
|
746
748
|
memcpy(e_ptr, pre_tag, pre_tag_len);
|
747
749
|
e_ptr += pre_tag_len;
|
748
750
|
len = mr->end_offset - mr->start_offset;
|
749
|
-
if (len)
|
750
|
-
|
751
|
+
if (len) {
|
752
|
+
lazy_df_get_bytes(lazy_df, e_ptr, mr->start_offset, len);
|
753
|
+
e_ptr += len;
|
754
|
+
}
|
751
755
|
memcpy(e_ptr, post_tag, post_tag_len);
|
752
756
|
e_ptr += post_tag_len;
|
753
757
|
last_offset = mr->end_offset;
|
@@ -757,8 +761,10 @@ static char *excerpt_get_str(Excerpt *e, MatchVector *mv,
|
|
757
761
|
e->end_offset = lazy_df->len;
|
758
762
|
}
|
759
763
|
len = e->end_offset - last_offset;
|
760
|
-
if (len)
|
761
|
-
|
764
|
+
if (len) {
|
765
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
766
|
+
e_ptr += len;
|
767
|
+
}
|
762
768
|
if (e->end_offset < lazy_df->len) {
|
763
769
|
memcpy(e_ptr, ellipsis, ellipsis_len);
|
764
770
|
e_ptr += ellipsis_len;
|
@@ -767,6 +773,54 @@ static char *excerpt_get_str(Excerpt *e, MatchVector *mv,
|
|
767
773
|
return excerpt_str;
|
768
774
|
}
|
769
775
|
|
776
|
+
static char *highlight_field(MatchVector *mv,
|
777
|
+
LazyDocField *lazy_df,
|
778
|
+
TermVector *tv,
|
779
|
+
const char *pre_tag,
|
780
|
+
const char *post_tag)
|
781
|
+
{
|
782
|
+
const int pre_len = (int)strlen(pre_tag);
|
783
|
+
const int post_len = (int)strlen(post_tag);
|
784
|
+
char *excerpt_str =
|
785
|
+
ALLOC_N(char, 10 + lazy_df->len + (mv->size * (pre_len + post_len)));
|
786
|
+
if (mv->size > 0) {
|
787
|
+
int last_offset = 0;
|
788
|
+
int i, len;
|
789
|
+
char *e_ptr = excerpt_str;
|
790
|
+
matchv_compact_with_breaks(mv);
|
791
|
+
matchv_set_offsets(mv, tv->offsets);
|
792
|
+
for (i = 0; i < mv->size; i++) {
|
793
|
+
MatchRange *mr = mv->matches + i;
|
794
|
+
len = mr->start_offset - last_offset;
|
795
|
+
if (len) {
|
796
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
797
|
+
e_ptr += len;
|
798
|
+
}
|
799
|
+
memcpy(e_ptr, pre_tag, pre_len);
|
800
|
+
e_ptr += pre_len;
|
801
|
+
len = mr->end_offset - mr->start_offset;
|
802
|
+
if (len) {
|
803
|
+
lazy_df_get_bytes(lazy_df, e_ptr, mr->start_offset, len);
|
804
|
+
e_ptr += len;
|
805
|
+
}
|
806
|
+
memcpy(e_ptr, post_tag, post_len);
|
807
|
+
e_ptr += post_len;
|
808
|
+
last_offset = mr->end_offset;
|
809
|
+
}
|
810
|
+
len = lazy_df->len - last_offset;
|
811
|
+
if (len) {
|
812
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
813
|
+
e_ptr += len;
|
814
|
+
}
|
815
|
+
*e_ptr = '\0';
|
816
|
+
}
|
817
|
+
else {
|
818
|
+
lazy_df_get_bytes(lazy_df, excerpt_str, 0, lazy_df->len);
|
819
|
+
excerpt_str[lazy_df->len] = '\0';
|
820
|
+
}
|
821
|
+
return excerpt_str;
|
822
|
+
}
|
823
|
+
|
770
824
|
char **searcher_highlight(Searcher *self,
|
771
825
|
Query *query,
|
772
826
|
const int doc_num,
|
@@ -789,7 +843,12 @@ char **searcher_highlight(Searcher *self,
|
|
789
843
|
MatchVector *mv;
|
790
844
|
query = self->rewrite(self, query);
|
791
845
|
mv = query->get_matchv_i(query, matchv_new(), tv);
|
792
|
-
if (
|
846
|
+
if (lazy_df->len < (excerpt_len * num_excerpts)) {
|
847
|
+
excerpt_strs = ary_new_type_capa(char *, 1);
|
848
|
+
ary_push(excerpt_strs,
|
849
|
+
highlight_field(mv, lazy_df, tv, pre_tag, post_tag));
|
850
|
+
}
|
851
|
+
else if (mv->size > 0) {
|
793
852
|
Excerpt **excerpts = ALLOC_AND_ZERO_N(Excerpt *, num_excerpts);
|
794
853
|
int e_start, e_end, i, j;
|
795
854
|
MatchRange *matches = mv->matches;
|
@@ -802,12 +861,12 @@ char **searcher_highlight(Searcher *self,
|
|
802
861
|
excerpt_pq = pq_new(mv->size, (lt_ft)&excerpt_lt, &free);
|
803
862
|
/* add all possible excerpts to the priority queue */
|
804
863
|
|
805
|
-
for (e_start =
|
864
|
+
for (e_start = e_end = 0; e_start < mv->size; e_start++) {
|
806
865
|
const int start_offset = matches[e_start].start_offset;
|
807
|
-
if (e_start
|
808
|
-
|
866
|
+
if (e_start > e_end) {
|
867
|
+
running_score = 0.0;
|
868
|
+
e_end = e_start;
|
809
869
|
}
|
810
|
-
running_score += matches[e_start].score;
|
811
870
|
while (e_end < mv->size && (matches[e_end].end_offset
|
812
871
|
<= start_offset + excerpt_len)) {
|
813
872
|
running_score += matches[e_end].score;
|
@@ -883,8 +942,8 @@ char **searcher_highlight(Searcher *self,
|
|
883
942
|
}
|
884
943
|
free(excerpts);
|
885
944
|
pq_destroy(excerpt_pq);
|
886
|
-
matchv_destroy(mv);
|
887
945
|
}
|
946
|
+
matchv_destroy(mv);
|
888
947
|
q_deref(query);
|
889
948
|
}
|
890
949
|
if (tv) tv_destroy(tv);
|
data/lib/ferret/index.rb
CHANGED
@@ -152,22 +152,23 @@ module Ferret::Index
|
|
152
152
|
# you want to highlight multiple fields then you will
|
153
153
|
# need to call this method multiple times.
|
154
154
|
# excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
|
155
|
-
# terms will be in the centre of the excerpt.
|
155
|
+
# terms will be in the centre of the excerpt. Set to
|
156
|
+
# :all to highlight the entire field.
|
156
157
|
# num_excerpts:: Default: 2. Number of excerpts to return.
|
157
158
|
# pre_tag:: Default: "<b>". Tag to place to the left of the
|
158
159
|
# match. You'll probably want to change this to a
|
159
|
-
# "<span>" tag with a class "\033[
|
160
|
+
# "<span>" tag with a class "\033[36m" for use in a
|
160
161
|
# terminal.
|
161
162
|
# post_tag:: Default: "</b>". This tag should close the
|
162
163
|
# +:pre_tag+. Try tag "\033[m" in the terminal.
|
163
164
|
# ellipsis:: Default: "...". This is the string that is appended
|
164
165
|
# at the beginning and end of excerpts (unless the
|
165
|
-
# excerpt hits the start or end of the field.
|
166
|
-
#
|
167
|
-
#
|
166
|
+
# excerpt hits the start or end of the field.
|
167
|
+
# Alternatively you may want to use the HTML entity
|
168
|
+
# &#8230; or the UTF-8 string "\342\200\246".
|
168
169
|
def highlight(query, doc_id, options = {})
|
169
170
|
ensure_searcher_open()
|
170
|
-
@searcher.highlight(
|
171
|
+
@searcher.highlight(do_process_query(query),
|
171
172
|
doc_id,
|
172
173
|
options[:field]||@options[:default_field],
|
173
174
|
options)
|
@@ -346,7 +347,7 @@ module Ferret::Index
|
|
346
347
|
def search_each(query, options = {}) # :yield: doc, score
|
347
348
|
@dir.synchronize do
|
348
349
|
ensure_searcher_open()
|
349
|
-
query =
|
350
|
+
query = do_process_query(query)
|
350
351
|
|
351
352
|
@searcher.search_each(query, options) do |doc, score|
|
352
353
|
yield doc, score
|
@@ -359,20 +360,16 @@ module Ferret::Index
|
|
359
360
|
#
|
360
361
|
# id:: The number of the document to retrieve, or the term used as the :id
|
361
362
|
# for the document we wish to retrieve
|
362
|
-
def doc(
|
363
|
+
def doc(*args)
|
363
364
|
@dir.synchronize do
|
364
365
|
ensure_reader_open()
|
366
|
+
id = args[0]
|
365
367
|
if id.kind_of?(String) or id.kind_of?(Symbol)
|
366
368
|
term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
|
367
|
-
|
368
|
-
end
|
369
|
-
return @reader[id] if id.is_a? Integer
|
370
|
-
if id
|
371
|
-
raise(ArgumentError, "key to Index to access a document must be " +
|
372
|
-
"an Integer or a String")
|
369
|
+
return term_doc_enum.next? ? @reader[term_doc_enum.doc] : nil
|
373
370
|
end
|
371
|
+
return @reader[*args]
|
374
372
|
end
|
375
|
-
return nil
|
376
373
|
end
|
377
374
|
alias :[] :doc
|
378
375
|
|
@@ -405,7 +402,7 @@ module Ferret::Index
|
|
405
402
|
def query_delete(query)
|
406
403
|
@dir.synchronize do
|
407
404
|
ensure_searcher_open()
|
408
|
-
query =
|
405
|
+
query = do_process_query(query)
|
409
406
|
@searcher.search_each(query) do |doc, score|
|
410
407
|
@reader.delete(doc)
|
411
408
|
end
|
@@ -470,7 +467,7 @@ module Ferret::Index
|
|
470
467
|
@dir.synchronize do
|
471
468
|
ensure_searcher_open()
|
472
469
|
docs_to_add = []
|
473
|
-
query =
|
470
|
+
query = do_process_query(query)
|
474
471
|
@searcher.search_each(query) do |id, score|
|
475
472
|
document = @searcher[id].load
|
476
473
|
if new_val.is_a?(Hash)
|
@@ -609,9 +606,9 @@ module Ferret::Index
|
|
609
606
|
# Computing an explanation is as expensive as executing the query over the
|
610
607
|
# entire index.
|
611
608
|
def explain(query, doc)
|
612
|
-
synchronize do
|
609
|
+
@dir.synchronize do
|
613
610
|
ensure_searcher_open()
|
614
|
-
query =
|
611
|
+
query = do_process_query(query)
|
615
612
|
|
616
613
|
return @searcher.explain(query, doc)
|
617
614
|
end
|
@@ -619,17 +616,22 @@ module Ferret::Index
|
|
619
616
|
|
620
617
|
# Turn a query string into a Query object with the Index's QueryParser
|
621
618
|
def process_query(query)
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
end
|
626
|
-
# we need to set this ever time, in case a new field has been added
|
627
|
-
@qp.fields = @reader.field_names
|
628
|
-
query = @qp.parse(query)
|
619
|
+
@dir.synchronize do
|
620
|
+
ensure_searcher_open()
|
621
|
+
return do_process_query(query)
|
629
622
|
end
|
630
|
-
return query
|
631
623
|
end
|
632
624
|
|
625
|
+
# Returns the field_infos object so that you can add new fields to the
|
626
|
+
# index.
|
627
|
+
def field_infos
|
628
|
+
@dir.synchronize do
|
629
|
+
ensure_writer_open()
|
630
|
+
return @writer.field_infos
|
631
|
+
end
|
632
|
+
end
|
633
|
+
|
634
|
+
|
633
635
|
protected
|
634
636
|
def ensure_writer_open()
|
635
637
|
raise "tried to use a closed index" if not @open
|
@@ -676,9 +678,21 @@ module Ferret::Index
|
|
676
678
|
end
|
677
679
|
|
678
680
|
private
|
681
|
+
def do_process_query(query)
|
682
|
+
if query.is_a?(String)
|
683
|
+
if @qp.nil?
|
684
|
+
@qp = Ferret::QueryParser.new(@options)
|
685
|
+
end
|
686
|
+
# we need to set this ever time, in case a new field has been added
|
687
|
+
@qp.fields = @reader.field_names
|
688
|
+
query = @qp.parse(query)
|
689
|
+
end
|
690
|
+
return query
|
691
|
+
end
|
692
|
+
|
679
693
|
def do_search(query, options)
|
680
694
|
ensure_searcher_open()
|
681
|
-
query =
|
695
|
+
query = do_process_query(query)
|
682
696
|
|
683
697
|
return @searcher.search(query, options)
|
684
698
|
end
|