ferret 0.10.4 → 0.10.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/ext/analysis.c +7 -1
- data/ext/bitvector.c +5 -2
- data/ext/bitvector.h +1 -0
- data/ext/ferret.c +55 -8
- data/ext/ferret.h +8 -2
- data/ext/index.c +34 -43
- data/ext/index.h +1 -1
- data/ext/q_boolean.c +1 -1
- data/ext/q_multi_term.c +13 -1
- data/ext/q_parser.c +33 -18
- data/ext/r_analysis.c +68 -45
- data/ext/r_index.c +64 -10
- data/ext/r_search.c +145 -10
- data/ext/search.c +71 -12
- data/lib/ferret/index.rb +42 -28
- data/lib/ferret_version.rb +1 -1
- data/test/unit/analysis/tc_analyzer.rb +1 -1
- data/test/unit/analysis/tc_token_stream.rb +0 -1
- data/test/unit/index/tc_index.rb +3 -3
- data/test/unit/index/tc_index_reader.rb +5 -0
- data/test/unit/search/tc_filter.rb +15 -0
- data/test/unit/search/tm_searcher.rb +13 -2
- metadata +2 -2
data/ext/r_search.c
CHANGED
@@ -36,12 +36,13 @@ static VALUE cSpanOrQuery;
|
|
36
36
|
static VALUE cSpanNotQuery;
|
37
37
|
|
38
38
|
/* Filters */
|
39
|
+
static ID id_bits;
|
39
40
|
static VALUE cFilter;
|
40
41
|
static VALUE cRangeFilter;
|
41
42
|
static VALUE cQueryFilter;
|
42
43
|
|
43
44
|
/* MultiTermQuery */
|
44
|
-
static
|
45
|
+
static ID id_default_max_terms;
|
45
46
|
static VALUE sym_max_terms;
|
46
47
|
static VALUE sym_min_score;
|
47
48
|
|
@@ -72,8 +73,8 @@ static VALUE sym_in_order;
|
|
72
73
|
static VALUE sym_clauses;
|
73
74
|
|
74
75
|
/* Class variable ids */
|
75
|
-
static
|
76
|
-
static
|
76
|
+
static ID id_default_min_similarity;
|
77
|
+
static ID id_default_prefix_length;
|
77
78
|
|
78
79
|
|
79
80
|
/** Sort **/
|
@@ -93,6 +94,15 @@ static VALUE sym_type;
|
|
93
94
|
static VALUE sym_reverse;
|
94
95
|
static VALUE sym_comparator;
|
95
96
|
|
97
|
+
/* Hits */
|
98
|
+
static ID id_doc;
|
99
|
+
static ID id_score;
|
100
|
+
|
101
|
+
/* TopDocs */
|
102
|
+
static ID id_hits;
|
103
|
+
static ID id_total_hits;
|
104
|
+
static ID id_max_score;
|
105
|
+
|
96
106
|
/* Search */
|
97
107
|
static VALUE sym_offset;
|
98
108
|
static VALUE sym_limit;
|
@@ -113,7 +123,6 @@ extern void frt_ir_mark(void *p);
|
|
113
123
|
|
114
124
|
|
115
125
|
extern void frt_set_term(VALUE rterm, Term *t);
|
116
|
-
extern Term *frt_get_term(VALUE rterm);
|
117
126
|
extern VALUE frt_get_analyzer(Analyzer *a);
|
118
127
|
extern HashSet *frt_get_fields(VALUE rfields);
|
119
128
|
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
@@ -161,6 +170,35 @@ frt_get_td(TopDocs *td)
|
|
161
170
|
return rtop_docs;
|
162
171
|
}
|
163
172
|
|
173
|
+
static VALUE
|
174
|
+
frt_td_to_s(VALUE self)
|
175
|
+
{
|
176
|
+
int i;
|
177
|
+
VALUE rhits = rb_funcall(self, id_hits, 0);
|
178
|
+
const int len = RARRAY(rhits)->len;
|
179
|
+
char *str = ALLOC_N(char, len * 64 + 100);
|
180
|
+
char *s = str;
|
181
|
+
VALUE rstr;
|
182
|
+
|
183
|
+
sprintf(s, "TopDocs: totalhits = %d, max_score = %f [\n",
|
184
|
+
FIX2INT(rb_funcall(self, id_total_hits, 0)),
|
185
|
+
NUM2DBL(rb_funcall(self, id_max_score, 0)));
|
186
|
+
s += strlen(s);
|
187
|
+
|
188
|
+
for (i = 0; i < len; i++) {
|
189
|
+
VALUE rhit = RARRAY(rhits)->ptr[i];
|
190
|
+
sprintf(s, "\t%d: %f\n",
|
191
|
+
FIX2INT(rb_funcall(rhit, id_doc, 0)),
|
192
|
+
NUM2DBL(rb_funcall(rhit, id_score, 0)));
|
193
|
+
s += strlen(s);
|
194
|
+
}
|
195
|
+
|
196
|
+
sprintf(s, "]\n");
|
197
|
+
rstr = rb_str_new2(str);
|
198
|
+
free(str);
|
199
|
+
return rstr;
|
200
|
+
}
|
201
|
+
|
164
202
|
/****************************************************************************
|
165
203
|
*
|
166
204
|
* Explanation Methods
|
@@ -319,6 +357,34 @@ frt_q_eql(VALUE self, VALUE other)
|
|
319
357
|
return q->eq(q, oq) ? Qtrue : Qfalse;
|
320
358
|
}
|
321
359
|
|
360
|
+
/*
|
361
|
+
* call-seq:
|
362
|
+
* query.terms(searcher) -> term_array
|
363
|
+
*
|
364
|
+
* Returns an array of terms searched for by this query. This can be used for
|
365
|
+
* implementing an external query highlighter for example. You must supply a
|
366
|
+
* searcher so that the query can be rewritten and optimized like it would be
|
367
|
+
* in a real search.
|
368
|
+
*/
|
369
|
+
static VALUE
|
370
|
+
frt_q_get_terms(VALUE self, VALUE searcher)
|
371
|
+
{
|
372
|
+
int i;
|
373
|
+
VALUE rterms = rb_ary_new();
|
374
|
+
HashSet *terms = term_set_new();
|
375
|
+
GET_Q();
|
376
|
+
Searcher *sea = (Searcher *)DATA_PTR(searcher);
|
377
|
+
Query *rq = sea->rewrite(sea, q);
|
378
|
+
rq->extract_terms(rq, terms);
|
379
|
+
q_deref(rq);
|
380
|
+
for (i = 0; i < terms->size; i++) {
|
381
|
+
Term *term = (Term *)terms->elems[i];
|
382
|
+
rb_ary_push(rterms, frt_get_term(term->field, term->text));
|
383
|
+
}
|
384
|
+
hs_destroy(terms);
|
385
|
+
return rterms;
|
386
|
+
}
|
387
|
+
|
322
388
|
#define MK_QUERY(klass, q) Data_Wrap_Struct(klass, NULL, &frt_q_free, q)
|
323
389
|
VALUE
|
324
390
|
frt_get_q(Query *q)
|
@@ -2130,6 +2196,53 @@ call_filter_proc(int doc_id, float score, Searcher *self)
|
|
2130
2196
|
object_get(self)));
|
2131
2197
|
}
|
2132
2198
|
|
2199
|
+
typedef struct CWrappedFilter
|
2200
|
+
{
|
2201
|
+
Filter super;
|
2202
|
+
VALUE rfilter;
|
2203
|
+
} CWrappedFilter;
|
2204
|
+
#define CWF(filt) ((CWrappedFilter *)(filt))
|
2205
|
+
|
2206
|
+
static ulong
|
2207
|
+
cwfilt_hash(Filter *filt)
|
2208
|
+
{
|
2209
|
+
return NUM2ULONG(rb_funcall(CWF(filt)->rfilter, id_hash, 0));
|
2210
|
+
}
|
2211
|
+
|
2212
|
+
static int
|
2213
|
+
cwfilt_eq(Filter *filt, Filter *o)
|
2214
|
+
{
|
2215
|
+
return RTEST(rb_funcall(CWF(filt)->rfilter, id_eql, 1, CWF(o)->rfilter));
|
2216
|
+
}
|
2217
|
+
|
2218
|
+
static BitVector *
|
2219
|
+
cwfilt_get_bv_i(Filter *filt, IndexReader *ir)
|
2220
|
+
{
|
2221
|
+
VALUE rbv = rb_funcall(CWF(filt)->rfilter, id_bits, 1, object_get(ir));
|
2222
|
+
BitVector *bv;
|
2223
|
+
Data_Get_Struct(rbv, BitVector, bv);
|
2224
|
+
REF(bv);
|
2225
|
+
return bv;
|
2226
|
+
}
|
2227
|
+
|
2228
|
+
Filter *
|
2229
|
+
frt_get_cwrapped_filter(VALUE rval)
|
2230
|
+
{
|
2231
|
+
Filter *filter;
|
2232
|
+
if (frt_is_cclass(rval) && DATA_PTR(rval)) {
|
2233
|
+
Data_Get_Struct(rval, Filter, filter);
|
2234
|
+
REF(filter);
|
2235
|
+
}
|
2236
|
+
else {
|
2237
|
+
filter = filt_create(sizeof(CWrappedFilter), "CWrappedFilter");
|
2238
|
+
filter->hash = &cwfilt_hash;
|
2239
|
+
filter->eq = &cwfilt_eq;
|
2240
|
+
filter->get_bv_i = &cwfilt_get_bv_i;
|
2241
|
+
CWF(filter)->rfilter = rval;
|
2242
|
+
}
|
2243
|
+
return filter;
|
2244
|
+
}
|
2245
|
+
|
2133
2246
|
static TopDocs *
|
2134
2247
|
frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
2135
2248
|
{
|
@@ -2137,6 +2250,8 @@ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
2137
2250
|
int offset = 0, limit = 10;
|
2138
2251
|
Filter *filter = NULL;
|
2139
2252
|
Sort *sort = NULL;
|
2253
|
+
TopDocs *td;
|
2254
|
+
|
2140
2255
|
filter_ft filter_func = NULL;
|
2141
2256
|
|
2142
2257
|
if (Qnil != roptions) {
|
@@ -2159,7 +2274,7 @@ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
2159
2274
|
}
|
2160
2275
|
}
|
2161
2276
|
if (Qnil != (rval = rb_hash_aref(roptions, sym_filter))) {
|
2162
|
-
|
2277
|
+
filter = frt_get_cwrapped_filter(rval);
|
2163
2278
|
}
|
2164
2279
|
if (Qnil != (rval = rb_hash_aref(roptions, sym_filter_proc))) {
|
2165
2280
|
filter_func = &call_filter_proc;
|
@@ -2173,7 +2288,9 @@ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
|
2173
2288
|
}
|
2174
2289
|
}
|
2175
2290
|
|
2176
|
-
|
2291
|
+
td = sea->search(sea, query, offset, limit, filter, sort, filter_func, 0);
|
2292
|
+
if (filter) filt_deref(filter);
|
2293
|
+
return td;
|
2177
2294
|
}
|
2178
2295
|
|
2179
2296
|
/*
|
@@ -2317,7 +2434,8 @@ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
|
|
2317
2434
|
* === Options
|
2318
2435
|
*
|
2319
2436
|
* :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
|
2320
|
-
* terms will be in the centre of the excerpt.
|
2437
|
+
* terms will be in the centre of the excerpt. Set to
|
2438
|
+
* :all to highlight the entire field.
|
2321
2439
|
* :num_excerpts:: Default: 2. Number of excerpts to return.
|
2322
2440
|
* :pre_tag:: Default: "<b>". Tag to place to the left of the match.
|
2323
2441
|
* You'll probably want to change this to a "<span>" tag
|
@@ -2344,12 +2462,18 @@ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
|
|
2344
2462
|
|
2345
2463
|
rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
|
2346
2464
|
Data_Get_Struct(rquery, Query, query);
|
2347
|
-
if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
|
2348
|
-
excerpt_length = FIX2INT(v);
|
2349
|
-
}
|
2350
2465
|
if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
|
2351
2466
|
num_excerpts = FIX2INT(v);
|
2352
2467
|
}
|
2468
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
|
2469
|
+
if (v == sym_all) {
|
2470
|
+
num_excerpts = 1;
|
2471
|
+
excerpt_length = INT_MAX/2;
|
2472
|
+
}
|
2473
|
+
else {
|
2474
|
+
excerpt_length = FIX2INT(v);
|
2475
|
+
}
|
2476
|
+
}
|
2353
2477
|
if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
|
2354
2478
|
pre_tag = RSTRING(rb_obj_as_string(v))->ptr;
|
2355
2479
|
}
|
@@ -2539,6 +2663,8 @@ Init_Hit(void)
|
|
2539
2663
|
cHit = rb_struct_define(hit_class, "doc", "score", NULL);
|
2540
2664
|
rb_set_class_path(cHit, mSearch, hit_class);
|
2541
2665
|
rb_const_set(mSearch, rb_intern(hit_class), cHit);
|
2666
|
+
id_doc = rb_intern("doc");
|
2667
|
+
id_score = rb_intern("score");
|
2542
2668
|
}
|
2543
2669
|
|
2544
2670
|
/*
|
@@ -2570,6 +2696,10 @@ Init_TopDocs(void)
|
|
2570
2696
|
NULL);
|
2571
2697
|
rb_set_class_path(cTopDocs, mSearch, td_class);
|
2572
2698
|
rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
|
2699
|
+
rb_define_method(cTopDocs, "to_s", frt_td_to_s, 0);
|
2700
|
+
id_hits = rb_intern("hits");
|
2701
|
+
id_total_hits = rb_intern("total_hits");
|
2702
|
+
id_max_score = rb_intern("max_score");
|
2573
2703
|
}
|
2574
2704
|
|
2575
2705
|
/*
|
@@ -2646,6 +2776,7 @@ Init_Query(void)
|
|
2646
2776
|
rb_define_method(cQuery, "eql?", frt_q_eql, 1);
|
2647
2777
|
rb_define_method(cQuery, "==", frt_q_eql, 1);
|
2648
2778
|
rb_define_method(cQuery, "hash", frt_q_hash, 0);
|
2779
|
+
rb_define_method(cQuery, "terms", frt_q_get_terms, 1);
|
2649
2780
|
}
|
2650
2781
|
|
2651
2782
|
/*
|
@@ -3326,6 +3457,7 @@ static void
|
|
3326
3457
|
Init_RangeFilter(void)
|
3327
3458
|
{
|
3328
3459
|
cRangeFilter = rb_define_class_under(mSearch, "RangeFilter", cFilter);
|
3460
|
+
frt_mark_cclass(cRangeFilter);
|
3329
3461
|
rb_define_alloc_func(cRangeFilter, frt_data_alloc);
|
3330
3462
|
|
3331
3463
|
rb_define_method(cRangeFilter, "initialize", frt_rf_init, 2);
|
@@ -3360,6 +3492,7 @@ static void
|
|
3360
3492
|
Init_QueryFilter(void)
|
3361
3493
|
{
|
3362
3494
|
cQueryFilter = rb_define_class_under(mSearch, "QueryFilter", cFilter);
|
3495
|
+
frt_mark_cclass(cQueryFilter);
|
3363
3496
|
rb_define_alloc_func(cQueryFilter, frt_data_alloc);
|
3364
3497
|
|
3365
3498
|
rb_define_method(cQueryFilter, "initialize", frt_qf_init, 1);
|
@@ -3383,7 +3516,9 @@ Init_QueryFilter(void)
|
|
3383
3516
|
static void
|
3384
3517
|
Init_Filter(void)
|
3385
3518
|
{
|
3519
|
+
id_bits = rb_intern("bits");
|
3386
3520
|
cFilter = rb_define_class_under(mSearch, "Filter", rb_cObject);
|
3521
|
+
frt_mark_cclass(cFilter);
|
3387
3522
|
rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
|
3388
3523
|
|
3389
3524
|
rb_define_method(cFilter, "to_s", frt_f_to_s, 0);
|
data/ext/search.c
CHANGED
@@ -741,13 +741,17 @@ static char *excerpt_get_str(Excerpt *e, MatchVector *mv,
|
|
741
741
|
for (i = e->start; i <= e->end; i++) {
|
742
742
|
MatchRange *mr = mv->matches + i;
|
743
743
|
len = mr->start_offset - last_offset;
|
744
|
-
if (len)
|
745
|
-
|
744
|
+
if (len) {
|
745
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
746
|
+
e_ptr += len;
|
747
|
+
}
|
746
748
|
memcpy(e_ptr, pre_tag, pre_tag_len);
|
747
749
|
e_ptr += pre_tag_len;
|
748
750
|
len = mr->end_offset - mr->start_offset;
|
749
|
-
if (len)
|
750
|
-
|
751
|
+
if (len) {
|
752
|
+
lazy_df_get_bytes(lazy_df, e_ptr, mr->start_offset, len);
|
753
|
+
e_ptr += len;
|
754
|
+
}
|
751
755
|
memcpy(e_ptr, post_tag, post_tag_len);
|
752
756
|
e_ptr += post_tag_len;
|
753
757
|
last_offset = mr->end_offset;
|
@@ -757,8 +761,10 @@ static char *excerpt_get_str(Excerpt *e, MatchVector *mv,
|
|
757
761
|
e->end_offset = lazy_df->len;
|
758
762
|
}
|
759
763
|
len = e->end_offset - last_offset;
|
760
|
-
if (len)
|
761
|
-
|
764
|
+
if (len) {
|
765
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
766
|
+
e_ptr += len;
|
767
|
+
}
|
762
768
|
if (e->end_offset < lazy_df->len) {
|
763
769
|
memcpy(e_ptr, ellipsis, ellipsis_len);
|
764
770
|
e_ptr += ellipsis_len;
|
@@ -767,6 +773,54 @@ static char *excerpt_get_str(Excerpt *e, MatchVector *mv,
|
|
767
773
|
return excerpt_str;
|
768
774
|
}
|
769
775
|
|
776
|
+
static char *highlight_field(MatchVector *mv,
|
777
|
+
LazyDocField *lazy_df,
|
778
|
+
TermVector *tv,
|
779
|
+
const char *pre_tag,
|
780
|
+
const char *post_tag)
|
781
|
+
{
|
782
|
+
const int pre_len = (int)strlen(pre_tag);
|
783
|
+
const int post_len = (int)strlen(post_tag);
|
784
|
+
char *excerpt_str =
|
785
|
+
ALLOC_N(char, 10 + lazy_df->len + (mv->size * (pre_len + post_len)));
|
786
|
+
if (mv->size > 0) {
|
787
|
+
int last_offset = 0;
|
788
|
+
int i, len;
|
789
|
+
char *e_ptr = excerpt_str;
|
790
|
+
matchv_compact_with_breaks(mv);
|
791
|
+
matchv_set_offsets(mv, tv->offsets);
|
792
|
+
for (i = 0; i < mv->size; i++) {
|
793
|
+
MatchRange *mr = mv->matches + i;
|
794
|
+
len = mr->start_offset - last_offset;
|
795
|
+
if (len) {
|
796
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
797
|
+
e_ptr += len;
|
798
|
+
}
|
799
|
+
memcpy(e_ptr, pre_tag, pre_len);
|
800
|
+
e_ptr += pre_len;
|
801
|
+
len = mr->end_offset - mr->start_offset;
|
802
|
+
if (len) {
|
803
|
+
lazy_df_get_bytes(lazy_df, e_ptr, mr->start_offset, len);
|
804
|
+
e_ptr += len;
|
805
|
+
}
|
806
|
+
memcpy(e_ptr, post_tag, post_len);
|
807
|
+
e_ptr += post_len;
|
808
|
+
last_offset = mr->end_offset;
|
809
|
+
}
|
810
|
+
len = lazy_df->len - last_offset;
|
811
|
+
if (len) {
|
812
|
+
lazy_df_get_bytes(lazy_df, e_ptr, last_offset, len);
|
813
|
+
e_ptr += len;
|
814
|
+
}
|
815
|
+
*e_ptr = '\0';
|
816
|
+
}
|
817
|
+
else {
|
818
|
+
lazy_df_get_bytes(lazy_df, excerpt_str, 0, lazy_df->len);
|
819
|
+
excerpt_str[lazy_df->len] = '\0';
|
820
|
+
}
|
821
|
+
return excerpt_str;
|
822
|
+
}
|
823
|
+
|
770
824
|
char **searcher_highlight(Searcher *self,
|
771
825
|
Query *query,
|
772
826
|
const int doc_num,
|
@@ -789,7 +843,12 @@ char **searcher_highlight(Searcher *self,
|
|
789
843
|
MatchVector *mv;
|
790
844
|
query = self->rewrite(self, query);
|
791
845
|
mv = query->get_matchv_i(query, matchv_new(), tv);
|
792
|
-
if (
|
846
|
+
if (lazy_df->len < (excerpt_len * num_excerpts)) {
|
847
|
+
excerpt_strs = ary_new_type_capa(char *, 1);
|
848
|
+
ary_push(excerpt_strs,
|
849
|
+
highlight_field(mv, lazy_df, tv, pre_tag, post_tag));
|
850
|
+
}
|
851
|
+
else if (mv->size > 0) {
|
793
852
|
Excerpt **excerpts = ALLOC_AND_ZERO_N(Excerpt *, num_excerpts);
|
794
853
|
int e_start, e_end, i, j;
|
795
854
|
MatchRange *matches = mv->matches;
|
@@ -802,12 +861,12 @@ char **searcher_highlight(Searcher *self,
|
|
802
861
|
excerpt_pq = pq_new(mv->size, (lt_ft)&excerpt_lt, &free);
|
803
862
|
/* add all possible excerpts to the priority queue */
|
804
863
|
|
805
|
-
for (e_start =
|
864
|
+
for (e_start = e_end = 0; e_start < mv->size; e_start++) {
|
806
865
|
const int start_offset = matches[e_start].start_offset;
|
807
|
-
if (e_start
|
808
|
-
|
866
|
+
if (e_start > e_end) {
|
867
|
+
running_score = 0.0;
|
868
|
+
e_end = e_start;
|
809
869
|
}
|
810
|
-
running_score += matches[e_start].score;
|
811
870
|
while (e_end < mv->size && (matches[e_end].end_offset
|
812
871
|
<= start_offset + excerpt_len)) {
|
813
872
|
running_score += matches[e_end].score;
|
@@ -883,8 +942,8 @@ char **searcher_highlight(Searcher *self,
|
|
883
942
|
}
|
884
943
|
free(excerpts);
|
885
944
|
pq_destroy(excerpt_pq);
|
886
|
-
matchv_destroy(mv);
|
887
945
|
}
|
946
|
+
matchv_destroy(mv);
|
888
947
|
q_deref(query);
|
889
948
|
}
|
890
949
|
if (tv) tv_destroy(tv);
|
data/lib/ferret/index.rb
CHANGED
@@ -152,22 +152,23 @@ module Ferret::Index
|
|
152
152
|
# you want to highlight multiple fields then you will
|
153
153
|
# need to call this method multiple times.
|
154
154
|
# excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
|
155
|
-
# terms will be in the centre of the excerpt.
|
155
|
+
# terms will be in the centre of the excerpt. Set to
|
156
|
+
# :all to highlight the entire field.
|
156
157
|
# num_excerpts:: Default: 2. Number of excerpts to return.
|
157
158
|
# pre_tag:: Default: "<b>". Tag to place to the left of the
|
158
159
|
# match. You'll probably want to change this to a
|
159
|
-
# "<span>" tag with a class "\033[
|
160
|
+
# "<span>" tag with a class. Try "\033[36m" for use in a
|
160
161
|
# terminal.
|
161
162
|
# post_tag:: Default: "</b>". This tag should close the
|
162
163
|
# +:pre_tag+. Try tag "\033[m" in the terminal.
|
163
164
|
# ellipsis:: Default: "...". This is the string that is appended
|
164
165
|
# at the beginning and end of excerpts (unless the
|
165
|
-
# excerpt hits the start or end of the field.
|
166
|
-
#
|
167
|
-
#
|
166
|
+
# excerpt hits the start or end of the field).
|
167
|
+
# Alternatively you may want to use the HTML entity
|
168
|
+
# &#8230; or the UTF-8 string "\342\200\246".
|
168
169
|
def highlight(query, doc_id, options = {})
|
169
170
|
ensure_searcher_open()
|
170
|
-
@searcher.highlight(
|
171
|
+
@searcher.highlight(do_process_query(query),
|
171
172
|
doc_id,
|
172
173
|
options[:field]||@options[:default_field],
|
173
174
|
options)
|
@@ -346,7 +347,7 @@ module Ferret::Index
|
|
346
347
|
def search_each(query, options = {}) # :yield: doc, score
|
347
348
|
@dir.synchronize do
|
348
349
|
ensure_searcher_open()
|
349
|
-
query =
|
350
|
+
query = do_process_query(query)
|
350
351
|
|
351
352
|
@searcher.search_each(query, options) do |doc, score|
|
352
353
|
yield doc, score
|
@@ -359,20 +360,16 @@ module Ferret::Index
|
|
359
360
|
#
|
360
361
|
# id:: The number of the document to retrieve, or the term used as the :id
|
361
362
|
# for the document we wish to retrieve
|
362
|
-
def doc(
|
363
|
+
def doc(*args)
|
363
364
|
@dir.synchronize do
|
364
365
|
ensure_reader_open()
|
366
|
+
id = args[0]
|
365
367
|
if id.kind_of?(String) or id.kind_of?(Symbol)
|
366
368
|
term_doc_enum = @reader.term_docs_for(@id_field, id.to_s)
|
367
|
-
|
368
|
-
end
|
369
|
-
return @reader[id] if id.is_a? Integer
|
370
|
-
if id
|
371
|
-
raise(ArgumentError, "key to Index to access a document must be " +
|
372
|
-
"an Integer or a String")
|
369
|
+
return term_doc_enum.next? ? @reader[term_doc_enum.doc] : nil
|
373
370
|
end
|
371
|
+
return @reader[*args]
|
374
372
|
end
|
375
|
-
return nil
|
376
373
|
end
|
377
374
|
alias :[] :doc
|
378
375
|
|
@@ -405,7 +402,7 @@ module Ferret::Index
|
|
405
402
|
def query_delete(query)
|
406
403
|
@dir.synchronize do
|
407
404
|
ensure_searcher_open()
|
408
|
-
query =
|
405
|
+
query = do_process_query(query)
|
409
406
|
@searcher.search_each(query) do |doc, score|
|
410
407
|
@reader.delete(doc)
|
411
408
|
end
|
@@ -470,7 +467,7 @@ module Ferret::Index
|
|
470
467
|
@dir.synchronize do
|
471
468
|
ensure_searcher_open()
|
472
469
|
docs_to_add = []
|
473
|
-
query =
|
470
|
+
query = do_process_query(query)
|
474
471
|
@searcher.search_each(query) do |id, score|
|
475
472
|
document = @searcher[id].load
|
476
473
|
if new_val.is_a?(Hash)
|
@@ -609,9 +606,9 @@ module Ferret::Index
|
|
609
606
|
# Computing an explanation is as expensive as executing the query over the
|
610
607
|
# entire index.
|
611
608
|
def explain(query, doc)
|
612
|
-
synchronize do
|
609
|
+
@dir.synchronize do
|
613
610
|
ensure_searcher_open()
|
614
|
-
query =
|
611
|
+
query = do_process_query(query)
|
615
612
|
|
616
613
|
return @searcher.explain(query, doc)
|
617
614
|
end
|
@@ -619,17 +616,22 @@ module Ferret::Index
|
|
619
616
|
|
620
617
|
# Turn a query string into a Query object with the Index's QueryParser
|
621
618
|
def process_query(query)
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
end
|
626
|
-
# we need to set this ever time, in case a new field has been added
|
627
|
-
@qp.fields = @reader.field_names
|
628
|
-
query = @qp.parse(query)
|
619
|
+
@dir.synchronize do
|
620
|
+
ensure_searcher_open()
|
621
|
+
return do_process_query(query)
|
629
622
|
end
|
630
|
-
return query
|
631
623
|
end
|
632
624
|
|
625
|
+
# Returns the field_infos object so that you can add new fields to the
|
626
|
+
# index.
|
627
|
+
def field_infos
|
628
|
+
@dir.synchronize do
|
629
|
+
ensure_writer_open()
|
630
|
+
return @writer.field_infos
|
631
|
+
end
|
632
|
+
end
|
633
|
+
|
634
|
+
|
633
635
|
protected
|
634
636
|
def ensure_writer_open()
|
635
637
|
raise "tried to use a closed index" if not @open
|
@@ -676,9 +678,21 @@ module Ferret::Index
|
|
676
678
|
end
|
677
679
|
|
678
680
|
private
|
681
|
+
def do_process_query(query)
|
682
|
+
if query.is_a?(String)
|
683
|
+
if @qp.nil?
|
684
|
+
@qp = Ferret::QueryParser.new(@options)
|
685
|
+
end
|
686
|
+
# we need to set this every time, in case a new field has been added
|
687
|
+
@qp.fields = @reader.field_names
|
688
|
+
query = @qp.parse(query)
|
689
|
+
end
|
690
|
+
return query
|
691
|
+
end
|
692
|
+
|
679
693
|
def do_search(query, options)
|
680
694
|
ensure_searcher_open()
|
681
|
-
query =
|
695
|
+
query = do_process_query(query)
|
682
696
|
|
683
697
|
return @searcher.search(query, options)
|
684
698
|
end
|