sdsykes-ferret 0.11.6.19
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +24 -0
- data/MIT-LICENSE +20 -0
- data/README +102 -0
- data/Rakefile +338 -0
- data/TODO +17 -0
- data/TUTORIAL +231 -0
- data/bin/ferret-browser +79 -0
- data/ext/analysis.c +1555 -0
- data/ext/analysis.h +219 -0
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/array.c +123 -0
- data/ext/array.h +53 -0
- data/ext/bitvector.c +540 -0
- data/ext/bitvector.h +272 -0
- data/ext/compound_io.c +383 -0
- data/ext/config.h +42 -0
- data/ext/document.c +156 -0
- data/ext/document.h +53 -0
- data/ext/except.c +120 -0
- data/ext/except.h +168 -0
- data/ext/extconf.rb +14 -0
- data/ext/ferret.c +402 -0
- data/ext/ferret.h +91 -0
- data/ext/filter.c +156 -0
- data/ext/fs_store.c +483 -0
- data/ext/global.c +418 -0
- data/ext/global.h +117 -0
- data/ext/hash.c +567 -0
- data/ext/hash.h +473 -0
- data/ext/hashset.c +170 -0
- data/ext/hashset.h +187 -0
- data/ext/header.h +58 -0
- data/ext/helper.c +62 -0
- data/ext/helper.h +13 -0
- data/ext/inc/lang.h +48 -0
- data/ext/inc/threading.h +31 -0
- data/ext/index.c +6425 -0
- data/ext/index.h +961 -0
- data/ext/lang.h +66 -0
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/mempool.c +87 -0
- data/ext/mempool.h +35 -0
- data/ext/modules.h +162 -0
- data/ext/multimapper.c +310 -0
- data/ext/multimapper.h +51 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +151 -0
- data/ext/priorityqueue.h +143 -0
- data/ext/q_boolean.c +1608 -0
- data/ext/q_const_score.c +161 -0
- data/ext/q_filtered_query.c +209 -0
- data/ext/q_fuzzy.c +268 -0
- data/ext/q_match_all.c +148 -0
- data/ext/q_multi_term.c +677 -0
- data/ext/q_parser.c +2825 -0
- data/ext/q_phrase.c +1126 -0
- data/ext/q_prefix.c +100 -0
- data/ext/q_range.c +350 -0
- data/ext/q_span.c +2402 -0
- data/ext/q_term.c +337 -0
- data/ext/q_wildcard.c +171 -0
- data/ext/r_analysis.c +2575 -0
- data/ext/r_index.c +3472 -0
- data/ext/r_qparser.c +585 -0
- data/ext/r_search.c +4105 -0
- data/ext/r_store.c +513 -0
- data/ext/r_utils.c +963 -0
- data/ext/ram_store.c +471 -0
- data/ext/search.c +1741 -0
- data/ext/search.h +885 -0
- data/ext/similarity.c +150 -0
- data/ext/similarity.h +82 -0
- data/ext/sort.c +983 -0
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stopwords.c +401 -0
- data/ext/store.c +692 -0
- data/ext/store.h +777 -0
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/utilities.c +446 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +29 -0
- data/lib/ferret/browser.rb +246 -0
- data/lib/ferret/browser/s/global.js +192 -0
- data/lib/ferret/browser/s/style.css +148 -0
- data/lib/ferret/browser/views/document/list.rhtml +49 -0
- data/lib/ferret/browser/views/document/show.rhtml +27 -0
- data/lib/ferret/browser/views/error/index.rhtml +7 -0
- data/lib/ferret/browser/views/help/index.rhtml +8 -0
- data/lib/ferret/browser/views/home/index.rhtml +29 -0
- data/lib/ferret/browser/views/layout.rhtml +22 -0
- data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
- data/lib/ferret/browser/views/term/index.rhtml +199 -0
- data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
- data/lib/ferret/browser/webrick.rb +14 -0
- data/lib/ferret/document.rb +130 -0
- data/lib/ferret/field_infos.rb +44 -0
- data/lib/ferret/index.rb +786 -0
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/setup.rb +1555 -0
- data/test/test_all.rb +5 -0
- data/test/test_helper.rb +24 -0
- data/test/threading/number_to_spoken.rb +132 -0
- data/test/threading/thread_safety_index_test.rb +79 -0
- data/test/threading/thread_safety_read_write_test.rb +76 -0
- data/test/threading/thread_safety_test.rb +133 -0
- data/test/unit/analysis/tc_analyzer.rb +548 -0
- data/test/unit/analysis/tc_token_stream.rb +646 -0
- data/test/unit/index/tc_index.rb +762 -0
- data/test/unit/index/tc_index_reader.rb +699 -0
- data/test/unit/index/tc_index_writer.rb +437 -0
- data/test/unit/index/th_doc.rb +315 -0
- data/test/unit/largefile/tc_largefile.rb +46 -0
- data/test/unit/query_parser/tc_query_parser.rb +238 -0
- data/test/unit/search/tc_filter.rb +135 -0
- data/test/unit/search/tc_fuzzy_query.rb +147 -0
- data/test/unit/search/tc_index_searcher.rb +61 -0
- data/test/unit/search/tc_multi_searcher.rb +128 -0
- data/test/unit/search/tc_multiple_search_requests.rb +58 -0
- data/test/unit/search/tc_search_and_sort.rb +179 -0
- data/test/unit/search/tc_sort.rb +49 -0
- data/test/unit/search/tc_sort_field.rb +27 -0
- data/test/unit/search/tc_spans.rb +190 -0
- data/test/unit/search/tm_searcher.rb +384 -0
- data/test/unit/store/tc_fs_store.rb +77 -0
- data/test/unit/store/tc_ram_store.rb +35 -0
- data/test/unit/store/tm_store.rb +34 -0
- data/test/unit/store/tm_store_lock.rb +68 -0
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +2 -0
- data/test/unit/ts_index.rb +2 -0
- data/test/unit/ts_largefile.rb +4 -0
- data/test/unit/ts_query_parser.rb +2 -0
- data/test/unit/ts_search.rb +2 -0
- data/test/unit/ts_store.rb +2 -0
- data/test/unit/ts_utils.rb +2 -0
- data/test/unit/utils/tc_bit_vector.rb +295 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +285 -0
data/ext/r_search.c
ADDED
@@ -0,0 +1,4105 @@
|
|
1
|
+
#include "ferret.h"
|
2
|
+
#include <st.h>
|
3
|
+
#include <rubysig.h>
|
4
|
+
#include <ctype.h>
|
5
|
+
#include <array.h>
|
6
|
+
#include "search.h"
|
7
|
+
|
8
|
+
VALUE mSearch;
|
9
|
+
|
10
|
+
static VALUE cHit;
|
11
|
+
static VALUE cTopDocs;
|
12
|
+
static VALUE cExplanation;
|
13
|
+
static VALUE cSearcher;
|
14
|
+
static VALUE cMultiSearcher;
|
15
|
+
static VALUE cSortField;
|
16
|
+
static VALUE cSort;
|
17
|
+
|
18
|
+
/* Queries */
|
19
|
+
static VALUE cQuery;
|
20
|
+
static VALUE cTermQuery;
|
21
|
+
static VALUE cMultiTermQuery;
|
22
|
+
static VALUE cBooleanQuery;
|
23
|
+
static VALUE cBooleanClause;
|
24
|
+
static VALUE cRangeQuery;
|
25
|
+
static VALUE cPhraseQuery;
|
26
|
+
static VALUE cPrefixQuery;
|
27
|
+
static VALUE cWildcardQuery;
|
28
|
+
static VALUE cFuzzyQuery;
|
29
|
+
static VALUE cMatchAllQuery;
|
30
|
+
static VALUE cConstantScoreQuery;
|
31
|
+
static VALUE cFilteredQuery;
|
32
|
+
static VALUE cSpanTermQuery;
|
33
|
+
static VALUE cSpanMultiTermQuery;
|
34
|
+
static VALUE cSpanPrefixQuery;
|
35
|
+
static VALUE cSpanFirstQuery;
|
36
|
+
static VALUE cSpanNearQuery;
|
37
|
+
static VALUE cSpanOrQuery;
|
38
|
+
static VALUE cSpanNotQuery;
|
39
|
+
|
40
|
+
/* Filters */
|
41
|
+
static ID id_bits;
|
42
|
+
static VALUE cFilter;
|
43
|
+
static VALUE cRangeFilter;
|
44
|
+
static VALUE cQueryFilter;
|
45
|
+
|
46
|
+
/* MultiTermQuery */
|
47
|
+
static ID id_default_max_terms;
|
48
|
+
static VALUE sym_max_terms;
|
49
|
+
static VALUE sym_min_score;
|
50
|
+
|
51
|
+
/** Option hash keys **/
|
52
|
+
/* BooleanClause */
|
53
|
+
static VALUE sym_should;
|
54
|
+
static VALUE sym_must;
|
55
|
+
static VALUE sym_must_not;
|
56
|
+
|
57
|
+
/* RangeQuery */
|
58
|
+
static VALUE sym_upper;
|
59
|
+
static VALUE sym_lower;
|
60
|
+
static VALUE sym_include_upper;
|
61
|
+
static VALUE sym_include_lower;
|
62
|
+
static VALUE sym_upper_exclusive;
|
63
|
+
static VALUE sym_lower_exclusive;
|
64
|
+
|
65
|
+
static VALUE sym_less_than;
|
66
|
+
static VALUE sym_less_than_or_equal_to;
|
67
|
+
static VALUE sym_greater_than;
|
68
|
+
static VALUE sym_greater_than_or_equal_to;
|
69
|
+
|
70
|
+
/* FuzzyQuery */
|
71
|
+
static VALUE sym_min_similarity;
|
72
|
+
static VALUE sym_prefix_length;
|
73
|
+
|
74
|
+
/* SpanNearQuery */
|
75
|
+
static VALUE sym_slop;
|
76
|
+
static VALUE sym_in_order;
|
77
|
+
static VALUE sym_clauses;
|
78
|
+
|
79
|
+
/* Class variable ids */
|
80
|
+
static ID id_default_min_similarity;
|
81
|
+
static ID id_default_prefix_length;
|
82
|
+
|
83
|
+
|
84
|
+
/** Sort **/
|
85
|
+
static VALUE oSORT_FIELD_DOC;
|
86
|
+
|
87
|
+
/* Sort types */
|
88
|
+
static VALUE sym_integer;
|
89
|
+
static VALUE sym_float;
|
90
|
+
static VALUE sym_string;
|
91
|
+
static VALUE sym_auto;
|
92
|
+
static VALUE sym_doc_id;
|
93
|
+
static VALUE sym_score;
|
94
|
+
static VALUE sym_byte;
|
95
|
+
|
96
|
+
/* Sort params */
|
97
|
+
static VALUE sym_type;
|
98
|
+
static VALUE sym_reverse;
|
99
|
+
static VALUE sym_comparator;
|
100
|
+
|
101
|
+
/* Hits */
|
102
|
+
static ID id_doc;
|
103
|
+
static ID id_score;
|
104
|
+
|
105
|
+
/* TopDocs */
|
106
|
+
static ID id_hits;
|
107
|
+
static ID id_total_hits;
|
108
|
+
static ID id_max_score;
|
109
|
+
static ID id_searcher;
|
110
|
+
|
111
|
+
/* Search */
|
112
|
+
static VALUE sym_offset;
|
113
|
+
static VALUE sym_limit;
|
114
|
+
static VALUE sym_all;
|
115
|
+
static VALUE sym_sort;
|
116
|
+
static VALUE sym_filter;
|
117
|
+
static VALUE sym_filter_proc;
|
118
|
+
|
119
|
+
static VALUE sym_excerpt_length;
|
120
|
+
static VALUE sym_num_excerpts;
|
121
|
+
static VALUE sym_pre_tag;
|
122
|
+
static VALUE sym_post_tag;
|
123
|
+
static VALUE sym_ellipsis;
|
124
|
+
|
125
|
+
extern VALUE cIndexReader;
|
126
|
+
extern void frt_ir_free(void *p);
|
127
|
+
extern void frt_ir_mark(void *p);
|
128
|
+
|
129
|
+
extern void frt_set_term(VALUE rterm, Term *t);
|
130
|
+
extern VALUE frt_get_analyzer(Analyzer *a);
|
131
|
+
extern HashSet *frt_get_fields(VALUE rfields);
|
132
|
+
extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
|
133
|
+
extern VALUE frt_get_lazy_doc(LazyDoc *lazy_doc);
|
134
|
+
|
135
|
+
/****************************************************************************
|
136
|
+
*
|
137
|
+
* Hit Methods
|
138
|
+
*
|
139
|
+
****************************************************************************/
|
140
|
+
|
141
|
+
static VALUE
|
142
|
+
frt_get_hit(Hit *hit)
|
143
|
+
{
|
144
|
+
return rb_struct_new(cHit,
|
145
|
+
INT2FIX(hit->doc),
|
146
|
+
rb_float_new((double)hit->score),
|
147
|
+
NULL);
|
148
|
+
}
|
149
|
+
|
150
|
+
/****************************************************************************
|
151
|
+
*
|
152
|
+
* TopDocs Methods
|
153
|
+
*
|
154
|
+
****************************************************************************/
|
155
|
+
|
156
|
+
static VALUE
|
157
|
+
frt_get_td(TopDocs *td, VALUE rsearcher)
|
158
|
+
{
|
159
|
+
int i;
|
160
|
+
VALUE rtop_docs;
|
161
|
+
VALUE hit_ary = rb_ary_new2(td->size);
|
162
|
+
|
163
|
+
for (i = 0; i < td->size; i++) {
|
164
|
+
rb_ary_store(hit_ary, i, frt_get_hit(td->hits[i]));
|
165
|
+
}
|
166
|
+
|
167
|
+
rtop_docs = rb_struct_new(cTopDocs,
|
168
|
+
INT2FIX(td->total_hits),
|
169
|
+
hit_ary,
|
170
|
+
rb_float_new((double)td->max_score),
|
171
|
+
rsearcher,
|
172
|
+
NULL);
|
173
|
+
td_destroy(td);
|
174
|
+
return rtop_docs;
|
175
|
+
}
|
176
|
+
|
177
|
+
/*
|
178
|
+
* call-seq:
|
179
|
+
* top_doc.to_s(field = :id) -> string
|
180
|
+
*
|
181
|
+
* Returns a string representation of the top_doc in readable format.
|
182
|
+
*/
|
183
|
+
static VALUE
|
184
|
+
frt_td_to_s(int argc, VALUE *argv, VALUE self)
|
185
|
+
{
|
186
|
+
int i;
|
187
|
+
VALUE rhits = rb_funcall(self, id_hits, 0);
|
188
|
+
Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
|
189
|
+
const int len = RARRAY_LEN(rhits);
|
190
|
+
char *str = ALLOC_N(char, len * 64 + 100);
|
191
|
+
char *s = str;
|
192
|
+
char *field = "id";
|
193
|
+
VALUE rstr;
|
194
|
+
|
195
|
+
if (argc) {
|
196
|
+
field = frt_field(argv[0]);
|
197
|
+
}
|
198
|
+
|
199
|
+
sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
|
200
|
+
FIX2INT(rb_funcall(self, id_total_hits, 0)),
|
201
|
+
NUM2DBL(rb_funcall(self, id_max_score, 0)));
|
202
|
+
s += strlen(s);
|
203
|
+
|
204
|
+
for (i = 0; i < len; i++) {
|
205
|
+
VALUE rhit = RARRAY_PTR(rhits)[i];
|
206
|
+
int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
|
207
|
+
char *value = "";
|
208
|
+
LazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
|
209
|
+
LazyDocField *lzdf = h_get(lzd->field_dict, field);
|
210
|
+
if (NULL != lzdf) {
|
211
|
+
value = lazy_df_get_data(lzdf, 0);
|
212
|
+
}
|
213
|
+
|
214
|
+
sprintf(s, "\t%d \"%s\": %f\n", doc_id, value,
|
215
|
+
NUM2DBL(rb_funcall(rhit, id_score, 0)));
|
216
|
+
s += strlen(s);
|
217
|
+
lazy_doc_close(lzd);
|
218
|
+
}
|
219
|
+
|
220
|
+
sprintf(s, "]\n");
|
221
|
+
rstr = rb_str_new2(str);
|
222
|
+
free(str);
|
223
|
+
return rstr;
|
224
|
+
}
|
225
|
+
|
226
|
+
static INLINE char *
|
227
|
+
frt_lzd_load_to_json(LazyDoc *lzd, char **str, char *s, int *slen)
|
228
|
+
{
|
229
|
+
int i, j;
|
230
|
+
int diff = s - *str;
|
231
|
+
int len = diff, l;
|
232
|
+
LazyDocField *f;
|
233
|
+
|
234
|
+
for (i = 0; i < lzd->size; i++) {
|
235
|
+
f = lzd->fields[i];
|
236
|
+
/* 3 times length of field to make space for quoted quotes ('"') and
|
237
|
+
* 4 times field elements to make space for '"' around fields and ','
|
238
|
+
* between fields. Add 100 for '[', ']' and good safety.
|
239
|
+
*/
|
240
|
+
len += strlen(f->name) + f->len * 3 + 100 + 4 * f->size;
|
241
|
+
}
|
242
|
+
|
243
|
+
if (len > *slen) {
|
244
|
+
while (len > *slen) *slen = *slen << 1;
|
245
|
+
REALLOC_N(*str, char, *slen);
|
246
|
+
s = *str + diff;
|
247
|
+
}
|
248
|
+
|
249
|
+
for (i = 0; i < lzd->size; i++) {
|
250
|
+
f = lzd->fields[i];
|
251
|
+
if (i) *(s++) = ',';
|
252
|
+
*(s++) = '"';
|
253
|
+
l = strlen(f->name);
|
254
|
+
memcpy(s, f->name, l);
|
255
|
+
s += l;
|
256
|
+
*(s++) = '"';
|
257
|
+
*(s++) = ':';
|
258
|
+
if (f->size > 1) *(s++) = '[';
|
259
|
+
for (j = 0; j < f->size; j++) {
|
260
|
+
if (j) *(s++) = ',';
|
261
|
+
s = json_concat_string(s, lazy_df_get_data(f, j));
|
262
|
+
}
|
263
|
+
if (f->size > 1) *(s++) = ']';
|
264
|
+
}
|
265
|
+
return s;
|
266
|
+
}
|
267
|
+
|
268
|
+
/*
|
269
|
+
* call-seq:
|
270
|
+
* top_doc.to_json() -> string
|
271
|
+
*
|
272
|
+
* Returns a json representation of the top_doc.
|
273
|
+
*/
|
274
|
+
static VALUE
|
275
|
+
frt_td_to_json(VALUE self)
|
276
|
+
{
|
277
|
+
int i;
|
278
|
+
VALUE rhits = rb_funcall(self, id_hits, 0);
|
279
|
+
VALUE rhit;
|
280
|
+
LazyDoc *lzd;
|
281
|
+
Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
|
282
|
+
const int num_hits = RARRAY_LEN(rhits);
|
283
|
+
int doc_id;
|
284
|
+
int len = 32768;
|
285
|
+
char *str = ALLOC_N(char, len);
|
286
|
+
char *s = str;
|
287
|
+
VALUE rstr;
|
288
|
+
|
289
|
+
*(s++) = '[';
|
290
|
+
for (i = 0; i < num_hits; i++) {
|
291
|
+
if (i) *(s++) = ',';
|
292
|
+
*(s++) = '{';
|
293
|
+
rhit = RARRAY_PTR(rhits)[i];
|
294
|
+
doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
|
295
|
+
lzd = sea->get_lazy_doc(sea, doc_id);
|
296
|
+
s = frt_lzd_load_to_json(lzd, &str, s, &len);
|
297
|
+
lazy_doc_close(lzd);
|
298
|
+
*(s++) = '}';
|
299
|
+
}
|
300
|
+
*(s++) = ']';
|
301
|
+
*(s++) = '\0';
|
302
|
+
rstr = rb_str_new2(str);
|
303
|
+
free(str);
|
304
|
+
return rstr;
|
305
|
+
}
|
306
|
+
|
307
|
+
|
308
|
+
/****************************************************************************
|
309
|
+
*
|
310
|
+
* Explanation Methods
|
311
|
+
*
|
312
|
+
****************************************************************************/
|
313
|
+
|
314
|
+
/* Declares a local `expl` holding self's data pointer cast to Explanation *. */
#define GET_EXPL() Explanation *expl = (Explanation *)DATA_PTR(self)
|
315
|
+
|
316
|
+
/*
|
317
|
+
* call-seq:
|
318
|
+
* explanation.to_s -> string
|
319
|
+
*
|
320
|
+
* Returns a string representation of the explanation in readable format.
|
321
|
+
*/
|
322
|
+
static VALUE
|
323
|
+
frt_expl_to_s(VALUE self)
|
324
|
+
{
|
325
|
+
GET_EXPL();
|
326
|
+
char *str = expl_to_s(expl);
|
327
|
+
VALUE rstr = rb_str_new2(str);
|
328
|
+
free(str);
|
329
|
+
return rstr;
|
330
|
+
}
|
331
|
+
|
332
|
+
/*
|
333
|
+
* call-seq:
|
334
|
+
* explanation.to_html -> string
|
335
|
+
*
|
336
|
+
* Returns an html representation of the explanation in readable format.
|
337
|
+
*/
|
338
|
+
static VALUE
|
339
|
+
frt_expl_to_html(VALUE self)
|
340
|
+
{
|
341
|
+
GET_EXPL();
|
342
|
+
char *str = expl_to_html(expl);
|
343
|
+
VALUE rstr = rb_str_new2(str);
|
344
|
+
free(str);
|
345
|
+
return rstr;
|
346
|
+
}
|
347
|
+
|
348
|
+
/*
|
349
|
+
* call-seq:
|
350
|
+
* explanation.score -> float
|
351
|
+
*
|
352
|
+
* Returns the score represented by the query. This can be used for debugging
|
353
|
+
* purposes mainly to check that the score returned by the explanation
|
354
|
+
* matches that of the score for the document in the original query.
|
355
|
+
*/
|
356
|
+
static VALUE
|
357
|
+
frt_expl_score(VALUE self)
|
358
|
+
{
|
359
|
+
GET_EXPL();
|
360
|
+
return rb_float_new((double)expl->value);
|
361
|
+
}
|
362
|
+
|
363
|
+
/****************************************************************************
|
364
|
+
*
|
365
|
+
* Query Methods
|
366
|
+
*
|
367
|
+
****************************************************************************/
|
368
|
+
|
369
|
+
static void
|
370
|
+
frt_q_free(void *p)
|
371
|
+
{
|
372
|
+
object_del(p);
|
373
|
+
q_deref((Query *)p);
|
374
|
+
}
|
375
|
+
|
376
|
+
/* Declares a local `q` holding self's data pointer cast to Query *. */
#define GET_Q() Query *q = (Query *)DATA_PTR(self)
|
377
|
+
|
378
|
+
/*
|
379
|
+
* call-seq:
|
380
|
+
* query.to_s -> string
|
381
|
+
*
|
382
|
+
* Return a string representation of the query. Most of the time, passing
|
383
|
+
* this string through the Query parser will give you the exact Query you
|
384
|
+
* began with. This can be a good way to explore how the QueryParser works.
|
385
|
+
*/
|
386
|
+
static VALUE
|
387
|
+
frt_q_to_s(int argc, VALUE *argv, VALUE self)
|
388
|
+
{
|
389
|
+
GET_Q();
|
390
|
+
VALUE rstr, rfield;
|
391
|
+
char *str, *field = "";
|
392
|
+
if (rb_scan_args(argc, argv, "01", &rfield)) {
|
393
|
+
field = frt_field(rfield);
|
394
|
+
}
|
395
|
+
str = q->to_s(q, field);
|
396
|
+
rstr = rb_str_new2(str);
|
397
|
+
free(str);
|
398
|
+
return rstr;
|
399
|
+
}
|
400
|
+
|
401
|
+
/*
|
402
|
+
* call-seq:
|
403
|
+
* query.boost
|
404
|
+
*
|
405
|
+
* Returns the queries boost value. See the Query description for more
|
406
|
+
* information on Query boosts.
|
407
|
+
*/
|
408
|
+
static VALUE
|
409
|
+
frt_q_get_boost(VALUE self)
|
410
|
+
{
|
411
|
+
GET_Q();
|
412
|
+
return rb_float_new((double)q->boost);
|
413
|
+
}
|
414
|
+
|
415
|
+
/*
|
416
|
+
* call-seq:
|
417
|
+
* query.boost = boost -> boost
|
418
|
+
*
|
419
|
+
* Set the boost for a query. See the Query description for more information
|
420
|
+
* on Query boosts.
|
421
|
+
*/
|
422
|
+
static VALUE
|
423
|
+
frt_q_set_boost(VALUE self, VALUE rboost)
|
424
|
+
{
|
425
|
+
GET_Q();
|
426
|
+
q->boost = (float)NUM2DBL(rboost);
|
427
|
+
return rboost;
|
428
|
+
}
|
429
|
+
|
430
|
+
/*
|
431
|
+
* call-seq:
|
432
|
+
* query.hash -> number
|
433
|
+
*
|
434
|
+
* Return a hash value for the query. This is used for caching query results
|
435
|
+
* in a hash object.
|
436
|
+
*/
|
437
|
+
static VALUE
|
438
|
+
frt_q_hash(VALUE self)
|
439
|
+
{
|
440
|
+
GET_Q();
|
441
|
+
return INT2FIX(q->hash(q));
|
442
|
+
}
|
443
|
+
|
444
|
+
/*
|
445
|
+
* call-seq;
|
446
|
+
* query.eql?(other_query) -> bool
|
447
|
+
* query == other_query -> bool
|
448
|
+
*
|
449
|
+
* Return true if +query+ equals +other_query+. Theoretically, two queries are
|
450
|
+
* equal if the always return the same results, no matter what the contents
|
451
|
+
* of the index. Practically, however, this is difficult to implement
|
452
|
+
* efficiently for queries like BooleanQuery since the ordering of clauses
|
453
|
+
* unspecified. "Ruby AND Rails" will not match "Rails AND Ruby" for example,
|
454
|
+
* although their result sets will be identical. Most queries should match as
|
455
|
+
* expected however.
|
456
|
+
*/
|
457
|
+
static VALUE
|
458
|
+
frt_q_eql(VALUE self, VALUE other)
|
459
|
+
{
|
460
|
+
GET_Q();
|
461
|
+
Query *oq;
|
462
|
+
Data_Get_Struct(other, Query, oq);
|
463
|
+
return q->eq(q, oq) ? Qtrue : Qfalse;
|
464
|
+
}
|
465
|
+
|
466
|
+
/*
|
467
|
+
* call-seq:
|
468
|
+
* query.terms(searcher) -> term_array
|
469
|
+
*
|
470
|
+
* Returns an array of terms searched for by this query. This can be used for
|
471
|
+
* implementing an external query highlighter for example. You must supply a
|
472
|
+
* searcher so that the query can be rewritten and optimized like it would be
|
473
|
+
* in a real search.
|
474
|
+
*/
|
475
|
+
static VALUE
|
476
|
+
frt_q_get_terms(VALUE self, VALUE searcher)
|
477
|
+
{
|
478
|
+
int i;
|
479
|
+
VALUE rterms = rb_ary_new();
|
480
|
+
HashSet *terms = term_set_new();
|
481
|
+
GET_Q();
|
482
|
+
Searcher *sea = (Searcher *)DATA_PTR(searcher);
|
483
|
+
Query *rq = sea->rewrite(sea, q);
|
484
|
+
rq->extract_terms(rq, terms);
|
485
|
+
q_deref(rq);
|
486
|
+
for (i = 0; i < terms->size; i++) {
|
487
|
+
Term *term = (Term *)terms->elems[i];
|
488
|
+
rb_ary_push(rterms, frt_get_term(term->field, term->text));
|
489
|
+
}
|
490
|
+
hs_destroy(terms);
|
491
|
+
return rterms;
|
492
|
+
}
|
493
|
+
|
494
|
+
/* Wraps Query q in a Ruby object of class klass: no mark function, frt_q_free as the free function. */
#define MK_QUERY(klass, q) Data_Wrap_Struct(klass, NULL, &frt_q_free, q)
|
495
|
+
VALUE
|
496
|
+
frt_get_q(Query *q)
|
497
|
+
{
|
498
|
+
VALUE self = object_get(q);
|
499
|
+
|
500
|
+
if (self == Qnil) {
|
501
|
+
switch (q->type) {
|
502
|
+
case TERM_QUERY:
|
503
|
+
self = MK_QUERY(cTermQuery, q);
|
504
|
+
break;
|
505
|
+
case MULTI_TERM_QUERY:
|
506
|
+
self = MK_QUERY(cMultiTermQuery, q);
|
507
|
+
break;
|
508
|
+
case BOOLEAN_QUERY:
|
509
|
+
self = MK_QUERY(cBooleanQuery, q);
|
510
|
+
break;
|
511
|
+
case PHRASE_QUERY:
|
512
|
+
self = MK_QUERY(cPhraseQuery, q);
|
513
|
+
break;
|
514
|
+
case CONSTANT_QUERY:
|
515
|
+
self = MK_QUERY(cConstantScoreQuery, q);
|
516
|
+
break;
|
517
|
+
case FILTERED_QUERY:
|
518
|
+
self = MK_QUERY(cFilteredQuery, q);
|
519
|
+
break;
|
520
|
+
case MATCH_ALL_QUERY:
|
521
|
+
self = MK_QUERY(cMatchAllQuery, q);
|
522
|
+
break;
|
523
|
+
case RANGE_QUERY:
|
524
|
+
self = MK_QUERY(cRangeQuery, q);
|
525
|
+
break;
|
526
|
+
case WILD_CARD_QUERY:
|
527
|
+
self = MK_QUERY(cWildcardQuery, q);
|
528
|
+
break;
|
529
|
+
case FUZZY_QUERY:
|
530
|
+
self = MK_QUERY(cFuzzyQuery, q);
|
531
|
+
break;
|
532
|
+
case PREFIX_QUERY:
|
533
|
+
self = MK_QUERY(cPrefixQuery, q);
|
534
|
+
break;
|
535
|
+
case SPAN_TERM_QUERY:
|
536
|
+
self = MK_QUERY(cSpanMultiTermQuery, q);
|
537
|
+
break;
|
538
|
+
case SPAN_MULTI_TERM_QUERY:
|
539
|
+
self = MK_QUERY(cSpanPrefixQuery, q);
|
540
|
+
break;
|
541
|
+
case SPAN_PREFIX_QUERY:
|
542
|
+
self = MK_QUERY(cSpanTermQuery, q);
|
543
|
+
break;
|
544
|
+
case SPAN_FIRST_QUERY:
|
545
|
+
self = MK_QUERY(cSpanFirstQuery, q);
|
546
|
+
break;
|
547
|
+
case SPAN_OR_QUERY:
|
548
|
+
self = MK_QUERY(cSpanOrQuery, q);
|
549
|
+
break;
|
550
|
+
case SPAN_NOT_QUERY:
|
551
|
+
self = MK_QUERY(cSpanNotQuery, q);
|
552
|
+
break;
|
553
|
+
case SPAN_NEAR_QUERY:
|
554
|
+
self = MK_QUERY(cSpanNearQuery, q);
|
555
|
+
break;
|
556
|
+
default:
|
557
|
+
rb_raise(rb_eArgError, "Unknown query type");
|
558
|
+
break;
|
559
|
+
}
|
560
|
+
object_add(q, self);
|
561
|
+
}
|
562
|
+
return self;
|
563
|
+
}
|
564
|
+
|
565
|
+
/****************************************************************************
|
566
|
+
*
|
567
|
+
* TermQuery Methods
|
568
|
+
*
|
569
|
+
****************************************************************************/
|
570
|
+
|
571
|
+
/*
|
572
|
+
* call-seq:
|
573
|
+
* TermQuery.new(field, term) -> term_query
|
574
|
+
*
|
575
|
+
* Create a new TermQuery object which will match all documents with the term
|
576
|
+
* +term+ in the field +field+.
|
577
|
+
*
|
578
|
+
* Note: As usual, field should be a symbol
|
579
|
+
*/
|
580
|
+
static VALUE
|
581
|
+
frt_tq_init(VALUE self, VALUE rfield, VALUE rterm)
|
582
|
+
{
|
583
|
+
char *field = frt_field(rfield);
|
584
|
+
char *term = rs2s(rb_obj_as_string(rterm));
|
585
|
+
Query *q = tq_new(field, term);
|
586
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
587
|
+
object_add(q, self);
|
588
|
+
return self;
|
589
|
+
}
|
590
|
+
|
591
|
+
/****************************************************************************
|
592
|
+
*
|
593
|
+
* MultiTermQuery Methods
|
594
|
+
*
|
595
|
+
****************************************************************************/
|
596
|
+
|
597
|
+
/*
|
598
|
+
* call-seq:
|
599
|
+
* MultiTermQuery.default_max_terms -> number
|
600
|
+
*
|
601
|
+
* Get the default value for +:max_terms+ in a MultiTermQuery. This value is
|
602
|
+
* also used by PrefixQuery, FuzzyQuery and WildcardQuery.
|
603
|
+
*/
|
604
|
+
static VALUE
|
605
|
+
frt_mtq_get_dmt(VALUE self)
|
606
|
+
{
|
607
|
+
return rb_cvar_get(cMultiTermQuery, id_default_max_terms);
|
608
|
+
}
|
609
|
+
|
610
|
+
/*
|
611
|
+
* call-seq:
|
612
|
+
* MultiTermQuery.default_max_terms = max_terms -> max_terms
|
613
|
+
*
|
614
|
+
* Set the default value for +:max_terms+ in a MultiTermQuery. This value is
|
615
|
+
* also used by PrefixQuery, FuzzyQuery and WildcardQuery.
|
616
|
+
*/
|
617
|
+
static VALUE
|
618
|
+
frt_mtq_set_dmt(VALUE self, VALUE rnum_terms)
|
619
|
+
{
|
620
|
+
int max_terms = FIX2INT(rnum_terms);
|
621
|
+
if (max_terms <= 0) {
|
622
|
+
rb_raise(rb_eArgError,
|
623
|
+
"%d <= 0. @@max_terms must be > 0", max_terms);
|
624
|
+
}
|
625
|
+
rb_cvar_set(cMultiTermQuery, id_default_max_terms, rnum_terms);
|
626
|
+
return rnum_terms;
|
627
|
+
}
|
628
|
+
|
629
|
+
/*
|
630
|
+
* call-seq:
|
631
|
+
* MultiTermQuery.new(field, options = {}) -> multi_term_query
|
632
|
+
*
|
633
|
+
* Create a new MultiTermQuery on field +field+. You will also need to add
|
634
|
+
* terms to the query using the MultiTermQuery#add_term method.
|
635
|
+
*
|
636
|
+
* There are several options available to you when creating a
|
637
|
+
* MultiTermQueries;
|
638
|
+
*
|
639
|
+
* === Options
|
640
|
+
*
|
641
|
+
* :max_terms:: You can specify the maximum number of terms that can be
|
642
|
+
* added to the query. This is to prevent memory usage overflow,
|
643
|
+
* particularly when don't directly control the addition of
|
644
|
+
* terms to the Query object like when you create Wildcard
|
645
|
+
* queries. For example, searching for "content:*" would cause
|
646
|
+
* problems without this limit.
|
647
|
+
* :min_score:: The minimum score a term must have to be added to the query.
|
648
|
+
* For example you could implement your own wild-card queries
|
649
|
+
* that gives matches a score. To limit the number of terms
|
650
|
+
* added to the query you could set a lower limit to this score.
|
651
|
+
* FuzzyQuery in particular makes use of this parameter.
|
652
|
+
*/
|
653
|
+
static VALUE
|
654
|
+
frt_mtq_init(int argc, VALUE *argv, VALUE self)
|
655
|
+
{
|
656
|
+
VALUE rfield, roptions;
|
657
|
+
float min_score = 0.0;
|
658
|
+
int max_terms = FIX2INT(frt_mtq_get_dmt(self));
|
659
|
+
Query *q;
|
660
|
+
|
661
|
+
if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
|
662
|
+
VALUE v;
|
663
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
|
664
|
+
max_terms = FIX2INT(v);
|
665
|
+
}
|
666
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_min_score))) {
|
667
|
+
min_score = (float)NUM2DBL(v);
|
668
|
+
}
|
669
|
+
}
|
670
|
+
q = multi_tq_new_conf(frt_field(rfield), max_terms, min_score);
|
671
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
672
|
+
object_add(q, self);
|
673
|
+
return self;
|
674
|
+
}
|
675
|
+
|
676
|
+
/*
|
677
|
+
* call-seq:
|
678
|
+
* multi_term_query.add_term(term, score = 1.0) -> self
|
679
|
+
* multi_term_query << term1 << term2 << term3 -> self
|
680
|
+
*
|
681
|
+
* Add a term to the MultiTermQuery with the score 1.0 unless specified
|
682
|
+
* otherwise.
|
683
|
+
*/
|
684
|
+
static VALUE
|
685
|
+
frt_mtq_add_term(int argc, VALUE *argv, VALUE self)
|
686
|
+
{
|
687
|
+
GET_Q();
|
688
|
+
VALUE rterm, rboost;
|
689
|
+
float boost = 1.0;
|
690
|
+
char *term = NULL;
|
691
|
+
if (rb_scan_args(argc, argv, "11", &rterm, &rboost) == 2) {
|
692
|
+
boost = (float)NUM2DBL(rboost);
|
693
|
+
}
|
694
|
+
term = StringValuePtr(rterm);
|
695
|
+
multi_tq_add_term_boost(q, term, boost);
|
696
|
+
|
697
|
+
return self;
|
698
|
+
}
|
699
|
+
|
700
|
+
/* Pointer to a function that builds a Query from a field name and a term string. */
typedef Query *(*mtq_maker_ft)(const char *field, const char *term);
|
701
|
+
|
702
|
+
static VALUE
|
703
|
+
frt_mtq_init_specific(int argc, VALUE *argv, VALUE self, mtq_maker_ft mm)
|
704
|
+
{
|
705
|
+
VALUE rfield, rterm, rmax_terms;
|
706
|
+
int max_terms =
|
707
|
+
FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
|
708
|
+
Query *q;
|
709
|
+
|
710
|
+
if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &rmax_terms) == 3) {
|
711
|
+
max_terms = FIX2INT(rmax_terms);
|
712
|
+
}
|
713
|
+
|
714
|
+
q = (*mm)(frt_field(rfield), StringValuePtr(rterm));
|
715
|
+
MTQMaxTerms(q) = max_terms;
|
716
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
717
|
+
object_add(q, self);
|
718
|
+
return self;
|
719
|
+
}
|
720
|
+
|
721
|
+
/****************************************************************************
|
722
|
+
*
|
723
|
+
* BooleanClause Methods
|
724
|
+
*
|
725
|
+
****************************************************************************/
|
726
|
+
|
727
|
+
static void
|
728
|
+
frt_bc_mark(void *p)
|
729
|
+
{
|
730
|
+
frt_gc_mark(((BooleanClause *)p)->query);
|
731
|
+
}
|
732
|
+
|
733
|
+
static void
|
734
|
+
frt_bc_free(void *p)
|
735
|
+
{
|
736
|
+
object_del(p);
|
737
|
+
bc_deref((BooleanClause *)p);
|
738
|
+
}
|
739
|
+
|
740
|
+
static VALUE
|
741
|
+
frt_bc_wrap(BooleanClause *bc)
|
742
|
+
{
|
743
|
+
VALUE self = Data_Wrap_Struct(cBooleanClause, &frt_bc_mark, &frt_bc_free, bc);
|
744
|
+
REF(bc);
|
745
|
+
object_add(bc, self);
|
746
|
+
return self;
|
747
|
+
}
|
748
|
+
|
749
|
+
static enum BC_TYPE
|
750
|
+
frt_get_occur(VALUE roccur)
|
751
|
+
{
|
752
|
+
enum BC_TYPE occur = BC_SHOULD;
|
753
|
+
|
754
|
+
if (roccur == sym_should) {
|
755
|
+
occur = BC_SHOULD;
|
756
|
+
} else if (roccur == sym_must) {
|
757
|
+
occur = BC_MUST;
|
758
|
+
} else if (roccur == sym_must_not) {
|
759
|
+
occur = BC_MUST_NOT;
|
760
|
+
} else {
|
761
|
+
rb_raise(rb_eArgError, "occur argument must be one of [:must, "
|
762
|
+
":should, :must_not]");
|
763
|
+
}
|
764
|
+
return occur;
|
765
|
+
}
|
766
|
+
|
767
|
+
/*
|
768
|
+
* call-seq:
|
769
|
+
* BooleanClause.new(query, occur = :should) -> BooleanClause
|
770
|
+
*
|
771
|
+
* Create a new BooleanClause object, wrapping the query +query+. +occur+
|
772
|
+
* must be one of +:must+, +:should+ or +:must_not+.
|
773
|
+
*/
|
774
|
+
static VALUE
|
775
|
+
frt_bc_init(int argc, VALUE *argv, VALUE self)
|
776
|
+
{
|
777
|
+
BooleanClause *bc;
|
778
|
+
VALUE rquery, roccur;
|
779
|
+
unsigned int occur = BC_SHOULD;
|
780
|
+
Query *sub_q;
|
781
|
+
if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
|
782
|
+
occur = frt_get_occur(roccur);
|
783
|
+
}
|
784
|
+
Data_Get_Struct(rquery, Query, sub_q);
|
785
|
+
REF(sub_q);
|
786
|
+
bc = bc_new(sub_q, occur);
|
787
|
+
Frt_Wrap_Struct(self, &frt_bc_mark, &frt_bc_free, bc);
|
788
|
+
object_add(bc, self);
|
789
|
+
return self;
|
790
|
+
}
|
791
|
+
|
792
|
+
/* Declares a local +bc+ pointing at the BooleanClause stored in +self+. */
#define GET_BC() BooleanClause *bc = (BooleanClause *)DATA_PTR(self)
|
793
|
+
/*
|
794
|
+
* call-seq:
|
795
|
+
* clause.query -> query
|
796
|
+
*
|
797
|
+
* Return the query object wrapped by this BooleanClause.
|
798
|
+
*/
|
799
|
+
static VALUE
|
800
|
+
frt_bc_get_query(VALUE self)
|
801
|
+
{
|
802
|
+
GET_BC();
|
803
|
+
return object_get(bc->query);
|
804
|
+
}
|
805
|
+
|
806
|
+
/*
|
807
|
+
* call-seq:
|
808
|
+
* clause.query = query -> query
|
809
|
+
*
|
810
|
+
* Set the query wrapped by this BooleanClause.
|
811
|
+
*/
|
812
|
+
static VALUE
|
813
|
+
frt_bc_set_query(VALUE self, VALUE rquery)
|
814
|
+
{
|
815
|
+
GET_BC();
|
816
|
+
Data_Get_Struct(rquery, Query, bc->query);
|
817
|
+
return rquery;
|
818
|
+
}
|
819
|
+
|
820
|
+
/*
|
821
|
+
* call-seq:
|
822
|
+
* clause.required? -> bool
|
823
|
+
*
|
824
|
+
* Return true if this clause is required. ie, this will be true if occur was
|
825
|
+
* equal to +:must+.
|
826
|
+
*/
|
827
|
+
static VALUE
|
828
|
+
frt_bc_is_required(VALUE self)
|
829
|
+
{
|
830
|
+
GET_BC();
|
831
|
+
return bc->is_required ? Qtrue : Qfalse;
|
832
|
+
}
|
833
|
+
|
834
|
+
/*
|
835
|
+
* call-seq:
|
836
|
+
* clause.prohibited? -> bool
|
837
|
+
*
|
838
|
+
* Return true if this clause is prohibited. ie, this will be true if occur was
|
839
|
+
* equal to +:must_not+.
|
840
|
+
*/
|
841
|
+
static VALUE
|
842
|
+
frt_bc_is_prohibited(VALUE self)
|
843
|
+
{
|
844
|
+
GET_BC();
|
845
|
+
return bc->is_prohibited ? Qtrue : Qfalse;
|
846
|
+
}
|
847
|
+
|
848
|
+
/*
|
849
|
+
* call-seq:
|
850
|
+
* clause.occur = occur -> occur
|
851
|
+
*
|
852
|
+
* Set the +occur+ value for this BooleanClause. +occur+ must be one of
|
853
|
+
* +:must+, +:should+ or +:must_not+.
|
854
|
+
*/
|
855
|
+
static VALUE
|
856
|
+
frt_bc_set_occur(VALUE self, VALUE roccur)
|
857
|
+
{
|
858
|
+
GET_BC();
|
859
|
+
enum BC_TYPE occur = frt_get_occur(roccur);
|
860
|
+
bc_set_occur(bc, occur);
|
861
|
+
|
862
|
+
return roccur;
|
863
|
+
}
|
864
|
+
|
865
|
+
/*
|
866
|
+
* call-seq:
|
867
|
+
* clause.to_s -> string
|
868
|
+
*
|
869
|
+
* Return a string representation of this clause. This will not be used by
|
870
|
+
* BooleanQuery#to_s. It is only used by BooleanClause#to_s and will specify
|
871
|
+
* whether the clause is +:must+, +:should+ or +:must_not+.
|
872
|
+
*/
|
873
|
+
static VALUE
|
874
|
+
frt_bc_to_s(VALUE self)
|
875
|
+
{
|
876
|
+
VALUE rstr;
|
877
|
+
char *qstr, *ostr = "", *str;
|
878
|
+
int len;
|
879
|
+
GET_BC();
|
880
|
+
qstr = bc->query->to_s(bc->query, "");
|
881
|
+
switch (bc->occur) {
|
882
|
+
case BC_SHOULD:
|
883
|
+
ostr = "Should";
|
884
|
+
break;
|
885
|
+
case BC_MUST:
|
886
|
+
ostr = "Must";
|
887
|
+
break;
|
888
|
+
case BC_MUST_NOT:
|
889
|
+
ostr = "Must Not";
|
890
|
+
break;
|
891
|
+
}
|
892
|
+
len = strlen(ostr) + strlen(qstr) + 2;
|
893
|
+
str = ALLOC_N(char, len);
|
894
|
+
sprintf(str, "%s:%s", ostr, qstr);
|
895
|
+
rstr = rb_str_new(str, len);
|
896
|
+
free(qstr);
|
897
|
+
free(str);
|
898
|
+
return rstr;
|
899
|
+
}
|
900
|
+
|
901
|
+
/****************************************************************************
|
902
|
+
*
|
903
|
+
* BooleanQuery Methods
|
904
|
+
*
|
905
|
+
****************************************************************************/
|
906
|
+
|
907
|
+
static void
|
908
|
+
frt_bq_mark(void *p)
|
909
|
+
{
|
910
|
+
int i;
|
911
|
+
Query *q = (Query *)p;
|
912
|
+
BooleanQuery *bq = (BooleanQuery *)q;
|
913
|
+
for (i = 0; i < bq->clause_cnt; i++) {
|
914
|
+
frt_gc_mark(bq->clauses[i]);
|
915
|
+
}
|
916
|
+
}
|
917
|
+
|
918
|
+
/*
|
919
|
+
* call-seq:
|
920
|
+
* BooleanQuery.new(coord_disable = false)
|
921
|
+
*
|
922
|
+
* Create a new BooleanQuery. If you don't care about the scores of the
|
923
|
+
* sub-queries added to the query (as would be the case for many
|
924
|
+
* automatically generated queries) you can disable the coord_factor of the
|
925
|
+
* score. This will slightly improve performance for the query. Usually you
|
926
|
+
* should leave this parameter as is.
|
927
|
+
*/
|
928
|
+
static VALUE
|
929
|
+
frt_bq_init(int argc, VALUE *argv, VALUE self)
|
930
|
+
{
|
931
|
+
VALUE rcoord_disabled;
|
932
|
+
bool coord_disabled = false;
|
933
|
+
Query *q;
|
934
|
+
if (rb_scan_args(argc, argv, "01", &rcoord_disabled)) {
|
935
|
+
coord_disabled = RTEST(rcoord_disabled);
|
936
|
+
}
|
937
|
+
q = bq_new(coord_disabled);
|
938
|
+
Frt_Wrap_Struct(self, &frt_bq_mark, &frt_q_free, q);
|
939
|
+
object_add(q, self);
|
940
|
+
return self;
|
941
|
+
}
|
942
|
+
|
943
|
+
/*
|
944
|
+
* call-seq:
|
945
|
+
* boolean_query.add_query(query, occur = :should) -> boolean_clause
|
946
|
+
* boolean_query.<<(query, occur = :should) -> boolean_clause
|
947
|
+
* boolean_query << boolean_clause -> boolean_clause
|
948
|
+
*
|
949
|
+
* Use this method to add sub-queries to a BooleanQuery. You can either add
|
950
|
+
* a straight Query or a BooleanClause. When adding a Query, the default
|
951
|
+
* occurrence requirement is :should. That is the Query's match will be
|
952
|
+
* scored but it isn't essential for a match. If the query should be
|
953
|
+
* essential, use :must. For exclusive queries use :must_not.
|
954
|
+
*
|
955
|
+
* When adding a Boolean clause to a BooleanQuery there is no need to set the
|
956
|
+
* occurrence property because it is already set in the BooleanClause.
|
957
|
+
* Therefore the +occur+ parameter will be ignored in this case.
|
958
|
+
*
|
959
|
+
* query:: Query to add to the BooleanQuery
|
960
|
+
* occur:: occurrence requirement for the query being added. Must be one of
|
961
|
+
* [:must, :should, :must_not]
|
962
|
+
* returns:: BooleanClause which was added
|
963
|
+
*/
|
964
|
+
static VALUE
|
965
|
+
frt_bq_add_query(int argc, VALUE *argv, VALUE self)
|
966
|
+
{
|
967
|
+
GET_Q();
|
968
|
+
VALUE rquery, roccur;
|
969
|
+
enum BC_TYPE occur = BC_SHOULD;
|
970
|
+
Query *sub_q;
|
971
|
+
VALUE klass;
|
972
|
+
|
973
|
+
if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
|
974
|
+
occur = frt_get_occur(roccur);
|
975
|
+
}
|
976
|
+
klass = CLASS_OF(rquery);
|
977
|
+
if (klass == cBooleanClause) {
|
978
|
+
BooleanClause *bc = (BooleanClause *)DATA_PTR(rquery);
|
979
|
+
if (argc > 1) {
|
980
|
+
rb_warning("Second argument to BooleanQuery#add is ignored "
|
981
|
+
"when adding BooleanClause");
|
982
|
+
}
|
983
|
+
bq_add_clause(q, bc);
|
984
|
+
return rquery;
|
985
|
+
} else if (TYPE(rquery) == T_DATA) {
|
986
|
+
Data_Get_Struct(rquery, Query, sub_q);
|
987
|
+
return frt_bc_wrap(bq_add_query(q, sub_q, occur));
|
988
|
+
} else {
|
989
|
+
rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
|
990
|
+
rb_class2name(klass));
|
991
|
+
}
|
992
|
+
return self;
|
993
|
+
}
|
994
|
+
|
995
|
+
/****************************************************************************
|
996
|
+
*
|
997
|
+
* RangeQuery Methods
|
998
|
+
*
|
999
|
+
****************************************************************************/
|
1000
|
+
|
1001
|
+
static void
|
1002
|
+
get_range_params(VALUE roptions, char **lterm, char **uterm,
|
1003
|
+
bool *include_lower, bool *include_upper)
|
1004
|
+
{
|
1005
|
+
VALUE v;
|
1006
|
+
Check_Type(roptions, T_HASH);
|
1007
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_lower))) {
|
1008
|
+
*lterm = StringValuePtr(v);
|
1009
|
+
*include_lower = true;
|
1010
|
+
}
|
1011
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_upper))) {
|
1012
|
+
*uterm = StringValuePtr(v);
|
1013
|
+
*include_upper = true;
|
1014
|
+
}
|
1015
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
|
1016
|
+
*lterm = StringValuePtr(v);
|
1017
|
+
*include_lower = false;
|
1018
|
+
}
|
1019
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
|
1020
|
+
*uterm = StringValuePtr(v);
|
1021
|
+
*include_upper = false;
|
1022
|
+
}
|
1023
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
|
1024
|
+
*include_lower = RTEST(v);
|
1025
|
+
}
|
1026
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_include_upper))) {
|
1027
|
+
*include_upper = RTEST(v);
|
1028
|
+
}
|
1029
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than))) {
|
1030
|
+
*lterm = StringValuePtr(v);
|
1031
|
+
*include_lower = false;
|
1032
|
+
}
|
1033
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than_or_equal_to))) {
|
1034
|
+
*lterm = StringValuePtr(v);
|
1035
|
+
*include_lower = true;
|
1036
|
+
}
|
1037
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_less_than))) {
|
1038
|
+
*uterm = StringValuePtr(v);
|
1039
|
+
*include_upper = false;
|
1040
|
+
}
|
1041
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_less_than_or_equal_to))) {
|
1042
|
+
*uterm = StringValuePtr(v);
|
1043
|
+
*include_upper = true;
|
1044
|
+
}
|
1045
|
+
if (!*lterm && !*uterm) {
|
1046
|
+
rb_raise(rb_eArgError,
|
1047
|
+
"The bounds of a range should not both be nil");
|
1048
|
+
}
|
1049
|
+
if (*include_lower && !*lterm) {
|
1050
|
+
rb_raise(rb_eArgError,
|
1051
|
+
"The lower bound should not be nil if it is inclusive");
|
1052
|
+
}
|
1053
|
+
if (*include_upper && !*uterm) {
|
1054
|
+
rb_raise(rb_eArgError,
|
1055
|
+
"The upper bound should not be nil if it is inclusive");
|
1056
|
+
}
|
1057
|
+
if (*uterm && *lterm && (strcmp(*uterm, *lterm) < 0)) {
|
1058
|
+
rb_raise(rb_eArgError,
|
1059
|
+
"The upper bound should greater than the lower bound."
|
1060
|
+
" %s > %s", *lterm, *uterm);
|
1061
|
+
}
|
1062
|
+
}
|
1063
|
+
|
1064
|
+
/*
|
1065
|
+
* call-seq:
|
1066
|
+
* RangeQuery.new(field, options = {}) -> range_query
|
1067
|
+
*
|
1068
|
+
* Create a new RangeQuery on field +field+. There are two ways to build a
|
1069
|
+
* range query. With the old-style options; +:lower+, +:upper+,
|
1070
|
+
* +:include_lower+ and +:include_upper+ or the new style options; +:<+,
|
1071
|
+
* +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
|
1072
|
+
* In the old-style options, limits are inclusive by default.
|
1073
|
+
*
|
1074
|
+
* == Examples
|
1075
|
+
*
|
1076
|
+
* q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
|
1077
|
+
* # is equivalent to
|
1078
|
+
* q = RangeQuery.new(:date, :> => "200501")
|
1079
|
+
* # is equivalent to
|
1080
|
+
* q = RangeQuery.new(:date, :lower_exclusive => "200501")
|
1081
|
+
*
|
1082
|
+
* q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
|
1083
|
+
* # is equivalent to
|
1084
|
+
* q = RangeQuery.new(:date, :>= => "200501", :<= => 200502)
|
1085
|
+
*/
|
1086
|
+
static VALUE
|
1087
|
+
frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
|
1088
|
+
{
|
1089
|
+
Query *q;
|
1090
|
+
char *lterm = NULL;
|
1091
|
+
char *uterm = NULL;
|
1092
|
+
bool include_lower = false;
|
1093
|
+
bool include_upper = false;
|
1094
|
+
|
1095
|
+
get_range_params(roptions, <erm, &uterm, &include_lower, &include_upper);
|
1096
|
+
q = rq_new(frt_field(rfield),
|
1097
|
+
lterm, uterm,
|
1098
|
+
include_lower, include_upper);
|
1099
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1100
|
+
object_add(q, self);
|
1101
|
+
return self;
|
1102
|
+
}
|
1103
|
+
|
1104
|
+
/****************************************************************************
|
1105
|
+
*
|
1106
|
+
* PhraseQuery Methods
|
1107
|
+
*
|
1108
|
+
****************************************************************************/
|
1109
|
+
|
1110
|
+
/*
|
1111
|
+
* call-seq:
|
1112
|
+
* PhraseQuery.new(field, slop = 0) -> phrase_query
|
1113
|
+
*
|
1114
|
+
* Create a new PhraseQuery on the field +field+. You need to add terms to
|
1115
|
+
* the query before it will do anything of value. See PhraseQuery#add_term.
|
1116
|
+
*/
|
1117
|
+
static VALUE
|
1118
|
+
frt_phq_init(int argc, VALUE *argv, VALUE self)
|
1119
|
+
{
|
1120
|
+
VALUE rfield, rslop;
|
1121
|
+
Query *q;
|
1122
|
+
rb_scan_args(argc, argv, "11", &rfield, &rslop);
|
1123
|
+
q = phq_new(frt_field(rfield));
|
1124
|
+
if (argc == 2) {
|
1125
|
+
((PhraseQuery *)q)->slop = FIX2INT(rslop);
|
1126
|
+
}
|
1127
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1128
|
+
object_add(q, self);
|
1129
|
+
return self;
|
1130
|
+
}
|
1131
|
+
|
1132
|
+
/*
|
1133
|
+
* call-seq:
|
1134
|
+
* phrase_query.add_term(term, position_increment = 1) -> phrase_query
|
1135
|
+
* phrase_query << term -> phrase_query
|
1136
|
+
*
|
1137
|
+
* Add a term to the phrase query. By default the position_increment is set
|
1138
|
+
* to 1 so each term you add is expected to come directly after the previous
|
1139
|
+
* term. By setting position_increment to 2 you are specifying that the term
|
1140
|
+
* you just added should occur two terms after the previous term. For
|
1141
|
+
* example;
|
1142
|
+
*
|
1143
|
+
* phrase_query.add_term("big").add_term("house", 2)
|
1144
|
+
* # matches => "big brick house"
|
1145
|
+
* # matches => "big red house"
|
1146
|
+
* # doesn't match => "big house"
|
1147
|
+
*/
|
1148
|
+
static VALUE
|
1149
|
+
frt_phq_add(int argc, VALUE *argv, VALUE self)
|
1150
|
+
{
|
1151
|
+
VALUE rterm, rpos_inc;
|
1152
|
+
int pos_inc = 1;
|
1153
|
+
GET_Q();
|
1154
|
+
if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
|
1155
|
+
pos_inc = FIX2INT(rpos_inc);
|
1156
|
+
}
|
1157
|
+
switch (TYPE(rterm)) {
|
1158
|
+
case T_STRING:
|
1159
|
+
{
|
1160
|
+
phq_add_term(q, StringValuePtr(rterm), pos_inc);
|
1161
|
+
break;
|
1162
|
+
}
|
1163
|
+
case T_ARRAY:
|
1164
|
+
{
|
1165
|
+
int i;
|
1166
|
+
char *t;
|
1167
|
+
if (RARRAY_LEN(rterm) < 1) {
|
1168
|
+
rb_raise(rb_eArgError, "Cannot add empty array to a "
|
1169
|
+
"PhraseQuery. You must add either a string or "
|
1170
|
+
"an array of strings");
|
1171
|
+
}
|
1172
|
+
t = StringValuePtr(RARRAY_PTR(rterm)[0]);
|
1173
|
+
phq_add_term(q, t, pos_inc);
|
1174
|
+
for (i = 1; i < RARRAY_LEN(rterm); i++) {
|
1175
|
+
t = StringValuePtr(RARRAY_PTR(rterm)[i]);
|
1176
|
+
phq_append_multi_term(q, t);
|
1177
|
+
}
|
1178
|
+
break;
|
1179
|
+
}
|
1180
|
+
default:
|
1181
|
+
rb_raise(rb_eArgError, "You can only add a string or an array of "
|
1182
|
+
"strings to a PhraseQuery, not a %s\n",
|
1183
|
+
rs2s(rb_obj_as_string(rterm)));
|
1184
|
+
}
|
1185
|
+
return self;
|
1186
|
+
}
|
1187
|
+
|
1188
|
+
/*
|
1189
|
+
* call-seq:
|
1190
|
+
* phrase_query.slop -> integer
|
1191
|
+
*
|
1192
|
+
* Return the slop set for this phrase query. See the PhraseQuery
|
1193
|
+
* description for more information on slop
|
1194
|
+
*/
|
1195
|
+
static VALUE
|
1196
|
+
frt_phq_get_slop(VALUE self)
|
1197
|
+
{
|
1198
|
+
GET_Q();
|
1199
|
+
return INT2FIX(((PhraseQuery *)q)->slop);
|
1200
|
+
}
|
1201
|
+
|
1202
|
+
/*
|
1203
|
+
* call-seq:
|
1204
|
+
* phrase_query.slop = slop -> slop
|
1205
|
+
*
|
1206
|
+
* Set the slop for this phrase query. See the PhraseQuery description
|
1207
|
+
* for more information on slop
|
1208
|
+
*/
|
1209
|
+
static VALUE
|
1210
|
+
frt_phq_set_slop(VALUE self, VALUE rslop)
|
1211
|
+
{
|
1212
|
+
GET_Q();
|
1213
|
+
((PhraseQuery *)q)->slop = FIX2INT(rslop);
|
1214
|
+
return self;
|
1215
|
+
}
|
1216
|
+
|
1217
|
+
/****************************************************************************
|
1218
|
+
*
|
1219
|
+
* PrefixQuery Methods
|
1220
|
+
*
|
1221
|
+
****************************************************************************/
|
1222
|
+
|
1223
|
+
/*
|
1224
|
+
* call-seq:
|
1225
|
+
* PrefixQuery.new(field, prefix, options = {}) -> prefix-query
|
1226
|
+
*
|
1227
|
+
* Create a new PrefixQuery to search for all terms with the prefix +prefix+
|
1228
|
+
* in the field +field+. There is one option that you can set to change the
|
1229
|
+
* behaviour of this query. +:max_terms+ specifies the maximum number of
|
1230
|
+
* terms to be added to the query when it is expanded into a MultiTermQuery.
|
1231
|
+
* Let's say for example you search an index with a million terms for all
|
1232
|
+
* terms beginning with the letter "s". You would end up with a very large
|
1233
|
+
* query which would use a lot of memory and take a long time to get results,
|
1234
|
+
* not to mention that it would probably match every document in the index.
|
1235
|
+
* To prevent queries like this crashing your application you can set
|
1236
|
+
* +:max_terms+ which limits the number of terms that get added to the query.
|
1237
|
+
* By default it is set to 512.
|
1238
|
+
*/
|
1239
|
+
static VALUE
|
1240
|
+
frt_prq_init(int argc, VALUE *argv, VALUE self)
|
1241
|
+
{
|
1242
|
+
return frt_mtq_init_specific(argc, argv, self, &prefixq_new);
|
1243
|
+
}
|
1244
|
+
|
1245
|
+
/****************************************************************************
|
1246
|
+
*
|
1247
|
+
* WildcardQuery Methods
|
1248
|
+
*
|
1249
|
+
****************************************************************************/
|
1250
|
+
|
1251
|
+
/*
|
1252
|
+
* call-seq:
|
1253
|
+
* WildcardQuery.new(field, pattern, options = {}) -> wild-card-query
|
1254
|
+
*
|
1255
|
+
* Create a new WildcardQuery to search for all terms where the pattern
|
1256
|
+
* +pattern+ matches in the field +field+.
|
1257
|
+
*
|
1258
|
+
* There is one option that you can set to change the behaviour of this
|
1259
|
+
* query. +:max_terms+ specifies the maximum number of terms to be added to
|
1260
|
+
* the query when it is expanded into a MultiTermQuery. Let's say for
|
1261
|
+
* example you have a million terms in your index and you let your users do
|
1262
|
+
* wild-card queries and one runs a search for "*". You would end up with a
|
1263
|
+
* very large query which would use a lot of memory and take a long time to
|
1264
|
+
* get results, not to mention that it would probably match every document in
|
1265
|
+
* the index. To prevent queries like this crashing your application you can
|
1266
|
+
* set +:max_terms+ which limits the number of terms that get added to the
|
1267
|
+
* query. By default it is set to 512.
|
1268
|
+
*/
|
1269
|
+
static VALUE
|
1270
|
+
frt_wcq_init(int argc, VALUE *argv, VALUE self)
|
1271
|
+
{
|
1272
|
+
return frt_mtq_init_specific(argc, argv, self, &wcq_new);
|
1273
|
+
}
|
1274
|
+
|
1275
|
+
/****************************************************************************
|
1276
|
+
*
|
1277
|
+
* FuzzyQuery Methods
|
1278
|
+
*
|
1279
|
+
****************************************************************************/
|
1280
|
+
|
1281
|
+
/*
|
1282
|
+
* call-seq:
|
1283
|
+
* FuzzyQuery.new(field, term, options = {}) -> fuzzy-query
|
1284
|
+
*
|
1285
|
+
* Create a new FuzzyQuery that will match terms with a similarity of at
|
1286
|
+
* least +:min_similarity+ to +term+. Similarity is scored using the
|
1287
|
+
* Levenshtein edit distance formula. See
|
1288
|
+
* http://en.wikipedia.org/wiki/Levenshtein_distance
|
1289
|
+
*
|
1290
|
+
* If a +:prefix_length+ > 0 is specified, a common prefix of that length is
|
1291
|
+
* also required.
|
1292
|
+
*
|
1293
|
+
* You can also set +:max_terms+ to prevent memory overflow problems. By
|
1294
|
+
* default it is set to 512.
|
1295
|
+
*
|
1296
|
+
* == Example
|
1297
|
+
*
|
1298
|
+
* FuzzyQuery.new(:content, "levenshtein",
|
1299
|
+
* :min_similarity => 0.8,
|
1300
|
+
* :prefix_length => 5,
|
1301
|
+
* :max_terms => 1024)
|
1302
|
+
*
|
1303
|
+
* field:: field to search
|
1304
|
+
* term:: term to search for including its close matches
|
1305
|
+
* :min_similarity:: Default: 0.5. minimum levenshtein distance score for a
|
1306
|
+
* match
|
1307
|
+
* :prefix_length:: Default: 0. minimum prefix_match before levenshtein
|
1308
|
+
* distance is measured. This parameter is used to improve
|
1309
|
+
* performance. With a +:prefix_length+ of 0, all terms in
|
1310
|
+
* the index must be checked which can be quite a
|
1311
|
+
* performance hit. By setting the prefix length to a
|
1312
|
+
* larger number you minimize the number of terms that need
|
1313
|
+
* to be checked. Even 1 will cut down the work by a
|
1314
|
+
* factor of about 26 depending on your character set and
|
1315
|
+
* the first letter.
|
1316
|
+
* :max_terms:: Limits the number of terms that can be added to the
|
1317
|
+
* query when it is expanded as a MultiTermQuery. This is
|
1318
|
+
* not usually a problem with FuzzyQueries unless you set
|
1319
|
+
* +:min_similarity+ to a very low value.
|
1320
|
+
*/
|
1321
|
+
static VALUE
|
1322
|
+
frt_fq_init(int argc, VALUE *argv, VALUE self)
|
1323
|
+
{
|
1324
|
+
Query *q;
|
1325
|
+
VALUE rfield, rterm, roptions;
|
1326
|
+
float min_sim =
|
1327
|
+
(float)NUM2DBL(rb_cvar_get(cFuzzyQuery, id_default_min_similarity));
|
1328
|
+
int pre_len =
|
1329
|
+
FIX2INT(rb_cvar_get(cFuzzyQuery, id_default_prefix_length));
|
1330
|
+
int max_terms =
|
1331
|
+
FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
|
1332
|
+
|
1333
|
+
|
1334
|
+
if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &roptions) >= 3) {
|
1335
|
+
VALUE v;
|
1336
|
+
Check_Type(roptions, T_HASH);
|
1337
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_prefix_length))) {
|
1338
|
+
pre_len = FIX2INT(v);
|
1339
|
+
}
|
1340
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_min_similarity))) {
|
1341
|
+
min_sim = (float)NUM2DBL(v);
|
1342
|
+
}
|
1343
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
|
1344
|
+
max_terms = FIX2INT(v);
|
1345
|
+
}
|
1346
|
+
}
|
1347
|
+
|
1348
|
+
if (min_sim >= 1.0) {
|
1349
|
+
rb_raise(rb_eArgError,
|
1350
|
+
"%f >= 1.0. :min_similarity must be < 1.0", min_sim);
|
1351
|
+
} else if (min_sim < 0.0) {
|
1352
|
+
rb_raise(rb_eArgError,
|
1353
|
+
"%f < 0.0. :min_similarity must be > 0.0", min_sim);
|
1354
|
+
}
|
1355
|
+
if (pre_len < 0) {
|
1356
|
+
rb_raise(rb_eArgError,
|
1357
|
+
"%d < 0. :prefix_length must be >= 0", pre_len);
|
1358
|
+
}
|
1359
|
+
if (max_terms < 0) {
|
1360
|
+
rb_raise(rb_eArgError,
|
1361
|
+
"%d < 0. :max_terms must be >= 0", max_terms);
|
1362
|
+
}
|
1363
|
+
|
1364
|
+
q = fuzq_new_conf(frt_field(rfield), StringValuePtr(rterm),
|
1365
|
+
min_sim, pre_len, max_terms);
|
1366
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1367
|
+
object_add(q, self);
|
1368
|
+
return self;
|
1369
|
+
}
|
1370
|
+
|
1371
|
+
/*
|
1372
|
+
* call-seq:
|
1373
|
+
* FuzzyQuery.prefix_length -> prefix_length
|
1374
|
+
*
|
1375
|
+
* Get the +:prefix_length+ for the query.
|
1376
|
+
*/
|
1377
|
+
static VALUE
|
1378
|
+
frt_fq_pre_len(VALUE self)
|
1379
|
+
{
|
1380
|
+
GET_Q();
|
1381
|
+
return INT2FIX(((FuzzyQuery *)q)->pre_len);
|
1382
|
+
}
|
1383
|
+
|
1384
|
+
/*
|
1385
|
+
* call-seq:
|
1386
|
+
* FuzzyQuery.min_similarity -> min_similarity
|
1387
|
+
*
|
1388
|
+
* Get the +:min_similarity+ for the query.
|
1389
|
+
*/
|
1390
|
+
static VALUE
|
1391
|
+
frt_fq_min_sim(VALUE self)
|
1392
|
+
{
|
1393
|
+
GET_Q();
|
1394
|
+
return rb_float_new((double)((FuzzyQuery *)q)->min_sim);
|
1395
|
+
}
|
1396
|
+
|
1397
|
+
/*
|
1398
|
+
* call-seq:
|
1399
|
+
* FuzzyQuery.default_min_similarity -> number
|
1400
|
+
*
|
1401
|
+
* Get the default value for +:min_similarity+
|
1402
|
+
*/
|
1403
|
+
static VALUE
|
1404
|
+
frt_fq_get_dms(VALUE self)
|
1405
|
+
{
|
1406
|
+
return rb_cvar_get(cFuzzyQuery, id_default_min_similarity);
|
1407
|
+
}
|
1408
|
+
|
1409
|
+
extern float qp_default_fuzzy_min_sim;
|
1410
|
+
/*
|
1411
|
+
* call-seq:
|
1412
|
+
* FuzzyQuery.default_min_similarity = min_sim -> min_sim
|
1413
|
+
*
|
1414
|
+
* Set the default value for +:min_similarity+
|
1415
|
+
*/
|
1416
|
+
static VALUE
|
1417
|
+
frt_fq_set_dms(VALUE self, VALUE val)
|
1418
|
+
{
|
1419
|
+
double min_sim = NUM2DBL(val);
|
1420
|
+
if (min_sim >= 1.0) {
|
1421
|
+
rb_raise(rb_eArgError,
|
1422
|
+
"%f >= 1.0. :min_similarity must be < 1.0", min_sim);
|
1423
|
+
} else if (min_sim < 0.0) {
|
1424
|
+
rb_raise(rb_eArgError,
|
1425
|
+
"%f < 0.0. :min_similarity must be > 0.0", min_sim);
|
1426
|
+
}
|
1427
|
+
qp_default_fuzzy_min_sim = (float)min_sim;
|
1428
|
+
rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val);
|
1429
|
+
return val;
|
1430
|
+
}
|
1431
|
+
|
1432
|
+
/*
|
1433
|
+
* call-seq:
|
1434
|
+
* FuzzyQuery.default_prefix_length -> number
|
1435
|
+
*
|
1436
|
+
* Get the default value for +:prefix_length+
|
1437
|
+
*/
|
1438
|
+
static VALUE
|
1439
|
+
frt_fq_get_dpl(VALUE self)
|
1440
|
+
{
|
1441
|
+
return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
|
1442
|
+
}
|
1443
|
+
|
1444
|
+
extern int qp_default_fuzzy_pre_len;
|
1445
|
+
/*
|
1446
|
+
* call-seq:
|
1447
|
+
* FuzzyQuery.default_prefix_length = prefix_length -> prefix_length
|
1448
|
+
*
|
1449
|
+
* Set the default value for +:prefix_length+
|
1450
|
+
*/
|
1451
|
+
static VALUE
|
1452
|
+
frt_fq_set_dpl(VALUE self, VALUE val)
|
1453
|
+
{
|
1454
|
+
int pre_len = FIX2INT(val);
|
1455
|
+
if (pre_len < 0) {
|
1456
|
+
rb_raise(rb_eArgError,
|
1457
|
+
"%d < 0. :prefix_length must be >= 0", pre_len);
|
1458
|
+
}
|
1459
|
+
qp_default_fuzzy_pre_len = pre_len;
|
1460
|
+
rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val);
|
1461
|
+
return val;
|
1462
|
+
}
|
1463
|
+
|
1464
|
+
|
1465
|
+
/****************************************************************************
|
1466
|
+
*
|
1467
|
+
* MatchAllQuery Methods
|
1468
|
+
*
|
1469
|
+
****************************************************************************/
|
1470
|
+
|
1471
|
+
static VALUE
|
1472
|
+
frt_maq_alloc(VALUE klass)
|
1473
|
+
{
|
1474
|
+
Query *q = maq_new();
|
1475
|
+
VALUE self = Data_Wrap_Struct(klass, NULL, &frt_q_free, q);
|
1476
|
+
object_add(q, self);
|
1477
|
+
return self;
|
1478
|
+
}
|
1479
|
+
|
1480
|
+
/*
|
1481
|
+
* call-seq:
|
1482
|
+
* MatchAllQuery.new -> query
|
1483
|
+
*
|
1484
|
+
* Create a query which matches all documents.
|
1485
|
+
*/
|
1486
|
+
static VALUE
|
1487
|
+
frt_maq_init(VALUE self)
|
1488
|
+
{
|
1489
|
+
return self;
|
1490
|
+
}
|
1491
|
+
|
1492
|
+
/****************************************************************************
|
1493
|
+
*
|
1494
|
+
* ConstantScoreQuery Methods
|
1495
|
+
*
|
1496
|
+
****************************************************************************/
|
1497
|
+
|
1498
|
+
/*
|
1499
|
+
* call-seq:
|
1500
|
+
* ConstantScoreQuery.new(filter) -> query
|
1501
|
+
*
|
1502
|
+
* Create a ConstantScoreQuery which uses +filter+ to match documents giving
|
1503
|
+
* each document a constant score.
|
1504
|
+
*/
|
1505
|
+
static VALUE
|
1506
|
+
frt_csq_init(VALUE self, VALUE rfilter)
|
1507
|
+
{
|
1508
|
+
Query *q;
|
1509
|
+
Filter *filter;
|
1510
|
+
Data_Get_Struct(rfilter, Filter, filter);
|
1511
|
+
q = csq_new(filter);
|
1512
|
+
|
1513
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1514
|
+
object_add(q, self);
|
1515
|
+
return self;
|
1516
|
+
}
|
1517
|
+
|
1518
|
+
/****************************************************************************
|
1519
|
+
*
|
1520
|
+
* FilteredQuery Methods
|
1521
|
+
*
|
1522
|
+
****************************************************************************/
|
1523
|
+
|
1524
|
+
static void
|
1525
|
+
frt_fqq_mark(void *p)
|
1526
|
+
{
|
1527
|
+
FilteredQuery *fq = (FilteredQuery *)p;
|
1528
|
+
frt_gc_mark(fq->query);
|
1529
|
+
frt_gc_mark(fq->filter);
|
1530
|
+
}
|
1531
|
+
|
1532
|
+
/*
|
1533
|
+
* call-seq:
|
1534
|
+
* FilteredQuery.new(query, filter) -> query
|
1535
|
+
*
|
1536
|
+
* Create a new FilteredQuery which filters +query+ with +filter+.
|
1537
|
+
*/
|
1538
|
+
static VALUE
|
1539
|
+
frt_fqq_init(VALUE self, VALUE rquery, VALUE rfilter)
|
1540
|
+
{
|
1541
|
+
Query *sq, *q;
|
1542
|
+
Filter *f;
|
1543
|
+
Data_Get_Struct(rquery, Query, sq);
|
1544
|
+
Data_Get_Struct(rfilter, Filter, f);
|
1545
|
+
q = fq_new(sq, f);
|
1546
|
+
REF(sq);
|
1547
|
+
REF(f);
|
1548
|
+
Frt_Wrap_Struct(self, &frt_fqq_mark, &frt_q_free, q);
|
1549
|
+
object_add(q, self);
|
1550
|
+
return self;
|
1551
|
+
}
|
1552
|
+
|
1553
|
+
/****************************************************************************
|
1554
|
+
*
|
1555
|
+
* SpanTermQuery Methods
|
1556
|
+
*
|
1557
|
+
****************************************************************************/
|
1558
|
+
|
1559
|
+
/*
|
1560
|
+
* call-seq:
|
1561
|
+
* SpanTermQuery.new(field, term) -> query
|
1562
|
+
*
|
1563
|
+
* Create a new SpanTermQuery which matches all documents with the term
|
1564
|
+
* +term+ in the field +field+.
|
1565
|
+
*/
|
1566
|
+
static VALUE
|
1567
|
+
frt_spantq_init(VALUE self, VALUE rfield, VALUE rterm)
|
1568
|
+
{
|
1569
|
+
Query *q = spantq_new(frt_field(rfield), StringValuePtr(rterm));
|
1570
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1571
|
+
object_add(q, self);
|
1572
|
+
return self;
|
1573
|
+
}
|
1574
|
+
|
1575
|
+
/****************************************************************************
|
1576
|
+
*
|
1577
|
+
* SpanMultiTermQuery Methods
|
1578
|
+
*
|
1579
|
+
****************************************************************************/
|
1580
|
+
|
1581
|
+
/*
|
1582
|
+
* call-seq:
|
1583
|
+
* SpanMultiTermQuery.new(field, terms) -> query
|
1584
|
+
*
|
1585
|
+
* Create a new SpanMultiTermQuery which matches all documents with the terms
|
1586
|
+
* +terms+ in the field +field+. +terms+ should be an array of Strings.
|
1587
|
+
*/
|
1588
|
+
static VALUE
|
1589
|
+
frt_spanmtq_init(VALUE self, VALUE rfield, VALUE rterms)
|
1590
|
+
{
|
1591
|
+
Query *q = spanmtq_new(frt_field(rfield));
|
1592
|
+
int i;
|
1593
|
+
for (i = RARRAY_LEN(rterms) - 1; i >= 0; i--) {
|
1594
|
+
spanmtq_add_term(q, StringValuePtr(RARRAY_PTR(rterms)[i]));
|
1595
|
+
}
|
1596
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1597
|
+
object_add(q, self);
|
1598
|
+
return self;
|
1599
|
+
}
|
1600
|
+
|
1601
|
+
/****************************************************************************
|
1602
|
+
*
|
1603
|
+
* SpanPrefixQuery Methods
|
1604
|
+
*
|
1605
|
+
****************************************************************************/
|
1606
|
+
|
1607
|
+
/*
|
1608
|
+
* call-seq:
|
1609
|
+
* SpanPrefixQuery.new(field, prefix, max_terms = 256) -> query
|
1610
|
+
*
|
1611
|
+
* Create a new SpanPrefixQuery which matches all documents with the prefix
|
1612
|
+
* +prefix+ in the field +field+.
|
1613
|
+
*/
|
1614
|
+
static VALUE
|
1615
|
+
frt_spanprq_init(int argc, VALUE *argv, VALUE self)
|
1616
|
+
{
|
1617
|
+
VALUE rfield, rprefix, rmax_terms;
|
1618
|
+
int max_terms = SPAN_PREFIX_QUERY_MAX_TERMS;
|
1619
|
+
Query *q;
|
1620
|
+
if (rb_scan_args(argc, argv, "21", &rfield, &rprefix, &rmax_terms) == 3) {
|
1621
|
+
max_terms = FIX2INT(rmax_terms);
|
1622
|
+
}
|
1623
|
+
q = spanprq_new(frt_field(rfield), StringValuePtr(rprefix));
|
1624
|
+
((SpanPrefixQuery *)q)->max_terms = max_terms;
|
1625
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1626
|
+
object_add(q, self);
|
1627
|
+
return self;
|
1628
|
+
}
|
1629
|
+
|
1630
|
+
/****************************************************************************
|
1631
|
+
*
|
1632
|
+
* SpanFirstQuery Methods
|
1633
|
+
*
|
1634
|
+
****************************************************************************/
|
1635
|
+
|
1636
|
+
/*
|
1637
|
+
* call-seq:
|
1638
|
+
* SpanFirstQuery.new(span_query, end) -> query
|
1639
|
+
*
|
1640
|
+
* Create a new SpanFirstQuery which matches all documents where +span_query+
|
1641
|
+
* matches before +end+ where +end+ is a byte-offset from the start of the
|
1642
|
+
* field
|
1643
|
+
*/
|
1644
|
+
static VALUE
|
1645
|
+
frt_spanfq_init(VALUE self, VALUE rmatch, VALUE rend)
|
1646
|
+
{
|
1647
|
+
Query *q;
|
1648
|
+
Query *match;
|
1649
|
+
Data_Get_Struct(rmatch, Query, match);
|
1650
|
+
q = spanfq_new(match, FIX2INT(rend));
|
1651
|
+
Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
|
1652
|
+
object_add(q, self);
|
1653
|
+
return self;
|
1654
|
+
}
|
1655
|
+
|
1656
|
+
/****************************************************************************
|
1657
|
+
*
|
1658
|
+
* SpanNearQuery Methods
|
1659
|
+
*
|
1660
|
+
****************************************************************************/
|
1661
|
+
|
1662
|
+
static void
|
1663
|
+
frt_spannq_mark(void *p)
|
1664
|
+
{
|
1665
|
+
int i;
|
1666
|
+
SpanNearQuery *snq = (SpanNearQuery *)p;
|
1667
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1668
|
+
frt_gc_mark(snq->clauses[i]);
|
1669
|
+
}
|
1670
|
+
}
|
1671
|
+
|
1672
|
+
/*
|
1673
|
+
* call-seq:
|
1674
|
+
* SpanNearQuery.new(options = {}) -> query
|
1675
|
+
*
|
1676
|
+
* Create a new SpanNearQuery. You can add an array of clauses with the
|
1677
|
+
* +:clauses+ parameter or you can add clauses individually using the
|
1678
|
+
* SpanNearQuery#add method.
|
1679
|
+
*
|
1680
|
+
* query = SpanNearQuery.new(:clauses => [spanq1, spanq2, spanq3])
|
1681
|
+
* # is equivalent to
|
1682
|
+
* query = SpanNearQuery.new()
|
1683
|
+
* query << spanq1 << spanq2 << spanq3
|
1684
|
+
*
|
1685
|
+
* You have two other options which you can set.
|
1686
|
+
*
|
1687
|
+
* :slop:: Default: 0. Works exactly like a PhraseQuery slop. It is the
|
1688
|
+
* amount of slop allowed in the match (the term edit distance
|
1689
|
+
* allowed in the match).
|
1690
|
+
* :in_order:: Default: false. Specifies whether or not the matches have to
|
1691
|
+
* occur in the order they were added to the query. When slop is
|
1692
|
+
* set to 0, this parameter will make no difference.
|
1693
|
+
*/
|
1694
|
+
static VALUE
|
1695
|
+
frt_spannq_init(int argc, VALUE *argv, VALUE self)
|
1696
|
+
{
|
1697
|
+
Query *q;
|
1698
|
+
VALUE roptions;
|
1699
|
+
int slop = 0;
|
1700
|
+
bool in_order = false;
|
1701
|
+
|
1702
|
+
if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
|
1703
|
+
VALUE v;
|
1704
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_slop))) {
|
1705
|
+
slop = FIX2INT(v);
|
1706
|
+
}
|
1707
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_in_order))) {
|
1708
|
+
in_order = RTEST(v);
|
1709
|
+
}
|
1710
|
+
}
|
1711
|
+
q = spannq_new(slop, in_order);
|
1712
|
+
if (argc > 0) {
|
1713
|
+
VALUE v;
|
1714
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_clauses))) {
|
1715
|
+
int i;
|
1716
|
+
Query *clause;
|
1717
|
+
Check_Type(v, T_ARRAY);
|
1718
|
+
for (i = 0; i < RARRAY_LEN(v); i++) {
|
1719
|
+
Data_Get_Struct(RARRAY_PTR(v)[i], Query, clause);
|
1720
|
+
spannq_add_clause(q, clause);
|
1721
|
+
}
|
1722
|
+
}
|
1723
|
+
}
|
1724
|
+
|
1725
|
+
Frt_Wrap_Struct(self, &frt_spannq_mark, &frt_q_free, q);
|
1726
|
+
object_add(q, self);
|
1727
|
+
return self;
|
1728
|
+
}
|
1729
|
+
|
1730
|
+
/*
|
1731
|
+
* call-seq:
|
1732
|
+
* query.add(span_query) -> self
|
1733
|
+
* query << span_query -> self
|
1734
|
+
*
|
1735
|
+
* Add a clause to the SpanNearQuery. Clauses are stored in the order they
|
1736
|
+
* are added to the query which is important for matching. Note that clauses
|
1737
|
+
* must be SpanQueries, not other types of query.
|
1738
|
+
*/
|
1739
|
+
static VALUE
|
1740
|
+
frt_spannq_add(VALUE self, VALUE rclause)
|
1741
|
+
{
|
1742
|
+
GET_Q();
|
1743
|
+
Query *clause;
|
1744
|
+
Data_Get_Struct(rclause, Query, clause);
|
1745
|
+
spannq_add_clause(q, clause);
|
1746
|
+
return self;
|
1747
|
+
}
|
1748
|
+
|
1749
|
+
/****************************************************************************
|
1750
|
+
*
|
1751
|
+
* SpanOrQuery Methods
|
1752
|
+
*
|
1753
|
+
****************************************************************************/
|
1754
|
+
|
1755
|
+
static void
|
1756
|
+
frt_spanoq_mark(void *p)
|
1757
|
+
{
|
1758
|
+
int i;
|
1759
|
+
SpanOrQuery *soq = (SpanOrQuery *)p;
|
1760
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1761
|
+
frt_gc_mark(soq->clauses[i]);
|
1762
|
+
}
|
1763
|
+
}
|
1764
|
+
|
1765
|
+
/*
|
1766
|
+
* call-seq:
|
1767
|
+
* SpanOrQuery.new(clauses = []) -> query
|
1768
|
+
*
|
1769
|
+
* Create a new SpanOrQuery. This is just like a BooleanQuery with all
|
1770
|
+
* clauses with the occur value of :should. The difference is that it can be
|
1771
|
+
* passed to other SpanQuerys like SpanNearQuery.
|
1772
|
+
*/
|
1773
|
+
static VALUE
|
1774
|
+
frt_spanoq_init(int argc, VALUE *argv, VALUE self)
|
1775
|
+
{
|
1776
|
+
Query *q;
|
1777
|
+
VALUE rclauses;
|
1778
|
+
|
1779
|
+
q = spanoq_new();
|
1780
|
+
if (rb_scan_args(argc, argv, "01", &rclauses) > 0) {
|
1781
|
+
int i;
|
1782
|
+
Query *clause;
|
1783
|
+
Check_Type(rclauses, T_ARRAY);
|
1784
|
+
for (i = 0; i < RARRAY_LEN(rclauses); i++) {
|
1785
|
+
Data_Get_Struct(RARRAY_PTR(rclauses)[i], Query, clause);
|
1786
|
+
spanoq_add_clause(q, clause);
|
1787
|
+
}
|
1788
|
+
}
|
1789
|
+
Frt_Wrap_Struct(self, &frt_spanoq_mark, &frt_q_free, q);
|
1790
|
+
object_add(q, self);
|
1791
|
+
return self;
|
1792
|
+
}
|
1793
|
+
|
1794
|
+
/*
|
1795
|
+
* call-seq:
|
1796
|
+
* query.add(span_query) -> self
|
1797
|
+
* query << span_query -> self
|
1798
|
+
*
|
1799
|
+
* Add a clause to the SpanOrQuery. Note that clauses must be SpanQueries,
|
1800
|
+
* not other types of query.
|
1801
|
+
*/
|
1802
|
+
static VALUE
|
1803
|
+
frt_spanoq_add(VALUE self, VALUE rclause)
|
1804
|
+
{
|
1805
|
+
GET_Q();
|
1806
|
+
Query *clause;
|
1807
|
+
Data_Get_Struct(rclause, Query, clause);
|
1808
|
+
spanoq_add_clause(q, clause);
|
1809
|
+
return self;
|
1810
|
+
}
|
1811
|
+
|
1812
|
+
/****************************************************************************
|
1813
|
+
*
|
1814
|
+
* SpanNotQuery Methods
|
1815
|
+
*
|
1816
|
+
****************************************************************************/
|
1817
|
+
|
1818
|
+
static void
|
1819
|
+
frt_spanxq_mark(void *p)
|
1820
|
+
{
|
1821
|
+
SpanNotQuery *sxq = (SpanNotQuery *)p;
|
1822
|
+
frt_gc_mark(sxq->inc);
|
1823
|
+
frt_gc_mark(sxq->exc);
|
1824
|
+
}
|
1825
|
+
|
1826
|
+
/*
|
1827
|
+
* call-seq:
|
1828
|
+
* SpanNotQuery.new(include_query, exclude_query) -> query
|
1829
|
+
*
|
1830
|
+
* Create a new SpanNotQuery which matches all documents which match
|
1831
|
+
* +include_query+ and don't match +exclude_query+.
|
1832
|
+
*/
|
1833
|
+
static VALUE
|
1834
|
+
frt_spanxq_init(VALUE self, VALUE rinc, VALUE rexc)
|
1835
|
+
{
|
1836
|
+
Query *q;
|
1837
|
+
Check_Type(rinc, T_DATA);
|
1838
|
+
Check_Type(rexc, T_DATA);
|
1839
|
+
q = spanxq_new(DATA_PTR(rinc), DATA_PTR(rexc));
|
1840
|
+
Frt_Wrap_Struct(self, &frt_spanxq_mark, &frt_q_free, q);
|
1841
|
+
object_add(q, self);
|
1842
|
+
return self;
|
1843
|
+
}
|
1844
|
+
|
1845
|
+
/****************************************************************************
|
1846
|
+
*
|
1847
|
+
* Filter Methods
|
1848
|
+
*
|
1849
|
+
****************************************************************************/
|
1850
|
+
|
1851
|
+
static void
|
1852
|
+
frt_f_free(void *p)
|
1853
|
+
{
|
1854
|
+
object_del(p);
|
1855
|
+
filt_deref((Filter *)p);
|
1856
|
+
}
|
1857
|
+
|
1858
|
+
/* Declares a local `f` holding the Filter pointer read from DATA_PTR(self);
 * expects a VALUE named `self` to be in scope. */
#define GET_F() Filter *f = (Filter *)DATA_PTR(self)
|
1859
|
+
|
1860
|
+
/*
|
1861
|
+
* call-seq:
|
1862
|
+
* filter.to_s -> string
|
1863
|
+
*
|
1864
|
+
* Return a human readable string representing the Filter object that the
|
1865
|
+
* method was called on.
|
1866
|
+
*/
|
1867
|
+
static VALUE
|
1868
|
+
frt_f_to_s(VALUE self)
|
1869
|
+
{
|
1870
|
+
VALUE rstr;
|
1871
|
+
char *str;
|
1872
|
+
GET_F();
|
1873
|
+
str = f->to_s(f);
|
1874
|
+
rstr = rb_str_new2(str);
|
1875
|
+
free(str);
|
1876
|
+
return rstr;
|
1877
|
+
}
|
1878
|
+
|
1879
|
+
extern VALUE frt_get_bv(BitVector *bv);
|
1880
|
+
|
1881
|
+
/*
|
1882
|
+
* call-seq:
|
1883
|
+
* filter.bits(index_reader) -> bit_vector
|
1884
|
+
*
|
1885
|
+
* Get the bit_vector used by this filter. This method will usually be used
|
1886
|
+
* to group filters or apply filters to other filters.
|
1887
|
+
*/
|
1888
|
+
static VALUE
|
1889
|
+
frt_f_get_bits(VALUE self, VALUE rindex_reader)
|
1890
|
+
{
|
1891
|
+
BitVector *bv;
|
1892
|
+
IndexReader *ir;
|
1893
|
+
GET_F();
|
1894
|
+
Data_Get_Struct(rindex_reader, IndexReader, ir);
|
1895
|
+
bv = filt_get_bv(f, ir);
|
1896
|
+
return frt_get_bv(bv);
|
1897
|
+
}
|
1898
|
+
|
1899
|
+
/****************************************************************************
|
1900
|
+
*
|
1901
|
+
* RangeFilter Methods
|
1902
|
+
*
|
1903
|
+
****************************************************************************/
|
1904
|
+
|
1905
|
+
|
1906
|
+
/*
|
1907
|
+
* call-seq:
|
1908
|
+
* RangeFilter.new(field, options = {}) -> range_filter
|
1909
|
+
*
|
1910
|
+
* Create a new RangeFilter on field +field+. There are two ways to build a
|
1911
|
+
* range filter. With the old-style options; +:lower+, +:upper+,
|
1912
|
+
* +:include_lower+ and +:include_upper+ or the new style options; +:<+,
|
1913
|
+
* +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
|
1914
|
+
* In the old-style options, limits are inclusive by default.
|
1915
|
+
*
|
1916
|
+
* == Examples
|
1917
|
+
*
|
1918
|
+
* f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
|
1919
|
+
* # is equivalent to
|
1920
|
+
* f = RangeFilter.new(:date, :< => "200501")
|
1921
|
+
* # is equivalent to
|
1922
|
+
* f = RangeFilter.new(:date, :lower_exclusive => "200501")
|
1923
|
+
*
|
1924
|
+
* f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
|
1925
|
+
* # is equivalent to
|
1926
|
+
* f = RangeFilter.new(:date, :>= => "200501", :<= => 200502)
|
1927
|
+
*/
|
1928
|
+
static VALUE
|
1929
|
+
frt_rf_init(VALUE self, VALUE rfield, VALUE roptions)
|
1930
|
+
{
|
1931
|
+
Filter *f;
|
1932
|
+
char *lterm = NULL;
|
1933
|
+
char *uterm = NULL;
|
1934
|
+
bool include_lower = false;
|
1935
|
+
bool include_upper = false;
|
1936
|
+
|
1937
|
+
get_range_params(roptions, <erm, &uterm, &include_lower, &include_upper);
|
1938
|
+
f = rfilt_new(frt_field(rfield), lterm, uterm,
|
1939
|
+
include_lower, include_upper);
|
1940
|
+
Frt_Wrap_Struct(self, NULL, &frt_f_free, f);
|
1941
|
+
object_add(f, self);
|
1942
|
+
return self;
|
1943
|
+
}
|
1944
|
+
|
1945
|
+
/****************************************************************************
|
1946
|
+
*
|
1947
|
+
* QueryFilter Methods
|
1948
|
+
*
|
1949
|
+
****************************************************************************/
|
1950
|
+
|
1951
|
+
/*
|
1952
|
+
* call-seq:
|
1953
|
+
* QueryFilter.new(query) -> filter
|
1954
|
+
*
|
1955
|
+
* Create a new QueryFilter which applies the query +query+.
|
1956
|
+
*/
|
1957
|
+
static VALUE
|
1958
|
+
frt_qf_init(VALUE self, VALUE rquery)
|
1959
|
+
{
|
1960
|
+
Query *q;
|
1961
|
+
Filter *f;
|
1962
|
+
Data_Get_Struct(rquery, Query, q);
|
1963
|
+
f = qfilt_new(q);
|
1964
|
+
Frt_Wrap_Struct(self, NULL, &frt_f_free, f);
|
1965
|
+
object_add(f, self);
|
1966
|
+
return self;
|
1967
|
+
}
|
1968
|
+
|
1969
|
+
/****************************************************************************
|
1970
|
+
*
|
1971
|
+
* SortField Methods
|
1972
|
+
*
|
1973
|
+
****************************************************************************/
|
1974
|
+
|
1975
|
+
static void
|
1976
|
+
frt_sf_free(void *p)
|
1977
|
+
{
|
1978
|
+
object_del(p);
|
1979
|
+
sort_field_destroy((SortField *)p);
|
1980
|
+
}
|
1981
|
+
|
1982
|
+
static VALUE
|
1983
|
+
frt_get_sf(SortField *sf)
|
1984
|
+
{
|
1985
|
+
VALUE self = object_get(sf);
|
1986
|
+
if (self == Qnil) {
|
1987
|
+
self = Data_Wrap_Struct(cSortField, NULL, &frt_sf_free, sf);
|
1988
|
+
object_add(sf, self);
|
1989
|
+
}
|
1990
|
+
return self;
|
1991
|
+
}
|
1992
|
+
|
1993
|
+
static int
|
1994
|
+
get_sort_type(VALUE rtype)
|
1995
|
+
{
|
1996
|
+
Check_Type(rtype, T_SYMBOL);
|
1997
|
+
if (rtype == sym_byte) {
|
1998
|
+
return SORT_TYPE_BYTE;
|
1999
|
+
} else if (rtype == sym_integer) {
|
2000
|
+
return SORT_TYPE_INTEGER;
|
2001
|
+
} else if (rtype == sym_string) {
|
2002
|
+
return SORT_TYPE_STRING;
|
2003
|
+
} else if (rtype == sym_score) {
|
2004
|
+
return SORT_TYPE_SCORE;
|
2005
|
+
} else if (rtype == sym_doc_id) {
|
2006
|
+
return SORT_TYPE_DOC;
|
2007
|
+
} else if (rtype == sym_float) {
|
2008
|
+
return SORT_TYPE_FLOAT;
|
2009
|
+
} else if (rtype == sym_auto) {
|
2010
|
+
return SORT_TYPE_AUTO;
|
2011
|
+
} else {
|
2012
|
+
rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
|
2013
|
+
"from [:integer, :float, :string, :auto, :score, :doc_id]",
|
2014
|
+
rb_id2name(SYM2ID(rtype)));
|
2015
|
+
}
|
2016
|
+
return SORT_TYPE_DOC;
|
2017
|
+
}
|
2018
|
+
|
2019
|
+
/*
|
2020
|
+
* call-seq:
|
2021
|
+
* SortField.new(field, options = {}) -> sort_field
|
2022
|
+
*
|
2023
|
+
* Create a new SortField which can be used to sort the result-set by the
|
2024
|
+
* value in field +field+.
|
2025
|
+
*
|
2026
|
+
* === Options
|
2027
|
+
*
|
2028
|
+
* :type:: Default: +:auto+. Specifies how a field should be sorted.
|
2029
|
+
* Choose from one of; +:auto+, +:integer+, +:float+,
|
2030
|
+
* +:string+, +:byte+, +:doc_id+ or +:score+. +:auto+ will
|
2031
|
+
* check the datatype of the field by trying to parse it into
|
2032
|
+
* either a number or a float before settling on a string
|
2033
|
+
* sort. String sort is locale dependent and works for
|
2034
|
+
* multibyte character sets like UTF-8 if you have your
|
2035
|
+
* locale set correctly.
|
2036
|
+
* :reverse:: Default: false. Set to true if you want to reverse the
|
2037
|
+
* sort.
|
2038
|
+
*/
|
2039
|
+
static VALUE
|
2040
|
+
frt_sf_init(int argc, VALUE *argv, VALUE self)
|
2041
|
+
{
|
2042
|
+
SortField *sf;
|
2043
|
+
VALUE rfield, roptions;
|
2044
|
+
VALUE rval;
|
2045
|
+
int type = SORT_TYPE_AUTO;
|
2046
|
+
int is_reverse = false;
|
2047
|
+
char *field;
|
2048
|
+
|
2049
|
+
if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
|
2050
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_type))) {
|
2051
|
+
type = get_sort_type(rval);
|
2052
|
+
}
|
2053
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_reverse))) {
|
2054
|
+
is_reverse = RTEST(rval);
|
2055
|
+
}
|
2056
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_comparator))) {
|
2057
|
+
rb_raise(rb_eArgError, "Unsupported argument ':comparator'");
|
2058
|
+
}
|
2059
|
+
}
|
2060
|
+
if (NIL_P(rfield)) rb_raise(rb_eArgError, "must pass a valid field name");
|
2061
|
+
field = frt_field(rfield);
|
2062
|
+
|
2063
|
+
sf = sort_field_new(field, type, is_reverse);
|
2064
|
+
if (sf->field == NULL && field) {
|
2065
|
+
sf->field = estrdup(field);
|
2066
|
+
}
|
2067
|
+
|
2068
|
+
Frt_Wrap_Struct(self, NULL, &frt_sf_free, sf);
|
2069
|
+
object_add(sf, self);
|
2070
|
+
return self;
|
2071
|
+
}
|
2072
|
+
|
2073
|
+
/* Declares a local `sf` holding the SortField pointer read from
 * DATA_PTR(self); expects a VALUE named `self` to be in scope. */
#define GET_SF() SortField *sf = (SortField *)DATA_PTR(self)
|
2074
|
+
|
2075
|
+
/*
|
2076
|
+
* call-seq:
|
2077
|
+
* sort_field.reverse? -> bool
|
2078
|
+
*
|
2079
|
+
* Return true if the field is to be reverse sorted. This attribute is set
|
2080
|
+
* when you create the sort_field.
|
2081
|
+
*/
|
2082
|
+
static VALUE
|
2083
|
+
frt_sf_is_reverse(VALUE self)
|
2084
|
+
{
|
2085
|
+
GET_SF();
|
2086
|
+
return sf->reverse ? Qtrue : Qfalse;
|
2087
|
+
}
|
2088
|
+
|
2089
|
+
/*
|
2090
|
+
* call-seq:
|
2091
|
+
* sort_field.name -> symbol
|
2092
|
+
*
|
2093
|
+
* Returns the name of the field to be sorted.
|
2094
|
+
*/
|
2095
|
+
static VALUE
|
2096
|
+
frt_sf_get_name(VALUE self)
|
2097
|
+
{
|
2098
|
+
GET_SF();
|
2099
|
+
return sf->field ? ID2SYM(rb_intern(sf->field)) : Qnil;
|
2100
|
+
}
|
2101
|
+
|
2102
|
+
/*
|
2103
|
+
* call-seq:
|
2104
|
+
* sort_field.type -> symbol
|
2105
|
+
*
|
2106
|
+
* Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
|
2107
|
+
* +:string+, +:byte+, +:doc_id+ or +:score+.
|
2108
|
+
*/
|
2109
|
+
static VALUE
|
2110
|
+
frt_sf_get_type(VALUE self)
|
2111
|
+
{
|
2112
|
+
GET_SF();
|
2113
|
+
switch (sf->type) {
|
2114
|
+
case SORT_TYPE_BYTE: return sym_byte;
|
2115
|
+
case SORT_TYPE_INTEGER: return sym_integer;
|
2116
|
+
case SORT_TYPE_FLOAT: return sym_float;
|
2117
|
+
case SORT_TYPE_STRING: return sym_string;
|
2118
|
+
case SORT_TYPE_AUTO: return sym_auto;
|
2119
|
+
case SORT_TYPE_DOC: return sym_doc_id;
|
2120
|
+
case SORT_TYPE_SCORE: return sym_score;
|
2121
|
+
}
|
2122
|
+
return Qnil;
|
2123
|
+
}
|
2124
|
+
|
2125
|
+
/*
|
2126
|
+
* call-seq:
|
2127
|
+
* sort_field.comparator -> symbol
|
2128
|
+
*
|
2129
|
+
* TODO: currently unsupported
|
2130
|
+
*/
|
2131
|
+
static VALUE
|
2132
|
+
frt_sf_get_comparator(VALUE self)
|
2133
|
+
{
|
2134
|
+
return Qnil;
|
2135
|
+
}
|
2136
|
+
|
2137
|
+
/*
|
2138
|
+
* call-seq:
|
2139
|
+
* sort_field.to_s -> string
|
2140
|
+
*
|
2141
|
+
* Return a human readable string describing this +sort_field+.
|
2142
|
+
*/
|
2143
|
+
static VALUE
|
2144
|
+
frt_sf_to_s(VALUE self)
|
2145
|
+
{
|
2146
|
+
GET_SF();
|
2147
|
+
char *str = sort_field_to_s(sf);
|
2148
|
+
VALUE rstr = rb_str_new2(str);
|
2149
|
+
free(str);
|
2150
|
+
return rstr;
|
2151
|
+
}
|
2152
|
+
|
2153
|
+
/****************************************************************************
|
2154
|
+
*
|
2155
|
+
* Sort Methods
|
2156
|
+
*
|
2157
|
+
****************************************************************************/
|
2158
|
+
|
2159
|
+
static void
|
2160
|
+
frt_sort_free(void *p)
|
2161
|
+
{
|
2162
|
+
Sort *sort = (Sort *)p;
|
2163
|
+
object_del(sort);
|
2164
|
+
sort_destroy(sort);
|
2165
|
+
}
|
2166
|
+
|
2167
|
+
static void
|
2168
|
+
frt_sort_mark(void *p)
|
2169
|
+
{
|
2170
|
+
Sort *sort = (Sort *)p;
|
2171
|
+
int i;
|
2172
|
+
for (i = 0; i < sort->size; i++) {
|
2173
|
+
frt_gc_mark(sort->sort_fields[i]);
|
2174
|
+
}
|
2175
|
+
}
|
2176
|
+
|
2177
|
+
static VALUE
|
2178
|
+
frt_sort_alloc(VALUE klass)
|
2179
|
+
{
|
2180
|
+
VALUE self;
|
2181
|
+
Sort *sort = sort_new();
|
2182
|
+
sort->destroy_all = false;
|
2183
|
+
self = Data_Wrap_Struct(klass, &frt_sort_mark, &frt_sort_free, sort);
|
2184
|
+
object_add(sort, self);
|
2185
|
+
return self;
|
2186
|
+
}
|
2187
|
+
|
2188
|
+
static void
|
2189
|
+
frt_parse_sort_str(Sort *sort, char *xsort_str)
|
2190
|
+
{
|
2191
|
+
SortField *sf;
|
2192
|
+
char *comma, *end, *e, *s;
|
2193
|
+
const int len = strlen(xsort_str);
|
2194
|
+
char *sort_str = ALLOC_N(char, len + 2);
|
2195
|
+
strcpy(sort_str, xsort_str);
|
2196
|
+
|
2197
|
+
end = &sort_str[len];
|
2198
|
+
|
2199
|
+
s = sort_str;
|
2200
|
+
|
2201
|
+
while ((s < end)
|
2202
|
+
&& (NULL != (comma = strchr(s, ',')) || (NULL != (comma = end)))) {
|
2203
|
+
bool reverse = false;
|
2204
|
+
/* strip spaces */
|
2205
|
+
e = comma;
|
2206
|
+
while ((isspace(*s) || *s == ':') && s < e) s++;
|
2207
|
+
while (isspace(e[-1]) && s < e) e--;
|
2208
|
+
*e = '\0';
|
2209
|
+
if (e > (s + 4) && strcmp("DESC", &e[-4]) == 0) {
|
2210
|
+
reverse = true;
|
2211
|
+
e -= 4;
|
2212
|
+
while (isspace(e[-1]) && s < e) e--;
|
2213
|
+
}
|
2214
|
+
*e = '\0';
|
2215
|
+
|
2216
|
+
if (strcmp("SCORE", s) == 0) {
|
2217
|
+
sf = sort_field_score_new(reverse);
|
2218
|
+
} else if (strcmp("DOC_ID", s) == 0) {
|
2219
|
+
sf = sort_field_doc_new(reverse);
|
2220
|
+
} else {
|
2221
|
+
sf = sort_field_auto_new(s, reverse);
|
2222
|
+
}
|
2223
|
+
frt_get_sf(sf);
|
2224
|
+
sort_add_sort_field(sort, sf);
|
2225
|
+
s = comma + 1;
|
2226
|
+
}
|
2227
|
+
free(sort_str);
|
2228
|
+
}
|
2229
|
+
|
2230
|
+
static void
|
2231
|
+
frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
|
2232
|
+
{
|
2233
|
+
SortField *sf;
|
2234
|
+
switch (TYPE(rsf)) {
|
2235
|
+
case T_DATA:
|
2236
|
+
Data_Get_Struct(rsf, SortField, sf);
|
2237
|
+
if (reverse) sf->reverse = !sf->reverse;
|
2238
|
+
sort_add_sort_field(sort, sf);
|
2239
|
+
break;
|
2240
|
+
case T_SYMBOL:
|
2241
|
+
rsf = rb_obj_as_string(rsf);
|
2242
|
+
sf = sort_field_auto_new(rs2s(rsf), reverse);
|
2243
|
+
/* need to give it a ruby object so it'll be freed when the
|
2244
|
+
* sort is garbage collected */
|
2245
|
+
rsf = frt_get_sf(sf);
|
2246
|
+
sort_add_sort_field(sort, sf);
|
2247
|
+
break;
|
2248
|
+
case T_STRING:
|
2249
|
+
frt_parse_sort_str(sort, rs2s(rsf));
|
2250
|
+
break;
|
2251
|
+
default:
|
2252
|
+
rb_raise(rb_eArgError, "Unknown SortField Type");
|
2253
|
+
break;
|
2254
|
+
}
|
2255
|
+
}
|
2256
|
+
|
2257
|
+
/* Declares a local `sort` holding the Sort pointer read from DATA_PTR(self);
 * expects a VALUE named `self` to be in scope. */
#define GET_SORT() Sort *sort = (Sort *)DATA_PTR(self)
|
2258
|
+
/*
|
2259
|
+
* call-seq:
|
2260
|
+
* Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_ID], reverse = false) -> Sort
|
2261
|
+
*
|
2262
|
+
* Create a new Sort object. If +reverse+ is true, all sort_fields will be
|
2263
|
+
* reversed so if any of them are already reversed they will be turned back
|
2264
|
+
* to their natural order again. By default the sort is by score and then by doc_id.
|
2265
|
+
*/
|
2266
|
+
static VALUE
|
2267
|
+
frt_sort_init(int argc, VALUE *argv, VALUE self)
|
2268
|
+
{
|
2269
|
+
int i;
|
2270
|
+
VALUE rfields, rreverse;
|
2271
|
+
bool reverse = false;
|
2272
|
+
bool has_sfd = false;
|
2273
|
+
GET_SORT();
|
2274
|
+
switch (rb_scan_args(argc, argv, "02", &rfields, &rreverse)) {
|
2275
|
+
case 2: reverse = RTEST(rreverse);
|
2276
|
+
case 1:
|
2277
|
+
if (TYPE(rfields) == T_ARRAY) {
|
2278
|
+
int i;
|
2279
|
+
for (i = 0; i < RARRAY_LEN(rfields); i++) {
|
2280
|
+
frt_sort_add(sort, RARRAY_PTR(rfields)[i], reverse);
|
2281
|
+
}
|
2282
|
+
} else {
|
2283
|
+
frt_sort_add(sort, rfields, reverse);
|
2284
|
+
}
|
2285
|
+
for (i = 0; i < sort->size; i++) {
|
2286
|
+
if (sort->sort_fields[i] == &SORT_FIELD_DOC) has_sfd = true;
|
2287
|
+
}
|
2288
|
+
if (!has_sfd) {
|
2289
|
+
sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
|
2290
|
+
}
|
2291
|
+
break;
|
2292
|
+
case 0:
|
2293
|
+
sort_add_sort_field(sort, (SortField *)&SORT_FIELD_SCORE);
|
2294
|
+
sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
|
2295
|
+
}
|
2296
|
+
|
2297
|
+
return self;
|
2298
|
+
}
|
2299
|
+
|
2300
|
+
/*
|
2301
|
+
* call-seq:
|
2302
|
+
* sort.fields -> Array
|
2303
|
+
*
|
2304
|
+
* Returns an array of the SortFields held by the Sort object.
|
2305
|
+
*/
|
2306
|
+
static VALUE
|
2307
|
+
frt_sort_get_fields(VALUE self)
|
2308
|
+
{
|
2309
|
+
GET_SORT();
|
2310
|
+
VALUE rfields = rb_ary_new2(sort->size);
|
2311
|
+
int i;
|
2312
|
+
for (i = 0; i < sort->size; i++) {
|
2313
|
+
rb_ary_store(rfields, i, object_get(sort->sort_fields[i]));
|
2314
|
+
}
|
2315
|
+
return rfields;
|
2316
|
+
}
|
2317
|
+
|
2318
|
+
|
2319
|
+
/*
|
2320
|
+
* call-seq:
|
2321
|
+
* sort.to_s -> string
|
2322
|
+
*
|
2323
|
+
* Returns a human readable string representing the sort object.
|
2324
|
+
*/
|
2325
|
+
static VALUE
|
2326
|
+
frt_sort_to_s(VALUE self)
|
2327
|
+
{
|
2328
|
+
GET_SORT();
|
2329
|
+
char *str = sort_to_s(sort);
|
2330
|
+
VALUE rstr = rb_str_new2(str);
|
2331
|
+
free(str);
|
2332
|
+
return rstr;
|
2333
|
+
}
|
2334
|
+
|
2335
|
+
/****************************************************************************
|
2336
|
+
*
|
2337
|
+
* Searcher Methods
|
2338
|
+
*
|
2339
|
+
****************************************************************************/
|
2340
|
+
|
2341
|
+
static void
|
2342
|
+
frt_sea_free(void *p)
|
2343
|
+
{
|
2344
|
+
Searcher *sea = (Searcher *)p;
|
2345
|
+
object_del(sea);
|
2346
|
+
sea->close(sea);
|
2347
|
+
}
|
2348
|
+
|
2349
|
+
/* Declares a local `sea` holding the Searcher pointer read from
 * DATA_PTR(self); expects a VALUE named `self` to be in scope. */
#define GET_SEA() Searcher *sea = (Searcher *)DATA_PTR(self)
|
2350
|
+
|
2351
|
+
/*
|
2352
|
+
* call-seq:
|
2353
|
+
* searcher.close -> nil
|
2354
|
+
*
|
2355
|
+
* Close the searcher. The garbage collector will do this for you or you can
|
2356
|
+
* call this method explicitly.
|
2357
|
+
*/
|
2358
|
+
static VALUE
|
2359
|
+
frt_sea_close(VALUE self)
|
2360
|
+
{
|
2361
|
+
GET_SEA();
|
2362
|
+
Frt_Unwrap_Struct(self);
|
2363
|
+
object_del(sea);
|
2364
|
+
sea->close(sea);
|
2365
|
+
return Qnil;
|
2366
|
+
}
|
2367
|
+
|
2368
|
+
/*
|
2369
|
+
* call-seq:
|
2370
|
+
* searcher.reader -> IndexReader
|
2371
|
+
*
|
2372
|
+
* Return the IndexReader wrapped by this searcher.
|
2373
|
+
*/
|
2374
|
+
static VALUE
|
2375
|
+
frt_sea_get_reader(VALUE self, VALUE rterm)
|
2376
|
+
{
|
2377
|
+
GET_SEA();
|
2378
|
+
return object_get(((IndexSearcher *)sea)->ir);
|
2379
|
+
}
|
2380
|
+
|
2381
|
+
/*
|
2382
|
+
* call-seq:
|
2383
|
+
* searcher.doc_freq(field, term) -> integer
|
2384
|
+
*
|
2385
|
+
* Return the number of documents in which the term +term+ appears in the
|
2386
|
+
* field +field+.
|
2387
|
+
*/
|
2388
|
+
static VALUE
|
2389
|
+
frt_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm)
|
2390
|
+
{
|
2391
|
+
GET_SEA();
|
2392
|
+
return INT2FIX(sea->doc_freq(sea,
|
2393
|
+
frt_field(rfield),
|
2394
|
+
StringValuePtr(rterm)));
|
2395
|
+
}
|
2396
|
+
|
2397
|
+
/*
|
2398
|
+
* call-seq:
|
2399
|
+
* searcher.get_document(doc_id) -> LazyDoc
|
2400
|
+
* searcher[doc_id] -> LazyDoc
|
2401
|
+
*
|
2402
|
+
* Retrieve a document from the index. See LazyDoc for more details on the
|
2403
|
+
* document returned. Documents are referenced internally by document ids
|
2404
|
+
* which are returned by the Searchers search methods.
|
2405
|
+
*/
|
2406
|
+
static VALUE
|
2407
|
+
frt_sea_doc(VALUE self, VALUE rdoc_id)
|
2408
|
+
{
|
2409
|
+
GET_SEA();
|
2410
|
+
return frt_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
|
2411
|
+
}
|
2412
|
+
|
2413
|
+
/*
|
2414
|
+
* call-seq:
|
2415
|
+
* searcher.max_doc -> number
|
2416
|
+
*
|
2417
|
+
* Returns 1 + the maximum document id in the index. It is the
|
2418
|
+
* document_id that will be used by the next document added to the index. If
|
2419
|
+
* there are no deletions, this number also refers to the number of documents
|
2420
|
+
* in the index.
|
2421
|
+
*/
|
2422
|
+
static VALUE
|
2423
|
+
frt_sea_max_doc(VALUE self)
|
2424
|
+
{
|
2425
|
+
GET_SEA();
|
2426
|
+
return INT2FIX(sea->max_doc(sea));
|
2427
|
+
}
|
2428
|
+
|
2429
|
+
static bool
|
2430
|
+
call_filter_proc(int doc_id, float score, Searcher *self)
|
2431
|
+
{
|
2432
|
+
return RTEST(rb_funcall((VALUE)self->arg, id_call, 3,
|
2433
|
+
INT2FIX(doc_id),
|
2434
|
+
rb_float_new((double)score),
|
2435
|
+
object_get(self)));
|
2436
|
+
}
|
2437
|
+
|
2438
|
+
typedef struct CWrappedFilter
|
2439
|
+
{
|
2440
|
+
Filter super;
|
2441
|
+
VALUE rfilter;
|
2442
|
+
} CWrappedFilter;
|
2443
|
+
#define CWF(filt) ((CWrappedFilter *)(filt))
|
2444
|
+
|
2445
|
+
static unsigned long
|
2446
|
+
cwfilt_hash(Filter *filt)
|
2447
|
+
{
|
2448
|
+
return NUM2ULONG(rb_funcall(CWF(filt)->rfilter, id_hash, 0));
|
2449
|
+
}
|
2450
|
+
|
2451
|
+
static int
|
2452
|
+
cwfilt_eq(Filter *filt, Filter *o)
|
2453
|
+
{
|
2454
|
+
return RTEST(rb_funcall(CWF(filt)->rfilter, id_eql, 1, CWF(o)->rfilter));
|
2455
|
+
}
|
2456
|
+
|
2457
|
+
static BitVector *
|
2458
|
+
cwfilt_get_bv_i(Filter *filt, IndexReader *ir)
|
2459
|
+
{
|
2460
|
+
VALUE rbv = rb_funcall(CWF(filt)->rfilter, id_bits, 1, object_get(ir));
|
2461
|
+
BitVector *bv;
|
2462
|
+
Data_Get_Struct(rbv, BitVector, bv);
|
2463
|
+
REF(bv);
|
2464
|
+
return bv;
|
2465
|
+
}
|
2466
|
+
|
2467
|
+
Filter *
|
2468
|
+
frt_get_cwrapped_filter(VALUE rval)
|
2469
|
+
{
|
2470
|
+
Filter *filter;
|
2471
|
+
if (frt_is_cclass(rval) && DATA_PTR(rval)) {
|
2472
|
+
Data_Get_Struct(rval, Filter, filter);
|
2473
|
+
REF(filter);
|
2474
|
+
}
|
2475
|
+
else {
|
2476
|
+
filter = filt_create(sizeof(CWrappedFilter), "CWrappedFilter");
|
2477
|
+
filter->hash = &cwfilt_hash;
|
2478
|
+
filter->eq = &cwfilt_eq;
|
2479
|
+
filter->get_bv_i = &cwfilt_get_bv_i;
|
2480
|
+
CWF(filter)->rfilter = rval;
|
2481
|
+
}
|
2482
|
+
return filter;
|
2483
|
+
}
|
2484
|
+
|
2485
|
+
static TopDocs *
|
2486
|
+
frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
|
2487
|
+
{
|
2488
|
+
VALUE rval;
|
2489
|
+
int offset = 0, limit = 10;
|
2490
|
+
Filter *filter = NULL;
|
2491
|
+
Sort *sort = NULL;
|
2492
|
+
TopDocs *td;
|
2493
|
+
|
2494
|
+
filter_ft filter_func = NULL;
|
2495
|
+
|
2496
|
+
if (Qnil != roptions) {
|
2497
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_offset))) {
|
2498
|
+
offset = FIX2INT(rval);
|
2499
|
+
if (offset < 0)
|
2500
|
+
rb_raise(rb_eArgError, ":offset must be >= 0");
|
2501
|
+
}
|
2502
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_limit))) {
|
2503
|
+
if (TYPE(rval) == T_FIXNUM) {
|
2504
|
+
limit = FIX2INT(rval);
|
2505
|
+
if (limit <= 0)
|
2506
|
+
rb_raise(rb_eArgError, ":limit must be > 0");
|
2507
|
+
} else if (rval == sym_all) {
|
2508
|
+
limit = INT_MAX;
|
2509
|
+
} else {
|
2510
|
+
rb_raise(rb_eArgError, "%s is not a sensible :limit value "
|
2511
|
+
"Please use a positive integer or :all",
|
2512
|
+
rb_obj_as_string(rval));
|
2513
|
+
}
|
2514
|
+
}
|
2515
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_filter))) {
|
2516
|
+
filter = frt_get_cwrapped_filter(rval);
|
2517
|
+
}
|
2518
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_filter_proc))) {
|
2519
|
+
filter_func = &call_filter_proc;
|
2520
|
+
sea->arg = (void *)rval;
|
2521
|
+
}
|
2522
|
+
if (Qnil != (rval = rb_hash_aref(roptions, sym_sort))) {
|
2523
|
+
if (TYPE(rval) != T_DATA || CLASS_OF(rval) == cSortField) {
|
2524
|
+
rval = frt_sort_init(1, &rval, frt_sort_alloc(cSort));
|
2525
|
+
}
|
2526
|
+
Data_Get_Struct(rval, Sort, sort);
|
2527
|
+
}
|
2528
|
+
}
|
2529
|
+
|
2530
|
+
td = sea->search(sea, query, offset, limit, filter, sort, filter_func, 0);
|
2531
|
+
if (filter) filt_deref(filter);
|
2532
|
+
return td;
|
2533
|
+
}
|
2534
|
+
|
2535
|
+
/*
|
2536
|
+
* call-seq:
|
2537
|
+
* searcher.search(query, options = {}) -> TopDocs
|
2538
|
+
*
|
2539
|
+
* Run a query through the Searcher on the index. A TopDocs object is
|
2540
|
+
* returned with the relevant results. The +query+ is a built in Query
|
2541
|
+
* object. Here are the options;
|
2542
|
+
*
|
2543
|
+
* === Options
|
2544
|
+
*
|
2545
|
+
* :offset:: Default: 0. The offset of the start of the section of the
|
2546
|
+
* result-set to return. This is used for paging through
|
2547
|
+
* results. Let's say you have a page size of 10. If you
|
2548
|
+
* don't find the result you want among the first 10 results
|
2549
|
+
* then set +:offset+ to 10 and look at the next 10 results,
|
2550
|
+
* then 20 and so on.
|
2551
|
+
* :limit:: Default: 10. This is the number of results you want
|
2552
|
+
* returned, also called the page size. Set +:limit+ to
|
2553
|
+
* +:all+ to return all results
|
2554
|
+
* :sort:: A Sort object or sort string describing how the field
|
2555
|
+
* should be sorted. A sort string is made up of field names
|
2556
|
+
* which cannot contain spaces and the word "DESC" if you
|
2557
|
+
* want the field reversed, all separated by commas. For
|
2558
|
+
* example; "rating DESC, author, title". Note that Ferret
|
2559
|
+
* will try to determine a field's type by looking at the
|
2560
|
+
* first term in the index and seeing if it can be parsed as
|
2561
|
+
* an integer or a float. Keep this in mind as you may need
|
2562
|
+
* to specify a fields type to sort it correctly. For more
|
2563
|
+
* on this, see the documentation for SortField
|
2564
|
+
* :filter:: a Filter object to filter the search results with
|
2565
|
+
* :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
|
2566
|
+
* and the Searcher object as its parameters and returns a
|
2567
|
+
* Boolean value specifying whether the result should be
|
2568
|
+
* included in the result set.
|
2569
|
+
*/
|
2570
|
+
static VALUE
|
2571
|
+
frt_sea_search(int argc, VALUE *argv, VALUE self)
|
2572
|
+
{
|
2573
|
+
GET_SEA();
|
2574
|
+
VALUE rquery, roptions;
|
2575
|
+
Query *query;
|
2576
|
+
rb_scan_args(argc, argv, "11", &rquery, &roptions);
|
2577
|
+
Data_Get_Struct(rquery, Query, query);
|
2578
|
+
return frt_get_td(frt_sea_search_internal(query, roptions, sea), self);
|
2579
|
+
}
|
2580
|
+
|
2581
|
+
/*
|
2582
|
+
* call-seq:
|
2583
|
+
* searcher.search_each(query, options = {}) {|doc_id, score| do_something}
|
2584
|
+
* -> total_hits
|
2585
|
+
*
|
2586
|
+
* Run a query through the Searcher on the index, yielding each hit to the
|
2587
|
+
* block. The total number of hits is returned. The +query+ is a Query object. The
|
2588
|
+
* Searcher#search_each method yields the internal document id (used to
|
2589
|
+
* reference documents in the Searcher object like this; +searcher[doc_id]+)
|
2590
|
+
* and the search score for that document. It is possible for the score to be
|
2591
|
+
* greater than 1.0 for some queries and taking boosts into account. This
|
2592
|
+
* method will also normalize scores to the range 0.0..1.0 when the max-score
|
2593
|
+
* is greater than 1.0. Here are the options;
|
2594
|
+
*
|
2595
|
+
* === Options
|
2596
|
+
*
|
2597
|
+
* :offset:: Default: 0. The offset of the start of the section of the
|
2598
|
+
* result-set to return. This is used for paging through
|
2599
|
+
* results. Let's say you have a page size of 10. If you
|
2600
|
+
* don't find the result you want among the first 10 results
|
2601
|
+
* then set +:offset+ to 10 and look at the next 10 results,
|
2602
|
+
* then 20 and so on.
|
2603
|
+
* :limit:: Default: 10. This is the number of results you want
|
2604
|
+
* returned, also called the page size. Set +:limit+ to
|
2605
|
+
* +:all+ to return all results
|
2606
|
+
* :sort:: A Sort object or sort string describing how the field
|
2607
|
+
* should be sorted. A sort string is made up of field names
|
2608
|
+
* which cannot contain spaces and the word "DESC" if you
|
2609
|
+
* want the field reversed, all separated by commas. For
|
2610
|
+
* example; "rating DESC, author, title". Note that Ferret
|
2611
|
+
* will try to determine a field's type by looking at the
|
2612
|
+
* first term in the index and seeing if it can be parsed as
|
2613
|
+
* an integer or a float. Keep this in mind as you may need
|
2614
|
+
* to specify a field's type to sort it correctly. For more
|
2615
|
+
* on this, see the documentation for SortField
|
2616
|
+
* :filter:: a Filter object to filter the search results with
|
2617
|
+
* :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
|
2618
|
+
* and the Searcher object as its parameters and returns a
|
2619
|
+
* Boolean value specifying whether the result should be
|
2620
|
+
* included in the result set.
|
2621
|
+
*/
|
2622
|
+
static VALUE
|
2623
|
+
frt_sea_search_each(int argc, VALUE *argv, VALUE self)
|
2624
|
+
{
|
2625
|
+
int i;
|
2626
|
+
Query *q;
|
2627
|
+
float max_score;
|
2628
|
+
TopDocs *td;
|
2629
|
+
VALUE rquery, roptions, rtotal_hits;
|
2630
|
+
GET_SEA();
|
2631
|
+
|
2632
|
+
rb_scan_args(argc, argv, "11", &rquery, &roptions);
|
2633
|
+
|
2634
|
+
//sds rb_thread_critical = Qtrue;
|
2635
|
+
|
2636
|
+
Data_Get_Struct(rquery, Query, q);
|
2637
|
+
td = frt_sea_search_internal(q, roptions, sea);
|
2638
|
+
|
2639
|
+
max_score = (td->max_score > 1.0) ? td->max_score : 1.0;
|
2640
|
+
|
2641
|
+
/* yield normalized scores */
|
2642
|
+
for (i = 0; i < td->size; i++) {
|
2643
|
+
rb_yield_values(2, INT2FIX(td->hits[i]->doc),
|
2644
|
+
rb_float_new((double)(td->hits[i]->score/max_score)));
|
2645
|
+
}
|
2646
|
+
|
2647
|
+
rtotal_hits = INT2FIX(td->total_hits);
|
2648
|
+
td_destroy(td);
|
2649
|
+
|
2650
|
+
//sds rb_thread_critical = 0;
|
2651
|
+
|
2652
|
+
return rtotal_hits;
|
2653
|
+
}
|
2654
|
+
|
2655
|
+
/*
|
2656
|
+
* call-seq:
|
2657
|
+
* searcher.explain(query, doc_id) -> Explanation
|
2658
|
+
*
|
2659
|
+
* Create an explanation object to explain the score returned for a
|
2660
|
+
* particular document at +doc_id+ in the index for the query +query+.
|
2661
|
+
*
|
2662
|
+
* Usually used like this;
|
2663
|
+
*
|
2664
|
+
* puts searcher.explain(query, doc_id).to_s
|
2665
|
+
*/
|
2666
|
+
static VALUE
|
2667
|
+
frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
|
2668
|
+
{
|
2669
|
+
GET_SEA();
|
2670
|
+
Query *query;
|
2671
|
+
Explanation *expl;
|
2672
|
+
Data_Get_Struct(rquery, Query, query);
|
2673
|
+
expl = sea->explain(sea, query, FIX2INT(rdoc_id));
|
2674
|
+
return Data_Wrap_Struct(cExplanation, NULL, &expl_destroy, expl);
|
2675
|
+
}
|
2676
|
+
|
2677
|
+
/*
|
2678
|
+
* call-seq:
|
2679
|
+
* searcher.highlight(query, doc_id, field, options = {}) -> Array
|
2680
|
+
*
|
2681
|
+
* Returns an array of strings with the matches highlighted.
|
2682
|
+
*
|
2683
|
+
* === Options
|
2684
|
+
*
|
2685
|
+
* :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
|
2686
|
+
* terms will be in the centre of the excerpt. Set to
|
2687
|
+
* :all to highlight the entire field.
|
2688
|
+
* :num_excerpts:: Default: 2. Number of excerpts to return.
|
2689
|
+
* :pre_tag:: Default: "<b>". Tag to place to the left of the match.
|
2690
|
+
* You'll probably want to change this to a "<span>" tag
|
2691
|
+
* with a class. Try "\033[7m" for use in a terminal.
|
2692
|
+
* :post_tag:: Default: "</b>". This tag should close the +:pre_tag+.
|
2693
|
+
* Try tag "\033[m" in the terminal.
|
2694
|
+
* :ellipsis:: Default: "...". This is the string that is appended at
|
2695
|
+
* the beginning and end of excerpts (unless the excerpt
|
2696
|
+
* hits the start or end of the field). You'll probably
|
2697
|
+
* want to change this to a Unicode ellipsis character.
|
2698
|
+
*/
|
2699
|
+
static VALUE
|
2700
|
+
frt_sea_highlight(int argc, VALUE *argv, VALUE self)
|
2701
|
+
{
|
2702
|
+
GET_SEA();
|
2703
|
+
VALUE rquery, rdoc_id, rfield, roptions, v;
|
2704
|
+
Query *query;
|
2705
|
+
int excerpt_length = 150;
|
2706
|
+
int num_excerpts = 2;
|
2707
|
+
char *pre_tag = "<b>";
|
2708
|
+
char *post_tag = "</b>";
|
2709
|
+
char *ellipsis = "...";
|
2710
|
+
char **excerpts;
|
2711
|
+
|
2712
|
+
rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
|
2713
|
+
Data_Get_Struct(rquery, Query, query);
|
2714
|
+
if (argc > 3) {
|
2715
|
+
if (TYPE(roptions) != T_HASH) {
|
2716
|
+
rb_raise(rb_eArgError, "The fourth argument to Searcher#highlight must be a hash");
|
2717
|
+
}
|
2718
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
|
2719
|
+
num_excerpts = FIX2INT(v);
|
2720
|
+
}
|
2721
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
|
2722
|
+
if (v == sym_all) {
|
2723
|
+
num_excerpts = 1;
|
2724
|
+
excerpt_length = INT_MAX/2;
|
2725
|
+
}
|
2726
|
+
else {
|
2727
|
+
excerpt_length = FIX2INT(v);
|
2728
|
+
}
|
2729
|
+
}
|
2730
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
|
2731
|
+
pre_tag = rs2s(rb_obj_as_string(v));
|
2732
|
+
}
|
2733
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
|
2734
|
+
post_tag = rs2s(rb_obj_as_string(v));
|
2735
|
+
}
|
2736
|
+
if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
|
2737
|
+
ellipsis = rs2s(rb_obj_as_string(v));
|
2738
|
+
}
|
2739
|
+
}
|
2740
|
+
|
2741
|
+
if ((excerpts = searcher_highlight(sea,
|
2742
|
+
query,
|
2743
|
+
FIX2INT(rdoc_id),
|
2744
|
+
frt_field(rfield),
|
2745
|
+
excerpt_length,
|
2746
|
+
num_excerpts,
|
2747
|
+
pre_tag,
|
2748
|
+
post_tag,
|
2749
|
+
ellipsis)) != NULL) {
|
2750
|
+
const int size = ary_size(excerpts);
|
2751
|
+
int i;
|
2752
|
+
VALUE rexcerpts = rb_ary_new2(size);
|
2753
|
+
|
2754
|
+
for (i = 0; i < size; i++) {
|
2755
|
+
rb_ary_store(rexcerpts, i, rb_str_new2(excerpts[i]));
|
2756
|
+
}
|
2757
|
+
ary_destroy(excerpts, &free);
|
2758
|
+
return rexcerpts;
|
2759
|
+
}
|
2760
|
+
return Qnil;
|
2761
|
+
}
|
2762
|
+
|
2763
|
+
/****************************************************************************
|
2764
|
+
*
|
2765
|
+
* Searcher Methods
|
2766
|
+
*
|
2767
|
+
****************************************************************************/
|
2768
|
+
|
2769
|
+
static void
|
2770
|
+
frt_sea_mark(void *p)
|
2771
|
+
{
|
2772
|
+
IndexSearcher *isea = (IndexSearcher *)p;
|
2773
|
+
frt_gc_mark(isea->ir);
|
2774
|
+
frt_gc_mark(isea->ir->store);
|
2775
|
+
}
|
2776
|
+
|
2777
|
+
/*
 * Wrap the C IndexReader +ir+ in a new Ruby IndexReader object, store that
 * object in +rir+ and register the (ir, rir) pair with object_add.
 */
#define FRT_GET_IR(rir, ir) do {\
    (rir) = Data_Wrap_Struct(cIndexReader, &frt_ir_mark, &frt_ir_free, (ir));\
    object_add((ir), (rir));\
} while (0)
|
2781
|
+
|
2782
|
+
/*
|
2783
|
+
* call-seq:
|
2784
|
+
* Searcher.new(obj) -> Searcher
|
2785
|
+
*
|
2786
|
+
* Create a new Searcher object. +obj+ can either be a string path to an
|
2787
|
+
* index directory on the file-system, an actual Ferret::Store::Directory
|
2788
|
+
* object or a Ferret::Index::IndexReader. You should use the IndexReader for
|
2789
|
+
* searching multiple indexes. Just open the IndexReader on multiple
|
2790
|
+
* directories.
|
2791
|
+
*/
|
2792
|
+
static VALUE
|
2793
|
+
frt_sea_init(VALUE self, VALUE obj)
|
2794
|
+
{
|
2795
|
+
Store *store = NULL;
|
2796
|
+
IndexReader *ir = NULL;
|
2797
|
+
Searcher *sea;
|
2798
|
+
if (TYPE(obj) == T_STRING) {
|
2799
|
+
frt_create_dir(obj);
|
2800
|
+
store = open_fs_store(StringValueCStr(obj));
|
2801
|
+
ir = ir_open(store);
|
2802
|
+
DEREF(store);
|
2803
|
+
FRT_GET_IR(obj, ir);
|
2804
|
+
} else {
|
2805
|
+
Check_Type(obj, T_DATA);
|
2806
|
+
if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
|
2807
|
+
Data_Get_Struct(obj, Store, store);
|
2808
|
+
ir = ir_open(store);
|
2809
|
+
FRT_GET_IR(obj, ir);
|
2810
|
+
} else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
|
2811
|
+
Data_Get_Struct(obj, IndexReader, ir);
|
2812
|
+
} else {
|
2813
|
+
rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
|
2814
|
+
}
|
2815
|
+
}
|
2816
|
+
|
2817
|
+
sea = isea_new(ir);
|
2818
|
+
((IndexSearcher *)sea)->close_ir = false;
|
2819
|
+
Frt_Wrap_Struct(self, &frt_sea_mark, &frt_sea_free, sea);
|
2820
|
+
object_add(sea, self);
|
2821
|
+
|
2822
|
+
return self;
|
2823
|
+
}
|
2824
|
+
|
2825
|
+
/****************************************************************************
|
2826
|
+
*
|
2827
|
+
* MultiSearcher Methods
|
2828
|
+
*
|
2829
|
+
****************************************************************************/
|
2830
|
+
|
2831
|
+
static void
|
2832
|
+
frt_ms_free(void *p)
|
2833
|
+
{
|
2834
|
+
Searcher *sea = (Searcher *)p;
|
2835
|
+
MultiSearcher *msea = (MultiSearcher *)sea;
|
2836
|
+
free(msea->searchers);
|
2837
|
+
object_del(sea);
|
2838
|
+
searcher_close(sea);
|
2839
|
+
}
|
2840
|
+
|
2841
|
+
static void
|
2842
|
+
frt_ms_mark(void *p)
|
2843
|
+
{
|
2844
|
+
int i;
|
2845
|
+
MultiSearcher *msea = (MultiSearcher *)p;
|
2846
|
+
for (i = 0; i < msea->s_cnt; i++) {
|
2847
|
+
frt_gc_mark(msea->searchers[i]);
|
2848
|
+
}
|
2849
|
+
}
|
2850
|
+
|
2851
|
+
/*
|
2852
|
+
* call-seq:
|
2853
|
+
* MultiSearcher.new(searcher*) -> searcher
|
2854
|
+
*
|
2855
|
+
* Create a new MultiSearcher by passing a list of subsearchers to the
|
2856
|
+
* constructor.
|
2857
|
+
*/
|
2858
|
+
static VALUE
|
2859
|
+
frt_ms_init(int argc, VALUE *argv, VALUE self)
|
2860
|
+
{
|
2861
|
+
int i, j, top = 0, capa = argc;
|
2862
|
+
|
2863
|
+
VALUE rsearcher;
|
2864
|
+
Searcher **searchers = ALLOC_N(Searcher *, capa);
|
2865
|
+
Searcher *s;
|
2866
|
+
|
2867
|
+
for (i = 0; i < argc; i++) {
|
2868
|
+
rsearcher = argv[i];
|
2869
|
+
switch (TYPE(rsearcher)) {
|
2870
|
+
case T_ARRAY:
|
2871
|
+
capa += RARRAY_LEN(rsearcher);
|
2872
|
+
REALLOC_N(searchers, Searcher *, capa);
|
2873
|
+
for (j = 0; j < RARRAY_LEN(rsearcher); j++) {
|
2874
|
+
VALUE rs = RARRAY_PTR(rsearcher)[j];
|
2875
|
+
Data_Get_Struct(rs, Searcher, s);
|
2876
|
+
searchers[top++] = s;
|
2877
|
+
}
|
2878
|
+
break;
|
2879
|
+
case T_DATA:
|
2880
|
+
Data_Get_Struct(rsearcher, Searcher, s);
|
2881
|
+
searchers[top++] = s;
|
2882
|
+
break;
|
2883
|
+
default:
|
2884
|
+
rb_raise(rb_eArgError, "Can't add class %s to MultiSearcher",
|
2885
|
+
rb_obj_classname(rsearcher));
|
2886
|
+
break;
|
2887
|
+
}
|
2888
|
+
}
|
2889
|
+
s = msea_new(searchers, top, false);
|
2890
|
+
Frt_Wrap_Struct(self, &frt_ms_mark, &frt_ms_free, s);
|
2891
|
+
object_add(s, self);
|
2892
|
+
return self;
|
2893
|
+
}
|
2894
|
+
|
2895
|
+
/****************************************************************************
|
2896
|
+
*
|
2897
|
+
* Init Function
|
2898
|
+
*
|
2899
|
+
****************************************************************************/
|
2900
|
+
|
2901
|
+
/* rdochack
|
2902
|
+
cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
|
2903
|
+
*/
|
2904
|
+
|
2905
|
+
/*
|
2906
|
+
* Document-class: Ferret::Search::Hit
|
2907
|
+
*
|
2908
|
+
* == Summary
|
2909
|
+
*
|
2910
|
+
* A hit represents a single document match for a search. It holds the
|
2911
|
+
* document id of the document that matches along with the score for the
|
2912
|
+
* match. The score is a positive Float value. The score contained in a hit
|
2913
|
+
* is not normalized so it can be greater than 1.0. To normalize scores to
|
2914
|
+
* the range 0.0..1.0 divide the scores by TopDocs#max_score.
|
2915
|
+
*/
|
2916
|
+
static void
|
2917
|
+
Init_Hit(void)
|
2918
|
+
{
|
2919
|
+
const char *hit_class = "Hit";
|
2920
|
+
/* rdochack
|
2921
|
+
cHit = rb_define_class_under(mSearch, "Hit", rb_cObject);
|
2922
|
+
*/
|
2923
|
+
cHit = rb_struct_define(hit_class, "doc", "score", NULL);
|
2924
|
+
rb_set_class_path(cHit, mSearch, hit_class);
|
2925
|
+
rb_const_set(mSearch, rb_intern(hit_class), cHit);
|
2926
|
+
id_doc = rb_intern("doc");
|
2927
|
+
id_score = rb_intern("score");
|
2928
|
+
}
|
2929
|
+
|
2930
|
+
/*
|
2931
|
+
* Document-class: Ferret::Search::TopDocs
|
2932
|
+
*
|
2933
|
+
* == Summary
|
2934
|
+
*
|
2935
|
+
* A TopDocs object holds a result set for a search. The number of documents
|
2936
|
+
* that matched the query is held in TopDocs#total_hits. The actual
|
2937
|
+
* results are in the Array TopDocs#hits. The number of hits returned is
|
2938
|
+
* limited by the +:limit+ option so the size of the +hits+ array will not
|
2939
|
+
* always be equal to the value of +total_hits+. Finally TopDocs#max_score
|
2940
|
+
* holds the maximum score of any match (not necessarily the maximum score
|
2941
|
+
* contained in the +hits+ array) so it can be used to normalize scores. For
|
2942
|
+
* example, to print doc ids with scores out of 100.0 you could do this;
|
2943
|
+
*
|
2944
|
+
* top_docs.hits.each do |hit|
|
2945
|
+
* puts "#{hit.doc} scored #{hit.score * 100.0 / top_docs.max_score}"
|
2946
|
+
* end
|
2947
|
+
*/
|
2948
|
+
static void
|
2949
|
+
Init_TopDocs(void)
|
2950
|
+
{
|
2951
|
+
const char *td_class = "TopDocs";
|
2952
|
+
/* rdochack
|
2953
|
+
cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
|
2954
|
+
*/
|
2955
|
+
cTopDocs = rb_struct_define(td_class,
|
2956
|
+
"total_hits",
|
2957
|
+
"hits",
|
2958
|
+
"max_score",
|
2959
|
+
"searcher",
|
2960
|
+
NULL);
|
2961
|
+
rb_set_class_path(cTopDocs, mSearch, td_class);
|
2962
|
+
rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
|
2963
|
+
rb_define_method(cTopDocs, "to_s", frt_td_to_s, -1);
|
2964
|
+
rb_define_method(cTopDocs, "to_json", frt_td_to_json, 0);
|
2965
|
+
id_hits = rb_intern("hits");
|
2966
|
+
id_total_hits = rb_intern("total_hits");
|
2967
|
+
id_max_score = rb_intern("max_score");
|
2968
|
+
id_searcher = rb_intern("searcher");
|
2969
|
+
}
|
2970
|
+
|
2971
|
+
/*
|
2972
|
+
* Document-class: Ferret::Search::Explanation
|
2973
|
+
*
|
2974
|
+
* == Summary
|
2975
|
+
*
|
2976
|
+
* Explanation is used to give a description of why a document matched with
|
2977
|
+
* the score that it did. Use the Explanation#to_s or Explanation#to_html
|
2978
|
+
* methods to display the explanation in a human readable format. Creating
|
2979
|
+
* explanations is an expensive operation so it should only be used for
|
2980
|
+
* debugging purposes. To create an explanation use the Searcher#explain
|
2981
|
+
* method.
|
2982
|
+
*
|
2983
|
+
* == Example
|
2984
|
+
*
|
2985
|
+
* puts searcher.explain(query, doc_id).to_s
|
2986
|
+
*/
|
2987
|
+
static void
|
2988
|
+
Init_Explanation(void)
|
2989
|
+
{
|
2990
|
+
cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
|
2991
|
+
rb_define_alloc_func(cExplanation, frt_data_alloc);
|
2992
|
+
|
2993
|
+
rb_define_method(cExplanation, "to_s", frt_expl_to_s, 0);
|
2994
|
+
rb_define_method(cExplanation, "to_html", frt_expl_to_html, 0);
|
2995
|
+
rb_define_method(cExplanation, "score", frt_expl_score, 0);
|
2996
|
+
}
|
2997
|
+
|
2998
|
+
/*
|
2999
|
+
* Document-class: Ferret::Search::Query
|
3000
|
+
*
|
3001
|
+
* == Summary
|
3002
|
+
*
|
3003
|
+
* Abstract class representing a query to the index. There are a number of
|
3004
|
+
* concrete Query implementations;
|
3005
|
+
*
|
3006
|
+
* * TermQuery
|
3007
|
+
* * MultiTermQuery
|
3008
|
+
* * BooleanQuery
|
3009
|
+
* * PhraseQuery
|
3010
|
+
* * ConstantScoreQuery
|
3011
|
+
* * FilteredQuery
|
3012
|
+
* * MatchAllQuery
|
3013
|
+
* * RangeQuery
|
3014
|
+
* * WildcardQuery
|
3015
|
+
* * FuzzyQuery
|
3016
|
+
* * PrefixQuery
|
3017
|
+
* * Spans::SpanTermQuery
|
3018
|
+
* * Spans::SpanFirstQuery
|
3019
|
+
* * Spans::SpanOrQuery
|
3020
|
+
* * Spans::SpanNotQuery
|
3021
|
+
* * Spans::SpanNearQuery
|
3022
|
+
*
|
3023
|
+
* Explore these classes for the query right for you. The queries are passed
|
3024
|
+
* to the Searcher#search* methods.
|
3025
|
+
*
|
3026
|
+
* === Query Boosts
|
3027
|
+
*
|
3028
|
+
* Queries have a boost value so that you can make the results of one query
|
3029
|
+
* more important than the results of another query when combining them in a
|
3030
|
+
* BooleanQuery. For example, say you want documents on Rails. To avoid getting results
|
3031
|
+
* for train rails you might also add the term Ruby but Rails is the more
|
3032
|
+
* important term so you'd give it a boost.
|
3033
|
+
*/
|
3034
|
+
static void
|
3035
|
+
Init_Query(void)
|
3036
|
+
{
|
3037
|
+
cQuery = rb_define_class_under(mSearch, "Query", rb_cObject);
|
3038
|
+
|
3039
|
+
rb_define_method(cQuery, "to_s", frt_q_to_s, -1);
|
3040
|
+
rb_define_method(cQuery, "boost", frt_q_get_boost, 0);
|
3041
|
+
rb_define_method(cQuery, "boost=", frt_q_set_boost, 1);
|
3042
|
+
rb_define_method(cQuery, "eql?", frt_q_eql, 1);
|
3043
|
+
rb_define_method(cQuery, "==", frt_q_eql, 1);
|
3044
|
+
rb_define_method(cQuery, "hash", frt_q_hash, 0);
|
3045
|
+
rb_define_method(cQuery, "terms", frt_q_get_terms, 1);
|
3046
|
+
}
|
3047
|
+
|
3048
|
+
/*
|
3049
|
+
* Document-class: Ferret::Search::TermQuery
|
3050
|
+
*
|
3051
|
+
* == Summary
|
3052
|
+
*
|
3053
|
+
* TermQuery is the most basic query and it is the building block for most
|
3054
|
+
* other queries. It basically matches documents that contain a specific term
|
3055
|
+
* in a specific field.
|
3056
|
+
*
|
3057
|
+
* == Example
|
3058
|
+
*
|
3059
|
+
* query = TermQuery.new(:content, "rails")
|
3060
|
+
*
|
3061
|
+
* # untokenized fields can also be searched with this query;
|
3062
|
+
* query = TermQuery.new(:title, "Shawshank Redemption")
|
3063
|
+
*
|
3064
|
+
* Notice the all lowercase term Rails. This is important as most analyzers will
|
3065
|
+
* downcase all text added to the index. The title in this case was not
|
3066
|
+
* tokenized so the case would have been left as is.
|
3067
|
+
*/
|
3068
|
+
static void
|
3069
|
+
Init_TermQuery(void)
|
3070
|
+
{
|
3071
|
+
cTermQuery = rb_define_class_under(mSearch, "TermQuery", cQuery);
|
3072
|
+
rb_define_alloc_func(cTermQuery, frt_data_alloc);
|
3073
|
+
|
3074
|
+
rb_define_method(cTermQuery, "initialize", frt_tq_init, 2);
|
3075
|
+
}
|
3076
|
+
|
3077
|
+
/*
|
3078
|
+
* Document-class: Ferret::Search::MultiTermQuery
|
3079
|
+
*
|
3080
|
+
* == Summary
|
3081
|
+
*
|
3082
|
+
* MultiTermQuery matches documents that contain one of a list of terms in a
|
3083
|
+
* specific field. This is the basic building block for queries such as;
|
3084
|
+
*
|
3085
|
+
* * PrefixQuery
|
3086
|
+
* * WildcardQuery
|
3087
|
+
* * FuzzyQuery
|
3088
|
+
*
|
3089
|
+
* MultiTermQuery is very similar to a boolean "Or" query. It is highly
|
3090
|
+
* optimized though as it focuses on a single field.
|
3091
|
+
*
|
3092
|
+
* == Example
|
3093
|
+
*
|
3094
|
+
* multi_term_query = MultiTermQuery.new(:content, :max_terms => 10)
|
3095
|
+
*
|
3096
|
+
* multi_term_query << "Ruby" << "Ferret" << "Rails" << "Search"
|
3097
|
+
*/
|
3098
|
+
static void
|
3099
|
+
Init_MultiTermQuery(void)
|
3100
|
+
{
|
3101
|
+
id_default_max_terms = rb_intern("@@default_max_terms");
|
3102
|
+
sym_max_terms = ID2SYM(rb_intern("max_terms"));
|
3103
|
+
sym_min_score = ID2SYM(rb_intern("min_score"));
|
3104
|
+
|
3105
|
+
cMultiTermQuery = rb_define_class_under(mSearch, "MultiTermQuery", cQuery);
|
3106
|
+
rb_define_alloc_func(cMultiTermQuery, frt_data_alloc);
|
3107
|
+
|
3108
|
+
rb_cvar_set(cMultiTermQuery, id_default_max_terms, INT2FIX(512));
|
3109
|
+
rb_define_singleton_method(cMultiTermQuery, "default_max_terms",
|
3110
|
+
frt_mtq_get_dmt, 0);
|
3111
|
+
rb_define_singleton_method(cMultiTermQuery, "default_max_terms=",
|
3112
|
+
frt_mtq_set_dmt, 1);
|
3113
|
+
|
3114
|
+
rb_define_method(cMultiTermQuery, "initialize", frt_mtq_init, -1);
|
3115
|
+
rb_define_method(cMultiTermQuery, "add_term", frt_mtq_add_term, -1);
|
3116
|
+
rb_define_method(cMultiTermQuery, "<<", frt_mtq_add_term, -1);
|
3117
|
+
}
|
3118
|
+
|
3119
|
+
static void Init_BooleanClause(void);
|
3120
|
+
|
3121
|
+
/*
|
3122
|
+
* Document-class: Ferret::Search::BooleanQuery
|
3123
|
+
*
|
3124
|
+
* == Summary
|
3125
|
+
*
|
3126
|
+
* A BooleanQuery is used for combining many queries into one. This is best
|
3127
|
+
* illustrated with an example.
|
3128
|
+
*
|
3129
|
+
* == Example
|
3130
|
+
*
|
3131
|
+
* Lets say we wanted to find all documents with the term "Ruby" in the
|
3132
|
+
* +:title+ and the term "Ferret" in the +:content+ field or the +:title+
|
3133
|
+
* field written before January 2006. You could build the query like this.
|
3134
|
+
*
|
3135
|
+
* tq1 = TermQuery.new(:title, "ruby")
|
3136
|
+
* tq21 = TermQuery.new(:title, "ferret")
|
3137
|
+
* tq22 = TermQuery.new(:content, "ferret")
|
3138
|
+
* bq2 = BooleanQuery.new
|
3139
|
+
* bq2 << tq21 << tq22
|
3140
|
+
*
|
3141
|
+
* rq3 = RangeQuery.new(:written, :< => "200601")
|
3142
|
+
*
|
3143
|
+
* query = BooleanQuery.new
|
3144
|
+
* query.add_query(tq1, :must).add_query(bq2, :must).add_query(rq3, :must)
|
3145
|
+
*/
|
3146
|
+
static void
|
3147
|
+
Init_BooleanQuery(void)
|
3148
|
+
{
|
3149
|
+
cBooleanQuery = rb_define_class_under(mSearch, "BooleanQuery", cQuery);
|
3150
|
+
rb_define_alloc_func(cBooleanQuery, frt_data_alloc);
|
3151
|
+
|
3152
|
+
rb_define_method(cBooleanQuery, "initialize", frt_bq_init, -1);
|
3153
|
+
rb_define_method(cBooleanQuery, "add_query", frt_bq_add_query, -1);
|
3154
|
+
rb_define_method(cBooleanQuery, "<<", frt_bq_add_query, -1);
|
3155
|
+
|
3156
|
+
Init_BooleanClause();
|
3157
|
+
}
|
3158
|
+
|
3159
|
+
/*
|
3160
|
+
* Document-class: Ferret::Search::BooleanQuery::BooleanClause
|
3161
|
+
*
|
3162
|
+
* == Summary
|
3163
|
+
*
|
3164
|
+
* A BooleanClause holds a single query within a BooleanQuery specifying
|
3165
|
+
* whether the query +:must+ match, +:should+ match or +:must_not+ match.
|
3166
|
+
* BooleanClauses can be used to pass a clause from one BooleanQuery to
|
3167
|
+
* another although it is generally easier just to add a query directly to a
|
3168
|
+
* BooleanQuery using the BooleanQuery#add_query method.
|
3169
|
+
*
|
3170
|
+
* == Example
|
3171
|
+
*
|
3172
|
+
* clause1 = BooleanClause.new(query1, :should)
|
3173
|
+
* clause2 = BooleanClause.new(query2, :should)
|
3174
|
+
*
|
3175
|
+
* query = BooleanQuery.new
|
3176
|
+
* query << clause1 << clause2
|
3177
|
+
*/
|
3178
|
+
static void
|
3179
|
+
Init_BooleanClause(void)
|
3180
|
+
{
|
3181
|
+
sym_should = ID2SYM(rb_intern("should"));
|
3182
|
+
sym_must = ID2SYM(rb_intern("must"));
|
3183
|
+
sym_must_not = ID2SYM(rb_intern("must_not"));
|
3184
|
+
|
3185
|
+
cBooleanClause = rb_define_class_under(cBooleanQuery, "BooleanClause",
|
3186
|
+
rb_cObject);
|
3187
|
+
rb_define_alloc_func(cBooleanClause, frt_data_alloc);
|
3188
|
+
|
3189
|
+
rb_define_method(cBooleanClause, "initialize", frt_bc_init, -1);
|
3190
|
+
rb_define_method(cBooleanClause, "query", frt_bc_get_query, 0);
|
3191
|
+
rb_define_method(cBooleanClause, "query=", frt_bc_set_query, 1);
|
3192
|
+
rb_define_method(cBooleanClause, "required?", frt_bc_is_required, 0);
|
3193
|
+
rb_define_method(cBooleanClause, "prohibited?", frt_bc_is_prohibited, 0);
|
3194
|
+
rb_define_method(cBooleanClause, "occur=", frt_bc_set_occur, 1);
|
3195
|
+
rb_define_method(cBooleanClause, "to_s", frt_bc_to_s, 0);
|
3196
|
+
}
|
3197
|
+
|
3198
|
+
/*
|
3199
|
+
* Document-class: Ferret::Search::RangeQuery
|
3200
|
+
*
|
3201
|
+
* == Summary
|
3202
|
+
*
|
3203
|
+
* RangeQuery is used to find documents with terms in a range.
|
3204
|
+
* RangeQuerys are usually used on untokenized fields like date fields or
|
3205
|
+
* number fields.
|
3206
|
+
*
|
3207
|
+
* == Example
|
3208
|
+
*
|
3209
|
+
* To find all documents written between January 1st 2006 and January 26th
|
3210
|
+
* 2006 inclusive you would write the query like this;
|
3211
|
+
*
|
3212
|
+
* query = RangeQuery.new(:create_date, :>= => "20060101", :<= => "20060126")
|
3213
|
+
*/
|
3214
|
+
static void
|
3215
|
+
Init_RangeQuery(void)
|
3216
|
+
{
|
3217
|
+
sym_upper = ID2SYM(rb_intern("upper"));
|
3218
|
+
sym_lower = ID2SYM(rb_intern("lower"));
|
3219
|
+
sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
|
3220
|
+
sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
|
3221
|
+
sym_include_upper = ID2SYM(rb_intern("include_upper"));
|
3222
|
+
sym_include_lower = ID2SYM(rb_intern("include_lower"));
|
3223
|
+
|
3224
|
+
sym_less_than = ID2SYM(rb_intern("<"));
|
3225
|
+
sym_less_than_or_equal_to = ID2SYM(rb_intern("<="));
|
3226
|
+
sym_greater_than = ID2SYM(rb_intern(">"));
|
3227
|
+
sym_greater_than_or_equal_to = ID2SYM(rb_intern(">="));
|
3228
|
+
|
3229
|
+
cRangeQuery = rb_define_class_under(mSearch, "RangeQuery", cQuery);
|
3230
|
+
rb_define_alloc_func(cRangeQuery, frt_data_alloc);
|
3231
|
+
|
3232
|
+
rb_define_method(cRangeQuery, "initialize", frt_rq_init, 2);
|
3233
|
+
}
|
3234
|
+
|
3235
|
+
/*
|
3236
|
+
* Document-class: Ferret::Search::PhraseQuery
|
3237
|
+
*
|
3238
|
+
* == Summary
|
3239
|
+
*
|
3240
|
+
* PhraseQuery matches phrases like "the quick brown fox". Most people are
|
3241
|
+
* familiar with phrase queries having used them in most internet search
|
3242
|
+
* engines.
|
3243
|
+
*
|
3244
|
+
* === Slop
|
3245
|
+
*
|
3246
|
+
* Ferret's phrase queries are slightly more advanced. You can match phrases
|
3247
|
+
* with a slop, ie the match isn't exact but it is good enough. The slop is
|
3248
|
+
* basically the word edit distance of the phrase. For example, "the quick
|
3249
|
+
* brown fox" with a slop of 1 would match "the quick little brown fox". With
|
3250
|
+
* a slop of 2 it would match "the brown quick fox".
|
3251
|
+
*
|
3252
|
+
* query = PhraseQuery.new(:content)
|
3253
|
+
* query << "the" << "quick" << "brown" << "fox"
|
3254
|
+
*
|
3255
|
+
* # matches => "the quick brown fox"
|
3256
|
+
*
|
3257
|
+
* query.slop = 1
|
3258
|
+
* # matches => "the quick little brown fox"
|
3259
|
+
* |__1__^
|
3260
|
+
*
|
3261
|
+
* query.slop = 2
|
3262
|
+
* # matches => "the brown quick _____ fox"
|
3263
|
+
* ^_____2_____|
|
3264
|
+
*
|
3265
|
+
* == Multi-PhraseQuery
|
3266
|
+
*
|
3267
|
+
* Phrase queries can also have multiple terms in a single position. Let's
|
3268
|
+
* say for example that we want to match synonyms for quick like "fast" and
|
3269
|
+
* "speedy". You could build the query like this;
|
3270
|
+
*
|
3271
|
+
* query = PhraseQuery.new(:content)
|
3272
|
+
* query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox"
|
3273
|
+
* # matches => "the quick red fox"
|
3274
|
+
* # matches => "the fast brown fox"
|
3275
|
+
*
|
3276
|
+
* query.slop = 1
|
3277
|
+
* # matches => "the speedy little red fox"
|
3278
|
+
*
|
3279
|
+
* You can also leave positions blank. Lets say you wanted to match "the
|
3280
|
+
* quick <> fox" where "<>" could match anything (but not nothing). You'd
|
3281
|
+
* build this query like this;
|
3282
|
+
*
|
3283
|
+
* query = PhraseQuery.new(:content)
|
3284
|
+
* query.add_term("the").add_term("quick").add_term("fox", 2)
|
3285
|
+
* # matches => "the quick yellow fox"
|
3286
|
+
* # matches => "the quick alkgdhaskghaskjdh fox"
|
3287
|
+
*
|
3288
|
+
* The second parameter to PhraseQuery#add_term is the position increment for
|
3289
|
+
* the term. It is one by default meaning that every time you add a term it
|
3290
|
+
* is expected to follow the previous term. But setting it to 2 or greater
|
3291
|
+
* you are leaving empty spaces in the term.
|
3292
|
+
*
|
3293
|
+
* There are also some tricks you can do by setting the position increment to
|
3294
|
+
* 0. With a little help from your analyzer you can actually tag bold or
|
3295
|
+
* italic text for example. If you want more information about this, ask on
|
3296
|
+
* the mailing list.
|
3297
|
+
*/
|
3298
|
+
static void
|
3299
|
+
Init_PhraseQuery(void)
|
3300
|
+
{
|
3301
|
+
cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
|
3302
|
+
rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
|
3303
|
+
|
3304
|
+
rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
|
3305
|
+
rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
|
3306
|
+
rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
|
3307
|
+
rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);
|
3308
|
+
rb_define_method(cPhraseQuery, "slop=", frt_phq_set_slop, 1);
|
3309
|
+
}
|
3310
|
+
|
3311
|
+
/*
|
3312
|
+
* Document-class: Ferret::Search::PrefixQuery
|
3313
|
+
*
|
3314
|
+
* == Summary
|
3315
|
+
*
|
3316
|
+
* A prefix query is like a TermQuery except that it matches any term with a
|
3317
|
+
* specific prefix. PrefixQuery is expanded into a MultiTermQuery when
|
3318
|
+
* submitted in a search.
|
3319
|
+
*
|
3320
|
+
* == Example
|
3321
|
+
*
|
3322
|
+
* PrefixQuery is very useful for matching a tree structure category
|
3323
|
+
* hierarchy. For example, let's say you have the categories;
|
3324
|
+
*
|
3325
|
+
* "cat1/"
|
3326
|
+
* "cat1/sub_cat1"
|
3327
|
+
* "cat1/sub_cat2"
|
3328
|
+
* "cat2"
|
3329
|
+
* "cat2/sub_cat1"
|
3330
|
+
* "cat2/sub_cat2"
|
3331
|
+
*
|
3332
|
+
* Lets say you want to match everything in category 2. You'd build the query
|
3333
|
+
* like this;
|
3334
|
+
*
|
3335
|
+
* query = PrefixQuery.new(:category, "cat2")
|
3336
|
+
* # matches => "cat2"
|
3337
|
+
* # matches => "cat2/sub_cat1"
|
3338
|
+
* # matches => "cat2/sub_cat2"
|
3339
|
+
*/
|
3340
|
+
static void
|
3341
|
+
Init_PrefixQuery(void)
|
3342
|
+
{
|
3343
|
+
cPrefixQuery = rb_define_class_under(mSearch, "PrefixQuery", cQuery);
|
3344
|
+
rb_define_alloc_func(cPrefixQuery, frt_data_alloc);
|
3345
|
+
|
3346
|
+
rb_define_method(cPrefixQuery, "initialize", frt_prq_init, -1);
|
3347
|
+
}
|
3348
|
+
|
3349
|
+
/*
|
3350
|
+
* Document-class: Ferret::Search::WildcardQuery
|
3351
|
+
*
|
3352
|
+
* == Summary
|
3353
|
+
*
|
3354
|
+
* WildcardQuery is a simple pattern matching query. There are two wild-card
|
3355
|
+
* characters.
|
3356
|
+
*
|
3357
|
+
* * "*" which matches 0 or more characters
|
3358
|
+
* * "?" which matches a single character
|
3359
|
+
*
|
3360
|
+
* == Example
|
3361
|
+
*
|
3362
|
+
* query = WildcardQuery.new(:field, "h*og")
|
3363
|
+
* # matches => "hog"
|
3364
|
+
* # matches => "hot dog"
|
3365
|
+
*
|
3366
|
+
* query = WildcardQuery.new(:field, "fe?t")
|
3367
|
+
* # matches => "feat"
|
3368
|
+
* # matches => "feet"
|
3369
|
+
*
|
3370
|
+
* query = WildcardQuery.new(:field, "f?ll*")
|
3371
|
+
* # matches => "fill"
|
3372
|
+
* # matches => "falling"
|
3373
|
+
* # matches => "folly"
|
3374
|
+
*/
|
3375
|
+
static void
|
3376
|
+
Init_WildcardQuery(void)
|
3377
|
+
{
|
3378
|
+
cWildcardQuery = rb_define_class_under(mSearch, "WildcardQuery", cQuery);
|
3379
|
+
rb_define_alloc_func(cWildcardQuery, frt_data_alloc);
|
3380
|
+
|
3381
|
+
rb_define_method(cWildcardQuery, "initialize", frt_wcq_init, -1);
|
3382
|
+
}
|
3383
|
+
|
3384
|
+
/*
|
3385
|
+
* Document-class: Ferret::Search::FuzzyQuery
|
3386
|
+
*
|
3387
|
+
* == Summary
|
3388
|
+
*
|
3389
|
+
* FuzzyQuery uses the Levenshtein distance formula for measuring the
|
3390
|
+
* similarity between two terms. For example, weak and week have one letter
|
3391
|
+
* difference and they are four characters long so the similarity is 75% or
|
3392
|
+
* 0.75. You can use this query to match terms that are very close to the
|
3393
|
+
* search term.
|
3394
|
+
*
|
3395
|
+
* == Example
|
3396
|
+
*
|
3397
|
+
* FuzzyQuery can be quite useful for finding documents that wouldn't normally
|
3398
|
+
* be found because of typos.
|
3399
|
+
*
|
3400
|
+
* FuzzyQuery.new(:field, "google",
|
3401
|
+
* :min_similarity => 0.6,
|
3402
|
+
* :prefix_length => 2)
|
3403
|
+
* # matches => "gogle", "goggle", "googol", "googel"
|
3404
|
+
*/
|
3405
|
+
static void
|
3406
|
+
Init_FuzzyQuery(void)
|
3407
|
+
{
|
3408
|
+
id_default_min_similarity = rb_intern("@@default_min_similarity");
|
3409
|
+
id_default_prefix_length = rb_intern("@@default_prefix_length");
|
3410
|
+
|
3411
|
+
sym_min_similarity = ID2SYM(rb_intern("min_similarity"));
|
3412
|
+
sym_prefix_length = ID2SYM(rb_intern("prefix_length"));
|
3413
|
+
|
3414
|
+
cFuzzyQuery = rb_define_class_under(mSearch, "FuzzyQuery", cQuery);
|
3415
|
+
rb_define_alloc_func(cFuzzyQuery, frt_data_alloc);
|
3416
|
+
rb_cvar_set(cFuzzyQuery, id_default_min_similarity,
|
3417
|
+
rb_float_new(0.5));
|
3418
|
+
rb_cvar_set(cFuzzyQuery, id_default_prefix_length,
|
3419
|
+
INT2FIX(0));
|
3420
|
+
|
3421
|
+
rb_define_singleton_method(cFuzzyQuery, "default_min_similarity",
|
3422
|
+
frt_fq_get_dms, 0);
|
3423
|
+
rb_define_singleton_method(cFuzzyQuery, "default_min_similarity=",
|
3424
|
+
frt_fq_set_dms, 1);
|
3425
|
+
rb_define_singleton_method(cFuzzyQuery, "default_prefix_length",
|
3426
|
+
frt_fq_get_dpl, 0);
|
3427
|
+
rb_define_singleton_method(cFuzzyQuery, "default_prefix_length=",
|
3428
|
+
frt_fq_set_dpl, 1);
|
3429
|
+
|
3430
|
+
rb_define_method(cFuzzyQuery, "initialize", frt_fq_init, -1);
|
3431
|
+
rb_define_method(cFuzzyQuery, "prefix_length", frt_fq_pre_len, 0);
|
3432
|
+
rb_define_method(cFuzzyQuery, "min_similarity", frt_fq_min_sim, 0);
|
3433
|
+
}
|
3434
|
+
|
3435
|
+
/*
|
3436
|
+
* Document-class: Ferret::Search::MatchAllQuery
|
3437
|
+
*
|
3438
|
+
* == Summary
|
3439
|
+
*
|
3440
|
+
* MatchAllQuery matches all documents in the index. You might want to use this
|
3441
|
+
* query in combination with a filter, however, ConstantScoreQuery is
|
3442
|
+
* probably better in that circumstance.
|
3443
|
+
*/
|
3444
|
+
static void
|
3445
|
+
Init_MatchAllQuery(void)
|
3446
|
+
{
|
3447
|
+
cMatchAllQuery = rb_define_class_under(mSearch, "MatchAllQuery", cQuery);
|
3448
|
+
rb_define_alloc_func(cMatchAllQuery, frt_maq_alloc);
|
3449
|
+
|
3450
|
+
rb_define_method(cMatchAllQuery, "initialize", frt_maq_init, 0);
|
3451
|
+
}
|
3452
|
+
|
3453
|
+
/*
|
3454
|
+
* Document-class: Ferret::Search::ConstantScoreQuery
|
3455
|
+
*
|
3456
|
+
* == Summary
|
3457
|
+
*
|
3458
|
+
* ConstantScoreQuery is a way to turn a Filter into a Query. It matches all
|
3459
|
+
* documents that its filter matches with a constant score. This is a very
|
3460
|
+
* fast query, particularly when run more than once (since filters are
|
3461
|
+
* cached). It is also used internally by RangeQuery.
|
3462
|
+
*
|
3463
|
+
* == Example
|
3464
|
+
*
|
3465
|
+
* Let's say for example that you often need to display all documents created
|
3466
|
+
* on or after June 1st. You could create a ConstantScoreQuery like this;
|
3467
|
+
*
|
3468
|
+
* query = ConstantScoreQuery.new(RangeFilter.new(:created_on, :>= => "200606"))
|
3469
|
+
*
|
3470
|
+
* Once this is run once the results are cached and will be returned very
|
3471
|
+
* quickly in future requests.
|
3472
|
+
*/
|
3473
|
+
static void
|
3474
|
+
Init_ConstantScoreQuery(void)
|
3475
|
+
{
|
3476
|
+
cConstantScoreQuery = rb_define_class_under(mSearch,
|
3477
|
+
"ConstantScoreQuery", cQuery);
|
3478
|
+
rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
|
3479
|
+
|
3480
|
+
rb_define_method(cConstantScoreQuery, "initialize", frt_csq_init, 1);
|
3481
|
+
}
|
3482
|
+
|
3483
|
+
/*
|
3484
|
+
* Document-class: Ferret::Search::FilteredQuery
|
3485
|
+
*
|
3486
|
+
* == Summary
|
3487
|
+
*
|
3488
|
+
* FilteredQuery offers you a way to apply a filter to a specific query.
|
3489
|
+
* The FilteredQuery would then be added to a BooleanQuery to be combined
|
3490
|
+
* with other queries. There is not much point in passing a FilteredQuery
|
3491
|
+
* directly to a Searcher#search method unless you are applying more than one
|
3492
|
+
* filter since the search method also takes a filter as a parameter.
|
3493
|
+
*/
|
3494
|
+
static void
|
3495
|
+
Init_FilteredQuery(void)
|
3496
|
+
{
|
3497
|
+
cFilteredQuery = rb_define_class_under(mSearch, "FilteredQuery", cQuery);
|
3498
|
+
rb_define_alloc_func(cFilteredQuery, frt_data_alloc);
|
3499
|
+
|
3500
|
+
rb_define_method(cFilteredQuery, "initialize", frt_fqq_init, 2);
|
3501
|
+
}
|
3502
|
+
|
3503
|
+
/*
|
3504
|
+
* Document-class: Ferret::Search::Spans::SpanTermQuery
|
3505
|
+
*
|
3506
|
+
* == Summary
|
3507
|
+
*
|
3508
|
+
* A SpanTermQuery is the Spans version of TermQuery, the only difference
|
3509
|
+
* being that it returns the start and end offset of all of its matches for
|
3510
|
+
* use by enclosing SpanQueries.
|
3511
|
+
*/
|
3512
|
+
static void
|
3513
|
+
Init_SpanTermQuery(void)
|
3514
|
+
{
|
3515
|
+
cSpanTermQuery = rb_define_class_under(mSpans, "SpanTermQuery", cQuery);
|
3516
|
+
rb_define_alloc_func(cSpanTermQuery, frt_data_alloc);
|
3517
|
+
|
3518
|
+
rb_define_method(cSpanTermQuery, "initialize", frt_spantq_init, 2);
|
3519
|
+
}
|
3520
|
+
|
3521
|
+
/*
|
3522
|
+
* Document-class: Ferret::Search::Spans::SpanMultiTermQuery
|
3523
|
+
*
|
3524
|
+
* == Summary
|
3525
|
+
*
|
3526
|
+
* A SpanMultiTermQuery is the Spans version of MultiTermQuery, the only
|
3527
|
+
* difference being that it returns the start and end offset of all of its
|
3528
|
+
* matches for use by enclosing SpanQueries.
|
3529
|
+
*/
|
3530
|
+
static void
|
3531
|
+
Init_SpanMultiTermQuery(void)
|
3532
|
+
{
|
3533
|
+
cSpanMultiTermQuery = rb_define_class_under(mSpans, "SpanMultiTermQuery", cQuery);
|
3534
|
+
rb_define_alloc_func(cSpanMultiTermQuery, frt_data_alloc);
|
3535
|
+
|
3536
|
+
rb_define_method(cSpanMultiTermQuery, "initialize", frt_spanmtq_init, 2);
|
3537
|
+
}
|
3538
|
+
|
3539
|
+
/*
|
3540
|
+
* Document-class: Ferret::Search::Spans::SpanPrefixQuery
|
3541
|
+
*
|
3542
|
+
* == Summary
|
3543
|
+
*
|
3544
|
+
* A SpanPrefixQuery is the Spans version of PrefixQuery, the only difference
|
3545
|
+
* being that it returns the start and end offset of all of its matches for
|
3546
|
+
* use by enclosing SpanQueries.
|
3547
|
+
*/
|
3548
|
+
static void
|
3549
|
+
Init_SpanPrefixQuery(void)
|
3550
|
+
{
|
3551
|
+
cSpanPrefixQuery = rb_define_class_under(mSpans, "SpanPrefixQuery", cQuery);
|
3552
|
+
rb_define_alloc_func(cSpanPrefixQuery, frt_data_alloc);
|
3553
|
+
|
3554
|
+
rb_define_method(cSpanPrefixQuery, "initialize", frt_spanprq_init, -1);
|
3555
|
+
}
|
3556
|
+
|
3557
|
+
/*
|
3558
|
+
* Document-class: Ferret::Search::Spans::SpanFirstQuery
|
3559
|
+
*
|
3560
|
+
* == Summary
|
3561
|
+
*
|
3562
|
+
* A SpanFirstQuery restricts a query to search in the first +end+ bytes of a
|
3563
|
+
* field. This is useful since often the most important information in a
|
3564
|
+
* document is at the start of the document.
|
3565
|
+
*
|
3566
|
+
* == Example
|
3567
|
+
*
|
3568
|
+
* To find all documents where "ferret" is within the first 100 characters
|
3569
|
+
* (really bytes);
|
3570
|
+
*
|
3571
|
+
* query = SpanFirstQuery.new(SpanTermQuery.new(:content, "ferret"), 100)
|
3572
|
+
*
|
3573
|
+
* == NOTE
|
3574
|
+
*
|
3575
|
+
* SpanFirstQuery only works with other SpanQueries.
|
3576
|
+
*/
|
3577
|
+
static void
|
3578
|
+
Init_SpanFirstQuery(void)
|
3579
|
+
{
|
3580
|
+
cSpanFirstQuery = rb_define_class_under(mSpans, "SpanFirstQuery", cQuery);
|
3581
|
+
rb_define_alloc_func(cSpanFirstQuery, frt_data_alloc);
|
3582
|
+
|
3583
|
+
rb_define_method(cSpanFirstQuery, "initialize", frt_spanfq_init, 2);
|
3584
|
+
}
|
3585
|
+
|
3586
|
+
/*
|
3587
|
+
* Document-class: Ferret::Search::Spans::SpanNearQuery
|
3588
|
+
*
|
3589
|
+
* == Summary
|
3590
|
+
*
|
3591
|
+
* A SpanNearQuery is like a combination between a PhraseQuery and a
|
3592
|
+
* BooleanQuery. It matches sub-SpanQueries which are added as clauses but
|
3593
|
+
* those clauses must occur within a +slop+ edit distance of each other. You
|
3594
|
+
* can also specify that clauses must occur +in_order+.
|
3595
|
+
*
|
3596
|
+
* == Example
|
3597
|
+
*
|
3598
|
+
* query = SpanNearQuery.new(:slop => 2)
|
3599
|
+
* query << SpanTermQuery.new(:field, "quick")
|
3600
|
+
* query << SpanTermQuery.new(:field, "brown")
|
3601
|
+
* query << SpanTermQuery.new(:field, "fox")
|
3602
|
+
* # matches => "quick brown speckled sleepy fox"
|
3603
|
+
* |______2______^
|
3604
|
+
* # matches => "quick brown speckled fox"
|
3605
|
+
* |__1__^
|
3606
|
+
* # matches => "brown quick _____ fox"
|
3607
|
+
* ^_____2_____|
|
3608
|
+
*
|
3609
|
+
* query = SpanNearQuery.new(:slop => 2, :in_order => true)
|
3610
|
+
* query << SpanTermQuery.new(:field, "quick")
|
3611
|
+
* query << SpanTermQuery.new(:field, "brown")
|
3612
|
+
* query << SpanTermQuery.new(:field, "fox")
|
3613
|
+
* # matches => "quick brown speckled sleepy fox"
|
3614
|
+
* |______2______^
|
3615
|
+
* # matches => "quick brown speckled fox"
|
3616
|
+
* |__1__^
|
3617
|
+
* # doesn't match => "brown quick _____ fox"
|
3618
|
+
* # not in order ^_____2_____|
|
3619
|
+
*
|
3620
|
+
* == NOTE
|
3621
|
+
*
|
3622
|
+
* SpanNearQuery only works with other SpanQueries.
|
3623
|
+
*/
|
3624
|
+
static void
|
3625
|
+
Init_SpanNearQuery(void)
|
3626
|
+
{
|
3627
|
+
sym_slop = ID2SYM(rb_intern("slop"));
|
3628
|
+
sym_in_order = ID2SYM(rb_intern("in_order"));
|
3629
|
+
sym_clauses = ID2SYM(rb_intern("clauses"));
|
3630
|
+
|
3631
|
+
cSpanNearQuery = rb_define_class_under(mSpans, "SpanNearQuery", cQuery);
|
3632
|
+
rb_define_alloc_func(cSpanNearQuery, frt_data_alloc);
|
3633
|
+
|
3634
|
+
rb_define_method(cSpanNearQuery, "initialize", frt_spannq_init, -1);
|
3635
|
+
rb_define_method(cSpanNearQuery, "add", frt_spannq_add, 1);
|
3636
|
+
rb_define_method(cSpanNearQuery, "<<", frt_spannq_add, 1);
|
3637
|
+
}
|
3638
|
+
|
3639
|
+
/*
|
3640
|
+
* Document-class: Ferret::Search::Spans::SpanOrQuery
|
3641
|
+
*
|
3642
|
+
* == Summary
|
3643
|
+
*
|
3644
|
+
* SpanOrQuery is just like a BooleanQuery with all +:should+ clauses.
|
3645
|
+
* However, the difference is that all sub-clauses must be SpanQueries and
|
3646
|
+
* the resulting query can then be used within other SpanQueries like
|
3647
|
+
* SpanNearQuery.
|
3648
|
+
*
|
3649
|
+
* == Example
|
3650
|
+
*
|
3651
|
+
* Combined with SpanNearQuery we can create a multi-PhraseQuery like query;
|
3652
|
+
*
|
3653
|
+
* quick_query = SpanOrQuery.new()
|
3654
|
+
* quick_query << SpanTermQuery.new(:field, "quick")
|
3655
|
+
* quick_query << SpanTermQuery.new(:field, "fast")
|
3656
|
+
* quick_query << SpanTermQuery.new(:field, "speedy")
|
3657
|
+
*
|
3658
|
+
* colour_query = SpanOrQuery.new()
|
3659
|
+
* colour_query << SpanTermQuery.new(:field, "red")
|
3660
|
+
* colour_query << SpanTermQuery.new(:field, "brown")
|
3661
|
+
*
|
3662
|
+
*
|
3663
|
+
* query = SpanNearQuery.new(:slop => 2, :in_order => true)
|
3664
|
+
* query << quick_query
|
3665
|
+
* query << colour_query
|
3666
|
+
* query << SpanTermQuery.new(:field, "fox")
|
3667
|
+
* # matches => "quick red speckled sleepy fox"
|
3668
|
+
* |______2______^
|
3669
|
+
* # matches => "speedy brown speckled fox"
|
3670
|
+
* |__1__^
|
3671
|
+
* # doesn't match => "brown fast _____ fox"
|
3672
|
+
* # not in order ^_____2____|
|
3673
|
+
*
|
3674
|
+
* == NOTE
|
3675
|
+
*
|
3676
|
+
* SpanOrQuery only works with other SpanQueries.
|
3677
|
+
*/
|
3678
|
+
static void
|
3679
|
+
Init_SpanOrQuery(void)
|
3680
|
+
{
|
3681
|
+
cSpanOrQuery = rb_define_class_under(mSpans, "SpanOrQuery", cQuery);
|
3682
|
+
rb_define_alloc_func(cSpanOrQuery, frt_data_alloc);
|
3683
|
+
|
3684
|
+
rb_define_method(cSpanOrQuery, "initialize", frt_spanoq_init, -1);
|
3685
|
+
rb_define_method(cSpanOrQuery, "add", frt_spanoq_add, 1);
|
3686
|
+
rb_define_method(cSpanOrQuery, "<<", frt_spanoq_add, 1);
|
3687
|
+
}
|
3688
|
+
|
3689
|
+
/*
|
3690
|
+
* Document-class: Ferret::Search::Spans::SpanNotQuery
|
3691
|
+
*
|
3692
|
+
* == Summary
|
3693
|
+
*
|
3694
|
+
* SpanNotQuery is like a BooleanQuery with a +:must_not+ clause. The
|
3695
|
+
* difference being that the resulting query can be used in another
|
3696
|
+
* SpanQuery.
|
3697
|
+
*
|
3698
|
+
* == Example
|
3699
|
+
*
|
3700
|
+
* Let's say you wanted to search for all documents with the term "rails"
|
3701
|
+
* near the start but without the term "train" near the start. This would
|
3702
|
+
* allow the term "train" to occur later on in the document.
|
3703
|
+
*
|
3704
|
+
* rails_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "rails"), 100)
|
3705
|
+
* train_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "train"), 100)
|
3706
|
+
* query = SpanNotQuery.new(rails_query, train_query)
|
3707
|
+
*
|
3708
|
+
* == NOTE
|
3709
|
+
*
|
3710
|
+
* SpanNotQuery only works with other SpanQueries.
|
3711
|
+
*/
|
3712
|
+
static void
|
3713
|
+
Init_SpanNotQuery(void)
|
3714
|
+
{
|
3715
|
+
cSpanNotQuery = rb_define_class_under(mSpans, "SpanNotQuery", cQuery);
|
3716
|
+
rb_define_alloc_func(cSpanNotQuery, frt_data_alloc);
|
3717
|
+
|
3718
|
+
rb_define_method(cSpanNotQuery, "initialize", frt_spanxq_init, 2);
|
3719
|
+
}
|
3720
|
+
|
3721
|
+
/* rdoc hack
|
3722
|
+
extern VALUE mFerret = rb_define_module("Ferret");
|
3723
|
+
extern VALUE mSearch = rb_define_module_under(mFerret, "Search");
|
3724
|
+
*/
|
3725
|
+
|
3726
|
+
/*
|
3727
|
+
* Document-module: Ferret::Search::Spans
|
3728
|
+
*
|
3729
|
+
* == Summary
|
3730
|
+
*
|
3731
|
+
* The Spans module contains a number of SpanQueries. SpanQueries, unlike
|
3732
|
+
* regular queries, also return the start and end offsets of all of their
|
3733
|
+
* matches so they can be used to limit queries to a certain position in the
|
3734
|
+
* field. They are often used in combination to perform special types of
|
3735
|
+
* PhraseQuery.
|
3736
|
+
*/
|
3737
|
+
static void
|
3738
|
+
Init_Spans(void)
|
3739
|
+
{
|
3740
|
+
mSpans = rb_define_module_under(mSearch, "Spans");
|
3741
|
+
Init_SpanTermQuery();
|
3742
|
+
Init_SpanMultiTermQuery();
|
3743
|
+
Init_SpanPrefixQuery();
|
3744
|
+
Init_SpanFirstQuery();
|
3745
|
+
Init_SpanNearQuery();
|
3746
|
+
Init_SpanOrQuery();
|
3747
|
+
Init_SpanNotQuery();
|
3748
|
+
}
|
3749
|
+
|
3750
|
+
/*
|
3751
|
+
* Document-class: Ferret::Search::RangeFilter
|
3752
|
+
*
|
3753
|
+
* == Summary
|
3754
|
+
*
|
3755
|
+
* RangeFilter filters a set of documents which contain a lexicographical
|
3756
|
+
* range of terms (ie "aaa", "aab", "aac", etc). See also RangeQuery
|
3757
|
+
*
|
3758
|
+
* == Example
|
3759
|
+
*
|
3760
|
+
* Find all documents created before 5th of September 2002.
|
3761
|
+
*
|
3762
|
+
* filter = RangeFilter.new(:created_on, :< => "20020905")
|
3763
|
+
*/
|
3764
|
+
static void
|
3765
|
+
Init_RangeFilter(void)
|
3766
|
+
{
|
3767
|
+
cRangeFilter = rb_define_class_under(mSearch, "RangeFilter", cFilter);
|
3768
|
+
frt_mark_cclass(cRangeFilter);
|
3769
|
+
rb_define_alloc_func(cRangeFilter, frt_data_alloc);
|
3770
|
+
|
3771
|
+
rb_define_method(cRangeFilter, "initialize", frt_rf_init, 2);
|
3772
|
+
}
|
3773
|
+
|
3774
|
+
/*
|
3775
|
+
* Document-class: Ferret::Search::QueryFilter
|
3776
|
+
*
|
3777
|
+
* == Summary
|
3778
|
+
*
|
3779
|
+
* QueryFilter can be used to restrict one query's results by another query's
|
3780
|
+
* results, basically "and"ing them together. Of course you could easily use
|
3781
|
+
* a BooleanQuery to do this. The reason you may choose to use a QueryFilter
|
3782
|
+
* is that Filter results are cached so if you have one query that is often
|
3783
|
+
* added to other queries you may want to use a QueryFilter for performance
|
3784
|
+
* reasons.
|
3785
|
+
*
|
3786
|
+
* == Example
|
3787
|
+
*
|
3788
|
+
* Let's say you have a field +:approved+ which you set to yes when a
|
3789
|
+
* document is approved for display. You'll probably want to add a Filter
|
3790
|
+
* which filters approved documents to display to your users. This is the
|
3791
|
+
* perfect use case for a QueryFilter.
|
3792
|
+
*
|
3793
|
+
* filter = QueryFilter.new(TermQuery.new(:approved, "yes"))
|
3794
|
+
*
|
3795
|
+
* Just remember to use the same QueryFilter each time to take advantage of
|
3796
|
+
* caching. Don't create a new one for each request. Of course, this won't
|
3797
|
+
* work in a CGI application.
|
3798
|
+
*/
|
3799
|
+
static void
|
3800
|
+
Init_QueryFilter(void)
|
3801
|
+
{
|
3802
|
+
cQueryFilter = rb_define_class_under(mSearch, "QueryFilter", cFilter);
|
3803
|
+
frt_mark_cclass(cQueryFilter);
|
3804
|
+
rb_define_alloc_func(cQueryFilter, frt_data_alloc);
|
3805
|
+
|
3806
|
+
rb_define_method(cQueryFilter, "initialize", frt_qf_init, 1);
|
3807
|
+
}
|
3808
|
+
|
3809
|
+
/*
|
3810
|
+
* Document-class: Ferret::Search::Filter
|
3811
|
+
*
|
3812
|
+
* == Summary
|
3813
|
+
*
|
3814
|
+
* A Filter is used to filter query results. It is usually passed to one of
|
3815
|
+
* Searcher's search methods however it can also be used inside a
|
3816
|
+
* ConstantScoreQuery or a FilteredQuery. To implement your own Filter you
|
3817
|
+
* must implement the method #get_bitvector(index_reader) which returns a
|
3818
|
+
* BitVector with set bits corresponding to documents that are allowed by
|
3819
|
+
* this Filter.
|
3820
|
+
*
|
3821
|
+
* TODO add support for user implemented Filter.
|
3822
|
+
* TODO add example of user implemented Filter.
|
3823
|
+
*/
|
3824
|
+
static void
|
3825
|
+
Init_Filter(void)
|
3826
|
+
{
|
3827
|
+
id_bits = rb_intern("bits");
|
3828
|
+
cFilter = rb_define_class_under(mSearch, "Filter", rb_cObject);
|
3829
|
+
frt_mark_cclass(cFilter);
|
3830
|
+
rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
|
3831
|
+
|
3832
|
+
rb_define_method(cFilter, "bits", frt_f_get_bits, 1);
|
3833
|
+
rb_define_method(cFilter, "to_s", frt_f_to_s, 0);
|
3834
|
+
}
|
3835
|
+
|
3836
|
+
/*
|
3837
|
+
* Document-class: Ferret::Search::SortField
|
3838
|
+
*
|
3839
|
+
* == Summary
|
3840
|
+
*
|
3841
|
+
* A SortField is used to sort the result-set of a search by the contents of
|
3842
|
+
* a field. The following types of sort_field are available;
|
3843
|
+
*
|
3844
|
+
* * :auto
|
3845
|
+
* * :integer
|
3846
|
+
* * :float
|
3847
|
+
* * :string
|
3848
|
+
* * :byte
|
3849
|
+
* * :doc_id
|
3850
|
+
* * :score
|
3851
|
+
*
|
3852
|
+
* The type of the SortField is set by passing it as a parameter to the
|
3853
|
+
* constructor. The +:auto+ type specifies that the SortField should detect
|
3854
|
+
* the sort type by looking at the data in the field. This is the default
|
3855
|
+
* :type value although it is recommended that you explicitly specify the
|
3856
|
+
* field's type.
|
3857
|
+
*
|
3858
|
+
* == Example
|
3859
|
+
*
|
3860
|
+
* title_sf = SortField.new(:title, :type => :string)
|
3861
|
+
* rating_sf = SortField.new(:rating, :type => :float, :reverse => true)
|
3862
|
+
*
|
3863
|
+
*
|
3864
|
+
* Note 1: Care should be taken when using the :auto sort-type since numbers
|
3865
|
+
* will occur before other strings in the index so if you are sorting a field
|
3866
|
+
* with both numbers and strings (like a title field which might have "24"
|
3867
|
+
* and "Prison Break") then the sort_field will think it is sorting integers
|
3868
|
+
* when it really should be sorting strings.
|
3869
|
+
*
|
3870
|
+
* Note 2: When sorting by integer, integers are only 4 bytes so anything
|
3871
|
+
* larger will cause strange sorting behaviour.
|
3872
|
+
*/
|
3873
|
+
static void
|
3874
|
+
Init_SortField(void)
|
3875
|
+
{
|
3876
|
+
/* option hash keys for SortField#initialize */
|
3877
|
+
sym_type = ID2SYM(rb_intern("type"));
|
3878
|
+
sym_reverse = ID2SYM(rb_intern("reverse"));
|
3879
|
+
sym_comparator = ID2SYM(rb_intern("comparator"));
|
3880
|
+
|
3881
|
+
/* Sort types */
|
3882
|
+
sym_integer = ID2SYM(rb_intern("integer"));
|
3883
|
+
sym_float = ID2SYM(rb_intern("float"));
|
3884
|
+
sym_string = ID2SYM(rb_intern("string"));
|
3885
|
+
sym_auto = ID2SYM(rb_intern("auto"));
|
3886
|
+
sym_doc_id = ID2SYM(rb_intern("doc_id"));
|
3887
|
+
sym_score = ID2SYM(rb_intern("score"));
|
3888
|
+
sym_byte = ID2SYM(rb_intern("byte"));
|
3889
|
+
|
3890
|
+
cSortField = rb_define_class_under(mSearch, "SortField", rb_cObject);
|
3891
|
+
rb_define_alloc_func(cSortField, frt_data_alloc);
|
3892
|
+
|
3893
|
+
rb_define_method(cSortField, "initialize", frt_sf_init, -1);
|
3894
|
+
rb_define_method(cSortField, "reverse?", frt_sf_is_reverse, 0);
|
3895
|
+
rb_define_method(cSortField, "name", frt_sf_get_name, 0);
|
3896
|
+
rb_define_method(cSortField, "type", frt_sf_get_type, 0);
|
3897
|
+
rb_define_method(cSortField, "comparator", frt_sf_get_comparator, 0);
|
3898
|
+
rb_define_method(cSortField, "to_s", frt_sf_to_s, 0);
|
3899
|
+
|
3900
|
+
rb_define_const(cSortField, "SCORE",
|
3901
|
+
Data_Wrap_Struct(cSortField, NULL,
|
3902
|
+
&frt_deref_free,
|
3903
|
+
(SortField *)&SORT_FIELD_SCORE));
|
3904
|
+
object_add((SortField *)&SORT_FIELD_SCORE,
|
3905
|
+
rb_const_get(cSortField, rb_intern("SCORE")));
|
3906
|
+
|
3907
|
+
rb_define_const(cSortField, "SCORE_REV",
|
3908
|
+
Data_Wrap_Struct(cSortField, NULL,
|
3909
|
+
&frt_deref_free,
|
3910
|
+
(SortField *)&SORT_FIELD_SCORE_REV));
|
3911
|
+
object_add((SortField *)&SORT_FIELD_SCORE_REV,
|
3912
|
+
rb_const_get(cSortField, rb_intern("SCORE_REV")));
|
3913
|
+
|
3914
|
+
rb_define_const(cSortField, "DOC_ID",
|
3915
|
+
Data_Wrap_Struct(cSortField, NULL,
|
3916
|
+
&frt_deref_free,
|
3917
|
+
(SortField *)&SORT_FIELD_DOC));
|
3918
|
+
|
3919
|
+
oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_ID"));
|
3920
|
+
object_add((SortField *)&SORT_FIELD_DOC, oSORT_FIELD_DOC);
|
3921
|
+
|
3922
|
+
rb_define_const(cSortField, "DOC_ID_REV",
|
3923
|
+
Data_Wrap_Struct(cSortField, NULL,
|
3924
|
+
&frt_deref_free,
|
3925
|
+
(SortField *)&SORT_FIELD_DOC_REV));
|
3926
|
+
object_add((SortField *)&SORT_FIELD_DOC_REV,
|
3927
|
+
rb_const_get(cSortField, rb_intern("DOC_ID_REV")));
|
3928
|
+
}
|
3929
|
+
|
3930
|
+
/*
|
3931
|
+
* Document-class: Ferret::Search::Sort
|
3932
|
+
*
|
3933
|
+
* == Summary
|
3934
|
+
*
|
3935
|
+
* A Sort object is used to combine and apply a list of SortFields. The
|
3936
|
+
* SortFields are applied in the order they are added to the SortObject.
|
3937
|
+
*
|
3938
|
+
* == Example
|
3939
|
+
*
|
3940
|
+
* Here is how you would create a Sort object that sorts first by rating and
|
3941
|
+
* then by title;
|
3942
|
+
*
|
3943
|
+
* sf_rating = SortField.new(:rating, :type => :float, :reverse => true)
|
3944
|
+
* sf_title = SortField.new(:title, :type => :string)
|
3945
|
+
* sort = Sort.new([sf_rating, sf_title])
|
3946
|
+
*
|
3947
|
+
* Remember that the :type parameter for SortField is set to :auto by default
|
3948
|
+
* but I strongly recommend you specify a :type value.
|
3949
|
+
*/
|
3950
|
+
static void
|
3951
|
+
Init_Sort(void)
|
3952
|
+
{
|
3953
|
+
/* Sort */
|
3954
|
+
cSort = rb_define_class_under(mSearch, "Sort", rb_cObject);
|
3955
|
+
rb_define_alloc_func(cSort, frt_sort_alloc);
|
3956
|
+
|
3957
|
+
rb_define_method(cSort, "initialize", frt_sort_init, -1);
|
3958
|
+
rb_define_method(cSort, "fields", frt_sort_get_fields, 0);
|
3959
|
+
rb_define_method(cSort, "to_s", frt_sort_to_s, 0);
|
3960
|
+
|
3961
|
+
rb_define_const(cSort, "RELEVANCE",
|
3962
|
+
frt_sort_init(0, NULL, frt_sort_alloc(cSort)));
|
3963
|
+
rb_define_const(cSort, "INDEX_ORDER",
|
3964
|
+
frt_sort_init(1, &oSORT_FIELD_DOC, frt_sort_alloc(cSort)));
|
3965
|
+
}
|
3966
|
+
|
3967
|
+
/*
|
3968
|
+
* Document-class: Ferret::Search::Searcher
|
3969
|
+
*
|
3970
|
+
* == Summary
|
3971
|
+
*
|
3972
|
+
* The Searcher class basically performs the task that Ferret was built for.
|
3973
|
+
* It searches the index. To search the index the Searcher class wraps an
|
3974
|
+
* IndexReader so many of the tasks that you can perform on an IndexReader
|
3975
|
+
* are also available on a searcher including, most importantly, accessing
|
3976
|
+
* stored documents.
|
3977
|
+
*
|
3978
|
+
* The main methods that you need to know about when using a Searcher are the
|
3979
|
+
* search methods. There is the Searcher#search_each method which iterates
|
3980
|
+
* through the results by document id and score and there is the
|
3981
|
+
* Searcher#search method which returns a TopDocs object. Another important
|
3982
|
+
* difference to note is that the Searcher#search_each method normalizes the
|
3983
|
+
* score to a value in the range 0.0..1.0 if the max_score is greater than
|
3984
|
+
* 1.0. Searcher#search does not. Apart from that they take the same
|
3985
|
+
* parameters and work the same way.
|
3986
|
+
*
|
3987
|
+
* == Example
|
3988
|
+
*
|
3989
|
+
* searcher = Searcher.new("/path/to/index")
|
3990
|
+
*
|
3991
|
+
* searcher.search_each(TermQuery.new(:content, "ferret"),
|
3992
|
+
* :filter => RangeFilter.new(:date, :< => "2006"),
|
3993
|
+
* :sort => "date DESC, title") do |doc_id, score|
|
3994
|
+
* puts "#{searcher[doc_id][:title]} scored #{score}"
|
3995
|
+
* end
|
3996
|
+
*/
|
3997
|
+
static void
|
3998
|
+
Init_Searcher(void)
|
3999
|
+
{
|
4000
|
+
/* option hash keys for Searcher#search */
|
4001
|
+
sym_offset = ID2SYM(rb_intern("offset"));
|
4002
|
+
sym_limit = ID2SYM(rb_intern("limit"));
|
4003
|
+
sym_all = ID2SYM(rb_intern("all"));
|
4004
|
+
sym_filter = ID2SYM(rb_intern("filter"));
|
4005
|
+
sym_filter_proc = ID2SYM(rb_intern("filter_proc"));
|
4006
|
+
sym_sort = ID2SYM(rb_intern("sort"));
|
4007
|
+
|
4008
|
+
sym_excerpt_length = ID2SYM(rb_intern("excerpt_length"));
|
4009
|
+
sym_num_excerpts = ID2SYM(rb_intern("num_excerpts"));
|
4010
|
+
sym_pre_tag = ID2SYM(rb_intern("pre_tag"));
|
4011
|
+
sym_post_tag = ID2SYM(rb_intern("post_tag"));
|
4012
|
+
sym_ellipsis = ID2SYM(rb_intern("ellipsis"));
|
4013
|
+
|
4014
|
+
/* Searcher */
|
4015
|
+
cSearcher = rb_define_class_under(mSearch, "Searcher", rb_cObject);
|
4016
|
+
rb_define_alloc_func(cSearcher, frt_data_alloc);
|
4017
|
+
|
4018
|
+
rb_define_method(cSearcher, "initialize", frt_sea_init, 1);
|
4019
|
+
rb_define_method(cSearcher, "close", frt_sea_close, 0);
|
4020
|
+
rb_define_method(cSearcher, "reader", frt_sea_get_reader, 0);
|
4021
|
+
rb_define_method(cSearcher, "doc_freq", frt_sea_doc_freq, 2);
|
4022
|
+
rb_define_method(cSearcher, "get_document", frt_sea_doc, 1);
|
4023
|
+
rb_define_method(cSearcher, "[]", frt_sea_doc, 1);
|
4024
|
+
rb_define_method(cSearcher, "max_doc", frt_sea_max_doc, 0);
|
4025
|
+
rb_define_method(cSearcher, "search", frt_sea_search, -1);
|
4026
|
+
rb_define_method(cSearcher, "search_each", frt_sea_search_each, -1);
|
4027
|
+
rb_define_method(cSearcher, "explain", frt_sea_explain, 2);
|
4028
|
+
rb_define_method(cSearcher, "highlight", frt_sea_highlight, -1);
|
4029
|
+
}
|
4030
|
+
|
4031
|
+
/*
|
4032
|
+
* Document-class: Ferret::Search::MultiSearcher
|
4033
|
+
*
|
4034
|
+
* == Summary
|
4035
|
+
*
|
4036
|
+
* See Searcher for the methods that you can use on this object. A
|
4037
|
+
* MultiSearcher is used to search multiple sub-searchers. The most efficient
|
4038
|
+
* way to do this would be to open up an IndexReader on multiple directories
|
4039
|
+
* and creating a Searcher with that. However, if you decide to implement a
|
4040
|
+
* RemoteSearcher, the MultiSearcher can be used to search multiple machines
|
4041
|
+
* at once.
|
4042
|
+
*/
|
4043
|
+
static void
|
4044
|
+
Init_MultiSearcher(void)
|
4045
|
+
{
|
4046
|
+
cMultiSearcher = rb_define_class_under(mSearch, "MultiSearcher", cSearcher);
|
4047
|
+
rb_define_alloc_func(cMultiSearcher, frt_data_alloc);
|
4048
|
+
rb_define_method(cMultiSearcher, "initialize", frt_ms_init, -1);
|
4049
|
+
}
|
4050
|
+
|
4051
|
+
/*
|
4052
|
+
* Document-module: Ferret::Search
|
4053
|
+
*
|
4054
|
+
* == Summary
|
4055
|
+
*
|
4056
|
+
* The Search module contains all the classes used for searching the index;
|
4057
|
+
* what Ferret was designed to do. The important classes to take a look at in
|
4058
|
+
* this module are (in order);
|
4059
|
+
*
|
4060
|
+
* * Query
|
4061
|
+
* * Searcher
|
4062
|
+
* * Filter
|
4063
|
+
* * Sort
|
4064
|
+
*
|
4065
|
+
* Happy Ferreting!!
|
4066
|
+
*/
|
4067
|
+
void
|
4068
|
+
Init_Search(void)
|
4069
|
+
{
|
4070
|
+
mSearch = rb_define_module_under(mFerret, "Search");
|
4071
|
+
|
4072
|
+
Init_Hit();
|
4073
|
+
Init_TopDocs();
|
4074
|
+
Init_Explanation();
|
4075
|
+
|
4076
|
+
/* Queries */
|
4077
|
+
Init_Query();
|
4078
|
+
|
4079
|
+
Init_TermQuery();
|
4080
|
+
Init_MultiTermQuery();
|
4081
|
+
Init_BooleanQuery();
|
4082
|
+
Init_RangeQuery();
|
4083
|
+
Init_PhraseQuery();
|
4084
|
+
Init_PrefixQuery();
|
4085
|
+
Init_WildcardQuery();
|
4086
|
+
Init_FuzzyQuery();
|
4087
|
+
Init_MatchAllQuery();
|
4088
|
+
Init_ConstantScoreQuery();
|
4089
|
+
Init_FilteredQuery();
|
4090
|
+
|
4091
|
+
Init_Spans();
|
4092
|
+
|
4093
|
+
/* Filters */
|
4094
|
+
Init_Filter();
|
4095
|
+
Init_RangeFilter();
|
4096
|
+
Init_QueryFilter();
|
4097
|
+
|
4098
|
+
/* Sorting */
|
4099
|
+
Init_SortField(); /* must be before Init_Sort */
|
4100
|
+
Init_Sort();
|
4101
|
+
|
4102
|
+
/* Searchers */
|
4103
|
+
Init_Searcher();
|
4104
|
+
Init_MultiSearcher();
|
4105
|
+
}
|