sa-ferret 0.11.6.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (193) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1588 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/index.c +6425 -0
  37. data/ext/index.h +961 -0
  38. data/ext/lang.h +48 -0
  39. data/ext/libstemmer.c +92 -0
  40. data/ext/libstemmer.h +79 -0
  41. data/ext/mempool.c +87 -0
  42. data/ext/mempool.h +35 -0
  43. data/ext/modules.h +162 -0
  44. data/ext/multimapper.c +310 -0
  45. data/ext/multimapper.h +51 -0
  46. data/ext/posh.c +1006 -0
  47. data/ext/posh.h +1007 -0
  48. data/ext/priorityqueue.c +151 -0
  49. data/ext/priorityqueue.h +143 -0
  50. data/ext/q_boolean.c +1608 -0
  51. data/ext/q_const_score.c +161 -0
  52. data/ext/q_filtered_query.c +209 -0
  53. data/ext/q_fuzzy.c +268 -0
  54. data/ext/q_match_all.c +148 -0
  55. data/ext/q_multi_term.c +677 -0
  56. data/ext/q_parser.c +2825 -0
  57. data/ext/q_phrase.c +1126 -0
  58. data/ext/q_prefix.c +100 -0
  59. data/ext/q_range.c +350 -0
  60. data/ext/q_span.c +2402 -0
  61. data/ext/q_term.c +337 -0
  62. data/ext/q_wildcard.c +171 -0
  63. data/ext/r_analysis.c +2499 -0
  64. data/ext/r_index.c +3485 -0
  65. data/ext/r_qparser.c +585 -0
  66. data/ext/r_search.c +4107 -0
  67. data/ext/r_store.c +513 -0
  68. data/ext/r_utils.c +963 -0
  69. data/ext/ram_store.c +471 -0
  70. data/ext/search.c +1741 -0
  71. data/ext/search.h +885 -0
  72. data/ext/similarity.c +150 -0
  73. data/ext/similarity.h +82 -0
  74. data/ext/sort.c +983 -0
  75. data/ext/stem_ISO_8859_1_danish.c +338 -0
  76. data/ext/stem_ISO_8859_1_danish.h +16 -0
  77. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  78. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  79. data/ext/stem_ISO_8859_1_english.c +1156 -0
  80. data/ext/stem_ISO_8859_1_english.h +16 -0
  81. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  82. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  83. data/ext/stem_ISO_8859_1_french.c +1276 -0
  84. data/ext/stem_ISO_8859_1_french.h +16 -0
  85. data/ext/stem_ISO_8859_1_german.c +512 -0
  86. data/ext/stem_ISO_8859_1_german.h +16 -0
  87. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  88. data/ext/stem_ISO_8859_1_italian.h +16 -0
  89. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  90. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  91. data/ext/stem_ISO_8859_1_porter.c +776 -0
  92. data/ext/stem_ISO_8859_1_porter.h +16 -0
  93. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  94. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  95. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  96. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  97. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  98. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  99. data/ext/stem_KOI8_R_russian.c +701 -0
  100. data/ext/stem_KOI8_R_russian.h +16 -0
  101. data/ext/stem_UTF_8_danish.c +344 -0
  102. data/ext/stem_UTF_8_danish.h +16 -0
  103. data/ext/stem_UTF_8_dutch.c +653 -0
  104. data/ext/stem_UTF_8_dutch.h +16 -0
  105. data/ext/stem_UTF_8_english.c +1176 -0
  106. data/ext/stem_UTF_8_english.h +16 -0
  107. data/ext/stem_UTF_8_finnish.c +808 -0
  108. data/ext/stem_UTF_8_finnish.h +16 -0
  109. data/ext/stem_UTF_8_french.c +1296 -0
  110. data/ext/stem_UTF_8_french.h +16 -0
  111. data/ext/stem_UTF_8_german.c +526 -0
  112. data/ext/stem_UTF_8_german.h +16 -0
  113. data/ext/stem_UTF_8_italian.c +1113 -0
  114. data/ext/stem_UTF_8_italian.h +16 -0
  115. data/ext/stem_UTF_8_norwegian.c +302 -0
  116. data/ext/stem_UTF_8_norwegian.h +16 -0
  117. data/ext/stem_UTF_8_porter.c +794 -0
  118. data/ext/stem_UTF_8_porter.h +16 -0
  119. data/ext/stem_UTF_8_portuguese.c +1055 -0
  120. data/ext/stem_UTF_8_portuguese.h +16 -0
  121. data/ext/stem_UTF_8_russian.c +709 -0
  122. data/ext/stem_UTF_8_russian.h +16 -0
  123. data/ext/stem_UTF_8_spanish.c +1137 -0
  124. data/ext/stem_UTF_8_spanish.h +16 -0
  125. data/ext/stem_UTF_8_swedish.c +313 -0
  126. data/ext/stem_UTF_8_swedish.h +16 -0
  127. data/ext/stopwords.c +401 -0
  128. data/ext/store.c +692 -0
  129. data/ext/store.h +777 -0
  130. data/ext/term_vectors.c +352 -0
  131. data/ext/threading.h +31 -0
  132. data/ext/utilities.c +446 -0
  133. data/ext/win32.h +54 -0
  134. data/lib/ferret.rb +29 -0
  135. data/lib/ferret/browser.rb +246 -0
  136. data/lib/ferret/browser/s/global.js +192 -0
  137. data/lib/ferret/browser/s/style.css +148 -0
  138. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  139. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  140. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  141. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  142. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  143. data/lib/ferret/browser/views/layout.rhtml +22 -0
  144. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  145. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  146. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  147. data/lib/ferret/browser/webrick.rb +14 -0
  148. data/lib/ferret/document.rb +130 -0
  149. data/lib/ferret/field_infos.rb +44 -0
  150. data/lib/ferret/index.rb +786 -0
  151. data/lib/ferret/number_tools.rb +157 -0
  152. data/lib/ferret_version.rb +3 -0
  153. data/setup.rb +1555 -0
  154. data/test/test_all.rb +5 -0
  155. data/test/test_helper.rb +24 -0
  156. data/test/threading/number_to_spoken.rb +132 -0
  157. data/test/threading/thread_safety_index_test.rb +79 -0
  158. data/test/threading/thread_safety_read_write_test.rb +76 -0
  159. data/test/threading/thread_safety_test.rb +133 -0
  160. data/test/unit/analysis/tc_analyzer.rb +548 -0
  161. data/test/unit/analysis/tc_token_stream.rb +646 -0
  162. data/test/unit/index/tc_index.rb +762 -0
  163. data/test/unit/index/tc_index_reader.rb +699 -0
  164. data/test/unit/index/tc_index_writer.rb +437 -0
  165. data/test/unit/index/th_doc.rb +315 -0
  166. data/test/unit/largefile/tc_largefile.rb +46 -0
  167. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  168. data/test/unit/search/tc_filter.rb +135 -0
  169. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  170. data/test/unit/search/tc_index_searcher.rb +61 -0
  171. data/test/unit/search/tc_multi_searcher.rb +128 -0
  172. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  173. data/test/unit/search/tc_search_and_sort.rb +179 -0
  174. data/test/unit/search/tc_sort.rb +49 -0
  175. data/test/unit/search/tc_sort_field.rb +27 -0
  176. data/test/unit/search/tc_spans.rb +190 -0
  177. data/test/unit/search/tm_searcher.rb +384 -0
  178. data/test/unit/store/tc_fs_store.rb +77 -0
  179. data/test/unit/store/tc_ram_store.rb +35 -0
  180. data/test/unit/store/tm_store.rb +34 -0
  181. data/test/unit/store/tm_store_lock.rb +68 -0
  182. data/test/unit/tc_document.rb +81 -0
  183. data/test/unit/ts_analysis.rb +2 -0
  184. data/test/unit/ts_index.rb +2 -0
  185. data/test/unit/ts_largefile.rb +4 -0
  186. data/test/unit/ts_query_parser.rb +2 -0
  187. data/test/unit/ts_search.rb +2 -0
  188. data/test/unit/ts_store.rb +2 -0
  189. data/test/unit/ts_utils.rb +2 -0
  190. data/test/unit/utils/tc_bit_vector.rb +295 -0
  191. data/test/unit/utils/tc_number_tools.rb +117 -0
  192. data/test/unit/utils/tc_priority_queue.rb +106 -0
  193. metadata +269 -0
data/ext/r_search.c ADDED
@@ -0,0 +1,4107 @@
1
+ #include "ferret.h"
2
+ #include <st.h>
3
+ #include <rubysig.h>
4
+ #include <ctype.h>
5
+ #include <array.h>
6
+ #include "search.h"
7
+
8
+ VALUE mSearch;
9
+
10
+ static VALUE cHit;
11
+ static VALUE cTopDocs;
12
+ static VALUE cExplanation;
13
+ static VALUE cSearcher;
14
+ static VALUE cMultiSearcher;
15
+ static VALUE cSortField;
16
+ static VALUE cSort;
17
+
18
+ /* Queries */
19
+ static VALUE cQuery;
20
+ static VALUE cTermQuery;
21
+ static VALUE cMultiTermQuery;
22
+ static VALUE cBooleanQuery;
23
+ static VALUE cBooleanClause;
24
+ static VALUE cRangeQuery;
25
+ static VALUE cPhraseQuery;
26
+ static VALUE cPrefixQuery;
27
+ static VALUE cWildcardQuery;
28
+ static VALUE cFuzzyQuery;
29
+ static VALUE cMatchAllQuery;
30
+ static VALUE cConstantScoreQuery;
31
+ static VALUE cFilteredQuery;
32
+ static VALUE cSpanTermQuery;
33
+ static VALUE cSpanMultiTermQuery;
34
+ static VALUE cSpanPrefixQuery;
35
+ static VALUE cSpanFirstQuery;
36
+ static VALUE cSpanNearQuery;
37
+ static VALUE cSpanOrQuery;
38
+ static VALUE cSpanNotQuery;
39
+
40
+ /* Filters */
41
+ static ID id_bits;
42
+ static VALUE cFilter;
43
+ static VALUE cRangeFilter;
44
+ static VALUE cQueryFilter;
45
+
46
+ /* MultiTermQuery */
47
+ static ID id_default_max_terms;
48
+ static VALUE sym_max_terms;
49
+ static VALUE sym_min_score;
50
+
51
+ /** Option hash keys **/
52
+ /* BooleanClause */
53
+ static VALUE sym_should;
54
+ static VALUE sym_must;
55
+ static VALUE sym_must_not;
56
+
57
+ /* RangeQuery */
58
+ static VALUE sym_upper;
59
+ static VALUE sym_lower;
60
+ static VALUE sym_include_upper;
61
+ static VALUE sym_include_lower;
62
+ static VALUE sym_upper_exclusive;
63
+ static VALUE sym_lower_exclusive;
64
+
65
+ static VALUE sym_less_than;
66
+ static VALUE sym_less_than_or_equal_to;
67
+ static VALUE sym_greater_than;
68
+ static VALUE sym_greater_than_or_equal_to;
69
+
70
+ /* FuzzyQuery */
71
+ static VALUE sym_min_similarity;
72
+ static VALUE sym_prefix_length;
73
+
74
+ /* SpanNearQuery */
75
+ static VALUE sym_slop;
76
+ static VALUE sym_in_order;
77
+ static VALUE sym_clauses;
78
+
79
+ /* Class variable ids */
80
+ static ID id_default_min_similarity;
81
+ static ID id_default_prefix_length;
82
+
83
+
84
+ /** Sort **/
85
+ static VALUE oSORT_FIELD_DOC;
86
+
87
+ /* Sort types */
88
+ static VALUE sym_integer;
89
+ static VALUE sym_float;
90
+ static VALUE sym_string;
91
+ static VALUE sym_auto;
92
+ static VALUE sym_doc_id;
93
+ static VALUE sym_score;
94
+ static VALUE sym_byte;
95
+
96
+ /* Sort params */
97
+ static VALUE sym_type;
98
+ static VALUE sym_reverse;
99
+ static VALUE sym_comparator;
100
+
101
+ /* Hits */
102
+ static ID id_doc;
103
+ static ID id_score;
104
+
105
+ /* TopDocs */
106
+ static ID id_hits;
107
+ static ID id_total_hits;
108
+ static ID id_max_score;
109
+ static ID id_searcher;
110
+
111
+ /* Search */
112
+ static VALUE sym_offset;
113
+ static VALUE sym_limit;
114
+ static VALUE sym_all;
115
+ static VALUE sym_sort;
116
+ static VALUE sym_filter;
117
+ static VALUE sym_filter_proc;
118
+
119
+ static VALUE sym_excerpt_length;
120
+ static VALUE sym_num_excerpts;
121
+ static VALUE sym_pre_tag;
122
+ static VALUE sym_post_tag;
123
+ static VALUE sym_ellipsis;
124
+
125
+ extern VALUE cIndexReader;
126
+ extern void frt_ir_free(void *p);
127
+ extern void frt_ir_mark(void *p);
128
+
129
+ extern void frt_set_term(VALUE rterm, Term *t);
130
+ extern VALUE frt_get_analyzer(Analyzer *a);
131
+ extern HashSet *frt_get_fields(VALUE rfields);
132
+ extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
133
+ extern VALUE frt_get_lazy_doc(LazyDoc *lazy_doc);
134
+
135
+ /****************************************************************************
136
+ *
137
+ * Hit Methods
138
+ *
139
+ ****************************************************************************/
140
+
141
+ static VALUE
142
+ frt_get_hit(Hit *hit)
143
+ {
144
+ return rb_struct_new(cHit,
145
+ INT2FIX(hit->doc),
146
+ rb_float_new((double)hit->score),
147
+ NULL);
148
+ }
149
+
150
+ /****************************************************************************
151
+ *
152
+ * TopDocs Methods
153
+ *
154
+ ****************************************************************************/
155
+
156
+ static VALUE
157
+ frt_get_td(TopDocs *td, VALUE rsearcher)
158
+ {
159
+ int i;
160
+ VALUE rtop_docs;
161
+ VALUE hit_ary = rb_ary_new2(td->size);
162
+
163
+ for (i = 0; i < td->size; i++) {
164
+ RARRAY(hit_ary)->ptr[i] = frt_get_hit(td->hits[i]);
165
+ RARRAY(hit_ary)->len++;
166
+ }
167
+
168
+ rtop_docs = rb_struct_new(cTopDocs,
169
+ INT2FIX(td->total_hits),
170
+ hit_ary,
171
+ rb_float_new((double)td->max_score),
172
+ rsearcher,
173
+ NULL);
174
+ td_destroy(td);
175
+ return rtop_docs;
176
+ }
177
+
178
+ /*
179
+ * call-seq:
180
+ * top_doc.to_s(field = :id) -> string
181
+ *
182
+ * Returns a string representation of the top_doc in readable format.
183
+ */
184
+ static VALUE
185
+ frt_td_to_s(int argc, VALUE *argv, VALUE self)
186
+ {
187
+ int i;
188
+ VALUE rhits = rb_funcall(self, id_hits, 0);
189
+ Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
190
+ const int len = RARRAY(rhits)->len;
191
+ char *str = ALLOC_N(char, len * 64 + 100);
192
+ char *s = str;
193
+ char *field = "id";
194
+ VALUE rstr;
195
+
196
+ if (argc) {
197
+ field = frt_field(argv[0]);
198
+ }
199
+
200
+ sprintf(s, "TopDocs: total_hits = %ld, max_score = %f [\n",
201
+ FIX2INT(rb_funcall(self, id_total_hits, 0)),
202
+ NUM2DBL(rb_funcall(self, id_max_score, 0)));
203
+ s += strlen(s);
204
+
205
+ for (i = 0; i < len; i++) {
206
+ VALUE rhit = RARRAY(rhits)->ptr[i];
207
+ int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
208
+ char *value = "";
209
+ LazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
210
+ LazyDocField *lzdf = h_get(lzd->field_dict, field);
211
+ if (NULL != lzdf) {
212
+ value = lazy_df_get_data(lzdf, 0);
213
+ }
214
+
215
+ sprintf(s, "\t%d \"%s\": %f\n", doc_id, value,
216
+ NUM2DBL(rb_funcall(rhit, id_score, 0)));
217
+ s += strlen(s);
218
+ lazy_doc_close(lzd);
219
+ }
220
+
221
+ sprintf(s, "]\n");
222
+ rstr = rb_str_new2(str);
223
+ free(str);
224
+ return rstr;
225
+ }
226
+
227
+ static INLINE char *
228
+ frt_lzd_load_to_json(LazyDoc *lzd, char **str, char *s, int *slen)
229
+ {
230
+ int i, j;
231
+ int diff = s - *str;
232
+ int len = diff, l;
233
+ LazyDocField *f;
234
+
235
+ for (i = 0; i < lzd->size; i++) {
236
+ f = lzd->fields[i];
237
+ /* 3 times length of field to make space for quoted quotes ('"') and
238
+ * 4 times field elements to make space for '"' around fields and ','
239
+ * between fields. Add 100 for '[', ']' and good safety.
240
+ */
241
+ len += strlen(f->name) + f->len * 3 + 100 + 4 * f->size;
242
+ }
243
+
244
+ if (len > *slen) {
245
+ while (len > *slen) *slen = *slen << 1;
246
+ REALLOC_N(*str, char, *slen);
247
+ s = *str + diff;
248
+ }
249
+
250
+ for (i = 0; i < lzd->size; i++) {
251
+ f = lzd->fields[i];
252
+ if (i) *(s++) = ',';
253
+ *(s++) = '"';
254
+ l = strlen(f->name);
255
+ memcpy(s, f->name, l);
256
+ s += l;
257
+ *(s++) = '"';
258
+ *(s++) = ':';
259
+ if (f->size > 1) *(s++) = '[';
260
+ for (j = 0; j < f->size; j++) {
261
+ if (j) *(s++) = ',';
262
+ s = json_concat_string(s, lazy_df_get_data(f, j));
263
+ }
264
+ if (f->size > 1) *(s++) = ']';
265
+ }
266
+ return s;
267
+ }
268
+
269
+ /*
270
+ * call-seq:
271
+ * top_doc.to_json() -> string
272
+ *
273
+ * Returns a json representation of the top_doc.
274
+ */
275
+ static VALUE
276
+ frt_td_to_json(VALUE self)
277
+ {
278
+ int i;
279
+ VALUE rhits = rb_funcall(self, id_hits, 0);
280
+ VALUE rhit;
281
+ LazyDoc *lzd;
282
+ Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
283
+ const int num_hits = RARRAY(rhits)->len;
284
+ int doc_id;
285
+ int len = 32768;
286
+ char *str = ALLOC_N(char, len);
287
+ char *s = str;
288
+ VALUE rstr;
289
+
290
+ *(s++) = '[';
291
+ for (i = 0; i < num_hits; i++) {
292
+ if (i) *(s++) = ',';
293
+ *(s++) = '{';
294
+ rhit = RARRAY(rhits)->ptr[i];
295
+ doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
296
+ lzd = sea->get_lazy_doc(sea, doc_id);
297
+ s = frt_lzd_load_to_json(lzd, &str, s, &len);
298
+ lazy_doc_close(lzd);
299
+ *(s++) = '}';
300
+ }
301
+ *(s++) = ']';
302
+ *(s++) = '\0';
303
+ rstr = rb_str_new2(str);
304
+ free(str);
305
+ return rstr;
306
+ }
307
+
308
+
309
+ /****************************************************************************
310
+ *
311
+ * Explanation Methods
312
+ *
313
+ ****************************************************************************/
314
+
315
+ #define GET_EXPL() Explanation *expl = (Explanation *)DATA_PTR(self)
316
+
317
+ /*
318
+ * call-seq:
319
+ * explanation.to_s -> string
320
+ *
321
+ * Returns a string representation of the explanation in readable format.
322
+ */
323
+ static VALUE
324
+ frt_expl_to_s(VALUE self)
325
+ {
326
+ GET_EXPL();
327
+ char *str = expl_to_s(expl);
328
+ VALUE rstr = rb_str_new2(str);
329
+ free(str);
330
+ return rstr;
331
+ }
332
+
333
+ /*
334
+ * call-seq:
335
+ * explanation.to_html -> string
336
+ *
337
+ * Returns an html representation of the explanation in readable format.
338
+ */
339
+ static VALUE
340
+ frt_expl_to_html(VALUE self)
341
+ {
342
+ GET_EXPL();
343
+ char *str = expl_to_html(expl);
344
+ VALUE rstr = rb_str_new2(str);
345
+ free(str);
346
+ return rstr;
347
+ }
348
+
349
+ /*
350
+ * call-seq:
351
+ * explanation.score -> float
352
+ *
353
+ * Returns the score represented by the query. This can be used for debugging
354
+ * purposes mainly to check that the score returned by the explanation
355
+ * matches that of the score for the document in the original query.
356
+ */
357
+ static VALUE
358
+ frt_expl_score(VALUE self)
359
+ {
360
+ GET_EXPL();
361
+ return rb_float_new((double)expl->value);
362
+ }
363
+
364
+ /****************************************************************************
365
+ *
366
+ * Query Methods
367
+ *
368
+ ****************************************************************************/
369
+
370
+ static void
371
+ frt_q_free(void *p)
372
+ {
373
+ object_del(p);
374
+ q_deref((Query *)p);
375
+ }
376
+
377
+ #define GET_Q() Query *q = (Query *)DATA_PTR(self)
378
+
379
+ /*
380
+ * call-seq:
381
+ * query.to_s -> string
382
+ *
383
+ * Return a string representation of the query. Most of the time, passing
384
+ * this string through the Query parser will give you the exact Query you
385
+ * began with. This can be a good way to explore how the QueryParser works.
386
+ */
387
+ static VALUE
388
+ frt_q_to_s(int argc, VALUE *argv, VALUE self)
389
+ {
390
+ GET_Q();
391
+ VALUE rstr, rfield;
392
+ char *str, *field = "";
393
+ if (rb_scan_args(argc, argv, "01", &rfield)) {
394
+ field = frt_field(rfield);
395
+ }
396
+ str = q->to_s(q, field);
397
+ rstr = rb_str_new2(str);
398
+ free(str);
399
+ return rstr;
400
+ }
401
+
402
+ /*
403
+ * call-seq:
404
+ * query.boost
405
+ *
406
+ * Returns the queries boost value. See the Query description for more
407
+ * information on Query boosts.
408
+ */
409
+ static VALUE
410
+ frt_q_get_boost(VALUE self)
411
+ {
412
+ GET_Q();
413
+ return rb_float_new((double)q->boost);
414
+ }
415
+
416
+ /*
417
+ * call-seq:
418
+ * query.boost = boost -> boost
419
+ *
420
+ * Set the boost for a query. See the Query description for more information
421
+ * on Query boosts.
422
+ */
423
+ static VALUE
424
+ frt_q_set_boost(VALUE self, VALUE rboost)
425
+ {
426
+ GET_Q();
427
+ q->boost = (float)NUM2DBL(rboost);
428
+ return rboost;
429
+ }
430
+
431
+ /*
432
+ * call-seq:
433
+ * query.hash -> number
434
+ *
435
+ * Return a hash value for the query. This is used for caching query results
436
+ * in a hash object.
437
+ */
438
+ static VALUE
439
+ frt_q_hash(VALUE self)
440
+ {
441
+ GET_Q();
442
+ return INT2FIX(q->hash(q));
443
+ }
444
+
445
+ /*
446
+ * call-seq;
447
+ * query.eql?(other_query) -> bool
448
+ * query == other_query -> bool
449
+ *
450
+ * Return true if +query+ equals +other_query+. Theoretically, two queries are
451
+ * equal if the always return the same results, no matter what the contents
452
+ * of the index. Practically, however, this is difficult to implement
453
+ * efficiently for queries like BooleanQuery since the ordering of clauses
454
+ * unspecified. "Ruby AND Rails" will not match "Rails AND Ruby" for example,
455
+ * although their result sets will be identical. Most queries should match as
456
+ * expected however.
457
+ */
458
+ static VALUE
459
+ frt_q_eql(VALUE self, VALUE other)
460
+ {
461
+ GET_Q();
462
+ Query *oq;
463
+ Data_Get_Struct(other, Query, oq);
464
+ return q->eq(q, oq) ? Qtrue : Qfalse;
465
+ }
466
+
467
+ /*
468
+ * call-seq:
469
+ * query.terms(searcher) -> term_array
470
+ *
471
+ * Returns an array of terms searched for by this query. This can be used for
472
+ * implementing an external query highlighter for example. You must supply a
473
+ * searcher so that the query can be rewritten and optimized like it would be
474
+ * in a real search.
475
+ */
476
+ static VALUE
477
+ frt_q_get_terms(VALUE self, VALUE searcher)
478
+ {
479
+ int i;
480
+ VALUE rterms = rb_ary_new();
481
+ HashSet *terms = term_set_new();
482
+ GET_Q();
483
+ Searcher *sea = (Searcher *)DATA_PTR(searcher);
484
+ Query *rq = sea->rewrite(sea, q);
485
+ rq->extract_terms(rq, terms);
486
+ q_deref(rq);
487
+ for (i = 0; i < terms->size; i++) {
488
+ Term *term = (Term *)terms->elems[i];
489
+ rb_ary_push(rterms, frt_get_term(term->field, term->text));
490
+ }
491
+ hs_destroy(terms);
492
+ return rterms;
493
+ }
494
+
495
+ #define MK_QUERY(klass, q) Data_Wrap_Struct(klass, NULL, &frt_q_free, q)
496
+ VALUE
497
+ frt_get_q(Query *q)
498
+ {
499
+ VALUE self = object_get(q);
500
+
501
+ if (self == Qnil) {
502
+ switch (q->type) {
503
+ case TERM_QUERY:
504
+ self = MK_QUERY(cTermQuery, q);
505
+ break;
506
+ case MULTI_TERM_QUERY:
507
+ self = MK_QUERY(cMultiTermQuery, q);
508
+ break;
509
+ case BOOLEAN_QUERY:
510
+ self = MK_QUERY(cBooleanQuery, q);
511
+ break;
512
+ case PHRASE_QUERY:
513
+ self = MK_QUERY(cPhraseQuery, q);
514
+ break;
515
+ case CONSTANT_QUERY:
516
+ self = MK_QUERY(cConstantScoreQuery, q);
517
+ break;
518
+ case FILTERED_QUERY:
519
+ self = MK_QUERY(cFilteredQuery, q);
520
+ break;
521
+ case MATCH_ALL_QUERY:
522
+ self = MK_QUERY(cMatchAllQuery, q);
523
+ break;
524
+ case RANGE_QUERY:
525
+ self = MK_QUERY(cRangeQuery, q);
526
+ break;
527
+ case WILD_CARD_QUERY:
528
+ self = MK_QUERY(cWildcardQuery, q);
529
+ break;
530
+ case FUZZY_QUERY:
531
+ self = MK_QUERY(cFuzzyQuery, q);
532
+ break;
533
+ case PREFIX_QUERY:
534
+ self = MK_QUERY(cPrefixQuery, q);
535
+ break;
536
+ case SPAN_TERM_QUERY:
537
+ self = MK_QUERY(cSpanMultiTermQuery, q);
538
+ break;
539
+ case SPAN_MULTI_TERM_QUERY:
540
+ self = MK_QUERY(cSpanPrefixQuery, q);
541
+ break;
542
+ case SPAN_PREFIX_QUERY:
543
+ self = MK_QUERY(cSpanTermQuery, q);
544
+ break;
545
+ case SPAN_FIRST_QUERY:
546
+ self = MK_QUERY(cSpanFirstQuery, q);
547
+ break;
548
+ case SPAN_OR_QUERY:
549
+ self = MK_QUERY(cSpanOrQuery, q);
550
+ break;
551
+ case SPAN_NOT_QUERY:
552
+ self = MK_QUERY(cSpanNotQuery, q);
553
+ break;
554
+ case SPAN_NEAR_QUERY:
555
+ self = MK_QUERY(cSpanNearQuery, q);
556
+ break;
557
+ default:
558
+ rb_raise(rb_eArgError, "Unknown query type");
559
+ break;
560
+ }
561
+ object_add(q, self);
562
+ }
563
+ return self;
564
+ }
565
+
566
+ /****************************************************************************
567
+ *
568
+ * TermQuery Methods
569
+ *
570
+ ****************************************************************************/
571
+
572
+ /*
573
+ * call-seq:
574
+ * TermQuery.new(field, term) -> term_query
575
+ *
576
+ * Create a new TermQuery object which will match all documents with the term
577
+ * +term+ in the field +field+.
578
+ *
579
+ * Note: As usual, field should be a symbol
580
+ */
581
+ static VALUE
582
+ frt_tq_init(VALUE self, VALUE rfield, VALUE rterm)
583
+ {
584
+ char *field = frt_field(rfield);
585
+ char *term = rs2s(rb_obj_as_string(rterm));
586
+ Query *q = tq_new(field, term);
587
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
588
+ object_add(q, self);
589
+ return self;
590
+ }
591
+
592
+ /****************************************************************************
593
+ *
594
+ * MultiTermQuery Methods
595
+ *
596
+ ****************************************************************************/
597
+
598
+ /*
599
+ * call-seq:
600
+ * MultiTermQuery.default_max_terms -> number
601
+ *
602
+ * Get the default value for +:max_terms+ in a MultiTermQuery. This value is
603
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
604
+ */
605
+ static VALUE
606
+ frt_mtq_get_dmt(VALUE self)
607
+ {
608
+ return rb_cvar_get(cMultiTermQuery, id_default_max_terms);
609
+ }
610
+
611
+ /*
612
+ * call-seq:
613
+ * MultiTermQuery.default_max_terms = max_terms -> max_terms
614
+ *
615
+ * Set the default value for +:max_terms+ in a MultiTermQuery. This value is
616
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
617
+ */
618
+ static VALUE
619
+ frt_mtq_set_dmt(VALUE self, VALUE rnum_terms)
620
+ {
621
+ int max_terms = FIX2INT(rnum_terms);
622
+ if (max_terms <= 0) {
623
+ rb_raise(rb_eArgError,
624
+ "%d <= 0. @@max_terms must be > 0", max_terms);
625
+ }
626
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, rnum_terms, Qfalse);
627
+ return rnum_terms;
628
+ }
629
+
630
+ /*
631
+ * call-seq:
632
+ * MultiTermQuery.new(field, options = {}) -> multi_term_query
633
+ *
634
+ * Create a new MultiTermQuery on field +field+. You will also need to add
635
+ * terms to the query using the MultiTermQuery#add_term method.
636
+ *
637
+ * There are several options available to you when creating a
638
+ * MultiTermQueries;
639
+ *
640
+ * === Options
641
+ *
642
+ * :max_terms:: You can specify the maximum number of terms that can be
643
+ * added to the query. This is to prevent memory usage overflow,
644
+ * particularly when don't directly control the addition of
645
+ * terms to the Query object like when you create Wildcard
646
+ * queries. For example, searching for "content:*" would cause
647
+ * problems without this limit.
648
+ * :min_score:: The minimum score a term must have to be added to the query.
649
+ * For example you could implement your own wild-card queries
650
+ * that gives matches a score. To limit the number of terms
651
+ * added to the query you could set a lower limit to this score.
652
+ * FuzzyQuery in particular makes use of this parameter.
653
+ */
654
+ static VALUE
655
+ frt_mtq_init(int argc, VALUE *argv, VALUE self)
656
+ {
657
+ VALUE rfield, roptions;
658
+ float min_score = 0.0;
659
+ int max_terms = FIX2INT(frt_mtq_get_dmt(self));
660
+ Query *q;
661
+
662
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
663
+ VALUE v;
664
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
665
+ max_terms = FIX2INT(v);
666
+ }
667
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_score))) {
668
+ min_score = (float)NUM2DBL(v);
669
+ }
670
+ }
671
+ q = multi_tq_new_conf(frt_field(rfield), max_terms, min_score);
672
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
673
+ object_add(q, self);
674
+ return self;
675
+ }
676
+
677
+ /*
678
+ * call-seq:
679
+ * multi_term_query.add_term(term, score = 1.0) -> self
680
+ * multi_term_query << term1 << term2 << term3 -> self
681
+ *
682
+ * Add a term to the MultiTermQuery with the score 1.0 unless specified
683
+ * otherwise.
684
+ */
685
+ static VALUE
686
+ frt_mtq_add_term(int argc, VALUE *argv, VALUE self)
687
+ {
688
+ GET_Q();
689
+ VALUE rterm, rboost;
690
+ float boost = 1.0;
691
+ char *term = NULL;
692
+ if (rb_scan_args(argc, argv, "11", &rterm, &rboost) == 2) {
693
+ boost = (float)NUM2DBL(rboost);
694
+ }
695
+ term = StringValuePtr(rterm);
696
+ multi_tq_add_term_boost(q, term, boost);
697
+
698
+ return self;
699
+ }
700
+
701
+ typedef Query *(*mtq_maker_ft)(const char *field, const char *term);
702
+
703
+ static VALUE
704
+ frt_mtq_init_specific(int argc, VALUE *argv, VALUE self, mtq_maker_ft mm)
705
+ {
706
+ VALUE rfield, rterm, rmax_terms;
707
+ int max_terms =
708
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
709
+ Query *q;
710
+
711
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &rmax_terms) == 3) {
712
+ max_terms = FIX2INT(rmax_terms);
713
+ }
714
+
715
+ q = (*mm)(frt_field(rfield), StringValuePtr(rterm));
716
+ MTQMaxTerms(q) = max_terms;
717
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
718
+ object_add(q, self);
719
+ return self;
720
+ }
721
+
722
+ /****************************************************************************
723
+ *
724
+ * BooleanClause Methods
725
+ *
726
+ ****************************************************************************/
727
+
728
+ static void
729
+ frt_bc_mark(void *p)
730
+ {
731
+ frt_gc_mark(((BooleanClause *)p)->query);
732
+ }
733
+
734
+ static void
735
+ frt_bc_free(void *p)
736
+ {
737
+ object_del(p);
738
+ bc_deref((BooleanClause *)p);
739
+ }
740
+
741
+ static VALUE
742
+ frt_bc_wrap(BooleanClause *bc)
743
+ {
744
+ VALUE self = Data_Wrap_Struct(cBooleanClause, &frt_bc_mark, &frt_bc_free, bc);
745
+ REF(bc);
746
+ object_add(bc, self);
747
+ return self;
748
+ }
749
+
750
+ static enum BC_TYPE
751
+ frt_get_occur(VALUE roccur)
752
+ {
753
+ enum BC_TYPE occur = BC_SHOULD;
754
+
755
+ if (roccur == sym_should) {
756
+ occur = BC_SHOULD;
757
+ } else if (roccur == sym_must) {
758
+ occur = BC_MUST;
759
+ } else if (roccur == sym_must_not) {
760
+ occur = BC_MUST_NOT;
761
+ } else {
762
+ rb_raise(rb_eArgError, "occur argument must be one of [:must, "
763
+ ":should, :must_not]");
764
+ }
765
+ return occur;
766
+ }
767
+
768
+ /*
769
+ * call-seq:
770
+ * BooleanClause.new(query, occur = :should) -> BooleanClause
771
+ *
772
+ * Create a new BooleanClause object, wrapping the query +query+. +occur+
773
+ * must be one of +:must+, +:should+ or +:must_not+.
774
+ */
775
+ static VALUE
776
+ frt_bc_init(int argc, VALUE *argv, VALUE self)
777
+ {
778
+ BooleanClause *bc;
779
+ VALUE rquery, roccur;
780
+ unsigned int occur = BC_SHOULD;
781
+ Query *sub_q;
782
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
783
+ occur = frt_get_occur(roccur);
784
+ }
785
+ Data_Get_Struct(rquery, Query, sub_q);
786
+ REF(sub_q);
787
+ bc = bc_new(sub_q, occur);
788
+ Frt_Wrap_Struct(self, &frt_bc_mark, &frt_bc_free, bc);
789
+ object_add(bc, self);
790
+ return self;
791
+ }
792
+
793
+ #define GET_BC() BooleanClause *bc = (BooleanClause *)DATA_PTR(self)
794
+ /*
795
+ * call-seq:
796
+ * clause.query -> query
797
+ *
798
+ * Return the query object wrapped by this BooleanClause.
799
+ */
800
+ static VALUE
801
+ frt_bc_get_query(VALUE self)
802
+ {
803
+ GET_BC();
804
+ return object_get(bc->query);
805
+ }
806
+
807
+ /*
808
+ * call-seq:
809
+ * clause.query = query -> query
810
+ *
811
+ * Set the query wrapped by this BooleanClause.
812
+ */
813
+ static VALUE
814
+ frt_bc_set_query(VALUE self, VALUE rquery)
815
+ {
816
+ GET_BC();
817
+ Data_Get_Struct(rquery, Query, bc->query);
818
+ return rquery;
819
+ }
820
+
821
+ /*
822
+ * call-seq:
823
+ * clause.required? -> bool
824
+ *
825
+ * Return true if this clause is required. ie, this will be true if occur was
826
+ * equal to +:must+.
827
+ */
828
+ static VALUE
829
+ frt_bc_is_required(VALUE self)
830
+ {
831
+ GET_BC();
832
+ return bc->is_required ? Qtrue : Qfalse;
833
+ }
834
+
835
+ /*
836
+ * call-seq:
837
+ * clause.prohibited? -> bool
838
+ *
839
+ * Return true if this clause is prohibited. ie, this will be true if occur was
840
+ * equal to +:must_not+.
841
+ */
842
+ static VALUE
843
+ frt_bc_is_prohibited(VALUE self)
844
+ {
845
+ GET_BC();
846
+ return bc->is_prohibited ? Qtrue : Qfalse;
847
+ }
848
+
849
+ /*
850
+ * call-seq:
851
+ * clause.occur = occur -> occur
852
+ *
853
+ * Set the +occur+ value for this BooleanClause. +occur+ must be one of
854
+ * +:must+, +:should+ or +:must_not+.
855
+ */
856
+ static VALUE
857
+ frt_bc_set_occur(VALUE self, VALUE roccur)
858
+ {
859
+ GET_BC();
860
+ enum BC_TYPE occur = frt_get_occur(roccur);
861
+ bc_set_occur(bc, occur);
862
+
863
+ return roccur;
864
+ }
865
+
866
+ /*
867
+ * call-seq:
868
+ * clause.to_s -> string
869
+ *
870
+ * Return a string representation of this clause. This will not be used by
871
+ * BooleanQuery#to_s. It is only used by BooleanClause#to_s and will specify
872
+ * whether the clause is +:must+, +:should+ or +:must_not+.
873
+ */
874
+ static VALUE
875
+ frt_bc_to_s(VALUE self)
876
+ {
877
+ VALUE rstr;
878
+ char *qstr, *ostr = "", *str;
879
+ int len;
880
+ GET_BC();
881
+ qstr = bc->query->to_s(bc->query, "");
882
+ switch (bc->occur) {
883
+ case BC_SHOULD:
884
+ ostr = "Should";
885
+ break;
886
+ case BC_MUST:
887
+ ostr = "Must";
888
+ break;
889
+ case BC_MUST_NOT:
890
+ ostr = "Must Not";
891
+ break;
892
+ }
893
+ len = strlen(ostr) + strlen(qstr) + 2;
894
+ str = ALLOC_N(char, len);
895
+ sprintf(str, "%s:%s", ostr, qstr);
896
+ rstr = rb_str_new(str, len);
897
+ free(qstr);
898
+ free(str);
899
+ return rstr;
900
+ }
901
+
902
+ /****************************************************************************
903
+ *
904
+ * BooleanQuery Methods
905
+ *
906
+ ****************************************************************************/
907
+
908
+ static void
909
+ frt_bq_mark(void *p)
910
+ {
911
+ int i;
912
+ Query *q = (Query *)p;
913
+ BooleanQuery *bq = (BooleanQuery *)q;
914
+ for (i = 0; i < bq->clause_cnt; i++) {
915
+ frt_gc_mark(bq->clauses[i]);
916
+ }
917
+ }
918
+
919
+ /*
920
+ * call-seq:
921
+ * BooleanQuery.new(coord_disable = false)
922
+ *
923
+ * Create a new BooleanQuery. If you don't care about the scores of the
924
+ * sub-queries added to the query (as would be the case for many
925
+ * automatically generated queries) you can disable the coord_factor of the
926
+ * score. This will slightly improve performance for the query. Usually you
927
+ * should leave this parameter as is.
928
+ */
929
+ static VALUE
930
+ frt_bq_init(int argc, VALUE *argv, VALUE self)
931
+ {
932
+ VALUE rcoord_disabled;
933
+ bool coord_disabled = false;
934
+ Query *q;
935
+ if (rb_scan_args(argc, argv, "01", &rcoord_disabled)) {
936
+ coord_disabled = RTEST(rcoord_disabled);
937
+ }
938
+ q = bq_new(coord_disabled);
939
+ Frt_Wrap_Struct(self, &frt_bq_mark, &frt_q_free, q);
940
+ object_add(q, self);
941
+ return self;
942
+ }
943
+
944
+ /*
945
+ * call-seq:
946
+ * boolean_query.add_query(query, occur = :should) -> boolean_clause
947
+ * boolean_query.<<(query, occur = :should) -> boolean_clause
948
+ * boolean_query << boolean_clause -> boolean_clause
949
+ *
950
+ * Us this method to add sub-queries to a BooleanQuery. You can either add
951
+ * a straight Query or a BooleanClause. When adding a Query, the default
952
+ * occurrence requirement is :should. That is the Query's match will be
953
+ * scored but it isn't essential for a match. If the query should be
954
+ * essential, use :must. For exclusive queries use :must_not.
955
+ *
956
+ * When adding a Boolean clause to a BooleanQuery there is no need to set the
957
+ * occurrence property because it is already set in the BooleanClause.
958
+ * Therefor the +occur+ parameter will be ignored in this case.
959
+ *
960
+ * query:: Query to add to the BooleanQuery
961
+ * occur:: occurrence requirement for the query being added. Must be one of
962
+ * [:must, :should, :must_not]
963
+ * returns:: BooleanClause which was added
964
+ */
965
+ static VALUE
966
+ frt_bq_add_query(int argc, VALUE *argv, VALUE self)
967
+ {
968
+ GET_Q();
969
+ VALUE rquery, roccur;
970
+ enum BC_TYPE occur = BC_SHOULD;
971
+ Query *sub_q;
972
+ VALUE klass;
973
+
974
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
975
+ occur = frt_get_occur(roccur);
976
+ }
977
+ klass = CLASS_OF(rquery);
978
+ if (klass == cBooleanClause) {
979
+ BooleanClause *bc = (BooleanClause *)DATA_PTR(rquery);
980
+ if (argc > 1) {
981
+ rb_warning("Second argument to BooleanQuery#add is ignored "
982
+ "when adding BooleanClause");
983
+ }
984
+ bq_add_clause(q, bc);
985
+ return rquery;
986
+ } else if (TYPE(rquery) == T_DATA) {
987
+ Data_Get_Struct(rquery, Query, sub_q);
988
+ return frt_bc_wrap(bq_add_query(q, sub_q, occur));
989
+ } else {
990
+ rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
991
+ rb_class2name(klass));
992
+ }
993
+ return self;
994
+ }
995
+
996
+ /****************************************************************************
997
+ *
998
+ * RangeQuery Methods
999
+ *
1000
+ ****************************************************************************/
1001
+
1002
+ static void
1003
+ get_range_params(VALUE roptions, char **lterm, char **uterm,
1004
+ bool *include_lower, bool *include_upper)
1005
+ {
1006
+ VALUE v;
1007
+ Check_Type(roptions, T_HASH);
1008
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower))) {
1009
+ *lterm = StringValuePtr(v);
1010
+ *include_lower = true;
1011
+ }
1012
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper))) {
1013
+ *uterm = StringValuePtr(v);
1014
+ *include_upper = true;
1015
+ }
1016
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
1017
+ *lterm = StringValuePtr(v);
1018
+ *include_lower = false;
1019
+ }
1020
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
1021
+ *uterm = StringValuePtr(v);
1022
+ *include_upper = false;
1023
+ }
1024
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
1025
+ *include_lower = RTEST(v);
1026
+ }
1027
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_upper))) {
1028
+ *include_upper = RTEST(v);
1029
+ }
1030
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than))) {
1031
+ *lterm = StringValuePtr(v);
1032
+ *include_lower = false;
1033
+ }
1034
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than_or_equal_to))) {
1035
+ *lterm = StringValuePtr(v);
1036
+ *include_lower = true;
1037
+ }
1038
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than))) {
1039
+ *uterm = StringValuePtr(v);
1040
+ *include_upper = false;
1041
+ }
1042
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than_or_equal_to))) {
1043
+ *uterm = StringValuePtr(v);
1044
+ *include_upper = true;
1045
+ }
1046
+ if (!*lterm && !*uterm) {
1047
+ rb_raise(rb_eArgError,
1048
+ "The bounds of a range should not both be nil");
1049
+ }
1050
+ if (*include_lower && !*lterm) {
1051
+ rb_raise(rb_eArgError,
1052
+ "The lower bound should not be nil if it is inclusive");
1053
+ }
1054
+ if (*include_upper && !*uterm) {
1055
+ rb_raise(rb_eArgError,
1056
+ "The upper bound should not be nil if it is inclusive");
1057
+ }
1058
+ if (*uterm && *lterm && (strcmp(*uterm, *lterm) < 0)) {
1059
+ rb_raise(rb_eArgError,
1060
+ "The upper bound should greater than the lower bound."
1061
+ " %s > %s", *lterm, *uterm);
1062
+ }
1063
+ }
1064
+
1065
+ /*
1066
+ * call-seq:
1067
+ * RangeQuery.new(field, options = {}) -> range_query
1068
+ *
1069
+ * Create a new RangeQuery on field +field+. There are two ways to build a
1070
+ * range query. With the old-style options; +:lower+, +:upper+,
1071
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
1072
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
1073
+ * In the old-style options, limits are inclusive by default.
1074
+ *
1075
+ * == Examples
1076
+ *
1077
+ * q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
1078
+ * # is equivalent to
1079
+ * q = RangeQuery.new(:date, :< => "200501")
1080
+ * # is equivalent to
1081
+ * q = RangeQuery.new(:date, :lower_exclusive => "200501")
1082
+ *
1083
+ * q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
1084
+ * # is equivalent to
1085
+ * q = RangeQuery.new(:date, :>= => "200501", :<= => 200502)
1086
+ */
1087
+ static VALUE
1088
+ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
1089
+ {
1090
+ Query *q;
1091
+ char *lterm = NULL;
1092
+ char *uterm = NULL;
1093
+ bool include_lower = false;
1094
+ bool include_upper = false;
1095
+
1096
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1097
+ q = rq_new(frt_field(rfield),
1098
+ lterm, uterm,
1099
+ include_lower, include_upper);
1100
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1101
+ object_add(q, self);
1102
+ return self;
1103
+ }
1104
+
1105
+ /****************************************************************************
1106
+ *
1107
+ * PhraseQuery Methods
1108
+ *
1109
+ ****************************************************************************/
1110
+
1111
+ /*
1112
+ * call-seq:
1113
+ * PhraseQuery.new(field, slop = 0) -> phrase_query
1114
+ *
1115
+ * Create a new PhraseQuery on the field +field+. You need to add terms to
1116
+ * the query it will do anything of value. See PhraseQuery#add_term.
1117
+ */
1118
+ static VALUE
1119
+ frt_phq_init(int argc, VALUE *argv, VALUE self)
1120
+ {
1121
+ VALUE rfield, rslop;
1122
+ Query *q;
1123
+ rb_scan_args(argc, argv, "11", &rfield, &rslop);
1124
+ q = phq_new(frt_field(rfield));
1125
+ if (argc == 2) {
1126
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1127
+ }
1128
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1129
+ object_add(q, self);
1130
+ return self;
1131
+ }
1132
+
1133
+ /*
1134
+ * call-seq:
1135
+ * phrase_query.add_term(term, position_increment = 1) -> phrase_query
1136
+ * phrase_query << term -> phrase_query
1137
+ *
1138
+ * Add a term to the phrase query. By default the position_increment is set
1139
+ * to 1 so each term you add is expected to come directly after the previous
1140
+ * term. By setting position_increment to 2 you are specifying that the term
1141
+ * you just added should occur two terms after the previous term. For
1142
+ * example;
1143
+ *
1144
+ * phrase_query.add_term("big").add_term("house", 2)
1145
+ * # matches => "big brick house"
1146
+ * # matches => "big red house"
1147
+ * # doesn't match => "big house"
1148
+ */
1149
+ static VALUE
1150
+ frt_phq_add(int argc, VALUE *argv, VALUE self)
1151
+ {
1152
+ VALUE rterm, rpos_inc;
1153
+ int pos_inc = 1;
1154
+ GET_Q();
1155
+ if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
1156
+ pos_inc = FIX2INT(rpos_inc);
1157
+ }
1158
+ switch (TYPE(rterm)) {
1159
+ case T_STRING:
1160
+ {
1161
+ phq_add_term(q, StringValuePtr(rterm), pos_inc);
1162
+ break;
1163
+ }
1164
+ case T_ARRAY:
1165
+ {
1166
+ int i;
1167
+ char *t;
1168
+ if (RARRAY(rterm)->len < 1) {
1169
+ rb_raise(rb_eArgError, "Cannot add empty array to a "
1170
+ "PhraseQuery. You must add either a string or "
1171
+ "an array of strings");
1172
+ }
1173
+ t = StringValuePtr(RARRAY(rterm)->ptr[0]);
1174
+ phq_add_term(q, t, pos_inc);
1175
+ for (i = 1; i < RARRAY(rterm)->len; i++) {
1176
+ t = StringValuePtr(RARRAY(rterm)->ptr[i]);
1177
+ phq_append_multi_term(q, t);
1178
+ }
1179
+ break;
1180
+ }
1181
+ default:
1182
+ rb_raise(rb_eArgError, "You can only add a string or an array of "
1183
+ "strings to a PhraseQuery, not a %s\n",
1184
+ rs2s(rb_obj_as_string(rterm)));
1185
+ }
1186
+ return self;
1187
+ }
1188
+
1189
+ /*
1190
+ * call-seq:
1191
+ * phrase_query.slop -> integer
1192
+ *
1193
+ * Return the slop set for this phrase query. See the PhraseQuery
1194
+ * description for more information on slop
1195
+ */
1196
+ static VALUE
1197
+ frt_phq_get_slop(VALUE self)
1198
+ {
1199
+ GET_Q();
1200
+ return INT2FIX(((PhraseQuery *)q)->slop);
1201
+ }
1202
+
1203
+ /*
1204
+ * call-seq:
1205
+ * phrase_query.slop = slop -> slop
1206
+ *
1207
+ * Set the slop set for this phrase query. See the PhraseQuery description
1208
+ * for more information on slop
1209
+ */
1210
+ static VALUE
1211
+ frt_phq_set_slop(VALUE self, VALUE rslop)
1212
+ {
1213
+ GET_Q();
1214
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1215
+ return self;
1216
+ }
1217
+
1218
+ /****************************************************************************
1219
+ *
1220
+ * PrefixQuery Methods
1221
+ *
1222
+ ****************************************************************************/
1223
+
1224
+ /*
1225
+ * call-seq:
1226
+ * PrefixQuery.new(field, prefix, options = {}) -> prefix-query
1227
+ *
1228
+ * Create a new PrefixQuery to search for all terms with the prefix +prefix+
1229
+ * in the field +field+. There is one option that you can set to change the
1230
+ * behaviour of this query. +:max_terms+ specifies the maximum number of
1231
+ * terms to be added to the query when it is expanded into a MultiTermQuery.
1232
+ * Let's say for example you search an index with a million terms for all
1233
+ * terms beginning with the letter "s". You would end up with a very large
1234
+ * query which would use a lot of memory and take a long time to get results,
1235
+ * not to mention that it would probably match every document in the index.
1236
+ * To prevent queries like this crashing your application you can set
1237
+ * +:max_terms+ which limits the number of terms that get added to the query.
1238
+ * By default it is set to 512.
1239
+ */
1240
+ static VALUE
1241
+ frt_prq_init(int argc, VALUE *argv, VALUE self)
1242
+ {
1243
+ return frt_mtq_init_specific(argc, argv, self, &prefixq_new);
1244
+ }
1245
+
1246
+ /****************************************************************************
1247
+ *
1248
+ * WildcardQuery Methods
1249
+ *
1250
+ ****************************************************************************/
1251
+
1252
+ /*
1253
+ * call-seq:
1254
+ * WildcardQuery.new(field, pattern, options = {}) -> wild-card-query
1255
+ *
1256
+ * Create a new WildcardQuery to search for all terms where the pattern
1257
+ * +pattern+ matches in the field +field+.
1258
+ *
1259
+ * There is one option that you can set to change the behaviour of this
1260
+ * query. +:max_terms+ specifies the maximum number of terms to be added to
1261
+ * the query when it is expanded into a MultiTermQuery. Let's say for
1262
+ * example you have a million terms in your index and you let your users do
1263
+ * wild-card queries and one runs a search for "*". You would end up with a
1264
+ * very large query which would use a lot of memory and take a long time to
1265
+ * get results, not to mention that it would probably match every document in
1266
+ * the index. To prevent queries like this crashing your application you can
1267
+ * set +:max_terms+ which limits the number of terms that get added to the
1268
+ * query. By default it is set to 512.
1269
+ */
1270
+ static VALUE
1271
+ frt_wcq_init(int argc, VALUE *argv, VALUE self)
1272
+ {
1273
+ return frt_mtq_init_specific(argc, argv, self, &wcq_new);
1274
+ }
1275
+
1276
+ /****************************************************************************
1277
+ *
1278
+ * FuzzyQuery Methods
1279
+ *
1280
+ ****************************************************************************/
1281
+
1282
+ /*
1283
+ * call-seq:
1284
+ * FuzzyQuery.new(field, term, options = {}) -> fuzzy-query
1285
+ *
1286
+ * Create a new FuzzyQuery that will match terms with a similarity of at
1287
+ * least +:min_similarity+ to +term+. Similarity is scored using the
1288
+ * Levenshtein edit distance formula. See
1289
+ * http://en.wikipedia.org/wiki/Levenshtein_distance
1290
+ *
1291
+ * If a +:prefix_length+ > 0 is specified, a common prefix of that length is
1292
+ * also required.
1293
+ *
1294
+ * You can also set +:max_terms+ to prevent memory overflow problems. By
1295
+ * default it is set to 512.
1296
+ *
1297
+ * == Example
1298
+ *
1299
+ * FuzzyQuery.new(:content, "levenshtein",
1300
+ * :min_similarity => 0.8,
1301
+ * :prefix_length => 5,
1302
+ * :max_terms => 1024)
1303
+ *
1304
+ * field:: field to search
1305
+ * term:: term to search for including it's close matches
1306
+ * :min_similarity:: Default: 0.5. minimum levenshtein distance score for a
1307
+ * match
1308
+ * :prefix_length:: Default: 0. minimum prefix_match before levenshtein
1309
+ * distance is measured. This parameter is used to improve
1310
+ * performance. With a +:prefix_length+ of 0, all terms in
1311
+ * the index must be checked which can be quite a
1312
+ * performance hit. By setting the prefix length to a
1313
+ * larger number you minimize the number of terms that need
1314
+ * to be checked. Even 1 will cut down the work by a
1315
+ * factor of about 26 depending on your character set and
1316
+ * the first letter.
1317
+ * :max_terms:: Limits the number of terms that can be added to the
1318
+ * query when it is expanded as a MultiTermQuery. This is
1319
+ * not usually a problem with FuzzyQueries unless you set
1320
+ * +:min_similarity+ to a very low value.
1321
+ */
1322
+ static VALUE
1323
+ frt_fq_init(int argc, VALUE *argv, VALUE self)
1324
+ {
1325
+ Query *q;
1326
+ VALUE rfield, rterm, roptions;
1327
+ float min_sim =
1328
+ (float)NUM2DBL(rb_cvar_get(cFuzzyQuery, id_default_min_similarity));
1329
+ int pre_len =
1330
+ FIX2INT(rb_cvar_get(cFuzzyQuery, id_default_prefix_length));
1331
+ int max_terms =
1332
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
1333
+
1334
+
1335
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &roptions) >= 3) {
1336
+ VALUE v;
1337
+ Check_Type(roptions, T_HASH);
1338
+ if (Qnil != (v = rb_hash_aref(roptions, sym_prefix_length))) {
1339
+ pre_len = FIX2INT(v);
1340
+ }
1341
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_similarity))) {
1342
+ min_sim = (float)NUM2DBL(v);
1343
+ }
1344
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
1345
+ max_terms = FIX2INT(v);
1346
+ }
1347
+ }
1348
+
1349
+ if (min_sim >= 1.0) {
1350
+ rb_raise(rb_eArgError,
1351
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1352
+ } else if (min_sim < 0.0) {
1353
+ rb_raise(rb_eArgError,
1354
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1355
+ }
1356
+ if (pre_len < 0) {
1357
+ rb_raise(rb_eArgError,
1358
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1359
+ }
1360
+ if (max_terms < 0) {
1361
+ rb_raise(rb_eArgError,
1362
+ "%d < 0. :max_terms must be >= 0", max_terms);
1363
+ }
1364
+
1365
+ q = fuzq_new_conf(frt_field(rfield), StringValuePtr(rterm),
1366
+ min_sim, pre_len, max_terms);
1367
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1368
+ object_add(q, self);
1369
+ return self;
1370
+ }
1371
+
1372
+ /*
1373
+ * call-seq:
1374
+ * FuzzyQuery.prefix_length -> prefix_length
1375
+ *
1376
+ * Get the +:prefix_length+ for the query.
1377
+ */
1378
+ static VALUE
1379
+ frt_fq_pre_len(VALUE self)
1380
+ {
1381
+ GET_Q();
1382
+ return INT2FIX(((FuzzyQuery *)q)->pre_len);
1383
+ }
1384
+
1385
+ /*
1386
+ * call-seq:
1387
+ * FuzzyQuery.min_similarity -> min_similarity
1388
+ *
1389
+ * Get the +:min_similarity+ for the query.
1390
+ */
1391
+ static VALUE
1392
+ frt_fq_min_sim(VALUE self)
1393
+ {
1394
+ GET_Q();
1395
+ return rb_float_new((double)((FuzzyQuery *)q)->min_sim);
1396
+ }
1397
+
1398
+ /*
1399
+ * call-seq:
1400
+ * FuzzyQuery.default_min_similarity -> number
1401
+ *
1402
+ * Get the default value for +:min_similarity+
1403
+ */
1404
+ static VALUE
1405
+ frt_fq_get_dms(VALUE self)
1406
+ {
1407
+ return rb_cvar_get(cFuzzyQuery, id_default_min_similarity);
1408
+ }
1409
+
1410
+ extern float qp_default_fuzzy_min_sim;
1411
+ /*
1412
+ * call-seq:
1413
+ * FuzzyQuery.default_min_similarity = min_sim -> min_sim
1414
+ *
1415
+ * Set the default value for +:min_similarity+
1416
+ */
1417
+ static VALUE
1418
+ frt_fq_set_dms(VALUE self, VALUE val)
1419
+ {
1420
+ double min_sim = NUM2DBL(val);
1421
+ if (min_sim >= 1.0) {
1422
+ rb_raise(rb_eArgError,
1423
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1424
+ } else if (min_sim < 0.0) {
1425
+ rb_raise(rb_eArgError,
1426
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1427
+ }
1428
+ qp_default_fuzzy_min_sim = (float)min_sim;
1429
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val, Qfalse);
1430
+ return val;
1431
+ }
1432
+
1433
+ /*
1434
+ * call-seq:
1435
+ * FuzzyQuery.default_prefix_length -> number
1436
+ *
1437
+ * Get the default value for +:prefix_length+
1438
+ */
1439
+ static VALUE
1440
+ frt_fq_get_dpl(VALUE self)
1441
+ {
1442
+ return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
1443
+ }
1444
+
1445
+ extern int qp_default_fuzzy_pre_len;
1446
+ /*
1447
+ * call-seq:
1448
+ * FuzzyQuery.default_prefix_length = prefix_length -> prefix_length
1449
+ *
1450
+ * Set the default value for +:prefix_length+
1451
+ */
1452
+ static VALUE
1453
+ frt_fq_set_dpl(VALUE self, VALUE val)
1454
+ {
1455
+ int pre_len = FIX2INT(val);
1456
+ if (pre_len < 0) {
1457
+ rb_raise(rb_eArgError,
1458
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1459
+ }
1460
+ qp_default_fuzzy_pre_len = pre_len;
1461
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val, Qfalse);
1462
+ return val;
1463
+ }
1464
+
1465
+
1466
+ /****************************************************************************
1467
+ *
1468
+ * MatchAllQuery Methods
1469
+ *
1470
+ ****************************************************************************/
1471
+
1472
+ static VALUE
1473
+ frt_maq_alloc(VALUE klass)
1474
+ {
1475
+ Query *q = maq_new();
1476
+ VALUE self = Data_Wrap_Struct(klass, NULL, &frt_q_free, q);
1477
+ object_add(q, self);
1478
+ return self;
1479
+ }
1480
+
1481
+ /*
1482
+ * call-seq:
1483
+ * MatchAllQuery.new -> query
1484
+ *
1485
+ * Create a query which matches all documents.
1486
+ */
1487
+ static VALUE
1488
+ frt_maq_init(VALUE self)
1489
+ {
1490
+ return self;
1491
+ }
1492
+
1493
+ /****************************************************************************
1494
+ *
1495
+ * ConstantScoreQuery Methods
1496
+ *
1497
+ ****************************************************************************/
1498
+
1499
+ /*
1500
+ * call-seq:
1501
+ * ConstantScoreQuery.new(filter) -> query
1502
+ *
1503
+ * Create a ConstantScoreQuery which uses +filter+ to match documents giving
1504
+ * each document a constant score.
1505
+ */
1506
+ static VALUE
1507
+ frt_csq_init(VALUE self, VALUE rfilter)
1508
+ {
1509
+ Query *q;
1510
+ Filter *filter;
1511
+ Data_Get_Struct(rfilter, Filter, filter);
1512
+ q = csq_new(filter);
1513
+
1514
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1515
+ object_add(q, self);
1516
+ return self;
1517
+ }
1518
+
1519
+ /****************************************************************************
1520
+ *
1521
+ * FilteredQuery Methods
1522
+ *
1523
+ ****************************************************************************/
1524
+
1525
+ static void
1526
+ frt_fqq_mark(void *p)
1527
+ {
1528
+ FilteredQuery *fq = (FilteredQuery *)p;
1529
+ frt_gc_mark(fq->query);
1530
+ frt_gc_mark(fq->filter);
1531
+ }
1532
+
1533
+ /*
1534
+ * call-seq:
1535
+ * FilteredQuery.new(query, filter) -> query
1536
+ *
1537
+ * Create a new FilteredQuery which filters +query+ with +filter+.
1538
+ */
1539
+ static VALUE
1540
+ frt_fqq_init(VALUE self, VALUE rquery, VALUE rfilter)
1541
+ {
1542
+ Query *sq, *q;
1543
+ Filter *f;
1544
+ Data_Get_Struct(rquery, Query, sq);
1545
+ Data_Get_Struct(rfilter, Filter, f);
1546
+ q = fq_new(sq, f);
1547
+ REF(sq);
1548
+ REF(f);
1549
+ Frt_Wrap_Struct(self, &frt_fqq_mark, &frt_q_free, q);
1550
+ object_add(q, self);
1551
+ return self;
1552
+ }
1553
+
1554
+ /****************************************************************************
1555
+ *
1556
+ * SpanTermQuery Methods
1557
+ *
1558
+ ****************************************************************************/
1559
+
1560
+ /*
1561
+ * call-seq:
1562
+ * SpanTermQuery.new(field, term) -> query
1563
+ *
1564
+ * Create a new SpanTermQuery which matches all documents with the term
1565
+ * +term+ in the field +field+.
1566
+ */
1567
+ static VALUE
1568
+ frt_spantq_init(VALUE self, VALUE rfield, VALUE rterm)
1569
+ {
1570
+ Query *q = spantq_new(frt_field(rfield), StringValuePtr(rterm));
1571
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1572
+ object_add(q, self);
1573
+ return self;
1574
+ }
1575
+
1576
+ /****************************************************************************
1577
+ *
1578
+ * SpanMultiTermQuery Methods
1579
+ *
1580
+ ****************************************************************************/
1581
+
1582
+ /*
1583
+ * call-seq:
1584
+ * SpanMultiTermQuery.new(field, terms) -> query
1585
+ *
1586
+ * Create a new SpanMultiTermQuery which matches all documents with the terms
1587
+ * +terms+ in the field +field+. +terms+ should be an array of Strings.
1588
+ */
1589
+ static VALUE
1590
+ frt_spanmtq_init(VALUE self, VALUE rfield, VALUE rterms)
1591
+ {
1592
+ Query *q = spanmtq_new(frt_field(rfield));
1593
+ int i;
1594
+ for (i = RARRAY(rterms)->len - 1; i >= 0; i--) {
1595
+ spanmtq_add_term(q, StringValuePtr(RARRAY(rterms)->ptr[i]));
1596
+ }
1597
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1598
+ object_add(q, self);
1599
+ return self;
1600
+ }
1601
+
1602
+ /****************************************************************************
1603
+ *
1604
+ * SpanPrefixQuery Methods
1605
+ *
1606
+ ****************************************************************************/
1607
+
1608
+ /*
1609
+ * call-seq:
1610
+ * SpanPrefixQuery.new(field, prefix, max_terms = 256) -> query
1611
+ *
1612
+ * Create a new SpanPrefixQuery which matches all documents with the prefix
1613
+ * +prefix+ in the field +field+.
1614
+ */
1615
+ static VALUE
1616
+ frt_spanprq_init(int argc, VALUE *argv, VALUE self)
1617
+ {
1618
+ VALUE rfield, rprefix, rmax_terms;
1619
+ int max_terms = SPAN_PREFIX_QUERY_MAX_TERMS;
1620
+ Query *q;
1621
+ if (rb_scan_args(argc, argv, "21", &rfield, &rprefix, &rmax_terms) == 3) {
1622
+ max_terms = FIX2INT(rmax_terms);
1623
+ }
1624
+ q = spanprq_new(frt_field(rfield), StringValuePtr(rprefix));
1625
+ ((SpanPrefixQuery *)q)->max_terms = max_terms;
1626
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1627
+ object_add(q, self);
1628
+ return self;
1629
+ }
1630
+
1631
+ /****************************************************************************
1632
+ *
1633
+ * SpanFirstQuery Methods
1634
+ *
1635
+ ****************************************************************************/
1636
+
1637
+ /*
1638
+ * call-seq:
1639
+ * SpanFirstQuery.new(span_query, end) -> query
1640
+ *
1641
+ * Create a new SpanFirstQuery which matches all documents where +span_query+
1642
+ * matches before +end+ where +end+ is a byte-offset from the start of the
1643
+ * field
1644
+ */
1645
+ static VALUE
1646
+ frt_spanfq_init(VALUE self, VALUE rmatch, VALUE rend)
1647
+ {
1648
+ Query *q;
1649
+ Query *match;
1650
+ Data_Get_Struct(rmatch, Query, match);
1651
+ q = spanfq_new(match, FIX2INT(rend));
1652
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1653
+ object_add(q, self);
1654
+ return self;
1655
+ }
1656
+
1657
+ /****************************************************************************
1658
+ *
1659
+ * SpanNearQuery Methods
1660
+ *
1661
+ ****************************************************************************/
1662
+
1663
+ static void
1664
+ frt_spannq_mark(void *p)
1665
+ {
1666
+ int i;
1667
+ SpanNearQuery *snq = (SpanNearQuery *)p;
1668
+ for (i = 0; i < snq->c_cnt; i++) {
1669
+ frt_gc_mark(snq->clauses[i]);
1670
+ }
1671
+ }
1672
+
1673
+ /*
1674
+ * call-seq:
1675
+ * SpanNearQuery.new(options = {}) -> query
1676
+ *
1677
+ * Create a new SpanNearQuery. You can add an array of clauses with the
1678
+ * +:clause+ parameter or you can add clauses individually using the
1679
+ * SpanNearQuery#add method.
1680
+ *
1681
+ * query = SpanNearQuery.new(:clauses => [spanq1, spanq2, spanq3])
1682
+ * # is equivalent to
1683
+ * query = SpanNearQuery.new()
1684
+ * query << spanq1 << spanq2 << spanq3
1685
+ *
1686
+ * You have two other options which you can set.
1687
+ *
1688
+ * :slop:: Default: 0. Works exactly like a PhraseQuery slop. It is the
1689
+ * amount of slop allowed in the match (the term edit distance
1690
+ * allowed in the match).
1691
+ * :in_order:: Default: false. Specifies whether or not the matches have to
1692
+ * occur in the order they were added to the query. When slop is
1693
+ * set to 0, this parameter will make no difference.
1694
+ */
1695
+ static VALUE
1696
+ frt_spannq_init(int argc, VALUE *argv, VALUE self)
1697
+ {
1698
+ Query *q;
1699
+ VALUE roptions;
1700
+ int slop = 0;
1701
+ bool in_order = false;
1702
+
1703
+ if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
1704
+ VALUE v;
1705
+ if (Qnil != (v = rb_hash_aref(roptions, sym_slop))) {
1706
+ slop = FIX2INT(v);
1707
+ }
1708
+ if (Qnil != (v = rb_hash_aref(roptions, sym_in_order))) {
1709
+ in_order = RTEST(v);
1710
+ }
1711
+ }
1712
+ q = spannq_new(slop, in_order);
1713
+ if (argc > 0) {
1714
+ VALUE v;
1715
+ if (Qnil != (v = rb_hash_aref(roptions, sym_clauses))) {
1716
+ int i;
1717
+ Query *clause;
1718
+ Check_Type(v, T_ARRAY);
1719
+ for (i = 0; i < RARRAY(v)->len; i++) {
1720
+ Data_Get_Struct(RARRAY(v)->ptr[i], Query, clause);
1721
+ spannq_add_clause(q, clause);
1722
+ }
1723
+ }
1724
+ }
1725
+
1726
+ Frt_Wrap_Struct(self, &frt_spannq_mark, &frt_q_free, q);
1727
+ object_add(q, self);
1728
+ return self;
1729
+ }
1730
+
1731
+ /*
1732
+ * call-seq:
1733
+ * query.add(span_query) -> self
1734
+ * query << span_query -> self
1735
+ *
1736
+ * Add a clause to the SpanNearQuery. Clauses are stored in the order they
1737
+ * are added to the query which is important for matching. Note that clauses
1738
+ * must be SpanQueries, not other types of query.
1739
+ */
1740
+ static VALUE
1741
+ frt_spannq_add(VALUE self, VALUE rclause)
1742
+ {
1743
+ GET_Q();
1744
+ Query *clause;
1745
+ Data_Get_Struct(rclause, Query, clause);
1746
+ spannq_add_clause(q, clause);
1747
+ return self;
1748
+ }
1749
+
1750
+ /****************************************************************************
1751
+ *
1752
+ * SpanOrQuery Methods
1753
+ *
1754
+ ****************************************************************************/
1755
+
1756
+ static void
1757
+ frt_spanoq_mark(void *p)
1758
+ {
1759
+ int i;
1760
+ SpanOrQuery *soq = (SpanOrQuery *)p;
1761
+ for (i = 0; i < soq->c_cnt; i++) {
1762
+ frt_gc_mark(soq->clauses[i]);
1763
+ }
1764
+ }
1765
+
1766
+ /*
1767
+ * call-seq:
1768
+ * SpanOrQuery.new(options = {}) -> query
1769
+ *
1770
+ * Create a new SpanOrQuery. This is just like a BooleanQuery with all
1771
+ * clauses with the occur value of :should. The difference is that it can be
1772
+ * passed to other SpanQuerys like SpanNearQuery.
1773
+ */
1774
+ static VALUE
1775
+ frt_spanoq_init(int argc, VALUE *argv, VALUE self)
1776
+ {
1777
+ Query *q;
1778
+ VALUE rclauses;
1779
+
1780
+ q = spanoq_new();
1781
+ if (rb_scan_args(argc, argv, "01", &rclauses) > 0) {
1782
+ int i;
1783
+ Query *clause;
1784
+ Check_Type(rclauses, T_ARRAY);
1785
+ for (i = 0; i < RARRAY(rclauses)->len; i++) {
1786
+ Data_Get_Struct(RARRAY(rclauses)->ptr[i], Query, clause);
1787
+ spanoq_add_clause(q, clause);
1788
+ }
1789
+ }
1790
+ Frt_Wrap_Struct(self, &frt_spanoq_mark, &frt_q_free, q);
1791
+ object_add(q, self);
1792
+ return self;
1793
+ }
1794
+
1795
+ /*
1796
+ * call-seq:
1797
+ * query.add(span_query) -> self
1798
+ * query << span_query -> self
1799
+ *
1800
+ * Add a clause to the SpanOrQuery. Note that clauses must be SpanQueries,
1801
+ * not other types of query.
1802
+ */
1803
+ static VALUE
1804
+ frt_spanoq_add(VALUE self, VALUE rclause)
1805
+ {
1806
+ GET_Q();
1807
+ Query *clause;
1808
+ Data_Get_Struct(rclause, Query, clause);
1809
+ spanoq_add_clause(q, clause);
1810
+ return self;
1811
+ }
1812
+
1813
+ /****************************************************************************
1814
+ *
1815
+ * SpanNotQuery Methods
1816
+ *
1817
+ ****************************************************************************/
1818
+
1819
+ static void
1820
+ frt_spanxq_mark(void *p)
1821
+ {
1822
+ SpanNotQuery *sxq = (SpanNotQuery *)p;
1823
+ frt_gc_mark(sxq->inc);
1824
+ frt_gc_mark(sxq->exc);
1825
+ }
1826
+
1827
+ /*
1828
+ * call-seq:
1829
+ * SpanNotQuery.new(include_query, exclude_query) -> query
1830
+ *
1831
+ * Create a new SpanNotQuery which matches all documents which match
1832
+ * +include_query+ and don't match +exclude_query+.
1833
+ */
1834
+ static VALUE
1835
+ frt_spanxq_init(VALUE self, VALUE rinc, VALUE rexc)
1836
+ {
1837
+ Query *q;
1838
+ Check_Type(rinc, T_DATA);
1839
+ Check_Type(rexc, T_DATA);
1840
+ q = spanxq_new(DATA_PTR(rinc), DATA_PTR(rexc));
1841
+ Frt_Wrap_Struct(self, &frt_spanxq_mark, &frt_q_free, q);
1842
+ object_add(q, self);
1843
+ return self;
1844
+ }
1845
+
1846
+ /****************************************************************************
1847
+ *
1848
+ * Filter Methods
1849
+ *
1850
+ ****************************************************************************/
1851
+
1852
+ static void
1853
+ frt_f_free(void *p)
1854
+ {
1855
+ object_del(p);
1856
+ filt_deref((Filter *)p);
1857
+ }
1858
+
1859
+ #define GET_F() Filter *f = (Filter *)DATA_PTR(self)
1860
+
1861
+ /*
1862
+ * call-seq:
1863
+ * filter.to_s -> string
1864
+ *
1865
+ * Return a human readable string representing the Filter object that the
1866
+ * method was called on.
1867
+ */
1868
+ static VALUE
1869
+ frt_f_to_s(VALUE self)
1870
+ {
1871
+ VALUE rstr;
1872
+ char *str;
1873
+ GET_F();
1874
+ str = f->to_s(f);
1875
+ rstr = rb_str_new2(str);
1876
+ free(str);
1877
+ return rstr;
1878
+ }
1879
+
1880
+ extern VALUE frt_get_bv(BitVector *bv);
1881
+
1882
+ /*
1883
+ * call-seq:
1884
+ * filter.bits(index_reader) -> bit_vector
1885
+ *
1886
+ * Get the bit_vector used by this filter. This method will usually be used
1887
+ * to group filters or apply filters to other filters.
1888
+ */
1889
+ static VALUE
1890
+ frt_f_get_bits(VALUE self, VALUE rindex_reader)
1891
+ {
1892
+ BitVector *bv;
1893
+ IndexReader *ir;
1894
+ GET_F();
1895
+ Data_Get_Struct(rindex_reader, IndexReader, ir);
1896
+ bv = filt_get_bv(f, ir);
1897
+ return frt_get_bv(bv);
1898
+ }
1899
+
1900
+ /****************************************************************************
1901
+ *
1902
+ * RangeFilter Methods
1903
+ *
1904
+ ****************************************************************************/
1905
+
1906
+
1907
+ /*
1908
+ * call-seq:
1909
+ * RangeFilter.new(field, options = {}) -> range_query
1910
+ *
1911
+ * Create a new RangeFilter on field +field+. There are two ways to build a
1912
+ * range filter. With the old-style options; +:lower+, +:upper+,
1913
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
1914
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
1915
+ * In the old-style options, limits are inclusive by default.
1916
+ *
1917
+ * == Examples
1918
+ *
1919
+ * f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
1920
+ * # is equivalent to
1921
+ * f = RangeFilter.new(:date, :< => "200501")
1922
+ * # is equivalent to
1923
+ * f = RangeFilter.new(:date, :lower_exclusive => "200501")
1924
+ *
1925
+ * f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
1926
+ * # is equivalent to
1927
+ * f = RangeFilter.new(:date, :>= => "200501", :<= => 200502)
1928
+ */
1929
+ static VALUE
1930
+ frt_rf_init(VALUE self, VALUE rfield, VALUE roptions)
1931
+ {
1932
+ Filter *f;
1933
+ char *lterm = NULL;
1934
+ char *uterm = NULL;
1935
+ bool include_lower = false;
1936
+ bool include_upper = false;
1937
+
1938
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1939
+ f = rfilt_new(frt_field(rfield), lterm, uterm,
1940
+ include_lower, include_upper);
1941
+ Frt_Wrap_Struct(self, NULL, &frt_f_free, f);
1942
+ object_add(f, self);
1943
+ return self;
1944
+ }
1945
+
1946
+ /****************************************************************************
1947
+ *
1948
+ * QueryFilter Methods
1949
+ *
1950
+ ****************************************************************************/
1951
+
1952
+ /*
1953
+ * call-seq:
1954
+ * QueryFilter.new(query) -> filter
1955
+ *
1956
+ * Create a new QueryFilter which applies the query +query+.
1957
+ */
1958
+ static VALUE
1959
+ frt_qf_init(VALUE self, VALUE rquery)
1960
+ {
1961
+ Query *q;
1962
+ Filter *f;
1963
+ Data_Get_Struct(rquery, Query, q);
1964
+ f = qfilt_new(q);
1965
+ Frt_Wrap_Struct(self, NULL, &frt_f_free, f);
1966
+ object_add(f, self);
1967
+ return self;
1968
+ }
1969
+
1970
+ /****************************************************************************
1971
+ *
1972
+ * SortField Methods
1973
+ *
1974
+ ****************************************************************************/
1975
+
1976
+ static void
1977
+ frt_sf_free(void *p)
1978
+ {
1979
+ object_del(p);
1980
+ sort_field_destroy((SortField *)p);
1981
+ }
1982
+
1983
+ static VALUE
1984
+ frt_get_sf(SortField *sf)
1985
+ {
1986
+ VALUE self = object_get(sf);
1987
+ if (self == Qnil) {
1988
+ self = Data_Wrap_Struct(cSortField, NULL, &frt_sf_free, sf);
1989
+ object_add(sf, self);
1990
+ }
1991
+ return self;
1992
+ }
1993
+
1994
+ static int
1995
+ get_sort_type(VALUE rtype)
1996
+ {
1997
+ Check_Type(rtype, T_SYMBOL);
1998
+ if (rtype == sym_byte) {
1999
+ return SORT_TYPE_BYTE;
2000
+ } else if (rtype == sym_integer) {
2001
+ return SORT_TYPE_INTEGER;
2002
+ } else if (rtype == sym_string) {
2003
+ return SORT_TYPE_STRING;
2004
+ } else if (rtype == sym_score) {
2005
+ return SORT_TYPE_SCORE;
2006
+ } else if (rtype == sym_doc_id) {
2007
+ return SORT_TYPE_DOC;
2008
+ } else if (rtype == sym_float) {
2009
+ return SORT_TYPE_FLOAT;
2010
+ } else if (rtype == sym_auto) {
2011
+ return SORT_TYPE_AUTO;
2012
+ } else {
2013
+ rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
2014
+ "from [:integer, :float, :string, :auto, :score, :doc_id]",
2015
+ rb_id2name(SYM2ID(rtype)));
2016
+ }
2017
+ return SORT_TYPE_DOC;
2018
+ }
2019
+
2020
+ /*
2021
+ * call-seq:
2022
+ * SortField.new(field, options = {}) -> sort_field
2023
+ *
2024
+ * Create a new SortField which can be used to sort the result-set by the
2025
+ * value in field +field+.
2026
+ *
2027
+ * === Options
2028
+ *
2029
+ * :type:: Default: +:auto+. Specifies how a field should be sorted.
2030
+ * Choose from one of; +:auto+, +:integer+, +:float+,
2031
+ * +:string+, +:byte+, +:doc_id+ or +:score+. +:auto+ will
2032
+ * check the datatype of the field by trying to parse it into
2033
+ * either a number or a float before settling on a string
2034
+ * sort. String sort is locale dependent and works for
2035
+ * multibyte character sets like UTF-8 if you have your
2036
+ * locale set correctly.
2037
+ * :reverse Default: false. Set to true if you want to reverse the
2038
+ * sort.
2039
+ */
2040
+ static VALUE
2041
+ frt_sf_init(int argc, VALUE *argv, VALUE self)
2042
+ {
2043
+ SortField *sf;
2044
+ VALUE rfield, roptions;
2045
+ VALUE rval;
2046
+ int type = SORT_TYPE_AUTO;
2047
+ int is_reverse = false;
2048
+ char *field;
2049
+
2050
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
2051
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_type))) {
2052
+ type = get_sort_type(rval);
2053
+ }
2054
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_reverse))) {
2055
+ is_reverse = RTEST(rval);
2056
+ }
2057
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_comparator))) {
2058
+ rb_raise(rb_eArgError, "Unsupported argument ':comparator'");
2059
+ }
2060
+ }
2061
+ if (NIL_P(rfield)) rb_raise(rb_eArgError, "must pass a valid field name");
2062
+ field = frt_field(rfield);
2063
+
2064
+ sf = sort_field_new(field, type, is_reverse);
2065
+ if (sf->field == NULL && field) {
2066
+ sf->field = estrdup(field);
2067
+ }
2068
+
2069
+ Frt_Wrap_Struct(self, NULL, &frt_sf_free, sf);
2070
+ object_add(sf, self);
2071
+ return self;
2072
+ }
2073
+
2074
+ #define GET_SF() SortField *sf = (SortField *)DATA_PTR(self)
2075
+
2076
+ /*
2077
+ * call-seq:
2078
+ * sort_field.reverse? -> bool
2079
+ *
2080
+ * Return true if the field is to be reverse sorted. This attribute is set
2081
+ * when you create the sort_field.
2082
+ */
2083
+ static VALUE
2084
+ frt_sf_is_reverse(VALUE self)
2085
+ {
2086
+ GET_SF();
2087
+ return sf->reverse ? Qtrue : Qfalse;
2088
+ }
2089
+
2090
+ /*
2091
+ * call-seq:
2092
+ * sort_field.name -> symbol
2093
+ *
2094
+ * Returns the name of the field to be sorted.
2095
+ */
2096
+ static VALUE
2097
+ frt_sf_get_name(VALUE self)
2098
+ {
2099
+ GET_SF();
2100
+ return sf->field ? ID2SYM(rb_intern(sf->field)) : Qnil;
2101
+ }
2102
+
2103
+ /*
2104
+ * call-seq:
2105
+ * sort_field.type -> symbol
2106
+ *
2107
+ * Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
2108
+ * +:string+, +:byte+, +:doc_id+ or +:score+.
2109
+ */
2110
+ static VALUE
2111
+ frt_sf_get_type(VALUE self)
2112
+ {
2113
+ GET_SF();
2114
+ switch (sf->type) {
2115
+ case SORT_TYPE_BYTE: return sym_byte;
2116
+ case SORT_TYPE_INTEGER: return sym_integer;
2117
+ case SORT_TYPE_FLOAT: return sym_float;
2118
+ case SORT_TYPE_STRING: return sym_string;
2119
+ case SORT_TYPE_AUTO: return sym_auto;
2120
+ case SORT_TYPE_DOC: return sym_doc_id;
2121
+ case SORT_TYPE_SCORE: return sym_score;
2122
+ }
2123
+ return Qnil;
2124
+ }
2125
+
2126
+ /*
2127
+ * call-seq:
2128
+ * sort_field.comparator -> symbol
2129
+ *
2130
+ * TODO: currently unsupported
2131
+ */
2132
+ static VALUE
2133
+ frt_sf_get_comparator(VALUE self)
2134
+ {
2135
+ return Qnil;
2136
+ }
2137
+
2138
+ /*
2139
+ * call-seq:
2140
+ * sort_field.to_s -> string
2141
+ *
2142
+ * Return a human readable string describing this +sort_field+.
2143
+ */
2144
+ static VALUE
2145
+ frt_sf_to_s(VALUE self)
2146
+ {
2147
+ GET_SF();
2148
+ char *str = sort_field_to_s(sf);
2149
+ VALUE rstr = rb_str_new2(str);
2150
+ free(str);
2151
+ return rstr;
2152
+ }
2153
+
2154
+ /****************************************************************************
2155
+ *
2156
+ * Sort Methods
2157
+ *
2158
+ ****************************************************************************/
2159
+
2160
+ static void
2161
+ frt_sort_free(void *p)
2162
+ {
2163
+ Sort *sort = (Sort *)p;
2164
+ object_del(sort);
2165
+ sort_destroy(sort);
2166
+ }
2167
+
2168
+ static void
2169
+ frt_sort_mark(void *p)
2170
+ {
2171
+ Sort *sort = (Sort *)p;
2172
+ int i;
2173
+ for (i = 0; i < sort->size; i++) {
2174
+ frt_gc_mark(sort->sort_fields[i]);
2175
+ }
2176
+ }
2177
+
2178
+ static VALUE
2179
+ frt_sort_alloc(VALUE klass)
2180
+ {
2181
+ VALUE self;
2182
+ Sort *sort = sort_new();
2183
+ sort->destroy_all = false;
2184
+ self = Data_Wrap_Struct(klass, &frt_sort_mark, &frt_sort_free, sort);
2185
+ object_add(sort, self);
2186
+ return self;
2187
+ }
2188
+
2189
+ static void
2190
+ frt_parse_sort_str(Sort *sort, char *xsort_str)
2191
+ {
2192
+ SortField *sf;
2193
+ char *comma, *end, *e, *s;
2194
+ const int len = strlen(xsort_str);
2195
+ char *sort_str = ALLOC_N(char, len + 2);
2196
+ strcpy(sort_str, xsort_str);
2197
+
2198
+ end = &sort_str[len];
2199
+
2200
+ s = sort_str;
2201
+
2202
+ while ((s < end)
2203
+ && (NULL != (comma = strchr(s, ',')) || (NULL != (comma = end)))) {
2204
+ bool reverse = false;
2205
+ /* strip spaces */
2206
+ e = comma;
2207
+ while ((isspace(*s) || *s == ':') && s < e) s++;
2208
+ while (isspace(e[-1]) && s < e) e--;
2209
+ *e = '\0';
2210
+ if (e > (s + 4) && strcmp("DESC", &e[-4]) == 0) {
2211
+ reverse = true;
2212
+ e -= 4;
2213
+ while (isspace(e[-1]) && s < e) e--;
2214
+ }
2215
+ *e = '\0';
2216
+
2217
+ if (strcmp("SCORE", s) == 0) {
2218
+ sf = sort_field_score_new(reverse);
2219
+ } else if (strcmp("DOC_ID", s) == 0) {
2220
+ sf = sort_field_doc_new(reverse);
2221
+ } else {
2222
+ sf = sort_field_auto_new(s, reverse);
2223
+ }
2224
+ frt_get_sf(sf);
2225
+ sort_add_sort_field(sort, sf);
2226
+ s = comma + 1;
2227
+ }
2228
+ free(sort_str);
2229
+ }
2230
+
2231
+ static void
2232
+ frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
2233
+ {
2234
+ SortField *sf;
2235
+ switch (TYPE(rsf)) {
2236
+ case T_DATA:
2237
+ Data_Get_Struct(rsf, SortField, sf);
2238
+ if (reverse) sf->reverse = !sf->reverse;
2239
+ sort_add_sort_field(sort, sf);
2240
+ break;
2241
+ case T_SYMBOL:
2242
+ rsf = rb_obj_as_string(rsf);
2243
+ sf = sort_field_auto_new(rs2s(rsf), reverse);
2244
+ /* need to give it a ruby object so it'll be freed when the
2245
+ * sort is garbage collected */
2246
+ rsf = frt_get_sf(sf);
2247
+ sort_add_sort_field(sort, sf);
2248
+ break;
2249
+ case T_STRING:
2250
+ frt_parse_sort_str(sort, rs2s(rsf));
2251
+ break;
2252
+ default:
2253
+ rb_raise(rb_eArgError, "Unknown SortField Type");
2254
+ break;
2255
+ }
2256
+ }
2257
+
2258
+ #define GET_SORT() Sort *sort = (Sort *)DATA_PTR(self)
2259
+ /*
2260
+ * call-seq:
2261
+ * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_ID], reverse = false) -> Sort
2262
+ *
2263
+ * Create a new Sort object. If +reverse+ is true, all sort_fields will be
2264
+ * reversed so if any of them are already reversed the will be turned back
2265
+ * to their natural order again. By default
2266
+ */
2267
+ static VALUE
2268
+ frt_sort_init(int argc, VALUE *argv, VALUE self)
2269
+ {
2270
+ int i;
2271
+ VALUE rfields, rreverse;
2272
+ bool reverse = false;
2273
+ bool has_sfd = false;
2274
+ GET_SORT();
2275
+ switch (rb_scan_args(argc, argv, "02", &rfields, &rreverse)) {
2276
+ case 2: reverse = RTEST(rreverse);
2277
+ case 1:
2278
+ if (TYPE(rfields) == T_ARRAY) {
2279
+ int i;
2280
+ for (i = 0; i < RARRAY(rfields)->len; i++) {
2281
+ frt_sort_add(sort, RARRAY(rfields)->ptr[i], reverse);
2282
+ }
2283
+ } else {
2284
+ frt_sort_add(sort, rfields, reverse);
2285
+ }
2286
+ for (i = 0; i < sort->size; i++) {
2287
+ if (sort->sort_fields[i] == &SORT_FIELD_DOC) has_sfd = true;
2288
+ }
2289
+ if (!has_sfd) {
2290
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
2291
+ }
2292
+ break;
2293
+ case 0:
2294
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_SCORE);
2295
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
2296
+ }
2297
+
2298
+ return self;
2299
+ }
2300
+
2301
+ /*
2302
+ * call-seq:
2303
+ * sort.fields -> Array
2304
+ *
2305
+ * Returns an array of the SortFields held by the Sort object.
2306
+ */
2307
+ static VALUE
2308
+ frt_sort_get_fields(VALUE self)
2309
+ {
2310
+ GET_SORT();
2311
+ VALUE rfields = rb_ary_new2(sort->size);
2312
+ int i;
2313
+ for (i = 0; i < sort->size; i++) {
2314
+ rb_ary_store(rfields, i, object_get(sort->sort_fields[i]));
2315
+ }
2316
+ return rfields;
2317
+ }
2318
+
2319
+
2320
+ /*
2321
+ * call-seq:
2322
+ * sort.to_s -> string
2323
+ *
2324
+ * Returns a human readable string representing the sort object.
2325
+ */
2326
+ static VALUE
2327
+ frt_sort_to_s(VALUE self)
2328
+ {
2329
+ GET_SORT();
2330
+ char *str = sort_to_s(sort);
2331
+ VALUE rstr = rb_str_new2(str);
2332
+ free(str);
2333
+ return rstr;
2334
+ }
2335
+
2336
+ /****************************************************************************
2337
+ *
2338
+ * Searcher Methods
2339
+ *
2340
+ ****************************************************************************/
2341
+
2342
+ static void
2343
+ frt_sea_free(void *p)
2344
+ {
2345
+ Searcher *sea = (Searcher *)p;
2346
+ object_del(sea);
2347
+ sea->close(sea);
2348
+ }
2349
+
2350
+ #define GET_SEA() Searcher *sea = (Searcher *)DATA_PTR(self)
2351
+
2352
+ /*
2353
+ * call-seq:
2354
+ * searcher.close -> nil
2355
+ *
2356
+ * Close the searcher. The garbage collector will do this for you or you can
2357
+ * call this method explicitly.
2358
+ */
2359
+ static VALUE
2360
+ frt_sea_close(VALUE self)
2361
+ {
2362
+ GET_SEA();
2363
+ Frt_Unwrap_Struct(self);
2364
+ object_del(sea);
2365
+ sea->close(sea);
2366
+ return Qnil;
2367
+ }
2368
+
2369
+ /*
2370
+ * call-seq:
2371
+ * searcher.reader -> IndexReader
2372
+ *
2373
+ * Return the IndexReader wrapped by this searcher.
2374
+ */
2375
+ static VALUE
2376
+ frt_sea_get_reader(VALUE self, VALUE rterm)
2377
+ {
2378
+ GET_SEA();
2379
+ return object_get(((IndexSearcher *)sea)->ir);
2380
+ }
2381
+
2382
+ /*
2383
+ * call-seq:
2384
+ * searcher.doc_freq(field, term) -> integer
2385
+ *
2386
+ * Return the number of documents in which the term +term+ appears in the
2387
+ * field +field+.
2388
+ */
2389
+ static VALUE
2390
+ frt_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm)
2391
+ {
2392
+ GET_SEA();
2393
+ return INT2FIX(sea->doc_freq(sea,
2394
+ frt_field(rfield),
2395
+ StringValuePtr(rterm)));
2396
+ }
2397
+
2398
+ /*
2399
+ * call-seq:
2400
+ * searcher.get_document(doc_id) -> LazyDoc
2401
+ * searcher[doc_id] -> LazyDoc
2402
+ *
2403
+ * Retrieve a document from the index. See LazyDoc for more details on the
2404
+ * document returned. Documents are referenced internally by document ids
2405
+ * which are returned by the Searchers search methods.
2406
+ */
2407
+ static VALUE
2408
+ frt_sea_doc(VALUE self, VALUE rdoc_id)
2409
+ {
2410
+ GET_SEA();
2411
+ return frt_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
2412
+ }
2413
+
2414
+ /*
2415
+ * call-seq:
2416
+ * searcher.max_doc -> number
2417
+ *
2418
+ * Returns 1 + the maximum document id in the index. It is the
2419
+ * document_id that will be used by the next document added to the index. If
2420
+ * there are no deletions, this number also refers to the number of documents
2421
+ * in the index.
2422
+ */
2423
+ static VALUE
2424
+ frt_sea_max_doc(VALUE self)
2425
+ {
2426
+ GET_SEA();
2427
+ return INT2FIX(sea->max_doc(sea));
2428
+ }
2429
+
2430
+ static bool
2431
+ call_filter_proc(int doc_id, float score, Searcher *self)
2432
+ {
2433
+ return RTEST(rb_funcall((VALUE)self->arg, id_call, 3,
2434
+ INT2FIX(doc_id),
2435
+ rb_float_new((double)score),
2436
+ object_get(self)));
2437
+ }
2438
+
2439
+ typedef struct CWrappedFilter
2440
+ {
2441
+ Filter super;
2442
+ VALUE rfilter;
2443
+ } CWrappedFilter;
2444
+ #define CWF(filt) ((CWrappedFilter *)(filt))
2445
+
2446
+ static unsigned long
2447
+ cwfilt_hash(Filter *filt)
2448
+ {
2449
+ return NUM2ULONG(rb_funcall(CWF(filt)->rfilter, id_hash, 0));
2450
+ }
2451
+
2452
+ static int
2453
+ cwfilt_eq(Filter *filt, Filter *o)
2454
+ {
2455
+ return RTEST(rb_funcall(CWF(filt)->rfilter, id_eql, 1, CWF(o)->rfilter));
2456
+ }
2457
+
2458
+ static BitVector *
2459
+ cwfilt_get_bv_i(Filter *filt, IndexReader *ir)
2460
+ {
2461
+ VALUE rbv = rb_funcall(CWF(filt)->rfilter, id_bits, 1, object_get(ir));
2462
+ BitVector *bv;
2463
+ Data_Get_Struct(rbv, BitVector, bv);
2464
+ REF(bv);
2465
+ return bv;
2466
+ }
2467
+
2468
+ Filter *
2469
+ frt_get_cwrapped_filter(VALUE rval)
2470
+ {
2471
+ Filter *filter;
2472
+ if (frt_is_cclass(rval) && DATA_PTR(rval)) {
2473
+ Data_Get_Struct(rval, Filter, filter);
2474
+ REF(filter);
2475
+ }
2476
+ else {
2477
+ filter = filt_create(sizeof(CWrappedFilter), "CWrappedFilter");
2478
+ filter->hash = &cwfilt_hash;
2479
+ filter->eq = &cwfilt_eq;
2480
+ filter->get_bv_i = &cwfilt_get_bv_i;
2481
+ CWF(filter)->rfilter = rval;
2482
+ }
2483
+ return filter;
2484
+ }
2485
+
2486
+ static TopDocs *
2487
+ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
2488
+ {
2489
+ VALUE rval;
2490
+ int offset = 0, limit = 10;
2491
+ Filter *filter = NULL;
2492
+ Sort *sort = NULL;
2493
+ TopDocs *td;
2494
+
2495
+ filter_ft filter_func = NULL;
2496
+
2497
+ if (Qnil != roptions) {
2498
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_offset))) {
2499
+ offset = FIX2INT(rval);
2500
+ if (offset < 0)
2501
+ rb_raise(rb_eArgError, ":offset must be >= 0");
2502
+ }
2503
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_limit))) {
2504
+ if (TYPE(rval) == T_FIXNUM) {
2505
+ limit = FIX2INT(rval);
2506
+ if (limit <= 0)
2507
+ rb_raise(rb_eArgError, ":limit must be > 0");
2508
+ } else if (rval == sym_all) {
2509
+ limit = INT_MAX;
2510
+ } else {
2511
+ rb_raise(rb_eArgError, "%s is not a sensible :limit value "
2512
+ "Please use a positive integer or :all",
2513
+ rb_obj_as_string(rval));
2514
+ }
2515
+ }
2516
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter))) {
2517
+ filter = frt_get_cwrapped_filter(rval);
2518
+ }
2519
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter_proc))) {
2520
+ filter_func = &call_filter_proc;
2521
+ sea->arg = (void *)rval;
2522
+ }
2523
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_sort))) {
2524
+ if (TYPE(rval) != T_DATA || CLASS_OF(rval) == cSortField) {
2525
+ rval = frt_sort_init(1, &rval, frt_sort_alloc(cSort));
2526
+ }
2527
+ Data_Get_Struct(rval, Sort, sort);
2528
+ }
2529
+ }
2530
+
2531
+ td = sea->search(sea, query, offset, limit, filter, sort, filter_func, 0);
2532
+ if (filter) filt_deref(filter);
2533
+ return td;
2534
+ }
2535
+
2536
+ /*
2537
+ * call-seq:
2538
+ * searcher.search(query, options = {}) -> TopDocs
2539
+ *
2540
+ * Run a query through the Searcher on the index. A TopDocs object is
2541
+ * returned with the relevant results. The +query+ is a built in Query
2542
+ * object. Here are the options;
2543
+ *
2544
+ * === Options
2545
+ *
2546
+ * :offset:: Default: 0. The offset of the start of the section of the
2547
+ * result-set to return. This is used for paging through
2548
+ * results. Let's say you have a page size of 10. If you
2549
+ * don't find the result you want among the first 10 results
2550
+ * then set +:offset+ to 10 and look at the next 10 results,
2551
+ * then 20 and so on.
2552
+ * :limit:: Default: 10. This is the number of results you want
2553
+ * returned, also called the page size. Set +:limit+ to
2554
+ * +:all+ to return all results
2555
+ * :sort:: A Sort object or sort string describing how the field
2556
+ * should be sorted. A sort string is made up of field names
2557
+ * which cannot contain spaces and the word "DESC" if you
2558
+ * want the field reversed, all separated by commas. For
2559
+ * example; "rating DESC, author, title". Note that Ferret
2560
+ * will try to determine a field's type by looking at the
2561
+ * first term in the index and seeing if it can be parsed as
2562
+ * an integer or a float. Keep this in mind as you may need
2563
+ * to specify a fields type to sort it correctly. For more
2564
+ * on this, see the documentation for SortField
2565
+ * :filter:: a Filter object to filter the search results with
2566
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2567
+ * and the Searcher object as its parameters and returns a
2568
+ * Boolean value specifying whether the result should be
2569
+ * included in the result set.
2570
+ */
2571
+ static VALUE
2572
+ frt_sea_search(int argc, VALUE *argv, VALUE self)
2573
+ {
2574
+ GET_SEA();
2575
+ VALUE rquery, roptions;
2576
+ Query *query;
2577
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2578
+ Data_Get_Struct(rquery, Query, query);
2579
+ return frt_get_td(frt_sea_search_internal(query, roptions, sea), self);
2580
+ }
2581
+
2582
+ /*
2583
+ * call-seq:
2584
+ * searcher.search_each(query, options = {}) {|doc_id, score| do_something}
2585
+ * -> total_hits
2586
+ *
2587
+ * Run a query through the Searcher on the index. A TopDocs object is
2588
+ * returned with the relevant results. The +query+ is a Query object. The
2589
+ * Searcher#search_each method yields the internal document id (used to
2590
+ * reference documents in the Searcher object like this; +searcher[doc_id]+)
2591
+ * and the search score for that document. It is possible for the score to be
2592
+ * greater than 1.0 for some queries and taking boosts into account. This
2593
+ * method will also normalize scores to the range 0.0..1.0 when the max-score
2594
+ * is greater than 1.0. Here are the options;
2595
+ *
2596
+ * === Options
2597
+ *
2598
+ * :offset:: Default: 0. The offset of the start of the section of the
2599
+ * result-set to return. This is used for paging through
2600
+ * results. Let's say you have a page size of 10. If you
2601
+ * don't find the result you want among the first 10 results
2602
+ * then set +:offset+ to 10 and look at the next 10 results,
2603
+ * then 20 and so on.
2604
+ * :limit:: Default: 10. This is the number of results you want
2605
+ * returned, also called the page size. Set +:limit+ to
2606
+ * +:all+ to return all results
2607
+ * :sort:: A Sort object or sort string describing how the field
2608
+ * should be sorted. A sort string is made up of field names
2609
+ * which cannot contain spaces and the word "DESC" if you
2610
+ * want the field reversed, all separated by commas. For
2611
+ * example; "rating DESC, author, title". Note that Ferret
2612
+ * will try to determine a field's type by looking at the
2613
+ * first term in the index and seeing if it can be parsed as
2614
+ * an integer or a float. Keep this in mind as you may need
2615
+ * to specify a fields type to sort it correctly. For more
2616
+ * on this, see the documentation for SortField
2617
+ * :filter:: a Filter object to filter the search results with
2618
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2619
+ * and the Searcher object as its parameters and returns a
2620
+ * Boolean value specifying whether the result should be
2621
+ * included in the result set.
2622
+ */
2623
+ static VALUE
2624
+ frt_sea_search_each(int argc, VALUE *argv, VALUE self)
2625
+ {
2626
+ int i;
2627
+ Query *q;
2628
+ float max_score;
2629
+ TopDocs *td;
2630
+ VALUE rquery, roptions, rtotal_hits;
2631
+ GET_SEA();
2632
+
2633
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2634
+
2635
+ rb_thread_critical = Qtrue;
2636
+
2637
+ Data_Get_Struct(rquery, Query, q);
2638
+ td = frt_sea_search_internal(q, roptions, sea);
2639
+
2640
+ max_score = (td->max_score > 1.0) ? td->max_score : 1.0;
2641
+
2642
+ /* yield normalized scores */
2643
+ for (i = 0; i < td->size; i++) {
2644
+ rb_yield_values(2, INT2FIX(td->hits[i]->doc),
2645
+ rb_float_new((double)(td->hits[i]->score/max_score)));
2646
+ }
2647
+
2648
+ rtotal_hits = INT2FIX(td->total_hits);
2649
+ td_destroy(td);
2650
+
2651
+ rb_thread_critical = 0;
2652
+
2653
+ return rtotal_hits;
2654
+ }
2655
+
2656
+ /*
2657
+ * call-seq:
2658
+ * searcher.explain(query, doc_id) -> Explanation
2659
+ *
2660
+ * Create an explanation object to explain the score returned for a
2661
+ * particular document at +doc_id+ in the index for the query +query+.
2662
+ *
2663
+ * Usually used like this;
2664
+ *
2665
+ * puts searcher.explain(query, doc_id).to_s
2666
+ */
2667
+ static VALUE
2668
+ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
2669
+ {
2670
+ GET_SEA();
2671
+ Query *query;
2672
+ Explanation *expl;
2673
+ Data_Get_Struct(rquery, Query, query);
2674
+ expl = sea->explain(sea, query, FIX2INT(rdoc_id));
2675
+ return Data_Wrap_Struct(cExplanation, NULL, &expl_destroy, expl);
2676
+ }
2677
+
2678
+ /*
2679
+ * call-seq:
2680
+ * searcher.highlight(query, doc_id, field, options = {}) -> Array
2681
+ *
2682
+ * Returns an array of strings with the matches highlighted.
2683
+ *
2684
+ * === Options
2685
+ *
2686
+ * :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
2687
+ * terms will be in the centre of the excerpt. Set to
2688
+ * :all to highlight the entire field.
2689
+ * :num_excerpts:: Default: 2. Number of excerpts to return.
2690
+ * :pre_tag:: Default: "<b>". Tag to place to the left of the match.
2691
+ * You'll probably want to change this to a "<span>" tag
2692
+ * with a class. Try "\033[7m" for use in a terminal.
2693
+ * :post_tag:: Default: "</b>". This tag should close the +:pre_tag+.
2694
+ * Try tag "\033[m" in the terminal.
2695
+ * :ellipsis:: Default: "...". This is the string that is appended at
2696
+ * the beginning and end of excerpts (unless the excerpt
2697
+ * hits the start or end of the field. You'll probably
2698
+ * want to change this so a Unicode ellipsis character.
2699
+ */
2700
+ static VALUE
2701
+ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
2702
+ {
2703
+ GET_SEA();
2704
+ VALUE rquery, rdoc_id, rfield, roptions, v;
2705
+ Query *query;
2706
+ int excerpt_length = 150;
2707
+ int num_excerpts = 2;
2708
+ char *pre_tag = "<b>";
2709
+ char *post_tag = "</b>";
2710
+ char *ellipsis = "...";
2711
+ char **excerpts;
2712
+
2713
+ rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
2714
+ Data_Get_Struct(rquery, Query, query);
2715
+ if (argc > 3) {
2716
+ if (TYPE(roptions) != T_HASH) {
2717
+ rb_raise(rb_eArgError, "The fourth argument to Searcher#highlight must be a hash");
2718
+ }
2719
+ if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
2720
+ num_excerpts = FIX2INT(v);
2721
+ }
2722
+ if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
2723
+ if (v == sym_all) {
2724
+ num_excerpts = 1;
2725
+ excerpt_length = INT_MAX/2;
2726
+ }
2727
+ else {
2728
+ excerpt_length = FIX2INT(v);
2729
+ }
2730
+ }
2731
+ if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
2732
+ pre_tag = rs2s(rb_obj_as_string(v));
2733
+ }
2734
+ if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
2735
+ post_tag = rs2s(rb_obj_as_string(v));
2736
+ }
2737
+ if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
2738
+ ellipsis = rs2s(rb_obj_as_string(v));
2739
+ }
2740
+ }
2741
+
2742
+ if ((excerpts = searcher_highlight(sea,
2743
+ query,
2744
+ FIX2INT(rdoc_id),
2745
+ frt_field(rfield),
2746
+ excerpt_length,
2747
+ num_excerpts,
2748
+ pre_tag,
2749
+ post_tag,
2750
+ ellipsis)) != NULL) {
2751
+ const int size = ary_size(excerpts);
2752
+ int i;
2753
+ VALUE rexcerpts = rb_ary_new2(size);
2754
+
2755
+ for (i = 0; i < size; i++) {
2756
+ RARRAY(rexcerpts)->ptr[i] = rb_str_new2(excerpts[i]);
2757
+ RARRAY(rexcerpts)->len++;
2758
+ }
2759
+ ary_destroy(excerpts, &free);
2760
+ return rexcerpts;
2761
+ }
2762
+ return Qnil;
2763
+ }
2764
+
2765
+ /****************************************************************************
2766
+ *
2767
+ * Searcher Methods
2768
+ *
2769
+ ****************************************************************************/
2770
+
2771
+ static void
2772
+ frt_sea_mark(void *p)
2773
+ {
2774
+ IndexSearcher *isea = (IndexSearcher *)p;
2775
+ frt_gc_mark(isea->ir);
2776
+ frt_gc_mark(isea->ir->store);
2777
+ }
2778
+
2779
+ #define FRT_GET_IR(rir, ir) do {\
2780
+ rir = Data_Wrap_Struct(cIndexReader, &frt_ir_mark, &frt_ir_free, ir);\
2781
+ object_add(ir, rir);\
2782
+ } while (0)
2783
+
2784
+ /*
2785
+ * call-seq:
2786
+ * Searcher.new(obj) -> Searcher
2787
+ *
2788
+ * Create a new Searcher object. +dir+ can either be a string path to an
2789
+ * index directory on the file-system, an actual Ferret::Store::Directory
2790
+ * object or a Ferret::Index::IndexReader. You should use the IndexReader for
2791
+ * searching multiple indexes. Just open the IndexReader on multiple
2792
+ * directories.
2793
+ */
2794
+ static VALUE
2795
+ frt_sea_init(VALUE self, VALUE obj)
2796
+ {
2797
+ Store *store = NULL;
2798
+ IndexReader *ir = NULL;
2799
+ Searcher *sea;
2800
+ if (TYPE(obj) == T_STRING) {
2801
+ frt_create_dir(obj);
2802
+ store = open_fs_store(StringValueCStr(obj));
2803
+ ir = ir_open(store);
2804
+ DEREF(store);
2805
+ FRT_GET_IR(obj, ir);
2806
+ } else {
2807
+ Check_Type(obj, T_DATA);
2808
+ if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
2809
+ Data_Get_Struct(obj, Store, store);
2810
+ ir = ir_open(store);
2811
+ FRT_GET_IR(obj, ir);
2812
+ } else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
2813
+ Data_Get_Struct(obj, IndexReader, ir);
2814
+ } else {
2815
+ rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
2816
+ }
2817
+ }
2818
+
2819
+ sea = isea_new(ir);
2820
+ ((IndexSearcher *)sea)->close_ir = false;
2821
+ Frt_Wrap_Struct(self, &frt_sea_mark, &frt_sea_free, sea);
2822
+ object_add(sea, self);
2823
+
2824
+ return self;
2825
+ }
2826
+
2827
+ /****************************************************************************
2828
+ *
2829
+ * MultiSearcher Methods
2830
+ *
2831
+ ****************************************************************************/
2832
+
2833
+ static void
2834
+ frt_ms_free(void *p)
2835
+ {
2836
+ Searcher *sea = (Searcher *)p;
2837
+ MultiSearcher *msea = (MultiSearcher *)sea;
2838
+ free(msea->searchers);
2839
+ object_del(sea);
2840
+ searcher_close(sea);
2841
+ }
2842
+
2843
+ static void
2844
+ frt_ms_mark(void *p)
2845
+ {
2846
+ int i;
2847
+ MultiSearcher *msea = (MultiSearcher *)p;
2848
+ for (i = 0; i < msea->s_cnt; i++) {
2849
+ frt_gc_mark(msea->searchers[i]);
2850
+ }
2851
+ }
2852
+
2853
+ /*
2854
+ * call-seq:
2855
+ * MultiSearcher.new(searcher*) -> searcher
2856
+ *
2857
+ * Create a new MultiSearcher by passing a list of subsearchers to the
2858
+ * constructor.
2859
+ */
2860
+ static VALUE
2861
+ frt_ms_init(int argc, VALUE *argv, VALUE self)
2862
+ {
2863
+ int i, j, top = 0, capa = argc;
2864
+
2865
+ VALUE rsearcher;
2866
+ Searcher **searchers = ALLOC_N(Searcher *, capa);
2867
+ Searcher *s;
2868
+
2869
+ for (i = 0; i < argc; i++) {
2870
+ rsearcher = argv[i];
2871
+ switch (TYPE(rsearcher)) {
2872
+ case T_ARRAY:
2873
+ capa += RARRAY(rsearcher)->len;
2874
+ REALLOC_N(searchers, Searcher *, capa);
2875
+ for (j = 0; j < RARRAY(rsearcher)->len; j++) {
2876
+ VALUE rs = RARRAY(rsearcher)->ptr[j];
2877
+ Data_Get_Struct(rs, Searcher, s);
2878
+ searchers[top++] = s;
2879
+ }
2880
+ break;
2881
+ case T_DATA:
2882
+ Data_Get_Struct(rsearcher, Searcher, s);
2883
+ searchers[top++] = s;
2884
+ break;
2885
+ default:
2886
+ rb_raise(rb_eArgError, "Can't add class %s to MultiSearcher",
2887
+ rb_obj_classname(rsearcher));
2888
+ break;
2889
+ }
2890
+ }
2891
+ s = msea_new(searchers, top, false);
2892
+ Frt_Wrap_Struct(self, &frt_ms_mark, &frt_ms_free, s);
2893
+ object_add(s, self);
2894
+ return self;
2895
+ }
2896
+
2897
+ /****************************************************************************
2898
+ *
2899
+ * Init Function
2900
+ *
2901
+ ****************************************************************************/
2902
+
2903
+ /* rdochack
2904
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
2905
+ */
2906
+
2907
+ /*
2908
+ * Document-class: Ferret::Search::Hit
2909
+ *
2910
+ * == Summary
2911
+ *
2912
+ * A hit represents a single document match for a search. It holds the
2913
+ * document id of the document that matches along with the score for the
2914
+ * match. The score is a positive Float value. The score contained in a hit
2915
+ * is not normalized so it can be greater than 1.0. To normalize scores to
2916
+ * the range 0.0..1.0 divide the scores by TopDocs#max_score.
2917
+ */
2918
+ static void
2919
+ Init_Hit(void)
2920
+ {
2921
+ const char *hit_class = "Hit";
2922
+ /* rdochack
2923
+ cHit = rb_define_class_under(mSearch, "Hit", rb_cObject);
2924
+ */
2925
+ cHit = rb_struct_define(hit_class, "doc", "score", NULL);
2926
+ rb_set_class_path(cHit, mSearch, hit_class);
2927
+ rb_const_set(mSearch, rb_intern(hit_class), cHit);
2928
+ id_doc = rb_intern("doc");
2929
+ id_score = rb_intern("score");
2930
+ }
2931
+
2932
+ /*
2933
+ * Document-class: Ferret::Search::TopDocs
2934
+ *
2935
+ * == Summary
2936
+ *
2937
+ * A TopDocs object holds a result set for a search. The number of documents
2938
+ * that matched the query is held in TopDocs#total_hits. The actual
2939
+ * results are in the Array TopDocs#hits. The number of hits returned is
2940
+ * limited by the +:limit+ option so the size of the +hits+ array will not
2941
+ * always be equal to the value of +total_hits+. Finally TopDocs#max_score
2942
+ * holds the maximum score of any match (not necessarily the maximum score
2943
+ * contained in the +hits+ array) so it can be used to normalize scores. For
2944
+ * example, to print doc ids with scores out of 100.0 you could do this;
2945
+ *
2946
+ * top_docs.hits.each do |hit|
2947
+ * puts "#{hit.doc} scored #{hit.score * 100.0 / top_docs.max_score}"
2948
+ * end
2949
+ */
2950
+ static void
2951
+ Init_TopDocs(void)
2952
+ {
2953
+ const char *td_class = "TopDocs";
2954
+ /* rdochack
2955
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
2956
+ */
2957
+ cTopDocs = rb_struct_define(td_class,
2958
+ "total_hits",
2959
+ "hits",
2960
+ "max_score",
2961
+ "searcher",
2962
+ NULL);
2963
+ rb_set_class_path(cTopDocs, mSearch, td_class);
2964
+ rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
2965
+ rb_define_method(cTopDocs, "to_s", frt_td_to_s, -1);
2966
+ rb_define_method(cTopDocs, "to_json", frt_td_to_json, 0);
2967
+ id_hits = rb_intern("hits");
2968
+ id_total_hits = rb_intern("total_hits");
2969
+ id_max_score = rb_intern("max_score");
2970
+ id_searcher = rb_intern("searcher");
2971
+ }
2972
+
2973
+ /*
2974
+ * Document-class: Ferret::Search::Explanation
2975
+ *
2976
+ * == Summary
2977
+ *
2978
+ * Explanation is used to give a description of why a document matched with
2979
+ * the score that it did. Use the Explanation#to_s or Explanation#to_html
2980
+ * methods to display the explanation in a human readable format. Creating
2981
+ * explanations is an expensive operation so it should only be used for
2982
+ * debugging purposes. To create an explanation use the Searcher#explain
2983
+ * method.
2984
+ *
2985
+ * == Example
2986
+ *
2987
+ * puts searcher.explain(query, doc_id).to_s
2988
+ */
2989
+ static void
2990
+ Init_Explanation(void)
2991
+ {
2992
+ cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
2993
+ rb_define_alloc_func(cExplanation, frt_data_alloc);
2994
+
2995
+ rb_define_method(cExplanation, "to_s", frt_expl_to_s, 0);
2996
+ rb_define_method(cExplanation, "to_html", frt_expl_to_html, 0);
2997
+ rb_define_method(cExplanation, "score", frt_expl_score, 0);
2998
+ }
2999
+
3000
+ /*
3001
+ * Document-class: Ferret::Search::Query
3002
+ *
3003
+ * == Summary
3004
+ *
3005
+ * Abstract class representing a query to the index. There are a number of
3006
+ * concrete Query implementations;
3007
+ *
3008
+ * * TermQuery
3009
+ * * MultiTermQuery
3010
+ * * BooleanQuery
3011
+ * * PhraseQuery
3012
+ * * ConstantScoreQuery
3013
+ * * FilteredQuery
3014
+ * * MatchAllQuery
3015
+ * * RangeQuery
3016
+ * * WildcardQuery
3017
+ * * FuzzyQuery
3018
+ * * PrefixQuery
3019
+ * * Spans::SpanTermQuery
3020
+ * * Spans::SpanFirstQuery
3021
+ * * Spans::SpanOrQuery
3022
+ * * Spans::SpanNotQuery
3023
+ * * Spans::SpanNearQuery
3024
+ *
3025
+ * Explore these classes for the query right for you. The queries are passed
3026
+ * to the Searcher#search* methods.
3027
+ *
3028
+ * === Query Boosts
3029
+ *
3030
+ * Queries have a boost value so that you can make the results of one query
3031
+ * more important than the results of another query when combining them in a
3032
+ * BooleanQuery. For example, documents on Rails. To avoid getting results
3033
+ * for train rails you might also add the term Ruby but Rails is the more
3034
+ * important term so you'd give it a boost.
3035
+ */
3036
+ static void
3037
+ Init_Query(void)
3038
+ {
3039
+ cQuery = rb_define_class_under(mSearch, "Query", rb_cObject);
3040
+
3041
+ rb_define_method(cQuery, "to_s", frt_q_to_s, -1);
3042
+ rb_define_method(cQuery, "boost", frt_q_get_boost, 0);
3043
+ rb_define_method(cQuery, "boost=", frt_q_set_boost, 1);
3044
+ rb_define_method(cQuery, "eql?", frt_q_eql, 1);
3045
+ rb_define_method(cQuery, "==", frt_q_eql, 1);
3046
+ rb_define_method(cQuery, "hash", frt_q_hash, 0);
3047
+ rb_define_method(cQuery, "terms", frt_q_get_terms, 1);
3048
+ }
3049
+
3050
+ /*
3051
+ * Document-class: Ferret::Search::TermQuery
3052
+ *
3053
+ * == Summary
3054
+ *
3055
+ * TermQuery is the most basic query and it is the building block for most
3056
+ * other queries. It basically matches documents that contain a specific term
3057
+ * in a specific field.
3058
+ *
3059
+ * == Example
3060
+ *
3061
+ * query = TermQuery.new(:content, "rails")
3062
+ *
3063
+ * # untokenized fields can also be searched with this query;
3064
+ * query = TermQuery.new(:title, "Shawshank Redemption")
3065
+ *
3066
+ * Notice the all lowercase term Rails. This is important as most analyzers will
3067
+ * downcase all text added to the index. The title in this case was not
3068
+ * tokenized so the case would have been left as is.
3069
+ */
3070
+ static void
3071
+ Init_TermQuery(void)
3072
+ {
3073
+ cTermQuery = rb_define_class_under(mSearch, "TermQuery", cQuery);
3074
+ rb_define_alloc_func(cTermQuery, frt_data_alloc);
3075
+
3076
+ rb_define_method(cTermQuery, "initialize", frt_tq_init, 2);
3077
+ }
3078
+
3079
+ /*
3080
+ * Document-class: Ferret::Search::MultiTermQuery
3081
+ *
3082
+ * == Summary
3083
+ *
3084
+ * MultiTermQuery matches documents that contain one of a list of terms in a
3085
+ * specific field. This is the basic building block for queries such as;
3086
+ *
3087
+ * * PrefixQuery
3088
+ * * WildcardQuery
3089
+ * * FuzzyQuery
3090
+ *
3091
+ * MultiTermQuery is very similar to a boolean "Or" query. It is highly
3092
+ * optimized though as it focuses on a single field.
3093
+ *
3094
+ * == Example
3095
+ *
3096
+ * multi_term_query = MultiTermQuery.new(:content, :max_terms => 10)
3097
+ *
3098
+ * multi_term_query << "Ruby" << "Ferret" << "Rails" << "Search"
3099
+ */
3100
+ static void
3101
+ Init_MultiTermQuery(void)
3102
+ {
3103
+ id_default_max_terms = rb_intern("@@default_max_terms");
3104
+ sym_max_terms = ID2SYM(rb_intern("max_terms"));
3105
+ sym_min_score = ID2SYM(rb_intern("min_score"));
3106
+
3107
+ cMultiTermQuery = rb_define_class_under(mSearch, "MultiTermQuery", cQuery);
3108
+ rb_define_alloc_func(cMultiTermQuery, frt_data_alloc);
3109
+
3110
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, INT2FIX(512), Qfalse);
3111
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms",
3112
+ frt_mtq_get_dmt, 0);
3113
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms=",
3114
+ frt_mtq_set_dmt, 1);
3115
+
3116
+ rb_define_method(cMultiTermQuery, "initialize", frt_mtq_init, -1);
3117
+ rb_define_method(cMultiTermQuery, "add_term", frt_mtq_add_term, -1);
3118
+ rb_define_method(cMultiTermQuery, "<<", frt_mtq_add_term, -1);
3119
+ }
3120
+
3121
+ static void Init_BooleanClause(void);
3122
+
3123
+ /*
3124
+ * Document-class: Ferret::Search::BooleanQuery
3125
+ *
3126
+ * == Summary
3127
+ *
3128
+ * A BooleanQuery is used for combining many queries into one. This is best
3129
+ * illustrated with an example.
3130
+ *
3131
+ * == Example
3132
+ *
3133
+ * Lets say we wanted to find all documents with the term "Ruby" in the
3134
+ * +:title+ and the term "Ferret" in the +:content+ field or the +:title+
3135
+ * field written before January 2006. You could build the query like this.
3136
+ *
3137
+ * tq1 = TermQuery.new(:title, "ruby")
3138
+ * tq21 = TermQuery.new(:title, "ferret")
3139
+ * tq22 = TermQuery.new(:content, "ferret")
3140
+ * bq2 = BooleanQuery.new
3141
+ * bq2 << tq21 << tq22
3142
+ *
3143
+ * rq3 = RangeQuery.new(:written, :< => "200601")
3144
+ *
3145
+ * query = BooleanQuery.new
3146
+ * query.add_query(tq1, :must).add_query(bq2, :must).add_query(rq3, :must)
3147
+ */
3148
+ static void
3149
+ Init_BooleanQuery(void)
3150
+ {
3151
+ cBooleanQuery = rb_define_class_under(mSearch, "BooleanQuery", cQuery);
3152
+ rb_define_alloc_func(cBooleanQuery, frt_data_alloc);
3153
+
3154
+ rb_define_method(cBooleanQuery, "initialize", frt_bq_init, -1);
3155
+ rb_define_method(cBooleanQuery, "add_query", frt_bq_add_query, -1);
3156
+ rb_define_method(cBooleanQuery, "<<", frt_bq_add_query, -1);
3157
+
3158
+ Init_BooleanClause();
3159
+ }
3160
+
3161
+ /*
3162
+ * Document-class: Ferret::Search::BooleanQuery::BooleanClause
3163
+ *
3164
+ * == Summary
3165
+ *
3166
+ * A BooleanClause holds a single query within a BooleanQuery specifying
3167
+ * whether the query +:must+ match, +:should+ match or +:must_not+ match.
3168
+ * BooleanClauses can be used to pass a clause from one BooleanQuery to
3169
+ * another although it is generally easier just to add a query directly to a
3170
+ * BooleanQuery using the BooleanQuery#add_query method.
3171
+ *
3172
+ * == Example
3173
+ *
3174
+ * clause1 = BooleanClause.new(query1, :should)
3175
+ * clause2 = BooleanClause.new(query2, :should)
3176
+ *
3177
+ * query = BooleanQuery.new
3178
+ * query << clause1 << clause2
3179
+ */
3180
+ static void
3181
+ Init_BooleanClause(void)
3182
+ {
3183
+ sym_should = ID2SYM(rb_intern("should"));
3184
+ sym_must = ID2SYM(rb_intern("must"));
3185
+ sym_must_not = ID2SYM(rb_intern("must_not"));
3186
+
3187
+ cBooleanClause = rb_define_class_under(cBooleanQuery, "BooleanClause",
3188
+ rb_cObject);
3189
+ rb_define_alloc_func(cBooleanClause, frt_data_alloc);
3190
+
3191
+ rb_define_method(cBooleanClause, "initialize", frt_bc_init, -1);
3192
+ rb_define_method(cBooleanClause, "query", frt_bc_get_query, 0);
3193
+ rb_define_method(cBooleanClause, "query=", frt_bc_set_query, 1);
3194
+ rb_define_method(cBooleanClause, "required?", frt_bc_is_required, 0);
3195
+ rb_define_method(cBooleanClause, "prohibited?", frt_bc_is_prohibited, 0);
3196
+ rb_define_method(cBooleanClause, "occur=", frt_bc_set_occur, 1);
3197
+ rb_define_method(cBooleanClause, "to_s", frt_bc_to_s, 0);
3198
+ }
3199
+
3200
+ /*
3201
+ * Document-class: Ferret::Search::RangeQuery
3202
+ *
3203
+ * == Summary
3204
+ *
3205
+ * RangeQuery is used to find documents with terms in a range.
3206
+ * RangeQuerys are usually used on untokenized fields like date fields or
3207
+ * number fields.
3208
+ *
3209
+ * == Example
3210
+ *
3211
+ * To find all documents written between January 1st 2006 and January 26th
3212
+ * 2006 inclusive you would write the query like this;
3213
+ *
3214
+ * query = RangeQuery.new(:create_date, :>= => "20060101", :<= => "20060126")
3215
+ */
3216
+ static void
3217
+ Init_RangeQuery(void)
3218
+ {
3219
+ sym_upper = ID2SYM(rb_intern("upper"));
3220
+ sym_lower = ID2SYM(rb_intern("lower"));
3221
+ sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
3222
+ sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
3223
+ sym_include_upper = ID2SYM(rb_intern("include_upper"));
3224
+ sym_include_lower = ID2SYM(rb_intern("include_lower"));
3225
+
3226
+ sym_less_than = ID2SYM(rb_intern("<"));
3227
+ sym_less_than_or_equal_to = ID2SYM(rb_intern("<="));
3228
+ sym_greater_than = ID2SYM(rb_intern(">"));
3229
+ sym_greater_than_or_equal_to = ID2SYM(rb_intern(">="));
3230
+
3231
+ cRangeQuery = rb_define_class_under(mSearch, "RangeQuery", cQuery);
3232
+ rb_define_alloc_func(cRangeQuery, frt_data_alloc);
3233
+
3234
+ rb_define_method(cRangeQuery, "initialize", frt_rq_init, 2);
3235
+ }
3236
+
3237
+ /*
3238
+ * Document-class: Ferret::Search::PhraseQuery
3239
+ *
3240
+ * == Summary
3241
+ *
3242
+ * PhraseQuery matches phrases like "the quick brown fox". Most people are
3243
+ * familiar with phrase queries having used them in most internet search
3244
+ * engines.
3245
+ *
3246
+ * === Slop
3247
+ *
3248
+ * Ferret's phrase queries are slightly more advanced. You can match phrases
3249
+ * with a slop, ie the match isn't exact but it is good enough. The slop is
3250
+ * basically the word edit distance of the phrase. For example, "the quick
3251
+ * brown fox" with a slop of 1 would match "the quick little brown fox". With
3252
+ * a slop of 2 it would match "the brown quick fox".
3253
+ *
3254
+ * query = PhraseQuery.new(:content)
3255
+ * query << "the" << "quick" << "brown" << "fox"
3256
+ *
3257
+ * # matches => "the quick brown fox"
3258
+ *
3259
+ * query.slop = 1
3260
+ * # matches => "the quick little brown fox"
3261
+ * |__1__^
3262
+ *
3263
+ * query.slop = 2
3264
+ * # matches => "the brown quick _____ fox"
3265
+ * ^_____2_____|
3266
+ *
3267
+ * == Multi-PhraseQuery
3268
+ *
3269
+ * Phrase queries can also have multiple terms in a single position. Let's
3270
+ * say for example that we want to match synonyms for quick like "fast" and
3271
+ * "speedy". You could build the query like this;
3272
+ *
3273
+ * query = PhraseQuery.new(:content)
3274
+ * query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox"
3275
+ * # matches => "the quick red fox"
3276
+ * # matches => "the fast brown fox"
3277
+ *
3278
+ * query.slop = 1
3279
+ * # matches => "the speedy little red fox"
3280
+ *
3281
+ * You can also leave positions blank. Lets say you wanted to match "the
3282
+ * quick <> fox" where "<>" could match anything (but not nothing). You'd
3283
+ * build this query like this;
3284
+ *
3285
+ * query = PhraseQuery.new(:content)
3286
+ * query.add_term("the").add_term("quick").add_term("fox", 2)
3287
+ * # matches => "the quick yellow fox"
3288
+ * # matches => "the quick alkgdhaskghaskjdh fox"
3289
+ *
3290
+ * The second parameter to PhraseQuery#add_term is the position increment for
3291
+ * the term. It is one by default meaning that every time you add a term it
3292
+ * is expected to follow the previous term. But setting it to 2 or greater
3293
+ * you are leaving empty spaces in the term.
3294
+ *
3295
+ * There are also some tricks you can do by setting the position increment to
3296
+ * 0. With a little help from your analyzer you can actually tag bold or
3297
+ * italic text for example. If you want more information about this, ask on
3298
+ * the mailing list.
3299
+ */
3300
+ static void
3301
+ Init_PhraseQuery(void)
3302
+ {
3303
+ cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
3304
+ rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
3305
+
3306
+ rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
3307
+ rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
3308
+ rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
3309
+ rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);
3310
+ rb_define_method(cPhraseQuery, "slop=", frt_phq_set_slop, 1);
3311
+ }
3312
+
3313
+ /*
3314
+ * Document-class: Ferret::Search::PrefixQuery
3315
+ *
3316
+ * == Summary
3317
+ *
3318
+ * A prefix query is like a TermQuery except that it matches any term with a
3319
+ * specific prefix. PrefixQuery is expanded into a MultiTermQuery when
3320
+ * submitted in a search.
3321
+ *
3322
+ * == Example
3323
+ *
3324
+ * PrefixQuery is very useful for matching a tree structure category
3325
+ * hierarchy. For example, let's say you have the categories;
3326
+ *
3327
+ * "cat1/"
3328
+ * "cat1/sub_cat1"
3329
+ * "cat1/sub_cat2"
3330
+ * "cat2"
3331
+ * "cat2/sub_cat1"
3332
+ * "cat2/sub_cat2"
3333
+ *
3334
+ * Lets say you want to match everything in category 2. You'd build the query
3335
+ * like this;
3336
+ *
3337
+ * query = PrefixQuery.new(:category, "cat2")
3338
+ * # matches => "cat2"
3339
+ * # matches => "cat2/sub_cat1"
3340
+ * # matches => "cat2/sub_cat2"
3341
+ */
3342
+ static void
3343
+ Init_PrefixQuery(void)
3344
+ {
3345
+ cPrefixQuery = rb_define_class_under(mSearch, "PrefixQuery", cQuery);
3346
+ rb_define_alloc_func(cPrefixQuery, frt_data_alloc);
3347
+
3348
+ rb_define_method(cPrefixQuery, "initialize", frt_prq_init, -1);
3349
+ }
3350
+
3351
+ /*
3352
+ * Document-class: Ferret::Search::WildcardQuery
3353
+ *
3354
+ * == Summary
3355
+ *
3356
+ * WildcardQuery is a simple pattern matching query. There are two wild-card
3357
+ * characters.
3358
+ *
3359
+ * * "*" which matches 0 or more characters
3360
+ * * "?" which matches a single character
3361
+ *
3362
+ * == Example
3363
+ *
3364
+ * query = WildcardQuery.new(:field, "h*og")
3365
+ * # matches => "hog"
3366
+ * # matches => "hot dog"
3367
+ *
3368
+ * query = WildcardQuery.new(:field, "fe?t")
3369
+ * # matches => "feat"
3370
+ * # matches => "feet"
3371
+ *
3372
+ * query = WildcardQuery.new(:field, "f?ll*")
3373
+ * # matches => "fill"
3374
+ * # matches => "falling"
3375
+ * # matches => "folly"
3376
+ */
3377
+ static void
3378
+ Init_WildcardQuery(void)
3379
+ {
3380
+ cWildcardQuery = rb_define_class_under(mSearch, "WildcardQuery", cQuery);
3381
+ rb_define_alloc_func(cWildcardQuery, frt_data_alloc);
3382
+
3383
+ rb_define_method(cWildcardQuery, "initialize", frt_wcq_init, -1);
3384
+ }
3385
+
3386
+ /*
3387
+ * Document-class: Ferret::Search::FuzzyQuery
3388
+ *
3389
+ * == Summary
3390
+ *
3391
+ * FuzzyQuery uses the Levenshtein distance formula for measuring the
3392
+ * similarity between two terms. For example, weak and week have one letter
3393
+ * difference and they are four characters long so the similarity is 75% or
3394
+ * 0.75. You can use this query to match terms that are very close to the
3395
+ * search term.
3396
+ *
3397
+ * == Example
3398
+ *
3399
+ * FuzzyQuery can be quite useful for finding documents that wouldn't normally
3400
+ * be found because of typos.
3401
+ *
3402
+ * FuzzyQuery.new(:field, "google",
3403
+ * :min_similarity => 0.6,
3404
+ * :prefix_length => 2)
3405
+ * # matches => "gogle", "goggle", "googol", "googel"
3406
+ */
3407
+ static void
3408
+ Init_FuzzyQuery(void)
3409
+ {
3410
+ id_default_min_similarity = rb_intern("@@default_min_similarity");
3411
+ id_default_prefix_length = rb_intern("@@default_prefix_length");
3412
+
3413
+ sym_min_similarity = ID2SYM(rb_intern("min_similarity"));
3414
+ sym_prefix_length = ID2SYM(rb_intern("prefix_length"));
3415
+
3416
+ cFuzzyQuery = rb_define_class_under(mSearch, "FuzzyQuery", cQuery);
3417
+ rb_define_alloc_func(cFuzzyQuery, frt_data_alloc);
3418
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity,
3419
+ rb_float_new(0.5), Qfalse);
3420
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length,
3421
+ INT2FIX(0), Qfalse);
3422
+
3423
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity",
3424
+ frt_fq_get_dms, 0);
3425
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity=",
3426
+ frt_fq_set_dms, 1);
3427
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length",
3428
+ frt_fq_get_dpl, 0);
3429
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length=",
3430
+ frt_fq_set_dpl, 1);
3431
+
3432
+ rb_define_method(cFuzzyQuery, "initialize", frt_fq_init, -1);
3433
+ rb_define_method(cFuzzyQuery, "prefix_length", frt_fq_pre_len, 0);
3434
+ rb_define_method(cFuzzyQuery, "min_similarity", frt_fq_min_sim, 0);
3435
+ }
3436
+
3437
+ /*
3438
+ * Document-class: Ferret::Search::MatchAllQuery
3439
+ *
3440
+ * == Summary
3441
+ *
3442
+ * MatchAllQuery matches all documents in the index. You might want to use this
3443
+ * query in combination with a filter, however, ConstantScoreQuery is
3444
+ * probably better in that circumstance.
3445
+ */
3446
+ static void
3447
+ Init_MatchAllQuery(void)
3448
+ {
3449
+ cMatchAllQuery = rb_define_class_under(mSearch, "MatchAllQuery", cQuery);
3450
+ rb_define_alloc_func(cMatchAllQuery, frt_maq_alloc);
3451
+
3452
+ rb_define_method(cMatchAllQuery, "initialize", frt_maq_init, 0);
3453
+ }
3454
+
3455
+ /*
3456
+ * Document-class: Ferret::Search::ConstantScoreQuery
3457
+ *
3458
+ * == Summary
3459
+ *
3460
+ * ConstantScoreQuery is a way to turn a Filter into a Query. It matches all
3461
+ * documents that its filter matches with a constant score. This is a very
3462
+ * fast query, particularly when run more than once (since filters are
3463
+ * cached). It is also used internally by RangeQuery.
3464
+ *
3465
+ * == Example
3466
+ *
3467
+ * Let's say for example that you often need to display all documents created
3468
+ * on or after June 1st. You could create a ConstantScoreQuery like this;
3469
+ *
3470
+ * query = ConstantScoreQuery.new(RangeFilter.new(:created_on, :>= => "200606"))
3471
+ *
3472
+ * Once this is run once the results are cached and will be returned very
3473
+ * quickly in future requests.
3474
+ */
3475
+ static void
3476
+ Init_ConstantScoreQuery(void)
3477
+ {
3478
+ cConstantScoreQuery = rb_define_class_under(mSearch,
3479
+ "ConstantScoreQuery", cQuery);
3480
+ rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
3481
+
3482
+ rb_define_method(cConstantScoreQuery, "initialize", frt_csq_init, 1);
3483
+ }
3484
+
3485
+ /*
3486
+ * Document-class: Ferret::Search::FilteredQuery
3487
+ *
3488
+ * == Summary
3489
+ *
3490
+ * FilteredQuery offers you a way to apply a filter to a specific query.
3491
+ * The FilteredQuery would then be added to a BooleanQuery to be combined
3492
+ * with other queries. There is not much point in passing a FilteredQuery
3493
+ * directly to a Searcher#search method unless you are applying more than one
3494
+ * filter since the search method also takes a filter as a parameter.
3495
+ */
3496
+ static void
3497
+ Init_FilteredQuery(void)
3498
+ {
3499
+ cFilteredQuery = rb_define_class_under(mSearch, "FilteredQuery", cQuery);
3500
+ rb_define_alloc_func(cFilteredQuery, frt_data_alloc);
3501
+
3502
+ rb_define_method(cFilteredQuery, "initialize", frt_fqq_init, 2);
3503
+ }
3504
+
3505
+ /*
3506
+ * Document-class: Ferret::Search::Spans::SpanTermQuery
3507
+ *
3508
+ * == Summary
3509
+ *
3510
+ * A SpanTermQuery is the Spans version of TermQuery, the only difference
3511
+ * being that it returns the start and end offset of all of its matches for
3512
+ * use by enclosing SpanQueries.
3513
+ */
3514
+ static void
3515
+ Init_SpanTermQuery(void)
3516
+ {
3517
+ cSpanTermQuery = rb_define_class_under(mSpans, "SpanTermQuery", cQuery);
3518
+ rb_define_alloc_func(cSpanTermQuery, frt_data_alloc);
3519
+
3520
+ rb_define_method(cSpanTermQuery, "initialize", frt_spantq_init, 2);
3521
+ }
3522
+
3523
+ /*
3524
+ * Document-class: Ferret::Search::Spans::SpanMultiTermQuery
3525
+ *
3526
+ * == Summary
3527
+ *
3528
+ * A SpanMultiTermQuery is the Spans version of MultiTermQuery, the only
3529
+ * difference being that it returns the start and end offset of all of its
3530
+ * matches for use by enclosing SpanQueries.
3531
+ */
3532
+ static void
3533
+ Init_SpanMultiTermQuery(void)
3534
+ {
3535
+ cSpanMultiTermQuery = rb_define_class_under(mSpans, "SpanMultiTermQuery", cQuery);
3536
+ rb_define_alloc_func(cSpanMultiTermQuery, frt_data_alloc);
3537
+
3538
+ rb_define_method(cSpanMultiTermQuery, "initialize", frt_spanmtq_init, 2);
3539
+ }
3540
+
3541
+ /*
3542
+ * Document-class: Ferret::Search::Spans::SpanPrefixQuery
3543
+ *
3544
+ * == Summary
3545
+ *
3546
+ * A SpanPrefixQuery is the Spans version of PrefixQuery, the only difference
3547
+ * being that it returns the start and end offset of all of its matches for
3548
+ * use by enclosing SpanQueries.
3549
+ */
3550
+ static void
3551
+ Init_SpanPrefixQuery(void)
3552
+ {
3553
+ cSpanPrefixQuery = rb_define_class_under(mSpans, "SpanPrefixQuery", cQuery);
3554
+ rb_define_alloc_func(cSpanPrefixQuery, frt_data_alloc);
3555
+
3556
+ rb_define_method(cSpanPrefixQuery, "initialize", frt_spanprq_init, -1);
3557
+ }
3558
+
3559
+ /*
3560
+ * Document-class: Ferret::Search::Spans::SpanFirstQuery
3561
+ *
3562
+ * == Summary
3563
+ *
3564
+ * A SpanFirstQuery restricts a query to search in the first +end+ bytes of a
3565
+ * field. This is useful since often the most important information in a
3566
+ * document is at the start of the document.
3567
+ *
3568
+ * == Example
3569
+ *
3570
+ * To find all documents where "ferret" is within the first 100 characters
3571
+ * (really bytes);
3572
+ *
3573
+ * query = SpanFirstQuery.new(SpanTermQuery.new(:content, "ferret"), 100)
3574
+ *
3575
+ * == NOTE
3576
+ *
3577
+ * SpanFirstQuery only works with other SpanQueries.
3578
+ */
3579
+ static void
3580
+ Init_SpanFirstQuery(void)
3581
+ {
3582
+ cSpanFirstQuery = rb_define_class_under(mSpans, "SpanFirstQuery", cQuery);
3583
+ rb_define_alloc_func(cSpanFirstQuery, frt_data_alloc);
3584
+
3585
+ rb_define_method(cSpanFirstQuery, "initialize", frt_spanfq_init, 2);
3586
+ }
3587
+
3588
+ /*
3589
+ * Document-class: Ferret::Search::Spans::SpanNearQuery
3590
+ *
3591
+ * == Summary
3592
+ *
3593
+ * A SpanNearQuery is like a combination between a PhraseQuery and a
3594
+ * BooleanQuery. It matches sub-SpanQueries which are added as clauses but
3595
+ * those clauses must occur within a +slop+ edit distance of each other. You
3596
+ * can also specify that clauses must occur +in_order+.
3597
+ *
3598
+ * == Example
3599
+ *
3600
+ * query = SpanNearQuery.new(:slop => 2)
3601
+ * query << SpanTermQuery.new(:field, "quick")
3602
+ * query << SpanTermQuery.new(:field, "brown")
3603
+ * query << SpanTermQuery.new(:field, "fox")
3604
+ * # matches => "quick brown speckled sleepy fox"
3605
+ * |______2______^
3606
+ * # matches => "quick brown speckled fox"
3607
+ * |__1__^
3608
+ * # matches => "brown quick _____ fox"
3609
+ * ^_____2_____|
3610
+ *
3611
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
3612
+ * query << SpanTermQuery.new(:field, "quick")
3613
+ * query << SpanTermQuery.new(:field, "brown")
3614
+ * query << SpanTermQuery.new(:field, "fox")
3615
+ * # matches => "quick brown speckled sleepy fox"
3616
+ * |______2______^
3617
+ * # matches => "quick brown speckled fox"
3618
+ * |__1__^
3619
+ * # doesn't match => "brown quick _____ fox"
3620
+ * # not in order ^_____2_____|
3621
+ *
3622
+ * == NOTE
3623
+ *
3624
+ * SpanNearQuery only works with other SpanQueries.
3625
+ */
3626
+ static void
3627
+ Init_SpanNearQuery(void)
3628
+ {
3629
+ sym_slop = ID2SYM(rb_intern("slop"));
3630
+ sym_in_order = ID2SYM(rb_intern("in_order"));
3631
+ sym_clauses = ID2SYM(rb_intern("clauses"));
3632
+
3633
+ cSpanNearQuery = rb_define_class_under(mSpans, "SpanNearQuery", cQuery);
3634
+ rb_define_alloc_func(cSpanNearQuery, frt_data_alloc);
3635
+
3636
+ rb_define_method(cSpanNearQuery, "initialize", frt_spannq_init, -1);
3637
+ rb_define_method(cSpanNearQuery, "add", frt_spannq_add, 1);
3638
+ rb_define_method(cSpanNearQuery, "<<", frt_spannq_add, 1);
3639
+ }
3640
+
3641
+ /*
3642
+ * Document-class: Ferret::Search::Spans::SpanOrQuery
3643
+ *
3644
+ * == Summary
3645
+ *
3646
+ * SpanOrQuery is just like a BooleanQuery with all +:should+ clauses.
3647
+ * However, the difference is that all sub-clauses must be SpanQueries and
3648
+ * the resulting query can then be used within other SpanQueries like
3649
+ * SpanNearQuery.
3650
+ *
3651
+ * == Example
3652
+ *
3653
+ * Combined with SpanNearQuery we can create a multi-PhraseQuery like query;
3654
+ *
3655
+ * quick_query = SpanOrQuery.new()
3656
+ * quick_query << SpanTermQuery.new(:field, "quick")
3657
+ * quick_query << SpanTermQuery.new(:field, "fast")
3658
+ * quick_query << SpanTermQuery.new(:field, "speedy")
3659
+ *
3660
+ * colour_query = SpanOrQuery.new()
3661
+ * colour_query << SpanTermQuery.new(:field, "red")
3662
+ * colour_query << SpanTermQuery.new(:field, "brown")
3663
+ *
3664
+ *
3665
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
3666
+ * query << quick_query
3667
+ * query << colour_query
3668
+ * query << SpanTermQuery.new(:field, "fox")
3669
+ * # matches => "quick red speckled sleepy fox"
3670
+ * |______2______^
3671
+ * # matches => "speedy brown speckled fox"
3672
+ * |__1__^
3673
+ * # doesn't match => "brown fast _____ fox"
3674
+ * # not in order ^_____2____|
3675
+ *
3676
+ * == NOTE
3677
+ *
3678
+ * SpanOrQuery only works with other SpanQueries.
3679
+ */
3680
+ static void
3681
+ Init_SpanOrQuery(void)
3682
+ {
3683
+ cSpanOrQuery = rb_define_class_under(mSpans, "SpanOrQuery", cQuery);
3684
+ rb_define_alloc_func(cSpanOrQuery, frt_data_alloc);
3685
+
3686
+ rb_define_method(cSpanOrQuery, "initialize", frt_spanoq_init, -1);
3687
+ rb_define_method(cSpanOrQuery, "add", frt_spanoq_add, 1);
3688
+ rb_define_method(cSpanOrQuery, "<<", frt_spanoq_add, 1);
3689
+ }
3690
+
3691
+ /*
3692
+ * Document-class: Ferret::Search::Spans::SpanNotQuery
3693
+ *
3694
+ * == Summary
3695
+ *
3696
+ * SpanNotQuery is like a BooleanQuery with a +:must_not+ clause. The
3697
+ * difference being that the resulting query can be used in another
3698
+ * SpanQuery.
3699
+ *
3700
+ * == Example
3701
+ *
3702
+ * Let's say you wanted to search for all documents with the term "rails"
3703
+ * near the start but without the term "train" near the start. This would
3704
+ * allow the term "train" to occur later on in the document.
3705
+ *
3706
+ * rails_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "rails"), 100)
3707
+ * train_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "train"), 100)
3708
+ * query = SpanNotQuery.new(rails_query, train_query)
3709
+ *
3710
+ * == NOTE
3711
+ *
3712
+ * SpanNotQuery only works with other SpanQueries.
3713
+ */
3714
+ static void
3715
+ Init_SpanNotQuery(void)
3716
+ {
3717
+ cSpanNotQuery = rb_define_class_under(mSpans, "SpanNotQuery", cQuery);
3718
+ rb_define_alloc_func(cSpanNotQuery, frt_data_alloc);
3719
+
3720
+ rb_define_method(cSpanNotQuery, "initialize", frt_spanxq_init, 2);
3721
+ }
3722
+
3723
+ /* rdoc hack
3724
+ extern VALUE mFerret = rb_define_module("Ferret");
3725
+ extern VALUE mSearch = rb_define_module_under(mFerret, "Search");
3726
+ */
3727
+
3728
+ /*
3729
+ * Document-module: Ferret::Search::Spans
3730
+ *
3731
+ * == Summary
3732
+ *
3733
+ * The Spans module contains a number of SpanQueries. SpanQueries, unlike
3734
+ * regular queries, also return the start and end offsets of all of their
3735
+ * matches so they can be used to limit queries to a certain position in the
3736
+ * field. They are often used in combination to perform special types of
3737
+ * PhraseQuery.
3738
+ */
3739
+ static void
3740
+ Init_Spans(void)
3741
+ {
3742
+ mSpans = rb_define_module_under(mSearch, "Spans");
3743
+ Init_SpanTermQuery();
3744
+ Init_SpanMultiTermQuery();
3745
+ Init_SpanPrefixQuery();
3746
+ Init_SpanFirstQuery();
3747
+ Init_SpanNearQuery();
3748
+ Init_SpanOrQuery();
3749
+ Init_SpanNotQuery();
3750
+ }
3751
+
3752
+ /*
3753
+ * Document-class: Ferret::Search::RangeFilter
3754
+ *
3755
+ * == Summary
3756
+ *
3757
+ * RangeFilter filters a set of documents which contain a lexicographical
3758
+ * range of terms (ie "aaa", "aab", "aac", etc). See also RangeQuery
3759
+ *
3760
+ * == Example
3761
+ *
3762
+ * Find all documents created before 5th of September 2002.
3763
+ *
3764
+ * filter = RangeFilter.new(:created_on, :< => "20020905")
3765
+ */
3766
+ static void
3767
+ Init_RangeFilter(void)
3768
+ {
3769
+ cRangeFilter = rb_define_class_under(mSearch, "RangeFilter", cFilter);
3770
+ frt_mark_cclass(cRangeFilter);
3771
+ rb_define_alloc_func(cRangeFilter, frt_data_alloc);
3772
+
3773
+ rb_define_method(cRangeFilter, "initialize", frt_rf_init, 2);
3774
+ }
3775
+
3776
+ /*
3777
+ * Document-class: Ferret::Search::QueryFilter
3778
+ *
3779
+ * == Summary
3780
+ *
3781
+ * QueryFilter can be used to restrict one query's results by another query's
3782
+ * results, basically "and"ing them together. Of course you could easily use
3783
+ * a BooleanQuery to do this. The reason you may choose to use a QueryFilter
3784
+ * is that Filter results are cached so if you have one query that is often
3785
+ * added to other queries you may want to use a QueryFilter for performance
3786
+ * reasons.
3787
+ *
3788
+ * == Example
3789
+ *
3790
+ * Let's say you have a field +:approved+ which you set to yes when a
3791
+ * document is approved for display. You'll probably want to add a Filter
3792
+ * which filters approved documents to display to your users. This is the
3793
+ * perfect use case for a QueryFilter.
3794
+ *
3795
+ * filter = QueryFilter.new(TermQuery.new(:approved, "yes"))
3796
+ *
3797
+ * Just remember to use the same QueryFilter each time to take advantage of
3798
+ * caching. Don't create a new one for each request. Of course, this won't
3799
+ * work in a CGI application.
3800
+ */
3801
+ static void
3802
+ Init_QueryFilter(void)
3803
+ {
3804
+ cQueryFilter = rb_define_class_under(mSearch, "QueryFilter", cFilter);
3805
+ frt_mark_cclass(cQueryFilter);
3806
+ rb_define_alloc_func(cQueryFilter, frt_data_alloc);
3807
+
3808
+ rb_define_method(cQueryFilter, "initialize", frt_qf_init, 1);
3809
+ }
3810
+
3811
+ /*
3812
+ * Document-class: Ferret::Search::Filter
3813
+ *
3814
+ * == Summary
3815
+ *
3816
+ * A Filter is used to filter query results. It is usually passed to one of
3817
+ * Searcher's search methods however it can also be used inside a
3818
+ * ConstantScoreQuery or a FilteredQuery. To implement your own Filter you
3819
+ * must implement the method #get_bitvector(index_reader) which returns a
3820
+ * BitVector with set bits corresponding to documents that are allowed by
3821
+ * this Filter.
3822
+ *
3823
+ * TODO add support for user implemented Filter.
3824
+ * TODO add example of user implemented Filter.
3825
+ */
3826
+ static void
3827
+ Init_Filter(void)
3828
+ {
3829
+ id_bits = rb_intern("bits");
3830
+ cFilter = rb_define_class_under(mSearch, "Filter", rb_cObject);
3831
+ frt_mark_cclass(cFilter);
3832
+ rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
3833
+
3834
+ rb_define_method(cFilter, "bits", frt_f_get_bits, 1);
3835
+ rb_define_method(cFilter, "to_s", frt_f_to_s, 0);
3836
+ }
3837
+
3838
+ /*
3839
+ * Document-class: Ferret::Search::SortField
3840
+ *
3841
+ * == Summary
3842
+ *
3843
+ * A SortField is used to sort the result-set of a search by the contents of
3844
+ * a field. The following types of sort_field are available;
3845
+ *
3846
+ * * :auto
3847
+ * * :integer
3848
+ * * :float
3849
+ * * :string
3850
+ * * :byte
3851
+ * * :doc_id
3852
+ * * :score
3853
+ *
3854
+ * The type of the SortField is set by passing it as a parameter to the
3855
+ * constructor. The +:auto+ type specifies that the SortField should detect
3856
+ * the sort type by looking at the data in the field. This is the default
3857
+ * :type value although it is recommended that you explicitly specify the
3858
+ * field's type.
3859
+ *
3860
+ * == Example
3861
+ *
3862
+ * title_sf = SortField.new(:title, :type => :string)
3863
+ * rating_sf = SortField.new(:rating, :type => :float, :reverse => true)
3864
+ *
3865
+ *
3866
+ * Note 1: Care should be taken when using the :auto sort-type since numbers
3867
+ * will occur before other strings in the index so if you are sorting a field
3868
+ * with both numbers and strings (like a title field which might have "24"
3869
+ * and "Prison Break") then the sort_field will think it is sorting integers
3870
+ * when it really should be sorting strings.
3871
+ *
3872
+ * Note 2: When sorting by integer, integers are only 4 bytes so anything
3873
+ * larger will cause strange sorting behaviour.
3874
+ */
3875
+ static void
3876
+ Init_SortField(void)
3877
+ {
3878
+ /* option hash keys for SortField#initialize */
3879
+ sym_type = ID2SYM(rb_intern("type"));
3880
+ sym_reverse = ID2SYM(rb_intern("reverse"));
3881
+ sym_comparator = ID2SYM(rb_intern("comparator"));
3882
+
3883
+ /* Sort types */
3884
+ sym_integer = ID2SYM(rb_intern("integer"));
3885
+ sym_float = ID2SYM(rb_intern("float"));
3886
+ sym_string = ID2SYM(rb_intern("string"));
3887
+ sym_auto = ID2SYM(rb_intern("auto"));
3888
+ sym_doc_id = ID2SYM(rb_intern("doc_id"));
3889
+ sym_score = ID2SYM(rb_intern("score"));
3890
+ sym_byte = ID2SYM(rb_intern("byte"));
3891
+
3892
+ cSortField = rb_define_class_under(mSearch, "SortField", rb_cObject);
3893
+ rb_define_alloc_func(cSortField, frt_data_alloc);
3894
+
3895
+ rb_define_method(cSortField, "initialize", frt_sf_init, -1);
3896
+ rb_define_method(cSortField, "reverse?", frt_sf_is_reverse, 0);
3897
+ rb_define_method(cSortField, "name", frt_sf_get_name, 0);
3898
+ rb_define_method(cSortField, "type", frt_sf_get_type, 0);
3899
+ rb_define_method(cSortField, "comparator", frt_sf_get_comparator, 0);
3900
+ rb_define_method(cSortField, "to_s", frt_sf_to_s, 0);
3901
+
3902
+ rb_define_const(cSortField, "SCORE",
3903
+ Data_Wrap_Struct(cSortField, NULL,
3904
+ &frt_deref_free,
3905
+ (SortField *)&SORT_FIELD_SCORE));
3906
+ object_add((SortField *)&SORT_FIELD_SCORE,
3907
+ rb_const_get(cSortField, rb_intern("SCORE")));
3908
+
3909
+ rb_define_const(cSortField, "SCORE_REV",
3910
+ Data_Wrap_Struct(cSortField, NULL,
3911
+ &frt_deref_free,
3912
+ (SortField *)&SORT_FIELD_SCORE_REV));
3913
+ object_add((SortField *)&SORT_FIELD_SCORE_REV,
3914
+ rb_const_get(cSortField, rb_intern("SCORE_REV")));
3915
+
3916
+ rb_define_const(cSortField, "DOC_ID",
3917
+ Data_Wrap_Struct(cSortField, NULL,
3918
+ &frt_deref_free,
3919
+ (SortField *)&SORT_FIELD_DOC));
3920
+
3921
+ oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_ID"));
3922
+ object_add((SortField *)&SORT_FIELD_DOC, oSORT_FIELD_DOC);
3923
+
3924
+ rb_define_const(cSortField, "DOC_ID_REV",
3925
+ Data_Wrap_Struct(cSortField, NULL,
3926
+ &frt_deref_free,
3927
+ (SortField *)&SORT_FIELD_DOC_REV));
3928
+ object_add((SortField *)&SORT_FIELD_DOC_REV,
3929
+ rb_const_get(cSortField, rb_intern("DOC_ID_REV")));
3930
+ }
3931
+
3932
+ /*
3933
+ * Document-class: Ferret::Search::Sort
3934
+ *
3935
+ * == Summary
3936
+ *
3937
+ * A Sort object is used to combine and apply a list of SortFields. The
3938
+ * SortFields are applied in the order they are added to the SortObject.
3939
+ *
3940
+ * == Example
3941
+ *
3942
+ * Here is how you would create a Sort object that sorts first by rating and
3943
+ * then by title;
3944
+ *
3945
+ * sf_rating = SortField.new(:rating, :type => :float, :reverse => true)
3946
+ * sf_title = SortField.new(:title, :type => :string)
3947
+ * sort = Sort.new([sf_rating, sf_title])
3948
+ *
3949
+ * Remember that the :type parameter for SortField is set to :auto by default
3950
+ * but I strongly recommend you specify a :type value.
3951
+ */
3952
+ static void
3953
+ Init_Sort(void)
3954
+ {
3955
+ /* Sort */
3956
+ cSort = rb_define_class_under(mSearch, "Sort", rb_cObject);
3957
+ rb_define_alloc_func(cSort, frt_sort_alloc);
3958
+
3959
+ rb_define_method(cSort, "initialize", frt_sort_init, -1);
3960
+ rb_define_method(cSort, "fields", frt_sort_get_fields, 0);
3961
+ rb_define_method(cSort, "to_s", frt_sort_to_s, 0);
3962
+
3963
+ rb_define_const(cSort, "RELEVANCE",
3964
+ frt_sort_init(0, NULL, frt_sort_alloc(cSort)));
3965
+ rb_define_const(cSort, "INDEX_ORDER",
3966
+ frt_sort_init(1, &oSORT_FIELD_DOC, frt_sort_alloc(cSort)));
3967
+ }
3968
+
3969
+ /*
3970
+ * Document-class: Ferret::Search::Searcher
3971
+ *
3972
+ * == Summary
3973
+ *
3974
+ * The Searcher class basically performs the task that Ferret was built for.
3975
+ * It searches the index. To search the index the Searcher class wraps an
3976
+ * IndexReader so many of the tasks that you can perform on an IndexReader
3977
+ * are also available on a searcher including, most importantly, accessing
3978
+ * stored documents.
3979
+ *
3980
+ * The main methods that you need to know about when using a Searcher are the
3981
+ * search methods. There is the Searcher#search_each method which iterates
3982
+ * through the results by document id and score and there is the
3983
+ * Searcher#search method which returns a TopDocs object. Another important
3984
+ * difference to note is that the Searcher#search_each method normalizes the
3985
+ * score to a value in the range 0.0..1.0 if the max_score is greater than
3986
+ * 1.0. Searcher#search does not. Apart from that they take the same
3987
+ * parameters and work the same way.
3988
+ *
3989
+ * == Example
3990
+ *
3991
+ * searcher = Searcher.new("/path/to/index")
3992
+ *
3993
+ * searcher.search_each(TermQuery.new(:content, "ferret"),
3994
+ * :filter => RangeFilter.new(:date, :< => "2006"),
3995
+ * :sort => "date DESC, title") do |doc_id, score|
3996
+ * puts "#{searcher[doc_id][:title]} scored #{score}"
3997
+ * end
3998
+ */
3999
+ static void
4000
+ Init_Searcher(void)
4001
+ {
4002
+ /* option hash keys for Searcher#search */
4003
+ sym_offset = ID2SYM(rb_intern("offset"));
4004
+ sym_limit = ID2SYM(rb_intern("limit"));
4005
+ sym_all = ID2SYM(rb_intern("all"));
4006
+ sym_filter = ID2SYM(rb_intern("filter"));
4007
+ sym_filter_proc = ID2SYM(rb_intern("filter_proc"));
4008
+ sym_sort = ID2SYM(rb_intern("sort"));
4009
+
4010
+ sym_excerpt_length = ID2SYM(rb_intern("excerpt_length"));
4011
+ sym_num_excerpts = ID2SYM(rb_intern("num_excerpts"));
4012
+ sym_pre_tag = ID2SYM(rb_intern("pre_tag"));
4013
+ sym_post_tag = ID2SYM(rb_intern("post_tag"));
4014
+ sym_ellipsis = ID2SYM(rb_intern("ellipsis"));
4015
+
4016
+ /* Searcher */
4017
+ cSearcher = rb_define_class_under(mSearch, "Searcher", rb_cObject);
4018
+ rb_define_alloc_func(cSearcher, frt_data_alloc);
4019
+
4020
+ rb_define_method(cSearcher, "initialize", frt_sea_init, 1);
4021
+ rb_define_method(cSearcher, "close", frt_sea_close, 0);
4022
+ rb_define_method(cSearcher, "reader", frt_sea_get_reader, 0);
4023
+ rb_define_method(cSearcher, "doc_freq", frt_sea_doc_freq, 2);
4024
+ rb_define_method(cSearcher, "get_document", frt_sea_doc, 1);
4025
+ rb_define_method(cSearcher, "[]", frt_sea_doc, 1);
4026
+ rb_define_method(cSearcher, "max_doc", frt_sea_max_doc, 0);
4027
+ rb_define_method(cSearcher, "search", frt_sea_search, -1);
4028
+ rb_define_method(cSearcher, "search_each", frt_sea_search_each, -1);
4029
+ rb_define_method(cSearcher, "explain", frt_sea_explain, 2);
4030
+ rb_define_method(cSearcher, "highlight", frt_sea_highlight, -1);
4031
+ }
4032
+
4033
+ /*
4034
+ * Document-class: Ferret::Search::MultiSearcher
4035
+ *
4036
+ * == Summary
4037
+ *
4038
+ * See Searcher for the methods that you can use on this object. A
4039
+ * MultiSearcher is used to search multiple sub-searchers. The most efficient
4040
+ * way to do this would be to open up an IndexReader on multiple directories
4041
+ * and create a Searcher with that. However, if you decide to implement a
4042
+ * RemoteSearcher, the MultiSearcher can be used to search multiple machines
4043
+ * at once.
4044
+ */
4045
+ static void
4046
+ Init_MultiSearcher(void)
4047
+ {
4048
+ cMultiSearcher = rb_define_class_under(mSearch, "MultiSearcher", cSearcher);
4049
+ rb_define_alloc_func(cMultiSearcher, frt_data_alloc);
4050
+ rb_define_method(cMultiSearcher, "initialize", frt_ms_init, -1);
4051
+ }
4052
+
4053
+ /*
4054
+ * Document-module: Ferret::Search
4055
+ *
4056
+ * == Summary
4057
+ *
4058
+ * The Search module contains all the classes used for searching the index;
4059
+ * what Ferret was designed to do. The important classes to take a look at in
4060
+ * this module are (in order);
4061
+ *
4062
+ * * Query
4063
+ * * Searcher
4064
+ * * Filter
4065
+ * * Sort
4066
+ *
4067
+ * Happy Ferreting!!
4068
+ */
4069
+ void
4070
+ Init_Search(void)
4071
+ {
4072
+ mSearch = rb_define_module_under(mFerret, "Search");
4073
+
4074
+ Init_Hit();
4075
+ Init_TopDocs();
4076
+ Init_Explanation();
4077
+
4078
+ /* Queries */
4079
+ Init_Query();
4080
+
4081
+ Init_TermQuery();
4082
+ Init_MultiTermQuery();
4083
+ Init_BooleanQuery();
4084
+ Init_RangeQuery();
4085
+ Init_PhraseQuery();
4086
+ Init_PrefixQuery();
4087
+ Init_WildcardQuery();
4088
+ Init_FuzzyQuery();
4089
+ Init_MatchAllQuery();
4090
+ Init_ConstantScoreQuery();
4091
+ Init_FilteredQuery();
4092
+
4093
+ Init_Spans();
4094
+
4095
+ /* Filters */
4096
+ Init_Filter();
4097
+ Init_RangeFilter();
4098
+ Init_QueryFilter();
4099
+
4100
+ /* Sorting */
4101
+ Init_SortField(); /* must be before Init_Sort */
4102
+ Init_Sort();
4103
+
4104
+ /* Searchers */
4105
+ Init_Searcher();
4106
+ Init_MultiSearcher();
4107
+ }