sdsykes-ferret 0.11.6.19

Sign up to get free protection for your applications and to get access to all the features.
Files changed (195) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +102 -0
  4. data/Rakefile +338 -0
  5. data/TODO +17 -0
  6. data/TUTORIAL +231 -0
  7. data/bin/ferret-browser +79 -0
  8. data/ext/analysis.c +1555 -0
  9. data/ext/analysis.h +219 -0
  10. data/ext/api.c +69 -0
  11. data/ext/api.h +27 -0
  12. data/ext/array.c +123 -0
  13. data/ext/array.h +53 -0
  14. data/ext/bitvector.c +540 -0
  15. data/ext/bitvector.h +272 -0
  16. data/ext/compound_io.c +383 -0
  17. data/ext/config.h +42 -0
  18. data/ext/document.c +156 -0
  19. data/ext/document.h +53 -0
  20. data/ext/except.c +120 -0
  21. data/ext/except.h +168 -0
  22. data/ext/extconf.rb +14 -0
  23. data/ext/ferret.c +402 -0
  24. data/ext/ferret.h +91 -0
  25. data/ext/filter.c +156 -0
  26. data/ext/fs_store.c +483 -0
  27. data/ext/global.c +418 -0
  28. data/ext/global.h +117 -0
  29. data/ext/hash.c +567 -0
  30. data/ext/hash.h +473 -0
  31. data/ext/hashset.c +170 -0
  32. data/ext/hashset.h +187 -0
  33. data/ext/header.h +58 -0
  34. data/ext/helper.c +62 -0
  35. data/ext/helper.h +13 -0
  36. data/ext/inc/lang.h +48 -0
  37. data/ext/inc/threading.h +31 -0
  38. data/ext/index.c +6425 -0
  39. data/ext/index.h +961 -0
  40. data/ext/lang.h +66 -0
  41. data/ext/libstemmer.c +92 -0
  42. data/ext/libstemmer.h +79 -0
  43. data/ext/mempool.c +87 -0
  44. data/ext/mempool.h +35 -0
  45. data/ext/modules.h +162 -0
  46. data/ext/multimapper.c +310 -0
  47. data/ext/multimapper.h +51 -0
  48. data/ext/posh.c +1006 -0
  49. data/ext/posh.h +1007 -0
  50. data/ext/priorityqueue.c +151 -0
  51. data/ext/priorityqueue.h +143 -0
  52. data/ext/q_boolean.c +1608 -0
  53. data/ext/q_const_score.c +161 -0
  54. data/ext/q_filtered_query.c +209 -0
  55. data/ext/q_fuzzy.c +268 -0
  56. data/ext/q_match_all.c +148 -0
  57. data/ext/q_multi_term.c +677 -0
  58. data/ext/q_parser.c +2825 -0
  59. data/ext/q_phrase.c +1126 -0
  60. data/ext/q_prefix.c +100 -0
  61. data/ext/q_range.c +350 -0
  62. data/ext/q_span.c +2402 -0
  63. data/ext/q_term.c +337 -0
  64. data/ext/q_wildcard.c +171 -0
  65. data/ext/r_analysis.c +2575 -0
  66. data/ext/r_index.c +3472 -0
  67. data/ext/r_qparser.c +585 -0
  68. data/ext/r_search.c +4105 -0
  69. data/ext/r_store.c +513 -0
  70. data/ext/r_utils.c +963 -0
  71. data/ext/ram_store.c +471 -0
  72. data/ext/search.c +1741 -0
  73. data/ext/search.h +885 -0
  74. data/ext/similarity.c +150 -0
  75. data/ext/similarity.h +82 -0
  76. data/ext/sort.c +983 -0
  77. data/ext/stem_ISO_8859_1_danish.c +338 -0
  78. data/ext/stem_ISO_8859_1_danish.h +16 -0
  79. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  80. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  81. data/ext/stem_ISO_8859_1_english.c +1156 -0
  82. data/ext/stem_ISO_8859_1_english.h +16 -0
  83. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  84. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  85. data/ext/stem_ISO_8859_1_french.c +1276 -0
  86. data/ext/stem_ISO_8859_1_french.h +16 -0
  87. data/ext/stem_ISO_8859_1_german.c +512 -0
  88. data/ext/stem_ISO_8859_1_german.h +16 -0
  89. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  90. data/ext/stem_ISO_8859_1_italian.h +16 -0
  91. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  92. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  93. data/ext/stem_ISO_8859_1_porter.c +776 -0
  94. data/ext/stem_ISO_8859_1_porter.h +16 -0
  95. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  96. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  97. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  98. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  99. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  100. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  101. data/ext/stem_KOI8_R_russian.c +701 -0
  102. data/ext/stem_KOI8_R_russian.h +16 -0
  103. data/ext/stem_UTF_8_danish.c +344 -0
  104. data/ext/stem_UTF_8_danish.h +16 -0
  105. data/ext/stem_UTF_8_dutch.c +653 -0
  106. data/ext/stem_UTF_8_dutch.h +16 -0
  107. data/ext/stem_UTF_8_english.c +1176 -0
  108. data/ext/stem_UTF_8_english.h +16 -0
  109. data/ext/stem_UTF_8_finnish.c +808 -0
  110. data/ext/stem_UTF_8_finnish.h +16 -0
  111. data/ext/stem_UTF_8_french.c +1296 -0
  112. data/ext/stem_UTF_8_french.h +16 -0
  113. data/ext/stem_UTF_8_german.c +526 -0
  114. data/ext/stem_UTF_8_german.h +16 -0
  115. data/ext/stem_UTF_8_italian.c +1113 -0
  116. data/ext/stem_UTF_8_italian.h +16 -0
  117. data/ext/stem_UTF_8_norwegian.c +302 -0
  118. data/ext/stem_UTF_8_norwegian.h +16 -0
  119. data/ext/stem_UTF_8_porter.c +794 -0
  120. data/ext/stem_UTF_8_porter.h +16 -0
  121. data/ext/stem_UTF_8_portuguese.c +1055 -0
  122. data/ext/stem_UTF_8_portuguese.h +16 -0
  123. data/ext/stem_UTF_8_russian.c +709 -0
  124. data/ext/stem_UTF_8_russian.h +16 -0
  125. data/ext/stem_UTF_8_spanish.c +1137 -0
  126. data/ext/stem_UTF_8_spanish.h +16 -0
  127. data/ext/stem_UTF_8_swedish.c +313 -0
  128. data/ext/stem_UTF_8_swedish.h +16 -0
  129. data/ext/stopwords.c +401 -0
  130. data/ext/store.c +692 -0
  131. data/ext/store.h +777 -0
  132. data/ext/term_vectors.c +352 -0
  133. data/ext/threading.h +31 -0
  134. data/ext/utilities.c +446 -0
  135. data/ext/win32.h +54 -0
  136. data/lib/ferret.rb +29 -0
  137. data/lib/ferret/browser.rb +246 -0
  138. data/lib/ferret/browser/s/global.js +192 -0
  139. data/lib/ferret/browser/s/style.css +148 -0
  140. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  141. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  142. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  143. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  144. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  145. data/lib/ferret/browser/views/layout.rhtml +22 -0
  146. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  147. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  148. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  149. data/lib/ferret/browser/webrick.rb +14 -0
  150. data/lib/ferret/document.rb +130 -0
  151. data/lib/ferret/field_infos.rb +44 -0
  152. data/lib/ferret/index.rb +786 -0
  153. data/lib/ferret/number_tools.rb +157 -0
  154. data/lib/ferret_version.rb +3 -0
  155. data/setup.rb +1555 -0
  156. data/test/test_all.rb +5 -0
  157. data/test/test_helper.rb +24 -0
  158. data/test/threading/number_to_spoken.rb +132 -0
  159. data/test/threading/thread_safety_index_test.rb +79 -0
  160. data/test/threading/thread_safety_read_write_test.rb +76 -0
  161. data/test/threading/thread_safety_test.rb +133 -0
  162. data/test/unit/analysis/tc_analyzer.rb +548 -0
  163. data/test/unit/analysis/tc_token_stream.rb +646 -0
  164. data/test/unit/index/tc_index.rb +762 -0
  165. data/test/unit/index/tc_index_reader.rb +699 -0
  166. data/test/unit/index/tc_index_writer.rb +437 -0
  167. data/test/unit/index/th_doc.rb +315 -0
  168. data/test/unit/largefile/tc_largefile.rb +46 -0
  169. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  170. data/test/unit/search/tc_filter.rb +135 -0
  171. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  172. data/test/unit/search/tc_index_searcher.rb +61 -0
  173. data/test/unit/search/tc_multi_searcher.rb +128 -0
  174. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  175. data/test/unit/search/tc_search_and_sort.rb +179 -0
  176. data/test/unit/search/tc_sort.rb +49 -0
  177. data/test/unit/search/tc_sort_field.rb +27 -0
  178. data/test/unit/search/tc_spans.rb +190 -0
  179. data/test/unit/search/tm_searcher.rb +384 -0
  180. data/test/unit/store/tc_fs_store.rb +77 -0
  181. data/test/unit/store/tc_ram_store.rb +35 -0
  182. data/test/unit/store/tm_store.rb +34 -0
  183. data/test/unit/store/tm_store_lock.rb +68 -0
  184. data/test/unit/tc_document.rb +81 -0
  185. data/test/unit/ts_analysis.rb +2 -0
  186. data/test/unit/ts_index.rb +2 -0
  187. data/test/unit/ts_largefile.rb +4 -0
  188. data/test/unit/ts_query_parser.rb +2 -0
  189. data/test/unit/ts_search.rb +2 -0
  190. data/test/unit/ts_store.rb +2 -0
  191. data/test/unit/ts_utils.rb +2 -0
  192. data/test/unit/utils/tc_bit_vector.rb +295 -0
  193. data/test/unit/utils/tc_number_tools.rb +117 -0
  194. data/test/unit/utils/tc_priority_queue.rb +106 -0
  195. metadata +285 -0
@@ -0,0 +1,4105 @@
1
+ #include "ferret.h"
2
+ #include <st.h>
3
+ #include <rubysig.h>
4
+ #include <ctype.h>
5
+ #include <array.h>
6
+ #include "search.h"
7
+
8
+ VALUE mSearch;
9
+
10
+ static VALUE cHit;
11
+ static VALUE cTopDocs;
12
+ static VALUE cExplanation;
13
+ static VALUE cSearcher;
14
+ static VALUE cMultiSearcher;
15
+ static VALUE cSortField;
16
+ static VALUE cSort;
17
+
18
+ /* Queries */
19
+ static VALUE cQuery;
20
+ static VALUE cTermQuery;
21
+ static VALUE cMultiTermQuery;
22
+ static VALUE cBooleanQuery;
23
+ static VALUE cBooleanClause;
24
+ static VALUE cRangeQuery;
25
+ static VALUE cPhraseQuery;
26
+ static VALUE cPrefixQuery;
27
+ static VALUE cWildcardQuery;
28
+ static VALUE cFuzzyQuery;
29
+ static VALUE cMatchAllQuery;
30
+ static VALUE cConstantScoreQuery;
31
+ static VALUE cFilteredQuery;
32
+ static VALUE cSpanTermQuery;
33
+ static VALUE cSpanMultiTermQuery;
34
+ static VALUE cSpanPrefixQuery;
35
+ static VALUE cSpanFirstQuery;
36
+ static VALUE cSpanNearQuery;
37
+ static VALUE cSpanOrQuery;
38
+ static VALUE cSpanNotQuery;
39
+
40
+ /* Filters */
41
+ static ID id_bits;
42
+ static VALUE cFilter;
43
+ static VALUE cRangeFilter;
44
+ static VALUE cQueryFilter;
45
+
46
+ /* MultiTermQuery */
47
+ static ID id_default_max_terms;
48
+ static VALUE sym_max_terms;
49
+ static VALUE sym_min_score;
50
+
51
+ /** Option hash keys **/
52
+ /* BooleanClause */
53
+ static VALUE sym_should;
54
+ static VALUE sym_must;
55
+ static VALUE sym_must_not;
56
+
57
+ /* RangeQuery */
58
+ static VALUE sym_upper;
59
+ static VALUE sym_lower;
60
+ static VALUE sym_include_upper;
61
+ static VALUE sym_include_lower;
62
+ static VALUE sym_upper_exclusive;
63
+ static VALUE sym_lower_exclusive;
64
+
65
+ static VALUE sym_less_than;
66
+ static VALUE sym_less_than_or_equal_to;
67
+ static VALUE sym_greater_than;
68
+ static VALUE sym_greater_than_or_equal_to;
69
+
70
+ /* FuzzyQuery */
71
+ static VALUE sym_min_similarity;
72
+ static VALUE sym_prefix_length;
73
+
74
+ /* SpanNearQuery */
75
+ static VALUE sym_slop;
76
+ static VALUE sym_in_order;
77
+ static VALUE sym_clauses;
78
+
79
+ /* Class variable ids */
80
+ static ID id_default_min_similarity;
81
+ static ID id_default_prefix_length;
82
+
83
+
84
+ /** Sort **/
85
+ static VALUE oSORT_FIELD_DOC;
86
+
87
+ /* Sort types */
88
+ static VALUE sym_integer;
89
+ static VALUE sym_float;
90
+ static VALUE sym_string;
91
+ static VALUE sym_auto;
92
+ static VALUE sym_doc_id;
93
+ static VALUE sym_score;
94
+ static VALUE sym_byte;
95
+
96
+ /* Sort params */
97
+ static VALUE sym_type;
98
+ static VALUE sym_reverse;
99
+ static VALUE sym_comparator;
100
+
101
+ /* Hits */
102
+ static ID id_doc;
103
+ static ID id_score;
104
+
105
+ /* TopDocs */
106
+ static ID id_hits;
107
+ static ID id_total_hits;
108
+ static ID id_max_score;
109
+ static ID id_searcher;
110
+
111
+ /* Search */
112
+ static VALUE sym_offset;
113
+ static VALUE sym_limit;
114
+ static VALUE sym_all;
115
+ static VALUE sym_sort;
116
+ static VALUE sym_filter;
117
+ static VALUE sym_filter_proc;
118
+
119
+ static VALUE sym_excerpt_length;
120
+ static VALUE sym_num_excerpts;
121
+ static VALUE sym_pre_tag;
122
+ static VALUE sym_post_tag;
123
+ static VALUE sym_ellipsis;
124
+
125
+ extern VALUE cIndexReader;
126
+ extern void frt_ir_free(void *p);
127
+ extern void frt_ir_mark(void *p);
128
+
129
+ extern void frt_set_term(VALUE rterm, Term *t);
130
+ extern VALUE frt_get_analyzer(Analyzer *a);
131
+ extern HashSet *frt_get_fields(VALUE rfields);
132
+ extern Analyzer *frt_get_cwrapped_analyzer(VALUE ranalyzer);
133
+ extern VALUE frt_get_lazy_doc(LazyDoc *lazy_doc);
134
+
135
+ /****************************************************************************
136
+ *
137
+ * Hit Methods
138
+ *
139
+ ****************************************************************************/
140
+
141
+ static VALUE
142
+ frt_get_hit(Hit *hit)
143
+ {
144
+ return rb_struct_new(cHit,
145
+ INT2FIX(hit->doc),
146
+ rb_float_new((double)hit->score),
147
+ NULL);
148
+ }
149
+
150
+ /****************************************************************************
151
+ *
152
+ * TopDocs Methods
153
+ *
154
+ ****************************************************************************/
155
+
156
+ static VALUE
157
+ frt_get_td(TopDocs *td, VALUE rsearcher)
158
+ {
159
+ int i;
160
+ VALUE rtop_docs;
161
+ VALUE hit_ary = rb_ary_new2(td->size);
162
+
163
+ for (i = 0; i < td->size; i++) {
164
+ rb_ary_store(hit_ary, i, frt_get_hit(td->hits[i]));
165
+ }
166
+
167
+ rtop_docs = rb_struct_new(cTopDocs,
168
+ INT2FIX(td->total_hits),
169
+ hit_ary,
170
+ rb_float_new((double)td->max_score),
171
+ rsearcher,
172
+ NULL);
173
+ td_destroy(td);
174
+ return rtop_docs;
175
+ }
176
+
177
+ /*
178
+ * call-seq:
179
+ * top_doc.to_s(field = :id) -> string
180
+ *
181
+ * Returns a string representation of the top_doc in readable format.
182
+ */
183
+ static VALUE
184
+ frt_td_to_s(int argc, VALUE *argv, VALUE self)
185
+ {
186
+ int i;
187
+ VALUE rhits = rb_funcall(self, id_hits, 0);
188
+ Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
189
+ const int len = RARRAY_LEN(rhits);
190
+ char *str = ALLOC_N(char, len * 64 + 100);
191
+ char *s = str;
192
+ char *field = "id";
193
+ VALUE rstr;
194
+
195
+ if (argc) {
196
+ field = frt_field(argv[0]);
197
+ }
198
+
199
+ sprintf(s, "TopDocs: total_hits = %d, max_score = %f [\n",
200
+ FIX2INT(rb_funcall(self, id_total_hits, 0)),
201
+ NUM2DBL(rb_funcall(self, id_max_score, 0)));
202
+ s += strlen(s);
203
+
204
+ for (i = 0; i < len; i++) {
205
+ VALUE rhit = RARRAY_PTR(rhits)[i];
206
+ int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
207
+ char *value = "";
208
+ LazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
209
+ LazyDocField *lzdf = h_get(lzd->field_dict, field);
210
+ if (NULL != lzdf) {
211
+ value = lazy_df_get_data(lzdf, 0);
212
+ }
213
+
214
+ sprintf(s, "\t%d \"%s\": %f\n", doc_id, value,
215
+ NUM2DBL(rb_funcall(rhit, id_score, 0)));
216
+ s += strlen(s);
217
+ lazy_doc_close(lzd);
218
+ }
219
+
220
+ sprintf(s, "]\n");
221
+ rstr = rb_str_new2(str);
222
+ free(str);
223
+ return rstr;
224
+ }
225
+
226
+ static INLINE char *
227
+ frt_lzd_load_to_json(LazyDoc *lzd, char **str, char *s, int *slen)
228
+ {
229
+ int i, j;
230
+ int diff = s - *str;
231
+ int len = diff, l;
232
+ LazyDocField *f;
233
+
234
+ for (i = 0; i < lzd->size; i++) {
235
+ f = lzd->fields[i];
236
+ /* 3 times length of field to make space for quoted quotes ('"') and
237
+ * 4 times field elements to make space for '"' around fields and ','
238
+ * between fields. Add 100 for '[', ']' and good safety.
239
+ */
240
+ len += strlen(f->name) + f->len * 3 + 100 + 4 * f->size;
241
+ }
242
+
243
+ if (len > *slen) {
244
+ while (len > *slen) *slen = *slen << 1;
245
+ REALLOC_N(*str, char, *slen);
246
+ s = *str + diff;
247
+ }
248
+
249
+ for (i = 0; i < lzd->size; i++) {
250
+ f = lzd->fields[i];
251
+ if (i) *(s++) = ',';
252
+ *(s++) = '"';
253
+ l = strlen(f->name);
254
+ memcpy(s, f->name, l);
255
+ s += l;
256
+ *(s++) = '"';
257
+ *(s++) = ':';
258
+ if (f->size > 1) *(s++) = '[';
259
+ for (j = 0; j < f->size; j++) {
260
+ if (j) *(s++) = ',';
261
+ s = json_concat_string(s, lazy_df_get_data(f, j));
262
+ }
263
+ if (f->size > 1) *(s++) = ']';
264
+ }
265
+ return s;
266
+ }
267
+
268
+ /*
269
+ * call-seq:
270
+ * top_doc.to_json() -> string
271
+ *
272
+ * Returns a json representation of the top_doc.
273
+ */
274
+ static VALUE
275
+ frt_td_to_json(VALUE self)
276
+ {
277
+ int i;
278
+ VALUE rhits = rb_funcall(self, id_hits, 0);
279
+ VALUE rhit;
280
+ LazyDoc *lzd;
281
+ Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
282
+ const int num_hits = RARRAY_LEN(rhits);
283
+ int doc_id;
284
+ int len = 32768;
285
+ char *str = ALLOC_N(char, len);
286
+ char *s = str;
287
+ VALUE rstr;
288
+
289
+ *(s++) = '[';
290
+ for (i = 0; i < num_hits; i++) {
291
+ if (i) *(s++) = ',';
292
+ *(s++) = '{';
293
+ rhit = RARRAY_PTR(rhits)[i];
294
+ doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
295
+ lzd = sea->get_lazy_doc(sea, doc_id);
296
+ s = frt_lzd_load_to_json(lzd, &str, s, &len);
297
+ lazy_doc_close(lzd);
298
+ *(s++) = '}';
299
+ }
300
+ *(s++) = ']';
301
+ *(s++) = '\0';
302
+ rstr = rb_str_new2(str);
303
+ free(str);
304
+ return rstr;
305
+ }
306
+
307
+
308
+ /****************************************************************************
309
+ *
310
+ * Explanation Methods
311
+ *
312
+ ****************************************************************************/
313
+
314
+ #define GET_EXPL() Explanation *expl = (Explanation *)DATA_PTR(self)
315
+
316
+ /*
317
+ * call-seq:
318
+ * explanation.to_s -> string
319
+ *
320
+ * Returns a string representation of the explanation in readable format.
321
+ */
322
+ static VALUE
323
+ frt_expl_to_s(VALUE self)
324
+ {
325
+ GET_EXPL();
326
+ char *str = expl_to_s(expl);
327
+ VALUE rstr = rb_str_new2(str);
328
+ free(str);
329
+ return rstr;
330
+ }
331
+
332
+ /*
333
+ * call-seq:
334
+ * explanation.to_html -> string
335
+ *
336
+ * Returns an html representation of the explanation in readable format.
337
+ */
338
+ static VALUE
339
+ frt_expl_to_html(VALUE self)
340
+ {
341
+ GET_EXPL();
342
+ char *str = expl_to_html(expl);
343
+ VALUE rstr = rb_str_new2(str);
344
+ free(str);
345
+ return rstr;
346
+ }
347
+
348
+ /*
349
+ * call-seq:
350
+ * explanation.score -> float
351
+ *
352
+ * Returns the score represented by the query. This can be used for debugging
353
+ * purposes mainly to check that the score returned by the explanation
354
+ * matches that of the score for the document in the original query.
355
+ */
356
+ static VALUE
357
+ frt_expl_score(VALUE self)
358
+ {
359
+ GET_EXPL();
360
+ return rb_float_new((double)expl->value);
361
+ }
362
+
363
+ /****************************************************************************
364
+ *
365
+ * Query Methods
366
+ *
367
+ ****************************************************************************/
368
+
369
+ static void
370
+ frt_q_free(void *p)
371
+ {
372
+ object_del(p);
373
+ q_deref((Query *)p);
374
+ }
375
+
376
+ #define GET_Q() Query *q = (Query *)DATA_PTR(self)
377
+
378
+ /*
379
+ * call-seq:
380
+ * query.to_s -> string
381
+ *
382
+ * Return a string representation of the query. Most of the time, passing
383
+ * this string through the Query parser will give you the exact Query you
384
+ * began with. This can be a good way to explore how the QueryParser works.
385
+ */
386
+ static VALUE
387
+ frt_q_to_s(int argc, VALUE *argv, VALUE self)
388
+ {
389
+ GET_Q();
390
+ VALUE rstr, rfield;
391
+ char *str, *field = "";
392
+ if (rb_scan_args(argc, argv, "01", &rfield)) {
393
+ field = frt_field(rfield);
394
+ }
395
+ str = q->to_s(q, field);
396
+ rstr = rb_str_new2(str);
397
+ free(str);
398
+ return rstr;
399
+ }
400
+
401
+ /*
402
+ * call-seq:
403
+ * query.boost
404
+ *
405
+ * Returns the query's boost value. See the Query description for more
406
+ * information on Query boosts.
407
+ */
408
+ static VALUE
409
+ frt_q_get_boost(VALUE self)
410
+ {
411
+ GET_Q();
412
+ return rb_float_new((double)q->boost);
413
+ }
414
+
415
+ /*
416
+ * call-seq:
417
+ * query.boost = boost -> boost
418
+ *
419
+ * Set the boost for a query. See the Query description for more information
420
+ * on Query boosts.
421
+ */
422
+ static VALUE
423
+ frt_q_set_boost(VALUE self, VALUE rboost)
424
+ {
425
+ GET_Q();
426
+ q->boost = (float)NUM2DBL(rboost);
427
+ return rboost;
428
+ }
429
+
430
+ /*
431
+ * call-seq:
432
+ * query.hash -> number
433
+ *
434
+ * Return a hash value for the query. This is used for caching query results
435
+ * in a hash object.
436
+ */
437
+ static VALUE
438
+ frt_q_hash(VALUE self)
439
+ {
440
+ GET_Q();
441
+ return INT2FIX(q->hash(q));
442
+ }
443
+
444
+ /*
445
+ * call-seq:
446
+ * query.eql?(other_query) -> bool
447
+ * query == other_query -> bool
448
+ *
449
+ * Return true if +query+ equals +other_query+. Theoretically, two queries are
450
+ * equal if they always return the same results, no matter what the contents
451
+ * of the index. Practically, however, this is difficult to implement
452
+ * efficiently for queries like BooleanQuery since the ordering of clauses
453
+ * is unspecified. "Ruby AND Rails" will not match "Rails AND Ruby" for example,
454
+ * although their result sets will be identical. Most queries should match as
455
+ * expected however.
456
+ */
457
+ static VALUE
458
+ frt_q_eql(VALUE self, VALUE other)
459
+ {
460
+ GET_Q();
461
+ Query *oq;
462
+ Data_Get_Struct(other, Query, oq);
463
+ return q->eq(q, oq) ? Qtrue : Qfalse;
464
+ }
465
+
466
+ /*
467
+ * call-seq:
468
+ * query.terms(searcher) -> term_array
469
+ *
470
+ * Returns an array of terms searched for by this query. This can be used for
471
+ * implementing an external query highlighter for example. You must supply a
472
+ * searcher so that the query can be rewritten and optimized like it would be
473
+ * in a real search.
474
+ */
475
+ static VALUE
476
+ frt_q_get_terms(VALUE self, VALUE searcher)
477
+ {
478
+ int i;
479
+ VALUE rterms = rb_ary_new();
480
+ HashSet *terms = term_set_new();
481
+ GET_Q();
482
+ Searcher *sea = (Searcher *)DATA_PTR(searcher);
483
+ Query *rq = sea->rewrite(sea, q);
484
+ rq->extract_terms(rq, terms);
485
+ q_deref(rq);
486
+ for (i = 0; i < terms->size; i++) {
487
+ Term *term = (Term *)terms->elems[i];
488
+ rb_ary_push(rterms, frt_get_term(term->field, term->text));
489
+ }
490
+ hs_destroy(terms);
491
+ return rterms;
492
+ }
493
+
494
+ #define MK_QUERY(klass, q) Data_Wrap_Struct(klass, NULL, &frt_q_free, q)
495
+ VALUE
496
+ frt_get_q(Query *q)
497
+ {
498
+ VALUE self = object_get(q);
499
+
500
+ if (self == Qnil) {
501
+ switch (q->type) {
502
+ case TERM_QUERY:
503
+ self = MK_QUERY(cTermQuery, q);
504
+ break;
505
+ case MULTI_TERM_QUERY:
506
+ self = MK_QUERY(cMultiTermQuery, q);
507
+ break;
508
+ case BOOLEAN_QUERY:
509
+ self = MK_QUERY(cBooleanQuery, q);
510
+ break;
511
+ case PHRASE_QUERY:
512
+ self = MK_QUERY(cPhraseQuery, q);
513
+ break;
514
+ case CONSTANT_QUERY:
515
+ self = MK_QUERY(cConstantScoreQuery, q);
516
+ break;
517
+ case FILTERED_QUERY:
518
+ self = MK_QUERY(cFilteredQuery, q);
519
+ break;
520
+ case MATCH_ALL_QUERY:
521
+ self = MK_QUERY(cMatchAllQuery, q);
522
+ break;
523
+ case RANGE_QUERY:
524
+ self = MK_QUERY(cRangeQuery, q);
525
+ break;
526
+ case WILD_CARD_QUERY:
527
+ self = MK_QUERY(cWildcardQuery, q);
528
+ break;
529
+ case FUZZY_QUERY:
530
+ self = MK_QUERY(cFuzzyQuery, q);
531
+ break;
532
+ case PREFIX_QUERY:
533
+ self = MK_QUERY(cPrefixQuery, q);
534
+ break;
535
+ case SPAN_TERM_QUERY:
536
+ self = MK_QUERY(cSpanMultiTermQuery, q);
537
+ break;
538
+ case SPAN_MULTI_TERM_QUERY:
539
+ self = MK_QUERY(cSpanPrefixQuery, q);
540
+ break;
541
+ case SPAN_PREFIX_QUERY:
542
+ self = MK_QUERY(cSpanTermQuery, q);
543
+ break;
544
+ case SPAN_FIRST_QUERY:
545
+ self = MK_QUERY(cSpanFirstQuery, q);
546
+ break;
547
+ case SPAN_OR_QUERY:
548
+ self = MK_QUERY(cSpanOrQuery, q);
549
+ break;
550
+ case SPAN_NOT_QUERY:
551
+ self = MK_QUERY(cSpanNotQuery, q);
552
+ break;
553
+ case SPAN_NEAR_QUERY:
554
+ self = MK_QUERY(cSpanNearQuery, q);
555
+ break;
556
+ default:
557
+ rb_raise(rb_eArgError, "Unknown query type");
558
+ break;
559
+ }
560
+ object_add(q, self);
561
+ }
562
+ return self;
563
+ }
564
+
565
+ /****************************************************************************
566
+ *
567
+ * TermQuery Methods
568
+ *
569
+ ****************************************************************************/
570
+
571
+ /*
572
+ * call-seq:
573
+ * TermQuery.new(field, term) -> term_query
574
+ *
575
+ * Create a new TermQuery object which will match all documents with the term
576
+ * +term+ in the field +field+.
577
+ *
578
+ * Note: As usual, field should be a symbol
579
+ */
580
+ static VALUE
581
+ frt_tq_init(VALUE self, VALUE rfield, VALUE rterm)
582
+ {
583
+ char *field = frt_field(rfield);
584
+ char *term = rs2s(rb_obj_as_string(rterm));
585
+ Query *q = tq_new(field, term);
586
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
587
+ object_add(q, self);
588
+ return self;
589
+ }
590
+
591
+ /****************************************************************************
592
+ *
593
+ * MultiTermQuery Methods
594
+ *
595
+ ****************************************************************************/
596
+
597
+ /*
598
+ * call-seq:
599
+ * MultiTermQuery.default_max_terms -> number
600
+ *
601
+ * Get the default value for +:max_terms+ in a MultiTermQuery. This value is
602
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
603
+ */
604
+ static VALUE
605
+ frt_mtq_get_dmt(VALUE self)
606
+ {
607
+ return rb_cvar_get(cMultiTermQuery, id_default_max_terms);
608
+ }
609
+
610
+ /*
611
+ * call-seq:
612
+ * MultiTermQuery.default_max_terms = max_terms -> max_terms
613
+ *
614
+ * Set the default value for +:max_terms+ in a MultiTermQuery. This value is
615
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
616
+ */
617
+ static VALUE
618
+ frt_mtq_set_dmt(VALUE self, VALUE rnum_terms)
619
+ {
620
+ int max_terms = FIX2INT(rnum_terms);
621
+ if (max_terms <= 0) {
622
+ rb_raise(rb_eArgError,
623
+ "%d <= 0. @@max_terms must be > 0", max_terms);
624
+ }
625
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, rnum_terms);
626
+ return rnum_terms;
627
+ }
628
+
629
+ /*
630
+ * call-seq:
631
+ * MultiTermQuery.new(field, options = {}) -> multi_term_query
632
+ *
633
+ * Create a new MultiTermQuery on field +field+. You will also need to add
634
+ * terms to the query using the MultiTermQuery#add_term method.
635
+ *
636
+ * There are several options available to you when creating a
637
+ * MultiTermQueries;
638
+ *
639
+ * === Options
640
+ *
641
+ * :max_terms:: You can specify the maximum number of terms that can be
642
+ * added to the query. This is to prevent memory usage overflow,
643
+ * particularly when you don't directly control the addition of
644
+ * terms to the Query object like when you create Wildcard
645
+ * queries. For example, searching for "content:*" would cause
646
+ * problems without this limit.
647
+ * :min_score:: The minimum score a term must have to be added to the query.
648
+ * For example you could implement your own wild-card queries
649
+ * that gives matches a score. To limit the number of terms
650
+ * added to the query you could set a lower limit to this score.
651
+ * FuzzyQuery in particular makes use of this parameter.
652
+ */
653
+ static VALUE
654
+ frt_mtq_init(int argc, VALUE *argv, VALUE self)
655
+ {
656
+ VALUE rfield, roptions;
657
+ float min_score = 0.0;
658
+ int max_terms = FIX2INT(frt_mtq_get_dmt(self));
659
+ Query *q;
660
+
661
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
662
+ VALUE v;
663
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
664
+ max_terms = FIX2INT(v);
665
+ }
666
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_score))) {
667
+ min_score = (float)NUM2DBL(v);
668
+ }
669
+ }
670
+ q = multi_tq_new_conf(frt_field(rfield), max_terms, min_score);
671
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
672
+ object_add(q, self);
673
+ return self;
674
+ }
675
+
676
+ /*
677
+ * call-seq:
678
+ * multi_term_query.add_term(term, score = 1.0) -> self
679
+ * multi_term_query << term1 << term2 << term3 -> self
680
+ *
681
+ * Add a term to the MultiTermQuery with the score 1.0 unless specified
682
+ * otherwise.
683
+ */
684
+ static VALUE
685
+ frt_mtq_add_term(int argc, VALUE *argv, VALUE self)
686
+ {
687
+ GET_Q();
688
+ VALUE rterm, rboost;
689
+ float boost = 1.0;
690
+ char *term = NULL;
691
+ if (rb_scan_args(argc, argv, "11", &rterm, &rboost) == 2) {
692
+ boost = (float)NUM2DBL(rboost);
693
+ }
694
+ term = StringValuePtr(rterm);
695
+ multi_tq_add_term_boost(q, term, boost);
696
+
697
+ return self;
698
+ }
699
+
700
+ typedef Query *(*mtq_maker_ft)(const char *field, const char *term);
701
+
702
+ static VALUE
703
+ frt_mtq_init_specific(int argc, VALUE *argv, VALUE self, mtq_maker_ft mm)
704
+ {
705
+ VALUE rfield, rterm, rmax_terms;
706
+ int max_terms =
707
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
708
+ Query *q;
709
+
710
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &rmax_terms) == 3) {
711
+ max_terms = FIX2INT(rmax_terms);
712
+ }
713
+
714
+ q = (*mm)(frt_field(rfield), StringValuePtr(rterm));
715
+ MTQMaxTerms(q) = max_terms;
716
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
717
+ object_add(q, self);
718
+ return self;
719
+ }
720
+
721
+ /****************************************************************************
722
+ *
723
+ * BooleanClause Methods
724
+ *
725
+ ****************************************************************************/
726
+
727
+ static void
728
+ frt_bc_mark(void *p)
729
+ {
730
+ frt_gc_mark(((BooleanClause *)p)->query);
731
+ }
732
+
733
+ static void
734
+ frt_bc_free(void *p)
735
+ {
736
+ object_del(p);
737
+ bc_deref((BooleanClause *)p);
738
+ }
739
+
740
+ static VALUE
741
+ frt_bc_wrap(BooleanClause *bc)
742
+ {
743
+ VALUE self = Data_Wrap_Struct(cBooleanClause, &frt_bc_mark, &frt_bc_free, bc);
744
+ REF(bc);
745
+ object_add(bc, self);
746
+ return self;
747
+ }
748
+
749
+ static enum BC_TYPE
750
+ frt_get_occur(VALUE roccur)
751
+ {
752
+ enum BC_TYPE occur = BC_SHOULD;
753
+
754
+ if (roccur == sym_should) {
755
+ occur = BC_SHOULD;
756
+ } else if (roccur == sym_must) {
757
+ occur = BC_MUST;
758
+ } else if (roccur == sym_must_not) {
759
+ occur = BC_MUST_NOT;
760
+ } else {
761
+ rb_raise(rb_eArgError, "occur argument must be one of [:must, "
762
+ ":should, :must_not]");
763
+ }
764
+ return occur;
765
+ }
766
+
767
+ /*
768
+ * call-seq:
769
+ * BooleanClause.new(query, occur = :should) -> BooleanClause
770
+ *
771
+ * Create a new BooleanClause object, wrapping the query +query+. +occur+
772
+ * must be one of +:must+, +:should+ or +:must_not+.
773
+ */
774
+ static VALUE
775
+ frt_bc_init(int argc, VALUE *argv, VALUE self)
776
+ {
777
+ BooleanClause *bc;
778
+ VALUE rquery, roccur;
779
+ unsigned int occur = BC_SHOULD;
780
+ Query *sub_q;
781
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
782
+ occur = frt_get_occur(roccur);
783
+ }
784
+ Data_Get_Struct(rquery, Query, sub_q);
785
+ REF(sub_q);
786
+ bc = bc_new(sub_q, occur);
787
+ Frt_Wrap_Struct(self, &frt_bc_mark, &frt_bc_free, bc);
788
+ object_add(bc, self);
789
+ return self;
790
+ }
791
+
792
+ #define GET_BC() BooleanClause *bc = (BooleanClause *)DATA_PTR(self)
793
+ /*
794
+ * call-seq:
795
+ * clause.query -> query
796
+ *
797
+ * Return the query object wrapped by this BooleanClause.
798
+ */
799
+ static VALUE
800
+ frt_bc_get_query(VALUE self)
801
+ {
802
+ GET_BC();
803
+ return object_get(bc->query);
804
+ }
805
+
806
+ /*
807
+ * call-seq:
808
+ * clause.query = query -> query
809
+ *
810
+ * Set the query wrapped by this BooleanClause.
811
+ */
812
+ static VALUE
813
+ frt_bc_set_query(VALUE self, VALUE rquery)
814
+ {
815
+ GET_BC();
816
+ Data_Get_Struct(rquery, Query, bc->query);
817
+ return rquery;
818
+ }
819
+
820
+ /*
821
+ * call-seq:
822
+ * clause.required? -> bool
823
+ *
824
+ * Return true if this clause is required. ie, this will be true if occur was
825
+ * equal to +:must+.
826
+ */
827
+ static VALUE
828
+ frt_bc_is_required(VALUE self)
829
+ {
830
+ GET_BC();
831
+ return bc->is_required ? Qtrue : Qfalse;
832
+ }
833
+
834
+ /*
835
+ * call-seq:
836
+ * clause.prohibited? -> bool
837
+ *
838
+ * Return true if this clause is prohibited. ie, this will be true if occur was
839
+ * equal to +:must_not+.
840
+ */
841
+ static VALUE
842
+ frt_bc_is_prohibited(VALUE self)
843
+ {
844
+ GET_BC();
845
+ return bc->is_prohibited ? Qtrue : Qfalse;
846
+ }
847
+
848
+ /*
849
+ * call-seq:
850
+ * clause.occur = occur -> occur
851
+ *
852
+ * Set the +occur+ value for this BooleanClause. +occur+ must be one of
853
+ * +:must+, +:should+ or +:must_not+.
854
+ */
855
+ static VALUE
856
+ frt_bc_set_occur(VALUE self, VALUE roccur)
857
+ {
858
+ GET_BC();
859
+ enum BC_TYPE occur = frt_get_occur(roccur);
860
+ bc_set_occur(bc, occur);
861
+
862
+ return roccur;
863
+ }
864
+
865
+ /*
866
+ * call-seq:
867
+ * clause.to_s -> string
868
+ *
869
+ * Return a string representation of this clause. This will not be used by
870
+ * BooleanQuery#to_s. It is only used by BooleanClause#to_s and will specify
871
+ * whether the clause is +:must+, +:should+ or +:must_not+.
872
+ */
873
+ static VALUE
874
+ frt_bc_to_s(VALUE self)
875
+ {
876
+ VALUE rstr;
877
+ char *qstr, *ostr = "", *str;
878
+ int len;
879
+ GET_BC();
880
+ qstr = bc->query->to_s(bc->query, "");
881
+ switch (bc->occur) {
882
+ case BC_SHOULD:
883
+ ostr = "Should";
884
+ break;
885
+ case BC_MUST:
886
+ ostr = "Must";
887
+ break;
888
+ case BC_MUST_NOT:
889
+ ostr = "Must Not";
890
+ break;
891
+ }
892
+ len = strlen(ostr) + strlen(qstr) + 2;
893
+ str = ALLOC_N(char, len);
894
+ sprintf(str, "%s:%s", ostr, qstr);
895
+ rstr = rb_str_new(str, len);
896
+ free(qstr);
897
+ free(str);
898
+ return rstr;
899
+ }
900
+
901
+ /****************************************************************************
902
+ *
903
+ * BooleanQuery Methods
904
+ *
905
+ ****************************************************************************/
906
+
907
+ static void
908
+ frt_bq_mark(void *p)
909
+ {
910
+ int i;
911
+ Query *q = (Query *)p;
912
+ BooleanQuery *bq = (BooleanQuery *)q;
913
+ for (i = 0; i < bq->clause_cnt; i++) {
914
+ frt_gc_mark(bq->clauses[i]);
915
+ }
916
+ }
917
+
918
+ /*
919
+ * call-seq:
920
+ * BooleanQuery.new(coord_disable = false)
921
+ *
922
+ * Create a new BooleanQuery. If you don't care about the scores of the
923
+ * sub-queries added to the query (as would be the case for many
924
+ * automatically generated queries) you can disable the coord_factor of the
925
+ * score. This will slightly improve performance for the query. Usually you
926
+ * should leave this parameter as is.
927
+ */
928
+ static VALUE
929
+ frt_bq_init(int argc, VALUE *argv, VALUE self)
930
+ {
931
+ VALUE rcoord_disabled;
932
+ bool coord_disabled = false;
933
+ Query *q;
934
+ if (rb_scan_args(argc, argv, "01", &rcoord_disabled)) {
935
+ coord_disabled = RTEST(rcoord_disabled);
936
+ }
937
+ q = bq_new(coord_disabled);
938
+ Frt_Wrap_Struct(self, &frt_bq_mark, &frt_q_free, q);
939
+ object_add(q, self);
940
+ return self;
941
+ }
942
+
943
+ /*
944
+ * call-seq:
945
+ * boolean_query.add_query(query, occur = :should) -> boolean_clause
946
+ * boolean_query.<<(query, occur = :should) -> boolean_clause
947
+ * boolean_query << boolean_clause -> boolean_clause
948
+ *
949
+ * Us this method to add sub-queries to a BooleanQuery. You can either add
950
+ * a straight Query or a BooleanClause. When adding a Query, the default
951
+ * occurrence requirement is :should. That is the Query's match will be
952
+ * scored but it isn't essential for a match. If the query should be
953
+ * essential, use :must. For exclusive queries use :must_not.
954
+ *
955
+ * When adding a Boolean clause to a BooleanQuery there is no need to set the
956
+ * occurrence property because it is already set in the BooleanClause.
957
+ * Therefor the +occur+ parameter will be ignored in this case.
958
+ *
959
+ * query:: Query to add to the BooleanQuery
960
+ * occur:: occurrence requirement for the query being added. Must be one of
961
+ * [:must, :should, :must_not]
962
+ * returns:: BooleanClause which was added
963
+ */
964
+ static VALUE
965
+ frt_bq_add_query(int argc, VALUE *argv, VALUE self)
966
+ {
967
+ GET_Q();
968
+ VALUE rquery, roccur;
969
+ enum BC_TYPE occur = BC_SHOULD;
970
+ Query *sub_q;
971
+ VALUE klass;
972
+
973
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
974
+ occur = frt_get_occur(roccur);
975
+ }
976
+ klass = CLASS_OF(rquery);
977
+ if (klass == cBooleanClause) {
978
+ BooleanClause *bc = (BooleanClause *)DATA_PTR(rquery);
979
+ if (argc > 1) {
980
+ rb_warning("Second argument to BooleanQuery#add is ignored "
981
+ "when adding BooleanClause");
982
+ }
983
+ bq_add_clause(q, bc);
984
+ return rquery;
985
+ } else if (TYPE(rquery) == T_DATA) {
986
+ Data_Get_Struct(rquery, Query, sub_q);
987
+ return frt_bc_wrap(bq_add_query(q, sub_q, occur));
988
+ } else {
989
+ rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
990
+ rb_class2name(klass));
991
+ }
992
+ return self;
993
+ }
994
+
995
+ /****************************************************************************
996
+ *
997
+ * RangeQuery Methods
998
+ *
999
+ ****************************************************************************/
1000
+
1001
+ static void
1002
+ get_range_params(VALUE roptions, char **lterm, char **uterm,
1003
+ bool *include_lower, bool *include_upper)
1004
+ {
1005
+ VALUE v;
1006
+ Check_Type(roptions, T_HASH);
1007
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower))) {
1008
+ *lterm = StringValuePtr(v);
1009
+ *include_lower = true;
1010
+ }
1011
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper))) {
1012
+ *uterm = StringValuePtr(v);
1013
+ *include_upper = true;
1014
+ }
1015
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
1016
+ *lterm = StringValuePtr(v);
1017
+ *include_lower = false;
1018
+ }
1019
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
1020
+ *uterm = StringValuePtr(v);
1021
+ *include_upper = false;
1022
+ }
1023
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
1024
+ *include_lower = RTEST(v);
1025
+ }
1026
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_upper))) {
1027
+ *include_upper = RTEST(v);
1028
+ }
1029
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than))) {
1030
+ *lterm = StringValuePtr(v);
1031
+ *include_lower = false;
1032
+ }
1033
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than_or_equal_to))) {
1034
+ *lterm = StringValuePtr(v);
1035
+ *include_lower = true;
1036
+ }
1037
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than))) {
1038
+ *uterm = StringValuePtr(v);
1039
+ *include_upper = false;
1040
+ }
1041
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than_or_equal_to))) {
1042
+ *uterm = StringValuePtr(v);
1043
+ *include_upper = true;
1044
+ }
1045
+ if (!*lterm && !*uterm) {
1046
+ rb_raise(rb_eArgError,
1047
+ "The bounds of a range should not both be nil");
1048
+ }
1049
+ if (*include_lower && !*lterm) {
1050
+ rb_raise(rb_eArgError,
1051
+ "The lower bound should not be nil if it is inclusive");
1052
+ }
1053
+ if (*include_upper && !*uterm) {
1054
+ rb_raise(rb_eArgError,
1055
+ "The upper bound should not be nil if it is inclusive");
1056
+ }
1057
+ if (*uterm && *lterm && (strcmp(*uterm, *lterm) < 0)) {
1058
+ rb_raise(rb_eArgError,
1059
+ "The upper bound should greater than the lower bound."
1060
+ " %s > %s", *lterm, *uterm);
1061
+ }
1062
+ }
1063
+
1064
+ /*
1065
+ * call-seq:
1066
+ * RangeQuery.new(field, options = {}) -> range_query
1067
+ *
1068
+ * Create a new RangeQuery on field +field+. There are two ways to build a
1069
+ * range query. With the old-style options; +:lower+, +:upper+,
1070
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
1071
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
1072
+ * In the old-style options, limits are inclusive by default.
1073
+ *
1074
+ * == Examples
1075
+ *
1076
+ * q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
1077
+ * # is equivalent to
1078
+ * q = RangeQuery.new(:date, :< => "200501")
1079
+ * # is equivalent to
1080
+ * q = RangeQuery.new(:date, :lower_exclusive => "200501")
1081
+ *
1082
+ * q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
1083
+ * # is equivalent to
1084
+ * q = RangeQuery.new(:date, :>= => "200501", :<= => 200502)
1085
+ */
1086
+ static VALUE
1087
+ frt_rq_init(VALUE self, VALUE rfield, VALUE roptions)
1088
+ {
1089
+ Query *q;
1090
+ char *lterm = NULL;
1091
+ char *uterm = NULL;
1092
+ bool include_lower = false;
1093
+ bool include_upper = false;
1094
+
1095
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1096
+ q = rq_new(frt_field(rfield),
1097
+ lterm, uterm,
1098
+ include_lower, include_upper);
1099
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1100
+ object_add(q, self);
1101
+ return self;
1102
+ }
1103
+
1104
+ /****************************************************************************
1105
+ *
1106
+ * PhraseQuery Methods
1107
+ *
1108
+ ****************************************************************************/
1109
+
1110
+ /*
1111
+ * call-seq:
1112
+ * PhraseQuery.new(field, slop = 0) -> phrase_query
1113
+ *
1114
+ * Create a new PhraseQuery on the field +field+. You need to add terms to
1115
+ * the query it will do anything of value. See PhraseQuery#add_term.
1116
+ */
1117
+ static VALUE
1118
+ frt_phq_init(int argc, VALUE *argv, VALUE self)
1119
+ {
1120
+ VALUE rfield, rslop;
1121
+ Query *q;
1122
+ rb_scan_args(argc, argv, "11", &rfield, &rslop);
1123
+ q = phq_new(frt_field(rfield));
1124
+ if (argc == 2) {
1125
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1126
+ }
1127
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1128
+ object_add(q, self);
1129
+ return self;
1130
+ }
1131
+
1132
+ /*
1133
+ * call-seq:
1134
+ * phrase_query.add_term(term, position_increment = 1) -> phrase_query
1135
+ * phrase_query << term -> phrase_query
1136
+ *
1137
+ * Add a term to the phrase query. By default the position_increment is set
1138
+ * to 1 so each term you add is expected to come directly after the previous
1139
+ * term. By setting position_increment to 2 you are specifying that the term
1140
+ * you just added should occur two terms after the previous term. For
1141
+ * example;
1142
+ *
1143
+ * phrase_query.add_term("big").add_term("house", 2)
1144
+ * # matches => "big brick house"
1145
+ * # matches => "big red house"
1146
+ * # doesn't match => "big house"
1147
+ */
1148
+ static VALUE
1149
+ frt_phq_add(int argc, VALUE *argv, VALUE self)
1150
+ {
1151
+ VALUE rterm, rpos_inc;
1152
+ int pos_inc = 1;
1153
+ GET_Q();
1154
+ if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
1155
+ pos_inc = FIX2INT(rpos_inc);
1156
+ }
1157
+ switch (TYPE(rterm)) {
1158
+ case T_STRING:
1159
+ {
1160
+ phq_add_term(q, StringValuePtr(rterm), pos_inc);
1161
+ break;
1162
+ }
1163
+ case T_ARRAY:
1164
+ {
1165
+ int i;
1166
+ char *t;
1167
+ if (RARRAY_LEN(rterm) < 1) {
1168
+ rb_raise(rb_eArgError, "Cannot add empty array to a "
1169
+ "PhraseQuery. You must add either a string or "
1170
+ "an array of strings");
1171
+ }
1172
+ t = StringValuePtr(RARRAY_PTR(rterm)[0]);
1173
+ phq_add_term(q, t, pos_inc);
1174
+ for (i = 1; i < RARRAY_LEN(rterm); i++) {
1175
+ t = StringValuePtr(RARRAY_PTR(rterm)[i]);
1176
+ phq_append_multi_term(q, t);
1177
+ }
1178
+ break;
1179
+ }
1180
+ default:
1181
+ rb_raise(rb_eArgError, "You can only add a string or an array of "
1182
+ "strings to a PhraseQuery, not a %s\n",
1183
+ rs2s(rb_obj_as_string(rterm)));
1184
+ }
1185
+ return self;
1186
+ }
1187
+
1188
+ /*
1189
+ * call-seq:
1190
+ * phrase_query.slop -> integer
1191
+ *
1192
+ * Return the slop set for this phrase query. See the PhraseQuery
1193
+ * description for more information on slop
1194
+ */
1195
+ static VALUE
1196
+ frt_phq_get_slop(VALUE self)
1197
+ {
1198
+ GET_Q();
1199
+ return INT2FIX(((PhraseQuery *)q)->slop);
1200
+ }
1201
+
1202
+ /*
1203
+ * call-seq:
1204
+ * phrase_query.slop = slop -> slop
1205
+ *
1206
+ * Set the slop set for this phrase query. See the PhraseQuery description
1207
+ * for more information on slop
1208
+ */
1209
+ static VALUE
1210
+ frt_phq_set_slop(VALUE self, VALUE rslop)
1211
+ {
1212
+ GET_Q();
1213
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1214
+ return self;
1215
+ }
1216
+
1217
+ /****************************************************************************
1218
+ *
1219
+ * PrefixQuery Methods
1220
+ *
1221
+ ****************************************************************************/
1222
+
1223
+ /*
1224
+ * call-seq:
1225
+ * PrefixQuery.new(field, prefix, options = {}) -> prefix-query
1226
+ *
1227
+ * Create a new PrefixQuery to search for all terms with the prefix +prefix+
1228
+ * in the field +field+. There is one option that you can set to change the
1229
+ * behaviour of this query. +:max_terms+ specifies the maximum number of
1230
+ * terms to be added to the query when it is expanded into a MultiTermQuery.
1231
+ * Let's say for example you search an index with a million terms for all
1232
+ * terms beginning with the letter "s". You would end up with a very large
1233
+ * query which would use a lot of memory and take a long time to get results,
1234
+ * not to mention that it would probably match every document in the index.
1235
+ * To prevent queries like this crashing your application you can set
1236
+ * +:max_terms+ which limits the number of terms that get added to the query.
1237
+ * By default it is set to 512.
1238
+ */
1239
+ static VALUE
1240
+ frt_prq_init(int argc, VALUE *argv, VALUE self)
1241
+ {
1242
+ return frt_mtq_init_specific(argc, argv, self, &prefixq_new);
1243
+ }
1244
+
1245
+ /****************************************************************************
1246
+ *
1247
+ * WildcardQuery Methods
1248
+ *
1249
+ ****************************************************************************/
1250
+
1251
+ /*
1252
+ * call-seq:
1253
+ * WildcardQuery.new(field, pattern, options = {}) -> wild-card-query
1254
+ *
1255
+ * Create a new WildcardQuery to search for all terms where the pattern
1256
+ * +pattern+ matches in the field +field+.
1257
+ *
1258
+ * There is one option that you can set to change the behaviour of this
1259
+ * query. +:max_terms+ specifies the maximum number of terms to be added to
1260
+ * the query when it is expanded into a MultiTermQuery. Let's say for
1261
+ * example you have a million terms in your index and you let your users do
1262
+ * wild-card queries and one runs a search for "*". You would end up with a
1263
+ * very large query which would use a lot of memory and take a long time to
1264
+ * get results, not to mention that it would probably match every document in
1265
+ * the index. To prevent queries like this crashing your application you can
1266
+ * set +:max_terms+ which limits the number of terms that get added to the
1267
+ * query. By default it is set to 512.
1268
+ */
1269
+ static VALUE
1270
+ frt_wcq_init(int argc, VALUE *argv, VALUE self)
1271
+ {
1272
+ return frt_mtq_init_specific(argc, argv, self, &wcq_new);
1273
+ }
1274
+
1275
+ /****************************************************************************
1276
+ *
1277
+ * FuzzyQuery Methods
1278
+ *
1279
+ ****************************************************************************/
1280
+
1281
+ /*
1282
+ * call-seq:
1283
+ * FuzzyQuery.new(field, term, options = {}) -> fuzzy-query
1284
+ *
1285
+ * Create a new FuzzyQuery that will match terms with a similarity of at
1286
+ * least +:min_similarity+ to +term+. Similarity is scored using the
1287
+ * Levenshtein edit distance formula. See
1288
+ * http://en.wikipedia.org/wiki/Levenshtein_distance
1289
+ *
1290
+ * If a +:prefix_length+ > 0 is specified, a common prefix of that length is
1291
+ * also required.
1292
+ *
1293
+ * You can also set +:max_terms+ to prevent memory overflow problems. By
1294
+ * default it is set to 512.
1295
+ *
1296
+ * == Example
1297
+ *
1298
+ * FuzzyQuery.new(:content, "levenshtein",
1299
+ * :min_similarity => 0.8,
1300
+ * :prefix_length => 5,
1301
+ * :max_terms => 1024)
1302
+ *
1303
+ * field:: field to search
1304
+ * term:: term to search for including it's close matches
1305
+ * :min_similarity:: Default: 0.5. minimum levenshtein distance score for a
1306
+ * match
1307
+ * :prefix_length:: Default: 0. minimum prefix_match before levenshtein
1308
+ * distance is measured. This parameter is used to improve
1309
+ * performance. With a +:prefix_length+ of 0, all terms in
1310
+ * the index must be checked which can be quite a
1311
+ * performance hit. By setting the prefix length to a
1312
+ * larger number you minimize the number of terms that need
1313
+ * to be checked. Even 1 will cut down the work by a
1314
+ * factor of about 26 depending on your character set and
1315
+ * the first letter.
1316
+ * :max_terms:: Limits the number of terms that can be added to the
1317
+ * query when it is expanded as a MultiTermQuery. This is
1318
+ * not usually a problem with FuzzyQueries unless you set
1319
+ * +:min_similarity+ to a very low value.
1320
+ */
1321
+ static VALUE
1322
+ frt_fq_init(int argc, VALUE *argv, VALUE self)
1323
+ {
1324
+ Query *q;
1325
+ VALUE rfield, rterm, roptions;
1326
+ float min_sim =
1327
+ (float)NUM2DBL(rb_cvar_get(cFuzzyQuery, id_default_min_similarity));
1328
+ int pre_len =
1329
+ FIX2INT(rb_cvar_get(cFuzzyQuery, id_default_prefix_length));
1330
+ int max_terms =
1331
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
1332
+
1333
+
1334
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &roptions) >= 3) {
1335
+ VALUE v;
1336
+ Check_Type(roptions, T_HASH);
1337
+ if (Qnil != (v = rb_hash_aref(roptions, sym_prefix_length))) {
1338
+ pre_len = FIX2INT(v);
1339
+ }
1340
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_similarity))) {
1341
+ min_sim = (float)NUM2DBL(v);
1342
+ }
1343
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
1344
+ max_terms = FIX2INT(v);
1345
+ }
1346
+ }
1347
+
1348
+ if (min_sim >= 1.0) {
1349
+ rb_raise(rb_eArgError,
1350
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1351
+ } else if (min_sim < 0.0) {
1352
+ rb_raise(rb_eArgError,
1353
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1354
+ }
1355
+ if (pre_len < 0) {
1356
+ rb_raise(rb_eArgError,
1357
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1358
+ }
1359
+ if (max_terms < 0) {
1360
+ rb_raise(rb_eArgError,
1361
+ "%d < 0. :max_terms must be >= 0", max_terms);
1362
+ }
1363
+
1364
+ q = fuzq_new_conf(frt_field(rfield), StringValuePtr(rterm),
1365
+ min_sim, pre_len, max_terms);
1366
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1367
+ object_add(q, self);
1368
+ return self;
1369
+ }
1370
+
1371
+ /*
1372
+ * call-seq:
1373
+ * FuzzyQuery.prefix_length -> prefix_length
1374
+ *
1375
+ * Get the +:prefix_length+ for the query.
1376
+ */
1377
+ static VALUE
1378
+ frt_fq_pre_len(VALUE self)
1379
+ {
1380
+ GET_Q();
1381
+ return INT2FIX(((FuzzyQuery *)q)->pre_len);
1382
+ }
1383
+
1384
+ /*
1385
+ * call-seq:
1386
+ * FuzzyQuery.min_similarity -> min_similarity
1387
+ *
1388
+ * Get the +:min_similarity+ for the query.
1389
+ */
1390
+ static VALUE
1391
+ frt_fq_min_sim(VALUE self)
1392
+ {
1393
+ GET_Q();
1394
+ return rb_float_new((double)((FuzzyQuery *)q)->min_sim);
1395
+ }
1396
+
1397
+ /*
1398
+ * call-seq:
1399
+ * FuzzyQuery.default_min_similarity -> number
1400
+ *
1401
+ * Get the default value for +:min_similarity+
1402
+ */
1403
+ static VALUE
1404
+ frt_fq_get_dms(VALUE self)
1405
+ {
1406
+ return rb_cvar_get(cFuzzyQuery, id_default_min_similarity);
1407
+ }
1408
+
1409
+ extern float qp_default_fuzzy_min_sim;
1410
+ /*
1411
+ * call-seq:
1412
+ * FuzzyQuery.default_min_similarity = min_sim -> min_sim
1413
+ *
1414
+ * Set the default value for +:min_similarity+
1415
+ */
1416
+ static VALUE
1417
+ frt_fq_set_dms(VALUE self, VALUE val)
1418
+ {
1419
+ double min_sim = NUM2DBL(val);
1420
+ if (min_sim >= 1.0) {
1421
+ rb_raise(rb_eArgError,
1422
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1423
+ } else if (min_sim < 0.0) {
1424
+ rb_raise(rb_eArgError,
1425
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1426
+ }
1427
+ qp_default_fuzzy_min_sim = (float)min_sim;
1428
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val);
1429
+ return val;
1430
+ }
1431
+
1432
+ /*
1433
+ * call-seq:
1434
+ * FuzzyQuery.default_prefix_length -> number
1435
+ *
1436
+ * Get the default value for +:prefix_length+
1437
+ */
1438
+ static VALUE
1439
+ frt_fq_get_dpl(VALUE self)
1440
+ {
1441
+ return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
1442
+ }
1443
+
1444
+ extern int qp_default_fuzzy_pre_len;
1445
+ /*
1446
+ * call-seq:
1447
+ * FuzzyQuery.default_prefix_length = prefix_length -> prefix_length
1448
+ *
1449
+ * Set the default value for +:prefix_length+
1450
+ */
1451
+ static VALUE
1452
+ frt_fq_set_dpl(VALUE self, VALUE val)
1453
+ {
1454
+ int pre_len = FIX2INT(val);
1455
+ if (pre_len < 0) {
1456
+ rb_raise(rb_eArgError,
1457
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1458
+ }
1459
+ qp_default_fuzzy_pre_len = pre_len;
1460
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val);
1461
+ return val;
1462
+ }
1463
+
1464
+
1465
+ /****************************************************************************
1466
+ *
1467
+ * MatchAllQuery Methods
1468
+ *
1469
+ ****************************************************************************/
1470
+
1471
+ static VALUE
1472
+ frt_maq_alloc(VALUE klass)
1473
+ {
1474
+ Query *q = maq_new();
1475
+ VALUE self = Data_Wrap_Struct(klass, NULL, &frt_q_free, q);
1476
+ object_add(q, self);
1477
+ return self;
1478
+ }
1479
+
1480
+ /*
1481
+ * call-seq:
1482
+ * MatchAllQuery.new -> query
1483
+ *
1484
+ * Create a query which matches all documents.
1485
+ */
1486
+ static VALUE
1487
+ frt_maq_init(VALUE self)
1488
+ {
1489
+ return self;
1490
+ }
1491
+
1492
+ /****************************************************************************
1493
+ *
1494
+ * ConstantScoreQuery Methods
1495
+ *
1496
+ ****************************************************************************/
1497
+
1498
+ /*
1499
+ * call-seq:
1500
+ * ConstantScoreQuery.new(filter) -> query
1501
+ *
1502
+ * Create a ConstantScoreQuery which uses +filter+ to match documents giving
1503
+ * each document a constant score.
1504
+ */
1505
+ static VALUE
1506
+ frt_csq_init(VALUE self, VALUE rfilter)
1507
+ {
1508
+ Query *q;
1509
+ Filter *filter;
1510
+ Data_Get_Struct(rfilter, Filter, filter);
1511
+ q = csq_new(filter);
1512
+
1513
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1514
+ object_add(q, self);
1515
+ return self;
1516
+ }
1517
+
1518
+ /****************************************************************************
1519
+ *
1520
+ * FilteredQuery Methods
1521
+ *
1522
+ ****************************************************************************/
1523
+
1524
+ static void
1525
+ frt_fqq_mark(void *p)
1526
+ {
1527
+ FilteredQuery *fq = (FilteredQuery *)p;
1528
+ frt_gc_mark(fq->query);
1529
+ frt_gc_mark(fq->filter);
1530
+ }
1531
+
1532
+ /*
1533
+ * call-seq:
1534
+ * FilteredQuery.new(query, filter) -> query
1535
+ *
1536
+ * Create a new FilteredQuery which filters +query+ with +filter+.
1537
+ */
1538
+ static VALUE
1539
+ frt_fqq_init(VALUE self, VALUE rquery, VALUE rfilter)
1540
+ {
1541
+ Query *sq, *q;
1542
+ Filter *f;
1543
+ Data_Get_Struct(rquery, Query, sq);
1544
+ Data_Get_Struct(rfilter, Filter, f);
1545
+ q = fq_new(sq, f);
1546
+ REF(sq);
1547
+ REF(f);
1548
+ Frt_Wrap_Struct(self, &frt_fqq_mark, &frt_q_free, q);
1549
+ object_add(q, self);
1550
+ return self;
1551
+ }
1552
+
1553
+ /****************************************************************************
1554
+ *
1555
+ * SpanTermQuery Methods
1556
+ *
1557
+ ****************************************************************************/
1558
+
1559
+ /*
1560
+ * call-seq:
1561
+ * SpanTermQuery.new(field, term) -> query
1562
+ *
1563
+ * Create a new SpanTermQuery which matches all documents with the term
1564
+ * +term+ in the field +field+.
1565
+ */
1566
+ static VALUE
1567
+ frt_spantq_init(VALUE self, VALUE rfield, VALUE rterm)
1568
+ {
1569
+ Query *q = spantq_new(frt_field(rfield), StringValuePtr(rterm));
1570
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1571
+ object_add(q, self);
1572
+ return self;
1573
+ }
1574
+
1575
+ /****************************************************************************
1576
+ *
1577
+ * SpanMultiTermQuery Methods
1578
+ *
1579
+ ****************************************************************************/
1580
+
1581
+ /*
1582
+ * call-seq:
1583
+ * SpanMultiTermQuery.new(field, terms) -> query
1584
+ *
1585
+ * Create a new SpanMultiTermQuery which matches all documents with the terms
1586
+ * +terms+ in the field +field+. +terms+ should be an array of Strings.
1587
+ */
1588
+ static VALUE
1589
+ frt_spanmtq_init(VALUE self, VALUE rfield, VALUE rterms)
1590
+ {
1591
+ Query *q = spanmtq_new(frt_field(rfield));
1592
+ int i;
1593
+ for (i = RARRAY_LEN(rterms) - 1; i >= 0; i--) {
1594
+ spanmtq_add_term(q, StringValuePtr(RARRAY_PTR(rterms)[i]));
1595
+ }
1596
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1597
+ object_add(q, self);
1598
+ return self;
1599
+ }
1600
+
1601
+ /****************************************************************************
1602
+ *
1603
+ * SpanPrefixQuery Methods
1604
+ *
1605
+ ****************************************************************************/
1606
+
1607
+ /*
1608
+ * call-seq:
1609
+ * SpanPrefixQuery.new(field, prefix, max_terms = 256) -> query
1610
+ *
1611
+ * Create a new SpanPrefixQuery which matches all documents with the prefix
1612
+ * +prefix+ in the field +field+.
1613
+ */
1614
+ static VALUE
1615
+ frt_spanprq_init(int argc, VALUE *argv, VALUE self)
1616
+ {
1617
+ VALUE rfield, rprefix, rmax_terms;
1618
+ int max_terms = SPAN_PREFIX_QUERY_MAX_TERMS;
1619
+ Query *q;
1620
+ if (rb_scan_args(argc, argv, "21", &rfield, &rprefix, &rmax_terms) == 3) {
1621
+ max_terms = FIX2INT(rmax_terms);
1622
+ }
1623
+ q = spanprq_new(frt_field(rfield), StringValuePtr(rprefix));
1624
+ ((SpanPrefixQuery *)q)->max_terms = max_terms;
1625
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1626
+ object_add(q, self);
1627
+ return self;
1628
+ }
1629
+
1630
+ /****************************************************************************
1631
+ *
1632
+ * SpanFirstQuery Methods
1633
+ *
1634
+ ****************************************************************************/
1635
+
1636
+ /*
1637
+ * call-seq:
1638
+ * SpanFirstQuery.new(span_query, end) -> query
1639
+ *
1640
+ * Create a new SpanFirstQuery which matches all documents where +span_query+
1641
+ * matches before +end+ where +end+ is a byte-offset from the start of the
1642
+ * field
1643
+ */
1644
+ static VALUE
1645
+ frt_spanfq_init(VALUE self, VALUE rmatch, VALUE rend)
1646
+ {
1647
+ Query *q;
1648
+ Query *match;
1649
+ Data_Get_Struct(rmatch, Query, match);
1650
+ q = spanfq_new(match, FIX2INT(rend));
1651
+ Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
1652
+ object_add(q, self);
1653
+ return self;
1654
+ }
1655
+
1656
+ /****************************************************************************
1657
+ *
1658
+ * SpanNearQuery Methods
1659
+ *
1660
+ ****************************************************************************/
1661
+
1662
+ static void
1663
+ frt_spannq_mark(void *p)
1664
+ {
1665
+ int i;
1666
+ SpanNearQuery *snq = (SpanNearQuery *)p;
1667
+ for (i = 0; i < snq->c_cnt; i++) {
1668
+ frt_gc_mark(snq->clauses[i]);
1669
+ }
1670
+ }
1671
+
1672
+ /*
1673
+ * call-seq:
1674
+ * SpanNearQuery.new(options = {}) -> query
1675
+ *
1676
+ * Create a new SpanNearQuery. You can add an array of clauses with the
1677
+ * +:clause+ parameter or you can add clauses individually using the
1678
+ * SpanNearQuery#add method.
1679
+ *
1680
+ * query = SpanNearQuery.new(:clauses => [spanq1, spanq2, spanq3])
1681
+ * # is equivalent to
1682
+ * query = SpanNearQuery.new()
1683
+ * query << spanq1 << spanq2 << spanq3
1684
+ *
1685
+ * You have two other options which you can set.
1686
+ *
1687
+ * :slop:: Default: 0. Works exactly like a PhraseQuery slop. It is the
1688
+ * amount of slop allowed in the match (the term edit distance
1689
+ * allowed in the match).
1690
+ * :in_order:: Default: false. Specifies whether or not the matches have to
1691
+ * occur in the order they were added to the query. When slop is
1692
+ * set to 0, this parameter will make no difference.
1693
+ */
1694
+ static VALUE
1695
+ frt_spannq_init(int argc, VALUE *argv, VALUE self)
1696
+ {
1697
+ Query *q;
1698
+ VALUE roptions;
1699
+ int slop = 0;
1700
+ bool in_order = false;
1701
+
1702
+ if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
1703
+ VALUE v;
1704
+ if (Qnil != (v = rb_hash_aref(roptions, sym_slop))) {
1705
+ slop = FIX2INT(v);
1706
+ }
1707
+ if (Qnil != (v = rb_hash_aref(roptions, sym_in_order))) {
1708
+ in_order = RTEST(v);
1709
+ }
1710
+ }
1711
+ q = spannq_new(slop, in_order);
1712
+ if (argc > 0) {
1713
+ VALUE v;
1714
+ if (Qnil != (v = rb_hash_aref(roptions, sym_clauses))) {
1715
+ int i;
1716
+ Query *clause;
1717
+ Check_Type(v, T_ARRAY);
1718
+ for (i = 0; i < RARRAY_LEN(v); i++) {
1719
+ Data_Get_Struct(RARRAY_PTR(v)[i], Query, clause);
1720
+ spannq_add_clause(q, clause);
1721
+ }
1722
+ }
1723
+ }
1724
+
1725
+ Frt_Wrap_Struct(self, &frt_spannq_mark, &frt_q_free, q);
1726
+ object_add(q, self);
1727
+ return self;
1728
+ }
1729
+
1730
+ /*
1731
+ * call-seq:
1732
+ * query.add(span_query) -> self
1733
+ * query << span_query -> self
1734
+ *
1735
+ * Add a clause to the SpanNearQuery. Clauses are stored in the order they
1736
+ * are added to the query which is important for matching. Note that clauses
1737
+ * must be SpanQueries, not other types of query.
1738
+ */
1739
+ static VALUE
1740
+ frt_spannq_add(VALUE self, VALUE rclause)
1741
+ {
1742
+ GET_Q();
1743
+ Query *clause;
1744
+ Data_Get_Struct(rclause, Query, clause);
1745
+ spannq_add_clause(q, clause);
1746
+ return self;
1747
+ }
1748
+
1749
+ /****************************************************************************
1750
+ *
1751
+ * SpanOrQuery Methods
1752
+ *
1753
+ ****************************************************************************/
1754
+
1755
+ static void
1756
+ frt_spanoq_mark(void *p)
1757
+ {
1758
+ int i;
1759
+ SpanOrQuery *soq = (SpanOrQuery *)p;
1760
+ for (i = 0; i < soq->c_cnt; i++) {
1761
+ frt_gc_mark(soq->clauses[i]);
1762
+ }
1763
+ }
1764
+
1765
+ /*
1766
+ * call-seq:
1767
+ * SpanOrQuery.new(options = {}) -> query
1768
+ *
1769
+ * Create a new SpanOrQuery. This is just like a BooleanQuery with all
1770
+ * clauses with the occur value of :should. The difference is that it can be
1771
+ * passed to other SpanQuerys like SpanNearQuery.
1772
+ */
1773
+ static VALUE
1774
+ frt_spanoq_init(int argc, VALUE *argv, VALUE self)
1775
+ {
1776
+ Query *q;
1777
+ VALUE rclauses;
1778
+
1779
+ q = spanoq_new();
1780
+ if (rb_scan_args(argc, argv, "01", &rclauses) > 0) {
1781
+ int i;
1782
+ Query *clause;
1783
+ Check_Type(rclauses, T_ARRAY);
1784
+ for (i = 0; i < RARRAY_LEN(rclauses); i++) {
1785
+ Data_Get_Struct(RARRAY_PTR(rclauses)[i], Query, clause);
1786
+ spanoq_add_clause(q, clause);
1787
+ }
1788
+ }
1789
+ Frt_Wrap_Struct(self, &frt_spanoq_mark, &frt_q_free, q);
1790
+ object_add(q, self);
1791
+ return self;
1792
+ }
1793
+
1794
+ /*
1795
+ * call-seq:
1796
+ * query.add(span_query) -> self
1797
+ * query << span_query -> self
1798
+ *
1799
+ * Add a clause to the SpanOrQuery. Note that clauses must be SpanQueries,
1800
+ * not other types of query.
1801
+ */
1802
+ static VALUE
1803
+ frt_spanoq_add(VALUE self, VALUE rclause)
1804
+ {
1805
+ GET_Q();
1806
+ Query *clause;
1807
+ Data_Get_Struct(rclause, Query, clause);
1808
+ spanoq_add_clause(q, clause);
1809
+ return self;
1810
+ }
1811
+
1812
+ /****************************************************************************
1813
+ *
1814
+ * SpanNotQuery Methods
1815
+ *
1816
+ ****************************************************************************/
1817
+
1818
+ static void
1819
+ frt_spanxq_mark(void *p)
1820
+ {
1821
+ SpanNotQuery *sxq = (SpanNotQuery *)p;
1822
+ frt_gc_mark(sxq->inc);
1823
+ frt_gc_mark(sxq->exc);
1824
+ }
1825
+
1826
+ /*
1827
+ * call-seq:
1828
+ * SpanNotQuery.new(include_query, exclude_query) -> query
1829
+ *
1830
+ * Create a new SpanNotQuery which matches all documents which match
1831
+ * +include_query+ and don't match +exclude_query+.
1832
+ */
1833
+ static VALUE
1834
+ frt_spanxq_init(VALUE self, VALUE rinc, VALUE rexc)
1835
+ {
1836
+ Query *q;
1837
+ Check_Type(rinc, T_DATA);
1838
+ Check_Type(rexc, T_DATA);
1839
+ q = spanxq_new(DATA_PTR(rinc), DATA_PTR(rexc));
1840
+ Frt_Wrap_Struct(self, &frt_spanxq_mark, &frt_q_free, q);
1841
+ object_add(q, self);
1842
+ return self;
1843
+ }
1844
+
1845
+ /****************************************************************************
1846
+ *
1847
+ * Filter Methods
1848
+ *
1849
+ ****************************************************************************/
1850
+
1851
+ static void
1852
+ frt_f_free(void *p)
1853
+ {
1854
+ object_del(p);
1855
+ filt_deref((Filter *)p);
1856
+ }
1857
+
1858
+ #define GET_F() Filter *f = (Filter *)DATA_PTR(self)
1859
+
1860
+ /*
1861
+ * call-seq:
1862
+ * filter.to_s -> string
1863
+ *
1864
+ * Return a human readable string representing the Filter object that the
1865
+ * method was called on.
1866
+ */
1867
+ static VALUE
1868
+ frt_f_to_s(VALUE self)
1869
+ {
1870
+ VALUE rstr;
1871
+ char *str;
1872
+ GET_F();
1873
+ str = f->to_s(f);
1874
+ rstr = rb_str_new2(str);
1875
+ free(str);
1876
+ return rstr;
1877
+ }
1878
+
1879
+ extern VALUE frt_get_bv(BitVector *bv);
1880
+
1881
+ /*
1882
+ * call-seq:
1883
+ * filter.bits(index_reader) -> bit_vector
1884
+ *
1885
+ * Get the bit_vector used by this filter. This method will usually be used
1886
+ * to group filters or apply filters to other filters.
1887
+ */
1888
+ static VALUE
1889
+ frt_f_get_bits(VALUE self, VALUE rindex_reader)
1890
+ {
1891
+ BitVector *bv;
1892
+ IndexReader *ir;
1893
+ GET_F();
1894
+ Data_Get_Struct(rindex_reader, IndexReader, ir);
1895
+ bv = filt_get_bv(f, ir);
1896
+ return frt_get_bv(bv);
1897
+ }
1898
+
1899
+ /****************************************************************************
1900
+ *
1901
+ * RangeFilter Methods
1902
+ *
1903
+ ****************************************************************************/
1904
+
1905
+
1906
+ /*
1907
+ * call-seq:
1908
+ * RangeFilter.new(field, options = {}) -> range_query
1909
+ *
1910
+ * Create a new RangeFilter on field +field+. There are two ways to build a
1911
+ * range filter. With the old-style options; +:lower+, +:upper+,
1912
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
1913
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
1914
+ * In the old-style options, limits are inclusive by default.
1915
+ *
1916
+ * == Examples
1917
+ *
1918
+ * f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
1919
+ * # is equivalent to
1920
+ * f = RangeFilter.new(:date, :< => "200501")
1921
+ * # is equivalent to
1922
+ * f = RangeFilter.new(:date, :lower_exclusive => "200501")
1923
+ *
1924
+ * f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
1925
+ * # is equivalent to
1926
+ * f = RangeFilter.new(:date, :>= => "200501", :<= => 200502)
1927
+ */
1928
+ static VALUE
1929
+ frt_rf_init(VALUE self, VALUE rfield, VALUE roptions)
1930
+ {
1931
+ Filter *f;
1932
+ char *lterm = NULL;
1933
+ char *uterm = NULL;
1934
+ bool include_lower = false;
1935
+ bool include_upper = false;
1936
+
1937
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1938
+ f = rfilt_new(frt_field(rfield), lterm, uterm,
1939
+ include_lower, include_upper);
1940
+ Frt_Wrap_Struct(self, NULL, &frt_f_free, f);
1941
+ object_add(f, self);
1942
+ return self;
1943
+ }
1944
+
1945
+ /****************************************************************************
1946
+ *
1947
+ * QueryFilter Methods
1948
+ *
1949
+ ****************************************************************************/
1950
+
1951
+ /*
1952
+ * call-seq:
1953
+ * QueryFilter.new(query) -> filter
1954
+ *
1955
+ * Create a new QueryFilter which applies the query +query+.
1956
+ */
1957
+ static VALUE
1958
+ frt_qf_init(VALUE self, VALUE rquery)
1959
+ {
1960
+ Query *q;
1961
+ Filter *f;
1962
+ Data_Get_Struct(rquery, Query, q);
1963
+ f = qfilt_new(q);
1964
+ Frt_Wrap_Struct(self, NULL, &frt_f_free, f);
1965
+ object_add(f, self);
1966
+ return self;
1967
+ }
1968
+
1969
+ /****************************************************************************
1970
+ *
1971
+ * SortField Methods
1972
+ *
1973
+ ****************************************************************************/
1974
+
1975
+ static void
1976
+ frt_sf_free(void *p)
1977
+ {
1978
+ object_del(p);
1979
+ sort_field_destroy((SortField *)p);
1980
+ }
1981
+
1982
+ static VALUE
1983
+ frt_get_sf(SortField *sf)
1984
+ {
1985
+ VALUE self = object_get(sf);
1986
+ if (self == Qnil) {
1987
+ self = Data_Wrap_Struct(cSortField, NULL, &frt_sf_free, sf);
1988
+ object_add(sf, self);
1989
+ }
1990
+ return self;
1991
+ }
1992
+
1993
+ static int
1994
+ get_sort_type(VALUE rtype)
1995
+ {
1996
+ Check_Type(rtype, T_SYMBOL);
1997
+ if (rtype == sym_byte) {
1998
+ return SORT_TYPE_BYTE;
1999
+ } else if (rtype == sym_integer) {
2000
+ return SORT_TYPE_INTEGER;
2001
+ } else if (rtype == sym_string) {
2002
+ return SORT_TYPE_STRING;
2003
+ } else if (rtype == sym_score) {
2004
+ return SORT_TYPE_SCORE;
2005
+ } else if (rtype == sym_doc_id) {
2006
+ return SORT_TYPE_DOC;
2007
+ } else if (rtype == sym_float) {
2008
+ return SORT_TYPE_FLOAT;
2009
+ } else if (rtype == sym_auto) {
2010
+ return SORT_TYPE_AUTO;
2011
+ } else {
2012
+ rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
2013
+ "from [:integer, :float, :string, :auto, :score, :doc_id]",
2014
+ rb_id2name(SYM2ID(rtype)));
2015
+ }
2016
+ return SORT_TYPE_DOC;
2017
+ }
2018
+
2019
+ /*
2020
+ * call-seq:
2021
+ * SortField.new(field, options = {}) -> sort_field
2022
+ *
2023
+ * Create a new SortField which can be used to sort the result-set by the
2024
+ * value in field +field+.
2025
+ *
2026
+ * === Options
2027
+ *
2028
+ * :type:: Default: +:auto+. Specifies how a field should be sorted.
2029
+ * Choose from one of; +:auto+, +:integer+, +:float+,
2030
+ * +:string+, +:byte+, +:doc_id+ or +:score+. +:auto+ will
2031
+ * check the datatype of the field by trying to parse it into
2032
+ * either a number or a float before settling on a string
2033
+ * sort. String sort is locale dependent and works for
2034
+ * multibyte character sets like UTF-8 if you have your
2035
+ * locale set correctly.
2036
+ * :reverse Default: false. Set to true if you want to reverse the
2037
+ * sort.
2038
+ */
2039
+ static VALUE
2040
+ frt_sf_init(int argc, VALUE *argv, VALUE self)
2041
+ {
2042
+ SortField *sf;
2043
+ VALUE rfield, roptions;
2044
+ VALUE rval;
2045
+ int type = SORT_TYPE_AUTO;
2046
+ int is_reverse = false;
2047
+ char *field;
2048
+
2049
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
2050
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_type))) {
2051
+ type = get_sort_type(rval);
2052
+ }
2053
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_reverse))) {
2054
+ is_reverse = RTEST(rval);
2055
+ }
2056
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_comparator))) {
2057
+ rb_raise(rb_eArgError, "Unsupported argument ':comparator'");
2058
+ }
2059
+ }
2060
+ if (NIL_P(rfield)) rb_raise(rb_eArgError, "must pass a valid field name");
2061
+ field = frt_field(rfield);
2062
+
2063
+ sf = sort_field_new(field, type, is_reverse);
2064
+ if (sf->field == NULL && field) {
2065
+ sf->field = estrdup(field);
2066
+ }
2067
+
2068
+ Frt_Wrap_Struct(self, NULL, &frt_sf_free, sf);
2069
+ object_add(sf, self);
2070
+ return self;
2071
+ }
2072
+
2073
+ #define GET_SF() SortField *sf = (SortField *)DATA_PTR(self)
2074
+
2075
+ /*
2076
+ * call-seq:
2077
+ * sort_field.reverse? -> bool
2078
+ *
2079
+ * Return true if the field is to be reverse sorted. This attribute is set
2080
+ * when you create the sort_field.
2081
+ */
2082
+ static VALUE
2083
+ frt_sf_is_reverse(VALUE self)
2084
+ {
2085
+ GET_SF();
2086
+ return sf->reverse ? Qtrue : Qfalse;
2087
+ }
2088
+
2089
+ /*
2090
+ * call-seq:
2091
+ * sort_field.name -> symbol
2092
+ *
2093
+ * Returns the name of the field to be sorted.
2094
+ */
2095
+ static VALUE
2096
+ frt_sf_get_name(VALUE self)
2097
+ {
2098
+ GET_SF();
2099
+ return sf->field ? ID2SYM(rb_intern(sf->field)) : Qnil;
2100
+ }
2101
+
2102
+ /*
2103
+ * call-seq:
2104
+ * sort_field.type -> symbol
2105
+ *
2106
+ * Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
2107
+ * +:string+, +:byte+, +:doc_id+ or +:score+.
2108
+ */
2109
+ static VALUE
2110
+ frt_sf_get_type(VALUE self)
2111
+ {
2112
+ GET_SF();
2113
+ switch (sf->type) {
2114
+ case SORT_TYPE_BYTE: return sym_byte;
2115
+ case SORT_TYPE_INTEGER: return sym_integer;
2116
+ case SORT_TYPE_FLOAT: return sym_float;
2117
+ case SORT_TYPE_STRING: return sym_string;
2118
+ case SORT_TYPE_AUTO: return sym_auto;
2119
+ case SORT_TYPE_DOC: return sym_doc_id;
2120
+ case SORT_TYPE_SCORE: return sym_score;
2121
+ }
2122
+ return Qnil;
2123
+ }
2124
+
2125
+ /*
2126
+ * call-seq:
2127
+ * sort_field.comparator -> symbol
2128
+ *
2129
+ * TODO: currently unsupported
2130
+ */
2131
+ static VALUE
2132
+ frt_sf_get_comparator(VALUE self)
2133
+ {
2134
+ return Qnil;
2135
+ }
2136
+
2137
+ /*
2138
+ * call-seq:
2139
+ * sort_field.to_s -> string
2140
+ *
2141
+ * Return a human readable string describing this +sort_field+.
2142
+ */
2143
+ static VALUE
2144
+ frt_sf_to_s(VALUE self)
2145
+ {
2146
+ GET_SF();
2147
+ char *str = sort_field_to_s(sf);
2148
+ VALUE rstr = rb_str_new2(str);
2149
+ free(str);
2150
+ return rstr;
2151
+ }
2152
+
2153
+ /****************************************************************************
2154
+ *
2155
+ * Sort Methods
2156
+ *
2157
+ ****************************************************************************/
2158
+
2159
+ static void
2160
+ frt_sort_free(void *p)
2161
+ {
2162
+ Sort *sort = (Sort *)p;
2163
+ object_del(sort);
2164
+ sort_destroy(sort);
2165
+ }
2166
+
2167
+ static void
2168
+ frt_sort_mark(void *p)
2169
+ {
2170
+ Sort *sort = (Sort *)p;
2171
+ int i;
2172
+ for (i = 0; i < sort->size; i++) {
2173
+ frt_gc_mark(sort->sort_fields[i]);
2174
+ }
2175
+ }
2176
+
2177
+ static VALUE
2178
+ frt_sort_alloc(VALUE klass)
2179
+ {
2180
+ VALUE self;
2181
+ Sort *sort = sort_new();
2182
+ sort->destroy_all = false;
2183
+ self = Data_Wrap_Struct(klass, &frt_sort_mark, &frt_sort_free, sort);
2184
+ object_add(sort, self);
2185
+ return self;
2186
+ }
2187
+
2188
+ static void
2189
+ frt_parse_sort_str(Sort *sort, char *xsort_str)
2190
+ {
2191
+ SortField *sf;
2192
+ char *comma, *end, *e, *s;
2193
+ const int len = strlen(xsort_str);
2194
+ char *sort_str = ALLOC_N(char, len + 2);
2195
+ strcpy(sort_str, xsort_str);
2196
+
2197
+ end = &sort_str[len];
2198
+
2199
+ s = sort_str;
2200
+
2201
+ while ((s < end)
2202
+ && (NULL != (comma = strchr(s, ',')) || (NULL != (comma = end)))) {
2203
+ bool reverse = false;
2204
+ /* strip spaces */
2205
+ e = comma;
2206
+ while ((isspace(*s) || *s == ':') && s < e) s++;
2207
+ while (isspace(e[-1]) && s < e) e--;
2208
+ *e = '\0';
2209
+ if (e > (s + 4) && strcmp("DESC", &e[-4]) == 0) {
2210
+ reverse = true;
2211
+ e -= 4;
2212
+ while (isspace(e[-1]) && s < e) e--;
2213
+ }
2214
+ *e = '\0';
2215
+
2216
+ if (strcmp("SCORE", s) == 0) {
2217
+ sf = sort_field_score_new(reverse);
2218
+ } else if (strcmp("DOC_ID", s) == 0) {
2219
+ sf = sort_field_doc_new(reverse);
2220
+ } else {
2221
+ sf = sort_field_auto_new(s, reverse);
2222
+ }
2223
+ frt_get_sf(sf);
2224
+ sort_add_sort_field(sort, sf);
2225
+ s = comma + 1;
2226
+ }
2227
+ free(sort_str);
2228
+ }
2229
+
2230
+ static void
2231
+ frt_sort_add(Sort *sort, VALUE rsf, bool reverse)
2232
+ {
2233
+ SortField *sf;
2234
+ switch (TYPE(rsf)) {
2235
+ case T_DATA:
2236
+ Data_Get_Struct(rsf, SortField, sf);
2237
+ if (reverse) sf->reverse = !sf->reverse;
2238
+ sort_add_sort_field(sort, sf);
2239
+ break;
2240
+ case T_SYMBOL:
2241
+ rsf = rb_obj_as_string(rsf);
2242
+ sf = sort_field_auto_new(rs2s(rsf), reverse);
2243
+ /* need to give it a ruby object so it'll be freed when the
2244
+ * sort is garbage collected */
2245
+ rsf = frt_get_sf(sf);
2246
+ sort_add_sort_field(sort, sf);
2247
+ break;
2248
+ case T_STRING:
2249
+ frt_parse_sort_str(sort, rs2s(rsf));
2250
+ break;
2251
+ default:
2252
+ rb_raise(rb_eArgError, "Unknown SortField Type");
2253
+ break;
2254
+ }
2255
+ }
2256
+
2257
+ #define GET_SORT() Sort *sort = (Sort *)DATA_PTR(self)
2258
+ /*
2259
+ * call-seq:
2260
+ * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_ID], reverse = false) -> Sort
2261
+ *
2262
+ * Create a new Sort object. If +reverse+ is true, all sort_fields will be
2263
+ * reversed so if any of them are already reversed the will be turned back
2264
+ * to their natural order again. By default
2265
+ */
2266
+ static VALUE
2267
+ frt_sort_init(int argc, VALUE *argv, VALUE self)
2268
+ {
2269
+ int i;
2270
+ VALUE rfields, rreverse;
2271
+ bool reverse = false;
2272
+ bool has_sfd = false;
2273
+ GET_SORT();
2274
+ switch (rb_scan_args(argc, argv, "02", &rfields, &rreverse)) {
2275
+ case 2: reverse = RTEST(rreverse);
2276
+ case 1:
2277
+ if (TYPE(rfields) == T_ARRAY) {
2278
+ int i;
2279
+ for (i = 0; i < RARRAY_LEN(rfields); i++) {
2280
+ frt_sort_add(sort, RARRAY_PTR(rfields)[i], reverse);
2281
+ }
2282
+ } else {
2283
+ frt_sort_add(sort, rfields, reverse);
2284
+ }
2285
+ for (i = 0; i < sort->size; i++) {
2286
+ if (sort->sort_fields[i] == &SORT_FIELD_DOC) has_sfd = true;
2287
+ }
2288
+ if (!has_sfd) {
2289
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
2290
+ }
2291
+ break;
2292
+ case 0:
2293
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_SCORE);
2294
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
2295
+ }
2296
+
2297
+ return self;
2298
+ }
2299
+
2300
+ /*
2301
+ * call-seq:
2302
+ * sort.fields -> Array
2303
+ *
2304
+ * Returns an array of the SortFields held by the Sort object.
2305
+ */
2306
+ static VALUE
2307
+ frt_sort_get_fields(VALUE self)
2308
+ {
2309
+ GET_SORT();
2310
+ VALUE rfields = rb_ary_new2(sort->size);
2311
+ int i;
2312
+ for (i = 0; i < sort->size; i++) {
2313
+ rb_ary_store(rfields, i, object_get(sort->sort_fields[i]));
2314
+ }
2315
+ return rfields;
2316
+ }
2317
+
2318
+
2319
+ /*
2320
+ * call-seq:
2321
+ * sort.to_s -> string
2322
+ *
2323
+ * Returns a human readable string representing the sort object.
2324
+ */
2325
+ static VALUE
2326
+ frt_sort_to_s(VALUE self)
2327
+ {
2328
+ GET_SORT();
2329
+ char *str = sort_to_s(sort);
2330
+ VALUE rstr = rb_str_new2(str);
2331
+ free(str);
2332
+ return rstr;
2333
+ }
2334
+
2335
+ /****************************************************************************
2336
+ *
2337
+ * Searcher Methods
2338
+ *
2339
+ ****************************************************************************/
2340
+
2341
+ static void
2342
+ frt_sea_free(void *p)
2343
+ {
2344
+ Searcher *sea = (Searcher *)p;
2345
+ object_del(sea);
2346
+ sea->close(sea);
2347
+ }
2348
+
2349
+ #define GET_SEA() Searcher *sea = (Searcher *)DATA_PTR(self)
2350
+
2351
+ /*
2352
+ * call-seq:
2353
+ * searcher.close -> nil
2354
+ *
2355
+ * Close the searcher. The garbage collector will do this for you or you can
2356
+ * call this method explicitly.
2357
+ */
2358
+ static VALUE
2359
+ frt_sea_close(VALUE self)
2360
+ {
2361
+ GET_SEA();
2362
+ Frt_Unwrap_Struct(self);
2363
+ object_del(sea);
2364
+ sea->close(sea);
2365
+ return Qnil;
2366
+ }
2367
+
2368
+ /*
2369
+ * call-seq:
2370
+ * searcher.reader -> IndexReader
2371
+ *
2372
+ * Return the IndexReader wrapped by this searcher.
2373
+ */
2374
+ static VALUE
2375
+ frt_sea_get_reader(VALUE self, VALUE rterm)
2376
+ {
2377
+ GET_SEA();
2378
+ return object_get(((IndexSearcher *)sea)->ir);
2379
+ }
2380
+
2381
+ /*
2382
+ * call-seq:
2383
+ * searcher.doc_freq(field, term) -> integer
2384
+ *
2385
+ * Return the number of documents in which the term +term+ appears in the
2386
+ * field +field+.
2387
+ */
2388
+ static VALUE
2389
+ frt_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm)
2390
+ {
2391
+ GET_SEA();
2392
+ return INT2FIX(sea->doc_freq(sea,
2393
+ frt_field(rfield),
2394
+ StringValuePtr(rterm)));
2395
+ }
2396
+
2397
+ /*
2398
+ * call-seq:
2399
+ * searcher.get_document(doc_id) -> LazyDoc
2400
+ * searcher[doc_id] -> LazyDoc
2401
+ *
2402
+ * Retrieve a document from the index. See LazyDoc for more details on the
2403
+ * document returned. Documents are referenced internally by document ids
2404
+ * which are returned by the Searchers search methods.
2405
+ */
2406
+ static VALUE
2407
+ frt_sea_doc(VALUE self, VALUE rdoc_id)
2408
+ {
2409
+ GET_SEA();
2410
+ return frt_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
2411
+ }
2412
+
2413
+ /*
2414
+ * call-seq:
2415
+ * searcher.max_doc -> number
2416
+ *
2417
+ * Returns 1 + the maximum document id in the index. It is the
2418
+ * document_id that will be used by the next document added to the index. If
2419
+ * there are no deletions, this number also refers to the number of documents
2420
+ * in the index.
2421
+ */
2422
+ static VALUE
2423
+ frt_sea_max_doc(VALUE self)
2424
+ {
2425
+ GET_SEA();
2426
+ return INT2FIX(sea->max_doc(sea));
2427
+ }
2428
+
2429
+ static bool
2430
+ call_filter_proc(int doc_id, float score, Searcher *self)
2431
+ {
2432
+ return RTEST(rb_funcall((VALUE)self->arg, id_call, 3,
2433
+ INT2FIX(doc_id),
2434
+ rb_float_new((double)score),
2435
+ object_get(self)));
2436
+ }
2437
+
2438
+ typedef struct CWrappedFilter
2439
+ {
2440
+ Filter super;
2441
+ VALUE rfilter;
2442
+ } CWrappedFilter;
2443
+ #define CWF(filt) ((CWrappedFilter *)(filt))
2444
+
2445
+ static unsigned long
2446
+ cwfilt_hash(Filter *filt)
2447
+ {
2448
+ return NUM2ULONG(rb_funcall(CWF(filt)->rfilter, id_hash, 0));
2449
+ }
2450
+
2451
+ static int
2452
+ cwfilt_eq(Filter *filt, Filter *o)
2453
+ {
2454
+ return RTEST(rb_funcall(CWF(filt)->rfilter, id_eql, 1, CWF(o)->rfilter));
2455
+ }
2456
+
2457
+ static BitVector *
2458
+ cwfilt_get_bv_i(Filter *filt, IndexReader *ir)
2459
+ {
2460
+ VALUE rbv = rb_funcall(CWF(filt)->rfilter, id_bits, 1, object_get(ir));
2461
+ BitVector *bv;
2462
+ Data_Get_Struct(rbv, BitVector, bv);
2463
+ REF(bv);
2464
+ return bv;
2465
+ }
2466
+
2467
+ Filter *
2468
+ frt_get_cwrapped_filter(VALUE rval)
2469
+ {
2470
+ Filter *filter;
2471
+ if (frt_is_cclass(rval) && DATA_PTR(rval)) {
2472
+ Data_Get_Struct(rval, Filter, filter);
2473
+ REF(filter);
2474
+ }
2475
+ else {
2476
+ filter = filt_create(sizeof(CWrappedFilter), "CWrappedFilter");
2477
+ filter->hash = &cwfilt_hash;
2478
+ filter->eq = &cwfilt_eq;
2479
+ filter->get_bv_i = &cwfilt_get_bv_i;
2480
+ CWF(filter)->rfilter = rval;
2481
+ }
2482
+ return filter;
2483
+ }
2484
+
2485
+ static TopDocs *
2486
+ frt_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
2487
+ {
2488
+ VALUE rval;
2489
+ int offset = 0, limit = 10;
2490
+ Filter *filter = NULL;
2491
+ Sort *sort = NULL;
2492
+ TopDocs *td;
2493
+
2494
+ filter_ft filter_func = NULL;
2495
+
2496
+ if (Qnil != roptions) {
2497
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_offset))) {
2498
+ offset = FIX2INT(rval);
2499
+ if (offset < 0)
2500
+ rb_raise(rb_eArgError, ":offset must be >= 0");
2501
+ }
2502
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_limit))) {
2503
+ if (TYPE(rval) == T_FIXNUM) {
2504
+ limit = FIX2INT(rval);
2505
+ if (limit <= 0)
2506
+ rb_raise(rb_eArgError, ":limit must be > 0");
2507
+ } else if (rval == sym_all) {
2508
+ limit = INT_MAX;
2509
+ } else {
2510
+ rb_raise(rb_eArgError, "%s is not a sensible :limit value "
2511
+ "Please use a positive integer or :all",
2512
+ rb_obj_as_string(rval));
2513
+ }
2514
+ }
2515
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter))) {
2516
+ filter = frt_get_cwrapped_filter(rval);
2517
+ }
2518
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter_proc))) {
2519
+ filter_func = &call_filter_proc;
2520
+ sea->arg = (void *)rval;
2521
+ }
2522
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_sort))) {
2523
+ if (TYPE(rval) != T_DATA || CLASS_OF(rval) == cSortField) {
2524
+ rval = frt_sort_init(1, &rval, frt_sort_alloc(cSort));
2525
+ }
2526
+ Data_Get_Struct(rval, Sort, sort);
2527
+ }
2528
+ }
2529
+
2530
+ td = sea->search(sea, query, offset, limit, filter, sort, filter_func, 0);
2531
+ if (filter) filt_deref(filter);
2532
+ return td;
2533
+ }
2534
+
2535
+ /*
2536
+ * call-seq:
2537
+ * searcher.search(query, options = {}) -> TopDocs
2538
+ *
2539
+ * Run a query through the Searcher on the index. A TopDocs object is
2540
+ * returned with the relevant results. The +query+ is a built in Query
2541
+ * object. Here are the options;
2542
+ *
2543
+ * === Options
2544
+ *
2545
+ * :offset:: Default: 0. The offset of the start of the section of the
2546
+ * result-set to return. This is used for paging through
2547
+ * results. Let's say you have a page size of 10. If you
2548
+ * don't find the result you want among the first 10 results
2549
+ * then set +:offset+ to 10 and look at the next 10 results,
2550
+ * then 20 and so on.
2551
+ * :limit:: Default: 10. This is the number of results you want
2552
+ * returned, also called the page size. Set +:limit+ to
2553
+ * +:all+ to return all results
2554
+ * :sort:: A Sort object or sort string describing how the field
2555
+ * should be sorted. A sort string is made up of field names
2556
+ * which cannot contain spaces and the word "DESC" if you
2557
+ * want the field reversed, all separated by commas. For
2558
+ * example; "rating DESC, author, title". Note that Ferret
2559
+ * will try to determine a field's type by looking at the
2560
+ * first term in the index and seeing if it can be parsed as
2561
+ * an integer or a float. Keep this in mind as you may need
2562
+ * to specify a fields type to sort it correctly. For more
2563
+ * on this, see the documentation for SortField
2564
+ * :filter:: a Filter object to filter the search results with
2565
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2566
+ * and the Searcher object as its parameters and returns a
2567
+ * Boolean value specifying whether the result should be
2568
+ * included in the result set.
2569
+ */
2570
+ static VALUE
2571
+ frt_sea_search(int argc, VALUE *argv, VALUE self)
2572
+ {
2573
+ GET_SEA();
2574
+ VALUE rquery, roptions;
2575
+ Query *query;
2576
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2577
+ Data_Get_Struct(rquery, Query, query);
2578
+ return frt_get_td(frt_sea_search_internal(query, roptions, sea), self);
2579
+ }
2580
+
2581
+ /*
2582
+ * call-seq:
2583
+ * searcher.search_each(query, options = {}) {|doc_id, score| do_something}
2584
+ * -> total_hits
2585
+ *
2586
+ * Run a query through the Searcher on the index. A TopDocs object is
2587
+ * returned with the relevant results. The +query+ is a Query object. The
2588
+ * Searcher#search_each method yields the internal document id (used to
2589
+ * reference documents in the Searcher object like this; +searcher[doc_id]+)
2590
+ * and the search score for that document. It is possible for the score to be
2591
+ * greater than 1.0 for some queries and taking boosts into account. This
2592
+ * method will also normalize scores to the range 0.0..1.0 when the max-score
2593
+ * is greater than 1.0. Here are the options;
2594
+ *
2595
+ * === Options
2596
+ *
2597
+ * :offset:: Default: 0. The offset of the start of the section of the
2598
+ * result-set to return. This is used for paging through
2599
+ * results. Let's say you have a page size of 10. If you
2600
+ * don't find the result you want among the first 10 results
2601
+ * then set +:offset+ to 10 and look at the next 10 results,
2602
+ * then 20 and so on.
2603
+ * :limit:: Default: 10. This is the number of results you want
2604
+ * returned, also called the page size. Set +:limit+ to
2605
+ * +:all+ to return all results
2606
+ * :sort:: A Sort object or sort string describing how the field
2607
+ * should be sorted. A sort string is made up of field names
2608
+ * which cannot contain spaces and the word "DESC" if you
2609
+ * want the field reversed, all separated by commas. For
2610
+ * example; "rating DESC, author, title". Note that Ferret
2611
+ * will try to determine a field's type by looking at the
2612
+ * first term in the index and seeing if it can be parsed as
2613
+ * an integer or a float. Keep this in mind as you may need
2614
+ * to specify a fields type to sort it correctly. For more
2615
+ * on this, see the documentation for SortField
2616
+ * :filter:: a Filter object to filter the search results with
2617
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2618
+ * and the Searcher object as its parameters and returns a
2619
+ * Boolean value specifying whether the result should be
2620
+ * included in the result set.
2621
+ */
2622
+ static VALUE
2623
+ frt_sea_search_each(int argc, VALUE *argv, VALUE self)
2624
+ {
2625
+ int i;
2626
+ Query *q;
2627
+ float max_score;
2628
+ TopDocs *td;
2629
+ VALUE rquery, roptions, rtotal_hits;
2630
+ GET_SEA();
2631
+
2632
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2633
+
2634
+ //sds rb_thread_critical = Qtrue;
2635
+
2636
+ Data_Get_Struct(rquery, Query, q);
2637
+ td = frt_sea_search_internal(q, roptions, sea);
2638
+
2639
+ max_score = (td->max_score > 1.0) ? td->max_score : 1.0;
2640
+
2641
+ /* yield normalized scores */
2642
+ for (i = 0; i < td->size; i++) {
2643
+ rb_yield_values(2, INT2FIX(td->hits[i]->doc),
2644
+ rb_float_new((double)(td->hits[i]->score/max_score)));
2645
+ }
2646
+
2647
+ rtotal_hits = INT2FIX(td->total_hits);
2648
+ td_destroy(td);
2649
+
2650
+ //sds rb_thread_critical = 0;
2651
+
2652
+ return rtotal_hits;
2653
+ }
2654
+
2655
+ /*
2656
+ * call-seq:
2657
+ * searcher.explain(query, doc_id) -> Explanation
2658
+ *
2659
+ * Create an explanation object to explain the score returned for a
2660
+ * particular document at +doc_id+ in the index for the query +query+.
2661
+ *
2662
+ * Usually used like this;
2663
+ *
2664
+ * puts searcher.explain(query, doc_id).to_s
2665
+ */
2666
+ static VALUE
2667
+ frt_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
2668
+ {
2669
+ GET_SEA();
2670
+ Query *query;
2671
+ Explanation *expl;
2672
+ Data_Get_Struct(rquery, Query, query);
2673
+ expl = sea->explain(sea, query, FIX2INT(rdoc_id));
2674
+ return Data_Wrap_Struct(cExplanation, NULL, &expl_destroy, expl);
2675
+ }
2676
+
2677
+ /*
2678
+ * call-seq:
2679
+ * searcher.highlight(query, doc_id, field, options = {}) -> Array
2680
+ *
2681
+ * Returns an array of strings with the matches highlighted.
2682
+ *
2683
+ * === Options
2684
+ *
2685
+ * :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
2686
+ * terms will be in the centre of the excerpt. Set to
2687
+ * :all to highlight the entire field.
2688
+ * :num_excerpts:: Default: 2. Number of excerpts to return.
2689
+ * :pre_tag:: Default: "<b>". Tag to place to the left of the match.
2690
+ * You'll probably want to change this to a "<span>" tag
2691
+ * with a class. Try "\033[7m" for use in a terminal.
2692
+ * :post_tag:: Default: "</b>". This tag should close the +:pre_tag+.
2693
+ * Try tag "\033[m" in the terminal.
2694
+ * :ellipsis:: Default: "...". This is the string that is appended at
2695
+ * the beginning and end of excerpts (unless the excerpt
2696
+ * hits the start or end of the field. You'll probably
2697
+ * want to change this so a Unicode ellipsis character.
2698
+ */
2699
+ static VALUE
2700
+ frt_sea_highlight(int argc, VALUE *argv, VALUE self)
2701
+ {
2702
+ GET_SEA();
2703
+ VALUE rquery, rdoc_id, rfield, roptions, v;
2704
+ Query *query;
2705
+ int excerpt_length = 150;
2706
+ int num_excerpts = 2;
2707
+ char *pre_tag = "<b>";
2708
+ char *post_tag = "</b>";
2709
+ char *ellipsis = "...";
2710
+ char **excerpts;
2711
+
2712
+ rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
2713
+ Data_Get_Struct(rquery, Query, query);
2714
+ if (argc > 3) {
2715
+ if (TYPE(roptions) != T_HASH) {
2716
+ rb_raise(rb_eArgError, "The fourth argument to Searcher#highlight must be a hash");
2717
+ }
2718
+ if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
2719
+ num_excerpts = FIX2INT(v);
2720
+ }
2721
+ if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
2722
+ if (v == sym_all) {
2723
+ num_excerpts = 1;
2724
+ excerpt_length = INT_MAX/2;
2725
+ }
2726
+ else {
2727
+ excerpt_length = FIX2INT(v);
2728
+ }
2729
+ }
2730
+ if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
2731
+ pre_tag = rs2s(rb_obj_as_string(v));
2732
+ }
2733
+ if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
2734
+ post_tag = rs2s(rb_obj_as_string(v));
2735
+ }
2736
+ if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
2737
+ ellipsis = rs2s(rb_obj_as_string(v));
2738
+ }
2739
+ }
2740
+
2741
+ if ((excerpts = searcher_highlight(sea,
2742
+ query,
2743
+ FIX2INT(rdoc_id),
2744
+ frt_field(rfield),
2745
+ excerpt_length,
2746
+ num_excerpts,
2747
+ pre_tag,
2748
+ post_tag,
2749
+ ellipsis)) != NULL) {
2750
+ const int size = ary_size(excerpts);
2751
+ int i;
2752
+ VALUE rexcerpts = rb_ary_new2(size);
2753
+
2754
+ for (i = 0; i < size; i++) {
2755
+ rb_ary_store(rexcerpts, i, rb_str_new2(excerpts[i]));
2756
+ }
2757
+ ary_destroy(excerpts, &free);
2758
+ return rexcerpts;
2759
+ }
2760
+ return Qnil;
2761
+ }
2762
+
2763
+ /****************************************************************************
2764
+ *
2765
+ * Searcher Methods
2766
+ *
2767
+ ****************************************************************************/
2768
+
2769
+ static void
2770
+ frt_sea_mark(void *p)
2771
+ {
2772
+ IndexSearcher *isea = (IndexSearcher *)p;
2773
+ frt_gc_mark(isea->ir);
2774
+ frt_gc_mark(isea->ir->store);
2775
+ }
2776
+
2777
+ #define FRT_GET_IR(rir, ir) do {\
2778
+ rir = Data_Wrap_Struct(cIndexReader, &frt_ir_mark, &frt_ir_free, ir);\
2779
+ object_add(ir, rir);\
2780
+ } while (0)
2781
+
2782
+ /*
2783
+ * call-seq:
2784
+ * Searcher.new(obj) -> Searcher
2785
+ *
2786
+ * Create a new Searcher object. +dir+ can either be a string path to an
2787
+ * index directory on the file-system, an actual Ferret::Store::Directory
2788
+ * object or a Ferret::Index::IndexReader. You should use the IndexReader for
2789
+ * searching multiple indexes. Just open the IndexReader on multiple
2790
+ * directories.
2791
+ */
2792
+ static VALUE
2793
+ frt_sea_init(VALUE self, VALUE obj)
2794
+ {
2795
+ Store *store = NULL;
2796
+ IndexReader *ir = NULL;
2797
+ Searcher *sea;
2798
+ if (TYPE(obj) == T_STRING) {
2799
+ frt_create_dir(obj);
2800
+ store = open_fs_store(StringValueCStr(obj));
2801
+ ir = ir_open(store);
2802
+ DEREF(store);
2803
+ FRT_GET_IR(obj, ir);
2804
+ } else {
2805
+ Check_Type(obj, T_DATA);
2806
+ if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
2807
+ Data_Get_Struct(obj, Store, store);
2808
+ ir = ir_open(store);
2809
+ FRT_GET_IR(obj, ir);
2810
+ } else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
2811
+ Data_Get_Struct(obj, IndexReader, ir);
2812
+ } else {
2813
+ rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
2814
+ }
2815
+ }
2816
+
2817
+ sea = isea_new(ir);
2818
+ ((IndexSearcher *)sea)->close_ir = false;
2819
+ Frt_Wrap_Struct(self, &frt_sea_mark, &frt_sea_free, sea);
2820
+ object_add(sea, self);
2821
+
2822
+ return self;
2823
+ }
2824
+
2825
+ /****************************************************************************
2826
+ *
2827
+ * MultiSearcher Methods
2828
+ *
2829
+ ****************************************************************************/
2830
+
2831
+ static void
2832
+ frt_ms_free(void *p)
2833
+ {
2834
+ Searcher *sea = (Searcher *)p;
2835
+ MultiSearcher *msea = (MultiSearcher *)sea;
2836
+ free(msea->searchers);
2837
+ object_del(sea);
2838
+ searcher_close(sea);
2839
+ }
2840
+
2841
+ static void
2842
+ frt_ms_mark(void *p)
2843
+ {
2844
+ int i;
2845
+ MultiSearcher *msea = (MultiSearcher *)p;
2846
+ for (i = 0; i < msea->s_cnt; i++) {
2847
+ frt_gc_mark(msea->searchers[i]);
2848
+ }
2849
+ }
2850
+
2851
+ /*
2852
+ * call-seq:
2853
+ * MultiSearcher.new(searcher*) -> searcher
2854
+ *
2855
+ * Create a new MultiSearcher by passing a list of subsearchers to the
2856
+ * constructor.
2857
+ */
2858
+ static VALUE
2859
+ frt_ms_init(int argc, VALUE *argv, VALUE self)
2860
+ {
2861
+ int i, j, top = 0, capa = argc;
2862
+
2863
+ VALUE rsearcher;
2864
+ Searcher **searchers = ALLOC_N(Searcher *, capa);
2865
+ Searcher *s;
2866
+
2867
+ for (i = 0; i < argc; i++) {
2868
+ rsearcher = argv[i];
2869
+ switch (TYPE(rsearcher)) {
2870
+ case T_ARRAY:
2871
+ capa += RARRAY_LEN(rsearcher);
2872
+ REALLOC_N(searchers, Searcher *, capa);
2873
+ for (j = 0; j < RARRAY_LEN(rsearcher); j++) {
2874
+ VALUE rs = RARRAY_PTR(rsearcher)[j];
2875
+ Data_Get_Struct(rs, Searcher, s);
2876
+ searchers[top++] = s;
2877
+ }
2878
+ break;
2879
+ case T_DATA:
2880
+ Data_Get_Struct(rsearcher, Searcher, s);
2881
+ searchers[top++] = s;
2882
+ break;
2883
+ default:
2884
+ rb_raise(rb_eArgError, "Can't add class %s to MultiSearcher",
2885
+ rb_obj_classname(rsearcher));
2886
+ break;
2887
+ }
2888
+ }
2889
+ s = msea_new(searchers, top, false);
2890
+ Frt_Wrap_Struct(self, &frt_ms_mark, &frt_ms_free, s);
2891
+ object_add(s, self);
2892
+ return self;
2893
+ }
2894
+
2895
+ /****************************************************************************
2896
+ *
2897
+ * Init Function
2898
+ *
2899
+ ****************************************************************************/
2900
+
2901
+ /* rdochack
2902
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
2903
+ */
2904
+
2905
+ /*
2906
+ * Document-class: Ferret::Search::Hit
2907
+ *
2908
+ * == Summary
2909
+ *
2910
+ * A hit represents a single document match for a search. It holds the
2911
+ * document id of the document that matches along with the score for the
2912
+ * match. The score is a positive Float value. The score contained in a hit
2913
+ * is not normalized so it can be greater than 1.0. To normalize scores to
2914
+ * the range 0.0..1.0 divide the scores by TopDocs#max_score.
2915
+ */
2916
+ static void
2917
+ Init_Hit(void)
2918
+ {
2919
+ const char *hit_class = "Hit";
2920
+ /* rdochack
2921
+ cHit = rb_define_class_under(mSearch, "Hit", rb_cObject);
2922
+ */
2923
+ cHit = rb_struct_define(hit_class, "doc", "score", NULL);
2924
+ rb_set_class_path(cHit, mSearch, hit_class);
2925
+ rb_const_set(mSearch, rb_intern(hit_class), cHit);
2926
+ id_doc = rb_intern("doc");
2927
+ id_score = rb_intern("score");
2928
+ }
2929
+
2930
+ /*
2931
+ * Document-class: Ferret::Search::TopDocs
2932
+ *
2933
+ * == Summary
2934
+ *
2935
+ * A TopDocs object holds a result set for a search. The number of documents
2936
+ * that matched the query is held in TopDocs#total_hits. The actual
2937
+ * results are in the Array TopDocs#hits. The number of hits returned is
2938
+ * limited by the +:limit+ option so the size of the +hits+ array will not
2939
+ * always be equal to the value of +total_hits+. Finally TopDocs#max_score
2940
+ * holds the maximum score of any match (not necessarily the maximum score
2941
+ * contained in the +hits+ array) so it can be used to normalize scores. For
2942
+ * example, to print doc ids with scores out of 100.0 you could do this;
2943
+ *
2944
+ * top_docs.hits.each do |hit|
2945
+ * puts "#{hit.doc} scored #{hit.score * 100.0 / top_docs.max_score}"
2946
+ * end
2947
+ */
2948
+ static void
2949
+ Init_TopDocs(void)
2950
+ {
2951
+ const char *td_class = "TopDocs";
2952
+ /* rdochack
2953
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
2954
+ */
2955
+ cTopDocs = rb_struct_define(td_class,
2956
+ "total_hits",
2957
+ "hits",
2958
+ "max_score",
2959
+ "searcher",
2960
+ NULL);
2961
+ rb_set_class_path(cTopDocs, mSearch, td_class);
2962
+ rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
2963
+ rb_define_method(cTopDocs, "to_s", frt_td_to_s, -1);
2964
+ rb_define_method(cTopDocs, "to_json", frt_td_to_json, 0);
2965
+ id_hits = rb_intern("hits");
2966
+ id_total_hits = rb_intern("total_hits");
2967
+ id_max_score = rb_intern("max_score");
2968
+ id_searcher = rb_intern("searcher");
2969
+ }
2970
+
2971
+ /*
2972
+ * Document-class: Ferret::Search::Explanation
2973
+ *
2974
+ * == Summary
2975
+ *
2976
+ * Explanation is used to give a description of why a document matched with
2977
+ * the score that it did. Use the Explanation#to_s or Explanation#to_html
2978
+ * methods to display the explanation in a human readable format. Creating
2979
+ * explanations is an expensive operation so it should only be used for
2980
+ * debugging purposes. To create an explanation use the Searcher#explain
2981
+ * method.
2982
+ *
2983
+ * == Example
2984
+ *
2985
+ * puts searcher.explain(query, doc_id).to_s
2986
+ */
2987
+ static void
2988
+ Init_Explanation(void)
2989
+ {
2990
+ cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
2991
+ rb_define_alloc_func(cExplanation, frt_data_alloc);
2992
+
2993
+ rb_define_method(cExplanation, "to_s", frt_expl_to_s, 0);
2994
+ rb_define_method(cExplanation, "to_html", frt_expl_to_html, 0);
2995
+ rb_define_method(cExplanation, "score", frt_expl_score, 0);
2996
+ }
2997
+
2998
+ /*
2999
+ * Document-class: Ferret::Search::Query
3000
+ *
3001
+ * == Summary
3002
+ *
3003
+ * Abstract class representing a query to the index. There are a number of
3004
+ * concrete Query implementations;
3005
+ *
3006
+ * * TermQuery
3007
+ * * MultiTermQuery
3008
+ * * BooleanQuery
3009
+ * * PhraseQuery
3010
+ * * ConstantScoreQuery
3011
+ * * FilteredQuery
3012
+ * * MatchAllQuery
3013
+ * * RangeQuery
3014
+ * * WildcardQuery
3015
+ * * FuzzyQuery
3016
+ * * PrefixQuery
3017
+ * * Spans::SpanTermQuery
3018
+ * * Spans::SpanFirstQuery
3019
+ * * Spans::SpanOrQuery
3020
+ * * Spans::SpanNotQuery
3021
+ * * Spans::SpanNearQuery
3022
+ *
3023
+ * Explore these classes for the query right for you. The queries are passed
3024
+ * to the Searcher#search* methods.
3025
+ *
3026
+ * === Query Boosts
3027
+ *
3028
+ * Queries have a boost value so that you can make the results of one query
3029
+ * more important than the results of another query when combining them in a
3030
+ * BooleanQuery. For example, documents on Rails. To avoid getting results
3031
+ * for train rails you might also add the term Ruby but Rails is the more
3032
+ * important term so you'd give it a boost.
3033
+ */
3034
+ static void
3035
+ Init_Query(void)
3036
+ {
3037
+ cQuery = rb_define_class_under(mSearch, "Query", rb_cObject);
3038
+
3039
+ rb_define_method(cQuery, "to_s", frt_q_to_s, -1);
3040
+ rb_define_method(cQuery, "boost", frt_q_get_boost, 0);
3041
+ rb_define_method(cQuery, "boost=", frt_q_set_boost, 1);
3042
+ rb_define_method(cQuery, "eql?", frt_q_eql, 1);
3043
+ rb_define_method(cQuery, "==", frt_q_eql, 1);
3044
+ rb_define_method(cQuery, "hash", frt_q_hash, 0);
3045
+ rb_define_method(cQuery, "terms", frt_q_get_terms, 1);
3046
+ }
3047
+
3048
+ /*
3049
+ * Document-class: Ferret::Search::TermQuery
3050
+ *
3051
+ * == Summary
3052
+ *
3053
+ * TermQuery is the most basic query and it is the building block for most
3054
+ * other queries. It basically matches documents that contain a specific term
3055
+ * in a specific field.
3056
+ *
3057
+ * == Example
3058
+ *
3059
+ * query = TermQuery.new(:content, "rails")
3060
+ *
3061
+ * # untokenized fields can also be searched with this query;
3062
+ * query = TermQuery.new(:title, "Shawshank Redemption")
3063
+ *
3064
+ * Notice the all lowercase term "rails". This is important as most analyzers will
3065
+ * downcase all text added to the index. The title in this case was not
3066
+ * tokenized so the case would have been left as is.
3067
+ */
3068
+ static void
3069
+ Init_TermQuery(void)
3070
+ {
3071
+ cTermQuery = rb_define_class_under(mSearch, "TermQuery", cQuery);
3072
+ rb_define_alloc_func(cTermQuery, frt_data_alloc);
3073
+
3074
+ rb_define_method(cTermQuery, "initialize", frt_tq_init, 2);
3075
+ }
3076
+
3077
+ /*
3078
+ * Document-class: Ferret::Search::MultiTermQuery
3079
+ *
3080
+ * == Summary
3081
+ *
3082
+ * MultiTermQuery matches documents that contain one of a list of terms in a
3083
+ * specific field. This is the basic building block for queries such as;
3084
+ *
3085
+ * * PrefixQuery
3086
+ * * WildcardQuery
3087
+ * * FuzzyQuery
3088
+ *
3089
+ * MultiTermQuery is very similar to a boolean "Or" query. It is highly
3090
+ * optimized though as it focuses on a single field.
3091
+ *
3092
+ * == Example
3093
+ *
3094
+ * multi_term_query = MultiTermQuery.new(:content, :max_terms => 10)
3095
+ *
3096
+ * multi_term_query << "Ruby" << "Ferret" << "Rails" << "Search"
3097
+ */
3098
+ static void
3099
+ Init_MultiTermQuery(void)
3100
+ {
3101
+ id_default_max_terms = rb_intern("@@default_max_terms");
3102
+ sym_max_terms = ID2SYM(rb_intern("max_terms"));
3103
+ sym_min_score = ID2SYM(rb_intern("min_score"));
3104
+
3105
+ cMultiTermQuery = rb_define_class_under(mSearch, "MultiTermQuery", cQuery);
3106
+ rb_define_alloc_func(cMultiTermQuery, frt_data_alloc);
3107
+
3108
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, INT2FIX(512));
3109
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms",
3110
+ frt_mtq_get_dmt, 0);
3111
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms=",
3112
+ frt_mtq_set_dmt, 1);
3113
+
3114
+ rb_define_method(cMultiTermQuery, "initialize", frt_mtq_init, -1);
3115
+ rb_define_method(cMultiTermQuery, "add_term", frt_mtq_add_term, -1);
3116
+ rb_define_method(cMultiTermQuery, "<<", frt_mtq_add_term, -1);
3117
+ }
3118
+
3119
+ static void Init_BooleanClause(void);
3120
+
3121
+ /*
3122
+ * Document-class: Ferret::Search::BooleanQuery
3123
+ *
3124
+ * == Summary
3125
+ *
3126
+ * A BooleanQuery is used for combining many queries into one. This is best
3127
+ * illustrated with an example.
3128
+ *
3129
+ * == Example
3130
+ *
3131
+ * Let's say we wanted to find all documents with the term "Ruby" in the
3132
+ * +:title+ and the term "Ferret" in the +:content+ field or the +:title+
3133
+ * field written before January 2006. You could build the query like this.
3134
+ *
3135
+ * tq1 = TermQuery.new(:title, "ruby")
3136
+ * tq21 = TermQuery.new(:title, "ferret")
3137
+ * tq22 = TermQuery.new(:content, "ferret")
3138
+ * bq2 = BooleanQuery.new
3139
+ * bq2 << tq21 << tq22
3140
+ *
3141
+ * rq3 = RangeQuery.new(:written, :< => "200601")
3142
+ *
3143
+ * query = BooleanQuery.new
3144
+ * query.add_query(tq1, :must).add_query(bq2, :must).add_query(rq3, :must)
3145
+ */
3146
+ static void
3147
+ Init_BooleanQuery(void)
3148
+ {
3149
+ cBooleanQuery = rb_define_class_under(mSearch, "BooleanQuery", cQuery);
3150
+ rb_define_alloc_func(cBooleanQuery, frt_data_alloc);
3151
+
3152
+ rb_define_method(cBooleanQuery, "initialize", frt_bq_init, -1);
3153
+ rb_define_method(cBooleanQuery, "add_query", frt_bq_add_query, -1);
3154
+ rb_define_method(cBooleanQuery, "<<", frt_bq_add_query, -1);
3155
+
3156
+ Init_BooleanClause();
3157
+ }
3158
+
3159
+ /*
3160
+ * Document-class: Ferret::Search::BooleanQuery::BooleanClause
3161
+ *
3162
+ * == Summary
3163
+ *
3164
+ * A BooleanClause holds a single query within a BooleanQuery specifying
3165
+ * whether the query +:must+ match, +:should+ match or +:must_not+ match.
3166
+ * BooleanClauses can be used to pass a clause from one BooleanQuery to
3167
+ * another although it is generally easier just to add a query directly to a
3168
+ * BooleanQuery using the BooleanQuery#add_query method.
3169
+ *
3170
+ * == Example
3171
+ *
3172
+ * clause1 = BooleanClause.new(query1, :should)
3173
+ * clause2 = BooleanClause.new(query2, :should)
3174
+ *
3175
+ * query = BooleanQuery.new
3176
+ * query << clause1 << clause2
3177
+ */
3178
+ static void
3179
+ Init_BooleanClause(void)
3180
+ {
3181
+ sym_should = ID2SYM(rb_intern("should"));
3182
+ sym_must = ID2SYM(rb_intern("must"));
3183
+ sym_must_not = ID2SYM(rb_intern("must_not"));
3184
+
3185
+ cBooleanClause = rb_define_class_under(cBooleanQuery, "BooleanClause",
3186
+ rb_cObject);
3187
+ rb_define_alloc_func(cBooleanClause, frt_data_alloc);
3188
+
3189
+ rb_define_method(cBooleanClause, "initialize", frt_bc_init, -1);
3190
+ rb_define_method(cBooleanClause, "query", frt_bc_get_query, 0);
3191
+ rb_define_method(cBooleanClause, "query=", frt_bc_set_query, 1);
3192
+ rb_define_method(cBooleanClause, "required?", frt_bc_is_required, 0);
3193
+ rb_define_method(cBooleanClause, "prohibited?", frt_bc_is_prohibited, 0);
3194
+ rb_define_method(cBooleanClause, "occur=", frt_bc_set_occur, 1);
3195
+ rb_define_method(cBooleanClause, "to_s", frt_bc_to_s, 0);
3196
+ }
3197
+
3198
+ /*
3199
+ * Document-class: Ferret::Search::RangeQuery
3200
+ *
3201
+ * == Summary
3202
+ *
3203
+ * RangeQuery is used to find documents with terms in a range.
3204
+ * RangeQueries are usually used on untokenized fields like date fields or
3205
+ * number fields.
3206
+ *
3207
+ * == Example
3208
+ *
3209
+ * To find all documents written between January 1st 2006 and January 26th
3210
+ * 2006 inclusive you would write the query like this;
3211
+ *
3212
+ * query = RangeQuery.new(:create_date, :>= => "20060101", :<= => "20060126")
3213
+ */
3214
+ static void
3215
+ Init_RangeQuery(void)
3216
+ {
3217
+ sym_upper = ID2SYM(rb_intern("upper"));
3218
+ sym_lower = ID2SYM(rb_intern("lower"));
3219
+ sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
3220
+ sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
3221
+ sym_include_upper = ID2SYM(rb_intern("include_upper"));
3222
+ sym_include_lower = ID2SYM(rb_intern("include_lower"));
3223
+
3224
+ sym_less_than = ID2SYM(rb_intern("<"));
3225
+ sym_less_than_or_equal_to = ID2SYM(rb_intern("<="));
3226
+ sym_greater_than = ID2SYM(rb_intern(">"));
3227
+ sym_greater_than_or_equal_to = ID2SYM(rb_intern(">="));
3228
+
3229
+ cRangeQuery = rb_define_class_under(mSearch, "RangeQuery", cQuery);
3230
+ rb_define_alloc_func(cRangeQuery, frt_data_alloc);
3231
+
3232
+ rb_define_method(cRangeQuery, "initialize", frt_rq_init, 2);
3233
+ }
3234
+
3235
+ /*
3236
+ * Document-class: Ferret::Search::PhraseQuery
3237
+ *
3238
+ * == Summary
3239
+ *
3240
+ * PhraseQuery matches phrases like "the quick brown fox". Most people are
3241
+ * familiar with phrase queries having used them in most internet search
3242
+ * engines.
3243
+ *
3244
+ * === Slop
3245
+ *
3246
+ * Ferret's phrase queries are slightly more advanced. You can match phrases
3247
+ * with a slop, ie the match isn't exact but it is good enough. The slop is
3248
+ * basically the word edit distance of the phrase. For example, "the quick
3249
+ * brown fox" with a slop of 1 would match "the quick little brown fox". With
3250
+ * a slop of 2 it would match "the brown quick fox".
3251
+ *
3252
+ * query = PhraseQuery.new(:content)
3253
+ * query << "the" << "quick" << "brown" << "fox"
3254
+ *
3255
+ * # matches => "the quick brown fox"
3256
+ *
3257
+ * query.slop = 1
3258
+ * # matches => "the quick little brown fox"
3259
+ * |__1__^
3260
+ *
3261
+ * query.slop = 2
3262
+ * # matches => "the brown quick _____ fox"
3263
+ * ^_____2_____|
3264
+ *
3265
+ * == Multi-PhraseQuery
3266
+ *
3267
+ * Phrase queries can also have multiple terms in a single position. Let's
3268
+ * say for example that we want to match synonyms for quick like "fast" and
3269
+ * "speedy". You could build the query like this;
3270
+ *
3271
+ * query = PhraseQuery.new(:content)
3272
+ * query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox"
3273
+ * # matches => "the quick red fox"
3274
+ * # matches => "the fast brown fox"
3275
+ *
3276
+ * query.slop = 1
3277
+ * # matches => "the speedy little red fox"
3278
+ *
3279
+ * You can also leave positions blank. Let's say you wanted to match "the
3280
+ * quick <> fox" where "<>" could match anything (but not nothing). You'd
3281
+ * build this query like this;
3282
+ *
3283
+ * query = PhraseQuery.new(:content)
3284
+ * query.add_term("the").add_term("quick").add_term("fox", 2)
3285
+ * # matches => "the quick yellow fox"
3286
+ * # matches => "the quick alkgdhaskghaskjdh fox"
3287
+ *
3288
+ * The second parameter to PhraseQuery#add_term is the position increment for
3289
+ * the term. It is one by default meaning that every time you add a term it
3290
+ * is expected to follow the previous term. By setting it to 2 or greater
3291
+ * you are leaving empty spaces in the term.
3292
+ *
3293
+ * There are also some tricks you can do by setting the position increment to
3294
+ * 0. With a little help from your analyzer you can actually tag bold or
3295
+ * italic text for example. If you want more information about this, ask on
3296
+ * the mailing list.
3297
+ */
3298
+ static void
3299
+ Init_PhraseQuery(void)
3300
+ {
3301
+ cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
3302
+ rb_define_alloc_func(cPhraseQuery, frt_data_alloc);
3303
+
3304
+ rb_define_method(cPhraseQuery, "initialize", frt_phq_init, -1);
3305
+ rb_define_method(cPhraseQuery, "add_term", frt_phq_add, -1);
3306
+ rb_define_method(cPhraseQuery, "<<", frt_phq_add, -1);
3307
+ rb_define_method(cPhraseQuery, "slop", frt_phq_get_slop, 0);
3308
+ rb_define_method(cPhraseQuery, "slop=", frt_phq_set_slop, 1);
3309
+ }
3310
+
3311
+ /*
3312
+ * Document-class: Ferret::Search::PrefixQuery
3313
+ *
3314
+ * == Summary
3315
+ *
3316
+ * A prefix query is like a TermQuery except that it matches any term with a
3317
+ * specific prefix. PrefixQuery is expanded into a MultiTermQuery when
3318
+ * submitted in a search.
3319
+ *
3320
+ * == Example
3321
+ *
3322
+ * PrefixQuery is very useful for matching a tree structure category
3323
+ * hierarchy. For example, let's say you have the categories;
3324
+ *
3325
+ * "cat1/"
3326
+ * "cat1/sub_cat1"
3327
+ * "cat1/sub_cat2"
3328
+ * "cat2"
3329
+ * "cat2/sub_cat1"
3330
+ * "cat2/sub_cat2"
3331
+ *
3332
+ * Let's say you want to match everything in category 2. You'd build the query
3333
+ * like this;
3334
+ *
3335
+ * query = PrefixQuery.new(:category, "cat2")
3336
+ * # matches => "cat2"
3337
+ * # matches => "cat2/sub_cat1"
3338
+ * # matches => "cat2/sub_cat2"
3339
+ */
3340
+ static void
3341
+ Init_PrefixQuery(void)
3342
+ {
3343
+ cPrefixQuery = rb_define_class_under(mSearch, "PrefixQuery", cQuery);
3344
+ rb_define_alloc_func(cPrefixQuery, frt_data_alloc);
3345
+
3346
+ rb_define_method(cPrefixQuery, "initialize", frt_prq_init, -1);
3347
+ }
3348
+
3349
+ /*
3350
+ * Document-class: Ferret::Search::WildcardQuery
3351
+ *
3352
+ * == Summary
3353
+ *
3354
+ * WildcardQuery is a simple pattern matching query. There are two wild-card
3355
+ * characters.
3356
+ *
3357
+ * * "*" which matches 0 or more characters
3358
+ * * "?" which matches a single character
3359
+ *
3360
+ * == Example
3361
+ *
3362
+ * query = WildcardQuery.new(:field, "h*og")
3363
+ * # matches => "hog"
3364
+ * # matches => "hot dog"
3365
+ *
3366
+ * query = WildcardQuery.new(:field, "fe?t")
3367
+ * # matches => "feat"
3368
+ * # matches => "feet"
3369
+ *
3370
+ * query = WildcardQuery.new(:field, "f?ll*")
3371
+ * # matches => "fill"
3372
+ * # matches => "falling"
3373
+ * # matches => "folly"
3374
+ */
3375
+ static void
3376
+ Init_WildcardQuery(void)
3377
+ {
3378
+ cWildcardQuery = rb_define_class_under(mSearch, "WildcardQuery", cQuery);
3379
+ rb_define_alloc_func(cWildcardQuery, frt_data_alloc);
3380
+
3381
+ rb_define_method(cWildcardQuery, "initialize", frt_wcq_init, -1);
3382
+ }
3383
+
3384
+ /*
3385
+ * Document-class: Ferret::Search::FuzzyQuery
3386
+ *
3387
+ * == Summary
3388
+ *
3389
+ * FuzzyQuery uses the Levenshtein distance formula for measuring the
3390
+ * similarity between two terms. For example, weak and week have one letter
3391
+ * difference and they are four characters long so the similarity is 75% or
3392
+ * 0.75. You can use this query to match terms that are very close to the
3393
+ * search term.
3394
+ *
3395
+ * == Example
3396
+ *
3397
+ * FuzzyQuery can be quite useful for finding documents that wouldn't normally
3398
+ * be found because of typos.
3399
+ *
3400
+ * FuzzyQuery.new(:field, "google",
3401
+ * :min_similarity => 0.6,
3402
+ * :prefix_length => 2)
3403
+ * # matches => "gogle", "goggle", "googol", "googel"
3404
+ */
3405
+ static void
3406
+ Init_FuzzyQuery(void)
3407
+ {
3408
+ id_default_min_similarity = rb_intern("@@default_min_similarity");
3409
+ id_default_prefix_length = rb_intern("@@default_prefix_length");
3410
+
3411
+ sym_min_similarity = ID2SYM(rb_intern("min_similarity"));
3412
+ sym_prefix_length = ID2SYM(rb_intern("prefix_length"));
3413
+
3414
+ cFuzzyQuery = rb_define_class_under(mSearch, "FuzzyQuery", cQuery);
3415
+ rb_define_alloc_func(cFuzzyQuery, frt_data_alloc);
3416
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity,
3417
+ rb_float_new(0.5));
3418
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length,
3419
+ INT2FIX(0));
3420
+
3421
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity",
3422
+ frt_fq_get_dms, 0);
3423
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity=",
3424
+ frt_fq_set_dms, 1);
3425
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length",
3426
+ frt_fq_get_dpl, 0);
3427
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length=",
3428
+ frt_fq_set_dpl, 1);
3429
+
3430
+ rb_define_method(cFuzzyQuery, "initialize", frt_fq_init, -1);
3431
+ rb_define_method(cFuzzyQuery, "prefix_length", frt_fq_pre_len, 0);
3432
+ rb_define_method(cFuzzyQuery, "min_similarity", frt_fq_min_sim, 0);
3433
+ }
3434
+
3435
+ /*
3436
+ * Document-class: Ferret::Search::MatchAllQuery
3437
+ *
3438
+ * == Summary
3439
+ *
3440
+ * MatchAllQuery matches all documents in the index. You might want to use this
3441
+ * query in combination with a filter, however, ConstantScoreQuery is
3442
+ * probably better in that circumstance.
3443
+ */
3444
+ static void
3445
+ Init_MatchAllQuery(void)
3446
+ {
3447
+ cMatchAllQuery = rb_define_class_under(mSearch, "MatchAllQuery", cQuery);
3448
+ rb_define_alloc_func(cMatchAllQuery, frt_maq_alloc);
3449
+
3450
+ rb_define_method(cMatchAllQuery, "initialize", frt_maq_init, 0);
3451
+ }
3452
+
3453
+ /*
3454
+ * Document-class: Ferret::Search::ConstantScoreQuery
3455
+ *
3456
+ * == Summary
3457
+ *
3458
+ * ConstantScoreQuery is a way to turn a Filter into a Query. It matches all
3459
+ * documents that its filter matches with a constant score. This is a very
3460
+ * fast query, particularly when run more than once (since filters are
3461
+ * cached). It is also used internally by RangeQuery.
3462
+ *
3463
+ * == Example
3464
+ *
3465
+ * Let's say for example that you often need to display all documents created
3466
+ * on or after June 1st. You could create a ConstantScoreQuery like this;
3467
+ *
3468
+ * query = ConstantScoreQuery.new(RangeFilter.new(:created_on, :>= => "200606"))
3469
+ *
3470
+ * Once this is run once the results are cached and will be returned very
3471
+ * quickly in future requests.
3472
+ */
3473
+ static void
3474
+ Init_ConstantScoreQuery(void)
3475
+ {
3476
+ cConstantScoreQuery = rb_define_class_under(mSearch,
3477
+ "ConstantScoreQuery", cQuery);
3478
+ rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
3479
+
3480
+ rb_define_method(cConstantScoreQuery, "initialize", frt_csq_init, 1);
3481
+ }
3482
+
3483
+ /*
3484
+ * Document-class: Ferret::Search::FilteredQuery
3485
+ *
3486
+ * == Summary
3487
+ *
3488
+ * FilteredQuery offers you a way to apply a filter to a specific query.
3489
+ * The FilteredQuery would then be added to a BooleanQuery to be combined
3490
+ * with other queries. There is not much point in passing a FilteredQuery
3491
+ * directly to a Searcher#search method unless you are applying more than one
3492
+ * filter since the search method also takes a filter as a parameter.
3493
+ */
3494
+ static void
3495
+ Init_FilteredQuery(void)
3496
+ {
3497
+ cFilteredQuery = rb_define_class_under(mSearch, "FilteredQuery", cQuery);
3498
+ rb_define_alloc_func(cFilteredQuery, frt_data_alloc);
3499
+
3500
+ rb_define_method(cFilteredQuery, "initialize", frt_fqq_init, 2);
3501
+ }
3502
+
3503
+ /*
3504
+ * Document-class: Ferret::Search::Spans::SpanTermQuery
3505
+ *
3506
+ * == Summary
3507
+ *
3508
+ * A SpanTermQuery is the Spans version of TermQuery, the only difference
3509
+ * being that it returns the start and end offset of all of its matches for
3510
+ * use by enclosing SpanQueries.
3511
+ */
3512
+ static void
3513
+ Init_SpanTermQuery(void)
3514
+ {
3515
+ cSpanTermQuery = rb_define_class_under(mSpans, "SpanTermQuery", cQuery);
3516
+ rb_define_alloc_func(cSpanTermQuery, frt_data_alloc);
3517
+
3518
+ rb_define_method(cSpanTermQuery, "initialize", frt_spantq_init, 2);
3519
+ }
3520
+
3521
+ /*
3522
+ * Document-class: Ferret::Search::Spans::SpanMultiTermQuery
3523
+ *
3524
+ * == Summary
3525
+ *
3526
+ * A SpanMultiTermQuery is the Spans version of MultiTermQuery, the only
3527
+ * difference being that it returns the start and end offset of all of its
3528
+ * matches for use by enclosing SpanQueries.
3529
+ */
3530
+ static void
3531
+ Init_SpanMultiTermQuery(void)
3532
+ {
3533
+ cSpanMultiTermQuery = rb_define_class_under(mSpans, "SpanMultiTermQuery", cQuery);
3534
+ rb_define_alloc_func(cSpanMultiTermQuery, frt_data_alloc);
3535
+
3536
+ rb_define_method(cSpanMultiTermQuery, "initialize", frt_spanmtq_init, 2);
3537
+ }
3538
+
3539
+ /*
3540
+ * Document-class: Ferret::Search::Spans::SpanPrefixQuery
3541
+ *
3542
+ * == Summary
3543
+ *
3544
+ * A SpanPrefixQuery is the Spans version of PrefixQuery, the only difference
3545
+ * being that it returns the start and end offset of all of its matches for
3546
+ * use by enclosing SpanQueries.
3547
+ */
3548
+ static void
3549
+ Init_SpanPrefixQuery(void)
3550
+ {
3551
+ cSpanPrefixQuery = rb_define_class_under(mSpans, "SpanPrefixQuery", cQuery);
3552
+ rb_define_alloc_func(cSpanPrefixQuery, frt_data_alloc);
3553
+
3554
+ rb_define_method(cSpanPrefixQuery, "initialize", frt_spanprq_init, -1);
3555
+ }
3556
+
3557
+ /*
3558
+ * Document-class: Ferret::Search::Spans::SpanFirstQuery
3559
+ *
3560
+ * == Summary
3561
+ *
3562
+ * A SpanFirstQuery restricts a query to search in the first +end+ bytes of a
3563
+ * field. This is useful since often the most important information in a
3564
+ * document is at the start of the document.
3565
+ *
3566
+ * == Example
3567
+ *
3568
+ * To find all documents where "ferret" is within the first 100 characters
3569
+ * (really bytes);
3570
+ *
3571
+ * query = SpanFirstQuery.new(SpanTermQuery.new(:content, "ferret"), 100)
3572
+ *
3573
+ * == NOTE
3574
+ *
3575
+ * SpanFirstQuery only works with other SpanQueries.
3576
+ */
3577
+ static void
3578
+ Init_SpanFirstQuery(void)
3579
+ {
3580
+ cSpanFirstQuery = rb_define_class_under(mSpans, "SpanFirstQuery", cQuery);
3581
+ rb_define_alloc_func(cSpanFirstQuery, frt_data_alloc);
3582
+
3583
+ rb_define_method(cSpanFirstQuery, "initialize", frt_spanfq_init, 2);
3584
+ }
3585
+
3586
+ /*
3587
+ * Document-class: Ferret::Search::Spans::SpanNearQuery
3588
+ *
3589
+ * == Summary
3590
+ *
3591
+ * A SpanNearQuery is like a combination between a PhraseQuery and a
3592
+ * BooleanQuery. It matches sub-SpanQueries which are added as clauses but
3593
+ * those clauses must occur within a +slop+ edit distance of each other. You
3594
+ * can also specify that clauses must occur +in_order+.
3595
+ *
3596
+ * == Example
3597
+ *
3598
+ * query = SpanNearQuery.new(:slop => 2)
3599
+ * query << SpanTermQuery.new(:field, "quick")
3600
+ * query << SpanTermQuery.new(:field, "brown")
3601
+ * query << SpanTermQuery.new(:field, "fox")
3602
+ * # matches => "quick brown speckled sleepy fox"
3603
+ * |______2______^
3604
+ * # matches => "quick brown speckled fox"
3605
+ * |__1__^
3606
+ * # matches => "brown quick _____ fox"
3607
+ * ^_____2_____|
3608
+ *
3609
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
3610
+ * query << SpanTermQuery.new(:field, "quick")
3611
+ * query << SpanTermQuery.new(:field, "brown")
3612
+ * query << SpanTermQuery.new(:field, "fox")
3613
+ * # matches => "quick brown speckled sleepy fox"
3614
+ * |______2______^
3615
+ * # matches => "quick brown speckled fox"
3616
+ * |__1__^
3617
+ * # doesn't match => "brown quick _____ fox"
3618
+ * # not in order ^_____2_____|
3619
+ *
3620
+ * == NOTE
3621
+ *
3622
+ * SpanNearQuery only works with other SpanQueries.
3623
+ */
3624
+ static void
3625
+ Init_SpanNearQuery(void)
3626
+ {
3627
+ sym_slop = ID2SYM(rb_intern("slop"));
3628
+ sym_in_order = ID2SYM(rb_intern("in_order"));
3629
+ sym_clauses = ID2SYM(rb_intern("clauses"));
3630
+
3631
+ cSpanNearQuery = rb_define_class_under(mSpans, "SpanNearQuery", cQuery);
3632
+ rb_define_alloc_func(cSpanNearQuery, frt_data_alloc);
3633
+
3634
+ rb_define_method(cSpanNearQuery, "initialize", frt_spannq_init, -1);
3635
+ rb_define_method(cSpanNearQuery, "add", frt_spannq_add, 1);
3636
+ rb_define_method(cSpanNearQuery, "<<", frt_spannq_add, 1);
3637
+ }
3638
+
3639
+ /*
3640
+ * Document-class: Ferret::Search::Spans::SpanOrQuery
3641
+ *
3642
+ * == Summary
3643
+ *
3644
+ * SpanOrQuery is just like a BooleanQuery with all +:should+ clauses.
3645
+ * However, the difference is that all sub-clauses must be SpanQueries and
3646
+ * the resulting query can then be used within other SpanQueries like
3647
+ * SpanNearQuery.
3648
+ *
3649
+ * == Example
3650
+ *
3651
+ * Combined with SpanNearQuery we can create a multi-PhraseQuery like query;
3652
+ *
3653
+ * quick_query = SpanOrQuery.new()
3654
+ * quick_query << SpanTermQuery.new(:field, "quick")
3655
+ * quick_query << SpanTermQuery.new(:field, "fast")
3656
+ * quick_query << SpanTermQuery.new(:field, "speedy")
3657
+ *
3658
+ * colour_query = SpanOrQuery.new()
3659
+ * colour_query << SpanTermQuery.new(:field, "red")
3660
+ * colour_query << SpanTermQuery.new(:field, "brown")
3661
+ *
3662
+ *
3663
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
3664
+ * query << quick_query
3665
+ * query << colour_query
3666
+ * query << SpanTermQuery.new(:field, "fox")
3667
+ * # matches => "quick red speckled sleepy fox"
3668
+ * |______2______^
3669
+ * # matches => "speedy brown speckled fox"
3670
+ * |__1__^
3671
+ * # doesn't match => "brown fast _____ fox"
3672
+ * # not in order ^_____2____|
3673
+ *
3674
+ * == NOTE
3675
+ *
3676
+ * SpanOrQuery only works with other SpanQueries.
3677
+ */
3678
+ static void
3679
+ Init_SpanOrQuery(void)
3680
+ {
3681
+ cSpanOrQuery = rb_define_class_under(mSpans, "SpanOrQuery", cQuery);
3682
+ rb_define_alloc_func(cSpanOrQuery, frt_data_alloc);
3683
+
3684
+ rb_define_method(cSpanOrQuery, "initialize", frt_spanoq_init, -1);
3685
+ rb_define_method(cSpanOrQuery, "add", frt_spanoq_add, 1);
3686
+ rb_define_method(cSpanOrQuery, "<<", frt_spanoq_add, 1);
3687
+ }
3688
+
3689
+ /*
3690
+ * Document-class: Ferret::Search::Spans::SpanNotQuery
3691
+ *
3692
+ * == Summary
3693
+ *
3694
+ * SpanNotQuery is like a BooleanQuery with a +:must_not+ clause. The
3695
+ * difference being that the resulting query can be used in another
3696
+ * SpanQuery.
3697
+ *
3698
+ * == Example
3699
+ *
3700
+ * Let's say you wanted to search for all documents with the term "rails"
3701
+ * near the start but without the term "train" near the start. This would
3702
+ * allow the term "train" to occur later on in the document.
3703
+ *
3704
+ * rails_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "rails"), 100)
3705
+ * train_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "train"), 100)
3706
+ * query = SpanNotQuery.new(rails_query, train_query)
3707
+ *
3708
+ * == NOTE
3709
+ *
3710
+ * SpanNotQuery only works with other SpanQueries.
3711
+ */
3712
+ static void
3713
+ Init_SpanNotQuery(void)
3714
+ {
3715
+ cSpanNotQuery = rb_define_class_under(mSpans, "SpanNotQuery", cQuery);
3716
+ rb_define_alloc_func(cSpanNotQuery, frt_data_alloc);
3717
+
3718
+ rb_define_method(cSpanNotQuery, "initialize", frt_spanxq_init, 2);
3719
+ }
3720
+
3721
+ /* rdoc hack
3722
+ extern VALUE mFerret = rb_define_module("Ferret");
3723
+ extern VALUE mSearch = rb_define_module_under(mFerret, "Search");
3724
+ */
3725
+
3726
+ /*
3727
+ * Document-module: Ferret::Search::Spans
3728
+ *
3729
+ * == Summary
3730
+ *
3731
+ * The Spans module contains a number of SpanQueries. SpanQueries, unlike
3732
+ * regular queries, also return the start and end offsets of all of their
3733
+ * matches so they can be used to limit queries to a certain position in the
3734
+ * field. They are often used in combination to perform special types of
3735
+ * PhraseQuery.
3736
+ */
3737
+ static void
3738
+ Init_Spans(void)
3739
+ {
3740
+ mSpans = rb_define_module_under(mSearch, "Spans");
3741
+ Init_SpanTermQuery();
3742
+ Init_SpanMultiTermQuery();
3743
+ Init_SpanPrefixQuery();
3744
+ Init_SpanFirstQuery();
3745
+ Init_SpanNearQuery();
3746
+ Init_SpanOrQuery();
3747
+ Init_SpanNotQuery();
3748
+ }
3749
+
3750
+ /*
3751
+ * Document-class: Ferret::Search::RangeFilter
3752
+ *
3753
+ * == Summary
3754
+ *
3755
+ * RangeFilter filters a set of documents which contain a lexicographical
3756
+ * range of terms (ie "aaa", "aab", "aac", etc). See also RangeQuery
3757
+ *
3758
+ * == Example
3759
+ *
3760
+ * Find all documents created before 5th of September 2002.
3761
+ *
3762
+ * filter = RangeFilter.new(:created_on, :< => "20020905")
3763
+ */
3764
+ static void
3765
+ Init_RangeFilter(void)
3766
+ {
3767
+ cRangeFilter = rb_define_class_under(mSearch, "RangeFilter", cFilter);
3768
+ frt_mark_cclass(cRangeFilter);
3769
+ rb_define_alloc_func(cRangeFilter, frt_data_alloc);
3770
+
3771
+ rb_define_method(cRangeFilter, "initialize", frt_rf_init, 2);
3772
+ }
3773
+
3774
+ /*
3775
+ * Document-class: Ferret::Search::QueryFilter
3776
+ *
3777
+ * == Summary
3778
+ *
3779
+ * QueryFilter can be used to restrict one query's results by another query's
3780
+ * results, basically "and"ing them together. Of course you could easily use
3781
+ * a BooleanQuery to do this. The reason you may choose to use a QueryFilter
3782
+ * is that Filter results are cached so if you have one query that is often
3783
+ * added to other queries you may want to use a QueryFilter for performance
3784
+ * reasons.
3785
+ *
3786
+ * == Example
3787
+ *
3788
+ * Let's say you have a field +:approved+ which you set to yes when a
3789
+ * document is approved for display. You'll probably want to add a Filter
3790
+ * which filters approved documents to display to your users. This is the
3791
+ * perfect use case for a QueryFilter.
3792
+ *
3793
+ * filter = QueryFilter.new(TermQuery.new(:approved, "yes"))
3794
+ *
3795
+ * Just remember to use the same QueryFilter each time to take advantage of
3796
+ * caching. Don't create a new one for each request. Of course, this won't
3797
+ * work in a CGI application.
3798
+ */
3799
+ static void
3800
+ Init_QueryFilter(void)
3801
+ {
3802
+ cQueryFilter = rb_define_class_under(mSearch, "QueryFilter", cFilter);
3803
+ frt_mark_cclass(cQueryFilter);
3804
+ rb_define_alloc_func(cQueryFilter, frt_data_alloc);
3805
+
3806
+ rb_define_method(cQueryFilter, "initialize", frt_qf_init, 1);
3807
+ }
3808
+
3809
+ /*
3810
+ * Document-class: Ferret::Search::Filter
3811
+ *
3812
+ * == Summary
3813
+ *
3814
+ * A Filter is used to filter query results. It is usually passed to one of
3815
+ * Searcher's search methods however it can also be used inside a
3816
+ * ConstantScoreQuery or a FilteredQuery. To implement your own Filter you
3817
+ * must implement the method #get_bitvector(index_reader) which returns a
3818
+ * BitVector with set bits corresponding to documents that are allowed by
3819
+ * this Filter.
3820
+ *
3821
+ * TODO add support for user implemented Filter.
3822
+ * TODO add example of user implemented Filter.
3823
+ */
3824
+ static void
3825
+ Init_Filter(void)
3826
+ {
3827
+ id_bits = rb_intern("bits");
3828
+ cFilter = rb_define_class_under(mSearch, "Filter", rb_cObject);
3829
+ frt_mark_cclass(cFilter);
3830
+ rb_define_alloc_func(cConstantScoreQuery, frt_data_alloc);
3831
+
3832
+ rb_define_method(cFilter, "bits", frt_f_get_bits, 1);
3833
+ rb_define_method(cFilter, "to_s", frt_f_to_s, 0);
3834
+ }
3835
+
3836
+ /*
3837
+ * Document-class: Ferret::Search::SortField
3838
+ *
3839
+ * == Summary
3840
+ *
3841
+ * A SortField is used to sort the result-set of a search by the contents of
3842
+ * a field. The following types of sort_field are available;
3843
+ *
3844
+ * * :auto
3845
+ * * :integer
3846
+ * * :float
3847
+ * * :string
3848
+ * * :byte
3849
+ * * :doc_id
3850
+ * * :score
3851
+ *
3852
+ * The type of the SortField is set by passing it as a parameter to the
3853
+ * constructor. The +:auto+ type specifies that the SortField should detect
3854
+ * the sort type by looking at the data in the field. This is the default
3855
+ * :type value although it is recommended that you explicitly specify the
3856
+ * field's type.
3857
+ *
3858
+ * == Example
3859
+ *
3860
+ * title_sf = SortField.new(:title, :type => :string)
3861
+ * rating_sf = SortField.new(:rating, :type => :float, :reverse => true)
3862
+ *
3863
+ *
3864
+ * Note 1: Care should be taken when using the :auto sort-type since numbers
3865
+ * will occur before other strings in the index so if you are sorting a field
3866
+ * with both numbers and strings (like a title field which might have "24"
3867
+ * and "Prison Break") then the sort_field will think it is sorting integers
3868
+ * when it really should be sorting strings.
3869
+ *
3870
+ * Note 2: When sorting by integer, integers are only 4 bytes so anything
3871
+ * larger will cause strange sorting behaviour.
3872
+ */
3873
+ static void
3874
+ Init_SortField(void)
3875
+ {
3876
+ /* option hash keys for SortField#initialize */
3877
+ sym_type = ID2SYM(rb_intern("type"));
3878
+ sym_reverse = ID2SYM(rb_intern("reverse"));
3879
+ sym_comparator = ID2SYM(rb_intern("comparator"));
3880
+
3881
+ /* Sort types */
3882
+ sym_integer = ID2SYM(rb_intern("integer"));
3883
+ sym_float = ID2SYM(rb_intern("float"));
3884
+ sym_string = ID2SYM(rb_intern("string"));
3885
+ sym_auto = ID2SYM(rb_intern("auto"));
3886
+ sym_doc_id = ID2SYM(rb_intern("doc_id"));
3887
+ sym_score = ID2SYM(rb_intern("score"));
3888
+ sym_byte = ID2SYM(rb_intern("byte"));
3889
+
3890
+ cSortField = rb_define_class_under(mSearch, "SortField", rb_cObject);
3891
+ rb_define_alloc_func(cSortField, frt_data_alloc);
3892
+
3893
+ rb_define_method(cSortField, "initialize", frt_sf_init, -1);
3894
+ rb_define_method(cSortField, "reverse?", frt_sf_is_reverse, 0);
3895
+ rb_define_method(cSortField, "name", frt_sf_get_name, 0);
3896
+ rb_define_method(cSortField, "type", frt_sf_get_type, 0);
3897
+ rb_define_method(cSortField, "comparator", frt_sf_get_comparator, 0);
3898
+ rb_define_method(cSortField, "to_s", frt_sf_to_s, 0);
3899
+
3900
+ rb_define_const(cSortField, "SCORE",
3901
+ Data_Wrap_Struct(cSortField, NULL,
3902
+ &frt_deref_free,
3903
+ (SortField *)&SORT_FIELD_SCORE));
3904
+ object_add((SortField *)&SORT_FIELD_SCORE,
3905
+ rb_const_get(cSortField, rb_intern("SCORE")));
3906
+
3907
+ rb_define_const(cSortField, "SCORE_REV",
3908
+ Data_Wrap_Struct(cSortField, NULL,
3909
+ &frt_deref_free,
3910
+ (SortField *)&SORT_FIELD_SCORE_REV));
3911
+ object_add((SortField *)&SORT_FIELD_SCORE_REV,
3912
+ rb_const_get(cSortField, rb_intern("SCORE_REV")));
3913
+
3914
+ rb_define_const(cSortField, "DOC_ID",
3915
+ Data_Wrap_Struct(cSortField, NULL,
3916
+ &frt_deref_free,
3917
+ (SortField *)&SORT_FIELD_DOC));
3918
+
3919
+ oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_ID"));
3920
+ object_add((SortField *)&SORT_FIELD_DOC, oSORT_FIELD_DOC);
3921
+
3922
+ rb_define_const(cSortField, "DOC_ID_REV",
3923
+ Data_Wrap_Struct(cSortField, NULL,
3924
+ &frt_deref_free,
3925
+ (SortField *)&SORT_FIELD_DOC_REV));
3926
+ object_add((SortField *)&SORT_FIELD_DOC_REV,
3927
+ rb_const_get(cSortField, rb_intern("DOC_ID_REV")));
3928
+ }
3929
+
3930
+ /*
3931
+ * Document-class: Ferret::Search::Sort
3932
+ *
3933
+ * == Summary
3934
+ *
3935
+ * A Sort object is used to combine and apply a list of SortFields. The
3936
+ * SortFields are applied in the order they are added to the SortObject.
3937
+ *
3938
+ * == Example
3939
+ *
3940
+ * Here is how you would create a Sort object that sorts first by rating and
3941
+ * then by title;
3942
+ *
3943
+ * sf_rating = SortField.new(:rating, :type => :float, :reverse => true)
3944
+ * sf_title = SortField.new(:title, :type => :string)
3945
+ * sort = Sort.new([sf_rating, sf_title])
3946
+ *
3947
+ * Remember that the :type parameter for SortField is set to :auto by default
3948
+ * but I strongly recommend you specify a :type value.
3949
+ */
3950
+ static void
3951
+ Init_Sort(void)
3952
+ {
3953
+ /* Sort */
3954
+ cSort = rb_define_class_under(mSearch, "Sort", rb_cObject);
3955
+ rb_define_alloc_func(cSort, frt_sort_alloc);
3956
+
3957
+ rb_define_method(cSort, "initialize", frt_sort_init, -1);
3958
+ rb_define_method(cSort, "fields", frt_sort_get_fields, 0);
3959
+ rb_define_method(cSort, "to_s", frt_sort_to_s, 0);
3960
+
3961
+ rb_define_const(cSort, "RELEVANCE",
3962
+ frt_sort_init(0, NULL, frt_sort_alloc(cSort)));
3963
+ rb_define_const(cSort, "INDEX_ORDER",
3964
+ frt_sort_init(1, &oSORT_FIELD_DOC, frt_sort_alloc(cSort)));
3965
+ }
3966
+
3967
+ /*
3968
+ * Document-class: Ferret::Search::Searcher
3969
+ *
3970
+ * == Summary
3971
+ *
3972
+ * The Searcher class basically performs the task that Ferret was built for.
3973
+ * It searches the index. To search the index the Searcher class wraps an
3974
+ * IndexReader so many of the tasks that you can perform on an IndexReader
3975
+ * are also available on a searcher including, most importantly, accessing
3976
+ * stored documents.
3977
+ *
3978
+ * The main methods that you need to know about when using a Searcher are the
3979
+ * search methods. There is the Searcher#search_each method which iterates
3980
+ * through the results by document id and score and there is the
3981
+ * Searcher#search method which returns a TopDocs object. Another important
3982
+ * difference to note is that the Searcher#search_each method normalizes the
3983
+ * score to a value in the range 0.0..1.0 if the max_score is greater than
3984
+ * 1.0. Searcher#search does not. Apart from that they take the same
3985
+ * parameters and work the same way.
3986
+ *
3987
+ * == Example
3988
+ *
3989
+ * searcher = Searcher.new("/path/to/index")
3990
+ *
3991
+ * searcher.search_each(TermQuery.new(:content, "ferret"),
3992
+ * :filter => RangeFilter.new(:date, :< => "2006"),
3993
+ * :sort => "date DESC, title") do |doc_id, score|
3994
+ * puts "#{searcher[doc_id][:title]} scored #{score}"
3995
+ * end
3996
+ */
3997
+ static void
3998
+ Init_Searcher(void)
3999
+ {
4000
+ /* option hash keys for Searcher#search */
4001
+ sym_offset = ID2SYM(rb_intern("offset"));
4002
+ sym_limit = ID2SYM(rb_intern("limit"));
4003
+ sym_all = ID2SYM(rb_intern("all"));
4004
+ sym_filter = ID2SYM(rb_intern("filter"));
4005
+ sym_filter_proc = ID2SYM(rb_intern("filter_proc"));
4006
+ sym_sort = ID2SYM(rb_intern("sort"));
4007
+
4008
+ sym_excerpt_length = ID2SYM(rb_intern("excerpt_length"));
4009
+ sym_num_excerpts = ID2SYM(rb_intern("num_excerpts"));
4010
+ sym_pre_tag = ID2SYM(rb_intern("pre_tag"));
4011
+ sym_post_tag = ID2SYM(rb_intern("post_tag"));
4012
+ sym_ellipsis = ID2SYM(rb_intern("ellipsis"));
4013
+
4014
+ /* Searcher */
4015
+ cSearcher = rb_define_class_under(mSearch, "Searcher", rb_cObject);
4016
+ rb_define_alloc_func(cSearcher, frt_data_alloc);
4017
+
4018
+ rb_define_method(cSearcher, "initialize", frt_sea_init, 1);
4019
+ rb_define_method(cSearcher, "close", frt_sea_close, 0);
4020
+ rb_define_method(cSearcher, "reader", frt_sea_get_reader, 0);
4021
+ rb_define_method(cSearcher, "doc_freq", frt_sea_doc_freq, 2);
4022
+ rb_define_method(cSearcher, "get_document", frt_sea_doc, 1);
4023
+ rb_define_method(cSearcher, "[]", frt_sea_doc, 1);
4024
+ rb_define_method(cSearcher, "max_doc", frt_sea_max_doc, 0);
4025
+ rb_define_method(cSearcher, "search", frt_sea_search, -1);
4026
+ rb_define_method(cSearcher, "search_each", frt_sea_search_each, -1);
4027
+ rb_define_method(cSearcher, "explain", frt_sea_explain, 2);
4028
+ rb_define_method(cSearcher, "highlight", frt_sea_highlight, -1);
4029
+ }
4030
+
4031
+ /*
4032
+ * Document-class: Ferret::Search::MultiSearcher
4033
+ *
4034
+ * == Summary
4035
+ *
4036
+ * See Searcher for the methods that you can use on this object. A
4037
+ * MultiSearcher is used to search multiple sub-searchers. The most efficient
4038
+ * way to do this would be to open up an IndexReader on multiple directories
4039
+ * and create a Searcher with that. However, if you decide to implement a
4040
+ * RemoteSearcher, the MultiSearcher can be used to search multiple machines
4041
+ * at once.
4042
+ */
4043
+ static void
4044
+ Init_MultiSearcher(void)
4045
+ {
4046
+ cMultiSearcher = rb_define_class_under(mSearch, "MultiSearcher", cSearcher);
4047
+ rb_define_alloc_func(cMultiSearcher, frt_data_alloc);
4048
+ rb_define_method(cMultiSearcher, "initialize", frt_ms_init, -1);
4049
+ }
4050
+
4051
+ /*
4052
+ * Document-module: Ferret::Search
4053
+ *
4054
+ * == Summary
4055
+ *
4056
+ * The Search module contains all the classes used for searching the index;
4057
+ * what Ferret was designed to do. The important classes to take a look at in
4058
+ * this module are (in order);
4059
+ *
4060
+ * * Query
4061
+ * * Searcher
4062
+ * * Filter
4063
+ * * Sort
4064
+ *
4065
+ * Happy Ferreting!!
4066
+ */
4067
+ void
4068
+ Init_Search(void)
4069
+ {
4070
+ mSearch = rb_define_module_under(mFerret, "Search");
4071
+
4072
+ Init_Hit();
4073
+ Init_TopDocs();
4074
+ Init_Explanation();
4075
+
4076
+ /* Queries */
4077
+ Init_Query();
4078
+
4079
+ Init_TermQuery();
4080
+ Init_MultiTermQuery();
4081
+ Init_BooleanQuery();
4082
+ Init_RangeQuery();
4083
+ Init_PhraseQuery();
4084
+ Init_PrefixQuery();
4085
+ Init_WildcardQuery();
4086
+ Init_FuzzyQuery();
4087
+ Init_MatchAllQuery();
4088
+ Init_ConstantScoreQuery();
4089
+ Init_FilteredQuery();
4090
+
4091
+ Init_Spans();
4092
+
4093
+ /* Filters */
4094
+ Init_Filter();
4095
+ Init_RangeFilter();
4096
+ Init_QueryFilter();
4097
+
4098
+ /* Sorting */
4099
+ Init_SortField(); /* must be before Init_Sort */
4100
+ Init_Sort();
4101
+
4102
+ /* Searchers */
4103
+ Init_Searcher();
4104
+ Init_MultiSearcher();
4105
+ }