jk-ferret 0.11.8.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (228) hide show
  1. data/CHANGELOG +24 -0
  2. data/MIT-LICENSE +20 -0
  3. data/README +90 -0
  4. data/RELEASE_CHANGES +137 -0
  5. data/RELEASE_NOTES +60 -0
  6. data/Rakefile +443 -0
  7. data/TODO +109 -0
  8. data/TUTORIAL +231 -0
  9. data/bin/ferret-browser +79 -0
  10. data/ext/BZLIB_blocksort.c +1094 -0
  11. data/ext/BZLIB_bzlib.c +1578 -0
  12. data/ext/BZLIB_compress.c +672 -0
  13. data/ext/BZLIB_crctable.c +104 -0
  14. data/ext/BZLIB_decompress.c +626 -0
  15. data/ext/BZLIB_huffman.c +205 -0
  16. data/ext/BZLIB_randtable.c +84 -0
  17. data/ext/STEMMER_api.c +66 -0
  18. data/ext/STEMMER_libstemmer.c +93 -0
  19. data/ext/STEMMER_stem_ISO_8859_1_danish.c +337 -0
  20. data/ext/STEMMER_stem_ISO_8859_1_dutch.c +624 -0
  21. data/ext/STEMMER_stem_ISO_8859_1_english.c +1117 -0
  22. data/ext/STEMMER_stem_ISO_8859_1_finnish.c +762 -0
  23. data/ext/STEMMER_stem_ISO_8859_1_french.c +1246 -0
  24. data/ext/STEMMER_stem_ISO_8859_1_german.c +503 -0
  25. data/ext/STEMMER_stem_ISO_8859_1_hungarian.c +1230 -0
  26. data/ext/STEMMER_stem_ISO_8859_1_italian.c +1065 -0
  27. data/ext/STEMMER_stem_ISO_8859_1_norwegian.c +297 -0
  28. data/ext/STEMMER_stem_ISO_8859_1_porter.c +749 -0
  29. data/ext/STEMMER_stem_ISO_8859_1_portuguese.c +1017 -0
  30. data/ext/STEMMER_stem_ISO_8859_1_spanish.c +1093 -0
  31. data/ext/STEMMER_stem_ISO_8859_1_swedish.c +307 -0
  32. data/ext/STEMMER_stem_ISO_8859_2_romanian.c +998 -0
  33. data/ext/STEMMER_stem_KOI8_R_russian.c +700 -0
  34. data/ext/STEMMER_stem_UTF_8_danish.c +339 -0
  35. data/ext/STEMMER_stem_UTF_8_dutch.c +634 -0
  36. data/ext/STEMMER_stem_UTF_8_english.c +1125 -0
  37. data/ext/STEMMER_stem_UTF_8_finnish.c +768 -0
  38. data/ext/STEMMER_stem_UTF_8_french.c +1256 -0
  39. data/ext/STEMMER_stem_UTF_8_german.c +509 -0
  40. data/ext/STEMMER_stem_UTF_8_hungarian.c +1234 -0
  41. data/ext/STEMMER_stem_UTF_8_italian.c +1073 -0
  42. data/ext/STEMMER_stem_UTF_8_norwegian.c +299 -0
  43. data/ext/STEMMER_stem_UTF_8_porter.c +755 -0
  44. data/ext/STEMMER_stem_UTF_8_portuguese.c +1023 -0
  45. data/ext/STEMMER_stem_UTF_8_romanian.c +1004 -0
  46. data/ext/STEMMER_stem_UTF_8_russian.c +694 -0
  47. data/ext/STEMMER_stem_UTF_8_spanish.c +1097 -0
  48. data/ext/STEMMER_stem_UTF_8_swedish.c +309 -0
  49. data/ext/STEMMER_stem_UTF_8_turkish.c +2205 -0
  50. data/ext/STEMMER_utilities.c +478 -0
  51. data/ext/analysis.c +1710 -0
  52. data/ext/analysis.h +266 -0
  53. data/ext/api.h +26 -0
  54. data/ext/array.c +125 -0
  55. data/ext/array.h +62 -0
  56. data/ext/bitvector.c +96 -0
  57. data/ext/bitvector.h +594 -0
  58. data/ext/bzlib.h +282 -0
  59. data/ext/bzlib_private.h +503 -0
  60. data/ext/compound_io.c +384 -0
  61. data/ext/config.h +52 -0
  62. data/ext/document.c +159 -0
  63. data/ext/document.h +63 -0
  64. data/ext/except.c +102 -0
  65. data/ext/except.h +176 -0
  66. data/ext/extconf.rb +15 -0
  67. data/ext/ferret.c +416 -0
  68. data/ext/ferret.h +94 -0
  69. data/ext/field_index.c +262 -0
  70. data/ext/field_index.h +52 -0
  71. data/ext/filter.c +157 -0
  72. data/ext/fs_store.c +493 -0
  73. data/ext/global.c +458 -0
  74. data/ext/global.h +302 -0
  75. data/ext/hash.c +524 -0
  76. data/ext/hash.h +515 -0
  77. data/ext/hashset.c +192 -0
  78. data/ext/hashset.h +215 -0
  79. data/ext/header.h +58 -0
  80. data/ext/helper.c +63 -0
  81. data/ext/helper.h +21 -0
  82. data/ext/index.c +6804 -0
  83. data/ext/index.h +935 -0
  84. data/ext/internal.h +1019 -0
  85. data/ext/lang.c +10 -0
  86. data/ext/lang.h +68 -0
  87. data/ext/libstemmer.h +79 -0
  88. data/ext/mempool.c +88 -0
  89. data/ext/mempool.h +43 -0
  90. data/ext/modules.h +190 -0
  91. data/ext/multimapper.c +351 -0
  92. data/ext/multimapper.h +60 -0
  93. data/ext/posh.c +1006 -0
  94. data/ext/posh.h +973 -0
  95. data/ext/priorityqueue.c +149 -0
  96. data/ext/priorityqueue.h +155 -0
  97. data/ext/q_boolean.c +1621 -0
  98. data/ext/q_const_score.c +162 -0
  99. data/ext/q_filtered_query.c +212 -0
  100. data/ext/q_fuzzy.c +280 -0
  101. data/ext/q_match_all.c +149 -0
  102. data/ext/q_multi_term.c +673 -0
  103. data/ext/q_parser.c +3103 -0
  104. data/ext/q_phrase.c +1206 -0
  105. data/ext/q_prefix.c +98 -0
  106. data/ext/q_range.c +682 -0
  107. data/ext/q_span.c +2390 -0
  108. data/ext/q_term.c +337 -0
  109. data/ext/q_wildcard.c +167 -0
  110. data/ext/r_analysis.c +2626 -0
  111. data/ext/r_index.c +3468 -0
  112. data/ext/r_qparser.c +635 -0
  113. data/ext/r_search.c +4490 -0
  114. data/ext/r_store.c +513 -0
  115. data/ext/r_utils.c +1131 -0
  116. data/ext/ram_store.c +476 -0
  117. data/ext/scanner.c +895 -0
  118. data/ext/scanner.h +36 -0
  119. data/ext/scanner_mb.c +6701 -0
  120. data/ext/scanner_utf8.c +4415 -0
  121. data/ext/search.c +1864 -0
  122. data/ext/search.h +953 -0
  123. data/ext/similarity.c +151 -0
  124. data/ext/similarity.h +89 -0
  125. data/ext/sort.c +786 -0
  126. data/ext/stem_ISO_8859_1_danish.h +16 -0
  127. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  128. data/ext/stem_ISO_8859_1_english.h +16 -0
  129. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  130. data/ext/stem_ISO_8859_1_french.h +16 -0
  131. data/ext/stem_ISO_8859_1_german.h +16 -0
  132. data/ext/stem_ISO_8859_1_hungarian.h +16 -0
  133. data/ext/stem_ISO_8859_1_italian.h +16 -0
  134. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  135. data/ext/stem_ISO_8859_1_porter.h +16 -0
  136. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  137. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  138. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  139. data/ext/stem_ISO_8859_2_romanian.h +16 -0
  140. data/ext/stem_KOI8_R_russian.h +16 -0
  141. data/ext/stem_UTF_8_danish.h +16 -0
  142. data/ext/stem_UTF_8_dutch.h +16 -0
  143. data/ext/stem_UTF_8_english.h +16 -0
  144. data/ext/stem_UTF_8_finnish.h +16 -0
  145. data/ext/stem_UTF_8_french.h +16 -0
  146. data/ext/stem_UTF_8_german.h +16 -0
  147. data/ext/stem_UTF_8_hungarian.h +16 -0
  148. data/ext/stem_UTF_8_italian.h +16 -0
  149. data/ext/stem_UTF_8_norwegian.h +16 -0
  150. data/ext/stem_UTF_8_porter.h +16 -0
  151. data/ext/stem_UTF_8_portuguese.h +16 -0
  152. data/ext/stem_UTF_8_romanian.h +16 -0
  153. data/ext/stem_UTF_8_russian.h +16 -0
  154. data/ext/stem_UTF_8_spanish.h +16 -0
  155. data/ext/stem_UTF_8_swedish.h +16 -0
  156. data/ext/stem_UTF_8_turkish.h +16 -0
  157. data/ext/stopwords.c +410 -0
  158. data/ext/store.c +698 -0
  159. data/ext/store.h +799 -0
  160. data/ext/symbol.c +10 -0
  161. data/ext/symbol.h +23 -0
  162. data/ext/term_vectors.c +73 -0
  163. data/ext/threading.h +31 -0
  164. data/ext/win32.h +62 -0
  165. data/lib/ferret.rb +30 -0
  166. data/lib/ferret/browser.rb +246 -0
  167. data/lib/ferret/browser/s/global.js +192 -0
  168. data/lib/ferret/browser/s/style.css +148 -0
  169. data/lib/ferret/browser/views/document/list.rhtml +49 -0
  170. data/lib/ferret/browser/views/document/show.rhtml +27 -0
  171. data/lib/ferret/browser/views/error/index.rhtml +7 -0
  172. data/lib/ferret/browser/views/help/index.rhtml +8 -0
  173. data/lib/ferret/browser/views/home/index.rhtml +29 -0
  174. data/lib/ferret/browser/views/layout.rhtml +22 -0
  175. data/lib/ferret/browser/views/term-vector/index.rhtml +4 -0
  176. data/lib/ferret/browser/views/term/index.rhtml +199 -0
  177. data/lib/ferret/browser/views/term/termdocs.rhtml +1 -0
  178. data/lib/ferret/browser/webrick.rb +14 -0
  179. data/lib/ferret/document.rb +130 -0
  180. data/lib/ferret/field_infos.rb +44 -0
  181. data/lib/ferret/field_symbol.rb +87 -0
  182. data/lib/ferret/index.rb +973 -0
  183. data/lib/ferret/number_tools.rb +157 -0
  184. data/lib/ferret/version.rb +3 -0
  185. data/setup.rb +1555 -0
  186. data/test/long_running/largefile/tc_largefile.rb +46 -0
  187. data/test/test_all.rb +5 -0
  188. data/test/test_helper.rb +29 -0
  189. data/test/test_installed.rb +1 -0
  190. data/test/threading/number_to_spoken.rb +132 -0
  191. data/test/threading/thread_safety_index_test.rb +88 -0
  192. data/test/threading/thread_safety_read_write_test.rb +73 -0
  193. data/test/threading/thread_safety_test.rb +133 -0
  194. data/test/unit/analysis/tc_analyzer.rb +550 -0
  195. data/test/unit/analysis/tc_token_stream.rb +653 -0
  196. data/test/unit/index/tc_index.rb +867 -0
  197. data/test/unit/index/tc_index_reader.rb +699 -0
  198. data/test/unit/index/tc_index_writer.rb +447 -0
  199. data/test/unit/index/th_doc.rb +332 -0
  200. data/test/unit/query_parser/tc_query_parser.rb +238 -0
  201. data/test/unit/search/tc_filter.rb +156 -0
  202. data/test/unit/search/tc_fuzzy_query.rb +147 -0
  203. data/test/unit/search/tc_index_searcher.rb +67 -0
  204. data/test/unit/search/tc_multi_searcher.rb +128 -0
  205. data/test/unit/search/tc_multiple_search_requests.rb +58 -0
  206. data/test/unit/search/tc_search_and_sort.rb +179 -0
  207. data/test/unit/search/tc_sort.rb +49 -0
  208. data/test/unit/search/tc_sort_field.rb +27 -0
  209. data/test/unit/search/tc_spans.rb +190 -0
  210. data/test/unit/search/tm_searcher.rb +436 -0
  211. data/test/unit/store/tc_fs_store.rb +115 -0
  212. data/test/unit/store/tc_ram_store.rb +35 -0
  213. data/test/unit/store/tm_store.rb +34 -0
  214. data/test/unit/store/tm_store_lock.rb +68 -0
  215. data/test/unit/tc_document.rb +81 -0
  216. data/test/unit/tc_field_symbol.rb +26 -0
  217. data/test/unit/ts_analysis.rb +2 -0
  218. data/test/unit/ts_index.rb +2 -0
  219. data/test/unit/ts_largefile.rb +4 -0
  220. data/test/unit/ts_query_parser.rb +2 -0
  221. data/test/unit/ts_search.rb +2 -0
  222. data/test/unit/ts_store.rb +2 -0
  223. data/test/unit/ts_utils.rb +2 -0
  224. data/test/unit/utils/tc_bit_vector.rb +295 -0
  225. data/test/unit/utils/tc_number_tools.rb +117 -0
  226. data/test/unit/utils/tc_priority_queue.rb +106 -0
  227. data/test/utils/content_generator.rb +226 -0
  228. metadata +319 -0
data/ext/r_search.c ADDED
@@ -0,0 +1,4490 @@
1
+ #include "ferret.h"
2
+ #ifdef FRT_RUBY_VERSION_1_9
3
+ # include <ruby/st.h>
4
+ #else
5
+ # include <st.h>
6
+ # include <rubysig.h>
7
+ #endif
8
+ #include <ctype.h>
9
+ #include <array.h>
10
+ #include "search.h"
11
+
12
+ VALUE mSearch;
13
+
14
+ static VALUE cHit;
15
+ static VALUE cTopDocs;
16
+ static VALUE cExplanation;
17
+ static VALUE cSearcher;
18
+ static VALUE cMultiSearcher;
19
+ static VALUE cSortField;
20
+ static VALUE cSort;
21
+
22
+ /* Queries */
23
+ static VALUE cQuery;
24
+ static VALUE cTermQuery;
25
+ static VALUE cMultiTermQuery;
26
+ static VALUE cBooleanQuery;
27
+ static VALUE cBooleanClause;
28
+ static VALUE cRangeQuery;
29
+ static VALUE cTypedRangeQuery;
30
+ static VALUE cPhraseQuery;
31
+ static VALUE cPrefixQuery;
32
+ static VALUE cWildcardQuery;
33
+ static VALUE cFuzzyQuery;
34
+ static VALUE cMatchAllQuery;
35
+ static VALUE cConstantScoreQuery;
36
+ static VALUE cFilteredQuery;
37
+ static VALUE cSpanTermQuery;
38
+ static VALUE cSpanMultiTermQuery;
39
+ static VALUE cSpanPrefixQuery;
40
+ static VALUE cSpanFirstQuery;
41
+ static VALUE cSpanNearQuery;
42
+ static VALUE cSpanOrQuery;
43
+ static VALUE cSpanNotQuery;
44
+
45
+ /* Filters */
46
+ static ID id_bits;
47
+ static VALUE cFilter;
48
+ static VALUE cRangeFilter;
49
+ static VALUE cTypedRangeFilter;
50
+ static VALUE cQueryFilter;
51
+
52
+ /* MultiTermQuery */
53
+ static ID id_default_max_terms;
54
+ static VALUE sym_max_terms;
55
+ static VALUE sym_min_score;
56
+
57
+ /** Option hash keys **/
58
+ /* BooleanClause */
59
+ static VALUE sym_should;
60
+ static VALUE sym_must;
61
+ static VALUE sym_must_not;
62
+
63
+ /* RangeQuery */
64
+ static VALUE sym_upper;
65
+ static VALUE sym_lower;
66
+ static VALUE sym_include_upper;
67
+ static VALUE sym_include_lower;
68
+ static VALUE sym_upper_exclusive;
69
+ static VALUE sym_lower_exclusive;
70
+
71
+ static VALUE sym_less_than;
72
+ static VALUE sym_less_than_or_equal_to;
73
+ static VALUE sym_greater_than;
74
+ static VALUE sym_greater_than_or_equal_to;
75
+
76
+ /* FuzzyQuery */
77
+ static VALUE sym_min_similarity;
78
+ static VALUE sym_prefix_length;
79
+
80
+ /* SpanNearQuery */
81
+ static VALUE sym_slop;
82
+ static VALUE sym_in_order;
83
+ static VALUE sym_clauses;
84
+
85
+ /* Class variable ids */
86
+ static ID id_default_min_similarity;
87
+ static ID id_default_prefix_length;
88
+
89
+
90
+ /** Sort **/
91
+ static VALUE oSORT_FIELD_DOC;
92
+
93
+ /* Sort types */
94
+ static VALUE sym_integer;
95
+ static VALUE sym_float;
96
+ static VALUE sym_string;
97
+ static VALUE sym_auto;
98
+ static VALUE sym_doc_id;
99
+ static VALUE sym_score;
100
+ static VALUE sym_byte;
101
+
102
+ /* Sort params */
103
+ static VALUE sym_type;
104
+ static VALUE sym_reverse;
105
+ static VALUE sym_comparator;
106
+
107
+ /* Hits */
108
+ static ID id_doc;
109
+ static ID id_score;
110
+
111
+ /* TopDocs */
112
+ static ID id_hits;
113
+ static ID id_total_hits;
114
+ static ID id_max_score;
115
+ static ID id_searcher;
116
+
117
+ /* Search */
118
+ static VALUE sym_offset;
119
+ static VALUE sym_limit;
120
+ static VALUE sym_start_doc;
121
+ static VALUE sym_all;
122
+ static VALUE sym_sort;
123
+ static VALUE sym_filter;
124
+ static VALUE sym_filter_proc;
125
+ static VALUE sym_c_filter_proc;
126
+
127
+ static VALUE sym_excerpt_length;
128
+ static VALUE sym_num_excerpts;
129
+ static VALUE sym_pre_tag;
130
+ static VALUE sym_post_tag;
131
+ static VALUE sym_ellipsis;
132
+
133
+ static Symbol fsym_id;
134
+
135
+ extern VALUE cIndexReader;
136
+ extern void frb_ir_free(void *p);
137
+ extern void frb_ir_mark(void *p);
138
+
139
+ extern void frb_set_term(VALUE rterm, Term *t);
140
+ extern VALUE frb_get_analyzer(Analyzer *a);
141
+ extern HashSet *frb_get_fields(VALUE rfields);
142
+ extern Analyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
143
+ extern VALUE frb_get_lazy_doc(LazyDoc *lazy_doc);
144
+
145
+ /****************************************************************************
146
+ *
147
+ * Hit Methods
148
+ *
149
+ ****************************************************************************/
150
+
151
+ static VALUE
152
+ frb_get_hit(Hit *hit)
153
+ {
154
+ return rb_struct_new(cHit,
155
+ INT2FIX(hit->doc),
156
+ rb_float_new((double)hit->score),
157
+ NULL);
158
+ }
159
+
160
+ /****************************************************************************
161
+ *
162
+ * TopDocs Methods
163
+ *
164
+ ****************************************************************************/
165
+
166
+ static VALUE
167
+ frb_get_td(TopDocs *td, VALUE rsearcher)
168
+ {
169
+ int i;
170
+ VALUE rtop_docs;
171
+ VALUE hit_ary = rb_ary_new2(td->size);
172
+
173
+ for (i = 0; i < td->size; i++) {
174
+ rb_ary_store(hit_ary, i, frb_get_hit(td->hits[i]));
175
+ }
176
+
177
+ rtop_docs = rb_struct_new(cTopDocs,
178
+ INT2FIX(td->total_hits),
179
+ hit_ary,
180
+ rb_float_new((double)td->max_score),
181
+ rsearcher,
182
+ NULL);
183
+ td_destroy(td);
184
+ return rtop_docs;
185
+ }
186
+
187
+ /*
188
+ * call-seq:
189
+ * top_doc.to_s(field = :id) -> string
190
+ *
191
+ * Returns a string representation of the top_doc in readable format.
192
+ */
193
+ static VALUE
194
+ frb_td_to_s(int argc, VALUE *argv, VALUE self)
195
+ {
196
+ int i;
197
+ VALUE rhits = rb_funcall(self, id_hits, 0);
198
+ Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
199
+ const int len = RARRAY_LEN(rhits);
200
+ int capa = len * 64 + 100;
201
+ int p = 0;
202
+ char *str = ALLOC_N(char, len * 64 + 100);
203
+ Symbol field = fsym_id;
204
+ VALUE rstr;
205
+
206
+ if (argc) {
207
+ field = frb_field(argv[0]);
208
+ }
209
+
210
+ sprintf(str, "TopDocs: total_hits = %ld, max_score = %f [\n",
211
+ FIX2INT(rb_funcall(self, id_total_hits, 0)),
212
+ NUM2DBL(rb_funcall(self, id_max_score, 0)));
213
+ p = (int)strlen(str);
214
+
215
+ for (i = 0; i < len; i++) {
216
+ VALUE rhit = RARRAY_PTR(rhits)[i];
217
+ int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
218
+ char *value = "";
219
+ size_t value_len = 0;
220
+ LazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
221
+ LazyDocField *lzdf = lazy_doc_get(lzd, field);
222
+ if (NULL != lzdf) {
223
+ value = lazy_df_get_data(lzdf, 0);
224
+ value_len = strlen(value);
225
+ }
226
+ if (p + value_len + 64 > capa) {
227
+ capa += (value_len + 64) * (len - i);
228
+ REALLOC_N(str, char, capa);
229
+ }
230
+
231
+ sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_id, value,
232
+ NUM2DBL(rb_funcall(rhit, id_score, 0)));
233
+ p += strlen(str + p);
234
+ lazy_doc_close(lzd);
235
+ }
236
+
237
+ sprintf(str + p, "]\n");
238
+ rstr = rb_str_new2(str);
239
+ free(str);
240
+ return rstr;
241
+ }
242
+
243
+ static INLINE char *
244
+ frb_lzd_load_to_json(LazyDoc *lzd, char **str, char *s, int *slen)
245
+ {
246
+ int i, j;
247
+ int diff = s - *str;
248
+ int len = diff, l;
249
+ LazyDocField *f;
250
+
251
+ for (i = 0; i < lzd->size; i++) {
252
+ f = lzd->fields[i];
253
+ /* 3 times length of field to make space for quoted quotes ('"') and
254
+ * 4 times field elements to make space for '"' around fields and ','
255
+ * between fields. Add 100 for '[', ']' and good safety.
256
+ */
257
+ len += sym_len(f->name) + f->len * 3 + 100 + 4 * f->size;
258
+ }
259
+
260
+ if (len > *slen) {
261
+ while (len > *slen) *slen = *slen << 1;
262
+ REALLOC_N(*str, char, *slen);
263
+ s = *str + diff;
264
+ }
265
+
266
+ for (i = 0; i < lzd->size; i++) {
267
+ const char *field_name;
268
+ f = lzd->fields[i];
269
+ field_name = S(f->name);
270
+ if (i) *(s++) = ',';
271
+ *(s++) = '"';
272
+ l = strlen(field_name);
273
+ memcpy(s, field_name, l);
274
+ s += l;
275
+ *(s++) = '"';
276
+ *(s++) = ':';
277
+ if (f->size > 1) *(s++) = '[';
278
+ for (j = 0; j < f->size; j++) {
279
+ if (j) *(s++) = ',';
280
+ s = json_concat_string(s, lazy_df_get_data(f, j));
281
+ }
282
+ if (f->size > 1) *(s++) = ']';
283
+ }
284
+ return s;
285
+ }
286
+
287
+ /*
288
+ * call-seq:
289
+ * top_doc.to_json() -> string
290
+ *
291
+ * Returns a json representation of the top_doc.
292
+ */
293
+ static VALUE
294
+ frb_td_to_json(VALUE self)
295
+ {
296
+ int i;
297
+ VALUE rhits = rb_funcall(self, id_hits, 0);
298
+ VALUE rhit;
299
+ LazyDoc *lzd;
300
+ Searcher *sea = (Searcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
301
+ const int num_hits = RARRAY_LEN(rhits);
302
+ int doc_id;
303
+ int len = 32768;
304
+ char *str = ALLOC_N(char, len);
305
+ char *s = str;
306
+ VALUE rstr;
307
+
308
+ *(s++) = '[';
309
+ for (i = 0; i < num_hits; i++) {
310
+ if (i) *(s++) = ',';
311
+ *(s++) = '{';
312
+ rhit = RARRAY_PTR(rhits)[i];
313
+ doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
314
+ lzd = sea->get_lazy_doc(sea, doc_id);
315
+ s = frb_lzd_load_to_json(lzd, &str, s, &len);
316
+ lazy_doc_close(lzd);
317
+ *(s++) = '}';
318
+ }
319
+ *(s++) = ']';
320
+ *(s++) = '\0';
321
+ rstr = rb_str_new2(str);
322
+ free(str);
323
+ return rstr;
324
+ }
325
+
326
+
327
+ /****************************************************************************
328
+ *
329
+ * Explanation Methods
330
+ *
331
+ ****************************************************************************/
332
+
333
+ #define GET_EXPL() Explanation *expl = (Explanation *)DATA_PTR(self)
334
+
335
+ /*
336
+ * call-seq:
337
+ * explanation.to_s -> string
338
+ *
339
+ * Returns a string representation of the explanation in readable format.
340
+ */
341
+ static VALUE
342
+ frb_expl_to_s(VALUE self)
343
+ {
344
+ GET_EXPL();
345
+ char *str = expl_to_s(expl);
346
+ VALUE rstr = rb_str_new2(str);
347
+ free(str);
348
+ return rstr;
349
+ }
350
+
351
+ /*
352
+ * call-seq:
353
+ * explanation.to_html -> string
354
+ *
355
+ * Returns an html representation of the explanation in readable format.
356
+ */
357
+ static VALUE
358
+ frb_expl_to_html(VALUE self)
359
+ {
360
+ GET_EXPL();
361
+ char *str = expl_to_html(expl);
362
+ VALUE rstr = rb_str_new2(str);
363
+ free(str);
364
+ return rstr;
365
+ }
366
+
367
+ /*
368
+ * call-seq:
369
+ * explanation.score -> float
370
+ *
371
+ * Returns the score represented by the query. This can be used for debugging
372
+ * purposes mainly to check that the score returned by the explanation
373
+ * matches that of the score for the document in the original query.
374
+ */
375
+ static VALUE
376
+ frb_expl_score(VALUE self)
377
+ {
378
+ GET_EXPL();
379
+ return rb_float_new((double)expl->value);
380
+ }
381
+
382
+ /****************************************************************************
383
+ *
384
+ * Query Methods
385
+ *
386
+ ****************************************************************************/
387
+
388
+ static void
389
+ frb_q_free(void *p)
390
+ {
391
+ object_del(p);
392
+ q_deref((Query *)p);
393
+ }
394
+
395
+ #define GET_Q() Query *q = (Query *)DATA_PTR(self)
396
+
397
+ /*
398
+ * call-seq:
399
+ * query.to_s -> string
400
+ *
401
+ * Return a string representation of the query. Most of the time, passing
402
+ * this string through the Query parser will give you the exact Query you
403
+ * began with. This can be a good way to explore how the QueryParser works.
404
+ */
405
+ static VALUE
406
+ frb_q_to_s(int argc, VALUE *argv, VALUE self)
407
+ {
408
+ GET_Q();
409
+ VALUE rstr, rfield;
410
+ char *str;
411
+ Symbol field = NULL;
412
+ if (rb_scan_args(argc, argv, "01", &rfield)) {
413
+ field = frb_field(rfield);
414
+ }
415
+ str = q->to_s(q, field);
416
+ rstr = rb_str_new2(str);
417
+ free(str);
418
+ return rstr;
419
+ }
420
+
421
+ /*
422
+ * call-seq:
423
+ * query.boost
424
+ *
425
+ * Returns the query's boost value. See the Query description for more
426
+ * information on Query boosts.
427
+ */
428
+ static VALUE
429
+ frb_q_get_boost(VALUE self)
430
+ {
431
+ GET_Q();
432
+ return rb_float_new((double)q->boost);
433
+ }
434
+
435
+ /*
436
+ * call-seq:
437
+ * query.boost = boost -> boost
438
+ *
439
+ * Set the boost for a query. See the Query description for more information
440
+ * on Query boosts.
441
+ */
442
+ static VALUE
443
+ frb_q_set_boost(VALUE self, VALUE rboost)
444
+ {
445
+ GET_Q();
446
+ q->boost = (float)NUM2DBL(rboost);
447
+ return rboost;
448
+ }
449
+
450
+ /*
451
+ * call-seq:
452
+ * query.hash -> number
453
+ *
454
+ * Return a hash value for the query. This is used for caching query results
455
+ * in a hash object.
456
+ */
457
+ static VALUE
458
+ frb_q_hash(VALUE self)
459
+ {
460
+ GET_Q();
461
+ return INT2FIX(q->hash(q));
462
+ }
463
+
464
+ /*
465
+ * call-seq:
466
+ * query.eql?(other_query) -> bool
467
+ * query == other_query -> bool
468
+ *
469
+ * Return true if +query+ equals +other_query+. Theoretically, two queries are
470
+ * equal if they always return the same results, no matter what the contents
471
+ * of the index. Practically, however, this is difficult to implement
472
+ * efficiently for queries like BooleanQuery since the ordering of clauses
473
+ * is unspecified. "Ruby AND Rails" will not match "Rails AND Ruby" for example,
474
+ * although their result sets will be identical. Most queries should match as
475
+ * expected however.
476
+ */
477
+ static VALUE
478
+ frb_q_eql(VALUE self, VALUE other)
479
+ {
480
+ GET_Q();
481
+ Query *oq;
482
+ Data_Get_Struct(other, Query, oq);
483
+ return q->eq(q, oq) ? Qtrue : Qfalse;
484
+ }
485
+
486
+ /*
487
+ * call-seq:
488
+ * query.terms(searcher) -> term_array
489
+ *
490
+ * Returns an array of terms searched for by this query. This can be used for
491
+ * implementing an external query highlighter for example. You must supply a
492
+ * searcher so that the query can be rewritten and optimized like it would be
493
+ * in a real search.
494
+ */
495
+ static VALUE
496
+ frb_q_get_terms(VALUE self, VALUE searcher)
497
+ {
498
+ VALUE rterms = rb_ary_new();
499
+ HashSet *terms = hs_new((hash_ft)&term_hash,
500
+ (eq_ft)&term_eq,
501
+ (free_ft)term_destroy);
502
+ HashSetEntry *hse;
503
+ GET_Q();
504
+ Searcher *sea = (Searcher *)DATA_PTR(searcher);
505
+ Query *rq = sea->rewrite(sea, q);
506
+ rq->extract_terms(rq, terms);
507
+ q_deref(rq);
508
+
509
+ for (hse = terms->first; hse; hse = hse->next) {
510
+ Term *term = (Term *)hse->elem;
511
+ rb_ary_push(rterms, frb_get_term(term->field, term->text));
512
+ }
513
+ hs_destroy(terms);
514
+ return rterms;
515
+ }
516
+
517
+ #define MK_QUERY(klass, q) Data_Wrap_Struct(klass, NULL, &frb_q_free, q)
518
+ VALUE
519
+ frb_get_q(Query *q)
520
+ {
521
+ VALUE self = object_get(q);
522
+
523
+ if (self == Qnil) {
524
+ switch (q->type) {
525
+ case TERM_QUERY:
526
+ self = MK_QUERY(cTermQuery, q);
527
+ break;
528
+ case MULTI_TERM_QUERY:
529
+ self = MK_QUERY(cMultiTermQuery, q);
530
+ break;
531
+ case BOOLEAN_QUERY:
532
+ self = MK_QUERY(cBooleanQuery, q);
533
+ break;
534
+ case PHRASE_QUERY:
535
+ self = MK_QUERY(cPhraseQuery, q);
536
+ break;
537
+ case CONSTANT_QUERY:
538
+ self = MK_QUERY(cConstantScoreQuery, q);
539
+ break;
540
+ case FILTERED_QUERY:
541
+ self = MK_QUERY(cFilteredQuery, q);
542
+ break;
543
+ case MATCH_ALL_QUERY:
544
+ self = MK_QUERY(cMatchAllQuery, q);
545
+ break;
546
+ case RANGE_QUERY:
547
+ self = MK_QUERY(cRangeQuery, q);
548
+ break;
549
+ case TYPED_RANGE_QUERY:
550
+ self = MK_QUERY(cTypedRangeQuery, q);
551
+ break;
552
+ case WILD_CARD_QUERY:
553
+ self = MK_QUERY(cWildcardQuery, q);
554
+ break;
555
+ case FUZZY_QUERY:
556
+ self = MK_QUERY(cFuzzyQuery, q);
557
+ break;
558
+ case PREFIX_QUERY:
559
+ self = MK_QUERY(cPrefixQuery, q);
560
+ break;
561
+ case SPAN_TERM_QUERY:
562
+ self = MK_QUERY(cSpanMultiTermQuery, q);
563
+ break;
564
+ case SPAN_MULTI_TERM_QUERY:
565
+ self = MK_QUERY(cSpanPrefixQuery, q);
566
+ break;
567
+ case SPAN_PREFIX_QUERY:
568
+ self = MK_QUERY(cSpanTermQuery, q);
569
+ break;
570
+ case SPAN_FIRST_QUERY:
571
+ self = MK_QUERY(cSpanFirstQuery, q);
572
+ break;
573
+ case SPAN_OR_QUERY:
574
+ self = MK_QUERY(cSpanOrQuery, q);
575
+ break;
576
+ case SPAN_NOT_QUERY:
577
+ self = MK_QUERY(cSpanNotQuery, q);
578
+ break;
579
+ case SPAN_NEAR_QUERY:
580
+ self = MK_QUERY(cSpanNearQuery, q);
581
+ break;
582
+ default:
583
+ rb_raise(rb_eArgError, "Unknown query type");
584
+ break;
585
+ }
586
+ object_add(q, self);
587
+ }
588
+ return self;
589
+ }
590
+
591
+ /****************************************************************************
592
+ *
593
+ * TermQuery Methods
594
+ *
595
+ ****************************************************************************/
596
+
597
+ /*
598
+ * call-seq:
599
+ * TermQuery.new(field, term) -> term_query
600
+ *
601
+ * Create a new TermQuery object which will match all documents with the term
602
+ * +term+ in the field +field+.
603
+ *
604
+ * Note: As usual, field should be a symbol
605
+ */
606
+ static VALUE
607
+ frb_tq_init(VALUE self, VALUE rfield, VALUE rterm)
608
+ {
609
+ Symbol field = frb_field(rfield);
610
+ char *term = rs2s(rb_obj_as_string(rterm));
611
+ Query *q = tq_new(field, term);
612
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
613
+ object_add(q, self);
614
+ return self;
615
+ }
616
+
617
+ /****************************************************************************
618
+ *
619
+ * MultiTermQuery Methods
620
+ *
621
+ ****************************************************************************/
622
+
623
+ /*
624
+ * call-seq:
625
+ * MultiTermQuery.default_max_terms -> number
626
+ *
627
+ * Get the default value for +:max_terms+ in a MultiTermQuery. This value is
628
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
629
+ */
630
+ static VALUE
631
+ frb_mtq_get_dmt(VALUE self)
632
+ {
633
+ return rb_cvar_get(cMultiTermQuery, id_default_max_terms);
634
+ }
635
+
636
+ /*
637
+ * call-seq:
638
+ * MultiTermQuery.default_max_terms = max_terms -> max_terms
639
+ *
640
+ * Set the default value for +:max_terms+ in a MultiTermQuery. This value is
641
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
642
+ */
643
+ static VALUE
644
+ frb_mtq_set_dmt(VALUE self, VALUE rnum_terms)
645
+ {
646
+ int max_terms = FIX2INT(rnum_terms);
647
+ if (max_terms <= 0) {
648
+ rb_raise(rb_eArgError,
649
+ "%d <= 0. @@max_terms must be > 0", max_terms);
650
+ }
651
+ #ifdef FRT_RUBY_VERSION_1_9
652
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, rnum_terms);
653
+ #else
654
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, rnum_terms, Qfalse);
655
+ #endif
656
+ return rnum_terms;
657
+ }
658
+
659
+ /*
660
+ * call-seq:
661
+ * MultiTermQuery.new(field, options = {}) -> multi_term_query
662
+ *
663
+ * Create a new MultiTermQuery on field +field+. You will also need to add
664
+ * terms to the query using the MultiTermQuery#add_term method.
665
+ *
666
+ * There are several options available to you when creating a
667
+ * MultiTermQuery:
668
+ *
669
+ * === Options
670
+ *
671
+ * :max_terms:: You can specify the maximum number of terms that can be
672
+ * added to the query. This is to prevent memory usage overflow,
673
+ * particularly when you don't directly control the addition of
674
+ * terms to the Query object like when you create Wildcard
675
+ * queries. For example, searching for "content:*" would cause
676
+ * problems without this limit.
677
+ * :min_score:: The minimum score a term must have to be added to the query.
678
+ * For example you could implement your own wild-card queries
679
+ * that give matches a score. To limit the number of terms
680
+ * added to the query you could set a lower limit to this score.
681
+ * FuzzyQuery in particular makes use of this parameter.
682
+ */
683
+ static VALUE
684
+ frb_mtq_init(int argc, VALUE *argv, VALUE self)
685
+ {
686
+ VALUE rfield, roptions;
687
+ float min_score = 0.0;
688
+ int max_terms = FIX2INT(frb_mtq_get_dmt(self));
689
+ Query *q;
690
+
691
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
692
+ VALUE v;
693
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
694
+ max_terms = FIX2INT(v);
695
+ }
696
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_score))) {
697
+ min_score = (float)NUM2DBL(v);
698
+ }
699
+ }
700
+ q = multi_tq_new_conf(frb_field(rfield), max_terms, min_score);
701
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
702
+ object_add(q, self);
703
+ return self;
704
+ }
705
+
706
+ /*
707
+ * call-seq:
708
+ * multi_term_query.add_term(term, score = 1.0) -> self
709
+ * multi_term_query << term1 << term2 << term3 -> self
710
+ *
711
+ * Add a term to the MultiTermQuery with the score 1.0 unless specified
712
+ * otherwise.
713
+ */
714
+ static VALUE
715
+ frb_mtq_add_term(int argc, VALUE *argv, VALUE self)
716
+ {
717
+ GET_Q();
718
+ VALUE rterm, rboost;
719
+ float boost = 1.0;
720
+ char *term = NULL;
721
+ if (rb_scan_args(argc, argv, "11", &rterm, &rboost) == 2) {
722
+ boost = (float)NUM2DBL(rboost);
723
+ }
724
+ term = StringValuePtr(rterm);
725
+ multi_tq_add_term_boost(q, term, boost);
726
+
727
+ return self;
728
+ }
729
+
730
+ typedef Query *(*mtq_maker_ft)(Symbol field, const char *term);
731
+
732
+ static int
733
+ get_max_terms(VALUE rmax_terms, int max_terms)
734
+ {
735
+ VALUE v;
736
+ switch (TYPE(rmax_terms)) {
737
+ case T_HASH:
738
+ if (Qnil != (v = rb_hash_aref(rmax_terms, sym_max_terms))) {
739
+ max_terms = FIX2INT(v);
740
+ }
741
+ break;
742
+ case T_FIXNUM:
743
+ max_terms = FIX2INT(rmax_terms);
744
+ break;
745
+ default:
746
+ rb_raise(rb_eArgError, "max_terms must be an integer");
747
+ }
748
+ return max_terms;
749
+ }
750
+
751
+ static VALUE
752
+ frb_mtq_init_specific(int argc, VALUE *argv, VALUE self, mtq_maker_ft mm)
753
+ {
754
+ VALUE rfield, rterm, rmax_terms;
755
+ int max_terms =
756
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
757
+ Query *q;
758
+
759
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &rmax_terms) == 3) {
760
+ max_terms = get_max_terms(rmax_terms, max_terms);
761
+ }
762
+
763
+ q = (*mm)(frb_field(rfield), StringValuePtr(rterm));
764
+ MTQMaxTerms(q) = max_terms;
765
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
766
+ object_add(q, self);
767
+ return self;
768
+ }
769
+
770
+ /****************************************************************************
771
+ *
772
+ * BooleanClause Methods
773
+ *
774
+ ****************************************************************************/
775
+
776
+ static void
777
+ frb_bc_mark(void *p)
778
+ {
779
+ frb_gc_mark(((BooleanClause *)p)->query);
780
+ }
781
+
782
+ static void
783
+ frb_bc_free(void *p)
784
+ {
785
+ object_del(p);
786
+ bc_deref((BooleanClause *)p);
787
+ }
788
+
789
+ static VALUE
790
+ frb_bc_wrap(BooleanClause *bc)
791
+ {
792
+ VALUE self = Data_Wrap_Struct(cBooleanClause, &frb_bc_mark, &frb_bc_free, bc);
793
+ REF(bc);
794
+ object_add(bc, self);
795
+ return self;
796
+ }
797
+
798
+ static BCType
799
+ frb_get_occur(VALUE roccur)
800
+ {
801
+ BCType occur = BC_SHOULD;
802
+
803
+ if (roccur == sym_should) {
804
+ occur = BC_SHOULD;
805
+ } else if (roccur == sym_must) {
806
+ occur = BC_MUST;
807
+ } else if (roccur == sym_must_not) {
808
+ occur = BC_MUST_NOT;
809
+ } else {
810
+ rb_raise(rb_eArgError, "occur argument must be one of [:must, "
811
+ ":should, :must_not]");
812
+ }
813
+ return occur;
814
+ }
815
+
816
+ /*
817
+ * call-seq:
818
+ * BooleanClause.new(query, occur = :should) -> BooleanClause
819
+ *
820
+ * Create a new BooleanClause object, wrapping the query +query+. +occur+
821
+ * must be one of +:must+, +:should+ or +:must_not+.
822
+ */
823
+ static VALUE
824
+ frb_bc_init(int argc, VALUE *argv, VALUE self)
825
+ {
826
+ BooleanClause *bc;
827
+ VALUE rquery, roccur;
828
+ unsigned int occur = BC_SHOULD;
829
+ Query *sub_q;
830
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
831
+ occur = frb_get_occur(roccur);
832
+ }
833
+ Data_Get_Struct(rquery, Query, sub_q);
834
+ REF(sub_q);
835
+ bc = bc_new(sub_q, occur);
836
+ Frt_Wrap_Struct(self, &frb_bc_mark, &frb_bc_free, bc);
837
+ object_add(bc, self);
838
+ return self;
839
+ }
840
+
841
+ #define GET_BC() BooleanClause *bc = (BooleanClause *)DATA_PTR(self)
842
+ /*
843
+ * call-seq:
844
+ * clause.query -> query
845
+ *
846
+ * Return the query object wrapped by this BooleanClause.
847
+ */
848
+ static VALUE
849
+ frb_bc_get_query(VALUE self)
850
+ {
851
+ GET_BC();
852
+ return object_get(bc->query);
853
+ }
854
+
855
+ /*
856
+ * call-seq:
857
+ * clause.query = query -> query
858
+ *
859
+ * Set the query wrapped by this BooleanClause.
860
+ */
861
+ static VALUE
862
+ frb_bc_set_query(VALUE self, VALUE rquery)
863
+ {
864
+ GET_BC();
865
+ Data_Get_Struct(rquery, Query, bc->query);
866
+ return rquery;
867
+ }
868
+
869
+ /*
870
+ * call-seq:
871
+ * clause.required? -> bool
872
+ *
873
+ * Return true if this clause is required. ie, this will be true if occur was
874
+ * equal to +:must+.
875
+ */
876
+ static VALUE
877
+ frb_bc_is_required(VALUE self)
878
+ {
879
+ GET_BC();
880
+ return bc->is_required ? Qtrue : Qfalse;
881
+ }
882
+
883
+ /*
884
+ * call-seq:
885
+ * clause.prohibited? -> bool
886
+ *
887
+ * Return true if this clause is prohibited. ie, this will be true if occur was
888
+ * equal to +:must_not+.
889
+ */
890
+ static VALUE
891
+ frb_bc_is_prohibited(VALUE self)
892
+ {
893
+ GET_BC();
894
+ return bc->is_prohibited ? Qtrue : Qfalse;
895
+ }
896
+
897
+ /*
898
+ * call-seq:
899
+ * clause.occur = occur -> occur
900
+ *
901
+ * Set the +occur+ value for this BooleanClause. +occur+ must be one of
902
+ * +:must+, +:should+ or +:must_not+.
903
+ */
904
+ static VALUE
905
+ frb_bc_set_occur(VALUE self, VALUE roccur)
906
+ {
907
+ GET_BC();
908
+ BCType occur = frb_get_occur(roccur);
909
+ bc_set_occur(bc, occur);
910
+
911
+ return roccur;
912
+ }
913
+
914
+ /*
915
+ * call-seq:
916
+ * clause.to_s -> string
917
+ *
918
+ * Return a string representation of this clause. This will not be used by
919
+ * BooleanQuery#to_s. It is only used by BooleanClause#to_s and will specify
920
+ * whether the clause is +:must+, +:should+ or +:must_not+.
921
+ */
922
+ static VALUE
923
+ frb_bc_to_s(VALUE self)
924
+ {
925
+ VALUE rstr;
926
+ char *qstr, *ostr = "", *str;
927
+ int len;
928
+ GET_BC();
929
+ qstr = bc->query->to_s(bc->query, NULL);
930
+ switch (bc->occur) {
931
+ case BC_SHOULD:
932
+ ostr = "Should";
933
+ break;
934
+ case BC_MUST:
935
+ ostr = "Must";
936
+ break;
937
+ case BC_MUST_NOT:
938
+ ostr = "Must Not";
939
+ break;
940
+ }
941
+ len = strlen(ostr) + strlen(qstr) + 2;
942
+ str = ALLOC_N(char, len);
943
+ sprintf(str, "%s:%s", ostr, qstr);
944
+ rstr = rb_str_new(str, len);
945
+ free(qstr);
946
+ free(str);
947
+ return rstr;
948
+ }
949
+
950
+ /****************************************************************************
951
+ *
952
+ * BooleanQuery Methods
953
+ *
954
+ ****************************************************************************/
955
+
956
+ static void
957
+ frb_bq_mark(void *p)
958
+ {
959
+ int i;
960
+ Query *q = (Query *)p;
961
+ BooleanQuery *bq = (BooleanQuery *)q;
962
+ for (i = 0; i < bq->clause_cnt; i++) {
963
+ frb_gc_mark(bq->clauses[i]);
964
+ }
965
+ }
966
+
967
+ /*
968
+ * call-seq:
969
+ * BooleanQuery.new(coord_disable = false)
970
+ *
971
+ * Create a new BooleanQuery. If you don't care about the scores of the
972
+ * sub-queries added to the query (as would be the case for many
973
+ * automatically generated queries) you can disable the coord_factor of the
974
+ * score. This will slightly improve performance for the query. Usually you
975
+ * should leave this parameter as is.
976
+ */
977
+ static VALUE
978
+ frb_bq_init(int argc, VALUE *argv, VALUE self)
979
+ {
980
+ VALUE rcoord_disabled;
981
+ bool coord_disabled = false;
982
+ Query *q;
983
+ if (rb_scan_args(argc, argv, "01", &rcoord_disabled)) {
984
+ coord_disabled = RTEST(rcoord_disabled);
985
+ }
986
+ q = bq_new(coord_disabled);
987
+ Frt_Wrap_Struct(self, &frb_bq_mark, &frb_q_free, q);
988
+ object_add(q, self);
989
+ return self;
990
+ }
991
+
992
+ /*
993
+ * call-seq:
994
+ * boolean_query.add_query(query, occur = :should) -> boolean_clause
995
+ * boolean_query.<<(query, occur = :should) -> boolean_clause
996
+ * boolean_query << boolean_clause -> boolean_clause
997
+ *
998
+ * Us this method to add sub-queries to a BooleanQuery. You can either add
999
+ * a straight Query or a BooleanClause. When adding a Query, the default
1000
+ * occurrence requirement is :should. That is the Query's match will be
1001
+ * scored but it isn't essential for a match. If the query should be
1002
+ * essential, use :must. For exclusive queries use :must_not.
1003
+ *
1004
+ * When adding a Boolean clause to a BooleanQuery there is no need to set the
1005
+ * occurrence property because it is already set in the BooleanClause.
1006
+ * Therefor the +occur+ parameter will be ignored in this case.
1007
+ *
1008
+ * query:: Query to add to the BooleanQuery
1009
+ * occur:: occurrence requirement for the query being added. Must be one of
1010
+ * [:must, :should, :must_not]
1011
+ * returns:: BooleanClause which was added
1012
+ */
1013
+ static VALUE
1014
+ frb_bq_add_query(int argc, VALUE *argv, VALUE self)
1015
+ {
1016
+ GET_Q();
1017
+ VALUE rquery, roccur;
1018
+ BCType occur = BC_SHOULD;
1019
+ Query *sub_q;
1020
+ VALUE klass;
1021
+
1022
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
1023
+ occur = frb_get_occur(roccur);
1024
+ }
1025
+ klass = CLASS_OF(rquery);
1026
+ if (klass == cBooleanClause) {
1027
+ BooleanClause *bc = (BooleanClause *)DATA_PTR(rquery);
1028
+ if (argc > 1) {
1029
+ rb_warning("Second argument to BooleanQuery#add is ignored "
1030
+ "when adding BooleanClause");
1031
+ }
1032
+ bq_add_clause(q, bc);
1033
+ return rquery;
1034
+ } else if (TYPE(rquery) == T_DATA) {
1035
+ Data_Get_Struct(rquery, Query, sub_q);
1036
+ return frb_bc_wrap(bq_add_query(q, sub_q, occur));
1037
+ } else {
1038
+ rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
1039
+ rb_class2name(klass));
1040
+ }
1041
+ return self;
1042
+ }
1043
+
1044
+ /****************************************************************************
1045
+ *
1046
+ * RangeQuery Methods
1047
+ *
1048
+ ****************************************************************************/
1049
+
1050
+ static void
1051
+ get_range_params(VALUE roptions, char **lterm, char **uterm,
1052
+ bool *include_lower, bool *include_upper)
1053
+ {
1054
+ VALUE v;
1055
+ Check_Type(roptions, T_HASH);
1056
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower))) {
1057
+ *lterm = rs2s(rb_obj_as_string(v));
1058
+ *include_lower = true;
1059
+ }
1060
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper))) {
1061
+ *uterm = rs2s(rb_obj_as_string(v));
1062
+ *include_upper = true;
1063
+ }
1064
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
1065
+ *lterm = rs2s(rb_obj_as_string(v));
1066
+ *include_lower = false;
1067
+ }
1068
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
1069
+ *uterm = rs2s(rb_obj_as_string(v));
1070
+ *include_upper = false;
1071
+ }
1072
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
1073
+ *include_lower = RTEST(v);
1074
+ }
1075
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_upper))) {
1076
+ *include_upper = RTEST(v);
1077
+ }
1078
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than))) {
1079
+ *lterm = rs2s(rb_obj_as_string(v));
1080
+ *include_lower = false;
1081
+ }
1082
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than_or_equal_to))) {
1083
+ *lterm = rs2s(rb_obj_as_string(v));
1084
+ *include_lower = true;
1085
+ }
1086
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than))) {
1087
+ *uterm = rs2s(rb_obj_as_string(v));
1088
+ *include_upper = false;
1089
+ }
1090
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than_or_equal_to))) {
1091
+ *uterm = rs2s(rb_obj_as_string(v));
1092
+ *include_upper = true;
1093
+ }
1094
+ if (!*lterm && !*uterm) {
1095
+ rb_raise(rb_eArgError,
1096
+ "The bounds of a range should not both be nil");
1097
+ }
1098
+ if (*include_lower && !*lterm) {
1099
+ rb_raise(rb_eArgError,
1100
+ "The lower bound should not be nil if it is inclusive");
1101
+ }
1102
+ if (*include_upper && !*uterm) {
1103
+ rb_raise(rb_eArgError,
1104
+ "The upper bound should not be nil if it is inclusive");
1105
+ }
1106
+ }
1107
+
1108
+ /*
1109
+ * call-seq:
1110
+ * RangeQuery.new(field, options = {}) -> range_query
1111
+ *
1112
+ * Create a new RangeQuery on field +field+. There are two ways to build a
1113
+ * range query. With the old-style options; +:lower+, +:upper+,
1114
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
1115
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
1116
+ * In the old-style options, limits are inclusive by default.
1117
+ *
1118
+ * == Examples
1119
+ *
1120
+ * q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
1121
+ * # is equivalent to
1122
+ * q = RangeQuery.new(:date, :< => "200501")
1123
+ * # is equivalent to
1124
+ * q = RangeQuery.new(:date, :lower_exclusive => "200501")
1125
+ *
1126
+ * q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
1127
+ * # is equivalent to
1128
+ * q = RangeQuery.new(:date, :>= => "200501", :<= => 200502)
1129
+ *
1130
+ */
1131
+ static VALUE
1132
+ frb_rq_init(VALUE self, VALUE rfield, VALUE roptions)
1133
+ {
1134
+ Query *q;
1135
+ char *lterm = NULL;
1136
+ char *uterm = NULL;
1137
+ bool include_lower = false;
1138
+ bool include_upper = false;
1139
+
1140
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1141
+ q = rq_new(frb_field(rfield),
1142
+ lterm, uterm,
1143
+ include_lower, include_upper);
1144
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1145
+ object_add(q, self);
1146
+ return self;
1147
+ }
1148
+
1149
+ /****************************************************************************
1150
+ *
1151
+ * TypedRangeQuery Methods
1152
+ *
1153
+ ****************************************************************************/
1154
+
1155
+ /*
1156
+ * call-seq:
1157
+ * TypedRangeQuery.new(field, options = {}) -> range_query
1158
+ *
1159
+ * Create a new TypedRangeQuery on field +field+. This differs from the
1160
+ * standard RangeQuery in that it allows range queries with unpadded numbers,
1161
+ * both positive and negative, integer and float. You can even use
1162
+ * hexadecimal numbers. However it could be a lot slower than the standard
1163
+ * RangeQuery on large indexes.
1164
+ *
1165
+ * There are two ways to build a range query. With the old-style options;
1166
+ * +:lower+, +:upper+, +:include_lower+ and +:include_upper+ or the new style
1167
+ * options; +:<+, +:<=+, +:>+ and +:>=+. The options' names should speak for
1168
+ * themselves. In the old-style options, limits are inclusive by default.
1169
+ *
1170
+ * == Examples
1171
+ *
1172
+ * q = TypedRangeQuery.new(:date, :lower => "0.1", :include_lower => false)
1173
+ * # is equivalent to
1174
+ * q = TypedRangeQuery.new(:date, :< => "0.1")
1175
+ * # is equivalent to
1176
+ * q = TypedRangeQuery.new(:date, :lower_exclusive => "0.1")
1177
+ *
1178
+ * # Note that you numbers can be strings or actual numbers
1179
+ * q = TypedRangeQuery.new(:date, :lower => "-12.32", :upper => 0.21)
1180
+ * # is equivalent to
1181
+ * q = TypedRangeQuery.new(:date, :>= => "-12.32", :<= => 0.21)
1182
+ */
1183
+ static VALUE
1184
+ frb_trq_init(VALUE self, VALUE rfield, VALUE roptions)
1185
+ {
1186
+ Query *q;
1187
+ char *lterm = NULL;
1188
+ char *uterm = NULL;
1189
+ bool include_lower = false;
1190
+ bool include_upper = false;
1191
+
1192
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1193
+ q = trq_new(frb_field(rfield),
1194
+ lterm, uterm,
1195
+ include_lower, include_upper);
1196
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1197
+ object_add(q, self);
1198
+ return self;
1199
+ }
1200
+
1201
+ /****************************************************************************
1202
+ *
1203
+ * PhraseQuery Methods
1204
+ *
1205
+ ****************************************************************************/
1206
+
1207
+ /*
1208
+ * call-seq:
1209
+ * PhraseQuery.new(field, slop = 0) -> phrase_query
1210
+ *
1211
+ * Create a new PhraseQuery on the field +field+. You need to add terms to
1212
+ * the query it will do anything of value. See PhraseQuery#add_term.
1213
+ */
1214
+ static VALUE
1215
+ frb_phq_init(int argc, VALUE *argv, VALUE self)
1216
+ {
1217
+ VALUE rfield, rslop;
1218
+ Query *q;
1219
+ rb_scan_args(argc, argv, "11", &rfield, &rslop);
1220
+ q = phq_new(frb_field(rfield));
1221
+ if (argc == 2) {
1222
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1223
+ }
1224
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1225
+ object_add(q, self);
1226
+ return self;
1227
+ }
1228
+
1229
+ /*
1230
+ * call-seq:
1231
+ * phrase_query.add_term(term, position_increment = 1) -> phrase_query
1232
+ * phrase_query << term -> phrase_query
1233
+ *
1234
+ * Add a term to the phrase query. By default the position_increment is set
1235
+ * to 1 so each term you add is expected to come directly after the previous
1236
+ * term. By setting position_increment to 2 you are specifying that the term
1237
+ * you just added should occur two terms after the previous term. For
1238
+ * example;
1239
+ *
1240
+ * phrase_query.add_term("big").add_term("house", 2)
1241
+ * # matches => "big brick house"
1242
+ * # matches => "big red house"
1243
+ * # doesn't match => "big house"
1244
+ */
1245
+ static VALUE
1246
+ frb_phq_add(int argc, VALUE *argv, VALUE self)
1247
+ {
1248
+ VALUE rterm, rpos_inc;
1249
+ int pos_inc = 1;
1250
+ GET_Q();
1251
+ if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
1252
+ pos_inc = FIX2INT(rpos_inc);
1253
+ }
1254
+ switch (TYPE(rterm)) {
1255
+ case T_STRING:
1256
+ {
1257
+ phq_add_term(q, StringValuePtr(rterm), pos_inc);
1258
+ break;
1259
+ }
1260
+ case T_ARRAY:
1261
+ {
1262
+ int i;
1263
+ char *t;
1264
+ if (RARRAY_LEN(rterm) < 1) {
1265
+ rb_raise(rb_eArgError, "Cannot add empty array to a "
1266
+ "PhraseQuery. You must add either a string or "
1267
+ "an array of strings");
1268
+ }
1269
+ t = StringValuePtr(RARRAY_PTR(rterm)[0]);
1270
+ phq_add_term(q, t, pos_inc);
1271
+ for (i = 1; i < RARRAY_LEN(rterm); i++) {
1272
+ t = StringValuePtr(RARRAY_PTR(rterm)[i]);
1273
+ phq_append_multi_term(q, t);
1274
+ }
1275
+ break;
1276
+ }
1277
+ default:
1278
+ rb_raise(rb_eArgError, "You can only add a string or an array of "
1279
+ "strings to a PhraseQuery, not a %s\n",
1280
+ rs2s(rb_obj_as_string(rterm)));
1281
+ }
1282
+ return self;
1283
+ }
1284
+
1285
+ /*
1286
+ * call-seq:
1287
+ * phrase_query.slop -> integer
1288
+ *
1289
+ * Return the slop set for this phrase query. See the PhraseQuery
1290
+ * description for more information on slop
1291
+ */
1292
+ static VALUE
1293
+ frb_phq_get_slop(VALUE self)
1294
+ {
1295
+ GET_Q();
1296
+ return INT2FIX(((PhraseQuery *)q)->slop);
1297
+ }
1298
+
1299
+ /*
1300
+ * call-seq:
1301
+ * phrase_query.slop = slop -> slop
1302
+ *
1303
+ * Set the slop set for this phrase query. See the PhraseQuery description
1304
+ * for more information on slop
1305
+ */
1306
+ static VALUE
1307
+ frb_phq_set_slop(VALUE self, VALUE rslop)
1308
+ {
1309
+ GET_Q();
1310
+ ((PhraseQuery *)q)->slop = FIX2INT(rslop);
1311
+ return self;
1312
+ }
1313
+
1314
+ /****************************************************************************
1315
+ *
1316
+ * PrefixQuery Methods
1317
+ *
1318
+ ****************************************************************************/
1319
+
1320
+ /*
1321
+ * call-seq:
1322
+ * PrefixQuery.new(field, prefix, options = {}) -> prefix-query
1323
+ *
1324
+ * Create a new PrefixQuery to search for all terms with the prefix +prefix+
1325
+ * in the field +field+. There is one option that you can set to change the
1326
+ * behaviour of this query. +:max_terms+ specifies the maximum number of
1327
+ * terms to be added to the query when it is expanded into a MultiTermQuery.
1328
+ * Let's say for example you search an index with a million terms for all
1329
+ * terms beginning with the letter "s". You would end up with a very large
1330
+ * query which would use a lot of memory and take a long time to get results,
1331
+ * not to mention that it would probably match every document in the index.
1332
+ * To prevent queries like this crashing your application you can set
1333
+ * +:max_terms+ which limits the number of terms that get added to the query.
1334
+ * By default it is set to 512.
1335
+ */
1336
+ static VALUE
1337
+ frb_prq_init(int argc, VALUE *argv, VALUE self)
1338
+ {
1339
+ return frb_mtq_init_specific(argc, argv, self, &prefixq_new);
1340
+ }
1341
+
1342
+ /****************************************************************************
1343
+ *
1344
+ * WildcardQuery Methods
1345
+ *
1346
+ ****************************************************************************/
1347
+
1348
+ /*
1349
+ * call-seq:
1350
+ * WildcardQuery.new(field, pattern, options = {}) -> wild-card-query
1351
+ *
1352
+ * Create a new WildcardQuery to search for all terms where the pattern
1353
+ * +pattern+ matches in the field +field+.
1354
+ *
1355
+ * There is one option that you can set to change the behaviour of this
1356
+ * query. +:max_terms+ specifies the maximum number of terms to be added to
1357
+ * the query when it is expanded into a MultiTermQuery. Let's say for
1358
+ * example you have a million terms in your index and you let your users do
1359
+ * wild-card queries and one runs a search for "*". You would end up with a
1360
+ * very large query which would use a lot of memory and take a long time to
1361
+ * get results, not to mention that it would probably match every document in
1362
+ * the index. To prevent queries like this crashing your application you can
1363
+ * set +:max_terms+ which limits the number of terms that get added to the
1364
+ * query. By default it is set to 512.
1365
+ */
1366
+ static VALUE
1367
+ frb_wcq_init(int argc, VALUE *argv, VALUE self)
1368
+ {
1369
+ return frb_mtq_init_specific(argc, argv, self, &wcq_new);
1370
+ }
1371
+
1372
+ /****************************************************************************
1373
+ *
1374
+ * FuzzyQuery Methods
1375
+ *
1376
+ ****************************************************************************/
1377
+
1378
+ /*
1379
+ * call-seq:
1380
+ * FuzzyQuery.new(field, term, options = {}) -> fuzzy-query
1381
+ *
1382
+ * Create a new FuzzyQuery that will match terms with a similarity of at
1383
+ * least +:min_similarity+ to +term+. Similarity is scored using the
1384
+ * Levenshtein edit distance formula. See
1385
+ * http://en.wikipedia.org/wiki/Levenshtein_distance
1386
+ *
1387
+ * If a +:prefix_length+ > 0 is specified, a common prefix of that length is
1388
+ * also required.
1389
+ *
1390
+ * You can also set +:max_terms+ to prevent memory overflow problems. By
1391
+ * default it is set to 512.
1392
+ *
1393
+ * == Example
1394
+ *
1395
+ * FuzzyQuery.new(:content, "levenshtein",
1396
+ * :min_similarity => 0.8,
1397
+ * :prefix_length => 5,
1398
+ * :max_terms => 1024)
1399
+ *
1400
+ * field:: field to search
1401
+ * term:: term to search for including it's close matches
1402
+ * :min_similarity:: Default: 0.5. minimum levenshtein distance score for a
1403
+ * match
1404
+ * :prefix_length:: Default: 0. minimum prefix_match before levenshtein
1405
+ * distance is measured. This parameter is used to improve
1406
+ * performance. With a +:prefix_length+ of 0, all terms in
1407
+ * the index must be checked which can be quite a
1408
+ * performance hit. By setting the prefix length to a
1409
+ * larger number you minimize the number of terms that need
1410
+ * to be checked. Even 1 will cut down the work by a
1411
+ * factor of about 26 depending on your character set and
1412
+ * the first letter.
1413
+ * :max_terms:: Limits the number of terms that can be added to the
1414
+ * query when it is expanded as a MultiTermQuery. This is
1415
+ * not usually a problem with FuzzyQueries unless you set
1416
+ * +:min_similarity+ to a very low value.
1417
+ */
1418
+ static VALUE
1419
+ frb_fq_init(int argc, VALUE *argv, VALUE self)
1420
+ {
1421
+ Query *q;
1422
+ VALUE rfield, rterm, roptions;
1423
+ float min_sim =
1424
+ (float)NUM2DBL(rb_cvar_get(cFuzzyQuery, id_default_min_similarity));
1425
+ int pre_len =
1426
+ FIX2INT(rb_cvar_get(cFuzzyQuery, id_default_prefix_length));
1427
+ int max_terms =
1428
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
1429
+
1430
+
1431
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &roptions) >= 3) {
1432
+ VALUE v;
1433
+ Check_Type(roptions, T_HASH);
1434
+ if (Qnil != (v = rb_hash_aref(roptions, sym_prefix_length))) {
1435
+ pre_len = FIX2INT(v);
1436
+ }
1437
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_similarity))) {
1438
+ min_sim = (float)NUM2DBL(v);
1439
+ }
1440
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
1441
+ max_terms = FIX2INT(v);
1442
+ }
1443
+ }
1444
+
1445
+ if (min_sim >= 1.0) {
1446
+ rb_raise(rb_eArgError,
1447
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1448
+ } else if (min_sim < 0.0) {
1449
+ rb_raise(rb_eArgError,
1450
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1451
+ }
1452
+ if (pre_len < 0) {
1453
+ rb_raise(rb_eArgError,
1454
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1455
+ }
1456
+ if (max_terms < 0) {
1457
+ rb_raise(rb_eArgError,
1458
+ "%d < 0. :max_terms must be >= 0", max_terms);
1459
+ }
1460
+
1461
+ q = fuzq_new_conf(frb_field(rfield), StringValuePtr(rterm),
1462
+ min_sim, pre_len, max_terms);
1463
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1464
+ object_add(q, self);
1465
+ return self;
1466
+ }
1467
+
1468
+ /*
1469
+ * call-seq:
1470
+ * FuzzyQuery.prefix_length -> prefix_length
1471
+ *
1472
+ * Get the +:prefix_length+ for the query.
1473
+ */
1474
+ static VALUE
1475
+ frb_fq_pre_len(VALUE self)
1476
+ {
1477
+ GET_Q();
1478
+ return INT2FIX(((FuzzyQuery *)q)->pre_len);
1479
+ }
1480
+
1481
+ /*
1482
+ * call-seq:
1483
+ * FuzzyQuery.min_similarity -> min_similarity
1484
+ *
1485
+ * Get the +:min_similarity+ for the query.
1486
+ */
1487
+ static VALUE
1488
+ frb_fq_min_sim(VALUE self)
1489
+ {
1490
+ GET_Q();
1491
+ return rb_float_new((double)((FuzzyQuery *)q)->min_sim);
1492
+ }
1493
+
1494
+ /*
1495
+ * call-seq:
1496
+ * FuzzyQuery.default_min_similarity -> number
1497
+ *
1498
+ * Get the default value for +:min_similarity+
1499
+ */
1500
+ static VALUE
1501
+ frb_fq_get_dms(VALUE self)
1502
+ {
1503
+ return rb_cvar_get(cFuzzyQuery, id_default_min_similarity);
1504
+ }
1505
+
1506
+ extern float qp_default_fuzzy_min_sim;
1507
+ /*
1508
+ * call-seq:
1509
+ * FuzzyQuery.default_min_similarity = min_sim -> min_sim
1510
+ *
1511
+ * Set the default value for +:min_similarity+
1512
+ */
1513
+ static VALUE
1514
+ frb_fq_set_dms(VALUE self, VALUE val)
1515
+ {
1516
+ double min_sim = NUM2DBL(val);
1517
+ if (min_sim >= 1.0) {
1518
+ rb_raise(rb_eArgError,
1519
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1520
+ } else if (min_sim < 0.0) {
1521
+ rb_raise(rb_eArgError,
1522
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1523
+ }
1524
+ qp_default_fuzzy_min_sim = (float)min_sim;
1525
+ #ifdef FRT_RUBY_VERSION_1_9
1526
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val);
1527
+ #else
1528
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val, Qfalse);
1529
+ #endif
1530
+ return val;
1531
+ }
1532
+
1533
+ /*
1534
+ * call-seq:
1535
+ * FuzzyQuery.default_prefix_length -> number
1536
+ *
1537
+ * Get the default value for +:prefix_length+
1538
+ */
1539
+ static VALUE
1540
+ frb_fq_get_dpl(VALUE self)
1541
+ {
1542
+ return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
1543
+ }
1544
+
1545
+ extern int qp_default_fuzzy_pre_len;
1546
+ /*
1547
+ * call-seq:
1548
+ * FuzzyQuery.default_prefix_length = prefix_length -> prefix_length
1549
+ *
1550
+ * Set the default value for +:prefix_length+
1551
+ */
1552
+ static VALUE
1553
+ frb_fq_set_dpl(VALUE self, VALUE val)
1554
+ {
1555
+ int pre_len = FIX2INT(val);
1556
+ if (pre_len < 0) {
1557
+ rb_raise(rb_eArgError,
1558
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1559
+ }
1560
+ qp_default_fuzzy_pre_len = pre_len;
1561
+ #ifdef FRT_RUBY_VERSION_1_9
1562
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val);
1563
+ #else
1564
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val, Qfalse);
1565
+ #endif
1566
+ return val;
1567
+ }
1568
+
1569
+
1570
+ /****************************************************************************
1571
+ *
1572
+ * MatchAllQuery Methods
1573
+ *
1574
+ ****************************************************************************/
1575
+
1576
+ static VALUE
1577
+ frb_maq_alloc(VALUE klass)
1578
+ {
1579
+ Query *q = maq_new();
1580
+ VALUE self = Data_Wrap_Struct(klass, NULL, &frb_q_free, q);
1581
+ object_add(q, self);
1582
+ return self;
1583
+ }
1584
+
1585
+ /*
1586
+ * call-seq:
1587
+ * MatchAllQuery.new -> query
1588
+ *
1589
+ * Create a query which matches all documents.
1590
+ */
1591
+ static VALUE
1592
+ frb_maq_init(VALUE self)
1593
+ {
1594
+ return self;
1595
+ }
1596
+
1597
+ /****************************************************************************
1598
+ *
1599
+ * ConstantScoreQuery Methods
1600
+ *
1601
+ ****************************************************************************/
1602
+
1603
+ /*
1604
+ * call-seq:
1605
+ * ConstantScoreQuery.new(filter) -> query
1606
+ *
1607
+ * Create a ConstantScoreQuery which uses +filter+ to match documents giving
1608
+ * each document a constant score.
1609
+ */
1610
+ static VALUE
1611
+ frb_csq_init(VALUE self, VALUE rfilter)
1612
+ {
1613
+ Query *q;
1614
+ Filter *filter;
1615
+ Data_Get_Struct(rfilter, Filter, filter);
1616
+ q = csq_new(filter);
1617
+
1618
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1619
+ object_add(q, self);
1620
+ return self;
1621
+ }
1622
+
1623
+ /****************************************************************************
1624
+ *
1625
+ * FilteredQuery Methods
1626
+ *
1627
+ ****************************************************************************/
1628
+
1629
+ static void
1630
+ frb_fqq_mark(void *p)
1631
+ {
1632
+ FilteredQuery *fq = (FilteredQuery *)p;
1633
+ frb_gc_mark(fq->query);
1634
+ frb_gc_mark(fq->filter);
1635
+ }
1636
+
1637
+ /*
1638
+ * call-seq:
1639
+ * FilteredQuery.new(query, filter) -> query
1640
+ *
1641
+ * Create a new FilteredQuery which filters +query+ with +filter+.
1642
+ */
1643
+ static VALUE
1644
+ frb_fqq_init(VALUE self, VALUE rquery, VALUE rfilter)
1645
+ {
1646
+ Query *sq, *q;
1647
+ Filter *f;
1648
+ Data_Get_Struct(rquery, Query, sq);
1649
+ Data_Get_Struct(rfilter, Filter, f);
1650
+ q = fq_new(sq, f);
1651
+ REF(sq);
1652
+ REF(f);
1653
+ Frt_Wrap_Struct(self, &frb_fqq_mark, &frb_q_free, q);
1654
+ object_add(q, self);
1655
+ return self;
1656
+ }
1657
+
1658
+ /****************************************************************************
1659
+ *
1660
+ * SpanTermQuery Methods
1661
+ *
1662
+ ****************************************************************************/
1663
+
1664
+ /*
1665
+ * call-seq:
1666
+ * SpanTermQuery.new(field, term) -> query
1667
+ *
1668
+ * Create a new SpanTermQuery which matches all documents with the term
1669
+ * +term+ in the field +field+.
1670
+ */
1671
+ static VALUE
1672
+ frb_spantq_init(VALUE self, VALUE rfield, VALUE rterm)
1673
+ {
1674
+ Query *q = spantq_new(frb_field(rfield), StringValuePtr(rterm));
1675
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1676
+ object_add(q, self);
1677
+ return self;
1678
+ }
1679
+
1680
+ /****************************************************************************
1681
+ *
1682
+ * SpanMultiTermQuery Methods
1683
+ *
1684
+ ****************************************************************************/
1685
+
1686
+ /*
1687
+ * call-seq:
1688
+ * SpanMultiTermQuery.new(field, terms) -> query
1689
+ *
1690
+ * Create a new SpanMultiTermQuery which matches all documents with the terms
1691
+ * +terms+ in the field +field+. +terms+ should be an array of Strings.
1692
+ */
1693
+ static VALUE
1694
+ frb_spanmtq_init(VALUE self, VALUE rfield, VALUE rterms)
1695
+ {
1696
+ Query *q = spanmtq_new(frb_field(rfield));
1697
+ int i;
1698
+ for (i = RARRAY_LEN(rterms) - 1; i >= 0; i--) {
1699
+ spanmtq_add_term(q, StringValuePtr(RARRAY_PTR(rterms)[i]));
1700
+ }
1701
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1702
+ object_add(q, self);
1703
+ return self;
1704
+ }
1705
+
1706
+ /****************************************************************************
1707
+ *
1708
+ * SpanPrefixQuery Methods
1709
+ *
1710
+ ****************************************************************************/
1711
+
1712
+ /*
1713
+ * call-seq:
1714
+ * SpanPrefixQuery.new(field, prefix, max_terms = 256) -> query
1715
+ *
1716
+ * Create a new SpanPrefixQuery which matches all documents with the prefix
1717
+ * +prefix+ in the field +field+.
1718
+ */
1719
+ static VALUE
1720
+ frb_spanprq_init(int argc, VALUE *argv, VALUE self)
1721
+ {
1722
+ VALUE rfield, rprefix, rmax_terms;
1723
+ int max_terms = SPAN_PREFIX_QUERY_MAX_TERMS;
1724
+ Query *q;
1725
+ if (rb_scan_args(argc, argv, "21", &rfield, &rprefix, &rmax_terms) == 3) {
1726
+ max_terms = FIX2INT(rmax_terms);
1727
+ }
1728
+ q = spanprq_new(frb_field(rfield), StringValuePtr(rprefix));
1729
+ ((SpanPrefixQuery *)q)->max_terms = max_terms;
1730
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1731
+ object_add(q, self);
1732
+ return self;
1733
+ }
1734
+
1735
+ /****************************************************************************
1736
+ *
1737
+ * SpanFirstQuery Methods
1738
+ *
1739
+ ****************************************************************************/
1740
+
1741
+ /*
1742
+ * call-seq:
1743
+ * SpanFirstQuery.new(span_query, end) -> query
1744
+ *
1745
+ * Create a new SpanFirstQuery which matches all documents where +span_query+
1746
+ * matches before +end+ where +end+ is a byte-offset from the start of the
1747
+ * field
1748
+ */
1749
+ static VALUE
1750
+ frb_spanfq_init(VALUE self, VALUE rmatch, VALUE rend)
1751
+ {
1752
+ Query *q;
1753
+ Query *match;
1754
+ Data_Get_Struct(rmatch, Query, match);
1755
+ q = spanfq_new(match, FIX2INT(rend));
1756
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1757
+ object_add(q, self);
1758
+ return self;
1759
+ }
1760
+
1761
+ /****************************************************************************
1762
+ *
1763
+ * SpanNearQuery Methods
1764
+ *
1765
+ ****************************************************************************/
1766
+
1767
+ static void
1768
+ frb_spannq_mark(void *p)
1769
+ {
1770
+ int i;
1771
+ SpanNearQuery *snq = (SpanNearQuery *)p;
1772
+ for (i = 0; i < snq->c_cnt; i++) {
1773
+ frb_gc_mark(snq->clauses[i]);
1774
+ }
1775
+ }
1776
+
1777
+ /*
1778
+ * call-seq:
1779
+ * SpanNearQuery.new(options = {}) -> query
1780
+ *
1781
+ * Create a new SpanNearQuery. You can add an array of clauses with the
1782
+ * +:clauses+ parameter or you can add clauses individually using the
1783
+ * SpanNearQuery#add method.
1784
+ *
1785
+ * query = SpanNearQuery.new(:clauses => [spanq1, spanq2, spanq3])
1786
+ * # is equivalent to
1787
+ * query = SpanNearQuery.new()
1788
+ * query << spanq1 << spanq2 << spanq3
1789
+ *
1790
+ * You have two other options which you can set.
1791
+ *
1792
+ * :slop:: Default: 0. Works exactly like a PhraseQuery slop. It is the
1793
+ * amount of slop allowed in the match (the term edit distance
1794
+ * allowed in the match).
1795
+ * :in_order:: Default: false. Specifies whether or not the matches have to
1796
+ * occur in the order they were added to the query. When slop is
1797
+ * set to 0, this parameter will make no difference.
1798
+ */
1799
+ static VALUE
1800
+ frb_spannq_init(int argc, VALUE *argv, VALUE self)
1801
+ {
1802
+ Query *q;
1803
+ VALUE roptions;
1804
+ int slop = 0;
1805
+ bool in_order = false;
1806
+
1807
+ if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
1808
+ VALUE v;
1809
+ if (Qnil != (v = rb_hash_aref(roptions, sym_slop))) {
1810
+ slop = FIX2INT(v);
1811
+ }
1812
+ if (Qnil != (v = rb_hash_aref(roptions, sym_in_order))) {
1813
+ in_order = RTEST(v);
1814
+ }
1815
+ }
1816
+ q = spannq_new(slop, in_order);
1817
+ if (argc > 0) {
1818
+ VALUE v;
1819
+ if (Qnil != (v = rb_hash_aref(roptions, sym_clauses))) {
1820
+ int i;
1821
+ Query *clause;
1822
+ Check_Type(v, T_ARRAY);
1823
+ for (i = 0; i < RARRAY_LEN(v); i++) {
1824
+ Data_Get_Struct(RARRAY_PTR(v)[i], Query, clause);
1825
+ spannq_add_clause(q, clause);
1826
+ }
1827
+ }
1828
+ }
1829
+
1830
+ Frt_Wrap_Struct(self, &frb_spannq_mark, &frb_q_free, q);
1831
+ object_add(q, self);
1832
+ return self;
1833
+ }
1834
+
1835
+ /*
1836
+ * call-seq:
1837
+ * query.add(span_query) -> self
1838
+ * query << span_query -> self
1839
+ *
1840
+ * Add a clause to the SpanNearQuery. Clauses are stored in the order they
1841
+ * are added to the query which is important for matching. Note that clauses
1842
+ * must be SpanQueries, not other types of query.
1843
+ */
1844
+ static VALUE
1845
+ frb_spannq_add(VALUE self, VALUE rclause)
1846
+ {
1847
+ GET_Q();
1848
+ Query *clause;
1849
+ Data_Get_Struct(rclause, Query, clause);
1850
+ spannq_add_clause(q, clause);
1851
+ return self;
1852
+ }
1853
+
1854
+ /****************************************************************************
1855
+ *
1856
+ * SpanOrQuery Methods
1857
+ *
1858
+ ****************************************************************************/
1859
+
1860
+ static void
1861
+ frb_spanoq_mark(void *p)
1862
+ {
1863
+ int i;
1864
+ SpanOrQuery *soq = (SpanOrQuery *)p;
1865
+ for (i = 0; i < soq->c_cnt; i++) {
1866
+ frb_gc_mark(soq->clauses[i]);
1867
+ }
1868
+ }
1869
+
1870
+ /*
1871
+ * call-seq:
1872
+ * SpanOrQuery.new(options = {}) -> query
1873
+ *
1874
+ * Create a new SpanOrQuery. This is just like a BooleanQuery with all
1875
+ * clauses with the occur value of :should. The difference is that it can be
1876
+ * passed to other SpanQuerys like SpanNearQuery.
1877
+ */
1878
+ static VALUE
1879
+ frb_spanoq_init(int argc, VALUE *argv, VALUE self)
1880
+ {
1881
+ Query *q;
1882
+ VALUE rclauses;
1883
+
1884
+ q = spanoq_new();
1885
+ if (rb_scan_args(argc, argv, "01", &rclauses) > 0) {
1886
+ int i;
1887
+ Query *clause;
1888
+ Check_Type(rclauses, T_ARRAY);
1889
+ for (i = 0; i < RARRAY_LEN(rclauses); i++) {
1890
+ Data_Get_Struct(RARRAY_PTR(rclauses)[i], Query, clause);
1891
+ spanoq_add_clause(q, clause);
1892
+ }
1893
+ }
1894
+ Frt_Wrap_Struct(self, &frb_spanoq_mark, &frb_q_free, q);
1895
+ object_add(q, self);
1896
+ return self;
1897
+ }
1898
+
1899
+ /*
1900
+ * call-seq:
1901
+ * query.add(span_query) -> self
1902
+ * query << span_query -> self
1903
+ *
1904
+ * Add a clause to the SpanOrQuery. Note that clauses must be SpanQueries,
1905
+ * not other types of query.
1906
+ */
1907
+ static VALUE
1908
+ frb_spanoq_add(VALUE self, VALUE rclause)
1909
+ {
1910
+ GET_Q();
1911
+ Query *clause;
1912
+ Data_Get_Struct(rclause, Query, clause);
1913
+ spanoq_add_clause(q, clause);
1914
+ return self;
1915
+ }
1916
+
1917
+ /****************************************************************************
1918
+ *
1919
+ * SpanNotQuery Methods
1920
+ *
1921
+ ****************************************************************************/
1922
+
1923
+ static void
1924
+ frb_spanxq_mark(void *p)
1925
+ {
1926
+ SpanNotQuery *sxq = (SpanNotQuery *)p;
1927
+ frb_gc_mark(sxq->inc);
1928
+ frb_gc_mark(sxq->exc);
1929
+ }
1930
+
1931
+ /*
1932
+ * call-seq:
1933
+ * SpanNotQuery.new(include_query, exclude_query) -> query
1934
+ *
1935
+ * Create a new SpanNotQuery which matches all documents which match
1936
+ * +include_query+ and don't match +exclude_query+.
1937
+ */
1938
+ static VALUE
1939
+ frb_spanxq_init(VALUE self, VALUE rinc, VALUE rexc)
1940
+ {
1941
+ Query *q;
1942
+ Check_Type(rinc, T_DATA);
1943
+ Check_Type(rexc, T_DATA);
1944
+ q = spanxq_new(DATA_PTR(rinc), DATA_PTR(rexc));
1945
+ Frt_Wrap_Struct(self, &frb_spanxq_mark, &frb_q_free, q);
1946
+ object_add(q, self);
1947
+ return self;
1948
+ }
1949
+
1950
+ /****************************************************************************
1951
+ *
1952
+ * Filter Methods
1953
+ *
1954
+ ****************************************************************************/
1955
+
1956
+ static void
1957
+ frb_f_free(void *p)
1958
+ {
1959
+ object_del(p);
1960
+ filt_deref((Filter *)p);
1961
+ }
1962
+
1963
/* Declares a local `f`: self's wrapped data pointer cast to Filter *. */
+ #define GET_F() Filter *f = (Filter *)DATA_PTR(self)
1964
+
1965
+ /*
1966
+ * call-seq:
1967
+ * filter.to_s -> string
1968
+ *
1969
+ * Return a human readable string representing the Filter object that the
1970
+ * method was called on.
1971
+ */
1972
+ static VALUE
1973
+ frb_f_to_s(VALUE self)
1974
+ {
1975
+ VALUE rstr;
1976
+ char *str;
1977
+ GET_F();
1978
+ str = f->to_s(f);
1979
+ rstr = rb_str_new2(str);
1980
+ free(str);
1981
+ return rstr;
1982
+ }
1983
+
1984
+ extern VALUE frb_get_bv(BitVector *bv);
1985
+
1986
+ /*
1987
+ * call-seq:
1988
+ * filter.bits(index_reader) -> bit_vector
1989
+ *
1990
+ * Get the bit_vector used by this filter. This method will usually be used
1991
+ * to group filters or apply filters to other filters.
1992
+ */
1993
+ static VALUE
1994
+ frb_f_get_bits(VALUE self, VALUE rindex_reader)
1995
+ {
1996
+ BitVector *bv;
1997
+ IndexReader *ir;
1998
+ GET_F();
1999
+ Data_Get_Struct(rindex_reader, IndexReader, ir);
2000
+ bv = filt_get_bv(f, ir);
2001
+ return frb_get_bv(bv);
2002
+ }
2003
+
2004
+ /****************************************************************************
2005
+ *
2006
+ * RangeFilter Methods
2007
+ *
2008
+ ****************************************************************************/
2009
+
2010
+
2011
+ /*
2012
+ * call-seq:
2013
+ * RangeFilter.new(field, options = {}) -> range_query
2014
+ *
2015
+ * Create a new RangeFilter on field +field+. There are two ways to build a
2016
+ * range filter. With the old-style options; +:lower+, +:upper+,
2017
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
2018
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
2019
+ * In the old-style options, limits are inclusive by default.
2020
+ *
2021
+ * == Examples
2022
+ *
2023
+ * f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
2024
+ * # is equivalent to
2025
+ * f = RangeFilter.new(:date, :> => "200501")
2026
+ * # is equivalent to
2027
+ * f = RangeFilter.new(:date, :lower_exclusive => "200501")
2028
+ *
2029
+ * f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
2030
+ * # is equivalent to
2031
+ * f = RangeFilter.new(:date, :>= => "200501", :<= => 200502)
2032
+ */
2033
+ static VALUE
2034
+ frb_rf_init(VALUE self, VALUE rfield, VALUE roptions)
2035
+ {
2036
+ Filter *f;
2037
+ char *lterm = NULL;
2038
+ char *uterm = NULL;
2039
+ bool include_lower = false;
2040
+ bool include_upper = false;
2041
+
2042
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
2043
+ f = rfilt_new(frb_field(rfield), lterm, uterm,
2044
+ include_lower, include_upper);
2045
+ Frt_Wrap_Struct(self, NULL, &frb_f_free, f);
2046
+ object_add(f, self);
2047
+ return self;
2048
+ }
2049
+
2050
+ /****************************************************************************
2051
+ *
2052
+ * TypedRangeFilter Methods
2053
+ *
2054
+ ****************************************************************************/
2055
+
2056
+
2057
+ /*
2058
+ * call-seq:
2059
+ * TypedRangeFilter.new(field, options = {}) -> range_query
2060
+ *
2061
+ * Create a new TypedRangeFilter on field +field+. There are two ways to
2062
+ * build a range filter. With the old-style options; +:lower+, +:upper+,
2063
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
2064
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
2065
+ * In the old-style options, limits are inclusive by default.
2066
+ *
2067
+ * == Examples
2068
+ *
2069
+ * f = TypedRangeFilter.new(:date, :lower => "0.1", :include_lower => false)
2070
+ * # is equivalent to
2071
+ * f = TypedRangeFilter.new(:date, :> => "0.1")
2072
+ * # is equivalent to
2073
+ * f = TypedRangeFilter.new(:date, :lower_exclusive => "0.1")
2074
+ *
2075
+ * # Note that your numbers can be strings or actual numbers
2076
+ * f = TypedRangeFilter.new(:date, :lower => "-132.2", :upper => -1.4)
2077
+ * # is equivalent to
2078
+ * f = TypedRangeFilter.new(:date, :>= => "-132.2", :<= => -1.4)
2079
+ */
2080
+ static VALUE
2081
+ frb_trf_init(VALUE self, VALUE rfield, VALUE roptions)
2082
+ {
2083
+ Filter *f;
2084
+ char *lterm = NULL;
2085
+ char *uterm = NULL;
2086
+ bool include_lower = false;
2087
+ bool include_upper = false;
2088
+
2089
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
2090
+ f = trfilt_new(frb_field(rfield), lterm, uterm,
2091
+ include_lower, include_upper);
2092
+ Frt_Wrap_Struct(self, NULL, &frb_f_free, f);
2093
+ object_add(f, self);
2094
+ return self;
2095
+ }
2096
+
2097
+ /****************************************************************************
2098
+ *
2099
+ * QueryFilter Methods
2100
+ *
2101
+ ****************************************************************************/
2102
+
2103
+ /*
2104
+ * call-seq:
2105
+ * QueryFilter.new(query) -> filter
2106
+ *
2107
+ * Create a new QueryFilter which applies the query +query+.
2108
+ */
2109
+ static VALUE
2110
+ frb_qf_init(VALUE self, VALUE rquery)
2111
+ {
2112
+ Query *q;
2113
+ Filter *f;
2114
+ Data_Get_Struct(rquery, Query, q);
2115
+ f = qfilt_new(q);
2116
+ Frt_Wrap_Struct(self, NULL, &frb_f_free, f);
2117
+ object_add(f, self);
2118
+ return self;
2119
+ }
2120
+
2121
+ /****************************************************************************
2122
+ *
2123
+ * SortField Methods
2124
+ *
2125
+ ****************************************************************************/
2126
+
2127
+ static void
2128
+ frb_sf_free(void *p)
2129
+ {
2130
+ object_del(p);
2131
+ sort_field_destroy((SortField *)p);
2132
+ }
2133
+
2134
+ static VALUE
2135
+ frb_get_sf(SortField *sf)
2136
+ {
2137
+ VALUE self = object_get(sf);
2138
+ if (self == Qnil) {
2139
+ self = Data_Wrap_Struct(cSortField, NULL, &frb_sf_free, sf);
2140
+ object_add(sf, self);
2141
+ }
2142
+ return self;
2143
+ }
2144
+
2145
+ static int
2146
+ get_sort_type(VALUE rtype)
2147
+ {
2148
+ Check_Type(rtype, T_SYMBOL);
2149
+ if (rtype == sym_byte) {
2150
+ return SORT_TYPE_BYTE;
2151
+ } else if (rtype == sym_integer) {
2152
+ return SORT_TYPE_INTEGER;
2153
+ } else if (rtype == sym_string) {
2154
+ return SORT_TYPE_STRING;
2155
+ } else if (rtype == sym_score) {
2156
+ return SORT_TYPE_SCORE;
2157
+ } else if (rtype == sym_doc_id) {
2158
+ return SORT_TYPE_DOC;
2159
+ } else if (rtype == sym_float) {
2160
+ return SORT_TYPE_FLOAT;
2161
+ } else if (rtype == sym_auto) {
2162
+ return SORT_TYPE_AUTO;
2163
+ } else {
2164
+ rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
2165
+ "from [:integer, :float, :string, :auto, :score, :doc_id]",
2166
+ rb_id2name(SYM2ID(rtype)));
2167
+ }
2168
+ return SORT_TYPE_DOC;
2169
+ }
2170
+
2171
+ /*
2172
+ * call-seq:
2173
+ * SortField.new(field, options = {}) -> sort_field
2174
+ *
2175
+ * Create a new SortField which can be used to sort the result-set by the
2176
+ * value in field +field+.
2177
+ *
2178
+ * === Options
2179
+ *
2180
+ * :type:: Default: +:auto+. Specifies how a field should be sorted.
2181
+ * Choose from one of; +:auto+, +:integer+, +:float+,
2182
+ * +:string+, +:byte+, +:doc_id+ or +:score+. +:auto+ will
2183
+ * check the datatype of the field by trying to parse it into
2184
+ * either a number or a float before settling on a string
2185
+ * sort. String sort is locale dependent and works for
2186
+ * multibyte character sets like UTF-8 if you have your
2187
+ * locale set correctly.
2188
+ * :reverse:: Default: false. Set to true if you want to reverse the
2189
+ * sort.
2190
+ */
2191
+ static VALUE
2192
+ frb_sf_init(int argc, VALUE *argv, VALUE self)
2193
+ {
2194
+ SortField *sf;
2195
+ VALUE rfield, roptions;
2196
+ VALUE rval;
2197
+ int type = SORT_TYPE_AUTO;
2198
+ int is_reverse = false;
2199
+ Symbol field;
2200
+
2201
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
2202
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_type))) {
2203
+ type = get_sort_type(rval);
2204
+ }
2205
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_reverse))) {
2206
+ is_reverse = RTEST(rval);
2207
+ }
2208
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_comparator))) {
2209
+ rb_raise(rb_eArgError, "Unsupported argument ':comparator'");
2210
+ }
2211
+ }
2212
+ if (NIL_P(rfield)) rb_raise(rb_eArgError, "must pass a valid field name");
2213
+ field = frb_field(rfield);
2214
+
2215
+ sf = sort_field_new(field, type, is_reverse);
2216
+ if (sf->field == NULL) {
2217
+ sf->field = field;
2218
+ }
2219
+
2220
+ Frt_Wrap_Struct(self, NULL, &frb_sf_free, sf);
2221
+ object_add(sf, self);
2222
+ return self;
2223
+ }
2224
+
2225
/* Declares a local `sf`: self's wrapped data pointer cast to SortField *. */
+ #define GET_SF() SortField *sf = (SortField *)DATA_PTR(self)
2226
+
2227
+ /*
2228
+ * call-seq:
2229
+ * sort_field.reverse? -> bool
2230
+ *
2231
+ * Return true if the field is to be reverse sorted. This attribute is set
2232
+ * when you create the sort_field.
2233
+ */
2234
+ static VALUE
2235
+ frb_sf_is_reverse(VALUE self)
2236
+ {
2237
+ GET_SF();
2238
+ return sf->reverse ? Qtrue : Qfalse;
2239
+ }
2240
+
2241
+ /*
2242
+ * call-seq:
2243
+ * sort_field.name -> symbol
2244
+ *
2245
+ * Returns the name of the field to be sorted.
2246
+ */
2247
+ static VALUE
2248
+ frb_sf_get_name(VALUE self)
2249
+ {
2250
+ GET_SF();
2251
+ return sf->field ? FSYM2SYM(sf->field) : Qnil;
2252
+ }
2253
+
2254
+ /*
2255
+ * call-seq:
2256
+ * sort_field.type -> symbol
2257
+ *
2258
+ * Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
2259
+ * +:string+, +:byte+, +:doc_id+ or +:score+.
2260
+ */
2261
+ static VALUE
2262
+ frb_sf_get_type(VALUE self)
2263
+ {
2264
+ GET_SF();
2265
+ switch (sf->type) {
2266
+ case SORT_TYPE_BYTE: return sym_byte;
2267
+ case SORT_TYPE_INTEGER: return sym_integer;
2268
+ case SORT_TYPE_FLOAT: return sym_float;
2269
+ case SORT_TYPE_STRING: return sym_string;
2270
+ case SORT_TYPE_AUTO: return sym_auto;
2271
+ case SORT_TYPE_DOC: return sym_doc_id;
2272
+ case SORT_TYPE_SCORE: return sym_score;
2273
+ }
2274
+ return Qnil;
2275
+ }
2276
+
2277
+ /*
2278
+ * call-seq:
2279
+ * sort_field.comparator -> symbol
2280
+ *
2281
+ * TODO: currently unsupported
2282
+ */
2283
+ static VALUE
2284
+ frb_sf_get_comparator(VALUE self)
2285
+ {
2286
+ return Qnil;
2287
+ }
2288
+
2289
+ /*
2290
+ * call-seq:
2291
+ * sort_field.to_s -> string
2292
+ *
2293
+ * Return a human readable string describing this +sort_field+.
2294
+ */
2295
+ static VALUE
2296
+ frb_sf_to_s(VALUE self)
2297
+ {
2298
+ GET_SF();
2299
+ char *str = sort_field_to_s(sf);
2300
+ VALUE rstr = rb_str_new2(str);
2301
+ free(str);
2302
+ return rstr;
2303
+ }
2304
+
2305
+ /****************************************************************************
2306
+ *
2307
+ * Sort Methods
2308
+ *
2309
+ ****************************************************************************/
2310
+
2311
+ static void
2312
+ frb_sort_free(void *p)
2313
+ {
2314
+ Sort *sort = (Sort *)p;
2315
+ object_del(sort);
2316
+ sort_destroy(sort);
2317
+ }
2318
+
2319
+ static void
2320
+ frb_sort_mark(void *p)
2321
+ {
2322
+ Sort *sort = (Sort *)p;
2323
+ int i;
2324
+ for (i = 0; i < sort->size; i++) {
2325
+ frb_gc_mark(sort->sort_fields[i]);
2326
+ }
2327
+ }
2328
+
2329
+ static VALUE
2330
+ frb_sort_alloc(VALUE klass)
2331
+ {
2332
+ VALUE self;
2333
+ Sort *sort = sort_new();
2334
+ sort->destroy_all = false;
2335
+ self = Data_Wrap_Struct(klass, &frb_sort_mark, &frb_sort_free, sort);
2336
+ object_add(sort, self);
2337
+ return self;
2338
+ }
2339
+
2340
+ static void
2341
+ frb_parse_sort_str(Sort *sort, char *xsort_str)
2342
+ {
2343
+ SortField *sf;
2344
+ char *comma, *end, *e, *s;
2345
+ const int len = strlen(xsort_str);
2346
+ char *sort_str = ALLOC_N(char, len + 2);
2347
+ strcpy(sort_str, xsort_str);
2348
+
2349
+ end = &sort_str[len];
2350
+
2351
+ s = sort_str;
2352
+
2353
+ while ((s < end)
2354
+ && (NULL != (comma = strchr(s, ',')) || (NULL != (comma = end)))) {
2355
+ bool reverse = false;
2356
+ /* strip spaces */
2357
+ e = comma;
2358
+ while ((isspace(*s) || *s == ':') && s < e) s++;
2359
+ while (isspace(e[-1]) && s < e) e--;
2360
+ *e = '\0';
2361
+ if (e > (s + 4) && strcmp("DESC", &e[-4]) == 0) {
2362
+ reverse = true;
2363
+ e -= 4;
2364
+ while (isspace(e[-1]) && s < e) e--;
2365
+ }
2366
+ *e = '\0';
2367
+
2368
+ if (strcmp("SCORE", s) == 0) {
2369
+ sf = sort_field_score_new(reverse);
2370
+ } else if (strcmp("DOC_ID", s) == 0) {
2371
+ sf = sort_field_doc_new(reverse);
2372
+ } else {
2373
+ sf = sort_field_auto_new(I(s), reverse);
2374
+ }
2375
+ frb_get_sf(sf);
2376
+ sort_add_sort_field(sort, sf);
2377
+ s = comma + 1;
2378
+ }
2379
+ free(sort_str);
2380
+ }
2381
+
2382
+ static void
2383
+ frb_sort_add(Sort *sort, VALUE rsf, bool reverse)
2384
+ {
2385
+ SortField *sf;
2386
+ switch (TYPE(rsf)) {
2387
+ case T_DATA:
2388
+ Data_Get_Struct(rsf, SortField, sf);
2389
+ if (reverse) sf->reverse = !sf->reverse;
2390
+ sort_add_sort_field(sort, sf);
2391
+ break;
2392
+ case T_SYMBOL:
2393
+ sf = sort_field_auto_new(frb_field(rsf), reverse);
2394
+ /* need to give it a ruby object so it'll be freed when the
2395
+ * sort is garbage collected */
2396
+ rsf = frb_get_sf(sf);
2397
+ sort_add_sort_field(sort, sf);
2398
+ break;
2399
+ case T_STRING:
2400
+ frb_parse_sort_str(sort, rs2s(rsf));
2401
+ break;
2402
+ default:
2403
+ rb_raise(rb_eArgError, "Unknown SortField Type");
2404
+ break;
2405
+ }
2406
+ }
2407
+
2408
/* Declares a local `sort`: self's wrapped data pointer cast to Sort *. */
+ #define GET_SORT() Sort *sort = (Sort *)DATA_PTR(self)
2409
+ /*
2410
+ * call-seq:
2411
+ * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_ID], reverse = false) -> Sort
2412
+ *
2413
+ * Create a new Sort object. If +reverse+ is true, all sort_fields will be
2414
+ * reversed so if any of them are already reversed they will be turned back
2415
+ * to their natural order again. By default the sort is by score, then by
+ * document id.
2416
+ */
2417
+ static VALUE
2418
+ frb_sort_init(int argc, VALUE *argv, VALUE self)
2419
+ {
2420
+ int i;
2421
+ VALUE rfields, rreverse;
2422
+ bool reverse = false;
2423
+ bool has_sfd = false;
2424
+ GET_SORT();
2425
+ switch (rb_scan_args(argc, argv, "02", &rfields, &rreverse)) {
2426
+ case 2: reverse = RTEST(rreverse);
2427
+ case 1:
2428
+ if (TYPE(rfields) == T_ARRAY) {
2429
+ int i;
2430
+ for (i = 0; i < RARRAY_LEN(rfields); i++) {
2431
+ frb_sort_add(sort, RARRAY_PTR(rfields)[i], reverse);
2432
+ }
2433
+ } else {
2434
+ frb_sort_add(sort, rfields, reverse);
2435
+ }
2436
+ for (i = 0; i < sort->size; i++) {
2437
+ if (sort->sort_fields[i] == &SORT_FIELD_DOC) has_sfd = true;
2438
+ }
2439
+ if (!has_sfd) {
2440
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
2441
+ }
2442
+ break;
2443
+ case 0:
2444
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_SCORE);
2445
+ sort_add_sort_field(sort, (SortField *)&SORT_FIELD_DOC);
2446
+ }
2447
+
2448
+ return self;
2449
+ }
2450
+
2451
+ /*
2452
+ * call-seq:
2453
+ * sort.fields -> Array
2454
+ *
2455
+ * Returns an array of the SortFields held by the Sort object.
2456
+ */
2457
+ static VALUE
2458
+ frb_sort_get_fields(VALUE self)
2459
+ {
2460
+ GET_SORT();
2461
+ VALUE rfields = rb_ary_new2(sort->size);
2462
+ int i;
2463
+ for (i = 0; i < sort->size; i++) {
2464
+ rb_ary_store(rfields, i, object_get(sort->sort_fields[i]));
2465
+ }
2466
+ return rfields;
2467
+ }
2468
+
2469
+
2470
+ /*
2471
+ * call-seq:
2472
+ * sort.to_s -> string
2473
+ *
2474
+ * Returns a human readable string representing the sort object.
2475
+ */
2476
+ static VALUE
2477
+ frb_sort_to_s(VALUE self)
2478
+ {
2479
+ GET_SORT();
2480
+ char *str = sort_to_s(sort);
2481
+ VALUE rstr = rb_str_new2(str);
2482
+ free(str);
2483
+ return rstr;
2484
+ }
2485
+
2486
+ /****************************************************************************
2487
+ *
2488
+ * Searcher Methods
2489
+ *
2490
+ ****************************************************************************/
2491
+
2492
+ static void
2493
+ frb_sea_free(void *p)
2494
+ {
2495
+ Searcher *sea = (Searcher *)p;
2496
+ object_del(sea);
2497
+ sea->close(sea);
2498
+ }
2499
+
2500
/* Declares a local `sea`: self's wrapped data pointer cast to Searcher *. */
+ #define GET_SEA() Searcher *sea = (Searcher *)DATA_PTR(self)
2501
+
2502
+ /*
2503
+ * call-seq:
2504
+ * searcher.close -> nil
2505
+ *
2506
+ * Close the searcher. The garbage collector will do this for you or you can
2507
+ * call this method explicitly.
2508
+ */
2509
+ static VALUE
2510
+ frb_sea_close(VALUE self)
2511
+ {
2512
+ GET_SEA();
2513
+ Frt_Unwrap_Struct(self);
2514
+ object_del(sea);
2515
+ sea->close(sea);
2516
+ return Qnil;
2517
+ }
2518
+
2519
+ /*
2520
+ * call-seq:
2521
+ * searcher.reader -> IndexReader
2522
+ *
2523
+ * Return the IndexReader wrapped by this searcher.
2524
+ */
2525
+ static VALUE
2526
+ frb_sea_get_reader(VALUE self, VALUE rterm)
2527
+ {
2528
+ GET_SEA();
2529
+ return object_get(((IndexSearcher *)sea)->ir);
2530
+ }
2531
+
2532
+ /*
2533
+ * call-seq:
2534
+ * searcher.doc_freq(field, term) -> integer
2535
+ *
2536
+ * Return the number of documents in which the term +term+ appears in the
2537
+ * field +field+.
2538
+ */
2539
+ static VALUE
2540
+ frb_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm)
2541
+ {
2542
+ GET_SEA();
2543
+ return INT2FIX(sea->doc_freq(sea,
2544
+ frb_field(rfield),
2545
+ StringValuePtr(rterm)));
2546
+ }
2547
+
2548
+ /*
2549
+ * call-seq:
2550
+ * searcher.get_document(doc_id) -> LazyDoc
2551
+ * searcher[doc_id] -> LazyDoc
2552
+ *
2553
+ * Retrieve a document from the index. See LazyDoc for more details on the
2554
+ * document returned. Documents are referenced internally by document ids
2555
+ * which are returned by the Searchers search methods.
2556
+ */
2557
+ static VALUE
2558
+ frb_sea_doc(VALUE self, VALUE rdoc_id)
2559
+ {
2560
+ GET_SEA();
2561
+ return frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
2562
+ }
2563
+
2564
+ /*
2565
+ * call-seq:
2566
+ * searcher.max_doc -> number
2567
+ *
2568
+ * Returns 1 + the maximum document id in the index. It is the
2569
+ * document_id that will be used by the next document added to the index. If
2570
+ * there are no deletions, this number also refers to the number of documents
2571
+ * in the index.
2572
+ */
2573
+ static VALUE
2574
+ frb_sea_max_doc(VALUE self)
2575
+ {
2576
+ GET_SEA();
2577
+ return INT2FIX(sea->max_doc(sea));
2578
+ }
2579
+
2580
+ static float
2581
+ call_filter_proc(int doc_id, float score, Searcher *self, void *arg)
2582
+ {
2583
+ VALUE val = rb_funcall((VALUE)arg, id_call, 3,
2584
+ INT2FIX(doc_id),
2585
+ rb_float_new((double)score),
2586
+ object_get(self));
2587
+ switch (TYPE(val)) {
2588
+ case T_NIL:
2589
+ case T_FALSE:
2590
+ return 0.0f;
2591
+ case T_FLOAT:
2592
+ {
2593
+ double d = NUM2DBL(val);
2594
+ return (d >= 0.0 && d <= 1.0) ? (float)d : 1.0f;
2595
+ }
2596
+ default:
2597
+ return 1.0f;
2598
+ }
2599
+ }
2600
+
2601
/* A Filter backed by a Ruby object: the cwfilt_hash, cwfilt_eq and
 * cwfilt_get_bv_i callbacks forward to `rfilter`. */
+ typedef struct CWrappedFilter
2602
+ {
2603
/* base Filter; CWF() reaches this struct by casting a Filter pointer */
+ Filter super;
2604
/* the Ruby object that the cwfilt_* callbacks call into */
+ VALUE rfilter;
2605
+ } CWrappedFilter;
2606
/* Casts a Filter pointer to CWrappedFilter *. */
+ #define CWF(filt) ((CWrappedFilter *)(filt))
2607
+
2608
+ static unsigned long
2609
+ cwfilt_hash(Filter *filt)
2610
+ {
2611
+ return NUM2ULONG(rb_funcall(CWF(filt)->rfilter, id_hash, 0));
2612
+ }
2613
+
2614
+ static int
2615
+ cwfilt_eq(Filter *filt, Filter *o)
2616
+ {
2617
+ return RTEST(rb_funcall(CWF(filt)->rfilter, id_eql, 1, CWF(o)->rfilter));
2618
+ }
2619
+
2620
+ static BitVector *
2621
+ cwfilt_get_bv_i(Filter *filt, IndexReader *ir)
2622
+ {
2623
+ VALUE rbv = rb_funcall(CWF(filt)->rfilter, id_bits, 1, object_get(ir));
2624
+ BitVector *bv;
2625
+ Data_Get_Struct(rbv, BitVector, bv);
2626
+ REF(bv);
2627
+ return bv;
2628
+ }
2629
+
2630
+ Filter *
2631
+ frb_get_cwrapped_filter(VALUE rval)
2632
+ {
2633
+ Filter *filter;
2634
+ if (frb_is_cclass(rval) && DATA_PTR(rval)) {
2635
+ Data_Get_Struct(rval, Filter, filter);
2636
+ REF(filter);
2637
+ }
2638
+ else {
2639
+ filter = filt_new(CWrappedFilter);
2640
+ filter->hash = &cwfilt_hash;
2641
+ filter->eq = &cwfilt_eq;
2642
+ filter->get_bv_i = &cwfilt_get_bv_i;
2643
+ CWF(filter)->rfilter = rval;
2644
+ }
2645
+ return filter;
2646
+ }
2647
+
2648
+ static TopDocs *
2649
+ frb_sea_search_internal(Query *query, VALUE roptions, Searcher *sea)
2650
+ {
2651
+ VALUE rval;
2652
+ int offset = 0, limit = 10;
2653
+ Filter *filter = NULL;
2654
+ Sort *sort = NULL;
2655
+ TopDocs *td;
2656
+
2657
+ PostFilter post_filter_holder;
2658
+ PostFilter *post_filter = NULL;
2659
+
2660
+ if (Qnil != roptions) {
2661
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_offset))) {
2662
+ offset = FIX2INT(rval);
2663
+ if (offset < 0)
2664
+ rb_raise(rb_eArgError, ":offset must be >= 0");
2665
+ }
2666
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_limit))) {
2667
+ if (TYPE(rval) == T_FIXNUM) {
2668
+ limit = FIX2INT(rval);
2669
+ if (limit <= 0) {
2670
+ rb_raise(rb_eArgError, ":limit must be > 0");
2671
+ }
2672
+ }
2673
+ else if (rval == sym_all) {
2674
+ limit = INT_MAX;
2675
+ }
2676
+ else {
2677
+ rb_raise(rb_eArgError, "%s is not a sensible :limit value "
2678
+ "Please use a positive integer or :all",
2679
+ rb_obj_as_string(rval));
2680
+ }
2681
+ }
2682
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter))) {
2683
+ filter = frb_get_cwrapped_filter(rval);
2684
+ }
2685
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_c_filter_proc))) {
2686
+ post_filter = DATA_PTR(rval);
2687
+ }
2688
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter_proc))) {
2689
+ if (rb_respond_to(rval, id_call)) {
2690
+ if (post_filter) {
2691
+ rb_raise(rb_eArgError, "Cannot pass both :filter_proc and "
2692
+ ":c_filter_proc to the same search");
2693
+ }
2694
+ post_filter_holder.filter_func = &call_filter_proc;
2695
+ post_filter_holder.arg = (void *)rval;
2696
+ post_filter = &post_filter_holder;
2697
+ }
2698
+ else {
2699
+ post_filter = DATA_PTR(rval);
2700
+ }
2701
+ }
2702
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_sort))) {
2703
+ if (TYPE(rval) != T_DATA || CLASS_OF(rval) == cSortField) {
2704
+ rval = frb_sort_init(1, &rval, frb_sort_alloc(cSort));
2705
+ }
2706
+ Data_Get_Struct(rval, Sort, sort);
2707
+ }
2708
+ }
2709
+
2710
+ td = sea->search(sea, query, offset, limit, filter, sort, post_filter, 0);
2711
+ if (filter) filt_deref(filter);
2712
+ return td;
2713
+ }
2714
+
2715
+ /*
2716
+ * call-seq:
2717
+ * searcher.search(query, options = {}) -> TopDocs
2718
+ *
2719
+ * Run a query through the Searcher on the index. A TopDocs object is
2720
+ * returned with the relevant results. The +query+ is a built in Query
2721
+ * object. Here are the options;
2722
+ *
2723
+ * === Options
2724
+ *
2725
+ * :offset:: Default: 0. The offset of the start of the section of the
2726
+ * result-set to return. This is used for paging through
2727
+ * results. Let's say you have a page size of 10. If you
2728
+ * don't find the result you want among the first 10 results
2729
+ * then set +:offset+ to 10 and look at the next 10 results,
2730
+ * then 20 and so on.
2731
+ * :limit:: Default: 10. This is the number of results you want
2732
+ * returned, also called the page size. Set +:limit+ to
2733
+ * +:all+ to return all results
2734
+ * :sort:: A Sort object or sort string describing how the field
2735
+ * should be sorted. A sort string is made up of field names
2736
+ * which cannot contain spaces and the word "DESC" if you
2737
+ * want the field reversed, all separated by commas. For
2738
+ * example; "rating DESC, author, title". Note that Ferret
2739
+ * will try to determine a field's type by looking at the
2740
+ * first term in the index and seeing if it can be parsed as
2741
+ * an integer or a float. Keep this in mind as you may need
2742
+ * to specify a field's type to sort it correctly. For more
2743
+ * on this, see the documentation for SortField
2744
+ * :filter:: a Filter object to filter the search results with
2745
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2746
+ * and the Searcher object as its parameters and returns
2747
+ * either a Boolean value specifying whether the result
2748
+ * should be included in the result set, or a Float between 0
2749
+ * and 1.0 to be used as a factor to scale the score of the
2750
+ * object. This can be used, for example, to weight the score
2751
+ * of a matched document by its age.
2752
+ */
2753
+ static VALUE
2754
+ frb_sea_search(int argc, VALUE *argv, VALUE self)
2755
+ {
2756
+ GET_SEA();
2757
+ VALUE rquery, roptions;
2758
+ Query *query;
2759
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2760
+ Data_Get_Struct(rquery, Query, query);
2761
+ return frb_get_td(frb_sea_search_internal(query, roptions, sea), self);
2762
+ }
2763
+
2764
+ /*
2765
+ * call-seq:
2766
+ * searcher.search_each(query, options = {}) {|doc_id, score| do_something}
2767
+ * -> total_hits
2768
+ *
2769
+ * Run a query through the Searcher on the index. A TopDocs object is
2770
+ * returned with the relevant results. The +query+ is a Query object. The
2771
+ * Searcher#search_each method yields the internal document id (used to
2772
+ * reference documents in the Searcher object like this; +searcher[doc_id]+)
2773
+ * and the search score for that document. It is possible for the score to be
2774
+ * greater than 1.0 for some queries and taking boosts into account. This
2775
+ * method will also normalize scores to the range 0.0..1.0 when the max-score
2776
+ * is greater than 1.0. Here are the options;
2777
+ *
2778
+ * === Options
2779
+ *
2780
+ * :offset:: Default: 0. The offset of the start of the section of the
2781
+ * result-set to return. This is used for paging through
2782
+ * results. Let's say you have a page size of 10. If you
2783
+ * don't find the result you want among the first 10 results
2784
+ * then set +:offset+ to 10 and look at the next 10 results,
2785
+ * then 20 and so on.
2786
+ * :limit:: Default: 10. This is the number of results you want
2787
+ * returned, also called the page size. Set +:limit+ to
2788
+ * +:all+ to return all results
2789
+ * :sort:: A Sort object or sort string describing how the field
2790
+ * should be sorted. A sort string is made up of field names
2791
+ * which cannot contain spaces and the word "DESC" if you
2792
+ * want the field reversed, all separated by commas. For
2793
+ * example; "rating DESC, author, title". Note that Ferret
2794
+ * will try to determine a field's type by looking at the
2795
+ * first term in the index and seeing if it can be parsed as
2796
+ * an integer or a float. Keep this in mind as you may need
2797
+ * to specify a field's type to sort it correctly. For more
2798
+ * on this, see the documentation for SortField
2799
+ * :filter:: a Filter object to filter the search results with
2800
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2801
+ * and the Searcher object as its parameters and returns a
2802
+ * Boolean value specifying whether the result should be
2803
+ * included in the result set.
2804
+ */
2805
+ static VALUE
2806
+ frb_sea_search_each(int argc, VALUE *argv, VALUE self)
2807
+ {
2808
+ int i;
2809
+ Query *q;
2810
+ float max_score;
2811
+ TopDocs *td;
2812
+ VALUE rquery, roptions, rtotal_hits;
2813
+ GET_SEA();
2814
+
2815
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2816
+
2817
+ #ifndef FRT_RUBY_VERSION_1_9
2818
+ rb_thread_critical = Qtrue;
2819
+ #endif
2820
+ Data_Get_Struct(rquery, Query, q);
2821
+ td = frb_sea_search_internal(q, roptions, sea);
2822
+
2823
+ max_score = (td->max_score > 1.0) ? td->max_score : 1.0;
2824
+
2825
+ /* yield normalized scores */
2826
+ for (i = 0; i < td->size; i++) {
2827
+ rb_yield_values(2, INT2FIX(td->hits[i]->doc),
2828
+ rb_float_new((double)(td->hits[i]->score/max_score)));
2829
+ }
2830
+
2831
+ rtotal_hits = INT2FIX(td->total_hits);
2832
+ td_destroy(td);
2833
+
2834
+ #ifndef FRT_RUBY_VERSION_1_9
2835
+ rb_thread_critical = 0;
2836
+ #endif
2837
+ return rtotal_hits;
2838
+ }
2839
+
2840
+ /*
2841
+ * call-seq:
2842
+ * searcher.scan(query, options = {}) -> Array (doc_nums)
2843
+ *
2844
+ * Run a query through the Searcher on the index, ignoring scoring and
2845
+ * starting at +:start_doc+ and stopping when +:limit+ matches have been
2846
+ * found. It returns an array of the matching document numbers.
2847
+ *
2848
+ * There is a big performance advantage when using this search method on a very
2849
+ * large index when there are potentially thousands of matching documents and
2850
+ * you only want say 50 of them. The other search methods need to look at
2851
+ * every single match to decide which one has the highest score. This search
2852
+ * method just needs to find +:limit+ number of matches before it returns.
2853
+ *
2854
+ * === Options
2855
+ *
2856
+ * :start_doc:: Default: 0. The start document to start the search from.
2857
+ * NOTE very carefully that this is not the same as the
2858
+ * +:offset+ parameter used in the other search methods which
2859
+ * refers to the offset in the result-set. This is the
2860
+ * document to start the scan from. So if you are scanning
2861
+ * through the index in increments of 50 documents at a time
2862
+ * you need to use the last matched doc in the previous
2863
+ * search to start your next search. See the example below.
2864
+ * :limit:: Default: 50. This is the number of results you want
2865
+ * returned, also called the page size. Set +:limit+ to
2866
+ * +:all+ to return all results.
2867
+ * TODO: add option to return loaded documents instead
2868
+ *
2869
+ * === Example
2870
+ *
2871
+ * start_doc = 0
2872
+ * begin
2873
+ * results = @searcher.scan(query, :start_doc => start_doc)
2874
+ * yield results # or do something with them
2875
+ * start_doc = results.last
2876
+ * # start_doc will be nil now if results is empty, ie no more matches
2877
+ * end while start_doc
2878
+ */
2879
+ static VALUE
2880
+ frb_sea_scan(int argc, VALUE *argv, VALUE self)
2881
+ {
2882
+ Query *q;
2883
+ int i, count;
2884
+ VALUE rval, rquery, roptions;
2885
+ int *doc_array;
2886
+ VALUE rdoc_array;
2887
+ int start_doc = 0, limit = 50;
2888
+ GET_SEA();
2889
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2890
+ Data_Get_Struct(rquery, Query, q);
2891
+
2892
+ if (Qnil != roptions) {
2893
+ Check_Type(roptions, T_HASH);
2894
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_start_doc))) {
2895
+ Check_Type(rval, T_FIXNUM);
2896
+ start_doc = FIX2INT(rval);
2897
+ if (start_doc < 0) {
2898
+ rb_raise(rb_eArgError, ":start_doc must be >= 0");
2899
+ }
2900
+ }
2901
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_limit))) {
2902
+ if (TYPE(rval) == T_FIXNUM) {
2903
+ limit = FIX2INT(rval);
2904
+ if (limit <= 0) {
2905
+ rb_raise(rb_eArgError, ":limit must be > 0");
2906
+ }
2907
+ }
2908
+ else if (rval == sym_all) {
2909
+ limit = INT_MAX;
2910
+ }
2911
+ else {
2912
+ rb_raise(rb_eArgError, "%s is not a sensible :limit value "
2913
+ "Please use a positive integer or :all",
2914
+ rb_obj_as_string(rval));
2915
+ }
2916
+ }
2917
+ }
2918
+
2919
+ #ifndef FRT_RUBY_VERSION_1_9
2920
+ rb_thread_critical = Qtrue;
2921
+ #endif
2922
+ doc_array = ALLOC_N(int, limit);
2923
+ count = searcher_search_unscored(sea, q, doc_array, limit, start_doc);
2924
+ rdoc_array = rb_ary_new2(count);
2925
+ for (i = 0; i < count; i++) {
2926
+ rb_ary_store(rdoc_array, i, INT2FIX(doc_array[i]));
2927
+ }
2928
+ free(doc_array);
2929
+ #ifndef FRT_RUBY_VERSION_1_9
2930
+ rb_thread_critical = 0;
2931
+ #endif
2932
+ return rdoc_array;
2933
+ }
2934
+
2935
+ /*
2936
+ * call-seq:
2937
+ * searcher.explain(query, doc_id) -> Explanation
2938
+ *
2939
+ * Create an explanation object to explain the score returned for a
2940
+ * particular document at +doc_id+ in the index for the query +query+.
2941
+ *
2942
+ * Usually used like this;
2943
+ *
2944
+ * puts searcher.explain(query, doc_id).to_s
2945
+ */
2946
+ static VALUE
2947
+ frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
2948
+ {
2949
+ GET_SEA();
2950
+ Query *query;
2951
+ Explanation *expl;
2952
+ Data_Get_Struct(rquery, Query, query);
2953
+ expl = sea->explain(sea, query, FIX2INT(rdoc_id));
2954
+ return Data_Wrap_Struct(cExplanation, NULL, &expl_destroy, expl);
2955
+ }
2956
+
2957
+ /*
2958
+ * call-seq:
2959
+ * searcher.highlight(query, doc_id, field, options = {}) -> Array
2960
+ *
2961
+ * Returns an array of strings with the matches highlighted.
2962
+ *
2963
+ * === Options
2964
+ *
2965
+ * :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
2966
+ * terms will be in the centre of the excerpt. Set to
2967
+ * :all to highlight the entire field.
2968
+ * :num_excerpts:: Default: 2. Number of excerpts to return.
2969
+ * :pre_tag:: Default: "<b>". Tag to place to the left of the match.
2970
+ * You'll probably want to change this to a "<span>" tag
2971
+ * with a class. Try "\033[7m" for use in a terminal.
2972
+ * :post_tag:: Default: "</b>". This tag should close the +:pre_tag+.
2973
+ * Try tag "\033[m" in the terminal.
2974
+ * :ellipsis:: Default: "...". This is the string that is appended at
2975
+ * the beginning and end of excerpts (unless the excerpt
2976
+ * hits the start or end of the field). You'll probably
2977
+ * want to change this to a Unicode ellipsis character.
2978
+ */
2979
+ static VALUE
2980
+ frb_sea_highlight(int argc, VALUE *argv, VALUE self)
2981
+ {
2982
+ GET_SEA();
2983
+ VALUE rquery, rdoc_id, rfield, roptions, v;
2984
+ Query *query;
2985
+ int excerpt_length = 150;
2986
+ int num_excerpts = 2;
2987
+ char *pre_tag = "<b>";
2988
+ char *post_tag = "</b>";
2989
+ char *ellipsis = "...";
2990
+ char **excerpts;
2991
+
2992
+ rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
2993
+ Data_Get_Struct(rquery, Query, query);
2994
+ if (argc > 3) {
2995
+ if (TYPE(roptions) != T_HASH) {
2996
+ rb_raise(rb_eArgError, "The fourth argument to Searcher#highlight must be a hash");
2997
+ }
2998
+ if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
2999
+ num_excerpts = FIX2INT(v);
3000
+ }
3001
+ if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
3002
+ if (v == sym_all) {
3003
+ num_excerpts = 1;
3004
+ excerpt_length = INT_MAX/2;
3005
+ }
3006
+ else {
3007
+ excerpt_length = FIX2INT(v);
3008
+ }
3009
+ }
3010
+ if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
3011
+ pre_tag = rs2s(rb_obj_as_string(v));
3012
+ }
3013
+ if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
3014
+ post_tag = rs2s(rb_obj_as_string(v));
3015
+ }
3016
+ if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
3017
+ ellipsis = rs2s(rb_obj_as_string(v));
3018
+ }
3019
+ }
3020
+
3021
+ if ((excerpts = searcher_highlight(sea,
3022
+ query,
3023
+ FIX2INT(rdoc_id),
3024
+ frb_field(rfield),
3025
+ excerpt_length,
3026
+ num_excerpts,
3027
+ pre_tag,
3028
+ post_tag,
3029
+ ellipsis)) != NULL) {
3030
+ const int size = ary_size(excerpts);
3031
+ int i;
3032
+ VALUE rexcerpts = rb_ary_new2(size);
3033
+
3034
+ for (i = 0; i < size; i++) {
3035
+ rb_ary_store(rexcerpts, i, rb_str_new2(excerpts[i]));
3036
+ }
3037
+ ary_destroy(excerpts, &free);
3038
+ return rexcerpts;
3039
+ }
3040
+ return Qnil;
3041
+ }
3042
+
3043
+ /****************************************************************************
3044
+ *
3045
+ * Searcher Methods
3046
+ *
3047
+ ****************************************************************************/
3048
+
3049
+ static void
3050
+ frb_sea_mark(void *p)
3051
+ {
3052
+ IndexSearcher *isea = (IndexSearcher *)p;
3053
+ frb_gc_mark(isea->ir);
3054
+ frb_gc_mark(isea->ir->store);
3055
+ }
3056
+
3057
/*
 * Wrap the C IndexReader +ir+ in a new Ruby IndexReader object, store that
 * object in +rir+ and register the pair through object_add().
 */
#define FRT_GET_IR(rir, ir) do {\
    (rir) = Data_Wrap_Struct(cIndexReader, &frb_ir_mark, &frb_ir_free, (ir));\
    object_add((ir), (rir));\
} while (0)
3061
+
3062
+ /*
3063
+ * call-seq:
3064
+ * Searcher.new(obj) -> Searcher
3065
+ *
3066
+ * Create a new Searcher object. +obj+ can either be a string path to an
3067
+ * index directory on the file-system, an actual Ferret::Store::Directory
3068
+ * object or a Ferret::Index::IndexReader. You should use the IndexReader for
3069
+ * searching multiple indexes. Just open the IndexReader on multiple
3070
+ * directories.
3071
+ */
3072
+ static VALUE
3073
+ frb_sea_init(VALUE self, VALUE obj)
3074
+ {
3075
+ Store *store = NULL;
3076
+ IndexReader *ir = NULL;
3077
+ Searcher *sea;
3078
+ if (TYPE(obj) == T_STRING) {
3079
+ frb_create_dir(obj);
3080
+ store = open_fs_store(StringValueCStr(obj));
3081
+ ir = ir_open(store);
3082
+ DEREF(store);
3083
+ FRT_GET_IR(obj, ir);
3084
+ } else {
3085
+ Check_Type(obj, T_DATA);
3086
+ if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
3087
+ Data_Get_Struct(obj, Store, store);
3088
+ ir = ir_open(store);
3089
+ FRT_GET_IR(obj, ir);
3090
+ } else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
3091
+ Data_Get_Struct(obj, IndexReader, ir);
3092
+ } else {
3093
+ rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
3094
+ }
3095
+ }
3096
+
3097
+ sea = isea_new(ir);
3098
+ ((IndexSearcher *)sea)->close_ir = false;
3099
+ Frt_Wrap_Struct(self, &frb_sea_mark, &frb_sea_free, sea);
3100
+ object_add(sea, self);
3101
+
3102
+ return self;
3103
+ }
3104
+
3105
+ /****************************************************************************
3106
+ *
3107
+ * MultiSearcher Methods
3108
+ *
3109
+ ****************************************************************************/
3110
+
3111
+ static void
3112
+ frb_ms_free(void *p)
3113
+ {
3114
+ Searcher *sea = (Searcher *)p;
3115
+ MultiSearcher *msea = (MultiSearcher *)sea;
3116
+ free(msea->searchers);
3117
+ object_del(sea);
3118
+ searcher_close(sea);
3119
+ }
3120
+
3121
+ static void
3122
+ frb_ms_mark(void *p)
3123
+ {
3124
+ int i;
3125
+ MultiSearcher *msea = (MultiSearcher *)p;
3126
+ for (i = 0; i < msea->s_cnt; i++) {
3127
+ frb_gc_mark(msea->searchers[i]);
3128
+ }
3129
+ }
3130
+
3131
+ /*
3132
+ * call-seq:
3133
+ * MultiSearcher.new(searcher*) -> searcher
3134
+ *
3135
+ * Create a new MultiSearcher by passing a list of subsearchers to the
3136
+ * constructor.
3137
+ */
3138
+ static VALUE
3139
+ frb_ms_init(int argc, VALUE *argv, VALUE self)
3140
+ {
3141
+ int i, j, top = 0, capa = argc;
3142
+
3143
+ VALUE rsearcher;
3144
+ Searcher **searchers = ALLOC_N(Searcher *, capa);
3145
+ Searcher *s;
3146
+
3147
+ for (i = 0; i < argc; i++) {
3148
+ rsearcher = argv[i];
3149
+ switch (TYPE(rsearcher)) {
3150
+ case T_ARRAY:
3151
+ capa += RARRAY_LEN(rsearcher);
3152
+ REALLOC_N(searchers, Searcher *, capa);
3153
+ for (j = 0; j < RARRAY_LEN(rsearcher); j++) {
3154
+ VALUE rs = RARRAY_PTR(rsearcher)[j];
3155
+ Data_Get_Struct(rs, Searcher, s);
3156
+ searchers[top++] = s;
3157
+ }
3158
+ break;
3159
+ case T_DATA:
3160
+ Data_Get_Struct(rsearcher, Searcher, s);
3161
+ searchers[top++] = s;
3162
+ break;
3163
+ default:
3164
+ rb_raise(rb_eArgError, "Can't add class %s to MultiSearcher",
3165
+ rb_obj_classname(rsearcher));
3166
+ break;
3167
+ }
3168
+ }
3169
+ s = msea_new(searchers, top, false);
3170
+ Frt_Wrap_Struct(self, &frb_ms_mark, &frb_ms_free, s);
3171
+ object_add(s, self);
3172
+ return self;
3173
+ }
3174
+
3175
+ /****************************************************************************
3176
+ *
3177
+ * Init Function
3178
+ *
3179
+ ****************************************************************************/
3180
+
3181
+ /* rdochack
3182
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3183
+ */
3184
+
3185
+ /*
3186
+ * Document-class: Ferret::Search::Hit
3187
+ *
3188
+ * == Summary
3189
+ *
3190
+ * A hit represents a single document match for a search. It holds the
3191
+ * document id of the document that matches along with the score for the
3192
+ * match. The score is a positive Float value. The score contained in a hit
3193
+ * is not normalized so it can be greater than 1.0. To normalize scores to
3194
+ * the range 0.0..1.0 divide the scores by TopDocs#max_score.
3195
+ */
3196
+ static void
3197
+ Init_Hit(void)
3198
+ {
3199
+ const char *hit_class = "Hit";
3200
+ /* rdochack
3201
+ cHit = rb_define_class_under(mSearch, "Hit", rb_cObject);
3202
+ */
3203
+ cHit = rb_struct_define(hit_class, "doc", "score", NULL);
3204
+ rb_set_class_path(cHit, mSearch, hit_class);
3205
+ rb_const_set(mSearch, rb_intern(hit_class), cHit);
3206
+ id_doc = rb_intern("doc");
3207
+ id_score = rb_intern("score");
3208
+ }
3209
+
3210
+ /*
3211
+ * Document-class: Ferret::Search::TopDocs
3212
+ *
3213
+ * == Summary
3214
+ *
3215
+ * A TopDocs object holds a result set for a search. The number of documents
3216
+ * that matched the query is held in TopDocs#total_hits. The actual
3217
+ * results are in the Array TopDocs#hits. The number of hits returned is
3218
+ * limited by the +:limit+ option so the size of the +hits+ array will not
3219
+ * always be equal to the value of +total_hits+. Finally TopDocs#max_score
3220
+ * holds the maximum score of any match (not necessarily the maximum score
3221
+ * contained in the +hits+ array) so it can be used to normalize scores. For
3222
+ * example, to print doc ids with scores out of 100.0 you could do this;
3223
+ *
3224
+ * top_docs.hits.each do |hit|
3225
+ * puts "#{hit.doc} scored #{hit.score * 100.0 / top_docs.max_score}"
3226
+ * end
3227
+ */
3228
+ static void
3229
+ Init_TopDocs(void)
3230
+ {
3231
+ const char *td_class = "TopDocs";
3232
+ /* rdochack
3233
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3234
+ */
3235
+ cTopDocs = rb_struct_define(td_class,
3236
+ "total_hits",
3237
+ "hits",
3238
+ "max_score",
3239
+ "searcher",
3240
+ NULL);
3241
+ rb_set_class_path(cTopDocs, mSearch, td_class);
3242
+ rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
3243
+ rb_define_method(cTopDocs, "to_s", frb_td_to_s, -1);
3244
+ rb_define_method(cTopDocs, "to_json", frb_td_to_json, 0);
3245
+ id_hits = rb_intern("hits");
3246
+ id_total_hits = rb_intern("total_hits");
3247
+ id_max_score = rb_intern("max_score");
3248
+ id_searcher = rb_intern("searcher");
3249
+ }
3250
+
3251
+ /*
3252
+ * Document-class: Ferret::Search::Explanation
3253
+ *
3254
+ * == Summary
3255
+ *
3256
+ * Explanation is used to give a description of why a document matched with
3257
+ * the score that it did. Use the Explanation#to_s or Explanation#to_html
3258
+ * methods to display the explanation in a human readable format. Creating
3259
+ * explanations is an expensive operation so it should only be used for
3260
+ * debugging purposes. To create an explanation use the Searcher#explain
3261
+ * method.
3262
+ *
3263
+ * == Example
3264
+ *
3265
+ * puts searcher.explain(query, doc_id).to_s
3266
+ */
3267
+ static void
3268
+ Init_Explanation(void)
3269
+ {
3270
+ cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
3271
+ rb_define_alloc_func(cExplanation, frb_data_alloc);
3272
+
3273
+ rb_define_method(cExplanation, "to_s", frb_expl_to_s, 0);
3274
+ rb_define_method(cExplanation, "to_html", frb_expl_to_html, 0);
3275
+ rb_define_method(cExplanation, "score", frb_expl_score, 0);
3276
+ }
3277
+
3278
+ /*
3279
+ * Document-class: Ferret::Search::Query
3280
+ *
3281
+ * == Summary
3282
+ *
3283
+ * Abstract class representing a query to the index. There are a number of
3284
+ * concrete Query implementations;
3285
+ *
3286
+ * * TermQuery
3287
+ * * MultiTermQuery
3288
+ * * BooleanQuery
3289
+ * * PhraseQuery
3290
+ * * ConstantScoreQuery
3291
+ * * FilteredQuery
3292
+ * * MatchAllQuery
3293
+ * * RangeQuery
3294
+ * * WildcardQuery
3295
+ * * FuzzyQuery
3296
+ * * PrefixQuery
3297
+ * * Spans::SpanTermQuery
3298
+ * * Spans::SpanFirstQuery
3299
+ * * Spans::SpanOrQuery
3300
+ * * Spans::SpanNotQuery
3301
+ * * Spans::SpanNearQuery
3302
+ *
3303
+ * Explore these classes for the query right for you. The queries are passed
3304
+ * to the Searcher#search* methods.
3305
+ *
3306
+ * === Query Boosts
3307
+ *
3308
+ * Queries have a boost value so that you can make the results of one query
3309
+ * more important than the results of another query when combining them in a
3310
+ * BooleanQuery. For example, documents on Rails. To avoid getting results
3311
+ * for train rails you might also add the term Ruby but Rails is the more
3312
+ * important term so you'd give it a boost.
3313
+ */
3314
+ static void
3315
+ Init_Query(void)
3316
+ {
3317
+ cQuery = rb_define_class_under(mSearch, "Query", rb_cObject);
3318
+
3319
+ rb_define_method(cQuery, "to_s", frb_q_to_s, -1);
3320
+ rb_define_method(cQuery, "boost", frb_q_get_boost, 0);
3321
+ rb_define_method(cQuery, "boost=", frb_q_set_boost, 1);
3322
+ rb_define_method(cQuery, "eql?", frb_q_eql, 1);
3323
+ rb_define_method(cQuery, "==", frb_q_eql, 1);
3324
+ rb_define_method(cQuery, "hash", frb_q_hash, 0);
3325
+ rb_define_method(cQuery, "terms", frb_q_get_terms, 1);
3326
+ }
3327
+
3328
+ /*
3329
+ * Document-class: Ferret::Search::TermQuery
3330
+ *
3331
+ * == Summary
3332
+ *
3333
+ * TermQuery is the most basic query and it is the building block for most
3334
+ * other queries. It basically matches documents that contain a specific term
3335
+ * in a specific field.
3336
+ *
3337
+ * == Example
3338
+ *
3339
+ * query = TermQuery.new(:content, "rails")
3340
+ *
3341
+ * # untokenized fields can also be searched with this query;
3342
+ * query = TermQuery.new(:title, "Shawshank Redemption")
3343
+ *
3344
+ * Notice the all lowercase term Rails. This is important as most analyzers will
3345
+ * downcase all text added to the index. The title in this case was not
3346
+ * tokenized so the case would have been left as is.
3347
+ */
3348
+ static void
3349
+ Init_TermQuery(void)
3350
+ {
3351
+ cTermQuery = rb_define_class_under(mSearch, "TermQuery", cQuery);
3352
+ rb_define_alloc_func(cTermQuery, frb_data_alloc);
3353
+
3354
+ rb_define_method(cTermQuery, "initialize", frb_tq_init, 2);
3355
+ }
3356
+
3357
+ /*
3358
+ * Document-class: Ferret::Search::MultiTermQuery
3359
+ *
3360
+ * == Summary
3361
+ *
3362
+ * MultiTermQuery matches documents that contain one of a list of terms in a
3363
+ * specific field. This is the basic building block for queries such as;
3364
+ *
3365
+ * * PrefixQuery
3366
+ * * WildcardQuery
3367
+ * * FuzzyQuery
3368
+ *
3369
+ * MultiTermQuery is very similar to a boolean "Or" query. It is highly
3370
+ * optimized though as it focuses on a single field.
3371
+ *
3372
+ * == Example
3373
+ *
3374
+ * multi_term_query = MultiTermQuery.new(:content, :max_terms => 10)
3375
+ *
3376
+ * multi_term_query << "Ruby" << "Ferret" << "Rails" << "Search"
3377
+ */
3378
+ static void
3379
+ Init_MultiTermQuery(void)
3380
+ {
3381
+ id_default_max_terms = rb_intern("@@default_max_terms");
3382
+ sym_max_terms = ID2SYM(rb_intern("max_terms"));
3383
+ sym_min_score = ID2SYM(rb_intern("min_score"));
3384
+
3385
+ cMultiTermQuery = rb_define_class_under(mSearch, "MultiTermQuery", cQuery);
3386
+ rb_define_alloc_func(cMultiTermQuery, frb_data_alloc);
3387
+ #ifdef FRT_RUBY_VERSION_1_9
3388
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, INT2FIX(512));
3389
+ #else
3390
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, INT2FIX(512), Qfalse);
3391
+ #endif
3392
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms",
3393
+ frb_mtq_get_dmt, 0);
3394
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms=",
3395
+ frb_mtq_set_dmt, 1);
3396
+
3397
+ rb_define_method(cMultiTermQuery, "initialize", frb_mtq_init, -1);
3398
+ rb_define_method(cMultiTermQuery, "add_term", frb_mtq_add_term, -1);
3399
+ rb_define_method(cMultiTermQuery, "<<", frb_mtq_add_term, -1);
3400
+ }
3401
+
3402
+ static void Init_BooleanClause(void);
3403
+
3404
+ /*
3405
+ * Document-class: Ferret::Search::BooleanQuery
3406
+ *
3407
+ * == Summary
3408
+ *
3409
+ * A BooleanQuery is used for combining many queries into one. This is best
3410
+ * illustrated with an example.
3411
+ *
3412
+ * == Example
3413
+ *
3414
+ * Lets say we wanted to find all documents with the term "Ruby" in the
3415
+ * +:title+ and the term "Ferret" in the +:content+ field or the +:title+
3416
+ * field written before January 2006. You could build the query like this.
3417
+ *
3418
+ * tq1 = TermQuery.new(:title, "ruby")
3419
+ * tq21 = TermQuery.new(:title, "ferret")
3420
+ * tq22 = TermQuery.new(:content, "ferret")
3421
+ * bq2 = BooleanQuery.new
3422
+ * bq2 << tq21 << tq22
3423
+ *
3424
+ * rq3 = RangeQuery.new(:written, :< => "200601")
3425
+ *
3426
+ * query = BooleanQuery.new
3427
+ * query.add_query(tq1, :must).add_query(bq2, :must).add_query(rq3, :must)
3428
+ */
3429
+ static void
3430
+ Init_BooleanQuery(void)
3431
+ {
3432
+ cBooleanQuery = rb_define_class_under(mSearch, "BooleanQuery", cQuery);
3433
+ rb_define_alloc_func(cBooleanQuery, frb_data_alloc);
3434
+
3435
+ rb_define_method(cBooleanQuery, "initialize", frb_bq_init, -1);
3436
+ rb_define_method(cBooleanQuery, "add_query", frb_bq_add_query, -1);
3437
+ rb_define_method(cBooleanQuery, "<<", frb_bq_add_query, -1);
3438
+
3439
+ Init_BooleanClause();
3440
+ }
3441
+
3442
+ /*
3443
+ * Document-class: Ferret::Search::BooleanQuery::BooleanClause
3444
+ *
3445
+ * == Summary
3446
+ *
3447
+ * A BooleanClause holds a single query within a BooleanQuery specifying
3448
+ * whether the query +:must+ match, +:should+ match or +:must_not+ match.
3449
+ * BooleanClauses can be used to pass a clause from one BooleanQuery to
3450
+ * another although it is generally easier just to add a query directly to a
3451
+ * BooleanQuery using the BooleanQuery#add_query method.
3452
+ *
3453
+ * == Example
3454
+ *
3455
+ * clause1 = BooleanClause.new(query1, :should)
3456
+ * clause2 = BooleanClause.new(query2, :should)
3457
+ *
3458
+ * query = BooleanQuery.new
3459
+ * query << clause1 << clause2
3460
+ */
3461
+ static void
3462
+ Init_BooleanClause(void)
3463
+ {
3464
+ sym_should = ID2SYM(rb_intern("should"));
3465
+ sym_must = ID2SYM(rb_intern("must"));
3466
+ sym_must_not = ID2SYM(rb_intern("must_not"));
3467
+
3468
+ cBooleanClause = rb_define_class_under(cBooleanQuery, "BooleanClause",
3469
+ rb_cObject);
3470
+ rb_define_alloc_func(cBooleanClause, frb_data_alloc);
3471
+
3472
+ rb_define_method(cBooleanClause, "initialize", frb_bc_init, -1);
3473
+ rb_define_method(cBooleanClause, "query", frb_bc_get_query, 0);
3474
+ rb_define_method(cBooleanClause, "query=", frb_bc_set_query, 1);
3475
+ rb_define_method(cBooleanClause, "required?", frb_bc_is_required, 0);
3476
+ rb_define_method(cBooleanClause, "prohibited?", frb_bc_is_prohibited, 0);
3477
+ rb_define_method(cBooleanClause, "occur=", frb_bc_set_occur, 1);
3478
+ rb_define_method(cBooleanClause, "to_s", frb_bc_to_s, 0);
3479
+ }
3480
+
3481
+ /*
3482
+ * Document-class: Ferret::Search::RangeQuery
3483
+ *
3484
+ * == Summary
3485
+ *
3486
+ * RangeQuery is used to find documents with terms in a range.
3487
+ * RangeQuerys are usually used on untokenized fields like date fields or
3488
+ * number fields.
3489
+ *
3490
+ * == Example
3491
+ *
3492
+ * To find all documents written between January 1st 2006 and January 26th
3493
+ * 2006 inclusive you would write the query like this;
3494
+ *
3495
+ * query = RangeQuery.new(:create_date, :>= => "20060101", :<= => "20060126")
3496
+ *
3497
+ * == Range queries on numbers
3498
+ *
3499
+ * There is now a new query called TypedRangeQuery which detects the type of
3500
+ * the range and if the range is numerical it will find a numerical range.
3501
+ * This allows you to do range queries with negative numbers and without
3502
+ * having to pad the field. However, RangeQuery will perform a lot faster on
3503
+ * large indexes so if you are working with a very large index you will need
3504
+ * to normalize your number fields so that they are a fixed width and always
3505
+ * positive. That way the standard String range query will do fine.
3506
+ *
3507
+ * For example, if you have the numbers;
3508
+ *
3509
+ * [10, -999, -90, 100, 534]
3510
+ *
3511
+ * Then they can be normalized to;
3512
+ *
3513
+ * # note that we have added 1000 to all numbers to make them all positive
3514
+ * [1010, 0001, 0910, 1100, 1534]
3515
+ *
3516
+ */
3517
+ static void
3518
+ Init_RangeQuery(void)
3519
+ {
3520
+ sym_upper = ID2SYM(rb_intern("upper"));
3521
+ sym_lower = ID2SYM(rb_intern("lower"));
3522
+ sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
3523
+ sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
3524
+ sym_include_upper = ID2SYM(rb_intern("include_upper"));
3525
+ sym_include_lower = ID2SYM(rb_intern("include_lower"));
3526
+
3527
+ sym_less_than = ID2SYM(rb_intern("<"));
3528
+ sym_less_than_or_equal_to = ID2SYM(rb_intern("<="));
3529
+ sym_greater_than = ID2SYM(rb_intern(">"));
3530
+ sym_greater_than_or_equal_to = ID2SYM(rb_intern(">="));
3531
+
3532
+ cRangeQuery = rb_define_class_under(mSearch, "RangeQuery", cQuery);
3533
+ rb_define_alloc_func(cRangeQuery, frb_data_alloc);
3534
+
3535
+ rb_define_method(cRangeQuery, "initialize", frb_rq_init, 2);
3536
+ }
3537
+
3538
+ /*
3539
+ * Document-class: Ferret::Search::TypedRangeQuery
3540
+ *
3541
+ * == Summary
3542
+ *
3543
+ * TypedRangeQuery is used to find documents with terms in a range.
3544
+ * RangeQuerys are usually used on untokenized fields like date fields or
3545
+ * number fields. TypedRangeQuery is particularly useful for fields with
3546
+ * unnormalized numbers, both positive and negative, integer and float.
3547
+ *
3548
+ * == Example
3549
+ *
3550
+ * To find all documents written between January 1st 2006 and January 26th
3551
+ * 2006 inclusive you would write the query like this;
3552
+ *
3553
+ * query = TypedRangeQuery.new(:create_date, :>= => "-1.0", :<= => "10.0")
3554
+ *
3555
+ * == Performance Note
3556
+ *
3557
+ * TypedRangeQuery works by converting all the terms in a field to numbers
3558
+ * and then comparing those numbers with the range boundaries. This can have
3559
+ * quite an impact on performance on large indexes so in those cases it is
3560
+ * usually better to use a standard RangeQuery. This will require a little
3561
+ * work on your behalf. See RangeQuery for notes on how to do this.
3562
+ */
3563
+ static void
3564
+ Init_TypedRangeQuery(void)
3565
+ {
3566
+ cTypedRangeQuery =
3567
+ rb_define_class_under(mSearch, "TypedRangeQuery", cQuery);
3568
+ rb_define_alloc_func(cTypedRangeQuery, frb_data_alloc);
3569
+
3570
+ rb_define_method(cTypedRangeQuery, "initialize", frb_trq_init, 2);
3571
+ }
3572
+
3573
+ /*
3574
+ * Document-class: Ferret::Search::PhraseQuery
3575
+ *
3576
+ * == Summary
3577
+ *
3578
+ * PhraseQuery matches phrases like "the quick brown fox". Most people are
3579
+ * familiar with phrase queries having used them in most internet search
3580
+ * engines.
3581
+ *
3582
+ * === Slop
3583
+ *
3584
+ * Ferret's phrase queries are slightly more advanced. You can match phrases
3585
+ * with a slop, ie the match isn't exact but it is good enough. The slop is
3586
+ * basically the word edit distance of the phrase. For example, "the quick
3587
+ * brown fox" with a slop of 1 would match "the quick little brown fox". With
3588
+ * a slop of 2 it would match "the brown quick fox".
3589
+ *
3590
+ * query = PhraseQuery.new(:content)
3591
+ * query << "the" << "quick" << "brown" << "fox"
3592
+ *
3593
+ * # matches => "the quick brown fox"
3594
+ *
3595
+ * query.slop = 1
3596
+ * # matches => "the quick little brown fox"
3597
+ * |__1__^
3598
+ *
3599
+ * query.slop = 2
3600
+ * # matches => "the brown quick _____ fox"
3601
+ * ^_____2_____|
3602
+ *
3603
+ * == Multi-PhraseQuery
3604
+ *
3605
+ * Phrase queries can also have multiple terms in a single position. Let's
3606
+ * say for example that we want to match synonyms for quick like "fast" and
3607
+ * "speedy". You could build the query like this;
3608
+ *
3609
+ * query = PhraseQuery.new(:content)
3610
+ * query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox"
3611
+ * # matches => "the quick red fox"
3612
+ * # matches => "the fast brown fox"
3613
+ *
3614
+ * query.slop = 1
3615
+ * # matches => "the speedy little red fox"
3616
+ *
3617
+ * You can also leave positions blank. Lets say you wanted to match "the
3618
+ * quick <> fox" where "<>" could match anything (but not nothing). You'd
3619
+ * build this query like this;
3620
+ *
3621
+ * query = PhraseQuery.new(:content)
3622
+ * query.add_term("the").add_term("quick").add_term("fox", 2)
3623
+ * # matches => "the quick yellow fox"
3624
+ * # matches => "the quick alkgdhaskghaskjdh fox"
3625
+ *
3626
+ * The second parameter to PhraseQuery#add_term is the position increment for
3627
+ * the term. It is one by default meaning that every time you add a term it
3628
+ * is expected to follow the previous term. But setting it to 2 or greater
3629
+ * you are leaving empty spaces in the term.
3630
+ *
3631
+ * There are also some tricks you can do by setting the position increment to
3632
+ * 0. With a little help from your analyzer you can actually tag bold or
3633
+ * italic text for example. If you want more information about this, ask on
3634
+ * the mailing list.
3635
+ */
3636
+ static void
3637
+ Init_PhraseQuery(void)
3638
+ {
/* Define PhraseQuery under mSearch as a subclass of cQuery and give it the
 * shared frb_data_alloc allocator. */
3639
+ cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
3640
+ rb_define_alloc_func(cPhraseQuery, frb_data_alloc);
3641
+
/* "add_term" and "<<" are two names for the same C function (frb_phq_add).
 * An arity of -1 means the C function receives its arguments as argc/argv. */
3642
+ rb_define_method(cPhraseQuery, "initialize", frb_phq_init, -1);
3643
+ rb_define_method(cPhraseQuery, "add_term", frb_phq_add, -1);
3644
+ rb_define_method(cPhraseQuery, "<<", frb_phq_add, -1);
3645
+ rb_define_method(cPhraseQuery, "slop", frb_phq_get_slop, 0);
3646
+ rb_define_method(cPhraseQuery, "slop=", frb_phq_set_slop, 1);
3647
+ }
3648
+
3649
+ /*
3650
+ * Document-class: Ferret::Search::PrefixQuery
3651
+ *
3652
+ * == Summary
3653
+ *
3654
+ * A prefix query is like a TermQuery except that it matches any term with a
3655
+ * specific prefix. PrefixQuery is expanded into a MultiTermQuery when
3656
+ * submitted in a search.
3657
+ *
3658
+ * == Example
3659
+ *
3660
+ * PrefixQuery is very useful for matching a tree structure category
3661
+ * hierarchy. For example, let's say you have the categories;
3662
+ *
3663
+ * "cat1/"
3664
+ * "cat1/sub_cat1"
3665
+ * "cat1/sub_cat2"
3666
+ * "cat2"
3667
+ * "cat2/sub_cat1"
3668
+ * "cat2/sub_cat2"
3669
+ *
3670
+ * Let's say you want to match everything in category 2. You'd build the query
3671
+ * like this;
3672
+ *
3673
+ * query = PrefixQuery.new(:category, "cat2")
3674
+ * # matches => "cat2"
3675
+ * # matches => "cat2/sub_cat1"
3676
+ * # matches => "cat2/sub_cat2"
3677
+ */
3678
+ static void
3679
+ Init_PrefixQuery(void)
3680
+ {
/* Define PrefixQuery under mSearch as a subclass of cQuery, using the shared
 * frb_data_alloc allocator. */
3681
+ cPrefixQuery = rb_define_class_under(mSearch, "PrefixQuery", cQuery);
3682
+ rb_define_alloc_func(cPrefixQuery, frb_data_alloc);
3683
+
/* The constructor is the only method registered here; arity -1 means it
 * receives its arguments as argc/argv. */
3684
+ rb_define_method(cPrefixQuery, "initialize", frb_prq_init, -1);
3685
+ }
3686
+
3687
+ /*
3688
+ * Document-class: Ferret::Search::WildcardQuery
3689
+ *
3690
+ * == Summary
3691
+ *
3692
+ * WildcardQuery is a simple pattern matching query. There are two wild-card
3693
+ * characters.
3694
+ *
3695
+ * * "*" which matches 0 or more characters
3696
+ * * "?" which matches a single character
3697
+ *
3698
+ * == Example
3699
+ *
3700
+ * query = WildcardQuery.new(:field, "h*og")
3701
+ * # matches => "hog"
3702
+ * # matches => "hot dog"
3703
+ *
3704
+ * query = WildcardQuery.new(:field, "fe?t")
3705
+ * # matches => "feat"
3706
+ * # matches => "feet"
3707
+ *
3708
+ * query = WildcardQuery.new(:field, "f?ll*")
3709
+ * # matches => "fill"
3710
+ * # matches => "falling"
3711
+ * # matches => "folly"
3712
+ */
3713
+ static void
3714
+ Init_WildcardQuery(void)
3715
+ {
/* Define WildcardQuery under mSearch as a subclass of cQuery, using the
 * shared frb_data_alloc allocator. */
3716
+ cWildcardQuery = rb_define_class_under(mSearch, "WildcardQuery", cQuery);
3717
+ rb_define_alloc_func(cWildcardQuery, frb_data_alloc);
3718
+
/* The constructor is the only method registered here (argc/argv style). */
3719
+ rb_define_method(cWildcardQuery, "initialize", frb_wcq_init, -1);
3720
+ }
3721
+
3722
+ /*
3723
+ * Document-class: Ferret::Search::FuzzyQuery
3724
+ *
3725
+ * == Summary
3726
+ *
3727
+ * FuzzyQuery uses the Levenshtein distance formula for measuring the
3728
+ * similarity between two terms. For example, weak and week have one letter
3729
+ * difference and they are four characters long so the similarity is 75% or
3730
+ * 0.75. You can use this query to match terms that are very close to the
3731
+ * search term.
3732
+ *
3733
+ * == Example
3734
+ *
3735
+ * FuzzyQuery can be quite useful for finding documents that wouldn't normally
3736
+ * be found because of typos.
3737
+ *
3738
+ * FuzzyQuery.new(:field, "google",
3739
+ * :min_similarity => 0.6,
3740
+ * :prefix_length => 2)
3741
+ * # matches => "gogle", "goggle", "googol", "googel"
3742
+ */
3743
+ static void
3744
+ Init_FuzzyQuery(void)
3745
+ {
/* IDs of the two class variables that hold the class-wide defaults. */
3746
+ id_default_min_similarity = rb_intern("@@default_min_similarity");
3747
+ id_default_prefix_length = rb_intern("@@default_prefix_length");
3748
+
/* Symbols :min_similarity and :prefix_length, cached in file-level
 * variables. */
3749
+ sym_min_similarity = ID2SYM(rb_intern("min_similarity"));
3750
+ sym_prefix_length = ID2SYM(rb_intern("prefix_length"));
3751
+
3752
+ cFuzzyQuery = rb_define_class_under(mSearch, "FuzzyQuery", cQuery);
3753
+ rb_define_alloc_func(cFuzzyQuery, frb_data_alloc);
/* Seed the class variables with 0.5 and 0. The two branches set identical
 * values; they differ only in that the non-1.9 branch passes an extra
 * trailing Qfalse argument to rb_cvar_set. */
3754
+ #ifdef FRT_RUBY_VERSION_1_9
3755
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity,
3756
+ rb_float_new(0.5));
3757
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length,
3758
+ INT2FIX(0));
3759
+ #else
3760
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity,
3761
+ rb_float_new(0.5), Qfalse);
3762
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length,
3763
+ INT2FIX(0), Qfalse);
3764
+ #endif
3765
+
/* Class-level getter/setter pairs for the two defaults. */
3766
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity",
3767
+ frb_fq_get_dms, 0);
3768
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity=",
3769
+ frb_fq_set_dms, 1);
3770
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length",
3771
+ frb_fq_get_dpl, 0);
3772
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length=",
3773
+ frb_fq_set_dpl, 1);
3774
+
/* Instance methods: constructor plus two zero-argument readers. */
3775
+ rb_define_method(cFuzzyQuery, "initialize", frb_fq_init, -1);
3776
+ rb_define_method(cFuzzyQuery, "prefix_length", frb_fq_pre_len, 0);
3777
+ rb_define_method(cFuzzyQuery, "min_similarity", frb_fq_min_sim, 0);
3778
+ }
3779
+
3780
+ /*
3781
+ * Document-class: Ferret::Search::MatchAllQuery
3782
+ *
3783
+ * == Summary
3784
+ *
3785
+ * MatchAllQuery matches all documents in the index. You might want to use this
3786
+ * query in combination with a filter, however, ConstantScoreQuery is
3787
+ * probably better in that circumstance.
3788
+ */
3789
+ static void
3790
+ Init_MatchAllQuery(void)
3791
+ {
/* Define MatchAllQuery under mSearch as a subclass of cQuery. Unlike the
 * other query classes in this file it uses its own allocator, frb_maq_alloc,
 * rather than frb_data_alloc. */
3792
+ cMatchAllQuery = rb_define_class_under(mSearch, "MatchAllQuery", cQuery);
3793
+ rb_define_alloc_func(cMatchAllQuery, frb_maq_alloc);
3794
+
/* The constructor takes no arguments. */
3795
+ rb_define_method(cMatchAllQuery, "initialize", frb_maq_init, 0);
3796
+ }
3797
+
3798
+ /*
3799
+ * Document-class: Ferret::Search::ConstantScoreQuery
3800
+ *
3801
+ * == Summary
3802
+ *
3803
+ * ConstantScoreQuery is a way to turn a Filter into a Query. It matches all
3804
+ * documents that its filter matches with a constant score. This is a very
3805
+ * fast query, particularly when run more than once (since filters are
3806
+ * cached). It is also used internally by RangeQuery.
3807
+ *
3808
+ * == Example
3809
+ *
3810
+ * Let's say for example that you often need to display all documents created
3811
+ * on or after June 1st. You could create a ConstantScoreQuery like this;
3812
+ *
3813
+ * query = ConstantScoreQuery.new(RangeFilter.new(:created_on, :>= => "200606"))
3814
+ *
3815
+ * Once this is run once the results are cached and will be returned very
3816
+ * quickly in future requests.
3817
+ */
3818
+ static void
3819
+ Init_ConstantScoreQuery(void)
3820
+ {
/* Define ConstantScoreQuery under mSearch as a subclass of cQuery, using the
 * shared frb_data_alloc allocator. */
3821
+ cConstantScoreQuery = rb_define_class_under(mSearch,
3822
+ "ConstantScoreQuery", cQuery);
3823
+ rb_define_alloc_func(cConstantScoreQuery, frb_data_alloc);
3824
+
/* The constructor takes exactly one argument. */
3825
+ rb_define_method(cConstantScoreQuery, "initialize", frb_csq_init, 1);
3826
+ }
3827
+
3828
+ /*
3829
+ * Document-class: Ferret::Search::FilteredQuery
3830
+ *
3831
+ * == Summary
3832
+ *
3833
+ * FilteredQuery offers you a way to apply a filter to a specific query.
3834
+ * The FilteredQuery would then be added to a BooleanQuery to be combined
3835
+ * with other queries. There is not much point in passing a FilteredQuery
3836
+ * directly to a Searcher#search method unless you are applying more than one
3837
+ * filter since the search method also takes a filter as a parameter.
3838
+ */
3839
+ static void
3840
+ Init_FilteredQuery(void)
3841
+ {
/* Define FilteredQuery under mSearch as a subclass of cQuery, using the
 * shared frb_data_alloc allocator. */
3842
+ cFilteredQuery = rb_define_class_under(mSearch, "FilteredQuery", cQuery);
3843
+ rb_define_alloc_func(cFilteredQuery, frb_data_alloc);
3844
+
/* The constructor takes exactly two arguments. */
3845
+ rb_define_method(cFilteredQuery, "initialize", frb_fqq_init, 2);
3846
+ }
3847
+
3848
+ /*
3849
+ * Document-class: Ferret::Search::Spans::SpanTermQuery
3850
+ *
3851
+ * == Summary
3852
+ *
3853
+ * A SpanTermQuery is the Spans version of TermQuery, the only difference
3854
+ * being that it returns the start and end offset of all of its matches for
3855
+ * use by enclosing SpanQueries.
3856
+ */
3857
+ static void
3858
+ Init_SpanTermQuery(void)
3859
+ {
/* Define SpanTermQuery under the mSpans module (not mSearch) as a subclass
 * of cQuery; mSpans must already be set when this runs. */
3860
+ cSpanTermQuery = rb_define_class_under(mSpans, "SpanTermQuery", cQuery);
3861
+ rb_define_alloc_func(cSpanTermQuery, frb_data_alloc);
3862
+
/* The constructor takes exactly two arguments. */
3863
+ rb_define_method(cSpanTermQuery, "initialize", frb_spantq_init, 2);
3864
+ }
3865
+
3866
+ /*
3867
+ * Document-class: Ferret::Search::Spans::SpanMultiTermQuery
3868
+ *
3869
+ * == Summary
3870
+ *
3871
+ * A SpanMultiTermQuery is the Spans version of MultiTermQuery, the only
3872
+ * difference being that it returns the start and end offset of all of its
3873
+ * matches for use by enclosing SpanQueries.
3874
+ */
3875
+ static void
3876
+ Init_SpanMultiTermQuery(void)
3877
+ {
/* Define SpanMultiTermQuery under the mSpans module as a subclass of cQuery;
 * mSpans must already be set when this runs. */
3878
+ cSpanMultiTermQuery = rb_define_class_under(mSpans, "SpanMultiTermQuery", cQuery);
3879
+ rb_define_alloc_func(cSpanMultiTermQuery, frb_data_alloc);
3880
+
/* The constructor takes exactly two arguments. */
3881
+ rb_define_method(cSpanMultiTermQuery, "initialize", frb_spanmtq_init, 2);
3882
+ }
3883
+
3884
+ /*
3885
+ * Document-class: Ferret::Search::Spans::SpanPrefixQuery
3886
+ *
3887
+ * == Summary
3888
+ *
3889
+ * A SpanPrefixQuery is the Spans version of PrefixQuery, the only difference
3890
+ * being that it returns the start and end offset of all of its matches for
3891
+ * use by enclosing SpanQueries.
3892
+ */
3893
+ static void
3894
+ Init_SpanPrefixQuery(void)
3895
+ {
/* Define SpanPrefixQuery under the mSpans module as a subclass of cQuery;
 * mSpans must already be set when this runs. */
3896
+ cSpanPrefixQuery = rb_define_class_under(mSpans, "SpanPrefixQuery", cQuery);
3897
+ rb_define_alloc_func(cSpanPrefixQuery, frb_data_alloc);
3898
+
/* Arity -1: the constructor receives its arguments as argc/argv. */
3899
+ rb_define_method(cSpanPrefixQuery, "initialize", frb_spanprq_init, -1);
3900
+ }
3901
+
3902
+ /*
3903
+ * Document-class: Ferret::Search::Spans::SpanFirstQuery
3904
+ *
3905
+ * == Summary
3906
+ *
3907
+ * A SpanFirstQuery restricts a query to search in the first +end+ bytes of a
3908
+ * field. This is useful since often the most important information in a
3909
+ * document is at the start of the document.
3910
+ *
3911
+ * == Example
3912
+ *
3913
+ * To find all documents where "ferret" is within the first 100 characters
3914
+ * (really bytes);
3915
+ *
3916
+ * query = SpanFirstQuery.new(SpanTermQuery.new(:content, "ferret"), 100)
3917
+ *
3918
+ * == NOTE
3919
+ *
3920
+ * SpanFirstQuery only works with other SpanQueries.
3921
+ */
3922
+ static void
3923
+ Init_SpanFirstQuery(void)
3924
+ {
/* Define SpanFirstQuery under the mSpans module as a subclass of cQuery;
 * mSpans must already be set when this runs. */
3925
+ cSpanFirstQuery = rb_define_class_under(mSpans, "SpanFirstQuery", cQuery);
3926
+ rb_define_alloc_func(cSpanFirstQuery, frb_data_alloc);
3927
+
/* The constructor takes exactly two arguments. */
3928
+ rb_define_method(cSpanFirstQuery, "initialize", frb_spanfq_init, 2);
3929
+ }
3930
+
3931
+ /*
3932
+ * Document-class: Ferret::Search::Spans::SpanNearQuery
3933
+ *
3934
+ * == Summary
3935
+ *
3936
+ * A SpanNearQuery is like a combination between a PhraseQuery and a
3937
+ * BooleanQuery. It matches sub-SpanQueries which are added as clauses but
3938
+ * those clauses must occur within a +slop+ edit distance of each other. You
3939
+ * can also specify that clauses must occur +in_order+.
3940
+ *
3941
+ * == Example
3942
+ *
3943
+ * query = SpanNearQuery.new(:slop => 2)
3944
+ * query << SpanTermQuery.new(:field, "quick")
3945
+ * query << SpanTermQuery.new(:field, "brown")
3946
+ * query << SpanTermQuery.new(:field, "fox")
3947
+ * # matches => "quick brown speckled sleepy fox"
3948
+ * |______2______^
3949
+ * # matches => "quick brown speckled fox"
3950
+ * |__1__^
3951
+ * # matches => "brown quick _____ fox"
3952
+ * ^_____2_____|
3953
+ *
3954
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
3955
+ * query << SpanTermQuery.new(:field, "quick")
3956
+ * query << SpanTermQuery.new(:field, "brown")
3957
+ * query << SpanTermQuery.new(:field, "fox")
3958
+ * # matches => "quick brown speckled sleepy fox"
3959
+ * |______2______^
3960
+ * # matches => "quick brown speckled fox"
3961
+ * |__1__^
3962
+ * # doesn't match => "brown quick _____ fox"
3963
+ * # not in order ^_____2_____|
3964
+ *
3965
+ * == NOTE
3966
+ *
3967
+ * SpanNearQuery only works with other SpanQueries.
3968
+ */
3969
+ static void
3970
+ Init_SpanNearQuery(void)
3971
+ {
/* Symbols :slop, :in_order and :clauses, cached in file-level variables. */
3972
+ sym_slop = ID2SYM(rb_intern("slop"));
3973
+ sym_in_order = ID2SYM(rb_intern("in_order"));
3974
+ sym_clauses = ID2SYM(rb_intern("clauses"));
3975
+
/* Define SpanNearQuery under the mSpans module as a subclass of cQuery. */
3976
+ cSpanNearQuery = rb_define_class_under(mSpans, "SpanNearQuery", cQuery);
3977
+ rb_define_alloc_func(cSpanNearQuery, frb_data_alloc);
3978
+
/* "add" and "<<" are two names for the same C function (frb_spannq_add). */
3979
+ rb_define_method(cSpanNearQuery, "initialize", frb_spannq_init, -1);
3980
+ rb_define_method(cSpanNearQuery, "add", frb_spannq_add, 1);
3981
+ rb_define_method(cSpanNearQuery, "<<", frb_spannq_add, 1);
3982
+ }
3983
+
3984
+ /*
3985
+ * Document-class: Ferret::Search::Spans::SpanOrQuery
3986
+ *
3987
+ * == Summary
3988
+ *
3989
+ * SpanOrQuery is just like a BooleanQuery with all +:should+ clauses.
3990
+ * However, the difference is that all sub-clauses must be SpanQueries and
3991
+ * the resulting query can then be used within other SpanQueries like
3992
+ * SpanNearQuery.
3993
+ *
3994
+ * == Example
3995
+ *
3996
+ * Combined with SpanNearQuery we can create a multi-PhraseQuery like query;
3997
+ *
3998
+ * quick_query = SpanOrQuery.new()
3999
+ * quick_query << SpanTermQuery.new(:field, "quick")
4000
+ * quick_query << SpanTermQuery.new(:field, "fast")
4001
+ * quick_query << SpanTermQuery.new(:field, "speedy")
4002
+ *
4003
+ * colour_query = SpanOrQuery.new()
4004
+ * colour_query << SpanTermQuery.new(:field, "red")
4005
+ * colour_query << SpanTermQuery.new(:field, "brown")
4006
+ *
4007
+ *
4008
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
4009
+ * query << quick_query
4010
+ * query << colour_query
4011
+ * query << SpanTermQuery.new(:field, "fox")
4012
+ * # matches => "quick red speckled sleepy fox"
4013
+ * |______2______^
4014
+ * # matches => "speedy brown speckled fox"
4015
+ * |__1__^
4016
+ * # doesn't match => "brown fast _____ fox"
4017
+ * # not in order ^_____2____|
4018
+ *
4019
+ * == NOTE
4020
+ *
4021
+ * SpanOrQuery only works with other SpanQueries.
4022
+ */
4023
+ static void
4024
+ Init_SpanOrQuery(void)
4025
+ {
/* Define SpanOrQuery under the mSpans module as a subclass of cQuery. */
4026
+ cSpanOrQuery = rb_define_class_under(mSpans, "SpanOrQuery", cQuery);
4027
+ rb_define_alloc_func(cSpanOrQuery, frb_data_alloc);
4028
+
/* "add" and "<<" are two names for the same C function (frb_spanoq_add). */
4029
+ rb_define_method(cSpanOrQuery, "initialize", frb_spanoq_init, -1);
4030
+ rb_define_method(cSpanOrQuery, "add", frb_spanoq_add, 1);
4031
+ rb_define_method(cSpanOrQuery, "<<", frb_spanoq_add, 1);
4032
+ }
4033
+
4034
+ /*
4035
+ * Document-class: Ferret::Search::Spans::SpanNotQuery
4036
+ *
4037
+ * == Summary
4038
+ *
4039
+ * SpanNotQuery is like a BooleanQuery with a +:must_not+ clause. The
4040
+ * difference being that the resulting query can be used in another
4041
+ * SpanQuery.
4042
+ *
4043
+ * == Example
4044
+ *
4045
+ * Let's say you wanted to search for all documents with the term "rails"
4046
+ * near the start but without the term "train" near the start. This would
4047
+ * allow the term "train" to occur later on in the document.
4048
+ *
4049
+ * rails_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "rails"), 100)
4050
+ * train_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "train"), 100)
4051
+ * query = SpanNotQuery.new(rails_query, train_query)
4052
+ *
4053
+ * == NOTE
4054
+ *
4055
+ * SpanNotQuery only works with other SpanQueries.
4056
+ */
4057
+ static void
4058
+ Init_SpanNotQuery(void)
4059
+ {
/* Define SpanNotQuery under the mSpans module as a subclass of cQuery. */
4060
+ cSpanNotQuery = rb_define_class_under(mSpans, "SpanNotQuery", cQuery);
4061
+ rb_define_alloc_func(cSpanNotQuery, frb_data_alloc);
4062
+
/* The constructor takes exactly two arguments. */
4063
+ rb_define_method(cSpanNotQuery, "initialize", frb_spanxq_init, 2);
4064
+ }
4065
+
4066
+ /* rdoc hack
4067
+ extern VALUE mFerret = rb_define_module("Ferret");
4068
+ extern VALUE mSearch = rb_define_module_under(mFerret, "Search");
4069
+ */
4070
+
4071
+ /*
4072
+ * Document-module: Ferret::Search::Spans
4073
+ *
4074
+ * == Summary
4075
+ *
4076
+ * The Spans module contains a number of SpanQueries. SpanQueries, unlike
4077
+ * regular queries, also return the start and end offsets of all of their
4078
+ * matches so they can be used to limit queries to a certain position in the
4079
+ * field. They are often used in combination to perform special types of
4080
+ * PhraseQuery.
4081
+ */
4082
+ static void
4083
+ Init_Spans(void)
4084
+ {
/* Create the Spans module under mSearch first: every Init_Span*Query call
 * below passes mSpans to rb_define_class_under. */
4085
+ mSpans = rb_define_module_under(mSearch, "Spans");
4086
+ Init_SpanTermQuery();
4087
+ Init_SpanMultiTermQuery();
4088
+ Init_SpanPrefixQuery();
4089
+ Init_SpanFirstQuery();
4090
+ Init_SpanNearQuery();
4091
+ Init_SpanOrQuery();
4092
+ Init_SpanNotQuery();
4093
+ }
4094
+
4095
+ /*
4096
+ * Document-class: Ferret::Search::RangeFilter
4097
+ *
4098
+ * == Summary
4099
+ *
4100
+ * RangeFilter filters a set of documents which contain a lexicographical
4101
+ * range of terms (ie "aaa", "aab", "aac", etc). See also RangeQuery
4102
+ *
4103
+ * == Example
4104
+ *
4105
+ * Find all documents created before 5th of September 2002.
4106
+ *
4107
+ * filter = RangeFilter.new(:created_on, :< => "20020905")
4108
+ *
4109
+ * == Number fields
4110
+ *
4111
+ * See RangeQuery for notes on how to use the RangeFilter on a field
4112
+ * containing numbers.
4113
+ */
4114
+ static void
4115
+ Init_RangeFilter(void)
4116
+ {
/* Define RangeFilter under mSearch as a subclass of cFilter, so cFilter must
 * already be set when this runs.
 * NOTE(review): frb_mark_cclass is defined outside this view; presumably it
 * tags the class as C-implemented -- confirm. */
4117
+ cRangeFilter = rb_define_class_under(mSearch, "RangeFilter", cFilter);
4118
+ frb_mark_cclass(cRangeFilter);
4119
+ rb_define_alloc_func(cRangeFilter, frb_data_alloc);
4120
+
/* The constructor takes exactly two arguments. */
4121
+ rb_define_method(cRangeFilter, "initialize", frb_rf_init, 2);
4122
+ }
4123
+
4124
+ /*
4125
+ * Document-class: Ferret::Search::TypedRangeFilter
4126
+ *
4127
+ * == Summary
4128
+ *
4129
+ * TypedRangeFilter filters a set of documents which contain a
4130
+ * lexicographical range of terms (ie "aaa", "aab", "aac", etc), unless the
4131
+ * range boundaries happen to be numbers (positive, negative, integer,
4132
+ * float), in which case a numerical filter is applied. See also
4133
+ * TypedRangeQuery
4134
+ *
4135
+ * == Example
4136
+ *
4137
+ * Find all products that cost less than or equal to $50.00.
4138
+ *
4139
+ * filter = TypedRangeFilter.new(:price, :<= => "50.00")
4140
+ */
4141
+ static void
4142
+ Init_TypedRangeFilter(void)
4143
+ {
/* Define TypedRangeFilter under mSearch as a subclass of cFilter, so cFilter
 * must already be set when this runs.
 * NOTE(review): frb_mark_cclass is defined outside this view; presumably it
 * tags the class as C-implemented -- confirm. */
4144
+ cTypedRangeFilter =
4145
+ rb_define_class_under(mSearch, "TypedRangeFilter", cFilter);
4146
+ frb_mark_cclass(cTypedRangeFilter);
4147
+ rb_define_alloc_func(cTypedRangeFilter, frb_data_alloc);
4148
+
/* The constructor takes exactly two arguments. */
4149
+ rb_define_method(cTypedRangeFilter, "initialize", frb_trf_init, 2);
4150
+ }
4151
+
4152
+ /*
4153
+ * Document-class: Ferret::Search::QueryFilter
4154
+ *
4155
+ * == Summary
4156
+ *
4157
+ * QueryFilter can be used to restrict one query's results by another query's
4158
+ * results, basically "and"ing them together. Of course you could easily use
4159
+ * a BooleanQuery to do this. The reason you may choose to use a QueryFilter
4160
+ * is that Filter results are cached so if you have one query that is often
4161
+ * added to other queries you may want to use a QueryFilter for performance
4162
+ * reasons.
4163
+ *
4164
+ * == Example
4165
+ *
4166
+ * Let's say you have a field +:approved+ which you set to yes when a
4167
+ * document is approved for display. You'll probably want to add a Filter
4168
+ * which filters approved documents to display to your users. This is the
4169
+ * perfect use case for a QueryFilter.
4170
+ *
4171
+ * filter = QueryFilter.new(TermQuery.new(:approved, "yes"))
4172
+ *
4173
+ * Just remember to use the same QueryFilter each time to take advantage of
4174
+ * caching. Don't create a new one for each request. Of course, this won't
4175
+ * work in a CGI application.
4176
+ */
4177
+ static void
4178
+ Init_QueryFilter(void)
4179
+ {
/* Define QueryFilter under mSearch as a subclass of cFilter, so cFilter must
 * already be set when this runs.
 * NOTE(review): frb_mark_cclass is defined outside this view; presumably it
 * tags the class as C-implemented -- confirm. */
4180
+ cQueryFilter = rb_define_class_under(mSearch, "QueryFilter", cFilter);
4181
+ frb_mark_cclass(cQueryFilter);
4182
+ rb_define_alloc_func(cQueryFilter, frb_data_alloc);
4183
+
/* The constructor takes exactly one argument. */
4184
+ rb_define_method(cQueryFilter, "initialize", frb_qf_init, 1);
4185
+ }
4186
+
4187
+ /*
4188
+ * Document-class: Ferret::Search::Filter
4189
+ *
4190
+ * == Summary
4191
+ *
4192
+ * A Filter is used to filter query results. It is usually passed to one of
4193
+ * Searcher's search methods however it can also be used inside a
4194
+ * ConstantScoreQuery or a FilteredQuery. To implement your own Filter you
4195
+ * must implement the method #bits(index_reader) which returns a
4196
+ * BitVector with set bits corresponding to documents that are allowed by
4197
+ * this Filter.
4198
+ *
4199
+ * TODO add support for user implemented Filter.
4200
+ * TODO add example of user implemented Filter.
4201
+ */
4202
+ static void
4203
+ Init_Filter(void)
4204
+ {
/* Cache the ID of the "bits" method name and define the Filter base class
 * (a direct subclass of Object) under mSearch. */
4205
+ id_bits = rb_intern("bits");
4206
+ cFilter = rb_define_class_under(mSearch, "Filter", rb_cObject);
4207
+ frb_mark_cclass(cFilter);
/* Register the allocator on cFilter itself, matching every other class set
 * up in this file. Naming cConstantScoreQuery here would only repeat the
 * registration already done in Init_ConstantScoreQuery and leave Filter
 * without the data allocator.
 * NOTE(review): no "initialize" is defined for Filter here; confirm that
 * frb_f_get_bits and frb_f_to_s cope with an instance created by a bare
 * Filter.new. */
4208
+ rb_define_alloc_func(cFilter, frb_data_alloc);
4209
+
/* "bits" takes one argument; "to_s" takes none. */
4210
+ rb_define_method(cFilter, "bits", frb_f_get_bits, 1);
4211
+ rb_define_method(cFilter, "to_s", frb_f_to_s, 0);
4212
+ }
4213
+
4214
+ /*
4215
+ * Document-class: Ferret::Search::SortField
4216
+ *
4217
+ * == Summary
4218
+ *
4219
+ * A SortField is used to sort the result-set of a search by the contents of
4220
+ * a field. The following types of sort_field are available;
4221
+ *
4222
+ * * :auto
4223
+ * * :integer
4224
+ * * :float
4225
+ * * :string
4226
+ * * :byte
4227
+ * * :doc_id
4228
+ * * :score
4229
+ *
4230
+ * The type of the SortField is set by passing it as a parameter to the
4231
+ * constructor. The +:auto+ type specifies that the SortField should detect
4232
+ * the sort type by looking at the data in the field. This is the default
4233
+ * :type value although it is recommended that you explicitly specify the
4234
+ * fields type.
4235
+ *
4236
+ * == Example
4237
+ *
4238
+ * title_sf = SortField.new(:title, :type => :string)
4239
+ * rating_sf = SortField.new(:rating, :type => :float, :reverse => true)
4240
+ *
4241
+ *
4242
+ * Note 1: Care should be taken when using the :auto sort-type since numbers
4243
+ * will occur before other strings in the index so if you are sorting a field
4244
+ * with both numbers and strings (like a title field which might have "24"
4245
+ * and "Prison Break") then the sort_field will think it is sorting integers
4246
+ * when it really should be sorting strings.
4247
+ *
4248
+ * Note 2: When sorting by integer, integers are only 4 bytes so anything
4249
+ * larger will cause strange sorting behaviour.
4250
+ */
4251
+ static void
4252
+ Init_SortField(void)
4253
+ {
4254
+ /* option hash keys for SortField#initialize */
4255
+ sym_type = ID2SYM(rb_intern("type"));
4256
+ sym_reverse = ID2SYM(rb_intern("reverse"));
4257
+ sym_comparator = ID2SYM(rb_intern("comparator"));
4258
+
4259
+ /* Sort types */
4260
+ sym_integer = ID2SYM(rb_intern("integer"));
4261
+ sym_float = ID2SYM(rb_intern("float"));
4262
+ sym_string = ID2SYM(rb_intern("string"));
4263
+ sym_auto = ID2SYM(rb_intern("auto"));
4264
+ sym_doc_id = ID2SYM(rb_intern("doc_id"));
4265
+ sym_score = ID2SYM(rb_intern("score"));
4266
+ sym_byte = ID2SYM(rb_intern("byte"));
4267
+
/* SortField is a direct subclass of Object, defined under mSearch. */
4268
+ cSortField = rb_define_class_under(mSearch, "SortField", rb_cObject);
4269
+ rb_define_alloc_func(cSortField, frb_data_alloc);
4270
+
4271
+ rb_define_method(cSortField, "initialize", frb_sf_init, -1);
4272
+ rb_define_method(cSortField, "reverse?", frb_sf_is_reverse, 0);
4273
+ rb_define_method(cSortField, "name", frb_sf_get_name, 0);
4274
+ rb_define_method(cSortField, "type", frb_sf_get_type, 0);
4275
+ rb_define_method(cSortField, "comparator", frb_sf_get_comparator, 0);
4276
+ rb_define_method(cSortField, "to_s", frb_sf_to_s, 0);
4277
+
/* Each of the four constants below wraps the address of a SORT_FIELD_*
 * object in a Ruby data object with a NULL mark function and frb_deref_free
 * as its free function, then passes the struct pointer and the new constant
 * to object_add.
 * NOTE(review): object_add and frb_deref_free are defined outside this view;
 * object_add appears to associate the C struct with its Ruby wrapper --
 * confirm. */
4278
+ rb_define_const(cSortField, "SCORE",
4279
+ Data_Wrap_Struct(cSortField, NULL,
4280
+ &frb_deref_free,
4281
+ (SortField *)&SORT_FIELD_SCORE));
4282
+ object_add((SortField *)&SORT_FIELD_SCORE,
4283
+ rb_const_get(cSortField, rb_intern("SCORE")));
4284
+
4285
+ rb_define_const(cSortField, "SCORE_REV",
4286
+ Data_Wrap_Struct(cSortField, NULL,
4287
+ &frb_deref_free,
4288
+ (SortField *)&SORT_FIELD_SCORE_REV));
4289
+ object_add((SortField *)&SORT_FIELD_SCORE_REV,
4290
+ rb_const_get(cSortField, rb_intern("SCORE_REV")));
4291
+
4292
+ rb_define_const(cSortField, "DOC_ID",
4293
+ Data_Wrap_Struct(cSortField, NULL,
4294
+ &frb_deref_free,
4295
+ (SortField *)&SORT_FIELD_DOC));
4296
+
/* The DOC_ID wrapper is also kept in oSORT_FIELD_DOC, which Init_Sort reads
 * when it builds Sort::INDEX_ORDER. */
4297
+ oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_ID"));
4298
+ object_add((SortField *)&SORT_FIELD_DOC, oSORT_FIELD_DOC);
4299
+
4300
+ rb_define_const(cSortField, "DOC_ID_REV",
4301
+ Data_Wrap_Struct(cSortField, NULL,
4302
+ &frb_deref_free,
4303
+ (SortField *)&SORT_FIELD_DOC_REV));
4304
+ object_add((SortField *)&SORT_FIELD_DOC_REV,
4305
+ rb_const_get(cSortField, rb_intern("DOC_ID_REV")));
4306
+ }
4307
+
4308
+ /*
4309
+ * Document-class: Ferret::Search::Sort
4310
+ *
4311
+ * == Summary
4312
+ *
4313
+ * A Sort object is used to combine and apply a list of SortFields. The
4314
+ * SortFields are applied in the order they are added to the SortObject.
4315
+ *
4316
+ * == Example
4317
+ *
4318
+ * Here is how you would create a Sort object that sorts first by rating and
4319
+ * then by title;
4320
+ *
4321
+ * sf_rating = SortField.new(:rating, :type => :float, :reverse => true)
4322
+ * sf_title = SortField.new(:title, :type => :string)
4323
+ * sort = Sort.new([sf_rating, sf_title])
4324
+ *
4325
+ * Remember that the :type parameter for SortField is set to :auto by default
4326
+ * but I strongly recommend you specify a :type value.
4327
+ */
4328
+ static void
4329
+ Init_Sort(void)
4330
+ {
4331
+ /* Sort: a direct subclass of Object with its own allocator */
4332
+ cSort = rb_define_class_under(mSearch, "Sort", rb_cObject);
4333
+ rb_define_alloc_func(cSort, frb_sort_alloc);
4334
+
4335
+ rb_define_method(cSort, "initialize", frb_sort_init, -1);
4336
+ rb_define_method(cSort, "fields", frb_sort_get_fields, 0);
4337
+ rb_define_method(cSort, "to_s", frb_sort_to_s, 0);
4338
+
/* The two constants are built by calling the allocator and the C-level
 * initializer directly: RELEVANCE with no arguments, INDEX_ORDER with the
 * single argument oSORT_FIELD_DOC. That variable is assigned in
 * Init_SortField, so Init_SortField has to run before this function. */
4339
+ rb_define_const(cSort, "RELEVANCE",
4340
+ frb_sort_init(0, NULL, frb_sort_alloc(cSort)));
4341
+ rb_define_const(cSort, "INDEX_ORDER",
4342
+ frb_sort_init(1, &oSORT_FIELD_DOC, frb_sort_alloc(cSort)));
4343
+ }
4344
+
4345
+ /*
4346
+ * Document-class: Ferret::Search::Searcher
4347
+ *
4348
+ * == Summary
4349
+ *
4350
+ * The Searcher class basically performs the task that Ferret was built for.
4351
+ * It searches the index. To search the index the Searcher class wraps an
4352
+ * IndexReader so many of the tasks that you can perform on an IndexReader
4353
+ * are also available on a searcher including, most importantly, accessing
4354
+ * stored documents.
4355
+ *
4356
+ * The main methods that you need to know about when using a Searcher are the
4357
+ * search methods. There is the Searcher#search_each method which iterates
4358
+ * through the results by document id and score and there is the
4359
+ * Searcher#search method which returns a TopDocs object. Another important
4360
+ * difference to note is that the Searcher#search_each method normalizes the
4361
+ * score to a value in the range 0.0..1.0 if the max_score is greater than
4362
+ * 1.0. Searcher#search does not. Apart from that they take the same
4363
+ * parameters and work the same way.
4364
+ *
4365
+ * == Example
4366
+ *
4367
+ * searcher = Searcher.new("/path/to/index")
4368
+ *
4369
+ * searcher.search_each(TermQuery.new(:content, "ferret"),
4370
+ * :filter => RangeFilter.new(:date, :< => "2006"),
4371
+ * :sort => "date DESC, title") do |doc_id, score|
4372
+ * puts "#{searcher[doc_id][:title]} scored #{score}"
4373
+ * end
4374
+ */
4375
+ static void
4376
+ Init_Searcher(void)
4377
+ {
4378
+ /* option hash keys for Searcher#search */
4379
+ sym_offset = ID2SYM(rb_intern("offset"));
4380
+ sym_limit = ID2SYM(rb_intern("limit"));
4381
+ sym_start_doc = ID2SYM(rb_intern("start_doc"));
4382
+ sym_all = ID2SYM(rb_intern("all"));
4383
+ sym_filter = ID2SYM(rb_intern("filter"));
4384
+ sym_filter_proc = ID2SYM(rb_intern("filter_proc"));
4385
+ sym_c_filter_proc = ID2SYM(rb_intern("c_filter_proc"));
4386
+ sym_sort = ID2SYM(rb_intern("sort"));
4387
+
/* NOTE(review): these five symbols look like the option keys for
 * Searcher#highlight -- confirm against frb_sea_highlight. */
4388
+ sym_excerpt_length = ID2SYM(rb_intern("excerpt_length"));
4389
+ sym_num_excerpts = ID2SYM(rb_intern("num_excerpts"));
4390
+ sym_pre_tag = ID2SYM(rb_intern("pre_tag"));
4391
+ sym_post_tag = ID2SYM(rb_intern("post_tag"));
4392
+ sym_ellipsis = ID2SYM(rb_intern("ellipsis"));
4393
+
4394
+ /* Searcher: a direct subclass of Object, defined under mSearch */
4395
+ cSearcher = rb_define_class_under(mSearch, "Searcher", rb_cObject);
4396
+ rb_define_alloc_func(cSearcher, frb_data_alloc);
4397
+
/* "get_document" and "[]" are two names for the same C function
 * (frb_sea_doc). Methods registered with arity -1 receive argc/argv. */
4398
+ rb_define_method(cSearcher, "initialize", frb_sea_init, 1);
4399
+ rb_define_method(cSearcher, "close", frb_sea_close, 0);
4400
+ rb_define_method(cSearcher, "reader", frb_sea_get_reader, 0);
4401
+ rb_define_method(cSearcher, "doc_freq", frb_sea_doc_freq, 2);
4402
+ rb_define_method(cSearcher, "get_document", frb_sea_doc, 1);
4403
+ rb_define_method(cSearcher, "[]", frb_sea_doc, 1);
4404
+ rb_define_method(cSearcher, "max_doc", frb_sea_max_doc, 0);
4405
+ rb_define_method(cSearcher, "search", frb_sea_search, -1);
4406
+ rb_define_method(cSearcher, "search_each", frb_sea_search_each, -1);
4407
+ rb_define_method(cSearcher, "scan", frb_sea_scan, -1);
4408
+ rb_define_method(cSearcher, "explain", frb_sea_explain, 2);
4409
+ rb_define_method(cSearcher, "highlight", frb_sea_highlight, -1);
4410
+ }
4411
+
4412
+ /*
4413
+ * Document-class: Ferret::Search::MultiSearcher
4414
+ *
4415
+ * == Summary
4416
+ *
4417
+ * See Searcher for the methods that you can use on this object. A
4418
+ * MultiSearcher is used to search multiple sub-searchers. The most efficient
4419
+ * way to do this would be to open up an IndexReader on multiple directories
4420
+ * and create a Searcher with that. However, if you decide to implement a
4421
+ * RemoteSearcher, the MultiSearcher can be used to search multiple machines
4422
+ * at once.
4423
+ */
4424
+ static void
4425
+ Init_MultiSearcher(void)
4426
+ {
/* MultiSearcher subclasses cSearcher, so cSearcher must already be set when
 * this runs. Only the constructor is registered here. */
4427
+ cMultiSearcher = rb_define_class_under(mSearch, "MultiSearcher", cSearcher);
4428
+ rb_define_alloc_func(cMultiSearcher, frb_data_alloc);
4429
+ rb_define_method(cMultiSearcher, "initialize", frb_ms_init, -1);
4430
+ }
4431
+
4432
+ /*
4433
+ * Document-module: Ferret::Search
4434
+ *
4435
+ * == Summary
4436
+ *
4437
+ * The Search module contains all the classes used for searching the index;
4438
+ * what Ferret was designed to do. The important classes to take a look at in
4439
+ * this module are (in order);
4440
+ *
4441
+ * * Query
4442
+ * * Searcher
4443
+ * * Filter
4444
+ * * Sort
4445
+ *
4446
+ * Happy Ferreting!!
4447
+ */
4448
+ void
4449
+ Init_Search(void)
4450
+ {
/* Non-static entry point for this file: creates the Search module under
 * mFerret, then runs every per-class initializer. mSearch has to be assigned
 * first because the initializers pass it to rb_define_class_under. */
4451
+ mSearch = rb_define_module_under(mFerret, "Search");
4452
+
/* NOTE(review): the I() macro is defined outside this view; presumably it
 * interns the name "id" -- confirm. */
4453
+ fsym_id = I("id");
4454
+
4455
+ Init_Hit();
4456
+ Init_TopDocs();
4457
+ Init_Explanation();
4458
+
4459
+ /* Queries */
/* Init_Query runs first; the query initializers that follow pass cQuery as
 * the superclass (presumably assigned by Init_Query, which is outside this
 * view). */
4460
+ Init_Query();
4461
+
4462
+ Init_TermQuery();
4463
+ Init_MultiTermQuery();
4464
+ Init_BooleanQuery();
4465
+ Init_RangeQuery();
4466
+ Init_TypedRangeQuery();
4467
+ Init_PhraseQuery();
4468
+ Init_PrefixQuery();
4469
+ Init_WildcardQuery();
4470
+ Init_FuzzyQuery();
4471
+ Init_MatchAllQuery();
4472
+ Init_ConstantScoreQuery();
4473
+ Init_FilteredQuery();
4474
+
4475
+ Init_Spans();
4476
+
4477
+ /* Filters */
/* Init_Filter assigns cFilter, which the three filter subclasses use as
 * their superclass. */
4478
+ Init_Filter();
4479
+ Init_RangeFilter();
4480
+ Init_TypedRangeFilter();
4481
+ Init_QueryFilter();
4482
+
4483
+ /* Sorting */
4484
+ Init_SortField(); /* must be before Init_Sort */
4485
+ Init_Sort();
4486
+
4487
+ /* Searchers */
/* Init_Searcher assigns cSearcher, the superclass of MultiSearcher. */
4488
+ Init_Searcher();
4489
+ Init_MultiSearcher();
4490
+ }