isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,4460 @@
1
+ #include <ctype.h>
2
+ #include "frt_array.h"
3
+ #include "frt_search.h"
4
+ #include "isomorfeus_ferret.h"
5
+ #include <ruby/st.h>
6
+
7
+ VALUE mSearch;
8
+
9
+ static VALUE cHit;
10
+ static VALUE cTopDocs;
11
+ static VALUE cExplanation;
12
+ static VALUE cSearcher;
13
+ static VALUE cMultiSearcher;
14
+ static VALUE cSortField;
15
+ static VALUE cSort;
16
+
17
+ /* Queries */
18
+ static VALUE cQuery;
19
+ static VALUE cTermQuery;
20
+ static VALUE cMultiTermQuery;
21
+ static VALUE cBooleanQuery;
22
+ static VALUE cBooleanClause;
23
+ static VALUE cRangeQuery;
24
+ static VALUE cTypedRangeQuery;
25
+ static VALUE cPhraseQuery;
26
+ static VALUE cPrefixQuery;
27
+ static VALUE cWildcardQuery;
28
+ static VALUE cFuzzyQuery;
29
+ static VALUE cMatchAllQuery;
30
+ static VALUE cConstantScoreQuery;
31
+ static VALUE cFilteredQuery;
32
+ static VALUE cSpanTermQuery;
33
+ static VALUE cSpanMultiTermQuery;
34
+ static VALUE cSpanPrefixQuery;
35
+ static VALUE cSpanFirstQuery;
36
+ static VALUE cSpanNearQuery;
37
+ static VALUE cSpanOrQuery;
38
+ static VALUE cSpanNotQuery;
39
+
40
+ /* Filters */
41
+ static ID id_bits;
42
+ static VALUE cFilter;
43
+ static VALUE cRangeFilter;
44
+ static VALUE cTypedRangeFilter;
45
+ static VALUE cQueryFilter;
46
+
47
+ /* MultiTermQuery */
48
+ static ID id_default_max_terms;
49
+ static VALUE sym_max_terms;
50
+ static VALUE sym_min_score;
51
+
52
+ /** Option hash keys **/
53
+ /* BooleanClause */
54
+ static VALUE sym_should;
55
+ static VALUE sym_must;
56
+ static VALUE sym_must_not;
57
+
58
+ /* RangeQuery */
59
+ static VALUE sym_upper;
60
+ static VALUE sym_lower;
61
+ static VALUE sym_include_upper;
62
+ static VALUE sym_include_lower;
63
+ static VALUE sym_upper_exclusive;
64
+ static VALUE sym_lower_exclusive;
65
+
66
+ static VALUE sym_less_than;
67
+ static VALUE sym_less_than_or_equal_to;
68
+ static VALUE sym_greater_than;
69
+ static VALUE sym_greater_than_or_equal_to;
70
+
71
+ /* FuzzyQuery */
72
+ static VALUE sym_min_similarity;
73
+ static VALUE sym_prefix_length;
74
+
75
+ /* SpanNearQuery */
76
+ static VALUE sym_slop;
77
+ static VALUE sym_in_order;
78
+ static VALUE sym_clauses;
79
+
80
+ /* Class variable ids */
81
+ static ID id_default_min_similarity;
82
+ static ID id_default_prefix_length;
83
+
84
+
85
+ /** Sort **/
86
+ static VALUE oSORT_FIELD_DOC;
87
+
88
+ /* Sort types */
89
+ static VALUE sym_integer;
90
+ static VALUE sym_float;
91
+ static VALUE sym_string;
92
+ static VALUE sym_auto;
93
+ static VALUE sym_doc_id;
94
+ static VALUE sym_score;
95
+ static VALUE sym_byte;
96
+
97
+ /* Sort params */
98
+ static VALUE sym_type;
99
+ static VALUE sym_reverse;
100
+ static VALUE sym_comparator;
101
+
102
+ /* Hits */
103
+ static ID id_doc;
104
+ static ID id_score;
105
+
106
+ /* TopDocs */
107
+ static ID id_hits;
108
+ static ID id_total_hits;
109
+ static ID id_max_score;
110
+ static ID id_searcher;
111
+
112
+ /* Search */
113
+ static VALUE sym_offset;
114
+ static VALUE sym_limit;
115
+ static VALUE sym_start_doc;
116
+ static VALUE sym_all;
117
+ static VALUE sym_sort;
118
+ static VALUE sym_filter;
119
+ static VALUE sym_filter_proc;
120
+ static VALUE sym_c_filter_proc;
121
+
122
+ static VALUE sym_excerpt_length;
123
+ static VALUE sym_num_excerpts;
124
+ static VALUE sym_pre_tag;
125
+ static VALUE sym_post_tag;
126
+ static VALUE sym_ellipsis;
127
+
128
+ static FrtSymbol fsym_id;
129
+
130
+ extern VALUE cIndexReader;
131
+ extern void frb_ir_free(void *p);
132
+ extern void frb_ir_mark(void *p);
133
+
134
+ extern void frb_set_term(VALUE rterm, FrtTerm *t);
135
+ extern VALUE frb_get_analyzer(FrtAnalyzer *a);
136
+ extern FrtHashSet *frb_get_fields(VALUE rfields);
137
+ extern FrtAnalyzer *frb_get_cwrapped_analyzer(VALUE ranalyzer);
138
+ extern VALUE frb_get_lazy_doc(FrtLazyDoc *lazy_doc);
139
+
140
+ /****************************************************************************
141
+ *
142
+ * FrtHit Methods
143
+ *
144
+ ****************************************************************************/
145
+
146
+ static VALUE
147
+ frb_get_hit(FrtHit *hit)
148
+ {
149
+ return rb_struct_new(cHit,
150
+ INT2FIX(hit->doc),
151
+ rb_float_new((double)hit->score),
152
+ NULL);
153
+ }
154
+
155
+ /****************************************************************************
156
+ *
157
+ * TopDocs Methods
158
+ *
159
+ ****************************************************************************/
160
+
161
+ static VALUE
162
+ frb_get_td(FrtTopDocs *td, VALUE rsearcher)
163
+ {
164
+ int i;
165
+ VALUE rtop_docs;
166
+ VALUE hit_ary = rb_ary_new2(td->size);
167
+
168
+ for (i = 0; i < td->size; i++) {
169
+ rb_ary_store(hit_ary, i, frb_get_hit(td->hits[i]));
170
+ }
171
+
172
+ rtop_docs = rb_struct_new(cTopDocs,
173
+ INT2FIX(td->total_hits),
174
+ hit_ary,
175
+ rb_float_new((double)td->max_score),
176
+ rsearcher,
177
+ NULL);
178
+ frt_td_destroy(td);
179
+ return rtop_docs;
180
+ }
181
+
182
+ /*
183
+ * call-seq:
184
+ * top_doc.to_s(field = :id) -> string
185
+ *
186
+ * Returns a string representation of the top_doc in readable format.
187
+ */
188
+ static VALUE
189
+ frb_td_to_s(int argc, VALUE *argv, VALUE self)
190
+ {
191
+ int i;
192
+ VALUE rhits = rb_funcall(self, id_hits, 0);
193
+ FrtSearcher *sea = (FrtSearcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
194
+ const int len = RARRAY_LEN(rhits);
195
+ unsigned int capa = len * 64 + 100;
196
+ int p = 0;
197
+ char *str = FRT_ALLOC_N(char, len * 64 + 100);
198
+ FrtSymbol field = fsym_id;
199
+ VALUE rstr;
200
+
201
+ if (argc) {
202
+ field = frb_field(argv[0]);
203
+ }
204
+
205
+ sprintf(str, "TopDocs: total_hits = %ld, max_score = %lf [\n",
206
+ FIX2LONG(rb_funcall(self, id_total_hits, 0)),
207
+ NUM2DBL(rb_funcall(self, id_max_score, 0)));
208
+ p = (int)strlen(str);
209
+
210
+ for (i = 0; i < len; i++) {
211
+ VALUE rhit = RARRAY_PTR(rhits)[i];
212
+ int doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
213
+ const char *value = "";
214
+ size_t value_len = 0;
215
+ FrtLazyDoc *lzd = sea->get_lazy_doc(sea, doc_id);
216
+ FrtLazyDocField *lzdf = frt_lazy_doc_get(lzd, field);
217
+ if (NULL != lzdf) {
218
+ value = frt_lazy_df_get_data(lzdf, 0);
219
+ value_len = strlen(value);
220
+ }
221
+ if (p + value_len + 64 > capa) {
222
+ capa += (value_len + 64) * (len - i);
223
+ FRT_REALLOC_N(str, char, capa);
224
+ }
225
+
226
+ sprintf(str + p, "\t%d \"%s\": %0.5f\n", doc_id, value,
227
+ NUM2DBL(rb_funcall(rhit, id_score, 0)));
228
+ p += strlen(str + p);
229
+ frt_lazy_doc_close(lzd);
230
+ }
231
+
232
+ sprintf(str + p, "]\n");
233
+ rstr = rb_str_new2(str);
234
+ free(str);
235
+ return rstr;
236
+ }
237
+
238
+ static char *
239
+ frb_lzd_load_to_json(FrtLazyDoc *lzd, char **str, char *s, int *slen)
240
+ {
241
+ int i, j;
242
+ int diff = s - *str;
243
+ int len = diff, l;
244
+ FrtLazyDocField *f;
245
+
246
+ for (i = 0; i < lzd->size; i++) {
247
+ f = lzd->fields[i];
248
+ /* 3 times length of field to make space for quoted quotes ('"') and
249
+ * 4 times field elements to make space for '"' around fields and ','
250
+ * between fields. Add 100 for '[', ']' and good safety.
251
+ */
252
+ len += strlen(rb_id2name(f->name)) + f->len * 3 + 100 + 4 * f->size;
253
+ }
254
+
255
+ if (len > *slen) {
256
+ while (len > *slen) *slen = *slen << 1;
257
+ FRT_REALLOC_N(*str, char, *slen);
258
+ s = *str + diff;
259
+ }
260
+
261
+ for (i = 0; i < lzd->size; i++) {
262
+ const char *field_name;
263
+ f = lzd->fields[i];
264
+ field_name = rb_id2name(f->name);
265
+ if (i) *(s++) = ',';
266
+ *(s++) = '"';
267
+ l = strlen(field_name);
268
+ memcpy(s, field_name, l);
269
+ s += l;
270
+ *(s++) = '"';
271
+ *(s++) = ':';
272
+ if (f->size > 1) *(s++) = '[';
273
+ for (j = 0; j < f->size; j++) {
274
+ if (j) *(s++) = ',';
275
+ s = json_concat_string(s, frt_lazy_df_get_data(f, j));
276
+ }
277
+ if (f->size > 1) *(s++) = ']';
278
+ }
279
+ return s;
280
+ }
281
+
282
+ /*
283
+ * call-seq:
284
+ * top_doc.to_json() -> string
285
+ *
286
+ * Returns a json representation of the top_doc.
287
+ */
288
+ static VALUE
289
+ frb_td_to_json(VALUE self)
290
+ {
291
+ int i;
292
+ VALUE rhits = rb_funcall(self, id_hits, 0);
293
+ VALUE rhit;
294
+ FrtLazyDoc *lzd;
295
+ FrtSearcher *sea = (FrtSearcher *)DATA_PTR(rb_funcall(self, id_searcher, 0));
296
+ const int num_hits = RARRAY_LEN(rhits);
297
+ int doc_id;
298
+ int len = 32768;
299
+ char *str = FRT_ALLOC_N(char, len);
300
+ char *s = str;
301
+ VALUE rstr;
302
+
303
+ *(s++) = '[';
304
+ for (i = 0; i < num_hits; i++) {
305
+ if (i) *(s++) = ',';
306
+ *(s++) = '{';
307
+ rhit = RARRAY_PTR(rhits)[i];
308
+ doc_id = FIX2INT(rb_funcall(rhit, id_doc, 0));
309
+ lzd = sea->get_lazy_doc(sea, doc_id);
310
+ s = frb_lzd_load_to_json(lzd, &str, s, &len);
311
+ frt_lazy_doc_close(lzd);
312
+ *(s++) = '}';
313
+ }
314
+ *(s++) = ']';
315
+ *(s++) = '\0';
316
+ rstr = rb_str_new2(str);
317
+ free(str);
318
+ return rstr;
319
+ }
320
+
321
+
322
+ /****************************************************************************
323
+ *
324
+ * Explanation Methods
325
+ *
326
+ ****************************************************************************/
327
+
328
+ #define GET_EXPL() FrtExplanation *expl = (FrtExplanation *)DATA_PTR(self)
329
+
330
+ /*
331
+ * call-seq:
332
+ * explanation.to_s -> string
333
+ *
334
+ * Returns a string representation of the explanation in readable format.
335
+ */
336
+ static VALUE
337
+ frb_expl_to_s(VALUE self)
338
+ {
339
+ GET_EXPL();
340
+ char *str = frt_expl_to_s(expl);
341
+ VALUE rstr = rb_str_new2(str);
342
+ free(str);
343
+ return rstr;
344
+ }
345
+
346
+ /*
347
+ * call-seq:
348
+ * explanation.to_html -> string
349
+ *
350
+ * Returns an html representation of the explanation in readable format.
351
+ */
352
+ static VALUE
353
+ frb_expl_to_html(VALUE self)
354
+ {
355
+ GET_EXPL();
356
+ char *str = frt_expl_to_html(expl);
357
+ VALUE rstr = rb_str_new2(str);
358
+ free(str);
359
+ return rstr;
360
+ }
361
+
362
+ /*
363
+ * call-seq:
364
+ * explanation.score -> float
365
+ *
366
+ * Returns the score represented by the query. This can be used for debugging
367
+ * purposes mainly to check that the score returned by the explanation
368
+ * matches that of the score for the document in the original query.
369
+ */
370
+ static VALUE
371
+ frb_expl_score(VALUE self)
372
+ {
373
+ GET_EXPL();
374
+ return rb_float_new((double)expl->value);
375
+ }
376
+
377
+ /****************************************************************************
378
+ *
379
+ * Query Methods
380
+ *
381
+ ****************************************************************************/
382
+
383
+ static void
384
+ frb_q_free(void *p)
385
+ {
386
+ object_del(p);
387
+ frt_q_deref((FrtQuery *)p);
388
+ }
389
+
390
+ #define GET_Q() FrtQuery *q = (FrtQuery *)DATA_PTR(self)
391
+
392
+ /*
393
+ * call-seq:
394
+ * query.to_s -> string
395
+ *
396
+ * Return a string representation of the query. Most of the time, passing
397
+ * this string through the Query parser will give you the exact Query you
398
+ * began with. This can be a good way to explore how the QueryParser works.
399
+ */
400
+ static VALUE
401
+ frb_q_to_s(int argc, VALUE *argv, VALUE self)
402
+ {
403
+ GET_Q();
404
+ VALUE rstr, rfield;
405
+ char *str;
406
+ FrtSymbol field = (FrtSymbol)NULL;
407
+ if (rb_scan_args(argc, argv, "01", &rfield)) {
408
+ field = frb_field(rfield);
409
+ }
410
+ str = q->to_s(q, field);
411
+ rstr = rb_str_new2(str);
412
+ free(str);
413
+ return rstr;
414
+ }
415
+
416
+ /*
417
+ * call-seq:
418
+ * query.boost
419
+ *
420
+ * Returns the queries boost value. See the Query description for more
421
+ * information on Query boosts.
422
+ */
423
+ static VALUE
424
+ frb_q_get_boost(VALUE self)
425
+ {
426
+ GET_Q();
427
+ return rb_float_new((double)q->boost);
428
+ }
429
+
430
+ /*
431
+ * call-seq:
432
+ * query.boost = boost -> boost
433
+ *
434
+ * Set the boost for a query. See the Query description for more information
435
+ * on Query boosts.
436
+ */
437
+ static VALUE
438
+ frb_q_set_boost(VALUE self, VALUE rboost)
439
+ {
440
+ GET_Q();
441
+ q->boost = (float)NUM2DBL(rboost);
442
+ return rboost;
443
+ }
444
+
445
+ /*
446
+ * call-seq:
447
+ * query.hash -> number
448
+ *
449
+ * Return a hash value for the query. This is used for caching query results
450
+ * in a hash object.
451
+ */
452
+ static VALUE
453
+ frb_q_hash(VALUE self)
454
+ {
455
+ GET_Q();
456
+ return INT2FIX(q->hash(q));
457
+ }
458
+
459
+ /*
460
+ * call-seq;
461
+ * query.eql?(other_query) -> bool
462
+ * query == other_query -> bool
463
+ *
464
+ * Return true if +query+ equals +other_query+. Theoretically, two queries are
465
+ * equal if the always return the same results, no matter what the contents
466
+ * of the index. Practically, however, this is difficult to implement
467
+ * efficiently for queries like BooleanQuery since the ordering of clauses
468
+ * unspecified. "Ruby AND Rails" will not match "Rails AND Ruby" for example,
469
+ * although their result sets will be identical. Most queries should match as
470
+ * expected however.
471
+ */
472
+ static VALUE
473
+ frb_q_eql(VALUE self, VALUE other)
474
+ {
475
+ GET_Q();
476
+ FrtQuery *oq;
477
+ Data_Get_Struct(other, FrtQuery, oq);
478
+ return q->eq(q, oq) ? Qtrue : Qfalse;
479
+ }
480
+
481
+ /*
482
+ * call-seq:
483
+ * query.terms(searcher) -> term_array
484
+ *
485
+ * Returns an array of terms searched for by this query. This can be used for
486
+ * implementing an external query highlighter for example. You must supply a
487
+ * searcher so that the query can be rewritten and optimized like it would be
488
+ * in a real search.
489
+ */
490
+ static VALUE
491
+ frb_q_get_terms(VALUE self, VALUE searcher)
492
+ {
493
+ VALUE rterms = rb_ary_new();
494
+ FrtHashSet *terms = frt_hs_new((frt_hash_ft)&frt_term_hash,
495
+ (frt_eq_ft)&frt_term_eq,
496
+ (frt_free_ft)frt_term_destroy);
497
+ FrtHashSetEntry *hse;
498
+ GET_Q();
499
+ FrtSearcher *sea = (FrtSearcher *)DATA_PTR(searcher);
500
+ FrtQuery *rq = sea->rewrite(sea, q);
501
+ rq->extract_terms(rq, terms);
502
+ frt_q_deref(rq);
503
+
504
+ for (hse = terms->first; hse; hse = hse->next) {
505
+ FrtTerm *term = (FrtTerm *)hse->elem;
506
+ rb_ary_push(rterms, frb_get_term(term->field, term->text));
507
+ }
508
+ frt_hs_destroy(terms);
509
+ return rterms;
510
+ }
511
+
512
+ #define MK_QUERY(klass, q) Data_Wrap_Struct(klass, NULL, &frb_q_free, q)
513
+ VALUE
514
+ frb_get_q(FrtQuery *q)
515
+ {
516
+ VALUE self = object_get(q);
517
+
518
+ if (self == Qnil) {
519
+ switch (q->type) {
520
+ case TERM_QUERY:
521
+ self = MK_QUERY(cTermQuery, q);
522
+ break;
523
+ case MULTI_TERM_QUERY:
524
+ self = MK_QUERY(cMultiTermQuery, q);
525
+ break;
526
+ case BOOLEAN_QUERY:
527
+ self = MK_QUERY(cBooleanQuery, q);
528
+ break;
529
+ case PHRASE_QUERY:
530
+ self = MK_QUERY(cPhraseQuery, q);
531
+ break;
532
+ case CONSTANT_QUERY:
533
+ self = MK_QUERY(cConstantScoreQuery, q);
534
+ break;
535
+ case FILTERED_QUERY:
536
+ self = MK_QUERY(cFilteredQuery, q);
537
+ break;
538
+ case MATCH_ALL_QUERY:
539
+ self = MK_QUERY(cMatchAllQuery, q);
540
+ break;
541
+ case RANGE_QUERY:
542
+ self = MK_QUERY(cRangeQuery, q);
543
+ break;
544
+ case TYPED_RANGE_QUERY:
545
+ self = MK_QUERY(cTypedRangeQuery, q);
546
+ break;
547
+ case WILD_CARD_QUERY:
548
+ self = MK_QUERY(cWildcardQuery, q);
549
+ break;
550
+ case FUZZY_QUERY:
551
+ self = MK_QUERY(cFuzzyQuery, q);
552
+ break;
553
+ case PREFIX_QUERY:
554
+ self = MK_QUERY(cPrefixQuery, q);
555
+ break;
556
+ case SPAN_TERM_QUERY:
557
+ self = MK_QUERY(cSpanMultiTermQuery, q);
558
+ break;
559
+ case SPAN_MULTI_TERM_QUERY:
560
+ self = MK_QUERY(cSpanPrefixQuery, q);
561
+ break;
562
+ case SPAN_PREFIX_QUERY:
563
+ self = MK_QUERY(cSpanTermQuery, q);
564
+ break;
565
+ case SPAN_FIRST_QUERY:
566
+ self = MK_QUERY(cSpanFirstQuery, q);
567
+ break;
568
+ case SPAN_OR_QUERY:
569
+ self = MK_QUERY(cSpanOrQuery, q);
570
+ break;
571
+ case SPAN_NOT_QUERY:
572
+ self = MK_QUERY(cSpanNotQuery, q);
573
+ break;
574
+ case SPAN_NEAR_QUERY:
575
+ self = MK_QUERY(cSpanNearQuery, q);
576
+ break;
577
+ default:
578
+ rb_raise(rb_eArgError, "Unknown query type");
579
+ break;
580
+ }
581
+ object_add(q, self);
582
+ }
583
+ return self;
584
+ }
585
+
586
+ /****************************************************************************
587
+ *
588
+ * TermQuery Methods
589
+ *
590
+ ****************************************************************************/
591
+
592
+ /*
593
+ * call-seq:
594
+ * TermQuery.new(field, term) -> term_query
595
+ *
596
+ * Create a new TermQuery object which will match all documents with the term
597
+ * +term+ in the field +field+.
598
+ *
599
+ * Note: As usual, field should be a symbol
600
+ */
601
+ static VALUE
602
+ frb_tq_init(VALUE self, VALUE rfield, VALUE rterm)
603
+ {
604
+ FrtSymbol field = frb_field(rfield);
605
+ char *term = rs2s(rb_obj_as_string(rterm));
606
+ FrtQuery *q = frt_tq_new(field, term);
607
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
608
+ object_add(q, self);
609
+ return self;
610
+ }
611
+
612
+ /****************************************************************************
613
+ *
614
+ * MultiTermQuery Methods
615
+ *
616
+ ****************************************************************************/
617
+
618
+ /*
619
+ * call-seq:
620
+ * MultiTermQuery.default_max_terms -> number
621
+ *
622
+ * Get the default value for +:max_terms+ in a MultiTermQuery. This value is
623
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
624
+ */
625
+ static VALUE
626
+ frb_mtq_get_dmt(VALUE self)
627
+ {
628
+ return rb_cvar_get(cMultiTermQuery, id_default_max_terms);
629
+ }
630
+
631
+ /*
632
+ * call-seq:
633
+ * MultiTermQuery.default_max_terms = max_terms -> max_terms
634
+ *
635
+ * Set the default value for +:max_terms+ in a MultiTermQuery. This value is
636
+ * also used by PrefixQuery, FuzzyQuery and WildcardQuery.
637
+ */
638
+ static VALUE
639
+ frb_mtq_set_dmt(VALUE self, VALUE rnum_terms)
640
+ {
641
+ int max_terms = FIX2INT(rnum_terms);
642
+ if (max_terms <= 0) {
643
+ rb_raise(rb_eArgError,
644
+ "%d <= 0. @@max_terms must be > 0", max_terms);
645
+ }
646
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, rnum_terms);
647
+ return rnum_terms;
648
+ }
649
+
650
+ /*
651
+ * call-seq:
652
+ * MultiTermQuery.new(field, options = {}) -> multi_term_query
653
+ *
654
+ * Create a new MultiTermQuery on field +field+. You will also need to add
655
+ * terms to the query using the MultiTermQuery#add_term method.
656
+ *
657
+ * There are several options available to you when creating a
658
+ * MultiTermQueries;
659
+ *
660
+ * === Options
661
+ *
662
+ * :max_terms:: You can specify the maximum number of terms that can be
663
+ * added to the query. This is to prevent memory usage overflow,
664
+ * particularly when don't directly control the addition of
665
+ * terms to the Query object like when you create Wildcard
666
+ * queries. For example, searching for "content:*" would cause
667
+ * problems without this limit.
668
+ * :min_score:: The minimum score a term must have to be added to the query.
669
+ * For example you could implement your own wild-card queries
670
+ * that gives matches a score. To limit the number of terms
671
+ * added to the query you could set a lower limit to this score.
672
+ * FuzzyQuery in particular makes use of this parameter.
673
+ */
674
+ static VALUE
675
+ frb_mtq_init(int argc, VALUE *argv, VALUE self)
676
+ {
677
+ VALUE rfield, roptions;
678
+ float min_score = 0.0f;
679
+ int max_terms = FIX2INT(frb_mtq_get_dmt(self));
680
+ FrtQuery *q;
681
+
682
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
683
+ VALUE v;
684
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
685
+ max_terms = FIX2INT(v);
686
+ }
687
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_score))) {
688
+ min_score = (float)NUM2DBL(v);
689
+ }
690
+ }
691
+ q = frt_multi_tq_new_conf(frb_field(rfield), max_terms, min_score);
692
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
693
+ object_add(q, self);
694
+ return self;
695
+ }
696
+
697
+ /*
698
+ * call-seq:
699
+ * multi_term_query.add_term(term, score = 1.0) -> self
700
+ * multi_term_query << term1 << term2 << term3 -> self
701
+ *
702
+ * Add a term to the MultiTermQuery with the score 1.0 unless specified
703
+ * otherwise.
704
+ */
705
+ static VALUE
706
+ frb_mtq_add_term(int argc, VALUE *argv, VALUE self)
707
+ {
708
+ GET_Q();
709
+ VALUE rterm, rboost;
710
+ float boost = 1.0f;
711
+ char *term = NULL;
712
+ if (rb_scan_args(argc, argv, "11", &rterm, &rboost) == 2) {
713
+ boost = (float)NUM2DBL(rboost);
714
+ }
715
+ term = StringValuePtr(rterm);
716
+ frt_multi_tq_add_term_boost(q, term, boost);
717
+
718
+ return self;
719
+ }
720
+
721
+ typedef FrtQuery *(*mtq_maker_ft)(FrtSymbol field, const char *term);
722
+
723
+ static int
724
+ get_max_terms(VALUE rmax_terms, int max_terms)
725
+ {
726
+ VALUE v;
727
+ switch (TYPE(rmax_terms)) {
728
+ case T_HASH:
729
+ if (Qnil != (v = rb_hash_aref(rmax_terms, sym_max_terms))) {
730
+ max_terms = FIX2INT(v);
731
+ }
732
+ break;
733
+ case T_FIXNUM:
734
+ max_terms = FIX2INT(rmax_terms);
735
+ break;
736
+ default:
737
+ rb_raise(rb_eArgError, "max_terms must be an integer");
738
+ }
739
+ return max_terms;
740
+ }
741
+
742
+ static VALUE
743
+ frb_mtq_init_specific(int argc, VALUE *argv, VALUE self, mtq_maker_ft mm)
744
+ {
745
+ VALUE rfield, rterm, rmax_terms;
746
+ int max_terms =
747
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
748
+ FrtQuery *q;
749
+
750
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &rmax_terms) == 3) {
751
+ max_terms = get_max_terms(rmax_terms, max_terms);
752
+ }
753
+
754
+ q = (*mm)(frb_field(rfield), StringValuePtr(rterm));
755
+ FrtMTQMaxTerms(q) = max_terms;
756
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
757
+ object_add(q, self);
758
+ return self;
759
+ }
760
+
761
+ /****************************************************************************
762
+ *
763
+ * BooleanClause Methods
764
+ *
765
+ ****************************************************************************/
766
+
767
+ static void
768
+ frb_bc_mark(void *p)
769
+ {
770
+ frb_gc_mark(((FrtBooleanClause *)p)->query);
771
+ }
772
+
773
+ static void
774
+ frb_bc_free(void *p)
775
+ {
776
+ object_del(p);
777
+ frt_bc_deref((FrtBooleanClause *)p);
778
+ }
779
+
780
+ static VALUE
781
+ frb_bc_wrap(FrtBooleanClause *bc)
782
+ {
783
+ VALUE self = Data_Wrap_Struct(cBooleanClause, &frb_bc_mark, &frb_bc_free, bc);
784
+ FRT_REF(bc);
785
+ object_add(bc, self);
786
+ return self;
787
+ }
788
+
789
+ static FrtBCType
790
+ frb_get_occur(VALUE roccur)
791
+ {
792
+ FrtBCType occur = FRT_BC_SHOULD;
793
+
794
+ if (roccur == sym_should) {
795
+ occur = FRT_BC_SHOULD;
796
+ } else if (roccur == sym_must) {
797
+ occur = FRT_BC_MUST;
798
+ } else if (roccur == sym_must_not) {
799
+ occur = FRT_BC_MUST_NOT;
800
+ } else {
801
+ rb_raise(rb_eArgError, "occur argument must be one of [:must, "
802
+ ":should, :must_not]");
803
+ }
804
+ return occur;
805
+ }
806
+
807
+ /*
808
+ * call-seq:
809
+ * BooleanClause.new(query, occur = :should) -> BooleanClause
810
+ *
811
+ * Create a new BooleanClause object, wrapping the query +query+. +occur+
812
+ * must be one of +:must+, +:should+ or +:must_not+.
813
+ */
814
+ static VALUE
815
+ frb_bc_init(int argc, VALUE *argv, VALUE self)
816
+ {
817
+ FrtBooleanClause *bc;
818
+ VALUE rquery, roccur;
819
+ unsigned int occur = FRT_BC_SHOULD;
820
+ FrtQuery *sub_q;
821
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
822
+ occur = frb_get_occur(roccur);
823
+ }
824
+ Data_Get_Struct(rquery, FrtQuery, sub_q);
825
+ FRT_REF(sub_q);
826
+ bc = frt_bc_new(sub_q, occur);
827
+ Frt_Wrap_Struct(self, &frb_bc_mark, &frb_bc_free, bc);
828
+ object_add(bc, self);
829
+ return self;
830
+ }
831
+
832
+ #define GET_BC() FrtBooleanClause *bc = (FrtBooleanClause *)DATA_PTR(self)
833
+ /*
834
+ * call-seq:
835
+ * clause.query -> query
836
+ *
837
+ * Return the query object wrapped by this BooleanClause.
838
+ */
839
+ static VALUE
840
+ frb_bc_get_query(VALUE self)
841
+ {
842
+ GET_BC();
843
+ return object_get(bc->query);
844
+ }
845
+
846
+ /*
847
+ * call-seq:
848
+ * clause.query = query -> query
849
+ *
850
+ * Set the query wrapped by this BooleanClause.
851
+ */
852
+ static VALUE
853
+ frb_bc_set_query(VALUE self, VALUE rquery)
854
+ {
855
+ GET_BC();
856
+ Data_Get_Struct(rquery, FrtQuery, bc->query);
857
+ return rquery;
858
+ }
859
+
860
+ /*
861
+ * call-seq:
862
+ * clause.required? -> bool
863
+ *
864
+ * Return true if this clause is required. ie, this will be true if occur was
865
+ * equal to +:must+.
866
+ */
867
+ static VALUE
868
+ frb_bc_is_required(VALUE self)
869
+ {
870
+ GET_BC();
871
+ return bc->is_required ? Qtrue : Qfalse;
872
+ }
873
+
874
+ /*
875
+ * call-seq:
876
+ * clause.prohibited? -> bool
877
+ *
878
+ * Return true if this clause is prohibited. ie, this will be true if occur was
879
+ * equal to +:must_not+.
880
+ */
881
+ static VALUE
882
+ frb_bc_is_prohibited(VALUE self)
883
+ {
884
+ GET_BC();
885
+ return bc->is_prohibited ? Qtrue : Qfalse;
886
+ }
887
+
888
+ /*
889
+ * call-seq:
890
+ * clause.occur = occur -> occur
891
+ *
892
+ * Set the +occur+ value for this BooleanClause. +occur+ must be one of
893
+ * +:must+, +:should+ or +:must_not+.
894
+ */
895
+ static VALUE
896
+ frb_bc_set_occur(VALUE self, VALUE roccur)
897
+ {
898
+ GET_BC();
899
+ FrtBCType occur = frb_get_occur(roccur);
900
+ frt_bc_set_occur(bc, occur);
901
+
902
+ return roccur;
903
+ }
904
+
905
+ /*
906
+ * call-seq:
907
+ * clause.to_s -> string
908
+ *
909
+ * Return a string representation of this clause. This will not be used by
910
+ * BooleanQuery#to_s. It is only used by BooleanClause#to_s and will specify
911
+ * whether the clause is +:must+, +:should+ or +:must_not+.
912
+ */
913
+ static VALUE
914
+ frb_bc_to_s(VALUE self)
915
+ {
916
+ VALUE rstr;
917
+ char *qstr, *str;
918
+ const char *ostr = "";
919
+ int len;
920
+ GET_BC();
921
+ qstr = bc->query->to_s(bc->query, (FrtSymbol)NULL);
922
+ switch (bc->occur) {
923
+ case FRT_BC_SHOULD:
924
+ ostr = "Should";
925
+ break;
926
+ case FRT_BC_MUST:
927
+ ostr = "Must";
928
+ break;
929
+ case FRT_BC_MUST_NOT:
930
+ ostr = "Must Not";
931
+ break;
932
+ }
933
+ len = strlen(ostr) + strlen(qstr) + 2;
934
+ str = FRT_ALLOC_N(char, len);
935
+ sprintf(str, "%s:%s", ostr, qstr);
936
+ rstr = rb_str_new(str, len);
937
+ free(qstr);
938
+ free(str);
939
+ return rstr;
940
+ }
941
+
942
+ /****************************************************************************
943
+ *
944
+ * BooleanQuery Methods
945
+ *
946
+ ****************************************************************************/
947
+
948
+ static void
949
+ frb_bq_mark(void *p)
950
+ {
951
+ int i;
952
+ FrtQuery *q = (FrtQuery *)p;
953
+ FrtBooleanQuery *bq = (FrtBooleanQuery *)q;
954
+ for (i = 0; i < bq->clause_cnt; i++) {
955
+ frb_gc_mark(bq->clauses[i]);
956
+ }
957
+ }
958
+
959
+ /*
960
+ * call-seq:
961
+ * BooleanQuery.new(coord_disable = false)
962
+ *
963
+ * Create a new BooleanQuery. If you don't care about the scores of the
964
+ * sub-queries added to the query (as would be the case for many
965
+ * automatically generated queries) you can disable the coord_factor of the
966
+ * score. This will slightly improve performance for the query. Usually you
967
+ * should leave this parameter as is.
968
+ */
969
+ static VALUE
970
+ frb_bq_init(int argc, VALUE *argv, VALUE self)
971
+ {
972
+ VALUE rcoord_disabled;
973
+ bool coord_disabled = false;
974
+ FrtQuery *q;
975
+ if (rb_scan_args(argc, argv, "01", &rcoord_disabled)) {
976
+ coord_disabled = RTEST(rcoord_disabled);
977
+ }
978
+ q = frt_bq_new(coord_disabled);
979
+ Frt_Wrap_Struct(self, &frb_bq_mark, &frb_q_free, q);
980
+ object_add(q, self);
981
+ return self;
982
+ }
983
+
984
+ /*
985
+ * call-seq:
986
+ * boolean_query.add_query(query, occur = :should) -> boolean_clause
987
+ * boolean_query.<<(query, occur = :should) -> boolean_clause
988
+ * boolean_query << boolean_clause -> boolean_clause
989
+ *
990
+ * Us this method to add sub-queries to a BooleanQuery. You can either add
991
+ * a straight Query or a BooleanClause. When adding a Query, the default
992
+ * occurrence requirement is :should. That is the Query's match will be
993
+ * scored but it isn't essential for a match. If the query should be
994
+ * essential, use :must. For exclusive queries use :must_not.
995
+ *
996
+ * When adding a Boolean clause to a BooleanQuery there is no need to set the
997
+ * occurrence property because it is already set in the BooleanClause.
998
+ * Therefor the +occur+ parameter will be ignored in this case.
999
+ *
1000
+ * query:: Query to add to the BooleanQuery
1001
+ * occur:: occurrence requirement for the query being added. Must be one of
1002
+ * [:must, :should, :must_not]
1003
+ * returns:: BooleanClause which was added
1004
+ */
1005
+ static VALUE
1006
+ frb_bq_add_query(int argc, VALUE *argv, VALUE self)
1007
+ {
1008
+ GET_Q();
1009
+ VALUE rquery, roccur;
1010
+ FrtBCType occur = FRT_BC_SHOULD;
1011
+ FrtQuery *sub_q;
1012
+ VALUE klass;
1013
+
1014
+ if (rb_scan_args(argc, argv, "11", &rquery, &roccur) == 2) {
1015
+ occur = frb_get_occur(roccur);
1016
+ }
1017
+ klass = CLASS_OF(rquery);
1018
+ if (klass == cBooleanClause) {
1019
+ FrtBooleanClause *bc = (FrtBooleanClause *)DATA_PTR(rquery);
1020
+ if (argc > 1) {
1021
+ rb_warning("Second argument to BooleanQuery#add is ignored "
1022
+ "when adding BooleanClause");
1023
+ }
1024
+ frt_bq_add_clause(q, bc);
1025
+ return rquery;
1026
+ } else if (TYPE(rquery) == T_DATA) {
1027
+ Data_Get_Struct(rquery, FrtQuery, sub_q);
1028
+ return frb_bc_wrap(frt_bq_add_query(q, sub_q, occur));
1029
+ } else {
1030
+ rb_raise(rb_eArgError, "Cannot add %s to a BooleanQuery",
1031
+ rb_class2name(klass));
1032
+ }
1033
+ return self;
1034
+ }
1035
+
1036
+ /****************************************************************************
1037
+ *
1038
+ * RangeQuery Methods
1039
+ *
1040
+ ****************************************************************************/
1041
+
1042
+ static void
1043
+ get_range_params(VALUE roptions, char **lterm, char **uterm,
1044
+ bool *include_lower, bool *include_upper)
1045
+ {
1046
+ VALUE v;
1047
+ Check_Type(roptions, T_HASH);
1048
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower))) {
1049
+ *lterm = rs2s(rb_obj_as_string(v));
1050
+ *include_lower = true;
1051
+ }
1052
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper))) {
1053
+ *uterm = rs2s(rb_obj_as_string(v));
1054
+ *include_upper = true;
1055
+ }
1056
+ if (Qnil != (v = rb_hash_aref(roptions, sym_lower_exclusive))) {
1057
+ *lterm = rs2s(rb_obj_as_string(v));
1058
+ *include_lower = false;
1059
+ }
1060
+ if (Qnil != (v = rb_hash_aref(roptions, sym_upper_exclusive))) {
1061
+ *uterm = rs2s(rb_obj_as_string(v));
1062
+ *include_upper = false;
1063
+ }
1064
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_lower))) {
1065
+ *include_lower = RTEST(v);
1066
+ }
1067
+ if (Qnil != (v = rb_hash_aref(roptions, sym_include_upper))) {
1068
+ *include_upper = RTEST(v);
1069
+ }
1070
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than))) {
1071
+ *lterm = rs2s(rb_obj_as_string(v));
1072
+ *include_lower = false;
1073
+ }
1074
+ if (Qnil != (v = rb_hash_aref(roptions, sym_greater_than_or_equal_to))) {
1075
+ *lterm = rs2s(rb_obj_as_string(v));
1076
+ *include_lower = true;
1077
+ }
1078
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than))) {
1079
+ *uterm = rs2s(rb_obj_as_string(v));
1080
+ *include_upper = false;
1081
+ }
1082
+ if (Qnil != (v = rb_hash_aref(roptions, sym_less_than_or_equal_to))) {
1083
+ *uterm = rs2s(rb_obj_as_string(v));
1084
+ *include_upper = true;
1085
+ }
1086
+ if (!*lterm && !*uterm) {
1087
+ rb_raise(rb_eArgError,
1088
+ "The bounds of a range should not both be nil");
1089
+ }
1090
+ if (*include_lower && !*lterm) {
1091
+ rb_raise(rb_eArgError,
1092
+ "The lower bound should not be nil if it is inclusive");
1093
+ }
1094
+ if (*include_upper && !*uterm) {
1095
+ rb_raise(rb_eArgError,
1096
+ "The upper bound should not be nil if it is inclusive");
1097
+ }
1098
+ }
1099
+
1100
+ /*
1101
+ * call-seq:
1102
+ * RangeQuery.new(field, options = {}) -> range_query
1103
+ *
1104
+ * Create a new RangeQuery on field +field+. There are two ways to build a
1105
+ * range query. With the old-style options; +:lower+, +:upper+,
1106
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
1107
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
1108
+ * In the old-style options, limits are inclusive by default.
1109
+ *
1110
+ * == Examples
1111
+ *
1112
+ * q = RangeQuery.new(:date, :lower => "200501", :include_lower => false)
1113
+ * # is equivalent to
1114
+ * q = RangeQuery.new(:date, :< => "200501")
1115
+ * # is equivalent to
1116
+ * q = RangeQuery.new(:date, :lower_exclusive => "200501")
1117
+ *
1118
+ * q = RangeQuery.new(:date, :lower => "200501", :upper => 200502)
1119
+ * # is equivalent to
1120
+ * q = RangeQuery.new(:date, :>= => "200501", :<= => 200502)
1121
+ *
1122
+ */
1123
+ static VALUE
1124
+ frb_rq_init(VALUE self, VALUE rfield, VALUE roptions)
1125
+ {
1126
+ FrtQuery *q;
1127
+ char *lterm = NULL;
1128
+ char *uterm = NULL;
1129
+ bool include_lower = false;
1130
+ bool include_upper = false;
1131
+
1132
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1133
+ q = frt_rq_new(frb_field(rfield),
1134
+ lterm, uterm,
1135
+ include_lower, include_upper);
1136
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1137
+ object_add(q, self);
1138
+ return self;
1139
+ }
1140
+
1141
+ /****************************************************************************
1142
+ *
1143
+ * TypedRangeQuery Methods
1144
+ *
1145
+ ****************************************************************************/
1146
+
1147
+ /*
1148
+ * call-seq:
1149
+ * TypedRangeQuery.new(field, options = {}) -> range_query
1150
+ *
1151
+ * Create a new TypedRangeQuery on field +field+. This differs from the
1152
+ * standard RangeQuery in that it allows range queries with unpadded numbers,
1153
+ * both positive and negative, integer and float. You can even use
1154
+ * hexadecimal numbers. However it could be a lot slower than the standard
1155
+ * RangeQuery on large indexes.
1156
+ *
1157
+ * There are two ways to build a range query. With the old-style options;
1158
+ * +:lower+, +:upper+, +:include_lower+ and +:include_upper+ or the new style
1159
+ * options; +:<+, +:<=+, +:>+ and +:>=+. The options' names should speak for
1160
+ * themselves. In the old-style options, limits are inclusive by default.
1161
+ *
1162
+ * == Examples
1163
+ *
1164
+ * q = TypedRangeQuery.new(:date, :lower => "0.1", :include_lower => false)
1165
+ * # is equivalent to
1166
+ * q = TypedRangeQuery.new(:date, :< => "0.1")
1167
+ * # is equivalent to
1168
+ * q = TypedRangeQuery.new(:date, :lower_exclusive => "0.1")
1169
+ *
1170
+ * # Note that you numbers can be strings or actual numbers
1171
+ * q = TypedRangeQuery.new(:date, :lower => "-12.32", :upper => 0.21)
1172
+ * # is equivalent to
1173
+ * q = TypedRangeQuery.new(:date, :>= => "-12.32", :<= => 0.21)
1174
+ */
1175
+ static VALUE
1176
+ frb_trq_init(VALUE self, VALUE rfield, VALUE roptions)
1177
+ {
1178
+ FrtQuery *q;
1179
+ char *lterm = NULL;
1180
+ char *uterm = NULL;
1181
+ bool include_lower = false;
1182
+ bool include_upper = false;
1183
+
1184
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
1185
+ q = frt_trq_new(frb_field(rfield),
1186
+ lterm, uterm,
1187
+ include_lower, include_upper);
1188
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1189
+ object_add(q, self);
1190
+ return self;
1191
+ }
1192
+
1193
+ /****************************************************************************
1194
+ *
1195
+ * PhraseQuery Methods
1196
+ *
1197
+ ****************************************************************************/
1198
+
1199
+ /*
1200
+ * call-seq:
1201
+ * PhraseQuery.new(field, slop = 0) -> phrase_query
1202
+ *
1203
+ * Create a new PhraseQuery on the field +field+. You need to add terms to
1204
+ * the query it will do anything of value. See PhraseQuery#add_term.
1205
+ */
1206
+ static VALUE
1207
+ frb_phq_init(int argc, VALUE *argv, VALUE self)
1208
+ {
1209
+ VALUE rfield, rslop;
1210
+ FrtQuery *q;
1211
+ rb_scan_args(argc, argv, "11", &rfield, &rslop);
1212
+ q = frt_phq_new(frb_field(rfield));
1213
+ if (argc == 2) {
1214
+ ((FrtPhraseQuery *)q)->slop = FIX2INT(rslop);
1215
+ }
1216
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1217
+ object_add(q, self);
1218
+ return self;
1219
+ }
1220
+
1221
+ /*
1222
+ * call-seq:
1223
+ * phrase_query.add_term(term, position_increment = 1) -> phrase_query
1224
+ * phrase_query << term -> phrase_query
1225
+ *
1226
+ * Add a term to the phrase query. By default the position_increment is set
1227
+ * to 1 so each term you add is expected to come directly after the previous
1228
+ * term. By setting position_increment to 2 you are specifying that the term
1229
+ * you just added should occur two terms after the previous term. For
1230
+ * example;
1231
+ *
1232
+ * phrase_query.add_term("big").add_term("house", 2)
1233
+ * # matches => "big brick house"
1234
+ * # matches => "big red house"
1235
+ * # doesn't match => "big house"
1236
+ */
1237
+ static VALUE
1238
+ frb_phq_add(int argc, VALUE *argv, VALUE self)
1239
+ {
1240
+ VALUE rterm, rpos_inc;
1241
+ int pos_inc = 1;
1242
+ GET_Q();
1243
+ if (rb_scan_args(argc, argv, "11", &rterm, &rpos_inc) == 2) {
1244
+ pos_inc = FIX2INT(rpos_inc);
1245
+ }
1246
+ switch (TYPE(rterm)) {
1247
+ case T_STRING:
1248
+ {
1249
+ frt_phq_add_term(q, StringValuePtr(rterm), pos_inc);
1250
+ break;
1251
+ }
1252
+ case T_ARRAY:
1253
+ {
1254
+ int i;
1255
+ char *t;
1256
+ if (RARRAY_LEN(rterm) < 1) {
1257
+ rb_raise(rb_eArgError, "Cannot add empty array to a "
1258
+ "PhraseQuery. You must add either a string or "
1259
+ "an array of strings");
1260
+ }
1261
+ t = StringValuePtr(RARRAY_PTR(rterm)[0]);
1262
+ frt_phq_add_term(q, t, pos_inc);
1263
+ for (i = 1; i < RARRAY_LEN(rterm); i++) {
1264
+ t = StringValuePtr(RARRAY_PTR(rterm)[i]);
1265
+ frt_phq_append_multi_term(q, t);
1266
+ }
1267
+ break;
1268
+ }
1269
+ default:
1270
+ rb_raise(rb_eArgError, "You can only add a string or an array of "
1271
+ "strings to a PhraseQuery, not a %s\n",
1272
+ rs2s(rb_obj_as_string(rterm)));
1273
+ }
1274
+ return self;
1275
+ }
1276
+
1277
+ /*
1278
+ * call-seq:
1279
+ * phrase_query.slop -> integer
1280
+ *
1281
+ * Return the slop set for this phrase query. See the PhraseQuery
1282
+ * description for more information on slop
1283
+ */
1284
+ static VALUE
1285
+ frb_phq_get_slop(VALUE self)
1286
+ {
1287
+ GET_Q();
1288
+ return INT2FIX(((FrtPhraseQuery *)q)->slop);
1289
+ }
1290
+
1291
+ /*
1292
+ * call-seq:
1293
+ * phrase_query.slop = slop -> slop
1294
+ *
1295
+ * Set the slop set for this phrase query. See the PhraseQuery description
1296
+ * for more information on slop
1297
+ */
1298
+ static VALUE
1299
+ frb_phq_set_slop(VALUE self, VALUE rslop)
1300
+ {
1301
+ GET_Q();
1302
+ ((FrtPhraseQuery *)q)->slop = FIX2INT(rslop);
1303
+ return self;
1304
+ }
1305
+
1306
+ /****************************************************************************
1307
+ *
1308
+ * PrefixQuery Methods
1309
+ *
1310
+ ****************************************************************************/
1311
+
1312
+ /*
1313
+ * call-seq:
1314
+ * PrefixQuery.new(field, prefix, options = {}) -> prefix-query
1315
+ *
1316
+ * Create a new PrefixQuery to search for all terms with the prefix +prefix+
1317
+ * in the field +field+. There is one option that you can set to change the
1318
+ * behaviour of this query. +:max_terms+ specifies the maximum number of
1319
+ * terms to be added to the query when it is expanded into a MultiTermQuery.
1320
+ * Let's say for example you search an index with a million terms for all
1321
+ * terms beginning with the letter "s". You would end up with a very large
1322
+ * query which would use a lot of memory and take a long time to get results,
1323
+ * not to mention that it would probably match every document in the index.
1324
+ * To prevent queries like this crashing your application you can set
1325
+ * +:max_terms+ which limits the number of terms that get added to the query.
1326
+ * By default it is set to 512.
1327
+ */
1328
+ static VALUE
1329
+ frb_prq_init(int argc, VALUE *argv, VALUE self)
1330
+ {
1331
+ return frb_mtq_init_specific(argc, argv, self, &frt_prefixq_new);
1332
+ }
1333
+
1334
+ /****************************************************************************
1335
+ *
1336
+ * WildcardQuery Methods
1337
+ *
1338
+ ****************************************************************************/
1339
+
1340
+ /*
1341
+ * call-seq:
1342
+ * WildcardQuery.new(field, pattern, options = {}) -> wild-card-query
1343
+ *
1344
+ * Create a new WildcardQuery to search for all terms where the pattern
1345
+ * +pattern+ matches in the field +field+.
1346
+ *
1347
+ * There is one option that you can set to change the behaviour of this
1348
+ * query. +:max_terms+ specifies the maximum number of terms to be added to
1349
+ * the query when it is expanded into a MultiTermQuery. Let's say for
1350
+ * example you have a million terms in your index and you let your users do
1351
+ * wild-card queries and one runs a search for "*". You would end up with a
1352
+ * very large query which would use a lot of memory and take a long time to
1353
+ * get results, not to mention that it would probably match every document in
1354
+ * the index. To prevent queries like this crashing your application you can
1355
+ * set +:max_terms+ which limits the number of terms that get added to the
1356
+ * query. By default it is set to 512.
1357
+ */
1358
+ static VALUE
1359
+ frb_wcq_init(int argc, VALUE *argv, VALUE self)
1360
+ {
1361
+ return frb_mtq_init_specific(argc, argv, self, &frt_wcq_new);
1362
+ }
1363
+
1364
+ /****************************************************************************
1365
+ *
1366
+ * FuzzyQuery Methods
1367
+ *
1368
+ ****************************************************************************/
1369
+
1370
+ /*
1371
+ * call-seq:
1372
+ * FuzzyQuery.new(field, term, options = {}) -> fuzzy-query
1373
+ *
1374
+ * Create a new FuzzyQuery that will match terms with a similarity of at
1375
+ * least +:min_similarity+ to +term+. Similarity is scored using the
1376
+ * Levenshtein edit distance formula. See
1377
+ * http://en.wikipedia.org/wiki/Levenshtein_distance
1378
+ *
1379
+ * If a +:prefix_length+ > 0 is specified, a common prefix of that length is
1380
+ * also required.
1381
+ *
1382
+ * You can also set +:max_terms+ to prevent memory overflow problems. By
1383
+ * default it is set to 512.
1384
+ *
1385
+ * == Example
1386
+ *
1387
+ * FuzzyQuery.new(:content, "levenshtein",
1388
+ * :min_similarity => 0.8,
1389
+ * :prefix_length => 5,
1390
+ * :max_terms => 1024)
1391
+ *
1392
+ * field:: field to search
1393
+ * term:: term to search for including it's close matches
1394
+ * :min_similarity:: Default: 0.5. minimum levenshtein distance score for a
1395
+ * match
1396
+ * :prefix_length:: Default: 0. minimum prefix_match before levenshtein
1397
+ * distance is measured. This parameter is used to improve
1398
+ * performance. With a +:prefix_length+ of 0, all terms in
1399
+ * the index must be checked which can be quite a
1400
+ * performance hit. By setting the prefix length to a
1401
+ * larger number you minimize the number of terms that need
1402
+ * to be checked. Even 1 will cut down the work by a
1403
+ * factor of about 26 depending on your character set and
1404
+ * the first letter.
1405
+ * :max_terms:: Limits the number of terms that can be added to the
1406
+ * query when it is expanded as a MultiTermQuery. This is
1407
+ * not usually a problem with FuzzyQueries unless you set
1408
+ * +:min_similarity+ to a very low value.
1409
+ */
1410
+ static VALUE
1411
+ frb_fq_init(int argc, VALUE *argv, VALUE self)
1412
+ {
1413
+ FrtQuery *q;
1414
+ VALUE rfield, rterm, roptions;
1415
+ float min_sim =
1416
+ (float)NUM2DBL(rb_cvar_get(cFuzzyQuery, id_default_min_similarity));
1417
+ int pre_len =
1418
+ FIX2INT(rb_cvar_get(cFuzzyQuery, id_default_prefix_length));
1419
+ int max_terms =
1420
+ FIX2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));
1421
+
1422
+
1423
+ if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &roptions) >= 3) {
1424
+ VALUE v;
1425
+ Check_Type(roptions, T_HASH);
1426
+ if (Qnil != (v = rb_hash_aref(roptions, sym_prefix_length))) {
1427
+ pre_len = FIX2INT(v);
1428
+ }
1429
+ if (Qnil != (v = rb_hash_aref(roptions, sym_min_similarity))) {
1430
+ min_sim = (float)NUM2DBL(v);
1431
+ }
1432
+ if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
1433
+ max_terms = FIX2INT(v);
1434
+ }
1435
+ }
1436
+
1437
+ if (min_sim >= 1.0f) {
1438
+ rb_raise(rb_eArgError,
1439
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1440
+ } else if (min_sim < 0.0f) {
1441
+ rb_raise(rb_eArgError,
1442
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1443
+ }
1444
+ if (pre_len < 0) {
1445
+ rb_raise(rb_eArgError,
1446
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1447
+ }
1448
+ if (max_terms < 0) {
1449
+ rb_raise(rb_eArgError,
1450
+ "%d < 0. :max_terms must be >= 0", max_terms);
1451
+ }
1452
+
1453
+ q = frt_fuzq_new_conf(frb_field(rfield), StringValuePtr(rterm),
1454
+ min_sim, pre_len, max_terms);
1455
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1456
+ object_add(q, self);
1457
+ return self;
1458
+ }
1459
+
1460
+ /*
1461
+ * call-seq:
1462
+ * FuzzyQuery.prefix_length -> prefix_length
1463
+ *
1464
+ * Get the +:prefix_length+ for the query.
1465
+ */
1466
+ static VALUE
1467
+ frb_fq_pre_len(VALUE self)
1468
+ {
1469
+ GET_Q();
1470
+ return INT2FIX(((FrtFuzzyQuery *)q)->pre_len);
1471
+ }
1472
+
1473
+ /*
1474
+ * call-seq:
1475
+ * FuzzyQuery.min_similarity -> min_similarity
1476
+ *
1477
+ * Get the +:min_similarity+ for the query.
1478
+ */
1479
+ static VALUE
1480
+ frb_fq_min_sim(VALUE self)
1481
+ {
1482
+ GET_Q();
1483
+ return rb_float_new((double)((FrtFuzzyQuery *)q)->min_sim);
1484
+ }
1485
+
1486
+ /*
1487
+ * call-seq:
1488
+ * FuzzyQuery.default_min_similarity -> number
1489
+ *
1490
+ * Get the default value for +:min_similarity+
1491
+ */
1492
+ static VALUE
1493
+ frb_fq_get_dms(VALUE self)
1494
+ {
1495
+ return rb_cvar_get(cFuzzyQuery, id_default_min_similarity);
1496
+ }
1497
+
1498
+ extern float frt_qp_default_fuzzy_min_sim;
1499
+ /*
1500
+ * call-seq:
1501
+ * FuzzyQuery.default_min_similarity = min_sim -> min_sim
1502
+ *
1503
+ * Set the default value for +:min_similarity+
1504
+ */
1505
+ static VALUE
1506
+ frb_fq_set_dms(VALUE self, VALUE val)
1507
+ {
1508
+ double min_sim = NUM2DBL(val);
1509
+ if (min_sim >= 1.0) {
1510
+ rb_raise(rb_eArgError,
1511
+ "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
1512
+ } else if (min_sim < 0.0) {
1513
+ rb_raise(rb_eArgError,
1514
+ "%f < 0.0. :min_similarity must be > 0.0", min_sim);
1515
+ }
1516
+ frt_qp_default_fuzzy_min_sim = (float)min_sim;
1517
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity, val);
1518
+ return val;
1519
+ }
1520
+
1521
+ /*
1522
+ * call-seq:
1523
+ * FuzzyQuery.default_prefix_length -> number
1524
+ *
1525
+ * Get the default value for +:prefix_length+
1526
+ */
1527
+ static VALUE
1528
+ frb_fq_get_dpl(VALUE self)
1529
+ {
1530
+ return rb_cvar_get(cFuzzyQuery, id_default_prefix_length);
1531
+ }
1532
+
1533
+ extern int frt_qp_default_fuzzy_pre_len;
1534
+ /*
1535
+ * call-seq:
1536
+ * FuzzyQuery.default_prefix_length = prefix_length -> prefix_length
1537
+ *
1538
+ * Set the default value for +:prefix_length+
1539
+ */
1540
+ static VALUE
1541
+ frb_fq_set_dpl(VALUE self, VALUE val)
1542
+ {
1543
+ int pre_len = FIX2INT(val);
1544
+ if (pre_len < 0) {
1545
+ rb_raise(rb_eArgError,
1546
+ "%d < 0. :prefix_length must be >= 0", pre_len);
1547
+ }
1548
+ frt_qp_default_fuzzy_pre_len = pre_len;
1549
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length, val);
1550
+ return val;
1551
+ }
1552
+
1553
+
1554
+ /****************************************************************************
1555
+ *
1556
+ * MatchAllQuery Methods
1557
+ *
1558
+ ****************************************************************************/
1559
+
1560
+ static VALUE
1561
+ frb_maq_alloc(VALUE klass)
1562
+ {
1563
+ FrtQuery *q = frt_maq_new();
1564
+ VALUE self = Data_Wrap_Struct(klass, NULL, &frb_q_free, q);
1565
+ object_add(q, self);
1566
+ return self;
1567
+ }
1568
+
1569
+ /*
1570
+ * call-seq:
1571
+ * MatchAllQuery.new -> query
1572
+ *
1573
+ * Create a query which matches all documents.
1574
+ */
1575
+ static VALUE
1576
+ frb_maq_init(VALUE self)
1577
+ {
1578
+ return self;
1579
+ }
1580
+
1581
+ /****************************************************************************
1582
+ *
1583
+ * ConstantScoreQuery Methods
1584
+ *
1585
+ ****************************************************************************/
1586
+
1587
+ /*
1588
+ * call-seq:
1589
+ * ConstantScoreQuery.new(filter) -> query
1590
+ *
1591
+ * Create a ConstantScoreQuery which uses +filter+ to match documents giving
1592
+ * each document a constant score.
1593
+ */
1594
+ static VALUE
1595
+ frb_csq_init(VALUE self, VALUE rfilter)
1596
+ {
1597
+ FrtQuery *q;
1598
+ FrtFilter *filter;
1599
+ Data_Get_Struct(rfilter, FrtFilter, filter);
1600
+ q = frt_csq_new(filter);
1601
+
1602
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1603
+ object_add(q, self);
1604
+ return self;
1605
+ }
1606
+
1607
+ /****************************************************************************
1608
+ *
1609
+ * FilteredQuery Methods
1610
+ *
1611
+ ****************************************************************************/
1612
+
1613
+ static void
1614
+ frb_fqq_mark(void *p)
1615
+ {
1616
+ FrtFilteredQuery *fq = (FrtFilteredQuery *)p;
1617
+ frb_gc_mark(fq->query);
1618
+ frb_gc_mark(fq->filter);
1619
+ }
1620
+
1621
+ /*
1622
+ * call-seq:
1623
+ * FilteredQuery.new(query, filter) -> query
1624
+ *
1625
+ * Create a new FilteredQuery which filters +query+ with +filter+.
1626
+ */
1627
+ static VALUE
1628
+ frb_fqq_init(VALUE self, VALUE rquery, VALUE rfilter)
1629
+ {
1630
+ FrtQuery *sq, *q;
1631
+ FrtFilter *f;
1632
+ Data_Get_Struct(rquery, FrtQuery, sq);
1633
+ Data_Get_Struct(rfilter, FrtFilter, f);
1634
+ q = frt_fq_new(sq, f);
1635
+ FRT_REF(sq);
1636
+ FRT_REF(f);
1637
+ Frt_Wrap_Struct(self, &frb_fqq_mark, &frb_q_free, q);
1638
+ object_add(q, self);
1639
+ return self;
1640
+ }
1641
+
1642
+ /****************************************************************************
1643
+ *
1644
+ * SpanTermQuery Methods
1645
+ *
1646
+ ****************************************************************************/
1647
+
1648
+ /*
1649
+ * call-seq:
1650
+ * SpanTermQuery.new(field, term) -> query
1651
+ *
1652
+ * Create a new SpanTermQuery which matches all documents with the term
1653
+ * +term+ in the field +field+.
1654
+ */
1655
+ static VALUE
1656
+ frb_spantq_init(VALUE self, VALUE rfield, VALUE rterm)
1657
+ {
1658
+ FrtQuery *q = frt_spantq_new(frb_field(rfield), StringValuePtr(rterm));
1659
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1660
+ object_add(q, self);
1661
+ return self;
1662
+ }
1663
+
1664
+ /****************************************************************************
1665
+ *
1666
+ * SpanMultiTermQuery Methods
1667
+ *
1668
+ ****************************************************************************/
1669
+
1670
+ /*
1671
+ * call-seq:
1672
+ * SpanMultiTermQuery.new(field, terms) -> query
1673
+ *
1674
+ * Create a new SpanMultiTermQuery which matches all documents with the terms
1675
+ * +terms+ in the field +field+. +terms+ should be an array of Strings.
1676
+ */
1677
+ static VALUE
1678
+ frb_spanmtq_init(VALUE self, VALUE rfield, VALUE rterms)
1679
+ {
1680
+ FrtQuery *q = frt_spanmtq_new(frb_field(rfield));
1681
+ int i;
1682
+ for (i = RARRAY_LEN(rterms) - 1; i >= 0; i--) {
1683
+ frt_spanmtq_add_term(q, StringValuePtr(RARRAY_PTR(rterms)[i]));
1684
+ }
1685
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1686
+ object_add(q, self);
1687
+ return self;
1688
+ }
1689
+
1690
+ /****************************************************************************
1691
+ *
1692
+ * SpanPrefixQuery Methods
1693
+ *
1694
+ ****************************************************************************/
1695
+
1696
+ /*
1697
+ * call-seq:
1698
+ * SpanPrefixQuery.new(field, prefix, max_terms = 256) -> query
1699
+ *
1700
+ * Create a new SpanPrefixQuery which matches all documents with the prefix
1701
+ * +prefix+ in the field +field+.
1702
+ */
1703
+ static VALUE
1704
+ frb_spanprq_init(int argc, VALUE *argv, VALUE self)
1705
+ {
1706
+ VALUE rfield, rprefix, rmax_terms;
1707
+ int max_terms = FRT_SPAN_PREFIX_QUERY_MAX_TERMS;
1708
+ FrtQuery *q;
1709
+ if (rb_scan_args(argc, argv, "21", &rfield, &rprefix, &rmax_terms) == 3) {
1710
+ max_terms = FIX2INT(rmax_terms);
1711
+ }
1712
+ q = frt_spanprq_new(frb_field(rfield), StringValuePtr(rprefix));
1713
+ ((FrtSpanPrefixQuery *)q)->max_terms = max_terms;
1714
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1715
+ object_add(q, self);
1716
+ return self;
1717
+ }
1718
+
1719
+ /****************************************************************************
1720
+ *
1721
+ * SpanFirstQuery Methods
1722
+ *
1723
+ ****************************************************************************/
1724
+
1725
+ /*
1726
+ * call-seq:
1727
+ * SpanFirstQuery.new(span_query, end) -> query
1728
+ *
1729
+ * Create a new SpanFirstQuery which matches all documents where +span_query+
1730
+ * matches before +end+ where +end+ is a byte-offset from the start of the
1731
+ * field
1732
+ */
1733
+ static VALUE
1734
+ frb_spanfq_init(VALUE self, VALUE rmatch, VALUE rend)
1735
+ {
1736
+ FrtQuery *q;
1737
+ FrtQuery *match;
1738
+ Data_Get_Struct(rmatch, FrtQuery, match);
1739
+ q = frt_spanfq_new(match, FIX2INT(rend));
1740
+ Frt_Wrap_Struct(self, NULL, &frb_q_free, q);
1741
+ object_add(q, self);
1742
+ return self;
1743
+ }
1744
+
1745
+ /****************************************************************************
1746
+ *
1747
+ * SpanNearQuery Methods
1748
+ *
1749
+ ****************************************************************************/
1750
+
1751
+ static void
1752
+ frb_spannq_mark(void *p)
1753
+ {
1754
+ int i;
1755
+ FrtSpanNearQuery *snq = (FrtSpanNearQuery *)p;
1756
+ for (i = 0; i < snq->c_cnt; i++) {
1757
+ frb_gc_mark(snq->clauses[i]);
1758
+ }
1759
+ }
1760
+
1761
+ /*
1762
+ * call-seq:
1763
+ * SpanNearQuery.new(options = {}) -> query
1764
+ *
1765
+ * Create a new SpanNearQuery. You can add an array of clauses with the
1766
+ * +:clause+ parameter or you can add clauses individually using the
1767
+ * SpanNearQuery#add method.
1768
+ *
1769
+ * query = SpanNearQuery.new(:clauses => [spanq1, spanq2, spanq3])
1770
+ * # is equivalent to
1771
+ * query = SpanNearQuery.new()
1772
+ * query << spanq1 << spanq2 << spanq3
1773
+ *
1774
+ * You have two other options which you can set.
1775
+ *
1776
+ * :slop:: Default: 0. Works exactly like a PhraseQuery slop. It is the
1777
+ * amount of slop allowed in the match (the term edit distance
1778
+ * allowed in the match).
1779
+ * :in_order:: Default: false. Specifies whether or not the matches have to
1780
+ * occur in the order they were added to the query. When slop is
1781
+ * set to 0, this parameter will make no difference.
1782
+ */
1783
+ static VALUE
1784
+ frb_spannq_init(int argc, VALUE *argv, VALUE self)
1785
+ {
1786
+ FrtQuery *q;
1787
+ VALUE roptions;
1788
+ int slop = 0;
1789
+ bool in_order = false;
1790
+
1791
+ if (rb_scan_args(argc, argv, "01", &roptions) > 0) {
1792
+ VALUE v;
1793
+ if (Qnil != (v = rb_hash_aref(roptions, sym_slop))) {
1794
+ slop = FIX2INT(v);
1795
+ }
1796
+ if (Qnil != (v = rb_hash_aref(roptions, sym_in_order))) {
1797
+ in_order = RTEST(v);
1798
+ }
1799
+ }
1800
+ q = frt_spannq_new(slop, in_order);
1801
+ if (argc > 0) {
1802
+ VALUE v;
1803
+ if (Qnil != (v = rb_hash_aref(roptions, sym_clauses))) {
1804
+ int i;
1805
+ FrtQuery *clause;
1806
+ Check_Type(v, T_ARRAY);
1807
+ for (i = 0; i < RARRAY_LEN(v); i++) {
1808
+ Data_Get_Struct(RARRAY_PTR(v)[i], FrtQuery, clause);
1809
+ frt_spannq_add_clause(q, clause);
1810
+ }
1811
+ }
1812
+ }
1813
+
1814
+ Frt_Wrap_Struct(self, &frb_spannq_mark, &frb_q_free, q);
1815
+ object_add(q, self);
1816
+ return self;
1817
+ }
1818
+
1819
+ /*
1820
+ * call-seq:
1821
+ * query.add(span_query) -> self
1822
+ * query << span_query -> self
1823
+ *
1824
+ * Add a clause to the SpanNearQuery. Clauses are stored in the order they
1825
+ * are added to the query which is important for matching. Note that clauses
1826
+ * must be SpanQueries, not other types of query.
1827
+ */
1828
+ static VALUE
1829
+ frb_spannq_add(VALUE self, VALUE rclause)
1830
+ {
1831
+ GET_Q();
1832
+ FrtQuery *clause;
1833
+ Data_Get_Struct(rclause, FrtQuery, clause);
1834
+ frt_spannq_add_clause(q, clause);
1835
+ return self;
1836
+ }
1837
+
1838
+ /****************************************************************************
1839
+ *
1840
+ * SpanOrQuery Methods
1841
+ *
1842
+ ****************************************************************************/
1843
+
1844
+ static void
1845
+ frb_spanoq_mark(void *p)
1846
+ {
1847
+ int i;
1848
+ FrtSpanOrQuery *soq = (FrtSpanOrQuery *)p;
1849
+ for (i = 0; i < soq->c_cnt; i++) {
1850
+ frb_gc_mark(soq->clauses[i]);
1851
+ }
1852
+ }
1853
+
1854
+ /*
1855
+ * call-seq:
1856
+ * SpanOrQuery.new(options = {}) -> query
1857
+ *
1858
+ * Create a new SpanOrQuery. This is just like a BooleanQuery with all
1859
+ * clauses with the occur value of :should. The difference is that it can be
1860
+ * passed to other SpanQuerys like SpanNearQuery.
1861
+ */
1862
+ static VALUE
1863
+ frb_spanoq_init(int argc, VALUE *argv, VALUE self)
1864
+ {
1865
+ FrtQuery *q;
1866
+ VALUE rclauses;
1867
+
1868
+ q = frt_spanoq_new();
1869
+ if (rb_scan_args(argc, argv, "01", &rclauses) > 0) {
1870
+ int i;
1871
+ FrtQuery *clause;
1872
+ Check_Type(rclauses, T_ARRAY);
1873
+ for (i = 0; i < RARRAY_LEN(rclauses); i++) {
1874
+ Data_Get_Struct(RARRAY_PTR(rclauses)[i], FrtQuery, clause);
1875
+ frt_spanoq_add_clause(q, clause);
1876
+ }
1877
+ }
1878
+ Frt_Wrap_Struct(self, &frb_spanoq_mark, &frb_q_free, q);
1879
+ object_add(q, self);
1880
+ return self;
1881
+ }
1882
+
1883
+ /*
1884
+ * call-seq:
1885
+ * query.add(span_query) -> self
1886
+ * query << span_query -> self
1887
+ *
1888
+ * Add a clause to the SpanOrQuery. Note that clauses must be SpanQueries,
1889
+ * not other types of query.
1890
+ */
1891
+ static VALUE
1892
+ frb_spanoq_add(VALUE self, VALUE rclause)
1893
+ {
1894
+ GET_Q();
1895
+ FrtQuery *clause;
1896
+ Data_Get_Struct(rclause, FrtQuery, clause);
1897
+ frt_spanoq_add_clause(q, clause);
1898
+ return self;
1899
+ }
1900
+
1901
+ /****************************************************************************
1902
+ *
1903
+ * SpanNotQuery Methods
1904
+ *
1905
+ ****************************************************************************/
1906
+
1907
+ static void
1908
+ frb_spanxq_mark(void *p)
1909
+ {
1910
+ FrtSpanNotQuery *sxq = (FrtSpanNotQuery *)p;
1911
+ frb_gc_mark(sxq->inc);
1912
+ frb_gc_mark(sxq->exc);
1913
+ }
1914
+
1915
+ /*
1916
+ * call-seq:
1917
+ * SpanNotQuery.new(include_query, exclude_query) -> query
1918
+ *
1919
+ * Create a new SpanNotQuery which matches all documents which match
1920
+ * +include_query+ and don't match +exclude_query+.
1921
+ */
1922
+ static VALUE
1923
+ frb_spanxq_init(VALUE self, VALUE rinc, VALUE rexc)
1924
+ {
1925
+ FrtQuery *q;
1926
+ Check_Type(rinc, T_DATA);
1927
+ Check_Type(rexc, T_DATA);
1928
+ q = frt_spanxq_new(DATA_PTR(rinc), DATA_PTR(rexc));
1929
+ Frt_Wrap_Struct(self, &frb_spanxq_mark, &frb_q_free, q);
1930
+ object_add(q, self);
1931
+ return self;
1932
+ }
1933
+
1934
+ /****************************************************************************
1935
+ *
1936
+ * Filter Methods
1937
+ *
1938
+ ****************************************************************************/
1939
+
1940
+ static void
1941
+ frb_f_free(void *p)
1942
+ {
1943
+ object_del(p);
1944
+ frt_filt_deref((FrtFilter *)p);
1945
+ }
1946
+
1947
+ #define GET_F() FrtFilter *f = (FrtFilter *)DATA_PTR(self)
1948
+
1949
+ /*
1950
+ * call-seq:
1951
+ * filter.to_s -> string
1952
+ *
1953
+ * Return a human readable string representing the Filter object that the
1954
+ * method was called on.
1955
+ */
1956
+ static VALUE
1957
+ frb_f_to_s(VALUE self)
1958
+ {
1959
+ VALUE rstr;
1960
+ char *str;
1961
+ GET_F();
1962
+ str = f->to_s(f);
1963
+ rstr = rb_str_new2(str);
1964
+ free(str);
1965
+ return rstr;
1966
+ }
1967
+
1968
+ extern VALUE frb_get_bv(FrtBitVector *bv);
1969
+
1970
+ /*
1971
+ * call-seq:
1972
+ * filter.bits(index_reader) -> bit_vector
1973
+ *
1974
+ * Get the bit_vector used by this filter. This method will usually be used
1975
+ * to group filters or apply filters to other filters.
1976
+ */
1977
+ static VALUE
1978
+ frb_f_get_bits(VALUE self, VALUE rindex_reader)
1979
+ {
1980
+ FrtBitVector *bv;
1981
+ FrtIndexReader *ir;
1982
+ GET_F();
1983
+ Data_Get_Struct(rindex_reader, FrtIndexReader, ir);
1984
+ bv = frt_filt_get_bv(f, ir);
1985
+ return frb_get_bv(bv);
1986
+ }
1987
+
1988
+ /****************************************************************************
1989
+ *
1990
+ * RangeFilter Methods
1991
+ *
1992
+ ****************************************************************************/
1993
+
1994
+
1995
+ /*
1996
+ * call-seq:
1997
+ * RangeFilter.new(field, options = {}) -> filter
1998
+ *
1999
+ * Create a new RangeFilter on field +field+. There are two ways to build a
2000
+ * range filter. With the old-style options; +:lower+, +:upper+,
2001
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
2002
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
2003
+ * In the old-style options, limits are inclusive by default.
2004
+ *
2005
+ * == Examples
2006
+ *
2007
+ * f = RangeFilter.new(:date, :lower => "200501", :include_lower => false)
2008
+ * # is equivalent to
2009
+ * f = RangeFilter.new(:date, :< => "200501")
2010
+ * # is equivalent to
2011
+ * f = RangeFilter.new(:date, :lower_exclusive => "200501")
2012
+ *
2013
+ * f = RangeFilter.new(:date, :lower => "200501", :upper => 200502)
2014
+ * # is equivalent to
2015
+ * f = RangeFilter.new(:date, :>= => "200501", :<= => 200502)
2016
+ *
+ * Implementation notes: the option hash is decoded by get_range_params().
+ * The filter is built inside an FRT_TRY block; if that block reports an
+ * exception, its code and message are copied into locals and re-raised
+ * through frb_raise() only after FRT_XENDTRY.
+ * NOTE(review): presumably this is so the Ruby raise happens outside the
+ * TRY context -- confirm against the FRT_TRY macro definitions.
+ */
2017
+ static VALUE
2018
+ frb_rf_init(VALUE self, VALUE rfield, VALUE roptions)
2019
+ {
2020
+ FrtFilter *f;
2021
+ char *lterm = NULL;
2022
+ char *uterm = NULL;
2023
+ bool include_lower = false;
2024
+ bool include_upper = false;
2025
+ int ex_code = 0; /* exception code captured from the TRY block, 0 if none */
2026
+ const char *msg = NULL; /* exception message captured from the TRY block */
2027
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
2028
+ FRT_TRY
2029
+ f = frt_rfilt_new(frb_field(rfield), lterm, uterm, include_lower, include_upper);
2030
+ break;
2031
+ default: /* any exception: remember it and mark it as handled */
2032
+ ex_code = xcontext.excode;
2033
+ msg = xcontext.msg;
2034
+ FRT_HANDLED();
2035
+ FRT_XENDTRY
2036
+
2037
+ if (ex_code && msg) { frb_raise(ex_code, msg); } /* re-raise after the TRY block */
2038
+
2039
+ Frt_Wrap_Struct(self, NULL, &frb_f_free, f);
2040
+ object_add(f, self);
2041
+ return self;
2042
+ }
2043
+
2044
+ /****************************************************************************
2045
+ *
2046
+ * TypedRangeFilter Methods
2047
+ *
2048
+ ****************************************************************************/
2049
+
2050
+
2051
+ /*
2052
+ * call-seq:
2053
+ * TypedRangeFilter.new(field, options = {}) -> range_query
2054
+ *
2055
+ * Create a new TypedRangeFilter on field +field+. There are two ways to
2056
+ * build a range filter. With the old-style options; +:lower+, +:upper+,
2057
+ * +:include_lower+ and +:include_upper+ or the new style options; +:<+,
2058
+ * +:<=+, +:>+ and +:>=+. The options' names should speak for themselves.
2059
+ * In the old-style options, limits are inclusive by default.
2060
+ *
2061
+ * == Examples
2062
+ *
2063
+ * f = TypedRangeFilter.new(:date, :lower => "0.1", :include_lower => false)
2064
+ * # is equivalent to
2065
+ * f = TypedRangeFilter.new(:date, :< => "0.1")
2066
+ * # is equivalent to
2067
+ * f = TypedRangeFilter.new(:date, :lower_exclusive => "0.1")
2068
+ *
2069
+ * # Note that you numbers can be strings or actual numbers
2070
+ * f = TypedRangeFilter.new(:date, :lower => "-132.2", :upper => -1.4)
2071
+ * # is equivalent to
2072
+ * f = TypedRangeFilter.new(:date, :>= => "-132.2", :<= => -1.4)
2073
+ */
2074
+ static VALUE
2075
+ frb_trf_init(VALUE self, VALUE rfield, VALUE roptions)
2076
+ {
2077
+ FrtFilter *f;
2078
+ char *lterm = NULL;
2079
+ char *uterm = NULL;
2080
+ bool include_lower = false;
2081
+ bool include_upper = false;
2082
+
2083
+ get_range_params(roptions, &lterm, &uterm, &include_lower, &include_upper);
2084
+ f = frt_trfilt_new(frb_field(rfield), lterm, uterm,
2085
+ include_lower, include_upper);
2086
+ Frt_Wrap_Struct(self, NULL, &frb_f_free, f);
2087
+ object_add(f, self);
2088
+ return self;
2089
+ }
2090
+
2091
+ /****************************************************************************
2092
+ *
2093
+ * QueryFilter Methods
2094
+ *
2095
+ ****************************************************************************/
2096
+
2097
+ /*
2098
+ * call-seq:
2099
+ * QueryFilter.new(query) -> filter
2100
+ *
2101
+ * Create a new QueryFilter which applies the query +query+.
2102
+ */
2103
+ static VALUE
2104
+ frb_qf_init(VALUE self, VALUE rquery)
2105
+ {
2106
+ FrtQuery *q;
2107
+ FrtFilter *f;
2108
+ Data_Get_Struct(rquery, FrtQuery, q);
2109
+ f = frt_qfilt_new(q);
2110
+ Frt_Wrap_Struct(self, NULL, &frb_f_free, f);
2111
+ object_add(f, self);
2112
+ return self;
2113
+ }
2114
+
2115
+ /****************************************************************************
2116
+ *
2117
+ * SortField Methods
2118
+ *
2119
+ ****************************************************************************/
2120
+
2121
+ static void
2122
+ frb_sf_free(void *p)
2123
+ {
2124
+ object_del(p);
2125
+ frt_sort_field_destroy((FrtSortField *)p);
2126
+ }
2127
+
2128
+ static VALUE
2129
+ frb_get_sf(FrtSortField *sf)
2130
+ {
2131
+ VALUE self = object_get(sf);
2132
+ if (self == Qnil) {
2133
+ self = Data_Wrap_Struct(cSortField, NULL, &frb_sf_free, sf);
2134
+ object_add(sf, self);
2135
+ }
2136
+ return self;
2137
+ }
2138
+
2139
+ static int
2140
+ get_sort_type(VALUE rtype)
2141
+ {
2142
+ Check_Type(rtype, T_SYMBOL);
2143
+ if (rtype == sym_byte) {
2144
+ return FRT_SORT_TYPE_BYTE;
2145
+ } else if (rtype == sym_integer) {
2146
+ return FRT_SORT_TYPE_INTEGER;
2147
+ } else if (rtype == sym_string) {
2148
+ return FRT_SORT_TYPE_STRING;
2149
+ } else if (rtype == sym_score) {
2150
+ return FRT_SORT_TYPE_SCORE;
2151
+ } else if (rtype == sym_doc_id) {
2152
+ return FRT_SORT_TYPE_DOC;
2153
+ } else if (rtype == sym_float) {
2154
+ return FRT_SORT_TYPE_FLOAT;
2155
+ } else if (rtype == sym_auto) {
2156
+ return FRT_SORT_TYPE_AUTO;
2157
+ } else {
2158
+ rb_raise(rb_eArgError, ":%s is an unknown sort-type. Please choose "
2159
+ "from [:integer, :float, :string, :auto, :score, :doc_id]",
2160
+ rb_id2name(SYM2ID(rtype)));
2161
+ }
2162
+ return FRT_SORT_TYPE_DOC;
2163
+ }
2164
+
2165
+ /*
2166
+ * call-seq:
2167
+ * SortField.new(field, options = {}) -> sort_field
2168
+ *
2169
+ * Create a new SortField which can be used to sort the result-set by the
2170
+ * value in field +field+.
2171
+ *
2172
+ * === Options
2173
+ *
2174
+ * :type:: Default: +:auto+. Specifies how a field should be sorted.
2175
+ * Choose from one of; +:auto+, +:integer+, +:float+,
2176
+ * +:string+, +:byte+, +:doc_id+ or +:score+. +:auto+ will
2177
+ * check the datatype of the field by trying to parse it into
2178
+ * either a number or a float before settling on a string
2179
+ * sort. String sort is locale dependent and works for
2180
+ * multibyte character sets like UTF-8 if you have your
2181
+ * locale set correctly.
2182
+ * :reverse Default: false. Set to true if you want to reverse the
2183
+ * sort.
2184
+ */
2185
+ static VALUE
2186
+ frb_sf_init(int argc, VALUE *argv, VALUE self)
2187
+ {
2188
+ FrtSortField *sf;
2189
+ VALUE rfield, roptions;
2190
+ VALUE rval;
2191
+ int type = FRT_SORT_TYPE_AUTO;
2192
+ int is_reverse = false;
2193
+ FrtSymbol field;
2194
+
2195
+ if (rb_scan_args(argc, argv, "11", &rfield, &roptions) == 2) {
2196
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_type))) {
2197
+ type = get_sort_type(rval);
2198
+ }
2199
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_reverse))) {
2200
+ is_reverse = RTEST(rval);
2201
+ }
2202
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_comparator))) {
2203
+ rb_raise(rb_eArgError, "Unsupported argument ':comparator'");
2204
+ }
2205
+ }
2206
+ if (NIL_P(rfield)) rb_raise(rb_eArgError, "must pass a valid field name");
2207
+ field = frb_field(rfield);
2208
+
2209
+ sf = frt_sort_field_new(field, type, is_reverse);
2210
+ if (sf->field == (FrtSymbol)NULL) {
2211
+ sf->field = field;
2212
+ }
2213
+
2214
+ Frt_Wrap_Struct(self, NULL, &frb_sf_free, sf);
2215
+ object_add(sf, self);
2216
+ return self;
2217
+ }
2218
+
2219
+ #define GET_SF() FrtSortField *sf = (FrtSortField *)DATA_PTR(self)
2220
+
2221
+ /*
2222
+ * call-seq:
2223
+ * sort_field.reverse? -> bool
2224
+ *
2225
+ * Return true if the field is to be reverse sorted. This attribute is set
2226
+ * when you create the sort_field.
2227
+ */
2228
+ static VALUE
2229
+ frb_sf_is_reverse(VALUE self)
2230
+ {
2231
+ GET_SF();
2232
+ return sf->reverse ? Qtrue : Qfalse;
2233
+ }
2234
+
2235
+ /*
2236
+ * call-seq:
2237
+ * sort_field.name -> symbol
2238
+ *
2239
+ * Returns the name of the field to be sorted.
2240
+ */
2241
+ static VALUE
2242
+ frb_sf_get_name(VALUE self)
2243
+ {
2244
+ GET_SF();
2245
+ return sf->field ? ID2SYM(sf->field) : Qnil;
2246
+ }
2247
+
2248
+ /*
2249
+ * call-seq:
2250
+ * sort_field.type -> symbol
2251
+ *
2252
+ * Return the type of sort. Should be one of; +:auto+, +:integer+, +:float+,
2253
+ * +:string+, +:byte+, +:doc_id+ or +:score+.
2254
+ */
2255
+ static VALUE
2256
+ frb_sf_get_type(VALUE self)
2257
+ {
2258
+ GET_SF();
2259
+ switch (sf->type) {
2260
+ case FRT_SORT_TYPE_BYTE: return sym_byte;
2261
+ case FRT_SORT_TYPE_INTEGER: return sym_integer;
2262
+ case FRT_SORT_TYPE_FLOAT: return sym_float;
2263
+ case FRT_SORT_TYPE_STRING: return sym_string;
2264
+ case FRT_SORT_TYPE_AUTO: return sym_auto;
2265
+ case FRT_SORT_TYPE_DOC: return sym_doc_id;
2266
+ case FRT_SORT_TYPE_SCORE: return sym_score;
2267
+ }
2268
+ return Qnil;
2269
+ }
2270
+
2271
+ /*
2272
+ * call-seq:
2273
+ * sort_field.comparator -> symbol
2274
+ *
2275
+ * TODO: currently unsupported
2276
+ */
2277
+ static VALUE
2278
+ frb_sf_get_comparator(VALUE self)
2279
+ {
2280
+ return Qnil;
2281
+ }
2282
+
2283
+ /*
2284
+ * call-seq:
2285
+ * sort_field.to_s -> string
2286
+ *
2287
+ * Return a human readable string describing this +sort_field+.
2288
+ */
2289
+ static VALUE
2290
+ frb_sf_to_s(VALUE self)
2291
+ {
2292
+ GET_SF();
2293
+ char *str = frt_sort_field_to_s(sf);
2294
+ VALUE rstr = rb_str_new2(str);
2295
+ free(str);
2296
+ return rstr;
2297
+ }
2298
+
2299
+ /****************************************************************************
2300
+ *
2301
+ * Sort Methods
2302
+ *
2303
+ ****************************************************************************/
2304
+
2305
+ static void
2306
+ frb_sort_free(void *p)
2307
+ {
2308
+ FrtSort *sort = (FrtSort *)p;
2309
+ object_del(sort);
2310
+ frt_sort_destroy(sort);
2311
+ }
2312
+
2313
+ static void
2314
+ frb_sort_mark(void *p)
2315
+ {
2316
+ FrtSort *sort = (FrtSort *)p;
2317
+ int i;
2318
+ for (i = 0; i < sort->size; i++) {
2319
+ frb_gc_mark(sort->sort_fields[i]);
2320
+ }
2321
+ }
2322
+
2323
+ static VALUE
2324
+ frb_sort_alloc(VALUE klass)
2325
+ {
2326
+ VALUE self;
2327
+ FrtSort *sort = frt_sort_new();
2328
+ sort->destroy_all = false;
2329
+ self = Data_Wrap_Struct(klass, &frb_sort_mark, &frb_sort_free, sort);
2330
+ object_add(sort, self);
2331
+ return self;
2332
+ }
2333
+
2334
+ static void
2335
+ frb_parse_sort_str(FrtSort *sort, char *xsort_str)
2336
+ {
2337
+ FrtSortField *sf;
2338
+ char *comma, *end, *e, *s;
2339
+ const int len = strlen(xsort_str);
2340
+ char *sort_str = FRT_ALLOC_N(char, len + 2);
2341
+ strcpy(sort_str, xsort_str);
2342
+
2343
+ end = &sort_str[len];
2344
+
2345
+ s = sort_str;
2346
+
2347
+ while ((s < end)
2348
+ && (NULL != (comma = strchr(s, ',')) || (NULL != (comma = end)))) {
2349
+ bool reverse = false;
2350
+ /* strip spaces */
2351
+ e = comma;
2352
+ while ((isspace(*s) || *s == ':') && s < e) s++;
2353
+ while (isspace(e[-1]) && s < e) e--;
2354
+ *e = '\0';
2355
+ if (e > (s + 4) && strcmp("DESC", &e[-4]) == 0) {
2356
+ reverse = true;
2357
+ e -= 4;
2358
+ while (isspace(e[-1]) && s < e) e--;
2359
+ }
2360
+ *e = '\0';
2361
+
2362
+ if (strcmp("SCORE", s) == 0) {
2363
+ sf = frt_sort_field_score_new(reverse);
2364
+ } else if (strcmp("DOC_ID", s) == 0) {
2365
+ sf = frt_sort_field_doc_new(reverse);
2366
+ } else {
2367
+ sf = frt_sort_field_auto_new(rb_intern(s), reverse);
2368
+ }
2369
+ frb_get_sf(sf);
2370
+ frt_sort_add_sort_field(sort, sf);
2371
+ s = comma + 1;
2372
+ }
2373
+ free(sort_str);
2374
+ }
2375
+
2376
+ static void
2377
+ frb_sort_add(FrtSort *sort, VALUE rsf, bool reverse)
2378
+ {
2379
+ FrtSortField *sf;
2380
+ switch (TYPE(rsf)) {
2381
+ case T_DATA:
2382
+ Data_Get_Struct(rsf, FrtSortField, sf);
2383
+ if (reverse) sf->reverse = !sf->reverse;
2384
+ frt_sort_add_sort_field(sort, sf);
2385
+ break;
2386
+ case T_SYMBOL:
2387
+ sf = frt_sort_field_auto_new(frb_field(rsf), reverse);
2388
+ /* need to give it a ruby object so it'll be freed when the
2389
+ * sort is garbage collected */
2390
+ rsf = frb_get_sf(sf);
2391
+ frt_sort_add_sort_field(sort, sf);
2392
+ break;
2393
+ case T_STRING:
2394
+ frb_parse_sort_str(sort, rs2s(rsf));
2395
+ break;
2396
+ default:
2397
+ rb_raise(rb_eArgError, "Unknown SortField Type");
2398
+ break;
2399
+ }
2400
+ }
2401
+
2402
+ #define GET_SORT() FrtSort *sort = (FrtSort *)DATA_PTR(self)
2403
+ /*
2404
+ * call-seq:
2405
+ * Sort.new(sort_fields = [SortField::SCORE, SortField::DOC_ID], reverse = false) -> Sort
2406
+ *
2407
+ * Create a new Sort object. If +reverse+ is true, all sort_fields will be
2408
+ * reversed so if any of them are already reversed the will be turned back
2409
+ * to their natural order again. By default
2410
+ */
2411
+ static VALUE
2412
+ frb_sort_init(int argc, VALUE *argv, VALUE self)
2413
+ {
2414
+ int i;
2415
+ VALUE rfields, rreverse;
2416
+ bool reverse = false;
2417
+ bool has_sfd = false;
2418
+ GET_SORT();
2419
+ switch (rb_scan_args(argc, argv, "02", &rfields, &rreverse)) {
2420
+ case 2: reverse = RTEST(rreverse);
2421
+ case 1:
2422
+ if (TYPE(rfields) == T_ARRAY) {
2423
+ int i;
2424
+ for (i = 0; i < RARRAY_LEN(rfields); i++) {
2425
+ frb_sort_add(sort, RARRAY_PTR(rfields)[i], reverse);
2426
+ }
2427
+ } else {
2428
+ frb_sort_add(sort, rfields, reverse);
2429
+ }
2430
+ for (i = 0; i < sort->size; i++) {
2431
+ if (sort->sort_fields[i] == &FRT_SORT_FIELD_DOC) has_sfd = true;
2432
+ }
2433
+ if (!has_sfd) {
2434
+ frt_sort_add_sort_field(sort, (FrtSortField *)&FRT_SORT_FIELD_DOC);
2435
+ }
2436
+ break;
2437
+ case 0:
2438
+ frt_sort_add_sort_field(sort, (FrtSortField *)&FRT_SORT_FIELD_SCORE);
2439
+ frt_sort_add_sort_field(sort, (FrtSortField *)&FRT_SORT_FIELD_DOC);
2440
+ }
2441
+
2442
+ return self;
2443
+ }
2444
+
2445
+ /*
2446
+ * call-seq:
2447
+ * sort.fields -> Array
2448
+ *
2449
+ * Returns an array of the SortFields held by the Sort object.
2450
+ */
2451
+ static VALUE
2452
+ frb_sort_get_fields(VALUE self)
2453
+ {
2454
+ GET_SORT();
2455
+ VALUE rfields = rb_ary_new2(sort->size);
2456
+ int i;
2457
+ for (i = 0; i < sort->size; i++) {
2458
+ rb_ary_store(rfields, i, object_get(sort->sort_fields[i]));
2459
+ }
2460
+ return rfields;
2461
+ }
2462
+
2463
+
2464
+ /*
2465
+ * call-seq:
2466
+ * sort.to_s -> string
2467
+ *
2468
+ * Returns a human readable string representing the sort object.
2469
+ */
2470
+ static VALUE
2471
+ frb_sort_to_s(VALUE self)
2472
+ {
2473
+ GET_SORT();
2474
+ char *str = frt_sort_to_s(sort);
2475
+ VALUE rstr = rb_str_new2(str);
2476
+ free(str);
2477
+ return rstr;
2478
+ }
2479
+
2480
+ /****************************************************************************
2481
+ *
2482
+ * Searcher Methods
2483
+ *
2484
+ ****************************************************************************/
2485
+
2486
+ static void
2487
+ frb_sea_free(void *p)
2488
+ {
2489
+ FrtSearcher *sea = (FrtSearcher *)p;
2490
+ object_del(sea);
2491
+ sea->close(sea);
2492
+ }
2493
+
2494
+ #define GET_SEA() FrtSearcher *sea = (FrtSearcher *)DATA_PTR(self)
2495
+
2496
+ /*
2497
+ * call-seq:
2498
+ * searcher.close -> nil
2499
+ *
2500
+ * Close the searcher. The garbage collector will do this for you or you can
2501
+ * call this method explicitly.
2502
+ */
2503
+ static VALUE
2504
+ frb_sea_close(VALUE self)
2505
+ {
2506
+ GET_SEA();
2507
+ Frt_Unwrap_Struct(self);
2508
+ object_del(sea);
2509
+ sea->close(sea);
2510
+ return Qnil;
2511
+ }
2512
+
2513
+ /*
2514
+ * call-seq:
2515
+ * searcher.reader -> IndexReader
2516
+ *
2517
+ * Return the IndexReader wrapped by this searcher.
2518
+ */
2519
+ static VALUE
2520
+ frb_sea_get_reader(VALUE self)
2521
+ {
2522
+ GET_SEA();
2523
+ return object_get(((FrtIndexSearcher *)sea)->ir);
2524
+ }
2525
+
2526
+ /*
2527
+ * call-seq:
2528
+ * searcher.doc_freq(field, term) -> integer
2529
+ *
2530
+ * Return the number of documents in which the term +term+ appears in the
2531
+ * field +field+.
2532
+ */
2533
+ static VALUE
2534
+ frb_sea_doc_freq(VALUE self, VALUE rfield, VALUE rterm)
2535
+ {
2536
+ GET_SEA();
2537
+ return INT2FIX(sea->doc_freq(sea,
2538
+ frb_field(rfield),
2539
+ StringValuePtr(rterm)));
2540
+ }
2541
+
2542
+ /*
2543
+ * call-seq:
2544
+ * searcher.get_document(doc_id) -> LazyDoc
2545
+ * searcher[doc_id] -> LazyDoc
2546
+ *
2547
+ * Retrieve a document from the index. See LazyDoc for more details on the
2548
+ * document returned. Documents are referenced internally by document ids
2549
+ * which are returned by the Searchers search methods.
2550
+ */
2551
+ static VALUE
2552
+ frb_sea_doc(VALUE self, VALUE rdoc_id)
2553
+ {
2554
+ GET_SEA();
2555
+ return frb_get_lazy_doc(sea->get_lazy_doc(sea, FIX2INT(rdoc_id)));
2556
+ }
2557
+
2558
+ /*
2559
+ * call-seq:
2560
+ * searcher.max_doc -> number
2561
+ *
2562
+ * Returns 1 + the maximum document id in the index. It is the
2563
+ * document_id that will be used by the next document added to the index. If
2564
+ * there are no deletions, this number also refers to the number of documents
2565
+ * in the index.
2566
+ */
2567
+ static VALUE
2568
+ frb_sea_max_doc(VALUE self)
2569
+ {
2570
+ GET_SEA();
2571
+ return INT2FIX(sea->max_doc(sea));
2572
+ }
2573
+
2574
+ static float
2575
+ call_filter_proc(int doc_id, float score, FrtSearcher *self, void *arg)
2576
+ {
2577
+ VALUE val = rb_funcall((VALUE)arg, id_call, 3,
2578
+ INT2FIX(doc_id),
2579
+ rb_float_new((double)score),
2580
+ object_get(self));
2581
+ switch (TYPE(val)) {
2582
+ case T_NIL:
2583
+ case T_FALSE:
2584
+ return 0.0f;
2585
+ case T_FLOAT:
2586
+ {
2587
+ double d = NUM2DBL(val);
2588
+ return (d >= 0.0 && d <= 1.0) ? (float)d : 1.0f;
2589
+ }
2590
+ default:
2591
+ return 1.0f;
2592
+ }
2593
+ }
2594
+
2595
+ /*
+ * Native filter which delegates to a Ruby object. frb_get_cwrapped_filter()
+ * creates one for values that do not carry a native filter pointer. The
+ * cwfilt_* callbacks invoke the methods identified by id_hash, id_eql and
+ * id_bits on +rfilter+.
+ *
+ * +super+ is the first member and CWF() casts a filter pointer to this
+ * struct type.
+ */
+ typedef struct CWrappedFilter
2596
+ {
2597
+ FrtFilter super; /* base filter; must stay the first member */
2598
+ VALUE rfilter; /* the Ruby object the callbacks delegate to */
2599
+ } CWrappedFilter;
2600
+ #define CWF(filt) ((CWrappedFilter *)(filt))
2601
+
2602
+ static unsigned long long
2603
+ cwfilt_hash(FrtFilter *filt)
2604
+ {
2605
+ return (unsigned long long)NUM2ULONG(rb_funcall(CWF(filt)->rfilter, id_hash, 0));
2606
+ }
2607
+
2608
+ static int
2609
+ cwfilt_eq(FrtFilter *filt, FrtFilter *o)
2610
+ {
2611
+ return RTEST(rb_funcall(CWF(filt)->rfilter, id_eql, 1, CWF(o)->rfilter));
2612
+ }
2613
+
2614
+ static FrtBitVector *
2615
+ cwfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
2616
+ {
2617
+ VALUE rbv = rb_funcall(CWF(filt)->rfilter, id_bits, 1, object_get(ir));
2618
+ FrtBitVector *bv;
2619
+ Data_Get_Struct(rbv, FrtBitVector, bv);
2620
+ FRT_REF(bv);
2621
+ return bv;
2622
+ }
2623
+
2624
+ FrtFilter *
2625
+ frb_get_cwrapped_filter(VALUE rval)
2626
+ {
2627
+ FrtFilter *filter;
2628
+ if (frb_is_cclass(rval) && DATA_PTR(rval)) {
2629
+ Data_Get_Struct(rval, FrtFilter, filter);
2630
+ FRT_REF(filter);
2631
+ }
2632
+ else {
2633
+ filter = filt_new(CWrappedFilter);
2634
+ filter->hash = &cwfilt_hash;
2635
+ filter->eq = &cwfilt_eq;
2636
+ filter->get_bv_i = &cwfilt_get_bv_i;
2637
+ CWF(filter)->rfilter = rval;
2638
+ }
2639
+ return filter;
2640
+ }
2641
+
2642
+ static FrtTopDocs *
2643
+ frb_sea_search_internal(FrtQuery *query, VALUE roptions, FrtSearcher *sea)
2644
+ {
2645
+ VALUE rval;
2646
+ int offset = 0, limit = 10;
2647
+ FrtFilter *filter = NULL;
2648
+ FrtSort *sort = NULL;
2649
+ FrtTopDocs *td;
2650
+
2651
+ FrtPostFilter post_filter_holder;
2652
+ FrtPostFilter *post_filter = NULL;
2653
+
2654
+ if (Qnil != roptions) {
2655
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_offset))) {
2656
+ offset = FIX2INT(rval);
2657
+ if (offset < 0)
2658
+ rb_raise(rb_eArgError, ":offset must be >= 0");
2659
+ }
2660
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_limit))) {
2661
+ if (TYPE(rval) == T_FIXNUM) {
2662
+ limit = FIX2INT(rval);
2663
+ if (limit <= 0) {
2664
+ rb_raise(rb_eArgError, ":limit must be > 0");
2665
+ }
2666
+ }
2667
+ else if (rval == sym_all) {
2668
+ limit = INT_MAX;
2669
+ }
2670
+ else {
2671
+ rb_raise(rb_eArgError, "%s is not a sensible :limit value "
2672
+ "Please use a positive integer or :all",
2673
+ rs2s(rb_obj_as_string(rval)));
2674
+ }
2675
+ }
2676
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter))) {
2677
+ filter = frb_get_cwrapped_filter(rval);
2678
+ }
2679
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_c_filter_proc))) {
2680
+ post_filter = DATA_PTR(rval);
2681
+ }
2682
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_filter_proc))) {
2683
+ if (rb_respond_to(rval, id_call)) {
2684
+ if (post_filter) {
2685
+ rb_raise(rb_eArgError, "Cannot pass both :filter_proc and "
2686
+ ":c_filter_proc to the same search");
2687
+ }
2688
+ post_filter_holder.filter_func = &call_filter_proc;
2689
+ post_filter_holder.arg = (void *)rval;
2690
+ post_filter = &post_filter_holder;
2691
+ }
2692
+ else {
2693
+ post_filter = DATA_PTR(rval);
2694
+ }
2695
+ }
2696
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_sort))) {
2697
+ if (TYPE(rval) != T_DATA || CLASS_OF(rval) == cSortField) {
2698
+ rval = frb_sort_init(1, &rval, frb_sort_alloc(cSort));
2699
+ }
2700
+ Data_Get_Struct(rval, FrtSort, sort);
2701
+ }
2702
+ }
2703
+
2704
+ td = sea->search(sea, query, offset, limit, filter, sort, post_filter, 0);
2705
+ if (filter) frt_filt_deref(filter);
2706
+ return td;
2707
+ }
2708
+
2709
+ /*
2710
+ * call-seq:
2711
+ * searcher.search(query, options = {}) -> TopDocs
2712
+ *
2713
+ * Run a query through the Searcher on the index. A TopDocs object is
2714
+ * returned with the relevant results. The +query+ is a built in Query
2715
+ * object. Here are the options;
2716
+ *
2717
+ * === Options
2718
+ *
2719
+ * :offset:: Default: 0. The offset of the start of the section of the
2720
+ * result-set to return. This is used for paging through
2721
+ * results. Let's say you have a page size of 10. If you
2722
+ * don't find the result you want among the first 10 results
2723
+ * then set +:offset+ to 10 and look at the next 10 results,
2724
+ * then 20 and so on.
2725
+ * :limit:: Default: 10. This is the number of results you want
2726
+ * returned, also called the page size. Set +:limit+ to
2727
+ * +:all+ to return all results
2728
+ * :sort:: A Sort object or sort string describing how the field
2729
+ * should be sorted. A sort string is made up of field names
2730
+ * which cannot contain spaces and the word "DESC" if you
2731
+ * want the field reversed, all separated by commas. For
2732
+ * example; "rating DESC, author, title". Note that Ferret
2733
+ * will try to determine a field's type by looking at the
2734
+ * first term in the index and seeing if it can be parsed as
2735
+ * an integer or a float. Keep this in mind as you may need
2736
+ * to specify a fields type to sort it correctly. For more
2737
+ * on this, see the documentation for SortField
2738
+ * :filter:: a Filter object to filter the search results with
2739
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2740
+ * and the Searcher object as its parameters and returns
2741
+ * either a Boolean value specifying whether the result
2742
+ * should be included in the result set, or a Float between 0
2743
+ * and 1.0 to be used as a factor to scale the score of the
2744
+ * object. This can be used, for example, to weight the score
2745
+ * of a matched document by its age.
2746
+ */
2747
+ static VALUE
2748
+ frb_sea_search(int argc, VALUE *argv, VALUE self)
2749
+ {
2750
+ GET_SEA();
2751
+ VALUE rquery, roptions;
2752
+ FrtQuery *query;
2753
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2754
+ Data_Get_Struct(rquery, FrtQuery, query);
2755
+ FrtTopDocs *td = frb_sea_search_internal(query, roptions, sea);
2756
+ return frb_get_td(td, self);
2757
+ }
2758
+
2759
+ /*
2760
+ * call-seq:
2761
+ * searcher.search_each(query, options = {}) {|doc_id, score| do_something}
2762
+ * -> total_hits
2763
+ *
2764
+ * Run a query through the Searcher on the index. A TopDocs object is
2765
+ * returned with the relevant results. The +query+ is a Query object. The
2766
+ * Searcher#search_each method yields the internal document id (used to
2767
+ * reference documents in the Searcher object like this; +searcher[doc_id]+)
2768
+ * and the search score for that document. It is possible for the score to be
2769
+ * greater than 1.0 for some queries and taking boosts into account. This
2770
+ * method will also normalize scores to the range 0.0..1.0 when the max-score
2771
+ * is greater than 1.0. Here are the options;
2772
+ *
2773
+ * === Options
2774
+ *
2775
+ * :offset:: Default: 0. The offset of the start of the section of the
2776
+ * result-set to return. This is used for paging through
2777
+ * results. Let's say you have a page size of 10. If you
2778
+ * don't find the result you want among the first 10 results
2779
+ * then set +:offset+ to 10 and look at the next 10 results,
2780
+ * then 20 and so on.
2781
+ * :limit:: Default: 10. This is the number of results you want
2782
+ * returned, also called the page size. Set +:limit+ to
2783
+ * +:all+ to return all results
2784
+ * :sort:: A Sort object or sort string describing how the field
2785
+ * should be sorted. A sort string is made up of field names
2786
+ * which cannot contain spaces and the word "DESC" if you
2787
+ * want the field reversed, all separated by commas. For
2788
+ * example; "rating DESC, author, title". Note that Ferret
2789
+ * will try to determine a field's type by looking at the
2790
+ * first term in the index and seeing if it can be parsed as
2791
+ * an integer or a float. Keep this in mind as you may need
2792
+ * to specify a fields type to sort it correctly. For more
2793
+ * on this, see the documentation for SortField
2794
+ * :filter:: a Filter object to filter the search results with
2795
+ * :filter_proc:: a filter Proc is a Proc which takes the doc_id, the score
2796
+ * and the Searcher object as its parameters and returns a
2797
+ * Boolean value specifying whether the result should be
2798
+ * included in the result set.
2799
+ */
2800
+ static VALUE
2801
+ frb_sea_search_each(int argc, VALUE *argv, VALUE self)
2802
+ {
2803
+ int i;
2804
+ FrtQuery *q;
2805
+ float max_score;
2806
+ FrtTopDocs *td;
2807
+ VALUE rquery, roptions, rtotal_hits;
2808
+ GET_SEA();
2809
+
2810
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2811
+
2812
+ Data_Get_Struct(rquery, FrtQuery, q);
2813
+ td = frb_sea_search_internal(q, roptions, sea);
2814
+
2815
+ max_score = (td->max_score > 1.0f) ? td->max_score : 1.0f;
2816
+
2817
+ /* yield normalized scores */
2818
+ for (i = 0; i < td->size; i++) {
2819
+ rb_yield_values(2, INT2FIX(td->hits[i]->doc),
2820
+ rb_float_new((double)(td->hits[i]->score/max_score)));
2821
+ }
2822
+
2823
+ rtotal_hits = INT2FIX(td->total_hits);
2824
+ frt_td_destroy(td);
2825
+
2826
+ return rtotal_hits;
2827
+ }
2828
+
2829
+ /*
2830
+ * call-seq:
2831
+ * searcher.scan(query, options = {}) -> Array (doc_nums)
2832
+ *
2833
+ * Run a query through the Searcher on the index, ignoring scoring and
2834
+ * starting at +:start_doc+ and stopping when +:limit+ matches have been
2835
+ * found. It returns an array of the matching document numbers.
2836
+ *
2837
+ * There is a big performance advantage when using this search method on a very
2838
+ * large index when there are potentially thousands of matching documents and
2839
+ * you only want say 50 of them. The other search methods need to look at
2840
+ * every single match to decide which one has the highest score. This search
2841
+ * method just needs to find +:limit+ number of matches before it returns.
2842
+ *
2843
+ * === Options
2844
+ *
2845
+ * :start_doc:: Default: 0. The start document to start the search from.
2846
+ * NOTE very carefully that this is not the same as the
2847
+ * +:offset+ parameter used in the other search methods which
2848
+ * refers to the offset in the result-set. This is the
2849
+ * document to start the scan from. So if you are scanning
2850
+ * through the index in increments of 50 documents at a time
2851
+ * you need to use the last matched doc in the previous
2852
+ * search to start your next search. See the example below.
2853
+ * :limit:: Default: 50. This is the number of results you want
2854
+ * returned, also called the page size. Set +:limit+ to
2855
+ * +:all+ to return all results.
2856
+ * TODO: add option to return loaded documents instead
2857
+ *
2858
+ * === Example
2859
+ *
2860
+ * start_doc = 0
2861
+ * begin
2862
+ * results = @searcher.scan(query, :start_doc => start_doc)
2863
+ * yield results # or do something with them
2864
+ * start_doc = results.last
2865
+ * # start_doc will be nil now if results is empty, ie no more matches
2866
+ * end while start_doc
2867
+ */
2868
+ static VALUE
2869
+ frb_sea_scan(int argc, VALUE *argv, VALUE self)
2870
+ {
2871
+ FrtQuery *q;
2872
+ int i, count;
2873
+ VALUE rval, rquery, roptions;
2874
+ int *doc_array;
2875
+ VALUE rdoc_array;
2876
+ int start_doc = 0, limit = 50;
2877
+ GET_SEA();
2878
+ rb_scan_args(argc, argv, "11", &rquery, &roptions);
2879
+ Data_Get_Struct(rquery, FrtQuery, q);
2880
+
2881
+ if (Qnil != roptions) {
2882
+ Check_Type(roptions, T_HASH);
2883
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_start_doc))) {
2884
+ Check_Type(rval, T_FIXNUM);
2885
+ start_doc = FIX2INT(rval);
2886
+ if (start_doc < 0) {
2887
+ rb_raise(rb_eArgError, ":start_doc must be >= 0");
2888
+ }
2889
+ }
2890
+ if (Qnil != (rval = rb_hash_aref(roptions, sym_limit))) {
2891
+ if (TYPE(rval) == T_FIXNUM) {
2892
+ limit = FIX2INT(rval);
2893
+ if (limit <= 0) {
2894
+ rb_raise(rb_eArgError, ":limit must be > 0");
2895
+ }
2896
+ }
2897
+ else if (rval == sym_all) {
2898
+ limit = INT_MAX;
2899
+ }
2900
+ else {
2901
+ rb_raise(rb_eArgError, "%s is not a sensible :limit value "
2902
+ "Please use a positive integer or :all",
2903
+ rs2s(rb_obj_as_string(rval)));
2904
+ }
2905
+ }
2906
+ }
2907
+
2908
+ doc_array = FRT_ALLOC_N(int, limit);
2909
+ count = sea->search_unscored(sea, q, doc_array, limit, start_doc);
2910
+ rdoc_array = rb_ary_new2(count);
2911
+ for (i = 0; i < count; i++) {
2912
+ rb_ary_store(rdoc_array, i, INT2FIX(doc_array[i]));
2913
+ }
2914
+ free(doc_array);
2915
+ return rdoc_array;
2916
+ }
2917
+
2918
+ /*
2919
+ * call-seq:
2920
+ * searcher.explain(query, doc_id) -> Explanation
2921
+ *
2922
+ * Create an explanation object to explain the score returned for a
2923
+ * particular document at +doc_id+ in the index for the query +query+.
2924
+ *
2925
+ * Usually used like this;
2926
+ *
2927
+ * puts searcher.explain(query, doc_id).to_s
2928
+ */
2929
+ static VALUE
2930
+ frb_sea_explain(VALUE self, VALUE rquery, VALUE rdoc_id)
2931
+ {
2932
+ GET_SEA();
2933
+ FrtQuery *query;
2934
+ FrtExplanation *expl;
2935
+ Data_Get_Struct(rquery, FrtQuery, query);
2936
+ expl = sea->explain(sea, query, FIX2INT(rdoc_id));
2937
+ return Data_Wrap_Struct(cExplanation, NULL, &frt_expl_destroy, expl);
2938
+ }
2939
+
2940
+ /*
2941
+ * call-seq:
2942
+ * searcher.highlight(query, doc_id, field, options = {}) -> Array
2943
+ *
2944
+ * Returns an array of strings with the matches highlighted.
2945
+ *
2946
+ * === Options
2947
+ *
2948
+ * :excerpt_length:: Default: 150. Length of excerpt to show. Highlighted
2949
+ * terms will be in the centre of the excerpt. Set to
2950
+ * :all to highlight the entire field.
2951
+ * :num_excerpts:: Default: 2. Number of excerpts to return.
2952
+ * :pre_tag:: Default: "<b>". Tag to place to the left of the match.
2953
+ * You'll probably want to change this to a "<span>" tag
2954
+ * with a class. Try "\033[7m" for use in a terminal.
2955
+ * :post_tag:: Default: "</b>". This tag should close the +:pre_tag+.
2956
+ * Try tag "\033[m" in the terminal.
2957
+ * :ellipsis:: Default: "...". This is the string that is appended at
2958
+ * the beginning and end of excerpts (unless the excerpt
2959
+ * hits the start or end of the field). You'll probably
2960
+ * want to change this to a Unicode ellipsis character.
2961
+ */
2962
+ static VALUE
2963
+ frb_sea_highlight(int argc, VALUE *argv, VALUE self)
2964
+ {
2965
+ GET_SEA();
2966
+ VALUE rquery, rdoc_id, rfield, roptions, v;
2967
+ FrtQuery *query;
2968
+ int excerpt_length = 150;
2969
+ int num_excerpts = 2;
2970
+ const char *pre_tag = "<b>";
2971
+ const char *post_tag = "</b>";
2972
+ const char *ellipsis = "...";
2973
+ char **excerpts;
2974
+
2975
+ rb_scan_args(argc, argv, "31", &rquery, &rdoc_id, &rfield, &roptions);
2976
+ Data_Get_Struct(rquery, FrtQuery, query);
2977
+ if (argc > 3) {
2978
+ if (TYPE(roptions) != T_HASH) {
2979
+ rb_raise(rb_eArgError, "The fourth argument to Searcher#highlight must be a hash");
2980
+ }
2981
+ if (Qnil != (v = rb_hash_aref(roptions, sym_num_excerpts))) {
2982
+ num_excerpts = FIX2INT(v);
2983
+ }
2984
+ if (Qnil != (v = rb_hash_aref(roptions, sym_excerpt_length))) {
2985
+ if (v == sym_all) {
2986
+ num_excerpts = 1;
2987
+ excerpt_length = INT_MAX/2;
2988
+ }
2989
+ else {
2990
+ excerpt_length = FIX2INT(v);
2991
+ }
2992
+ }
2993
+ if (Qnil != (v = rb_hash_aref(roptions, sym_pre_tag))) {
2994
+ pre_tag = rs2s(rb_obj_as_string(v));
2995
+ }
2996
+ if (Qnil != (v = rb_hash_aref(roptions, sym_post_tag))) {
2997
+ post_tag = rs2s(rb_obj_as_string(v));
2998
+ }
2999
+ if (Qnil != (v = rb_hash_aref(roptions, sym_ellipsis))) {
3000
+ ellipsis = rs2s(rb_obj_as_string(v));
3001
+ }
3002
+ }
3003
+
3004
+ if ((excerpts = frt_searcher_highlight(sea,
3005
+ query,
3006
+ FIX2INT(rdoc_id),
3007
+ frb_field(rfield),
3008
+ excerpt_length,
3009
+ num_excerpts,
3010
+ pre_tag,
3011
+ post_tag,
3012
+ ellipsis)) != NULL) {
3013
+ const int size = frt_ary_size(excerpts);
3014
+ int i;
3015
+ VALUE rexcerpts = rb_ary_new2(size);
3016
+
3017
+ for (i = 0; i < size; i++) {
3018
+ rb_ary_store(rexcerpts, i, rb_str_new2(excerpts[i]));
3019
+ }
3020
+ frt_ary_destroy(excerpts, &free);
3021
+ return rexcerpts;
3022
+ }
3023
+ return Qnil;
3024
+ }
3025
+
3026
+ /****************************************************************************
3027
+ *
3028
+ * Searcher Methods
3029
+ *
3030
+ ****************************************************************************/
3031
+
3032
+ static void
3033
+ frb_sea_mark(void *p)
3034
+ {
3035
+ FrtIndexSearcher *isea = (FrtIndexSearcher *)p;
3036
+ frb_gc_mark(isea->ir);
3037
+ frb_gc_mark(isea->ir->store);
3038
+ }
3039
+
3040
/*
 * Wraps the index reader +ir+ in a new cIndexReader data object, stores that
 * object in +rir+ and registers the pair with object_add. Arguments are
 * parenthesized so the macro stays correct for non-trivial expressions.
 */
#define FRT_GET_IR(rir, ir) do {\
    (rir) = Data_Wrap_Struct(cIndexReader, &frb_ir_mark, &frb_ir_free, (ir));\
    object_add((ir), (rir));\
} while (0)
3044
+
3045
+ /*
3046
+ * call-seq:
3047
+ * Searcher.new(obj) -> Searcher
3048
+ *
3049
+ * Create a new Searcher object. +obj+ can either be a string path to an
3050
+ * index directory on the file-system, an actual Ferret::Store::Directory
3051
+ * object or a Ferret::Index::IndexReader. You should use the IndexReader for
3052
+ * searching multiple indexes. Just open the IndexReader on multiple
3053
+ * directories.
3054
+ */
3055
+ static VALUE
3056
+ frb_sea_init(VALUE self, VALUE obj)
3057
+ {
3058
+ FrtStore *store = NULL;
3059
+ FrtIndexReader *ir = NULL;
3060
+ FrtSearcher *sea;
3061
+ if (TYPE(obj) == T_STRING) {
3062
+ frb_create_dir(obj);
3063
+ store = frt_open_fs_store(rs2s(obj));
3064
+ ir = frt_ir_open(store);
3065
+ FRT_DEREF(store);
3066
+ FRT_GET_IR(obj, ir);
3067
+ } else {
3068
+ Check_Type(obj, T_DATA);
3069
+ if (rb_obj_is_kind_of(obj, cDirectory) == Qtrue) {
3070
+ Data_Get_Struct(obj, FrtStore, store);
3071
+ ir = frt_ir_open(store);
3072
+ FRT_GET_IR(obj, ir);
3073
+ } else if (rb_obj_is_kind_of(obj, cIndexReader) == Qtrue) {
3074
+ Data_Get_Struct(obj, FrtIndexReader, ir);
3075
+ } else {
3076
+ rb_raise(rb_eArgError, "Unknown type for argument to IndexSearcher.new");
3077
+ }
3078
+ }
3079
+ sea = frt_isea_new(ir);
3080
+ ((FrtIndexSearcher *)sea)->close_ir = false;
3081
+ Frt_Wrap_Struct(self, &frb_sea_mark, &frb_sea_free, sea);
3082
+ object_add(sea, self);
3083
+ return self;
3084
+ }
3085
+
3086
+ /****************************************************************************
3087
+ *
3088
+ * MultiSearcher Methods
3089
+ *
3090
+ ****************************************************************************/
3091
+
3092
+ static void
3093
+ frb_ms_free(void *p)
3094
+ {
3095
+ FrtSearcher *sea = (FrtSearcher *)p;
3096
+ FrtMultiSearcher *msea = (FrtMultiSearcher *)sea;
3097
+ free(msea->searchers);
3098
+ object_del(sea);
3099
+ frt_searcher_close(sea);
3100
+ }
3101
+
3102
+ static void
3103
+ frb_ms_mark(void *p)
3104
+ {
3105
+ int i;
3106
+ FrtMultiSearcher *msea = (FrtMultiSearcher *)p;
3107
+ for (i = 0; i < msea->s_cnt; i++) {
3108
+ frb_gc_mark(msea->searchers[i]);
3109
+ }
3110
+ }
3111
+
3112
+ /*
3113
+ * call-seq:
3114
+ * MultiSearcher.new(searcher*) -> searcher
3115
+ *
3116
+ * Create a new MultiSearcher by passing a list of subsearchers to the
3117
+ * constructor.
3118
+ */
3119
+ static VALUE
3120
+ frb_ms_init(int argc, VALUE *argv, VALUE self)
3121
+ {
3122
+ int i, j, top = 0, capa = argc;
3123
+
3124
+ VALUE rsearcher;
3125
+ FrtSearcher **searchers = FRT_ALLOC_N(FrtSearcher *, capa);
3126
+ FrtSearcher *s;
3127
+
3128
+ for (i = 0; i < argc; i++) {
3129
+ rsearcher = argv[i];
3130
+ switch (TYPE(rsearcher)) {
3131
+ case T_ARRAY:
3132
+ capa += RARRAY_LEN(rsearcher);
3133
+ FRT_REALLOC_N(searchers, FrtSearcher *, capa);
3134
+ for (j = 0; j < RARRAY_LEN(rsearcher); j++) {
3135
+ VALUE rs = RARRAY_PTR(rsearcher)[j];
3136
+ Data_Get_Struct(rs, FrtSearcher, s);
3137
+ searchers[top++] = s;
3138
+ }
3139
+ break;
3140
+ case T_DATA:
3141
+ Data_Get_Struct(rsearcher, FrtSearcher, s);
3142
+ searchers[top++] = s;
3143
+ break;
3144
+ default:
3145
+ rb_raise(rb_eArgError, "Can't add class %s to MultiSearcher",
3146
+ rb_obj_classname(rsearcher));
3147
+ break;
3148
+ }
3149
+ }
3150
+ s = frt_msea_new(searchers, top, false);
3151
+ Frt_Wrap_Struct(self, &frb_ms_mark, &frb_ms_free, s);
3152
+ object_add(s, self);
3153
+ return self;
3154
+ }
3155
+
3156
+ /****************************************************************************
3157
+ *
3158
+ * Init Function
3159
+ *
3160
+ ****************************************************************************/
3161
+
3162
+ /* rdochack
3163
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3164
+ */
3165
+
3166
+ /*
3167
+ * Document-class: Ferret::Search::Hit
3168
+ *
3169
+ * == Summary
3170
+ *
3171
+ * A hit represents a single document match for a search. It holds the
3172
+ * document id of the document that matches along with the score for the
3173
+ * match. The score is a positive Float value. The score contained in a hit
3174
+ * is not normalized so it can be greater than 1.0. To normalize scores to
3175
+ * the range 0.0..1.0 divide the scores by TopDocs#max_score.
3176
+ */
3177
+ static void
3178
+ Init_Hit(void)
3179
+ {
3180
+ const char *hit_class = "Hit";
3181
+ /* rdochack
3182
+ cHit = rb_define_class_under(mSearch, "Hit", rb_cObject);
3183
+ */
3184
+ cHit = rb_struct_define(hit_class, "doc", "score", NULL);
3185
+ rb_set_class_path(cHit, mSearch, hit_class);
3186
+ rb_const_set(mSearch, rb_intern(hit_class), cHit);
3187
+ id_doc = rb_intern("doc");
3188
+ id_score = rb_intern("score");
3189
+ }
3190
+
3191
+ /*
3192
+ * Document-class: Ferret::Search::TopDocs
3193
+ *
3194
+ * == Summary
3195
+ *
3196
+ * A TopDocs object holds a result set for a search. The number of documents
3197
+ * that matched the query is held in TopDocs#total_hits. The actual
3198
+ * results are in the Array TopDocs#hits. The number of hits returned is
3199
+ * limited by the +:limit+ option so the size of the +hits+ array will not
3200
+ * always be equal to the value of +total_hits+. Finally TopDocs#max_score
3201
+ * holds the maximum score of any match (not necessarily the maximum score
3202
+ * contained in the +hits+ array) so it can be used to normalize scores. For
3203
+ * example, to print doc ids with scores out of 100.0 you could do this;
3204
+ *
3205
+ * top_docs.hits.each do |hit|
3206
+ * puts "#{hit.doc} scored #{hit.score * 100.0 / top_docs.max_score}"
3207
+ * end
3208
+ */
3209
+ static void
3210
+ Init_TopDocs(void)
3211
+ {
3212
+ const char *td_class = "TopDocs";
3213
+ /* rdochack
3214
+ cTopDocs = rb_define_class_under(mSearch, "TopDocs", rb_cObject);
3215
+ */
3216
+ cTopDocs = rb_struct_define(td_class,
3217
+ "total_hits",
3218
+ "hits",
3219
+ "max_score",
3220
+ "searcher",
3221
+ NULL);
3222
+ rb_set_class_path(cTopDocs, mSearch, td_class);
3223
+ rb_const_set(mSearch, rb_intern(td_class), cTopDocs);
3224
+ rb_define_method(cTopDocs, "to_s", frb_td_to_s, -1);
3225
+ rb_define_method(cTopDocs, "to_json", frb_td_to_json, 0);
3226
+ id_hits = rb_intern("hits");
3227
+ id_total_hits = rb_intern("total_hits");
3228
+ id_max_score = rb_intern("max_score");
3229
+ id_searcher = rb_intern("searcher");
3230
+ }
3231
+
3232
+ /*
3233
+ * Document-class: Ferret::Search::Explanation
3234
+ *
3235
+ * == Summary
3236
+ *
3237
+ * Explanation is used to give a description of why a document matched with
3238
+ * the score that it did. Use the Explanation#to_s or Explanation#to_html
3239
+ * methods to display the explanation in a human readable format. Creating
3240
+ * explanations is an expensive operation so it should only be used for
3241
+ * debugging purposes. To create an explanation use the Searcher#explain
3242
+ * method.
3243
+ *
3244
+ * == Example
3245
+ *
3246
+ * puts searcher.explain(query, doc_id).to_s
3247
+ */
3248
+ static void
3249
+ Init_Explanation(void)
3250
+ {
3251
+ cExplanation = rb_define_class_under(mSearch, "Explanation", rb_cObject);
3252
+ rb_define_alloc_func(cExplanation, frb_data_alloc);
3253
+
3254
+ rb_define_method(cExplanation, "to_s", frb_expl_to_s, 0);
3255
+ rb_define_method(cExplanation, "to_html", frb_expl_to_html, 0);
3256
+ rb_define_method(cExplanation, "score", frb_expl_score, 0);
3257
+ }
3258
+
3259
+ /*
3260
+ * Document-class: Ferret::Search::Query
3261
+ *
3262
+ * == Summary
3263
+ *
3264
+ * Abstract class representing a query to the index. There are a number of
3265
+ * concrete Query implementations;
3266
+ *
3267
+ * * TermQuery
3268
+ * * MultiTermQuery
3269
+ * * BooleanQuery
3270
+ * * PhraseQuery
3271
+ * * ConstantScoreQuery
3272
+ * * FilteredQuery
3273
+ * * MatchAllQuery
3274
+ * * RangeQuery
3275
+ * * WildcardQuery
3276
+ * * FuzzyQuery
3277
+ * * PrefixQuery
3278
+ * * Spans::SpanTermQuery
3279
+ * * Spans::SpanFirstQuery
3280
+ * * Spans::SpanOrQuery
3281
+ * * Spans::SpanNotQuery
3282
+ * * Spans::SpanNearQuery
3283
+ *
3284
+ * Explore these classes for the query right for you. The queries are passed
3285
+ * to the Searcher#search* methods.
3286
+ *
3287
+ * === Query Boosts
3288
+ *
3289
+ * Queries have a boost value so that you can make the results of one query
3290
+ * more important than the results of another query when combining them in a
3291
+ * BooleanQuery. For example, documents on Rails. To avoid getting results
3292
+ * for train rails you might also add the term Ruby but Rails is the more
3293
+ * important term so you'd give it a boost.
3294
+ */
3295
+ static void
3296
+ Init_Query(void)
3297
+ {
3298
+ cQuery = rb_define_class_under(mSearch, "Query", rb_cObject);
3299
+
3300
+ rb_define_method(cQuery, "to_s", frb_q_to_s, -1);
3301
+ rb_define_method(cQuery, "boost", frb_q_get_boost, 0);
3302
+ rb_define_method(cQuery, "boost=", frb_q_set_boost, 1);
3303
+ rb_define_method(cQuery, "eql?", frb_q_eql, 1);
3304
+ rb_define_method(cQuery, "==", frb_q_eql, 1);
3305
+ rb_define_method(cQuery, "hash", frb_q_hash, 0);
3306
+ rb_define_method(cQuery, "terms", frb_q_get_terms, 1);
3307
+ }
3308
+
3309
+ /*
3310
+ * Document-class: Ferret::Search::TermQuery
3311
+ *
3312
+ * == Summary
3313
+ *
3314
+ * TermQuery is the most basic query and it is the building block for most
3315
+ * other queries. It basically matches documents that contain a specific term
3316
+ * in a specific field.
3317
+ *
3318
+ * == Example
3319
+ *
3320
+ * query = TermQuery.new(:content, "rails")
3321
+ *
3322
+ * # untokenized fields can also be searched with this query;
3323
+ * query = TermQuery.new(:title, "Shawshank Redemption")
3324
+ *
3325
+ * Notice the all lowercase term Rails. This is important as most analyzers will
3326
+ * downcase all text added to the index. The title in this case was not
3327
+ * tokenized so the case would have been left as is.
3328
+ */
3329
+ static void
3330
+ Init_TermQuery(void)
3331
+ {
3332
+ cTermQuery = rb_define_class_under(mSearch, "TermQuery", cQuery);
3333
+ rb_define_alloc_func(cTermQuery, frb_data_alloc);
3334
+
3335
+ rb_define_method(cTermQuery, "initialize", frb_tq_init, 2);
3336
+ }
3337
+
3338
+ /*
3339
+ * Document-class: Ferret::Search::MultiTermQuery
3340
+ *
3341
+ * == Summary
3342
+ *
3343
+ * MultiTermQuery matches documents that contain one of a list of terms in a
3344
+ * specific field. This is the basic building block for queries such as;
3345
+ *
3346
+ * * PrefixQuery
3347
+ * * WildcardQuery
3348
+ * * FuzzyQuery
3349
+ *
3350
+ * MultiTermQuery is very similar to a boolean "Or" query. It is highly
3351
+ * optimized though as it focuses on a single field.
3352
+ *
3353
+ * == Example
3354
+ *
3355
+ * multi_term_query = MultiTermQuery.new(:content, :max_terms => 10)
3356
+ *
3357
+ * multi_term_query << "Ruby" << "Ferret" << "Rails" << "Search"
3358
+ */
3359
+ static void
3360
+ Init_MultiTermQuery(void)
3361
+ {
3362
+ id_default_max_terms = rb_intern("@@default_max_terms");
3363
+ sym_max_terms = ID2SYM(rb_intern("max_terms"));
3364
+ sym_min_score = ID2SYM(rb_intern("min_score"));
3365
+
3366
+ cMultiTermQuery = rb_define_class_under(mSearch, "MultiTermQuery", cQuery);
3367
+ rb_define_alloc_func(cMultiTermQuery, frb_data_alloc);
3368
+ rb_cvar_set(cMultiTermQuery, id_default_max_terms, INT2FIX(512));
3369
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms",
3370
+ frb_mtq_get_dmt, 0);
3371
+ rb_define_singleton_method(cMultiTermQuery, "default_max_terms=",
3372
+ frb_mtq_set_dmt, 1);
3373
+
3374
+ rb_define_method(cMultiTermQuery, "initialize", frb_mtq_init, -1);
3375
+ rb_define_method(cMultiTermQuery, "add_term", frb_mtq_add_term, -1);
3376
+ rb_define_method(cMultiTermQuery, "<<", frb_mtq_add_term, -1);
3377
+ }
3378
+
3379
+ static void Init_BooleanClause(void);
3380
+
3381
+ /*
3382
+ * Document-class: Ferret::Search::BooleanQuery
3383
+ *
3384
+ * == Summary
3385
+ *
3386
+ * A BooleanQuery is used for combining many queries into one. This is best
3387
+ * illustrated with an example.
3388
+ *
3389
+ * == Example
3390
+ *
3391
+ * Lets say we wanted to find all documents with the term "Ruby" in the
3392
+ * +:title+ and the term "Ferret" in the +:content+ field or the +:title+
3393
+ * field written before January 2006. You could build the query like this.
3394
+ *
3395
+ * tq1 = TermQuery.new(:title, "ruby")
3396
+ * tq21 = TermQuery.new(:title, "ferret")
3397
+ * tq22 = TermQuery.new(:content, "ferret")
3398
+ * bq2 = BooleanQuery.new
3399
+ * bq2 << tq21 << tq22
3400
+ *
3401
+ * rq3 = RangeQuery.new(:written, :< => "200601")
3402
+ *
3403
+ * query = BooleanQuery.new
3404
+ * query.add_query(tq1, :must).add_query(bq2, :must).add_query(rq3, :must)
3405
+ */
3406
+ static void
3407
+ Init_BooleanQuery(void)
3408
+ {
3409
+ cBooleanQuery = rb_define_class_under(mSearch, "BooleanQuery", cQuery);
3410
+ rb_define_alloc_func(cBooleanQuery, frb_data_alloc);
3411
+
3412
+ rb_define_method(cBooleanQuery, "initialize", frb_bq_init, -1);
3413
+ rb_define_method(cBooleanQuery, "add_query", frb_bq_add_query, -1);
3414
+ rb_define_method(cBooleanQuery, "<<", frb_bq_add_query, -1);
3415
+
3416
+ Init_BooleanClause();
3417
+ }
3418
+
3419
+ /*
3420
+ * Document-class: Ferret::Search::BooleanQuery::BooleanClause
3421
+ *
3422
+ * == Summary
3423
+ *
3424
+ * A BooleanClause holds a single query within a BooleanQuery specifying
3425
+ * whether the query +:must+ match, +:should+ match or +:must_not+ match.
3426
+ * BooleanClauses can be used to pass a clause from one BooleanQuery to
3427
+ * another although it is generally easier just to add a query directly to a
3428
+ * BooleanQuery using the BooleanQuery#add_query method.
3429
+ *
3430
+ * == Example
3431
+ *
3432
+ * clause1 = BooleanClause.new(query1, :should)
3433
+ * clause2 = BooleanClause.new(query2, :should)
3434
+ *
3435
+ * query = BooleanQuery.new
3436
+ * query << clause1 << clause2
3437
+ */
3438
+ static void
3439
+ Init_BooleanClause(void)
3440
+ {
3441
+ sym_should = ID2SYM(rb_intern("should"));
3442
+ sym_must = ID2SYM(rb_intern("must"));
3443
+ sym_must_not = ID2SYM(rb_intern("must_not"));
3444
+
3445
+ cBooleanClause = rb_define_class_under(cBooleanQuery, "BooleanClause",
3446
+ rb_cObject);
3447
+ rb_define_alloc_func(cBooleanClause, frb_data_alloc);
3448
+
3449
+ rb_define_method(cBooleanClause, "initialize", frb_bc_init, -1);
3450
+ rb_define_method(cBooleanClause, "query", frb_bc_get_query, 0);
3451
+ rb_define_method(cBooleanClause, "query=", frb_bc_set_query, 1);
3452
+ rb_define_method(cBooleanClause, "required?", frb_bc_is_required, 0);
3453
+ rb_define_method(cBooleanClause, "prohibited?", frb_bc_is_prohibited, 0);
3454
+ rb_define_method(cBooleanClause, "occur=", frb_bc_set_occur, 1);
3455
+ rb_define_method(cBooleanClause, "to_s", frb_bc_to_s, 0);
3456
+ }
3457
+
3458
+ /*
3459
+ * Document-class: Ferret::Search::RangeQuery
3460
+ *
3461
+ * == Summary
3462
+ *
3463
+ * RangeQuery is used to find documents with terms in a range.
3464
+ * RangeQuerys are usually used on untokenized fields like date fields or
3465
+ * number fields.
3466
+ *
3467
+ * == Example
3468
+ *
3469
+ * To find all documents written between January 1st 2006 and January 26th
3470
+ * 2006 inclusive you would write the query like this;
3471
+ *
3472
+ * query = RangeQuery.new(:create_date, :>= => "20060101", :<= => "20060126")
3473
+ *
3474
+ * == Range queries on numbers
3475
+ *
3476
+ * There is now a new query called TypedRangeQuery which detects the type of
3477
+ * the range and if the range is numerical it will find a numerical range.
3478
+ * This allows you to do range queries with negative numbers and without
3479
+ * having to pad the field. However, RangeQuery will perform a lot faster on
3480
+ * large indexes so if you are working with a very large index you will need
3481
+ * to normalize your number fields so that they are a fixed width and always
3482
+ * positive. That way the standard String range query will do fine.
3483
+ *
3484
+ * For example, if you have the numbers;
3485
+ *
3486
+ * [10, -999, -90, 100, 534]
3487
+ *
3488
+ * Then they can be normalized to;
3489
+ *
3490
+ * # note that we have added 1000 to all numbers to make them all positive
3491
+ * [1010, 0001, 0910, 1100, 1534]
3492
+ *
3493
+ */
3494
+ static void
3495
+ Init_RangeQuery(void)
3496
+ {
3497
+ sym_upper = ID2SYM(rb_intern("upper"));
3498
+ sym_lower = ID2SYM(rb_intern("lower"));
3499
+ sym_upper_exclusive = ID2SYM(rb_intern("upper_exclusive"));
3500
+ sym_lower_exclusive = ID2SYM(rb_intern("lower_exclusive"));
3501
+ sym_include_upper = ID2SYM(rb_intern("include_upper"));
3502
+ sym_include_lower = ID2SYM(rb_intern("include_lower"));
3503
+
3504
+ sym_less_than = ID2SYM(rb_intern("<"));
3505
+ sym_less_than_or_equal_to = ID2SYM(rb_intern("<="));
3506
+ sym_greater_than = ID2SYM(rb_intern(">"));
3507
+ sym_greater_than_or_equal_to = ID2SYM(rb_intern(">="));
3508
+
3509
+ cRangeQuery = rb_define_class_under(mSearch, "RangeQuery", cQuery);
3510
+ rb_define_alloc_func(cRangeQuery, frb_data_alloc);
3511
+
3512
+ rb_define_method(cRangeQuery, "initialize", frb_rq_init, 2);
3513
+ }
3514
+
3515
+ /*
3516
+ * Document-class: Ferret::Search::TypedRangeQuery
3517
+ *
3518
+ * == Summary
3519
+ *
3520
+ * TypedRangeQuery is used to find documents with terms in a range.
3521
+ * RangeQuerys are usually used on untokenized fields like date fields or
3522
+ * number fields. TypedRangeQuery is particularly useful for fields with
3523
+ * unnormalized numbers, both positive and negative, integer and float.
3524
+ *
3525
+ * == Example
3526
+ *
3527
+ * To find all documents whose field value lies numerically between -1.0 and
3527
+ * 10.0 inclusive you would write the query like this;
3529
+ *
3530
+ * query = TypedRangeQuery.new(:create_date, :>= => "-1.0", :<= => "10.0")
3531
+ *
3532
+ * == Performance Note
3533
+ *
3534
+ * TypedRangeQuery works by converting all the terms in a field to numbers
3535
+ * and then comparing those numbers with the range boundaries. This can have
3536
+ * quite an impact on performance on large indexes so in those cases it is
3537
+ * usually better to use a standard RangeQuery. This will require a little
3538
+ * work on your behalf. See RangeQuery for notes on how to do this.
3539
+ */
3540
+ static void
3541
+ Init_TypedRangeQuery(void)
3542
+ {
3543
+ cTypedRangeQuery =
3544
+ rb_define_class_under(mSearch, "TypedRangeQuery", cQuery);
3545
+ rb_define_alloc_func(cTypedRangeQuery, frb_data_alloc);
3546
+
3547
+ rb_define_method(cTypedRangeQuery, "initialize", frb_trq_init, 2);
3548
+ }
3549
+
3550
+ /*
3551
+ * Document-class: Ferret::Search::PhraseQuery
3552
+ *
3553
+ * == Summary
3554
+ *
3555
+ * PhraseQuery matches phrases like "the quick brown fox". Most people are
3556
+ * familiar with phrase queries having used them in most internet search
3557
+ * engines.
3558
+ *
3559
+ * === Slop
3560
+ *
3561
+ * Ferret's phrase queries are slightly more advanced. You can match phrases
3562
+ * with a slop, ie the match isn't exact but it is good enough. The slop is
3563
+ * basically the word edit distance of the phrase. For example, "the quick
3564
+ * brown fox" with a slop of 1 would match "the quick little brown fox". With
3565
+ * a slop of 2 it would match "the brown quick fox".
3566
+ *
3567
+ * query = PhraseQuery.new(:content)
3568
+ * query << "the" << "quick" << "brown" << "fox"
3569
+ *
3570
+ * # matches => "the quick brown fox"
3571
+ *
3572
+ * query.slop = 1
3573
+ * # matches => "the quick little brown fox"
3574
+ * |__1__^
3575
+ *
3576
+ * query.slop = 2
3577
+ * # matches => "the brown quick _____ fox"
3578
+ * ^_____2_____|
3579
+ *
3580
+ * == Multi-PhraseQuery
3581
+ *
3582
+ * Phrase queries can also have multiple terms in a single position. Let's
3583
+ * say for example that we want to match synonyms for quick like "fast" and
3584
+ * "speedy". You could build the query like this;
3585
+ *
3586
+ * query = PhraseQuery.new(:content)
3587
+ * query << "the" << ["quick", "fast", "speedy"] << ["brown", "red"] << "fox"
3588
+ * # matches => "the quick red fox"
3589
+ * # matches => "the fast brown fox"
3590
+ *
3591
+ * query.slop = 1
3592
+ * # matches => "the speedy little red fox"
3593
+ *
3594
+ * You can also leave positions blank. Let's say you wanted to match "the
3595
+ * quick <> fox" where "<>" could match anything (but not nothing). You'd
3596
+ * build this query like this;
3597
+ *
3598
+ * query = PhraseQuery.new(:content)
3599
+ * query.add_term("the").add_term("quick").add_term("fox", 2)
3600
+ * # matches => "the quick yellow fox"
3601
+ * # matches => "the quick alkgdhaskghaskjdh fox"
3602
+ *
3603
+ * The second parameter to PhraseQuery#add_term is the position increment for
3604
+ * the term. It is one by default meaning that every time you add a term it
3605
+ * is expected to follow the previous term. But setting it to 2 or greater
3606
+ * you are leaving empty spaces in the term.
3607
+ *
3608
+ * There are also some tricks you can do by setting the position increment to
3609
+ * 0. With a little help from your analyzer you can actually tag bold or
3610
+ * italic text for example. If you want more information about this, ask on
3611
+ * the mailing list.
3612
+ */
3613
+ static void
3614
+ Init_PhraseQuery(void)
3615
+ {
3616
+ cPhraseQuery = rb_define_class_under(mSearch, "PhraseQuery", cQuery);
3617
+ rb_define_alloc_func(cPhraseQuery, frb_data_alloc);
3618
+
3619
+ rb_define_method(cPhraseQuery, "initialize", frb_phq_init, -1);
3620
+ rb_define_method(cPhraseQuery, "add_term", frb_phq_add, -1);
3621
+ rb_define_method(cPhraseQuery, "<<", frb_phq_add, -1);
3622
+ rb_define_method(cPhraseQuery, "slop", frb_phq_get_slop, 0);
3623
+ rb_define_method(cPhraseQuery, "slop=", frb_phq_set_slop, 1);
3624
+ }
3625
+
3626
+ /*
3627
+ * Document-class: Ferret::Search::PrefixQuery
3628
+ *
3629
+ * == Summary
3630
+ *
3631
+ * A prefix query is like a TermQuery except that it matches any term with a
3632
+ * specific prefix. PrefixQuery is expanded into a MultiTermQuery when
3633
+ * submitted in a search.
3634
+ *
3635
+ * == Example
3636
+ *
3637
+ * PrefixQuery is very useful for matching a tree structure category
3638
+ * hierarchy. For example, let's say you have the categories;
3639
+ *
3640
+ * "cat1/"
3641
+ * "cat1/sub_cat1"
3642
+ * "cat1/sub_cat2"
3643
+ * "cat2"
3644
+ * "cat2/sub_cat1"
3645
+ * "cat2/sub_cat2"
3646
+ *
3647
+ * Let's say you want to match everything in category 2. You'd build the query
3648
+ * like this;
3649
+ *
3650
+ * query = PrefixQuery.new(:category, "cat2")
3651
+ * # matches => "cat2"
3652
+ * # matches => "cat2/sub_cat1"
3653
+ * # matches => "cat2/sub_cat2"
3654
+ */
3655
+ static void
3656
+ Init_PrefixQuery(void)
3657
+ {
3658
+ cPrefixQuery = rb_define_class_under(mSearch, "PrefixQuery", cQuery);
3659
+ rb_define_alloc_func(cPrefixQuery, frb_data_alloc);
3660
+
3661
+ rb_define_method(cPrefixQuery, "initialize", frb_prq_init, -1);
3662
+ }
3663
+
3664
+ /*
3665
+ * Document-class: Ferret::Search::WildcardQuery
3666
+ *
3667
+ * == Summary
3668
+ *
3669
+ * WildcardQuery is a simple pattern matching query. There are two wild-card
3670
+ * characters.
3671
+ *
3672
+ * * "*" which matches 0 or more characters
3673
+ * * "?" which matches a single character
3674
+ *
3675
+ * == Example
3676
+ *
3677
+ * query = WildcardQuery.new(:field, "h*og")
3678
+ * # matches => "hog"
3679
+ * # matches => "hot dog"
3680
+ *
3681
+ * query = WildcardQuery.new(:field, "fe?t")
3682
+ * # matches => "feat"
3683
+ * # matches => "feet"
3684
+ *
3685
+ * query = WildcardQuery.new(:field, "f?ll*")
3686
+ * # matches => "fill"
3687
+ * # matches => "falling"
3688
+ * # matches => "folly"
3689
+ */
3690
+ static void
3691
+ Init_WildcardQuery(void)
3692
+ {
3693
+ cWildcardQuery = rb_define_class_under(mSearch, "WildcardQuery", cQuery);
3694
+ rb_define_alloc_func(cWildcardQuery, frb_data_alloc);
3695
+
3696
+ rb_define_method(cWildcardQuery, "initialize", frb_wcq_init, -1);
3697
+ }
3698
+
3699
+ /*
3700
+ * Document-class: Ferret::Search::FuzzyQuery
3701
+ *
3702
+ * == Summary
3703
+ *
3704
+ * FuzzyQuery uses the Levenshtein distance formula for measuring the
3705
+ * similarity between two terms. For example, weak and week have one letter
3706
+ * difference and they are four characters long so the similarity is 75% or
3707
+ * 0.75. You can use this query to match terms that are very close to the
3708
+ * search term.
3709
+ *
3710
+ * == Example
3711
+ *
3712
+ * FuzzyQuery can be quite useful for finding documents that wouldn't normally
3713
+ * be found because of typos.
3714
+ *
3715
+ * FuzzyQuery.new(:field, "google",
3716
+ * :min_similarity => 0.6,
3717
+ * :prefix_length => 2)
3718
+ * # matches => "gogle", "goggle", "googol", "googel"
3719
+ */
3720
+ static void
3721
+ Init_FuzzyQuery(void)
3722
+ {
3723
+ id_default_min_similarity = rb_intern("@@default_min_similarity");
3724
+ id_default_prefix_length = rb_intern("@@default_prefix_length");
3725
+
3726
+ sym_min_similarity = ID2SYM(rb_intern("min_similarity"));
3727
+ sym_prefix_length = ID2SYM(rb_intern("prefix_length"));
3728
+
3729
+ cFuzzyQuery = rb_define_class_under(mSearch, "FuzzyQuery", cQuery);
3730
+ rb_define_alloc_func(cFuzzyQuery, frb_data_alloc);
3731
+ rb_cvar_set(cFuzzyQuery, id_default_min_similarity,
3732
+ rb_float_new(0.5));
3733
+ rb_cvar_set(cFuzzyQuery, id_default_prefix_length,
3734
+ INT2FIX(0));
3735
+
3736
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity",
3737
+ frb_fq_get_dms, 0);
3738
+ rb_define_singleton_method(cFuzzyQuery, "default_min_similarity=",
3739
+ frb_fq_set_dms, 1);
3740
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length",
3741
+ frb_fq_get_dpl, 0);
3742
+ rb_define_singleton_method(cFuzzyQuery, "default_prefix_length=",
3743
+ frb_fq_set_dpl, 1);
3744
+
3745
+ rb_define_method(cFuzzyQuery, "initialize", frb_fq_init, -1);
3746
+ rb_define_method(cFuzzyQuery, "prefix_length", frb_fq_pre_len, 0);
3747
+ rb_define_method(cFuzzyQuery, "min_similarity", frb_fq_min_sim, 0);
3748
+ }
3749
+
3750
+ /*
3751
+ * Document-class: Ferret::Search::MatchAllQuery
3752
+ *
3753
+ * == Summary
3754
+ *
3755
+ * MatchAllQuery matches all documents in the index. You might want to use this
3756
+ * query in combination with a filter, however, ConstantScoreQuery is
3757
+ * probably better in that circumstance.
3758
+ */
3759
+ static void
3760
+ Init_MatchAllQuery(void)
3761
+ {
3762
+ cMatchAllQuery = rb_define_class_under(mSearch, "MatchAllQuery", cQuery);
3763
+ rb_define_alloc_func(cMatchAllQuery, frb_maq_alloc);
3764
+
3765
+ rb_define_method(cMatchAllQuery, "initialize", frb_maq_init, 0);
3766
+ }
3767
+
3768
+ /*
3769
+ * Document-class: Ferret::Search::ConstantScoreQuery
3770
+ *
3771
+ * == Summary
3772
+ *
3773
+ * ConstantScoreQuery is a way to turn a Filter into a Query. It matches all
3774
+ * documents that its filter matches with a constant score. This is a very
3775
+ * fast query, particularly when run more than once (since filters are
3776
+ * cached). It is also used internally by RangeQuery.
3777
+ *
3778
+ * == Example
3779
+ *
3780
+ * Let's say for example that you often need to display all documents created
3781
+ * on or after June 1st. You could create a ConstantScoreQuery like this;
3782
+ *
3783
+ * query = ConstantScoreQuery.new(RangeFilter.new(:created_on, :>= => "200606"))
3784
+ *
3785
+ * Once this is run once the results are cached and will be returned very
3786
+ * quickly in future requests.
3787
+ */
3788
+ static void
3789
+ Init_ConstantScoreQuery(void)
3790
+ {
3791
+ cConstantScoreQuery = rb_define_class_under(mSearch,
3792
+ "ConstantScoreQuery", cQuery);
3793
+ rb_define_alloc_func(cConstantScoreQuery, frb_data_alloc);
3794
+
3795
+ rb_define_method(cConstantScoreQuery, "initialize", frb_csq_init, 1);
3796
+ }
3797
+
3798
+ /*
3799
+ * Document-class: Ferret::Search::FilteredQuery
3800
+ *
3801
+ * == Summary
3802
+ *
3803
+ * FilteredQuery offers you a way to apply a filter to a specific query.
3804
+ * The FilteredQuery would then be added to a BooleanQuery to be combined
3805
+ * with other queries. There is not much point in passing a FilteredQuery
3806
+ * directly to a Searcher#search method unless you are applying more than one
3807
+ * filter since the search method also takes a filter as a parameter.
3808
+ */
3809
+ static void
3810
+ Init_FilteredQuery(void)
3811
+ {
3812
+ cFilteredQuery = rb_define_class_under(mSearch, "FilteredQuery", cQuery);
3813
+ rb_define_alloc_func(cFilteredQuery, frb_data_alloc);
3814
+
3815
+ rb_define_method(cFilteredQuery, "initialize", frb_fqq_init, 2);
3816
+ }
3817
+
3818
+ /*
3819
+ * Document-class: Ferret::Search::Spans::SpanTermQuery
3820
+ *
3821
+ * == Summary
3822
+ *
3823
+ * A SpanTermQuery is the Spans version of TermQuery, the only difference
3824
+ * being that it returns the start and end offset of all of its matches for
3825
+ * use by enclosing SpanQueries.
3826
+ */
3827
+ static void
3828
+ Init_SpanTermQuery(void)
3829
+ {
3830
+ cSpanTermQuery = rb_define_class_under(mSpans, "SpanTermQuery", cQuery);
3831
+ rb_define_alloc_func(cSpanTermQuery, frb_data_alloc);
3832
+
3833
+ rb_define_method(cSpanTermQuery, "initialize", frb_spantq_init, 2);
3834
+ }
3835
+
3836
+ /*
3837
+ * Document-class: Ferret::Search::Spans::SpanMultiTermQuery
3838
+ *
3839
+ * == Summary
3840
+ *
3841
+ * A SpanMultiTermQuery is the Spans version of MultiTermQuery, the only
3842
+ * difference being that it returns the start and end offset of all of its
3843
+ * matches for use by enclosing SpanQueries.
3844
+ */
3845
+ static void
3846
+ Init_SpanMultiTermQuery(void)
3847
+ {
3848
+ cSpanMultiTermQuery = rb_define_class_under(mSpans, "SpanMultiTermQuery", cQuery);
3849
+ rb_define_alloc_func(cSpanMultiTermQuery, frb_data_alloc);
3850
+
3851
+ rb_define_method(cSpanMultiTermQuery, "initialize", frb_spanmtq_init, 2);
3852
+ }
3853
+
3854
+ /*
3855
+ * Document-class: Ferret::Search::Spans::SpanPrefixQuery
3856
+ *
3857
+ * == Summary
3858
+ *
3859
+ * A SpanPrefixQuery is the Spans version of PrefixQuery, the only difference
3860
+ * being that it returns the start and end offset of all of its matches for
3861
+ * use by enclosing SpanQueries.
3862
+ */
3863
+ static void
3864
+ Init_SpanPrefixQuery(void)
3865
+ {
3866
+ cSpanPrefixQuery = rb_define_class_under(mSpans, "SpanPrefixQuery", cQuery);
3867
+ rb_define_alloc_func(cSpanPrefixQuery, frb_data_alloc);
3868
+
3869
+ rb_define_method(cSpanPrefixQuery, "initialize", frb_spanprq_init, -1);
3870
+ }
3871
+
3872
+ /*
3873
+ * Document-class: Ferret::Search::Spans::SpanFirstQuery
3874
+ *
3875
+ * == Summary
3876
+ *
3877
+ * A SpanFirstQuery restricts a query to search in the first +end+ bytes of a
3878
+ * field. This is useful since often the most important information in a
3879
+ * document is at the start of the document.
3880
+ *
3881
+ * == Example
3882
+ *
3883
+ * To find all documents where "ferret" is within the first 100 characters
3884
+ * (really bytes);
3885
+ *
3886
+ * query = SpanFirstQuery.new(SpanTermQuery.new(:content, "ferret"), 100)
3887
+ *
3888
+ * == NOTE
3889
+ *
3890
+ * SpanFirstQuery only works with other SpanQueries.
3891
+ */
3892
+ static void
3893
+ Init_SpanFirstQuery(void)
3894
+ {
3895
+ cSpanFirstQuery = rb_define_class_under(mSpans, "SpanFirstQuery", cQuery);
3896
+ rb_define_alloc_func(cSpanFirstQuery, frb_data_alloc);
3897
+
3898
+ rb_define_method(cSpanFirstQuery, "initialize", frb_spanfq_init, 2);
3899
+ }
3900
+
3901
+ /*
3902
+ * Document-class: Ferret::Search::Spans::SpanNearQuery
3903
+ *
3904
+ * == Summary
3905
+ *
3906
+ * A SpanNearQuery is like a combination between a PhraseQuery and a
3907
+ * BooleanQuery. It matches sub-SpanQueries which are added as clauses but
3908
+ * those clauses must occur within a +slop+ edit distance of each other. You
3909
+ * can also specify that clauses must occur +in_order+.
3910
+ *
3911
+ * == Example
3912
+ *
3913
+ * query = SpanNearQuery.new(:slop => 2)
3914
+ * query << SpanTermQuery.new(:field, "quick")
3915
+ * query << SpanTermQuery.new(:field, "brown")
3916
+ * query << SpanTermQuery.new(:field, "fox")
3917
+ * # matches => "quick brown speckled sleepy fox"
3918
+ * |______2______^
3919
+ * # matches => "quick brown speckled fox"
3920
+ * |__1__^
3921
+ * # matches => "brown quick _____ fox"
3922
+ * ^_____2_____|
3923
+ *
3924
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
3925
+ * query << SpanTermQuery.new(:field, "quick")
3926
+ * query << SpanTermQuery.new(:field, "brown")
3927
+ * query << SpanTermQuery.new(:field, "fox")
3928
+ * # matches => "quick brown speckled sleepy fox"
3929
+ * |______2______^
3930
+ * # matches => "quick brown speckled fox"
3931
+ * |__1__^
3932
+ * # doesn't match => "brown quick _____ fox"
3933
+ * # not in order ^_____2_____|
3934
+ *
3935
+ * == NOTE
3936
+ *
3937
+ * SpanNearQuery only works with other SpanQueries.
3938
+ */
3939
+ static void
3940
+ Init_SpanNearQuery(void)
3941
+ {
3942
+ sym_slop = ID2SYM(rb_intern("slop"));
3943
+ sym_in_order = ID2SYM(rb_intern("in_order"));
3944
+ sym_clauses = ID2SYM(rb_intern("clauses"));
3945
+
3946
+ cSpanNearQuery = rb_define_class_under(mSpans, "SpanNearQuery", cQuery);
3947
+ rb_define_alloc_func(cSpanNearQuery, frb_data_alloc);
3948
+
3949
+ rb_define_method(cSpanNearQuery, "initialize", frb_spannq_init, -1);
3950
+ rb_define_method(cSpanNearQuery, "add", frb_spannq_add, 1);
3951
+ rb_define_method(cSpanNearQuery, "<<", frb_spannq_add, 1);
3952
+ }
3953
+
3954
+ /*
3955
+ * Document-class: Ferret::Search::Spans::SpanOrQuery
3956
+ *
3957
+ * == Summary
3958
+ *
3959
+ * SpanOrQuery is just like a BooleanQuery with all +:should+ clauses.
3960
+ * However, the difference is that all sub-clauses must be SpanQueries and
3961
+ * the resulting query can then be used within other SpanQueries like
3962
+ * SpanNearQuery.
3963
+ *
3964
+ * == Example
3965
+ *
3966
+ * Combined with SpanNearQuery we can create a multi-PhraseQuery like query;
3967
+ *
3968
+ * quick_query = SpanOrQuery.new()
3969
+ * quick_query << SpanTermQuery.new(:field, "quick")
3970
+ * quick_query << SpanTermQuery.new(:field, "fast")
3971
+ * quick_query << SpanTermQuery.new(:field, "speedy")
3972
+ *
3973
+ * colour_query = SpanOrQuery.new()
3974
+ * colour_query << SpanTermQuery.new(:field, "red")
3975
+ * colour_query << SpanTermQuery.new(:field, "brown")
3976
+ *
3977
+ *
3978
+ * query = SpanNearQuery.new(:slop => 2, :in_order => true)
3979
+ * query << quick_query
3980
+ * query << colour_query
3981
+ * query << SpanTermQuery.new(:field, "fox")
3982
+ * # matches => "quick red speckled sleepy fox"
3983
+ * |______2______^
3984
+ * # matches => "speedy brown speckled fox"
3985
+ * |__1__^
3986
+ * # doesn't match => "brown fast _____ fox"
3987
+ * # not in order ^_____2____|
3988
+ *
3989
+ * == NOTE
3990
+ *
3991
+ * SpanOrQuery only works with other SpanQueries.
3992
+ */
3993
+ static void
3994
+ Init_SpanOrQuery(void)
3995
+ {
3996
+ cSpanOrQuery = rb_define_class_under(mSpans, "SpanOrQuery", cQuery);
3997
+ rb_define_alloc_func(cSpanOrQuery, frb_data_alloc);
3998
+
3999
+ rb_define_method(cSpanOrQuery, "initialize", frb_spanoq_init, -1);
4000
+ rb_define_method(cSpanOrQuery, "add", frb_spanoq_add, 1);
4001
+ rb_define_method(cSpanOrQuery, "<<", frb_spanoq_add, 1);
4002
+ }
4003
+
4004
+ /*
4005
+ * Document-class: Ferret::Search::Spans::SpanNotQuery
4006
+ *
4007
+ * == Summary
4008
+ *
4009
+ * SpanNotQuery is like a BooleanQuery with a +:must_not+ clause. The
4010
+ * difference being that the resulting query can be used in another
4011
+ * SpanQuery.
4012
+ *
4013
+ * == Example
4014
+ *
4015
+ * Let's say you wanted to search for all documents with the term "rails"
4016
+ * near the start but without the term "train" near the start. This would
4017
+ * allow the term "train" to occur later on in the document.
4018
+ *
4019
+ * rails_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "rails"), 100)
4020
+ * train_query = SpanFirstQuery.new(SpanTermQuery.new(:content, "train"), 100)
4021
+ * query = SpanNotQuery.new(rails_query, train_query)
4022
+ *
4023
+ * == NOTE
4024
+ *
4025
+ * SpanNotQuery only works with other SpanQueries.
4026
+ */
4027
+ static void
4028
+ Init_SpanNotQuery(void)
4029
+ {
4030
+ cSpanNotQuery = rb_define_class_under(mSpans, "SpanNotQuery", cQuery);
4031
+ rb_define_alloc_func(cSpanNotQuery, frb_data_alloc);
4032
+
4033
+ rb_define_method(cSpanNotQuery, "initialize", frb_spanxq_init, 2);
4034
+ }
4035
+
4036
+ /* rdoc hack
4037
+ extern VALUE mFerret = rb_define_module("Ferret");
4038
+ extern VALUE mSearch = rb_define_module_under(mFerret, "Search");
4039
+ */
4040
+
4041
+ /*
4042
+ * Document-module: Ferret::Search::Spans
4043
+ *
4044
+ * == Summary
4045
+ *
4046
+ * The Spans module contains a number of SpanQueries. SpanQueries, unlike
4047
+ * regular queries, also return the start and end offsets of all of their
4048
+ * matches so they can be used to limit queries to a certain position in the
4049
+ * field. They are often used in combination to perform special types of
4050
+ * PhraseQuery.
4051
+ */
4052
+ static void
4053
+ Init_Spans(void)
4054
+ {
4055
+ mSpans = rb_define_module_under(mSearch, "Spans");
4056
+ Init_SpanTermQuery();
4057
+ Init_SpanMultiTermQuery();
4058
+ Init_SpanPrefixQuery();
4059
+ Init_SpanFirstQuery();
4060
+ Init_SpanNearQuery();
4061
+ Init_SpanOrQuery();
4062
+ Init_SpanNotQuery();
4063
+ }
4064
+
4065
+ /*
4066
+ * Document-class: Ferret::Search::RangeFilter
4067
+ *
4068
+ * == Summary
4069
+ *
4070
+ * RangeFilter filters a set of documents which contain a lexicographical
4071
+ * range of terms (ie "aaa", "aab", "aac", etc). See also RangeQuery
4072
+ *
4073
+ * == Example
4074
+ *
4075
+ * Find all documents created before 5th of September 2002.
4076
+ *
4077
+ * filter = RangeFilter.new(:created_on, :< => "20020905")
4078
+ *
4079
+ * == Number fields
4080
+ *
4081
+ * See RangeQuery for notes on how to use the RangeFilter on a field
4082
+ * containing numbers.
4083
+ */
4084
+ static void
4085
+ Init_RangeFilter(void)
4086
+ {
4087
+ cRangeFilter = rb_define_class_under(mSearch, "RangeFilter", cFilter);
4088
+ frb_mark_cclass(cRangeFilter);
4089
+ rb_define_alloc_func(cRangeFilter, frb_data_alloc);
4090
+
4091
+ rb_define_method(cRangeFilter, "initialize", frb_rf_init, 2);
4092
+ }
4093
+
4094
+ /*
4095
+ * Document-class: Ferret::Search::TypedRangeFilter
4096
+ *
4097
+ * == Summary
4098
+ *
4099
+ * TypedRangeFilter filters a set of documents which contain a
4100
+ * lexicographical range of terms (ie "aaa", "aab", "aac", etc), unless the
4101
+ * range boundaries happen to be numbers (positive, negative, integer,
4102
+ * float), in which case a numerical filter is applied. See also
4103
+ * TypedRangeQuery
4104
+ *
4105
+ * == Example
4106
+ *
4107
+ * Find all products that cost less than or equal to $50.00.
4108
+ *
4109
+ * filter = TypedRangeFilter.new(:price, :<= => "50.00")
4110
+ */
4111
+ static void
4112
+ Init_TypedRangeFilter(void)
4113
+ {
4114
+ cTypedRangeFilter =
4115
+ rb_define_class_under(mSearch, "TypedRangeFilter", cFilter);
4116
+ frb_mark_cclass(cTypedRangeFilter);
4117
+ rb_define_alloc_func(cTypedRangeFilter, frb_data_alloc);
4118
+
4119
+ rb_define_method(cTypedRangeFilter, "initialize", frb_trf_init, 2);
4120
+ }
4121
+
4122
+ /*
4123
+ * Document-class: Ferret::Search::QueryFilter
4124
+ *
4125
+ * == Summary
4126
+ *
4127
+ * QueryFilter can be used to restrict one query's results by another query's
4128
+ * results, basically "and"ing them together. Of course you could easily use
4129
+ * a BooleanQuery to do this. The reason you may choose to use a QueryFilter
4130
+ * is that Filter results are cached so if you have one query that is often
4131
+ * added to other queries you may want to use a QueryFilter for performance
4132
+ * reasons.
4133
+ *
4134
+ * == Example
4135
+ *
4136
+ * Let's say you have a field +:approved+ which you set to yes when a
4137
+ * document is approved for display. You'll probably want to add a Filter
4138
+ * which filters approved documents to display to your users. This is the
4139
+ * perfect use case for a QueryFilter.
4140
+ *
4141
+ * filter = QueryFilter.new(TermQuery.new(:approved, "yes"))
4142
+ *
4143
+ * Just remember to use the same QueryFilter each time to take advantage of
4144
+ * caching. Don't create a new one for each request. Of course, this won't
4145
+ * work in a CGI application.
4146
+ */
4147
+ static void
4148
+ Init_QueryFilter(void)
4149
+ {
4150
+ cQueryFilter = rb_define_class_under(mSearch, "QueryFilter", cFilter);
4151
+ frb_mark_cclass(cQueryFilter);
4152
+ rb_define_alloc_func(cQueryFilter, frb_data_alloc);
4153
+
4154
+ rb_define_method(cQueryFilter, "initialize", frb_qf_init, 1);
4155
+ }
4156
+
4157
+ /*
4158
+ * Document-class: Ferret::Search::Filter
4159
+ *
4160
+ * == Summary
4161
+ *
4162
+ * A Filter is used to filter query results. It is usually passed to one of
4163
+ * Searcher's search methods however it can also be used inside a
4164
+ * ConstantScoreQuery or a FilteredQuery. To implement your own Filter you
4165
+ * must implement the method #get_bitvector(index_reader) which returns a
4166
+ * BitVector with set bits corresponding to documents that are allowed by
4167
+ * this Filter.
4168
+ *
4169
+ * TODO add support for user implemented Filter.
4170
+ * TODO add example of user implemented Filter.
4171
+ */
4172
+ static void
4173
+ Init_Filter(void)
4174
+ {
4175
+ id_bits = rb_intern("bits");
4176
+ cFilter = rb_define_class_under(mSearch, "Filter", rb_cObject);
4177
+ frb_mark_cclass(cFilter);
4178
+ rb_define_alloc_func(cConstantScoreQuery, frb_data_alloc);
4179
+
4180
+ rb_define_method(cFilter, "bits", frb_f_get_bits, 1);
4181
+ rb_define_method(cFilter, "to_s", frb_f_to_s, 0);
4182
+ }
4183
+
4184
+ /*
4185
+ * Document-class: Ferret::Search::SortField
4186
+ *
4187
+ * == Summary
4188
+ *
4189
+ * A SortField is used to sort the result-set of a search by the contents of
4190
+ * a field. The following types of sort_field are available;
4191
+ *
4192
+ * * :auto
4193
+ * * :integer
4194
+ * * :float
4195
+ * * :string
4196
+ * * :byte
4197
+ * * :doc_id
4198
+ * * :score
4199
+ *
4200
+ * The type of the SortField is set by passing it as a parameter to the
4201
+ * constructor. The +:auto+ type specifies that the SortField should detect
4202
+ * the sort type by looking at the data in the field. This is the default
4203
+ * :type value although it is recommended that you explicitly specify the
4204
+ * field's type.
4205
+ *
4206
+ * == Example
4207
+ *
4208
+ * title_sf = SortField.new(:title, :type => :string)
4209
+ * rating_sf = SortField.new(:rating, :type => :float, :reverse => true)
4210
+ *
4211
+ *
4212
+ * Note 1: Care should be taken when using the :auto sort-type since numbers
4213
+ * will occur before other strings in the index so if you are sorting a field
4214
+ * with both numbers and strings (like a title field which might have "24"
4215
+ * and "Prison Break") then the sort_field will think it is sorting integers
4216
+ * when it really should be sorting strings.
4217
+ *
4218
+ * Note 2: When sorting by integer, integers are only 4 bytes so anything
4219
+ * larger will cause strange sorting behaviour.
4220
+ */
4221
+ static void
4222
+ Init_SortField(void)
4223
+ {
4224
+ /* option hash keys for SortField#initialize */
4225
+ sym_type = ID2SYM(rb_intern("type"));
4226
+ sym_reverse = ID2SYM(rb_intern("reverse"));
4227
+ sym_comparator = ID2SYM(rb_intern("comparator"));
4228
+
4229
+ /* Sort types */
4230
+ sym_integer = ID2SYM(rb_intern("integer"));
4231
+ sym_float = ID2SYM(rb_intern("float"));
4232
+ sym_string = ID2SYM(rb_intern("string"));
4233
+ sym_auto = ID2SYM(rb_intern("auto"));
4234
+ sym_doc_id = ID2SYM(rb_intern("doc_id"));
4235
+ sym_score = ID2SYM(rb_intern("score"));
4236
+ sym_byte = ID2SYM(rb_intern("byte"));
4237
+
4238
+ cSortField = rb_define_class_under(mSearch, "SortField", rb_cObject);
4239
+ rb_define_alloc_func(cSortField, frb_data_alloc);
4240
+
4241
+ rb_define_method(cSortField, "initialize", frb_sf_init, -1);
4242
+ rb_define_method(cSortField, "reverse?", frb_sf_is_reverse, 0);
4243
+ rb_define_method(cSortField, "name", frb_sf_get_name, 0);
4244
+ rb_define_method(cSortField, "type", frb_sf_get_type, 0);
4245
+ rb_define_method(cSortField, "comparator", frb_sf_get_comparator, 0);
4246
+ rb_define_method(cSortField, "to_s", frb_sf_to_s, 0);
4247
+
4248
+ rb_define_const(cSortField, "SCORE",
4249
+ Data_Wrap_Struct(cSortField, NULL,
4250
+ &frb_deref_free,
4251
+ (FrtSortField *)&FRT_SORT_FIELD_SCORE));
4252
+ object_add((FrtSortField *)&FRT_SORT_FIELD_SCORE,
4253
+ rb_const_get(cSortField, rb_intern("SCORE")));
4254
+
4255
+ rb_define_const(cSortField, "SCORE_REV",
4256
+ Data_Wrap_Struct(cSortField, NULL,
4257
+ &frb_deref_free,
4258
+ (FrtSortField *)&FRT_SORT_FIELD_SCORE_REV));
4259
+ object_add((FrtSortField *)&FRT_SORT_FIELD_SCORE_REV,
4260
+ rb_const_get(cSortField, rb_intern("SCORE_REV")));
4261
+
4262
+ rb_define_const(cSortField, "DOC_ID",
4263
+ Data_Wrap_Struct(cSortField, NULL,
4264
+ &frb_deref_free,
4265
+ (FrtSortField *)&FRT_SORT_FIELD_DOC));
4266
+
4267
+ oSORT_FIELD_DOC = rb_const_get(cSortField, rb_intern("DOC_ID"));
4268
+ object_add((FrtSortField *)&FRT_SORT_FIELD_DOC, oSORT_FIELD_DOC);
4269
+
4270
+ rb_define_const(cSortField, "DOC_ID_REV",
4271
+ Data_Wrap_Struct(cSortField, NULL,
4272
+ &frb_deref_free,
4273
+ (FrtSortField *)&FRT_SORT_FIELD_DOC_REV));
4274
+ object_add((FrtSortField *)&FRT_SORT_FIELD_DOC_REV,
4275
+ rb_const_get(cSortField, rb_intern("DOC_ID_REV")));
4276
+ }
4277
+
4278
+ /*
4279
+ * Document-class: Ferret::Search::Sort
4280
+ *
4281
+ * == Summary
4282
+ *
4283
+ * A Sort object is used to combine and apply a list of SortFields. The
4284
+ * SortFields are applied in the order they are added to the SortObject.
4285
+ *
4286
+ * == Example
4287
+ *
4288
+ * Here is how you would create a Sort object that sorts first by rating and
4289
+ * then by title;
4290
+ *
4291
+ * sf_rating = SortField.new(:rating, :type => :float, :reverse => true)
4292
+ * sf_title = SortField.new(:title, :type => :string)
4293
+ * sort = Sort.new([sf_rating, sf_title])
4294
+ *
4295
+ * Remember that the :type parameter for SortField is set to :auto by default
4296
+ * but I strongly recommend you specify a :type value.
4297
+ */
4298
+ static void
4299
+ Init_Sort(void)
4300
+ {
4301
+ /* Sort */
4302
+ cSort = rb_define_class_under(mSearch, "Sort", rb_cObject);
4303
+ rb_define_alloc_func(cSort, frb_sort_alloc);
4304
+
4305
+ rb_define_method(cSort, "initialize", frb_sort_init, -1);
4306
+ rb_define_method(cSort, "fields", frb_sort_get_fields, 0);
4307
+ rb_define_method(cSort, "to_s", frb_sort_to_s, 0);
4308
+
4309
+ rb_define_const(cSort, "RELEVANCE",
4310
+ frb_sort_init(0, NULL, frb_sort_alloc(cSort)));
4311
+ rb_define_const(cSort, "INDEX_ORDER",
4312
+ frb_sort_init(1, &oSORT_FIELD_DOC, frb_sort_alloc(cSort)));
4313
+ }
4314
+
4315
+ /*
4316
+ * Document-class: Ferret::Search::Searcher
4317
+ *
4318
+ * == Summary
4319
+ *
4320
+ * The Searcher class basically performs the task that Ferret was built for.
4321
+ * It searches the index. To search the index the Searcher class wraps an
4322
+ * IndexReader so many of the tasks that you can perform on an IndexReader
4323
+ * are also available on a searcher including, most importantly, accessing
4324
+ * stored documents.
4325
+ *
4326
+ * The main methods that you need to know about when using a Searcher are the
4327
+ * search methods. There is the Searcher#search_each method which iterates
4328
+ * through the results by document id and score and there is the
4329
+ * Searcher#search method which returns a TopDocs object. Another important
4330
+ * difference to note is that the Searcher#search_each method normalizes the
4331
+ * score to a value in the range 0.0..1.0 if the max_score is greater than
4332
+ * 1.0. Searcher#search does not. Apart from that they take the same
4333
+ * parameters and work the same way.
4334
+ *
4335
+ * == Example
4336
+ *
4337
+ * searcher = Searcher.new("/path/to/index")
4338
+ *
4339
+ * searcher.search_each(TermQuery.new(:content, "ferret"),
4340
+ * :filter => RangeFilter.new(:date, :< => "2006"),
4341
+ * :sort => "date DESC, title") do |doc_id, score|
4342
+ * puts "#{searcher[doc_id][:title]} scored #{score}"
4343
+ * end
4344
+ */
4345
+ static void
4346
+ Init_Searcher(void)
4347
+ {
4348
+ /* option hash keys for Searcher#search */
4349
+ sym_offset = ID2SYM(rb_intern("offset"));
4350
+ sym_limit = ID2SYM(rb_intern("limit"));
4351
+ sym_start_doc = ID2SYM(rb_intern("start_doc"));
4352
+ sym_all = ID2SYM(rb_intern("all"));
4353
+ sym_filter = ID2SYM(rb_intern("filter"));
4354
+ sym_filter_proc = ID2SYM(rb_intern("filter_proc"));
4355
+ sym_c_filter_proc = ID2SYM(rb_intern("c_filter_proc"));
4356
+ sym_sort = ID2SYM(rb_intern("sort"));
4357
+
4358
+ sym_excerpt_length = ID2SYM(rb_intern("excerpt_length"));
4359
+ sym_num_excerpts = ID2SYM(rb_intern("num_excerpts"));
4360
+ sym_pre_tag = ID2SYM(rb_intern("pre_tag"));
4361
+ sym_post_tag = ID2SYM(rb_intern("post_tag"));
4362
+ sym_ellipsis = ID2SYM(rb_intern("ellipsis"));
4363
+
4364
+ /* Searcher */
4365
+ cSearcher = rb_define_class_under(mSearch, "Searcher", rb_cObject);
4366
+ rb_define_alloc_func(cSearcher, frb_data_alloc);
4367
+
4368
+ rb_define_method(cSearcher, "initialize", frb_sea_init, 1);
4369
+ rb_define_method(cSearcher, "close", frb_sea_close, 0);
4370
+ rb_define_method(cSearcher, "reader", frb_sea_get_reader, 0);
4371
+ rb_define_method(cSearcher, "doc_freq", frb_sea_doc_freq, 2);
4372
+ rb_define_method(cSearcher, "get_document", frb_sea_doc, 1);
4373
+ rb_define_method(cSearcher, "[]", frb_sea_doc, 1);
4374
+ rb_define_method(cSearcher, "max_doc", frb_sea_max_doc, 0);
4375
+ rb_define_method(cSearcher, "search", frb_sea_search, -1);
4376
+ rb_define_method(cSearcher, "search_each", frb_sea_search_each, -1);
4377
+ rb_define_method(cSearcher, "scan", frb_sea_scan, -1);
4378
+ rb_define_method(cSearcher, "explain", frb_sea_explain, 2);
4379
+ rb_define_method(cSearcher, "highlight", frb_sea_highlight, -1);
4380
+ }
4381
+
4382
+ /*
4383
+ * Document-class: Ferret::Search::MultiSearcher
4384
+ *
4385
+ * == Summary
4386
+ *
4387
+ * See Searcher for the methods that you can use on this object. A
4388
+ * MultiSearcher is used to search multiple sub-searchers. The most efficient
4389
+ * way to do this would be to open up an IndexReader on multiple directories
4390
+ * and creating a Searcher with that. However, if you decide to implement a
4391
+ * RemoteSearcher, the MultiSearcher can be used to search multiple machines
4392
+ * at once.
4393
+ */
4394
+ static void
4395
+ Init_MultiSearcher(void)
4396
+ {
4397
+ cMultiSearcher = rb_define_class_under(mSearch, "MultiSearcher", cSearcher);
4398
+ rb_define_alloc_func(cMultiSearcher, frb_data_alloc);
4399
+ rb_define_method(cMultiSearcher, "initialize", frb_ms_init, -1);
4400
+ }
4401
+
4402
+ /*
4403
+ * Document-module: Ferret::Search
4404
+ *
4405
+ * == Summary
4406
+ *
4407
+ * The Search module contains all the classes used for searching the index;
4408
+ * what Ferret was designed to do. The important classes to take a look at in
4409
+ * this module are (in order);
4410
+ *
4411
+ * * Query
4412
+ * * Searcher
4413
+ * * Filter
4414
+ * * Sort
4415
+ *
4416
+ * Happy Ferreting!!
4417
+ */
4418
+ void
4419
+ Init_Search(void)
4420
+ {
4421
+ mSearch = rb_define_module_under(mFerret, "Search");
4422
+
4423
+ fsym_id = rb_intern("id");
4424
+
4425
+ Init_Hit();
4426
+ Init_TopDocs();
4427
+ Init_Explanation();
4428
+
4429
+ /* Queries */
4430
+ Init_Query();
4431
+
4432
+ Init_TermQuery();
4433
+ Init_MultiTermQuery();
4434
+ Init_BooleanQuery();
4435
+ Init_RangeQuery();
4436
+ Init_TypedRangeQuery();
4437
+ Init_PhraseQuery();
4438
+ Init_PrefixQuery();
4439
+ Init_WildcardQuery();
4440
+ Init_FuzzyQuery();
4441
+ Init_MatchAllQuery();
4442
+ Init_ConstantScoreQuery();
4443
+ Init_FilteredQuery();
4444
+
4445
+ Init_Spans();
4446
+
4447
+ /* Filters */
4448
+ Init_Filter();
4449
+ Init_RangeFilter();
4450
+ Init_TypedRangeFilter();
4451
+ Init_QueryFilter();
4452
+
4453
+ /* Sorting */
4454
+ Init_SortField(); /* must be before Init_Sort */
4455
+ Init_Sort();
4456
+
4457
+ /* Searchers */
4458
+ Init_Searcher();
4459
+ Init_MultiSearcher();
4460
+ }