isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,665 @@
1
+ #include <string.h>
2
+ #include "frt_global.h"
3
+ #include "frt_search.h"
4
+
5
+ /*****************************************************************************
6
+ *
7
+ * Range
8
+ *
9
+ *****************************************************************************/
10
+
11
+ typedef struct Range
12
+ {
13
+ FrtSymbol field;
14
+ char *lower_term;
15
+ char *upper_term;
16
+ bool include_lower : 1;
17
+ bool include_upper : 1;
18
+ } Range;
19
+
20
+ static char *range_to_s(Range *range, FrtSymbol default_field, float boost)
21
+ {
22
+ char *buffer, *b;
23
+ size_t flen, llen, ulen;
24
+ const char *field_name = rb_id2name(range->field);
25
+
26
+ flen = strlen(field_name);
27
+ llen = range->lower_term ? strlen(range->lower_term) : 0;
28
+ ulen = range->upper_term ? strlen(range->upper_term) : 0;
29
+ buffer = FRT_ALLOC_N(char, flen + llen + ulen + 40);
30
+ b = buffer;
31
+
32
+ if (default_field != range->field) {
33
+ memcpy(buffer, field_name, flen * sizeof(char));
34
+ b += flen;
35
+ *b = ':';
36
+ b++;
37
+ }
38
+
39
+ if (range->lower_term) {
40
+ *b = range->include_lower ? '[' : '{';
41
+ b++;
42
+ memcpy(b, range->lower_term, llen);
43
+ b += llen;
44
+ } else {
45
+ *b = '<';
46
+ b++;
47
+ }
48
+
49
+ if (range->upper_term && range->lower_term) {
50
+ *b = ' '; b++;
51
+ }
52
+
53
+ if (range->upper_term) {
54
+ memcpy(b, range->upper_term, ulen);
55
+ b += ulen;
56
+ *b = range->include_upper ? ']' : '}';
57
+ b++;
58
+ } else {
59
+ *b = '>';
60
+ b++;
61
+ }
62
+
63
+ *b = 0;
64
+ if (boost != 1.0) {
65
+ *b = '^';
66
+ frt_dbl_to_s(b + 1, boost);
67
+ }
68
+ return buffer;
69
+ }
70
+
71
+ static void range_destroy(Range *range)
72
+ {
73
+ free(range->lower_term);
74
+ free(range->upper_term);
75
+ free(range);
76
+ }
77
+
78
+ static unsigned long long range_hash(Range *filt)
79
+ {
80
+ return filt->include_lower | (filt->include_upper << 1)
81
+ | ((frt_str_hash(rb_id2name(filt->field))
82
+ ^ (filt->lower_term ? frt_str_hash(filt->lower_term) : 0)
83
+ ^ (filt->upper_term ? frt_str_hash(filt->upper_term) : 0)) << 2);
84
+ }
85
+
86
+ static int range_eq(Range *filt, Range *o)
87
+ {
88
+ if ((filt->lower_term && !o->lower_term) || (!filt->lower_term && o->lower_term)) { return false; }
89
+ if ((filt->upper_term && !o->upper_term) || (!filt->upper_term && o->upper_term)) { return false; }
90
+ return ((filt->field == o->field)
91
+ && ((filt->lower_term && o->lower_term) ? (strcmp(filt->lower_term, o->lower_term) == 0) : 1)
92
+ && ((filt->upper_term && o->upper_term) ? (strcmp(filt->upper_term, o->upper_term) == 0) : 1)
93
+ && (filt->include_lower == o->include_lower)
94
+ && (filt->include_upper == o->include_upper));
95
+ }
96
+
97
+ static Range *range_new(FrtSymbol field, const char *lower_term,
98
+ const char *upper_term, bool include_lower,
99
+ bool include_upper)
100
+ {
101
+ Range *range;
102
+
103
+ if (!lower_term && !upper_term) {
104
+ FRT_RAISE(FRT_ARG_ERROR, "Nil bounds for range. A range must include either "
105
+ "lower bound or an upper bound");
106
+ }
107
+ if (include_lower && !lower_term) {
108
+ FRT_RAISE(FRT_ARG_ERROR, "Lower bound must be non-nil to be inclusive. That "
109
+ "is, if you specify :include_lower => true when you create a "
110
+ "range you must include a :lower_term");
111
+ }
112
+ if (include_upper && !upper_term) {
113
+ FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be non-nil to be inclusive. That "
114
+ "is, if you specify :include_upper => true when you create a "
115
+ "range you must include a :upper_term");
116
+ }
117
+ if (upper_term && lower_term && (strcmp(upper_term, lower_term) < 0)) {
118
+ FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be greater than lower bound. "
119
+ "\"%s\" < \"%s\"", upper_term, lower_term);
120
+ }
121
+
122
+ range = FRT_ALLOC(Range);
123
+
124
+ range->field = field;
125
+ range->lower_term = lower_term ? frt_estrdup(lower_term) : NULL;
126
+ range->upper_term = upper_term ? frt_estrdup(upper_term) : NULL;
127
+ range->include_lower = include_lower;
128
+ range->include_upper = include_upper;
129
+ return range;
130
+ }
131
+
132
+ static Range *trange_new(FrtSymbol field, const char *lower_term,
133
+ const char *upper_term, bool include_lower,
134
+ bool include_upper)
135
+ {
136
+ Range *range;
137
+ int len;
138
+ double upper_num, lower_num;
139
+
140
+ if (!lower_term && !upper_term) {
141
+ FRT_RAISE(FRT_ARG_ERROR, "Nil bounds for range. A range must include either "
142
+ "lower bound or an upper bound");
143
+ }
144
+ if (include_lower && !lower_term) {
145
+ FRT_RAISE(FRT_ARG_ERROR, "Lower bound must be non-nil to be inclusive. That "
146
+ "is, if you specify :include_lower => true when you create a "
147
+ "range you must include a :lower_term");
148
+ }
149
+ if (include_upper && !upper_term) {
150
+ FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be non-nil to be inclusive. That "
151
+ "is, if you specify :include_upper => true when you create a "
152
+ "range you must include a :upper_term");
153
+ }
154
+ if (upper_term && lower_term) {
155
+ if ((!lower_term ||
156
+ (sscanf(lower_term, "%lg%n", &lower_num, &len) &&
157
+ (int)strlen(lower_term) == len)) &&
158
+ (!upper_term ||
159
+ (sscanf(upper_term, "%lg%n", &upper_num, &len) &&
160
+ (int)strlen(upper_term) == len)))
161
+ {
162
+ if (upper_num < lower_num) {
163
+ FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be greater than lower bound."
164
+ " numbers \"%lg\" < \"%lg\"", upper_num, lower_num);
165
+ }
166
+ }
167
+ else {
168
+ if (upper_term && lower_term &&
169
+ (strcmp(upper_term, lower_term) < 0)) {
170
+ FRT_RAISE(FRT_ARG_ERROR, "Upper bound must be greater than lower bound."
171
+ " \"%s\" < \"%s\"", upper_term, lower_term);
172
+ }
173
+ }
174
+ }
175
+
176
+ range = FRT_ALLOC(Range);
177
+
178
+ range->field = field;
179
+ range->lower_term = lower_term ? frt_estrdup(lower_term) : NULL;
180
+ range->upper_term = upper_term ? frt_estrdup(upper_term) : NULL;
181
+ range->include_lower = include_lower;
182
+ range->include_upper = include_upper;
183
+ return range;
184
+ }
185
+
186
+ /***************************************************************************
187
+ *
188
+ * RangeFilter
189
+ *
190
+ ***************************************************************************/
191
+
192
+ typedef struct RangeFilter
193
+ {
194
+ FrtFilter super;
195
+ Range *range;
196
+ } RangeFilter;
197
+
198
+ #define RF(filt) ((RangeFilter *)(filt))
199
+
200
+ static void frt_rfilt_destroy_i(FrtFilter *filt)
201
+ {
202
+ range_destroy(RF(filt)->range);
203
+ frt_filt_destroy_i(filt);
204
+ }
205
+
206
+ static char *frt_rfilt_to_s(FrtFilter *filt)
207
+ {
208
+ char *rstr = range_to_s(RF(filt)->range, (FrtSymbol)NULL, 1.0);
209
+ char *rfstr = frt_strfmt("RangeFilter< %s >", rstr);
210
+ free(rstr);
211
+ return rfstr;
212
+ }
213
+
214
+ static FrtBitVector *frt_rfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
215
+ {
216
+ FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
217
+ Range *range = RF(filt)->range;
218
+ FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
219
+ /* the field info exists we need to add docs to the bit vector, otherwise
220
+ * we just return an empty bit vector */
221
+ if (fi) {
222
+ const char *lower_term =
223
+ range->lower_term ? range->lower_term : FRT_EMPTY_STRING;
224
+ const char *upper_term = range->upper_term;
225
+ const bool include_upper = range->include_upper;
226
+ const int field_num = fi->number;
227
+ char *term;
228
+ FrtTermEnum* te;
229
+ FrtTermDocEnum *tde;
230
+ bool check_lower;
231
+
232
+ te = ir->terms(ir, field_num);
233
+ if (te->skip_to(te, lower_term) == NULL) {
234
+ te->close(te);
235
+ return bv;
236
+ }
237
+
238
+ check_lower = !(range->include_lower || (lower_term == FRT_EMPTY_STRING));
239
+
240
+ tde = ir->term_docs(ir);
241
+ term = te->curr_term;
242
+ do {
243
+ if (!check_lower
244
+ || (strcmp(term, lower_term) > 0)) {
245
+ check_lower = false;
246
+ if (upper_term) {
247
+ int compare = strcmp(upper_term, term);
248
+ /* Break if upper term is greater than or equal to upper
249
+ * term and include_upper is false or ther term is fully
250
+ * greater than upper term. This is optimized so that only
251
+ * one check is done except in last check or two */
252
+ if ((compare <= 0)
253
+ && (!include_upper || (compare < 0))) {
254
+ break;
255
+ }
256
+ }
257
+ /* we have a good term, find the docs */
258
+ /* text is already pointing to term buffer text */
259
+ tde->seek_te(tde, te);
260
+ while (tde->next(tde)) {
261
+ frt_bv_set(bv, tde->doc_num(tde));
262
+ }
263
+ }
264
+ } while (te->next(te));
265
+
266
+ tde->close(tde);
267
+ te->close(te);
268
+ }
269
+
270
+ return bv;
271
+ }
272
+
273
+ static unsigned long long frt_rfilt_hash(FrtFilter *filt) {
274
+ return range_hash(RF(filt)->range);
275
+ }
276
+
277
+ static int frt_rfilt_eq(FrtFilter *filt, FrtFilter *o) {
278
+ return range_eq(RF(filt)->range, RF(o)->range);
279
+ }
280
+
281
+ FrtFilter *frt_rfilt_new(FrtSymbol field,
282
+ const char *lower_term, const char *upper_term,
283
+ bool include_lower, bool include_upper)
284
+ {
285
+ FrtFilter *filt = filt_new(RangeFilter);
286
+ RF(filt)->range = range_new(field, lower_term, upper_term,
287
+ include_lower, include_upper);
288
+ filt->get_bv_i = &frt_rfilt_get_bv_i;
289
+ filt->hash = &frt_rfilt_hash;
290
+ filt->eq = &frt_rfilt_eq;
291
+ filt->to_s = &frt_rfilt_to_s;
292
+ filt->destroy_i = &frt_rfilt_destroy_i;
293
+ return filt;
294
+ }
295
+
296
+ /***************************************************************************
297
+ *
298
+ * TypedRangeFilter
299
+ *
300
+ ***************************************************************************/
301
+
302
+ static char *frt_trfilt_to_s(FrtFilter *filt)
303
+ {
304
+ char *rstr = range_to_s(RF(filt)->range, (FrtSymbol)NULL, 1.0);
305
+ char *rfstr = frt_strfmt("TypedRangeFilter< %s >", rstr);
306
+ free(rstr);
307
+ return rfstr;
308
+ }
309
+
310
/*
 * Which numeric comparisons a typed range requires. The low two bits select
 * the upper-bound test (LE / LT), the next two the lower-bound test
 * (GE / GT); the combined names are the bitwise OR of one of each.
 */
typedef enum {
    TRC_NONE  = 0x00,
    TRC_LE    = 0x01,
    TRC_LT    = 0x02,
    TRC_GE    = 0x04,
    TRC_GE_LE = TRC_GE | TRC_LE,
    TRC_GE_LT = TRC_GE | TRC_LT,
    TRC_GT    = 0x08,
    TRC_GT_LE = TRC_GT | TRC_LE,
    TRC_GT_LT = TRC_GT | TRC_LT
} TypedRangeCheck;
321
+
322
/*
 * Walks the term enumeration from its current term and, for every term that
 * parses completely as a number satisfying `cond`, sets the bits of that
 * term's documents in `bv`. Iteration stops at the first term whose first
 * character is greater than '9'.
 *
 * Expects `term`, `num`, `len`, `te`, `tde` and `bv` in the enclosing scope;
 * `cond` is written in terms of `num`.
 *
 * The sscanf() result is checked so that a term which fails to convert is
 * never evaluated against a stale `num`/`len` left by an earlier term.
 */
#define SET_DOCS(cond)\
do {\
    if (term[0] > '9') break; /* done */\
    num = 0.0;\
    len = 0;\
    if (sscanf(term, "%lg%n", &num, &len) == 1\
        && len == te->curr_term_len) { /* We have a number */\
        if (cond) {\
            tde->seek_te(tde, te);\
            while (tde->next(tde)) {\
                frt_bv_set(bv, tde->doc_num(tde));\
            }\
        }\
    }\
} while (te->next(te))
335
+
336
+
337
+ static FrtBitVector *frt_trfilt_get_bv_i(FrtFilter *filt, FrtIndexReader *ir)
338
+ {
339
+ Range *range = RF(filt)->range;
340
+ double lnum = 0.0, unum = 0.0;
341
+ int len = 0;
342
+ const char *lt = range->lower_term;
343
+ const char *ut = range->upper_term;
344
+ if ((!lt || (sscanf(lt, "%lg%n", &lnum, &len) && (int)strlen(lt) == len)) &&
345
+ (!ut || (sscanf(ut, "%lg%n", &unum, &len) && (int)strlen(ut) == len)))
346
+ {
347
+ FrtBitVector *bv = frt_bv_new_capa(ir->max_doc(ir));
348
+ FrtFieldInfo *fi = frt_fis_get_field(ir->fis, range->field);
349
+ /* the field info exists we need to add docs to the bit vector,
350
+ * otherwise we just return an empty bit vector */
351
+ if (fi) {
352
+ const int field_num = fi->number;
353
+ char *term;
354
+ double num;
355
+ FrtTermEnum* te;
356
+ FrtTermDocEnum *tde;
357
+ TypedRangeCheck check = TRC_NONE;
358
+
359
+ te = ir->terms(ir, field_num);
360
+ if (te->skip_to(te, "+.") == NULL) {
361
+ te->close(te);
362
+ return bv;
363
+ }
364
+
365
+ tde = ir->term_docs(ir);
366
+ term = te->curr_term;
367
+
368
+ if (lt) {
369
+ check = range->include_lower ? TRC_GE : TRC_GT;
370
+ }
371
+ if (ut) {
372
+ check = (TypedRangeCheck)(check | (range->include_upper
373
+ ? TRC_LE
374
+ : TRC_LT));
375
+ }
376
+
377
+ switch(check) {
378
+ case TRC_LE:
379
+ SET_DOCS(num <= unum);
380
+ break;
381
+ case TRC_LT:
382
+ SET_DOCS(num < unum);
383
+ break;
384
+ case TRC_GE:
385
+ SET_DOCS(num >= lnum);
386
+ break;
387
+ case TRC_GE_LE:
388
+ SET_DOCS(num >= lnum && num <= unum);
389
+ break;
390
+ case TRC_GE_LT:
391
+ SET_DOCS(num >= lnum && num < unum);
392
+ break;
393
+ case TRC_GT:
394
+ SET_DOCS(num > lnum);
395
+ break;
396
+ case TRC_GT_LE:
397
+ SET_DOCS(num > lnum && num <= unum);
398
+ break;
399
+ case TRC_GT_LT:
400
+ SET_DOCS(num > lnum && num < unum);
401
+ break;
402
+ case TRC_NONE:
403
+ /* should never happen. Error should have been rb_raised */
404
+ assert(false);
405
+ }
406
+ tde->close(tde);
407
+ te->close(te);
408
+ }
409
+
410
+ return bv;
411
+ }
412
+ else {
413
+ return frt_rfilt_get_bv_i(filt, ir);
414
+ }
415
+ }
416
+
417
+ FrtFilter *frt_trfilt_new(FrtSymbol field,
418
+ const char *lower_term, const char *upper_term,
419
+ bool include_lower, bool include_upper)
420
+ {
421
+ FrtFilter *filt = filt_new(RangeFilter);
422
+ RF(filt)->range = trange_new(field, lower_term, upper_term,
423
+ include_lower, include_upper);
424
+
425
+ filt->get_bv_i = &frt_trfilt_get_bv_i;
426
+ filt->hash = &frt_rfilt_hash;
427
+ filt->eq = &frt_rfilt_eq;
428
+ filt->to_s = &frt_trfilt_to_s;
429
+ filt->destroy_i = &frt_rfilt_destroy_i;
430
+ return filt;
431
+ }
432
+
433
+ /*****************************************************************************
434
+ *
435
+ * RangeQuery
436
+ *
437
+ *****************************************************************************/
438
+
439
+ #define RQ(query) ((FrtRangeQuery *)(query))
440
+ typedef struct FrtRangeQuery
441
+ {
442
+ FrtQuery f;
443
+ Range *range;
444
+ } FrtRangeQuery;
445
+
446
+ static char *frt_rq_to_s(FrtQuery *self, FrtSymbol field)
447
+ {
448
+ return range_to_s(RQ(self)->range, field, self->boost);
449
+ }
450
+
451
+ static void frt_rq_destroy(FrtQuery *self)
452
+ {
453
+ range_destroy(RQ(self)->range);
454
+ frt_q_destroy_i(self);
455
+ }
456
+
457
+ static FrtMatchVector *rq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
458
+ FrtTermVector *tv)
459
+ {
460
+ Range *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
461
+ if (tv->field == range->field) {
462
+ const int term_cnt = tv->term_cnt;
463
+ int i, j;
464
+ char *upper_text = range->upper_term;
465
+ char *lower_text = range->lower_term;
466
+ int upper_limit = range->include_upper ? 1 : 0;
467
+
468
+ i = lower_text ? frt_tv_scan_to_term_index(tv, lower_text) : 0;
469
+ if (i < term_cnt && !range->include_lower && lower_text
470
+ && 0 == strcmp(lower_text, tv->terms[i].text)) {
471
+ i++;
472
+ }
473
+
474
+ for (; i < term_cnt; i++) {
475
+ FrtTVTerm *tv_term = &(tv->terms[i]);
476
+ char *text = tv_term->text;
477
+ const int tv_term_freq = tv_term->freq;
478
+ if (upper_text && strcmp(text, upper_text) >= upper_limit) {
479
+ break;
480
+ }
481
+ for (j = 0; j < tv_term_freq; j++) {
482
+ int pos = tv_term->positions[j];
483
+ frt_matchv_add(mv, pos, pos);
484
+ }
485
+ }
486
+ }
487
+ return mv;
488
+ }
489
+
490
+ static FrtQuery *frt_rq_rewrite(FrtQuery *self, FrtIndexReader *ir)
491
+ {
492
+ FrtQuery *csq;
493
+ Range *r = RQ(self)->range;
494
+ FrtFilter *filter = frt_rfilt_new(r->field, r->lower_term, r->upper_term,
495
+ r->include_lower, r->include_upper);
496
+ (void)ir;
497
+ csq = frt_csq_new_nr(filter);
498
+ ((FrtConstantScoreQuery *)csq)->original = self;
499
+ csq->get_matchv_i = &rq_get_matchv_i;
500
+ return (FrtQuery *)csq;
501
+ }
502
+
503
+ static unsigned long long frt_rq_hash(FrtQuery *self)
504
+ {
505
+ return range_hash(RQ(self)->range);
506
+ }
507
+
508
+ static int frt_rq_eq(FrtQuery *self, FrtQuery *o) {
509
+ return range_eq(RQ(self)->range, RQ(o)->range);
510
+ }
511
+
512
+ FrtQuery *frt_rq_new_less(FrtSymbol field, const char *upper_term, bool include_upper) {
513
+ return frt_rq_new(field, NULL, upper_term, false, include_upper);
514
+ }
515
+
516
+ FrtQuery *frt_rq_new_more(FrtSymbol field, const char *lower_term, bool include_lower) {
517
+ return frt_rq_new(field, lower_term, NULL, include_lower, false);
518
+ }
519
+
520
+ FrtQuery *frt_rq_new(FrtSymbol field, const char *lower_term,
521
+ const char *upper_term, bool include_lower, bool include_upper)
522
+ {
523
+ FrtQuery *self;
524
+ Range *range = range_new(field, lower_term, upper_term,
525
+ include_lower, include_upper);
526
+ self = frt_q_new(FrtRangeQuery);
527
+ RQ(self)->range = range;
528
+
529
+ self->type = RANGE_QUERY;
530
+ self->rewrite = &frt_rq_rewrite;
531
+ self->to_s = &frt_rq_to_s;
532
+ self->hash = &frt_rq_hash;
533
+ self->eq = &frt_rq_eq;
534
+ self->destroy_i = &frt_rq_destroy;
535
+ self->create_weight_i = &frt_q_create_weight_unsup;
536
+ return self;
537
+ }
538
+
539
+ /*****************************************************************************
540
+ *
541
+ * TypedRangeQuery
542
+ *
543
+ *****************************************************************************/
544
+
545
/*
 * Walks the term vector from its last term to its first and, for every term
 * that parses completely as a number satisfying `cond`, adds each of the
 * term's positions to `mv`.
 *
 * Expects `i`, `j`, `len`, `tv` and `mv` in the enclosing scope; `cond` is
 * written in terms of `num`, which the macro declares per term.
 *
 * The sscanf() result is checked so that a term which fails to convert is
 * never evaluated against an uninitialized `num` or a stale `len`.
 */
#define SET_TERMS(cond)\
for (i = tv->term_cnt - 1; i >= 0; i--) {\
    FrtTVTerm *tv_term = &(tv->terms[i]);\
    char *text = tv_term->text;\
    double num = 0.0;\
    if (sscanf(text, "%lg%n", &num, &len) == 1\
        && (int)strlen(text) == len) { /* We have a number */\
        if (cond) {\
            const int tv_term_freq = tv_term->freq;\
            for (j = 0; j < tv_term_freq; j++) {\
                int pos = tv_term->positions[j];\
                frt_matchv_add(mv, pos, pos);\
            }\
        }\
    }\
}
561
+
562
+ static FrtMatchVector *trq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
563
+ FrtTermVector *tv)
564
+ {
565
+ Range *range = RQ(((FrtConstantScoreQuery *)self)->original)->range;
566
+ if (tv->field == range->field) {
567
+ double lnum = 0.0, unum = 0.0;
568
+ int len = 0;
569
+ const char *lt = range->lower_term;
570
+ const char *ut = range->upper_term;
571
+ if ((!lt
572
+ || (sscanf(lt,"%lg%n",&lnum,&len) && (int)strlen(lt) == len))
573
+ &&
574
+ (!ut
575
+ || (sscanf(ut,"%lg%n",&unum,&len) && (int)strlen(ut) == len)))
576
+ {
577
+ TypedRangeCheck check = TRC_NONE;
578
+ int i = 0, j = 0;
579
+
580
+ if (lt) {
581
+ check = range->include_lower ? TRC_GE : TRC_GT;
582
+ }
583
+ if (ut) {
584
+ check = (TypedRangeCheck)(check | (range->include_upper
585
+ ? TRC_LE
586
+ : TRC_LT));
587
+ }
588
+
589
+ switch(check) {
590
+ case TRC_LE:
591
+ SET_TERMS(num <= unum);
592
+ break;
593
+ case TRC_LT:
594
+ SET_TERMS(num < unum);
595
+ break;
596
+ case TRC_GE:
597
+ SET_TERMS(num >= lnum);
598
+ break;
599
+ case TRC_GE_LE:
600
+ SET_TERMS(num >= lnum && num <= unum);
601
+ break;
602
+ case TRC_GE_LT:
603
+ SET_TERMS(num >= lnum && num < unum);
604
+ break;
605
+ case TRC_GT:
606
+ SET_TERMS(num > lnum);
607
+ break;
608
+ case TRC_GT_LE:
609
+ SET_TERMS(num > lnum && num <= unum);
610
+ break;
611
+ case TRC_GT_LT:
612
+ SET_TERMS(num > lnum && num < unum);
613
+ break;
614
+ case TRC_NONE:
615
+ /* should never happen. Error should have been rb_raised */
616
+ assert(false);
617
+ }
618
+
619
+ }
620
+ else {
621
+ return rq_get_matchv_i(self, mv, tv);
622
+ }
623
+ }
624
+ return mv;
625
+ }
626
+
627
+ static FrtQuery *frt_trq_rewrite(FrtQuery *self, FrtIndexReader *ir)
628
+ {
629
+ FrtQuery *csq;
630
+ Range *r = RQ(self)->range;
631
+ FrtFilter *filter = frt_trfilt_new(r->field, r->lower_term, r->upper_term,
632
+ r->include_lower, r->include_upper);
633
+ (void)ir;
634
+ csq = frt_csq_new_nr(filter);
635
+ ((FrtConstantScoreQuery *)csq)->original = self;
636
+ csq->get_matchv_i = &trq_get_matchv_i;
637
+ return (FrtQuery *)csq;
638
+ }
639
+
640
+ FrtQuery *frt_trq_new_less(FrtSymbol field, const char *upper_term, bool include_upper) {
641
+ return frt_trq_new(field, NULL, upper_term, false, include_upper);
642
+ }
643
+
644
+ FrtQuery *frt_trq_new_more(FrtSymbol field, const char *lower_term, bool include_lower) {
645
+ return frt_trq_new(field, lower_term, NULL, include_lower, false);
646
+ }
647
+
648
+ FrtQuery *frt_trq_new(FrtSymbol field, const char *lower_term,
649
+ const char *upper_term, bool include_lower, bool include_upper)
650
+ {
651
+ FrtQuery *self;
652
+ Range *range = trange_new(field, lower_term, upper_term,
653
+ include_lower, include_upper);
654
+ self = frt_q_new(FrtRangeQuery);
655
+ RQ(self)->range = range;
656
+
657
+ self->type = TYPED_RANGE_QUERY;
658
+ self->rewrite = &frt_trq_rewrite;
659
+ self->to_s = &frt_rq_to_s;
660
+ self->hash = &frt_rq_hash;
661
+ self->eq = &frt_rq_eq;
662
+ self->destroy_i = &frt_rq_destroy;
663
+ self->create_weight_i = &frt_q_create_weight_unsup;
664
+ return self;
665
+ }