isomorfeus-ferret 0.12.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (222) hide show
  1. checksums.yaml +7 -0
  2. data/LICENSE +612 -0
  3. data/README.md +44 -0
  4. data/ext/isomorfeus_ferret_ext/benchmark.c +223 -0
  5. data/ext/isomorfeus_ferret_ext/benchmark.h +45 -0
  6. data/ext/isomorfeus_ferret_ext/benchmarks_all.h +25 -0
  7. data/ext/isomorfeus_ferret_ext/bm_bitvector.c +123 -0
  8. data/ext/isomorfeus_ferret_ext/bm_hash.c +118 -0
  9. data/ext/isomorfeus_ferret_ext/bm_micro_string.c +40 -0
  10. data/ext/isomorfeus_ferret_ext/bm_store.c +93 -0
  11. data/ext/isomorfeus_ferret_ext/email.rl +21 -0
  12. data/ext/isomorfeus_ferret_ext/extconf.rb +5 -0
  13. data/ext/isomorfeus_ferret_ext/fio_tmpfile.h +53 -0
  14. data/ext/isomorfeus_ferret_ext/frb_analysis.c +2577 -0
  15. data/ext/isomorfeus_ferret_ext/frb_index.c +3457 -0
  16. data/ext/isomorfeus_ferret_ext/frb_lang.c +9 -0
  17. data/ext/isomorfeus_ferret_ext/frb_lang.h +17 -0
  18. data/ext/isomorfeus_ferret_ext/frb_qparser.c +629 -0
  19. data/ext/isomorfeus_ferret_ext/frb_search.c +4460 -0
  20. data/ext/isomorfeus_ferret_ext/frb_store.c +515 -0
  21. data/ext/isomorfeus_ferret_ext/frb_threading.h +30 -0
  22. data/ext/isomorfeus_ferret_ext/frb_utils.c +1127 -0
  23. data/ext/isomorfeus_ferret_ext/frt_analysis.c +1644 -0
  24. data/ext/isomorfeus_ferret_ext/frt_analysis.h +247 -0
  25. data/ext/isomorfeus_ferret_ext/frt_array.c +124 -0
  26. data/ext/isomorfeus_ferret_ext/frt_array.h +54 -0
  27. data/ext/isomorfeus_ferret_ext/frt_bitvector.c +95 -0
  28. data/ext/isomorfeus_ferret_ext/frt_bitvector.h +586 -0
  29. data/ext/isomorfeus_ferret_ext/frt_compound_io.c +374 -0
  30. data/ext/isomorfeus_ferret_ext/frt_config.h +44 -0
  31. data/ext/isomorfeus_ferret_ext/frt_document.c +134 -0
  32. data/ext/isomorfeus_ferret_ext/frt_document.h +52 -0
  33. data/ext/isomorfeus_ferret_ext/frt_except.c +95 -0
  34. data/ext/isomorfeus_ferret_ext/frt_except.h +188 -0
  35. data/ext/isomorfeus_ferret_ext/frt_field_index.c +233 -0
  36. data/ext/isomorfeus_ferret_ext/frt_field_index.h +42 -0
  37. data/ext/isomorfeus_ferret_ext/frt_filter.c +157 -0
  38. data/ext/isomorfeus_ferret_ext/frt_fs_store.c +502 -0
  39. data/ext/isomorfeus_ferret_ext/frt_global.c +427 -0
  40. data/ext/isomorfeus_ferret_ext/frt_global.h +290 -0
  41. data/ext/isomorfeus_ferret_ext/frt_hash.c +518 -0
  42. data/ext/isomorfeus_ferret_ext/frt_hash.h +466 -0
  43. data/ext/isomorfeus_ferret_ext/frt_hashset.c +191 -0
  44. data/ext/isomorfeus_ferret_ext/frt_hashset.h +206 -0
  45. data/ext/isomorfeus_ferret_ext/frt_helper.c +62 -0
  46. data/ext/isomorfeus_ferret_ext/frt_helper.h +13 -0
  47. data/ext/isomorfeus_ferret_ext/frt_ind.c +353 -0
  48. data/ext/isomorfeus_ferret_ext/frt_ind.h +54 -0
  49. data/ext/isomorfeus_ferret_ext/frt_index.c +6377 -0
  50. data/ext/isomorfeus_ferret_ext/frt_index.h +880 -0
  51. data/ext/isomorfeus_ferret_ext/frt_lang.c +104 -0
  52. data/ext/isomorfeus_ferret_ext/frt_lang.h +44 -0
  53. data/ext/isomorfeus_ferret_ext/frt_mempool.c +87 -0
  54. data/ext/isomorfeus_ferret_ext/frt_mempool.h +33 -0
  55. data/ext/isomorfeus_ferret_ext/frt_multimapper.c +349 -0
  56. data/ext/isomorfeus_ferret_ext/frt_multimapper.h +52 -0
  57. data/ext/isomorfeus_ferret_ext/frt_posh.c +1006 -0
  58. data/ext/isomorfeus_ferret_ext/frt_posh.h +973 -0
  59. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.c +147 -0
  60. data/ext/isomorfeus_ferret_ext/frt_priorityqueue.h +147 -0
  61. data/ext/isomorfeus_ferret_ext/frt_q_boolean.c +1612 -0
  62. data/ext/isomorfeus_ferret_ext/frt_q_const_score.c +157 -0
  63. data/ext/isomorfeus_ferret_ext/frt_q_filtered_query.c +209 -0
  64. data/ext/isomorfeus_ferret_ext/frt_q_fuzzy.c +281 -0
  65. data/ext/isomorfeus_ferret_ext/frt_q_match_all.c +147 -0
  66. data/ext/isomorfeus_ferret_ext/frt_q_multi_term.c +672 -0
  67. data/ext/isomorfeus_ferret_ext/frt_q_parser.c +3084 -0
  68. data/ext/isomorfeus_ferret_ext/frt_q_phrase.c +1182 -0
  69. data/ext/isomorfeus_ferret_ext/frt_q_prefix.c +98 -0
  70. data/ext/isomorfeus_ferret_ext/frt_q_range.c +665 -0
  71. data/ext/isomorfeus_ferret_ext/frt_q_span.c +2386 -0
  72. data/ext/isomorfeus_ferret_ext/frt_q_term.c +311 -0
  73. data/ext/isomorfeus_ferret_ext/frt_q_wildcard.c +166 -0
  74. data/ext/isomorfeus_ferret_ext/frt_ram_store.c +460 -0
  75. data/ext/isomorfeus_ferret_ext/frt_scanner.c +899 -0
  76. data/ext/isomorfeus_ferret_ext/frt_scanner.h +28 -0
  77. data/ext/isomorfeus_ferret_ext/frt_scanner_mb.c +6705 -0
  78. data/ext/isomorfeus_ferret_ext/frt_scanner_utf8.c +4419 -0
  79. data/ext/isomorfeus_ferret_ext/frt_search.c +1824 -0
  80. data/ext/isomorfeus_ferret_ext/frt_search.h +924 -0
  81. data/ext/isomorfeus_ferret_ext/frt_similarity.c +150 -0
  82. data/ext/isomorfeus_ferret_ext/frt_similarity.h +79 -0
  83. data/ext/isomorfeus_ferret_ext/frt_sort.c +796 -0
  84. data/ext/isomorfeus_ferret_ext/frt_stopwords.c +395 -0
  85. data/ext/isomorfeus_ferret_ext/frt_store.c +680 -0
  86. data/ext/isomorfeus_ferret_ext/frt_store.h +789 -0
  87. data/ext/isomorfeus_ferret_ext/frt_term_vectors.c +72 -0
  88. data/ext/isomorfeus_ferret_ext/frt_threading.h +23 -0
  89. data/ext/isomorfeus_ferret_ext/frt_win32.h +54 -0
  90. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.c +409 -0
  91. data/ext/isomorfeus_ferret_ext/isomorfeus_ferret.h +95 -0
  92. data/ext/isomorfeus_ferret_ext/libstemmer.c +93 -0
  93. data/ext/isomorfeus_ferret_ext/libstemmer.h +73 -0
  94. data/ext/isomorfeus_ferret_ext/q_parser.y +1366 -0
  95. data/ext/isomorfeus_ferret_ext/scanner.h +28 -0
  96. data/ext/isomorfeus_ferret_ext/scanner.in +43 -0
  97. data/ext/isomorfeus_ferret_ext/scanner.rl +84 -0
  98. data/ext/isomorfeus_ferret_ext/scanner_mb.rl +200 -0
  99. data/ext/isomorfeus_ferret_ext/scanner_utf8.rl +85 -0
  100. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.c +324 -0
  101. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_danish.h +7 -0
  102. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.c +610 -0
  103. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_dutch.h +6 -0
  104. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.c +1104 -0
  105. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_english.h +6 -0
  106. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.c +749 -0
  107. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_finnish.h +7 -0
  108. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.c +1233 -0
  109. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_french.h +6 -0
  110. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.c +490 -0
  111. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_german.h +6 -0
  112. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.c +1217 -0
  113. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_hungarian.h +7 -0
  114. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.c +1052 -0
  115. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_italian.h +6 -0
  116. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.c +283 -0
  117. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_norwegian.h +6 -0
  118. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.c +735 -0
  119. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_porter.h +6 -0
  120. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.c +1003 -0
  121. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_portuguese.h +7 -0
  122. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.c +1079 -0
  123. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_spanish.h +6 -0
  124. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.c +293 -0
  125. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_1_swedish.h +6 -0
  126. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.c +984 -0
  127. data/ext/isomorfeus_ferret_ext/stem_ISO_8859_2_romanian.h +6 -0
  128. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.c +686 -0
  129. data/ext/isomorfeus_ferret_ext/stem_KOI8_R_russian.h +6 -0
  130. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.c +325 -0
  131. data/ext/isomorfeus_ferret_ext/stem_UTF_8_danish.h +6 -0
  132. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.c +620 -0
  133. data/ext/isomorfeus_ferret_ext/stem_UTF_8_dutch.h +6 -0
  134. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.c +1111 -0
  135. data/ext/isomorfeus_ferret_ext/stem_UTF_8_english.h +6 -0
  136. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.c +754 -0
  137. data/ext/isomorfeus_ferret_ext/stem_UTF_8_finnish.h +6 -0
  138. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.c +1242 -0
  139. data/ext/isomorfeus_ferret_ext/stem_UTF_8_french.h +6 -0
  140. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.c +495 -0
  141. data/ext/isomorfeus_ferret_ext/stem_UTF_8_german.h +6 -0
  142. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.c +1220 -0
  143. data/ext/isomorfeus_ferret_ext/stem_UTF_8_hungarian.h +6 -0
  144. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.c +1059 -0
  145. data/ext/isomorfeus_ferret_ext/stem_UTF_8_italian.h +6 -0
  146. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.c +285 -0
  147. data/ext/isomorfeus_ferret_ext/stem_UTF_8_norwegian.h +6 -0
  148. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.c +741 -0
  149. data/ext/isomorfeus_ferret_ext/stem_UTF_8_porter.h +6 -0
  150. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.c +1009 -0
  151. data/ext/isomorfeus_ferret_ext/stem_UTF_8_portuguese.h +6 -0
  152. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.c +990 -0
  153. data/ext/isomorfeus_ferret_ext/stem_UTF_8_romanian.h +6 -0
  154. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.c +680 -0
  155. data/ext/isomorfeus_ferret_ext/stem_UTF_8_russian.h +6 -0
  156. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.c +1083 -0
  157. data/ext/isomorfeus_ferret_ext/stem_UTF_8_spanish.h +6 -0
  158. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.c +294 -0
  159. data/ext/isomorfeus_ferret_ext/stem_UTF_8_swedish.h +6 -0
  160. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.c +2191 -0
  161. data/ext/isomorfeus_ferret_ext/stem_UTF_8_turkish.h +6 -0
  162. data/ext/isomorfeus_ferret_ext/stem_api.c +66 -0
  163. data/ext/isomorfeus_ferret_ext/stem_api.h +26 -0
  164. data/ext/isomorfeus_ferret_ext/stem_header.h +57 -0
  165. data/ext/isomorfeus_ferret_ext/stem_modules.h +190 -0
  166. data/ext/isomorfeus_ferret_ext/stem_modules.txt +50 -0
  167. data/ext/isomorfeus_ferret_ext/stem_utilities.c +478 -0
  168. data/ext/isomorfeus_ferret_ext/test.c +850 -0
  169. data/ext/isomorfeus_ferret_ext/test.h +416 -0
  170. data/ext/isomorfeus_ferret_ext/test_1710.c +63 -0
  171. data/ext/isomorfeus_ferret_ext/test_analysis.c +1221 -0
  172. data/ext/isomorfeus_ferret_ext/test_array.c +272 -0
  173. data/ext/isomorfeus_ferret_ext/test_bitvector.c +600 -0
  174. data/ext/isomorfeus_ferret_ext/test_compound_io.c +170 -0
  175. data/ext/isomorfeus_ferret_ext/test_document.c +156 -0
  176. data/ext/isomorfeus_ferret_ext/test_except.c +244 -0
  177. data/ext/isomorfeus_ferret_ext/test_fields.c +522 -0
  178. data/ext/isomorfeus_ferret_ext/test_file_deleter.c +185 -0
  179. data/ext/isomorfeus_ferret_ext/test_filter.c +331 -0
  180. data/ext/isomorfeus_ferret_ext/test_fs_store.c +25 -0
  181. data/ext/isomorfeus_ferret_ext/test_global.c +299 -0
  182. data/ext/isomorfeus_ferret_ext/test_hash.c +485 -0
  183. data/ext/isomorfeus_ferret_ext/test_hashset.c +288 -0
  184. data/ext/isomorfeus_ferret_ext/test_helper.c +47 -0
  185. data/ext/isomorfeus_ferret_ext/test_highlighter.c +548 -0
  186. data/ext/isomorfeus_ferret_ext/test_index.c +2323 -0
  187. data/ext/isomorfeus_ferret_ext/test_lang.c +74 -0
  188. data/ext/isomorfeus_ferret_ext/test_mempool.c +102 -0
  189. data/ext/isomorfeus_ferret_ext/test_multimapper.c +64 -0
  190. data/ext/isomorfeus_ferret_ext/test_priorityqueue.c +213 -0
  191. data/ext/isomorfeus_ferret_ext/test_q_const_score.c +84 -0
  192. data/ext/isomorfeus_ferret_ext/test_q_filtered.c +61 -0
  193. data/ext/isomorfeus_ferret_ext/test_q_fuzzy.c +241 -0
  194. data/ext/isomorfeus_ferret_ext/test_q_parser.c +464 -0
  195. data/ext/isomorfeus_ferret_ext/test_q_span.c +575 -0
  196. data/ext/isomorfeus_ferret_ext/test_ram_store.c +77 -0
  197. data/ext/isomorfeus_ferret_ext/test_search.c +1874 -0
  198. data/ext/isomorfeus_ferret_ext/test_segments.c +167 -0
  199. data/ext/isomorfeus_ferret_ext/test_similarity.c +25 -0
  200. data/ext/isomorfeus_ferret_ext/test_sort.c +333 -0
  201. data/ext/isomorfeus_ferret_ext/test_store.c +591 -0
  202. data/ext/isomorfeus_ferret_ext/test_store.h +3 -0
  203. data/ext/isomorfeus_ferret_ext/test_term.c +351 -0
  204. data/ext/isomorfeus_ferret_ext/test_term_vectors.c +373 -0
  205. data/ext/isomorfeus_ferret_ext/test_test.c +83 -0
  206. data/ext/isomorfeus_ferret_ext/test_threading.c +188 -0
  207. data/ext/isomorfeus_ferret_ext/testhelper.c +561 -0
  208. data/ext/isomorfeus_ferret_ext/testhelper.h +25 -0
  209. data/ext/isomorfeus_ferret_ext/tests_all.h +87 -0
  210. data/ext/isomorfeus_ferret_ext/uchar-ucs4.rl +1854 -0
  211. data/ext/isomorfeus_ferret_ext/uchar-utf8.rl +1999 -0
  212. data/ext/isomorfeus_ferret_ext/url.rl +27 -0
  213. data/ext/isomorfeus_ferret_ext/word_list.h +15156 -0
  214. data/lib/isomorfeus/ferret/document.rb +132 -0
  215. data/lib/isomorfeus/ferret/field_symbol.rb +85 -0
  216. data/lib/isomorfeus/ferret/index/field_infos.rb +48 -0
  217. data/lib/isomorfeus/ferret/index/index.rb +970 -0
  218. data/lib/isomorfeus/ferret/monitor.rb +323 -0
  219. data/lib/isomorfeus/ferret/stdlib_patches.rb +151 -0
  220. data/lib/isomorfeus/ferret/version.rb +5 -0
  221. data/lib/isomorfeus-ferret.rb +8 -0
  222. metadata +307 -0
@@ -0,0 +1,2386 @@
1
+ #include <string.h>
2
+ #include <limits.h>
3
+ #include "frt_global.h"
4
+ #include "frt_search.h"
5
+ #include "frt_hashset.h"
6
+
7
+ #define CLAUSE_INIT_CAPA 4
8
+
9
+ /*****************************************************************************
10
+ *
11
+ * SpanQuery
12
+ *
13
+ *****************************************************************************/
14
+
15
+ #define SpQ(query) ((FrtSpanQuery *)(query))
16
+
17
+ static unsigned long long spanq_hash(FrtQuery *self)
18
+ {
19
+ return SpQ(self)->field ? frt_str_hash(rb_id2name(SpQ(self)->field)) : 0;
20
+ }
21
+
22
+ static int spanq_eq(FrtQuery *self, FrtQuery *o)
23
+ {
24
+ return SpQ(self)->field == SpQ(o)->field;
25
+ }
26
+
27
+ static void spanq_destroy_i(FrtQuery *self)
28
+ {
29
+ frt_q_destroy_i(self);
30
+ }
31
+
32
+ static FrtMatchVector *mv_to_term_mv(FrtMatchVector *term_mv, FrtMatchVector *full_mv,
33
+ FrtHashSet *terms, FrtTermVector *tv)
34
+ {
35
+ FrtHashSetEntry *hse;
36
+ for (hse = terms->first; hse; hse = hse->next) {
37
+ char *term = (char *)hse->elem;
38
+ FrtTVTerm *tv_term = frt_tv_get_tv_term(tv, term);
39
+ if (tv_term) {
40
+ int i, m_idx = 0;
41
+ for (i = 0; i < tv_term->freq; i++) {
42
+ int pos = tv_term->positions[i];
43
+ for (; m_idx < full_mv->size; m_idx++) {
44
+ if (pos <= full_mv->matches[m_idx].end) {
45
+ if (pos >= full_mv->matches[m_idx].start) {
46
+ frt_matchv_add(term_mv, pos, pos);
47
+ }
48
+ break;
49
+ }
50
+ }
51
+ }
52
+ }
53
+ }
54
+
55
+ return term_mv;
56
+ }
57
+
58
+ /***************************************************************************
59
+ * TVTermDocEnum
60
+ * dummy TermDocEnum used by the highlighter to find matches
61
+ ***************************************************************************/
62
+
63
+ #define TV_TDE(tde) ((TVTermDocEnum *)(tde))
64
+
65
+ typedef struct TVTermDocEnum
66
+ {
67
+ FrtTermDocEnum super;
68
+ int doc;
69
+ int index;
70
+ int freq;
71
+ int *positions;
72
+ FrtTermVector *tv;
73
+ } TVTermDocEnum;
74
+
75
+ static void tv_tde_seek(FrtTermDocEnum *tde, int field_num, const char *term)
76
+ {
77
+ TVTermDocEnum *tv_tde = TV_TDE(tde);
78
+ FrtTVTerm *tv_term = frt_tv_get_tv_term(tv_tde->tv, term);
79
+ (void)field_num;
80
+ if (tv_term) {
81
+ tv_tde->doc = -1;
82
+ tv_tde->index = 0;
83
+ tv_tde->freq = tv_term->freq;
84
+ tv_tde->positions = tv_term->positions;
85
+ }
86
+ else {
87
+ tv_tde->doc = INT_MAX;
88
+ }
89
+ }
90
+
91
+ static bool tv_tde_next(FrtTermDocEnum *tde)
92
+ {
93
+ if (TV_TDE(tde)->doc == -1) {
94
+ TV_TDE(tde)->doc = 0;
95
+ return true;
96
+ }
97
+ else {
98
+ TV_TDE(tde)->doc = INT_MAX;
99
+ return false;
100
+ }
101
+ }
102
+
103
+ static bool tv_tde_skip_to(FrtTermDocEnum *tde, int doc_num)
104
+ {
105
+ if (doc_num == 0) {
106
+ TV_TDE(tde)->doc = 0;
107
+ return true;
108
+ }
109
+ else {
110
+ TV_TDE(tde)->doc = INT_MAX;
111
+ return false;
112
+ }
113
+ }
114
+
115
+ static int tv_tde_next_position(FrtTermDocEnum *tde)
116
+ {
117
+ return TV_TDE(tde)->positions[TV_TDE(tde)->index++];
118
+ }
119
+
120
+ static int tv_tde_freq(FrtTermDocEnum *tde)
121
+ {
122
+ return TV_TDE(tde)->freq;
123
+ }
124
+
125
+ static int tv_tde_doc_num(FrtTermDocEnum *tde)
126
+ {
127
+ return TV_TDE(tde)->doc;
128
+ }
129
+
130
+ static FrtTermDocEnum *spanq_ir_term_positions(FrtIndexReader *ir)
131
+ {
132
+ TVTermDocEnum *tv_tde = FRT_ALLOC(TVTermDocEnum);
133
+ FrtTermDocEnum *tde = (FrtTermDocEnum *)tv_tde;
134
+ tv_tde->tv = (FrtTermVector *)ir->store;
135
+ tde->seek = &tv_tde_seek;
136
+ tde->doc_num = &tv_tde_doc_num;
137
+ tde->freq = &tv_tde_freq;
138
+ tde->next = &tv_tde_next;
139
+ tde->skip_to = &tv_tde_skip_to;
140
+ tde->next_position = &tv_tde_next_position;
141
+ tde->close = (void (*)(FrtTermDocEnum *tde))&free;
142
+
143
+ return tde;
144
+ }
145
+
146
+ static FrtMatchVector *spanq_get_matchv_i(FrtQuery *self, FrtMatchVector *mv,
147
+ FrtTermVector *tv)
148
+ {
149
+ if (SpQ(self)->field == tv->field) {
150
+ FrtSpanEnum *sp_enum;
151
+ FrtIndexReader *ir = FRT_ALLOC(FrtIndexReader);
152
+ FrtMatchVector *full_mv = frt_matchv_new();
153
+ FrtHashSet *terms = SpQ(self)->get_terms(self);
154
+ /* FIXME What is going on here? Need to document this! */
155
+ ir->fis = frt_fis_new(FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO);
156
+ frt_fis_add_field(ir->fis,
157
+ frt_fi_new(tv->field, FRT_STORE_NO, FRT_INDEX_NO, FRT_TERM_VECTOR_NO));
158
+ ir->store = (FrtStore *)tv;
159
+ ir->term_positions = &spanq_ir_term_positions;
160
+ sp_enum = SpQ(self)->get_spans(self, ir);
161
+ while (sp_enum->next(sp_enum)) {
162
+ frt_matchv_add(full_mv,
163
+ sp_enum->start(sp_enum),
164
+ sp_enum->end(sp_enum) - 1);
165
+ }
166
+ sp_enum->destroy(sp_enum);
167
+
168
+ frt_fis_deref(ir->fis);
169
+ free(ir);
170
+
171
+ frt_matchv_compact(full_mv);
172
+ mv_to_term_mv(mv, full_mv, terms, tv);
173
+ frt_matchv_destroy(full_mv);
174
+ frt_hs_destroy(terms);
175
+ }
176
+ return mv;
177
+ }
178
+
179
+ /***************************************************************************
180
+ *
181
+ * SpanScorer
182
+ *
183
+ ***************************************************************************/
184
+
185
+ #define SpSc(scorer) ((SpanScorer *)(scorer))
186
+ typedef struct SpanScorer
187
+ {
188
+ FrtScorer super;
189
+ FrtIndexReader *ir;
190
+ FrtSpanEnum *spans;
191
+ FrtSimilarity *sim;
192
+ frt_uchar *norms;
193
+ FrtWeight *weight;
194
+ float value;
195
+ float freq;
196
+ bool first_time : 1;
197
+ bool more : 1;
198
+ } SpanScorer;
199
+
200
+ static float spansc_score(FrtScorer *self)
201
+ {
202
+ SpanScorer *spansc = SpSc(self);
203
+ float raw = frt_sim_tf(spansc->sim, spansc->freq) * spansc->value;
204
+
205
+ /* normalize */
206
+ return raw * frt_sim_decode_norm(self->similarity, spansc->norms[self->doc]);
207
+ }
208
+
209
+ static bool spansc_next(FrtScorer *self)
210
+ {
211
+ SpanScorer *spansc = SpSc(self);
212
+ FrtSpanEnum *se = spansc->spans;
213
+ int match_length;
214
+
215
+ if (spansc->first_time) {
216
+ spansc->more = se->next(se);
217
+ spansc->first_time = false;
218
+ }
219
+
220
+ if (!spansc->more) {
221
+ return false;
222
+ }
223
+
224
+ spansc->freq = 0.0f;
225
+ self->doc = se->doc(se);
226
+
227
+ do {
228
+ match_length = se->end(se) - se->start(se);
229
+ spansc->freq += frt_sim_sloppy_freq(spansc->sim, match_length);
230
+ spansc->more = se->next(se);
231
+ } while (spansc->more && (self->doc == se->doc(se)));
232
+
233
+ return (spansc->more || (spansc->freq != 0.0));
234
+ }
235
+
236
+ static bool spansc_skip_to(FrtScorer *self, int target)
237
+ {
238
+ SpanScorer *spansc = SpSc(self);
239
+ FrtSpanEnum *se = spansc->spans;
240
+
241
+ spansc->more = se->skip_to(se, target);
242
+ if (!spansc->more) {
243
+ return false;
244
+ }
245
+
246
+ spansc->freq = 0.0f;
247
+ self->doc = se->doc(se);
248
+
249
+ while (spansc->more && (se->doc(se) == target)) {
250
+ spansc->freq += frt_sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
251
+ spansc->more = se->next(se);
252
+ if (spansc->first_time) {
253
+ spansc->first_time = false;
254
+ }
255
+ }
256
+
257
+ return (spansc->more || (spansc->freq != 0.0));
258
+ }
259
+
260
+ static FrtExplanation *spansc_explain(FrtScorer *self, int target)
261
+ {
262
+ FrtExplanation *tf_explanation;
263
+ SpanScorer *spansc = SpSc(self);
264
+ float phrase_freq;
265
+ self->skip_to(self, target);
266
+ phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
267
+
268
+ tf_explanation = frt_expl_new(frt_sim_tf(self->similarity, phrase_freq),
269
+ "tf(phrase_freq(%f)", phrase_freq);
270
+
271
+ return tf_explanation;
272
+ }
273
+
274
+ static void spansc_destroy(FrtScorer *self)
275
+ {
276
+ SpanScorer *spansc = SpSc(self);
277
+ if (spansc->spans) {
278
+ spansc->spans->destroy(spansc->spans);
279
+ }
280
+ frt_scorer_destroy_i(self);
281
+ }
282
+
283
+ static FrtScorer *spansc_new(FrtWeight *weight, FrtIndexReader *ir)
284
+ {
285
+ FrtScorer *self = NULL;
286
+ const int field_num = frt_fis_get_field_num(ir->fis, SpQ(weight->query)->field);
287
+ if (field_num >= 0) {
288
+ FrtQuery *spanq = weight->query;
289
+ self = frt_scorer_new(SpanScorer, weight->similarity);
290
+
291
+ SpSc(self)->first_time = true;
292
+ SpSc(self)->more = true;
293
+ SpSc(self)->spans = SpQ(spanq)->get_spans(spanq, ir);
294
+ SpSc(self)->sim = weight->similarity;
295
+ SpSc(self)->norms = ir->get_norms(ir, field_num);
296
+ SpSc(self)->weight = weight;
297
+ SpSc(self)->value = weight->value;
298
+ SpSc(self)->freq = 0.0f;
299
+
300
+ self->score = &spansc_score;
301
+ self->next = &spansc_next;
302
+ self->skip_to = &spansc_skip_to;
303
+ self->explain = &spansc_explain;
304
+ self->destroy = &spansc_destroy;
305
+ }
306
+ return self;
307
+ }
308
+
309
+ /*****************************************************************************
310
+ * SpanTermEnum
311
+ *****************************************************************************/
312
+
313
+ #define SpTEn(span_enum) ((SpanTermEnum *)(span_enum))
314
+ #define SpTQ(query) ((FrtSpanTermQuery *)(query))
315
+
316
+ typedef struct SpanTermEnum
317
+ {
318
+ FrtSpanEnum super;
319
+ FrtTermDocEnum *positions;
320
+ int position;
321
+ int doc;
322
+ int count;
323
+ int freq;
324
+ } SpanTermEnum;
325
+
326
+
327
+ static bool spante_next(FrtSpanEnum *self)
328
+ {
329
+ SpanTermEnum *ste = SpTEn(self);
330
+ FrtTermDocEnum *tde = ste->positions;
331
+
332
+ if (ste->count == ste->freq) {
333
+ if (! tde->next(tde)) {
334
+ ste->doc = INT_MAX;
335
+ return false;
336
+ }
337
+ ste->doc = tde->doc_num(tde);
338
+ ste->freq = tde->freq(tde);
339
+ ste->count = 0;
340
+ }
341
+ ste->position = tde->next_position(tde);
342
+ ste->count++;
343
+ return true;
344
+ }
345
+
346
+ static bool spante_skip_to(FrtSpanEnum *self, int target)
347
+ {
348
+ SpanTermEnum *ste = SpTEn(self);
349
+ FrtTermDocEnum *tde = ste->positions;
350
+
351
+ /* are we already at the correct position? */
352
+ /* FIXME: perhaps this the the better solution but currently it ->skip_to
353
+ * does a ->next not matter what
354
+ if (ste->doc >= target) {
355
+ return true;
356
+ }
357
+ */
358
+
359
+ if (! tde->skip_to(tde, target)) {
360
+ ste->doc = INT_MAX;
361
+ return false;
362
+ }
363
+
364
+ ste->doc = tde->doc_num(tde);
365
+ ste->freq = tde->freq(tde);
366
+ ste->count = 0;
367
+
368
+ ste->position = tde->next_position(tde);
369
+ ste->count++;
370
+ return true;
371
+ }
372
+
373
+ static int spante_doc(FrtSpanEnum *self)
374
+ {
375
+ return SpTEn(self)->doc;
376
+ }
377
+
378
+ static int spante_start(FrtSpanEnum *self)
379
+ {
380
+ return SpTEn(self)->position;
381
+ }
382
+
383
+ static int spante_end(FrtSpanEnum *self)
384
+ {
385
+ return SpTEn(self)->position + 1;
386
+ }
387
+
388
+ static char *spante_to_s(FrtSpanEnum *self)
389
+ {
390
+ char *query_str = self->query->to_s(self->query, (FrtSymbol)NULL);
391
+ char pos_str[20];
392
+ size_t len = strlen(query_str);
393
+ int pos;
394
+ char *str = FRT_ALLOC_N(char, len + 40);
395
+
396
+ if (self->doc(self) < 0) {
397
+ sprintf(pos_str, "START");
398
+ }
399
+ else {
400
+ if (self->doc(self) == INT_MAX) {
401
+ sprintf(pos_str, "END");
402
+ }
403
+ else {
404
+ pos = SpTEn(self)->position;
405
+ sprintf(pos_str, "%d", self->doc(self) - pos);
406
+ }
407
+ }
408
+ sprintf(str, "SpanTermEnum(%s)@%s", query_str, pos_str);
409
+ free(query_str);
410
+ return str;
411
+ }
412
+
413
+ static void spante_destroy(FrtSpanEnum *self)
414
+ {
415
+ FrtTermDocEnum *tde = SpTEn(self)->positions;
416
+ tde->close(tde);
417
+ free(self);
418
+ }
419
+
420
+ static FrtSpanEnum *spante_new(FrtQuery *query, FrtIndexReader *ir)
421
+ {
422
+ char *term = SpTQ(query)->term;
423
+ FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanTermEnum);
424
+
425
+ SpTEn(self)->positions = frt_ir_term_positions_for(ir, SpQ(query)->field,
426
+ term);
427
+ SpTEn(self)->position = -1;
428
+ SpTEn(self)->doc = -1;
429
+ SpTEn(self)->count = 0;
430
+ SpTEn(self)->freq = 0;
431
+
432
+ self->query = query;
433
+ self->next = &spante_next;
434
+ self->skip_to = &spante_skip_to;
435
+ self->doc = &spante_doc;
436
+ self->start = &spante_start;
437
+ self->end = &spante_end;
438
+ self->destroy = &spante_destroy;
439
+ self->to_s = &spante_to_s;
440
+
441
+ return self;
442
+ }
443
+
444
+ /*****************************************************************************
445
+ * SpanMultiTermEnum
446
+ *****************************************************************************/
447
+
448
+ /* * TermPosEnumWrapper * */
449
+ #define TPE_READ_SIZE 16
450
+
451
+ typedef struct TermPosEnumWrapper
452
+ {
453
+ const char *term;
454
+ FrtTermDocEnum *tpe;
455
+ int doc;
456
+ int pos;
457
+ } TermPosEnumWrapper;
458
+
459
+ static bool tpew_less_than(const TermPosEnumWrapper *tpew1,
460
+ const TermPosEnumWrapper *tpew2)
461
+ {
462
+ return (tpew1->doc < tpew2->doc)
463
+ || (tpew1->doc == tpew2->doc && tpew1->pos < tpew2->pos);
464
+ }
465
+
466
+ static bool tpew_next(TermPosEnumWrapper *self)
467
+ {
468
+ FrtTermDocEnum *tpe = self->tpe;
469
+ if (0 > (self->pos = tpe->next_position(tpe))) {
470
+ if (!tpe->next(tpe)) return false;
471
+ self->doc = tpe->doc_num(tpe);
472
+ self->pos = tpe->next_position(tpe);
473
+ }
474
+ return true;
475
+ }
476
+
477
+ static bool tpew_skip_to(TermPosEnumWrapper *self, int doc_num)
478
+ {
479
+ FrtTermDocEnum *tpe = self->tpe;
480
+
481
+ if (tpe->skip_to(tpe, doc_num)) {
482
+ self->doc = tpe->doc_num(tpe);
483
+ self->pos = tpe->next_position(tpe);
484
+ return true;
485
+ }
486
+ else {
487
+ return false;
488
+ }
489
+ }
490
+
491
+ static void tpew_destroy(TermPosEnumWrapper *self)
492
+ {
493
+ self->tpe->close(self->tpe);
494
+ free(self);
495
+ }
496
+
497
+ static TermPosEnumWrapper *tpew_new(const char *term, FrtTermDocEnum *tpe)
498
+ {
499
+ TermPosEnumWrapper *self = FRT_ALLOC_AND_ZERO(TermPosEnumWrapper);
500
+ self->term = term;
501
+ self->tpe = tpe;
502
+ self->doc = -1;
503
+ self->pos = -1;
504
+ return self;
505
+ }
506
+ #define SpMTEn(span_enum) ((SpanMultiTermEnum *)(span_enum))
507
+ #define SpMTQ(query) ((FrtSpanMultiTermQuery *)(query))
508
+
509
+ typedef struct SpanMultiTermEnum
510
+ {
511
+ FrtSpanEnum super;
512
+ FrtPriorityQueue *tpew_pq;
513
+ TermPosEnumWrapper **tpews;
514
+ int tpew_cnt;
515
+ int pos;
516
+ int doc;
517
+ } SpanMultiTermEnum;
518
+
519
+ static bool spanmte_next(FrtSpanEnum *self)
520
+ {
521
+ int curr_doc, curr_pos;
522
+ TermPosEnumWrapper *tpew;
523
+ SpanMultiTermEnum *mte = SpMTEn(self);
524
+ FrtPriorityQueue *tpew_pq = mte->tpew_pq;
525
+ if (tpew_pq == NULL) {
526
+ TermPosEnumWrapper **tpews = mte->tpews;
527
+ int i;
528
+ tpew_pq = frt_pq_new(mte->tpew_cnt, (frt_lt_ft)tpew_less_than, (frt_free_ft)NULL);
529
+ for (i = mte->tpew_cnt - 1; i >= 0; i--) {
530
+ if (tpew_next(tpews[i])) {
531
+ frt_pq_push(tpew_pq, tpews[i]);
532
+ }
533
+ }
534
+ mte->tpew_pq = tpew_pq;
535
+ }
536
+
537
+ tpew = (TermPosEnumWrapper *)frt_pq_top(tpew_pq);
538
+ if (tpew == NULL) {
539
+ return false;
540
+ }
541
+
542
+ mte->doc = curr_doc = tpew->doc;
543
+ mte->pos = curr_pos = tpew->pos;
544
+
545
+ do {
546
+ if (tpew_next(tpew)) {
547
+ frt_pq_down(tpew_pq);
548
+ }
549
+ else {
550
+ frt_pq_pop(tpew_pq);
551
+ }
552
+ } while (((tpew = (TermPosEnumWrapper *)frt_pq_top(tpew_pq)) != NULL)
553
+ && tpew->doc == curr_doc && tpew->pos == curr_pos);
554
+ return true;
555
+ }
556
+
557
+ static bool spanmte_skip_to(FrtSpanEnum *self, int target)
558
+ {
559
+ SpanMultiTermEnum *mte = SpMTEn(self);
560
+ FrtPriorityQueue *tpew_pq = mte->tpew_pq;
561
+ TermPosEnumWrapper *tpew;
562
+ if (tpew_pq == NULL) {
563
+ TermPosEnumWrapper **tpews = mte->tpews;
564
+ int i;
565
+ tpew_pq = frt_pq_new(mte->tpew_cnt, (frt_lt_ft)tpew_less_than, (frt_free_ft)NULL);
566
+ for (i = mte->tpew_cnt - 1; i >= 0; i--) {
567
+ tpew_skip_to(tpews[i], target);
568
+ frt_pq_push(tpew_pq, tpews[i]);
569
+ }
570
+ mte->tpew_pq = tpew_pq;
571
+ }
572
+ if (tpew_pq->size == 0) {
573
+ mte->doc = -1;
574
+ return false;
575
+ }
576
+ while ((tpew = (TermPosEnumWrapper *)frt_pq_top(tpew_pq)) != NULL
577
+ && (target > tpew->doc)) {
578
+ if (tpew_skip_to(tpew, target)) {
579
+ frt_pq_down(tpew_pq);
580
+ }
581
+ else {
582
+ frt_pq_pop(tpew_pq);
583
+ }
584
+ }
585
+ return spanmte_next(self);
586
+ }
587
+
588
+ static int spanmte_doc(FrtSpanEnum *self)
589
+ {
590
+ return SpMTEn(self)->doc;
591
+ }
592
+
593
+ static int spanmte_start(FrtSpanEnum *self)
594
+ {
595
+ return SpMTEn(self)->pos;
596
+ }
597
+
598
+ static int spanmte_end(FrtSpanEnum *self)
599
+ {
600
+ return SpMTEn(self)->pos + 1;
601
+ }
602
+
603
+ static void spanmte_destroy(FrtSpanEnum *self)
604
+ {
605
+ SpanMultiTermEnum *mte = SpMTEn(self);
606
+ int i;
607
+ if (mte->tpew_pq) frt_pq_destroy(mte->tpew_pq);
608
+ for (i = 0; i < mte->tpew_cnt; i++) {
609
+ tpew_destroy(mte->tpews[i]);
610
+ }
611
+ free(mte->tpews);
612
+ free(self);
613
+ }
614
+
615
+ static FrtSpanEnum *spanmte_new(FrtQuery *query, FrtIndexReader *ir)
616
+ {
617
+ FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanMultiTermEnum);
618
+ SpanMultiTermEnum *smte = SpMTEn(self);
619
+ FrtSpanMultiTermQuery *smtq = SpMTQ(query);
620
+ int i;
621
+
622
+
623
+ smte->tpews = FRT_ALLOC_N(TermPosEnumWrapper *, smtq->term_cnt);
624
+ for (i = 0; i < smtq->term_cnt; i++) {
625
+ char *term = smtq->terms[i];
626
+ smte->tpews[i] = tpew_new(term,
627
+ frt_ir_term_positions_for(ir, SpQ(query)->field, term));
628
+ }
629
+ smte->tpew_cnt = smtq->term_cnt;
630
+ smte->tpew_pq = NULL;
631
+ smte->pos = -1;
632
+ smte->doc = -1;
633
+
634
+ self->query = query;
635
+ self->next = &spanmte_next;
636
+ self->skip_to = &spanmte_skip_to;
637
+ self->doc = &spanmte_doc;
638
+ self->start = &spanmte_start;
639
+ self->end = &spanmte_end;
640
+ self->destroy = &spanmte_destroy;
641
+ self->to_s = &spante_to_s;
642
+
643
+ return self;
644
+ }
645
+
646
+
647
+ /*****************************************************************************
648
+ * SpanFirstEnum
649
+ *****************************************************************************/
650
+
651
+ #define SpFEn(span_enum) ((SpanFirstEnum *)(span_enum))
652
+ #define SpFQ(query) ((FrtSpanFirstQuery *)(query))
653
+
654
+ typedef struct SpanFirstEnum
655
+ {
656
+ FrtSpanEnum super;
657
+ FrtSpanEnum *sub_enum;
658
+ } SpanFirstEnum;
659
+
660
+
661
+ static bool spanfe_next(FrtSpanEnum *self)
662
+ {
663
+ FrtSpanEnum *sub_enum = SpFEn(self)->sub_enum;
664
+ int end = SpFQ(self->query)->end;
665
+ while (sub_enum->next(sub_enum)) { /* scan to next match */
666
+ if (sub_enum->end(sub_enum) <= end) {
667
+ return true;
668
+ }
669
+ }
670
+ return false;
671
+ }
672
+
673
+ static bool spanfe_skip_to(FrtSpanEnum *self, int target)
674
+ {
675
+ FrtSpanEnum *sub_enum = SpFEn(self)->sub_enum;
676
+ int end = SpFQ(self->query)->end;
677
+
678
+ if (! sub_enum->skip_to(sub_enum, target)) {
679
+ return false;
680
+ }
681
+
682
+ if (sub_enum->end(sub_enum) <= end) { /* there is a match */
683
+ return true;
684
+ }
685
+
686
+ return spanfe_next(self); /* scan to next match */
687
+ }
688
+
689
+ static int spanfe_doc(FrtSpanEnum *self)
690
+ {
691
+ FrtSpanEnum *sub_enum = SpFEn(self)->sub_enum;
692
+ return sub_enum->doc(sub_enum);
693
+ }
694
+
695
+ static int spanfe_start(FrtSpanEnum *self)
696
+ {
697
+ FrtSpanEnum *sub_enum = SpFEn(self)->sub_enum;
698
+ return sub_enum->start(sub_enum);
699
+ }
700
+
701
+ static int spanfe_end(FrtSpanEnum *self)
702
+ {
703
+ FrtSpanEnum *sub_enum = SpFEn(self)->sub_enum;
704
+ return sub_enum->end(sub_enum);
705
+ }
706
+
707
+ static char *spanfe_to_s(FrtSpanEnum *self)
708
+ {
709
+ char *query_str = self->query->to_s(self->query, (FrtSymbol)NULL);
710
+ char *res = frt_strfmt("SpanFirstEnum(%s)", query_str);
711
+ free(query_str);
712
+ return res;
713
+ }
714
+
715
+ static void spanfe_destroy(FrtSpanEnum *self)
716
+ {
717
+ FrtSpanEnum *sub_enum = SpFEn(self)->sub_enum;
718
+ sub_enum->destroy(sub_enum);
719
+ free(self);
720
+ }
721
+
722
+ static FrtSpanEnum *spanfe_new(FrtQuery *query, FrtIndexReader *ir)
723
+ {
724
+ FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanFirstEnum);
725
+ FrtSpanFirstQuery *sfq = SpFQ(query);
726
+
727
+ SpFEn(self)->sub_enum = SpQ(sfq->match)->get_spans(sfq->match, ir);
728
+
729
+ self->query = query;
730
+ self->next = &spanfe_next;
731
+ self->skip_to = &spanfe_skip_to;
732
+ self->doc = &spanfe_doc;
733
+ self->start = &spanfe_start;
734
+ self->end = &spanfe_end;
735
+ self->destroy = &spanfe_destroy;
736
+ self->to_s = &spanfe_to_s;
737
+
738
+ return self;
739
+ }
740
+
741
+
742
+ /*****************************************************************************
743
+ * SpanOrEnum
744
+ *****************************************************************************/
745
+
746
+ #define SpOEn(span_enum) ((SpanOrEnum *)(span_enum))
747
+ #define SpOQ(query) ((FrtSpanOrQuery *)(query))
748
+
749
+ typedef struct SpanOrEnum
750
+ {
751
+ FrtSpanEnum super;
752
+ FrtPriorityQueue *queue;
753
+ FrtSpanEnum **span_enums;
754
+ int s_cnt;
755
+ bool first_time : 1;
756
+ } SpanOrEnum;
757
+
758
+
759
+ static bool span_less_than(FrtSpanEnum *s1, FrtSpanEnum *s2)
760
+ {
761
+ int doc_diff, start_diff;
762
+ doc_diff = s1->doc(s1) - s2->doc(s2);
763
+ if (doc_diff == 0) {
764
+ start_diff = s1->start(s1) - s2->start(s2);
765
+ if (start_diff == 0) {
766
+ return s1->end(s1) < s2->end(s2);
767
+ }
768
+ else {
769
+ return start_diff < 0;
770
+ }
771
+ }
772
+ else {
773
+ return doc_diff < 0;
774
+ }
775
+ }
776
+
777
+ static bool spanoe_next(FrtSpanEnum *self)
778
+ {
779
+ SpanOrEnum *soe = SpOEn(self);
780
+ FrtSpanEnum *se;
781
+ int i;
782
+
783
+ if (soe->first_time) { /* first time -- initialize */
784
+ for (i = 0; i < soe->s_cnt; i++) {
785
+ se = soe->span_enums[i];
786
+ if (se->next(se)) { /* move to first entry */
787
+ frt_pq_push(soe->queue, se);
788
+ }
789
+ }
790
+ soe->first_time = false;
791
+ return soe->queue->size != 0;
792
+ }
793
+
794
+ if (soe->queue->size == 0) {
795
+ return false; /* all done */
796
+ }
797
+
798
+ se = (FrtSpanEnum *)frt_pq_top(soe->queue);
799
+ if (se->next(se)) { /* move to next */
800
+ frt_pq_down(soe->queue);
801
+ return true;
802
+ }
803
+
804
+ frt_pq_pop(soe->queue); /* exhausted a clause */
805
+
806
+ return soe->queue->size != 0;
807
+ }
808
+
809
+ static bool spanoe_skip_to(FrtSpanEnum *self, int target)
810
+ {
811
+ SpanOrEnum *soe = SpOEn(self);
812
+ FrtSpanEnum *se;
813
+ int i;
814
+
815
+ if (soe->first_time) { /* first time -- initialize */
816
+ for (i = 0; i < soe->s_cnt; i++) {
817
+ se = soe->span_enums[i];
818
+ if (se->skip_to(se, target)) {/* move to target */
819
+ frt_pq_push(soe->queue, se);
820
+ }
821
+ }
822
+ soe->first_time = false;
823
+ }
824
+ else {
825
+ while ((soe->queue->size != 0) &&
826
+ ((se = (FrtSpanEnum *)frt_pq_top(soe->queue)) != NULL) &&
827
+ (se->doc(se) < target)) {
828
+ if (se->skip_to(se, target)) {
829
+ frt_pq_down(soe->queue);
830
+ }
831
+ else {
832
+ frt_pq_pop(soe->queue);
833
+ }
834
+ }
835
+ }
836
+
837
+ return soe->queue->size != 0;
838
+ }
839
+
840
/*
 * Yields the span enumerator currently at the top of a SpanOrEnum's queue.
 * The replacement list is fully parenthesized so the cast cannot bind
 * unexpectedly when the macro is used inside a larger expression
 * (e.g. followed by `->`).
 */
#define SpOEn_Top_SE(self) ((FrtSpanEnum *)frt_pq_top(SpOEn(self)->queue))
841
+
842
+ static int spanoe_doc(FrtSpanEnum *self)
843
+ {
844
+ FrtSpanEnum *se = SpOEn_Top_SE(self);
845
+ return se->doc(se);
846
+ }
847
+
848
+ static int spanoe_start(FrtSpanEnum *self)
849
+ {
850
+ FrtSpanEnum *se = SpOEn_Top_SE(self);
851
+ return se->start(se);
852
+ }
853
+
854
+ static int spanoe_end(FrtSpanEnum *self)
855
+ {
856
+ FrtSpanEnum *se = SpOEn_Top_SE(self);
857
+ return se->end(se);
858
+ }
859
+
860
+ static char *spanoe_to_s(FrtSpanEnum *self)
861
+ {
862
+ SpanOrEnum *soe = SpOEn(self);
863
+ char *query_str = self->query->to_s(self->query, (FrtSymbol)NULL);
864
+ char doc_str[62];
865
+ size_t len = strlen(query_str);
866
+ char *str = FRT_ALLOC_N(char, len + 80);
867
+
868
+ if (soe->first_time) {
869
+ sprintf(doc_str, "START");
870
+ }
871
+ else {
872
+ if (soe->queue->size == 0) {
873
+ sprintf(doc_str, "END");
874
+ }
875
+ else {
876
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
877
+ self->start(self), self->end(self));
878
+ }
879
+ }
880
+ sprintf(str, "SpanOrEnum(%s)@%s", query_str, doc_str);
881
+ free(query_str);
882
+ return str;
883
+ }
884
+
885
+ static void spanoe_destroy(FrtSpanEnum *self)
886
+ {
887
+ FrtSpanEnum *se;
888
+ SpanOrEnum *soe = SpOEn(self);
889
+ int i;
890
+ frt_pq_destroy(soe->queue);
891
+ for (i = 0; i < soe->s_cnt; i++) {
892
+ se = soe->span_enums[i];
893
+ se->destroy(se);
894
+ }
895
+ free(soe->span_enums);
896
+ free(self);
897
+ }
898
+
899
+ static FrtSpanEnum *spanoe_new(FrtQuery *query, FrtIndexReader *ir)
900
+ {
901
+ FrtQuery *clause;
902
+ FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanOrEnum);
903
+ FrtSpanOrQuery *soq = SpOQ(query);
904
+ int i;
905
+
906
+ SpOEn(self)->first_time = true;
907
+ SpOEn(self)->s_cnt = soq->c_cnt;
908
+ SpOEn(self)->span_enums = FRT_ALLOC_N(FrtSpanEnum *, SpOEn(self)->s_cnt);
909
+
910
+ for (i = 0; i < SpOEn(self)->s_cnt; i++) {
911
+ clause = soq->clauses[i];
912
+ SpOEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
913
+ }
914
+
915
+ SpOEn(self)->queue = frt_pq_new(SpOEn(self)->s_cnt, (frt_lt_ft)&span_less_than,
916
+ (frt_free_ft)NULL);
917
+
918
+ self->query = query;
919
+ self->next = &spanoe_next;
920
+ self->skip_to = &spanoe_skip_to;
921
+ self->doc = &spanoe_doc;
922
+ self->start = &spanoe_start;
923
+ self->end = &spanoe_end;
924
+ self->destroy = &spanoe_destroy;
925
+ self->to_s = &spanoe_to_s;
926
+
927
+ return self;
928
+ }
929
+
930
+ /*****************************************************************************
931
+ * SpanNearEnum
932
+ *****************************************************************************/
933
+
934
+ #define SpNEn(span_enum) ((SpanNearEnum *)(span_enum))
935
+ #define SpNQ(query) ((FrtSpanNearQuery *)(query))
936
+
937
+ typedef struct SpanNearEnum
938
+ {
939
+ FrtSpanEnum super;
940
+ FrtSpanEnum **span_enums;
941
+ int s_cnt;
942
+ int slop;
943
+ int current;
944
+ int doc;
945
+ int start;
946
+ int end;
947
+ bool first_time : 1;
948
+ bool in_order : 1;
949
+ } SpanNearEnum;
950
+
951
+
952
/*
 * Advances `sne->current` to the next clause index, wrapping around at
 * `sne->s_cnt`, and loads that clause's enumerator into `se`.
 *
 * Expects variables named `sne` and `se` in the calling scope. The macro
 * deliberately does NOT end with a semicolon: the caller supplies it, which
 * keeps `SpNEn_NEXT();` a single statement even in an unbraced if/else.
 */
#define SpNEn_NEXT() do {\
    sne->current = (sne->current+1) % sne->s_cnt;\
    se = sne->span_enums[sne->current];\
} while (0)
956
+
957
+ static bool sne_init(SpanNearEnum *sne)
958
+ {
959
+ FrtSpanEnum *se = sne->span_enums[sne->current];
960
+ int prev_doc = se->doc(se);
961
+ int i;
962
+
963
+ for (i = 1; i < sne->s_cnt; i++) {
964
+ SpNEn_NEXT();
965
+ if (!se->skip_to(se, prev_doc)) {
966
+ return false;
967
+ }
968
+ prev_doc = se->doc(se);
969
+ }
970
+ return true;
971
+ }
972
+
973
+ static bool sne_goto_next_doc(SpanNearEnum *sne)
974
+ {
975
+ FrtSpanEnum *se = sne->span_enums[sne->current];
976
+ int prev_doc = se->doc(se);
977
+
978
+ SpNEn_NEXT();
979
+
980
+ while (se->doc(se) < prev_doc) {
981
+ if (! se->skip_to(se, prev_doc)) {
982
+ return false;
983
+ }
984
+ prev_doc = se->doc(se);
985
+ SpNEn_NEXT();
986
+ }
987
+ return true;
988
+ }
989
+
990
+ static bool sne_next_unordered_match(FrtSpanEnum *self)
991
+ {
992
+ SpanNearEnum *sne = SpNEn(self);
993
+ FrtSpanEnum *se, *min_se = NULL;
994
+ int i;
995
+ int max_end, end, min_start, start, doc;
996
+ int lengths_sum;
997
+
998
+ while (true) {
999
+ max_end = 0;
1000
+ min_start = INT_MAX;
1001
+ lengths_sum = 0;
1002
+
1003
+ for (i = 0; i < sne->s_cnt; i++) {
1004
+ se = sne->span_enums[i];
1005
+ if ((end=se->end(se)) > max_end) {
1006
+ max_end = end;
1007
+ }
1008
+ if ((start=se->start(se)) < min_start) {
1009
+ min_start = start;
1010
+ min_se = se;
1011
+ sne->current = i; /* current should point to the minimum span */
1012
+ }
1013
+ lengths_sum += end - start;
1014
+ }
1015
+
1016
+ if ((max_end - min_start - lengths_sum) <= sne->slop) {
1017
+ /* we have a match */
1018
+ sne->start = min_start;
1019
+ sne->end = max_end;
1020
+ sne->doc = min_se->doc(min_se);
1021
+ return true;
1022
+ }
1023
+
1024
+ /* increment the minimum span_enum and try again */
1025
+ doc = min_se->doc(min_se);
1026
+ if (!min_se->next(min_se)) {
1027
+ return false;
1028
+ }
1029
+ if (doc < min_se->doc(min_se)) {
1030
+ if (!sne_goto_next_doc(sne)) return false;
1031
+ }
1032
+ }
1033
+ }
1034
+
1035
+ static bool sne_next_ordered_match(FrtSpanEnum *self)
1036
+ {
1037
+ SpanNearEnum *sne = SpNEn(self);
1038
+ FrtSpanEnum *se;
1039
+ int i;
1040
+ int prev_doc, prev_start, prev_end;
1041
+ int doc=0, start=0, end=0;
1042
+ int lengths_sum;
1043
+
1044
+ while (true) {
1045
+ se = sne->span_enums[0];
1046
+
1047
+ prev_doc = se->doc(se);
1048
+ sne->start = prev_start = se->start(se);
1049
+ prev_end = se->end(se);
1050
+
1051
+ i = 1;
1052
+ lengths_sum = prev_end - prev_start;
1053
+
1054
+ while (i < sne->s_cnt) {
1055
+ se = sne->span_enums[i];
1056
+ doc = se->doc(se);
1057
+ start = se->start(se);
1058
+ end = se->end(se);
1059
+ while ((doc == prev_doc) && ((start < prev_start) ||
1060
+ ((start == prev_start) && (end < prev_end)))) {
1061
+ if (!se->next(se)) {
1062
+ return false;
1063
+ }
1064
+ doc = se->doc(se);
1065
+ start = se->start(se);
1066
+ end = se->end(se);
1067
+ }
1068
+ if (doc != prev_doc) {
1069
+ sne->current = i;
1070
+ if (!sne_goto_next_doc(sne)) {
1071
+ return false;
1072
+ }
1073
+ break;
1074
+ }
1075
+ i++;
1076
+ lengths_sum += end - start;
1077
+ prev_doc = doc;
1078
+ prev_start = start;
1079
+ prev_end = end;
1080
+ }
1081
+ if (i == sne->s_cnt) {
1082
+ if ((end - sne->start - lengths_sum) <= sne->slop) {
1083
+ /* we have a match */
1084
+ sne->end = end;
1085
+ sne->doc = doc;
1086
+
1087
+ /* the minimum span is always the first span so it needs to be
1088
+ * incremented next time around */
1089
+ sne->current = 0;
1090
+ return true;
1091
+
1092
+ }
1093
+ else {
1094
+ se = sne->span_enums[0];
1095
+ if (!se->next(se)) {
1096
+ return false;
1097
+ }
1098
+ if (se->doc(se) != prev_doc) {
1099
+ sne->current = 0;
1100
+ if (!sne_goto_next_doc(sne)) {
1101
+ return false;
1102
+ }
1103
+ }
1104
+ }
1105
+ }
1106
+ }
1107
+ }
1108
+
1109
+ static bool sne_next_match(FrtSpanEnum *self)
1110
+ {
1111
+ SpanNearEnum *sne = SpNEn(self);
1112
+ FrtSpanEnum *se_curr, *se_next;
1113
+
1114
+ if (!sne->first_time) {
1115
+ if (!sne_init(sne)) {
1116
+ return false;
1117
+ }
1118
+ sne->first_time = false;
1119
+ }
1120
+ se_curr = sne->span_enums[sne->current];
1121
+ se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
1122
+ if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
1123
+ if (!sne_goto_next_doc(sne)) {
1124
+ return false;
1125
+ }
1126
+ }
1127
+
1128
+ if (sne->in_order) {
1129
+ return sne_next_ordered_match(self);
1130
+ }
1131
+ else {
1132
+ return sne_next_unordered_match(self);
1133
+ }
1134
+ }
1135
+
1136
+ static bool spanne_next(FrtSpanEnum *self)
1137
+ {
1138
+ SpanNearEnum *sne = SpNEn(self);
1139
+ FrtSpanEnum *se;
1140
+
1141
+ se = sne->span_enums[sne->current];
1142
+ if (!se->next(se)) return false;
1143
+
1144
+ return sne_next_match(self);
1145
+ }
1146
+
1147
+ static bool spanne_skip_to(FrtSpanEnum *self, int target)
1148
+ {
1149
+ FrtSpanEnum *se = SpNEn(self)->span_enums[SpNEn(self)->current];
1150
+ if (!se->skip_to(se, target)) {
1151
+ return false;
1152
+ }
1153
+
1154
+ return sne_next_match(self);
1155
+ }
1156
+
1157
+ static int spanne_doc(FrtSpanEnum *self)
1158
+ {
1159
+ return SpNEn(self)->doc;
1160
+ }
1161
+
1162
+ static int spanne_start(FrtSpanEnum *self)
1163
+ {
1164
+ return SpNEn(self)->start;
1165
+ }
1166
+
1167
+ static int spanne_end(FrtSpanEnum *self)
1168
+ {
1169
+ return SpNEn(self)->end;
1170
+ }
1171
+
1172
+ static char *spanne_to_s(FrtSpanEnum *self)
1173
+ {
1174
+ SpanNearEnum *sne = SpNEn(self);
1175
+ char *query_str = self->query->to_s(self->query, (FrtSymbol)NULL);
1176
+ char doc_str[62];
1177
+ size_t len = strlen(query_str);
1178
+ char *str = FRT_ALLOC_N(char, len + 80);
1179
+
1180
+ if (sne->first_time) {
1181
+ sprintf(doc_str, "START");
1182
+ }
1183
+ else {
1184
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
1185
+ self->start(self), self->end(self));
1186
+ }
1187
+ sprintf(str, "SpanNearEnum(%s)@%s", query_str, doc_str);
1188
+ free(query_str);
1189
+ return str;
1190
+ }
1191
+
1192
+ static void spanne_destroy(FrtSpanEnum *self)
1193
+ {
1194
+ FrtSpanEnum *se;
1195
+ SpanNearEnum *sne = SpNEn(self);
1196
+ int i;
1197
+ for (i = 0; i < sne->s_cnt; i++) {
1198
+ se = sne->span_enums[i];
1199
+ se->destroy(se);
1200
+ }
1201
+ free(sne->span_enums);
1202
+ free(self);
1203
+ }
1204
+
1205
+ static FrtSpanEnum *spanne_new(FrtQuery *query, FrtIndexReader *ir)
1206
+ {
1207
+ int i;
1208
+ FrtQuery *clause;
1209
+ FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanNearEnum);
1210
+ FrtSpanNearQuery *snq = SpNQ(query);
1211
+
1212
+ SpNEn(self)->first_time = true;
1213
+ SpNEn(self)->in_order = snq->in_order;
1214
+ SpNEn(self)->slop = snq->slop;
1215
+ SpNEn(self)->s_cnt = snq->c_cnt;
1216
+ SpNEn(self)->span_enums = FRT_ALLOC_N(FrtSpanEnum *, SpNEn(self)->s_cnt);
1217
+
1218
+ for (i = 0; i < SpNEn(self)->s_cnt; i++) {
1219
+ clause = snq->clauses[i];
1220
+ SpNEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
1221
+ }
1222
+ SpNEn(self)->current = 0;
1223
+
1224
+ SpNEn(self)->doc = -1;
1225
+ SpNEn(self)->start = -1;
1226
+ SpNEn(self)->end = -1;
1227
+
1228
+ self->query = query;
1229
+ self->next = &spanne_next;
1230
+ self->skip_to = &spanne_skip_to;
1231
+ self->doc = &spanne_doc;
1232
+ self->start = &spanne_start;
1233
+ self->end = &spanne_end;
1234
+ self->destroy = &spanne_destroy;
1235
+ self->to_s = &spanne_to_s;
1236
+
1237
+ return self;
1238
+ }
1239
+
1240
+ /*****************************************************************************
1241
+ *
1242
+ * SpanNotEnum
1243
+ *
1244
+ *****************************************************************************/
1245
+
1246
+ #define SpXEn(span_enum) ((SpanNotEnum *)(span_enum))
1247
+ #define SpXQ(query) ((FrtSpanNotQuery *)(query))
1248
+
1249
+ typedef struct SpanNotEnum
1250
+ {
1251
+ FrtSpanEnum super;
1252
+ FrtSpanEnum *inc;
1253
+ FrtSpanEnum *exc;
1254
+ bool more_inc : 1;
1255
+ bool more_exc : 1;
1256
+ } SpanNotEnum;
1257
+
1258
+
1259
+ static bool spanxe_next(FrtSpanEnum *self)
1260
+ {
1261
+ SpanNotEnum *sxe = SpXEn(self);
1262
+ FrtSpanEnum *inc = sxe->inc, *exc = sxe->exc;
1263
+ if (sxe->more_inc) { /* move to next incl */
1264
+ sxe->more_inc = inc->next(inc);
1265
+ }
1266
+
1267
+ while (sxe->more_inc && sxe->more_exc) {
1268
+ if (inc->doc(inc) > exc->doc(exc)) { /* skip excl */
1269
+ sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
1270
+ }
1271
+
1272
+ while (sxe->more_exc /* while excl is before */
1273
+ && (inc->doc(inc) == exc->doc(exc))
1274
+ && (exc->end(exc) <= inc->start(inc))) {
1275
+ sxe->more_exc = exc->next(exc); /* increment excl */
1276
+ }
1277
+
1278
+ if (! sxe->more_exc || /* if no intersection */
1279
+ (inc->doc(inc) != exc->doc(exc)) ||
1280
+ inc->end(inc) <= exc->start(exc)) {
1281
+ break; /* we found a match */
1282
+ }
1283
+
1284
+ sxe->more_inc = inc->next(inc); /* intersected: keep scanning */
1285
+ }
1286
+ return sxe->more_inc;
1287
+ }
1288
+
1289
+ static bool spanxe_skip_to(FrtSpanEnum *self, int target)
1290
+ {
1291
+ SpanNotEnum *sxe = SpXEn(self);
1292
+ FrtSpanEnum *inc = sxe->inc, *exc = sxe->exc;
1293
+ int doc;
1294
+
1295
+ if (sxe->more_inc) { /* move to next incl */
1296
+ if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
1297
+ }
1298
+
1299
+ if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
1300
+ sxe->more_exc = exc->skip_to(exc, doc);
1301
+ }
1302
+
1303
+ while (sxe->more_exc /* while excl is before */
1304
+ && inc->doc(inc) == exc->doc(exc)
1305
+ && exc->end(exc) <= inc->start(inc)) {
1306
+ sxe->more_exc = exc->next(exc); /* increment excl */
1307
+ }
1308
+
1309
+ if (!sxe->more_exc || /* if no intersection */
1310
+ inc->doc(inc) != exc->doc(exc) ||
1311
+ inc->end(inc) <= exc->start(exc)) {
1312
+ return true; /* we found a match */
1313
+ }
1314
+
1315
+ return spanxe_next(self); /* scan to next match */
1316
+ }
1317
+
1318
+ static int spanxe_doc(FrtSpanEnum *self)
1319
+ {
1320
+ FrtSpanEnum *inc = SpXEn(self)->inc;
1321
+ return inc->doc(inc);
1322
+ }
1323
+
1324
+ static int spanxe_start(FrtSpanEnum *self)
1325
+ {
1326
+ FrtSpanEnum *inc = SpXEn(self)->inc;
1327
+ return inc->start(inc);
1328
+ }
1329
+
1330
+ static int spanxe_end(FrtSpanEnum *self)
1331
+ {
1332
+ FrtSpanEnum *inc = SpXEn(self)->inc;
1333
+ return inc->end(inc);
1334
+ }
1335
+
1336
+ static char *spanxe_to_s(FrtSpanEnum *self)
1337
+ {
1338
+ char *query_str = self->query->to_s(self->query, (FrtSymbol)NULL);
1339
+ char *res = frt_strfmt("SpanNotEnum(%s)", query_str);
1340
+ free(query_str);
1341
+ return res;
1342
+ }
1343
+
1344
+ static void spanxe_destroy(FrtSpanEnum *self)
1345
+ {
1346
+ SpanNotEnum *sxe = SpXEn(self);
1347
+ sxe->inc->destroy(sxe->inc);
1348
+ sxe->exc->destroy(sxe->exc);
1349
+ free(self);
1350
+ }
1351
+
1352
+ static FrtSpanEnum *spanxe_new(FrtQuery *query, FrtIndexReader *ir)
1353
+ {
1354
+ FrtSpanEnum *self = (FrtSpanEnum *)FRT_ALLOC(SpanNotEnum);
1355
+ SpanNotEnum *sxe = SpXEn(self);
1356
+ FrtSpanNotQuery *sxq = SpXQ(query);
1357
+
1358
+ sxe->inc = SpQ(sxq->inc)->get_spans(sxq->inc, ir);
1359
+ sxe->exc = SpQ(sxq->exc)->get_spans(sxq->exc, ir);
1360
+ sxe->more_inc = true;
1361
+ sxe->more_exc = sxe->exc->next(sxe->exc);
1362
+
1363
+ self->query = query;
1364
+ self->next = &spanxe_next;
1365
+ self->skip_to = &spanxe_skip_to;
1366
+ self->doc = &spanxe_doc;
1367
+ self->start = &spanxe_start;
1368
+ self->end = &spanxe_end;
1369
+ self->destroy = &spanxe_destroy;
1370
+ self->to_s = &spanxe_to_s;
1371
+
1372
+ return self;
1373
+ }
1374
+
1375
+ /*****************************************************************************
1376
+ *
1377
+ * SpanWeight
1378
+ *
1379
+ *****************************************************************************/
1380
+
1381
+ #define SpW(weight) ((SpanWeight *)(weight))
1382
+ typedef struct SpanWeight
1383
+ {
1384
+ FrtWeight super;
1385
+ FrtHashSet *terms;
1386
+ } SpanWeight;
1387
+
1388
+ static FrtExplanation *spanw_explain(FrtWeight *self, FrtIndexReader *ir, int target)
1389
+ {
1390
+ FrtExplanation *expl;
1391
+ FrtExplanation *idf_expl1;
1392
+ FrtExplanation *idf_expl2;
1393
+ FrtExplanation *query_expl;
1394
+ FrtExplanation *qnorm_expl;
1395
+ FrtExplanation *field_expl;
1396
+ FrtExplanation *tf_expl;
1397
+ FrtScorer *scorer;
1398
+ frt_uchar *field_norms;
1399
+ float field_norm;
1400
+ FrtExplanation *field_norm_expl;
1401
+ const char *field_name = rb_id2name(SpQ(self->query)->field);
1402
+
1403
+ char *query_str;
1404
+ FrtHashSet *terms = SpW(self)->terms;
1405
+ const int field_num = frt_fis_get_field_num(ir->fis, SpQ(self->query)->field);
1406
+ char *doc_freqs = NULL;
1407
+ size_t df_i = 0;
1408
+ FrtHashSetEntry *hse;
1409
+
1410
+ if (field_num < 0) {
1411
+ return frt_expl_new(0.0, "field \"%s\" does not exist in the index", field_name);
1412
+ }
1413
+
1414
+ query_str = self->query->to_s(self->query, (FrtSymbol)NULL);
1415
+
1416
+ for (hse = terms->first; hse; hse = hse->next) {
1417
+ char *term = (char *)hse->elem;
1418
+ FRT_REALLOC_N(doc_freqs, char, df_i + strlen(term) + 23);
1419
+ df_i += sprintf(doc_freqs + df_i, "%s=%d, ", term,
1420
+ ir->doc_freq(ir, field_num, term));
1421
+ }
1422
+ /* remove the ',' at the end of the string if it exists */
1423
+ if (terms->size > 0) {
1424
+ df_i -= 2;
1425
+ doc_freqs[df_i] = '\0';
1426
+ }
1427
+ else {
1428
+ doc_freqs = frt_estrdup("");
1429
+ }
1430
+
1431
+ expl = frt_expl_new(0.0, "weight(%s in %d), product of:", query_str, target);
1432
+
1433
+ /* We need two of these as it's included in both the query explanation
1434
+ * and the field explanation */
1435
+ idf_expl1 = frt_expl_new(self->idf, "idf(%s: %s)", field_name, doc_freqs);
1436
+ idf_expl2 = frt_expl_new(self->idf, "idf(%s: %s)", field_name, doc_freqs);
1437
+ if (terms->size > 0) {
1438
+ free(doc_freqs); /* only free if allocated */
1439
+ }
1440
+
1441
+ /* explain query weight */
1442
+ query_expl = frt_expl_new(0.0, "query_weight(%s), product of:", query_str);
1443
+
1444
+ if (self->query->boost != 1.0) {
1445
+ frt_expl_add_detail(query_expl, frt_expl_new(self->query->boost, "boost"));
1446
+ }
1447
+
1448
+ frt_expl_add_detail(query_expl, idf_expl1);
1449
+
1450
+ qnorm_expl = frt_expl_new(self->qnorm, "query_norm");
1451
+ frt_expl_add_detail(query_expl, qnorm_expl);
1452
+
1453
+ query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
1454
+
1455
+ frt_expl_add_detail(expl, query_expl);
1456
+
1457
+ /* explain field weight */
1458
+ field_expl = frt_expl_new(0.0, "field_weight(%s:%s in %d), product of:", field_name, query_str, target);
1459
+ free(query_str);
1460
+
1461
+ scorer = self->scorer(self, ir);
1462
+ tf_expl = scorer->explain(scorer, target);
1463
+ scorer->destroy(scorer);
1464
+ frt_expl_add_detail(field_expl, tf_expl);
1465
+ frt_expl_add_detail(field_expl, idf_expl2);
1466
+
1467
+ field_norms = ir->get_norms(ir, field_num);
1468
+ field_norm = (field_norms
1469
+ ? frt_sim_decode_norm(self->similarity, field_norms[target])
1470
+ : (float)0.0);
1471
+ field_norm_expl = frt_expl_new(field_norm, "field_norm(field=%s, doc=%d)",
1472
+ field_name, target);
1473
+ frt_expl_add_detail(field_expl, field_norm_expl);
1474
+
1475
+ field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
1476
+
1477
+ /* combine them */
1478
+ if (query_expl->value == 1.0) {
1479
+ frt_expl_destroy(expl);
1480
+ return field_expl;
1481
+ }
1482
+ else {
1483
+ expl->value = (query_expl->value * field_expl->value);
1484
+ frt_expl_add_detail(expl, field_expl);
1485
+ return expl;
1486
+ }
1487
+ }
1488
+
1489
+ static char *spanw_to_s(FrtWeight *self)
1490
+ {
1491
+ return frt_strfmt("SpanWeight(%f)", self->value);
1492
+ }
1493
+
1494
+ static void spanw_destroy(FrtWeight *self)
1495
+ {
1496
+ frt_hs_destroy(SpW(self)->terms);
1497
+ frt_w_destroy(self);
1498
+ }
1499
+
1500
+ static FrtWeight *spanw_new(FrtQuery *query, FrtSearcher *searcher)
1501
+ {
1502
+ FrtHashSetEntry *hse;
1503
+ FrtWeight *self = w_new(SpanWeight, query);
1504
+ FrtHashSet *terms = SpQ(query)->get_terms(query);
1505
+
1506
+ SpW(self)->terms = terms;
1507
+ self->scorer = &spansc_new;
1508
+ self->explain = &spanw_explain;
1509
+ self->to_s = &spanw_to_s;
1510
+ self->destroy = &spanw_destroy;
1511
+
1512
+ self->similarity = query->get_similarity(query, searcher);
1513
+
1514
+ self->idf = 0.0f;
1515
+
1516
+ for (hse = terms->first; hse; hse = hse->next) {
1517
+ self->idf += frt_sim_idf_term(self->similarity, SpQ(query)->field,
1518
+ (char *)hse->elem, searcher);
1519
+ }
1520
+
1521
+ return self;
1522
+ }
1523
+
1524
+ /*****************************************************************************
1525
+ * FrtSpanTermQuery
1526
+ *****************************************************************************/
1527
+
1528
+ static char *spantq_to_s(FrtQuery *self, FrtSymbol default_field)
1529
+ {
1530
+ if (default_field && default_field == SpQ(self)->field) {
1531
+ return frt_strfmt("span_terms(%s)", SpTQ(self)->term);
1532
+ } else {
1533
+ return frt_strfmt("span_terms(%s:%s)", rb_id2name(SpQ(self)->field), SpTQ(self)->term);
1534
+ }
1535
+ }
1536
+
1537
+ static void spantq_destroy_i(FrtQuery *self)
1538
+ {
1539
+ free(SpTQ(self)->term);
1540
+ spanq_destroy_i(self);
1541
+ }
1542
+
1543
+ static void spantq_extract_terms(FrtQuery *self, FrtHashSet *terms)
1544
+ {
1545
+ frt_hs_add(terms, frt_term_new(SpQ(self)->field, SpTQ(self)->term));
1546
+ }
1547
+
1548
+ static FrtHashSet *spantq_get_terms(FrtQuery *self)
1549
+ {
1550
+ FrtHashSet *terms = frt_hs_new_str(&free);
1551
+ frt_hs_add(terms, frt_estrdup(SpTQ(self)->term));
1552
+ return terms;
1553
+ }
1554
+
1555
+ static unsigned long long spantq_hash(FrtQuery *self)
1556
+ {
1557
+ return spanq_hash(self) ^ frt_str_hash(SpTQ(self)->term);
1558
+ }
1559
+
1560
+ static int spantq_eq(FrtQuery *self, FrtQuery *o)
1561
+ {
1562
+ return spanq_eq(self, o) && strcmp(SpTQ(self)->term, SpTQ(o)->term) == 0;
1563
+ }
1564
+
1565
+ FrtQuery *frt_spantq_new(FrtSymbol field, const char *term)
1566
+ {
1567
+ FrtQuery *self = frt_q_new(FrtSpanTermQuery);
1568
+
1569
+ SpTQ(self)->term = frt_estrdup(term);
1570
+ SpQ(self)->field = field;
1571
+ SpQ(self)->get_spans = &spante_new;
1572
+ SpQ(self)->get_terms = &spantq_get_terms;
1573
+
1574
+ self->type = SPAN_TERM_QUERY;
1575
+ self->extract_terms = &spantq_extract_terms;
1576
+ self->to_s = &spantq_to_s;
1577
+ self->hash = &spantq_hash;
1578
+ self->eq = &spantq_eq;
1579
+ self->destroy_i = &spantq_destroy_i;
1580
+ self->create_weight_i = &spanw_new;
1581
+ self->get_matchv_i = &spanq_get_matchv_i;
1582
+ return self;
1583
+ }
1584
+
1585
+ /*****************************************************************************
1586
+ * SpanMultiTermQuery
1587
+ *****************************************************************************/
1588
+
1589
+ static char *spanmtq_to_s(FrtQuery *self, FrtSymbol field)
1590
+ {
1591
+ char *terms = NULL, *p;
1592
+ int len = 3, i;
1593
+ FrtSpanMultiTermQuery *smtq = SpMTQ(self);
1594
+ for (i = 0; i < smtq->term_cnt; i++) {
1595
+ len += strlen(smtq->terms[i]) + 2;
1596
+ }
1597
+ p = terms = FRT_ALLOC_N(char, len);
1598
+ *(p++) = '[';
1599
+ for (i = 0; i < smtq->term_cnt; i++) {
1600
+ if (i != 0) *(p++) = ',';
1601
+ strcpy(p, smtq->terms[i]);
1602
+ p += strlen(smtq->terms[i]);
1603
+ }
1604
+ *(p++) = ']';
1605
+ *p = '\0';
1606
+
1607
+ if (field == SpQ(self)->field) {
1608
+ p = frt_strfmt("span_terms(%s)", terms);
1609
+ }
1610
+ else {
1611
+ p = frt_strfmt("span_terms(%s:%s)", rb_id2name(SpQ(self)->field), terms);
1612
+ }
1613
+ free(terms);
1614
+ return p;
1615
+ }
1616
+
1617
+ static void spanmtq_destroy_i(FrtQuery *self)
1618
+ {
1619
+ FrtSpanMultiTermQuery *smtq = SpMTQ(self);
1620
+ int i;
1621
+ for (i = 0; i < smtq->term_cnt; i++) {
1622
+ free(smtq->terms[i]);
1623
+ }
1624
+ free(smtq->terms);
1625
+ spanq_destroy_i(self);
1626
+ }
1627
+
1628
+ static void spanmtq_extract_terms(FrtQuery *self, FrtHashSet *terms)
1629
+ {
1630
+ FrtSpanMultiTermQuery *smtq = SpMTQ(self);
1631
+ int i;
1632
+ for (i = 0; i < smtq->term_cnt; i++) {
1633
+ frt_hs_add(terms, frt_term_new(SpQ(self)->field, smtq->terms[i]));
1634
+ }
1635
+ }
1636
+
1637
+ static FrtHashSet *spanmtq_get_terms(FrtQuery *self)
1638
+ {
1639
+ FrtHashSet *terms = frt_hs_new_str(&free);
1640
+ FrtSpanMultiTermQuery *smtq = SpMTQ(self);
1641
+ int i;
1642
+ for (i = 0; i < smtq->term_cnt; i++) {
1643
+ frt_hs_add(terms, frt_estrdup(smtq->terms[i]));
1644
+ }
1645
+ return terms;
1646
+ }
1647
+
1648
+ static unsigned long long spanmtq_hash(FrtQuery *self)
1649
+ {
1650
+ unsigned long long hash = spanq_hash(self);
1651
+ FrtSpanMultiTermQuery *smtq = SpMTQ(self);
1652
+ int i;
1653
+ for (i = 0; i < smtq->term_cnt; i++) {
1654
+ hash ^= frt_str_hash(smtq->terms[i]);
1655
+ }
1656
+ return hash;
1657
+ }
1658
+
1659
+ static int spanmtq_eq(FrtQuery *self, FrtQuery *o)
1660
+ {
1661
+ FrtSpanMultiTermQuery *smtq = SpMTQ(self);
1662
+ FrtSpanMultiTermQuery *smtqo = SpMTQ(o);
1663
+ int i;
1664
+ if (!spanq_eq(self, o)) return false;
1665
+ if (smtq->term_cnt != smtqo->term_cnt) return false;
1666
+ for (i = 0; i < smtq->term_cnt; i++) {
1667
+ if (strcmp(smtq->terms[i], smtqo->terms[i]) != 0) return false;
1668
+ }
1669
+ return true;;
1670
+ }
1671
+
1672
+ FrtQuery *frt_spanmtq_new_conf(FrtSymbol field, int max_terms)
1673
+ {
1674
+ FrtQuery *self = frt_q_new(FrtSpanMultiTermQuery);
1675
+
1676
+ SpMTQ(self)->terms = FRT_ALLOC_N(char *, max_terms);
1677
+ SpMTQ(self)->term_cnt = 0;
1678
+ SpMTQ(self)->term_capa = max_terms;
1679
+
1680
+ SpQ(self)->field = field;
1681
+ SpQ(self)->get_spans = &spanmte_new;
1682
+ SpQ(self)->get_terms = &spanmtq_get_terms;
1683
+
1684
+ self->type = SPAN_MULTI_TERM_QUERY;
1685
+ self->extract_terms = &spanmtq_extract_terms;
1686
+ self->to_s = &spanmtq_to_s;
1687
+ self->hash = &spanmtq_hash;
1688
+ self->eq = &spanmtq_eq;
1689
+ self->destroy_i = &spanmtq_destroy_i;
1690
+ self->create_weight_i = &spanw_new;
1691
+ self->get_matchv_i = &spanq_get_matchv_i;
1692
+
1693
+ return self;
1694
+ }
1695
+
1696
+ FrtQuery *frt_spanmtq_new(FrtSymbol field)
1697
+ {
1698
+ return frt_spanmtq_new_conf(field, SPAN_MULTI_TERM_QUERY_CAPA);
1699
+ }
1700
+
1701
+ void frt_spanmtq_add_term(FrtQuery *self, const char *term)
1702
+ {
1703
+ FrtSpanMultiTermQuery *smtq = SpMTQ(self);
1704
+ if (smtq->term_cnt < smtq->term_capa) {
1705
+ smtq->terms[smtq->term_cnt++] = frt_estrdup(term);
1706
+ }
1707
+ }
1708
+
1709
+ /*****************************************************************************
1710
+ *
1711
+ * SpanFirstQuery
1712
+ *
1713
+ *****************************************************************************/
1714
+
1715
+ static char *spanfq_to_s(FrtQuery *self, FrtSymbol field)
1716
+ {
1717
+ FrtQuery *match = SpFQ(self)->match;
1718
+ char *q_str = match->to_s(match, field);
1719
+ char *res = frt_strfmt("span_first(%s, %d)", q_str, SpFQ(self)->end);
1720
+ free(q_str);
1721
+ return res;
1722
+ }
1723
+
1724
+ static void spanfq_extract_terms(FrtQuery *self, FrtHashSet *terms)
1725
+ {
1726
+ SpFQ(self)->match->extract_terms(SpFQ(self)->match, terms);
1727
+ }
1728
+
1729
+ static FrtHashSet *spanfq_get_terms(FrtQuery *self)
1730
+ {
1731
+ FrtSpanFirstQuery *sfq = SpFQ(self);
1732
+ return SpQ(sfq->match)->get_terms(sfq->match);
1733
+ }
1734
+
1735
+ static FrtQuery *spanfq_rewrite(FrtQuery *self, FrtIndexReader *ir)
1736
+ {
1737
+ FrtQuery *q, *rq;
1738
+
1739
+ q = SpFQ(self)->match;
1740
+ rq = q->rewrite(q, ir);
1741
+ frt_q_deref(q);
1742
+ SpFQ(self)->match = rq;
1743
+
1744
+ self->ref_cnt++;
1745
+ return self; /* no clauses rewrote */
1746
+ }
1747
+
1748
+ static void spanfq_destroy_i(FrtQuery *self)
1749
+ {
1750
+ frt_q_deref(SpFQ(self)->match);
1751
+ spanq_destroy_i(self);
1752
+ }
1753
+
1754
+ static unsigned long long spanfq_hash(FrtQuery *self)
1755
+ {
1756
+ return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
1757
+ ^ SpFQ(self)->end;
1758
+ }
1759
+
1760
+ static int spanfq_eq(FrtQuery *self, FrtQuery *o)
1761
+ {
1762
+ FrtSpanFirstQuery *sfq1 = SpFQ(self);
1763
+ FrtSpanFirstQuery *sfq2 = SpFQ(o);
1764
+ return spanq_eq(self, o) && sfq1->match->eq(sfq1->match, sfq2->match)
1765
+ && (sfq1->end == sfq2->end);
1766
+ }
1767
+
1768
+ FrtQuery *frt_spanfq_new_nr(FrtQuery *match, int end)
1769
+ {
1770
+ FrtQuery *self = frt_q_new(FrtSpanFirstQuery);
1771
+
1772
+ SpFQ(self)->match = match;
1773
+ SpFQ(self)->end = end;
1774
+
1775
+ SpQ(self)->field = SpQ(match)->field;
1776
+ SpQ(self)->get_spans = &spanfe_new;
1777
+ SpQ(self)->get_terms = &spanfq_get_terms;
1778
+
1779
+ self->type = SPAN_FIRST_QUERY;
1780
+ self->rewrite = &spanfq_rewrite;
1781
+ self->extract_terms = &spanfq_extract_terms;
1782
+ self->to_s = &spanfq_to_s;
1783
+ self->hash = &spanfq_hash;
1784
+ self->eq = &spanfq_eq;
1785
+ self->destroy_i = &spanfq_destroy_i;
1786
+ self->create_weight_i = &spanw_new;
1787
+ self->get_matchv_i = &spanq_get_matchv_i;
1788
+
1789
+ return self;
1790
+ }
1791
+
1792
+ FrtQuery *frt_spanfq_new(FrtQuery *match, int end)
1793
+ {
1794
+ FRT_REF(match);
1795
+ return frt_spanfq_new_nr(match, end);
1796
+ }
1797
+
1798
+ /*****************************************************************************
1799
+ *
1800
+ * FrtSpanOrQuery
1801
+ *
1802
+ *****************************************************************************/
1803
+
1804
+ static char *spanoq_to_s(FrtQuery *self, FrtSymbol field)
1805
+ {
1806
+ int i;
1807
+ FrtSpanOrQuery *soq = SpOQ(self);
1808
+ char *res, *res_p;
1809
+ char **q_strs = FRT_ALLOC_N(char *, soq->c_cnt);
1810
+ int len = 50;
1811
+ for (i = 0; i < soq->c_cnt; i++) {
1812
+ FrtQuery *clause = soq->clauses[i];
1813
+ q_strs[i] = clause->to_s(clause, field);
1814
+ len += strlen(q_strs[i]) + 2;
1815
+ }
1816
+
1817
+ res_p = res = FRT_ALLOC_N(char, len);
1818
+ res_p += sprintf(res_p, "span_or[");
1819
+ for (i = 0; i < soq->c_cnt; i++) {
1820
+ if (i != 0) *(res_p++) = ',';
1821
+ res_p += sprintf(res_p, "%s", q_strs[i]);
1822
+ free(q_strs[i]);
1823
+ }
1824
+ free(q_strs);
1825
+
1826
+ *(res_p)++ = ']';
1827
+ *res_p = 0;
1828
+ return res;
1829
+ }
1830
+
1831
+ static void spanoq_extract_terms(FrtQuery *self, FrtHashSet *terms)
1832
+ {
1833
+ FrtSpanOrQuery *soq = SpOQ(self);
1834
+ int i;
1835
+ for (i = 0; i < soq->c_cnt; i++) {
1836
+ FrtQuery *clause = soq->clauses[i];
1837
+ clause->extract_terms(clause, terms);
1838
+ }
1839
+ }
1840
+
1841
+ static FrtHashSet *spanoq_get_terms(FrtQuery *self)
1842
+ {
1843
+ FrtSpanOrQuery *soq = SpOQ(self);
1844
+ FrtHashSet *terms = frt_hs_new_str(&free);
1845
+ int i;
1846
+ for (i = 0; i < soq->c_cnt; i++) {
1847
+ FrtQuery *clause = soq->clauses[i];
1848
+ FrtHashSet *sub_terms = SpQ(clause)->get_terms(clause);
1849
+ frt_hs_merge(terms, sub_terms);
1850
+ }
1851
+
1852
+ return terms;
1853
+ }
1854
+
1855
+ static FrtSpanEnum *spanoq_get_spans(FrtQuery *self, FrtIndexReader *ir)
1856
+ {
1857
+ FrtSpanOrQuery *soq = SpOQ(self);
1858
+ if (soq->c_cnt == 1) {
1859
+ FrtQuery *q = soq->clauses[0];
1860
+ return SpQ(q)->get_spans(q, ir);
1861
+ }
1862
+
1863
+ return spanoe_new(self, ir);
1864
+ }
1865
+
1866
+ static FrtQuery *spanoq_rewrite(FrtQuery *self, FrtIndexReader *ir)
1867
+ {
1868
+ FrtSpanOrQuery *soq = SpOQ(self);
1869
+ int i;
1870
+
1871
+ /* replace clauses with their rewritten queries */
1872
+ for (i = 0; i < soq->c_cnt; i++) {
1873
+ FrtQuery *clause = soq->clauses[i];
1874
+ FrtQuery *rewritten = clause->rewrite(clause, ir);
1875
+ frt_q_deref(clause);
1876
+ soq->clauses[i] = rewritten;
1877
+ }
1878
+
1879
+ self->ref_cnt++;
1880
+ return self;
1881
+ }
1882
+
1883
+ static void spanoq_destroy_i(FrtQuery *self)
1884
+ {
1885
+ FrtSpanOrQuery *soq = SpOQ(self);
1886
+
1887
+ int i;
1888
+ for (i = 0; i < soq->c_cnt; i++) {
1889
+ FrtQuery *clause = soq->clauses[i];
1890
+ frt_q_deref(clause);
1891
+ }
1892
+ free(soq->clauses);
1893
+
1894
+ spanq_destroy_i(self);
1895
+ }
1896
+
1897
+ static unsigned long long spanoq_hash(FrtQuery *self)
1898
+ {
1899
+ int i;
1900
+ unsigned long long hash = spanq_hash(self);
1901
+ FrtSpanOrQuery *soq = SpOQ(self);
1902
+
1903
+ for (i = 0; i < soq->c_cnt; i++) {
1904
+ FrtQuery *q = soq->clauses[i];
1905
+ hash ^= q->hash(q);
1906
+ }
1907
+ return hash;
1908
+ }
1909
+
1910
+ static int spanoq_eq(FrtQuery *self, FrtQuery *o)
1911
+ {
1912
+ int i;
1913
+ FrtQuery *q1, *q2;
1914
+ FrtSpanOrQuery *soq1 = SpOQ(self);
1915
+ FrtSpanOrQuery *soq2 = SpOQ(o);
1916
+
1917
+ if (!spanq_eq(self, o) || soq1->c_cnt != soq2->c_cnt) {
1918
+ return false;
1919
+ }
1920
+ for (i = 0; i < soq1->c_cnt; i++) {
1921
+ q1 = soq1->clauses[i];
1922
+ q2 = soq2->clauses[i];
1923
+ if (!q1->eq(q1, q2)) {
1924
+ return false;
1925
+ }
1926
+ }
1927
+ return true;
1928
+ }
1929
+
1930
+ FrtQuery *frt_spanoq_new()
1931
+ {
1932
+ FrtQuery *self = frt_q_new(FrtSpanOrQuery);
1933
+ SpOQ(self)->clauses = FRT_ALLOC_N(FrtQuery *, CLAUSE_INIT_CAPA);
1934
+ SpOQ(self)->c_capa = CLAUSE_INIT_CAPA;
1935
+
1936
+ SpQ(self)->field = (FrtSymbol)NULL;
1937
+ SpQ(self)->get_spans = &spanoq_get_spans;
1938
+ SpQ(self)->get_terms = &spanoq_get_terms;
1939
+
1940
+ self->type = SPAN_OR_QUERY;
1941
+ self->rewrite = &spanoq_rewrite;
1942
+ self->extract_terms = &spanoq_extract_terms;
1943
+ self->to_s = &spanoq_to_s;
1944
+ self->hash = &spanoq_hash;
1945
+ self->eq = &spanoq_eq;
1946
+ self->destroy_i = &spanoq_destroy_i;
1947
+ self->create_weight_i = &spanw_new;
1948
+ self->get_matchv_i = &spanq_get_matchv_i;
1949
+
1950
+ return self;
1951
+ }
1952
+
1953
+ FrtQuery *frt_spanoq_add_clause_nr(FrtQuery *self, FrtQuery *clause)
1954
+ {
1955
+ const int curr_index = SpOQ(self)->c_cnt++;
1956
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
1957
+ FRT_RAISE(FRT_ARG_ERROR, "Tried to add a %s to a SpanOrQuery. This is not a "
1958
+ "SpanQuery.", frt_q_get_query_name(clause->type));
1959
+ }
1960
+ if (curr_index == 0) {
1961
+ SpQ(self)->field = SpQ(clause)->field;
1962
+ }
1963
+ else if (SpQ(self)->field != SpQ(clause)->field) {
1964
+ FRT_RAISE(FRT_ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1965
+ "Attempted to add a SpanQuery with field \"%s\" to a SpanOrQuery "
1966
+ "with field \"%s\"", rb_id2name(SpQ(clause)->field), rb_id2name(SpQ(self)->field));
1967
+ }
1968
+ if (curr_index >= SpOQ(self)->c_capa) {
1969
+ SpOQ(self)->c_capa <<= 1;
1970
+ FRT_REALLOC_N(SpOQ(self)->clauses, FrtQuery *, SpOQ(self)->c_capa);
1971
+ }
1972
+ SpOQ(self)->clauses[curr_index] = clause;
1973
+ return clause;
1974
+ }
1975
+
1976
+ FrtQuery *frt_spanoq_add_clause(FrtQuery *self, FrtQuery *clause)
1977
+ {
1978
+ FRT_REF(clause);
1979
+ return frt_spanoq_add_clause_nr(self, clause);
1980
+ }
1981
+
1982
+ /*****************************************************************************
1983
+ *
1984
+ * SpanNearQuery
1985
+ *
1986
+ *****************************************************************************/
1987
+
1988
+ static char *spannq_to_s(FrtQuery *self, FrtSymbol field)
1989
+ {
1990
+ int i;
1991
+ FrtSpanNearQuery *snq = SpNQ(self);
1992
+ char *res, *res_p;
1993
+ char **q_strs = FRT_ALLOC_N(char *, snq->c_cnt);
1994
+ int len = 50;
1995
+ for (i = 0; i < snq->c_cnt; i++) {
1996
+ FrtQuery *clause = snq->clauses[i];
1997
+ q_strs[i] = clause->to_s(clause, field);
1998
+ len += strlen(q_strs[i]);
1999
+ }
2000
+
2001
+ res_p = res = FRT_ALLOC_N(char, len);
2002
+ res_p += sprintf(res_p, "span_near[");
2003
+ for (i = 0; i < snq->c_cnt; i++) {
2004
+ if (i != 0) *(res_p)++ = ',';
2005
+ res_p += sprintf(res_p, "%s", q_strs[i]);
2006
+ free(q_strs[i]);
2007
+ }
2008
+ free(q_strs);
2009
+
2010
+ *(res_p++) = ']';
2011
+ *res_p = 0;
2012
+ return res;
2013
+ }
2014
+
2015
+ static void spannq_extract_terms(FrtQuery *self, FrtHashSet *terms)
2016
+ {
2017
+ FrtSpanNearQuery *snq = SpNQ(self);
2018
+ int i;
2019
+ for (i = 0; i < snq->c_cnt; i++) {
2020
+ FrtQuery *clause = snq->clauses[i];
2021
+ clause->extract_terms(clause, terms);
2022
+ }
2023
+ }
2024
+
2025
+ static FrtHashSet *spannq_get_terms(FrtQuery *self)
2026
+ {
2027
+ FrtSpanNearQuery *snq = SpNQ(self);
2028
+ FrtHashSet *terms = frt_hs_new_str(&free);
2029
+ int i;
2030
+ for (i = 0; i < snq->c_cnt; i++) {
2031
+ FrtQuery *clause = snq->clauses[i];
2032
+ FrtHashSet *sub_terms = SpQ(clause)->get_terms(clause);
2033
+ frt_hs_merge(terms, sub_terms);
2034
+ }
2035
+
2036
+ return terms;
2037
+ }
2038
+
2039
+ static FrtSpanEnum *spannq_get_spans(FrtQuery *self, FrtIndexReader *ir)
2040
+ {
2041
+ FrtSpanNearQuery *snq = SpNQ(self);
2042
+
2043
+ if (snq->c_cnt == 1) {
2044
+ FrtQuery *q = snq->clauses[0];
2045
+ return SpQ(q)->get_spans(q, ir);
2046
+ }
2047
+
2048
+ return spanne_new(self, ir);
2049
+ }
2050
+
2051
+ static FrtQuery *spannq_rewrite(FrtQuery *self, FrtIndexReader *ir)
2052
+ {
2053
+ FrtSpanNearQuery *snq = SpNQ(self);
2054
+ int i;
2055
+ for (i = 0; i < snq->c_cnt; i++) {
2056
+ FrtQuery *clause = snq->clauses[i];
2057
+ FrtQuery *rewritten = clause->rewrite(clause, ir);
2058
+ frt_q_deref(clause);
2059
+ snq->clauses[i] = rewritten;
2060
+ }
2061
+
2062
+ self->ref_cnt++;
2063
+ return self;
2064
+ }
2065
+
2066
+ static void spannq_destroy(FrtQuery *self)
2067
+ {
2068
+ FrtSpanNearQuery *snq = SpNQ(self);
2069
+
2070
+ int i;
2071
+ for (i = 0; i < snq->c_cnt; i++) {
2072
+ FrtQuery *clause = snq->clauses[i];
2073
+ frt_q_deref(clause);
2074
+ }
2075
+ free(snq->clauses);
2076
+
2077
+ spanq_destroy_i(self);
2078
+ }
2079
+
2080
+ static unsigned long long spannq_hash(FrtQuery *self)
2081
+ {
2082
+ int i;
2083
+ unsigned long long hash = spanq_hash(self);
2084
+ FrtSpanNearQuery *snq = SpNQ(self);
2085
+
2086
+ for (i = 0; i < snq->c_cnt; i++) {
2087
+ FrtQuery *q = snq->clauses[i];
2088
+ hash ^= q->hash(q);
2089
+ }
2090
+ return ((hash ^ snq->slop) << 1) | snq->in_order;
2091
+ }
2092
+
2093
+ static int spannq_eq(FrtQuery *self, FrtQuery *o)
2094
+ {
2095
+ int i;
2096
+ FrtQuery *q1, *q2;
2097
+ FrtSpanNearQuery *snq1 = SpNQ(self);
2098
+ FrtSpanNearQuery *snq2 = SpNQ(o);
2099
+ if (! spanq_eq(self, o)
2100
+ || (snq1->c_cnt != snq2->c_cnt)
2101
+ || (snq1->slop != snq2->slop)
2102
+ || (snq1->in_order != snq2->in_order)) {
2103
+ return false;
2104
+ }
2105
+
2106
+ for (i = 0; i < snq1->c_cnt; i++) {
2107
+ q1 = snq1->clauses[i];
2108
+ q2 = snq2->clauses[i];
2109
+ if (!q1->eq(q1, q2)) {
2110
+ return false;
2111
+ }
2112
+ }
2113
+
2114
+ return true;
2115
+ }
2116
+
2117
+ FrtQuery *frt_spannq_new(int slop, bool in_order)
2118
+ {
2119
+ FrtQuery *self = frt_q_new(FrtSpanNearQuery);
2120
+
2121
+ SpNQ(self)->clauses = FRT_ALLOC_N(FrtQuery *, CLAUSE_INIT_CAPA);
2122
+ SpNQ(self)->c_capa = CLAUSE_INIT_CAPA;
2123
+ SpNQ(self)->slop = slop;
2124
+ SpNQ(self)->in_order = in_order;
2125
+
2126
+ SpQ(self)->get_spans = &spannq_get_spans;
2127
+ SpQ(self)->get_terms = &spannq_get_terms;
2128
+ SpQ(self)->field = (FrtSymbol)NULL;
2129
+
2130
+ self->type = SPAN_NEAR_QUERY;
2131
+ self->rewrite = &spannq_rewrite;
2132
+ self->extract_terms = &spannq_extract_terms;
2133
+ self->to_s = &spannq_to_s;
2134
+ self->hash = &spannq_hash;
2135
+ self->eq = &spannq_eq;
2136
+ self->destroy_i = &spannq_destroy;
2137
+ self->create_weight_i = &spanw_new;
2138
+ self->get_matchv_i = &spanq_get_matchv_i;
2139
+
2140
+ return self;
2141
+ }
2142
+
2143
+ FrtQuery *frt_spannq_add_clause_nr(FrtQuery *self, FrtQuery *clause)
2144
+ {
2145
+ const int curr_index = SpNQ(self)->c_cnt++;
2146
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
2147
+ FRT_RAISE(FRT_ARG_ERROR, "Tried to add a %s to a SpanNearQuery. This is not a "
2148
+ "SpanQuery.", frt_q_get_query_name(clause->type));
2149
+ }
2150
+ if (curr_index == 0) {
2151
+ SpQ(self)->field = SpQ(clause)->field;
2152
+ }
2153
+ else if (SpQ(self)->field != SpQ(clause)->field) {
2154
+ FRT_RAISE(FRT_ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
2155
+ "Attempted to add a SpanQuery with field \"%s\" to SpanNearQuery "
2156
+ "with field \"%s\"", rb_id2name(SpQ(clause)->field), rb_id2name(SpQ(self)->field));
2157
+ }
2158
+ if (curr_index >= SpNQ(self)->c_capa) {
2159
+ SpNQ(self)->c_capa <<= 1;
2160
+ FRT_REALLOC_N(SpNQ(self)->clauses, FrtQuery *, SpNQ(self)->c_capa);
2161
+ }
2162
+ SpNQ(self)->clauses[curr_index] = clause;
2163
+ return clause;
2164
+ }
2165
+
2166
+ FrtQuery *frt_spannq_add_clause(FrtQuery *self, FrtQuery *clause)
2167
+ {
2168
+ FRT_REF(clause);
2169
+ return frt_spannq_add_clause_nr(self, clause);
2170
+ }
2171
+
2172
+ /*****************************************************************************
2173
+ *
2174
+ * FrtSpanNotQuery
2175
+ *
2176
+ *****************************************************************************/
2177
+
2178
+ static char *spanxq_to_s(FrtQuery *self, FrtSymbol field)
2179
+ {
2180
+ FrtSpanNotQuery *sxq = SpXQ(self);
2181
+ char *inc_s = sxq->inc->to_s(sxq->inc, field);
2182
+ char *exc_s = sxq->exc->to_s(sxq->exc, field);
2183
+ char *res = frt_strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
2184
+
2185
+ free(inc_s);
2186
+ free(exc_s);
2187
+ return res;
2188
+ }
2189
+
2190
+ static void spanxq_extract_terms(FrtQuery *self, FrtHashSet *terms)
2191
+ {
2192
+ SpXQ(self)->inc->extract_terms(SpXQ(self)->inc, terms);
2193
+ }
2194
+
2195
+ static FrtHashSet *spanxq_get_terms(FrtQuery *self)
2196
+ {
2197
+ return SpQ(SpXQ(self)->inc)->get_terms(SpXQ(self)->inc);
2198
+ }
2199
+
2200
+ static FrtQuery *spanxq_rewrite(FrtQuery *self, FrtIndexReader *ir)
2201
+ {
2202
+ FrtSpanNotQuery *sxq = SpXQ(self);
2203
+ FrtQuery *q, *rq;
2204
+
2205
+ /* rewrite inclusive query */
2206
+ q = sxq->inc;
2207
+ rq = q->rewrite(q, ir);
2208
+ frt_q_deref(q);
2209
+ sxq->inc = rq;
2210
+
2211
+ /* rewrite exclusive query */
2212
+ q = sxq->exc;
2213
+ rq = q->rewrite(q, ir);
2214
+ frt_q_deref(q);
2215
+ sxq->exc = rq;
2216
+
2217
+ self->ref_cnt++;
2218
+ return self;
2219
+ }
2220
+
2221
+ static void spanxq_destroy(FrtQuery *self)
2222
+ {
2223
+ FrtSpanNotQuery *sxq = SpXQ(self);
2224
+
2225
+ frt_q_deref(sxq->inc);
2226
+ frt_q_deref(sxq->exc);
2227
+
2228
+ spanq_destroy_i(self);
2229
+ }
2230
+
2231
+ static unsigned long long spanxq_hash(FrtQuery *self)
2232
+ {
2233
+ FrtSpanNotQuery *sxq = SpXQ(self);
2234
+ return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
2235
+ ^ sxq->exc->hash(sxq->exc);
2236
+ }
2237
+
2238
+ static int spanxq_eq(FrtQuery *self, FrtQuery *o)
2239
+ {
2240
+ FrtSpanNotQuery *sxq1 = SpXQ(self);
2241
+ FrtSpanNotQuery *sxq2 = SpXQ(o);
2242
+ return spanq_eq(self, o) && sxq1->inc->eq(sxq1->inc, sxq2->inc)
2243
+ && sxq1->exc->eq(sxq1->exc, sxq2->exc);
2244
+ }
2245
+
2246
+
2247
+ FrtQuery *frt_spanxq_new_nr(FrtQuery *inc, FrtQuery *exc)
2248
+ {
2249
+ FrtQuery *self;
2250
+ if (SpQ(inc)->field != SpQ(exc)->field) {
2251
+ FRT_RAISE(FRT_ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
2252
+ "Attempted to add a SpanQuery with field \"%s\" along with a "
2253
+ "SpanQuery with field \"%s\" to an SpanNotQuery",
2254
+ rb_id2name(SpQ(inc)->field), rb_id2name(SpQ(exc)->field));
2255
+ }
2256
+ self = frt_q_new(FrtSpanNotQuery);
2257
+
2258
+ SpXQ(self)->inc = inc;
2259
+ SpXQ(self)->exc = exc;
2260
+
2261
+ SpQ(self)->field = SpQ(inc)->field;
2262
+ SpQ(self)->get_spans = &spanxe_new;
2263
+ SpQ(self)->get_terms = &spanxq_get_terms;
2264
+
2265
+ self->type = SPAN_NOT_QUERY;
2266
+ self->rewrite = &spanxq_rewrite;
2267
+ self->extract_terms = &spanxq_extract_terms;
2268
+ self->to_s = &spanxq_to_s;
2269
+ self->hash = &spanxq_hash;
2270
+ self->eq = &spanxq_eq;
2271
+ self->destroy_i = &spanxq_destroy;
2272
+ self->create_weight_i = &spanw_new;
2273
+ self->get_matchv_i = &spanq_get_matchv_i;
2274
+
2275
+ return self;
2276
+ }
2277
+
2278
+ FrtQuery *frt_spanxq_new(FrtQuery *inc, FrtQuery *exc)
2279
+ {
2280
+ FRT_REF(inc);
2281
+ FRT_REF(exc);
2282
+ return frt_spanxq_new_nr(inc, exc);
2283
+ }
2284
+
2285
+
2286
+ /*****************************************************************************
2287
+ *
2288
+ * Rewritables
2289
+ *
2290
+ *****************************************************************************/
2291
+
2292
+ /*****************************************************************************
2293
+ *
2294
+ * FrtSpanPrefixQuery
2295
+ *
2296
+ *****************************************************************************/
2297
+
2298
+ #define SpPfxQ(query) ((FrtSpanPrefixQuery *)(query))
2299
+
2300
+ static char *spanprq_to_s(FrtQuery *self, FrtSymbol default_field)
2301
+ {
2302
+ char *buffer, *bptr;
2303
+ const char *prefix = SpPfxQ(self)->prefix;
2304
+ size_t plen = strlen(prefix);
2305
+ FrtSymbol field = SpQ(self)->field;
2306
+ const char *field_name = rb_id2name(field);
2307
+ size_t flen = strlen(field_name);
2308
+
2309
+
2310
+ bptr = buffer = FRT_ALLOC_N(char, plen + flen + 35);
2311
+
2312
+ if (default_field == (FrtSymbol)NULL || (field != default_field)) {
2313
+ bptr += sprintf(bptr, "%s:", field_name);
2314
+ }
2315
+
2316
+ bptr += sprintf(bptr, "%s*", prefix);
2317
+ if (self->boost != 1.0) {
2318
+ *bptr = '^';
2319
+ frt_dbl_to_s(++bptr, self->boost);
2320
+ }
2321
+
2322
+ return buffer;
2323
+ }
2324
+
2325
+ static FrtQuery *spanprq_rewrite(FrtQuery *self, FrtIndexReader *ir)
2326
+ {
2327
+ const int field_num = frt_fis_get_field_num(ir->fis, SpQ(self)->field);
2328
+ FrtQuery *volatile q = frt_spanmtq_new_conf(SpQ(self)->field, SpPfxQ(self)->max_terms);
2329
+ q->boost = self->boost; /* set the boost */
2330
+
2331
+ if (field_num >= 0) {
2332
+ const char *prefix = SpPfxQ(self)->prefix;
2333
+ FrtTermEnum *te = ir->terms_from(ir, field_num, prefix);
2334
+ const char *term = te->curr_term;
2335
+ size_t prefix_len = strlen(prefix);
2336
+
2337
+ FRT_TRY
2338
+ do {
2339
+ if (strncmp(term, prefix, prefix_len) != 0) {
2340
+ break;
2341
+ }
2342
+ frt_spanmtq_add_term(q, term); /* found a match */
2343
+ } while (te->next(te));
2344
+ FRT_XFINALLY
2345
+ te->close(te);
2346
+ FRT_XENDTRY
2347
+ }
2348
+
2349
+ return q;
2350
+ }
2351
+
2352
+ static void spanprq_destroy(FrtQuery *self)
2353
+ {
2354
+ free(SpPfxQ(self)->prefix);
2355
+ spanq_destroy_i(self);
2356
+ }
2357
+
2358
+ static unsigned long long spanprq_hash(FrtQuery *self)
2359
+ {
2360
+ return frt_str_hash(rb_id2name(SpQ(self)->field)) ^ frt_str_hash(SpPfxQ(self)->prefix);
2361
+ }
2362
+
2363
+ static int spanprq_eq(FrtQuery *self, FrtQuery *o)
2364
+ {
2365
+ return (strcmp(SpPfxQ(self)->prefix, SpPfxQ(o)->prefix) == 0)
2366
+ && (SpQ(self)->field == SpQ(o)->field);
2367
+ }
2368
+
2369
+ FrtQuery *frt_spanprq_new(FrtSymbol field, const char *prefix)
2370
+ {
2371
+ FrtQuery *self = frt_q_new(FrtSpanPrefixQuery);
2372
+
2373
+ SpQ(self)->field = field;
2374
+ SpPfxQ(self)->prefix = frt_estrdup(prefix);
2375
+ SpPfxQ(self)->max_terms = FRT_SPAN_PREFIX_QUERY_MAX_TERMS;
2376
+
2377
+ self->type = SPAN_PREFIX_QUERY;
2378
+ self->rewrite = &spanprq_rewrite;
2379
+ self->to_s = &spanprq_to_s;
2380
+ self->hash = &spanprq_hash;
2381
+ self->eq = &spanprq_eq;
2382
+ self->destroy_i = &spanprq_destroy;
2383
+ self->create_weight_i = &frt_q_create_weight_unsup;
2384
+
2385
+ return self;
2386
+ }