ferret 0.9.6 → 0.10.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_span.c CHANGED
@@ -1,807 +1,1038 @@
1
1
  #include <string.h>
2
+ #include <limits.h>
2
3
  #include "search.h"
4
+ #include "hashset.h"
3
5
 
4
- /*****************************************************************************
5
- *
6
- * NearSpanEnum
7
- *
8
- *****************************************************************************/
6
+ #define CLAUSE_INIT_CAPA 4
9
7
 
10
8
  /*****************************************************************************
11
9
  *
12
- * SpanWeight
10
+ * SpanQuery
13
11
  *
14
12
  *****************************************************************************/
15
13
 
16
- Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
17
- {
18
- Explanation *expl;
19
- Explanation *idf_expl1;
20
- Explanation *idf_expl2;
21
- Explanation *query_expl;
22
- Explanation *qnorm_expl;
23
- Explanation *field_expl;
24
- Explanation *tf_expl;
25
- Scorer *scorer;
26
- uchar *field_norms;
27
- float field_norm;
28
- Explanation *field_norm_expl;
29
-
30
- char *query_str = self->query->to_s(self->query, "");
31
- HashSet *terms = (HashSet *)self->data;
32
- char *field = ((SpanQuery *)self->query->data)->field;
33
- char *doc_freqs = NULL;
34
- size_t df_i = 0;
35
- int i;
36
- Term *t;
37
-
38
-
39
- for (i = 0; i < terms->size; i++) {
40
- t = (Term *)terms->elems[i];
41
- REALLOC_N(doc_freqs, char, df_i + strlen(t->text) + 23);
42
- sprintf(doc_freqs + df_i, "%s=%d, ", t->text, ir->doc_freq(ir, t));
43
- df_i = strlen(doc_freqs);
44
- }
45
- /* remove the ',' at the end of the string if it exists */
46
- if (terms->size > 0) {
47
- df_i -= 2;
48
- doc_freqs[df_i] = '\0';
49
- } else {
50
- doc_freqs = "";
51
- }
14
+ /***************************************************************************
15
+ * SpanQuery
16
+ ***************************************************************************/
52
17
 
53
- expl = expl_create(0.0,
54
- strfmt("weight(%s in %d), product of:", query_str, target));
18
+ #define SpQ(query) ((SpanQuery *)(query))
55
19
 
56
- /* We need two of these as it's included in both the query explanation
57
- * and the field explanation */
58
- idf_expl1 = expl_create(self->idf,
59
- strfmt("idf(%s: %s)", field, doc_freqs));
60
- idf_expl2 = expl_create(self->idf,
61
- strfmt("idf(%s: %s)", field, doc_freqs));
62
- if (terms->size > 0) {
63
- free(doc_freqs); /* only free if allocated */
64
- }
20
+ static ulong spanq_hash(Query *self)
21
+ {
22
+ return str_hash(SpQ(self)->field);
23
+ }
65
24
 
66
- /* explain query weight */
67
- query_expl = expl_create(0.0,
68
- strfmt("query_weight(%s), product of:", query_str));
25
+ static int spanq_eq(Query *self, Query *o)
26
+ {
27
+ return strcmp(SpQ(self)->field, SpQ(o)->field) == 0;
28
+ }
69
29
 
70
- if (self->query->boost != 1.0) {
71
- expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
72
- }
30
+ static void spanq_destroy_i(Query *self)
31
+ {
32
+ q_destroy_i(self);
33
+ }
73
34
 
74
- expl_add_detail(query_expl, idf_expl1);
35
+ static MatchVector *mv_to_term_mv(MatchVector *term_mv, MatchVector *full_mv,
36
+ HashSet *terms, TermVector *tv)
37
+ {
38
+ int i;
39
+ for (i = 0; i < terms->size; i++) {
40
+ char *term = (char *)terms->elems[i];
41
+ TVTerm *tv_term = tv_get_tv_term(tv, term);
42
+ if (tv_term) {
43
+ int j;
44
+ int m_idx = 0;
45
+ for (j = 0; j < tv_term->freq; j++) {
46
+ int pos = tv_term->positions[j];
47
+ for (; m_idx < full_mv->size; m_idx++) {
48
+ if (pos <= full_mv->matches[m_idx].end) {
49
+ if (pos >= full_mv->matches[m_idx].start) {
50
+ matchv_add(term_mv, pos, pos);
51
+ }
52
+ break;
53
+ }
54
+ }
55
+ }
56
+ }
57
+ }
75
58
 
76
- qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
77
- expl_add_detail(query_expl, qnorm_expl);
59
+ return term_mv;
60
+ }
78
61
 
79
- query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
62
+ /***************************************************************************
63
+ * TVTermDocEnum
64
+ * dummy TermDocEnum used by the highlighter to find matches
65
+ ***************************************************************************/
80
66
 
81
- expl_add_detail(expl, query_expl);
67
+ #define TV_TDE(tde) ((TVTermDocEnum *)(tde))
82
68
 
83
- /* explain field weight */
84
- field_expl = expl_create(0.0,
85
- strfmt("field_weight(%s:%s in %d), product of:", field, query_str, target));
86
- free(query_str);
69
+ typedef struct TVTermDocEnum
70
+ {
71
+ TermDocEnum super;
72
+ int doc;
73
+ int index;
74
+ int freq;
75
+ int *positions;
76
+ TermVector *tv;
77
+ } TVTermDocEnum;
87
78
 
88
- scorer = self->scorer(self, ir);
89
- tf_expl = scorer->explain(scorer, target);
90
- scorer->destroy(scorer);
91
- expl_add_detail(field_expl, tf_expl);
92
- expl_add_detail(field_expl, idf_expl2);
79
+ static void tv_tde_seek(TermDocEnum *tde, int field_num, const char *term)
80
+ {
81
+ TVTermDocEnum *tv_tde = TV_TDE(tde);
82
+ TVTerm *tv_term = tv_get_tv_term(tv_tde->tv, term);
83
+ (void)field_num;
84
+ if (tv_term) {
85
+ tv_tde->doc = -1;
86
+ tv_tde->index = 0;
87
+ tv_tde->freq = tv_term->freq;
88
+ tv_tde->positions = tv_term->positions;
89
+ }
90
+ else {
91
+ tv_tde->doc = INT_MAX;
92
+ }
93
+ }
94
+
95
+ static bool tv_tde_next(TermDocEnum *tde)
96
+ {
97
+ if (TV_TDE(tde)->doc == -1) {
98
+ TV_TDE(tde)->doc = 0;
99
+ return true;
100
+ }
101
+ else {
102
+ TV_TDE(tde)->doc = INT_MAX;
103
+ return false;
104
+ }
105
+ }
93
106
 
94
- field_norms = ir->get_norms(ir, field);
95
- field_norm = (field_norms
96
- ? sim_decode_norm(self->similarity, field_norms[target])
97
- : (float)0.0);
98
- field_norm_expl = expl_create(field_norm,
99
- strfmt("field_norm(field=%s, doc=%d)", field, target));
100
- expl_add_detail(field_expl, field_norm_expl);
107
+ static bool tv_tde_skip_to(TermDocEnum *tde, int doc_num)
108
+ {
109
+ if (doc_num == 0) {
110
+ TV_TDE(tde)->doc = 0;
111
+ return true;
112
+ }
113
+ else {
114
+ TV_TDE(tde)->doc = INT_MAX;
115
+ return false;
116
+ }
117
+ }
118
+
119
+ static int tv_tde_next_position(TermDocEnum *tde)
120
+ {
121
+ return TV_TDE(tde)->positions[TV_TDE(tde)->index++];
122
+ }
123
+
124
+ static int tv_tde_freq(TermDocEnum *tde)
125
+ {
126
+ return TV_TDE(tde)->freq;
127
+ }
128
+
129
+ static int tv_tde_doc_num(TermDocEnum *tde)
130
+ {
131
+ return TV_TDE(tde)->doc;
132
+ }
101
133
 
102
- field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
134
+ static TermDocEnum *spanq_ir_term_positions(IndexReader *ir)
135
+ {
136
+ TVTermDocEnum *tv_tde = ALLOC(TVTermDocEnum);
137
+ TermDocEnum *tde = (TermDocEnum *)tv_tde;
138
+ tv_tde->tv = (TermVector *)ir->store;
139
+ tde->seek = &tv_tde_seek;
140
+ tde->doc_num = &tv_tde_doc_num;
141
+ tde->freq = &tv_tde_freq;
142
+ tde->next = &tv_tde_next;
143
+ tde->skip_to = &tv_tde_skip_to;
144
+ tde->next_position = &tv_tde_next_position;
145
+ tde->close = (void (*)(TermDocEnum *tde))&free;
146
+
147
+ return tde;
148
+ }
103
149
 
104
- /* combine them */
105
- if (query_expl->value == 1.0) {
106
- expl_destoy(expl);
107
- return field_expl;
108
- } else {
109
- expl->value = (query_expl->value * field_expl->value);
110
- expl_add_detail(expl, field_expl);
111
- return expl;
112
- }
150
+ static MatchVector *spanq_get_matchv_i(Query *self, MatchVector *mv,
151
+ TermVector *tv)
152
+ {
153
+ if (strcmp(SpQ(self)->field, tv->field) == 0) {
154
+ SpanEnum *sp_enum;
155
+ IndexReader *ir = ALLOC(IndexReader);
156
+ MatchVector *full_mv = matchv_new();
157
+ HashSet *terms = SpQ(self)->get_terms(self);
158
+ ir->fis = fis_new(0, 0, 0);
159
+ fis_add_field(ir->fis, fi_new(tv->field, 0, 0, 0));
160
+ ir->store = (Store *)tv;
161
+ ir->term_positions = &spanq_ir_term_positions;
162
+ sp_enum = SpQ(self)->get_spans(self, ir);
163
+ while (sp_enum->next(sp_enum)) {
164
+ matchv_add(full_mv,
165
+ sp_enum->start(sp_enum),
166
+ sp_enum->end(sp_enum) - 1);
167
+ }
168
+ sp_enum->destroy(sp_enum);
169
+
170
+ fis_deref(ir->fis);
171
+ free(ir);
172
+
173
+ matchv_compact(full_mv);
174
+ mv_to_term_mv(mv, full_mv, terms, tv);
175
+ matchv_destroy(full_mv);
176
+ hs_destroy(terms);
177
+ }
178
+ return mv;
113
179
  }
114
180
 
115
- char *spanw_to_s(Weight *self)
181
+ /***************************************************************************
182
+ *
183
+ * SpanScorer
184
+ *
185
+ ***************************************************************************/
186
+
187
+ #define SpSc(scorer) ((SpanScorer *)(scorer))
188
+ typedef struct SpanScorer
189
+ {
190
+ Scorer super;
191
+ IndexReader *ir;
192
+ SpanEnum *spans;
193
+ Similarity *sim;
194
+ uchar *norms;
195
+ Weight *weight;
196
+ float value;
197
+ float freq;
198
+ bool first_time : 1;
199
+ bool more : 1;
200
+ } SpanScorer;
201
+
202
+ static float spansc_score(Scorer *self)
116
203
  {
117
- return strfmt("SpanWeight(%f)", self->value);
204
+ SpanScorer *spansc = SpSc(self);
205
+ float raw = sim_tf(spansc->sim, spansc->freq) * spansc->value;
206
+
207
+ /* normalize */
208
+ return raw * sim_decode_norm(self->similarity, spansc->norms[self->doc]);
118
209
  }
119
210
 
120
- void spanw_destroy(Weight *self)
211
+ static bool spansc_next(Scorer *self)
121
212
  {
122
- hs_destroy_all(self->data);
123
- w_destroy(self);
213
+ SpanScorer *spansc = SpSc(self);
214
+ SpanEnum *se = spansc->spans;
215
+ int match_length;
216
+
217
+ if (spansc->first_time) {
218
+ spansc->more = se->next(se);
219
+ spansc->first_time = false;
220
+ }
221
+
222
+ if (!spansc->more) {
223
+ return false;
224
+ }
225
+
226
+ spansc->freq = 0.0;
227
+ self->doc = se->doc(se);
228
+
229
+ while (spansc->more && (self->doc == se->doc(se))) {
230
+ match_length = se->end(se) - se->start(se);
231
+ spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
232
+ spansc->more = se->next(se);
233
+ }
234
+
235
+ return (spansc->more || (spansc->freq != 0.0));
124
236
  }
125
237
 
126
- Weight *spanw_create(Query *query, Searcher *searcher)
238
+ static bool spansc_skip_to(Scorer *self, int target)
127
239
  {
128
- Weight *self = w_create(query);
129
- SpanQuery *spanq = (SpanQuery *)query->data;
130
- HashSet *terms = spanq->get_terms(query);
240
+ SpanScorer *spansc = SpSc(self);
241
+ SpanEnum *se = spansc->spans;
131
242
 
132
- self->data = terms;
133
- self->scorer = &spansc_create;
134
- self->explain = &spanw_explain;
135
- self->to_s = &spanw_to_s;
136
- self->destroy = &spanw_destroy;
137
- self->sum_of_squared_weights = &w_sum_of_squared_weights;
243
+ spansc->more = se->skip_to(se, target);
138
244
 
139
- self->similarity = query->get_similarity(query, searcher);
245
+ if (!spansc->more) {
246
+ return false;
247
+ }
140
248
 
141
- self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
142
- terms->size, searcher);
249
+ spansc->freq = 0.0;
250
+ self->doc = se->doc(se);
143
251
 
144
- return self;
252
+ while (spansc->more && (se->doc(se) == target)) {
253
+ spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
254
+ spansc->more = se->next(se);
255
+ }
256
+
257
+ return (spansc->more || (spansc->freq != 0.0));
258
+ }
259
+
260
+ static Explanation *spansc_explain(Scorer *self, int target)
261
+ {
262
+ Explanation *tf_explanation;
263
+ SpanScorer *spansc = SpSc(self);
264
+ float phrase_freq;
265
+ self->skip_to(self, target);
266
+ phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
267
+
268
+ tf_explanation = expl_new(sim_tf(self->similarity, phrase_freq),
269
+ "tf(phrase_freq(%f)", phrase_freq);
270
+
271
+ return tf_explanation;
145
272
  }
146
273
 
274
+ static void spansc_destroy(Scorer *self)
275
+ {
276
+ SpanScorer *spansc = SpSc(self);
277
+ if (spansc->spans) {
278
+ spansc->spans->destroy(spansc->spans);
279
+ }
280
+ scorer_destroy_i(self);
281
+ }
282
+
283
+ Scorer *spansc_new(Weight *weight, IndexReader *ir)
284
+ {
285
+ Scorer *self = NULL;
286
+ const int field_num = fis_get_field_num(ir->fis, SpQ(weight->query)->field);
287
+ if (field_num >= 0) {
288
+ Query *spanq = weight->query;
289
+ self = scorer_new(SpanScorer, weight->similarity);
290
+
291
+ SpSc(self)->first_time = true;
292
+ SpSc(self)->more = true;
293
+ SpSc(self)->spans = SpQ(spanq)->get_spans(spanq, ir);
294
+ SpSc(self)->sim = weight->similarity;
295
+ SpSc(self)->norms = ir->get_norms(ir, field_num);
296
+ SpSc(self)->weight = weight;
297
+ SpSc(self)->value = weight->value;
298
+ SpSc(self)->freq = 0.0;
299
+
300
+ self->score = &spansc_score;
301
+ self->next = &spansc_next;
302
+ self->skip_to = &spansc_skip_to;
303
+ self->explain = &spansc_explain;
304
+ self->destroy = &spansc_destroy;
305
+ }
306
+ return self;
307
+ }
147
308
 
148
309
  /*****************************************************************************
149
- *
150
310
  * SpanTermEnum
151
- *
152
311
  *****************************************************************************/
153
312
 
154
- bool spante_next(SpanEnum *self)
313
+ #define SpTEn(span_enum) ((SpanTermEnum *)(span_enum))
314
+ #define SpTQ(query) ((SpanTermQuery *)(query))
315
+
316
+ typedef struct SpanTermEnum
317
+ {
318
+ SpanEnum super;
319
+ TermDocEnum *positions;
320
+ int position;
321
+ int doc;
322
+ int count;
323
+ int freq;
324
+ } SpanTermEnum;
325
+
326
+
327
+ static bool spante_next(SpanEnum *self)
155
328
  {
156
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
157
- TermDocEnum *tde = ste->positions;
329
+ SpanTermEnum *ste = SpTEn(self);
330
+ TermDocEnum *tde = ste->positions;
158
331
 
159
- if (ste->count == ste->freq) {
160
- if (! tde->next(tde)) {
161
- ste->doc = INT_MAX;
162
- return false;
332
+ if (ste->count == ste->freq) {
333
+ if (! tde->next(tde)) {
334
+ ste->doc = INT_MAX;
335
+ return false;
336
+ }
337
+ ste->doc = tde->doc_num(tde);
338
+ ste->freq = tde->freq(tde);
339
+ ste->count = 0;
163
340
  }
164
- ste->doc = tde->doc_num(tde);
165
- ste->freq = tde->freq(tde);
166
- ste->count = 0;
167
- }
168
- ste->position = tde->next_position(tde);
169
- ste->count++;
170
- return true;
341
+ ste->position = tde->next_position(tde);
342
+ ste->count++;
343
+ return true;
171
344
  }
172
345
 
173
- bool spante_skip_to(SpanEnum *self, int target)
346
+ static bool spante_skip_to(SpanEnum *self, int target)
174
347
  {
175
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
176
- TermDocEnum *tde = ste->positions;
348
+ SpanTermEnum *ste = SpTEn(self);
349
+ TermDocEnum *tde = ste->positions;
177
350
 
178
- /* are we already at the correct position? */
179
- if (ste->doc >= target) return true;
351
+ /* are we already at the correct position? */
352
+ if (ste->doc >= target) {
353
+ return true;
354
+ }
180
355
 
181
- if (! tde->skip_to(tde, target)) {
182
- ste->doc = INT_MAX;
183
- return false;
184
- }
356
+ if (! tde->skip_to(tde, target)) {
357
+ ste->doc = INT_MAX;
358
+ return false;
359
+ }
185
360
 
186
- ste->doc = tde->doc_num(tde);
187
- ste->freq = tde->freq(tde);
188
- ste->count = 0;
361
+ ste->doc = tde->doc_num(tde);
362
+ ste->freq = tde->freq(tde);
363
+ ste->count = 0;
189
364
 
190
- ste->position = tde->next_position(tde);
191
- ste->count++;
192
- return true;
365
+ ste->position = tde->next_position(tde);
366
+ ste->count++;
367
+ return true;
193
368
  }
194
369
 
195
- int spante_doc(SpanEnum *self)
370
+ static int spante_doc(SpanEnum *self)
196
371
  {
197
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
198
- return ste->doc;
372
+ return SpTEn(self)->doc;
199
373
  }
200
374
 
201
- int spante_start(SpanEnum *self)
375
+ static int spante_start(SpanEnum *self)
202
376
  {
203
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
204
- return ste->position;
377
+ return SpTEn(self)->position;
205
378
  }
206
379
 
207
- int spante_end(SpanEnum *self)
380
+ static int spante_end(SpanEnum *self)
208
381
  {
209
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
210
- return ste->position + 1;
382
+ return SpTEn(self)->position + 1;
211
383
  }
212
384
 
213
- char *spante_to_s(SpanEnum *self)
385
+ static char *spante_to_s(SpanEnum *self)
214
386
  {
215
- char *field = ((SpanQuery *)self->query->data)->field;
216
- char *query_str = self->query->to_s(self->query, field);
217
- char pos_str[20];
218
- size_t len = strlen(query_str);
219
- int pos;
220
- char *str = ALLOC_N(char, len + 40);
387
+ char *field = SpQ(self->query)->field;
388
+ char *query_str = self->query->to_s(self->query, field);
389
+ char pos_str[20];
390
+ size_t len = strlen(query_str);
391
+ int pos;
392
+ char *str = ALLOC_N(char, len + 40);
221
393
 
222
- if (self->doc(self) < 0) {
223
- sprintf(pos_str, "START");
224
- } else {
225
- if (self->doc(self) == INT_MAX) {
226
- sprintf(pos_str, "END");
227
- } else {
228
- pos = ((SpanTermEnum *)self->data)->position;
229
- sprintf(pos_str, "%d", self->doc(self) - pos);
394
+ if (self->doc(self) < 0) {
395
+ sprintf(pos_str, "START");
230
396
  }
231
- }
232
- sprintf("SpanTermEnum(%s)@%s", query_str, pos_str);
233
- free(query_str);
234
- return str;
397
+ else {
398
+ if (self->doc(self) == INT_MAX) {
399
+ sprintf(pos_str, "END");
400
+ }
401
+ else {
402
+ pos = SpTEn(self)->position;
403
+ sprintf(pos_str, "%d", self->doc(self) - pos);
404
+ }
405
+ }
406
+ sprintf("SpanTermEnum(%s)@%s", query_str, pos_str);
407
+ free(query_str);
408
+ return str;
235
409
  }
236
410
 
237
- void spante_destroy(SpanEnum *self)
411
+ static void spante_destroy(SpanEnum *self)
238
412
  {
239
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
240
- TermDocEnum *tde = ste->positions;
241
- tde->close(tde);
242
- free(ste);
243
- free(self);
413
+ TermDocEnum *tde = SpTEn(self)->positions;
414
+ tde->close(tde);
415
+ free(self);
244
416
  }
245
417
 
246
- SpanEnum *spante_create(Query *query, IndexReader *ir)
418
+ static SpanEnum *spante_new(Query *query, IndexReader *ir)
247
419
  {
248
- Term *term = (Term *)((SpanQuery *)query->data)->data;
249
- SpanEnum *self = ALLOC(SpanEnum);
420
+ char *term = SpTQ(query)->term;
421
+ char *field = SpQ(query)->field;
422
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanTermEnum));
250
423
 
251
- SpanTermEnum *ste = ALLOC(SpanTermEnum);
252
- ste->positions = ir_term_positions_for(ir, term);
253
- ste->position = -1;
254
- ste->doc = -1;
255
- ste->count = 0;
256
- ste->freq = 0;
424
+ SpTEn(self)->positions = ir_term_positions_for(ir, field, term);
425
+ SpTEn(self)->position = -1;
426
+ SpTEn(self)->doc = -1;
427
+ SpTEn(self)->count = 0;
428
+ SpTEn(self)->freq = 0;
257
429
 
258
- self->data = ste;
430
+ self->query = query;
431
+ self->next = &spante_next;
432
+ self->skip_to = &spante_skip_to;
433
+ self->doc = &spante_doc;
434
+ self->start = &spante_start;
435
+ self->end = &spante_end;
436
+ self->destroy = &spante_destroy;
437
+ self->to_s = &spante_to_s;
259
438
 
260
- self->query = query;
261
- self->next = &spante_next;
262
- self->skip_to = &spante_skip_to;
263
- self->doc = &spante_doc;
264
- self->start = &spante_start;
265
- self->end = &spante_end;
266
- self->destroy = &spante_destroy;
267
- self->to_s = &spante_to_s;
268
-
269
- return self;
439
+ return self;
270
440
  }
271
441
 
272
442
 
273
443
  /*****************************************************************************
274
- *
275
444
  * SpanFirstEnum
276
- *
277
445
  *****************************************************************************/
278
446
 
279
- bool spanfe_next(SpanEnum *self)
447
+ #define SpFEn(span_enum) ((SpanFirstEnum *)(span_enum))
448
+ #define SpFQ(query) ((SpanFirstQuery *)(query))
449
+
450
+ typedef struct SpanFirstEnum
451
+ {
452
+ SpanEnum super;
453
+ SpanEnum *sub_enum;
454
+ } SpanFirstEnum;
455
+
456
+
457
+ static bool spanfe_next(SpanEnum *self)
280
458
  {
281
- SpanEnum *se = (SpanEnum *)(self->data);
282
- int end = ((SpanFirstQuery *)((SpanQuery *)self->query->data)->data)->end;
283
- while (se->next(se)) { /* scan to next match */
284
- if (se->end(se) <= end) return true;
285
- }
286
- return false;
459
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
460
+ int end = SpFQ(self->query)->end;
461
+ while (sub_enum->next(sub_enum)) { /* scan to next match */
462
+ if (sub_enum->end(sub_enum) <= end) {
463
+ return true;
464
+ }
465
+ }
466
+ return false;
287
467
  }
288
468
 
289
- bool spanfe_skip_to(SpanEnum *self, int target)
469
+ static bool spanfe_skip_to(SpanEnum *self, int target)
290
470
  {
291
- SpanEnum *se = (SpanEnum *)(self->data);
292
- int end = ((SpanFirstQuery *)((SpanQuery *)self->query->data)->data)->end;
471
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
472
+ int end = SpFQ(self->query)->end;
293
473
 
294
- if (! se->skip_to(se, target)) return false;
474
+ if (! sub_enum->skip_to(sub_enum, target)) {
475
+ return false;
476
+ }
295
477
 
296
- if (se->end(se) <= end) /* there is a match */
297
- return true;
478
+ if (sub_enum->end(sub_enum) <= end) { /* there is a match */
479
+ return true;
480
+ }
298
481
 
299
- return se->next(se); /* scan to next match */
482
+ return sub_enum->next(sub_enum); /* scan to next match */
300
483
  }
301
484
 
302
- int spanfe_doc(SpanEnum *self)
485
+ static int spanfe_doc(SpanEnum *self)
303
486
  {
304
- SpanEnum *se = (SpanEnum *)(self->data);
305
- return se->doc(se);
487
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
488
+ return sub_enum->doc(sub_enum);
306
489
  }
307
490
 
308
- int spanfe_start(SpanEnum *self)
491
+ static int spanfe_start(SpanEnum *self)
309
492
  {
310
- SpanEnum *se = (SpanEnum *)(self->data);
311
- return se->start(se);
493
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
494
+ return sub_enum->start(sub_enum);
312
495
  }
313
496
 
314
- int spanfe_end(SpanEnum *self)
497
+ static int spanfe_end(SpanEnum *self)
315
498
  {
316
- SpanEnum *se = (SpanEnum *)(self->data);
317
- return se->end(se);
499
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
500
+ return sub_enum->end(sub_enum);
318
501
  }
319
502
 
320
- char *spanfe_to_s(SpanEnum *self)
503
+ static char *spanfe_to_s(SpanEnum *self)
321
504
  {
322
- char *field = ((SpanQuery *)self->query->data)->field;
323
- char *query_str = self->query->to_s(self->query, field);
324
- char *res = strfmt("SpanFirstEnum(%s)", query_str);
325
- free(query_str);
326
- return res;
505
+ char *field = SpQ(self->query)->field;
506
+ char *query_str = self->query->to_s(self->query, field);
507
+ char *res = strfmt("SpanFirstEnum(%s)", query_str);
508
+ free(query_str);
509
+ return res;
327
510
  }
328
511
 
329
- void spanfe_destroy(SpanEnum *self)
512
+ static void spanfe_destroy(SpanEnum *self)
330
513
  {
331
- SpanEnum *se = (SpanEnum *)self->data;
332
- se->destroy(se);
333
- free(self);
514
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
515
+ sub_enum->destroy(sub_enum);
516
+ free(self);
334
517
  }
335
518
 
336
- SpanEnum *spanfe_create(Query *query, IndexReader *ir)
519
+ static SpanEnum *spanfe_new(Query *query, IndexReader *ir)
337
520
  {
338
- SpanEnum *self = ALLOC(SpanEnum);
339
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)query->data)->data;
521
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanFirstEnum));
522
+ SpanFirstQuery *sfq = SpFQ(query);
340
523
 
341
- self->data = ((SpanQuery *)sfq->match->data)->get_spans(sfq->match, ir);
524
+ SpFEn(self)->sub_enum = SpQ(sfq->match)->get_spans(sfq->match, ir);
342
525
 
343
- self->query = query;
344
- self->next = &spanfe_next;
345
- self->skip_to = &spanfe_skip_to;
346
- self->doc = &spanfe_doc;
347
- self->start = &spanfe_start;
348
- self->end = &spanfe_end;
349
- self->destroy = &spanfe_destroy;
350
- self->to_s = &spanfe_to_s;
526
+ self->query = query;
527
+ self->next = &spanfe_next;
528
+ self->skip_to = &spanfe_skip_to;
529
+ self->doc = &spanfe_doc;
530
+ self->start = &spanfe_start;
531
+ self->end = &spanfe_end;
532
+ self->destroy = &spanfe_destroy;
533
+ self->to_s = &spanfe_to_s;
351
534
 
352
- return self;
535
+ return self;
353
536
  }
354
537
 
355
538
 
356
539
  /*****************************************************************************
357
- *
358
540
  * SpanOrEnum
359
- *
360
541
  *****************************************************************************/
361
542
 
362
- bool span_less_than(void *p1, void *p2)
543
+ #define SpOEn(span_enum) ((SpanOrEnum *)(span_enum))
544
+ #define SpOQ(query) ((SpanOrQuery *)(query))
545
+
546
+ typedef struct SpanOrEnum
547
+ {
548
+ SpanEnum super;
549
+ PriorityQueue *queue;
550
+ SpanEnum **span_enums;
551
+ int s_cnt;
552
+ bool first_time : 1;
553
+ } SpanOrEnum;
554
+
555
+
556
+ static bool span_less_than(SpanEnum *s1, SpanEnum *s2)
363
557
  {
364
- SpanEnum *s1 = (SpanEnum *)p1;
365
- SpanEnum *s2 = (SpanEnum *)p2;
366
- int doc_diff, start_diff;
367
- doc_diff = s1->doc(s1) - s2->doc(s2);
368
- if (doc_diff == 0) {
369
- start_diff = s1->start(s1) - s2->start(s2);
370
- if (start_diff == 0) {
371
- return s1->end(s1) < s2->end(s2);
372
- } else {
373
- return start_diff < 0;
558
+ int doc_diff, start_diff;
559
+ doc_diff = s1->doc(s1) - s2->doc(s2);
560
+ if (doc_diff == 0) {
561
+ start_diff = s1->start(s1) - s2->start(s2);
562
+ if (start_diff == 0) {
563
+ return s1->end(s1) < s2->end(s2);
564
+ }
565
+ else {
566
+ return start_diff < 0;
567
+ }
568
+ }
569
+ else {
570
+ return doc_diff < 0;
374
571
  }
375
- } else {
376
- return doc_diff < 0;
377
- }
378
572
  }
379
573
 
380
- bool spanoe_next(SpanEnum *self)
574
+ static bool spanoe_next(SpanEnum *self)
381
575
  {
382
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
383
- SpanEnum *se;
384
- int i;
576
+ SpanOrEnum *soe = SpOEn(self);
577
+ SpanEnum *se;
578
+ int i;
385
579
 
386
- if (soe->first_time) { /* first time -- initialize */
387
- for (i = 0; i < soe->s_cnt; i++) {
388
- se = soe->span_enums[i];
389
- if (se->next(se)) /* move to first entry */
390
- pq_push(soe->queue, se);
580
+ if (soe->first_time) { /* first time -- initialize */
581
+ for (i = 0; i < soe->s_cnt; i++) {
582
+ se = soe->span_enums[i];
583
+ if (se->next(se)) { /* move to first entry */
584
+ pq_push(soe->queue, se);
585
+ }
586
+ }
587
+ soe->first_time = false;
588
+ return soe->queue->size != 0;
391
589
  }
392
- soe->first_time = false;
393
- return soe->queue->count != 0;
394
- }
395
590
 
396
- if (soe->queue->count == 0) return false; /* all done */
591
+ if (soe->queue->size == 0) {
592
+ return false; /* all done */
593
+ }
397
594
 
398
- se = (SpanEnum *)pq_top(soe->queue);
399
- if (se->next(se)) { /* move to next */
400
- pq_down(soe->queue);
401
- return true;
402
- }
595
+ se = (SpanEnum *)pq_top(soe->queue);
596
+ if (se->next(se)) { /* move to next */
597
+ pq_down(soe->queue);
598
+ return true;
599
+ }
403
600
 
404
- pq_pop(soe->queue); /* exhausted a clause */
601
+ pq_pop(soe->queue); /* exhausted a clause */
405
602
 
406
- return soe->queue->count != 0;
603
+ return soe->queue->size != 0;
407
604
  }
408
605
 
409
- bool spanoe_skip_to(SpanEnum *self, int target)
606
+ static bool spanoe_skip_to(SpanEnum *self, int target)
410
607
  {
411
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
412
- SpanEnum *se;
413
- int i;
608
+ SpanOrEnum *soe = SpOEn(self);
609
+ SpanEnum *se;
610
+ int i;
414
611
 
415
- if (soe->first_time) { /* first time -- initialize */
416
- for (i = 0; i < soe->s_cnt; i++) {
417
- se = soe->span_enums[i];
418
- if (se->skip_to(se, target)) /* move to target */
419
- pq_push(soe->queue, se);
420
- }
421
- soe->first_time = false;
422
- } else {
423
- while ((soe->queue->count != 0) &&
424
- ((se=(SpanEnum *)pq_top(soe->queue))->doc(se) < target)) {
425
- if (se->skip_to(se, target)) {
426
- pq_down(soe->queue);
427
- } else {
428
- pq_pop(soe->queue);
429
- }
612
+ if (soe->first_time) { /* first time -- initialize */
613
+ for (i = 0; i < soe->s_cnt; i++) {
614
+ se = soe->span_enums[i];
615
+ if (se->skip_to(se, target)) {/* move to target */
616
+ pq_push(soe->queue, se);
617
+ }
618
+ }
619
+ soe->first_time = false;
620
+ }
621
+ else {
622
+ while ((soe->queue->size != 0) &&
623
+ ((se = (SpanEnum *)pq_top(soe->queue))->doc(se) < target)) {
624
+ if (se->skip_to(se, target)) {
625
+ pq_down(soe->queue);
626
+ }
627
+ else {
628
+ pq_pop(soe->queue);
629
+ }
630
+ }
430
631
  }
431
- }
432
632
 
433
- return soe->queue->count != 0;
633
+ return soe->queue->size != 0;
434
634
  }
435
635
 
436
- #define GET_TOP_SOE SpanOrEnum *soe = (SpanOrEnum *)self->data;\
437
- SpanEnum *se = (SpanEnum *)pq_top(soe->queue)
438
- int spanoe_doc(SpanEnum *self)
636
+ #define SpOEn_Top_SE(self) (SpanEnum *)pq_top(SpOEn(self)->queue)
637
+
638
+ static int spanoe_doc(SpanEnum *self)
439
639
  {
440
- GET_TOP_SOE;
441
- return se->doc(se);
640
+ SpanEnum *se = SpOEn_Top_SE(self);
641
+ return se->doc(se);
442
642
  }
443
643
 
444
- int spanoe_start(SpanEnum *self)
644
+ static int spanoe_start(SpanEnum *self)
445
645
  {
446
- GET_TOP_SOE;
447
- return se->start(se);
646
+ SpanEnum *se = SpOEn_Top_SE(self);
647
+ return se->start(se);
448
648
  }
449
649
 
450
- int spanoe_end(SpanEnum *self)
650
+ static int spanoe_end(SpanEnum *self)
451
651
  {
452
- GET_TOP_SOE;
453
- return se->end(se);
652
+ SpanEnum *se = SpOEn_Top_SE(self);
653
+ return se->end(se);
454
654
  }
455
655
 
456
- char *spanoe_to_s(SpanEnum *self)
656
+ static char *spanoe_to_s(SpanEnum *self)
457
657
  {
458
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
459
- char *field = ((SpanQuery *)self->query->data)->field;
460
- char *query_str = self->query->to_s(self->query, field);
461
- char doc_str[62];
462
- size_t len = strlen(query_str);
463
- char *str = ALLOC_N(char, len + 80);
658
+ SpanOrEnum *soe = SpOEn(self);
659
+ char *field = SpQ(self->query)->field;
660
+ char *query_str = self->query->to_s(self->query, field);
661
+ char doc_str[62];
662
+ size_t len = strlen(query_str);
663
+ char *str = ALLOC_N(char, len + 80);
464
664
 
465
- if (soe->first_time) {
466
- sprintf(doc_str, "START");
467
- } else {
468
- if (soe->queue->count == 0) {
469
- sprintf(doc_str, "END");
470
- } else {
471
- sprintf(doc_str, "%d:%d-%d", self->doc(self),
472
- self->start(self), self->end(self));
665
+ if (soe->first_time) {
666
+ sprintf(doc_str, "START");
473
667
  }
474
- }
475
- sprintf("SpanOrEnum(%s)@%s", query_str, doc_str);
476
- free(query_str);
477
- return str;
668
+ else {
669
+ if (soe->queue->size == 0) {
670
+ sprintf(doc_str, "END");
671
+ }
672
+ else {
673
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
674
+ self->start(self), self->end(self));
675
+ }
676
+ }
677
+ sprintf("SpanOrEnum(%s)@%s", query_str, doc_str);
678
+ free(query_str);
679
+ return str;
478
680
  }
479
681
 
480
- void spanoe_destroy(SpanEnum *self)
682
+ static void spanoe_destroy(SpanEnum *self)
481
683
  {
482
- SpanEnum *se;
483
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
484
- int i;
485
- pq_destroy(soe->queue);
486
- for (i = 0; i < soe->s_cnt; i++) {
487
- se = soe->span_enums[i];
488
- se->destroy(se);
489
- }
490
- free(soe->span_enums);
491
- free(soe);
492
- free(self);
684
+ SpanEnum *se;
685
+ SpanOrEnum *soe = SpOEn(self);
686
+ int i;
687
+ pq_destroy(soe->queue);
688
+ for (i = 0; i < soe->s_cnt; i++) {
689
+ se = soe->span_enums[i];
690
+ se->destroy(se);
691
+ }
692
+ free(soe->span_enums);
693
+ free(self);
493
694
  }
494
695
 
495
- SpanEnum *spanoe_create(Query *query, IndexReader *ir)
696
+ SpanEnum *spanoe_new(Query *query, IndexReader *ir)
496
697
  {
497
- Query *clause;
498
- SpanEnum *self = ALLOC(SpanEnum);
499
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)query->data)->data;
500
- SpanOrEnum *soe = ALLOC(SpanOrEnum);
501
- int i;
502
- soe->first_time = true;
503
- soe->s_cnt = soq->c_cnt;
504
- soe->span_enums = ALLOC_N(SpanEnum *, soe->s_cnt);
505
- for (i = 0; i < soe->s_cnt; i++) {
506
- clause = soq->clauses[i];
507
- soe->span_enums[i] = ((SpanQuery *)clause->data)->get_spans(clause, ir);
508
- }
509
-
510
- soe->queue = pq_create(soe->s_cnt, &span_less_than);
698
+ Query *clause;
699
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanOrEnum));
700
+ SpanOrQuery *soq = SpOQ(query);
701
+ int i;
702
+
703
+ SpOEn(self)->first_time = true;
704
+ SpOEn(self)->s_cnt = soq->c_cnt;
705
+ SpOEn(self)->span_enums = ALLOC_N(SpanEnum *, SpOEn(self)->s_cnt);
511
706
 
512
- self->data = soe;
707
+ for (i = 0; i < SpOEn(self)->s_cnt; i++) {
708
+ clause = soq->clauses[i];
709
+ SpOEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
710
+ }
711
+
712
+ SpOEn(self)->queue = pq_new(SpOEn(self)->s_cnt, (lt_ft)&span_less_than,
713
+ (free_ft)NULL);
513
714
 
514
- self->query = query;
515
- self->next = &spanoe_next;
516
- self->skip_to = &spanoe_skip_to;
517
- self->doc = &spanoe_doc;
518
- self->start = &spanoe_start;
519
- self->end = &spanoe_end;
520
- self->destroy = &spanoe_destroy;
521
- self->to_s = &spanoe_to_s;
715
+ self->query = query;
716
+ self->next = &spanoe_next;
717
+ self->skip_to = &spanoe_skip_to;
718
+ self->doc = &spanoe_doc;
719
+ self->start = &spanoe_start;
720
+ self->end = &spanoe_end;
721
+ self->destroy = &spanoe_destroy;
722
+ self->to_s = &spanoe_to_s;
522
723
 
523
- return self;
724
+ return self;
524
725
  }
525
726
 
526
727
  /*****************************************************************************
527
- *
528
728
  * SpanNearEnum
529
- *
530
729
  *****************************************************************************/
531
730
 
532
- #define SNE_NEXT() do {\
533
- sne->current = (sne->current+1) % sne->s_cnt;\
534
- se = sne->span_enums[sne->current];\
731
+ #define SpNEn(span_enum) ((SpanNearEnum *)(span_enum))
732
+ #define SpNQ(query) ((SpanNearQuery *)(query))
733
+
734
+ typedef struct SpanNearEnum
735
+ {
736
+ SpanEnum super;
737
+ SpanEnum **span_enums;
738
+ int s_cnt;
739
+ int slop;
740
+ int current;
741
+ int doc;
742
+ int start;
743
+ int end;
744
+ bool first_time : 1;
745
+ bool in_order : 1;
746
+ } SpanNearEnum;
747
+
748
+
749
+ #define SpNEn_NEXT() do {\
750
+ sne->current = (sne->current+1) % sne->s_cnt;\
751
+ se = sne->span_enums[sne->current];\
535
752
  } while (0);
536
753
 
537
- bool sne_init(SpanNearEnum *sne)
754
+ static bool sne_init(SpanNearEnum *sne)
538
755
  {
539
- SpanEnum *se = sne->span_enums[sne->current];
540
- int prev_doc = se->doc(se);
541
- int i;
756
+ SpanEnum *se = sne->span_enums[sne->current];
757
+ int prev_doc = se->doc(se);
758
+ int i;
542
759
 
543
- for (i = 1; i < sne->s_cnt; i++) {
544
- SNE_NEXT();
545
- if (!se->skip_to(se, prev_doc)) return false;
546
- prev_doc = se->doc(se);
547
- }
548
- return true;
760
+ for (i = 1; i < sne->s_cnt; i++) {
761
+ SpNEn_NEXT();
762
+ if (!se->skip_to(se, prev_doc)) {
763
+ return false;
764
+ }
765
+ prev_doc = se->doc(se);
766
+ }
767
+ return true;
549
768
  }
550
769
 
551
- bool sne_goto_next_doc(SpanNearEnum *sne)
770
+ static bool sne_goto_next_doc(SpanNearEnum *sne)
552
771
  {
553
- SpanEnum *se = sne->span_enums[sne->current];
554
- int prev_doc = se->doc(se);
772
+ SpanEnum *se = sne->span_enums[sne->current];
773
+ int prev_doc = se->doc(se);
555
774
 
556
- SNE_NEXT();
775
+ SpNEn_NEXT();
557
776
 
558
- while (se->doc(se) < prev_doc) {
559
- if (! se->skip_to(se, prev_doc)) return false;
560
- prev_doc = se->doc(se);
561
- SNE_NEXT();
562
- }
563
- return true;
777
+ while (se->doc(se) < prev_doc) {
778
+ if (! se->skip_to(se, prev_doc)) {
779
+ return false;
780
+ }
781
+ prev_doc = se->doc(se);
782
+ SpNEn_NEXT();
783
+ }
784
+ return true;
564
785
  }
565
786
 
566
- bool sne_next_unordered_match(SpanEnum *self)
787
+ static bool sne_next_unordered_match(SpanEnum *self)
567
788
  {
568
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
569
- SpanEnum *se, *min_se = NULL;
570
- int i;
571
- int max_end, end, min_start, start, doc;
572
- int lengths_sum;
789
+ SpanNearEnum *sne = SpNEn(self);
790
+ SpanEnum *se, *min_se = NULL;
791
+ int i;
792
+ int max_end, end, min_start, start, doc;
793
+ int lengths_sum;
794
+
795
+ while (true) {
796
+ max_end = 0;
797
+ min_start = INT_MAX;
798
+ lengths_sum = 0;
799
+
800
+ for (i = 0; i < sne->s_cnt; i++) {
801
+ se = sne->span_enums[i];
802
+ if ((end=se->end(se)) > max_end) {
803
+ max_end = end;
804
+ }
805
+ if ((start=se->start(se)) < min_start) {
806
+ min_start = start;
807
+ min_se = se;
808
+ sne->current = i; /* current should point to the minimum span */
809
+ }
810
+ lengths_sum += end - start;
811
+ }
573
812
 
574
- while (true) {
813
+ if ((max_end - min_start - lengths_sum) <= sne->slop) {
814
+ /* we have a match */
815
+ sne->start = min_start;
816
+ sne->end = max_end;
817
+ sne->doc = min_se->doc(min_se);
818
+ return true;
819
+ }
575
820
 
576
- max_end = 0;
577
- min_start = INT_MAX;
578
- lengths_sum = 0;
821
+ /* increment the minimum span_enum and try again */
822
+ doc = min_se->doc(min_se);
823
+ if (!min_se->next(min_se)) {
824
+ return false;
825
+ }
826
+ if (doc < min_se->doc(min_se)) {
827
+ if (!sne_goto_next_doc(sne)) return false;
828
+ }
829
+ }
830
+ }
579
831
 
580
- for (i = 0; i < sne->s_cnt; i++) {
581
- se = sne->span_enums[i];
582
- if ((end=se->end(se)) > max_end) max_end = end;
583
- if ((start=se->start(se)) < min_start) {
584
- min_start = start;
585
- min_se = se;
586
- sne->current = i; /* current should point to the minimum span */
587
- }
588
- lengths_sum += end - start;
589
- }
590
-
591
- if ((max_end - min_start - lengths_sum) <= sne->slop) {
592
- /* we have a match */
593
- sne->start = min_start;
594
- sne->end = max_end;
595
- sne->doc = min_se->doc(min_se);
596
- return true;
597
- }
598
-
599
- /* increment the minimum span_enum and try again */
600
- doc = min_se->doc(min_se);
601
- if (!min_se->next(min_se)) return false;
602
- if (doc < min_se->doc(min_se)) {
603
- if (!sne_goto_next_doc(sne)) return false;
604
- }
605
- }
606
- }
607
-
608
- bool sne_next_ordered_match(SpanEnum *self)
609
- {
610
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
611
- SpanEnum *se;
612
- int i;
613
- int prev_doc, prev_start, prev_end;
614
- int doc=0, start=0, end=0;
615
- int lengths_sum;
616
-
617
- while (true) {
618
- se = sne->span_enums[0];
619
-
620
- prev_doc = se->doc(se);
621
- sne->start = prev_start = se->start(se);
622
- prev_end = se->end(se);
623
-
624
- i = 1;
625
- lengths_sum = prev_end - prev_start;
626
-
627
- while (i < sne->s_cnt) {
628
- se = sne->span_enums[i];
629
- doc = se->doc(se);
630
- start = se->start(se);
631
- end = se->end(se);
632
- while ((doc == prev_doc) && ((start < prev_start) ||
633
- ((start == prev_start) && (end < prev_end)))) {
634
- if (!se->next(se)) return false;
635
- doc = se->doc(se);
636
- start = se->start(se);
637
- end = se->end(se);
638
- }
639
- if (doc != prev_doc) {
640
- sne->current = i;
641
- if (!sne_goto_next_doc(sne)) return false;
642
- break;
643
- }
644
- i++;
645
- lengths_sum += end - start;
646
- prev_doc = doc;
647
- prev_start = start;
648
- prev_end = end;
649
- }
650
- if (i == sne->s_cnt) {
651
- if ((end - sne->start - lengths_sum) <= sne->slop) {
652
- /* we have a match */
653
- sne->end = end;
654
- sne->doc = doc;
655
-
656
- /* the minimum span is always the first span so it needs to be
657
- * incremented next time around */
658
- sne->current = 0;
659
- return true;
832
+ static bool sne_next_ordered_match(SpanEnum *self)
833
+ {
834
+ SpanNearEnum *sne = SpNEn(self);
835
+ SpanEnum *se;
836
+ int i;
837
+ int prev_doc, prev_start, prev_end;
838
+ int doc=0, start=0, end=0;
839
+ int lengths_sum;
660
840
 
661
- } else {
841
+ while (true) {
662
842
  se = sne->span_enums[0];
663
- if (!se->next(se)) return false;
664
- if (se->doc(se) != prev_doc) {
665
- sne->current = 0;
666
- if (!sne_goto_next_doc(sne)) return false;
843
+
844
+ prev_doc = se->doc(se);
845
+ sne->start = prev_start = se->start(se);
846
+ prev_end = se->end(se);
847
+
848
+ i = 1;
849
+ lengths_sum = prev_end - prev_start;
850
+
851
+ while (i < sne->s_cnt) {
852
+ se = sne->span_enums[i];
853
+ doc = se->doc(se);
854
+ start = se->start(se);
855
+ end = se->end(se);
856
+ while ((doc == prev_doc) && ((start < prev_start) ||
857
+ ((start == prev_start) && (end < prev_end)))) {
858
+ if (!se->next(se)) {
859
+ return false;
860
+ }
861
+ doc = se->doc(se);
862
+ start = se->start(se);
863
+ end = se->end(se);
864
+ }
865
+ if (doc != prev_doc) {
866
+ sne->current = i;
867
+ if (!sne_goto_next_doc(sne)) {
868
+ return false;
869
+ }
870
+ break;
871
+ }
872
+ i++;
873
+ lengths_sum += end - start;
874
+ prev_doc = doc;
875
+ prev_start = start;
876
+ prev_end = end;
877
+ }
878
+ if (i == sne->s_cnt) {
879
+ if ((end - sne->start - lengths_sum) <= sne->slop) {
880
+ /* we have a match */
881
+ sne->end = end;
882
+ sne->doc = doc;
883
+
884
+ /* the minimum span is always the first span so it needs to be
885
+ * incremented next time around */
886
+ sne->current = 0;
887
+ return true;
888
+
889
+ }
890
+ else {
891
+ se = sne->span_enums[0];
892
+ if (!se->next(se)) {
893
+ return false;
894
+ }
895
+ if (se->doc(se) != prev_doc) {
896
+ sne->current = 0;
897
+ if (!sne_goto_next_doc(sne)) {
898
+ return false;
899
+ }
900
+ }
901
+ }
667
902
  }
668
- }
669
903
  }
670
- }
671
904
  }
672
-
673
- bool sne_next_match(SpanEnum *self)
905
+
906
+ static bool sne_next_match(SpanEnum *self)
674
907
  {
675
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
676
- SpanEnum *se_curr, *se_next;
908
+ SpanNearEnum *sne = SpNEn(self);
909
+ SpanEnum *se_curr, *se_next;
677
910
 
678
- if (!sne->first_time) {
679
- if (!sne_init(sne)) return false;
680
- sne->first_time = false;
681
- }
682
- se_curr = sne->span_enums[sne->current];
683
- se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
684
- if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
685
- if (!sne_goto_next_doc(sne)) return false;
686
- }
911
+ if (!sne->first_time) {
912
+ if (!sne_init(sne)) {
913
+ return false;
914
+ }
915
+ sne->first_time = false;
916
+ }
917
+ se_curr = sne->span_enums[sne->current];
918
+ se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
919
+ if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
920
+ if (!sne_goto_next_doc(sne)) {
921
+ return false;
922
+ }
923
+ }
687
924
 
688
- if (sne->in_order) {
689
- return sne_next_ordered_match(self);
690
- } else {
691
- return sne_next_unordered_match(self);
692
- }
925
+ if (sne->in_order) {
926
+ return sne_next_ordered_match(self);
927
+ }
928
+ else {
929
+ return sne_next_unordered_match(self);
930
+ }
693
931
  }
694
932
 
695
- bool spanne_next(SpanEnum *self)
933
+ static bool spanne_next(SpanEnum *self)
696
934
  {
697
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
698
- SpanEnum *se;
935
+ SpanNearEnum *sne = SpNEn(self);
936
+ SpanEnum *se;
699
937
 
700
- se = sne->span_enums[sne->current];
701
- if (!se->next(se)) return false;
938
+ se = sne->span_enums[sne->current];
939
+ if (!se->next(se)) return false;
702
940
 
703
- return sne_next_match(self);
941
+ return sne_next_match(self);
704
942
  }
705
943
 
706
- bool spanne_skip_to(SpanEnum *self, int target)
944
+ static bool spanne_skip_to(SpanEnum *self, int target)
707
945
  {
708
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
709
- SpanEnum *se;
710
-
711
- se = sne->span_enums[sne->current];
712
- if (!se->skip_to(se, target)) return false;
946
+ SpanEnum *se = SpNEn(self)->span_enums[SpNEn(self)->current];
947
+ if (!se->skip_to(se, target)) {
948
+ return false;
949
+ }
713
950
 
714
- return sne_next_match(self);
951
+ return sne_next_match(self);
715
952
  }
716
953
 
717
- #define GET_TOP_SNE SpanNearEnum *sne = (SpanNearEnum *)self->data;
718
-
719
- int spanne_doc(SpanEnum *self)
954
+ static int spanne_doc(SpanEnum *self)
720
955
  {
721
- GET_TOP_SNE;
722
- return sne->doc;
956
+ return SpNEn(self)->doc;
723
957
  }
724
958
 
725
- int spanne_start(SpanEnum *self)
959
+ static int spanne_start(SpanEnum *self)
726
960
  {
727
- GET_TOP_SNE;
728
- return sne->start;
961
+ return SpNEn(self)->start;
729
962
  }
730
963
 
731
- int spanne_end(SpanEnum *self)
964
+ static int spanne_end(SpanEnum *self)
732
965
  {
733
- GET_TOP_SNE;
734
- return sne->end;
966
+ return SpNEn(self)->end;
735
967
  }
736
968
 
737
- char *spanne_to_s(SpanEnum *self)
969
+ static char *spanne_to_s(SpanEnum *self)
738
970
  {
739
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
740
- char *field = ((SpanQuery *)self->query->data)->field;
741
- char *query_str = self->query->to_s(self->query, field);
742
- char doc_str[62];
743
- size_t len = strlen(query_str);
744
- char *str = ALLOC_N(char, len + 80);
971
+ SpanNearEnum *sne = SpNEn(self);
972
+ char *field = SpQ(self->query)->field;
973
+ char *query_str = self->query->to_s(self->query, field);
974
+ char doc_str[62];
975
+ size_t len = strlen(query_str);
976
+ char *str = ALLOC_N(char, len + 80);
745
977
 
746
- if (sne->first_time) {
747
- sprintf(doc_str, "START");
748
- } else {
749
- sprintf(doc_str, "%d:%d-%d", self->doc(self),
750
- self->start(self), self->end(self));
751
- }
752
- sprintf("SpanNearEnum(%s)@%s", query_str, doc_str);
753
- free(query_str);
754
- return str;
978
+ if (sne->first_time) {
979
+ sprintf(doc_str, "START");
980
+ }
981
+ else {
982
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
983
+ self->start(self), self->end(self));
984
+ }
985
+ sprintf("SpanNearEnum(%s)@%s", query_str, doc_str);
986
+ free(query_str);
987
+ return str;
755
988
  }
756
989
 
757
- void spanne_destroy(SpanEnum *self)
990
+ static void spanne_destroy(SpanEnum *self)
758
991
  {
759
- SpanEnum *se;
760
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
761
- int i;
762
- for (i = 0; i < sne->s_cnt; i++) {
763
- se = sne->span_enums[i];
764
- se->destroy(se);
765
- }
766
- free(sne->span_enums);
767
- free(sne);
768
- free(self);
992
+ SpanEnum *se;
993
+ SpanNearEnum *sne = SpNEn(self);
994
+ int i;
995
+ for (i = 0; i < sne->s_cnt; i++) {
996
+ se = sne->span_enums[i];
997
+ se->destroy(se);
998
+ }
999
+ free(sne->span_enums);
1000
+ free(self);
769
1001
  }
770
1002
 
771
- SpanEnum *spanne_create(Query *query, IndexReader *ir)
1003
+ static SpanEnum *spanne_new(Query *query, IndexReader *ir)
772
1004
  {
773
- Query *clause;
774
- SpanEnum *self = ALLOC(SpanEnum);
775
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)query->data)->data;
776
- SpanNearEnum *sne = ALLOC(SpanNearEnum);
777
- int i;
778
- sne->first_time = true;
779
- sne->in_order = snq->in_order;
780
- sne->slop = snq->slop;
781
- sne->s_cnt = snq->c_cnt;
782
- sne->span_enums = ALLOC_N(SpanEnum *, sne->s_cnt);
783
- for (i = 0; i < sne->s_cnt; i++) {
784
- clause = snq->clauses[i];
785
- sne->span_enums[i] = ((SpanQuery *)clause->data)->get_spans(clause, ir);
786
- }
787
- sne->current = 0;
788
-
789
- sne->doc = -1;
790
- sne->start = -1;
791
- sne->end = -1;
1005
+ int i;
1006
+ Query *clause;
1007
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanNearEnum));
1008
+ SpanNearQuery *snq = SpNQ(query);
1009
+
1010
+ SpNEn(self)->first_time = true;
1011
+ SpNEn(self)->in_order = snq->in_order;
1012
+ SpNEn(self)->slop = snq->slop;
1013
+ SpNEn(self)->s_cnt = snq->c_cnt;
1014
+ SpNEn(self)->span_enums = ALLOC_N(SpanEnum *, SpNEn(self)->s_cnt);
1015
+
1016
+ for (i = 0; i < SpNEn(self)->s_cnt; i++) {
1017
+ clause = snq->clauses[i];
1018
+ SpNEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
1019
+ }
1020
+ SpNEn(self)->current = 0;
792
1021
 
793
- self->data = sne;
1022
+ SpNEn(self)->doc = -1;
1023
+ SpNEn(self)->start = -1;
1024
+ SpNEn(self)->end = -1;
794
1025
 
795
- self->query = query;
796
- self->next = &spanne_next;
797
- self->skip_to = &spanne_skip_to;
798
- self->doc = &spanne_doc;
799
- self->start = &spanne_start;
800
- self->end = &spanne_end;
801
- self->destroy = &spanne_destroy;
802
- self->to_s = &spanne_to_s;
1026
+ self->query = query;
1027
+ self->next = &spanne_next;
1028
+ self->skip_to = &spanne_skip_to;
1029
+ self->doc = &spanne_doc;
1030
+ self->start = &spanne_start;
1031
+ self->end = &spanne_end;
1032
+ self->destroy = &spanne_destroy;
1033
+ self->to_s = &spanne_to_s;
803
1034
 
804
- return self;
1035
+ return self;
805
1036
  }
806
1037
 
807
1038
  /*****************************************************************************
@@ -810,211 +1041,348 @@ SpanEnum *spanne_create(Query *query, IndexReader *ir)
810
1041
  *
811
1042
  *****************************************************************************/
812
1043
 
813
- bool spanxe_next(SpanEnum *self)
1044
+ #define SpXEn(span_enum) ((SpanNotEnum *)(span_enum))
1045
+ #define SpXQ(query) ((SpanNotQuery *)(query))
1046
+
1047
+ typedef struct SpanNotEnum
814
1048
  {
815
- SpanNotEnum *sxe = (SpanNotEnum *)(self->data);
816
- SpanEnum *inc = sxe->inc, *exc = sxe->exc;
817
- if (sxe->more_inc) { // move to next incl
818
- sxe->more_inc = inc->next(inc);
819
- }
1049
+ SpanEnum super;
1050
+ SpanEnum *inc;
1051
+ SpanEnum *exc;
1052
+ bool more_inc : 1;
1053
+ bool more_exc : 1;
1054
+ } SpanNotEnum;
820
1055
 
821
- while (sxe->more_inc && sxe->more_exc) {
822
- if (inc->doc(inc) > exc->doc(exc)) { // skip excl
823
- sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
824
- }
825
1056
 
826
- while (sxe->more_exc && // while excl is before
827
- (inc->doc(inc) == exc->doc(exc)) &&
828
- (exc->end(exc) <= inc->start(inc))) {
829
- sxe->more_exc = exc->next(exc); // increment excl
1057
+ static bool spanxe_next(SpanEnum *self)
1058
+ {
1059
+ SpanNotEnum *sxe = SpXEn(self);
1060
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1061
+ if (sxe->more_inc) { /* move to next incl */
1062
+ sxe->more_inc = inc->next(inc);
830
1063
  }
831
1064
 
832
- if (! sxe->more_exc || // if no intersection
833
- (inc->doc(inc) != exc->doc(exc)) ||
834
- inc->end(inc) <= exc->start(exc)) {
835
- break; // we found a match
836
- }
1065
+ while (sxe->more_inc && sxe->more_exc) {
1066
+ if (inc->doc(inc) > exc->doc(exc)) { /* skip excl */
1067
+ sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
1068
+ }
1069
+
1070
+ while (sxe->more_exc /* while excl is before */
1071
+ && (inc->doc(inc) == exc->doc(exc))
1072
+ && (exc->end(exc) <= inc->start(inc))) {
1073
+ sxe->more_exc = exc->next(exc); /* increment excl */
1074
+ }
1075
+
1076
+ if (! sxe->more_exc || /* if no intersection */
1077
+ (inc->doc(inc) != exc->doc(exc)) ||
1078
+ inc->end(inc) <= exc->start(exc)) {
1079
+ break; /* we found a match */
1080
+ }
837
1081
 
838
- sxe->more_inc = inc->next(inc); // intersected: keep scanning
839
- }
840
- return sxe->more_inc;
1082
+ sxe->more_inc = inc->next(inc); /* intersected: keep scanning */
1083
+ }
1084
+ return sxe->more_inc;
841
1085
  }
842
1086
 
843
- bool spanxe_skip_to(SpanEnum *self, int target)
1087
+ static bool spanxe_skip_to(SpanEnum *self, int target)
844
1088
  {
845
- SpanNotEnum *sxe = (SpanNotEnum *)(self->data);
846
- SpanEnum *inc = sxe->inc, *exc = sxe->exc;
847
- int doc;
1089
+ SpanNotEnum *sxe = SpXEn(self);
1090
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1091
+ int doc;
848
1092
 
849
- if (sxe->more_inc) { // move to next incl
850
- if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
851
- }
1093
+ if (sxe->more_inc) { /* move to next incl */
1094
+ if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
1095
+ }
852
1096
 
853
- if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
854
- sxe->more_exc = exc->skip_to(exc, doc);
855
- }
1097
+ if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
1098
+ sxe->more_exc = exc->skip_to(exc, doc);
1099
+ }
856
1100
 
857
- while (sxe->more_exc && // while excl is before
858
- inc->doc(inc) == exc->doc(exc) &&
859
- exc->end(exc) <= inc->start(inc)) {
860
- sxe->more_exc = exc->next(exc); // increment excl
861
- }
1101
+ while (sxe->more_exc /* while excl is before */
1102
+ && inc->doc(inc) == exc->doc(exc)
1103
+ && exc->end(exc) <= inc->start(inc)) {
1104
+ sxe->more_exc = exc->next(exc); /* increment excl */
1105
+ }
862
1106
 
863
- if (!sxe->more_exc || // if no intersection
1107
+ if (!sxe->more_exc || /* if no intersection */
864
1108
  inc->doc(inc) != exc->doc(exc) ||
865
1109
  inc->end(inc) <= exc->start(exc)) {
866
- return true; // we found a match
867
- }
1110
+ return true; /* we found a match */
1111
+ }
868
1112
 
869
- return spanxe_next(self); // scan to next match
1113
+ return spanxe_next(self); /* scan to next match */
870
1114
  }
871
1115
 
872
- int spanxe_doc(SpanEnum *self)
1116
+ static int spanxe_doc(SpanEnum *self)
873
1117
  {
874
- SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
875
- return inc->doc(inc);
1118
+ SpanEnum *inc = SpXEn(self)->inc;
1119
+ return inc->doc(inc);
876
1120
  }
877
1121
 
878
- int spanxe_start(SpanEnum *self)
1122
+ static int spanxe_start(SpanEnum *self)
879
1123
  {
880
- SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
881
- return inc->start(inc);
1124
+ SpanEnum *inc = SpXEn(self)->inc;
1125
+ return inc->start(inc);
882
1126
  }
883
1127
 
884
- int spanxe_end(SpanEnum *self)
1128
+ static int spanxe_end(SpanEnum *self)
885
1129
  {
886
- SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
887
- return inc->end(inc);
1130
+ SpanEnum *inc = SpXEn(self)->inc;
1131
+ return inc->end(inc);
888
1132
  }
889
1133
 
890
- char *spanxe_to_s(SpanEnum *self)
1134
+ static char *spanxe_to_s(SpanEnum *self)
891
1135
  {
892
- char *field = ((SpanQuery *)self->query->data)->field;
893
- char *query_str = self->query->to_s(self->query, field);
894
- char *res = strfmt("SpanNotEnum(%s)", query_str);
895
- free(query_str);
896
- return res;
1136
+ char *field = SpQ(self->query)->field;
1137
+ char *query_str = self->query->to_s(self->query, field);
1138
+ char *res = strfmt("SpanNotEnum(%s)", query_str);
1139
+ free(query_str);
1140
+ return res;
897
1141
  }
898
1142
 
899
- void spanxe_destroy(SpanEnum *self)
1143
+ static void spanxe_destroy(SpanEnum *self)
900
1144
  {
901
- SpanNotEnum *sxe = (SpanNotEnum *)self->data;
902
- sxe->inc->destroy(sxe->inc);
903
- sxe->exc->destroy(sxe->exc);
904
- free(sxe);
905
- free(self);
1145
+ SpanNotEnum *sxe = SpXEn(self);
1146
+ sxe->inc->destroy(sxe->inc);
1147
+ sxe->exc->destroy(sxe->exc);
1148
+ free(self);
906
1149
  }
907
1150
 
908
- SpanEnum *spanxe_create(Query *query, IndexReader *ir)
1151
+ static SpanEnum *spanxe_new(Query *query, IndexReader *ir)
909
1152
  {
910
- SpanEnum *self = ALLOC(SpanEnum);
911
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)query->data)->data;
1153
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanNotEnum));
1154
+ SpanNotEnum *sxe = SpXEn(self);
1155
+ SpanNotQuery *sxq = SpXQ(query);
912
1156
 
913
- SpanNotEnum *sxe = self->data = ALLOC(SpanNotEnum);
914
- sxe->inc = ((SpanQuery *)sxq->inc->data)->get_spans(sxq->inc, ir);
915
- sxe->exc = ((SpanQuery *)sxq->exc->data)->get_spans(sxq->exc, ir);
916
- sxe->more_inc = true;
917
- sxe->more_exc = sxe->exc->next(sxe->exc);
1157
+ sxe->inc = SpQ(sxq->inc)->get_spans(sxq->inc, ir);
1158
+ sxe->exc = SpQ(sxq->exc)->get_spans(sxq->exc, ir);
1159
+ sxe->more_inc = true;
1160
+ sxe->more_exc = sxe->exc->next(sxe->exc);
918
1161
 
919
- self->query = query;
920
- self->next = &spanxe_next;
921
- self->skip_to = &spanxe_skip_to;
922
- self->doc = &spanxe_doc;
923
- self->start = &spanxe_start;
924
- self->end = &spanxe_end;
925
- self->destroy = &spanxe_destroy;
926
- self->to_s = &spanxe_to_s;
1162
+ self->query = query;
1163
+ self->next = &spanxe_next;
1164
+ self->skip_to = &spanxe_skip_to;
1165
+ self->doc = &spanxe_doc;
1166
+ self->start = &spanxe_start;
1167
+ self->end = &spanxe_end;
1168
+ self->destroy = &spanxe_destroy;
1169
+ self->to_s = &spanxe_to_s;
927
1170
 
928
- return self;
1171
+ return self;
929
1172
  }
1173
+
930
1174
  /*****************************************************************************
931
1175
  *
932
- * SpanQuery
1176
+ * SpanWeight
933
1177
  *
934
1178
  *****************************************************************************/
935
1179
 
936
- void spanq_destroy(Query *self)
1180
+ #define SpW(weight) ((SpanWeight *)(weight))
1181
+ typedef struct SpanWeight
1182
+ {
1183
+ Weight super;
1184
+ HashSet *terms;
1185
+ } SpanWeight;
1186
+
1187
+ static Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
1188
+ {
1189
+ Explanation *expl;
1190
+ Explanation *idf_expl1;
1191
+ Explanation *idf_expl2;
1192
+ Explanation *query_expl;
1193
+ Explanation *qnorm_expl;
1194
+ Explanation *field_expl;
1195
+ Explanation *tf_expl;
1196
+ Scorer *scorer;
1197
+ uchar *field_norms;
1198
+ float field_norm;
1199
+ Explanation *field_norm_expl;
1200
+
1201
+ char *query_str;
1202
+ HashSet *terms = SpW(self)->terms;
1203
+ char *field = SpQ(self->query)->field;
1204
+ const int field_num = fis_get_field_num(ir->fis, field);
1205
+ char *doc_freqs = NULL;
1206
+ size_t df_i = 0;
1207
+ int i;
1208
+
1209
+ if (field_num < 0) {
1210
+ return expl_new(0.0, "field \"%s\" does not exist in the index", field);
1211
+ }
1212
+
1213
+ query_str = self->query->to_s(self->query, "");
1214
+
1215
+ for (i = 0; i < terms->size; i++) {
1216
+ char *term = (char *)terms->elems[i];
1217
+ REALLOC_N(doc_freqs, char, df_i + strlen(term) + 23);
1218
+ sprintf(doc_freqs + df_i, "%s=%d, ", term,
1219
+ ir->doc_freq(ir, field_num, term));
1220
+ df_i = strlen(doc_freqs);
1221
+ }
1222
+ /* remove the ',' at the end of the string if it exists */
1223
+ if (terms->size > 0) {
1224
+ df_i -= 2;
1225
+ doc_freqs[df_i] = '\0';
1226
+ }
1227
+ else {
1228
+ doc_freqs = "";
1229
+ }
1230
+
1231
+ expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, target);
1232
+
1233
+ /* We need two of these as it's included in both the query explanation
1234
+ * and the field explanation */
1235
+ idf_expl1 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1236
+ idf_expl2 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1237
+ if (terms->size > 0) {
1238
+ free(doc_freqs); /* only free if allocated */
1239
+ }
1240
+
1241
+ /* explain query weight */
1242
+ query_expl = expl_new(0.0, "query_weight(%s), product of:", query_str);
1243
+
1244
+ if (self->query->boost != 1.0) {
1245
+ expl_add_detail(query_expl, expl_new(self->query->boost, "boost"));
1246
+ }
1247
+
1248
+ expl_add_detail(query_expl, idf_expl1);
1249
+
1250
+ qnorm_expl = expl_new(self->qnorm, "query_norm");
1251
+ expl_add_detail(query_expl, qnorm_expl);
1252
+
1253
+ query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
1254
+
1255
+ expl_add_detail(expl, query_expl);
1256
+
1257
+ /* explain field weight */
1258
+ field_expl = expl_new(0.0, "field_weight(%s:%s in %d), product of:",
1259
+ field, query_str, target);
1260
+ free(query_str);
1261
+
1262
+ scorer = self->scorer(self, ir);
1263
+ tf_expl = scorer->explain(scorer, target);
1264
+ scorer->destroy(scorer);
1265
+ expl_add_detail(field_expl, tf_expl);
1266
+ expl_add_detail(field_expl, idf_expl2);
1267
+
1268
+ field_norms = ir->get_norms(ir, field_num);
1269
+ field_norm = (field_norms
1270
+ ? sim_decode_norm(self->similarity, field_norms[target])
1271
+ : (float)0.0);
1272
+ field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
1273
+ field, target);
1274
+ expl_add_detail(field_expl, field_norm_expl);
1275
+
1276
+ field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
1277
+
1278
+ /* combine them */
1279
+ if (query_expl->value == 1.0) {
1280
+ expl_destroy(expl);
1281
+ return field_expl;
1282
+ }
1283
+ else {
1284
+ expl->value = (query_expl->value * field_expl->value);
1285
+ expl_add_detail(expl, field_expl);
1286
+ return expl;
1287
+ }
1288
+ }
1289
+
1290
+ static char *spanw_to_s(Weight *self)
1291
+ {
1292
+ return strfmt("SpanWeight(%f)", self->value);
1293
+ }
1294
+
1295
+ static void spanw_destroy(Weight *self)
937
1296
  {
938
- SpanQuery *sq = (SpanQuery *)self->data;
939
- free(sq);
940
- q_destroy_i(self);
1297
+ hs_destroy(SpW(self)->terms);
1298
+ w_destroy(self);
1299
+ }
1300
+
1301
+ static Weight *spanw_new(Query *query, Searcher *searcher)
1302
+ {
1303
+ int i;
1304
+ Weight *self = w_new(SpanWeight, query);
1305
+ HashSet *terms = SpQ(query)->get_terms(query);
1306
+
1307
+ SpW(self)->terms = terms;
1308
+ self->scorer = &spansc_new;
1309
+ self->explain = &spanw_explain;
1310
+ self->to_s = &spanw_to_s;
1311
+ self->destroy = &spanw_destroy;
1312
+
1313
+ self->similarity = query->get_similarity(query, searcher);
1314
+
1315
+ self->idf = 0.0;
1316
+
1317
+ for (i = terms->size - 1; i >= 0; i--) {
1318
+ self->idf += sim_idf_term(self->similarity, SpQ(query)->field,
1319
+ (char *)terms->elems[i], searcher);
1320
+ }
1321
+
1322
+ return self;
941
1323
  }
942
1324
 
943
1325
  /*****************************************************************************
944
- *
945
1326
  * SpanTermQuery
946
- *
947
1327
  *****************************************************************************/
948
1328
 
949
- char *spantq_to_s(Query *self, char *field)
1329
+ static char *spantq_to_s(Query *self, const char *field)
950
1330
  {
951
- Term *term = (Term *)((SpanQuery *)self->data)->data;
952
- char *term_str, *res;
953
- if (field == term->field) {
954
- term_str = estrdup(term->text);
955
- } else {
956
- term_str = term_to_s(term);
957
- }
958
- res = strfmt("span_term(%s)", term_str);
959
- free(term_str);
960
- return res;
1331
+ if (field == SpQ(self)->field) {
1332
+ return strfmt("span_terms(%s)", SpTQ(self)->term);
1333
+ }
1334
+ else {
1335
+ return strfmt("span_terms(%s:%s)", SpQ(self)->field, SpTQ(self)->term);
1336
+ }
961
1337
  }
962
1338
 
963
- static void spantq_destroy(Query *self)
1339
+ static void spantq_destroy_i(Query *self)
964
1340
  {
965
- SpanQuery *sq = (SpanQuery *)self->data;
966
- if (self->destroy_all) {
967
- Term *term = (Term *)sq->data;
968
- term_destroy(term);
969
- }
970
- free(sq);
971
- q_destroy_i(self);
1341
+ free(SpTQ(self)->term);
1342
+ free(SpQ(self)->field);
1343
+ spanq_destroy_i(self);
972
1344
  }
973
1345
 
974
1346
  static void spantq_extract_terms(Query *self, HashSet *terms)
975
1347
  {
976
- Term *term = (Term *)((SpanQuery *)self->data)->data;
977
- hs_add(terms, term_clone(term));
1348
+ hs_add(terms, term_new(SpQ(self)->field, SpTQ(self)->term));
978
1349
  }
979
1350
 
980
1351
  static HashSet *spantq_get_terms(Query *self)
981
1352
  {
982
- Term *term = (Term *)((SpanQuery *)self->data)->data;
983
- HashSet *terms = term_set_create();
984
- hs_add(terms, term_clone(term));
985
- return terms;
1353
+ HashSet *terms = hs_new_str(&free);
1354
+ hs_add(terms, estrdup(SpTQ(self)->term));
1355
+ return terms;
986
1356
  }
987
1357
 
988
- static uint spantq_hash(Query *self)
1358
+ static ulong spantq_hash(Query *self)
989
1359
  {
990
- return term_hash((Term *)((SpanQuery *)self->data)->data);
1360
+ return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
991
1361
  }
992
1362
 
993
1363
  static int spantq_eq(Query *self, Query *o)
994
1364
  {
995
- return term_eq((Term *)((SpanQuery *)self->data)->data,
996
- (Term *)((SpanQuery *)o->data)->data);
1365
+ return spanq_eq(self, o) && strcmp(SpTQ(self)->term, SpTQ(o)->term) == 0;
997
1366
  }
998
1367
 
999
- Query *spantq_create(Term *term)
1368
+ Query *spantq_new(const char *field, const char *term)
1000
1369
  {
1001
- Query *self = q_create();
1370
+ Query *self = q_new(SpanTermQuery);
1002
1371
 
1003
- SpanQuery *sq = ALLOC(SpanQuery);
1004
- sq->data = term;
1005
- sq->get_spans = &spante_create;
1006
- sq->get_terms = &spantq_get_terms;
1007
- sq->field = term->field;
1008
- self->data = sq;
1372
+ SpTQ(self)->term = estrdup(term);
1373
+ SpQ(self)->field = estrdup(field);
1374
+ SpQ(self)->get_spans = &spante_new;
1375
+ SpQ(self)->get_terms = &spantq_get_terms;
1009
1376
 
1010
- self->type = SPAN_TERM_QUERY;
1011
- self->extract_terms = &spantq_extract_terms;
1012
- self->to_s = &spantq_to_s;
1013
- self->hash = &spantq_hash;
1014
- self->eq = &spantq_eq;
1015
- self->destroy_i = &spantq_destroy;
1016
- self->create_weight_i = &spanw_create;
1017
- return self;
1377
+ self->type = SPAN_TERM_QUERY;
1378
+ self->extract_terms = &spantq_extract_terms;
1379
+ self->to_s = &spantq_to_s;
1380
+ self->hash = &spantq_hash;
1381
+ self->eq = &spantq_eq;
1382
+ self->destroy_i = &spantq_destroy_i;
1383
+ self->create_weight_i = &spanw_new;
1384
+ self->get_matchv_i = &spanq_get_matchv_i;
1385
+ return self;
1018
1386
  }
1019
1387
 
1020
1388
  /*****************************************************************************
@@ -1023,91 +1391,87 @@ Query *spantq_create(Term *term)
1023
1391
  *
1024
1392
  *****************************************************************************/
1025
1393
 
1026
- char *spanfq_to_s(Query *self, char *field)
1394
+ static char *spanfq_to_s(Query *self, const char *field)
1027
1395
  {
1028
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1029
- Query *match = sfq->match;
1030
- char *q_str = match->to_s(match, field);
1031
- char *res = strfmt("span_first(%s, %d)", q_str, sfq->end);
1032
- free(q_str);
1033
- return res;
1396
+ Query *match = SpFQ(self)->match;
1397
+ char *q_str = match->to_s(match, field);
1398
+ char *res = strfmt("span_first(%s, %d)", q_str, SpFQ(self)->end);
1399
+ free(q_str);
1400
+ return res;
1034
1401
  }
1035
1402
 
1036
- void spanfq_extract_terms(Query *self, HashSet *terms)
1403
+ static void spanfq_extract_terms(Query *self, HashSet *terms)
1037
1404
  {
1038
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1039
- sfq->match->extract_terms(sfq->match, terms);
1405
+ SpFQ(self)->match->extract_terms(SpFQ(self)->match, terms);
1040
1406
  }
1041
1407
 
1042
- HashSet *spanfq_get_terms(Query *self)
1408
+ static HashSet *spanfq_get_terms(Query *self)
1043
1409
  {
1044
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1045
- SpanQuery *match_sq = (SpanQuery *)sfq->match->data;
1046
- return match_sq->get_terms(sfq->match);
1410
+ SpanFirstQuery *sfq = SpFQ(self);
1411
+ return SpQ(sfq->match)->get_terms(sfq->match);
1047
1412
  }
1048
1413
 
1049
- Query *spanfq_rewrite(Query *self, IndexReader *ir)
1414
+ static Query *spanfq_rewrite(Query *self, IndexReader *ir)
1050
1415
  {
1051
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1052
- Query *q, *rq;
1416
+ Query *q, *rq;
1053
1417
 
1054
- q = sfq->match;
1055
- rq = q->rewrite(q, ir);
1056
- if (rq == q || self->destroy_all) q_deref(q);
1057
- sfq->match = rq;
1418
+ q = SpFQ(self)->match;
1419
+ rq = q->rewrite(q, ir);
1420
+ q_deref(q);
1421
+ SpFQ(self)->match = rq;
1058
1422
 
1059
- self->ref_cnt++;
1060
- return self; /* no clauses rewrote */
1423
+ self->ref_cnt++;
1424
+ return self; /* no clauses rewrote */
1061
1425
  }
1062
1426
 
1063
- void spanfq_destroy(Query *self)
1427
+ static void spanfq_destroy_i(Query *self)
1064
1428
  {
1065
- SpanQuery *sq = (SpanQuery *)self->data;
1066
- SpanFirstQuery *sfq = (SpanFirstQuery *)sq->data;
1067
- if (self->destroy_all) q_deref(sfq->match);
1068
- free(sfq);
1069
- free(sq);
1070
- q_destroy_i(self);
1429
+ q_deref(SpFQ(self)->match);
1430
+ spanq_destroy_i(self);
1071
1431
  }
1072
1432
 
1073
- static uint spanfq_hash(Query *self)
1433
+ static ulong spanfq_hash(Query *self)
1074
1434
  {
1075
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1076
- return sfq->match->hash(sfq->match) ^ sfq->end;
1435
+ return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
1436
+ ^ SpFQ(self)->end;
1077
1437
  }
1078
1438
 
1079
1439
  static int spanfq_eq(Query *self, Query *o)
1080
1440
  {
1081
- SpanFirstQuery *sfq1 = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1082
- SpanFirstQuery *sfq2 = (SpanFirstQuery *)((SpanQuery *)o->data)->data;
1083
- return sfq1->match->eq(sfq1->match, sfq2->match) && (sfq1->end == sfq2->end);
1441
+ SpanFirstQuery *sfq1 = SpFQ(self);
1442
+ SpanFirstQuery *sfq2 = SpFQ(o);
1443
+ return spanq_eq(self, o) && sfq1->match->eq(sfq1->match, sfq2->match)
1444
+ && (sfq1->end == sfq2->end);
1084
1445
  }
1085
1446
 
1086
- Query *spanfq_create(Query *match, int end)
1447
+ Query *spanfq_new_nr(Query *match, int end)
1087
1448
  {
1088
- Query *self = q_create();
1449
+ Query *self = q_new(SpanFirstQuery);
1089
1450
 
1090
- SpanQuery *sq = ALLOC(SpanQuery);
1451
+ SpFQ(self)->match = match;
1452
+ SpFQ(self)->end = end;
1091
1453
 
1092
- SpanFirstQuery *sfq = ALLOC(SpanFirstQuery);
1093
- sfq->match = match;
1094
- sfq->end = end;
1095
- sq->data = sfq;
1454
+ SpQ(self)->field = SpQ(match)->field;
1455
+ SpQ(self)->get_spans = &spanfe_new;
1456
+ SpQ(self)->get_terms = &spanfq_get_terms;
1096
1457
 
1097
- sq->get_spans = &spanfe_create;
1098
- sq->get_terms = &spanfq_get_terms;
1099
- sq->field = ((SpanQuery *)match->data)->field;
1100
- self->data = sq;
1458
+ self->type = SPAN_FIRST_QUERY;
1459
+ self->rewrite = &spanfq_rewrite;
1460
+ self->extract_terms = &spanfq_extract_terms;
1461
+ self->to_s = &spanfq_to_s;
1462
+ self->hash = &spanfq_hash;
1463
+ self->eq = &spanfq_eq;
1464
+ self->destroy_i = &spanfq_destroy_i;
1465
+ self->create_weight_i = &spanw_new;
1466
+ self->get_matchv_i = &spanq_get_matchv_i;
1467
+
1468
+ return self;
1469
+ }
1101
1470
 
1102
- self->type = SPAN_FIRST_QUERY;
1103
- self->rewrite = &spanfq_rewrite;
1104
- self->extract_terms = &spanfq_extract_terms;
1105
- self->to_s = &spanfq_to_s;
1106
- self->hash = &spanfq_hash;
1107
- self->eq = &spanfq_eq;
1108
- self->destroy_i = &spanfq_destroy;
1109
- self->create_weight_i = &spanw_create;
1110
- return self;
1471
+ Query *spanfq_new(Query *match, int end)
1472
+ {
1473
+ REF(match);
1474
+ return spanfq_new_nr(match, end);
1111
1475
  }
1112
1476
 
1113
1477
  /*****************************************************************************
@@ -1116,154 +1480,182 @@ Query *spanfq_create(Query *match, int end)
1116
1480
  *
1117
1481
  *****************************************************************************/
1118
1482
 
1119
- char *spanoq_to_s(Query *self, char *field)
1120
- {
1121
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1122
- char *res = estrdup("span_or["), *q_str;
1123
- Query *clause;
1124
- int i;
1125
- for (i = 0; i < soq->c_cnt; i++) {
1126
- clause = soq->clauses[i];
1127
- q_str = clause->to_s(clause, field);
1128
- REALLOC_N(res, char, strlen(res) + strlen(q_str) + 10);
1129
- if (i > 0) strcat(res, ", ");
1130
- strcat(res, q_str);
1131
- free(q_str);
1132
- }
1133
- strcat(res, "]");
1483
+ static char *spanoq_to_s(Query *self, const char *field)
1484
+ {
1485
+ int i;
1486
+ SpanOrQuery *soq = SpOQ(self);
1487
+ char *res, *res_p;
1488
+ char **q_strs = ALLOC_N(char *, soq->c_cnt);
1489
+ int len = 50;
1490
+ for (i = 0; i < soq->c_cnt; i++) {
1491
+ Query *clause = soq->clauses[i];
1492
+ q_strs[i] = clause->to_s(clause, field);
1493
+ len += strlen(q_strs[i]) + 2;
1494
+ }
1134
1495
 
1135
- return res;
1496
+ res_p = res = ALLOC_N(char, len);
1497
+ sprintf(res_p, "span_or[ ");
1498
+ res_p += strlen(res_p);
1499
+ for (i = 0; i < soq->c_cnt; i++) {
1500
+ sprintf(res_p, "%s, ", q_strs[i]);
1501
+ free(q_strs[i]);
1502
+ res_p += strlen(res_p);
1503
+ }
1504
+ free(q_strs);
1505
+
1506
+ sprintf(res_p - 2, " ]");
1507
+ return res;
1136
1508
  }
1137
1509
 
1138
- void spanoq_extract_terms(Query *self, HashSet *terms)
1510
+ static void spanoq_extract_terms(Query *self, HashSet *terms)
1139
1511
  {
1140
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1141
- Query *clause;
1142
- int i;
1143
- for (i = 0; i < soq->c_cnt; i++) {
1144
- clause = soq->clauses[i];
1145
- clause->extract_terms(clause, terms);
1146
- }
1512
+ SpanOrQuery *soq = SpOQ(self);
1513
+ int i;
1514
+ for (i = 0; i < soq->c_cnt; i++) {
1515
+ Query *clause = soq->clauses[i];
1516
+ clause->extract_terms(clause, terms);
1517
+ }
1147
1518
  }
1148
1519
 
1149
- HashSet *spanoq_get_terms(Query *self)
1520
+ static HashSet *spanoq_get_terms(Query *self)
1150
1521
  {
1151
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1152
- HashSet *terms = term_set_create();
1153
- Query *clause;
1154
- int i;
1155
- for (i = 0; i < soq->c_cnt; i++) {
1156
- clause = soq->clauses[i];
1157
- clause->extract_terms(clause, terms);
1158
- }
1522
+ SpanOrQuery *soq = SpOQ(self);
1523
+ HashSet *terms = hs_new_str(&free);
1524
+ int i;
1525
+ for (i = 0; i < soq->c_cnt; i++) {
1526
+ Query *clause = soq->clauses[i];
1527
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
1528
+ hs_merge(terms, sub_terms);
1529
+ }
1159
1530
 
1160
- return terms;
1531
+ return terms;
1161
1532
  }
1162
1533
 
1163
- SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1534
+ static SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1164
1535
  {
1165
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1166
- Query *q;
1167
- if (soq->c_cnt == 1) {
1168
- q = soq->clauses[0];
1169
- return ((SpanQuery *)q->data)->get_spans(q, ir);
1170
- }
1536
+ SpanOrQuery *soq = SpOQ(self);
1537
+ if (soq->c_cnt == 1) {
1538
+ Query *q = soq->clauses[0];
1539
+ return SpQ(q)->get_spans(q, ir);
1540
+ }
1171
1541
 
1172
- return spanoe_create(self, ir);
1542
+ return spanoe_new(self, ir);
1173
1543
  }
1174
1544
 
1175
- Query *spanoq_rewrite(Query *self, IndexReader *ir)
1545
+ static Query *spanoq_rewrite(Query *self, IndexReader *ir)
1176
1546
  {
1177
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1178
- Query *clause, *rewritten;
1179
- int i;
1180
- /* replace clauses with their rewritten queries */
1181
- for (i = 0; i < soq->c_cnt; i++) {
1182
- clause = soq->clauses[i];
1183
- rewritten = clause->rewrite(clause, ir);
1184
- if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1185
- soq->clauses[i] = rewritten;
1186
- }
1547
+ SpanOrQuery *soq = SpOQ(self);
1548
+ int i;
1187
1549
 
1188
- self->ref_cnt++;
1189
- return self;
1550
+ /* replace clauses with their rewritten queries */
1551
+ for (i = 0; i < soq->c_cnt; i++) {
1552
+ Query *clause = soq->clauses[i];
1553
+ Query *rewritten = clause->rewrite(clause, ir);
1554
+ q_deref(clause);
1555
+ soq->clauses[i] = rewritten;
1556
+ }
1557
+
1558
+ self->ref_cnt++;
1559
+ return self;
1190
1560
  }
1191
1561
 
1192
- void spanoq_destroy(Query *self)
1562
+ static void spanoq_destroy_i(Query *self)
1193
1563
  {
1194
- SpanQuery *sq = (SpanQuery *)self->data;
1195
- SpanOrQuery *soq = (SpanOrQuery *)sq->data;
1564
+ SpanOrQuery *soq = SpOQ(self);
1196
1565
 
1197
- if (self->destroy_all) {
1198
- Query *clause;
1199
1566
  int i;
1200
1567
  for (i = 0; i < soq->c_cnt; i++) {
1201
- clause = soq->clauses[i];
1202
- q_deref(clause);
1568
+ Query *clause = soq->clauses[i];
1569
+ q_deref(clause);
1203
1570
  }
1204
1571
  free(soq->clauses);
1205
- }
1206
1572
 
1207
-
1208
- free(soq);
1209
- free(sq);
1210
- q_destroy_i(self);
1573
+ spanq_destroy_i(self);
1211
1574
  }
1212
1575
 
1213
- static uint spanoq_hash(Query *self)
1576
+ static ulong spanoq_hash(Query *self)
1214
1577
  {
1215
- int i;
1216
- uint hash = 0;
1217
- Query *q;
1218
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1578
+ int i;
1579
+ ulong hash = spanq_hash(self);
1580
+ SpanOrQuery *soq = SpOQ(self);
1219
1581
 
1220
- for (i = 0; i < soq->c_cnt; i++) {
1221
- q = soq->clauses[i];
1222
- hash ^= q->hash(q);
1223
- }
1224
- return hash;
1582
+ for (i = 0; i < soq->c_cnt; i++) {
1583
+ Query *q = soq->clauses[i];
1584
+ hash ^= q->hash(q);
1585
+ }
1586
+ return hash;
1225
1587
  }
1226
1588
 
1227
1589
  static int spanoq_eq(Query *self, Query *o)
1228
1590
  {
1229
- int i;
1230
- Query *q1, *q2;
1231
- SpanOrQuery *soq1 = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1232
- SpanOrQuery *soq2 = (SpanOrQuery *)((SpanQuery *)o->data)->data;
1233
- if (soq1->c_cnt != soq2->c_cnt) return false;
1234
- for (i = 0; i < soq1->c_cnt; i++) {
1235
- q1 = soq1->clauses[i];
1236
- q2 = soq2->clauses[i];
1237
- if (!q1->eq(q1, q2)) return false;
1238
- }
1239
- return true;
1591
+ int i;
1592
+ Query *q1, *q2;
1593
+ SpanOrQuery *soq1 = SpOQ(self);
1594
+ SpanOrQuery *soq2 = SpOQ(o);
1595
+
1596
+ if (!spanq_eq(self, o) || soq1->c_cnt != soq2->c_cnt) {
1597
+ return false;
1598
+ }
1599
+ for (i = 0; i < soq1->c_cnt; i++) {
1600
+ q1 = soq1->clauses[i];
1601
+ q2 = soq2->clauses[i];
1602
+ if (!q1->eq(q1, q2)) {
1603
+ return false;
1604
+ }
1605
+ }
1606
+ return true;
1240
1607
  }
1241
1608
 
1242
- Query *spanoq_create(Query **clauses, int c_cnt)
1609
+ Query *spanoq_new()
1243
1610
  {
1244
- Query *self = q_create();
1611
+ Query *self = q_new(SpanOrQuery);
1612
+ SpOQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
1613
+ SpOQ(self)->c_capa = CLAUSE_INIT_CAPA;
1614
+
1615
+ SpQ(self)->field = (char *)EMPTY_STRING;
1616
+ SpQ(self)->get_spans = &spanoq_get_spans;
1617
+ SpQ(self)->get_terms = &spanoq_get_terms;
1245
1618
 
1246
- SpanQuery *sq = ALLOC(SpanQuery);
1619
+ self->type = SPAN_OR_QUERY;
1620
+ self->rewrite = &spanoq_rewrite;
1621
+ self->extract_terms = &spanoq_extract_terms;
1622
+ self->to_s = &spanoq_to_s;
1623
+ self->hash = &spanoq_hash;
1624
+ self->eq = &spanoq_eq;
1625
+ self->destroy_i = &spanoq_destroy_i;
1626
+ self->create_weight_i = &spanw_new;
1627
+ self->get_matchv_i = &spanq_get_matchv_i;
1247
1628
 
1248
- SpanOrQuery *soq = ALLOC(SpanOrQuery);
1249
- soq->clauses = clauses;
1250
- soq->c_cnt = c_cnt;
1251
- sq->data = soq;
1629
+ return self;
1630
+ }
1252
1631
 
1253
- sq->get_spans = &spanoq_get_spans;
1254
- sq->get_terms = &spanoq_get_terms;
1255
- sq->field = ((SpanQuery *)clauses[0]->data)->field;
1256
- self->data = sq;
1632
+ Query *spanoq_add_clause_nr(Query *self, Query *clause)
1633
+ {
1634
+ const int curr_index = SpOQ(self)->c_cnt++;
1635
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
1636
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanOrQuery. This is not a "
1637
+ "SpanQuery.", q_get_query_name(clause->type));
1638
+ }
1639
+ if (curr_index == 0) {
1640
+ SpQ(self)->field = SpQ(clause)->field;
1641
+ }
1642
+ else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
1643
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1644
+ "Attempted to add a SpanQuery with field \"%s\" to a SpanOrQuery "
1645
+ "with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
1646
+ }
1647
+ if (curr_index >= SpOQ(self)->c_capa) {
1648
+ SpOQ(self)->c_capa <<= 1;
1649
+ REALLOC_N(SpOQ(self)->clauses, Query *, SpOQ(self)->c_capa);
1650
+ }
1651
+ SpOQ(self)->clauses[curr_index] = clause;
1652
+ return clause;
1653
+ }
1257
1654
 
1258
- self->type = SPAN_OR_QUERY;
1259
- self->rewrite = &spanoq_rewrite;
1260
- self->extract_terms = &spanoq_extract_terms;
1261
- self->to_s = &spanoq_to_s;
1262
- self->hash = &spanoq_hash;
1263
- self->eq = &spanoq_eq;
1264
- self->destroy_i = &spanoq_destroy;
1265
- self->create_weight_i = &spanw_create;
1266
- return self;
1655
+ Query *spanoq_add_clause(Query *self, Query *clause)
1656
+ {
1657
+ REF(clause);
1658
+ return spanoq_add_clause_nr(self, clause);
1267
1659
  }
1268
1660
 
1269
1661
  /*****************************************************************************
@@ -1272,163 +1664,188 @@ Query *spanoq_create(Query **clauses, int c_cnt)
1272
1664
  *
1273
1665
  *****************************************************************************/
1274
1666
 
1275
- char *spannq_to_s(Query *self, char *field)
1276
- {
1277
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1278
- char *res = estrdup("span_near(["), *q_str;
1279
- Query *clause;
1280
- int i;
1281
- for (i = 0; i < snq->c_cnt; i++) {
1282
- clause = snq->clauses[i];
1283
- q_str = clause->to_s(clause, field);
1284
- REALLOC_N(res, char, strlen(res) + strlen(q_str) + 10);
1285
- if (i > 0) strcat(res, ", ");
1286
- strcat(res, q_str);
1287
- free(q_str);
1288
- }
1289
- REALLOC_N(res, char, strlen(res) + 40);
1290
- sprintf(res + strlen(res), "], %d, %s)", snq->slop,
1291
- snq->in_order ? "Ordered" : "Unordered");
1667
+ static char *spannq_to_s(Query *self, const char *field)
1668
+ {
1669
+ int i;
1670
+ SpanNearQuery *snq = SpNQ(self);
1671
+ char *res, *res_p;
1672
+ char **q_strs = ALLOC_N(char *, snq->c_cnt);
1673
+ int len = 50;
1674
+ for (i = 0; i < snq->c_cnt; i++) {
1675
+ Query *clause = snq->clauses[i];
1676
+ q_strs[i] = clause->to_s(clause, field);
1677
+ len += strlen(q_strs[i]);
1678
+ }
1292
1679
 
1293
- return res;
1680
+ res_p = res = ALLOC_N(char, len);
1681
+ sprintf(res_p, "span_near[ ");
1682
+ res_p += strlen(res_p);
1683
+ for (i = 0; i < snq->c_cnt; i++) {
1684
+ sprintf(res_p, "%s, ", q_strs[i]);
1685
+ free(q_strs[i]);
1686
+ res_p += strlen(res_p);
1687
+ }
1688
+ free(q_strs);
1689
+
1690
+ sprintf(res_p - 2, " ]");
1691
+ return res;
1294
1692
  }
1295
1693
 
1296
- void spannq_extract_terms(Query *self, HashSet *terms)
1694
+ static void spannq_extract_terms(Query *self, HashSet *terms)
1297
1695
  {
1298
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1299
- Query *clause;
1300
- int i;
1301
- for (i = 0; i < snq->c_cnt; i++) {
1302
- clause = snq->clauses[i];
1303
- clause->extract_terms(clause, terms);
1304
- }
1696
+ SpanNearQuery *snq = SpNQ(self);
1697
+ int i;
1698
+ for (i = 0; i < snq->c_cnt; i++) {
1699
+ Query *clause = snq->clauses[i];
1700
+ clause->extract_terms(clause, terms);
1701
+ }
1305
1702
  }
1306
1703
 
1307
- HashSet *spannq_get_terms(Query *self)
1704
+ static HashSet *spannq_get_terms(Query *self)
1308
1705
  {
1309
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1310
- HashSet *terms = term_set_create();
1311
- Query *clause;
1312
- int i;
1313
- for (i = 0; i < snq->c_cnt; i++) {
1314
- clause = snq->clauses[i];
1315
- clause->extract_terms(clause, terms);
1316
- }
1706
+ SpanNearQuery *snq = SpNQ(self);
1707
+ HashSet *terms = hs_new_str(&free);
1708
+ int i;
1709
+ for (i = 0; i < snq->c_cnt; i++) {
1710
+ Query *clause = snq->clauses[i];
1711
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
1712
+ hs_merge(terms, sub_terms);
1713
+ }
1317
1714
 
1318
- return terms;
1715
+ return terms;
1319
1716
  }
1320
1717
 
1321
- SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
1718
+ static SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
1322
1719
  {
1323
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1324
- Query *q;
1720
+ SpanNearQuery *snq = SpNQ(self);
1325
1721
 
1326
- if (snq->c_cnt == 1) {
1327
- q = snq->clauses[0];
1328
- return ((SpanQuery *)q->data)->get_spans(q, ir);
1329
- }
1722
+ if (snq->c_cnt == 1) {
1723
+ Query *q = snq->clauses[0];
1724
+ return SpQ(q)->get_spans(q, ir);
1725
+ }
1330
1726
 
1331
- return spanne_create(self, ir);
1727
+ return spanne_new(self, ir);
1332
1728
  }
1333
1729
 
1334
- Query *spannq_rewrite(Query *self, IndexReader *ir)
1730
+ static Query *spannq_rewrite(Query *self, IndexReader *ir)
1335
1731
  {
1336
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1337
- Query *clause, *rewritten;
1338
- int i;
1339
- for (i = 0; i < snq->c_cnt; i++) {
1340
- clause = snq->clauses[i];
1341
- rewritten = clause->rewrite(clause, ir);
1342
- if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1343
- snq->clauses[i] = rewritten;
1344
- }
1732
+ SpanNearQuery *snq = SpNQ(self);
1733
+ int i;
1734
+ for (i = 0; i < snq->c_cnt; i++) {
1735
+ Query *clause = snq->clauses[i];
1736
+ Query *rewritten = clause->rewrite(clause, ir);
1737
+ q_deref(clause);
1738
+ snq->clauses[i] = rewritten;
1739
+ }
1345
1740
 
1346
- self->ref_cnt++;
1347
- return self;
1741
+ self->ref_cnt++;
1742
+ return self;
1348
1743
  }
1349
1744
 
1350
- void spannq_destroy(Query *self)
1745
+ static void spannq_destroy(Query *self)
1351
1746
  {
1352
- SpanQuery *sq = (SpanQuery *)self->data;
1353
- SpanNearQuery *snq = (SpanNearQuery *)sq->data;
1747
+ SpanNearQuery *snq = SpNQ(self);
1354
1748
 
1355
- if (self->destroy_all) {
1356
- Query *clause;
1357
1749
  int i;
1358
1750
  for (i = 0; i < snq->c_cnt; i++) {
1359
- clause = snq->clauses[i];
1360
- q_deref(clause);
1751
+ Query *clause = snq->clauses[i];
1752
+ q_deref(clause);
1361
1753
  }
1362
1754
  free(snq->clauses);
1363
- }
1364
1755
 
1365
- free(snq);
1366
- free(sq);
1367
- q_destroy_i(self);
1756
+ spanq_destroy_i(self);
1368
1757
  }
1369
1758
 
1370
- static uint spannq_hash(Query *self)
1759
+ static ulong spannq_hash(Query *self)
1371
1760
  {
1372
- int i;
1373
- uint hash = 0;
1374
- Query *q;
1375
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1761
+ int i;
1762
+ ulong hash = spanq_hash(self);
1763
+ SpanNearQuery *snq = SpNQ(self);
1376
1764
 
1377
- for (i = 0; i < snq->c_cnt; i++) {
1378
- q = snq->clauses[i];
1379
- hash ^= q->hash(q);
1380
- }
1381
- return ((hash ^ snq->slop) << 1) | snq->in_order;
1765
+ for (i = 0; i < snq->c_cnt; i++) {
1766
+ Query *q = snq->clauses[i];
1767
+ hash ^= q->hash(q);
1768
+ }
1769
+ return ((hash ^ snq->slop) << 1) | snq->in_order;
1382
1770
  }
1383
1771
 
1384
1772
/**
 * Compare two SpanNearQueries. They are equal when spanq_eq accepts the
 * pair, clause count, slop and in_order all match, and every clause of
 * +self+ reports equality (via its own eq) with the clause at the same
 * position in +o+.
 *
 * @param self first query
 * @param o    second query
 * @return true when equal, false otherwise
 */
static int spannq_eq(Query *self, Query *o)
{
    SpanNearQuery *lhs = SpNQ(self);
    SpanNearQuery *rhs = SpNQ(o);
    int idx;

    if (!spanq_eq(self, o)) {
        return false;
    }
    if (lhs->c_cnt != rhs->c_cnt) {
        return false;
    }
    if (lhs->slop != rhs->slop) {
        return false;
    }
    if (lhs->in_order != rhs->in_order) {
        return false;
    }

    for (idx = 0; idx < lhs->c_cnt; idx++) {
        Query *a = lhs->clauses[idx];
        Query *b = rhs->clauses[idx];
        if (!a->eq(a, b)) {
            return false;
        }
    }

    return true;
}
1404
1795
 
1405
- Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1796
+ Query *spannq_new(int slop, bool in_order)
1406
1797
  {
1407
- Query *self = q_create();
1798
+ Query *self = q_new(SpanNearQuery);
1408
1799
 
1409
- SpanQuery *sq = ALLOC(SpanQuery);
1800
+ SpNQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
1801
+ SpNQ(self)->c_capa = CLAUSE_INIT_CAPA;
1802
+ SpNQ(self)->slop = slop;
1803
+ SpNQ(self)->in_order = in_order;
1410
1804
 
1411
- SpanNearQuery *snq = ALLOC(SpanNearQuery);
1412
- snq->clauses = clauses;
1413
- snq->c_cnt = c_cnt;
1414
- snq->slop = slop;
1415
- snq->in_order = in_order;
1416
- sq->data = snq;
1805
+ SpQ(self)->get_spans = &spannq_get_spans;
1806
+ SpQ(self)->get_terms = &spannq_get_terms;
1807
+ SpQ(self)->field = (char *)EMPTY_STRING;
1417
1808
 
1418
- sq->get_spans = &spannq_get_spans;
1419
- sq->get_terms = &spannq_get_terms;
1420
- sq->field = ((SpanQuery *)clauses[0]->data)->field;
1421
- self->data = sq;
1809
+ self->type = SPAN_NEAR_QUERY;
1810
+ self->rewrite = &spannq_rewrite;
1811
+ self->extract_terms = &spannq_extract_terms;
1812
+ self->to_s = &spannq_to_s;
1813
+ self->hash = &spannq_hash;
1814
+ self->eq = &spannq_eq;
1815
+ self->destroy_i = &spannq_destroy;
1816
+ self->create_weight_i = &spanw_new;
1817
+ self->get_matchv_i = &spanq_get_matchv_i;
1422
1818
 
1423
- self->type = SPAN_NEAR_QUERY;
1424
- self->rewrite = &spannq_rewrite;
1425
- self->extract_terms = &spannq_extract_terms;
1426
- self->to_s = &spannq_to_s;
1427
- self->hash = &spannq_hash;
1428
- self->eq = &spannq_eq;
1429
- self->destroy_i = &spannq_destroy;
1430
- self->create_weight_i = &spanw_create;
1431
- return self;
1819
+ return self;
1820
+ }
1821
+
1822
+ Query *spannq_add_clause_nr(Query *self, Query *clause)
1823
+ {
1824
+ const int curr_index = SpNQ(self)->c_cnt++;
1825
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
1826
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanNearQuery. This is not a "
1827
+ "SpanQuery.", q_get_query_name(clause->type));
1828
+ }
1829
+ if (curr_index == 0) {
1830
+ SpQ(self)->field = SpQ(clause)->field;
1831
+ }
1832
+ else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
1833
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1834
+ "Attempted to add a SpanQuery with field \"%s\" to SpanNearQuery "
1835
+ "with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
1836
+ }
1837
+ if (curr_index >= SpNQ(self)->c_capa) {
1838
+ SpNQ(self)->c_capa <<= 1;
1839
+ REALLOC_N(SpNQ(self)->clauses, Query *, SpNQ(self)->c_capa);
1840
+ }
1841
+ SpNQ(self)->clauses[curr_index] = clause;
1842
+ return clause;
1843
+ }
1844
+
1845
+ Query *spannq_add_clause(Query *self, Query *clause)
1846
+ {
1847
+ REF(clause);
1848
+ return spannq_add_clause_nr(self, clause);
1432
1849
  }
1433
1850
 
1434
1851
  /*****************************************************************************
@@ -1437,213 +1854,110 @@ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1437
1854
  *
1438
1855
  *****************************************************************************/
1439
1856
 
1440
- char *spanxq_to_s(Query *self, char *field)
1857
+ static char *spanxq_to_s(Query *self, const char *field)
1441
1858
  {
1442
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1443
- char *inc_s = sxq->inc->to_s(sxq->inc, field);
1444
- char *exc_s = sxq->exc->to_s(sxq->exc, field);
1445
- char *res = strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
1446
-
1447
- free(inc_s);
1448
- free(exc_s);
1449
- return res;
1859
+ SpanNotQuery *sxq = SpXQ(self);
1860
+ char *inc_s = sxq->inc->to_s(sxq->inc, field);
1861
+ char *exc_s = sxq->exc->to_s(sxq->exc, field);
1862
+ char *res = strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
1863
+
1864
+ free(inc_s);
1865
+ free(exc_s);
1866
+ return res;
1450
1867
  }
1451
1868
 
1452
- void spanxq_extract_terms(Query *self, HashSet *terms)
1869
+ static void spanxq_extract_terms(Query *self, HashSet *terms)
1453
1870
  {
1454
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1455
- sxq->inc->extract_terms(sxq->inc, terms);
1871
+ SpXQ(self)->inc->extract_terms(SpXQ(self)->inc, terms);
1456
1872
  }
1457
1873
 
1458
- HashSet *spanxq_get_terms(Query *self)
1874
+ static HashSet *spanxq_get_terms(Query *self)
1459
1875
  {
1460
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1461
- HashSet *terms = term_set_create();
1462
- sxq->inc->extract_terms(sxq->inc, terms);
1463
- return terms;
1876
+ return SpQ(SpXQ(self)->inc)->get_terms(SpXQ(self)->inc);
1464
1877
  }
1465
1878
 
1466
- Query *spanxq_rewrite(Query *self, IndexReader *ir)
1879
+ static Query *spanxq_rewrite(Query *self, IndexReader *ir)
1467
1880
  {
1468
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1469
- Query *q, *rq;
1881
+ SpanNotQuery *sxq = SpXQ(self);
1882
+ Query *q, *rq;
1470
1883
 
1471
- /* rewrite inclusive query */
1472
- q = sxq->inc;
1473
- rq = q->rewrite(q, ir);
1474
- if (rq == q || self->destroy_all) q_deref(q);
1475
- sxq->inc = rq;
1884
+ /* rewrite inclusive query */
1885
+ q = sxq->inc;
1886
+ rq = q->rewrite(q, ir);
1887
+ q_deref(q);
1888
+ sxq->inc = rq;
1476
1889
 
1477
- /* rewrite exclusive query */
1478
- q = sxq->exc;
1479
- rq = q->rewrite(q, ir);
1480
- if (rq == q || self->destroy_all) q_deref(q);
1481
- sxq->exc = rq;
1890
+ /* rewrite exclusive query */
1891
+ q = sxq->exc;
1892
+ rq = q->rewrite(q, ir);
1893
+ q_deref(q);
1894
+ sxq->exc = rq;
1482
1895
 
1483
- self->ref_cnt++;
1484
- return self;
1896
+ self->ref_cnt++;
1897
+ return self;
1485
1898
  }
1486
1899
 
1487
- void spanxq_destroy(Query *self)
1900
+ static void spanxq_destroy(Query *self)
1488
1901
  {
1489
- SpanQuery *sq = (SpanQuery *)self->data;
1490
- SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
1902
+ SpanNotQuery *sxq = SpXQ(self);
1491
1903
 
1492
- if (self->destroy_all) {
1493
1904
  q_deref(sxq->inc);
1494
1905
  q_deref(sxq->exc);
1495
- }
1496
1906
 
1497
- free(sxq);
1498
- free(sq);
1499
- q_destroy_i(self);
1907
+ spanq_destroy_i(self);
1500
1908
  }
1501
1909
 
1502
- static uint spanxq_hash(Query *self)
1910
+ static ulong spanxq_hash(Query *self)
1503
1911
  {
1504
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1505
- return sxq->inc->hash(sxq->inc) ^ sxq->exc->hash(sxq->exc);
1912
+ SpanNotQuery *sxq = SpXQ(self);
1913
+ return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
1914
+ ^ sxq->exc->hash(sxq->exc);
1506
1915
  }
1507
1916
 
1508
1917
/**
 * Compare two SpanNotQueries. They are equal when spanq_eq accepts the pair
 * and both the included and the excluded sub-queries report equality via
 * their own eq. The checks run in that order and stop at the first failure.
 *
 * @param self first query
 * @param o    second query
 * @return 1 when equal, 0 otherwise
 */
static int spanxq_eq(Query *self, Query *o)
{
    SpanNotQuery *lhs = SpXQ(self);
    SpanNotQuery *rhs = SpXQ(o);

    if (!spanq_eq(self, o)) {
        return 0;
    }
    if (!lhs->inc->eq(lhs->inc, rhs->inc)) {
        return 0;
    }
    return lhs->exc->eq(lhs->exc, rhs->exc) != 0;
}
1559
1924
 
1560
- bool spansc_next(Scorer *self)
1561
- {
1562
- SpanScorer *spansc = (SpanScorer *)self->data;
1563
- SpanEnum *se = spansc->spans;
1564
- int match_length;
1565
-
1566
- if (spansc->first_time) {
1567
- spansc->more = se->next(se);
1568
- spansc->first_time = false;
1569
- }
1570
-
1571
- if (!spansc->more) return false;
1572
-
1573
- spansc->freq = 0.0;
1574
- self->doc = se->doc(se);
1575
-
1576
- while (spansc->more && (self->doc == se->doc(se))) {
1577
- match_length = se->end(se) - se->start(se);
1578
- spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
1579
- spansc->more = se->next(se);
1580
- }
1581
-
1582
- return (spansc->more || (spansc->freq != 0.0));
1583
- }
1584
1925
 
1585
- bool spansc_skip_to(Scorer *self, int target)
1926
+ Query *spanxq_new_nr(Query *inc, Query *exc)
1586
1927
  {
1587
- SpanScorer *spansc = (SpanScorer *)self->data;
1588
- SpanEnum *se = spansc->spans;
1589
-
1590
- spansc->more = se->skip_to(se, target);
1928
+ Query *self;
1929
+ if (strcmp(SpQ(inc)->field, SpQ(inc)->field) != 0) {
1930
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1931
+ "Attempted to add a SpanQuery with field \"%s\" along with a "
1932
+ "SpanQuery with field \"%s\" to an SpanNotQuery",
1933
+ SpQ(inc)->field, SpQ(exc)->field);
1934
+ }
1935
+ self = q_new(SpanNotQuery);
1591
1936
 
1592
- if (!spansc->more) return false;
1937
+ SpXQ(self)->inc = inc;
1938
+ SpXQ(self)->exc = exc;
1593
1939
 
1594
- spansc->freq = 0.0;
1595
- self->doc = se->doc(se);
1940
+ SpQ(self)->field = SpQ(inc)->field;
1941
+ SpQ(self)->get_spans = &spanxe_new;
1942
+ SpQ(self)->get_terms = &spanxq_get_terms;
1596
1943
 
1597
- while (spansc->more && (se->doc(se) == target)) {
1598
- spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
1599
- spansc->more = se->next(se);
1600
- }
1944
+ self->type = SPAN_NOT_QUERY;
1945
+ self->rewrite = &spanxq_rewrite;
1946
+ self->extract_terms = &spanxq_extract_terms;
1947
+ self->to_s = &spanxq_to_s;
1948
+ self->hash = &spanxq_hash;
1949
+ self->eq = &spanxq_eq;
1950
+ self->destroy_i = &spanxq_destroy;
1951
+ self->create_weight_i = &spanw_new;
1952
+ self->get_matchv_i = &spanq_get_matchv_i;
1601
1953
 
1602
- return (spansc->more || (spansc->freq != 0.0));
1954
+ return self;
1603
1955
  }
1604
1956
 
1605
- Explanation *spansc_explain(Scorer *self, int target)
1957
+ Query *spanxq_new(Query *inc, Query *exc)
1606
1958
  {
1607
- Explanation *tf_explanation;
1608
- SpanScorer *spansc = (SpanScorer *)self->data;
1609
- float phrase_freq;
1610
- self->skip_to(self, target);
1611
- phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
1612
-
1613
- tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
1614
- strfmt("tf(phrase_freq(%f)", phrase_freq));
1615
-
1616
- return tf_explanation;
1959
+ REF(inc);
1960
+ REF(exc);
1961
+ return spanxq_new_nr(inc, exc);
1617
1962
  }
1618
1963
 
1619
- void spansc_destroy(Scorer *self)
1620
- {
1621
- SpanScorer *spansc = (SpanScorer *)self->data;
1622
- if (spansc->spans) spansc->spans->destroy(spansc->spans);
1623
- scorer_destroy_i(self);
1624
- }
1625
-
1626
- Scorer *spansc_create(Weight *weight, IndexReader *ir)
1627
- {
1628
- Scorer *self = scorer_create(weight->similarity);
1629
- SpanScorer *spansc = ALLOC(SpanScorer);
1630
- SpanQuery *spanq = (SpanQuery *)weight->query->data;
1631
- ZEROSET(spansc, SpanScorer, 1);
1632
- spansc->first_time = true;
1633
- spansc->more = true;
1634
- spansc->spans = spanq->get_spans(weight->query, ir);
1635
- spansc->sim = weight->similarity;
1636
- spansc->norms = ir->get_norms(ir, spanq->field);
1637
- spansc->weight = weight;
1638
- spansc->value = weight->value;
1639
- spansc->freq = 0.0;
1640
-
1641
- self->data = spansc;
1642
-
1643
- self->score = &spansc_score;
1644
- self->next = &spansc_next;
1645
- self->skip_to = &spansc_skip_to;
1646
- self->explain = &spansc_explain;
1647
- self->destroy = &spansc_destroy;
1648
- return self;
1649
- }