ferret 0.9.6 → 0.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (295) hide show
  1. data/MIT-LICENSE +1 -1
  2. data/README +12 -24
  3. data/Rakefile +38 -54
  4. data/TODO +14 -17
  5. data/ext/analysis.c +982 -823
  6. data/ext/analysis.h +133 -76
  7. data/ext/array.c +96 -58
  8. data/ext/array.h +40 -13
  9. data/ext/bitvector.c +476 -118
  10. data/ext/bitvector.h +264 -22
  11. data/ext/compound_io.c +217 -229
  12. data/ext/defines.h +49 -0
  13. data/ext/document.c +107 -317
  14. data/ext/document.h +31 -65
  15. data/ext/except.c +81 -36
  16. data/ext/except.h +117 -55
  17. data/ext/extconf.rb +2 -9
  18. data/ext/ferret.c +211 -104
  19. data/ext/ferret.h +22 -11
  20. data/ext/filter.c +97 -82
  21. data/ext/fs_store.c +348 -367
  22. data/ext/global.c +226 -188
  23. data/ext/global.h +44 -26
  24. data/ext/hash.c +474 -391
  25. data/ext/hash.h +441 -68
  26. data/ext/hashset.c +124 -96
  27. data/ext/hashset.h +169 -20
  28. data/ext/helper.c +56 -5
  29. data/ext/helper.h +7 -0
  30. data/ext/inc/lang.h +29 -49
  31. data/ext/inc/threading.h +31 -0
  32. data/ext/ind.c +288 -278
  33. data/ext/ind.h +68 -0
  34. data/ext/index.c +5688 -0
  35. data/ext/index.h +663 -616
  36. data/ext/lang.h +29 -49
  37. data/ext/libstemmer.c +3 -3
  38. data/ext/mem_pool.c +84 -0
  39. data/ext/mem_pool.h +35 -0
  40. data/ext/posh.c +1006 -0
  41. data/ext/posh.h +1007 -0
  42. data/ext/priorityqueue.c +117 -194
  43. data/ext/priorityqueue.h +135 -39
  44. data/ext/q_boolean.c +1305 -1108
  45. data/ext/q_const_score.c +106 -93
  46. data/ext/q_filtered_query.c +138 -135
  47. data/ext/q_fuzzy.c +206 -242
  48. data/ext/q_match_all.c +94 -80
  49. data/ext/q_multi_term.c +663 -0
  50. data/ext/q_parser.c +667 -593
  51. data/ext/q_phrase.c +992 -555
  52. data/ext/q_prefix.c +72 -61
  53. data/ext/q_range.c +235 -210
  54. data/ext/q_span.c +1480 -1166
  55. data/ext/q_term.c +273 -246
  56. data/ext/q_wildcard.c +127 -114
  57. data/ext/r_analysis.c +1720 -711
  58. data/ext/r_index.c +3049 -0
  59. data/ext/r_qparser.c +433 -146
  60. data/ext/r_search.c +2934 -1993
  61. data/ext/r_store.c +372 -143
  62. data/ext/r_utils.c +941 -0
  63. data/ext/ram_store.c +330 -326
  64. data/ext/search.c +1291 -668
  65. data/ext/search.h +403 -702
  66. data/ext/similarity.c +91 -113
  67. data/ext/similarity.h +45 -30
  68. data/ext/sort.c +721 -484
  69. data/ext/stopwords.c +361 -273
  70. data/ext/store.c +556 -58
  71. data/ext/store.h +706 -126
  72. data/ext/tags +3578 -2780
  73. data/ext/term_vectors.c +352 -0
  74. data/ext/threading.h +31 -0
  75. data/ext/win32.h +54 -0
  76. data/lib/ferret.rb +5 -17
  77. data/lib/ferret/document.rb +130 -2
  78. data/lib/ferret/index.rb +577 -26
  79. data/lib/ferret/number_tools.rb +157 -0
  80. data/lib/ferret_version.rb +3 -0
  81. data/test/test_helper.rb +5 -13
  82. data/test/unit/analysis/tc_analyzer.rb +513 -1
  83. data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
  84. data/test/unit/index/tc_index.rb +183 -240
  85. data/test/unit/index/tc_index_reader.rb +312 -479
  86. data/test/unit/index/tc_index_writer.rb +397 -13
  87. data/test/unit/index/th_doc.rb +269 -206
  88. data/test/unit/query_parser/tc_query_parser.rb +40 -33
  89. data/test/unit/search/tc_filter.rb +59 -71
  90. data/test/unit/search/tc_fuzzy_query.rb +24 -16
  91. data/test/unit/search/tc_index_searcher.rb +23 -201
  92. data/test/unit/search/tc_multi_searcher.rb +78 -226
  93. data/test/unit/search/tc_search_and_sort.rb +93 -81
  94. data/test/unit/search/tc_sort.rb +23 -23
  95. data/test/unit/search/tc_sort_field.rb +7 -7
  96. data/test/unit/search/tc_spans.rb +51 -47
  97. data/test/unit/search/tm_searcher.rb +339 -0
  98. data/test/unit/store/tc_fs_store.rb +1 -1
  99. data/test/unit/store/tm_store_lock.rb +3 -3
  100. data/test/unit/tc_document.rb +81 -0
  101. data/test/unit/ts_analysis.rb +1 -1
  102. data/test/unit/ts_utils.rb +1 -1
  103. data/test/unit/utils/tc_bit_vector.rb +288 -0
  104. data/test/unit/utils/tc_number_tools.rb +117 -0
  105. data/test/unit/utils/tc_priority_queue.rb +106 -0
  106. metadata +140 -301
  107. data/CHANGELOG +0 -9
  108. data/ext/dummy.exe +0 -0
  109. data/ext/field.c +0 -408
  110. data/ext/frtio.h +0 -13
  111. data/ext/inc/except.h +0 -90
  112. data/ext/index_io.c +0 -382
  113. data/ext/index_rw.c +0 -2658
  114. data/ext/lang.c +0 -41
  115. data/ext/nix_io.c +0 -134
  116. data/ext/q_multi_phrase.c +0 -380
  117. data/ext/r_doc.c +0 -582
  118. data/ext/r_index_io.c +0 -1021
  119. data/ext/r_term.c +0 -219
  120. data/ext/term.c +0 -820
  121. data/ext/termdocs.c +0 -611
  122. data/ext/vector.c +0 -637
  123. data/ext/w32_io.c +0 -150
  124. data/lib/ferret/analysis.rb +0 -11
  125. data/lib/ferret/analysis/analyzers.rb +0 -112
  126. data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
  127. data/lib/ferret/analysis/token.rb +0 -100
  128. data/lib/ferret/analysis/token_filters.rb +0 -86
  129. data/lib/ferret/analysis/token_stream.rb +0 -26
  130. data/lib/ferret/analysis/tokenizers.rb +0 -112
  131. data/lib/ferret/analysis/word_list_loader.rb +0 -27
  132. data/lib/ferret/document/document.rb +0 -152
  133. data/lib/ferret/document/field.rb +0 -312
  134. data/lib/ferret/index/compound_file_io.rb +0 -338
  135. data/lib/ferret/index/document_writer.rb +0 -289
  136. data/lib/ferret/index/field_infos.rb +0 -279
  137. data/lib/ferret/index/fields_io.rb +0 -181
  138. data/lib/ferret/index/index.rb +0 -675
  139. data/lib/ferret/index/index_file_names.rb +0 -33
  140. data/lib/ferret/index/index_reader.rb +0 -503
  141. data/lib/ferret/index/index_writer.rb +0 -534
  142. data/lib/ferret/index/multi_reader.rb +0 -377
  143. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
  144. data/lib/ferret/index/segment_infos.rb +0 -130
  145. data/lib/ferret/index/segment_merge_info.rb +0 -49
  146. data/lib/ferret/index/segment_merge_queue.rb +0 -16
  147. data/lib/ferret/index/segment_merger.rb +0 -358
  148. data/lib/ferret/index/segment_reader.rb +0 -412
  149. data/lib/ferret/index/segment_term_enum.rb +0 -169
  150. data/lib/ferret/index/segment_term_vector.rb +0 -58
  151. data/lib/ferret/index/term.rb +0 -53
  152. data/lib/ferret/index/term_buffer.rb +0 -83
  153. data/lib/ferret/index/term_doc_enum.rb +0 -291
  154. data/lib/ferret/index/term_enum.rb +0 -52
  155. data/lib/ferret/index/term_info.rb +0 -37
  156. data/lib/ferret/index/term_infos_io.rb +0 -321
  157. data/lib/ferret/index/term_vector_offset_info.rb +0 -20
  158. data/lib/ferret/index/term_vectors_io.rb +0 -553
  159. data/lib/ferret/query_parser.rb +0 -312
  160. data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
  161. data/lib/ferret/search.rb +0 -50
  162. data/lib/ferret/search/boolean_clause.rb +0 -100
  163. data/lib/ferret/search/boolean_query.rb +0 -299
  164. data/lib/ferret/search/boolean_scorer.rb +0 -294
  165. data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
  166. data/lib/ferret/search/conjunction_scorer.rb +0 -99
  167. data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
  168. data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
  169. data/lib/ferret/search/explanation.rb +0 -41
  170. data/lib/ferret/search/field_cache.rb +0 -215
  171. data/lib/ferret/search/field_doc.rb +0 -31
  172. data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
  173. data/lib/ferret/search/filter.rb +0 -11
  174. data/lib/ferret/search/filtered_query.rb +0 -130
  175. data/lib/ferret/search/filtered_term_enum.rb +0 -79
  176. data/lib/ferret/search/fuzzy_query.rb +0 -154
  177. data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
  178. data/lib/ferret/search/hit_collector.rb +0 -34
  179. data/lib/ferret/search/hit_queue.rb +0 -11
  180. data/lib/ferret/search/index_searcher.rb +0 -200
  181. data/lib/ferret/search/match_all_query.rb +0 -104
  182. data/lib/ferret/search/multi_phrase_query.rb +0 -216
  183. data/lib/ferret/search/multi_searcher.rb +0 -261
  184. data/lib/ferret/search/multi_term_query.rb +0 -65
  185. data/lib/ferret/search/non_matching_scorer.rb +0 -22
  186. data/lib/ferret/search/phrase_positions.rb +0 -55
  187. data/lib/ferret/search/phrase_query.rb +0 -214
  188. data/lib/ferret/search/phrase_scorer.rb +0 -152
  189. data/lib/ferret/search/prefix_query.rb +0 -54
  190. data/lib/ferret/search/query.rb +0 -140
  191. data/lib/ferret/search/query_filter.rb +0 -51
  192. data/lib/ferret/search/range_filter.rb +0 -103
  193. data/lib/ferret/search/range_query.rb +0 -139
  194. data/lib/ferret/search/req_excl_scorer.rb +0 -125
  195. data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
  196. data/lib/ferret/search/score_doc.rb +0 -38
  197. data/lib/ferret/search/score_doc_comparator.rb +0 -114
  198. data/lib/ferret/search/scorer.rb +0 -91
  199. data/lib/ferret/search/similarity.rb +0 -278
  200. data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
  201. data/lib/ferret/search/sort.rb +0 -112
  202. data/lib/ferret/search/sort_comparator.rb +0 -60
  203. data/lib/ferret/search/sort_field.rb +0 -91
  204. data/lib/ferret/search/spans.rb +0 -12
  205. data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
  206. data/lib/ferret/search/spans/span_first_query.rb +0 -79
  207. data/lib/ferret/search/spans/span_near_query.rb +0 -108
  208. data/lib/ferret/search/spans/span_not_query.rb +0 -130
  209. data/lib/ferret/search/spans/span_or_query.rb +0 -176
  210. data/lib/ferret/search/spans/span_query.rb +0 -25
  211. data/lib/ferret/search/spans/span_scorer.rb +0 -74
  212. data/lib/ferret/search/spans/span_term_query.rb +0 -105
  213. data/lib/ferret/search/spans/span_weight.rb +0 -84
  214. data/lib/ferret/search/spans/spans_enum.rb +0 -44
  215. data/lib/ferret/search/term_query.rb +0 -128
  216. data/lib/ferret/search/term_scorer.rb +0 -183
  217. data/lib/ferret/search/top_docs.rb +0 -36
  218. data/lib/ferret/search/top_field_docs.rb +0 -17
  219. data/lib/ferret/search/weight.rb +0 -54
  220. data/lib/ferret/search/wildcard_query.rb +0 -26
  221. data/lib/ferret/search/wildcard_term_enum.rb +0 -61
  222. data/lib/ferret/stemmers.rb +0 -1
  223. data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
  224. data/lib/ferret/store.rb +0 -5
  225. data/lib/ferret/store/buffered_index_io.rb +0 -190
  226. data/lib/ferret/store/directory.rb +0 -141
  227. data/lib/ferret/store/fs_store.rb +0 -381
  228. data/lib/ferret/store/index_io.rb +0 -245
  229. data/lib/ferret/store/ram_store.rb +0 -286
  230. data/lib/ferret/utils.rb +0 -8
  231. data/lib/ferret/utils/bit_vector.rb +0 -123
  232. data/lib/ferret/utils/date_tools.rb +0 -138
  233. data/lib/ferret/utils/number_tools.rb +0 -91
  234. data/lib/ferret/utils/parameter.rb +0 -41
  235. data/lib/ferret/utils/priority_queue.rb +0 -120
  236. data/lib/ferret/utils/string_helper.rb +0 -47
  237. data/lib/ferret/utils/thread_local.rb +0 -28
  238. data/lib/ferret/utils/weak_key_hash.rb +0 -60
  239. data/lib/rferret.rb +0 -37
  240. data/rake_utils/code_statistics.rb +0 -106
  241. data/test/benchmark/tb_ram_store.rb +0 -76
  242. data/test/benchmark/tb_rw_vint.rb +0 -26
  243. data/test/functional/thread_safety_index_test.rb +0 -81
  244. data/test/functional/thread_safety_test.rb +0 -137
  245. data/test/longrunning/tc_numbertools.rb +0 -60
  246. data/test/longrunning/tm_store.rb +0 -19
  247. data/test/unit/analysis/ctc_analyzer.rb +0 -532
  248. data/test/unit/analysis/data/wordfile +0 -6
  249. data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
  250. data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
  251. data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
  252. data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
  253. data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
  254. data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
  255. data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
  256. data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
  257. data/test/unit/analysis/rtc_stop_filter.rb +0 -14
  258. data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
  259. data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
  260. data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
  261. data/test/unit/analysis/tc_token.rb +0 -25
  262. data/test/unit/document/rtc_field.rb +0 -28
  263. data/test/unit/document/tc_document.rb +0 -47
  264. data/test/unit/document/tc_field.rb +0 -98
  265. data/test/unit/index/rtc_compound_file_io.rb +0 -107
  266. data/test/unit/index/rtc_field_infos.rb +0 -127
  267. data/test/unit/index/rtc_fields_io.rb +0 -167
  268. data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
  269. data/test/unit/index/rtc_segment_infos.rb +0 -74
  270. data/test/unit/index/rtc_segment_term_docs.rb +0 -17
  271. data/test/unit/index/rtc_segment_term_enum.rb +0 -60
  272. data/test/unit/index/rtc_segment_term_vector.rb +0 -71
  273. data/test/unit/index/rtc_term_buffer.rb +0 -57
  274. data/test/unit/index/rtc_term_info.rb +0 -19
  275. data/test/unit/index/rtc_term_infos_io.rb +0 -192
  276. data/test/unit/index/rtc_term_vectors_io.rb +0 -108
  277. data/test/unit/index/tc_term.rb +0 -27
  278. data/test/unit/index/tc_term_voi.rb +0 -18
  279. data/test/unit/search/rtc_similarity.rb +0 -37
  280. data/test/unit/search/rtc_sort_field.rb +0 -14
  281. data/test/unit/search/tc_multi_searcher2.rb +0 -126
  282. data/test/unit/store/rtc_fs_store.rb +0 -62
  283. data/test/unit/store/rtc_ram_store.rb +0 -15
  284. data/test/unit/store/rtm_store.rb +0 -150
  285. data/test/unit/store/rtm_store_lock.rb +0 -2
  286. data/test/unit/ts_document.rb +0 -2
  287. data/test/unit/utils/rtc_bit_vector.rb +0 -73
  288. data/test/unit/utils/rtc_date_tools.rb +0 -50
  289. data/test/unit/utils/rtc_number_tools.rb +0 -59
  290. data/test/unit/utils/rtc_parameter.rb +0 -40
  291. data/test/unit/utils/rtc_priority_queue.rb +0 -62
  292. data/test/unit/utils/rtc_string_helper.rb +0 -21
  293. data/test/unit/utils/rtc_thread.rb +0 -61
  294. data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
  295. data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_span.c CHANGED
@@ -1,807 +1,1038 @@
1
1
  #include <string.h>
2
+ #include <limits.h>
2
3
  #include "search.h"
4
+ #include "hashset.h"
3
5
 
4
- /*****************************************************************************
5
- *
6
- * NearSpanEnum
7
- *
8
- *****************************************************************************/
6
+ #define CLAUSE_INIT_CAPA 4
9
7
 
10
8
  /*****************************************************************************
11
9
  *
12
- * SpanWeight
10
+ * SpanQuery
13
11
  *
14
12
  *****************************************************************************/
15
13
 
16
- Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
17
- {
18
- Explanation *expl;
19
- Explanation *idf_expl1;
20
- Explanation *idf_expl2;
21
- Explanation *query_expl;
22
- Explanation *qnorm_expl;
23
- Explanation *field_expl;
24
- Explanation *tf_expl;
25
- Scorer *scorer;
26
- uchar *field_norms;
27
- float field_norm;
28
- Explanation *field_norm_expl;
29
-
30
- char *query_str = self->query->to_s(self->query, "");
31
- HashSet *terms = (HashSet *)self->data;
32
- char *field = ((SpanQuery *)self->query->data)->field;
33
- char *doc_freqs = NULL;
34
- size_t df_i = 0;
35
- int i;
36
- Term *t;
37
-
38
-
39
- for (i = 0; i < terms->size; i++) {
40
- t = (Term *)terms->elems[i];
41
- REALLOC_N(doc_freqs, char, df_i + strlen(t->text) + 23);
42
- sprintf(doc_freqs + df_i, "%s=%d, ", t->text, ir->doc_freq(ir, t));
43
- df_i = strlen(doc_freqs);
44
- }
45
- /* remove the ',' at the end of the string if it exists */
46
- if (terms->size > 0) {
47
- df_i -= 2;
48
- doc_freqs[df_i] = '\0';
49
- } else {
50
- doc_freqs = "";
51
- }
14
+ /***************************************************************************
15
+ * SpanQuery
16
+ ***************************************************************************/
52
17
 
53
- expl = expl_create(0.0,
54
- strfmt("weight(%s in %d), product of:", query_str, target));
18
+ #define SpQ(query) ((SpanQuery *)(query))
55
19
 
56
- /* We need two of these as it's included in both the query explanation
57
- * and the field explanation */
58
- idf_expl1 = expl_create(self->idf,
59
- strfmt("idf(%s: %s)", field, doc_freqs));
60
- idf_expl2 = expl_create(self->idf,
61
- strfmt("idf(%s: %s)", field, doc_freqs));
62
- if (terms->size > 0) {
63
- free(doc_freqs); /* only free if allocated */
64
- }
20
+ static ulong spanq_hash(Query *self)
21
+ {
22
+ return str_hash(SpQ(self)->field);
23
+ }
65
24
 
66
- /* explain query weight */
67
- query_expl = expl_create(0.0,
68
- strfmt("query_weight(%s), product of:", query_str));
25
+ static int spanq_eq(Query *self, Query *o)
26
+ {
27
+ return strcmp(SpQ(self)->field, SpQ(o)->field) == 0;
28
+ }
69
29
 
70
- if (self->query->boost != 1.0) {
71
- expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
72
- }
30
+ static void spanq_destroy_i(Query *self)
31
+ {
32
+ q_destroy_i(self);
33
+ }
73
34
 
74
- expl_add_detail(query_expl, idf_expl1);
35
+ static MatchVector *mv_to_term_mv(MatchVector *term_mv, MatchVector *full_mv,
36
+ HashSet *terms, TermVector *tv)
37
+ {
38
+ int i;
39
+ for (i = 0; i < terms->size; i++) {
40
+ char *term = (char *)terms->elems[i];
41
+ TVTerm *tv_term = tv_get_tv_term(tv, term);
42
+ if (tv_term) {
43
+ int j;
44
+ int m_idx = 0;
45
+ for (j = 0; j < tv_term->freq; j++) {
46
+ int pos = tv_term->positions[j];
47
+ for (; m_idx < full_mv->size; m_idx++) {
48
+ if (pos <= full_mv->matches[m_idx].end) {
49
+ if (pos >= full_mv->matches[m_idx].start) {
50
+ matchv_add(term_mv, pos, pos);
51
+ }
52
+ break;
53
+ }
54
+ }
55
+ }
56
+ }
57
+ }
75
58
 
76
- qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
77
- expl_add_detail(query_expl, qnorm_expl);
59
+ return term_mv;
60
+ }
78
61
 
79
- query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
62
+ /***************************************************************************
63
+ * TVTermDocEnum
64
+ * dummy TermDocEnum used by the highlighter to find matches
65
+ ***************************************************************************/
80
66
 
81
- expl_add_detail(expl, query_expl);
67
+ #define TV_TDE(tde) ((TVTermDocEnum *)(tde))
82
68
 
83
- /* explain field weight */
84
- field_expl = expl_create(0.0,
85
- strfmt("field_weight(%s:%s in %d), product of:", field, query_str, target));
86
- free(query_str);
69
+ typedef struct TVTermDocEnum
70
+ {
71
+ TermDocEnum super;
72
+ int doc;
73
+ int index;
74
+ int freq;
75
+ int *positions;
76
+ TermVector *tv;
77
+ } TVTermDocEnum;
87
78
 
88
- scorer = self->scorer(self, ir);
89
- tf_expl = scorer->explain(scorer, target);
90
- scorer->destroy(scorer);
91
- expl_add_detail(field_expl, tf_expl);
92
- expl_add_detail(field_expl, idf_expl2);
79
+ static void tv_tde_seek(TermDocEnum *tde, int field_num, const char *term)
80
+ {
81
+ TVTermDocEnum *tv_tde = TV_TDE(tde);
82
+ TVTerm *tv_term = tv_get_tv_term(tv_tde->tv, term);
83
+ (void)field_num;
84
+ if (tv_term) {
85
+ tv_tde->doc = -1;
86
+ tv_tde->index = 0;
87
+ tv_tde->freq = tv_term->freq;
88
+ tv_tde->positions = tv_term->positions;
89
+ }
90
+ else {
91
+ tv_tde->doc = INT_MAX;
92
+ }
93
+ }
94
+
95
+ static bool tv_tde_next(TermDocEnum *tde)
96
+ {
97
+ if (TV_TDE(tde)->doc == -1) {
98
+ TV_TDE(tde)->doc = 0;
99
+ return true;
100
+ }
101
+ else {
102
+ TV_TDE(tde)->doc = INT_MAX;
103
+ return false;
104
+ }
105
+ }
93
106
 
94
- field_norms = ir->get_norms(ir, field);
95
- field_norm = (field_norms
96
- ? sim_decode_norm(self->similarity, field_norms[target])
97
- : (float)0.0);
98
- field_norm_expl = expl_create(field_norm,
99
- strfmt("field_norm(field=%s, doc=%d)", field, target));
100
- expl_add_detail(field_expl, field_norm_expl);
107
+ static bool tv_tde_skip_to(TermDocEnum *tde, int doc_num)
108
+ {
109
+ if (doc_num == 0) {
110
+ TV_TDE(tde)->doc = 0;
111
+ return true;
112
+ }
113
+ else {
114
+ TV_TDE(tde)->doc = INT_MAX;
115
+ return false;
116
+ }
117
+ }
118
+
119
+ static int tv_tde_next_position(TermDocEnum *tde)
120
+ {
121
+ return TV_TDE(tde)->positions[TV_TDE(tde)->index++];
122
+ }
123
+
124
+ static int tv_tde_freq(TermDocEnum *tde)
125
+ {
126
+ return TV_TDE(tde)->freq;
127
+ }
128
+
129
+ static int tv_tde_doc_num(TermDocEnum *tde)
130
+ {
131
+ return TV_TDE(tde)->doc;
132
+ }
101
133
 
102
- field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
134
+ static TermDocEnum *spanq_ir_term_positions(IndexReader *ir)
135
+ {
136
+ TVTermDocEnum *tv_tde = ALLOC(TVTermDocEnum);
137
+ TermDocEnum *tde = (TermDocEnum *)tv_tde;
138
+ tv_tde->tv = (TermVector *)ir->store;
139
+ tde->seek = &tv_tde_seek;
140
+ tde->doc_num = &tv_tde_doc_num;
141
+ tde->freq = &tv_tde_freq;
142
+ tde->next = &tv_tde_next;
143
+ tde->skip_to = &tv_tde_skip_to;
144
+ tde->next_position = &tv_tde_next_position;
145
+ tde->close = (void (*)(TermDocEnum *tde))&free;
146
+
147
+ return tde;
148
+ }
103
149
 
104
- /* combine them */
105
- if (query_expl->value == 1.0) {
106
- expl_destoy(expl);
107
- return field_expl;
108
- } else {
109
- expl->value = (query_expl->value * field_expl->value);
110
- expl_add_detail(expl, field_expl);
111
- return expl;
112
- }
150
+ static MatchVector *spanq_get_matchv_i(Query *self, MatchVector *mv,
151
+ TermVector *tv)
152
+ {
153
+ if (strcmp(SpQ(self)->field, tv->field) == 0) {
154
+ SpanEnum *sp_enum;
155
+ IndexReader *ir = ALLOC(IndexReader);
156
+ MatchVector *full_mv = matchv_new();
157
+ HashSet *terms = SpQ(self)->get_terms(self);
158
+ ir->fis = fis_new(0, 0, 0);
159
+ fis_add_field(ir->fis, fi_new(tv->field, 0, 0, 0));
160
+ ir->store = (Store *)tv;
161
+ ir->term_positions = &spanq_ir_term_positions;
162
+ sp_enum = SpQ(self)->get_spans(self, ir);
163
+ while (sp_enum->next(sp_enum)) {
164
+ matchv_add(full_mv,
165
+ sp_enum->start(sp_enum),
166
+ sp_enum->end(sp_enum) - 1);
167
+ }
168
+ sp_enum->destroy(sp_enum);
169
+
170
+ fis_deref(ir->fis);
171
+ free(ir);
172
+
173
+ matchv_compact(full_mv);
174
+ mv_to_term_mv(mv, full_mv, terms, tv);
175
+ matchv_destroy(full_mv);
176
+ hs_destroy(terms);
177
+ }
178
+ return mv;
113
179
  }
114
180
 
115
- char *spanw_to_s(Weight *self)
181
+ /***************************************************************************
182
+ *
183
+ * SpanScorer
184
+ *
185
+ ***************************************************************************/
186
+
187
+ #define SpSc(scorer) ((SpanScorer *)(scorer))
188
+ typedef struct SpanScorer
189
+ {
190
+ Scorer super;
191
+ IndexReader *ir;
192
+ SpanEnum *spans;
193
+ Similarity *sim;
194
+ uchar *norms;
195
+ Weight *weight;
196
+ float value;
197
+ float freq;
198
+ bool first_time : 1;
199
+ bool more : 1;
200
+ } SpanScorer;
201
+
202
+ static float spansc_score(Scorer *self)
116
203
  {
117
- return strfmt("SpanWeight(%f)", self->value);
204
+ SpanScorer *spansc = SpSc(self);
205
+ float raw = sim_tf(spansc->sim, spansc->freq) * spansc->value;
206
+
207
+ /* normalize */
208
+ return raw * sim_decode_norm(self->similarity, spansc->norms[self->doc]);
118
209
  }
119
210
 
120
- void spanw_destroy(Weight *self)
211
+ static bool spansc_next(Scorer *self)
121
212
  {
122
- hs_destroy_all(self->data);
123
- w_destroy(self);
213
+ SpanScorer *spansc = SpSc(self);
214
+ SpanEnum *se = spansc->spans;
215
+ int match_length;
216
+
217
+ if (spansc->first_time) {
218
+ spansc->more = se->next(se);
219
+ spansc->first_time = false;
220
+ }
221
+
222
+ if (!spansc->more) {
223
+ return false;
224
+ }
225
+
226
+ spansc->freq = 0.0;
227
+ self->doc = se->doc(se);
228
+
229
+ while (spansc->more && (self->doc == se->doc(se))) {
230
+ match_length = se->end(se) - se->start(se);
231
+ spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
232
+ spansc->more = se->next(se);
233
+ }
234
+
235
+ return (spansc->more || (spansc->freq != 0.0));
124
236
  }
125
237
 
126
- Weight *spanw_create(Query *query, Searcher *searcher)
238
+ static bool spansc_skip_to(Scorer *self, int target)
127
239
  {
128
- Weight *self = w_create(query);
129
- SpanQuery *spanq = (SpanQuery *)query->data;
130
- HashSet *terms = spanq->get_terms(query);
240
+ SpanScorer *spansc = SpSc(self);
241
+ SpanEnum *se = spansc->spans;
131
242
 
132
- self->data = terms;
133
- self->scorer = &spansc_create;
134
- self->explain = &spanw_explain;
135
- self->to_s = &spanw_to_s;
136
- self->destroy = &spanw_destroy;
137
- self->sum_of_squared_weights = &w_sum_of_squared_weights;
243
+ spansc->more = se->skip_to(se, target);
138
244
 
139
- self->similarity = query->get_similarity(query, searcher);
245
+ if (!spansc->more) {
246
+ return false;
247
+ }
140
248
 
141
- self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
142
- terms->size, searcher);
249
+ spansc->freq = 0.0;
250
+ self->doc = se->doc(se);
143
251
 
144
- return self;
252
+ while (spansc->more && (se->doc(se) == target)) {
253
+ spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
254
+ spansc->more = se->next(se);
255
+ }
256
+
257
+ return (spansc->more || (spansc->freq != 0.0));
258
+ }
259
+
260
+ static Explanation *spansc_explain(Scorer *self, int target)
261
+ {
262
+ Explanation *tf_explanation;
263
+ SpanScorer *spansc = SpSc(self);
264
+ float phrase_freq;
265
+ self->skip_to(self, target);
266
+ phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
267
+
268
+ tf_explanation = expl_new(sim_tf(self->similarity, phrase_freq),
269
+ "tf(phrase_freq(%f)", phrase_freq);
270
+
271
+ return tf_explanation;
145
272
  }
146
273
 
274
+ static void spansc_destroy(Scorer *self)
275
+ {
276
+ SpanScorer *spansc = SpSc(self);
277
+ if (spansc->spans) {
278
+ spansc->spans->destroy(spansc->spans);
279
+ }
280
+ scorer_destroy_i(self);
281
+ }
282
+
283
+ Scorer *spansc_new(Weight *weight, IndexReader *ir)
284
+ {
285
+ Scorer *self = NULL;
286
+ const int field_num = fis_get_field_num(ir->fis, SpQ(weight->query)->field);
287
+ if (field_num >= 0) {
288
+ Query *spanq = weight->query;
289
+ self = scorer_new(SpanScorer, weight->similarity);
290
+
291
+ SpSc(self)->first_time = true;
292
+ SpSc(self)->more = true;
293
+ SpSc(self)->spans = SpQ(spanq)->get_spans(spanq, ir);
294
+ SpSc(self)->sim = weight->similarity;
295
+ SpSc(self)->norms = ir->get_norms(ir, field_num);
296
+ SpSc(self)->weight = weight;
297
+ SpSc(self)->value = weight->value;
298
+ SpSc(self)->freq = 0.0;
299
+
300
+ self->score = &spansc_score;
301
+ self->next = &spansc_next;
302
+ self->skip_to = &spansc_skip_to;
303
+ self->explain = &spansc_explain;
304
+ self->destroy = &spansc_destroy;
305
+ }
306
+ return self;
307
+ }
147
308
 
148
309
  /*****************************************************************************
149
- *
150
310
  * SpanTermEnum
151
- *
152
311
  *****************************************************************************/
153
312
 
154
- bool spante_next(SpanEnum *self)
313
+ #define SpTEn(span_enum) ((SpanTermEnum *)(span_enum))
314
+ #define SpTQ(query) ((SpanTermQuery *)(query))
315
+
316
+ typedef struct SpanTermEnum
317
+ {
318
+ SpanEnum super;
319
+ TermDocEnum *positions;
320
+ int position;
321
+ int doc;
322
+ int count;
323
+ int freq;
324
+ } SpanTermEnum;
325
+
326
+
327
+ static bool spante_next(SpanEnum *self)
155
328
  {
156
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
157
- TermDocEnum *tde = ste->positions;
329
+ SpanTermEnum *ste = SpTEn(self);
330
+ TermDocEnum *tde = ste->positions;
158
331
 
159
- if (ste->count == ste->freq) {
160
- if (! tde->next(tde)) {
161
- ste->doc = INT_MAX;
162
- return false;
332
+ if (ste->count == ste->freq) {
333
+ if (! tde->next(tde)) {
334
+ ste->doc = INT_MAX;
335
+ return false;
336
+ }
337
+ ste->doc = tde->doc_num(tde);
338
+ ste->freq = tde->freq(tde);
339
+ ste->count = 0;
163
340
  }
164
- ste->doc = tde->doc_num(tde);
165
- ste->freq = tde->freq(tde);
166
- ste->count = 0;
167
- }
168
- ste->position = tde->next_position(tde);
169
- ste->count++;
170
- return true;
341
+ ste->position = tde->next_position(tde);
342
+ ste->count++;
343
+ return true;
171
344
  }
172
345
 
173
- bool spante_skip_to(SpanEnum *self, int target)
346
+ static bool spante_skip_to(SpanEnum *self, int target)
174
347
  {
175
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
176
- TermDocEnum *tde = ste->positions;
348
+ SpanTermEnum *ste = SpTEn(self);
349
+ TermDocEnum *tde = ste->positions;
177
350
 
178
- /* are we already at the correct position? */
179
- if (ste->doc >= target) return true;
351
+ /* are we already at the correct position? */
352
+ if (ste->doc >= target) {
353
+ return true;
354
+ }
180
355
 
181
- if (! tde->skip_to(tde, target)) {
182
- ste->doc = INT_MAX;
183
- return false;
184
- }
356
+ if (! tde->skip_to(tde, target)) {
357
+ ste->doc = INT_MAX;
358
+ return false;
359
+ }
185
360
 
186
- ste->doc = tde->doc_num(tde);
187
- ste->freq = tde->freq(tde);
188
- ste->count = 0;
361
+ ste->doc = tde->doc_num(tde);
362
+ ste->freq = tde->freq(tde);
363
+ ste->count = 0;
189
364
 
190
- ste->position = tde->next_position(tde);
191
- ste->count++;
192
- return true;
365
+ ste->position = tde->next_position(tde);
366
+ ste->count++;
367
+ return true;
193
368
  }
194
369
 
195
- int spante_doc(SpanEnum *self)
370
+ static int spante_doc(SpanEnum *self)
196
371
  {
197
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
198
- return ste->doc;
372
+ return SpTEn(self)->doc;
199
373
  }
200
374
 
201
- int spante_start(SpanEnum *self)
375
+ static int spante_start(SpanEnum *self)
202
376
  {
203
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
204
- return ste->position;
377
+ return SpTEn(self)->position;
205
378
  }
206
379
 
207
- int spante_end(SpanEnum *self)
380
+ static int spante_end(SpanEnum *self)
208
381
  {
209
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
210
- return ste->position + 1;
382
+ return SpTEn(self)->position + 1;
211
383
  }
212
384
 
213
- char *spante_to_s(SpanEnum *self)
385
+ static char *spante_to_s(SpanEnum *self)
214
386
  {
215
- char *field = ((SpanQuery *)self->query->data)->field;
216
- char *query_str = self->query->to_s(self->query, field);
217
- char pos_str[20];
218
- size_t len = strlen(query_str);
219
- int pos;
220
- char *str = ALLOC_N(char, len + 40);
387
+ char *field = SpQ(self->query)->field;
388
+ char *query_str = self->query->to_s(self->query, field);
389
+ char pos_str[20];
390
+ size_t len = strlen(query_str);
391
+ int pos;
392
+ char *str = ALLOC_N(char, len + 40);
221
393
 
222
- if (self->doc(self) < 0) {
223
- sprintf(pos_str, "START");
224
- } else {
225
- if (self->doc(self) == INT_MAX) {
226
- sprintf(pos_str, "END");
227
- } else {
228
- pos = ((SpanTermEnum *)self->data)->position;
229
- sprintf(pos_str, "%d", self->doc(self) - pos);
394
+ if (self->doc(self) < 0) {
395
+ sprintf(pos_str, "START");
230
396
  }
231
- }
232
- sprintf("SpanTermEnum(%s)@%s", query_str, pos_str);
233
- free(query_str);
234
- return str;
397
+ else {
398
+ if (self->doc(self) == INT_MAX) {
399
+ sprintf(pos_str, "END");
400
+ }
401
+ else {
402
+ pos = SpTEn(self)->position;
403
+ sprintf(pos_str, "%d", self->doc(self) - pos);
404
+ }
405
+ }
406
+ sprintf("SpanTermEnum(%s)@%s", query_str, pos_str);
407
+ free(query_str);
408
+ return str;
235
409
  }
236
410
 
237
- void spante_destroy(SpanEnum *self)
411
+ static void spante_destroy(SpanEnum *self)
238
412
  {
239
- SpanTermEnum *ste = (SpanTermEnum *)self->data;
240
- TermDocEnum *tde = ste->positions;
241
- tde->close(tde);
242
- free(ste);
243
- free(self);
413
+ TermDocEnum *tde = SpTEn(self)->positions;
414
+ tde->close(tde);
415
+ free(self);
244
416
  }
245
417
 
246
- SpanEnum *spante_create(Query *query, IndexReader *ir)
418
+ static SpanEnum *spante_new(Query *query, IndexReader *ir)
247
419
  {
248
- Term *term = (Term *)((SpanQuery *)query->data)->data;
249
- SpanEnum *self = ALLOC(SpanEnum);
420
+ char *term = SpTQ(query)->term;
421
+ char *field = SpQ(query)->field;
422
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanTermEnum));
250
423
 
251
- SpanTermEnum *ste = ALLOC(SpanTermEnum);
252
- ste->positions = ir_term_positions_for(ir, term);
253
- ste->position = -1;
254
- ste->doc = -1;
255
- ste->count = 0;
256
- ste->freq = 0;
424
+ SpTEn(self)->positions = ir_term_positions_for(ir, field, term);
425
+ SpTEn(self)->position = -1;
426
+ SpTEn(self)->doc = -1;
427
+ SpTEn(self)->count = 0;
428
+ SpTEn(self)->freq = 0;
257
429
 
258
- self->data = ste;
430
+ self->query = query;
431
+ self->next = &spante_next;
432
+ self->skip_to = &spante_skip_to;
433
+ self->doc = &spante_doc;
434
+ self->start = &spante_start;
435
+ self->end = &spante_end;
436
+ self->destroy = &spante_destroy;
437
+ self->to_s = &spante_to_s;
259
438
 
260
- self->query = query;
261
- self->next = &spante_next;
262
- self->skip_to = &spante_skip_to;
263
- self->doc = &spante_doc;
264
- self->start = &spante_start;
265
- self->end = &spante_end;
266
- self->destroy = &spante_destroy;
267
- self->to_s = &spante_to_s;
268
-
269
- return self;
439
+ return self;
270
440
  }
271
441
 
272
442
 
273
443
  /*****************************************************************************
274
- *
275
444
  * SpanFirstEnum
276
- *
277
445
  *****************************************************************************/
278
446
 
279
- bool spanfe_next(SpanEnum *self)
447
+ #define SpFEn(span_enum) ((SpanFirstEnum *)(span_enum))
448
+ #define SpFQ(query) ((SpanFirstQuery *)(query))
449
+
450
+ typedef struct SpanFirstEnum
451
+ {
452
+ SpanEnum super;
453
+ SpanEnum *sub_enum;
454
+ } SpanFirstEnum;
455
+
456
+
457
+ static bool spanfe_next(SpanEnum *self)
280
458
  {
281
- SpanEnum *se = (SpanEnum *)(self->data);
282
- int end = ((SpanFirstQuery *)((SpanQuery *)self->query->data)->data)->end;
283
- while (se->next(se)) { /* scan to next match */
284
- if (se->end(se) <= end) return true;
285
- }
286
- return false;
459
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
460
+ int end = SpFQ(self->query)->end;
461
+ while (sub_enum->next(sub_enum)) { /* scan to next match */
462
+ if (sub_enum->end(sub_enum) <= end) {
463
+ return true;
464
+ }
465
+ }
466
+ return false;
287
467
  }
288
468
 
289
- bool spanfe_skip_to(SpanEnum *self, int target)
469
+ static bool spanfe_skip_to(SpanEnum *self, int target)
290
470
  {
291
- SpanEnum *se = (SpanEnum *)(self->data);
292
- int end = ((SpanFirstQuery *)((SpanQuery *)self->query->data)->data)->end;
471
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
472
+ int end = SpFQ(self->query)->end;
293
473
 
294
- if (! se->skip_to(se, target)) return false;
474
+ if (! sub_enum->skip_to(sub_enum, target)) {
475
+ return false;
476
+ }
295
477
 
296
- if (se->end(se) <= end) /* there is a match */
297
- return true;
478
+ if (sub_enum->end(sub_enum) <= end) { /* there is a match */
479
+ return true;
480
+ }
298
481
 
299
- return se->next(se); /* scan to next match */
482
+ return sub_enum->next(sub_enum); /* scan to next match */
300
483
  }
301
484
 
302
- int spanfe_doc(SpanEnum *self)
485
+ static int spanfe_doc(SpanEnum *self)
303
486
  {
304
- SpanEnum *se = (SpanEnum *)(self->data);
305
- return se->doc(se);
487
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
488
+ return sub_enum->doc(sub_enum);
306
489
  }
307
490
 
308
- int spanfe_start(SpanEnum *self)
491
+ static int spanfe_start(SpanEnum *self)
309
492
  {
310
- SpanEnum *se = (SpanEnum *)(self->data);
311
- return se->start(se);
493
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
494
+ return sub_enum->start(sub_enum);
312
495
  }
313
496
 
314
- int spanfe_end(SpanEnum *self)
497
+ static int spanfe_end(SpanEnum *self)
315
498
  {
316
- SpanEnum *se = (SpanEnum *)(self->data);
317
- return se->end(se);
499
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
500
+ return sub_enum->end(sub_enum);
318
501
  }
319
502
 
320
- char *spanfe_to_s(SpanEnum *self)
503
+ static char *spanfe_to_s(SpanEnum *self)
321
504
  {
322
- char *field = ((SpanQuery *)self->query->data)->field;
323
- char *query_str = self->query->to_s(self->query, field);
324
- char *res = strfmt("SpanFirstEnum(%s)", query_str);
325
- free(query_str);
326
- return res;
505
+ char *field = SpQ(self->query)->field;
506
+ char *query_str = self->query->to_s(self->query, field);
507
+ char *res = strfmt("SpanFirstEnum(%s)", query_str);
508
+ free(query_str);
509
+ return res;
327
510
  }
328
511
 
329
- void spanfe_destroy(SpanEnum *self)
512
+ static void spanfe_destroy(SpanEnum *self)
330
513
  {
331
- SpanEnum *se = (SpanEnum *)self->data;
332
- se->destroy(se);
333
- free(self);
514
+ SpanEnum *sub_enum = SpFEn(self)->sub_enum;
515
+ sub_enum->destroy(sub_enum);
516
+ free(self);
334
517
  }
335
518
 
336
- SpanEnum *spanfe_create(Query *query, IndexReader *ir)
519
+ static SpanEnum *spanfe_new(Query *query, IndexReader *ir)
337
520
  {
338
- SpanEnum *self = ALLOC(SpanEnum);
339
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)query->data)->data;
521
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanFirstEnum));
522
+ SpanFirstQuery *sfq = SpFQ(query);
340
523
 
341
- self->data = ((SpanQuery *)sfq->match->data)->get_spans(sfq->match, ir);
524
+ SpFEn(self)->sub_enum = SpQ(sfq->match)->get_spans(sfq->match, ir);
342
525
 
343
- self->query = query;
344
- self->next = &spanfe_next;
345
- self->skip_to = &spanfe_skip_to;
346
- self->doc = &spanfe_doc;
347
- self->start = &spanfe_start;
348
- self->end = &spanfe_end;
349
- self->destroy = &spanfe_destroy;
350
- self->to_s = &spanfe_to_s;
526
+ self->query = query;
527
+ self->next = &spanfe_next;
528
+ self->skip_to = &spanfe_skip_to;
529
+ self->doc = &spanfe_doc;
530
+ self->start = &spanfe_start;
531
+ self->end = &spanfe_end;
532
+ self->destroy = &spanfe_destroy;
533
+ self->to_s = &spanfe_to_s;
351
534
 
352
- return self;
535
+ return self;
353
536
  }
354
537
 
355
538
 
356
539
  /*****************************************************************************
357
- *
358
540
  * SpanOrEnum
359
- *
360
541
  *****************************************************************************/
361
542
 
362
- bool span_less_than(void *p1, void *p2)
543
+ #define SpOEn(span_enum) ((SpanOrEnum *)(span_enum))
544
+ #define SpOQ(query) ((SpanOrQuery *)(query))
545
+
546
+ typedef struct SpanOrEnum
547
+ {
548
+ SpanEnum super;
549
+ PriorityQueue *queue;
550
+ SpanEnum **span_enums;
551
+ int s_cnt;
552
+ bool first_time : 1;
553
+ } SpanOrEnum;
554
+
555
+
556
+ static bool span_less_than(SpanEnum *s1, SpanEnum *s2)
363
557
  {
364
- SpanEnum *s1 = (SpanEnum *)p1;
365
- SpanEnum *s2 = (SpanEnum *)p2;
366
- int doc_diff, start_diff;
367
- doc_diff = s1->doc(s1) - s2->doc(s2);
368
- if (doc_diff == 0) {
369
- start_diff = s1->start(s1) - s2->start(s2);
370
- if (start_diff == 0) {
371
- return s1->end(s1) < s2->end(s2);
372
- } else {
373
- return start_diff < 0;
558
+ int doc_diff, start_diff;
559
+ doc_diff = s1->doc(s1) - s2->doc(s2);
560
+ if (doc_diff == 0) {
561
+ start_diff = s1->start(s1) - s2->start(s2);
562
+ if (start_diff == 0) {
563
+ return s1->end(s1) < s2->end(s2);
564
+ }
565
+ else {
566
+ return start_diff < 0;
567
+ }
568
+ }
569
+ else {
570
+ return doc_diff < 0;
374
571
  }
375
- } else {
376
- return doc_diff < 0;
377
- }
378
572
  }
379
573
 
380
- bool spanoe_next(SpanEnum *self)
574
+ static bool spanoe_next(SpanEnum *self)
381
575
  {
382
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
383
- SpanEnum *se;
384
- int i;
576
+ SpanOrEnum *soe = SpOEn(self);
577
+ SpanEnum *se;
578
+ int i;
385
579
 
386
- if (soe->first_time) { /* first time -- initialize */
387
- for (i = 0; i < soe->s_cnt; i++) {
388
- se = soe->span_enums[i];
389
- if (se->next(se)) /* move to first entry */
390
- pq_push(soe->queue, se);
580
+ if (soe->first_time) { /* first time -- initialize */
581
+ for (i = 0; i < soe->s_cnt; i++) {
582
+ se = soe->span_enums[i];
583
+ if (se->next(se)) { /* move to first entry */
584
+ pq_push(soe->queue, se);
585
+ }
586
+ }
587
+ soe->first_time = false;
588
+ return soe->queue->size != 0;
391
589
  }
392
- soe->first_time = false;
393
- return soe->queue->count != 0;
394
- }
395
590
 
396
- if (soe->queue->count == 0) return false; /* all done */
591
+ if (soe->queue->size == 0) {
592
+ return false; /* all done */
593
+ }
397
594
 
398
- se = (SpanEnum *)pq_top(soe->queue);
399
- if (se->next(se)) { /* move to next */
400
- pq_down(soe->queue);
401
- return true;
402
- }
595
+ se = (SpanEnum *)pq_top(soe->queue);
596
+ if (se->next(se)) { /* move to next */
597
+ pq_down(soe->queue);
598
+ return true;
599
+ }
403
600
 
404
- pq_pop(soe->queue); /* exhausted a clause */
601
+ pq_pop(soe->queue); /* exhausted a clause */
405
602
 
406
- return soe->queue->count != 0;
603
+ return soe->queue->size != 0;
407
604
  }
408
605
 
409
- bool spanoe_skip_to(SpanEnum *self, int target)
606
+ static bool spanoe_skip_to(SpanEnum *self, int target)
410
607
  {
411
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
412
- SpanEnum *se;
413
- int i;
608
+ SpanOrEnum *soe = SpOEn(self);
609
+ SpanEnum *se;
610
+ int i;
414
611
 
415
- if (soe->first_time) { /* first time -- initialize */
416
- for (i = 0; i < soe->s_cnt; i++) {
417
- se = soe->span_enums[i];
418
- if (se->skip_to(se, target)) /* move to target */
419
- pq_push(soe->queue, se);
420
- }
421
- soe->first_time = false;
422
- } else {
423
- while ((soe->queue->count != 0) &&
424
- ((se=(SpanEnum *)pq_top(soe->queue))->doc(se) < target)) {
425
- if (se->skip_to(se, target)) {
426
- pq_down(soe->queue);
427
- } else {
428
- pq_pop(soe->queue);
429
- }
612
+ if (soe->first_time) { /* first time -- initialize */
613
+ for (i = 0; i < soe->s_cnt; i++) {
614
+ se = soe->span_enums[i];
615
+ if (se->skip_to(se, target)) {/* move to target */
616
+ pq_push(soe->queue, se);
617
+ }
618
+ }
619
+ soe->first_time = false;
620
+ }
621
+ else {
622
+ while ((soe->queue->size != 0) &&
623
+ ((se = (SpanEnum *)pq_top(soe->queue))->doc(se) < target)) {
624
+ if (se->skip_to(se, target)) {
625
+ pq_down(soe->queue);
626
+ }
627
+ else {
628
+ pq_pop(soe->queue);
629
+ }
630
+ }
430
631
  }
431
- }
432
632
 
433
- return soe->queue->count != 0;
633
+ return soe->queue->size != 0;
434
634
  }
435
635
 
436
- #define GET_TOP_SOE SpanOrEnum *soe = (SpanOrEnum *)self->data;\
437
- SpanEnum *se = (SpanEnum *)pq_top(soe->queue)
438
- int spanoe_doc(SpanEnum *self)
636
+ #define SpOEn_Top_SE(self) (SpanEnum *)pq_top(SpOEn(self)->queue)
637
+
638
+ static int spanoe_doc(SpanEnum *self)
439
639
  {
440
- GET_TOP_SOE;
441
- return se->doc(se);
640
+ SpanEnum *se = SpOEn_Top_SE(self);
641
+ return se->doc(se);
442
642
  }
443
643
 
444
- int spanoe_start(SpanEnum *self)
644
+ static int spanoe_start(SpanEnum *self)
445
645
  {
446
- GET_TOP_SOE;
447
- return se->start(se);
646
+ SpanEnum *se = SpOEn_Top_SE(self);
647
+ return se->start(se);
448
648
  }
449
649
 
450
- int spanoe_end(SpanEnum *self)
650
+ static int spanoe_end(SpanEnum *self)
451
651
  {
452
- GET_TOP_SOE;
453
- return se->end(se);
652
+ SpanEnum *se = SpOEn_Top_SE(self);
653
+ return se->end(se);
454
654
  }
455
655
 
456
- char *spanoe_to_s(SpanEnum *self)
656
+ static char *spanoe_to_s(SpanEnum *self)
457
657
  {
458
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
459
- char *field = ((SpanQuery *)self->query->data)->field;
460
- char *query_str = self->query->to_s(self->query, field);
461
- char doc_str[62];
462
- size_t len = strlen(query_str);
463
- char *str = ALLOC_N(char, len + 80);
658
+ SpanOrEnum *soe = SpOEn(self);
659
+ char *field = SpQ(self->query)->field;
660
+ char *query_str = self->query->to_s(self->query, field);
661
+ char doc_str[62];
662
+ size_t len = strlen(query_str);
663
+ char *str = ALLOC_N(char, len + 80);
464
664
 
465
- if (soe->first_time) {
466
- sprintf(doc_str, "START");
467
- } else {
468
- if (soe->queue->count == 0) {
469
- sprintf(doc_str, "END");
470
- } else {
471
- sprintf(doc_str, "%d:%d-%d", self->doc(self),
472
- self->start(self), self->end(self));
665
+ if (soe->first_time) {
666
+ sprintf(doc_str, "START");
473
667
  }
474
- }
475
- sprintf("SpanOrEnum(%s)@%s", query_str, doc_str);
476
- free(query_str);
477
- return str;
668
+ else {
669
+ if (soe->queue->size == 0) {
670
+ sprintf(doc_str, "END");
671
+ }
672
+ else {
673
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
674
+ self->start(self), self->end(self));
675
+ }
676
+ }
677
+ sprintf("SpanOrEnum(%s)@%s", query_str, doc_str);
678
+ free(query_str);
679
+ return str;
478
680
  }
479
681
 
480
- void spanoe_destroy(SpanEnum *self)
682
+ static void spanoe_destroy(SpanEnum *self)
481
683
  {
482
- SpanEnum *se;
483
- SpanOrEnum *soe = (SpanOrEnum *)self->data;
484
- int i;
485
- pq_destroy(soe->queue);
486
- for (i = 0; i < soe->s_cnt; i++) {
487
- se = soe->span_enums[i];
488
- se->destroy(se);
489
- }
490
- free(soe->span_enums);
491
- free(soe);
492
- free(self);
684
+ SpanEnum *se;
685
+ SpanOrEnum *soe = SpOEn(self);
686
+ int i;
687
+ pq_destroy(soe->queue);
688
+ for (i = 0; i < soe->s_cnt; i++) {
689
+ se = soe->span_enums[i];
690
+ se->destroy(se);
691
+ }
692
+ free(soe->span_enums);
693
+ free(self);
493
694
  }
494
695
 
495
- SpanEnum *spanoe_create(Query *query, IndexReader *ir)
696
+ SpanEnum *spanoe_new(Query *query, IndexReader *ir)
496
697
  {
497
- Query *clause;
498
- SpanEnum *self = ALLOC(SpanEnum);
499
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)query->data)->data;
500
- SpanOrEnum *soe = ALLOC(SpanOrEnum);
501
- int i;
502
- soe->first_time = true;
503
- soe->s_cnt = soq->c_cnt;
504
- soe->span_enums = ALLOC_N(SpanEnum *, soe->s_cnt);
505
- for (i = 0; i < soe->s_cnt; i++) {
506
- clause = soq->clauses[i];
507
- soe->span_enums[i] = ((SpanQuery *)clause->data)->get_spans(clause, ir);
508
- }
509
-
510
- soe->queue = pq_create(soe->s_cnt, &span_less_than);
698
+ Query *clause;
699
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanOrEnum));
700
+ SpanOrQuery *soq = SpOQ(query);
701
+ int i;
702
+
703
+ SpOEn(self)->first_time = true;
704
+ SpOEn(self)->s_cnt = soq->c_cnt;
705
+ SpOEn(self)->span_enums = ALLOC_N(SpanEnum *, SpOEn(self)->s_cnt);
511
706
 
512
- self->data = soe;
707
+ for (i = 0; i < SpOEn(self)->s_cnt; i++) {
708
+ clause = soq->clauses[i];
709
+ SpOEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
710
+ }
711
+
712
+ SpOEn(self)->queue = pq_new(SpOEn(self)->s_cnt, (lt_ft)&span_less_than,
713
+ (free_ft)NULL);
513
714
 
514
- self->query = query;
515
- self->next = &spanoe_next;
516
- self->skip_to = &spanoe_skip_to;
517
- self->doc = &spanoe_doc;
518
- self->start = &spanoe_start;
519
- self->end = &spanoe_end;
520
- self->destroy = &spanoe_destroy;
521
- self->to_s = &spanoe_to_s;
715
+ self->query = query;
716
+ self->next = &spanoe_next;
717
+ self->skip_to = &spanoe_skip_to;
718
+ self->doc = &spanoe_doc;
719
+ self->start = &spanoe_start;
720
+ self->end = &spanoe_end;
721
+ self->destroy = &spanoe_destroy;
722
+ self->to_s = &spanoe_to_s;
522
723
 
523
- return self;
724
+ return self;
524
725
  }
525
726
 
526
727
  /*****************************************************************************
527
- *
528
728
  * SpanNearEnum
529
- *
530
729
  *****************************************************************************/
531
730
 
532
- #define SNE_NEXT() do {\
533
- sne->current = (sne->current+1) % sne->s_cnt;\
534
- se = sne->span_enums[sne->current];\
731
+ #define SpNEn(span_enum) ((SpanNearEnum *)(span_enum))
732
+ #define SpNQ(query) ((SpanNearQuery *)(query))
733
+
734
+ typedef struct SpanNearEnum
735
+ {
736
+ SpanEnum super;
737
+ SpanEnum **span_enums;
738
+ int s_cnt;
739
+ int slop;
740
+ int current;
741
+ int doc;
742
+ int start;
743
+ int end;
744
+ bool first_time : 1;
745
+ bool in_order : 1;
746
+ } SpanNearEnum;
747
+
748
+
749
+ #define SpNEn_NEXT() do {\
750
+ sne->current = (sne->current+1) % sne->s_cnt;\
751
+ se = sne->span_enums[sne->current];\
535
752
  } while (0);
536
753
 
537
- bool sne_init(SpanNearEnum *sne)
754
+ static bool sne_init(SpanNearEnum *sne)
538
755
  {
539
- SpanEnum *se = sne->span_enums[sne->current];
540
- int prev_doc = se->doc(se);
541
- int i;
756
+ SpanEnum *se = sne->span_enums[sne->current];
757
+ int prev_doc = se->doc(se);
758
+ int i;
542
759
 
543
- for (i = 1; i < sne->s_cnt; i++) {
544
- SNE_NEXT();
545
- if (!se->skip_to(se, prev_doc)) return false;
546
- prev_doc = se->doc(se);
547
- }
548
- return true;
760
+ for (i = 1; i < sne->s_cnt; i++) {
761
+ SpNEn_NEXT();
762
+ if (!se->skip_to(se, prev_doc)) {
763
+ return false;
764
+ }
765
+ prev_doc = se->doc(se);
766
+ }
767
+ return true;
549
768
  }
550
769
 
551
- bool sne_goto_next_doc(SpanNearEnum *sne)
770
+ static bool sne_goto_next_doc(SpanNearEnum *sne)
552
771
  {
553
- SpanEnum *se = sne->span_enums[sne->current];
554
- int prev_doc = se->doc(se);
772
+ SpanEnum *se = sne->span_enums[sne->current];
773
+ int prev_doc = se->doc(se);
555
774
 
556
- SNE_NEXT();
775
+ SpNEn_NEXT();
557
776
 
558
- while (se->doc(se) < prev_doc) {
559
- if (! se->skip_to(se, prev_doc)) return false;
560
- prev_doc = se->doc(se);
561
- SNE_NEXT();
562
- }
563
- return true;
777
+ while (se->doc(se) < prev_doc) {
778
+ if (! se->skip_to(se, prev_doc)) {
779
+ return false;
780
+ }
781
+ prev_doc = se->doc(se);
782
+ SpNEn_NEXT();
783
+ }
784
+ return true;
564
785
  }
565
786
 
566
- bool sne_next_unordered_match(SpanEnum *self)
787
+ static bool sne_next_unordered_match(SpanEnum *self)
567
788
  {
568
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
569
- SpanEnum *se, *min_se = NULL;
570
- int i;
571
- int max_end, end, min_start, start, doc;
572
- int lengths_sum;
789
+ SpanNearEnum *sne = SpNEn(self);
790
+ SpanEnum *se, *min_se = NULL;
791
+ int i;
792
+ int max_end, end, min_start, start, doc;
793
+ int lengths_sum;
794
+
795
+ while (true) {
796
+ max_end = 0;
797
+ min_start = INT_MAX;
798
+ lengths_sum = 0;
799
+
800
+ for (i = 0; i < sne->s_cnt; i++) {
801
+ se = sne->span_enums[i];
802
+ if ((end=se->end(se)) > max_end) {
803
+ max_end = end;
804
+ }
805
+ if ((start=se->start(se)) < min_start) {
806
+ min_start = start;
807
+ min_se = se;
808
+ sne->current = i; /* current should point to the minimum span */
809
+ }
810
+ lengths_sum += end - start;
811
+ }
573
812
 
574
- while (true) {
813
+ if ((max_end - min_start - lengths_sum) <= sne->slop) {
814
+ /* we have a match */
815
+ sne->start = min_start;
816
+ sne->end = max_end;
817
+ sne->doc = min_se->doc(min_se);
818
+ return true;
819
+ }
575
820
 
576
- max_end = 0;
577
- min_start = INT_MAX;
578
- lengths_sum = 0;
821
+ /* increment the minimum span_enum and try again */
822
+ doc = min_se->doc(min_se);
823
+ if (!min_se->next(min_se)) {
824
+ return false;
825
+ }
826
+ if (doc < min_se->doc(min_se)) {
827
+ if (!sne_goto_next_doc(sne)) return false;
828
+ }
829
+ }
830
+ }
579
831
 
580
- for (i = 0; i < sne->s_cnt; i++) {
581
- se = sne->span_enums[i];
582
- if ((end=se->end(se)) > max_end) max_end = end;
583
- if ((start=se->start(se)) < min_start) {
584
- min_start = start;
585
- min_se = se;
586
- sne->current = i; /* current should point to the minimum span */
587
- }
588
- lengths_sum += end - start;
589
- }
590
-
591
- if ((max_end - min_start - lengths_sum) <= sne->slop) {
592
- /* we have a match */
593
- sne->start = min_start;
594
- sne->end = max_end;
595
- sne->doc = min_se->doc(min_se);
596
- return true;
597
- }
598
-
599
- /* increment the minimum span_enum and try again */
600
- doc = min_se->doc(min_se);
601
- if (!min_se->next(min_se)) return false;
602
- if (doc < min_se->doc(min_se)) {
603
- if (!sne_goto_next_doc(sne)) return false;
604
- }
605
- }
606
- }
607
-
608
- bool sne_next_ordered_match(SpanEnum *self)
609
- {
610
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
611
- SpanEnum *se;
612
- int i;
613
- int prev_doc, prev_start, prev_end;
614
- int doc=0, start=0, end=0;
615
- int lengths_sum;
616
-
617
- while (true) {
618
- se = sne->span_enums[0];
619
-
620
- prev_doc = se->doc(se);
621
- sne->start = prev_start = se->start(se);
622
- prev_end = se->end(se);
623
-
624
- i = 1;
625
- lengths_sum = prev_end - prev_start;
626
-
627
- while (i < sne->s_cnt) {
628
- se = sne->span_enums[i];
629
- doc = se->doc(se);
630
- start = se->start(se);
631
- end = se->end(se);
632
- while ((doc == prev_doc) && ((start < prev_start) ||
633
- ((start == prev_start) && (end < prev_end)))) {
634
- if (!se->next(se)) return false;
635
- doc = se->doc(se);
636
- start = se->start(se);
637
- end = se->end(se);
638
- }
639
- if (doc != prev_doc) {
640
- sne->current = i;
641
- if (!sne_goto_next_doc(sne)) return false;
642
- break;
643
- }
644
- i++;
645
- lengths_sum += end - start;
646
- prev_doc = doc;
647
- prev_start = start;
648
- prev_end = end;
649
- }
650
- if (i == sne->s_cnt) {
651
- if ((end - sne->start - lengths_sum) <= sne->slop) {
652
- /* we have a match */
653
- sne->end = end;
654
- sne->doc = doc;
655
-
656
- /* the minimum span is always the first span so it needs to be
657
- * incremented next time around */
658
- sne->current = 0;
659
- return true;
832
+ static bool sne_next_ordered_match(SpanEnum *self)
833
+ {
834
+ SpanNearEnum *sne = SpNEn(self);
835
+ SpanEnum *se;
836
+ int i;
837
+ int prev_doc, prev_start, prev_end;
838
+ int doc=0, start=0, end=0;
839
+ int lengths_sum;
660
840
 
661
- } else {
841
+ while (true) {
662
842
  se = sne->span_enums[0];
663
- if (!se->next(se)) return false;
664
- if (se->doc(se) != prev_doc) {
665
- sne->current = 0;
666
- if (!sne_goto_next_doc(sne)) return false;
843
+
844
+ prev_doc = se->doc(se);
845
+ sne->start = prev_start = se->start(se);
846
+ prev_end = se->end(se);
847
+
848
+ i = 1;
849
+ lengths_sum = prev_end - prev_start;
850
+
851
+ while (i < sne->s_cnt) {
852
+ se = sne->span_enums[i];
853
+ doc = se->doc(se);
854
+ start = se->start(se);
855
+ end = se->end(se);
856
+ while ((doc == prev_doc) && ((start < prev_start) ||
857
+ ((start == prev_start) && (end < prev_end)))) {
858
+ if (!se->next(se)) {
859
+ return false;
860
+ }
861
+ doc = se->doc(se);
862
+ start = se->start(se);
863
+ end = se->end(se);
864
+ }
865
+ if (doc != prev_doc) {
866
+ sne->current = i;
867
+ if (!sne_goto_next_doc(sne)) {
868
+ return false;
869
+ }
870
+ break;
871
+ }
872
+ i++;
873
+ lengths_sum += end - start;
874
+ prev_doc = doc;
875
+ prev_start = start;
876
+ prev_end = end;
877
+ }
878
+ if (i == sne->s_cnt) {
879
+ if ((end - sne->start - lengths_sum) <= sne->slop) {
880
+ /* we have a match */
881
+ sne->end = end;
882
+ sne->doc = doc;
883
+
884
+ /* the minimum span is always the first span so it needs to be
885
+ * incremented next time around */
886
+ sne->current = 0;
887
+ return true;
888
+
889
+ }
890
+ else {
891
+ se = sne->span_enums[0];
892
+ if (!se->next(se)) {
893
+ return false;
894
+ }
895
+ if (se->doc(se) != prev_doc) {
896
+ sne->current = 0;
897
+ if (!sne_goto_next_doc(sne)) {
898
+ return false;
899
+ }
900
+ }
901
+ }
667
902
  }
668
- }
669
903
  }
670
- }
671
904
  }
672
-
673
- bool sne_next_match(SpanEnum *self)
905
+
906
+ static bool sne_next_match(SpanEnum *self)
674
907
  {
675
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
676
- SpanEnum *se_curr, *se_next;
908
+ SpanNearEnum *sne = SpNEn(self);
909
+ SpanEnum *se_curr, *se_next;
677
910
 
678
- if (!sne->first_time) {
679
- if (!sne_init(sne)) return false;
680
- sne->first_time = false;
681
- }
682
- se_curr = sne->span_enums[sne->current];
683
- se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
684
- if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
685
- if (!sne_goto_next_doc(sne)) return false;
686
- }
911
+ if (!sne->first_time) {
912
+ if (!sne_init(sne)) {
913
+ return false;
914
+ }
915
+ sne->first_time = false;
916
+ }
917
+ se_curr = sne->span_enums[sne->current];
918
+ se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
919
+ if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
920
+ if (!sne_goto_next_doc(sne)) {
921
+ return false;
922
+ }
923
+ }
687
924
 
688
- if (sne->in_order) {
689
- return sne_next_ordered_match(self);
690
- } else {
691
- return sne_next_unordered_match(self);
692
- }
925
+ if (sne->in_order) {
926
+ return sne_next_ordered_match(self);
927
+ }
928
+ else {
929
+ return sne_next_unordered_match(self);
930
+ }
693
931
  }
694
932
 
695
- bool spanne_next(SpanEnum *self)
933
+ static bool spanne_next(SpanEnum *self)
696
934
  {
697
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
698
- SpanEnum *se;
935
+ SpanNearEnum *sne = SpNEn(self);
936
+ SpanEnum *se;
699
937
 
700
- se = sne->span_enums[sne->current];
701
- if (!se->next(se)) return false;
938
+ se = sne->span_enums[sne->current];
939
+ if (!se->next(se)) return false;
702
940
 
703
- return sne_next_match(self);
941
+ return sne_next_match(self);
704
942
  }
705
943
 
706
- bool spanne_skip_to(SpanEnum *self, int target)
944
+ static bool spanne_skip_to(SpanEnum *self, int target)
707
945
  {
708
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
709
- SpanEnum *se;
710
-
711
- se = sne->span_enums[sne->current];
712
- if (!se->skip_to(se, target)) return false;
946
+ SpanEnum *se = SpNEn(self)->span_enums[SpNEn(self)->current];
947
+ if (!se->skip_to(se, target)) {
948
+ return false;
949
+ }
713
950
 
714
- return sne_next_match(self);
951
+ return sne_next_match(self);
715
952
  }
716
953
 
717
- #define GET_TOP_SNE SpanNearEnum *sne = (SpanNearEnum *)self->data;
718
-
719
- int spanne_doc(SpanEnum *self)
954
+ static int spanne_doc(SpanEnum *self)
720
955
  {
721
- GET_TOP_SNE;
722
- return sne->doc;
956
+ return SpNEn(self)->doc;
723
957
  }
724
958
 
725
- int spanne_start(SpanEnum *self)
959
+ static int spanne_start(SpanEnum *self)
726
960
  {
727
- GET_TOP_SNE;
728
- return sne->start;
961
+ return SpNEn(self)->start;
729
962
  }
730
963
 
731
- int spanne_end(SpanEnum *self)
964
+ static int spanne_end(SpanEnum *self)
732
965
  {
733
- GET_TOP_SNE;
734
- return sne->end;
966
+ return SpNEn(self)->end;
735
967
  }
736
968
 
737
- char *spanne_to_s(SpanEnum *self)
969
+ static char *spanne_to_s(SpanEnum *self)
738
970
  {
739
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
740
- char *field = ((SpanQuery *)self->query->data)->field;
741
- char *query_str = self->query->to_s(self->query, field);
742
- char doc_str[62];
743
- size_t len = strlen(query_str);
744
- char *str = ALLOC_N(char, len + 80);
971
+ SpanNearEnum *sne = SpNEn(self);
972
+ char *field = SpQ(self->query)->field;
973
+ char *query_str = self->query->to_s(self->query, field);
974
+ char doc_str[62];
975
+ size_t len = strlen(query_str);
976
+ char *str = ALLOC_N(char, len + 80);
745
977
 
746
- if (sne->first_time) {
747
- sprintf(doc_str, "START");
748
- } else {
749
- sprintf(doc_str, "%d:%d-%d", self->doc(self),
750
- self->start(self), self->end(self));
751
- }
752
- sprintf("SpanNearEnum(%s)@%s", query_str, doc_str);
753
- free(query_str);
754
- return str;
978
+ if (sne->first_time) {
979
+ sprintf(doc_str, "START");
980
+ }
981
+ else {
982
+ sprintf(doc_str, "%d:%d-%d", self->doc(self),
983
+ self->start(self), self->end(self));
984
+ }
985
+ sprintf("SpanNearEnum(%s)@%s", query_str, doc_str);
986
+ free(query_str);
987
+ return str;
755
988
  }
756
989
 
757
- void spanne_destroy(SpanEnum *self)
990
+ static void spanne_destroy(SpanEnum *self)
758
991
  {
759
- SpanEnum *se;
760
- SpanNearEnum *sne = (SpanNearEnum *)self->data;
761
- int i;
762
- for (i = 0; i < sne->s_cnt; i++) {
763
- se = sne->span_enums[i];
764
- se->destroy(se);
765
- }
766
- free(sne->span_enums);
767
- free(sne);
768
- free(self);
992
+ SpanEnum *se;
993
+ SpanNearEnum *sne = SpNEn(self);
994
+ int i;
995
+ for (i = 0; i < sne->s_cnt; i++) {
996
+ se = sne->span_enums[i];
997
+ se->destroy(se);
998
+ }
999
+ free(sne->span_enums);
1000
+ free(self);
769
1001
  }
770
1002
 
771
- SpanEnum *spanne_create(Query *query, IndexReader *ir)
1003
+ static SpanEnum *spanne_new(Query *query, IndexReader *ir)
772
1004
  {
773
- Query *clause;
774
- SpanEnum *self = ALLOC(SpanEnum);
775
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)query->data)->data;
776
- SpanNearEnum *sne = ALLOC(SpanNearEnum);
777
- int i;
778
- sne->first_time = true;
779
- sne->in_order = snq->in_order;
780
- sne->slop = snq->slop;
781
- sne->s_cnt = snq->c_cnt;
782
- sne->span_enums = ALLOC_N(SpanEnum *, sne->s_cnt);
783
- for (i = 0; i < sne->s_cnt; i++) {
784
- clause = snq->clauses[i];
785
- sne->span_enums[i] = ((SpanQuery *)clause->data)->get_spans(clause, ir);
786
- }
787
- sne->current = 0;
788
-
789
- sne->doc = -1;
790
- sne->start = -1;
791
- sne->end = -1;
1005
+ int i;
1006
+ Query *clause;
1007
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanNearEnum));
1008
+ SpanNearQuery *snq = SpNQ(query);
1009
+
1010
+ SpNEn(self)->first_time = true;
1011
+ SpNEn(self)->in_order = snq->in_order;
1012
+ SpNEn(self)->slop = snq->slop;
1013
+ SpNEn(self)->s_cnt = snq->c_cnt;
1014
+ SpNEn(self)->span_enums = ALLOC_N(SpanEnum *, SpNEn(self)->s_cnt);
1015
+
1016
+ for (i = 0; i < SpNEn(self)->s_cnt; i++) {
1017
+ clause = snq->clauses[i];
1018
+ SpNEn(self)->span_enums[i] = SpQ(clause)->get_spans(clause, ir);
1019
+ }
1020
+ SpNEn(self)->current = 0;
792
1021
 
793
- self->data = sne;
1022
+ SpNEn(self)->doc = -1;
1023
+ SpNEn(self)->start = -1;
1024
+ SpNEn(self)->end = -1;
794
1025
 
795
- self->query = query;
796
- self->next = &spanne_next;
797
- self->skip_to = &spanne_skip_to;
798
- self->doc = &spanne_doc;
799
- self->start = &spanne_start;
800
- self->end = &spanne_end;
801
- self->destroy = &spanne_destroy;
802
- self->to_s = &spanne_to_s;
1026
+ self->query = query;
1027
+ self->next = &spanne_next;
1028
+ self->skip_to = &spanne_skip_to;
1029
+ self->doc = &spanne_doc;
1030
+ self->start = &spanne_start;
1031
+ self->end = &spanne_end;
1032
+ self->destroy = &spanne_destroy;
1033
+ self->to_s = &spanne_to_s;
803
1034
 
804
- return self;
1035
+ return self;
805
1036
  }
806
1037
 
807
1038
  /*****************************************************************************
@@ -810,211 +1041,348 @@ SpanEnum *spanne_create(Query *query, IndexReader *ir)
810
1041
  *
811
1042
  *****************************************************************************/
812
1043
 
813
- bool spanxe_next(SpanEnum *self)
1044
+ #define SpXEn(span_enum) ((SpanNotEnum *)(span_enum))
1045
+ #define SpXQ(query) ((SpanNotQuery *)(query))
1046
+
1047
+ typedef struct SpanNotEnum
814
1048
  {
815
- SpanNotEnum *sxe = (SpanNotEnum *)(self->data);
816
- SpanEnum *inc = sxe->inc, *exc = sxe->exc;
817
- if (sxe->more_inc) { // move to next incl
818
- sxe->more_inc = inc->next(inc);
819
- }
1049
+ SpanEnum super;
1050
+ SpanEnum *inc;
1051
+ SpanEnum *exc;
1052
+ bool more_inc : 1;
1053
+ bool more_exc : 1;
1054
+ } SpanNotEnum;
820
1055
 
821
- while (sxe->more_inc && sxe->more_exc) {
822
- if (inc->doc(inc) > exc->doc(exc)) { // skip excl
823
- sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
824
- }
825
1056
 
826
- while (sxe->more_exc && // while excl is before
827
- (inc->doc(inc) == exc->doc(exc)) &&
828
- (exc->end(exc) <= inc->start(inc))) {
829
- sxe->more_exc = exc->next(exc); // increment excl
1057
+ static bool spanxe_next(SpanEnum *self)
1058
+ {
1059
+ SpanNotEnum *sxe = SpXEn(self);
1060
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1061
+ if (sxe->more_inc) { /* move to next incl */
1062
+ sxe->more_inc = inc->next(inc);
830
1063
  }
831
1064
 
832
- if (! sxe->more_exc || // if no intersection
833
- (inc->doc(inc) != exc->doc(exc)) ||
834
- inc->end(inc) <= exc->start(exc)) {
835
- break; // we found a match
836
- }
1065
+ while (sxe->more_inc && sxe->more_exc) {
1066
+ if (inc->doc(inc) > exc->doc(exc)) { /* skip excl */
1067
+ sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
1068
+ }
1069
+
1070
+ while (sxe->more_exc /* while excl is before */
1071
+ && (inc->doc(inc) == exc->doc(exc))
1072
+ && (exc->end(exc) <= inc->start(inc))) {
1073
+ sxe->more_exc = exc->next(exc); /* increment excl */
1074
+ }
1075
+
1076
+ if (! sxe->more_exc || /* if no intersection */
1077
+ (inc->doc(inc) != exc->doc(exc)) ||
1078
+ inc->end(inc) <= exc->start(exc)) {
1079
+ break; /* we found a match */
1080
+ }
837
1081
 
838
- sxe->more_inc = inc->next(inc); // intersected: keep scanning
839
- }
840
- return sxe->more_inc;
1082
+ sxe->more_inc = inc->next(inc); /* intersected: keep scanning */
1083
+ }
1084
+ return sxe->more_inc;
841
1085
  }
842
1086
 
843
- bool spanxe_skip_to(SpanEnum *self, int target)
1087
+ static bool spanxe_skip_to(SpanEnum *self, int target)
844
1088
  {
845
- SpanNotEnum *sxe = (SpanNotEnum *)(self->data);
846
- SpanEnum *inc = sxe->inc, *exc = sxe->exc;
847
- int doc;
1089
+ SpanNotEnum *sxe = SpXEn(self);
1090
+ SpanEnum *inc = sxe->inc, *exc = sxe->exc;
1091
+ int doc;
848
1092
 
849
- if (sxe->more_inc) { // move to next incl
850
- if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
851
- }
1093
+ if (sxe->more_inc) { /* move to next incl */
1094
+ if (!(sxe->more_inc=sxe->inc->skip_to(sxe->inc, target))) return false;
1095
+ }
852
1096
 
853
- if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
854
- sxe->more_exc = exc->skip_to(exc, doc);
855
- }
1097
+ if (sxe->more_inc && ((doc=inc->doc(inc)) > exc->doc(exc))) {
1098
+ sxe->more_exc = exc->skip_to(exc, doc);
1099
+ }
856
1100
 
857
- while (sxe->more_exc && // while excl is before
858
- inc->doc(inc) == exc->doc(exc) &&
859
- exc->end(exc) <= inc->start(inc)) {
860
- sxe->more_exc = exc->next(exc); // increment excl
861
- }
1101
+ while (sxe->more_exc /* while excl is before */
1102
+ && inc->doc(inc) == exc->doc(exc)
1103
+ && exc->end(exc) <= inc->start(inc)) {
1104
+ sxe->more_exc = exc->next(exc); /* increment excl */
1105
+ }
862
1106
 
863
- if (!sxe->more_exc || // if no intersection
1107
+ if (!sxe->more_exc || /* if no intersection */
864
1108
  inc->doc(inc) != exc->doc(exc) ||
865
1109
  inc->end(inc) <= exc->start(exc)) {
866
- return true; // we found a match
867
- }
1110
+ return true; /* we found a match */
1111
+ }
868
1112
 
869
- return spanxe_next(self); // scan to next match
1113
+ return spanxe_next(self); /* scan to next match */
870
1114
  }
871
1115
 
872
- int spanxe_doc(SpanEnum *self)
1116
+ static int spanxe_doc(SpanEnum *self)
873
1117
  {
874
- SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
875
- return inc->doc(inc);
1118
+ SpanEnum *inc = SpXEn(self)->inc;
1119
+ return inc->doc(inc);
876
1120
  }
877
1121
 
878
- int spanxe_start(SpanEnum *self)
1122
+ static int spanxe_start(SpanEnum *self)
879
1123
  {
880
- SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
881
- return inc->start(inc);
1124
+ SpanEnum *inc = SpXEn(self)->inc;
1125
+ return inc->start(inc);
882
1126
  }
883
1127
 
884
- int spanxe_end(SpanEnum *self)
1128
+ static int spanxe_end(SpanEnum *self)
885
1129
  {
886
- SpanEnum *inc = ((SpanNotEnum *)(self->data))->inc;
887
- return inc->end(inc);
1130
+ SpanEnum *inc = SpXEn(self)->inc;
1131
+ return inc->end(inc);
888
1132
  }
889
1133
 
890
- char *spanxe_to_s(SpanEnum *self)
1134
+ static char *spanxe_to_s(SpanEnum *self)
891
1135
  {
892
- char *field = ((SpanQuery *)self->query->data)->field;
893
- char *query_str = self->query->to_s(self->query, field);
894
- char *res = strfmt("SpanNotEnum(%s)", query_str);
895
- free(query_str);
896
- return res;
1136
+ char *field = SpQ(self->query)->field;
1137
+ char *query_str = self->query->to_s(self->query, field);
1138
+ char *res = strfmt("SpanNotEnum(%s)", query_str);
1139
+ free(query_str);
1140
+ return res;
897
1141
  }
898
1142
 
899
- void spanxe_destroy(SpanEnum *self)
1143
+ static void spanxe_destroy(SpanEnum *self)
900
1144
  {
901
- SpanNotEnum *sxe = (SpanNotEnum *)self->data;
902
- sxe->inc->destroy(sxe->inc);
903
- sxe->exc->destroy(sxe->exc);
904
- free(sxe);
905
- free(self);
1145
+ SpanNotEnum *sxe = SpXEn(self);
1146
+ sxe->inc->destroy(sxe->inc);
1147
+ sxe->exc->destroy(sxe->exc);
1148
+ free(self);
906
1149
  }
907
1150
 
908
- SpanEnum *spanxe_create(Query *query, IndexReader *ir)
1151
+ static SpanEnum *spanxe_new(Query *query, IndexReader *ir)
909
1152
  {
910
- SpanEnum *self = ALLOC(SpanEnum);
911
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)query->data)->data;
1153
+ SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanNotEnum));
1154
+ SpanNotEnum *sxe = SpXEn(self);
1155
+ SpanNotQuery *sxq = SpXQ(query);
912
1156
 
913
- SpanNotEnum *sxe = self->data = ALLOC(SpanNotEnum);
914
- sxe->inc = ((SpanQuery *)sxq->inc->data)->get_spans(sxq->inc, ir);
915
- sxe->exc = ((SpanQuery *)sxq->exc->data)->get_spans(sxq->exc, ir);
916
- sxe->more_inc = true;
917
- sxe->more_exc = sxe->exc->next(sxe->exc);
1157
+ sxe->inc = SpQ(sxq->inc)->get_spans(sxq->inc, ir);
1158
+ sxe->exc = SpQ(sxq->exc)->get_spans(sxq->exc, ir);
1159
+ sxe->more_inc = true;
1160
+ sxe->more_exc = sxe->exc->next(sxe->exc);
918
1161
 
919
- self->query = query;
920
- self->next = &spanxe_next;
921
- self->skip_to = &spanxe_skip_to;
922
- self->doc = &spanxe_doc;
923
- self->start = &spanxe_start;
924
- self->end = &spanxe_end;
925
- self->destroy = &spanxe_destroy;
926
- self->to_s = &spanxe_to_s;
1162
+ self->query = query;
1163
+ self->next = &spanxe_next;
1164
+ self->skip_to = &spanxe_skip_to;
1165
+ self->doc = &spanxe_doc;
1166
+ self->start = &spanxe_start;
1167
+ self->end = &spanxe_end;
1168
+ self->destroy = &spanxe_destroy;
1169
+ self->to_s = &spanxe_to_s;
927
1170
 
928
- return self;
1171
+ return self;
929
1172
  }
1173
+
930
1174
  /*****************************************************************************
931
1175
  *
932
- * SpanQuery
1176
+ * SpanWeight
933
1177
  *
934
1178
  *****************************************************************************/
935
1179
 
936
- void spanq_destroy(Query *self)
1180
+ #define SpW(weight) ((SpanWeight *)(weight))
1181
+ typedef struct SpanWeight
1182
+ {
1183
+ Weight super;
1184
+ HashSet *terms;
1185
+ } SpanWeight;
1186
+
1187
+ static Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
1188
+ {
1189
+ Explanation *expl;
1190
+ Explanation *idf_expl1;
1191
+ Explanation *idf_expl2;
1192
+ Explanation *query_expl;
1193
+ Explanation *qnorm_expl;
1194
+ Explanation *field_expl;
1195
+ Explanation *tf_expl;
1196
+ Scorer *scorer;
1197
+ uchar *field_norms;
1198
+ float field_norm;
1199
+ Explanation *field_norm_expl;
1200
+
1201
+ char *query_str;
1202
+ HashSet *terms = SpW(self)->terms;
1203
+ char *field = SpQ(self->query)->field;
1204
+ const int field_num = fis_get_field_num(ir->fis, field);
1205
+ char *doc_freqs = NULL;
1206
+ size_t df_i = 0;
1207
+ int i;
1208
+
1209
+ if (field_num < 0) {
1210
+ return expl_new(0.0, "field \"%s\" does not exist in the index", field);
1211
+ }
1212
+
1213
+ query_str = self->query->to_s(self->query, "");
1214
+
1215
+ for (i = 0; i < terms->size; i++) {
1216
+ char *term = (char *)terms->elems[i];
1217
+ REALLOC_N(doc_freqs, char, df_i + strlen(term) + 23);
1218
+ sprintf(doc_freqs + df_i, "%s=%d, ", term,
1219
+ ir->doc_freq(ir, field_num, term));
1220
+ df_i = strlen(doc_freqs);
1221
+ }
1222
+ /* remove the ',' at the end of the string if it exists */
1223
+ if (terms->size > 0) {
1224
+ df_i -= 2;
1225
+ doc_freqs[df_i] = '\0';
1226
+ }
1227
+ else {
1228
+ doc_freqs = "";
1229
+ }
1230
+
1231
+ expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, target);
1232
+
1233
+ /* We need two of these as it's included in both the query explanation
1234
+ * and the field explanation */
1235
+ idf_expl1 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1236
+ idf_expl2 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
1237
+ if (terms->size > 0) {
1238
+ free(doc_freqs); /* only free if allocated */
1239
+ }
1240
+
1241
+ /* explain query weight */
1242
+ query_expl = expl_new(0.0, "query_weight(%s), product of:", query_str);
1243
+
1244
+ if (self->query->boost != 1.0) {
1245
+ expl_add_detail(query_expl, expl_new(self->query->boost, "boost"));
1246
+ }
1247
+
1248
+ expl_add_detail(query_expl, idf_expl1);
1249
+
1250
+ qnorm_expl = expl_new(self->qnorm, "query_norm");
1251
+ expl_add_detail(query_expl, qnorm_expl);
1252
+
1253
+ query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
1254
+
1255
+ expl_add_detail(expl, query_expl);
1256
+
1257
+ /* explain field weight */
1258
+ field_expl = expl_new(0.0, "field_weight(%s:%s in %d), product of:",
1259
+ field, query_str, target);
1260
+ free(query_str);
1261
+
1262
+ scorer = self->scorer(self, ir);
1263
+ tf_expl = scorer->explain(scorer, target);
1264
+ scorer->destroy(scorer);
1265
+ expl_add_detail(field_expl, tf_expl);
1266
+ expl_add_detail(field_expl, idf_expl2);
1267
+
1268
+ field_norms = ir->get_norms(ir, field_num);
1269
+ field_norm = (field_norms
1270
+ ? sim_decode_norm(self->similarity, field_norms[target])
1271
+ : (float)0.0);
1272
+ field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
1273
+ field, target);
1274
+ expl_add_detail(field_expl, field_norm_expl);
1275
+
1276
+ field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
1277
+
1278
+ /* combine them */
1279
+ if (query_expl->value == 1.0) {
1280
+ expl_destroy(expl);
1281
+ return field_expl;
1282
+ }
1283
+ else {
1284
+ expl->value = (query_expl->value * field_expl->value);
1285
+ expl_add_detail(expl, field_expl);
1286
+ return expl;
1287
+ }
1288
+ }
1289
+
1290
+ static char *spanw_to_s(Weight *self)
1291
+ {
1292
+ return strfmt("SpanWeight(%f)", self->value);
1293
+ }
1294
+
1295
+ static void spanw_destroy(Weight *self)
937
1296
  {
938
- SpanQuery *sq = (SpanQuery *)self->data;
939
- free(sq);
940
- q_destroy_i(self);
1297
+ hs_destroy(SpW(self)->terms);
1298
+ w_destroy(self);
1299
+ }
1300
+
1301
+ static Weight *spanw_new(Query *query, Searcher *searcher)
1302
+ {
1303
+ int i;
1304
+ Weight *self = w_new(SpanWeight, query);
1305
+ HashSet *terms = SpQ(query)->get_terms(query);
1306
+
1307
+ SpW(self)->terms = terms;
1308
+ self->scorer = &spansc_new;
1309
+ self->explain = &spanw_explain;
1310
+ self->to_s = &spanw_to_s;
1311
+ self->destroy = &spanw_destroy;
1312
+
1313
+ self->similarity = query->get_similarity(query, searcher);
1314
+
1315
+ self->idf = 0.0;
1316
+
1317
+ for (i = terms->size - 1; i >= 0; i--) {
1318
+ self->idf += sim_idf_term(self->similarity, SpQ(query)->field,
1319
+ (char *)terms->elems[i], searcher);
1320
+ }
1321
+
1322
+ return self;
941
1323
  }
942
1324
 
943
1325
  /*****************************************************************************
944
- *
945
1326
  * SpanTermQuery
946
- *
947
1327
  *****************************************************************************/
948
1328
 
949
- char *spantq_to_s(Query *self, char *field)
1329
+ static char *spantq_to_s(Query *self, const char *field)
950
1330
  {
951
- Term *term = (Term *)((SpanQuery *)self->data)->data;
952
- char *term_str, *res;
953
- if (field == term->field) {
954
- term_str = estrdup(term->text);
955
- } else {
956
- term_str = term_to_s(term);
957
- }
958
- res = strfmt("span_term(%s)", term_str);
959
- free(term_str);
960
- return res;
1331
+ if (field == SpQ(self)->field) {
1332
+ return strfmt("span_terms(%s)", SpTQ(self)->term);
1333
+ }
1334
+ else {
1335
+ return strfmt("span_terms(%s:%s)", SpQ(self)->field, SpTQ(self)->term);
1336
+ }
961
1337
  }
962
1338
 
963
- static void spantq_destroy(Query *self)
1339
+ static void spantq_destroy_i(Query *self)
964
1340
  {
965
- SpanQuery *sq = (SpanQuery *)self->data;
966
- if (self->destroy_all) {
967
- Term *term = (Term *)sq->data;
968
- term_destroy(term);
969
- }
970
- free(sq);
971
- q_destroy_i(self);
1341
+ free(SpTQ(self)->term);
1342
+ free(SpQ(self)->field);
1343
+ spanq_destroy_i(self);
972
1344
  }
973
1345
 
974
1346
  static void spantq_extract_terms(Query *self, HashSet *terms)
975
1347
  {
976
- Term *term = (Term *)((SpanQuery *)self->data)->data;
977
- hs_add(terms, term_clone(term));
1348
+ hs_add(terms, term_new(SpQ(self)->field, SpTQ(self)->term));
978
1349
  }
979
1350
 
980
1351
  static HashSet *spantq_get_terms(Query *self)
981
1352
  {
982
- Term *term = (Term *)((SpanQuery *)self->data)->data;
983
- HashSet *terms = term_set_create();
984
- hs_add(terms, term_clone(term));
985
- return terms;
1353
+ HashSet *terms = hs_new_str(&free);
1354
+ hs_add(terms, estrdup(SpTQ(self)->term));
1355
+ return terms;
986
1356
  }
987
1357
 
988
- static uint spantq_hash(Query *self)
1358
+ static ulong spantq_hash(Query *self)
989
1359
  {
990
- return term_hash((Term *)((SpanQuery *)self->data)->data);
1360
+ return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
991
1361
  }
992
1362
 
993
1363
  static int spantq_eq(Query *self, Query *o)
994
1364
  {
995
- return term_eq((Term *)((SpanQuery *)self->data)->data,
996
- (Term *)((SpanQuery *)o->data)->data);
1365
+ return spanq_eq(self, o) && strcmp(SpTQ(self)->term, SpTQ(o)->term) == 0;
997
1366
  }
998
1367
 
999
- Query *spantq_create(Term *term)
1368
+ Query *spantq_new(const char *field, const char *term)
1000
1369
  {
1001
- Query *self = q_create();
1370
+ Query *self = q_new(SpanTermQuery);
1002
1371
 
1003
- SpanQuery *sq = ALLOC(SpanQuery);
1004
- sq->data = term;
1005
- sq->get_spans = &spante_create;
1006
- sq->get_terms = &spantq_get_terms;
1007
- sq->field = term->field;
1008
- self->data = sq;
1372
+ SpTQ(self)->term = estrdup(term);
1373
+ SpQ(self)->field = estrdup(field);
1374
+ SpQ(self)->get_spans = &spante_new;
1375
+ SpQ(self)->get_terms = &spantq_get_terms;
1009
1376
 
1010
- self->type = SPAN_TERM_QUERY;
1011
- self->extract_terms = &spantq_extract_terms;
1012
- self->to_s = &spantq_to_s;
1013
- self->hash = &spantq_hash;
1014
- self->eq = &spantq_eq;
1015
- self->destroy_i = &spantq_destroy;
1016
- self->create_weight_i = &spanw_create;
1017
- return self;
1377
+ self->type = SPAN_TERM_QUERY;
1378
+ self->extract_terms = &spantq_extract_terms;
1379
+ self->to_s = &spantq_to_s;
1380
+ self->hash = &spantq_hash;
1381
+ self->eq = &spantq_eq;
1382
+ self->destroy_i = &spantq_destroy_i;
1383
+ self->create_weight_i = &spanw_new;
1384
+ self->get_matchv_i = &spanq_get_matchv_i;
1385
+ return self;
1018
1386
  }
1019
1387
 
1020
1388
  /*****************************************************************************
@@ -1023,91 +1391,87 @@ Query *spantq_create(Term *term)
1023
1391
  *
1024
1392
  *****************************************************************************/
1025
1393
 
1026
- char *spanfq_to_s(Query *self, char *field)
1394
+ static char *spanfq_to_s(Query *self, const char *field)
1027
1395
  {
1028
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1029
- Query *match = sfq->match;
1030
- char *q_str = match->to_s(match, field);
1031
- char *res = strfmt("span_first(%s, %d)", q_str, sfq->end);
1032
- free(q_str);
1033
- return res;
1396
+ Query *match = SpFQ(self)->match;
1397
+ char *q_str = match->to_s(match, field);
1398
+ char *res = strfmt("span_first(%s, %d)", q_str, SpFQ(self)->end);
1399
+ free(q_str);
1400
+ return res;
1034
1401
  }
1035
1402
 
1036
- void spanfq_extract_terms(Query *self, HashSet *terms)
1403
+ static void spanfq_extract_terms(Query *self, HashSet *terms)
1037
1404
  {
1038
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1039
- sfq->match->extract_terms(sfq->match, terms);
1405
+ SpFQ(self)->match->extract_terms(SpFQ(self)->match, terms);
1040
1406
  }
1041
1407
 
1042
- HashSet *spanfq_get_terms(Query *self)
1408
+ static HashSet *spanfq_get_terms(Query *self)
1043
1409
  {
1044
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1045
- SpanQuery *match_sq = (SpanQuery *)sfq->match->data;
1046
- return match_sq->get_terms(sfq->match);
1410
+ SpanFirstQuery *sfq = SpFQ(self);
1411
+ return SpQ(sfq->match)->get_terms(sfq->match);
1047
1412
  }
1048
1413
 
1049
- Query *spanfq_rewrite(Query *self, IndexReader *ir)
1414
+ static Query *spanfq_rewrite(Query *self, IndexReader *ir)
1050
1415
  {
1051
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1052
- Query *q, *rq;
1416
+ Query *q, *rq;
1053
1417
 
1054
- q = sfq->match;
1055
- rq = q->rewrite(q, ir);
1056
- if (rq == q || self->destroy_all) q_deref(q);
1057
- sfq->match = rq;
1418
+ q = SpFQ(self)->match;
1419
+ rq = q->rewrite(q, ir);
1420
+ q_deref(q);
1421
+ SpFQ(self)->match = rq;
1058
1422
 
1059
- self->ref_cnt++;
1060
- return self; /* no clauses rewrote */
1423
+ self->ref_cnt++;
1424
+ return self; /* no clauses rewrote */
1061
1425
  }
1062
1426
 
1063
- void spanfq_destroy(Query *self)
1427
+ static void spanfq_destroy_i(Query *self)
1064
1428
  {
1065
- SpanQuery *sq = (SpanQuery *)self->data;
1066
- SpanFirstQuery *sfq = (SpanFirstQuery *)sq->data;
1067
- if (self->destroy_all) q_deref(sfq->match);
1068
- free(sfq);
1069
- free(sq);
1070
- q_destroy_i(self);
1429
+ q_deref(SpFQ(self)->match);
1430
+ spanq_destroy_i(self);
1071
1431
  }
1072
1432
 
1073
- static uint spanfq_hash(Query *self)
1433
+ static ulong spanfq_hash(Query *self)
1074
1434
  {
1075
- SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1076
- return sfq->match->hash(sfq->match) ^ sfq->end;
1435
+ return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
1436
+ ^ SpFQ(self)->end;
1077
1437
  }
1078
1438
 
1079
1439
  static int spanfq_eq(Query *self, Query *o)
1080
1440
  {
1081
- SpanFirstQuery *sfq1 = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1082
- SpanFirstQuery *sfq2 = (SpanFirstQuery *)((SpanQuery *)o->data)->data;
1083
- return sfq1->match->eq(sfq1->match, sfq2->match) && (sfq1->end == sfq2->end);
1441
+ SpanFirstQuery *sfq1 = SpFQ(self);
1442
+ SpanFirstQuery *sfq2 = SpFQ(o);
1443
+ return spanq_eq(self, o) && sfq1->match->eq(sfq1->match, sfq2->match)
1444
+ && (sfq1->end == sfq2->end);
1084
1445
  }
1085
1446
 
1086
- Query *spanfq_create(Query *match, int end)
1447
+ Query *spanfq_new_nr(Query *match, int end)
1087
1448
  {
1088
- Query *self = q_create();
1449
+ Query *self = q_new(SpanFirstQuery);
1089
1450
 
1090
- SpanQuery *sq = ALLOC(SpanQuery);
1451
+ SpFQ(self)->match = match;
1452
+ SpFQ(self)->end = end;
1091
1453
 
1092
- SpanFirstQuery *sfq = ALLOC(SpanFirstQuery);
1093
- sfq->match = match;
1094
- sfq->end = end;
1095
- sq->data = sfq;
1454
+ SpQ(self)->field = SpQ(match)->field;
1455
+ SpQ(self)->get_spans = &spanfe_new;
1456
+ SpQ(self)->get_terms = &spanfq_get_terms;
1096
1457
 
1097
- sq->get_spans = &spanfe_create;
1098
- sq->get_terms = &spanfq_get_terms;
1099
- sq->field = ((SpanQuery *)match->data)->field;
1100
- self->data = sq;
1458
+ self->type = SPAN_FIRST_QUERY;
1459
+ self->rewrite = &spanfq_rewrite;
1460
+ self->extract_terms = &spanfq_extract_terms;
1461
+ self->to_s = &spanfq_to_s;
1462
+ self->hash = &spanfq_hash;
1463
+ self->eq = &spanfq_eq;
1464
+ self->destroy_i = &spanfq_destroy_i;
1465
+ self->create_weight_i = &spanw_new;
1466
+ self->get_matchv_i = &spanq_get_matchv_i;
1467
+
1468
+ return self;
1469
+ }
1101
1470
 
1102
- self->type = SPAN_FIRST_QUERY;
1103
- self->rewrite = &spanfq_rewrite;
1104
- self->extract_terms = &spanfq_extract_terms;
1105
- self->to_s = &spanfq_to_s;
1106
- self->hash = &spanfq_hash;
1107
- self->eq = &spanfq_eq;
1108
- self->destroy_i = &spanfq_destroy;
1109
- self->create_weight_i = &spanw_create;
1110
- return self;
1471
+ Query *spanfq_new(Query *match, int end)
1472
+ {
1473
+ REF(match);
1474
+ return spanfq_new_nr(match, end);
1111
1475
  }
1112
1476
 
1113
1477
  /*****************************************************************************
@@ -1116,154 +1480,182 @@ Query *spanfq_create(Query *match, int end)
1116
1480
  *
1117
1481
  *****************************************************************************/
1118
1482
 
1119
- char *spanoq_to_s(Query *self, char *field)
1120
- {
1121
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1122
- char *res = estrdup("span_or["), *q_str;
1123
- Query *clause;
1124
- int i;
1125
- for (i = 0; i < soq->c_cnt; i++) {
1126
- clause = soq->clauses[i];
1127
- q_str = clause->to_s(clause, field);
1128
- REALLOC_N(res, char, strlen(res) + strlen(q_str) + 10);
1129
- if (i > 0) strcat(res, ", ");
1130
- strcat(res, q_str);
1131
- free(q_str);
1132
- }
1133
- strcat(res, "]");
1483
+ static char *spanoq_to_s(Query *self, const char *field)
1484
+ {
1485
+ int i;
1486
+ SpanOrQuery *soq = SpOQ(self);
1487
+ char *res, *res_p;
1488
+ char **q_strs = ALLOC_N(char *, soq->c_cnt);
1489
+ int len = 50;
1490
+ for (i = 0; i < soq->c_cnt; i++) {
1491
+ Query *clause = soq->clauses[i];
1492
+ q_strs[i] = clause->to_s(clause, field);
1493
+ len += strlen(q_strs[i]) + 2;
1494
+ }
1134
1495
 
1135
- return res;
1496
+ res_p = res = ALLOC_N(char, len);
1497
+ sprintf(res_p, "span_or[ ");
1498
+ res_p += strlen(res_p);
1499
+ for (i = 0; i < soq->c_cnt; i++) {
1500
+ sprintf(res_p, "%s, ", q_strs[i]);
1501
+ free(q_strs[i]);
1502
+ res_p += strlen(res_p);
1503
+ }
1504
+ free(q_strs);
1505
+
1506
+ sprintf(res_p - 2, " ]");
1507
+ return res;
1136
1508
  }
1137
1509
 
1138
- void spanoq_extract_terms(Query *self, HashSet *terms)
1510
+ static void spanoq_extract_terms(Query *self, HashSet *terms)
1139
1511
  {
1140
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1141
- Query *clause;
1142
- int i;
1143
- for (i = 0; i < soq->c_cnt; i++) {
1144
- clause = soq->clauses[i];
1145
- clause->extract_terms(clause, terms);
1146
- }
1512
+ SpanOrQuery *soq = SpOQ(self);
1513
+ int i;
1514
+ for (i = 0; i < soq->c_cnt; i++) {
1515
+ Query *clause = soq->clauses[i];
1516
+ clause->extract_terms(clause, terms);
1517
+ }
1147
1518
  }
1148
1519
 
1149
- HashSet *spanoq_get_terms(Query *self)
1520
+ static HashSet *spanoq_get_terms(Query *self)
1150
1521
  {
1151
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1152
- HashSet *terms = term_set_create();
1153
- Query *clause;
1154
- int i;
1155
- for (i = 0; i < soq->c_cnt; i++) {
1156
- clause = soq->clauses[i];
1157
- clause->extract_terms(clause, terms);
1158
- }
1522
+ SpanOrQuery *soq = SpOQ(self);
1523
+ HashSet *terms = hs_new_str(&free);
1524
+ int i;
1525
+ for (i = 0; i < soq->c_cnt; i++) {
1526
+ Query *clause = soq->clauses[i];
1527
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
1528
+ hs_merge(terms, sub_terms);
1529
+ }
1159
1530
 
1160
- return terms;
1531
+ return terms;
1161
1532
  }
1162
1533
 
1163
- SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1534
+ static SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1164
1535
  {
1165
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1166
- Query *q;
1167
- if (soq->c_cnt == 1) {
1168
- q = soq->clauses[0];
1169
- return ((SpanQuery *)q->data)->get_spans(q, ir);
1170
- }
1536
+ SpanOrQuery *soq = SpOQ(self);
1537
+ if (soq->c_cnt == 1) {
1538
+ Query *q = soq->clauses[0];
1539
+ return SpQ(q)->get_spans(q, ir);
1540
+ }
1171
1541
 
1172
- return spanoe_create(self, ir);
1542
+ return spanoe_new(self, ir);
1173
1543
  }
1174
1544
 
1175
- Query *spanoq_rewrite(Query *self, IndexReader *ir)
1545
+ static Query *spanoq_rewrite(Query *self, IndexReader *ir)
1176
1546
  {
1177
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1178
- Query *clause, *rewritten;
1179
- int i;
1180
- /* replace clauses with their rewritten queries */
1181
- for (i = 0; i < soq->c_cnt; i++) {
1182
- clause = soq->clauses[i];
1183
- rewritten = clause->rewrite(clause, ir);
1184
- if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1185
- soq->clauses[i] = rewritten;
1186
- }
1547
+ SpanOrQuery *soq = SpOQ(self);
1548
+ int i;
1187
1549
 
1188
- self->ref_cnt++;
1189
- return self;
1550
+ /* replace clauses with their rewritten queries */
1551
+ for (i = 0; i < soq->c_cnt; i++) {
1552
+ Query *clause = soq->clauses[i];
1553
+ Query *rewritten = clause->rewrite(clause, ir);
1554
+ q_deref(clause);
1555
+ soq->clauses[i] = rewritten;
1556
+ }
1557
+
1558
+ self->ref_cnt++;
1559
+ return self;
1190
1560
  }
1191
1561
 
1192
- void spanoq_destroy(Query *self)
1562
+ static void spanoq_destroy_i(Query *self)
1193
1563
  {
1194
- SpanQuery *sq = (SpanQuery *)self->data;
1195
- SpanOrQuery *soq = (SpanOrQuery *)sq->data;
1564
+ SpanOrQuery *soq = SpOQ(self);
1196
1565
 
1197
- if (self->destroy_all) {
1198
- Query *clause;
1199
1566
  int i;
1200
1567
  for (i = 0; i < soq->c_cnt; i++) {
1201
- clause = soq->clauses[i];
1202
- q_deref(clause);
1568
+ Query *clause = soq->clauses[i];
1569
+ q_deref(clause);
1203
1570
  }
1204
1571
  free(soq->clauses);
1205
- }
1206
1572
 
1207
-
1208
- free(soq);
1209
- free(sq);
1210
- q_destroy_i(self);
1573
+ spanq_destroy_i(self);
1211
1574
  }
1212
1575
 
1213
- static uint spanoq_hash(Query *self)
1576
+ static ulong spanoq_hash(Query *self)
1214
1577
  {
1215
- int i;
1216
- uint hash = 0;
1217
- Query *q;
1218
- SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1578
+ int i;
1579
+ ulong hash = spanq_hash(self);
1580
+ SpanOrQuery *soq = SpOQ(self);
1219
1581
 
1220
- for (i = 0; i < soq->c_cnt; i++) {
1221
- q = soq->clauses[i];
1222
- hash ^= q->hash(q);
1223
- }
1224
- return hash;
1582
+ for (i = 0; i < soq->c_cnt; i++) {
1583
+ Query *q = soq->clauses[i];
1584
+ hash ^= q->hash(q);
1585
+ }
1586
+ return hash;
1225
1587
  }
1226
1588
 
1227
1589
  static int spanoq_eq(Query *self, Query *o)
1228
1590
  {
1229
- int i;
1230
- Query *q1, *q2;
1231
- SpanOrQuery *soq1 = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1232
- SpanOrQuery *soq2 = (SpanOrQuery *)((SpanQuery *)o->data)->data;
1233
- if (soq1->c_cnt != soq2->c_cnt) return false;
1234
- for (i = 0; i < soq1->c_cnt; i++) {
1235
- q1 = soq1->clauses[i];
1236
- q2 = soq2->clauses[i];
1237
- if (!q1->eq(q1, q2)) return false;
1238
- }
1239
- return true;
1591
+ int i;
1592
+ Query *q1, *q2;
1593
+ SpanOrQuery *soq1 = SpOQ(self);
1594
+ SpanOrQuery *soq2 = SpOQ(o);
1595
+
1596
+ if (!spanq_eq(self, o) || soq1->c_cnt != soq2->c_cnt) {
1597
+ return false;
1598
+ }
1599
+ for (i = 0; i < soq1->c_cnt; i++) {
1600
+ q1 = soq1->clauses[i];
1601
+ q2 = soq2->clauses[i];
1602
+ if (!q1->eq(q1, q2)) {
1603
+ return false;
1604
+ }
1605
+ }
1606
+ return true;
1240
1607
  }
1241
1608
 
1242
- Query *spanoq_create(Query **clauses, int c_cnt)
1609
+ Query *spanoq_new()
1243
1610
  {
1244
- Query *self = q_create();
1611
+ Query *self = q_new(SpanOrQuery);
1612
+ SpOQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
1613
+ SpOQ(self)->c_capa = CLAUSE_INIT_CAPA;
1614
+
1615
+ SpQ(self)->field = (char *)EMPTY_STRING;
1616
+ SpQ(self)->get_spans = &spanoq_get_spans;
1617
+ SpQ(self)->get_terms = &spanoq_get_terms;
1245
1618
 
1246
- SpanQuery *sq = ALLOC(SpanQuery);
1619
+ self->type = SPAN_OR_QUERY;
1620
+ self->rewrite = &spanoq_rewrite;
1621
+ self->extract_terms = &spanoq_extract_terms;
1622
+ self->to_s = &spanoq_to_s;
1623
+ self->hash = &spanoq_hash;
1624
+ self->eq = &spanoq_eq;
1625
+ self->destroy_i = &spanoq_destroy_i;
1626
+ self->create_weight_i = &spanw_new;
1627
+ self->get_matchv_i = &spanq_get_matchv_i;
1247
1628
 
1248
- SpanOrQuery *soq = ALLOC(SpanOrQuery);
1249
- soq->clauses = clauses;
1250
- soq->c_cnt = c_cnt;
1251
- sq->data = soq;
1629
+ return self;
1630
+ }
1252
1631
 
1253
- sq->get_spans = &spanoq_get_spans;
1254
- sq->get_terms = &spanoq_get_terms;
1255
- sq->field = ((SpanQuery *)clauses[0]->data)->field;
1256
- self->data = sq;
1632
+ Query *spanoq_add_clause_nr(Query *self, Query *clause)
1633
+ {
1634
+ const int curr_index = SpOQ(self)->c_cnt++;
1635
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
1636
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanOrQuery. This is not a "
1637
+ "SpanQuery.", q_get_query_name(clause->type));
1638
+ }
1639
+ if (curr_index == 0) {
1640
+ SpQ(self)->field = SpQ(clause)->field;
1641
+ }
1642
+ else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
1643
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1644
+ "Attempted to add a SpanQuery with field \"%s\" to a SpanOrQuery "
1645
+ "with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
1646
+ }
1647
+ if (curr_index >= SpOQ(self)->c_capa) {
1648
+ SpOQ(self)->c_capa <<= 1;
1649
+ REALLOC_N(SpOQ(self)->clauses, Query *, SpOQ(self)->c_capa);
1650
+ }
1651
+ SpOQ(self)->clauses[curr_index] = clause;
1652
+ return clause;
1653
+ }
1257
1654
 
1258
- self->type = SPAN_OR_QUERY;
1259
- self->rewrite = &spanoq_rewrite;
1260
- self->extract_terms = &spanoq_extract_terms;
1261
- self->to_s = &spanoq_to_s;
1262
- self->hash = &spanoq_hash;
1263
- self->eq = &spanoq_eq;
1264
- self->destroy_i = &spanoq_destroy;
1265
- self->create_weight_i = &spanw_create;
1266
- return self;
1655
+ Query *spanoq_add_clause(Query *self, Query *clause)
1656
+ {
1657
+ REF(clause);
1658
+ return spanoq_add_clause_nr(self, clause);
1267
1659
  }
1268
1660
 
1269
1661
  /*****************************************************************************
@@ -1272,163 +1664,188 @@ Query *spanoq_create(Query **clauses, int c_cnt)
1272
1664
  *
1273
1665
  *****************************************************************************/
1274
1666
 
1275
- char *spannq_to_s(Query *self, char *field)
1276
- {
1277
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1278
- char *res = estrdup("span_near(["), *q_str;
1279
- Query *clause;
1280
- int i;
1281
- for (i = 0; i < snq->c_cnt; i++) {
1282
- clause = snq->clauses[i];
1283
- q_str = clause->to_s(clause, field);
1284
- REALLOC_N(res, char, strlen(res) + strlen(q_str) + 10);
1285
- if (i > 0) strcat(res, ", ");
1286
- strcat(res, q_str);
1287
- free(q_str);
1288
- }
1289
- REALLOC_N(res, char, strlen(res) + 40);
1290
- sprintf(res + strlen(res), "], %d, %s)", snq->slop,
1291
- snq->in_order ? "Ordered" : "Unordered");
1667
+ static char *spannq_to_s(Query *self, const char *field)
1668
+ {
1669
+ int i;
1670
+ SpanNearQuery *snq = SpNQ(self);
1671
+ char *res, *res_p;
1672
+ char **q_strs = ALLOC_N(char *, snq->c_cnt);
1673
+ int len = 50;
1674
+ for (i = 0; i < snq->c_cnt; i++) {
1675
+ Query *clause = snq->clauses[i];
1676
+ q_strs[i] = clause->to_s(clause, field);
1677
+ len += strlen(q_strs[i]) + 2; /* +2 for the ", " separator written after every clause below */
1678
+ }
1292
1679
 
1293
- return res;
1680
+ res_p = res = ALLOC_N(char, len);
1681
+ sprintf(res_p, "span_near[ ");
1682
+ res_p += strlen(res_p);
1683
+ for (i = 0; i < snq->c_cnt; i++) {
1684
+ sprintf(res_p, "%s, ", q_strs[i]);
1685
+ free(q_strs[i]);
1686
+ res_p += strlen(res_p);
1687
+ }
1688
+ free(q_strs);
1689
+
1690
+ sprintf(res_p - (snq->c_cnt > 0 ? 2 : 1), " ]"); /* overwrite the trailing ", "; with no clauses only step back over the space so the '[' survives */
1691
+ return res;
1294
1692
  }
1295
1693
 
1296
- void spannq_extract_terms(Query *self, HashSet *terms)
1694
+ static void spannq_extract_terms(Query *self, HashSet *terms)
1297
1695
  {
1298
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1299
- Query *clause;
1300
- int i;
1301
- for (i = 0; i < snq->c_cnt; i++) {
1302
- clause = snq->clauses[i];
1303
- clause->extract_terms(clause, terms);
1304
- }
1696
+ SpanNearQuery *snq = SpNQ(self);
1697
+ int i;
1698
+ for (i = 0; i < snq->c_cnt; i++) {
1699
+ Query *clause = snq->clauses[i];
1700
+ clause->extract_terms(clause, terms);
1701
+ }
1305
1702
  }
1306
1703
 
1307
- HashSet *spannq_get_terms(Query *self)
1704
+ static HashSet *spannq_get_terms(Query *self)
1308
1705
  {
1309
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1310
- HashSet *terms = term_set_create();
1311
- Query *clause;
1312
- int i;
1313
- for (i = 0; i < snq->c_cnt; i++) {
1314
- clause = snq->clauses[i];
1315
- clause->extract_terms(clause, terms);
1316
- }
1706
+ SpanNearQuery *snq = SpNQ(self);
1707
+ HashSet *terms = hs_new_str(&free);
1708
+ int i;
1709
+ for (i = 0; i < snq->c_cnt; i++) {
1710
+ Query *clause = snq->clauses[i];
1711
+ HashSet *sub_terms = SpQ(clause)->get_terms(clause);
1712
+ hs_merge(terms, sub_terms);
1713
+ }
1317
1714
 
1318
- return terms;
1715
+ return terms;
1319
1716
  }
1320
1717
 
1321
- SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
1718
+ static SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
1322
1719
  {
1323
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1324
- Query *q;
1720
+ SpanNearQuery *snq = SpNQ(self);
1325
1721
 
1326
- if (snq->c_cnt == 1) {
1327
- q = snq->clauses[0];
1328
- return ((SpanQuery *)q->data)->get_spans(q, ir);
1329
- }
1722
+ if (snq->c_cnt == 1) {
1723
+ Query *q = snq->clauses[0];
1724
+ return SpQ(q)->get_spans(q, ir);
1725
+ }
1330
1726
 
1331
- return spanne_create(self, ir);
1727
+ return spanne_new(self, ir);
1332
1728
  }
1333
1729
 
1334
- Query *spannq_rewrite(Query *self, IndexReader *ir)
1730
+ static Query *spannq_rewrite(Query *self, IndexReader *ir)
1335
1731
  {
1336
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1337
- Query *clause, *rewritten;
1338
- int i;
1339
- for (i = 0; i < snq->c_cnt; i++) {
1340
- clause = snq->clauses[i];
1341
- rewritten = clause->rewrite(clause, ir);
1342
- if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1343
- snq->clauses[i] = rewritten;
1344
- }
1732
+ SpanNearQuery *snq = SpNQ(self);
1733
+ int i;
1734
+ for (i = 0; i < snq->c_cnt; i++) {
1735
+ Query *clause = snq->clauses[i];
1736
+ Query *rewritten = clause->rewrite(clause, ir);
1737
+ q_deref(clause);
1738
+ snq->clauses[i] = rewritten;
1739
+ }
1345
1740
 
1346
- self->ref_cnt++;
1347
- return self;
1741
+ self->ref_cnt++;
1742
+ return self;
1348
1743
  }
1349
1744
 
1350
- void spannq_destroy(Query *self)
1745
+ static void spannq_destroy(Query *self)
1351
1746
  {
1352
- SpanQuery *sq = (SpanQuery *)self->data;
1353
- SpanNearQuery *snq = (SpanNearQuery *)sq->data;
1747
+ SpanNearQuery *snq = SpNQ(self);
1354
1748
 
1355
- if (self->destroy_all) {
1356
- Query *clause;
1357
1749
  int i;
1358
1750
  for (i = 0; i < snq->c_cnt; i++) {
1359
- clause = snq->clauses[i];
1360
- q_deref(clause);
1751
+ Query *clause = snq->clauses[i];
1752
+ q_deref(clause);
1361
1753
  }
1362
1754
  free(snq->clauses);
1363
- }
1364
1755
 
1365
- free(snq);
1366
- free(sq);
1367
- q_destroy_i(self);
1756
+ spanq_destroy_i(self);
1368
1757
  }
1369
1758
 
1370
- static uint spannq_hash(Query *self)
1759
+ static ulong spannq_hash(Query *self)
1371
1760
  {
1372
- int i;
1373
- uint hash = 0;
1374
- Query *q;
1375
- SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1761
+ int i;
1762
+ ulong hash = spanq_hash(self);
1763
+ SpanNearQuery *snq = SpNQ(self);
1376
1764
 
1377
- for (i = 0; i < snq->c_cnt; i++) {
1378
- q = snq->clauses[i];
1379
- hash ^= q->hash(q);
1380
- }
1381
- return ((hash ^ snq->slop) << 1) | snq->in_order;
1765
+ for (i = 0; i < snq->c_cnt; i++) {
1766
+ Query *q = snq->clauses[i];
1767
+ hash ^= q->hash(q);
1768
+ }
1769
+ return ((hash ^ snq->slop) << 1) | snq->in_order;
1382
1770
  }
1383
1771
 
1384
1772
  static int spannq_eq(Query *self, Query *o)
1385
1773
  {
1386
- int i;
1387
- Query *q1, *q2;
1388
- SpanNearQuery *snq1 = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1389
- SpanNearQuery *snq2 = (SpanNearQuery *)((SpanQuery *)o->data)->data;
1390
- if (snq1->c_cnt != snq2->c_cnt ||
1391
- snq1->slop != snq2->slop ||
1392
- snq1->in_order != snq2->in_order) {
1393
- return false;
1394
- }
1774
+ int i;
1775
+ Query *q1, *q2;
1776
+ SpanNearQuery *snq1 = SpNQ(self);
1777
+ SpanNearQuery *snq2 = SpNQ(o);
1778
+ if (! spanq_eq(self, o)
1779
+ || (snq1->c_cnt != snq2->c_cnt)
1780
+ || (snq1->slop != snq2->slop)
1781
+ || (snq1->in_order != snq2->in_order)) {
1782
+ return false;
1783
+ }
1395
1784
 
1396
- for (i = 0; i < snq1->c_cnt; i++) {
1397
- q1 = snq1->clauses[i];
1398
- q2 = snq2->clauses[i];
1399
- if (!q1->eq(q1, q2)) return false;
1400
- }
1785
+ for (i = 0; i < snq1->c_cnt; i++) {
1786
+ q1 = snq1->clauses[i];
1787
+ q2 = snq2->clauses[i];
1788
+ if (!q1->eq(q1, q2)) {
1789
+ return false;
1790
+ }
1791
+ }
1401
1792
 
1402
- return true;
1793
+ return true;
1403
1794
  }
1404
1795
 
1405
- Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1796
+ Query *spannq_new(int slop, bool in_order)
1406
1797
  {
1407
- Query *self = q_create();
1798
+ Query *self = q_new(SpanNearQuery);
1408
1799
 
1409
- SpanQuery *sq = ALLOC(SpanQuery);
1800
+ SpNQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
1801
+ SpNQ(self)->c_capa = CLAUSE_INIT_CAPA;
1802
+ SpNQ(self)->slop = slop;
1803
+ SpNQ(self)->in_order = in_order;
1410
1804
 
1411
- SpanNearQuery *snq = ALLOC(SpanNearQuery);
1412
- snq->clauses = clauses;
1413
- snq->c_cnt = c_cnt;
1414
- snq->slop = slop;
1415
- snq->in_order = in_order;
1416
- sq->data = snq;
1805
+ SpQ(self)->get_spans = &spannq_get_spans;
1806
+ SpQ(self)->get_terms = &spannq_get_terms;
1807
+ SpQ(self)->field = (char *)EMPTY_STRING;
1417
1808
 
1418
- sq->get_spans = &spannq_get_spans;
1419
- sq->get_terms = &spannq_get_terms;
1420
- sq->field = ((SpanQuery *)clauses[0]->data)->field;
1421
- self->data = sq;
1809
+ self->type = SPAN_NEAR_QUERY;
1810
+ self->rewrite = &spannq_rewrite;
1811
+ self->extract_terms = &spannq_extract_terms;
1812
+ self->to_s = &spannq_to_s;
1813
+ self->hash = &spannq_hash;
1814
+ self->eq = &spannq_eq;
1815
+ self->destroy_i = &spannq_destroy;
1816
+ self->create_weight_i = &spanw_new;
1817
+ self->get_matchv_i = &spanq_get_matchv_i;
1422
1818
 
1423
- self->type = SPAN_NEAR_QUERY;
1424
- self->rewrite = &spannq_rewrite;
1425
- self->extract_terms = &spannq_extract_terms;
1426
- self->to_s = &spannq_to_s;
1427
- self->hash = &spannq_hash;
1428
- self->eq = &spannq_eq;
1429
- self->destroy_i = &spannq_destroy;
1430
- self->create_weight_i = &spanw_create;
1431
- return self;
1819
+ return self;
1820
+ }
1821
+
1822
+ Query *spannq_add_clause_nr(Query *self, Query *clause)
1823
+ {
1824
+ const int curr_index = SpNQ(self)->c_cnt; /* slot is only claimed once the clause has been validated and stored */
1825
+ if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
1826
+ RAISE(ARG_ERROR, "Tried to add a %s to a SpanNearQuery. This is not a "
1827
+ "SpanQuery.", q_get_query_name(clause->type));
1828
+ }
1829
+ if (curr_index == 0) {
1830
+ SpQ(self)->field = SpQ(clause)->field;
1831
+ }
1832
+ else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
1833
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1834
+ "Attempted to add a SpanQuery with field \"%s\" to SpanNearQuery "
1835
+ "with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
1836
+ }
1837
+ if (curr_index >= SpNQ(self)->c_capa) {
1838
+ SpNQ(self)->c_capa <<= 1;
1839
+ REALLOC_N(SpNQ(self)->clauses, Query *, SpNQ(self)->c_capa);
1840
+ }
1841
+ SpNQ(self)->clauses[SpNQ(self)->c_cnt++] = clause; /* bump c_cnt here, not before the RAISE checks (presumably non-local exits), so it never counts an unwritten slot */
1842
+ return clause;
1843
+ }
1844
+
1845
+ Query *spannq_add_clause(Query *self, Query *clause)
1846
+ {
1847
+ REF(clause);
1848
+ return spannq_add_clause_nr(self, clause);
1432
1849
  }
1433
1850
 
1434
1851
  /*****************************************************************************
@@ -1437,213 +1854,110 @@ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1437
1854
  *
1438
1855
  *****************************************************************************/
1439
1856
 
1440
- char *spanxq_to_s(Query *self, char *field)
1857
+ static char *spanxq_to_s(Query *self, const char *field)
1441
1858
  {
1442
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1443
- char *inc_s = sxq->inc->to_s(sxq->inc, field);
1444
- char *exc_s = sxq->exc->to_s(sxq->exc, field);
1445
- char *res = strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
1446
-
1447
- free(inc_s);
1448
- free(exc_s);
1449
- return res;
1859
+ SpanNotQuery *sxq = SpXQ(self);
1860
+ char *inc_s = sxq->inc->to_s(sxq->inc, field);
1861
+ char *exc_s = sxq->exc->to_s(sxq->exc, field);
1862
+ char *res = strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
1863
+
1864
+ free(inc_s);
1865
+ free(exc_s);
1866
+ return res;
1450
1867
  }
1451
1868
 
1452
- void spanxq_extract_terms(Query *self, HashSet *terms)
1869
+ static void spanxq_extract_terms(Query *self, HashSet *terms)
1453
1870
  {
1454
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1455
- sxq->inc->extract_terms(sxq->inc, terms);
1871
+ SpXQ(self)->inc->extract_terms(SpXQ(self)->inc, terms);
1456
1872
  }
1457
1873
 
1458
- HashSet *spanxq_get_terms(Query *self)
1874
+ static HashSet *spanxq_get_terms(Query *self)
1459
1875
  {
1460
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1461
- HashSet *terms = term_set_create();
1462
- sxq->inc->extract_terms(sxq->inc, terms);
1463
- return terms;
1876
+ return SpQ(SpXQ(self)->inc)->get_terms(SpXQ(self)->inc);
1464
1877
  }
1465
1878
 
1466
- Query *spanxq_rewrite(Query *self, IndexReader *ir)
1879
+ static Query *spanxq_rewrite(Query *self, IndexReader *ir)
1467
1880
  {
1468
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1469
- Query *q, *rq;
1881
+ SpanNotQuery *sxq = SpXQ(self);
1882
+ Query *q, *rq;
1470
1883
 
1471
- /* rewrite inclusive query */
1472
- q = sxq->inc;
1473
- rq = q->rewrite(q, ir);
1474
- if (rq == q || self->destroy_all) q_deref(q);
1475
- sxq->inc = rq;
1884
+ /* rewrite inclusive query */
1885
+ q = sxq->inc;
1886
+ rq = q->rewrite(q, ir);
1887
+ q_deref(q);
1888
+ sxq->inc = rq;
1476
1889
 
1477
- /* rewrite exclusive query */
1478
- q = sxq->exc;
1479
- rq = q->rewrite(q, ir);
1480
- if (rq == q || self->destroy_all) q_deref(q);
1481
- sxq->exc = rq;
1890
+ /* rewrite exclusive query */
1891
+ q = sxq->exc;
1892
+ rq = q->rewrite(q, ir);
1893
+ q_deref(q);
1894
+ sxq->exc = rq;
1482
1895
 
1483
- self->ref_cnt++;
1484
- return self;
1896
+ self->ref_cnt++;
1897
+ return self;
1485
1898
  }
1486
1899
 
1487
- void spanxq_destroy(Query *self)
1900
+ static void spanxq_destroy(Query *self)
1488
1901
  {
1489
- SpanQuery *sq = (SpanQuery *)self->data;
1490
- SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
1902
+ SpanNotQuery *sxq = SpXQ(self);
1491
1903
 
1492
- if (self->destroy_all) {
1493
1904
  q_deref(sxq->inc);
1494
1905
  q_deref(sxq->exc);
1495
- }
1496
1906
 
1497
- free(sxq);
1498
- free(sq);
1499
- q_destroy_i(self);
1907
+ spanq_destroy_i(self);
1500
1908
  }
1501
1909
 
1502
- static uint spanxq_hash(Query *self)
1910
+ static ulong spanxq_hash(Query *self)
1503
1911
  {
1504
- SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1505
- return sxq->inc->hash(sxq->inc) ^ sxq->exc->hash(sxq->exc);
1912
+ SpanNotQuery *sxq = SpXQ(self);
1913
+ return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
1914
+ ^ sxq->exc->hash(sxq->exc);
1506
1915
  }
1507
1916
 
1508
1917
  static int spanxq_eq(Query *self, Query *o)
1509
1918
  {
1510
- SpanNotQuery *sxq1 = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1511
- SpanNotQuery *sxq2 = (SpanNotQuery *)((SpanQuery *)o->data)->data;
1512
- return sxq1->inc->eq(sxq1->inc, sxq2->inc) &&
1513
- sxq1->exc->eq(sxq1->exc, sxq2->exc);
1514
- }
1515
-
1516
-
1517
- Query *spanxq_create(Query *inc, Query *exc)
1518
- {
1519
- Query *self = q_create();
1520
-
1521
- SpanQuery *sq = ALLOC(SpanQuery);
1522
-
1523
- SpanNotQuery *sxq = ALLOC(SpanNotQuery);
1524
- sxq->inc = inc;
1525
- sxq->exc = exc;
1526
- sq->data = sxq;
1527
-
1528
- sq->get_spans = &spanxe_create;
1529
- sq->get_terms = &spanxq_get_terms;
1530
- sq->field = ((SpanQuery *)inc->data)->field;
1531
- self->data = sq;
1532
-
1533
- self->type = SPAN_NOT_QUERY;
1534
- self->rewrite = &spanxq_rewrite;
1535
- self->extract_terms = &spanxq_extract_terms;
1536
- self->to_s = &spanxq_to_s;
1537
- self->hash = &spanxq_hash;
1538
- self->eq = &spanxq_eq;
1539
- self->destroy_i = &spanxq_destroy;
1540
- self->create_weight_i = &spanw_create;
1541
-
1542
- return self;
1543
- }
1544
-
1545
- /***************************************************************************
1546
- *
1547
- * SpanScorer
1548
- *
1549
- ***************************************************************************/
1550
-
1551
- float spansc_score(Scorer *self)
1552
- {
1553
- SpanScorer *spansc = (SpanScorer *)self->data;
1554
- float raw = sim_tf(spansc->sim, spansc->freq) * spansc->value;
1555
-
1556
- /* normalize */
1557
- return raw * sim_decode_norm(self->similarity, spansc->norms[self->doc]);
1919
+ SpanNotQuery *sxq1 = SpXQ(self);
1920
+ SpanNotQuery *sxq2 = SpXQ(o);
1921
+ return spanq_eq(self, o) && sxq1->inc->eq(sxq1->inc, sxq2->inc)
1922
+ && sxq1->exc->eq(sxq1->exc, sxq2->exc);
1558
1923
  }
1559
1924
 
1560
- bool spansc_next(Scorer *self)
1561
- {
1562
- SpanScorer *spansc = (SpanScorer *)self->data;
1563
- SpanEnum *se = spansc->spans;
1564
- int match_length;
1565
-
1566
- if (spansc->first_time) {
1567
- spansc->more = se->next(se);
1568
- spansc->first_time = false;
1569
- }
1570
-
1571
- if (!spansc->more) return false;
1572
-
1573
- spansc->freq = 0.0;
1574
- self->doc = se->doc(se);
1575
-
1576
- while (spansc->more && (self->doc == se->doc(se))) {
1577
- match_length = se->end(se) - se->start(se);
1578
- spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
1579
- spansc->more = se->next(se);
1580
- }
1581
-
1582
- return (spansc->more || (spansc->freq != 0.0));
1583
- }
1584
1925
 
1585
- bool spansc_skip_to(Scorer *self, int target)
1926
+ Query *spanxq_new_nr(Query *inc, Query *exc)
1586
1927
  {
1587
- SpanScorer *spansc = (SpanScorer *)self->data;
1588
- SpanEnum *se = spansc->spans;
1589
-
1590
- spansc->more = se->skip_to(se, target);
1928
+ Query *self;
1929
+ if (strcmp(SpQ(inc)->field, SpQ(exc)->field) != 0) { /* compare inc against exc; inc against itself could never mismatch */
1930
+ RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
1931
+ "Attempted to add a SpanQuery with field \"%s\" along with a "
1932
+ "SpanQuery with field \"%s\" to an SpanNotQuery",
1933
+ SpQ(inc)->field, SpQ(exc)->field);
1934
+ }
1935
+ self = q_new(SpanNotQuery);
1591
1936
 
1592
- if (!spansc->more) return false;
1937
+ SpXQ(self)->inc = inc;
1938
+ SpXQ(self)->exc = exc;
1593
1939
 
1594
- spansc->freq = 0.0;
1595
- self->doc = se->doc(se);
1940
+ SpQ(self)->field = SpQ(inc)->field;
1941
+ SpQ(self)->get_spans = &spanxe_new;
1942
+ SpQ(self)->get_terms = &spanxq_get_terms;
1596
1943
 
1597
- while (spansc->more && (se->doc(se) == target)) {
1598
- spansc->freq += sim_sloppy_freq(spansc->sim, se->end(se) - se->start(se));
1599
- spansc->more = se->next(se);
1600
- }
1944
+ self->type = SPAN_NOT_QUERY;
1945
+ self->rewrite = &spanxq_rewrite;
1946
+ self->extract_terms = &spanxq_extract_terms;
1947
+ self->to_s = &spanxq_to_s;
1948
+ self->hash = &spanxq_hash;
1949
+ self->eq = &spanxq_eq;
1950
+ self->destroy_i = &spanxq_destroy;
1951
+ self->create_weight_i = &spanw_new;
1952
+ self->get_matchv_i = &spanq_get_matchv_i;
1601
1953
 
1602
- return (spansc->more || (spansc->freq != 0.0));
1954
+ return self;
1603
1955
  }
1604
1956
 
1605
- Explanation *spansc_explain(Scorer *self, int target)
1957
+ Query *spanxq_new(Query *inc, Query *exc)
1606
1958
  {
1607
- Explanation *tf_explanation;
1608
- SpanScorer *spansc = (SpanScorer *)self->data;
1609
- float phrase_freq;
1610
- self->skip_to(self, target);
1611
- phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
1612
-
1613
- tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
1614
- strfmt("tf(phrase_freq(%f)", phrase_freq));
1615
-
1616
- return tf_explanation;
1959
+ REF(inc);
1960
+ REF(exc);
1961
+ return spanxq_new_nr(inc, exc);
1617
1962
  }
1618
1963
 
1619
- void spansc_destroy(Scorer *self)
1620
- {
1621
- SpanScorer *spansc = (SpanScorer *)self->data;
1622
- if (spansc->spans) spansc->spans->destroy(spansc->spans);
1623
- scorer_destroy_i(self);
1624
- }
1625
-
1626
- Scorer *spansc_create(Weight *weight, IndexReader *ir)
1627
- {
1628
- Scorer *self = scorer_create(weight->similarity);
1629
- SpanScorer *spansc = ALLOC(SpanScorer);
1630
- SpanQuery *spanq = (SpanQuery *)weight->query->data;
1631
- ZEROSET(spansc, SpanScorer, 1);
1632
- spansc->first_time = true;
1633
- spansc->more = true;
1634
- spansc->spans = spanq->get_spans(weight->query, ir);
1635
- spansc->sim = weight->similarity;
1636
- spansc->norms = ir->get_norms(ir, spanq->field);
1637
- spansc->weight = weight;
1638
- spansc->value = weight->value;
1639
- spansc->freq = 0.0;
1640
-
1641
- self->data = spansc;
1642
-
1643
- self->score = &spansc_score;
1644
- self->next = &spansc_next;
1645
- self->skip_to = &spansc_skip_to;
1646
- self->explain = &spansc_explain;
1647
- self->destroy = &spansc_destroy;
1648
- return self;
1649
- }