ferret 0.9.1 → 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (105) hide show
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_span.c CHANGED
@@ -1,7 +1,6 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
3
 
4
-
5
4
  /*****************************************************************************
6
5
  *
7
6
  * NearSpanEnum
@@ -16,11 +15,24 @@
16
15
 
17
16
  Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
18
17
  {
18
+ Explanation *expl;
19
+ Explanation *idf_expl1;
20
+ Explanation *idf_expl2;
21
+ Explanation *query_expl;
22
+ Explanation *qnorm_expl;
23
+ Explanation *field_expl;
24
+ Explanation *tf_expl;
25
+ Scorer *scorer;
26
+ uchar *field_norms;
27
+ float field_norm;
28
+ Explanation *field_norm_expl;
29
+
19
30
  char *query_str = self->query->to_s(self->query, "");
20
- Array *terms = (Array *)self->data;
31
+ HashSet *terms = (HashSet *)self->data;
21
32
  char *field = ((SpanQuery *)self->query->data)->field;
22
33
  char *doc_freqs = NULL;
23
- int df_i = 0, i;
34
+ size_t df_i = 0;
35
+ int i;
24
36
  Term *t;
25
37
 
26
38
 
@@ -38,19 +50,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
38
50
  doc_freqs = "";
39
51
  }
40
52
 
41
- Explanation *expl = expl_create(0.0,
53
+ expl = expl_create(0.0,
42
54
  strfmt("weight(%s in %d), product of:", query_str, target));
43
55
 
44
56
  /* We need two of these as it's included in both the query explanation
45
57
  * and the field explanation */
46
- Explanation *idf_expl1 = expl_create(self->idf,
58
+ idf_expl1 = expl_create(self->idf,
47
59
  strfmt("idf(%s: %s)", field, doc_freqs));
48
- Explanation *idf_expl2 = expl_create(self->idf,
60
+ idf_expl2 = expl_create(self->idf,
49
61
  strfmt("idf(%s: %s)", field, doc_freqs));
50
- if (terms->size > 0) free(doc_freqs); /* only free if allocated */
62
+ if (terms->size > 0) {
63
+ free(doc_freqs); /* only free if allocated */
64
+ }
51
65
 
52
66
  /* explain query weight */
53
- Explanation *query_expl = expl_create(0.0,
67
+ query_expl = expl_create(0.0,
54
68
  strfmt("query_weight(%s), product of:", query_str));
55
69
 
56
70
  if (self->query->boost != 1.0) {
@@ -59,7 +73,7 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
59
73
 
60
74
  expl_add_detail(query_expl, idf_expl1);
61
75
 
62
- Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
76
+ qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
63
77
  expl_add_detail(query_expl, qnorm_expl);
64
78
 
65
79
  query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
@@ -67,19 +81,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
67
81
  expl_add_detail(expl, query_expl);
68
82
 
69
83
  /* explain field weight */
70
- Explanation *field_expl = expl_create(0.0,
84
+ field_expl = expl_create(0.0,
71
85
  strfmt("field_weight(%s:%s in %d), product of:", field, query_str, target));
72
86
  free(query_str);
73
87
 
74
- Scorer *scorer = self->scorer(self, ir);
75
- Explanation *tf_expl = scorer->explain(scorer, target);
88
+ scorer = self->scorer(self, ir);
89
+ tf_expl = scorer->explain(scorer, target);
76
90
  scorer->destroy(scorer);
77
91
  expl_add_detail(field_expl, tf_expl);
78
92
  expl_add_detail(field_expl, idf_expl2);
79
93
 
80
- uchar *field_norms = ir->get_norms(ir, field);
81
- float field_norm = (field_norms ? sim_decode_norm(self->similarity, field_norms[target]) : 0.0);
82
- Explanation *field_norm_expl = expl_create(field_norm,
94
+ field_norms = ir->get_norms(ir, field);
95
+ field_norm = (field_norms
96
+ ? sim_decode_norm(self->similarity, field_norms[target])
97
+ : (float)0.0);
98
+ field_norm_expl = expl_create(field_norm,
83
99
  strfmt("field_norm(field=%s, doc=%d)", field, target));
84
100
  expl_add_detail(field_expl, field_norm_expl);
85
101
 
@@ -101,22 +117,19 @@ char *spanw_to_s(Weight *self)
101
117
  return strfmt("SpanWeight(%f)", self->value);
102
118
  }
103
119
 
104
- void spanw_destroy(void *p)
120
+ void spanw_destroy(Weight *self)
105
121
  {
106
- Weight *self = (Weight *)p;
107
- ary_destroy(self->data);
108
- free(p);
122
+ hs_destroy_all(self->data);
123
+ w_destroy(self);
109
124
  }
110
125
 
111
126
  Weight *spanw_create(Query *query, Searcher *searcher)
112
127
  {
113
- Weight *self = ALLOC(Weight);
128
+ Weight *self = w_create(query);
114
129
  SpanQuery *spanq = (SpanQuery *)query->data;
115
- Array *terms = spanq->get_terms(query);
116
- ZEROSET(self, Weight, 1);
117
- self->get_query = &w_get_query;
118
- self->get_value = &w_get_value;
119
- self->normalize = &w_normalize;
130
+ HashSet *terms = spanq->get_terms(query);
131
+
132
+ self->data = terms;
120
133
  self->scorer = &spansc_create;
121
134
  self->explain = &spanw_explain;
122
135
  self->to_s = &spanw_to_s;
@@ -125,10 +138,8 @@ Weight *spanw_create(Query *query, Searcher *searcher)
125
138
 
126
139
  self->similarity = query->get_similarity(query, searcher);
127
140
 
128
- self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems, terms->size, searcher);
129
- self->query = query;
130
- self->value = 0.0;
131
- self->data = terms;
141
+ self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
142
+ terms->size, searcher);
132
143
 
133
144
  return self;
134
145
  }
@@ -204,7 +215,8 @@ char *spante_to_s(SpanEnum *self)
204
215
  char *field = ((SpanQuery *)self->query->data)->field;
205
216
  char *query_str = self->query->to_s(self->query, field);
206
217
  char pos_str[20];
207
- int len = strlen(query_str), pos;
218
+ size_t len = strlen(query_str);
219
+ int pos;
208
220
  char *str = ALLOC_N(char, len + 40);
209
221
 
210
222
  if (self->doc(self) < 0) {
@@ -222,9 +234,8 @@ char *spante_to_s(SpanEnum *self)
222
234
  return str;
223
235
  }
224
236
 
225
- void spante_destroy(void *p)
237
+ void spante_destroy(SpanEnum *self)
226
238
  {
227
- SpanEnum *self = (SpanEnum *)p;
228
239
  SpanTermEnum *ste = (SpanTermEnum *)self->data;
229
240
  TermDocEnum *tde = ste->positions;
230
241
  tde->close(tde);
@@ -315,9 +326,8 @@ char *spanfe_to_s(SpanEnum *self)
315
326
  return res;
316
327
  }
317
328
 
318
- void spanfe_destroy(void *p)
329
+ void spanfe_destroy(SpanEnum *self)
319
330
  {
320
- SpanEnum *self = (SpanEnum *)p;
321
331
  SpanEnum *se = (SpanEnum *)self->data;
322
332
  se->destroy(se);
323
333
  free(self);
@@ -449,7 +459,7 @@ char *spanoe_to_s(SpanEnum *self)
449
459
  char *field = ((SpanQuery *)self->query->data)->field;
450
460
  char *query_str = self->query->to_s(self->query, field);
451
461
  char doc_str[62];
452
- int len = strlen(query_str);
462
+ size_t len = strlen(query_str);
453
463
  char *str = ALLOC_N(char, len + 80);
454
464
 
455
465
  if (soe->first_time) {
@@ -467,9 +477,9 @@ char *spanoe_to_s(SpanEnum *self)
467
477
  return str;
468
478
  }
469
479
 
470
- void spanoe_destroy(void *p)
480
+ void spanoe_destroy(SpanEnum *self)
471
481
  {
472
- SpanEnum *self = (SpanEnum *)p, *se;
482
+ SpanEnum *se;
473
483
  SpanOrEnum *soe = (SpanOrEnum *)self->data;
474
484
  int i;
475
485
  pq_destroy(soe->queue);
@@ -730,7 +740,7 @@ char *spanne_to_s(SpanEnum *self)
730
740
  char *field = ((SpanQuery *)self->query->data)->field;
731
741
  char *query_str = self->query->to_s(self->query, field);
732
742
  char doc_str[62];
733
- int len = strlen(query_str);
743
+ size_t len = strlen(query_str);
734
744
  char *str = ALLOC_N(char, len + 80);
735
745
 
736
746
  if (sne->first_time) {
@@ -744,9 +754,9 @@ char *spanne_to_s(SpanEnum *self)
744
754
  return str;
745
755
  }
746
756
 
747
- void spanne_destroy(void *p)
757
+ void spanne_destroy(SpanEnum *self)
748
758
  {
749
- SpanEnum *self = (SpanEnum *)p, *se;
759
+ SpanEnum *se;
750
760
  SpanNearEnum *sne = (SpanNearEnum *)self->data;
751
761
  int i;
752
762
  for (i = 0; i < sne->s_cnt; i++) {
@@ -886,9 +896,8 @@ char *spanxe_to_s(SpanEnum *self)
886
896
  return res;
887
897
  }
888
898
 
889
- void spanxe_destroy(void *p)
899
+ void spanxe_destroy(SpanEnum *self)
890
900
  {
891
- SpanEnum *self = (SpanEnum *)p;
892
901
  SpanNotEnum *sxe = (SpanNotEnum *)self->data;
893
902
  sxe->inc->destroy(sxe->inc);
894
903
  sxe->exc->destroy(sxe->exc);
@@ -924,12 +933,11 @@ SpanEnum *spanxe_create(Query *query, IndexReader *ir)
924
933
  *
925
934
  *****************************************************************************/
926
935
 
927
- void spanq_destroy(void *p)
936
+ void spanq_destroy(Query *self)
928
937
  {
929
- Query *self = (Query *)p;
930
938
  SpanQuery *sq = (SpanQuery *)self->data;
931
939
  free(sq);
932
- q_destroy(self);
940
+ q_destroy_i(self);
933
941
  }
934
942
 
935
943
  /*****************************************************************************
@@ -952,48 +960,60 @@ char *spantq_to_s(Query *self, char *field)
952
960
  return res;
953
961
  }
954
962
 
955
- void spantq_destroy(void *p)
963
+ static void spantq_destroy(Query *self)
956
964
  {
957
- Query *self = (Query *)p;
958
965
  SpanQuery *sq = (SpanQuery *)self->data;
959
966
  if (self->destroy_all) {
960
967
  Term *term = (Term *)sq->data;
961
968
  term_destroy(term);
962
969
  }
963
970
  free(sq);
964
- q_destroy(self);
971
+ q_destroy_i(self);
965
972
  }
966
973
 
967
- void spantq_extract_terms(Query *self, Array *terms)
974
+ static void spantq_extract_terms(Query *self, HashSet *terms)
968
975
  {
969
976
  Term *term = (Term *)((SpanQuery *)self->data)->data;
970
- ary_append(terms, term);
977
+ hs_add(terms, term_clone(term));
971
978
  }
972
979
 
973
- Array *spantq_get_terms(Query *self)
980
+ static HashSet *spantq_get_terms(Query *self)
974
981
  {
975
982
  Term *term = (Term *)((SpanQuery *)self->data)->data;
976
- Array *terms = ary_create(1, &term_destroy);
977
- ary_append(terms, term_clone(term));
983
+ HashSet *terms = term_set_create();
984
+ hs_add(terms, term_clone(term));
978
985
  return terms;
979
986
  }
980
987
 
988
+ static uint spantq_hash(Query *self)
989
+ {
990
+ return term_hash((Term *)((SpanQuery *)self->data)->data);
991
+ }
992
+
993
+ static int spantq_eq(Query *self, Query *o)
994
+ {
995
+ return term_eq((Term *)((SpanQuery *)self->data)->data,
996
+ (Term *)((SpanQuery *)o->data)->data);
997
+ }
998
+
981
999
  Query *spantq_create(Term *term)
982
1000
  {
983
1001
  Query *self = q_create();
1002
+
984
1003
  SpanQuery *sq = ALLOC(SpanQuery);
985
1004
  sq->data = term;
986
-
987
1005
  sq->get_spans = &spante_create;
988
1006
  sq->get_terms = &spantq_get_terms;
989
1007
  sq->field = term->field;
1008
+ self->data = sq;
990
1009
 
991
1010
  self->type = SPAN_TERM_QUERY;
992
- self->data = sq;
993
- self->create_weight = &spanw_create;
994
1011
  self->extract_terms = &spantq_extract_terms;
995
1012
  self->to_s = &spantq_to_s;
996
- self->destroy = &spantq_destroy;
1013
+ self->hash = &spantq_hash;
1014
+ self->eq = &spantq_eq;
1015
+ self->destroy_i = &spantq_destroy;
1016
+ self->create_weight_i = &spanw_create;
997
1017
  return self;
998
1018
  }
999
1019
 
@@ -1013,13 +1033,13 @@ char *spanfq_to_s(Query *self, char *field)
1013
1033
  return res;
1014
1034
  }
1015
1035
 
1016
- void spanfq_extract_terms(Query *self, Array *terms)
1036
+ void spanfq_extract_terms(Query *self, HashSet *terms)
1017
1037
  {
1018
1038
  SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1019
1039
  sfq->match->extract_terms(sfq->match, terms);
1020
1040
  }
1021
1041
 
1022
- Array *spanfq_get_terms(Query *self)
1042
+ HashSet *spanfq_get_terms(Query *self)
1023
1043
  {
1024
1044
  SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1025
1045
  SpanQuery *match_sq = (SpanQuery *)sfq->match->data;
@@ -1029,36 +1049,46 @@ Array *spanfq_get_terms(Query *self)
1029
1049
  Query *spanfq_rewrite(Query *self, IndexReader *ir)
1030
1050
  {
1031
1051
  SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1032
- Query *clone = NULL;
1033
- Query *rewritten = sfq->match->rewrite(sfq->match, ir);
1034
- if (rewritten != sfq->match) {
1035
- clone = spanfq_create(rewritten, sfq->end);
1036
- sfq->match->rewritten = NULL; /* it will get destroyed with the clone */
1037
- }
1052
+ Query *q, *rq;
1038
1053
 
1039
- if (clone != NULL) {
1040
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1041
- return self->rewritten = clone; /* some clauses rewrote */
1042
- } else {
1043
- return self; /* no clauses rewrote */
1044
- }
1054
+ q = sfq->match;
1055
+ rq = q->rewrite(q, ir);
1056
+ if (rq == q || self->destroy_all) q_deref(q);
1057
+ sfq->match = rq;
1058
+
1059
+ self->ref_cnt++;
1060
+ return self; /* no clauses rewrote */
1045
1061
  }
1046
1062
 
1047
- void spanfq_destroy(void *p)
1063
+ void spanfq_destroy(Query *self)
1048
1064
  {
1049
- Query *self = (Query *)p;
1050
1065
  SpanQuery *sq = (SpanQuery *)self->data;
1051
1066
  SpanFirstQuery *sfq = (SpanFirstQuery *)sq->data;
1052
- if (self->destroy_all) sfq->match->destroy(sfq->match);
1067
+ if (self->destroy_all) q_deref(sfq->match);
1053
1068
  free(sfq);
1054
1069
  free(sq);
1055
- q_destroy(self);
1070
+ q_destroy_i(self);
1071
+ }
1072
+
1073
+ static uint spanfq_hash(Query *self)
1074
+ {
1075
+ SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1076
+ return sfq->match->hash(sfq->match) ^ sfq->end;
1077
+ }
1078
+
1079
+ static int spanfq_eq(Query *self, Query *o)
1080
+ {
1081
+ SpanFirstQuery *sfq1 = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1082
+ SpanFirstQuery *sfq2 = (SpanFirstQuery *)((SpanQuery *)o->data)->data;
1083
+ return sfq1->match->eq(sfq1->match, sfq2->match) && (sfq1->end == sfq2->end);
1056
1084
  }
1057
1085
 
1058
1086
  Query *spanfq_create(Query *match, int end)
1059
1087
  {
1060
1088
  Query *self = q_create();
1089
+
1061
1090
  SpanQuery *sq = ALLOC(SpanQuery);
1091
+
1062
1092
  SpanFirstQuery *sfq = ALLOC(SpanFirstQuery);
1063
1093
  sfq->match = match;
1064
1094
  sfq->end = end;
@@ -1067,14 +1097,16 @@ Query *spanfq_create(Query *match, int end)
1067
1097
  sq->get_spans = &spanfe_create;
1068
1098
  sq->get_terms = &spanfq_get_terms;
1069
1099
  sq->field = ((SpanQuery *)match->data)->field;
1100
+ self->data = sq;
1070
1101
 
1071
1102
  self->type = SPAN_FIRST_QUERY;
1072
- self->data = sq;
1073
- self->create_weight = &spanw_create;
1074
- self->extract_terms = &spanfq_extract_terms;
1075
1103
  self->rewrite = &spanfq_rewrite;
1104
+ self->extract_terms = &spanfq_extract_terms;
1076
1105
  self->to_s = &spanfq_to_s;
1077
- self->destroy = &spanfq_destroy;
1106
+ self->hash = &spanfq_hash;
1107
+ self->eq = &spanfq_eq;
1108
+ self->destroy_i = &spanfq_destroy;
1109
+ self->create_weight_i = &spanw_create;
1078
1110
  return self;
1079
1111
  }
1080
1112
 
@@ -1103,7 +1135,7 @@ char *spanoq_to_s(Query *self, char *field)
1103
1135
  return res;
1104
1136
  }
1105
1137
 
1106
- void spanoq_extract_terms(Query *self, Array *terms)
1138
+ void spanoq_extract_terms(Query *self, HashSet *terms)
1107
1139
  {
1108
1140
  SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1109
1141
  Query *clause;
@@ -1114,10 +1146,10 @@ void spanoq_extract_terms(Query *self, Array *terms)
1114
1146
  }
1115
1147
  }
1116
1148
 
1117
- Array *spanoq_get_terms(Query *self)
1149
+ HashSet *spanoq_get_terms(Query *self)
1118
1150
  {
1119
1151
  SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1120
- Array *terms = ary_create(soq->c_cnt, NULL);
1152
+ HashSet *terms = term_set_create();
1121
1153
  Query *clause;
1122
1154
  int i;
1123
1155
  for (i = 0; i < soq->c_cnt; i++) {
@@ -1143,34 +1175,22 @@ SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1143
1175
  Query *spanoq_rewrite(Query *self, IndexReader *ir)
1144
1176
  {
1145
1177
  SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1146
- Query *clone = NULL;
1147
-
1148
1178
  Query *clause, *rewritten;
1149
- Query **new_clauses = ALLOC_N(Query *, soq->c_cnt);
1150
1179
  int i;
1180
+ /* replace clauses with their rewritten queries */
1151
1181
  for (i = 0; i < soq->c_cnt; i++) {
1152
1182
  clause = soq->clauses[i];
1153
1183
  rewritten = clause->rewrite(clause, ir);
1154
- if ((clause != rewritten) && (clone == NULL)) {
1155
- clone = spanoq_create(new_clauses, soq->c_cnt);
1156
- /* The sub-clauses will be handled by the original query */
1157
- clone->destroy_all = false;
1158
- }
1159
- new_clauses[i] = rewritten;
1184
+ if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1185
+ soq->clauses[i] = rewritten;
1160
1186
  }
1161
1187
 
1162
- if (clone != NULL) {
1163
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1164
- return self->rewritten = clone; /* some clauses rewrote */
1165
- } else {
1166
- free(new_clauses); /* no clauses rewrote */
1167
- return self;
1168
- }
1188
+ self->ref_cnt++;
1189
+ return self;
1169
1190
  }
1170
1191
 
1171
- void spanoq_destroy(void *p)
1192
+ void spanoq_destroy(Query *self)
1172
1193
  {
1173
- Query *self = (Query *)p;
1174
1194
  SpanQuery *sq = (SpanQuery *)self->data;
1175
1195
  SpanOrQuery *soq = (SpanOrQuery *)sq->data;
1176
1196
 
@@ -1179,7 +1199,7 @@ void spanoq_destroy(void *p)
1179
1199
  int i;
1180
1200
  for (i = 0; i < soq->c_cnt; i++) {
1181
1201
  clause = soq->clauses[i];
1182
- clause->destroy(clause);
1202
+ q_deref(clause);
1183
1203
  }
1184
1204
  free(soq->clauses);
1185
1205
  }
@@ -1187,13 +1207,44 @@ void spanoq_destroy(void *p)
1187
1207
 
1188
1208
  free(soq);
1189
1209
  free(sq);
1190
- q_destroy(self);
1210
+ q_destroy_i(self);
1211
+ }
1212
+
1213
+ static uint spanoq_hash(Query *self)
1214
+ {
1215
+ int i;
1216
+ uint hash = 0;
1217
+ Query *q;
1218
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1219
+
1220
+ for (i = 0; i < soq->c_cnt; i++) {
1221
+ q = soq->clauses[i];
1222
+ hash ^= q->hash(q);
1223
+ }
1224
+ return hash;
1225
+ }
1226
+
1227
+ static int spanoq_eq(Query *self, Query *o)
1228
+ {
1229
+ int i;
1230
+ Query *q1, *q2;
1231
+ SpanOrQuery *soq1 = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1232
+ SpanOrQuery *soq2 = (SpanOrQuery *)((SpanQuery *)o->data)->data;
1233
+ if (soq1->c_cnt != soq2->c_cnt) return false;
1234
+ for (i = 0; i < soq1->c_cnt; i++) {
1235
+ q1 = soq1->clauses[i];
1236
+ q2 = soq2->clauses[i];
1237
+ if (!q1->eq(q1, q2)) return false;
1238
+ }
1239
+ return true;
1191
1240
  }
1192
1241
 
1193
1242
  Query *spanoq_create(Query **clauses, int c_cnt)
1194
1243
  {
1195
1244
  Query *self = q_create();
1245
+
1196
1246
  SpanQuery *sq = ALLOC(SpanQuery);
1247
+
1197
1248
  SpanOrQuery *soq = ALLOC(SpanOrQuery);
1198
1249
  soq->clauses = clauses;
1199
1250
  soq->c_cnt = c_cnt;
@@ -1202,14 +1253,16 @@ Query *spanoq_create(Query **clauses, int c_cnt)
1202
1253
  sq->get_spans = &spanoq_get_spans;
1203
1254
  sq->get_terms = &spanoq_get_terms;
1204
1255
  sq->field = ((SpanQuery *)clauses[0]->data)->field;
1256
+ self->data = sq;
1205
1257
 
1206
1258
  self->type = SPAN_OR_QUERY;
1207
- self->data = sq;
1208
- self->create_weight = &spanw_create;
1209
- self->extract_terms = &spanoq_extract_terms;
1210
1259
  self->rewrite = &spanoq_rewrite;
1260
+ self->extract_terms = &spanoq_extract_terms;
1211
1261
  self->to_s = &spanoq_to_s;
1212
- self->destroy = &spanoq_destroy;
1262
+ self->hash = &spanoq_hash;
1263
+ self->eq = &spanoq_eq;
1264
+ self->destroy_i = &spanoq_destroy;
1265
+ self->create_weight_i = &spanw_create;
1213
1266
  return self;
1214
1267
  }
1215
1268
 
@@ -1240,7 +1293,7 @@ char *spannq_to_s(Query *self, char *field)
1240
1293
  return res;
1241
1294
  }
1242
1295
 
1243
- void spannq_extract_terms(Query *self, Array *terms)
1296
+ void spannq_extract_terms(Query *self, HashSet *terms)
1244
1297
  {
1245
1298
  SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1246
1299
  Query *clause;
@@ -1251,10 +1304,10 @@ void spannq_extract_terms(Query *self, Array *terms)
1251
1304
  }
1252
1305
  }
1253
1306
 
1254
- Array *spannq_get_terms(Query *self)
1307
+ HashSet *spannq_get_terms(Query *self)
1255
1308
  {
1256
1309
  SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1257
- Array *terms = ary_create(snq->c_cnt, NULL);
1310
+ HashSet *terms = term_set_create();
1258
1311
  Query *clause;
1259
1312
  int i;
1260
1313
  for (i = 0; i < snq->c_cnt; i++) {
@@ -1281,34 +1334,21 @@ SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
1281
1334
  Query *spannq_rewrite(Query *self, IndexReader *ir)
1282
1335
  {
1283
1336
  SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1284
- Query *clone = NULL;
1285
-
1286
1337
  Query *clause, *rewritten;
1287
- Query **new_clauses = ALLOC_N(Query *, snq->c_cnt);
1288
1338
  int i;
1289
1339
  for (i = 0; i < snq->c_cnt; i++) {
1290
1340
  clause = snq->clauses[i];
1291
1341
  rewritten = clause->rewrite(clause, ir);
1292
- if ((clause != rewritten) && (clone == NULL)) {
1293
- clone = spannq_create(new_clauses, snq->c_cnt, snq->slop, snq->in_order);
1294
- /* The sub-clauses will be handled by the original query */
1295
- clone->destroy_all = false;
1296
- }
1297
- new_clauses[i] = rewritten;
1342
+ if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1343
+ snq->clauses[i] = rewritten;
1298
1344
  }
1299
1345
 
1300
- if (clone != NULL) {
1301
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1302
- return self->rewritten = clone; /* some clauses rewrote */
1303
- } else {
1304
- free(new_clauses); /* no clauses rewrote */
1305
- return self;
1306
- }
1346
+ self->ref_cnt++;
1347
+ return self;
1307
1348
  }
1308
1349
 
1309
- void spannq_destroy(void *p)
1350
+ void spannq_destroy(Query *self)
1310
1351
  {
1311
- Query *self = (Query *)p;
1312
1352
  SpanQuery *sq = (SpanQuery *)self->data;
1313
1353
  SpanNearQuery *snq = (SpanNearQuery *)sq->data;
1314
1354
 
@@ -1317,21 +1357,57 @@ void spannq_destroy(void *p)
1317
1357
  int i;
1318
1358
  for (i = 0; i < snq->c_cnt; i++) {
1319
1359
  clause = snq->clauses[i];
1320
- clause->destroy(clause);
1360
+ q_deref(clause);
1321
1361
  }
1322
1362
  free(snq->clauses);
1323
1363
  }
1324
1364
 
1325
-
1326
1365
  free(snq);
1327
1366
  free(sq);
1328
- q_destroy(self);
1367
+ q_destroy_i(self);
1368
+ }
1369
+
1370
+ static uint spannq_hash(Query *self)
1371
+ {
1372
+ int i;
1373
+ uint hash = 0;
1374
+ Query *q;
1375
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1376
+
1377
+ for (i = 0; i < snq->c_cnt; i++) {
1378
+ q = snq->clauses[i];
1379
+ hash ^= q->hash(q);
1380
+ }
1381
+ return ((hash ^ snq->slop) << 1) | snq->in_order;
1382
+ }
1383
+
1384
+ static int spannq_eq(Query *self, Query *o)
1385
+ {
1386
+ int i;
1387
+ Query *q1, *q2;
1388
+ SpanNearQuery *snq1 = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1389
+ SpanNearQuery *snq2 = (SpanNearQuery *)((SpanQuery *)o->data)->data;
1390
+ if (snq1->c_cnt != snq2->c_cnt ||
1391
+ snq1->slop != snq2->slop ||
1392
+ snq1->in_order != snq2->in_order) {
1393
+ return false;
1394
+ }
1395
+
1396
+ for (i = 0; i < snq1->c_cnt; i++) {
1397
+ q1 = snq1->clauses[i];
1398
+ q2 = snq2->clauses[i];
1399
+ if (!q1->eq(q1, q2)) return false;
1400
+ }
1401
+
1402
+ return true;
1329
1403
  }
1330
1404
 
1331
1405
  Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1332
1406
  {
1333
1407
  Query *self = q_create();
1408
+
1334
1409
  SpanQuery *sq = ALLOC(SpanQuery);
1410
+
1335
1411
  SpanNearQuery *snq = ALLOC(SpanNearQuery);
1336
1412
  snq->clauses = clauses;
1337
1413
  snq->c_cnt = c_cnt;
@@ -1341,16 +1417,17 @@ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1341
1417
 
1342
1418
  sq->get_spans = &spannq_get_spans;
1343
1419
  sq->get_terms = &spannq_get_terms;
1344
-
1345
1420
  sq->field = ((SpanQuery *)clauses[0]->data)->field;
1421
+ self->data = sq;
1346
1422
 
1347
1423
  self->type = SPAN_NEAR_QUERY;
1348
- self->data = sq;
1349
- self->create_weight = &spanw_create;
1350
- self->extract_terms = &spannq_extract_terms;
1351
1424
  self->rewrite = &spannq_rewrite;
1425
+ self->extract_terms = &spannq_extract_terms;
1352
1426
  self->to_s = &spannq_to_s;
1353
- self->destroy = &spannq_destroy;
1427
+ self->hash = &spannq_hash;
1428
+ self->eq = &spannq_eq;
1429
+ self->destroy_i = &spannq_destroy;
1430
+ self->create_weight_i = &spanw_create;
1354
1431
  return self;
1355
1432
  }
1356
1433
 
@@ -1372,16 +1449,16 @@ char *spanxq_to_s(Query *self, char *field)
1372
1449
  return res;
1373
1450
  }
1374
1451
 
1375
- void spanxq_extract_terms(Query *self, Array *terms)
1452
+ void spanxq_extract_terms(Query *self, HashSet *terms)
1376
1453
  {
1377
1454
  SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1378
1455
  sxq->inc->extract_terms(sxq->inc, terms);
1379
1456
  }
1380
1457
 
1381
- Array *spanxq_get_terms(Query *self)
1458
+ HashSet *spanxq_get_terms(Query *self)
1382
1459
  {
1383
1460
  SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1384
- Array *terms = ary_create(1, NULL);
1461
+ HashSet *terms = term_set_create();
1385
1462
  sxq->inc->extract_terms(sxq->inc, terms);
1386
1463
  return terms;
1387
1464
  }
@@ -1389,48 +1466,60 @@ Array *spanxq_get_terms(Query *self)
1389
1466
  Query *spanxq_rewrite(Query *self, IndexReader *ir)
1390
1467
  {
1391
1468
  SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1392
- Query *clone = NULL;
1393
- Query *inc, *exc;
1394
- Query *inc_rewritten, *exc_rewritten;
1395
-
1396
- inc = sxq->inc;
1397
- inc_rewritten = inc->rewrite(inc, ir);
1398
- exc = sxq->exc;
1399
- exc_rewritten = exc->rewrite(exc, ir);
1400
- if ((inc_rewritten != inc) || (exc_rewritten != exc)) {
1401
- clone = spanxq_create(inc_rewritten, exc_rewritten);
1402
- /* The sub-clauses will be handled by the original query */
1403
- clone->destroy_all = false;
1404
- }
1469
+ Query *q, *rq;
1405
1470
 
1406
- if (clone != NULL) {
1407
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1408
- return self->rewritten = clone; /* some clauses rewrote */
1409
- } else {
1410
- return self; /* no clauses rewrote */
1411
- }
1471
+ /* rewrite inclusive query */
1472
+ q = sxq->inc;
1473
+ rq = q->rewrite(q, ir);
1474
+ if (rq == q || self->destroy_all) q_deref(q);
1475
+ sxq->inc = rq;
1476
+
1477
+ /* rewrite exclusive query */
1478
+ q = sxq->exc;
1479
+ rq = q->rewrite(q, ir);
1480
+ if (rq == q || self->destroy_all) q_deref(q);
1481
+ sxq->exc = rq;
1482
+
1483
+ self->ref_cnt++;
1484
+ return self;
1412
1485
  }
1413
1486
 
1414
- void spanxq_destroy(void *p)
1487
+ void spanxq_destroy(Query *self)
1415
1488
  {
1416
- Query *self = (Query *)p;
1417
1489
  SpanQuery *sq = (SpanQuery *)self->data;
1418
1490
  SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
1419
1491
 
1420
1492
  if (self->destroy_all) {
1421
- sxq->inc->destroy(sxq->inc);
1422
- sxq->exc->destroy(sxq->exc);
1493
+ q_deref(sxq->inc);
1494
+ q_deref(sxq->exc);
1423
1495
  }
1424
1496
 
1425
1497
  free(sxq);
1426
1498
  free(sq);
1427
- q_destroy(self);
1499
+ q_destroy_i(self);
1428
1500
  }
1429
1501
 
1502
+ static uint spanxq_hash(Query *self)
1503
+ {
1504
+ SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1505
+ return sxq->inc->hash(sxq->inc) ^ sxq->exc->hash(sxq->exc);
1506
+ }
1507
+
1508
+ static int spanxq_eq(Query *self, Query *o)
1509
+ {
1510
+ SpanNotQuery *sxq1 = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1511
+ SpanNotQuery *sxq2 = (SpanNotQuery *)((SpanQuery *)o->data)->data;
1512
+ return sxq1->inc->eq(sxq1->inc, sxq2->inc) &&
1513
+ sxq1->exc->eq(sxq1->exc, sxq2->exc);
1514
+ }
1515
+
1516
+
1430
1517
  Query *spanxq_create(Query *inc, Query *exc)
1431
1518
  {
1432
1519
  Query *self = q_create();
1520
+
1433
1521
  SpanQuery *sq = ALLOC(SpanQuery);
1522
+
1434
1523
  SpanNotQuery *sxq = ALLOC(SpanNotQuery);
1435
1524
  sxq->inc = inc;
1436
1525
  sxq->exc = exc;
@@ -1439,14 +1528,17 @@ Query *spanxq_create(Query *inc, Query *exc)
1439
1528
  sq->get_spans = &spanxe_create;
1440
1529
  sq->get_terms = &spanxq_get_terms;
1441
1530
  sq->field = ((SpanQuery *)inc->data)->field;
1531
+ self->data = sq;
1442
1532
 
1443
1533
  self->type = SPAN_NOT_QUERY;
1444
- self->data = sq;
1445
- self->create_weight = &spanw_create;
1446
- self->extract_terms = &spanxq_extract_terms;
1447
1534
  self->rewrite = &spanxq_rewrite;
1535
+ self->extract_terms = &spanxq_extract_terms;
1448
1536
  self->to_s = &spanxq_to_s;
1449
- self->destroy = &spanxq_destroy;
1537
+ self->hash = &spanxq_hash;
1538
+ self->eq = &spanxq_eq;
1539
+ self->destroy_i = &spanxq_destroy;
1540
+ self->create_weight_i = &spanw_create;
1541
+
1450
1542
  return self;
1451
1543
  }
1452
1544
 
@@ -1512,24 +1604,23 @@ bool spansc_skip_to(Scorer *self, int target)
1512
1604
 
1513
1605
  Explanation *spansc_explain(Scorer *self, int target)
1514
1606
  {
1607
+ Explanation *tf_explanation;
1515
1608
  SpanScorer *spansc = (SpanScorer *)self->data;
1516
1609
  float phrase_freq;
1517
1610
  self->skip_to(self, target);
1518
- phrase_freq = (self->doc == target) ? spansc->freq : 0.0;
1611
+ phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
1519
1612
 
1520
- Explanation *tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
1613
+ tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
1521
1614
  strfmt("tf(phrase_freq(%f)", phrase_freq));
1522
1615
 
1523
1616
  return tf_explanation;
1524
1617
  }
1525
1618
 
1526
- void spansc_destroy(void *p)
1619
+ void spansc_destroy(Scorer *self)
1527
1620
  {
1528
- Scorer *self = (Scorer *)p;
1529
1621
  SpanScorer *spansc = (SpanScorer *)self->data;
1530
1622
  if (spansc->spans) spansc->spans->destroy(spansc->spans);
1531
- //free(spansc->norms);
1532
- scorer_destroy(p);
1623
+ scorer_destroy_i(self);
1533
1624
  }
1534
1625
 
1535
1626
  Scorer *spansc_create(Weight *weight, IndexReader *ir)