ferret 0.9.1 → 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (105)
  1. data/README +6 -5
  2. data/Rakefile +34 -13
  3. data/TODO +1 -0
  4. data/TUTORIAL +1 -1
  5. data/ext/analysis.c +87 -70
  6. data/ext/analysis.h +18 -6
  7. data/ext/array.c +1 -2
  8. data/ext/array.h +1 -1
  9. data/ext/bitvector.c +10 -6
  10. data/ext/bitvector.h +2 -2
  11. data/ext/compound_io.c +30 -27
  12. data/ext/document.c +15 -15
  13. data/ext/document.h +5 -5
  14. data/ext/except.c +2 -0
  15. data/ext/except.h +25 -23
  16. data/ext/extconf.rb +1 -0
  17. data/ext/ferret.c +10 -8
  18. data/ext/ferret.h +9 -8
  19. data/ext/field.c +29 -25
  20. data/ext/filter.c +52 -14
  21. data/ext/frtio.h +13 -0
  22. data/ext/fs_store.c +115 -170
  23. data/ext/global.c +9 -8
  24. data/ext/global.h +17 -13
  25. data/ext/hash.c +13 -19
  26. data/ext/hash.h +11 -11
  27. data/ext/hashset.c +5 -7
  28. data/ext/hashset.h +9 -8
  29. data/ext/helper.c +1 -1
  30. data/ext/helper.h +2 -1
  31. data/ext/inc/except.h +25 -23
  32. data/ext/inc/lang.h +11 -1
  33. data/ext/ind.c +33 -21
  34. data/ext/index.h +44 -39
  35. data/ext/index_io.c +61 -57
  36. data/ext/index_rw.c +418 -361
  37. data/ext/lang.c +10 -0
  38. data/ext/lang.h +11 -1
  39. data/ext/nix_io.c +135 -0
  40. data/ext/priorityqueue.c +16 -16
  41. data/ext/priorityqueue.h +9 -6
  42. data/ext/q_boolean.c +128 -76
  43. data/ext/q_const_score.c +20 -20
  44. data/ext/q_filtered_query.c +20 -20
  45. data/ext/q_fuzzy.c +37 -23
  46. data/ext/q_match_all.c +15 -19
  47. data/ext/q_multi_phrase.c +87 -46
  48. data/ext/q_parser.c +247 -119
  49. data/ext/q_phrase.c +86 -52
  50. data/ext/q_prefix.c +25 -14
  51. data/ext/q_range.c +59 -14
  52. data/ext/q_span.c +263 -172
  53. data/ext/q_term.c +62 -51
  54. data/ext/q_wildcard.c +24 -13
  55. data/ext/r_analysis.c +328 -80
  56. data/ext/r_doc.c +11 -6
  57. data/ext/r_index_io.c +40 -32
  58. data/ext/r_qparser.c +15 -14
  59. data/ext/r_search.c +270 -152
  60. data/ext/r_store.c +32 -17
  61. data/ext/ram_store.c +38 -22
  62. data/ext/search.c +617 -87
  63. data/ext/search.h +227 -163
  64. data/ext/similarity.c +54 -45
  65. data/ext/similarity.h +3 -3
  66. data/ext/sort.c +132 -53
  67. data/ext/store.c +21 -2
  68. data/ext/store.h +14 -14
  69. data/ext/tags +4322 -232
  70. data/ext/term.c +140 -109
  71. data/ext/termdocs.c +74 -60
  72. data/ext/vector.c +181 -152
  73. data/ext/w32_io.c +150 -0
  74. data/lib/ferret.rb +1 -1
  75. data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
  76. data/lib/ferret/document/field.rb +1 -1
  77. data/lib/ferret/index/field_infos.rb +1 -1
  78. data/lib/ferret/index/term.rb +1 -1
  79. data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
  80. data/lib/ferret/search.rb +1 -0
  81. data/lib/ferret/search/boolean_query.rb +0 -4
  82. data/lib/ferret/search/index_searcher.rb +21 -8
  83. data/lib/ferret/search/multi_phrase_query.rb +7 -0
  84. data/lib/ferret/search/multi_searcher.rb +261 -0
  85. data/lib/ferret/search/phrase_query.rb +1 -1
  86. data/lib/ferret/search/query.rb +34 -5
  87. data/lib/ferret/search/sort.rb +7 -3
  88. data/lib/ferret/search/sort_field.rb +8 -4
  89. data/lib/ferret/store/fs_store.rb +13 -6
  90. data/lib/ferret/store/index_io.rb +0 -14
  91. data/lib/ferret/store/ram_store.rb +3 -2
  92. data/lib/rferret.rb +1 -1
  93. data/test/unit/analysis/ctc_analyzer.rb +131 -0
  94. data/test/unit/analysis/ctc_tokenstream.rb +98 -9
  95. data/test/unit/index/tc_index.rb +40 -1
  96. data/test/unit/index/tc_term.rb +7 -0
  97. data/test/unit/index/th_doc.rb +8 -0
  98. data/test/unit/query_parser/tc_query_parser.rb +6 -4
  99. data/test/unit/search/rtc_sort_field.rb +6 -6
  100. data/test/unit/search/tc_index_searcher.rb +8 -0
  101. data/test/unit/search/tc_multi_searcher.rb +275 -0
  102. data/test/unit/search/tc_multi_searcher2.rb +126 -0
  103. data/test/unit/search/tc_search_and_sort.rb +66 -0
  104. metadata +31 -26
  105. data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/q_span.c CHANGED
@@ -1,7 +1,6 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
3
 
4
-
5
4
  /*****************************************************************************
6
5
  *
7
6
  * NearSpanEnum
@@ -16,11 +15,24 @@
16
15
 
17
16
  Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
18
17
  {
18
+ Explanation *expl;
19
+ Explanation *idf_expl1;
20
+ Explanation *idf_expl2;
21
+ Explanation *query_expl;
22
+ Explanation *qnorm_expl;
23
+ Explanation *field_expl;
24
+ Explanation *tf_expl;
25
+ Scorer *scorer;
26
+ uchar *field_norms;
27
+ float field_norm;
28
+ Explanation *field_norm_expl;
29
+
19
30
  char *query_str = self->query->to_s(self->query, "");
20
- Array *terms = (Array *)self->data;
31
+ HashSet *terms = (HashSet *)self->data;
21
32
  char *field = ((SpanQuery *)self->query->data)->field;
22
33
  char *doc_freqs = NULL;
23
- int df_i = 0, i;
34
+ size_t df_i = 0;
35
+ int i;
24
36
  Term *t;
25
37
 
26
38
 
@@ -38,19 +50,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
38
50
  doc_freqs = "";
39
51
  }
40
52
 
41
- Explanation *expl = expl_create(0.0,
53
+ expl = expl_create(0.0,
42
54
  strfmt("weight(%s in %d), product of:", query_str, target));
43
55
 
44
56
  /* We need two of these as it's included in both the query explanation
45
57
  * and the field explanation */
46
- Explanation *idf_expl1 = expl_create(self->idf,
58
+ idf_expl1 = expl_create(self->idf,
47
59
  strfmt("idf(%s: %s)", field, doc_freqs));
48
- Explanation *idf_expl2 = expl_create(self->idf,
60
+ idf_expl2 = expl_create(self->idf,
49
61
  strfmt("idf(%s: %s)", field, doc_freqs));
50
- if (terms->size > 0) free(doc_freqs); /* only free if allocated */
62
+ if (terms->size > 0) {
63
+ free(doc_freqs); /* only free if allocated */
64
+ }
51
65
 
52
66
  /* explain query weight */
53
- Explanation *query_expl = expl_create(0.0,
67
+ query_expl = expl_create(0.0,
54
68
  strfmt("query_weight(%s), product of:", query_str));
55
69
 
56
70
  if (self->query->boost != 1.0) {
@@ -59,7 +73,7 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
59
73
 
60
74
  expl_add_detail(query_expl, idf_expl1);
61
75
 
62
- Explanation *qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
76
+ qnorm_expl = expl_create(self->qnorm, estrdup("query_norm"));
63
77
  expl_add_detail(query_expl, qnorm_expl);
64
78
 
65
79
  query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
@@ -67,19 +81,21 @@ Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
67
81
  expl_add_detail(expl, query_expl);
68
82
 
69
83
  /* explain field weight */
70
- Explanation *field_expl = expl_create(0.0,
84
+ field_expl = expl_create(0.0,
71
85
  strfmt("field_weight(%s:%s in %d), product of:", field, query_str, target));
72
86
  free(query_str);
73
87
 
74
- Scorer *scorer = self->scorer(self, ir);
75
- Explanation *tf_expl = scorer->explain(scorer, target);
88
+ scorer = self->scorer(self, ir);
89
+ tf_expl = scorer->explain(scorer, target);
76
90
  scorer->destroy(scorer);
77
91
  expl_add_detail(field_expl, tf_expl);
78
92
  expl_add_detail(field_expl, idf_expl2);
79
93
 
80
- uchar *field_norms = ir->get_norms(ir, field);
81
- float field_norm = (field_norms ? sim_decode_norm(self->similarity, field_norms[target]) : 0.0);
82
- Explanation *field_norm_expl = expl_create(field_norm,
94
+ field_norms = ir->get_norms(ir, field);
95
+ field_norm = (field_norms
96
+ ? sim_decode_norm(self->similarity, field_norms[target])
97
+ : (float)0.0);
98
+ field_norm_expl = expl_create(field_norm,
83
99
  strfmt("field_norm(field=%s, doc=%d)", field, target));
84
100
  expl_add_detail(field_expl, field_norm_expl);
85
101
 
@@ -101,22 +117,19 @@ char *spanw_to_s(Weight *self)
101
117
  return strfmt("SpanWeight(%f)", self->value);
102
118
  }
103
119
 
104
- void spanw_destroy(void *p)
120
+ void spanw_destroy(Weight *self)
105
121
  {
106
- Weight *self = (Weight *)p;
107
- ary_destroy(self->data);
108
- free(p);
122
+ hs_destroy_all(self->data);
123
+ w_destroy(self);
109
124
  }
110
125
 
111
126
  Weight *spanw_create(Query *query, Searcher *searcher)
112
127
  {
113
- Weight *self = ALLOC(Weight);
128
+ Weight *self = w_create(query);
114
129
  SpanQuery *spanq = (SpanQuery *)query->data;
115
- Array *terms = spanq->get_terms(query);
116
- ZEROSET(self, Weight, 1);
117
- self->get_query = &w_get_query;
118
- self->get_value = &w_get_value;
119
- self->normalize = &w_normalize;
130
+ HashSet *terms = spanq->get_terms(query);
131
+
132
+ self->data = terms;
120
133
  self->scorer = &spansc_create;
121
134
  self->explain = &spanw_explain;
122
135
  self->to_s = &spanw_to_s;
@@ -125,10 +138,8 @@ Weight *spanw_create(Query *query, Searcher *searcher)
125
138
 
126
139
  self->similarity = query->get_similarity(query, searcher);
127
140
 
128
- self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems, terms->size, searcher);
129
- self->query = query;
130
- self->value = 0.0;
131
- self->data = terms;
141
+ self->idf = sim_idf_phrase(self->similarity, (Term **)terms->elems,
142
+ terms->size, searcher);
132
143
 
133
144
  return self;
134
145
  }
@@ -204,7 +215,8 @@ char *spante_to_s(SpanEnum *self)
204
215
  char *field = ((SpanQuery *)self->query->data)->field;
205
216
  char *query_str = self->query->to_s(self->query, field);
206
217
  char pos_str[20];
207
- int len = strlen(query_str), pos;
218
+ size_t len = strlen(query_str);
219
+ int pos;
208
220
  char *str = ALLOC_N(char, len + 40);
209
221
 
210
222
  if (self->doc(self) < 0) {
@@ -222,9 +234,8 @@ char *spante_to_s(SpanEnum *self)
222
234
  return str;
223
235
  }
224
236
 
225
- void spante_destroy(void *p)
237
+ void spante_destroy(SpanEnum *self)
226
238
  {
227
- SpanEnum *self = (SpanEnum *)p;
228
239
  SpanTermEnum *ste = (SpanTermEnum *)self->data;
229
240
  TermDocEnum *tde = ste->positions;
230
241
  tde->close(tde);
@@ -315,9 +326,8 @@ char *spanfe_to_s(SpanEnum *self)
315
326
  return res;
316
327
  }
317
328
 
318
- void spanfe_destroy(void *p)
329
+ void spanfe_destroy(SpanEnum *self)
319
330
  {
320
- SpanEnum *self = (SpanEnum *)p;
321
331
  SpanEnum *se = (SpanEnum *)self->data;
322
332
  se->destroy(se);
323
333
  free(self);
@@ -449,7 +459,7 @@ char *spanoe_to_s(SpanEnum *self)
449
459
  char *field = ((SpanQuery *)self->query->data)->field;
450
460
  char *query_str = self->query->to_s(self->query, field);
451
461
  char doc_str[62];
452
- int len = strlen(query_str);
462
+ size_t len = strlen(query_str);
453
463
  char *str = ALLOC_N(char, len + 80);
454
464
 
455
465
  if (soe->first_time) {
@@ -467,9 +477,9 @@ char *spanoe_to_s(SpanEnum *self)
467
477
  return str;
468
478
  }
469
479
 
470
- void spanoe_destroy(void *p)
480
+ void spanoe_destroy(SpanEnum *self)
471
481
  {
472
- SpanEnum *self = (SpanEnum *)p, *se;
482
+ SpanEnum *se;
473
483
  SpanOrEnum *soe = (SpanOrEnum *)self->data;
474
484
  int i;
475
485
  pq_destroy(soe->queue);
@@ -730,7 +740,7 @@ char *spanne_to_s(SpanEnum *self)
730
740
  char *field = ((SpanQuery *)self->query->data)->field;
731
741
  char *query_str = self->query->to_s(self->query, field);
732
742
  char doc_str[62];
733
- int len = strlen(query_str);
743
+ size_t len = strlen(query_str);
734
744
  char *str = ALLOC_N(char, len + 80);
735
745
 
736
746
  if (sne->first_time) {
@@ -744,9 +754,9 @@ char *spanne_to_s(SpanEnum *self)
744
754
  return str;
745
755
  }
746
756
 
747
- void spanne_destroy(void *p)
757
+ void spanne_destroy(SpanEnum *self)
748
758
  {
749
- SpanEnum *self = (SpanEnum *)p, *se;
759
+ SpanEnum *se;
750
760
  SpanNearEnum *sne = (SpanNearEnum *)self->data;
751
761
  int i;
752
762
  for (i = 0; i < sne->s_cnt; i++) {
@@ -886,9 +896,8 @@ char *spanxe_to_s(SpanEnum *self)
886
896
  return res;
887
897
  }
888
898
 
889
- void spanxe_destroy(void *p)
899
+ void spanxe_destroy(SpanEnum *self)
890
900
  {
891
- SpanEnum *self = (SpanEnum *)p;
892
901
  SpanNotEnum *sxe = (SpanNotEnum *)self->data;
893
902
  sxe->inc->destroy(sxe->inc);
894
903
  sxe->exc->destroy(sxe->exc);
@@ -924,12 +933,11 @@ SpanEnum *spanxe_create(Query *query, IndexReader *ir)
924
933
  *
925
934
  *****************************************************************************/
926
935
 
927
- void spanq_destroy(void *p)
936
+ void spanq_destroy(Query *self)
928
937
  {
929
- Query *self = (Query *)p;
930
938
  SpanQuery *sq = (SpanQuery *)self->data;
931
939
  free(sq);
932
- q_destroy(self);
940
+ q_destroy_i(self);
933
941
  }
934
942
 
935
943
  /*****************************************************************************
@@ -952,48 +960,60 @@ char *spantq_to_s(Query *self, char *field)
952
960
  return res;
953
961
  }
954
962
 
955
- void spantq_destroy(void *p)
963
+ static void spantq_destroy(Query *self)
956
964
  {
957
- Query *self = (Query *)p;
958
965
  SpanQuery *sq = (SpanQuery *)self->data;
959
966
  if (self->destroy_all) {
960
967
  Term *term = (Term *)sq->data;
961
968
  term_destroy(term);
962
969
  }
963
970
  free(sq);
964
- q_destroy(self);
971
+ q_destroy_i(self);
965
972
  }
966
973
 
967
- void spantq_extract_terms(Query *self, Array *terms)
974
+ static void spantq_extract_terms(Query *self, HashSet *terms)
968
975
  {
969
976
  Term *term = (Term *)((SpanQuery *)self->data)->data;
970
- ary_append(terms, term);
977
+ hs_add(terms, term_clone(term));
971
978
  }
972
979
 
973
- Array *spantq_get_terms(Query *self)
980
+ static HashSet *spantq_get_terms(Query *self)
974
981
  {
975
982
  Term *term = (Term *)((SpanQuery *)self->data)->data;
976
- Array *terms = ary_create(1, &term_destroy);
977
- ary_append(terms, term_clone(term));
983
+ HashSet *terms = term_set_create();
984
+ hs_add(terms, term_clone(term));
978
985
  return terms;
979
986
  }
980
987
 
988
+ static uint spantq_hash(Query *self)
989
+ {
990
+ return term_hash((Term *)((SpanQuery *)self->data)->data);
991
+ }
992
+
993
+ static int spantq_eq(Query *self, Query *o)
994
+ {
995
+ return term_eq((Term *)((SpanQuery *)self->data)->data,
996
+ (Term *)((SpanQuery *)o->data)->data);
997
+ }
998
+
981
999
  Query *spantq_create(Term *term)
982
1000
  {
983
1001
  Query *self = q_create();
1002
+
984
1003
  SpanQuery *sq = ALLOC(SpanQuery);
985
1004
  sq->data = term;
986
-
987
1005
  sq->get_spans = &spante_create;
988
1006
  sq->get_terms = &spantq_get_terms;
989
1007
  sq->field = term->field;
1008
+ self->data = sq;
990
1009
 
991
1010
  self->type = SPAN_TERM_QUERY;
992
- self->data = sq;
993
- self->create_weight = &spanw_create;
994
1011
  self->extract_terms = &spantq_extract_terms;
995
1012
  self->to_s = &spantq_to_s;
996
- self->destroy = &spantq_destroy;
1013
+ self->hash = &spantq_hash;
1014
+ self->eq = &spantq_eq;
1015
+ self->destroy_i = &spantq_destroy;
1016
+ self->create_weight_i = &spanw_create;
997
1017
  return self;
998
1018
  }
999
1019
 
@@ -1013,13 +1033,13 @@ char *spanfq_to_s(Query *self, char *field)
1013
1033
  return res;
1014
1034
  }
1015
1035
 
1016
- void spanfq_extract_terms(Query *self, Array *terms)
1036
+ void spanfq_extract_terms(Query *self, HashSet *terms)
1017
1037
  {
1018
1038
  SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1019
1039
  sfq->match->extract_terms(sfq->match, terms);
1020
1040
  }
1021
1041
 
1022
- Array *spanfq_get_terms(Query *self)
1042
+ HashSet *spanfq_get_terms(Query *self)
1023
1043
  {
1024
1044
  SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1025
1045
  SpanQuery *match_sq = (SpanQuery *)sfq->match->data;
@@ -1029,36 +1049,46 @@ Array *spanfq_get_terms(Query *self)
1029
1049
  Query *spanfq_rewrite(Query *self, IndexReader *ir)
1030
1050
  {
1031
1051
  SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1032
- Query *clone = NULL;
1033
- Query *rewritten = sfq->match->rewrite(sfq->match, ir);
1034
- if (rewritten != sfq->match) {
1035
- clone = spanfq_create(rewritten, sfq->end);
1036
- sfq->match->rewritten = NULL; /* it will get destroyed with the clone */
1037
- }
1052
+ Query *q, *rq;
1038
1053
 
1039
- if (clone != NULL) {
1040
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1041
- return self->rewritten = clone; /* some clauses rewrote */
1042
- } else {
1043
- return self; /* no clauses rewrote */
1044
- }
1054
+ q = sfq->match;
1055
+ rq = q->rewrite(q, ir);
1056
+ if (rq == q || self->destroy_all) q_deref(q);
1057
+ sfq->match = rq;
1058
+
1059
+ self->ref_cnt++;
1060
+ return self; /* no clauses rewrote */
1045
1061
  }
1046
1062
 
1047
- void spanfq_destroy(void *p)
1063
+ void spanfq_destroy(Query *self)
1048
1064
  {
1049
- Query *self = (Query *)p;
1050
1065
  SpanQuery *sq = (SpanQuery *)self->data;
1051
1066
  SpanFirstQuery *sfq = (SpanFirstQuery *)sq->data;
1052
- if (self->destroy_all) sfq->match->destroy(sfq->match);
1067
+ if (self->destroy_all) q_deref(sfq->match);
1053
1068
  free(sfq);
1054
1069
  free(sq);
1055
- q_destroy(self);
1070
+ q_destroy_i(self);
1071
+ }
1072
+
1073
+ static uint spanfq_hash(Query *self)
1074
+ {
1075
+ SpanFirstQuery *sfq = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1076
+ return sfq->match->hash(sfq->match) ^ sfq->end;
1077
+ }
1078
+
1079
+ static int spanfq_eq(Query *self, Query *o)
1080
+ {
1081
+ SpanFirstQuery *sfq1 = (SpanFirstQuery *)((SpanQuery *)self->data)->data;
1082
+ SpanFirstQuery *sfq2 = (SpanFirstQuery *)((SpanQuery *)o->data)->data;
1083
+ return sfq1->match->eq(sfq1->match, sfq2->match) && (sfq1->end == sfq2->end);
1056
1084
  }
1057
1085
 
1058
1086
  Query *spanfq_create(Query *match, int end)
1059
1087
  {
1060
1088
  Query *self = q_create();
1089
+
1061
1090
  SpanQuery *sq = ALLOC(SpanQuery);
1091
+
1062
1092
  SpanFirstQuery *sfq = ALLOC(SpanFirstQuery);
1063
1093
  sfq->match = match;
1064
1094
  sfq->end = end;
@@ -1067,14 +1097,16 @@ Query *spanfq_create(Query *match, int end)
1067
1097
  sq->get_spans = &spanfe_create;
1068
1098
  sq->get_terms = &spanfq_get_terms;
1069
1099
  sq->field = ((SpanQuery *)match->data)->field;
1100
+ self->data = sq;
1070
1101
 
1071
1102
  self->type = SPAN_FIRST_QUERY;
1072
- self->data = sq;
1073
- self->create_weight = &spanw_create;
1074
- self->extract_terms = &spanfq_extract_terms;
1075
1103
  self->rewrite = &spanfq_rewrite;
1104
+ self->extract_terms = &spanfq_extract_terms;
1076
1105
  self->to_s = &spanfq_to_s;
1077
- self->destroy = &spanfq_destroy;
1106
+ self->hash = &spanfq_hash;
1107
+ self->eq = &spanfq_eq;
1108
+ self->destroy_i = &spanfq_destroy;
1109
+ self->create_weight_i = &spanw_create;
1078
1110
  return self;
1079
1111
  }
1080
1112
 
@@ -1103,7 +1135,7 @@ char *spanoq_to_s(Query *self, char *field)
1103
1135
  return res;
1104
1136
  }
1105
1137
 
1106
- void spanoq_extract_terms(Query *self, Array *terms)
1138
+ void spanoq_extract_terms(Query *self, HashSet *terms)
1107
1139
  {
1108
1140
  SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1109
1141
  Query *clause;
@@ -1114,10 +1146,10 @@ void spanoq_extract_terms(Query *self, Array *terms)
1114
1146
  }
1115
1147
  }
1116
1148
 
1117
- Array *spanoq_get_terms(Query *self)
1149
+ HashSet *spanoq_get_terms(Query *self)
1118
1150
  {
1119
1151
  SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1120
- Array *terms = ary_create(soq->c_cnt, NULL);
1152
+ HashSet *terms = term_set_create();
1121
1153
  Query *clause;
1122
1154
  int i;
1123
1155
  for (i = 0; i < soq->c_cnt; i++) {
@@ -1143,34 +1175,22 @@ SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
1143
1175
  Query *spanoq_rewrite(Query *self, IndexReader *ir)
1144
1176
  {
1145
1177
  SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1146
- Query *clone = NULL;
1147
-
1148
1178
  Query *clause, *rewritten;
1149
- Query **new_clauses = ALLOC_N(Query *, soq->c_cnt);
1150
1179
  int i;
1180
+ /* replace clauses with their rewritten queries */
1151
1181
  for (i = 0; i < soq->c_cnt; i++) {
1152
1182
  clause = soq->clauses[i];
1153
1183
  rewritten = clause->rewrite(clause, ir);
1154
- if ((clause != rewritten) && (clone == NULL)) {
1155
- clone = spanoq_create(new_clauses, soq->c_cnt);
1156
- /* The sub-clauses will be handled by the original query */
1157
- clone->destroy_all = false;
1158
- }
1159
- new_clauses[i] = rewritten;
1184
+ if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1185
+ soq->clauses[i] = rewritten;
1160
1186
  }
1161
1187
 
1162
- if (clone != NULL) {
1163
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1164
- return self->rewritten = clone; /* some clauses rewrote */
1165
- } else {
1166
- free(new_clauses); /* no clauses rewrote */
1167
- return self;
1168
- }
1188
+ self->ref_cnt++;
1189
+ return self;
1169
1190
  }
1170
1191
 
1171
- void spanoq_destroy(void *p)
1192
+ void spanoq_destroy(Query *self)
1172
1193
  {
1173
- Query *self = (Query *)p;
1174
1194
  SpanQuery *sq = (SpanQuery *)self->data;
1175
1195
  SpanOrQuery *soq = (SpanOrQuery *)sq->data;
1176
1196
 
@@ -1179,7 +1199,7 @@ void spanoq_destroy(void *p)
1179
1199
  int i;
1180
1200
  for (i = 0; i < soq->c_cnt; i++) {
1181
1201
  clause = soq->clauses[i];
1182
- clause->destroy(clause);
1202
+ q_deref(clause);
1183
1203
  }
1184
1204
  free(soq->clauses);
1185
1205
  }
@@ -1187,13 +1207,44 @@ void spanoq_destroy(void *p)
1187
1207
 
1188
1208
  free(soq);
1189
1209
  free(sq);
1190
- q_destroy(self);
1210
+ q_destroy_i(self);
1211
+ }
1212
+
1213
+ static uint spanoq_hash(Query *self)
1214
+ {
1215
+ int i;
1216
+ uint hash = 0;
1217
+ Query *q;
1218
+ SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1219
+
1220
+ for (i = 0; i < soq->c_cnt; i++) {
1221
+ q = soq->clauses[i];
1222
+ hash ^= q->hash(q);
1223
+ }
1224
+ return hash;
1225
+ }
1226
+
1227
+ static int spanoq_eq(Query *self, Query *o)
1228
+ {
1229
+ int i;
1230
+ Query *q1, *q2;
1231
+ SpanOrQuery *soq1 = (SpanOrQuery *)((SpanQuery *)self->data)->data;
1232
+ SpanOrQuery *soq2 = (SpanOrQuery *)((SpanQuery *)o->data)->data;
1233
+ if (soq1->c_cnt != soq2->c_cnt) return false;
1234
+ for (i = 0; i < soq1->c_cnt; i++) {
1235
+ q1 = soq1->clauses[i];
1236
+ q2 = soq2->clauses[i];
1237
+ if (!q1->eq(q1, q2)) return false;
1238
+ }
1239
+ return true;
1191
1240
  }
1192
1241
 
1193
1242
  Query *spanoq_create(Query **clauses, int c_cnt)
1194
1243
  {
1195
1244
  Query *self = q_create();
1245
+
1196
1246
  SpanQuery *sq = ALLOC(SpanQuery);
1247
+
1197
1248
  SpanOrQuery *soq = ALLOC(SpanOrQuery);
1198
1249
  soq->clauses = clauses;
1199
1250
  soq->c_cnt = c_cnt;
@@ -1202,14 +1253,16 @@ Query *spanoq_create(Query **clauses, int c_cnt)
1202
1253
  sq->get_spans = &spanoq_get_spans;
1203
1254
  sq->get_terms = &spanoq_get_terms;
1204
1255
  sq->field = ((SpanQuery *)clauses[0]->data)->field;
1256
+ self->data = sq;
1205
1257
 
1206
1258
  self->type = SPAN_OR_QUERY;
1207
- self->data = sq;
1208
- self->create_weight = &spanw_create;
1209
- self->extract_terms = &spanoq_extract_terms;
1210
1259
  self->rewrite = &spanoq_rewrite;
1260
+ self->extract_terms = &spanoq_extract_terms;
1211
1261
  self->to_s = &spanoq_to_s;
1212
- self->destroy = &spanoq_destroy;
1262
+ self->hash = &spanoq_hash;
1263
+ self->eq = &spanoq_eq;
1264
+ self->destroy_i = &spanoq_destroy;
1265
+ self->create_weight_i = &spanw_create;
1213
1266
  return self;
1214
1267
  }
1215
1268
 
@@ -1240,7 +1293,7 @@ char *spannq_to_s(Query *self, char *field)
1240
1293
  return res;
1241
1294
  }
1242
1295
 
1243
- void spannq_extract_terms(Query *self, Array *terms)
1296
+ void spannq_extract_terms(Query *self, HashSet *terms)
1244
1297
  {
1245
1298
  SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1246
1299
  Query *clause;
@@ -1251,10 +1304,10 @@ void spannq_extract_terms(Query *self, Array *terms)
1251
1304
  }
1252
1305
  }
1253
1306
 
1254
- Array *spannq_get_terms(Query *self)
1307
+ HashSet *spannq_get_terms(Query *self)
1255
1308
  {
1256
1309
  SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1257
- Array *terms = ary_create(snq->c_cnt, NULL);
1310
+ HashSet *terms = term_set_create();
1258
1311
  Query *clause;
1259
1312
  int i;
1260
1313
  for (i = 0; i < snq->c_cnt; i++) {
@@ -1281,34 +1334,21 @@ SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
1281
1334
  Query *spannq_rewrite(Query *self, IndexReader *ir)
1282
1335
  {
1283
1336
  SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1284
- Query *clone = NULL;
1285
-
1286
1337
  Query *clause, *rewritten;
1287
- Query **new_clauses = ALLOC_N(Query *, snq->c_cnt);
1288
1338
  int i;
1289
1339
  for (i = 0; i < snq->c_cnt; i++) {
1290
1340
  clause = snq->clauses[i];
1291
1341
  rewritten = clause->rewrite(clause, ir);
1292
- if ((clause != rewritten) && (clone == NULL)) {
1293
- clone = spannq_create(new_clauses, snq->c_cnt, snq->slop, snq->in_order);
1294
- /* The sub-clauses will be handled by the original query */
1295
- clone->destroy_all = false;
1296
- }
1297
- new_clauses[i] = rewritten;
1342
+ if ((rewritten == clause) || self->destroy_all) q_deref(clause);
1343
+ snq->clauses[i] = rewritten;
1298
1344
  }
1299
1345
 
1300
- if (clone != NULL) {
1301
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1302
- return self->rewritten = clone; /* some clauses rewrote */
1303
- } else {
1304
- free(new_clauses); /* no clauses rewrote */
1305
- return self;
1306
- }
1346
+ self->ref_cnt++;
1347
+ return self;
1307
1348
  }
1308
1349
 
1309
- void spannq_destroy(void *p)
1350
+ void spannq_destroy(Query *self)
1310
1351
  {
1311
- Query *self = (Query *)p;
1312
1352
  SpanQuery *sq = (SpanQuery *)self->data;
1313
1353
  SpanNearQuery *snq = (SpanNearQuery *)sq->data;
1314
1354
 
@@ -1317,21 +1357,57 @@ void spannq_destroy(void *p)
1317
1357
  int i;
1318
1358
  for (i = 0; i < snq->c_cnt; i++) {
1319
1359
  clause = snq->clauses[i];
1320
- clause->destroy(clause);
1360
+ q_deref(clause);
1321
1361
  }
1322
1362
  free(snq->clauses);
1323
1363
  }
1324
1364
 
1325
-
1326
1365
  free(snq);
1327
1366
  free(sq);
1328
- q_destroy(self);
1367
+ q_destroy_i(self);
1368
+ }
1369
+
1370
+ static uint spannq_hash(Query *self)
1371
+ {
1372
+ int i;
1373
+ uint hash = 0;
1374
+ Query *q;
1375
+ SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1376
+
1377
+ for (i = 0; i < snq->c_cnt; i++) {
1378
+ q = snq->clauses[i];
1379
+ hash ^= q->hash(q);
1380
+ }
1381
+ return ((hash ^ snq->slop) << 1) | snq->in_order;
1382
+ }
1383
+
1384
+ static int spannq_eq(Query *self, Query *o)
1385
+ {
1386
+ int i;
1387
+ Query *q1, *q2;
1388
+ SpanNearQuery *snq1 = (SpanNearQuery *)((SpanQuery *)self->data)->data;
1389
+ SpanNearQuery *snq2 = (SpanNearQuery *)((SpanQuery *)o->data)->data;
1390
+ if (snq1->c_cnt != snq2->c_cnt ||
1391
+ snq1->slop != snq2->slop ||
1392
+ snq1->in_order != snq2->in_order) {
1393
+ return false;
1394
+ }
1395
+
1396
+ for (i = 0; i < snq1->c_cnt; i++) {
1397
+ q1 = snq1->clauses[i];
1398
+ q2 = snq2->clauses[i];
1399
+ if (!q1->eq(q1, q2)) return false;
1400
+ }
1401
+
1402
+ return true;
1329
1403
  }
1330
1404
 
1331
1405
  Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1332
1406
  {
1333
1407
  Query *self = q_create();
1408
+
1334
1409
  SpanQuery *sq = ALLOC(SpanQuery);
1410
+
1335
1411
  SpanNearQuery *snq = ALLOC(SpanNearQuery);
1336
1412
  snq->clauses = clauses;
1337
1413
  snq->c_cnt = c_cnt;
@@ -1341,16 +1417,17 @@ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
1341
1417
 
1342
1418
  sq->get_spans = &spannq_get_spans;
1343
1419
  sq->get_terms = &spannq_get_terms;
1344
-
1345
1420
  sq->field = ((SpanQuery *)clauses[0]->data)->field;
1421
+ self->data = sq;
1346
1422
 
1347
1423
  self->type = SPAN_NEAR_QUERY;
1348
- self->data = sq;
1349
- self->create_weight = &spanw_create;
1350
- self->extract_terms = &spannq_extract_terms;
1351
1424
  self->rewrite = &spannq_rewrite;
1425
+ self->extract_terms = &spannq_extract_terms;
1352
1426
  self->to_s = &spannq_to_s;
1353
- self->destroy = &spannq_destroy;
1427
+ self->hash = &spannq_hash;
1428
+ self->eq = &spannq_eq;
1429
+ self->destroy_i = &spannq_destroy;
1430
+ self->create_weight_i = &spanw_create;
1354
1431
  return self;
1355
1432
  }
1356
1433
 
@@ -1372,16 +1449,16 @@ char *spanxq_to_s(Query *self, char *field)
1372
1449
  return res;
1373
1450
  }
1374
1451
 
1375
- void spanxq_extract_terms(Query *self, Array *terms)
1452
+ void spanxq_extract_terms(Query *self, HashSet *terms)
1376
1453
  {
1377
1454
  SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1378
1455
  sxq->inc->extract_terms(sxq->inc, terms);
1379
1456
  }
1380
1457
 
1381
- Array *spanxq_get_terms(Query *self)
1458
+ HashSet *spanxq_get_terms(Query *self)
1382
1459
  {
1383
1460
  SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1384
- Array *terms = ary_create(1, NULL);
1461
+ HashSet *terms = term_set_create();
1385
1462
  sxq->inc->extract_terms(sxq->inc, terms);
1386
1463
  return terms;
1387
1464
  }
@@ -1389,48 +1466,60 @@ Array *spanxq_get_terms(Query *self)
1389
1466
  Query *spanxq_rewrite(Query *self, IndexReader *ir)
1390
1467
  {
1391
1468
  SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1392
- Query *clone = NULL;
1393
- Query *inc, *exc;
1394
- Query *inc_rewritten, *exc_rewritten;
1395
-
1396
- inc = sxq->inc;
1397
- inc_rewritten = inc->rewrite(inc, ir);
1398
- exc = sxq->exc;
1399
- exc_rewritten = exc->rewrite(exc, ir);
1400
- if ((inc_rewritten != inc) || (exc_rewritten != exc)) {
1401
- clone = spanxq_create(inc_rewritten, exc_rewritten);
1402
- /* The sub-clauses will be handled by the original query */
1403
- clone->destroy_all = false;
1404
- }
1469
+ Query *q, *rq;
1405
1470
 
1406
- if (clone != NULL) {
1407
- if (self->rewritten) self->rewritten->destroy(self->rewritten);
1408
- return self->rewritten = clone; /* some clauses rewrote */
1409
- } else {
1410
- return self; /* no clauses rewrote */
1411
- }
1471
+ /* rewrite inclusive query */
1472
+ q = sxq->inc;
1473
+ rq = q->rewrite(q, ir);
1474
+ if (rq == q || self->destroy_all) q_deref(q);
1475
+ sxq->inc = rq;
1476
+
1477
+ /* rewrite exclusive query */
1478
+ q = sxq->exc;
1479
+ rq = q->rewrite(q, ir);
1480
+ if (rq == q || self->destroy_all) q_deref(q);
1481
+ sxq->exc = rq;
1482
+
1483
+ self->ref_cnt++;
1484
+ return self;
1412
1485
  }
1413
1486
 
1414
- void spanxq_destroy(void *p)
1487
+ void spanxq_destroy(Query *self)
1415
1488
  {
1416
- Query *self = (Query *)p;
1417
1489
  SpanQuery *sq = (SpanQuery *)self->data;
1418
1490
  SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
1419
1491
 
1420
1492
  if (self->destroy_all) {
1421
- sxq->inc->destroy(sxq->inc);
1422
- sxq->exc->destroy(sxq->exc);
1493
+ q_deref(sxq->inc);
1494
+ q_deref(sxq->exc);
1423
1495
  }
1424
1496
 
1425
1497
  free(sxq);
1426
1498
  free(sq);
1427
- q_destroy(self);
1499
+ q_destroy_i(self);
1428
1500
  }
1429
1501
 
1502
+ static uint spanxq_hash(Query *self)
1503
+ {
1504
+ SpanNotQuery *sxq = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1505
+ return sxq->inc->hash(sxq->inc) ^ sxq->exc->hash(sxq->exc);
1506
+ }
1507
+
1508
+ static int spanxq_eq(Query *self, Query *o)
1509
+ {
1510
+ SpanNotQuery *sxq1 = (SpanNotQuery *)((SpanQuery *)self->data)->data;
1511
+ SpanNotQuery *sxq2 = (SpanNotQuery *)((SpanQuery *)o->data)->data;
1512
+ return sxq1->inc->eq(sxq1->inc, sxq2->inc) &&
1513
+ sxq1->exc->eq(sxq1->exc, sxq2->exc);
1514
+ }
1515
+
1516
+
1430
1517
  Query *spanxq_create(Query *inc, Query *exc)
1431
1518
  {
1432
1519
  Query *self = q_create();
1520
+
1433
1521
  SpanQuery *sq = ALLOC(SpanQuery);
1522
+
1434
1523
  SpanNotQuery *sxq = ALLOC(SpanNotQuery);
1435
1524
  sxq->inc = inc;
1436
1525
  sxq->exc = exc;
@@ -1439,14 +1528,17 @@ Query *spanxq_create(Query *inc, Query *exc)
1439
1528
  sq->get_spans = &spanxe_create;
1440
1529
  sq->get_terms = &spanxq_get_terms;
1441
1530
  sq->field = ((SpanQuery *)inc->data)->field;
1531
+ self->data = sq;
1442
1532
 
1443
1533
  self->type = SPAN_NOT_QUERY;
1444
- self->data = sq;
1445
- self->create_weight = &spanw_create;
1446
- self->extract_terms = &spanxq_extract_terms;
1447
1534
  self->rewrite = &spanxq_rewrite;
1535
+ self->extract_terms = &spanxq_extract_terms;
1448
1536
  self->to_s = &spanxq_to_s;
1449
- self->destroy = &spanxq_destroy;
1537
+ self->hash = &spanxq_hash;
1538
+ self->eq = &spanxq_eq;
1539
+ self->destroy_i = &spanxq_destroy;
1540
+ self->create_weight_i = &spanw_create;
1541
+
1450
1542
  return self;
1451
1543
  }
1452
1544
 
@@ -1512,24 +1604,23 @@ bool spansc_skip_to(Scorer *self, int target)
1512
1604
 
1513
1605
  Explanation *spansc_explain(Scorer *self, int target)
1514
1606
  {
1607
+ Explanation *tf_explanation;
1515
1608
  SpanScorer *spansc = (SpanScorer *)self->data;
1516
1609
  float phrase_freq;
1517
1610
  self->skip_to(self, target);
1518
- phrase_freq = (self->doc == target) ? spansc->freq : 0.0;
1611
+ phrase_freq = (self->doc == target) ? spansc->freq : (float)0.0;
1519
1612
 
1520
- Explanation *tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
1613
+ tf_explanation = expl_create(sim_tf(self->similarity, phrase_freq),
1521
1614
  strfmt("tf(phrase_freq(%f)", phrase_freq));
1522
1615
 
1523
1616
  return tf_explanation;
1524
1617
  }
1525
1618
 
1526
- void spansc_destroy(void *p)
1619
+ void spansc_destroy(Scorer *self)
1527
1620
  {
1528
- Scorer *self = (Scorer *)p;
1529
1621
  SpanScorer *spansc = (SpanScorer *)self->data;
1530
1622
  if (spansc->spans) spansc->spans->destroy(spansc->spans);
1531
- //free(spansc->norms);
1532
- scorer_destroy(p);
1623
+ scorer_destroy_i(self);
1533
1624
  }
1534
1625
 
1535
1626
  Scorer *spansc_create(Weight *weight, IndexReader *ir)