ferret 0.3.2 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -0,0 +1,154 @@
1
+ #include "search.h"
2
+ #include <string.h>
3
+
4
+ /***************************************************************************
5
+ *
6
+ * Weight
7
+ *
8
+ ***************************************************************************/
9
+
10
+ char *csw_to_s(Weight *self)
11
+ {
12
+ char dbuf[32];
13
+ dbl_to_s(dbuf, self->value);
14
+ return epstrdup("ConstantScoreWeight(%s)", strlen(dbuf), dbuf);
15
+ }
16
+
17
/* Destroys a constant-score weight by handing the pointer to free(). */
void csw_destroy(void *p)
{
    free(p);
}
21
+
22
+ Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
23
+ {
24
+ Filter *filter = (Filter *)self->query->data;
25
+ Explanation *expl;
26
+ char *filter_str = filter->to_s(filter);
27
+ BitVector *bv = filt_get_bv(filter, ir);
28
+
29
+ if (bv_get(bv, doc_num)) {
30
+ expl = expl_create(self->value,
31
+ epstrdup("ConstantScoreQuery(%s), product of:",
32
+ strlen(filter_str), filter_str));
33
+ expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
34
+ expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
35
+ } else {
36
+ expl = expl_create(self->value,
37
+ epstrdup("ConstantScoreQuery(%s), does not match id %d",
38
+ strlen(filter_str) + 20, filter_str, doc_num));
39
+ }
40
+ free(filter_str);
41
+ return expl;
42
+ }
43
+
44
+ Weight *csw_create(Query *query, Searcher *searcher)
45
+ {
46
+ Weight *self = ALLOC(Weight);
47
+ ZEROSET(self, Weight, 1);
48
+ self->get_query = &w_get_query;
49
+ self->get_value = &w_get_value;
50
+ self->normalize = &w_normalize;
51
+ self->scorer = &cssc_create;
52
+ self->explain = &csw_explain;
53
+ self->to_s = &csw_to_s;
54
+ self->destroy = &csw_destroy;
55
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
56
+
57
+ self->similarity = query->get_similarity(query, searcher);
58
+ self->idf = 1.0;
59
+ self->query = query;
60
+ self->value = 0.0;
61
+
62
+ return self;
63
+ }
64
+
65
+ /***************************************************************************
66
+ *
67
+ * ConstantScoreQuery
68
+ *
69
+ ***************************************************************************/
70
+
71
+ char *csq_to_s(Query *self, char *field)
72
+ {
73
+ Filter *filter = (Filter *)self->data;
74
+ char *filter_str = filter->to_s(filter);
75
+ char *buffer;
76
+ if (self->boost == 1.0) {
77
+ buffer = epstrdup("ConstantScore(%s)", strlen(filter_str), filter_str);
78
+ } else {
79
+ char dbuf[32];
80
+ dbl_to_s(dbuf, self->boost);
81
+ buffer = epstrdup("ConstantScore(%s)^%s",
82
+ strlen(filter_str) + strlen(dbuf), filter_str, dbuf);
83
+ }
84
+ free(filter_str);
85
+ return buffer;;
86
+ }
87
+
88
+ void csq_destroy(void *p)
89
+ {
90
+ Query *self = (Query *)p;
91
+ if (self->destroy_all) {
92
+ Filter *filter = (Filter *)self->data;
93
+ filter->destroy(filter);
94
+ }
95
+ q_destroy(self);
96
+ }
97
+
98
+ Query *csq_create(Filter *filter)
99
+ {
100
+ Query *self = q_create();
101
+ self->type = CONSTANT_QUERY;
102
+ self->data = filter;
103
+ self->create_weight = &csw_create;
104
+ self->to_s = &csq_to_s;
105
+ self->destroy = &csq_destroy;
106
+
107
+ return self;
108
+ }
109
+
110
+ /***************************************************************************
111
+ *
112
+ * ConstantScoreScorer
113
+ *
114
+ ***************************************************************************/
115
+
116
+ float cssc_score(Scorer *self)
117
+ {
118
+ return ((ConstantScoreScorer *)self->data)->score;
119
+ }
120
+
121
+ bool cssc_next(Scorer *self)
122
+ {
123
+ BitVector *bv = ((ConstantScoreScorer *)self->data)->bv;
124
+ return ((self->doc = bv_scan_next(bv)) >= 0);
125
+ }
126
+
127
+ bool cssc_skip_to(Scorer *self, int doc_num)
128
+ {
129
+ BitVector *bv = ((ConstantScoreScorer *)self->data)->bv;
130
+ return ((self->doc = bv_scan_next_from(bv, doc_num)) >= 0);
131
+ }
132
+
133
+ Explanation *cssc_explain(Scorer *self, int doc_num)
134
+ {
135
+ return expl_create(1.0, estrdup("ConstantScoreScorer"));
136
+ }
137
+
138
+ Scorer *cssc_create(Weight *weight, IndexReader *ir)
139
+ {
140
+ Scorer *self = scorer_create(weight->similarity);
141
+ Filter *filter = (Filter *)weight->query->data;
142
+ ConstantScoreScorer *cssc = ALLOC(ConstantScoreScorer);
143
+ ZEROSET(cssc, ConstantScoreScorer, 1);
144
+ self->data = cssc;
145
+ cssc->score = weight->value;
146
+ cssc->bv = filt_get_bv(filter, ir);
147
+
148
+ self->score = &cssc_score;
149
+ self->next = &cssc_next;
150
+ self->skip_to = &cssc_skip_to;
151
+ self->explain = &cssc_explain;
152
+ self->destroy = &scorer_destroy;
153
+ return self;
154
+ }
data/ext/q_fuzzy.c ADDED
@@ -0,0 +1,287 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /****************************************************************************
5
+ *
6
+ * FuzzyStuff
7
+ *
8
+ * The main method here is the fuzq_score method which scores a term against
9
+ * another term. The other methods all act in support.
10
+ *
11
+ ****************************************************************************/
12
+
13
+
14
+ int fuzq_calculate_max_distance(FuzzyQuery *fuzq, int m)
15
+ {
16
+ return (int)((1.0 - fuzq->min_sim) * (MIN(fuzq->text_len, m) + fuzq->pre_len));
17
+ }
18
+
19
+ void fuzq_initialize_max_distances(FuzzyQuery *fuzq)
20
+ {
21
+ int i;
22
+ for (i = 0; i < TYPICAL_LONGEST_WORD; i++) {
23
+ fuzq->max_distances[i] = fuzq_calculate_max_distance(fuzq, i);
24
+ }
25
+ }
26
+
27
+ float fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
28
+ {
29
+ return (m < TYPICAL_LONGEST_WORD) ? fuzq->max_distances[m]
30
+ : fuzq_calculate_max_distance(fuzq, m);
31
+ }
32
+
33
+ float fuzq_score(FuzzyQuery *fuzq, char *target)
34
+ {
35
+ int i, j;
36
+ int max_distance;
37
+ int m = strlen(target);
38
+ int n = fuzq->text_len;
39
+ int *d = fuzq->da;
40
+ char *text = fuzq->text;
41
+ if (n == 0) {
42
+ /* we don't have anything to compare. That means if we just add
43
+ * the letters for m we get the new word */
44
+ return fuzq->pre_len == 0 ? 0.0f : 1.0f - ((float) m / fuzq->pre_len);
45
+ }
46
+ if (m == 0) {
47
+ return fuzq->pre_len == 0 ? 0.0f : 1.0f - ((float) n / fuzq->pre_len);
48
+ }
49
+
50
+ max_distance = fuzq_get_max_distance(fuzq, m);
51
+
52
+ //printf("n%dm%dmd%ddiff%d<%s><%s>\n", n, m, max_distance, m-n, fuzq->text, target);
53
+ if (max_distance < ((m > n) ? (m-n) : (n-m))) { /* abs */
54
+ /* Just adding the characters of m to n or vice-versa results in too many
55
+ * edits for example "pre" length is 3 and "prefixes" length is 8. We can
56
+ * see that given this optimal circumstance, the edit distance cannot be
57
+ * less than 5 which is 8-3 or more precisesly Math.abs(3-8). If our
58
+ * maximum edit distance is 4, then we can discard this word without
59
+ * looking at it. */
60
+ return 0.0f;
61
+ }
62
+
63
+ /* Let's make sure we have enough room in our array to do the distance
64
+ * calculations. */
65
+ if (((m+1) * (n+1)) >= fuzq->da_capa) {
66
+ fuzq->da_capa = (m * (fuzq->text_len+1)) * 2;
67
+ REALLOC_N(fuzq->da, int, fuzq->da_capa);
68
+ d = fuzq->da;
69
+ }
70
+
71
+ /* init matrix d */
72
+ for (i = 0; i <= n; i++) d[i + m * 0] = i;
73
+ for (j = 0; j <= m; j++) d[0 + m * j] = j;
74
+
75
+ /* start computing edit distance */
76
+ for (i = 1; i <= n; i++) {
77
+ int best_pos_ed_dist = m;
78
+ char s_i = text[i - 1];
79
+ for (j = 1; j <= m; j++) {
80
+ if (s_i != target[j-1]) {
81
+ d[i + m*j] = min3(d[i-1 + m*j], d[i + m*(j-1)], d[i-1 + m*(j-1)])+1;
82
+ } else {
83
+ d[i + m*j] = min3(d[i-1 + m*j]+1, d[i + m*(j-1)]+1, d[i-1 + m*(j-1)]);
84
+ }
85
+ best_pos_ed_dist = min(best_pos_ed_dist, d[i + m*j]);
86
+ }
87
+ //printf("(bped = %d, i = %d, md = %d)", best_pos_ed_dist, i, max_distance);
88
+
89
+ /* After calculating row i, the best possible edit distance can be found
90
+ * by found by finding the smallest value in a given column. If the
91
+ * best_pos_ed_dist is greater than the max distance, abort.
92
+ */
93
+
94
+ if ((i > max_distance) && (best_pos_ed_dist > max_distance)) {
95
+ /* equal is okay, but not greater
96
+ * the closest the target can be to the text is just too far away.
97
+ * this target is leaving the party early. */
98
+ return 0.0f;
99
+ }
100
+ }
101
+ //printf("<%f, d[n + m*m] = %d min_len = %d>", 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min(n, m))), d[n + m*m], fuzq->pre_len + min(n, m));
102
+
103
+ /* this will return less than 0.0 when the edit distance is greater than the
104
+ * number of characters in the shorter word. but this was the formula that
105
+ * was previously used in FuzzyTermEnum, so it has not been changed (even
106
+ * though min_sim must be greater than 0.0) */
107
+ return 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min(n, m)));
108
+ }
109
+
110
+ /****************************************************************************
111
+ *
112
+ * FuzzyQuery
113
+ *
114
+ ****************************************************************************/
115
+
116
+ char *fuzq_to_s(Query *self, char *field)
117
+ {
118
+ char *buffer, *bptr;
119
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
120
+ Term *term = fuzq->term;
121
+ int tlen = strlen(term->text);
122
+ int flen = strlen(term->field);
123
+ bptr = buffer = ALLOC_N(char, tlen + flen + 35);
124
+
125
+ if (strcmp(term->field, field) != 0) {
126
+ sprintf(bptr, "%s:", term->field);
127
+ bptr += strlen(term->field) + 1;
128
+ }
129
+ sprintf(bptr, "%s~", term->text);
130
+ bptr += strlen(bptr);
131
+ if (fuzq->min_sim != 0.5) {
132
+ dbl_to_s(bptr, fuzq->min_sim);
133
+ bptr += strlen(bptr);
134
+ }
135
+ if (self->boost != 1.0) {
136
+ *bptr = '^';
137
+ dbl_to_s(++bptr, self->boost);
138
+ }
139
+ return buffer;
140
+ }
141
+
142
+ typedef struct ScoredTerm {
143
+ Term *term;
144
+ float score;
145
+ } ScoredTerm;
146
+
147
+ bool scored_term_less_than(void *p1, void *p2)
148
+ {
149
+ ScoredTerm *st1 = (ScoredTerm *)p1;
150
+ ScoredTerm *st2 = (ScoredTerm *)p2;
151
+
152
+ if (st1->score == st2->score)
153
+ return (strcmp(st1->term->text, st2->term->text) < 0);
154
+
155
+ return (st1->score < st2->score);
156
+ }
157
+
158
+ void scored_term_destroy(void *p)
159
+ {
160
+ ScoredTerm *st = (ScoredTerm *)p;
161
+ term_destroy(st->term);
162
+ free(st);
163
+ }
164
+
165
+ ScoredTerm *scored_term_create(Term *term, float score)
166
+ {
167
+ ScoredTerm *self = ALLOC(ScoredTerm);
168
+ self->term = term;
169
+ self->score = score;
170
+ return self;
171
+ }
172
+
173
+ Query *fuzq_rewrite(Query *self, IndexReader *ir)
174
+ {
175
+ Query *q;
176
+ Query *tq;
177
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
178
+
179
+ Term *term = fuzq->term;
180
+ char *text = term->text;
181
+ char *field = term->field;
182
+ Term prefix_term;
183
+ prefix_term.field = field;
184
+ if (fuzq->pre_len >= strlen(text)) {
185
+ q = tq_create(term_clone(term));
186
+ } else {
187
+ PriorityQueue *term_pq;
188
+ TermEnum *te;
189
+ Term prefix_term;
190
+ char *prefix = NULL;
191
+ int pre_len = fuzq->pre_len;
192
+ ScoredTerm *scored_term;
193
+
194
+ q = bq_create(true);
195
+
196
+ term_pq = pq_create(((BooleanQuery *)q->data)->max_clause_cnt,
197
+ &scored_term_less_than);
198
+ term_pq->free_elem = &scored_term_destroy;
199
+
200
+ prefix_term.field = field;
201
+ prefix_term.text = (char *)EMPTY_STRING;
202
+ if (pre_len >= 0) {
203
+ prefix = ALLOC_N(char, pre_len + 1);
204
+ strncpy(prefix, text, pre_len);
205
+ prefix_term.text = prefix;
206
+ prefix_term.text[pre_len] = '\0';
207
+ }
208
+ te = ir->terms_from(ir, &prefix_term);
209
+
210
+ fuzq->scale_factor = 1.0 / (1.0 - fuzq->min_sim);
211
+ fuzq->text = fuzq->term->text + pre_len;
212
+ fuzq->text_len = strlen(fuzq->text);
213
+ fuzq_initialize_max_distances(fuzq);
214
+
215
+ if (te) {
216
+ TermBuffer *tb = te->tb_curr;
217
+ float score = 0.0, min_score = fuzq->min_sim;
218
+
219
+ do {
220
+ if (strcmp(tb->field, field) != 0 ||
221
+ (prefix && strncmp(tb->text, prefix, pre_len) != 0))
222
+ break;
223
+
224
+ score = fuzq_score(fuzq, tb->text + pre_len);
225
+ //printf("%s:%s:%f\n", tb->text, fuzq->text, score);
226
+
227
+ if (score > min_score) {
228
+ pq_insert(term_pq, scored_term_create(tb_get_term(tb), score));
229
+ if (pq_full(term_pq))
230
+ min_score = ((ScoredTerm *)pq_top(term_pq))->score;
231
+ }
232
+ } while ((tb = te->next(te)) != NULL);
233
+ te->close(te);
234
+ }
235
+ free(prefix);
236
+
237
+ while ((scored_term = pq_pop(term_pq)) != NULL) {
238
+ tq = tq_create(scored_term->term); /* found match */
239
+ tq->boost = self->boost; /* set boost */
240
+ bq_add_query(q, tq, BC_SHOULD); /* add query */
241
+ free(scored_term); /* no need to free the term as it's in the query */
242
+ }
243
+ pq_destroy(term_pq);
244
+ }
245
+
246
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
247
+ return self->rewritten = q;
248
+ }
249
+
250
+ void fuzq_destroy(void *p)
251
+ {
252
+ Query *self = (Query *)p;
253
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
254
+ if (self->destroy_all) term_destroy((Term *)fuzq->term);
255
+ free(fuzq->da);
256
+ free(fuzq);
257
+ q_destroy(self);
258
+ }
259
+
260
+ Query *fuzq_create(Term *term)
261
+ {
262
+ Query *self = q_create();
263
+ FuzzyQuery *fq = ALLOC(FuzzyQuery);
264
+ ZEROSET(fq, FuzzyQuery, 1);
265
+
266
+ fq->term = term;
267
+ fq->pre_len = DEF_PRE_LEN;
268
+ fq->min_sim = DEF_MIN_SIM;
269
+ self->data = fq;
270
+ self->type = FUZZY_QUERY;
271
+ self->create_weight = NULL;
272
+ self->to_s = &fuzq_to_s;
273
+ self->rewrite = &fuzq_rewrite;
274
+ self->destroy = &fuzq_destroy;
275
+ self->rewritten = NULL;
276
+
277
+ return self;
278
+ }
279
+
280
+ Query *fuzq_create_mp(Term *term, float min_sim, int pre_len)
281
+ {
282
+ Query *self = fuzq_create(term);
283
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
284
+ if (pre_len) fuzq->pre_len = pre_len;
285
+ if (min_sim) fuzq->min_sim = min_sim;
286
+ return self;
287
+ }
data/ext/q_match_all.c ADDED
@@ -0,0 +1,142 @@
1
+ #include "search.h"
2
+ #include <string.h>
3
+
4
+ /***************************************************************************
5
+ *
6
+ * Weight
7
+ *
8
+ ***************************************************************************/
9
+
10
+ char *maw_to_s(Weight *self)
11
+ {
12
+ char dbuf[32];
13
+ dbl_to_s(dbuf, self->value);
14
+ return epstrdup("MatchAllWeight(%s)", strlen(dbuf), dbuf);
15
+ }
16
+
17
/* Destroys a match-all weight by handing the pointer to free(). */
void maw_destroy(void *p)
{
    free(p);
}
21
+
22
+ Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
23
+ {
24
+ Explanation *expl;
25
+ if (!ir->is_deleted(ir, doc_num)) {
26
+ expl = expl_create(self->value, estrdup("MatchAllQuery: product of:"));
27
+ expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
28
+ expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
29
+ } else {
30
+ expl = expl_create(self->value,
31
+ epstrdup("MatchAllQuery: doc %d was deleted", 20, doc_num));
32
+ }
33
+
34
+ return expl;
35
+ }
36
+
37
+ Weight *maw_create(Query *query, Searcher *searcher)
38
+ {
39
+ Weight *self = ALLOC(Weight);
40
+ ZEROSET(self, Weight, 1);
41
+ self->get_query = &w_get_query;
42
+ self->get_value = &w_get_value;
43
+ self->normalize = &w_normalize;
44
+ self->scorer = &masc_create;
45
+ self->explain = &maw_explain;
46
+ self->to_s = &maw_to_s;
47
+ self->destroy = &maw_destroy;
48
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
49
+
50
+ self->similarity = query->get_similarity(query, searcher);
51
+ self->idf = 1.0;
52
+ self->query = query;
53
+ self->value = 0.0;
54
+
55
+ return self;
56
+ }
57
+
58
+ /***************************************************************************
59
+ *
60
+ * MatchAllQuery
61
+ *
62
+ ***************************************************************************/
63
+
64
+ char *maq_to_s(Query *self, char *field)
65
+ {
66
+ if (self->boost == 1.0) {
67
+ return estrdup("MatchAll");
68
+ } else {
69
+ char dbuf[32];
70
+ dbl_to_s(dbuf, self->boost);
71
+ return epstrdup("MatchAll^%s", strlen(dbuf), dbuf);
72
+ }
73
+ }
74
+
75
+ void maq_destroy(void *p)
76
+ {
77
+ Query *self = (Query *)p;
78
+ q_destroy(self);
79
+ }
80
+
81
+ Query *maq_create()
82
+ {
83
+ Query *self = q_create();
84
+ self->type = MATCH_ALL_QUERY;
85
+ self->create_weight = &maw_create;
86
+ self->to_s = &maq_to_s;
87
+ self->destroy = &maq_destroy;
88
+
89
+ return self;
90
+ }
91
+
92
+ /***************************************************************************
93
+ *
94
+ * MatchAllScorer
95
+ *
96
+ ***************************************************************************/
97
+
98
+ float masc_score(Scorer *self)
99
+ {
100
+ return ((MatchAllScorer *)self->data)->score;
101
+ }
102
+
103
+ bool masc_next(Scorer *self)
104
+ {
105
+ MatchAllScorer *mas = (MatchAllScorer *)self->data;
106
+ while (self->doc < (mas->max_doc - 1)) {
107
+ self->doc++;
108
+ if (!mas->ir->is_deleted(mas->ir, self->doc)) {
109
+ return true;
110
+ }
111
+ }
112
+ return false;
113
+ }
114
+
115
+ bool masc_skip_to(Scorer *self, int doc_num)
116
+ {
117
+ self->doc = doc_num - 1;
118
+ return masc_next(self);
119
+ }
120
+
121
+ Explanation *masc_explain(Scorer *self, int doc_num)
122
+ {
123
+ return expl_create(1.0, estrdup("MatchAllScorer"));
124
+ }
125
+
126
+ Scorer *masc_create(Weight *weight, IndexReader *ir)
127
+ {
128
+ Scorer *self = scorer_create(weight->similarity);
129
+ MatchAllScorer *mas = ALLOC(MatchAllScorer);
130
+ mas->ir = ir;
131
+ mas->max_doc = ir->max_doc(ir);
132
+ mas->score = weight->value;
133
+ self->data = mas;
134
+
135
+ self->doc = -1;
136
+ self->score = &masc_score;
137
+ self->next = &masc_next;
138
+ self->skip_to = &masc_skip_to;
139
+ self->explain = &masc_explain;
140
+ self->destroy = &scorer_destroy;
141
+ return self;
142
+ }