ferret 0.3.2 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (141) hide show
  1. data/CHANGELOG +9 -0
  2. data/Rakefile +51 -25
  3. data/ext/analysis.c +553 -0
  4. data/ext/analysis.h +76 -0
  5. data/ext/array.c +83 -0
  6. data/ext/array.h +19 -0
  7. data/ext/bitvector.c +164 -0
  8. data/ext/bitvector.h +29 -0
  9. data/ext/compound_io.c +335 -0
  10. data/ext/document.c +336 -0
  11. data/ext/document.h +87 -0
  12. data/ext/ferret.c +88 -47
  13. data/ext/ferret.h +43 -109
  14. data/ext/field.c +395 -0
  15. data/ext/filter.c +103 -0
  16. data/ext/fs_store.c +352 -0
  17. data/ext/global.c +219 -0
  18. data/ext/global.h +73 -0
  19. data/ext/hash.c +446 -0
  20. data/ext/hash.h +80 -0
  21. data/ext/hashset.c +141 -0
  22. data/ext/hashset.h +37 -0
  23. data/ext/helper.c +11 -0
  24. data/ext/helper.h +5 -0
  25. data/ext/inc/lang.h +41 -0
  26. data/ext/ind.c +389 -0
  27. data/ext/index.h +884 -0
  28. data/ext/index_io.c +269 -415
  29. data/ext/index_rw.c +2543 -0
  30. data/ext/lang.c +31 -0
  31. data/ext/lang.h +41 -0
  32. data/ext/priorityqueue.c +228 -0
  33. data/ext/priorityqueue.h +44 -0
  34. data/ext/q_boolean.c +1331 -0
  35. data/ext/q_const_score.c +154 -0
  36. data/ext/q_fuzzy.c +287 -0
  37. data/ext/q_match_all.c +142 -0
  38. data/ext/q_multi_phrase.c +343 -0
  39. data/ext/q_parser.c +2180 -0
  40. data/ext/q_phrase.c +657 -0
  41. data/ext/q_prefix.c +75 -0
  42. data/ext/q_range.c +247 -0
  43. data/ext/q_span.c +1566 -0
  44. data/ext/q_term.c +308 -0
  45. data/ext/q_wildcard.c +146 -0
  46. data/ext/r_analysis.c +255 -0
  47. data/ext/r_doc.c +578 -0
  48. data/ext/r_index_io.c +996 -0
  49. data/ext/r_qparser.c +158 -0
  50. data/ext/r_search.c +2321 -0
  51. data/ext/r_store.c +263 -0
  52. data/ext/r_term.c +219 -0
  53. data/ext/ram_store.c +447 -0
  54. data/ext/search.c +524 -0
  55. data/ext/search.h +1065 -0
  56. data/ext/similarity.c +143 -39
  57. data/ext/sort.c +661 -0
  58. data/ext/store.c +35 -0
  59. data/ext/store.h +152 -0
  60. data/ext/term.c +704 -143
  61. data/ext/termdocs.c +599 -0
  62. data/ext/vector.c +594 -0
  63. data/lib/ferret.rb +9 -10
  64. data/lib/ferret/analysis/analyzers.rb +2 -2
  65. data/lib/ferret/analysis/standard_tokenizer.rb +1 -1
  66. data/lib/ferret/analysis/token.rb +14 -14
  67. data/lib/ferret/analysis/token_filters.rb +3 -3
  68. data/lib/ferret/document/field.rb +16 -17
  69. data/lib/ferret/index/document_writer.rb +4 -4
  70. data/lib/ferret/index/index.rb +39 -23
  71. data/lib/ferret/index/index_writer.rb +2 -2
  72. data/lib/ferret/index/multiple_term_doc_pos_enum.rb +1 -8
  73. data/lib/ferret/index/segment_term_vector.rb +4 -4
  74. data/lib/ferret/index/term.rb +5 -1
  75. data/lib/ferret/index/term_vector_offset_info.rb +6 -6
  76. data/lib/ferret/index/term_vectors_io.rb +5 -5
  77. data/lib/ferret/query_parser/query_parser.tab.rb +81 -77
  78. data/lib/ferret/search.rb +1 -1
  79. data/lib/ferret/search/boolean_query.rb +2 -1
  80. data/lib/ferret/search/field_sorted_hit_queue.rb +3 -3
  81. data/lib/ferret/search/fuzzy_query.rb +2 -1
  82. data/lib/ferret/search/index_searcher.rb +3 -0
  83. data/lib/ferret/search/{match_all_docs_query.rb → match_all_query.rb} +7 -7
  84. data/lib/ferret/search/multi_phrase_query.rb +6 -5
  85. data/lib/ferret/search/phrase_query.rb +3 -6
  86. data/lib/ferret/search/prefix_query.rb +4 -4
  87. data/lib/ferret/search/sort.rb +3 -1
  88. data/lib/ferret/search/sort_field.rb +9 -9
  89. data/lib/ferret/search/spans/near_spans_enum.rb +1 -1
  90. data/lib/ferret/search/spans/span_near_query.rb +1 -1
  91. data/lib/ferret/search/spans/span_weight.rb +1 -1
  92. data/lib/ferret/search/spans/spans_enum.rb +7 -7
  93. data/lib/ferret/store/fs_store.rb +10 -6
  94. data/lib/ferret/store/ram_store.rb +3 -3
  95. data/lib/rferret.rb +36 -0
  96. data/test/functional/thread_safety_index_test.rb +2 -2
  97. data/test/test_helper.rb +16 -2
  98. data/test/unit/analysis/c_token.rb +25 -0
  99. data/test/unit/analysis/tc_per_field_analyzer_wrapper.rb +1 -1
  100. data/test/unit/analysis/tc_standard_analyzer.rb +1 -1
  101. data/test/unit/document/{tc_document.rb → c_document.rb} +0 -0
  102. data/test/unit/document/c_field.rb +98 -0
  103. data/test/unit/document/tc_field.rb +0 -66
  104. data/test/unit/index/{tc_index.rb → c_index.rb} +62 -6
  105. data/test/unit/index/{tc_index_reader.rb → c_index_reader.rb} +51 -10
  106. data/test/unit/index/{tc_index_writer.rb → c_index_writer.rb} +0 -4
  107. data/test/unit/index/{tc_term.rb → c_term.rb} +1 -3
  108. data/test/unit/index/{tc_term_vector_offset_info.rb → c_term_voi.rb} +5 -5
  109. data/test/unit/index/tc_segment_term_vector.rb +2 -2
  110. data/test/unit/index/tc_term_vectors_io.rb +4 -4
  111. data/test/unit/query_parser/c_query_parser.rb +138 -0
  112. data/test/unit/search/{tc_filter.rb → c_filter.rb} +24 -24
  113. data/test/unit/search/{tc_fuzzy_query.rb → c_fuzzy_query.rb} +0 -0
  114. data/test/unit/search/{tc_index_searcher.rb → c_index_searcher.rb} +9 -26
  115. data/test/unit/search/{tc_search_and_sort.rb → c_search_and_sort.rb} +15 -15
  116. data/test/unit/search/{tc_sort.rb → c_sort.rb} +2 -1
  117. data/test/unit/search/c_sort_field.rb +27 -0
  118. data/test/unit/search/{tc_spans.rb → c_spans.rb} +0 -0
  119. data/test/unit/search/tc_sort_field.rb +7 -20
  120. data/test/unit/store/c_fs_store.rb +76 -0
  121. data/test/unit/store/c_ram_store.rb +35 -0
  122. data/test/unit/store/m_store.rb +34 -0
  123. data/test/unit/store/m_store_lock.rb +68 -0
  124. data/test/unit/store/tc_fs_store.rb +0 -53
  125. data/test/unit/store/tc_ram_store.rb +0 -20
  126. data/test/unit/store/tm_store.rb +0 -30
  127. data/test/unit/store/tm_store_lock.rb +0 -66
  128. metadata +84 -31
  129. data/ext/Makefile +0 -140
  130. data/ext/ferret_ext.so +0 -0
  131. data/ext/priority_queue.c +0 -232
  132. data/ext/ram_directory.c +0 -321
  133. data/ext/segment_merge_queue.c +0 -37
  134. data/ext/segment_term_enum.c +0 -326
  135. data/ext/string_helper.c +0 -42
  136. data/ext/tags +0 -344
  137. data/ext/term_buffer.c +0 -230
  138. data/ext/term_infos_reader.c +0 -54
  139. data/ext/terminfo.c +0 -160
  140. data/ext/token.c +0 -93
  141. data/ext/util.c +0 -12
@@ -0,0 +1,154 @@
1
+ #include "search.h"
2
+ #include <string.h>
3
+
4
+ /***************************************************************************
5
+ *
6
+ * Weight
7
+ *
8
+ ***************************************************************************/
9
+
10
+ char *csw_to_s(Weight *self)
11
+ {
12
+ char dbuf[32];
13
+ dbl_to_s(dbuf, self->value);
14
+ return epstrdup("ConstantScoreWeight(%s)", strlen(dbuf), dbuf);
15
+ }
16
+
17
/*
 * Destructor installed as Weight->destroy by csw_create. The weight is a
 * single allocation, so releasing the pointer itself is all that is done.
 */
void csw_destroy(void *p)
{
    void *weight = p;

    free(weight);
}
21
+
22
+ Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
23
+ {
24
+ Filter *filter = (Filter *)self->query->data;
25
+ Explanation *expl;
26
+ char *filter_str = filter->to_s(filter);
27
+ BitVector *bv = filt_get_bv(filter, ir);
28
+
29
+ if (bv_get(bv, doc_num)) {
30
+ expl = expl_create(self->value,
31
+ epstrdup("ConstantScoreQuery(%s), product of:",
32
+ strlen(filter_str), filter_str));
33
+ expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
34
+ expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
35
+ } else {
36
+ expl = expl_create(self->value,
37
+ epstrdup("ConstantScoreQuery(%s), does not match id %d",
38
+ strlen(filter_str) + 20, filter_str, doc_num));
39
+ }
40
+ free(filter_str);
41
+ return expl;
42
+ }
43
+
44
+ Weight *csw_create(Query *query, Searcher *searcher)
45
+ {
46
+ Weight *self = ALLOC(Weight);
47
+ ZEROSET(self, Weight, 1);
48
+ self->get_query = &w_get_query;
49
+ self->get_value = &w_get_value;
50
+ self->normalize = &w_normalize;
51
+ self->scorer = &cssc_create;
52
+ self->explain = &csw_explain;
53
+ self->to_s = &csw_to_s;
54
+ self->destroy = &csw_destroy;
55
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
56
+
57
+ self->similarity = query->get_similarity(query, searcher);
58
+ self->idf = 1.0;
59
+ self->query = query;
60
+ self->value = 0.0;
61
+
62
+ return self;
63
+ }
64
+
65
+ /***************************************************************************
66
+ *
67
+ * ConstantScoreQuery
68
+ *
69
+ ***************************************************************************/
70
+
71
+ char *csq_to_s(Query *self, char *field)
72
+ {
73
+ Filter *filter = (Filter *)self->data;
74
+ char *filter_str = filter->to_s(filter);
75
+ char *buffer;
76
+ if (self->boost == 1.0) {
77
+ buffer = epstrdup("ConstantScore(%s)", strlen(filter_str), filter_str);
78
+ } else {
79
+ char dbuf[32];
80
+ dbl_to_s(dbuf, self->boost);
81
+ buffer = epstrdup("ConstantScore(%s)^%s",
82
+ strlen(filter_str) + strlen(dbuf), filter_str, dbuf);
83
+ }
84
+ free(filter_str);
85
+ return buffer;;
86
+ }
87
+
88
+ void csq_destroy(void *p)
89
+ {
90
+ Query *self = (Query *)p;
91
+ if (self->destroy_all) {
92
+ Filter *filter = (Filter *)self->data;
93
+ filter->destroy(filter);
94
+ }
95
+ q_destroy(self);
96
+ }
97
+
98
+ Query *csq_create(Filter *filter)
99
+ {
100
+ Query *self = q_create();
101
+ self->type = CONSTANT_QUERY;
102
+ self->data = filter;
103
+ self->create_weight = &csw_create;
104
+ self->to_s = &csq_to_s;
105
+ self->destroy = &csq_destroy;
106
+
107
+ return self;
108
+ }
109
+
110
+ /***************************************************************************
111
+ *
112
+ * ConstantScoreScorer
113
+ *
114
+ ***************************************************************************/
115
+
116
+ float cssc_score(Scorer *self)
117
+ {
118
+ return ((ConstantScoreScorer *)self->data)->score;
119
+ }
120
+
121
+ bool cssc_next(Scorer *self)
122
+ {
123
+ BitVector *bv = ((ConstantScoreScorer *)self->data)->bv;
124
+ return ((self->doc = bv_scan_next(bv)) >= 0);
125
+ }
126
+
127
+ bool cssc_skip_to(Scorer *self, int doc_num)
128
+ {
129
+ BitVector *bv = ((ConstantScoreScorer *)self->data)->bv;
130
+ return ((self->doc = bv_scan_next_from(bv, doc_num)) >= 0);
131
+ }
132
+
133
+ Explanation *cssc_explain(Scorer *self, int doc_num)
134
+ {
135
+ return expl_create(1.0, estrdup("ConstantScoreScorer"));
136
+ }
137
+
138
+ Scorer *cssc_create(Weight *weight, IndexReader *ir)
139
+ {
140
+ Scorer *self = scorer_create(weight->similarity);
141
+ Filter *filter = (Filter *)weight->query->data;
142
+ ConstantScoreScorer *cssc = ALLOC(ConstantScoreScorer);
143
+ ZEROSET(cssc, ConstantScoreScorer, 1);
144
+ self->data = cssc;
145
+ cssc->score = weight->value;
146
+ cssc->bv = filt_get_bv(filter, ir);
147
+
148
+ self->score = &cssc_score;
149
+ self->next = &cssc_next;
150
+ self->skip_to = &cssc_skip_to;
151
+ self->explain = &cssc_explain;
152
+ self->destroy = &scorer_destroy;
153
+ return self;
154
+ }
data/ext/q_fuzzy.c ADDED
@@ -0,0 +1,287 @@
1
+ #include <string.h>
2
+ #include "search.h"
3
+
4
+ /****************************************************************************
5
+ *
6
+ * FuzzyStuff
7
+ *
8
+ * The main method here is the fuzq_score method which scores a term against
9
+ * another term. The other methods all act in support.
10
+ *
11
+ ****************************************************************************/
12
+
13
+
14
+ int fuzq_calculate_max_distance(FuzzyQuery *fuzq, int m)
15
+ {
16
+ return (int)((1.0 - fuzq->min_sim) * (MIN(fuzq->text_len, m) + fuzq->pre_len));
17
+ }
18
+
19
+ void fuzq_initialize_max_distances(FuzzyQuery *fuzq)
20
+ {
21
+ int i;
22
+ for (i = 0; i < TYPICAL_LONGEST_WORD; i++) {
23
+ fuzq->max_distances[i] = fuzq_calculate_max_distance(fuzq, i);
24
+ }
25
+ }
26
+
27
+ float fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
28
+ {
29
+ return (m < TYPICAL_LONGEST_WORD) ? fuzq->max_distances[m]
30
+ : fuzq_calculate_max_distance(fuzq, m);
31
+ }
32
+
33
+ float fuzq_score(FuzzyQuery *fuzq, char *target)
34
+ {
35
+ int i, j;
36
+ int max_distance;
37
+ int m = strlen(target);
38
+ int n = fuzq->text_len;
39
+ int *d = fuzq->da;
40
+ char *text = fuzq->text;
41
+ if (n == 0) {
42
+ /* we don't have anything to compare. That means if we just add
43
+ * the letters for m we get the new word */
44
+ return fuzq->pre_len == 0 ? 0.0f : 1.0f - ((float) m / fuzq->pre_len);
45
+ }
46
+ if (m == 0) {
47
+ return fuzq->pre_len == 0 ? 0.0f : 1.0f - ((float) n / fuzq->pre_len);
48
+ }
49
+
50
+ max_distance = fuzq_get_max_distance(fuzq, m);
51
+
52
+ //printf("n%dm%dmd%ddiff%d<%s><%s>\n", n, m, max_distance, m-n, fuzq->text, target);
53
+ if (max_distance < ((m > n) ? (m-n) : (n-m))) { /* abs */
54
+ /* Just adding the characters of m to n or vice-versa results in too many
55
+ * edits for example "pre" length is 3 and "prefixes" length is 8. We can
56
+ * see that given this optimal circumstance, the edit distance cannot be
57
+ * less than 5 which is 8-3 or more precisesly Math.abs(3-8). If our
58
+ * maximum edit distance is 4, then we can discard this word without
59
+ * looking at it. */
60
+ return 0.0f;
61
+ }
62
+
63
+ /* Let's make sure we have enough room in our array to do the distance
64
+ * calculations. */
65
+ if (((m+1) * (n+1)) >= fuzq->da_capa) {
66
+ fuzq->da_capa = (m * (fuzq->text_len+1)) * 2;
67
+ REALLOC_N(fuzq->da, int, fuzq->da_capa);
68
+ d = fuzq->da;
69
+ }
70
+
71
+ /* init matrix d */
72
+ for (i = 0; i <= n; i++) d[i + m * 0] = i;
73
+ for (j = 0; j <= m; j++) d[0 + m * j] = j;
74
+
75
+ /* start computing edit distance */
76
+ for (i = 1; i <= n; i++) {
77
+ int best_pos_ed_dist = m;
78
+ char s_i = text[i - 1];
79
+ for (j = 1; j <= m; j++) {
80
+ if (s_i != target[j-1]) {
81
+ d[i + m*j] = min3(d[i-1 + m*j], d[i + m*(j-1)], d[i-1 + m*(j-1)])+1;
82
+ } else {
83
+ d[i + m*j] = min3(d[i-1 + m*j]+1, d[i + m*(j-1)]+1, d[i-1 + m*(j-1)]);
84
+ }
85
+ best_pos_ed_dist = min(best_pos_ed_dist, d[i + m*j]);
86
+ }
87
+ //printf("(bped = %d, i = %d, md = %d)", best_pos_ed_dist, i, max_distance);
88
+
89
+ /* After calculating row i, the best possible edit distance can be found
90
+ * by found by finding the smallest value in a given column. If the
91
+ * best_pos_ed_dist is greater than the max distance, abort.
92
+ */
93
+
94
+ if ((i > max_distance) && (best_pos_ed_dist > max_distance)) {
95
+ /* equal is okay, but not greater
96
+ * the closest the target can be to the text is just too far away.
97
+ * this target is leaving the party early. */
98
+ return 0.0f;
99
+ }
100
+ }
101
+ //printf("<%f, d[n + m*m] = %d min_len = %d>", 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min(n, m))), d[n + m*m], fuzq->pre_len + min(n, m));
102
+
103
+ /* this will return less than 0.0 when the edit distance is greater than the
104
+ * number of characters in the shorter word. but this was the formula that
105
+ * was previously used in FuzzyTermEnum, so it has not been changed (even
106
+ * though min_sim must be greater than 0.0) */
107
+ return 1.0f - ((float)d[n + m*m] / (float) (fuzq->pre_len + min(n, m)));
108
+ }
109
+
110
+ /****************************************************************************
111
+ *
112
+ * FuzzyQuery
113
+ *
114
+ ****************************************************************************/
115
+
116
+ char *fuzq_to_s(Query *self, char *field)
117
+ {
118
+ char *buffer, *bptr;
119
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
120
+ Term *term = fuzq->term;
121
+ int tlen = strlen(term->text);
122
+ int flen = strlen(term->field);
123
+ bptr = buffer = ALLOC_N(char, tlen + flen + 35);
124
+
125
+ if (strcmp(term->field, field) != 0) {
126
+ sprintf(bptr, "%s:", term->field);
127
+ bptr += strlen(term->field) + 1;
128
+ }
129
+ sprintf(bptr, "%s~", term->text);
130
+ bptr += strlen(bptr);
131
+ if (fuzq->min_sim != 0.5) {
132
+ dbl_to_s(bptr, fuzq->min_sim);
133
+ bptr += strlen(bptr);
134
+ }
135
+ if (self->boost != 1.0) {
136
+ *bptr = '^';
137
+ dbl_to_s(++bptr, self->boost);
138
+ }
139
+ return buffer;
140
+ }
141
+
142
+ typedef struct ScoredTerm {
143
+ Term *term;
144
+ float score;
145
+ } ScoredTerm;
146
+
147
+ bool scored_term_less_than(void *p1, void *p2)
148
+ {
149
+ ScoredTerm *st1 = (ScoredTerm *)p1;
150
+ ScoredTerm *st2 = (ScoredTerm *)p2;
151
+
152
+ if (st1->score == st2->score)
153
+ return (strcmp(st1->term->text, st2->term->text) < 0);
154
+
155
+ return (st1->score < st2->score);
156
+ }
157
+
158
+ void scored_term_destroy(void *p)
159
+ {
160
+ ScoredTerm *st = (ScoredTerm *)p;
161
+ term_destroy(st->term);
162
+ free(st);
163
+ }
164
+
165
+ ScoredTerm *scored_term_create(Term *term, float score)
166
+ {
167
+ ScoredTerm *self = ALLOC(ScoredTerm);
168
+ self->term = term;
169
+ self->score = score;
170
+ return self;
171
+ }
172
+
173
+ Query *fuzq_rewrite(Query *self, IndexReader *ir)
174
+ {
175
+ Query *q;
176
+ Query *tq;
177
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
178
+
179
+ Term *term = fuzq->term;
180
+ char *text = term->text;
181
+ char *field = term->field;
182
+ Term prefix_term;
183
+ prefix_term.field = field;
184
+ if (fuzq->pre_len >= strlen(text)) {
185
+ q = tq_create(term_clone(term));
186
+ } else {
187
+ PriorityQueue *term_pq;
188
+ TermEnum *te;
189
+ Term prefix_term;
190
+ char *prefix = NULL;
191
+ int pre_len = fuzq->pre_len;
192
+ ScoredTerm *scored_term;
193
+
194
+ q = bq_create(true);
195
+
196
+ term_pq = pq_create(((BooleanQuery *)q->data)->max_clause_cnt,
197
+ &scored_term_less_than);
198
+ term_pq->free_elem = &scored_term_destroy;
199
+
200
+ prefix_term.field = field;
201
+ prefix_term.text = (char *)EMPTY_STRING;
202
+ if (pre_len >= 0) {
203
+ prefix = ALLOC_N(char, pre_len + 1);
204
+ strncpy(prefix, text, pre_len);
205
+ prefix_term.text = prefix;
206
+ prefix_term.text[pre_len] = '\0';
207
+ }
208
+ te = ir->terms_from(ir, &prefix_term);
209
+
210
+ fuzq->scale_factor = 1.0 / (1.0 - fuzq->min_sim);
211
+ fuzq->text = fuzq->term->text + pre_len;
212
+ fuzq->text_len = strlen(fuzq->text);
213
+ fuzq_initialize_max_distances(fuzq);
214
+
215
+ if (te) {
216
+ TermBuffer *tb = te->tb_curr;
217
+ float score = 0.0, min_score = fuzq->min_sim;
218
+
219
+ do {
220
+ if (strcmp(tb->field, field) != 0 ||
221
+ (prefix && strncmp(tb->text, prefix, pre_len) != 0))
222
+ break;
223
+
224
+ score = fuzq_score(fuzq, tb->text + pre_len);
225
+ //printf("%s:%s:%f\n", tb->text, fuzq->text, score);
226
+
227
+ if (score > min_score) {
228
+ pq_insert(term_pq, scored_term_create(tb_get_term(tb), score));
229
+ if (pq_full(term_pq))
230
+ min_score = ((ScoredTerm *)pq_top(term_pq))->score;
231
+ }
232
+ } while ((tb = te->next(te)) != NULL);
233
+ te->close(te);
234
+ }
235
+ free(prefix);
236
+
237
+ while ((scored_term = pq_pop(term_pq)) != NULL) {
238
+ tq = tq_create(scored_term->term); /* found match */
239
+ tq->boost = self->boost; /* set boost */
240
+ bq_add_query(q, tq, BC_SHOULD); /* add query */
241
+ free(scored_term); /* no need to free the term as it's in the query */
242
+ }
243
+ pq_destroy(term_pq);
244
+ }
245
+
246
+ if (self->rewritten) self->rewritten->destroy(self->rewritten);
247
+ return self->rewritten = q;
248
+ }
249
+
250
+ void fuzq_destroy(void *p)
251
+ {
252
+ Query *self = (Query *)p;
253
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
254
+ if (self->destroy_all) term_destroy((Term *)fuzq->term);
255
+ free(fuzq->da);
256
+ free(fuzq);
257
+ q_destroy(self);
258
+ }
259
+
260
+ Query *fuzq_create(Term *term)
261
+ {
262
+ Query *self = q_create();
263
+ FuzzyQuery *fq = ALLOC(FuzzyQuery);
264
+ ZEROSET(fq, FuzzyQuery, 1);
265
+
266
+ fq->term = term;
267
+ fq->pre_len = DEF_PRE_LEN;
268
+ fq->min_sim = DEF_MIN_SIM;
269
+ self->data = fq;
270
+ self->type = FUZZY_QUERY;
271
+ self->create_weight = NULL;
272
+ self->to_s = &fuzq_to_s;
273
+ self->rewrite = &fuzq_rewrite;
274
+ self->destroy = &fuzq_destroy;
275
+ self->rewritten = NULL;
276
+
277
+ return self;
278
+ }
279
+
280
+ Query *fuzq_create_mp(Term *term, float min_sim, int pre_len)
281
+ {
282
+ Query *self = fuzq_create(term);
283
+ FuzzyQuery *fuzq = (FuzzyQuery *)self->data;
284
+ if (pre_len) fuzq->pre_len = pre_len;
285
+ if (min_sim) fuzq->min_sim = min_sim;
286
+ return self;
287
+ }
data/ext/q_match_all.c ADDED
@@ -0,0 +1,142 @@
1
+ #include "search.h"
2
+ #include <string.h>
3
+
4
+ /***************************************************************************
5
+ *
6
+ * Weight
7
+ *
8
+ ***************************************************************************/
9
+
10
+ char *maw_to_s(Weight *self)
11
+ {
12
+ char dbuf[32];
13
+ dbl_to_s(dbuf, self->value);
14
+ return epstrdup("MatchAllWeight(%s)", strlen(dbuf), dbuf);
15
+ }
16
+
17
/*
 * Destructor installed as Weight->destroy by maw_create. The weight is a
 * single allocation, so releasing the pointer itself is all that is done.
 */
void maw_destroy(void *p)
{
    void *weight = p;

    free(weight);
}
21
+
22
+ Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
23
+ {
24
+ Explanation *expl;
25
+ if (!ir->is_deleted(ir, doc_num)) {
26
+ expl = expl_create(self->value, estrdup("MatchAllQuery: product of:"));
27
+ expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
28
+ expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
29
+ } else {
30
+ expl = expl_create(self->value,
31
+ epstrdup("MatchAllQuery: doc %d was deleted", 20, doc_num));
32
+ }
33
+
34
+ return expl;
35
+ }
36
+
37
+ Weight *maw_create(Query *query, Searcher *searcher)
38
+ {
39
+ Weight *self = ALLOC(Weight);
40
+ ZEROSET(self, Weight, 1);
41
+ self->get_query = &w_get_query;
42
+ self->get_value = &w_get_value;
43
+ self->normalize = &w_normalize;
44
+ self->scorer = &masc_create;
45
+ self->explain = &maw_explain;
46
+ self->to_s = &maw_to_s;
47
+ self->destroy = &maw_destroy;
48
+ self->sum_of_squared_weights = &w_sum_of_squared_weights;
49
+
50
+ self->similarity = query->get_similarity(query, searcher);
51
+ self->idf = 1.0;
52
+ self->query = query;
53
+ self->value = 0.0;
54
+
55
+ return self;
56
+ }
57
+
58
+ /***************************************************************************
59
+ *
60
+ * MatchAllQuery
61
+ *
62
+ ***************************************************************************/
63
+
64
+ char *maq_to_s(Query *self, char *field)
65
+ {
66
+ if (self->boost == 1.0) {
67
+ return estrdup("MatchAll");
68
+ } else {
69
+ char dbuf[32];
70
+ dbl_to_s(dbuf, self->boost);
71
+ return epstrdup("MatchAll^%s", strlen(dbuf), dbuf);
72
+ }
73
+ }
74
+
75
+ void maq_destroy(void *p)
76
+ {
77
+ Query *self = (Query *)p;
78
+ q_destroy(self);
79
+ }
80
+
81
+ Query *maq_create()
82
+ {
83
+ Query *self = q_create();
84
+ self->type = MATCH_ALL_QUERY;
85
+ self->create_weight = &maw_create;
86
+ self->to_s = &maq_to_s;
87
+ self->destroy = &maq_destroy;
88
+
89
+ return self;
90
+ }
91
+
92
+ /***************************************************************************
93
+ *
94
+ * MatchAllScorer
95
+ *
96
+ ***************************************************************************/
97
+
98
+ float masc_score(Scorer *self)
99
+ {
100
+ return ((MatchAllScorer *)self->data)->score;
101
+ }
102
+
103
+ bool masc_next(Scorer *self)
104
+ {
105
+ MatchAllScorer *mas = (MatchAllScorer *)self->data;
106
+ while (self->doc < (mas->max_doc - 1)) {
107
+ self->doc++;
108
+ if (!mas->ir->is_deleted(mas->ir, self->doc)) {
109
+ return true;
110
+ }
111
+ }
112
+ return false;
113
+ }
114
+
115
+ bool masc_skip_to(Scorer *self, int doc_num)
116
+ {
117
+ self->doc = doc_num - 1;
118
+ return masc_next(self);
119
+ }
120
+
121
+ Explanation *masc_explain(Scorer *self, int doc_num)
122
+ {
123
+ return expl_create(1.0, estrdup("MatchAllScorer"));
124
+ }
125
+
126
+ Scorer *masc_create(Weight *weight, IndexReader *ir)
127
+ {
128
+ Scorer *self = scorer_create(weight->similarity);
129
+ MatchAllScorer *mas = ALLOC(MatchAllScorer);
130
+ mas->ir = ir;
131
+ mas->max_doc = ir->max_doc(ir);
132
+ mas->score = weight->value;
133
+ self->data = mas;
134
+
135
+ self->doc = -1;
136
+ self->score = &masc_score;
137
+ self->next = &masc_next;
138
+ self->skip_to = &masc_skip_to;
139
+ self->explain = &masc_explain;
140
+ self->destroy = &scorer_destroy;
141
+ return self;
142
+ }