ferret 0.9.0 → 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (187)
  1. data/Rakefile +23 -5
  2. data/TODO +2 -1
  3. data/ext/analysis.c +838 -177
  4. data/ext/analysis.h +55 -7
  5. data/ext/api.c +69 -0
  6. data/ext/api.h +27 -0
  7. data/ext/array.c +8 -5
  8. data/ext/compound_io.c +132 -96
  9. data/ext/document.c +58 -28
  10. data/ext/except.c +59 -0
  11. data/ext/except.h +88 -0
  12. data/ext/ferret.c +47 -3
  13. data/ext/ferret.h +3 -0
  14. data/ext/field.c +15 -9
  15. data/ext/filter.c +1 -1
  16. data/ext/fs_store.c +215 -34
  17. data/ext/global.c +72 -3
  18. data/ext/global.h +4 -3
  19. data/ext/hash.c +44 -3
  20. data/ext/hash.h +9 -0
  21. data/ext/header.h +58 -0
  22. data/ext/inc/except.h +88 -0
  23. data/ext/inc/lang.h +23 -13
  24. data/ext/ind.c +16 -10
  25. data/ext/index.h +2 -22
  26. data/ext/index_io.c +3 -11
  27. data/ext/index_rw.c +245 -193
  28. data/ext/lang.h +23 -13
  29. data/ext/libstemmer.c +92 -0
  30. data/ext/libstemmer.h +79 -0
  31. data/ext/modules.h +162 -0
  32. data/ext/q_boolean.c +34 -21
  33. data/ext/q_const_score.c +6 -12
  34. data/ext/q_filtered_query.c +206 -0
  35. data/ext/q_fuzzy.c +18 -15
  36. data/ext/q_match_all.c +3 -7
  37. data/ext/q_multi_phrase.c +10 -14
  38. data/ext/q_parser.c +29 -2
  39. data/ext/q_phrase.c +14 -21
  40. data/ext/q_prefix.c +15 -12
  41. data/ext/q_range.c +30 -28
  42. data/ext/q_span.c +13 -21
  43. data/ext/q_term.c +17 -26
  44. data/ext/r_analysis.c +693 -21
  45. data/ext/r_doc.c +11 -12
  46. data/ext/r_index_io.c +4 -1
  47. data/ext/r_qparser.c +21 -2
  48. data/ext/r_search.c +285 -18
  49. data/ext/ram_store.c +5 -2
  50. data/ext/search.c +11 -17
  51. data/ext/search.h +21 -45
  52. data/ext/similarity.h +67 -0
  53. data/ext/sort.c +30 -25
  54. data/ext/stem_ISO_8859_1_danish.c +338 -0
  55. data/ext/stem_ISO_8859_1_danish.h +16 -0
  56. data/ext/stem_ISO_8859_1_dutch.c +635 -0
  57. data/ext/stem_ISO_8859_1_dutch.h +16 -0
  58. data/ext/stem_ISO_8859_1_english.c +1156 -0
  59. data/ext/stem_ISO_8859_1_english.h +16 -0
  60. data/ext/stem_ISO_8859_1_finnish.c +792 -0
  61. data/ext/stem_ISO_8859_1_finnish.h +16 -0
  62. data/ext/stem_ISO_8859_1_french.c +1276 -0
  63. data/ext/stem_ISO_8859_1_french.h +16 -0
  64. data/ext/stem_ISO_8859_1_german.c +512 -0
  65. data/ext/stem_ISO_8859_1_german.h +16 -0
  66. data/ext/stem_ISO_8859_1_italian.c +1091 -0
  67. data/ext/stem_ISO_8859_1_italian.h +16 -0
  68. data/ext/stem_ISO_8859_1_norwegian.c +296 -0
  69. data/ext/stem_ISO_8859_1_norwegian.h +16 -0
  70. data/ext/stem_ISO_8859_1_porter.c +776 -0
  71. data/ext/stem_ISO_8859_1_porter.h +16 -0
  72. data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
  73. data/ext/stem_ISO_8859_1_portuguese.h +16 -0
  74. data/ext/stem_ISO_8859_1_spanish.c +1119 -0
  75. data/ext/stem_ISO_8859_1_spanish.h +16 -0
  76. data/ext/stem_ISO_8859_1_swedish.c +307 -0
  77. data/ext/stem_ISO_8859_1_swedish.h +16 -0
  78. data/ext/stem_KOI8_R_russian.c +701 -0
  79. data/ext/stem_KOI8_R_russian.h +16 -0
  80. data/ext/stem_UTF_8_danish.c +344 -0
  81. data/ext/stem_UTF_8_danish.h +16 -0
  82. data/ext/stem_UTF_8_dutch.c +653 -0
  83. data/ext/stem_UTF_8_dutch.h +16 -0
  84. data/ext/stem_UTF_8_english.c +1176 -0
  85. data/ext/stem_UTF_8_english.h +16 -0
  86. data/ext/stem_UTF_8_finnish.c +808 -0
  87. data/ext/stem_UTF_8_finnish.h +16 -0
  88. data/ext/stem_UTF_8_french.c +1296 -0
  89. data/ext/stem_UTF_8_french.h +16 -0
  90. data/ext/stem_UTF_8_german.c +526 -0
  91. data/ext/stem_UTF_8_german.h +16 -0
  92. data/ext/stem_UTF_8_italian.c +1113 -0
  93. data/ext/stem_UTF_8_italian.h +16 -0
  94. data/ext/stem_UTF_8_norwegian.c +302 -0
  95. data/ext/stem_UTF_8_norwegian.h +16 -0
  96. data/ext/stem_UTF_8_porter.c +794 -0
  97. data/ext/stem_UTF_8_porter.h +16 -0
  98. data/ext/stem_UTF_8_portuguese.c +1055 -0
  99. data/ext/stem_UTF_8_portuguese.h +16 -0
  100. data/ext/stem_UTF_8_russian.c +709 -0
  101. data/ext/stem_UTF_8_russian.h +16 -0
  102. data/ext/stem_UTF_8_spanish.c +1137 -0
  103. data/ext/stem_UTF_8_spanish.h +16 -0
  104. data/ext/stem_UTF_8_swedish.c +313 -0
  105. data/ext/stem_UTF_8_swedish.h +16 -0
  106. data/ext/stopwords.c +325 -0
  107. data/ext/store.c +34 -2
  108. data/ext/tags +2953 -0
  109. data/ext/term.c +21 -15
  110. data/ext/termdocs.c +5 -3
  111. data/ext/utilities.c +446 -0
  112. data/ext/vector.c +27 -13
  113. data/lib/ferret/document/document.rb +1 -1
  114. data/lib/ferret/index/index.rb +44 -6
  115. data/lib/ferret/query_parser/query_parser.tab.rb +7 -3
  116. data/lib/rferret.rb +2 -1
  117. data/test/test_helper.rb +2 -2
  118. data/test/unit/analysis/ctc_analyzer.rb +401 -0
  119. data/test/unit/analysis/ctc_tokenstream.rb +423 -0
  120. data/test/unit/analysis/{tc_letter_tokenizer.rb → rtc_letter_tokenizer.rb} +0 -0
  121. data/test/unit/analysis/{tc_lower_case_filter.rb → rtc_lower_case_filter.rb} +0 -0
  122. data/test/unit/analysis/{tc_lower_case_tokenizer.rb → rtc_lower_case_tokenizer.rb} +0 -0
  123. data/test/unit/analysis/{tc_per_field_analyzer_wrapper.rb → rtc_per_field_analyzer_wrapper.rb} +0 -0
  124. data/test/unit/analysis/{tc_porter_stem_filter.rb → rtc_porter_stem_filter.rb} +0 -0
  125. data/test/unit/analysis/{tc_standard_analyzer.rb → rtc_standard_analyzer.rb} +0 -0
  126. data/test/unit/analysis/{tc_standard_tokenizer.rb → rtc_standard_tokenizer.rb} +0 -0
  127. data/test/unit/analysis/{tc_stop_analyzer.rb → rtc_stop_analyzer.rb} +0 -0
  128. data/test/unit/analysis/{tc_stop_filter.rb → rtc_stop_filter.rb} +0 -0
  129. data/test/unit/analysis/{tc_white_space_analyzer.rb → rtc_white_space_analyzer.rb} +0 -0
  130. data/test/unit/analysis/{tc_white_space_tokenizer.rb → rtc_white_space_tokenizer.rb} +0 -0
  131. data/test/unit/analysis/{tc_word_list_loader.rb → rtc_word_list_loader.rb} +0 -0
  132. data/test/unit/analysis/tc_analyzer.rb +1 -2
  133. data/test/unit/analysis/{c_token.rb → tc_token.rb} +0 -0
  134. data/test/unit/document/rtc_field.rb +28 -0
  135. data/test/unit/document/{c_document.rb → tc_document.rb} +0 -0
  136. data/test/unit/document/tc_field.rb +82 -12
  137. data/test/unit/index/{tc_compound_file_io.rb → rtc_compound_file_io.rb} +0 -0
  138. data/test/unit/index/{tc_field_infos.rb → rtc_field_infos.rb} +0 -0
  139. data/test/unit/index/{tc_fields_io.rb → rtc_fields_io.rb} +0 -0
  140. data/test/unit/index/{tc_multiple_term_doc_pos_enum.rb → rtc_multiple_term_doc_pos_enum.rb} +0 -0
  141. data/test/unit/index/{tc_segment_infos.rb → rtc_segment_infos.rb} +0 -0
  142. data/test/unit/index/{tc_segment_term_docs.rb → rtc_segment_term_docs.rb} +0 -0
  143. data/test/unit/index/{tc_segment_term_enum.rb → rtc_segment_term_enum.rb} +0 -0
  144. data/test/unit/index/{tc_segment_term_vector.rb → rtc_segment_term_vector.rb} +0 -0
  145. data/test/unit/index/{tc_term_buffer.rb → rtc_term_buffer.rb} +0 -0
  146. data/test/unit/index/{tc_term_info.rb → rtc_term_info.rb} +0 -0
  147. data/test/unit/index/{tc_term_infos_io.rb → rtc_term_infos_io.rb} +0 -0
  148. data/test/unit/index/{tc_term_vectors_io.rb → rtc_term_vectors_io.rb} +0 -0
  149. data/test/unit/index/{c_index.rb → tc_index.rb} +26 -6
  150. data/test/unit/index/{c_index_reader.rb → tc_index_reader.rb} +0 -0
  151. data/test/unit/index/{c_index_writer.rb → tc_index_writer.rb} +0 -0
  152. data/test/unit/index/{c_term.rb → tc_term.rb} +0 -0
  153. data/test/unit/index/{c_term_voi.rb → tc_term_voi.rb} +0 -0
  154. data/test/unit/query_parser/{c_query_parser.rb → rtc_query_parser.rb} +14 -14
  155. data/test/unit/query_parser/tc_query_parser.rb +24 -16
  156. data/test/unit/search/{tc_similarity.rb → rtc_similarity.rb} +0 -0
  157. data/test/unit/search/rtc_sort_field.rb +14 -0
  158. data/test/unit/search/{c_filter.rb → tc_filter.rb} +11 -11
  159. data/test/unit/search/{c_fuzzy_query.rb → tc_fuzzy_query.rb} +0 -0
  160. data/test/unit/search/{c_index_searcher.rb → tc_index_searcher.rb} +0 -0
  161. data/test/unit/search/{c_search_and_sort.rb → tc_search_and_sort.rb} +0 -0
  162. data/test/unit/search/{c_sort.rb → tc_sort.rb} +0 -0
  163. data/test/unit/search/tc_sort_field.rb +20 -7
  164. data/test/unit/search/{c_spans.rb → tc_spans.rb} +0 -0
  165. data/test/unit/store/rtc_fs_store.rb +62 -0
  166. data/test/unit/store/rtc_ram_store.rb +15 -0
  167. data/test/unit/store/rtm_store.rb +150 -0
  168. data/test/unit/store/rtm_store_lock.rb +2 -0
  169. data/test/unit/store/tc_fs_store.rb +54 -40
  170. data/test/unit/store/tc_ram_store.rb +20 -0
  171. data/test/unit/store/tm_store.rb +30 -146
  172. data/test/unit/store/tm_store_lock.rb +66 -0
  173. data/test/unit/utils/{tc_bit_vector.rb → rtc_bit_vector.rb} +0 -0
  174. data/test/unit/utils/{tc_date_tools.rb → rtc_date_tools.rb} +0 -0
  175. data/test/unit/utils/{tc_number_tools.rb → rtc_number_tools.rb} +0 -0
  176. data/test/unit/utils/{tc_parameter.rb → rtc_parameter.rb} +0 -0
  177. data/test/unit/utils/{tc_priority_queue.rb → rtc_priority_queue.rb} +0 -0
  178. data/test/unit/utils/{tc_string_helper.rb → rtc_string_helper.rb} +0 -0
  179. data/test/unit/utils/{tc_thread.rb → rtc_thread.rb} +0 -0
  180. data/test/unit/utils/{tc_weak_key_hash.rb → rtc_weak_key_hash.rb} +0 -0
  181. metadata +360 -289
  182. data/test/unit/document/c_field.rb +0 -98
  183. data/test/unit/search/c_sort_field.rb +0 -27
  184. data/test/unit/store/c_fs_store.rb +0 -76
  185. data/test/unit/store/c_ram_store.rb +0 -35
  186. data/test/unit/store/m_store.rb +0 -34
  187. data/test/unit/store/m_store_lock.rb +0 -68
data/ext/q_const_score.c
@@ -9,9 +9,7 @@
9
9
 
10
10
  char *csw_to_s(Weight *self)
11
11
  {
12
- char dbuf[32];
13
- dbl_to_s(dbuf, self->value);
14
- return epstrdup("ConstantScoreWeight(%s)", strlen(dbuf), dbuf);
12
+ return strfmt("ConstantScoreWeight(%f)", self->value);
15
13
  }
16
14
 
17
15
  void csw_destroy(void *p)
@@ -28,14 +26,13 @@ Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
28
26
 
29
27
  if (bv_get(bv, doc_num)) {
30
28
  expl = expl_create(self->value,
31
- epstrdup("ConstantScoreQuery(%s), product of:",
32
- strlen(filter_str), filter_str));
29
+ strfmt("ConstantScoreQuery(%s), product of:", filter_str));
33
30
  expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
34
31
  expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
35
32
  } else {
36
33
  expl = expl_create(self->value,
37
- epstrdup("ConstantScoreQuery(%s), does not match id %d",
38
- strlen(filter_str) + 20, filter_str, doc_num));
34
+ strfmt("ConstantScoreQuery(%s), does not match id %d",
35
+ filter_str, doc_num));
39
36
  }
40
37
  free(filter_str);
41
38
  return expl;
@@ -74,12 +71,9 @@ char *csq_to_s(Query *self, char *field)
74
71
  char *filter_str = filter->to_s(filter);
75
72
  char *buffer;
76
73
  if (self->boost == 1.0) {
77
- buffer = epstrdup("ConstantScore(%s)", strlen(filter_str), filter_str);
74
+ buffer = strfmt("ConstantScore(%s)", filter_str);
78
75
  } else {
79
- char dbuf[32];
80
- dbl_to_s(dbuf, self->boost);
81
- buffer = epstrdup("ConstantScore(%s)^%s",
82
- strlen(filter_str) + strlen(dbuf), filter_str, dbuf);
76
+ buffer = strfmt("ConstantScore(%s)^%f", filter_str, self->boost);
83
77
  }
84
78
  free(filter_str);
85
79
  return buffer;;
data/ext/q_filtered_query.c
@@ -0,0 +1,206 @@
1
+ #include "search.h"
2
+ #include <string.h>
3
+
4
+ Scorer *fqsc_create(Scorer *scorer, BitVector *bv, Similarity *sim);
5
+
6
+ /***************************************************************************
7
+ *
8
+ * Weight
9
+ *
10
+ ***************************************************************************/
11
+
12
+ char *fqw_to_s(Weight *self)
13
+ {
14
+ return strfmt("FilteredQueryWeight(%f)", self->value);
15
+ }
16
+
17
+ void fqw_destroy(void *p)
18
+ {
19
+ free(p);
20
+ }
21
+
22
+ float fqw_sum_of_squared_weights(Weight *self)
23
+ {
24
+ Weight *sw = (Weight *)self->data;
25
+ return sw->sum_of_squared_weights(sw);
26
+ }
27
+
28
+ void fqw_normalize(Weight *self, float normalization_factor)
29
+ {
30
+ Weight *sw = (Weight *)self->data;
31
+ return sw->normalize(sw, normalization_factor);
32
+ }
33
+
34
+ float fqw_get_value(Weight *self)
35
+ {
36
+ Weight *sw = (Weight *)self->data;
37
+ return sw->get_value(sw);
38
+ }
39
+
40
+ Explanation *fqw_explain(Weight *self, IndexReader *ir, int doc_num)
41
+ {
42
+ Weight *sw = (Weight *)self->data;
43
+ return sw->explain(sw, ir, doc_num);
44
+ }
45
+
46
+ Scorer *fqw_scorer(Weight *self, IndexReader *ir)
47
+ {
48
+ Weight *sw = (Weight *)self->data;
49
+ Scorer *scorer = sw->scorer(sw, ir);
50
+ Filter *filter = ((FilteredQuery *)self->query->data)->filter;
51
+
52
+ return fqsc_create(scorer, filter->get_bv(filter, ir), self->similarity);
53
+ }
54
+
55
+ Weight *fqw_create(Query *query, Weight *sub_weight, Similarity *sim)
56
+ {
57
+ Weight *self = ALLOC(Weight);
58
+ ZEROSET(self, Weight, 1);
59
+ self->data = sub_weight;
60
+
61
+ self->get_query = &w_get_query;
62
+ self->get_value = &fqw_get_value;
63
+ self->normalize = &fqw_normalize;
64
+ self->scorer = &fqw_scorer;
65
+ self->explain = &fqw_explain;
66
+ self->to_s = &fqw_to_s;
67
+ self->destroy = &fqw_destroy;
68
+ self->sum_of_squared_weights = &fqw_sum_of_squared_weights;
69
+
70
+ self->similarity = sim;
71
+ self->idf = 1.0;
72
+ self->query = query;
73
+ self->value = sub_weight->value;
74
+
75
+ return self;
76
+ }
77
+
78
+ /***************************************************************************
79
+ *
80
+ * FilteredQueryQuery
81
+ *
82
+ ***************************************************************************/
83
+
84
+ char *fq_to_s(Query *self, char *field)
85
+ {
86
+ FilteredQuery *fq = (FilteredQuery *)self->data;
87
+ char *filter_str = fq->filter->to_s(fq->filter);
88
+ char *query_str = fq->query->to_s(fq->query, field);
89
+ char *buffer;
90
+ if (self->boost == 1.0) {
91
+ buffer = strfmt("FilteredQuery(query:%s, filter:%s)",
92
+ query_str, filter_str);
93
+ } else {
94
+ buffer = strfmt("FilteredQuery(query:%s, filter:%s)^%f",
95
+ query_str, filter_str, self->boost);
96
+ }
97
+ free(filter_str);
98
+ free(query_str);
99
+ return buffer;;
100
+ }
101
+
102
+ void fq_destroy(void *p)
103
+ {
104
+ Query *self = (Query *)p;
105
+ if (self->destroy_all) {
106
+ FilteredQuery *fq = (FilteredQuery *)self->data;
107
+ fq->filter->destroy(fq->filter);
108
+ fq->query->destroy(fq->query);
109
+ }
110
+ free(self->data);
111
+ q_destroy(self);
112
+ }
113
+
114
+ Weight *fq_create_weight(Query *self, Searcher *searcher)
115
+ {
116
+ Query *sub_query = ((FilteredQuery *)self->data)->query;
117
+ return fqw_create(self, q_weight(sub_query, searcher),
118
+ searcher->similarity);
119
+ }
120
+
121
+ Query *fq_create(Query *query, Filter *filter)
122
+ {
123
+ Query *self = q_create();
124
+ FilteredQuery *fq = ALLOC(FilteredQuery);
125
+ fq->query = query;
126
+ fq->filter = filter;
127
+ self->type = FILTERED_QUERY;
128
+ self->data = fq;
129
+ self->create_weight = &fq_create_weight;
130
+ self->to_s = &fq_to_s;
131
+ self->destroy = &fq_destroy;
132
+
133
+ return self;
134
+ }
135
+
136
+ /***************************************************************************
137
+ *
138
+ * FilteredQueryScorer
139
+ *
140
+ ***************************************************************************/
141
+
142
+ typedef struct FilteredQueryScorer {
143
+ Scorer *sub_scorer;
144
+ BitVector *bv;
145
+ } FilteredQueryScorer;
146
+
147
+ float fqsc_score(Scorer *self)
148
+ {
149
+ Scorer *sub_sc = ((FilteredQueryScorer *)self->data)->sub_scorer;
150
+ return sub_sc->score(sub_sc);
151
+ }
152
+
153
+ bool fqsc_next(Scorer *self)
154
+ {
155
+ Scorer *sub_sc = ((FilteredQueryScorer *)self->data)->sub_scorer;
156
+ BitVector *bv = ((FilteredQueryScorer *)self->data)->bv;
157
+ while (sub_sc->next(sub_sc)) {
158
+ self->doc = sub_sc->doc;
159
+ if (bv_get(bv, self->doc)) return true;
160
+ }
161
+ return false;
162
+ }
163
+
164
+ bool fqsc_skip_to(Scorer *self, int doc_num)
165
+ {
166
+ Scorer *sub_sc = ((FilteredQueryScorer *)self->data)->sub_scorer;
167
+ BitVector *bv = ((FilteredQueryScorer *)self->data)->bv;
168
+ if (sub_sc->skip_to(sub_sc, doc_num)) {
169
+ self->doc = sub_sc->doc;
170
+ do {
171
+ if (bv_get(bv, self->doc)) return true;
172
+ } while (sub_sc->next(sub_sc));
173
+ }
174
+ return false;
175
+ }
176
+
177
+ Explanation *fqsc_explain(Scorer *self, int doc_num)
178
+ {
179
+ Scorer *sub_sc = ((FilteredQueryScorer *)self->data)->sub_scorer;
180
+ return sub_sc->explain(sub_sc, doc_num);
181
+ }
182
+
183
+ void fqsc_destroy(void *p)
184
+ {
185
+ Scorer *self = (Scorer *)p;
186
+ FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
187
+ bv_destroy(fqsc->bv);
188
+ fqsc->sub_scorer->destroy(fqsc->sub_scorer);
189
+ scorer_destroy(self);
190
+ }
191
+
192
+ Scorer *fqsc_create(Scorer *scorer, BitVector *bv, Similarity *sim)
193
+ {
194
+ Scorer *self = scorer_create(sim);
195
+ FilteredQueryScorer *fqsc = ALLOC(FilteredQueryScorer);
196
+ fqsc->sub_scorer = scorer;
197
+ fqsc->bv = bv;
198
+ self->data = fqsc;
199
+
200
+ self->score = &fqsc_score;
201
+ self->next = &fqsc_next;
202
+ self->skip_to = &fqsc_skip_to;
203
+ self->explain = &fqsc_explain;
204
+ self->destroy = &fqsc_destroy;
205
+ return self;
206
+ }
data/ext/q_fuzzy.c
@@ -216,21 +216,24 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
216
216
  TermBuffer *tb = te->tb_curr;
217
217
  float score = 0.0, min_score = fuzq->min_sim;
218
218
 
219
- do {
220
- if (strcmp(tb->field, field) != 0 ||
221
- (prefix && strncmp(tb->text, prefix, pre_len) != 0))
222
- break;
223
-
224
- score = fuzq_score(fuzq, tb->text + pre_len);
225
- //printf("%s:%s:%f\n", tb->text, fuzq->text, score);
226
-
227
- if (score > min_score) {
228
- pq_insert(term_pq, scored_term_create(tb_get_term(tb), score));
229
- if (pq_full(term_pq))
230
- min_score = ((ScoredTerm *)pq_top(term_pq))->score;
231
- }
232
- } while ((tb = te->next(te)) != NULL);
233
- te->close(te);
219
+ TRY
220
+ do {
221
+ if (strcmp(tb->field, field) != 0 ||
222
+ (prefix && strncmp(tb->text, prefix, pre_len) != 0))
223
+ break;
224
+
225
+ score = fuzq_score(fuzq, tb->text + pre_len);
226
+ //printf("%s:%s:%f\n", tb->text, fuzq->text, score);
227
+
228
+ if (score > min_score) {
229
+ pq_insert(term_pq, scored_term_create(tb_get_term(tb), score));
230
+ if (pq_full(term_pq))
231
+ min_score = ((ScoredTerm *)pq_top(term_pq))->score;
232
+ }
233
+ } while ((tb = te->next(te)) != NULL);
234
+ XFINALLY
235
+ te->close(te);
236
+ XENDTRY
234
237
  }
235
238
  free(prefix);
236
239
 
data/ext/q_match_all.c
@@ -9,9 +9,7 @@
9
9
 
10
10
  char *maw_to_s(Weight *self)
11
11
  {
12
- char dbuf[32];
13
- dbl_to_s(dbuf, self->value);
14
- return epstrdup("MatchAllWeight(%s)", strlen(dbuf), dbuf);
12
+ return strfmt("MatchAllWeight(%f)", self->value);
15
13
  }
16
14
 
17
15
  void maw_destroy(void *p)
@@ -28,7 +26,7 @@ Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
28
26
  expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
29
27
  } else {
30
28
  expl = expl_create(self->value,
31
- epstrdup("MatchAllQuery: doc %d was deleted", 20, doc_num));
29
+ strfmt("MatchAllQuery: doc %d was deleted", doc_num));
32
30
  }
33
31
 
34
32
  return expl;
@@ -66,9 +64,7 @@ char *maq_to_s(Query *self, char *field)
66
64
  if (self->boost == 1.0) {
67
65
  return estrdup("MatchAll");
68
66
  } else {
69
- char dbuf[32];
70
- dbl_to_s(dbuf, self->boost);
71
- return epstrdup("MatchAll^%s", strlen(dbuf), dbuf);
67
+ return strfmt("MatchAll^%f", self->boost);
72
68
  }
73
69
  }
74
70
 
data/ext/q_multi_phrase.c
@@ -1,6 +1,8 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
3
 
4
+ static char * const FIELD_CHANGE_ERROR_MSG = "All phrase terms must be in the same field.";
5
+
4
6
  /***************************************************************************
5
7
  *
6
8
  * MultiPhraseWeight
@@ -9,9 +11,7 @@
9
11
 
10
12
  char *mphw_to_s(Weight *self)
11
13
  {
12
- char dbuf[32];
13
- dbl_to_s(dbuf, self->value);
14
- return epstrdup("MultiPhraseWeight(%s)", strlen(dbuf), dbuf);
14
+ return strfmt("MultiPhraseWeight(%f)", self->value);
15
15
  }
16
16
 
17
17
  Scorer *mphw_scorer(Weight *self, IndexReader *ir)
@@ -63,9 +63,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
63
63
  int len = 0, pos = 0;
64
64
 
65
65
  Explanation *expl = expl_create(0.0,
66
- epstrdup("weight(%s in %d), product of:",
67
- strlen(query_str) + 20,
68
- query_str, doc_num));
66
+ strfmt("weight(%s in %d), product of:", query_str, doc_num));
69
67
 
70
68
  for (i = 0; i < mphq->t_cnt; i++) {
71
69
  for (j = 0; j < mphq->pt_cnt[i]; j++) {
@@ -84,14 +82,14 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
84
82
  doc_freqs[pos] = 0;
85
83
 
86
84
  Explanation *idf_expl1 = expl_create(self->idf,
87
- epstrdup("idf(%s:<%s>)", strlen(mphq->field) + pos, mphq->field, doc_freqs));
85
+ strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
88
86
  Explanation *idf_expl2 = expl_create(self->idf,
89
- epstrdup("idf(%s:<%s>)", strlen(mphq->field) + pos, mphq->field, doc_freqs));
87
+ strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
90
88
  free(doc_freqs);
91
89
 
92
90
  // explain query weight
93
91
  Explanation *query_expl = expl_create(0.0,
94
- epstrdup("query_weight(%s), product of:", strlen(query_str), query_str));
92
+ strfmt("query_weight(%s), product of:", query_str));
95
93
 
96
94
  if (self->query->boost != 1.0) {
97
95
  expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
@@ -107,8 +105,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
107
105
 
108
106
  // explain field weight
109
107
  Explanation *field_expl = expl_create(0.0,
110
- epstrdup("field_weight(%s in %d), product of:",
111
- strlen(query_str) + 20, query_str, doc_num));
108
+ strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
112
109
  free(query_str);
113
110
 
114
111
  Scorer *scorer = self->scorer(self, ir);
@@ -122,8 +119,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
122
119
  ? sim_decode_norm(self->similarity, field_norms[doc_num])
123
120
  : 0.0;
124
121
  Explanation *field_norm_expl = expl_create(field_norm,
125
- epstrdup("field_norm(field=%s, doc=%d)",
126
- strlen(mphq->field) + 20, mphq->field, doc_num));
122
+ strfmt("field_norm(field=%s, doc=%d)", mphq->field, doc_num));
127
123
 
128
124
  expl_add_detail(field_expl, field_norm_expl);
129
125
 
@@ -202,7 +198,7 @@ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc)
202
198
  position = mphq->positions[index - 1] + pos_inc;
203
199
  for (i = 0; i < t_cnt; i++) {
204
200
  if (strcmp(terms[i]->field, mphq->field) != 0) {
205
- eprintf(ARG_ERROR, "All phrase terms must be in the same field. Current phrase is %s, tried to add %s\n", mphq->field, terms[i]->field);
201
+ RAISE(ARG_ERROR, FIELD_CHANGE_ERROR_MSG);
206
202
  }
207
203
  }
208
204
  }
data/ext/q_parser.c
@@ -1686,7 +1686,7 @@ int yyerror(QParser *qp, char const *msg)
1686
1686
  {
1687
1687
  if (!qp->handle_parse_errors) {
1688
1688
  if (qp->clean_str) free(qp->qstr);
1689
- eprintf(PARSE_ERROR, msg);
1689
+ RAISE(PARSE_ERROR, (char *)msg);
1690
1690
  }
1691
1691
  return 0;
1692
1692
  }
@@ -1811,8 +1811,35 @@ Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
1811
1811
 
1812
1812
  Query *get_wild_q(QParser *qp, char *field, char *pattern)
1813
1813
  {
1814
+ Query *q;
1815
+ bool is_prefix = false;
1816
+ char *p;
1817
+ int len = strlen(pattern);
1818
+
1814
1819
  if (qp->wild_lower) lower_str(pattern);
1815
- return wcq_create(term_create(field, pattern));;
1820
+
1821
+ /* simplify the wildcard query to a prefix query if possible. Basically a
1822
+ * prefix query is any wildcard query that has a '*' as the last character
1823
+ * and no other wildcard characters before it. */
1824
+ if (pattern[len-1] == '*') {
1825
+ is_prefix = true;
1826
+ for (p = &pattern[len-2]; p >= pattern; p--) {
1827
+ if (*p == '*' || *p == '?') {
1828
+ is_prefix = false;
1829
+ break;
1830
+ }
1831
+ }
1832
+ }
1833
+
1834
+ if (is_prefix) {
1835
+ /* chop off the '*' temporarily to create the query */
1836
+ pattern[len-1] = 0;
1837
+ q = prefixq_create(term_create(field, pattern));;
1838
+ pattern[len-1] = '*';
1839
+ } else {
1840
+ q = wcq_create(term_create(field, pattern));;
1841
+ }
1842
+ return q;
1816
1843
  }
1817
1844
 
1818
1845
  HashSet *add_field(QParser *qp, char *field)
data/ext/q_phrase.c
@@ -1,6 +1,8 @@
1
1
  #include <string.h>
2
2
  #include "search.h"
3
3
 
4
+ static char * const FIELD_CHANGE_ERROR_MSG = "Field illegally changed in the phrase";
5
+
4
6
  /***************************************************************************
5
7
  *
6
8
  * PhraseWeight
@@ -52,9 +54,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
52
54
  int len = 0, pos = 0;
53
55
 
54
56
  Explanation *expl = expl_create(0.0,
55
- epstrdup("weight(%s in %d), product of:",
56
- strlen(query_str) + 20,
57
- query_str, doc_num));
57
+ strfmt("weight(%s in %d), product of:", query_str, doc_num));
58
58
 
59
59
  for (i = 0; i < phq->t_cnt; i++) {
60
60
  len += strlen(phq->terms[i]->text) + 30;
@@ -69,14 +69,14 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
69
69
  doc_freqs[pos] = 0;
70
70
 
71
71
  Explanation *idf_expl1 = expl_create(self->idf,
72
- epstrdup("idf(%s:<%s>)", strlen(phq->field) + pos, phq->field, doc_freqs));
72
+ strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
73
73
  Explanation *idf_expl2 = expl_create(self->idf,
74
- epstrdup("idf(%s:<%s>)", strlen(phq->field) + pos, phq->field, doc_freqs));
74
+ strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
75
75
  free(doc_freqs);
76
76
 
77
77
  // explain query weight
78
78
  Explanation *query_expl = expl_create(0.0,
79
- epstrdup("query_weight(%s), product of:", strlen(query_str), query_str));
79
+ strfmt("query_weight(%s), product of:", query_str));
80
80
 
81
81
  if (self->query->boost != 1.0) {
82
82
  expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
@@ -92,8 +92,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
92
92
 
93
93
  // explain field weight
94
94
  Explanation *field_expl = expl_create(0.0,
95
- epstrdup("field_weight(%s in %d), product of:",
96
- strlen(query_str) + 20, query_str, doc_num));
95
+ strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
97
96
  free(query_str);
98
97
 
99
98
  Scorer *scorer = self->scorer(self, ir);
@@ -107,8 +106,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
107
106
  ? sim_decode_norm(self->similarity, field_norms[doc_num])
108
107
  : 0.0;
109
108
  Explanation *field_norm_expl = expl_create(field_norm,
110
- epstrdup("field_norm(field=%s, doc=%d)",
111
- strlen(phq->field) + 20, phq->field, doc_num));
109
+ strfmt("field_norm(field=%s, doc=%d)", phq->field, doc_num));
112
110
 
113
111
  expl_add_detail(field_expl, field_norm_expl);
114
112
 
@@ -127,9 +125,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
127
125
 
128
126
  char *phw_to_s(Weight *self)
129
127
  {
130
- char dbuf[32];
131
- dbl_to_s(dbuf, self->value);
132
- return epstrdup("PhraseWeight(%s)", strlen(dbuf), dbuf);
128
+ return strfmt("PhraseWeight(%f)", self->value);
133
129
  }
134
130
 
135
131
  Weight *phw_create(Query *query, Searcher *searcher)
@@ -216,9 +212,8 @@ char *phq_to_s(Query *self, char *field)
216
212
  buf_index += strlen(buffer + buf_index);
217
213
  }
218
214
  if (self->boost != 1.0) {
219
- char dbuf[32];
220
- dbl_to_s(dbuf, self->boost);
221
- sprintf(buffer + buf_index, "^%s", dbuf);
215
+ buffer[buf_index++] = '^';
216
+ dbl_to_s(buffer + buf_index, self->boost);
222
217
  }
223
218
  return buffer;
224
219
  }
@@ -270,7 +265,7 @@ void phq_add_term(Query *self, Term *term, int pos_inc)
270
265
  } else {
271
266
  position = phq->positions[index - 1] + pos_inc;
272
267
  if (strcmp(term->field, phq->field) != 0) {
273
- eprintf(ARG_ERROR, "All phrase terms must be in the same field. Current phrase is %s, tried to add %s\n", phq->field, term->field);
268
+ RAISE(ARG_ERROR, FIELD_CHANGE_ERROR_MSG);
274
269
  }
275
270
  }
276
271
  phq->terms[index] = term;
@@ -356,7 +351,7 @@ bool pp_first_position(PhrasePosition *self)
356
351
 
357
352
  char *pp_to_s(PhrasePosition *self)
358
353
  {
359
- return epstrdup("pp->(doc => %d, position => %d)", 40, self->doc, self->position);
354
+ return strfmt("pp->(doc => %d, position => %d)", self->doc, self->position);
360
355
  }
361
356
 
362
357
  inline int pp_cmp(const void *const p1, const void *const p2)
@@ -491,15 +486,13 @@ bool phsc_skip_to(Scorer *self, int doc_num)
491
486
 
492
487
  Explanation *phsc_explain(Scorer *self, int doc_num)
493
488
  {
494
- char dbuf[32];
495
489
  GET_PHSC;
496
490
  while (phsc_next(self) && self->doc < doc_num)
497
491
  ;
498
492
 
499
493
  float phrase_freq = (self->doc == doc_num) ? phsc->freq : 0.0;
500
- dbl_to_s(dbuf, phrase_freq);
501
494
  return expl_create(sim_tf(self->similarity, phrase_freq),
502
- epstrdup("tf(phrase_freq=%s)", strlen(dbuf), dbuf));
495
+ strfmt("tf(phrase_freq=%f)", phrase_freq));
503
496
  }
504
497
 
505
498
  void phsc_destroy(void *p)