ferret 0.9.0 → 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +23 -5
- data/TODO +2 -1
- data/ext/analysis.c +838 -177
- data/ext/analysis.h +55 -7
- data/ext/api.c +69 -0
- data/ext/api.h +27 -0
- data/ext/array.c +8 -5
- data/ext/compound_io.c +132 -96
- data/ext/document.c +58 -28
- data/ext/except.c +59 -0
- data/ext/except.h +88 -0
- data/ext/ferret.c +47 -3
- data/ext/ferret.h +3 -0
- data/ext/field.c +15 -9
- data/ext/filter.c +1 -1
- data/ext/fs_store.c +215 -34
- data/ext/global.c +72 -3
- data/ext/global.h +4 -3
- data/ext/hash.c +44 -3
- data/ext/hash.h +9 -0
- data/ext/header.h +58 -0
- data/ext/inc/except.h +88 -0
- data/ext/inc/lang.h +23 -13
- data/ext/ind.c +16 -10
- data/ext/index.h +2 -22
- data/ext/index_io.c +3 -11
- data/ext/index_rw.c +245 -193
- data/ext/lang.h +23 -13
- data/ext/libstemmer.c +92 -0
- data/ext/libstemmer.h +79 -0
- data/ext/modules.h +162 -0
- data/ext/q_boolean.c +34 -21
- data/ext/q_const_score.c +6 -12
- data/ext/q_filtered_query.c +206 -0
- data/ext/q_fuzzy.c +18 -15
- data/ext/q_match_all.c +3 -7
- data/ext/q_multi_phrase.c +10 -14
- data/ext/q_parser.c +29 -2
- data/ext/q_phrase.c +14 -21
- data/ext/q_prefix.c +15 -12
- data/ext/q_range.c +30 -28
- data/ext/q_span.c +13 -21
- data/ext/q_term.c +17 -26
- data/ext/r_analysis.c +693 -21
- data/ext/r_doc.c +11 -12
- data/ext/r_index_io.c +4 -1
- data/ext/r_qparser.c +21 -2
- data/ext/r_search.c +285 -18
- data/ext/ram_store.c +5 -2
- data/ext/search.c +11 -17
- data/ext/search.h +21 -45
- data/ext/similarity.h +67 -0
- data/ext/sort.c +30 -25
- data/ext/stem_ISO_8859_1_danish.c +338 -0
- data/ext/stem_ISO_8859_1_danish.h +16 -0
- data/ext/stem_ISO_8859_1_dutch.c +635 -0
- data/ext/stem_ISO_8859_1_dutch.h +16 -0
- data/ext/stem_ISO_8859_1_english.c +1156 -0
- data/ext/stem_ISO_8859_1_english.h +16 -0
- data/ext/stem_ISO_8859_1_finnish.c +792 -0
- data/ext/stem_ISO_8859_1_finnish.h +16 -0
- data/ext/stem_ISO_8859_1_french.c +1276 -0
- data/ext/stem_ISO_8859_1_french.h +16 -0
- data/ext/stem_ISO_8859_1_german.c +512 -0
- data/ext/stem_ISO_8859_1_german.h +16 -0
- data/ext/stem_ISO_8859_1_italian.c +1091 -0
- data/ext/stem_ISO_8859_1_italian.h +16 -0
- data/ext/stem_ISO_8859_1_norwegian.c +296 -0
- data/ext/stem_ISO_8859_1_norwegian.h +16 -0
- data/ext/stem_ISO_8859_1_porter.c +776 -0
- data/ext/stem_ISO_8859_1_porter.h +16 -0
- data/ext/stem_ISO_8859_1_portuguese.c +1035 -0
- data/ext/stem_ISO_8859_1_portuguese.h +16 -0
- data/ext/stem_ISO_8859_1_spanish.c +1119 -0
- data/ext/stem_ISO_8859_1_spanish.h +16 -0
- data/ext/stem_ISO_8859_1_swedish.c +307 -0
- data/ext/stem_ISO_8859_1_swedish.h +16 -0
- data/ext/stem_KOI8_R_russian.c +701 -0
- data/ext/stem_KOI8_R_russian.h +16 -0
- data/ext/stem_UTF_8_danish.c +344 -0
- data/ext/stem_UTF_8_danish.h +16 -0
- data/ext/stem_UTF_8_dutch.c +653 -0
- data/ext/stem_UTF_8_dutch.h +16 -0
- data/ext/stem_UTF_8_english.c +1176 -0
- data/ext/stem_UTF_8_english.h +16 -0
- data/ext/stem_UTF_8_finnish.c +808 -0
- data/ext/stem_UTF_8_finnish.h +16 -0
- data/ext/stem_UTF_8_french.c +1296 -0
- data/ext/stem_UTF_8_french.h +16 -0
- data/ext/stem_UTF_8_german.c +526 -0
- data/ext/stem_UTF_8_german.h +16 -0
- data/ext/stem_UTF_8_italian.c +1113 -0
- data/ext/stem_UTF_8_italian.h +16 -0
- data/ext/stem_UTF_8_norwegian.c +302 -0
- data/ext/stem_UTF_8_norwegian.h +16 -0
- data/ext/stem_UTF_8_porter.c +794 -0
- data/ext/stem_UTF_8_porter.h +16 -0
- data/ext/stem_UTF_8_portuguese.c +1055 -0
- data/ext/stem_UTF_8_portuguese.h +16 -0
- data/ext/stem_UTF_8_russian.c +709 -0
- data/ext/stem_UTF_8_russian.h +16 -0
- data/ext/stem_UTF_8_spanish.c +1137 -0
- data/ext/stem_UTF_8_spanish.h +16 -0
- data/ext/stem_UTF_8_swedish.c +313 -0
- data/ext/stem_UTF_8_swedish.h +16 -0
- data/ext/stopwords.c +325 -0
- data/ext/store.c +34 -2
- data/ext/tags +2953 -0
- data/ext/term.c +21 -15
- data/ext/termdocs.c +5 -3
- data/ext/utilities.c +446 -0
- data/ext/vector.c +27 -13
- data/lib/ferret/document/document.rb +1 -1
- data/lib/ferret/index/index.rb +44 -6
- data/lib/ferret/query_parser/query_parser.tab.rb +7 -3
- data/lib/rferret.rb +2 -1
- data/test/test_helper.rb +2 -2
- data/test/unit/analysis/ctc_analyzer.rb +401 -0
- data/test/unit/analysis/ctc_tokenstream.rb +423 -0
- data/test/unit/analysis/{tc_letter_tokenizer.rb → rtc_letter_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_filter.rb → rtc_lower_case_filter.rb} +0 -0
- data/test/unit/analysis/{tc_lower_case_tokenizer.rb → rtc_lower_case_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_per_field_analyzer_wrapper.rb → rtc_per_field_analyzer_wrapper.rb} +0 -0
- data/test/unit/analysis/{tc_porter_stem_filter.rb → rtc_porter_stem_filter.rb} +0 -0
- data/test/unit/analysis/{tc_standard_analyzer.rb → rtc_standard_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_standard_tokenizer.rb → rtc_standard_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_analyzer.rb → rtc_stop_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_stop_filter.rb → rtc_stop_filter.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_analyzer.rb → rtc_white_space_analyzer.rb} +0 -0
- data/test/unit/analysis/{tc_white_space_tokenizer.rb → rtc_white_space_tokenizer.rb} +0 -0
- data/test/unit/analysis/{tc_word_list_loader.rb → rtc_word_list_loader.rb} +0 -0
- data/test/unit/analysis/tc_analyzer.rb +1 -2
- data/test/unit/analysis/{c_token.rb → tc_token.rb} +0 -0
- data/test/unit/document/rtc_field.rb +28 -0
- data/test/unit/document/{c_document.rb → tc_document.rb} +0 -0
- data/test/unit/document/tc_field.rb +82 -12
- data/test/unit/index/{tc_compound_file_io.rb → rtc_compound_file_io.rb} +0 -0
- data/test/unit/index/{tc_field_infos.rb → rtc_field_infos.rb} +0 -0
- data/test/unit/index/{tc_fields_io.rb → rtc_fields_io.rb} +0 -0
- data/test/unit/index/{tc_multiple_term_doc_pos_enum.rb → rtc_multiple_term_doc_pos_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_infos.rb → rtc_segment_infos.rb} +0 -0
- data/test/unit/index/{tc_segment_term_docs.rb → rtc_segment_term_docs.rb} +0 -0
- data/test/unit/index/{tc_segment_term_enum.rb → rtc_segment_term_enum.rb} +0 -0
- data/test/unit/index/{tc_segment_term_vector.rb → rtc_segment_term_vector.rb} +0 -0
- data/test/unit/index/{tc_term_buffer.rb → rtc_term_buffer.rb} +0 -0
- data/test/unit/index/{tc_term_info.rb → rtc_term_info.rb} +0 -0
- data/test/unit/index/{tc_term_infos_io.rb → rtc_term_infos_io.rb} +0 -0
- data/test/unit/index/{tc_term_vectors_io.rb → rtc_term_vectors_io.rb} +0 -0
- data/test/unit/index/{c_index.rb → tc_index.rb} +26 -6
- data/test/unit/index/{c_index_reader.rb → tc_index_reader.rb} +0 -0
- data/test/unit/index/{c_index_writer.rb → tc_index_writer.rb} +0 -0
- data/test/unit/index/{c_term.rb → tc_term.rb} +0 -0
- data/test/unit/index/{c_term_voi.rb → tc_term_voi.rb} +0 -0
- data/test/unit/query_parser/{c_query_parser.rb → rtc_query_parser.rb} +14 -14
- data/test/unit/query_parser/tc_query_parser.rb +24 -16
- data/test/unit/search/{tc_similarity.rb → rtc_similarity.rb} +0 -0
- data/test/unit/search/rtc_sort_field.rb +14 -0
- data/test/unit/search/{c_filter.rb → tc_filter.rb} +11 -11
- data/test/unit/search/{c_fuzzy_query.rb → tc_fuzzy_query.rb} +0 -0
- data/test/unit/search/{c_index_searcher.rb → tc_index_searcher.rb} +0 -0
- data/test/unit/search/{c_search_and_sort.rb → tc_search_and_sort.rb} +0 -0
- data/test/unit/search/{c_sort.rb → tc_sort.rb} +0 -0
- data/test/unit/search/tc_sort_field.rb +20 -7
- data/test/unit/search/{c_spans.rb → tc_spans.rb} +0 -0
- data/test/unit/store/rtc_fs_store.rb +62 -0
- data/test/unit/store/rtc_ram_store.rb +15 -0
- data/test/unit/store/rtm_store.rb +150 -0
- data/test/unit/store/rtm_store_lock.rb +2 -0
- data/test/unit/store/tc_fs_store.rb +54 -40
- data/test/unit/store/tc_ram_store.rb +20 -0
- data/test/unit/store/tm_store.rb +30 -146
- data/test/unit/store/tm_store_lock.rb +66 -0
- data/test/unit/utils/{tc_bit_vector.rb → rtc_bit_vector.rb} +0 -0
- data/test/unit/utils/{tc_date_tools.rb → rtc_date_tools.rb} +0 -0
- data/test/unit/utils/{tc_number_tools.rb → rtc_number_tools.rb} +0 -0
- data/test/unit/utils/{tc_parameter.rb → rtc_parameter.rb} +0 -0
- data/test/unit/utils/{tc_priority_queue.rb → rtc_priority_queue.rb} +0 -0
- data/test/unit/utils/{tc_string_helper.rb → rtc_string_helper.rb} +0 -0
- data/test/unit/utils/{tc_thread.rb → rtc_thread.rb} +0 -0
- data/test/unit/utils/{tc_weak_key_hash.rb → rtc_weak_key_hash.rb} +0 -0
- metadata +360 -289
- data/test/unit/document/c_field.rb +0 -98
- data/test/unit/search/c_sort_field.rb +0 -27
- data/test/unit/store/c_fs_store.rb +0 -76
- data/test/unit/store/c_ram_store.rb +0 -35
- data/test/unit/store/m_store.rb +0 -34
- data/test/unit/store/m_store_lock.rb +0 -68
data/ext/q_const_score.c
CHANGED
@@ -9,9 +9,7 @@
|
|
9
9
|
|
10
10
|
char *csw_to_s(Weight *self)
|
11
11
|
{
|
12
|
-
|
13
|
-
dbl_to_s(dbuf, self->value);
|
14
|
-
return epstrdup("ConstantScoreWeight(%s)", strlen(dbuf), dbuf);
|
12
|
+
return strfmt("ConstantScoreWeight(%f)", self->value);
|
15
13
|
}
|
16
14
|
|
17
15
|
void csw_destroy(void *p)
|
@@ -28,14 +26,13 @@ Explanation *csw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
28
26
|
|
29
27
|
if (bv_get(bv, doc_num)) {
|
30
28
|
expl = expl_create(self->value,
|
31
|
-
|
32
|
-
strlen(filter_str), filter_str));
|
29
|
+
strfmt("ConstantScoreQuery(%s), product of:", filter_str));
|
33
30
|
expl_add_detail(expl, expl_create(self->query->boost, estrdup("boost")));
|
34
31
|
expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
|
35
32
|
} else {
|
36
33
|
expl = expl_create(self->value,
|
37
|
-
|
38
|
-
|
34
|
+
strfmt("ConstantScoreQuery(%s), does not match id %d",
|
35
|
+
filter_str, doc_num));
|
39
36
|
}
|
40
37
|
free(filter_str);
|
41
38
|
return expl;
|
@@ -74,12 +71,9 @@ char *csq_to_s(Query *self, char *field)
|
|
74
71
|
char *filter_str = filter->to_s(filter);
|
75
72
|
char *buffer;
|
76
73
|
if (self->boost == 1.0) {
|
77
|
-
buffer =
|
74
|
+
buffer = strfmt("ConstantScore(%s)", filter_str);
|
78
75
|
} else {
|
79
|
-
|
80
|
-
dbl_to_s(dbuf, self->boost);
|
81
|
-
buffer = epstrdup("ConstantScore(%s)^%s",
|
82
|
-
strlen(filter_str) + strlen(dbuf), filter_str, dbuf);
|
76
|
+
buffer = strfmt("ConstantScore(%s)^%f", filter_str, self->boost);
|
83
77
|
}
|
84
78
|
free(filter_str);
|
85
79
|
return buffer;;
|
@@ -0,0 +1,206 @@
|
|
1
|
+
#include "search.h"
|
2
|
+
#include <string.h>
|
3
|
+
|
4
|
+
Scorer *fqsc_create(Scorer *scorer, BitVector *bv, Similarity *sim);
|
5
|
+
|
6
|
+
/***************************************************************************
|
7
|
+
*
|
8
|
+
* Weight
|
9
|
+
*
|
10
|
+
***************************************************************************/
|
11
|
+
|
12
|
+
/*
 * Describe this weight as "FilteredQueryWeight(<value>)".
 * The string is produced by strfmt(); presumably it is heap allocated and
 * owned by the caller -- TODO confirm against strfmt().
 */
char *fqw_to_s(Weight *self)
{
    char *description = strfmt("FilteredQueryWeight(%f)", self->value);

    return description;
}
|
16
|
+
|
17
|
+
void fqw_destroy(void *p)
|
18
|
+
{
|
19
|
+
free(p);
|
20
|
+
}
|
21
|
+
|
22
|
+
/* Forward the sum-of-squared-weights request to the sub-weight in self->data. */
float fqw_sum_of_squared_weights(Weight *self)
{
    Weight *wrapped = (Weight *)self->data;
    float sum = wrapped->sum_of_squared_weights(wrapped);

    return sum;
}
|
27
|
+
|
28
|
+
/*
 * Forward normalisation to the sub-weight held in self->data.
 *
 * The function returns void, so the delegate is simply called; a
 * `return <expression>;` statement is not permitted in a void function in C.
 */
void fqw_normalize(Weight *self, float normalization_factor)
{
    Weight *sw = (Weight *)self->data;

    sw->normalize(sw, normalization_factor);
}
|
33
|
+
|
34
|
+
/* Report the value of the sub-weight held in self->data. */
float fqw_get_value(Weight *self)
{
    Weight *wrapped = (Weight *)self->data;
    float value = wrapped->get_value(wrapped);

    return value;
}
|
39
|
+
|
40
|
+
/*
 * Explain the score of doc_num by asking the sub-weight held in self->data;
 * the filter does not contribute to the explanation.
 */
Explanation *fqw_explain(Weight *self, IndexReader *ir, int doc_num)
{
    Weight *wrapped = (Weight *)self->data;
    Explanation *expl = wrapped->explain(wrapped, ir, doc_num);

    return expl;
}
|
45
|
+
|
46
|
+
/*
 * Create the scorer for a filtered query: the sub-weight's scorer wrapped
 * together with the bit vector the filter yields for this reader.
 */
Scorer *fqw_scorer(Weight *self, IndexReader *ir)
{
    Weight *wrapped = (Weight *)self->data;
    Scorer *inner = wrapped->scorer(wrapped, ir);
    FilteredQuery *fq = (FilteredQuery *)self->query->data;
    BitVector *allowed = fq->filter->get_bv(fq->filter, ir);

    return fqsc_create(inner, allowed, self->similarity);
}
|
54
|
+
|
55
|
+
/*
 * Allocate a Weight for a filtered query.
 *
 * query      - stored in ->query
 * sub_weight - weight of the wrapped query; kept in ->data, and its value
 *              is copied into ->value
 * sim        - stored in ->similarity
 */
Weight *fqw_create(Query *query, Weight *sub_weight, Similarity *sim)
{
    Weight *w = ALLOC(Weight);

    ZEROSET(w, Weight, 1);

    /* state */
    w->query = query;
    w->similarity = sim;
    w->data = sub_weight;
    w->value = sub_weight->value;
    w->idf = 1.0;

    /* method table */
    w->get_query = &w_get_query;
    w->get_value = &fqw_get_value;
    w->sum_of_squared_weights = &fqw_sum_of_squared_weights;
    w->normalize = &fqw_normalize;
    w->scorer = &fqw_scorer;
    w->explain = &fqw_explain;
    w->to_s = &fqw_to_s;
    w->destroy = &fqw_destroy;

    return w;
}
|
77
|
+
|
78
|
+
/***************************************************************************
|
79
|
+
*
|
80
|
+
* FilteredQuery
|
81
|
+
*
|
82
|
+
***************************************************************************/
|
83
|
+
|
84
|
+
/*
 * Render the query as "FilteredQuery(query:<q>, filter:<f>)", followed by
 * "^<boost>" when the boost is not exactly 1.0.  The intermediate strings
 * obtained from the sub-query and filter are freed before returning.
 */
char *fq_to_s(Query *self, char *field)
{
    FilteredQuery *fq = (FilteredQuery *)self->data;
    char *filter_str = fq->filter->to_s(fq->filter);
    char *query_str = fq->query->to_s(fq->query, field);
    char *result;

    if (self->boost != 1.0) {
        result = strfmt("FilteredQuery(query:%s, filter:%s)^%f",
                        query_str, filter_str, self->boost);
    } else {
        result = strfmt("FilteredQuery(query:%s, filter:%s)",
                        query_str, filter_str);
    }

    free(filter_str);
    free(query_str);
    return result;
}
|
101
|
+
|
102
|
+
void fq_destroy(void *p)
|
103
|
+
{
|
104
|
+
Query *self = (Query *)p;
|
105
|
+
if (self->destroy_all) {
|
106
|
+
FilteredQuery *fq = (FilteredQuery *)self->data;
|
107
|
+
fq->filter->destroy(fq->filter);
|
108
|
+
fq->query->destroy(fq->query);
|
109
|
+
}
|
110
|
+
free(self->data);
|
111
|
+
q_destroy(self);
|
112
|
+
}
|
113
|
+
|
114
|
+
/*
 * Build the weight for this query: weigh the wrapped query with q_weight()
 * and wrap the result using the searcher's similarity.
 */
Weight *fq_create_weight(Query *self, Searcher *searcher)
{
    FilteredQuery *fq = (FilteredQuery *)self->data;
    Weight *sub_weight = q_weight(fq->query, searcher);

    return fqw_create(self, sub_weight, searcher->similarity);
}
|
120
|
+
|
121
|
+
/*
 * Create a query that pairs `query` with `filter`.  Both pointers are
 * stored as-is in a freshly allocated FilteredQuery placed in ->data.
 */
Query *fq_create(Query *query, Filter *filter)
{
    FilteredQuery *payload = ALLOC(FilteredQuery);
    Query *q = q_create();

    payload->filter = filter;
    payload->query = query;

    q->data = payload;
    q->type = FILTERED_QUERY;

    /* method table */
    q->to_s = &fq_to_s;
    q->create_weight = &fq_create_weight;
    q->destroy = &fq_destroy;

    return q;
}
|
135
|
+
|
136
|
+
/***************************************************************************
|
137
|
+
*
|
138
|
+
* FilteredQueryScorer
|
139
|
+
*
|
140
|
+
***************************************************************************/
|
141
|
+
|
142
|
+
typedef struct FilteredQueryScorer {
|
143
|
+
Scorer *sub_scorer;
|
144
|
+
BitVector *bv;
|
145
|
+
} FilteredQueryScorer;
|
146
|
+
|
147
|
+
/* Score of the current document, as computed by the wrapped scorer. */
float fqsc_score(Scorer *self)
{
    FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
    Scorer *inner = fqsc->sub_scorer;

    return inner->score(inner);
}
|
152
|
+
|
153
|
+
/*
 * Advance to the next document of the wrapped scorer whose bit is set in
 * the filter's bit vector.  self->doc tracks every document visited.
 * Returns false once the wrapped scorer is exhausted.
 */
bool fqsc_next(Scorer *self)
{
    FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
    Scorer *inner = fqsc->sub_scorer;
    BitVector *allowed = fqsc->bv;

    for (;;) {
        if (!inner->next(inner)) {
            return false;
        }
        self->doc = inner->doc;
        if (bv_get(allowed, self->doc)) {
            return true;
        }
    }
}
|
163
|
+
|
164
|
+
/*
 * Position the scorer on the first document at or after the position
 * reached by the wrapped scorer's skip_to(doc_num) whose bit is set in the
 * filter's bit vector.
 *
 * Returns true with self->doc set to the accepted document, or false when
 * the wrapped scorer runs out of documents.
 */
bool fqsc_skip_to(Scorer *self, int doc_num)
{
    FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
    Scorer *sub_sc = fqsc->sub_scorer;
    BitVector *bv = fqsc->bv;

    if (!sub_sc->skip_to(sub_sc, doc_num)) {
        return false;
    }

    do {
        /* Refresh self->doc on every iteration: the filter must be tested
         * against the document the wrapped scorer is currently on, not the
         * one it landed on right after skip_to(). */
        self->doc = sub_sc->doc;
        if (bv_get(bv, self->doc)) {
            return true;
        }
    } while (sub_sc->next(sub_sc));

    return false;
}
|
176
|
+
|
177
|
+
/* Explanation for doc_num, taken unchanged from the wrapped scorer. */
Explanation *fqsc_explain(Scorer *self, int doc_num)
{
    FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
    Scorer *inner = fqsc->sub_scorer;

    return inner->explain(inner, doc_num);
}
|
182
|
+
|
183
|
+
void fqsc_destroy(void *p)
|
184
|
+
{
|
185
|
+
Scorer *self = (Scorer *)p;
|
186
|
+
FilteredQueryScorer *fqsc = (FilteredQueryScorer *)self->data;
|
187
|
+
bv_destroy(fqsc->bv);
|
188
|
+
fqsc->sub_scorer->destroy(fqsc->sub_scorer);
|
189
|
+
scorer_destroy(self);
|
190
|
+
}
|
191
|
+
|
192
|
+
/*
 * Wrap `scorer` so that only documents whose bit is set in `bv` are
 * reported.  Both pointers are stored in a new FilteredQueryScorer kept in
 * ->data; the Scorer shell comes from scorer_create(sim).
 */
Scorer *fqsc_create(Scorer *scorer, BitVector *bv, Similarity *sim)
{
    Scorer *sc = scorer_create(sim);
    FilteredQueryScorer *state = ALLOC(FilteredQueryScorer);

    state->bv = bv;
    state->sub_scorer = scorer;
    sc->data = state;

    /* method table */
    sc->next = &fqsc_next;
    sc->skip_to = &fqsc_skip_to;
    sc->score = &fqsc_score;
    sc->explain = &fqsc_explain;
    sc->destroy = &fqsc_destroy;

    return sc;
}
|
data/ext/q_fuzzy.c
CHANGED
@@ -216,21 +216,24 @@ Query *fuzq_rewrite(Query *self, IndexReader *ir)
|
|
216
216
|
TermBuffer *tb = te->tb_curr;
|
217
217
|
float score = 0.0, min_score = fuzq->min_sim;
|
218
218
|
|
219
|
-
|
220
|
-
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
219
|
+
TRY
|
220
|
+
do {
|
221
|
+
if (strcmp(tb->field, field) != 0 ||
|
222
|
+
(prefix && strncmp(tb->text, prefix, pre_len) != 0))
|
223
|
+
break;
|
224
|
+
|
225
|
+
score = fuzq_score(fuzq, tb->text + pre_len);
|
226
|
+
//printf("%s:%s:%f\n", tb->text, fuzq->text, score);
|
227
|
+
|
228
|
+
if (score > min_score) {
|
229
|
+
pq_insert(term_pq, scored_term_create(tb_get_term(tb), score));
|
230
|
+
if (pq_full(term_pq))
|
231
|
+
min_score = ((ScoredTerm *)pq_top(term_pq))->score;
|
232
|
+
}
|
233
|
+
} while ((tb = te->next(te)) != NULL);
|
234
|
+
XFINALLY
|
235
|
+
te->close(te);
|
236
|
+
XENDTRY
|
234
237
|
}
|
235
238
|
free(prefix);
|
236
239
|
|
data/ext/q_match_all.c
CHANGED
@@ -9,9 +9,7 @@
|
|
9
9
|
|
10
10
|
char *maw_to_s(Weight *self)
|
11
11
|
{
|
12
|
-
|
13
|
-
dbl_to_s(dbuf, self->value);
|
14
|
-
return epstrdup("MatchAllWeight(%s)", strlen(dbuf), dbuf);
|
12
|
+
return strfmt("MatchAllWeight(%f)", self->value);
|
15
13
|
}
|
16
14
|
|
17
15
|
void maw_destroy(void *p)
|
@@ -28,7 +26,7 @@ Explanation *maw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
28
26
|
expl_add_detail(expl, expl_create(self->qnorm, estrdup("query_norm")));
|
29
27
|
} else {
|
30
28
|
expl = expl_create(self->value,
|
31
|
-
|
29
|
+
strfmt("MatchAllQuery: doc %d was deleted", doc_num));
|
32
30
|
}
|
33
31
|
|
34
32
|
return expl;
|
@@ -66,9 +64,7 @@ char *maq_to_s(Query *self, char *field)
|
|
66
64
|
if (self->boost == 1.0) {
|
67
65
|
return estrdup("MatchAll");
|
68
66
|
} else {
|
69
|
-
|
70
|
-
dbl_to_s(dbuf, self->boost);
|
71
|
-
return epstrdup("MatchAll^%s", strlen(dbuf), dbuf);
|
67
|
+
return strfmt("MatchAll^%f", self->boost);
|
72
68
|
}
|
73
69
|
}
|
74
70
|
|
data/ext/q_multi_phrase.c
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
3
|
|
4
|
+
static char * const FIELD_CHANGE_ERROR_MSG = "All phrase terms must be in the same field.";
|
5
|
+
|
4
6
|
/***************************************************************************
|
5
7
|
*
|
6
8
|
* MultiPhraseWeight
|
@@ -9,9 +11,7 @@
|
|
9
11
|
|
10
12
|
char *mphw_to_s(Weight *self)
|
11
13
|
{
|
12
|
-
|
13
|
-
dbl_to_s(dbuf, self->value);
|
14
|
-
return epstrdup("MultiPhraseWeight(%s)", strlen(dbuf), dbuf);
|
14
|
+
return strfmt("MultiPhraseWeight(%f)", self->value);
|
15
15
|
}
|
16
16
|
|
17
17
|
Scorer *mphw_scorer(Weight *self, IndexReader *ir)
|
@@ -63,9 +63,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
63
63
|
int len = 0, pos = 0;
|
64
64
|
|
65
65
|
Explanation *expl = expl_create(0.0,
|
66
|
-
|
67
|
-
strlen(query_str) + 20,
|
68
|
-
query_str, doc_num));
|
66
|
+
strfmt("weight(%s in %d), product of:", query_str, doc_num));
|
69
67
|
|
70
68
|
for (i = 0; i < mphq->t_cnt; i++) {
|
71
69
|
for (j = 0; j < mphq->pt_cnt[i]; j++) {
|
@@ -84,14 +82,14 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
84
82
|
doc_freqs[pos] = 0;
|
85
83
|
|
86
84
|
Explanation *idf_expl1 = expl_create(self->idf,
|
87
|
-
|
85
|
+
strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
|
88
86
|
Explanation *idf_expl2 = expl_create(self->idf,
|
89
|
-
|
87
|
+
strfmt("idf(%s:<%s>)", mphq->field, doc_freqs));
|
90
88
|
free(doc_freqs);
|
91
89
|
|
92
90
|
// explain query weight
|
93
91
|
Explanation *query_expl = expl_create(0.0,
|
94
|
-
|
92
|
+
strfmt("query_weight(%s), product of:", query_str));
|
95
93
|
|
96
94
|
if (self->query->boost != 1.0) {
|
97
95
|
expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
|
@@ -107,8 +105,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
107
105
|
|
108
106
|
// explain field weight
|
109
107
|
Explanation *field_expl = expl_create(0.0,
|
110
|
-
|
111
|
-
strlen(query_str) + 20, query_str, doc_num));
|
108
|
+
strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
|
112
109
|
free(query_str);
|
113
110
|
|
114
111
|
Scorer *scorer = self->scorer(self, ir);
|
@@ -122,8 +119,7 @@ Explanation *mphw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
122
119
|
? sim_decode_norm(self->similarity, field_norms[doc_num])
|
123
120
|
: 0.0;
|
124
121
|
Explanation *field_norm_expl = expl_create(field_norm,
|
125
|
-
|
126
|
-
strlen(mphq->field) + 20, mphq->field, doc_num));
|
122
|
+
strfmt("field_norm(field=%s, doc=%d)", mphq->field, doc_num));
|
127
123
|
|
128
124
|
expl_add_detail(field_expl, field_norm_expl);
|
129
125
|
|
@@ -202,7 +198,7 @@ void mphq_add_terms(Query *self, Term **terms, int t_cnt, int pos_inc)
|
|
202
198
|
position = mphq->positions[index - 1] + pos_inc;
|
203
199
|
for (i = 0; i < t_cnt; i++) {
|
204
200
|
if (strcmp(terms[i]->field, mphq->field) != 0) {
|
205
|
-
|
201
|
+
RAISE(ARG_ERROR, FIELD_CHANGE_ERROR_MSG);
|
206
202
|
}
|
207
203
|
}
|
208
204
|
}
|
data/ext/q_parser.c
CHANGED
@@ -1686,7 +1686,7 @@ int yyerror(QParser *qp, char const *msg)
|
|
1686
1686
|
{
|
1687
1687
|
if (!qp->handle_parse_errors) {
|
1688
1688
|
if (qp->clean_str) free(qp->qstr);
|
1689
|
-
|
1689
|
+
RAISE(PARSE_ERROR, (char *)msg);
|
1690
1690
|
}
|
1691
1691
|
return 0;
|
1692
1692
|
}
|
@@ -1811,8 +1811,35 @@ Query *get_fuzzy_q(QParser *qp, char *field, char *word, char *slop_str)
|
|
1811
1811
|
|
1812
1812
|
/*
 * Build the query for a wildcard pattern on `field`.
 *
 * The pattern is lower-cased in place when qp->wild_lower is set.  If the
 * only wildcard character is a single trailing '*', a prefix query is
 * created from the text before it; otherwise a wildcard query is created
 * from the whole pattern.  An empty pattern is handled as a (wildcard-free)
 * wildcard query instead of reading before the start of the string.
 */
Query *get_wild_q(QParser *qp, char *field, char *pattern)
{
    Query *q;
    bool is_prefix = false;
    size_t len = strlen(pattern);
    size_t i;

    if (qp->wild_lower) lower_str(pattern);

    /* simplify the wildcard query to a prefix query if possible. Basically a
     * prefix query is any wildcard query that has a '*' as the last character
     * and no other wildcard characters before it. */
    if (len > 0 && pattern[len - 1] == '*') {
        is_prefix = true;
        /* scan everything before the trailing '*' by index so that no
         * pointer before the start of the buffer is ever formed */
        for (i = 0; i + 1 < len; i++) {
            if (pattern[i] == '*' || pattern[i] == '?') {
                is_prefix = false;
                break;
            }
        }
    }

    if (is_prefix) {
        /* chop off the '*' temporarily to create the query */
        pattern[len - 1] = 0;
        q = prefixq_create(term_create(field, pattern));
        pattern[len - 1] = '*';
    } else {
        q = wcq_create(term_create(field, pattern));
    }
    return q;
}
|
1817
1844
|
|
1818
1845
|
HashSet *add_field(QParser *qp, char *field)
|
data/ext/q_phrase.c
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
3
|
|
4
|
+
static char * const FIELD_CHANGE_ERROR_MSG = "Field illegally changed in the phrase";
|
5
|
+
|
4
6
|
/***************************************************************************
|
5
7
|
*
|
6
8
|
* PhraseWeight
|
@@ -52,9 +54,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
52
54
|
int len = 0, pos = 0;
|
53
55
|
|
54
56
|
Explanation *expl = expl_create(0.0,
|
55
|
-
|
56
|
-
strlen(query_str) + 20,
|
57
|
-
query_str, doc_num));
|
57
|
+
strfmt("weight(%s in %d), product of:", query_str, doc_num));
|
58
58
|
|
59
59
|
for (i = 0; i < phq->t_cnt; i++) {
|
60
60
|
len += strlen(phq->terms[i]->text) + 30;
|
@@ -69,14 +69,14 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
69
69
|
doc_freqs[pos] = 0;
|
70
70
|
|
71
71
|
Explanation *idf_expl1 = expl_create(self->idf,
|
72
|
-
|
72
|
+
strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
|
73
73
|
Explanation *idf_expl2 = expl_create(self->idf,
|
74
|
-
|
74
|
+
strfmt("idf(%s:<%s>)", phq->field, doc_freqs));
|
75
75
|
free(doc_freqs);
|
76
76
|
|
77
77
|
// explain query weight
|
78
78
|
Explanation *query_expl = expl_create(0.0,
|
79
|
-
|
79
|
+
strfmt("query_weight(%s), product of:", query_str));
|
80
80
|
|
81
81
|
if (self->query->boost != 1.0) {
|
82
82
|
expl_add_detail(query_expl, expl_create(self->query->boost, estrdup("boost")));
|
@@ -92,8 +92,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
92
92
|
|
93
93
|
// explain field weight
|
94
94
|
Explanation *field_expl = expl_create(0.0,
|
95
|
-
|
96
|
-
strlen(query_str) + 20, query_str, doc_num));
|
95
|
+
strfmt("field_weight(%s in %d), product of:", query_str, doc_num));
|
97
96
|
free(query_str);
|
98
97
|
|
99
98
|
Scorer *scorer = self->scorer(self, ir);
|
@@ -107,8 +106,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
107
106
|
? sim_decode_norm(self->similarity, field_norms[doc_num])
|
108
107
|
: 0.0;
|
109
108
|
Explanation *field_norm_expl = expl_create(field_norm,
|
110
|
-
|
111
|
-
strlen(phq->field) + 20, phq->field, doc_num));
|
109
|
+
strfmt("field_norm(field=%s, doc=%d)", phq->field, doc_num));
|
112
110
|
|
113
111
|
expl_add_detail(field_expl, field_norm_expl);
|
114
112
|
|
@@ -127,9 +125,7 @@ Explanation *phw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
127
125
|
|
128
126
|
char *phw_to_s(Weight *self)
|
129
127
|
{
|
130
|
-
|
131
|
-
dbl_to_s(dbuf, self->value);
|
132
|
-
return epstrdup("PhraseWeight(%s)", strlen(dbuf), dbuf);
|
128
|
+
return strfmt("PhraseWeight(%f)", self->value);
|
133
129
|
}
|
134
130
|
|
135
131
|
Weight *phw_create(Query *query, Searcher *searcher)
|
@@ -216,9 +212,8 @@ char *phq_to_s(Query *self, char *field)
|
|
216
212
|
buf_index += strlen(buffer + buf_index);
|
217
213
|
}
|
218
214
|
if (self->boost != 1.0) {
|
219
|
-
|
220
|
-
dbl_to_s(
|
221
|
-
sprintf(buffer + buf_index, "^%s", dbuf);
|
215
|
+
buffer[buf_index++] = '^';
|
216
|
+
dbl_to_s(buffer + buf_index, self->boost);
|
222
217
|
}
|
223
218
|
return buffer;
|
224
219
|
}
|
@@ -270,7 +265,7 @@ void phq_add_term(Query *self, Term *term, int pos_inc)
|
|
270
265
|
} else {
|
271
266
|
position = phq->positions[index - 1] + pos_inc;
|
272
267
|
if (strcmp(term->field, phq->field) != 0) {
|
273
|
-
|
268
|
+
RAISE(ARG_ERROR, FIELD_CHANGE_ERROR_MSG);
|
274
269
|
}
|
275
270
|
}
|
276
271
|
phq->terms[index] = term;
|
@@ -356,7 +351,7 @@ bool pp_first_position(PhrasePosition *self)
|
|
356
351
|
|
357
352
|
char *pp_to_s(PhrasePosition *self)
|
358
353
|
{
|
359
|
-
return
|
354
|
+
return strfmt("pp->(doc => %d, position => %d)", self->doc, self->position);
|
360
355
|
}
|
361
356
|
|
362
357
|
inline int pp_cmp(const void *const p1, const void *const p2)
|
@@ -491,15 +486,13 @@ bool phsc_skip_to(Scorer *self, int doc_num)
|
|
491
486
|
|
492
487
|
Explanation *phsc_explain(Scorer *self, int doc_num)
|
493
488
|
{
|
494
|
-
char dbuf[32];
|
495
489
|
GET_PHSC;
|
496
490
|
while (phsc_next(self) && self->doc < doc_num)
|
497
491
|
;
|
498
492
|
|
499
493
|
float phrase_freq = (self->doc == doc_num) ? phsc->freq : 0.0;
|
500
|
-
dbl_to_s(dbuf, phrase_freq);
|
501
494
|
return expl_create(sim_tf(self->similarity, phrase_freq),
|
502
|
-
|
495
|
+
strfmt("tf(phrase_freq=%f)", phrase_freq));
|
503
496
|
}
|
504
497
|
|
505
498
|
void phsc_destroy(void *p)
|