ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_span.c
CHANGED
@@ -1,807 +1,1038 @@
|
|
1
1
|
#include <string.h>
|
2
|
+
#include <limits.h>
|
2
3
|
#include "search.h"
|
4
|
+
#include "hashset.h"
|
3
5
|
|
4
|
-
|
5
|
-
*
|
6
|
-
* NearSpanEnum
|
7
|
-
*
|
8
|
-
*****************************************************************************/
|
6
|
+
#define CLAUSE_INIT_CAPA 4
|
9
7
|
|
10
8
|
/*****************************************************************************
|
11
9
|
*
|
12
|
-
*
|
10
|
+
* SpanQuery
|
13
11
|
*
|
14
12
|
*****************************************************************************/
|
15
13
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
Explanation *idf_expl1;
|
20
|
-
Explanation *idf_expl2;
|
21
|
-
Explanation *query_expl;
|
22
|
-
Explanation *qnorm_expl;
|
23
|
-
Explanation *field_expl;
|
24
|
-
Explanation *tf_expl;
|
25
|
-
Scorer *scorer;
|
26
|
-
uchar *field_norms;
|
27
|
-
float field_norm;
|
28
|
-
Explanation *field_norm_expl;
|
29
|
-
|
30
|
-
char *query_str = self->query->to_s(self->query, "");
|
31
|
-
HashSet *terms = (HashSet *)self->data;
|
32
|
-
char *field = ((SpanQuery *)self->query->data)->field;
|
33
|
-
char *doc_freqs = NULL;
|
34
|
-
size_t df_i = 0;
|
35
|
-
int i;
|
36
|
-
Term *t;
|
37
|
-
|
38
|
-
|
39
|
-
for (i = 0; i < terms->size; i++) {
|
40
|
-
t = (Term *)terms->elems[i];
|
41
|
-
REALLOC_N(doc_freqs, char, df_i + strlen(t->text) + 23);
|
42
|
-
sprintf(doc_freqs + df_i, "%s=%d, ", t->text, ir->doc_freq(ir, t));
|
43
|
-
df_i = strlen(doc_freqs);
|
44
|
-
}
|
45
|
-
/* remove the ',' at the end of the string if it exists */
|
46
|
-
if (terms->size > 0) {
|
47
|
-
df_i -= 2;
|
48
|
-
doc_freqs[df_i] = '\0';
|
49
|
-
} else {
|
50
|
-
doc_freqs = "";
|
51
|
-
}
|
14
|
+
/***************************************************************************
|
15
|
+
* SpanQuery
|
16
|
+
***************************************************************************/
|
52
17
|
|
53
|
-
|
54
|
-
strfmt("weight(%s in %d), product of:", query_str, target));
|
18
|
+
#define SpQ(query) ((SpanQuery *)(query))
|
55
19
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
idf_expl2 = expl_create(self->idf,
|
61
|
-
strfmt("idf(%s: %s)", field, doc_freqs));
|
62
|
-
if (terms->size > 0) {
|
63
|
-
free(doc_freqs); /* only free if allocated */
|
64
|
-
}
|
20
|
+
/*
 * Hash function shared by span queries: the hash is derived from the
 * query's field name alone.
 */
static ulong spanq_hash(Query *self)
{
    char *field_name = SpQ(self)->field;

    return str_hash(field_name);
}
|
65
24
|
|
66
|
-
|
67
|
-
|
68
|
-
|
25
|
+
/*
 * Equality check shared by span queries: two queries compare equal here
 * when their field names are the same string. Returns 1 if equal, else 0.
 */
static int spanq_eq(Query *self, Query *o)
{
    char *lhs_field = SpQ(self)->field;
    char *rhs_field = SpQ(o)->field;

    return !strcmp(lhs_field, rhs_field);
}
|
69
29
|
|
70
|
-
|
71
|
-
|
72
|
-
|
30
|
+
/*
 * Destructor shared by span queries. There is nothing span-specific to
 * release at this level, so it simply forwards to q_destroy_i().
 */
static void spanq_destroy_i(Query *self)
{
    q_destroy_i(self);
}
|
73
34
|
|
74
|
-
|
35
|
+
static MatchVector *mv_to_term_mv(MatchVector *term_mv, MatchVector *full_mv,
|
36
|
+
HashSet *terms, TermVector *tv)
|
37
|
+
{
|
38
|
+
int i;
|
39
|
+
for (i = 0; i < terms->size; i++) {
|
40
|
+
char *term = (char *)terms->elems[i];
|
41
|
+
TVTerm *tv_term = tv_get_tv_term(tv, term);
|
42
|
+
if (tv_term) {
|
43
|
+
int j;
|
44
|
+
int m_idx = 0;
|
45
|
+
for (j = 0; j < tv_term->freq; j++) {
|
46
|
+
int pos = tv_term->positions[j];
|
47
|
+
for (; m_idx < full_mv->size; m_idx++) {
|
48
|
+
if (pos <= full_mv->matches[m_idx].end) {
|
49
|
+
if (pos >= full_mv->matches[m_idx].start) {
|
50
|
+
matchv_add(term_mv, pos, pos);
|
51
|
+
}
|
52
|
+
break;
|
53
|
+
}
|
54
|
+
}
|
55
|
+
}
|
56
|
+
}
|
57
|
+
}
|
75
58
|
|
76
|
-
|
77
|
-
|
59
|
+
return term_mv;
|
60
|
+
}
|
78
61
|
|
79
|
-
|
62
|
+
/***************************************************************************
|
63
|
+
* TVTermDocEnum
|
64
|
+
* dummy TermDocEnum used by the highlighter to find matches
|
65
|
+
***************************************************************************/
|
80
66
|
|
81
|
-
|
67
|
+
#define TV_TDE(tde) ((TVTermDocEnum *)(tde))
|
82
68
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
69
|
+
typedef struct TVTermDocEnum
|
70
|
+
{
|
71
|
+
TermDocEnum super;
|
72
|
+
int doc;
|
73
|
+
int index;
|
74
|
+
int freq;
|
75
|
+
int *positions;
|
76
|
+
TermVector *tv;
|
77
|
+
} TVTermDocEnum;
|
87
78
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
79
|
+
/*
 * Positions the enum on +term+ within its term vector.
 *
 * tde       - the TVTermDocEnum (passed as its TermDocEnum base)
 * field_num - ignored; the enum only ever sees one term vector
 * term      - term text to look up with tv_get_tv_term()
 *
 * On a hit, doc is set to -1 (meaning "before the first document") and the
 * term's freq/positions are made available. On a miss, doc is set to
 * INT_MAX and freq/positions are cleared, so that no accessor can observe
 * values left over from a previous seek or from the uninitialised
 * allocation.
 */
static void tv_tde_seek(TermDocEnum *tde, int field_num, const char *term)
{
    TVTermDocEnum *tv_tde = TV_TDE(tde);
    TVTerm *tv_term = tv_get_tv_term(tv_tde->tv, term);
    (void)field_num;

    /* the read cursor always restarts, whether or not the term exists */
    tv_tde->index = 0;

    if (tv_term) {
        tv_tde->doc = -1;
        tv_tde->freq = tv_term->freq;
        tv_tde->positions = tv_term->positions;
    }
    else {
        tv_tde->doc = INT_MAX;
        tv_tde->freq = 0;
        tv_tde->positions = NULL;
    }
}
|
94
|
+
|
95
|
+
/*
 * Advances to the next document. It succeeds exactly once per seek: when
 * doc is still -1 it becomes 0 and true is returned. In every other state
 * doc becomes INT_MAX and false is returned.
 */
static bool tv_tde_next(TermDocEnum *tde)
{
    TVTermDocEnum *tv_tde = TV_TDE(tde);

    if (tv_tde->doc != -1) {
        tv_tde->doc = INT_MAX;
        return false;
    }

    tv_tde->doc = 0;
    return true;
}
|
93
106
|
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
107
|
+
/*
 * Skips to document +doc_num+. The only document this enum can be
 * positioned on is document 0.
 *
 * Returns true (with doc == 0) when +doc_num+ is 0 and the enum has not
 * been marked as finished. If doc is already INT_MAX (the seek missed, or
 * the single document was already consumed), the enum stays finished and
 * false is returned, rather than being reset to document 0 with stale
 * freq/positions state. Any other +doc_num+ finishes the enum and returns
 * false.
 */
static bool tv_tde_skip_to(TermDocEnum *tde, int doc_num)
{
    TVTermDocEnum *tv_tde = TV_TDE(tde);

    if (doc_num == 0 && tv_tde->doc != INT_MAX) {
        tv_tde->doc = 0;
        return true;
    }

    tv_tde->doc = INT_MAX;
    return false;
}
|
118
|
+
|
119
|
+
/*
 * Returns the position at the current read cursor and advances the cursor
 * by one. No bounds check is made against freq.
 */
static int tv_tde_next_position(TermDocEnum *tde)
{
    TVTermDocEnum *tv_tde = TV_TDE(tde);
    int pos = tv_tde->positions[tv_tde->index];

    tv_tde->index++;
    return pos;
}
|
123
|
+
|
124
|
+
/* Returns the stored frequency of the term the enum was last sought to. */
static int tv_tde_freq(TermDocEnum *tde)
{
    TVTermDocEnum *tv_tde = TV_TDE(tde);

    return tv_tde->freq;
}
|
128
|
+
|
129
|
+
/* Returns the enum's current document marker (-1, 0 or INT_MAX). */
static int tv_tde_doc_num(TermDocEnum *tde)
{
    TVTermDocEnum *tv_tde = TV_TDE(tde);

    return tv_tde->doc;
}
|
101
133
|
|
102
|
-
|
134
|
+
/*
 * term_positions implementation for an IndexReader whose +store+ member
 * actually holds a TermVector pointer.
 *
 * Allocates a TVTermDocEnum bound to that term vector and wires up its
 * function table. The doc/index/freq/positions members are not set here;
 * tv_tde_seek() assigns them. The enum is released by free() through its
 * close hook.
 */
static TermDocEnum *spanq_ir_term_positions(IndexReader *ir)
{
    TVTermDocEnum *tv_tde = ALLOC(TVTermDocEnum);
    TermDocEnum *tde = (TermDocEnum *)tv_tde;

    tv_tde->tv = (TermVector *)ir->store;

    /* positioning */
    tde->seek          = &tv_tde_seek;
    tde->next          = &tv_tde_next;
    tde->skip_to       = &tv_tde_skip_to;

    /* accessors */
    tde->doc_num       = &tv_tde_doc_num;
    tde->freq          = &tv_tde_freq;
    tde->next_position = &tv_tde_next_position;

    /* the enum owns nothing but its own struct */
    tde->close         = (void (*)(TermDocEnum *tde))&free;

    return tde;
}
|
103
149
|
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
150
|
+
static MatchVector *spanq_get_matchv_i(Query *self, MatchVector *mv,
|
151
|
+
TermVector *tv)
|
152
|
+
{
|
153
|
+
if (strcmp(SpQ(self)->field, tv->field) == 0) {
|
154
|
+
SpanEnum *sp_enum;
|
155
|
+
IndexReader *ir = ALLOC(IndexReader);
|
156
|
+
MatchVector *full_mv = matchv_new();
|
157
|
+
HashSet *terms = SpQ(self)->get_terms(self);
|
158
|
+
ir->fis = fis_new(0, 0, 0);
|
159
|
+
fis_add_field(ir->fis, fi_new(tv->field, 0, 0, 0));
|
160
|
+
ir->store = (Store *)tv;
|
161
|
+
ir->term_positions = &spanq_ir_term_positions;
|
162
|
+
sp_enum = SpQ(self)->get_spans(self, ir);
|
163
|
+
while (sp_enum->next(sp_enum)) {
|
164
|
+
matchv_add(full_mv,
|
165
|
+
sp_enum->start(sp_enum),
|
166
|
+
sp_enum->end(sp_enum) - 1);
|
167
|
+
}
|
168
|
+
sp_enum->destroy(sp_enum);
|
169
|
+
|
170
|
+
fis_deref(ir->fis);
|
171
|
+
free(ir);
|
172
|
+
|
173
|
+
matchv_compact(full_mv);
|
174
|
+
mv_to_term_mv(mv, full_mv, terms, tv);
|
175
|
+
matchv_destroy(full_mv);
|
176
|
+
hs_destroy(terms);
|
177
|
+
}
|
178
|
+
return mv;
|
113
179
|
}
|
114
180
|
|
115
|
-
|
181
|
+
/***************************************************************************
|
182
|
+
*
|
183
|
+
* SpanScorer
|
184
|
+
*
|
185
|
+
***************************************************************************/
|
186
|
+
|
187
|
+
#define SpSc(scorer) ((SpanScorer *)(scorer))
|
188
|
+
typedef struct SpanScorer
|
189
|
+
{
|
190
|
+
Scorer super;
|
191
|
+
IndexReader *ir;
|
192
|
+
SpanEnum *spans;
|
193
|
+
Similarity *sim;
|
194
|
+
uchar *norms;
|
195
|
+
Weight *weight;
|
196
|
+
float value;
|
197
|
+
float freq;
|
198
|
+
bool first_time : 1;
|
199
|
+
bool more : 1;
|
200
|
+
} SpanScorer;
|
201
|
+
|
202
|
+
/*
 * Score of the current document: tf(freq) * weight value, scaled by the
 * decoded field norm of the current document.
 */
static float spansc_score(Scorer *self)
{
    SpanScorer *spansc = SpSc(self);
    float raw;
    float norm;

    raw = sim_tf(spansc->sim, spansc->freq) * spansc->value;

    /* normalize */
    norm = sim_decode_norm(self->similarity, spansc->norms[self->doc]);

    return raw * norm;
}
|
119
210
|
|
120
|
-
|
211
|
+
/*
 * Moves the scorer to the next document that has spans.
 *
 * The span enum is primed on the first call. The scorer's doc is then set
 * to the enum's current document and all spans in that document are
 * consumed, accumulating their sloppy frequency (based on span length)
 * into +freq+.
 *
 * Returns false once the span enum is exhausted and nothing was
 * accumulated.
 */
static bool spansc_next(Scorer *self)
{
    SpanScorer *spansc = SpSc(self);
    SpanEnum *se = spansc->spans;

    if (spansc->first_time) {
        spansc->more = se->next(se);
        spansc->first_time = false;
    }

    if (!spansc->more) {
        return false;
    }

    self->doc = se->doc(se);
    spansc->freq = 0.0;

    while (spansc->more && (self->doc == se->doc(se))) {
        int span_len = se->end(se) - se->start(se);

        spansc->freq += sim_sloppy_freq(spansc->sim, span_len);
        spansc->more = se->next(se);
    }

    if (spansc->more) {
        return true;
    }
    return (spansc->freq != 0.0);
}
|
125
237
|
|
126
|
-
|
238
|
+
/*
 * Skips the scorer to +target+.
 *
 * The span enum is asked to skip to +target+. If it is exhausted, false is
 * returned. Otherwise the scorer's doc becomes the enum's current document
 * and, while the enum stays on +target+ itself, the sloppy frequencies of
 * its spans are accumulated into +freq+.
 *
 * Returns true if the enum still has spans or anything was accumulated.
 */
static bool spansc_skip_to(Scorer *self, int target)
{
    SpanScorer *spansc = SpSc(self);
    SpanEnum *se = spansc->spans;

    spansc->more = se->skip_to(se, target);
    if (!spansc->more) {
        return false;
    }

    self->doc = se->doc(se);
    spansc->freq = 0.0;

    while (spansc->more && (se->doc(se) == target)) {
        int span_len = se->end(se) - se->start(se);

        spansc->freq += sim_sloppy_freq(spansc->sim, span_len);
        spansc->more = se->next(se);
    }

    if (spansc->more) {
        return true;
    }
    return (spansc->freq != 0.0);
}
|
259
|
+
|
260
|
+
/*
 * Builds the term-frequency explanation for document +target+.
 *
 * The scorer is skipped to +target+. The accumulated span frequency is
 * used if the scorer landed exactly on +target+, otherwise 0.0.
 */
static Explanation *spansc_explain(Scorer *self, int target)
{
    SpanScorer *spansc = SpSc(self);
    float phrase_freq = (float)0.0;

    self->skip_to(self, target);
    if (self->doc == target) {
        phrase_freq = spansc->freq;
    }

    return expl_new(sim_tf(self->similarity, phrase_freq),
                    "tf(phrase_freq(%f)", phrase_freq);
}
|
146
273
|
|
274
|
+
/*
 * Releases the scorer: destroys its span enum (if one was created) and
 * then hands the scorer itself to scorer_destroy_i().
 */
static void spansc_destroy(Scorer *self)
{
    SpanEnum *spans = SpSc(self)->spans;

    if (spans != NULL) {
        spans->destroy(spans);
    }
    scorer_destroy_i(self);
}
|
282
|
+
|
283
|
+
/*
 * Creates a SpanScorer for +weight+'s query over +ir+.
 *
 * Returns NULL when the query's field is not known to the reader
 * (fis_get_field_num() returns a negative number); otherwise returns a
 * scorer whose span enum comes from the query's get_spans() and whose
 * norms come from the reader.
 */
Scorer *spansc_new(Weight *weight, IndexReader *ir)
{
    const int field_num = fis_get_field_num(ir->fis, SpQ(weight->query)->field);
    Query *spanq;
    Scorer *self;
    SpanScorer *spansc;

    if (field_num < 0) {
        return NULL;
    }

    spanq = weight->query;
    self = scorer_new(SpanScorer, weight->similarity);
    spansc = SpSc(self);

    spansc->first_time = true;
    spansc->more       = true;
    spansc->spans      = SpQ(spanq)->get_spans(spanq, ir);
    spansc->sim        = weight->similarity;
    spansc->norms      = ir->get_norms(ir, field_num);
    spansc->weight     = weight;
    spansc->value      = weight->value;
    spansc->freq       = 0.0;

    self->score        = &spansc_score;
    self->next         = &spansc_next;
    self->skip_to      = &spansc_skip_to;
    self->explain      = &spansc_explain;
    self->destroy      = &spansc_destroy;

    return self;
}
|
147
308
|
|
148
309
|
/*****************************************************************************
|
149
|
-
*
|
150
310
|
* SpanTermEnum
|
151
|
-
*
|
152
311
|
*****************************************************************************/
|
153
312
|
|
154
|
-
|
313
|
+
#define SpTEn(span_enum) ((SpanTermEnum *)(span_enum))
|
314
|
+
#define SpTQ(query) ((SpanTermQuery *)(query))
|
315
|
+
|
316
|
+
typedef struct SpanTermEnum
|
317
|
+
{
|
318
|
+
SpanEnum super;
|
319
|
+
TermDocEnum *positions;
|
320
|
+
int position;
|
321
|
+
int doc;
|
322
|
+
int count;
|
323
|
+
int freq;
|
324
|
+
} SpanTermEnum;
|
325
|
+
|
326
|
+
|
327
|
+
/*
 * Advances to the next span (one term occurrence).
 *
 * When every position of the current document has been consumed, the
 * underlying enum is moved to its next document. If there is none, doc is
 * set to INT_MAX and false is returned. The next position is then read.
 */
static bool spante_next(SpanEnum *self)
{
    SpanTermEnum *ste = SpTEn(self);
    TermDocEnum *tde = ste->positions;
    const int doc_consumed = (ste->count == ste->freq);

    if (doc_consumed) {
        if (! tde->next(tde)) {
            ste->doc = INT_MAX;
            return false;
        }
        /* start reading positions of the new document */
        ste->doc = tde->doc_num(tde);
        ste->freq = tde->freq(tde);
        ste->count = 0;
    }

    ste->position = tde->next_position(tde);
    ste->count += 1;
    return true;
}
|
172
345
|
|
173
|
-
bool spante_skip_to(SpanEnum *self, int target)
|
346
|
+
/*
 * Skips to the first span in a document >= +target+.
 *
 * Returns true immediately if the enum is already on such a document.
 * Otherwise the underlying enum is skipped. On failure doc becomes
 * INT_MAX and false is returned; on success the per-document counters are
 * reset and the first position of the new document is read.
 */
static bool spante_skip_to(SpanEnum *self, int target)
{
    SpanTermEnum *ste = SpTEn(self);
    TermDocEnum *tde = ste->positions;

    /* nothing to do when we are already at or beyond the target */
    if (ste->doc >= target) {
        return true;
    }

    if (! tde->skip_to(tde, target)) {
        ste->doc = INT_MAX;
        return false;
    }

    ste->doc = tde->doc_num(tde);
    ste->freq = tde->freq(tde);

    /* consume the first position of the new document */
    ste->position = tde->next_position(tde);
    ste->count = 1;
    return true;
}
|
194
369
|
|
195
|
-
int
|
370
|
+
/* Returns the document the enum is currently positioned on. */
static int spante_doc(SpanEnum *self)
{
    SpanTermEnum *ste = SpTEn(self);

    return ste->doc;
}
|
200
374
|
|
201
|
-
int
|
375
|
+
/* Start of the current span: the term's current position. */
static int spante_start(SpanEnum *self)
{
    SpanTermEnum *ste = SpTEn(self);

    return ste->position;
}
|
206
379
|
|
207
|
-
int
|
380
|
+
/* End of the current span: one past the term's current position. */
static int spante_end(SpanEnum *self)
{
    SpanTermEnum *ste = SpTEn(self);

    return 1 + ste->position;
}
|
212
384
|
|
213
|
-
char *spante_to_s(SpanEnum *self)
|
385
|
+
/*
 * Returns a newly allocated description of the enum in the form
 * "SpanTermEnum(<query>)@<where>", where <where> is "START" while the enum
 * has a negative doc, "END" once doc is INT_MAX, and a number otherwise.
 * The caller owns the returned string.
 *
 * Fix: the final sprintf() previously had no destination argument, so the
 * format literal itself was used as the output buffer and the returned
 * string was never written. The result is now formatted into +str+.
 */
static char *spante_to_s(SpanEnum *self)
{
    char *field = SpQ(self->query)->field;
    char *query_str = self->query->to_s(self->query, field);
    char pos_str[20];
    size_t len = strlen(query_str);
    int pos;
    /* 40 spare bytes hold "SpanTermEnum(", ")@", pos_str and the NUL */
    char *str = ALLOC_N(char, len + 40);

    if (self->doc(self) < 0) {
        sprintf(pos_str, "START");
    }
    else if (self->doc(self) == INT_MAX) {
        sprintf(pos_str, "END");
    }
    else {
        pos = SpTEn(self)->position;
        /* NOTE(review): this prints doc minus position as one number;
         * a "doc-position" pair may have been intended - confirm */
        sprintf(pos_str, "%d", self->doc(self) - pos);
    }

    sprintf(str, "SpanTermEnum(%s)@%s", query_str, pos_str);
    free(query_str);
    return str;
}
|
236
410
|
|
237
|
-
void spante_destroy(SpanEnum *self)
|
411
|
+
/* Closes the underlying term-position enum, then frees the span enum. */
static void spante_destroy(SpanEnum *self)
{
    SpanTermEnum *ste = SpTEn(self);
    TermDocEnum *positions = ste->positions;

    positions->close(positions);
    free(self);
}
|
245
417
|
|
246
|
-
SpanEnum *
|
418
|
+
/*
 * Creates a span enum over the positions of +query+'s term in +query+'s
 * field, read through +ir+. The enum starts with doc and position at -1
 * and both counters at zero.
 */
static SpanEnum *spante_new(Query *query, IndexReader *ir)
{
    char *term = SpTQ(query)->term;
    char *field = SpQ(query)->field;
    SpanTermEnum *ste = (SpanTermEnum *)emalloc(sizeof(SpanTermEnum));
    SpanEnum *self = (SpanEnum *)ste;

    /* enumeration state */
    ste->positions = ir_term_positions_for(ir, field, term);
    ste->position  = -1;
    ste->doc       = -1;
    ste->count     = 0;
    ste->freq      = 0;

    /* SpanEnum interface */
    self->query    = query;
    self->next     = &spante_next;
    self->skip_to  = &spante_skip_to;
    self->doc      = &spante_doc;
    self->start    = &spante_start;
    self->end      = &spante_end;
    self->destroy  = &spante_destroy;
    self->to_s     = &spante_to_s;

    return self;
}
|
271
441
|
|
272
442
|
|
273
443
|
/*****************************************************************************
|
274
|
-
*
|
275
444
|
* SpanFirstEnum
|
276
|
-
*
|
277
445
|
*****************************************************************************/
|
278
446
|
|
279
|
-
|
447
|
+
#define SpFEn(span_enum) ((SpanFirstEnum *)(span_enum))
|
448
|
+
#define SpFQ(query) ((SpanFirstQuery *)(query))
|
449
|
+
|
450
|
+
typedef struct SpanFirstEnum
|
451
|
+
{
|
452
|
+
SpanEnum super;
|
453
|
+
SpanEnum *sub_enum;
|
454
|
+
} SpanFirstEnum;
|
455
|
+
|
456
|
+
|
457
|
+
/*
 * Advances the wrapped enum until it yields a span that ends at or before
 * the query's +end+ limit. Returns false when the wrapped enum runs out.
 */
static bool spanfe_next(SpanEnum *self)
{
    SpanEnum *inner = SpFEn(self)->sub_enum;
    const int limit = SpFQ(self->query)->end;

    for (;;) {
        if (! inner->next(inner)) {
            return false;
        }
        if (inner->end(inner) <= limit) {
            return true;
        }
    }
}
|
288
468
|
|
289
|
-
bool spanfe_skip_to(SpanEnum *self, int target)
|
469
|
+
/*
 * Skips the wrapped enum to +target+ and then settles on the first span,
 * from there on, that ends at or before the query's +end+ limit.
 *
 * Returns false if the wrapped enum cannot skip to +target+ or runs out
 * before an acceptable span is found.
 *
 * Fix: when the span reached by skip_to was beyond the limit, the wrapped
 * enum used to be advanced exactly once and its result returned unchecked,
 * so a span past the limit could be reported as a match. The scan now
 * keeps going until a span within the limit is found.
 */
static bool spanfe_skip_to(SpanEnum *self, int target)
{
    SpanEnum *sub_enum = SpFEn(self)->sub_enum;
    int end = SpFQ(self->query)->end;

    if (! sub_enum->skip_to(sub_enum, target)) {
        return false;
    }

    do {
        if (sub_enum->end(sub_enum) <= end) {   /* there is a match */
            return true;
        }
    } while (sub_enum->next(sub_enum));         /* scan to next match */

    return false;
}
|
301
484
|
|
302
|
-
int
|
485
|
+
/* Document number of the current span, as reported by the wrapped
 * enumerator. */
static int spanfe_doc(SpanEnum *self)
{
    SpanFirstEnum *sfe = SpFEn(self);
    return sfe->sub_enum->doc(sfe->sub_enum);
}
|
307
490
|
|
308
|
-
int
|
491
|
+
/* Start position of the current span, as reported by the wrapped
 * enumerator. */
static int spanfe_start(SpanEnum *self)
{
    SpanFirstEnum *sfe = SpFEn(self);
    return sfe->sub_enum->start(sfe->sub_enum);
}
|
313
496
|
|
314
|
-
int
|
497
|
+
/* End position of the current span, as reported by the wrapped
 * enumerator. */
static int spanfe_end(SpanEnum *self)
{
    SpanFirstEnum *sfe = SpFEn(self);
    return sfe->sub_enum->end(sfe->sub_enum);
}
|
319
502
|
|
320
|
-
char *spanfe_to_s(SpanEnum *self)
|
503
|
+
/*
 * Build a printable description of this enumerator of the form
 * "SpanFirstEnum(<query>)".  The result comes from strfmt() and is handed
 * to the caller; the temporary query string is released here.
 */
static char *spanfe_to_s(SpanEnum *self)
{
    char *q_str = self->query->to_s(self->query, SpQ(self->query)->field);
    char *description = strfmt("SpanFirstEnum(%s)", q_str);

    free(q_str);
    return description;
}
|
328
511
|
|
329
|
-
void spanfe_destroy(SpanEnum *self)
|
512
|
+
/* Destroy the wrapped enumerator first, then free this object. */
static void spanfe_destroy(SpanEnum *self)
{
    SpanFirstEnum *sfe = SpFEn(self);

    sfe->sub_enum->destroy(sfe->sub_enum);
    free(self);
}
|
335
518
|
|
336
|
-
SpanEnum *
|
519
|
+
/*
 * Allocate a SpanFirstEnum for +query+ (a SpanFirstQuery).  The wrapped
 * enumerator is obtained from the query's +match+ sub-query via its
 * get_spans() method, then the SpanEnum method table is filled in.
 */
static SpanEnum *spanfe_new(Query *query, IndexReader *ir)
{
    SpanFirstQuery *sfq = SpFQ(query);
    SpanFirstEnum *sfe = (SpanFirstEnum *)emalloc(sizeof(SpanFirstEnum));
    SpanEnum *self = (SpanEnum *)sfe;

    sfe->sub_enum = SpQ(sfq->match)->get_spans(sfq->match, ir);

    self->query   = query;
    self->next    = spanfe_next;
    self->skip_to = spanfe_skip_to;
    self->doc     = spanfe_doc;
    self->start   = spanfe_start;
    self->end     = spanfe_end;
    self->destroy = spanfe_destroy;
    self->to_s    = spanfe_to_s;

    return self;
}
|
354
537
|
|
355
538
|
|
356
539
|
/*****************************************************************************
|
357
|
-
*
|
358
540
|
* SpanOrEnum
|
359
|
-
*
|
360
541
|
*****************************************************************************/
|
361
542
|
|
362
|
-
|
543
|
+
#define SpOEn(span_enum) ((SpanOrEnum *)(span_enum))
|
544
|
+
#define SpOQ(query) ((SpanOrQuery *)(query))
|
545
|
+
|
546
|
+
typedef struct SpanOrEnum
|
547
|
+
{
|
548
|
+
SpanEnum super;
|
549
|
+
PriorityQueue *queue;
|
550
|
+
SpanEnum **span_enums;
|
551
|
+
int s_cnt;
|
552
|
+
bool first_time : 1;
|
553
|
+
} SpanOrEnum;
|
554
|
+
|
555
|
+
|
556
|
+
/*
 * Priority-queue ordering for span enumerators: compare by document first,
 * then by start position, then by end position.  Returns true when the
 * current span of +s1+ sorts strictly before that of +s2+.
 */
static bool span_less_than(SpanEnum *s1, SpanEnum *s2)
{
    int lhs, rhs;

    lhs = s1->doc(s1);
    rhs = s2->doc(s2);
    if (lhs != rhs) {
        return lhs < rhs;
    }

    lhs = s1->start(s1);
    rhs = s2->start(s2);
    if (lhs != rhs) {
        return lhs < rhs;
    }

    return s1->end(s1) < s2->end(s2);
}
|
379
573
|
|
380
|
-
bool spanoe_next(SpanEnum *self)
|
574
|
+
/*
 * Move to the next span in merged order.
 *
 * On the very first call every clause enumerator is advanced once and the
 * ones that produced a span are pushed onto the queue.  On later calls the
 * enumerator at the top of the queue is advanced and either re-sorted or,
 * if it is exhausted, removed.
 *
 * Returns true while the queue still holds at least one enumerator.
 */
static bool spanoe_next(SpanEnum *self)
{
    SpanOrEnum *soe = SpOEn(self);
    SpanEnum *top;
    int idx;

    if (!soe->first_time) {
        if (soe->queue->size == 0) {
            return false;                       /* all done */
        }

        top = (SpanEnum *)pq_top(soe->queue);
        if (top->next(top)) {                   /* move to next */
            pq_down(soe->queue);
            return true;
        }

        pq_pop(soe->queue);                     /* exhausted a clause */
        return soe->queue->size != 0;
    }

    /* first time -- initialize */
    for (idx = 0; idx < soe->s_cnt; idx++) {
        SpanEnum *clause_enum = soe->span_enums[idx];
        if (clause_enum->next(clause_enum)) {   /* move to first entry */
            pq_push(soe->queue, clause_enum);
        }
    }
    soe->first_time = false;
    return soe->queue->size != 0;
}
|
408
605
|
|
409
|
-
bool spanoe_skip_to(SpanEnum *self, int target)
|
606
|
+
/*
 * Skip forward to document +target+.
 *
 * On the first call every clause enumerator is skipped to +target+ and the
 * survivors are queued.  Afterwards, enumerators at the top of the queue
 * that are still before +target+ are skipped one at a time and re-sorted,
 * or dropped once exhausted.
 *
 * Returns true while the queue still holds at least one enumerator.
 */
static bool spanoe_skip_to(SpanEnum *self, int target)
{
    SpanOrEnum *soe = SpOEn(self);
    SpanEnum *top;
    int idx;

    if (soe->first_time) {                      /* first time -- initialize */
        for (idx = 0; idx < soe->s_cnt; idx++) {
            SpanEnum *clause_enum = soe->span_enums[idx];
            if (clause_enum->skip_to(clause_enum, target)) {
                pq_push(soe->queue, clause_enum);
            }
        }
        soe->first_time = false;
        return soe->queue->size != 0;
    }

    while (soe->queue->size != 0) {
        top = (SpanEnum *)pq_top(soe->queue);
        if (top->doc(top) >= target) {
            break;                              /* top already far enough */
        }
        if (top->skip_to(top, target)) {
            pq_down(soe->queue);
        }
        else {
            pq_pop(soe->queue);
        }
    }

    return soe->queue->size != 0;
}
|
435
635
|
|
436
|
-
#define
|
437
|
-
|
438
|
-
int
|
636
|
+
/*
 * Enumerator currently at the top of a SpanOrEnum's queue.  The expansion
 * is fully parenthesised so the cast binds to the pq_top() call even when
 * the macro is followed by "->" or used inside a larger expression.
 */
#define SpOEn_Top_SE(self) ((SpanEnum *)pq_top(SpOEn(self)->queue))
|
637
|
+
|
638
|
+
/* Document number of the span at the top of the queue. */
static int spanoe_doc(SpanEnum *self)
{
    SpanEnum *top = (SpanEnum *)pq_top(SpOEn(self)->queue);
    return top->doc(top);
}
|
443
643
|
|
444
|
-
int
|
644
|
+
/* Start position of the span at the top of the queue. */
static int spanoe_start(SpanEnum *self)
{
    SpanEnum *top = (SpanEnum *)pq_top(SpOEn(self)->queue);
    return top->start(top);
}
|
449
649
|
|
450
|
-
int
|
650
|
+
/* End position of the span at the top of the queue. */
static int spanoe_end(SpanEnum *self)
{
    SpanEnum *top = (SpanEnum *)pq_top(SpOEn(self)->queue);
    return top->end(top);
}
|
455
655
|
|
456
|
-
char *spanoe_to_s(SpanEnum *self)
|
656
|
+
/*
 * Build a printable description "SpanOrEnum(<query>)@<position>", where
 * <position> is "START" before the first advance, "END" once the queue is
 * empty, and "doc:start-end" otherwise.
 *
 * Returns a newly allocated string that the caller must free.
 */
static char *spanoe_to_s(SpanEnum *self)
{
    SpanOrEnum *soe = SpOEn(self);
    char *field = SpQ(self->query)->field;
    char *query_str = self->query->to_s(self->query, field);
    char doc_str[62];                   /* ample for "%d:%d-%d" */
    size_t len = strlen(query_str);
    char *str = ALLOC_N(char, len + 80);

    if (soe->first_time) {
        sprintf(doc_str, "START");
    }
    else {
        if (soe->queue->size == 0) {
            sprintf(doc_str, "END");
        }
        else {
            sprintf(doc_str, "%d:%d-%d", self->doc(self),
                    self->start(self), self->end(self));
        }
    }

    /* The destination buffer must be the first argument: without it the
     * format literal itself is used as the output buffer and +str+ is
     * returned unwritten. */
    sprintf(str, "SpanOrEnum(%s)@%s", query_str, doc_str);
    free(query_str);
    return str;
}
|
479
681
|
|
480
|
-
void spanoe_destroy(SpanEnum *self)
|
682
|
+
/*
 * Release a SpanOrEnum: the queue, every clause enumerator, the array that
 * holds them, and finally the object itself.
 */
static void spanoe_destroy(SpanEnum *self)
{
    SpanOrEnum *soe = SpOEn(self);
    int idx;

    pq_destroy(soe->queue);

    for (idx = 0; idx < soe->s_cnt; idx++) {
        SpanEnum *clause_enum = soe->span_enums[idx];
        clause_enum->destroy(clause_enum);
    }
    free(soe->span_enums);

    free(self);
}
|
494
695
|
|
495
|
-
SpanEnum *
|
696
|
+
/*
 * Allocate a SpanOrEnum for +query+ (a SpanOrQuery): one sub-enumerator is
 * created per clause through the clause's get_spans() method, and an empty
 * priority queue sized for all of them is set up.  The queue is filled on
 * the first next()/skip_to() call.
 */
SpanEnum *spanoe_new(Query *query, IndexReader *ir)
{
    SpanOrQuery *soq = SpOQ(query);
    SpanOrEnum *soe = (SpanOrEnum *)emalloc(sizeof(SpanOrEnum));
    SpanEnum *self = (SpanEnum *)soe;
    int idx;

    soe->first_time = true;
    soe->s_cnt = soq->c_cnt;
    soe->span_enums = ALLOC_N(SpanEnum *, soe->s_cnt);

    for (idx = 0; idx < soe->s_cnt; idx++) {
        Query *clause = soq->clauses[idx];
        soe->span_enums[idx] = SpQ(clause)->get_spans(clause, ir);
    }

    /* no element destructor: the enumerators are freed via span_enums */
    soe->queue = pq_new(soe->s_cnt, (lt_ft)&span_less_than, (free_ft)NULL);

    self->query   = query;
    self->next    = spanoe_next;
    self->skip_to = spanoe_skip_to;
    self->doc     = spanoe_doc;
    self->start   = spanoe_start;
    self->end     = spanoe_end;
    self->destroy = spanoe_destroy;
    self->to_s    = spanoe_to_s;

    return self;
}
|
525
726
|
|
526
727
|
/*****************************************************************************
|
527
|
-
*
|
528
728
|
* SpanNearEnum
|
529
|
-
*
|
530
729
|
*****************************************************************************/
|
531
730
|
|
532
|
-
#define
|
533
|
-
|
534
|
-
|
731
|
+
#define SpNEn(span_enum) ((SpanNearEnum *)(span_enum))
|
732
|
+
#define SpNQ(query) ((SpanNearQuery *)(query))
|
733
|
+
|
734
|
+
typedef struct SpanNearEnum
|
735
|
+
{
|
736
|
+
SpanEnum super;
|
737
|
+
SpanEnum **span_enums;
|
738
|
+
int s_cnt;
|
739
|
+
int slop;
|
740
|
+
int current;
|
741
|
+
int doc;
|
742
|
+
int start;
|
743
|
+
int end;
|
744
|
+
bool first_time : 1;
|
745
|
+
bool in_order : 1;
|
746
|
+
} SpanNearEnum;
|
747
|
+
|
748
|
+
|
749
|
+
/*
 * Advance sne->current to the following clause (wrapping around) and load
 * that clause's enumerator into +se+.  Relies on locals named +sne+ and
 * +se+ at the point of use.
 *
 * No trailing semicolon after "while (0)": the caller's own ';' completes
 * the statement, so the macro stays safe inside un-braced if/else.
 */
#define SpNEn_NEXT() do {\
    sne->current = (sne->current+1) % sne->s_cnt;\
    se = sne->span_enums[sne->current];\
} while (0)
|
536
753
|
|
537
|
-
bool sne_init(SpanNearEnum *sne)
|
754
|
+
/*
 * Starting from the enumerator at sne->current, visit each of the other
 * clause enumerators in turn and skip it to the document reached by its
 * predecessor.
 *
 * Returns false as soon as one enumerator cannot reach that document.
 */
static bool sne_init(SpanNearEnum *sne)
{
    SpanEnum *se = sne->span_enums[sne->current];
    int reached_doc = se->doc(se);
    int remaining;

    for (remaining = sne->s_cnt - 1; remaining > 0; remaining--) {
        /* step to the following clause, wrapping around */
        sne->current = (sne->current + 1) % sne->s_cnt;
        se = sne->span_enums[sne->current];

        if (!se->skip_to(se, reached_doc)) {
            return false;
        }
        reached_doc = se->doc(se);
    }
    return true;
}
|
550
769
|
|
551
|
-
bool sne_goto_next_doc(SpanNearEnum *sne)
|
770
|
+
/*
 * Cycle through the clause enumerators, beginning with the one after
 * sne->current, skipping each one that lags behind to the document of its
 * predecessor.  The walk stops at the first enumerator that is not behind.
 *
 * Returns false when an enumerator is exhausted while skipping.
 */
static bool sne_goto_next_doc(SpanNearEnum *sne)
{
    SpanEnum *se = sne->span_enums[sne->current];
    int wanted_doc = se->doc(se);

    SpNEn_NEXT();

    for (;;) {
        if (se->doc(se) >= wanted_doc) {
            return true;                /* this clause is not behind */
        }
        if (!se->skip_to(se, wanted_doc)) {
            return false;
        }
        wanted_doc = se->doc(se);
        SpNEn_NEXT();
    }
}
|
565
786
|
|
566
|
-
bool sne_next_unordered_match(SpanEnum *self)
|
787
|
+
/*
 * Search for the next match when clause order does not matter.
 *
 * Each round computes, over the current spans of all clauses, the smallest
 * start, the largest end and the summed span lengths.  If the uncovered
 * gap (largest end - smallest start - summed lengths) is within the slop,
 * the match is recorded in sne->doc/start/end.  Otherwise the enumerator
 * with the smallest start is advanced and, if that moved it to a later
 * document, the clauses are re-aligned with sne_goto_next_doc().
 *
 * Returns false once any enumerator involved is exhausted.
 */
static bool sne_next_unordered_match(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    SpanEnum *lowest = NULL;
    int idx;
    int span_start, span_end;
    int lo_start, hi_end, covered;
    int doc_before;

    for (;;) {
        hi_end = 0;
        lo_start = INT_MAX;
        covered = 0;

        for (idx = 0; idx < sne->s_cnt; idx++) {
            SpanEnum *cand = sne->span_enums[idx];

            span_end = cand->end(cand);
            if (span_end > hi_end) {
                hi_end = span_end;
            }

            span_start = cand->start(cand);
            if (span_start < lo_start) {
                lo_start = span_start;
                lowest = cand;
                sne->current = idx; /* current should point to the minimum span */
            }

            covered += span_end - span_start;
        }

        if ((hi_end - lo_start - covered) <= sne->slop) {
            /* we have a match */
            sne->start = lo_start;
            sne->end = hi_end;
            sne->doc = lowest->doc(lowest);
            return true;
        }

        /* increment the minimum span_enum and try again */
        doc_before = lowest->doc(lowest);
        if (!lowest->next(lowest)) {
            return false;
        }
        if (doc_before < lowest->doc(lowest) && !sne_goto_next_doc(sne)) {
            return false;
        }
    }
}
|
579
831
|
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
lengths_sum += end - start;
|
589
|
-
}
|
590
|
-
|
591
|
-
if ((max_end - min_start - lengths_sum) <= sne->slop) {
|
592
|
-
/* we have a match */
|
593
|
-
sne->start = min_start;
|
594
|
-
sne->end = max_end;
|
595
|
-
sne->doc = min_se->doc(min_se);
|
596
|
-
return true;
|
597
|
-
}
|
598
|
-
|
599
|
-
/* increment the minimum span_enum and try again */
|
600
|
-
doc = min_se->doc(min_se);
|
601
|
-
if (!min_se->next(min_se)) return false;
|
602
|
-
if (doc < min_se->doc(min_se)) {
|
603
|
-
if (!sne_goto_next_doc(sne)) return false;
|
604
|
-
}
|
605
|
-
}
|
606
|
-
}
|
607
|
-
|
608
|
-
bool sne_next_ordered_match(SpanEnum *self)
|
609
|
-
{
|
610
|
-
SpanNearEnum *sne = (SpanNearEnum *)self->data;
|
611
|
-
SpanEnum *se;
|
612
|
-
int i;
|
613
|
-
int prev_doc, prev_start, prev_end;
|
614
|
-
int doc=0, start=0, end=0;
|
615
|
-
int lengths_sum;
|
616
|
-
|
617
|
-
while (true) {
|
618
|
-
se = sne->span_enums[0];
|
619
|
-
|
620
|
-
prev_doc = se->doc(se);
|
621
|
-
sne->start = prev_start = se->start(se);
|
622
|
-
prev_end = se->end(se);
|
623
|
-
|
624
|
-
i = 1;
|
625
|
-
lengths_sum = prev_end - prev_start;
|
626
|
-
|
627
|
-
while (i < sne->s_cnt) {
|
628
|
-
se = sne->span_enums[i];
|
629
|
-
doc = se->doc(se);
|
630
|
-
start = se->start(se);
|
631
|
-
end = se->end(se);
|
632
|
-
while ((doc == prev_doc) && ((start < prev_start) ||
|
633
|
-
((start == prev_start) && (end < prev_end)))) {
|
634
|
-
if (!se->next(se)) return false;
|
635
|
-
doc = se->doc(se);
|
636
|
-
start = se->start(se);
|
637
|
-
end = se->end(se);
|
638
|
-
}
|
639
|
-
if (doc != prev_doc) {
|
640
|
-
sne->current = i;
|
641
|
-
if (!sne_goto_next_doc(sne)) return false;
|
642
|
-
break;
|
643
|
-
}
|
644
|
-
i++;
|
645
|
-
lengths_sum += end - start;
|
646
|
-
prev_doc = doc;
|
647
|
-
prev_start = start;
|
648
|
-
prev_end = end;
|
649
|
-
}
|
650
|
-
if (i == sne->s_cnt) {
|
651
|
-
if ((end - sne->start - lengths_sum) <= sne->slop) {
|
652
|
-
/* we have a match */
|
653
|
-
sne->end = end;
|
654
|
-
sne->doc = doc;
|
655
|
-
|
656
|
-
/* the minimum span is always the first span so it needs to be
|
657
|
-
* incremented next time around */
|
658
|
-
sne->current = 0;
|
659
|
-
return true;
|
832
|
+
/*
 * Search for the next match when the clauses must appear in order.
 *
 * Starting from the first clause's current span, each following clause is
 * advanced within the same document until its span no longer sorts before
 * the previous clause's span.  When all clauses line up, the gap between
 * the first start and the last end, minus the summed span lengths, is
 * compared with the slop.  On success sne->doc/start/end describe the
 * match and sne->current is reset to 0 so the first clause is advanced on
 * the next call.
 *
 * Returns false once any enumerator involved is exhausted.
 */
static bool sne_next_ordered_match(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    SpanEnum *se;
    int i;
    int prev_doc, prev_start, prev_end;
    int doc, start, end;
    int lengths_sum;

    while (true) {
        se = sne->span_enums[0];

        prev_doc = se->doc(se);
        sne->start = prev_start = se->start(se);
        prev_end = se->end(se);

        /*
         * Seed the "last clause" values from the first clause.  With more
         * than one clause they are overwritten below before being used;
         * with exactly one clause the loop below never runs, and without
         * this seed the match would be reported with doc 0 and end 0.
         */
        doc = prev_doc;
        start = prev_start;
        end = prev_end;

        i = 1;
        lengths_sum = prev_end - prev_start;

        while (i < sne->s_cnt) {
            se = sne->span_enums[i];
            doc = se->doc(se);
            start = se->start(se);
            end = se->end(se);

            /* advance this clause while it still sorts before the
             * previous clause inside the same document */
            while ((doc == prev_doc) && ((start < prev_start) ||
                        ((start == prev_start) && (end < prev_end)))) {
                if (!se->next(se)) {
                    return false;
                }
                doc = se->doc(se);
                start = se->start(se);
                end = se->end(se);
            }

            if (doc != prev_doc) {
                /* this clause left the document: re-align and restart */
                sne->current = i;
                if (!sne_goto_next_doc(sne)) {
                    return false;
                }
                break;
            }

            i++;
            lengths_sum += end - start;
            prev_doc = doc;
            prev_start = start;
            prev_end = end;
        }

        if (i == sne->s_cnt) {
            if ((end - sne->start - lengths_sum) <= sne->slop) {
                /* we have a match */
                sne->end = end;
                sne->doc = doc;

                /* the minimum span is always the first span so it needs to be
                 * incremented next time around */
                sne->current = 0;
                return true;
            }
            else {
                /* too far apart: advance the first clause and retry */
                se = sne->span_enums[0];
                if (!se->next(se)) {
                    return false;
                }
                if (se->doc(se) != prev_doc) {
                    sne->current = 0;
                    if (!sne_goto_next_doc(sne)) {
                        return false;
                    }
                }
            }
        }
    }
}
|
672
|
-
|
673
|
-
bool sne_next_match(SpanEnum *self)
|
905
|
+
|
906
|
+
/*
 * Common tail of spanne_next() and spanne_skip_to(): bring all clause
 * enumerators onto a common document and then run the ordered or
 * unordered matcher, depending on sne->in_order.
 *
 * Expects the enumerator at sne->current to have been advanced by the
 * caller already.
 */
static bool sne_next_match(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    SpanEnum *se_curr, *se_next;

    /*
     * One-time initialisation.  The flag starts out true (set by the
     * constructor) and is cleared only here, so the test must be on
     * first_time being SET; testing the negation leaves sne_init()
     * unreachable, keeps the flag true forever, and lets the doc()
     * calls below run on sub-enumerators that were never advanced.
     */
    if (sne->first_time) {
        if (!sne_init(sne)) {
            return false;
        }
        sne->first_time = false;
    }

    se_curr = sne->span_enums[sne->current];
    se_next = sne->span_enums[(sne->current+1)%sne->s_cnt];
    if (se_curr->doc(se_curr) > se_next->doc(se_next)) {
        /* the following clause lags behind: re-align on one document */
        if (!sne_goto_next_doc(sne)) {
            return false;
        }
    }

    if (sne->in_order) {
        return sne_next_ordered_match(self);
    }
    else {
        return sne_next_unordered_match(self);
    }
}
|
694
932
|
|
695
|
-
bool spanne_next(SpanEnum *self)
|
933
|
+
/*
 * Advance the enumerator at sne->current by one span and then look for the
 * next match.  Returns false when that enumerator is exhausted or when no
 * further match exists.
 */
static bool spanne_next(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    SpanEnum *cur = sne->span_enums[sne->current];

    if (cur->next(cur)) {
        return sne_next_match(self);
    }
    return false;
}
|
705
943
|
|
706
|
-
bool spanne_skip_to(SpanEnum *self, int target)
|
944
|
+
/*
 * Skip the enumerator at sne->current to document +target+ and then look
 * for the next match.  Returns false when the skip fails or when no
 * further match exists.
 */
static bool spanne_skip_to(SpanEnum *self, int target)
{
    SpanNearEnum *sne = SpNEn(self);
    SpanEnum *cur = sne->span_enums[sne->current];

    if (cur->skip_to(cur, target)) {
        return sne_next_match(self);
    }
    return false;
}
|
716
953
|
|
717
|
-
|
718
|
-
|
719
|
-
int spanne_doc(SpanEnum *self)
|
954
|
+
/* Document number recorded for the current match. */
static int spanne_doc(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    return sne->doc;
}
|
724
958
|
|
725
|
-
int
|
959
|
+
/* Start position recorded for the current match. */
static int spanne_start(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    return sne->start;
}
|
730
963
|
|
731
|
-
int
|
964
|
+
/* End position recorded for the current match. */
static int spanne_end(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    return sne->end;
}
|
736
968
|
|
737
|
-
char *spanne_to_s(SpanEnum *self)
|
969
|
+
/*
 * Build a printable description "SpanNearEnum(<query>)@<position>", where
 * <position> is "START" while first_time is still set and "doc:start-end"
 * otherwise.
 *
 * Returns a newly allocated string that the caller must free.
 */
static char *spanne_to_s(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    char *field = SpQ(self->query)->field;
    char *query_str = self->query->to_s(self->query, field);
    char doc_str[62];                   /* ample for "%d:%d-%d" */
    size_t len = strlen(query_str);
    char *str = ALLOC_N(char, len + 80);

    if (sne->first_time) {
        sprintf(doc_str, "START");
    }
    else {
        sprintf(doc_str, "%d:%d-%d", self->doc(self),
                self->start(self), self->end(self));
    }

    /* The destination buffer must be the first argument: without it the
     * format literal itself is used as the output buffer and +str+ is
     * returned unwritten. */
    sprintf(str, "SpanNearEnum(%s)@%s", query_str, doc_str);
    free(query_str);
    return str;
}
|
756
989
|
|
757
|
-
void spanne_destroy(SpanEnum *self)
|
990
|
+
/*
 * Release a SpanNearEnum: every clause enumerator, the array that holds
 * them, and finally the object itself.
 */
static void spanne_destroy(SpanEnum *self)
{
    SpanNearEnum *sne = SpNEn(self);
    int idx;

    for (idx = 0; idx < sne->s_cnt; idx++) {
        SpanEnum *clause_enum = sne->span_enums[idx];
        clause_enum->destroy(clause_enum);
    }
    free(sne->span_enums);

    free(self);
}
|
770
1002
|
|
771
|
-
SpanEnum *
|
1003
|
+
/*
 * Allocate a SpanNearEnum for +query+ (a SpanNearQuery): copy the
 * query's slop and ordering flag, create one sub-enumerator per clause
 * through the clause's get_spans() method, and start with no current
 * match (doc, start and end are all -1).
 */
static SpanEnum *spanne_new(Query *query, IndexReader *ir)
{
    SpanNearQuery *snq = SpNQ(query);
    SpanNearEnum *sne = (SpanNearEnum *)emalloc(sizeof(SpanNearEnum));
    SpanEnum *self = (SpanEnum *)sne;
    int idx;

    sne->first_time = true;
    sne->in_order = snq->in_order;
    sne->slop = snq->slop;
    sne->s_cnt = snq->c_cnt;
    sne->span_enums = ALLOC_N(SpanEnum *, sne->s_cnt);

    for (idx = 0; idx < sne->s_cnt; idx++) {
        Query *clause = snq->clauses[idx];
        sne->span_enums[idx] = SpQ(clause)->get_spans(clause, ir);
    }
    sne->current = 0;

    sne->doc = -1;
    sne->start = -1;
    sne->end = -1;

    self->query   = query;
    self->next    = spanne_next;
    self->skip_to = spanne_skip_to;
    self->doc     = spanne_doc;
    self->start   = spanne_start;
    self->end     = spanne_end;
    self->destroy = spanne_destroy;
    self->to_s    = spanne_to_s;

    return self;
}
|
806
1037
|
|
807
1038
|
/*****************************************************************************
|
@@ -810,211 +1041,348 @@ SpanEnum *spanne_create(Query *query, IndexReader *ir)
|
|
810
1041
|
*
|
811
1042
|
*****************************************************************************/
|
812
1043
|
|
813
|
-
|
1044
|
+
#define SpXEn(span_enum) ((SpanNotEnum *)(span_enum))
|
1045
|
+
#define SpXQ(query) ((SpanNotQuery *)(query))
|
1046
|
+
|
1047
|
+
typedef struct SpanNotEnum
|
814
1048
|
{
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
1049
|
+
SpanEnum super;
|
1050
|
+
SpanEnum *inc;
|
1051
|
+
SpanEnum *exc;
|
1052
|
+
bool more_inc : 1;
|
1053
|
+
bool more_exc : 1;
|
1054
|
+
} SpanNotEnum;
|
820
1055
|
|
821
|
-
while (sxe->more_inc && sxe->more_exc) {
|
822
|
-
if (inc->doc(inc) > exc->doc(exc)) { // skip excl
|
823
|
-
sxe->more_exc = exc->skip_to(exc, inc->doc(inc));
|
824
|
-
}
|
825
1056
|
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
1057
|
+
/*
 * Advance to the next include span that is not intersected by an exclude
 * span.
 *
 * The include enumerator is stepped once; then, for as long as both
 * enumerators have spans, the exclude enumerator is brought up to the
 * include span and the include span is accepted unless the two intersect
 * within the same document.
 *
 * Returns whether the include enumerator still has a current span.
 */
static bool spanxe_next(SpanEnum *self)
{
    SpanNotEnum *sxe = SpXEn(self);
    SpanEnum *incl = sxe->inc;
    SpanEnum *excl = sxe->exc;

    if (sxe->more_inc) {                        /* move to next incl */
        sxe->more_inc = incl->next(incl);
    }

    for (;;) {
        if (!sxe->more_inc || !sxe->more_exc) {
            break;
        }

        if (incl->doc(incl) > excl->doc(excl)) {    /* skip excl */
            sxe->more_exc = excl->skip_to(excl, incl->doc(incl));
        }

        /* while excl is before incl in the same document, increment excl */
        while (sxe->more_exc
               && (incl->doc(incl) == excl->doc(excl))
               && (excl->end(excl) <= incl->start(incl))) {
            sxe->more_exc = excl->next(excl);
        }

        /* no intersection means we found a match */
        if (!sxe->more_exc) {
            break;
        }
        if (incl->doc(incl) != excl->doc(excl)) {
            break;
        }
        if (incl->end(incl) <= excl->start(excl)) {
            break;
        }

        sxe->more_inc = incl->next(incl);       /* intersected: keep scanning */
    }

    return sxe->more_inc;
}
|
842
1086
|
|
843
|
-
bool spanxe_skip_to(SpanEnum *self, int target)
|
1087
|
+
/*
 * Skip the include enumerator to document +target+ and position this
 * enumerator on the first include span from there on that is not
 * intersected by an exclude span.
 *
 * Returns false when the include enumerator is (or becomes) exhausted,
 * true when positioned on a match.
 */
static bool spanxe_skip_to(SpanEnum *self, int target)
{
    SpanNotEnum *sxe = SpXEn(self);
    SpanEnum *inc = sxe->inc, *exc = sxe->exc;
    int doc;

    if (sxe->more_inc) {                        /* move to next incl */
        sxe->more_inc = inc->skip_to(inc, target);
    }
    if (!sxe->more_inc) {
        /* Also covers an include enumerator that was already exhausted on
         * entry: there is no current span, so no match can be reported. */
        return false;
    }

    /* Touch the exclude enumerator only while it still has spans, as
     * spanxe_next() does. */
    if (sxe->more_exc && ((doc = inc->doc(inc)) > exc->doc(exc))) {
        sxe->more_exc = exc->skip_to(exc, doc);
    }

    while (sxe->more_exc                        /* while excl is before */
           && inc->doc(inc) == exc->doc(exc)
           && exc->end(exc) <= inc->start(inc)) {
        sxe->more_exc = exc->next(exc);         /* increment excl */
    }

    if (!sxe->more_exc ||                       /* if no intersection */
        inc->doc(inc) != exc->doc(exc) ||
        inc->end(inc) <= exc->start(exc)) {
        return true;                            /* we found a match */
    }

    return spanxe_next(self);                   /* scan to next match */
}
|
871
1115
|
|
872
|
-
int
|
1116
|
+
/* Document number of the include enumerator's current span. */
static int spanxe_doc(SpanEnum *self)
{
    SpanNotEnum *sxe = SpXEn(self);
    return sxe->inc->doc(sxe->inc);
}
|
877
1121
|
|
878
|
-
int
|
1122
|
+
/* Start position of the include enumerator's current span. */
static int spanxe_start(SpanEnum *self)
{
    SpanNotEnum *sxe = SpXEn(self);
    return sxe->inc->start(sxe->inc);
}
|
883
1127
|
|
884
|
-
int
|
1128
|
+
/* End position of the include enumerator's current span. */
static int spanxe_end(SpanEnum *self)
{
    SpanNotEnum *sxe = SpXEn(self);
    return sxe->inc->end(sxe->inc);
}
|
889
1133
|
|
890
|
-
char *spanxe_to_s(SpanEnum *self)
|
1134
|
+
/*
 * Build a printable description of this enumerator of the form
 * "SpanNotEnum(<query>)".  The result comes from strfmt() and is handed
 * to the caller; the temporary query string is released here.
 */
static char *spanxe_to_s(SpanEnum *self)
{
    char *q_str = self->query->to_s(self->query, SpQ(self->query)->field);
    char *description = strfmt("SpanNotEnum(%s)", q_str);

    free(q_str);
    return description;
}
|
898
1142
|
|
899
|
-
void spanxe_destroy(SpanEnum *self)
|
1143
|
+
/* Destroy the include and exclude enumerators, then free this object. */
static void spanxe_destroy(SpanEnum *self)
{
    SpanEnum *incl = SpXEn(self)->inc;
    SpanEnum *excl = SpXEn(self)->exc;

    incl->destroy(incl);
    excl->destroy(excl);
    free(self);
}
907
1150
|
|
908
|
-
SpanEnum *
|
1151
|
+
/*
 * Allocate a SpanNotEnum for +query+ (a SpanNotQuery).  Both sub-queries
 * supply their enumerators through get_spans().  The exclude enumerator is
 * advanced once right away so that more_exc reflects whether it has any
 * span at all; the include enumerator is first advanced by next() or
 * skip_to().
 */
static SpanEnum *spanxe_new(Query *query, IndexReader *ir)
{
    SpanNotQuery *sxq = SpXQ(query);
    SpanEnum *self = (SpanEnum *)emalloc(sizeof(SpanNotEnum));
    SpanNotEnum *sxe = SpXEn(self);

    sxe->inc = SpQ(sxq->inc)->get_spans(sxq->inc, ir);
    sxe->exc = SpQ(sxq->exc)->get_spans(sxq->exc, ir);
    sxe->more_inc = true;
    sxe->more_exc = sxe->exc->next(sxe->exc);

    self->query   = query;
    self->next    = spanxe_next;
    self->skip_to = spanxe_skip_to;
    self->doc     = spanxe_doc;
    self->start   = spanxe_start;
    self->end     = spanxe_end;
    self->destroy = spanxe_destroy;
    self->to_s    = spanxe_to_s;

    return self;
}
|
1173
|
+
|
930
1174
|
/*****************************************************************************
|
931
1175
|
*
|
932
|
-
*
|
1176
|
+
* SpanWeight
|
933
1177
|
*
|
934
1178
|
*****************************************************************************/
|
935
1179
|
|
936
|
-
|
1180
|
+
#define SpW(weight) ((SpanWeight *)(weight))
|
1181
|
+
typedef struct SpanWeight
|
1182
|
+
{
|
1183
|
+
Weight super;
|
1184
|
+
HashSet *terms;
|
1185
|
+
} SpanWeight;
|
1186
|
+
|
1187
|
+
static Explanation *spanw_explain(Weight *self, IndexReader *ir, int target)
|
1188
|
+
{
|
1189
|
+
Explanation *expl;
|
1190
|
+
Explanation *idf_expl1;
|
1191
|
+
Explanation *idf_expl2;
|
1192
|
+
Explanation *query_expl;
|
1193
|
+
Explanation *qnorm_expl;
|
1194
|
+
Explanation *field_expl;
|
1195
|
+
Explanation *tf_expl;
|
1196
|
+
Scorer *scorer;
|
1197
|
+
uchar *field_norms;
|
1198
|
+
float field_norm;
|
1199
|
+
Explanation *field_norm_expl;
|
1200
|
+
|
1201
|
+
char *query_str;
|
1202
|
+
HashSet *terms = SpW(self)->terms;
|
1203
|
+
char *field = SpQ(self->query)->field;
|
1204
|
+
const int field_num = fis_get_field_num(ir->fis, field);
|
1205
|
+
char *doc_freqs = NULL;
|
1206
|
+
size_t df_i = 0;
|
1207
|
+
int i;
|
1208
|
+
|
1209
|
+
if (field_num < 0) {
|
1210
|
+
return expl_new(0.0, "field \"%s\" does not exist in the index", field);
|
1211
|
+
}
|
1212
|
+
|
1213
|
+
query_str = self->query->to_s(self->query, "");
|
1214
|
+
|
1215
|
+
for (i = 0; i < terms->size; i++) {
|
1216
|
+
char *term = (char *)terms->elems[i];
|
1217
|
+
REALLOC_N(doc_freqs, char, df_i + strlen(term) + 23);
|
1218
|
+
sprintf(doc_freqs + df_i, "%s=%d, ", term,
|
1219
|
+
ir->doc_freq(ir, field_num, term));
|
1220
|
+
df_i = strlen(doc_freqs);
|
1221
|
+
}
|
1222
|
+
/* remove the ',' at the end of the string if it exists */
|
1223
|
+
if (terms->size > 0) {
|
1224
|
+
df_i -= 2;
|
1225
|
+
doc_freqs[df_i] = '\0';
|
1226
|
+
}
|
1227
|
+
else {
|
1228
|
+
doc_freqs = "";
|
1229
|
+
}
|
1230
|
+
|
1231
|
+
expl = expl_new(0.0, "weight(%s in %d), product of:", query_str, target);
|
1232
|
+
|
1233
|
+
/* We need two of these as it's included in both the query explanation
|
1234
|
+
* and the field explanation */
|
1235
|
+
idf_expl1 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
|
1236
|
+
idf_expl2 = expl_new(self->idf, "idf(%s: %s)", field, doc_freqs);
|
1237
|
+
if (terms->size > 0) {
|
1238
|
+
free(doc_freqs); /* only free if allocated */
|
1239
|
+
}
|
1240
|
+
|
1241
|
+
/* explain query weight */
|
1242
|
+
query_expl = expl_new(0.0, "query_weight(%s), product of:", query_str);
|
1243
|
+
|
1244
|
+
if (self->query->boost != 1.0) {
|
1245
|
+
expl_add_detail(query_expl, expl_new(self->query->boost, "boost"));
|
1246
|
+
}
|
1247
|
+
|
1248
|
+
expl_add_detail(query_expl, idf_expl1);
|
1249
|
+
|
1250
|
+
qnorm_expl = expl_new(self->qnorm, "query_norm");
|
1251
|
+
expl_add_detail(query_expl, qnorm_expl);
|
1252
|
+
|
1253
|
+
query_expl->value = self->query->boost * idf_expl1->value * qnorm_expl->value;
|
1254
|
+
|
1255
|
+
expl_add_detail(expl, query_expl);
|
1256
|
+
|
1257
|
+
/* explain field weight */
|
1258
|
+
field_expl = expl_new(0.0, "field_weight(%s:%s in %d), product of:",
|
1259
|
+
field, query_str, target);
|
1260
|
+
free(query_str);
|
1261
|
+
|
1262
|
+
scorer = self->scorer(self, ir);
|
1263
|
+
tf_expl = scorer->explain(scorer, target);
|
1264
|
+
scorer->destroy(scorer);
|
1265
|
+
expl_add_detail(field_expl, tf_expl);
|
1266
|
+
expl_add_detail(field_expl, idf_expl2);
|
1267
|
+
|
1268
|
+
field_norms = ir->get_norms(ir, field_num);
|
1269
|
+
field_norm = (field_norms
|
1270
|
+
? sim_decode_norm(self->similarity, field_norms[target])
|
1271
|
+
: (float)0.0);
|
1272
|
+
field_norm_expl = expl_new(field_norm, "field_norm(field=%s, doc=%d)",
|
1273
|
+
field, target);
|
1274
|
+
expl_add_detail(field_expl, field_norm_expl);
|
1275
|
+
|
1276
|
+
field_expl->value = tf_expl->value * idf_expl2->value * field_norm_expl->value;
|
1277
|
+
|
1278
|
+
/* combine them */
|
1279
|
+
if (query_expl->value == 1.0) {
|
1280
|
+
expl_destroy(expl);
|
1281
|
+
return field_expl;
|
1282
|
+
}
|
1283
|
+
else {
|
1284
|
+
expl->value = (query_expl->value * field_expl->value);
|
1285
|
+
expl_add_detail(expl, field_expl);
|
1286
|
+
return expl;
|
1287
|
+
}
|
1288
|
+
}
|
1289
|
+
|
1290
|
+
static char *spanw_to_s(Weight *self)
|
1291
|
+
{
|
1292
|
+
return strfmt("SpanWeight(%f)", self->value);
|
1293
|
+
}
|
1294
|
+
|
1295
|
+
static void spanw_destroy(Weight *self)
|
937
1296
|
{
|
938
|
-
|
939
|
-
|
940
|
-
|
1297
|
+
hs_destroy(SpW(self)->terms);
|
1298
|
+
w_destroy(self);
|
1299
|
+
}
|
1300
|
+
|
1301
|
+
static Weight *spanw_new(Query *query, Searcher *searcher)
|
1302
|
+
{
|
1303
|
+
int i;
|
1304
|
+
Weight *self = w_new(SpanWeight, query);
|
1305
|
+
HashSet *terms = SpQ(query)->get_terms(query);
|
1306
|
+
|
1307
|
+
SpW(self)->terms = terms;
|
1308
|
+
self->scorer = &spansc_new;
|
1309
|
+
self->explain = &spanw_explain;
|
1310
|
+
self->to_s = &spanw_to_s;
|
1311
|
+
self->destroy = &spanw_destroy;
|
1312
|
+
|
1313
|
+
self->similarity = query->get_similarity(query, searcher);
|
1314
|
+
|
1315
|
+
self->idf = 0.0;
|
1316
|
+
|
1317
|
+
for (i = terms->size - 1; i >= 0; i--) {
|
1318
|
+
self->idf += sim_idf_term(self->similarity, SpQ(query)->field,
|
1319
|
+
(char *)terms->elems[i], searcher);
|
1320
|
+
}
|
1321
|
+
|
1322
|
+
return self;
|
941
1323
|
}
|
942
1324
|
|
943
1325
|
/*****************************************************************************
|
944
|
-
*
|
945
1326
|
* SpanTermQuery
|
946
|
-
*
|
947
1327
|
*****************************************************************************/
|
948
1328
|
|
949
|
-
char *spantq_to_s(Query *self, char *field)
|
1329
|
+
static char *spantq_to_s(Query *self, const char *field)
|
950
1330
|
{
|
951
|
-
|
952
|
-
|
953
|
-
|
954
|
-
|
955
|
-
|
956
|
-
|
957
|
-
}
|
958
|
-
res = strfmt("span_term(%s)", term_str);
|
959
|
-
free(term_str);
|
960
|
-
return res;
|
1331
|
+
if (field == SpQ(self)->field) {
|
1332
|
+
return strfmt("span_terms(%s)", SpTQ(self)->term);
|
1333
|
+
}
|
1334
|
+
else {
|
1335
|
+
return strfmt("span_terms(%s:%s)", SpQ(self)->field, SpTQ(self)->term);
|
1336
|
+
}
|
961
1337
|
}
|
962
1338
|
|
963
|
-
static void
|
1339
|
+
static void spantq_destroy_i(Query *self)
|
964
1340
|
{
|
965
|
-
|
966
|
-
|
967
|
-
|
968
|
-
term_destroy(term);
|
969
|
-
}
|
970
|
-
free(sq);
|
971
|
-
q_destroy_i(self);
|
1341
|
+
free(SpTQ(self)->term);
|
1342
|
+
free(SpQ(self)->field);
|
1343
|
+
spanq_destroy_i(self);
|
972
1344
|
}
|
973
1345
|
|
974
1346
|
static void spantq_extract_terms(Query *self, HashSet *terms)
|
975
1347
|
{
|
976
|
-
|
977
|
-
hs_add(terms, term_clone(term));
|
1348
|
+
hs_add(terms, term_new(SpQ(self)->field, SpTQ(self)->term));
|
978
1349
|
}
|
979
1350
|
|
980
1351
|
static HashSet *spantq_get_terms(Query *self)
|
981
1352
|
{
|
982
|
-
|
983
|
-
|
984
|
-
|
985
|
-
return terms;
|
1353
|
+
HashSet *terms = hs_new_str(&free);
|
1354
|
+
hs_add(terms, estrdup(SpTQ(self)->term));
|
1355
|
+
return terms;
|
986
1356
|
}
|
987
1357
|
|
988
|
-
static
|
1358
|
+
static ulong spantq_hash(Query *self)
|
989
1359
|
{
|
990
|
-
|
1360
|
+
return spanq_hash(self) ^ str_hash(SpTQ(self)->term);
|
991
1361
|
}
|
992
1362
|
|
993
1363
|
static int spantq_eq(Query *self, Query *o)
|
994
1364
|
{
|
995
|
-
|
996
|
-
(Term *)((SpanQuery *)o->data)->data);
|
1365
|
+
return spanq_eq(self, o) && strcmp(SpTQ(self)->term, SpTQ(o)->term) == 0;
|
997
1366
|
}
|
998
1367
|
|
999
|
-
Query *
|
1368
|
+
Query *spantq_new(const char *field, const char *term)
|
1000
1369
|
{
|
1001
|
-
|
1370
|
+
Query *self = q_new(SpanTermQuery);
|
1002
1371
|
|
1003
|
-
|
1004
|
-
|
1005
|
-
|
1006
|
-
|
1007
|
-
sq->field = term->field;
|
1008
|
-
self->data = sq;
|
1372
|
+
SpTQ(self)->term = estrdup(term);
|
1373
|
+
SpQ(self)->field = estrdup(field);
|
1374
|
+
SpQ(self)->get_spans = &spante_new;
|
1375
|
+
SpQ(self)->get_terms = &spantq_get_terms;
|
1009
1376
|
|
1010
|
-
|
1011
|
-
|
1012
|
-
|
1013
|
-
|
1014
|
-
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1377
|
+
self->type = SPAN_TERM_QUERY;
|
1378
|
+
self->extract_terms = &spantq_extract_terms;
|
1379
|
+
self->to_s = &spantq_to_s;
|
1380
|
+
self->hash = &spantq_hash;
|
1381
|
+
self->eq = &spantq_eq;
|
1382
|
+
self->destroy_i = &spantq_destroy_i;
|
1383
|
+
self->create_weight_i = &spanw_new;
|
1384
|
+
self->get_matchv_i = &spanq_get_matchv_i;
|
1385
|
+
return self;
|
1018
1386
|
}
|
1019
1387
|
|
1020
1388
|
/*****************************************************************************
|
@@ -1023,91 +1391,87 @@ Query *spantq_create(Term *term)
|
|
1023
1391
|
*
|
1024
1392
|
*****************************************************************************/
|
1025
1393
|
|
1026
|
-
char *spanfq_to_s(Query *self, char *field)
|
1394
|
+
static char *spanfq_to_s(Query *self, const char *field)
|
1027
1395
|
{
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
|
1033
|
-
return res;
|
1396
|
+
Query *match = SpFQ(self)->match;
|
1397
|
+
char *q_str = match->to_s(match, field);
|
1398
|
+
char *res = strfmt("span_first(%s, %d)", q_str, SpFQ(self)->end);
|
1399
|
+
free(q_str);
|
1400
|
+
return res;
|
1034
1401
|
}
|
1035
1402
|
|
1036
|
-
void spanfq_extract_terms(Query *self, HashSet *terms)
|
1403
|
+
static void spanfq_extract_terms(Query *self, HashSet *terms)
|
1037
1404
|
{
|
1038
|
-
|
1039
|
-
sfq->match->extract_terms(sfq->match, terms);
|
1405
|
+
SpFQ(self)->match->extract_terms(SpFQ(self)->match, terms);
|
1040
1406
|
}
|
1041
1407
|
|
1042
|
-
HashSet *spanfq_get_terms(Query *self)
|
1408
|
+
static HashSet *spanfq_get_terms(Query *self)
|
1043
1409
|
{
|
1044
|
-
|
1045
|
-
|
1046
|
-
return match_sq->get_terms(sfq->match);
|
1410
|
+
SpanFirstQuery *sfq = SpFQ(self);
|
1411
|
+
return SpQ(sfq->match)->get_terms(sfq->match);
|
1047
1412
|
}
|
1048
1413
|
|
1049
|
-
Query *spanfq_rewrite(Query *self, IndexReader *ir)
|
1414
|
+
static Query *spanfq_rewrite(Query *self, IndexReader *ir)
|
1050
1415
|
{
|
1051
|
-
|
1052
|
-
Query *q, *rq;
|
1416
|
+
Query *q, *rq;
|
1053
1417
|
|
1054
|
-
|
1055
|
-
|
1056
|
-
|
1057
|
-
|
1418
|
+
q = SpFQ(self)->match;
|
1419
|
+
rq = q->rewrite(q, ir);
|
1420
|
+
q_deref(q);
|
1421
|
+
SpFQ(self)->match = rq;
|
1058
1422
|
|
1059
|
-
|
1060
|
-
|
1423
|
+
self->ref_cnt++;
|
1424
|
+
return self; /* no clauses rewrote */
|
1061
1425
|
}
|
1062
1426
|
|
1063
|
-
void
|
1427
|
+
static void spanfq_destroy_i(Query *self)
|
1064
1428
|
{
|
1065
|
-
|
1066
|
-
|
1067
|
-
if (self->destroy_all) q_deref(sfq->match);
|
1068
|
-
free(sfq);
|
1069
|
-
free(sq);
|
1070
|
-
q_destroy_i(self);
|
1429
|
+
q_deref(SpFQ(self)->match);
|
1430
|
+
spanq_destroy_i(self);
|
1071
1431
|
}
|
1072
1432
|
|
1073
|
-
static
|
1433
|
+
static ulong spanfq_hash(Query *self)
|
1074
1434
|
{
|
1075
|
-
|
1076
|
-
|
1435
|
+
return spanq_hash(self) ^ SpFQ(self)->match->hash(SpFQ(self)->match)
|
1436
|
+
^ SpFQ(self)->end;
|
1077
1437
|
}
|
1078
1438
|
|
1079
1439
|
static int spanfq_eq(Query *self, Query *o)
|
1080
1440
|
{
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1441
|
+
SpanFirstQuery *sfq1 = SpFQ(self);
|
1442
|
+
SpanFirstQuery *sfq2 = SpFQ(o);
|
1443
|
+
return spanq_eq(self, o) && sfq1->match->eq(sfq1->match, sfq2->match)
|
1444
|
+
&& (sfq1->end == sfq2->end);
|
1084
1445
|
}
|
1085
1446
|
|
1086
|
-
Query *
|
1447
|
+
Query *spanfq_new_nr(Query *match, int end)
|
1087
1448
|
{
|
1088
|
-
|
1449
|
+
Query *self = q_new(SpanFirstQuery);
|
1089
1450
|
|
1090
|
-
|
1451
|
+
SpFQ(self)->match = match;
|
1452
|
+
SpFQ(self)->end = end;
|
1091
1453
|
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
sq->data = sfq;
|
1454
|
+
SpQ(self)->field = SpQ(match)->field;
|
1455
|
+
SpQ(self)->get_spans = &spanfe_new;
|
1456
|
+
SpQ(self)->get_terms = &spanfq_get_terms;
|
1096
1457
|
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1458
|
+
self->type = SPAN_FIRST_QUERY;
|
1459
|
+
self->rewrite = &spanfq_rewrite;
|
1460
|
+
self->extract_terms = &spanfq_extract_terms;
|
1461
|
+
self->to_s = &spanfq_to_s;
|
1462
|
+
self->hash = &spanfq_hash;
|
1463
|
+
self->eq = &spanfq_eq;
|
1464
|
+
self->destroy_i = &spanfq_destroy_i;
|
1465
|
+
self->create_weight_i = &spanw_new;
|
1466
|
+
self->get_matchv_i = &spanq_get_matchv_i;
|
1467
|
+
|
1468
|
+
return self;
|
1469
|
+
}
|
1101
1470
|
|
1102
|
-
|
1103
|
-
|
1104
|
-
|
1105
|
-
|
1106
|
-
self->hash = &spanfq_hash;
|
1107
|
-
self->eq = &spanfq_eq;
|
1108
|
-
self->destroy_i = &spanfq_destroy;
|
1109
|
-
self->create_weight_i = &spanw_create;
|
1110
|
-
return self;
|
1471
|
+
Query *spanfq_new(Query *match, int end)
|
1472
|
+
{
|
1473
|
+
REF(match);
|
1474
|
+
return spanfq_new_nr(match, end);
|
1111
1475
|
}
|
1112
1476
|
|
1113
1477
|
/*****************************************************************************
|
@@ -1116,154 +1480,182 @@ Query *spanfq_create(Query *match, int end)
|
|
1116
1480
|
*
|
1117
1481
|
*****************************************************************************/
|
1118
1482
|
|
1119
|
-
char *spanoq_to_s(Query *self, char *field)
|
1120
|
-
{
|
1121
|
-
|
1122
|
-
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1127
|
-
|
1128
|
-
|
1129
|
-
|
1130
|
-
|
1131
|
-
free(q_str);
|
1132
|
-
}
|
1133
|
-
strcat(res, "]");
|
1483
|
+
static char *spanoq_to_s(Query *self, const char *field)
|
1484
|
+
{
|
1485
|
+
int i;
|
1486
|
+
SpanOrQuery *soq = SpOQ(self);
|
1487
|
+
char *res, *res_p;
|
1488
|
+
char **q_strs = ALLOC_N(char *, soq->c_cnt);
|
1489
|
+
int len = 50;
|
1490
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1491
|
+
Query *clause = soq->clauses[i];
|
1492
|
+
q_strs[i] = clause->to_s(clause, field);
|
1493
|
+
len += strlen(q_strs[i]) + 2;
|
1494
|
+
}
|
1134
1495
|
|
1135
|
-
|
1496
|
+
res_p = res = ALLOC_N(char, len);
|
1497
|
+
sprintf(res_p, "span_or[ ");
|
1498
|
+
res_p += strlen(res_p);
|
1499
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1500
|
+
sprintf(res_p, "%s, ", q_strs[i]);
|
1501
|
+
free(q_strs[i]);
|
1502
|
+
res_p += strlen(res_p);
|
1503
|
+
}
|
1504
|
+
free(q_strs);
|
1505
|
+
|
1506
|
+
sprintf(res_p - 2, " ]");
|
1507
|
+
return res;
|
1136
1508
|
}
|
1137
1509
|
|
1138
|
-
void spanoq_extract_terms(Query *self, HashSet *terms)
|
1510
|
+
static void spanoq_extract_terms(Query *self, HashSet *terms)
|
1139
1511
|
{
|
1140
|
-
|
1141
|
-
|
1142
|
-
|
1143
|
-
|
1144
|
-
|
1145
|
-
|
1146
|
-
}
|
1512
|
+
SpanOrQuery *soq = SpOQ(self);
|
1513
|
+
int i;
|
1514
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1515
|
+
Query *clause = soq->clauses[i];
|
1516
|
+
clause->extract_terms(clause, terms);
|
1517
|
+
}
|
1147
1518
|
}
|
1148
1519
|
|
1149
|
-
HashSet *spanoq_get_terms(Query *self)
|
1520
|
+
static HashSet *spanoq_get_terms(Query *self)
|
1150
1521
|
{
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1522
|
+
SpanOrQuery *soq = SpOQ(self);
|
1523
|
+
HashSet *terms = hs_new_str(&free);
|
1524
|
+
int i;
|
1525
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1526
|
+
Query *clause = soq->clauses[i];
|
1527
|
+
HashSet *sub_terms = SpQ(clause)->get_terms(clause);
|
1528
|
+
hs_merge(terms, sub_terms);
|
1529
|
+
}
|
1159
1530
|
|
1160
|
-
|
1531
|
+
return terms;
|
1161
1532
|
}
|
1162
1533
|
|
1163
|
-
SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
|
1534
|
+
static SpanEnum *spanoq_get_spans(Query *self, IndexReader *ir)
|
1164
1535
|
{
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
}
|
1536
|
+
SpanOrQuery *soq = SpOQ(self);
|
1537
|
+
if (soq->c_cnt == 1) {
|
1538
|
+
Query *q = soq->clauses[0];
|
1539
|
+
return SpQ(q)->get_spans(q, ir);
|
1540
|
+
}
|
1171
1541
|
|
1172
|
-
|
1542
|
+
return spanoe_new(self, ir);
|
1173
1543
|
}
|
1174
1544
|
|
1175
|
-
Query *spanoq_rewrite(Query *self, IndexReader *ir)
|
1545
|
+
static Query *spanoq_rewrite(Query *self, IndexReader *ir)
|
1176
1546
|
{
|
1177
|
-
|
1178
|
-
|
1179
|
-
int i;
|
1180
|
-
/* replace clauses with their rewritten queries */
|
1181
|
-
for (i = 0; i < soq->c_cnt; i++) {
|
1182
|
-
clause = soq->clauses[i];
|
1183
|
-
rewritten = clause->rewrite(clause, ir);
|
1184
|
-
if ((rewritten == clause) || self->destroy_all) q_deref(clause);
|
1185
|
-
soq->clauses[i] = rewritten;
|
1186
|
-
}
|
1547
|
+
SpanOrQuery *soq = SpOQ(self);
|
1548
|
+
int i;
|
1187
1549
|
|
1188
|
-
|
1189
|
-
|
1550
|
+
/* replace clauses with their rewritten queries */
|
1551
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1552
|
+
Query *clause = soq->clauses[i];
|
1553
|
+
Query *rewritten = clause->rewrite(clause, ir);
|
1554
|
+
q_deref(clause);
|
1555
|
+
soq->clauses[i] = rewritten;
|
1556
|
+
}
|
1557
|
+
|
1558
|
+
self->ref_cnt++;
|
1559
|
+
return self;
|
1190
1560
|
}
|
1191
1561
|
|
1192
|
-
void
|
1562
|
+
static void spanoq_destroy_i(Query *self)
|
1193
1563
|
{
|
1194
|
-
|
1195
|
-
SpanOrQuery *soq = (SpanOrQuery *)sq->data;
|
1564
|
+
SpanOrQuery *soq = SpOQ(self);
|
1196
1565
|
|
1197
|
-
if (self->destroy_all) {
|
1198
|
-
Query *clause;
|
1199
1566
|
int i;
|
1200
1567
|
for (i = 0; i < soq->c_cnt; i++) {
|
1201
|
-
|
1202
|
-
|
1568
|
+
Query *clause = soq->clauses[i];
|
1569
|
+
q_deref(clause);
|
1203
1570
|
}
|
1204
1571
|
free(soq->clauses);
|
1205
|
-
}
|
1206
1572
|
|
1207
|
-
|
1208
|
-
free(soq);
|
1209
|
-
free(sq);
|
1210
|
-
q_destroy_i(self);
|
1573
|
+
spanq_destroy_i(self);
|
1211
1574
|
}
|
1212
1575
|
|
1213
|
-
static
|
1576
|
+
static ulong spanoq_hash(Query *self)
|
1214
1577
|
{
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
SpanOrQuery *soq = (SpanOrQuery *)((SpanQuery *)self->data)->data;
|
1578
|
+
int i;
|
1579
|
+
ulong hash = spanq_hash(self);
|
1580
|
+
SpanOrQuery *soq = SpOQ(self);
|
1219
1581
|
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1582
|
+
for (i = 0; i < soq->c_cnt; i++) {
|
1583
|
+
Query *q = soq->clauses[i];
|
1584
|
+
hash ^= q->hash(q);
|
1585
|
+
}
|
1586
|
+
return hash;
|
1225
1587
|
}
|
1226
1588
|
|
1227
1589
|
static int spanoq_eq(Query *self, Query *o)
|
1228
1590
|
{
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1591
|
+
int i;
|
1592
|
+
Query *q1, *q2;
|
1593
|
+
SpanOrQuery *soq1 = SpOQ(self);
|
1594
|
+
SpanOrQuery *soq2 = SpOQ(o);
|
1595
|
+
|
1596
|
+
if (!spanq_eq(self, o) || soq1->c_cnt != soq2->c_cnt) {
|
1597
|
+
return false;
|
1598
|
+
}
|
1599
|
+
for (i = 0; i < soq1->c_cnt; i++) {
|
1600
|
+
q1 = soq1->clauses[i];
|
1601
|
+
q2 = soq2->clauses[i];
|
1602
|
+
if (!q1->eq(q1, q2)) {
|
1603
|
+
return false;
|
1604
|
+
}
|
1605
|
+
}
|
1606
|
+
return true;
|
1240
1607
|
}
|
1241
1608
|
|
1242
|
-
Query *
|
1609
|
+
Query *spanoq_new()
|
1243
1610
|
{
|
1244
|
-
|
1611
|
+
Query *self = q_new(SpanOrQuery);
|
1612
|
+
SpOQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
|
1613
|
+
SpOQ(self)->c_capa = CLAUSE_INIT_CAPA;
|
1614
|
+
|
1615
|
+
SpQ(self)->field = (char *)EMPTY_STRING;
|
1616
|
+
SpQ(self)->get_spans = &spanoq_get_spans;
|
1617
|
+
SpQ(self)->get_terms = &spanoq_get_terms;
|
1245
1618
|
|
1246
|
-
|
1619
|
+
self->type = SPAN_OR_QUERY;
|
1620
|
+
self->rewrite = &spanoq_rewrite;
|
1621
|
+
self->extract_terms = &spanoq_extract_terms;
|
1622
|
+
self->to_s = &spanoq_to_s;
|
1623
|
+
self->hash = &spanoq_hash;
|
1624
|
+
self->eq = &spanoq_eq;
|
1625
|
+
self->destroy_i = &spanoq_destroy_i;
|
1626
|
+
self->create_weight_i = &spanw_new;
|
1627
|
+
self->get_matchv_i = &spanq_get_matchv_i;
|
1247
1628
|
|
1248
|
-
|
1249
|
-
|
1250
|
-
soq->c_cnt = c_cnt;
|
1251
|
-
sq->data = soq;
|
1629
|
+
return self;
|
1630
|
+
}
|
1252
1631
|
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1632
|
+
Query *spanoq_add_clause_nr(Query *self, Query *clause)
|
1633
|
+
{
|
1634
|
+
const int curr_index = SpOQ(self)->c_cnt++;
|
1635
|
+
if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
|
1636
|
+
RAISE(ARG_ERROR, "Tried to add a %s to a SpanOrQuery. This is not a "
|
1637
|
+
"SpanQuery.", q_get_query_name(clause->type));
|
1638
|
+
}
|
1639
|
+
if (curr_index == 0) {
|
1640
|
+
SpQ(self)->field = SpQ(clause)->field;
|
1641
|
+
}
|
1642
|
+
else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
|
1643
|
+
RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
|
1644
|
+
"Attempted to add a SpanQuery with field \"%s\" to a SpanOrQuery "
|
1645
|
+
"with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
|
1646
|
+
}
|
1647
|
+
if (curr_index >= SpOQ(self)->c_capa) {
|
1648
|
+
SpOQ(self)->c_capa <<= 1;
|
1649
|
+
REALLOC_N(SpOQ(self)->clauses, Query *, SpOQ(self)->c_capa);
|
1650
|
+
}
|
1651
|
+
SpOQ(self)->clauses[curr_index] = clause;
|
1652
|
+
return clause;
|
1653
|
+
}
|
1257
1654
|
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
self->hash = &spanoq_hash;
|
1263
|
-
self->eq = &spanoq_eq;
|
1264
|
-
self->destroy_i = &spanoq_destroy;
|
1265
|
-
self->create_weight_i = &spanw_create;
|
1266
|
-
return self;
|
1655
|
+
Query *spanoq_add_clause(Query *self, Query *clause)
|
1656
|
+
{
|
1657
|
+
REF(clause);
|
1658
|
+
return spanoq_add_clause_nr(self, clause);
|
1267
1659
|
}
|
1268
1660
|
|
1269
1661
|
/*****************************************************************************
|
@@ -1272,163 +1664,188 @@ Query *spanoq_create(Query **clauses, int c_cnt)
|
|
1272
1664
|
*
|
1273
1665
|
*****************************************************************************/
|
1274
1666
|
|
1275
|
-
char *spannq_to_s(Query *self, char *field)
|
1276
|
-
{
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1287
|
-
free(q_str);
|
1288
|
-
}
|
1289
|
-
REALLOC_N(res, char, strlen(res) + 40);
|
1290
|
-
sprintf(res + strlen(res), "], %d, %s)", snq->slop,
|
1291
|
-
snq->in_order ? "Ordered" : "Unordered");
|
1667
|
+
static char *spannq_to_s(Query *self, const char *field)
|
1668
|
+
{
|
1669
|
+
int i;
|
1670
|
+
SpanNearQuery *snq = SpNQ(self);
|
1671
|
+
char *res, *res_p;
|
1672
|
+
char **q_strs = ALLOC_N(char *, snq->c_cnt);
|
1673
|
+
int len = 50;
|
1674
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1675
|
+
Query *clause = snq->clauses[i];
|
1676
|
+
q_strs[i] = clause->to_s(clause, field);
|
1677
|
+
len += strlen(q_strs[i]);
|
1678
|
+
}
|
1292
1679
|
|
1293
|
-
|
1680
|
+
res_p = res = ALLOC_N(char, len);
|
1681
|
+
sprintf(res_p, "span_near[ ");
|
1682
|
+
res_p += strlen(res_p);
|
1683
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1684
|
+
sprintf(res_p, "%s, ", q_strs[i]);
|
1685
|
+
free(q_strs[i]);
|
1686
|
+
res_p += strlen(res_p);
|
1687
|
+
}
|
1688
|
+
free(q_strs);
|
1689
|
+
|
1690
|
+
sprintf(res_p - 2, " ]");
|
1691
|
+
return res;
|
1294
1692
|
}
|
1295
1693
|
|
1296
|
-
void spannq_extract_terms(Query *self, HashSet *terms)
|
1694
|
+
static void spannq_extract_terms(Query *self, HashSet *terms)
|
1297
1695
|
{
|
1298
|
-
|
1299
|
-
|
1300
|
-
|
1301
|
-
|
1302
|
-
|
1303
|
-
|
1304
|
-
}
|
1696
|
+
SpanNearQuery *snq = SpNQ(self);
|
1697
|
+
int i;
|
1698
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1699
|
+
Query *clause = snq->clauses[i];
|
1700
|
+
clause->extract_terms(clause, terms);
|
1701
|
+
}
|
1305
1702
|
}
|
1306
1703
|
|
1307
|
-
HashSet *spannq_get_terms(Query *self)
|
1704
|
+
static HashSet *spannq_get_terms(Query *self)
|
1308
1705
|
{
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1315
|
-
|
1316
|
-
|
1706
|
+
SpanNearQuery *snq = SpNQ(self);
|
1707
|
+
HashSet *terms = hs_new_str(&free);
|
1708
|
+
int i;
|
1709
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1710
|
+
Query *clause = snq->clauses[i];
|
1711
|
+
HashSet *sub_terms = SpQ(clause)->get_terms(clause);
|
1712
|
+
hs_merge(terms, sub_terms);
|
1713
|
+
}
|
1317
1714
|
|
1318
|
-
|
1715
|
+
return terms;
|
1319
1716
|
}
|
1320
1717
|
|
1321
|
-
SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
|
1718
|
+
static SpanEnum *spannq_get_spans(Query *self, IndexReader *ir)
|
1322
1719
|
{
|
1323
|
-
|
1324
|
-
Query *q;
|
1720
|
+
SpanNearQuery *snq = SpNQ(self);
|
1325
1721
|
|
1326
|
-
|
1327
|
-
|
1328
|
-
|
1329
|
-
|
1722
|
+
if (snq->c_cnt == 1) {
|
1723
|
+
Query *q = snq->clauses[0];
|
1724
|
+
return SpQ(q)->get_spans(q, ir);
|
1725
|
+
}
|
1330
1726
|
|
1331
|
-
|
1727
|
+
return spanne_new(self, ir);
|
1332
1728
|
}
|
1333
1729
|
|
1334
|
-
Query *spannq_rewrite(Query *self, IndexReader *ir)
|
1730
|
+
static Query *spannq_rewrite(Query *self, IndexReader *ir)
|
1335
1731
|
{
|
1336
|
-
|
1337
|
-
|
1338
|
-
|
1339
|
-
|
1340
|
-
|
1341
|
-
|
1342
|
-
|
1343
|
-
|
1344
|
-
}
|
1732
|
+
SpanNearQuery *snq = SpNQ(self);
|
1733
|
+
int i;
|
1734
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1735
|
+
Query *clause = snq->clauses[i];
|
1736
|
+
Query *rewritten = clause->rewrite(clause, ir);
|
1737
|
+
q_deref(clause);
|
1738
|
+
snq->clauses[i] = rewritten;
|
1739
|
+
}
|
1345
1740
|
|
1346
|
-
|
1347
|
-
|
1741
|
+
self->ref_cnt++;
|
1742
|
+
return self;
|
1348
1743
|
}
|
1349
1744
|
|
1350
|
-
void spannq_destroy(Query *self)
|
1745
|
+
static void spannq_destroy(Query *self)
|
1351
1746
|
{
|
1352
|
-
|
1353
|
-
SpanNearQuery *snq = (SpanNearQuery *)sq->data;
|
1747
|
+
SpanNearQuery *snq = SpNQ(self);
|
1354
1748
|
|
1355
|
-
if (self->destroy_all) {
|
1356
|
-
Query *clause;
|
1357
1749
|
int i;
|
1358
1750
|
for (i = 0; i < snq->c_cnt; i++) {
|
1359
|
-
|
1360
|
-
|
1751
|
+
Query *clause = snq->clauses[i];
|
1752
|
+
q_deref(clause);
|
1361
1753
|
}
|
1362
1754
|
free(snq->clauses);
|
1363
|
-
}
|
1364
1755
|
|
1365
|
-
|
1366
|
-
free(sq);
|
1367
|
-
q_destroy_i(self);
|
1756
|
+
spanq_destroy_i(self);
|
1368
1757
|
}
|
1369
1758
|
|
1370
|
-
static
|
1759
|
+
static ulong spannq_hash(Query *self)
|
1371
1760
|
{
|
1372
|
-
|
1373
|
-
|
1374
|
-
|
1375
|
-
SpanNearQuery *snq = (SpanNearQuery *)((SpanQuery *)self->data)->data;
|
1761
|
+
int i;
|
1762
|
+
ulong hash = spanq_hash(self);
|
1763
|
+
SpanNearQuery *snq = SpNQ(self);
|
1376
1764
|
|
1377
|
-
|
1378
|
-
|
1379
|
-
|
1380
|
-
|
1381
|
-
|
1765
|
+
for (i = 0; i < snq->c_cnt; i++) {
|
1766
|
+
Query *q = snq->clauses[i];
|
1767
|
+
hash ^= q->hash(q);
|
1768
|
+
}
|
1769
|
+
return ((hash ^ snq->slop) << 1) | snq->in_order;
|
1382
1770
|
}
|
1383
1771
|
|
1384
1772
|
static int spannq_eq(Query *self, Query *o)
|
1385
1773
|
{
|
1386
|
-
|
1387
|
-
|
1388
|
-
|
1389
|
-
|
1390
|
-
|
1391
|
-
|
1392
|
-
|
1393
|
-
|
1394
|
-
|
1774
|
+
int i;
|
1775
|
+
Query *q1, *q2;
|
1776
|
+
SpanNearQuery *snq1 = SpNQ(self);
|
1777
|
+
SpanNearQuery *snq2 = SpNQ(o);
|
1778
|
+
if (! spanq_eq(self, o)
|
1779
|
+
|| (snq1->c_cnt != snq2->c_cnt)
|
1780
|
+
|| (snq1->slop != snq2->slop)
|
1781
|
+
|| (snq1->in_order != snq2->in_order)) {
|
1782
|
+
return false;
|
1783
|
+
}
|
1395
1784
|
|
1396
|
-
|
1397
|
-
|
1398
|
-
|
1399
|
-
|
1400
|
-
|
1785
|
+
for (i = 0; i < snq1->c_cnt; i++) {
|
1786
|
+
q1 = snq1->clauses[i];
|
1787
|
+
q2 = snq2->clauses[i];
|
1788
|
+
if (!q1->eq(q1, q2)) {
|
1789
|
+
return false;
|
1790
|
+
}
|
1791
|
+
}
|
1401
1792
|
|
1402
|
-
|
1793
|
+
return true;
|
1403
1794
|
}
|
1404
1795
|
|
1405
|
-
Query *
|
1796
|
+
Query *spannq_new(int slop, bool in_order)
|
1406
1797
|
{
|
1407
|
-
|
1798
|
+
Query *self = q_new(SpanNearQuery);
|
1408
1799
|
|
1409
|
-
|
1800
|
+
SpNQ(self)->clauses = ALLOC_N(Query *, CLAUSE_INIT_CAPA);
|
1801
|
+
SpNQ(self)->c_capa = CLAUSE_INIT_CAPA;
|
1802
|
+
SpNQ(self)->slop = slop;
|
1803
|
+
SpNQ(self)->in_order = in_order;
|
1410
1804
|
|
1411
|
-
|
1412
|
-
|
1413
|
-
|
1414
|
-
snq->slop = slop;
|
1415
|
-
snq->in_order = in_order;
|
1416
|
-
sq->data = snq;
|
1805
|
+
SpQ(self)->get_spans = &spannq_get_spans;
|
1806
|
+
SpQ(self)->get_terms = &spannq_get_terms;
|
1807
|
+
SpQ(self)->field = (char *)EMPTY_STRING;
|
1417
1808
|
|
1418
|
-
|
1419
|
-
|
1420
|
-
|
1421
|
-
|
1809
|
+
self->type = SPAN_NEAR_QUERY;
|
1810
|
+
self->rewrite = &spannq_rewrite;
|
1811
|
+
self->extract_terms = &spannq_extract_terms;
|
1812
|
+
self->to_s = &spannq_to_s;
|
1813
|
+
self->hash = &spannq_hash;
|
1814
|
+
self->eq = &spannq_eq;
|
1815
|
+
self->destroy_i = &spannq_destroy;
|
1816
|
+
self->create_weight_i = &spanw_new;
|
1817
|
+
self->get_matchv_i = &spanq_get_matchv_i;
|
1422
1818
|
|
1423
|
-
|
1424
|
-
|
1425
|
-
|
1426
|
-
|
1427
|
-
|
1428
|
-
|
1429
|
-
|
1430
|
-
|
1431
|
-
|
1819
|
+
return self;
|
1820
|
+
}
|
1821
|
+
|
1822
|
+
Query *spannq_add_clause_nr(Query *self, Query *clause)
|
1823
|
+
{
|
1824
|
+
const int curr_index = SpNQ(self)->c_cnt++;
|
1825
|
+
if (clause->type < SPAN_TERM_QUERY || clause->type > SPAN_NEAR_QUERY) {
|
1826
|
+
RAISE(ARG_ERROR, "Tried to add a %s to a SpanNearQuery. This is not a "
|
1827
|
+
"SpanQuery.", q_get_query_name(clause->type));
|
1828
|
+
}
|
1829
|
+
if (curr_index == 0) {
|
1830
|
+
SpQ(self)->field = SpQ(clause)->field;
|
1831
|
+
}
|
1832
|
+
else if (strcmp(SpQ(self)->field, SpQ(clause)->field) != 0) {
|
1833
|
+
RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
|
1834
|
+
"Attempted to add a SpanQuery with field \"%s\" to SpanNearQuery "
|
1835
|
+
"with field \"%s\"", SpQ(clause)->field, SpQ(self)->field);
|
1836
|
+
}
|
1837
|
+
if (curr_index >= SpNQ(self)->c_capa) {
|
1838
|
+
SpNQ(self)->c_capa <<= 1;
|
1839
|
+
REALLOC_N(SpNQ(self)->clauses, Query *, SpNQ(self)->c_capa);
|
1840
|
+
}
|
1841
|
+
SpNQ(self)->clauses[curr_index] = clause;
|
1842
|
+
return clause;
|
1843
|
+
}
|
1844
|
+
|
1845
|
+
Query *spannq_add_clause(Query *self, Query *clause)
|
1846
|
+
{
|
1847
|
+
REF(clause);
|
1848
|
+
return spannq_add_clause_nr(self, clause);
|
1432
1849
|
}
|
1433
1850
|
|
1434
1851
|
/*****************************************************************************
|
@@ -1437,213 +1854,110 @@ Query *spannq_create(Query **clauses, int c_cnt, int slop, bool in_order)
|
|
1437
1854
|
*
|
1438
1855
|
*****************************************************************************/
|
1439
1856
|
|
1440
|
-
char *spanxq_to_s(Query *self, char *field)
|
1857
|
+
static char *spanxq_to_s(Query *self, const char *field)
|
1441
1858
|
{
|
1442
|
-
|
1443
|
-
|
1444
|
-
|
1445
|
-
|
1446
|
-
|
1447
|
-
|
1448
|
-
|
1449
|
-
|
1859
|
+
SpanNotQuery *sxq = SpXQ(self);
|
1860
|
+
char *inc_s = sxq->inc->to_s(sxq->inc, field);
|
1861
|
+
char *exc_s = sxq->exc->to_s(sxq->exc, field);
|
1862
|
+
char *res = strfmt("span_not(inc:<%s>, exc:<%s>)", inc_s, exc_s);
|
1863
|
+
|
1864
|
+
free(inc_s);
|
1865
|
+
free(exc_s);
|
1866
|
+
return res;
|
1450
1867
|
}
|
1451
1868
|
|
1452
|
-
void spanxq_extract_terms(Query *self, HashSet *terms)
|
1869
|
+
static void spanxq_extract_terms(Query *self, HashSet *terms)
|
1453
1870
|
{
|
1454
|
-
|
1455
|
-
sxq->inc->extract_terms(sxq->inc, terms);
|
1871
|
+
SpXQ(self)->inc->extract_terms(SpXQ(self)->inc, terms);
|
1456
1872
|
}
|
1457
1873
|
|
1458
|
-
HashSet *spanxq_get_terms(Query *self)
|
1874
|
+
static HashSet *spanxq_get_terms(Query *self)
|
1459
1875
|
{
|
1460
|
-
|
1461
|
-
HashSet *terms = term_set_create();
|
1462
|
-
sxq->inc->extract_terms(sxq->inc, terms);
|
1463
|
-
return terms;
|
1876
|
+
return SpQ(SpXQ(self)->inc)->get_terms(SpXQ(self)->inc);
|
1464
1877
|
}
|
1465
1878
|
|
1466
|
-
Query *spanxq_rewrite(Query *self, IndexReader *ir)
|
1879
|
+
static Query *spanxq_rewrite(Query *self, IndexReader *ir)
|
1467
1880
|
{
|
1468
|
-
|
1469
|
-
|
1881
|
+
SpanNotQuery *sxq = SpXQ(self);
|
1882
|
+
Query *q, *rq;
|
1470
1883
|
|
1471
|
-
|
1472
|
-
|
1473
|
-
|
1474
|
-
|
1475
|
-
|
1884
|
+
/* rewrite inclusive query */
|
1885
|
+
q = sxq->inc;
|
1886
|
+
rq = q->rewrite(q, ir);
|
1887
|
+
q_deref(q);
|
1888
|
+
sxq->inc = rq;
|
1476
1889
|
|
1477
|
-
|
1478
|
-
|
1479
|
-
|
1480
|
-
|
1481
|
-
|
1890
|
+
/* rewrite exclusive query */
|
1891
|
+
q = sxq->exc;
|
1892
|
+
rq = q->rewrite(q, ir);
|
1893
|
+
q_deref(q);
|
1894
|
+
sxq->exc = rq;
|
1482
1895
|
|
1483
|
-
|
1484
|
-
|
1896
|
+
self->ref_cnt++;
|
1897
|
+
return self;
|
1485
1898
|
}
|
1486
1899
|
|
1487
|
-
void spanxq_destroy(Query *self)
|
1900
|
+
static void spanxq_destroy(Query *self)
|
1488
1901
|
{
|
1489
|
-
|
1490
|
-
SpanNotQuery *sxq = (SpanNotQuery *)sq->data;
|
1902
|
+
SpanNotQuery *sxq = SpXQ(self);
|
1491
1903
|
|
1492
|
-
if (self->destroy_all) {
|
1493
1904
|
q_deref(sxq->inc);
|
1494
1905
|
q_deref(sxq->exc);
|
1495
|
-
}
|
1496
1906
|
|
1497
|
-
|
1498
|
-
free(sq);
|
1499
|
-
q_destroy_i(self);
|
1907
|
+
spanq_destroy_i(self);
|
1500
1908
|
}
|
1501
1909
|
|
1502
|
-
static
|
1910
|
+
static ulong spanxq_hash(Query *self)
|
1503
1911
|
{
|
1504
|
-
|
1505
|
-
|
1912
|
+
SpanNotQuery *sxq = SpXQ(self);
|
1913
|
+
return spanq_hash(self) ^ sxq->inc->hash(sxq->inc)
|
1914
|
+
^ sxq->exc->hash(sxq->exc);
|
1506
1915
|
}
|
1507
1916
|
|
1508
1917
|
/*
 * Equality callback for SpanNotQuery.
 *
 * Two SpanNotQuery objects compare equal when spanq_eq() accepts the pair
 * and both the include clauses and the exclude clauses compare equal through
 * the left-hand clause's eq callback. Checks are made in that order and stop
 * at the first failure.
 *
 * self - left-hand SpanNotQuery
 * o    - right-hand query; it is accessed as a SpanNotQuery
 *
 * Returns 1 when equal, 0 otherwise.
 */
static int spanxq_eq(Query *self, Query *o)
{
    SpanNotQuery *lhs = SpXQ(self);
    SpanNotQuery *rhs = SpXQ(o);

    if (!spanq_eq(self, o)) {
        return 0;
    }
    if (!lhs->inc->eq(lhs->inc, rhs->inc)) {
        return 0;
    }
    /* Normalise to 0/1, matching the result of a logical-and expression. */
    return lhs->exc->eq(lhs->exc, rhs->exc) != 0;
}
|
1559
1924
|
|
1560
|
-
bool spansc_next(Scorer *self)
|
1561
|
-
{
|
1562
|
-
SpanScorer *spansc = (SpanScorer *)self->data;
|
1563
|
-
SpanEnum *se = spansc->spans;
|
1564
|
-
int match_length;
|
1565
|
-
|
1566
|
-
if (spansc->first_time) {
|
1567
|
-
spansc->more = se->next(se);
|
1568
|
-
spansc->first_time = false;
|
1569
|
-
}
|
1570
|
-
|
1571
|
-
if (!spansc->more) return false;
|
1572
|
-
|
1573
|
-
spansc->freq = 0.0;
|
1574
|
-
self->doc = se->doc(se);
|
1575
|
-
|
1576
|
-
while (spansc->more && (self->doc == se->doc(se))) {
|
1577
|
-
match_length = se->end(se) - se->start(se);
|
1578
|
-
spansc->freq += sim_sloppy_freq(spansc->sim, match_length);
|
1579
|
-
spansc->more = se->next(se);
|
1580
|
-
}
|
1581
|
-
|
1582
|
-
return (spansc->more || (spansc->freq != 0.0));
|
1583
|
-
}
|
1584
1925
|
|
1585
|
-
|
1926
|
+
/*
 * Build a SpanNotQuery from an include clause and an exclude clause.
 *
 * This variant stores the two clause pointers as given and does not call
 * REF() on them; spanxq_new() does that before delegating here.
 *
 * inc - span query whose matches are wanted
 * exc - span query whose matches are to be excluded
 *
 * Both clauses must name the same field. If the field names differ an
 * ARG_ERROR is raised and no query is created. The new query takes its
 * field from the include clause.
 *
 * Returns the newly allocated query.
 */
Query *spanxq_new_nr(Query *inc, Query *exc)
{
    Query *self;

    /*
     * Compare the include clause's field against the EXCLUDE clause's field.
     * Comparing inc with itself would always yield 0 and let mismatched
     * fields through unnoticed.
     */
    if (strcmp(SpQ(inc)->field, SpQ(exc)->field) != 0) {
        RAISE(ARG_ERROR, "All clauses in a SpanQuery must have the same field. "
              "Attempted to add a SpanQuery with field \"%s\" along with a "
              "SpanQuery with field \"%s\" to an SpanNotQuery",
              SpQ(inc)->field, SpQ(exc)->field);
    }
    self = q_new(SpanNotQuery);

    /* SpanNotQuery-specific state */
    SpXQ(self)->inc = inc;
    SpXQ(self)->exc = exc;

    /* SpanQuery-level state and callbacks */
    SpQ(self)->field = SpQ(inc)->field;
    SpQ(self)->get_spans = &spanxe_new;
    SpQ(self)->get_terms = &spanxq_get_terms;

    /* Generic Query callbacks */
    self->type = SPAN_NOT_QUERY;
    self->rewrite = &spanxq_rewrite;
    self->extract_terms = &spanxq_extract_terms;
    self->to_s = &spanxq_to_s;
    self->hash = &spanxq_hash;
    self->eq = &spanxq_eq;
    self->destroy_i = &spanxq_destroy;
    self->create_weight_i = &spanw_new;
    self->get_matchv_i = &spanq_get_matchv_i;

    return self;
}
|
1604
1956
|
|
1605
|
-
|
1957
|
+
/*
 * Build a SpanNotQuery from an include clause and an exclude clause.
 *
 * Applies REF() to both clauses and then delegates construction to
 * spanxq_new_nr().
 *
 * inc - span query whose matches are wanted
 * exc - span query whose matches are to be excluded
 *
 * Returns the query produced by spanxq_new_nr().
 *
 * NOTE(review): REF() runs before spanxq_new_nr(), which contains a RAISE on
 * mismatched fields. If that raise leaves this function, the two REF() calls
 * are presumably not undone -- confirm against the definitions of REF/RAISE.
 */
Query *spanxq_new(Query *inc, Query *exc)
{
    Query *not_query;

    REF(inc);
    REF(exc);
    not_query = spanxq_new_nr(inc, exc);

    return not_query;
}
|
1618
1963
|
|
1619
|
-
void spansc_destroy(Scorer *self)
|
1620
|
-
{
|
1621
|
-
SpanScorer *spansc = (SpanScorer *)self->data;
|
1622
|
-
if (spansc->spans) spansc->spans->destroy(spansc->spans);
|
1623
|
-
scorer_destroy_i(self);
|
1624
|
-
}
|
1625
|
-
|
1626
|
-
Scorer *spansc_create(Weight *weight, IndexReader *ir)
|
1627
|
-
{
|
1628
|
-
Scorer *self = scorer_create(weight->similarity);
|
1629
|
-
SpanScorer *spansc = ALLOC(SpanScorer);
|
1630
|
-
SpanQuery *spanq = (SpanQuery *)weight->query->data;
|
1631
|
-
ZEROSET(spansc, SpanScorer, 1);
|
1632
|
-
spansc->first_time = true;
|
1633
|
-
spansc->more = true;
|
1634
|
-
spansc->spans = spanq->get_spans(weight->query, ir);
|
1635
|
-
spansc->sim = weight->similarity;
|
1636
|
-
spansc->norms = ir->get_norms(ir, spanq->field);
|
1637
|
-
spansc->weight = weight;
|
1638
|
-
spansc->value = weight->value;
|
1639
|
-
spansc->freq = 0.0;
|
1640
|
-
|
1641
|
-
self->data = spansc;
|
1642
|
-
|
1643
|
-
self->score = &spansc_score;
|
1644
|
-
self->next = &spansc_next;
|
1645
|
-
self->skip_to = &spansc_skip_to;
|
1646
|
-
self->explain = &spansc_explain;
|
1647
|
-
self->destroy = &spansc_destroy;
|
1648
|
-
return self;
|
1649
|
-
}
|