ferret 0.9.6 → 0.10.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/MIT-LICENSE +1 -1
- data/README +12 -24
- data/Rakefile +38 -54
- data/TODO +14 -17
- data/ext/analysis.c +982 -823
- data/ext/analysis.h +133 -76
- data/ext/array.c +96 -58
- data/ext/array.h +40 -13
- data/ext/bitvector.c +476 -118
- data/ext/bitvector.h +264 -22
- data/ext/compound_io.c +217 -229
- data/ext/defines.h +49 -0
- data/ext/document.c +107 -317
- data/ext/document.h +31 -65
- data/ext/except.c +81 -36
- data/ext/except.h +117 -55
- data/ext/extconf.rb +2 -9
- data/ext/ferret.c +211 -104
- data/ext/ferret.h +22 -11
- data/ext/filter.c +97 -82
- data/ext/fs_store.c +348 -367
- data/ext/global.c +226 -188
- data/ext/global.h +44 -26
- data/ext/hash.c +474 -391
- data/ext/hash.h +441 -68
- data/ext/hashset.c +124 -96
- data/ext/hashset.h +169 -20
- data/ext/helper.c +56 -5
- data/ext/helper.h +7 -0
- data/ext/inc/lang.h +29 -49
- data/ext/inc/threading.h +31 -0
- data/ext/ind.c +288 -278
- data/ext/ind.h +68 -0
- data/ext/index.c +5688 -0
- data/ext/index.h +663 -616
- data/ext/lang.h +29 -49
- data/ext/libstemmer.c +3 -3
- data/ext/mem_pool.c +84 -0
- data/ext/mem_pool.h +35 -0
- data/ext/posh.c +1006 -0
- data/ext/posh.h +1007 -0
- data/ext/priorityqueue.c +117 -194
- data/ext/priorityqueue.h +135 -39
- data/ext/q_boolean.c +1305 -1108
- data/ext/q_const_score.c +106 -93
- data/ext/q_filtered_query.c +138 -135
- data/ext/q_fuzzy.c +206 -242
- data/ext/q_match_all.c +94 -80
- data/ext/q_multi_term.c +663 -0
- data/ext/q_parser.c +667 -593
- data/ext/q_phrase.c +992 -555
- data/ext/q_prefix.c +72 -61
- data/ext/q_range.c +235 -210
- data/ext/q_span.c +1480 -1166
- data/ext/q_term.c +273 -246
- data/ext/q_wildcard.c +127 -114
- data/ext/r_analysis.c +1720 -711
- data/ext/r_index.c +3049 -0
- data/ext/r_qparser.c +433 -146
- data/ext/r_search.c +2934 -1993
- data/ext/r_store.c +372 -143
- data/ext/r_utils.c +941 -0
- data/ext/ram_store.c +330 -326
- data/ext/search.c +1291 -668
- data/ext/search.h +403 -702
- data/ext/similarity.c +91 -113
- data/ext/similarity.h +45 -30
- data/ext/sort.c +721 -484
- data/ext/stopwords.c +361 -273
- data/ext/store.c +556 -58
- data/ext/store.h +706 -126
- data/ext/tags +3578 -2780
- data/ext/term_vectors.c +352 -0
- data/ext/threading.h +31 -0
- data/ext/win32.h +54 -0
- data/lib/ferret.rb +5 -17
- data/lib/ferret/document.rb +130 -2
- data/lib/ferret/index.rb +577 -26
- data/lib/ferret/number_tools.rb +157 -0
- data/lib/ferret_version.rb +3 -0
- data/test/test_helper.rb +5 -13
- data/test/unit/analysis/tc_analyzer.rb +513 -1
- data/test/unit/analysis/{ctc_tokenstream.rb → tc_token_stream.rb} +23 -0
- data/test/unit/index/tc_index.rb +183 -240
- data/test/unit/index/tc_index_reader.rb +312 -479
- data/test/unit/index/tc_index_writer.rb +397 -13
- data/test/unit/index/th_doc.rb +269 -206
- data/test/unit/query_parser/tc_query_parser.rb +40 -33
- data/test/unit/search/tc_filter.rb +59 -71
- data/test/unit/search/tc_fuzzy_query.rb +24 -16
- data/test/unit/search/tc_index_searcher.rb +23 -201
- data/test/unit/search/tc_multi_searcher.rb +78 -226
- data/test/unit/search/tc_search_and_sort.rb +93 -81
- data/test/unit/search/tc_sort.rb +23 -23
- data/test/unit/search/tc_sort_field.rb +7 -7
- data/test/unit/search/tc_spans.rb +51 -47
- data/test/unit/search/tm_searcher.rb +339 -0
- data/test/unit/store/tc_fs_store.rb +1 -1
- data/test/unit/store/tm_store_lock.rb +3 -3
- data/test/unit/tc_document.rb +81 -0
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_bit_vector.rb +288 -0
- data/test/unit/utils/tc_number_tools.rb +117 -0
- data/test/unit/utils/tc_priority_queue.rb +106 -0
- metadata +140 -301
- data/CHANGELOG +0 -9
- data/ext/dummy.exe +0 -0
- data/ext/field.c +0 -408
- data/ext/frtio.h +0 -13
- data/ext/inc/except.h +0 -90
- data/ext/index_io.c +0 -382
- data/ext/index_rw.c +0 -2658
- data/ext/lang.c +0 -41
- data/ext/nix_io.c +0 -134
- data/ext/q_multi_phrase.c +0 -380
- data/ext/r_doc.c +0 -582
- data/ext/r_index_io.c +0 -1021
- data/ext/r_term.c +0 -219
- data/ext/term.c +0 -820
- data/ext/termdocs.c +0 -611
- data/ext/vector.c +0 -637
- data/ext/w32_io.c +0 -150
- data/lib/ferret/analysis.rb +0 -11
- data/lib/ferret/analysis/analyzers.rb +0 -112
- data/lib/ferret/analysis/standard_tokenizer.rb +0 -71
- data/lib/ferret/analysis/token.rb +0 -100
- data/lib/ferret/analysis/token_filters.rb +0 -86
- data/lib/ferret/analysis/token_stream.rb +0 -26
- data/lib/ferret/analysis/tokenizers.rb +0 -112
- data/lib/ferret/analysis/word_list_loader.rb +0 -27
- data/lib/ferret/document/document.rb +0 -152
- data/lib/ferret/document/field.rb +0 -312
- data/lib/ferret/index/compound_file_io.rb +0 -338
- data/lib/ferret/index/document_writer.rb +0 -289
- data/lib/ferret/index/field_infos.rb +0 -279
- data/lib/ferret/index/fields_io.rb +0 -181
- data/lib/ferret/index/index.rb +0 -675
- data/lib/ferret/index/index_file_names.rb +0 -33
- data/lib/ferret/index/index_reader.rb +0 -503
- data/lib/ferret/index/index_writer.rb +0 -534
- data/lib/ferret/index/multi_reader.rb +0 -377
- data/lib/ferret/index/multiple_term_doc_pos_enum.rb +0 -98
- data/lib/ferret/index/segment_infos.rb +0 -130
- data/lib/ferret/index/segment_merge_info.rb +0 -49
- data/lib/ferret/index/segment_merge_queue.rb +0 -16
- data/lib/ferret/index/segment_merger.rb +0 -358
- data/lib/ferret/index/segment_reader.rb +0 -412
- data/lib/ferret/index/segment_term_enum.rb +0 -169
- data/lib/ferret/index/segment_term_vector.rb +0 -58
- data/lib/ferret/index/term.rb +0 -53
- data/lib/ferret/index/term_buffer.rb +0 -83
- data/lib/ferret/index/term_doc_enum.rb +0 -291
- data/lib/ferret/index/term_enum.rb +0 -52
- data/lib/ferret/index/term_info.rb +0 -37
- data/lib/ferret/index/term_infos_io.rb +0 -321
- data/lib/ferret/index/term_vector_offset_info.rb +0 -20
- data/lib/ferret/index/term_vectors_io.rb +0 -553
- data/lib/ferret/query_parser.rb +0 -312
- data/lib/ferret/query_parser/query_parser.tab.rb +0 -928
- data/lib/ferret/search.rb +0 -50
- data/lib/ferret/search/boolean_clause.rb +0 -100
- data/lib/ferret/search/boolean_query.rb +0 -299
- data/lib/ferret/search/boolean_scorer.rb +0 -294
- data/lib/ferret/search/caching_wrapper_filter.rb +0 -40
- data/lib/ferret/search/conjunction_scorer.rb +0 -99
- data/lib/ferret/search/disjunction_sum_scorer.rb +0 -205
- data/lib/ferret/search/exact_phrase_scorer.rb +0 -32
- data/lib/ferret/search/explanation.rb +0 -41
- data/lib/ferret/search/field_cache.rb +0 -215
- data/lib/ferret/search/field_doc.rb +0 -31
- data/lib/ferret/search/field_sorted_hit_queue.rb +0 -184
- data/lib/ferret/search/filter.rb +0 -11
- data/lib/ferret/search/filtered_query.rb +0 -130
- data/lib/ferret/search/filtered_term_enum.rb +0 -79
- data/lib/ferret/search/fuzzy_query.rb +0 -154
- data/lib/ferret/search/fuzzy_term_enum.rb +0 -247
- data/lib/ferret/search/hit_collector.rb +0 -34
- data/lib/ferret/search/hit_queue.rb +0 -11
- data/lib/ferret/search/index_searcher.rb +0 -200
- data/lib/ferret/search/match_all_query.rb +0 -104
- data/lib/ferret/search/multi_phrase_query.rb +0 -216
- data/lib/ferret/search/multi_searcher.rb +0 -261
- data/lib/ferret/search/multi_term_query.rb +0 -65
- data/lib/ferret/search/non_matching_scorer.rb +0 -22
- data/lib/ferret/search/phrase_positions.rb +0 -55
- data/lib/ferret/search/phrase_query.rb +0 -214
- data/lib/ferret/search/phrase_scorer.rb +0 -152
- data/lib/ferret/search/prefix_query.rb +0 -54
- data/lib/ferret/search/query.rb +0 -140
- data/lib/ferret/search/query_filter.rb +0 -51
- data/lib/ferret/search/range_filter.rb +0 -103
- data/lib/ferret/search/range_query.rb +0 -139
- data/lib/ferret/search/req_excl_scorer.rb +0 -125
- data/lib/ferret/search/req_opt_sum_scorer.rb +0 -70
- data/lib/ferret/search/score_doc.rb +0 -38
- data/lib/ferret/search/score_doc_comparator.rb +0 -114
- data/lib/ferret/search/scorer.rb +0 -91
- data/lib/ferret/search/similarity.rb +0 -278
- data/lib/ferret/search/sloppy_phrase_scorer.rb +0 -47
- data/lib/ferret/search/sort.rb +0 -112
- data/lib/ferret/search/sort_comparator.rb +0 -60
- data/lib/ferret/search/sort_field.rb +0 -91
- data/lib/ferret/search/spans.rb +0 -12
- data/lib/ferret/search/spans/near_spans_enum.rb +0 -304
- data/lib/ferret/search/spans/span_first_query.rb +0 -79
- data/lib/ferret/search/spans/span_near_query.rb +0 -108
- data/lib/ferret/search/spans/span_not_query.rb +0 -130
- data/lib/ferret/search/spans/span_or_query.rb +0 -176
- data/lib/ferret/search/spans/span_query.rb +0 -25
- data/lib/ferret/search/spans/span_scorer.rb +0 -74
- data/lib/ferret/search/spans/span_term_query.rb +0 -105
- data/lib/ferret/search/spans/span_weight.rb +0 -84
- data/lib/ferret/search/spans/spans_enum.rb +0 -44
- data/lib/ferret/search/term_query.rb +0 -128
- data/lib/ferret/search/term_scorer.rb +0 -183
- data/lib/ferret/search/top_docs.rb +0 -36
- data/lib/ferret/search/top_field_docs.rb +0 -17
- data/lib/ferret/search/weight.rb +0 -54
- data/lib/ferret/search/wildcard_query.rb +0 -26
- data/lib/ferret/search/wildcard_term_enum.rb +0 -61
- data/lib/ferret/stemmers.rb +0 -1
- data/lib/ferret/stemmers/porter_stemmer.rb +0 -218
- data/lib/ferret/store.rb +0 -5
- data/lib/ferret/store/buffered_index_io.rb +0 -190
- data/lib/ferret/store/directory.rb +0 -141
- data/lib/ferret/store/fs_store.rb +0 -381
- data/lib/ferret/store/index_io.rb +0 -245
- data/lib/ferret/store/ram_store.rb +0 -286
- data/lib/ferret/utils.rb +0 -8
- data/lib/ferret/utils/bit_vector.rb +0 -123
- data/lib/ferret/utils/date_tools.rb +0 -138
- data/lib/ferret/utils/number_tools.rb +0 -91
- data/lib/ferret/utils/parameter.rb +0 -41
- data/lib/ferret/utils/priority_queue.rb +0 -120
- data/lib/ferret/utils/string_helper.rb +0 -47
- data/lib/ferret/utils/thread_local.rb +0 -28
- data/lib/ferret/utils/weak_key_hash.rb +0 -60
- data/lib/rferret.rb +0 -37
- data/rake_utils/code_statistics.rb +0 -106
- data/test/benchmark/tb_ram_store.rb +0 -76
- data/test/benchmark/tb_rw_vint.rb +0 -26
- data/test/functional/thread_safety_index_test.rb +0 -81
- data/test/functional/thread_safety_test.rb +0 -137
- data/test/longrunning/tc_numbertools.rb +0 -60
- data/test/longrunning/tm_store.rb +0 -19
- data/test/unit/analysis/ctc_analyzer.rb +0 -532
- data/test/unit/analysis/data/wordfile +0 -6
- data/test/unit/analysis/rtc_letter_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_filter.rb +0 -20
- data/test/unit/analysis/rtc_lower_case_tokenizer.rb +0 -27
- data/test/unit/analysis/rtc_per_field_analyzer_wrapper.rb +0 -39
- data/test/unit/analysis/rtc_porter_stem_filter.rb +0 -16
- data/test/unit/analysis/rtc_standard_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_standard_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_stop_analyzer.rb +0 -20
- data/test/unit/analysis/rtc_stop_filter.rb +0 -14
- data/test/unit/analysis/rtc_white_space_analyzer.rb +0 -21
- data/test/unit/analysis/rtc_white_space_tokenizer.rb +0 -20
- data/test/unit/analysis/rtc_word_list_loader.rb +0 -32
- data/test/unit/analysis/tc_token.rb +0 -25
- data/test/unit/document/rtc_field.rb +0 -28
- data/test/unit/document/tc_document.rb +0 -47
- data/test/unit/document/tc_field.rb +0 -98
- data/test/unit/index/rtc_compound_file_io.rb +0 -107
- data/test/unit/index/rtc_field_infos.rb +0 -127
- data/test/unit/index/rtc_fields_io.rb +0 -167
- data/test/unit/index/rtc_multiple_term_doc_pos_enum.rb +0 -83
- data/test/unit/index/rtc_segment_infos.rb +0 -74
- data/test/unit/index/rtc_segment_term_docs.rb +0 -17
- data/test/unit/index/rtc_segment_term_enum.rb +0 -60
- data/test/unit/index/rtc_segment_term_vector.rb +0 -71
- data/test/unit/index/rtc_term_buffer.rb +0 -57
- data/test/unit/index/rtc_term_info.rb +0 -19
- data/test/unit/index/rtc_term_infos_io.rb +0 -192
- data/test/unit/index/rtc_term_vectors_io.rb +0 -108
- data/test/unit/index/tc_term.rb +0 -27
- data/test/unit/index/tc_term_voi.rb +0 -18
- data/test/unit/search/rtc_similarity.rb +0 -37
- data/test/unit/search/rtc_sort_field.rb +0 -14
- data/test/unit/search/tc_multi_searcher2.rb +0 -126
- data/test/unit/store/rtc_fs_store.rb +0 -62
- data/test/unit/store/rtc_ram_store.rb +0 -15
- data/test/unit/store/rtm_store.rb +0 -150
- data/test/unit/store/rtm_store_lock.rb +0 -2
- data/test/unit/ts_document.rb +0 -2
- data/test/unit/utils/rtc_bit_vector.rb +0 -73
- data/test/unit/utils/rtc_date_tools.rb +0 -50
- data/test/unit/utils/rtc_number_tools.rb +0 -59
- data/test/unit/utils/rtc_parameter.rb +0 -40
- data/test/unit/utils/rtc_priority_queue.rb +0 -62
- data/test/unit/utils/rtc_string_helper.rb +0 -21
- data/test/unit/utils/rtc_thread.rb +0 -61
- data/test/unit/utils/rtc_weak_key_hash.rb +0 -25
- data/test/utils/number_to_spoken.rb +0 -132
data/ext/q_boolean.c
CHANGED
@@ -1,1390 +1,1587 @@
|
|
1
1
|
#include <string.h>
|
2
2
|
#include "search.h"
|
3
|
+
#include "array.h"
|
3
4
|
|
4
|
-
|
5
|
-
|
6
|
-
static char * const MIN_NUM_MATCHES_ERROR_MSG = "Minimum nr of matches must be positive";
|
7
|
-
static char * const TWO_SUB_ERROR_MSG = "There must be at least 2 sub_scorers";
|
8
|
-
static char * const UNKNOWN_OCCUR_VAL_ERROR_MSG = "Unknown value for occur";
|
5
|
+
#define BQ(query) ((BooleanQuery *)(query))
|
6
|
+
#define BW(weight) ((BooleanWeight *)(weight))
|
9
7
|
|
10
8
|
/***************************************************************************
|
11
9
|
*
|
12
|
-
*
|
10
|
+
* BooleanScorer
|
13
11
|
*
|
14
12
|
***************************************************************************/
|
15
13
|
|
16
|
-
|
14
|
+
/***************************************************************************
|
15
|
+
* Coordinator
|
16
|
+
***************************************************************************/
|
17
|
+
|
18
|
+
typedef struct Coordinator
|
17
19
|
{
|
18
|
-
|
19
|
-
|
20
|
-
|
20
|
+
int max_coord;
|
21
|
+
float *coord_factors;
|
22
|
+
Similarity *similarity;
|
23
|
+
int num_matches;
|
24
|
+
} Coordinator;
|
21
25
|
|
22
|
-
|
23
|
-
|
26
|
+
static Coordinator *coord_new(Similarity *similarity)
|
27
|
+
{
|
28
|
+
Coordinator *self = ALLOC_AND_ZERO(Coordinator);
|
29
|
+
self->similarity = similarity;
|
30
|
+
return self;
|
31
|
+
}
|
24
32
|
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
}
|
30
|
-
}
|
33
|
+
static Coordinator *coord_init(Coordinator *self)
|
34
|
+
{
|
35
|
+
int i;
|
36
|
+
self->coord_factors = ALLOC_N(float, self->max_coord + 1);
|
31
37
|
|
32
|
-
|
38
|
+
for (i = 0; i <= self->max_coord; i++) {
|
39
|
+
self->coord_factors[i]
|
40
|
+
= sim_coord(self->similarity, i, self->max_coord);
|
41
|
+
}
|
33
42
|
|
34
|
-
|
43
|
+
return self;
|
35
44
|
}
|
36
45
|
|
37
|
-
|
46
|
+
/***************************************************************************
|
47
|
+
* DisjunctionSumScorer
|
48
|
+
***************************************************************************/
|
49
|
+
|
50
|
+
#define DSSc(scorer) ((DisjunctionSumScorer *)(scorer))
|
51
|
+
|
52
|
+
typedef struct DisjunctionSumScorer
|
38
53
|
{
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
54
|
+
Scorer super;
|
55
|
+
float cum_score;
|
56
|
+
int num_matches;
|
57
|
+
int min_num_matches;
|
58
|
+
Scorer **sub_scorers;
|
59
|
+
int ss_cnt;
|
60
|
+
PriorityQueue *scorer_queue;
|
61
|
+
Coordinator *coordinator;
|
62
|
+
} DisjunctionSumScorer;
|
44
63
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
weight->normalize(weight, normalization_factor); // sum sub-weights
|
49
|
-
}
|
50
|
-
}
|
64
|
+
static float dssc_score(Scorer *self)
|
65
|
+
{
|
66
|
+
return DSSc(self)->cum_score;
|
51
67
|
}
|
52
68
|
|
53
|
-
|
69
|
+
static void dssc_init_scorer_queue(DisjunctionSumScorer *dssc)
|
54
70
|
{
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
sub_scorer = weight->scorer(weight, ir);
|
66
|
-
if (sub_scorer) {
|
67
|
-
bsc_add_scorer(bsc, sub_scorer, clause->occur);
|
68
|
-
} else if (clause->is_required) {
|
69
|
-
bsc->destroy(bsc);
|
70
|
-
return NULL;
|
71
|
+
int i;
|
72
|
+
Scorer *sub_scorer;
|
73
|
+
PriorityQueue *pq = dssc->scorer_queue
|
74
|
+
= pq_new(dssc->ss_cnt, (lt_ft)&scorer_doc_less_than, NULL);
|
75
|
+
|
76
|
+
for (i = 0; i < dssc->ss_cnt; i++) {
|
77
|
+
sub_scorer = dssc->sub_scorers[i];
|
78
|
+
if (sub_scorer->next(sub_scorer)) {
|
79
|
+
pq_insert(pq, sub_scorer);
|
80
|
+
}
|
71
81
|
}
|
72
|
-
|
82
|
+
}
|
83
|
+
|
84
|
+
static bool dssc_advance_after_current(Scorer *self)
|
85
|
+
{
|
86
|
+
DisjunctionSumScorer *dssc = DSSc(self);
|
87
|
+
PriorityQueue *scorer_queue = dssc->scorer_queue;
|
88
|
+
|
89
|
+
/* repeat until minimum number of matches is found */
|
90
|
+
while (true) {
|
91
|
+
Scorer *top = (Scorer *)pq_top(scorer_queue);
|
92
|
+
self->doc = top->doc;
|
93
|
+
dssc->cum_score = top->score(top);
|
94
|
+
dssc->num_matches = 1;
|
95
|
+
/* Until all sub-scorers are after self->doc */
|
96
|
+
while (true) {
|
97
|
+
if (top->next(top)) {
|
98
|
+
pq_down(scorer_queue);
|
99
|
+
}
|
100
|
+
else {
|
101
|
+
pq_pop(scorer_queue);
|
102
|
+
if (scorer_queue->size
|
103
|
+
< (dssc->min_num_matches - dssc->num_matches)) {
|
104
|
+
/* Not enough subscorers left for a match on this
|
105
|
+
* document, also no more chance of any further match */
|
106
|
+
return false;
|
107
|
+
}
|
108
|
+
if (scorer_queue->size == 0) {
|
109
|
+
/* nothing more to advance, check for last match. */
|
110
|
+
break;
|
111
|
+
}
|
112
|
+
}
|
113
|
+
top = pq_top(scorer_queue);
|
114
|
+
if (top->doc != self->doc) {
|
115
|
+
/* All remaining subscorers are after self->doc */
|
116
|
+
break;
|
117
|
+
}
|
118
|
+
else {
|
119
|
+
dssc->cum_score += top->score(top);
|
120
|
+
dssc->num_matches++;
|
121
|
+
}
|
122
|
+
}
|
73
123
|
|
74
|
-
|
124
|
+
if (dssc->num_matches >= dssc->min_num_matches) {
|
125
|
+
return true;
|
126
|
+
}
|
127
|
+
else if (scorer_queue->size < dssc->min_num_matches) {
|
128
|
+
return false;
|
129
|
+
}
|
130
|
+
}
|
75
131
|
}
|
76
132
|
|
77
|
-
|
133
|
+
static bool dssc_next(Scorer *self)
|
78
134
|
{
|
79
|
-
|
135
|
+
if (DSSc(self)->scorer_queue == NULL) {
|
136
|
+
dssc_init_scorer_queue(DSSc(self));
|
137
|
+
}
|
138
|
+
|
139
|
+
if (DSSc(self)->scorer_queue->size < DSSc(self)->min_num_matches) {
|
140
|
+
return false;
|
141
|
+
}
|
142
|
+
else {
|
143
|
+
return dssc_advance_after_current(self);
|
144
|
+
}
|
80
145
|
}
|
81
146
|
|
82
|
-
|
147
|
+
static bool dssc_skip_to(Scorer *self, int doc_num)
|
83
148
|
{
|
84
|
-
|
85
|
-
|
149
|
+
DisjunctionSumScorer *dssc = DSSc(self);
|
150
|
+
PriorityQueue *scorer_queue = dssc->scorer_queue;
|
86
151
|
|
87
|
-
|
88
|
-
|
89
|
-
|
152
|
+
if (scorer_queue == NULL) {
|
153
|
+
dssc_init_scorer_queue(dssc);
|
154
|
+
scorer_queue = dssc->scorer_queue;
|
155
|
+
}
|
90
156
|
|
91
|
-
|
92
|
-
|
93
|
-
|
157
|
+
if (scorer_queue->size < dssc->min_num_matches) {
|
158
|
+
return false;
|
159
|
+
}
|
160
|
+
if (doc_num <= self->doc) {
|
161
|
+
doc_num = self->doc + 1;
|
162
|
+
}
|
163
|
+
while (true) {
|
164
|
+
Scorer *top = pq_top(scorer_queue);
|
165
|
+
if (top->doc >= doc_num) {
|
166
|
+
return dssc_advance_after_current(self);
|
167
|
+
}
|
168
|
+
else if (top->skip_to(top, doc_num)) {
|
169
|
+
pq_down(scorer_queue);
|
170
|
+
}
|
171
|
+
else {
|
172
|
+
pq_pop(scorer_queue);
|
173
|
+
if (scorer_queue->size < dssc->min_num_matches) {
|
174
|
+
return false;
|
175
|
+
}
|
176
|
+
}
|
177
|
+
}
|
94
178
|
}
|
95
179
|
|
96
|
-
Explanation *
|
180
|
+
static Explanation *dssc_explain(Scorer *self, int doc_num)
|
97
181
|
{
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
182
|
+
int i;
|
183
|
+
DisjunctionSumScorer *dssc = DSSc(self);
|
184
|
+
Scorer *sub_scorer;
|
185
|
+
Explanation *e
|
186
|
+
= expl_new(0.0, "At least %d of:", dssc->min_num_matches);
|
187
|
+
for (i = 0; i < dssc->ss_cnt; i++) {
|
188
|
+
sub_scorer = dssc->sub_scorers[i];
|
189
|
+
expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
|
190
|
+
}
|
191
|
+
return e;
|
192
|
+
}
|
109
193
|
|
110
|
-
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
115
|
-
|
116
|
-
if (!clause->is_prohibited) {
|
117
|
-
expl_add_detail(sum_expl, explanation);
|
118
|
-
sum += explanation->value;
|
119
|
-
coord++;
|
120
|
-
} else {
|
121
|
-
expl_destoy(explanation);
|
122
|
-
expl_destoy(sum_expl);
|
123
|
-
return expl_create(0.0, estrdup("match prohibited"));
|
124
|
-
}
|
125
|
-
} else if (clause->is_required) {
|
126
|
-
expl_destoy(explanation);
|
127
|
-
expl_destoy(sum_expl);
|
128
|
-
return expl_create(0.0, estrdup("match required"));
|
129
|
-
} else {
|
130
|
-
expl_destoy(explanation);
|
194
|
+
static void dssc_destroy(Scorer *self)
|
195
|
+
{
|
196
|
+
DisjunctionSumScorer *dssc = DSSc(self);
|
197
|
+
int i;
|
198
|
+
for (i = 0; i < dssc->ss_cnt; i++) {
|
199
|
+
dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
|
131
200
|
}
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
explanation = sum_expl; /* eliminate wrapper */
|
137
|
-
sum_expl->dcnt = 0;
|
138
|
-
sum_expl = sum_expl->details[0];
|
139
|
-
expl_destoy(explanation);
|
140
|
-
}
|
141
|
-
|
142
|
-
coord_factor = sim_coord(self->similarity, coord, max_coord);
|
143
|
-
|
144
|
-
if (coord_factor == 1.0) { /* coord is no-op */
|
145
|
-
return sum_expl; /* eliminate wrapper */
|
146
|
-
} else {
|
147
|
-
explanation = expl_create(sum * coord_factor, estrdup("product of:"));
|
148
|
-
expl_add_detail(explanation, sum_expl);
|
149
|
-
expl_add_detail(explanation, expl_create(coord_factor,
|
150
|
-
strfmt("coord(%d/%d)", coord, max_coord)));
|
151
|
-
return explanation;
|
152
|
-
}
|
153
|
-
}
|
154
|
-
|
155
|
-
Weight *bw_create(Query *query, Searcher *searcher)
|
156
|
-
{
|
157
|
-
int i;
|
158
|
-
Weight *self = w_create(query);
|
159
|
-
BooleanWeight *bw = ALLOC(BooleanWeight);
|
160
|
-
BooleanQuery *bq = (BooleanQuery *)query->data;
|
161
|
-
|
162
|
-
bw->w_cnt = bq->clause_cnt;
|
163
|
-
bw->weights = ALLOC_N(Weight *, bw->w_cnt);
|
164
|
-
for (i = 0; i < bw->w_cnt; i++) {
|
165
|
-
bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
|
166
|
-
}
|
167
|
-
self->data = bw;
|
168
|
-
|
169
|
-
self->normalize = &bw_normalize;
|
170
|
-
self->scorer = &bw_scorer;
|
171
|
-
self->explain = &bw_explain;
|
172
|
-
self->to_s = &bw_to_s;
|
173
|
-
self->destroy = &bw_destroy;
|
174
|
-
self->sum_of_squared_weights = &bw_sum_of_squared_weights;
|
175
|
-
|
176
|
-
self->similarity = query->get_similarity(query, searcher);
|
177
|
-
self->value = query->boost;
|
178
|
-
|
179
|
-
return self;
|
201
|
+
if (dssc->scorer_queue) {
|
202
|
+
pq_destroy(dssc->scorer_queue);
|
203
|
+
}
|
204
|
+
scorer_destroy_i(self);
|
180
205
|
}
|
181
206
|
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
*
|
186
|
-
|
207
|
+
static Scorer *disjunction_sum_scorer_new(Scorer **sub_scorers, int ss_cnt,
|
208
|
+
int min_num_matches)
|
209
|
+
{
|
210
|
+
Scorer *self = scorer_new(DisjunctionSumScorer, NULL);
|
211
|
+
DSSc(self)->ss_cnt = ss_cnt;
|
212
|
+
|
213
|
+
/* The document number of the current match */
|
214
|
+
self->doc = -1;
|
215
|
+
DSSc(self)->cum_score = -1.0;
|
216
|
+
|
217
|
+
/* The number of subscorers that provide the current match. */
|
218
|
+
DSSc(self)->num_matches = -1;
|
219
|
+
DSSc(self)->coordinator = NULL;
|
187
220
|
|
188
|
-
|
189
|
-
{
|
190
|
-
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
221
|
+
#ifdef DEBUG
|
222
|
+
if (min_num_matches <= 0) {
|
223
|
+
RAISE(ARG_ERROR, "The min_num_matches value <%d> should not be less "
|
224
|
+
"than 0\n", min_num_matches);
|
225
|
+
}
|
226
|
+
if (ss_cnt <= 1) {
|
227
|
+
RAISE(ARG_ERROR, "There should be at least 2 sub_scorers in a "
|
228
|
+
"DiscjunctionSumScorer. <%d> is not enough", ss_cnt);
|
229
|
+
}
|
230
|
+
#endif
|
231
|
+
|
232
|
+
DSSc(self)->min_num_matches = min_num_matches;
|
233
|
+
DSSc(self)->sub_scorers = sub_scorers;
|
234
|
+
DSSc(self)->scorer_queue = NULL;
|
235
|
+
|
236
|
+
self->score = &dssc_score;
|
237
|
+
self->next = &dssc_next;
|
238
|
+
self->skip_to = &dssc_skip_to;
|
239
|
+
self->explain = &dssc_explain;
|
240
|
+
self->destroy = &dssc_destroy;
|
241
|
+
|
242
|
+
return self;
|
207
243
|
}
|
208
244
|
|
209
|
-
|
245
|
+
static float cdssc_score(Scorer *self)
|
210
246
|
{
|
211
|
-
|
212
|
-
|
213
|
-
free(self);
|
214
|
-
}
|
247
|
+
DSSc(self)->coordinator->num_matches += DSSc(self)->num_matches;
|
248
|
+
return DSSc(self)->cum_score;
|
215
249
|
}
|
216
250
|
|
217
|
-
|
251
|
+
static Scorer *counting_disjunction_sum_scorer_new(
|
252
|
+
Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt,
|
253
|
+
int min_num_matches)
|
218
254
|
{
|
219
|
-
|
255
|
+
Scorer *self = disjunction_sum_scorer_new(sub_scorers, ss_cnt,
|
256
|
+
min_num_matches);
|
257
|
+
DSSc(self)->coordinator = coordinator;
|
258
|
+
self->score = &cdssc_score;
|
259
|
+
return self;
|
220
260
|
}
|
221
261
|
|
222
|
-
|
262
|
+
/***************************************************************************
|
263
|
+
* ConjunctionScorer
|
264
|
+
***************************************************************************/
|
265
|
+
|
266
|
+
#define CSc(scorer) ((ConjunctionScorer *)(scorer))
|
267
|
+
|
268
|
+
typedef struct ConjunctionScorer
|
223
269
|
{
|
224
|
-
|
225
|
-
|
270
|
+
Scorer super;
|
271
|
+
bool first_time : 1;
|
272
|
+
bool more : 1;
|
273
|
+
float coord;
|
274
|
+
Scorer **sub_scorers;
|
275
|
+
int ss_cnt;
|
276
|
+
int first_idx;
|
277
|
+
Coordinator *coordinator;
|
278
|
+
int last_scored_doc;
|
279
|
+
} ConjunctionScorer;
|
226
280
|
|
227
|
-
|
281
|
+
static void csc_sort_scorers(ConjunctionScorer *csc)
|
228
282
|
{
|
229
|
-
|
230
|
-
|
231
|
-
self->query = query;
|
232
|
-
bc_set_occur(self, occur);
|
233
|
-
return self;
|
283
|
+
qsort(csc->sub_scorers, csc->ss_cnt, sizeof(Scorer *), &scorer_doc_cmp);
|
284
|
+
csc->first_idx = 0;
|
234
285
|
}
|
235
286
|
|
236
|
-
|
237
|
-
|
238
|
-
*
|
239
|
-
|
240
|
-
***************************************************************************/
|
287
|
+
static void csc_init(Scorer *self, bool init_scorers)
|
288
|
+
{
|
289
|
+
ConjunctionScorer *csc = CSc(self);
|
290
|
+
const int sub_sc_cnt = csc->ss_cnt;
|
241
291
|
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
|
250
|
-
|
251
|
-
|
252
|
-
|
253
|
-
|
254
|
-
|
255
|
-
|
256
|
-
* something else it means this query has already been boosted before
|
257
|
-
* so boost from the original value */
|
258
|
-
if ((q == clause->query) && bq->original_boost) {
|
259
|
-
/* rewrite was no-op */
|
260
|
-
q->boost = bq->original_boost * self->boost;
|
261
|
-
} else {
|
262
|
-
bq->original_boost = q->boost; /* save original boost */
|
263
|
-
q->boost *= self->boost;
|
292
|
+
/* compute coord factor */
|
293
|
+
csc->coord = sim_coord(self->similarity, sub_sc_cnt, sub_sc_cnt);
|
294
|
+
|
295
|
+
csc->more = (sub_sc_cnt > 0);
|
296
|
+
|
297
|
+
if (init_scorers) {
|
298
|
+
int i;
|
299
|
+
/* move each scorer to its first entry */
|
300
|
+
for (i = 0; i < sub_sc_cnt; i++) {
|
301
|
+
Scorer *sub_scorer = csc->sub_scorers[i];
|
302
|
+
if (!csc->more) {
|
303
|
+
break;
|
304
|
+
}
|
305
|
+
csc->more = sub_scorer->next(sub_scorer);
|
264
306
|
}
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
}
|
269
|
-
|
270
|
-
|
271
|
-
/* replace each clause's query with its rewritten query */
|
272
|
-
for (i = 0; i < bq->clause_cnt; i++) {
|
273
|
-
clause = bq->clauses[i];
|
274
|
-
rq = clause->query->rewrite(clause->query, ir);
|
275
|
-
q_deref(clause->query);
|
276
|
-
clause->query = rq;
|
277
|
-
}
|
278
|
-
|
279
|
-
self->ref_cnt++;
|
280
|
-
return self;
|
281
|
-
}
|
282
|
-
|
283
|
-
void bq_extract_terms(Query *self, HashSet *terms)
|
284
|
-
{
|
285
|
-
BooleanQuery *bq = (BooleanQuery *)self->data;
|
286
|
-
BooleanClause *clause;
|
287
|
-
int i;
|
288
|
-
for (i = 0; i < bq->clause_cnt; i++) {
|
289
|
-
clause = bq->clauses[i];
|
290
|
-
clause->query->extract_terms(clause->query, terms);
|
291
|
-
}
|
292
|
-
}
|
293
|
-
|
294
|
-
char *bq_to_s(Query *self, char *field)
|
295
|
-
{
|
296
|
-
int i;
|
297
|
-
BooleanQuery *bq = (BooleanQuery *)self->data;
|
298
|
-
BooleanClause *clause;
|
299
|
-
Query *sub_query;
|
300
|
-
char *buffer;
|
301
|
-
char *clause_str;
|
302
|
-
int bp = 0;
|
303
|
-
int size = QUERY_STRING_START_SIZE;
|
304
|
-
int needed;
|
305
|
-
int clause_len;
|
306
|
-
|
307
|
-
buffer = ALLOC_N(char, size);
|
308
|
-
if (self->boost != 1.0) {
|
309
|
-
buffer[0] = '(';
|
310
|
-
bp++;
|
311
|
-
}
|
312
|
-
|
313
|
-
for (i = 0; i < bq->clause_cnt; i++) {
|
314
|
-
clause = bq->clauses[i];
|
315
|
-
clause_str = clause->query->to_s(clause->query, field);
|
316
|
-
clause_len = (int)strlen(clause_str);
|
317
|
-
needed = clause_len + 5;
|
318
|
-
while ((size - bp) < needed) {
|
319
|
-
size *= 2;
|
320
|
-
REALLOC_N(buffer, char, size);
|
321
|
-
}
|
322
|
-
|
323
|
-
if (i > 0) {
|
324
|
-
buffer[bp++] = ' ';
|
325
|
-
}
|
326
|
-
if (clause->is_prohibited) {
|
327
|
-
buffer[bp++] = '-';
|
328
|
-
} else if (clause->is_required) {
|
329
|
-
buffer[bp++] = '+';
|
330
|
-
}
|
331
|
-
|
332
|
-
sub_query = clause->query;
|
333
|
-
if (sub_query->type == BOOLEAN_QUERY) { // wrap sub-bools in parens
|
334
|
-
buffer[bp++] = '(';
|
335
|
-
memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
|
336
|
-
bp += clause_len;
|
337
|
-
buffer[bp++] = ')';
|
338
|
-
} else {
|
339
|
-
memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
|
340
|
-
bp += clause_len;
|
341
|
-
}
|
342
|
-
free(clause_str);
|
343
|
-
}
|
344
|
-
|
345
|
-
if (self->boost != 1.0) {
|
346
|
-
char *boost_str = strfmt(")^%f", self->boost);
|
347
|
-
int boost_len = (int)strlen(boost_str);
|
348
|
-
REALLOC_N(buffer, char, bp + boost_len + 1);
|
349
|
-
memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
|
350
|
-
bp += boost_len;
|
351
|
-
free(boost_str);
|
352
|
-
}
|
353
|
-
buffer[bp] = 0;
|
354
|
-
return buffer;
|
307
|
+
if (csc->more) {
|
308
|
+
csc_sort_scorers(csc);
|
309
|
+
}
|
310
|
+
}
|
311
|
+
|
312
|
+
csc->first_time = false;
|
355
313
|
}
|
356
314
|
|
357
|
-
static
|
315
|
+
static float csc_score(Scorer *self)
|
358
316
|
{
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
367
|
-
|
368
|
-
|
369
|
-
|
317
|
+
ConjunctionScorer *csc = CSc(self);
|
318
|
+
const int sub_sc_cnt = csc->ss_cnt;
|
319
|
+
float score = 0.0; /* sum scores */
|
320
|
+
int i;
|
321
|
+
for (i = 0; i < sub_sc_cnt; i++) {
|
322
|
+
Scorer *sub_scorer = csc->sub_scorers[i];
|
323
|
+
score += sub_scorer->score(sub_scorer);
|
324
|
+
}
|
325
|
+
score *= csc->coord;
|
326
|
+
return score;
|
327
|
+
}
|
328
|
+
|
329
|
+
static bool csc_do_next(Scorer *self)
|
330
|
+
{
|
331
|
+
ConjunctionScorer *csc = CSc(self);
|
332
|
+
const int sub_sc_cnt = csc->ss_cnt;
|
333
|
+
int first_idx = csc->first_idx;
|
334
|
+
Scorer *first_sc = csc->sub_scorers[first_idx];
|
335
|
+
Scorer *last_sc = csc->sub_scorers[PREV_NUM(first_idx, sub_sc_cnt)];
|
336
|
+
|
337
|
+
/* skip to doc with all clauses */
|
338
|
+
while (csc->more && (first_sc->doc < last_sc->doc)) {
|
339
|
+
/* skip first upto last */
|
340
|
+
csc->more = first_sc->skip_to(first_sc, last_sc->doc);
|
341
|
+
/* move first to last */
|
342
|
+
last_sc = first_sc;
|
343
|
+
first_idx = NEXT_NUM(first_idx, sub_sc_cnt);
|
344
|
+
first_sc = csc->sub_scorers[first_idx];
|
345
|
+
}
|
346
|
+
self->doc = first_sc->doc;
|
347
|
+
csc->first_idx = first_idx;
|
348
|
+
return csc->more;
|
370
349
|
}
|
371
350
|
|
372
|
-
|
351
|
+
static bool csc_next(Scorer *self)
|
373
352
|
{
|
374
|
-
|
353
|
+
ConjunctionScorer *csc = CSc(self);
|
354
|
+
if (csc->first_time) {
|
355
|
+
csc_init(self, true);
|
356
|
+
}
|
357
|
+
else if (csc->more) {
|
358
|
+
/* trigger further scanning */
|
359
|
+
const int last_idx = PREV_NUM(csc->first_idx, csc->ss_cnt);
|
360
|
+
Scorer *sub_scorer = csc->sub_scorers[last_idx];
|
361
|
+
csc->more = sub_scorer->next(sub_scorer);
|
362
|
+
}
|
363
|
+
return csc_do_next(self);
|
375
364
|
}
|
376
365
|
|
377
|
-
|
366
|
+
static bool csc_skip_to(Scorer *self, int doc_num)
|
378
367
|
{
|
379
|
-
|
380
|
-
|
381
|
-
|
382
|
-
|
383
|
-
memcpy(bq->similarity, sim, sizeof(Similarity));
|
384
|
-
bq->similarity->coord = &bq_coord_disabled;
|
385
|
-
bq->similarity->destroy = (void (*)(Similarity *))&free;
|
386
|
-
}
|
368
|
+
ConjunctionScorer *csc = CSc(self);
|
369
|
+
const int sub_sc_cnt = csc->ss_cnt;
|
370
|
+
int i;
|
371
|
+
bool more = csc->more;
|
387
372
|
|
388
|
-
|
389
|
-
|
373
|
+
if (csc->first_time) {
|
374
|
+
csc_init(self, true);
|
375
|
+
}
|
390
376
|
|
391
|
-
|
392
|
-
{
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
-
|
397
|
-
|
398
|
-
|
399
|
-
|
377
|
+
for (i = 0; i < sub_sc_cnt; i++) {
|
378
|
+
if (!more) {
|
379
|
+
break;
|
380
|
+
}
|
381
|
+
else {
|
382
|
+
Scorer *sub_scorer = csc->sub_scorers[i];
|
383
|
+
more = sub_scorer->skip_to(sub_scorer, doc_num);
|
384
|
+
}
|
385
|
+
}
|
386
|
+
if (more) {
|
387
|
+
/* resort the scorers */
|
388
|
+
csc_sort_scorers(csc);
|
389
|
+
}
|
390
|
+
|
391
|
+
more = csc->more;
|
392
|
+
return csc_do_next(self);
|
400
393
|
}
|
401
394
|
|
402
|
-
static
|
395
|
+
static void csc_destroy(Scorer *self)
|
403
396
|
{
|
404
|
-
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
for (i = 0; i < bq1->clause_cnt; i++) {
|
414
|
-
if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
|
415
|
-
return false;
|
416
|
-
}
|
417
|
-
}
|
418
|
-
return true;
|
419
|
-
}
|
420
|
-
|
421
|
-
Query *bq_create(bool coord_disabled)
|
422
|
-
{
|
423
|
-
Query *self = q_create();
|
424
|
-
BooleanQuery *bq = ALLOC(BooleanQuery);
|
425
|
-
bq->coord_disabled = coord_disabled;
|
426
|
-
if (coord_disabled) {
|
427
|
-
self->get_similarity = &bq_get_similarity;
|
428
|
-
}
|
429
|
-
bq->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
|
430
|
-
bq->clause_cnt = 0;
|
431
|
-
bq->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
|
432
|
-
bq->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
|
433
|
-
bq->similarity = NULL;
|
434
|
-
bq->original_boost = 0.0;
|
435
|
-
self->data = bq;
|
436
|
-
|
437
|
-
self->type = BOOLEAN_QUERY;
|
438
|
-
self->rewrite = &bq_rewrite;
|
439
|
-
self->extract_terms = &bq_extract_terms;
|
440
|
-
self->to_s = &bq_to_s;
|
441
|
-
self->hash = &bq_hash;
|
442
|
-
self->eq = &bq_eq;
|
443
|
-
self->destroy_i = &bq_destroy;
|
444
|
-
self->create_weight_i = &bw_create;
|
445
|
-
return self;
|
397
|
+
ConjunctionScorer *csc = CSc(self);
|
398
|
+
const int sub_sc_cnt = csc->ss_cnt;
|
399
|
+
int i;
|
400
|
+
for (i = 0; i < sub_sc_cnt; i++) {
|
401
|
+
csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
|
402
|
+
}
|
403
|
+
free(csc->sub_scorers);
|
404
|
+
scorer_destroy_i(self);
|
446
405
|
}
|
447
406
|
|
448
|
-
|
407
|
+
static Scorer *conjunction_scorer_new(Similarity *similarity)
|
449
408
|
{
|
450
|
-
|
451
|
-
if (!self->destroy_all) ref(bc);
|
452
|
-
if (bq->clause_cnt >= bq->clause_capa) {
|
453
|
-
bq->clause_capa *= 2;
|
454
|
-
REALLOC_N(bq->clauses, BooleanClause *, bq->clause_capa);
|
455
|
-
}
|
456
|
-
if (bq->clause_cnt > bq->max_clause_cnt) {
|
457
|
-
RAISE(STATE_ERROR, TOO_MANY_CLAUSES_ERROR_MSG);
|
458
|
-
}
|
459
|
-
bq->clauses[bq->clause_cnt] = bc;
|
460
|
-
bq->clause_cnt++;
|
461
|
-
return bc;
|
462
|
-
}
|
463
|
-
|
464
|
-
BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur)
|
465
|
-
{
|
466
|
-
BooleanClause *bc = bc_create(sub_query, occur);
|
467
|
-
bq_add_clause(self, bc);
|
468
|
-
if (!self->destroy_all) {
|
469
|
-
ref(sub_query);
|
470
|
-
bc_deref(bc); /* bc would have been referenced unnecessarily */
|
471
|
-
}
|
472
|
-
return bc;
|
473
|
-
}
|
409
|
+
Scorer *self = scorer_new(ConjunctionScorer, similarity);
|
474
410
|
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
*
|
479
|
-
***************************************************************************/
|
411
|
+
CSc(self)->first_time = true;
|
412
|
+
CSc(self)->more = true;
|
413
|
+
CSc(self)->coordinator = NULL;
|
480
414
|
|
481
|
-
|
482
|
-
|
483
|
-
|
415
|
+
self->score = &csc_score;
|
416
|
+
self->next = &csc_next;
|
417
|
+
self->skip_to = &csc_skip_to;
|
418
|
+
self->destroy = &csc_destroy;
|
484
419
|
|
485
|
-
|
420
|
+
return self;
|
421
|
+
}
|
422
|
+
|
423
|
+
static float ccsc_score(Scorer *self)
|
486
424
|
{
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
425
|
+
ConjunctionScorer *csc = CSc(self);
|
426
|
+
|
427
|
+
int doc;
|
428
|
+
if ((doc = self->doc) > csc->last_scored_doc) {
|
429
|
+
csc->last_scored_doc = doc;
|
430
|
+
csc->coordinator->num_matches += csc->ss_cnt;
|
431
|
+
}
|
432
|
+
|
433
|
+
return csc_score(self);
|
491
434
|
}
|
492
435
|
|
493
|
-
|
436
|
+
static Scorer *counting_conjunction_sum_scorer_new(
|
437
|
+
Coordinator *coordinator, Scorer **sub_scorers, int ss_cnt)
|
494
438
|
{
|
495
|
-
|
496
|
-
|
439
|
+
Scorer *self = conjunction_scorer_new(sim_create_default());
|
440
|
+
ConjunctionScorer *csc = CSc(self);
|
441
|
+
csc->coordinator = coordinator;
|
442
|
+
csc->last_scored_doc = -1;
|
443
|
+
csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
|
444
|
+
memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
|
445
|
+
csc->ss_cnt = ss_cnt;
|
497
446
|
|
498
|
-
|
499
|
-
self->coord_factors[i] = sim_coord(self->similarity, i, self->max_coord);
|
500
|
-
}
|
447
|
+
self->score = &ccsc_score;
|
501
448
|
|
502
|
-
|
449
|
+
return self;
|
503
450
|
}
|
504
451
|
|
505
452
|
/***************************************************************************
|
506
|
-
*
|
453
|
+
* SingleMatchScorer
|
507
454
|
***************************************************************************/
|
508
455
|
|
509
|
-
|
456
|
+
#define SMSc(scorer) ((SingleMatchScorer *)(scorer))
|
457
|
+
|
458
|
+
typedef struct SingleMatchScorer
|
510
459
|
{
|
511
|
-
|
512
|
-
|
513
|
-
|
460
|
+
Scorer super;
|
461
|
+
Coordinator *coordinator;
|
462
|
+
Scorer *scorer;
|
463
|
+
} SingleMatchScorer;
|
464
|
+
|
514
465
|
|
515
|
-
|
466
|
+
static float smsc_score(Scorer *self)
|
516
467
|
{
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
pq_create(dssc->ss_cnt, &scorer_doc_less_than);
|
468
|
+
SMSc(self)->coordinator->num_matches++;
|
469
|
+
return SMSc(self)->scorer->score(SMSc(self)->scorer);
|
470
|
+
}
|
521
471
|
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
472
|
+
static bool smsc_next(Scorer *self)
|
473
|
+
{
|
474
|
+
Scorer *scorer = SMSc(self)->scorer;
|
475
|
+
if (scorer->next(scorer)) {
|
476
|
+
self->doc = scorer->doc;
|
477
|
+
return true;
|
526
478
|
}
|
527
|
-
|
479
|
+
return false;
|
528
480
|
}
|
529
481
|
|
530
|
-
bool
|
482
|
+
static bool smsc_skip_to(Scorer *self, int doc_num)
|
531
483
|
{
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
top = (Scorer *)pq_top(scorer_queue);
|
537
|
-
self->doc = top->doc;
|
538
|
-
dssc->cum_score = top->score(top);
|
539
|
-
dssc->num_matches = 1;
|
540
|
-
while (true) { // Until all subscorers are after self->hit.doc
|
541
|
-
if (top->next(top)) {
|
542
|
-
pq_down(scorer_queue);
|
543
|
-
} else {
|
544
|
-
pq_pop(scorer_queue);
|
545
|
-
if (scorer_queue->count < (dssc->min_num_matches - dssc->num_matches)) {
|
546
|
-
// Not enough subscorers left for a match on this document,
|
547
|
-
// and also no more chance of any further match.
|
548
|
-
return false;
|
549
|
-
}
|
550
|
-
if (scorer_queue->count == 0) {
|
551
|
-
break; // nothing more to advance, check for last match.
|
552
|
-
}
|
553
|
-
}
|
554
|
-
top = pq_top(scorer_queue);
|
555
|
-
if (top->doc != self->doc) {
|
556
|
-
break; // All remaining subscorers are after self->hit.doc.
|
557
|
-
} else {
|
558
|
-
dssc->cum_score += top->score(top);
|
559
|
-
dssc->num_matches++;
|
560
|
-
}
|
484
|
+
Scorer *scorer = SMSc(self)->scorer;
|
485
|
+
if (scorer->skip_to(scorer, doc_num)) {
|
486
|
+
self->doc = scorer->doc;
|
487
|
+
return true;
|
561
488
|
}
|
489
|
+
return false;
|
490
|
+
}
|
562
491
|
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
}
|
568
|
-
}
|
492
|
+
static Explanation *smsc_explain(Scorer *self, int doc_num)
|
493
|
+
{
|
494
|
+
Scorer *scorer = SMSc(self)->scorer;
|
495
|
+
return scorer->explain(scorer, doc_num);
|
569
496
|
}
|
570
497
|
|
571
|
-
|
498
|
+
static void smsc_destroy(Scorer *self)
|
572
499
|
{
|
573
|
-
|
500
|
+
Scorer *scorer = SMSc(self)->scorer;
|
501
|
+
scorer->destroy(scorer);
|
502
|
+
scorer_destroy_i(self);
|
503
|
+
}
|
574
504
|
|
575
|
-
|
576
|
-
|
577
|
-
|
505
|
+
static Scorer *single_match_scorer_new(Coordinator *coordinator,
|
506
|
+
Scorer *scorer)
|
507
|
+
{
|
508
|
+
Scorer *self = scorer_new(SingleMatchScorer, scorer->similarity);
|
509
|
+
SMSc(self)->coordinator = coordinator;
|
510
|
+
SMSc(self)->scorer = scorer;
|
578
511
|
|
579
|
-
|
580
|
-
|
581
|
-
|
582
|
-
|
583
|
-
|
512
|
+
self->score = &smsc_score;
|
513
|
+
self->next = &smsc_next;
|
514
|
+
self->skip_to = &smsc_skip_to;
|
515
|
+
self->explain = &smsc_explain;
|
516
|
+
self->destroy = &smsc_destroy;
|
517
|
+
return self;
|
584
518
|
}
|
585
519
|
|
586
|
-
|
520
|
+
/***************************************************************************
|
521
|
+
* ReqOptSumScorer
|
522
|
+
***************************************************************************/
|
523
|
+
|
524
|
+
#define ROSSc(scorer) ((ReqOptSumScorer *)(scorer))
|
525
|
+
|
526
|
+
typedef struct ReqOptSumScorer
|
587
527
|
{
|
588
|
-
|
589
|
-
|
590
|
-
|
528
|
+
Scorer super;
|
529
|
+
Scorer *req_scorer;
|
530
|
+
Scorer *opt_scorer;
|
531
|
+
bool first_time_opt;
|
532
|
+
} ReqOptSumScorer;
|
591
533
|
|
592
|
-
|
593
|
-
|
594
|
-
|
595
|
-
|
534
|
+
static float rossc_score(Scorer *self)
|
535
|
+
{
|
536
|
+
ReqOptSumScorer *rossc = ROSSc(self);
|
537
|
+
Scorer *req_scorer = rossc->req_scorer;
|
538
|
+
Scorer *opt_scorer = rossc->opt_scorer;
|
539
|
+
int cur_doc = req_scorer->doc;
|
540
|
+
float req_score = req_scorer->score(req_scorer);
|
596
541
|
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
return dssc_advance_after_current(self);
|
607
|
-
} else if (top->skip_to(top, doc_num)) {
|
608
|
-
pq_down(scorer_queue);
|
609
|
-
} else {
|
610
|
-
pq_pop(scorer_queue);
|
611
|
-
if (scorer_queue->count < dssc->min_num_matches) {
|
612
|
-
return false;
|
613
|
-
}
|
542
|
+
if (rossc->first_time_opt) {
|
543
|
+
rossc->first_time_opt = false;
|
544
|
+
if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
|
545
|
+
SCORER_NULLIFY(rossc->opt_scorer);
|
546
|
+
return req_score;
|
547
|
+
}
|
548
|
+
}
|
549
|
+
else if (opt_scorer == NULL) {
|
550
|
+
return req_score;
|
614
551
|
}
|
615
|
-
|
552
|
+
else if ((opt_scorer->doc < cur_doc)
|
553
|
+
&& ! opt_scorer->skip_to(opt_scorer, cur_doc)) {
|
554
|
+
SCORER_NULLIFY(rossc->opt_scorer);
|
555
|
+
return req_score;
|
556
|
+
}
|
557
|
+
/* assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc) */
|
558
|
+
return (opt_scorer->doc == cur_doc)
|
559
|
+
? req_score + opt_scorer->score(opt_scorer)
|
560
|
+
: req_score;
|
616
561
|
}
|
617
562
|
|
618
|
-
|
563
|
+
static bool rossc_next(Scorer *self)
|
619
564
|
{
|
620
|
-
|
621
|
-
|
622
|
-
|
623
|
-
|
624
|
-
|
625
|
-
|
626
|
-
sub_scorer = dssc->sub_scorers[i];
|
627
|
-
expl_add_detail(e, sub_scorer->explain(sub_scorer, doc_num));
|
628
|
-
}
|
629
|
-
return e;
|
565
|
+
Scorer *req_scorer = ROSSc(self)->req_scorer;
|
566
|
+
if (req_scorer->next(req_scorer)) {
|
567
|
+
self->doc = req_scorer->doc;
|
568
|
+
return true;
|
569
|
+
}
|
570
|
+
return false;
|
630
571
|
}
|
631
572
|
|
632
|
-
|
573
|
+
static bool rossc_skip_to(Scorer *self, int doc_num)
|
633
574
|
{
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
scorer_destroy_i(self);
|
575
|
+
Scorer *req_scorer = ROSSc(self)->req_scorer;
|
576
|
+
if (req_scorer->skip_to(req_scorer, doc_num)) {
|
577
|
+
self->doc = req_scorer->doc;
|
578
|
+
return true;
|
579
|
+
}
|
580
|
+
return false;
|
641
581
|
}
|
642
582
|
|
643
|
-
|
644
|
-
int min_num_matches)
|
583
|
+
static Explanation *rossc_explain(Scorer *self, int doc_num)
|
645
584
|
{
|
646
|
-
|
647
|
-
|
648
|
-
self->data = dssc;
|
649
|
-
dssc->ss_cnt = ss_cnt;
|
650
|
-
|
651
|
-
// The document number of the current match.
|
652
|
-
self->doc = -1;
|
653
|
-
dssc->cum_score = -1.0;
|
585
|
+
Scorer *req_scorer = ROSSc(self)->req_scorer;
|
586
|
+
Scorer *opt_scorer = ROSSc(self)->opt_scorer;
|
654
587
|
|
655
|
-
|
656
|
-
|
657
|
-
|
658
|
-
|
659
|
-
if (min_num_matches <= 0) {
|
660
|
-
RAISE(ARG_ERROR, MIN_NUM_MATCHES_ERROR_MSG);
|
661
|
-
}
|
662
|
-
if (ss_cnt <= 1) {
|
663
|
-
RAISE(ARG_ERROR, TWO_SUB_ERROR_MSG);
|
664
|
-
}
|
665
|
-
|
666
|
-
dssc->min_num_matches = min_num_matches;
|
667
|
-
dssc->sub_scorers = sub_scorers;
|
668
|
-
|
669
|
-
dssc->scorer_queue = NULL;
|
670
|
-
|
671
|
-
self->score = &dssc_score;
|
672
|
-
self->next = &dssc_next;
|
673
|
-
self->skip_to = &dssc_skip_to;
|
674
|
-
self->explain = &dssc_explain;
|
675
|
-
self->destroy = &dssc_destroy;
|
676
|
-
|
677
|
-
return self;
|
588
|
+
Explanation *e = expl_new(self->score(self),"required, optional:");
|
589
|
+
expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
|
590
|
+
expl_add_detail(e, opt_scorer->explain(opt_scorer, doc_num));
|
591
|
+
return e;
|
678
592
|
}
|
679
593
|
|
680
|
-
|
594
|
+
static void rossc_destroy(Scorer *self)
|
681
595
|
{
|
682
|
-
|
683
|
-
|
684
|
-
|
596
|
+
ReqOptSumScorer *rossc = ROSSc(self);
|
597
|
+
if (rossc->req_scorer) {
|
598
|
+
rossc->req_scorer->destroy(rossc->req_scorer);
|
599
|
+
}
|
600
|
+
if (rossc->opt_scorer) {
|
601
|
+
rossc->opt_scorer->destroy(rossc->opt_scorer);
|
602
|
+
}
|
603
|
+
scorer_destroy_i(self);
|
685
604
|
}
|
686
605
|
|
687
|
-
|
688
|
-
|
606
|
+
|
607
|
+
static Scorer *req_opt_sum_scorer_new(Scorer *req_scorer, Scorer *opt_scorer)
|
689
608
|
{
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
609
|
+
Scorer *self = scorer_new(ReqOptSumScorer, NULL);
|
610
|
+
|
611
|
+
ROSSc(self)->req_scorer = req_scorer;
|
612
|
+
ROSSc(self)->opt_scorer = opt_scorer;
|
613
|
+
ROSSc(self)->first_time_opt = true;
|
614
|
+
|
615
|
+
self->score = &rossc_score;
|
616
|
+
self->next = &rossc_next;
|
617
|
+
self->skip_to = &rossc_skip_to;
|
618
|
+
self->explain = &rossc_explain;
|
619
|
+
self->destroy = &rossc_destroy;
|
620
|
+
|
621
|
+
return self;
|
696
622
|
}
|
697
623
|
|
698
624
|
/***************************************************************************
|
699
|
-
*
|
625
|
+
* ReqExclScorer
|
700
626
|
***************************************************************************/
|
701
627
|
|
702
|
-
|
628
|
+
#define RXSc(scorer) ((ReqExclScorer *)(scorer))
|
629
|
+
typedef struct ReqExclScorer
|
703
630
|
{
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
631
|
+
Scorer super;
|
632
|
+
Scorer *req_scorer;
|
633
|
+
Scorer *excl_scorer;
|
634
|
+
bool first_time;
|
635
|
+
} ReqExclScorer;
|
708
636
|
|
709
|
-
|
637
|
+
static bool rxsc_to_non_excluded(Scorer *self)
|
710
638
|
{
|
711
|
-
|
712
|
-
|
713
|
-
|
639
|
+
Scorer *req_scorer = RXSc(self)->req_scorer;
|
640
|
+
Scorer *excl_scorer = RXSc(self)->excl_scorer;
|
641
|
+
int excl_doc = excl_scorer->doc, req_doc;
|
714
642
|
|
715
|
-
|
716
|
-
|
717
|
-
|
718
|
-
|
643
|
+
do {
|
644
|
+
/* may be excluded */
|
645
|
+
req_doc = req_scorer->doc;
|
646
|
+
if (req_doc < excl_doc) {
|
647
|
+
/* req_scorer advanced to before excl_scorer, ie. not excluded */
|
648
|
+
self->doc = req_doc;
|
649
|
+
return true;
|
650
|
+
}
|
651
|
+
else if (req_doc > excl_doc) {
|
652
|
+
if (! excl_scorer->skip_to(excl_scorer, req_doc)) {
|
653
|
+
/* emptied, no more exclusions */
|
654
|
+
SCORER_NULLIFY(RXSc(self)->excl_scorer);
|
655
|
+
self->doc = req_doc;
|
656
|
+
return true;
|
657
|
+
}
|
658
|
+
excl_doc = excl_scorer->doc;
|
659
|
+
if (excl_doc > req_doc) {
|
660
|
+
self->doc = req_doc;
|
661
|
+
return true; /* not excluded */
|
662
|
+
}
|
663
|
+
}
|
664
|
+
} while (req_scorer->next(req_scorer));
|
665
|
+
/* emptied, nothing left */
|
666
|
+
SCORER_NULLIFY(RXSc(self)->req_scorer);
|
667
|
+
return false;
|
668
|
+
}
|
719
669
|
|
720
|
-
|
721
|
-
|
670
|
+
static bool rxsc_next(Scorer *self)
|
671
|
+
{
|
672
|
+
ReqExclScorer *rxsc = RXSc(self);
|
673
|
+
Scorer *req_scorer = rxsc->req_scorer;
|
674
|
+
Scorer *excl_scorer = rxsc->excl_scorer;
|
722
675
|
|
723
|
-
|
724
|
-
|
725
|
-
|
726
|
-
|
676
|
+
if (rxsc->first_time) {
|
677
|
+
if (! excl_scorer->next(excl_scorer)) {
|
678
|
+
/* emptied at start */
|
679
|
+
SCORER_NULLIFY(rxsc->excl_scorer);
|
680
|
+
excl_scorer = NULL;
|
681
|
+
}
|
682
|
+
rxsc->first_time = false;
|
683
|
+
}
|
684
|
+
if (req_scorer == NULL) {
|
685
|
+
return false;
|
727
686
|
}
|
728
|
-
if (
|
729
|
-
|
687
|
+
if (! req_scorer->next(req_scorer)) {
|
688
|
+
/* emptied, nothing left */
|
689
|
+
SCORER_NULLIFY(rxsc->req_scorer);
|
690
|
+
return false;
|
691
|
+
}
|
692
|
+
if (excl_scorer == NULL) {
|
693
|
+
self->doc = req_scorer->doc;
|
694
|
+
/* req_scorer->next() already returned true */
|
695
|
+
return true;
|
696
|
+
}
|
697
|
+
return rxsc_to_non_excluded(self);
|
698
|
+
}
|
699
|
+
|
700
|
+
static bool rxsc_skip_to(Scorer *self, int doc_num)
|
701
|
+
{
|
702
|
+
ReqExclScorer *rxsc = RXSc(self);
|
703
|
+
Scorer *req_scorer = rxsc->req_scorer;
|
704
|
+
Scorer *excl_scorer = rxsc->excl_scorer;
|
730
705
|
|
731
|
-
|
706
|
+
if (rxsc->first_time) {
|
707
|
+
rxsc->first_time = false;
|
708
|
+
if (! excl_scorer->skip_to(excl_scorer, doc_num)) {
|
709
|
+
/* emptied */
|
710
|
+
SCORER_NULLIFY(rxsc->excl_scorer);
|
711
|
+
excl_scorer = NULL;
|
712
|
+
}
|
713
|
+
}
|
714
|
+
if (req_scorer == NULL) {
|
715
|
+
return false;
|
716
|
+
}
|
717
|
+
if (excl_scorer == NULL) {
|
718
|
+
if (req_scorer->skip_to(req_scorer, doc_num)) {
|
719
|
+
self->doc = req_scorer->doc;
|
720
|
+
return true;
|
721
|
+
}
|
722
|
+
return false;
|
723
|
+
}
|
724
|
+
if (! req_scorer->skip_to(req_scorer, doc_num)) {
|
725
|
+
SCORER_NULLIFY(rxsc->req_scorer);
|
726
|
+
return false;
|
727
|
+
}
|
728
|
+
return rxsc_to_non_excluded(self);
|
732
729
|
}
|
733
730
|
|
734
|
-
float
|
731
|
+
static float rxsc_score(Scorer *self)
|
735
732
|
{
|
736
|
-
|
737
|
-
|
738
|
-
float score = 0.0; // sum scores
|
739
|
-
int i;
|
740
|
-
for (i = 0; i < csc->ss_cnt; i++) {
|
741
|
-
sub_scorer = csc->sub_scorers[i];
|
742
|
-
score += sub_scorer->score(sub_scorer);
|
743
|
-
}
|
744
|
-
score *= csc->coord;
|
745
|
-
return score;
|
733
|
+
Scorer *req_scorer = RXSc(self)->req_scorer;
|
734
|
+
return req_scorer->score(req_scorer);
|
746
735
|
}
|
747
736
|
|
748
|
-
|
737
|
+
static Explanation *rxsc_explain(Scorer *self, int doc_num)
|
749
738
|
{
|
750
|
-
|
751
|
-
|
752
|
-
|
739
|
+
ReqExclScorer *rxsc = RXSc(self);
|
740
|
+
Scorer *req_scorer = rxsc->req_scorer;
|
741
|
+
Scorer *excl_scorer = rxsc->excl_scorer;
|
742
|
+
Explanation *e;
|
753
743
|
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
self->doc = first->doc;
|
764
|
-
return csc->more;
|
744
|
+
if (excl_scorer->skip_to(excl_scorer, doc_num)
|
745
|
+
&& excl_scorer->doc == doc_num) {
|
746
|
+
e = expl_new(0.0, "excluded:");
|
747
|
+
}
|
748
|
+
else {
|
749
|
+
e = expl_new(0.0, "not excluded:");
|
750
|
+
expl_add_detail(e, req_scorer->explain(req_scorer, doc_num));
|
751
|
+
}
|
752
|
+
return e;
|
765
753
|
}
|
766
754
|
|
767
|
-
|
755
|
+
static void rxsc_destroy(Scorer *self)
|
768
756
|
{
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
return csc_do_next(self);
|
757
|
+
ReqExclScorer *rxsc = RXSc(self);
|
758
|
+
if (rxsc->req_scorer) {
|
759
|
+
rxsc->req_scorer->destroy(rxsc->req_scorer);
|
760
|
+
}
|
761
|
+
if (rxsc->excl_scorer) {
|
762
|
+
rxsc->excl_scorer->destroy(rxsc->excl_scorer);
|
763
|
+
}
|
764
|
+
scorer_destroy_i(self);
|
778
765
|
}
|
779
766
|
|
780
|
-
|
767
|
+
static Scorer *req_excl_scorer_new(Scorer *req_scorer, Scorer *excl_scorer)
|
781
768
|
{
|
782
|
-
|
783
|
-
|
784
|
-
|
769
|
+
Scorer *self = scorer_new(ReqExclScorer, NULL);
|
770
|
+
RXSc(self)->req_scorer = req_scorer;
|
771
|
+
RXSc(self)->excl_scorer = excl_scorer;
|
772
|
+
RXSc(self)->first_time = true;
|
785
773
|
|
786
|
-
|
787
|
-
|
788
|
-
|
774
|
+
self->score = &rxsc_score;
|
775
|
+
self->next = &rxsc_next;
|
776
|
+
self->skip_to = &rxsc_skip_to;
|
777
|
+
self->explain = &rxsc_explain;
|
778
|
+
self->destroy = &rxsc_destroy;
|
789
779
|
|
790
|
-
|
791
|
-
if (!csc->more) break;
|
792
|
-
sub_scorer = csc->sub_scorers[i];
|
793
|
-
csc->more = sub_scorer->skip_to(sub_scorer, doc_num);
|
794
|
-
}
|
795
|
-
if (csc->more) csc_sort_scorers(csc); // resort the scorers
|
796
|
-
|
797
|
-
return csc_do_next(self);
|
780
|
+
return self;
|
798
781
|
}
|
799
782
|
|
800
|
-
|
783
|
+
/***************************************************************************
|
784
|
+
* NonMatchScorer
|
785
|
+
***************************************************************************/
|
786
|
+
|
787
|
+
static float nmsc_score(Scorer *self)
|
801
788
|
{
|
802
|
-
|
803
|
-
|
804
|
-
for (i = 0; i < csc->ss_cnt; i++) {
|
805
|
-
csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
|
806
|
-
}
|
807
|
-
free(csc->sub_scorers);
|
808
|
-
scorer_destroy_i(self);
|
789
|
+
(void)self;
|
790
|
+
return 0.0;
|
809
791
|
}
|
810
792
|
|
811
|
-
|
793
|
+
static bool nmsc_next(Scorer *self)
|
812
794
|
{
|
813
|
-
|
814
|
-
|
815
|
-
ZEROSET(csc, ConjunctionScorer, 1);
|
816
|
-
self->data = csc;
|
817
|
-
csc->first_time = true;
|
818
|
-
csc->more = true;
|
819
|
-
csc->coordinator = NULL;
|
820
|
-
|
821
|
-
self->score = &csc_score;
|
822
|
-
self->next = &csc_next;
|
823
|
-
self->skip_to = &csc_skip_to;
|
824
|
-
self->destroy = &csc_destroy;
|
825
|
-
|
826
|
-
return self;
|
795
|
+
(void)self;
|
796
|
+
return false;
|
827
797
|
}
|
828
798
|
|
829
|
-
|
799
|
+
static bool nmsc_skip_to(Scorer *self, int doc_num)
|
830
800
|
{
|
831
|
-
|
832
|
-
|
833
|
-
int doc;
|
834
|
-
if ((doc = self->doc) > csc->last_scored_doc) {
|
835
|
-
csc->last_scored_doc = doc;
|
836
|
-
csc->coordinator->num_matches += csc->ss_cnt;
|
837
|
-
}
|
838
|
-
|
839
|
-
return csc_score(self);
|
801
|
+
(void)self; (void)doc_num;
|
802
|
+
return false;
|
840
803
|
}
|
841
804
|
|
842
|
-
|
843
|
-
Scorer **sub_scorers, int ss_cnt)
|
805
|
+
static Explanation *nmsc_explain(Scorer *self, int doc_num)
|
844
806
|
{
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
csc->last_scored_doc = -1;
|
849
|
-
csc->sub_scorers = ALLOC_N(Scorer *, ss_cnt);
|
850
|
-
memcpy(csc->sub_scorers, sub_scorers, sizeof(Scorer *) * ss_cnt);
|
851
|
-
csc->ss_capa = csc->ss_cnt = ss_cnt;
|
807
|
+
(void)self; (void)doc_num;
|
808
|
+
return expl_new(0.0, "No documents matched");
|
809
|
+
}
|
852
810
|
|
853
|
-
|
811
|
+
static Scorer *non_matching_scorer_new()
|
812
|
+
{
|
813
|
+
Scorer *self = scorer_new(Scorer, NULL);
|
814
|
+
self->score = &nmsc_score;
|
815
|
+
self->next = &nmsc_next;
|
816
|
+
self->skip_to = &nmsc_skip_to;
|
817
|
+
self->explain = &nmsc_explain;
|
854
818
|
|
855
|
-
|
819
|
+
return self;
|
856
820
|
}
|
857
821
|
|
858
822
|
/***************************************************************************
|
859
|
-
*
|
823
|
+
* BooleanScorer
|
860
824
|
***************************************************************************/
|
861
825
|
|
862
|
-
|
826
|
+
#define BSc(scorer) ((BooleanScorer *)(scorer))
|
827
|
+
typedef struct BooleanScorer
|
828
|
+
{
|
829
|
+
Scorer super;
|
830
|
+
Scorer **required_scorers;
|
831
|
+
int rs_cnt;
|
832
|
+
int rs_capa;
|
833
|
+
Scorer **optional_scorers;
|
834
|
+
int os_cnt;
|
835
|
+
int os_capa;
|
836
|
+
Scorer **prohibited_scorers;
|
837
|
+
int ps_cnt;
|
838
|
+
int ps_capa;
|
839
|
+
Scorer *counting_sum_scorer;
|
840
|
+
Coordinator *coordinator;
|
841
|
+
} BooleanScorer;
|
842
|
+
|
843
|
+
static Scorer *counting_sum_scorer_create3(BooleanScorer *bsc,
|
844
|
+
Scorer *req_scorer,
|
845
|
+
Scorer *opt_scorer)
|
863
846
|
{
|
864
|
-
|
865
|
-
|
866
|
-
|
847
|
+
if (bsc->ps_cnt == 0) {
|
848
|
+
/* no prohibited */
|
849
|
+
return req_opt_sum_scorer_new(req_scorer, opt_scorer);
|
850
|
+
}
|
851
|
+
else if (bsc->ps_cnt == 1) {
|
852
|
+
/* 1 prohibited */
|
853
|
+
return req_opt_sum_scorer_new(
|
854
|
+
req_excl_scorer_new(req_scorer, bsc->prohibited_scorers[0]),
|
855
|
+
opt_scorer);
|
856
|
+
}
|
857
|
+
else {
|
858
|
+
/* more prohibited */
|
859
|
+
return req_opt_sum_scorer_new(
|
860
|
+
req_excl_scorer_new(
|
861
|
+
req_scorer,
|
862
|
+
disjunction_sum_scorer_new(bsc->prohibited_scorers,
|
863
|
+
bsc->ps_cnt, 1)),
|
864
|
+
opt_scorer);
|
865
|
+
}
|
867
866
|
}
|
868
867
|
|
869
|
-
|
868
|
+
static Scorer *counting_sum_scorer_create2(BooleanScorer *bsc,
|
869
|
+
Scorer *req_scorer,
|
870
|
+
Scorer **optional_scorers,
|
871
|
+
int os_cnt)
|
870
872
|
{
|
871
|
-
|
872
|
-
|
873
|
-
|
874
|
-
|
875
|
-
|
876
|
-
|
873
|
+
if (os_cnt == 0) {
|
874
|
+
if (bsc->ps_cnt == 0) {
|
875
|
+
return req_scorer;
|
876
|
+
}
|
877
|
+
else if (bsc->ps_cnt == 1) {
|
878
|
+
return req_excl_scorer_new(req_scorer,
|
879
|
+
bsc->prohibited_scorers[0]);
|
880
|
+
}
|
881
|
+
else {
|
882
|
+
/* no optional, more than 1 prohibited */
|
883
|
+
return req_excl_scorer_new(
|
884
|
+
req_scorer,
|
885
|
+
disjunction_sum_scorer_new(bsc->prohibited_scorers,
|
886
|
+
bsc->ps_cnt, 1));
|
887
|
+
}
|
888
|
+
}
|
889
|
+
else if (os_cnt == 1) {
|
890
|
+
return counting_sum_scorer_create3(
|
891
|
+
bsc,
|
892
|
+
req_scorer,
|
893
|
+
single_match_scorer_new(bsc->coordinator, optional_scorers[0]));
|
894
|
+
}
|
895
|
+
else {
|
896
|
+
/* more optional */
|
897
|
+
return counting_sum_scorer_create3(
|
898
|
+
bsc,
|
899
|
+
req_scorer,
|
900
|
+
counting_disjunction_sum_scorer_new(bsc->coordinator,
|
901
|
+
optional_scorers, os_cnt, 1));
|
902
|
+
}
|
877
903
|
}
|
878
904
|
|
879
|
-
|
905
|
+
static Scorer *counting_sum_scorer_create(BooleanScorer *bsc)
|
880
906
|
{
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
886
|
-
|
907
|
+
if (bsc->rs_cnt == 0) {
|
908
|
+
if (bsc->os_cnt == 0) {
|
909
|
+
int i;
|
910
|
+
/* only prohibited scorers so return non_matching scorer */
|
911
|
+
for (i = 0; i < bsc->ps_cnt; i++) {
|
912
|
+
bsc->prohibited_scorers[i]->destroy(
|
913
|
+
bsc->prohibited_scorers[i]);
|
914
|
+
}
|
915
|
+
return non_matching_scorer_new();
|
916
|
+
}
|
917
|
+
else if (bsc->os_cnt == 1) {
|
918
|
+
/* the only optional scorer is required */
|
919
|
+
return counting_sum_scorer_create2(
|
920
|
+
bsc,
|
921
|
+
single_match_scorer_new(bsc->coordinator,
|
922
|
+
bsc->optional_scorers[0]),
|
923
|
+
NULL, 0); /* no optional scorers left */
|
924
|
+
}
|
925
|
+
else {
|
926
|
+
/* more than 1 optional_scorers, no required scorers */
|
927
|
+
return counting_sum_scorer_create2(
|
928
|
+
bsc,
|
929
|
+
counting_disjunction_sum_scorer_new(bsc->coordinator,
|
930
|
+
bsc->optional_scorers,
|
931
|
+
bsc->os_cnt, 1),
|
932
|
+
NULL, 0); /* no optional scorers left */
|
933
|
+
}
|
934
|
+
}
|
935
|
+
else if (bsc->rs_cnt == 1) {
|
936
|
+
/* 1 required */
|
937
|
+
return counting_sum_scorer_create2(
|
938
|
+
bsc,
|
939
|
+
single_match_scorer_new(bsc->coordinator, bsc->required_scorers[0]),
|
940
|
+
bsc->optional_scorers, bsc->os_cnt);
|
941
|
+
}
|
942
|
+
else {
|
943
|
+
/* more required scorers */
|
944
|
+
return counting_sum_scorer_create2(
|
945
|
+
bsc,
|
946
|
+
counting_conjunction_sum_scorer_new(bsc->coordinator,
|
947
|
+
bsc->required_scorers,
|
948
|
+
bsc->rs_cnt),
|
949
|
+
bsc->optional_scorers, bsc->os_cnt);
|
950
|
+
}
|
887
951
|
}
|
888
952
|
|
889
|
-
|
953
|
+
static Scorer *bsc_init_counting_sum_scorer(BooleanScorer *bsc)
|
890
954
|
{
|
891
|
-
|
892
|
-
|
955
|
+
coord_init(bsc->coordinator);
|
956
|
+
return bsc->counting_sum_scorer = counting_sum_scorer_create(bsc);
|
893
957
|
}
|
894
958
|
|
895
|
-
void
|
959
|
+
static void bsc_add_scorer(Scorer *self, Scorer *scorer, unsigned int occur)
|
896
960
|
{
|
897
|
-
|
898
|
-
|
899
|
-
|
961
|
+
BooleanScorer *bsc = BSc(self);
|
962
|
+
if (occur != BC_MUST_NOT) {
|
963
|
+
bsc->coordinator->max_coord++;
|
964
|
+
}
|
965
|
+
|
966
|
+
switch (occur) {
|
967
|
+
case BC_MUST:
|
968
|
+
RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
|
969
|
+
bsc->required_scorers[bsc->rs_cnt++] = scorer;
|
970
|
+
break;
|
971
|
+
case BC_SHOULD:
|
972
|
+
RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
|
973
|
+
bsc->optional_scorers[bsc->os_cnt++] = scorer;
|
974
|
+
break;
|
975
|
+
case BC_MUST_NOT:
|
976
|
+
RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
|
977
|
+
bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
|
978
|
+
break;
|
979
|
+
default:
|
980
|
+
RAISE(ARG_ERROR, "Invalid value for :occur. Try :should, :must or "
|
981
|
+
":must_not instead");
|
982
|
+
}
|
900
983
|
}
|
901
984
|
|
902
|
-
|
985
|
+
static float bsc_score(Scorer *self)
|
903
986
|
{
|
904
|
-
|
905
|
-
|
906
|
-
|
907
|
-
|
908
|
-
|
909
|
-
|
910
|
-
self->score = &smsc_score;
|
911
|
-
self->next = &smsc_next;
|
912
|
-
self->skip_to = &smsc_skip_to;
|
913
|
-
self->explain = &smsc_explain;
|
914
|
-
self->destroy = &smsc_destroy;
|
915
|
-
return self;
|
987
|
+
BooleanScorer *bsc = BSc(self);
|
988
|
+
Coordinator *coord = bsc->coordinator;
|
989
|
+
float sum;
|
990
|
+
coord->num_matches = 0;
|
991
|
+
sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
|
992
|
+
return sum * coord->coord_factors[coord->num_matches];
|
916
993
|
}
|
917
994
|
|
918
|
-
|
919
|
-
|
920
|
-
|
995
|
+
static bool bsc_next(Scorer *self)
|
996
|
+
{
|
997
|
+
Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
|
921
998
|
|
922
|
-
|
923
|
-
|
924
|
-
|
925
|
-
|
926
|
-
|
927
|
-
|
928
|
-
|
929
|
-
|
930
|
-
|
931
|
-
|
932
|
-
if (! opt_scorer->skip_to(opt_scorer, cur_doc)) {
|
933
|
-
SCORER_NULLIFY(rossc->opt_scorer);
|
934
|
-
return req_score;
|
935
|
-
}
|
936
|
-
} else if (opt_scorer == NULL) {
|
937
|
-
return req_score;
|
938
|
-
} else if ((opt_scorer->doc < cur_doc) &&
|
939
|
-
! opt_scorer->skip_to(opt_scorer, cur_doc)) {
|
940
|
-
SCORER_NULLIFY(rossc->opt_scorer);
|
941
|
-
return req_score;
|
942
|
-
}
|
943
|
-
// assert (@opt_scorer != nil) and (@opt_scorer.doc() >= cur_doc)
|
944
|
-
return (opt_scorer->doc == cur_doc)
|
945
|
-
? req_score + opt_scorer->score(opt_scorer)
|
946
|
-
: req_score;
|
947
|
-
}
|
948
|
-
|
949
|
-
bool rossc_next(Scorer *self)
|
950
|
-
{
|
951
|
-
Scorer *req_scorer = ((ReqOptSumScorer *)self->data)->req_scorer;
|
952
|
-
if (req_scorer->next(req_scorer)) {
|
953
|
-
self->doc = req_scorer->doc;
|
954
|
-
return true;
|
955
|
-
}
|
956
|
-
return false;
|
999
|
+
if (!cnt_sum_sc) {
|
1000
|
+
cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
|
1001
|
+
}
|
1002
|
+
if (cnt_sum_sc->next(cnt_sum_sc)) {
|
1003
|
+
self->doc = cnt_sum_sc->doc;
|
1004
|
+
return true;
|
1005
|
+
}
|
1006
|
+
else {
|
1007
|
+
return false;
|
1008
|
+
}
|
957
1009
|
}
|
958
1010
|
|
959
|
-
bool
|
1011
|
+
static bool bsc_skip_to(Scorer *self, int doc_num)
|
960
1012
|
{
|
961
|
-
|
962
|
-
|
963
|
-
self->
|
964
|
-
|
965
|
-
|
966
|
-
|
1013
|
+
Scorer *cnt_sum_sc = BSc(self)->counting_sum_scorer;
|
1014
|
+
|
1015
|
+
if (!BSc(self)->counting_sum_scorer) {
|
1016
|
+
cnt_sum_sc = bsc_init_counting_sum_scorer(BSc(self));
|
1017
|
+
}
|
1018
|
+
if (cnt_sum_sc->skip_to(cnt_sum_sc, doc_num)) {
|
1019
|
+
self->doc = cnt_sum_sc->doc;
|
1020
|
+
return true;
|
1021
|
+
}
|
1022
|
+
else {
|
1023
|
+
return false;
|
1024
|
+
}
|
967
1025
|
}
|
968
1026
|
|
969
|
-
|
1027
|
+
static void bsc_destroy(Scorer *self)
|
970
1028
|
{
|
971
|
-
|
972
|
-
|
973
|
-
|
1029
|
+
BooleanScorer *bsc = BSc(self);
|
1030
|
+
Coordinator *coord = bsc->coordinator;
|
1031
|
+
|
1032
|
+
free(coord->coord_factors);
|
1033
|
+
free(coord);
|
1034
|
+
|
1035
|
+
if (bsc->counting_sum_scorer) {
|
1036
|
+
bsc->counting_sum_scorer->destroy(bsc->counting_sum_scorer);
|
1037
|
+
}
|
1038
|
+
else {
|
1039
|
+
int i;
|
1040
|
+
for (i = 0; i < bsc->rs_cnt; i++) {
|
1041
|
+
bsc->required_scorers[i]->destroy(bsc->required_scorers[i]);
|
1042
|
+
}
|
1043
|
+
|
1044
|
+
for (i = 0; i < bsc->os_cnt; i++) {
|
1045
|
+
bsc->optional_scorers[i]->destroy(bsc->optional_scorers[i]);
|
1046
|
+
}
|
974
1047
|
|
975
|
-
|
976
|
-
|
977
|
-
|
978
|
-
|
1048
|
+
for (i = 0; i < bsc->ps_cnt; i++) {
|
1049
|
+
bsc->prohibited_scorers[i]->destroy(bsc->prohibited_scorers[i]);
|
1050
|
+
}
|
1051
|
+
}
|
1052
|
+
free(bsc->required_scorers);
|
1053
|
+
free(bsc->optional_scorers);
|
1054
|
+
free(bsc->prohibited_scorers);
|
1055
|
+
scorer_destroy_i(self);
|
979
1056
|
}
|
980
1057
|
|
981
|
-
|
1058
|
+
static Explanation *bsc_explain(Scorer *self, int doc_num)
|
982
1059
|
{
|
983
|
-
|
984
|
-
|
985
|
-
if (rossc->opt_scorer) rossc->opt_scorer->destroy(rossc->opt_scorer);
|
986
|
-
scorer_destroy_i(self);
|
1060
|
+
(void)self; (void)doc_num;
|
1061
|
+
return expl_new(0.0, "This explanation is not supported");
|
987
1062
|
}
|
988
1063
|
|
989
|
-
|
990
|
-
Scorer *req_opt_sum_scorer_create(Scorer *req_scorer, Scorer *opt_scorer)
|
1064
|
+
static Scorer *bsc_new(Similarity *similarity)
|
991
1065
|
{
|
992
|
-
|
993
|
-
|
994
|
-
|
995
|
-
rossc->req_scorer = req_scorer;
|
996
|
-
rossc->opt_scorer = opt_scorer;
|
997
|
-
rossc->first_time_opt = true;
|
998
|
-
|
999
|
-
self->score = &rossc_score;
|
1000
|
-
self->next = &rossc_next;
|
1001
|
-
self->skip_to = &rossc_skip_to;
|
1002
|
-
self->explain = &rossc_explain;
|
1003
|
-
self->destroy = &rossc_destroy;
|
1066
|
+
Scorer *self = scorer_new(BooleanScorer, similarity);
|
1067
|
+
BSc(self)->coordinator = coord_new(similarity);
|
1068
|
+
BSc(self)->counting_sum_scorer = NULL;
|
1004
1069
|
|
1005
|
-
|
1070
|
+
self->score = &bsc_score;
|
1071
|
+
self->next = &bsc_next;
|
1072
|
+
self->skip_to = &bsc_skip_to;
|
1073
|
+
self->explain = &bsc_explain;
|
1074
|
+
self->destroy = &bsc_destroy;
|
1075
|
+
return self;
|
1006
1076
|
}
|
1007
1077
|
|
1008
1078
|
/***************************************************************************
|
1009
|
-
*
|
1079
|
+
*
|
1080
|
+
* BooleanWeight
|
1081
|
+
*
|
1010
1082
|
***************************************************************************/
|
1011
1083
|
|
1012
|
-
|
1013
|
-
{
|
1014
|
-
|
1015
|
-
|
1016
|
-
|
1017
|
-
|
1018
|
-
|
1019
|
-
|
1020
|
-
|
1021
|
-
|
1022
|
-
|
1023
|
-
|
1024
|
-
|
1025
|
-
|
1026
|
-
|
1027
|
-
|
1028
|
-
|
1029
|
-
|
1030
|
-
|
1031
|
-
|
1032
|
-
if (excl_doc > req_doc) {
|
1033
|
-
self->doc = req_doc;
|
1034
|
-
return true; // not excluded
|
1035
|
-
}
|
1084
|
+
typedef struct BooleanWeight
|
1085
|
+
{
|
1086
|
+
Weight w;
|
1087
|
+
Weight **weights;
|
1088
|
+
int w_cnt;
|
1089
|
+
} BooleanWeight;
|
1090
|
+
|
1091
|
+
|
1092
|
+
static float bw_sum_of_squared_weights(Weight *self)
|
1093
|
+
{
|
1094
|
+
BooleanQuery *bq = BQ(self->query);
|
1095
|
+
float sum = 0.0;
|
1096
|
+
int i;
|
1097
|
+
|
1098
|
+
for (i = 0; i < BW(self)->w_cnt; i++) {
|
1099
|
+
if (! bq->clauses[i]->is_prohibited) {
|
1100
|
+
Weight *weight = BW(self)->weights[i];
|
1101
|
+
/* sum sub-weights */
|
1102
|
+
sum += weight->sum_of_squared_weights(weight);
|
1103
|
+
}
|
1036
1104
|
}
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1105
|
+
|
1106
|
+
/* boost each sub-weight */
|
1107
|
+
sum *= self->value * self->value;
|
1108
|
+
return sum;
|
1040
1109
|
}
|
1041
1110
|
|
1042
|
-
|
1111
|
+
static void bw_normalize(Weight *self, float normalization_factor)
|
1043
1112
|
{
|
1044
|
-
|
1045
|
-
|
1046
|
-
Scorer *excl_scorer = rxsc->excl_scorer;
|
1113
|
+
BooleanQuery *bq = BQ(self->query);
|
1114
|
+
int i;
|
1047
1115
|
|
1048
|
-
|
1049
|
-
|
1050
|
-
|
1051
|
-
|
1116
|
+
normalization_factor *= self->value; /* multiply by query boost */
|
1117
|
+
|
1118
|
+
for (i = 0; i < BW(self)->w_cnt; i++) {
|
1119
|
+
if (! bq->clauses[i]->is_prohibited) {
|
1120
|
+
Weight *weight = BW(self)->weights[i];
|
1121
|
+
/* sum sub-weights */
|
1122
|
+
weight->normalize(weight, normalization_factor);
|
1123
|
+
}
|
1052
1124
|
}
|
1053
|
-
rxsc->first_time = false;
|
1054
|
-
}
|
1055
|
-
if (req_scorer == NULL) {
|
1056
|
-
return false;
|
1057
|
-
}
|
1058
|
-
if (! req_scorer->next(req_scorer)) {
|
1059
|
-
SCORER_NULLIFY(rxsc->req_scorer); // exhausted, nothing left
|
1060
|
-
return false;
|
1061
|
-
}
|
1062
|
-
if (excl_scorer == NULL) {
|
1063
|
-
self->doc = req_scorer->doc;
|
1064
|
-
return true; // req_scorer->next() already returned true
|
1065
|
-
}
|
1066
|
-
return rxsc_to_non_excluded(self);
|
1067
1125
|
}
|
1068
1126
|
|
1069
|
-
|
1127
|
+
static Scorer *bw_scorer(Weight *self, IndexReader *ir)
|
1070
1128
|
{
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1129
|
+
Scorer *bsc = bsc_new(self->similarity);
|
1130
|
+
BooleanQuery *bq = BQ(self->query);
|
1131
|
+
int i;
|
1074
1132
|
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
if (req_scorer->skip_to(req_scorer, doc_num)) {
|
1087
|
-
self->doc = req_scorer->doc;
|
1088
|
-
return true;
|
1133
|
+
for (i = 0; i < BW(self)->w_cnt; i++) {
|
1134
|
+
BooleanClause *clause = bq->clauses[i];
|
1135
|
+
Weight *weight = BW(self)->weights[i];
|
1136
|
+
Scorer *sub_scorer = weight->scorer(weight, ir);
|
1137
|
+
if (sub_scorer) {
|
1138
|
+
bsc_add_scorer(bsc, sub_scorer, clause->occur);
|
1139
|
+
}
|
1140
|
+
else if (clause->is_required) {
|
1141
|
+
bsc->destroy(bsc);
|
1142
|
+
return NULL;
|
1143
|
+
}
|
1089
1144
|
}
|
1090
|
-
|
1091
|
-
|
1092
|
-
if (! req_scorer->skip_to(req_scorer, doc_num)) {
|
1093
|
-
SCORER_NULLIFY(rxsc->req_scorer);
|
1094
|
-
return false;
|
1095
|
-
}
|
1096
|
-
return rxsc_to_non_excluded(self);
|
1145
|
+
|
1146
|
+
return bsc;
|
1097
1147
|
}
|
1098
1148
|
|
1099
|
-
|
1149
|
+
static char *bw_to_s(Weight *self)
|
1100
1150
|
{
|
1101
|
-
|
1102
|
-
return req_scorer->score(req_scorer);
|
1151
|
+
return strfmt("BooleanWeight(%f)", self->value);
|
1103
1152
|
}
|
1104
1153
|
|
1105
|
-
|
1154
|
+
static void bw_destroy(Weight *self)
|
1106
1155
|
{
|
1107
|
-
|
1108
|
-
Scorer *req_scorer = rxsc->req_scorer;
|
1109
|
-
Scorer *excl_scorer = rxsc->excl_scorer;
|
1156
|
+
int i;
|
1110
1157
|
|
1111
|
-
|
1112
|
-
|
1113
|
-
|
1114
|
-
|
1115
|
-
|
1116
|
-
|
1117
|
-
}
|
1118
|
-
return e;
|
1158
|
+
for (i = 0; i < BW(self)->w_cnt; i++) {
|
1159
|
+
BW(self)->weights[i]->destroy(BW(self)->weights[i]);
|
1160
|
+
}
|
1161
|
+
|
1162
|
+
free(BW(self)->weights);
|
1163
|
+
w_destroy(self);
|
1119
1164
|
}
|
1120
1165
|
|
1121
|
-
|
1166
|
+
static Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
1122
1167
|
{
|
1123
|
-
|
1124
|
-
|
1125
|
-
|
1126
|
-
|
1168
|
+
BooleanQuery *bq = BQ(self->query);
|
1169
|
+
Explanation *sum_expl = expl_new(0.0, "sum of:");
|
1170
|
+
Explanation *explanation;
|
1171
|
+
int coord = 0;
|
1172
|
+
int max_coord = 0;
|
1173
|
+
float coord_factor = 0.0;
|
1174
|
+
float sum = 0.0;
|
1175
|
+
int i;
|
1176
|
+
|
1177
|
+
for (i = 0; i < BW(self)->w_cnt; i++) {
|
1178
|
+
Weight *weight = BW(self)->weights[i];
|
1179
|
+
BooleanClause *clause = bq->clauses[i];
|
1180
|
+
explanation = weight->explain(weight, ir, doc_num);
|
1181
|
+
if (!clause->is_prohibited) {
|
1182
|
+
max_coord++;
|
1183
|
+
}
|
1184
|
+
if (explanation->value > 0.0) {
|
1185
|
+
if (!clause->is_prohibited) {
|
1186
|
+
expl_add_detail(sum_expl, explanation);
|
1187
|
+
sum += explanation->value;
|
1188
|
+
coord++;
|
1189
|
+
}
|
1190
|
+
else {
|
1191
|
+
expl_destroy(explanation);
|
1192
|
+
expl_destroy(sum_expl);
|
1193
|
+
return expl_new(0.0, "match prohibited");
|
1194
|
+
}
|
1195
|
+
}
|
1196
|
+
else if (clause->is_required) {
|
1197
|
+
expl_destroy(explanation);
|
1198
|
+
expl_destroy(sum_expl);
|
1199
|
+
return expl_new(0.0, "match required");
|
1200
|
+
}
|
1201
|
+
else {
|
1202
|
+
expl_destroy(explanation);
|
1203
|
+
}
|
1204
|
+
}
|
1205
|
+
sum_expl->value = sum;
|
1206
|
+
|
1207
|
+
if (coord == 1) { /* only one clause matched */
|
1208
|
+
explanation = sum_expl; /* eliminate wrapper */
|
1209
|
+
ary_size(sum_expl->details) = 0;
|
1210
|
+
sum_expl = sum_expl->details[0];
|
1211
|
+
expl_destroy(explanation);
|
1212
|
+
}
|
1213
|
+
|
1214
|
+
coord_factor = sim_coord(self->similarity, coord, max_coord);
|
1215
|
+
|
1216
|
+
if (coord_factor == 1.0) { /* coord is no-op */
|
1217
|
+
return sum_expl; /* eliminate wrapper */
|
1218
|
+
}
|
1219
|
+
else {
|
1220
|
+
explanation = expl_new(sum * coord_factor, "product of:");
|
1221
|
+
expl_add_detail(explanation, sum_expl);
|
1222
|
+
expl_add_detail(explanation, expl_new(coord_factor, "coord(%d/%d)",
|
1223
|
+
coord, max_coord));
|
1224
|
+
return explanation;
|
1225
|
+
}
|
1127
1226
|
}
|
1128
1227
|
|
1129
|
-
|
1228
|
+
static Weight *bw_new(Query *query, Searcher *searcher)
|
1130
1229
|
{
|
1131
|
-
|
1132
|
-
|
1133
|
-
|
1134
|
-
|
1135
|
-
|
1136
|
-
|
1137
|
-
|
1138
|
-
|
1139
|
-
self->next = &rxsc_next;
|
1140
|
-
self->skip_to = &rxsc_skip_to;
|
1141
|
-
self->explain = &rxsc_explain;
|
1142
|
-
self->destroy = &rxsc_destroy;
|
1230
|
+
int i;
|
1231
|
+
Weight *self = w_new(BooleanWeight, query);
|
1232
|
+
|
1233
|
+
BW(self)->w_cnt = BQ(query)->clause_cnt;
|
1234
|
+
BW(self)->weights = ALLOC_N(Weight *, BW(self)->w_cnt);
|
1235
|
+
for (i = 0; i < BW(self)->w_cnt; i++) {
|
1236
|
+
BW(self)->weights[i] = q_weight(BQ(query)->clauses[i]->query, searcher);
|
1237
|
+
}
|
1143
1238
|
|
1144
|
-
|
1239
|
+
self->normalize = &bw_normalize;
|
1240
|
+
self->scorer = &bw_scorer;
|
1241
|
+
self->explain = &bw_explain;
|
1242
|
+
self->to_s = &bw_to_s;
|
1243
|
+
self->destroy = &bw_destroy;
|
1244
|
+
self->sum_of_squared_weights = &bw_sum_of_squared_weights;
|
1245
|
+
|
1246
|
+
self->similarity = query->get_similarity(query, searcher);
|
1247
|
+
self->value = query->boost;
|
1248
|
+
|
1249
|
+
return self;
|
1145
1250
|
}
|
1146
1251
|
|
1147
1252
|
/***************************************************************************
|
1148
|
-
*
|
1253
|
+
*
|
1254
|
+
* BooleanClause
|
1255
|
+
*
|
1149
1256
|
***************************************************************************/
|
1150
1257
|
|
1151
|
-
|
1152
|
-
{
|
1153
|
-
|
1258
|
+
void bc_set_occur(BooleanClause *self, enum BC_TYPE occur)
|
1259
|
+
{
|
1260
|
+
self->occur = occur;
|
1261
|
+
switch (occur) {
|
1262
|
+
case BC_SHOULD:
|
1263
|
+
self->is_prohibited = false;
|
1264
|
+
self->is_required = false;
|
1265
|
+
break;
|
1266
|
+
case BC_MUST:
|
1267
|
+
self->is_prohibited = false;
|
1268
|
+
self->is_required = true;
|
1269
|
+
break;
|
1270
|
+
case BC_MUST_NOT:
|
1271
|
+
self->is_prohibited = true;
|
1272
|
+
self->is_required = false;
|
1273
|
+
break;
|
1274
|
+
default:
|
1275
|
+
RAISE(ARG_ERROR, "Invalid value for :occur. Try :occur => :should, "
|
1276
|
+
":must or :must_not instead");
|
1277
|
+
}
|
1154
1278
|
}
|
1155
1279
|
|
1156
|
-
|
1280
|
+
void bc_deref(BooleanClause *self)
|
1157
1281
|
{
|
1158
|
-
|
1282
|
+
if (--self->ref_cnt <= 0) {
|
1283
|
+
q_deref(self->query);
|
1284
|
+
free(self);
|
1285
|
+
}
|
1159
1286
|
}
|
1160
1287
|
|
1161
|
-
|
1288
|
+
static ulong bc_hash(BooleanClause *self)
|
1162
1289
|
{
|
1163
|
-
|
1290
|
+
return ((q_hash(self->query) << 2) | self->occur);
|
1164
1291
|
}
|
1165
1292
|
|
1166
|
-
|
1293
|
+
static int bc_eq(BooleanClause *self, BooleanClause *o)
|
1167
1294
|
{
|
1168
|
-
|
1295
|
+
return ((self->occur == o->occur) && q_eq(self->query, o->query));
|
1169
1296
|
}
|
1170
1297
|
|
1171
|
-
|
1298
|
+
BooleanClause *bc_new(Query *query, enum BC_TYPE occur)
|
1172
1299
|
{
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
|
1177
|
-
|
1178
|
-
|
1179
|
-
return self;
|
1300
|
+
BooleanClause *self = ALLOC(BooleanClause);
|
1301
|
+
self->ref_cnt = 1;
|
1302
|
+
self->query = query;
|
1303
|
+
bc_set_occur(self, occur);
|
1304
|
+
return self;
|
1180
1305
|
}
|
1181
1306
|
|
1182
|
-
|
1183
1307
|
/***************************************************************************
|
1184
|
-
*
|
1308
|
+
*
|
1309
|
+
* BooleanQuery
|
1310
|
+
*
|
1185
1311
|
***************************************************************************/
|
1186
1312
|
|
1187
|
-
|
1188
|
-
|
1313
|
+
static MatchVector *bq_get_matchv_i(Query *self, MatchVector *mv,
|
1314
|
+
TermVector *tv)
|
1189
1315
|
{
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
|
1194
|
-
|
1195
|
-
|
1196
|
-
|
1197
|
-
return
|
1198
|
-
req_excl_scorer_create(req_scorer,
|
1199
|
-
disjunction_sum_scorer_create(bsc->prohibited_scorers, bsc->ps_cnt, 1)),
|
1200
|
-
opt_scorer);
|
1201
|
-
}
|
1316
|
+
int i;
|
1317
|
+
for (i = BQ(self)->clause_cnt - 1; i >= 0; i--) {
|
1318
|
+
if (BQ(self)->clauses[i]->occur != BC_MUST_NOT) {
|
1319
|
+
Query *q = BQ(self)->clauses[i]->query;
|
1320
|
+
q->get_matchv_i(q, mv, tv);
|
1321
|
+
}
|
1322
|
+
}
|
1323
|
+
return mv;
|
1202
1324
|
}
|
1203
1325
|
|
1204
|
-
|
1205
|
-
Scorer **optional_scorers, int os_cnt)
|
1326
|
+
static Query *bq_rewrite(Query *self, IndexReader *ir)
|
1206
1327
|
{
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
-
|
1212
|
-
|
1213
|
-
|
1214
|
-
|
1215
|
-
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
|
1275
|
-
BooleanScorer *bsc = (BooleanScorer *)self->data;
|
1276
|
-
if (occur != BC_MUST_NOT) {
|
1277
|
-
bsc->coordinator->max_coord++;
|
1278
|
-
}
|
1279
|
-
|
1280
|
-
switch (occur) {
|
1281
|
-
case BC_MUST:
|
1282
|
-
RECAPA(bsc, rs_cnt, rs_capa, required_scorers, Scorer *);
|
1283
|
-
bsc->required_scorers[bsc->rs_cnt++] = scorer;
|
1284
|
-
break;
|
1285
|
-
case BC_SHOULD:
|
1286
|
-
RECAPA(bsc, os_cnt, os_capa, optional_scorers, Scorer *);
|
1287
|
-
bsc->optional_scorers[bsc->os_cnt++] = scorer;
|
1288
|
-
break;
|
1289
|
-
case BC_MUST_NOT:
|
1290
|
-
RECAPA(bsc, ps_cnt, ps_capa, prohibited_scorers, Scorer *);
|
1291
|
-
bsc->prohibited_scorers[bsc->ps_cnt++] = scorer;
|
1292
|
-
break;
|
1293
|
-
default:
|
1294
|
-
RAISE(ARG_ERROR, UNKNOWN_OCCUR_VAL_ERROR_MSG);
|
1295
|
-
}
|
1296
|
-
}
|
1297
|
-
|
1298
|
-
float bsc_score(Scorer *self)
|
1299
|
-
{
|
1300
|
-
BooleanScorer *bsc = (BooleanScorer *)self->data;
|
1301
|
-
Coordinator *coord = bsc->coordinator;
|
1302
|
-
float sum;
|
1303
|
-
coord->num_matches = 0;
|
1304
|
-
sum = bsc->counting_sum_scorer->score(bsc->counting_sum_scorer);
|
1305
|
-
return sum * coord->coord_factors[coord->num_matches];
|
1306
|
-
}
|
1307
|
-
|
1308
|
-
bool bsc_next(Scorer *self)
|
1309
|
-
{
|
1310
|
-
BooleanScorer *bsc = (BooleanScorer *)self->data;
|
1311
|
-
|
1312
|
-
if (!bsc->counting_sum_scorer) {
|
1313
|
-
bsc_init_counting_sum_scorer(bsc);
|
1314
|
-
}
|
1315
|
-
if (bsc->counting_sum_scorer->next(bsc->counting_sum_scorer)) {
|
1316
|
-
self->doc = bsc->counting_sum_scorer->doc;
|
1317
|
-
return true;
|
1318
|
-
} else {
|
1319
|
-
return false;
|
1320
|
-
}
|
1328
|
+
int i;
|
1329
|
+
|
1330
|
+
bool rewritten = false;
|
1331
|
+
|
1332
|
+
if (BQ(self)->clause_cnt == 1) {
|
1333
|
+
/* optimize 1-clause queries */
|
1334
|
+
BooleanClause *clause = BQ(self)->clauses[0];
|
1335
|
+
if (! clause->is_prohibited) {
|
1336
|
+
/* just return clause. Re-write first. */
|
1337
|
+
Query *q = clause->query->rewrite(clause->query, ir);
|
1338
|
+
|
1339
|
+
if (self->boost != 1.0) {
|
1340
|
+
/* original_boost is initialized to 0.0. If it has been set to
|
1341
|
+
* something else it means this query has already been boosted
|
1342
|
+
* before so boost from the original value */
|
1343
|
+
if ((q == clause->query) && BQ(self)->original_boost) {
|
1344
|
+
/* rewrite was no-op */
|
1345
|
+
q->boost = BQ(self)->original_boost * self->boost;
|
1346
|
+
}
|
1347
|
+
else {
|
1348
|
+
/* save original boost in case query is rewritten again */
|
1349
|
+
BQ(self)->original_boost = q->boost;
|
1350
|
+
q->boost *= self->boost;
|
1351
|
+
}
|
1352
|
+
}
|
1353
|
+
|
1354
|
+
return q;
|
1355
|
+
}
|
1356
|
+
}
|
1357
|
+
|
1358
|
+
self->ref_cnt++;
|
1359
|
+
/* replace each clause's query with its rewritten query */
|
1360
|
+
for (i = 0; i < BQ(self)->clause_cnt; i++) {
|
1361
|
+
BooleanClause *clause = BQ(self)->clauses[i];
|
1362
|
+
Query *rq = clause->query->rewrite(clause->query, ir);
|
1363
|
+
if (rq != clause->query) {
|
1364
|
+
if (!rewritten) {
|
1365
|
+
int j;
|
1366
|
+
Query *new_self = q_new(BooleanQuery);
|
1367
|
+
memcpy(new_self, self, sizeof(BooleanQuery));
|
1368
|
+
BQ(new_self)->clauses = ALLOC_N(BooleanClause *, BQ(self)->clause_capa);
|
1369
|
+
memcpy(BQ(new_self)->clauses, BQ(self)->clauses,
|
1370
|
+
BQ(self)->clause_capa * sizeof(BooleanClause *));
|
1371
|
+
for (j = 0; j < BQ(self)->clause_cnt; j++) {
|
1372
|
+
REF(BQ(self)->clauses[j]);
|
1373
|
+
}
|
1374
|
+
self->ref_cnt--;
|
1375
|
+
self = new_self;
|
1376
|
+
self->ref_cnt = 1;
|
1377
|
+
rewritten = true;
|
1378
|
+
}
|
1379
|
+
DEREF(clause);
|
1380
|
+
BQ(self)->clauses[i] = bc_new(rq, clause->occur);
|
1381
|
+
} else {
|
1382
|
+
DEREF(rq);
|
1383
|
+
}
|
1384
|
+
}
|
1385
|
+
|
1386
|
+
return self;
|
1387
|
+
}
|
1388
|
+
|
1389
|
+
static void bq_extract_terms(Query *self, HashSet *terms)
|
1390
|
+
{
|
1391
|
+
int i;
|
1392
|
+
for (i = 0; i < BQ(self)->clause_cnt; i++) {
|
1393
|
+
BooleanClause *clause = BQ(self)->clauses[i];
|
1394
|
+
clause->query->extract_terms(clause->query, terms);
|
1395
|
+
}
|
1321
1396
|
}
|
1322
1397
|
|
1323
|
-
|
1398
|
+
static char *bq_to_s(Query *self, const char *field)
|
1324
1399
|
{
|
1325
|
-
|
1400
|
+
int i;
|
1401
|
+
BooleanClause *clause;
|
1402
|
+
Query *sub_query;
|
1403
|
+
char *buffer;
|
1404
|
+
char *clause_str;
|
1405
|
+
int bp = 0;
|
1406
|
+
int size = QUERY_STRING_START_SIZE;
|
1407
|
+
int needed;
|
1408
|
+
int clause_len;
|
1409
|
+
|
1410
|
+
buffer = ALLOC_N(char, size);
|
1411
|
+
if (self->boost != 1.0) {
|
1412
|
+
buffer[0] = '(';
|
1413
|
+
bp++;
|
1414
|
+
}
|
1326
1415
|
|
1327
|
-
|
1328
|
-
|
1329
|
-
|
1330
|
-
|
1331
|
-
|
1332
|
-
|
1333
|
-
|
1334
|
-
|
1335
|
-
|
1416
|
+
for (i = 0; i < BQ(self)->clause_cnt; i++) {
|
1417
|
+
clause = BQ(self)->clauses[i];
|
1418
|
+
clause_str = clause->query->to_s(clause->query, field);
|
1419
|
+
clause_len = (int)strlen(clause_str);
|
1420
|
+
needed = clause_len + 5;
|
1421
|
+
while ((size - bp) < needed) {
|
1422
|
+
size *= 2;
|
1423
|
+
REALLOC_N(buffer, char, size);
|
1424
|
+
}
|
1425
|
+
|
1426
|
+
if (i > 0) {
|
1427
|
+
buffer[bp++] = ' ';
|
1428
|
+
}
|
1429
|
+
if (clause->is_prohibited) {
|
1430
|
+
buffer[bp++] = '-';
|
1431
|
+
}
|
1432
|
+
else if (clause->is_required) {
|
1433
|
+
buffer[bp++] = '+';
|
1434
|
+
}
|
1435
|
+
|
1436
|
+
sub_query = clause->query;
|
1437
|
+
if (sub_query->type == BOOLEAN_QUERY) {
|
1438
|
+
/* wrap sub-bools in parens */
|
1439
|
+
buffer[bp++] = '(';
|
1440
|
+
memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
|
1441
|
+
bp += clause_len;
|
1442
|
+
buffer[bp++] = ')';
|
1443
|
+
}
|
1444
|
+
else {
|
1445
|
+
memcpy(buffer + bp, clause_str, sizeof(char) * clause_len);
|
1446
|
+
bp += clause_len;
|
1447
|
+
}
|
1448
|
+
free(clause_str);
|
1449
|
+
}
|
1450
|
+
|
1451
|
+
if (self->boost != 1.0) {
|
1452
|
+
char *boost_str = strfmt(")^%f", self->boost);
|
1453
|
+
int boost_len = (int)strlen(boost_str);
|
1454
|
+
REALLOC_N(buffer, char, bp + boost_len + 1);
|
1455
|
+
memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
|
1456
|
+
bp += boost_len;
|
1457
|
+
free(boost_str);
|
1458
|
+
}
|
1459
|
+
buffer[bp] = 0;
|
1460
|
+
return buffer;
|
1461
|
+
}
|
1462
|
+
|
1463
|
+
static void bq_destroy(Query *self)
|
1464
|
+
{
|
1465
|
+
int i;
|
1466
|
+
for (i = 0; i < BQ(self)->clause_cnt; i++) {
|
1467
|
+
bc_deref(BQ(self)->clauses[i]);
|
1468
|
+
}
|
1469
|
+
free(BQ(self)->clauses);
|
1470
|
+
if (BQ(self)->similarity) {
|
1471
|
+
BQ(self)->similarity->destroy(BQ(self)->similarity);
|
1472
|
+
}
|
1473
|
+
q_destroy_i(self);
|
1474
|
+
}
|
1475
|
+
|
1476
|
+
static float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
|
1477
|
+
{
|
1478
|
+
(void)sim; (void)overlap; (void)max_overlap;
|
1479
|
+
return 1.0;
|
1336
1480
|
}
|
1337
1481
|
|
1338
|
-
|
1482
|
+
static Similarity *bq_get_similarity(Query *self, Searcher *searcher)
|
1339
1483
|
{
|
1340
|
-
|
1341
|
-
|
1484
|
+
if (!BQ(self)->similarity) {
|
1485
|
+
Similarity *sim = q_get_similarity_i(self, searcher);
|
1486
|
+
BQ(self)->similarity = ALLOC(Similarity);
|
1487
|
+
memcpy(BQ(self)->similarity, sim, sizeof(Similarity));
|
1488
|
+
BQ(self)->similarity->coord = &bq_coord_disabled;
|
1489
|
+
BQ(self)->similarity->destroy = (void (*)(Similarity *))&free;
|
1490
|
+
}
|
1342
1491
|
|
1343
|
-
|
1344
|
-
|
1492
|
+
return BQ(self)->similarity;
|
1493
|
+
}
|
1345
1494
|
|
1346
|
-
|
1347
|
-
|
1348
|
-
} else {
|
1495
|
+
static ulong bq_hash(Query *self)
|
1496
|
+
{
|
1349
1497
|
int i;
|
1350
|
-
|
1351
|
-
|
1498
|
+
ulong hash = 0;
|
1499
|
+
for (i = 0; i < BQ(self)->clause_cnt; i++) {
|
1500
|
+
hash ^= bc_hash(BQ(self)->clauses[i]);
|
1352
1501
|
}
|
1353
|
-
|
1354
|
-
|
1355
|
-
|
1502
|
+
return (hash << 1) | BQ(self)->coord_disabled;
|
1503
|
+
}
|
1504
|
+
|
1505
|
+
static int bq_eq(Query *self, Query *o)
|
1506
|
+
{
|
1507
|
+
int i;
|
1508
|
+
BooleanQuery *bq1 = BQ(self);
|
1509
|
+
BooleanQuery *bq2 = BQ(o);
|
1510
|
+
if ((bq1->coord_disabled != bq2->coord_disabled)
|
1511
|
+
|| (bq1->max_clause_cnt != bq1->max_clause_cnt)
|
1512
|
+
|| (bq1->clause_cnt != bq2->clause_cnt)) {
|
1513
|
+
return false;
|
1356
1514
|
}
|
1357
1515
|
|
1358
|
-
for (i = 0; i <
|
1359
|
-
|
1516
|
+
for (i = 0; i < bq1->clause_cnt; i++) {
|
1517
|
+
if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
|
1518
|
+
return false;
|
1519
|
+
}
|
1360
1520
|
}
|
1361
|
-
|
1362
|
-
free(bsc->required_scorers);
|
1363
|
-
free(bsc->optional_scorers);
|
1364
|
-
free(bsc->prohibited_scorers);
|
1365
|
-
scorer_destroy_i(self);
|
1521
|
+
return true;
|
1366
1522
|
}
|
1367
1523
|
|
1368
|
-
|
1524
|
+
Query *bq_new(bool coord_disabled)
|
1369
1525
|
{
|
1370
|
-
|
1526
|
+
Query *self = q_new(BooleanQuery);
|
1527
|
+
BQ(self)->coord_disabled = coord_disabled;
|
1528
|
+
if (coord_disabled) {
|
1529
|
+
self->get_similarity = &bq_get_similarity;
|
1530
|
+
}
|
1531
|
+
BQ(self)->max_clause_cnt = DEFAULT_MAX_CLAUSE_COUNT;
|
1532
|
+
BQ(self)->clause_cnt = 0;
|
1533
|
+
BQ(self)->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
|
1534
|
+
BQ(self)->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
|
1535
|
+
BQ(self)->similarity = NULL;
|
1536
|
+
BQ(self)->original_boost = 0.0;
|
1537
|
+
|
1538
|
+
self->type = BOOLEAN_QUERY;
|
1539
|
+
self->rewrite = &bq_rewrite;
|
1540
|
+
self->extract_terms = &bq_extract_terms;
|
1541
|
+
self->to_s = &bq_to_s;
|
1542
|
+
self->hash = &bq_hash;
|
1543
|
+
self->eq = &bq_eq;
|
1544
|
+
self->destroy_i = &bq_destroy;
|
1545
|
+
self->create_weight_i = &bw_new;
|
1546
|
+
self->get_matchv_i = &bq_get_matchv_i;
|
1547
|
+
|
1548
|
+
return self;
|
1549
|
+
}
|
1550
|
+
|
1551
|
+
BooleanClause *bq_add_clause_nr(Query *self, BooleanClause *bc)
|
1552
|
+
{
|
1553
|
+
if (BQ(self)->clause_cnt >= BQ(self)->clause_capa) {
|
1554
|
+
BQ(self)->clause_capa *= 2;
|
1555
|
+
REALLOC_N(BQ(self)->clauses, BooleanClause *, BQ(self)->clause_capa);
|
1556
|
+
}
|
1557
|
+
if (BQ(self)->clause_cnt > BQ(self)->max_clause_cnt) {
|
1558
|
+
RAISE(STATE_ERROR, "Two many clauses. The max clause limit is set to "
|
1559
|
+
"<%d> but your query has <%d> clauses. You can try increasing "
|
1560
|
+
":max_clause_count for the BooleanQuery or using a different "
|
1561
|
+
"type of query.", BQ(self)->clause_cnt, BQ(self)->max_clause_cnt);
|
1562
|
+
}
|
1563
|
+
BQ(self)->clauses[BQ(self)->clause_cnt] = bc;
|
1564
|
+
BQ(self)->clause_cnt++;
|
1565
|
+
return bc;
|
1371
1566
|
}
|
1372
1567
|
|
1373
|
-
|
1568
|
+
BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
|
1374
1569
|
{
|
1375
|
-
|
1376
|
-
|
1377
|
-
|
1378
|
-
bsc->coordinator = coord_create(similarity);
|
1379
|
-
bsc->counting_sum_scorer = NULL;
|
1380
|
-
self->data = bsc;
|
1570
|
+
REF(bc);
|
1571
|
+
return bq_add_clause_nr(self, bc);
|
1572
|
+
}
|
1381
1573
|
|
1382
|
-
|
1383
|
-
|
1384
|
-
|
1385
|
-
|
1386
|
-
|
1387
|
-
|
1574
|
+
BooleanClause *bq_add_query_nr(Query *self, Query *sub_query, enum BC_TYPE occur)
|
1575
|
+
{
|
1576
|
+
BooleanClause *bc = bc_new(sub_query, occur);
|
1577
|
+
bq_add_clause(self, bc);
|
1578
|
+
bc_deref(bc); /* bc would have been referenced unnecessarily */
|
1579
|
+
return bc;
|
1388
1580
|
}
|
1389
1581
|
|
1582
|
+
BooleanClause *bq_add_query(Query *self, Query *sub_query, enum BC_TYPE occur)
|
1583
|
+
{
|
1584
|
+
REF(sub_query);
|
1585
|
+
return bq_add_query_nr(self, sub_query, occur);
|
1586
|
+
}
|
1390
1587
|
|