ferret 0.9.1 → 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/lang.c
CHANGED
@@ -29,3 +29,13 @@ void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...)
|
|
29
29
|
sprintf(buf_ptr, "\n");
|
30
30
|
rb_raise(etype, buf); /* conventional value for failed execution */
|
31
31
|
}
|
32
|
+
|
33
|
+
#ifdef WIN32
|
34
|
+
/**
 * Windows replacement for the variadic eprintf macro: formats the message
 * and raises it through ft_raise with "Windows" / -1 as the file and line.
 *
 * ft_raise is itself variadic (fmt, ...), so a va_list must not be handed
 * to it as an ordinary argument; the conversions in fmt would then be
 * applied to the va_list object instead of the caller's arguments. The
 * message is therefore expanded here first and forwarded with "%s".
 *
 * @param etype the exception type passed through to ft_raise
 * @param fmt   printf-style format string
 * @param ...   arguments consumed by fmt
 */
void eprintf(VALUE etype, const char *fmt, ...)
{
    /* NOTE(review): kept well below MAX_ERROR_LEN because ft_raise appends
     * to the message in its own buffer; its sizing is not visible here */
    char msg[MAX_ERROR_LEN / 2];
    va_list args;

    va_start(args, fmt);
    vsnprintf(msg, sizeof(msg), fmt, args);
    va_end(args); /* must happen before ft_raise, which ends in rb_raise */
    msg[sizeof(msg) - 1] = '\0';

    ft_raise("Windows", -1, etype, "%s", msg);
}
|
41
|
+
#endif
|
data/ext/lang.h
CHANGED
@@ -7,7 +7,17 @@
|
|
7
7
|
#define FERRET_EXT
|
8
8
|
|
9
9
|
#define MAX_ERROR_LEN 2048
|
10
|
-
|
10
|
+
|
11
|
+
typedef LONG_LONG llong;
|
12
|
+
typedef unsigned LONG_LONG ullong;
|
13
|
+
|
14
|
+
#ifdef WIN32
|
15
|
+
# undef close
|
16
|
+
# undef rename
|
17
|
+
extern void eprintf(VALUE etype, const char *fmt, ...);
|
18
|
+
#else
|
19
|
+
# define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
|
20
|
+
#endif
|
11
21
|
extern void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...);
|
12
22
|
extern void weprintf(const char *fmt, ...);
|
13
23
|
extern char *progname(void);
|
data/ext/nix_io.c
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#ifndef WIN32
|
2
|
+
|
3
|
+
#include "global.h"
|
4
|
+
#include "store.h"
|
5
|
+
#include <sys/dir.h>
|
6
|
+
#include <dirent.h>
|
7
|
+
#include <unistd.h>
|
8
|
+
#include <string.h>
|
9
|
+
#include <errno.h>
|
10
|
+
#include <fcntl.h>
|
11
|
+
|
12
|
+
/**
 * Build the path of a file in the store by joining the base directory and
 * the file name with the operating system's default file separator ('/').
 *
 * No bounds checking is performed: buf must have room for
 * strlen(base) + strlen(filename) + 2 bytes.
 *
 * @param buf      destination buffer that receives "base/filename"
 * @param base     directory part of the path
 * @param filename name of the file inside base
 * @return buf
 */
char *join_path(char *buf, const char *base, const char *filename)
{
    size_t base_len = strlen(base);
    size_t name_len = strlen(filename);

    memcpy(buf, base, base_len);
    buf[base_len] = '/';
    /* name_len + 1 so the terminating NUL is copied as well */
    memcpy(buf + base_len + 1, filename, name_len + 1);

    return buf;
}
|
21
|
+
|
22
|
+
bool exists(char *path)
|
23
|
+
{
|
24
|
+
int fd = open(path, 0);
|
25
|
+
if (fd < 0) {
|
26
|
+
if (errno != ENOENT) {
|
27
|
+
RAISE(IO_ERROR, strerror(errno));
|
28
|
+
}
|
29
|
+
return false;
|
30
|
+
}
|
31
|
+
close(fd);
|
32
|
+
return true;
|
33
|
+
}
|
34
|
+
|
35
|
+
int fcount(char *path)
|
36
|
+
{
|
37
|
+
int cnt = 0;
|
38
|
+
struct dirent *de;
|
39
|
+
DIR *d = opendir(path);
|
40
|
+
|
41
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
42
|
+
|
43
|
+
while ((de = readdir(d)) != NULL) {
|
44
|
+
if (de->d_name[0] != '.') {
|
45
|
+
cnt++;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
closedir(d);
|
49
|
+
|
50
|
+
return cnt;
|
51
|
+
}
|
52
|
+
|
53
|
+
void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
|
54
|
+
{
|
55
|
+
struct dirent *de;
|
56
|
+
DIR *d = opendir(path);
|
57
|
+
|
58
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
59
|
+
|
60
|
+
while ((de = readdir(d)) != NULL) {
|
61
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
62
|
+
func(de->d_name, arg);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
closedir(d);
|
66
|
+
}
|
67
|
+
|
68
|
+
/**
|
69
|
+
* Clear all the locks in the store.
|
70
|
+
*
|
71
|
+
* @param store the store to clear the locks from
|
72
|
+
* @throws IO_ERROR if there is an error opening the directory
|
73
|
+
*/
|
74
|
+
void fs_clear_locks(Store *store)
|
75
|
+
{
|
76
|
+
struct dirent *de;
|
77
|
+
DIR *d = opendir(store->dir.path);
|
78
|
+
|
79
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
80
|
+
|
81
|
+
while ((de = readdir(d)) != NULL) {
|
82
|
+
if (file_is_lock(de->d_name)) {
|
83
|
+
char buf[MAX_FILE_PATH];
|
84
|
+
remove(join_path(buf, store->dir.path, de->d_name));
|
85
|
+
}
|
86
|
+
}
|
87
|
+
closedir(d);
|
88
|
+
}
|
89
|
+
|
90
|
+
/**
|
91
|
+
* Clear all files from the store except the lock files.
|
92
|
+
*
|
93
|
+
* @param store the store to clear all the files from
|
94
|
+
* @throws IO_ERROR if there is an error deleting the files
|
95
|
+
*/
|
96
|
+
void fs_clear(Store *store)
|
97
|
+
{
|
98
|
+
struct dirent *de;
|
99
|
+
DIR *d = opendir(store->dir.path);
|
100
|
+
|
101
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
102
|
+
|
103
|
+
while ((de = readdir(d)) != NULL) {
|
104
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
105
|
+
char buf[MAX_FILE_PATH];
|
106
|
+
remove(join_path(buf, store->dir.path, de->d_name));
|
107
|
+
}
|
108
|
+
}
|
109
|
+
closedir(d);
|
110
|
+
}
|
111
|
+
|
112
|
+
/**
|
113
|
+
* Clear all files from the store including the lock files.
|
114
|
+
*
|
115
|
+
* @param store the store to clear all the files from
|
116
|
+
* @throws IO_ERROR if there is an error deleting the files
|
117
|
+
*/
|
118
|
+
void fs_clear_all(Store *store)
|
119
|
+
{
|
120
|
+
struct dirent *de;
|
121
|
+
DIR *d = opendir(store->dir.path);
|
122
|
+
|
123
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
124
|
+
|
125
|
+
while ((de = readdir(d)) != NULL) {
|
126
|
+
if (de->d_name[0] != '.') {
|
127
|
+
char buf[MAX_FILE_PATH];
|
128
|
+
remove(join_path(buf, store->dir.path, de->d_name));
|
129
|
+
}
|
130
|
+
}
|
131
|
+
closedir(d);
|
132
|
+
}
|
133
|
+
|
134
|
+
#endif
|
135
|
+
|
data/ext/priorityqueue.c
CHANGED
@@ -11,20 +11,20 @@ PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2))
|
|
11
11
|
return pq;
|
12
12
|
}
|
13
13
|
|
14
|
-
void pq_destroy(
|
14
|
+
void pq_destroy(PriorityQueue *pq)
|
15
15
|
{
|
16
|
-
PriorityQueue *pq = (PriorityQueue *)p;
|
17
16
|
free(pq->heap);
|
18
|
-
free(
|
17
|
+
free(pq);
|
19
18
|
}
|
20
19
|
|
21
20
|
void pq_up(PriorityQueue *pq)
|
22
21
|
{
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
22
|
+
void **heap = pq->heap;
|
23
|
+
void *node;
|
24
|
+
int i = pq->count;
|
25
|
+
int j = i >> 1;
|
26
|
+
|
27
|
+
node = heap[i];
|
28
28
|
|
29
29
|
while ((j > 0) && pq->lt(node, heap[j])) {
|
30
30
|
heap[i] = heap[j];
|
@@ -117,7 +117,7 @@ int pq_insert(PriorityQueue *pq, void *elem)
|
|
117
117
|
|
118
118
|
PriorityQueue2 *pq2_create(int max_size,
|
119
119
|
bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
|
120
|
-
|
120
|
+
free_ft destroy)
|
121
121
|
{
|
122
122
|
PriorityQueue2 *pq = ALLOC(PriorityQueue2);
|
123
123
|
pq->count = 0;
|
@@ -129,20 +129,20 @@ PriorityQueue2 *pq2_create(int max_size,
|
|
129
129
|
return pq;
|
130
130
|
}
|
131
131
|
|
132
|
-
void pq2_destroy(
|
132
|
+
void pq2_destroy(PriorityQueue2 *pq)
|
133
133
|
{
|
134
|
-
PriorityQueue2 *pq = (PriorityQueue2 *)p;
|
135
134
|
free(pq->heap);
|
136
|
-
free(
|
135
|
+
free(pq);
|
137
136
|
}
|
138
137
|
|
139
138
|
void pq2_up(PriorityQueue2 *pq)
|
140
139
|
{
|
141
|
-
int i,j;
|
142
|
-
i = pq->count;
|
143
|
-
j = i >> 1;
|
144
140
|
void **heap = pq->heap;
|
145
|
-
void *node
|
141
|
+
void *node;
|
142
|
+
int i = pq->count;
|
143
|
+
int j = i >> 1;
|
144
|
+
|
145
|
+
node = heap[i];
|
146
146
|
|
147
147
|
while ((j > 0) && pq->lt(pq, node, heap[j])) {
|
148
148
|
heap[i] = heap[j];
|
data/ext/priorityqueue.h
CHANGED
@@ -3,16 +3,19 @@
|
|
3
3
|
|
4
4
|
#include "global.h"
|
5
5
|
|
6
|
+
typedef bool (*lt_ft)(void *p1, void *p2);
|
7
|
+
|
6
8
|
typedef struct PriorityQueue {
|
7
9
|
int count;
|
8
10
|
int size;
|
9
11
|
void **heap;
|
10
|
-
|
11
|
-
|
12
|
+
lt_ft lt;
|
13
|
+
//bool (*lt)(void *p1, void *p2);
|
14
|
+
free_ft free_elem;
|
12
15
|
} PriorityQueue;
|
13
16
|
|
14
17
|
PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2));
|
15
|
-
void pq_destroy(
|
18
|
+
void pq_destroy(PriorityQueue *pq);
|
16
19
|
void pq_push(PriorityQueue *pq, void *elem);
|
17
20
|
void *pq_top(PriorityQueue *pq);
|
18
21
|
void *pq_pop(PriorityQueue *pq);
|
@@ -28,13 +31,13 @@ typedef struct PriorityQueue2 {
|
|
28
31
|
void *data;
|
29
32
|
bool (*lt)(struct PriorityQueue2 *pq, void *p1, void *p2);
|
30
33
|
void (*free_elem)(void *p);
|
31
|
-
|
34
|
+
free_ft destroy;
|
32
35
|
} PriorityQueue2;
|
33
36
|
|
34
37
|
PriorityQueue2 *pq2_create(int max_size,
|
35
38
|
bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
|
36
|
-
|
37
|
-
void pq2_destroy(
|
39
|
+
free_ft destroy);
|
40
|
+
void pq2_destroy(PriorityQueue2 *pq);
|
38
41
|
void pq2_push(PriorityQueue2 *pq, void *elem);
|
39
42
|
void *pq2_top(PriorityQueue2 *pq);
|
40
43
|
void *pq2_pop(PriorityQueue2 *pq);
|
data/ext/q_boolean.c
CHANGED
@@ -38,9 +38,9 @@ void bw_normalize(Weight *self, float normalization_factor)
|
|
38
38
|
{
|
39
39
|
BooleanWeight *bw = (BooleanWeight *)self->data;
|
40
40
|
BooleanQuery *bq = (BooleanQuery *)self->query->data;
|
41
|
-
normalization_factor *= self->value; // multiply by query boost
|
42
41
|
Weight *weight;
|
43
42
|
int i;
|
43
|
+
normalization_factor *= self->value; /* multiply by query boost */
|
44
44
|
|
45
45
|
for (i = 0; i < bw->w_cnt; i++) {
|
46
46
|
if (! bq->clauses[i]->is_prohibited) {
|
@@ -79,13 +79,18 @@ char *bw_to_s(Weight *self)
|
|
79
79
|
return strfmt("BooleanWeight(%f)", self->value);
|
80
80
|
}
|
81
81
|
|
82
|
-
void bw_destroy(
|
82
|
+
void bw_destroy(Weight *self)
|
83
83
|
{
|
84
|
-
|
85
|
-
BooleanWeight *bw = (BooleanWeight *)
|
84
|
+
int i;
|
85
|
+
BooleanWeight *bw = (BooleanWeight *)self->data;
|
86
|
+
|
87
|
+
for (i = 0; i < bw->w_cnt; i++) {
|
88
|
+
bw->weights[i]->destroy(bw->weights[i]);
|
89
|
+
}
|
90
|
+
|
86
91
|
free(bw->weights);
|
87
92
|
free(bw);
|
88
|
-
|
93
|
+
w_destroy(self);
|
89
94
|
}
|
90
95
|
|
91
96
|
Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
@@ -127,8 +132,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
127
132
|
}
|
128
133
|
sum_expl->value = sum;
|
129
134
|
|
130
|
-
if (coord == 1) {
|
131
|
-
explanation = sum_expl;
|
135
|
+
if (coord == 1) { /* only one clause matched */
|
136
|
+
explanation = sum_expl; /* eliminate wrapper */
|
132
137
|
sum_expl->dcnt = 0;
|
133
138
|
sum_expl = sum_expl->details[0];
|
134
139
|
expl_destoy(explanation);
|
@@ -136,8 +141,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
136
141
|
|
137
142
|
coord_factor = sim_coord(self->similarity, coord, max_coord);
|
138
143
|
|
139
|
-
if (coord_factor == 1.0) {
|
140
|
-
return sum_expl;
|
144
|
+
if (coord_factor == 1.0) { /* coord is no-op */
|
145
|
+
return sum_expl; /* eliminate wrapper */
|
141
146
|
} else {
|
142
147
|
explanation = expl_create(sum * coord_factor, estrdup("product of:"));
|
143
148
|
expl_add_detail(explanation, sum_expl);
|
@@ -150,13 +155,17 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
150
155
|
Weight *bw_create(Query *query, Searcher *searcher)
|
151
156
|
{
|
152
157
|
int i;
|
153
|
-
|
158
|
+
Weight *self = w_create(query);
|
154
159
|
BooleanWeight *bw = ALLOC(BooleanWeight);
|
155
|
-
|
156
|
-
|
160
|
+
BooleanQuery *bq = (BooleanQuery *)query->data;
|
161
|
+
|
162
|
+
bw->w_cnt = bq->clause_cnt;
|
163
|
+
bw->weights = ALLOC_N(Weight *, bw->w_cnt);
|
164
|
+
for (i = 0; i < bw->w_cnt; i++) {
|
165
|
+
bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
|
166
|
+
}
|
157
167
|
self->data = bw;
|
158
|
-
|
159
|
-
self->get_value = &w_get_value;
|
168
|
+
|
160
169
|
self->normalize = &bw_normalize;
|
161
170
|
self->scorer = &bw_scorer;
|
162
171
|
self->explain = &bw_explain;
|
@@ -165,15 +174,8 @@ Weight *bw_create(Query *query, Searcher *searcher)
|
|
165
174
|
self->sum_of_squared_weights = &bw_sum_of_squared_weights;
|
166
175
|
|
167
176
|
self->similarity = query->get_similarity(query, searcher);
|
168
|
-
self->query = query;
|
169
177
|
self->value = query->boost;
|
170
178
|
|
171
|
-
bw->w_cnt = bq->clause_cnt;
|
172
|
-
bw->weights = ALLOC_N(Weight *, bw->w_cnt);
|
173
|
-
for (i = 0; i < bw->w_cnt; i++) {
|
174
|
-
bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
|
175
|
-
}
|
176
|
-
|
177
179
|
return self;
|
178
180
|
}
|
179
181
|
|
@@ -204,15 +206,28 @@ void bc_set_occur(BooleanClause *self, unsigned int occur)
|
|
204
206
|
}
|
205
207
|
}
|
206
208
|
|
207
|
-
void
|
209
|
+
/**
 * Drop one reference to a boolean clause. When the reference count reaches
 * zero (or below) the clause's query is dereferenced with q_deref and the
 * clause itself is freed.
 *
 * @param self the clause to release a reference on
 */
void bc_deref(BooleanClause *self)
{
    self->ref_cnt--;
    if (self->ref_cnt > 0) {
        return; /* still referenced elsewhere */
    }

    q_deref(self->query);
    free(self);
}
|
216
|
+
|
217
|
+
/**
 * Hash a boolean clause: the hash of its query shifted left by two bits,
 * OR-ed with the clause's occur value.
 *
 * @param self the clause to hash
 * @return the combined hash value
 */
uint bc_hash(BooleanClause *self)
{
    uint query_bits = q_hash(self->query) << 2;

    return query_bits | self->occur;
}
|
221
|
+
|
222
|
+
int bc_eq(BooleanClause *self, BooleanClause *o)
|
208
223
|
{
|
209
|
-
self->
|
210
|
-
free(self);
|
224
|
+
return ((self->occur == o->occur) && q_eq(self->query, o->query));
|
211
225
|
}
|
212
226
|
|
213
227
|
BooleanClause *bc_create(Query *query, unsigned int occur)
|
214
228
|
{
|
215
229
|
BooleanClause *self = ALLOC(BooleanClause);
|
230
|
+
self->ref_cnt = 1;
|
216
231
|
self->query = query;
|
217
232
|
bc_set_occur(self, occur);
|
218
233
|
return self;
|
@@ -228,38 +243,44 @@ Query *bq_rewrite(Query *self, IndexReader *ir)
|
|
228
243
|
{
|
229
244
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
230
245
|
BooleanClause *clause;
|
231
|
-
Query *
|
246
|
+
Query *q, *rq;
|
232
247
|
int i;
|
233
248
|
|
234
249
|
if (bq->clause_cnt == 1) { // optimize 1-clause queries
|
235
250
|
clause = bq->clauses[0];
|
236
251
|
if (! clause->is_prohibited) { // just return clause
|
237
|
-
|
252
|
+
q = clause->query->rewrite(clause->query, ir); // rewrite first
|
238
253
|
|
239
254
|
if (self->boost != 1.0) {// incorporate boost
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
if ((
|
244
|
-
|
255
|
+
/* original_boost is initialized to 0.0. If it has been set to
|
256
|
+
* something else it means this query has already been boosted before
|
257
|
+
* so boost from the original value */
|
258
|
+
if ((q == clause->query) && bq->original_boost) {
|
259
|
+
/* rewrite was no-op */
|
260
|
+
q->boost = bq->original_boost * self->boost;
|
245
261
|
} else {
|
246
|
-
|
247
|
-
|
262
|
+
bq->original_boost = q->boost; /* save original boost */
|
263
|
+
q->boost *= self->boost;
|
248
264
|
}
|
249
265
|
}
|
250
266
|
|
251
|
-
return
|
267
|
+
return q;
|
252
268
|
}
|
253
269
|
}
|
254
270
|
|
271
|
+
/* replace each clause's query with its rewritten query */
|
255
272
|
for (i = 0; i < bq->clause_cnt; i++) {
|
256
273
|
clause = bq->clauses[i];
|
257
|
-
|
274
|
+
rq = clause->query->rewrite(clause->query, ir);
|
275
|
+
q_deref(clause->query);
|
276
|
+
clause->query = rq;
|
258
277
|
}
|
259
|
-
|
278
|
+
|
279
|
+
self->ref_cnt++;
|
280
|
+
return self;
|
260
281
|
}
|
261
282
|
|
262
|
-
void bq_extract_terms(Query *self,
|
283
|
+
void bq_extract_terms(Query *self, HashSet *terms)
|
263
284
|
{
|
264
285
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
265
286
|
BooleanClause *clause;
|
@@ -272,6 +293,7 @@ void bq_extract_terms(Query *self, Array *terms)
|
|
272
293
|
|
273
294
|
char *bq_to_s(Query *self, char *field)
|
274
295
|
{
|
296
|
+
int i;
|
275
297
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
276
298
|
BooleanClause *clause;
|
277
299
|
Query *sub_query;
|
@@ -288,11 +310,10 @@ char *bq_to_s(Query *self, char *field)
|
|
288
310
|
bp++;
|
289
311
|
}
|
290
312
|
|
291
|
-
int i;
|
292
313
|
for (i = 0; i < bq->clause_cnt; i++) {
|
293
314
|
clause = bq->clauses[i];
|
294
315
|
clause_str = clause->query->to_s(clause->query, field);
|
295
|
-
clause_len = strlen(clause_str);
|
316
|
+
clause_len = (int)strlen(clause_str);
|
296
317
|
needed = clause_len + 5;
|
297
318
|
while ((size - bp) < needed) {
|
298
319
|
size *= 2;
|
@@ -323,7 +344,7 @@ char *bq_to_s(Query *self, char *field)
|
|
323
344
|
|
324
345
|
if (self->boost != 1.0) {
|
325
346
|
char *boost_str = strfmt(")^%f", self->boost);
|
326
|
-
int boost_len = strlen(boost_str);
|
347
|
+
int boost_len = (int)strlen(boost_str);
|
327
348
|
REALLOC_N(buffer, char, bp + boost_len + 1);
|
328
349
|
memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
|
329
350
|
bp += boost_len;
|
@@ -333,22 +354,19 @@ char *bq_to_s(Query *self, char *field)
|
|
333
354
|
return buffer;
|
334
355
|
}
|
335
356
|
|
336
|
-
void bq_destroy(
|
357
|
+
static void bq_destroy(Query *self)
|
337
358
|
{
|
338
|
-
Query *self = (Query *)p;
|
339
359
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
340
360
|
int i;
|
341
|
-
|
342
|
-
|
343
|
-
bc_destroy(bq->clauses[i]);
|
344
|
-
}
|
361
|
+
for (i = 0; i < bq->clause_cnt; i++) {
|
362
|
+
bc_deref(bq->clauses[i]);
|
345
363
|
}
|
346
364
|
free(bq->clauses);
|
347
365
|
if (bq->similarity) {
|
348
366
|
bq->similarity->destroy(bq->similarity);
|
349
367
|
}
|
350
368
|
free(bq);
|
351
|
-
|
369
|
+
q_destroy_i(self);
|
352
370
|
}
|
353
371
|
|
354
372
|
float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
|
@@ -360,27 +378,50 @@ Similarity *bq_get_similarity(Query *self, Searcher *searcher)
|
|
360
378
|
{
|
361
379
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
362
380
|
if (!bq->similarity) {
|
363
|
-
Similarity *sim =
|
381
|
+
Similarity *sim = q_get_similarity_i(self, searcher);
|
364
382
|
bq->similarity = ALLOC(Similarity);
|
365
383
|
memcpy(bq->similarity, sim, sizeof(Similarity));
|
366
384
|
bq->similarity->coord = &bq_coord_disabled;
|
367
|
-
bq->similarity->destroy = &free;
|
385
|
+
bq->similarity->destroy = (void (*)(Similarity *))&free;
|
368
386
|
}
|
369
387
|
|
370
388
|
return bq->similarity;
|
371
389
|
}
|
372
390
|
|
391
|
+
/**
 * Hash a boolean query: XOR of the hashes of all its clauses, shifted left
 * by one bit and OR-ed with the coord_disabled flag.
 *
 * @param self the boolean query to hash
 * @return the combined hash value
 */
static uint bq_hash(Query *self)
{
    BooleanQuery *bq = (BooleanQuery *)self->data;
    uint clause_bits = 0;
    int idx = 0;

    while (idx < bq->clause_cnt) {
        clause_bits ^= bc_hash(bq->clauses[idx]);
        idx++;
    }

    return (clause_bits << 1) | bq->coord_disabled;
}
|
401
|
+
|
402
|
+
/**
 * Compare two boolean queries for equality. They are equal when their
 * coord_disabled flag, max_clause_cnt and clause_cnt match and every
 * clause equals (bc_eq) the clause at the same position in the other
 * query.
 *
 * @param self the first boolean query
 * @param o    the boolean query to compare against
 * @return true if the queries are equal, false otherwise
 */
static int bq_eq(Query *self, Query *o)
{
    int i;
    BooleanQuery *bq1 = (BooleanQuery *)self->data;
    BooleanQuery *bq2 = (BooleanQuery *)o->data;

    /* cheap scalar checks first; max_clause_cnt must be compared against
     * the other query, not against itself */
    if ((bq1->coord_disabled != bq2->coord_disabled) ||
        (bq1->max_clause_cnt != bq2->max_clause_cnt) ||
        (bq1->clause_cnt != bq2->clause_cnt)) {
        return false;
    }

    for (i = 0; i < bq1->clause_cnt; i++) {
        if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
            return false;
        }
    }
    return true;
}
|
420
|
+
|
373
421
|
Query *bq_create(bool coord_disabled)
|
374
422
|
{
|
375
423
|
Query *self = q_create();
|
376
424
|
BooleanQuery *bq = ALLOC(BooleanQuery);
|
377
|
-
self->type = BOOLEAN_QUERY;
|
378
|
-
self->create_weight = &bw_create;
|
379
|
-
self->rewrite = &bq_rewrite;
|
380
|
-
self->extract_terms = &bq_extract_terms;
|
381
|
-
self->to_s = &bq_to_s;
|
382
|
-
self->destroy = &bq_destroy;
|
383
|
-
self->data = bq;
|
384
425
|
bq->coord_disabled = coord_disabled;
|
385
426
|
if (coord_disabled) {
|
386
427
|
self->get_similarity = &bq_get_similarity;
|
@@ -390,13 +431,24 @@ Query *bq_create(bool coord_disabled)
|
|
390
431
|
bq->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
|
391
432
|
bq->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
|
392
433
|
bq->similarity = NULL;
|
434
|
+
bq->original_boost = 0.0;
|
435
|
+
self->data = bq;
|
393
436
|
|
437
|
+
self->type = BOOLEAN_QUERY;
|
438
|
+
self->rewrite = &bq_rewrite;
|
439
|
+
self->extract_terms = &bq_extract_terms;
|
440
|
+
self->to_s = &bq_to_s;
|
441
|
+
self->hash = &bq_hash;
|
442
|
+
self->eq = &bq_eq;
|
443
|
+
self->destroy_i = &bq_destroy;
|
444
|
+
self->create_weight_i = &bw_create;
|
394
445
|
return self;
|
395
446
|
}
|
396
447
|
|
397
448
|
BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
|
398
449
|
{
|
399
450
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
451
|
+
if (!self->destroy_all) ref(bc);
|
400
452
|
if (bq->clause_cnt >= bq->clause_capa) {
|
401
453
|
bq->clause_capa *= 2;
|
402
454
|
REALLOC_N(bq->clauses, BooleanClause *, bq->clause_capa);
|
@@ -412,7 +464,12 @@ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
|
|
412
464
|
BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur)
|
413
465
|
{
|
414
466
|
BooleanClause *bc = bc_create(sub_query, occur);
|
415
|
-
|
467
|
+
bq_add_clause(self, bc);
|
468
|
+
if (!self->destroy_all) {
|
469
|
+
ref(sub_query);
|
470
|
+
bc_deref(bc); /* bc would have been referenced unnecessarily */
|
471
|
+
}
|
472
|
+
return bc;
|
416
473
|
}
|
417
474
|
|
418
475
|
/***************************************************************************
|
@@ -572,16 +629,15 @@ Explanation *dssc_explain(Scorer *self, int doc_num)
|
|
572
629
|
return e;
|
573
630
|
}
|
574
631
|
|
575
|
-
void dssc_destroy(
|
632
|
+
void dssc_destroy(Scorer *self)
|
576
633
|
{
|
577
|
-
Scorer *self = (Scorer *)p;
|
578
634
|
DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
|
579
635
|
int i;
|
580
636
|
for (i = 0; i < dssc->ss_cnt; i++) {
|
581
637
|
dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
|
582
638
|
}
|
583
639
|
if (dssc->scorer_queue) pq_destroy(dssc->scorer_queue);
|
584
|
-
|
640
|
+
scorer_destroy_i(self);
|
585
641
|
}
|
586
642
|
|
587
643
|
Scorer *disjunction_sum_scorer_create(Scorer **sub_scorers, int ss_cnt,
|
@@ -655,7 +711,8 @@ void csc_init(Scorer *self, bool init_scorers)
|
|
655
711
|
ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
|
656
712
|
Scorer *sub_scorer;
|
657
713
|
int i;
|
658
|
-
|
714
|
+
|
715
|
+
/* compute coord factor */
|
659
716
|
csc->coord = sim_coord(self->similarity, csc->ss_cnt, csc->ss_cnt);
|
660
717
|
|
661
718
|
csc->more = (csc->ss_cnt > 0);
|
@@ -746,16 +803,15 @@ bool csc_skip_to(Scorer *self, int doc_num)
|
|
746
803
|
return csc_do_next(self);
|
747
804
|
}
|
748
805
|
|
749
|
-
void csc_destroy(
|
806
|
+
void csc_destroy(Scorer *self)
|
750
807
|
{
|
751
|
-
Scorer *self = (Scorer *)p;
|
752
808
|
ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
|
753
809
|
int i;
|
754
810
|
for (i = 0; i < csc->ss_cnt; i++) {
|
755
811
|
csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
|
756
812
|
}
|
757
813
|
free(csc->sub_scorers);
|
758
|
-
|
814
|
+
scorer_destroy_i(self);
|
759
815
|
}
|
760
816
|
|
761
817
|
Scorer *conjunction_scorer_create(Similarity *similarity)
|
@@ -842,12 +898,11 @@ Explanation *smsc_explain(Scorer *self, int doc_num)
|
|
842
898
|
return scorer->explain(scorer, doc_num);
|
843
899
|
}
|
844
900
|
|
845
|
-
void smsc_destroy(
|
901
|
+
void smsc_destroy(Scorer *self)
|
846
902
|
{
|
847
|
-
Scorer *self = (Scorer *)p;
|
848
903
|
Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
|
849
904
|
scorer->destroy(scorer);
|
850
|
-
|
905
|
+
scorer_destroy_i(self);
|
851
906
|
}
|
852
907
|
|
853
908
|
Scorer *single_match_scorer_create(Coordinator *coordinator, Scorer *scorer)
|
@@ -929,13 +984,12 @@ Explanation *rossc_explain(Scorer *self, int doc_num)
|
|
929
984
|
return e;
|
930
985
|
}
|
931
986
|
|
932
|
-
void rossc_destroy(
|
987
|
+
void rossc_destroy(Scorer *self)
|
933
988
|
{
|
934
|
-
Scorer *self = (Scorer *)p;
|
935
989
|
ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
|
936
990
|
if (rossc->req_scorer) rossc->req_scorer->destroy(rossc->req_scorer);
|
937
991
|
if (rossc->opt_scorer) rossc->opt_scorer->destroy(rossc->opt_scorer);
|
938
|
-
|
992
|
+
scorer_destroy_i(self);
|
939
993
|
}
|
940
994
|
|
941
995
|
|
@@ -1070,13 +1124,12 @@ Explanation *rxsc_explain(Scorer *self, int doc_num)
|
|
1070
1124
|
return e;
|
1071
1125
|
}
|
1072
1126
|
|
1073
|
-
void rxsc_destroy(
|
1127
|
+
void rxsc_destroy(Scorer *self)
|
1074
1128
|
{
|
1075
|
-
Scorer *self = (Scorer *)p;
|
1076
1129
|
ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
|
1077
1130
|
if (rxsc->req_scorer) rxsc->req_scorer->destroy(rxsc->req_scorer);
|
1078
1131
|
if (rxsc->excl_scorer) rxsc->excl_scorer->destroy(rxsc->excl_scorer);
|
1079
|
-
|
1132
|
+
scorer_destroy_i(self);
|
1080
1133
|
}
|
1081
1134
|
|
1082
1135
|
Scorer *req_excl_scorer_create(Scorer *req_scorer, Scorer *excl_scorer)
|
@@ -1288,9 +1341,8 @@ bool bsc_skip_to(Scorer *self, int doc_num)
|
|
1288
1341
|
}
|
1289
1342
|
}
|
1290
1343
|
|
1291
|
-
void bsc_destroy(
|
1344
|
+
void bsc_destroy(Scorer *self)
|
1292
1345
|
{
|
1293
|
-
Scorer *self = (Scorer *)p;
|
1294
1346
|
BooleanScorer *bsc = (BooleanScorer *)self->data;
|
1295
1347
|
Coordinator *coord = bsc->coordinator;
|
1296
1348
|
|
@@ -1316,7 +1368,7 @@ void bsc_destroy(void *p)
|
|
1316
1368
|
free(bsc->required_scorers);
|
1317
1369
|
free(bsc->optional_scorers);
|
1318
1370
|
free(bsc->prohibited_scorers);
|
1319
|
-
|
1371
|
+
scorer_destroy_i(self);
|
1320
1372
|
}
|
1321
1373
|
|
1322
1374
|
Explanation *bsc_explain(Scorer *self, int doc_num)
|