ferret 0.9.1 → 0.9.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +6 -5
- data/Rakefile +34 -13
- data/TODO +1 -0
- data/TUTORIAL +1 -1
- data/ext/analysis.c +87 -70
- data/ext/analysis.h +18 -6
- data/ext/array.c +1 -2
- data/ext/array.h +1 -1
- data/ext/bitvector.c +10 -6
- data/ext/bitvector.h +2 -2
- data/ext/compound_io.c +30 -27
- data/ext/document.c +15 -15
- data/ext/document.h +5 -5
- data/ext/except.c +2 -0
- data/ext/except.h +25 -23
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +10 -8
- data/ext/ferret.h +9 -8
- data/ext/field.c +29 -25
- data/ext/filter.c +52 -14
- data/ext/frtio.h +13 -0
- data/ext/fs_store.c +115 -170
- data/ext/global.c +9 -8
- data/ext/global.h +17 -13
- data/ext/hash.c +13 -19
- data/ext/hash.h +11 -11
- data/ext/hashset.c +5 -7
- data/ext/hashset.h +9 -8
- data/ext/helper.c +1 -1
- data/ext/helper.h +2 -1
- data/ext/inc/except.h +25 -23
- data/ext/inc/lang.h +11 -1
- data/ext/ind.c +33 -21
- data/ext/index.h +44 -39
- data/ext/index_io.c +61 -57
- data/ext/index_rw.c +418 -361
- data/ext/lang.c +10 -0
- data/ext/lang.h +11 -1
- data/ext/nix_io.c +135 -0
- data/ext/priorityqueue.c +16 -16
- data/ext/priorityqueue.h +9 -6
- data/ext/q_boolean.c +128 -76
- data/ext/q_const_score.c +20 -20
- data/ext/q_filtered_query.c +20 -20
- data/ext/q_fuzzy.c +37 -23
- data/ext/q_match_all.c +15 -19
- data/ext/q_multi_phrase.c +87 -46
- data/ext/q_parser.c +247 -119
- data/ext/q_phrase.c +86 -52
- data/ext/q_prefix.c +25 -14
- data/ext/q_range.c +59 -14
- data/ext/q_span.c +263 -172
- data/ext/q_term.c +62 -51
- data/ext/q_wildcard.c +24 -13
- data/ext/r_analysis.c +328 -80
- data/ext/r_doc.c +11 -6
- data/ext/r_index_io.c +40 -32
- data/ext/r_qparser.c +15 -14
- data/ext/r_search.c +270 -152
- data/ext/r_store.c +32 -17
- data/ext/ram_store.c +38 -22
- data/ext/search.c +617 -87
- data/ext/search.h +227 -163
- data/ext/similarity.c +54 -45
- data/ext/similarity.h +3 -3
- data/ext/sort.c +132 -53
- data/ext/store.c +21 -2
- data/ext/store.h +14 -14
- data/ext/tags +4322 -232
- data/ext/term.c +140 -109
- data/ext/termdocs.c +74 -60
- data/ext/vector.c +181 -152
- data/ext/w32_io.c +150 -0
- data/lib/ferret.rb +1 -1
- data/lib/ferret/analysis/standard_tokenizer.rb +4 -3
- data/lib/ferret/document/field.rb +1 -1
- data/lib/ferret/index/field_infos.rb +1 -1
- data/lib/ferret/index/term.rb +1 -1
- data/lib/ferret/query_parser/query_parser.tab.rb +8 -24
- data/lib/ferret/search.rb +1 -0
- data/lib/ferret/search/boolean_query.rb +0 -4
- data/lib/ferret/search/index_searcher.rb +21 -8
- data/lib/ferret/search/multi_phrase_query.rb +7 -0
- data/lib/ferret/search/multi_searcher.rb +261 -0
- data/lib/ferret/search/phrase_query.rb +1 -1
- data/lib/ferret/search/query.rb +34 -5
- data/lib/ferret/search/sort.rb +7 -3
- data/lib/ferret/search/sort_field.rb +8 -4
- data/lib/ferret/store/fs_store.rb +13 -6
- data/lib/ferret/store/index_io.rb +0 -14
- data/lib/ferret/store/ram_store.rb +3 -2
- data/lib/rferret.rb +1 -1
- data/test/unit/analysis/ctc_analyzer.rb +131 -0
- data/test/unit/analysis/ctc_tokenstream.rb +98 -9
- data/test/unit/index/tc_index.rb +40 -1
- data/test/unit/index/tc_term.rb +7 -0
- data/test/unit/index/th_doc.rb +8 -0
- data/test/unit/query_parser/tc_query_parser.rb +6 -4
- data/test/unit/search/rtc_sort_field.rb +6 -6
- data/test/unit/search/tc_index_searcher.rb +8 -0
- data/test/unit/search/tc_multi_searcher.rb +275 -0
- data/test/unit/search/tc_multi_searcher2.rb +126 -0
- data/test/unit/search/tc_search_and_sort.rb +66 -0
- metadata +31 -26
- data/test/unit/query_parser/rtc_query_parser.rb +0 -138
data/ext/lang.c
CHANGED
@@ -29,3 +29,13 @@ void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...)
|
|
29
29
|
sprintf(buf_ptr, "\n");
|
30
30
|
rb_raise(etype, buf); /* conventional value for failed execution */
|
31
31
|
}
|
32
|
+
|
33
|
+
#ifdef WIN32
|
34
|
+
void eprintf(VALUE etype, const char *fmt, ...)
|
35
|
+
{
|
36
|
+
va_list args;
|
37
|
+
va_start(args, fmt);
|
38
|
+
ft_raise("Windows", -1, etype, fmt, args);
|
39
|
+
va_end(args);
|
40
|
+
}
|
41
|
+
#endif
|
data/ext/lang.h
CHANGED
@@ -7,7 +7,17 @@
|
|
7
7
|
#define FERRET_EXT
|
8
8
|
|
9
9
|
#define MAX_ERROR_LEN 2048
|
10
|
-
|
10
|
+
|
11
|
+
typedef LONG_LONG llong;
|
12
|
+
typedef unsigned LONG_LONG ullong;
|
13
|
+
|
14
|
+
#ifdef WIN32
|
15
|
+
# undef close
|
16
|
+
# undef rename
|
17
|
+
extern void eprintf(VALUE etype, const char *fmt, ...);
|
18
|
+
#else
|
19
|
+
# define eprintf(...) ft_raise(__FILE__, __LINE__, __VA_ARGS__)
|
20
|
+
#endif
|
11
21
|
extern void ft_raise(char *file, int line_num, VALUE etype, const char *fmt, ...);
|
12
22
|
extern void weprintf(const char *fmt, ...);
|
13
23
|
extern char *progname(void);
|
data/ext/nix_io.c
ADDED
@@ -0,0 +1,135 @@
|
|
1
|
+
#ifndef WIN32
|
2
|
+
|
3
|
+
#include "global.h"
|
4
|
+
#include "store.h"
|
5
|
+
#include <sys/dir.h>
|
6
|
+
#include <dirent.h>
|
7
|
+
#include <unistd.h>
|
8
|
+
#include <string.h>
|
9
|
+
#include <errno.h>
|
10
|
+
#include <fcntl.h>
|
11
|
+
|
12
|
+
/**
|
13
|
+
* Create a filepath for a file in the store using the operating system's
|
14
|
+
* default file separator.
|
15
|
+
*/
|
16
|
+
char *join_path(char *buf, const char *base, const char *filename)
|
17
|
+
{
|
18
|
+
sprintf(buf, "%s/%s", base, filename);
|
19
|
+
return buf;
|
20
|
+
}
|
21
|
+
|
22
|
+
bool exists(char *path)
|
23
|
+
{
|
24
|
+
int fd = open(path, 0);
|
25
|
+
if (fd < 0) {
|
26
|
+
if (errno != ENOENT) {
|
27
|
+
RAISE(IO_ERROR, strerror(errno));
|
28
|
+
}
|
29
|
+
return false;
|
30
|
+
}
|
31
|
+
close(fd);
|
32
|
+
return true;
|
33
|
+
}
|
34
|
+
|
35
|
+
int fcount(char *path)
|
36
|
+
{
|
37
|
+
int cnt = 0;
|
38
|
+
struct dirent *de;
|
39
|
+
DIR *d = opendir(path);
|
40
|
+
|
41
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
42
|
+
|
43
|
+
while ((de = readdir(d)) != NULL) {
|
44
|
+
if (de->d_name[0] != '.') {
|
45
|
+
cnt++;
|
46
|
+
}
|
47
|
+
}
|
48
|
+
closedir(d);
|
49
|
+
|
50
|
+
return cnt;
|
51
|
+
}
|
52
|
+
|
53
|
+
void dir_each(char *path, void (*func)(char *fname, void *arg), void *arg)
|
54
|
+
{
|
55
|
+
struct dirent *de;
|
56
|
+
DIR *d = opendir(path);
|
57
|
+
|
58
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
59
|
+
|
60
|
+
while ((de = readdir(d)) != NULL) {
|
61
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
62
|
+
func(de->d_name, arg);
|
63
|
+
}
|
64
|
+
}
|
65
|
+
closedir(d);
|
66
|
+
}
|
67
|
+
|
68
|
+
/**
|
69
|
+
* Clear all the locks in the store.
|
70
|
+
*
|
71
|
+
* @param store the store to clear the locks from
|
72
|
+
* @throws IO_ERROR if there is an error opening the directory
|
73
|
+
*/
|
74
|
+
void fs_clear_locks(Store *store)
|
75
|
+
{
|
76
|
+
struct dirent *de;
|
77
|
+
DIR *d = opendir(store->dir.path);
|
78
|
+
|
79
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
80
|
+
|
81
|
+
while ((de = readdir(d)) != NULL) {
|
82
|
+
if (file_is_lock(de->d_name)) {
|
83
|
+
char buf[MAX_FILE_PATH];
|
84
|
+
remove(join_path(buf, store->dir.path, de->d_name));
|
85
|
+
}
|
86
|
+
}
|
87
|
+
closedir(d);
|
88
|
+
}
|
89
|
+
|
90
|
+
/**
|
91
|
+
* Clear all files from the store except the lock files.
|
92
|
+
*
|
93
|
+
* @param store the store to clear all the files from
|
94
|
+
* @throws IO_ERROR if there is an error deleting the files
|
95
|
+
*/
|
96
|
+
void fs_clear(Store *store)
|
97
|
+
{
|
98
|
+
struct dirent *de;
|
99
|
+
DIR *d = opendir(store->dir.path);
|
100
|
+
|
101
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
102
|
+
|
103
|
+
while ((de = readdir(d)) != NULL) {
|
104
|
+
if (de->d_name[0] != '.' && !file_is_lock(de->d_name)) {
|
105
|
+
char buf[MAX_FILE_PATH];
|
106
|
+
remove(join_path(buf, store->dir.path, de->d_name));
|
107
|
+
}
|
108
|
+
}
|
109
|
+
closedir(d);
|
110
|
+
}
|
111
|
+
|
112
|
+
/**
|
113
|
+
* Clear all files from the store including the lock files.
|
114
|
+
*
|
115
|
+
* @param store the store to clear all the files from
|
116
|
+
* @throws IO_ERROR if there is an error deleting the files
|
117
|
+
*/
|
118
|
+
void fs_clear_all(Store *store)
|
119
|
+
{
|
120
|
+
struct dirent *de;
|
121
|
+
DIR *d = opendir(store->dir.path);
|
122
|
+
|
123
|
+
if (!d) RAISE(IO_ERROR, strerror(errno));
|
124
|
+
|
125
|
+
while ((de = readdir(d)) != NULL) {
|
126
|
+
if (de->d_name[0] != '.') {
|
127
|
+
char buf[MAX_FILE_PATH];
|
128
|
+
remove(join_path(buf, store->dir.path, de->d_name));
|
129
|
+
}
|
130
|
+
}
|
131
|
+
closedir(d);
|
132
|
+
}
|
133
|
+
|
134
|
+
#endif
|
135
|
+
|
data/ext/priorityqueue.c
CHANGED
@@ -11,20 +11,20 @@ PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2))
|
|
11
11
|
return pq;
|
12
12
|
}
|
13
13
|
|
14
|
-
void pq_destroy(
|
14
|
+
void pq_destroy(PriorityQueue *pq)
|
15
15
|
{
|
16
|
-
PriorityQueue *pq = (PriorityQueue *)p;
|
17
16
|
free(pq->heap);
|
18
|
-
free(
|
17
|
+
free(pq);
|
19
18
|
}
|
20
19
|
|
21
20
|
void pq_up(PriorityQueue *pq)
|
22
21
|
{
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
22
|
+
void **heap = pq->heap;
|
23
|
+
void *node;
|
24
|
+
int i = pq->count;
|
25
|
+
int j = i >> 1;
|
26
|
+
|
27
|
+
node = heap[i];
|
28
28
|
|
29
29
|
while ((j > 0) && pq->lt(node, heap[j])) {
|
30
30
|
heap[i] = heap[j];
|
@@ -117,7 +117,7 @@ int pq_insert(PriorityQueue *pq, void *elem)
|
|
117
117
|
|
118
118
|
PriorityQueue2 *pq2_create(int max_size,
|
119
119
|
bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
|
120
|
-
|
120
|
+
free_ft destroy)
|
121
121
|
{
|
122
122
|
PriorityQueue2 *pq = ALLOC(PriorityQueue2);
|
123
123
|
pq->count = 0;
|
@@ -129,20 +129,20 @@ PriorityQueue2 *pq2_create(int max_size,
|
|
129
129
|
return pq;
|
130
130
|
}
|
131
131
|
|
132
|
-
void pq2_destroy(
|
132
|
+
void pq2_destroy(PriorityQueue2 *pq)
|
133
133
|
{
|
134
|
-
PriorityQueue2 *pq = (PriorityQueue2 *)p;
|
135
134
|
free(pq->heap);
|
136
|
-
free(
|
135
|
+
free(pq);
|
137
136
|
}
|
138
137
|
|
139
138
|
void pq2_up(PriorityQueue2 *pq)
|
140
139
|
{
|
141
|
-
int i,j;
|
142
|
-
i = pq->count;
|
143
|
-
j = i >> 1;
|
144
140
|
void **heap = pq->heap;
|
145
|
-
void *node
|
141
|
+
void *node;
|
142
|
+
int i = pq->count;
|
143
|
+
int j = i >> 1;
|
144
|
+
|
145
|
+
node = heap[i];
|
146
146
|
|
147
147
|
while ((j > 0) && pq->lt(pq, node, heap[j])) {
|
148
148
|
heap[i] = heap[j];
|
data/ext/priorityqueue.h
CHANGED
@@ -3,16 +3,19 @@
|
|
3
3
|
|
4
4
|
#include "global.h"
|
5
5
|
|
6
|
+
typedef bool (*lt_ft)(void *p1, void *p2);
|
7
|
+
|
6
8
|
typedef struct PriorityQueue {
|
7
9
|
int count;
|
8
10
|
int size;
|
9
11
|
void **heap;
|
10
|
-
|
11
|
-
|
12
|
+
lt_ft lt;
|
13
|
+
//bool (*lt)(void *p1, void *p2);
|
14
|
+
free_ft free_elem;
|
12
15
|
} PriorityQueue;
|
13
16
|
|
14
17
|
PriorityQueue *pq_create(int max_size, bool (*less_than)(void *p1, void *p2));
|
15
|
-
void pq_destroy(
|
18
|
+
void pq_destroy(PriorityQueue *pq);
|
16
19
|
void pq_push(PriorityQueue *pq, void *elem);
|
17
20
|
void *pq_top(PriorityQueue *pq);
|
18
21
|
void *pq_pop(PriorityQueue *pq);
|
@@ -28,13 +31,13 @@ typedef struct PriorityQueue2 {
|
|
28
31
|
void *data;
|
29
32
|
bool (*lt)(struct PriorityQueue2 *pq, void *p1, void *p2);
|
30
33
|
void (*free_elem)(void *p);
|
31
|
-
|
34
|
+
free_ft destroy;
|
32
35
|
} PriorityQueue2;
|
33
36
|
|
34
37
|
PriorityQueue2 *pq2_create(int max_size,
|
35
38
|
bool (*less_than)(PriorityQueue2 *pq, void *p1, void *p2),
|
36
|
-
|
37
|
-
void pq2_destroy(
|
39
|
+
free_ft destroy);
|
40
|
+
void pq2_destroy(PriorityQueue2 *pq);
|
38
41
|
void pq2_push(PriorityQueue2 *pq, void *elem);
|
39
42
|
void *pq2_top(PriorityQueue2 *pq);
|
40
43
|
void *pq2_pop(PriorityQueue2 *pq);
|
data/ext/q_boolean.c
CHANGED
@@ -38,9 +38,9 @@ void bw_normalize(Weight *self, float normalization_factor)
|
|
38
38
|
{
|
39
39
|
BooleanWeight *bw = (BooleanWeight *)self->data;
|
40
40
|
BooleanQuery *bq = (BooleanQuery *)self->query->data;
|
41
|
-
normalization_factor *= self->value; // multiply by query boost
|
42
41
|
Weight *weight;
|
43
42
|
int i;
|
43
|
+
normalization_factor *= self->value; /* multiply by query boost */
|
44
44
|
|
45
45
|
for (i = 0; i < bw->w_cnt; i++) {
|
46
46
|
if (! bq->clauses[i]->is_prohibited) {
|
@@ -79,13 +79,18 @@ char *bw_to_s(Weight *self)
|
|
79
79
|
return strfmt("BooleanWeight(%f)", self->value);
|
80
80
|
}
|
81
81
|
|
82
|
-
void bw_destroy(
|
82
|
+
void bw_destroy(Weight *self)
|
83
83
|
{
|
84
|
-
|
85
|
-
BooleanWeight *bw = (BooleanWeight *)
|
84
|
+
int i;
|
85
|
+
BooleanWeight *bw = (BooleanWeight *)self->data;
|
86
|
+
|
87
|
+
for (i = 0; i < bw->w_cnt; i++) {
|
88
|
+
bw->weights[i]->destroy(bw->weights[i]);
|
89
|
+
}
|
90
|
+
|
86
91
|
free(bw->weights);
|
87
92
|
free(bw);
|
88
|
-
|
93
|
+
w_destroy(self);
|
89
94
|
}
|
90
95
|
|
91
96
|
Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
@@ -127,8 +132,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
127
132
|
}
|
128
133
|
sum_expl->value = sum;
|
129
134
|
|
130
|
-
if (coord == 1) {
|
131
|
-
explanation = sum_expl;
|
135
|
+
if (coord == 1) { /* only one clause matched */
|
136
|
+
explanation = sum_expl; /* eliminate wrapper */
|
132
137
|
sum_expl->dcnt = 0;
|
133
138
|
sum_expl = sum_expl->details[0];
|
134
139
|
expl_destoy(explanation);
|
@@ -136,8 +141,8 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
136
141
|
|
137
142
|
coord_factor = sim_coord(self->similarity, coord, max_coord);
|
138
143
|
|
139
|
-
if (coord_factor == 1.0) {
|
140
|
-
return sum_expl;
|
144
|
+
if (coord_factor == 1.0) { /* coord is no-op */
|
145
|
+
return sum_expl; /* eliminate wrapper */
|
141
146
|
} else {
|
142
147
|
explanation = expl_create(sum * coord_factor, estrdup("product of:"));
|
143
148
|
expl_add_detail(explanation, sum_expl);
|
@@ -150,13 +155,17 @@ Explanation *bw_explain(Weight *self, IndexReader *ir, int doc_num)
|
|
150
155
|
Weight *bw_create(Query *query, Searcher *searcher)
|
151
156
|
{
|
152
157
|
int i;
|
153
|
-
|
158
|
+
Weight *self = w_create(query);
|
154
159
|
BooleanWeight *bw = ALLOC(BooleanWeight);
|
155
|
-
|
156
|
-
|
160
|
+
BooleanQuery *bq = (BooleanQuery *)query->data;
|
161
|
+
|
162
|
+
bw->w_cnt = bq->clause_cnt;
|
163
|
+
bw->weights = ALLOC_N(Weight *, bw->w_cnt);
|
164
|
+
for (i = 0; i < bw->w_cnt; i++) {
|
165
|
+
bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
|
166
|
+
}
|
157
167
|
self->data = bw;
|
158
|
-
|
159
|
-
self->get_value = &w_get_value;
|
168
|
+
|
160
169
|
self->normalize = &bw_normalize;
|
161
170
|
self->scorer = &bw_scorer;
|
162
171
|
self->explain = &bw_explain;
|
@@ -165,15 +174,8 @@ Weight *bw_create(Query *query, Searcher *searcher)
|
|
165
174
|
self->sum_of_squared_weights = &bw_sum_of_squared_weights;
|
166
175
|
|
167
176
|
self->similarity = query->get_similarity(query, searcher);
|
168
|
-
self->query = query;
|
169
177
|
self->value = query->boost;
|
170
178
|
|
171
|
-
bw->w_cnt = bq->clause_cnt;
|
172
|
-
bw->weights = ALLOC_N(Weight *, bw->w_cnt);
|
173
|
-
for (i = 0; i < bw->w_cnt; i++) {
|
174
|
-
bw->weights[i] = q_weight(bq->clauses[i]->query, searcher);
|
175
|
-
}
|
176
|
-
|
177
179
|
return self;
|
178
180
|
}
|
179
181
|
|
@@ -204,15 +206,28 @@ void bc_set_occur(BooleanClause *self, unsigned int occur)
|
|
204
206
|
}
|
205
207
|
}
|
206
208
|
|
207
|
-
void
|
209
|
+
void bc_deref(BooleanClause *self)
|
210
|
+
{
|
211
|
+
if (--self->ref_cnt <= 0) {
|
212
|
+
q_deref(self->query);
|
213
|
+
free(self);
|
214
|
+
}
|
215
|
+
}
|
216
|
+
|
217
|
+
uint bc_hash(BooleanClause *self)
|
218
|
+
{
|
219
|
+
return ((q_hash(self->query) << 2) | self->occur);
|
220
|
+
}
|
221
|
+
|
222
|
+
int bc_eq(BooleanClause *self, BooleanClause *o)
|
208
223
|
{
|
209
|
-
self->
|
210
|
-
free(self);
|
224
|
+
return ((self->occur == o->occur) && q_eq(self->query, o->query));
|
211
225
|
}
|
212
226
|
|
213
227
|
BooleanClause *bc_create(Query *query, unsigned int occur)
|
214
228
|
{
|
215
229
|
BooleanClause *self = ALLOC(BooleanClause);
|
230
|
+
self->ref_cnt = 1;
|
216
231
|
self->query = query;
|
217
232
|
bc_set_occur(self, occur);
|
218
233
|
return self;
|
@@ -228,38 +243,44 @@ Query *bq_rewrite(Query *self, IndexReader *ir)
|
|
228
243
|
{
|
229
244
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
230
245
|
BooleanClause *clause;
|
231
|
-
Query *
|
246
|
+
Query *q, *rq;
|
232
247
|
int i;
|
233
248
|
|
234
249
|
if (bq->clause_cnt == 1) { // optimize 1-clause queries
|
235
250
|
clause = bq->clauses[0];
|
236
251
|
if (! clause->is_prohibited) { // just return clause
|
237
|
-
|
252
|
+
q = clause->query->rewrite(clause->query, ir); // rewrite first
|
238
253
|
|
239
254
|
if (self->boost != 1.0) {// incorporate boost
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
if ((
|
244
|
-
|
255
|
+
/* original_boost is initialized to 0.0. If it has been set to
|
256
|
+
* something else it means this query has already been boosted before
|
257
|
+
* so boost from the original value */
|
258
|
+
if ((q == clause->query) && bq->original_boost) {
|
259
|
+
/* rewrite was no-op */
|
260
|
+
q->boost = bq->original_boost * self->boost;
|
245
261
|
} else {
|
246
|
-
|
247
|
-
|
262
|
+
bq->original_boost = q->boost; /* save original boost */
|
263
|
+
q->boost *= self->boost;
|
248
264
|
}
|
249
265
|
}
|
250
266
|
|
251
|
-
return
|
267
|
+
return q;
|
252
268
|
}
|
253
269
|
}
|
254
270
|
|
271
|
+
/* replace each clause's query with its rewritten query */
|
255
272
|
for (i = 0; i < bq->clause_cnt; i++) {
|
256
273
|
clause = bq->clauses[i];
|
257
|
-
|
274
|
+
rq = clause->query->rewrite(clause->query, ir);
|
275
|
+
q_deref(clause->query);
|
276
|
+
clause->query = rq;
|
258
277
|
}
|
259
|
-
|
278
|
+
|
279
|
+
self->ref_cnt++;
|
280
|
+
return self;
|
260
281
|
}
|
261
282
|
|
262
|
-
void bq_extract_terms(Query *self,
|
283
|
+
void bq_extract_terms(Query *self, HashSet *terms)
|
263
284
|
{
|
264
285
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
265
286
|
BooleanClause *clause;
|
@@ -272,6 +293,7 @@ void bq_extract_terms(Query *self, Array *terms)
|
|
272
293
|
|
273
294
|
char *bq_to_s(Query *self, char *field)
|
274
295
|
{
|
296
|
+
int i;
|
275
297
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
276
298
|
BooleanClause *clause;
|
277
299
|
Query *sub_query;
|
@@ -288,11 +310,10 @@ char *bq_to_s(Query *self, char *field)
|
|
288
310
|
bp++;
|
289
311
|
}
|
290
312
|
|
291
|
-
int i;
|
292
313
|
for (i = 0; i < bq->clause_cnt; i++) {
|
293
314
|
clause = bq->clauses[i];
|
294
315
|
clause_str = clause->query->to_s(clause->query, field);
|
295
|
-
clause_len = strlen(clause_str);
|
316
|
+
clause_len = (int)strlen(clause_str);
|
296
317
|
needed = clause_len + 5;
|
297
318
|
while ((size - bp) < needed) {
|
298
319
|
size *= 2;
|
@@ -323,7 +344,7 @@ char *bq_to_s(Query *self, char *field)
|
|
323
344
|
|
324
345
|
if (self->boost != 1.0) {
|
325
346
|
char *boost_str = strfmt(")^%f", self->boost);
|
326
|
-
int boost_len = strlen(boost_str);
|
347
|
+
int boost_len = (int)strlen(boost_str);
|
327
348
|
REALLOC_N(buffer, char, bp + boost_len + 1);
|
328
349
|
memcpy(buffer + bp, boost_str, sizeof(char) * boost_len);
|
329
350
|
bp += boost_len;
|
@@ -333,22 +354,19 @@ char *bq_to_s(Query *self, char *field)
|
|
333
354
|
return buffer;
|
334
355
|
}
|
335
356
|
|
336
|
-
void bq_destroy(
|
357
|
+
static void bq_destroy(Query *self)
|
337
358
|
{
|
338
|
-
Query *self = (Query *)p;
|
339
359
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
340
360
|
int i;
|
341
|
-
|
342
|
-
|
343
|
-
bc_destroy(bq->clauses[i]);
|
344
|
-
}
|
361
|
+
for (i = 0; i < bq->clause_cnt; i++) {
|
362
|
+
bc_deref(bq->clauses[i]);
|
345
363
|
}
|
346
364
|
free(bq->clauses);
|
347
365
|
if (bq->similarity) {
|
348
366
|
bq->similarity->destroy(bq->similarity);
|
349
367
|
}
|
350
368
|
free(bq);
|
351
|
-
|
369
|
+
q_destroy_i(self);
|
352
370
|
}
|
353
371
|
|
354
372
|
float bq_coord_disabled(Similarity *sim, int overlap, int max_overlap)
|
@@ -360,27 +378,50 @@ Similarity *bq_get_similarity(Query *self, Searcher *searcher)
|
|
360
378
|
{
|
361
379
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
362
380
|
if (!bq->similarity) {
|
363
|
-
Similarity *sim =
|
381
|
+
Similarity *sim = q_get_similarity_i(self, searcher);
|
364
382
|
bq->similarity = ALLOC(Similarity);
|
365
383
|
memcpy(bq->similarity, sim, sizeof(Similarity));
|
366
384
|
bq->similarity->coord = &bq_coord_disabled;
|
367
|
-
bq->similarity->destroy = &free;
|
385
|
+
bq->similarity->destroy = (void (*)(Similarity *))&free;
|
368
386
|
}
|
369
387
|
|
370
388
|
return bq->similarity;
|
371
389
|
}
|
372
390
|
|
391
|
+
static uint bq_hash(Query *self)
|
392
|
+
{
|
393
|
+
int i;
|
394
|
+
uint hash = 0;
|
395
|
+
BooleanQuery *bq = (BooleanQuery *)self->data;
|
396
|
+
for (i = 0; i < bq->clause_cnt; i++) {
|
397
|
+
hash ^= bc_hash(bq->clauses[i]);
|
398
|
+
}
|
399
|
+
return (hash << 1) | bq->coord_disabled;
|
400
|
+
}
|
401
|
+
|
402
|
+
static int bq_eq(Query *self, Query *o)
|
403
|
+
{
|
404
|
+
int i;
|
405
|
+
BooleanQuery *bq1 = (BooleanQuery *)self->data;
|
406
|
+
BooleanQuery *bq2 = (BooleanQuery *)o->data;
|
407
|
+
if ((bq1->coord_disabled != bq2->coord_disabled) ||
|
408
|
+
(bq1->max_clause_cnt != bq1->max_clause_cnt) ||
|
409
|
+
(bq1->clause_cnt != bq2->clause_cnt)) {
|
410
|
+
return false;
|
411
|
+
}
|
412
|
+
|
413
|
+
for (i = 0; i < bq1->clause_cnt; i++) {
|
414
|
+
if (!bc_eq(bq1->clauses[i], bq2->clauses[i])) {
|
415
|
+
return false;
|
416
|
+
}
|
417
|
+
}
|
418
|
+
return true;
|
419
|
+
}
|
420
|
+
|
373
421
|
Query *bq_create(bool coord_disabled)
|
374
422
|
{
|
375
423
|
Query *self = q_create();
|
376
424
|
BooleanQuery *bq = ALLOC(BooleanQuery);
|
377
|
-
self->type = BOOLEAN_QUERY;
|
378
|
-
self->create_weight = &bw_create;
|
379
|
-
self->rewrite = &bq_rewrite;
|
380
|
-
self->extract_terms = &bq_extract_terms;
|
381
|
-
self->to_s = &bq_to_s;
|
382
|
-
self->destroy = &bq_destroy;
|
383
|
-
self->data = bq;
|
384
425
|
bq->coord_disabled = coord_disabled;
|
385
426
|
if (coord_disabled) {
|
386
427
|
self->get_similarity = &bq_get_similarity;
|
@@ -390,13 +431,24 @@ Query *bq_create(bool coord_disabled)
|
|
390
431
|
bq->clause_capa = BOOLEAN_CLAUSES_START_CAPA;
|
391
432
|
bq->clauses = ALLOC_N(BooleanClause *, BOOLEAN_CLAUSES_START_CAPA);
|
392
433
|
bq->similarity = NULL;
|
434
|
+
bq->original_boost = 0.0;
|
435
|
+
self->data = bq;
|
393
436
|
|
437
|
+
self->type = BOOLEAN_QUERY;
|
438
|
+
self->rewrite = &bq_rewrite;
|
439
|
+
self->extract_terms = &bq_extract_terms;
|
440
|
+
self->to_s = &bq_to_s;
|
441
|
+
self->hash = &bq_hash;
|
442
|
+
self->eq = &bq_eq;
|
443
|
+
self->destroy_i = &bq_destroy;
|
444
|
+
self->create_weight_i = &bw_create;
|
394
445
|
return self;
|
395
446
|
}
|
396
447
|
|
397
448
|
BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
|
398
449
|
{
|
399
450
|
BooleanQuery *bq = (BooleanQuery *)self->data;
|
451
|
+
if (!self->destroy_all) ref(bc);
|
400
452
|
if (bq->clause_cnt >= bq->clause_capa) {
|
401
453
|
bq->clause_capa *= 2;
|
402
454
|
REALLOC_N(bq->clauses, BooleanClause *, bq->clause_capa);
|
@@ -412,7 +464,12 @@ BooleanClause *bq_add_clause(Query *self, BooleanClause *bc)
|
|
412
464
|
BooleanClause *bq_add_query(Query *self, Query *sub_query, unsigned int occur)
|
413
465
|
{
|
414
466
|
BooleanClause *bc = bc_create(sub_query, occur);
|
415
|
-
|
467
|
+
bq_add_clause(self, bc);
|
468
|
+
if (!self->destroy_all) {
|
469
|
+
ref(sub_query);
|
470
|
+
bc_deref(bc); /* bc would have been referenced unnecessarily */
|
471
|
+
}
|
472
|
+
return bc;
|
416
473
|
}
|
417
474
|
|
418
475
|
/***************************************************************************
|
@@ -572,16 +629,15 @@ Explanation *dssc_explain(Scorer *self, int doc_num)
|
|
572
629
|
return e;
|
573
630
|
}
|
574
631
|
|
575
|
-
void dssc_destroy(
|
632
|
+
void dssc_destroy(Scorer *self)
|
576
633
|
{
|
577
|
-
Scorer *self = (Scorer *)p;
|
578
634
|
DisjunctionSumScorer *dssc = (DisjunctionSumScorer *)self->data;
|
579
635
|
int i;
|
580
636
|
for (i = 0; i < dssc->ss_cnt; i++) {
|
581
637
|
dssc->sub_scorers[i]->destroy(dssc->sub_scorers[i]);
|
582
638
|
}
|
583
639
|
if (dssc->scorer_queue) pq_destroy(dssc->scorer_queue);
|
584
|
-
|
640
|
+
scorer_destroy_i(self);
|
585
641
|
}
|
586
642
|
|
587
643
|
Scorer *disjunction_sum_scorer_create(Scorer **sub_scorers, int ss_cnt,
|
@@ -655,7 +711,8 @@ void csc_init(Scorer *self, bool init_scorers)
|
|
655
711
|
ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
|
656
712
|
Scorer *sub_scorer;
|
657
713
|
int i;
|
658
|
-
|
714
|
+
|
715
|
+
/* compute coord factor */
|
659
716
|
csc->coord = sim_coord(self->similarity, csc->ss_cnt, csc->ss_cnt);
|
660
717
|
|
661
718
|
csc->more = (csc->ss_cnt > 0);
|
@@ -746,16 +803,15 @@ bool csc_skip_to(Scorer *self, int doc_num)
|
|
746
803
|
return csc_do_next(self);
|
747
804
|
}
|
748
805
|
|
749
|
-
void csc_destroy(
|
806
|
+
void csc_destroy(Scorer *self)
|
750
807
|
{
|
751
|
-
Scorer *self = (Scorer *)p;
|
752
808
|
ConjunctionScorer *csc = (ConjunctionScorer *)self->data;
|
753
809
|
int i;
|
754
810
|
for (i = 0; i < csc->ss_cnt; i++) {
|
755
811
|
csc->sub_scorers[i]->destroy(csc->sub_scorers[i]);
|
756
812
|
}
|
757
813
|
free(csc->sub_scorers);
|
758
|
-
|
814
|
+
scorer_destroy_i(self);
|
759
815
|
}
|
760
816
|
|
761
817
|
Scorer *conjunction_scorer_create(Similarity *similarity)
|
@@ -842,12 +898,11 @@ Explanation *smsc_explain(Scorer *self, int doc_num)
|
|
842
898
|
return scorer->explain(scorer, doc_num);
|
843
899
|
}
|
844
900
|
|
845
|
-
void smsc_destroy(
|
901
|
+
void smsc_destroy(Scorer *self)
|
846
902
|
{
|
847
|
-
Scorer *self = (Scorer *)p;
|
848
903
|
Scorer *scorer = ((SingleMatchScorer *)self->data)->scorer;
|
849
904
|
scorer->destroy(scorer);
|
850
|
-
|
905
|
+
scorer_destroy_i(self);
|
851
906
|
}
|
852
907
|
|
853
908
|
Scorer *single_match_scorer_create(Coordinator *coordinator, Scorer *scorer)
|
@@ -929,13 +984,12 @@ Explanation *rossc_explain(Scorer *self, int doc_num)
|
|
929
984
|
return e;
|
930
985
|
}
|
931
986
|
|
932
|
-
void rossc_destroy(
|
987
|
+
void rossc_destroy(Scorer *self)
|
933
988
|
{
|
934
|
-
Scorer *self = (Scorer *)p;
|
935
989
|
ReqOptSumScorer *rossc = (ReqOptSumScorer *)self->data;
|
936
990
|
if (rossc->req_scorer) rossc->req_scorer->destroy(rossc->req_scorer);
|
937
991
|
if (rossc->opt_scorer) rossc->opt_scorer->destroy(rossc->opt_scorer);
|
938
|
-
|
992
|
+
scorer_destroy_i(self);
|
939
993
|
}
|
940
994
|
|
941
995
|
|
@@ -1070,13 +1124,12 @@ Explanation *rxsc_explain(Scorer *self, int doc_num)
|
|
1070
1124
|
return e;
|
1071
1125
|
}
|
1072
1126
|
|
1073
|
-
void rxsc_destroy(
|
1127
|
+
void rxsc_destroy(Scorer *self)
|
1074
1128
|
{
|
1075
|
-
Scorer *self = (Scorer *)p;
|
1076
1129
|
ReqExclScorer *rxsc = (ReqExclScorer *)self->data;
|
1077
1130
|
if (rxsc->req_scorer) rxsc->req_scorer->destroy(rxsc->req_scorer);
|
1078
1131
|
if (rxsc->excl_scorer) rxsc->excl_scorer->destroy(rxsc->excl_scorer);
|
1079
|
-
|
1132
|
+
scorer_destroy_i(self);
|
1080
1133
|
}
|
1081
1134
|
|
1082
1135
|
Scorer *req_excl_scorer_create(Scorer *req_scorer, Scorer *excl_scorer)
|
@@ -1288,9 +1341,8 @@ bool bsc_skip_to(Scorer *self, int doc_num)
|
|
1288
1341
|
}
|
1289
1342
|
}
|
1290
1343
|
|
1291
|
-
void bsc_destroy(
|
1344
|
+
void bsc_destroy(Scorer *self)
|
1292
1345
|
{
|
1293
|
-
Scorer *self = (Scorer *)p;
|
1294
1346
|
BooleanScorer *bsc = (BooleanScorer *)self->data;
|
1295
1347
|
Coordinator *coord = bsc->coordinator;
|
1296
1348
|
|
@@ -1316,7 +1368,7 @@ void bsc_destroy(void *p)
|
|
1316
1368
|
free(bsc->required_scorers);
|
1317
1369
|
free(bsc->optional_scorers);
|
1318
1370
|
free(bsc->prohibited_scorers);
|
1319
|
-
|
1371
|
+
scorer_destroy_i(self);
|
1320
1372
|
}
|
1321
1373
|
|
1322
1374
|
Explanation *bsc_explain(Scorer *self, int doc_num)
|