ferret 0.10.1 → 0.10.2
This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
- data/Rakefile +7 -1
- data/ext/analysis.c +21 -13
- data/ext/array.c +1 -1
- data/ext/bitvector.c +2 -2
- data/ext/defines.h +0 -6
- data/ext/except.c +6 -6
- data/ext/except.h +12 -8
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +4 -0
- data/ext/ferret.h +1 -0
- data/ext/fs_store.c +18 -4
- data/ext/global.c +18 -16
- data/ext/global.h +7 -2
- data/ext/hash.c +1 -1
- data/ext/helper.c +1 -1
- data/ext/helper.h +1 -1
- data/ext/inc/lang.h +7 -1
- data/ext/ind.c +4 -4
- data/ext/ind.h +3 -3
- data/ext/index.c +33 -26
- data/ext/index.h +1 -1
- data/ext/lang.h +7 -1
- data/ext/mem_pool.c +1 -1
- data/ext/mem_pool.h +1 -1
- data/ext/q_fuzzy.c +2 -2
- data/ext/q_match_all.c +2 -2
- data/ext/q_multi_term.c +1 -1
- data/ext/q_parser.c +60 -52
- data/ext/r_analysis.c +6 -4
- data/ext/r_index.c +57 -4
- data/ext/r_search.c +1 -1
- data/ext/r_utils.c +1 -1
- data/ext/ram_store.c +1 -1
- data/ext/search.c +4 -4
- data/ext/sort.c +3 -3
- data/ext/store.c +9 -9
- data/ext/store.h +4 -4
- data/ext/tags +7841 -0
- data/ext/term_vectors.c +3 -3
- data/lib/ferret/index.rb +69 -7
- data/test/test_helper.rb +3 -2
- data/test/unit/analysis/tc_token_stream.rb +1 -0
- data/test/unit/index/tc_index.rb +157 -2
- data/test/unit/index/tc_index_reader.rb +108 -5
- data/test/unit/query_parser/tc_query_parser.rb +2 -1
- data/test/unit/search/tc_index_searcher.rb +1 -1
- data/test/unit/search/tc_multi_searcher.rb +2 -1
- data/test/unit/search/tc_spans.rb +1 -1
- data/test/unit/store/tc_fs_store.rb +6 -3
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_number_tools.rb +1 -1
- metadata +138 -137
data/ext/global.h
CHANGED
@@ -50,8 +50,13 @@ typedef void (*free_ft)(void *key);
|
|
50
50
|
}\
|
51
51
|
} while (0)
|
52
52
|
|
53
|
-
#
|
54
|
-
#define
|
53
|
+
#ifdef POSH_OS_WIN32
|
54
|
+
# define Jx fprintf(stderr,"%s, %d\n", __FILE__, __LINE__);
|
55
|
+
# define Xj fprintf(stdout,"%s, %d\n", __FILE__, __LINE__);
|
56
|
+
#else
|
57
|
+
# define Jx fprintf(stderr,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
58
|
+
# define Xj fprintf(stdout,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
59
|
+
#endif
|
55
60
|
|
56
61
|
extern char *progname();
|
57
62
|
extern void setprogname(const char *str);
|
data/ext/hash.c
CHANGED
@@ -58,7 +58,7 @@ typedef HashEntry *(*lookup_ft)(struct HashTable *ht, register const void *key);
|
|
58
58
|
* @param ht the HashTable to do the fast lookup in
|
59
59
|
* @param hash the hashkey we are looking for
|
60
60
|
*/
|
61
|
-
static
|
61
|
+
static __inline HashEntry *h_resize_lookup(HashTable *ht, register const ulong hash)
|
62
62
|
{
|
63
63
|
register ulong perturb;
|
64
64
|
register int mask = ht->mask;
|
data/ext/helper.c
CHANGED
data/ext/helper.h
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
#include "defines.h"
|
5
5
|
|
6
|
-
extern
|
6
|
+
extern __inline int hlp_string_diff(register const char *const s1,
|
7
7
|
register const char *const s2);
|
8
8
|
extern f_i32 float2int(float f);
|
9
9
|
extern float int2float(f_i32 i32);
|
data/ext/inc/lang.h
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
#ifndef FRT_LANG_H
|
2
2
|
#define FRT_LANG_H
|
3
3
|
|
4
|
-
#
|
4
|
+
#define RUBY_BINDINGS 1
|
5
|
+
|
5
6
|
#include <stdarg.h>
|
7
|
+
#include <ruby.h>
|
8
|
+
|
9
|
+
#undef close
|
10
|
+
#undef rename
|
6
11
|
|
7
12
|
#define frt_malloc xmalloc
|
8
13
|
#define frt_calloc(n) xcalloc(n, 1)
|
9
14
|
#define frt_realloc xrealloc
|
10
15
|
|
16
|
+
|
11
17
|
#ifdef FRT_HAS_ISO_VARARGS
|
12
18
|
/* C99-compliant compiler */
|
13
19
|
|
data/ext/ind.c
CHANGED
@@ -108,7 +108,7 @@ void index_flush(Index *self)
|
|
108
108
|
self->has_writes = false;
|
109
109
|
}
|
110
110
|
|
111
|
-
|
111
|
+
__inline void ensure_writer_open(Index *self)
|
112
112
|
{
|
113
113
|
if (!self->iw) {
|
114
114
|
INDEX_CLOSE_READER(self);
|
@@ -124,7 +124,7 @@ inline void ensure_writer_open(Index *self)
|
|
124
124
|
}
|
125
125
|
}
|
126
126
|
|
127
|
-
|
127
|
+
__inline void ensure_reader_open(Index *self)
|
128
128
|
{
|
129
129
|
if (self->ir) {
|
130
130
|
if (self->check_latest && !ir_is_latest(self->ir)) {
|
@@ -140,7 +140,7 @@ inline void ensure_reader_open(Index *self)
|
|
140
140
|
}
|
141
141
|
}
|
142
142
|
|
143
|
-
|
143
|
+
__inline void ensure_searcher_open(Index *self)
|
144
144
|
{
|
145
145
|
ensure_reader_open(self);
|
146
146
|
if (!self->sea) {
|
@@ -187,7 +187,7 @@ bool index_is_deleted(Index *self, int doc_num)
|
|
187
187
|
return is_del;
|
188
188
|
}
|
189
189
|
|
190
|
-
static
|
190
|
+
static __inline void index_add_doc_i(Index *self, Document *doc)
|
191
191
|
{
|
192
192
|
/* If there is a key specified delete the document with the same key */
|
193
193
|
if (self->key) {
|
data/ext/ind.h
CHANGED
@@ -61,8 +61,8 @@ extern Explanation *index_explain(Index *self, Query *q, int doc_num);
|
|
61
61
|
extern void index_auto_flush_ir(Index *self);
|
62
62
|
extern void index_auto_flush_iw(Index *self);
|
63
63
|
|
64
|
-
extern
|
65
|
-
extern
|
66
|
-
extern
|
64
|
+
extern __inline void ensure_searcher_open(Index *self);
|
65
|
+
extern __inline void ensure_reader_open(Index *self);
|
66
|
+
extern __inline void ensure_writer_open(Index *self);
|
67
67
|
|
68
68
|
#endif
|
data/ext/index.c
CHANGED
@@ -91,7 +91,7 @@ HashTable *co_hash_create()
|
|
91
91
|
*
|
92
92
|
****************************************************************************/
|
93
93
|
|
94
|
-
|
94
|
+
__inline void fi_set_store(FieldInfo *fi, int store)
|
95
95
|
{
|
96
96
|
switch (store) {
|
97
97
|
case STORE_NO:
|
@@ -105,7 +105,7 @@ inline void fi_set_store(FieldInfo *fi, int store)
|
|
105
105
|
}
|
106
106
|
}
|
107
107
|
|
108
|
-
|
108
|
+
__inline void fi_set_index(FieldInfo *fi, int index)
|
109
109
|
{
|
110
110
|
switch (index) {
|
111
111
|
case INDEX_NO:
|
@@ -126,7 +126,7 @@ inline void fi_set_index(FieldInfo *fi, int index)
|
|
126
126
|
}
|
127
127
|
}
|
128
128
|
|
129
|
-
|
129
|
+
__inline void fi_set_term_vector(FieldInfo *fi, int term_vector)
|
130
130
|
{
|
131
131
|
switch (term_vector) {
|
132
132
|
case TERM_VECTOR_NO:
|
@@ -1071,7 +1071,7 @@ void fw_close(FieldsWriter *fw)
|
|
1071
1071
|
free(fw);
|
1072
1072
|
}
|
1073
1073
|
|
1074
|
-
static
|
1074
|
+
static __inline void save_data(OutStream *fdt_out, char *data, int dlen)
|
1075
1075
|
{
|
1076
1076
|
os_write_vint(fdt_out, dlen);
|
1077
1077
|
os_write_bytes(fdt_out, (uchar *)data, dlen);
|
@@ -1184,7 +1184,7 @@ void fw_add_postings(FieldsWriter *fw,
|
|
1184
1184
|
|
1185
1185
|
if (fi_store_offsets(fi)) {
|
1186
1186
|
/* use delta encoding for offsets */
|
1187
|
-
int last_end = 0;
|
1187
|
+
int last_end = 0;
|
1188
1188
|
os_write_vint(fdt_out, offset_count); /* write shared prefix length */
|
1189
1189
|
for (i = 0; i < offset_count; i++) {
|
1190
1190
|
int start = offsets[i].start;
|
@@ -1274,7 +1274,7 @@ static void sti_ensure_index_is_read(SegmentTermIndex *sti,
|
|
1274
1274
|
for (i = 0; NULL != ste_next(index_te); i++) {
|
1275
1275
|
#ifdef DEBUG
|
1276
1276
|
if (i >= index_size) {
|
1277
|
-
RAISE(
|
1277
|
+
RAISE(FERRET_ERROR, "index term enum read too many terms");
|
1278
1278
|
}
|
1279
1279
|
#endif
|
1280
1280
|
sti->index_terms[i] = te_get_term(index_te);
|
@@ -1367,7 +1367,7 @@ void sfi_close(SegmentFieldIndex *sfi)
|
|
1367
1367
|
* SegmentTermEnum
|
1368
1368
|
****************************************************************************/
|
1369
1369
|
|
1370
|
-
static
|
1370
|
+
static __inline int term_read(char *buf, InStream *is)
|
1371
1371
|
{
|
1372
1372
|
int start = (int)is_read_vint(is);
|
1373
1373
|
int length = (int)is_read_vint(is);
|
@@ -1444,6 +1444,10 @@ static char *ste_scan_to(TermEnum *te, const char *term)
|
|
1444
1444
|
SegmentTermIndex *sti = h_get_int(sfi->field_dict, te->field_num);
|
1445
1445
|
if (sti && sti->size > 0) {
|
1446
1446
|
SFI_ENSURE_INDEX_IS_READ(sfi, sti);
|
1447
|
+
if (term[0] == '\0') {
|
1448
|
+
ste_index_seek(te, sti, 0);
|
1449
|
+
return ste_next(te);;
|
1450
|
+
}
|
1447
1451
|
/* if current term is less than seek term */
|
1448
1452
|
if (STE(te)->pos < STE(te)->size && strcmp(te->curr_term, term) <= 0) {
|
1449
1453
|
int enum_offset = (int)(STE(te)->pos / sfi->index_interval) + 1;
|
@@ -1748,7 +1752,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1748
1752
|
|
1749
1753
|
tew = tew_setup(&(mte->tews[i]), starts[i], sub_te, reader);
|
1750
1754
|
if (((term == NULL) && tew_next(tew))
|
1751
|
-
|| (tew->term && tew->term[0] != '\0')) {
|
1755
|
+
|| (tew->term && (tew->term[0] != '\0'))) {
|
1752
1756
|
pq_push(mte->tew_queue, tew); /* initialize queue */
|
1753
1757
|
}
|
1754
1758
|
} else {
|
@@ -1788,11 +1792,12 @@ TermInfosReader *tir_open(Store *store,
|
|
1788
1792
|
return tir;
|
1789
1793
|
}
|
1790
1794
|
|
1791
|
-
static
|
1795
|
+
static __inline TermEnum *tir_enum(TermInfosReader *tir)
|
1792
1796
|
{
|
1793
1797
|
TermEnum *te;
|
1794
1798
|
if ((te = thread_getspecific(tir->thread_te)) == NULL) {
|
1795
1799
|
te = ste_clone(tir->orig_te);
|
1800
|
+
ste_set_field(te, tir->field_num);
|
1796
1801
|
ary_push(tir->te_bucket, te);
|
1797
1802
|
thread_setspecific(tir->thread_te, te);
|
1798
1803
|
}
|
@@ -1913,7 +1918,7 @@ TermInfosWriter *tiw_open(Store *store,
|
|
1913
1918
|
return tiw;
|
1914
1919
|
}
|
1915
1920
|
|
1916
|
-
static
|
1921
|
+
static __inline void tw_write_term(TermWriter *tw,
|
1917
1922
|
OutStream *os,
|
1918
1923
|
const char *term,
|
1919
1924
|
int term_len)
|
@@ -1987,7 +1992,7 @@ void tiw_add(TermInfosWriter *tiw,
|
|
1987
1992
|
}
|
1988
1993
|
}
|
1989
1994
|
|
1990
|
-
static
|
1995
|
+
static __inline void tw_reset(TermWriter *tw)
|
1991
1996
|
{
|
1992
1997
|
tw->counter = 0;
|
1993
1998
|
tw->last_term = EMPTY_STRING;
|
@@ -3085,8 +3090,8 @@ bool ir_is_latest(IndexReader *ir)
|
|
3085
3090
|
Lock *commit_lock = ir->store->open_lock(ir->store, COMMIT_LOCK_NAME);
|
3086
3091
|
if (!commit_lock->obtain(commit_lock)) {
|
3087
3092
|
ir->store->close_lock(commit_lock);
|
3088
|
-
RAISE(LOCK_ERROR, "Error
|
3089
|
-
|
3093
|
+
RAISE(LOCK_ERROR, "Error detecting if the current index is latest "
|
3094
|
+
"version. Commit lock currently obtained");
|
3090
3095
|
}
|
3091
3096
|
is_latest = (sis_read_current_version(ir->store) == ir->sis->version);
|
3092
3097
|
commit_lock->release(commit_lock);
|
@@ -3180,7 +3185,7 @@ typedef struct SegmentReader {
|
|
3180
3185
|
#define SR(ir) ((SegmentReader *)(ir))
|
3181
3186
|
#define SR_SIZE(ir) (SR(ir)->fr->size)
|
3182
3187
|
|
3183
|
-
static
|
3188
|
+
static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
3184
3189
|
{
|
3185
3190
|
FieldsReader *fr;
|
3186
3191
|
|
@@ -3192,12 +3197,12 @@ static inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3192
3197
|
return fr;
|
3193
3198
|
}
|
3194
3199
|
|
3195
|
-
static
|
3200
|
+
static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
|
3196
3201
|
{
|
3197
3202
|
return (sr->deleted_docs != NULL && bv_get(sr->deleted_docs, doc_num));
|
3198
3203
|
}
|
3199
3204
|
|
3200
|
-
static
|
3205
|
+
static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3201
3206
|
uchar *buf)
|
3202
3207
|
{
|
3203
3208
|
Norm *norm = h_get_int(sr->norms, field_num);
|
@@ -3216,7 +3221,7 @@ static inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
|
3216
3221
|
}
|
3217
3222
|
}
|
3218
3223
|
|
3219
|
-
static
|
3224
|
+
static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
3220
3225
|
{
|
3221
3226
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3222
3227
|
if (norm == NULL) { /* not an indexed field */
|
@@ -3491,16 +3496,16 @@ static bool sr_has_deletions(IndexReader *ir)
|
|
3491
3496
|
static void sr_open_norms(IndexReader *ir, Store *cfs_store)
|
3492
3497
|
{
|
3493
3498
|
int i;
|
3499
|
+
Store *store = ir->store;
|
3494
3500
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
3495
3501
|
FieldInfos *fis = ir->fis;
|
3496
3502
|
char *ext_ptr;
|
3497
|
-
const int field_cnt = fis->size;
|
3498
|
-
|
3503
|
+
const int field_cnt = fis->size;
|
3504
|
+
|
3499
3505
|
sprintf(file_name, "%s.", SR(ir)->segment);
|
3500
3506
|
ext_ptr = file_name + strlen(file_name);
|
3501
3507
|
|
3502
3508
|
for (i = 0; i < field_cnt; i++) {
|
3503
|
-
Store *store = ir->store;
|
3504
3509
|
if (fi_has_norms(fis->fields[i])) {
|
3505
3510
|
sprintf(ext_ptr, "s%d", i);
|
3506
3511
|
if (!store->exists(store, file_name)) {
|
@@ -3939,8 +3944,8 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
|
|
3939
3944
|
for (i = 0; i < r_cnt; i++) {
|
3940
3945
|
FieldInfos *sub_fis = sub_readers[i]->fis;
|
3941
3946
|
const int fis_size = fis->size;
|
3942
|
-
|
3943
|
-
mr->field_num_map[i] = ALLOC_N(int, fis_size);
|
3947
|
+
|
3948
|
+
mr->field_num_map[i] = ALLOC_N(int, fis_size);
|
3944
3949
|
for (j = 0; j < fis_size; j++) {
|
3945
3950
|
FieldInfo *fi = fis->fields[j];
|
3946
3951
|
FieldInfo *fi_sub = fis_get_field(sub_fis, fi->name);
|
@@ -4384,7 +4389,7 @@ static void dw_add_posting(MemoryPool *mp,
|
|
4384
4389
|
}
|
4385
4390
|
}
|
4386
4391
|
|
4387
|
-
static
|
4392
|
+
static __inline void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
|
4388
4393
|
{
|
4389
4394
|
if (pos >= dw->offsets_capa) {
|
4390
4395
|
int old_capa = dw->offsets_capa;
|
@@ -5050,7 +5055,8 @@ static char **iw_create_compound_file(Store *store, FieldInfos *fis,
|
|
5050
5055
|
|
5051
5056
|
cw = open_cw(store, cfs_file_name);
|
5052
5057
|
for (i = 0; i < NELEMS(COMPOUND_EXTENSIONS); i++) {
|
5053
|
-
sprintf(file_name, "%s.%s",
|
5058
|
+
sprintf(file_name, "%s.%s",
|
5059
|
+
segment, COMPOUND_EXTENSIONS[i]);
|
5054
5060
|
ary_push(file_names, estrdup(file_name));
|
5055
5061
|
}
|
5056
5062
|
|
@@ -5196,7 +5202,8 @@ static void iw_maybe_merge_segments(IndexWriter *iw)
|
|
5196
5202
|
int min_segment, merge_docs;
|
5197
5203
|
SegmentInfo *si;
|
5198
5204
|
|
5199
|
-
while (target_merge_docs
|
5205
|
+
while (target_merge_docs > 0
|
5206
|
+
&& target_merge_docs <= iw->config.max_merge_docs) {
|
5200
5207
|
/* find segments smaller than current target size */
|
5201
5208
|
min_segment = iw->sis->size - 1;
|
5202
5209
|
merge_docs = 0;
|
@@ -5212,7 +5219,7 @@ static void iw_maybe_merge_segments(IndexWriter *iw)
|
|
5212
5219
|
if (merge_docs >= target_merge_docs) { /* found a merge to do */
|
5213
5220
|
iw_merge_segments_from(iw, min_segment + 1);
|
5214
5221
|
}
|
5215
|
-
else {
|
5222
|
+
else if (min_segment <= 0) {
|
5216
5223
|
break;
|
5217
5224
|
}
|
5218
5225
|
|
data/ext/index.h
CHANGED
@@ -471,7 +471,7 @@ typedef struct Posting
|
|
471
471
|
struct Posting *next;
|
472
472
|
} Posting;
|
473
473
|
|
474
|
-
extern
|
474
|
+
extern __inline Posting *p_new(MemoryPool *mp, int doc_num, int pos);
|
475
475
|
|
476
476
|
/****************************************************************************
|
477
477
|
*
|
data/ext/lang.h
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
#ifndef FRT_LANG_H
|
2
2
|
#define FRT_LANG_H
|
3
3
|
|
4
|
-
#
|
4
|
+
#define RUBY_BINDINGS 1
|
5
|
+
|
5
6
|
#include <stdarg.h>
|
7
|
+
#include <ruby.h>
|
8
|
+
|
9
|
+
#undef close
|
10
|
+
#undef rename
|
6
11
|
|
7
12
|
#define frt_malloc xmalloc
|
8
13
|
#define frt_calloc(n) xcalloc(n, 1)
|
9
14
|
#define frt_realloc xrealloc
|
10
15
|
|
16
|
+
|
11
17
|
#ifdef FRT_HAS_ISO_VARARGS
|
12
18
|
/* C99-compliant compiler */
|
13
19
|
|
data/ext/mem_pool.c
CHANGED
data/ext/mem_pool.h
CHANGED
@@ -16,7 +16,7 @@ typedef struct MemoryPool {
|
|
16
16
|
|
17
17
|
extern MemoryPool *mp_new();
|
18
18
|
extern MemoryPool *mp_new_capa(int chunk_size, int init_capa);
|
19
|
-
extern
|
19
|
+
extern __inline void *mp_alloc(MemoryPool *mp, int size);
|
20
20
|
extern void mp_reset(MemoryPool *mp);
|
21
21
|
extern void mp_destroy(MemoryPool *mp);
|
22
22
|
extern char *mp_strdup(MemoryPool *mp, const char *str);
|
data/ext/q_fuzzy.c
CHANGED
@@ -11,7 +11,7 @@
|
|
11
11
|
*
|
12
12
|
****************************************************************************/
|
13
13
|
|
14
|
-
static
|
14
|
+
static __inline int fuzq_calculate_max_distance(FuzzyQuery *fuzq, int m)
|
15
15
|
{
|
16
16
|
return (int)((1.0 - fuzq->min_sim) * (MIN(fuzq->text_len, m) + fuzq->pre_len));
|
17
17
|
}
|
@@ -24,7 +24,7 @@ static void fuzq_initialize_max_distances(FuzzyQuery *fuzq)
|
|
24
24
|
}
|
25
25
|
}
|
26
26
|
|
27
|
-
static
|
27
|
+
static __inline int fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
|
28
28
|
{
|
29
29
|
return (m < TYPICAL_LONGEST_WORD) ? fuzq->max_distances[m]
|
30
30
|
: fuzq_calculate_max_distance(fuzq, m);
|
data/ext/q_match_all.c
CHANGED
@@ -114,9 +114,9 @@ char *maq_to_s(Query *self, const char *field)
|
|
114
114
|
{
|
115
115
|
(void)field;
|
116
116
|
if (self->boost == 1.0) {
|
117
|
-
return estrdup("
|
117
|
+
return estrdup("*");
|
118
118
|
} else {
|
119
|
-
return strfmt("
|
119
|
+
return strfmt("*^%f", self->boost);
|
120
120
|
}
|
121
121
|
}
|
122
122
|
|
data/ext/q_multi_term.c
CHANGED
@@ -236,7 +236,7 @@ static bool multi_tsc_advance_to(Scorer *self, int target_doc_num)
|
|
236
236
|
return (pq_top(tdew_pq) == NULL) ? false : true;
|
237
237
|
}
|
238
238
|
|
239
|
-
static
|
239
|
+
static __inline bool multi_tsc_skip_to(Scorer *self, int target_doc_num)
|
240
240
|
{
|
241
241
|
return multi_tsc_advance_to(self, target_doc_num) && multi_tsc_next(self);
|
242
242
|
}
|
data/ext/q_parser.c
CHANGED
@@ -56,7 +56,7 @@
|
|
56
56
|
/* Put the tokens into the symbol table, so that GDB and other debuggers
|
57
57
|
know about them. */
|
58
58
|
enum yytokentype {
|
59
|
-
|
59
|
+
QWRD = 258,
|
60
60
|
WILD_STR = 259,
|
61
61
|
LOW = 260,
|
62
62
|
OR = 261,
|
@@ -67,7 +67,7 @@
|
|
67
67
|
};
|
68
68
|
#endif
|
69
69
|
/* Tokens. */
|
70
|
-
#define
|
70
|
+
#define QWRD 258
|
71
71
|
#define WILD_STR 259
|
72
72
|
#define LOW 260
|
73
73
|
#define OR 261
|
@@ -133,7 +133,7 @@ typedef union YYSTYPE {
|
|
133
133
|
char *str;
|
134
134
|
} YYSTYPE;
|
135
135
|
/* Line 196 of yacc.c. */
|
136
|
-
#line 137 "
|
136
|
+
#line 137 "y.tab.c"
|
137
137
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
138
138
|
# define YYSTYPE_IS_DECLARED 1
|
139
139
|
# define YYSTYPE_IS_TRIVIAL 1
|
@@ -197,7 +197,7 @@ static Query *get_range_q(const char *field, const char *from, const char *to,
|
|
197
197
|
|
198
198
|
|
199
199
|
/* Line 219 of yacc.c. */
|
200
|
-
#line 201 "
|
200
|
+
#line 201 "y.tab.c"
|
201
201
|
|
202
202
|
#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
|
203
203
|
# define YYSIZE_T __SIZE_TYPE__
|
@@ -450,7 +450,7 @@ static const unsigned char yyrline[] =
|
|
450
450
|
First, the terminals, then, starting at YYNTOKENS, nonterminals. */
|
451
451
|
static const char *const yytname[] =
|
452
452
|
{
|
453
|
-
"$end", "error", "$undefined", "
|
453
|
+
"$end", "error", "$undefined", "QWRD", "WILD_STR", "LOW", "OR", "AND",
|
454
454
|
"NOT", "REQ", "':'", "HIGH", "'^'", "'('", "')'", "'~'", "'*'", "'|'",
|
455
455
|
"'\"'", "'<'", "'>'", "'['", "']'", "'}'", "'{'", "'='", "$accept",
|
456
456
|
"bool_q", "bool_clss", "bool_cls", "boosted_q", "q", "term_q", "wild_q",
|
@@ -1250,217 +1250,217 @@ yyreduce:
|
|
1250
1250
|
{
|
1251
1251
|
case 2:
|
1252
1252
|
#line 99 "src/q_parser.y"
|
1253
|
-
{ qp->result = (yyval.query) = NULL;
|
1253
|
+
{ qp->result = (yyval.query) = NULL; }
|
1254
1254
|
break;
|
1255
1255
|
|
1256
1256
|
case 3:
|
1257
1257
|
#line 100 "src/q_parser.y"
|
1258
|
-
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss));
|
1258
|
+
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
|
1259
1259
|
break;
|
1260
1260
|
|
1261
1261
|
case 4:
|
1262
1262
|
#line 102 "src/q_parser.y"
|
1263
|
-
{ (yyval.bclss) = first_cls((yyvsp[0].bcls));
|
1263
|
+
{ (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
|
1264
1264
|
break;
|
1265
1265
|
|
1266
1266
|
case 5:
|
1267
1267
|
#line 103 "src/q_parser.y"
|
1268
|
-
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls));
|
1268
|
+
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1269
1269
|
break;
|
1270
1270
|
|
1271
1271
|
case 6:
|
1272
1272
|
#line 104 "src/q_parser.y"
|
1273
|
-
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls));
|
1273
|
+
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1274
1274
|
break;
|
1275
1275
|
|
1276
1276
|
case 7:
|
1277
1277
|
#line 105 "src/q_parser.y"
|
1278
|
-
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls));
|
1278
|
+
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
|
1279
1279
|
break;
|
1280
1280
|
|
1281
1281
|
case 8:
|
1282
1282
|
#line 107 "src/q_parser.y"
|
1283
|
-
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST);
|
1283
|
+
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
|
1284
1284
|
break;
|
1285
1285
|
|
1286
1286
|
case 9:
|
1287
1287
|
#line 108 "src/q_parser.y"
|
1288
|
-
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT);
|
1288
|
+
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
|
1289
1289
|
break;
|
1290
1290
|
|
1291
1291
|
case 10:
|
1292
1292
|
#line 109 "src/q_parser.y"
|
1293
|
-
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD);
|
1293
|
+
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
|
1294
1294
|
break;
|
1295
1295
|
|
1296
1296
|
case 12:
|
1297
1297
|
#line 112 "src/q_parser.y"
|
1298
|
-
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query);
|
1298
|
+
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
|
1299
1299
|
break;
|
1300
1300
|
|
1301
1301
|
case 14:
|
1302
1302
|
#line 115 "src/q_parser.y"
|
1303
|
-
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss));
|
1303
|
+
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
|
1304
1304
|
break;
|
1305
1305
|
|
1306
1306
|
case 19:
|
1307
1307
|
#line 121 "src/q_parser.y"
|
1308
|
-
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str)));
|
1308
|
+
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
|
1309
1309
|
break;
|
1310
1310
|
|
1311
1311
|
case 20:
|
1312
1312
|
#line 122 "src/q_parser.y"
|
1313
|
-
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str)));
|
1313
|
+
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
|
1314
1314
|
break;
|
1315
1315
|
|
1316
1316
|
case 21:
|
1317
1317
|
#line 123 "src/q_parser.y"
|
1318
|
-
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL));
|
1318
|
+
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
|
1319
1319
|
break;
|
1320
1320
|
|
1321
1321
|
case 22:
|
1322
1322
|
#line 125 "src/q_parser.y"
|
1323
|
-
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str)));
|
1323
|
+
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
|
1324
1324
|
break;
|
1325
1325
|
|
1326
1326
|
case 23:
|
1327
1327
|
#line 127 "src/q_parser.y"
|
1328
|
-
{ qp->fields = qp->def_fields;
|
1328
|
+
{ qp->fields = qp->def_fields; }
|
1329
1329
|
break;
|
1330
1330
|
|
1331
1331
|
case 24:
|
1332
1332
|
#line 128 "src/q_parser.y"
|
1333
|
-
{ (yyval.query) = (yyvsp[-1].query);
|
1333
|
+
{ (yyval.query) = (yyvsp[-1].query); }
|
1334
1334
|
break;
|
1335
1335
|
|
1336
1336
|
case 25:
|
1337
1337
|
#line 129 "src/q_parser.y"
|
1338
|
-
{ qp->fields = qp->all_fields;
|
1338
|
+
{ qp->fields = qp->all_fields; }
|
1339
1339
|
break;
|
1340
1340
|
|
1341
1341
|
case 26:
|
1342
1342
|
#line 129 "src/q_parser.y"
|
1343
|
-
{qp->fields = qp->def_fields
|
1343
|
+
{qp->fields = qp->def_fields;}
|
1344
1344
|
break;
|
1345
1345
|
|
1346
1346
|
case 27:
|
1347
1347
|
#line 130 "src/q_parser.y"
|
1348
|
-
{ (yyval.query) = (yyvsp[-1].query);
|
1348
|
+
{ (yyval.query) = (yyvsp[-1].query); }
|
1349
1349
|
break;
|
1350
1350
|
|
1351
1351
|
case 28:
|
1352
1352
|
#line 132 "src/q_parser.y"
|
1353
|
-
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str));
|
1353
|
+
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
|
1354
1354
|
break;
|
1355
1355
|
|
1356
1356
|
case 29:
|
1357
1357
|
#line 133 "src/q_parser.y"
|
1358
|
-
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str))
|
1358
|
+
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
|
1359
1359
|
break;
|
1360
1360
|
|
1361
1361
|
case 30:
|
1362
1362
|
#line 135 "src/q_parser.y"
|
1363
|
-
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL);
|
1363
|
+
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
|
1364
1364
|
break;
|
1365
1365
|
|
1366
1366
|
case 31:
|
1367
1367
|
#line 136 "src/q_parser.y"
|
1368
|
-
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str));
|
1368
|
+
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
|
1369
1369
|
break;
|
1370
1370
|
|
1371
1371
|
case 32:
|
1372
1372
|
#line 137 "src/q_parser.y"
|
1373
|
-
{ (yyval.query) = NULL;
|
1373
|
+
{ (yyval.query) = NULL; }
|
1374
1374
|
break;
|
1375
1375
|
|
1376
1376
|
case 33:
|
1377
1377
|
#line 138 "src/q_parser.y"
|
1378
|
-
{ (yyval.query) = NULL;
|
1378
|
+
{ (yyval.query) = NULL; }
|
1379
1379
|
break;
|
1380
1380
|
|
1381
1381
|
case 34:
|
1382
1382
|
#line 140 "src/q_parser.y"
|
1383
|
-
{ (yyval.phrase) = ph_first_word((yyvsp[0].str));
|
1383
|
+
{ (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
|
1384
1384
|
break;
|
1385
1385
|
|
1386
1386
|
case 35:
|
1387
1387
|
#line 141 "src/q_parser.y"
|
1388
|
-
{ (yyval.phrase) = ph_first_word(NULL);
|
1388
|
+
{ (yyval.phrase) = ph_first_word(NULL); }
|
1389
1389
|
break;
|
1390
1390
|
|
1391
1391
|
case 36:
|
1392
1392
|
#line 142 "src/q_parser.y"
|
1393
|
-
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str));
|
1393
|
+
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
|
1394
1394
|
break;
|
1395
1395
|
|
1396
1396
|
case 37:
|
1397
1397
|
#line 143 "src/q_parser.y"
|
1398
|
-
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL);
|
1398
|
+
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
|
1399
1399
|
break;
|
1400
1400
|
|
1401
1401
|
case 38:
|
1402
1402
|
#line 144 "src/q_parser.y"
|
1403
|
-
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str));
|
1403
|
+
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
|
1404
1404
|
break;
|
1405
1405
|
|
1406
1406
|
case 39:
|
1407
1407
|
#line 146 "src/q_parser.y"
|
1408
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true));
|
1408
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
|
1409
1409
|
break;
|
1410
1410
|
|
1411
1411
|
case 40:
|
1412
1412
|
#line 147 "src/q_parser.y"
|
1413
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false));
|
1413
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
|
1414
1414
|
break;
|
1415
1415
|
|
1416
1416
|
case 41:
|
1417
1417
|
#line 148 "src/q_parser.y"
|
1418
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true));
|
1418
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
|
1419
1419
|
break;
|
1420
1420
|
|
1421
1421
|
case 42:
|
1422
1422
|
#line 149 "src/q_parser.y"
|
1423
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false));
|
1423
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
|
1424
1424
|
break;
|
1425
1425
|
|
1426
1426
|
case 43:
|
1427
1427
|
#line 150 "src/q_parser.y"
|
1428
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false));
|
1428
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
|
1429
1429
|
break;
|
1430
1430
|
|
1431
1431
|
case 44:
|
1432
1432
|
#line 151 "src/q_parser.y"
|
1433
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true));
|
1433
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
|
1434
1434
|
break;
|
1435
1435
|
|
1436
1436
|
case 45:
|
1437
1437
|
#line 152 "src/q_parser.y"
|
1438
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false));
|
1438
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
|
1439
1439
|
break;
|
1440
1440
|
|
1441
1441
|
case 46:
|
1442
1442
|
#line 153 "src/q_parser.y"
|
1443
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false));
|
1443
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
|
1444
1444
|
break;
|
1445
1445
|
|
1446
1446
|
case 47:
|
1447
1447
|
#line 154 "src/q_parser.y"
|
1448
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false));
|
1448
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
|
1449
1449
|
break;
|
1450
1450
|
|
1451
1451
|
case 48:
|
1452
1452
|
#line 155 "src/q_parser.y"
|
1453
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true));
|
1453
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
|
1454
1454
|
break;
|
1455
1455
|
|
1456
1456
|
case 49:
|
1457
1457
|
#line 156 "src/q_parser.y"
|
1458
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false));
|
1458
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
|
1459
1459
|
break;
|
1460
1460
|
|
1461
1461
|
case 50:
|
1462
1462
|
#line 157 "src/q_parser.y"
|
1463
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false));
|
1463
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
|
1464
1464
|
break;
|
1465
1465
|
|
1466
1466
|
|
@@ -1468,7 +1468,7 @@ yyreduce:
|
|
1468
1468
|
}
|
1469
1469
|
|
1470
1470
|
/* Line 1126 of yacc.c. */
|
1471
|
-
#line 1472 "
|
1471
|
+
#line 1472 "y.tab.c"
|
1472
1472
|
|
1473
1473
|
yyvsp -= yylen;
|
1474
1474
|
yyssp -= yylen;
|
@@ -1785,8 +1785,10 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
1785
1785
|
|
1786
1786
|
/* found a word so return it. */
|
1787
1787
|
lvalp->str = buf;
|
1788
|
-
if (is_wild)
|
1789
|
-
|
1788
|
+
if (is_wild) {
|
1789
|
+
return WILD_STR;
|
1790
|
+
}
|
1791
|
+
return QWRD;
|
1790
1792
|
}
|
1791
1793
|
|
1792
1794
|
static int yylex(YYSTYPE *lvalp, QParser *qp)
|
@@ -1806,6 +1808,8 @@ static int yylex(YYSTYPE *lvalp, QParser *qp)
|
|
1806
1808
|
case '*':
|
1807
1809
|
if (nc == ':') return c;
|
1808
1810
|
break;
|
1811
|
+
case '?':
|
1812
|
+
break;
|
1809
1813
|
case '&':
|
1810
1814
|
if (nc == '&') {
|
1811
1815
|
qp->qstrp++;
|
@@ -2046,7 +2050,11 @@ static Query *get_wild_q(QParser *qp, char *field, char *pattern)
|
|
2046
2050
|
|
2047
2051
|
/* simplify the wildcard query to a prefix query if possible. Basically a
|
2048
2052
|
* prefix query is any wildcard query that has a '*' as the last character
|
2049
|
-
* and no other wildcard characters before it.
|
2053
|
+
* and no other wildcard characters before it. "*" by itself will expand
|
2054
|
+
* to a MatchAllQuery */
|
2055
|
+
if (strcmp(pattern, "*") == 0) {
|
2056
|
+
return maq_new();
|
2057
|
+
}
|
2050
2058
|
if (pattern[len - 1] == '*') {
|
2051
2059
|
is_prefix = true;
|
2052
2060
|
for (p = &pattern[len - 2]; p >= pattern; p--) {
|