ferret 0.10.1 → 0.10.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +7 -1
- data/ext/analysis.c +21 -13
- data/ext/array.c +1 -1
- data/ext/bitvector.c +2 -2
- data/ext/defines.h +0 -6
- data/ext/except.c +6 -6
- data/ext/except.h +12 -8
- data/ext/extconf.rb +1 -0
- data/ext/ferret.c +4 -0
- data/ext/ferret.h +1 -0
- data/ext/fs_store.c +18 -4
- data/ext/global.c +18 -16
- data/ext/global.h +7 -2
- data/ext/hash.c +1 -1
- data/ext/helper.c +1 -1
- data/ext/helper.h +1 -1
- data/ext/inc/lang.h +7 -1
- data/ext/ind.c +4 -4
- data/ext/ind.h +3 -3
- data/ext/index.c +33 -26
- data/ext/index.h +1 -1
- data/ext/lang.h +7 -1
- data/ext/mem_pool.c +1 -1
- data/ext/mem_pool.h +1 -1
- data/ext/q_fuzzy.c +2 -2
- data/ext/q_match_all.c +2 -2
- data/ext/q_multi_term.c +1 -1
- data/ext/q_parser.c +60 -52
- data/ext/r_analysis.c +6 -4
- data/ext/r_index.c +57 -4
- data/ext/r_search.c +1 -1
- data/ext/r_utils.c +1 -1
- data/ext/ram_store.c +1 -1
- data/ext/search.c +4 -4
- data/ext/sort.c +3 -3
- data/ext/store.c +9 -9
- data/ext/store.h +4 -4
- data/ext/tags +7841 -0
- data/ext/term_vectors.c +3 -3
- data/lib/ferret/index.rb +69 -7
- data/test/test_helper.rb +3 -2
- data/test/unit/analysis/tc_token_stream.rb +1 -0
- data/test/unit/index/tc_index.rb +157 -2
- data/test/unit/index/tc_index_reader.rb +108 -5
- data/test/unit/query_parser/tc_query_parser.rb +2 -1
- data/test/unit/search/tc_index_searcher.rb +1 -1
- data/test/unit/search/tc_multi_searcher.rb +2 -1
- data/test/unit/search/tc_spans.rb +1 -1
- data/test/unit/store/tc_fs_store.rb +6 -3
- data/test/unit/ts_analysis.rb +1 -1
- data/test/unit/ts_utils.rb +1 -1
- data/test/unit/utils/tc_number_tools.rb +1 -1
- metadata +138 -137
data/ext/global.h
CHANGED
@@ -50,8 +50,13 @@ typedef void (*free_ft)(void *key);
|
|
50
50
|
}\
|
51
51
|
} while (0)
|
52
52
|
|
53
|
-
#
|
54
|
-
#define
|
53
|
+
#ifdef POSH_OS_WIN32
|
54
|
+
# define Jx fprintf(stderr,"%s, %d\n", __FILE__, __LINE__);
|
55
|
+
# define Xj fprintf(stdout,"%s, %d\n", __FILE__, __LINE__);
|
56
|
+
#else
|
57
|
+
# define Jx fprintf(stderr,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
58
|
+
# define Xj fprintf(stdout,"%s, %d: %s\n", __FILE__, __LINE__, __func__);
|
59
|
+
#endif
|
55
60
|
|
56
61
|
extern char *progname();
|
57
62
|
extern void setprogname(const char *str);
|
data/ext/hash.c
CHANGED
@@ -58,7 +58,7 @@ typedef HashEntry *(*lookup_ft)(struct HashTable *ht, register const void *key);
|
|
58
58
|
* @param ht the HashTable to do the fast lookup in
|
59
59
|
* @param hash the hashkey we are looking for
|
60
60
|
*/
|
61
|
-
static
|
61
|
+
static __inline HashEntry *h_resize_lookup(HashTable *ht, register const ulong hash)
|
62
62
|
{
|
63
63
|
register ulong perturb;
|
64
64
|
register int mask = ht->mask;
|
data/ext/helper.c
CHANGED
data/ext/helper.h
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
|
4
4
|
#include "defines.h"
|
5
5
|
|
6
|
-
extern
|
6
|
+
extern __inline int hlp_string_diff(register const char *const s1,
|
7
7
|
register const char *const s2);
|
8
8
|
extern f_i32 float2int(float f);
|
9
9
|
extern float int2float(f_i32 i32);
|
data/ext/inc/lang.h
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
#ifndef FRT_LANG_H
|
2
2
|
#define FRT_LANG_H
|
3
3
|
|
4
|
-
#
|
4
|
+
#define RUBY_BINDINGS 1
|
5
|
+
|
5
6
|
#include <stdarg.h>
|
7
|
+
#include <ruby.h>
|
8
|
+
|
9
|
+
#undef close
|
10
|
+
#undef rename
|
6
11
|
|
7
12
|
#define frt_malloc xmalloc
|
8
13
|
#define frt_calloc(n) xcalloc(n, 1)
|
9
14
|
#define frt_realloc xrealloc
|
10
15
|
|
16
|
+
|
11
17
|
#ifdef FRT_HAS_ISO_VARARGS
|
12
18
|
/* C99-compliant compiler */
|
13
19
|
|
data/ext/ind.c
CHANGED
@@ -108,7 +108,7 @@ void index_flush(Index *self)
|
|
108
108
|
self->has_writes = false;
|
109
109
|
}
|
110
110
|
|
111
|
-
inline void ensure_writer_open(Index *self)
|
111
|
+
__inline void ensure_writer_open(Index *self)
|
112
112
|
{
|
113
113
|
if (!self->iw) {
|
114
114
|
INDEX_CLOSE_READER(self);
|
@@ -124,7 +124,7 @@ inline void ensure_writer_open(Index *self)
|
|
124
124
|
}
|
125
125
|
}
|
126
126
|
|
127
|
-
inline void ensure_reader_open(Index *self)
|
127
|
+
__inline void ensure_reader_open(Index *self)
|
128
128
|
{
|
129
129
|
if (self->ir) {
|
130
130
|
if (self->check_latest && !ir_is_latest(self->ir)) {
|
@@ -140,7 +140,7 @@ inline void ensure_reader_open(Index *self)
|
|
140
140
|
}
|
141
141
|
}
|
142
142
|
|
143
|
-
|
143
|
+
__inline void ensure_searcher_open(Index *self)
|
144
144
|
{
|
145
145
|
ensure_reader_open(self);
|
146
146
|
if (!self->sea) {
|
@@ -187,7 +187,7 @@ bool index_is_deleted(Index *self, int doc_num)
|
|
187
187
|
return is_del;
|
188
188
|
}
|
189
189
|
|
190
|
-
static
|
190
|
+
static __inline void index_add_doc_i(Index *self, Document *doc)
|
191
191
|
{
|
192
192
|
/* If there is a key specified delete the document with the same key */
|
193
193
|
if (self->key) {
|
data/ext/ind.h
CHANGED
@@ -61,8 +61,8 @@ extern Explanation *index_explain(Index *self, Query *q, int doc_num);
|
|
61
61
|
extern void index_auto_flush_ir(Index *self);
|
62
62
|
extern void index_auto_flush_iw(Index *self);
|
63
63
|
|
64
|
-
extern
|
65
|
-
extern
|
66
|
-
extern
|
64
|
+
extern __inline void ensure_searcher_open(Index *self);
|
65
|
+
extern __inline void ensure_reader_open(Index *self);
|
66
|
+
extern __inline void ensure_writer_open(Index *self);
|
67
67
|
|
68
68
|
#endif
|
data/ext/index.c
CHANGED
@@ -91,7 +91,7 @@ HashTable *co_hash_create()
|
|
91
91
|
*
|
92
92
|
****************************************************************************/
|
93
93
|
|
94
|
-
inline void fi_set_store(FieldInfo *fi, int store)
|
94
|
+
__inline void fi_set_store(FieldInfo *fi, int store)
|
95
95
|
{
|
96
96
|
switch (store) {
|
97
97
|
case STORE_NO:
|
@@ -105,7 +105,7 @@ inline void fi_set_store(FieldInfo *fi, int store)
|
|
105
105
|
}
|
106
106
|
}
|
107
107
|
|
108
|
-
inline void fi_set_index(FieldInfo *fi, int index)
|
108
|
+
__inline void fi_set_index(FieldInfo *fi, int index)
|
109
109
|
{
|
110
110
|
switch (index) {
|
111
111
|
case INDEX_NO:
|
@@ -126,7 +126,7 @@ inline void fi_set_index(FieldInfo *fi, int index)
|
|
126
126
|
}
|
127
127
|
}
|
128
128
|
|
129
|
-
|
129
|
+
__inline void fi_set_term_vector(FieldInfo *fi, int term_vector)
|
130
130
|
{
|
131
131
|
switch (term_vector) {
|
132
132
|
case TERM_VECTOR_NO:
|
@@ -1071,7 +1071,7 @@ void fw_close(FieldsWriter *fw)
|
|
1071
1071
|
free(fw);
|
1072
1072
|
}
|
1073
1073
|
|
1074
|
-
static
|
1074
|
+
static __inline void save_data(OutStream *fdt_out, char *data, int dlen)
|
1075
1075
|
{
|
1076
1076
|
os_write_vint(fdt_out, dlen);
|
1077
1077
|
os_write_bytes(fdt_out, (uchar *)data, dlen);
|
@@ -1184,7 +1184,7 @@ void fw_add_postings(FieldsWriter *fw,
|
|
1184
1184
|
|
1185
1185
|
if (fi_store_offsets(fi)) {
|
1186
1186
|
/* use delta encoding for offsets */
|
1187
|
-
int last_end = 0;
|
1187
|
+
int last_end = 0;
|
1188
1188
|
os_write_vint(fdt_out, offset_count); /* write the number of offsets */
|
1189
1189
|
for (i = 0; i < offset_count; i++) {
|
1190
1190
|
int start = offsets[i].start;
|
@@ -1274,7 +1274,7 @@ static void sti_ensure_index_is_read(SegmentTermIndex *sti,
|
|
1274
1274
|
for (i = 0; NULL != ste_next(index_te); i++) {
|
1275
1275
|
#ifdef DEBUG
|
1276
1276
|
if (i >= index_size) {
|
1277
|
-
RAISE(
|
1277
|
+
RAISE(FERRET_ERROR, "index term enum read too many terms");
|
1278
1278
|
}
|
1279
1279
|
#endif
|
1280
1280
|
sti->index_terms[i] = te_get_term(index_te);
|
@@ -1367,7 +1367,7 @@ void sfi_close(SegmentFieldIndex *sfi)
|
|
1367
1367
|
* SegmentTermEnum
|
1368
1368
|
****************************************************************************/
|
1369
1369
|
|
1370
|
-
static
|
1370
|
+
static __inline int term_read(char *buf, InStream *is)
|
1371
1371
|
{
|
1372
1372
|
int start = (int)is_read_vint(is);
|
1373
1373
|
int length = (int)is_read_vint(is);
|
@@ -1444,6 +1444,10 @@ static char *ste_scan_to(TermEnum *te, const char *term)
|
|
1444
1444
|
SegmentTermIndex *sti = h_get_int(sfi->field_dict, te->field_num);
|
1445
1445
|
if (sti && sti->size > 0) {
|
1446
1446
|
SFI_ENSURE_INDEX_IS_READ(sfi, sti);
|
1447
|
+
if (term[0] == '\0') {
|
1448
|
+
ste_index_seek(te, sti, 0);
|
1449
|
+
return ste_next(te);;
|
1450
|
+
}
|
1447
1451
|
/* if current term is less than seek term */
|
1448
1452
|
if (STE(te)->pos < STE(te)->size && strcmp(te->curr_term, term) <= 0) {
|
1449
1453
|
int enum_offset = (int)(STE(te)->pos / sfi->index_interval) + 1;
|
@@ -1748,7 +1752,7 @@ TermEnum *mte_new(MultiReader *mr, int field_num, const char *term)
|
|
1748
1752
|
|
1749
1753
|
tew = tew_setup(&(mte->tews[i]), starts[i], sub_te, reader);
|
1750
1754
|
if (((term == NULL) && tew_next(tew))
|
1751
|
-
|| (tew->term && tew->term[0] != '\0')) {
|
1755
|
+
|| (tew->term && (tew->term[0] != '\0'))) {
|
1752
1756
|
pq_push(mte->tew_queue, tew); /* initialize queue */
|
1753
1757
|
}
|
1754
1758
|
} else {
|
@@ -1788,11 +1792,12 @@ TermInfosReader *tir_open(Store *store,
|
|
1788
1792
|
return tir;
|
1789
1793
|
}
|
1790
1794
|
|
1791
|
-
static
|
1795
|
+
static __inline TermEnum *tir_enum(TermInfosReader *tir)
|
1792
1796
|
{
|
1793
1797
|
TermEnum *te;
|
1794
1798
|
if ((te = thread_getspecific(tir->thread_te)) == NULL) {
|
1795
1799
|
te = ste_clone(tir->orig_te);
|
1800
|
+
ste_set_field(te, tir->field_num);
|
1796
1801
|
ary_push(tir->te_bucket, te);
|
1797
1802
|
thread_setspecific(tir->thread_te, te);
|
1798
1803
|
}
|
@@ -1913,7 +1918,7 @@ TermInfosWriter *tiw_open(Store *store,
|
|
1913
1918
|
return tiw;
|
1914
1919
|
}
|
1915
1920
|
|
1916
|
-
static
|
1921
|
+
static __inline void tw_write_term(TermWriter *tw,
|
1917
1922
|
OutStream *os,
|
1918
1923
|
const char *term,
|
1919
1924
|
int term_len)
|
@@ -1987,7 +1992,7 @@ void tiw_add(TermInfosWriter *tiw,
|
|
1987
1992
|
}
|
1988
1993
|
}
|
1989
1994
|
|
1990
|
-
static
|
1995
|
+
static __inline void tw_reset(TermWriter *tw)
|
1991
1996
|
{
|
1992
1997
|
tw->counter = 0;
|
1993
1998
|
tw->last_term = EMPTY_STRING;
|
@@ -3085,8 +3090,8 @@ bool ir_is_latest(IndexReader *ir)
|
|
3085
3090
|
Lock *commit_lock = ir->store->open_lock(ir->store, COMMIT_LOCK_NAME);
|
3086
3091
|
if (!commit_lock->obtain(commit_lock)) {
|
3087
3092
|
ir->store->close_lock(commit_lock);
|
3088
|
-
RAISE(LOCK_ERROR, "Error
|
3089
|
-
|
3093
|
+
RAISE(LOCK_ERROR, "Error detecting if the current index is latest "
|
3094
|
+
"version. Commit lock currently obtained");
|
3090
3095
|
}
|
3091
3096
|
is_latest = (sis_read_current_version(ir->store) == ir->sis->version);
|
3092
3097
|
commit_lock->release(commit_lock);
|
@@ -3180,7 +3185,7 @@ typedef struct SegmentReader {
|
|
3180
3185
|
#define SR(ir) ((SegmentReader *)(ir))
|
3181
3186
|
#define SR_SIZE(ir) (SR(ir)->fr->size)
|
3182
3187
|
|
3183
|
-
static inline FieldsReader *sr_fr(SegmentReader *sr)
|
3188
|
+
static __inline FieldsReader *sr_fr(SegmentReader *sr)
|
3184
3189
|
{
|
3185
3190
|
FieldsReader *fr;
|
3186
3191
|
|
@@ -3192,12 +3197,12 @@ static inline FieldsReader *sr_fr(SegmentReader *sr)
|
|
3192
3197
|
return fr;
|
3193
3198
|
}
|
3194
3199
|
|
3195
|
-
static
|
3200
|
+
static __inline bool sr_is_deleted_i(SegmentReader *sr, int doc_num)
|
3196
3201
|
{
|
3197
3202
|
return (sr->deleted_docs != NULL && bv_get(sr->deleted_docs, doc_num));
|
3198
3203
|
}
|
3199
3204
|
|
3200
|
-
static inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3205
|
+
static __inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
3201
3206
|
uchar *buf)
|
3202
3207
|
{
|
3203
3208
|
Norm *norm = h_get_int(sr->norms, field_num);
|
@@ -3216,7 +3221,7 @@ static inline void sr_get_norms_into_i(SegmentReader *sr, int field_num,
|
|
3216
3221
|
}
|
3217
3222
|
}
|
3218
3223
|
|
3219
|
-
static
|
3224
|
+
static __inline uchar *sr_get_norms_i(SegmentReader *sr, int field_num)
|
3220
3225
|
{
|
3221
3226
|
Norm *norm = h_get_int(sr->norms, field_num);
|
3222
3227
|
if (norm == NULL) { /* not an indexed field */
|
@@ -3491,16 +3496,16 @@ static bool sr_has_deletions(IndexReader *ir)
|
|
3491
3496
|
static void sr_open_norms(IndexReader *ir, Store *cfs_store)
|
3492
3497
|
{
|
3493
3498
|
int i;
|
3499
|
+
Store *store = ir->store;
|
3494
3500
|
char file_name[SEGMENT_NAME_MAX_LENGTH];
|
3495
3501
|
FieldInfos *fis = ir->fis;
|
3496
3502
|
char *ext_ptr;
|
3497
|
-
const int field_cnt = fis->size;
|
3498
|
-
|
3503
|
+
const int field_cnt = fis->size;
|
3504
|
+
|
3499
3505
|
sprintf(file_name, "%s.", SR(ir)->segment);
|
3500
3506
|
ext_ptr = file_name + strlen(file_name);
|
3501
3507
|
|
3502
3508
|
for (i = 0; i < field_cnt; i++) {
|
3503
|
-
Store *store = ir->store;
|
3504
3509
|
if (fi_has_norms(fis->fields[i])) {
|
3505
3510
|
sprintf(ext_ptr, "s%d", i);
|
3506
3511
|
if (!store->exists(store, file_name)) {
|
@@ -3939,8 +3944,8 @@ IndexReader *mr_open(IndexReader **sub_readers, const int r_cnt)
|
|
3939
3944
|
for (i = 0; i < r_cnt; i++) {
|
3940
3945
|
FieldInfos *sub_fis = sub_readers[i]->fis;
|
3941
3946
|
const int fis_size = fis->size;
|
3942
|
-
|
3943
|
-
mr->field_num_map[i] = ALLOC_N(int, fis_size);
|
3947
|
+
|
3948
|
+
mr->field_num_map[i] = ALLOC_N(int, fis_size);
|
3944
3949
|
for (j = 0; j < fis_size; j++) {
|
3945
3950
|
FieldInfo *fi = fis->fields[j];
|
3946
3951
|
FieldInfo *fi_sub = fis_get_field(sub_fis, fi->name);
|
@@ -4384,7 +4389,7 @@ static void dw_add_posting(MemoryPool *mp,
|
|
4384
4389
|
}
|
4385
4390
|
}
|
4386
4391
|
|
4387
|
-
static
|
4392
|
+
static __inline void dw_add_offsets(DocWriter *dw, int pos, int start, int end)
|
4388
4393
|
{
|
4389
4394
|
if (pos >= dw->offsets_capa) {
|
4390
4395
|
int old_capa = dw->offsets_capa;
|
@@ -5050,7 +5055,8 @@ static char **iw_create_compound_file(Store *store, FieldInfos *fis,
|
|
5050
5055
|
|
5051
5056
|
cw = open_cw(store, cfs_file_name);
|
5052
5057
|
for (i = 0; i < NELEMS(COMPOUND_EXTENSIONS); i++) {
|
5053
|
-
sprintf(file_name, "%s.%s",
|
5058
|
+
sprintf(file_name, "%s.%s",
|
5059
|
+
segment, COMPOUND_EXTENSIONS[i]);
|
5054
5060
|
ary_push(file_names, estrdup(file_name));
|
5055
5061
|
}
|
5056
5062
|
|
@@ -5196,7 +5202,8 @@ static void iw_maybe_merge_segments(IndexWriter *iw)
|
|
5196
5202
|
int min_segment, merge_docs;
|
5197
5203
|
SegmentInfo *si;
|
5198
5204
|
|
5199
|
-
while (target_merge_docs
|
5205
|
+
while (target_merge_docs > 0
|
5206
|
+
&& target_merge_docs <= iw->config.max_merge_docs) {
|
5200
5207
|
/* find segments smaller than current target size */
|
5201
5208
|
min_segment = iw->sis->size - 1;
|
5202
5209
|
merge_docs = 0;
|
@@ -5212,7 +5219,7 @@ static void iw_maybe_merge_segments(IndexWriter *iw)
|
|
5212
5219
|
if (merge_docs >= target_merge_docs) { /* found a merge to do */
|
5213
5220
|
iw_merge_segments_from(iw, min_segment + 1);
|
5214
5221
|
}
|
5215
|
-
else {
|
5222
|
+
else if (min_segment <= 0) {
|
5216
5223
|
break;
|
5217
5224
|
}
|
5218
5225
|
|
data/ext/index.h
CHANGED
@@ -471,7 +471,7 @@ typedef struct Posting
|
|
471
471
|
struct Posting *next;
|
472
472
|
} Posting;
|
473
473
|
|
474
|
-
extern
|
474
|
+
extern __inline Posting *p_new(MemoryPool *mp, int doc_num, int pos);
|
475
475
|
|
476
476
|
/****************************************************************************
|
477
477
|
*
|
data/ext/lang.h
CHANGED
@@ -1,13 +1,19 @@
|
|
1
1
|
#ifndef FRT_LANG_H
|
2
2
|
#define FRT_LANG_H
|
3
3
|
|
4
|
-
#
|
4
|
+
#define RUBY_BINDINGS 1
|
5
|
+
|
5
6
|
#include <stdarg.h>
|
7
|
+
#include <ruby.h>
|
8
|
+
|
9
|
+
#undef close
|
10
|
+
#undef rename
|
6
11
|
|
7
12
|
#define frt_malloc xmalloc
|
8
13
|
#define frt_calloc(n) xcalloc(n, 1)
|
9
14
|
#define frt_realloc xrealloc
|
10
15
|
|
16
|
+
|
11
17
|
#ifdef FRT_HAS_ISO_VARARGS
|
12
18
|
/* C99-compliant compiler */
|
13
19
|
|
data/ext/mem_pool.c
CHANGED
data/ext/mem_pool.h
CHANGED
@@ -16,7 +16,7 @@ typedef struct MemoryPool {
|
|
16
16
|
|
17
17
|
extern MemoryPool *mp_new();
|
18
18
|
extern MemoryPool *mp_new_capa(int chunk_size, int init_capa);
|
19
|
-
extern
|
19
|
+
extern __inline void *mp_alloc(MemoryPool *mp, int size);
|
20
20
|
extern void mp_reset(MemoryPool *mp);
|
21
21
|
extern void mp_destroy(MemoryPool *mp);
|
22
22
|
extern char *mp_strdup(MemoryPool *mp, const char *str);
|
data/ext/q_fuzzy.c
CHANGED
@@ -11,7 +11,7 @@
|
|
11
11
|
*
|
12
12
|
****************************************************************************/
|
13
13
|
|
14
|
-
static
|
14
|
+
static __inline int fuzq_calculate_max_distance(FuzzyQuery *fuzq, int m)
|
15
15
|
{
|
16
16
|
return (int)((1.0 - fuzq->min_sim) * (MIN(fuzq->text_len, m) + fuzq->pre_len));
|
17
17
|
}
|
@@ -24,7 +24,7 @@ static void fuzq_initialize_max_distances(FuzzyQuery *fuzq)
|
|
24
24
|
}
|
25
25
|
}
|
26
26
|
|
27
|
-
static
|
27
|
+
static __inline int fuzq_get_max_distance(FuzzyQuery *fuzq, int m)
|
28
28
|
{
|
29
29
|
return (m < TYPICAL_LONGEST_WORD) ? fuzq->max_distances[m]
|
30
30
|
: fuzq_calculate_max_distance(fuzq, m);
|
data/ext/q_match_all.c
CHANGED
@@ -114,9 +114,9 @@ char *maq_to_s(Query *self, const char *field)
|
|
114
114
|
{
|
115
115
|
(void)field;
|
116
116
|
if (self->boost == 1.0) {
|
117
|
-
return estrdup("
|
117
|
+
return estrdup("*");
|
118
118
|
} else {
|
119
|
-
return strfmt("
|
119
|
+
return strfmt("*^%f", self->boost);
|
120
120
|
}
|
121
121
|
}
|
122
122
|
|
data/ext/q_multi_term.c
CHANGED
@@ -236,7 +236,7 @@ static bool multi_tsc_advance_to(Scorer *self, int target_doc_num)
|
|
236
236
|
return (pq_top(tdew_pq) == NULL) ? false : true;
|
237
237
|
}
|
238
238
|
|
239
|
-
static
|
239
|
+
static __inline bool multi_tsc_skip_to(Scorer *self, int target_doc_num)
|
240
240
|
{
|
241
241
|
return multi_tsc_advance_to(self, target_doc_num) && multi_tsc_next(self);
|
242
242
|
}
|
data/ext/q_parser.c
CHANGED
@@ -56,7 +56,7 @@
|
|
56
56
|
/* Put the tokens into the symbol table, so that GDB and other debuggers
|
57
57
|
know about them. */
|
58
58
|
enum yytokentype {
|
59
|
-
|
59
|
+
QWRD = 258,
|
60
60
|
WILD_STR = 259,
|
61
61
|
LOW = 260,
|
62
62
|
OR = 261,
|
@@ -67,7 +67,7 @@
|
|
67
67
|
};
|
68
68
|
#endif
|
69
69
|
/* Tokens. */
|
70
|
-
#define
|
70
|
+
#define QWRD 258
|
71
71
|
#define WILD_STR 259
|
72
72
|
#define LOW 260
|
73
73
|
#define OR 261
|
@@ -133,7 +133,7 @@ typedef union YYSTYPE {
|
|
133
133
|
char *str;
|
134
134
|
} YYSTYPE;
|
135
135
|
/* Line 196 of yacc.c. */
|
136
|
-
#line 137 "
|
136
|
+
#line 137 "y.tab.c"
|
137
137
|
# define yystype YYSTYPE /* obsolescent; will be withdrawn */
|
138
138
|
# define YYSTYPE_IS_DECLARED 1
|
139
139
|
# define YYSTYPE_IS_TRIVIAL 1
|
@@ -197,7 +197,7 @@ static Query *get_range_q(const char *field, const char *from, const char *to,
|
|
197
197
|
|
198
198
|
|
199
199
|
/* Line 219 of yacc.c. */
|
200
|
-
#line 201 "
|
200
|
+
#line 201 "y.tab.c"
|
201
201
|
|
202
202
|
#if ! defined (YYSIZE_T) && defined (__SIZE_TYPE__)
|
203
203
|
# define YYSIZE_T __SIZE_TYPE__
|
@@ -450,7 +450,7 @@ static const unsigned char yyrline[] =
|
|
450
450
|
First, the terminals, then, starting at YYNTOKENS, nonterminals. */
|
451
451
|
static const char *const yytname[] =
|
452
452
|
{
|
453
|
-
"$end", "error", "$undefined", "
|
453
|
+
"$end", "error", "$undefined", "QWRD", "WILD_STR", "LOW", "OR", "AND",
|
454
454
|
"NOT", "REQ", "':'", "HIGH", "'^'", "'('", "')'", "'~'", "'*'", "'|'",
|
455
455
|
"'\"'", "'<'", "'>'", "'['", "']'", "'}'", "'{'", "'='", "$accept",
|
456
456
|
"bool_q", "bool_clss", "bool_cls", "boosted_q", "q", "term_q", "wild_q",
|
@@ -1250,217 +1250,217 @@ yyreduce:
|
|
1250
1250
|
{
|
1251
1251
|
case 2:
|
1252
1252
|
#line 99 "src/q_parser.y"
|
1253
|
-
{ qp->result = (yyval.query) = NULL;
|
1253
|
+
{ qp->result = (yyval.query) = NULL; }
|
1254
1254
|
break;
|
1255
1255
|
|
1256
1256
|
case 3:
|
1257
1257
|
#line 100 "src/q_parser.y"
|
1258
|
-
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss));
|
1258
|
+
{ qp->result = (yyval.query) = get_bool_q((yyvsp[0].bclss)); }
|
1259
1259
|
break;
|
1260
1260
|
|
1261
1261
|
case 4:
|
1262
1262
|
#line 102 "src/q_parser.y"
|
1263
|
-
{ (yyval.bclss) = first_cls((yyvsp[0].bcls));
|
1263
|
+
{ (yyval.bclss) = first_cls((yyvsp[0].bcls)); }
|
1264
1264
|
break;
|
1265
1265
|
|
1266
1266
|
case 5:
|
1267
1267
|
#line 103 "src/q_parser.y"
|
1268
|
-
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls));
|
1268
|
+
{ (yyval.bclss) = add_and_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1269
1269
|
break;
|
1270
1270
|
|
1271
1271
|
case 6:
|
1272
1272
|
#line 104 "src/q_parser.y"
|
1273
|
-
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls));
|
1273
|
+
{ (yyval.bclss) = add_or_cls((yyvsp[-2].bclss), (yyvsp[0].bcls)); }
|
1274
1274
|
break;
|
1275
1275
|
|
1276
1276
|
case 7:
|
1277
1277
|
#line 105 "src/q_parser.y"
|
1278
|
-
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls));
|
1278
|
+
{ (yyval.bclss) = add_default_cls(qp, (yyvsp[-1].bclss), (yyvsp[0].bcls)); }
|
1279
1279
|
break;
|
1280
1280
|
|
1281
1281
|
case 8:
|
1282
1282
|
#line 107 "src/q_parser.y"
|
1283
|
-
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST);
|
1283
|
+
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST); }
|
1284
1284
|
break;
|
1285
1285
|
|
1286
1286
|
case 9:
|
1287
1287
|
#line 108 "src/q_parser.y"
|
1288
|
-
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT);
|
1288
|
+
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_MUST_NOT); }
|
1289
1289
|
break;
|
1290
1290
|
|
1291
1291
|
case 10:
|
1292
1292
|
#line 109 "src/q_parser.y"
|
1293
|
-
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD);
|
1293
|
+
{ (yyval.bcls) = get_bool_cls((yyvsp[0].query), BC_SHOULD); }
|
1294
1294
|
break;
|
1295
1295
|
|
1296
1296
|
case 12:
|
1297
1297
|
#line 112 "src/q_parser.y"
|
1298
|
-
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query);
|
1298
|
+
{ if ((yyvsp[-2].query)) sscanf((yyvsp[0].str),"%f",&((yyvsp[-2].query)->boost)); (yyval.query)=(yyvsp[-2].query); }
|
1299
1299
|
break;
|
1300
1300
|
|
1301
1301
|
case 14:
|
1302
1302
|
#line 115 "src/q_parser.y"
|
1303
|
-
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss));
|
1303
|
+
{ (yyval.query) = get_bool_q((yyvsp[-1].bclss)); }
|
1304
1304
|
break;
|
1305
1305
|
|
1306
1306
|
case 19:
|
1307
1307
|
#line 121 "src/q_parser.y"
|
1308
|
-
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str)));
|
1308
|
+
{ FLDS((yyval.query), get_term_q(qp, field, (yyvsp[0].str))); }
|
1309
1309
|
break;
|
1310
1310
|
|
1311
1311
|
case 20:
|
1312
1312
|
#line 122 "src/q_parser.y"
|
1313
|
-
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str)));
|
1313
|
+
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-2].str), (yyvsp[0].str))); }
|
1314
1314
|
break;
|
1315
1315
|
|
1316
1316
|
case 21:
|
1317
1317
|
#line 123 "src/q_parser.y"
|
1318
|
-
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL));
|
1318
|
+
{ FLDS((yyval.query), get_fuzzy_q(qp, field, (yyvsp[-1].str), NULL)); }
|
1319
1319
|
break;
|
1320
1320
|
|
1321
1321
|
case 22:
|
1322
1322
|
#line 125 "src/q_parser.y"
|
1323
|
-
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str)));
|
1323
|
+
{ FLDS((yyval.query), get_wild_q(qp, field, (yyvsp[0].str))); }
|
1324
1324
|
break;
|
1325
1325
|
|
1326
1326
|
case 23:
|
1327
1327
|
#line 127 "src/q_parser.y"
|
1328
|
-
{ qp->fields = qp->def_fields;
|
1328
|
+
{ qp->fields = qp->def_fields; }
|
1329
1329
|
break;
|
1330
1330
|
|
1331
1331
|
case 24:
|
1332
1332
|
#line 128 "src/q_parser.y"
|
1333
|
-
{ (yyval.query) = (yyvsp[-1].query);
|
1333
|
+
{ (yyval.query) = (yyvsp[-1].query); }
|
1334
1334
|
break;
|
1335
1335
|
|
1336
1336
|
case 25:
|
1337
1337
|
#line 129 "src/q_parser.y"
|
1338
|
-
{ qp->fields = qp->all_fields;
|
1338
|
+
{ qp->fields = qp->all_fields; }
|
1339
1339
|
break;
|
1340
1340
|
|
1341
1341
|
case 26:
|
1342
1342
|
#line 129 "src/q_parser.y"
|
1343
|
-
{qp->fields = qp->def_fields
|
1343
|
+
{qp->fields = qp->def_fields;}
|
1344
1344
|
break;
|
1345
1345
|
|
1346
1346
|
case 27:
|
1347
1347
|
#line 130 "src/q_parser.y"
|
1348
|
-
{ (yyval.query) = (yyvsp[-1].query);
|
1348
|
+
{ (yyval.query) = (yyvsp[-1].query); }
|
1349
1349
|
break;
|
1350
1350
|
|
1351
1351
|
case 28:
|
1352
1352
|
#line 132 "src/q_parser.y"
|
1353
|
-
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str));
|
1353
|
+
{ (yyval.hashset) = first_field(qp, (yyvsp[0].str)); }
|
1354
1354
|
break;
|
1355
1355
|
|
1356
1356
|
case 29:
|
1357
1357
|
#line 133 "src/q_parser.y"
|
1358
|
-
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str))
|
1358
|
+
{ (yyval.hashset) = add_field(qp, (yyvsp[0].str));}
|
1359
1359
|
break;
|
1360
1360
|
|
1361
1361
|
case 30:
|
1362
1362
|
#line 135 "src/q_parser.y"
|
1363
|
-
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL);
|
1363
|
+
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-1].phrase), NULL); }
|
1364
1364
|
break;
|
1365
1365
|
|
1366
1366
|
case 31:
|
1367
1367
|
#line 136 "src/q_parser.y"
|
1368
|
-
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str));
|
1368
|
+
{ (yyval.query) = get_phrase_q(qp, (yyvsp[-3].phrase), (yyvsp[0].str)); }
|
1369
1369
|
break;
|
1370
1370
|
|
1371
1371
|
case 32:
|
1372
1372
|
#line 137 "src/q_parser.y"
|
1373
|
-
{ (yyval.query) = NULL;
|
1373
|
+
{ (yyval.query) = NULL; }
|
1374
1374
|
break;
|
1375
1375
|
|
1376
1376
|
case 33:
|
1377
1377
|
#line 138 "src/q_parser.y"
|
1378
|
-
{ (yyval.query) = NULL;
|
1378
|
+
{ (yyval.query) = NULL; }
|
1379
1379
|
break;
|
1380
1380
|
|
1381
1381
|
case 34:
|
1382
1382
|
#line 140 "src/q_parser.y"
|
1383
|
-
{ (yyval.phrase) = ph_first_word((yyvsp[0].str));
|
1383
|
+
{ (yyval.phrase) = ph_first_word((yyvsp[0].str)); }
|
1384
1384
|
break;
|
1385
1385
|
|
1386
1386
|
case 35:
|
1387
1387
|
#line 141 "src/q_parser.y"
|
1388
|
-
{ (yyval.phrase) = ph_first_word(NULL);
|
1388
|
+
{ (yyval.phrase) = ph_first_word(NULL); }
|
1389
1389
|
break;
|
1390
1390
|
|
1391
1391
|
case 36:
|
1392
1392
|
#line 142 "src/q_parser.y"
|
1393
|
-
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str));
|
1393
|
+
{ (yyval.phrase) = ph_add_word((yyvsp[-1].phrase), (yyvsp[0].str)); }
|
1394
1394
|
break;
|
1395
1395
|
|
1396
1396
|
case 37:
|
1397
1397
|
#line 143 "src/q_parser.y"
|
1398
|
-
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL);
|
1398
|
+
{ (yyval.phrase) = ph_add_word((yyvsp[-2].phrase), NULL); }
|
1399
1399
|
break;
|
1400
1400
|
|
1401
1401
|
case 38:
|
1402
1402
|
#line 144 "src/q_parser.y"
|
1403
|
-
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str));
|
1403
|
+
{ (yyval.phrase) = ph_add_multi_word((yyvsp[-2].phrase), (yyvsp[0].str)); }
|
1404
1404
|
break;
|
1405
1405
|
|
1406
1406
|
case 39:
|
1407
1407
|
#line 146 "src/q_parser.y"
|
1408
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true));
|
1408
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, true)); }
|
1409
1409
|
break;
|
1410
1410
|
|
1411
1411
|
case 40:
|
1412
1412
|
#line 147 "src/q_parser.y"
|
1413
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false));
|
1413
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), true, false)); }
|
1414
1414
|
break;
|
1415
1415
|
|
1416
1416
|
case 41:
|
1417
1417
|
#line 148 "src/q_parser.y"
|
1418
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true));
|
1418
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, true)); }
|
1419
1419
|
break;
|
1420
1420
|
|
1421
1421
|
case 42:
|
1422
1422
|
#line 149 "src/q_parser.y"
|
1423
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false));
|
1423
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-2].str), (yyvsp[-1].str), false, false)); }
|
1424
1424
|
break;
|
1425
1425
|
|
1426
1426
|
case 43:
|
1427
1427
|
#line 150 "src/q_parser.y"
|
1428
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false));
|
1428
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, false)); }
|
1429
1429
|
break;
|
1430
1430
|
|
1431
1431
|
case 44:
|
1432
1432
|
#line 151 "src/q_parser.y"
|
1433
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true));
|
1433
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[-1].str), false, true)); }
|
1434
1434
|
break;
|
1435
1435
|
|
1436
1436
|
case 45:
|
1437
1437
|
#line 152 "src/q_parser.y"
|
1438
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false));
|
1438
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,true, false)); }
|
1439
1439
|
break;
|
1440
1440
|
|
1441
1441
|
case 46:
|
1442
1442
|
#line 153 "src/q_parser.y"
|
1443
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false));
|
1443
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[-1].str), NULL,false, false)); }
|
1444
1444
|
break;
|
1445
1445
|
|
1446
1446
|
case 47:
|
1447
1447
|
#line 154 "src/q_parser.y"
|
1448
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false));
|
1448
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, false)); }
|
1449
1449
|
break;
|
1450
1450
|
|
1451
1451
|
case 48:
|
1452
1452
|
#line 155 "src/q_parser.y"
|
1453
|
-
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true));
|
1453
|
+
{ FLDS((yyval.query), get_range_q(field, NULL,(yyvsp[0].str), false, true)); }
|
1454
1454
|
break;
|
1455
1455
|
|
1456
1456
|
case 49:
|
1457
1457
|
#line 156 "src/q_parser.y"
|
1458
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false));
|
1458
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,true, false)); }
|
1459
1459
|
break;
|
1460
1460
|
|
1461
1461
|
case 50:
|
1462
1462
|
#line 157 "src/q_parser.y"
|
1463
|
-
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false));
|
1463
|
+
{ FLDS((yyval.query), get_range_q(field, (yyvsp[0].str), NULL,false, false)); }
|
1464
1464
|
break;
|
1465
1465
|
|
1466
1466
|
|
@@ -1468,7 +1468,7 @@ yyreduce:
|
|
1468
1468
|
}
|
1469
1469
|
|
1470
1470
|
/* Line 1126 of yacc.c. */
|
1471
|
-
#line 1472 "
|
1471
|
+
#line 1472 "y.tab.c"
|
1472
1472
|
|
1473
1473
|
yyvsp -= yylen;
|
1474
1474
|
yyssp -= yylen;
|
@@ -1785,8 +1785,10 @@ static int get_word(YYSTYPE *lvalp, QParser *qp)
|
|
1785
1785
|
|
1786
1786
|
/* found a word so return it. */
|
1787
1787
|
lvalp->str = buf;
|
1788
|
-
if (is_wild)
|
1789
|
-
|
1788
|
+
if (is_wild) {
|
1789
|
+
return WILD_STR;
|
1790
|
+
}
|
1791
|
+
return QWRD;
|
1790
1792
|
}
|
1791
1793
|
|
1792
1794
|
static int yylex(YYSTYPE *lvalp, QParser *qp)
|
@@ -1806,6 +1808,8 @@ static int yylex(YYSTYPE *lvalp, QParser *qp)
|
|
1806
1808
|
case '*':
|
1807
1809
|
if (nc == ':') return c;
|
1808
1810
|
break;
|
1811
|
+
case '?':
|
1812
|
+
break;
|
1809
1813
|
case '&':
|
1810
1814
|
if (nc == '&') {
|
1811
1815
|
qp->qstrp++;
|
@@ -2046,7 +2050,11 @@ static Query *get_wild_q(QParser *qp, char *field, char *pattern)
|
|
2046
2050
|
|
2047
2051
|
/* simplify the wildcard query to a prefix query if possible. Basically a
|
2048
2052
|
* prefix query is any wildcard query that has a '*' as the last character
|
2049
|
-
* and no other wildcard characters before it.
|
2053
|
+
* and no other wildcard characters before it. "*" by itself will expand
|
2054
|
+
* to a MatchAllQuery */
|
2055
|
+
if (strcmp(pattern, "*") == 0) {
|
2056
|
+
return maq_new();
|
2057
|
+
}
|
2050
2058
|
if (pattern[len - 1] == '*') {
|
2051
2059
|
is_prefix = true;
|
2052
2060
|
for (p = &pattern[len - 2]; p >= pattern; p--) {
|