whistlepig 0.7 → 0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/ext/whistlepig/dump.c +65 -0
- data/ext/whistlepig/extconf.rb +1 -1
- data/ext/whistlepig/query.c +12 -2
- data/ext/whistlepig/query.h +4 -1
- data/ext/whistlepig/search.c +1 -0
- data/ext/whistlepig/segment.c +35 -7
- data/ext/whistlepig/test-segment.c +404 -0
- data/ext/whistlepig/test-stringmap.c +82 -0
- data/ext/whistlepig/test-stringpool.c +67 -0
- data/ext/whistlepig/test-termhash.c +95 -0
- data/ext/whistlepig/test-tokenizer.c +55 -0
- data/ext/whistlepig/test.h +38 -0
- data/ext/whistlepig/timer.h +28 -0
- data/ext/whistlepig/{whistlepigc.c → whistlepig.c} +24 -1
- data/lib/whistlepig.rb +1 -1
- metadata +12 -4
data/README
CHANGED
@@ -8,8 +8,8 @@ the frills, Whistlepig may be for you.
|
|
8
8
|
Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
|
9
9
|
bindings.
|
10
10
|
|
11
|
-
Latest version: 0.
|
12
|
-
Status:
|
11
|
+
Latest version: 0.8, released 2012-03-13.
|
12
|
+
Status: beta
|
13
13
|
News: http://all-thing.net/label/whistlepig/
|
14
14
|
Homepage: http://masanjin.net/whistlepig/
|
15
15
|
Bug reports: http://github.com/wmorgan/whistlepig/issues
|
@@ -0,0 +1,65 @@
|
|
1
|
+
#include <stdio.h>
|
2
|
+
#include "whistlepig.h"
|
3
|
+
|
4
|
+
#define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
5
|
+
#define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
6
|
+
#define KEY(h, i) &(h->pool[h->keys[i]])
|
7
|
+
|
8
|
+
RAISING_STATIC(dump_posting_list(wp_segment* s, uint32_t offset)) {
|
9
|
+
posting po;
|
10
|
+
|
11
|
+
while(offset != OFFSET_NONE) {
|
12
|
+
RELAY_ERROR(wp_segment_read_posting(s, offset, &po, 1));
|
13
|
+
|
14
|
+
printf(" @%u doc %u:", offset, po.doc_id);
|
15
|
+
for(uint32_t i = 0; i < po.num_positions; i++) {
|
16
|
+
printf(" %d", po.positions[i]);
|
17
|
+
}
|
18
|
+
printf("\n");
|
19
|
+
|
20
|
+
offset = po.next_offset;
|
21
|
+
free(po.positions);
|
22
|
+
}
|
23
|
+
|
24
|
+
return NO_ERROR;
|
25
|
+
}
|
26
|
+
|
27
|
+
RAISING_STATIC(dump(wp_segment* segment)) {
|
28
|
+
termhash* th = MMAP_OBJ(segment->termhash, termhash);
|
29
|
+
stringmap* sh = MMAP_OBJ(segment->stringmap, stringmap);
|
30
|
+
|
31
|
+
for(uint32_t i = 0; i < th->n_buckets; i++) {
|
32
|
+
if(isempty(th->flags, i)); // do nothing
|
33
|
+
else if(isdel(th->flags, i)) printf("%u: [deleted]", i);
|
34
|
+
else {
|
35
|
+
term t = th->keys[i];
|
36
|
+
const char* field = stringmap_int_to_string(sh, t.field_s);
|
37
|
+
const char* word = stringmap_int_to_string(sh, t.word_s);
|
38
|
+
printf("%u: %s:'%s'\n", i, field, word);
|
39
|
+
RELAY_ERROR(dump_posting_list(segment, th->vals[i]));
|
40
|
+
}
|
41
|
+
}
|
42
|
+
|
43
|
+
return NO_ERROR;
|
44
|
+
}
|
45
|
+
|
46
|
+
int main(int argc, char* argv[]) {
|
47
|
+
if(argc != 2) {
|
48
|
+
fprintf(stderr, "Usage: %s <segment filename>\n", argv[0]);
|
49
|
+
return -1;
|
50
|
+
}
|
51
|
+
|
52
|
+
wp_index* index;
|
53
|
+
DIE_IF_ERROR(wp_index_load(&index, argv[1]));
|
54
|
+
DIE_IF_ERROR(wp_index_dumpinfo(index, stdout));
|
55
|
+
|
56
|
+
for(int i = 0; i < index->num_segments; i++) {
|
57
|
+
printf("\nsegment %d details:\n", i);
|
58
|
+
DIE_IF_ERROR(dump(&index->segments[i]));
|
59
|
+
}
|
60
|
+
|
61
|
+
DIE_IF_ERROR(wp_index_unload(index));
|
62
|
+
|
63
|
+
return 0;
|
64
|
+
}
|
65
|
+
|
data/ext/whistlepig/extconf.rb
CHANGED
data/ext/whistlepig/query.c
CHANGED
@@ -12,7 +12,17 @@ static wp_query* wp_query_new() {
|
|
12
12
|
return ret;
|
13
13
|
}
|
14
14
|
|
15
|
+
static const char* identity(const char* field, const char* word) {
|
16
|
+
(void)field;
|
17
|
+
if(word) return strdup(word);
|
18
|
+
else return NULL;
|
19
|
+
}
|
20
|
+
|
15
21
|
wp_query* wp_query_clone(wp_query* other) {
|
22
|
+
return wp_query_substitute(other, identity);
|
23
|
+
}
|
24
|
+
|
25
|
+
wp_query* wp_query_substitute(wp_query* other, const char *(*substituter)(const char* field, const char* word)) {
|
16
26
|
wp_query* ret = malloc(sizeof(wp_query));
|
17
27
|
ret->type = other->type;
|
18
28
|
ret->num_children = other->num_children;
|
@@ -21,12 +31,12 @@ wp_query* wp_query_clone(wp_query* other) {
|
|
21
31
|
if(other->field) ret->field = strdup(other->field);
|
22
32
|
else ret->field = NULL;
|
23
33
|
|
24
|
-
if(other->word) ret->word =
|
34
|
+
if(other->field && other->word) ret->word = substituter(other->field, other->word);
|
25
35
|
else ret->word = NULL;
|
26
36
|
|
27
37
|
ret->children = ret->next = ret->last = NULL; // set below
|
28
38
|
for(wp_query* child = other->children; child != NULL; child = child->next) {
|
29
|
-
wp_query* clone =
|
39
|
+
wp_query* clone = wp_query_substitute(child, substituter);
|
30
40
|
if(ret->last == NULL) ret->children = ret->last = clone;
|
31
41
|
else {
|
32
42
|
ret->last->next = clone;
|
data/ext/whistlepig/query.h
CHANGED
@@ -64,9 +64,12 @@ wp_query* wp_query_new_empty();
|
|
64
64
|
// public: make an every-document query node.
|
65
65
|
wp_query* wp_query_new_every();
|
66
66
|
|
67
|
-
// public: deep clone of a query,
|
67
|
+
// public: deep clone of a query, dropping all search state.
|
68
68
|
wp_query* wp_query_clone(wp_query* other);
|
69
69
|
|
70
|
+
// public: build a new query by substituting words from the old query, dropping all search state
|
71
|
+
wp_query* wp_query_substitute(wp_query* other, const char *(*substituter)(const char* field, const char* word));
|
72
|
+
|
70
73
|
// public: add a query node as a child of another
|
71
74
|
wp_query* wp_query_add(wp_query* a, wp_query* b);
|
72
75
|
|
data/ext/whistlepig/search.c
CHANGED
@@ -793,6 +793,7 @@ wp_error* wp_search_run_query_on_segment(struct wp_query* q, struct wp_segment*
|
|
793
793
|
#endif
|
794
794
|
|
795
795
|
while(*num_results < max_num_results) {
|
796
|
+
DEBUG("got %d results so far (max is %d)", *num_results, max_num_results);
|
796
797
|
RELAY_ERROR(query_next_doc(q, s, &results[*num_results], &done));
|
797
798
|
if(done) break;
|
798
799
|
DEBUG("got result %u (%u doc matches)", results[*num_results].doc_id, results[*num_results].num_doc_matches);
|
data/ext/whistlepig/segment.c
CHANGED
@@ -396,6 +396,9 @@ wp_error* wp_segment_read_posting(wp_segment* s, uint32_t offset, posting* po, i
|
|
396
396
|
wp_error* wp_segment_add_posting(wp_segment* s, const char* field, const char* word, docid_t doc_id, uint32_t num_positions, pos_t positions[]) {
|
397
397
|
// TODO move this logic up to ensure_fit()
|
398
398
|
int success;
|
399
|
+
|
400
|
+
if(doc_id == 0) RAISE_ERROR("can't add a label to doc 0");
|
401
|
+
|
399
402
|
RELAY_ERROR(bump_stringmap(s, &success));
|
400
403
|
RELAY_ERROR(bump_stringpool(s, &success));
|
401
404
|
RELAY_ERROR(bump_termhash(s, &success));
|
@@ -465,11 +468,14 @@ wp_error* wp_segment_read_label(wp_segment* s, uint32_t offset, posting* po) {
|
|
465
468
|
wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id) {
|
466
469
|
// TODO move this logic up to ensure_fit()
|
467
470
|
int success;
|
471
|
+
|
472
|
+
if(doc_id == 0) RAISE_ERROR("can't add a label to doc 0");
|
473
|
+
|
468
474
|
RELAY_ERROR(bump_stringmap(s, &success));
|
469
475
|
RELAY_ERROR(bump_stringpool(s, &success));
|
470
476
|
RELAY_ERROR(bump_termhash(s, &success));
|
471
477
|
|
472
|
-
DEBUG("adding label %s to doc %u", label, doc_id);
|
478
|
+
DEBUG("adding label '%s' to doc %u", label, doc_id);
|
473
479
|
|
474
480
|
postings_region* pr = MMAP_OBJ(s->labels, postings_region);
|
475
481
|
stringmap* sh = MMAP_OBJ(s->stringmap, stringmap);
|
@@ -485,22 +491,34 @@ wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id)
|
|
485
491
|
// posting
|
486
492
|
uint32_t prev_offset = OFFSET_NONE;
|
487
493
|
uint32_t next_offset = termhash_get_val(th, t);
|
494
|
+
docid_t last_docid = DOCID_NONE;
|
495
|
+
|
488
496
|
if(next_offset == (uint32_t)-1) next_offset = OFFSET_NONE;
|
497
|
+
DEBUG("start offset is %u (none is %u)", next_offset, OFFSET_NONE);
|
489
498
|
|
490
499
|
while(next_offset != OFFSET_NONE) {
|
491
|
-
label_posting*
|
492
|
-
|
500
|
+
label_posting* lp = wp_segment_label_posting_at(pr, next_offset);
|
501
|
+
|
502
|
+
if((last_docid != DOCID_NONE) && (lp->doc_id >= last_docid)) {
|
503
|
+
RAISE_ERROR("whistlepig index corruption! lp %u has docid %u but last docid at lp %u was %u", next_offset, lp->doc_id, prev_offset, last_docid);
|
504
|
+
}
|
505
|
+
else {
|
506
|
+
last_docid = lp->doc_id;
|
507
|
+
}
|
508
|
+
|
509
|
+
DEBUG("got doc id %u next_offset %u at offset %u (looking for doc id %u)", lp->doc_id, lp->next_offset, next_offset, doc_id);
|
510
|
+
if(lp->doc_id == doc_id) {
|
493
511
|
DEBUG("already have label '%s' for doc %u; returning", label, doc_id);
|
494
512
|
return NO_ERROR;
|
495
513
|
}
|
496
|
-
else if(
|
514
|
+
else if(lp->doc_id < doc_id) break;
|
497
515
|
prev_offset = next_offset;
|
498
|
-
next_offset =
|
516
|
+
next_offset = lp->next_offset;
|
499
517
|
}
|
500
518
|
|
501
519
|
// find a space for the posting by first checking for a free postings in the
|
502
|
-
// dead list.
|
503
|
-
//
|
520
|
+
// dead list. the dead list is the list stored under the sentinel term with
|
521
|
+
// field 0 and word 0.
|
504
522
|
term dead_term = { .field_s = 0, .word_s = 0 };
|
505
523
|
uint32_t entry_offset;
|
506
524
|
uint32_t dead_offset = termhash_get_val(th, dead_term);
|
@@ -550,11 +568,21 @@ wp_error* wp_segment_remove_label(wp_segment* s, const char* label, docid_t doc_
|
|
550
568
|
// find the posting and the previous posting in the list, if any
|
551
569
|
uint32_t prev_offset = OFFSET_NONE;
|
552
570
|
uint32_t offset = termhash_get_val(th, t);
|
571
|
+
docid_t last_docid = DOCID_NONE;
|
572
|
+
|
553
573
|
if(offset == (uint32_t)-1) offset = OFFSET_NONE;
|
554
574
|
label_posting* lp = NULL;
|
555
575
|
|
556
576
|
while(offset != OFFSET_NONE) {
|
557
577
|
lp = wp_segment_label_posting_at(pr, offset);
|
578
|
+
|
579
|
+
if((last_docid != DOCID_NONE) && (lp->doc_id >= last_docid)) {
|
580
|
+
RAISE_ERROR("whistlepig index corruption! lp %u has docid %u but last docid at lp %u was %u", offset, lp->doc_id, prev_offset, last_docid);
|
581
|
+
}
|
582
|
+
else {
|
583
|
+
last_docid = lp->doc_id;
|
584
|
+
}
|
585
|
+
|
558
586
|
if(lp->doc_id < doc_id) offset = OFFSET_NONE; // nasty hack to induce failure
|
559
587
|
if(lp->doc_id <= doc_id) break;
|
560
588
|
prev_offset = offset;
|
@@ -0,0 +1,404 @@
|
|
1
|
+
#include "test.h"
|
2
|
+
#include "segment.h"
|
3
|
+
#include "tokenizer.lex.h"
|
4
|
+
#include "query.h"
|
5
|
+
#include "index.h"
|
6
|
+
|
7
|
+
#define SEGMENT_PATH "/tmp/segment-test"
|
8
|
+
|
9
|
+
wp_error* setup(wp_segment* segment) {
|
10
|
+
RELAY_ERROR(wp_segment_delete(SEGMENT_PATH));
|
11
|
+
RELAY_ERROR(wp_segment_create(segment, SEGMENT_PATH));
|
12
|
+
return NO_ERROR;
|
13
|
+
}
|
14
|
+
|
15
|
+
#define ADD_DOC(word, pos) \
|
16
|
+
positions[0] = pos; \
|
17
|
+
RELAY_ERROR(wp_segment_ensure_fit(segment, postings_bytes, 0, &success)); \
|
18
|
+
if(success != 1) RAISE_ERROR("couldn't ensure segment fit"); \
|
19
|
+
RELAY_ERROR(wp_segment_add_posting(segment, "body", word, doc_id, 1, positions));
|
20
|
+
|
21
|
+
wp_error* add_docs(wp_segment* segment) {
|
22
|
+
docid_t doc_id;
|
23
|
+
pos_t positions[10];
|
24
|
+
uint32_t postings_bytes;
|
25
|
+
int success;
|
26
|
+
|
27
|
+
RELAY_ERROR(wp_segment_sizeof_posarray(segment, 1, NULL, &postings_bytes));
|
28
|
+
|
29
|
+
RELAY_ERROR(wp_segment_grab_docid(segment, &doc_id));
|
30
|
+
ADD_DOC("one", 0);
|
31
|
+
ADD_DOC("two", 1);
|
32
|
+
ADD_DOC("three", 2);
|
33
|
+
|
34
|
+
RELAY_ERROR(wp_segment_grab_docid(segment, &doc_id));
|
35
|
+
ADD_DOC("two", 0);
|
36
|
+
ADD_DOC("three", 1);
|
37
|
+
ADD_DOC("four", 2);
|
38
|
+
|
39
|
+
RELAY_ERROR(wp_segment_grab_docid(segment, &doc_id));
|
40
|
+
ADD_DOC("three", 0);
|
41
|
+
ADD_DOC("four", 1);
|
42
|
+
ADD_DOC("five", 2);
|
43
|
+
|
44
|
+
return NO_ERROR;
|
45
|
+
}
|
46
|
+
|
47
|
+
TEST(initial_state) {
|
48
|
+
wp_segment segment;
|
49
|
+
RELAY_ERROR(setup(&segment));
|
50
|
+
|
51
|
+
postings_region* pr = MMAP_OBJ(segment.postings, postings_region);
|
52
|
+
ASSERT(pr->num_docs == 0);
|
53
|
+
ASSERT(pr->num_postings == 0);
|
54
|
+
|
55
|
+
RELAY_ERROR(wp_segment_unload(&segment));
|
56
|
+
return NO_ERROR;
|
57
|
+
}
|
58
|
+
|
59
|
+
TEST(adding_a_doc_increments_counts) {
|
60
|
+
wp_segment segment;
|
61
|
+
pos_t positions[10];
|
62
|
+
docid_t doc_id;
|
63
|
+
|
64
|
+
RELAY_ERROR(setup(&segment));
|
65
|
+
RELAY_ERROR(wp_segment_grab_docid(&segment, &doc_id));
|
66
|
+
|
67
|
+
positions[0] = 0;
|
68
|
+
RELAY_ERROR(wp_segment_add_posting(&segment, "body", "hello", doc_id, 1, positions));
|
69
|
+
positions[0] = 1;
|
70
|
+
RELAY_ERROR(wp_segment_add_posting(&segment, "body", "there", doc_id, 1, positions));
|
71
|
+
|
72
|
+
postings_region* pr = MMAP_OBJ(segment.postings, postings_region);
|
73
|
+
ASSERT(pr->num_docs == 1);
|
74
|
+
ASSERT(pr->num_postings == 2);
|
75
|
+
|
76
|
+
RELAY_ERROR(wp_segment_unload(&segment));
|
77
|
+
return NO_ERROR;
|
78
|
+
}
|
79
|
+
|
80
|
+
#define RUN_QUERY(query) \
|
81
|
+
RELAY_ERROR(wp_search_init_search_state(query, &segment)); \
|
82
|
+
RELAY_ERROR(wp_search_run_query_on_segment(query, &segment, 10, &num_results, &results[0])); \
|
83
|
+
RELAY_ERROR(wp_search_release_search_state(query));
|
84
|
+
|
85
|
+
TEST(simple_term_queries) {
|
86
|
+
wp_segment segment;
|
87
|
+
uint32_t num_results;
|
88
|
+
search_result results[10];
|
89
|
+
wp_query* query;
|
90
|
+
|
91
|
+
RELAY_ERROR(setup(&segment));
|
92
|
+
RELAY_ERROR(add_docs(&segment));
|
93
|
+
|
94
|
+
query = wp_query_new_term("body", "one");
|
95
|
+
RUN_QUERY(query);
|
96
|
+
|
97
|
+
ASSERT(num_results == 1);
|
98
|
+
ASSERT(results[0].doc_id == 1);
|
99
|
+
|
100
|
+
query = wp_query_new_term("body", "two");
|
101
|
+
RUN_QUERY(query);
|
102
|
+
|
103
|
+
ASSERT(num_results == 2);
|
104
|
+
ASSERT(results[0].doc_id == 2);
|
105
|
+
ASSERT(results[1].doc_id == 1);
|
106
|
+
|
107
|
+
RELAY_ERROR(wp_segment_unload(&segment));
|
108
|
+
return NO_ERROR;
|
109
|
+
}
|
110
|
+
|
111
|
+
TEST(simple_conjunctive_queries) {
|
112
|
+
wp_segment segment;
|
113
|
+
uint32_t num_results;
|
114
|
+
search_result results[10];
|
115
|
+
wp_query* query;
|
116
|
+
|
117
|
+
RELAY_ERROR(setup(&segment));
|
118
|
+
RELAY_ERROR(add_docs(&segment));
|
119
|
+
|
120
|
+
query = wp_query_new_conjunction();
|
121
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
122
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
123
|
+
|
124
|
+
RUN_QUERY(query);
|
125
|
+
|
126
|
+
ASSERT(num_results == 1);
|
127
|
+
ASSERT(results[0].doc_id == 1);
|
128
|
+
|
129
|
+
query = wp_query_new_conjunction();
|
130
|
+
query = wp_query_add(query, wp_query_new_term("body", "four"));
|
131
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
132
|
+
|
133
|
+
RUN_QUERY(query);
|
134
|
+
|
135
|
+
ASSERT(num_results == 1);
|
136
|
+
ASSERT(results[0].doc_id == 2);
|
137
|
+
|
138
|
+
// <empty>
|
139
|
+
query = wp_query_new_conjunction();
|
140
|
+
RUN_QUERY(query);
|
141
|
+
ASSERT(num_results == 0);
|
142
|
+
|
143
|
+
RELAY_ERROR(wp_segment_unload(&segment));
|
144
|
+
return NO_ERROR;
|
145
|
+
}
|
146
|
+
|
147
|
+
TEST(simple_phrasal_queries) {
|
148
|
+
wp_segment segment;
|
149
|
+
uint32_t num_results;
|
150
|
+
search_result results[10];
|
151
|
+
wp_query* query;
|
152
|
+
|
153
|
+
RELAY_ERROR(setup(&segment));
|
154
|
+
RELAY_ERROR(add_docs(&segment));
|
155
|
+
|
156
|
+
query = wp_query_new_phrase();
|
157
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
158
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
159
|
+
RUN_QUERY(query);
|
160
|
+
ASSERT(num_results == 1);
|
161
|
+
ASSERT(results[0].doc_id == 1);
|
162
|
+
|
163
|
+
query = wp_query_new_phrase();
|
164
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
165
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
166
|
+
RUN_QUERY(query);
|
167
|
+
ASSERT(num_results == 0);
|
168
|
+
|
169
|
+
query = wp_query_new_phrase();
|
170
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
171
|
+
query = wp_query_add(query, wp_query_new_term("body", "three"));
|
172
|
+
RUN_QUERY(query);
|
173
|
+
ASSERT(num_results == 2);
|
174
|
+
ASSERT(results[0].doc_id == 2);
|
175
|
+
ASSERT(results[1].doc_id == 1);
|
176
|
+
|
177
|
+
query = wp_query_new_phrase();
|
178
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
179
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
180
|
+
query = wp_query_add(query, wp_query_new_term("body", "three"));
|
181
|
+
RUN_QUERY(query);
|
182
|
+
ASSERT(num_results == 1);
|
183
|
+
ASSERT(results[0].doc_id == 1);
|
184
|
+
|
185
|
+
RELAY_ERROR(wp_segment_unload(&segment));
|
186
|
+
return NO_ERROR;
|
187
|
+
}
|
188
|
+
|
189
|
+
TEST(segment_conjuction_of_phrase_queries) {
|
190
|
+
wp_segment segment;
|
191
|
+
uint32_t num_results;
|
192
|
+
search_result results[10];
|
193
|
+
wp_query* query;
|
194
|
+
wp_query* subquery;
|
195
|
+
|
196
|
+
RELAY_ERROR(setup(&segment));
|
197
|
+
RELAY_ERROR(add_docs(&segment));
|
198
|
+
|
199
|
+
// one "two three"
|
200
|
+
subquery = wp_query_new_phrase();
|
201
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
202
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
203
|
+
query = wp_query_new_conjunction();
|
204
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
205
|
+
query = wp_query_add(query, subquery);
|
206
|
+
|
207
|
+
RUN_QUERY(query);
|
208
|
+
ASSERT(num_results == 1);
|
209
|
+
ASSERT(results[0].doc_id == 1);
|
210
|
+
|
211
|
+
// "two three" one
|
212
|
+
subquery = wp_query_new_phrase();
|
213
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
214
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
215
|
+
query = wp_query_new_conjunction();
|
216
|
+
query = wp_query_add(query, subquery);
|
217
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
218
|
+
|
219
|
+
RUN_QUERY(query);
|
220
|
+
ASSERT(num_results == 1);
|
221
|
+
ASSERT(results[0].doc_id == 1);
|
222
|
+
|
223
|
+
// one "three two"
|
224
|
+
subquery = wp_query_new_phrase();
|
225
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
226
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
227
|
+
query = wp_query_new_conjunction();
|
228
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
229
|
+
query = wp_query_add(query, subquery);
|
230
|
+
|
231
|
+
RUN_QUERY(query);
|
232
|
+
ASSERT(num_results == 0);
|
233
|
+
|
234
|
+
// two "two three"
|
235
|
+
subquery = wp_query_new_phrase();
|
236
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
237
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
238
|
+
query = wp_query_new_conjunction();
|
239
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
240
|
+
query = wp_query_add(query, subquery);
|
241
|
+
|
242
|
+
RUN_QUERY(query);
|
243
|
+
ASSERT(num_results == 2);
|
244
|
+
ASSERT(results[0].doc_id == 2);
|
245
|
+
ASSERT(results[1].doc_id == 1);
|
246
|
+
|
247
|
+
RELAY_ERROR(wp_segment_unload(&segment));
|
248
|
+
return NO_ERROR;
|
249
|
+
}
|
250
|
+
|
251
|
+
TEST(negation_queries) {
|
252
|
+
wp_segment segment;
|
253
|
+
uint32_t num_results;
|
254
|
+
search_result results[10];
|
255
|
+
wp_query* query;
|
256
|
+
wp_query* subquery;
|
257
|
+
|
258
|
+
RELAY_ERROR(setup(&segment));
|
259
|
+
RELAY_ERROR(add_docs(&segment));
|
260
|
+
|
261
|
+
// one "two three"
|
262
|
+
subquery = wp_query_new_phrase();
|
263
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
264
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
265
|
+
query = wp_query_new_conjunction();
|
266
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
267
|
+
query = wp_query_add(query, subquery);
|
268
|
+
|
269
|
+
RUN_QUERY(query);
|
270
|
+
ASSERT(num_results == 1);
|
271
|
+
ASSERT(results[0].doc_id == 1);
|
272
|
+
|
273
|
+
// "two three" one
|
274
|
+
subquery = wp_query_new_phrase();
|
275
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
276
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
277
|
+
query = wp_query_new_conjunction();
|
278
|
+
query = wp_query_add(query, subquery);
|
279
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
280
|
+
|
281
|
+
RUN_QUERY(query);
|
282
|
+
ASSERT(num_results == 1);
|
283
|
+
ASSERT(results[0].doc_id == 1);
|
284
|
+
|
285
|
+
// one "three two"
|
286
|
+
subquery = wp_query_new_phrase();
|
287
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
288
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
289
|
+
query = wp_query_new_conjunction();
|
290
|
+
query = wp_query_add(query, wp_query_new_term("body", "one"));
|
291
|
+
query = wp_query_add(query, subquery);
|
292
|
+
|
293
|
+
RUN_QUERY(query);
|
294
|
+
ASSERT(num_results == 0);
|
295
|
+
|
296
|
+
// two "two three"
|
297
|
+
subquery = wp_query_new_phrase();
|
298
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
299
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
300
|
+
query = wp_query_new_conjunction();
|
301
|
+
query = wp_query_add(query, wp_query_new_term("body", "two"));
|
302
|
+
query = wp_query_add(query, subquery);
|
303
|
+
|
304
|
+
RUN_QUERY(query);
|
305
|
+
ASSERT(num_results == 2);
|
306
|
+
ASSERT(results[0].doc_id == 2);
|
307
|
+
ASSERT(results[1].doc_id == 1);
|
308
|
+
|
309
|
+
// <empty>
|
310
|
+
query = wp_query_new_conjunction();
|
311
|
+
RUN_QUERY(query);
|
312
|
+
ASSERT(num_results == 0);
|
313
|
+
|
314
|
+
// -one
|
315
|
+
subquery = wp_query_new_term("body", "one");
|
316
|
+
query = wp_query_new_negation();
|
317
|
+
query = wp_query_add(query, subquery);
|
318
|
+
RUN_QUERY(query);
|
319
|
+
ASSERT(num_results == 2);
|
320
|
+
ASSERT(results[0].doc_id == 3);
|
321
|
+
ASSERT(results[1].doc_id == 2);
|
322
|
+
|
323
|
+
// -two
|
324
|
+
subquery = wp_query_new_term("body", "two");
|
325
|
+
query = wp_query_new_negation();
|
326
|
+
query = wp_query_add(query, subquery);
|
327
|
+
RUN_QUERY(query);
|
328
|
+
ASSERT(num_results == 1);
|
329
|
+
ASSERT(results[0].doc_id == 3);
|
330
|
+
|
331
|
+
// -three
|
332
|
+
subquery = wp_query_new_term("body", "three");
|
333
|
+
query = wp_query_new_negation();
|
334
|
+
query = wp_query_add(query, subquery);
|
335
|
+
RUN_QUERY(query);
|
336
|
+
ASSERT(num_results == 0);
|
337
|
+
|
338
|
+
// -potato
|
339
|
+
subquery = wp_query_new_term("body", "potato");
|
340
|
+
query = wp_query_new_negation();
|
341
|
+
query = wp_query_add(query, subquery);
|
342
|
+
RUN_QUERY(query);
|
343
|
+
ASSERT(num_results == 3);
|
344
|
+
ASSERT(results[0].doc_id == 3);
|
345
|
+
ASSERT(results[1].doc_id == 2);
|
346
|
+
ASSERT(results[2].doc_id == 1);
|
347
|
+
|
348
|
+
// -"one two"
|
349
|
+
subquery = wp_query_new_conjunction();
|
350
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "one"));
|
351
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "two"));
|
352
|
+
query = wp_query_new_negation();
|
353
|
+
query = wp_query_add(query, subquery);
|
354
|
+
RUN_QUERY(query);
|
355
|
+
ASSERT(num_results == 2);
|
356
|
+
ASSERT(results[0].doc_id == 3);
|
357
|
+
ASSERT(results[1].doc_id == 2);
|
358
|
+
|
359
|
+
// -(AND one three)
|
360
|
+
subquery = wp_query_new_conjunction();
|
361
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "one"));
|
362
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
363
|
+
query = wp_query_new_negation();
|
364
|
+
query = wp_query_add(query, subquery);
|
365
|
+
RUN_QUERY(query);
|
366
|
+
ASSERT(num_results == 2);
|
367
|
+
ASSERT(results[0].doc_id == 3);
|
368
|
+
ASSERT(results[1].doc_id == 2);
|
369
|
+
|
370
|
+
// -"one three"
|
371
|
+
subquery = wp_query_new_phrase();
|
372
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "one"));
|
373
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "three"));
|
374
|
+
query = wp_query_new_negation();
|
375
|
+
query = wp_query_add(query, subquery);
|
376
|
+
RUN_QUERY(query);
|
377
|
+
ASSERT(num_results == 3);
|
378
|
+
|
379
|
+
// (AND -one three)
|
380
|
+
subquery = wp_query_new_negation();
|
381
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "one"));
|
382
|
+
query = wp_query_new_conjunction();
|
383
|
+
query = wp_query_add(query, subquery);
|
384
|
+
query = wp_query_add(query, wp_query_new_term("body", "three"));
|
385
|
+
RUN_QUERY(query);
|
386
|
+
ASSERT(num_results == 2);
|
387
|
+
ASSERT(results[0].doc_id == 3);
|
388
|
+
ASSERT(results[1].doc_id == 2);
|
389
|
+
|
390
|
+
// (AND three -one)
|
391
|
+
subquery = wp_query_new_negation();
|
392
|
+
subquery = wp_query_add(subquery, wp_query_new_term("body", "one"));
|
393
|
+
query = wp_query_new_conjunction();
|
394
|
+
query = wp_query_add(query, wp_query_new_term("body", "three"));
|
395
|
+
query = wp_query_add(query, subquery);
|
396
|
+
RUN_QUERY(query);
|
397
|
+
ASSERT(num_results == 2);
|
398
|
+
ASSERT(results[0].doc_id == 3);
|
399
|
+
ASSERT(results[1].doc_id == 2);
|
400
|
+
|
401
|
+
RELAY_ERROR(wp_segment_unload(&segment));
|
402
|
+
return NO_ERROR;
|
403
|
+
}
|
404
|
+
|
@@ -0,0 +1,82 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include "stringmap.h"
|
3
|
+
#include "error.h"
|
4
|
+
#include "test.h"
|
5
|
+
|
6
|
+
static stringmap* setup() {
|
7
|
+
stringpool* p = malloc(stringpool_initial_size());
|
8
|
+
stringpool_init(p);
|
9
|
+
stringmap* q = malloc(stringmap_initial_size());
|
10
|
+
stringmap_init(q, p);
|
11
|
+
return q;
|
12
|
+
}
|
13
|
+
|
14
|
+
TEST(stringmap_initial_state) {
|
15
|
+
stringmap* q = setup();
|
16
|
+
ASSERT(q->n_occupied == 0);
|
17
|
+
ASSERT(!stringmap_needs_bump(q));
|
18
|
+
|
19
|
+
free(q);
|
20
|
+
return NO_ERROR;
|
21
|
+
}
|
22
|
+
|
23
|
+
TEST(stringmap_lookups_on_empty) {
|
24
|
+
stringmap* q = setup();
|
25
|
+
|
26
|
+
ASSERT(stringmap_string_to_int(q, "hot potato") == (uint32_t)-1);
|
27
|
+
ASSERT(stringmap_int_to_string(q, 0) == NULL);
|
28
|
+
ASSERT(stringmap_int_to_string(q, 1234) == NULL);
|
29
|
+
|
30
|
+
free(q);
|
31
|
+
return NO_ERROR;
|
32
|
+
}
|
33
|
+
|
34
|
+
TEST(stringmap_multiple_adds) {
|
35
|
+
stringmap* q = setup();
|
36
|
+
|
37
|
+
ASSERT(stringmap_string_to_int(q, "hot potato") == (uint32_t)-1);
|
38
|
+
uint32_t x, y;
|
39
|
+
RELAY_ERROR(stringmap_add(q, "hot potato", &x));
|
40
|
+
ASSERT(x != (uint32_t)-1);
|
41
|
+
RELAY_ERROR(stringmap_add(q, "hot potato", &y));
|
42
|
+
ASSERT(y != (uint32_t)-1);
|
43
|
+
ASSERT(x == y);
|
44
|
+
|
45
|
+
free(q);
|
46
|
+
return NO_ERROR;
|
47
|
+
}
|
48
|
+
|
49
|
+
TEST(stringmap_hashing_is_preserved) {
|
50
|
+
stringmap* q = setup();
|
51
|
+
|
52
|
+
uint32_t x, y;
|
53
|
+
RELAY_ERROR(stringmap_add(q, "hello there", &x));
|
54
|
+
ASSERT(x != (uint32_t)-1);
|
55
|
+
const char* a = stringmap_int_to_string(q, x);
|
56
|
+
ASSERT(strcmp(a, "hello there") == 0);
|
57
|
+
|
58
|
+
RELAY_ERROR(stringmap_add(q, "how are you?", &y));
|
59
|
+
const char* b = stringmap_int_to_string(q, y);
|
60
|
+
ASSERT(strcmp(b, "how are you?") == 0);
|
61
|
+
|
62
|
+
ASSERT(x != y);
|
63
|
+
|
64
|
+
free(q);
|
65
|
+
return NO_ERROR;
|
66
|
+
}
|
67
|
+
|
68
|
+
TEST(stringmap_detects_out_of_room) {
|
69
|
+
stringmap* q = setup();
|
70
|
+
|
71
|
+
uint32_t x, y, z, w;
|
72
|
+
RELAY_ERROR(stringmap_add(q, "one", &x));
|
73
|
+
RELAY_ERROR(stringmap_add(q, "two", &y));
|
74
|
+
RELAY_ERROR(stringmap_add(q, "three", &z));
|
75
|
+
|
76
|
+
wp_error* e = stringmap_add(q, "four", &w);
|
77
|
+
ASSERT(e != NULL);
|
78
|
+
wp_error_free(e);
|
79
|
+
|
80
|
+
free(q);
|
81
|
+
return NO_ERROR;
|
82
|
+
}
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#include <string.h>
|
2
|
+
#include "stringpool.h"
|
3
|
+
#include "error.h"
|
4
|
+
#include "test.h"
|
5
|
+
|
6
|
+
TEST(stringpool_initial_state) {
|
7
|
+
stringpool* p = malloc(stringpool_initial_size());
|
8
|
+
stringpool_init(p);
|
9
|
+
|
10
|
+
ASSERT(!stringpool_needs_bump(p));
|
11
|
+
|
12
|
+
free(p);
|
13
|
+
return NO_ERROR;
|
14
|
+
}
|
15
|
+
|
16
|
+
TEST(stringpool_add_gives_unique_ids) {
|
17
|
+
stringpool* p = malloc(stringpool_initial_size());
|
18
|
+
stringpool_init(p);
|
19
|
+
|
20
|
+
uint32_t ret1 = stringpool_add(p, "potato");
|
21
|
+
ASSERT(ret1 > 0);
|
22
|
+
|
23
|
+
uint32_t ret2 = stringpool_add(p, "monkey");
|
24
|
+
ASSERT(ret2 > 0);
|
25
|
+
|
26
|
+
ASSERT(ret1 != ret2);
|
27
|
+
|
28
|
+
free(p);
|
29
|
+
return NO_ERROR;
|
30
|
+
}
|
31
|
+
|
32
|
+
TEST(stringpool_add_gives_ids_that_lookup_returns) {
|
33
|
+
stringpool* p = malloc(stringpool_initial_size());
|
34
|
+
stringpool_init(p);
|
35
|
+
|
36
|
+
uint32_t ret;
|
37
|
+
char* s;
|
38
|
+
|
39
|
+
ret = stringpool_add(p, "potato");
|
40
|
+
s = stringpool_lookup(p, ret);
|
41
|
+
ASSERT(!strcmp(s, "potato"));
|
42
|
+
|
43
|
+
ret = stringpool_add(p, "monkey");
|
44
|
+
s = stringpool_lookup(p, ret);
|
45
|
+
ASSERT(!strcmp(s, "monkey"));
|
46
|
+
|
47
|
+
free(p);
|
48
|
+
return NO_ERROR;
|
49
|
+
}
|
50
|
+
|
51
|
+
TEST(stringpool_detects_out_of_room) {
|
52
|
+
stringpool* p = malloc(stringpool_initial_size());
|
53
|
+
stringpool_init(p);
|
54
|
+
|
55
|
+
uint32_t ret;
|
56
|
+
int times = stringpool_initial_size() / 6;
|
57
|
+
for(int i = 0; i < times - 1; i++) {
|
58
|
+
ret = stringpool_add(p, "12345");
|
59
|
+
ASSERT(ret != (uint32_t)-1);
|
60
|
+
}
|
61
|
+
|
62
|
+
ret = stringpool_add(p, "12345");
|
63
|
+
ASSERT(ret == (uint32_t)-1);
|
64
|
+
|
65
|
+
return NO_ERROR;
|
66
|
+
}
|
67
|
+
|
@@ -0,0 +1,95 @@
|
|
1
|
+
#include "termhash.h"
|
2
|
+
#include "test.h"
|
3
|
+
#include "error.h"
|
4
|
+
|
5
|
+
TEST(termhash_initial_state) {
|
6
|
+
termhash* h = malloc(termhash_initial_size());
|
7
|
+
termhash_init(h);
|
8
|
+
|
9
|
+
ASSERT(h->n_occupied == 0);
|
10
|
+
//ASSERT(!termhash_getting_full(h));
|
11
|
+
|
12
|
+
free(h);
|
13
|
+
return NO_ERROR;
|
14
|
+
}
|
15
|
+
|
16
|
+
TEST(termhash_lookups_on_empty) {
|
17
|
+
termhash* h = malloc(termhash_initial_size());
|
18
|
+
termhash_init(h);
|
19
|
+
|
20
|
+
term t1 = {0, 0};
|
21
|
+
term t2 = {10, 20};
|
22
|
+
term t3 = {123, 345};
|
23
|
+
|
24
|
+
ASSERT(termhash_get_val(h, t1) == (uint32_t)-1);
|
25
|
+
ASSERT(termhash_get_val(h, t2) == (uint32_t)-1);
|
26
|
+
ASSERT(termhash_get_val(h, t3) == (uint32_t)-1);
|
27
|
+
|
28
|
+
free(h);
|
29
|
+
return NO_ERROR;
|
30
|
+
}
|
31
|
+
|
32
|
+
TEST(termhash_overwriting) {
|
33
|
+
termhash* h = malloc(termhash_initial_size());
|
34
|
+
termhash_init(h);
|
35
|
+
|
36
|
+
term t1 = {5, 11};
|
37
|
+
|
38
|
+
ASSERT(termhash_get_val(h, t1) == (uint32_t)-1);
|
39
|
+
RELAY_ERROR(termhash_put_val(h, t1, 1234));
|
40
|
+
ASSERT(termhash_get_val(h, t1) == 1234);
|
41
|
+
|
42
|
+
RELAY_ERROR(termhash_put_val(h, t1, 2345));
|
43
|
+
ASSERT(termhash_get_val(h, t1) == 2345);
|
44
|
+
|
45
|
+
RELAY_ERROR(termhash_put_val(h, t1, 1));
|
46
|
+
ASSERT(termhash_get_val(h, t1) == 1);
|
47
|
+
|
48
|
+
free(h);
|
49
|
+
return NO_ERROR;
|
50
|
+
}
|
51
|
+
|
52
|
+
TEST(termhash_many_puts) { // try and force a resize
|
53
|
+
termhash* h = malloc(termhash_initial_size());
|
54
|
+
termhash_init(h);
|
55
|
+
|
56
|
+
term t1 = {1, 0};
|
57
|
+
|
58
|
+
for(int i = 1; i < 100; i++) {
|
59
|
+
t1.word_s = i;
|
60
|
+
RELAY_ERROR(termhash_put_val(h, t1, 1000 + i));
|
61
|
+
if(termhash_needs_bump(h)) {
|
62
|
+
h = realloc(h, termhash_next_size(h));
|
63
|
+
if(h == NULL) RAISE_SYSERROR("realloc");
|
64
|
+
termhash_setup(h);
|
65
|
+
RELAY_ERROR(termhash_bump_size(h));
|
66
|
+
}
|
67
|
+
}
|
68
|
+
|
69
|
+
t1.word_s = 55;
|
70
|
+
uint32_t v = termhash_get_val(h, t1);
|
71
|
+
ASSERT(v == 1055);
|
72
|
+
|
73
|
+
free(h);
|
74
|
+
return NO_ERROR;
|
75
|
+
}
|
76
|
+
|
77
|
+
TEST(termhash_detects_out_of_room) {
|
78
|
+
termhash* h = malloc(termhash_initial_size());
|
79
|
+
termhash_init(h);
|
80
|
+
|
81
|
+
term t = {1, 0};
|
82
|
+
|
83
|
+
for(int i = 0; i < 3; i++) {
|
84
|
+
t.word_s = i;
|
85
|
+
RELAY_ERROR(termhash_put_val(h, t, 100 + i));
|
86
|
+
}
|
87
|
+
|
88
|
+
t.word_s = 999;
|
89
|
+
wp_error* e = termhash_put_val(h, t, 999);
|
90
|
+
ASSERT(e != NULL);
|
91
|
+
wp_error_free(e);
|
92
|
+
|
93
|
+
free(h);
|
94
|
+
return NO_ERROR;
|
95
|
+
}
|
@@ -0,0 +1,55 @@
|
|
1
|
+
#include "test.h"
|
2
|
+
#include "tokenizer.lex.h"
|
3
|
+
|
4
|
+
// Reads the next token from `scanner` (a yyscan_t that must be in scope at the
// call site) and asserts that it is a TOK_WORD whose text equals `word`.
// Wrapped in do { } while(0) so that `ASSERT_NEXT_WORD(...);` is exactly one
// statement, even as the body of an unbraced if/else.
#define ASSERT_NEXT_WORD(word) do { \
  int token_type = yylex(scanner); \
  ASSERT(token_type == TOK_WORD); \
  ASSERT(!strcmp(word, yyget_text(scanner))); \
} while(0)
|
9
|
+
|
10
|
+
// Reads the next token from `scanner` (a yyscan_t that must be in scope at the
// call site) and asserts that the tokenizer reports TOK_DONE.
// Wrapped in do { } while(0) so that `ASSERT_DONE;` is exactly one statement.
#define ASSERT_DONE do { \
  int token_type = yylex(scanner); \
  ASSERT(token_type == TOK_DONE); \
} while(0)
|
14
|
+
|
15
|
+
TEST(tokenizes_easy_words) {
|
16
|
+
yyscan_t scanner;
|
17
|
+
lexinfo charpos = {0, 0};
|
18
|
+
|
19
|
+
yylex_init_extra(&charpos, &scanner);
|
20
|
+
|
21
|
+
const char* string = "i love mice";
|
22
|
+
YY_BUFFER_STATE state = yy_scan_string(string, scanner);
|
23
|
+
|
24
|
+
ASSERT_NEXT_WORD("i");
|
25
|
+
ASSERT_NEXT_WORD("love");
|
26
|
+
ASSERT_NEXT_WORD("mice");
|
27
|
+
ASSERT_DONE;
|
28
|
+
|
29
|
+
yy_delete_buffer(state, scanner);
|
30
|
+
yylex_destroy(scanner);
|
31
|
+
|
32
|
+
return NO_ERROR;
|
33
|
+
}
|
34
|
+
|
35
|
+
TEST(strips_trailing_punctuation) {
|
36
|
+
yyscan_t scanner;
|
37
|
+
lexinfo charpos = {0, 0};
|
38
|
+
|
39
|
+
yylex_init_extra(&charpos, &scanner);
|
40
|
+
|
41
|
+
const char* string = "hey! this: you're <cool>";
|
42
|
+
YY_BUFFER_STATE state = yy_scan_string(string, scanner);
|
43
|
+
|
44
|
+
ASSERT_NEXT_WORD("hey");
|
45
|
+
ASSERT_NEXT_WORD("this");
|
46
|
+
ASSERT_NEXT_WORD("you're");
|
47
|
+
ASSERT_NEXT_WORD("cool");
|
48
|
+
ASSERT_DONE;
|
49
|
+
|
50
|
+
yy_delete_buffer(state, scanner);
|
51
|
+
yylex_destroy(scanner);
|
52
|
+
|
53
|
+
return NO_ERROR;
|
54
|
+
}
|
55
|
+
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#ifndef WP_TEST_H_
|
2
|
+
#define WP_TEST_H_
|
3
|
+
|
4
|
+
// whistlepig test header file
|
5
|
+
// (c) 2011 William Morgan. See COPYING for license terms.
|
6
|
+
//
|
7
|
+
// macros for the c unit tests
|
8
|
+
|
9
|
+
// Counts one assertion in *asserts and evaluates x. When x is false it prints
// a failure report (expression text, function, file and line), sets *fail to 1
// and returns NO_ERROR from the enclosing function, so the remainder of the
// test is skipped. `fail` and `asserts` must be in scope at the call site.
// __func__ is the standard C99 spelling; in C, GCC's __PRETTY_FUNCTION__ is
// just another name for it, so the printed text is unchanged.
#define ASSERT(x) do { \
  (*asserts)++; \
  if(!(x)) { \
    printf("-- test failure: (" #x ") is FALSE in %s (%s:%d)\n\n", __func__, __FILE__, __LINE__); \
    *fail = 1; \
    return NO_ERROR; \
  } \
} while(0)
|
17
|
+
|
18
|
+
// Expands to the signature of test case `name`: a function called test_<name>
// that returns a wp_error* and takes the failure flag and assertion counter.
// The parameter names `fail` and `asserts` are what ASSERT() expects to find.
#define TEST(name) wp_error* test_##name(int* fail, int* asserts)
|
19
|
+
|
20
|
+
// Runs test case x (the function test_<x>) and reports the outcome on stdout:
//   PASS - no assertion failed and no error was returned
//   FAIL - an assertion set the failure flag
//    ERR - the test returned a wp_error, which is also printed
// The counters `tests`, `asserts`, `failures` and `errors` must be in scope at
// the call site; they are updated to reflect this run.
#define RUNTEST(x) do { \
  int rt_failed = 0; \
  int rt_asserts = 0; \
  wp_error* rt_err; \
  tests++; \
  rt_err = test_##x(&rt_failed, &rt_asserts); \
  asserts += rt_asserts; \
  if(!rt_failed && !rt_err) { \
    printf("PASS %d/%d " #x "\n", rt_asserts, rt_asserts); \
  } \
  else if(rt_failed) { \
    printf("FAIL " #x "\n"); \
    failures++; \
  } \
  else { \
    errors++; \
    printf(" ERR " #x "\n"); \
    PRINT_ERROR(rt_err, stdout); \
  } \
} while(0)
|
37
|
+
|
38
|
+
#endif
|
@@ -0,0 +1,28 @@
|
|
1
|
+
#ifndef WP_TIMER_H_
|
2
|
+
#define WP_TIMER_H_
|
3
|
+
|
4
|
+
// whistlepig timer header file
|
5
|
+
// (c) 2011 William Morgan. See COPYING for license terms.
|
6
|
+
//
|
7
|
+
// just some timer macros
|
8
|
+
|
9
|
+
#include <sys/time.h>
|
10
|
+
|
11
|
+
// Declares the variables behind a timer called `name`: its start and end
// timestamps and the elapsed time in milliseconds. Expands to declarations
// with their own trailing semicolons.
#define TIMER(name) \
  struct timeval name##_startt, name##_endt; \
  long name##_elapsed;

// Declares timer `name` (see TIMER) and records the current time as its start.
// Expands to declarations plus a statement, so it cannot be the body of an
// unbraced if/else.
#define START_TIMER(name) \
  TIMER(name) \
  gettimeofday(&name##_startt, NULL);

// Restarts an already declared timer from the current time.
#define RESET_TIMER(name) gettimeofday(&name##_startt, NULL);

// Records the current time as the end of timer `name` and stores the time
// since its start, in milliseconds, in name##_elapsed. The seconds and
// microseconds parts are converted separately using integer arithmetic.
// Expands to two statements, so it cannot be the body of an unbraced if/else.
#define MARK_TIMER(name) \
  gettimeofday(&name##_endt, NULL); \
  name##_elapsed = ((name##_endt.tv_sec - name##_startt.tv_sec) * 1000) + ((name##_endt.tv_usec - name##_startt.tv_usec) / 1000);

// Milliseconds stored by the most recent MARK_TIMER(name).
#define TIMER_MS(name) name##_elapsed
|
27
|
+
|
28
|
+
#endif
|
@@ -367,6 +367,28 @@ static VALUE query_clone(VALUE self) {
|
|
367
367
|
return o_query;
|
368
368
|
}
|
369
369
|
|
370
|
+
static const char* yielding_substituter(const char* field, const char* term) {
|
371
|
+
VALUE result = rb_yield_values(2, rb_str_new2(field), rb_str_new2(term));
|
372
|
+
if(NIL_P(result)) return strdup(term);
|
373
|
+
else return strdup(RSTRING_PTR(result));
|
374
|
+
}
|
375
|
+
|
376
|
+
/*
|
377
|
+
* Returns a new query that's the result of applying the block to each
|
378
|
+
* word in the query. Useful for transforming queries programmatically
|
379
|
+
* after they've been parsed.
|
380
|
+
*
|
381
|
+
*/
|
382
|
+
static VALUE query_map_terms(VALUE self) {
|
383
|
+
char buf[1024];
|
384
|
+
|
385
|
+
wp_query* query; Data_Get_Struct(self, wp_query, query);
|
386
|
+
wp_query* result = wp_query_substitute(query, yielding_substituter);
|
387
|
+
|
388
|
+
VALUE o_query = Data_Wrap_Struct(c_query, NULL, wp_query_free, result);
|
389
|
+
return o_query;
|
390
|
+
}
|
391
|
+
|
370
392
|
/*
|
371
393
|
* call-seq: and(other)
|
372
394
|
*
|
@@ -504,7 +526,7 @@ static VALUE index_run_query(VALUE self, VALUE v_query, VALUE v_max_num_results)
|
|
504
526
|
return array;
|
505
527
|
}
|
506
528
|
|
507
|
-
void
|
529
|
+
void Init_whistlepig() {
|
508
530
|
VALUE m_whistlepig;
|
509
531
|
|
510
532
|
m_whistlepig = rb_define_module("Whistlepig");
|
@@ -541,6 +563,7 @@ void Init_whistlepigc() {
|
|
541
563
|
rb_define_method(c_query, "or", query_or, 1);
|
542
564
|
rb_define_method(c_query, "to_s", query_to_s, 0);
|
543
565
|
rb_define_method(c_query, "clone", query_clone, 0);
|
566
|
+
rb_define_method(c_query, "term_map", query_map_terms, 0);
|
544
567
|
rb_define_attr(c_query, "query", 1, 0);
|
545
568
|
|
546
569
|
c_error = rb_define_class_under(m_whistlepig, "Error", rb_eStandardError);
|
data/lib/whistlepig.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whistlepig
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.
|
4
|
+
version: '0.8'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2012-03-13 14:35:03.000000000 -07:00
|
13
13
|
default_executable:
|
14
14
|
dependencies: []
|
15
15
|
description: Whistlepig is a minimalist realtime full-text search index. Its goal
|
@@ -22,43 +22,51 @@ extensions:
|
|
22
22
|
- ext/whistlepig/extconf.rb
|
23
23
|
extra_rdoc_files:
|
24
24
|
- README
|
25
|
-
- ext/whistlepig/
|
25
|
+
- ext/whistlepig/whistlepig.c
|
26
26
|
files:
|
27
27
|
- README
|
28
28
|
- ext/whistlepig/extconf.rb
|
29
29
|
- lib/whistlepig.rb
|
30
30
|
- ext/whistlepig/query-parser.lex.h
|
31
31
|
- ext/whistlepig/entry.h
|
32
|
-
- ext/whistlepig/whistlepigc.c
|
33
32
|
- ext/whistlepig/stringmap.c
|
34
33
|
- ext/whistlepig/tokenizer.lex.h
|
35
34
|
- ext/whistlepig/whistlepig.h
|
36
35
|
- ext/whistlepig/error.c
|
37
36
|
- ext/whistlepig/extconf.h
|
38
37
|
- ext/whistlepig/stringmap.h
|
38
|
+
- ext/whistlepig/timer.h
|
39
39
|
- ext/whistlepig/query-parser.lex.c
|
40
40
|
- ext/whistlepig/defaults.h
|
41
41
|
- ext/whistlepig/tokenizer.lex.c
|
42
|
+
- ext/whistlepig/test-stringpool.c
|
42
43
|
- ext/whistlepig/termhash.c
|
43
44
|
- ext/whistlepig/query-parser.h
|
44
45
|
- ext/whistlepig/index.c
|
45
46
|
- ext/whistlepig/stringpool.c
|
47
|
+
- ext/whistlepig/test-termhash.c
|
46
48
|
- ext/whistlepig/query.h
|
47
49
|
- ext/whistlepig/query-parser.c
|
48
50
|
- ext/whistlepig/stringpool.h
|
49
51
|
- ext/whistlepig/mmap-obj.c
|
52
|
+
- ext/whistlepig/whistlepig.c
|
50
53
|
- ext/whistlepig/search.c
|
51
54
|
- ext/whistlepig/termhash.h
|
52
55
|
- ext/whistlepig/query.c
|
56
|
+
- ext/whistlepig/dump.c
|
53
57
|
- ext/whistlepig/query-parser.tab.h
|
58
|
+
- ext/whistlepig/test.h
|
54
59
|
- ext/whistlepig/khash.h
|
55
60
|
- ext/whistlepig/query-parser.tab.c
|
56
61
|
- ext/whistlepig/entry.c
|
57
62
|
- ext/whistlepig/index.h
|
58
63
|
- ext/whistlepig/segment.h
|
64
|
+
- ext/whistlepig/test-stringmap.c
|
59
65
|
- ext/whistlepig/mmap-obj.h
|
60
66
|
- ext/whistlepig/segment.c
|
67
|
+
- ext/whistlepig/test-segment.c
|
61
68
|
- ext/whistlepig/search.h
|
69
|
+
- ext/whistlepig/test-tokenizer.c
|
62
70
|
- ext/whistlepig/error.h
|
63
71
|
has_rdoc: true
|
64
72
|
homepage: http://masanjin.net/whistlepig
|