whistlepig 0.1 → 0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +13 -14
- data/ext/whistlepig/entry.c +2 -2
- data/ext/whistlepig/extconf.rb +1 -1
- data/ext/whistlepig/index.c +11 -2
- data/ext/whistlepig/khash.h +9 -4
- data/ext/whistlepig/mmap-obj.c +3 -3
- data/ext/whistlepig/query-parser.lex.c +12 -11
- data/ext/whistlepig/query-parser.lex.h +1 -1
- data/ext/whistlepig/query.c +4 -9
- data/ext/whistlepig/query.h +1 -1
- data/ext/whistlepig/segment.c +9 -10
- data/ext/whistlepig/stringmap.c +7 -7
- data/ext/whistlepig/stringpool.c +4 -4
- data/ext/whistlepig/termhash.c +8 -8
- data/ext/whistlepig/tokenizer.lex.c +3 -3
- data/ext/whistlepig/whistlepig.h +3 -0
- data/ext/whistlepig/whistlepigc.c +0 -6
- metadata +6 -3
data/README
CHANGED
@@ -8,32 +8,33 @@ the frills, Whistlepig may be for you.
|
|
8
8
|
Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
|
9
9
|
bindings.
|
10
10
|
|
11
|
-
Latest version: 0.
|
11
|
+
Latest version: 0.2, released 2011-02-09.
|
12
12
|
Status: alpha
|
13
13
|
News: http://all-thing.net/label/whistlepig/
|
14
14
|
Homepage: http://masanjin.net/whistlepig/
|
15
|
+
Bug reports: http://github.com/wmorgan/whistlepig/issues
|
15
16
|
|
16
17
|
= Getting it
|
17
18
|
|
18
|
-
Tarball: http://masanjin.net/whistlepig/whistlepig-0.
|
19
|
+
Tarball: http://masanjin.net/whistlepig/whistlepig-0.2.tar.gz
|
19
20
|
Rubygem: gem install whistlepig
|
20
21
|
Git: git clone git://github.com/wmorgan/whistlepig.git
|
21
22
|
|
22
23
|
= Realtime search
|
23
24
|
|
24
25
|
Roughly speaking, realtime search means:
|
25
|
-
- documents are available to to queries immediately after indexing, without
|
26
|
-
|
26
|
+
- documents are available to queries immediately after indexing, without any
|
27
|
+
reindexing or index merging;
|
27
28
|
- later documents are more important than earlier documents.
|
28
29
|
|
29
|
-
Whistlepig takes these principles to an extreme.
|
30
|
-
- It only returns documents in the reverse order to which they were
|
31
|
-
|
30
|
+
Whistlepig takes these principles to an extreme.
|
31
|
+
- It only returns documents in the reverse (LIFO) order to which they were
|
32
|
+
added, and performs no ranking, reordering, or scoring.
|
32
33
|
- It only supports incremental indexing. There is no notion of batch indexing
|
33
34
|
or index merging.
|
34
35
|
- It does not support document deletion or modification (except in the
|
35
36
|
special case of labels; see below).
|
36
|
-
-
|
37
|
+
- It only supports in-memory indexes.
|
37
38
|
|
38
39
|
Features that Whistlepig does provide:
|
39
40
|
- Incremental indexing. Updates to the index are immediately available to
|
@@ -42,10 +43,8 @@ Features that Whistlepig does provide:
|
|
42
43
|
- A full query language and parser with conjunctions, disjunctions, phrases,
|
43
44
|
negations, grouping, and nesting.
|
44
45
|
- Labels: arbitrary tokens which can be added to and removed from documents
|
45
|
-
at any point, and incorporated into search queries.
|
46
|
-
|
47
|
-
- Early query termination.
|
48
|
-
- Resumable queries.
|
46
|
+
at any point, and incorporated into search queries.
|
47
|
+
- Early query termination and resumable queries.
|
49
48
|
- A tiny, < 3 KLOC ANSI C99 implementation.
|
50
49
|
|
51
50
|
== Synopsis (using Ruby bindings)
|
@@ -81,6 +80,6 @@ Features that Whistlepig does provide:
|
|
81
80
|
Whistlepig is currently single-process and single-thread only. However, it is
|
82
81
|
built with multi-process access in mind. Per-segment single-writer,
|
83
82
|
multi-reader support is planned in the near future. Multi-writer support can be
|
84
|
-
accomplished via index striping and
|
83
|
+
accomplished via index striping and may be attempted in the distant future.
|
85
84
|
|
86
|
-
Please send bug reports and comments to: wmorgan-whistlepig-
|
85
|
+
Please send bug reports and comments to: wmorgan-whistlepig-readme@masanjin.net.
|
data/ext/whistlepig/entry.c
CHANGED
@@ -46,7 +46,7 @@ wp_entry* wp_entry_new() {
|
|
46
46
|
return ret;
|
47
47
|
}
|
48
48
|
|
49
|
-
RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term,
|
49
|
+
RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term, size_t field_len, size_t term_len)) {
|
50
50
|
fielded_term ft;
|
51
51
|
int status;
|
52
52
|
|
@@ -89,7 +89,7 @@ uint32_t wp_entry_size(wp_entry* entry) {
|
|
89
89
|
|
90
90
|
RAISING_STATIC(add_from_lexer(wp_entry* entry, yyscan_t* scanner, const char* field)) {
|
91
91
|
int token_type;
|
92
|
-
|
92
|
+
size_t field_len = strlen(field);
|
93
93
|
|
94
94
|
do {
|
95
95
|
token_type = yylex(*scanner);
|
data/ext/whistlepig/extconf.rb
CHANGED
data/ext/whistlepig/index.c
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#include <inttypes.h>
|
1
2
|
#include <stdio.h>
|
2
3
|
#include <sys/types.h>
|
3
4
|
#include <sys/stat.h>
|
@@ -261,7 +262,7 @@ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id
|
|
261
262
|
else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
|
262
263
|
}
|
263
264
|
|
264
|
-
if(!found) RAISE_ERROR("couldn't find doc id %
|
265
|
+
if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
|
265
266
|
|
266
267
|
return NO_ERROR;
|
267
268
|
}
|
@@ -279,7 +280,7 @@ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc
|
|
279
280
|
else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
|
280
281
|
}
|
281
282
|
|
282
|
-
if(!found) RAISE_ERROR("couldn't find doc id %
|
283
|
+
if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
|
283
284
|
|
284
285
|
return NO_ERROR;
|
285
286
|
}
|
@@ -292,3 +293,11 @@ uint64_t wp_index_num_docs(wp_index* index) {
|
|
292
293
|
|
293
294
|
return ret;
|
294
295
|
}
|
296
|
+
|
297
|
+
// insane. but i'm putting this here. not defined in c99. don't want to make a
|
298
|
+
// "utils.c" or "compat.c" or whatever just yet.
|
299
|
+
char* strdup(const char* old) {
|
300
|
+
size_t len = strlen(old) + 1;
|
301
|
+
char *new = malloc(len * sizeof(char));
|
302
|
+
return memcpy(new, old, len);
|
303
|
+
}
|
data/ext/whistlepig/khash.h
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
/* modified by william morgan to cast sizeof() properly.
|
2
|
+
otherwise it has trouble compiling on 64-bit platforms with
|
3
|
+
-Wshorten-64-to-32 enabled.
|
4
|
+
*/
|
5
|
+
|
1
6
|
/* The MIT License
|
2
7
|
|
3
8
|
Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
|
@@ -102,10 +107,10 @@ static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
|
|
102
107
|
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
103
108
|
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
104
109
|
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
105
|
-
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
|
106
|
-
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
|
107
|
-
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
108
|
-
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
110
|
+
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
|
111
|
+
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
|
112
|
+
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
|
113
|
+
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
|
109
114
|
|
110
115
|
static const double __ac_HASH_UPPER = 0.77;
|
111
116
|
|
data/ext/whistlepig/mmap-obj.c
CHANGED
@@ -13,7 +13,7 @@ wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname,
|
|
13
13
|
o->fd = open(pathname, O_EXCL | O_CREAT | O_RDWR, 0640);
|
14
14
|
if(o->fd == -1) RAISE_SYSERROR("cannot create %s", pathname);
|
15
15
|
|
16
|
-
uint32_t size = initial_size + sizeof(mmap_obj_header);
|
16
|
+
uint32_t size = initial_size + (uint32_t)sizeof(mmap_obj_header);
|
17
17
|
DEBUG("creating %s with %u + %u = %u bytes for %s object", pathname, initial_size, sizeof(mmap_obj_header), size, magic);
|
18
18
|
lseek(o->fd, size - 1, SEEK_SET);
|
19
19
|
ssize_t num_bytes = write(o->fd, "", 1);
|
@@ -39,7 +39,7 @@ wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) {
|
|
39
39
|
|
40
40
|
RELAY_ERROR(validate(o->header, magic));
|
41
41
|
|
42
|
-
uint32_t size = o->header->size + sizeof(mmap_obj_header);
|
42
|
+
uint32_t size = o->header->size + (uint32_t)sizeof(mmap_obj_header);
|
43
43
|
DEBUG("full size is %u bytes (including %u-byte header)", size, sizeof(mmap_obj_header));
|
44
44
|
if(munmap(o->header, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");
|
45
45
|
|
@@ -54,7 +54,7 @@ wp_error* mmap_obj_resize(mmap_obj* o, uint32_t data_size) {
|
|
54
54
|
DEBUG("going to expand from %u to %u bytes. current header is at %p", o->header->size, data_size, o->header);
|
55
55
|
|
56
56
|
if(munmap(o->header, sizeof(mmap_obj_header) + o->header->size) == -1) RAISE_SYSERROR("munmap");
|
57
|
-
uint32_t size = data_size + sizeof(mmap_obj_header);
|
57
|
+
uint32_t size = data_size + (uint32_t)sizeof(mmap_obj_header);
|
58
58
|
|
59
59
|
lseek(o->fd, size - 1, SEEK_SET);
|
60
60
|
ssize_t num_bytes = write(o->fd, "", 1);
|
@@ -677,6 +677,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
|
|
677
677
|
#line 1 "query-parser.lex"
|
678
678
|
#line 2 "query-parser.lex"
|
679
679
|
#include <string.h>
|
680
|
+
#include "whistlepig.h"
|
680
681
|
#include "query-parser.h"
|
681
682
|
#include "query-parser.tab.h"
|
682
683
|
|
@@ -694,7 +695,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
|
|
694
695
|
} \
|
695
696
|
}
|
696
697
|
|
697
|
-
#line
|
698
|
+
#line 699 "query-parser.lex.c"
|
698
699
|
|
699
700
|
#define INITIAL 0
|
700
701
|
|
@@ -924,10 +925,10 @@ YY_DECL
|
|
924
925
|
register int yy_act;
|
925
926
|
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
|
926
927
|
|
927
|
-
#line
|
928
|
+
#line 32 "query-parser.lex"
|
928
929
|
|
929
930
|
|
930
|
-
#line
|
931
|
+
#line 932 "query-parser.lex.c"
|
931
932
|
|
932
933
|
yylval = yylval_param;
|
933
934
|
|
@@ -1023,12 +1024,12 @@ do_action: /* This label is used only to access EOF actions. */
|
|
1023
1024
|
|
1024
1025
|
case 1:
|
1025
1026
|
YY_RULE_SETUP
|
1026
|
-
#line
|
1027
|
+
#line 34 "query-parser.lex"
|
1027
1028
|
return OR;
|
1028
1029
|
YY_BREAK
|
1029
1030
|
case 2:
|
1030
1031
|
YY_RULE_SETUP
|
1031
|
-
#line
|
1032
|
+
#line 36 "query-parser.lex"
|
1032
1033
|
{
|
1033
1034
|
yylval->string = strdup(yytext);
|
1034
1035
|
return WORD;
|
@@ -1036,7 +1037,7 @@ YY_RULE_SETUP
|
|
1036
1037
|
YY_BREAK
|
1037
1038
|
case 3:
|
1038
1039
|
YY_RULE_SETUP
|
1039
|
-
#line
|
1040
|
+
#line 41 "query-parser.lex"
|
1040
1041
|
{
|
1041
1042
|
yylval->string = strdup(yytext);
|
1042
1043
|
return WORD;
|
@@ -1045,20 +1046,20 @@ YY_RULE_SETUP
|
|
1045
1046
|
case 4:
|
1046
1047
|
/* rule 4 can match eol */
|
1047
1048
|
YY_RULE_SETUP
|
1048
|
-
#line
|
1049
|
+
#line 46 "query-parser.lex"
|
1049
1050
|
{ } ; // nothing
|
1050
1051
|
YY_BREAK
|
1051
1052
|
case 5:
|
1052
1053
|
YY_RULE_SETUP
|
1053
|
-
#line
|
1054
|
+
#line 48 "query-parser.lex"
|
1054
1055
|
return yytext[0];
|
1055
1056
|
YY_BREAK
|
1056
1057
|
case 6:
|
1057
1058
|
YY_RULE_SETUP
|
1058
|
-
#line
|
1059
|
+
#line 50 "query-parser.lex"
|
1059
1060
|
ECHO;
|
1060
1061
|
YY_BREAK
|
1061
|
-
#line
|
1062
|
+
#line 1063 "query-parser.lex.c"
|
1062
1063
|
case YY_STATE_EOF(INITIAL):
|
1063
1064
|
yyterminate();
|
1064
1065
|
|
@@ -2242,7 +2243,7 @@ void query_parser_free (void * ptr , yyscan_t yyscanner)
|
|
2242
2243
|
|
2243
2244
|
#define YYTABLES_NAME "yytables"
|
2244
2245
|
|
2245
|
-
#line
|
2246
|
+
#line 50 "query-parser.lex"
|
2246
2247
|
|
2247
2248
|
|
2248
2249
|
|
data/ext/whistlepig/query.c
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#include "whistlepig.h"
|
1
2
|
#include "query.h"
|
2
3
|
|
3
4
|
static wp_query* wp_query_new() {
|
@@ -11,12 +12,6 @@ static wp_query* wp_query_new() {
|
|
11
12
|
return ret;
|
12
13
|
}
|
13
14
|
|
14
|
-
static char* strdup(const char* old) { // sigh... not in c99
|
15
|
-
size_t len = strlen(old) + 1;
|
16
|
-
char *new = malloc(len * sizeof(char));
|
17
|
-
return memcpy(new, old, len);
|
18
|
-
}
|
19
|
-
|
20
15
|
wp_query* wp_query_clone(wp_query* other) {
|
21
16
|
wp_query* ret = malloc(sizeof(wp_query));
|
22
17
|
ret->type = other->type;
|
@@ -128,13 +123,13 @@ static int subquery_to_s(wp_query* q, size_t n, char* buf) {
|
|
128
123
|
buf += wp_query_to_s(child, n - (buf - orig_buf), buf);
|
129
124
|
}
|
130
125
|
|
131
|
-
return buf - orig_buf;
|
126
|
+
return (int)(buf - orig_buf);
|
132
127
|
}
|
133
128
|
|
134
129
|
#define min(a, b) (a < b ? a : b)
|
135
130
|
|
136
|
-
|
137
|
-
|
131
|
+
size_t wp_query_to_s(wp_query* q, size_t n, char* buf) {
|
132
|
+
size_t ret;
|
138
133
|
char* orig_buf = buf;
|
139
134
|
|
140
135
|
if(q->type == WP_QUERY_EMPTY) {
|
data/ext/whistlepig/query.h
CHANGED
@@ -73,6 +73,6 @@ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field);
|
|
73
73
|
void wp_query_free(wp_query* q);
|
74
74
|
|
75
75
|
// public: build a string representation of a query by writing at most n chars to buf
|
76
|
-
|
76
|
+
size_t wp_query_to_s(wp_query* q, size_t n, char* buf);
|
77
77
|
|
78
78
|
#endif
|
data/ext/whistlepig/segment.c
CHANGED
@@ -241,8 +241,8 @@ wp_error* wp_segment_ensure_fit(wp_segment* seg, uint32_t postings_bytes, uint32
|
|
241
241
|
|
242
242
|
static uint32_t size_of(uint32_t num_positions, pos_t positions[]) {
|
243
243
|
(void)positions;
|
244
|
-
uint32_t position_size = sizeof(pos_t) * num_positions;
|
245
|
-
uint32_t size = sizeof(posting) - sizeof(pos_t*) + position_size;
|
244
|
+
uint32_t position_size = (uint32_t)sizeof(pos_t) * num_positions;
|
245
|
+
uint32_t size = (uint32_t)sizeof(posting) - (uint32_t)sizeof(pos_t*) + position_size;
|
246
246
|
|
247
247
|
return size;
|
248
248
|
}
|
@@ -253,23 +253,22 @@ wp_error* wp_segment_sizeof_posarray(wp_segment* seg, uint32_t num_positions, po
|
|
253
253
|
return NO_ERROR;
|
254
254
|
}
|
255
255
|
|
256
|
-
#define
|
257
|
-
|
256
|
+
#define VALUE_BITMASK 0x7f
|
258
257
|
RAISING_STATIC(write_multibyte(uint8_t* location, uint32_t val, uint32_t* size)) {
|
259
258
|
//printf("xx writing %u to position %p as:\n", val, location);
|
260
259
|
uint8_t* start = location;
|
261
260
|
|
262
|
-
while(val >
|
263
|
-
uint8_t c = (val &
|
261
|
+
while(val > VALUE_BITMASK) {
|
262
|
+
uint8_t c = (val & VALUE_BITMASK) | 0x80;
|
264
263
|
*location = c;
|
265
264
|
//printf("xx %d = %d | %d at %p\n", c, val & BITMASK, 0x80, location);
|
266
265
|
location++;
|
267
266
|
val >>= 7;
|
268
267
|
}
|
269
|
-
uint8_t c = (val &
|
268
|
+
uint8_t c = (val & VALUE_BITMASK);
|
270
269
|
*location = c;
|
271
270
|
//printf("xx %d at %p\n", c, location);
|
272
|
-
*size = location + 1 - start;
|
271
|
+
*size = (uint32_t)(location + 1 - start);
|
273
272
|
//printf("xx total %u bytes\n", *size);
|
274
273
|
return NO_ERROR;
|
275
274
|
}
|
@@ -287,7 +286,7 @@ RAISING_STATIC(read_multibyte(uint8_t* location, uint32_t* val, uint32_t* size))
|
|
287
286
|
}
|
288
287
|
*val |= *location << shift;
|
289
288
|
//printf("yy read final byte %d at %p\n", *location, location);
|
290
|
-
*size = location + 1 - start;
|
289
|
+
*size = (uint32_t)(location + 1 - start);
|
291
290
|
//printf("yy total %d bytes, val = %d\n\n", *size, *val);
|
292
291
|
return NO_ERROR;
|
293
292
|
}
|
@@ -522,7 +521,7 @@ wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id)
|
|
522
521
|
po->doc_id = doc_id;
|
523
522
|
po->next_offset = next_offset;
|
524
523
|
|
525
|
-
pr->postings_head += sizeof(label_posting);
|
524
|
+
pr->postings_head += (uint32_t)sizeof(label_posting);
|
526
525
|
DEBUG("label postings list head now at %u", pr->postings_head);
|
527
526
|
|
528
527
|
// really finally, update either the previous offset or the tail pointer
|
data/ext/whistlepig/stringmap.c
CHANGED
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
|
|
15
15
|
#define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
16
16
|
#define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
17
17
|
#define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
18
|
-
#define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
|
19
|
-
#define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
|
20
|
-
#define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
21
|
-
#define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
18
|
+
#define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
|
19
|
+
#define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
|
20
|
+
#define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
|
21
|
+
#define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
|
22
22
|
|
23
23
|
static const double HASH_UPPER = 0.77;
|
24
24
|
|
@@ -234,9 +234,9 @@ int stringmap_needs_bump(stringmap* h) {
|
|
234
234
|
// ((n_buckets >> 4) + 1) uint32_t's for the flags
|
235
235
|
// n_buckets uint32_t for the keys
|
236
236
|
static uint32_t size(uint32_t n_buckets) {
|
237
|
-
uint32_t size = sizeof(stringmap) +
|
238
|
-
(((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
|
239
|
-
(n_buckets * sizeof(uint32_t));
|
237
|
+
uint32_t size = (uint32_t)sizeof(stringmap) +
|
238
|
+
(((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
|
239
|
+
(n_buckets * (uint32_t)sizeof(uint32_t));
|
240
240
|
return size;
|
241
241
|
}
|
242
242
|
|
data/ext/whistlepig/stringpool.c
CHANGED
@@ -6,11 +6,11 @@ void stringpool_init(stringpool* p) {
|
|
6
6
|
}
|
7
7
|
|
8
8
|
uint32_t stringpool_size(stringpool* p) {
|
9
|
-
return sizeof(stringpool) + (p->size * sizeof(char));
|
9
|
+
return (uint32_t)sizeof(stringpool) + (p->size * (uint32_t)sizeof(char));
|
10
10
|
}
|
11
11
|
|
12
12
|
uint32_t stringpool_add(stringpool* p, const char* s) {
|
13
|
-
|
13
|
+
uint32_t len = (uint32_t)strlen(s) + 1;
|
14
14
|
if((p->next + len) >= p->size) {
|
15
15
|
DEBUG("out of space in string pool for %s (len %d, next %d, size %d)", s, len, p->next, p->size);
|
16
16
|
return (uint32_t)-1;
|
@@ -27,11 +27,11 @@ int stringpool_needs_bump(stringpool* p) {
|
|
27
27
|
}
|
28
28
|
|
29
29
|
uint32_t stringpool_next_size(stringpool* p) {
|
30
|
-
return sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * sizeof(char));
|
30
|
+
return (uint32_t)sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * (uint32_t)sizeof(char));
|
31
31
|
}
|
32
32
|
|
33
33
|
uint32_t stringpool_initial_size() {
|
34
|
-
return sizeof(stringpool) + INITIAL_POOL_SIZE;
|
34
|
+
return (uint32_t)sizeof(stringpool) + INITIAL_POOL_SIZE;
|
35
35
|
}
|
36
36
|
|
37
37
|
void stringpool_bump_size(stringpool* p) {
|
data/ext/whistlepig/termhash.c
CHANGED
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
|
|
15
15
|
#define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
16
16
|
#define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
17
17
|
#define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
18
|
-
#define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
|
19
|
-
#define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
|
20
|
-
#define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
21
|
-
#define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
18
|
+
#define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
|
19
|
+
#define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
|
20
|
+
#define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
|
21
|
+
#define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
|
22
22
|
|
23
23
|
static const double HASH_UPPER = 0.77;
|
24
24
|
|
@@ -264,10 +264,10 @@ int termhash_needs_bump(termhash* h) {
|
|
264
264
|
// n_buckets terms for the keys
|
265
265
|
// n_buckets uint32_t's for the vals (offsets into postings lists)
|
266
266
|
static uint32_t size(uint32_t n_buckets) {
|
267
|
-
uint32_t size = sizeof(termhash) +
|
268
|
-
(((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
|
269
|
-
(n_buckets * sizeof(term)) +
|
270
|
-
(n_buckets * sizeof(uint32_t));
|
267
|
+
uint32_t size = (uint32_t)sizeof(termhash) +
|
268
|
+
(((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
|
269
|
+
(n_buckets * (uint32_t)sizeof(term)) +
|
270
|
+
(n_buckets * (uint32_t)sizeof(uint32_t));
|
271
271
|
|
272
272
|
DEBUG("size of a termhash with %u buckets is %lu + %lu + %lu + %lu = %u",
|
273
273
|
n_buckets,
|
@@ -1064,7 +1064,7 @@ YY_RULE_SETUP
|
|
1064
1064
|
#line 23 "tokenizer.lex"
|
1065
1065
|
{
|
1066
1066
|
yyextra->start = yyextra->end;
|
1067
|
-
yyextra->end += yyleng;
|
1067
|
+
yyextra->end += (pos_t)yyleng;
|
1068
1068
|
return TOK_NUMBER;
|
1069
1069
|
}
|
1070
1070
|
YY_BREAK
|
@@ -1073,7 +1073,7 @@ YY_RULE_SETUP
|
|
1073
1073
|
#line 29 "tokenizer.lex"
|
1074
1074
|
{
|
1075
1075
|
yyextra->start = yyextra->end;
|
1076
|
-
yyextra->end += yyleng;
|
1076
|
+
yyextra->end += (pos_t)yyleng;
|
1077
1077
|
return TOK_WORD;
|
1078
1078
|
}
|
1079
1079
|
YY_BREAK
|
@@ -1082,7 +1082,7 @@ YY_RULE_SETUP
|
|
1082
1082
|
#line 35 "tokenizer.lex"
|
1083
1083
|
{
|
1084
1084
|
yyextra->start = yyextra->end;
|
1085
|
-
yyextra->end += yyleng;
|
1085
|
+
yyextra->end += (pos_t)yyleng;
|
1086
1086
|
return TOK_WORD;
|
1087
1087
|
}
|
1088
1088
|
YY_BREAK
|
data/ext/whistlepig/whistlepig.h
CHANGED
@@ -9,12 +9,6 @@ static VALUE c_query;
|
|
9
9
|
static VALUE c_error;
|
10
10
|
static VALUE c_parseerror;
|
11
11
|
|
12
|
-
static char* strdup(const char* old) { // wtf stupid
|
13
|
-
size_t len = strlen(old) + 1;
|
14
|
-
char *new = malloc(len * sizeof(char));
|
15
|
-
return (char *)memcpy(new, old, len);
|
16
|
-
}
|
17
|
-
|
18
12
|
static void index_free(wp_index* index) {
|
19
13
|
wp_error* e = wp_index_free(index);
|
20
14
|
//printf("# index free at %p with error %p\n", index, e);
|
metadata
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whistlepig
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 15
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
|
-
-
|
8
|
-
version: "0.
|
8
|
+
- 2
|
9
|
+
version: "0.2"
|
9
10
|
platform: ruby
|
10
11
|
authors:
|
11
12
|
- William Morgan
|
@@ -13,7 +14,7 @@ autorequire:
|
|
13
14
|
bindir: bin
|
14
15
|
cert_chain: []
|
15
16
|
|
16
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-09 20:32:41 -08:00
|
17
18
|
default_executable:
|
18
19
|
dependencies: []
|
19
20
|
|
@@ -81,6 +82,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
81
82
|
requirements:
|
82
83
|
- - ">="
|
83
84
|
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
84
86
|
segments:
|
85
87
|
- 0
|
86
88
|
version: "0"
|
@@ -89,6 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
91
|
requirements:
|
90
92
|
- - ">="
|
91
93
|
- !ruby/object:Gem::Version
|
94
|
+
hash: 3
|
92
95
|
segments:
|
93
96
|
- 0
|
94
97
|
version: "0"
|