whistlepig 0.1 → 0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +13 -14
- data/ext/whistlepig/entry.c +2 -2
- data/ext/whistlepig/extconf.rb +1 -1
- data/ext/whistlepig/index.c +11 -2
- data/ext/whistlepig/khash.h +9 -4
- data/ext/whistlepig/mmap-obj.c +3 -3
- data/ext/whistlepig/query-parser.lex.c +12 -11
- data/ext/whistlepig/query-parser.lex.h +1 -1
- data/ext/whistlepig/query.c +4 -9
- data/ext/whistlepig/query.h +1 -1
- data/ext/whistlepig/segment.c +9 -10
- data/ext/whistlepig/stringmap.c +7 -7
- data/ext/whistlepig/stringpool.c +4 -4
- data/ext/whistlepig/termhash.c +8 -8
- data/ext/whistlepig/tokenizer.lex.c +3 -3
- data/ext/whistlepig/whistlepig.h +3 -0
- data/ext/whistlepig/whistlepigc.c +0 -6
- metadata +6 -3
data/README
CHANGED
@@ -8,32 +8,33 @@ the frills, Whistlepig may be for you.
|
|
8
8
|
Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
|
9
9
|
bindings.
|
10
10
|
|
11
|
-
Latest version: 0.
|
11
|
+
Latest version: 0.2, released 2011-02-09.
|
12
12
|
Status: alpha
|
13
13
|
News: http://all-thing.net/label/whistlepig/
|
14
14
|
Homepage: http://masanjin.net/whistlepig/
|
15
|
+
Bug reports: http://github.com/wmorgan/whistlepig/issues
|
15
16
|
|
16
17
|
= Getting it
|
17
18
|
|
18
|
-
Tarball: http://masanjin.net/whistlepig/whistlepig-0.
|
19
|
+
Tarball: http://masanjin.net/whistlepig/whistlepig-0.2.tar.gz
|
19
20
|
Rubygem: gem install whistlepig
|
20
21
|
Git: git clone git://github.com/wmorgan/whistlepig.git
|
21
22
|
|
22
23
|
= Realtime search
|
23
24
|
|
24
25
|
Roughly speaking, realtime search means:
|
25
|
-
- documents are available to to queries immediately after indexing, without
|
26
|
-
|
26
|
+
- documents are available to queries immediately after indexing, without any
|
27
|
+
reindexing or index merging;
|
27
28
|
- later documents are more important than earlier documents.
|
28
29
|
|
29
|
-
Whistlepig takes these principles to an extreme.
|
30
|
-
- It only returns documents in the reverse order to which they were
|
31
|
-
|
30
|
+
Whistlepig takes these principles to an extreme.
|
31
|
+
- It only returns documents in the reverse (LIFO) of the order in which they were
|
32
|
+
added, and performs no ranking, reordering, or scoring.
|
32
33
|
- It only supports incremental indexing. There is no notion of batch indexing
|
33
34
|
or index merging.
|
34
35
|
- It does not support document deletion or modification (except in the
|
35
36
|
special case of labels; see below).
|
36
|
-
-
|
37
|
+
- It only supports in-memory indexes.
|
37
38
|
|
38
39
|
Features that Whistlepig does provide:
|
39
40
|
- Incremental indexing. Updates to the index are immediately available to
|
@@ -42,10 +43,8 @@ Features that Whistlepig does provide:
|
|
42
43
|
- A full query language and parser with conjunctions, disjunctions, phrases,
|
43
44
|
negations, grouping, and nesting.
|
44
45
|
- Labels: arbitrary tokens which can be added to and removed from documents
|
45
|
-
at any point, and incorporated into search queries.
|
46
|
-
|
47
|
-
- Early query termination.
|
48
|
-
- Resumable queries.
|
46
|
+
at any point, and incorporated into search queries.
|
47
|
+
- Early query termination and resumable queries.
|
49
48
|
- A tiny, < 3 KLOC ANSI C99 implementation.
|
50
49
|
|
51
50
|
== Synopsis (using Ruby bindings)
|
@@ -81,6 +80,6 @@ Features that Whistlepig does provide:
|
|
81
80
|
Whistlepig is currently single-process and single-thread only. However, it is
|
82
81
|
built with multi-process access in mind. Per-segment single-writer,
|
83
82
|
multi-reader support is planned in the near future. Multi-writer support can be
|
84
|
-
accomplished via index striping and
|
83
|
+
accomplished via index striping and may be attempted in the distant future.
|
85
84
|
|
86
|
-
Please send bug reports and comments to: wmorgan-whistlepig-
|
85
|
+
Please send bug reports and comments to: wmorgan-whistlepig-readme@masanjin.net.
|
data/ext/whistlepig/entry.c
CHANGED
@@ -46,7 +46,7 @@ wp_entry* wp_entry_new() {
|
|
46
46
|
return ret;
|
47
47
|
}
|
48
48
|
|
49
|
-
RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term,
|
49
|
+
RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term, size_t field_len, size_t term_len)) {
|
50
50
|
fielded_term ft;
|
51
51
|
int status;
|
52
52
|
|
@@ -89,7 +89,7 @@ uint32_t wp_entry_size(wp_entry* entry) {
|
|
89
89
|
|
90
90
|
RAISING_STATIC(add_from_lexer(wp_entry* entry, yyscan_t* scanner, const char* field)) {
|
91
91
|
int token_type;
|
92
|
-
|
92
|
+
size_t field_len = strlen(field);
|
93
93
|
|
94
94
|
do {
|
95
95
|
token_type = yylex(*scanner);
|
data/ext/whistlepig/extconf.rb
CHANGED
data/ext/whistlepig/index.c
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#include <inttypes.h>
|
1
2
|
#include <stdio.h>
|
2
3
|
#include <sys/types.h>
|
3
4
|
#include <sys/stat.h>
|
@@ -261,7 +262,7 @@ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id
|
|
261
262
|
else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
|
262
263
|
}
|
263
264
|
|
264
|
-
if(!found) RAISE_ERROR("couldn't find doc id %
|
265
|
+
if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
|
265
266
|
|
266
267
|
return NO_ERROR;
|
267
268
|
}
|
@@ -279,7 +280,7 @@ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc
|
|
279
280
|
else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
|
280
281
|
}
|
281
282
|
|
282
|
-
if(!found) RAISE_ERROR("couldn't find doc id %
|
283
|
+
if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
|
283
284
|
|
284
285
|
return NO_ERROR;
|
285
286
|
}
|
@@ -292,3 +293,11 @@ uint64_t wp_index_num_docs(wp_index* index) {
|
|
292
293
|
|
293
294
|
return ret;
|
294
295
|
}
|
296
|
+
|
297
|
+
// insane. but i'm putting this here. not defined in c99. don't want to make a
|
298
|
+
// "utils.c" or "compat.c" or whatever just yet.
|
299
|
+
char* strdup(const char* old) {
|
300
|
+
size_t len = strlen(old) + 1;
|
301
|
+
char *new = malloc(len * sizeof(char));
|
302
|
+
return memcpy(new, old, len);
|
303
|
+
}
|
data/ext/whistlepig/khash.h
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
/* modified by william morgan to cast sizeof() properly.
|
2
|
+
otherwise it has trouble compiling on 64-bit platforms with
|
3
|
+
-Wshorten-64-to-32 enabled.
|
4
|
+
*/
|
5
|
+
|
1
6
|
/* The MIT License
|
2
7
|
|
3
8
|
Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
|
@@ -102,10 +107,10 @@ static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
|
|
102
107
|
#define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
103
108
|
#define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
104
109
|
#define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
105
|
-
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
|
106
|
-
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
|
107
|
-
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
108
|
-
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
110
|
+
#define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
|
111
|
+
#define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
|
112
|
+
#define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
|
113
|
+
#define __ac_set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
|
109
114
|
|
110
115
|
static const double __ac_HASH_UPPER = 0.77;
|
111
116
|
|
data/ext/whistlepig/mmap-obj.c
CHANGED
@@ -13,7 +13,7 @@ wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname,
|
|
13
13
|
o->fd = open(pathname, O_EXCL | O_CREAT | O_RDWR, 0640);
|
14
14
|
if(o->fd == -1) RAISE_SYSERROR("cannot create %s", pathname);
|
15
15
|
|
16
|
-
uint32_t size = initial_size + sizeof(mmap_obj_header);
|
16
|
+
uint32_t size = initial_size + (uint32_t)sizeof(mmap_obj_header);
|
17
17
|
DEBUG("creating %s with %u + %u = %u bytes for %s object", pathname, initial_size, sizeof(mmap_obj_header), size, magic);
|
18
18
|
lseek(o->fd, size - 1, SEEK_SET);
|
19
19
|
ssize_t num_bytes = write(o->fd, "", 1);
|
@@ -39,7 +39,7 @@ wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) {
|
|
39
39
|
|
40
40
|
RELAY_ERROR(validate(o->header, magic));
|
41
41
|
|
42
|
-
uint32_t size = o->header->size + sizeof(mmap_obj_header);
|
42
|
+
uint32_t size = o->header->size + (uint32_t)sizeof(mmap_obj_header);
|
43
43
|
DEBUG("full size is %u bytes (including %u-byte header)", size, sizeof(mmap_obj_header));
|
44
44
|
if(munmap(o->header, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");
|
45
45
|
|
@@ -54,7 +54,7 @@ wp_error* mmap_obj_resize(mmap_obj* o, uint32_t data_size) {
|
|
54
54
|
DEBUG("going to expand from %u to %u bytes. current header is at %p", o->header->size, data_size, o->header);
|
55
55
|
|
56
56
|
if(munmap(o->header, sizeof(mmap_obj_header) + o->header->size) == -1) RAISE_SYSERROR("munmap");
|
57
|
-
uint32_t size = data_size + sizeof(mmap_obj_header);
|
57
|
+
uint32_t size = data_size + (uint32_t)sizeof(mmap_obj_header);
|
58
58
|
|
59
59
|
lseek(o->fd, size - 1, SEEK_SET);
|
60
60
|
ssize_t num_bytes = write(o->fd, "", 1);
|
@@ -677,6 +677,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
|
|
677
677
|
#line 1 "query-parser.lex"
|
678
678
|
#line 2 "query-parser.lex"
|
679
679
|
#include <string.h>
|
680
|
+
#include "whistlepig.h"
|
680
681
|
#include "query-parser.h"
|
681
682
|
#include "query-parser.tab.h"
|
682
683
|
|
@@ -694,7 +695,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
|
|
694
695
|
} \
|
695
696
|
}
|
696
697
|
|
697
|
-
#line
|
698
|
+
#line 699 "query-parser.lex.c"
|
698
699
|
|
699
700
|
#define INITIAL 0
|
700
701
|
|
@@ -924,10 +925,10 @@ YY_DECL
|
|
924
925
|
register int yy_act;
|
925
926
|
struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
|
926
927
|
|
927
|
-
#line
|
928
|
+
#line 32 "query-parser.lex"
|
928
929
|
|
929
930
|
|
930
|
-
#line
|
931
|
+
#line 932 "query-parser.lex.c"
|
931
932
|
|
932
933
|
yylval = yylval_param;
|
933
934
|
|
@@ -1023,12 +1024,12 @@ do_action: /* This label is used only to access EOF actions. */
|
|
1023
1024
|
|
1024
1025
|
case 1:
|
1025
1026
|
YY_RULE_SETUP
|
1026
|
-
#line
|
1027
|
+
#line 34 "query-parser.lex"
|
1027
1028
|
return OR;
|
1028
1029
|
YY_BREAK
|
1029
1030
|
case 2:
|
1030
1031
|
YY_RULE_SETUP
|
1031
|
-
#line
|
1032
|
+
#line 36 "query-parser.lex"
|
1032
1033
|
{
|
1033
1034
|
yylval->string = strdup(yytext);
|
1034
1035
|
return WORD;
|
@@ -1036,7 +1037,7 @@ YY_RULE_SETUP
|
|
1036
1037
|
YY_BREAK
|
1037
1038
|
case 3:
|
1038
1039
|
YY_RULE_SETUP
|
1039
|
-
#line
|
1040
|
+
#line 41 "query-parser.lex"
|
1040
1041
|
{
|
1041
1042
|
yylval->string = strdup(yytext);
|
1042
1043
|
return WORD;
|
@@ -1045,20 +1046,20 @@ YY_RULE_SETUP
|
|
1045
1046
|
case 4:
|
1046
1047
|
/* rule 4 can match eol */
|
1047
1048
|
YY_RULE_SETUP
|
1048
|
-
#line
|
1049
|
+
#line 46 "query-parser.lex"
|
1049
1050
|
{ } ; // nothing
|
1050
1051
|
YY_BREAK
|
1051
1052
|
case 5:
|
1052
1053
|
YY_RULE_SETUP
|
1053
|
-
#line
|
1054
|
+
#line 48 "query-parser.lex"
|
1054
1055
|
return yytext[0];
|
1055
1056
|
YY_BREAK
|
1056
1057
|
case 6:
|
1057
1058
|
YY_RULE_SETUP
|
1058
|
-
#line
|
1059
|
+
#line 50 "query-parser.lex"
|
1059
1060
|
ECHO;
|
1060
1061
|
YY_BREAK
|
1061
|
-
#line
|
1062
|
+
#line 1063 "query-parser.lex.c"
|
1062
1063
|
case YY_STATE_EOF(INITIAL):
|
1063
1064
|
yyterminate();
|
1064
1065
|
|
@@ -2242,7 +2243,7 @@ void query_parser_free (void * ptr , yyscan_t yyscanner)
|
|
2242
2243
|
|
2243
2244
|
#define YYTABLES_NAME "yytables"
|
2244
2245
|
|
2245
|
-
#line
|
2246
|
+
#line 50 "query-parser.lex"
|
2246
2247
|
|
2247
2248
|
|
2248
2249
|
|
data/ext/whistlepig/query.c
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
#include "whistlepig.h"
|
1
2
|
#include "query.h"
|
2
3
|
|
3
4
|
static wp_query* wp_query_new() {
|
@@ -11,12 +12,6 @@ static wp_query* wp_query_new() {
|
|
11
12
|
return ret;
|
12
13
|
}
|
13
14
|
|
14
|
-
static char* strdup(const char* old) { // sigh... not in c99
|
15
|
-
size_t len = strlen(old) + 1;
|
16
|
-
char *new = malloc(len * sizeof(char));
|
17
|
-
return memcpy(new, old, len);
|
18
|
-
}
|
19
|
-
|
20
15
|
wp_query* wp_query_clone(wp_query* other) {
|
21
16
|
wp_query* ret = malloc(sizeof(wp_query));
|
22
17
|
ret->type = other->type;
|
@@ -128,13 +123,13 @@ static int subquery_to_s(wp_query* q, size_t n, char* buf) {
|
|
128
123
|
buf += wp_query_to_s(child, n - (buf - orig_buf), buf);
|
129
124
|
}
|
130
125
|
|
131
|
-
return buf - orig_buf;
|
126
|
+
return (int)(buf - orig_buf);
|
132
127
|
}
|
133
128
|
|
134
129
|
#define min(a, b) (a < b ? a : b)
|
135
130
|
|
136
|
-
|
137
|
-
|
131
|
+
size_t wp_query_to_s(wp_query* q, size_t n, char* buf) {
|
132
|
+
size_t ret;
|
138
133
|
char* orig_buf = buf;
|
139
134
|
|
140
135
|
if(q->type == WP_QUERY_EMPTY) {
|
data/ext/whistlepig/query.h
CHANGED
@@ -73,6 +73,6 @@ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field);
|
|
73
73
|
void wp_query_free(wp_query* q);
|
74
74
|
|
75
75
|
// public: build a string representation of a query by writing at most n chars to buf
|
76
|
-
|
76
|
+
size_t wp_query_to_s(wp_query* q, size_t n, char* buf);
|
77
77
|
|
78
78
|
#endif
|
data/ext/whistlepig/segment.c
CHANGED
@@ -241,8 +241,8 @@ wp_error* wp_segment_ensure_fit(wp_segment* seg, uint32_t postings_bytes, uint32
|
|
241
241
|
|
242
242
|
static uint32_t size_of(uint32_t num_positions, pos_t positions[]) {
|
243
243
|
(void)positions;
|
244
|
-
uint32_t position_size = sizeof(pos_t) * num_positions;
|
245
|
-
uint32_t size = sizeof(posting) - sizeof(pos_t*) + position_size;
|
244
|
+
uint32_t position_size = (uint32_t)sizeof(pos_t) * num_positions;
|
245
|
+
uint32_t size = (uint32_t)sizeof(posting) - (uint32_t)sizeof(pos_t*) + position_size;
|
246
246
|
|
247
247
|
return size;
|
248
248
|
}
|
@@ -253,23 +253,22 @@ wp_error* wp_segment_sizeof_posarray(wp_segment* seg, uint32_t num_positions, po
|
|
253
253
|
return NO_ERROR;
|
254
254
|
}
|
255
255
|
|
256
|
-
#define
|
257
|
-
|
256
|
+
#define VALUE_BITMASK 0x7f
|
258
257
|
RAISING_STATIC(write_multibyte(uint8_t* location, uint32_t val, uint32_t* size)) {
|
259
258
|
//printf("xx writing %u to position %p as:\n", val, location);
|
260
259
|
uint8_t* start = location;
|
261
260
|
|
262
|
-
while(val >
|
263
|
-
uint8_t c = (val &
|
261
|
+
while(val > VALUE_BITMASK) {
|
262
|
+
uint8_t c = (val & VALUE_BITMASK) | 0x80;
|
264
263
|
*location = c;
|
265
264
|
//printf("xx %d = %d | %d at %p\n", c, val & BITMASK, 0x80, location);
|
266
265
|
location++;
|
267
266
|
val >>= 7;
|
268
267
|
}
|
269
|
-
uint8_t c = (val &
|
268
|
+
uint8_t c = (val & VALUE_BITMASK);
|
270
269
|
*location = c;
|
271
270
|
//printf("xx %d at %p\n", c, location);
|
272
|
-
*size = location + 1 - start;
|
271
|
+
*size = (uint32_t)(location + 1 - start);
|
273
272
|
//printf("xx total %u bytes\n", *size);
|
274
273
|
return NO_ERROR;
|
275
274
|
}
|
@@ -287,7 +286,7 @@ RAISING_STATIC(read_multibyte(uint8_t* location, uint32_t* val, uint32_t* size))
|
|
287
286
|
}
|
288
287
|
*val |= *location << shift;
|
289
288
|
//printf("yy read final byte %d at %p\n", *location, location);
|
290
|
-
*size = location + 1 - start;
|
289
|
+
*size = (uint32_t)(location + 1 - start);
|
291
290
|
//printf("yy total %d bytes, val = %d\n\n", *size, *val);
|
292
291
|
return NO_ERROR;
|
293
292
|
}
|
@@ -522,7 +521,7 @@ wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id)
|
|
522
521
|
po->doc_id = doc_id;
|
523
522
|
po->next_offset = next_offset;
|
524
523
|
|
525
|
-
pr->postings_head += sizeof(label_posting);
|
524
|
+
pr->postings_head += (uint32_t)sizeof(label_posting);
|
526
525
|
DEBUG("label postings list head now at %u", pr->postings_head);
|
527
526
|
|
528
527
|
// really finally, update either the previous offset or the tail pointer
|
data/ext/whistlepig/stringmap.c
CHANGED
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
|
|
15
15
|
#define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
16
16
|
#define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
17
17
|
#define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
18
|
-
#define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
|
19
|
-
#define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
|
20
|
-
#define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
21
|
-
#define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
18
|
+
#define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
|
19
|
+
#define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
|
20
|
+
#define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
|
21
|
+
#define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
|
22
22
|
|
23
23
|
static const double HASH_UPPER = 0.77;
|
24
24
|
|
@@ -234,9 +234,9 @@ int stringmap_needs_bump(stringmap* h) {
|
|
234
234
|
// ((n_buckets >> 4) + 1) uint32_t's for the flags
|
235
235
|
// n_buckets uint32_t for the keys
|
236
236
|
static uint32_t size(uint32_t n_buckets) {
|
237
|
-
uint32_t size = sizeof(stringmap) +
|
238
|
-
(((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
|
239
|
-
(n_buckets * sizeof(uint32_t));
|
237
|
+
uint32_t size = (uint32_t)sizeof(stringmap) +
|
238
|
+
(((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
|
239
|
+
(n_buckets * (uint32_t)sizeof(uint32_t));
|
240
240
|
return size;
|
241
241
|
}
|
242
242
|
|
data/ext/whistlepig/stringpool.c
CHANGED
@@ -6,11 +6,11 @@ void stringpool_init(stringpool* p) {
|
|
6
6
|
}
|
7
7
|
|
8
8
|
uint32_t stringpool_size(stringpool* p) {
|
9
|
-
return sizeof(stringpool) + (p->size * sizeof(char));
|
9
|
+
return (uint32_t)sizeof(stringpool) + (p->size * (uint32_t)sizeof(char));
|
10
10
|
}
|
11
11
|
|
12
12
|
uint32_t stringpool_add(stringpool* p, const char* s) {
|
13
|
-
|
13
|
+
uint32_t len = (uint32_t)strlen(s) + 1;
|
14
14
|
if((p->next + len) >= p->size) {
|
15
15
|
DEBUG("out of space in string pool for %s (len %d, next %d, size %d)", s, len, p->next, p->size);
|
16
16
|
return (uint32_t)-1;
|
@@ -27,11 +27,11 @@ int stringpool_needs_bump(stringpool* p) {
|
|
27
27
|
}
|
28
28
|
|
29
29
|
uint32_t stringpool_next_size(stringpool* p) {
|
30
|
-
return sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * sizeof(char));
|
30
|
+
return (uint32_t)sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * (uint32_t)sizeof(char));
|
31
31
|
}
|
32
32
|
|
33
33
|
uint32_t stringpool_initial_size() {
|
34
|
-
return sizeof(stringpool) + INITIAL_POOL_SIZE;
|
34
|
+
return (uint32_t)sizeof(stringpool) + INITIAL_POOL_SIZE;
|
35
35
|
}
|
36
36
|
|
37
37
|
void stringpool_bump_size(stringpool* p) {
|
data/ext/whistlepig/termhash.c
CHANGED
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
|
|
15
15
|
#define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
|
16
16
|
#define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
|
17
17
|
#define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
|
18
|
-
#define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
|
19
|
-
#define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
|
20
|
-
#define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
|
21
|
-
#define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
|
18
|
+
#define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
|
19
|
+
#define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
|
20
|
+
#define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
|
21
|
+
#define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
|
22
22
|
|
23
23
|
static const double HASH_UPPER = 0.77;
|
24
24
|
|
@@ -264,10 +264,10 @@ int termhash_needs_bump(termhash* h) {
|
|
264
264
|
// n_buckets terms for the keys
|
265
265
|
// n_buckets uint32_t's for the vals (offsets into postings lists)
|
266
266
|
static uint32_t size(uint32_t n_buckets) {
|
267
|
-
uint32_t size = sizeof(termhash) +
|
268
|
-
(((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
|
269
|
-
(n_buckets * sizeof(term)) +
|
270
|
-
(n_buckets * sizeof(uint32_t));
|
267
|
+
uint32_t size = (uint32_t)sizeof(termhash) +
|
268
|
+
(((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
|
269
|
+
(n_buckets * (uint32_t)sizeof(term)) +
|
270
|
+
(n_buckets * (uint32_t)sizeof(uint32_t));
|
271
271
|
|
272
272
|
DEBUG("size of a termhash with %u buckets is %lu + %lu + %lu + %lu = %u",
|
273
273
|
n_buckets,
|
@@ -1064,7 +1064,7 @@ YY_RULE_SETUP
|
|
1064
1064
|
#line 23 "tokenizer.lex"
|
1065
1065
|
{
|
1066
1066
|
yyextra->start = yyextra->end;
|
1067
|
-
yyextra->end += yyleng;
|
1067
|
+
yyextra->end += (pos_t)yyleng;
|
1068
1068
|
return TOK_NUMBER;
|
1069
1069
|
}
|
1070
1070
|
YY_BREAK
|
@@ -1073,7 +1073,7 @@ YY_RULE_SETUP
|
|
1073
1073
|
#line 29 "tokenizer.lex"
|
1074
1074
|
{
|
1075
1075
|
yyextra->start = yyextra->end;
|
1076
|
-
yyextra->end += yyleng;
|
1076
|
+
yyextra->end += (pos_t)yyleng;
|
1077
1077
|
return TOK_WORD;
|
1078
1078
|
}
|
1079
1079
|
YY_BREAK
|
@@ -1082,7 +1082,7 @@ YY_RULE_SETUP
|
|
1082
1082
|
#line 35 "tokenizer.lex"
|
1083
1083
|
{
|
1084
1084
|
yyextra->start = yyextra->end;
|
1085
|
-
yyextra->end += yyleng;
|
1085
|
+
yyextra->end += (pos_t)yyleng;
|
1086
1086
|
return TOK_WORD;
|
1087
1087
|
}
|
1088
1088
|
YY_BREAK
|
data/ext/whistlepig/whistlepig.h
CHANGED
@@ -9,12 +9,6 @@ static VALUE c_query;
|
|
9
9
|
static VALUE c_error;
|
10
10
|
static VALUE c_parseerror;
|
11
11
|
|
12
|
-
static char* strdup(const char* old) { // wtf stupid
|
13
|
-
size_t len = strlen(old) + 1;
|
14
|
-
char *new = malloc(len * sizeof(char));
|
15
|
-
return (char *)memcpy(new, old, len);
|
16
|
-
}
|
17
|
-
|
18
12
|
static void index_free(wp_index* index) {
|
19
13
|
wp_error* e = wp_index_free(index);
|
20
14
|
//printf("# index free at %p with error %p\n", index, e);
|
metadata
CHANGED
@@ -1,11 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: whistlepig
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
+
hash: 15
|
4
5
|
prerelease: false
|
5
6
|
segments:
|
6
7
|
- 0
|
7
|
-
-
|
8
|
-
version: "0.
|
8
|
+
- 2
|
9
|
+
version: "0.2"
|
9
10
|
platform: ruby
|
10
11
|
authors:
|
11
12
|
- William Morgan
|
@@ -13,7 +14,7 @@ autorequire:
|
|
13
14
|
bindir: bin
|
14
15
|
cert_chain: []
|
15
16
|
|
16
|
-
date: 2011-02-
|
17
|
+
date: 2011-02-09 20:32:41 -08:00
|
17
18
|
default_executable:
|
18
19
|
dependencies: []
|
19
20
|
|
@@ -81,6 +82,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
81
82
|
requirements:
|
82
83
|
- - ">="
|
83
84
|
- !ruby/object:Gem::Version
|
85
|
+
hash: 3
|
84
86
|
segments:
|
85
87
|
- 0
|
86
88
|
version: "0"
|
@@ -89,6 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
89
91
|
requirements:
|
90
92
|
- - ">="
|
91
93
|
- !ruby/object:Gem::Version
|
94
|
+
hash: 3
|
92
95
|
segments:
|
93
96
|
- 0
|
94
97
|
version: "0"
|