whistlepig 0.1 → 0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README CHANGED
@@ -8,32 +8,33 @@ the frills, Whistlepig may be for you.
8
8
  Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
9
9
  bindings.
10
10
 
11
- Latest version: 0.1, released 2010-02-08.
11
+ Latest version: 0.2, released 2011-02-09.
12
12
  Status: alpha
13
13
  News: http://all-thing.net/label/whistlepig/
14
14
  Homepage: http://masanjin.net/whistlepig/
15
+ Bug reports: http://github.com/wmorgan/whistlepig/issues
15
16
 
16
17
  = Getting it
17
18
 
18
- Tarball: http://masanjin.net/whistlepig/whistlepig-0.1.tar.gz
19
+ Tarball: http://masanjin.net/whistlepig/whistlepig-0.2.tar.gz
19
20
  Rubygem: gem install whistlepig
20
21
  Git: git clone git://github.com/wmorgan/whistlepig.git
21
22
 
22
23
  = Realtime search
23
24
 
24
25
  Roughly speaking, realtime search means:
25
- - documents are available to to queries immediately after indexing, without
26
- any further index merging steps; and
26
+ - documents are available to queries immediately after indexing, without any
27
+ reindexing or index merging;
27
28
  - later documents are more important than earlier documents.
28
29
 
29
- Whistlepig takes these principles to an extreme. In particular:
30
- - It only returns documents in the reverse order to which they were added
31
- (i.e. LIFO order), and performs no ranking, reordering, or scoring.
30
+ Whistlepig takes these principles to an extreme.
31
+ - It only returns documents in the reverse of the order in which they were
32
+ added (LIFO), and performs no ranking, reordering, or scoring.
32
33
  - It only supports incremental indexing. There is no notion of batch indexing
33
34
  or index merging.
34
35
  - It does not support document deletion or modification (except in the
35
36
  special case of labels; see below).
36
- - In only supports in-memory indexes.
37
+ - It only supports in-memory indexes.
37
38
 
38
39
  Features that Whistlepig does provide:
39
40
  - Incremental indexing. Updates to the index are immediately available to
@@ -42,10 +43,8 @@ Features that Whistlepig does provide:
42
43
  - A full query language and parser with conjunctions, disjunctions, phrases,
43
44
  negations, grouping, and nesting.
44
45
  - Labels: arbitrary tokens which can be added to and removed from documents
45
- at any point, and incorporated into search queries. (This is the only
46
- mutable aspect of a document once it has been indexed.)
47
- - Early query termination.
48
- - Resumable queries.
46
+ at any point, and incorporated into search queries.
47
+ - Early query termination and resumable queries.
49
48
  - A tiny, < 3 KLOC ANSI C99 implementation.
50
49
 
51
50
  == Synopsis (using Ruby bindings)
@@ -81,6 +80,6 @@ Features that Whistlepig does provide:
81
80
  Whistlepig is currently single-process and single-thread only. However, it is
82
81
  built with multi-process access in mind. Per-segment single-writer,
83
82
  multi-reader support is planned in the near future. Multi-writer support can be
84
- accomplished via index striping and is planned for the distant future.
83
+ accomplished via index striping and may be attempted in the distant future.
85
84
 
86
- Please send bug reports and comments to: wmorgan-whistlepig-design@masanjin.net.
85
+ Please send bug reports and comments to: wmorgan-whistlepig-readme@masanjin.net.
@@ -46,7 +46,7 @@ wp_entry* wp_entry_new() {
46
46
  return ret;
47
47
  }
48
48
 
49
- RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term, int field_len, int term_len)) {
49
+ RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term, size_t field_len, size_t term_len)) {
50
50
  fielded_term ft;
51
51
  int status;
52
52
 
@@ -89,7 +89,7 @@ uint32_t wp_entry_size(wp_entry* entry) {
89
89
 
90
90
  RAISING_STATIC(add_from_lexer(wp_entry* entry, yyscan_t* scanner, const char* field)) {
91
91
  int token_type;
92
- int field_len = strlen(field);
92
+ size_t field_len = strlen(field);
93
93
 
94
94
  do {
95
95
  token_type = yylex(*scanner);
@@ -1,6 +1,6 @@
1
1
  require 'mkmf'
2
2
 
3
- $CFLAGS = "-g -O3 -std=c99 $(cflags)"
3
+ $CFLAGS = "-g -O3 -std=c99 $(cflags) -D_ANSI_SOURCE"
4
4
 
5
5
  create_header
6
6
  create_makefile "whistlepigc"
@@ -1,3 +1,4 @@
1
+ #include <inttypes.h>
1
2
  #include <stdio.h>
2
3
  #include <sys/types.h>
3
4
  #include <sys/stat.h>
@@ -261,7 +262,7 @@ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id
261
262
  else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
262
263
  }
263
264
 
264
- if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
265
+ if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
265
266
 
266
267
  return NO_ERROR;
267
268
  }
@@ -279,7 +280,7 @@ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc
279
280
  else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
280
281
  }
281
282
 
282
- if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
283
+ if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
283
284
 
284
285
  return NO_ERROR;
285
286
  }
@@ -292,3 +293,11 @@ uint64_t wp_index_num_docs(wp_index* index) {
292
293
 
293
294
  return ret;
294
295
  }
296
+
297
+ // insane. but i'm putting this here. not defined in c99. don't want to make a
298
+ // "utils.c" or "compat.c" or whatever just yet.
299
+ char* strdup(const char* old) {
300
+ size_t len = strlen(old) + 1;
301
+ char *new = malloc(len * sizeof(char));
302
+ return memcpy(new, old, len);
303
+ }
@@ -1,3 +1,8 @@
1
+ /* modified by william morgan to cast sizeof() properly.
2
+ otherwise it has trouble compiling on 64-bit platforms with
3
+ -Wshorten-64-to-32 enabled.
4
+ */
5
+
1
6
  /* The MIT License
2
7
 
3
8
  Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
@@ -102,10 +107,10 @@ static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
102
107
  #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
103
108
  #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
104
109
  #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
105
- #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
106
- #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
107
- #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
108
- #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
110
+ #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
111
+ #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
112
+ #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
113
+ #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
109
114
 
110
115
  static const double __ac_HASH_UPPER = 0.77;
111
116
 
@@ -13,7 +13,7 @@ wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname,
13
13
  o->fd = open(pathname, O_EXCL | O_CREAT | O_RDWR, 0640);
14
14
  if(o->fd == -1) RAISE_SYSERROR("cannot create %s", pathname);
15
15
 
16
- uint32_t size = initial_size + sizeof(mmap_obj_header);
16
+ uint32_t size = initial_size + (uint32_t)sizeof(mmap_obj_header);
17
17
  DEBUG("creating %s with %u + %u = %u bytes for %s object", pathname, initial_size, sizeof(mmap_obj_header), size, magic);
18
18
  lseek(o->fd, size - 1, SEEK_SET);
19
19
  ssize_t num_bytes = write(o->fd, "", 1);
@@ -39,7 +39,7 @@ wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) {
39
39
 
40
40
  RELAY_ERROR(validate(o->header, magic));
41
41
 
42
- uint32_t size = o->header->size + sizeof(mmap_obj_header);
42
+ uint32_t size = o->header->size + (uint32_t)sizeof(mmap_obj_header);
43
43
  DEBUG("full size is %u bytes (including %u-byte header)", size, sizeof(mmap_obj_header));
44
44
  if(munmap(o->header, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");
45
45
 
@@ -54,7 +54,7 @@ wp_error* mmap_obj_resize(mmap_obj* o, uint32_t data_size) {
54
54
  DEBUG("going to expand from %u to %u bytes. current header is at %p", o->header->size, data_size, o->header);
55
55
 
56
56
  if(munmap(o->header, sizeof(mmap_obj_header) + o->header->size) == -1) RAISE_SYSERROR("munmap");
57
- uint32_t size = data_size + sizeof(mmap_obj_header);
57
+ uint32_t size = data_size + (uint32_t)sizeof(mmap_obj_header);
58
58
 
59
59
  lseek(o->fd, size - 1, SEEK_SET);
60
60
  ssize_t num_bytes = write(o->fd, "", 1);
@@ -677,6 +677,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
677
677
  #line 1 "query-parser.lex"
678
678
  #line 2 "query-parser.lex"
679
679
  #include <string.h>
680
+ #include "whistlepig.h"
680
681
  #include "query-parser.h"
681
682
  #include "query-parser.tab.h"
682
683
 
@@ -694,7 +695,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
694
695
  } \
695
696
  }
696
697
 
697
- #line 698 "query-parser.lex.c"
698
+ #line 699 "query-parser.lex.c"
698
699
 
699
700
  #define INITIAL 0
700
701
 
@@ -924,10 +925,10 @@ YY_DECL
924
925
  register int yy_act;
925
926
  struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
926
927
 
927
- #line 31 "query-parser.lex"
928
+ #line 32 "query-parser.lex"
928
929
 
929
930
 
930
- #line 931 "query-parser.lex.c"
931
+ #line 932 "query-parser.lex.c"
931
932
 
932
933
  yylval = yylval_param;
933
934
 
@@ -1023,12 +1024,12 @@ do_action: /* This label is used only to access EOF actions. */
1023
1024
 
1024
1025
  case 1:
1025
1026
  YY_RULE_SETUP
1026
- #line 33 "query-parser.lex"
1027
+ #line 34 "query-parser.lex"
1027
1028
  return OR;
1028
1029
  YY_BREAK
1029
1030
  case 2:
1030
1031
  YY_RULE_SETUP
1031
- #line 35 "query-parser.lex"
1032
+ #line 36 "query-parser.lex"
1032
1033
  {
1033
1034
  yylval->string = strdup(yytext);
1034
1035
  return WORD;
@@ -1036,7 +1037,7 @@ YY_RULE_SETUP
1036
1037
  YY_BREAK
1037
1038
  case 3:
1038
1039
  YY_RULE_SETUP
1039
- #line 40 "query-parser.lex"
1040
+ #line 41 "query-parser.lex"
1040
1041
  {
1041
1042
  yylval->string = strdup(yytext);
1042
1043
  return WORD;
@@ -1045,20 +1046,20 @@ YY_RULE_SETUP
1045
1046
  case 4:
1046
1047
  /* rule 4 can match eol */
1047
1048
  YY_RULE_SETUP
1048
- #line 45 "query-parser.lex"
1049
+ #line 46 "query-parser.lex"
1049
1050
  { } ; // nothing
1050
1051
  YY_BREAK
1051
1052
  case 5:
1052
1053
  YY_RULE_SETUP
1053
- #line 47 "query-parser.lex"
1054
+ #line 48 "query-parser.lex"
1054
1055
  return yytext[0];
1055
1056
  YY_BREAK
1056
1057
  case 6:
1057
1058
  YY_RULE_SETUP
1058
- #line 49 "query-parser.lex"
1059
+ #line 50 "query-parser.lex"
1059
1060
  ECHO;
1060
1061
  YY_BREAK
1061
- #line 1062 "query-parser.lex.c"
1062
+ #line 1063 "query-parser.lex.c"
1062
1063
  case YY_STATE_EOF(INITIAL):
1063
1064
  yyterminate();
1064
1065
 
@@ -2242,7 +2243,7 @@ void query_parser_free (void * ptr , yyscan_t yyscanner)
2242
2243
 
2243
2244
  #define YYTABLES_NAME "yytables"
2244
2245
 
2245
- #line 49 "query-parser.lex"
2246
+ #line 50 "query-parser.lex"
2246
2247
 
2247
2248
 
2248
2249
 
@@ -351,7 +351,7 @@ extern int query_parser_lex \
351
351
  #undef YY_DECL
352
352
  #endif
353
353
 
354
- #line 49 "query-parser.lex"
354
+ #line 50 "query-parser.lex"
355
355
 
356
356
 
357
357
  #line 358 "query-parser.lex.h"
@@ -1,3 +1,4 @@
1
+ #include "whistlepig.h"
1
2
  #include "query.h"
2
3
 
3
4
  static wp_query* wp_query_new() {
@@ -11,12 +12,6 @@ static wp_query* wp_query_new() {
11
12
  return ret;
12
13
  }
13
14
 
14
- static char* strdup(const char* old) { // sigh... not in c99
15
- size_t len = strlen(old) + 1;
16
- char *new = malloc(len * sizeof(char));
17
- return memcpy(new, old, len);
18
- }
19
-
20
15
  wp_query* wp_query_clone(wp_query* other) {
21
16
  wp_query* ret = malloc(sizeof(wp_query));
22
17
  ret->type = other->type;
@@ -128,13 +123,13 @@ static int subquery_to_s(wp_query* q, size_t n, char* buf) {
128
123
  buf += wp_query_to_s(child, n - (buf - orig_buf), buf);
129
124
  }
130
125
 
131
- return buf - orig_buf;
126
+ return (int)(buf - orig_buf);
132
127
  }
133
128
 
134
129
  #define min(a, b) (a < b ? a : b)
135
130
 
136
- int wp_query_to_s(wp_query* q, size_t n, char* buf) {
137
- int ret;
131
+ size_t wp_query_to_s(wp_query* q, size_t n, char* buf) {
132
+ size_t ret;
138
133
  char* orig_buf = buf;
139
134
 
140
135
  if(q->type == WP_QUERY_EMPTY) {
@@ -73,6 +73,6 @@ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field);
73
73
  void wp_query_free(wp_query* q);
74
74
 
75
75
  // public: build a string representation of a query by writing at most n chars to buf
76
- int wp_query_to_s(wp_query* q, size_t n, char* buf);
76
+ size_t wp_query_to_s(wp_query* q, size_t n, char* buf);
77
77
 
78
78
  #endif
@@ -241,8 +241,8 @@ wp_error* wp_segment_ensure_fit(wp_segment* seg, uint32_t postings_bytes, uint32
241
241
 
242
242
  static uint32_t size_of(uint32_t num_positions, pos_t positions[]) {
243
243
  (void)positions;
244
- uint32_t position_size = sizeof(pos_t) * num_positions;
245
- uint32_t size = sizeof(posting) - sizeof(pos_t*) + position_size;
244
+ uint32_t position_size = (uint32_t)sizeof(pos_t) * num_positions;
245
+ uint32_t size = (uint32_t)sizeof(posting) - (uint32_t)sizeof(pos_t*) + position_size;
246
246
 
247
247
  return size;
248
248
  }
@@ -253,23 +253,22 @@ wp_error* wp_segment_sizeof_posarray(wp_segment* seg, uint32_t num_positions, po
253
253
  return NO_ERROR;
254
254
  }
255
255
 
256
- #define BITMASK 0x7f
257
-
256
+ #define VALUE_BITMASK 0x7f
258
257
  RAISING_STATIC(write_multibyte(uint8_t* location, uint32_t val, uint32_t* size)) {
259
258
  //printf("xx writing %u to position %p as:\n", val, location);
260
259
  uint8_t* start = location;
261
260
 
262
- while(val > BITMASK) {
263
- uint8_t c = (val & BITMASK) | 0x80;
261
+ while(val > VALUE_BITMASK) {
262
+ uint8_t c = (val & VALUE_BITMASK) | 0x80;
264
263
  *location = c;
265
264
  //printf("xx %d = %d | %d at %p\n", c, val & BITMASK, 0x80, location);
266
265
  location++;
267
266
  val >>= 7;
268
267
  }
269
- uint8_t c = (val & BITMASK);
268
+ uint8_t c = (val & VALUE_BITMASK);
270
269
  *location = c;
271
270
  //printf("xx %d at %p\n", c, location);
272
- *size = location + 1 - start;
271
+ *size = (uint32_t)(location + 1 - start);
273
272
  //printf("xx total %u bytes\n", *size);
274
273
  return NO_ERROR;
275
274
  }
@@ -287,7 +286,7 @@ RAISING_STATIC(read_multibyte(uint8_t* location, uint32_t* val, uint32_t* size))
287
286
  }
288
287
  *val |= *location << shift;
289
288
  //printf("yy read final byte %d at %p\n", *location, location);
290
- *size = location + 1 - start;
289
+ *size = (uint32_t)(location + 1 - start);
291
290
  //printf("yy total %d bytes, val = %d\n\n", *size, *val);
292
291
  return NO_ERROR;
293
292
  }
@@ -522,7 +521,7 @@ wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id)
522
521
  po->doc_id = doc_id;
523
522
  po->next_offset = next_offset;
524
523
 
525
- pr->postings_head += sizeof(label_posting);
524
+ pr->postings_head += (uint32_t)sizeof(label_posting);
526
525
  DEBUG("label postings list head now at %u", pr->postings_head);
527
526
 
528
527
  // really finally, update either the previous offset or the tail pointer
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
15
15
  #define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
16
16
  #define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
17
17
  #define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
18
- #define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
19
- #define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
20
- #define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
21
- #define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
18
+ #define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
19
+ #define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
20
+ #define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
21
+ #define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
22
22
 
23
23
  static const double HASH_UPPER = 0.77;
24
24
 
@@ -234,9 +234,9 @@ int stringmap_needs_bump(stringmap* h) {
234
234
  // ((n_buckets >> 4) + 1) uint32_t's for the flags
235
235
  // n_buckets uint32_t for the keys
236
236
  static uint32_t size(uint32_t n_buckets) {
237
- uint32_t size = sizeof(stringmap) +
238
- (((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
239
- (n_buckets * sizeof(uint32_t));
237
+ uint32_t size = (uint32_t)sizeof(stringmap) +
238
+ (((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
239
+ (n_buckets * (uint32_t)sizeof(uint32_t));
240
240
  return size;
241
241
  }
242
242
 
@@ -6,11 +6,11 @@ void stringpool_init(stringpool* p) {
6
6
  }
7
7
 
8
8
  uint32_t stringpool_size(stringpool* p) {
9
- return sizeof(stringpool) + (p->size * sizeof(char));
9
+ return (uint32_t)sizeof(stringpool) + (p->size * (uint32_t)sizeof(char));
10
10
  }
11
11
 
12
12
  uint32_t stringpool_add(stringpool* p, const char* s) {
13
- int len = strlen(s) + 1;
13
+ uint32_t len = (uint32_t)strlen(s) + 1;
14
14
  if((p->next + len) >= p->size) {
15
15
  DEBUG("out of space in string pool for %s (len %d, next %d, size %d)", s, len, p->next, p->size);
16
16
  return (uint32_t)-1;
@@ -27,11 +27,11 @@ int stringpool_needs_bump(stringpool* p) {
27
27
  }
28
28
 
29
29
  uint32_t stringpool_next_size(stringpool* p) {
30
- return sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * sizeof(char));
30
+ return (uint32_t)sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * (uint32_t)sizeof(char));
31
31
  }
32
32
 
33
33
  uint32_t stringpool_initial_size() {
34
- return sizeof(stringpool) + INITIAL_POOL_SIZE;
34
+ return (uint32_t)sizeof(stringpool) + INITIAL_POOL_SIZE;
35
35
  }
36
36
 
37
37
  void stringpool_bump_size(stringpool* p) {
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
15
15
  #define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
16
16
  #define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
17
17
  #define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
18
- #define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
19
- #define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
20
- #define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
21
- #define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
18
+ #define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
19
+ #define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
20
+ #define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
21
+ #define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
22
22
 
23
23
  static const double HASH_UPPER = 0.77;
24
24
 
@@ -264,10 +264,10 @@ int termhash_needs_bump(termhash* h) {
264
264
  // n_buckets terms for the keys
265
265
  // n_buckets uint32_t's for the vals (offsets into postings lists)
266
266
  static uint32_t size(uint32_t n_buckets) {
267
- uint32_t size = sizeof(termhash) +
268
- (((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
269
- (n_buckets * sizeof(term)) +
270
- (n_buckets * sizeof(uint32_t));
267
+ uint32_t size = (uint32_t)sizeof(termhash) +
268
+ (((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
269
+ (n_buckets * (uint32_t)sizeof(term)) +
270
+ (n_buckets * (uint32_t)sizeof(uint32_t));
271
271
 
272
272
  DEBUG("size of a termhash with %u buckets is %lu + %lu + %lu + %lu = %u",
273
273
  n_buckets,
@@ -1064,7 +1064,7 @@ YY_RULE_SETUP
1064
1064
  #line 23 "tokenizer.lex"
1065
1065
  {
1066
1066
  yyextra->start = yyextra->end;
1067
- yyextra->end += yyleng;
1067
+ yyextra->end += (pos_t)yyleng;
1068
1068
  return TOK_NUMBER;
1069
1069
  }
1070
1070
  YY_BREAK
@@ -1073,7 +1073,7 @@ YY_RULE_SETUP
1073
1073
  #line 29 "tokenizer.lex"
1074
1074
  {
1075
1075
  yyextra->start = yyextra->end;
1076
- yyextra->end += yyleng;
1076
+ yyextra->end += (pos_t)yyleng;
1077
1077
  return TOK_WORD;
1078
1078
  }
1079
1079
  YY_BREAK
@@ -1082,7 +1082,7 @@ YY_RULE_SETUP
1082
1082
  #line 35 "tokenizer.lex"
1083
1083
  {
1084
1084
  yyextra->start = yyextra->end;
1085
- yyextra->end += yyleng;
1085
+ yyextra->end += (pos_t)yyleng;
1086
1086
  return TOK_WORD;
1087
1087
  }
1088
1088
  YY_BREAK
@@ -12,4 +12,7 @@
12
12
  #include "query-parser.h"
13
13
  #include "error.h"
14
14
 
15
+ // see comments in index.c
16
+ char* strdup(const char* old);
17
+
15
18
  #endif
@@ -9,12 +9,6 @@ static VALUE c_query;
9
9
  static VALUE c_error;
10
10
  static VALUE c_parseerror;
11
11
 
12
- static char* strdup(const char* old) { // wtf stupid
13
- size_t len = strlen(old) + 1;
14
- char *new = malloc(len * sizeof(char));
15
- return (char *)memcpy(new, old, len);
16
- }
17
-
18
12
  static void index_free(wp_index* index) {
19
13
  wp_error* e = wp_index_free(index);
20
14
  //printf("# index free at %p with error %p\n", index, e);
metadata CHANGED
@@ -1,11 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whistlepig
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 15
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 1
8
- version: "0.1"
8
+ - 2
9
+ version: "0.2"
9
10
  platform: ruby
10
11
  authors:
11
12
  - William Morgan
@@ -13,7 +14,7 @@ autorequire:
13
14
  bindir: bin
14
15
  cert_chain: []
15
16
 
16
- date: 2011-02-08 21:42:39 -08:00
17
+ date: 2011-02-09 20:32:41 -08:00
17
18
  default_executable:
18
19
  dependencies: []
19
20
 
@@ -81,6 +82,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
81
82
  requirements:
82
83
  - - ">="
83
84
  - !ruby/object:Gem::Version
85
+ hash: 3
84
86
  segments:
85
87
  - 0
86
88
  version: "0"
@@ -89,6 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
91
  requirements:
90
92
  - - ">="
91
93
  - !ruby/object:Gem::Version
94
+ hash: 3
92
95
  segments:
93
96
  - 0
94
97
  version: "0"