whistlepig 0.1 → 0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README CHANGED
@@ -8,32 +8,33 @@ the frills, Whistlepig may be for you.
8
8
  Whistlepig is written in ANSI C99. It currently provides a C API and Ruby
9
9
  bindings.
10
10
 
11
- Latest version: 0.1, released 2010-02-08.
11
+ Latest version: 0.2, released 2011-02-09.
12
12
  Status: alpha
13
13
  News: http://all-thing.net/label/whistlepig/
14
14
  Homepage: http://masanjin.net/whistlepig/
15
+ Bug reports: http://github.com/wmorgan/whistlepig/issues
15
16
 
16
17
  = Getting it
17
18
 
18
- Tarball: http://masanjin.net/whistlepig/whistlepig-0.1.tar.gz
19
+ Tarball: http://masanjin.net/whistlepig/whistlepig-0.2.tar.gz
19
20
  Rubygem: gem install whistlepig
20
21
  Git: git clone git://github.com/wmorgan/whistlepig.git
21
22
 
22
23
  = Realtime search
23
24
 
24
25
  Roughly speaking, realtime search means:
25
- - documents are available to to queries immediately after indexing, without
26
- any further index merging steps; and
26
+ - documents are available to queries immediately after indexing, without any
27
+ reindexing or index merging;
27
28
  - later documents are more important than earlier documents.
28
29
 
29
- Whistlepig takes these principles to an extreme. In particular:
30
- - It only returns documents in the reverse order to which they were added
31
- (i.e. LIFO order), and performs no ranking, reordering, or scoring.
30
+ Whistlepig takes these principles to an extreme.
31
+ - It only returns documents in the reverse of the order in which they were
32
+ added (LIFO), and performs no ranking, reordering, or scoring.
32
33
  - It only supports incremental indexing. There is no notion of batch indexing
33
34
  or index merging.
34
35
  - It does not support document deletion or modification (except in the
35
36
  special case of labels; see below).
36
- - In only supports in-memory indexes.
37
+ - It only supports in-memory indexes.
37
38
 
38
39
  Features that Whistlepig does provide:
39
40
  - Incremental indexing. Updates to the index are immediately available to
@@ -42,10 +43,8 @@ Features that Whistlepig does provide:
42
43
  - A full query language and parser with conjunctions, disjunctions, phrases,
43
44
  negations, grouping, and nesting.
44
45
  - Labels: arbitrary tokens which can be added to and removed from documents
45
- at any point, and incorporated into search queries. (This is the only
46
- mutable aspect of a document once it has been indexed.)
47
- - Early query termination.
48
- - Resumable queries.
46
+ at any point, and incorporated into search queries.
47
+ - Early query termination and resumable queries.
49
48
  - A tiny, < 3 KLOC ANSI C99 implementation.
50
49
 
51
50
  == Synopsis (using Ruby bindings)
@@ -81,6 +80,6 @@ Features that Whistlepig does provide:
81
80
  Whistlepig is currently single-process and single-thread only. However, it is
82
81
  built with multi-process access in mind. Per-segment single-writer,
83
82
  multi-reader support is planned in the near future. Multi-writer support can be
84
- accomplished via index striping and is planned for the distant future.
83
+ accomplished via index striping and may be attempted in the distant future.
85
84
 
86
- Please send bug reports and comments to: wmorgan-whistlepig-design@masanjin.net.
85
+ Please send bug reports and comments to: wmorgan-whistlepig-readme@masanjin.net.
@@ -46,7 +46,7 @@ wp_entry* wp_entry_new() {
46
46
  return ret;
47
47
  }
48
48
 
49
- RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term, int field_len, int term_len)) {
49
+ RAISING_STATIC(add_token(wp_entry* entry, const char* field, const char* term, size_t field_len, size_t term_len)) {
50
50
  fielded_term ft;
51
51
  int status;
52
52
 
@@ -89,7 +89,7 @@ uint32_t wp_entry_size(wp_entry* entry) {
89
89
 
90
90
  RAISING_STATIC(add_from_lexer(wp_entry* entry, yyscan_t* scanner, const char* field)) {
91
91
  int token_type;
92
- int field_len = strlen(field);
92
+ size_t field_len = strlen(field);
93
93
 
94
94
  do {
95
95
  token_type = yylex(*scanner);
@@ -1,6 +1,6 @@
1
1
  require 'mkmf'
2
2
 
3
- $CFLAGS = "-g -O3 -std=c99 $(cflags)"
3
+ $CFLAGS = "-g -O3 -std=c99 $(cflags) -D_ANSI_SOURCE"
4
4
 
5
5
  create_header
6
6
  create_makefile "whistlepigc"
@@ -1,3 +1,4 @@
1
+ #include <inttypes.h>
1
2
  #include <stdio.h>
2
3
  #include <sys/types.h>
3
4
  #include <sys/stat.h>
@@ -261,7 +262,7 @@ wp_error* wp_index_add_label(wp_index* index, const char* label, uint64_t doc_id
261
262
  else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
262
263
  }
263
264
 
264
- if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
265
+ if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
265
266
 
266
267
  return NO_ERROR;
267
268
  }
@@ -279,7 +280,7 @@ wp_error* wp_index_remove_label(wp_index* index, const char* label, uint64_t doc
279
280
  else DEBUG("did not find doc %llu in segment %u", doc_id, i - 1);
280
281
  }
281
282
 
282
- if(!found) RAISE_ERROR("couldn't find doc id %llu", doc_id);
283
+ if(!found) RAISE_ERROR("couldn't find doc id %"PRIu64, doc_id);
283
284
 
284
285
  return NO_ERROR;
285
286
  }
@@ -292,3 +293,11 @@ uint64_t wp_index_num_docs(wp_index* index) {
292
293
 
293
294
  return ret;
294
295
  }
296
+
297
// Return a freshly allocated copy of the NUL-terminated string `old`, or NULL
// if the allocation fails. The caller owns the returned buffer and releases it
// with free().
//
// strdup() is not defined in c99, so it is provided here; it lives in this
// file only because there is no "utils.c" or "compat.c" yet.
char* strdup(const char* old) {
  size_t len = strlen(old) + 1; // +1 so the terminating NUL is copied too
  char* copy = malloc(len);
  if(copy == NULL) return NULL; // memcpy into a NULL destination is undefined behavior
  return memcpy(copy, old, len);
}
@@ -1,3 +1,8 @@
1
+ /* modified by william morgan to cast sizeof() properly.
2
+ otherwise it has trouble compiling on 64-bit platforms with
3
+ -Wshorten-64-to-32 enabled.
4
+ */
5
+
1
6
  /* The MIT License
2
7
 
3
8
  Copyright (c) 2008, by Attractive Chaos <attractivechaos@aol.co.uk>
@@ -102,10 +107,10 @@ static const uint32_t __ac_prime_list[__ac_HASH_PRIME_SIZE] =
102
107
  #define __ac_isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
103
108
  #define __ac_isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
104
109
  #define __ac_iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
105
- #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
106
- #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
107
- #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
108
- #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
110
+ #define __ac_set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
111
+ #define __ac_set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
112
+ #define __ac_set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
113
+ #define __ac_set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
109
114
 
110
115
  static const double __ac_HASH_UPPER = 0.77;
111
116
 
@@ -13,7 +13,7 @@ wp_error* mmap_obj_create(mmap_obj* o, const char* magic, const char* pathname,
13
13
  o->fd = open(pathname, O_EXCL | O_CREAT | O_RDWR, 0640);
14
14
  if(o->fd == -1) RAISE_SYSERROR("cannot create %s", pathname);
15
15
 
16
- uint32_t size = initial_size + sizeof(mmap_obj_header);
16
+ uint32_t size = initial_size + (uint32_t)sizeof(mmap_obj_header);
17
17
  DEBUG("creating %s with %u + %u = %u bytes for %s object", pathname, initial_size, sizeof(mmap_obj_header), size, magic);
18
18
  lseek(o->fd, size - 1, SEEK_SET);
19
19
  ssize_t num_bytes = write(o->fd, "", 1);
@@ -39,7 +39,7 @@ wp_error* mmap_obj_load(mmap_obj* o, const char* magic, const char* pathname) {
39
39
 
40
40
  RELAY_ERROR(validate(o->header, magic));
41
41
 
42
- uint32_t size = o->header->size + sizeof(mmap_obj_header);
42
+ uint32_t size = o->header->size + (uint32_t)sizeof(mmap_obj_header);
43
43
  DEBUG("full size is %u bytes (including %u-byte header)", size, sizeof(mmap_obj_header));
44
44
  if(munmap(o->header, sizeof(mmap_obj_header)) == -1) RAISE_SYSERROR("munmap");
45
45
 
@@ -54,7 +54,7 @@ wp_error* mmap_obj_resize(mmap_obj* o, uint32_t data_size) {
54
54
  DEBUG("going to expand from %u to %u bytes. current header is at %p", o->header->size, data_size, o->header);
55
55
 
56
56
  if(munmap(o->header, sizeof(mmap_obj_header) + o->header->size) == -1) RAISE_SYSERROR("munmap");
57
- uint32_t size = data_size + sizeof(mmap_obj_header);
57
+ uint32_t size = data_size + (uint32_t)sizeof(mmap_obj_header);
58
58
 
59
59
  lseek(o->fd, size - 1, SEEK_SET);
60
60
  ssize_t num_bytes = write(o->fd, "", 1);
@@ -677,6 +677,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
677
677
  #line 1 "query-parser.lex"
678
678
  #line 2 "query-parser.lex"
679
679
  #include <string.h>
680
+ #include "whistlepig.h"
680
681
  #include "query-parser.h"
681
682
  #include "query-parser.tab.h"
682
683
 
@@ -694,7 +695,7 @@ static yyconst flex_int32_t yy_rule_can_match_eol[7] =
694
695
  } \
695
696
  }
696
697
 
697
- #line 698 "query-parser.lex.c"
698
+ #line 699 "query-parser.lex.c"
698
699
 
699
700
  #define INITIAL 0
700
701
 
@@ -924,10 +925,10 @@ YY_DECL
924
925
  register int yy_act;
925
926
  struct yyguts_t * yyg = (struct yyguts_t*)yyscanner;
926
927
 
927
- #line 31 "query-parser.lex"
928
+ #line 32 "query-parser.lex"
928
929
 
929
930
 
930
- #line 931 "query-parser.lex.c"
931
+ #line 932 "query-parser.lex.c"
931
932
 
932
933
  yylval = yylval_param;
933
934
 
@@ -1023,12 +1024,12 @@ do_action: /* This label is used only to access EOF actions. */
1023
1024
 
1024
1025
  case 1:
1025
1026
  YY_RULE_SETUP
1026
- #line 33 "query-parser.lex"
1027
+ #line 34 "query-parser.lex"
1027
1028
  return OR;
1028
1029
  YY_BREAK
1029
1030
  case 2:
1030
1031
  YY_RULE_SETUP
1031
- #line 35 "query-parser.lex"
1032
+ #line 36 "query-parser.lex"
1032
1033
  {
1033
1034
  yylval->string = strdup(yytext);
1034
1035
  return WORD;
@@ -1036,7 +1037,7 @@ YY_RULE_SETUP
1036
1037
  YY_BREAK
1037
1038
  case 3:
1038
1039
  YY_RULE_SETUP
1039
- #line 40 "query-parser.lex"
1040
+ #line 41 "query-parser.lex"
1040
1041
  {
1041
1042
  yylval->string = strdup(yytext);
1042
1043
  return WORD;
@@ -1045,20 +1046,20 @@ YY_RULE_SETUP
1045
1046
  case 4:
1046
1047
  /* rule 4 can match eol */
1047
1048
  YY_RULE_SETUP
1048
- #line 45 "query-parser.lex"
1049
+ #line 46 "query-parser.lex"
1049
1050
  { } ; // nothing
1050
1051
  YY_BREAK
1051
1052
  case 5:
1052
1053
  YY_RULE_SETUP
1053
- #line 47 "query-parser.lex"
1054
+ #line 48 "query-parser.lex"
1054
1055
  return yytext[0];
1055
1056
  YY_BREAK
1056
1057
  case 6:
1057
1058
  YY_RULE_SETUP
1058
- #line 49 "query-parser.lex"
1059
+ #line 50 "query-parser.lex"
1059
1060
  ECHO;
1060
1061
  YY_BREAK
1061
- #line 1062 "query-parser.lex.c"
1062
+ #line 1063 "query-parser.lex.c"
1062
1063
  case YY_STATE_EOF(INITIAL):
1063
1064
  yyterminate();
1064
1065
 
@@ -2242,7 +2243,7 @@ void query_parser_free (void * ptr , yyscan_t yyscanner)
2242
2243
 
2243
2244
  #define YYTABLES_NAME "yytables"
2244
2245
 
2245
- #line 49 "query-parser.lex"
2246
+ #line 50 "query-parser.lex"
2246
2247
 
2247
2248
 
2248
2249
 
@@ -351,7 +351,7 @@ extern int query_parser_lex \
351
351
  #undef YY_DECL
352
352
  #endif
353
353
 
354
- #line 49 "query-parser.lex"
354
+ #line 50 "query-parser.lex"
355
355
 
356
356
 
357
357
  #line 358 "query-parser.lex.h"
@@ -1,3 +1,4 @@
1
+ #include "whistlepig.h"
1
2
  #include "query.h"
2
3
 
3
4
  static wp_query* wp_query_new() {
@@ -11,12 +12,6 @@ static wp_query* wp_query_new() {
11
12
  return ret;
12
13
  }
13
14
 
14
- static char* strdup(const char* old) { // sigh... not in c99
15
- size_t len = strlen(old) + 1;
16
- char *new = malloc(len * sizeof(char));
17
- return memcpy(new, old, len);
18
- }
19
-
20
15
  wp_query* wp_query_clone(wp_query* other) {
21
16
  wp_query* ret = malloc(sizeof(wp_query));
22
17
  ret->type = other->type;
@@ -128,13 +123,13 @@ static int subquery_to_s(wp_query* q, size_t n, char* buf) {
128
123
  buf += wp_query_to_s(child, n - (buf - orig_buf), buf);
129
124
  }
130
125
 
131
- return buf - orig_buf;
126
+ return (int)(buf - orig_buf);
132
127
  }
133
128
 
134
129
  #define min(a, b) (a < b ? a : b)
135
130
 
136
- int wp_query_to_s(wp_query* q, size_t n, char* buf) {
137
- int ret;
131
+ size_t wp_query_to_s(wp_query* q, size_t n, char* buf) {
132
+ size_t ret;
138
133
  char* orig_buf = buf;
139
134
 
140
135
  if(q->type == WP_QUERY_EMPTY) {
@@ -73,6 +73,6 @@ wp_query* wp_query_set_all_child_fields(wp_query* q, const char* field);
73
73
  void wp_query_free(wp_query* q);
74
74
 
75
75
  // public: build a string representation of a query by writing at most n chars to buf
76
- int wp_query_to_s(wp_query* q, size_t n, char* buf);
76
+ size_t wp_query_to_s(wp_query* q, size_t n, char* buf);
77
77
 
78
78
  #endif
@@ -241,8 +241,8 @@ wp_error* wp_segment_ensure_fit(wp_segment* seg, uint32_t postings_bytes, uint32
241
241
 
242
242
  static uint32_t size_of(uint32_t num_positions, pos_t positions[]) {
243
243
  (void)positions;
244
- uint32_t position_size = sizeof(pos_t) * num_positions;
245
- uint32_t size = sizeof(posting) - sizeof(pos_t*) + position_size;
244
+ uint32_t position_size = (uint32_t)sizeof(pos_t) * num_positions;
245
+ uint32_t size = (uint32_t)sizeof(posting) - (uint32_t)sizeof(pos_t*) + position_size;
246
246
 
247
247
  return size;
248
248
  }
@@ -253,23 +253,22 @@ wp_error* wp_segment_sizeof_posarray(wp_segment* seg, uint32_t num_positions, po
253
253
  return NO_ERROR;
254
254
  }
255
255
 
256
- #define BITMASK 0x7f
257
-
256
+ #define VALUE_BITMASK 0x7f
258
257
  RAISING_STATIC(write_multibyte(uint8_t* location, uint32_t val, uint32_t* size)) {
259
258
  //printf("xx writing %u to position %p as:\n", val, location);
260
259
  uint8_t* start = location;
261
260
 
262
- while(val > BITMASK) {
263
- uint8_t c = (val & BITMASK) | 0x80;
261
+ while(val > VALUE_BITMASK) {
262
+ uint8_t c = (val & VALUE_BITMASK) | 0x80;
264
263
  *location = c;
265
264
  //printf("xx %d = %d | %d at %p\n", c, val & BITMASK, 0x80, location);
266
265
  location++;
267
266
  val >>= 7;
268
267
  }
269
- uint8_t c = (val & BITMASK);
268
+ uint8_t c = (val & VALUE_BITMASK);
270
269
  *location = c;
271
270
  //printf("xx %d at %p\n", c, location);
272
- *size = location + 1 - start;
271
+ *size = (uint32_t)(location + 1 - start);
273
272
  //printf("xx total %u bytes\n", *size);
274
273
  return NO_ERROR;
275
274
  }
@@ -287,7 +286,7 @@ RAISING_STATIC(read_multibyte(uint8_t* location, uint32_t* val, uint32_t* size))
287
286
  }
288
287
  *val |= *location << shift;
289
288
  //printf("yy read final byte %d at %p\n", *location, location);
290
- *size = location + 1 - start;
289
+ *size = (uint32_t)(location + 1 - start);
291
290
  //printf("yy total %d bytes, val = %d\n\n", *size, *val);
292
291
  return NO_ERROR;
293
292
  }
@@ -522,7 +521,7 @@ wp_error* wp_segment_add_label(wp_segment* s, const char* label, docid_t doc_id)
522
521
  po->doc_id = doc_id;
523
522
  po->next_offset = next_offset;
524
523
 
525
- pr->postings_head += sizeof(label_posting);
524
+ pr->postings_head += (uint32_t)sizeof(label_posting);
526
525
  DEBUG("label postings list head now at %u", pr->postings_head);
527
526
 
528
527
  // really finally, update either the previous offset or the tail pointer
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
15
15
  #define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
16
16
  #define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
17
17
  #define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
18
- #define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
19
- #define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
20
- #define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
21
- #define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
18
+ #define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
19
+ #define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
20
+ #define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
21
+ #define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
22
22
 
23
23
  static const double HASH_UPPER = 0.77;
24
24
 
@@ -234,9 +234,9 @@ int stringmap_needs_bump(stringmap* h) {
234
234
  // ((n_buckets >> 4) + 1) uint32_t's for the flags
235
235
  // n_buckets uint32_t for the keys
236
236
  static uint32_t size(uint32_t n_buckets) {
237
- uint32_t size = sizeof(stringmap) +
238
- (((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
239
- (n_buckets * sizeof(uint32_t));
237
+ uint32_t size = (uint32_t)sizeof(stringmap) +
238
+ (((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
239
+ (n_buckets * (uint32_t)sizeof(uint32_t));
240
240
  return size;
241
241
  }
242
242
 
@@ -6,11 +6,11 @@ void stringpool_init(stringpool* p) {
6
6
  }
7
7
 
8
8
  uint32_t stringpool_size(stringpool* p) {
9
- return sizeof(stringpool) + (p->size * sizeof(char));
9
+ return (uint32_t)sizeof(stringpool) + (p->size * (uint32_t)sizeof(char));
10
10
  }
11
11
 
12
12
  uint32_t stringpool_add(stringpool* p, const char* s) {
13
- int len = strlen(s) + 1;
13
+ uint32_t len = (uint32_t)strlen(s) + 1;
14
14
  if((p->next + len) >= p->size) {
15
15
  DEBUG("out of space in string pool for %s (len %d, next %d, size %d)", s, len, p->next, p->size);
16
16
  return (uint32_t)-1;
@@ -27,11 +27,11 @@ int stringpool_needs_bump(stringpool* p) {
27
27
  }
28
28
 
29
29
  uint32_t stringpool_next_size(stringpool* p) {
30
- return sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * sizeof(char));
30
+ return (uint32_t)sizeof(stringpool) + (2 * (p->size == 0 ? 1 : p->size) * (uint32_t)sizeof(char));
31
31
  }
32
32
 
33
33
  uint32_t stringpool_initial_size() {
34
- return sizeof(stringpool) + INITIAL_POOL_SIZE;
34
+ return (uint32_t)sizeof(stringpool) + INITIAL_POOL_SIZE;
35
35
  }
36
36
 
37
37
  void stringpool_bump_size(stringpool* p) {
@@ -15,10 +15,10 @@ static const uint32_t prime_list[] = {
15
15
  #define isempty(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&2)
16
16
  #define isdel(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&1)
17
17
  #define iseither(flag, i) ((flag[i>>4]>>((i&0xfU)<<1))&3)
18
- #define set_isdel_false(flag, i) (flag[i>>4]&=~(1ul<<((i&0xfU)<<1)))
19
- #define set_isempty_false(flag, i) (flag[i>>4]&=~(2ul<<((i&0xfU)<<1)))
20
- #define set_isboth_false(flag, i) (flag[i>>4]&=~(3ul<<((i&0xfU)<<1)))
21
- #define set_isdel_true(flag, i) (flag[i>>4]|=1ul<<((i&0xfU)<<1))
18
+ #define set_isdel_false(flag, i) (flag[i>>4]&=~(uint32_t)(1ul<<((i&0xfU)<<1)))
19
+ #define set_isempty_false(flag, i) (flag[i>>4]&=~(uint32_t)(2ul<<((i&0xfU)<<1)))
20
+ #define set_isboth_false(flag, i) (flag[i>>4]&=~(uint32_t)(3ul<<((i&0xfU)<<1)))
21
+ #define set_isdel_true(flag, i) (flag[i>>4]|=(uint32_t)(1ul<<((i&0xfU)<<1)))
22
22
 
23
23
  static const double HASH_UPPER = 0.77;
24
24
 
@@ -264,10 +264,10 @@ int termhash_needs_bump(termhash* h) {
264
264
  // n_buckets terms for the keys
265
265
  // n_buckets uint32_t's for the vals (offsets into postings lists)
266
266
  static uint32_t size(uint32_t n_buckets) {
267
- uint32_t size = sizeof(termhash) +
268
- (((n_buckets >> 4) + 1) * sizeof(uint32_t)) +
269
- (n_buckets * sizeof(term)) +
270
- (n_buckets * sizeof(uint32_t));
267
+ uint32_t size = (uint32_t)sizeof(termhash) +
268
+ (((n_buckets >> 4) + 1) * (uint32_t)sizeof(uint32_t)) +
269
+ (n_buckets * (uint32_t)sizeof(term)) +
270
+ (n_buckets * (uint32_t)sizeof(uint32_t));
271
271
 
272
272
  DEBUG("size of a termhash with %u buckets is %lu + %lu + %lu + %lu = %u",
273
273
  n_buckets,
@@ -1064,7 +1064,7 @@ YY_RULE_SETUP
1064
1064
  #line 23 "tokenizer.lex"
1065
1065
  {
1066
1066
  yyextra->start = yyextra->end;
1067
- yyextra->end += yyleng;
1067
+ yyextra->end += (pos_t)yyleng;
1068
1068
  return TOK_NUMBER;
1069
1069
  }
1070
1070
  YY_BREAK
@@ -1073,7 +1073,7 @@ YY_RULE_SETUP
1073
1073
  #line 29 "tokenizer.lex"
1074
1074
  {
1075
1075
  yyextra->start = yyextra->end;
1076
- yyextra->end += yyleng;
1076
+ yyextra->end += (pos_t)yyleng;
1077
1077
  return TOK_WORD;
1078
1078
  }
1079
1079
  YY_BREAK
@@ -1082,7 +1082,7 @@ YY_RULE_SETUP
1082
1082
  #line 35 "tokenizer.lex"
1083
1083
  {
1084
1084
  yyextra->start = yyextra->end;
1085
- yyextra->end += yyleng;
1085
+ yyextra->end += (pos_t)yyleng;
1086
1086
  return TOK_WORD;
1087
1087
  }
1088
1088
  YY_BREAK
@@ -12,4 +12,7 @@
12
12
  #include "query-parser.h"
13
13
  #include "error.h"
14
14
 
15
+ // see comments in index.c
16
+ char* strdup(const char* old);
17
+
15
18
  #endif
@@ -9,12 +9,6 @@ static VALUE c_query;
9
9
  static VALUE c_error;
10
10
  static VALUE c_parseerror;
11
11
 
12
- static char* strdup(const char* old) { // wtf stupid
13
- size_t len = strlen(old) + 1;
14
- char *new = malloc(len * sizeof(char));
15
- return (char *)memcpy(new, old, len);
16
- }
17
-
18
12
  static void index_free(wp_index* index) {
19
13
  wp_error* e = wp_index_free(index);
20
14
  //printf("# index free at %p with error %p\n", index, e);
metadata CHANGED
@@ -1,11 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: whistlepig
3
3
  version: !ruby/object:Gem::Version
4
+ hash: 15
4
5
  prerelease: false
5
6
  segments:
6
7
  - 0
7
- - 1
8
- version: "0.1"
8
+ - 2
9
+ version: "0.2"
9
10
  platform: ruby
10
11
  authors:
11
12
  - William Morgan
@@ -13,7 +14,7 @@ autorequire:
13
14
  bindir: bin
14
15
  cert_chain: []
15
16
 
16
- date: 2011-02-08 21:42:39 -08:00
17
+ date: 2011-02-09 20:32:41 -08:00
17
18
  default_executable:
18
19
  dependencies: []
19
20
 
@@ -81,6 +82,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
81
82
  requirements:
82
83
  - - ">="
83
84
  - !ruby/object:Gem::Version
85
+ hash: 3
84
86
  segments:
85
87
  - 0
86
88
  version: "0"
@@ -89,6 +91,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
89
91
  requirements:
90
92
  - - ">="
91
93
  - !ruby/object:Gem::Version
94
+ hash: 3
92
95
  segments:
93
96
  - 0
94
97
  version: "0"