whistlepig 0.9.1 → 0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -33,20 +33,15 @@ static inline int string_equals(const char* a, const char* b) {
33
33
  return strcmp(a, b) == 0;
34
34
  }
35
35
 
36
- // set flags, keys and vals to correct locations based on h->n_buckets
37
- void stringmap_setup(stringmap* h, stringpool* p) {
38
- h->pool = p;
39
- h->flags = (uint32_t*)h->boundary;
40
- h->keys = (uint32_t*)((uint32_t*)h->boundary + ((h->n_buckets >> 4) + 1));
41
- }
36
+ #define STRINGMAP_FLAGS(h) ((uint32_t*)(h)->boundary)
37
+ #define STRINGMAP_KEYS(h) ((uint32_t*)((uint32_t*)(h)->boundary + (((h)->n_buckets >> 4) + 1)))
42
38
 
43
- void stringmap_init(stringmap* h, stringpool* p) {
39
+ void stringmap_init(stringmap* h) {
44
40
  h->n_buckets_idx = INITIAL_N_BUCKETS_IDX;
45
41
  h->n_buckets = prime_list[h->n_buckets_idx];
46
42
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
47
43
  h->size = h->n_occupied = 0;
48
- stringmap_setup(h, p);
49
- memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
44
+ memset(STRINGMAP_FLAGS(h), 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
50
45
  }
51
46
 
52
47
  /*
@@ -66,22 +61,25 @@ static void kh_clear_##name(kh_##name##_t *h) {
66
61
  }
67
62
  */
68
63
 
69
- uint32_t stringmap_get(stringmap *h, const char* key) {
64
+ uint32_t stringmap_get(stringmap *h, stringpool* pool, const char* key) {
65
+ uint32_t* flags = STRINGMAP_FLAGS(h);
66
+ uint32_t* keys = STRINGMAP_KEYS(h);
67
+
70
68
  if(h->n_buckets) {
71
69
  uint32_t inc, k, i, last;
72
70
  k = string_hash(key); i = k % h->n_buckets;
73
71
  inc = 1 + k % (h->n_buckets - 1); last = i;
74
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !string_equals(stringpool_lookup(h->pool, h->keys[i]), key))) {
72
+ while (!isempty(flags, i) && (isdel(flags, i) || !string_equals(stringpool_lookup(pool, keys[i]), key))) {
75
73
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
76
74
  else i += inc;
77
75
  if (i == last) return h->n_buckets;
78
76
  }
79
- return iseither(h->flags, i)? h->n_buckets : i;
77
+ return iseither(flags, i)? h->n_buckets : i;
80
78
  }
81
79
  else return 0;
82
80
  }
83
81
 
84
- wp_error* stringmap_bump_size(stringmap *h) {
82
+ wp_error* stringmap_bump_size(stringmap *h, stringpool* pool) {
85
83
  DEBUG("bumping size for string hash at %p with size %u and boundary %p", h, stringmap_size(h), h->boundary);
86
84
 
87
85
  if(h->n_buckets_idx >= (HASH_PRIME_SIZE - 1)) RAISE_ERROR("stringmap can't be this big");
@@ -89,51 +87,54 @@ wp_error* stringmap_bump_size(stringmap *h) {
89
87
  h->n_buckets_idx++;
90
88
  uint32_t new_n_buckets = prime_list[h->n_buckets_idx];
91
89
 
92
- // first make a backup of the oldflags
93
- size_t oldflagsize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
94
- uint32_t* oldflags = malloc(oldflagsize);
95
- memcpy(oldflags, h->flags, oldflagsize);
90
+ // get pointers to the old locations
91
+ uint32_t* oldkeys = STRINGMAP_KEYS(h);
92
+ uint32_t* oldflags = STRINGMAP_FLAGS(h);
96
93
 
97
- // keep pointers to the old locations
98
- uint32_t* oldkeys = h->keys;
94
+ // make a backup of the old flags in a separate memory region
95
+ size_t flagbaksize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
96
+ uint32_t* flagbaks = malloc(flagbaksize);
97
+ memcpy(flagbaks, oldflags, flagbaksize);
99
98
 
100
- // set pointers to the new locations
101
- h->keys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
99
+ // get pointers to the new locations
100
+ //h->keys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
101
+ uint32_t* newflags = (uint32_t*)h->boundary; // unchanged, actually
102
+ uint32_t* newkeys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
102
103
 
103
104
  // move the keys
104
- memmove(h->keys, oldkeys, h->n_buckets * sizeof(uint32_t));
105
+ memmove(newkeys, oldkeys, h->n_buckets * sizeof(uint32_t));
105
106
 
106
107
  // clear the new flags
107
- memset(h->flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
108
+ memset(STRINGMAP_FLAGS(h), 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
108
109
 
109
110
  // do the complicated stuff from khash.h
110
111
  for (unsigned int j = 0; j != h->n_buckets; ++j) {
111
- if (iseither(oldflags, j) == 0) {
112
- uint32_t key = h->keys[j];
113
- set_isdel_true(oldflags, j);
112
+ if (iseither(flagbaks, j) == 0) {
113
+ uint32_t key = newkeys[j];
114
+ set_isdel_true(flagbaks, j);
114
115
  while (1) {
115
116
  uint32_t inc, k, i;
116
- k = string_hash(stringpool_lookup(h->pool, key));
117
+ k = string_hash(stringpool_lookup(pool, key));
117
118
  i = k % new_n_buckets;
118
119
  inc = 1 + k % (new_n_buckets - 1);
119
- while (!isempty(h->flags, i)) {
120
+ while (!isempty(newflags, i)) {
120
121
  if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets;
121
122
  else i += inc;
122
123
  }
123
- set_isempty_false(h->flags, i);
124
- if (i < h->n_buckets && iseither(oldflags, i) == 0) {
125
- { uint32_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
126
- set_isdel_true(oldflags, i);
124
+ set_isempty_false(newflags, i);
125
+ if (i < h->n_buckets && iseither(flagbaks, i) == 0) {
126
+ { uint32_t tmp = newkeys[i]; newkeys[i] = key; key = tmp; }
127
+ set_isdel_true(flagbaks, i);
127
128
  } else {
128
- h->keys[i] = key;
129
+ newkeys[i] = key;
129
130
  break;
130
131
  }
131
132
  }
132
133
  }
133
134
  }
134
135
 
135
- free(oldflags);
136
- h->n_buckets = new_n_buckets;
136
+ free(flagbaks);
137
+ h->n_buckets = new_n_buckets; // STRINGMAP_KEYS now works
137
138
  h->n_occupied = h->size;
138
139
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
139
140
 
@@ -144,8 +145,10 @@ wp_error* stringmap_bump_size(stringmap *h) {
144
145
  return NO_ERROR;
145
146
  }
146
147
 
147
- uint32_t stringmap_put(stringmap *h, const char* key, int *ret) {
148
+ uint32_t stringmap_put(stringmap *h, stringpool* pool, const char* key, int *ret) {
148
149
  uint32_t x;
150
+ uint32_t* flags = STRINGMAP_FLAGS(h);
151
+ uint32_t* keys = STRINGMAP_KEYS(h);
149
152
 
150
153
  {
151
154
  #ifdef DEBUGOUTPUT
@@ -154,27 +157,27 @@ int num_loops = 0;
154
157
  uint32_t inc, k, i, site, last;
155
158
  x = site = h->n_buckets; k = string_hash(key); i = k % h->n_buckets;
156
159
  //DEBUG("asked to hash '%s'. initial hash is %u => %u and n_occupied is %u", key, k, i, h->n_occupied);
157
- if (isempty(h->flags, i)) x = i;
160
+ if (isempty(flags, i)) x = i;
158
161
  else {
159
162
  inc = 1 + k % (h->n_buckets - 1); last = i;
160
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !string_equals(stringpool_lookup(h->pool, h->keys[i]), key))) {
163
+ while (!isempty(flags, i) && (isdel(flags, i) || !string_equals(stringpool_lookup(pool, keys[i]), key))) {
161
164
  #ifdef DEBUGOUTPUT
162
165
  num_loops++;
163
166
  #endif
164
- if (isdel(h->flags, i)) site = i;
167
+ if (isdel(flags, i)) site = i;
165
168
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
166
169
  else i += inc;
167
170
  if (i == last) { x = site; break; }
168
171
  }
169
172
  if ((x == h->n_buckets) && (i == last)) { // out of space
170
- if(!string_equals(stringpool_lookup(h->pool, h->keys[i]), key)) {
173
+ if(!string_equals(stringpool_lookup(pool, keys[i]), key)) {
171
174
  DEBUG("out of space!");
172
175
  *ret = -1;
173
176
  return x;
174
177
  }
175
178
  }
176
179
  if (x == h->n_buckets) { // didn't find it on the first try
177
- if (isempty(h->flags, i) && site != h->n_buckets) x = site;
180
+ if (isempty(flags, i) && site != h->n_buckets) x = site;
178
181
  else x = i;
179
182
  }
180
183
  }
@@ -185,15 +188,15 @@ num_loops++;
185
188
  //DEBUG("for pos %u, isempty? %d and isdel %d", x, isempty(h->flags, x), isdel(h->flags, x));
186
189
 
187
190
  uint32_t idx;
188
- if(isempty(h->flags, x) || isdel(h->flags, x)) {
189
- idx = stringpool_add(h->pool, key);
191
+ if(isempty(flags, x) || isdel(flags, x)) {
192
+ idx = stringpool_add(pool, key);
190
193
  if(idx == (uint32_t)-1) {
191
194
  *ret = -2;
192
195
  return x;
193
196
  }
194
- if (isempty(h->flags, x)) ++h->n_occupied;
195
- h->keys[x] = idx;
196
- set_isboth_false(h->flags, x);
197
+ if (isempty(flags, x)) ++h->n_occupied;
198
+ keys[x] = idx;
199
+ set_isboth_false(flags, x);
197
200
  ++h->size;
198
201
  *ret = 1;
199
202
  }
@@ -203,8 +206,9 @@ num_loops++;
203
206
  }
204
207
 
205
208
  void stringmap_del(stringmap *h, uint32_t x) {
206
- if (x != h->n_buckets && !iseither(h->flags, x)) {
207
- set_isdel_true(h->flags, x);
209
+ uint32_t* flags = STRINGMAP_FLAGS(h);
210
+ if (x != h->n_buckets && !iseither(flags, x)) {
211
+ set_isdel_true(flags, x);
208
212
  --h->size;
209
213
  }
210
214
  }
@@ -255,24 +259,25 @@ uint32_t stringmap_next_size(stringmap* h) {
255
259
  return size(prime_list[next_idx]);
256
260
  }
257
261
 
258
- const char* stringmap_int_to_string(stringmap* h, uint32_t i) {
259
- return stringpool_lookup(h->pool, i);
262
+ const char* stringmap_int_to_string(stringmap* h, stringpool* p, uint32_t i) {
263
+ (void)h;
264
+ return stringpool_lookup(p, i);
260
265
  }
261
266
 
262
267
  // returns -1 if not found
263
- uint32_t stringmap_string_to_int(stringmap* h, const char* s) {
264
- uint32_t idx = stringmap_get(h, s);
268
+ uint32_t stringmap_string_to_int(stringmap* h, stringpool* pool, const char* s) {
269
+ uint32_t idx = stringmap_get(h, pool, s);
265
270
  if(idx == h->n_buckets) return (uint32_t)-1; // not there
266
- return h->keys[idx];
271
+ return STRINGMAP_KEYS(h)[idx];
267
272
  }
268
273
 
269
- wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) {
274
+ wp_error* stringmap_add(stringmap *h, stringpool* pool, const char* s, uint32_t* id) {
270
275
  int status;
271
- uint32_t idx = stringmap_put(h, s, &status);
276
+ uint32_t idx = stringmap_put(h, pool, s, &status);
272
277
  if(status == -1) RAISE_ERROR("out of space in hash put");
273
278
  if(status == -2) RAISE_ERROR("out of space in pool put");
274
279
 
275
- *id = h->keys[idx];
280
+ *id = STRINGMAP_KEYS(h)[idx];
276
281
 
277
282
  return NO_ERROR;
278
283
  }
@@ -10,9 +10,8 @@
10
10
  // and stringpool, it uses a slightly funny API that never allocates memory,
11
11
  // but instead operates on pointers to preallocated blocks of memory.
12
12
  //
13
- // uses a stringpool internally to do the int->string mapping. so if you're so
14
- // you shouldn't have to interact with the stringpool directly; you can just
15
- // use this object.
13
+ // uses a stringpool to do the int->string mapping. the stringmap does not hold
14
+ // a pointer to the pool; the caller passes it to each call that needs it.
16
15
  //
17
16
  // like termhash and pool, has a slightly funny API that is designed to work on
18
17
  // a pre-allocated chunk of memory rather than allocate any of its own.
@@ -36,9 +35,6 @@
36
35
  typedef struct stringmap {
37
36
  uint8_t n_buckets_idx;
38
37
  uint32_t n_buckets, size, n_occupied, upper_bound;
39
- uint32_t *flags;
40
- uint32_t *keys;
41
- stringpool* pool;
42
38
  uint8_t boundary[];
43
39
  // in memory at this point
44
40
  // ((n_buckets >> 4) + 1) uint32_t's for the flags
@@ -48,21 +44,18 @@ typedef struct stringmap {
48
44
  // API methods
49
45
 
50
46
  // public: write a new stringmap to memory
51
- void stringmap_init(stringmap* h, stringpool* p);
52
-
53
- // public: set up an existing stringmap in memory
54
- void stringmap_setup(stringmap* h, stringpool* p);
47
+ void stringmap_init(stringmap* h);
55
48
 
56
49
  // public: add a string. sets id to its id. dupes are fine; will just set the
57
50
  // id correctly.
58
- wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) RAISES_ERROR;
51
+ wp_error* stringmap_add(stringmap *h, stringpool* p, const char* s, uint32_t* id) RAISES_ERROR;
59
52
 
60
53
  // public: get the int value given a string. returns (uint32_t)-1 if not found.
61
- uint32_t stringmap_string_to_int(stringmap* h, const char* s);
54
+ uint32_t stringmap_string_to_int(stringmap* h, stringpool* pool, const char* s);
62
55
 
63
56
  // public: get the string value given an int. returns corrupt data if the int
64
57
  // is invalid.
65
- const char* stringmap_int_to_string(stringmap* h, uint32_t i);
58
+ const char* stringmap_int_to_string(stringmap* h, stringpool* p, uint32_t i);
66
59
 
67
60
  // public: returns the byte size of the stringmap
68
61
  uint32_t stringmap_size(stringmap* h);
@@ -77,6 +70,6 @@ uint32_t stringmap_next_size(stringmap* h);
77
70
  int stringmap_needs_bump(stringmap* h);
78
71
 
79
72
  // public: increases the size of the stringmap
80
- wp_error* stringmap_bump_size(stringmap *h) RAISES_ERROR;
73
+ wp_error* stringmap_bump_size(stringmap *h, stringpool* pool) RAISES_ERROR;
81
74
 
82
75
  #endif
@@ -35,23 +35,11 @@ void termhash_init(termhash* h) {
35
35
  h->n_buckets = prime_list[h->n_buckets_idx];
36
36
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
37
37
  h->size = h->n_occupied = 0;
38
- termhash_setup(h);
39
- memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
38
+ memset(TERMHASH_FLAGS(h), 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
40
39
  }
41
40
 
42
41
  #define OFFSET(a, b) (long)((uint8_t*)a - (uint8_t*)b)
43
42
  // set flags, keys and vals to correct locations based on h->n_buckets
44
- void termhash_setup(termhash* h) {
45
- DEBUG("term hash ranges from %p to %p (size %u)", h, (char*)h + termhash_size(h), termhash_size(h));
46
- DEBUG("boundary is at %p (+%ld)", h->boundary, OFFSET(h->boundary, h));
47
- h->flags = (uint32_t*)h->boundary;
48
- h->keys = (term*)((uint32_t*)h->boundary + ((h->n_buckets >> 4) + 1));
49
- h->vals = (uint32_t*)((term*)h->keys + h->n_buckets);
50
- DEBUG("flags are at %p (+%ld)", h->flags, OFFSET(h->flags, h->boundary));
51
- DEBUG(" keys are at %p (+%ld)", h->keys, OFFSET(h->keys, h->boundary));
52
- DEBUG(" vals are at %p (+%ld)", h->vals, OFFSET(h->vals, h->boundary));
53
- }
54
-
55
43
  /*
56
44
  static void termhash_dump(termhash* h) {
57
45
  for(uint32_t i = 0; i < h->n_buckets; i++) {
@@ -83,89 +71,94 @@ static void kh_clear_##name(kh_##name##_t *h) {
83
71
  */
84
72
 
85
73
  uint32_t termhash_get(termhash *h, term key) {
74
+ uint32_t* flags = TERMHASH_FLAGS(h);
75
+ term* keys = TERMHASH_KEYS(h);
76
+
86
77
  if(h->n_buckets) {
87
78
  uint32_t inc, k, i, last;
88
79
  k = hash_term(key); i = k % h->n_buckets;
89
80
  inc = 1 + k % (h->n_buckets - 1); last = i;
90
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !term_equals(h->keys[i], key))) {
81
+ while (!isempty(flags, i) && (isdel(flags, i) || !term_equals(keys[i], key))) {
91
82
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
92
83
  else i += inc;
93
84
  if (i == last) return h->n_buckets;
94
85
  }
95
- return iseither(h->flags, i)? h->n_buckets : i;
86
+ return iseither(flags, i)? h->n_buckets : i;
96
87
  }
97
88
  else return 0;
98
89
  }
99
90
 
100
91
  wp_error* termhash_bump_size(termhash *h) {
101
92
  DEBUG("bumping size for term hash at %p with size %u and boundary %p (+%ld)", h, termhash_size(h), h->boundary, (long)((uint8_t*)h->boundary - (uint8_t*)h));
102
- DEBUG("flags are at %p (+%ld)", h->flags, OFFSET(h->flags, h->boundary));
103
- DEBUG(" keys are at %p (+%ld)", h->keys, OFFSET(h->keys, h->boundary));
104
- DEBUG(" vals are at %p (+%ld)", h->vals, OFFSET(h->vals, h->boundary));
93
+ DEBUG("flags are at %p (+%ld)", TERMHASH_FLAGS(h), OFFSET(TERMHASH_FLAGS(h), h->boundary));
94
+ DEBUG(" keys are at %p (+%ld)", TERMHASH_KEYS(h), OFFSET(TERMHASH_KEYS(h), h->boundary));
95
+ DEBUG(" vals are at %p (+%ld)", TERMHASH_VALS(h), OFFSET(TERMHASH_VALS(h), h->boundary));
96
+
97
+ if(h->n_buckets_idx >= (HASH_PRIME_SIZE - 1)) RAISE_ERROR("termhash can't be this big");
105
98
 
106
99
  h->n_buckets_idx++;
107
- if(h->n_buckets_idx > HASH_PRIME_SIZE) exit(1); // die horribly TODO fixme
108
100
  uint32_t new_n_buckets = prime_list[h->n_buckets_idx];
109
101
 
110
- // first make a backup of the oldflags
111
- size_t oldflagsize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
112
- uint32_t* oldflags = malloc(oldflagsize);
113
- memcpy(oldflags, h->flags, oldflagsize);
102
+ // first make a backup of the old flags in a separate memory region
103
+ size_t flagbaksize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
104
+ uint32_t* flagbaks = malloc(flagbaksize);
105
+ memcpy(flagbaks, TERMHASH_FLAGS(h), flagbaksize);
114
106
 
115
- // keep pointers to the old locations
116
- term* oldkeys = h->keys;
117
- uint32_t* oldvals = h->vals;
107
+ // get pointers to the old locations
108
+ term* oldkeys = TERMHASH_KEYS(h);
109
+ uint32_t* oldvals = TERMHASH_VALS(h);
118
110
 
119
111
  // set pointers to the new locations
120
- h->keys = (term*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
121
- h->vals = (uint32_t*)((term*)h->keys + new_n_buckets);
112
+ uint32_t* newflags = (uint32_t*)h->boundary;
113
+ term* newkeys = (term*)(newflags + ((new_n_buckets >> 4) + 1));
114
+ uint32_t* newvals = (uint32_t*)(newkeys + new_n_buckets);
122
115
 
123
116
  // move the vals and keys
124
- memmove(h->vals, oldvals, h->n_buckets * sizeof(uint32_t));
125
- memmove(h->keys, oldkeys, h->n_buckets * sizeof(term));
117
+ memmove(newvals, oldvals, h->n_buckets * sizeof(uint32_t));
118
+ memmove(newkeys, oldkeys, h->n_buckets * sizeof(term));
126
119
 
127
120
  // clear the new flags
128
- memset(h->flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
121
+ memset(newflags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
129
122
 
130
123
  // do the complicated stuff from khash.h
131
124
  for (unsigned int j = 0; j != h->n_buckets; ++j) {
132
- if (iseither(oldflags, j) == 0) {
133
- term key = h->keys[j];
125
+ if (iseither(flagbaks, j) == 0) {
126
+ term key = newkeys[j];
134
127
  uint32_t val;
135
- val = h->vals[j];
136
- set_isdel_true(oldflags, j);
128
+ val = newvals[j];
129
+ set_isdel_true(flagbaks, j);
137
130
  while (1) {
138
131
  uint32_t inc, k, i;
139
132
  k = hash_term(key);
140
133
  i = k % new_n_buckets;
141
134
  inc = 1 + k % (new_n_buckets - 1);
142
- while (!isempty(h->flags, i)) {
135
+ while (!isempty(newflags, i)) {
143
136
  if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets;
144
137
  else i += inc;
145
138
  }
146
- set_isempty_false(h->flags, i);
147
- if (i < h->n_buckets && iseither(oldflags, i) == 0) {
148
- { term tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
149
- { uint32_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
150
- set_isdel_true(oldflags, i);
139
+ set_isempty_false(newflags, i);
140
+ if (i < h->n_buckets && iseither(flagbaks, i) == 0) {
141
+ { term tmp = newkeys[i]; newkeys[i] = key; key = tmp; }
142
+ { uint32_t tmp = newvals[i]; newvals[i] = val; val = tmp; }
143
+ set_isdel_true(flagbaks, i);
151
144
  } else {
152
- h->keys[i] = key;
153
- h->vals[i] = val;
145
+ newkeys[i] = key;
146
+ newvals[i] = val;
154
147
  break;
155
148
  }
156
149
  }
157
150
  }
158
151
  }
159
152
 
160
- free(oldflags);
153
+ free(flagbaks);
161
154
  h->n_buckets = new_n_buckets;
162
155
  h->n_occupied = h->size;
163
156
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
164
157
 
165
158
  DEBUG("after bump, term hash at %p has size %u and boundary %p (+%ld)", h, termhash_size(h), h->boundary, (long)((uint8_t*)h->boundary - (uint8_t*)h));
166
- DEBUG("flags are at %p (+%ld)", h->flags, (long)((uint8_t*)h->flags - (uint8_t*)h->boundary));
167
- DEBUG(" keys are at %p (+%ld)", h->keys, (long)((uint8_t*)h->keys - (uint8_t*)h->boundary));
168
- DEBUG(" vals are at %p (+%ld)", h->vals, (long)((uint8_t*)h->vals - (uint8_t*)h->boundary));
159
+ DEBUG("flags are at %p (+%ld)", TERMHASH_FLAGS(h), OFFSET(TERMHASH_FLAGS(h), h->boundary));
160
+ DEBUG(" keys are at %p (+%ld)", TERMHASH_KEYS(h), OFFSET(TERMHASH_KEYS(h), h->boundary));
161
+ DEBUG(" vals are at %p (+%ld)", TERMHASH_VALS(h), OFFSET(TERMHASH_VALS(h), h->boundary));
169
162
 
170
163
  #ifdef DEBUGOUTPUT
171
164
  //DEBUG("and now i look like this:");
@@ -177,6 +170,8 @@ wp_error* termhash_bump_size(termhash *h) {
177
170
 
178
171
  uint32_t termhash_put(termhash *h, term key, int *ret) {
179
172
  uint32_t x;
173
+ uint32_t* flags = TERMHASH_FLAGS(h);
174
+ term* keys = TERMHASH_KEYS(h);
180
175
 
181
176
  {
182
177
  #ifdef DEBUGOUTPUT
@@ -185,40 +180,40 @@ int num_loops = 0;
185
180
  uint32_t inc, k, i, site, last;
186
181
  x = site = h->n_buckets; k = hash_term(key); i = k % h->n_buckets;
187
182
  DEBUG("initial hash is %u", k);
188
- if (isempty(h->flags, i)) x = i;
183
+ if (isempty(flags, i)) x = i;
189
184
  else {
190
185
  inc = 1 + k % (h->n_buckets - 1); last = i;
191
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !term_equals(h->keys[i], key))) {
186
+ while (!isempty(flags, i) && (isdel(flags, i) || !term_equals(keys[i], key))) {
192
187
  #ifdef DEBUGOUTPUT
193
188
  num_loops++;
194
189
  #endif
195
- if (isdel(h->flags, i)) site = i;
190
+ if (isdel(flags, i)) site = i;
196
191
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
197
192
  else i += inc;
198
193
  if (i == last) { x = site; break; }
199
194
  }
200
195
  if ((x == h->n_buckets) && (i == last)) { // out of space
201
- if(!term_equals(h->keys[i], key)) {
196
+ if(!term_equals(keys[i], key)) {
202
197
  *ret = -1;
203
198
  return x;
204
199
  }
205
200
  }
206
201
  if (x == h->n_buckets) { // didn't find it on the first try
207
- if (isempty(h->flags, i) && site != h->n_buckets) x = site;
202
+ if (isempty(flags, i) && site != h->n_buckets) x = site;
208
203
  else x = i;
209
204
  }
210
205
  }
211
206
  DEBUG("looped %u times to put", num_loops);
212
207
  //DEBUG("x is %u, site is %u, n_buckets is %u", x, site, h->n_buckets);
213
208
  }
214
- if (isempty(h->flags, x)) {
215
- h->keys[x] = key;
216
- set_isboth_false(h->flags, x);
209
+ if (isempty(flags, x)) {
210
+ keys[x] = key;
211
+ set_isboth_false(flags, x);
217
212
  ++h->size; ++h->n_occupied;
218
213
  *ret = 1;
219
- } else if (isdel(h->flags, x)) {
220
- h->keys[x] = key;
221
- set_isboth_false(h->flags, x);
214
+ } else if (isdel(flags, x)) {
215
+ keys[x] = key;
216
+ set_isboth_false(flags, x);
222
217
  ++h->size;
223
218
  *ret = 2;
224
219
  }
@@ -233,24 +228,27 @@ num_loops++;
233
228
  }
234
229
 
235
230
  void termhash_del(termhash *h, uint32_t x) {
236
- if (x != h->n_buckets && !iseither(h->flags, x)) {
237
- set_isdel_true(h->flags, x);
231
+ uint32_t* flags = TERMHASH_FLAGS(h);
232
+ if (x != h->n_buckets && !iseither(flags, x)) {
233
+ set_isdel_true(flags, x);
238
234
  --h->size;
239
235
  }
240
236
  }
241
237
 
242
238
  uint32_t termhash_get_val(termhash* h, term t) {
239
+ uint32_t* vals = TERMHASH_VALS(h);
243
240
  uint32_t idx = termhash_get(h, t);
244
241
  if(idx == h->n_buckets) return (uint32_t)-1;
245
- return h->vals[idx];
242
+ return vals[idx];
246
243
  }
247
244
 
248
245
  wp_error* termhash_put_val(termhash* h, term t, uint32_t val) {
249
246
  int status;
247
+ uint32_t* vals = TERMHASH_VALS(h);
250
248
  uint32_t loc = termhash_put(h, t, &status);
251
249
  DEBUG("put(%u,%u) has status %d and loc %u (error val is %u)", t.field_s, t.word_s, status, loc, h->n_buckets);
252
250
  if(status == -1) RAISE_ERROR("out of space in hash");
253
- h->vals[loc] = val;
251
+ vals[loc] = val;
254
252
  return NO_ERROR;
255
253
  }
256
254
 
@@ -27,9 +27,6 @@ typedef struct term {
27
27
  typedef struct termhash {
28
28
  uint8_t n_buckets_idx;
29
29
  uint32_t n_buckets, size, n_occupied, upper_bound;
30
- uint32_t *flags;
31
- term *keys;
32
- uint32_t *vals;
33
30
  uint8_t boundary[];
34
31
  // in memory at this point
35
32
  // ((n_buckets >> 4) + 1) uint32_t's for the flags
@@ -37,14 +34,15 @@ typedef struct termhash {
37
34
  // n_buckets uint32_t's for the vals (offsets into postings lists)
38
35
  } termhash;
39
36
 
37
+ #define TERMHASH_FLAGS(h) ((uint32_t*)(h)->boundary)
38
+ #define TERMHASH_KEYS(h) ((term*)((uint32_t*)(h)->boundary + (((h)->n_buckets >> 4) + 1)))
39
+ #define TERMHASH_VALS(h) ((uint32_t*)(TERMHASH_KEYS(h) + (h)->n_buckets))
40
+
40
41
  // API methods
41
42
 
42
43
  // public: make a new termhash
43
44
  void termhash_init(termhash* h); // makes a new one
44
45
 
45
- // public: set up an existing termhash
46
- void termhash_setup(termhash* h); // inits one from disk
47
-
48
46
  // private: khash-style getter: returns the slot id, if any, given a term key.
49
47
  // you can then look this up within the vals array yourself. returns
50
48
  // h->n_buckets if the term is not in the hash.
@@ -143,7 +143,11 @@ static VALUE index_delete(VALUE class, VALUE v_pathname_base) {
143
143
  static VALUE index_size(VALUE self) {
144
144
  wp_index* index;
145
145
  Data_Get_Struct(self, wp_index, index);
146
- return INT2NUM(wp_index_num_docs(index));
146
+
147
+ uint64_t num_docs;
148
+ wp_error* e = wp_index_num_docs(index, &num_docs);
149
+ RAISE_IF_NECESSARY(e);
150
+ return INT2NUM(num_docs);
147
151
  }
148
152
 
149
153
  static VALUE index_init(VALUE self, VALUE v_pathname_base) {
@@ -10,6 +10,7 @@
10
10
  #include "index.h"
11
11
  #include "query.h"
12
12
  #include "query-parser.h"
13
+ #include "lock.h"
13
14
  #include "error.h"
14
15
 
15
16
  // see comments in index.c