whistlepig 0.9.1 → 0.10

Sign up to get free protection for your applications and to get access to all the features.
@@ -33,20 +33,15 @@ static inline int string_equals(const char* a, const char* b) {
33
33
  return strcmp(a, b) == 0;
34
34
  }
35
35
 
36
- // set flags, keys and vals to correct locations based on h->n_buckets
37
- void stringmap_setup(stringmap* h, stringpool* p) {
38
- h->pool = p;
39
- h->flags = (uint32_t*)h->boundary;
40
- h->keys = (uint32_t*)((uint32_t*)h->boundary + ((h->n_buckets >> 4) + 1));
41
- }
36
+ #define STRINGMAP_FLAGS(h) ((uint32_t*)(h)->boundary)
37
+ #define STRINGMAP_KEYS(h) ((uint32_t*)((uint32_t*)(h)->boundary + (((h)->n_buckets >> 4) + 1)))
42
38
 
43
- void stringmap_init(stringmap* h, stringpool* p) {
39
+ void stringmap_init(stringmap* h) {
44
40
  h->n_buckets_idx = INITIAL_N_BUCKETS_IDX;
45
41
  h->n_buckets = prime_list[h->n_buckets_idx];
46
42
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
47
43
  h->size = h->n_occupied = 0;
48
- stringmap_setup(h, p);
49
- memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
44
+ memset(STRINGMAP_FLAGS(h), 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
50
45
  }
51
46
 
52
47
  /*
@@ -66,22 +61,25 @@ static void kh_clear_##name(kh_##name##_t *h) {
66
61
  }
67
62
  */
68
63
 
69
- uint32_t stringmap_get(stringmap *h, const char* key) {
64
+ uint32_t stringmap_get(stringmap *h, stringpool* pool, const char* key) {
65
+ uint32_t* flags = STRINGMAP_FLAGS(h);
66
+ uint32_t* keys = STRINGMAP_KEYS(h);
67
+
70
68
  if(h->n_buckets) {
71
69
  uint32_t inc, k, i, last;
72
70
  k = string_hash(key); i = k % h->n_buckets;
73
71
  inc = 1 + k % (h->n_buckets - 1); last = i;
74
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !string_equals(stringpool_lookup(h->pool, h->keys[i]), key))) {
72
+ while (!isempty(flags, i) && (isdel(flags, i) || !string_equals(stringpool_lookup(pool, keys[i]), key))) {
75
73
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
76
74
  else i += inc;
77
75
  if (i == last) return h->n_buckets;
78
76
  }
79
- return iseither(h->flags, i)? h->n_buckets : i;
77
+ return iseither(flags, i)? h->n_buckets : i;
80
78
  }
81
79
  else return 0;
82
80
  }
83
81
 
84
- wp_error* stringmap_bump_size(stringmap *h) {
82
+ wp_error* stringmap_bump_size(stringmap *h, stringpool* pool) {
85
83
  DEBUG("bumping size for string hash at %p with size %u and boundary %p", h, stringmap_size(h), h->boundary);
86
84
 
87
85
  if(h->n_buckets_idx >= (HASH_PRIME_SIZE - 1)) RAISE_ERROR("stringmap can't be this big");
@@ -89,51 +87,54 @@ wp_error* stringmap_bump_size(stringmap *h) {
89
87
  h->n_buckets_idx++;
90
88
  uint32_t new_n_buckets = prime_list[h->n_buckets_idx];
91
89
 
92
- // first make a backup of the oldflags
93
- size_t oldflagsize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
94
- uint32_t* oldflags = malloc(oldflagsize);
95
- memcpy(oldflags, h->flags, oldflagsize);
90
+ // get pointers to the old locations
91
+ uint32_t* oldkeys = STRINGMAP_KEYS(h);
92
+ uint32_t* oldflags = STRINGMAP_FLAGS(h);
96
93
 
97
- // keep pointers to the old locations
98
- uint32_t* oldkeys = h->keys;
94
+ // make a backup of the old flags in a separate memory region
95
+ size_t flagbaksize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
96
+ uint32_t* flagbaks = malloc(flagbaksize);
97
+ memcpy(flagbaks, oldflags, flagbaksize);
99
98
 
100
- // set pointers to the new locations
101
- h->keys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
99
+ // get pointers to the new locations
100
+ //h->keys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
101
+ uint32_t* newflags = (uint32_t*)h->boundary; // unchanged, actually
102
+ uint32_t* newkeys = (uint32_t*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
102
103
 
103
104
  // move the keys
104
- memmove(h->keys, oldkeys, h->n_buckets * sizeof(uint32_t));
105
+ memmove(newkeys, oldkeys, h->n_buckets * sizeof(uint32_t));
105
106
 
106
107
  // clear the new flags
107
- memset(h->flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
108
+ memset(STRINGMAP_FLAGS(h), 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
108
109
 
109
110
  // do the complicated stuff from khash.h
110
111
  for (unsigned int j = 0; j != h->n_buckets; ++j) {
111
- if (iseither(oldflags, j) == 0) {
112
- uint32_t key = h->keys[j];
113
- set_isdel_true(oldflags, j);
112
+ if (iseither(flagbaks, j) == 0) {
113
+ uint32_t key = newkeys[j];
114
+ set_isdel_true(flagbaks, j);
114
115
  while (1) {
115
116
  uint32_t inc, k, i;
116
- k = string_hash(stringpool_lookup(h->pool, key));
117
+ k = string_hash(stringpool_lookup(pool, key));
117
118
  i = k % new_n_buckets;
118
119
  inc = 1 + k % (new_n_buckets - 1);
119
- while (!isempty(h->flags, i)) {
120
+ while (!isempty(newflags, i)) {
120
121
  if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets;
121
122
  else i += inc;
122
123
  }
123
- set_isempty_false(h->flags, i);
124
- if (i < h->n_buckets && iseither(oldflags, i) == 0) {
125
- { uint32_t tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
126
- set_isdel_true(oldflags, i);
124
+ set_isempty_false(newflags, i);
125
+ if (i < h->n_buckets && iseither(flagbaks, i) == 0) {
126
+ { uint32_t tmp = newkeys[i]; newkeys[i] = key; key = tmp; }
127
+ set_isdel_true(flagbaks, i);
127
128
  } else {
128
- h->keys[i] = key;
129
+ newkeys[i] = key;
129
130
  break;
130
131
  }
131
132
  }
132
133
  }
133
134
  }
134
135
 
135
- free(oldflags);
136
- h->n_buckets = new_n_buckets;
136
+ free(flagbaks);
137
+ h->n_buckets = new_n_buckets; // STRINGMAP_KEYS now works
137
138
  h->n_occupied = h->size;
138
139
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
139
140
 
@@ -144,8 +145,10 @@ wp_error* stringmap_bump_size(stringmap *h) {
144
145
  return NO_ERROR;
145
146
  }
146
147
 
147
- uint32_t stringmap_put(stringmap *h, const char* key, int *ret) {
148
+ uint32_t stringmap_put(stringmap *h, stringpool* pool, const char* key, int *ret) {
148
149
  uint32_t x;
150
+ uint32_t* flags = STRINGMAP_FLAGS(h);
151
+ uint32_t* keys = STRINGMAP_KEYS(h);
149
152
 
150
153
  {
151
154
  #ifdef DEBUGOUTPUT
@@ -154,27 +157,27 @@ int num_loops = 0;
154
157
  uint32_t inc, k, i, site, last;
155
158
  x = site = h->n_buckets; k = string_hash(key); i = k % h->n_buckets;
156
159
  //DEBUG("asked to hash '%s'. initial hash is %u => %u and n_occupied is %u", key, k, i, h->n_occupied);
157
- if (isempty(h->flags, i)) x = i;
160
+ if (isempty(flags, i)) x = i;
158
161
  else {
159
162
  inc = 1 + k % (h->n_buckets - 1); last = i;
160
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !string_equals(stringpool_lookup(h->pool, h->keys[i]), key))) {
163
+ while (!isempty(flags, i) && (isdel(flags, i) || !string_equals(stringpool_lookup(pool, keys[i]), key))) {
161
164
  #ifdef DEBUGOUTPUT
162
165
  num_loops++;
163
166
  #endif
164
- if (isdel(h->flags, i)) site = i;
167
+ if (isdel(flags, i)) site = i;
165
168
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
166
169
  else i += inc;
167
170
  if (i == last) { x = site; break; }
168
171
  }
169
172
  if ((x == h->n_buckets) && (i == last)) { // out of space
170
- if(!string_equals(stringpool_lookup(h->pool, h->keys[i]), key)) {
173
+ if(!string_equals(stringpool_lookup(pool, keys[i]), key)) {
171
174
  DEBUG("out of space!");
172
175
  *ret = -1;
173
176
  return x;
174
177
  }
175
178
  }
176
179
  if (x == h->n_buckets) { // didn't find it on the first try
177
- if (isempty(h->flags, i) && site != h->n_buckets) x = site;
180
+ if (isempty(flags, i) && site != h->n_buckets) x = site;
178
181
  else x = i;
179
182
  }
180
183
  }
@@ -185,15 +188,15 @@ num_loops++;
185
188
  //DEBUG("for pos %u, isempty? %d and isdel %d", x, isempty(h->flags, x), isdel(h->flags, x));
186
189
 
187
190
  uint32_t idx;
188
- if(isempty(h->flags, x) || isdel(h->flags, x)) {
189
- idx = stringpool_add(h->pool, key);
191
+ if(isempty(flags, x) || isdel(flags, x)) {
192
+ idx = stringpool_add(pool, key);
190
193
  if(idx == (uint32_t)-1) {
191
194
  *ret = -2;
192
195
  return x;
193
196
  }
194
- if (isempty(h->flags, x)) ++h->n_occupied;
195
- h->keys[x] = idx;
196
- set_isboth_false(h->flags, x);
197
+ if (isempty(flags, x)) ++h->n_occupied;
198
+ keys[x] = idx;
199
+ set_isboth_false(flags, x);
197
200
  ++h->size;
198
201
  *ret = 1;
199
202
  }
@@ -203,8 +206,9 @@ num_loops++;
203
206
  }
204
207
 
205
208
  void stringmap_del(stringmap *h, uint32_t x) {
206
- if (x != h->n_buckets && !iseither(h->flags, x)) {
207
- set_isdel_true(h->flags, x);
209
+ uint32_t* flags = STRINGMAP_FLAGS(h);
210
+ if (x != h->n_buckets && !iseither(flags, x)) {
211
+ set_isdel_true(flags, x);
208
212
  --h->size;
209
213
  }
210
214
  }
@@ -255,24 +259,25 @@ uint32_t stringmap_next_size(stringmap* h) {
255
259
  return size(prime_list[next_idx]);
256
260
  }
257
261
 
258
- const char* stringmap_int_to_string(stringmap* h, uint32_t i) {
259
- return stringpool_lookup(h->pool, i);
262
+ const char* stringmap_int_to_string(stringmap* h, stringpool* p, uint32_t i) {
263
+ (void)h;
264
+ return stringpool_lookup(p, i);
260
265
  }
261
266
 
262
267
  // returns -1 if not found
263
- uint32_t stringmap_string_to_int(stringmap* h, const char* s) {
264
- uint32_t idx = stringmap_get(h, s);
268
+ uint32_t stringmap_string_to_int(stringmap* h, stringpool* pool, const char* s) {
269
+ uint32_t idx = stringmap_get(h, pool, s);
265
270
  if(idx == h->n_buckets) return (uint32_t)-1; // not there
266
- return h->keys[idx];
271
+ return STRINGMAP_KEYS(h)[idx];
267
272
  }
268
273
 
269
- wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) {
274
+ wp_error* stringmap_add(stringmap *h, stringpool* pool, const char* s, uint32_t* id) {
270
275
  int status;
271
- uint32_t idx = stringmap_put(h, s, &status);
276
+ uint32_t idx = stringmap_put(h, pool, s, &status);
272
277
  if(status == -1) RAISE_ERROR("out of space in hash put");
273
278
  if(status == -2) RAISE_ERROR("out of space in pool put");
274
279
 
275
- *id = h->keys[idx];
280
+ *id = STRINGMAP_KEYS(h)[idx];
276
281
 
277
282
  return NO_ERROR;
278
283
  }
@@ -10,9 +10,8 @@
10
10
  // and stringpool, it uses a slightly funny API that never allocates memory,
11
11
  // but instead operates on pointers to preallocated blocks of memory.
12
12
  //
13
- // uses a stringpool internally to do the int->string mapping. so if you're so
14
- // you shouldn't have to interact with the stringpool directly; you can just
15
- // use this object.
13
+ // uses a caller-supplied stringpool to do the int->string mapping. you pass
14
+ // it in, but shouldn't have to interact with it directly; just use this object.
16
15
  //
17
16
  // like termhash and pool, has a slightly funny API that is designed to work on
18
17
  // a pre-allocated chunk of memory rather than allocate any of its own.
@@ -36,9 +35,6 @@
36
35
  typedef struct stringmap {
37
36
  uint8_t n_buckets_idx;
38
37
  uint32_t n_buckets, size, n_occupied, upper_bound;
39
- uint32_t *flags;
40
- uint32_t *keys;
41
- stringpool* pool;
42
38
  uint8_t boundary[];
43
39
  // in memory at this point
44
40
  // ((n_buckets >> 4) + 1) uint32_t's for the flags
@@ -48,21 +44,18 @@ typedef struct stringmap {
48
44
  // API methods
49
45
 
50
46
  // public: write a new stringmap to memory
51
- void stringmap_init(stringmap* h, stringpool* p);
52
-
53
- // public: set up an existing stringmap in memory
54
- void stringmap_setup(stringmap* h, stringpool* p);
47
+ void stringmap_init(stringmap* h);
55
48
 
56
49
  // public: add a string. sets id to its id. dupes are fine; will just set the
57
50
  // id correctly.
58
- wp_error* stringmap_add(stringmap *h, const char* s, uint32_t* id) RAISES_ERROR;
51
+ wp_error* stringmap_add(stringmap *h, stringpool* p, const char* s, uint32_t* id) RAISES_ERROR;
59
52
 
60
53
  // public: get the int value given a string. returns (uint32_t)-1 if not found.
61
- uint32_t stringmap_string_to_int(stringmap* h, const char* s);
54
+ uint32_t stringmap_string_to_int(stringmap* h, stringpool* pool, const char* s);
62
55
 
63
56
  // public: get the string value given an int. returns corrupt data if the int
64
57
  // is invalid.
65
- const char* stringmap_int_to_string(stringmap* h, uint32_t i);
58
+ const char* stringmap_int_to_string(stringmap* h, stringpool* p, uint32_t i);
66
59
 
67
60
  // public: returns the byte size of the stringmap
68
61
  uint32_t stringmap_size(stringmap* h);
@@ -77,6 +70,6 @@ uint32_t stringmap_next_size(stringmap* h);
77
70
  int stringmap_needs_bump(stringmap* h);
78
71
 
79
72
  // public: increases the size of the stringmap
80
- wp_error* stringmap_bump_size(stringmap *h) RAISES_ERROR;
73
+ wp_error* stringmap_bump_size(stringmap *h, stringpool* pool) RAISES_ERROR;
81
74
 
82
75
  #endif
@@ -35,23 +35,11 @@ void termhash_init(termhash* h) {
35
35
  h->n_buckets = prime_list[h->n_buckets_idx];
36
36
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
37
37
  h->size = h->n_occupied = 0;
38
- termhash_setup(h);
39
- memset(h->flags, 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
38
+ memset(TERMHASH_FLAGS(h), 0xaa, ((h->n_buckets>>4) + 1) * sizeof(uint32_t));
40
39
  }
41
40
 
42
41
  #define OFFSET(a, b) (long)((uint8_t*)a - (uint8_t*)b)
43
42
  // set flags, keys and vals to correct locations based on h->n_buckets
44
- void termhash_setup(termhash* h) {
45
- DEBUG("term hash ranges from %p to %p (size %u)", h, (char*)h + termhash_size(h), termhash_size(h));
46
- DEBUG("boundary is at %p (+%ld)", h->boundary, OFFSET(h->boundary, h));
47
- h->flags = (uint32_t*)h->boundary;
48
- h->keys = (term*)((uint32_t*)h->boundary + ((h->n_buckets >> 4) + 1));
49
- h->vals = (uint32_t*)((term*)h->keys + h->n_buckets);
50
- DEBUG("flags are at %p (+%ld)", h->flags, OFFSET(h->flags, h->boundary));
51
- DEBUG(" keys are at %p (+%ld)", h->keys, OFFSET(h->keys, h->boundary));
52
- DEBUG(" vals are at %p (+%ld)", h->vals, OFFSET(h->vals, h->boundary));
53
- }
54
-
55
43
  /*
56
44
  static void termhash_dump(termhash* h) {
57
45
  for(uint32_t i = 0; i < h->n_buckets; i++) {
@@ -83,89 +71,94 @@ static void kh_clear_##name(kh_##name##_t *h) {
83
71
  */
84
72
 
85
73
  uint32_t termhash_get(termhash *h, term key) {
74
+ uint32_t* flags = TERMHASH_FLAGS(h);
75
+ term* keys = TERMHASH_KEYS(h);
76
+
86
77
  if(h->n_buckets) {
87
78
  uint32_t inc, k, i, last;
88
79
  k = hash_term(key); i = k % h->n_buckets;
89
80
  inc = 1 + k % (h->n_buckets - 1); last = i;
90
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !term_equals(h->keys[i], key))) {
81
+ while (!isempty(flags, i) && (isdel(flags, i) || !term_equals(keys[i], key))) {
91
82
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
92
83
  else i += inc;
93
84
  if (i == last) return h->n_buckets;
94
85
  }
95
- return iseither(h->flags, i)? h->n_buckets : i;
86
+ return iseither(flags, i)? h->n_buckets : i;
96
87
  }
97
88
  else return 0;
98
89
  }
99
90
 
100
91
  wp_error* termhash_bump_size(termhash *h) {
101
92
  DEBUG("bumping size for term hash at %p with size %u and boundary %p (+%ld)", h, termhash_size(h), h->boundary, (long)((uint8_t*)h->boundary - (uint8_t*)h));
102
- DEBUG("flags are at %p (+%ld)", h->flags, OFFSET(h->flags, h->boundary));
103
- DEBUG(" keys are at %p (+%ld)", h->keys, OFFSET(h->keys, h->boundary));
104
- DEBUG(" vals are at %p (+%ld)", h->vals, OFFSET(h->vals, h->boundary));
93
+ DEBUG("flags are at %p (+%ld)", TERMHASH_FLAGS(h), OFFSET(TERMHASH_FLAGS(h), h->boundary));
94
+ DEBUG(" keys are at %p (+%ld)", TERMHASH_KEYS(h), OFFSET(TERMHASH_KEYS(h), h->boundary));
95
+ DEBUG(" vals are at %p (+%ld)", TERMHASH_VALS(h), OFFSET(TERMHASH_VALS(h), h->boundary));
96
+
97
+ if(h->n_buckets_idx >= (HASH_PRIME_SIZE - 1)) RAISE_ERROR("termhash can't be this big");
105
98
 
106
99
  h->n_buckets_idx++;
107
- if(h->n_buckets_idx > HASH_PRIME_SIZE) exit(1); // die horribly TODO fixme
108
100
  uint32_t new_n_buckets = prime_list[h->n_buckets_idx];
109
101
 
110
- // first make a backup of the oldflags
111
- size_t oldflagsize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
112
- uint32_t* oldflags = malloc(oldflagsize);
113
- memcpy(oldflags, h->flags, oldflagsize);
102
+ // first make a backup of the old flags in a separate memory region
103
+ size_t flagbaksize = ((h->n_buckets >> 4) + 1) * sizeof(uint32_t);
104
+ uint32_t* flagbaks = malloc(flagbaksize);
105
+ memcpy(flagbaks, TERMHASH_FLAGS(h), flagbaksize);
114
106
 
115
- // keep pointers to the old locations
116
- term* oldkeys = h->keys;
117
- uint32_t* oldvals = h->vals;
107
+ // get pointers to the old locations
108
+ term* oldkeys = TERMHASH_KEYS(h);
109
+ uint32_t* oldvals = TERMHASH_VALS(h);
118
110
 
119
111
  // set pointers to the new locations
120
- h->keys = (term*)((uint32_t*)h->boundary + ((new_n_buckets >> 4) + 1));
121
- h->vals = (uint32_t*)((term*)h->keys + new_n_buckets);
112
+ uint32_t* newflags = (uint32_t*)h->boundary;
113
+ term* newkeys = (term*)(newflags + ((new_n_buckets >> 4) + 1));
114
+ uint32_t* newvals = (uint32_t*)(newkeys + new_n_buckets);
122
115
 
123
116
  // move the vals and keys
124
- memmove(h->vals, oldvals, h->n_buckets * sizeof(uint32_t));
125
- memmove(h->keys, oldkeys, h->n_buckets * sizeof(term));
117
+ memmove(newvals, oldvals, h->n_buckets * sizeof(uint32_t));
118
+ memmove(newkeys, oldkeys, h->n_buckets * sizeof(term));
126
119
 
127
120
  // clear the new flags
128
- memset(h->flags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
121
+ memset(newflags, 0xaa, ((new_n_buckets>>4) + 1) * sizeof(uint32_t));
129
122
 
130
123
  // do the complicated stuff from khash.h
131
124
  for (unsigned int j = 0; j != h->n_buckets; ++j) {
132
- if (iseither(oldflags, j) == 0) {
133
- term key = h->keys[j];
125
+ if (iseither(flagbaks, j) == 0) {
126
+ term key = newkeys[j];
134
127
  uint32_t val;
135
- val = h->vals[j];
136
- set_isdel_true(oldflags, j);
128
+ val = newvals[j];
129
+ set_isdel_true(flagbaks, j);
137
130
  while (1) {
138
131
  uint32_t inc, k, i;
139
132
  k = hash_term(key);
140
133
  i = k % new_n_buckets;
141
134
  inc = 1 + k % (new_n_buckets - 1);
142
- while (!isempty(h->flags, i)) {
135
+ while (!isempty(newflags, i)) {
143
136
  if (i + inc >= new_n_buckets) i = i + inc - new_n_buckets;
144
137
  else i += inc;
145
138
  }
146
- set_isempty_false(h->flags, i);
147
- if (i < h->n_buckets && iseither(oldflags, i) == 0) {
148
- { term tmp = h->keys[i]; h->keys[i] = key; key = tmp; }
149
- { uint32_t tmp = h->vals[i]; h->vals[i] = val; val = tmp; }
150
- set_isdel_true(oldflags, i);
139
+ set_isempty_false(newflags, i);
140
+ if (i < h->n_buckets && iseither(flagbaks, i) == 0) {
141
+ { term tmp = newkeys[i]; newkeys[i] = key; key = tmp; }
142
+ { uint32_t tmp = newvals[i]; newvals[i] = val; val = tmp; }
143
+ set_isdel_true(flagbaks, i);
151
144
  } else {
152
- h->keys[i] = key;
153
- h->vals[i] = val;
145
+ newkeys[i] = key;
146
+ newvals[i] = val;
154
147
  break;
155
148
  }
156
149
  }
157
150
  }
158
151
  }
159
152
 
160
- free(oldflags);
153
+ free(flagbaks);
161
154
  h->n_buckets = new_n_buckets;
162
155
  h->n_occupied = h->size;
163
156
  h->upper_bound = (uint32_t)(h->n_buckets * HASH_UPPER + 0.5);
164
157
 
165
158
  DEBUG("after bump, term hash at %p has size %u and boundary %p (+%ld)", h, termhash_size(h), h->boundary, (long)((uint8_t*)h->boundary - (uint8_t*)h));
166
- DEBUG("flags are at %p (+%ld)", h->flags, (long)((uint8_t*)h->flags - (uint8_t*)h->boundary));
167
- DEBUG(" keys are at %p (+%ld)", h->keys, (long)((uint8_t*)h->keys - (uint8_t*)h->boundary));
168
- DEBUG(" vals are at %p (+%ld)", h->vals, (long)((uint8_t*)h->vals - (uint8_t*)h->boundary));
159
+ DEBUG("flags are at %p (+%ld)", TERMHASH_FLAGS(h), OFFSET(TERMHASH_FLAGS(h), h->boundary));
160
+ DEBUG(" keys are at %p (+%ld)", TERMHASH_KEYS(h), OFFSET(TERMHASH_KEYS(h), h->boundary));
161
+ DEBUG(" vals are at %p (+%ld)", TERMHASH_VALS(h), OFFSET(TERMHASH_VALS(h), h->boundary));
169
162
 
170
163
  #ifdef DEBUGOUTPUT
171
164
  //DEBUG("and now i look like this:");
@@ -177,6 +170,8 @@ wp_error* termhash_bump_size(termhash *h) {
177
170
 
178
171
  uint32_t termhash_put(termhash *h, term key, int *ret) {
179
172
  uint32_t x;
173
+ uint32_t* flags = TERMHASH_FLAGS(h);
174
+ term* keys = TERMHASH_KEYS(h);
180
175
 
181
176
  {
182
177
  #ifdef DEBUGOUTPUT
@@ -185,40 +180,40 @@ int num_loops = 0;
185
180
  uint32_t inc, k, i, site, last;
186
181
  x = site = h->n_buckets; k = hash_term(key); i = k % h->n_buckets;
187
182
  DEBUG("initial hash is %u", k);
188
- if (isempty(h->flags, i)) x = i;
183
+ if (isempty(flags, i)) x = i;
189
184
  else {
190
185
  inc = 1 + k % (h->n_buckets - 1); last = i;
191
- while (!isempty(h->flags, i) && (isdel(h->flags, i) || !term_equals(h->keys[i], key))) {
186
+ while (!isempty(flags, i) && (isdel(flags, i) || !term_equals(keys[i], key))) {
192
187
  #ifdef DEBUGOUTPUT
193
188
  num_loops++;
194
189
  #endif
195
- if (isdel(h->flags, i)) site = i;
190
+ if (isdel(flags, i)) site = i;
196
191
  if (i + inc >= h->n_buckets) i = i + inc - h->n_buckets;
197
192
  else i += inc;
198
193
  if (i == last) { x = site; break; }
199
194
  }
200
195
  if ((x == h->n_buckets) && (i == last)) { // out of space
201
- if(!term_equals(h->keys[i], key)) {
196
+ if(!term_equals(keys[i], key)) {
202
197
  *ret = -1;
203
198
  return x;
204
199
  }
205
200
  }
206
201
  if (x == h->n_buckets) { // didn't find it on the first try
207
- if (isempty(h->flags, i) && site != h->n_buckets) x = site;
202
+ if (isempty(flags, i) && site != h->n_buckets) x = site;
208
203
  else x = i;
209
204
  }
210
205
  }
211
206
  DEBUG("looped %u times to put", num_loops);
212
207
  //DEBUG("x is %u, site is %u, n_buckets is %u", x, site, h->n_buckets);
213
208
  }
214
- if (isempty(h->flags, x)) {
215
- h->keys[x] = key;
216
- set_isboth_false(h->flags, x);
209
+ if (isempty(flags, x)) {
210
+ keys[x] = key;
211
+ set_isboth_false(flags, x);
217
212
  ++h->size; ++h->n_occupied;
218
213
  *ret = 1;
219
- } else if (isdel(h->flags, x)) {
220
- h->keys[x] = key;
221
- set_isboth_false(h->flags, x);
214
+ } else if (isdel(flags, x)) {
215
+ keys[x] = key;
216
+ set_isboth_false(flags, x);
222
217
  ++h->size;
223
218
  *ret = 2;
224
219
  }
@@ -233,24 +228,27 @@ num_loops++;
233
228
  }
234
229
 
235
230
  void termhash_del(termhash *h, uint32_t x) {
236
- if (x != h->n_buckets && !iseither(h->flags, x)) {
237
- set_isdel_true(h->flags, x);
231
+ uint32_t* flags = TERMHASH_FLAGS(h);
232
+ if (x != h->n_buckets && !iseither(flags, x)) {
233
+ set_isdel_true(flags, x);
238
234
  --h->size;
239
235
  }
240
236
  }
241
237
 
242
238
  uint32_t termhash_get_val(termhash* h, term t) {
239
+ uint32_t* vals = TERMHASH_VALS(h);
243
240
  uint32_t idx = termhash_get(h, t);
244
241
  if(idx == h->n_buckets) return (uint32_t)-1;
245
- return h->vals[idx];
242
+ return vals[idx];
246
243
  }
247
244
 
248
245
  wp_error* termhash_put_val(termhash* h, term t, uint32_t val) {
249
246
  int status;
247
+ uint32_t* vals = TERMHASH_VALS(h);
250
248
  uint32_t loc = termhash_put(h, t, &status);
251
249
  DEBUG("put(%u,%u) has status %d and loc %u (error val is %u)", t.field_s, t.word_s, status, loc, h->n_buckets);
252
250
  if(status == -1) RAISE_ERROR("out of space in hash");
253
- h->vals[loc] = val;
251
+ vals[loc] = val;
254
252
  return NO_ERROR;
255
253
  }
256
254
 
@@ -27,9 +27,6 @@ typedef struct term {
27
27
  typedef struct termhash {
28
28
  uint8_t n_buckets_idx;
29
29
  uint32_t n_buckets, size, n_occupied, upper_bound;
30
- uint32_t *flags;
31
- term *keys;
32
- uint32_t *vals;
33
30
  uint8_t boundary[];
34
31
  // in memory at this point
35
32
  // ((n_buckets >> 4) + 1) uint32_t's for the flags
@@ -37,14 +34,15 @@ typedef struct termhash {
37
34
  // n_buckets uint32_t's for the vals (offsets into postings lists)
38
35
  } termhash;
39
36
 
37
+ #define TERMHASH_FLAGS(h) ((uint32_t*)(h)->boundary)
38
+ #define TERMHASH_KEYS(h) ((term*)((uint32_t*)(h)->boundary + (((h)->n_buckets >> 4) + 1)))
39
+ #define TERMHASH_VALS(h) ((uint32_t*)(TERMHASH_KEYS(h) + (h)->n_buckets))
40
+
40
41
  // API methods
41
42
 
42
43
  // public: make a new termhash
43
44
  void termhash_init(termhash* h); // makes a new one
44
45
 
45
- // public: set up an existing termhash
46
- void termhash_setup(termhash* h); // inits one from disk
47
-
48
46
  // private: khash-style getter: returns the slot id, if any, given a term key.
49
47
  // you can then look this up within the vals array yourself. returns
50
48
  // h->n_buckets if the term is not in the hash.
@@ -143,7 +143,11 @@ static VALUE index_delete(VALUE class, VALUE v_pathname_base) {
143
143
  static VALUE index_size(VALUE self) {
144
144
  wp_index* index;
145
145
  Data_Get_Struct(self, wp_index, index);
146
- return INT2NUM(wp_index_num_docs(index));
146
+
147
+ uint64_t num_docs;
148
+ wp_error* e = wp_index_num_docs(index, &num_docs);
149
+ RAISE_IF_NECESSARY(e);
150
+ return INT2NUM(num_docs);
147
151
  }
148
152
 
149
153
  static VALUE index_init(VALUE self, VALUE v_pathname_base) {
@@ -10,6 +10,7 @@
10
10
  #include "index.h"
11
11
  #include "query.h"
12
12
  #include "query-parser.h"
13
+ #include "lock.h"
13
14
  #include "error.h"
14
15
 
15
16
  // see comments in index.c