filedictrb 0.1.2 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c8fd3a6260d33ec56a995b4116af3cdb60ec8b52fa8aee688c7fd0b305b14ae
4
- data.tar.gz: '0349634431c7e9abde136b1503e4ae23a8855236cdafb21ee9157f946456c16c'
3
+ metadata.gz: 7cb1c1d801bf8943d6493a6c40be882c5d47c58166c56b6860abf8b98fc14a98
4
+ data.tar.gz: 394f0febd55f827bfb3aafb3559ba89b6b458b3a7ed2c3b6d857d900e38b7374
5
5
  SHA512:
6
- metadata.gz: 5811b3a6fba572d75e39d9fec5d7e2d6cb1bde1701604d80e0fa040de624b7b7c7edca928293fa9814b91614a3c5d6289f15e55becc7e4b1153d34212216ec39
7
- data.tar.gz: db8da5021c456578f8654bce8feab2a7f4138f9c3f405423fb441ff3248cc3924b451da94473d5eac1b9d3eccda643d22582b278dbaebb6a7087f819bf896e08
6
+ metadata.gz: 4a700778e49bd15c6686b9457b7a7987f4d6c8b47be4abf805a6a09d95156ce71ff8e15e7cb50ffcfa11f96e78f3eb633c19d10bf472c6f7963deb5ae658f9f4
7
+ data.tar.gz: dc233039e2c4d0e2aade78efc7cd6b55ab088cbdeaa373d9219137ae1241585dbe32b4e5984eeb402c52a32a027ee14f2950fa5540b3a3b932a34d13ff834b8c
@@ -27,6 +27,7 @@ typedef size_t (*filedict_hash_function_t)(const char *);
27
27
  typedef struct filedict_t {
28
28
  const char *error;
29
29
  int fd;
30
+ int flags;
30
31
  void *data;
31
32
  size_t data_len;
32
33
  filedict_hash_function_t hash_function;
@@ -96,6 +97,7 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
96
97
  static void filedict_init(filedict_t *filedict) {
97
98
  filedict->error = NULL;
98
99
  filedict->fd = 0;
100
+ filedict->flags = 0;
99
101
  filedict->data_len = 0;
100
102
  filedict->data = NULL;
101
103
  filedict->hash_function = filedict_default_hash_function;
@@ -110,6 +112,7 @@ static void filedict_deinit(filedict_t *filedict) {
110
112
  if (filedict->fd) {
111
113
  close(filedict->fd);
112
114
  filedict->fd = 0;
115
+ filedict->flags = 0;
113
116
  }
114
117
  }
115
118
 
@@ -117,15 +120,39 @@ static void filedict_deinit(filedict_t *filedict) {
117
120
  * This computes the size of the entire filedict file given an initial bucket count and hashmap count.
118
121
  */
119
122
  static size_t filedict_file_size(size_t initial_bucket_count, size_t hashmap_count) {
120
- size_t result = sizeof(filedict_header_t);
121
- size_t i;
123
+ /*
124
+ * We used to size each additional hashmap at 2x the previous, but realistically it seems that
125
+ * most resizes are triggered by keys that are ridiculously large, not by mass collision.
126
+ *
127
+ * A more proper fix might be to re-structure the whole filedict. We could keep the existing
128
+ * hashmap structure, but with buckets that expand dynamically. This would require each bucket
129
+ * to contain a "pointer" to the next bucket object if present.
130
+ *
131
+ * For now, it's easier to just keep the hashmap duplication without the size doubling.
132
+ */
133
+ return sizeof(filedict_header_t) + initial_bucket_count * hashmap_count * sizeof(filedict_bucket_t);
134
+ }
122
135
 
123
- for (i = 0; i < hashmap_count; ++i) {
124
- /* Bucket count is multiplied by 2 for each additional hashmap. */
125
- result += (initial_bucket_count << i) * sizeof(filedict_bucket_t);
126
- }
136
+ /*
137
+ * Resizes the filedict based on the header hashmap count and initial bucket count.
138
+ * Naturally, your pointers into the map will become invalid after calling this.
139
+ */
140
+ static void filedict_resize(filedict_t *filedict) {
141
+ filedict_header_t *header = (filedict_header_t*)filedict->data;
142
+ size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
143
+ if (computed_size <= filedict->data_len) return;
127
144
 
128
- return result;
145
+ munmap(filedict->data, filedict->data_len);
146
+ filedict->data = mmap(
147
+ filedict->data,
148
+ computed_size,
149
+ PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
150
+ MAP_SHARED,
151
+ filedict->fd,
152
+ 0
153
+ );
154
+ if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return; }
155
+ filedict->data_len = computed_size;
129
156
  }
130
157
 
131
158
  /*
@@ -146,15 +173,24 @@ static void filedict_open_f(
146
173
  int flags,
147
174
  unsigned int initial_bucket_count
148
175
  ) {
176
+ struct stat info;
177
+
178
+ filedict->flags = flags;
149
179
  filedict->fd = open(filename, flags, 0666);
150
180
  if (filedict->fd == -1) { filedict->error = strerror(errno); return; }
181
+ if (fstat(filedict->fd, &info) != 0) { filedict->error = strerror(errno); return; }
182
+
183
+ if (info.st_size == 0 && (flags & O_RDWR)) {
184
+ filedict->data_len = filedict_file_size(initial_bucket_count, 1);
185
+ ftruncate(filedict->fd, filedict->data_len);
186
+ } else {
187
+ filedict->data_len = info.st_size;
188
+ }
151
189
 
152
- filedict->data_len = filedict_file_size(initial_bucket_count, 1);
153
- ftruncate(filedict->fd, filedict->data_len);
154
190
  filedict->data = mmap(
155
191
  NULL,
156
192
  filedict->data_len,
157
- PROT_READ | PROT_WRITE,
193
+ PROT_READ | ((flags & O_RDWR) ? PROT_WRITE : 0),
158
194
  MAP_SHARED,
159
195
  filedict->fd,
160
196
  0
@@ -163,8 +199,11 @@ static void filedict_open_f(
163
199
 
164
200
  filedict_header_t *data = (filedict_header_t *)filedict->data;
165
201
  assert(initial_bucket_count <= UINT_MAX);
166
- data->initial_bucket_count = initial_bucket_count;
167
- data->hashmap_count = 1;
202
+
203
+ if (data->initial_bucket_count == 0) {
204
+ data->initial_bucket_count = initial_bucket_count;
205
+ data->hashmap_count = 1;
206
+ }
168
207
  }
169
208
 
170
209
  /*
@@ -252,7 +291,6 @@ try_again:
252
291
 
253
292
  ++hashmap_i;
254
293
  hashmap += bucket_count;
255
- bucket_count = (bucket_count << 1);
256
294
  }
257
295
 
258
296
  /*
@@ -272,7 +310,7 @@ try_again:
272
310
  filedict->data = mmap(
273
311
  filedict->data,
274
312
  new_data_len,
275
- PROT_READ | PROT_WRITE,
313
+ PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
276
314
  MAP_SHARED,
277
315
  filedict->fd,
278
316
  0
@@ -365,25 +403,14 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
365
403
  size_t offset = filedict_file_size(header->initial_bucket_count, read->hashmap_i);
366
404
 
367
405
  if (offset >= filedict->data_len) {
368
- /* Need to resize! */
369
- size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
370
- munmap(filedict->data, filedict->data_len);
371
- filedict->data = mmap(
372
- filedict->data,
373
- computed_size,
374
- PROT_READ | PROT_WRITE,
375
- MAP_SHARED,
376
- filedict->fd,
377
- 0
378
- );
379
- if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return 0; }
380
- filedict->data_len = computed_size;
406
+ filedict_resize(filedict);
407
+ if (filedict->error) log_return(0);
381
408
  header = (filedict_header_t*)filedict->data;
382
409
  }
383
410
 
384
411
  filedict_bucket_t *hashmap = filedict->data + offset;
385
412
 
386
- read->bucket_count = (size_t)header->initial_bucket_count << read->hashmap_i;
413
+ read->bucket_count = (size_t)header->initial_bucket_count;
387
414
  read->bucket = &hashmap[read->key_hash % read->bucket_count];
388
415
  read->entry = &read->bucket->entries[0];
389
416
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Filedict
4
- VERSION = "0.1.2"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filedictrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nigel Baillie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-31 00:00:00.000000000 Z
11
+ date: 2022-04-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: