filedictrb 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 3c8fd3a6260d33ec56a995b4116af3cdb60ec8b52fa8aee688c7fd0b305b14ae
4
- data.tar.gz: '0349634431c7e9abde136b1503e4ae23a8855236cdafb21ee9157f946456c16c'
3
+ metadata.gz: 7cb1c1d801bf8943d6493a6c40be882c5d47c58166c56b6860abf8b98fc14a98
4
+ data.tar.gz: 394f0febd55f827bfb3aafb3559ba89b6b458b3a7ed2c3b6d857d900e38b7374
5
5
  SHA512:
6
- metadata.gz: 5811b3a6fba572d75e39d9fec5d7e2d6cb1bde1701604d80e0fa040de624b7b7c7edca928293fa9814b91614a3c5d6289f15e55becc7e4b1153d34212216ec39
7
- data.tar.gz: db8da5021c456578f8654bce8feab2a7f4138f9c3f405423fb441ff3248cc3924b451da94473d5eac1b9d3eccda643d22582b278dbaebb6a7087f819bf896e08
6
+ metadata.gz: 4a700778e49bd15c6686b9457b7a7987f4d6c8b47be4abf805a6a09d95156ce71ff8e15e7cb50ffcfa11f96e78f3eb633c19d10bf472c6f7963deb5ae658f9f4
7
+ data.tar.gz: dc233039e2c4d0e2aade78efc7cd6b55ab088cbdeaa373d9219137ae1241585dbe32b4e5984eeb402c52a32a027ee14f2950fa5540b3a3b932a34d13ff834b8c
@@ -27,6 +27,7 @@ typedef size_t (*filedict_hash_function_t)(const char *);
27
27
  typedef struct filedict_t {
28
28
  const char *error;
29
29
  int fd;
30
+ int flags;
30
31
  void *data;
31
32
  size_t data_len;
32
33
  filedict_hash_function_t hash_function;
@@ -96,6 +97,7 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
96
97
  static void filedict_init(filedict_t *filedict) {
97
98
  filedict->error = NULL;
98
99
  filedict->fd = 0;
100
+ filedict->flags = 0;
99
101
  filedict->data_len = 0;
100
102
  filedict->data = NULL;
101
103
  filedict->hash_function = filedict_default_hash_function;
@@ -110,6 +112,7 @@ static void filedict_deinit(filedict_t *filedict) {
110
112
  if (filedict->fd) {
111
113
  close(filedict->fd);
112
114
  filedict->fd = 0;
115
+ filedict->flags = 0;
113
116
  }
114
117
  }
115
118
 
@@ -117,15 +120,39 @@ static void filedict_deinit(filedict_t *filedict) {
117
120
  * This computes the size of the entire filedict file given an initial bucket count and hashmap count.
118
121
  */
119
122
  static size_t filedict_file_size(size_t initial_bucket_count, size_t hashmap_count) {
120
- size_t result = sizeof(filedict_header_t);
121
- size_t i;
123
+ /*
124
+ * We used to size each additional hashmap at 2x the previous, but realistically it seems that
125
+ * most resizes are triggered by keys that are ridiculously large, not by mass collision.
126
+ *
127
+ * A more proper fix might be to re-structure the whole filedict. We could keep the existing
128
+ * hashmap structure, but with buckets that expand dynamically. This would require each bucket
129
+ * to contain a "pointer" to the next bucket object if present.
130
+ *
131
+ * For now, it's easier to just keep the hashmap duplication without the size doubling.
132
+ */
133
+ return sizeof(filedict_header_t) + initial_bucket_count * hashmap_count * sizeof(filedict_bucket_t);
134
+ }
122
135
 
123
- for (i = 0; i < hashmap_count; ++i) {
124
- /* Bucket count is multiplied by 2 for each additional hashmap. */
125
- result += (initial_bucket_count << i) * sizeof(filedict_bucket_t);
126
- }
136
+ /*
137
+ * Resizes the filedict based on the header hashmap count and initial bucket count.
138
+ * Naturally, your pointers into the map will become invalid after calling this.
139
+ */
140
+ static void filedict_resize(filedict_t *filedict) {
141
+ filedict_header_t *header = (filedict_header_t*)filedict->data;
142
+ size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
143
+ if (computed_size <= filedict->data_len) return;
127
144
 
128
- return result;
145
+ munmap(filedict->data, filedict->data_len);
146
+ filedict->data = mmap(
147
+ filedict->data,
148
+ computed_size,
149
+ PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
150
+ MAP_SHARED,
151
+ filedict->fd,
152
+ 0
153
+ );
154
+ if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return; }
155
+ filedict->data_len = computed_size;
129
156
  }
130
157
 
131
158
  /*
@@ -146,15 +173,24 @@ static void filedict_open_f(
146
173
  int flags,
147
174
  unsigned int initial_bucket_count
148
175
  ) {
176
+ struct stat info;
177
+
178
+ filedict->flags = flags;
149
179
  filedict->fd = open(filename, flags, 0666);
150
180
  if (filedict->fd == -1) { filedict->error = strerror(errno); return; }
181
+ if (fstat(filedict->fd, &info) != 0) { filedict->error = strerror(errno); return; }
182
+
183
+ if (info.st_size == 0 && (flags & O_RDWR)) {
184
+ filedict->data_len = filedict_file_size(initial_bucket_count, 1);
185
+ ftruncate(filedict->fd, filedict->data_len);
186
+ } else {
187
+ filedict->data_len = info.st_size;
188
+ }
151
189
 
152
- filedict->data_len = filedict_file_size(initial_bucket_count, 1);
153
- ftruncate(filedict->fd, filedict->data_len);
154
190
  filedict->data = mmap(
155
191
  NULL,
156
192
  filedict->data_len,
157
- PROT_READ | PROT_WRITE,
193
+ PROT_READ | ((flags & O_RDWR) ? PROT_WRITE : 0),
158
194
  MAP_SHARED,
159
195
  filedict->fd,
160
196
  0
@@ -163,8 +199,11 @@ static void filedict_open_f(
163
199
 
164
200
  filedict_header_t *data = (filedict_header_t *)filedict->data;
165
201
  assert(initial_bucket_count <= UINT_MAX);
166
- data->initial_bucket_count = initial_bucket_count;
167
- data->hashmap_count = 1;
202
+
203
+ if (data->initial_bucket_count == 0) {
204
+ data->initial_bucket_count = initial_bucket_count;
205
+ data->hashmap_count = 1;
206
+ }
168
207
  }
169
208
 
170
209
  /*
@@ -252,7 +291,6 @@ try_again:
252
291
 
253
292
  ++hashmap_i;
254
293
  hashmap += bucket_count;
255
- bucket_count = (bucket_count << 1);
256
294
  }
257
295
 
258
296
  /*
@@ -272,7 +310,7 @@ try_again:
272
310
  filedict->data = mmap(
273
311
  filedict->data,
274
312
  new_data_len,
275
- PROT_READ | PROT_WRITE,
313
+ PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
276
314
  MAP_SHARED,
277
315
  filedict->fd,
278
316
  0
@@ -365,25 +403,14 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
365
403
  size_t offset = filedict_file_size(header->initial_bucket_count, read->hashmap_i);
366
404
 
367
405
  if (offset >= filedict->data_len) {
368
- /* Need to resize! */
369
- size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
370
- munmap(filedict->data, filedict->data_len);
371
- filedict->data = mmap(
372
- filedict->data,
373
- computed_size,
374
- PROT_READ | PROT_WRITE,
375
- MAP_SHARED,
376
- filedict->fd,
377
- 0
378
- );
379
- if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return 0; }
380
- filedict->data_len = computed_size;
406
+ filedict_resize(filedict);
407
+ if (filedict->error) log_return(0);
381
408
  header = (filedict_header_t*)filedict->data;
382
409
  }
383
410
 
384
411
  filedict_bucket_t *hashmap = filedict->data + offset;
385
412
 
386
- read->bucket_count = (size_t)header->initial_bucket_count << read->hashmap_i;
413
+ read->bucket_count = (size_t)header->initial_bucket_count;
387
414
  read->bucket = &hashmap[read->key_hash % read->bucket_count];
388
415
  read->entry = &read->bucket->entries[0];
389
416
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Filedict
4
- VERSION = "0.1.2"
4
+ VERSION = "0.2.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filedictrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nigel Baillie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-03-31 00:00:00.000000000 Z
11
+ date: 2022-04-06 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email: