filedictrb 0.1.2 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/filedict/filedict.h +55 -28
- data/lib/filedict/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7cb1c1d801bf8943d6493a6c40be882c5d47c58166c56b6860abf8b98fc14a98
|
4
|
+
data.tar.gz: 394f0febd55f827bfb3aafb3559ba89b6b458b3a7ed2c3b6d857d900e38b7374
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a700778e49bd15c6686b9457b7a7987f4d6c8b47be4abf805a6a09d95156ce71ff8e15e7cb50ffcfa11f96e78f3eb633c19d10bf472c6f7963deb5ae658f9f4
|
7
|
+
data.tar.gz: dc233039e2c4d0e2aade78efc7cd6b55ab088cbdeaa373d9219137ae1241585dbe32b4e5984eeb402c52a32a027ee14f2950fa5540b3a3b932a34d13ff834b8c
|
data/ext/filedict/filedict.h
CHANGED
@@ -27,6 +27,7 @@ typedef size_t (*filedict_hash_function_t)(const char *);
|
|
27
27
|
typedef struct filedict_t {
|
28
28
|
const char *error;
|
29
29
|
int fd;
|
30
|
+
int flags;
|
30
31
|
void *data;
|
31
32
|
size_t data_len;
|
32
33
|
filedict_hash_function_t hash_function;
|
@@ -96,6 +97,7 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
|
|
96
97
|
static void filedict_init(filedict_t *filedict) {
|
97
98
|
filedict->error = NULL;
|
98
99
|
filedict->fd = 0;
|
100
|
+
filedict->flags = 0;
|
99
101
|
filedict->data_len = 0;
|
100
102
|
filedict->data = NULL;
|
101
103
|
filedict->hash_function = filedict_default_hash_function;
|
@@ -110,6 +112,7 @@ static void filedict_deinit(filedict_t *filedict) {
|
|
110
112
|
if (filedict->fd) {
|
111
113
|
close(filedict->fd);
|
112
114
|
filedict->fd = 0;
|
115
|
+
filedict->flags = 0;
|
113
116
|
}
|
114
117
|
}
|
115
118
|
|
@@ -117,15 +120,39 @@ static void filedict_deinit(filedict_t *filedict) {
|
|
117
120
|
* This computes the size of the entire filedict file given an initial bucket count and hashmap count.
|
118
121
|
*/
|
119
122
|
static size_t filedict_file_size(size_t initial_bucket_count, size_t hashmap_count) {
|
120
|
-
|
121
|
-
|
123
|
+
/*
|
124
|
+
* We used to size each additional hashmap at 2x the previous, but realistically it seems that
|
125
|
+
* most resizes are triggered by keys that are ridiculously large, not by mass collision.
|
126
|
+
*
|
127
|
+
* A more proper fix might be to re-structure the whole filedict. We could keep the existing
|
128
|
+
* hashmap structure, but with buckets that expand dynamically. This would require each bucket
|
129
|
+
* to contain a "pointer" to the next bucket object if present.
|
130
|
+
*
|
131
|
+
* For now, it's easier to just keep the hashmap duplication without the size doubling.
|
132
|
+
*/
|
133
|
+
return sizeof(filedict_header_t) + initial_bucket_count * hashmap_count * sizeof(filedict_bucket_t);
|
134
|
+
}
|
122
135
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
136
|
+
/*
|
137
|
+
* Resizes the filedict based on the header hashmap count and initial bucket count.
|
138
|
+
* Naturally, your pointers into the map will become invalid after calling this.
|
139
|
+
*/
|
140
|
+
static void filedict_resize(filedict_t *filedict) {
|
141
|
+
filedict_header_t *header = (filedict_header_t*)filedict->data;
|
142
|
+
size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
|
143
|
+
if (computed_size <= filedict->data_len) return;
|
127
144
|
|
128
|
-
|
145
|
+
munmap(filedict->data, filedict->data_len);
|
146
|
+
filedict->data = mmap(
|
147
|
+
filedict->data,
|
148
|
+
computed_size,
|
149
|
+
PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
|
150
|
+
MAP_SHARED,
|
151
|
+
filedict->fd,
|
152
|
+
0
|
153
|
+
);
|
154
|
+
if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return; }
|
155
|
+
filedict->data_len = computed_size;
|
129
156
|
}
|
130
157
|
|
131
158
|
/*
|
@@ -146,15 +173,24 @@ static void filedict_open_f(
|
|
146
173
|
int flags,
|
147
174
|
unsigned int initial_bucket_count
|
148
175
|
) {
|
176
|
+
struct stat info;
|
177
|
+
|
178
|
+
filedict->flags = flags;
|
149
179
|
filedict->fd = open(filename, flags, 0666);
|
150
180
|
if (filedict->fd == -1) { filedict->error = strerror(errno); return; }
|
181
|
+
if (fstat(filedict->fd, &info) != 0) { filedict->error = strerror(errno); return; }
|
182
|
+
|
183
|
+
if (info.st_size == 0 && (flags & O_RDWR)) {
|
184
|
+
filedict->data_len = filedict_file_size(initial_bucket_count, 1);
|
185
|
+
ftruncate(filedict->fd, filedict->data_len);
|
186
|
+
} else {
|
187
|
+
filedict->data_len = info.st_size;
|
188
|
+
}
|
151
189
|
|
152
|
-
filedict->data_len = filedict_file_size(initial_bucket_count, 1);
|
153
|
-
ftruncate(filedict->fd, filedict->data_len);
|
154
190
|
filedict->data = mmap(
|
155
191
|
NULL,
|
156
192
|
filedict->data_len,
|
157
|
-
PROT_READ | PROT_WRITE,
|
193
|
+
PROT_READ | ((flags & O_RDWR) ? PROT_WRITE : 0),
|
158
194
|
MAP_SHARED,
|
159
195
|
filedict->fd,
|
160
196
|
0
|
@@ -163,8 +199,11 @@ static void filedict_open_f(
|
|
163
199
|
|
164
200
|
filedict_header_t *data = (filedict_header_t *)filedict->data;
|
165
201
|
assert(initial_bucket_count <= UINT_MAX);
|
166
|
-
|
167
|
-
data->
|
202
|
+
|
203
|
+
if (data->initial_bucket_count == 0) {
|
204
|
+
data->initial_bucket_count = initial_bucket_count;
|
205
|
+
data->hashmap_count = 1;
|
206
|
+
}
|
168
207
|
}
|
169
208
|
|
170
209
|
/*
|
@@ -252,7 +291,6 @@ try_again:
|
|
252
291
|
|
253
292
|
++hashmap_i;
|
254
293
|
hashmap += bucket_count;
|
255
|
-
bucket_count = (bucket_count << 1);
|
256
294
|
}
|
257
295
|
|
258
296
|
/*
|
@@ -272,7 +310,7 @@ try_again:
|
|
272
310
|
filedict->data = mmap(
|
273
311
|
filedict->data,
|
274
312
|
new_data_len,
|
275
|
-
PROT_READ | PROT_WRITE,
|
313
|
+
PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
|
276
314
|
MAP_SHARED,
|
277
315
|
filedict->fd,
|
278
316
|
0
|
@@ -365,25 +403,14 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
|
|
365
403
|
size_t offset = filedict_file_size(header->initial_bucket_count, read->hashmap_i);
|
366
404
|
|
367
405
|
if (offset >= filedict->data_len) {
|
368
|
-
|
369
|
-
|
370
|
-
munmap(filedict->data, filedict->data_len);
|
371
|
-
filedict->data = mmap(
|
372
|
-
filedict->data,
|
373
|
-
computed_size,
|
374
|
-
PROT_READ | PROT_WRITE,
|
375
|
-
MAP_SHARED,
|
376
|
-
filedict->fd,
|
377
|
-
0
|
378
|
-
);
|
379
|
-
if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return 0; }
|
380
|
-
filedict->data_len = computed_size;
|
406
|
+
filedict_resize(filedict);
|
407
|
+
if (filedict->error) log_return(0);
|
381
408
|
header = (filedict_header_t*)filedict->data;
|
382
409
|
}
|
383
410
|
|
384
411
|
filedict_bucket_t *hashmap = filedict->data + offset;
|
385
412
|
|
386
|
-
read->bucket_count = (size_t)header->initial_bucket_count
|
413
|
+
read->bucket_count = (size_t)header->initial_bucket_count;
|
387
414
|
read->bucket = &hashmap[read->key_hash % read->bucket_count];
|
388
415
|
read->entry = &read->bucket->entries[0];
|
389
416
|
|
data/lib/filedict/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filedictrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.2
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nigel Baillie
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|