filedictrb 0.1.2 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/filedict/filedict.h +55 -28
- data/lib/filedict/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7cb1c1d801bf8943d6493a6c40be882c5d47c58166c56b6860abf8b98fc14a98
|
4
|
+
data.tar.gz: 394f0febd55f827bfb3aafb3559ba89b6b458b3a7ed2c3b6d857d900e38b7374
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4a700778e49bd15c6686b9457b7a7987f4d6c8b47be4abf805a6a09d95156ce71ff8e15e7cb50ffcfa11f96e78f3eb633c19d10bf472c6f7963deb5ae658f9f4
|
7
|
+
data.tar.gz: dc233039e2c4d0e2aade78efc7cd6b55ab088cbdeaa373d9219137ae1241585dbe32b4e5984eeb402c52a32a027ee14f2950fa5540b3a3b932a34d13ff834b8c
|
data/ext/filedict/filedict.h
CHANGED
@@ -27,6 +27,7 @@ typedef size_t (*filedict_hash_function_t)(const char *);
|
|
27
27
|
typedef struct filedict_t {
|
28
28
|
const char *error;
|
29
29
|
int fd;
|
30
|
+
int flags;
|
30
31
|
void *data;
|
31
32
|
size_t data_len;
|
32
33
|
filedict_hash_function_t hash_function;
|
@@ -96,6 +97,7 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
|
|
96
97
|
static void filedict_init(filedict_t *filedict) {
|
97
98
|
filedict->error = NULL;
|
98
99
|
filedict->fd = 0;
|
100
|
+
filedict->flags = 0;
|
99
101
|
filedict->data_len = 0;
|
100
102
|
filedict->data = NULL;
|
101
103
|
filedict->hash_function = filedict_default_hash_function;
|
@@ -110,6 +112,7 @@ static void filedict_deinit(filedict_t *filedict) {
|
|
110
112
|
if (filedict->fd) {
|
111
113
|
close(filedict->fd);
|
112
114
|
filedict->fd = 0;
|
115
|
+
filedict->flags = 0;
|
113
116
|
}
|
114
117
|
}
|
115
118
|
|
@@ -117,15 +120,39 @@ static void filedict_deinit(filedict_t *filedict) {
|
|
117
120
|
* This computes the size of the entire filedict file given an initial bucket count and hashmap count.
|
118
121
|
*/
|
119
122
|
static size_t filedict_file_size(size_t initial_bucket_count, size_t hashmap_count) {
|
120
|
-
|
121
|
-
|
123
|
+
/*
|
124
|
+
* We used to size each additional hashmap at 2x the previous, but realistically it seems that
|
125
|
+
* most resizes are triggered by keys that are ridiculously large, not by mass collision.
|
126
|
+
*
|
127
|
+
* A more proper fix might be to re-structure the whole filedict. We could keep the existing
|
128
|
+
* hashmap structure, but with buckets that expand dynamically. This would require each bucket
|
129
|
+
* to contain a "pointer" to the next bucket object if present.
|
130
|
+
*
|
131
|
+
* For now, it's easier to just keep the hashmap duplication without the size doubling.
|
132
|
+
*/
|
133
|
+
return sizeof(filedict_header_t) + initial_bucket_count * hashmap_count * sizeof(filedict_bucket_t);
|
134
|
+
}
|
122
135
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
136
|
+
/*
|
137
|
+
* Resizes the filedict based on the header hashmap count and initial bucket count.
|
138
|
+
* Naturally, your pointers into the map will become invalid after calling this.
|
139
|
+
*/
|
140
|
+
static void filedict_resize(filedict_t *filedict) {
|
141
|
+
filedict_header_t *header = (filedict_header_t*)filedict->data;
|
142
|
+
size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
|
143
|
+
if (computed_size <= filedict->data_len) return;
|
127
144
|
|
128
|
-
|
145
|
+
munmap(filedict->data, filedict->data_len);
|
146
|
+
filedict->data = mmap(
|
147
|
+
filedict->data,
|
148
|
+
computed_size,
|
149
|
+
PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
|
150
|
+
MAP_SHARED,
|
151
|
+
filedict->fd,
|
152
|
+
0
|
153
|
+
);
|
154
|
+
if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return; }
|
155
|
+
filedict->data_len = computed_size;
|
129
156
|
}
|
130
157
|
|
131
158
|
/*
|
@@ -146,15 +173,24 @@ static void filedict_open_f(
|
|
146
173
|
int flags,
|
147
174
|
unsigned int initial_bucket_count
|
148
175
|
) {
|
176
|
+
struct stat info;
|
177
|
+
|
178
|
+
filedict->flags = flags;
|
149
179
|
filedict->fd = open(filename, flags, 0666);
|
150
180
|
if (filedict->fd == -1) { filedict->error = strerror(errno); return; }
|
181
|
+
if (fstat(filedict->fd, &info) != 0) { filedict->error = strerror(errno); return; }
|
182
|
+
|
183
|
+
if (info.st_size == 0 && (flags & O_RDWR)) {
|
184
|
+
filedict->data_len = filedict_file_size(initial_bucket_count, 1);
|
185
|
+
ftruncate(filedict->fd, filedict->data_len);
|
186
|
+
} else {
|
187
|
+
filedict->data_len = info.st_size;
|
188
|
+
}
|
151
189
|
|
152
|
-
filedict->data_len = filedict_file_size(initial_bucket_count, 1);
|
153
|
-
ftruncate(filedict->fd, filedict->data_len);
|
154
190
|
filedict->data = mmap(
|
155
191
|
NULL,
|
156
192
|
filedict->data_len,
|
157
|
-
PROT_READ | PROT_WRITE,
|
193
|
+
PROT_READ | ((flags & O_RDWR) ? PROT_WRITE : 0),
|
158
194
|
MAP_SHARED,
|
159
195
|
filedict->fd,
|
160
196
|
0
|
@@ -163,8 +199,11 @@ static void filedict_open_f(
|
|
163
199
|
|
164
200
|
filedict_header_t *data = (filedict_header_t *)filedict->data;
|
165
201
|
assert(initial_bucket_count <= UINT_MAX);
|
166
|
-
|
167
|
-
data->
|
202
|
+
|
203
|
+
if (data->initial_bucket_count == 0) {
|
204
|
+
data->initial_bucket_count = initial_bucket_count;
|
205
|
+
data->hashmap_count = 1;
|
206
|
+
}
|
168
207
|
}
|
169
208
|
|
170
209
|
/*
|
@@ -252,7 +291,6 @@ try_again:
|
|
252
291
|
|
253
292
|
++hashmap_i;
|
254
293
|
hashmap += bucket_count;
|
255
|
-
bucket_count = (bucket_count << 1);
|
256
294
|
}
|
257
295
|
|
258
296
|
/*
|
@@ -272,7 +310,7 @@ try_again:
|
|
272
310
|
filedict->data = mmap(
|
273
311
|
filedict->data,
|
274
312
|
new_data_len,
|
275
|
-
PROT_READ | PROT_WRITE,
|
313
|
+
PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
|
276
314
|
MAP_SHARED,
|
277
315
|
filedict->fd,
|
278
316
|
0
|
@@ -365,25 +403,14 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
|
|
365
403
|
size_t offset = filedict_file_size(header->initial_bucket_count, read->hashmap_i);
|
366
404
|
|
367
405
|
if (offset >= filedict->data_len) {
|
368
|
-
|
369
|
-
|
370
|
-
munmap(filedict->data, filedict->data_len);
|
371
|
-
filedict->data = mmap(
|
372
|
-
filedict->data,
|
373
|
-
computed_size,
|
374
|
-
PROT_READ | PROT_WRITE,
|
375
|
-
MAP_SHARED,
|
376
|
-
filedict->fd,
|
377
|
-
0
|
378
|
-
);
|
379
|
-
if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return 0; }
|
380
|
-
filedict->data_len = computed_size;
|
406
|
+
filedict_resize(filedict);
|
407
|
+
if (filedict->error) log_return(0);
|
381
408
|
header = (filedict_header_t*)filedict->data;
|
382
409
|
}
|
383
410
|
|
384
411
|
filedict_bucket_t *hashmap = filedict->data + offset;
|
385
412
|
|
386
|
-
read->bucket_count = (size_t)header->initial_bucket_count
|
413
|
+
read->bucket_count = (size_t)header->initial_bucket_count;
|
387
414
|
read->bucket = &hashmap[read->key_hash % read->bucket_count];
|
388
415
|
read->entry = &read->bucket->entries[0];
|
389
416
|
|
data/lib/filedict/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filedictrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nigel Baillie
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-04-06 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|