filedictrb 0.2.0 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/filedict/filedict.h +103 -34
- data/lib/filedict/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54edb33f9c980c2815d486ba98bcc317209325a2f5a8303114ba2912675ebebf
|
4
|
+
data.tar.gz: f5d8eb13dca465d500621c434440ccb648c4e2b52b4f72a70f2e803ac70c1607
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fa2c3bc8d94db20229ce1f152ef67dfd2bdc503e090ba46420101a958891b5aa8039bded2a23bf0ee6ccec9ff25e602e5c25ba04faa8ac2ee62195f68960111
|
7
|
+
data.tar.gz: 5b2974454d61502919d45dde3b90b400eb77ab3fea4d87c44b9cae0b3c31c24ace549dc9ee314670ee114d0c7e751566c1b5181569e9120ddbd7bd214d702749
|
data/ext/filedict/filedict.h
CHANGED
@@ -1,17 +1,14 @@
|
|
1
1
|
#ifndef FILEDICT_H
|
2
2
|
#define FILEDICT_H 1
|
3
3
|
|
4
|
-
#ifndef FILEDICT_KEY_SIZE
|
5
|
-
#define FILEDICT_KEY_SIZE 256
|
6
|
-
#endif
|
4
|
+
#include <stddef.h>
|
7
5
|
|
8
|
-
#ifndef FILEDICT_VALUE_SIZE
|
9
|
-
#define
|
6
|
+
#ifndef FILEDICT_BUCKET_ENTRY_BYTES
|
7
|
+
#define FILEDICT_BUCKET_ENTRY_BYTES 512
|
10
8
|
#endif
|
11
9
|
|
12
10
|
typedef struct filedict_bucket_entry_t {
|
13
|
-
char key[FILEDICT_KEY_SIZE];
|
14
|
-
char value[FILEDICT_VALUE_SIZE];
|
11
|
+
char bytes[FILEDICT_BUCKET_ENTRY_BYTES];
|
15
12
|
} filedict_bucket_entry_t;
|
16
13
|
|
17
14
|
#ifndef FILEDICT_BUCKET_ENTRY_COUNT
|
@@ -58,6 +55,7 @@ typedef struct filedict_read_t {
|
|
58
55
|
|
59
56
|
#ifndef FILEDICT_IMPL
|
60
57
|
#define FILEDICT_IMPL
|
58
|
+
#include <sys/stat.h>
|
61
59
|
#include <sys/mman.h>
|
62
60
|
#include <string.h>
|
63
61
|
#include <unistd.h>
|
@@ -80,7 +78,7 @@ static size_t filedict_default_hash_function(const char *input) {
|
|
80
78
|
|
81
79
|
/*
|
82
80
|
* Writes at most max_len chars from src into dest.
|
83
|
-
* Returns the
|
81
|
+
* Returns the string length of src.
|
84
82
|
*/
|
85
83
|
static size_t filedict_copy_string(char *dest, const char *src, size_t max_len) {
|
86
84
|
size_t src_len = 0;
|
@@ -92,6 +90,23 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
|
|
92
90
|
if (c == 0) return src_len;
|
93
91
|
src_len += 1;
|
94
92
|
}
|
93
|
+
|
94
|
+
return src_len;
|
95
|
+
}
|
96
|
+
|
97
|
+
/*
|
98
|
+
* Returns the index of the trailing 0 when str1 and str2 have the same contents.
|
99
|
+
* Returns 0 when str1 and str2 have different contents.
|
100
|
+
*/
|
101
|
+
static size_t filedict_string_includes(const char *str1, const char *str2, size_t max_len) {
|
102
|
+
size_t i;
|
103
|
+
|
104
|
+
for (i = 0; i < max_len; ++i) {
|
105
|
+
if (str1[i] != str2[i]) return 0;
|
106
|
+
if (str1[i] == 0) return i;
|
107
|
+
}
|
108
|
+
|
109
|
+
return 0;
|
95
110
|
}
|
96
111
|
|
97
112
|
static void filedict_init(filedict_t *filedict) {
|
@@ -238,35 +253,42 @@ try_again:
|
|
238
253
|
filedict_bucket_entry_t *entry = &bucket->entries[i];
|
239
254
|
|
240
255
|
/* Easy case: fresh entry. We can just insert here and call it quits. */
|
241
|
-
if (entry->
|
242
|
-
|
243
|
-
size_t value_len = filedict_copy_string(entry->
|
256
|
+
if (entry->bytes[0] == 0) {
|
257
|
+
size_t key_len = filedict_copy_string(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES);
|
258
|
+
size_t value_len = filedict_copy_string(entry->bytes + key_len + 1, value, FILEDICT_BUCKET_ENTRY_BYTES);
|
244
259
|
|
245
|
-
if (value_len >
|
260
|
+
if (key_len + value_len > FILEDICT_BUCKET_ENTRY_BYTES) {
|
246
261
|
filedict->error = "Value too big";
|
247
262
|
}
|
248
263
|
return;
|
249
264
|
}
|
250
265
|
/*
|
251
266
|
* We need to check for room in the value, then append value.
|
252
|
-
* This is also where we might run into a duplicate and duck out.
|
267
|
+
* This is also where we might run into a duplicate and duck out.
|
253
268
|
*/
|
254
|
-
else if (strncmp(entry->
|
269
|
+
else if (strncmp(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES) == 0) {
|
255
270
|
long long first_nonzero = -1;
|
256
271
|
char *candidate = NULL;
|
257
|
-
size_t
|
272
|
+
size_t bytes_i, candidate_max_len;
|
258
273
|
|
259
|
-
for (
|
274
|
+
for (bytes_i = 0; entry->bytes[bytes_i] != 0; ++bytes_i) {
|
275
|
+
if (bytes_i >= FILEDICT_BUCKET_ENTRY_BYTES) {
|
276
|
+
filedict->error = "Mysterious entry overflow!! Does it contain a massive key?";
|
277
|
+
return;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
|
281
|
+
for (bytes_i += 1; bytes_i < FILEDICT_BUCKET_ENTRY_BYTES - 1; ++bytes_i) {
|
260
282
|
if (unique) {
|
261
|
-
if (first_nonzero == -1 && entry->
|
262
|
-
first_nonzero =
|
283
|
+
if (first_nonzero == -1 && entry->bytes[bytes_i] != 0) {
|
284
|
+
first_nonzero = bytes_i;
|
263
285
|
}
|
264
286
|
|
265
|
-
if (entry->
|
287
|
+
if (entry->bytes[bytes_i] == 0) {
|
266
288
|
int cmp = strncmp(
|
267
|
-
&entry->
|
289
|
+
&entry->bytes[first_nonzero],
|
268
290
|
value,
|
269
|
-
|
291
|
+
FILEDICT_BUCKET_ENTRY_BYTES - first_nonzero
|
270
292
|
);
|
271
293
|
if (cmp == 0) {
|
272
294
|
/* Looks like this value already exists! */
|
@@ -276,13 +298,13 @@ try_again:
|
|
276
298
|
}
|
277
299
|
}
|
278
300
|
|
279
|
-
if (entry->
|
280
|
-
candidate = &entry->
|
281
|
-
|
301
|
+
if (entry->bytes[bytes_i] == 0 && entry->bytes[bytes_i + 1] == 0) {
|
302
|
+
candidate = &entry->bytes[bytes_i + 1];
|
303
|
+
candidate_max_len = FILEDICT_BUCKET_ENTRY_BYTES - bytes_i - 1;
|
282
304
|
|
283
|
-
if (strlen(value) >=
|
305
|
+
if (strlen(value) >= candidate_max_len) break;
|
284
306
|
|
285
|
-
strncpy(candidate, value,
|
307
|
+
strncpy(candidate, value, candidate_max_len);
|
286
308
|
return;
|
287
309
|
}
|
288
310
|
}
|
@@ -341,8 +363,8 @@ try_again:
|
|
341
363
|
static int filedict_read_advance_value(filedict_read_t *read) {
|
342
364
|
assert(read->entry != NULL);
|
343
365
|
|
344
|
-
const char *buffer_begin = read->entry->
|
345
|
-
const char *buffer_end = buffer_begin +
|
366
|
+
const char *buffer_begin = read->entry->bytes;
|
367
|
+
const char *buffer_end = buffer_begin + FILEDICT_BUCKET_ENTRY_BYTES;
|
346
368
|
|
347
369
|
const char *c;
|
348
370
|
for (c = read->value; c < buffer_end; ++c) {
|
@@ -366,8 +388,8 @@ static int filedict_read_advance_value(filedict_read_t *read) {
|
|
366
388
|
* Returns 0 when we exhausted all remaining entries and didn't find a match.
|
367
389
|
*/
|
368
390
|
static int filedict_read_advance_entry(filedict_read_t *read) {
|
369
|
-
|
370
|
-
|
391
|
+
size_t value_start_i;
|
392
|
+
|
371
393
|
assert(read->bucket != NULL);
|
372
394
|
|
373
395
|
while (1) {
|
@@ -375,9 +397,22 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
|
|
375
397
|
|
376
398
|
read->entry = &read->bucket->entries[read->entry_i];
|
377
399
|
|
378
|
-
if (
|
379
|
-
|
380
|
-
|
400
|
+
if (read->key == NULL) {
|
401
|
+
if (read->entry->bytes[0] != 0) {
|
402
|
+
value_start_i = strlen(read->entry->bytes) + 1;
|
403
|
+
read->value = &read->entry->bytes[value_start_i];
|
404
|
+
log_return(1);
|
405
|
+
}
|
406
|
+
}
|
407
|
+
else {
|
408
|
+
value_start_i = filedict_string_includes(read->entry->bytes, read->key, FILEDICT_BUCKET_ENTRY_BYTES);
|
409
|
+
|
410
|
+
if (value_start_i > 0) {
|
411
|
+
/* add 1 because it's pointing to the 0 after key; not the first char of value */
|
412
|
+
value_start_i += 1;
|
413
|
+
read->value = &read->entry->bytes[value_start_i];
|
414
|
+
log_return(1);
|
415
|
+
}
|
381
416
|
}
|
382
417
|
|
383
418
|
read->entry_i += 1;
|
@@ -392,6 +427,7 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
|
|
392
427
|
*/
|
393
428
|
static int filedict_read_advance_hashmap(filedict_read_t *read) {
|
394
429
|
filedict_t *filedict = read->filedict;
|
430
|
+
int success = 0;
|
395
431
|
|
396
432
|
assert(filedict);
|
397
433
|
assert(filedict->data);
|
@@ -416,6 +452,19 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
|
|
416
452
|
|
417
453
|
read->entry_i = 0;
|
418
454
|
|
455
|
+
if (read->key == NULL) {
|
456
|
+
success = filedict_read_advance_entry(read);
|
457
|
+
while (!success) {
|
458
|
+
read->key_hash += 1;
|
459
|
+
read->bucket = &hashmap[read->key_hash % read->bucket_count];
|
460
|
+
read->entry = &read->bucket->entries[0];
|
461
|
+
read->entry_i = 0;
|
462
|
+
success = filedict_read_advance_entry(read);
|
463
|
+
if (read->key_hash >= read->bucket_count) return 0;
|
464
|
+
}
|
465
|
+
return success;
|
466
|
+
}
|
467
|
+
|
419
468
|
log_return(filedict_read_advance_entry(read));
|
420
469
|
}
|
421
470
|
|
@@ -432,7 +481,14 @@ static filedict_read_t filedict_get(filedict_t *filedict, const char *key) {
|
|
432
481
|
read.entry_i = 0;
|
433
482
|
read.hashmap_i = 0;
|
434
483
|
read.bucket_count = 0;
|
435
|
-
|
484
|
+
|
485
|
+
/* NULL key means we want to iterate the whole entire dictionary */
|
486
|
+
if (key == NULL) {
|
487
|
+
read.key_hash = 0;
|
488
|
+
}
|
489
|
+
else {
|
490
|
+
read.key_hash = filedict->hash_function(key);
|
491
|
+
}
|
436
492
|
|
437
493
|
filedict_read_advance_hashmap(&read);
|
438
494
|
return read;
|
@@ -455,6 +511,19 @@ static int filedict_get_next(filedict_read_t *read) {
|
|
455
511
|
found = filedict_read_advance_entry(read);
|
456
512
|
if (found == 1) return found;
|
457
513
|
|
514
|
+
/*
|
515
|
+
* If read->key is NULL, that means we're iterating through the whole dict.
|
516
|
+
*/
|
517
|
+
if (read->key == NULL) {
|
518
|
+
read->key_hash += 1;
|
519
|
+
if (read->key_hash < read->bucket_count) {
|
520
|
+
return filedict_read_advance_hashmap(read);
|
521
|
+
}
|
522
|
+
else {
|
523
|
+
read->key_hash = 0;
|
524
|
+
}
|
525
|
+
}
|
526
|
+
|
458
527
|
read->hashmap_i += 1;
|
459
528
|
return filedict_read_advance_hashmap(read);
|
460
529
|
}
|
data/lib/filedict/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filedictrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nigel Baillie
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: '0'
|
60
60
|
requirements: []
|
61
|
-
rubygems_version: 3.
|
61
|
+
rubygems_version: 3.1.6
|
62
62
|
signing_key:
|
63
63
|
specification_version: 4
|
64
64
|
summary: Uses filedict to emulate a file-backed Hash<Set<String>>
|