filedictrb 0.2.0 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/ext/filedict/filedict.h +103 -34
- data/lib/filedict/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 54edb33f9c980c2815d486ba98bcc317209325a2f5a8303114ba2912675ebebf
|
4
|
+
data.tar.gz: f5d8eb13dca465d500621c434440ccb648c4e2b52b4f72a70f2e803ac70c1607
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6fa2c3bc8d94db20229ce1f152ef67dfd2bdc503e090ba46420101a958891b5aa8039bded2a23bf0ee6ccec9ff25e602e5c25ba04faa8ac2ee62195f68960111
|
7
|
+
data.tar.gz: 5b2974454d61502919d45dde3b90b400eb77ab3fea4d87c44b9cae0b3c31c24ace549dc9ee314670ee114d0c7e751566c1b5181569e9120ddbd7bd214d702749
|
data/ext/filedict/filedict.h
CHANGED
@@ -1,17 +1,14 @@
|
|
1
1
|
#ifndef FILEDICT_H
|
2
2
|
#define FILEDICT_H 1
|
3
3
|
|
4
|
-
#
|
5
|
-
#define FILEDICT_KEY_SIZE 256
|
6
|
-
#endif
|
4
|
+
#include <stddef.h>
|
7
5
|
|
8
|
-
#ifndef
|
9
|
-
#define
|
6
|
+
#ifndef FILEDICT_BUCKET_ENTRY_BYTES
|
7
|
+
#define FILEDICT_BUCKET_ENTRY_BYTES 512
|
10
8
|
#endif
|
11
9
|
|
12
10
|
typedef struct filedict_bucket_entry_t {
|
13
|
-
char
|
14
|
-
char value[FILEDICT_VALUE_SIZE];
|
11
|
+
char bytes[FILEDICT_BUCKET_ENTRY_BYTES];
|
15
12
|
} filedict_bucket_entry_t;
|
16
13
|
|
17
14
|
#ifndef FILEDICT_BUCKET_ENTRY_COUNT
|
@@ -58,6 +55,7 @@ typedef struct filedict_read_t {
|
|
58
55
|
|
59
56
|
#ifndef FILEDICT_IMPL
|
60
57
|
#define FILEDICT_IMPL
|
58
|
+
#include <sys/stat.h>
|
61
59
|
#include <sys/mman.h>
|
62
60
|
#include <string.h>
|
63
61
|
#include <unistd.h>
|
@@ -80,7 +78,7 @@ static size_t filedict_default_hash_function(const char *input) {
|
|
80
78
|
|
81
79
|
/*
|
82
80
|
* Writes at most max_len chars from src into dest.
|
83
|
-
* Returns the
|
81
|
+
* Returns the string length of src.
|
84
82
|
*/
|
85
83
|
static size_t filedict_copy_string(char *dest, const char *src, size_t max_len) {
|
86
84
|
size_t src_len = 0;
|
@@ -92,6 +90,23 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
|
|
92
90
|
if (c == 0) return src_len;
|
93
91
|
src_len += 1;
|
94
92
|
}
|
93
|
+
|
94
|
+
return src_len;
|
95
|
+
}
|
96
|
+
|
97
|
+
/*
|
98
|
+
* Returns the index of the trailing 0 when str1 and str2 have the same contents.
|
99
|
+
* Returns 0 when str1 and str2 have different contents.
|
100
|
+
*/
|
101
|
+
static size_t filedict_string_includes(const char *str1, const char *str2, size_t max_len) {
|
102
|
+
size_t i;
|
103
|
+
|
104
|
+
for (i = 0; i < max_len; ++i) {
|
105
|
+
if (str1[i] != str2[i]) return 0;
|
106
|
+
if (str1[i] == 0) return i;
|
107
|
+
}
|
108
|
+
|
109
|
+
return 0;
|
95
110
|
}
|
96
111
|
|
97
112
|
static void filedict_init(filedict_t *filedict) {
|
@@ -238,35 +253,42 @@ try_again:
|
|
238
253
|
filedict_bucket_entry_t *entry = &bucket->entries[i];
|
239
254
|
|
240
255
|
/* Easy case: fresh entry. We can just insert here and call it quits. */
|
241
|
-
if (entry->
|
242
|
-
|
243
|
-
size_t value_len = filedict_copy_string(entry->
|
256
|
+
if (entry->bytes[0] == 0) {
|
257
|
+
size_t key_len = filedict_copy_string(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES);
|
258
|
+
size_t value_len = filedict_copy_string(entry->bytes + key_len + 1, value, FILEDICT_BUCKET_ENTRY_BYTES);
|
244
259
|
|
245
|
-
if (value_len >
|
260
|
+
if (key_len + value_len > FILEDICT_BUCKET_ENTRY_BYTES) {
|
246
261
|
filedict->error = "Value too big";
|
247
262
|
}
|
248
263
|
return;
|
249
264
|
}
|
250
265
|
/*
|
251
266
|
* We need to check for room in the value, then append value.
|
252
|
-
* This is also where we might run into a duplicate and duck out.
|
267
|
+
* This is also where we might run into a duplicate and duck out.
|
253
268
|
*/
|
254
|
-
else if (strncmp(entry->
|
269
|
+
else if (strncmp(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES) == 0) {
|
255
270
|
long long first_nonzero = -1;
|
256
271
|
char *candidate = NULL;
|
257
|
-
size_t
|
272
|
+
size_t bytes_i, candidate_max_len;
|
258
273
|
|
259
|
-
for (
|
274
|
+
for (bytes_i = 0; entry->bytes[bytes_i] != 0; ++bytes_i) {
|
275
|
+
if (bytes_i >= FILEDICT_BUCKET_ENTRY_BYTES) {
|
276
|
+
filedict->error = "Mysterious entry overflow!! Does it contain a massive key?";
|
277
|
+
return;
|
278
|
+
}
|
279
|
+
}
|
280
|
+
|
281
|
+
for (bytes_i += 1; bytes_i < FILEDICT_BUCKET_ENTRY_BYTES - 1; ++bytes_i) {
|
260
282
|
if (unique) {
|
261
|
-
if (first_nonzero == -1 && entry->
|
262
|
-
first_nonzero =
|
283
|
+
if (first_nonzero == -1 && entry->bytes[bytes_i] != 0) {
|
284
|
+
first_nonzero = bytes_i;
|
263
285
|
}
|
264
286
|
|
265
|
-
if (entry->
|
287
|
+
if (entry->bytes[bytes_i] == 0) {
|
266
288
|
int cmp = strncmp(
|
267
|
-
&entry->
|
289
|
+
&entry->bytes[first_nonzero],
|
268
290
|
value,
|
269
|
-
|
291
|
+
FILEDICT_BUCKET_ENTRY_BYTES - first_nonzero
|
270
292
|
);
|
271
293
|
if (cmp == 0) {
|
272
294
|
/* Looks like this value already exists! */
|
@@ -276,13 +298,13 @@ try_again:
|
|
276
298
|
}
|
277
299
|
}
|
278
300
|
|
279
|
-
if (entry->
|
280
|
-
candidate = &entry->
|
281
|
-
|
301
|
+
if (entry->bytes[bytes_i] == 0 && entry->bytes[bytes_i + 1] == 0) {
|
302
|
+
candidate = &entry->bytes[bytes_i + 1];
|
303
|
+
candidate_max_len = FILEDICT_BUCKET_ENTRY_BYTES - bytes_i - 1;
|
282
304
|
|
283
|
-
if (strlen(value) >=
|
305
|
+
if (strlen(value) >= candidate_max_len) break;
|
284
306
|
|
285
|
-
strncpy(candidate, value,
|
307
|
+
strncpy(candidate, value, candidate_max_len);
|
286
308
|
return;
|
287
309
|
}
|
288
310
|
}
|
@@ -341,8 +363,8 @@ try_again:
|
|
341
363
|
static int filedict_read_advance_value(filedict_read_t *read) {
|
342
364
|
assert(read->entry != NULL);
|
343
365
|
|
344
|
-
const char *buffer_begin = read->entry->
|
345
|
-
const char *buffer_end = buffer_begin +
|
366
|
+
const char *buffer_begin = read->entry->bytes;
|
367
|
+
const char *buffer_end = buffer_begin + FILEDICT_BUCKET_ENTRY_BYTES;
|
346
368
|
|
347
369
|
const char *c;
|
348
370
|
for (c = read->value; c < buffer_end; ++c) {
|
@@ -366,8 +388,8 @@ static int filedict_read_advance_value(filedict_read_t *read) {
|
|
366
388
|
* Returns 0 when we exhausted all remaining entries and didn't find a match.
|
367
389
|
*/
|
368
390
|
static int filedict_read_advance_entry(filedict_read_t *read) {
|
369
|
-
|
370
|
-
|
391
|
+
size_t value_start_i;
|
392
|
+
|
371
393
|
assert(read->bucket != NULL);
|
372
394
|
|
373
395
|
while (1) {
|
@@ -375,9 +397,22 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
|
|
375
397
|
|
376
398
|
read->entry = &read->bucket->entries[read->entry_i];
|
377
399
|
|
378
|
-
if (
|
379
|
-
|
380
|
-
|
400
|
+
if (read->key == NULL) {
|
401
|
+
if (read->entry->bytes[0] != 0) {
|
402
|
+
value_start_i = strlen(read->entry->bytes) + 1;
|
403
|
+
read->value = &read->entry->bytes[value_start_i];
|
404
|
+
log_return(1);
|
405
|
+
}
|
406
|
+
}
|
407
|
+
else {
|
408
|
+
value_start_i = filedict_string_includes(read->entry->bytes, read->key, FILEDICT_BUCKET_ENTRY_BYTES);
|
409
|
+
|
410
|
+
if (value_start_i > 0) {
|
411
|
+
/* add 1 because it's pointing to the 0 after key; not the first char of value */
|
412
|
+
value_start_i += 1;
|
413
|
+
read->value = &read->entry->bytes[value_start_i];
|
414
|
+
log_return(1);
|
415
|
+
}
|
381
416
|
}
|
382
417
|
|
383
418
|
read->entry_i += 1;
|
@@ -392,6 +427,7 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
|
|
392
427
|
*/
|
393
428
|
static int filedict_read_advance_hashmap(filedict_read_t *read) {
|
394
429
|
filedict_t *filedict = read->filedict;
|
430
|
+
int success = 0;
|
395
431
|
|
396
432
|
assert(filedict);
|
397
433
|
assert(filedict->data);
|
@@ -416,6 +452,19 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
|
|
416
452
|
|
417
453
|
read->entry_i = 0;
|
418
454
|
|
455
|
+
if (read->key == NULL) {
|
456
|
+
success = filedict_read_advance_entry(read);
|
457
|
+
while (!success) {
|
458
|
+
read->key_hash += 1;
|
459
|
+
read->bucket = &hashmap[read->key_hash % read->bucket_count];
|
460
|
+
read->entry = &read->bucket->entries[0];
|
461
|
+
read->entry_i = 0;
|
462
|
+
success = filedict_read_advance_entry(read);
|
463
|
+
if (read->key_hash >= read->bucket_count) return 0;
|
464
|
+
}
|
465
|
+
return success;
|
466
|
+
}
|
467
|
+
|
419
468
|
log_return(filedict_read_advance_entry(read));
|
420
469
|
}
|
421
470
|
|
@@ -432,7 +481,14 @@ static filedict_read_t filedict_get(filedict_t *filedict, const char *key) {
|
|
432
481
|
read.entry_i = 0;
|
433
482
|
read.hashmap_i = 0;
|
434
483
|
read.bucket_count = 0;
|
435
|
-
|
484
|
+
|
485
|
+
/* NULL key means we want to iterate the whole entire dictionary */
|
486
|
+
if (key == NULL) {
|
487
|
+
read.key_hash = 0;
|
488
|
+
}
|
489
|
+
else {
|
490
|
+
read.key_hash = filedict->hash_function(key);
|
491
|
+
}
|
436
492
|
|
437
493
|
filedict_read_advance_hashmap(&read);
|
438
494
|
return read;
|
@@ -455,6 +511,19 @@ static int filedict_get_next(filedict_read_t *read) {
|
|
455
511
|
found = filedict_read_advance_entry(read);
|
456
512
|
if (found == 1) return found;
|
457
513
|
|
514
|
+
/*
|
515
|
+
* If read->key is NULL, that means we're iterating through the whole dict.
|
516
|
+
*/
|
517
|
+
if (read->key == NULL) {
|
518
|
+
read->key_hash += 1;
|
519
|
+
if (read->key_hash < read->bucket_count) {
|
520
|
+
return filedict_read_advance_hashmap(read);
|
521
|
+
}
|
522
|
+
else {
|
523
|
+
read->key_hash = 0;
|
524
|
+
}
|
525
|
+
}
|
526
|
+
|
458
527
|
read->hashmap_i += 1;
|
459
528
|
return filedict_read_advance_hashmap(read);
|
460
529
|
}
|
data/lib/filedict/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: filedictrb
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 1.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Nigel Baillie
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2022-
|
11
|
+
date: 2022-06-19 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
58
58
|
- !ruby/object:Gem::Version
|
59
59
|
version: '0'
|
60
60
|
requirements: []
|
61
|
-
rubygems_version: 3.
|
61
|
+
rubygems_version: 3.1.6
|
62
62
|
signing_key:
|
63
63
|
specification_version: 4
|
64
64
|
summary: Uses filedict to emulate a file-backed Hash<Set<String>>
|