filedictrb 0.2.0 → 1.0.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7cb1c1d801bf8943d6493a6c40be882c5d47c58166c56b6860abf8b98fc14a98
4
- data.tar.gz: 394f0febd55f827bfb3aafb3559ba89b6b458b3a7ed2c3b6d857d900e38b7374
3
+ metadata.gz: 54edb33f9c980c2815d486ba98bcc317209325a2f5a8303114ba2912675ebebf
4
+ data.tar.gz: f5d8eb13dca465d500621c434440ccb648c4e2b52b4f72a70f2e803ac70c1607
5
5
  SHA512:
6
- metadata.gz: 4a700778e49bd15c6686b9457b7a7987f4d6c8b47be4abf805a6a09d95156ce71ff8e15e7cb50ffcfa11f96e78f3eb633c19d10bf472c6f7963deb5ae658f9f4
7
- data.tar.gz: dc233039e2c4d0e2aade78efc7cd6b55ab088cbdeaa373d9219137ae1241585dbe32b4e5984eeb402c52a32a027ee14f2950fa5540b3a3b932a34d13ff834b8c
6
+ metadata.gz: 6fa2c3bc8d94db20229ce1f152ef67dfd2bdc503e090ba46420101a958891b5aa8039bded2a23bf0ee6ccec9ff25e602e5c25ba04faa8ac2ee62195f68960111
7
+ data.tar.gz: 5b2974454d61502919d45dde3b90b400eb77ab3fea4d87c44b9cae0b3c31c24ace549dc9ee314670ee114d0c7e751566c1b5181569e9120ddbd7bd214d702749
@@ -1,17 +1,14 @@
1
1
  #ifndef FILEDICT_H
2
2
  #define FILEDICT_H 1
3
3
 
4
- #ifndef FILEDICT_KEY_SIZE
5
- #define FILEDICT_KEY_SIZE 256
6
- #endif
4
+ #include <stddef.h>
7
5
 
8
- #ifndef FILEDICT_VALUE_SIZE
9
- #define FILEDICT_VALUE_SIZE 256
6
+ #ifndef FILEDICT_BUCKET_ENTRY_BYTES
7
+ #define FILEDICT_BUCKET_ENTRY_BYTES 512
10
8
  #endif
11
9
 
12
10
  typedef struct filedict_bucket_entry_t {
13
- char key[FILEDICT_KEY_SIZE];
14
- char value[FILEDICT_VALUE_SIZE];
11
+ char bytes[FILEDICT_BUCKET_ENTRY_BYTES];
15
12
  } filedict_bucket_entry_t;
16
13
 
17
14
  #ifndef FILEDICT_BUCKET_ENTRY_COUNT
@@ -58,6 +55,7 @@ typedef struct filedict_read_t {
58
55
 
59
56
  #ifndef FILEDICT_IMPL
60
57
  #define FILEDICT_IMPL
58
+ #include <sys/stat.h>
61
59
  #include <sys/mman.h>
62
60
  #include <string.h>
63
61
  #include <unistd.h>
@@ -80,7 +78,7 @@ static size_t filedict_default_hash_function(const char *input) {
80
78
 
81
79
  /*
82
80
  * Writes at most max_len chars from src into dest.
83
- * Returns the total number of bytes in src.
81
+ * Returns the string length of src.
84
82
  */
85
83
  static size_t filedict_copy_string(char *dest, const char *src, size_t max_len) {
86
84
  size_t src_len = 0;
@@ -92,6 +90,23 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
92
90
  if (c == 0) return src_len;
93
91
  src_len += 1;
94
92
  }
93
+
94
+ return src_len;
95
+ }
96
+
97
+ /*
98
+ * Returns the index of the trailing 0 when str1 and str2 have the same contents.
99
+ * Returns 0 when str1 and str2 have different contents.
100
+ */
101
+ static size_t filedict_string_includes(const char *str1, const char *str2, size_t max_len) {
102
+ size_t i;
103
+
104
+ for (i = 0; i < max_len; ++i) {
105
+ if (str1[i] != str2[i]) return 0;
106
+ if (str1[i] == 0) return i;
107
+ }
108
+
109
+ return 0;
95
110
  }
96
111
 
97
112
  static void filedict_init(filedict_t *filedict) {
@@ -238,35 +253,42 @@ try_again:
238
253
  filedict_bucket_entry_t *entry = &bucket->entries[i];
239
254
 
240
255
  /* Easy case: fresh entry. We can just insert here and call it quits. */
241
- if (entry->key[0] == 0) {
242
- strncpy(entry->key, key, FILEDICT_KEY_SIZE);
243
- size_t value_len = filedict_copy_string(entry->value, value, FILEDICT_VALUE_SIZE);
256
+ if (entry->bytes[0] == 0) {
257
+ size_t key_len = filedict_copy_string(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES);
258
+ size_t value_len = filedict_copy_string(entry->bytes + key_len + 1, value, FILEDICT_BUCKET_ENTRY_BYTES);
244
259
 
245
- if (value_len > FILEDICT_VALUE_SIZE) {
260
+ if (key_len + value_len > FILEDICT_BUCKET_ENTRY_BYTES) {
246
261
  filedict->error = "Value too big";
247
262
  }
248
263
  return;
249
264
  }
250
265
  /*
251
266
  * We need to check for room in the value, then append value.
252
- * This is also where we might run into a duplicate and duck out.existing
267
+ * This is also where we might run into a duplicate and duck out.
253
268
  */
254
- else if (strncmp(entry->key, key, FILEDICT_KEY_SIZE) == 0) {
269
+ else if (strncmp(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES) == 0) {
255
270
  long long first_nonzero = -1;
256
271
  char *candidate = NULL;
257
- size_t value_i, candidate_len;
272
+ size_t bytes_i, candidate_max_len;
258
273
 
259
- for (value_i = 0; value_i < FILEDICT_VALUE_SIZE - 1; ++value_i) {
274
+ for (bytes_i = 0; entry->bytes[bytes_i] != 0; ++bytes_i) {
275
+ if (bytes_i >= FILEDICT_BUCKET_ENTRY_BYTES) {
276
+ filedict->error = "Mysterious entry overflow!! Does it contain a massive key?";
277
+ return;
278
+ }
279
+ }
280
+
281
+ for (bytes_i += 1; bytes_i < FILEDICT_BUCKET_ENTRY_BYTES - 1; ++bytes_i) {
260
282
  if (unique) {
261
- if (first_nonzero == -1 && entry->value[value_i] != 0) {
262
- first_nonzero = value_i;
283
+ if (first_nonzero == -1 && entry->bytes[bytes_i] != 0) {
284
+ first_nonzero = bytes_i;
263
285
  }
264
286
 
265
- if (entry->value[value_i] == 0) {
287
+ if (entry->bytes[bytes_i] == 0) {
266
288
  int cmp = strncmp(
267
- &entry->value[first_nonzero],
289
+ &entry->bytes[first_nonzero],
268
290
  value,
269
- FILEDICT_VALUE_SIZE - first_nonzero
291
+ FILEDICT_BUCKET_ENTRY_BYTES - first_nonzero
270
292
  );
271
293
  if (cmp == 0) {
272
294
  /* Looks like this value already exists! */
@@ -276,13 +298,13 @@ try_again:
276
298
  }
277
299
  }
278
300
 
279
- if (entry->value[value_i] == 0 && entry->value[value_i + 1] == 0) {
280
- candidate = &entry->value[value_i + 1];
281
- candidate_len = FILEDICT_VALUE_SIZE - value_i - 1;
301
+ if (entry->bytes[bytes_i] == 0 && entry->bytes[bytes_i + 1] == 0) {
302
+ candidate = &entry->bytes[bytes_i + 1];
303
+ candidate_max_len = FILEDICT_BUCKET_ENTRY_BYTES - bytes_i - 1;
282
304
 
283
- if (strlen(value) >= candidate_len) break;
305
+ if (strlen(value) >= candidate_max_len) break;
284
306
 
285
- strncpy(candidate, value, candidate_len);
307
+ strncpy(candidate, value, candidate_max_len);
286
308
  return;
287
309
  }
288
310
  }
@@ -341,8 +363,8 @@ try_again:
341
363
  static int filedict_read_advance_value(filedict_read_t *read) {
342
364
  assert(read->entry != NULL);
343
365
 
344
- const char *buffer_begin = read->entry->value;
345
- const char *buffer_end = buffer_begin + FILEDICT_VALUE_SIZE;
366
+ const char *buffer_begin = read->entry->bytes;
367
+ const char *buffer_end = buffer_begin + FILEDICT_BUCKET_ENTRY_BYTES;
346
368
 
347
369
  const char *c;
348
370
  for (c = read->value; c < buffer_end; ++c) {
@@ -366,8 +388,8 @@ static int filedict_read_advance_value(filedict_read_t *read) {
366
388
  * Returns 0 when we exhausted all remaining entries and didn't find a match.
367
389
  */
368
390
  static int filedict_read_advance_entry(filedict_read_t *read) {
369
- assert(read->key != NULL);
370
- assert(strlen(read->key) > 0);
391
+ size_t value_start_i;
392
+
371
393
  assert(read->bucket != NULL);
372
394
 
373
395
  while (1) {
@@ -375,9 +397,22 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
375
397
 
376
398
  read->entry = &read->bucket->entries[read->entry_i];
377
399
 
378
- if (strncmp(read->entry->key, read->key, FILEDICT_KEY_SIZE) == 0) {
379
- read->value = read->entry->value;
380
- log_return(1);
400
+ if (read->key == NULL) {
401
+ if (read->entry->bytes[0] != 0) {
402
+ value_start_i = strlen(read->entry->bytes) + 1;
403
+ read->value = &read->entry->bytes[value_start_i];
404
+ log_return(1);
405
+ }
406
+ }
407
+ else {
408
+ value_start_i = filedict_string_includes(read->entry->bytes, read->key, FILEDICT_BUCKET_ENTRY_BYTES);
409
+
410
+ if (value_start_i > 0) {
411
+ /* add 1 because it's pointing to the 0 after key; not the first char of value */
412
+ value_start_i += 1;
413
+ read->value = &read->entry->bytes[value_start_i];
414
+ log_return(1);
415
+ }
381
416
  }
382
417
 
383
418
  read->entry_i += 1;
@@ -392,6 +427,7 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
392
427
  */
393
428
  static int filedict_read_advance_hashmap(filedict_read_t *read) {
394
429
  filedict_t *filedict = read->filedict;
430
+ int success = 0;
395
431
 
396
432
  assert(filedict);
397
433
  assert(filedict->data);
@@ -416,6 +452,19 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
416
452
 
417
453
  read->entry_i = 0;
418
454
 
455
+ if (read->key == NULL) {
456
+ success = filedict_read_advance_entry(read);
457
+ while (!success) {
458
+ read->key_hash += 1;
459
+ read->bucket = &hashmap[read->key_hash % read->bucket_count];
460
+ read->entry = &read->bucket->entries[0];
461
+ read->entry_i = 0;
462
+ success = filedict_read_advance_entry(read);
463
+ if (read->key_hash >= read->bucket_count) return 0;
464
+ }
465
+ return success;
466
+ }
467
+
419
468
  log_return(filedict_read_advance_entry(read));
420
469
  }
421
470
 
@@ -432,7 +481,14 @@ static filedict_read_t filedict_get(filedict_t *filedict, const char *key) {
432
481
  read.entry_i = 0;
433
482
  read.hashmap_i = 0;
434
483
  read.bucket_count = 0;
435
- read.key_hash = filedict->hash_function(key);
484
+
485
+ /* NULL key means we want to iterate the whole entire dictionary */
486
+ if (key == NULL) {
487
+ read.key_hash = 0;
488
+ }
489
+ else {
490
+ read.key_hash = filedict->hash_function(key);
491
+ }
436
492
 
437
493
  filedict_read_advance_hashmap(&read);
438
494
  return read;
@@ -455,6 +511,19 @@ static int filedict_get_next(filedict_read_t *read) {
455
511
  found = filedict_read_advance_entry(read);
456
512
  if (found == 1) return found;
457
513
 
514
+ /*
515
+ * If read->key is NULL, that means we're iterating through the whole dict.
516
+ */
517
+ if (read->key == NULL) {
518
+ read->key_hash += 1;
519
+ if (read->key_hash < read->bucket_count) {
520
+ return filedict_read_advance_hashmap(read);
521
+ }
522
+ else {
523
+ read->key_hash = 0;
524
+ }
525
+ }
526
+
458
527
  read->hashmap_i += 1;
459
528
  return filedict_read_advance_hashmap(read);
460
529
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Filedict
4
- VERSION = "0.2.0"
4
+ VERSION = "1.0.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filedictrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nigel Baillie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-06 00:00:00.000000000 Z
11
+ date: 2022-06-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  - !ruby/object:Gem::Version
59
59
  version: '0'
60
60
  requirements: []
61
- rubygems_version: 3.3.3
61
+ rubygems_version: 3.1.6
62
62
  signing_key:
63
63
  specification_version: 4
64
64
  summary: Uses filedict to emulate a file-backed Hash<Set<String>>