filedictrb 0.2.0 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 7cb1c1d801bf8943d6493a6c40be882c5d47c58166c56b6860abf8b98fc14a98
4
- data.tar.gz: 394f0febd55f827bfb3aafb3559ba89b6b458b3a7ed2c3b6d857d900e38b7374
3
+ metadata.gz: 54edb33f9c980c2815d486ba98bcc317209325a2f5a8303114ba2912675ebebf
4
+ data.tar.gz: f5d8eb13dca465d500621c434440ccb648c4e2b52b4f72a70f2e803ac70c1607
5
5
  SHA512:
6
- metadata.gz: 4a700778e49bd15c6686b9457b7a7987f4d6c8b47be4abf805a6a09d95156ce71ff8e15e7cb50ffcfa11f96e78f3eb633c19d10bf472c6f7963deb5ae658f9f4
7
- data.tar.gz: dc233039e2c4d0e2aade78efc7cd6b55ab088cbdeaa373d9219137ae1241585dbe32b4e5984eeb402c52a32a027ee14f2950fa5540b3a3b932a34d13ff834b8c
6
+ metadata.gz: 6fa2c3bc8d94db20229ce1f152ef67dfd2bdc503e090ba46420101a958891b5aa8039bded2a23bf0ee6ccec9ff25e602e5c25ba04faa8ac2ee62195f68960111
7
+ data.tar.gz: 5b2974454d61502919d45dde3b90b400eb77ab3fea4d87c44b9cae0b3c31c24ace549dc9ee314670ee114d0c7e751566c1b5181569e9120ddbd7bd214d702749
@@ -1,17 +1,14 @@
1
1
  #ifndef FILEDICT_H
2
2
  #define FILEDICT_H 1
3
3
 
4
- #ifndef FILEDICT_KEY_SIZE
5
- #define FILEDICT_KEY_SIZE 256
6
- #endif
4
+ #include <stddef.h>
7
5
 
8
- #ifndef FILEDICT_VALUE_SIZE
9
- #define FILEDICT_VALUE_SIZE 256
6
+ #ifndef FILEDICT_BUCKET_ENTRY_BYTES
7
+ #define FILEDICT_BUCKET_ENTRY_BYTES 512
10
8
  #endif
11
9
 
12
10
  typedef struct filedict_bucket_entry_t {
13
- char key[FILEDICT_KEY_SIZE];
14
- char value[FILEDICT_VALUE_SIZE];
11
+ char bytes[FILEDICT_BUCKET_ENTRY_BYTES];
15
12
  } filedict_bucket_entry_t;
16
13
 
17
14
  #ifndef FILEDICT_BUCKET_ENTRY_COUNT
@@ -58,6 +55,7 @@ typedef struct filedict_read_t {
58
55
 
59
56
  #ifndef FILEDICT_IMPL
60
57
  #define FILEDICT_IMPL
58
+ #include <sys/stat.h>
61
59
  #include <sys/mman.h>
62
60
  #include <string.h>
63
61
  #include <unistd.h>
@@ -80,7 +78,7 @@ static size_t filedict_default_hash_function(const char *input) {
80
78
 
81
79
  /*
82
80
  * Writes at most max_len chars from src into dest.
83
- * Returns the total number of bytes in src.
81
+ * Returns the string length of src.
84
82
  */
85
83
  static size_t filedict_copy_string(char *dest, const char *src, size_t max_len) {
86
84
  size_t src_len = 0;
@@ -92,6 +90,23 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
92
90
  if (c == 0) return src_len;
93
91
  src_len += 1;
94
92
  }
93
+
94
+ return src_len;
95
+ }
96
+
97
+ /*
98
+ * Returns the index of the trailing 0 when str1 and str2 have the same contents.
99
+ * Returns 0 when str1 and str2 differ, when both are empty, or when no terminating 0 is found within max_len.
100
+ */
101
+ static size_t filedict_string_includes(const char *str1, const char *str2, size_t max_len) {
102
+ size_t i;
103
+
104
+ for (i = 0; i < max_len; ++i) {
105
+ if (str1[i] != str2[i]) return 0;
106
+ if (str1[i] == 0) return i;
107
+ }
108
+
109
+ return 0;
95
110
  }
96
111
 
97
112
  static void filedict_init(filedict_t *filedict) {
@@ -238,35 +253,42 @@ try_again:
238
253
  filedict_bucket_entry_t *entry = &bucket->entries[i];
239
254
 
240
255
  /* Easy case: fresh entry. We can just insert here and call it quits. */
241
- if (entry->key[0] == 0) {
242
- strncpy(entry->key, key, FILEDICT_KEY_SIZE);
243
- size_t value_len = filedict_copy_string(entry->value, value, FILEDICT_VALUE_SIZE);
256
+ if (entry->bytes[0] == 0) {
257
+ size_t key_len = filedict_copy_string(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES);
258
+ size_t value_len = filedict_copy_string(entry->bytes + key_len + 1, value, FILEDICT_BUCKET_ENTRY_BYTES);
244
259
 
245
- if (value_len > FILEDICT_VALUE_SIZE) {
260
+ if (key_len + value_len > FILEDICT_BUCKET_ENTRY_BYTES) {
246
261
  filedict->error = "Value too big";
247
262
  }
248
263
  return;
249
264
  }
250
265
  /*
251
266
  * We need to check for room in the value, then append value.
252
- * This is also where we might run into a duplicate and duck out.existing
267
+ * This is also where we might run into a duplicate and duck out.
253
268
  */
254
- else if (strncmp(entry->key, key, FILEDICT_KEY_SIZE) == 0) {
269
+ else if (strncmp(entry->bytes, key, FILEDICT_BUCKET_ENTRY_BYTES) == 0) {
255
270
  long long first_nonzero = -1;
256
271
  char *candidate = NULL;
257
- size_t value_i, candidate_len;
272
+ size_t bytes_i, candidate_max_len;
258
273
 
259
- for (value_i = 0; value_i < FILEDICT_VALUE_SIZE - 1; ++value_i) {
274
+ for (bytes_i = 0; entry->bytes[bytes_i] != 0; ++bytes_i) {
275
+ if (bytes_i >= FILEDICT_BUCKET_ENTRY_BYTES) {
276
+ filedict->error = "Mysterious entry overflow!! Does it contain a massive key?";
277
+ return;
278
+ }
279
+ }
280
+
281
+ for (bytes_i += 1; bytes_i < FILEDICT_BUCKET_ENTRY_BYTES - 1; ++bytes_i) {
260
282
  if (unique) {
261
- if (first_nonzero == -1 && entry->value[value_i] != 0) {
262
- first_nonzero = value_i;
283
+ if (first_nonzero == -1 && entry->bytes[bytes_i] != 0) {
284
+ first_nonzero = bytes_i;
263
285
  }
264
286
 
265
- if (entry->value[value_i] == 0) {
287
+ if (entry->bytes[bytes_i] == 0) {
266
288
  int cmp = strncmp(
267
- &entry->value[first_nonzero],
289
+ &entry->bytes[first_nonzero],
268
290
  value,
269
- FILEDICT_VALUE_SIZE - first_nonzero
291
+ FILEDICT_BUCKET_ENTRY_BYTES - first_nonzero
270
292
  );
271
293
  if (cmp == 0) {
272
294
  /* Looks like this value already exists! */
@@ -276,13 +298,13 @@ try_again:
276
298
  }
277
299
  }
278
300
 
279
- if (entry->value[value_i] == 0 && entry->value[value_i + 1] == 0) {
280
- candidate = &entry->value[value_i + 1];
281
- candidate_len = FILEDICT_VALUE_SIZE - value_i - 1;
301
+ if (entry->bytes[bytes_i] == 0 && entry->bytes[bytes_i + 1] == 0) {
302
+ candidate = &entry->bytes[bytes_i + 1];
303
+ candidate_max_len = FILEDICT_BUCKET_ENTRY_BYTES - bytes_i - 1;
282
304
 
283
- if (strlen(value) >= candidate_len) break;
305
+ if (strlen(value) >= candidate_max_len) break;
284
306
 
285
- strncpy(candidate, value, candidate_len);
307
+ strncpy(candidate, value, candidate_max_len);
286
308
  return;
287
309
  }
288
310
  }
@@ -341,8 +363,8 @@ try_again:
341
363
  static int filedict_read_advance_value(filedict_read_t *read) {
342
364
  assert(read->entry != NULL);
343
365
 
344
- const char *buffer_begin = read->entry->value;
345
- const char *buffer_end = buffer_begin + FILEDICT_VALUE_SIZE;
366
+ const char *buffer_begin = read->entry->bytes;
367
+ const char *buffer_end = buffer_begin + FILEDICT_BUCKET_ENTRY_BYTES;
346
368
 
347
369
  const char *c;
348
370
  for (c = read->value; c < buffer_end; ++c) {
@@ -366,8 +388,8 @@ static int filedict_read_advance_value(filedict_read_t *read) {
366
388
  * Returns 0 when we exhausted all remaining entries and didn't find a match.
367
389
  */
368
390
  static int filedict_read_advance_entry(filedict_read_t *read) {
369
- assert(read->key != NULL);
370
- assert(strlen(read->key) > 0);
391
+ size_t value_start_i;
392
+
371
393
  assert(read->bucket != NULL);
372
394
 
373
395
  while (1) {
@@ -375,9 +397,22 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
375
397
 
376
398
  read->entry = &read->bucket->entries[read->entry_i];
377
399
 
378
- if (strncmp(read->entry->key, read->key, FILEDICT_KEY_SIZE) == 0) {
379
- read->value = read->entry->value;
380
- log_return(1);
400
+ if (read->key == NULL) {
401
+ if (read->entry->bytes[0] != 0) {
402
+ value_start_i = strlen(read->entry->bytes) + 1;
403
+ read->value = &read->entry->bytes[value_start_i];
404
+ log_return(1);
405
+ }
406
+ }
407
+ else {
408
+ value_start_i = filedict_string_includes(read->entry->bytes, read->key, FILEDICT_BUCKET_ENTRY_BYTES);
409
+
410
+ if (value_start_i > 0) {
411
+ /* Add 1: the index points at the 0 that terminates the key, not at the first char of the value */
412
+ value_start_i += 1;
413
+ read->value = &read->entry->bytes[value_start_i];
414
+ log_return(1);
415
+ }
381
416
  }
382
417
 
383
418
  read->entry_i += 1;
@@ -392,6 +427,7 @@ static int filedict_read_advance_entry(filedict_read_t *read) {
392
427
  */
393
428
  static int filedict_read_advance_hashmap(filedict_read_t *read) {
394
429
  filedict_t *filedict = read->filedict;
430
+ int success = 0;
395
431
 
396
432
  assert(filedict);
397
433
  assert(filedict->data);
@@ -416,6 +452,19 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
416
452
 
417
453
  read->entry_i = 0;
418
454
 
455
+ if (read->key == NULL) {
456
+ success = filedict_read_advance_entry(read);
457
+ while (!success) {
458
+ read->key_hash += 1;
459
+ read->bucket = &hashmap[read->key_hash % read->bucket_count];
460
+ read->entry = &read->bucket->entries[0];
461
+ read->entry_i = 0;
462
+ success = filedict_read_advance_entry(read);
463
+ if (read->key_hash >= read->bucket_count) return 0;
464
+ }
465
+ return success;
466
+ }
467
+
419
468
  log_return(filedict_read_advance_entry(read));
420
469
  }
421
470
 
@@ -432,7 +481,14 @@ static filedict_read_t filedict_get(filedict_t *filedict, const char *key) {
432
481
  read.entry_i = 0;
433
482
  read.hashmap_i = 0;
434
483
  read.bucket_count = 0;
435
- read.key_hash = filedict->hash_function(key);
484
+
485
+ /* A NULL key means we want to iterate over the entire dictionary */
486
+ if (key == NULL) {
487
+ read.key_hash = 0;
488
+ }
489
+ else {
490
+ read.key_hash = filedict->hash_function(key);
491
+ }
436
492
 
437
493
  filedict_read_advance_hashmap(&read);
438
494
  return read;
@@ -455,6 +511,19 @@ static int filedict_get_next(filedict_read_t *read) {
455
511
  found = filedict_read_advance_entry(read);
456
512
  if (found == 1) return found;
457
513
 
514
+ /*
515
+ * If read->key is NULL, that means we're iterating through the whole dict.
516
+ */
517
+ if (read->key == NULL) {
518
+ read->key_hash += 1;
519
+ if (read->key_hash < read->bucket_count) {
520
+ return filedict_read_advance_hashmap(read);
521
+ }
522
+ else {
523
+ read->key_hash = 0;
524
+ }
525
+ }
526
+
458
527
  read->hashmap_i += 1;
459
528
  return filedict_read_advance_hashmap(read);
460
529
  }
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Filedict
4
- VERSION = "0.2.0"
4
+ VERSION = "1.0.0"
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: filedictrb
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Nigel Baillie
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2022-04-06 00:00:00.000000000 Z
11
+ date: 2022-06-19 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description:
14
14
  email:
@@ -58,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
58
58
  - !ruby/object:Gem::Version
59
59
  version: '0'
60
60
  requirements: []
61
- rubygems_version: 3.3.3
61
+ rubygems_version: 3.1.6
62
62
  signing_key:
63
63
  specification_version: 4
64
64
  summary: Uses filedict to emulate a file-backed Hash<Set<String>>