RubyGems - filedictrb - Versions diffs - 0.1.2 → 0.2.0 - Mend

filedictrb 0.1.2 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 3c8fd3a6260d33ec56a995b4116af3cdb60ec8b52fa8aee688c7fd0b305b14ae
-  data.tar.gz: '0349634431c7e9abde136b1503e4ae23a8855236cdafb21ee9157f946456c16c'
+  metadata.gz: 7cb1c1d801bf8943d6493a6c40be882c5d47c58166c56b6860abf8b98fc14a98
+  data.tar.gz: 394f0febd55f827bfb3aafb3559ba89b6b458b3a7ed2c3b6d857d900e38b7374
 SHA512:
-  metadata.gz: 5811b3a6fba572d75e39d9fec5d7e2d6cb1bde1701604d80e0fa040de624b7b7c7edca928293fa9814b91614a3c5d6289f15e55becc7e4b1153d34212216ec39
-  data.tar.gz: db8da5021c456578f8654bce8feab2a7f4138f9c3f405423fb441ff3248cc3924b451da94473d5eac1b9d3eccda643d22582b278dbaebb6a7087f819bf896e08
+  metadata.gz: 4a700778e49bd15c6686b9457b7a7987f4d6c8b47be4abf805a6a09d95156ce71ff8e15e7cb50ffcfa11f96e78f3eb633c19d10bf472c6f7963deb5ae658f9f4
+  data.tar.gz: dc233039e2c4d0e2aade78efc7cd6b55ab088cbdeaa373d9219137ae1241585dbe32b4e5984eeb402c52a32a027ee14f2950fa5540b3a3b932a34d13ff834b8c

data/ext/filedict/filedict.h CHANGED Viewed

@@ -27,6 +27,7 @@ typedef size_t (*filedict_hash_function_t)(const char *);
 typedef struct filedict_t {
     const char *error;
     int fd;
+    int flags;
     void *data;
     size_t data_len;
     filedict_hash_function_t hash_function;
@@ -96,6 +97,7 @@ static size_t filedict_copy_string(char *dest, const char *src, size_t max_len)
 static void filedict_init(filedict_t *filedict) {
     filedict->error = NULL;
     filedict->fd = 0;
+    filedict->flags = 0;
     filedict->data_len = 0;
     filedict->data = NULL;
     filedict->hash_function = filedict_default_hash_function;
@@ -110,6 +112,7 @@ static void filedict_deinit(filedict_t *filedict) {
     if (filedict->fd) {
         close(filedict->fd);
         filedict->fd = 0;
+        filedict->flags = 0;
     }
 }
@@ -117,15 +120,39 @@ static void filedict_deinit(filedict_t *filedict) {
  * This computes the size of the entire filedict file given an initial bucket count and hashmap count.
  */
 static size_t filedict_file_size(size_t initial_bucket_count, size_t hashmap_count) {
-    size_t result = sizeof(filedict_header_t);
-    size_t i;
+    /*
+     * We used to size each additional hashmap at 2x the previous, but realistically it seems that
+     * most resizes are triggered by keys that are ridiculously large, not by mass collision.
+     *
+     * A more proper fix might be to re-structure the whole filedict. We could keep the existing
+     * hashmap structure, but with buckets that expand dynamically. This would require each bucket
+     * to contain a "pointer" to the next bucket object if present.
+     *
+     * For now, it's easier to just keep the hashmap duplication without the size doubling.
+     */
+    return sizeof(filedict_header_t) + initial_bucket_count * hashmap_count * sizeof(filedict_bucket_t);
+}
-    for (i = 0; i < hashmap_count; ++i) {
-        /* Bucket count is multiplied by 2 for each additional hashmap. */
-        result += (initial_bucket_count << i) * sizeof(filedict_bucket_t);
-    }
+/*
+ * Resizes the filedict based on the header hashmap count and initial bucket count.
+ * Naturally, your pointers into the map will become invalid after calling this.
+ */
+static void filedict_resize(filedict_t *filedict) {
+    filedict_header_t *header = (filedict_header_t*)filedict->data;
+    size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
+    if (computed_size <= filedict->data_len) return;
-    return result;
+    munmap(filedict->data, filedict->data_len);
+    filedict->data = mmap(
+        filedict->data,
+        computed_size,
+        PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
+        MAP_SHARED,
+        filedict->fd,
+        0
+    );
+    if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return; }
+    filedict->data_len = computed_size;
 }
 /*
@@ -146,15 +173,24 @@ static void filedict_open_f(
     int flags,
     unsigned int initial_bucket_count
 ) {
+    struct stat info;
+    filedict->flags = flags;
     filedict->fd = open(filename, flags, 0666);
     if (filedict->fd == -1) { filedict->error = strerror(errno); return; }
+    if (fstat(filedict->fd, &info) != 0) { filedict->error = strerror(errno); return; }
+    if (info.st_size == 0 && (flags & O_RDWR)) {
+        filedict->data_len = filedict_file_size(initial_bucket_count, 1);
+        ftruncate(filedict->fd, filedict->data_len);
+    } else {
+        filedict->data_len = info.st_size;
+    }
-    filedict->data_len = filedict_file_size(initial_bucket_count, 1);
-    ftruncate(filedict->fd, filedict->data_len);
     filedict->data = mmap(
         NULL,
         filedict->data_len,
-        PROT_READ | PROT_WRITE,
+        PROT_READ | ((flags & O_RDWR) ? PROT_WRITE : 0),
         MAP_SHARED,
         filedict->fd,
         0
@@ -163,8 +199,11 @@ static void filedict_open_f(
     filedict_header_t *data = (filedict_header_t *)filedict->data;
     assert(initial_bucket_count <= UINT_MAX);
-    data->initial_bucket_count = initial_bucket_count;
-    data->hashmap_count = 1;
+    if (data->initial_bucket_count == 0) {
+        data->initial_bucket_count = initial_bucket_count;
+        data->hashmap_count = 1;
+    }
 }
 /*
@@ -252,7 +291,6 @@ try_again:
         ++hashmap_i;
         hashmap += bucket_count;
-        bucket_count = (bucket_count << 1);
     }
     /*
@@ -272,7 +310,7 @@ try_again:
     filedict->data = mmap(
         filedict->data,
         new_data_len,
-        PROT_READ | PROT_WRITE,
+        PROT_READ | ((filedict->flags & O_RDWR) ? PROT_WRITE : 0),
         MAP_SHARED,
         filedict->fd,
         0
@@ -365,25 +403,14 @@ static int filedict_read_advance_hashmap(filedict_read_t *read) {
     size_t offset = filedict_file_size(header->initial_bucket_count, read->hashmap_i);
     if (offset >= filedict->data_len) {
-        /* Need to resize! */
-        size_t computed_size = filedict_file_size(header->initial_bucket_count, header->hashmap_count);
-        munmap(filedict->data, filedict->data_len);
-        filedict->data = mmap(
-            filedict->data,
-            computed_size,
-            PROT_READ | PROT_WRITE,
-            MAP_SHARED,
-            filedict->fd,
-            0
-        );
-        if (filedict->data == MAP_FAILED) { filedict->error = strerror(errno); return 0; }
-        filedict->data_len = computed_size;
+        filedict_resize(filedict);
+        if (filedict->error) log_return(0);
         header = (filedict_header_t*)filedict->data;
     }
     filedict_bucket_t *hashmap = filedict->data + offset;
-    read->bucket_count = (size_t)header->initial_bucket_count << read->hashmap_i;
+    read->bucket_count = (size_t)header->initial_bucket_count;
     read->bucket = &hashmap[read->key_hash % read->bucket_count];
     read->entry = &read->bucket->entries[0];

data/lib/filedict/version.rb CHANGED Viewed

@@ -1,5 +1,5 @@
 # frozen_string_literal: true
 module Filedict
-  VERSION = "0.1.2"
+  VERSION = "0.2.0"
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: filedictrb
 version: !ruby/object:Gem::Version
-  version: 0.1.2
+  version: 0.2.0
 platform: ruby
 authors:
 - Nigel Baillie
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2022-03-31 00:00:00.000000000 Z
+date: 2022-04-06 00:00:00.000000000 Z
 dependencies: []
 description:
 email: