RubyGems - efficient_join - Versions diffs - 1.2.0 → 2.1.2 - Mend

efficient_join 1.2.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/README.md +13 -6
data/efficient_join.gemspec +3 -3
data/ext/efficient_join/efficient_join.c +92 -23
data/lib/efficient_join.rb +8 -0
data/lib/efficient_join/version.rb +1 -1
metadata +11 -10

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 0eb0fc1fe97206cfe1fc37b6dcc15904d9ab7276add24337afedc487255bab35
-  data.tar.gz: 98a1732e553cb92eae5c957e34353c79924b6ea8d8fdd23259f20366a02142e2
+  metadata.gz: 38066756028c4a6647c608fbf307c70a3ea725cc608cfd6286f030da352a9288
+  data.tar.gz: d75193ef5828362ec8251cbb1be0d1475caa33415f8b50ce844bdbc45be2c9b4
 SHA512:
-  metadata.gz: 9352ba250499feb06620b23ba816c5ab12dbf91e25f689d78c1a36b842f6767a94d1361a92fc903b1a9bbc4d423fca19d72596ffd270666467e0069791f5620e
-  data.tar.gz: 908e42ce26f867477eabf07f5738d2abd33838c2731c54464130b5e93886e484584f5e1f23cb25bd52202d5fd2a797499115212adc9300c54f3f68e14ceb9977
+  metadata.gz: a494b8dab0c78407baa0318cd7d7b3ce8da4c4b4d1a25d74c2776f78d0af3ba47a001a7d527aba5850e6c8570e073b622ddb8b4e1df4e7ced07de09e35ab51b8
+  data.tar.gz: 8ef13b2edcf66858d6815e9a46911caf79f2a16aa7f169677e70567aa1ec49ad959347f4ecd513eb505efb783c1ab356c8f0f82ffe3cedf52e2cc80e73e30ded

data/README.md CHANGED

@@ -1,6 +1,6 @@
 # EfficientJoin
-Very fast and memory-efficient way to join a list of ruby numbers.
+Very fast and memory-efficient way to join ruby lists of numbers and strings.
 Joins are performed with a constant number of ruby object allocations,
 compared to `Array#join`, `PG::TextEncoder::Array.new.encode`, etc, where at least `n` object
@@ -11,8 +11,8 @@ usage and execution time improvements:
 | EfficientJoin function | Equivalent ruby function          | Memory usage | Time         |
 | ---------------------- | --------------------------------- | ------------ | ------------ |
-| join                   | Array#join                        | 30%          | 2.5x faster  |
-| join_pg_array          | PG::TextEncoder::Array.new.encode | 18%          | 2.1x faster  |
+| join                   | Array#join                        | 30%          | 7.0x faster  |
+| join_pg_array          | PG::TextEncoder::Array.new.encode | 18%          | 7.4x faster  |
 ## Installation
@@ -44,15 +44,15 @@ MemoryProfiler.report { (0...1000000).to_a.join(',') }
 With efficient join:
 ```
 require 'efficient_join'
-EfficientJoin.join('','',',',(0...1000000).to_a)
+EfficientJoin.join((0...1000000).to_a)
   ...
   @total_allocated=5,
   @total_allocated_memsize=18525362
 ```
-It takes prefix and suffix:
+It can also take separator, item prefix and item suffix:
 ```
-EfficientJoin.join('(', ',now(),now())', ',', [1,2,3,4])
+EfficientJoin.join([1,2,3,4], separator: ',', item_prefix: '(', item_suffix: ',now(),now())')
  => "(1,now(),now()),(2,now(),now()),(3,now(),now()),(4,now(),now())"
 ```
@@ -64,6 +64,13 @@ EfficientJoin.join_pg_array([1,2,3,4])
  => "{1,2,3,4}"
 ```
+Which is equivalent to:
+```
+EfficientJoin.join([1,2,3,4], header: '{', footer: '}')
+ => "{1,2,3,4}"
+```
 ## Development
 After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.

data/efficient_join.gemspec CHANGED

@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
   spec.email         = ["tomm8086@googlemail.com"]
   spec.summary       = %q{.}
-  spec.description   = %q{Very fast and memory-efficient way to join a list of ruby numbers.}
+  spec.description   = %q{Very fast and memory-efficient way to join ruby lists of numbers and strings.}
   spec.homepage      = "https://github.com/tomm/efficient_join"
   spec.license       = "MIT"
@@ -30,6 +30,6 @@ Gem::Specification.new do |spec|
   spec.extensions    = ["ext/efficient_join/extconf.rb"]
   spec.add_development_dependency "bundler", "~> 2.0"
-  spec.add_development_dependency "rake", "~> 10.0"
-  spec.add_development_dependency "rake-compiler"
+  spec.add_development_dependency "rake", "~> 12.3.3"
+  spec.add_development_dependency "rake-compiler", "~> 1.0"
 end

data/ext/efficient_join/efficient_join.c CHANGED

@@ -1,35 +1,110 @@
 #include "efficient_join.h"
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
-static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE number_array) {
-    VALUE out;
-    const long array_len = RARRAY_LEN(number_array);
+struct strbuf_t {
     char *buf;
-    size_t buf_len;
-    FILE *stream = open_memstream(&buf, &buf_len);
+    size_t pos;
+    size_t len;
+};
+static struct strbuf_t strbuf_new(size_t initial_size) {
+    struct strbuf_t strbuf = { (char *)malloc(initial_size), 0, initial_size };
+    return strbuf;
+}
+static void strbuf_free(struct strbuf_t *strbuf) {
+    free(strbuf->buf);
+}
+static inline void strbuf_expand(struct strbuf_t *strbuf) {
+    strbuf->buf = (char *)realloc(strbuf->buf, strbuf->len * 2);
+    strbuf->len *= 2;
+}
+static inline void strbuf_write_str(struct strbuf_t *strbuf, const char *str, size_t len)
+{
+    if (strbuf->len < strbuf->pos + len) {
+        strbuf_expand(strbuf);
+    }
+    memcpy(strbuf->buf + strbuf->pos, str, len);
+    strbuf->pos += len;
+}
+static inline void strbuf_write_int64(struct strbuf_t *strbuf, int64_t value)
+{
+    int bytes_written;
+    // 22: maximum length of string representation of 64-bit int
+    if (strbuf->len <= strbuf->pos + 22) {
+        strbuf_expand(strbuf);
+    }
+    bytes_written = snprintf(strbuf->buf + strbuf->pos, 22, "%ld", value);
+    strbuf->pos += bytes_written;
+}
+static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE array) {
+    VALUE out;
+    const long array_len = RARRAY_LEN(array);
+    VALUE *c_array = RARRAY_PTR(array);
+    const size_t prefix_len = strlen(item_prefix);
+    const size_t suffix_len = strlen(item_suffix);
+    const size_t join_len = strlen(join);
+    struct strbuf_t join_buf = strbuf_new(suffix_len + join_len + prefix_len);
+    // estimate likely maximum buffer size, to avoid reallocs
+    struct strbuf_t buf = strbuf_new((array_len + 1) * (join_buf.pos + 10));
-    fputs(header, stream);
+    // build joining string
+    strbuf_write_str(&join_buf, item_suffix, suffix_len);
+    strbuf_write_str(&join_buf, join, join_len);
+    strbuf_write_str(&join_buf, item_prefix, prefix_len);
+    strbuf_write_str(&buf, header, strlen(header));
+    strbuf_write_str(&buf, item_prefix, prefix_len);
     for (long i=0; i<array_len; ++i) {
-        fprintf(stream, "%s%ld%s", item_prefix, FIX2LONG(RARRAY_PTR(number_array)[i]), item_suffix);
+        VALUE v = c_array[i];
+        switch (TYPE(v)) {
+            case T_FIXNUM:
+                strbuf_write_int64(&buf, FIX2LONG(v));
+                break;
+            case T_BIGNUM:
+                strbuf_write_int64(&buf, rb_big2ll(v));
+                break;
+            case T_STRING:
+                strbuf_write_str(&buf, StringValuePtr(v), RSTRING_LEN(v));
+                break;
+            default:
+                // rb_raise does not return control, so clean up first
+                strbuf_free(&join_buf);
+                strbuf_free(&buf);
+                rb_raise(rb_eTypeError, "array must contain only strings and integers");
+        }
         if (i < array_len - 1) {
-            fputs(join, stream);
-        }
+            strbuf_write_str(&buf, join_buf.buf, join_buf.pos);
+        }
     }
+    strbuf_write_str(&buf, item_suffix, suffix_len);
+    strbuf_write_str(&buf, footer, strlen(footer));
-    fputs(footer, stream);
-    fclose(stream);
+    out = rb_str_new(buf.buf, buf.pos);
-    out = rb_str_new_cstr(buf);
-    free(buf);
+    strbuf_free(&join_buf);
+    strbuf_free(&buf);
     return out;
 }
-VALUE rb_efficient_join(VALUE self, VALUE prefix, VALUE suffix, VALUE join, VALUE number_array) {
+VALUE rb_efficient_join(VALUE self, VALUE header, VALUE footer, VALUE prefix, VALUE suffix, VALUE join, VALUE number_array) {
     return _join(
-        "", "",
+        StringValueCStr(header),
+        StringValueCStr(footer),
         StringValueCStr(prefix),
         StringValueCStr(suffix),
         StringValueCStr(join),
@@ -37,15 +112,9 @@ VALUE rb_efficient_join(VALUE self, VALUE prefix, VALUE suffix, VALUE join, VALU
     );
 }
-VALUE rb_efficient_join_pg_array(VALUE self, VALUE number_array) {
-    return _join("{", "}", "", "", ",", number_array);
-}
 void Init_efficient_join()
 {
     VALUE mod = rb_define_module("EfficientJoinCExt");
-    rb_define_method(mod, "join", rb_efficient_join, 4);
-    rb_define_method(mod, "join_pg_array", rb_efficient_join_pg_array, 1);
+    rb_define_method(mod, "_join", rb_efficient_join, 6);
 }

data/lib/efficient_join.rb CHANGED

@@ -6,5 +6,13 @@ module EfficientJoin
   class << self
     include EfficientJoinCExt
+    def join(array, header: '', footer: '', separator: ',', item_prefix: '', item_suffix: '')
+      _join(header, footer, item_prefix, item_suffix, separator, array)
+    end
+    def join_pg_array(array)
+      _join('{', '}', '', '', ',', array)
+    end
   end
 end

data/lib/efficient_join/version.rb CHANGED

@@ -1,3 +1,3 @@
 module EfficientJoin
-  VERSION = "1.2.0"
+  VERSION = "2.1.2"
 end

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: efficient_join
 version: !ruby/object:Gem::Version
-  version: 1.2.0
+  version: 2.1.2
 platform: ruby
 authors:
 - Tom Morton
 autorequire:
 bindir: exe
 cert_chain: []
-date: 2020-05-19 00:00:00.000000000 Z
+date: 2020-06-08 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: bundler
@@ -30,29 +30,30 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: 12.3.3
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '10.0'
+        version: 12.3.3
 - !ruby/object:Gem::Dependency
   name: rake-compiler
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
+        version: '1.0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - ">="
+    - - "~>"
       - !ruby/object:Gem::Version
-        version: '0'
-description: Very fast and memory-efficient way to join a list of ruby numbers.
+        version: '1.0'
+description: Very fast and memory-efficient way to join ruby lists of numbers and
+  strings.
 email:
 - tomm8086@googlemail.com
 executables: []
@@ -94,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.0.4
+rubygems_version: 3.0.8
 signing_key:
 specification_version: 4
 summary: "."