efficient_join 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e67d8cd6098b472380780f923955d25c1dc2777afa768f22552e780823ac63ab
4
- data.tar.gz: 3d0d6057ee163ef3498f3e9563097489cd04ae074c88fa43a1b35356b156e955
3
+ metadata.gz: ce8cde6fa3699cd119df8e7fa7b7b3167977eeef1db13ff2c16a03a8787333d9
4
+ data.tar.gz: e8c10cb18adf55851f1da719dd8f5e70ac1f8d8f7975978abb79f8f22eef83d3
5
5
  SHA512:
6
- metadata.gz: c3c1f9456f986b2cba749b351a8646fd1476ee825bf47c18bf78061a4c070f350980f40ecb31ef9b4ede1d77be01eed71e82cd3b8f3ab1c87edf37f3112129dc
7
- data.tar.gz: 24d0e958eeff1ed02f7bc5cb37c831de6d5f27253e3da4885db9b6ad728b489e64db0111981281d5e7d1ae1bc8c5d2b66c6bc446d2860bda2f8bc699de08fca4
6
+ metadata.gz: 6f938000b41839c14d1b3e05b52acaddbf9639bb0469af3af5cb537edc37ab3e339782ed9c8183ac2553bdb3aec39f0faefbc1c5a3f52bbe789205edf130e043
7
+ data.tar.gz: 5874883b14afc5b575239e20727adfaedc828839edf1117d3b5982490efb7fc11cf748198bf05ceafa6e46d250042f48e61b2766048c0a75e7f21a1cbdf0027f
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # EfficientJoin
2
2
 
3
- Very fast and memory-efficient way to join a list of ruby numbers.
3
+ Very fast and memory-efficient way to join ruby lists of numbers and strings.
4
4
 
5
5
  Joins are performed with a constant number of ruby object allocations,
6
6
  compared to `Array#join`, `PG:TextEncoder::Array.new.encode`, etc, where at least `n` object
@@ -11,8 +11,8 @@ usage and execution time improvements:
11
11
 
12
12
  | EfficientJoin function | Equivalent ruby function | Memory usage | Time |
13
13
  | ---------------------- | --------------------------------- | ------------ | ------------ |
14
- | join | Array#join | 30% | 2.5x faster |
15
- | join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 2.1x faster |
14
+ | join | Array#join | 30% | 7.0x faster |
15
+ | join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 7.4x faster |
16
16
 
17
17
  ## Installation
18
18
 
@@ -1,37 +1,102 @@
1
1
  #include "efficient_join.h"
2
+ #include <stdlib.h>
3
+ #include <stdio.h>
4
+ #include <string.h>
2
5
 
3
- static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE number_array) {
4
- VALUE out;
5
- const long array_len = RARRAY_LEN(number_array);
6
+ struct strbuf_t {
6
7
  char *buf;
7
- size_t buf_len;
8
- FILE *stream = open_memstream(&buf, &buf_len);
8
+ size_t pos;
9
+ size_t len;
10
+ };
11
+
12
+ static struct strbuf_t strbuf_new(size_t initial_size) {
13
+ struct strbuf_t strbuf = { (char *)malloc(initial_size), 0, initial_size };
14
+ return strbuf;
15
+ }
16
+
17
+ static void strbuf_free(struct strbuf_t *strbuf) {
18
+ free(strbuf->buf);
19
+ }
20
+
21
+ static inline void strbuf_expand(struct strbuf_t *strbuf) {
22
+ strbuf->buf = (char *)realloc(strbuf->buf, strbuf->len * 2);
23
+ strbuf->len *= 2;
24
+ }
25
+
26
+ static inline void strbuf_write_str(struct strbuf_t *strbuf, const char *str, size_t len)
27
+ {
28
+ if (strbuf->len < strbuf->pos + len) {
29
+ strbuf_expand(strbuf);
30
+ }
31
+
32
+ memcpy(strbuf->buf + strbuf->pos, str, len);
33
+ strbuf->pos += len;
34
+ }
35
+
36
+ static inline void strbuf_write_int64(struct strbuf_t *strbuf, int64_t value)
37
+ {
38
+ int bytes_written;
39
+
40
+ // 22: maximum length of string representation of 64-bit int
41
+ if (strbuf->len <= strbuf->pos + 22) {
42
+ strbuf_expand(strbuf);
43
+ }
44
+
45
+ bytes_written = snprintf(strbuf->buf + strbuf->pos, 22, "%ld", value);
46
+ strbuf->pos += bytes_written;
47
+ }
48
+
49
+ static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE array) {
50
+ VALUE out;
51
+ const long array_len = RARRAY_LEN(array);
52
+ VALUE *c_array = RARRAY_PTR(array);
53
+ const size_t prefix_len = strlen(item_prefix);
54
+ const size_t suffix_len = strlen(item_suffix);
55
+ const size_t join_len = strlen(join);
56
+
57
+ // build joining string
58
+ struct strbuf_t join_buf = strbuf_new(suffix_len + join_len + prefix_len);
59
+ strbuf_write_str(&join_buf, item_suffix, suffix_len);
60
+ strbuf_write_str(&join_buf, join, join_len);
61
+ strbuf_write_str(&join_buf, item_prefix, prefix_len);
9
62
 
10
- fputs(header, stream);
63
+ // estimate likely maximum buffer size, to avoid reallocs
64
+ struct strbuf_t buf = strbuf_new((array_len + 1) * (join_buf.pos + 10));
65
+
66
+ strbuf_write_str(&buf, header, strlen(header));
67
+ strbuf_write_str(&buf, item_prefix, prefix_len);
11
68
 
12
69
  for (long i=0; i<array_len; ++i) {
13
- const VALUE v = RARRAY_PTR(number_array)[i];
14
-
15
- if (TYPE(v) != T_FIXNUM) {
16
- // rb_raise does not return control, so clean up first
17
- fclose(stream);
18
- free(buf);
19
- rb_raise(rb_eTypeError, "array must contain only integers");
20
- }
70
+ VALUE v = c_array[i];
21
71
 
22
- fprintf(stream, "%s%ld%s", item_prefix, FIX2LONG(v), item_suffix);
72
+ switch (TYPE(v)) {
73
+ case T_FIXNUM:
74
+ strbuf_write_int64(&buf, FIX2LONG(v));
75
+ break;
76
+ case T_BIGNUM:
77
+ strbuf_write_int64(&buf, rb_big2ll(v));
78
+ break;
79
+ case T_STRING:
80
+ strbuf_write_str(&buf, StringValuePtr(v), RSTRING_LEN(v));
81
+ break;
82
+ default:
83
+ // rb_raise does not return control, so clean up first
84
+ strbuf_free(&join_buf);
85
+ strbuf_free(&buf);
86
+ rb_raise(rb_eTypeError, "array must contain only strings and integers");
87
+ }
23
88
 
24
89
  if (i < array_len - 1) {
25
- fputs(join, stream);
26
- }
90
+ strbuf_write_str(&buf, join_buf.buf, join_buf.pos);
91
+ }
27
92
  }
93
+ strbuf_write_str(&buf, item_suffix, suffix_len);
94
+ strbuf_write_str(&buf, footer, strlen(footer));
28
95
 
29
- fputs(footer, stream);
30
-
31
- fclose(stream);
96
+ out = rb_str_new(buf.buf, buf.pos);
32
97
 
33
- out = rb_str_new_cstr(buf);
34
- free(buf);
98
+ strbuf_free(&join_buf);
99
+ strbuf_free(&buf);
35
100
 
36
101
  return out;
37
102
  }
@@ -57,4 +122,3 @@ void Init_efficient_join()
57
122
  rb_define_method(mod, "_join", rb_efficient_join, 4);
58
123
  rb_define_method(mod, "_join_pg_array", rb_efficient_join_pg_array, 1);
59
124
  }
60
-
@@ -1,3 +1,3 @@
1
1
  module EfficientJoin
2
- VERSION = "1.4.0"
2
+ VERSION = "2.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: efficient_join
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Morton
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-19 00:00:00.000000000 Z
11
+ date: 2020-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler