efficient_join 1.4.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: e67d8cd6098b472380780f923955d25c1dc2777afa768f22552e780823ac63ab
4
- data.tar.gz: 3d0d6057ee163ef3498f3e9563097489cd04ae074c88fa43a1b35356b156e955
3
+ metadata.gz: ce8cde6fa3699cd119df8e7fa7b7b3167977eeef1db13ff2c16a03a8787333d9
4
+ data.tar.gz: e8c10cb18adf55851f1da719dd8f5e70ac1f8d8f7975978abb79f8f22eef83d3
5
5
  SHA512:
6
- metadata.gz: c3c1f9456f986b2cba749b351a8646fd1476ee825bf47c18bf78061a4c070f350980f40ecb31ef9b4ede1d77be01eed71e82cd3b8f3ab1c87edf37f3112129dc
7
- data.tar.gz: 24d0e958eeff1ed02f7bc5cb37c831de6d5f27253e3da4885db9b6ad728b489e64db0111981281d5e7d1ae1bc8c5d2b66c6bc446d2860bda2f8bc699de08fca4
6
+ metadata.gz: 6f938000b41839c14d1b3e05b52acaddbf9639bb0469af3af5cb537edc37ab3e339782ed9c8183ac2553bdb3aec39f0faefbc1c5a3f52bbe789205edf130e043
7
+ data.tar.gz: 5874883b14afc5b575239e20727adfaedc828839edf1117d3b5982490efb7fc11cf748198bf05ceafa6e46d250042f48e61b2766048c0a75e7f21a1cbdf0027f
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # EfficientJoin
2
2
 
3
- Very fast and memory-efficient way to join a list of ruby numbers.
3
+ Very fast and memory-efficient way to join ruby lists of numbers and strings.
4
4
 
5
5
  Joins are performed with a constant number of ruby object allocations,
6
6
  compared to `Array#join`, `PG::TextEncoder::Array.new.encode`, etc, where at least `n` object
@@ -11,8 +11,8 @@ usage and execution time improvements:
11
11
 
12
12
  | EfficientJoin function | Equivalent ruby function | Memory usage | Time |
13
13
  | ---------------------- | --------------------------------- | ------------ | ------------ |
14
- | join | Array#join | 30% | 2.5x faster |
15
- | join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 2.1x faster |
14
+ | join | Array#join | 30% | 7.0x faster |
15
+ | join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 7.4x faster |
16
16
 
17
17
  ## Installation
18
18
 
@@ -1,37 +1,102 @@
1
1
  #include "efficient_join.h"
2
+ #include <stdlib.h>
3
+ #include <stdio.h>
4
+ #include <string.h>
2
5
 
3
- static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE number_array) {
4
- VALUE out;
5
- const long array_len = RARRAY_LEN(number_array);
6
/*
 * Growable byte buffer used to assemble the joined string.
 *
 *   buf - heap storage owned by the struct (release with strbuf_free)
 *   pos - number of bytes written so far
 *   len - current capacity of buf, in bytes
 *
 * The contents are NOT guaranteed to be NUL-terminated; always pair buf
 * with pos when reading the result.
 */
struct strbuf_t {
  char *buf;
  size_t pos;
  size_t len;
};

// Room reserved for one formatted 64-bit integer: sign + 19 digits + NUL,
// rounded up to the 22 bytes the original code reserved.
enum { STRBUF_INT64_TEXT_MAX = 22 };

// Explicit out-of-memory policy: report and abort rather than write through
// a NULL pointer.
static void strbuf_oom(void) {
  fputs("efficient_join: out of memory\n", stderr);
  abort();
}

/*
 * Creates a buffer with room for initial_size bytes. A request for zero bytes
 * still allocates one byte so that buf is never NULL (malloc(0) may return
 * NULL). Aborts if the allocation fails.
 */
static struct strbuf_t strbuf_new(size_t initial_size) {
  struct strbuf_t strbuf = { NULL, 0, initial_size ? initial_size : 1 };

  strbuf.buf = malloc(strbuf.len);
  if (strbuf.buf == NULL) {
    strbuf_oom();
  }
  return strbuf;
}

/*
 * Releases the storage and resets the struct, so that a second strbuf_free
 * (or a later write, which will re-allocate) is harmless.
 */
static void strbuf_free(struct strbuf_t *strbuf) {
  free(strbuf->buf);
  strbuf->buf = NULL;
  strbuf->pos = 0;
  strbuf->len = 0;
}

/*
 * Grows the capacity to at least min_len bytes by repeated doubling, using a
 * single realloc. The old pointer is only replaced once realloc succeeded.
 */
static void strbuf_grow_to(struct strbuf_t *strbuf, size_t min_len) {
  size_t new_len = strbuf->len ? strbuf->len : 1;
  char *grown;

  while (new_len < min_len) {
    if (new_len > SIZE_MAX / 2) {
      // doubling would overflow size_t; ask for exactly what is needed
      new_len = min_len;
      break;
    }
    new_len *= 2;
  }

  grown = realloc(strbuf->buf, new_len);
  if (grown == NULL) {
    strbuf_oom();
  }
  strbuf->buf = grown;
  strbuf->len = new_len;
}

/* Doubles the capacity of the buffer (a zero capacity becomes one byte). */
static inline void strbuf_expand(struct strbuf_t *strbuf) {
  size_t doubled;

  if (strbuf->len > SIZE_MAX / 2) {
    doubled = SIZE_MAX;
  } else if (strbuf->len == 0) {
    doubled = 1;
  } else {
    doubled = strbuf->len * 2;
  }
  strbuf_grow_to(strbuf, doubled);
}

/*
 * Ensures that `extra` more bytes can be written at pos. Unlike a single
 * doubling, this is sufficient no matter how large `extra` is relative to the
 * current capacity.
 */
static inline void strbuf_reserve(struct strbuf_t *strbuf, size_t extra) {
  if (extra > SIZE_MAX - strbuf->pos) {
    strbuf_oom();
  }
  if (strbuf->pos + extra > strbuf->len) {
    strbuf_grow_to(strbuf, strbuf->pos + extra);
  }
}

/* Appends exactly len bytes from str (which need not be NUL-terminated). */
static inline void strbuf_write_str(struct strbuf_t *strbuf, const char *str, size_t len)
{
  if (len == 0) {
    return;
  }

  strbuf_reserve(strbuf, len);
  memcpy(strbuf->buf + strbuf->pos, str, len);
  strbuf->pos += len;
}

/*
 * Appends the decimal representation of value. snprintf also stores a
 * trailing NUL, but pos is advanced past the digits only, so the next write
 * overwrites that NUL.
 */
static inline void strbuf_write_int64(struct strbuf_t *strbuf, int64_t value)
{
  int bytes_written;

  strbuf_reserve(strbuf, STRBUF_INT64_TEXT_MAX);

  // cast + %lld: int64_t is not `long` on every platform, so "%ld" is a
  // format/argument mismatch there
  bytes_written = snprintf(strbuf->buf + strbuf->pos, STRBUF_INT64_TEXT_MAX,
                           "%lld", (long long)value);
  if (bytes_written > 0 && bytes_written < STRBUF_INT64_TEXT_MAX) {
    strbuf->pos += (size_t)bytes_written;
  }
}
48
+
49
+ static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE array) {
50
+ VALUE out;
51
+ const long array_len = RARRAY_LEN(array);
52
+ VALUE *c_array = RARRAY_PTR(array);
53
+ const size_t prefix_len = strlen(item_prefix);
54
+ const size_t suffix_len = strlen(item_suffix);
55
+ const size_t join_len = strlen(join);
56
+
57
+ // build joining string
58
+ struct strbuf_t join_buf = strbuf_new(suffix_len + join_len + prefix_len);
59
+ strbuf_write_str(&join_buf, item_suffix, suffix_len);
60
+ strbuf_write_str(&join_buf, join, join_len);
61
+ strbuf_write_str(&join_buf, item_prefix, prefix_len);
9
62
 
10
- fputs(header, stream);
63
+ // estimate likely maximum buffer size, to avoid reallocs
64
+ struct strbuf_t buf = strbuf_new((array_len + 1) * (join_buf.pos + 10));
65
+
66
+ strbuf_write_str(&buf, header, strlen(header));
67
+ strbuf_write_str(&buf, item_prefix, prefix_len);
11
68
 
12
69
  for (long i=0; i<array_len; ++i) {
13
- const VALUE v = RARRAY_PTR(number_array)[i];
14
-
15
- if (TYPE(v) != T_FIXNUM) {
16
- // rb_raise does not return control, so clean up first
17
- fclose(stream);
18
- free(buf);
19
- rb_raise(rb_eTypeError, "array must contain only integers");
20
- }
70
+ VALUE v = c_array[i];
21
71
 
22
- fprintf(stream, "%s%ld%s", item_prefix, FIX2LONG(v), item_suffix);
72
+ switch (TYPE(v)) {
73
+ case T_FIXNUM:
74
+ strbuf_write_int64(&buf, FIX2LONG(v));
75
+ break;
76
+ case T_BIGNUM:
77
+ strbuf_write_int64(&buf, rb_big2ll(v));
78
+ break;
79
+ case T_STRING:
80
+ strbuf_write_str(&buf, StringValuePtr(v), RSTRING_LEN(v));
81
+ break;
82
+ default:
83
+ // rb_raise does not return control, so clean up first
84
+ strbuf_free(&join_buf);
85
+ strbuf_free(&buf);
86
+ rb_raise(rb_eTypeError, "array must contain only strings and integers");
87
+ }
23
88
 
24
89
  if (i < array_len - 1) {
25
- fputs(join, stream);
26
- }
90
+ strbuf_write_str(&buf, join_buf.buf, join_buf.pos);
91
+ }
27
92
  }
93
+ strbuf_write_str(&buf, item_suffix, suffix_len);
94
+ strbuf_write_str(&buf, footer, strlen(footer));
28
95
 
29
- fputs(footer, stream);
30
-
31
- fclose(stream);
96
+ out = rb_str_new(buf.buf, buf.pos);
32
97
 
33
- out = rb_str_new_cstr(buf);
34
- free(buf);
98
+ strbuf_free(&join_buf);
99
+ strbuf_free(&buf);
35
100
 
36
101
  return out;
37
102
  }
@@ -57,4 +122,3 @@ void Init_efficient_join()
57
122
  rb_define_method(mod, "_join", rb_efficient_join, 4);
58
123
  rb_define_method(mod, "_join_pg_array", rb_efficient_join_pg_array, 1);
59
124
  }
60
-
@@ -1,3 +1,3 @@
1
1
  module EfficientJoin
2
- VERSION = "1.4.0"
2
+ VERSION = "2.0.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: efficient_join
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.4.0
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Morton
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-19 00:00:00.000000000 Z
11
+ date: 2020-05-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler