efficient_join 1.2.0 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 0eb0fc1fe97206cfe1fc37b6dcc15904d9ab7276add24337afedc487255bab35
4
- data.tar.gz: 98a1732e553cb92eae5c957e34353c79924b6ea8d8fdd23259f20366a02142e2
3
+ metadata.gz: 38066756028c4a6647c608fbf307c70a3ea725cc608cfd6286f030da352a9288
4
+ data.tar.gz: d75193ef5828362ec8251cbb1be0d1475caa33415f8b50ce844bdbc45be2c9b4
5
5
  SHA512:
6
- metadata.gz: 9352ba250499feb06620b23ba816c5ab12dbf91e25f689d78c1a36b842f6767a94d1361a92fc903b1a9bbc4d423fca19d72596ffd270666467e0069791f5620e
7
- data.tar.gz: 908e42ce26f867477eabf07f5738d2abd33838c2731c54464130b5e93886e484584f5e1f23cb25bd52202d5fd2a797499115212adc9300c54f3f68e14ceb9977
6
+ metadata.gz: a494b8dab0c78407baa0318cd7d7b3ce8da4c4b4d1a25d74c2776f78d0af3ba47a001a7d527aba5850e6c8570e073b622ddb8b4e1df4e7ced07de09e35ab51b8
7
+ data.tar.gz: 8ef13b2edcf66858d6815e9a46911caf79f2a16aa7f169677e70567aa1ec49ad959347f4ecd513eb505efb783c1ab356c8f0f82ffe3cedf52e2cc80e73e30ded
data/README.md CHANGED
@@ -1,6 +1,6 @@
1
1
  # EfficientJoin
2
2
 
3
- Very fast and memory-efficient way to join a list of ruby numbers.
3
+ Very fast and memory-efficient way to join ruby lists of numbers and strings.
4
4
 
5
5
  Joins are performed with a constant number of ruby object allocations,
6
6
  compared to `Array#join`, `PG::TextEncoder::Array.new.encode`, etc, where at least `n` object
@@ -11,8 +11,8 @@ usage and execution time improvements:
11
11
 
12
12
  | EfficientJoin function | Equivalent ruby function | Memory usage | Time |
13
13
  | ---------------------- | --------------------------------- | ------------ | ------------ |
14
- | join | Array#join | 30% | 2.5x faster |
15
- | join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 2.1x faster |
14
+ | join | Array#join | 30% | 7.0x faster |
15
+ | join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 7.4x faster |
16
16
 
17
17
  ## Installation
18
18
 
@@ -44,15 +44,15 @@ MemoryProfiler.report { (0...1000000).to_a.join(',') }
44
44
  With efficient join:
45
45
  ```
46
46
  require 'efficient_join'
47
- EfficientJoin.join('','',',',(0...1000000).to_a)
47
+ EfficientJoin.join((0...1000000).to_a)
48
48
  ...
49
49
  @total_allocated=5,
50
50
  @total_allocated_memsize=18525362
51
51
  ```
52
52
 
53
- It takes prefix and suffix:
53
+ It can also take a separator, an item prefix and an item suffix:
54
54
  ```
55
- EfficientJoin.join('(', ',now(),now())', ',', [1,2,3,4])
55
+ EfficientJoin.join([1,2,3,4], separator: ',', item_prefix: '(', item_suffix: ',now(),now())')
56
56
  => "(1,now(),now()),(2,now(),now()),(3,now(),now()),(4,now(),now())"
57
57
  ```
58
58
 
@@ -64,6 +64,13 @@ EfficientJoin.join_pg_array([1,2,3,4])
64
64
  => "{1,2,3,4}"
65
65
  ```
66
66
 
67
+ Which is equivalent to:
68
+
69
+ ```
70
+ EfficientJoin.join([1,2,3,4], header: '{', footer: '}')
71
+ => "{1,2,3,4}"
72
+ ```
73
+
67
74
  ## Development
68
75
 
69
76
  After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
9
9
  spec.email = ["tomm8086@googlemail.com"]
10
10
 
11
11
  spec.summary = %q{.}
12
- spec.description = %q{Very fast and memory-efficient way to join a list of ruby numbers.}
12
+ spec.description = %q{Very fast and memory-efficient way to join ruby lists of numbers and strings.}
13
13
  spec.homepage = "https://github.com/tomm/efficient_join"
14
14
  spec.license = "MIT"
15
15
 
@@ -30,6 +30,6 @@ Gem::Specification.new do |spec|
30
30
  spec.extensions = ["ext/efficient_join/extconf.rb"]
31
31
 
32
32
  spec.add_development_dependency "bundler", "~> 2.0"
33
- spec.add_development_dependency "rake", "~> 10.0"
34
- spec.add_development_dependency "rake-compiler"
33
+ spec.add_development_dependency "rake", "~> 12.3.3"
34
+ spec.add_development_dependency "rake-compiler", "~> 1.0"
35
35
  end
@@ -1,35 +1,110 @@
1
1
  #include "efficient_join.h"
2
+ #include <stdlib.h>
3
+ #include <stdio.h>
4
+ #include <string.h>
2
5
 
3
- static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE number_array) {
4
- VALUE out;
5
- const long array_len = RARRAY_LEN(number_array);
6
/*
 * Growable byte buffer used to assemble the joined string.
 *
 * The contents are NOT NUL-terminated as far as `pos` is concerned: readers
 * must treat `pos` as the length of the valid data.
 */
struct strbuf_t {
    char *buf;   /* heap storage, owned by the buffer (release with strbuf_free) */
    size_t pos;  /* number of bytes written so far */
    size_t len;  /* allocated capacity of buf, in bytes */
};

/*
 * Allocation-failure policy for the buffer helpers: report and terminate.
 * The helpers return void, so there is no channel to report the error to
 * the caller; aborting is preferable to writing through a NULL pointer.
 */
static void strbuf_oom(void) {
    fputs("efficient_join: out of memory\n", stderr);
    abort();
}

/*
 * Ensures the capacity is at least min_len bytes. Capacity grows
 * geometrically (repeated doubling) so that a long sequence of small writes
 * stays amortized O(1), but it always reaches min_len even when that is more
 * than twice the current capacity.
 */
static void strbuf_grow_to(struct strbuf_t *strbuf, size_t min_len) {
    size_t new_len;
    char *grown;

    if (min_len <= strbuf->len) {
        return;
    }

    /* start from 1 so that a zero-capacity buffer can still grow */
    new_len = strbuf->len > 0 ? strbuf->len : 1;
    while (new_len < min_len) {
        if (new_len > (size_t)-1 / 2) {
            /* doubling would overflow size_t: fall back to the exact size */
            new_len = min_len;
            break;
        }
        new_len *= 2;
    }

    /* keep the old pointer until realloc is known to have succeeded */
    grown = realloc(strbuf->buf, new_len);
    if (grown == NULL) {
        strbuf_oom();
    }
    strbuf->buf = grown;
    strbuf->len = new_len;
}

/* Ensures at least `extra` bytes are available after the write position. */
static void strbuf_reserve(struct strbuf_t *strbuf, size_t extra) {
    if (extra > (size_t)-1 - strbuf->pos) {
        /* pos + extra would overflow size_t */
        strbuf_oom();
    }
    strbuf_grow_to(strbuf, strbuf->pos + extra);
}

/*
 * Creates an empty buffer with room for initial_size bytes.
 * A request for 0 bytes is rounded up to 1 so that a NULL result from
 * malloc always means failure.
 */
static struct strbuf_t strbuf_new(size_t initial_size) {
    struct strbuf_t strbuf = { NULL, 0, initial_size > 0 ? initial_size : 1 };

    strbuf.buf = malloc(strbuf.len);
    if (strbuf.buf == NULL) {
        strbuf_oom();
    }
    return strbuf;
}

/*
 * Releases the storage. The buffer is left empty (NULL / 0 / 0), so a second
 * strbuf_free is harmless and a later write simply reallocates.
 */
static void strbuf_free(struct strbuf_t *strbuf) {
    free(strbuf->buf);
    strbuf->buf = NULL;
    strbuf->pos = 0;
    strbuf->len = 0;
}

/* Doubles the capacity (a zero-capacity buffer grows to 1 byte). */
static inline void strbuf_expand(struct strbuf_t *strbuf) {
    if (strbuf->len == (size_t)-1) {
        strbuf_oom();
    }
    /* asking for one more byte than we have triggers exactly one doubling */
    strbuf_grow_to(strbuf, strbuf->len + 1);
}

/*
 * Appends len bytes from str. The bytes are copied verbatim, so embedded
 * NUL characters are preserved.
 */
static inline void strbuf_write_str(struct strbuf_t *strbuf, const char *str, size_t len)
{
    if (len == 0) {
        /* nothing to copy; also avoids calling memcpy with a NULL source */
        return;
    }

    strbuf_reserve(strbuf, len);
    memcpy(strbuf->buf + strbuf->pos, str, len);
    strbuf->pos += len;
}

/* Appends the decimal text representation of value. */
static inline void strbuf_write_int64(struct strbuf_t *strbuf, int64_t value)
{
    /* sign + 19 digits + NUL is 21 bytes; 22 leaves one byte of slack */
    enum { INT64_TEXT_RESERVE = 22 };
    int bytes_written;

    strbuf_reserve(strbuf, INT64_TEXT_RESERVE);

    /* long long is at least 64 bits everywhere, unlike long (32 bits on LLP64) */
    bytes_written = snprintf(strbuf->buf + strbuf->pos, INT64_TEXT_RESERVE,
                             "%lld", (long long)value);
    if (bytes_written >= INT64_TEXT_RESERVE) {
        /* cannot happen for a 64-bit value; clamp defensively to what was stored */
        bytes_written = INT64_TEXT_RESERVE - 1;
    }
    if (bytes_written > 0) {
        /* the NUL written by snprintf is not counted and gets overwritten */
        strbuf->pos += (size_t)bytes_written;
    }
}
48
+
49
+ static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE array) {
50
+ VALUE out;
51
+ const long array_len = RARRAY_LEN(array);
52
+ VALUE *c_array = RARRAY_PTR(array);
53
+ const size_t prefix_len = strlen(item_prefix);
54
+ const size_t suffix_len = strlen(item_suffix);
55
+ const size_t join_len = strlen(join);
56
+
57
+ struct strbuf_t join_buf = strbuf_new(suffix_len + join_len + prefix_len);
58
+ // estimate likely maximum buffer size, to avoid reallocs
59
+ struct strbuf_t buf = strbuf_new((array_len + 1) * (join_buf.pos + 10));
9
60
 
10
- fputs(header, stream);
61
+ // build joining string
62
+ strbuf_write_str(&join_buf, item_suffix, suffix_len);
63
+ strbuf_write_str(&join_buf, join, join_len);
64
+ strbuf_write_str(&join_buf, item_prefix, prefix_len);
65
+
66
+ strbuf_write_str(&buf, header, strlen(header));
67
+ strbuf_write_str(&buf, item_prefix, prefix_len);
11
68
 
12
69
  for (long i=0; i<array_len; ++i) {
13
- fprintf(stream, "%s%ld%s", item_prefix, FIX2LONG(RARRAY_PTR(number_array)[i]), item_suffix);
70
+ VALUE v = c_array[i];
71
+
72
+ switch (TYPE(v)) {
73
+ case T_FIXNUM:
74
+ strbuf_write_int64(&buf, FIX2LONG(v));
75
+ break;
76
+ case T_BIGNUM:
77
+ strbuf_write_int64(&buf, rb_big2ll(v));
78
+ break;
79
+ case T_STRING:
80
+ strbuf_write_str(&buf, StringValuePtr(v), RSTRING_LEN(v));
81
+ break;
82
+ default:
83
+ // rb_raise does not return control, so clean up first
84
+ strbuf_free(&join_buf);
85
+ strbuf_free(&buf);
86
+ rb_raise(rb_eTypeError, "array must contain only strings and integers");
87
+ }
14
88
 
15
89
  if (i < array_len - 1) {
16
- fputs(join, stream);
17
- }
90
+ strbuf_write_str(&buf, join_buf.buf, join_buf.pos);
91
+ }
18
92
  }
93
+ strbuf_write_str(&buf, item_suffix, suffix_len);
94
+ strbuf_write_str(&buf, footer, strlen(footer));
19
95
 
20
- fputs(footer, stream);
21
-
22
- fclose(stream);
96
+ out = rb_str_new(buf.buf, buf.pos);
23
97
 
24
- out = rb_str_new_cstr(buf);
25
- free(buf);
98
+ strbuf_free(&join_buf);
99
+ strbuf_free(&buf);
26
100
 
27
101
  return out;
28
102
  }
29
103
 
30
- VALUE rb_efficient_join(VALUE self, VALUE prefix, VALUE suffix, VALUE join, VALUE number_array) {
104
+ VALUE rb_efficient_join(VALUE self, VALUE header, VALUE footer, VALUE prefix, VALUE suffix, VALUE join, VALUE number_array) {
31
105
  return _join(
32
- "", "",
106
+ StringValueCStr(header),
107
+ StringValueCStr(footer),
33
108
  StringValueCStr(prefix),
34
109
  StringValueCStr(suffix),
35
110
  StringValueCStr(join),
@@ -37,15 +112,9 @@ VALUE rb_efficient_join(VALUE self, VALUE prefix, VALUE suffix, VALUE join, VALU
37
112
  );
38
113
  }
39
114
 
40
- VALUE rb_efficient_join_pg_array(VALUE self, VALUE number_array) {
41
- return _join("{", "}", "", "", ",", number_array);
42
- }
43
-
44
115
  void Init_efficient_join()
45
116
  {
46
117
  VALUE mod = rb_define_module("EfficientJoinCExt");
47
118
 
48
- rb_define_method(mod, "join", rb_efficient_join, 4);
49
- rb_define_method(mod, "join_pg_array", rb_efficient_join_pg_array, 1);
119
+ rb_define_method(mod, "_join", rb_efficient_join, 6);
50
120
  }
51
-
@@ -6,5 +6,13 @@ module EfficientJoin
6
6
 
7
7
  class << self
8
8
  include EfficientJoinCExt
9
+
10
+ def join(array, header: '', footer: '', separator: ',', item_prefix: '', item_suffix: '')
11
+ _join(header, footer, item_prefix, item_suffix, separator, array)
12
+ end
13
+
14
+ def join_pg_array(array)
15
+ _join('{', '}', '', '', ',', array)
16
+ end
9
17
  end
10
18
  end
@@ -1,3 +1,3 @@
1
1
  module EfficientJoin
2
- VERSION = "1.2.0"
2
+ VERSION = "2.1.2"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: efficient_join
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 2.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tom Morton
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2020-05-19 00:00:00.000000000 Z
11
+ date: 2020-06-08 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -30,29 +30,30 @@ dependencies:
30
30
  requirements:
31
31
  - - "~>"
32
32
  - !ruby/object:Gem::Version
33
- version: '10.0'
33
+ version: 12.3.3
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
38
  - - "~>"
39
39
  - !ruby/object:Gem::Version
40
- version: '10.0'
40
+ version: 12.3.3
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: rake-compiler
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - ">="
45
+ - - "~>"
46
46
  - !ruby/object:Gem::Version
47
- version: '0'
47
+ version: '1.0'
48
48
  type: :development
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - ">="
52
+ - - "~>"
53
53
  - !ruby/object:Gem::Version
54
- version: '0'
55
- description: Very fast and memory-efficient way to join a list of ruby numbers.
54
+ version: '1.0'
55
+ description: Very fast and memory-efficient way to join ruby lists of numbers and
56
+ strings.
56
57
  email:
57
58
  - tomm8086@googlemail.com
58
59
  executables: []
@@ -94,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
94
95
  - !ruby/object:Gem::Version
95
96
  version: '0'
96
97
  requirements: []
97
- rubygems_version: 3.0.4
98
+ rubygems_version: 3.0.8
98
99
  signing_key:
99
100
  specification_version: 4
100
101
  summary: "."