efficient_join 1.2.0 → 2.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +13 -6
- data/efficient_join.gemspec +3 -3
- data/ext/efficient_join/efficient_join.c +92 -23
- data/lib/efficient_join.rb +8 -0
- data/lib/efficient_join/version.rb +1 -1
- metadata +11 -10
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 38066756028c4a6647c608fbf307c70a3ea725cc608cfd6286f030da352a9288
|
4
|
+
data.tar.gz: d75193ef5828362ec8251cbb1be0d1475caa33415f8b50ce844bdbc45be2c9b4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a494b8dab0c78407baa0318cd7d7b3ce8da4c4b4d1a25d74c2776f78d0af3ba47a001a7d527aba5850e6c8570e073b622ddb8b4e1df4e7ced07de09e35ab51b8
|
7
|
+
data.tar.gz: 8ef13b2edcf66858d6815e9a46911caf79f2a16aa7f169677e70567aa1ec49ad959347f4ecd513eb505efb783c1ab356c8f0f82ffe3cedf52e2cc80e73e30ded
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# EfficientJoin
|
2
2
|
|
3
|
-
Very fast and memory-efficient way to join
|
3
|
+
Very fast and memory-efficient way to join ruby lists of numbers and strings.
|
4
4
|
|
5
5
|
Joins are performed with a constant number of ruby object allocations,
|
6
6
|
compared to `Array#join`, `PG::TextEncoder::Array.new.encode`, etc., where at least `n` object
|
@@ -11,8 +11,8 @@ usage and execution time improvements:
|
|
11
11
|
|
12
12
|
| EfficientJoin function | Equivalent ruby function | Memory usage | Time |
|
13
13
|
| ---------------------- | --------------------------------- | ------------ | ------------ |
|
14
|
-
| join | Array#join | 30% |
|
15
|
-
| join_pg_array | PG::TextEncoder::Array.new.encode | 18% |
|
14
|
+
| join | Array#join | 30% | 7.0x faster |
|
15
|
+
| join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 7.4x faster |
|
16
16
|
|
17
17
|
## Installation
|
18
18
|
|
@@ -44,15 +44,15 @@ MemoryProfiler.report { (0...1000000).to_a.join(',') }
|
|
44
44
|
With efficient join:
|
45
45
|
```
|
46
46
|
require 'efficient_join'
|
47
|
-
EfficientJoin.join(
|
47
|
+
EfficientJoin.join((0...1000000).to_a)
|
48
48
|
...
|
49
49
|
@total_allocated=5,
|
50
50
|
@total_allocated_memsize=18525362
|
51
51
|
```
|
52
52
|
|
53
|
-
It
|
53
|
+
It can also take separator, item prefix and item suffix:
|
54
54
|
```
|
55
|
-
EfficientJoin.join('(', ',now(),now())'
|
55
|
+
EfficientJoin.join([1,2,3,4], separator: ',', item_prefix: '(', item_suffix: ',now(),now())')
|
56
56
|
=> "(1,now(),now()),(2,now(),now()),(3,now(),now()),(4,now(),now())"
|
57
57
|
```
|
58
58
|
|
@@ -64,6 +64,13 @@ EfficientJoin.join_pg_array([1,2,3,4])
|
|
64
64
|
=> "{1,2,3,4}"
|
65
65
|
```
|
66
66
|
|
67
|
+
Which is equivalent to:
|
68
|
+
|
69
|
+
```
|
70
|
+
EfficientJoin.join([1,2,3,4], header: '{', footer: '}')
|
71
|
+
=> "{1,2,3,4}"
|
72
|
+
```
|
73
|
+
|
67
74
|
## Development
|
68
75
|
|
69
76
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/efficient_join.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["tomm8086@googlemail.com"]
|
10
10
|
|
11
11
|
spec.summary = %q{.}
|
12
|
-
spec.description = %q{Very fast and memory-efficient way to join
|
12
|
+
spec.description = %q{Very fast and memory-efficient way to join ruby lists of numbers and strings.}
|
13
13
|
spec.homepage = "https://github.com/tomm/efficient_join"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -30,6 +30,6 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.extensions = ["ext/efficient_join/extconf.rb"]
|
31
31
|
|
32
32
|
spec.add_development_dependency "bundler", "~> 2.0"
|
33
|
-
spec.add_development_dependency "rake", "~>
|
34
|
-
spec.add_development_dependency "rake-compiler"
|
33
|
+
spec.add_development_dependency "rake", "~> 12.3.3"
|
34
|
+
spec.add_development_dependency "rake-compiler", "~> 1.0"
|
35
35
|
end
|
@@ -1,35 +1,110 @@
|
|
1
1
|
#include "efficient_join.h"
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <string.h>
|
2
5
|
|
3
|
-
|
4
|
-
VALUE out;
|
5
|
-
const long array_len = RARRAY_LEN(number_array);
|
6
|
+
// Growable byte buffer used to assemble the joined output.
//   buf: heap storage; the contents are NOT NUL-terminated
//   pos: number of bytes written so far (next write offset)
//   len: allocated capacity of buf, in bytes
struct strbuf_t {
    char *buf;
    size_t pos;
    size_t len;
};

// Explicit out-of-memory policy for the buffer helpers: report and terminate.
// Previously a failed malloc/realloc left buf NULL and the next write
// dereferenced it.
// NOTE(review): a ruby-aware caller may prefer raising NoMemoryError instead
// of aborting the process - confirm the desired policy.
static void strbuf_out_of_memory(void) {
    fputs("efficient_join: out of memory\n", stderr);
    abort();
}

// Reallocates the storage to exactly new_len bytes, keeping the old pointer
// valid until the new allocation is known to have succeeded.
static void strbuf_resize(struct strbuf_t *strbuf, size_t new_len) {
    char *grown = realloc(strbuf->buf, new_len);

    if (grown == NULL) {
        strbuf_out_of_memory();
    }
    strbuf->buf = grown;
    strbuf->len = new_len;
}

// Creates an empty buffer with room for initial_size bytes.
// At least one byte is always allocated so that buf is never NULL and so
// that doubling the capacity always makes progress.
// The caller owns the result and must release it with strbuf_free().
static struct strbuf_t strbuf_new(size_t initial_size) {
    struct strbuf_t strbuf = { NULL, 0, initial_size > 0 ? initial_size : 1 };

    strbuf.buf = malloc(strbuf.len);
    if (strbuf.buf == NULL) {
        strbuf_out_of_memory();
    }
    return strbuf;
}

// Releases the storage and resets the fields, so an accidental second call
// is a harmless free(NULL) rather than a double free.
static void strbuf_free(struct strbuf_t *strbuf) {
    free(strbuf->buf);
    strbuf->buf = NULL;
    strbuf->pos = 0;
    strbuf->len = 0;
}

// Doubles the capacity of the buffer (a zero capacity is treated as one).
static inline void strbuf_expand(struct strbuf_t *strbuf) {
    const size_t current = strbuf->len > 0 ? strbuf->len : 1;

    if (current > SIZE_MAX / 2) {
        strbuf_out_of_memory();
    }
    strbuf_resize(strbuf, current * 2);
}

// Guarantees that `extra` more bytes can be written at pos.
// Capacity grows by repeated doubling until the request fits: a single
// doubling is not enough when one write is larger than the current capacity.
static inline void strbuf_reserve(struct strbuf_t *strbuf, size_t extra) {
    size_t needed;
    size_t new_len;

    if (extra > SIZE_MAX - strbuf->pos) {
        strbuf_out_of_memory();
    }
    needed = strbuf->pos + extra;
    if (needed <= strbuf->len) {
        return;
    }

    new_len = strbuf->len > 0 ? strbuf->len : 1;
    while (new_len < needed) {
        if (new_len > SIZE_MAX / 2) {
            // doubling would overflow size_t; fall back to the exact size
            new_len = needed;
            break;
        }
        new_len *= 2;
    }
    strbuf_resize(strbuf, new_len);
}

// Appends len bytes from str, growing the buffer as required.
// str does not need to be NUL-terminated; exactly len bytes are copied.
static inline void strbuf_write_str(struct strbuf_t *strbuf, const char *str, size_t len)
{
    if (len == 0) {
        return;
    }

    strbuf_reserve(strbuf, len);
    memcpy(strbuf->buf + strbuf->pos, str, len);
    strbuf->pos += len;
}

// Appends the decimal representation of value.
static inline void strbuf_write_int64(struct strbuf_t *strbuf, int64_t value)
{
    // "-9223372036854775808" is 20 characters plus the terminating NUL that
    // snprintf writes; 22 leaves a little slack.
    enum { INT64_STR_MAX = 22 };
    int bytes_written;

    strbuf_reserve(strbuf, INT64_STR_MAX);

    // int64_t is not `long` on every platform, so convert explicitly and use
    // the matching conversion specifier.
    bytes_written = snprintf(strbuf->buf + strbuf->pos, INT64_STR_MAX, "%lld", (long long)value);
    if (bytes_written > 0) {
        // the NUL written by snprintf is not counted and is overwritten by
        // the next write
        strbuf->pos += (size_t)bytes_written;
    }
}
|
48
|
+
|
49
|
+
// Builds one ruby string of the form
//   header + item_prefix + item[0] + item_suffix + join + item_prefix + item[1] ... + item_suffix + footer
// from a ruby array whose elements are integers (Fixnum/Bignum) or strings.
//
// header, footer, item_prefix, item_suffix and join are NUL-terminated C
// strings. Raises TypeError if the array contains any other kind of element.
// Returns a newly created ruby string.
static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE array) {
    VALUE out;
    const long array_len = RARRAY_LEN(array);
    VALUE *c_array = RARRAY_PTR(array);
    const size_t prefix_len = strlen(item_prefix);
    const size_t suffix_len = strlen(item_suffix);
    const size_t join_len = strlen(join);
    struct strbuf_t join_buf;
    struct strbuf_t buf;

    // build joining string: everything that goes between two adjacent items
    join_buf = strbuf_new(suffix_len + join_len + prefix_len);
    strbuf_write_str(&join_buf, item_suffix, suffix_len);
    strbuf_write_str(&join_buf, join, join_len);
    strbuf_write_str(&join_buf, item_prefix, prefix_len);

    // estimate likely maximum buffer size, to avoid reallocs.
    // This must happen after the joining string is built: join_buf.pos is
    // still 0 on a freshly created buffer, which made the estimate ignore
    // the joiner length entirely.
    buf = strbuf_new(((size_t)array_len + 1) * (join_buf.pos + 10));

    strbuf_write_str(&buf, header, strlen(header));
    strbuf_write_str(&buf, item_prefix, prefix_len);

    for (long i=0; i<array_len; ++i) {
        VALUE v = c_array[i];

        switch (TYPE(v)) {
            case T_FIXNUM:
                strbuf_write_int64(&buf, FIX2LONG(v));
                break;
            case T_BIGNUM:
                // NOTE(review): rb_big2ll is expected to raise RangeError for
                // values that do not fit in a long long; if it does, both
                // buffers leak because control never returns here - confirm.
                strbuf_write_int64(&buf, rb_big2ll(v));
                break;
            case T_STRING:
                strbuf_write_str(&buf, StringValuePtr(v), RSTRING_LEN(v));
                break;
            default:
                // rb_raise does not return control, so clean up first
                strbuf_free(&join_buf);
                strbuf_free(&buf);
                rb_raise(rb_eTypeError, "array must contain only strings and integers");
        }

        if (i < array_len - 1) {
            strbuf_write_str(&buf, join_buf.buf, join_buf.pos);
        }
    }
    strbuf_write_str(&buf, item_suffix, suffix_len);
    strbuf_write_str(&buf, footer, strlen(footer));

    // rb_str_new copies the bytes, so the scratch buffers can be released
    out = rb_str_new(buf.buf, buf.pos);

    strbuf_free(&join_buf);
    strbuf_free(&buf);

    return out;
}
|
29
103
|
|
30
|
-
VALUE rb_efficient_join(VALUE self, VALUE prefix, VALUE suffix, VALUE join, VALUE number_array) {
|
104
|
+
VALUE rb_efficient_join(VALUE self, VALUE header, VALUE footer, VALUE prefix, VALUE suffix, VALUE join, VALUE number_array) {
|
31
105
|
return _join(
|
32
|
-
|
106
|
+
StringValueCStr(header),
|
107
|
+
StringValueCStr(footer),
|
33
108
|
StringValueCStr(prefix),
|
34
109
|
StringValueCStr(suffix),
|
35
110
|
StringValueCStr(join),
|
@@ -37,15 +112,9 @@ VALUE rb_efficient_join(VALUE self, VALUE prefix, VALUE suffix, VALUE join, VALU
|
|
37
112
|
);
|
38
113
|
}
|
39
114
|
|
40
|
-
VALUE rb_efficient_join_pg_array(VALUE self, VALUE number_array) {
|
41
|
-
return _join("{", "}", "", "", ",", number_array);
|
42
|
-
}
|
43
|
-
|
44
115
|
void Init_efficient_join()
|
45
116
|
{
|
46
117
|
VALUE mod = rb_define_module("EfficientJoinCExt");
|
47
118
|
|
48
|
-
rb_define_method(mod, "
|
49
|
-
rb_define_method(mod, "join_pg_array", rb_efficient_join_pg_array, 1);
|
119
|
+
rb_define_method(mod, "_join", rb_efficient_join, 6);
|
50
120
|
}
|
51
|
-
|
data/lib/efficient_join.rb
CHANGED
@@ -6,5 +6,13 @@ module EfficientJoin
|
|
6
6
|
|
7
7
|
class << self
|
8
8
|
include EfficientJoinCExt
|
9
|
+
|
10
|
+
# Joins the elements of +array+ into a single string.
#
# Each item is wrapped in +item_prefix+ / +item_suffix+, items are separated
# by +separator+, and the whole result is wrapped in +header+ / +footer+.
# All the work is delegated to the C extension's +_join+.
def join(array, header: '', footer: '', separator: ',', item_prefix: '', item_suffix: '')
  _join(header, footer, item_prefix, item_suffix, separator, array)
end
|
13
|
+
|
14
|
+
# Joins +array+ with commas and wraps the result in braces,
# e.g. [1,2,3,4] => "{1,2,3,4}".
def join_pg_array(array)
  join(array, header: '{', footer: '}')
end
|
9
17
|
end
|
10
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: efficient_join
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Morton
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-06-08 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,29 +30,30 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 12.3.3
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 12.3.3
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake-compiler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
47
|
+
version: '1.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
description: Very fast and memory-efficient way to join
|
54
|
+
version: '1.0'
|
55
|
+
description: Very fast and memory-efficient way to join ruby lists of numbers and
|
56
|
+
strings.
|
56
57
|
email:
|
57
58
|
- tomm8086@googlemail.com
|
58
59
|
executables: []
|
@@ -94,7 +95,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
94
95
|
- !ruby/object:Gem::Version
|
95
96
|
version: '0'
|
96
97
|
requirements: []
|
97
|
-
rubygems_version: 3.0.
|
98
|
+
rubygems_version: 3.0.8
|
98
99
|
signing_key:
|
99
100
|
specification_version: 4
|
100
101
|
summary: "."
|