efficient_join 1.4.0 → 2.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +10 -3
- data/efficient_join.gemspec +3 -3
- data/ext/efficient_join/efficient_join.c +92 -38
- data/lib/efficient_join.rb +3 -3
- data/lib/efficient_join/version.rb +1 -1
- metadata +10 -9
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5901b92dc6e259cbf76193da6107a3c3dde02c6dddcf66812e9d449322eae6fc
|
4
|
+
data.tar.gz: d9f11fbf636346979abc3654cdfb1e7518b9891532732905404ab2ca08e4617c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f7d39e4ad25fb936ab6213e8d582f2bd9fd3e2a53bf7f24c33452839e473dc984773b8998d880db1a89d602ddd4fc9d45e47eafc39d202b579fad234a2231b8a
|
7
|
+
data.tar.gz: 0c90ce377ad5ed1bd0483cdaaef14702bd743005052833d120f9c813445ff2b92fbfdd204aed18c0576e8b05fb359e6b1e3515b3a9fbdf43c5d35ea12548e85f
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# EfficientJoin
|
2
2
|
|
3
|
-
Very fast and memory-efficient way to join
|
3
|
+
Very fast and memory-efficient way to join ruby lists of numbers and strings.
|
4
4
|
|
5
5
|
Joins are performed with a constant number of ruby object allocations,
|
6
6
|
compared to `Array#join`, `PG::TextEncoder::Array.new.encode`, etc., where at least `n` object
|
@@ -11,8 +11,8 @@ usage and execution time improvements:
|
|
11
11
|
|
12
12
|
| EfficientJoin function | Equivalent ruby function | Memory usage | Time |
|
13
13
|
| ---------------------- | --------------------------------- | ------------ | ------------ |
|
14
|
-
| join | Array#join | 30% |
|
15
|
-
| join_pg_array | PG::TextEncoder::Array.new.encode | 18% |
|
14
|
+
| join | Array#join | 30% | 7.0x faster |
|
15
|
+
| join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 7.4x faster |
|
16
16
|
|
17
17
|
## Installation
|
18
18
|
|
@@ -64,6 +64,13 @@ EfficientJoin.join_pg_array([1,2,3,4])
|
|
64
64
|
=> "{1,2,3,4}"
|
65
65
|
```
|
66
66
|
|
67
|
+
Which is equivalent to:
|
68
|
+
|
69
|
+
```
|
70
|
+
EfficientJoin.join([1,2,3,4], header: '{', footer: '}')
|
71
|
+
=> "{1,2,3,4}"
|
72
|
+
```
|
73
|
+
|
67
74
|
## Development
|
68
75
|
|
69
76
|
After checking out the repo, run `bin/setup` to install dependencies. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/efficient_join.gemspec
CHANGED
@@ -9,7 +9,7 @@ Gem::Specification.new do |spec|
|
|
9
9
|
spec.email = ["tomm8086@googlemail.com"]
|
10
10
|
|
11
11
|
spec.summary = %q{.}
|
12
|
-
spec.description = %q{Very fast and memory-efficient way to join
|
12
|
+
spec.description = %q{Very fast and memory-efficient way to join ruby lists of numbers and strings.}
|
13
13
|
spec.homepage = "https://github.com/tomm/efficient_join"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
@@ -30,6 +30,6 @@ Gem::Specification.new do |spec|
|
|
30
30
|
spec.extensions = ["ext/efficient_join/extconf.rb"]
|
31
31
|
|
32
32
|
spec.add_development_dependency "bundler", "~> 2.0"
|
33
|
-
spec.add_development_dependency "rake", "~>
|
34
|
-
spec.add_development_dependency "rake-compiler"
|
33
|
+
spec.add_development_dependency "rake", "~> 12.3.3"
|
34
|
+
spec.add_development_dependency "rake-compiler", "~> 1.0"
|
35
35
|
end
|
data/ext/efficient_join/efficient_join.c
CHANGED
@@ -1,60 +1,114 @@
|
|
1
1
|
#include "efficient_join.h"
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <string.h>
|
2
5
|
|
3
|
-
|
6
|
+
struct strbuf_t {
|
7
|
+
char *buf;
|
8
|
+
size_t pos;
|
9
|
+
size_t len;
|
10
|
+
};
|
11
|
+
|
12
|
+
static struct strbuf_t strbuf_new(size_t initial_size) {
|
13
|
+
struct strbuf_t strbuf = { (char *)malloc(initial_size), 0, initial_size };
|
14
|
+
return strbuf;
|
15
|
+
}
|
16
|
+
|
17
|
+
static void strbuf_free(struct strbuf_t *strbuf) {
|
18
|
+
free(strbuf->buf);
|
19
|
+
}
|
20
|
+
|
21
|
+
static inline void strbuf_expand(struct strbuf_t *strbuf) {
|
22
|
+
strbuf->buf = (char *)realloc(strbuf->buf, strbuf->len * 2);
|
23
|
+
strbuf->len *= 2;
|
24
|
+
}
|
25
|
+
|
26
|
+
static inline void strbuf_write_str(struct strbuf_t *strbuf, const char *str, size_t len)
|
27
|
+
{
|
28
|
+
if (strbuf->len < strbuf->pos + len) {
|
29
|
+
strbuf_expand(strbuf);
|
30
|
+
}
|
31
|
+
|
32
|
+
memcpy(strbuf->buf + strbuf->pos, str, len);
|
33
|
+
strbuf->pos += len;
|
34
|
+
}
|
35
|
+
|
36
|
+
static inline void strbuf_write_int64(struct strbuf_t *strbuf, int64_t value)
|
37
|
+
{
|
38
|
+
int bytes_written;
|
39
|
+
|
40
|
+
// 22: maximum length of string representation of 64-bit int
|
41
|
+
if (strbuf->len <= strbuf->pos + 22) {
|
42
|
+
strbuf_expand(strbuf);
|
43
|
+
}
|
44
|
+
|
45
|
+
bytes_written = snprintf(strbuf->buf + strbuf->pos, 22, "%ld", value);
|
46
|
+
strbuf->pos += bytes_written;
|
47
|
+
}
|
48
|
+
|
49
|
+
VALUE rb_efficient_join(VALUE self, VALUE _header, VALUE _footer, VALUE _item_prefix, VALUE _item_suffix, VALUE _join, VALUE number_array) {
|
4
50
|
VALUE out;
|
5
51
|
const long array_len = RARRAY_LEN(number_array);
|
6
|
-
char
|
7
|
-
|
8
|
-
|
52
|
+
const char* header = StringValuePtr(_header);
|
53
|
+
const char* footer = StringValuePtr(_footer);
|
54
|
+
const char* item_prefix = StringValuePtr(_item_prefix);
|
55
|
+
const char* item_suffix = StringValuePtr(_item_suffix);
|
56
|
+
const char* join = StringValuePtr(_join);
|
57
|
+
VALUE *c_array = RARRAY_PTR(number_array);
|
58
|
+
const size_t prefix_len = RSTRING_LEN(_item_prefix);
|
59
|
+
const size_t suffix_len = RSTRING_LEN(_item_suffix);
|
60
|
+
const size_t join_len = RSTRING_LEN(_join);
|
9
61
|
|
10
|
-
|
62
|
+
struct strbuf_t join_buf = strbuf_new(suffix_len + join_len + prefix_len);
|
63
|
+
// estimate likely maximum buffer size, to avoid reallocs
|
64
|
+
struct strbuf_t buf = strbuf_new((array_len + 1) * (join_buf.pos + 10));
|
65
|
+
|
66
|
+
// build joining string
|
67
|
+
strbuf_write_str(&join_buf, item_suffix, suffix_len);
|
68
|
+
strbuf_write_str(&join_buf, join, join_len);
|
69
|
+
strbuf_write_str(&join_buf, item_prefix, prefix_len);
|
70
|
+
|
71
|
+
strbuf_write_str(&buf, header, RSTRING_LEN(_header));
|
72
|
+
strbuf_write_str(&buf, item_prefix, prefix_len);
|
11
73
|
|
12
74
|
for (long i=0; i<array_len; ++i) {
|
13
|
-
|
14
|
-
|
15
|
-
if (TYPE(v) != T_FIXNUM) {
|
16
|
-
// rb_raise does not return control, so clean up first
|
17
|
-
fclose(stream);
|
18
|
-
free(buf);
|
19
|
-
rb_raise(rb_eTypeError, "array must contain only integers");
|
20
|
-
}
|
75
|
+
VALUE v = c_array[i];
|
21
76
|
|
22
|
-
|
77
|
+
switch (TYPE(v)) {
|
78
|
+
case T_FIXNUM:
|
79
|
+
strbuf_write_int64(&buf, FIX2LONG(v));
|
80
|
+
break;
|
81
|
+
case T_BIGNUM:
|
82
|
+
strbuf_write_int64(&buf, rb_big2ll(v));
|
83
|
+
break;
|
84
|
+
case T_STRING:
|
85
|
+
strbuf_write_str(&buf, StringValuePtr(v), RSTRING_LEN(v));
|
86
|
+
break;
|
87
|
+
default:
|
88
|
+
// rb_raise does not return control, so clean up first
|
89
|
+
strbuf_free(&join_buf);
|
90
|
+
strbuf_free(&buf);
|
91
|
+
rb_raise(rb_eTypeError, "array must contain only strings and integers");
|
92
|
+
}
|
23
93
|
|
24
94
|
if (i < array_len - 1) {
|
25
|
-
|
26
|
-
}
|
95
|
+
strbuf_write_str(&buf, join_buf.buf, join_buf.pos);
|
96
|
+
}
|
27
97
|
}
|
98
|
+
strbuf_write_str(&buf, item_suffix, suffix_len);
|
99
|
+
strbuf_write_str(&buf, footer, RSTRING_LEN(_footer));
|
28
100
|
|
29
|
-
|
30
|
-
|
31
|
-
fclose(stream);
|
101
|
+
out = rb_str_new(buf.buf, buf.pos);
|
32
102
|
|
33
|
-
|
34
|
-
|
103
|
+
strbuf_free(&join_buf);
|
104
|
+
strbuf_free(&buf);
|
35
105
|
|
36
106
|
return out;
|
37
107
|
}
|
38
108
|
|
39
|
-
VALUE rb_efficient_join(VALUE self, VALUE prefix, VALUE suffix, VALUE join, VALUE number_array) {
|
40
|
-
return _join(
|
41
|
-
"", "",
|
42
|
-
StringValueCStr(prefix),
|
43
|
-
StringValueCStr(suffix),
|
44
|
-
StringValueCStr(join),
|
45
|
-
number_array
|
46
|
-
);
|
47
|
-
}
|
48
|
-
|
49
|
-
VALUE rb_efficient_join_pg_array(VALUE self, VALUE number_array) {
|
50
|
-
return _join("{", "}", "", "", ",", number_array);
|
51
|
-
}
|
52
|
-
|
53
109
|
void Init_efficient_join()
|
54
110
|
{
|
55
111
|
VALUE mod = rb_define_module("EfficientJoinCExt");
|
56
112
|
|
57
|
-
rb_define_method(mod, "_join", rb_efficient_join,
|
58
|
-
rb_define_method(mod, "_join_pg_array", rb_efficient_join_pg_array, 1);
|
113
|
+
rb_define_method(mod, "_join", rb_efficient_join, 6);
|
59
114
|
}
|
60
|
-
|
data/lib/efficient_join.rb
CHANGED
@@ -7,12 +7,12 @@ module EfficientJoin
|
|
7
7
|
class << self
|
8
8
|
include EfficientJoinCExt
|
9
9
|
|
10
|
-
def join(array, separator: ',', item_prefix: '', item_suffix: '')
|
11
|
-
_join(item_prefix, item_suffix, separator, array)
|
10
|
+
def join(array, header: '', footer: '', separator: ',', item_prefix: '', item_suffix: '')
|
11
|
+
_join(header, footer, item_prefix, item_suffix, separator, array)
|
12
12
|
end
|
13
13
|
|
14
14
|
def join_pg_array(array)
|
15
|
-
|
15
|
+
_join('{', '}', '', '', ',', array)
|
16
16
|
end
|
17
17
|
end
|
18
18
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: efficient_join
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 2.1.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Morton
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-
|
11
|
+
date: 2020-09-30 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -30,29 +30,30 @@ dependencies:
|
|
30
30
|
requirements:
|
31
31
|
- - "~>"
|
32
32
|
- !ruby/object:Gem::Version
|
33
|
-
version:
|
33
|
+
version: 12.3.3
|
34
34
|
type: :development
|
35
35
|
prerelease: false
|
36
36
|
version_requirements: !ruby/object:Gem::Requirement
|
37
37
|
requirements:
|
38
38
|
- - "~>"
|
39
39
|
- !ruby/object:Gem::Version
|
40
|
-
version:
|
40
|
+
version: 12.3.3
|
41
41
|
- !ruby/object:Gem::Dependency
|
42
42
|
name: rake-compiler
|
43
43
|
requirement: !ruby/object:Gem::Requirement
|
44
44
|
requirements:
|
45
|
-
- - "
|
45
|
+
- - "~>"
|
46
46
|
- !ruby/object:Gem::Version
|
47
|
-
version: '0'
|
47
|
+
version: '1.0'
|
48
48
|
type: :development
|
49
49
|
prerelease: false
|
50
50
|
version_requirements: !ruby/object:Gem::Requirement
|
51
51
|
requirements:
|
52
|
-
- - "
|
52
|
+
- - "~>"
|
53
53
|
- !ruby/object:Gem::Version
|
54
|
-
version: '0'
|
55
|
-
description: Very fast and memory-efficient way to join
|
54
|
+
version: '1.0'
|
55
|
+
description: Very fast and memory-efficient way to join ruby lists of numbers and
|
56
|
+
strings.
|
56
57
|
email:
|
57
58
|
- tomm8086@googlemail.com
|
58
59
|
executables: []
|