efficient_join 1.4.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/ext/efficient_join/efficient_join.c +87 -23
- data/lib/efficient_join/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ce8cde6fa3699cd119df8e7fa7b7b3167977eeef1db13ff2c16a03a8787333d9
|
4
|
+
data.tar.gz: e8c10cb18adf55851f1da719dd8f5e70ac1f8d8f7975978abb79f8f22eef83d3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6f938000b41839c14d1b3e05b52acaddbf9639bb0469af3af5cb537edc37ab3e339782ed9c8183ac2553bdb3aec39f0faefbc1c5a3f52bbe789205edf130e043
|
7
|
+
data.tar.gz: 5874883b14afc5b575239e20727adfaedc828839edf1117d3b5982490efb7fc11cf748198bf05ceafa6e46d250042f48e61b2766048c0a75e7f21a1cbdf0027f
|
data/README.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
# EfficientJoin
|
2
2
|
|
3
|
-
Very fast and memory-efficient way to join
|
3
|
+
Very fast and memory-efficient way to join ruby lists of numbers and strings.
|
4
4
|
|
5
5
|
Joins are performed with a constant number of ruby object allocations,
|
6
6
|
compared to `Array#join`, `PG::TextEncoder::Array.new.encode`, etc, where at least `n` object
|
@@ -11,8 +11,8 @@ usage and execution time improvements:
|
|
11
11
|
|
12
12
|
| EfficientJoin function | Equivalent ruby function | Memory usage | Time |
|
13
13
|
| ---------------------- | --------------------------------- | ------------ | ------------ |
|
14
|
-
| join | Array#join | 30% |
|
15
|
-
| join_pg_array | PG::TextEncoder::Array.new.encode | 18% |
|
14
|
+
| join | Array#join | 30% | 7.0x faster |
|
15
|
+
| join_pg_array | PG::TextEncoder::Array.new.encode | 18% | 7.4x faster |
|
16
16
|
|
17
17
|
## Installation
|
18
18
|
|
@@ -1,37 +1,102 @@
|
|
1
1
|
#include "efficient_join.h"
|
2
|
+
#include <stdlib.h>
|
3
|
+
#include <stdio.h>
|
4
|
+
#include <string.h>
|
2
5
|
|
3
|
-
|
4
|
-
VALUE out;
|
5
|
-
const long array_len = RARRAY_LEN(number_array);
|
6
|
+
/*
 * Growable byte buffer used to assemble the joined string.
 * The contents are raw bytes; no NUL terminator is maintained.
 */
struct strbuf_t {
  char *buf;  /* heap storage, released by strbuf_free() */
  size_t pos; /* number of bytes written so far */
  size_t len; /* current capacity of buf, in bytes */
};

/*
 * Bytes reserved for one formatted 64-bit integer. The longest value,
 * "-9223372036854775808", needs 20 characters plus the NUL that snprintf
 * appends; 22 leaves a little headroom.
 */
enum { STRBUF_INT64_CHARS = 22 };

/* Allocation-failure policy: report and abort rather than write through NULL. */
static void strbuf_oom(void) {
  fputs("efficient_join: out of memory\n", stderr);
  abort();
}

/*
 * Creates an empty buffer with room for at least `initial_size` bytes.
 * The caller owns the result and must release it with strbuf_free().
 */
static struct strbuf_t strbuf_new(size_t initial_size) {
  /* Never request 0 bytes: malloc(0) may return NULL, and a zero capacity
   * could never grow by doubling. */
  const size_t capacity = initial_size > 0 ? initial_size : 1;
  struct strbuf_t strbuf = { malloc(capacity), 0, capacity };

  if (strbuf.buf == NULL) {
    strbuf_oom();
  }
  return strbuf;
}

/* Releases the storage and resets the struct so a second call is harmless. */
static void strbuf_free(struct strbuf_t *strbuf) {
  free(strbuf->buf);
  strbuf->buf = NULL;
  strbuf->pos = 0;
  strbuf->len = 0;
}

/* Reallocates the storage to exactly `new_len` bytes, keeping the contents. */
static void strbuf_resize(struct strbuf_t *strbuf, size_t new_len) {
  /* Keep the old pointer until realloc has succeeded. */
  char *grown = realloc(strbuf->buf, new_len);

  if (grown == NULL) {
    strbuf_oom();
  }
  strbuf->buf = grown;
  strbuf->len = new_len;
}

/* Doubles the capacity (an empty buffer grows to one byte). */
static inline void strbuf_expand(struct strbuf_t *strbuf) {
  if (strbuf->len > (size_t)-1 / 2) {
    strbuf_oom(); /* doubling would overflow size_t */
  }
  strbuf_resize(strbuf, strbuf->len > 0 ? strbuf->len * 2 : 1);
}

/*
 * Ensures at least `extra` bytes are free after `pos`, doubling the capacity
 * as many times as required (a single doubling is not always enough).
 */
static inline void strbuf_reserve(struct strbuf_t *strbuf, size_t extra)
{
  size_t needed;
  size_t new_len;

  if (extra > (size_t)-1 - strbuf->pos) {
    strbuf_oom(); /* pos + extra would overflow size_t */
  }
  needed = strbuf->pos + extra;
  if (needed <= strbuf->len) {
    return;
  }

  new_len = strbuf->len > 0 ? strbuf->len : 1;
  while (new_len < needed) {
    if (new_len > (size_t)-1 / 2) {
      new_len = needed; /* cannot double any further; take the exact size */
      break;
    }
    new_len *= 2;
  }
  strbuf_resize(strbuf, new_len);
}

/* Appends `len` bytes starting at `str`, growing the buffer if necessary. */
static inline void strbuf_write_str(struct strbuf_t *strbuf, const char *str, size_t len)
{
  if (len == 0) {
    return; /* nothing to copy; also avoids memcpy on a possibly-NULL str */
  }

  strbuf_reserve(strbuf, len);
  memcpy(strbuf->buf + strbuf->pos, str, len);
  strbuf->pos += len;
}

/* Appends the decimal text of `value`, growing the buffer if necessary. */
static inline void strbuf_write_int64(struct strbuf_t *strbuf, int64_t value)
{
  int bytes_written;

  strbuf_reserve(strbuf, STRBUF_INT64_CHARS);

  /* Cast so the argument matches "%lld" even where long is 32 bits wide. */
  bytes_written = snprintf(strbuf->buf + strbuf->pos, STRBUF_INT64_CHARS, "%lld", (long long)value);
  if (bytes_written > 0) {
    /* snprintf's trailing NUL is not counted, so the next write overwrites it. */
    strbuf->pos += (size_t)bytes_written;
  }
}
|
48
|
+
|
49
|
+
/*
 * Joins the elements of a Ruby array into a single new Ruby String laid out as
 *
 *   header item_prefix e0 item_suffix join item_prefix e1 ... eN item_suffix footer
 *
 * Elements may be Fixnums, Bignums that fit in a long long, or Strings.
 * An empty array yields header + item_prefix + item_suffix + footer.
 *
 * Raises TypeError for any other element type; rb_big2ll raises for a Bignum
 * that is out of range. Both checks run before anything is allocated, because
 * a Ruby exception does not return control and would otherwise leak the
 * malloc'd buffers.
 */
static VALUE _join(const char *header, const char *footer, const char *item_prefix, const char *item_suffix, const char *join, VALUE array) {
  VALUE out;
  const long array_len = RARRAY_LEN(array);
  VALUE *c_array = RARRAY_PTR(array);
  const size_t prefix_len = strlen(item_prefix);
  const size_t suffix_len = strlen(item_suffix);
  const size_t join_len = strlen(join);

  // validate every element up front, while there is nothing to clean up
  for (long i = 0; i < array_len; ++i) {
    VALUE v = c_array[i];

    switch (TYPE(v)) {
      case T_FIXNUM:
      case T_STRING:
        break;
      case T_BIGNUM:
        // called only for its range check; raises if v does not fit
        (void)rb_big2ll(v);
        break;
      default:
        rb_raise(rb_eTypeError, "array must contain only strings and integers");
    }
  }

  // build joining string (+1 so an all-empty separator never requests 0 bytes)
  struct strbuf_t join_buf = strbuf_new(suffix_len + join_len + prefix_len + 1);
  strbuf_write_str(&join_buf, item_suffix, suffix_len);
  strbuf_write_str(&join_buf, join, join_len);
  strbuf_write_str(&join_buf, item_prefix, prefix_len);

  // estimate likely maximum buffer size, to avoid reallocs
  struct strbuf_t buf = strbuf_new((array_len + 1) * (join_buf.pos + 10));

  strbuf_write_str(&buf, header, strlen(header));
  strbuf_write_str(&buf, item_prefix, prefix_len);

  for (long i = 0; i < array_len; ++i) {
    VALUE v = c_array[i];

    switch (TYPE(v)) {
      case T_FIXNUM:
        strbuf_write_int64(&buf, FIX2LONG(v));
        break;
      case T_BIGNUM:
        strbuf_write_int64(&buf, rb_big2ll(v));
        break;
      case T_STRING:
        strbuf_write_str(&buf, StringValuePtr(v), RSTRING_LEN(v));
        break;
      default:
        // not expected after the validation pass; kept as a safety net.
        // rb_raise does not return control, so clean up first
        strbuf_free(&join_buf);
        strbuf_free(&buf);
        rb_raise(rb_eTypeError, "array must contain only strings and integers");
    }

    // separator goes between elements only, not after the last one
    if (i < array_len - 1) {
      strbuf_write_str(&buf, join_buf.buf, join_buf.pos);
    }
  }
  strbuf_write_str(&buf, item_suffix, suffix_len);
  strbuf_write_str(&buf, footer, strlen(footer));

  // the separator is no longer needed; release it before allocating the result
  strbuf_free(&join_buf);

  // rb_str_new copies the bytes, so the scratch buffer can be freed afterwards
  out = rb_str_new(buf.buf, buf.pos);

  strbuf_free(&buf);

  return out;
}
|
@@ -57,4 +122,3 @@ void Init_efficient_join()
|
|
57
122
|
rb_define_method(mod, "_join", rb_efficient_join, 4);
|
58
123
|
rb_define_method(mod, "_join_pg_array", rb_efficient_join_pg_array, 1);
|
59
124
|
}
|
60
|
-
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: efficient_join
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tom Morton
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-05-
|
11
|
+
date: 2020-05-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|