hash_unnest 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/ext/hash_unnest/extconf.rb +2 -1
- data/ext/hash_unnest/hash_unnest.c +149 -39
- data/lib/hash_unnest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3bcf3dbb4e02471f88372a12cf1908cb623b1c98
|
4
|
+
data.tar.gz: 11f945786febf21b933472abc145e2399bc950b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea4c0c8e5b46c13a33c482d6601f1a3e59132f4a061b6f9e3249db98b862c096fd813e3b6992bdf134f3d123faf1be6bb185030c3f6b6860a4cecf473f16cb15
|
7
|
+
data.tar.gz: 3368483ac0fe6f3247d8050b37eb114571573cd57bde99c16e174b096cf696eb57af470d7edf08b9b50ab28509ae5a1a6c0d25ec74f761b396ad5cc9033030cc
|
data/ext/hash_unnest/extconf.rb
CHANGED
@@ -4,9 +4,10 @@ PLATFORM = `uname`.strip.upcase
|
|
4
4
|
SHARED_FLAGS = "--std=c99 -Wall -Wextra -Werror"
|
5
5
|
|
6
6
|
# production
|
7
|
-
$CFLAGS += " #{SHARED_FLAGS} -Os"
|
7
|
+
$CFLAGS += " #{SHARED_FLAGS} -Os -g"
|
8
8
|
|
9
9
|
# development
|
10
10
|
# $CFLAGS += " #{SHARED_FLAGS} -O0 -g -DDEBUG"
|
11
|
+
# $CFLAGS += " #{SHARED_FLAGS} -Os -g"
|
11
12
|
|
12
13
|
create_makefile('hash_unnest/hash_unnest')
|
data/ext/hash_unnest/hash_unnest.c
CHANGED
@@ -2,17 +2,90 @@
|
|
2
2
|
#include <assert.h>
|
3
3
|
#include "hash_unnest.h"
|
4
4
|
|
5
|
+
#define RECORD_MAGIC 0xF00DF00D
|
6
|
+
#define BUFFER_MAGIC 0xBA0BAB00
|
7
|
+
|
5
8
|
// forward declarations of closures
|
6
|
-
static int
|
7
|
-
static int
|
9
|
+
static int hn_size_closure(VALUE key, VALUE val, VALUE in);
|
10
|
+
static int hn_unnest_closure(VALUE key, VALUE val, VALUE in);
|
11
|
+
|
12
|
+
static VALUE hn_hash_unnest_m = Qnil;
|
8
13
|
|
9
|
-
static VALUE
|
14
|
+
static VALUE hn_buffer_type = Qnil;
|
15
|
+
static VALUE hn_int_type = Qnil;
|
10
16
|
|
11
17
|
// a (sortable) key-value entry
|
12
18
|
typedef struct {
|
19
|
+
#ifndef NDEBUG
|
20
|
+
unsigned int magic;
|
21
|
+
#endif
|
13
22
|
VALUE key;
|
14
23
|
VALUE value;
|
15
|
-
}
|
24
|
+
} hn_entry_t;
|
25
|
+
|
26
|
+
// a safe C array of key, value pairs
|
27
|
+
typedef struct {
|
28
|
+
#ifndef NDEBUG
|
29
|
+
unsigned int magic;
|
30
|
+
#endif
|
31
|
+
int size;
|
32
|
+
int cursor;
|
33
|
+
hn_entry_t* buf;
|
34
|
+
} hn_buffer_t;
|
35
|
+
|
36
|
+
/******************************************************************************/
|
37
|
+
|
38
|
+
static void hn_buffer_free(hn_buffer_t* buf) {
|
39
|
+
LOG("*** hn_buffer_free\n");
|
40
|
+
|
41
|
+
xfree(buf->buf);
|
42
|
+
buf->buf = NULL;
|
43
|
+
#ifndef NDEBUG
|
44
|
+
buf->magic = 0;
|
45
|
+
#endif
|
46
|
+
xfree(buf);
|
47
|
+
}
|
48
|
+
|
49
|
+
static void hn_buffer_mark(hn_buffer_t* buf) {
|
50
|
+
LOG("*** hn_buffer_mark\n");
|
51
|
+
|
52
|
+
assert(buf->magic == BUFFER_MAGIC);
|
53
|
+
|
54
|
+
for (int k = 0; k < buf->cursor; ++k) {
|
55
|
+
LOG(" rb_gc_mark '%s'\n", StringValueCStr(buf->buf[k].key));
|
56
|
+
rb_gc_mark(buf->buf[k].key);
|
57
|
+
rb_gc_mark(buf->buf[k].value);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
static VALUE hn_buffer_alloc(int size) {
|
62
|
+
hn_buffer_t* buf = NULL;
|
63
|
+
|
64
|
+
buf = ALLOC(hn_buffer_t);
|
65
|
+
#ifndef NDEBUG
|
66
|
+
buf->magic = BUFFER_MAGIC;
|
67
|
+
#endif
|
68
|
+
buf->buf = ALLOC_N(hn_entry_t, size+1);
|
69
|
+
buf->size = size;
|
70
|
+
buf->cursor = 0;
|
71
|
+
|
72
|
+
for (int k = 0; k < size; ++k) {
|
73
|
+
#ifndef NDEBUG
|
74
|
+
buf->buf[k].magic = RECORD_MAGIC;
|
75
|
+
#endif
|
76
|
+
buf->buf[k].key = Qnil;
|
77
|
+
buf->buf[k].value = Qnil;
|
78
|
+
}
|
79
|
+
|
80
|
+
// sentinel entry
|
81
|
+
#ifndef NDEBUG
|
82
|
+
buf->buf[size].magic = 0;
|
83
|
+
#endif
|
84
|
+
buf->buf[size].key = Qnil;
|
85
|
+
buf->buf[size].value = Qnil;
|
86
|
+
|
87
|
+
return Data_Wrap_Struct(hn_buffer_type, hn_buffer_mark, hn_buffer_free, buf);
|
88
|
+
}
|
16
89
|
|
17
90
|
/******************************************************************************/
|
18
91
|
|
@@ -27,17 +100,17 @@ typedef struct {
|
|
27
100
|
// - When `val` is a hash, call ourselves recursively, appending the
|
28
101
|
// current key to the prefix.
|
29
102
|
//
|
30
|
-
static int
|
103
|
+
static int hn_unnest_closure(VALUE key, VALUE val, VALUE in)
|
31
104
|
{
|
32
105
|
VALUE prefix = rb_ary_entry(in, 0);
|
33
|
-
hn_record_t** output = (hn_record_t**) rb_ary_entry(in, 1);
|
34
106
|
|
35
107
|
#ifdef DEBUG
|
36
108
|
VALUE _str_key = rb_funcall(key, rb_intern("inspect"), 0);
|
37
109
|
VALUE _str_val = rb_funcall(val, rb_intern("inspect"), 0);
|
38
|
-
LOG("***
|
110
|
+
LOG("*** hn_unnest_closure (%s,%s,'%s')\n", StringValueCStr(_str_key), StringValueCStr(_str_val), StringValueCStr(prefix));
|
39
111
|
#endif
|
40
112
|
|
113
|
+
|
41
114
|
switch(TYPE(val)) {
|
42
115
|
case T_HASH:
|
43
116
|
{
|
@@ -50,19 +123,34 @@ static int hash_unnest_closure(VALUE key, VALUE val, VALUE in)
|
|
50
123
|
|
51
124
|
new_in = rb_ary_new2(2);
|
52
125
|
rb_ary_store(new_in, 0, new_prefix);
|
53
|
-
rb_ary_store(new_in, 1, (
|
126
|
+
rb_ary_store(new_in, 1, rb_ary_entry(in, 1));
|
54
127
|
|
55
|
-
rb_hash_foreach(val,
|
128
|
+
rb_hash_foreach(val, hn_unnest_closure, new_in);
|
56
129
|
break;
|
57
130
|
}
|
58
131
|
default:
|
59
132
|
{
|
60
133
|
VALUE new_key = rb_str_dup(prefix);
|
134
|
+
hn_buffer_t* c_buf;
|
61
135
|
|
62
136
|
rb_str_append(new_key, key);
|
63
|
-
(
|
64
|
-
|
65
|
-
|
137
|
+
Data_Get_Struct(rb_ary_entry(in, 1), hn_buffer_t, c_buf);
|
138
|
+
|
139
|
+
#ifdef DEBUG
|
140
|
+
{
|
141
|
+
VALUE _str_key = rb_funcall(new_key, rb_intern("inspect"), 0);
|
142
|
+
VALUE _str_val = rb_funcall(val, rb_intern("inspect"), 0);
|
143
|
+
LOG(" adding item %d: %s, %s\n", c_buf->cursor, StringValueCStr(_str_key), StringValueCStr(_str_val));
|
144
|
+
}
|
145
|
+
#endif
|
146
|
+
|
147
|
+
assert(c_buf->magic == BUFFER_MAGIC);
|
148
|
+
assert(c_buf->cursor >= 0);
|
149
|
+
assert(c_buf->cursor < c_buf->size);
|
150
|
+
assert(c_buf->buf[c_buf->cursor].magic == RECORD_MAGIC);
|
151
|
+
c_buf->buf[c_buf->cursor].key = new_key;
|
152
|
+
c_buf->buf[c_buf->cursor].value = val;
|
153
|
+
++(c_buf->cursor);
|
66
154
|
}
|
67
155
|
}
|
68
156
|
return ST_CONTINUE;
|
@@ -71,17 +159,18 @@ static int hash_unnest_closure(VALUE key, VALUE val, VALUE in)
|
|
71
159
|
/******************************************************************************/
|
72
160
|
|
73
161
|
// Recursively counts the number of leaves ("size") of a nested hash.
|
74
|
-
static int
|
162
|
+
static int hn_size_closure(VALUE key, VALUE val, VALUE in)
|
75
163
|
{
|
164
|
+
int* size = NULL;
|
76
165
|
switch(TYPE(val)) {
|
77
166
|
case T_HASH:
|
78
167
|
{
|
79
|
-
rb_hash_foreach(val,
|
168
|
+
rb_hash_foreach(val, hn_size_closure, in);
|
80
169
|
break;
|
81
170
|
}
|
82
171
|
default:
|
83
172
|
{
|
84
|
-
int
|
173
|
+
Data_Get_Struct(in, int, size);
|
85
174
|
*size = *size + 1;
|
86
175
|
}
|
87
176
|
}
|
@@ -90,10 +179,10 @@ static int hash_unnest_size_closure(VALUE key, VALUE val, VALUE in)
|
|
90
179
|
|
91
180
|
/******************************************************************************/
|
92
181
|
|
93
|
-
|
94
|
-
int
|
95
|
-
|
96
|
-
|
182
|
+
/* Compares the `key`s in `hn_entry_t`s, for sorting purposes. */
|
183
|
+
int hn_entry_compare(const void* a, const void* b) {
|
184
|
+
hn_entry_t* record_a = (hn_entry_t*) a;
|
185
|
+
hn_entry_t* record_b = (hn_entry_t*) b;
|
97
186
|
|
98
187
|
LOG("compare '%s' to '%s'\n", StringValueCStr(record_a->key), StringValueCStr(record_b->key));
|
99
188
|
|
@@ -106,21 +195,26 @@ int hn_record_compare(const void* a, const void* b) {
|
|
106
195
|
|
107
196
|
/******************************************************************************/
|
108
197
|
|
109
|
-
static VALUE
|
198
|
+
static VALUE hn_unnest(VALUE self)
|
110
199
|
{
|
111
|
-
VALUE result, in, prefix;
|
112
|
-
|
113
|
-
hn_record_t* buf_ptr;
|
200
|
+
VALUE result, in, buf, prefix;
|
201
|
+
hn_buffer_t* c_buf = NULL;
|
114
202
|
int size = 0;
|
115
203
|
|
116
204
|
#ifdef DEBUG
|
117
205
|
{
|
118
206
|
VALUE _str_self = rb_funcall(self, rb_intern("to_s"), 0);
|
119
|
-
LOG("***
|
207
|
+
LOG("*** hn_unnest (%s)\n", StringValueCStr(_str_self));
|
120
208
|
}
|
121
209
|
#endif
|
122
210
|
|
123
|
-
|
211
|
+
// count leaves in input
|
212
|
+
rb_hash_foreach(
|
213
|
+
self,
|
214
|
+
hn_size_closure,
|
215
|
+
Data_Wrap_Struct(hn_int_type, NULL, NULL, &size)
|
216
|
+
);
|
217
|
+
|
124
218
|
#ifdef DEBUG
|
125
219
|
{
|
126
220
|
VALUE _str_self = rb_funcall(self, rb_intern("to_s"), 0);
|
@@ -128,28 +222,37 @@ static VALUE hash_unnest_unnest(VALUE self)
|
|
128
222
|
}
|
129
223
|
#endif
|
130
224
|
|
131
|
-
|
132
|
-
buf_ptr = buf;
|
133
|
-
|
225
|
+
// unnest `self` into `buf`
|
134
226
|
prefix = rb_str_new("", 0);
|
227
|
+
buf = hn_buffer_alloc(size);
|
135
228
|
|
136
229
|
in = rb_ary_new2(2);
|
137
230
|
rb_ary_store(in, 0, prefix);
|
138
|
-
rb_ary_store(in, 1,
|
139
|
-
|
140
|
-
rb_hash_foreach(self,
|
231
|
+
rb_ary_store(in, 1, buf);
|
232
|
+
|
233
|
+
rb_hash_foreach(self, hn_unnest_closure, in);
|
141
234
|
|
142
|
-
|
235
|
+
// sort the C array of key, value pairs
|
236
|
+
Data_Get_Struct(buf, hn_buffer_t, c_buf);
|
237
|
+
assert(c_buf != NULL);
|
238
|
+
assert(c_buf->buf != NULL);
|
239
|
+
assert(c_buf->magic == BUFFER_MAGIC);
|
240
|
+
assert(c_buf->cursor == size);
|
143
241
|
|
242
|
+
qsort((void*) c_buf->buf, c_buf->cursor, sizeof(hn_entry_t), hn_entry_compare);
|
243
|
+
|
244
|
+
// copy the array into a new hash
|
144
245
|
result = rb_hash_new();
|
145
|
-
for (int k = 0; k < size; ++k) {
|
146
|
-
hn_record_t entry = buf[k];
|
147
246
|
|
247
|
+
for (int k = 0; k < c_buf->cursor; ++k) {
|
248
|
+
hn_entry_t entry = c_buf->buf[k];
|
249
|
+
|
250
|
+
assert(entry.magic == RECORD_MAGIC);
|
251
|
+
assert(entry.key != Qnil);
|
252
|
+
assert(entry.value != Qnil);
|
148
253
|
rb_hash_aset(result, entry.key, entry.value);
|
149
254
|
}
|
150
255
|
|
151
|
-
free(buf);
|
152
|
-
|
153
256
|
return result;
|
154
257
|
}
|
155
258
|
|
@@ -158,10 +261,17 @@ static VALUE hash_unnest_unnest(VALUE self)
|
|
158
261
|
void Init_hash_unnest(void) {
|
159
262
|
LOG("*** Init_hash_unnest\n");
|
160
263
|
|
264
|
+
LOG("sizeof(VALUE) = %lu\n", sizeof(VALUE));
|
265
|
+
LOG("sizeof(int*) = %lu\n", sizeof(int*));
|
266
|
+
assert(sizeof(VALUE) == sizeof(int*));
|
267
|
+
|
161
268
|
/* assume we haven't yet defined hash_unnest */
|
162
|
-
|
163
|
-
|
269
|
+
hn_hash_unnest_m = rb_define_module("HashUnnest");
|
270
|
+
hn_buffer_type = rb_define_class_under(hn_hash_unnest_m, "CHnBuf", rb_cObject);
|
271
|
+
hn_int_type = rb_define_class_under(hn_hash_unnest_m, "CInt", rb_cObject);
|
272
|
+
|
273
|
+
assert(hn_hash_unnest_m != Qnil);
|
164
274
|
|
165
|
-
rb_define_method(
|
275
|
+
rb_define_method(hn_hash_unnest_m, "unnest_c", hn_unnest, 0);
|
166
276
|
return;
|
167
277
|
}
|
data/lib/hash_unnest/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hash_unnest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julien Letessier
|
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
102
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
103
|
+
rubygems_version: 2.6.14
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: Fast hash unnesting
|