hash_unnest 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/ext/hash_unnest/extconf.rb +2 -1
- data/ext/hash_unnest/hash_unnest.c +149 -39
- data/lib/hash_unnest/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 3bcf3dbb4e02471f88372a12cf1908cb623b1c98
|
4
|
+
data.tar.gz: 11f945786febf21b933472abc145e2399bc950b1
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ea4c0c8e5b46c13a33c482d6601f1a3e59132f4a061b6f9e3249db98b862c096fd813e3b6992bdf134f3d123faf1be6bb185030c3f6b6860a4cecf473f16cb15
|
7
|
+
data.tar.gz: 3368483ac0fe6f3247d8050b37eb114571573cd57bde99c16e174b096cf696eb57af470d7edf08b9b50ab28509ae5a1a6c0d25ec74f761b396ad5cc9033030cc
|
data/ext/hash_unnest/extconf.rb
CHANGED
@@ -4,9 +4,10 @@ PLATFORM = `uname`.strip.upcase
|
|
4
4
|
SHARED_FLAGS = "--std=c99 -Wall -Wextra -Werror"
|
5
5
|
|
6
6
|
# production
|
7
|
-
$CFLAGS += " #{SHARED_FLAGS} -Os"
|
7
|
+
$CFLAGS += " #{SHARED_FLAGS} -Os -g"
|
8
8
|
|
9
9
|
# development
|
10
10
|
# $CFLAGS += " #{SHARED_FLAGS} -O0 -g -DDEBUG"
|
11
|
+
# $CFLAGS += " #{SHARED_FLAGS} -Os -g"
|
11
12
|
|
12
13
|
create_makefile('hash_unnest/hash_unnest')
|
@@ -2,17 +2,90 @@
|
|
2
2
|
#include <assert.h>
|
3
3
|
#include "hash_unnest.h"
|
4
4
|
|
5
|
+
#define RECORD_MAGIC 0xF00DF00D
|
6
|
+
#define BUFFER_MAGIC 0xBA0BAB00
|
7
|
+
|
5
8
|
// forward declarations of closures
|
6
|
-
static int
|
7
|
-
static int
|
9
|
+
static int hn_size_closure(VALUE key, VALUE val, VALUE in);
|
10
|
+
static int hn_unnest_closure(VALUE key, VALUE val, VALUE in);
|
11
|
+
|
12
|
+
static VALUE hn_hash_unnest_m = Qnil;
|
8
13
|
|
9
|
-
static VALUE
|
14
|
+
static VALUE hn_buffer_type = Qnil;
|
15
|
+
static VALUE hn_int_type = Qnil;
|
10
16
|
|
11
17
|
// a (sortable) key-value entry
|
12
18
|
typedef struct {
|
19
|
+
#ifndef NDEBUG
|
20
|
+
unsigned int magic;
|
21
|
+
#endif
|
13
22
|
VALUE key;
|
14
23
|
VALUE value;
|
15
|
-
}
|
24
|
+
} hn_entry_t;
|
25
|
+
|
26
|
+
// a safe C array of key, value pairs
|
27
|
+
typedef struct {
|
28
|
+
#ifndef NDEBUG
|
29
|
+
unsigned int magic;
|
30
|
+
#endif
|
31
|
+
int size;
|
32
|
+
int cursor;
|
33
|
+
hn_entry_t* buf;
|
34
|
+
} hn_buffer_t;
|
35
|
+
|
36
|
+
/******************************************************************************/
|
37
|
+
|
38
|
+
static void hn_buffer_free(hn_buffer_t* buf) {
|
39
|
+
LOG("*** hn_buffer_free\n");
|
40
|
+
|
41
|
+
xfree(buf->buf);
|
42
|
+
buf->buf = NULL;
|
43
|
+
#ifndef NDEBUG
|
44
|
+
buf->magic = 0;
|
45
|
+
#endif
|
46
|
+
xfree(buf);
|
47
|
+
}
|
48
|
+
|
49
|
+
static void hn_buffer_mark(hn_buffer_t* buf) {
|
50
|
+
LOG("*** hn_buffer_mark\n");
|
51
|
+
|
52
|
+
assert(buf->magic == BUFFER_MAGIC);
|
53
|
+
|
54
|
+
for (int k = 0; k < buf->cursor; ++k) {
|
55
|
+
LOG(" rb_gc_mark '%s'\n", StringValueCStr(buf->buf[k].key));
|
56
|
+
rb_gc_mark(buf->buf[k].key);
|
57
|
+
rb_gc_mark(buf->buf[k].value);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
static VALUE hn_buffer_alloc(int size) {
|
62
|
+
hn_buffer_t* buf = NULL;
|
63
|
+
|
64
|
+
buf = ALLOC(hn_buffer_t);
|
65
|
+
#ifndef NDEBUG
|
66
|
+
buf->magic = BUFFER_MAGIC;
|
67
|
+
#endif
|
68
|
+
buf->buf = ALLOC_N(hn_entry_t, size+1);
|
69
|
+
buf->size = size;
|
70
|
+
buf->cursor = 0;
|
71
|
+
|
72
|
+
for (int k = 0; k < size; ++k) {
|
73
|
+
#ifndef NDEBUG
|
74
|
+
buf->buf[k].magic = RECORD_MAGIC;
|
75
|
+
#endif
|
76
|
+
buf->buf[k].key = Qnil;
|
77
|
+
buf->buf[k].value = Qnil;
|
78
|
+
}
|
79
|
+
|
80
|
+
// sentinel entry
|
81
|
+
#ifndef NDEBUG
|
82
|
+
buf->buf[size].magic = 0;
|
83
|
+
#endif
|
84
|
+
buf->buf[size].key = Qnil;
|
85
|
+
buf->buf[size].value = Qnil;
|
86
|
+
|
87
|
+
return Data_Wrap_Struct(hn_buffer_type, hn_buffer_mark, hn_buffer_free, buf);
|
88
|
+
}
|
16
89
|
|
17
90
|
/******************************************************************************/
|
18
91
|
|
@@ -27,17 +100,17 @@ typedef struct {
|
|
27
100
|
// - When `val` is a hash, call ourselves recursively, appending the
|
28
101
|
// current key to the prefix.
|
29
102
|
//
|
30
|
-
static int
|
103
|
+
static int hn_unnest_closure(VALUE key, VALUE val, VALUE in)
|
31
104
|
{
|
32
105
|
VALUE prefix = rb_ary_entry(in, 0);
|
33
|
-
hn_record_t** output = (hn_record_t**) rb_ary_entry(in, 1);
|
34
106
|
|
35
107
|
#ifdef DEBUG
|
36
108
|
VALUE _str_key = rb_funcall(key, rb_intern("inspect"), 0);
|
37
109
|
VALUE _str_val = rb_funcall(val, rb_intern("inspect"), 0);
|
38
|
-
LOG("***
|
110
|
+
LOG("*** hn_unnest_closure (%s,%s,'%s')\n", StringValueCStr(_str_key), StringValueCStr(_str_val), StringValueCStr(prefix));
|
39
111
|
#endif
|
40
112
|
|
113
|
+
|
41
114
|
switch(TYPE(val)) {
|
42
115
|
case T_HASH:
|
43
116
|
{
|
@@ -50,19 +123,34 @@ static int hash_unnest_closure(VALUE key, VALUE val, VALUE in)
|
|
50
123
|
|
51
124
|
new_in = rb_ary_new2(2);
|
52
125
|
rb_ary_store(new_in, 0, new_prefix);
|
53
|
-
rb_ary_store(new_in, 1, (
|
126
|
+
rb_ary_store(new_in, 1, rb_ary_entry(in, 1));
|
54
127
|
|
55
|
-
rb_hash_foreach(val,
|
128
|
+
rb_hash_foreach(val, hn_unnest_closure, new_in);
|
56
129
|
break;
|
57
130
|
}
|
58
131
|
default:
|
59
132
|
{
|
60
133
|
VALUE new_key = rb_str_dup(prefix);
|
134
|
+
hn_buffer_t* c_buf;
|
61
135
|
|
62
136
|
rb_str_append(new_key, key);
|
63
|
-
(
|
64
|
-
|
65
|
-
|
137
|
+
Data_Get_Struct(rb_ary_entry(in, 1), hn_buffer_t, c_buf);
|
138
|
+
|
139
|
+
#ifdef DEBUG
|
140
|
+
{
|
141
|
+
VALUE _str_key = rb_funcall(new_key, rb_intern("inspect"), 0);
|
142
|
+
VALUE _str_val = rb_funcall(val, rb_intern("inspect"), 0);
|
143
|
+
LOG(" adding item %d: %s, %s\n", c_buf->cursor, StringValueCStr(_str_key), StringValueCStr(_str_val));
|
144
|
+
}
|
145
|
+
#endif
|
146
|
+
|
147
|
+
assert(c_buf->magic == BUFFER_MAGIC);
|
148
|
+
assert(c_buf->cursor >= 0);
|
149
|
+
assert(c_buf->cursor < c_buf->size);
|
150
|
+
assert(c_buf->buf[c_buf->cursor].magic == RECORD_MAGIC);
|
151
|
+
c_buf->buf[c_buf->cursor].key = new_key;
|
152
|
+
c_buf->buf[c_buf->cursor].value = val;
|
153
|
+
++(c_buf->cursor);
|
66
154
|
}
|
67
155
|
}
|
68
156
|
return ST_CONTINUE;
|
@@ -71,17 +159,18 @@ static int hash_unnest_closure(VALUE key, VALUE val, VALUE in)
|
|
71
159
|
/******************************************************************************/
|
72
160
|
|
73
161
|
// Recursively counts the number of leaves ("size") of a nested hash.
|
74
|
-
static int
|
162
|
+
static int hn_size_closure(VALUE key, VALUE val, VALUE in)
|
75
163
|
{
|
164
|
+
int* size = NULL;
|
76
165
|
switch(TYPE(val)) {
|
77
166
|
case T_HASH:
|
78
167
|
{
|
79
|
-
rb_hash_foreach(val,
|
168
|
+
rb_hash_foreach(val, hn_size_closure, in);
|
80
169
|
break;
|
81
170
|
}
|
82
171
|
default:
|
83
172
|
{
|
84
|
-
int
|
173
|
+
Data_Get_Struct(in, int, size);
|
85
174
|
*size = *size + 1;
|
86
175
|
}
|
87
176
|
}
|
@@ -90,10 +179,10 @@ static int hash_unnest_size_closure(VALUE key, VALUE val, VALUE in)
|
|
90
179
|
|
91
180
|
/******************************************************************************/
|
92
181
|
|
93
|
-
|
94
|
-
int
|
95
|
-
|
96
|
-
|
182
|
+
/* Compares the `key`s in `hn_entry_t`s, for sorting purposes. */
|
183
|
+
int hn_entry_compare(const void* a, const void* b) {
|
184
|
+
hn_entry_t* record_a = (hn_entry_t*) a;
|
185
|
+
hn_entry_t* record_b = (hn_entry_t*) b;
|
97
186
|
|
98
187
|
LOG("compare '%s' to '%s'\n", StringValueCStr(record_a->key), StringValueCStr(record_b->key));
|
99
188
|
|
@@ -106,21 +195,26 @@ int hn_record_compare(const void* a, const void* b) {
|
|
106
195
|
|
107
196
|
/******************************************************************************/
|
108
197
|
|
109
|
-
static VALUE
|
198
|
+
static VALUE hn_unnest(VALUE self)
|
110
199
|
{
|
111
|
-
VALUE result, in, prefix;
|
112
|
-
|
113
|
-
hn_record_t* buf_ptr;
|
200
|
+
VALUE result, in, buf, prefix;
|
201
|
+
hn_buffer_t* c_buf = NULL;
|
114
202
|
int size = 0;
|
115
203
|
|
116
204
|
#ifdef DEBUG
|
117
205
|
{
|
118
206
|
VALUE _str_self = rb_funcall(self, rb_intern("to_s"), 0);
|
119
|
-
LOG("***
|
207
|
+
LOG("*** hn_unnest (%s)\n", StringValueCStr(_str_self));
|
120
208
|
}
|
121
209
|
#endif
|
122
210
|
|
123
|
-
|
211
|
+
// count leaves in input
|
212
|
+
rb_hash_foreach(
|
213
|
+
self,
|
214
|
+
hn_size_closure,
|
215
|
+
Data_Wrap_Struct(hn_int_type, NULL, NULL, &size)
|
216
|
+
);
|
217
|
+
|
124
218
|
#ifdef DEBUG
|
125
219
|
{
|
126
220
|
VALUE _str_self = rb_funcall(self, rb_intern("to_s"), 0);
|
@@ -128,28 +222,37 @@ static VALUE hash_unnest_unnest(VALUE self)
|
|
128
222
|
}
|
129
223
|
#endif
|
130
224
|
|
131
|
-
|
132
|
-
buf_ptr = buf;
|
133
|
-
|
225
|
+
// unnest `self` into `buf`
|
134
226
|
prefix = rb_str_new("", 0);
|
227
|
+
buf = hn_buffer_alloc(size);
|
135
228
|
|
136
229
|
in = rb_ary_new2(2);
|
137
230
|
rb_ary_store(in, 0, prefix);
|
138
|
-
rb_ary_store(in, 1,
|
139
|
-
|
140
|
-
rb_hash_foreach(self,
|
231
|
+
rb_ary_store(in, 1, buf);
|
232
|
+
|
233
|
+
rb_hash_foreach(self, hn_unnest_closure, in);
|
141
234
|
|
142
|
-
|
235
|
+
// sort the C array of key, value pairs
|
236
|
+
Data_Get_Struct(buf, hn_buffer_t, c_buf);
|
237
|
+
assert(c_buf != NULL);
|
238
|
+
assert(c_buf->buf != NULL);
|
239
|
+
assert(c_buf->magic == BUFFER_MAGIC);
|
240
|
+
assert(c_buf->cursor == size);
|
143
241
|
|
242
|
+
qsort((void*) c_buf->buf, c_buf->cursor, sizeof(hn_entry_t), hn_entry_compare);
|
243
|
+
|
244
|
+
// copy the array into a new hash
|
144
245
|
result = rb_hash_new();
|
145
|
-
for (int k = 0; k < size; ++k) {
|
146
|
-
hn_record_t entry = buf[k];
|
147
246
|
|
247
|
+
for (int k = 0; k < c_buf->cursor; ++k) {
|
248
|
+
hn_entry_t entry = c_buf->buf[k];
|
249
|
+
|
250
|
+
assert(entry.magic == RECORD_MAGIC);
|
251
|
+
assert(entry.key != Qnil);
|
252
|
+
assert(entry.value != Qnil);
|
148
253
|
rb_hash_aset(result, entry.key, entry.value);
|
149
254
|
}
|
150
255
|
|
151
|
-
free(buf);
|
152
|
-
|
153
256
|
return result;
|
154
257
|
}
|
155
258
|
|
@@ -158,10 +261,17 @@ static VALUE hash_unnest_unnest(VALUE self)
|
|
158
261
|
void Init_hash_unnest(void) {
|
159
262
|
LOG("*** Init_hash_unnest\n");
|
160
263
|
|
264
|
+
LOG("sizeof(VALUE) = %lu\n", sizeof(VALUE));
|
265
|
+
LOG("sizeof(int*) = %lu\n", sizeof(int*));
|
266
|
+
assert(sizeof(VALUE) == sizeof(int*));
|
267
|
+
|
161
268
|
/* assume we haven't yet defined hash_unnest */
|
162
|
-
|
163
|
-
|
269
|
+
hn_hash_unnest_m = rb_define_module("HashUnnest");
|
270
|
+
hn_buffer_type = rb_define_class_under(hn_hash_unnest_m, "CHnBuf", rb_cObject);
|
271
|
+
hn_int_type = rb_define_class_under(hn_hash_unnest_m, "CInt", rb_cObject);
|
272
|
+
|
273
|
+
assert(hn_hash_unnest_m != Qnil);
|
164
274
|
|
165
|
-
rb_define_method(
|
275
|
+
rb_define_method(hn_hash_unnest_m, "unnest_c", hn_unnest, 0);
|
166
276
|
return;
|
167
277
|
}
|
data/lib/hash_unnest/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: hash_unnest
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.0
|
4
|
+
version: 1.0.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Julien Letessier
|
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
100
100
|
version: '0'
|
101
101
|
requirements: []
|
102
102
|
rubyforge_project:
|
103
|
-
rubygems_version: 2.
|
103
|
+
rubygems_version: 2.6.14
|
104
104
|
signing_key:
|
105
105
|
specification_version: 4
|
106
106
|
summary: Fast hash unnesting
|