hash_unnest 1.0.0 → 1.0.1

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA256:
3
- metadata.gz: c988aea31df14ef50afcaf73818f844a0ab61aca55fcf0614425a95441a6e810
4
- data.tar.gz: 0f47f4110f2ee9a2ba2bd1133d8d55ec0c7c44135b569abc3efae44f6a18790f
2
+ SHA1:
3
+ metadata.gz: 3bcf3dbb4e02471f88372a12cf1908cb623b1c98
4
+ data.tar.gz: 11f945786febf21b933472abc145e2399bc950b1
5
5
  SHA512:
6
- metadata.gz: d75dfe4e4d93a2ba794d0b0d4cfac7120aca446372063af6e012dee0ffcfcfd6a8ece663394359d4b273dac3ef74f7f039ac0a4b28d80767fa0c1264047b3bac
7
- data.tar.gz: 98252c49b5744768268406c302441eb3654daf8c120cfb9d8ad605df97d22176591a20000616ec46afca8b933517a17bbcf565f15c90cdc5719a16af35438dc6
6
+ metadata.gz: ea4c0c8e5b46c13a33c482d6601f1a3e59132f4a061b6f9e3249db98b862c096fd813e3b6992bdf134f3d123faf1be6bb185030c3f6b6860a4cecf473f16cb15
7
+ data.tar.gz: 3368483ac0fe6f3247d8050b37eb114571573cd57bde99c16e174b096cf696eb57af470d7edf08b9b50ab28509ae5a1a6c0d25ec74f761b396ad5cc9033030cc
@@ -4,9 +4,10 @@ PLATFORM = `uname`.strip.upcase
4
4
  SHARED_FLAGS = "--std=c99 -Wall -Wextra -Werror"
5
5
 
6
6
  # production
7
- $CFLAGS += " #{SHARED_FLAGS} -Os"
7
+ $CFLAGS += " #{SHARED_FLAGS} -Os -g"
8
8
 
9
9
  # development
10
10
  # $CFLAGS += " #{SHARED_FLAGS} -O0 -g -DDEBUG"
11
+ # $CFLAGS += " #{SHARED_FLAGS} -Os -g"
11
12
 
12
13
  create_makefile('hash_unnest/hash_unnest')
@@ -2,17 +2,90 @@
2
2
  #include <assert.h>
3
3
  #include "hash_unnest.h"
4
4
 
5
+ #define RECORD_MAGIC 0xF00DF00D
6
+ #define BUFFER_MAGIC 0xBA0BAB00
7
+
5
8
  // forward declarations of closures
6
- static int hash_unnest_size_closure(VALUE key, VALUE val, VALUE in);
7
- static int hash_unnest_closure(VALUE key, VALUE val, VALUE in);
9
+ static int hn_size_closure(VALUE key, VALUE val, VALUE in);
10
+ static int hn_unnest_closure(VALUE key, VALUE val, VALUE in);
11
+
12
+ static VALUE hn_hash_unnest_m = Qnil;
8
13
 
9
- static VALUE eHashUnnestModule = Qnil;
14
+ static VALUE hn_buffer_type = Qnil;
15
+ static VALUE hn_int_type = Qnil;
10
16
 
11
17
  // a (sortable) key-value entry
12
18
  typedef struct {
19
+ #ifndef NDEBUG
20
+ unsigned int magic;
21
+ #endif
13
22
  VALUE key;
14
23
  VALUE value;
15
- } hn_record_t;
24
+ } hn_entry_t;
25
+
26
+ // a safe C array of key, value pairs
27
+ typedef struct {
28
+ #ifndef NDEBUG
29
+ unsigned int magic;
30
+ #endif
31
+ int size;
32
+ int cursor;
33
+ hn_entry_t* buf;
34
+ } hn_buffer_t;
35
+
36
+ /******************************************************************************/
37
+
38
+ static void hn_buffer_free(hn_buffer_t* buf) {
39
+ LOG("*** hn_buffer_free\n");
40
+
41
+ xfree(buf->buf);
42
+ buf->buf = NULL;
43
+ #ifndef NDEBUG
44
+ buf->magic = 0;
45
+ #endif
46
+ xfree(buf);
47
+ }
48
+
49
+ static void hn_buffer_mark(hn_buffer_t* buf) {
50
+ LOG("*** hn_buffer_mark\n");
51
+
52
+ assert(buf->magic == BUFFER_MAGIC);
53
+
54
+ for (int k = 0; k < buf->cursor; ++k) {
55
+ LOG(" rb_gc_mark '%s'\n", StringValueCStr(buf->buf[k].key));
56
+ rb_gc_mark(buf->buf[k].key);
57
+ rb_gc_mark(buf->buf[k].value);
58
+ }
59
+ }
60
+
61
+ static VALUE hn_buffer_alloc(int size) {
62
+ hn_buffer_t* buf = NULL;
63
+
64
+ buf = ALLOC(hn_buffer_t);
65
+ #ifndef NDEBUG
66
+ buf->magic = BUFFER_MAGIC;
67
+ #endif
68
+ buf->buf = ALLOC_N(hn_entry_t, size+1);
69
+ buf->size = size;
70
+ buf->cursor = 0;
71
+
72
+ for (int k = 0; k < size; ++k) {
73
+ #ifndef NDEBUG
74
+ buf->buf[k].magic = RECORD_MAGIC;
75
+ #endif
76
+ buf->buf[k].key = Qnil;
77
+ buf->buf[k].value = Qnil;
78
+ }
79
+
80
+ // sentinel entry
81
+ #ifndef NDEBUG
82
+ buf->buf[size].magic = 0;
83
+ #endif
84
+ buf->buf[size].key = Qnil;
85
+ buf->buf[size].value = Qnil;
86
+
87
+ return Data_Wrap_Struct(hn_buffer_type, hn_buffer_mark, hn_buffer_free, buf);
88
+ }
16
89
 
17
90
  /******************************************************************************/
18
91
 
@@ -27,17 +100,17 @@ typedef struct {
27
100
  // - When `val` is a hash, call ourselves recursively, appending the
28
101
  // current key to the prefix.
29
102
  //
30
- static int hash_unnest_closure(VALUE key, VALUE val, VALUE in)
103
+ static int hn_unnest_closure(VALUE key, VALUE val, VALUE in)
31
104
  {
32
105
  VALUE prefix = rb_ary_entry(in, 0);
33
- hn_record_t** output = (hn_record_t**) rb_ary_entry(in, 1);
34
106
 
35
107
  #ifdef DEBUG
36
108
  VALUE _str_key = rb_funcall(key, rb_intern("inspect"), 0);
37
109
  VALUE _str_val = rb_funcall(val, rb_intern("inspect"), 0);
38
- LOG("*** hash_unnest_closure (%s,%s,'%s')\n", StringValueCStr(_str_key), StringValueCStr(_str_val), StringValueCStr(prefix));
110
+ LOG("*** hn_unnest_closure (%s,%s,'%s')\n", StringValueCStr(_str_key), StringValueCStr(_str_val), StringValueCStr(prefix));
39
111
  #endif
40
112
 
113
+
41
114
  switch(TYPE(val)) {
42
115
  case T_HASH:
43
116
  {
@@ -50,19 +123,34 @@ static int hash_unnest_closure(VALUE key, VALUE val, VALUE in)
50
123
 
51
124
  new_in = rb_ary_new2(2);
52
125
  rb_ary_store(new_in, 0, new_prefix);
53
- rb_ary_store(new_in, 1, (VALUE) output);
126
+ rb_ary_store(new_in, 1, rb_ary_entry(in, 1));
54
127
 
55
- rb_hash_foreach(val, hash_unnest_closure, new_in);
128
+ rb_hash_foreach(val, hn_unnest_closure, new_in);
56
129
  break;
57
130
  }
58
131
  default:
59
132
  {
60
133
  VALUE new_key = rb_str_dup(prefix);
134
+ hn_buffer_t* c_buf;
61
135
 
62
136
  rb_str_append(new_key, key);
63
- (*output)->key = new_key;
64
- (*output)->value = val;
65
- *output = *output + 1;
137
+ Data_Get_Struct(rb_ary_entry(in, 1), hn_buffer_t, c_buf);
138
+
139
+ #ifdef DEBUG
140
+ {
141
+ VALUE _str_key = rb_funcall(new_key, rb_intern("inspect"), 0);
142
+ VALUE _str_val = rb_funcall(val, rb_intern("inspect"), 0);
143
+ LOG(" adding item %d: %s, %s\n", c_buf->cursor, StringValueCStr(_str_key), StringValueCStr(_str_val));
144
+ }
145
+ #endif
146
+
147
+ assert(c_buf->magic == BUFFER_MAGIC);
148
+ assert(c_buf->cursor >= 0);
149
+ assert(c_buf->cursor < c_buf->size);
150
+ assert(c_buf->buf[c_buf->cursor].magic == RECORD_MAGIC);
151
+ c_buf->buf[c_buf->cursor].key = new_key;
152
+ c_buf->buf[c_buf->cursor].value = val;
153
+ ++(c_buf->cursor);
66
154
  }
67
155
  }
68
156
  return ST_CONTINUE;
@@ -71,17 +159,18 @@ static int hash_unnest_closure(VALUE key, VALUE val, VALUE in)
71
159
  /******************************************************************************/
72
160
 
73
161
  // Recursively counts the number of leaves ("size") of a nested hash.
74
- static int hash_unnest_size_closure(VALUE key, VALUE val, VALUE in)
162
+ static int hn_size_closure(VALUE key, VALUE val, VALUE in)
75
163
  {
164
+ int* size = NULL;
76
165
  switch(TYPE(val)) {
77
166
  case T_HASH:
78
167
  {
79
- rb_hash_foreach(val, hash_unnest_size_closure, in);
168
+ rb_hash_foreach(val, hn_size_closure, in);
80
169
  break;
81
170
  }
82
171
  default:
83
172
  {
84
- int* size = (int*) in;
173
+ Data_Get_Struct(in, int, size);
85
174
  *size = *size + 1;
86
175
  }
87
176
  }
@@ -90,10 +179,10 @@ static int hash_unnest_size_closure(VALUE key, VALUE val, VALUE in)
90
179
 
91
180
  /******************************************************************************/
92
181
 
93
- // Compares the `key`s in `hn_record_t`s, for sorting purposes.
94
- int hn_record_compare(const void* a, const void* b) {
95
- hn_record_t* record_a = (hn_record_t*) a;
96
- hn_record_t* record_b = (hn_record_t*) b;
182
+ /* Compares the `key`s in `hn_entry_t`s, for sorting purposes. */
183
+ int hn_entry_compare(const void* a, const void* b) {
184
+ hn_entry_t* record_a = (hn_entry_t*) a;
185
+ hn_entry_t* record_b = (hn_entry_t*) b;
97
186
 
98
187
  LOG("compare '%s' to '%s'\n", StringValueCStr(record_a->key), StringValueCStr(record_b->key));
99
188
 
@@ -106,21 +195,26 @@ int hn_record_compare(const void* a, const void* b) {
106
195
 
107
196
  /******************************************************************************/
108
197
 
109
- static VALUE hash_unnest_unnest(VALUE self)
198
+ static VALUE hn_unnest(VALUE self)
110
199
  {
111
- VALUE result, in, prefix;
112
- hn_record_t* buf;
113
- hn_record_t* buf_ptr;
200
+ VALUE result, in, buf, prefix;
201
+ hn_buffer_t* c_buf = NULL;
114
202
  int size = 0;
115
203
 
116
204
  #ifdef DEBUG
117
205
  {
118
206
  VALUE _str_self = rb_funcall(self, rb_intern("to_s"), 0);
119
- LOG("*** hash_unnest_unnest (%s)\n", StringValueCStr(_str_self));
207
+ LOG("*** hn_unnest (%s)\n", StringValueCStr(_str_self));
120
208
  }
121
209
  #endif
122
210
 
123
- rb_hash_foreach(self, hash_unnest_size_closure, (VALUE) &size);
211
+ // count leaves in input
212
+ rb_hash_foreach(
213
+ self,
214
+ hn_size_closure,
215
+ Data_Wrap_Struct(hn_int_type, NULL, NULL, &size)
216
+ );
217
+
124
218
  #ifdef DEBUG
125
219
  {
126
220
  VALUE _str_self = rb_funcall(self, rb_intern("to_s"), 0);
@@ -128,28 +222,37 @@ static VALUE hash_unnest_unnest(VALUE self)
128
222
  }
129
223
  #endif
130
224
 
131
- buf = (hn_record_t*) malloc(size * sizeof(hn_record_t));
132
- buf_ptr = buf;
133
-
225
+ // unnest `self` into `buf`
134
226
  prefix = rb_str_new("", 0);
227
+ buf = hn_buffer_alloc(size);
135
228
 
136
229
  in = rb_ary_new2(2);
137
230
  rb_ary_store(in, 0, prefix);
138
- rb_ary_store(in, 1, (VALUE) &buf_ptr);
139
-
140
- rb_hash_foreach(self, hash_unnest_closure, in);
231
+ rb_ary_store(in, 1, buf);
232
+
233
+ rb_hash_foreach(self, hn_unnest_closure, in);
141
234
 
142
- qsort((void*) buf, size, sizeof(hn_record_t), hn_record_compare);
235
+ // sort the C array of key, value pairs
236
+ Data_Get_Struct(buf, hn_buffer_t, c_buf);
237
+ assert(c_buf != NULL);
238
+ assert(c_buf->buf != NULL);
239
+ assert(c_buf->magic == BUFFER_MAGIC);
240
+ assert(c_buf->cursor == size);
143
241
 
242
+ qsort((void*) c_buf->buf, c_buf->cursor, sizeof(hn_entry_t), hn_entry_compare);
243
+
244
+ // copy the array into a new hash
144
245
  result = rb_hash_new();
145
- for (int k = 0; k < size; ++k) {
146
- hn_record_t entry = buf[k];
147
246
 
247
+ for (int k = 0; k < c_buf->cursor; ++k) {
248
+ hn_entry_t entry = c_buf->buf[k];
249
+
250
+ assert(entry.magic == RECORD_MAGIC);
251
+ assert(entry.key != Qnil);
252
+ assert(entry.value != Qnil);
148
253
  rb_hash_aset(result, entry.key, entry.value);
149
254
  }
150
255
 
151
- free(buf);
152
-
153
256
  return result;
154
257
  }
155
258
 
@@ -158,10 +261,17 @@ static VALUE hash_unnest_unnest(VALUE self)
158
261
  void Init_hash_unnest(void) {
159
262
  LOG("*** Init_hash_unnest\n");
160
263
 
264
+ LOG("sizeof(VALUE) = %lu\n", sizeof(VALUE));
265
+ LOG("sizeof(int*) = %lu\n", sizeof(int*));
266
+ assert(sizeof(VALUE) == sizeof(int*));
267
+
161
268
  /* assume we haven't yet defined hash_unnest */
162
- eHashUnnestModule = rb_define_module("HashUnnest");
163
- assert(eHashUnnestModule != Qnil);
269
+ hn_hash_unnest_m = rb_define_module("HashUnnest");
270
+ hn_buffer_type = rb_define_class_under(hn_hash_unnest_m, "CHnBuf", rb_cObject);
271
+ hn_int_type = rb_define_class_under(hn_hash_unnest_m, "CInt", rb_cObject);
272
+
273
+ assert(hn_hash_unnest_m != Qnil);
164
274
 
165
- rb_define_method(eHashUnnestModule, "unnest_c", hash_unnest_unnest, 0);
275
+ rb_define_method(hn_hash_unnest_m, "unnest_c", hn_unnest, 0);
166
276
  return;
167
277
  }
@@ -1,4 +1,4 @@
1
1
  module HashUnnest
2
- VERSION = '1.0.0'.freeze
2
+ VERSION = '1.0.1'.freeze
3
3
  end
4
4
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: hash_unnest
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.0
4
+ version: 1.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Julien Letessier
@@ -100,7 +100,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
100
100
  version: '0'
101
101
  requirements: []
102
102
  rubyforge_project:
103
- rubygems_version: 2.7.6
103
+ rubygems_version: 2.6.14
104
104
  signing_key:
105
105
  specification_version: 4
106
106
  summary: Fast hash unnesting