fast-xml 1.0.1 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +145 -37
- data/ext/fastxml/ccan/build_assert/build_assert.h +40 -0
- data/ext/fastxml/ccan/check_type/check_type.h +63 -0
- data/ext/fastxml/ccan/container_of/container_of.h +142 -0
- data/ext/fastxml/ccan/list/list.h +773 -0
- data/ext/fastxml/ccan/str/str.h +16 -0
- data/ext/fastxml/fastxml.c +35 -2
- data/ext/fastxml/xh.c +19 -8
- data/ext/fastxml/xh.h +2 -1
- data/ext/fastxml/xh_config.h +3 -0
- data/ext/fastxml/xh_core.h +1 -5
- data/ext/fastxml/xh_log.h +37 -27
- data/ext/fastxml/xh_param.c +3 -11
- data/ext/fastxml/xh_param.h +1 -1
- data/ext/fastxml/xh_reader.c +528 -0
- data/ext/fastxml/xh_reader.h +43 -0
- data/ext/fastxml/xh_ruby_hash.h +384 -0
- data/ext/fastxml/xh_x2h.c +1002 -0
- data/ext/fastxml/xh_x2h.h +133 -0
- data/lib/fastxml/version.rb +1 -1
- metadata +13 -3
@@ -0,0 +1,384 @@
|
|
1
|
+
#ifndef _XH_RUBY_HASH_H_
|
2
|
+
#define _XH_RUBY_HASH_H_
|
3
|
+
|
4
|
+
#include "xh_config.h"
|
5
|
+
#include "xh_core.h"
|
6
|
+
#include "ccan/list/list.h"
|
7
|
+
|
8
|
+
/*
 * Layout of a Ruby Hash object as this extension sees it. It is declared
 * here so the code below can reach the hash's st_table (ntbl) directly.
 * NOTE(review): presumably this must match the interpreter's own private
 * definition field for field -- confirm against the targeted Ruby version.
 */
struct RHash {
    struct RBasic basic;
    struct st_table *ntbl;      /* possibly 0 */
    int iter_lev;
    const VALUE ifnone;
};

/* View a VALUE as a pointer to struct RHash. */
#define RHASH(obj) (R_CAST(RHash)(obj))
|
16
|
+
|
17
|
+
/*
 * If the accessor macros are already defined, drop them and redefine them
 * in terms of the struct RHash layout declared in this file.
 * RHASH_SIZE yields 0 when the hash has no table (ntbl == 0).
 */
#ifdef RHASH_ITER_LEV
# undef RHASH_ITER_LEV
# undef RHASH_IFNONE
# undef RHASH_SIZE
# define RHASH_ITER_LEV(hash_obj) (RHASH(hash_obj)->iter_lev)
# define RHASH_IFNONE(hash_obj) (RHASH(hash_obj)->ifnone)
# define RHASH_SIZE(hash_obj) (RHASH(hash_obj)->ntbl ? (st_index_t)RHASH(hash_obj)->ntbl->num_entries : 0)
#endif
|
25
|
+
|
26
|
+
typedef struct st_table_entry st_table_entry;
|
27
|
+
|
28
|
+
struct st_table_entry {
|
29
|
+
st_index_t hash;
|
30
|
+
st_data_t key;
|
31
|
+
st_data_t record;
|
32
|
+
st_table_entry *next;
|
33
|
+
struct list_node olist;
|
34
|
+
};
|
35
|
+
|
36
|
+
typedef struct st_packed_entry {
|
37
|
+
st_index_t hash;
|
38
|
+
st_data_t key, val;
|
39
|
+
} st_packed_entry;
|
40
|
+
|
41
|
+
/* add_direct() rehashes once num_entries exceeds this many entries per bin. */
#define ST_DEFAULT_MAX_DENSITY 5
/* Number of bins a table gets when unpack_entries() converts it. */
#define ST_DEFAULT_INIT_TABLE_SIZE 16
/* Size of the packed storage, counted in pointer-sized slots. */
#define ST_DEFAULT_PACKED_TABLE_SIZE 18
/* Pointer-sized slots taken by one packed entry. */
#define PACKED_UNIT ((int)(sizeof(st_packed_entry) / sizeof(st_table_entry*)))
/* How many packed entries fit into the packed storage. */
#define MAX_PACKED_HASH ((int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry)))

/* Seed and multiplier used by the string hash functions below. */
#define FNV1_32A_INIT 0x811c9dc5
#define FNV_32_PRIME 0x01000193
|
49
|
+
|
50
|
+
#define type_numhash st_hashtype_num
|
51
|
+
const struct st_hash_type st_hashtype_num = {
|
52
|
+
st_numcmp,
|
53
|
+
st_numhash,
|
54
|
+
};
|
55
|
+
|
56
|
+
static st_index_t
|
57
|
+
strcasehash(st_data_t arg)
|
58
|
+
{
|
59
|
+
register const char *string = (const char *)arg;
|
60
|
+
register st_index_t hval = FNV1_32A_INIT;
|
61
|
+
|
62
|
+
/*
|
63
|
+
* FNV-1a hash each octet in the buffer
|
64
|
+
*/
|
65
|
+
while (*string) {
|
66
|
+
unsigned int c = (unsigned char)*string++;
|
67
|
+
if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
|
68
|
+
hval ^= c;
|
69
|
+
|
70
|
+
/* multiply by the 32 bit FNV magic prime mod 2^32 */
|
71
|
+
hval *= FNV_32_PRIME;
|
72
|
+
}
|
73
|
+
return hval;
|
74
|
+
}
|
75
|
+
|
76
|
+
static st_index_t
|
77
|
+
strhash(st_data_t arg)
|
78
|
+
{
|
79
|
+
register const char *string = (const char *)arg;
|
80
|
+
return st_hash(string, strlen(string), FNV1_32A_INIT);
|
81
|
+
}
|
82
|
+
|
83
|
+
/* extern int strcmp(const char *, const char *); */
|
84
|
+
static st_index_t strhash(st_data_t);
|
85
|
+
static const struct st_hash_type type_strhash = {
|
86
|
+
strcmp,
|
87
|
+
strhash,
|
88
|
+
};
|
89
|
+
|
90
|
+
static st_index_t strcasehash(st_data_t);
|
91
|
+
static const struct st_hash_type type_strcasehash = {
|
92
|
+
st_locale_insensitive_strcasecmp,
|
93
|
+
strcasehash,
|
94
|
+
};
|
95
|
+
|
96
|
+
/*
 * EQUAL: true when x is the very same key word as the entry's key, or when
 * the table's compare callback reports equality (returns 0).
 */
#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)

/* do_hash: run the table's hash callback on key. */
#define do_hash(key,table) ((st_index_t)(*(table)->type->hash)((key)))
/*
 * hash_pos: reduce hash h to a bin index for a table with n bins.
 * n must be a power of two. The argument is parenthesized so that an
 * expression such as `1 << k` is not torn apart by the `- 1`.
 */
#define hash_pos(h,n) ((h) & ((n) - 1))

/*
 * PTR_NOT_EQUAL: true while ptr is a non-null entry that does NOT match
 * (hash_val, key); false for a null ptr or for a match. Used as the
 * "keep walking" condition when scanning a bin chain.
 */
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
    ((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
|
103
|
+
|
104
|
+
/*
 * Allocation hooks for table entries, tables and bin arrays.
 *
 * These go through Ruby's xmalloc/xcalloc/xfree rather than the bare libc
 * calls: the callers in this file dereference the result without checking
 * it, and per the Ruby C API the x* allocators raise NoMemoryError instead
 * of returning NULL.
 * NOTE(review): the memory ends up inside a Ruby-owned Hash; presumably the
 * interpreter releases it with its own allocator -- confirm.
 */
#define st_alloc_entry() ((st_table_entry *)xmalloc(sizeof(st_table_entry)))
#define st_free_entry(entry) xfree(entry)
#define st_alloc_table() ((st_table *)xmalloc(sizeof(st_table)))
#define st_dealloc_table(table) xfree(table)
/* Bin arrays start out zeroed (all chains empty). */
#define st_alloc_bins(size) ((st_table_entry **)xcalloc((size), sizeof(st_table_entry *)))
#define st_free_bins(bins, size) xfree(bins)
|
111
|
+
static inline st_table_entry**
|
112
|
+
st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
|
113
|
+
{
|
114
|
+
bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
|
115
|
+
MEMZERO(bins, st_table_entry*, newsize);
|
116
|
+
return bins;
|
117
|
+
}
|
118
|
+
|
119
|
+
/*
 * Shortcut
 *
 * These are object-like macros: from here on every bare `bins` or
 * `real_entries` token expands to the member path below, so table->bins
 * reads table->as.big.bins and table->real_entries reads
 * table->as.packed.real_entries. Do not use either word as an identifier
 * after this point.
 */
#define bins as.big.bins
#define real_entries as.packed.real_entries
|
122
|
+
|
123
|
+
/*
 * Accessors for a table in packed form: PACKED_BINS is the entry array,
 * PACKED_ENT one element of it, PKEY/PVAL/PHASH its fields (usable as
 * lvalues), and the *_SET forms assign a field.
 */
#define PACKED_BINS(tbl) ((tbl)->as.packed.entries)
#define PACKED_ENT(tbl, idx) PACKED_BINS(tbl)[idx]
#define PKEY(tbl, idx) PACKED_ENT((tbl), (idx)).key
#define PVAL(tbl, idx) PACKED_ENT((tbl), (idx)).val
#define PHASH(tbl, idx) PACKED_ENT((tbl), (idx)).hash
#define PKEY_SET(tbl, idx, new_v) (PKEY((tbl), (idx)) = (new_v))
#define PVAL_SET(tbl, idx, new_v) (PVAL((tbl), (idx)) = (new_v))
#define PHASH_SET(tbl, idx, new_v) (PHASH((tbl), (idx)) = (new_v))
|
132
|
+
|
133
|
+
static struct list_head *
|
134
|
+
st_head(const st_table *tbl)
|
135
|
+
{
|
136
|
+
uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
|
137
|
+
return (struct list_head *)addr;
|
138
|
+
}
|
139
|
+
|
140
|
+
static st_index_t
|
141
|
+
next_pow2(st_index_t x)
|
142
|
+
{
|
143
|
+
x |= x >> 1;
|
144
|
+
x |= x >> 2;
|
145
|
+
x |= x >> 4;
|
146
|
+
x |= x >> 8;
|
147
|
+
x |= x >> 16;
|
148
|
+
#if SIZEOF_ST_INDEX_T == 8
|
149
|
+
x |= x >> 32;
|
150
|
+
#endif
|
151
|
+
return x + 1;
|
152
|
+
}
|
153
|
+
|
154
|
+
static st_index_t
|
155
|
+
new_size(st_index_t size)
|
156
|
+
{
|
157
|
+
st_index_t n;
|
158
|
+
|
159
|
+
if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
|
160
|
+
return size;
|
161
|
+
|
162
|
+
n = next_pow2(size);
|
163
|
+
if (n > size)
|
164
|
+
return n;
|
165
|
+
#ifndef NOT_RUBY
|
166
|
+
rb_raise(rb_eRuntimeError, "st_table too big");
|
167
|
+
#endif
|
168
|
+
return -1; /* should raise exception */
|
169
|
+
}
|
170
|
+
|
171
|
+
static void
|
172
|
+
rehash(register st_table *table)
|
173
|
+
{
|
174
|
+
register st_table_entry *ptr = 0, **new_bins;
|
175
|
+
st_index_t new_num_bins, hash_val;
|
176
|
+
|
177
|
+
new_num_bins = new_size(table->num_bins+1);
|
178
|
+
new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
|
179
|
+
table->num_bins = new_num_bins;
|
180
|
+
table->bins = new_bins;
|
181
|
+
|
182
|
+
list_for_each(st_head(table), ptr, olist) {
|
183
|
+
hash_val = hash_pos(ptr->hash, new_num_bins);
|
184
|
+
ptr->next = new_bins[hash_val];
|
185
|
+
new_bins[hash_val] = ptr;
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
static st_table_entry *
|
190
|
+
find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
|
191
|
+
st_index_t bin_pos)
|
192
|
+
{
|
193
|
+
register st_table_entry *ptr = table->bins[bin_pos];
|
194
|
+
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
|
195
|
+
while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
|
196
|
+
ptr = ptr->next;
|
197
|
+
}
|
198
|
+
ptr = ptr->next;
|
199
|
+
}
|
200
|
+
return ptr;
|
201
|
+
}
|
202
|
+
|
203
|
+
static inline st_index_t
|
204
|
+
find_packed_index_from(const st_table *table, st_index_t hash_val,
|
205
|
+
st_data_t key, st_index_t i)
|
206
|
+
{
|
207
|
+
while (i < table->real_entries &&
|
208
|
+
(PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
|
209
|
+
i++;
|
210
|
+
}
|
211
|
+
return i;
|
212
|
+
}
|
213
|
+
|
214
|
+
static inline st_index_t
|
215
|
+
find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
|
216
|
+
{
|
217
|
+
return find_packed_index_from(table, hash_val, key, 0);
|
218
|
+
}
|
219
|
+
|
220
|
+
static inline st_table_entry *
|
221
|
+
new_entry(st_table * table, st_data_t key, st_data_t value,
|
222
|
+
st_index_t hash_val, register st_index_t bin_pos)
|
223
|
+
{
|
224
|
+
register st_table_entry *entry = st_alloc_entry();
|
225
|
+
|
226
|
+
entry->next = table->bins[bin_pos];
|
227
|
+
table->bins[bin_pos] = entry;
|
228
|
+
entry->hash = hash_val;
|
229
|
+
entry->key = key;
|
230
|
+
entry->record = value;
|
231
|
+
|
232
|
+
return entry;
|
233
|
+
}
|
234
|
+
|
235
|
+
static inline st_data_t *
|
236
|
+
add_direct(st_table *table, st_data_t key, st_data_t value,
|
237
|
+
st_index_t hash_val, register st_index_t bin_pos)
|
238
|
+
{
|
239
|
+
register st_table_entry *entry;
|
240
|
+
if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
|
241
|
+
rehash(table);
|
242
|
+
bin_pos = hash_pos(hash_val, table->num_bins);
|
243
|
+
}
|
244
|
+
|
245
|
+
entry = new_entry(table, key, value, hash_val, bin_pos);
|
246
|
+
list_add_tail(st_head(table), &entry->olist);
|
247
|
+
table->num_entries++;
|
248
|
+
return &entry->record;
|
249
|
+
}
|
250
|
+
|
251
|
+
static void
|
252
|
+
unpack_entries(register st_table *table)
|
253
|
+
{
|
254
|
+
st_index_t i;
|
255
|
+
st_packed_entry packed_bins[MAX_PACKED_HASH];
|
256
|
+
register st_table_entry *entry;
|
257
|
+
st_table tmp_table = *table;
|
258
|
+
|
259
|
+
MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
|
260
|
+
table->as.packed.entries = packed_bins;
|
261
|
+
tmp_table.entries_packed = 0;
|
262
|
+
#if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
|
263
|
+
MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
|
264
|
+
#else
|
265
|
+
tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
|
266
|
+
tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
|
267
|
+
#endif
|
268
|
+
|
269
|
+
/*
|
270
|
+
* order is important here, we need to keep the original table
|
271
|
+
* walkable during GC (GC may be triggered by new_entry call)
|
272
|
+
*/
|
273
|
+
i = 0;
|
274
|
+
list_head_init(st_head(&tmp_table));
|
275
|
+
do {
|
276
|
+
st_data_t key = packed_bins[i].key;
|
277
|
+
st_data_t val = packed_bins[i].val;
|
278
|
+
st_index_t hash = packed_bins[i].hash;
|
279
|
+
entry = new_entry(&tmp_table, key, val, hash,
|
280
|
+
hash_pos(hash, ST_DEFAULT_INIT_TABLE_SIZE));
|
281
|
+
list_add_tail(st_head(&tmp_table), &entry->olist);
|
282
|
+
} while (++i < MAX_PACKED_HASH);
|
283
|
+
*table = tmp_table;
|
284
|
+
list_head_init(st_head(table));
|
285
|
+
list_append_list(st_head(table), st_head(&tmp_table));
|
286
|
+
}
|
287
|
+
|
288
|
+
static st_data_t *
|
289
|
+
add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
|
290
|
+
{
|
291
|
+
st_data_t *lval;
|
292
|
+
|
293
|
+
if (table->real_entries < MAX_PACKED_HASH) {
|
294
|
+
st_index_t i = table->real_entries++;
|
295
|
+
PKEY_SET(table, i, key);
|
296
|
+
PVAL_SET(table, i, value);
|
297
|
+
PHASH_SET(table, i, hash_val);
|
298
|
+
table->num_entries++;
|
299
|
+
lval = &PVAL(table, i);
|
300
|
+
}
|
301
|
+
else {
|
302
|
+
unpack_entries(table);
|
303
|
+
lval = add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins));
|
304
|
+
}
|
305
|
+
|
306
|
+
return lval;
|
307
|
+
}
|
308
|
+
|
309
|
+
static st_data_t *
|
310
|
+
st_store(register st_table *table, register st_data_t key, st_data_t value, xh_bool_t update)
|
311
|
+
{
|
312
|
+
st_index_t hash_val;
|
313
|
+
register st_index_t bin_pos;
|
314
|
+
register st_table_entry *ptr;
|
315
|
+
st_data_t *lval;
|
316
|
+
|
317
|
+
hash_val = do_hash(key, table);
|
318
|
+
|
319
|
+
if (table->entries_packed) {
|
320
|
+
st_index_t i = find_packed_index(table, hash_val, key);
|
321
|
+
if (i < table->real_entries) {
|
322
|
+
lval = &PVAL(table, i);
|
323
|
+
}
|
324
|
+
else {
|
325
|
+
lval = add_packed_direct(table, key, value, hash_val);
|
326
|
+
}
|
327
|
+
}
|
328
|
+
else {
|
329
|
+
ptr = find_entry(table, key, hash_val, bin_pos = hash_pos(hash_val, table->num_bins));
|
330
|
+
|
331
|
+
if (ptr == 0) {
|
332
|
+
lval = add_direct(table, key, value, hash_val, bin_pos);
|
333
|
+
}
|
334
|
+
else {
|
335
|
+
lval = &ptr->record;
|
336
|
+
}
|
337
|
+
}
|
338
|
+
|
339
|
+
if (update) *lval = value;
|
340
|
+
|
341
|
+
return lval;
|
342
|
+
}
|
343
|
+
|
344
|
+
/*
 * Store val under a new UTF-8 string key built from key/keylen, overwriting
 * any value already stored for that key. Returns the address of the slot.
 *
 * The hash's st_table is written directly via st_store().
 * NOTE(review): ntbl is dereferenced without a null check although it is
 * documented as "possibly 0" -- presumably callers only pass hashes from
 * hash_new(); confirm. Writing the table directly also skips anything the
 * regular Hash store path would do (e.g. GC write barriers) -- confirm
 * this is safe for the targeted Ruby version.
 */
static VALUE *
hash_store(VALUE hash, const char *key , size_t keylen, VALUE val)
{
    const VALUE key_val = rb_utf8_str_new(key, keylen);
    struct st_table *tbl = RHASH(hash)->ntbl;

    return st_store(tbl, (st_data_t) key_val, val, TRUE);
}
|
350
|
+
|
351
|
+
/*
 * Return the address of the slot stored under a new UTF-8 string key built
 * from key/keylen. Despite the name this also inserts: when the key is
 * absent it is added with val; when it is present the existing value is
 * left untouched.
 *
 * NOTE(review): as in hash_store(), ntbl is dereferenced without a null
 * check -- confirm callers only pass hashes whose table exists.
 */
static VALUE *
hash_fetch(VALUE hash, const char *key , size_t keylen, VALUE val)
{
    const VALUE key_val = rb_utf8_str_new(key, keylen);
    struct st_table *tbl = RHASH(hash)->ntbl;

    return st_store(tbl, (st_data_t) key_val, val, FALSE);
}
|
360
|
+
|
361
|
+
static VALUE
|
362
|
+
hash_new(void)
|
363
|
+
{
|
364
|
+
VALUE hash = rb_hash_new();
|
365
|
+
rb_funcall(hash, xh_id_initialize, 0);
|
366
|
+
return hash;
|
367
|
+
}
|
368
|
+
|
369
|
+
static int
|
370
|
+
hash_first_value_i(VALUE key, VALUE value, VALUE first)
|
371
|
+
{
|
372
|
+
*((VALUE *) first) = value;
|
373
|
+
return ST_STOP;
|
374
|
+
}
|
375
|
+
|
376
|
+
static VALUE
|
377
|
+
hash_first_value(VALUE hash)
|
378
|
+
{
|
379
|
+
VALUE value = Qnil;
|
380
|
+
rb_hash_foreach(hash, hash_first_value_i, (VALUE) &value);
|
381
|
+
return value;
|
382
|
+
}
|
383
|
+
|
384
|
+
#endif /* _XH_RUBY_HASH_H_ */
|