fast-xml 1.0.1 → 1.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +145 -37
- data/ext/fastxml/ccan/build_assert/build_assert.h +40 -0
- data/ext/fastxml/ccan/check_type/check_type.h +63 -0
- data/ext/fastxml/ccan/container_of/container_of.h +142 -0
- data/ext/fastxml/ccan/list/list.h +773 -0
- data/ext/fastxml/ccan/str/str.h +16 -0
- data/ext/fastxml/fastxml.c +35 -2
- data/ext/fastxml/xh.c +19 -8
- data/ext/fastxml/xh.h +2 -1
- data/ext/fastxml/xh_config.h +3 -0
- data/ext/fastxml/xh_core.h +1 -5
- data/ext/fastxml/xh_log.h +37 -27
- data/ext/fastxml/xh_param.c +3 -11
- data/ext/fastxml/xh_param.h +1 -1
- data/ext/fastxml/xh_reader.c +528 -0
- data/ext/fastxml/xh_reader.h +43 -0
- data/ext/fastxml/xh_ruby_hash.h +384 -0
- data/ext/fastxml/xh_x2h.c +1002 -0
- data/ext/fastxml/xh_x2h.h +133 -0
- data/lib/fastxml/version.rb +1 -1
- metadata +13 -3
@@ -0,0 +1,384 @@
|
|
1
|
+
#ifndef _XH_RUBY_HASH_H_
|
2
|
+
#define _XH_RUBY_HASH_H_
|
3
|
+
|
4
|
+
#include "xh_config.h"
|
5
|
+
#include "xh_core.h"
|
6
|
+
#include "ccan/list/list.h"
|
7
|
+
|
8
|
+
struct RHash {
|
9
|
+
struct RBasic basic;
|
10
|
+
struct st_table *ntbl; /* possibly 0 */
|
11
|
+
int iter_lev;
|
12
|
+
const VALUE ifnone;
|
13
|
+
};
|
14
|
+
|
15
|
+
/* Reinterpret a hash VALUE as a pointer to the struct RHash declared above. */
#define RHASH(obj) (R_CAST(RHash)(obj))

/*
 * Only when RHASH_ITER_LEV is already defined: drop it together with
 * RHASH_IFNONE and RHASH_SIZE and redefine all three on top of RHASH().
 */
#ifdef RHASH_ITER_LEV
#  undef RHASH_ITER_LEV
#  undef RHASH_IFNONE
#  undef RHASH_SIZE
#  define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev)
#  define RHASH_IFNONE(h) (RHASH(h)->ifnone)
/* A hash whose table pointer is 0 reports a size of 0. */
#  define RHASH_SIZE(h) (RHASH(h)->ntbl ? (st_index_t)RHASH(h)->ntbl->num_entries : 0)
#endif
|
25
|
+
|
26
|
+
typedef struct st_table_entry st_table_entry;
|
27
|
+
|
28
|
+
struct st_table_entry {
|
29
|
+
st_index_t hash;
|
30
|
+
st_data_t key;
|
31
|
+
st_data_t record;
|
32
|
+
st_table_entry *next;
|
33
|
+
struct list_node olist;
|
34
|
+
};
|
35
|
+
|
36
|
+
typedef struct st_packed_entry {
|
37
|
+
st_index_t hash;
|
38
|
+
st_data_t key, val;
|
39
|
+
} st_packed_entry;
|
40
|
+
|
41
|
+
/* add_direct() rehashes once num_entries exceeds this many entries per bin. */
#define ST_DEFAULT_MAX_DENSITY 5
/* Bin count a table gets when unpack_entries() converts it. */
#define ST_DEFAULT_INIT_TABLE_SIZE 16
/* Size, in bin-pointer units, used to derive the packed capacity below. */
#define ST_DEFAULT_PACKED_TABLE_SIZE 18
/* How many bin pointers one packed entry occupies. */
#define PACKED_UNIT ((int)(sizeof(st_packed_entry) / sizeof(st_table_entry*)))
/* How many packed entries fit into ST_DEFAULT_PACKED_TABLE_SIZE bin pointers. */
#define MAX_PACKED_HASH ((int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry)))

/* 32-bit FNV-1a offset basis and prime used by the string hash functions. */
#define FNV1_32A_INIT 0x811c9dc5
#define FNV_32_PRIME 0x01000193
|
49
|
+
|
50
|
+
#define type_numhash st_hashtype_num
|
51
|
+
const struct st_hash_type st_hashtype_num = {
|
52
|
+
st_numcmp,
|
53
|
+
st_numhash,
|
54
|
+
};
|
55
|
+
|
56
|
+
static st_index_t
|
57
|
+
strcasehash(st_data_t arg)
|
58
|
+
{
|
59
|
+
register const char *string = (const char *)arg;
|
60
|
+
register st_index_t hval = FNV1_32A_INIT;
|
61
|
+
|
62
|
+
/*
|
63
|
+
* FNV-1a hash each octet in the buffer
|
64
|
+
*/
|
65
|
+
while (*string) {
|
66
|
+
unsigned int c = (unsigned char)*string++;
|
67
|
+
if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
|
68
|
+
hval ^= c;
|
69
|
+
|
70
|
+
/* multiply by the 32 bit FNV magic prime mod 2^32 */
|
71
|
+
hval *= FNV_32_PRIME;
|
72
|
+
}
|
73
|
+
return hval;
|
74
|
+
}
|
75
|
+
|
76
|
+
static st_index_t
|
77
|
+
strhash(st_data_t arg)
|
78
|
+
{
|
79
|
+
register const char *string = (const char *)arg;
|
80
|
+
return st_hash(string, strlen(string), FNV1_32A_INIT);
|
81
|
+
}
|
82
|
+
|
83
|
+
/* extern int strcmp(const char *, const char *); */
|
84
|
+
static st_index_t strhash(st_data_t);
|
85
|
+
static const struct st_hash_type type_strhash = {
|
86
|
+
strcmp,
|
87
|
+
strhash,
|
88
|
+
};
|
89
|
+
|
90
|
+
static st_index_t strcasehash(st_data_t);
|
91
|
+
static const struct st_hash_type type_strcasehash = {
|
92
|
+
st_locale_insensitive_strcasecmp,
|
93
|
+
strcasehash,
|
94
|
+
};
|
95
|
+
|
96
|
+
/*
 * True when key x is the very same value as ent's key, or when the table
 * type's compare function reports the two keys as equal (returns 0).
 */
#define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)

/* Hash `key` with the hash function of the table's type. */
#define do_hash(key,table) ((st_index_t)(*(table)->type->hash)((key)))

/*
 * Bin index of hash value h in a table of n bins.  The mask form is only a
 * modulo when n is a power of two.  n is parenthesized so that an expression
 * argument such as `1 << k` yields the intended mask.
 */
#define hash_pos(h,n) ((h) & ((n) - 1))

/*
 * True when ptr is a non-null entry that does NOT match (hash_val, key);
 * false for a null ptr and for a matching entry.
 */
#define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
    ((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
|
103
|
+
|
104
|
+
/*
 * Allocation hooks, kept as macros so the allocation strategy can be changed
 * in one place later.  The size argument of st_free_bins() is ignored.
 */
#define st_alloc_entry() ((st_table_entry *)malloc(sizeof(st_table_entry)))
#define st_free_entry(entry) free(entry)
#define st_alloc_table() ((st_table *)malloc(sizeof(st_table)))
#define st_dealloc_table(table) free(table)
#define st_alloc_bins(size) ((st_table_entry **)calloc((size), sizeof(st_table_entry *)))
#define st_free_bins(bins, size) free(bins)
|
111
|
+
static inline st_table_entry**
|
112
|
+
st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
|
113
|
+
{
|
114
|
+
bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
|
115
|
+
MEMZERO(bins, st_table_entry*, newsize);
|
116
|
+
return bins;
|
117
|
+
}
|
118
|
+
|
119
|
+
/*
 * Shortcuts: after these, `table->bins` and `table->real_entries` expand to
 * the nested `as.big.bins` / `as.packed.real_entries` members.  Because they
 * are plain object-like macros, no local variable may use either name.
 */
#define bins as.big.bins
#define real_entries as.packed.real_entries

/*
 * Accessors for the packed representation, kept as macros so the packing
 * scheme can be changed in one place later.
 */
#define PACKED_BINS(table) ((table)->as.packed.entries)
#define PACKED_ENT(table, i) (PACKED_BINS(table)[(i)])
#define PKEY(table, i) (PACKED_ENT((table), (i)).key)
#define PVAL(table, i) (PACKED_ENT((table), (i)).val)
#define PHASH(table, i) (PACKED_ENT((table), (i)).hash)
#define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
#define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
#define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
|
132
|
+
|
133
|
+
static struct list_head *
|
134
|
+
st_head(const st_table *tbl)
|
135
|
+
{
|
136
|
+
uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
|
137
|
+
return (struct list_head *)addr;
|
138
|
+
}
|
139
|
+
|
140
|
+
static st_index_t
|
141
|
+
next_pow2(st_index_t x)
|
142
|
+
{
|
143
|
+
x |= x >> 1;
|
144
|
+
x |= x >> 2;
|
145
|
+
x |= x >> 4;
|
146
|
+
x |= x >> 8;
|
147
|
+
x |= x >> 16;
|
148
|
+
#if SIZEOF_ST_INDEX_T == 8
|
149
|
+
x |= x >> 32;
|
150
|
+
#endif
|
151
|
+
return x + 1;
|
152
|
+
}
|
153
|
+
|
154
|
+
static st_index_t
|
155
|
+
new_size(st_index_t size)
|
156
|
+
{
|
157
|
+
st_index_t n;
|
158
|
+
|
159
|
+
if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
|
160
|
+
return size;
|
161
|
+
|
162
|
+
n = next_pow2(size);
|
163
|
+
if (n > size)
|
164
|
+
return n;
|
165
|
+
#ifndef NOT_RUBY
|
166
|
+
rb_raise(rb_eRuntimeError, "st_table too big");
|
167
|
+
#endif
|
168
|
+
return -1; /* should raise exception */
|
169
|
+
}
|
170
|
+
|
171
|
+
static void
|
172
|
+
rehash(register st_table *table)
|
173
|
+
{
|
174
|
+
register st_table_entry *ptr = 0, **new_bins;
|
175
|
+
st_index_t new_num_bins, hash_val;
|
176
|
+
|
177
|
+
new_num_bins = new_size(table->num_bins+1);
|
178
|
+
new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
|
179
|
+
table->num_bins = new_num_bins;
|
180
|
+
table->bins = new_bins;
|
181
|
+
|
182
|
+
list_for_each(st_head(table), ptr, olist) {
|
183
|
+
hash_val = hash_pos(ptr->hash, new_num_bins);
|
184
|
+
ptr->next = new_bins[hash_val];
|
185
|
+
new_bins[hash_val] = ptr;
|
186
|
+
}
|
187
|
+
}
|
188
|
+
|
189
|
+
static st_table_entry *
|
190
|
+
find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
|
191
|
+
st_index_t bin_pos)
|
192
|
+
{
|
193
|
+
register st_table_entry *ptr = table->bins[bin_pos];
|
194
|
+
if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
|
195
|
+
while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
|
196
|
+
ptr = ptr->next;
|
197
|
+
}
|
198
|
+
ptr = ptr->next;
|
199
|
+
}
|
200
|
+
return ptr;
|
201
|
+
}
|
202
|
+
|
203
|
+
static inline st_index_t
|
204
|
+
find_packed_index_from(const st_table *table, st_index_t hash_val,
|
205
|
+
st_data_t key, st_index_t i)
|
206
|
+
{
|
207
|
+
while (i < table->real_entries &&
|
208
|
+
(PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
|
209
|
+
i++;
|
210
|
+
}
|
211
|
+
return i;
|
212
|
+
}
|
213
|
+
|
214
|
+
static inline st_index_t
|
215
|
+
find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
|
216
|
+
{
|
217
|
+
return find_packed_index_from(table, hash_val, key, 0);
|
218
|
+
}
|
219
|
+
|
220
|
+
static inline st_table_entry *
|
221
|
+
new_entry(st_table * table, st_data_t key, st_data_t value,
|
222
|
+
st_index_t hash_val, register st_index_t bin_pos)
|
223
|
+
{
|
224
|
+
register st_table_entry *entry = st_alloc_entry();
|
225
|
+
|
226
|
+
entry->next = table->bins[bin_pos];
|
227
|
+
table->bins[bin_pos] = entry;
|
228
|
+
entry->hash = hash_val;
|
229
|
+
entry->key = key;
|
230
|
+
entry->record = value;
|
231
|
+
|
232
|
+
return entry;
|
233
|
+
}
|
234
|
+
|
235
|
+
static inline st_data_t *
|
236
|
+
add_direct(st_table *table, st_data_t key, st_data_t value,
|
237
|
+
st_index_t hash_val, register st_index_t bin_pos)
|
238
|
+
{
|
239
|
+
register st_table_entry *entry;
|
240
|
+
if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
|
241
|
+
rehash(table);
|
242
|
+
bin_pos = hash_pos(hash_val, table->num_bins);
|
243
|
+
}
|
244
|
+
|
245
|
+
entry = new_entry(table, key, value, hash_val, bin_pos);
|
246
|
+
list_add_tail(st_head(table), &entry->olist);
|
247
|
+
table->num_entries++;
|
248
|
+
return &entry->record;
|
249
|
+
}
|
250
|
+
|
251
|
+
static void
|
252
|
+
unpack_entries(register st_table *table)
|
253
|
+
{
|
254
|
+
st_index_t i;
|
255
|
+
st_packed_entry packed_bins[MAX_PACKED_HASH];
|
256
|
+
register st_table_entry *entry;
|
257
|
+
st_table tmp_table = *table;
|
258
|
+
|
259
|
+
MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
|
260
|
+
table->as.packed.entries = packed_bins;
|
261
|
+
tmp_table.entries_packed = 0;
|
262
|
+
#if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
|
263
|
+
MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
|
264
|
+
#else
|
265
|
+
tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
|
266
|
+
tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
|
267
|
+
#endif
|
268
|
+
|
269
|
+
/*
|
270
|
+
* order is important here, we need to keep the original table
|
271
|
+
* walkable during GC (GC may be triggered by new_entry call)
|
272
|
+
*/
|
273
|
+
i = 0;
|
274
|
+
list_head_init(st_head(&tmp_table));
|
275
|
+
do {
|
276
|
+
st_data_t key = packed_bins[i].key;
|
277
|
+
st_data_t val = packed_bins[i].val;
|
278
|
+
st_index_t hash = packed_bins[i].hash;
|
279
|
+
entry = new_entry(&tmp_table, key, val, hash,
|
280
|
+
hash_pos(hash, ST_DEFAULT_INIT_TABLE_SIZE));
|
281
|
+
list_add_tail(st_head(&tmp_table), &entry->olist);
|
282
|
+
} while (++i < MAX_PACKED_HASH);
|
283
|
+
*table = tmp_table;
|
284
|
+
list_head_init(st_head(table));
|
285
|
+
list_append_list(st_head(table), st_head(&tmp_table));
|
286
|
+
}
|
287
|
+
|
288
|
+
static st_data_t *
|
289
|
+
add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
|
290
|
+
{
|
291
|
+
st_data_t *lval;
|
292
|
+
|
293
|
+
if (table->real_entries < MAX_PACKED_HASH) {
|
294
|
+
st_index_t i = table->real_entries++;
|
295
|
+
PKEY_SET(table, i, key);
|
296
|
+
PVAL_SET(table, i, value);
|
297
|
+
PHASH_SET(table, i, hash_val);
|
298
|
+
table->num_entries++;
|
299
|
+
lval = &PVAL(table, i);
|
300
|
+
}
|
301
|
+
else {
|
302
|
+
unpack_entries(table);
|
303
|
+
lval = add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins));
|
304
|
+
}
|
305
|
+
|
306
|
+
return lval;
|
307
|
+
}
|
308
|
+
|
309
|
+
static st_data_t *
|
310
|
+
st_store(register st_table *table, register st_data_t key, st_data_t value, xh_bool_t update)
|
311
|
+
{
|
312
|
+
st_index_t hash_val;
|
313
|
+
register st_index_t bin_pos;
|
314
|
+
register st_table_entry *ptr;
|
315
|
+
st_data_t *lval;
|
316
|
+
|
317
|
+
hash_val = do_hash(key, table);
|
318
|
+
|
319
|
+
if (table->entries_packed) {
|
320
|
+
st_index_t i = find_packed_index(table, hash_val, key);
|
321
|
+
if (i < table->real_entries) {
|
322
|
+
lval = &PVAL(table, i);
|
323
|
+
}
|
324
|
+
else {
|
325
|
+
lval = add_packed_direct(table, key, value, hash_val);
|
326
|
+
}
|
327
|
+
}
|
328
|
+
else {
|
329
|
+
ptr = find_entry(table, key, hash_val, bin_pos = hash_pos(hash_val, table->num_bins));
|
330
|
+
|
331
|
+
if (ptr == 0) {
|
332
|
+
lval = add_direct(table, key, value, hash_val, bin_pos);
|
333
|
+
}
|
334
|
+
else {
|
335
|
+
lval = &ptr->record;
|
336
|
+
}
|
337
|
+
}
|
338
|
+
|
339
|
+
if (update) *lval = value;
|
340
|
+
|
341
|
+
return lval;
|
342
|
+
}
|
343
|
+
|
344
|
+
static VALUE *
|
345
|
+
hash_store(VALUE hash, const char *key , size_t keylen, VALUE val)
|
346
|
+
{
|
347
|
+
VALUE key_val = rb_utf8_str_new(key, keylen);
|
348
|
+
return st_store(RHASH(hash)->ntbl, (st_data_t) key_val, val, TRUE);
|
349
|
+
}
|
350
|
+
|
351
|
+
static VALUE *
|
352
|
+
hash_fetch(VALUE hash, const char *key , size_t keylen, VALUE val)
|
353
|
+
{
|
354
|
+
VALUE key_val = rb_utf8_str_new(key, keylen);
|
355
|
+
VALUE *lval;
|
356
|
+
lval = st_store(RHASH(hash)->ntbl, (st_data_t) key_val, val, FALSE);
|
357
|
+
|
358
|
+
return lval;
|
359
|
+
}
|
360
|
+
|
361
|
+
static VALUE
|
362
|
+
hash_new(void)
|
363
|
+
{
|
364
|
+
VALUE hash = rb_hash_new();
|
365
|
+
rb_funcall(hash, xh_id_initialize, 0);
|
366
|
+
return hash;
|
367
|
+
}
|
368
|
+
|
369
|
+
static int
|
370
|
+
hash_first_value_i(VALUE key, VALUE value, VALUE first)
|
371
|
+
{
|
372
|
+
*((VALUE *) first) = value;
|
373
|
+
return ST_STOP;
|
374
|
+
}
|
375
|
+
|
376
|
+
static VALUE
|
377
|
+
hash_first_value(VALUE hash)
|
378
|
+
{
|
379
|
+
VALUE value = Qnil;
|
380
|
+
rb_hash_foreach(hash, hash_first_value_i, (VALUE) &value);
|
381
|
+
return value;
|
382
|
+
}
|
383
|
+
|
384
|
+
#endif /* _XH_RUBY_HASH_H_ */
|