fast-xml 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,6 +2,7 @@
2
2
  #define _XH_CONFIG_H_
3
3
 
4
4
  #include "ruby.h"
5
+ #include "ruby/version.h"
5
6
  #include "ruby/io.h"
6
7
  #include "ruby/re.h"
7
8
  #if HAVE_RUBY_ENCODING_H
@@ -21,14 +22,31 @@
21
22
  #include <sys/mman.h>
22
23
  #endif
23
24
 
24
- #if __GNUC__ >= 3
25
- # define expect(expr,value) __builtin_expect ((expr), (value))
26
- # define XH_INLINE static inline
27
- # define XH_UNUSED(v) x __attribute__((unused))
25
+ #if defined __GNUC__
26
+ # if __GNUC__ >= 3
27
+ # define expect(expr,value) __builtin_expect ((expr), (value))
28
+ # define XH_INLINE static inline
29
+ # define XH_UNUSED(v) x __attribute__((unused))
30
+ # else
31
+ # define expect(expr,value) (expr)
32
+ # define XH_INLINE static
33
+ # define XH_UNUSED(v) v
34
+ # endif
35
+ #endif
36
+
37
+ #if defined __GNUC__
38
+ # if (__GNUC__ < 4) || defined(__GNUC_MINOR__) && (__GNUC__ == 4) && (__GNUC_MINOR__ < 4)
39
+ /* gcc versions before 4.4.x don't support gnu_printf, so use printf. */
40
+ # define XH_GCC_ATTR __attribute__((__unused__, format(printf, 1, 2)))
41
+ # define XH_GCC_FMT_ATTR(n, m) __attribute__((format(printf, n, m)))
42
+ # else
43
+ /* Use gnu_printf when supported (qemu uses standard format strings). */
44
+ # define XH_GCC_ATTR __attribute__((__unused__, format(gnu_printf, 1, 2)))
45
+ # define XH_GCC_FMT_ATTR(n, m) __attribute__((format(gnu_printf, n, m)))
46
+ # endif
28
47
  #else
29
- # define expect(expr,value) (expr)
30
- # define XH_INLINE static
31
- # define XH_UNUSED(v) v
48
+ # define XH_GCC_ATTR /**/
49
+ # define XH_GCC_FMT_ATTR(n, m)
32
50
  #endif
33
51
 
34
52
  #ifdef _MSC_VER
@@ -105,6 +105,6 @@ typedef enum {
105
105
  #define xh_log_error8(msg, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8) \
106
106
  xh_log(XH_LOG_ERROR, XH_CURRENT_FUNCTION, __LINE__, msg, arg1, arg2, arg3, arg4, arg5, arg6, arg7, arg8)
107
107
 
108
- void xh_log(xh_log_level_t log_level, const char *func, xh_int_t line, const char *msg, ...);
108
+ void xh_log(xh_log_level_t log_level, const char *func, xh_int_t line, const char *msg, ...) XH_GCC_FMT_ATTR(4, 5);
109
109
 
110
110
  #endif /* _XH_LOG_H_ */
@@ -3,344 +3,14 @@
3
3
 
4
4
  #include "xh_config.h"
5
5
  #include "xh_core.h"
6
- #include "ccan/list/list.h"
7
-
8
- struct RHash {
9
- struct RBasic basic;
10
- struct st_table *ntbl; /* possibly 0 */
11
- int iter_lev;
12
- const VALUE ifnone;
13
- };
14
-
15
- #define RHASH(obj) (R_CAST(RHash)(obj))
16
-
17
- #ifdef RHASH_ITER_LEV
18
- #undef RHASH_ITER_LEV
19
- #undef RHASH_IFNONE
20
- #undef RHASH_SIZE
21
- #define RHASH_ITER_LEV(h) (RHASH(h)->iter_lev)
22
- #define RHASH_IFNONE(h) (RHASH(h)->ifnone)
23
- #define RHASH_SIZE(h) (RHASH(h)->ntbl ? (st_index_t)RHASH(h)->ntbl->num_entries : 0)
24
- #endif
25
-
26
- typedef struct st_table_entry st_table_entry;
27
-
28
- struct st_table_entry {
29
- st_index_t hash;
30
- st_data_t key;
31
- st_data_t record;
32
- st_table_entry *next;
33
- struct list_node olist;
34
- };
35
-
36
- typedef struct st_packed_entry {
37
- st_index_t hash;
38
- st_data_t key, val;
39
- } st_packed_entry;
40
-
41
- #define ST_DEFAULT_MAX_DENSITY 5
42
- #define ST_DEFAULT_INIT_TABLE_SIZE 16
43
- #define ST_DEFAULT_PACKED_TABLE_SIZE 18
44
- #define PACKED_UNIT (int)(sizeof(st_packed_entry) / sizeof(st_table_entry*))
45
- #define MAX_PACKED_HASH (int)(ST_DEFAULT_PACKED_TABLE_SIZE * sizeof(st_table_entry*) / sizeof(st_packed_entry))
46
-
47
- #define FNV1_32A_INIT 0x811c9dc5
48
- #define FNV_32_PRIME 0x01000193
49
-
50
- #define type_numhash st_hashtype_num
51
- const struct st_hash_type st_hashtype_num = {
52
- st_numcmp,
53
- st_numhash,
54
- };
55
-
56
- static st_index_t
57
- strcasehash(st_data_t arg)
58
- {
59
- register const char *string = (const char *)arg;
60
- register st_index_t hval = FNV1_32A_INIT;
61
-
62
- /*
63
- * FNV-1a hash each octet in the buffer
64
- */
65
- while (*string) {
66
- unsigned int c = (unsigned char)*string++;
67
- if ((unsigned int)(c - 'A') <= ('Z' - 'A')) c += 'a' - 'A';
68
- hval ^= c;
69
-
70
- /* multiply by the 32 bit FNV magic prime mod 2^32 */
71
- hval *= FNV_32_PRIME;
72
- }
73
- return hval;
74
- }
75
-
76
- static st_index_t
77
- strhash(st_data_t arg)
78
- {
79
- register const char *string = (const char *)arg;
80
- return st_hash(string, strlen(string), FNV1_32A_INIT);
81
- }
82
-
83
- /* extern int strcmp(const char *, const char *); */
84
- static st_index_t strhash(st_data_t);
85
- static const struct st_hash_type type_strhash = {
86
- strcmp,
87
- strhash,
88
- };
89
-
90
- static st_index_t strcasehash(st_data_t);
91
- static const struct st_hash_type type_strcasehash = {
92
- st_locale_insensitive_strcasecmp,
93
- strcasehash,
94
- };
95
-
96
- #define EQUAL(table,x,ent) ((x)==(ent)->key || (*(table)->type->compare)((x),(ent)->key) == 0)
97
-
98
- #define do_hash(key,table) (st_index_t)(*(table)->type->hash)((key))
99
- #define hash_pos(h,n) ((h) & (n - 1))
100
-
101
- #define PTR_NOT_EQUAL(table, ptr, hash_val, key) \
102
- ((ptr) != 0 && ((ptr)->hash != (hash_val) || !EQUAL((table), (key), (ptr))))
103
-
104
- /* preparation for possible allocation improvements */
105
- #define st_alloc_entry() (st_table_entry *)malloc(sizeof(st_table_entry))
106
- #define st_free_entry(entry) free(entry)
107
- #define st_alloc_table() (st_table *)malloc(sizeof(st_table))
108
- #define st_dealloc_table(table) free(table)
109
- #define st_alloc_bins(size) (st_table_entry **)calloc(size, sizeof(st_table_entry *))
110
- #define st_free_bins(bins, size) free(bins)
111
- static inline st_table_entry**
112
- st_realloc_bins(st_table_entry **bins, st_index_t newsize, st_index_t oldsize)
113
- {
114
- bins = (st_table_entry **)realloc(bins, newsize * sizeof(st_table_entry *));
115
- MEMZERO(bins, st_table_entry*, newsize);
116
- return bins;
117
- }
118
-
119
- /* Shortcut */
120
- #define bins as.big.bins
121
- #define real_entries as.packed.real_entries
122
-
123
- /* preparation for possible packing improvements */
124
- #define PACKED_BINS(table) ((table)->as.packed.entries)
125
- #define PACKED_ENT(table, i) PACKED_BINS(table)[i]
126
- #define PKEY(table, i) PACKED_ENT((table), (i)).key
127
- #define PVAL(table, i) PACKED_ENT((table), (i)).val
128
- #define PHASH(table, i) PACKED_ENT((table), (i)).hash
129
- #define PKEY_SET(table, i, v) (PKEY((table), (i)) = (v))
130
- #define PVAL_SET(table, i, v) (PVAL((table), (i)) = (v))
131
- #define PHASH_SET(table, i, v) (PHASH((table), (i)) = (v))
132
-
133
- static struct list_head *
134
- st_head(const st_table *tbl)
135
- {
136
- uintptr_t addr = (uintptr_t)&tbl->as.big.private_list_head;
137
- return (struct list_head *)addr;
138
- }
139
-
140
- static st_index_t
141
- next_pow2(st_index_t x)
142
- {
143
- x |= x >> 1;
144
- x |= x >> 2;
145
- x |= x >> 4;
146
- x |= x >> 8;
147
- x |= x >> 16;
148
- #if SIZEOF_ST_INDEX_T == 8
149
- x |= x >> 32;
150
- #endif
151
- return x + 1;
152
- }
153
-
154
- static st_index_t
155
- new_size(st_index_t size)
156
- {
157
- st_index_t n;
158
-
159
- if (size && (size & ~(size - 1)) == size) /* already a power-of-two? */
160
- return size;
161
-
162
- n = next_pow2(size);
163
- if (n > size)
164
- return n;
165
- #ifndef NOT_RUBY
166
- rb_raise(rb_eRuntimeError, "st_table too big");
167
- #endif
168
- return -1; /* should raise exception */
169
- }
170
-
171
- static void
172
- rehash(register st_table *table)
173
- {
174
- register st_table_entry *ptr = 0, **new_bins;
175
- st_index_t new_num_bins, hash_val;
176
-
177
- new_num_bins = new_size(table->num_bins+1);
178
- new_bins = st_realloc_bins(table->bins, new_num_bins, table->num_bins);
179
- table->num_bins = new_num_bins;
180
- table->bins = new_bins;
181
-
182
- list_for_each(st_head(table), ptr, olist) {
183
- hash_val = hash_pos(ptr->hash, new_num_bins);
184
- ptr->next = new_bins[hash_val];
185
- new_bins[hash_val] = ptr;
186
- }
187
- }
188
-
189
- static st_table_entry *
190
- find_entry(const st_table *table, st_data_t key, st_index_t hash_val,
191
- st_index_t bin_pos)
192
- {
193
- register st_table_entry *ptr = table->bins[bin_pos];
194
- if (PTR_NOT_EQUAL(table, ptr, hash_val, key)) {
195
- while (PTR_NOT_EQUAL(table, ptr->next, hash_val, key)) {
196
- ptr = ptr->next;
197
- }
198
- ptr = ptr->next;
199
- }
200
- return ptr;
201
- }
202
-
203
- static inline st_index_t
204
- find_packed_index_from(const st_table *table, st_index_t hash_val,
205
- st_data_t key, st_index_t i)
206
- {
207
- while (i < table->real_entries &&
208
- (PHASH(table, i) != hash_val || !EQUAL(table, key, &PACKED_ENT(table, i)))) {
209
- i++;
210
- }
211
- return i;
212
- }
213
-
214
- static inline st_index_t
215
- find_packed_index(const st_table *table, st_index_t hash_val, st_data_t key)
216
- {
217
- return find_packed_index_from(table, hash_val, key, 0);
218
- }
219
-
220
- static inline st_table_entry *
221
- new_entry(st_table * table, st_data_t key, st_data_t value,
222
- st_index_t hash_val, register st_index_t bin_pos)
223
- {
224
- register st_table_entry *entry = st_alloc_entry();
225
-
226
- entry->next = table->bins[bin_pos];
227
- table->bins[bin_pos] = entry;
228
- entry->hash = hash_val;
229
- entry->key = key;
230
- entry->record = value;
231
-
232
- return entry;
233
- }
234
-
235
- static inline st_data_t *
236
- add_direct(st_table *table, st_data_t key, st_data_t value,
237
- st_index_t hash_val, register st_index_t bin_pos)
238
- {
239
- register st_table_entry *entry;
240
- if (table->num_entries > ST_DEFAULT_MAX_DENSITY * table->num_bins) {
241
- rehash(table);
242
- bin_pos = hash_pos(hash_val, table->num_bins);
243
- }
244
-
245
- entry = new_entry(table, key, value, hash_val, bin_pos);
246
- list_add_tail(st_head(table), &entry->olist);
247
- table->num_entries++;
248
- return &entry->record;
249
- }
250
-
251
- static void
252
- unpack_entries(register st_table *table)
253
- {
254
- st_index_t i;
255
- st_packed_entry packed_bins[MAX_PACKED_HASH];
256
- register st_table_entry *entry;
257
- st_table tmp_table = *table;
258
-
259
- MEMCPY(packed_bins, PACKED_BINS(table), st_packed_entry, MAX_PACKED_HASH);
260
- table->as.packed.entries = packed_bins;
261
- tmp_table.entries_packed = 0;
262
- #if ST_DEFAULT_INIT_TABLE_SIZE == ST_DEFAULT_PACKED_TABLE_SIZE
263
- MEMZERO(tmp_table.bins, st_table_entry*, tmp_table.num_bins);
264
- #else
265
- tmp_table.bins = st_realloc_bins(tmp_table.bins, ST_DEFAULT_INIT_TABLE_SIZE, tmp_table.num_bins);
266
- tmp_table.num_bins = ST_DEFAULT_INIT_TABLE_SIZE;
6
+ #if RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR == 2
7
+ #include "ruby-2.2.7/xh_ruby_st.h"
8
+ #elif RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR == 3
9
+ #include "ruby-2.3.4/xh_ruby_st.h"
10
+ #elif RUBY_API_VERSION_MAJOR == 2 && RUBY_API_VERSION_MINOR == 4
11
+ #include "ruby-2.4.1/xh_ruby_st.h"
267
12
  #endif
268
13
 
269
- /*
270
- * order is important here, we need to keep the original table
271
- * walkable during GC (GC may be triggered by new_entry call)
272
- */
273
- i = 0;
274
- list_head_init(st_head(&tmp_table));
275
- do {
276
- st_data_t key = packed_bins[i].key;
277
- st_data_t val = packed_bins[i].val;
278
- st_index_t hash = packed_bins[i].hash;
279
- entry = new_entry(&tmp_table, key, val, hash,
280
- hash_pos(hash, ST_DEFAULT_INIT_TABLE_SIZE));
281
- list_add_tail(st_head(&tmp_table), &entry->olist);
282
- } while (++i < MAX_PACKED_HASH);
283
- *table = tmp_table;
284
- list_head_init(st_head(table));
285
- list_append_list(st_head(table), st_head(&tmp_table));
286
- }
287
-
288
- static st_data_t *
289
- add_packed_direct(st_table *table, st_data_t key, st_data_t value, st_index_t hash_val)
290
- {
291
- st_data_t *lval;
292
-
293
- if (table->real_entries < MAX_PACKED_HASH) {
294
- st_index_t i = table->real_entries++;
295
- PKEY_SET(table, i, key);
296
- PVAL_SET(table, i, value);
297
- PHASH_SET(table, i, hash_val);
298
- table->num_entries++;
299
- lval = &PVAL(table, i);
300
- }
301
- else {
302
- unpack_entries(table);
303
- lval = add_direct(table, key, value, hash_val, hash_pos(hash_val, table->num_bins));
304
- }
305
-
306
- return lval;
307
- }
308
-
309
- static st_data_t *
310
- st_store(register st_table *table, register st_data_t key, st_data_t value, xh_bool_t update)
311
- {
312
- st_index_t hash_val;
313
- register st_index_t bin_pos;
314
- register st_table_entry *ptr;
315
- st_data_t *lval;
316
-
317
- hash_val = do_hash(key, table);
318
-
319
- if (table->entries_packed) {
320
- st_index_t i = find_packed_index(table, hash_val, key);
321
- if (i < table->real_entries) {
322
- lval = &PVAL(table, i);
323
- }
324
- else {
325
- lval = add_packed_direct(table, key, value, hash_val);
326
- }
327
- }
328
- else {
329
- ptr = find_entry(table, key, hash_val, bin_pos = hash_pos(hash_val, table->num_bins));
330
-
331
- if (ptr == 0) {
332
- lval = add_direct(table, key, value, hash_val, bin_pos);
333
- }
334
- else {
335
- lval = &ptr->record;
336
- }
337
- }
338
-
339
- if (update) *lval = value;
340
-
341
- return lval;
342
- }
343
-
344
14
  static VALUE *
345
15
  hash_store(VALUE hash, const char *key , size_t keylen, VALUE val)
346
16
  {
@@ -14,7 +14,7 @@ static const char DEF_CONTENT_KEY[] = "content";
14
14
  rb_str_cat((v), (const char *) (s), (l));
15
15
 
16
16
  #define SAVE_VALUE(lv, v , s, l) \
17
- xh_log_trace2("save value: [%.*s]", l, s); \
17
+ xh_log_trace2("save value: [%.*s]", (int) (l), s); \
18
18
  if ( RTEST(v) ) { \
19
19
  xh_log_trace0("add to array"); \
20
20
  /* get array if value is reference to array */ \
@@ -64,7 +64,7 @@ static const char DEF_CONTENT_KEY[] = "content";
64
64
  (s) = NULL;
65
65
 
66
66
  #define OPEN_TAG(s, l) \
67
- xh_log_trace2("new tag: [%.*s]", l, s); \
67
+ xh_log_trace2("new tag: [%.*s]", (int) (l), s); \
68
68
  if (real_depth == 0) { \
69
69
  if (flags & XH_X2H_ROOT_FOUND) goto INVALID_XML; \
70
70
  flags |= XH_X2H_ROOT_FOUND; \
@@ -72,7 +72,7 @@ static const char DEF_CONTENT_KEY[] = "content";
72
72
  if (XH_X2H_FILTER_SEARCH(flags)) { \
73
73
  xh_x2h_xpath_update(ctx->xpath, s, l); \
74
74
  if (xh_x2h_match_node(ctx->xpath, xh_strlen(ctx->xpath), ctx->opts.filter.expr)) {\
75
- xh_log_trace2("match node: [%.*s]", l, s); \
75
+ xh_log_trace2("match node: [%.*s]", (int) (l), s); \
76
76
  ctx->hash = hash_new(); \
77
77
  nodes[0].lval = lval = &ctx->hash; \
78
78
  depth = 0; \
@@ -132,7 +132,7 @@ static const char DEF_CONTENT_KEY[] = "content";
132
132
  }
133
133
 
134
134
  #define NEW_XML_DECL_ATTRIBUTE(k, kl, v, vl) \
135
- xh_log_trace4("new xml decl attr name: [%.*s] value: [%.*s]", kl, k, vl, v);\
135
+ xh_log_trace4("new xml decl attr name: [%.*s] value: [%.*s]", (int) (kl), k, (int) (vl), v);\
136
136
  /* save encoding parameter to converter context if param found */ \
137
137
  if ((kl) == (sizeof("encoding") - 1) && \
138
138
  xh_strncmp((k), XH_CHAR_CAST "encoding", sizeof("encoding") - 1) == 0) {\
@@ -176,7 +176,7 @@ static const char DEF_CONTENT_KEY[] = "content";
176
176
  } \
177
177
 
178
178
  #define NEW_TEXT(s, l) \
179
- xh_log_trace2("new text: [%.*s]", l, s); \
179
+ xh_log_trace2("new text: [%.*s]", (int) (l), s); \
180
180
  if (real_depth == 0) goto INVALID_XML; \
181
181
  if (!XH_X2H_FILTER_SEARCH(flags)) { \
182
182
  _NEW_TEXT(s, l) \
@@ -349,7 +349,7 @@ XH_PPCAT(loop, _SEARCH_ATTRIBUTES_LOOP): \
349
349
  DO(XH_PPCAT(loop, _PARSE_ATTR_NAME)) \
350
350
  EXPECT_BLANK("end attr name") \
351
351
  end = cur - 1; \
352
- xh_log_trace2("attr name: [%.*s]", end - node, node);\
352
+ xh_log_trace2("attr name: [%.*s]", (int) (end - node), node);\
353
353
  \
354
354
  DO(XH_PPCAT(loop, _ATTR_SKIP_BLANK)) \
355
355
  EXPECT_CHAR("search attr value", '=') \
@@ -361,7 +361,7 @@ XH_PPCAT(loop, _SEARCH_ATTRIBUTES_LOOP): \
361
361
  goto INVALID_XML; \
362
362
  EXPECT_CHAR("end attr name", '=') \
363
363
  end = cur - 1; \
364
- xh_log_trace2("attr name: [%.*s]", end - node, node);\
364
+ xh_log_trace2("attr name: [%.*s]", (int) (end - node), node);\
365
365
  \
366
366
  XH_PPCAT(loop, _SEARCH_ATTRIBUTE_VALUE): \
367
367
  DO(XH_PPCAT(loop, _PARSE_ATTR_VALUE)) \
@@ -571,7 +571,7 @@ XH_PPCAT(loop, _SEARCH_ATTRIBUTE_VALUE): \
571
571
  END(XH_PPCAT(loop, _REFERENCE)) \
572
572
  goto INVALID_REF; \
573
573
  XH_PPCAT(loop, _REFEFENCE_VALUE): \
574
- xh_log_trace1("parse reference value: %lu", code); \
574
+ xh_log_trace1("parse reference value: %u", code); \
575
575
  if (code == 0 || code > 0x10FFFF) goto INVALID_REF; \
576
576
  if (code >= 0x80) { \
577
577
  if (code < 0x800) { \
@@ -676,7 +676,7 @@ xh_x2h_match_node(xh_char_t *name, size_t name_len, VALUE expr)
676
676
  xh_char_t *expr_str;
677
677
  size_t expr_len;
678
678
 
679
- xh_log_trace2("match node: [%.*s]", name_len, name);
679
+ xh_log_trace2("match node: [%.*s]", (int) name_len, name);
680
680
 
681
681
  str = _NEW_STRING(name, name_len, TRUE);
682
682
 
@@ -703,7 +703,7 @@ xh_x2h_match_node(xh_char_t *name, size_t name_len, VALUE expr)
703
703
  xh_log_trace0("match string");
704
704
  expr_str = XH_CHAR_CAST RSTRING_PTR(expr);
705
705
  expr_len = RSTRING_LEN(expr);
706
- xh_log_trace2("expr: [%.*s]", expr_len, expr_str);
706
+ xh_log_trace2("expr: [%.*s]", (int) expr_len, expr_str);
707
707
  if (name_len == expr_len && !xh_strncmp(name, expr_str, name_len)) {
708
708
  xh_log_trace0("match TRUE");
709
709
  return TRUE;
@@ -935,10 +935,10 @@ xh_x2h_parse(xh_x2h_ctx_t *ctx, xh_reader_t *reader)
935
935
  if (ctx->end != NULL) ctx->end -= off;
936
936
  }
937
937
 
938
- xh_log_trace2("read buf: %.*s", len, buf);
938
+ xh_log_trace2("read buf: %.*s", (int) len, buf);
939
939
 
940
940
  do {
941
- xh_log_trace2("parse buf: %.*s", len, buf);
941
+ xh_log_trace2("parse buf: %.*s", (int) len, buf);
942
942
 
943
943
  xh_x2h_parse_chunk(ctx, &buf, &len, eof);
944
944