fast-xml 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/fastxml/xh.h ADDED
@@ -0,0 +1,58 @@
1
+ #ifndef _XH_H_
2
+ #define _XH_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+
7
+ #define XH_INTERNAL_ENCODING "utf-8"
8
+
9
+ typedef enum { /* Conversion method selector; stored in the `method` field of xh_opts_t. */
10
+ XH_METHOD_NATIVE = 0, /* value 0; its settings are the fields grouped under "native options" in xh_opts_t */
11
+ XH_METHOD_LX /* value 1; its settings are the fields grouped under "LX options" in xh_opts_t */
12
+ } xh_method_t;
13
+
14
+ typedef struct { /* Option set passed to the xh_*_opts()/xh_parse_args() functions declared in this header. */
15
+ xh_method_t method; /* conversion method: XH_METHOD_NATIVE or XH_METHOD_LX */
16
+ xh_bool_t use_attr; /* NOTE(review): presumably selects attribute-style output - confirm against the writer */
17
+
18
+ /* native options */
19
+ xh_char_t version[XH_PARAM_LEN]; /* fixed-size buffer; presumably the XML declaration version - confirm */
20
+ xh_char_t encoding[XH_PARAM_LEN]; /* fixed-size buffer; presumably the output encoding name - confirm */
21
+ xh_char_t root[XH_PARAM_LEN]; /* fixed-size buffer; presumably the root element name - confirm */
22
+ xh_bool_t utf8;
23
+ xh_bool_t xml_decl;
24
+ xh_bool_t keep_root;
25
+ xh_bool_t canonical;
26
+ xh_char_t content[XH_PARAM_LEN]; /* fixed-size buffer of XH_PARAM_LEN bytes */
27
+ xh_int_t indent;
28
+ VALUE output; /* Ruby object handle; NOTE(review): presumably the output destination - confirm */
29
+ #ifdef XH_HAVE_DOM /* the `doc` field only exists in builds with DOM support */
30
+ xh_bool_t doc;
31
+ #endif
32
+ xh_int_t max_depth;
33
+ xh_int_t buf_size;
34
+ xh_pattern_t force_array; /* xh_pattern_t is declared elsewhere (not in this header) */
35
+ xh_bool_t force_content;
36
+ xh_bool_t merge_text;
37
+ xh_pattern_t filter;
38
+ VALUE cb; /* Ruby object handle; NOTE(review): presumably a callback - confirm */
39
+
40
+ /* LX options */
41
+ xh_char_t attr[XH_PARAM_LEN]; /* fixed-size buffer of XH_PARAM_LEN bytes */
42
+ size_t attr_len; /* NOTE(review): presumably the length of the string stored in `attr` - confirm */
43
+ xh_char_t text[XH_PARAM_LEN];
44
+ xh_bool_t trim;
45
+ xh_char_t cdata[XH_PARAM_LEN];
46
+ xh_char_t comm[XH_PARAM_LEN];
47
+ } xh_opts_t;
48
+
49
+ xh_opts_t *xh_create_opts(void); /* returns an xh_opts_t pointer; NOTE(review): presumably heap-allocated and released with xh_destroy_opts - confirm in the definition */
50
+ void xh_destroy_opts(xh_opts_t *opts); /* counterpart of xh_create_opts; definition not in this header */
51
+ xh_bool_t xh_init_opts(xh_opts_t *opts); /* NOTE(review): presumably fills `opts` with defaults; meaning of the boolean result to be confirmed */
52
+ void xh_parse_args(xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv); /* takes a Ruby argument vector (argc/argv); `nparam` is in/out - presumably the index of the next unread argument, confirm */
53
+ void xh_copy_opts(xh_opts_t *dst, xh_opts_t *src); /* NOTE(review): presumably copies `src` into `dst` - confirm */
54
+ void *xh_get_obj_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv, const char *class); /* `class` is a valid identifier in C only; this header cannot be included from C++ as written */
55
+ VALUE xh_get_hash_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv); /* returns a Ruby VALUE; presumably the next Hash argument - confirm */
56
+ void xh_merge_opts(xh_opts_t *ctx_opts, xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv); /* NOTE(review): presumably combines `opts` and call arguments into `ctx_opts` - confirm */
57
+
58
+ #endif /* _XH_H_ */
@@ -0,0 +1,40 @@
1
+ #include "xh_config.h"
2
+ #include "xh_core.h"
3
+
4
+ void
5
+ xh_buffer_init(xh_buffer_t *buf, size_t size)
6
+ {
7
+ buf->start = buf->cur = malloc(size);
8
+ if (buf->start == NULL) {
9
+ rb_raise(rb_eNoMemError, "Memory allocation error");
10
+ }
11
+ buf->end = buf->start + size;
12
+
13
+ xh_log_debug2("buf: %p size: %lu", buf->start, size);
14
+ }
15
+
16
+ void
17
+ xh_buffer_grow(xh_buffer_t *buf, size_t inc)
18
+ {
19
+ size_t size, use;
20
+
21
+ if (inc <= (size_t) (buf->end - buf->cur)) {
22
+ return;
23
+ }
24
+
25
+ size = buf->end - buf->start;
26
+ use = buf->cur - buf->start;
27
+
28
+ xh_log_debug2("old buf: %p size: %lu", buf->start, size);
29
+
30
+ size += inc < size ? size : inc;
31
+
32
+ buf->start = realloc(buf->start, size);
33
+ if (buf->start == NULL) {
34
+ rb_raise(rb_eNoMemError, "Memory allocation error");
35
+ }
36
+ buf->cur = buf->start + use;
37
+ buf->end = buf->start + size;
38
+
39
+ xh_log_debug2("new buf: %p size: %lu", buf->start, size);
40
+ }
@@ -0,0 +1,38 @@
1
+ #ifndef _XH_BUFFER_H_
2
+ #define _XH_BUFFER_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+
7
+ typedef struct _xh_buffer_t xh_buffer_t; /* Byte buffer described by three pointers into one malloc'ed block. */
8
+ struct _xh_buffer_t {
9
+ xh_char_t *start; /* first byte of the block (what xh_buffer_destroy() frees) */
10
+ xh_char_t *cur; /* current position; xh_buffer_use() is cur - start */
11
+ xh_char_t *end; /* one past the last byte; xh_buffer_avail() is end - cur */
12
+ };
13
+
14
+ void xh_buffer_init(xh_buffer_t *buf, size_t size); /* allocates `size` bytes for `buf`; raises Ruby NoMemError on allocation failure */
15
+ void xh_buffer_grow(xh_buffer_t *buf, size_t inc); /* ensures at least `inc` bytes are free after cur, reallocating if needed; raises Ruby NoMemError on failure */
16
+
17
+ XH_INLINE void
18
+ xh_buffer_destroy(xh_buffer_t *buf)
19
+ {
20
+ if (buf->start != NULL) {
21
+ xh_log_debug1("free enc buf: %p", buf->start);
22
+ free(buf->start);
23
+ }
24
+ }
25
+
26
/*
 * Accessors for xh_buffer_t.  `b` is a pointer to a buffer and may be
 * evaluated more than once, so it must not have side effects.
 *
 *   avail / use / size   - byte counts (pointer differences)
 *   start / pos / end    - the raw pointers
 *   reset                - statement: rewind cur to start
 *   seek / seek_eof /
 *   seek_top             - move cur; each is a fully parenthesized
 *                          assignment expression, so it can be used inside
 *                          a larger expression and yields the new position
 *   grow50               - grow the buffer by half of its current size
 */
#define xh_buffer_avail(b)      ((b)->end - (b)->cur)
#define xh_buffer_use(b)        ((b)->cur - (b)->start)
#define xh_buffer_start(b)      ((b)->start)
#define xh_buffer_pos(b)        ((b)->cur)
#define xh_buffer_end(b)        ((b)->end)
#define xh_buffer_size(b)       ((b)->end - (b)->start)
#define xh_buffer_reset(b)      do { (b)->cur = (b)->start; } while (0)
#define xh_buffer_seek(b, p)    ((b)->cur = (p))
#define xh_buffer_seek_eof(b)   ((b)->cur = (b)->end)
#define xh_buffer_seek_top(b)   ((b)->cur = (b)->start)
#define xh_buffer_grow50(b)     xh_buffer_grow((b), xh_buffer_size(b) / 2)
37
+
38
+ #endif /* _XH_BUFFER_H_ */
@@ -0,0 +1,97 @@
1
+ #ifndef _XH_BUFFER_HELPER_H_
2
+ #define _XH_BUFFER_HELPER_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+
7
/*
 * Unchecked write helpers for any buffer object with a `cur` byte pointer.
 *
 * None of these macros checks for free space: the caller must have
 * reserved enough room beforehand.  The escaping variants write up to
 * 5 bytes (text) or 6 bytes (attributes) per input byte.
 *
 * They expand to complete statements and are meant to be invoked WITHOUT a
 * trailing semicolon (see how they nest below); do not use them as the
 * unbraced body of an if/else.
 *
 * The string variants (SHORT_STRING, STRING, ESCAPE_*) modify their `s`
 * and `l` arguments, which therefore must be modifiable lvalues: `s` ends
 * up past the copied bytes and `l` is consumed.
 *
 * The fixed-width CHARn helpers copy with memcpy rather than through
 * uint16_t / uint32_t pointer casts, because `cur` may sit at any byte
 * offset and such casts are misaligned accesses and aliasing violations.
 */
#define XH_BUFFER_WRITE_LONG_STRING(b, s, l)                            \
    memcpy((b)->cur, (s), (l));                                         \
    (b)->cur += (l);
#define XH_BUFFER_WRITE_SHORT_STRING(b, s, l)                           \
    while (l--) {                                                       \
        *(b)->cur++ = *s++;                                             \
    }
/* Byte loop for short inputs (fewer than 17 bytes), memcpy otherwise. */
#define XH_BUFFER_WRITE_STRING(b, s, l)                                 \
    if (l < 17) {                                                       \
        XH_BUFFER_WRITE_SHORT_STRING(b, s, l)                           \
    }                                                                   \
    else {                                                              \
        XH_BUFFER_WRITE_LONG_STRING(b, s, l)                            \
    }
#define XH_BUFFER_WRITE_CHAR(b, c)                                      \
    *(b)->cur++ = (c);
#define XH_BUFFER_WRITE_CHAR2(b, s)                                     \
    memcpy((b)->cur, (s), 2);                                           \
    (b)->cur += 2;
#define XH_BUFFER_WRITE_CHAR3(b, s)                                     \
    XH_BUFFER_WRITE_CHAR2(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[2])
#define XH_BUFFER_WRITE_CHAR4(b, s)                                     \
    memcpy((b)->cur, (s), 4);                                           \
    (b)->cur += 4;
#define XH_BUFFER_WRITE_CHAR5(b, s)                                     \
    XH_BUFFER_WRITE_CHAR4(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[4])
#define XH_BUFFER_WRITE_CHAR6(b, s)                                     \
    XH_BUFFER_WRITE_CHAR4(b, s)                                         \
    XH_BUFFER_WRITE_CHAR2(b, (s) + 4)
#define XH_BUFFER_WRITE_CHAR7(b, s)                                     \
    XH_BUFFER_WRITE_CHAR6(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[6])
#define XH_BUFFER_WRITE_CHAR8(b, s)                                     \
    XH_BUFFER_WRITE_CHAR4(b, s)                                         \
    XH_BUFFER_WRITE_CHAR4(b, (s) + 4)
#define XH_BUFFER_WRITE_CHAR9(b, s)                                     \
    XH_BUFFER_WRITE_CHAR8(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[8])
/*
 * Copy `l` bytes of element text, escaping CR, '<', '>' and '&'.
 * Each byte is stored at cur first; for an ordinary byte cur simply
 * advances, otherwise the stored byte is overwritten by the entity.
 */
#define XH_BUFFER_WRITE_ESCAPE_STRING(b, s, l)                          \
    while (l--) {                                                       \
        switch (*(b)->cur = *s++) {                                     \
            case '\r':                                                  \
                XH_BUFFER_WRITE_CHAR5(b, "&#13;")                       \
                break;                                                  \
            case '<':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&lt;")                        \
                break;                                                  \
            case '>':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&gt;")                        \
                break;                                                  \
            case '&':                                                   \
                XH_BUFFER_WRITE_CHAR5(b, "&amp;")                       \
                break;                                                  \
            default:                                                    \
                (b)->cur++;                                             \
        }                                                               \
    }
/*
 * Copy `l` bytes of an attribute value, escaping LF, CR, TAB, '<', '>',
 * '&' and '"'.  Works like XH_BUFFER_WRITE_ESCAPE_STRING.
 */
#define XH_BUFFER_WRITE_ESCAPE_ATTR(b, s, l)                            \
    while (l--) {                                                       \
        switch (*(b)->cur = *s++) {                                     \
            case '\n':                                                  \
                XH_BUFFER_WRITE_CHAR5(b, "&#10;")                       \
                break;                                                  \
            case '\r':                                                  \
                XH_BUFFER_WRITE_CHAR5(b, "&#13;")                       \
                break;                                                  \
            case '\t':                                                  \
                XH_BUFFER_WRITE_CHAR4(b, "&#9;")                        \
                break;                                                  \
            case '<':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&lt;")                        \
                break;                                                  \
            case '>':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&gt;")                        \
                break;                                                  \
            case '&':                                                   \
                XH_BUFFER_WRITE_CHAR5(b, "&amp;")                       \
                break;                                                  \
            case '"':                                                   \
                XH_BUFFER_WRITE_CHAR6(b, "&quot;")                      \
                break;                                                  \
            default:                                                    \
                (b)->cur++;                                             \
        }                                                               \
    }
/* Write a string literal (or char array) without its terminating NUL. */
#define XH_BUFFER_WRITE_CONSTANT(b, s)                                  \
    XH_BUFFER_WRITE_LONG_STRING(b, s, sizeof(s) - 1)
96
+
97
+ #endif /* _XH_BUFFER_HELPER_H_ */
@@ -0,0 +1,74 @@
1
+ #ifndef _XH_CONFIG_H_
2
+ #define _XH_CONFIG_H_
3
+
4
+ #include "ruby.h"
5
+ #if HAVE_RUBY_ENCODING_H
6
+ #include "ruby/encoding.h"
7
+ #endif
8
+ #include <stdint.h>
9
+ #include <sys/stat.h>
10
+ #include <fcntl.h>
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+ #include <errno.h>
15
+ #ifdef WIN32
16
+ #include <windows.h>
17
+ #include <io.h>
18
+ #else
19
+ #include <sys/mman.h>
20
+ #endif
21
+
22
/*
 * Compiler-specific helpers.
 *
 *   expect(expr, value) - branch-prediction hint; plain `expr` elsewhere
 *   XH_INLINE           - storage/inline specifier for header functions
 *   XH_UNUSED(v)        - declares `v` while suppressing "unused" warnings;
 *                         it must expand to the name it was given
 */
#if __GNUC__ >= 3
# define expect(expr,value)  __builtin_expect ((expr), (value))
# define XH_INLINE           static inline
# define XH_UNUSED(v)        v __attribute__((unused))
#else
# define expect(expr,value)  (expr)
# define XH_INLINE           static
# define XH_UNUSED(v)        v
#endif
31
+
32
+ #ifdef _MSC_VER /* MSVC-only compatibility settings */
33
+ #define _CRT_SECURE_NO_WARNINGS /* NOTE(review): defined after the system headers above were already included - confirm it still has the intended effect */
34
+ #define _CRT_NONSTDC_NO_DEPRECATE
35
+ #define strncasecmp _strnicmp /* map the POSIX case-insensitive comparisons onto the MSVC names */
36
+ #define strcasecmp _stricmp
37
+ #endif
38
+
39
+ #define expect_false(expr) expect ((expr) != 0, 0) /* normalises expr to 0/1 and hints that it is usually false */
40
+ #define expect_true(expr) expect ((expr) != 0, 1) /* normalises expr to 0/1 and hints that it is usually true */
41
+
42
/*
 * Basic scalar types used throughout the extension.
 *
 * The boolean and integer types are pointer-sized.  xh_char_t is a raw
 * byte; it is spelled `unsigned char` instead of the BSD alias `u_char`,
 * which <sys/types.h> only provides when non-ISO feature macros are
 * enabled.
 */
typedef uintptr_t       xh_bool_t;
typedef uintptr_t       xh_uint_t;
typedef intptr_t        xh_int_t;
typedef unsigned char   xh_char_t;
46
+
47
+ #define XH_CHAR_CAST (xh_char_t *) /* cast prefix: write XH_CHAR_CAST expr */
48
+ #define XH_EMPTY_STRING (XH_CHAR_CAST "") /* empty string literal typed as xh_char_t * */
49
+
50
+ #if defined(XH_HAVE_ICONV) || defined(XH_HAVE_ICU) /* either conversion backend enables the encoder code */
51
+ #define XH_HAVE_ENCODER
52
+ #endif
53
+
54
+ #ifdef HAVE_RUBY_ENCODING_H
55
+ #define XH_FORCE_UTF8(s) rb_enc_set_index(s, rb_utf8_encindex()) /* tag Ruby string s with the UTF-8 encoding index */
56
+ #else
57
+ #define XH_FORCE_UTF8(s) /* expands to nothing when ruby/encoding.h is unavailable */
58
+ #endif
59
+
60
+ #if defined(XH_HAVE_XML2) && defined(XH_HAVE_XML__LIBXML) /* DOM support needs both macros defined */
61
+ #define XH_HAVE_DOM
62
+ #endif
63
+
64
+ #ifdef XH_HAVE_DOM
65
+ #include <libxml/parser.h>
66
+ #endif
67
+
68
+ #define XH_HAVE_MMAP /* NOTE(review): defined unconditionally, including WIN32 builds where <sys/mman.h> is not included above - confirm users of this macro handle that */
69
+
70
+ extern VALUE xh_module; /* Ruby objects defined in another translation unit */
71
+ extern VALUE xh_parse_error_class; /* exception class raised by the encoder on conversion errors */
72
+ extern ID xh_id_next;
73
+
74
+ #endif /* _XH_CONFIG_H_ */
@@ -0,0 +1,53 @@
1
+ #ifndef _XH_CORE_H_
2
+ #define _XH_CORE_H_
3
+
4
+ /*
5
+ * Concatenate preprocessor tokens A and B without expanding macro definitions
6
+ * (however, if invoked from a macro, macro arguments are expanded).
7
+ */
8
+ #define XH_PPCAT_NX(A, B) A ## B
9
+
10
+ /*
11
+ * Concatenate preprocessor tokens A and B after macro-expanding them.
12
+ */
13
+ #define XH_PPCAT(A, B) XH_PPCAT_NX(A, B)
14
+
15
+ /*
16
+ * Turn A into a string literal without expanding macro definitions
17
+ * (however, if invoked from a macro, macro arguments are expanded).
18
+ */
19
+ #define XH_STRINGIZE_NX(A) #A
20
+
21
+ /*
22
+ * Turn A into a string literal after macro-expanding it.
23
+ */
24
+ #define XH_STRINGIZE(A) XH_STRINGIZE_NX(A)
25
+
26
+ #ifndef FALSE
27
+ #define FALSE (0)
28
+ #endif
29
+
30
+ #ifndef TRUE
31
+ #define TRUE (1)
32
+ #endif
33
+
34
+ #include "xh_log.h"
35
+ #include "xh_string.h"
36
+ #include "xh_sort.h"
37
+ #include "xh_stack.h"
38
+ //#include "xh_stash.h"
39
+ #include "xh_param.h"
40
+ #include "xh_buffer_helper.h"
41
+ #include "xh_buffer.h"
42
+ #include "xh_ruby_buffer.h"
43
+ #include "xh_encoder.h"
44
+ //#include "xh_reader.h"
45
+ #include "xh_writer.h"
46
+ #include "xh.h"
47
+ #include "xh_h2x.h"
48
+ #include "xh_xml.h"
49
+ /*
50
+ #include "xh_x2h.h"
51
+ #include "xh_dom.h"
52
+ */
53
+ #endif /* _XH_CORE_H_ */
@@ -0,0 +1,193 @@
1
+ #include "xh_config.h"
2
+ #include "xh_core.h"
3
+
4
+ #ifdef XH_HAVE_ENCODER
5
+
6
+ #ifdef XH_HAVE_ICU
7
+ static void
8
+ xh_encoder_uconv_destroy(UConverter *uconv)
9
+ {
10
+ if (uconv != NULL) {
11
+ ucnv_close(uconv);
12
+ }
13
+ }
14
+
15
+ static UConverter *
16
+ xh_encoder_uconv_create(xh_char_t *encoding, xh_bool_t toUnicode)
17
+ {
18
+ UConverter *uconv;
19
+ UErrorCode status = U_ZERO_ERROR;
20
+
21
+ uconv = ucnv_open((char *) encoding, &status);
22
+ if ( U_FAILURE(status) ) {
23
+ return NULL;
24
+ }
25
+
26
+ if (toUnicode) {
27
+ ucnv_setToUCallBack(uconv, UCNV_TO_U_CALLBACK_STOP,
28
+ NULL, NULL, NULL, &status);
29
+ }
30
+ else {
31
+ ucnv_setFromUCallBack(uconv, UCNV_FROM_U_CALLBACK_STOP,
32
+ NULL, NULL, NULL, &status);
33
+ }
34
+
35
+ return uconv;
36
+ }
37
+ #endif
38
+
39
+ void
40
+ xh_encoder_destroy(xh_encoder_t *encoder)
41
+ {
42
+ if (encoder != NULL) {
43
+ #ifdef XH_HAVE_ICONV
44
+ if (encoder->iconv != NULL) {
45
+ xh_log_debug0("destroy iconv encoder");
46
+ iconv_close(encoder->iconv);
47
+ }
48
+ #endif
49
+
50
+ #ifdef XH_HAVE_ICU
51
+ if (encoder->uconv_from != NULL) {
52
+ xh_log_debug0("destroy icu encoder");
53
+ xh_encoder_uconv_destroy(encoder->uconv_from);
54
+ xh_encoder_uconv_destroy(encoder->uconv_to);
55
+ }
56
+ #endif
57
+ free(encoder);
58
+ }
59
+ }
60
+
61
+ xh_encoder_t *
62
+ xh_encoder_create(xh_char_t *tocode, xh_char_t *fromcode)
63
+ {
64
+ xh_encoder_t *encoder;
65
+
66
+ encoder = malloc(sizeof(xh_encoder_t));
67
+ if (encoder == NULL) {
68
+ return NULL;
69
+ }
70
+ memset(encoder, 0, sizeof(xh_encoder_t));
71
+
72
+ xh_str_copy(encoder->tocode, tocode, XH_PARAM_LEN);
73
+ xh_str_copy(encoder->fromcode, fromcode, XH_PARAM_LEN);
74
+
75
+ #ifdef XH_HAVE_ICONV
76
+ xh_log_debug2("create iconv encoder from: '%s' to: '%s'", fromcode, tocode);
77
+ encoder->iconv = iconv_open((char *) tocode, (char *) fromcode);
78
+ if (encoder->iconv != (iconv_t) -1) {
79
+ encoder->type = XH_ENC_ICONV;
80
+ return encoder;
81
+ }
82
+ encoder->iconv = NULL;
83
+ #endif
84
+
85
+ #ifdef XH_HAVE_ICU
86
+ xh_log_debug2("create icu encoder from: '%s' to: '%s'", fromcode, tocode);
87
+ encoder->uconv_to = xh_encoder_uconv_create(tocode, 1);
88
+ if (encoder->uconv_to != NULL) {
89
+ encoder->uconv_from = xh_encoder_uconv_create(fromcode, 0);
90
+ if (encoder->uconv_from != NULL) {
91
+ encoder->type = XH_ENC_ICU;
92
+ encoder->pivotSource = encoder->pivotTarget = encoder->pivotStart = encoder->pivotBuffer;
93
+ encoder->pivotLimit = encoder->pivotBuffer + sizeof(encoder->pivotBuffer) / sizeof(encoder->pivotBuffer[0]);
94
+ return encoder;
95
+ }
96
+ }
97
+ #endif
98
+
99
+ xh_encoder_destroy(encoder);
100
+
101
+ return NULL;
102
+ }
103
+
104
+ void
105
+ xh_encoder_encode_ruby_buffer(xh_encoder_t *encoder, xh_ruby_buffer_t *main_buf, xh_ruby_buffer_t *enc_buf)
106
+ {
107
+ xh_char_t *src = main_buf->start;
108
+
109
+ #ifdef XH_HAVE_ICONV
110
+ if (encoder->type == XH_ENC_ICONV) {
111
+ size_t in_left = main_buf->cur - main_buf->start;
112
+ size_t out_left = enc_buf->end - enc_buf->cur;
113
+
114
+ size_t converted = iconv(encoder->iconv, (char **) &src, &in_left, (char **) &enc_buf->cur, &out_left);
115
+ if (converted == (size_t) -1) {
116
+ rb_raise(xh_parse_error_class, "Encoding error");
117
+ }
118
+ return;
119
+ }
120
+ #endif
121
+
122
+ #ifdef XH_HAVE_ICU
123
+ UErrorCode err = U_ZERO_ERROR;
124
+ ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) &enc_buf->cur, (char *) enc_buf->end,
125
+ (const char **) &src, (char *) main_buf->cur, NULL, NULL, NULL, NULL,
126
+ FALSE, TRUE, &err);
127
+
128
+ if ( U_FAILURE(err) ) {
129
+ rb_raise(xh_parse_error_class, "Encoding error: %d", err);
130
+ }
131
+ #endif
132
+ }
133
+
134
+ void
135
+ xh_encoder_encode_string(xh_encoder_t *encoder, xh_char_t **src, size_t *src_left, xh_char_t **dst, size_t *dst_left)
136
+ {
137
+ #ifdef XH_HAVE_ICONV
138
+ if (encoder->type == XH_ENC_ICONV) {
139
+ size_t converted = iconv(encoder->iconv, (char **) src, src_left, (char **) dst, dst_left);
140
+ if (converted == (size_t) -1) {
141
+ switch (errno) {
142
+ case EILSEQ:
143
+ rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
144
+ case E2BIG:
145
+ encoder->state = XH_ENC_BUFFER_OVERFLOW;
146
+ break;
147
+ case EINVAL:
148
+ encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
149
+ break;
150
+ default:
151
+ rb_raise(xh_parse_error_class, "Encoding error");
152
+ }
153
+ }
154
+ else {
155
+ encoder->state = XH_ENC_OK;
156
+ }
157
+ return;
158
+ }
159
+ #endif
160
+
161
+ #ifdef XH_HAVE_ICU
162
+ UErrorCode err = U_ZERO_ERROR;
163
+ xh_char_t *old_src = *src;
164
+ xh_char_t *old_dst = *dst;
165
+
166
+ ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) dst, (char *) (*dst + *dst_left),
167
+ (const char **) src, (char *) (*src + *src_left), encoder->pivotStart, &encoder->pivotSource, &encoder->pivotTarget, encoder->pivotLimit,
168
+ FALSE, FALSE, &err);
169
+
170
+ *src_left -= *src - old_src;
171
+ *dst_left -= *dst - old_dst;
172
+
173
+ if ( U_FAILURE(err) ) {
174
+ switch (err) {
175
+ case U_INVALID_CHAR_FOUND:
176
+ rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
177
+ case U_BUFFER_OVERFLOW_ERROR:
178
+ encoder->state = XH_ENC_BUFFER_OVERFLOW;
179
+ break;
180
+ case U_TRUNCATED_CHAR_FOUND:
181
+ encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
182
+ break;
183
+ default:
184
+ rb_raise(xh_parse_error_class, "Encoding error: %d", err);
185
+ }
186
+ }
187
+ else {
188
+ encoder->state = XH_ENC_OK;
189
+ }
190
+ #endif
191
+ }
192
+
193
+ #endif /* XH_HAVE_ENCODER */