fast-xml 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/ext/fastxml/xh.h ADDED
@@ -0,0 +1,58 @@
1
+ #ifndef _XH_H_
2
+ #define _XH_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+
7
/* Encoding name the library uses internally (per the macro name). */
#define XH_INTERNAL_ENCODING "utf-8"

/*
 * Selector stored in xh_opts_t.method.
 * The enumerators keep their original numeric values (0 and 1).
 */
typedef enum {
    XH_METHOD_NATIVE = 0,
    XH_METHOD_LX     = 1
} xh_method_t;
13
+
14
+ typedef struct {
15
+ xh_method_t method;
16
+ xh_bool_t use_attr;
17
+
18
+ /* native options */
19
+ xh_char_t version[XH_PARAM_LEN];
20
+ xh_char_t encoding[XH_PARAM_LEN];
21
+ xh_char_t root[XH_PARAM_LEN];
22
+ xh_bool_t utf8;
23
+ xh_bool_t xml_decl;
24
+ xh_bool_t keep_root;
25
+ xh_bool_t canonical;
26
+ xh_char_t content[XH_PARAM_LEN];
27
+ xh_int_t indent;
28
+ VALUE output;
29
+ #ifdef XH_HAVE_DOM
30
+ xh_bool_t doc;
31
+ #endif
32
+ xh_int_t max_depth;
33
+ xh_int_t buf_size;
34
+ xh_pattern_t force_array;
35
+ xh_bool_t force_content;
36
+ xh_bool_t merge_text;
37
+ xh_pattern_t filter;
38
+ VALUE cb;
39
+
40
+ /* LX options */
41
+ xh_char_t attr[XH_PARAM_LEN];
42
+ size_t attr_len;
43
+ xh_char_t text[XH_PARAM_LEN];
44
+ xh_bool_t trim;
45
+ xh_char_t cdata[XH_PARAM_LEN];
46
+ xh_char_t comm[XH_PARAM_LEN];
47
+ } xh_opts_t;
48
+
49
+ xh_opts_t *xh_create_opts(void);
50
+ void xh_destroy_opts(xh_opts_t *opts);
51
+ xh_bool_t xh_init_opts(xh_opts_t *opts);
52
+ void xh_parse_args(xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv);
53
+ void xh_copy_opts(xh_opts_t *dst, xh_opts_t *src);
54
+ void *xh_get_obj_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv, const char *class);
55
+ VALUE xh_get_hash_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv);
56
+ void xh_merge_opts(xh_opts_t *ctx_opts, xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv);
57
+
58
+ #endif /* _XH_H_ */
@@ -0,0 +1,40 @@
1
+ #include "xh_config.h"
2
+ #include "xh_core.h"
3
+
4
+ void
5
+ xh_buffer_init(xh_buffer_t *buf, size_t size)
6
+ {
7
+ buf->start = buf->cur = malloc(size);
8
+ if (buf->start == NULL) {
9
+ rb_raise(rb_eNoMemError, "Memory allocation error");
10
+ }
11
+ buf->end = buf->start + size;
12
+
13
+ xh_log_debug2("buf: %p size: %lu", buf->start, size);
14
+ }
15
+
16
+ void
17
+ xh_buffer_grow(xh_buffer_t *buf, size_t inc)
18
+ {
19
+ size_t size, use;
20
+
21
+ if (inc <= (size_t) (buf->end - buf->cur)) {
22
+ return;
23
+ }
24
+
25
+ size = buf->end - buf->start;
26
+ use = buf->cur - buf->start;
27
+
28
+ xh_log_debug2("old buf: %p size: %lu", buf->start, size);
29
+
30
+ size += inc < size ? size : inc;
31
+
32
+ buf->start = realloc(buf->start, size);
33
+ if (buf->start == NULL) {
34
+ rb_raise(rb_eNoMemError, "Memory allocation error");
35
+ }
36
+ buf->cur = buf->start + use;
37
+ buf->end = buf->start + size;
38
+
39
+ xh_log_debug2("new buf: %p size: %lu", buf->start, size);
40
+ }
@@ -0,0 +1,38 @@
1
+ #ifndef _XH_BUFFER_H_
2
+ #define _XH_BUFFER_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+
7
+ typedef struct _xh_buffer_t xh_buffer_t;
8
+ struct _xh_buffer_t {
9
+ xh_char_t *start;
10
+ xh_char_t *cur;
11
+ xh_char_t *end;
12
+ };
13
+
14
+ void xh_buffer_init(xh_buffer_t *buf, size_t size);
15
+ void xh_buffer_grow(xh_buffer_t *buf, size_t inc);
16
+
17
+ XH_INLINE void
18
+ xh_buffer_destroy(xh_buffer_t *buf)
19
+ {
20
+ if (buf->start != NULL) {
21
+ xh_log_debug1("free enc buf: %p", buf->start);
22
+ free(buf->start);
23
+ }
24
+ }
25
+
26
/*
 * Accessors for any object with start/cur/end byte pointers.
 * All arguments are parenthesised, and the seek macros are complete
 * parenthesised expressions, so they can be embedded in larger expressions
 * without precedence surprises.
 */
#define xh_buffer_avail(b)      ((b)->end - (b)->cur)      /* bytes from cur to end    */
#define xh_buffer_use(b)        ((b)->cur - (b)->start)    /* bytes from start to cur  */
#define xh_buffer_start(b)      ((b)->start)
#define xh_buffer_pos(b)        ((b)->cur)
#define xh_buffer_end(b)        ((b)->end)
#define xh_buffer_size(b)       ((b)->end - (b)->start)    /* bytes from start to end  */
#define xh_buffer_reset(b)      do { (b)->cur = (b)->start; } while (0)
#define xh_buffer_seek(b, p)    ((b)->cur = (p))
#define xh_buffer_seek_eof(b)   ((b)->cur = (b)->end)
#define xh_buffer_seek_top(b)   ((b)->cur = (b)->start)
/* Ask xh_buffer_grow() for half of the current size. */
#define xh_buffer_grow50(b)     xh_buffer_grow((b), xh_buffer_size(b) / 2)
37
+
38
+ #endif /* _XH_BUFFER_H_ */
@@ -0,0 +1,97 @@
1
+ #ifndef _XH_BUFFER_HELPER_H_
2
+ #define _XH_BUFFER_HELPER_H_
3
+
4
+ #include "xh_config.h"
5
+ #include "xh_core.h"
6
+
7
/*
 * Raw write helpers for any object `b` that has a byte pointer member `cur`.
 *
 *  - None of these macros checks how much room is left after `cur`.
 *  - Every macro expands to one or more complete statements; the macros in
 *    this header invoke each other without a trailing semicolon, so the
 *    expansions keep their own terminators.
 *  - SHORT_STRING, STRING (short path) and the ESCAPE macros advance `s`
 *    and count `l` down, so both must be modifiable lvalues.
 *  - The fixed-width CHARn writers copy with memcpy() instead of storing
 *    through uint16_t/uint32_t pointer casts, so neither `cur` nor `s`
 *    needs any particular alignment.
 */

/* Copy l bytes from s with memcpy and advance cur. */
#define XH_BUFFER_WRITE_LONG_STRING(b, s, l) \
    memcpy((b)->cur, (s), (l)); \
    (b)->cur += (l);

/* Copy l bytes from s one at a time; consumes s and l. */
#define XH_BUFFER_WRITE_SHORT_STRING(b, s, l) \
    while ((l)--) { \
        *(b)->cur++ = *(s)++; \
    }

/* Byte loop for fewer than 17 bytes, memcpy otherwise. */
#define XH_BUFFER_WRITE_STRING(b, s, l) \
    if ((l) < 17) { \
        XH_BUFFER_WRITE_SHORT_STRING(b, s, l) \
    } \
    else { \
        XH_BUFFER_WRITE_LONG_STRING(b, s, l) \
    }

/* Fixed-width writers: append exactly 1..9 bytes taken from c / s. */
#define XH_BUFFER_WRITE_CHAR(b, c) \
    *(b)->cur++ = (c);
#define XH_BUFFER_WRITE_CHAR2(b, s) \
    memcpy((b)->cur, (s), 2); \
    (b)->cur += 2;
#define XH_BUFFER_WRITE_CHAR3(b, s) \
    XH_BUFFER_WRITE_CHAR2(b, s) \
    XH_BUFFER_WRITE_CHAR(b, (s)[2])
#define XH_BUFFER_WRITE_CHAR4(b, s) \
    memcpy((b)->cur, (s), 4); \
    (b)->cur += 4;
#define XH_BUFFER_WRITE_CHAR5(b, s) \
    XH_BUFFER_WRITE_CHAR4(b, s) \
    XH_BUFFER_WRITE_CHAR(b, (s)[4])
#define XH_BUFFER_WRITE_CHAR6(b, s) \
    XH_BUFFER_WRITE_CHAR4(b, s) \
    XH_BUFFER_WRITE_CHAR2(b, (s) + 4)
#define XH_BUFFER_WRITE_CHAR7(b, s) \
    XH_BUFFER_WRITE_CHAR6(b, s) \
    XH_BUFFER_WRITE_CHAR(b, (s)[6])
#define XH_BUFFER_WRITE_CHAR8(b, s) \
    XH_BUFFER_WRITE_CHAR4(b, s) \
    XH_BUFFER_WRITE_CHAR4(b, (s) + 4)
#define XH_BUFFER_WRITE_CHAR9(b, s) \
    XH_BUFFER_WRITE_CHAR8(b, s) \
    XH_BUFFER_WRITE_CHAR(b, (s)[8])

/*
 * Copy l bytes of text, replacing CR, '<', '>' and '&' by character/entity
 * references.  The byte is stored first; for an escaped byte the reference
 * is then written over it starting at the same position.
 */
#define XH_BUFFER_WRITE_ESCAPE_STRING(b, s, l) \
    while ((l)--) { \
        switch (*(b)->cur = *(s)++) { \
            case '\r': \
                XH_BUFFER_WRITE_CHAR5(b, "&#13;") \
                break; \
            case '<': \
                XH_BUFFER_WRITE_CHAR4(b, "&lt;") \
                break; \
            case '>': \
                XH_BUFFER_WRITE_CHAR4(b, "&gt;") \
                break; \
            case '&': \
                XH_BUFFER_WRITE_CHAR5(b, "&amp;") \
                break; \
            default: \
                (b)->cur++; \
        } \
    }

/*
 * Same as XH_BUFFER_WRITE_ESCAPE_STRING, with LF, TAB and '"' escaped too.
 */
#define XH_BUFFER_WRITE_ESCAPE_ATTR(b, s, l) \
    while ((l)--) { \
        switch (*(b)->cur = *(s)++) { \
            case '\n': \
                XH_BUFFER_WRITE_CHAR5(b, "&#10;") \
                break; \
            case '\r': \
                XH_BUFFER_WRITE_CHAR5(b, "&#13;") \
                break; \
            case '\t': \
                XH_BUFFER_WRITE_CHAR4(b, "&#9;") \
                break; \
            case '<': \
                XH_BUFFER_WRITE_CHAR4(b, "&lt;") \
                break; \
            case '>': \
                XH_BUFFER_WRITE_CHAR4(b, "&gt;") \
                break; \
            case '&': \
                XH_BUFFER_WRITE_CHAR5(b, "&amp;") \
                break; \
            case '"': \
                XH_BUFFER_WRITE_CHAR6(b, "&quot;") \
                break; \
            default: \
                (b)->cur++; \
        } \
    }

/* Append a string literal (or char array) without its terminating NUL. */
#define XH_BUFFER_WRITE_CONSTANT(b, s) \
    XH_BUFFER_WRITE_LONG_STRING(b, s, sizeof(s) - 1)
96
+
97
+ #endif /* _XH_BUFFER_HELPER_H_ */
@@ -0,0 +1,74 @@
1
+ #ifndef _XH_CONFIG_H_
2
+ #define _XH_CONFIG_H_
3
+
4
+ #include "ruby.h"
5
+ #if HAVE_RUBY_ENCODING_H
6
+ #include "ruby/encoding.h"
7
+ #endif
8
+ #include <stdint.h>
9
+ #include <sys/stat.h>
10
+ #include <fcntl.h>
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+ #include <errno.h>
15
+ #ifdef WIN32
16
+ #include <windows.h>
17
+ #include <io.h>
18
+ #else
19
+ #include <sys/mman.h>
20
+ #endif
21
+
22
/*
 * Compiler portability shims.
 *
 *   expect(expr, value)  evaluates to expr; on GCC >= 3 it also tells the
 *                        optimiser that expr is expected to equal value.
 *   XH_INLINE            storage/inline specifier for header-defined helpers.
 *   XH_UNUSED(v)         declarator wrapper: yields `v`, tagged as unused on
 *                        GCC >= 3.  (The GCC variant used to expand to the
 *                        literal identifier `x` instead of its argument.)
 */
#if __GNUC__ >= 3
# define expect(expr,value)  __builtin_expect ((expr), (value))
# define XH_INLINE           static inline
# define XH_UNUSED(v)        v __attribute__((unused))
#else
# define expect(expr,value)  (expr)
# define XH_INLINE           static
# define XH_UNUSED(v)        v
#endif

/*
 * MSVC: silence CRT deprecation warnings and map the POSIX case-insensitive
 * comparison names onto the MSVC ones.
 * NOTE(review): the _CRT_* macros are defined after the CRT headers have
 * already been included above -- confirm they still take effect there.
 */
#ifdef _MSC_VER
# define _CRT_SECURE_NO_WARNINGS
# define _CRT_NONSTDC_NO_DEPRECATE
# define strncasecmp _strnicmp
# define strcasecmp  _stricmp
#endif

/* Truthiness tests annotated as unlikely / likely. */
#define expect_false(expr)  expect ((expr) != 0, 0)
#define expect_true(expr)   expect ((expr) != 0, 1)
41
+
42
/*
 * Basic scalar types used throughout the extension.
 * xh_bool_t / xh_uint_t / xh_int_t are pointer-sized integers; xh_char_t is
 * a raw byte.  `unsigned char` is spelled out because `u_char` is a BSD
 * extension that is not declared by any header on every platform.
 */
typedef uintptr_t      xh_bool_t;
typedef uintptr_t      xh_uint_t;
typedef intptr_t       xh_int_t;
typedef unsigned char  xh_char_t;

/* Cast prefix for turning char strings into xh_char_t strings. */
#define XH_CHAR_CAST     (xh_char_t *)
#define XH_EMPTY_STRING  (XH_CHAR_CAST "")
49
+
50
+ #if defined(XH_HAVE_ICONV) || defined(XH_HAVE_ICU)
51
+ #define XH_HAVE_ENCODER
52
+ #endif
53
+
54
+ #ifdef HAVE_RUBY_ENCODING_H
55
+ #define XH_FORCE_UTF8(s) rb_enc_set_index(s, rb_utf8_encindex())
56
+ #else
57
+ #define XH_FORCE_UTF8(s)
58
+ #endif
59
+
60
+ #if defined(XH_HAVE_XML2) && defined(XH_HAVE_XML__LIBXML)
61
+ #define XH_HAVE_DOM
62
+ #endif
63
+
64
+ #ifdef XH_HAVE_DOM
65
+ #include <libxml/parser.h>
66
+ #endif
67
+
68
+ #define XH_HAVE_MMAP
69
+
70
+ extern VALUE xh_module;
71
+ extern VALUE xh_parse_error_class;
72
+ extern ID xh_id_next;
73
+
74
+ #endif /* _XH_CONFIG_H_ */
@@ -0,0 +1,53 @@
1
+ #ifndef _XH_CORE_H_
2
+ #define _XH_CORE_H_
3
+
4
/*
 * Token pasting.
 * XH_PPCAT_NX glues its two arguments together exactly as written;
 * XH_PPCAT macro-expands both arguments first and then glues the results.
 */
#define XH_PPCAT_NX(lhs, rhs)   lhs ## rhs
#define XH_PPCAT(lhs, rhs)      XH_PPCAT_NX(lhs, rhs)

/*
 * Stringification.
 * XH_STRINGIZE_NX quotes its argument exactly as written;
 * XH_STRINGIZE macro-expands the argument first and quotes the result.
 */
#define XH_STRINGIZE_NX(tok)    #tok
#define XH_STRINGIZE(tok)       XH_STRINGIZE_NX(tok)

/* Boolean constants, unless something else already provides them. */
#if !defined(FALSE)
#define FALSE (0)
#endif

#if !defined(TRUE)
#define TRUE (1)
#endif
33
+
34
+ #include "xh_log.h"
35
+ #include "xh_string.h"
36
+ #include "xh_sort.h"
37
+ #include "xh_stack.h"
38
+ //#include "xh_stash.h"
39
+ #include "xh_param.h"
40
+ #include "xh_buffer_helper.h"
41
+ #include "xh_buffer.h"
42
+ #include "xh_ruby_buffer.h"
43
+ #include "xh_encoder.h"
44
+ //#include "xh_reader.h"
45
+ #include "xh_writer.h"
46
+ #include "xh.h"
47
+ #include "xh_h2x.h"
48
+ #include "xh_xml.h"
49
+ /*
50
+ #include "xh_x2h.h"
51
+ #include "xh_dom.h"
52
+ */
53
+ #endif /* _XH_CORE_H_ */
@@ -0,0 +1,193 @@
1
+ #include "xh_config.h"
2
+ #include "xh_core.h"
3
+
4
+ #ifdef XH_HAVE_ENCODER
5
+
6
+ #ifdef XH_HAVE_ICU
7
+ static void
8
+ xh_encoder_uconv_destroy(UConverter *uconv)
9
+ {
10
+ if (uconv != NULL) {
11
+ ucnv_close(uconv);
12
+ }
13
+ }
14
+
15
+ static UConverter *
16
+ xh_encoder_uconv_create(xh_char_t *encoding, xh_bool_t toUnicode)
17
+ {
18
+ UConverter *uconv;
19
+ UErrorCode status = U_ZERO_ERROR;
20
+
21
+ uconv = ucnv_open((char *) encoding, &status);
22
+ if ( U_FAILURE(status) ) {
23
+ return NULL;
24
+ }
25
+
26
+ if (toUnicode) {
27
+ ucnv_setToUCallBack(uconv, UCNV_TO_U_CALLBACK_STOP,
28
+ NULL, NULL, NULL, &status);
29
+ }
30
+ else {
31
+ ucnv_setFromUCallBack(uconv, UCNV_FROM_U_CALLBACK_STOP,
32
+ NULL, NULL, NULL, &status);
33
+ }
34
+
35
+ return uconv;
36
+ }
37
+ #endif
38
+
39
+ void
40
+ xh_encoder_destroy(xh_encoder_t *encoder)
41
+ {
42
+ if (encoder != NULL) {
43
+ #ifdef XH_HAVE_ICONV
44
+ if (encoder->iconv != NULL) {
45
+ xh_log_debug0("destroy iconv encoder");
46
+ iconv_close(encoder->iconv);
47
+ }
48
+ #endif
49
+
50
+ #ifdef XH_HAVE_ICU
51
+ if (encoder->uconv_from != NULL) {
52
+ xh_log_debug0("destroy icu encoder");
53
+ xh_encoder_uconv_destroy(encoder->uconv_from);
54
+ xh_encoder_uconv_destroy(encoder->uconv_to);
55
+ }
56
+ #endif
57
+ free(encoder);
58
+ }
59
+ }
60
+
61
+ xh_encoder_t *
62
+ xh_encoder_create(xh_char_t *tocode, xh_char_t *fromcode)
63
+ {
64
+ xh_encoder_t *encoder;
65
+
66
+ encoder = malloc(sizeof(xh_encoder_t));
67
+ if (encoder == NULL) {
68
+ return NULL;
69
+ }
70
+ memset(encoder, 0, sizeof(xh_encoder_t));
71
+
72
+ xh_str_copy(encoder->tocode, tocode, XH_PARAM_LEN);
73
+ xh_str_copy(encoder->fromcode, fromcode, XH_PARAM_LEN);
74
+
75
+ #ifdef XH_HAVE_ICONV
76
+ xh_log_debug2("create iconv encoder from: '%s' to: '%s'", fromcode, tocode);
77
+ encoder->iconv = iconv_open((char *) tocode, (char *) fromcode);
78
+ if (encoder->iconv != (iconv_t) -1) {
79
+ encoder->type = XH_ENC_ICONV;
80
+ return encoder;
81
+ }
82
+ encoder->iconv = NULL;
83
+ #endif
84
+
85
+ #ifdef XH_HAVE_ICU
86
+ xh_log_debug2("create icu encoder from: '%s' to: '%s'", fromcode, tocode);
87
+ encoder->uconv_to = xh_encoder_uconv_create(tocode, 1);
88
+ if (encoder->uconv_to != NULL) {
89
+ encoder->uconv_from = xh_encoder_uconv_create(fromcode, 0);
90
+ if (encoder->uconv_from != NULL) {
91
+ encoder->type = XH_ENC_ICU;
92
+ encoder->pivotSource = encoder->pivotTarget = encoder->pivotStart = encoder->pivotBuffer;
93
+ encoder->pivotLimit = encoder->pivotBuffer + sizeof(encoder->pivotBuffer) / sizeof(encoder->pivotBuffer[0]);
94
+ return encoder;
95
+ }
96
+ }
97
+ #endif
98
+
99
+ xh_encoder_destroy(encoder);
100
+
101
+ return NULL;
102
+ }
103
+
104
+ void
105
+ xh_encoder_encode_ruby_buffer(xh_encoder_t *encoder, xh_ruby_buffer_t *main_buf, xh_ruby_buffer_t *enc_buf)
106
+ {
107
+ xh_char_t *src = main_buf->start;
108
+
109
+ #ifdef XH_HAVE_ICONV
110
+ if (encoder->type == XH_ENC_ICONV) {
111
+ size_t in_left = main_buf->cur - main_buf->start;
112
+ size_t out_left = enc_buf->end - enc_buf->cur;
113
+
114
+ size_t converted = iconv(encoder->iconv, (char **) &src, &in_left, (char **) &enc_buf->cur, &out_left);
115
+ if (converted == (size_t) -1) {
116
+ rb_raise(xh_parse_error_class, "Encoding error");
117
+ }
118
+ return;
119
+ }
120
+ #endif
121
+
122
+ #ifdef XH_HAVE_ICU
123
+ UErrorCode err = U_ZERO_ERROR;
124
+ ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) &enc_buf->cur, (char *) enc_buf->end,
125
+ (const char **) &src, (char *) main_buf->cur, NULL, NULL, NULL, NULL,
126
+ FALSE, TRUE, &err);
127
+
128
+ if ( U_FAILURE(err) ) {
129
+ rb_raise(xh_parse_error_class, "Encoding error: %d", err);
130
+ }
131
+ #endif
132
+ }
133
+
134
+ void
135
+ xh_encoder_encode_string(xh_encoder_t *encoder, xh_char_t **src, size_t *src_left, xh_char_t **dst, size_t *dst_left)
136
+ {
137
+ #ifdef XH_HAVE_ICONV
138
+ if (encoder->type == XH_ENC_ICONV) {
139
+ size_t converted = iconv(encoder->iconv, (char **) src, src_left, (char **) dst, dst_left);
140
+ if (converted == (size_t) -1) {
141
+ switch (errno) {
142
+ case EILSEQ:
143
+ rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
144
+ case E2BIG:
145
+ encoder->state = XH_ENC_BUFFER_OVERFLOW;
146
+ break;
147
+ case EINVAL:
148
+ encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
149
+ break;
150
+ default:
151
+ rb_raise(xh_parse_error_class, "Encoding error");
152
+ }
153
+ }
154
+ else {
155
+ encoder->state = XH_ENC_OK;
156
+ }
157
+ return;
158
+ }
159
+ #endif
160
+
161
+ #ifdef XH_HAVE_ICU
162
+ UErrorCode err = U_ZERO_ERROR;
163
+ xh_char_t *old_src = *src;
164
+ xh_char_t *old_dst = *dst;
165
+
166
+ ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) dst, (char *) (*dst + *dst_left),
167
+ (const char **) src, (char *) (*src + *src_left), encoder->pivotStart, &encoder->pivotSource, &encoder->pivotTarget, encoder->pivotLimit,
168
+ FALSE, FALSE, &err);
169
+
170
+ *src_left -= *src - old_src;
171
+ *dst_left -= *dst - old_dst;
172
+
173
+ if ( U_FAILURE(err) ) {
174
+ switch (err) {
175
+ case U_INVALID_CHAR_FOUND:
176
+ rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
177
+ case U_BUFFER_OVERFLOW_ERROR:
178
+ encoder->state = XH_ENC_BUFFER_OVERFLOW;
179
+ break;
180
+ case U_TRUNCATED_CHAR_FOUND:
181
+ encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
182
+ break;
183
+ default:
184
+ rb_raise(xh_parse_error_class, "Encoding error: %d", err);
185
+ }
186
+ }
187
+ else {
188
+ encoder->state = XH_ENC_OK;
189
+ }
190
+ #endif
191
+ }
192
+
193
+ #endif /* XH_HAVE_ENCODER */