fast-xml 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +164 -0
- data/ext/fastxml/extconf.rb +17 -0
- data/ext/fastxml/fastxml.c +67 -0
- data/ext/fastxml/fastxml.h +14 -0
- data/ext/fastxml/xh.c +338 -0
- data/ext/fastxml/xh.h +58 -0
- data/ext/fastxml/xh_buffer.c +40 -0
- data/ext/fastxml/xh_buffer.h +38 -0
- data/ext/fastxml/xh_buffer_helper.h +97 -0
- data/ext/fastxml/xh_config.h +74 -0
- data/ext/fastxml/xh_core.h +53 -0
- data/ext/fastxml/xh_encoder.c +193 -0
- data/ext/fastxml/xh_encoder.h +56 -0
- data/ext/fastxml/xh_h2x.c +62 -0
- data/ext/fastxml/xh_h2x.h +93 -0
- data/ext/fastxml/xh_h2x_native.c +89 -0
- data/ext/fastxml/xh_h2x_native_attr.c +161 -0
- data/ext/fastxml/xh_log.c +31 -0
- data/ext/fastxml/xh_log.h +100 -0
- data/ext/fastxml/xh_param.c +77 -0
- data/ext/fastxml/xh_param.h +56 -0
- data/ext/fastxml/xh_ruby_buffer.c +51 -0
- data/ext/fastxml/xh_ruby_buffer.h +30 -0
- data/ext/fastxml/xh_sort.c +40 -0
- data/ext/fastxml/xh_sort.h +20 -0
- data/ext/fastxml/xh_stack.c +19 -0
- data/ext/fastxml/xh_stack.h +41 -0
- data/ext/fastxml/xh_string.h +105 -0
- data/ext/fastxml/xh_writer.c +94 -0
- data/ext/fastxml/xh_writer.h +49 -0
- data/ext/fastxml/xh_xml.h +453 -0
- data/lib/fastxml.rb +59 -0
- data/lib/fastxml/error.rb +7 -0
- data/lib/fastxml/version.rb +3 -0
- metadata +139 -0
data/ext/fastxml/xh.h
ADDED
@@ -0,0 +1,58 @@
|
|
1
|
+
#ifndef _XH_H_
|
2
|
+
#define _XH_H_
|
3
|
+
|
4
|
+
#include "xh_config.h"
|
5
|
+
#include "xh_core.h"
|
6
|
+
|
7
|
+
#define XH_INTERNAL_ENCODING "utf-8"
|
8
|
+
|
9
|
+
/* Conversion method identifiers; stored in the `method` field of xh_opts_t. */
typedef enum {
    XH_METHOD_NATIVE = 0,
    XH_METHOD_LX     = 1
} xh_method_t;
|
13
|
+
|
14
|
+
typedef struct {
|
15
|
+
xh_method_t method;
|
16
|
+
xh_bool_t use_attr;
|
17
|
+
|
18
|
+
/* native options */
|
19
|
+
xh_char_t version[XH_PARAM_LEN];
|
20
|
+
xh_char_t encoding[XH_PARAM_LEN];
|
21
|
+
xh_char_t root[XH_PARAM_LEN];
|
22
|
+
xh_bool_t utf8;
|
23
|
+
xh_bool_t xml_decl;
|
24
|
+
xh_bool_t keep_root;
|
25
|
+
xh_bool_t canonical;
|
26
|
+
xh_char_t content[XH_PARAM_LEN];
|
27
|
+
xh_int_t indent;
|
28
|
+
VALUE output;
|
29
|
+
#ifdef XH_HAVE_DOM
|
30
|
+
xh_bool_t doc;
|
31
|
+
#endif
|
32
|
+
xh_int_t max_depth;
|
33
|
+
xh_int_t buf_size;
|
34
|
+
xh_pattern_t force_array;
|
35
|
+
xh_bool_t force_content;
|
36
|
+
xh_bool_t merge_text;
|
37
|
+
xh_pattern_t filter;
|
38
|
+
VALUE cb;
|
39
|
+
|
40
|
+
/* LX options */
|
41
|
+
xh_char_t attr[XH_PARAM_LEN];
|
42
|
+
size_t attr_len;
|
43
|
+
xh_char_t text[XH_PARAM_LEN];
|
44
|
+
xh_bool_t trim;
|
45
|
+
xh_char_t cdata[XH_PARAM_LEN];
|
46
|
+
xh_char_t comm[XH_PARAM_LEN];
|
47
|
+
} xh_opts_t;
|
48
|
+
|
49
|
+
xh_opts_t *xh_create_opts(void);
|
50
|
+
void xh_destroy_opts(xh_opts_t *opts);
|
51
|
+
xh_bool_t xh_init_opts(xh_opts_t *opts);
|
52
|
+
void xh_parse_args(xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv);
|
53
|
+
void xh_copy_opts(xh_opts_t *dst, xh_opts_t *src);
|
54
|
+
void *xh_get_obj_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv, const char *class);
|
55
|
+
VALUE xh_get_hash_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv);
|
56
|
+
void xh_merge_opts(xh_opts_t *ctx_opts, xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv);
|
57
|
+
|
58
|
+
#endif /* _XH_H_ */
|
@@ -0,0 +1,40 @@
|
|
1
|
+
#include "xh_config.h"
|
2
|
+
#include "xh_core.h"
|
3
|
+
|
4
|
+
void
|
5
|
+
xh_buffer_init(xh_buffer_t *buf, size_t size)
|
6
|
+
{
|
7
|
+
buf->start = buf->cur = malloc(size);
|
8
|
+
if (buf->start == NULL) {
|
9
|
+
rb_raise(rb_eNoMemError, "Memory allocation error");
|
10
|
+
}
|
11
|
+
buf->end = buf->start + size;
|
12
|
+
|
13
|
+
xh_log_debug2("buf: %p size: %lu", buf->start, size);
|
14
|
+
}
|
15
|
+
|
16
|
+
void
|
17
|
+
xh_buffer_grow(xh_buffer_t *buf, size_t inc)
|
18
|
+
{
|
19
|
+
size_t size, use;
|
20
|
+
|
21
|
+
if (inc <= (size_t) (buf->end - buf->cur)) {
|
22
|
+
return;
|
23
|
+
}
|
24
|
+
|
25
|
+
size = buf->end - buf->start;
|
26
|
+
use = buf->cur - buf->start;
|
27
|
+
|
28
|
+
xh_log_debug2("old buf: %p size: %lu", buf->start, size);
|
29
|
+
|
30
|
+
size += inc < size ? size : inc;
|
31
|
+
|
32
|
+
buf->start = realloc(buf->start, size);
|
33
|
+
if (buf->start == NULL) {
|
34
|
+
rb_raise(rb_eNoMemError, "Memory allocation error");
|
35
|
+
}
|
36
|
+
buf->cur = buf->start + use;
|
37
|
+
buf->end = buf->start + size;
|
38
|
+
|
39
|
+
xh_log_debug2("new buf: %p size: %lu", buf->start, size);
|
40
|
+
}
|
@@ -0,0 +1,38 @@
|
|
1
|
+
#ifndef _XH_BUFFER_H_
|
2
|
+
#define _XH_BUFFER_H_
|
3
|
+
|
4
|
+
#include "xh_config.h"
|
5
|
+
#include "xh_core.h"
|
6
|
+
|
7
|
+
typedef struct _xh_buffer_t xh_buffer_t;
|
8
|
+
struct _xh_buffer_t {
|
9
|
+
xh_char_t *start;
|
10
|
+
xh_char_t *cur;
|
11
|
+
xh_char_t *end;
|
12
|
+
};
|
13
|
+
|
14
|
+
void xh_buffer_init(xh_buffer_t *buf, size_t size);
|
15
|
+
void xh_buffer_grow(xh_buffer_t *buf, size_t inc);
|
16
|
+
|
17
|
+
XH_INLINE void
|
18
|
+
xh_buffer_destroy(xh_buffer_t *buf)
|
19
|
+
{
|
20
|
+
if (buf->start != NULL) {
|
21
|
+
xh_log_debug1("free enc buf: %p", buf->start);
|
22
|
+
free(buf->start);
|
23
|
+
}
|
24
|
+
}
|
25
|
+
|
26
|
+
/*
 * Accessors for a buffer with `start`, `cur` and `end` pointers.
 * Every expansion is a single parenthesized expression (or a do/while
 * statement for reset), and macro arguments are parenthesized, so the macros
 * compose safely inside larger expressions.
 */
#define xh_buffer_avail(b)      ((b)->end - (b)->cur)       /* bytes left after the cursor */
#define xh_buffer_use(b)        ((b)->cur - (b)->start)     /* bytes before the cursor */
#define xh_buffer_start(b)      ((b)->start)
#define xh_buffer_pos(b)        ((b)->cur)
#define xh_buffer_end(b)        ((b)->end)
#define xh_buffer_size(b)       ((b)->end - (b)->start)     /* total capacity */
#define xh_buffer_reset(b)      do { (b)->cur = (b)->start; } while (0)
#define xh_buffer_seek(b, p)    ((b)->cur = (p))
#define xh_buffer_seek_eof(b)   ((b)->cur = (b)->end)
#define xh_buffer_seek_top(b)   ((b)->cur = (b)->start)
/* ask xh_buffer_grow() for room equal to half of the current capacity */
#define xh_buffer_grow50(b)     xh_buffer_grow((b), xh_buffer_size(b) / 2)
|
37
|
+
|
38
|
+
#endif /* _XH_BUFFER_H_ */
|
@@ -0,0 +1,97 @@
|
|
1
|
+
#ifndef _XH_BUFFER_HELPER_H_
|
2
|
+
#define _XH_BUFFER_HELPER_H_
|
3
|
+
|
4
|
+
#include "xh_config.h"
|
5
|
+
#include "xh_core.h"
|
6
|
+
|
7
|
+
/*
 * Raw write helpers for any `b` that points to an object with a `cur` write
 * pointer.
 *
 * - None of these macros checks the remaining capacity; the caller must have
 *   reserved enough room before writing.
 * - Each macro expands to complete statements (the trailing ';' or '}' is part
 *   of the expansion), which is how the macros below chain each other.  Use
 *   them as a statement of their own, not as the unbraced body of an if/else.
 * - The SHORT_STRING and ESCAPE variants consume their arguments: `s` is
 *   advanced and `l` is counted down, so both must be modifiable lvalues.
 * - Fixed-width writes go through memcpy() so they are valid for any
 *   alignment of the cursor or the source.
 */
#define XH_BUFFER_WRITE_LONG_STRING(b, s, l)                            \
    memcpy((b)->cur, (s), (l));                                         \
    (b)->cur += (l);
#define XH_BUFFER_WRITE_SHORT_STRING(b, s, l)                           \
    while ((l)--) {                                                     \
        *(b)->cur++ = *(s)++;                                           \
    }
/* byte loop below 17 bytes, memcpy() from there on */
#define XH_BUFFER_WRITE_STRING(b, s, l)                                 \
    if ((l) < 17) {                                                     \
        XH_BUFFER_WRITE_SHORT_STRING(b, s, l)                           \
    }                                                                   \
    else {                                                              \
        XH_BUFFER_WRITE_LONG_STRING(b, s, l)                            \
    }
#define XH_BUFFER_WRITE_CHAR(b, c)                                      \
    *(b)->cur++ = (c);
#define XH_BUFFER_WRITE_CHAR2(b, s)                                     \
    memcpy((b)->cur, (s), 2);                                           \
    (b)->cur += 2;
#define XH_BUFFER_WRITE_CHAR3(b, s)                                     \
    XH_BUFFER_WRITE_CHAR2(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[2])
#define XH_BUFFER_WRITE_CHAR4(b, s)                                     \
    memcpy((b)->cur, (s), 4);                                           \
    (b)->cur += 4;
#define XH_BUFFER_WRITE_CHAR5(b, s)                                     \
    XH_BUFFER_WRITE_CHAR4(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[4])
#define XH_BUFFER_WRITE_CHAR6(b, s)                                     \
    XH_BUFFER_WRITE_CHAR4(b, s)                                         \
    XH_BUFFER_WRITE_CHAR2(b, (s) + 4)
#define XH_BUFFER_WRITE_CHAR7(b, s)                                     \
    XH_BUFFER_WRITE_CHAR6(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[6])
#define XH_BUFFER_WRITE_CHAR8(b, s)                                     \
    XH_BUFFER_WRITE_CHAR4(b, s)                                         \
    XH_BUFFER_WRITE_CHAR4(b, (s) + 4)
#define XH_BUFFER_WRITE_CHAR9(b, s)                                     \
    XH_BUFFER_WRITE_CHAR8(b, s)                                         \
    XH_BUFFER_WRITE_CHAR(b, (s)[8])
/*
 * Copy `l` bytes of element text, replacing CR, '<', '>' and '&' by their
 * XML references.  The byte is stored first; for a special character the
 * reference is then written over it from the same position.  An input byte
 * can expand to as many as 5 output bytes.
 */
#define XH_BUFFER_WRITE_ESCAPE_STRING(b, s, l)                          \
    while ((l)--) {                                                     \
        switch (*(b)->cur = *(s)++) {                                   \
            case '\r':                                                  \
                XH_BUFFER_WRITE_CHAR5(b, "&#13;")                       \
                break;                                                  \
            case '<':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&lt;")                        \
                break;                                                  \
            case '>':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&gt;")                        \
                break;                                                  \
            case '&':                                                   \
                XH_BUFFER_WRITE_CHAR5(b, "&amp;")                       \
                break;                                                  \
            default:                                                    \
                (b)->cur++;                                             \
        }                                                               \
    }
/*
 * Same as above for attribute values: LF, CR, TAB and '"' are escaped as
 * well.  An input byte can expand to as many as 6 output bytes.
 */
#define XH_BUFFER_WRITE_ESCAPE_ATTR(b, s, l)                            \
    while ((l)--) {                                                     \
        switch (*(b)->cur = *(s)++) {                                   \
            case '\n':                                                  \
                XH_BUFFER_WRITE_CHAR5(b, "&#10;")                       \
                break;                                                  \
            case '\r':                                                  \
                XH_BUFFER_WRITE_CHAR5(b, "&#13;")                       \
                break;                                                  \
            case '\t':                                                  \
                XH_BUFFER_WRITE_CHAR4(b, "&#9;")                        \
                break;                                                  \
            case '<':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&lt;")                        \
                break;                                                  \
            case '>':                                                   \
                XH_BUFFER_WRITE_CHAR4(b, "&gt;")                        \
                break;                                                  \
            case '&':                                                   \
                XH_BUFFER_WRITE_CHAR5(b, "&amp;")                       \
                break;                                                  \
            case '"':                                                   \
                XH_BUFFER_WRITE_CHAR6(b, "&quot;")                      \
                break;                                                  \
            default:                                                    \
                (b)->cur++;                                             \
        }                                                               \
    }
/* `s` must be a string literal or char array: the length is sizeof(s) - 1 */
#define XH_BUFFER_WRITE_CONSTANT(b, s)                                  \
    XH_BUFFER_WRITE_LONG_STRING(b, s, sizeof(s) - 1)
|
96
|
+
|
97
|
+
#endif /* _XH_BUFFER_HELPER_H_ */
|
@@ -0,0 +1,74 @@
|
|
1
|
+
#ifndef _XH_CONFIG_H_
|
2
|
+
#define _XH_CONFIG_H_
|
3
|
+
|
4
|
+
#include "ruby.h"
|
5
|
+
#if HAVE_RUBY_ENCODING_H
|
6
|
+
#include "ruby/encoding.h"
|
7
|
+
#endif
|
8
|
+
#include <stdint.h>
|
9
|
+
#include <sys/stat.h>
|
10
|
+
#include <fcntl.h>
|
11
|
+
#include <stdio.h>
|
12
|
+
#include <stdlib.h>
|
13
|
+
#include <string.h>
|
14
|
+
#include <errno.h>
|
15
|
+
#ifdef WIN32
|
16
|
+
#include <windows.h>
|
17
|
+
#include <io.h>
|
18
|
+
#else
|
19
|
+
#include <sys/mman.h>
|
20
|
+
#endif
|
21
|
+
|
22
|
+
/*
 * Compiler abstraction.
 *
 *   expect(expr, value)  branch-prediction hint; plain `expr` without GCC
 *   XH_INLINE            storage class for helpers defined in headers
 *   XH_UNUSED(v)         declares `v` and marks it as intentionally unused
 */
#if __GNUC__ >= 3
# define expect(expr,value)         __builtin_expect ((expr), (value))
# define XH_INLINE                  static inline
# define XH_UNUSED(v)               v __attribute__((unused))
#else
# define expect(expr,value)         (expr)
# define XH_INLINE                  static
# define XH_UNUSED(v)               v
#endif

/*
 * MSVC: silence CRT deprecation warnings and map the POSIX case-insensitive
 * compare functions to their CRT names.
 * NOTE(review): the _CRT_* switches only affect headers included after this
 * point - confirm that their placement after the system includes is intended.
 */
#ifdef _MSC_VER
#define _CRT_SECURE_NO_WARNINGS
#define _CRT_NONSTDC_NO_DEPRECATE
#define strncasecmp _strnicmp
#define strcasecmp _stricmp
#endif

/* The condition is normalized to 0/1 before the hint is applied. */
#define expect_false(expr)          expect ((expr) != 0, 0)
#define expect_true(expr)           expect ((expr) != 0, 1)
|
41
|
+
|
42
|
+
/*
 * Basic scalar types.  The boolean and integer types are pointer-sized;
 * xh_char_t is an unsigned byte.  `unsigned char` is spelled out because
 * `u_char` is a BSD extension that strict ISO C and some platforms' headers
 * do not provide.
 */
typedef uintptr_t       xh_bool_t;
typedef uintptr_t       xh_uint_t;
typedef intptr_t        xh_int_t;
typedef unsigned char   xh_char_t;

/* Cast prefix for turning char literals/pointers into xh_char_t pointers. */
#define XH_CHAR_CAST    (xh_char_t *)
#define XH_EMPTY_STRING (XH_CHAR_CAST "")
|
49
|
+
|
50
|
+
/* An encoder is available as soon as iconv or ICU support is compiled in. */
#if defined(XH_HAVE_ICONV) || defined(XH_HAVE_ICU)
# define XH_HAVE_ENCODER
#endif

/*
 * Set the encoding index of `s` to Ruby's UTF-8 index when the encoding API
 * header is available; expands to nothing otherwise.
 */
#if defined(HAVE_RUBY_ENCODING_H)
# define XH_FORCE_UTF8(s)   rb_enc_set_index(s, rb_utf8_encindex())
#else
# define XH_FORCE_UTF8(s)
#endif

/* DOM support needs both XH_HAVE_XML2 and XH_HAVE_XML__LIBXML. */
#if defined(XH_HAVE_XML2) && defined(XH_HAVE_XML__LIBXML)
# define XH_HAVE_DOM
#endif

#if defined(XH_HAVE_DOM)
# include <libxml/parser.h>
#endif

/* Defined unconditionally. */
#define XH_HAVE_MMAP
|
69
|
+
|
70
|
+
extern VALUE xh_module;
|
71
|
+
extern VALUE xh_parse_error_class;
|
72
|
+
extern ID xh_id_next;
|
73
|
+
|
74
|
+
#endif /* _XH_CONFIG_H_ */
|
@@ -0,0 +1,53 @@
|
|
1
|
+
#ifndef _XH_CORE_H_
|
2
|
+
#define _XH_CORE_H_
|
3
|
+
|
4
|
+
/*
 * Token pasting, raw form: A and B are joined exactly as written, without
 * macro expansion.  (Arguments passed in from another macro have already
 * been expanded by the time they arrive here.)
 */
#define XH_PPCAT_NX(A, B)       A ## B

/*
 * Token pasting, expanding form: A and B are macro-expanded first, then
 * joined.
 */
#define XH_PPCAT(A, B)          XH_PPCAT_NX(A, B)

/*
 * Stringification, raw form: A becomes a string literal exactly as written,
 * without macro expansion.  (Arguments passed in from another macro have
 * already been expanded by the time they arrive here.)
 */
#define XH_STRINGIZE_NX(A)      #A

/*
 * Stringification, expanding form: A is macro-expanded first, then turned
 * into a string literal.
 */
#define XH_STRINGIZE(A)         XH_STRINGIZE_NX(A)

/* Boolean constants, unless something else already provides them. */
#if !defined(FALSE)
# define FALSE (0)
#endif

#if !defined(TRUE)
# define TRUE (1)
#endif
|
33
|
+
|
34
|
+
#include "xh_log.h"
|
35
|
+
#include "xh_string.h"
|
36
|
+
#include "xh_sort.h"
|
37
|
+
#include "xh_stack.h"
|
38
|
+
//#include "xh_stash.h"
|
39
|
+
#include "xh_param.h"
|
40
|
+
#include "xh_buffer_helper.h"
|
41
|
+
#include "xh_buffer.h"
|
42
|
+
#include "xh_ruby_buffer.h"
|
43
|
+
#include "xh_encoder.h"
|
44
|
+
//#include "xh_reader.h"
|
45
|
+
#include "xh_writer.h"
|
46
|
+
#include "xh.h"
|
47
|
+
#include "xh_h2x.h"
|
48
|
+
#include "xh_xml.h"
|
49
|
+
/*
|
50
|
+
#include "xh_x2h.h"
|
51
|
+
#include "xh_dom.h"
|
52
|
+
*/
|
53
|
+
#endif /* _XH_CORE_H_ */
|
@@ -0,0 +1,193 @@
|
|
1
|
+
#include "xh_config.h"
|
2
|
+
#include "xh_core.h"
|
3
|
+
|
4
|
+
#ifdef XH_HAVE_ENCODER
|
5
|
+
|
6
|
+
#ifdef XH_HAVE_ICU
|
7
|
+
static void
|
8
|
+
xh_encoder_uconv_destroy(UConverter *uconv)
|
9
|
+
{
|
10
|
+
if (uconv != NULL) {
|
11
|
+
ucnv_close(uconv);
|
12
|
+
}
|
13
|
+
}
|
14
|
+
|
15
|
+
static UConverter *
|
16
|
+
xh_encoder_uconv_create(xh_char_t *encoding, xh_bool_t toUnicode)
|
17
|
+
{
|
18
|
+
UConverter *uconv;
|
19
|
+
UErrorCode status = U_ZERO_ERROR;
|
20
|
+
|
21
|
+
uconv = ucnv_open((char *) encoding, &status);
|
22
|
+
if ( U_FAILURE(status) ) {
|
23
|
+
return NULL;
|
24
|
+
}
|
25
|
+
|
26
|
+
if (toUnicode) {
|
27
|
+
ucnv_setToUCallBack(uconv, UCNV_TO_U_CALLBACK_STOP,
|
28
|
+
NULL, NULL, NULL, &status);
|
29
|
+
}
|
30
|
+
else {
|
31
|
+
ucnv_setFromUCallBack(uconv, UCNV_FROM_U_CALLBACK_STOP,
|
32
|
+
NULL, NULL, NULL, &status);
|
33
|
+
}
|
34
|
+
|
35
|
+
return uconv;
|
36
|
+
}
|
37
|
+
#endif
|
38
|
+
|
39
|
+
void
|
40
|
+
xh_encoder_destroy(xh_encoder_t *encoder)
|
41
|
+
{
|
42
|
+
if (encoder != NULL) {
|
43
|
+
#ifdef XH_HAVE_ICONV
|
44
|
+
if (encoder->iconv != NULL) {
|
45
|
+
xh_log_debug0("destroy iconv encoder");
|
46
|
+
iconv_close(encoder->iconv);
|
47
|
+
}
|
48
|
+
#endif
|
49
|
+
|
50
|
+
#ifdef XH_HAVE_ICU
|
51
|
+
if (encoder->uconv_from != NULL) {
|
52
|
+
xh_log_debug0("destroy icu encoder");
|
53
|
+
xh_encoder_uconv_destroy(encoder->uconv_from);
|
54
|
+
xh_encoder_uconv_destroy(encoder->uconv_to);
|
55
|
+
}
|
56
|
+
#endif
|
57
|
+
free(encoder);
|
58
|
+
}
|
59
|
+
}
|
60
|
+
|
61
|
+
xh_encoder_t *
|
62
|
+
xh_encoder_create(xh_char_t *tocode, xh_char_t *fromcode)
|
63
|
+
{
|
64
|
+
xh_encoder_t *encoder;
|
65
|
+
|
66
|
+
encoder = malloc(sizeof(xh_encoder_t));
|
67
|
+
if (encoder == NULL) {
|
68
|
+
return NULL;
|
69
|
+
}
|
70
|
+
memset(encoder, 0, sizeof(xh_encoder_t));
|
71
|
+
|
72
|
+
xh_str_copy(encoder->tocode, tocode, XH_PARAM_LEN);
|
73
|
+
xh_str_copy(encoder->fromcode, fromcode, XH_PARAM_LEN);
|
74
|
+
|
75
|
+
#ifdef XH_HAVE_ICONV
|
76
|
+
xh_log_debug2("create iconv encoder from: '%s' to: '%s'", fromcode, tocode);
|
77
|
+
encoder->iconv = iconv_open((char *) tocode, (char *) fromcode);
|
78
|
+
if (encoder->iconv != (iconv_t) -1) {
|
79
|
+
encoder->type = XH_ENC_ICONV;
|
80
|
+
return encoder;
|
81
|
+
}
|
82
|
+
encoder->iconv = NULL;
|
83
|
+
#endif
|
84
|
+
|
85
|
+
#ifdef XH_HAVE_ICU
|
86
|
+
xh_log_debug2("create icu encoder from: '%s' to: '%s'", fromcode, tocode);
|
87
|
+
encoder->uconv_to = xh_encoder_uconv_create(tocode, 1);
|
88
|
+
if (encoder->uconv_to != NULL) {
|
89
|
+
encoder->uconv_from = xh_encoder_uconv_create(fromcode, 0);
|
90
|
+
if (encoder->uconv_from != NULL) {
|
91
|
+
encoder->type = XH_ENC_ICU;
|
92
|
+
encoder->pivotSource = encoder->pivotTarget = encoder->pivotStart = encoder->pivotBuffer;
|
93
|
+
encoder->pivotLimit = encoder->pivotBuffer + sizeof(encoder->pivotBuffer) / sizeof(encoder->pivotBuffer[0]);
|
94
|
+
return encoder;
|
95
|
+
}
|
96
|
+
}
|
97
|
+
#endif
|
98
|
+
|
99
|
+
xh_encoder_destroy(encoder);
|
100
|
+
|
101
|
+
return NULL;
|
102
|
+
}
|
103
|
+
|
104
|
+
void
|
105
|
+
xh_encoder_encode_ruby_buffer(xh_encoder_t *encoder, xh_ruby_buffer_t *main_buf, xh_ruby_buffer_t *enc_buf)
|
106
|
+
{
|
107
|
+
xh_char_t *src = main_buf->start;
|
108
|
+
|
109
|
+
#ifdef XH_HAVE_ICONV
|
110
|
+
if (encoder->type == XH_ENC_ICONV) {
|
111
|
+
size_t in_left = main_buf->cur - main_buf->start;
|
112
|
+
size_t out_left = enc_buf->end - enc_buf->cur;
|
113
|
+
|
114
|
+
size_t converted = iconv(encoder->iconv, (char **) &src, &in_left, (char **) &enc_buf->cur, &out_left);
|
115
|
+
if (converted == (size_t) -1) {
|
116
|
+
rb_raise(xh_parse_error_class, "Encoding error");
|
117
|
+
}
|
118
|
+
return;
|
119
|
+
}
|
120
|
+
#endif
|
121
|
+
|
122
|
+
#ifdef XH_HAVE_ICU
|
123
|
+
UErrorCode err = U_ZERO_ERROR;
|
124
|
+
ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) &enc_buf->cur, (char *) enc_buf->end,
|
125
|
+
(const char **) &src, (char *) main_buf->cur, NULL, NULL, NULL, NULL,
|
126
|
+
FALSE, TRUE, &err);
|
127
|
+
|
128
|
+
if ( U_FAILURE(err) ) {
|
129
|
+
rb_raise(xh_parse_error_class, "Encoding error: %d", err);
|
130
|
+
}
|
131
|
+
#endif
|
132
|
+
}
|
133
|
+
|
134
|
+
void
|
135
|
+
xh_encoder_encode_string(xh_encoder_t *encoder, xh_char_t **src, size_t *src_left, xh_char_t **dst, size_t *dst_left)
|
136
|
+
{
|
137
|
+
#ifdef XH_HAVE_ICONV
|
138
|
+
if (encoder->type == XH_ENC_ICONV) {
|
139
|
+
size_t converted = iconv(encoder->iconv, (char **) src, src_left, (char **) dst, dst_left);
|
140
|
+
if (converted == (size_t) -1) {
|
141
|
+
switch (errno) {
|
142
|
+
case EILSEQ:
|
143
|
+
rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
|
144
|
+
case E2BIG:
|
145
|
+
encoder->state = XH_ENC_BUFFER_OVERFLOW;
|
146
|
+
break;
|
147
|
+
case EINVAL:
|
148
|
+
encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
|
149
|
+
break;
|
150
|
+
default:
|
151
|
+
rb_raise(xh_parse_error_class, "Encoding error");
|
152
|
+
}
|
153
|
+
}
|
154
|
+
else {
|
155
|
+
encoder->state = XH_ENC_OK;
|
156
|
+
}
|
157
|
+
return;
|
158
|
+
}
|
159
|
+
#endif
|
160
|
+
|
161
|
+
#ifdef XH_HAVE_ICU
|
162
|
+
UErrorCode err = U_ZERO_ERROR;
|
163
|
+
xh_char_t *old_src = *src;
|
164
|
+
xh_char_t *old_dst = *dst;
|
165
|
+
|
166
|
+
ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) dst, (char *) (*dst + *dst_left),
|
167
|
+
(const char **) src, (char *) (*src + *src_left), encoder->pivotStart, &encoder->pivotSource, &encoder->pivotTarget, encoder->pivotLimit,
|
168
|
+
FALSE, FALSE, &err);
|
169
|
+
|
170
|
+
*src_left -= *src - old_src;
|
171
|
+
*dst_left -= *dst - old_dst;
|
172
|
+
|
173
|
+
if ( U_FAILURE(err) ) {
|
174
|
+
switch (err) {
|
175
|
+
case U_INVALID_CHAR_FOUND:
|
176
|
+
rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
|
177
|
+
case U_BUFFER_OVERFLOW_ERROR:
|
178
|
+
encoder->state = XH_ENC_BUFFER_OVERFLOW;
|
179
|
+
break;
|
180
|
+
case U_TRUNCATED_CHAR_FOUND:
|
181
|
+
encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
|
182
|
+
break;
|
183
|
+
default:
|
184
|
+
rb_raise(xh_parse_error_class, "Encoding error: %d", err);
|
185
|
+
}
|
186
|
+
}
|
187
|
+
else {
|
188
|
+
encoder->state = XH_ENC_OK;
|
189
|
+
}
|
190
|
+
#endif
|
191
|
+
}
|
192
|
+
|
193
|
+
#endif /* XH_HAVE_ENCODER */
|