fast-xml 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/LICENSE +21 -0
- data/README.md +164 -0
- data/ext/fastxml/extconf.rb +17 -0
- data/ext/fastxml/fastxml.c +67 -0
- data/ext/fastxml/fastxml.h +14 -0
- data/ext/fastxml/xh.c +338 -0
- data/ext/fastxml/xh.h +58 -0
- data/ext/fastxml/xh_buffer.c +40 -0
- data/ext/fastxml/xh_buffer.h +38 -0
- data/ext/fastxml/xh_buffer_helper.h +97 -0
- data/ext/fastxml/xh_config.h +74 -0
- data/ext/fastxml/xh_core.h +53 -0
- data/ext/fastxml/xh_encoder.c +193 -0
- data/ext/fastxml/xh_encoder.h +56 -0
- data/ext/fastxml/xh_h2x.c +62 -0
- data/ext/fastxml/xh_h2x.h +93 -0
- data/ext/fastxml/xh_h2x_native.c +89 -0
- data/ext/fastxml/xh_h2x_native_attr.c +161 -0
- data/ext/fastxml/xh_log.c +31 -0
- data/ext/fastxml/xh_log.h +100 -0
- data/ext/fastxml/xh_param.c +77 -0
- data/ext/fastxml/xh_param.h +56 -0
- data/ext/fastxml/xh_ruby_buffer.c +51 -0
- data/ext/fastxml/xh_ruby_buffer.h +30 -0
- data/ext/fastxml/xh_sort.c +40 -0
- data/ext/fastxml/xh_sort.h +20 -0
- data/ext/fastxml/xh_stack.c +19 -0
- data/ext/fastxml/xh_stack.h +41 -0
- data/ext/fastxml/xh_string.h +105 -0
- data/ext/fastxml/xh_writer.c +94 -0
- data/ext/fastxml/xh_writer.h +49 -0
- data/ext/fastxml/xh_xml.h +453 -0
- data/lib/fastxml.rb +59 -0
- data/lib/fastxml/error.rb +7 -0
- data/lib/fastxml/version.rb +3 -0
- metadata +139 -0
    
        data/ext/fastxml/xh.h
    ADDED
    
    | @@ -0,0 +1,58 @@ | |
| 1 | 
            +
            #ifndef _XH_H_
         | 
| 2 | 
            +
            #define _XH_H_
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #include "xh_config.h"
         | 
| 5 | 
            +
            #include "xh_core.h"
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            #define XH_INTERNAL_ENCODING "utf-8"
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            /*
             * Conversion method identifiers; the selected value is stored in the
             * `method` member of xh_opts_t.
             */
            typedef enum {
                XH_METHOD_NATIVE = 0,
                XH_METHOD_LX     = 1
            } xh_method_t;
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            typedef struct {
         | 
| 15 | 
            +
                xh_method_t            method;
         | 
| 16 | 
            +
                xh_bool_t              use_attr;
         | 
| 17 | 
            +
             | 
| 18 | 
            +
                /* native options */
         | 
| 19 | 
            +
                xh_char_t              version[XH_PARAM_LEN];
         | 
| 20 | 
            +
                xh_char_t              encoding[XH_PARAM_LEN];
         | 
| 21 | 
            +
                xh_char_t              root[XH_PARAM_LEN];
         | 
| 22 | 
            +
                xh_bool_t              utf8;
         | 
| 23 | 
            +
                xh_bool_t              xml_decl;
         | 
| 24 | 
            +
                xh_bool_t              keep_root;
         | 
| 25 | 
            +
                xh_bool_t              canonical;
         | 
| 26 | 
            +
                xh_char_t              content[XH_PARAM_LEN];
         | 
| 27 | 
            +
                xh_int_t               indent;
         | 
| 28 | 
            +
                VALUE                  output;
         | 
| 29 | 
            +
            #ifdef XH_HAVE_DOM
         | 
| 30 | 
            +
                xh_bool_t              doc;
         | 
| 31 | 
            +
            #endif
         | 
| 32 | 
            +
                xh_int_t               max_depth;
         | 
| 33 | 
            +
                xh_int_t               buf_size;
         | 
| 34 | 
            +
                xh_pattern_t           force_array;
         | 
| 35 | 
            +
                xh_bool_t              force_content;
         | 
| 36 | 
            +
                xh_bool_t              merge_text;
         | 
| 37 | 
            +
                xh_pattern_t           filter;
         | 
| 38 | 
            +
                VALUE                  cb;
         | 
| 39 | 
            +
             | 
| 40 | 
            +
                /* LX options */
         | 
| 41 | 
            +
                xh_char_t              attr[XH_PARAM_LEN];
         | 
| 42 | 
            +
                size_t                 attr_len;
         | 
| 43 | 
            +
                xh_char_t              text[XH_PARAM_LEN];
         | 
| 44 | 
            +
                xh_bool_t              trim;
         | 
| 45 | 
            +
                xh_char_t              cdata[XH_PARAM_LEN];
         | 
| 46 | 
            +
                xh_char_t              comm[XH_PARAM_LEN];
         | 
| 47 | 
            +
            } xh_opts_t;
         | 
| 48 | 
            +
             | 
| 49 | 
            +
            xh_opts_t *xh_create_opts(void);
         | 
| 50 | 
            +
            void xh_destroy_opts(xh_opts_t *opts);
         | 
| 51 | 
            +
            xh_bool_t xh_init_opts(xh_opts_t *opts);
         | 
| 52 | 
            +
            void xh_parse_args(xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv);
         | 
| 53 | 
            +
            void xh_copy_opts(xh_opts_t *dst, xh_opts_t *src);
         | 
| 54 | 
            +
            void *xh_get_obj_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv, const char *class);
         | 
| 55 | 
            +
            VALUE xh_get_hash_param(xh_int_t *nparam, xh_int_t argc, VALUE *argv);
         | 
| 56 | 
            +
            void xh_merge_opts(xh_opts_t *ctx_opts, xh_opts_t *opts, xh_int_t *nparam, xh_int_t argc, VALUE *argv);
         | 
| 57 | 
            +
             | 
| 58 | 
            +
            #endif /* _XH_H_ */
         | 
| @@ -0,0 +1,40 @@ | |
| 1 | 
            +
            #include "xh_config.h"
         | 
| 2 | 
            +
            #include "xh_core.h"
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            void
         | 
| 5 | 
            +
            xh_buffer_init(xh_buffer_t *buf, size_t size)
         | 
| 6 | 
            +
            {
         | 
| 7 | 
            +
                buf->start = buf->cur = malloc(size);
         | 
| 8 | 
            +
                if (buf->start == NULL) {
         | 
| 9 | 
            +
                    rb_raise(rb_eNoMemError, "Memory allocation error");
         | 
| 10 | 
            +
                }
         | 
| 11 | 
            +
                buf->end = buf->start + size;
         | 
| 12 | 
            +
             | 
| 13 | 
            +
                xh_log_debug2("buf: %p size: %lu", buf->start, size);
         | 
| 14 | 
            +
            }
         | 
| 15 | 
            +
             | 
| 16 | 
            +
            void
         | 
| 17 | 
            +
            xh_buffer_grow(xh_buffer_t *buf, size_t inc)
         | 
| 18 | 
            +
            {
         | 
| 19 | 
            +
                size_t size, use;
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                if (inc <= (size_t) (buf->end - buf->cur)) {
         | 
| 22 | 
            +
                    return;
         | 
| 23 | 
            +
                }
         | 
| 24 | 
            +
             | 
| 25 | 
            +
                size = buf->end - buf->start;
         | 
| 26 | 
            +
                use  = buf->cur - buf->start;
         | 
| 27 | 
            +
             | 
| 28 | 
            +
                xh_log_debug2("old buf: %p size: %lu", buf->start, size);
         | 
| 29 | 
            +
             | 
| 30 | 
            +
                size += inc < size ? size : inc;
         | 
| 31 | 
            +
             | 
| 32 | 
            +
                buf->start = realloc(buf->start, size);
         | 
| 33 | 
            +
                if (buf->start == NULL) {
         | 
| 34 | 
            +
                    rb_raise(rb_eNoMemError, "Memory allocation error");
         | 
| 35 | 
            +
                }
         | 
| 36 | 
            +
                buf->cur   = buf->start + use;
         | 
| 37 | 
            +
                buf->end   = buf->start + size;
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                xh_log_debug2("new buf: %p size: %lu", buf->start, size);
         | 
| 40 | 
            +
            }
         | 
| @@ -0,0 +1,38 @@ | |
| 1 | 
            +
            #ifndef _XH_BUFFER_H_
         | 
| 2 | 
            +
            #define _XH_BUFFER_H_
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #include "xh_config.h"
         | 
| 5 | 
            +
            #include "xh_core.h"
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            typedef struct _xh_buffer_t xh_buffer_t;
         | 
| 8 | 
            +
            struct _xh_buffer_t {
         | 
| 9 | 
            +
                xh_char_t *start;
         | 
| 10 | 
            +
                xh_char_t *cur;
         | 
| 11 | 
            +
                xh_char_t *end;
         | 
| 12 | 
            +
            };
         | 
| 13 | 
            +
             | 
| 14 | 
            +
            void xh_buffer_init(xh_buffer_t *buf, size_t size);
         | 
| 15 | 
            +
            void xh_buffer_grow(xh_buffer_t *buf, size_t inc);
         | 
| 16 | 
            +
             | 
| 17 | 
            +
            XH_INLINE void
         | 
| 18 | 
            +
            xh_buffer_destroy(xh_buffer_t *buf)
         | 
| 19 | 
            +
            {
         | 
| 20 | 
            +
                if (buf->start != NULL) {
         | 
| 21 | 
            +
                    xh_log_debug1("free enc buf: %p", buf->start);
         | 
| 22 | 
            +
                    free(buf->start);
         | 
| 23 | 
            +
                }
         | 
| 24 | 
            +
            }
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            /*
             * Accessors for buffer objects with start/cur/end pointer members.
             * `b` is a pointer to the buffer and may be evaluated more than once.
             *
             * The seek macros are fully parenthesised assignment expressions, so
             * they behave predictably with any pointer expression `p` and when used
             * inside a larger expression. xh_buffer_reset() is a statement.
             */
            #define xh_buffer_avail(b)    ((b)->end - (b)->cur)       /* free bytes after cur */
            #define xh_buffer_use(b)      ((b)->cur - (b)->start)     /* bytes in use before cur */
            #define xh_buffer_start(b)    ((b)->start)
            #define xh_buffer_pos(b)      ((b)->cur)
            #define xh_buffer_end(b)      ((b)->end)
            #define xh_buffer_size(b)     ((b)->end - (b)->start)     /* total capacity */
            #define xh_buffer_reset(b)    do { (b)->cur = (b)->start; } while (0)
            #define xh_buffer_seek(b, p)  ((b)->cur = (p))
            #define xh_buffer_seek_eof(b) ((b)->cur = (b)->end)
            #define xh_buffer_seek_top(b) ((b)->cur = (b)->start)
            /* Request room for another half of the current capacity. */
            #define xh_buffer_grow50(b)   xh_buffer_grow((b), xh_buffer_size(b) / 2)
         | 
| 37 | 
            +
             | 
| 38 | 
            +
            #endif /* _XH_BUFFER_H_ */
         | 
| @@ -0,0 +1,97 @@ | |
| 1 | 
            +
            #ifndef _XH_BUFFER_HELPER_H_
         | 
| 2 | 
            +
            #define _XH_BUFFER_HELPER_H_
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #include "xh_config.h"
         | 
| 5 | 
            +
            #include "xh_core.h"
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            /*
             * Raw write helpers: each macro stores bytes at b->cur and advances
             * b->cur past them. None of them checks the remaining capacity; the
             * caller must have reserved enough room beforehand.
             *
             * The macros expand to bare statements (no do/while wrapper) and are
             * invoked without a trailing semicolon, so they must not be used as the
             * unbraced body of an if/else/loop.
             */

            /* Copy l bytes from s with memcpy(). l is evaluated twice. */
            #define XH_BUFFER_WRITE_LONG_STRING(b, s, l)                           \
                memcpy((b)->cur, (s), (l));                                        \
                (b)->cur += (l);

            /* Copy l bytes one at a time. Modifies the caller's s and l. */
            #define XH_BUFFER_WRITE_SHORT_STRING(b, s, l)                          \
                while (l--) {                                                      \
                    *(b)->cur++ = *s++;                                            \
                }

            /* Byte loop for lengths below 17, memcpy() otherwise. */
            #define XH_BUFFER_WRITE_STRING(b, s, l)                                \
                if (l < 17) {                                                      \
                    XH_BUFFER_WRITE_SHORT_STRING(b, s, l)                          \
                }                                                                  \
                else {                                                             \
                    XH_BUFFER_WRITE_LONG_STRING(b, s, l)                           \
                }

            /* Append the single byte c. */
            #define XH_BUFFER_WRITE_CHAR(b, c)                                     \
                *(b)->cur++ = (c);

            /*
             * XH_BUFFER_WRITE_CHARn appends exactly n bytes read from s; s must
             * provide at least n readable bytes. The 2- and 4-byte primitives use
             * fixed-size memcpy() rather than casting byte pointers to wider
             * integer types, which keeps the stores valid for any alignment.
             */
            #define XH_BUFFER_WRITE_CHAR2(b, s)                                    \
                memcpy((b)->cur, (s), 2);                                          \
                (b)->cur += 2;
            #define XH_BUFFER_WRITE_CHAR3(b, s)                                    \
                XH_BUFFER_WRITE_CHAR2(b, s)                                        \
                XH_BUFFER_WRITE_CHAR(b, s[2])
            #define XH_BUFFER_WRITE_CHAR4(b, s)                                    \
                memcpy((b)->cur, (s), 4);                                          \
                (b)->cur += 4;
            #define XH_BUFFER_WRITE_CHAR5(b, s)                                    \
                XH_BUFFER_WRITE_CHAR4(b, s)                                        \
                XH_BUFFER_WRITE_CHAR(b, s[4])
            #define XH_BUFFER_WRITE_CHAR6(b, s)                                    \
                XH_BUFFER_WRITE_CHAR4(b, s)                                        \
                XH_BUFFER_WRITE_CHAR2(b, s + 4)
            #define XH_BUFFER_WRITE_CHAR7(b, s)                                    \
                XH_BUFFER_WRITE_CHAR6(b, s)                                        \
                XH_BUFFER_WRITE_CHAR(b, s[6])
            #define XH_BUFFER_WRITE_CHAR8(b, s)                                    \
                XH_BUFFER_WRITE_CHAR4(b, s)                                        \
                XH_BUFFER_WRITE_CHAR4(b, s + 4)
            #define XH_BUFFER_WRITE_CHAR9(b, s)                                    \
                XH_BUFFER_WRITE_CHAR8(b, s)                                        \
                XH_BUFFER_WRITE_CHAR(b, s[8])

            /*
             * Copy l bytes of element text from s, replacing CR, '<', '>' and '&'
             * with character/entity references. Every other byte is copied as is:
             * it is stored by the switch expression and kept by advancing cur in
             * the default branch; the escape branches overwrite it. Modifies the
             * caller's s and l. Each literal's length must match the CHARn macro
             * that writes it.
             */
            #define XH_BUFFER_WRITE_ESCAPE_STRING(b, s, l)                         \
                while (l--) {                                                      \
                    switch (*(b)->cur = *s++) {                                    \
                        case '\r':                                                 \
                            XH_BUFFER_WRITE_CHAR5(b, "&#13;")                      \
                            break;                                                 \
                        case '<':                                                  \
                            XH_BUFFER_WRITE_CHAR4(b, "&lt;")                       \
                            break;                                                 \
                        case '>':                                                  \
                            XH_BUFFER_WRITE_CHAR4(b, "&gt;")                       \
                            break;                                                 \
                        case '&':                                                  \
                            XH_BUFFER_WRITE_CHAR5(b, "&amp;")                      \
                            break;                                                 \
                        default:                                                   \
                            (b)->cur++;                                            \
                    }                                                              \
                }

            /*
             * Attribute-value variant of the escaper: additionally replaces LF,
             * TAB and the double quote. Modifies the caller's s and l.
             */
            #define XH_BUFFER_WRITE_ESCAPE_ATTR(b, s, l)                           \
                while (l--) {                                                      \
                    switch (*(b)->cur = *s++) {                                    \
                        case '\n':                                                 \
                            XH_BUFFER_WRITE_CHAR5(b, "&#10;")                      \
                            break;                                                 \
                        case '\r':                                                 \
                            XH_BUFFER_WRITE_CHAR5(b, "&#13;")                      \
                            break;                                                 \
                        case '\t':                                                 \
                            XH_BUFFER_WRITE_CHAR4(b, "&#9;")                       \
                            break;                                                 \
                        case '<':                                                  \
                            XH_BUFFER_WRITE_CHAR4(b, "&lt;")                       \
                            break;                                                 \
                        case '>':                                                  \
                            XH_BUFFER_WRITE_CHAR4(b, "&gt;")                       \
                            break;                                                 \
                        case '&':                                                  \
                            XH_BUFFER_WRITE_CHAR5(b, "&amp;")                      \
                            break;                                                 \
                        case '"':                                                  \
                            XH_BUFFER_WRITE_CHAR6(b, "&quot;")                     \
                            break;                                                 \
                        default:                                                   \
                            (b)->cur++;                                            \
                    }                                                              \
                }

            /* Append a string literal (or char array) without its trailing NUL. */
            #define XH_BUFFER_WRITE_CONSTANT(b, s)                                 \
                XH_BUFFER_WRITE_LONG_STRING(b, s, sizeof(s) - 1)
         | 
| 96 | 
            +
             | 
| 97 | 
            +
            #endif /* _XH_BUFFER_HELPER_H_ */
         | 
| @@ -0,0 +1,74 @@ | |
| 1 | 
            +
            #ifndef _XH_CONFIG_H_
         | 
| 2 | 
            +
            #define _XH_CONFIG_H_
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #include "ruby.h"
         | 
| 5 | 
            +
            #if HAVE_RUBY_ENCODING_H
         | 
| 6 | 
            +
            #include "ruby/encoding.h"
         | 
| 7 | 
            +
            #endif
         | 
| 8 | 
            +
            #include <stdint.h>
         | 
| 9 | 
            +
            #include <sys/stat.h>
         | 
| 10 | 
            +
            #include <fcntl.h>
         | 
| 11 | 
            +
            #include <stdio.h>
         | 
| 12 | 
            +
            #include <stdlib.h>
         | 
| 13 | 
            +
            #include <string.h>
         | 
| 14 | 
            +
            #include <errno.h>
         | 
| 15 | 
            +
            #ifdef WIN32
         | 
| 16 | 
            +
            #include <windows.h>
         | 
| 17 | 
            +
            #include <io.h>
         | 
| 18 | 
            +
            #else
         | 
| 19 | 
            +
            #include <sys/mman.h>
         | 
| 20 | 
            +
            #endif
         | 
| 21 | 
            +
             | 
| 22 | 
            +
            /*
             * Compiler-dependent helpers.
             *   expect(expr, value) - branch prediction hint (plain expr elsewhere)
             *   XH_INLINE           - storage class for small header-defined functions
             *   XH_UNUSED(v)        - declares v and marks it as intentionally unused
             *
             * With GCC-compatible compilers XH_UNUSED must expand to its own
             * argument followed by the attribute, so that the declared identifier
             * is the one the caller named.
             */
            #if __GNUC__ >= 3
            # define expect(expr,value)         __builtin_expect ((expr), (value))
            # define XH_INLINE                  static inline
            # define XH_UNUSED(v)               v __attribute__((unused))
            #else
            # define expect(expr,value)         (expr)
            # define XH_INLINE                  static
            # define XH_UNUSED(v)               v
            #endif
         | 
| 31 | 
            +
             | 
| 32 | 
            +
            #ifdef _MSC_VER
         | 
| 33 | 
            +
            #define _CRT_SECURE_NO_WARNINGS
         | 
| 34 | 
            +
            #define _CRT_NONSTDC_NO_DEPRECATE
         | 
| 35 | 
            +
            #define strncasecmp _strnicmp
         | 
| 36 | 
            +
            #define strcasecmp _stricmp
         | 
| 37 | 
            +
            #endif
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            #define expect_false(expr) expect ((expr) != 0, 0)
         | 
| 40 | 
            +
            #define expect_true(expr)  expect ((expr) != 0, 1)
         | 
| 41 | 
            +
             | 
| 42 | 
            +
            typedef uintptr_t xh_bool_t;
         | 
| 43 | 
            +
            typedef uintptr_t xh_uint_t;
         | 
| 44 | 
            +
            typedef intptr_t  xh_int_t;
         | 
| 45 | 
            +
            typedef u_char    xh_char_t;
         | 
| 46 | 
            +
             | 
| 47 | 
            +
            #define XH_CHAR_CAST    (xh_char_t *)
         | 
| 48 | 
            +
            #define XH_EMPTY_STRING (XH_CHAR_CAST "")
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            #if defined(XH_HAVE_ICONV) || defined(XH_HAVE_ICU)
         | 
| 51 | 
            +
            #define XH_HAVE_ENCODER
         | 
| 52 | 
            +
            #endif
         | 
| 53 | 
            +
             | 
| 54 | 
            +
            #ifdef HAVE_RUBY_ENCODING_H
         | 
| 55 | 
            +
            #define XH_FORCE_UTF8(s) rb_enc_set_index(s, rb_utf8_encindex())
         | 
| 56 | 
            +
            #else
         | 
| 57 | 
            +
            #define XH_FORCE_UTF8(s)
         | 
| 58 | 
            +
            #endif
         | 
| 59 | 
            +
             | 
| 60 | 
            +
            #if defined(XH_HAVE_XML2) && defined(XH_HAVE_XML__LIBXML)
         | 
| 61 | 
            +
            #define XH_HAVE_DOM
         | 
| 62 | 
            +
            #endif
         | 
| 63 | 
            +
             | 
| 64 | 
            +
            #ifdef XH_HAVE_DOM
         | 
| 65 | 
            +
            #include <libxml/parser.h>
         | 
| 66 | 
            +
            #endif
         | 
| 67 | 
            +
             | 
| 68 | 
            +
            #define XH_HAVE_MMAP
         | 
| 69 | 
            +
             | 
| 70 | 
            +
            extern VALUE xh_module;
         | 
| 71 | 
            +
            extern VALUE xh_parse_error_class;
         | 
| 72 | 
            +
            extern ID    xh_id_next;
         | 
| 73 | 
            +
             | 
| 74 | 
            +
            #endif /* _XH_CONFIG_H_ */
         | 
| @@ -0,0 +1,53 @@ | |
| 1 | 
            +
            #ifndef _XH_CORE_H_
         | 
| 2 | 
            +
            #define _XH_CORE_H_
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            /*
         | 
| 5 | 
            +
             * Concatenate preprocessor tokens A and B without expanding macro definitions
         | 
| 6 | 
            +
             * (however, if invoked from a macro, macro arguments are expanded).
         | 
| 7 | 
            +
             */
         | 
| 8 | 
            +
            #define XH_PPCAT_NX(A, B) A ## B
         | 
| 9 | 
            +
             | 
| 10 | 
            +
            /*
         | 
| 11 | 
            +
             * Concatenate preprocessor tokens A and B after macro-expanding them.
         | 
| 12 | 
            +
             */
         | 
| 13 | 
            +
            #define XH_PPCAT(A, B) XH_PPCAT_NX(A, B)
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            /*
         | 
| 16 | 
            +
             * Turn A into a string literal without expanding macro definitions
         | 
| 17 | 
            +
             * (however, if invoked from a macro, macro arguments are expanded).
         | 
| 18 | 
            +
             */
         | 
| 19 | 
            +
            #define XH_STRINGIZE_NX(A) #A
         | 
| 20 | 
            +
             | 
| 21 | 
            +
            /*
         | 
| 22 | 
            +
             * Turn A into a string literal after macro-expanding it.
         | 
| 23 | 
            +
             */
         | 
| 24 | 
            +
            #define XH_STRINGIZE(A) XH_STRINGIZE_NX(A)
         | 
| 25 | 
            +
             | 
| 26 | 
            +
            #ifndef FALSE
         | 
| 27 | 
            +
            #define FALSE (0)
         | 
| 28 | 
            +
            #endif
         | 
| 29 | 
            +
             | 
| 30 | 
            +
            #ifndef TRUE
         | 
| 31 | 
            +
            #define TRUE  (1)
         | 
| 32 | 
            +
            #endif
         | 
| 33 | 
            +
             | 
| 34 | 
            +
            #include "xh_log.h"
         | 
| 35 | 
            +
            #include "xh_string.h"
         | 
| 36 | 
            +
            #include "xh_sort.h"
         | 
| 37 | 
            +
            #include "xh_stack.h"
         | 
| 38 | 
            +
            //#include "xh_stash.h"
         | 
| 39 | 
            +
            #include "xh_param.h"
         | 
| 40 | 
            +
            #include "xh_buffer_helper.h"
         | 
| 41 | 
            +
            #include "xh_buffer.h"
         | 
| 42 | 
            +
            #include "xh_ruby_buffer.h"
         | 
| 43 | 
            +
            #include "xh_encoder.h"
         | 
| 44 | 
            +
            //#include "xh_reader.h"
         | 
| 45 | 
            +
            #include "xh_writer.h"
         | 
| 46 | 
            +
            #include "xh.h"
         | 
| 47 | 
            +
            #include "xh_h2x.h"
         | 
| 48 | 
            +
            #include "xh_xml.h"
         | 
| 49 | 
            +
            /*
         | 
| 50 | 
            +
            #include "xh_x2h.h"
         | 
| 51 | 
            +
            #include "xh_dom.h"
         | 
| 52 | 
            +
            */
         | 
| 53 | 
            +
            #endif /* _XH_CORE_H_ */
         | 
| @@ -0,0 +1,193 @@ | |
| 1 | 
            +
            #include "xh_config.h"
         | 
| 2 | 
            +
            #include "xh_core.h"
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            #ifdef XH_HAVE_ENCODER
         | 
| 5 | 
            +
             | 
| 6 | 
            +
            #ifdef XH_HAVE_ICU
         | 
| 7 | 
            +
            static void
         | 
| 8 | 
            +
            xh_encoder_uconv_destroy(UConverter *uconv)
         | 
| 9 | 
            +
            {
         | 
| 10 | 
            +
                if (uconv != NULL) {
         | 
| 11 | 
            +
                    ucnv_close(uconv);
         | 
| 12 | 
            +
                }
         | 
| 13 | 
            +
            }
         | 
| 14 | 
            +
             | 
| 15 | 
            +
            static UConverter *
         | 
| 16 | 
            +
            xh_encoder_uconv_create(xh_char_t *encoding, xh_bool_t toUnicode)
         | 
| 17 | 
            +
            {
         | 
| 18 | 
            +
                UConverter *uconv;
         | 
| 19 | 
            +
                UErrorCode  status = U_ZERO_ERROR;
         | 
| 20 | 
            +
             | 
| 21 | 
            +
                uconv = ucnv_open((char *) encoding, &status);
         | 
| 22 | 
            +
                if ( U_FAILURE(status) ) {
         | 
| 23 | 
            +
                    return NULL;
         | 
| 24 | 
            +
                }
         | 
| 25 | 
            +
             | 
| 26 | 
            +
                if (toUnicode) {
         | 
| 27 | 
            +
                    ucnv_setToUCallBack(uconv, UCNV_TO_U_CALLBACK_STOP,
         | 
| 28 | 
            +
                                        NULL, NULL, NULL, &status);
         | 
| 29 | 
            +
                }
         | 
| 30 | 
            +
                else {
         | 
| 31 | 
            +
                    ucnv_setFromUCallBack(uconv, UCNV_FROM_U_CALLBACK_STOP,
         | 
| 32 | 
            +
                                          NULL, NULL, NULL, &status);
         | 
| 33 | 
            +
                }
         | 
| 34 | 
            +
             | 
| 35 | 
            +
                return uconv;
         | 
| 36 | 
            +
            }
         | 
| 37 | 
            +
            #endif
         | 
| 38 | 
            +
             | 
| 39 | 
            +
            void
         | 
| 40 | 
            +
            xh_encoder_destroy(xh_encoder_t *encoder)
         | 
| 41 | 
            +
            {
         | 
| 42 | 
            +
                if (encoder != NULL) {
         | 
| 43 | 
            +
            #ifdef XH_HAVE_ICONV
         | 
| 44 | 
            +
                    if (encoder->iconv != NULL) {
         | 
| 45 | 
            +
                        xh_log_debug0("destroy iconv encoder");
         | 
| 46 | 
            +
                        iconv_close(encoder->iconv);
         | 
| 47 | 
            +
                    }
         | 
| 48 | 
            +
            #endif
         | 
| 49 | 
            +
             | 
| 50 | 
            +
            #ifdef XH_HAVE_ICU
         | 
| 51 | 
            +
                    if (encoder->uconv_from != NULL) {
         | 
| 52 | 
            +
                        xh_log_debug0("destroy icu encoder");
         | 
| 53 | 
            +
                        xh_encoder_uconv_destroy(encoder->uconv_from);
         | 
| 54 | 
            +
                        xh_encoder_uconv_destroy(encoder->uconv_to);
         | 
| 55 | 
            +
                    }
         | 
| 56 | 
            +
            #endif
         | 
| 57 | 
            +
                    free(encoder);
         | 
| 58 | 
            +
                }
         | 
| 59 | 
            +
            }
         | 
| 60 | 
            +
             | 
| 61 | 
            +
            xh_encoder_t *
         | 
| 62 | 
            +
            xh_encoder_create(xh_char_t *tocode, xh_char_t *fromcode)
         | 
| 63 | 
            +
            {
         | 
| 64 | 
            +
                xh_encoder_t *encoder;
         | 
| 65 | 
            +
             | 
| 66 | 
            +
                encoder = malloc(sizeof(xh_encoder_t));
         | 
| 67 | 
            +
                if (encoder == NULL) {
         | 
| 68 | 
            +
                    return NULL;
         | 
| 69 | 
            +
                }
         | 
| 70 | 
            +
                memset(encoder, 0, sizeof(xh_encoder_t));
         | 
| 71 | 
            +
             | 
| 72 | 
            +
                xh_str_copy(encoder->tocode, tocode, XH_PARAM_LEN);
         | 
| 73 | 
            +
                xh_str_copy(encoder->fromcode, fromcode, XH_PARAM_LEN);
         | 
| 74 | 
            +
             | 
| 75 | 
            +
            #ifdef XH_HAVE_ICONV
         | 
| 76 | 
            +
                xh_log_debug2("create iconv encoder from: '%s' to: '%s'", fromcode, tocode);
         | 
| 77 | 
            +
                encoder->iconv = iconv_open((char *) tocode, (char *) fromcode);
         | 
| 78 | 
            +
                if (encoder->iconv != (iconv_t) -1) {
         | 
| 79 | 
            +
                    encoder->type = XH_ENC_ICONV;
         | 
| 80 | 
            +
                    return encoder;
         | 
| 81 | 
            +
                }
         | 
| 82 | 
            +
                encoder->iconv = NULL;
         | 
| 83 | 
            +
            #endif
         | 
| 84 | 
            +
             | 
| 85 | 
            +
            #ifdef XH_HAVE_ICU
         | 
| 86 | 
            +
                xh_log_debug2("create icu encoder from: '%s' to: '%s'", fromcode, tocode);
         | 
| 87 | 
            +
                encoder->uconv_to = xh_encoder_uconv_create(tocode, 1);
         | 
| 88 | 
            +
                if (encoder->uconv_to != NULL) {
         | 
| 89 | 
            +
                    encoder->uconv_from = xh_encoder_uconv_create(fromcode, 0);
         | 
| 90 | 
            +
                    if (encoder->uconv_from != NULL) {
         | 
| 91 | 
            +
                        encoder->type        = XH_ENC_ICU;
         | 
| 92 | 
            +
                        encoder->pivotSource = encoder->pivotTarget = encoder->pivotStart = encoder->pivotBuffer;
         | 
| 93 | 
            +
                        encoder->pivotLimit  = encoder->pivotBuffer + sizeof(encoder->pivotBuffer) / sizeof(encoder->pivotBuffer[0]);
         | 
| 94 | 
            +
                        return encoder;
         | 
| 95 | 
            +
                    }
         | 
| 96 | 
            +
                }
         | 
| 97 | 
            +
            #endif
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                xh_encoder_destroy(encoder);
         | 
| 100 | 
            +
             | 
| 101 | 
            +
                return NULL;
         | 
| 102 | 
            +
            }
         | 
| 103 | 
            +
             | 
| 104 | 
            +
            void
         | 
| 105 | 
            +
            xh_encoder_encode_ruby_buffer(xh_encoder_t *encoder, xh_ruby_buffer_t *main_buf, xh_ruby_buffer_t *enc_buf)
         | 
| 106 | 
            +
            {
         | 
| 107 | 
            +
                xh_char_t *src  = main_buf->start;
         | 
| 108 | 
            +
             | 
| 109 | 
            +
            #ifdef XH_HAVE_ICONV
         | 
| 110 | 
            +
                if (encoder->type == XH_ENC_ICONV) {
         | 
| 111 | 
            +
                    size_t in_left  = main_buf->cur - main_buf->start;
         | 
| 112 | 
            +
                    size_t out_left = enc_buf->end - enc_buf->cur;
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                    size_t converted = iconv(encoder->iconv, (char **) &src, &in_left, (char **) &enc_buf->cur, &out_left);
         | 
| 115 | 
            +
                    if (converted == (size_t) -1) {
         | 
| 116 | 
            +
                        rb_raise(xh_parse_error_class, "Encoding error");
         | 
| 117 | 
            +
                    }
         | 
| 118 | 
            +
                    return;
         | 
| 119 | 
            +
                }
         | 
| 120 | 
            +
            #endif
         | 
| 121 | 
            +
             | 
| 122 | 
            +
            #ifdef XH_HAVE_ICU
         | 
| 123 | 
            +
                UErrorCode  err  = U_ZERO_ERROR;
         | 
| 124 | 
            +
                ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) &enc_buf->cur, (char *) enc_buf->end,
         | 
| 125 | 
            +
                               (const char **) &src, (char *) main_buf->cur, NULL, NULL, NULL, NULL,
         | 
| 126 | 
            +
                               FALSE, TRUE, &err);
         | 
| 127 | 
            +
             | 
| 128 | 
            +
                if ( U_FAILURE(err) ) {
         | 
| 129 | 
            +
                    rb_raise(xh_parse_error_class, "Encoding error: %d", err);
         | 
| 130 | 
            +
                }
         | 
| 131 | 
            +
            #endif
         | 
| 132 | 
            +
            }
         | 
| 133 | 
            +
             | 
| 134 | 
            +
            void
         | 
| 135 | 
            +
            xh_encoder_encode_string(xh_encoder_t *encoder, xh_char_t **src, size_t *src_left, xh_char_t **dst, size_t *dst_left)
         | 
| 136 | 
            +
            {
         | 
| 137 | 
            +
            #ifdef XH_HAVE_ICONV
         | 
| 138 | 
            +
                if (encoder->type == XH_ENC_ICONV) {
         | 
| 139 | 
            +
                    size_t converted = iconv(encoder->iconv, (char **) src, src_left, (char **) dst, dst_left);
         | 
| 140 | 
            +
                    if (converted == (size_t) -1) {
         | 
| 141 | 
            +
                        switch (errno) {
         | 
| 142 | 
            +
                            case EILSEQ:
         | 
| 143 | 
            +
                                rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
         | 
| 144 | 
            +
                            case E2BIG:
         | 
| 145 | 
            +
                                encoder->state = XH_ENC_BUFFER_OVERFLOW;
         | 
| 146 | 
            +
                                break;
         | 
| 147 | 
            +
                            case EINVAL:
         | 
| 148 | 
            +
                                encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
         | 
| 149 | 
            +
                                break;
         | 
| 150 | 
            +
                            default:
         | 
| 151 | 
            +
                                rb_raise(xh_parse_error_class, "Encoding error");
         | 
| 152 | 
            +
                        }
         | 
| 153 | 
            +
                    }
         | 
| 154 | 
            +
                    else {
         | 
| 155 | 
            +
                        encoder->state = XH_ENC_OK;
         | 
| 156 | 
            +
                    }
         | 
| 157 | 
            +
                    return;
         | 
| 158 | 
            +
                }
         | 
| 159 | 
            +
            #endif
         | 
| 160 | 
            +
             | 
| 161 | 
            +
            #ifdef XH_HAVE_ICU
         | 
| 162 | 
            +
                UErrorCode  err = U_ZERO_ERROR;
         | 
| 163 | 
            +
                xh_char_t  *old_src = *src;
         | 
| 164 | 
            +
                xh_char_t  *old_dst = *dst;
         | 
| 165 | 
            +
             | 
| 166 | 
            +
                ucnv_convertEx(encoder->uconv_to, encoder->uconv_from, (char **) dst, (char *) (*dst + *dst_left),
         | 
| 167 | 
            +
                               (const char **) src, (char *) (*src + *src_left), encoder->pivotStart, &encoder->pivotSource, &encoder->pivotTarget, encoder->pivotLimit,
         | 
| 168 | 
            +
                               FALSE, FALSE, &err);
         | 
| 169 | 
            +
             | 
| 170 | 
            +
                *src_left -= *src - old_src;
         | 
| 171 | 
            +
                *dst_left -= *dst - old_dst;
         | 
| 172 | 
            +
             | 
| 173 | 
            +
                if ( U_FAILURE(err) ) {
         | 
| 174 | 
            +
                    switch (err) {
         | 
| 175 | 
            +
                        case U_INVALID_CHAR_FOUND:
         | 
| 176 | 
            +
                            rb_raise(xh_parse_error_class, "Encoding error: invalid char found");
         | 
| 177 | 
            +
                        case U_BUFFER_OVERFLOW_ERROR:
         | 
| 178 | 
            +
                            encoder->state = XH_ENC_BUFFER_OVERFLOW;
         | 
| 179 | 
            +
                            break;
         | 
| 180 | 
            +
                        case U_TRUNCATED_CHAR_FOUND:
         | 
| 181 | 
            +
                            encoder->state = XH_ENC_TRUNCATED_CHAR_FOUND;
         | 
| 182 | 
            +
                            break;
         | 
| 183 | 
            +
                        default:
         | 
| 184 | 
            +
                            rb_raise(xh_parse_error_class, "Encoding error: %d", err);
         | 
| 185 | 
            +
                    }
         | 
| 186 | 
            +
                }
         | 
| 187 | 
            +
                else {
         | 
| 188 | 
            +
                    encoder->state = XH_ENC_OK;
         | 
| 189 | 
            +
                }
         | 
| 190 | 
            +
            #endif
         | 
| 191 | 
            +
            }
         | 
| 192 | 
            +
             | 
| 193 | 
            +
            #endif /* XH_HAVE_ENCODER */
         |