ox 1.6.4 → 1.6.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +2 -3
- data/ext/ox/ox.c +9 -0
- data/ext/ox/ox.h +6 -0
- data/ext/ox/parse.c +74 -7
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
    
        data/README.md
    CHANGED
    
    | @@ -34,10 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem. | |
| 34 34 |  | 
| 35 35 | 
             
            ## <a name="release">Release Notes</a>
         | 
| 36 36 |  | 
| 37 | 
            -
            ### Release 1.6.4
         | 
| 37 | 
            +
            ### Release 1.6.5
         | 
| 38 38 |  | 
| 39 | 
            -
             - Special character handling  | 
| 40 | 
            -
               for really long UTF-8 characters.
         | 
| 39 | 
            +
             - Special character handling now supports UCS-2 and UCS-4 Unicode characters as well as UTF-8 characters.
         | 
| 41 40 |  | 
| 42 41 | 
             
            ## <a name="description">Description</a>
         | 
| 43 42 |  | 
    
        data/ext/ox/ox.c
    CHANGED
    
    | @@ -128,6 +128,12 @@ static VALUE	with_instruct_sym; | |
| 128 128 | 
             
            static VALUE	with_xml_sym;
         | 
| 129 129 | 
             
            static VALUE	xsd_date_sym;
         | 
| 130 130 |  | 
| 131 | 
            +
            #if HAS_ENCODING_SUPPORT
         | 
| 132 | 
            +
            rb_encoding	*ox_utf8_encoding = 0;
         | 
| 133 | 
            +
            #else
         | 
| 134 | 
            +
            void		*ox_utf8_encoding = 0;
         | 
| 135 | 
            +
            #endif
         | 
| 136 | 
            +
             | 
| 131 137 | 
             
            struct _Options	 ox_default_options = {
         | 
| 132 138 | 
             
                { '\0' },		/* encoding */
         | 
| 133 139 | 
             
                2,			/* indent */
         | 
| @@ -773,6 +779,9 @@ void Init_ox() { | |
| 773 779 |  | 
| 774 780 | 
             
                rb_define_module_function(Ox, "cache_test", cache_test, 0);
         | 
| 775 781 | 
             
                rb_define_module_function(Ox, "cache8_test", cache8_test, 0);
         | 
| 782 | 
            +
            #if HAS_ENCODING_SUPPORT
         | 
| 783 | 
            +
                ox_utf8_encoding = rb_enc_find("UTF-8");
         | 
| 784 | 
            +
            #endif
         | 
| 776 785 | 
             
            }
         | 
| 777 786 |  | 
| 778 787 | 
             
            void
         | 
    
        data/ext/ox/ox.h
    CHANGED
    
    | @@ -258,6 +258,12 @@ extern ID	ox_tv_nsec_id; | |
| 258 258 | 
             
            extern ID	ox_tv_usec_id;
         | 
| 259 259 | 
             
            extern ID	ox_value_id;
         | 
| 260 260 |  | 
| 261 | 
            +
            #if HAS_ENCODING_SUPPORT
         | 
| 262 | 
            +
            extern rb_encoding	*ox_utf8_encoding;
         | 
| 263 | 
            +
            #else
         | 
| 264 | 
            +
            extern void		*ox_utf8_encoding;
         | 
| 265 | 
            +
            #endif
         | 
| 266 | 
            +
             | 
| 261 267 | 
             
            extern VALUE	ox_date_class;
         | 
| 262 268 | 
             
            extern VALUE	ox_empty_string;
         | 
| 263 269 | 
             
            extern VALUE	ox_encoding_sym;
         | 
    
        data/ext/ox/parse.c
    CHANGED
    
    | @@ -47,10 +47,10 @@ static char*	read_name_token(PInfo pi); | |
| 47 47 | 
             
            static char*	read_quoted_value(PInfo pi);
         | 
| 48 48 | 
             
            static char*	read_hex_uint64(char *b, uint64_t *up);
         | 
| 49 49 | 
             
            static char*	read_10_uint64(char *b, uint64_t *up);
         | 
| 50 | 
            -
            static char*	uint64_to_chars(char *text, uint64_t u);
         | 
| 50 | 
            +
            static char*	ucs_to_utf8_chars(char *text, uint64_t u);
         | 
| 51 51 | 
             
            static char*	read_coded_chars(PInfo pi, char *text);
         | 
| 52 52 | 
             
            static void	next_non_white(PInfo pi);
         | 
| 53 | 
            -
            static int	collapse_special(char *str);
         | 
| 53 | 
            +
            static int	collapse_special(PInfo pi, char *str);
         | 
| 54 54 |  | 
| 55 55 | 
             
            /* This XML parser is a single pass, destructive, callback parser. It is a
         | 
| 56 56 | 
             
             * single pass parse since it only make one pass over the characters in the
         | 
| @@ -364,7 +364,7 @@ read_element(PInfo pi) { | |
| 364 364 | 
             
            	    next_non_white(pi);
         | 
| 365 365 | 
             
            	    ap->value = read_quoted_value(pi);
         | 
| 366 366 | 
             
            	    if (0 != strchr(ap->value, '&')) {
         | 
| 367 | 
            -
            		if (0 != collapse_special((char*)ap->value)) {
         | 
| 367 | 
            +
            		if (0 != collapse_special(pi, (char*)ap->value)) {
         | 
| 368 368 | 
             
            		    raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
         | 
| 369 369 | 
             
            		}
         | 
| 370 370 | 
             
            	    }
         | 
| @@ -701,12 +701,57 @@ read_10_uint64(char *b, uint64_t *up) { | |
| 701 701 | 
             
                return b;
         | 
| 702 702 | 
             
            }
         | 
| 703 703 |  | 
| 704 | 
            +
            /*
         | 
| 705 | 
            +
            u0000..u007F                00000000000000xxxxxxx  0xxxxxxx
         | 
| 706 | 
            +
            u0080..u07FF                0000000000yyyyyxxxxxx  110yyyyy 10xxxxxx
         | 
| 707 | 
            +
            u0800..uD7FF, uE000..uFFFF  00000zzzzyyyyyyxxxxxx  1110zzzz 10yyyyyy 10xxxxxx
         | 
| 708 | 
            +
            u10000..u10FFFF             uuuzzzzzzyyyyyyxxxxxx  11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
         | 
| 709 | 
            +
            */
         | 
| 704 710 | 
             
            static char*
         | 
| 705 | 
            -
            uint64_to_chars(char *text, uint64_t u) {
         | 
| 711 | 
            +
            ucs_to_utf8_chars(char *text, uint64_t u) {
         | 
| 706 712 | 
             
                int			reading = 0;
         | 
| 707 713 | 
             
                int			i;
         | 
| 708 714 | 
             
                unsigned char	c;
         | 
| 709 715 |  | 
| 716 | 
            +
                if (u <= 0x000000000000007FULL) {
         | 
| 717 | 
            +
            	/* 0xxxxxxx */
         | 
| 718 | 
            +
            	*text++ = (char)u;
         | 
| 719 | 
            +
                } else if (u <= 0x00000000000007FFULL) {
         | 
| 720 | 
            +
            	/* 110yyyyy 10xxxxxx */
         | 
| 721 | 
            +
            	*text++ = (char)(0x00000000000000C0ULL | (0x000000000000001FULL & (u >> 6)));
         | 
| 722 | 
            +
            	*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
         | 
| 723 | 
            +
                } else if (u <= 0x000000000000D7FFULL || (0x000000000000E000ULL <= u && u <= 0x000000000000FFFFULL)) {
         | 
| 724 | 
            +
            	/* 1110zzzz 10yyyyyy 10xxxxxx */
         | 
| 725 | 
            +
            	*text++ = (char)(0x00000000000000E0ULL | (0x000000000000000FULL & (u >> 12)));
         | 
| 726 | 
            +
            	*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
         | 
| 727 | 
            +
            	*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
         | 
| 728 | 
            +
                } else if (0x0000000000010000ULL <= u && u <= 0x000000000010FFFFULL) {
         | 
| 729 | 
            +
            	/* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
         | 
| 730 | 
            +
            	*text++ = (char)(0x00000000000000F0ULL | (0x0000000000000007ULL & (u >> 18)));
         | 
| 731 | 
            +
            	*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 12)));
         | 
| 732 | 
            +
            	*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
         | 
| 733 | 
            +
            	*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
         | 
| 734 | 
            +
                } else {
         | 
| 735 | 
            +
            	/* assume it is UTF-8 encoded directly and not UCS */
         | 
| 736 | 
            +
            	for (i = 56; 0 <= i; i -= 8) {
         | 
| 737 | 
            +
            	    c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
         | 
| 738 | 
            +
            	    if (reading) {
         | 
| 739 | 
            +
            		*text++ = (char)c;
         | 
| 740 | 
            +
            	    } else if ('\0' != c) {
         | 
| 741 | 
            +
            		*text++ = (char)c;
         | 
| 742 | 
            +
            		reading = 1;
         | 
| 743 | 
            +
            	    }
         | 
| 744 | 
            +
            	}
         | 
| 745 | 
            +
                }
         | 
| 746 | 
            +
                return text;
         | 
| 747 | 
            +
            }
         | 
| 748 | 
            +
             | 
| 749 | 
            +
            #if 0
         | 
| 750 | 
            +
            static char*
         | 
| 751 | 
            +
            uint64_to_chars(char *text, uint64_t u) {
         | 
| 752 | 
            +
                int			reading = 0;
         | 
| 753 | 
            +
                int			i;
         | 
| 754 | 
            +
                unsigned char	c;
         | 
| 710 755 |  | 
| 711 756 | 
             
                for (i = 56; 0 <= i; i -= 8) {
         | 
| 712 757 | 
             
            	c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
         | 
| @@ -719,6 +764,7 @@ uint64_to_chars(char *text, uint64_t u) { | |
| 719 764 | 
             
                }
         | 
| 720 765 | 
             
                return text;
         | 
| 721 766 | 
             
            }
         | 
| 767 | 
            +
            #endif
         | 
| 722 768 |  | 
| 723 769 | 
             
            static char*
         | 
| 724 770 | 
             
            read_coded_chars(PInfo pi, char *text) {
         | 
| @@ -749,7 +795,17 @@ read_coded_chars(PInfo pi, char *text) { | |
| 749 795 | 
             
            	    *text++ = *pi->s;
         | 
| 750 796 | 
             
            	} else {
         | 
| 751 797 | 
             
            	    pi->s = s;
         | 
| 752 | 
            -
            	    text = uint64_to_chars(text, u);
         | 
| 798 | 
            +
            	    if (u <= 0x000000000000007FULL) {
         | 
| 799 | 
            +
            		*text++ = (char)u;
         | 
| 800 | 
            +
            	    } else if (ox_utf8_encoding == pi->encoding) {
         | 
| 801 | 
            +
            		text = ucs_to_utf8_chars(text, u);
         | 
| 802 | 
            +
            	    } else if (0 == pi->encoding) {
         | 
| 803 | 
            +
            		pi->encoding = ox_utf8_encoding;
         | 
| 804 | 
            +
            		text = ucs_to_utf8_chars(text, u);
         | 
| 805 | 
            +
            	    } else {
         | 
| 806 | 
            +
            		/*raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
         | 
| 807 | 
            +
            		raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
         | 
| 808 | 
            +
            	    }
         | 
| 753 809 | 
             
            	}
         | 
| 754 810 | 
             
                } else if (0 == strcasecmp(buf, "nbsp;")) {
         | 
| 755 811 | 
             
            	pi->s = s;
         | 
| @@ -776,7 +832,7 @@ read_coded_chars(PInfo pi, char *text) { | |
| 776 832 | 
             
            }
         | 
| 777 833 |  | 
| 778 834 | 
             
            static int
         | 
| 779 | 
            -
            collapse_special(char *str) {
         | 
| 835 | 
            +
            collapse_special(PInfo pi, char *str) {
         | 
| 780 836 | 
             
                char	*s = str;
         | 
| 781 837 | 
             
                char	*b = str;
         | 
| 782 838 |  | 
| @@ -799,7 +855,18 @@ collapse_special(char *str) { | |
| 799 855 | 
             
            		if (0 == end) {
         | 
| 800 856 | 
             
            		    return EDOM;
         | 
| 801 857 | 
             
            		}
         | 
| 802 | 
            -
            		b = uint64_to_chars(b, u);
         | 
| 858 | 
            +
            		if (u <= 0x000000000000007FULL) {
         | 
| 859 | 
            +
            		    *b++ = (char)u;
         | 
| 860 | 
            +
            		} else if (ox_utf8_encoding == pi->encoding) {
         | 
| 861 | 
            +
            		    b = ucs_to_utf8_chars(b, u);
         | 
| 862 | 
            +
            		    /* TBD support UTF-16 */
         | 
| 863 | 
            +
            		} else if (0 == pi->encoding) {
         | 
| 864 | 
            +
            		    pi->encoding = ox_utf8_encoding;
         | 
| 865 | 
            +
            		    b = ucs_to_utf8_chars(b, u);
         | 
| 866 | 
            +
            		} else {
         | 
| 867 | 
            +
            		    /* raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
         | 
| 868 | 
            +
            		    raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
         | 
| 869 | 
            +
            		}
         | 
| 803 870 | 
             
            		s = end + 1;
         | 
| 804 871 | 
             
            	    } else {
         | 
| 805 872 | 
             
            		if (0 == strncasecmp(s, "lt;", 3)) {
         | 
    
        data/lib/ox/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: ox
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 1.6.4
         | 
| 4 | 
            +
              version: 1.6.5
         | 
| 5 5 | 
             
              prerelease: 
         | 
| 6 6 | 
             
            platform: ruby
         | 
| 7 7 | 
             
            authors:
         | 
| @@ -9,7 +9,7 @@ authors: | |
| 9 9 | 
             
            autorequire: 
         | 
| 10 10 | 
             
            bindir: bin
         | 
| 11 11 | 
             
            cert_chain: []
         | 
| 12 | 
            -
            date: 2012-10- | 
| 12 | 
            +
            date: 2012-10-25 00:00:00.000000000 Z
         | 
| 13 13 | 
             
            dependencies: []
         | 
| 14 14 | 
             
            description: ! "A fast XML parser and object serializer that uses only standard C
         | 
| 15 15 | 
             
              lib.\n            \nOptimized XML (Ox), as the name implies was written to provide
         |