ox 1.6.4 → 1.6.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +2 -3
- data/ext/ox/ox.c +9 -0
- data/ext/ox/ox.h +6 -0
- data/ext/ox/parse.c +74 -7
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -34,10 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
-
### Release 1.6.4
|
37
|
+
### Release 1.6.5
|
38
38
|
|
39
|
-
- Special character handling
|
40
|
-
for really long UTF-8 characters.
|
39
|
+
- Special character handling now supports UCS-2 and UCS-4 Unicode characters as well as UTF-8 characters.
|
41
40
|
|
42
41
|
## <a name="description">Description</a>
|
43
42
|
|
data/ext/ox/ox.c
CHANGED
@@ -128,6 +128,12 @@ static VALUE with_instruct_sym;
|
|
128
128
|
static VALUE with_xml_sym;
|
129
129
|
static VALUE xsd_date_sym;
|
130
130
|
|
131
|
+
#if HAS_ENCODING_SUPPORT
|
132
|
+
rb_encoding *ox_utf8_encoding = 0;
|
133
|
+
#else
|
134
|
+
void *ox_utf8_encoding = 0;
|
135
|
+
#endif
|
136
|
+
|
131
137
|
struct _Options ox_default_options = {
|
132
138
|
{ '\0' }, /* encoding */
|
133
139
|
2, /* indent */
|
@@ -773,6 +779,9 @@ void Init_ox() {
|
|
773
779
|
|
774
780
|
rb_define_module_function(Ox, "cache_test", cache_test, 0);
|
775
781
|
rb_define_module_function(Ox, "cache8_test", cache8_test, 0);
|
782
|
+
#if HAS_ENCODING_SUPPORT
|
783
|
+
ox_utf8_encoding = rb_enc_find("UTF-8");
|
784
|
+
#endif
|
776
785
|
}
|
777
786
|
|
778
787
|
void
|
data/ext/ox/ox.h
CHANGED
@@ -258,6 +258,12 @@ extern ID ox_tv_nsec_id;
|
|
258
258
|
extern ID ox_tv_usec_id;
|
259
259
|
extern ID ox_value_id;
|
260
260
|
|
261
|
+
#if HAS_ENCODING_SUPPORT
|
262
|
+
extern rb_encoding *ox_utf8_encoding;
|
263
|
+
#else
|
264
|
+
extern void *ox_utf8_encoding;
|
265
|
+
#endif
|
266
|
+
|
261
267
|
extern VALUE ox_date_class;
|
262
268
|
extern VALUE ox_empty_string;
|
263
269
|
extern VALUE ox_encoding_sym;
|
data/ext/ox/parse.c
CHANGED
@@ -47,10 +47,10 @@ static char* read_name_token(PInfo pi);
|
|
47
47
|
static char* read_quoted_value(PInfo pi);
|
48
48
|
static char* read_hex_uint64(char *b, uint64_t *up);
|
49
49
|
static char* read_10_uint64(char *b, uint64_t *up);
|
50
|
-
static char* uint64_to_chars(char *text, uint64_t u);
|
50
|
+
static char* ucs_to_utf8_chars(char *text, uint64_t u);
|
51
51
|
static char* read_coded_chars(PInfo pi, char *text);
|
52
52
|
static void next_non_white(PInfo pi);
|
53
|
-
static int collapse_special(char *str);
|
53
|
+
static int collapse_special(PInfo pi, char *str);
|
54
54
|
|
55
55
|
/* This XML parser is a single pass, destructive, callback parser. It is a
|
56
56
|
* single pass parse since it only makes one pass over the characters in the
|
@@ -364,7 +364,7 @@ read_element(PInfo pi) {
|
|
364
364
|
next_non_white(pi);
|
365
365
|
ap->value = read_quoted_value(pi);
|
366
366
|
if (0 != strchr(ap->value, '&')) {
|
367
|
-
if (0 != collapse_special((char*)ap->value)) {
|
367
|
+
if (0 != collapse_special(pi, (char*)ap->value)) {
|
368
368
|
raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
|
369
369
|
}
|
370
370
|
}
|
@@ -701,12 +701,57 @@ read_10_uint64(char *b, uint64_t *up) {
|
|
701
701
|
return b;
|
702
702
|
}
|
703
703
|
|
704
|
+
/*
|
705
|
+
u0000..u007F 00000000000000xxxxxxx 0xxxxxxx
|
706
|
+
u0080..u07FF 0000000000yyyyyxxxxxx 110yyyyy 10xxxxxx
|
707
|
+
u0800..uD7FF, uE000..uFFFF 00000zzzzyyyyyyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
|
708
|
+
u10000..u10FFFF uuuzzzzzzyyyyyyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
|
709
|
+
*/
|
704
710
|
static char*
|
705
|
-
|
711
|
+
ucs_to_utf8_chars(char *text, uint64_t u) {
|
706
712
|
int reading = 0;
|
707
713
|
int i;
|
708
714
|
unsigned char c;
|
709
715
|
|
716
|
+
if (u <= 0x000000000000007FULL) {
|
717
|
+
/* 0xxxxxxx */
|
718
|
+
*text++ = (char)u;
|
719
|
+
} else if (u <= 0x00000000000007FFULL) {
|
720
|
+
/* 110yyyyy 10xxxxxx */
|
721
|
+
*text++ = (char)(0x00000000000000C0ULL | (0x000000000000001FULL & (u >> 6)));
|
722
|
+
*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
|
723
|
+
} else if (u <= 0x000000000000D7FFULL || (0x000000000000E000ULL <= u && u <= 0x000000000000FFFFULL)) {
|
724
|
+
/* 1110zzzz 10yyyyyy 10xxxxxx */
|
725
|
+
*text++ = (char)(0x00000000000000E0ULL | (0x000000000000000FULL & (u >> 12)));
|
726
|
+
*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
|
727
|
+
*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
|
728
|
+
} else if (0x0000000000010000ULL <= u && u <= 0x000000000010FFFFULL) {
|
729
|
+
/* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
|
730
|
+
*text++ = (char)(0x00000000000000F0ULL | (0x0000000000000007ULL & (u >> 18)));
|
731
|
+
*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 12)));
|
732
|
+
*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
|
733
|
+
*text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
|
734
|
+
} else {
|
735
|
+
/* assume it is UTF-8 encoded directly and not UCS */
|
736
|
+
for (i = 56; 0 <= i; i -= 8) {
|
737
|
+
c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
|
738
|
+
if (reading) {
|
739
|
+
*text++ = (char)c;
|
740
|
+
} else if ('\0' != c) {
|
741
|
+
*text++ = (char)c;
|
742
|
+
reading = 1;
|
743
|
+
}
|
744
|
+
}
|
745
|
+
}
|
746
|
+
return text;
|
747
|
+
}
|
748
|
+
|
749
|
+
#if 0
|
750
|
+
static char*
|
751
|
+
uint64_to_chars(char *text, uint64_t u) {
|
752
|
+
int reading = 0;
|
753
|
+
int i;
|
754
|
+
unsigned char c;
|
710
755
|
|
711
756
|
for (i = 56; 0 <= i; i -= 8) {
|
712
757
|
c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
|
@@ -719,6 +764,7 @@ uint64_to_chars(char *text, uint64_t u) {
|
|
719
764
|
}
|
720
765
|
return text;
|
721
766
|
}
|
767
|
+
#endif
|
722
768
|
|
723
769
|
static char*
|
724
770
|
read_coded_chars(PInfo pi, char *text) {
|
@@ -749,7 +795,17 @@ read_coded_chars(PInfo pi, char *text) {
|
|
749
795
|
*text++ = *pi->s;
|
750
796
|
} else {
|
751
797
|
pi->s = s;
|
752
|
-
|
798
|
+
if (u <= 0x000000000000007FULL) {
|
799
|
+
*text++ = (char)u;
|
800
|
+
} else if (ox_utf8_encoding == pi->encoding) {
|
801
|
+
text = ucs_to_utf8_chars(text, u);
|
802
|
+
} else if (0 == pi->encoding) {
|
803
|
+
pi->encoding = ox_utf8_encoding;
|
804
|
+
text = ucs_to_utf8_chars(text, u);
|
805
|
+
} else {
|
806
|
+
/*raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
|
807
|
+
raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
|
808
|
+
}
|
753
809
|
}
|
754
810
|
} else if (0 == strcasecmp(buf, "nbsp;")) {
|
755
811
|
pi->s = s;
|
@@ -776,7 +832,7 @@ read_coded_chars(PInfo pi, char *text) {
|
|
776
832
|
}
|
777
833
|
|
778
834
|
static int
|
779
|
-
collapse_special(char *str) {
|
835
|
+
collapse_special(PInfo pi, char *str) {
|
780
836
|
char *s = str;
|
781
837
|
char *b = str;
|
782
838
|
|
@@ -799,7 +855,18 @@ collapse_special(char *str) {
|
|
799
855
|
if (0 == end) {
|
800
856
|
return EDOM;
|
801
857
|
}
|
802
|
-
|
858
|
+
if (u <= 0x000000000000007FULL) {
|
859
|
+
*b++ = (char)u;
|
860
|
+
} else if (ox_utf8_encoding == pi->encoding) {
|
861
|
+
b = ucs_to_utf8_chars(b, u);
|
862
|
+
/* TBD support UTF-16 */
|
863
|
+
} else if (0 == pi->encoding) {
|
864
|
+
pi->encoding = ox_utf8_encoding;
|
865
|
+
b = ucs_to_utf8_chars(b, u);
|
866
|
+
} else {
|
867
|
+
/* raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
|
868
|
+
raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
|
869
|
+
}
|
803
870
|
s = end + 1;
|
804
871
|
} else {
|
805
872
|
if (0 == strncasecmp(s, "lt;", 3)) {
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.4
|
4
|
+
version: 1.6.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-10-
|
12
|
+
date: 2012-10-25 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "A fast XML parser and object serializer that uses only standard C
|
15
15
|
lib.\n \nOptimized XML (Ox), as the name implies was written to provide
|