ox 1.6.3 → 1.6.4
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +3 -2
- data/ext/ox/parse.c +140 -65
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -34,9 +34,10 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
-
### Release 1.6.3
|
37
|
+
### Release 1.6.4
|
38
38
|
|
39
|
-
-
|
39
|
+
- Special character handling has been improved. Both hex and base 10 numeric values are allowed up to a 64 bit number
|
40
|
+
for really long UTF-8 characters.
|
40
41
|
|
41
42
|
## <a name="description">Description</a>
|
42
43
|
|
data/ext/ox/parse.c
CHANGED
@@ -45,7 +45,10 @@ static void read_text(PInfo pi);
|
|
45
45
|
static void read_cdata(PInfo pi);
|
46
46
|
static char* read_name_token(PInfo pi);
|
47
47
|
static char* read_quoted_value(PInfo pi);
|
48
|
-
static
|
48
|
+
static char* read_hex_uint64(char *b, uint64_t *up);
|
49
|
+
static char* read_10_uint64(char *b, uint64_t *up);
|
50
|
+
static char* uint64_to_chars(char *text, uint64_t u);
|
51
|
+
static char* read_coded_chars(PInfo pi, char *text);
|
49
52
|
static void next_non_white(PInfo pi);
|
50
53
|
static int collapse_special(char *str);
|
51
54
|
|
@@ -461,10 +464,7 @@ read_text(PInfo pi) {
|
|
461
464
|
case '\0':
|
462
465
|
raise_error("invalid format, document not terminated", pi->str, pi->s);
|
463
466
|
default:
|
464
|
-
if ('&' == c) {
|
465
|
-
c = read_coded_char(pi);
|
466
|
-
}
|
467
|
-
if (end <= b) {
|
467
|
+
if (end <= (b + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
|
468
468
|
unsigned long size;
|
469
469
|
|
470
470
|
if (0 == alloc_buf) {
|
@@ -481,7 +481,11 @@ read_text(PInfo pi) {
|
|
481
481
|
}
|
482
482
|
end = alloc_buf + size - 2;
|
483
483
|
}
|
484
|
-
|
484
|
+
if ('&' == c) {
|
485
|
+
b = read_coded_chars(pi, b);
|
486
|
+
} else {
|
487
|
+
*b++ = c;
|
488
|
+
}
|
485
489
|
break;
|
486
490
|
}
|
487
491
|
}
|
@@ -522,10 +526,7 @@ read_reduced_text(PInfo pi) {
|
|
522
526
|
case '\0':
|
523
527
|
raise_error("invalid format, document not terminated", pi->str, pi->s);
|
524
528
|
default:
|
525
|
-
if ('&' == c) {
|
526
|
-
c = read_coded_char(pi);
|
527
|
-
}
|
528
|
-
if (end <= b + spc) {
|
529
|
+
if (end <= (b + spc + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
|
529
530
|
unsigned long size;
|
530
531
|
|
531
532
|
if (0 == alloc_buf) {
|
@@ -546,7 +547,11 @@ read_reduced_text(PInfo pi) {
|
|
546
547
|
*b++ = ' ';
|
547
548
|
}
|
548
549
|
spc = 0;
|
549
|
-
|
550
|
+
if ('&' == c) {
|
551
|
+
b = read_coded_chars(pi, b);
|
552
|
+
} else {
|
553
|
+
*b++ = c;
|
554
|
+
}
|
550
555
|
break;
|
551
556
|
}
|
552
557
|
}
|
@@ -656,53 +661,118 @@ read_quoted_value(PInfo pi) {
|
|
656
661
|
return value;
|
657
662
|
}
|
658
663
|
|
659
|
-
static
|
660
|
-
|
661
|
-
|
662
|
-
char
|
664
|
+
static char*
|
665
|
+
read_hex_uint64(char *b, uint64_t *up) {
|
666
|
+
uint64_t u = 0;
|
667
|
+
char c;
|
668
|
+
|
669
|
+
for (; ';' != *b; b++) {
|
670
|
+
c = *b;
|
671
|
+
if ('0' <= c && c <= '9') {
|
672
|
+
u = (u << 4) | (uint64_t)(c - '0');
|
673
|
+
} else if ('a' <= c && c <= 'f') {
|
674
|
+
u = (u << 4) | (uint64_t)(c - 'a' + 10);
|
675
|
+
} else if ('A' <= c && c <= 'F') {
|
676
|
+
u = (u << 4) | (uint64_t)(c - 'A' + 10);
|
677
|
+
} else {
|
678
|
+
return 0;
|
679
|
+
}
|
680
|
+
}
|
681
|
+
*up = u;
|
682
|
+
|
683
|
+
return b;
|
684
|
+
}
|
685
|
+
|
686
|
+
static char*
|
687
|
+
read_10_uint64(char *b, uint64_t *up) {
|
688
|
+
uint64_t u = 0;
|
689
|
+
char c;
|
690
|
+
|
691
|
+
for (; ';' != *b; b++) {
|
692
|
+
c = *b;
|
693
|
+
if ('0' <= c && c <= '9') {
|
694
|
+
u = (u * 10) + (uint64_t)(c - '0');
|
695
|
+
} else {
|
696
|
+
return 0;
|
697
|
+
}
|
698
|
+
}
|
699
|
+
*up = u;
|
700
|
+
|
701
|
+
return b;
|
702
|
+
}
|
703
|
+
|
704
|
+
static char*
|
705
|
+
uint64_to_chars(char *text, uint64_t u) {
|
706
|
+
int reading = 0;
|
707
|
+
int i;
|
708
|
+
unsigned char c;
|
709
|
+
|
710
|
+
|
711
|
+
for (i = 56; 0 <= i; i -= 8) {
|
712
|
+
c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
|
713
|
+
if (reading) {
|
714
|
+
*text++ = (char)c;
|
715
|
+
} else if ('\0' != c) {
|
716
|
+
*text++ = (char)c;
|
717
|
+
reading = 1;
|
718
|
+
}
|
719
|
+
}
|
720
|
+
return text;
|
721
|
+
}
|
722
|
+
|
723
|
+
static char*
|
724
|
+
read_coded_chars(PInfo pi, char *text) {
|
725
|
+
char *b, buf[32];
|
726
|
+
char *end = buf + sizeof(buf) - 1;
|
663
727
|
char *s;
|
664
|
-
int c;
|
665
728
|
|
666
729
|
for (b = buf, s = pi->s; b < end; b++, s++) {
|
730
|
+
*b = *s;
|
667
731
|
if (';' == *s) {
|
668
|
-
*b = '\0';
|
732
|
+
*(b + 1) = '\0';
|
669
733
|
s++;
|
670
734
|
break;
|
671
735
|
}
|
672
|
-
*b = *s;
|
673
736
|
}
|
674
737
|
if (b > end) {
|
675
|
-
|
676
|
-
}
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
738
|
+
*text++ = *pi->s;
|
739
|
+
} else if ('#' == *buf) {
|
740
|
+
uint64_t u = 0;
|
741
|
+
|
742
|
+
b = buf + 1;
|
743
|
+
if ('x' == *b || 'X' == *b) {
|
744
|
+
b = read_hex_uint64(b + 1, &u);
|
745
|
+
} else {
|
746
|
+
b = read_10_uint64(b, &u);
|
681
747
|
}
|
748
|
+
if (0 == b) {
|
749
|
+
*text++ = *pi->s;
|
750
|
+
} else {
|
751
|
+
pi->s = s;
|
752
|
+
text = uint64_to_chars(text, u);
|
753
|
+
}
|
754
|
+
} else if (0 == strcasecmp(buf, "nbsp;")) {
|
682
755
|
pi->s = s;
|
683
|
-
|
684
|
-
|
685
|
-
}
|
686
|
-
if (0 == strcasecmp(buf, "nbsp")) {
|
687
|
-
pi->s = s;
|
688
|
-
return ' ';
|
689
|
-
} else if (0 == strcasecmp(buf, "lt")) {
|
756
|
+
*text++ = ' ';
|
757
|
+
} else if (0 == strcasecmp(buf, "lt;")) {
|
690
758
|
pi->s = s;
|
691
|
-
|
692
|
-
} else if (0 == strcasecmp(buf, "gt")) {
|
759
|
+
*text++ = '<';
|
760
|
+
} else if (0 == strcasecmp(buf, "gt;")) {
|
693
761
|
pi->s = s;
|
694
|
-
|
695
|
-
} else if (0 == strcasecmp(buf, "amp")) {
|
762
|
+
*text++ = '>';
|
763
|
+
} else if (0 == strcasecmp(buf, "amp;")) {
|
696
764
|
pi->s = s;
|
697
|
-
|
698
|
-
} else if (0 == strcasecmp(buf, "quot")) {
|
765
|
+
*text++ = '&';
|
766
|
+
} else if (0 == strcasecmp(buf, "quot;")) {
|
699
767
|
pi->s = s;
|
700
|
-
|
701
|
-
} else if (0 == strcasecmp(buf, "apos")) {
|
768
|
+
*text++ = '"';
|
769
|
+
} else if (0 == strcasecmp(buf, "apos;")) {
|
702
770
|
pi->s = s;
|
703
|
-
|
771
|
+
*text++ = '\'';
|
772
|
+
} else {
|
773
|
+
*text++ = *pi->s;
|
704
774
|
}
|
705
|
-
return
|
775
|
+
return text;
|
706
776
|
}
|
707
777
|
|
708
778
|
static int
|
@@ -717,42 +787,47 @@ collapse_special(char *str) {
|
|
717
787
|
|
718
788
|
s++;
|
719
789
|
if ('#' == *s) {
|
790
|
+
uint64_t u = 0;
|
791
|
+
|
720
792
|
s++;
|
721
793
|
if ('x' == *s || 'X' == *s) {
|
722
794
|
s++;
|
723
|
-
|
795
|
+
end = read_hex_uint64(s, &u);
|
724
796
|
} else {
|
725
|
-
|
797
|
+
end = read_10_uint64(s, &u);
|
726
798
|
}
|
727
|
-
if (
|
799
|
+
if (0 == end) {
|
728
800
|
return EDOM;
|
729
801
|
}
|
802
|
+
b = uint64_to_chars(b, u);
|
730
803
|
s = end + 1;
|
731
|
-
} else if (0 == strncasecmp(s, "lt;", 3)) {
|
732
|
-
c = '<';
|
733
|
-
s += 3;
|
734
|
-
} else if (0 == strncasecmp(s, "gt;", 3)) {
|
735
|
-
c = '>';
|
736
|
-
s += 3;
|
737
|
-
} else if (0 == strncasecmp(s, "amp;", 4)) {
|
738
|
-
c = '&';
|
739
|
-
s += 4;
|
740
|
-
} else if (0 == strncasecmp(s, "quot;", 5)) {
|
741
|
-
c = '"';
|
742
|
-
s += 5;
|
743
|
-
} else if (0 == strncasecmp(s, "apos;", 5)) {
|
744
|
-
c = '\'';
|
745
|
-
s += 5;
|
746
804
|
} else {
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
805
|
+
if (0 == strncasecmp(s, "lt;", 3)) {
|
806
|
+
c = '<';
|
807
|
+
s += 3;
|
808
|
+
} else if (0 == strncasecmp(s, "gt;", 3)) {
|
809
|
+
c = '>';
|
810
|
+
s += 3;
|
811
|
+
} else if (0 == strncasecmp(s, "amp;", 4)) {
|
812
|
+
c = '&';
|
813
|
+
s += 4;
|
814
|
+
} else if (0 == strncasecmp(s, "quot;", 5)) {
|
815
|
+
c = '"';
|
816
|
+
s += 5;
|
817
|
+
} else if (0 == strncasecmp(s, "apos;", 5)) {
|
818
|
+
c = '\'';
|
819
|
+
s += 5;
|
820
|
+
} else {
|
821
|
+
c = '?';
|
822
|
+
while (';' != *s++) {
|
823
|
+
if ('\0' == *s) {
|
824
|
+
return EDOM;
|
825
|
+
}
|
751
826
|
}
|
827
|
+
s++;
|
752
828
|
}
|
753
|
-
|
829
|
+
*b++ = (char)c;
|
754
830
|
}
|
755
|
-
*b++ = (char)c;
|
756
831
|
} else {
|
757
832
|
*b++ = *s++;
|
758
833
|
}
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.3
|
4
|
+
version: 1.6.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-10-
|
12
|
+
date: 2012-10-24 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "A fast XML parser and object serializer that uses only standard C
|
15
15
|
lib.\n \nOptimized XML (Ox), as the name implies was written to provide
|