ox 1.6.3 → 1.6.4

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

Files changed (4)
  1. data/README.md +3 -2
  2. data/ext/ox/parse.c +140 -65
  3. data/lib/ox/version.rb +1 -1
  4. metadata +2 -2
data/README.md CHANGED
@@ -34,9 +34,10 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## <a name="release">Release Notes</a>
36
36
 
37
- ### Release 1.6.3
37
+ ### Release 1.6.4
38
38
 
39
- - Fixed compatibility issues with Linux (Ubuntu) mostly related to pointer sizes.
39
+ - Special character handling has been improved. Both hex and base 10 numeric values are allowed up to a 64 bit number
40
+ for really long UTF-8 characters.
40
41
 
41
42
  ## <a name="description">Description</a>
42
43
 
data/ext/ox/parse.c CHANGED
@@ -45,7 +45,10 @@ static void read_text(PInfo pi);
45
45
  static void read_cdata(PInfo pi);
46
46
  static char* read_name_token(PInfo pi);
47
47
  static char* read_quoted_value(PInfo pi);
48
- static int read_coded_char(PInfo pi);
48
+ static char* read_hex_uint64(char *b, uint64_t *up);
49
+ static char* read_10_uint64(char *b, uint64_t *up);
50
+ static char* uint64_to_chars(char *text, uint64_t u);
51
+ static char* read_coded_chars(PInfo pi, char *text);
49
52
  static void next_non_white(PInfo pi);
50
53
  static int collapse_special(char *str);
51
54
 
@@ -461,10 +464,7 @@ read_text(PInfo pi) {
461
464
  case '\0':
462
465
  raise_error("invalid format, document not terminated", pi->str, pi->s);
463
466
  default:
464
- if ('&' == c) {
465
- c = read_coded_char(pi);
466
- }
467
- if (end <= b) {
467
+ if (end <= (b + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
468
468
  unsigned long size;
469
469
 
470
470
  if (0 == alloc_buf) {
@@ -481,7 +481,11 @@ read_text(PInfo pi) {
481
481
  }
482
482
  end = alloc_buf + size - 2;
483
483
  }
484
- *b++ = c;
484
+ if ('&' == c) {
485
+ b = read_coded_chars(pi, b);
486
+ } else {
487
+ *b++ = c;
488
+ }
485
489
  break;
486
490
  }
487
491
  }
@@ -522,10 +526,7 @@ read_reduced_text(PInfo pi) {
522
526
  case '\0':
523
527
  raise_error("invalid format, document not terminated", pi->str, pi->s);
524
528
  default:
525
- if ('&' == c) {
526
- c = read_coded_char(pi);
527
- }
528
- if (end <= b + spc) {
529
+ if (end <= (b + spc + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
529
530
  unsigned long size;
530
531
 
531
532
  if (0 == alloc_buf) {
@@ -546,7 +547,11 @@ read_reduced_text(PInfo pi) {
546
547
  *b++ = ' ';
547
548
  }
548
549
  spc = 0;
549
- *b++ = c;
550
+ if ('&' == c) {
551
+ b = read_coded_chars(pi, b);
552
+ } else {
553
+ *b++ = c;
554
+ }
550
555
  break;
551
556
  }
552
557
  }
@@ -656,53 +661,118 @@ read_quoted_value(PInfo pi) {
656
661
  return value;
657
662
  }
658
663
 
659
- static int
660
- read_coded_char(PInfo pi) {
661
- char *b, buf[8];
662
- char *end = buf + sizeof(buf);
664
+ static char*
665
+ read_hex_uint64(char *b, uint64_t *up) {
666
+ uint64_t u = 0;
667
+ char c;
668
+
669
+ for (; ';' != *b; b++) {
670
+ c = *b;
671
+ if ('0' <= c && c <= '9') {
672
+ u = (u << 4) | (uint64_t)(c - '0');
673
+ } else if ('a' <= c && c <= 'f') {
674
+ u = (u << 4) | (uint64_t)(c - 'a' + 10);
675
+ } else if ('A' <= c && c <= 'F') {
676
+ u = (u << 4) | (uint64_t)(c - 'A' + 10);
677
+ } else {
678
+ return 0;
679
+ }
680
+ }
681
+ *up = u;
682
+
683
+ return b;
684
+ }
685
+
686
+ static char*
687
+ read_10_uint64(char *b, uint64_t *up) {
688
+ uint64_t u = 0;
689
+ char c;
690
+
691
+ for (; ';' != *b; b++) {
692
+ c = *b;
693
+ if ('0' <= c && c <= '9') {
694
+ u = (u * 10) + (uint64_t)(c - '0');
695
+ } else {
696
+ return 0;
697
+ }
698
+ }
699
+ *up = u;
700
+
701
+ return b;
702
+ }
703
+
704
+ static char*
705
+ uint64_to_chars(char *text, uint64_t u) {
706
+ int reading = 0;
707
+ int i;
708
+ unsigned char c;
709
+
710
+
711
+ for (i = 56; 0 <= i; i -= 8) {
712
+ c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
713
+ if (reading) {
714
+ *text++ = (char)c;
715
+ } else if ('\0' != c) {
716
+ *text++ = (char)c;
717
+ reading = 1;
718
+ }
719
+ }
720
+ return text;
721
+ }
722
+
723
+ static char*
724
+ read_coded_chars(PInfo pi, char *text) {
725
+ char *b, buf[32];
726
+ char *end = buf + sizeof(buf) - 1;
663
727
  char *s;
664
- int c;
665
728
 
666
729
  for (b = buf, s = pi->s; b < end; b++, s++) {
730
+ *b = *s;
667
731
  if (';' == *s) {
668
- *b = '\0';
732
+ *(b + 1) = '\0';
669
733
  s++;
670
734
  break;
671
735
  }
672
- *b = *s;
673
736
  }
674
737
  if (b > end) {
675
- return *pi->s;
676
- }
677
- if ('#' == *buf) {
678
- c = (int)strtol(buf + 1, &end, 10);
679
- if (0 >= c || '\0' != *end) {
680
- return *pi->s;
738
+ *text++ = *pi->s;
739
+ } else if ('#' == *buf) {
740
+ uint64_t u = 0;
741
+
742
+ b = buf + 1;
743
+ if ('x' == *b || 'X' == *b) {
744
+ b = read_hex_uint64(b + 1, &u);
745
+ } else {
746
+ b = read_10_uint64(b, &u);
681
747
  }
748
+ if (0 == b) {
749
+ *text++ = *pi->s;
750
+ } else {
751
+ pi->s = s;
752
+ text = uint64_to_chars(text, u);
753
+ }
754
+ } else if (0 == strcasecmp(buf, "nbsp;")) {
682
755
  pi->s = s;
683
-
684
- return c;
685
- }
686
- if (0 == strcasecmp(buf, "nbsp")) {
687
- pi->s = s;
688
- return ' ';
689
- } else if (0 == strcasecmp(buf, "lt")) {
756
+ *text++ = ' ';
757
+ } else if (0 == strcasecmp(buf, "lt;")) {
690
758
  pi->s = s;
691
- return '<';
692
- } else if (0 == strcasecmp(buf, "gt")) {
759
+ *text++ = '<';
760
+ } else if (0 == strcasecmp(buf, "gt;")) {
693
761
  pi->s = s;
694
- return '>';
695
- } else if (0 == strcasecmp(buf, "amp")) {
762
+ *text++ = '>';
763
+ } else if (0 == strcasecmp(buf, "amp;")) {
696
764
  pi->s = s;
697
- return '&';
698
- } else if (0 == strcasecmp(buf, "quot")) {
765
+ *text++ = '&';
766
+ } else if (0 == strcasecmp(buf, "quot;")) {
699
767
  pi->s = s;
700
- return '"';
701
- } else if (0 == strcasecmp(buf, "apos")) {
768
+ *text++ = '"';
769
+ } else if (0 == strcasecmp(buf, "apos;")) {
702
770
  pi->s = s;
703
- return '\'';
771
+ *text++ = '\'';
772
+ } else {
773
+ *text++ = *pi->s;
704
774
  }
705
- return *pi->s;
775
+ return text;
706
776
  }
707
777
 
708
778
  static int
@@ -717,42 +787,47 @@ collapse_special(char *str) {
717
787
 
718
788
  s++;
719
789
  if ('#' == *s) {
790
+ uint64_t u = 0;
791
+
720
792
  s++;
721
793
  if ('x' == *s || 'X' == *s) {
722
794
  s++;
723
- c = (int)strtol(s, &end, 16);
795
+ end = read_hex_uint64(s, &u);
724
796
  } else {
725
- c = (int)strtol(s, &end, 10);
797
+ end = read_10_uint64(s, &u);
726
798
  }
727
- if (';' != *end) {
799
+ if (0 == end) {
728
800
  return EDOM;
729
801
  }
802
+ b = uint64_to_chars(b, u);
730
803
  s = end + 1;
731
- } else if (0 == strncasecmp(s, "lt;", 3)) {
732
- c = '<';
733
- s += 3;
734
- } else if (0 == strncasecmp(s, "gt;", 3)) {
735
- c = '>';
736
- s += 3;
737
- } else if (0 == strncasecmp(s, "amp;", 4)) {
738
- c = '&';
739
- s += 4;
740
- } else if (0 == strncasecmp(s, "quot;", 5)) {
741
- c = '"';
742
- s += 5;
743
- } else if (0 == strncasecmp(s, "apos;", 5)) {
744
- c = '\'';
745
- s += 5;
746
804
  } else {
747
- c = '?';
748
- while (';' != *s++) {
749
- if ('\0' == *s) {
750
- return EDOM;
805
+ if (0 == strncasecmp(s, "lt;", 3)) {
806
+ c = '<';
807
+ s += 3;
808
+ } else if (0 == strncasecmp(s, "gt;", 3)) {
809
+ c = '>';
810
+ s += 3;
811
+ } else if (0 == strncasecmp(s, "amp;", 4)) {
812
+ c = '&';
813
+ s += 4;
814
+ } else if (0 == strncasecmp(s, "quot;", 5)) {
815
+ c = '"';
816
+ s += 5;
817
+ } else if (0 == strncasecmp(s, "apos;", 5)) {
818
+ c = '\'';
819
+ s += 5;
820
+ } else {
821
+ c = '?';
822
+ while (';' != *s++) {
823
+ if ('\0' == *s) {
824
+ return EDOM;
825
+ }
751
826
  }
827
+ s++;
752
828
  }
753
- s++;
829
+ *b++ = (char)c;
754
830
  }
755
- *b++ = (char)c;
756
831
  } else {
757
832
  *b++ = *s++;
758
833
  }
data/lib/ox/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '1.6.3'
4
+ VERSION = '1.6.4'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.3
4
+ version: 1.6.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-10-22 00:00:00.000000000 Z
12
+ date: 2012-10-24 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! "A fast XML parser and object serializer that uses only standard C
15
15
  lib.\n \nOptimized XML (Ox), as the name implies was written to provide