ox 2.0.3 → 2.0.4

Sign up to get free protection for your applications and access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9d03526c81d518faeee16a0866d7dcd58652cbb8
4
- data.tar.gz: 606f087bcfadf31e8f3800424a7253bfee95dc17
3
+ metadata.gz: 082093a98968b620e01ba43e96933e0f3710e1fa
4
+ data.tar.gz: 9322b6eaa676a1c5f43838fa414199e7f9586191
5
5
  SHA512:
6
- metadata.gz: decaadbf4e7024b3fbaef0694c49695d6c77739b617a8d24a63ed32f736f2cd4090d84ae1c0cfea2d32b7ef6419575b2b17c74672c96e3c995c1cb978095da44
7
- data.tar.gz: 17fff8235ae614cc166d867c8fb983b953cb3bc2bc6e72a1f056f52d453c09516201199034c894843d3cb691f1cc8a3b4f01688e98737b1f4598c9e76a188721
6
+ metadata.gz: 2f4de79cc51b46e8767b754b87ba2ba7d48e2d0f7bbe4f9997070ca5f9417db978fc01f64e1781e3a4aa54277e3a2335cfe533101a0f15c70e9cf4ece028047a
7
+ data.tar.gz: 312228d635f3765efe2a11f46e9a1b57c20ca7fdf1e55f0f6996d11252e1cedca3dfe0efa4e594bb2391560519936714d101e8b9e66ea66c9fae3a902ee21b2e
data/README.md CHANGED
@@ -34,9 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## <a name="release">Release Notes</a>
36
36
 
37
- ### Release 2.0.3
37
+ ### Release 2.0.4
38
38
 
39
- - Fixed excessive memory allocation issue for very large file parsing (half a gig).
39
+ - Fixed SAX parser handling of &#nnnn; encoded characters.
40
40
 
41
41
  ## <a name="description">Description</a>
42
42
 
data/ext/ox/parse.c CHANGED
@@ -38,6 +38,7 @@
38
38
  #include "err.h"
39
39
  #include "attr.h"
40
40
  #include "helper.h"
41
+ #include "special.h"
41
42
 
42
43
  static void read_instruction(PInfo pi);
43
44
  static void read_doctype(PInfo pi);
@@ -50,7 +51,6 @@ static char* read_name_token(PInfo pi);
50
51
  static char* read_quoted_value(PInfo pi);
51
52
  static char* read_hex_uint64(char *b, uint64_t *up);
52
53
  static char* read_10_uint64(char *b, uint64_t *up);
53
- static char* ucs_to_utf8_chars(char *text, uint64_t u);
54
54
  static char* read_coded_chars(PInfo pi, char *text);
55
55
  static void next_non_white(PInfo pi);
56
56
  static int collapse_special(PInfo pi, char *str);
@@ -893,51 +893,6 @@ read_10_uint64(char *b, uint64_t *up) {
893
893
  return b;
894
894
  }
895
895
 
896
- /*
897
- u0000..u007F 00000000000000xxxxxxx 0xxxxxxx
898
- u0080..u07FF 0000000000yyyyyxxxxxx 110yyyyy 10xxxxxx
899
- u0800..uD7FF, uE000..uFFFF 00000zzzzyyyyyyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
900
- u10000..u10FFFF uuuzzzzzzyyyyyyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
901
- */
902
- static char*
903
- ucs_to_utf8_chars(char *text, uint64_t u) {
904
- int reading = 0;
905
- int i;
906
- unsigned char c;
907
-
908
- if (u <= 0x000000000000007FULL) {
909
- /* 0xxxxxxx */
910
- *text++ = (char)u;
911
- } else if (u <= 0x00000000000007FFULL) {
912
- /* 110yyyyy 10xxxxxx */
913
- *text++ = (char)(0x00000000000000C0ULL | (0x000000000000001FULL & (u >> 6)));
914
- *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
915
- } else if (u <= 0x000000000000D7FFULL || (0x000000000000E000ULL <= u && u <= 0x000000000000FFFFULL)) {
916
- /* 1110zzzz 10yyyyyy 10xxxxxx */
917
- *text++ = (char)(0x00000000000000E0ULL | (0x000000000000000FULL & (u >> 12)));
918
- *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
919
- *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
920
- } else if (0x0000000000010000ULL <= u && u <= 0x000000000010FFFFULL) {
921
- /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
922
- *text++ = (char)(0x00000000000000F0ULL | (0x0000000000000007ULL & (u >> 18)));
923
- *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 12)));
924
- *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
925
- *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
926
- } else {
927
- /* assume it is UTF-8 encoded directly and not UCS */
928
- for (i = 56; 0 <= i; i -= 8) {
929
- c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
930
- if (reading) {
931
- *text++ = (char)c;
932
- } else if ('\0' != c) {
933
- *text++ = (char)c;
934
- reading = 1;
935
- }
936
- }
937
- }
938
- return text;
939
- }
940
-
941
896
  static char*
942
897
  read_coded_chars(PInfo pi, char *text) {
943
898
  char *b, buf[32];
@@ -974,14 +929,14 @@ read_coded_chars(PInfo pi, char *text) {
974
929
  #else
975
930
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
976
931
  #endif
977
- text = ucs_to_utf8_chars(text, u);
932
+ text = ox_ucs_to_utf8_chars(text, u);
978
933
  #if HAS_PRIVATE_ENCODING
979
934
  } else if (Qnil == pi->options->rb_enc) {
980
935
  #else
981
936
  } else if (0 == pi->options->rb_enc) {
982
937
  #endif
983
938
  pi->options->rb_enc = ox_utf8_encoding;
984
- text = ucs_to_utf8_chars(text, u);
939
+ text = ox_ucs_to_utf8_chars(text, u);
985
940
  } else if (TolerantEffort == pi->options->effort) {
986
941
  *text++ = '&';
987
942
  return text;
@@ -1059,7 +1014,7 @@ collapse_special(PInfo pi, char *str) {
1059
1014
  #else
1060
1015
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
1061
1016
  #endif
1062
- b = ucs_to_utf8_chars(b, u);
1017
+ b = ox_ucs_to_utf8_chars(b, u);
1063
1018
  /* TBD support UTF-16 */
1064
1019
  #if HAS_PRIVATE_ENCODING
1065
1020
  } else if (Qnil == pi->options->rb_enc) {
@@ -1067,7 +1022,7 @@ collapse_special(PInfo pi, char *str) {
1067
1022
  } else if (0 == pi->options->rb_enc) {
1068
1023
  #endif
1069
1024
  pi->options->rb_enc = ox_utf8_encoding;
1070
- b = ucs_to_utf8_chars(b, u);
1025
+ b = ox_ucs_to_utf8_chars(b, u);
1071
1026
  } else {
1072
1027
  /* set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
1073
1028
  set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
data/ext/ox/sax.c CHANGED
@@ -44,6 +44,7 @@
44
44
  #include "sax.h"
45
45
  #include "sax_stack.h"
46
46
  #include "sax_buf.h"
47
+ #include "special.h"
47
48
 
48
49
  #define NAME_MISMATCH 1
49
50
 
@@ -1121,6 +1122,46 @@ read_quoted_value(SaxDrive dr) {
1121
1122
  return '\0'; // should never get here
1122
1123
  }
1123
1124
 
1125
+ static char*
1126
+ read_hex_uint64(char *b, uint64_t *up) {
1127
+ uint64_t u = 0;
1128
+ char c;
1129
+
1130
+ for (; ';' != *b; b++) {
1131
+ c = *b;
1132
+ if ('0' <= c && c <= '9') {
1133
+ u = (u << 4) | (uint64_t)(c - '0');
1134
+ } else if ('a' <= c && c <= 'f') {
1135
+ u = (u << 4) | (uint64_t)(c - 'a' + 10);
1136
+ } else if ('A' <= c && c <= 'F') {
1137
+ u = (u << 4) | (uint64_t)(c - 'A' + 10);
1138
+ } else {
1139
+ return 0;
1140
+ }
1141
+ }
1142
+ *up = u;
1143
+
1144
+ return b;
1145
+ }
1146
+
1147
+ static char*
1148
+ read_10_uint64(char *b, uint64_t *up) {
1149
+ uint64_t u = 0;
1150
+ char c;
1151
+
1152
+ for (; ';' != *b; b++) {
1153
+ c = *b;
1154
+ if ('0' <= c && c <= '9') {
1155
+ u = (u * 10) + (uint64_t)(c - '0');
1156
+ } else {
1157
+ return 0;
1158
+ }
1159
+ }
1160
+ *up = u;
1161
+
1162
+ return b;
1163
+ }
1164
+
1124
1165
  int
1125
1166
  ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
1126
1167
  char *s = str;
@@ -1128,31 +1169,59 @@ ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
1128
1169
 
1129
1170
  while ('\0' != *s) {
1130
1171
  if ('&' == *s) {
1131
- int c;
1172
+ int c = 0;
1132
1173
  char *end;
1133
- int x = 0;
1174
+ //int x = 0;
1134
1175
 
1135
1176
  s++;
1136
1177
  if ('#' == *s) {
1137
- s++;
1178
+ uint64_t u = 0;
1179
+ char x;
1180
+
1181
+ s++;
1138
1182
  if ('x' == *s || 'X' == *s) {
1183
+ x = *s;
1139
1184
  s++;
1140
- x = 1;
1141
- c = (int)strtol(s, &end, 16);
1185
+ end = read_hex_uint64(s, &u);
1142
1186
  } else {
1143
- c = (int)strtol(s, &end, 10);
1187
+ x = '\0';
1188
+ end = read_10_uint64(s, &u);
1144
1189
  }
1145
- if (';' != *end) {
1190
+ if (0 == end) {
1146
1191
  ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1147
1192
  *b++ = '&';
1148
1193
  *b++ = '#';
1149
- if (x) {
1150
- *b++ = *(s - 1);
1194
+ if ('\0' != x) {
1195
+ *b++ = x;
1151
1196
  }
1152
1197
  continue;
1153
- }
1154
- col += (int)(end - s);
1155
- s = end + 1;
1198
+ }
1199
+ if (u <= 0x000000000000007FULL) {
1200
+ *b++ = (char)u;
1201
+ #if HAS_ENCODING_SUPPORT
1202
+ } else if (ox_utf8_encoding == dr->encoding) {
1203
+ b = ox_ucs_to_utf8_chars(b, u);
1204
+ } else if (0 == dr->encoding) {
1205
+ dr->encoding = ox_utf8_encoding;
1206
+ b = ox_ucs_to_utf8_chars(b, u);
1207
+ #elif HAS_PRIVATE_ENCODING
1208
+ } else if (ox_utf8_encoding == dr->encoding ||
1209
+ 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(dr->encoding)))) {
1210
+ b = ox_ucs_to_utf8_chars(b, u);
1211
+ } else if (Qnil == dr->encoding) {
1212
+ dr->encoding = ox_utf8_encoding;
1213
+ b = ox_ucs_to_utf8_chars(b, u);
1214
+ #endif
1215
+ } else {
1216
+ ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.");
1217
+ *b++ = '&';
1218
+ *b++ = '#';
1219
+ if ('\0' != x) {
1220
+ *b++ = x;
1221
+ }
1222
+ continue;
1223
+ }
1224
+ s = end + 1;
1156
1225
  } else if (0 == strncasecmp(s, "lt;", 3)) {
1157
1226
  c = '<';
1158
1227
  s += 3;
data/ext/ox/special.c ADDED
@@ -0,0 +1,76 @@
1
+ /* special.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #include "special.h"
32
+
33
+ /*
34
+ u0000..u007F 00000000000000xxxxxxx 0xxxxxxx
35
+ u0080..u07FF 0000000000yyyyyxxxxxx 110yyyyy 10xxxxxx
36
+ u0800..uD7FF, uE000..uFFFF 00000zzzzyyyyyyxxxxxx 1110zzzz 10yyyyyy 10xxxxxx
37
+ u10000..u10FFFF uuuzzzzzzyyyyyyxxxxxx 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx
38
+ */
39
+ char*
40
+ ox_ucs_to_utf8_chars(char *text, uint64_t u) {
41
+ int reading = 0;
42
+ int i;
43
+ unsigned char c;
44
+
45
+ if (u <= 0x000000000000007FULL) {
46
+ /* 0xxxxxxx */
47
+ *text++ = (char)u;
48
+ } else if (u <= 0x00000000000007FFULL) {
49
+ /* 110yyyyy 10xxxxxx */
50
+ *text++ = (char)(0x00000000000000C0ULL | (0x000000000000001FULL & (u >> 6)));
51
+ *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
52
+ } else if (u <= 0x000000000000D7FFULL || (0x000000000000E000ULL <= u && u <= 0x000000000000FFFFULL)) {
53
+ /* 1110zzzz 10yyyyyy 10xxxxxx */
54
+ *text++ = (char)(0x00000000000000E0ULL | (0x000000000000000FULL & (u >> 12)));
55
+ *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
56
+ *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
57
+ } else if (0x0000000000010000ULL <= u && u <= 0x000000000010FFFFULL) {
58
+ /* 11110uuu 10zzzzzz 10yyyyyy 10xxxxxx */
59
+ *text++ = (char)(0x00000000000000F0ULL | (0x0000000000000007ULL & (u >> 18)));
60
+ *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 12)));
61
+ *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & (u >> 6)));
62
+ *text++ = (char)(0x0000000000000080ULL | (0x000000000000003FULL & u));
63
+ } else {
64
+ /* assume it is UTF-8 encoded directly and not UCS */
65
+ for (i = 56; 0 <= i; i -= 8) {
66
+ c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
67
+ if (reading) {
68
+ *text++ = (char)c;
69
+ } else if ('\0' != c) {
70
+ *text++ = (char)c;
71
+ reading = 1;
72
+ }
73
+ }
74
+ }
75
+ return text;
76
+ }
data/ext/ox/special.h ADDED
@@ -0,0 +1,38 @@
1
+ /* special.h
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #ifndef __OX_SPECIAL_H__
32
+ #define __OX_SPECIAL_H__
33
+
34
+ #include <stdint.h>
35
+
36
+ extern char* ox_ucs_to_utf8_chars(char *text, uint64_t u);
37
+
38
+ #endif /* __OX_SPECIAL_H__ */
data/lib/ox/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '2.0.3'
4
+ VERSION = '2.0.4'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.0.3
4
+ version: 2.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ohler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2013-06-12 00:00:00.000000000 Z
11
+ date: 2013-06-24 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "A fast XML parser and object serializer that uses only standard C lib.\n
14
14
  \ \nOptimized XML (Ox), as the name implies was written to provide speed
@@ -47,6 +47,7 @@ files:
47
47
  - ext/ox/sax_has.h
48
48
  - ext/ox/sax_hint.h
49
49
  - ext/ox/sax_stack.h
50
+ - ext/ox/special.h
50
51
  - ext/ox/type.h
51
52
  - ext/ox/base64.c
52
53
  - ext/ox/cache.c
@@ -63,6 +64,7 @@ files:
63
64
  - ext/ox/sax_as.c
64
65
  - ext/ox/sax_buf.c
65
66
  - ext/ox/sax_hint.c
67
+ - ext/ox/special.c
66
68
  - LICENSE
67
69
  - README.md
68
70
  homepage: http://www.ohler.com/ox
@@ -92,3 +94,4 @@ signing_key:
92
94
  specification_version: 4
93
95
  summary: A fast XML parser and object serializer.
94
96
  test_files: []
97
+ has_rdoc: true