ox 2.12.1 → 2.14.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -16,20 +16,15 @@ extern "C" {
16
16
  #define RSTRING_NOT_MODIFIED
17
17
 
18
18
  #include "ruby.h"
19
- #if HAS_ENCODING_SUPPORT
19
+ #if HAVE_RB_ENC_ASSOCIATE
20
20
  #include "ruby/encoding.h"
21
21
  #endif
22
22
 
23
- #ifdef RUBINIUS_RUBY
24
- #undef T_COMPLEX
25
- enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
23
+ #if HAVE_RUBY_ST_H
24
+ #include "ruby/st.h"
26
25
  #else
27
- #if HAS_TOP_LEVEL_ST_H
28
- /* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. */
26
+ // Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up.
29
27
  #include "st.h"
30
- #else
31
- #include "ruby/st.h"
32
- #endif
33
28
  #endif
34
29
 
35
30
  #include "cache.h"
@@ -141,15 +136,14 @@ typedef struct _options {
141
136
  char convert_special;// boolean true or false
142
137
  char allow_invalid; // YesNo
143
138
  char no_empty; // boolean - no empty elements when dumping
139
+ char with_cdata; // boolean - hash_load should include cdata
144
140
  char inv_repl[12]; // max 10 valid characters, first character is the length
145
141
  char strip_ns[64]; // namespace to strip, \0 is no-strip, \* is all, else only matches
146
142
  struct _hints *html_hints; // html hints
147
143
  VALUE attr_key_mod;
148
144
  VALUE element_key_mod;
149
- #if HAS_ENCODING_SUPPORT
145
+ #if HAVE_RB_ENC_ASSOCIATE
150
146
  rb_encoding *rb_enc;
151
- #elif HAS_PRIVATE_ENCODING
152
- VALUE rb_enc;
153
147
  #else
154
148
  void *rb_enc;
155
149
  #endif
@@ -236,10 +230,8 @@ extern ID ox_tv_nsec_id;
236
230
  extern ID ox_tv_usec_id;
237
231
  extern ID ox_value_id;
238
232
 
239
- #if HAS_ENCODING_SUPPORT
233
+ #if HAVE_RB_ENC_ASSOCIATE
240
234
  extern rb_encoding *ox_utf8_encoding;
241
- #elif HAS_PRIVATE_ENCODING
242
- extern VALUE ox_utf8_encoding;
243
235
  #else
244
236
  extern void *ox_utf8_encoding;
245
237
  #endif
@@ -441,7 +441,6 @@ read_element(PInfo pi) {
441
441
  // empty element, no attributes and no children
442
442
  pi->s++;
443
443
  if ('>' != *pi->s) {
444
- /*printf("*** '%s' ***\n", pi->s); */
445
444
  attr_stack_cleanup(&attrs);
446
445
  set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
447
446
  return 0;
@@ -479,8 +478,8 @@ read_element(PInfo pi) {
479
478
  pi->s++;
480
479
  pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
481
480
  pi->pcb->end_element(pi, ename);
482
-
483
481
  attr_stack_cleanup(&attrs);
482
+
484
483
  return 0;
485
484
  case '>':
486
485
  /* has either children or a value */
@@ -544,6 +543,12 @@ read_element(PInfo pi) {
544
543
  while (!done) {
545
544
  start = pi->s;
546
545
  next_non_white(pi);
546
+ if (OffSkip == pi->options->skip && start < pi->s && '<' == *pi->s) {
547
+ c = *pi->s;
548
+ *pi->s = '\0';
549
+ pi->pcb->add_text(pi, start, 1);
550
+ *pi->s = c;
551
+ }
547
552
  c = *pi->s++;
548
553
  if ('\0' == c) {
549
554
  attr_stack_cleanup(&attrs);
@@ -1000,11 +1005,13 @@ read_coded_chars(PInfo pi, char *text) {
1000
1005
  char *b, buf[32];
1001
1006
  char *end = buf + sizeof(buf) - 1;
1002
1007
  char *s;
1008
+ long blen = 0;
1003
1009
 
1004
1010
  for (b = buf, s = pi->s; b < end; b++, s++) {
1005
1011
  *b = *s;
1006
1012
  if (';' == *s) {
1007
1013
  *(b + 1) = '\0';
1014
+ blen = b - buf;
1008
1015
  s++;
1009
1016
  break;
1010
1017
  }
@@ -1025,18 +1032,9 @@ read_coded_chars(PInfo pi, char *text) {
1025
1032
  } else {
1026
1033
  if (u <= 0x000000000000007FULL) {
1027
1034
  *text++ = (char)u;
1028
- #if HAS_PRIVATE_ENCODING
1029
- } else if (ox_utf8_encoding == pi->options->rb_enc ||
1030
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
1031
- #else
1032
1035
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
1033
- #endif
1034
1036
  text = ox_ucs_to_utf8_chars(text, u);
1035
- #if HAS_PRIVATE_ENCODING
1036
- } else if (Qnil == pi->options->rb_enc) {
1037
- #else
1038
1037
  } else if (0 == pi->options->rb_enc) {
1039
- #endif
1040
1038
  pi->options->rb_enc = ox_utf8_encoding;
1041
1039
  text = ox_ucs_to_utf8_chars(text, u);
1042
1040
  } else if (TolerantEffort == pi->options->effort) {
@@ -1047,30 +1045,20 @@ read_coded_chars(PInfo pi, char *text) {
1047
1045
  } else {
1048
1046
  /*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
1049
1047
  set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
1050
- return 0;
1048
+ return NULL;
1051
1049
  }
1052
1050
  pi->s = s;
1053
1051
  }
1054
- } else if (0 == strcasecmp(buf, "nbsp;")) {
1055
- pi->s = s;
1056
- *text++ = ' ';
1057
- } else if (0 == strcasecmp(buf, "lt;")) {
1058
- pi->s = s;
1059
- *text++ = '<';
1060
- } else if (0 == strcasecmp(buf, "gt;")) {
1061
- pi->s = s;
1062
- *text++ = '>';
1063
- } else if (0 == strcasecmp(buf, "amp;")) {
1064
- pi->s = s;
1065
- *text++ = '&';
1066
- } else if (0 == strcasecmp(buf, "quot;")) {
1067
- pi->s = s;
1068
- *text++ = '"';
1069
- } else if (0 == strcasecmp(buf, "apos;")) {
1070
- pi->s = s;
1071
- *text++ = '\'';
1072
1052
  } else {
1073
- *text++ = '&';
1053
+ char *t2;
1054
+
1055
+ buf[blen] = '\0';
1056
+ if (NULL == (t2 = ox_entity_lookup(text, buf))) {
1057
+ *text++ = '&';
1058
+ } else {
1059
+ text = t2;
1060
+ pi->s = s;
1061
+ }
1074
1062
  }
1075
1063
  return text;
1076
1064
  }
@@ -1112,19 +1100,10 @@ collapse_special(PInfo pi, char *str) {
1112
1100
  }
1113
1101
  if (u <= 0x000000000000007FULL) {
1114
1102
  *b++ = (char)u;
1115
- #if HAS_PRIVATE_ENCODING
1116
- } else if (ox_utf8_encoding == pi->options->rb_enc ||
1117
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
1118
- #else
1119
1103
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
1120
- #endif
1121
1104
  b = ox_ucs_to_utf8_chars(b, u);
1122
1105
  /* TBD support UTF-16 */
1123
- #if HAS_PRIVATE_ENCODING
1124
- } else if (Qnil == pi->options->rb_enc) {
1125
- #else
1126
1106
  } else if (0 == pi->options->rb_enc) {
1127
- #endif
1128
1107
  pi->options->rb_enc = ox_utf8_encoding;
1129
1108
  b = ox_ucs_to_utf8_chars(b, u);
1130
1109
  } else {
@@ -1153,16 +1132,30 @@ collapse_special(PInfo pi, char *str) {
1153
1132
  *b++ = '&';
1154
1133
  continue;
1155
1134
  } else {
1156
- c = '?';
1135
+ char key[16];
1136
+ char *k = key;
1137
+ char *kend = key + sizeof(key) - 1;
1138
+
1139
+ *k++ = *s;
1157
1140
  while (';' != *s++) {
1158
1141
  if ('\0' == *s) {
1159
1142
  set_error(&pi->err, "Invalid format, special character does not end with a semicolon", pi->str, pi->s);
1160
1143
  return EDOM;
1161
1144
  }
1145
+ if (kend <= k) {
1146
+ k = key;
1147
+ break;
1148
+ }
1149
+ *k++ = *s;
1162
1150
  }
1163
- s++;
1164
- set_error(&pi->err, "Invalid format, invalid special character sequence", pi->str, pi->s);
1165
- return 0;
1151
+ k--;
1152
+ *k = '\0';
1153
+ if ('\0' == *key || NULL == (b = ox_entity_lookup(b, key))) {
1154
+ set_error(&pi->err, "Invalid format, invalid special character sequence", pi->str, pi->s);
1155
+ c = '?';
1156
+ return 0;
1157
+ }
1158
+ continue;
1166
1159
  }
1167
1160
  *b++ = (char)c;
1168
1161
  }
@@ -9,13 +9,16 @@
9
9
  #include <stdio.h>
10
10
  #include <strings.h>
11
11
  #include <sys/types.h>
12
- #if NEEDS_UIO
12
+ #if HAVE_SYS_UIO_H
13
13
  #include <sys/uio.h>
14
14
  #endif
15
15
  #include <unistd.h>
16
16
  #include <time.h>
17
17
 
18
18
  #include "ruby.h"
19
+ #if HAVE_RB_ENC_ASSOCIATE
20
+ #include "ruby/encoding.h"
21
+ #endif
19
22
  #include "ox.h"
20
23
  #include "sax.h"
21
24
  #include "sax_stack.h"
@@ -68,9 +71,9 @@ static VALUE protect_parse(VALUE drp) {
68
71
  return Qnil;
69
72
  }
70
73
 
71
- #if HAS_ENCODING_SUPPORT || HAS_PRIVATE_ENCODING
74
+ #if HAVE_RB_ENC_ASSOCIATE
72
75
  static int
73
- strIsAscii(const char *s) {
76
+ str_is_ascii(const char *s) {
74
77
  for (; '\0' != *s; s++) {
75
78
  if (*s < ' ' || '~' < *s) {
76
79
  return 0;
@@ -87,8 +90,8 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
87
90
 
88
91
  if (dr->options.symbolize) {
89
92
  if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
90
- #if HAS_ENCODING_SUPPORT
91
- if (0 != dr->encoding && !strIsAscii(str)) {
93
+ #if HAVE_RB_ENC_ASSOCIATE
94
+ if (0 != dr->encoding && !str_is_ascii(str)) {
92
95
  VALUE rstr = rb_str_new2(str);
93
96
 
94
97
  // TBD if sym can be pinned down then use this all the time
@@ -99,20 +102,6 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
99
102
  sym = ID2SYM(rb_intern(str));
100
103
  *slot = sym;
101
104
  }
102
- #elif HAS_PRIVATE_ENCODING
103
- if (Qnil != dr->encoding && !strIsAscii(str)) {
104
- VALUE rstr = rb_str_new2(str);
105
-
106
- rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
107
- sym = rb_funcall(rstr, ox_to_sym_id, 0);
108
- // Needed for Ruby 2.2 to get around the GC of symbols created
109
- // with to_sym which is needed for encoded symbols.
110
- rb_ary_push(ox_sym_bank, sym);
111
- *slot = Qundef;
112
- } else {
113
- sym = ID2SYM(rb_intern(str));
114
- *slot = sym;
115
- }
116
105
  #else
117
106
  sym = ID2SYM(rb_intern(str));
118
107
  *slot = sym;
@@ -120,14 +109,10 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
120
109
  }
121
110
  } else {
122
111
  sym = rb_str_new2(str);
123
- #if HAS_ENCODING_SUPPORT
112
+ #if HAVE_RB_ENC_ASSOCIATE
124
113
  if (0 != dr->encoding) {
125
114
  rb_enc_associate(sym, dr->encoding);
126
115
  }
127
- #elif HAS_PRIVATE_ENCODING
128
- if (Qnil != dr->encoding) {
129
- rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding);
130
- }
131
116
  #endif
132
117
  if (0 != strp) {
133
118
  *strp = StringValuePtr(sym);
@@ -182,7 +167,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
182
167
  dr->blocked = 0;
183
168
  dr->abort = false;
184
169
  has_init(&dr->has, handler);
185
- #if HAS_ENCODING_SUPPORT
170
+ #if HAVE_RB_ENC_FIND
186
171
  if ('\0' == *ox_default_options.encoding) {
187
172
  VALUE encoding;
188
173
 
@@ -196,18 +181,6 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
196
181
  } else {
197
182
  dr->encoding = rb_enc_find(ox_default_options.encoding);
198
183
  }
199
- #elif HAS_PRIVATE_ENCODING
200
- if ('\0' == *ox_default_options.encoding) {
201
- VALUE encoding;
202
-
203
- if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
204
- dr->encoding = encoding;
205
- } else {
206
- dr->encoding = Qnil;
207
- }
208
- } else {
209
- dr->encoding = rb_str_new2(ox_default_options.encoding);
210
- }
211
184
  #else
212
185
  dr->encoding = 0;
213
186
  #endif
@@ -255,9 +228,7 @@ skipBOM(SaxDrive dr) {
255
228
 
256
229
  if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
257
230
  if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
258
- #if HAS_ENCODING_SUPPORT
259
- dr->encoding = ox_utf8_encoding;
260
- #elif HAS_PRIVATE_ENCODING
231
+ #if HAVE_RB_ENC_FIND
261
232
  dr->encoding = ox_utf8_encoding;
262
233
  #else
263
234
  dr->encoding = UTF8_STR;
@@ -364,14 +335,10 @@ parse(SaxDrive dr) {
364
335
  off_t col = dr->buf.col - 1;
365
336
 
366
337
  args[0] = rb_str_new2("");
367
- #if HAS_ENCODING_SUPPORT
338
+ #if HAVE_RB_ENC_ASSOCIATE
368
339
  if (0 != dr->encoding) {
369
340
  rb_enc_associate(args[0], dr->encoding);
370
341
  }
371
- #elif HAS_PRIVATE_ENCODING
372
- if (Qnil != dr->encoding) {
373
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
374
- }
375
342
  #endif
376
343
  if (dr->has.pos) {
377
344
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -526,14 +493,10 @@ read_instruction(SaxDrive dr) {
526
493
  ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
527
494
  }
528
495
  args[0] = rb_str_new2(content);
529
- #if HAS_ENCODING_SUPPORT
496
+ #if HAVE_RB_ENC_ASSOCIATE
530
497
  if (0 != dr->encoding) {
531
498
  rb_enc_associate(args[0], dr->encoding);
532
499
  }
533
- #elif HAS_PRIVATE_ENCODING
534
- if (Qnil != dr->encoding) {
535
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
536
- }
537
500
  #endif
538
501
  if (dr->has.line) {
539
502
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
@@ -732,14 +695,10 @@ read_cdata(SaxDrive dr) {
732
695
  VALUE args[1];
733
696
 
734
697
  args[0] = rb_str_new2(dr->buf.str);
735
- #if HAS_ENCODING_SUPPORT
698
+ #if HAVE_RB_ENC_ASSOCIATE
736
699
  if (0 != dr->encoding) {
737
700
  rb_enc_associate(args[0], dr->encoding);
738
701
  }
739
- #elif HAS_PRIVATE_ENCODING
740
- if (Qnil != dr->encoding) {
741
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
742
- }
743
702
  #endif
744
703
  if (dr->has.pos) {
745
704
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -826,14 +785,10 @@ read_comment(SaxDrive dr) {
826
785
  (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
827
786
 
828
787
  args[0] = rb_str_new2(dr->buf.str);
829
- #if HAS_ENCODING_SUPPORT
788
+ #if HAVE_RB_ENC_ASSOCIATE
830
789
  if (0 != dr->encoding) {
831
790
  rb_enc_associate(args[0], dr->encoding);
832
791
  }
833
- #elif HAS_PRIVATE_ENCODING
834
- if (Qnil != dr->encoding) {
835
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
836
- }
837
792
  #endif
838
793
  if (dr->has.pos) {
839
794
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1158,14 +1113,10 @@ read_text(SaxDrive dr) {
1158
1113
  ((NoSkip == dr->options.skip && !isEnd) ||
1159
1114
  (OffSkip == dr->options.skip))) {
1160
1115
  args[0] = rb_str_new2(dr->buf.str);
1161
- #if HAS_ENCODING_SUPPORT
1116
+ #if HAVE_RB_ENC_ASSOCIATE
1162
1117
  if (0 != dr->encoding) {
1163
1118
  rb_enc_associate(args[0], dr->encoding);
1164
1119
  }
1165
- #elif HAS_PRIVATE_ENCODING
1166
- if (Qnil != dr->encoding) {
1167
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1168
- }
1169
1120
  #endif
1170
1121
  if (dr->has.pos) {
1171
1122
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1213,14 +1164,10 @@ read_text(SaxDrive dr) {
1213
1164
  break;
1214
1165
  }
1215
1166
  args[0] = rb_str_new2(dr->buf.str);
1216
- #if HAS_ENCODING_SUPPORT
1167
+ #if HAVE_RB_ENC_ASSOCIATE
1217
1168
  if (0 != dr->encoding) {
1218
1169
  rb_enc_associate(args[0], dr->encoding);
1219
1170
  }
1220
- #elif HAS_PRIVATE_ENCODING
1221
- if (Qnil != dr->encoding) {
1222
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1223
- }
1224
1171
  #endif
1225
1172
  if (dr->has.pos) {
1226
1173
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1299,14 +1246,10 @@ read_jump(SaxDrive dr, const char *pat) {
1299
1246
  // TBD check parent overlay
1300
1247
  if (dr->has.text && !dr->blocked) {
1301
1248
  args[0] = rb_str_new2(dr->buf.str);
1302
- #if HAS_ENCODING_SUPPORT
1249
+ #if HAVE_RB_ENC_ASSOCIATE
1303
1250
  if (0 != dr->encoding) {
1304
1251
  rb_enc_associate(args[0], dr->encoding);
1305
1252
  }
1306
- #elif HAS_PRIVATE_ENCODING
1307
- if (Qnil != dr->encoding) {
1308
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1309
- }
1310
1253
  #endif
1311
1254
  if (dr->has.pos) {
1312
1255
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1377,10 +1320,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1377
1320
  c = read_quoted_value(dr);
1378
1321
  attr_value = dr->buf.str;
1379
1322
  if (is_encoding) {
1380
- #if HAS_ENCODING_SUPPORT
1323
+ #if HAVE_RB_ENC_FIND
1381
1324
  dr->encoding = rb_enc_find(dr->buf.str);
1382
- #elif HAS_PRIVATE_ENCODING
1383
- dr->encoding = rb_str_new2(dr->buf.str);
1384
1325
  #else
1385
1326
  dr->encoding = dr->buf.str;
1386
1327
  #endif
@@ -1411,14 +1352,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1411
1352
  ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1412
1353
  }
1413
1354
  args[1] = rb_str_new2(attr_value);
1414
- #if HAS_ENCODING_SUPPORT
1355
+ #if HAVE_RB_ENC_ASSOCIATE
1415
1356
  if (0 != dr->encoding) {
1416
1357
  rb_enc_associate(args[1], dr->encoding);
1417
1358
  }
1418
- #elif HAS_PRIVATE_ENCODING
1419
- if (Qnil != dr->encoding) {
1420
- rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
1421
- }
1422
1359
  #endif
1423
1360
  if (dr->has.pos) {
1424
1361
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1614,19 +1551,12 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1614
1551
  }
1615
1552
  if (u <= 0x000000000000007FULL) {
1616
1553
  *b++ = (char)u;
1617
- #if HAS_ENCODING_SUPPORT
1554
+ #if HAVE_RB_ENC_FIND
1618
1555
  } else if (ox_utf8_encoding == dr->encoding) {
1619
1556
  b = ox_ucs_to_utf8_chars(b, u);
1620
1557
  } else if (0 == dr->encoding) {
1621
1558
  dr->encoding = ox_utf8_encoding;
1622
1559
  b = ox_ucs_to_utf8_chars(b, u);
1623
- #elif HAS_PRIVATE_ENCODING
1624
- } else if (ox_utf8_encoding == dr->encoding ||
1625
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(dr->encoding)))) {
1626
- b = ox_ucs_to_utf8_chars(b, u);
1627
- } else if (Qnil == dr->encoding) {
1628
- dr->encoding = ox_utf8_encoding;
1629
- b = ox_ucs_to_utf8_chars(b, u);
1630
1560
  #else
1631
1561
  } else if (0 == dr->encoding) {
1632
1562
  dr->encoding = UTF8_STR;
@@ -1668,8 +1598,28 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1668
1598
  c = '\'';
1669
1599
  s += 5;
1670
1600
  } else {
1671
- ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1672
- c = '&';
1601
+ char key[16];
1602
+ char *k = key;
1603
+ char *kend = key + sizeof(key) - 1;
1604
+ char *bn;
1605
+ char *s2 = s;
1606
+
1607
+ for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1608
+ if (kend <= k) {
1609
+ k = key;
1610
+ break;
1611
+ }
1612
+ *k = *s2;
1613
+ }
1614
+ *k = '\0';
1615
+ if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1616
+ ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1617
+ c = '&';
1618
+ } else {
1619
+ b = bn;
1620
+ s = s2 + 1;
1621
+ continue;
1622
+ }
1673
1623
  }
1674
1624
  *b++ = (char)c;
1675
1625
  col++;