ox 2.12.1 → 2.14.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
@@ -16,20 +16,15 @@ extern "C" {
16
16
  #define RSTRING_NOT_MODIFIED
17
17
 
18
18
  #include "ruby.h"
19
- #if HAS_ENCODING_SUPPORT
19
+ #if HAVE_RB_ENC_ASSOCIATE
20
20
  #include "ruby/encoding.h"
21
21
  #endif
22
22
 
23
- #ifdef RUBINIUS_RUBY
24
- #undef T_COMPLEX
25
- enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
23
+ #if HAVE_RUBY_ST_H
24
+ #include "ruby/st.h"
26
25
  #else
27
- #if HAS_TOP_LEVEL_ST_H
28
- /* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. */
26
+ // Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up.
29
27
  #include "st.h"
30
- #else
31
- #include "ruby/st.h"
32
- #endif
33
28
  #endif
34
29
 
35
30
  #include "cache.h"
@@ -141,15 +136,14 @@ typedef struct _options {
141
136
  char convert_special;// boolean true or false
142
137
  char allow_invalid; // YesNo
143
138
  char no_empty; // boolean - no empty elements when dumping
139
+ char with_cdata; // boolean - hash_load should include cdata
144
140
  char inv_repl[12]; // max 10 valid characters, first character is the length
145
141
  char strip_ns[64]; // namespace to strip, \0 is no-strip, \* is all, else only matches
146
142
  struct _hints *html_hints; // html hints
147
143
  VALUE attr_key_mod;
148
144
  VALUE element_key_mod;
149
- #if HAS_ENCODING_SUPPORT
145
+ #if HAVE_RB_ENC_ASSOCIATE
150
146
  rb_encoding *rb_enc;
151
- #elif HAS_PRIVATE_ENCODING
152
- VALUE rb_enc;
153
147
  #else
154
148
  void *rb_enc;
155
149
  #endif
@@ -236,10 +230,8 @@ extern ID ox_tv_nsec_id;
236
230
  extern ID ox_tv_usec_id;
237
231
  extern ID ox_value_id;
238
232
 
239
- #if HAS_ENCODING_SUPPORT
233
+ #if HAVE_RB_ENC_ASSOCIATE
240
234
  extern rb_encoding *ox_utf8_encoding;
241
- #elif HAS_PRIVATE_ENCODING
242
- extern VALUE ox_utf8_encoding;
243
235
  #else
244
236
  extern void *ox_utf8_encoding;
245
237
  #endif
@@ -441,7 +441,6 @@ read_element(PInfo pi) {
441
441
  // empty element, no attributes and no children
442
442
  pi->s++;
443
443
  if ('>' != *pi->s) {
444
- /*printf("*** '%s' ***\n", pi->s); */
445
444
  attr_stack_cleanup(&attrs);
446
445
  set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
447
446
  return 0;
@@ -479,8 +478,8 @@ read_element(PInfo pi) {
479
478
  pi->s++;
480
479
  pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
481
480
  pi->pcb->end_element(pi, ename);
482
-
483
481
  attr_stack_cleanup(&attrs);
482
+
484
483
  return 0;
485
484
  case '>':
486
485
  /* has either children or a value */
@@ -544,6 +543,12 @@ read_element(PInfo pi) {
544
543
  while (!done) {
545
544
  start = pi->s;
546
545
  next_non_white(pi);
546
+ if (OffSkip == pi->options->skip && start < pi->s && '<' == *pi->s) {
547
+ c = *pi->s;
548
+ *pi->s = '\0';
549
+ pi->pcb->add_text(pi, start, 1);
550
+ *pi->s = c;
551
+ }
547
552
  c = *pi->s++;
548
553
  if ('\0' == c) {
549
554
  attr_stack_cleanup(&attrs);
@@ -1000,11 +1005,13 @@ read_coded_chars(PInfo pi, char *text) {
1000
1005
  char *b, buf[32];
1001
1006
  char *end = buf + sizeof(buf) - 1;
1002
1007
  char *s;
1008
+ long blen = 0;
1003
1009
 
1004
1010
  for (b = buf, s = pi->s; b < end; b++, s++) {
1005
1011
  *b = *s;
1006
1012
  if (';' == *s) {
1007
1013
  *(b + 1) = '\0';
1014
+ blen = b - buf;
1008
1015
  s++;
1009
1016
  break;
1010
1017
  }
@@ -1025,18 +1032,9 @@ read_coded_chars(PInfo pi, char *text) {
1025
1032
  } else {
1026
1033
  if (u <= 0x000000000000007FULL) {
1027
1034
  *text++ = (char)u;
1028
- #if HAS_PRIVATE_ENCODING
1029
- } else if (ox_utf8_encoding == pi->options->rb_enc ||
1030
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
1031
- #else
1032
1035
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
1033
- #endif
1034
1036
  text = ox_ucs_to_utf8_chars(text, u);
1035
- #if HAS_PRIVATE_ENCODING
1036
- } else if (Qnil == pi->options->rb_enc) {
1037
- #else
1038
1037
  } else if (0 == pi->options->rb_enc) {
1039
- #endif
1040
1038
  pi->options->rb_enc = ox_utf8_encoding;
1041
1039
  text = ox_ucs_to_utf8_chars(text, u);
1042
1040
  } else if (TolerantEffort == pi->options->effort) {
@@ -1047,30 +1045,20 @@ read_coded_chars(PInfo pi, char *text) {
1047
1045
  } else {
1048
1046
  /*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
1049
1047
  set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
1050
- return 0;
1048
+ return NULL;
1051
1049
  }
1052
1050
  pi->s = s;
1053
1051
  }
1054
- } else if (0 == strcasecmp(buf, "nbsp;")) {
1055
- pi->s = s;
1056
- *text++ = ' ';
1057
- } else if (0 == strcasecmp(buf, "lt;")) {
1058
- pi->s = s;
1059
- *text++ = '<';
1060
- } else if (0 == strcasecmp(buf, "gt;")) {
1061
- pi->s = s;
1062
- *text++ = '>';
1063
- } else if (0 == strcasecmp(buf, "amp;")) {
1064
- pi->s = s;
1065
- *text++ = '&';
1066
- } else if (0 == strcasecmp(buf, "quot;")) {
1067
- pi->s = s;
1068
- *text++ = '"';
1069
- } else if (0 == strcasecmp(buf, "apos;")) {
1070
- pi->s = s;
1071
- *text++ = '\'';
1072
1052
  } else {
1073
- *text++ = '&';
1053
+ char *t2;
1054
+
1055
+ buf[blen] = '\0';
1056
+ if (NULL == (t2 = ox_entity_lookup(text, buf))) {
1057
+ *text++ = '&';
1058
+ } else {
1059
+ text = t2;
1060
+ pi->s = s;
1061
+ }
1074
1062
  }
1075
1063
  return text;
1076
1064
  }
@@ -1112,19 +1100,10 @@ collapse_special(PInfo pi, char *str) {
1112
1100
  }
1113
1101
  if (u <= 0x000000000000007FULL) {
1114
1102
  *b++ = (char)u;
1115
- #if HAS_PRIVATE_ENCODING
1116
- } else if (ox_utf8_encoding == pi->options->rb_enc ||
1117
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
1118
- #else
1119
1103
  } else if (ox_utf8_encoding == pi->options->rb_enc) {
1120
- #endif
1121
1104
  b = ox_ucs_to_utf8_chars(b, u);
1122
1105
  /* TBD support UTF-16 */
1123
- #if HAS_PRIVATE_ENCODING
1124
- } else if (Qnil == pi->options->rb_enc) {
1125
- #else
1126
1106
  } else if (0 == pi->options->rb_enc) {
1127
- #endif
1128
1107
  pi->options->rb_enc = ox_utf8_encoding;
1129
1108
  b = ox_ucs_to_utf8_chars(b, u);
1130
1109
  } else {
@@ -1153,16 +1132,30 @@ collapse_special(PInfo pi, char *str) {
1153
1132
  *b++ = '&';
1154
1133
  continue;
1155
1134
  } else {
1156
- c = '?';
1135
+ char key[16];
1136
+ char *k = key;
1137
+ char *kend = key + sizeof(key) - 1;
1138
+
1139
+ *k++ = *s;
1157
1140
  while (';' != *s++) {
1158
1141
  if ('\0' == *s) {
1159
1142
  set_error(&pi->err, "Invalid format, special character does not end with a semicolon", pi->str, pi->s);
1160
1143
  return EDOM;
1161
1144
  }
1145
+ if (kend <= k) {
1146
+ k = key;
1147
+ break;
1148
+ }
1149
+ *k++ = *s;
1162
1150
  }
1163
- s++;
1164
- set_error(&pi->err, "Invalid format, invalid special character sequence", pi->str, pi->s);
1165
- return 0;
1151
+ k--;
1152
+ *k = '\0';
1153
+ if ('\0' == *key || NULL == (b = ox_entity_lookup(b, key))) {
1154
+ set_error(&pi->err, "Invalid format, invalid special character sequence", pi->str, pi->s);
1155
+ c = '?';
1156
+ return 0;
1157
+ }
1158
+ continue;
1166
1159
  }
1167
1160
  *b++ = (char)c;
1168
1161
  }
@@ -9,13 +9,16 @@
9
9
  #include <stdio.h>
10
10
  #include <strings.h>
11
11
  #include <sys/types.h>
12
- #if NEEDS_UIO
12
+ #if HAVE_SYS_UIO_H
13
13
  #include <sys/uio.h>
14
14
  #endif
15
15
  #include <unistd.h>
16
16
  #include <time.h>
17
17
 
18
18
  #include "ruby.h"
19
+ #if HAVE_RB_ENC_ASSOCIATE
20
+ #include "ruby/encoding.h"
21
+ #endif
19
22
  #include "ox.h"
20
23
  #include "sax.h"
21
24
  #include "sax_stack.h"
@@ -68,9 +71,9 @@ static VALUE protect_parse(VALUE drp) {
68
71
  return Qnil;
69
72
  }
70
73
 
71
- #if HAS_ENCODING_SUPPORT || HAS_PRIVATE_ENCODING
74
+ #if HAVE_RB_ENC_ASSOCIATE
72
75
  static int
73
- strIsAscii(const char *s) {
76
+ str_is_ascii(const char *s) {
74
77
  for (; '\0' != *s; s++) {
75
78
  if (*s < ' ' || '~' < *s) {
76
79
  return 0;
@@ -87,8 +90,8 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
87
90
 
88
91
  if (dr->options.symbolize) {
89
92
  if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
90
- #if HAS_ENCODING_SUPPORT
91
- if (0 != dr->encoding && !strIsAscii(str)) {
93
+ #if HAVE_RB_ENC_ASSOCIATE
94
+ if (0 != dr->encoding && !str_is_ascii(str)) {
92
95
  VALUE rstr = rb_str_new2(str);
93
96
 
94
97
  // TBD if sym can be pinned down then use this all the time
@@ -99,20 +102,6 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
99
102
  sym = ID2SYM(rb_intern(str));
100
103
  *slot = sym;
101
104
  }
102
- #elif HAS_PRIVATE_ENCODING
103
- if (Qnil != dr->encoding && !strIsAscii(str)) {
104
- VALUE rstr = rb_str_new2(str);
105
-
106
- rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
107
- sym = rb_funcall(rstr, ox_to_sym_id, 0);
108
- // Needed for Ruby 2.2 to get around the GC of symbols created
109
- // with to_sym which is needed for encoded symbols.
110
- rb_ary_push(ox_sym_bank, sym);
111
- *slot = Qundef;
112
- } else {
113
- sym = ID2SYM(rb_intern(str));
114
- *slot = sym;
115
- }
116
105
  #else
117
106
  sym = ID2SYM(rb_intern(str));
118
107
  *slot = sym;
@@ -120,14 +109,10 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
120
109
  }
121
110
  } else {
122
111
  sym = rb_str_new2(str);
123
- #if HAS_ENCODING_SUPPORT
112
+ #if HAVE_RB_ENC_ASSOCIATE
124
113
  if (0 != dr->encoding) {
125
114
  rb_enc_associate(sym, dr->encoding);
126
115
  }
127
- #elif HAS_PRIVATE_ENCODING
128
- if (Qnil != dr->encoding) {
129
- rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding);
130
- }
131
116
  #endif
132
117
  if (0 != strp) {
133
118
  *strp = StringValuePtr(sym);
@@ -182,7 +167,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
182
167
  dr->blocked = 0;
183
168
  dr->abort = false;
184
169
  has_init(&dr->has, handler);
185
- #if HAS_ENCODING_SUPPORT
170
+ #if HAVE_RB_ENC_FIND
186
171
  if ('\0' == *ox_default_options.encoding) {
187
172
  VALUE encoding;
188
173
 
@@ -196,18 +181,6 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
196
181
  } else {
197
182
  dr->encoding = rb_enc_find(ox_default_options.encoding);
198
183
  }
199
- #elif HAS_PRIVATE_ENCODING
200
- if ('\0' == *ox_default_options.encoding) {
201
- VALUE encoding;
202
-
203
- if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
204
- dr->encoding = encoding;
205
- } else {
206
- dr->encoding = Qnil;
207
- }
208
- } else {
209
- dr->encoding = rb_str_new2(ox_default_options.encoding);
210
- }
211
184
  #else
212
185
  dr->encoding = 0;
213
186
  #endif
@@ -255,9 +228,7 @@ skipBOM(SaxDrive dr) {
255
228
 
256
229
  if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
257
230
  if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
258
- #if HAS_ENCODING_SUPPORT
259
- dr->encoding = ox_utf8_encoding;
260
- #elif HAS_PRIVATE_ENCODING
231
+ #if HAVE_RB_ENC_FIND
261
232
  dr->encoding = ox_utf8_encoding;
262
233
  #else
263
234
  dr->encoding = UTF8_STR;
@@ -364,14 +335,10 @@ parse(SaxDrive dr) {
364
335
  off_t col = dr->buf.col - 1;
365
336
 
366
337
  args[0] = rb_str_new2("");
367
- #if HAS_ENCODING_SUPPORT
338
+ #if HAVE_RB_ENC_ASSOCIATE
368
339
  if (0 != dr->encoding) {
369
340
  rb_enc_associate(args[0], dr->encoding);
370
341
  }
371
- #elif HAS_PRIVATE_ENCODING
372
- if (Qnil != dr->encoding) {
373
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
374
- }
375
342
  #endif
376
343
  if (dr->has.pos) {
377
344
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -526,14 +493,10 @@ read_instruction(SaxDrive dr) {
526
493
  ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
527
494
  }
528
495
  args[0] = rb_str_new2(content);
529
- #if HAS_ENCODING_SUPPORT
496
+ #if HAVE_RB_ENC_ASSOCIATE
530
497
  if (0 != dr->encoding) {
531
498
  rb_enc_associate(args[0], dr->encoding);
532
499
  }
533
- #elif HAS_PRIVATE_ENCODING
534
- if (Qnil != dr->encoding) {
535
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
536
- }
537
500
  #endif
538
501
  if (dr->has.line) {
539
502
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
@@ -732,14 +695,10 @@ read_cdata(SaxDrive dr) {
732
695
  VALUE args[1];
733
696
 
734
697
  args[0] = rb_str_new2(dr->buf.str);
735
- #if HAS_ENCODING_SUPPORT
698
+ #if HAVE_RB_ENC_ASSOCIATE
736
699
  if (0 != dr->encoding) {
737
700
  rb_enc_associate(args[0], dr->encoding);
738
701
  }
739
- #elif HAS_PRIVATE_ENCODING
740
- if (Qnil != dr->encoding) {
741
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
742
- }
743
702
  #endif
744
703
  if (dr->has.pos) {
745
704
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -826,14 +785,10 @@ read_comment(SaxDrive dr) {
826
785
  (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
827
786
 
828
787
  args[0] = rb_str_new2(dr->buf.str);
829
- #if HAS_ENCODING_SUPPORT
788
+ #if HAVE_RB_ENC_ASSOCIATE
830
789
  if (0 != dr->encoding) {
831
790
  rb_enc_associate(args[0], dr->encoding);
832
791
  }
833
- #elif HAS_PRIVATE_ENCODING
834
- if (Qnil != dr->encoding) {
835
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
836
- }
837
792
  #endif
838
793
  if (dr->has.pos) {
839
794
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1158,14 +1113,10 @@ read_text(SaxDrive dr) {
1158
1113
  ((NoSkip == dr->options.skip && !isEnd) ||
1159
1114
  (OffSkip == dr->options.skip))) {
1160
1115
  args[0] = rb_str_new2(dr->buf.str);
1161
- #if HAS_ENCODING_SUPPORT
1116
+ #if HAVE_RB_ENC_ASSOCIATE
1162
1117
  if (0 != dr->encoding) {
1163
1118
  rb_enc_associate(args[0], dr->encoding);
1164
1119
  }
1165
- #elif HAS_PRIVATE_ENCODING
1166
- if (Qnil != dr->encoding) {
1167
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1168
- }
1169
1120
  #endif
1170
1121
  if (dr->has.pos) {
1171
1122
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1213,14 +1164,10 @@ read_text(SaxDrive dr) {
1213
1164
  break;
1214
1165
  }
1215
1166
  args[0] = rb_str_new2(dr->buf.str);
1216
- #if HAS_ENCODING_SUPPORT
1167
+ #if HAVE_RB_ENC_ASSOCIATE
1217
1168
  if (0 != dr->encoding) {
1218
1169
  rb_enc_associate(args[0], dr->encoding);
1219
1170
  }
1220
- #elif HAS_PRIVATE_ENCODING
1221
- if (Qnil != dr->encoding) {
1222
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1223
- }
1224
1171
  #endif
1225
1172
  if (dr->has.pos) {
1226
1173
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1299,14 +1246,10 @@ read_jump(SaxDrive dr, const char *pat) {
1299
1246
  // TBD check parent overlay
1300
1247
  if (dr->has.text && !dr->blocked) {
1301
1248
  args[0] = rb_str_new2(dr->buf.str);
1302
- #if HAS_ENCODING_SUPPORT
1249
+ #if HAVE_RB_ENC_ASSOCIATE
1303
1250
  if (0 != dr->encoding) {
1304
1251
  rb_enc_associate(args[0], dr->encoding);
1305
1252
  }
1306
- #elif HAS_PRIVATE_ENCODING
1307
- if (Qnil != dr->encoding) {
1308
- rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1309
- }
1310
1253
  #endif
1311
1254
  if (dr->has.pos) {
1312
1255
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1377,10 +1320,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1377
1320
  c = read_quoted_value(dr);
1378
1321
  attr_value = dr->buf.str;
1379
1322
  if (is_encoding) {
1380
- #if HAS_ENCODING_SUPPORT
1323
+ #if HAVE_RB_ENC_FIND
1381
1324
  dr->encoding = rb_enc_find(dr->buf.str);
1382
- #elif HAS_PRIVATE_ENCODING
1383
- dr->encoding = rb_str_new2(dr->buf.str);
1384
1325
  #else
1385
1326
  dr->encoding = dr->buf.str;
1386
1327
  #endif
@@ -1411,14 +1352,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1411
1352
  ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1412
1353
  }
1413
1354
  args[1] = rb_str_new2(attr_value);
1414
- #if HAS_ENCODING_SUPPORT
1355
+ #if HAVE_RB_ENC_ASSOCIATE
1415
1356
  if (0 != dr->encoding) {
1416
1357
  rb_enc_associate(args[1], dr->encoding);
1417
1358
  }
1418
- #elif HAS_PRIVATE_ENCODING
1419
- if (Qnil != dr->encoding) {
1420
- rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
1421
- }
1422
1359
  #endif
1423
1360
  if (dr->has.pos) {
1424
1361
  rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
@@ -1614,19 +1551,12 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1614
1551
  }
1615
1552
  if (u <= 0x000000000000007FULL) {
1616
1553
  *b++ = (char)u;
1617
- #if HAS_ENCODING_SUPPORT
1554
+ #if HAVE_RB_ENC_FIND
1618
1555
  } else if (ox_utf8_encoding == dr->encoding) {
1619
1556
  b = ox_ucs_to_utf8_chars(b, u);
1620
1557
  } else if (0 == dr->encoding) {
1621
1558
  dr->encoding = ox_utf8_encoding;
1622
1559
  b = ox_ucs_to_utf8_chars(b, u);
1623
- #elif HAS_PRIVATE_ENCODING
1624
- } else if (ox_utf8_encoding == dr->encoding ||
1625
- 0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(dr->encoding)))) {
1626
- b = ox_ucs_to_utf8_chars(b, u);
1627
- } else if (Qnil == dr->encoding) {
1628
- dr->encoding = ox_utf8_encoding;
1629
- b = ox_ucs_to_utf8_chars(b, u);
1630
1560
  #else
1631
1561
  } else if (0 == dr->encoding) {
1632
1562
  dr->encoding = UTF8_STR;
@@ -1668,8 +1598,28 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1668
1598
  c = '\'';
1669
1599
  s += 5;
1670
1600
  } else {
1671
- ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1672
- c = '&';
1601
+ char key[16];
1602
+ char *k = key;
1603
+ char *kend = key + sizeof(key) - 1;
1604
+ char *bn;
1605
+ char *s2 = s;
1606
+
1607
+ for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1608
+ if (kend <= k) {
1609
+ k = key;
1610
+ break;
1611
+ }
1612
+ *k = *s2;
1613
+ }
1614
+ *k = '\0';
1615
+ if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1616
+ ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1617
+ c = '&';
1618
+ } else {
1619
+ b = bn;
1620
+ s = s2 + 1;
1621
+ continue;
1622
+ }
1673
1623
  }
1674
1624
  *b++ = (char)c;
1675
1625
  col++;