ox 2.12.1 → 2.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +44 -0
- data/README.md +31 -0
- data/ext/ox/builder.c +5 -1
- data/ext/ox/dump.c +6 -18
- data/ext/ox/extconf.rb +12 -34
- data/ext/ox/gen_load.c +18 -96
- data/ext/ox/hash_load.c +39 -12
- data/ext/ox/obj_load.c +14 -46
- data/ext/ox/ox.c +60 -48
- data/ext/ox/ox.h +7 -15
- data/ext/ox/parse.c +37 -44
- data/ext/ox/sax.c +43 -93
- data/ext/ox/sax.h +1 -3
- data/ext/ox/sax_as.c +2 -6
- data/ext/ox/sax_buf.c +1 -1
- data/ext/ox/special.c +346 -0
- data/ext/ox/special.h +1 -0
- data/lib/ox/element.rb +21 -11
- data/lib/ox/version.rb +1 -1
- metadata +5 -5
data/ext/ox/ox.h
CHANGED
@@ -16,20 +16,15 @@ extern "C" {
|
|
16
16
|
#define RSTRING_NOT_MODIFIED
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
-
#if
|
19
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
20
20
|
#include "ruby/encoding.h"
|
21
21
|
#endif
|
22
22
|
|
23
|
-
#
|
24
|
-
#
|
25
|
-
enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
|
23
|
+
#if HAVE_RUBY_ST_H
|
24
|
+
#include "ruby/st.h"
|
26
25
|
#else
|
27
|
-
|
28
|
-
/* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. */
|
26
|
+
// Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up.
|
29
27
|
#include "st.h"
|
30
|
-
#else
|
31
|
-
#include "ruby/st.h"
|
32
|
-
#endif
|
33
28
|
#endif
|
34
29
|
|
35
30
|
#include "cache.h"
|
@@ -141,15 +136,14 @@ typedef struct _options {
|
|
141
136
|
char convert_special;// boolean true or false
|
142
137
|
char allow_invalid; // YesNo
|
143
138
|
char no_empty; // boolean - no empty elements when dumping
|
139
|
+
char with_cdata; // boolean - hash_load should include cdata
|
144
140
|
char inv_repl[12]; // max 10 valid characters, first character is the length
|
145
141
|
char strip_ns[64]; // namespace to strip, \0 is no-strip, \* is all, else only matches
|
146
142
|
struct _hints *html_hints; // html hints
|
147
143
|
VALUE attr_key_mod;
|
148
144
|
VALUE element_key_mod;
|
149
|
-
#if
|
145
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
150
146
|
rb_encoding *rb_enc;
|
151
|
-
#elif HAS_PRIVATE_ENCODING
|
152
|
-
VALUE rb_enc;
|
153
147
|
#else
|
154
148
|
void *rb_enc;
|
155
149
|
#endif
|
@@ -236,10 +230,8 @@ extern ID ox_tv_nsec_id;
|
|
236
230
|
extern ID ox_tv_usec_id;
|
237
231
|
extern ID ox_value_id;
|
238
232
|
|
239
|
-
#if
|
233
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
240
234
|
extern rb_encoding *ox_utf8_encoding;
|
241
|
-
#elif HAS_PRIVATE_ENCODING
|
242
|
-
extern VALUE ox_utf8_encoding;
|
243
235
|
#else
|
244
236
|
extern void *ox_utf8_encoding;
|
245
237
|
#endif
|
data/ext/ox/parse.c
CHANGED
@@ -441,7 +441,6 @@ read_element(PInfo pi) {
|
|
441
441
|
// empty element, no attributes and no children
|
442
442
|
pi->s++;
|
443
443
|
if ('>' != *pi->s) {
|
444
|
-
/*printf("*** '%s' ***\n", pi->s); */
|
445
444
|
attr_stack_cleanup(&attrs);
|
446
445
|
set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
|
447
446
|
return 0;
|
@@ -479,8 +478,8 @@ read_element(PInfo pi) {
|
|
479
478
|
pi->s++;
|
480
479
|
pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
|
481
480
|
pi->pcb->end_element(pi, ename);
|
482
|
-
|
483
481
|
attr_stack_cleanup(&attrs);
|
482
|
+
|
484
483
|
return 0;
|
485
484
|
case '>':
|
486
485
|
/* has either children or a value */
|
@@ -544,6 +543,12 @@ read_element(PInfo pi) {
|
|
544
543
|
while (!done) {
|
545
544
|
start = pi->s;
|
546
545
|
next_non_white(pi);
|
546
|
+
if (OffSkip == pi->options->skip && start < pi->s && '<' == *pi->s) {
|
547
|
+
c = *pi->s;
|
548
|
+
*pi->s = '\0';
|
549
|
+
pi->pcb->add_text(pi, start, 1);
|
550
|
+
*pi->s = c;
|
551
|
+
}
|
547
552
|
c = *pi->s++;
|
548
553
|
if ('\0' == c) {
|
549
554
|
attr_stack_cleanup(&attrs);
|
@@ -1000,11 +1005,13 @@ read_coded_chars(PInfo pi, char *text) {
|
|
1000
1005
|
char *b, buf[32];
|
1001
1006
|
char *end = buf + sizeof(buf) - 1;
|
1002
1007
|
char *s;
|
1008
|
+
long blen = 0;
|
1003
1009
|
|
1004
1010
|
for (b = buf, s = pi->s; b < end; b++, s++) {
|
1005
1011
|
*b = *s;
|
1006
1012
|
if (';' == *s) {
|
1007
1013
|
*(b + 1) = '\0';
|
1014
|
+
blen = b - buf;
|
1008
1015
|
s++;
|
1009
1016
|
break;
|
1010
1017
|
}
|
@@ -1025,18 +1032,9 @@ read_coded_chars(PInfo pi, char *text) {
|
|
1025
1032
|
} else {
|
1026
1033
|
if (u <= 0x000000000000007FULL) {
|
1027
1034
|
*text++ = (char)u;
|
1028
|
-
#if HAS_PRIVATE_ENCODING
|
1029
|
-
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
1030
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
1031
|
-
#else
|
1032
1035
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
1033
|
-
#endif
|
1034
1036
|
text = ox_ucs_to_utf8_chars(text, u);
|
1035
|
-
#if HAS_PRIVATE_ENCODING
|
1036
|
-
} else if (Qnil == pi->options->rb_enc) {
|
1037
|
-
#else
|
1038
1037
|
} else if (0 == pi->options->rb_enc) {
|
1039
|
-
#endif
|
1040
1038
|
pi->options->rb_enc = ox_utf8_encoding;
|
1041
1039
|
text = ox_ucs_to_utf8_chars(text, u);
|
1042
1040
|
} else if (TolerantEffort == pi->options->effort) {
|
@@ -1047,30 +1045,20 @@ read_coded_chars(PInfo pi, char *text) {
|
|
1047
1045
|
} else {
|
1048
1046
|
/*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
|
1049
1047
|
set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
|
1050
|
-
return
|
1048
|
+
return NULL;
|
1051
1049
|
}
|
1052
1050
|
pi->s = s;
|
1053
1051
|
}
|
1054
|
-
} else if (0 == strcasecmp(buf, "nbsp;")) {
|
1055
|
-
pi->s = s;
|
1056
|
-
*text++ = ' ';
|
1057
|
-
} else if (0 == strcasecmp(buf, "lt;")) {
|
1058
|
-
pi->s = s;
|
1059
|
-
*text++ = '<';
|
1060
|
-
} else if (0 == strcasecmp(buf, "gt;")) {
|
1061
|
-
pi->s = s;
|
1062
|
-
*text++ = '>';
|
1063
|
-
} else if (0 == strcasecmp(buf, "amp;")) {
|
1064
|
-
pi->s = s;
|
1065
|
-
*text++ = '&';
|
1066
|
-
} else if (0 == strcasecmp(buf, "quot;")) {
|
1067
|
-
pi->s = s;
|
1068
|
-
*text++ = '"';
|
1069
|
-
} else if (0 == strcasecmp(buf, "apos;")) {
|
1070
|
-
pi->s = s;
|
1071
|
-
*text++ = '\'';
|
1072
1052
|
} else {
|
1073
|
-
*
|
1053
|
+
char *t2;
|
1054
|
+
|
1055
|
+
buf[blen] = '\0';
|
1056
|
+
if (NULL == (t2 = ox_entity_lookup(text, buf))) {
|
1057
|
+
*text++ = '&';
|
1058
|
+
} else {
|
1059
|
+
text = t2;
|
1060
|
+
pi->s = s;
|
1061
|
+
}
|
1074
1062
|
}
|
1075
1063
|
return text;
|
1076
1064
|
}
|
@@ -1112,19 +1100,10 @@ collapse_special(PInfo pi, char *str) {
|
|
1112
1100
|
}
|
1113
1101
|
if (u <= 0x000000000000007FULL) {
|
1114
1102
|
*b++ = (char)u;
|
1115
|
-
#if HAS_PRIVATE_ENCODING
|
1116
|
-
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
1117
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
1118
|
-
#else
|
1119
1103
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
1120
|
-
#endif
|
1121
1104
|
b = ox_ucs_to_utf8_chars(b, u);
|
1122
1105
|
/* TBD support UTF-16 */
|
1123
|
-
#if HAS_PRIVATE_ENCODING
|
1124
|
-
} else if (Qnil == pi->options->rb_enc) {
|
1125
|
-
#else
|
1126
1106
|
} else if (0 == pi->options->rb_enc) {
|
1127
|
-
#endif
|
1128
1107
|
pi->options->rb_enc = ox_utf8_encoding;
|
1129
1108
|
b = ox_ucs_to_utf8_chars(b, u);
|
1130
1109
|
} else {
|
@@ -1153,16 +1132,30 @@ collapse_special(PInfo pi, char *str) {
|
|
1153
1132
|
*b++ = '&';
|
1154
1133
|
continue;
|
1155
1134
|
} else {
|
1156
|
-
|
1135
|
+
char key[16];
|
1136
|
+
char *k = key;
|
1137
|
+
char *kend = key + sizeof(key) - 1;
|
1138
|
+
|
1139
|
+
*k++ = *s;
|
1157
1140
|
while (';' != *s++) {
|
1158
1141
|
if ('\0' == *s) {
|
1159
1142
|
set_error(&pi->err, "Invalid format, special character does not end with a semicolon", pi->str, pi->s);
|
1160
1143
|
return EDOM;
|
1161
1144
|
}
|
1145
|
+
if (kend <= k) {
|
1146
|
+
k = key;
|
1147
|
+
break;
|
1148
|
+
}
|
1149
|
+
*k++ = *s;
|
1162
1150
|
}
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1151
|
+
k--;
|
1152
|
+
*k = '\0';
|
1153
|
+
if ('\0' == *key || NULL == (b = ox_entity_lookup(b, key))) {
|
1154
|
+
set_error(&pi->err, "Invalid format, invalid special character sequence", pi->str, pi->s);
|
1155
|
+
c = '?';
|
1156
|
+
return 0;
|
1157
|
+
}
|
1158
|
+
continue;
|
1166
1159
|
}
|
1167
1160
|
*b++ = (char)c;
|
1168
1161
|
}
|
data/ext/ox/sax.c
CHANGED
@@ -9,13 +9,16 @@
|
|
9
9
|
#include <stdio.h>
|
10
10
|
#include <strings.h>
|
11
11
|
#include <sys/types.h>
|
12
|
-
#if
|
12
|
+
#if HAVE_SYS_UIO_H
|
13
13
|
#include <sys/uio.h>
|
14
14
|
#endif
|
15
15
|
#include <unistd.h>
|
16
16
|
#include <time.h>
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
20
|
+
#include "ruby/encoding.h"
|
21
|
+
#endif
|
19
22
|
#include "ox.h"
|
20
23
|
#include "sax.h"
|
21
24
|
#include "sax_stack.h"
|
@@ -68,9 +71,9 @@ static VALUE protect_parse(VALUE drp) {
|
|
68
71
|
return Qnil;
|
69
72
|
}
|
70
73
|
|
71
|
-
#if
|
74
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
72
75
|
static int
|
73
|
-
|
76
|
+
str_is_ascii(const char *s) {
|
74
77
|
for (; '\0' != *s; s++) {
|
75
78
|
if (*s < ' ' || '~' < *s) {
|
76
79
|
return 0;
|
@@ -87,8 +90,8 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
87
90
|
|
88
91
|
if (dr->options.symbolize) {
|
89
92
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
|
90
|
-
#if
|
91
|
-
if (0 != dr->encoding && !
|
93
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
94
|
+
if (0 != dr->encoding && !str_is_ascii(str)) {
|
92
95
|
VALUE rstr = rb_str_new2(str);
|
93
96
|
|
94
97
|
// TBD if sym can be pinned down then use this all the time
|
@@ -99,20 +102,6 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
99
102
|
sym = ID2SYM(rb_intern(str));
|
100
103
|
*slot = sym;
|
101
104
|
}
|
102
|
-
#elif HAS_PRIVATE_ENCODING
|
103
|
-
if (Qnil != dr->encoding && !strIsAscii(str)) {
|
104
|
-
VALUE rstr = rb_str_new2(str);
|
105
|
-
|
106
|
-
rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
|
107
|
-
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
108
|
-
// Needed for Ruby 2.2 to get around the GC of symbols created
|
109
|
-
// with to_sym which is needed for encoded symbols.
|
110
|
-
rb_ary_push(ox_sym_bank, sym);
|
111
|
-
*slot = Qundef;
|
112
|
-
} else {
|
113
|
-
sym = ID2SYM(rb_intern(str));
|
114
|
-
*slot = sym;
|
115
|
-
}
|
116
105
|
#else
|
117
106
|
sym = ID2SYM(rb_intern(str));
|
118
107
|
*slot = sym;
|
@@ -120,14 +109,10 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
120
109
|
}
|
121
110
|
} else {
|
122
111
|
sym = rb_str_new2(str);
|
123
|
-
#if
|
112
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
124
113
|
if (0 != dr->encoding) {
|
125
114
|
rb_enc_associate(sym, dr->encoding);
|
126
115
|
}
|
127
|
-
#elif HAS_PRIVATE_ENCODING
|
128
|
-
if (Qnil != dr->encoding) {
|
129
|
-
rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding);
|
130
|
-
}
|
131
116
|
#endif
|
132
117
|
if (0 != strp) {
|
133
118
|
*strp = StringValuePtr(sym);
|
@@ -182,7 +167,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
|
|
182
167
|
dr->blocked = 0;
|
183
168
|
dr->abort = false;
|
184
169
|
has_init(&dr->has, handler);
|
185
|
-
#if
|
170
|
+
#if HAVE_RB_ENC_FIND
|
186
171
|
if ('\0' == *ox_default_options.encoding) {
|
187
172
|
VALUE encoding;
|
188
173
|
|
@@ -196,18 +181,6 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
|
|
196
181
|
} else {
|
197
182
|
dr->encoding = rb_enc_find(ox_default_options.encoding);
|
198
183
|
}
|
199
|
-
#elif HAS_PRIVATE_ENCODING
|
200
|
-
if ('\0' == *ox_default_options.encoding) {
|
201
|
-
VALUE encoding;
|
202
|
-
|
203
|
-
if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
|
204
|
-
dr->encoding = encoding;
|
205
|
-
} else {
|
206
|
-
dr->encoding = Qnil;
|
207
|
-
}
|
208
|
-
} else {
|
209
|
-
dr->encoding = rb_str_new2(ox_default_options.encoding);
|
210
|
-
}
|
211
184
|
#else
|
212
185
|
dr->encoding = 0;
|
213
186
|
#endif
|
@@ -255,9 +228,7 @@ skipBOM(SaxDrive dr) {
|
|
255
228
|
|
256
229
|
if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
|
257
230
|
if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
|
258
|
-
#if
|
259
|
-
dr->encoding = ox_utf8_encoding;
|
260
|
-
#elif HAS_PRIVATE_ENCODING
|
231
|
+
#if HAVE_RB_ENC_FIND
|
261
232
|
dr->encoding = ox_utf8_encoding;
|
262
233
|
#else
|
263
234
|
dr->encoding = UTF8_STR;
|
@@ -364,14 +335,10 @@ parse(SaxDrive dr) {
|
|
364
335
|
off_t col = dr->buf.col - 1;
|
365
336
|
|
366
337
|
args[0] = rb_str_new2("");
|
367
|
-
#if
|
338
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
368
339
|
if (0 != dr->encoding) {
|
369
340
|
rb_enc_associate(args[0], dr->encoding);
|
370
341
|
}
|
371
|
-
#elif HAS_PRIVATE_ENCODING
|
372
|
-
if (Qnil != dr->encoding) {
|
373
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
374
|
-
}
|
375
342
|
#endif
|
376
343
|
if (dr->has.pos) {
|
377
344
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -526,14 +493,10 @@ read_instruction(SaxDrive dr) {
|
|
526
493
|
ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
|
527
494
|
}
|
528
495
|
args[0] = rb_str_new2(content);
|
529
|
-
#if
|
496
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
530
497
|
if (0 != dr->encoding) {
|
531
498
|
rb_enc_associate(args[0], dr->encoding);
|
532
499
|
}
|
533
|
-
#elif HAS_PRIVATE_ENCODING
|
534
|
-
if (Qnil != dr->encoding) {
|
535
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
536
|
-
}
|
537
500
|
#endif
|
538
501
|
if (dr->has.line) {
|
539
502
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
@@ -732,14 +695,10 @@ read_cdata(SaxDrive dr) {
|
|
732
695
|
VALUE args[1];
|
733
696
|
|
734
697
|
args[0] = rb_str_new2(dr->buf.str);
|
735
|
-
#if
|
698
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
736
699
|
if (0 != dr->encoding) {
|
737
700
|
rb_enc_associate(args[0], dr->encoding);
|
738
701
|
}
|
739
|
-
#elif HAS_PRIVATE_ENCODING
|
740
|
-
if (Qnil != dr->encoding) {
|
741
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
742
|
-
}
|
743
702
|
#endif
|
744
703
|
if (dr->has.pos) {
|
745
704
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -826,14 +785,10 @@ read_comment(SaxDrive dr) {
|
|
826
785
|
(NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
|
827
786
|
|
828
787
|
args[0] = rb_str_new2(dr->buf.str);
|
829
|
-
#if
|
788
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
830
789
|
if (0 != dr->encoding) {
|
831
790
|
rb_enc_associate(args[0], dr->encoding);
|
832
791
|
}
|
833
|
-
#elif HAS_PRIVATE_ENCODING
|
834
|
-
if (Qnil != dr->encoding) {
|
835
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
836
|
-
}
|
837
792
|
#endif
|
838
793
|
if (dr->has.pos) {
|
839
794
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1158,14 +1113,10 @@ read_text(SaxDrive dr) {
|
|
1158
1113
|
((NoSkip == dr->options.skip && !isEnd) ||
|
1159
1114
|
(OffSkip == dr->options.skip))) {
|
1160
1115
|
args[0] = rb_str_new2(dr->buf.str);
|
1161
|
-
#if
|
1116
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1162
1117
|
if (0 != dr->encoding) {
|
1163
1118
|
rb_enc_associate(args[0], dr->encoding);
|
1164
1119
|
}
|
1165
|
-
#elif HAS_PRIVATE_ENCODING
|
1166
|
-
if (Qnil != dr->encoding) {
|
1167
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1168
|
-
}
|
1169
1120
|
#endif
|
1170
1121
|
if (dr->has.pos) {
|
1171
1122
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1213,14 +1164,10 @@ read_text(SaxDrive dr) {
|
|
1213
1164
|
break;
|
1214
1165
|
}
|
1215
1166
|
args[0] = rb_str_new2(dr->buf.str);
|
1216
|
-
#if
|
1167
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1217
1168
|
if (0 != dr->encoding) {
|
1218
1169
|
rb_enc_associate(args[0], dr->encoding);
|
1219
1170
|
}
|
1220
|
-
#elif HAS_PRIVATE_ENCODING
|
1221
|
-
if (Qnil != dr->encoding) {
|
1222
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1223
|
-
}
|
1224
1171
|
#endif
|
1225
1172
|
if (dr->has.pos) {
|
1226
1173
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1299,14 +1246,10 @@ read_jump(SaxDrive dr, const char *pat) {
|
|
1299
1246
|
// TBD check parent overlay
|
1300
1247
|
if (dr->has.text && !dr->blocked) {
|
1301
1248
|
args[0] = rb_str_new2(dr->buf.str);
|
1302
|
-
#if
|
1249
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1303
1250
|
if (0 != dr->encoding) {
|
1304
1251
|
rb_enc_associate(args[0], dr->encoding);
|
1305
1252
|
}
|
1306
|
-
#elif HAS_PRIVATE_ENCODING
|
1307
|
-
if (Qnil != dr->encoding) {
|
1308
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1309
|
-
}
|
1310
1253
|
#endif
|
1311
1254
|
if (dr->has.pos) {
|
1312
1255
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1377,10 +1320,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
|
|
1377
1320
|
c = read_quoted_value(dr);
|
1378
1321
|
attr_value = dr->buf.str;
|
1379
1322
|
if (is_encoding) {
|
1380
|
-
#if
|
1323
|
+
#if HAVE_RB_ENC_FIND
|
1381
1324
|
dr->encoding = rb_enc_find(dr->buf.str);
|
1382
|
-
#elif HAS_PRIVATE_ENCODING
|
1383
|
-
dr->encoding = rb_str_new2(dr->buf.str);
|
1384
1325
|
#else
|
1385
1326
|
dr->encoding = dr->buf.str;
|
1386
1327
|
#endif
|
@@ -1411,14 +1352,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
|
|
1411
1352
|
ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
|
1412
1353
|
}
|
1413
1354
|
args[1] = rb_str_new2(attr_value);
|
1414
|
-
#if
|
1355
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1415
1356
|
if (0 != dr->encoding) {
|
1416
1357
|
rb_enc_associate(args[1], dr->encoding);
|
1417
1358
|
}
|
1418
|
-
#elif HAS_PRIVATE_ENCODING
|
1419
|
-
if (Qnil != dr->encoding) {
|
1420
|
-
rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
|
1421
|
-
}
|
1422
1359
|
#endif
|
1423
1360
|
if (dr->has.pos) {
|
1424
1361
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1614,19 +1551,12 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
|
|
1614
1551
|
}
|
1615
1552
|
if (u <= 0x000000000000007FULL) {
|
1616
1553
|
*b++ = (char)u;
|
1617
|
-
#if
|
1554
|
+
#if HAVE_RB_ENC_FIND
|
1618
1555
|
} else if (ox_utf8_encoding == dr->encoding) {
|
1619
1556
|
b = ox_ucs_to_utf8_chars(b, u);
|
1620
1557
|
} else if (0 == dr->encoding) {
|
1621
1558
|
dr->encoding = ox_utf8_encoding;
|
1622
1559
|
b = ox_ucs_to_utf8_chars(b, u);
|
1623
|
-
#elif HAS_PRIVATE_ENCODING
|
1624
|
-
} else if (ox_utf8_encoding == dr->encoding ||
|
1625
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(dr->encoding)))) {
|
1626
|
-
b = ox_ucs_to_utf8_chars(b, u);
|
1627
|
-
} else if (Qnil == dr->encoding) {
|
1628
|
-
dr->encoding = ox_utf8_encoding;
|
1629
|
-
b = ox_ucs_to_utf8_chars(b, u);
|
1630
1560
|
#else
|
1631
1561
|
} else if (0 == dr->encoding) {
|
1632
1562
|
dr->encoding = UTF8_STR;
|
@@ -1668,8 +1598,28 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
|
|
1668
1598
|
c = '\'';
|
1669
1599
|
s += 5;
|
1670
1600
|
} else {
|
1671
|
-
|
1672
|
-
|
1601
|
+
char key[16];
|
1602
|
+
char *k = key;
|
1603
|
+
char *kend = key + sizeof(key) - 1;
|
1604
|
+
char *bn;
|
1605
|
+
char *s2 = s;
|
1606
|
+
|
1607
|
+
for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
|
1608
|
+
if (kend <= k) {
|
1609
|
+
k = key;
|
1610
|
+
break;
|
1611
|
+
}
|
1612
|
+
*k = *s2;
|
1613
|
+
}
|
1614
|
+
*k = '\0';
|
1615
|
+
if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
|
1616
|
+
ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
|
1617
|
+
c = '&';
|
1618
|
+
} else {
|
1619
|
+
b = bn;
|
1620
|
+
s = s2 + 1;
|
1621
|
+
continue;
|
1622
|
+
}
|
1673
1623
|
}
|
1674
1624
|
*b++ = (char)c;
|
1675
1625
|
col++;
|