ox 2.12.1 → 2.14.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +44 -0
- data/README.md +31 -0
- data/ext/ox/builder.c +5 -1
- data/ext/ox/dump.c +6 -18
- data/ext/ox/extconf.rb +12 -34
- data/ext/ox/gen_load.c +18 -96
- data/ext/ox/hash_load.c +39 -12
- data/ext/ox/obj_load.c +14 -46
- data/ext/ox/ox.c +60 -48
- data/ext/ox/ox.h +7 -15
- data/ext/ox/parse.c +37 -44
- data/ext/ox/sax.c +43 -93
- data/ext/ox/sax.h +1 -3
- data/ext/ox/sax_as.c +2 -6
- data/ext/ox/sax_buf.c +1 -1
- data/ext/ox/special.c +346 -0
- data/ext/ox/special.h +1 -0
- data/lib/ox/element.rb +21 -11
- data/lib/ox/version.rb +1 -1
- metadata +5 -5
data/ext/ox/ox.h
CHANGED
@@ -16,20 +16,15 @@ extern "C" {
|
|
16
16
|
#define RSTRING_NOT_MODIFIED
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
-
#if
|
19
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
20
20
|
#include "ruby/encoding.h"
|
21
21
|
#endif
|
22
22
|
|
23
|
-
#
|
24
|
-
#
|
25
|
-
enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
|
23
|
+
#if HAVE_RUBY_ST_H
|
24
|
+
#include "ruby/st.h"
|
26
25
|
#else
|
27
|
-
|
28
|
-
/* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. */
|
26
|
+
// Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up.
|
29
27
|
#include "st.h"
|
30
|
-
#else
|
31
|
-
#include "ruby/st.h"
|
32
|
-
#endif
|
33
28
|
#endif
|
34
29
|
|
35
30
|
#include "cache.h"
|
@@ -141,15 +136,14 @@ typedef struct _options {
|
|
141
136
|
char convert_special;// boolean true or false
|
142
137
|
char allow_invalid; // YesNo
|
143
138
|
char no_empty; // boolean - no empty elements when dumping
|
139
|
+
char with_cdata; // boolean - hash_load should include cdata
|
144
140
|
char inv_repl[12]; // max 10 valid characters, first character is the length
|
145
141
|
char strip_ns[64]; // namespace to strip, \0 is no-strip, \* is all, else only matches
|
146
142
|
struct _hints *html_hints; // html hints
|
147
143
|
VALUE attr_key_mod;
|
148
144
|
VALUE element_key_mod;
|
149
|
-
#if
|
145
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
150
146
|
rb_encoding *rb_enc;
|
151
|
-
#elif HAS_PRIVATE_ENCODING
|
152
|
-
VALUE rb_enc;
|
153
147
|
#else
|
154
148
|
void *rb_enc;
|
155
149
|
#endif
|
@@ -236,10 +230,8 @@ extern ID ox_tv_nsec_id;
|
|
236
230
|
extern ID ox_tv_usec_id;
|
237
231
|
extern ID ox_value_id;
|
238
232
|
|
239
|
-
#if
|
233
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
240
234
|
extern rb_encoding *ox_utf8_encoding;
|
241
|
-
#elif HAS_PRIVATE_ENCODING
|
242
|
-
extern VALUE ox_utf8_encoding;
|
243
235
|
#else
|
244
236
|
extern void *ox_utf8_encoding;
|
245
237
|
#endif
|
data/ext/ox/parse.c
CHANGED
@@ -441,7 +441,6 @@ read_element(PInfo pi) {
|
|
441
441
|
// empty element, no attributes and no children
|
442
442
|
pi->s++;
|
443
443
|
if ('>' != *pi->s) {
|
444
|
-
/*printf("*** '%s' ***\n", pi->s); */
|
445
444
|
attr_stack_cleanup(&attrs);
|
446
445
|
set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
|
447
446
|
return 0;
|
@@ -479,8 +478,8 @@ read_element(PInfo pi) {
|
|
479
478
|
pi->s++;
|
480
479
|
pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
|
481
480
|
pi->pcb->end_element(pi, ename);
|
482
|
-
|
483
481
|
attr_stack_cleanup(&attrs);
|
482
|
+
|
484
483
|
return 0;
|
485
484
|
case '>':
|
486
485
|
/* has either children or a value */
|
@@ -544,6 +543,12 @@ read_element(PInfo pi) {
|
|
544
543
|
while (!done) {
|
545
544
|
start = pi->s;
|
546
545
|
next_non_white(pi);
|
546
|
+
if (OffSkip == pi->options->skip && start < pi->s && '<' == *pi->s) {
|
547
|
+
c = *pi->s;
|
548
|
+
*pi->s = '\0';
|
549
|
+
pi->pcb->add_text(pi, start, 1);
|
550
|
+
*pi->s = c;
|
551
|
+
}
|
547
552
|
c = *pi->s++;
|
548
553
|
if ('\0' == c) {
|
549
554
|
attr_stack_cleanup(&attrs);
|
@@ -1000,11 +1005,13 @@ read_coded_chars(PInfo pi, char *text) {
|
|
1000
1005
|
char *b, buf[32];
|
1001
1006
|
char *end = buf + sizeof(buf) - 1;
|
1002
1007
|
char *s;
|
1008
|
+
long blen = 0;
|
1003
1009
|
|
1004
1010
|
for (b = buf, s = pi->s; b < end; b++, s++) {
|
1005
1011
|
*b = *s;
|
1006
1012
|
if (';' == *s) {
|
1007
1013
|
*(b + 1) = '\0';
|
1014
|
+
blen = b - buf;
|
1008
1015
|
s++;
|
1009
1016
|
break;
|
1010
1017
|
}
|
@@ -1025,18 +1032,9 @@ read_coded_chars(PInfo pi, char *text) {
|
|
1025
1032
|
} else {
|
1026
1033
|
if (u <= 0x000000000000007FULL) {
|
1027
1034
|
*text++ = (char)u;
|
1028
|
-
#if HAS_PRIVATE_ENCODING
|
1029
|
-
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
1030
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
1031
|
-
#else
|
1032
1035
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
1033
|
-
#endif
|
1034
1036
|
text = ox_ucs_to_utf8_chars(text, u);
|
1035
|
-
#if HAS_PRIVATE_ENCODING
|
1036
|
-
} else if (Qnil == pi->options->rb_enc) {
|
1037
|
-
#else
|
1038
1037
|
} else if (0 == pi->options->rb_enc) {
|
1039
|
-
#endif
|
1040
1038
|
pi->options->rb_enc = ox_utf8_encoding;
|
1041
1039
|
text = ox_ucs_to_utf8_chars(text, u);
|
1042
1040
|
} else if (TolerantEffort == pi->options->effort) {
|
@@ -1047,30 +1045,20 @@ read_coded_chars(PInfo pi, char *text) {
|
|
1047
1045
|
} else {
|
1048
1046
|
/*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
|
1049
1047
|
set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
|
1050
|
-
return
|
1048
|
+
return NULL;
|
1051
1049
|
}
|
1052
1050
|
pi->s = s;
|
1053
1051
|
}
|
1054
|
-
} else if (0 == strcasecmp(buf, "nbsp;")) {
|
1055
|
-
pi->s = s;
|
1056
|
-
*text++ = ' ';
|
1057
|
-
} else if (0 == strcasecmp(buf, "lt;")) {
|
1058
|
-
pi->s = s;
|
1059
|
-
*text++ = '<';
|
1060
|
-
} else if (0 == strcasecmp(buf, "gt;")) {
|
1061
|
-
pi->s = s;
|
1062
|
-
*text++ = '>';
|
1063
|
-
} else if (0 == strcasecmp(buf, "amp;")) {
|
1064
|
-
pi->s = s;
|
1065
|
-
*text++ = '&';
|
1066
|
-
} else if (0 == strcasecmp(buf, "quot;")) {
|
1067
|
-
pi->s = s;
|
1068
|
-
*text++ = '"';
|
1069
|
-
} else if (0 == strcasecmp(buf, "apos;")) {
|
1070
|
-
pi->s = s;
|
1071
|
-
*text++ = '\'';
|
1072
1052
|
} else {
|
1073
|
-
*
|
1053
|
+
char *t2;
|
1054
|
+
|
1055
|
+
buf[blen] = '\0';
|
1056
|
+
if (NULL == (t2 = ox_entity_lookup(text, buf))) {
|
1057
|
+
*text++ = '&';
|
1058
|
+
} else {
|
1059
|
+
text = t2;
|
1060
|
+
pi->s = s;
|
1061
|
+
}
|
1074
1062
|
}
|
1075
1063
|
return text;
|
1076
1064
|
}
|
@@ -1112,19 +1100,10 @@ collapse_special(PInfo pi, char *str) {
|
|
1112
1100
|
}
|
1113
1101
|
if (u <= 0x000000000000007FULL) {
|
1114
1102
|
*b++ = (char)u;
|
1115
|
-
#if HAS_PRIVATE_ENCODING
|
1116
|
-
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
1117
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
1118
|
-
#else
|
1119
1103
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
1120
|
-
#endif
|
1121
1104
|
b = ox_ucs_to_utf8_chars(b, u);
|
1122
1105
|
/* TBD support UTF-16 */
|
1123
|
-
#if HAS_PRIVATE_ENCODING
|
1124
|
-
} else if (Qnil == pi->options->rb_enc) {
|
1125
|
-
#else
|
1126
1106
|
} else if (0 == pi->options->rb_enc) {
|
1127
|
-
#endif
|
1128
1107
|
pi->options->rb_enc = ox_utf8_encoding;
|
1129
1108
|
b = ox_ucs_to_utf8_chars(b, u);
|
1130
1109
|
} else {
|
@@ -1153,16 +1132,30 @@ collapse_special(PInfo pi, char *str) {
|
|
1153
1132
|
*b++ = '&';
|
1154
1133
|
continue;
|
1155
1134
|
} else {
|
1156
|
-
|
1135
|
+
char key[16];
|
1136
|
+
char *k = key;
|
1137
|
+
char *kend = key + sizeof(key) - 1;
|
1138
|
+
|
1139
|
+
*k++ = *s;
|
1157
1140
|
while (';' != *s++) {
|
1158
1141
|
if ('\0' == *s) {
|
1159
1142
|
set_error(&pi->err, "Invalid format, special character does not end with a semicolon", pi->str, pi->s);
|
1160
1143
|
return EDOM;
|
1161
1144
|
}
|
1145
|
+
if (kend <= k) {
|
1146
|
+
k = key;
|
1147
|
+
break;
|
1148
|
+
}
|
1149
|
+
*k++ = *s;
|
1162
1150
|
}
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1151
|
+
k--;
|
1152
|
+
*k = '\0';
|
1153
|
+
if ('\0' == *key || NULL == (b = ox_entity_lookup(b, key))) {
|
1154
|
+
set_error(&pi->err, "Invalid format, invalid special character sequence", pi->str, pi->s);
|
1155
|
+
c = '?';
|
1156
|
+
return 0;
|
1157
|
+
}
|
1158
|
+
continue;
|
1166
1159
|
}
|
1167
1160
|
*b++ = (char)c;
|
1168
1161
|
}
|
data/ext/ox/sax.c
CHANGED
@@ -9,13 +9,16 @@
|
|
9
9
|
#include <stdio.h>
|
10
10
|
#include <strings.h>
|
11
11
|
#include <sys/types.h>
|
12
|
-
#if
|
12
|
+
#if HAVE_SYS_UIO_H
|
13
13
|
#include <sys/uio.h>
|
14
14
|
#endif
|
15
15
|
#include <unistd.h>
|
16
16
|
#include <time.h>
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
20
|
+
#include "ruby/encoding.h"
|
21
|
+
#endif
|
19
22
|
#include "ox.h"
|
20
23
|
#include "sax.h"
|
21
24
|
#include "sax_stack.h"
|
@@ -68,9 +71,9 @@ static VALUE protect_parse(VALUE drp) {
|
|
68
71
|
return Qnil;
|
69
72
|
}
|
70
73
|
|
71
|
-
#if
|
74
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
72
75
|
static int
|
73
|
-
|
76
|
+
str_is_ascii(const char *s) {
|
74
77
|
for (; '\0' != *s; s++) {
|
75
78
|
if (*s < ' ' || '~' < *s) {
|
76
79
|
return 0;
|
@@ -87,8 +90,8 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
87
90
|
|
88
91
|
if (dr->options.symbolize) {
|
89
92
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
|
90
|
-
#if
|
91
|
-
if (0 != dr->encoding && !strIsAscii(str)) {
|
93
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
94
|
+
if (0 != dr->encoding && !str_is_ascii(str)) {
|
92
95
|
VALUE rstr = rb_str_new2(str);
|
93
96
|
|
94
97
|
// TBD if sym can be pinned down then use this all the time
|
@@ -99,20 +102,6 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
99
102
|
sym = ID2SYM(rb_intern(str));
|
100
103
|
*slot = sym;
|
101
104
|
}
|
102
|
-
#elif HAS_PRIVATE_ENCODING
|
103
|
-
if (Qnil != dr->encoding && !strIsAscii(str)) {
|
104
|
-
VALUE rstr = rb_str_new2(str);
|
105
|
-
|
106
|
-
rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
|
107
|
-
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
108
|
-
// Needed for Ruby 2.2 to get around the GC of symbols created
|
109
|
-
// with to_sym which is needed for encoded symbols.
|
110
|
-
rb_ary_push(ox_sym_bank, sym);
|
111
|
-
*slot = Qundef;
|
112
|
-
} else {
|
113
|
-
sym = ID2SYM(rb_intern(str));
|
114
|
-
*slot = sym;
|
115
|
-
}
|
116
105
|
#else
|
117
106
|
sym = ID2SYM(rb_intern(str));
|
118
107
|
*slot = sym;
|
@@ -120,14 +109,10 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
120
109
|
}
|
121
110
|
} else {
|
122
111
|
sym = rb_str_new2(str);
|
123
|
-
#if
|
112
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
124
113
|
if (0 != dr->encoding) {
|
125
114
|
rb_enc_associate(sym, dr->encoding);
|
126
115
|
}
|
127
|
-
#elif HAS_PRIVATE_ENCODING
|
128
|
-
if (Qnil != dr->encoding) {
|
129
|
-
rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding);
|
130
|
-
}
|
131
116
|
#endif
|
132
117
|
if (0 != strp) {
|
133
118
|
*strp = StringValuePtr(sym);
|
@@ -182,7 +167,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
|
|
182
167
|
dr->blocked = 0;
|
183
168
|
dr->abort = false;
|
184
169
|
has_init(&dr->has, handler);
|
185
|
-
#if
|
170
|
+
#if HAVE_RB_ENC_FIND
|
186
171
|
if ('\0' == *ox_default_options.encoding) {
|
187
172
|
VALUE encoding;
|
188
173
|
|
@@ -196,18 +181,6 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
|
|
196
181
|
} else {
|
197
182
|
dr->encoding = rb_enc_find(ox_default_options.encoding);
|
198
183
|
}
|
199
|
-
#elif HAS_PRIVATE_ENCODING
|
200
|
-
if ('\0' == *ox_default_options.encoding) {
|
201
|
-
VALUE encoding;
|
202
|
-
|
203
|
-
if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
|
204
|
-
dr->encoding = encoding;
|
205
|
-
} else {
|
206
|
-
dr->encoding = Qnil;
|
207
|
-
}
|
208
|
-
} else {
|
209
|
-
dr->encoding = rb_str_new2(ox_default_options.encoding);
|
210
|
-
}
|
211
184
|
#else
|
212
185
|
dr->encoding = 0;
|
213
186
|
#endif
|
@@ -255,9 +228,7 @@ skipBOM(SaxDrive dr) {
|
|
255
228
|
|
256
229
|
if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
|
257
230
|
if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
|
258
|
-
#if
|
259
|
-
dr->encoding = ox_utf8_encoding;
|
260
|
-
#elif HAS_PRIVATE_ENCODING
|
231
|
+
#if HAVE_RB_ENC_FIND
|
261
232
|
dr->encoding = ox_utf8_encoding;
|
262
233
|
#else
|
263
234
|
dr->encoding = UTF8_STR;
|
@@ -364,14 +335,10 @@ parse(SaxDrive dr) {
|
|
364
335
|
off_t col = dr->buf.col - 1;
|
365
336
|
|
366
337
|
args[0] = rb_str_new2("");
|
367
|
-
#if
|
338
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
368
339
|
if (0 != dr->encoding) {
|
369
340
|
rb_enc_associate(args[0], dr->encoding);
|
370
341
|
}
|
371
|
-
#elif HAS_PRIVATE_ENCODING
|
372
|
-
if (Qnil != dr->encoding) {
|
373
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
374
|
-
}
|
375
342
|
#endif
|
376
343
|
if (dr->has.pos) {
|
377
344
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -526,14 +493,10 @@ read_instruction(SaxDrive dr) {
|
|
526
493
|
ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
|
527
494
|
}
|
528
495
|
args[0] = rb_str_new2(content);
|
529
|
-
#if
|
496
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
530
497
|
if (0 != dr->encoding) {
|
531
498
|
rb_enc_associate(args[0], dr->encoding);
|
532
499
|
}
|
533
|
-
#elif HAS_PRIVATE_ENCODING
|
534
|
-
if (Qnil != dr->encoding) {
|
535
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
536
|
-
}
|
537
500
|
#endif
|
538
501
|
if (dr->has.line) {
|
539
502
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
@@ -732,14 +695,10 @@ read_cdata(SaxDrive dr) {
|
|
732
695
|
VALUE args[1];
|
733
696
|
|
734
697
|
args[0] = rb_str_new2(dr->buf.str);
|
735
|
-
#if
|
698
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
736
699
|
if (0 != dr->encoding) {
|
737
700
|
rb_enc_associate(args[0], dr->encoding);
|
738
701
|
}
|
739
|
-
#elif HAS_PRIVATE_ENCODING
|
740
|
-
if (Qnil != dr->encoding) {
|
741
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
742
|
-
}
|
743
702
|
#endif
|
744
703
|
if (dr->has.pos) {
|
745
704
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -826,14 +785,10 @@ read_comment(SaxDrive dr) {
|
|
826
785
|
(NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
|
827
786
|
|
828
787
|
args[0] = rb_str_new2(dr->buf.str);
|
829
|
-
#if
|
788
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
830
789
|
if (0 != dr->encoding) {
|
831
790
|
rb_enc_associate(args[0], dr->encoding);
|
832
791
|
}
|
833
|
-
#elif HAS_PRIVATE_ENCODING
|
834
|
-
if (Qnil != dr->encoding) {
|
835
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
836
|
-
}
|
837
792
|
#endif
|
838
793
|
if (dr->has.pos) {
|
839
794
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1158,14 +1113,10 @@ read_text(SaxDrive dr) {
|
|
1158
1113
|
((NoSkip == dr->options.skip && !isEnd) ||
|
1159
1114
|
(OffSkip == dr->options.skip))) {
|
1160
1115
|
args[0] = rb_str_new2(dr->buf.str);
|
1161
|
-
#if
|
1116
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1162
1117
|
if (0 != dr->encoding) {
|
1163
1118
|
rb_enc_associate(args[0], dr->encoding);
|
1164
1119
|
}
|
1165
|
-
#elif HAS_PRIVATE_ENCODING
|
1166
|
-
if (Qnil != dr->encoding) {
|
1167
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1168
|
-
}
|
1169
1120
|
#endif
|
1170
1121
|
if (dr->has.pos) {
|
1171
1122
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1213,14 +1164,10 @@ read_text(SaxDrive dr) {
|
|
1213
1164
|
break;
|
1214
1165
|
}
|
1215
1166
|
args[0] = rb_str_new2(dr->buf.str);
|
1216
|
-
#if
|
1167
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1217
1168
|
if (0 != dr->encoding) {
|
1218
1169
|
rb_enc_associate(args[0], dr->encoding);
|
1219
1170
|
}
|
1220
|
-
#elif HAS_PRIVATE_ENCODING
|
1221
|
-
if (Qnil != dr->encoding) {
|
1222
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1223
|
-
}
|
1224
1171
|
#endif
|
1225
1172
|
if (dr->has.pos) {
|
1226
1173
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1299,14 +1246,10 @@ read_jump(SaxDrive dr, const char *pat) {
|
|
1299
1246
|
// TBD check parent overlay
|
1300
1247
|
if (dr->has.text && !dr->blocked) {
|
1301
1248
|
args[0] = rb_str_new2(dr->buf.str);
|
1302
|
-
#if
|
1249
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1303
1250
|
if (0 != dr->encoding) {
|
1304
1251
|
rb_enc_associate(args[0], dr->encoding);
|
1305
1252
|
}
|
1306
|
-
#elif HAS_PRIVATE_ENCODING
|
1307
|
-
if (Qnil != dr->encoding) {
|
1308
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1309
|
-
}
|
1310
1253
|
#endif
|
1311
1254
|
if (dr->has.pos) {
|
1312
1255
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1377,10 +1320,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
|
|
1377
1320
|
c = read_quoted_value(dr);
|
1378
1321
|
attr_value = dr->buf.str;
|
1379
1322
|
if (is_encoding) {
|
1380
|
-
#if
|
1323
|
+
#if HAVE_RB_ENC_FIND
|
1381
1324
|
dr->encoding = rb_enc_find(dr->buf.str);
|
1382
|
-
#elif HAS_PRIVATE_ENCODING
|
1383
|
-
dr->encoding = rb_str_new2(dr->buf.str);
|
1384
1325
|
#else
|
1385
1326
|
dr->encoding = dr->buf.str;
|
1386
1327
|
#endif
|
@@ -1411,14 +1352,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
|
|
1411
1352
|
ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
|
1412
1353
|
}
|
1413
1354
|
args[1] = rb_str_new2(attr_value);
|
1414
|
-
#if
|
1355
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1415
1356
|
if (0 != dr->encoding) {
|
1416
1357
|
rb_enc_associate(args[1], dr->encoding);
|
1417
1358
|
}
|
1418
|
-
#elif HAS_PRIVATE_ENCODING
|
1419
|
-
if (Qnil != dr->encoding) {
|
1420
|
-
rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
|
1421
|
-
}
|
1422
1359
|
#endif
|
1423
1360
|
if (dr->has.pos) {
|
1424
1361
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1614,19 +1551,12 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
|
|
1614
1551
|
}
|
1615
1552
|
if (u <= 0x000000000000007FULL) {
|
1616
1553
|
*b++ = (char)u;
|
1617
|
-
#if
|
1554
|
+
#if HAVE_RB_ENC_FIND
|
1618
1555
|
} else if (ox_utf8_encoding == dr->encoding) {
|
1619
1556
|
b = ox_ucs_to_utf8_chars(b, u);
|
1620
1557
|
} else if (0 == dr->encoding) {
|
1621
1558
|
dr->encoding = ox_utf8_encoding;
|
1622
1559
|
b = ox_ucs_to_utf8_chars(b, u);
|
1623
|
-
#elif HAS_PRIVATE_ENCODING
|
1624
|
-
} else if (ox_utf8_encoding == dr->encoding ||
|
1625
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(dr->encoding)))) {
|
1626
|
-
b = ox_ucs_to_utf8_chars(b, u);
|
1627
|
-
} else if (Qnil == dr->encoding) {
|
1628
|
-
dr->encoding = ox_utf8_encoding;
|
1629
|
-
b = ox_ucs_to_utf8_chars(b, u);
|
1630
1560
|
#else
|
1631
1561
|
} else if (0 == dr->encoding) {
|
1632
1562
|
dr->encoding = UTF8_STR;
|
@@ -1668,8 +1598,28 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
|
|
1668
1598
|
c = '\'';
|
1669
1599
|
s += 5;
|
1670
1600
|
} else {
|
1671
|
-
|
1672
|
-
|
1601
|
+
char key[16];
|
1602
|
+
char *k = key;
|
1603
|
+
char *kend = key + sizeof(key) - 1;
|
1604
|
+
char *bn;
|
1605
|
+
char *s2 = s;
|
1606
|
+
|
1607
|
+
for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
|
1608
|
+
if (kend <= k) {
|
1609
|
+
k = key;
|
1610
|
+
break;
|
1611
|
+
}
|
1612
|
+
*k = *s2;
|
1613
|
+
}
|
1614
|
+
*k = '\0';
|
1615
|
+
if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
|
1616
|
+
ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
|
1617
|
+
c = '&';
|
1618
|
+
} else {
|
1619
|
+
b = bn;
|
1620
|
+
s = s2 + 1;
|
1621
|
+
continue;
|
1622
|
+
}
|
1673
1623
|
}
|
1674
1624
|
*b++ = (char)c;
|
1675
1625
|
col++;
|