ox 2.13.1 → 2.14.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +34 -0
- data/README.md +32 -5
- data/ext/ox/builder.c +5 -1
- data/ext/ox/dump.c +7 -19
- data/ext/ox/extconf.rb +12 -34
- data/ext/ox/gen_load.c +18 -96
- data/ext/ox/hash_load.c +39 -12
- data/ext/ox/obj_load.c +35 -64
- data/ext/ox/ox.c +60 -48
- data/ext/ox/ox.h +7 -15
- data/ext/ox/parse.c +7 -20
- data/ext/ox/sax.c +21 -91
- data/ext/ox/sax.h +1 -3
- data/ext/ox/sax_as.c +2 -6
- data/ext/ox/sax_buf.c +1 -1
- data/ext/ox/special.c +4 -3
- data/lib/ox/element.rb +21 -11
- data/lib/ox/version.rb +1 -1
- metadata +6 -6
data/ext/ox/ox.h
CHANGED
@@ -16,20 +16,15 @@ extern "C" {
|
|
16
16
|
#define RSTRING_NOT_MODIFIED
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
-
#if
|
19
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
20
20
|
#include "ruby/encoding.h"
|
21
21
|
#endif
|
22
22
|
|
23
|
-
#
|
24
|
-
#
|
25
|
-
enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
|
23
|
+
#if HAVE_RUBY_ST_H
|
24
|
+
#include "ruby/st.h"
|
26
25
|
#else
|
27
|
-
|
28
|
-
/* Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up. */
|
26
|
+
// Only on travis, local is where it is for all others. Seems to vary depending on the travis machine picked up.
|
29
27
|
#include "st.h"
|
30
|
-
#else
|
31
|
-
#include "ruby/st.h"
|
32
|
-
#endif
|
33
28
|
#endif
|
34
29
|
|
35
30
|
#include "cache.h"
|
@@ -141,15 +136,14 @@ typedef struct _options {
|
|
141
136
|
char convert_special;// boolean true or false
|
142
137
|
char allow_invalid; // YesNo
|
143
138
|
char no_empty; // boolean - no empty elements when dumping
|
139
|
+
char with_cdata; // boolean - hash_load should include cdata
|
144
140
|
char inv_repl[12]; // max 10 valid characters, first character is the length
|
145
141
|
char strip_ns[64]; // namespace to strip, \0 is no-strip, \* is all, else only matches
|
146
142
|
struct _hints *html_hints; // html hints
|
147
143
|
VALUE attr_key_mod;
|
148
144
|
VALUE element_key_mod;
|
149
|
-
#if
|
145
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
150
146
|
rb_encoding *rb_enc;
|
151
|
-
#elif HAS_PRIVATE_ENCODING
|
152
|
-
VALUE rb_enc;
|
153
147
|
#else
|
154
148
|
void *rb_enc;
|
155
149
|
#endif
|
@@ -236,10 +230,8 @@ extern ID ox_tv_nsec_id;
|
|
236
230
|
extern ID ox_tv_usec_id;
|
237
231
|
extern ID ox_value_id;
|
238
232
|
|
239
|
-
#if
|
233
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
240
234
|
extern rb_encoding *ox_utf8_encoding;
|
241
|
-
#elif HAS_PRIVATE_ENCODING
|
242
|
-
extern VALUE ox_utf8_encoding;
|
243
235
|
#else
|
244
236
|
extern void *ox_utf8_encoding;
|
245
237
|
#endif
|
data/ext/ox/parse.c
CHANGED
@@ -441,7 +441,6 @@ read_element(PInfo pi) {
|
|
441
441
|
// empty element, no attributes and no children
|
442
442
|
pi->s++;
|
443
443
|
if ('>' != *pi->s) {
|
444
|
-
/*printf("*** '%s' ***\n", pi->s); */
|
445
444
|
attr_stack_cleanup(&attrs);
|
446
445
|
set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
|
447
446
|
return 0;
|
@@ -479,8 +478,8 @@ read_element(PInfo pi) {
|
|
479
478
|
pi->s++;
|
480
479
|
pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
|
481
480
|
pi->pcb->end_element(pi, ename);
|
482
|
-
|
483
481
|
attr_stack_cleanup(&attrs);
|
482
|
+
|
484
483
|
return 0;
|
485
484
|
case '>':
|
486
485
|
/* has either children or a value */
|
@@ -544,6 +543,12 @@ read_element(PInfo pi) {
|
|
544
543
|
while (!done) {
|
545
544
|
start = pi->s;
|
546
545
|
next_non_white(pi);
|
546
|
+
if (OffSkip == pi->options->skip && start < pi->s && '<' == *pi->s) {
|
547
|
+
c = *pi->s;
|
548
|
+
*pi->s = '\0';
|
549
|
+
pi->pcb->add_text(pi, start, 1);
|
550
|
+
*pi->s = c;
|
551
|
+
}
|
547
552
|
c = *pi->s++;
|
548
553
|
if ('\0' == c) {
|
549
554
|
attr_stack_cleanup(&attrs);
|
@@ -1027,18 +1032,9 @@ read_coded_chars(PInfo pi, char *text) {
|
|
1027
1032
|
} else {
|
1028
1033
|
if (u <= 0x000000000000007FULL) {
|
1029
1034
|
*text++ = (char)u;
|
1030
|
-
#if HAS_PRIVATE_ENCODING
|
1031
|
-
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
1032
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
1033
|
-
#else
|
1034
1035
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
1035
|
-
#endif
|
1036
1036
|
text = ox_ucs_to_utf8_chars(text, u);
|
1037
|
-
#if HAS_PRIVATE_ENCODING
|
1038
|
-
} else if (Qnil == pi->options->rb_enc) {
|
1039
|
-
#else
|
1040
1037
|
} else if (0 == pi->options->rb_enc) {
|
1041
|
-
#endif
|
1042
1038
|
pi->options->rb_enc = ox_utf8_encoding;
|
1043
1039
|
text = ox_ucs_to_utf8_chars(text, u);
|
1044
1040
|
} else if (TolerantEffort == pi->options->effort) {
|
@@ -1104,19 +1100,10 @@ collapse_special(PInfo pi, char *str) {
|
|
1104
1100
|
}
|
1105
1101
|
if (u <= 0x000000000000007FULL) {
|
1106
1102
|
*b++ = (char)u;
|
1107
|
-
#if HAS_PRIVATE_ENCODING
|
1108
|
-
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
1109
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
1110
|
-
#else
|
1111
1103
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
1112
|
-
#endif
|
1113
1104
|
b = ox_ucs_to_utf8_chars(b, u);
|
1114
1105
|
/* TBD support UTF-16 */
|
1115
|
-
#if HAS_PRIVATE_ENCODING
|
1116
|
-
} else if (Qnil == pi->options->rb_enc) {
|
1117
|
-
#else
|
1118
1106
|
} else if (0 == pi->options->rb_enc) {
|
1119
|
-
#endif
|
1120
1107
|
pi->options->rb_enc = ox_utf8_encoding;
|
1121
1108
|
b = ox_ucs_to_utf8_chars(b, u);
|
1122
1109
|
} else {
|
data/ext/ox/sax.c
CHANGED
@@ -9,13 +9,16 @@
|
|
9
9
|
#include <stdio.h>
|
10
10
|
#include <strings.h>
|
11
11
|
#include <sys/types.h>
|
12
|
-
#if
|
12
|
+
#if HAVE_SYS_UIO_H
|
13
13
|
#include <sys/uio.h>
|
14
14
|
#endif
|
15
15
|
#include <unistd.h>
|
16
16
|
#include <time.h>
|
17
17
|
|
18
18
|
#include "ruby.h"
|
19
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
20
|
+
#include "ruby/encoding.h"
|
21
|
+
#endif
|
19
22
|
#include "ox.h"
|
20
23
|
#include "sax.h"
|
21
24
|
#include "sax_stack.h"
|
@@ -68,9 +71,9 @@ static VALUE protect_parse(VALUE drp) {
|
|
68
71
|
return Qnil;
|
69
72
|
}
|
70
73
|
|
71
|
-
#if
|
74
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
72
75
|
static int
|
73
|
-
|
76
|
+
str_is_ascii(const char *s) {
|
74
77
|
for (; '\0' != *s; s++) {
|
75
78
|
if (*s < ' ' || '~' < *s) {
|
76
79
|
return 0;
|
@@ -87,8 +90,8 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
87
90
|
|
88
91
|
if (dr->options.symbolize) {
|
89
92
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
|
90
|
-
#if
|
91
|
-
if (0 != dr->encoding && !
|
93
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
94
|
+
if (0 != dr->encoding && !str_is_ascii(str)) {
|
92
95
|
VALUE rstr = rb_str_new2(str);
|
93
96
|
|
94
97
|
// TBD if sym can be pinned down then use this all the time
|
@@ -99,20 +102,6 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
99
102
|
sym = ID2SYM(rb_intern(str));
|
100
103
|
*slot = sym;
|
101
104
|
}
|
102
|
-
#elif HAS_PRIVATE_ENCODING
|
103
|
-
if (Qnil != dr->encoding && !strIsAscii(str)) {
|
104
|
-
VALUE rstr = rb_str_new2(str);
|
105
|
-
|
106
|
-
rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
|
107
|
-
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
108
|
-
// Needed for Ruby 2.2 to get around the GC of symbols created
|
109
|
-
// with to_sym which is needed for encoded symbols.
|
110
|
-
rb_ary_push(ox_sym_bank, sym);
|
111
|
-
*slot = Qundef;
|
112
|
-
} else {
|
113
|
-
sym = ID2SYM(rb_intern(str));
|
114
|
-
*slot = sym;
|
115
|
-
}
|
116
105
|
#else
|
117
106
|
sym = ID2SYM(rb_intern(str));
|
118
107
|
*slot = sym;
|
@@ -120,14 +109,10 @@ str2sym(SaxDrive dr, const char *str, const char **strp) {
|
|
120
109
|
}
|
121
110
|
} else {
|
122
111
|
sym = rb_str_new2(str);
|
123
|
-
#if
|
112
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
124
113
|
if (0 != dr->encoding) {
|
125
114
|
rb_enc_associate(sym, dr->encoding);
|
126
115
|
}
|
127
|
-
#elif HAS_PRIVATE_ENCODING
|
128
|
-
if (Qnil != dr->encoding) {
|
129
|
-
rb_funcall(sym, ox_force_encoding_id, 1, dr->encoding);
|
130
|
-
}
|
131
116
|
#endif
|
132
117
|
if (0 != strp) {
|
133
118
|
*strp = StringValuePtr(sym);
|
@@ -182,7 +167,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
|
|
182
167
|
dr->blocked = 0;
|
183
168
|
dr->abort = false;
|
184
169
|
has_init(&dr->has, handler);
|
185
|
-
#if
|
170
|
+
#if HAVE_RB_ENC_FIND
|
186
171
|
if ('\0' == *ox_default_options.encoding) {
|
187
172
|
VALUE encoding;
|
188
173
|
|
@@ -196,18 +181,6 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
|
|
196
181
|
} else {
|
197
182
|
dr->encoding = rb_enc_find(ox_default_options.encoding);
|
198
183
|
}
|
199
|
-
#elif HAS_PRIVATE_ENCODING
|
200
|
-
if ('\0' == *ox_default_options.encoding) {
|
201
|
-
VALUE encoding;
|
202
|
-
|
203
|
-
if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
|
204
|
-
dr->encoding = encoding;
|
205
|
-
} else {
|
206
|
-
dr->encoding = Qnil;
|
207
|
-
}
|
208
|
-
} else {
|
209
|
-
dr->encoding = rb_str_new2(ox_default_options.encoding);
|
210
|
-
}
|
211
184
|
#else
|
212
185
|
dr->encoding = 0;
|
213
186
|
#endif
|
@@ -255,9 +228,7 @@ skipBOM(SaxDrive dr) {
|
|
255
228
|
|
256
229
|
if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
|
257
230
|
if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
|
258
|
-
#if
|
259
|
-
dr->encoding = ox_utf8_encoding;
|
260
|
-
#elif HAS_PRIVATE_ENCODING
|
231
|
+
#if HAVE_RB_ENC_FIND
|
261
232
|
dr->encoding = ox_utf8_encoding;
|
262
233
|
#else
|
263
234
|
dr->encoding = UTF8_STR;
|
@@ -364,14 +335,10 @@ parse(SaxDrive dr) {
|
|
364
335
|
off_t col = dr->buf.col - 1;
|
365
336
|
|
366
337
|
args[0] = rb_str_new2("");
|
367
|
-
#if
|
338
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
368
339
|
if (0 != dr->encoding) {
|
369
340
|
rb_enc_associate(args[0], dr->encoding);
|
370
341
|
}
|
371
|
-
#elif HAS_PRIVATE_ENCODING
|
372
|
-
if (Qnil != dr->encoding) {
|
373
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
374
|
-
}
|
375
342
|
#endif
|
376
343
|
if (dr->has.pos) {
|
377
344
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -526,14 +493,10 @@ read_instruction(SaxDrive dr) {
|
|
526
493
|
ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
|
527
494
|
}
|
528
495
|
args[0] = rb_str_new2(content);
|
529
|
-
#if
|
496
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
530
497
|
if (0 != dr->encoding) {
|
531
498
|
rb_enc_associate(args[0], dr->encoding);
|
532
499
|
}
|
533
|
-
#elif HAS_PRIVATE_ENCODING
|
534
|
-
if (Qnil != dr->encoding) {
|
535
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
536
|
-
}
|
537
500
|
#endif
|
538
501
|
if (dr->has.line) {
|
539
502
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
@@ -732,14 +695,10 @@ read_cdata(SaxDrive dr) {
|
|
732
695
|
VALUE args[1];
|
733
696
|
|
734
697
|
args[0] = rb_str_new2(dr->buf.str);
|
735
|
-
#if
|
698
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
736
699
|
if (0 != dr->encoding) {
|
737
700
|
rb_enc_associate(args[0], dr->encoding);
|
738
701
|
}
|
739
|
-
#elif HAS_PRIVATE_ENCODING
|
740
|
-
if (Qnil != dr->encoding) {
|
741
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
742
|
-
}
|
743
702
|
#endif
|
744
703
|
if (dr->has.pos) {
|
745
704
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -826,14 +785,10 @@ read_comment(SaxDrive dr) {
|
|
826
785
|
(NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
|
827
786
|
|
828
787
|
args[0] = rb_str_new2(dr->buf.str);
|
829
|
-
#if
|
788
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
830
789
|
if (0 != dr->encoding) {
|
831
790
|
rb_enc_associate(args[0], dr->encoding);
|
832
791
|
}
|
833
|
-
#elif HAS_PRIVATE_ENCODING
|
834
|
-
if (Qnil != dr->encoding) {
|
835
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
836
|
-
}
|
837
792
|
#endif
|
838
793
|
if (dr->has.pos) {
|
839
794
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1158,14 +1113,10 @@ read_text(SaxDrive dr) {
|
|
1158
1113
|
((NoSkip == dr->options.skip && !isEnd) ||
|
1159
1114
|
(OffSkip == dr->options.skip))) {
|
1160
1115
|
args[0] = rb_str_new2(dr->buf.str);
|
1161
|
-
#if
|
1116
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1162
1117
|
if (0 != dr->encoding) {
|
1163
1118
|
rb_enc_associate(args[0], dr->encoding);
|
1164
1119
|
}
|
1165
|
-
#elif HAS_PRIVATE_ENCODING
|
1166
|
-
if (Qnil != dr->encoding) {
|
1167
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1168
|
-
}
|
1169
1120
|
#endif
|
1170
1121
|
if (dr->has.pos) {
|
1171
1122
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1213,14 +1164,10 @@ read_text(SaxDrive dr) {
|
|
1213
1164
|
break;
|
1214
1165
|
}
|
1215
1166
|
args[0] = rb_str_new2(dr->buf.str);
|
1216
|
-
#if
|
1167
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1217
1168
|
if (0 != dr->encoding) {
|
1218
1169
|
rb_enc_associate(args[0], dr->encoding);
|
1219
1170
|
}
|
1220
|
-
#elif HAS_PRIVATE_ENCODING
|
1221
|
-
if (Qnil != dr->encoding) {
|
1222
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1223
|
-
}
|
1224
1171
|
#endif
|
1225
1172
|
if (dr->has.pos) {
|
1226
1173
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1299,14 +1246,10 @@ read_jump(SaxDrive dr, const char *pat) {
|
|
1299
1246
|
// TBD check parent overlay
|
1300
1247
|
if (dr->has.text && !dr->blocked) {
|
1301
1248
|
args[0] = rb_str_new2(dr->buf.str);
|
1302
|
-
#if
|
1249
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1303
1250
|
if (0 != dr->encoding) {
|
1304
1251
|
rb_enc_associate(args[0], dr->encoding);
|
1305
1252
|
}
|
1306
|
-
#elif HAS_PRIVATE_ENCODING
|
1307
|
-
if (Qnil != dr->encoding) {
|
1308
|
-
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1309
|
-
}
|
1310
1253
|
#endif
|
1311
1254
|
if (dr->has.pos) {
|
1312
1255
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1377,10 +1320,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
|
|
1377
1320
|
c = read_quoted_value(dr);
|
1378
1321
|
attr_value = dr->buf.str;
|
1379
1322
|
if (is_encoding) {
|
1380
|
-
#if
|
1323
|
+
#if HAVE_RB_ENC_FIND
|
1381
1324
|
dr->encoding = rb_enc_find(dr->buf.str);
|
1382
|
-
#elif HAS_PRIVATE_ENCODING
|
1383
|
-
dr->encoding = rb_str_new2(dr->buf.str);
|
1384
1325
|
#else
|
1385
1326
|
dr->encoding = dr->buf.str;
|
1386
1327
|
#endif
|
@@ -1411,14 +1352,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
|
|
1411
1352
|
ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
|
1412
1353
|
}
|
1413
1354
|
args[1] = rb_str_new2(attr_value);
|
1414
|
-
#if
|
1355
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
1415
1356
|
if (0 != dr->encoding) {
|
1416
1357
|
rb_enc_associate(args[1], dr->encoding);
|
1417
1358
|
}
|
1418
|
-
#elif HAS_PRIVATE_ENCODING
|
1419
|
-
if (Qnil != dr->encoding) {
|
1420
|
-
rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
|
1421
|
-
}
|
1422
1359
|
#endif
|
1423
1360
|
if (dr->has.pos) {
|
1424
1361
|
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
@@ -1614,19 +1551,12 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
|
|
1614
1551
|
}
|
1615
1552
|
if (u <= 0x000000000000007FULL) {
|
1616
1553
|
*b++ = (char)u;
|
1617
|
-
#if
|
1554
|
+
#if HAVE_RB_ENC_FIND
|
1618
1555
|
} else if (ox_utf8_encoding == dr->encoding) {
|
1619
1556
|
b = ox_ucs_to_utf8_chars(b, u);
|
1620
1557
|
} else if (0 == dr->encoding) {
|
1621
1558
|
dr->encoding = ox_utf8_encoding;
|
1622
1559
|
b = ox_ucs_to_utf8_chars(b, u);
|
1623
|
-
#elif HAS_PRIVATE_ENCODING
|
1624
|
-
} else if (ox_utf8_encoding == dr->encoding ||
|
1625
|
-
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(dr->encoding)))) {
|
1626
|
-
b = ox_ucs_to_utf8_chars(b, u);
|
1627
|
-
} else if (Qnil == dr->encoding) {
|
1628
|
-
dr->encoding = ox_utf8_encoding;
|
1629
|
-
b = ox_ucs_to_utf8_chars(b, u);
|
1630
1560
|
#else
|
1631
1561
|
} else if (0 == dr->encoding) {
|
1632
1562
|
dr->encoding = UTF8_STR;
|
data/ext/ox/sax.h
CHANGED
@@ -33,10 +33,8 @@ typedef struct _saxDrive {
|
|
33
33
|
int blocked;
|
34
34
|
bool abort;
|
35
35
|
struct _has has;
|
36
|
-
#if
|
36
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
37
37
|
rb_encoding *encoding;
|
38
|
-
#elif HAS_PRIVATE_ENCODING
|
39
|
-
VALUE encoding;
|
40
38
|
#else
|
41
39
|
const char *encoding;
|
42
40
|
#endif
|
data/ext/ox/sax_as.c
CHANGED
@@ -8,7 +8,7 @@
|
|
8
8
|
#include <stdio.h>
|
9
9
|
#include <strings.h>
|
10
10
|
#include <sys/types.h>
|
11
|
-
#if
|
11
|
+
#if HAVE_SYS_UIO_H
|
12
12
|
#include <sys/uio.h>
|
13
13
|
#endif
|
14
14
|
#include <unistd.h>
|
@@ -136,14 +136,10 @@ sax_value_as_s(VALUE self) {
|
|
136
136
|
break;
|
137
137
|
}
|
138
138
|
rs = rb_str_new2(dr->buf.str);
|
139
|
-
#if
|
139
|
+
#if HAVE_RB_ENC_ASSOCIATE
|
140
140
|
if (0 != dr->encoding) {
|
141
141
|
rb_enc_associate(rs, dr->encoding);
|
142
142
|
}
|
143
|
-
#elif HAS_PRIVATE_ENCODING
|
144
|
-
if (Qnil != dr->encoding) {
|
145
|
-
rb_funcall(rs, ox_force_encoding_id, 1, dr->encoding);
|
146
|
-
}
|
147
143
|
#endif
|
148
144
|
return rs;
|
149
145
|
}
|
data/ext/ox/sax_buf.c
CHANGED
data/ext/ox/special.c
CHANGED
@@ -70,7 +70,7 @@ typedef struct _cache {
|
|
70
70
|
static struct _cache entity_cache;
|
71
71
|
static bool inited = false;
|
72
72
|
|
73
|
-
// HTML entities such as &amp;. This is a complete list from the HTML
|
73
|
+
// HTML entities such as &amp;. This is a complete list from the HTML 5 spec.
|
74
74
|
static struct _slot entities[] = {
|
75
75
|
{ "AElig", 198 }, // latin capital letter AE
|
76
76
|
{ "Aacute", 193 }, // latin capital letter A with acute
|
@@ -136,11 +136,12 @@ static struct _slot entities[] = {
|
|
136
136
|
{ "acute", 180 }, // acute accent = spacing acute
|
137
137
|
{ "aelig", 230 }, // latin small letter ae
|
138
138
|
{ "agrave", 224 }, // latin small letter a with grave
|
139
|
-
{ "alefsym", 8501 }
|
139
|
+
{ "alefsym", 8501 },// alef symbol = first transfinite cardinal
|
140
140
|
{ "alpha", 945 }, // greek small letter alpha
|
141
141
|
{ "amp", 38 }, // -- ampersand, U+0026 ISOnum
|
142
142
|
{ "and", 8743 }, // logical and = wedge, U+2227 ISOtech
|
143
143
|
{ "ang", 8736 }, // angle, U+2220 ISOamso
|
144
|
+
{ "apos", 39 }, // -- single quote
|
144
145
|
{ "aring", 229 }, // latin small letter a with ring above
|
145
146
|
{ "asymp", 8776 }, // almost equal to = asymptotic to
|
146
147
|
{ "atilde", 227 }, // latin small letter a with tilde
|
@@ -301,7 +302,7 @@ static struct _slot entities[] = {
|
|
301
302
|
{ "tau", 964 }, // greek small letter tau, U+03C4 ISOgrk3
|
302
303
|
{ "there4", 8756 }, // therefore, U+2234 ISOtech
|
303
304
|
{ "theta", 952 }, // greek small letter theta
|
304
|
-
{ "thetasym", 977 }
|
305
|
+
{ "thetasym", 977 },// greek small letter theta symbol
|
305
306
|
{ "thinsp", 8201 }, // thin space, U+2009 ISOpub
|
306
307
|
{ "thorn", 254 }, // latin small letter thorn
|
307
308
|
{ "tilde", 732 }, // - small tilde, U+02DC ISOdia
|