ox 1.8.4 → 1.8.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +2 -6
- data/ext/ox/cache8.c +1 -1
- data/ext/ox/cache8_test.c +3 -3
- data/ext/ox/dump.c +7 -1
- data/ext/ox/extconf.rb +1 -0
- data/ext/ox/gen_load.c +78 -0
- data/ext/ox/obj_load.c +28 -3
- data/ext/ox/ox.c +30 -0
- data/ext/ox/ox.h +5 -0
- data/ext/ox/parse.c +18 -0
- data/ext/ox/sax.c +54 -3
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -34,13 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
-
### Release 1.8.4
|
37
|
+
### Release 1.8.5
|
38
38
|
|
39
|
-
-
|
40
|
-
|
41
|
-
### Release 1.8.3
|
42
|
-
|
43
|
-
- Sax parser now honors encoding specification in the xml prolog correctly.
|
39
|
+
- added encoding support for JRuby where possible when in 1.9 mode.
|
44
40
|
|
45
41
|
## <a name="description">Description</a>
|
46
42
|
|
data/ext/ox/cache8.c
CHANGED
@@ -94,7 +94,7 @@ slot_print(Cache8 c, sid_t key, unsigned int depth) {
|
|
94
94
|
k = (k8 << BITS) | i;
|
95
95
|
/*printf("*** key: 0x%016llx depth: %u i: %u\n", k, depth, i); */
|
96
96
|
if (DEPTH - 1 == depth) {
|
97
|
-
printf("0x%016llx: %4llu\n", k, b->value);
|
97
|
+
printf("0x%016llx: %4llu\n", (unsigned long long)k, (unsigned long long)b->value);
|
98
98
|
} else {
|
99
99
|
slot_print(b->child, k, depth + 1);
|
100
100
|
}
|
data/ext/ox/cache8_test.c
CHANGED
@@ -55,13 +55,13 @@ ox_cache8_test() {
|
|
55
55
|
v = ox_cache8_get(c, *d, &slot);
|
56
56
|
if (0 == v) {
|
57
57
|
if (0 == slot) {
|
58
|
-
printf("*** failed to get a slot for 0x%016llx\n", *d);
|
58
|
+
printf("*** failed to get a slot for 0x%016llx\n", (unsigned long long)*d);
|
59
59
|
} else {
|
60
|
-
printf("*** adding 0x%016llx to cache with value %llu\n", *d, cnt);
|
60
|
+
printf("*** adding 0x%016llx to cache with value %llu\n", (unsigned long long)*d, (unsigned long long)cnt);
|
61
61
|
*slot = cnt++;
|
62
62
|
}
|
63
63
|
} else {
|
64
|
-
printf("*** get on 0x%016llx returned %llu\n", *d, v);
|
64
|
+
printf("*** get on 0x%016llx returned %llu\n", (unsigned long long)*d, (unsigned long long)v);
|
65
65
|
}
|
66
66
|
/*ox_cache8_print(c); */
|
67
67
|
}
|
data/ext/ox/dump.c
CHANGED
@@ -1135,10 +1135,16 @@ dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
|
|
1135
1135
|
|
1136
1136
|
static int
|
1137
1137
|
dump_gen_attr(VALUE key, VALUE value, Out out) {
|
1138
|
+
#if HAS_PRIVATE_ENCODING
|
1139
|
+
// There seems to be a bug in jruby for converting symbols to strings and preserving the encoding. This is a work
|
1140
|
+
// around.
|
1141
|
+
const char *ks = rb_str_ptr(rb_String(key));
|
1142
|
+
#else
|
1138
1143
|
const char *ks = (T_SYMBOL == rb_type(key)) ? rb_id2name(SYM2ID(key)) : StringValuePtr(key);
|
1144
|
+
#endif
|
1139
1145
|
size_t klen = strlen(ks);
|
1140
1146
|
size_t size = 4 + klen + RSTRING_LEN(value);
|
1141
|
-
|
1147
|
+
|
1142
1148
|
if (out->end - out->cur <= (long)size) {
|
1143
1149
|
grow(out, size);
|
1144
1150
|
}
|
data/ext/ox/extconf.rb
CHANGED
@@ -23,6 +23,7 @@ dflags = {
|
|
23
23
|
!(platform.include?('solaris') || platform.include?('linux') || RUBY_PLATFORM =~ /(win|w)32$/)) ? 1 : 0,
|
24
24
|
'HAS_ENCODING_SUPPORT' => (('ruby' == type || 'rubinius' == type) &&
|
25
25
|
(('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0,
|
26
|
+
'HAS_PRIVATE_ENCODING' => ('jruby' == type && '1' == version[0] && '9' == version[1]) ? 1 : 0,
|
26
27
|
'HAS_NANO_TIME' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0,
|
27
28
|
'HAS_RSTRUCT' => ('ruby' == type || 'ree' == type) ? 1 : 0,
|
28
29
|
'HAS_IVAR_HELPERS' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0,
|
data/ext/ox/gen_load.c
CHANGED
@@ -125,6 +125,15 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
|
125
125
|
} else {
|
126
126
|
sym = ID2SYM(rb_intern(attrs->name));
|
127
127
|
}
|
128
|
+
#elif HAS_PRIVATE_ENCODING
|
129
|
+
if (Qnil != pi->options->rb_enc) {
|
130
|
+
VALUE rstr = rb_str_new2(attrs->name);
|
131
|
+
|
132
|
+
rb_funcall(rstr, ox_force_encoding_id, 1, pi->options->rb_enc);
|
133
|
+
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
134
|
+
} else {
|
135
|
+
sym = ID2SYM(rb_intern(attrs->name));
|
136
|
+
}
|
128
137
|
#else
|
129
138
|
sym = ID2SYM(rb_intern(attrs->name));
|
130
139
|
#endif
|
@@ -136,6 +145,10 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
|
136
145
|
if (0 != pi->options->rb_enc) {
|
137
146
|
rb_enc_associate(rstr, pi->options->rb_enc);
|
138
147
|
}
|
148
|
+
#elif HAS_PRIVATE_ENCODING
|
149
|
+
if (Qnil != pi->options->rb_enc) {
|
150
|
+
rb_funcall(rstr, ox_force_encoding_id, 1, pi->options->rb_enc);
|
151
|
+
}
|
139
152
|
#endif
|
140
153
|
rb_hash_aset(ah, rstr, rb_str_new2(attrs->value));
|
141
154
|
}
|
@@ -143,6 +156,10 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
|
143
156
|
if (0 == strcmp("encoding", attrs->name)) {
|
144
157
|
pi->options->rb_enc = rb_enc_find(attrs->value);
|
145
158
|
}
|
159
|
+
#elif HAS_PRIVATE_ENCODING
|
160
|
+
if (0 == strcmp("encoding", attrs->name)) {
|
161
|
+
pi->options->rb_enc = rb_str_new2(attrs->value);
|
162
|
+
}
|
146
163
|
#endif
|
147
164
|
}
|
148
165
|
nodes = rb_ary_new();
|
@@ -212,6 +229,10 @@ add_doctype(PInfo pi, const char *docType) {
|
|
212
229
|
if (0 != pi->options->rb_enc) {
|
213
230
|
rb_enc_associate(s, pi->options->rb_enc);
|
214
231
|
}
|
232
|
+
#elif HAS_PRIVATE_ENCODING
|
233
|
+
if (Qnil != pi->options->rb_enc) {
|
234
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
235
|
+
}
|
215
236
|
#endif
|
216
237
|
rb_ivar_set(n, ox_at_value_id, s);
|
217
238
|
if (0 == pi->h) { /* top level object */
|
@@ -229,6 +250,10 @@ add_comment(PInfo pi, const char *comment) {
|
|
229
250
|
if (0 != pi->options->rb_enc) {
|
230
251
|
rb_enc_associate(s, pi->options->rb_enc);
|
231
252
|
}
|
253
|
+
#elif HAS_PRIVATE_ENCODING
|
254
|
+
if (Qnil != pi->options->rb_enc) {
|
255
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
256
|
+
}
|
232
257
|
#endif
|
233
258
|
rb_ivar_set(n, ox_at_value_id, s);
|
234
259
|
if (0 == pi->h) { /* top level object */
|
@@ -246,6 +271,10 @@ add_cdata(PInfo pi, const char *cdata, size_t len) {
|
|
246
271
|
if (0 != pi->options->rb_enc) {
|
247
272
|
rb_enc_associate(s, pi->options->rb_enc);
|
248
273
|
}
|
274
|
+
#elif HAS_PRIVATE_ENCODING
|
275
|
+
if (Qnil != pi->options->rb_enc) {
|
276
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
277
|
+
}
|
249
278
|
#endif
|
250
279
|
rb_ivar_set(n, ox_at_value_id, s);
|
251
280
|
if (0 == pi->h) { /* top level object */
|
@@ -262,6 +291,10 @@ add_text(PInfo pi, char *text, int closed) {
|
|
262
291
|
if (0 != pi->options->rb_enc) {
|
263
292
|
rb_enc_associate(s, pi->options->rb_enc);
|
264
293
|
}
|
294
|
+
#elif HAS_PRIVATE_ENCODING
|
295
|
+
if (Qnil != pi->options->rb_enc) {
|
296
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
297
|
+
}
|
265
298
|
#endif
|
266
299
|
if (0 == pi->h) { /* top level object */
|
267
300
|
create_doc(pi);
|
@@ -278,6 +311,10 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
278
311
|
if (0 != pi->options->rb_enc) {
|
279
312
|
rb_enc_associate(s, pi->options->rb_enc);
|
280
313
|
}
|
314
|
+
#elif HAS_PRIVATE_ENCODING
|
315
|
+
if (Qnil != pi->options->rb_enc) {
|
316
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
317
|
+
}
|
281
318
|
#endif
|
282
319
|
e = rb_obj_alloc(ox_element_clas);
|
283
320
|
rb_ivar_set(e, ox_at_value_id, s);
|
@@ -299,6 +336,15 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
299
336
|
} else {
|
300
337
|
sym = ID2SYM(rb_intern(attrs->name));
|
301
338
|
}
|
339
|
+
#elif HAS_PRIVATE_ENCODING
|
340
|
+
if (Qnil != pi->options->rb_enc) {
|
341
|
+
VALUE rstr = rb_str_new2(attrs->name);
|
342
|
+
|
343
|
+
rb_funcall(rstr, ox_force_encoding_id, 1, pi->options->rb_enc);
|
344
|
+
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
345
|
+
} else {
|
346
|
+
sym = ID2SYM(rb_intern(attrs->name));
|
347
|
+
}
|
302
348
|
#else
|
303
349
|
sym = ID2SYM(rb_intern(attrs->name));
|
304
350
|
#endif
|
@@ -310,6 +356,10 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
310
356
|
if (0 != pi->options->rb_enc) {
|
311
357
|
rb_enc_associate(sym, pi->options->rb_enc);
|
312
358
|
}
|
359
|
+
#elif HAS_PRIVATE_ENCODING
|
360
|
+
if (Qnil != pi->options->rb_enc) {
|
361
|
+
rb_funcall(sym, ox_force_encoding_id, 1, pi->options->rb_enc);
|
362
|
+
}
|
313
363
|
#endif
|
314
364
|
}
|
315
365
|
s = rb_str_new2(attrs->value);
|
@@ -317,6 +367,10 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
317
367
|
if (0 != pi->options->rb_enc) {
|
318
368
|
rb_enc_associate(s, pi->options->rb_enc);
|
319
369
|
}
|
370
|
+
#elif HAS_PRIVATE_ENCODING
|
371
|
+
if (Qnil != pi->options->rb_enc) {
|
372
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
373
|
+
}
|
320
374
|
#endif
|
321
375
|
rb_hash_aset(ah, sym, s);
|
322
376
|
}
|
@@ -364,6 +418,13 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
|
|
364
418
|
rb_enc_associate(c, pi->options->rb_enc);
|
365
419
|
}
|
366
420
|
}
|
421
|
+
#elif HAS_PRIVATE_ENCODING
|
422
|
+
if (Qnil != pi->options->rb_enc) {
|
423
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
424
|
+
if (0 != content) {
|
425
|
+
rb_funcall(c, ox_force_encoding_id, 1, pi->options->rb_enc);
|
426
|
+
}
|
427
|
+
}
|
367
428
|
#endif
|
368
429
|
inst = rb_obj_alloc(ox_instruct_clas);
|
369
430
|
rb_ivar_set(inst, ox_at_value_id, s);
|
@@ -387,6 +448,15 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
|
|
387
448
|
} else {
|
388
449
|
sym = ID2SYM(rb_intern(attrs->name));
|
389
450
|
}
|
451
|
+
#elif HAS_PRIVATE_ENCODING
|
452
|
+
if (Qnil != pi->options->rb_enc) {
|
453
|
+
VALUE rstr = rb_str_new2(attrs->name);
|
454
|
+
|
455
|
+
rb_funcall(rstr, ox_force_encoding_id, 1, pi->options->rb_enc);
|
456
|
+
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
457
|
+
} else {
|
458
|
+
sym = ID2SYM(rb_intern(attrs->name));
|
459
|
+
}
|
390
460
|
#else
|
391
461
|
sym = ID2SYM(rb_intern(attrs->name));
|
392
462
|
#endif
|
@@ -398,6 +468,10 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
|
|
398
468
|
if (0 != pi->options->rb_enc) {
|
399
469
|
rb_enc_associate(sym, pi->options->rb_enc);
|
400
470
|
}
|
471
|
+
#elif HAS_PRIVATE_ENCODING
|
472
|
+
if (Qnil != pi->options->rb_enc) {
|
473
|
+
rb_funcall(sym, ox_force_encoding_id, 1, pi->options->rb_enc);
|
474
|
+
}
|
401
475
|
#endif
|
402
476
|
}
|
403
477
|
s = rb_str_new2(attrs->value);
|
@@ -405,6 +479,10 @@ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
|
|
405
479
|
if (0 != pi->options->rb_enc) {
|
406
480
|
rb_enc_associate(s, pi->options->rb_enc);
|
407
481
|
}
|
482
|
+
#elif HAS_PRIVATE_ENCODING
|
483
|
+
if (Qnil != pi->options->rb_enc) {
|
484
|
+
rb_funcall(s, ox_force_encoding_id, 1, pi->options->rb_enc);
|
485
|
+
}
|
408
486
|
#endif
|
409
487
|
rb_hash_aset(ah, sym, s);
|
410
488
|
}
|
data/ext/ox/obj_load.c
CHANGED
@@ -183,6 +183,8 @@ structname2obj(const char *name) {
|
|
183
183
|
/* use encoding as the indicator for Ruby 1.8.7 or 1.9.x */
|
184
184
|
#if HAS_ENCODING_SUPPORT
|
185
185
|
return rb_struct_alloc_noinit(ost);
|
186
|
+
#elif HAS_PRIVATE_ENCODING
|
187
|
+
return rb_struct_alloc_noinit(ost);
|
186
188
|
#else
|
187
189
|
return rb_struct_new(ost);
|
188
190
|
#endif
|
@@ -389,6 +391,15 @@ parse_regexp(const char *text) {
|
|
389
391
|
default: break;
|
390
392
|
}
|
391
393
|
}
|
394
|
+
#elif HAS_PRIVATE_ENCODING
|
395
|
+
for (; text < te && '/' != *te; te--) {
|
396
|
+
switch (*te) {
|
397
|
+
case 'i': options |= ONIG_OPTION_IGNORECASE; break;
|
398
|
+
case 'm': options |= ONIG_OPTION_MULTILINE; break;
|
399
|
+
case 'x': options |= ONIG_OPTION_EXTEND; break;
|
400
|
+
default: break;
|
401
|
+
}
|
402
|
+
}
|
392
403
|
#endif
|
393
404
|
return rb_reg_new(text + 1, te - text - 1, options);
|
394
405
|
}
|
@@ -402,6 +413,12 @@ instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
|
|
402
413
|
pi->options->rb_enc = rb_enc_find(attrs->value);
|
403
414
|
}
|
404
415
|
}
|
416
|
+
#elif HAS_PRIVATE_ENCODING
|
417
|
+
for (; 0 != attrs->name; attrs++) {
|
418
|
+
if (0 == strcmp("encoding", attrs->name)) {
|
419
|
+
pi->options->rb_enc = rb_str_new2(attrs->value);
|
420
|
+
}
|
421
|
+
}
|
405
422
|
#endif
|
406
423
|
}
|
407
424
|
}
|
@@ -425,6 +442,10 @@ add_text(PInfo pi, char *text, int closed) {
|
|
425
442
|
if (0 != pi->options->rb_enc) {
|
426
443
|
rb_enc_associate(pi->h->obj, pi->options->rb_enc);
|
427
444
|
}
|
445
|
+
#elif HAS_PRIVATE_ENCODING
|
446
|
+
if (Qnil != pi->options->rb_enc) {
|
447
|
+
rb_funcall(pi->h->obj, ox_force_encoding_id, 1, pi->options->rb_enc);
|
448
|
+
}
|
428
449
|
#endif
|
429
450
|
if (0 != pi->circ_array) {
|
430
451
|
circ_array_set(pi->circ_array, pi->h->obj, (unsigned long)pi->id);
|
@@ -463,7 +484,7 @@ add_text(PInfo pi, char *text, int closed) {
|
|
463
484
|
VALUE *slot;
|
464
485
|
|
465
486
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, text, &slot))) {
|
466
|
-
sym = str2sym(text, pi->options->rb_enc);
|
487
|
+
sym = str2sym(text, (void*)pi->options->rb_enc);
|
467
488
|
*slot = sym;
|
468
489
|
}
|
469
490
|
pi->h->obj = sym;
|
@@ -492,6 +513,10 @@ add_text(PInfo pi, char *text, int closed) {
|
|
492
513
|
if (0 != pi->options->rb_enc) {
|
493
514
|
rb_enc_associate(v, pi->options->rb_enc);
|
494
515
|
}
|
516
|
+
#elif HAS_PRIVATE_ENCODING
|
517
|
+
if (0 != pi->options->rb_enc) {
|
518
|
+
rb_funcall(v, ox_force_encoding_id, 1, pi->options->rb_enc);
|
519
|
+
}
|
495
520
|
#endif
|
496
521
|
if (0 != pi->circ_array) {
|
497
522
|
circ_array_set(pi->circ_array, v, (unsigned long)pi->h->obj);
|
@@ -508,7 +533,7 @@ add_text(PInfo pi, char *text, int closed) {
|
|
508
533
|
|
509
534
|
from_base64(text, (uchar*)str);
|
510
535
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot))) {
|
511
|
-
sym = str2sym(str, pi->options->rb_enc);
|
536
|
+
sym = str2sym(str, (void*)pi->options->rb_enc);
|
512
537
|
*slot = sym;
|
513
538
|
}
|
514
539
|
pi->h->obj = sym;
|
@@ -572,7 +597,7 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
572
597
|
}
|
573
598
|
h = pi->h;
|
574
599
|
h->type = *ename;
|
575
|
-
h->var = get_var_sym_from_attrs(attrs, pi->options->rb_enc);
|
600
|
+
h->var = get_var_sym_from_attrs(attrs, (void*)pi->options->rb_enc);
|
576
601
|
switch (h->type) {
|
577
602
|
case NilClassCode:
|
578
603
|
h->obj = Qnil;
|
data/ext/ox/ox.c
CHANGED
@@ -66,6 +66,7 @@ ID ox_error_id;
|
|
66
66
|
ID ox_excl_id;
|
67
67
|
ID ox_external_encoding_id;
|
68
68
|
ID ox_fileno_id;
|
69
|
+
ID ox_force_encoding_id;
|
69
70
|
ID ox_inspect_id;
|
70
71
|
ID ox_instruct_id;
|
71
72
|
ID ox_jd_id;
|
@@ -134,6 +135,8 @@ static VALUE xsd_date_sym;
|
|
134
135
|
|
135
136
|
#if HAS_ENCODING_SUPPORT
|
136
137
|
rb_encoding *ox_utf8_encoding = 0;
|
138
|
+
#elif HAS_PRIVATE_ENCODING
|
139
|
+
VALUE ox_utf8_encoding = Qnil;
|
137
140
|
#else
|
138
141
|
void *ox_utf8_encoding = 0;
|
139
142
|
#endif
|
@@ -150,7 +153,11 @@ struct _Options ox_default_options = {
|
|
150
153
|
NoMode, /* mode */
|
151
154
|
StrictEffort, /* effort */
|
152
155
|
Yes, /* sym_keys */
|
156
|
+
#if HAS_PRIVATE_ENCODING
|
157
|
+
Qnil /* rb_enc */
|
158
|
+
#else
|
153
159
|
0 /* rb_enc */
|
160
|
+
#endif
|
154
161
|
};
|
155
162
|
|
156
163
|
extern ParseCallbacks ox_obj_callbacks;
|
@@ -297,6 +304,9 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
297
304
|
strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
|
298
305
|
#ifdef HAVE_RUBY_ENCODING_H
|
299
306
|
ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
|
307
|
+
#elif HAS_PRIVATE_ENCODING
|
308
|
+
ox_default_options.rb_enc = rb_str_new2(ox_default_options.encoding);
|
309
|
+
rb_gc_register_address(&ox_default_options.rb_enc);
|
300
310
|
#endif
|
301
311
|
}
|
302
312
|
|
@@ -468,6 +478,17 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
|
|
468
478
|
} else if (0 == options.rb_enc) {
|
469
479
|
options.rb_enc = rb_enc_find(options.encoding);
|
470
480
|
}
|
481
|
+
#elif HAS_PRIVATE_ENCODING
|
482
|
+
if ('\0' == *options.encoding) {
|
483
|
+
if (Qnil != encoding) {
|
484
|
+
options.rb_enc = encoding;
|
485
|
+
} else {
|
486
|
+
options.rb_enc = Qnil;
|
487
|
+
}
|
488
|
+
} else if (0 == options.rb_enc) {
|
489
|
+
options.rb_enc = rb_str_new2(options.encoding);
|
490
|
+
rb_gc_register_address(&options.rb_enc);
|
491
|
+
}
|
471
492
|
#endif
|
472
493
|
xml = defuse_bom(xml, &options);
|
473
494
|
switch (options.mode) {
|
@@ -525,6 +546,8 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
525
546
|
}
|
526
547
|
#ifdef HAVE_RUBY_ENCODING_H
|
527
548
|
encoding = rb_obj_encoding(*argv);
|
549
|
+
#elif HAS_PRIVATE_ENCODING
|
550
|
+
encoding = rb_funcall(*argv, rb_intern("encoding"), 0);
|
528
551
|
#else
|
529
552
|
encoding = Qnil;
|
530
553
|
#endif
|
@@ -706,6 +729,10 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
706
729
|
if ('\0' != *copts.encoding) {
|
707
730
|
rb_enc_associate(rstr, rb_enc_find(copts.encoding));
|
708
731
|
}
|
732
|
+
#elif HAS_PRIVATE_ENCODING
|
733
|
+
if ('\0' != *copts.encoding) {
|
734
|
+
rb_funcall(rstr, ox_force_encoding_id, 1, rb_str_new2(copts.encoding));
|
735
|
+
}
|
709
736
|
#endif
|
710
737
|
xfree(xml);
|
711
738
|
|
@@ -793,6 +820,7 @@ void Init_ox() {
|
|
793
820
|
ox_error_id = rb_intern("error");
|
794
821
|
ox_excl_id = rb_intern("@excl");
|
795
822
|
ox_fileno_id = rb_intern("fileno");
|
823
|
+
ox_force_encoding_id = rb_intern("force_encoding");
|
796
824
|
ox_inspect_id = rb_intern("inspect");
|
797
825
|
ox_instruct_id = rb_intern("instruct");
|
798
826
|
ox_jd_id = rb_intern("jd");
|
@@ -864,6 +892,8 @@ void Init_ox() {
|
|
864
892
|
rb_define_module_function(Ox, "cache8_test", cache8_test, 0);
|
865
893
|
#if HAS_ENCODING_SUPPORT
|
866
894
|
ox_utf8_encoding = rb_enc_find("UTF-8");
|
895
|
+
#elif HAS_PRIVATE_ENCODING
|
896
|
+
ox_utf8_encoding = rb_str_new2("UTF-8"); rb_gc_register_address(&ox_utf8_encoding);
|
867
897
|
#endif
|
868
898
|
}
|
869
899
|
|
data/ext/ox/ox.h
CHANGED
@@ -188,6 +188,8 @@ typedef struct _Options {
|
|
188
188
|
char sym_keys; /* symbolize keys */
|
189
189
|
#ifdef HAVE_RUBY_ENCODING_H
|
190
190
|
rb_encoding *rb_enc;
|
191
|
+
#elif HAS_PRIVATE_ENCODING
|
192
|
+
VALUE rb_enc;
|
191
193
|
#else
|
192
194
|
void *rb_enc;
|
193
195
|
#endif
|
@@ -238,6 +240,7 @@ extern ID ox_error_id;
|
|
238
240
|
extern ID ox_excl_id;
|
239
241
|
extern ID ox_external_encoding_id;
|
240
242
|
extern ID ox_fileno_id;
|
243
|
+
extern ID ox_force_encoding_id;
|
241
244
|
extern ID ox_inspect_id;
|
242
245
|
extern ID ox_instruct_id;
|
243
246
|
extern ID ox_jd_id;
|
@@ -263,6 +266,8 @@ extern ID ox_value_id;
|
|
263
266
|
|
264
267
|
#if HAS_ENCODING_SUPPORT
|
265
268
|
extern rb_encoding *ox_utf8_encoding;
|
269
|
+
#elif HAS_PRIVATE_ENCODING
|
270
|
+
extern VALUE ox_utf8_encoding;
|
266
271
|
#else
|
267
272
|
extern void *ox_utf8_encoding;
|
268
273
|
#endif
|
data/ext/ox/parse.c
CHANGED
@@ -828,9 +828,18 @@ read_coded_chars(PInfo pi, char *text) {
|
|
828
828
|
pi->s = s;
|
829
829
|
if (u <= 0x000000000000007FULL) {
|
830
830
|
*text++ = (char)u;
|
831
|
+
#if HAS_PRIVATE_ENCODING
|
832
|
+
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
833
|
+
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
834
|
+
#else
|
831
835
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
836
|
+
#endif
|
832
837
|
text = ucs_to_utf8_chars(text, u);
|
838
|
+
#if HAS_PRIVATE_ENCODING
|
839
|
+
} else if (Qnil == pi->options->rb_enc) {
|
840
|
+
#else
|
833
841
|
} else if (0 == pi->options->rb_enc) {
|
842
|
+
#endif
|
834
843
|
pi->options->rb_enc = ox_utf8_encoding;
|
835
844
|
text = ucs_to_utf8_chars(text, u);
|
836
845
|
} else {
|
@@ -888,10 +897,19 @@ collapse_special(PInfo pi, char *str) {
|
|
888
897
|
}
|
889
898
|
if (u <= 0x000000000000007FULL) {
|
890
899
|
*b++ = (char)u;
|
900
|
+
#if HAS_PRIVATE_ENCODING
|
901
|
+
} else if (ox_utf8_encoding == pi->options->rb_enc ||
|
902
|
+
0 == strcasecmp(rb_str_ptr(rb_String(ox_utf8_encoding)), rb_str_ptr(rb_String(pi->options->rb_enc)))) {
|
903
|
+
#else
|
891
904
|
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
905
|
+
#endif
|
892
906
|
b = ucs_to_utf8_chars(b, u);
|
893
907
|
/* TBD support UTF-16 */
|
908
|
+
#if HAS_PRIVATE_ENCODING
|
909
|
+
} else if (Qnil == pi->options->rb_enc) {
|
910
|
+
#else
|
894
911
|
} else if (0 == pi->options->rb_enc) {
|
912
|
+
#endif
|
895
913
|
pi->options->rb_enc = ox_utf8_encoding;
|
896
914
|
b = ucs_to_utf8_chars(b, u);
|
897
915
|
} else {
|
data/ext/ox/sax.c
CHANGED
@@ -74,6 +74,8 @@ typedef struct _SaxDrive {
|
|
74
74
|
int has_error;
|
75
75
|
#if HAS_ENCODING_SUPPORT
|
76
76
|
rb_encoding *encoding;
|
77
|
+
#elif HAS_PRIVATE_ENCODING
|
78
|
+
VALUE encoding;
|
77
79
|
#endif
|
78
80
|
} *SaxDrive;
|
79
81
|
|
@@ -214,6 +216,15 @@ str2sym(const char *str, SaxDrive dr) {
|
|
214
216
|
} else {
|
215
217
|
sym = ID2SYM(rb_intern(str));
|
216
218
|
}
|
219
|
+
#elif HAS_PRIVATE_ENCODING
|
220
|
+
if (Qnil != dr->encoding) {
|
221
|
+
VALUE rstr = rb_str_new2(str);
|
222
|
+
|
223
|
+
rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
|
224
|
+
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
225
|
+
} else {
|
226
|
+
sym = ID2SYM(rb_intern(str));
|
227
|
+
}
|
217
228
|
#else
|
218
229
|
sym = ID2SYM(rb_intern(str));
|
219
230
|
#endif
|
@@ -339,6 +350,18 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
339
350
|
} else {
|
340
351
|
dr->encoding = rb_enc_find(ox_default_options.encoding);
|
341
352
|
}
|
353
|
+
#elif HAS_PRIVATE_ENCODING
|
354
|
+
if ('\0' == *ox_default_options.encoding) {
|
355
|
+
VALUE encoding;
|
356
|
+
|
357
|
+
if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
|
358
|
+
dr->encoding = encoding;
|
359
|
+
} else {
|
360
|
+
dr->encoding = Qnil;
|
361
|
+
}
|
362
|
+
} else {
|
363
|
+
dr->encoding = rb_str_new2(ox_default_options.encoding);
|
364
|
+
}
|
342
365
|
#endif
|
343
366
|
}
|
344
367
|
|
@@ -412,7 +435,6 @@ static int
|
|
412
435
|
read_children(SaxDrive dr, int first) {
|
413
436
|
int err = 0;
|
414
437
|
int element_read = !first;
|
415
|
-
int doctype_read = !first;
|
416
438
|
char c;
|
417
439
|
|
418
440
|
while (!err) {
|
@@ -423,6 +445,8 @@ read_children(SaxDrive dr, int first) {
|
|
423
445
|
if (0xBB == (uint8_t)sax_drive_get(dr) && 0xBF == (uint8_t)sax_drive_get(dr)) {
|
424
446
|
#if HAS_ENCODING_SUPPORT
|
425
447
|
dr->encoding = ox_utf8_encoding;
|
448
|
+
#elif HAS_PRIVATE_ENCODING
|
449
|
+
dr->encoding = ox_utf8_encoding;
|
426
450
|
#endif
|
427
451
|
c = sax_drive_get(dr);
|
428
452
|
} else {
|
@@ -477,7 +501,6 @@ read_children(SaxDrive dr, int first) {
|
|
477
501
|
if (element_read || !first) {
|
478
502
|
sax_drive_error(dr, "invalid format, DOCTYPE can not come after an element", 0);
|
479
503
|
}
|
480
|
-
doctype_read = 1;
|
481
504
|
err = read_doctype(dr);
|
482
505
|
} else if (0 == strncmp("[CDATA[", dr->str, 7)) {
|
483
506
|
err = read_cdata(dr);
|
@@ -575,6 +598,10 @@ read_instruction(SaxDrive dr) {
|
|
575
598
|
if (0 != dr->encoding) {
|
576
599
|
rb_enc_associate(args[0], dr->encoding);
|
577
600
|
}
|
601
|
+
#elif HAS_PRIVATE_ENCODING
|
602
|
+
if (Qnil != dr->encoding) {
|
603
|
+
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
604
|
+
}
|
578
605
|
#endif
|
579
606
|
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
580
607
|
}
|
@@ -656,6 +683,10 @@ read_cdata(SaxDrive dr) {
|
|
656
683
|
if (0 != dr->encoding) {
|
657
684
|
rb_enc_associate(args[0], dr->encoding);
|
658
685
|
}
|
686
|
+
#elif HAS_PRIVATE_ENCODING
|
687
|
+
if (Qnil != dr->encoding) {
|
688
|
+
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
689
|
+
}
|
659
690
|
#endif
|
660
691
|
rb_funcall2(dr->handler, ox_cdata_id, 1, args);
|
661
692
|
}
|
@@ -700,6 +731,10 @@ read_comment(SaxDrive dr) {
|
|
700
731
|
if (0 != dr->encoding) {
|
701
732
|
rb_enc_associate(args[0], dr->encoding);
|
702
733
|
}
|
734
|
+
#elif HAS_PRIVATE_ENCODING
|
735
|
+
if (Qnil != dr->encoding) {
|
736
|
+
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
737
|
+
}
|
703
738
|
#endif
|
704
739
|
rb_funcall2(dr->handler, ox_comment_id, 1, args);
|
705
740
|
}
|
@@ -716,11 +751,13 @@ read_element(SaxDrive dr) {
|
|
716
751
|
VALUE name = Qnil;
|
717
752
|
const char *err;
|
718
753
|
char c;
|
754
|
+
char *ename = 0;
|
719
755
|
int closed;
|
720
756
|
|
721
757
|
if ('\0' == (c = read_name_token(dr))) {
|
722
758
|
return -1;
|
723
759
|
}
|
760
|
+
ename = dr->str;
|
724
761
|
name = str2sym(dr->str, dr);
|
725
762
|
if (dr->has_start_element) {
|
726
763
|
VALUE args[1];
|
@@ -757,7 +794,7 @@ read_element(SaxDrive dr) {
|
|
757
794
|
if (0 != read_children(dr, 0)) {
|
758
795
|
return -1;
|
759
796
|
}
|
760
|
-
if (0 != strcmp(dr->str,
|
797
|
+
if (0 != strcmp(dr->str, ename)) {
|
761
798
|
sax_drive_error(dr, "invalid format, element start and end names do not match", 1);
|
762
799
|
return -1;
|
763
800
|
}
|
@@ -804,6 +841,10 @@ read_text(SaxDrive dr) {
|
|
804
841
|
if (0 != dr->encoding) {
|
805
842
|
rb_enc_associate(args[0], dr->encoding);
|
806
843
|
}
|
844
|
+
#elif HAS_PRIVATE_ENCODING
|
845
|
+
if (Qnil != dr->encoding) {
|
846
|
+
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
847
|
+
}
|
807
848
|
#endif
|
808
849
|
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
809
850
|
}
|
@@ -848,6 +889,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
848
889
|
if (is_encoding) {
|
849
890
|
#if HAS_ENCODING_SUPPORT
|
850
891
|
dr->encoding = rb_enc_find(dr->str);
|
892
|
+
#elif HAS_PRIVATE_ENCODING
|
893
|
+
dr->encoding = rb_str_new2(dr->str);
|
851
894
|
#endif
|
852
895
|
is_encoding = 0;
|
853
896
|
}
|
@@ -869,6 +912,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
869
912
|
if (0 != dr->encoding) {
|
870
913
|
rb_enc_associate(args[1], dr->encoding);
|
871
914
|
}
|
915
|
+
#elif HAS_PRIVATE_ENCODING
|
916
|
+
if (Qnil != dr->encoding) {
|
917
|
+
rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
|
918
|
+
}
|
872
919
|
#endif
|
873
920
|
rb_funcall2(dr->handler, ox_attr_id, 2, args);
|
874
921
|
}
|
@@ -1216,6 +1263,10 @@ sax_value_as_s(VALUE self) {
|
|
1216
1263
|
if (0 != dr->encoding) {
|
1217
1264
|
rb_enc_associate(rs, dr->encoding);
|
1218
1265
|
}
|
1266
|
+
#elif HAS_PRIVATE_ENCODING
|
1267
|
+
if (Qnil != dr->encoding) {
|
1268
|
+
rb_funcall(rs, ox_force_encoding_id, 1, dr->encoding);
|
1269
|
+
}
|
1219
1270
|
#endif
|
1220
1271
|
return rs;
|
1221
1272
|
}
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.4
|
4
|
+
version: 1.8.5
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-02-03 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "A fast XML parser and object serializer that uses only standard C
|
15
15
|
lib.\n \nOptimized XML (Ox), as the name implies was written to provide
|