ox 1.6.6 → 1.6.7
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +2 -2
- data/ext/ox/gen_load.c +21 -21
- data/ext/ox/obj_load.c +8 -8
- data/ext/ox/ox.c +27 -4
- data/ext/ox/ox.h +6 -5
- data/ext/ox/parse.c +6 -7
- data/ext/ox/sax.c +7 -1
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -34,9 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
-
### Release 1.6.6
|
37
|
+
### Release 1.6.7
|
38
38
|
|
39
|
-
-
|
39
|
+
- Now uses the encoding of the input XML as the default encoding for the parsed output if the default options encoding is not set and the encoding is not set in the XML file prolog.
|
40
40
|
|
41
41
|
## <a name="description">Description</a>
|
42
42
|
|
data/ext/ox/gen_load.c
CHANGED
@@ -116,10 +116,10 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
|
116
116
|
for (; 0 != attrs->name; attrs++) {
|
117
117
|
if (Yes == pi->options->sym_keys) {
|
118
118
|
#if HAS_ENCODING_SUPPORT
|
119
|
-
if (0 != pi->
|
119
|
+
if (0 != pi->options->rb_enc) {
|
120
120
|
VALUE rstr = rb_str_new2(attrs->name);
|
121
121
|
|
122
|
-
rb_enc_associate(rstr, pi->
|
122
|
+
rb_enc_associate(rstr, pi->options->rb_enc);
|
123
123
|
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
124
124
|
} else {
|
125
125
|
sym = ID2SYM(rb_intern(attrs->name));
|
@@ -132,15 +132,15 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
|
132
132
|
VALUE rstr = rb_str_new2(attrs->name);
|
133
133
|
|
134
134
|
#if HAS_ENCODING_SUPPORT
|
135
|
-
if (0 != pi->
|
136
|
-
rb_enc_associate(rstr, pi->
|
135
|
+
if (0 != pi->options->rb_enc) {
|
136
|
+
rb_enc_associate(rstr, pi->options->rb_enc);
|
137
137
|
}
|
138
138
|
#endif
|
139
139
|
rb_hash_aset(ah, rstr, rb_str_new2(attrs->value));
|
140
140
|
}
|
141
141
|
#if HAS_ENCODING_SUPPORT
|
142
142
|
if (0 == strcmp("encoding", attrs->name)) {
|
143
|
-
pi->
|
143
|
+
pi->options->rb_enc = rb_enc_find(attrs->value);
|
144
144
|
}
|
145
145
|
#endif
|
146
146
|
}
|
@@ -210,8 +210,8 @@ add_doctype(PInfo pi, const char *docType) {
|
|
210
210
|
VALUE s = rb_str_new2(docType);
|
211
211
|
|
212
212
|
#if HAS_ENCODING_SUPPORT
|
213
|
-
if (0 != pi->
|
214
|
-
rb_enc_associate(s, pi->
|
213
|
+
if (0 != pi->options->rb_enc) {
|
214
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
215
215
|
}
|
216
216
|
#endif
|
217
217
|
rb_ivar_set(n, ox_at_value_id, s);
|
@@ -227,8 +227,8 @@ add_comment(PInfo pi, const char *comment) {
|
|
227
227
|
VALUE s = rb_str_new2(comment);
|
228
228
|
|
229
229
|
#if HAS_ENCODING_SUPPORT
|
230
|
-
if (0 != pi->
|
231
|
-
rb_enc_associate(s, pi->
|
230
|
+
if (0 != pi->options->rb_enc) {
|
231
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
232
232
|
}
|
233
233
|
#endif
|
234
234
|
rb_ivar_set(n, ox_at_value_id, s);
|
@@ -244,8 +244,8 @@ add_cdata(PInfo pi, const char *cdata, size_t len) {
|
|
244
244
|
VALUE s = rb_str_new2(cdata);
|
245
245
|
|
246
246
|
#if HAS_ENCODING_SUPPORT
|
247
|
-
if (0 != pi->
|
248
|
-
rb_enc_associate(s, pi->
|
247
|
+
if (0 != pi->options->rb_enc) {
|
248
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
249
249
|
}
|
250
250
|
#endif
|
251
251
|
rb_ivar_set(n, ox_at_value_id, s);
|
@@ -260,8 +260,8 @@ add_text(PInfo pi, char *text, int closed) {
|
|
260
260
|
VALUE s = rb_str_new2(text);
|
261
261
|
|
262
262
|
#if HAS_ENCODING_SUPPORT
|
263
|
-
if (0 != pi->
|
264
|
-
rb_enc_associate(s, pi->
|
263
|
+
if (0 != pi->options->rb_enc) {
|
264
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
265
265
|
}
|
266
266
|
#endif
|
267
267
|
if (0 == pi->h) { /* top level object */
|
@@ -276,8 +276,8 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
276
276
|
VALUE s = rb_str_new2(ename);
|
277
277
|
|
278
278
|
#if HAS_ENCODING_SUPPORT
|
279
|
-
if (0 != pi->
|
280
|
-
rb_enc_associate(s, pi->
|
279
|
+
if (0 != pi->options->rb_enc) {
|
280
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
281
281
|
}
|
282
282
|
#endif
|
283
283
|
e = rb_obj_alloc(ox_element_clas);
|
@@ -292,10 +292,10 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
292
292
|
if (Yes == pi->options->sym_keys) {
|
293
293
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot))) {
|
294
294
|
#if HAS_ENCODING_SUPPORT
|
295
|
-
if (0 != pi->
|
295
|
+
if (0 != pi->options->rb_enc) {
|
296
296
|
VALUE rstr = rb_str_new2(attrs->name);
|
297
297
|
|
298
|
-
rb_enc_associate(rstr, pi->
|
298
|
+
rb_enc_associate(rstr, pi->options->rb_enc);
|
299
299
|
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
300
300
|
} else {
|
301
301
|
sym = ID2SYM(rb_intern(attrs->name));
|
@@ -308,15 +308,15 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
308
308
|
} else {
|
309
309
|
sym = rb_str_new2(attrs->name);
|
310
310
|
#if HAS_ENCODING_SUPPORT
|
311
|
-
if (0 != pi->
|
312
|
-
rb_enc_associate(sym, pi->
|
311
|
+
if (0 != pi->options->rb_enc) {
|
312
|
+
rb_enc_associate(sym, pi->options->rb_enc);
|
313
313
|
}
|
314
314
|
#endif
|
315
315
|
}
|
316
316
|
s = rb_str_new2(attrs->value);
|
317
317
|
#if HAS_ENCODING_SUPPORT
|
318
|
-
if (0 != pi->
|
319
|
-
rb_enc_associate(s, pi->
|
318
|
+
if (0 != pi->options->rb_enc) {
|
319
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
320
320
|
}
|
321
321
|
#endif
|
322
322
|
rb_hash_aset(ah, sym, s);
|
data/ext/ox/obj_load.c
CHANGED
@@ -399,7 +399,7 @@ instruct(PInfo pi, const char *target, Attr attrs) {
|
|
399
399
|
#if HAS_ENCODING_SUPPORT
|
400
400
|
for (; 0 != attrs->name; attrs++) {
|
401
401
|
if (0 == strcmp("encoding", attrs->name)) {
|
402
|
-
pi->
|
402
|
+
pi->options->rb_enc = rb_enc_find(attrs->value);
|
403
403
|
}
|
404
404
|
}
|
405
405
|
#endif
|
@@ -422,8 +422,8 @@ add_text(PInfo pi, char *text, int closed) {
|
|
422
422
|
case StringCode:
|
423
423
|
pi->h->obj = rb_str_new2(text);
|
424
424
|
#if HAS_ENCODING_SUPPORT
|
425
|
-
if (0 != pi->
|
426
|
-
rb_enc_associate(pi->h->obj, pi->
|
425
|
+
if (0 != pi->options->rb_enc) {
|
426
|
+
rb_enc_associate(pi->h->obj, pi->options->rb_enc);
|
427
427
|
}
|
428
428
|
#endif
|
429
429
|
if (0 != pi->circ_array) {
|
@@ -463,7 +463,7 @@ add_text(PInfo pi, char *text, int closed) {
|
|
463
463
|
VALUE *slot;
|
464
464
|
|
465
465
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, text, &slot))) {
|
466
|
-
sym = str2sym(text, pi->
|
466
|
+
sym = str2sym(text, pi->options->rb_enc);
|
467
467
|
*slot = sym;
|
468
468
|
}
|
469
469
|
pi->h->obj = sym;
|
@@ -489,8 +489,8 @@ add_text(PInfo pi, char *text, int closed) {
|
|
489
489
|
from_base64(text, (uchar*)str);
|
490
490
|
v = rb_str_new(str, str_size);
|
491
491
|
#if HAS_ENCODING_SUPPORT
|
492
|
-
if (0 != pi->
|
493
|
-
rb_enc_associate(v, pi->
|
492
|
+
if (0 != pi->options->rb_enc) {
|
493
|
+
rb_enc_associate(v, pi->options->rb_enc);
|
494
494
|
}
|
495
495
|
#endif
|
496
496
|
if (0 != pi->circ_array) {
|
@@ -508,7 +508,7 @@ add_text(PInfo pi, char *text, int closed) {
|
|
508
508
|
|
509
509
|
from_base64(text, (uchar*)str);
|
510
510
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot))) {
|
511
|
-
sym = str2sym(str, pi->
|
511
|
+
sym = str2sym(str, pi->options->rb_enc);
|
512
512
|
*slot = sym;
|
513
513
|
}
|
514
514
|
pi->h->obj = sym;
|
@@ -572,7 +572,7 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
|
|
572
572
|
}
|
573
573
|
h = pi->h;
|
574
574
|
h->type = *ename;
|
575
|
-
h->var = get_var_sym_from_attrs(attrs, pi->
|
575
|
+
h->var = get_var_sym_from_attrs(attrs, pi->options->rb_enc);
|
576
576
|
switch (h->type) {
|
577
577
|
case NilClassCode:
|
578
578
|
h->obj = Qnil;
|
data/ext/ox/ox.c
CHANGED
@@ -62,6 +62,7 @@ ID ox_end_element_id;
|
|
62
62
|
ID ox_end_id;
|
63
63
|
ID ox_error_id;
|
64
64
|
ID ox_excl_id;
|
65
|
+
ID ox_external_encoding_id;
|
65
66
|
ID ox_fileno_id;
|
66
67
|
ID ox_inspect_id;
|
67
68
|
ID ox_instruct_id;
|
@@ -145,7 +146,8 @@ struct _Options ox_default_options = {
|
|
145
146
|
No, /* xsd_date */
|
146
147
|
NoMode, /* mode */
|
147
148
|
StrictEffort, /* effort */
|
148
|
-
Yes
|
149
|
+
Yes, /* sym_keys */
|
150
|
+
0 /* rb_enc */
|
149
151
|
};
|
150
152
|
|
151
153
|
extern ParseCallbacks ox_obj_callbacks;
|
@@ -241,6 +243,9 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
241
243
|
} else {
|
242
244
|
Check_Type(v, T_STRING);
|
243
245
|
strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
|
246
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
247
|
+
ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
|
248
|
+
#endif
|
244
249
|
}
|
245
250
|
|
246
251
|
v = rb_hash_aref(opts, indent_sym);
|
@@ -357,7 +362,7 @@ to_gen(VALUE self, VALUE ruby_xml) {
|
|
357
362
|
}
|
358
363
|
|
359
364
|
static VALUE
|
360
|
-
load(char *xml, int argc, VALUE *argv, VALUE self) {
|
365
|
+
load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
|
361
366
|
VALUE obj;
|
362
367
|
struct _Options options = ox_default_options;
|
363
368
|
|
@@ -397,6 +402,17 @@ load(char *xml, int argc, VALUE *argv, VALUE self) {
|
|
397
402
|
options.sym_keys = (Qfalse == v) ? No : Yes;
|
398
403
|
}
|
399
404
|
}
|
405
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
406
|
+
if ('\0' == *options.encoding) {
|
407
|
+
if (Qnil != encoding) {
|
408
|
+
options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding));
|
409
|
+
} else {
|
410
|
+
options.rb_enc = 0;
|
411
|
+
}
|
412
|
+
} else if (0 == options.rb_enc) {
|
413
|
+
options.rb_enc = rb_enc_find(options.encoding);
|
414
|
+
}
|
415
|
+
#endif
|
400
416
|
switch (options.mode) {
|
401
417
|
case ObjMode:
|
402
418
|
obj = ox_parse(xml, ox_obj_callbacks, 0, &options);
|
@@ -440,6 +456,7 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
440
456
|
char *xml;
|
441
457
|
size_t len;
|
442
458
|
VALUE obj;
|
459
|
+
VALUE encoding;
|
443
460
|
|
444
461
|
Check_Type(*argv, T_STRING);
|
445
462
|
/* the xml string gets modified so make a copy of it */
|
@@ -449,8 +466,13 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
449
466
|
} else {
|
450
467
|
xml = ALLOCA_N(char, len);
|
451
468
|
}
|
469
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
470
|
+
encoding = rb_obj_encoding(*argv);
|
471
|
+
#else
|
472
|
+
encoding = Qnil;
|
473
|
+
#endif
|
452
474
|
strcpy(xml, StringValuePtr(*argv));
|
453
|
-
obj = load(xml, argc - 1, argv + 1, self);
|
475
|
+
obj = load(xml, argc - 1, argv + 1, self, encoding);
|
454
476
|
if (SMALL_XML < len) {
|
455
477
|
xfree(xml);
|
456
478
|
}
|
@@ -502,7 +524,7 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
502
524
|
}
|
503
525
|
fclose(f);
|
504
526
|
xml[len] = '\0';
|
505
|
-
obj = load(xml, argc - 1, argv + 1, self);
|
527
|
+
obj = load(xml, argc - 1, argv + 1, self, Qnil);
|
506
528
|
if (SMALL_XML < len) {
|
507
529
|
xfree(xml);
|
508
530
|
}
|
@@ -706,6 +728,7 @@ void Init_ox() {
|
|
706
728
|
ox_comment_id = rb_intern("comment");
|
707
729
|
ox_den_id = rb_intern("@den");
|
708
730
|
ox_doctype_id = rb_intern("doctype");
|
731
|
+
ox_external_encoding_id = rb_intern("external_encoding");
|
709
732
|
ox_end_element_id = rb_intern("end_element");
|
710
733
|
ox_end_id = rb_intern("@end");
|
711
734
|
ox_error_id = rb_intern("error");
|
data/ext/ox/ox.h
CHANGED
@@ -186,6 +186,11 @@ typedef struct _Options {
|
|
186
186
|
char mode; /* LoadMode */
|
187
187
|
char effort; /* Effort */
|
188
188
|
char sym_keys; /* symbolize keys */
|
189
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
190
|
+
rb_encoding *rb_enc;
|
191
|
+
#else
|
192
|
+
void *rb_enc;
|
193
|
+
#endif
|
189
194
|
} *Options;
|
190
195
|
|
191
196
|
/* parse information structure */
|
@@ -197,11 +202,6 @@ struct _PInfo {
|
|
197
202
|
VALUE obj;
|
198
203
|
ParseCallbacks pcb;
|
199
204
|
CircArray circ_array;
|
200
|
-
#ifdef HAVE_RUBY_ENCODING_H
|
201
|
-
rb_encoding *encoding;
|
202
|
-
#else
|
203
|
-
void *encoding;
|
204
|
-
#endif
|
205
205
|
unsigned long id; /* set for text types when cirs_array is set */
|
206
206
|
Options options;
|
207
207
|
};
|
@@ -234,6 +234,7 @@ extern ID ox_end_element_id;
|
|
234
234
|
extern ID ox_end_id;
|
235
235
|
extern ID ox_error_id;
|
236
236
|
extern ID ox_excl_id;
|
237
|
+
extern ID ox_external_encoding_id;
|
237
238
|
extern ID ox_fileno_id;
|
238
239
|
extern ID ox_inspect_id;
|
239
240
|
extern ID ox_instruct_id;
|
data/ext/ox/parse.c
CHANGED
@@ -116,7 +116,6 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
|
|
116
116
|
pi.pcb = pcb;
|
117
117
|
pi.obj = Qnil;
|
118
118
|
pi.circ_array = 0;
|
119
|
-
pi.encoding = 0;
|
120
119
|
pi.options = options;
|
121
120
|
while (1) {
|
122
121
|
next_non_white(&pi); /* skip white space */
|
@@ -797,10 +796,10 @@ read_coded_chars(PInfo pi, char *text) {
|
|
797
796
|
pi->s = s;
|
798
797
|
if (u <= 0x000000000000007FULL) {
|
799
798
|
*text++ = (char)u;
|
800
|
-
} else if (ox_utf8_encoding == pi->
|
799
|
+
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
801
800
|
text = ucs_to_utf8_chars(text, u);
|
802
|
-
} else if (0 == pi->
|
803
|
-
pi->
|
801
|
+
} else if (0 == pi->options->rb_enc) {
|
802
|
+
pi->options->rb_enc = ox_utf8_encoding;
|
804
803
|
text = ucs_to_utf8_chars(text, u);
|
805
804
|
} else {
|
806
805
|
/*raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
|
@@ -857,11 +856,11 @@ collapse_special(PInfo pi, char *str) {
|
|
857
856
|
}
|
858
857
|
if (u <= 0x000000000000007FULL) {
|
859
858
|
*b++ = (char)u;
|
860
|
-
} else if (ox_utf8_encoding == pi->
|
859
|
+
} else if (ox_utf8_encoding == pi->options->rb_enc) {
|
861
860
|
b = ucs_to_utf8_chars(b, u);
|
862
861
|
/* TBD support UTF-16 */
|
863
|
-
} else if (0 == pi->
|
864
|
-
pi->
|
862
|
+
} else if (0 == pi->options->rb_enc) {
|
863
|
+
pi->options->rb_enc = ox_utf8_encoding;
|
865
864
|
b = ucs_to_utf8_chars(b, u);
|
866
865
|
} else {
|
867
866
|
/* raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
|
data/ext/ox/sax.c
CHANGED
@@ -321,7 +321,13 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
321
321
|
dr->has_error = respond_to(handler, ox_error_id);
|
322
322
|
#if HAS_ENCODING_SUPPORT
|
323
323
|
if ('\0' == *ox_default_options.encoding) {
|
324
|
-
|
324
|
+
VALUE encoding;
|
325
|
+
|
326
|
+
if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
|
327
|
+
dr->encoding = rb_enc_from_index(rb_enc_get_index(encoding));
|
328
|
+
} else {
|
329
|
+
dr->encoding = 0;
|
330
|
+
}
|
325
331
|
} else {
|
326
332
|
dr->encoding = rb_enc_find(ox_default_options.encoding);
|
327
333
|
}
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.6
|
4
|
+
version: 1.6.7
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-15 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "A fast XML parser and object serializer that uses only standard C
|
15
15
|
lib.\n \nOptimized XML (Ox), as the name implies was written to provide
|