ox 2.2.4 → 2.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +16 -9
- data/ext/ox/dump.c +34 -10
- data/ext/ox/ox.c +160 -6
- data/ext/ox/ox.h +3 -0
- data/ext/ox/parse.c +65 -20
- data/ext/ox/sax.c +9 -0
- data/ext/ox/sax.h +1 -0
- data/ext/ox/sax_buf.c +7 -4
- data/lib/ox/element.rb +12 -3
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1bf8bb0abb35f28192d986125010bca5dac84737
|
4
|
+
data.tar.gz: edfeba0e9a4b9892f4de6b127d7717795a01f007
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9428e17927f1e89e3704401f27c40523c84ad9900826beef3df85af5f71411be9bd6a450a339dff24de28de36ece8c335235b055281b21dd4ca22a2859922dc0
|
7
|
+
data.tar.gz: 853e58cc23f118defd6eafb1c64483cf0a922572a9befde6edb54e4be72d6671ddfc97c4093afa31590f6c239a1cda852c5980d2e27d39e9eb33984492258bc5
|
data/README.md
CHANGED
@@ -34,20 +34,27 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## Release Notes
|
36
36
|
|
37
|
-
### Release 2.2.4
|
37
|
+
### Release 2.3.0
|
38
38
|
|
39
|
-
-
|
40
|
-
|
39
|
+
- Added Ox::Element.replace_text() method.
|
40
|
+
|
41
|
+
- Ox::Element nodes variable is now always initialized to an empty Array.
|
42
|
+
|
43
|
+
- Ox::Element attributes variable is now always initialized to an empty Hash.
|
41
44
|
|
42
|
-
|
45
|
+
- An invalid_replace option has been added. It will replace invalid XML
|
46
|
+
characters with a provided string. Strict effort now raises an exception if an
|
47
|
+
invalid character is encountered on dump or load.
|
43
48
|
|
44
|
-
-
|
45
|
-
the
|
49
|
+
- Ox.load and Ox.parse now allow for a callback block to handle multiple top
|
50
|
+
level entities in the input.
|
46
51
|
|
47
|
-
- The
|
48
|
-
SAX parser.
|
52
|
+
- The Ox SAX parser now supports strings as input directly without an IO wrapper.
|
49
53
|
|
50
|
-
|
54
|
+
### Release 2.2.4
|
55
|
+
|
56
|
+
- Changed the code to allow compilation on older compilers. No change in
|
57
|
+
functionality otherwise.
|
51
58
|
|
52
59
|
## Description
|
53
60
|
|
data/ext/ox/dump.c
CHANGED
@@ -78,8 +78,10 @@ static int is_xml_friendly(const uchar *str, int len);
|
|
78
78
|
|
79
79
|
static const char hex_chars[17] = "0123456789abcdef";
|
80
80
|
|
81
|
+
// The : character is equivalent to 10. Used for replacement characters up to 10
|
82
|
+
// characters long such as '&#x10FFFF;'.
|
81
83
|
static char xml_friendly_chars[257] = "\
|
82
|
-
|
84
|
+
:::::::::11::1::::::::::::::::::\
|
83
85
|
11611156111111111111111111114141\
|
84
86
|
11111111111111111111111111111111\
|
85
87
|
11111111111111111111111111111111\
|
@@ -315,7 +317,7 @@ dump_value(Out out, const char *value, size_t size) {
|
|
315
317
|
inline static void
|
316
318
|
dump_str_value(Out out, const char *value, size_t size) {
|
317
319
|
size_t xsize = xml_str_len((const uchar*)value, size);
|
318
|
-
|
320
|
+
|
319
321
|
if (out->end - out->cur <= (long)xsize) {
|
320
322
|
grow(out, xsize);
|
321
323
|
}
|
@@ -323,42 +325,63 @@ dump_str_value(Out out, const char *value, size_t size) {
|
|
323
325
|
if ('1' == xml_friendly_chars[(uchar)*value]) {
|
324
326
|
*out->cur++ = *value;
|
325
327
|
} else {
|
326
|
-
*out->cur++ = '&';
|
327
328
|
switch (*value) {
|
328
329
|
case '"':
|
330
|
+
*out->cur++ = '&';
|
329
331
|
*out->cur++ = 'q';
|
330
332
|
*out->cur++ = 'u';
|
331
333
|
*out->cur++ = 'o';
|
332
334
|
*out->cur++ = 't';
|
335
|
+
*out->cur++ = ';';
|
333
336
|
break;
|
334
337
|
case '&':
|
338
|
+
*out->cur++ = '&';
|
335
339
|
*out->cur++ = 'a';
|
336
340
|
*out->cur++ = 'm';
|
337
341
|
*out->cur++ = 'p';
|
342
|
+
*out->cur++ = ';';
|
338
343
|
break;
|
339
344
|
case '\'':
|
345
|
+
*out->cur++ = '&';
|
340
346
|
*out->cur++ = 'a';
|
341
347
|
*out->cur++ = 'p';
|
342
348
|
*out->cur++ = 'o';
|
343
349
|
*out->cur++ = 's';
|
350
|
+
*out->cur++ = ';';
|
344
351
|
break;
|
345
352
|
case '<':
|
353
|
+
*out->cur++ = '&';
|
346
354
|
*out->cur++ = 'l';
|
347
355
|
*out->cur++ = 't';
|
356
|
+
*out->cur++ = ';';
|
348
357
|
break;
|
349
358
|
case '>':
|
359
|
+
*out->cur++ = '&';
|
350
360
|
*out->cur++ = 'g';
|
351
361
|
*out->cur++ = 't';
|
362
|
+
*out->cur++ = ';';
|
352
363
|
break;
|
353
364
|
default:
|
354
|
-
|
355
|
-
|
356
|
-
|
357
|
-
|
358
|
-
|
365
|
+
// Must be one of the invalid characters.
|
366
|
+
if (StrictEffort == out->opts->effort) {
|
367
|
+
rb_raise(rb_eSyntaxError, "'\\#x%02x' is not a valid XML character.", *value);
|
368
|
+
}
|
369
|
+
if (Yes == out->opts->allow_invalid) {
|
370
|
+
*out->cur++ = '&';
|
371
|
+
*out->cur++ = '#';
|
372
|
+
*out->cur++ = 'x';
|
373
|
+
*out->cur++ = '0';
|
374
|
+
*out->cur++ = '0';
|
375
|
+
dump_hex(*value, out);
|
376
|
+
*out->cur++ = ';';
|
377
|
+
} else if ('\0' != *out->opts->inv_repl) {
|
378
|
+
// If the empty string then ignore. The first character of
|
379
|
+
// the replacement is the length.
|
380
|
+
memcpy(out->cur, out->opts->inv_repl + 1, (size_t)*out->opts->inv_repl);
|
381
|
+
out->cur += *out->opts->inv_repl;
|
382
|
+
}
|
359
383
|
break;
|
360
384
|
}
|
361
|
-
*out->cur++ = ';';
|
362
385
|
}
|
363
386
|
}
|
364
387
|
*out->cur = '\0';
|
@@ -1016,7 +1039,7 @@ dump_gen_element(VALUE obj, int depth, Out out) {
|
|
1016
1039
|
if (Qnil != attrs) {
|
1017
1040
|
rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out);
|
1018
1041
|
}
|
1019
|
-
if (Qnil != nodes) {
|
1042
|
+
if (Qnil != nodes && 0 < RARRAY_LEN(nodes)) {
|
1020
1043
|
int do_indent;
|
1021
1044
|
|
1022
1045
|
*out->cur++ = '>';
|
@@ -1199,6 +1222,7 @@ dump_obj_to_xml(VALUE obj, Options copts, Out out) {
|
|
1199
1222
|
ox_cache8_new(&out->circ_cache);
|
1200
1223
|
}
|
1201
1224
|
out->indent = copts->indent;
|
1225
|
+
|
1202
1226
|
if (ox_document_clas == clas) {
|
1203
1227
|
dump_gen_doc(obj, -1, out);
|
1204
1228
|
} else if (ox_element_clas == clas) {
|
data/ext/ox/ox.c
CHANGED
@@ -106,6 +106,7 @@ static VALUE convert_special_sym;
|
|
106
106
|
static VALUE effort_sym;
|
107
107
|
static VALUE generic_sym;
|
108
108
|
static VALUE indent_sym;
|
109
|
+
static VALUE invalid_replace_sym;
|
109
110
|
static VALUE limited_sym;
|
110
111
|
static VALUE mode_sym;
|
111
112
|
static VALUE object_sym;
|
@@ -117,6 +118,7 @@ static VALUE skip_sym;
|
|
117
118
|
static VALUE skip_white_sym;
|
118
119
|
static VALUE smart_sym;
|
119
120
|
static VALUE strict_sym;
|
121
|
+
static VALUE strip_namespace_sym;
|
120
122
|
static VALUE symbolize_keys_sym;
|
121
123
|
static VALUE symbolize_sym;
|
122
124
|
static VALUE tolerant_sym;
|
@@ -126,6 +128,9 @@ static VALUE with_instruct_sym;
|
|
126
128
|
static VALUE with_xml_sym;
|
127
129
|
static VALUE xsd_date_sym;
|
128
130
|
|
131
|
+
static ID encoding_id;
|
132
|
+
static ID has_key_id;
|
133
|
+
|
129
134
|
#if HAS_ENCODING_SUPPORT
|
130
135
|
rb_encoding *ox_utf8_encoding = 0;
|
131
136
|
#elif HAS_PRIVATE_ENCODING
|
@@ -149,6 +154,9 @@ struct _Options ox_default_options = {
|
|
149
154
|
NoSkip, /* skip */
|
150
155
|
No, /* smart */
|
151
156
|
1, /* convert_special */
|
157
|
+
No, /* allow_invalid */
|
158
|
+
{ '\0' }, /* inv_repl */
|
159
|
+
{ '\0' }, /* strip_ns */
|
152
160
|
#if HAS_PRIVATE_ENCODING
|
153
161
|
Qnil /* rb_enc */
|
154
162
|
#else
|
@@ -229,6 +237,8 @@ defuse_bom(char *xml, Options options) {
|
|
229
237
|
* - skip: [:skip_none|:skip_return|:skip_white] determines how to handle white space in text
|
230
238
|
* - smart: [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
|
231
239
|
* - convert_special: [true|false|nil] flag indicating special characters like < are converted with the SAX parser
|
240
|
+
* - invalid_replace: [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
241
|
+
* - strip_namespace: [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
232
242
|
* @return [Hash] all current option settings.
|
233
243
|
*
|
234
244
|
* Note that an indent of less than zero will result in a tight one line output
|
@@ -270,6 +280,18 @@ get_def_opts(VALUE self) {
|
|
270
280
|
case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
|
271
281
|
default: rb_hash_aset(opts, skip_sym, Qnil); break;
|
272
282
|
}
|
283
|
+
if (Yes == ox_default_options.allow_invalid) {
|
284
|
+
rb_hash_aset(opts, invalid_replace_sym, Qnil);
|
285
|
+
} else {
|
286
|
+
rb_hash_aset(opts, invalid_replace_sym, rb_str_new(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl));
|
287
|
+
}
|
288
|
+
if ('\0' == *ox_default_options.strip_ns) {
|
289
|
+
rb_hash_aset(opts, strip_namespace_sym, Qfalse);
|
290
|
+
} else if ('*' == *ox_default_options.strip_ns && '\0' == ox_default_options.strip_ns[1]) {
|
291
|
+
rb_hash_aset(opts, strip_namespace_sym, Qtrue);
|
292
|
+
} else {
|
293
|
+
rb_hash_aset(opts, strip_namespace_sym, rb_str_new(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns)));
|
294
|
+
}
|
273
295
|
return opts;
|
274
296
|
}
|
275
297
|
|
@@ -289,6 +311,8 @@ get_def_opts(VALUE self) {
|
|
289
311
|
* @param [:strict|:tolerant|:auto_define] :effort set the tolerance level for loading
|
290
312
|
* @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
|
291
313
|
* @param [:skip_none|:skip_return|:skip_white] :skip determines how to handle white space in text
|
314
|
+
* @param [nil|String] :invalid_replace replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
315
|
+
* @param [nil|String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
292
316
|
* @return [nil]
|
293
317
|
*/
|
294
318
|
static VALUE
|
@@ -384,6 +408,43 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
384
408
|
rb_raise(ox_parse_error_class, ":convert_special must be true or false.\n");
|
385
409
|
}
|
386
410
|
|
411
|
+
v = rb_hash_aref(opts, invalid_replace_sym);
|
412
|
+
if (Qnil == v) {
|
413
|
+
ox_default_options.allow_invalid = Yes;
|
414
|
+
} else {
|
415
|
+
long slen;
|
416
|
+
|
417
|
+
Check_Type(v, T_STRING);
|
418
|
+
slen = RSTRING_LEN(v);
|
419
|
+
if (sizeof(ox_default_options.inv_repl) - 2 < slen) {
|
420
|
+
rb_raise(ox_parse_error_class, ":invalid_replace can be no longer than %ld characters.",
|
421
|
+
sizeof(ox_default_options.inv_repl) - 2);
|
422
|
+
}
|
423
|
+
strncpy(ox_default_options.inv_repl + 1, StringValuePtr(v), sizeof(ox_default_options.inv_repl) - 1);
|
424
|
+
ox_default_options.inv_repl[sizeof(ox_default_options.inv_repl) - 1] = '\0';
|
425
|
+
*ox_default_options.inv_repl = (char)slen;
|
426
|
+
ox_default_options.allow_invalid = No;
|
427
|
+
}
|
428
|
+
|
429
|
+
v = rb_hash_aref(opts, strip_namespace_sym);
|
430
|
+
if (Qfalse == v) {
|
431
|
+
*ox_default_options.strip_ns = '\0';
|
432
|
+
} else if (Qtrue == v) {
|
433
|
+
*ox_default_options.strip_ns = '*';
|
434
|
+
ox_default_options.strip_ns[1] = '\0';
|
435
|
+
} else if (Qnil != v) {
|
436
|
+
long slen;
|
437
|
+
|
438
|
+
Check_Type(v, T_STRING);
|
439
|
+
slen = RSTRING_LEN(v);
|
440
|
+
if (sizeof(ox_default_options.strip_ns) - 1 < slen) {
|
441
|
+
rb_raise(ox_parse_error_class, ":strip_namespace can be no longer than %ld characters.",
|
442
|
+
sizeof(ox_default_options.strip_ns) - 1);
|
443
|
+
}
|
444
|
+
strncpy(ox_default_options.strip_ns, StringValuePtr(v), sizeof(ox_default_options.strip_ns) - 1);
|
445
|
+
ox_default_options.strip_ns[sizeof(ox_default_options.strip_ns) - 1] = '\0';
|
446
|
+
}
|
447
|
+
|
387
448
|
for (o = ynos; 0 != o->attr; o++) {
|
388
449
|
v = rb_hash_lookup(opts, o->sym);
|
389
450
|
if (Qnil == v) {
|
@@ -536,6 +597,44 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
|
|
536
597
|
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
537
598
|
options.convert_special = (Qfalse != v);
|
538
599
|
}
|
600
|
+
|
601
|
+
v = rb_hash_lookup(h, invalid_replace_sym);
|
602
|
+
if (Qnil == v) {
|
603
|
+
if (Qtrue == rb_funcall(h, has_key_id, 1, invalid_replace_sym)) {
|
604
|
+
options.allow_invalid = Yes;
|
605
|
+
}
|
606
|
+
} else {
|
607
|
+
long slen;
|
608
|
+
|
609
|
+
Check_Type(v, T_STRING);
|
610
|
+
slen = RSTRING_LEN(v);
|
611
|
+
if (sizeof(options.inv_repl) - 2 < slen) {
|
612
|
+
rb_raise(ox_parse_error_class, ":invalid_replace can be no longer than %ld characters.",
|
613
|
+
sizeof(options.inv_repl) - 2);
|
614
|
+
}
|
615
|
+
strncpy(options.inv_repl + 1, StringValuePtr(v), sizeof(options.inv_repl) - 1);
|
616
|
+
options.inv_repl[sizeof(options.inv_repl) - 1] = '\0';
|
617
|
+
*options.inv_repl = (char)slen;
|
618
|
+
options.allow_invalid = No;
|
619
|
+
}
|
620
|
+
v = rb_hash_lookup(h, strip_namespace_sym);
|
621
|
+
if (Qfalse == v) {
|
622
|
+
*options.strip_ns = '\0';
|
623
|
+
} else if (Qtrue == v) {
|
624
|
+
*options.strip_ns = '*';
|
625
|
+
options.strip_ns[1] = '\0';
|
626
|
+
} else if (Qnil != v) {
|
627
|
+
long slen;
|
628
|
+
|
629
|
+
Check_Type(v, T_STRING);
|
630
|
+
slen = RSTRING_LEN(v);
|
631
|
+
if (sizeof(options.strip_ns) - 1 < slen) {
|
632
|
+
rb_raise(ox_parse_error_class, ":strip_namespace can be no longer than %ld characters.",
|
633
|
+
sizeof(options.strip_ns) - 1);
|
634
|
+
}
|
635
|
+
strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
|
636
|
+
options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
|
637
|
+
}
|
539
638
|
}
|
540
639
|
#if HAS_ENCODING_SUPPORT
|
541
640
|
if ('\0' == *options.encoding) {
|
@@ -590,8 +689,11 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
|
|
590
689
|
/* call-seq: load(xml, options) => Ox::Document or Ox::Element or Object
|
591
690
|
*
|
592
691
|
* Parses an XML document String into an Ox::Document, or Ox::Element, or
|
593
|
-
* Object depending on the options. Raises an exception if the XML is
|
594
|
-
*
|
692
|
+
* Object depending on the options. Raises an exception if the XML is malformed
|
693
|
+
* or the classes specified are not valid. If a block is given it will be called
|
694
|
+
* on the completion of each complete top level entity with that entity as its
|
695
|
+
* only argument.
|
696
|
+
*
|
595
697
|
* @param [String] xml XML String
|
596
698
|
* @param [Hash] options load options
|
597
699
|
* @param [:object|:generic|:limited] :mode format expected
|
@@ -604,6 +706,8 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
|
|
604
706
|
* - *:auto_define* - auto define missing classes and modules
|
605
707
|
* @param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent)
|
606
708
|
* @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
|
709
|
+
* @param [nil|String] :invalid_replace replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
710
|
+
* @param [String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
607
711
|
*/
|
608
712
|
static VALUE
|
609
713
|
load_str(int argc, VALUE *argv, VALUE self) {
|
@@ -624,12 +728,12 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
624
728
|
}
|
625
729
|
#if HAS_ENCODING_SUPPORT
|
626
730
|
#ifdef MACRUBY_RUBY
|
627
|
-
encoding = rb_funcall(*argv,
|
731
|
+
encoding = rb_funcall(*argv, encoding_id, 0);
|
628
732
|
#else
|
629
733
|
encoding = rb_obj_encoding(*argv);
|
630
734
|
#endif
|
631
735
|
#elif HAS_PRIVATE_ENCODING
|
632
|
-
encoding = rb_funcall(*argv,
|
736
|
+
encoding = rb_funcall(*argv, encoding_id, 0);
|
633
737
|
#else
|
634
738
|
encoding = Qnil;
|
635
739
|
#endif
|
@@ -661,6 +765,8 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
661
765
|
* - *:auto_define* - auto define missing classes and modules
|
662
766
|
* @param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent)
|
663
767
|
* @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
|
768
|
+
* @param [nil|String] :invalid_replace replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
769
|
+
* @param [String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
664
770
|
*/
|
665
771
|
static VALUE
|
666
772
|
load_file(int argc, VALUE *argv, VALUE self) {
|
@@ -713,6 +819,7 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
713
819
|
* @param [true|false] :symbolize flag indicating the parser symbolize element and attribute names
|
714
820
|
* @param [true|false] :smart flag indicating the parser uses hints if available (use with html)
|
715
821
|
* @param [:skip_return|:skip_white] :skip flag indicating the parser skips \r or collapse white space into a single space. Default (skip nothing)
|
822
|
+
* @param [nil|String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
716
823
|
*/
|
717
824
|
static VALUE
|
718
825
|
sax_parse(int argc, VALUE *argv, VALUE self) {
|
@@ -722,7 +829,8 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
722
829
|
options.convert_special = ox_default_options.convert_special;
|
723
830
|
options.smart = (Yes == ox_default_options.smart);
|
724
831
|
options.skip = ox_default_options.skip;
|
725
|
-
|
832
|
+
strcpy(options.strip_ns, ox_default_options.strip_ns);
|
833
|
+
|
726
834
|
if (argc < 2) {
|
727
835
|
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
|
728
836
|
}
|
@@ -744,6 +852,27 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
744
852
|
options.skip = CrSkip;
|
745
853
|
} else if (skip_white_sym == v) {
|
746
854
|
options.skip = SpcSkip;
|
855
|
+
} else if (skip_none_sym == v) {
|
856
|
+
options.skip = NoSkip;
|
857
|
+
}
|
858
|
+
}
|
859
|
+
if (Qnil != (v = rb_hash_lookup(h, strip_namespace_sym))) {
|
860
|
+
if (Qfalse == v) {
|
861
|
+
*options.strip_ns = '\0';
|
862
|
+
} else if (Qtrue == v) {
|
863
|
+
*options.strip_ns = '*';
|
864
|
+
options.strip_ns[1] = '\0';
|
865
|
+
} else {
|
866
|
+
long slen;
|
867
|
+
|
868
|
+
Check_Type(v, T_STRING);
|
869
|
+
slen = RSTRING_LEN(v);
|
870
|
+
if (sizeof(options.strip_ns) - 1 < slen) {
|
871
|
+
rb_raise(ox_parse_error_class, ":strip_namespace can be no longer than %ld characters.",
|
872
|
+
sizeof(options.strip_ns) - 1);
|
873
|
+
}
|
874
|
+
strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
|
875
|
+
options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
|
747
876
|
}
|
748
877
|
}
|
749
878
|
}
|
@@ -796,6 +925,26 @@ parse_dump_options(VALUE ropts, Options copts) {
|
|
796
925
|
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
|
797
926
|
}
|
798
927
|
}
|
928
|
+
v = rb_hash_lookup(ropts, invalid_replace_sym);
|
929
|
+
if (Qnil == v) {
|
930
|
+
if (Qtrue == rb_funcall(ropts, has_key_id, 1, invalid_replace_sym)) {
|
931
|
+
copts->allow_invalid = Yes;
|
932
|
+
}
|
933
|
+
} else {
|
934
|
+
long slen;
|
935
|
+
|
936
|
+
Check_Type(v, T_STRING);
|
937
|
+
slen = RSTRING_LEN(v);
|
938
|
+
if (sizeof(copts->inv_repl) - 2 < slen) {
|
939
|
+
rb_raise(ox_parse_error_class, ":invalid_replace can be no longer than %ld characters.",
|
940
|
+
sizeof(copts->inv_repl) - 2);
|
941
|
+
}
|
942
|
+
strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
|
943
|
+
copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
|
944
|
+
*copts->inv_repl = (char)slen;
|
945
|
+
copts->allow_invalid = No;
|
946
|
+
}
|
947
|
+
|
799
948
|
for (o = ynos; 0 != o->attr; o++) {
|
800
949
|
if (Qnil != (v = rb_hash_lookup(ropts, o->sym))) {
|
801
950
|
VALUE c = rb_obj_class(v);
|
@@ -810,7 +959,7 @@ parse_dump_options(VALUE ropts, Options copts) {
|
|
810
959
|
}
|
811
960
|
}
|
812
961
|
}
|
813
|
-
|
962
|
+
}
|
814
963
|
|
815
964
|
/* call-seq: dump(obj, options) => xml-string
|
816
965
|
*
|
@@ -969,6 +1118,9 @@ void Init_ox() {
|
|
969
1118
|
ox_tv_usec_id = rb_intern("tv_usec");
|
970
1119
|
ox_value_id = rb_intern("value");
|
971
1120
|
|
1121
|
+
encoding_id = rb_intern("encoding");
|
1122
|
+
has_key_id = rb_intern("has_key?");
|
1123
|
+
|
972
1124
|
rb_require("ox/version");
|
973
1125
|
rb_require("ox/error");
|
974
1126
|
rb_require("ox/hasattrs");
|
@@ -997,6 +1149,7 @@ void Init_ox() {
|
|
997
1149
|
effort_sym = ID2SYM(rb_intern("effort")); rb_gc_register_address(&effort_sym);
|
998
1150
|
generic_sym = ID2SYM(rb_intern("generic")); rb_gc_register_address(&generic_sym);
|
999
1151
|
indent_sym = ID2SYM(rb_intern("indent")); rb_gc_register_address(&indent_sym);
|
1152
|
+
invalid_replace_sym = ID2SYM(rb_intern("invalid_replace")); rb_gc_register_address(&invalid_replace_sym);
|
1000
1153
|
limited_sym = ID2SYM(rb_intern("limited")); rb_gc_register_address(&limited_sym);
|
1001
1154
|
mode_sym = ID2SYM(rb_intern("mode")); rb_gc_register_address(&mode_sym);
|
1002
1155
|
object_sym = ID2SYM(rb_intern("object")); rb_gc_register_address(&object_sym);
|
@@ -1006,6 +1159,7 @@ void Init_ox() {
|
|
1006
1159
|
skip_none_sym = ID2SYM(rb_intern("skip_none")); rb_gc_register_address(&skip_none_sym);
|
1007
1160
|
skip_return_sym = ID2SYM(rb_intern("skip_return")); rb_gc_register_address(&skip_return_sym);
|
1008
1161
|
skip_sym = ID2SYM(rb_intern("skip")); rb_gc_register_address(&skip_sym);
|
1162
|
+
strip_namespace_sym = ID2SYM(rb_intern("strip_namespace")); rb_gc_register_address(&strip_namespace_sym);
|
1009
1163
|
skip_white_sym = ID2SYM(rb_intern("skip_white")); rb_gc_register_address(&skip_white_sym);
|
1010
1164
|
smart_sym = ID2SYM(rb_intern("smart")); rb_gc_register_address(&smart_sym);
|
1011
1165
|
strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym);
|
data/ext/ox/ox.h
CHANGED
@@ -133,6 +133,9 @@ typedef struct _Options {
|
|
133
133
|
char skip; /* skip mode */
|
134
134
|
char smart; /* YesNo sax smart mode */
|
135
135
|
char convert_special;/* boolean true or false */
|
136
|
+
char allow_invalid; /* YesNo */
|
137
|
+
char inv_repl[12]; /* max 10 valid characters, first character is the length */
|
138
|
+
char strip_ns[64]; /* namespace to strip, \0 is no-strip, \* is all, else only matches */
|
136
139
|
#if HAS_ENCODING_SUPPORT
|
137
140
|
rb_encoding *rb_enc;
|
138
141
|
#elif HAS_PRIVATE_ENCODING
|
data/ext/ox/parse.c
CHANGED
@@ -43,6 +43,8 @@ static int collapse_special(PInfo pi, char *str);
|
|
43
43
|
* all cases to parse the string.
|
44
44
|
*/
|
45
45
|
|
46
|
+
static char xml_valid_lower_chars[34] = "xxxxxxxxxooxxoxxxxxxxxxxxxxxxxxxo";
|
47
|
+
|
46
48
|
inline static int
|
47
49
|
is_white(char c) {
|
48
50
|
switch (c) {
|
@@ -95,6 +97,7 @@ VALUE
|
|
95
97
|
ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err) {
|
96
98
|
struct _PInfo pi;
|
97
99
|
int body_read = 0;
|
100
|
+
int block_given = rb_block_given_p();
|
98
101
|
|
99
102
|
if (0 == xml) {
|
100
103
|
set_error(err, "Invalid arg, xml string can not be null", xml, 0);
|
@@ -171,6 +174,9 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err) {
|
|
171
174
|
helper_stack_cleanup(&pi.helpers);
|
172
175
|
return Qnil;
|
173
176
|
}
|
177
|
+
if (block_given && Qnil != pi.obj && Qundef != pi.obj) {
|
178
|
+
rb_yield(pi.obj);
|
179
|
+
}
|
174
180
|
}
|
175
181
|
helper_stack_cleanup(&pi.helpers);
|
176
182
|
return pi.obj;
|
@@ -211,7 +217,6 @@ read_instruction(PInfo pi) {
|
|
211
217
|
char *cend;
|
212
218
|
int attrs_ok = 1;
|
213
219
|
|
214
|
-
|
215
220
|
*content = '\0';
|
216
221
|
attr_stack_init(&attrs);
|
217
222
|
if (0 == (target = read_name_token(pi))) {
|
@@ -567,9 +572,32 @@ read_element(PInfo pi) {
|
|
567
572
|
return 0;
|
568
573
|
}
|
569
574
|
if (first && start != slash - 1) {
|
570
|
-
|
575
|
+
// Some white space between start and here so add as
|
576
|
+
// text after checking skip.
|
571
577
|
*(slash - 1) = '\0';
|
572
|
-
pi->
|
578
|
+
switch (pi->options->skip) {
|
579
|
+
case CrSkip: {
|
580
|
+
char *s = start;
|
581
|
+
char *e = start;
|
582
|
+
|
583
|
+
for (; '\0' != *e; e++) {
|
584
|
+
if ('\r' != *e) {
|
585
|
+
*s++ = *e;
|
586
|
+
}
|
587
|
+
}
|
588
|
+
*s = '\0';
|
589
|
+
break;
|
590
|
+
}
|
591
|
+
case SpcSkip:
|
592
|
+
*start = '\0';
|
593
|
+
break;
|
594
|
+
case NoSkip:
|
595
|
+
default:
|
596
|
+
break;
|
597
|
+
}
|
598
|
+
if ('\0' != *start) {
|
599
|
+
pi->pcb->add_text(pi, start, 1);
|
600
|
+
}
|
573
601
|
}
|
574
602
|
pi->s++;
|
575
603
|
pi->pcb->end_element(pi, ename);
|
@@ -667,27 +695,35 @@ read_text(PInfo pi) {
|
|
667
695
|
return;
|
668
696
|
}
|
669
697
|
} else {
|
670
|
-
|
671
|
-
|
672
|
-
|
673
|
-
|
674
|
-
} else {
|
675
|
-
*b++ = c;
|
698
|
+
if (0 <= c && c <= 0x20) {
|
699
|
+
if (StrictEffort == pi->options->effort && 'x' == xml_valid_lower_chars[(unsigned char)c]) {
|
700
|
+
set_error(&pi->err, "invalid character", pi->str, pi->s);
|
701
|
+
return;
|
676
702
|
}
|
677
|
-
|
678
|
-
|
679
|
-
|
680
|
-
|
681
|
-
|
703
|
+
switch (pi->options->skip) {
|
704
|
+
case CrSkip:
|
705
|
+
if (buf != b && '\n' == c && '\r' == *(b - 1)) {
|
706
|
+
*(b - 1) = '\n';
|
707
|
+
} else {
|
708
|
+
*b++ = c;
|
682
709
|
}
|
683
|
-
|
710
|
+
break;
|
711
|
+
case SpcSkip:
|
712
|
+
if (is_white(c)) {
|
713
|
+
if (buf == b || ' ' != *(b - 1)) {
|
714
|
+
*b++ = ' ';
|
715
|
+
}
|
716
|
+
} else {
|
717
|
+
*b++ = c;
|
718
|
+
}
|
719
|
+
break;
|
720
|
+
case NoSkip:
|
721
|
+
default:
|
684
722
|
*b++ = c;
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
default:
|
723
|
+
break;
|
724
|
+
}
|
725
|
+
} else {
|
689
726
|
*b++ = c;
|
690
|
-
break;
|
691
727
|
}
|
692
728
|
}
|
693
729
|
break;
|
@@ -795,6 +831,15 @@ read_name_token(PInfo pi) {
|
|
795
831
|
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
796
832
|
return 0;
|
797
833
|
break; /* to avoid warnings */
|
834
|
+
case ':':
|
835
|
+
if ('\0' == *pi->options->strip_ns) {
|
836
|
+
break;
|
837
|
+
} else if ('*' == *pi->options->strip_ns && '\0' == pi->options->strip_ns[1]) {
|
838
|
+
start = pi->s + 1;
|
839
|
+
} else if (0 == strncmp(pi->options->strip_ns, start, pi->s - start)) {
|
840
|
+
start = pi->s + 1;
|
841
|
+
}
|
842
|
+
break;
|
798
843
|
default:
|
799
844
|
break;
|
800
845
|
}
|
data/ext/ox/sax.c
CHANGED
@@ -1401,6 +1401,15 @@ read_name_token(SaxDrive dr) {
|
|
1401
1401
|
/* documents never terminate after a name token */
|
1402
1402
|
ox_sax_drive_error(dr, NO_TERM "document not terminated");
|
1403
1403
|
return '\0';
|
1404
|
+
case ':':
|
1405
|
+
if ('\0' == *dr->options.strip_ns) {
|
1406
|
+
break;
|
1407
|
+
} else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
|
1408
|
+
dr->buf.str = dr->buf.tail;
|
1409
|
+
} else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
|
1410
|
+
dr->buf.str = dr->buf.tail;
|
1411
|
+
}
|
1412
|
+
break;
|
1404
1413
|
default:
|
1405
1414
|
break;
|
1406
1415
|
}
|
data/ext/ox/sax.h
CHANGED
data/ext/ox/sax_buf.c
CHANGED
@@ -30,11 +30,14 @@ static int read_from_str(Buf buf);
|
|
30
30
|
|
31
31
|
void
|
32
32
|
ox_sax_buf_init(Buf buf, VALUE io) {
|
33
|
-
VALUE io_class = rb_obj_class(io);
|
34
|
-
VALUE
|
33
|
+
volatile VALUE io_class = rb_obj_class(io);
|
34
|
+
VALUE rfd;
|
35
35
|
|
36
|
-
if (
|
37
|
-
|
36
|
+
if (rb_cString == io_class) {
|
37
|
+
buf->read_func = read_from_str;
|
38
|
+
buf->in.str = StringValuePtr(io);
|
39
|
+
} else if (ox_stringio_class == io_class && 0 == FIX2INT(rb_funcall2(io, ox_pos_id, 0, 0))) {
|
40
|
+
volatile VALUE s = rb_funcall2(io, ox_string_id, 0, 0);
|
38
41
|
|
39
42
|
buf->read_func = read_from_str;
|
40
43
|
buf->in.str = StringValuePtr(s);
|
data/lib/ox/element.rb
CHANGED
@@ -40,8 +40,8 @@ module Ox
|
|
40
40
|
# @param [String] name name of the Element
|
41
41
|
def initialize(name)
|
42
42
|
super
|
43
|
-
@attributes =
|
44
|
-
@nodes =
|
43
|
+
@attributes = {}
|
44
|
+
@nodes = []
|
45
45
|
end
|
46
46
|
alias name value
|
47
47
|
|
@@ -49,7 +49,7 @@ module Ox
|
|
49
49
|
# Element.
|
50
50
|
# @return [Array] all child Nodes.
|
51
51
|
def nodes
|
52
|
-
|
52
|
+
@nodes = [] if !instance_variable_defined?(:@nodes) or @nodes.nil?
|
53
53
|
@nodes
|
54
54
|
end
|
55
55
|
|
@@ -83,6 +83,15 @@ module Ox
|
|
83
83
|
nil
|
84
84
|
end
|
85
85
|
|
86
|
+
# Clears any child nodes of an element and replaces those with a single Text
|
87
|
+
# (String) node. Note the existing nodes array is modified and not replaced.
|
88
|
+
# @param [String] txt to become the only element of the nodes array
|
89
|
+
def replace_text(txt)
|
90
|
+
raise "the argument to replace_text() must be a String" unless txt.is_a?(String)
|
91
|
+
@nodes.clear()
|
92
|
+
@nodes << txt
|
93
|
+
end
|
94
|
+
|
86
95
|
# Returns an array of Nodes or Strings that correspond to the locations
|
87
96
|
# specified by the path parameter. The path parameter describes the path
|
88
97
|
# to the return values which can be either nodes in the XML or
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.4
|
4
|
+
version: 2.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ohler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-02-
|
11
|
+
date: 2016-02-21 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "A fast XML parser and object serializer that uses only standard C lib.\n
|
14
14
|
\ \nOptimized XML (Ox), as the name implies was written to provide speed
|