ox 2.2.4 → 2.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 46e015e0b3899db57415e3edbf2634590d199b7e
4
- data.tar.gz: 5aeb573b6fdc7342b392513e4731713f4c7caee2
3
+ metadata.gz: 1bf8bb0abb35f28192d986125010bca5dac84737
4
+ data.tar.gz: edfeba0e9a4b9892f4de6b127d7717795a01f007
5
5
  SHA512:
6
- metadata.gz: 1258f0904dd33ebc85adf0f89436dd1d4d783cfd2393f1cdc4fe0cfc55321c33da5fb687dd4cdda0df15922c850a27fa3cdd1a5491b4e4d713598d4de64f36db
7
- data.tar.gz: 3b34e3bbfbb4b6d512f763447a822ce6552f3630e5097b03b8a33ae3ea563452cb9cc2718135b40496640b654b65c180214274594e8605924f434275373e5e98
6
+ metadata.gz: 9428e17927f1e89e3704401f27c40523c84ad9900826beef3df85af5f71411be9bd6a450a339dff24de28de36ece8c335235b055281b21dd4ca22a2859922dc0
7
+ data.tar.gz: 853e58cc23f118defd6eafb1c64483cf0a922572a9befde6edb54e4be72d6671ddfc97c4093afa31590f6c239a1cda852c5980d2e27d39e9eb33984492258bc5
data/README.md CHANGED
@@ -34,20 +34,27 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## Release Notes
36
36
 
37
- ### Release 2.2.4
37
+ ### Release 2.3.0
38
38
 
39
- - Changed the code to allow compilation on older compilers. No change in
40
- functionality otherwise.
39
+ - Added Ox::Element.replace_text() method.
40
+
41
+ - Ox::Element nodes variable is now always initialized to an empty Array.
42
+
43
+ - Ox::Element attributes variable is now always initialized to an empty Hash.
41
44
 
42
- ### Release 2.2.3
45
+ - An invalid_replace option has been added. It will replace invalid XML
46
+ character with a provided string. Strict effort now raises an exception if an
47
+ invalid character is encountered on dump or load.
43
48
 
44
- - The convert_special option now applies to attributes as well as elements in
45
- the SAX parser.
49
+ - Ox.load and Ox.parse now allow for a callback block to handle multiple top
50
+ level entities in the input.
46
51
 
47
- - The convert_special option now applies to the regualr parser as well as the
48
- SAX parser.
52
+ - The Ox SAX parser now supports strings as input directly without an IO wrapper.
49
53
 
50
- - Updated to work correctly with Ruby 2.3.0.
54
+ ### Release 2.2.4
55
+
56
+ - Changed the code to allow compilation on older compilers. No change in
57
+ functionality otherwise.
51
58
 
52
59
  ## Description
53
60
 
@@ -78,8 +78,10 @@ static int is_xml_friendly(const uchar *str, int len);
78
78
 
79
79
  static const char hex_chars[17] = "0123456789abcdef";
80
80
 
81
+ // The : character is equivalent to 10. Used for replacement characters up to 10
82
+ // characters long such as '&#x10FFFF;'.
81
83
  static char xml_friendly_chars[257] = "\
82
- 88888888811881888888888888888888\
84
+ :::::::::11::1::::::::::::::::::\
83
85
  11611156111111111111111111114141\
84
86
  11111111111111111111111111111111\
85
87
  11111111111111111111111111111111\
@@ -315,7 +317,7 @@ dump_value(Out out, const char *value, size_t size) {
315
317
  inline static void
316
318
  dump_str_value(Out out, const char *value, size_t size) {
317
319
  size_t xsize = xml_str_len((const uchar*)value, size);
318
-
320
+
319
321
  if (out->end - out->cur <= (long)xsize) {
320
322
  grow(out, xsize);
321
323
  }
@@ -323,42 +325,63 @@ dump_str_value(Out out, const char *value, size_t size) {
323
325
  if ('1' == xml_friendly_chars[(uchar)*value]) {
324
326
  *out->cur++ = *value;
325
327
  } else {
326
- *out->cur++ = '&';
327
328
  switch (*value) {
328
329
  case '"':
330
+ *out->cur++ = '&';
329
331
  *out->cur++ = 'q';
330
332
  *out->cur++ = 'u';
331
333
  *out->cur++ = 'o';
332
334
  *out->cur++ = 't';
335
+ *out->cur++ = ';';
333
336
  break;
334
337
  case '&':
338
+ *out->cur++ = '&';
335
339
  *out->cur++ = 'a';
336
340
  *out->cur++ = 'm';
337
341
  *out->cur++ = 'p';
342
+ *out->cur++ = ';';
338
343
  break;
339
344
  case '\'':
345
+ *out->cur++ = '&';
340
346
  *out->cur++ = 'a';
341
347
  *out->cur++ = 'p';
342
348
  *out->cur++ = 'o';
343
349
  *out->cur++ = 's';
350
+ *out->cur++ = ';';
344
351
  break;
345
352
  case '<':
353
+ *out->cur++ = '&';
346
354
  *out->cur++ = 'l';
347
355
  *out->cur++ = 't';
356
+ *out->cur++ = ';';
348
357
  break;
349
358
  case '>':
359
+ *out->cur++ = '&';
350
360
  *out->cur++ = 'g';
351
361
  *out->cur++ = 't';
362
+ *out->cur++ = ';';
352
363
  break;
353
364
  default:
354
- *out->cur++ = '#';
355
- *out->cur++ = 'x';
356
- *out->cur++ = '0';
357
- *out->cur++ = '0';
358
- dump_hex(*value, out);
365
+ // Must be one of the invalid characters.
366
+ if (StrictEffort == out->opts->effort) {
367
+ rb_raise(rb_eSyntaxError, "'\\#x%02x' is not a valid XML character.", *value);
368
+ }
369
+ if (Yes == out->opts->allow_invalid) {
370
+ *out->cur++ = '&';
371
+ *out->cur++ = '#';
372
+ *out->cur++ = 'x';
373
+ *out->cur++ = '0';
374
+ *out->cur++ = '0';
375
+ dump_hex(*value, out);
376
+ *out->cur++ = ';';
377
+ } else if ('\0' != *out->opts->inv_repl) {
378
+ // If the empty string then ignore. The first character of
379
+ // the replacement is the length.
380
+ memcpy(out->cur, out->opts->inv_repl + 1, (size_t)*out->opts->inv_repl);
381
+ out->cur += *out->opts->inv_repl;
382
+ }
359
383
  break;
360
384
  }
361
- *out->cur++ = ';';
362
385
  }
363
386
  }
364
387
  *out->cur = '\0';
@@ -1016,7 +1039,7 @@ dump_gen_element(VALUE obj, int depth, Out out) {
1016
1039
  if (Qnil != attrs) {
1017
1040
  rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out);
1018
1041
  }
1019
- if (Qnil != nodes) {
1042
+ if (Qnil != nodes && 0 < RARRAY_LEN(nodes)) {
1020
1043
  int do_indent;
1021
1044
 
1022
1045
  *out->cur++ = '>';
@@ -1199,6 +1222,7 @@ dump_obj_to_xml(VALUE obj, Options copts, Out out) {
1199
1222
  ox_cache8_new(&out->circ_cache);
1200
1223
  }
1201
1224
  out->indent = copts->indent;
1225
+
1202
1226
  if (ox_document_clas == clas) {
1203
1227
  dump_gen_doc(obj, -1, out);
1204
1228
  } else if (ox_element_clas == clas) {
@@ -106,6 +106,7 @@ static VALUE convert_special_sym;
106
106
  static VALUE effort_sym;
107
107
  static VALUE generic_sym;
108
108
  static VALUE indent_sym;
109
+ static VALUE invalid_replace_sym;
109
110
  static VALUE limited_sym;
110
111
  static VALUE mode_sym;
111
112
  static VALUE object_sym;
@@ -117,6 +118,7 @@ static VALUE skip_sym;
117
118
  static VALUE skip_white_sym;
118
119
  static VALUE smart_sym;
119
120
  static VALUE strict_sym;
121
+ static VALUE strip_namespace_sym;
120
122
  static VALUE symbolize_keys_sym;
121
123
  static VALUE symbolize_sym;
122
124
  static VALUE tolerant_sym;
@@ -126,6 +128,9 @@ static VALUE with_instruct_sym;
126
128
  static VALUE with_xml_sym;
127
129
  static VALUE xsd_date_sym;
128
130
 
131
+ static ID encoding_id;
132
+ static ID has_key_id;
133
+
129
134
  #if HAS_ENCODING_SUPPORT
130
135
  rb_encoding *ox_utf8_encoding = 0;
131
136
  #elif HAS_PRIVATE_ENCODING
@@ -149,6 +154,9 @@ struct _Options ox_default_options = {
149
154
  NoSkip, /* skip */
150
155
  No, /* smart */
151
156
  1, /* convert_special */
157
+ No, /* allow_invalid */
158
+ { '\0' }, /* inv_repl */
159
+ { '\0' }, /* strip_ns */
152
160
  #if HAS_PRIVATE_ENCODING
153
161
  Qnil /* rb_enc */
154
162
  #else
@@ -229,6 +237,8 @@ defuse_bom(char *xml, Options options) {
229
237
  * - skip: [:skip_none|:skip_return|:skip_white] determines how to handle white space in text
230
238
  * - smart: [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
231
239
  * - convert_special: [true|false|nil] flag indicating special characters like &lt; are converted with the SAX parser
240
+ * - invalid_replace: [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters, will replace each invalid character.
241
+ * - strip_namespace: [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
232
242
  * @return [Hash] all current option settings.
233
243
  *
234
244
  * Note that an indent of less than zero will result in a tight one line output
@@ -270,6 +280,18 @@ get_def_opts(VALUE self) {
270
280
  case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
271
281
  default: rb_hash_aset(opts, skip_sym, Qnil); break;
272
282
  }
283
+ if (Yes == ox_default_options.allow_invalid) {
284
+ rb_hash_aset(opts, invalid_replace_sym, Qnil);
285
+ } else {
286
+ rb_hash_aset(opts, invalid_replace_sym, rb_str_new(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl));
287
+ }
288
+ if ('\0' == *ox_default_options.strip_ns) {
289
+ rb_hash_aset(opts, strip_namespace_sym, Qfalse);
290
+ } else if ('*' == *ox_default_options.strip_ns && '\0' == ox_default_options.strip_ns[1]) {
291
+ rb_hash_aset(opts, strip_namespace_sym, Qtrue);
292
+ } else {
293
+ rb_hash_aset(opts, strip_namespace_sym, rb_str_new(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns)));
294
+ }
273
295
  return opts;
274
296
  }
275
297
 
@@ -289,6 +311,8 @@ get_def_opts(VALUE self) {
289
311
  * @param [:strict|:tolerant|:auto_define] :effort set the tolerance level for loading
290
312
  * @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
291
313
  * @param [:skip_none|:skip_return|:skip_white] determines how to handle white space in text
314
+ * @param [nil|String] :invalid_replace replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters, will replace each invalid character.
315
+ * @param [nil|String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
292
316
  * @return [nil]
293
317
  */
294
318
  static VALUE
@@ -384,6 +408,43 @@ set_def_opts(VALUE self, VALUE opts) {
384
408
  rb_raise(ox_parse_error_class, ":convert_special must be true or false.\n");
385
409
  }
386
410
 
411
+ v = rb_hash_aref(opts, invalid_replace_sym);
412
+ if (Qnil == v) {
413
+ ox_default_options.allow_invalid = Yes;
414
+ } else {
415
+ long slen;
416
+
417
+ Check_Type(v, T_STRING);
418
+ slen = RSTRING_LEN(v);
419
+ if (sizeof(ox_default_options.inv_repl) - 2 < slen) {
420
+ rb_raise(ox_parse_error_class, ":invalid_replace can be no longer than %ld characters.",
421
+ sizeof(ox_default_options.inv_repl) - 2);
422
+ }
423
+ strncpy(ox_default_options.inv_repl + 1, StringValuePtr(v), sizeof(ox_default_options.inv_repl) - 1);
424
+ ox_default_options.inv_repl[sizeof(ox_default_options.inv_repl) - 1] = '\0';
425
+ *ox_default_options.inv_repl = (char)slen;
426
+ ox_default_options.allow_invalid = No;
427
+ }
428
+
429
+ v = rb_hash_aref(opts, strip_namespace_sym);
430
+ if (Qfalse == v) {
431
+ *ox_default_options.strip_ns = '\0';
432
+ } else if (Qtrue == v) {
433
+ *ox_default_options.strip_ns = '*';
434
+ ox_default_options.strip_ns[1] = '\0';
435
+ } else if (Qnil != v) {
436
+ long slen;
437
+
438
+ Check_Type(v, T_STRING);
439
+ slen = RSTRING_LEN(v);
440
+ if (sizeof(ox_default_options.strip_ns) - 1 < slen) {
441
+ rb_raise(ox_parse_error_class, ":strip_namespace can be no longer than %ld characters.",
442
+ sizeof(ox_default_options.strip_ns) - 1);
443
+ }
444
+ strncpy(ox_default_options.strip_ns, StringValuePtr(v), sizeof(ox_default_options.strip_ns) - 1);
445
+ ox_default_options.strip_ns[sizeof(ox_default_options.strip_ns) - 1] = '\0';
446
+ }
447
+
387
448
  for (o = ynos; 0 != o->attr; o++) {
388
449
  v = rb_hash_lookup(opts, o->sym);
389
450
  if (Qnil == v) {
@@ -536,6 +597,44 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
536
597
  if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
537
598
  options.convert_special = (Qfalse != v);
538
599
  }
600
+
601
+ v = rb_hash_lookup(h, invalid_replace_sym);
602
+ if (Qnil == v) {
603
+ if (Qtrue == rb_funcall(h, has_key_id, 1, invalid_replace_sym)) {
604
+ options.allow_invalid = Yes;
605
+ }
606
+ } else {
607
+ long slen;
608
+
609
+ Check_Type(v, T_STRING);
610
+ slen = RSTRING_LEN(v);
611
+ if (sizeof(options.inv_repl) - 2 < slen) {
612
+ rb_raise(ox_parse_error_class, ":invalid_replace can be no longer than %ld characters.",
613
+ sizeof(options.inv_repl) - 2);
614
+ }
615
+ strncpy(options.inv_repl + 1, StringValuePtr(v), sizeof(options.inv_repl) - 1);
616
+ options.inv_repl[sizeof(options.inv_repl) - 1] = '\0';
617
+ *options.inv_repl = (char)slen;
618
+ options.allow_invalid = No;
619
+ }
620
+ v = rb_hash_lookup(h, strip_namespace_sym);
621
+ if (Qfalse == v) {
622
+ *options.strip_ns = '\0';
623
+ } else if (Qtrue == v) {
624
+ *options.strip_ns = '*';
625
+ options.strip_ns[1] = '\0';
626
+ } else if (Qnil != v) {
627
+ long slen;
628
+
629
+ Check_Type(v, T_STRING);
630
+ slen = RSTRING_LEN(v);
631
+ if (sizeof(options.strip_ns) - 1 < slen) {
632
+ rb_raise(ox_parse_error_class, ":strip_namespace can be no longer than %ld characters.",
633
+ sizeof(options.strip_ns) - 1);
634
+ }
635
+ strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
636
+ options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
637
+ }
539
638
  }
540
639
  #if HAS_ENCODING_SUPPORT
541
640
  if ('\0' == *options.encoding) {
@@ -590,8 +689,11 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
590
689
  /* call-seq: load(xml, options) => Ox::Document or Ox::Element or Object
591
690
  *
592
691
  * Parses an XML document String into an Ox::Document, or Ox::Element, or
593
- * Object depending on the options. Raises an exception if the XML is
594
- * malformed or the classes specified are not valid.
692
+ * Object depending on the options. Raises an exception if the XML is malformed
693
+ * or the classes specified are not valid. If a block is given it will be called
694
+ * on the completion of each complete top level entity with that entity as its
695
+ * only argument.
696
+ *
595
697
  * @param [String] xml XML String
596
698
  * @param [Hash] options load options
597
699
  * @param [:object|:generic|:limited] :mode format expected
@@ -604,6 +706,8 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
604
706
  * - *:auto_define* - auto define missing classes and modules
605
707
  * @param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent)
606
708
  * @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
709
+ * @param [nil|String] :invalid_replace replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters, will replace each invalid character.
710
+ * @param [String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
607
711
  */
608
712
  static VALUE
609
713
  load_str(int argc, VALUE *argv, VALUE self) {
@@ -624,12 +728,12 @@ load_str(int argc, VALUE *argv, VALUE self) {
624
728
  }
625
729
  #if HAS_ENCODING_SUPPORT
626
730
  #ifdef MACRUBY_RUBY
627
- encoding = rb_funcall(*argv, rb_intern("encoding"), 0);
731
+ encoding = rb_funcall(*argv, encoding_id, 0);
628
732
  #else
629
733
  encoding = rb_obj_encoding(*argv);
630
734
  #endif
631
735
  #elif HAS_PRIVATE_ENCODING
632
- encoding = rb_funcall(*argv, rb_intern("encoding"), 0);
736
+ encoding = rb_funcall(*argv, encoding_id, 0);
633
737
  #else
634
738
  encoding = Qnil;
635
739
  #endif
@@ -661,6 +765,8 @@ load_str(int argc, VALUE *argv, VALUE self) {
661
765
  * - *:auto_define* - auto define missing classes and modules
662
766
  * @param [Fixnum] :trace trace level as a Fixnum, default: 0 (silent)
663
767
  * @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
768
+ * @param [nil|String] :invalid_replace replacement string for invalid XML characters on dump. nil indicates include anyway as hex. A string, limited to 10 characters, will replace each invalid character.
769
+ * @param [String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
664
770
  */
665
771
  static VALUE
666
772
  load_file(int argc, VALUE *argv, VALUE self) {
@@ -713,6 +819,7 @@ load_file(int argc, VALUE *argv, VALUE self) {
713
819
  * @param [true|false] :symbolize flag indicating the parser symbolize element and attribute names
714
820
  * @param [true|false] :smart flag indicating the parser uses hints if available (use with html)
715
821
  * @param [:skip_return|:skip_white] :skip flag indicating the parser skips \r or collapse white space into a single space. Default (skip nothing)
822
+ * @param [nil|String|true|false] :strip_namespace "" or false result in no namespace stripping. A string of "*" or true will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
716
823
  */
717
824
  static VALUE
718
825
  sax_parse(int argc, VALUE *argv, VALUE self) {
@@ -722,7 +829,8 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
722
829
  options.convert_special = ox_default_options.convert_special;
723
830
  options.smart = (Yes == ox_default_options.smart);
724
831
  options.skip = ox_default_options.skip;
725
-
832
+ strcpy(options.strip_ns, ox_default_options.strip_ns);
833
+
726
834
  if (argc < 2) {
727
835
  rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
728
836
  }
@@ -744,6 +852,27 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
744
852
  options.skip = CrSkip;
745
853
  } else if (skip_white_sym == v) {
746
854
  options.skip = SpcSkip;
855
+ } else if (skip_none_sym == v) {
856
+ options.skip = NoSkip;
857
+ }
858
+ }
859
+ if (Qnil != (v = rb_hash_lookup(h, strip_namespace_sym))) {
860
+ if (Qfalse == v) {
861
+ *options.strip_ns = '\0';
862
+ } else if (Qtrue == v) {
863
+ *options.strip_ns = '*';
864
+ options.strip_ns[1] = '\0';
865
+ } else {
866
+ long slen;
867
+
868
+ Check_Type(v, T_STRING);
869
+ slen = RSTRING_LEN(v);
870
+ if (sizeof(options.strip_ns) - 1 < slen) {
871
+ rb_raise(ox_parse_error_class, ":strip_namespace can be no longer than %ld characters.",
872
+ sizeof(options.strip_ns) - 1);
873
+ }
874
+ strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
875
+ options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
747
876
  }
748
877
  }
749
878
  }
@@ -796,6 +925,26 @@ parse_dump_options(VALUE ropts, Options copts) {
796
925
  rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
797
926
  }
798
927
  }
928
+ v = rb_hash_lookup(ropts, invalid_replace_sym);
929
+ if (Qnil == v) {
930
+ if (Qtrue == rb_funcall(ropts, has_key_id, 1, invalid_replace_sym)) {
931
+ copts->allow_invalid = Yes;
932
+ }
933
+ } else {
934
+ long slen;
935
+
936
+ Check_Type(v, T_STRING);
937
+ slen = RSTRING_LEN(v);
938
+ if (sizeof(copts->inv_repl) - 2 < slen) {
939
+ rb_raise(ox_parse_error_class, ":invalid_replace can be no longer than %ld characters.",
940
+ sizeof(copts->inv_repl) - 2);
941
+ }
942
+ strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
943
+ copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
944
+ *copts->inv_repl = (char)slen;
945
+ copts->allow_invalid = No;
946
+ }
947
+
799
948
  for (o = ynos; 0 != o->attr; o++) {
800
949
  if (Qnil != (v = rb_hash_lookup(ropts, o->sym))) {
801
950
  VALUE c = rb_obj_class(v);
@@ -810,7 +959,7 @@ parse_dump_options(VALUE ropts, Options copts) {
810
959
  }
811
960
  }
812
961
  }
813
- }
962
+ }
814
963
 
815
964
  /* call-seq: dump(obj, options) => xml-string
816
965
  *
@@ -969,6 +1118,9 @@ void Init_ox() {
969
1118
  ox_tv_usec_id = rb_intern("tv_usec");
970
1119
  ox_value_id = rb_intern("value");
971
1120
 
1121
+ encoding_id = rb_intern("encoding");
1122
+ has_key_id = rb_intern("has_key?");
1123
+
972
1124
  rb_require("ox/version");
973
1125
  rb_require("ox/error");
974
1126
  rb_require("ox/hasattrs");
@@ -997,6 +1149,7 @@ void Init_ox() {
997
1149
  effort_sym = ID2SYM(rb_intern("effort")); rb_gc_register_address(&effort_sym);
998
1150
  generic_sym = ID2SYM(rb_intern("generic")); rb_gc_register_address(&generic_sym);
999
1151
  indent_sym = ID2SYM(rb_intern("indent")); rb_gc_register_address(&indent_sym);
1152
+ invalid_replace_sym = ID2SYM(rb_intern("invalid_replace")); rb_gc_register_address(&invalid_replace_sym);
1000
1153
  limited_sym = ID2SYM(rb_intern("limited")); rb_gc_register_address(&limited_sym);
1001
1154
  mode_sym = ID2SYM(rb_intern("mode")); rb_gc_register_address(&mode_sym);
1002
1155
  object_sym = ID2SYM(rb_intern("object")); rb_gc_register_address(&object_sym);
@@ -1006,6 +1159,7 @@ void Init_ox() {
1006
1159
  skip_none_sym = ID2SYM(rb_intern("skip_none")); rb_gc_register_address(&skip_none_sym);
1007
1160
  skip_return_sym = ID2SYM(rb_intern("skip_return")); rb_gc_register_address(&skip_return_sym);
1008
1161
  skip_sym = ID2SYM(rb_intern("skip")); rb_gc_register_address(&skip_sym);
1162
+ strip_namespace_sym = ID2SYM(rb_intern("strip_namespace")); rb_gc_register_address(&strip_namespace_sym);
1009
1163
  skip_white_sym = ID2SYM(rb_intern("skip_white")); rb_gc_register_address(&skip_white_sym);
1010
1164
  smart_sym = ID2SYM(rb_intern("smart")); rb_gc_register_address(&smart_sym);
1011
1165
  strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym);
@@ -133,6 +133,9 @@ typedef struct _Options {
133
133
  char skip; /* skip mode */
134
134
  char smart; /* YesNo sax smart mode */
135
135
  char convert_special;/* boolean true or false */
136
+ char allow_invalid; /* YesNo */
137
+ char inv_repl[12]; /* max 10 valid characters, first character is the length */
138
+ char strip_ns[64]; /* namespace to strip, \0 is no-strip, \* is all, else only matches */
136
139
  #if HAS_ENCODING_SUPPORT
137
140
  rb_encoding *rb_enc;
138
141
  #elif HAS_PRIVATE_ENCODING
@@ -43,6 +43,8 @@ static int collapse_special(PInfo pi, char *str);
43
43
  * all cases to parse the string.
44
44
  */
45
45
 
46
+ static char xml_valid_lower_chars[34] = "xxxxxxxxxooxxoxxxxxxxxxxxxxxxxxxo";
47
+
46
48
  inline static int
47
49
  is_white(char c) {
48
50
  switch (c) {
@@ -95,6 +97,7 @@ VALUE
95
97
  ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err) {
96
98
  struct _PInfo pi;
97
99
  int body_read = 0;
100
+ int block_given = rb_block_given_p();
98
101
 
99
102
  if (0 == xml) {
100
103
  set_error(err, "Invalid arg, xml string can not be null", xml, 0);
@@ -171,6 +174,9 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err) {
171
174
  helper_stack_cleanup(&pi.helpers);
172
175
  return Qnil;
173
176
  }
177
+ if (block_given && Qnil != pi.obj && Qundef != pi.obj) {
178
+ rb_yield(pi.obj);
179
+ }
174
180
  }
175
181
  helper_stack_cleanup(&pi.helpers);
176
182
  return pi.obj;
@@ -211,7 +217,6 @@ read_instruction(PInfo pi) {
211
217
  char *cend;
212
218
  int attrs_ok = 1;
213
219
 
214
-
215
220
  *content = '\0';
216
221
  attr_stack_init(&attrs);
217
222
  if (0 == (target = read_name_token(pi))) {
@@ -567,9 +572,32 @@ read_element(PInfo pi) {
567
572
  return 0;
568
573
  }
569
574
  if (first && start != slash - 1) {
570
- /* some white space between start and here so add as text */
575
+ // Some white space between start and here so add as
576
+ // text after checking skip.
571
577
  *(slash - 1) = '\0';
572
- pi->pcb->add_text(pi, start, 1);
578
+ switch (pi->options->skip) {
579
+ case CrSkip: {
580
+ char *s = start;
581
+ char *e = start;
582
+
583
+ for (; '\0' != *e; e++) {
584
+ if ('\r' != *e) {
585
+ *s++ = *e;
586
+ }
587
+ }
588
+ *s = '\0';
589
+ break;
590
+ }
591
+ case SpcSkip:
592
+ *start = '\0';
593
+ break;
594
+ case NoSkip:
595
+ default:
596
+ break;
597
+ }
598
+ if ('\0' != *start) {
599
+ pi->pcb->add_text(pi, start, 1);
600
+ }
573
601
  }
574
602
  pi->s++;
575
603
  pi->pcb->end_element(pi, ename);
@@ -667,27 +695,35 @@ read_text(PInfo pi) {
667
695
  return;
668
696
  }
669
697
  } else {
670
- switch (pi->options->skip) {
671
- case CrSkip:
672
- if (buf != b && '\n' == c && '\r' == *(b - 1)) {
673
- *(b - 1) = '\n';
674
- } else {
675
- *b++ = c;
698
+ if (0 <= c && c <= 0x20) {
699
+ if (StrictEffort == pi->options->effort && 'x' == xml_valid_lower_chars[(unsigned char)c]) {
700
+ set_error(&pi->err, "invalid character", pi->str, pi->s);
701
+ return;
676
702
  }
677
- break;
678
- case SpcSkip:
679
- if (is_white(c)) {
680
- if (buf == b || ' ' != *(b - 1)) {
681
- *b++ = ' ';
703
+ switch (pi->options->skip) {
704
+ case CrSkip:
705
+ if (buf != b && '\n' == c && '\r' == *(b - 1)) {
706
+ *(b - 1) = '\n';
707
+ } else {
708
+ *b++ = c;
682
709
  }
683
- } else {
710
+ break;
711
+ case SpcSkip:
712
+ if (is_white(c)) {
713
+ if (buf == b || ' ' != *(b - 1)) {
714
+ *b++ = ' ';
715
+ }
716
+ } else {
717
+ *b++ = c;
718
+ }
719
+ break;
720
+ case NoSkip:
721
+ default:
684
722
  *b++ = c;
685
- }
686
- break;
687
- case NoSkip:
688
- default:
723
+ break;
724
+ }
725
+ } else {
689
726
  *b++ = c;
690
- break;
691
727
  }
692
728
  }
693
729
  break;
@@ -795,6 +831,15 @@ read_name_token(PInfo pi) {
795
831
  set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
796
832
  return 0;
797
833
  break; /* to avoid warnings */
834
+ case ':':
835
+ if ('\0' == *pi->options->strip_ns) {
836
+ break;
837
+ } else if ('*' == *pi->options->strip_ns && '\0' == pi->options->strip_ns[1]) {
838
+ start = pi->s + 1;
839
+ } else if (0 == strncmp(pi->options->strip_ns, start, pi->s - start)) {
840
+ start = pi->s + 1;
841
+ }
842
+ break;
798
843
  default:
799
844
  break;
800
845
  }
@@ -1401,6 +1401,15 @@ read_name_token(SaxDrive dr) {
1401
1401
  /* documents never terminate after a name token */
1402
1402
  ox_sax_drive_error(dr, NO_TERM "document not terminated");
1403
1403
  return '\0';
1404
+ case ':':
1405
+ if ('\0' == *dr->options.strip_ns) {
1406
+ break;
1407
+ } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1408
+ dr->buf.str = dr->buf.tail;
1409
+ } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1410
+ dr->buf.str = dr->buf.tail;
1411
+ }
1412
+ break;
1404
1413
  default:
1405
1414
  break;
1406
1415
  }
@@ -17,6 +17,7 @@ typedef struct _SaxOptions {
17
17
  int convert_special;
18
18
  int smart;
19
19
  SkipMode skip;
20
+ char strip_ns[64];
20
21
  } *SaxOptions;
21
22
 
22
23
  typedef struct _SaxDrive {
@@ -30,11 +30,14 @@ static int read_from_str(Buf buf);
30
30
 
31
31
  void
32
32
  ox_sax_buf_init(Buf buf, VALUE io) {
33
- VALUE io_class = rb_obj_class(io);
34
- VALUE rfd;
33
+ volatile VALUE io_class = rb_obj_class(io);
34
+ VALUE rfd;
35
35
 
36
- if (ox_stringio_class == io_class && 0 == FIX2INT(rb_funcall2(io, ox_pos_id, 0, 0))) {
37
- VALUE s = rb_funcall2(io, ox_string_id, 0, 0);
36
+ if (rb_cString == io_class) {
37
+ buf->read_func = read_from_str;
38
+ buf->in.str = StringValuePtr(io);
39
+ } else if (ox_stringio_class == io_class && 0 == FIX2INT(rb_funcall2(io, ox_pos_id, 0, 0))) {
40
+ volatile VALUE s = rb_funcall2(io, ox_string_id, 0, 0);
38
41
 
39
42
  buf->read_func = read_from_str;
40
43
  buf->in.str = StringValuePtr(s);
@@ -40,8 +40,8 @@ module Ox
40
40
  # @param [String] name name of the Element
41
41
  def initialize(name)
42
42
  super
43
- @attributes = nil
44
- @nodes = nil
43
+ @attributes = {}
44
+ @nodes = []
45
45
  end
46
46
  alias name value
47
47
 
@@ -49,7 +49,7 @@ module Ox
49
49
  # Element.
50
50
  # @return [Array] all child Nodes.
51
51
  def nodes
52
- return [] if !instance_variable_defined?(:@nodes) or @nodes.nil?
52
+ @nodes = [] if !instance_variable_defined?(:@nodes) or @nodes.nil?
53
53
  @nodes
54
54
  end
55
55
 
@@ -83,6 +83,15 @@ module Ox
83
83
  nil
84
84
  end
85
85
 
86
+ # Clears any child nodes of an element and replaces those with a single Text
87
+ # (String) node. Note the existing nodes array is modified and not replaced.
88
+ # @param [String] txt to become the only element of the nodes array
89
+ def replace_text(txt)
90
+ raise "the argument to replace_text() must be a String" unless txt.is_a?(String)
91
+ @nodes.clear()
92
+ @nodes << txt
93
+ end
94
+
86
95
  # Returns an array of Nodes or Strings that correspond to the locations
87
96
  # specified by the path parameter. The path parameter describes the path
88
97
  # to the return values which can be either nodes in the XML or
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '2.2.4'
4
+ VERSION = '2.3.0'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.4
4
+ version: 2.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ohler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-02-04 00:00:00.000000000 Z
11
+ date: 2016-02-21 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "A fast XML parser and object serializer that uses only standard C lib.\n
14
14
  \ \nOptimized XML (Ox), as the name implies was written to provide speed