ox 1.4.6 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

data/README.md CHANGED
@@ -24,11 +24,17 @@ A fast XML parser and Object marshaller as a Ruby gem.
24
24
 
25
25
  [Fast Ruby XML Serialization](http://www.ohler.com/software/thoughts/Blog/Entries/2011/9/20_Ruby_Object_XML_Serialization.html) to see how Ox can be used as a faster replacement for Marshal.
26
26
 
27
+ *Fast JSON parser and marshaller on RubyGems*: https://rubygems.org/gems/oj
28
+
29
+ *Fast JSON parser and marshaller on GitHub*: https://github.com/ohler55/oj
30
+
27
31
  ## <a name="release">Release Notes</a>
28
32
 
29
- ### Release 1.4.6
33
+ ### Release 1.5.0
34
+
35
+ - Changed to use escape sequences in XML instead of using B64 encoding. B64 encoding type is still supported on read.
30
36
 
31
- - Adopted the Exception message encoding hack to work for the latest Ruby versions.
37
+ - Prefixed extern C functions with ox_ to avoid conflicts.
32
38
 
33
39
  ## <a name="description">Description</a>
34
40
 
@@ -44,6 +44,8 @@
44
44
  #include "cache8.h"
45
45
  #include "ox.h"
46
46
 
47
+ #define USE_B64 0
48
+
47
49
  typedef unsigned long ulong;
48
50
 
49
51
  typedef struct _Str {
@@ -93,6 +95,7 @@ static void dump_end(Out out, Element e);
93
95
  static void grow(Out out, size_t len);
94
96
 
95
97
  static void dump_value(Out out, const char *value, size_t size);
98
+ static void dump_str_value(Out out, const char *value, size_t size);
96
99
  static int dump_var(ID key, VALUE value, Out out);
97
100
  static void dump_num(Out out, VALUE obj);
98
101
  static void dump_time_thin(Out out, VALUE obj);
@@ -101,27 +104,47 @@ static int dump_hash(VALUE key, VALUE value, Out out);
101
104
 
102
105
  static int is_xml_friendly(const u_char *str, int len);
103
106
 
107
+ static const char hex_chars[17] = "0123456789abcdef";
104
108
 
105
109
  static char xml_friendly_chars[256] = "\
106
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\
107
- ooxoooxxooooooooooooooooooooxoxo\
108
- oooooooooooooooooooooooooooooooo\
109
- xoooooooooooooooooooooooooooooox\
110
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\
111
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\
112
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx\
113
- xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx";
110
+ 88888888811881888888888888888888\
111
+ 11611156111111111111111111114141\
112
+ 11111111111111111111111111111111\
113
+ 1111111111111111111111111111111x\
114
+ 11111111111111111111111111111111\
115
+ 11111111111111111111111111111111\
116
+ 11111111111111111111111111111111\
117
+ 11111111111111111111111111111111";
114
118
 
115
119
  inline static int
116
120
  is_xml_friendly(const u_char *str, int len) {
117
121
  for (; 0 < len; str++, len--) {
118
- if ('x' == xml_friendly_chars[*str]) {
122
+ if ('1' != xml_friendly_chars[*str]) {
119
123
  return 0;
120
124
  }
121
125
  }
122
126
  return 1;
123
127
  }
124
128
 
129
+ inline static size_t
130
+ xml_str_len(const u_char *str, size_t len) {
131
+ size_t size = 0;
132
+
133
+ for (; 0 < len; str++, len--) {
134
+ size += xml_friendly_chars[*str];
135
+ }
136
+ return size - len * (size_t)'0';
137
+ }
138
+
139
+ inline static void
140
+ dump_hex(u_char c, Out out) {
141
+ u_char d = (c >> 4) & 0x0F;
142
+
143
+ *out->cur++ = hex_chars[d];
144
+ d = c & 0x0F;
145
+ *out->cur++ = hex_chars[d];
146
+ }
147
+
125
148
  inline static Type
126
149
  obj_class_code(VALUE obj) {
127
150
  switch (rb_type(obj)) {
@@ -319,6 +342,59 @@ dump_value(Out out, const char *value, size_t size) {
319
342
  *out->cur = '\0';
320
343
  }
321
344
 
345
+ inline static void
346
+ dump_str_value(Out out, const char *value, size_t size) {
347
+ size_t xsize = xml_str_len((const u_char*)value, size);
348
+
349
+ if (out->end - out->cur <= (long)xsize) {
350
+ grow(out, xsize);
351
+ }
352
+ for (; '\0' != *value; value++) {
353
+ if ('1' == xml_friendly_chars[(u_char)*value]) {
354
+ *out->cur++ = *value;
355
+ } else {
356
+ *out->cur++ = '&';
357
+ switch (*value) {
358
+ case '"':
359
+ *out->cur++ = 'q';
360
+ *out->cur++ = 'u';
361
+ *out->cur++ = 'o';
362
+ *out->cur++ = 't';
363
+ break;
364
+ case '&':
365
+ *out->cur++ = 'a';
366
+ *out->cur++ = 'm';
367
+ *out->cur++ = 'p';
368
+ break;
369
+ case '\'':
370
+ *out->cur++ = 'a';
371
+ *out->cur++ = 'p';
372
+ *out->cur++ = 'o';
373
+ *out->cur++ = 's';
374
+ break;
375
+ case '<':
376
+ *out->cur++ = 'l';
377
+ *out->cur++ = 't';
378
+ break;
379
+ case '>':
380
+ *out->cur++ = 'g';
381
+ *out->cur++ = 't';
382
+ break;
383
+ default:
384
+ *out->cur++ = '&';
385
+ *out->cur++ = '#';
386
+ *out->cur++ = 'x';
387
+ *out->cur++ = '0';
388
+ *out->cur++ = '0';
389
+ dump_hex(*value, out);
390
+ break;
391
+ }
392
+ *out->cur++ = ';';
393
+ }
394
+ }
395
+ *out->cur = '\0';
396
+ }
397
+
322
398
  inline static void
323
399
  dump_num(Out out, VALUE obj) {
324
400
  char buf[32];
@@ -356,8 +432,8 @@ static void
356
432
  dump_time_thin(Out out, VALUE obj) {
357
433
  char buf[64];
358
434
  char *b = buf + sizeof(buf) - 1;
359
- time_t sec = NUM2LONG(rb_funcall2(obj, tv_sec_id, 0, 0));
360
- long usec = NUM2LONG(rb_funcall2(obj, tv_usec_id, 0, 0));
435
+ time_t sec = NUM2LONG(rb_funcall2(obj, ox_tv_sec_id, 0, 0));
436
+ long usec = NUM2LONG(rb_funcall2(obj, ox_tv_usec_id, 0, 0));
361
437
  char *dot = b - 7;
362
438
  long size;
363
439
 
@@ -381,8 +457,8 @@ dump_time_thin(Out out, VALUE obj) {
381
457
  static void
382
458
  dump_time_xsd(Out out, VALUE obj) {
383
459
  struct tm *tm;
384
- time_t sec = NUM2LONG(rb_funcall2(obj, tv_sec_id, 0, 0));
385
- long usec = NUM2LONG(rb_funcall2(obj, tv_usec_id, 0, 0));
460
+ time_t sec = NUM2LONG(rb_funcall2(obj, ox_tv_sec_id, 0, 0));
461
+ long usec = NUM2LONG(rb_funcall2(obj, ox_tv_usec_id, 0, 0));
386
462
  int tzhour, tzmin;
387
463
  char tzsign = '+';
388
464
 
@@ -537,10 +613,11 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
537
613
  }
538
614
  str = StringValuePtr(obj);
539
615
  cnt = (int)RSTRING_LEN(obj);
616
+ #if USE_B64
540
617
  if (is_xml_friendly((u_char*)str, cnt)) {
541
618
  e.type = StringCode;
542
619
  out->w_start(out, &e);
543
- dump_value(out, str, cnt);
620
+ dump_str_value(out, str, cnt);
544
621
  e.indent = -1;
545
622
  out->w_end(out, &e);
546
623
  } else {
@@ -563,6 +640,13 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
563
640
  free(b64);
564
641
  }
565
642
  }
643
+ #else
644
+ e.type = StringCode;
645
+ out->w_start(out, &e);
646
+ dump_str_value(out, str, cnt);
647
+ e.indent = -1;
648
+ out->w_end(out, &e);
649
+ #endif
566
650
  break;
567
651
  }
568
652
  case T_SYMBOL:
@@ -570,10 +654,11 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
570
654
  const char *sym = rb_id2name(SYM2ID(obj));
571
655
 
572
656
  cnt = (int)strlen(sym);
657
+ #if USE_B64
573
658
  if (is_xml_friendly((u_char*)sym, cnt)) {
574
659
  e.type = SymbolCode;
575
660
  out->w_start(out, &e);
576
- dump_value(out, sym, cnt);
661
+ dump_str_value(out, sym, cnt);
577
662
  e.indent = -1;
578
663
  out->w_end(out, &e);
579
664
  } else {
@@ -596,6 +681,13 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
596
681
  free(b64);
597
682
  }
598
683
  }
684
+ #else
685
+ e.type = SymbolCode;
686
+ out->w_start(out, &e);
687
+ dump_str_value(out, sym, cnt);
688
+ e.indent = -1;
689
+ out->w_end(out, &e);
690
+ #endif
599
691
  break;
600
692
  }
601
693
  case T_DATA:
@@ -641,9 +733,9 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
641
733
 
642
734
  e.type = RangeCode; e.clas.len = 5; e.clas.str = "Range";
643
735
  out->w_start(out, &e);
644
- dump_obj(beg_id, beg, d2, out);
645
- dump_obj(end_id, end, d2, out);
646
- dump_obj(excl_id, excl, d2, out);
736
+ dump_obj(ox_beg_id, beg, d2, out);
737
+ dump_obj(ox_end_id, end, d2, out);
738
+ dump_obj(ox_excl_id, excl, d2, out);
647
739
  out->w_end(out, &e);
648
740
  } else {
649
741
  char num_buf[16];
@@ -730,7 +822,7 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
730
822
  case T_REGEXP:
731
823
  {
732
824
  #if 1
733
- VALUE rs = rb_funcall2(obj, inspect_id, 0, 0);
825
+ VALUE rs = rb_funcall2(obj, ox_inspect_id, 0, 0);
734
826
  const char *s = StringValuePtr(rs);
735
827
 
736
828
  cnt = (int)RSTRING_LEN(rs);
@@ -742,9 +834,10 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
742
834
  #endif
743
835
  e.type = RegexpCode;
744
836
  out->w_start(out, &e);
837
+ #if USE_B64
745
838
  if (is_xml_friendly((u_char*)s, cnt)) {
746
839
  //dump_value(out, "/", 1);
747
- dump_value(out, s, cnt);
840
+ dump_str_value(out, s, cnt);
748
841
  } else {
749
842
  char buf64[4096];
750
843
  char *b64 = buf64;
@@ -761,6 +854,9 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
761
854
  free(b64);
762
855
  }
763
856
  }
857
+ #else
858
+ dump_str_value(out, s, cnt);
859
+ #endif
764
860
  #if 0
765
861
  dump_value(out, "/", 1);
766
862
  if (0 != (ONIG_OPTION_MULTILINE & options)) {
@@ -831,7 +927,7 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
831
927
 
832
928
  static int
833
929
  dump_var(ID key, VALUE value, Out out) {
834
- if (T_DATA == rb_type(value) && key == mesg_id) {
930
+ if (T_DATA == rb_type(value) && key == ox_mesg_id) {
835
931
  /* There is a secret recipe that keeps Exception mesg attributes as a
836
932
  * T_DATA until it is needed. The safe way around this hack is to call
837
933
  * the message() method and use the returned string as the
@@ -839,7 +935,7 @@ dump_var(ID key, VALUE value, Out out) {
839
935
  * hack. If there are others they will have to be handled one at a
840
936
  * time.
841
937
  */
842
- value = rb_funcall(out->obj, message_id, 0);
938
+ value = rb_funcall(out->obj, ox_message_id, 0);
843
939
  }
844
940
  dump_obj(key, value, out->depth, out);
845
941
 
@@ -856,11 +952,11 @@ dump_hash(VALUE key, VALUE value, Out out) {
856
952
 
857
953
  static void
858
954
  dump_gen_doc(VALUE obj, unsigned int depth, Out out) {
859
- VALUE attrs = rb_attr_get(obj, attributes_id);
860
- VALUE nodes = rb_attr_get(obj, nodes_id);
955
+ VALUE attrs = rb_attr_get(obj, ox_attributes_id);
956
+ VALUE nodes = rb_attr_get(obj, ox_nodes_id);
861
957
 
862
958
  if ('\0' == *out->opts->encoding && Qnil != attrs) {
863
- VALUE renc = rb_hash_lookup(attrs, encoding_sym);
959
+ VALUE renc = rb_hash_lookup(attrs, ox_encoding_sym);
864
960
 
865
961
  if (Qnil != renc) {
866
962
  const char *enc = StringValuePtr(renc);
@@ -872,7 +968,7 @@ dump_gen_doc(VALUE obj, unsigned int depth, Out out) {
872
968
  dump_value(out, "<?xml", 5);
873
969
  if (Qnil != attrs) {
874
970
  rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out);
875
- }
971
+ }
876
972
  dump_value(out, "?>", 2);
877
973
  }
878
974
  if (Yes == out->opts->with_instruct) {
@@ -889,9 +985,9 @@ dump_gen_doc(VALUE obj, unsigned int depth, Out out) {
889
985
 
890
986
  static void
891
987
  dump_gen_element(VALUE obj, unsigned int depth, Out out) {
892
- VALUE rname = rb_attr_get(obj, value_id);
893
- VALUE attrs = rb_attr_get(obj, attributes_id);
894
- VALUE nodes = rb_attr_get(obj, nodes_id);
988
+ VALUE rname = rb_attr_get(obj, ox_value_id);
989
+ VALUE attrs = rb_attr_get(obj, ox_attributes_id);
990
+ VALUE nodes = rb_attr_get(obj, ox_nodes_id);
895
991
  const char *name = StringValuePtr(rname);
896
992
  long nlen = RSTRING_LEN(rname);
897
993
  size_t size;
@@ -950,7 +1046,7 @@ dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
950
1046
  if (ox_element_clas == clas) {
951
1047
  dump_gen_element(*np, d2, out);
952
1048
  } else if (rb_cString == clas) {
953
- dump_value(out, StringValuePtr(*np), RSTRING_LEN(*np));
1049
+ dump_str_value(out, StringValuePtr(*np), RSTRING_LEN(*np));
954
1050
  indent_needed = (1 == cnt) ? 0 : 1;
955
1051
  } else if (ox_comment_clas == clas) {
956
1052
  dump_gen_val_node(*np, d2, "<!-- ", 5, " -->", 4, out);
@@ -989,7 +1085,7 @@ static void
989
1085
  dump_gen_val_node(VALUE obj, unsigned int depth,
990
1086
  const char *pre, size_t plen,
991
1087
  const char *suf, size_t slen, Out out) {
992
- VALUE v = rb_attr_get(obj, value_id);
1088
+ VALUE v = rb_attr_get(obj, ox_value_id);
993
1089
  const char *val;
994
1090
  size_t vlen;
995
1091
  size_t size;
@@ -1050,7 +1146,7 @@ dump_obj_to_xml(VALUE obj, Options copts, Out out) {
1050
1146
  }
1051
1147
 
1052
1148
  char*
1053
- write_obj_to_str(VALUE obj, Options copts) {
1149
+ ox_write_obj_to_str(VALUE obj, Options copts) {
1054
1150
  struct _Out out;
1055
1151
 
1056
1152
  dump_obj_to_xml(obj, copts, &out);
@@ -1058,7 +1154,7 @@ write_obj_to_str(VALUE obj, Options copts) {
1058
1154
  }
1059
1155
 
1060
1156
  void
1061
- write_obj_to_file(VALUE obj, const char *path, Options copts) {
1157
+ ox_write_obj_to_file(VALUE obj, const char *path, Options copts) {
1062
1158
  struct _Out out;
1063
1159
  size_t size;
1064
1160
  FILE *f;
@@ -94,8 +94,8 @@ create_doc(PInfo pi) {
94
94
  pi->h = pi->helpers;
95
95
  doc = rb_obj_alloc(ox_document_clas);
96
96
  nodes = rb_ary_new();
97
- rb_ivar_set(doc, attributes_id, rb_hash_new());
98
- rb_ivar_set(doc, nodes_id, nodes);
97
+ rb_ivar_set(doc, ox_attributes_id, rb_hash_new());
98
+ rb_ivar_set(doc, ox_nodes_id, nodes);
99
99
  pi->h->obj = nodes;
100
100
  pi->obj = doc;
101
101
  }
@@ -119,7 +119,7 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
119
119
  VALUE rstr = rb_str_new2(attrs->name);
120
120
 
121
121
  rb_enc_associate(rstr, pi->encoding);
122
- sym = rb_funcall(rstr, to_sym_id, 0);
122
+ sym = rb_funcall(rstr, ox_to_sym_id, 0);
123
123
  } else {
124
124
  sym = ID2SYM(rb_intern(attrs->name));
125
125
  }
@@ -134,8 +134,8 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
134
134
  #endif
135
135
  }
136
136
  nodes = rb_ary_new();
137
- rb_ivar_set(doc, attributes_id, ah);
138
- rb_ivar_set(doc, nodes_id, nodes);
137
+ rb_ivar_set(doc, ox_attributes_id, ah);
138
+ rb_ivar_set(doc, ox_nodes_id, nodes);
139
139
  pi->h->obj = nodes;
140
140
  pi->obj = doc;
141
141
  }
@@ -203,7 +203,7 @@ add_doctype(PInfo pi, const char *docType) {
203
203
  rb_enc_associate(s, pi->encoding);
204
204
  }
205
205
  #endif
206
- rb_ivar_set(n, value_id, s);
206
+ rb_ivar_set(n, ox_value_id, s);
207
207
  if (0 == pi->h) { // top level object
208
208
  create_doc(pi);
209
209
  }
@@ -220,7 +220,7 @@ add_comment(PInfo pi, const char *comment) {
220
220
  rb_enc_associate(s, pi->encoding);
221
221
  }
222
222
  #endif
223
- rb_ivar_set(n, value_id, s);
223
+ rb_ivar_set(n, ox_value_id, s);
224
224
  if (0 == pi->h) { // top level object
225
225
  create_doc(pi);
226
226
  }
@@ -237,7 +237,7 @@ add_cdata(PInfo pi, const char *cdata, size_t len) {
237
237
  rb_enc_associate(s, pi->encoding);
238
238
  }
239
239
  #endif
240
- rb_ivar_set(n, value_id, s);
240
+ rb_ivar_set(n, ox_value_id, s);
241
241
  if (0 == pi->h) { // top level object
242
242
  create_doc(pi);
243
243
  }
@@ -270,7 +270,7 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
270
270
  }
271
271
  #endif
272
272
  e = rb_obj_alloc(ox_element_clas);
273
- rb_ivar_set(e, value_id, s);
273
+ rb_ivar_set(e, ox_value_id, s);
274
274
  if (0 != attrs->name) {
275
275
  VALUE ah = rb_hash_new();
276
276
 
@@ -278,13 +278,13 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
278
278
  VALUE sym;
279
279
  VALUE *slot;
280
280
 
281
- if (Qundef == (sym = ox_cache_get(symbol_cache, attrs->name, &slot))) {
281
+ if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot))) {
282
282
  #ifdef HAVE_RUBY_ENCODING_H
283
283
  if (0 != pi->encoding) {
284
284
  VALUE rstr = rb_str_new2(attrs->name);
285
285
 
286
286
  rb_enc_associate(rstr, pi->encoding);
287
- sym = rb_funcall(rstr, to_sym_id, 0);
287
+ sym = rb_funcall(rstr, ox_to_sym_id, 0);
288
288
  } else {
289
289
  sym = ID2SYM(rb_intern(attrs->name));
290
290
  }
@@ -301,7 +301,7 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
301
301
  #endif
302
302
  rb_hash_aset(ah, sym, s);
303
303
  }
304
- rb_ivar_set(e, attributes_id, ah);
304
+ rb_ivar_set(e, ox_attributes_id, ah);
305
305
  }
306
306
  if (0 == pi->h) { // top level object
307
307
  pi->h = pi->helpers;
@@ -313,7 +313,7 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
313
313
  if (hasChildren) {
314
314
  VALUE nodes = rb_ary_new();
315
315
 
316
- rb_ivar_set(e, nodes_id, nodes);
316
+ rb_ivar_set(e, ox_nodes_id, nodes);
317
317
  pi->h->obj = nodes;
318
318
  }
319
319
  }