ox 1.6.6 → 1.6.7

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

data/README.md CHANGED
@@ -34,9 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## <a name="release">Release Notes</a>
36
36
 
37
- ### Release 1.6.6
37
+ ### Release 1.6.7
38
38
 
39
- - A SAX parser encoding bug discovered by bughit was fixed.
39
+ - Now uses the encoding of the input XML as the default encoding for the parsed output if the default options encoding is not set and the encoding is not set in the XML file prolog.
40
40
 
41
41
  ## <a name="description">Description</a>
42
42
 
data/ext/ox/gen_load.c CHANGED
@@ -116,10 +116,10 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
116
116
  for (; 0 != attrs->name; attrs++) {
117
117
  if (Yes == pi->options->sym_keys) {
118
118
  #if HAS_ENCODING_SUPPORT
119
- if (0 != pi->encoding) {
119
+ if (0 != pi->options->rb_enc) {
120
120
  VALUE rstr = rb_str_new2(attrs->name);
121
121
 
122
- rb_enc_associate(rstr, pi->encoding);
122
+ rb_enc_associate(rstr, pi->options->rb_enc);
123
123
  sym = rb_funcall(rstr, ox_to_sym_id, 0);
124
124
  } else {
125
125
  sym = ID2SYM(rb_intern(attrs->name));
@@ -132,15 +132,15 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
132
132
  VALUE rstr = rb_str_new2(attrs->name);
133
133
 
134
134
  #if HAS_ENCODING_SUPPORT
135
- if (0 != pi->encoding) {
136
- rb_enc_associate(rstr, pi->encoding);
135
+ if (0 != pi->options->rb_enc) {
136
+ rb_enc_associate(rstr, pi->options->rb_enc);
137
137
  }
138
138
  #endif
139
139
  rb_hash_aset(ah, rstr, rb_str_new2(attrs->value));
140
140
  }
141
141
  #if HAS_ENCODING_SUPPORT
142
142
  if (0 == strcmp("encoding", attrs->name)) {
143
- pi->encoding = rb_enc_find(attrs->value);
143
+ pi->options->rb_enc = rb_enc_find(attrs->value);
144
144
  }
145
145
  #endif
146
146
  }
@@ -210,8 +210,8 @@ add_doctype(PInfo pi, const char *docType) {
210
210
  VALUE s = rb_str_new2(docType);
211
211
 
212
212
  #if HAS_ENCODING_SUPPORT
213
- if (0 != pi->encoding) {
214
- rb_enc_associate(s, pi->encoding);
213
+ if (0 != pi->options->rb_enc) {
214
+ rb_enc_associate(s, pi->options->rb_enc);
215
215
  }
216
216
  #endif
217
217
  rb_ivar_set(n, ox_at_value_id, s);
@@ -227,8 +227,8 @@ add_comment(PInfo pi, const char *comment) {
227
227
  VALUE s = rb_str_new2(comment);
228
228
 
229
229
  #if HAS_ENCODING_SUPPORT
230
- if (0 != pi->encoding) {
231
- rb_enc_associate(s, pi->encoding);
230
+ if (0 != pi->options->rb_enc) {
231
+ rb_enc_associate(s, pi->options->rb_enc);
232
232
  }
233
233
  #endif
234
234
  rb_ivar_set(n, ox_at_value_id, s);
@@ -244,8 +244,8 @@ add_cdata(PInfo pi, const char *cdata, size_t len) {
244
244
  VALUE s = rb_str_new2(cdata);
245
245
 
246
246
  #if HAS_ENCODING_SUPPORT
247
- if (0 != pi->encoding) {
248
- rb_enc_associate(s, pi->encoding);
247
+ if (0 != pi->options->rb_enc) {
248
+ rb_enc_associate(s, pi->options->rb_enc);
249
249
  }
250
250
  #endif
251
251
  rb_ivar_set(n, ox_at_value_id, s);
@@ -260,8 +260,8 @@ add_text(PInfo pi, char *text, int closed) {
260
260
  VALUE s = rb_str_new2(text);
261
261
 
262
262
  #if HAS_ENCODING_SUPPORT
263
- if (0 != pi->encoding) {
264
- rb_enc_associate(s, pi->encoding);
263
+ if (0 != pi->options->rb_enc) {
264
+ rb_enc_associate(s, pi->options->rb_enc);
265
265
  }
266
266
  #endif
267
267
  if (0 == pi->h) { /* top level object */
@@ -276,8 +276,8 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
276
276
  VALUE s = rb_str_new2(ename);
277
277
 
278
278
  #if HAS_ENCODING_SUPPORT
279
- if (0 != pi->encoding) {
280
- rb_enc_associate(s, pi->encoding);
279
+ if (0 != pi->options->rb_enc) {
280
+ rb_enc_associate(s, pi->options->rb_enc);
281
281
  }
282
282
  #endif
283
283
  e = rb_obj_alloc(ox_element_clas);
@@ -292,10 +292,10 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
292
292
  if (Yes == pi->options->sym_keys) {
293
293
  if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot))) {
294
294
  #if HAS_ENCODING_SUPPORT
295
- if (0 != pi->encoding) {
295
+ if (0 != pi->options->rb_enc) {
296
296
  VALUE rstr = rb_str_new2(attrs->name);
297
297
 
298
- rb_enc_associate(rstr, pi->encoding);
298
+ rb_enc_associate(rstr, pi->options->rb_enc);
299
299
  sym = rb_funcall(rstr, ox_to_sym_id, 0);
300
300
  } else {
301
301
  sym = ID2SYM(rb_intern(attrs->name));
@@ -308,15 +308,15 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
308
308
  } else {
309
309
  sym = rb_str_new2(attrs->name);
310
310
  #if HAS_ENCODING_SUPPORT
311
- if (0 != pi->encoding) {
312
- rb_enc_associate(sym, pi->encoding);
311
+ if (0 != pi->options->rb_enc) {
312
+ rb_enc_associate(sym, pi->options->rb_enc);
313
313
  }
314
314
  #endif
315
315
  }
316
316
  s = rb_str_new2(attrs->value);
317
317
  #if HAS_ENCODING_SUPPORT
318
- if (0 != pi->encoding) {
319
- rb_enc_associate(s, pi->encoding);
318
+ if (0 != pi->options->rb_enc) {
319
+ rb_enc_associate(s, pi->options->rb_enc);
320
320
  }
321
321
  #endif
322
322
  rb_hash_aset(ah, sym, s);
data/ext/ox/obj_load.c CHANGED
@@ -399,7 +399,7 @@ instruct(PInfo pi, const char *target, Attr attrs) {
399
399
  #if HAS_ENCODING_SUPPORT
400
400
  for (; 0 != attrs->name; attrs++) {
401
401
  if (0 == strcmp("encoding", attrs->name)) {
402
- pi->encoding = rb_enc_find(attrs->value);
402
+ pi->options->rb_enc = rb_enc_find(attrs->value);
403
403
  }
404
404
  }
405
405
  #endif
@@ -422,8 +422,8 @@ add_text(PInfo pi, char *text, int closed) {
422
422
  case StringCode:
423
423
  pi->h->obj = rb_str_new2(text);
424
424
  #if HAS_ENCODING_SUPPORT
425
- if (0 != pi->encoding) {
426
- rb_enc_associate(pi->h->obj, pi->encoding);
425
+ if (0 != pi->options->rb_enc) {
426
+ rb_enc_associate(pi->h->obj, pi->options->rb_enc);
427
427
  }
428
428
  #endif
429
429
  if (0 != pi->circ_array) {
@@ -463,7 +463,7 @@ add_text(PInfo pi, char *text, int closed) {
463
463
  VALUE *slot;
464
464
 
465
465
  if (Qundef == (sym = ox_cache_get(ox_symbol_cache, text, &slot))) {
466
- sym = str2sym(text, pi->encoding);
466
+ sym = str2sym(text, pi->options->rb_enc);
467
467
  *slot = sym;
468
468
  }
469
469
  pi->h->obj = sym;
@@ -489,8 +489,8 @@ add_text(PInfo pi, char *text, int closed) {
489
489
  from_base64(text, (uchar*)str);
490
490
  v = rb_str_new(str, str_size);
491
491
  #if HAS_ENCODING_SUPPORT
492
- if (0 != pi->encoding) {
493
- rb_enc_associate(v, pi->encoding);
492
+ if (0 != pi->options->rb_enc) {
493
+ rb_enc_associate(v, pi->options->rb_enc);
494
494
  }
495
495
  #endif
496
496
  if (0 != pi->circ_array) {
@@ -508,7 +508,7 @@ add_text(PInfo pi, char *text, int closed) {
508
508
 
509
509
  from_base64(text, (uchar*)str);
510
510
  if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot))) {
511
- sym = str2sym(str, pi->encoding);
511
+ sym = str2sym(str, pi->options->rb_enc);
512
512
  *slot = sym;
513
513
  }
514
514
  pi->h->obj = sym;
@@ -572,7 +572,7 @@ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
572
572
  }
573
573
  h = pi->h;
574
574
  h->type = *ename;
575
- h->var = get_var_sym_from_attrs(attrs, pi->encoding);
575
+ h->var = get_var_sym_from_attrs(attrs, pi->options->rb_enc);
576
576
  switch (h->type) {
577
577
  case NilClassCode:
578
578
  h->obj = Qnil;
data/ext/ox/ox.c CHANGED
@@ -62,6 +62,7 @@ ID ox_end_element_id;
62
62
  ID ox_end_id;
63
63
  ID ox_error_id;
64
64
  ID ox_excl_id;
65
+ ID ox_external_encoding_id;
65
66
  ID ox_fileno_id;
66
67
  ID ox_inspect_id;
67
68
  ID ox_instruct_id;
@@ -145,7 +146,8 @@ struct _Options ox_default_options = {
145
146
  No, /* xsd_date */
146
147
  NoMode, /* mode */
147
148
  StrictEffort, /* effort */
148
- Yes /* sym_keys */
149
+ Yes, /* sym_keys */
150
+ 0 /* rb_enc */
149
151
  };
150
152
 
151
153
  extern ParseCallbacks ox_obj_callbacks;
@@ -241,6 +243,9 @@ set_def_opts(VALUE self, VALUE opts) {
241
243
  } else {
242
244
  Check_Type(v, T_STRING);
243
245
  strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
246
+ #ifdef HAVE_RUBY_ENCODING_H
247
+ ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
248
+ #endif
244
249
  }
245
250
 
246
251
  v = rb_hash_aref(opts, indent_sym);
@@ -357,7 +362,7 @@ to_gen(VALUE self, VALUE ruby_xml) {
357
362
  }
358
363
 
359
364
  static VALUE
360
- load(char *xml, int argc, VALUE *argv, VALUE self) {
365
+ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
361
366
  VALUE obj;
362
367
  struct _Options options = ox_default_options;
363
368
 
@@ -397,6 +402,17 @@ load(char *xml, int argc, VALUE *argv, VALUE self) {
397
402
  options.sym_keys = (Qfalse == v) ? No : Yes;
398
403
  }
399
404
  }
405
+ #ifdef HAVE_RUBY_ENCODING_H
406
+ if ('\0' == *options.encoding) {
407
+ if (Qnil != encoding) {
408
+ options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding));
409
+ } else {
410
+ options.rb_enc = 0;
411
+ }
412
+ } else if (0 == options.rb_enc) {
413
+ options.rb_enc = rb_enc_find(options.encoding);
414
+ }
415
+ #endif
400
416
  switch (options.mode) {
401
417
  case ObjMode:
402
418
  obj = ox_parse(xml, ox_obj_callbacks, 0, &options);
@@ -440,6 +456,7 @@ load_str(int argc, VALUE *argv, VALUE self) {
440
456
  char *xml;
441
457
  size_t len;
442
458
  VALUE obj;
459
+ VALUE encoding;
443
460
 
444
461
  Check_Type(*argv, T_STRING);
445
462
  /* the xml string gets modified so make a copy of it */
@@ -449,8 +466,13 @@ load_str(int argc, VALUE *argv, VALUE self) {
449
466
  } else {
450
467
  xml = ALLOCA_N(char, len);
451
468
  }
469
+ #ifdef HAVE_RUBY_ENCODING_H
470
+ encoding = rb_obj_encoding(*argv);
471
+ #else
472
+ encoding = Qnil;
473
+ #endif
452
474
  strcpy(xml, StringValuePtr(*argv));
453
- obj = load(xml, argc - 1, argv + 1, self);
475
+ obj = load(xml, argc - 1, argv + 1, self, encoding);
454
476
  if (SMALL_XML < len) {
455
477
  xfree(xml);
456
478
  }
@@ -502,7 +524,7 @@ load_file(int argc, VALUE *argv, VALUE self) {
502
524
  }
503
525
  fclose(f);
504
526
  xml[len] = '\0';
505
- obj = load(xml, argc - 1, argv + 1, self);
527
+ obj = load(xml, argc - 1, argv + 1, self, Qnil);
506
528
  if (SMALL_XML < len) {
507
529
  xfree(xml);
508
530
  }
@@ -706,6 +728,7 @@ void Init_ox() {
706
728
  ox_comment_id = rb_intern("comment");
707
729
  ox_den_id = rb_intern("@den");
708
730
  ox_doctype_id = rb_intern("doctype");
731
+ ox_external_encoding_id = rb_intern("external_encoding");
709
732
  ox_end_element_id = rb_intern("end_element");
710
733
  ox_end_id = rb_intern("@end");
711
734
  ox_error_id = rb_intern("error");
data/ext/ox/ox.h CHANGED
@@ -186,6 +186,11 @@ typedef struct _Options {
186
186
  char mode; /* LoadMode */
187
187
  char effort; /* Effort */
188
188
  char sym_keys; /* symbolize keys */
189
+ #ifdef HAVE_RUBY_ENCODING_H
190
+ rb_encoding *rb_enc;
191
+ #else
192
+ void *rb_enc;
193
+ #endif
189
194
  } *Options;
190
195
 
191
196
  /* parse information structure */
@@ -197,11 +202,6 @@ struct _PInfo {
197
202
  VALUE obj;
198
203
  ParseCallbacks pcb;
199
204
  CircArray circ_array;
200
- #ifdef HAVE_RUBY_ENCODING_H
201
- rb_encoding *encoding;
202
- #else
203
- void *encoding;
204
- #endif
205
205
  unsigned long id; /* set for text types when circ_array is set */
206
206
  Options options;
207
207
  };
@@ -234,6 +234,7 @@ extern ID ox_end_element_id;
234
234
  extern ID ox_end_id;
235
235
  extern ID ox_error_id;
236
236
  extern ID ox_excl_id;
237
+ extern ID ox_external_encoding_id;
237
238
  extern ID ox_fileno_id;
238
239
  extern ID ox_inspect_id;
239
240
  extern ID ox_instruct_id;
data/ext/ox/parse.c CHANGED
@@ -116,7 +116,6 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
116
116
  pi.pcb = pcb;
117
117
  pi.obj = Qnil;
118
118
  pi.circ_array = 0;
119
- pi.encoding = 0;
120
119
  pi.options = options;
121
120
  while (1) {
122
121
  next_non_white(&pi); /* skip white space */
@@ -797,10 +796,10 @@ read_coded_chars(PInfo pi, char *text) {
797
796
  pi->s = s;
798
797
  if (u <= 0x000000000000007FULL) {
799
798
  *text++ = (char)u;
800
- } else if (ox_utf8_encoding == pi->encoding) {
799
+ } else if (ox_utf8_encoding == pi->options->rb_enc) {
801
800
  text = ucs_to_utf8_chars(text, u);
802
- } else if (0 == pi->encoding) {
803
- pi->encoding = ox_utf8_encoding;
801
+ } else if (0 == pi->options->rb_enc) {
802
+ pi->options->rb_enc = ox_utf8_encoding;
804
803
  text = ucs_to_utf8_chars(text, u);
805
804
  } else {
806
805
  /*raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
@@ -857,11 +856,11 @@ collapse_special(PInfo pi, char *str) {
857
856
  }
858
857
  if (u <= 0x000000000000007FULL) {
859
858
  *b++ = (char)u;
860
- } else if (ox_utf8_encoding == pi->encoding) {
859
+ } else if (ox_utf8_encoding == pi->options->rb_enc) {
861
860
  b = ucs_to_utf8_chars(b, u);
862
861
  /* TBD support UTF-16 */
863
- } else if (0 == pi->encoding) {
864
- pi->encoding = ox_utf8_encoding;
862
+ } else if (0 == pi->options->rb_enc) {
863
+ pi->options->rb_enc = ox_utf8_encoding;
865
864
  b = ucs_to_utf8_chars(b, u);
866
865
  } else {
867
866
  /* raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
data/ext/ox/sax.c CHANGED
@@ -321,7 +321,13 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
321
321
  dr->has_error = respond_to(handler, ox_error_id);
322
322
  #if HAS_ENCODING_SUPPORT
323
323
  if ('\0' == *ox_default_options.encoding) {
324
- dr->encoding = 0;
324
+ VALUE encoding;
325
+
326
+ if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
327
+ dr->encoding = rb_enc_from_index(rb_enc_get_index(encoding));
328
+ } else {
329
+ dr->encoding = 0;
330
+ }
325
331
  } else {
326
332
  dr->encoding = rb_enc_find(ox_default_options.encoding);
327
333
  }
data/lib/ox/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '1.6.6'
4
+ VERSION = '1.6.7'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.6
4
+ version: 1.6.7
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-02 00:00:00.000000000 Z
12
+ date: 2012-11-15 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! "A fast XML parser and object serializer that uses only standard C
15
15
  lib.\n \nOptimized XML (Ox), as the name implies was written to provide