ox 1.6.8 → 1.6.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

Files changed (5) hide show
  1. data/README.md +2 -6
  2. data/ext/ox/ox.c +75 -9
  3. data/ext/ox/parse.c +0 -20
  4. data/lib/ox/version.rb +1 -1
  5. metadata +2 -2
data/README.md CHANGED
@@ -34,13 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## <a name="release">Release Notes</a>
36
36
 
37
- ### Release 1.6.8
37
+ ### Release 1.6.9
38
38
 
39
- - Changed extconf.rb to use RUBY_PLATFORM.
40
-
41
- ### Release 1.6.7
42
-
43
- - Now uses the encoding of the input XML as the default encoding for the parsed output if the default options encoding is not set and the encoding is not set in the XML file prolog.
39
+ - Added support for BOMs. A UTF-8 BOM is honored and handled correctly. Other BOMs either cause encoding issues with Ruby or raise an error, as those encodings are not ASCII compatible.
44
40
 
45
41
  ## <a name="description">Description</a>
46
42
 
@@ -131,8 +131,16 @@ static VALUE xsd_date_sym;
131
131
 
132
132
  #if HAS_ENCODING_SUPPORT
133
133
  rb_encoding *ox_utf8_encoding = 0;
134
+ static rb_encoding *ox_utf16le_encoding = 0;
135
+ static rb_encoding *ox_utf16be_encoding = 0;
136
+ static rb_encoding *ox_utf32le_encoding = 0;
137
+ static rb_encoding *ox_utf32be_encoding = 0;
134
138
  #else
135
139
  void *ox_utf8_encoding = 0;
140
+ static void *ox_utf16le_encoding = 0;
141
+ static void *ox_utf16be_encoding = 0;
142
+ static void *ox_utf32le_encoding = 0;
143
+ static void *ox_utf32be_encoding = 0;
136
144
  #endif
137
145
 
138
146
  struct _Options ox_default_options = {
@@ -157,6 +165,55 @@ extern ParseCallbacks ox_nomode_callbacks;
157
165
 
158
166
  static void parse_dump_options(VALUE ropts, Options copts);
159
167
 
168
+ static char*
169
+ defuse_bom(char *xml, Options options) {
170
+ switch ((uint8_t)*xml) {
171
+ case 0xEF: /* UTF-8 */
172
+ if (0xBB == (uint8_t)xml[1] && 0xBF == (uint8_t)xml[2]) {
173
+ options->rb_enc = ox_utf8_encoding;
174
+ xml += 3;
175
+ } else {
176
+ rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
177
+ }
178
+ break;
179
+ #if 0
180
+ case 0xFE: /* UTF-16BE */
181
+ if (0xFF == (uint8_t)xml[1]) {
182
+ options->rb_enc = ox_utf16be_encoding;
183
+ xml += 2;
184
+ } else {
185
+ rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
186
+ }
187
+ break;
188
+ case 0xFF: /* UTF-16LE or UTF-32LE */
189
+ if (0xFE == (uint8_t)xml[1]) {
190
+ if (0x00 == (uint8_t)xml[2] && 0x00 == (uint8_t)xml[3]) {
191
+ options->rb_enc = ox_utf32le_encoding;
192
+ xml += 4;
193
+ } else {
194
+ options->rb_enc = ox_utf16le_encoding;
195
+ xml += 2;
196
+ }
197
+ } else {
198
+ rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
199
+ }
200
+ break;
201
+ case 0x00: /* UTF-32BE */
202
+ if (0x00 == (uint8_t)xml[1] && 0xFE == (uint8_t)xml[2] && 0xFF == (uint8_t)xml[3]) {
203
+ options->rb_enc = ox_utf32be_encoding;
204
+ xml += 4;
205
+ } else {
206
+ rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
207
+ }
208
+ break;
209
+ #endif
210
+ default:
211
+ /* Let it fail if there is a BOM that is not UTF-8. Other BOM options are not ASCII compatible. */
212
+ break;
213
+ }
214
+ return xml;
215
+ }
216
+
160
217
  /* call-seq: ox_default_options() => Hash
161
218
  *
162
219
  * Returns the default load and dump options as a Hash. The options are
@@ -312,9 +369,10 @@ set_def_opts(VALUE self, VALUE opts) {
312
369
  */
313
370
  static VALUE
314
371
  to_obj(VALUE self, VALUE ruby_xml) {
315
- char *xml;
316
- size_t len;
317
- VALUE obj;
372
+ char *xml, *x;
373
+ size_t len;
374
+ VALUE obj;
375
+ struct _Options options = ox_default_options;
318
376
 
319
377
  Check_Type(ruby_xml, T_STRING);
320
378
  /* the xml string gets modified so make a copy of it */
@@ -324,8 +382,9 @@ to_obj(VALUE self, VALUE ruby_xml) {
324
382
  } else {
325
383
  xml = ALLOCA_N(char, len);
326
384
  }
327
- strcpy(xml, StringValuePtr(ruby_xml));
328
- obj = ox_parse(xml, ox_obj_callbacks, 0, &ox_default_options);
385
+ memcpy(xml, StringValuePtr(ruby_xml), len);
386
+ x = defuse_bom(xml, &options);
387
+ obj = ox_parse(x, ox_obj_callbacks, 0, &options);
329
388
  if (SMALL_XML < len) {
330
389
  xfree(xml);
331
390
  }
@@ -341,9 +400,10 @@ to_obj(VALUE self, VALUE ruby_xml) {
341
400
  */
342
401
  static VALUE
343
402
  to_gen(VALUE self, VALUE ruby_xml) {
344
- char *xml;
403
+ char *xml, *x;
345
404
  size_t len;
346
405
  VALUE obj;
406
+ struct _Options options = ox_default_options;
347
407
 
348
408
  Check_Type(ruby_xml, T_STRING);
349
409
  /* the xml string gets modified so make a copy of it */
@@ -353,8 +413,9 @@ to_gen(VALUE self, VALUE ruby_xml) {
353
413
  } else {
354
414
  xml = ALLOCA_N(char, len);
355
415
  }
356
- strcpy(xml, StringValuePtr(ruby_xml));
357
- obj = ox_parse(xml, ox_gen_callbacks, 0, &ox_default_options);
416
+ memcpy(xml, StringValuePtr(ruby_xml), len);
417
+ x = defuse_bom(xml, &options);
418
+ obj = ox_parse(x, ox_gen_callbacks, 0, &options);
358
419
  if (SMALL_XML < len) {
359
420
  xfree(xml);
360
421
  }
@@ -413,6 +474,7 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
413
474
  options.rb_enc = rb_enc_find(options.encoding);
414
475
  }
415
476
  #endif
477
+ xml = defuse_bom(xml, &options);
416
478
  switch (options.mode) {
417
479
  case ObjMode:
418
480
  obj = ox_parse(xml, ox_obj_callbacks, 0, &options);
@@ -471,7 +533,7 @@ load_str(int argc, VALUE *argv, VALUE self) {
471
533
  #else
472
534
  encoding = Qnil;
473
535
  #endif
474
- strcpy(xml, StringValuePtr(*argv));
536
+ memcpy(xml, StringValuePtr(*argv), len);
475
537
  obj = load(xml, argc - 1, argv + 1, self, encoding);
476
538
  if (SMALL_XML < len) {
477
539
  xfree(xml);
@@ -804,6 +866,10 @@ void Init_ox() {
804
866
  rb_define_module_function(Ox, "cache8_test", cache8_test, 0);
805
867
  #if HAS_ENCODING_SUPPORT
806
868
  ox_utf8_encoding = rb_enc_find("UTF-8");
869
+ ox_utf16le_encoding = rb_enc_find("UTF-16LE");
870
+ ox_utf16be_encoding = rb_enc_find("UTF-16BE");
871
+ ox_utf32le_encoding = rb_enc_find("UTF-32LE");
872
+ ox_utf32be_encoding = rb_enc_find("UTF-32BE");
807
873
  #endif
808
874
  }
809
875
 
@@ -745,26 +745,6 @@ ucs_to_utf8_chars(char *text, uint64_t u) {
745
745
  return text;
746
746
  }
747
747
 
748
- #if 0
749
- static char*
750
- uint64_to_chars(char *text, uint64_t u) {
751
- int reading = 0;
752
- int i;
753
- unsigned char c;
754
-
755
- for (i = 56; 0 <= i; i -= 8) {
756
- c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
757
- if (reading) {
758
- *text++ = (char)c;
759
- } else if ('\0' != c) {
760
- *text++ = (char)c;
761
- reading = 1;
762
- }
763
- }
764
- return text;
765
- }
766
- #endif
767
-
768
748
  static char*
769
749
  read_coded_chars(PInfo pi, char *text) {
770
750
  char *b, buf[32];
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '1.6.8'
4
+ VERSION = '1.6.9'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.8
4
+ version: 1.6.9
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-11-18 00:00:00.000000000 Z
12
+ date: 2012-11-25 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! "A fast XML parser and object serializer that uses only standard C
15
15
  lib.\n \nOptimized XML (Ox), as the name implies was written to provide