ox 1.6.8 → 1.6.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +2 -6
- data/ext/ox/ox.c +75 -9
- data/ext/ox/parse.c +0 -20
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -34,13 +34,9 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
-
### Release 1.6.8
|
37
|
+
### Release 1.6.9
|
38
38
|
|
39
|
-
-
|
40
|
-
|
41
|
-
### Release 1.6.7
|
42
|
-
|
43
|
-
- Now uses the encoding of the input XML as the default encoding for the parsed output if the default options encoding is not set and the encoding is not set in the XML file prolog.
|
39
|
+
- Added support for BOMs. A UTF-8 BOM is honored and handled correctly. Other BOMs cause encoding issues with Ruby or raise an error because those encodings are not ASCII compatible.
|
44
40
|
|
45
41
|
## <a name="description">Description</a>
|
46
42
|
|
data/ext/ox/ox.c
CHANGED
@@ -131,8 +131,16 @@ static VALUE xsd_date_sym;
|
|
131
131
|
|
132
132
|
#if HAS_ENCODING_SUPPORT
|
133
133
|
rb_encoding *ox_utf8_encoding = 0;
|
134
|
+
static rb_encoding *ox_utf16le_encoding = 0;
|
135
|
+
static rb_encoding *ox_utf16be_encoding = 0;
|
136
|
+
static rb_encoding *ox_utf32le_encoding = 0;
|
137
|
+
static rb_encoding *ox_utf32be_encoding = 0;
|
134
138
|
#else
|
135
139
|
void *ox_utf8_encoding = 0;
|
140
|
+
static void *ox_utf16le_encoding = 0;
|
141
|
+
static void *ox_utf16be_encoding = 0;
|
142
|
+
static void *ox_utf32le_encoding = 0;
|
143
|
+
static void *ox_utf32be_encoding = 0;
|
136
144
|
#endif
|
137
145
|
|
138
146
|
struct _Options ox_default_options = {
|
@@ -157,6 +165,55 @@ extern ParseCallbacks ox_nomode_callbacks;
|
|
157
165
|
|
158
166
|
static void parse_dump_options(VALUE ropts, Options copts);
|
159
167
|
|
168
|
+
static char*
|
169
|
+
defuse_bom(char *xml, Options options) {
|
170
|
+
switch ((uint8_t)*xml) {
|
171
|
+
case 0xEF: /* UTF-8 */
|
172
|
+
if (0xBB == (uint8_t)xml[1] && 0xBF == (uint8_t)xml[2]) {
|
173
|
+
options->rb_enc = ox_utf8_encoding;
|
174
|
+
xml += 3;
|
175
|
+
} else {
|
176
|
+
rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
|
177
|
+
}
|
178
|
+
break;
|
179
|
+
#if 0
|
180
|
+
case 0xFE: /* UTF-16BE */
|
181
|
+
if (0xFF == (uint8_t)xml[1]) {
|
182
|
+
options->rb_enc = ox_utf16be_encoding;
|
183
|
+
xml += 2;
|
184
|
+
} else {
|
185
|
+
rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
|
186
|
+
}
|
187
|
+
break;
|
188
|
+
case 0xFF: /* UTF-16LE or UTF-32LE */
|
189
|
+
if (0xFE == (uint8_t)xml[1]) {
|
190
|
+
if (0x00 == (uint8_t)xml[2] && 0x00 == (uint8_t)xml[3]) {
|
191
|
+
options->rb_enc = ox_utf32le_encoding;
|
192
|
+
xml += 4;
|
193
|
+
} else {
|
194
|
+
options->rb_enc = ox_utf16le_encoding;
|
195
|
+
xml += 2;
|
196
|
+
}
|
197
|
+
} else {
|
198
|
+
rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
|
199
|
+
}
|
200
|
+
break;
|
201
|
+
case 0x00: /* UTF-32BE */
|
202
|
+
if (0x00 == (uint8_t)xml[1] && 0xFE == (uint8_t)xml[2] && 0xFF == (uint8_t)xml[3]) {
|
203
|
+
options->rb_enc = ox_utf32be_encoding;
|
204
|
+
xml += 4;
|
205
|
+
} else {
|
206
|
+
rb_raise(rb_eArgError, "Invalid BOM in XML string.\n");
|
207
|
+
}
|
208
|
+
break;
|
209
|
+
#endif
|
210
|
+
default:
|
211
|
+
/* Let it fail if there is a BOM that is not UTF-8. Other BOM options are not ASCII compatible. */
|
212
|
+
break;
|
213
|
+
}
|
214
|
+
return xml;
|
215
|
+
}
|
216
|
+
|
160
217
|
/* call-seq: ox_default_options() => Hash
|
161
218
|
*
|
162
219
|
* Returns the default load and dump options as a Hash. The options are
|
@@ -312,9 +369,10 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
312
369
|
*/
|
313
370
|
static VALUE
|
314
371
|
to_obj(VALUE self, VALUE ruby_xml) {
|
315
|
-
char *xml;
|
316
|
-
size_t len;
|
317
|
-
VALUE obj;
|
372
|
+
char *xml, *x;
|
373
|
+
size_t len;
|
374
|
+
VALUE obj;
|
375
|
+
struct _Options options = ox_default_options;
|
318
376
|
|
319
377
|
Check_Type(ruby_xml, T_STRING);
|
320
378
|
/* the xml string gets modified so make a copy of it */
|
@@ -324,8 +382,9 @@ to_obj(VALUE self, VALUE ruby_xml) {
|
|
324
382
|
} else {
|
325
383
|
xml = ALLOCA_N(char, len);
|
326
384
|
}
|
327
|
-
|
328
|
-
|
385
|
+
memcpy(xml, StringValuePtr(ruby_xml), len);
|
386
|
+
x = defuse_bom(xml, &options);
|
387
|
+
obj = ox_parse(x, ox_obj_callbacks, 0, &options);
|
329
388
|
if (SMALL_XML < len) {
|
330
389
|
xfree(xml);
|
331
390
|
}
|
@@ -341,9 +400,10 @@ to_obj(VALUE self, VALUE ruby_xml) {
|
|
341
400
|
*/
|
342
401
|
static VALUE
|
343
402
|
to_gen(VALUE self, VALUE ruby_xml) {
|
344
|
-
char *xml;
|
403
|
+
char *xml, *x;
|
345
404
|
size_t len;
|
346
405
|
VALUE obj;
|
406
|
+
struct _Options options = ox_default_options;
|
347
407
|
|
348
408
|
Check_Type(ruby_xml, T_STRING);
|
349
409
|
/* the xml string gets modified so make a copy of it */
|
@@ -353,8 +413,9 @@ to_gen(VALUE self, VALUE ruby_xml) {
|
|
353
413
|
} else {
|
354
414
|
xml = ALLOCA_N(char, len);
|
355
415
|
}
|
356
|
-
|
357
|
-
|
416
|
+
memcpy(xml, StringValuePtr(ruby_xml), len);
|
417
|
+
x = defuse_bom(xml, &options);
|
418
|
+
obj = ox_parse(x, ox_gen_callbacks, 0, &options);
|
358
419
|
if (SMALL_XML < len) {
|
359
420
|
xfree(xml);
|
360
421
|
}
|
@@ -413,6 +474,7 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
|
|
413
474
|
options.rb_enc = rb_enc_find(options.encoding);
|
414
475
|
}
|
415
476
|
#endif
|
477
|
+
xml = defuse_bom(xml, &options);
|
416
478
|
switch (options.mode) {
|
417
479
|
case ObjMode:
|
418
480
|
obj = ox_parse(xml, ox_obj_callbacks, 0, &options);
|
@@ -471,7 +533,7 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
471
533
|
#else
|
472
534
|
encoding = Qnil;
|
473
535
|
#endif
|
474
|
-
|
536
|
+
memcpy(xml, StringValuePtr(*argv), len);
|
475
537
|
obj = load(xml, argc - 1, argv + 1, self, encoding);
|
476
538
|
if (SMALL_XML < len) {
|
477
539
|
xfree(xml);
|
@@ -804,6 +866,10 @@ void Init_ox() {
|
|
804
866
|
rb_define_module_function(Ox, "cache8_test", cache8_test, 0);
|
805
867
|
#if HAS_ENCODING_SUPPORT
|
806
868
|
ox_utf8_encoding = rb_enc_find("UTF-8");
|
869
|
+
ox_utf16le_encoding = rb_enc_find("UTF-16LE");
|
870
|
+
ox_utf16be_encoding = rb_enc_find("UTF-16BE");
|
871
|
+
ox_utf32le_encoding = rb_enc_find("UTF-32LE");
|
872
|
+
ox_utf32be_encoding = rb_enc_find("UTF-32BE");
|
807
873
|
#endif
|
808
874
|
}
|
809
875
|
|
data/ext/ox/parse.c
CHANGED
@@ -745,26 +745,6 @@ ucs_to_utf8_chars(char *text, uint64_t u) {
|
|
745
745
|
return text;
|
746
746
|
}
|
747
747
|
|
748
|
-
#if 0
|
749
|
-
static char*
|
750
|
-
uint64_to_chars(char *text, uint64_t u) {
|
751
|
-
int reading = 0;
|
752
|
-
int i;
|
753
|
-
unsigned char c;
|
754
|
-
|
755
|
-
for (i = 56; 0 <= i; i -= 8) {
|
756
|
-
c = (unsigned char)((u >> i) & 0x00000000000000FFULL);
|
757
|
-
if (reading) {
|
758
|
-
*text++ = (char)c;
|
759
|
-
} else if ('\0' != c) {
|
760
|
-
*text++ = (char)c;
|
761
|
-
reading = 1;
|
762
|
-
}
|
763
|
-
}
|
764
|
-
return text;
|
765
|
-
}
|
766
|
-
#endif
|
767
|
-
|
768
748
|
static char*
|
769
749
|
read_coded_chars(PInfo pi, char *text) {
|
770
750
|
char *b, buf[32];
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.8
|
4
|
+
version: 1.6.9
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-25 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "A fast XML parser and object serializer that uses only standard C
|
15
15
|
lib.\n \nOptimized XML (Ox), as the name implies was written to provide
|