ox 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +3 -2
- data/ext/ox/dump.c +3 -3
- data/ext/ox/gen_load.c +27 -2
- data/ext/ox/parse.c +195 -194
- data/lib/ox/bag.rb +1 -4
- data/lib/ox/version.rb +1 -1
- data/test/bench.rb +4 -4
- data/test/big.rb +24 -0
- data/test/bug3.rb +13 -11
- data/test/bug4.rb +11 -0
- data/test/func.rb +6 -0
- metadata +23 -30
- data/test/sax_example.rb +0 -37
data/README.md
CHANGED
@@ -26,9 +26,10 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
26
26
|
|
27
27
|
## <a name="release">Release Notes</a>
|
28
28
|
|
29
|
-
### Release 1.4.
|
29
|
+
### Release 1.4.2
|
30
30
|
|
31
|
-
-
|
31
|
+
- Made Ox more tolerant when parsing invalid XML files without a prolog.
|
32
|
+
- DOCTYPE value was one character too long.
|
32
33
|
|
33
34
|
## <a name="description">Description</a>
|
34
35
|
|
data/ext/ox/dump.c
CHANGED
@@ -697,7 +697,7 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
|
|
697
697
|
out->w_end(out, &e);
|
698
698
|
}
|
699
699
|
#else
|
700
|
-
#
|
700
|
+
#if (defined JRUBY || defined RBX_Qnil)
|
701
701
|
VALUE vars = rb_funcall2(obj, rb_intern("instance_variables"), 0, 0);
|
702
702
|
#else
|
703
703
|
VALUE vars = rb_obj_instance_variables(obj);
|
@@ -785,7 +785,7 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
|
|
785
785
|
out->w_end(out, &e);
|
786
786
|
break;
|
787
787
|
}
|
788
|
-
#
|
788
|
+
#if (defined T_COMPLEX && defined RCOMPLEX)
|
789
789
|
case T_COMPLEX:
|
790
790
|
e.type = ComplexCode;
|
791
791
|
out->w_start(out, &e);
|
@@ -794,7 +794,7 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
|
|
794
794
|
out->w_end(out, &e);
|
795
795
|
break;
|
796
796
|
#endif
|
797
|
-
#
|
797
|
+
#if (defined T_RATIONAL && defined RRATIONAL)
|
798
798
|
case T_RATIONAL:
|
799
799
|
e.type = RationalCode;
|
800
800
|
out->w_start(out, &e);
|
data/ext/ox/gen_load.c
CHANGED
@@ -38,6 +38,7 @@
|
|
38
38
|
#include "ox.h"
|
39
39
|
|
40
40
|
static void instruct(PInfo pi, const char *target, Attr attrs);
|
41
|
+
static void create_doc(PInfo pi);
|
41
42
|
static void create_prolog_doc(PInfo pi, const char *target, Attr attrs);
|
42
43
|
static void nomode_instruct(PInfo pi, const char *target, Attr attrs);
|
43
44
|
static void add_doctype(PInfo pi, const char *docType);
|
@@ -85,6 +86,20 @@ struct _ParseCallbacks _ox_nomode_callbacks = {
|
|
85
86
|
|
86
87
|
ParseCallbacks ox_nomode_callbacks = &_ox_nomode_callbacks;
|
87
88
|
|
89
|
+
static void
|
90
|
+
create_doc(PInfo pi) {
|
91
|
+
VALUE doc;
|
92
|
+
VALUE nodes;
|
93
|
+
|
94
|
+
pi->h = pi->helpers;
|
95
|
+
doc = rb_obj_alloc(ox_document_clas);
|
96
|
+
nodes = rb_ary_new();
|
97
|
+
rb_ivar_set(doc, attributes_id, rb_hash_new());
|
98
|
+
rb_ivar_set(doc, nodes_id, nodes);
|
99
|
+
pi->h->obj = nodes;
|
100
|
+
pi->obj = doc;
|
101
|
+
}
|
102
|
+
|
88
103
|
static void
|
89
104
|
create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
90
105
|
VALUE doc;
|
@@ -112,8 +127,6 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
|
112
127
|
pi->obj = doc;
|
113
128
|
}
|
114
129
|
|
115
|
-
|
116
|
-
|
117
130
|
static void
|
118
131
|
instruct(PInfo pi, const char *target, Attr attrs) {
|
119
132
|
if (0 == strcmp("xml", target)) {
|
@@ -178,6 +191,9 @@ add_doctype(PInfo pi, const char *docType) {
|
|
178
191
|
}
|
179
192
|
#endif
|
180
193
|
rb_ivar_set(n, value_id, s);
|
194
|
+
if (0 == pi->h) { // top level object
|
195
|
+
create_doc(pi);
|
196
|
+
}
|
181
197
|
rb_ary_push(pi->h->obj, n);
|
182
198
|
}
|
183
199
|
|
@@ -192,6 +208,9 @@ add_comment(PInfo pi, const char *comment) {
|
|
192
208
|
}
|
193
209
|
#endif
|
194
210
|
rb_ivar_set(n, value_id, s);
|
211
|
+
if (0 == pi->h) { // top level object
|
212
|
+
create_doc(pi);
|
213
|
+
}
|
195
214
|
rb_ary_push(pi->h->obj, n);
|
196
215
|
}
|
197
216
|
|
@@ -206,6 +225,9 @@ add_cdata(PInfo pi, const char *cdata, size_t len) {
|
|
206
225
|
}
|
207
226
|
#endif
|
208
227
|
rb_ivar_set(n, value_id, s);
|
228
|
+
if (0 == pi->h) { // top level object
|
229
|
+
create_doc(pi);
|
230
|
+
}
|
209
231
|
rb_ary_push(pi->h->obj, n);
|
210
232
|
}
|
211
233
|
|
@@ -218,6 +240,9 @@ add_text(PInfo pi, char *text, int closed) {
|
|
218
240
|
rb_enc_associate(s, pi->encoding);
|
219
241
|
}
|
220
242
|
#endif
|
243
|
+
if (0 == pi->h) { // top level object
|
244
|
+
create_doc(pi);
|
245
|
+
}
|
221
246
|
rb_ary_push(pi->h->obj, s);
|
222
247
|
}
|
223
248
|
|
data/ext/ox/parse.c
CHANGED
@@ -36,18 +36,18 @@
|
|
36
36
|
#include "ruby.h"
|
37
37
|
#include "ox.h"
|
38
38
|
|
39
|
-
static void
|
40
|
-
static void
|
41
|
-
static void
|
42
|
-
static void
|
43
|
-
static void
|
44
|
-
//static void
|
45
|
-
static void
|
46
|
-
static char*
|
47
|
-
static char*
|
48
|
-
static int
|
49
|
-
static void
|
50
|
-
static int
|
39
|
+
static void read_instruction(PInfo pi);
|
40
|
+
static void read_doctype(PInfo pi);
|
41
|
+
static void read_comment(PInfo pi);
|
42
|
+
static void read_element(PInfo pi);
|
43
|
+
static void read_text(PInfo pi);
|
44
|
+
//static void read_reduced_text(PInfo pi);
|
45
|
+
static void read_cdata(PInfo pi);
|
46
|
+
static char* read_name_token(PInfo pi);
|
47
|
+
static char* read_quoted_value(PInfo pi);
|
48
|
+
static int read_coded_char(PInfo pi);
|
49
|
+
static void next_non_white(PInfo pi);
|
50
|
+
static int collapse_special(char *str);
|
51
51
|
|
52
52
|
/* This XML parser is a single pass, destructive, callback parser. It is a
|
53
53
|
* single pass parse since it only make one pass over the characters in the
|
@@ -80,14 +80,14 @@ next_non_white(PInfo pi) {
|
|
80
80
|
|
81
81
|
VALUE
|
82
82
|
parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort) {
|
83
|
-
struct _PInfo
|
84
|
-
int
|
83
|
+
struct _PInfo pi;
|
84
|
+
int body_read = 0;
|
85
85
|
|
86
86
|
if (0 == xml) {
|
87
87
|
raise_error("Invalid arg, xml string can not be null", xml, 0);
|
88
88
|
}
|
89
89
|
if (DEBUG <= trace) {
|
90
|
-
|
90
|
+
printf("Parsing xml:\n%s\n", xml);
|
91
91
|
}
|
92
92
|
/* initialize parse info */
|
93
93
|
pi.str = xml;
|
@@ -106,10 +106,10 @@ parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort) {
|
|
106
106
|
if ('\0' == *pi.s) {
|
107
107
|
break;
|
108
108
|
}
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
|
109
|
+
if (body_read && 0 != endp) {
|
110
|
+
*endp = pi.s;
|
111
|
+
break;
|
112
|
+
}
|
113
113
|
if ('<' != *pi.s) { // all top level entities start with <
|
114
114
|
raise_error("invalid format, expected <", pi.str, pi.s);
|
115
115
|
}
|
@@ -142,7 +142,7 @@ parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort) {
|
|
142
142
|
raise_error("invalid format, document not terminated", pi.str, pi.s);
|
143
143
|
default:
|
144
144
|
read_element(&pi);
|
145
|
-
|
145
|
+
body_read = 1;
|
146
146
|
break;
|
147
147
|
}
|
148
148
|
}
|
@@ -153,12 +153,12 @@ parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort) {
|
|
153
153
|
*/
|
154
154
|
static void
|
155
155
|
read_instruction(PInfo pi) {
|
156
|
-
struct _Attr
|
157
|
-
Attr
|
158
|
-
char
|
159
|
-
char
|
160
|
-
char
|
161
|
-
|
156
|
+
struct _Attr attrs[MAX_ATTRS + 1];
|
157
|
+
Attr a = attrs;
|
158
|
+
char *target;
|
159
|
+
char *end;
|
160
|
+
char c;
|
161
|
+
|
162
162
|
memset(attrs, 0, sizeof(attrs));
|
163
163
|
target = read_name_token(pi);
|
164
164
|
end = pi->s;
|
@@ -166,37 +166,37 @@ read_instruction(PInfo pi) {
|
|
166
166
|
c = *pi->s;
|
167
167
|
*end = '\0'; // terminate name
|
168
168
|
if ('?' != c) {
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
-
|
169
|
+
while ('?' != *pi->s) {
|
170
|
+
if ('\0' == *pi->s) {
|
171
|
+
raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
|
172
|
+
}
|
173
|
+
next_non_white(pi);
|
174
|
+
a->name = read_name_token(pi);
|
175
|
+
end = pi->s;
|
176
|
+
next_non_white(pi);
|
177
|
+
if ('=' != *pi->s++) {
|
178
|
+
raise_error("invalid format, no attribute value", pi->str, pi->s);
|
179
|
+
}
|
180
|
+
*end = '\0'; // terminate name
|
181
|
+
// read value
|
182
|
+
next_non_white(pi);
|
183
|
+
a->value = read_quoted_value(pi);
|
184
|
+
a++;
|
185
|
+
if (MAX_ATTRS <= (a - attrs)) {
|
186
|
+
raise_error("too many attributes", pi->str, pi->s);
|
187
|
+
}
|
188
|
+
}
|
189
|
+
if ('?' == *pi->s) {
|
190
|
+
pi->s++;
|
191
|
+
}
|
192
192
|
} else {
|
193
|
-
|
193
|
+
pi->s++;
|
194
194
|
}
|
195
195
|
if ('>' != *pi->s++) {
|
196
|
-
|
196
|
+
raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
|
197
197
|
}
|
198
198
|
if (0 != pi->pcb->instruct) {
|
199
|
-
|
199
|
+
pi->pcb->instruct(pi, target, attrs);
|
200
200
|
}
|
201
201
|
}
|
202
202
|
|
@@ -205,7 +205,7 @@ read_instruction(PInfo pi) {
|
|
205
205
|
*/
|
206
206
|
static void
|
207
207
|
read_doctype(PInfo pi) {
|
208
|
-
char
|
208
|
+
char *docType;
|
209
209
|
int depth = 1;
|
210
210
|
char c;
|
211
211
|
|
@@ -220,6 +220,7 @@ read_doctype(PInfo pi) {
|
|
220
220
|
} else if ('>' == c) {
|
221
221
|
depth--;
|
222
222
|
if (0 == depth) { /* done, at the end */
|
223
|
+
pi->s--;
|
223
224
|
break;
|
224
225
|
}
|
225
226
|
}
|
@@ -227,7 +228,7 @@ read_doctype(PInfo pi) {
|
|
227
228
|
*pi->s = '\0';
|
228
229
|
pi->s++;
|
229
230
|
if (0 != pi->pcb->add_doctype) {
|
230
|
-
|
231
|
+
pi->pcb->add_doctype(pi, docType);
|
231
232
|
}
|
232
233
|
}
|
233
234
|
|
@@ -237,8 +238,8 @@ static void
|
|
237
238
|
read_comment(PInfo pi) {
|
238
239
|
char *end;
|
239
240
|
char *s;
|
240
|
-
char
|
241
|
-
int
|
241
|
+
char *comment;
|
242
|
+
int done = 0;
|
242
243
|
|
243
244
|
next_non_white(pi);
|
244
245
|
comment = pi->s;
|
@@ -255,7 +256,7 @@ read_comment(PInfo pi) {
|
|
255
256
|
case '\r':
|
256
257
|
break;
|
257
258
|
default:
|
258
|
-
|
259
|
+
*(s + 1) = '\0';
|
259
260
|
done = 1;
|
260
261
|
break;
|
261
262
|
}
|
@@ -263,7 +264,7 @@ read_comment(PInfo pi) {
|
|
263
264
|
*end = '\0'; // in case the comment was blank
|
264
265
|
pi->s = end + 3;
|
265
266
|
if (0 != pi->pcb->add_comment) {
|
266
|
-
|
267
|
+
pi->pcb->add_comment(pi, comment);
|
267
268
|
}
|
268
269
|
}
|
269
270
|
|
@@ -272,15 +273,15 @@ read_comment(PInfo pi) {
|
|
272
273
|
*/
|
273
274
|
static void
|
274
275
|
read_element(PInfo pi) {
|
275
|
-
struct _Attr
|
276
|
-
Attr
|
277
|
-
char
|
278
|
-
char
|
279
|
-
char
|
280
|
-
char
|
281
|
-
long
|
282
|
-
int
|
283
|
-
int
|
276
|
+
struct _Attr attrs[MAX_ATTRS];
|
277
|
+
Attr ap = attrs;
|
278
|
+
char *name;
|
279
|
+
char *ename;
|
280
|
+
char *end;
|
281
|
+
char c;
|
282
|
+
long elen;
|
283
|
+
int hasChildren = 0;
|
284
|
+
int done = 0;
|
284
285
|
|
285
286
|
ename = read_name_token(pi);
|
286
287
|
end = pi->s;
|
@@ -292,22 +293,22 @@ read_element(PInfo pi) {
|
|
292
293
|
/* empty element, no attributes and no children */
|
293
294
|
pi->s++;
|
294
295
|
if ('>' != *pi->s) {
|
295
|
-
|
296
|
+
//printf("*** '%s' ***\n", pi->s);
|
296
297
|
raise_error("invalid format, element not closed", pi->str, pi->s);
|
297
298
|
}
|
298
299
|
pi->s++; /* past > */
|
299
|
-
|
300
|
-
|
301
|
-
|
300
|
+
ap->name = 0;
|
301
|
+
pi->pcb->add_element(pi, ename, attrs, hasChildren);
|
302
|
+
pi->pcb->end_element(pi, ename);
|
302
303
|
|
303
304
|
return;
|
304
305
|
}
|
305
306
|
/* read attribute names until the close (/ or >) is reached */
|
306
307
|
while (!done) {
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
308
|
+
if ('\0' == c) {
|
309
|
+
next_non_white(pi);
|
310
|
+
c = *pi->s;
|
311
|
+
}
|
311
312
|
switch (c) {
|
312
313
|
case '\0':
|
313
314
|
raise_error("invalid format, document not terminated", pi->str, pi->s);
|
@@ -318,9 +319,9 @@ read_element(PInfo pi) {
|
|
318
319
|
raise_error("invalid format, element not closed", pi->str, pi->s);
|
319
320
|
}
|
320
321
|
pi->s++;
|
321
|
-
|
322
|
-
|
323
|
-
|
322
|
+
ap->name = 0;
|
323
|
+
pi->pcb->add_element(pi, ename, attrs, hasChildren);
|
324
|
+
pi->pcb->end_element(pi, ename);
|
324
325
|
|
325
326
|
return;
|
326
327
|
case '>':
|
@@ -328,34 +329,34 @@ read_element(PInfo pi) {
|
|
328
329
|
pi->s++;
|
329
330
|
hasChildren = 1;
|
330
331
|
done = 1;
|
331
|
-
|
332
|
-
|
332
|
+
ap->name = 0;
|
333
|
+
pi->pcb->add_element(pi, ename, attrs, hasChildren);
|
333
334
|
break;
|
334
335
|
default:
|
335
336
|
// Attribute name so it's an element and the attribute will be
|
336
337
|
// added to it.
|
337
338
|
ap->name = read_name_token(pi);
|
338
|
-
|
339
|
+
end = pi->s;
|
339
340
|
next_non_white(pi);
|
340
341
|
if ('=' != *pi->s++) {
|
341
342
|
raise_error("invalid format, no attribute value", pi->str, pi->s);
|
342
343
|
}
|
343
|
-
|
344
|
+
*end = '\0'; // terminate name
|
344
345
|
// read value
|
345
346
|
next_non_white(pi);
|
346
347
|
ap->value = read_quoted_value(pi);
|
347
|
-
|
348
|
-
|
349
|
-
|
350
|
-
|
351
|
-
|
352
|
-
|
353
|
-
|
348
|
+
if (0 != strchr(ap->value, '&')) {
|
349
|
+
if (0 != collapse_special((char*)ap->value)) {
|
350
|
+
raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
|
351
|
+
}
|
352
|
+
}
|
353
|
+
ap++;
|
354
|
+
if (MAX_ATTRS <= (ap - attrs)) {
|
354
355
|
raise_error("too many attributes", pi->str, pi->s);
|
355
|
-
|
356
|
+
}
|
356
357
|
break;
|
357
358
|
}
|
358
|
-
|
359
|
+
c = '\0';
|
359
360
|
}
|
360
361
|
if (hasChildren) {
|
361
362
|
char *start;
|
@@ -386,18 +387,18 @@ read_element(PInfo pi) {
|
|
386
387
|
case '/':
|
387
388
|
pi->s++;
|
388
389
|
name = read_name_token(pi);
|
389
|
-
|
390
|
+
end = pi->s;
|
390
391
|
next_non_white(pi);
|
391
|
-
|
392
|
-
|
392
|
+
c = *pi->s;
|
393
|
+
*end = '\0';
|
393
394
|
if (0 != strcmp(name, ename)) {
|
394
395
|
raise_error("invalid format, elements overlap", pi->str, pi->s);
|
395
396
|
}
|
396
397
|
if ('>' != c) {
|
397
398
|
raise_error("invalid format, element not closed", pi->str, pi->s);
|
398
399
|
}
|
399
|
-
|
400
|
-
|
400
|
+
pi->s++;
|
401
|
+
pi->pcb->end_element(pi, ename);
|
401
402
|
return;
|
402
403
|
case '\0':
|
403
404
|
raise_error("invalid format, document not terminated", pi->str, pi->s);
|
@@ -418,7 +419,7 @@ read_element(PInfo pi) {
|
|
418
419
|
'>' == *(pi->s + elen + 2)) {
|
419
420
|
// close tag after text so treat as a value
|
420
421
|
pi->s += elen + 3;
|
421
|
-
|
422
|
+
pi->pcb->end_element(pi, ename);
|
422
423
|
return;
|
423
424
|
}
|
424
425
|
}
|
@@ -428,9 +429,9 @@ read_element(PInfo pi) {
|
|
428
429
|
|
429
430
|
static void
|
430
431
|
read_text(PInfo pi) {
|
431
|
-
char
|
432
|
+
char buf[MAX_TEXT_LEN];
|
432
433
|
char *b = buf;
|
433
|
-
char
|
434
|
+
char *alloc_buf = 0;
|
434
435
|
char *end = b + sizeof(buf) - 2;
|
435
436
|
char c;
|
436
437
|
int done = 0;
|
@@ -449,25 +450,25 @@ read_text(PInfo pi) {
|
|
449
450
|
c = read_coded_char(pi);
|
450
451
|
}
|
451
452
|
if (end <= b) {
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
453
|
+
unsigned long size;
|
454
|
+
|
455
|
+
if (0 == alloc_buf) {
|
456
|
+
size = sizeof(buf) * 2;
|
457
|
+
if (0 == (alloc_buf = (char*)malloc(size))) {
|
458
|
+
raise_error("text too long", pi->str, pi->s);
|
459
|
+
}
|
460
|
+
memcpy(alloc_buf, buf, b - buf);
|
461
|
+
b = alloc_buf + (b - buf);
|
462
|
+
} else {
|
463
|
+
unsigned long pos = b - alloc_buf;
|
464
|
+
|
465
|
+
size = (end - alloc_buf) * 2;
|
466
|
+
if (0 == (alloc_buf = (char*)realloc(alloc_buf, size))) {
|
467
|
+
raise_error("text too long", pi->str, pi->s);
|
468
|
+
}
|
469
|
+
b = alloc_buf + pos;
|
470
|
+
}
|
471
|
+
end = alloc_buf + size - 2;
|
471
472
|
}
|
472
473
|
*b++ = c;
|
473
474
|
break;
|
@@ -475,19 +476,19 @@ read_text(PInfo pi) {
|
|
475
476
|
}
|
476
477
|
*b = '\0';
|
477
478
|
if (0 != alloc_buf) {
|
478
|
-
|
479
|
-
|
479
|
+
pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
|
480
|
+
free(alloc_buf);
|
480
481
|
} else {
|
481
|
-
|
482
|
+
pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
|
482
483
|
}
|
483
484
|
}
|
484
485
|
|
485
486
|
#if 0
|
486
487
|
static void
|
487
488
|
read_reduced_text(PInfo pi) {
|
488
|
-
char
|
489
|
+
char buf[MAX_TEXT_LEN];
|
489
490
|
char *b = buf;
|
490
|
-
char
|
491
|
+
char *alloc_buf = 0;
|
491
492
|
char *end = b + sizeof(buf) - 2;
|
492
493
|
char c;
|
493
494
|
int spc = 0;
|
@@ -514,25 +515,25 @@ read_reduced_text(PInfo pi) {
|
|
514
515
|
c = read_coded_char(pi);
|
515
516
|
}
|
516
517
|
if (end <= b + spc) {
|
517
|
-
|
518
|
-
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
524
|
-
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
518
|
+
unsigned long size;
|
519
|
+
|
520
|
+
if (0 == alloc_buf) {
|
521
|
+
size = sizeof(buf) * 2;
|
522
|
+
if (0 == (alloc_buf = (char*)malloc(size))) {
|
523
|
+
raise_error("text too long", pi->str, pi->s);
|
524
|
+
}
|
525
|
+
memcpy(alloc_buf, buf, b - buf);
|
526
|
+
b = alloc_buf + (b - buf);
|
527
|
+
} else {
|
528
|
+
unsigned long pos = b - alloc_buf;
|
529
|
+
|
530
|
+
size = (end - alloc_buf) * 2;
|
531
|
+
if (0 == (alloc_buf = (char*)realloc(alloc_buf, size))) {
|
532
|
+
raise_error("text too long", pi->str, pi->s);
|
533
|
+
}
|
534
|
+
b = alloc_buf + pos;
|
535
|
+
}
|
536
|
+
end = alloc_buf + size - 2;
|
536
537
|
}
|
537
538
|
if (spc) {
|
538
539
|
*b++ = ' ';
|
@@ -544,17 +545,17 @@ read_reduced_text(PInfo pi) {
|
|
544
545
|
}
|
545
546
|
*b = '\0';
|
546
547
|
if (0 != alloc_buf) {
|
547
|
-
|
548
|
-
|
548
|
+
pi->pcb->add_text(pi, alloc_buf, ('/' == *(pi->s + 1)));
|
549
|
+
free(alloc_buf);
|
549
550
|
} else {
|
550
|
-
|
551
|
+
pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
|
551
552
|
}
|
552
553
|
}
|
553
554
|
#endif
|
554
555
|
|
555
556
|
static char*
|
556
557
|
read_name_token(PInfo pi) {
|
557
|
-
char
|
558
|
+
char *start;
|
558
559
|
|
559
560
|
next_non_white(pi);
|
560
561
|
start = pi->s;
|
@@ -571,9 +572,9 @@ read_name_token(PInfo pi) {
|
|
571
572
|
case '\r':
|
572
573
|
return start;
|
573
574
|
case '\0':
|
574
|
-
|
575
|
+
// documents never terminate after a name token
|
575
576
|
raise_error("invalid format, document not terminated", pi->str, pi->s);
|
576
|
-
|
577
|
+
break; // to avoid warnings
|
577
578
|
default:
|
578
579
|
break;
|
579
580
|
}
|
@@ -584,7 +585,7 @@ read_name_token(PInfo pi) {
|
|
584
585
|
static void
|
585
586
|
read_cdata(PInfo pi) {
|
586
587
|
char *start;
|
587
|
-
char
|
588
|
+
char *end;
|
588
589
|
|
589
590
|
start = pi->s;
|
590
591
|
end = strstr(pi->s, "]]>");
|
@@ -594,7 +595,7 @@ read_cdata(PInfo pi) {
|
|
594
595
|
*end = '\0';
|
595
596
|
pi->s = end + 3;
|
596
597
|
if (0 != pi->pcb->add_cdata) {
|
597
|
-
|
598
|
+
pi->pcb->add_cdata(pi, start, end - start);
|
598
599
|
}
|
599
600
|
}
|
600
601
|
|
@@ -603,7 +604,7 @@ read_cdata(PInfo pi) {
|
|
603
604
|
*/
|
604
605
|
static char*
|
605
606
|
read_quoted_value(PInfo pi) {
|
606
|
-
char
|
607
|
+
char *value;
|
607
608
|
|
608
609
|
if ('"' != *pi->s) {
|
609
610
|
raise_error("invalid format, expected a quote character", pi->str, pi->s);
|
@@ -613,10 +614,10 @@ read_quoted_value(PInfo pi) {
|
|
613
614
|
for (; *pi->s != '"'; pi->s++) {
|
614
615
|
if ('\0' == *pi->s) {
|
615
616
|
raise_error("invalid format, document not terminated", pi->str, pi->s);
|
616
|
-
|
617
|
+
}
|
617
618
|
}
|
618
619
|
*pi->s = '\0'; // terminate value
|
619
|
-
pi->s++;
|
620
|
+
pi->s++; // move past quote
|
620
621
|
|
621
622
|
return value;
|
622
623
|
}
|
@@ -672,49 +673,49 @@ read_coded_char(PInfo pi) {
|
|
672
673
|
|
673
674
|
static int
|
674
675
|
collapse_special(char *str) {
|
675
|
-
char
|
676
|
-
char
|
676
|
+
char *s = str;
|
677
|
+
char *b = str;
|
677
678
|
|
678
679
|
while ('\0' != *s) {
|
679
|
-
|
680
|
-
|
681
|
-
|
682
|
-
|
683
|
-
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
694
|
-
|
695
|
-
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
701
|
-
|
702
|
-
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
714
|
-
|
715
|
-
|
716
|
-
|
717
|
-
|
680
|
+
if ('&' == *s) {
|
681
|
+
int c;
|
682
|
+
char *end;
|
683
|
+
|
684
|
+
s++;
|
685
|
+
if ('#' == *s) {
|
686
|
+
c = (int)strtol(s, &end, 10);
|
687
|
+
if (';' != *end) {
|
688
|
+
return EDOM;
|
689
|
+
}
|
690
|
+
s = end + 1;
|
691
|
+
} else if (0 == strncasecmp(s, "lt;", 3)) {
|
692
|
+
c = '<';
|
693
|
+
s += 3;
|
694
|
+
} else if (0 == strncasecmp(s, "gt;", 3)) {
|
695
|
+
c = '>';
|
696
|
+
s += 3;
|
697
|
+
} else if (0 == strncasecmp(s, "amp;", 4)) {
|
698
|
+
c = '&';
|
699
|
+
s += 4;
|
700
|
+
} else if (0 == strncasecmp(s, "quot;", 5)) {
|
701
|
+
c = '"';
|
702
|
+
s += 5;
|
703
|
+
} else if (0 == strncasecmp(s, "apos;", 5)) {
|
704
|
+
c = '\'';
|
705
|
+
s += 5;
|
706
|
+
} else {
|
707
|
+
c = '?';
|
708
|
+
while (';' != *s++) {
|
709
|
+
if ('\0' == *s) {
|
710
|
+
return EDOM;
|
711
|
+
}
|
712
|
+
}
|
713
|
+
s++;
|
714
|
+
}
|
715
|
+
*b++ = (char)c;
|
716
|
+
} else {
|
717
|
+
*b++ = *s++;
|
718
|
+
}
|
718
719
|
}
|
719
720
|
*b = '\0';
|
720
721
|
|
data/lib/ox/bag.rb
CHANGED
@@ -16,10 +16,6 @@ module Ox
|
|
16
16
|
def initialize(args={ })
|
17
17
|
args.each do |k,v|
|
18
18
|
self.instance_variable_set(k, v)
|
19
|
-
m = k.to_s[1..-1].to_sym
|
20
|
-
unless respond_to?(m)
|
21
|
-
self.class.define_get(m, k)
|
22
|
-
end
|
23
19
|
end
|
24
20
|
end
|
25
21
|
|
@@ -28,6 +24,7 @@ module Ox
|
|
28
24
|
# @return [Boolean] true for any method that matches an instance
|
29
25
|
# variable reader, otherwise false.
|
30
26
|
def respond_to?(m)
|
27
|
+
return true if super
|
31
28
|
at_m = ('@' + m.to_s).to_sym
|
32
29
|
instance_variables.include?(at_m)
|
33
30
|
end
|
data/lib/ox/version.rb
CHANGED
data/test/bench.rb
CHANGED
@@ -13,7 +13,7 @@
|
|
13
13
|
# unpack: marshal 0.609927
|
14
14
|
# unpack: ox 0.287053
|
15
15
|
|
16
|
-
require 'msgpack'
|
16
|
+
#require 'msgpack'
|
17
17
|
require 'json'
|
18
18
|
require 'ox'
|
19
19
|
|
@@ -32,14 +32,14 @@ end
|
|
32
32
|
|
33
33
|
def bench_all(title, iter, obj)
|
34
34
|
puts "\n#{title} Packing"
|
35
|
-
bench('pack: message pack', iter) { MessagePack.pack(obj) }
|
35
|
+
# bench('pack: message pack', iter) { MessagePack.pack(obj) }
|
36
36
|
bench('pack: marshall', iter) { Marshal.dump(obj) }
|
37
37
|
bench('pack: json', iter) { JSON.dump(obj) }
|
38
38
|
bench('pack: ox', iter) { Ox.dump(obj) }
|
39
39
|
|
40
40
|
puts "\n#{title} Unpacking"
|
41
|
-
mp_obj = MessagePack.pack(obj)
|
42
|
-
bench('unpack: message pack', iter) { MessagePack.unpack(mp_obj) }
|
41
|
+
# mp_obj = MessagePack.pack(obj)
|
42
|
+
# bench('unpack: message pack', iter) { MessagePack.unpack(mp_obj) }
|
43
43
|
mars_obj = Marshal.dump(obj)
|
44
44
|
bench('unpack: marshal', iter) { Marshal.load(mars_obj) }
|
45
45
|
json_obj = JSON.dump(obj)
|
data/test/big.rb
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby -wW1
|
2
|
+
|
3
|
+
$: << '../lib'
|
4
|
+
$: << '../ext'
|
5
|
+
|
6
|
+
if __FILE__ == $0
|
7
|
+
while (i = ARGV.index('-I'))
|
8
|
+
x,path = ARGV.slice!(i, 2)
|
9
|
+
$: << path
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
require 'ox'
|
14
|
+
|
15
|
+
def dump(cnt = 10000)
|
16
|
+
h = { }
|
17
|
+
cnt.times do |i|
|
18
|
+
h[i] = [i * 2, "this is #{i}"]
|
19
|
+
end
|
20
|
+
xml = Ox.dump(h)
|
21
|
+
puts "size: #{xml.size}"
|
22
|
+
end
|
23
|
+
|
24
|
+
dump(200000)
|
data/test/bug3.rb
CHANGED
@@ -3,19 +3,21 @@
|
|
3
3
|
$: << '../lib'
|
4
4
|
$: << '../ext'
|
5
5
|
|
6
|
+
if __FILE__ == $0
|
7
|
+
if (i = ARGV.index('-I'))
|
8
|
+
x,path = ARGV.slice!(i, 2)
|
9
|
+
$: << path
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
6
13
|
require 'ox'
|
7
14
|
|
8
|
-
def
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
xml = Ox.dump(e, effort: :tolerant)
|
14
|
-
puts xml
|
15
|
-
o = Ox.load(xml, mode: :object)
|
16
|
-
puts o.message
|
17
|
-
puts Ox.dump(e)
|
15
|
+
def dump(cnt = 100000)
|
16
|
+
cnt.times do |i|
|
17
|
+
xml = Ox.dump([:inc, 1])
|
18
|
+
#puts xml
|
19
|
+
|
18
20
|
end
|
19
21
|
end
|
20
22
|
|
21
|
-
|
23
|
+
dump()
|
data/test/bug4.rb
ADDED
data/test/func.rb
CHANGED
@@ -216,6 +216,12 @@ class Func < ::Test::Unit::TestCase
|
|
216
216
|
assert_equal("<!DOCTYPE s SYSTEM \"ox.dtd\">\n<s>test</s>\n", xml)
|
217
217
|
end
|
218
218
|
|
219
|
+
def test_lone_dtd
|
220
|
+
xml = "<!DOCTYPE html>" # not really a valid xml but should pass anyway
|
221
|
+
doc = Ox.parse(xml)
|
222
|
+
assert_equal('html', doc.nodes[0].value)
|
223
|
+
end
|
224
|
+
|
219
225
|
def test_class
|
220
226
|
dump_and_load(Bag, false)
|
221
227
|
end
|
metadata
CHANGED
@@ -1,28 +1,24 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
|
-
version: !ruby/object:Gem::Version
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.4.2
|
4
5
|
prerelease:
|
5
|
-
version: 1.4.1
|
6
6
|
platform: ruby
|
7
|
-
authors:
|
7
|
+
authors:
|
8
8
|
- Peter Ohler
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
|
13
|
-
date: 2011-11-16 00:00:00 +09:00
|
14
|
-
default_executable:
|
12
|
+
date: 2011-12-03 00:00:00.000000000Z
|
15
13
|
dependencies: []
|
16
|
-
|
17
14
|
description: A fast XML parser and object serializer that uses only standard C lib.
|
18
15
|
email: peter@ohler.com
|
19
16
|
executables: []
|
20
|
-
|
21
|
-
extensions:
|
17
|
+
extensions:
|
22
18
|
- ext/ox/extconf.rb
|
23
|
-
extra_rdoc_files:
|
19
|
+
extra_rdoc_files:
|
24
20
|
- README.md
|
25
|
-
files:
|
21
|
+
files:
|
26
22
|
- lib/ox/bag.rb
|
27
23
|
- lib/ox/cdata.rb
|
28
24
|
- lib/ox/comment.rb
|
@@ -51,9 +47,11 @@ files:
|
|
51
47
|
- ext/ox/parse.c
|
52
48
|
- ext/ox/sax.c
|
53
49
|
- test/bench.rb
|
50
|
+
- test/big.rb
|
54
51
|
- test/bug1.rb
|
55
52
|
- test/bug2.rb
|
56
53
|
- test/bug3.rb
|
54
|
+
- test/bug4.rb
|
57
55
|
- test/cache16_test.rb
|
58
56
|
- test/cache8_test.rb
|
59
57
|
- test/cache_test.rb
|
@@ -81,41 +79,36 @@ files:
|
|
81
79
|
- test/perf_sax.rb
|
82
80
|
- test/perf_write.rb
|
83
81
|
- test/sample.rb
|
84
|
-
- test/sax_example.rb
|
85
82
|
- test/sax_test.rb
|
86
83
|
- test/test.rb
|
87
84
|
- test/Sample.graffle
|
88
85
|
- LICENSE
|
89
86
|
- README.md
|
90
|
-
has_rdoc: true
|
91
87
|
homepage: https://github.com/ohler55/ox
|
92
88
|
licenses: []
|
93
|
-
|
94
89
|
post_install_message:
|
95
|
-
rdoc_options:
|
90
|
+
rdoc_options:
|
96
91
|
- --main
|
97
92
|
- README.md
|
98
|
-
require_paths:
|
93
|
+
require_paths:
|
99
94
|
- lib
|
100
95
|
- ext
|
101
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
96
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
102
97
|
none: false
|
103
|
-
requirements:
|
104
|
-
- -
|
105
|
-
- !ruby/object:Gem::Version
|
106
|
-
version:
|
107
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
108
103
|
none: false
|
109
|
-
requirements:
|
110
|
-
- -
|
111
|
-
- !ruby/object:Gem::Version
|
112
|
-
version:
|
104
|
+
requirements:
|
105
|
+
- - ! '>='
|
106
|
+
- !ruby/object:Gem::Version
|
107
|
+
version: '0'
|
113
108
|
requirements: []
|
114
|
-
|
115
109
|
rubyforge_project: ox
|
116
|
-
rubygems_version: 1.
|
110
|
+
rubygems_version: 1.8.10
|
117
111
|
signing_key:
|
118
112
|
specification_version: 3
|
119
113
|
summary: A fast XML parser and object serializer.
|
120
114
|
test_files: []
|
121
|
-
|
data/test/sax_example.rb
DELETED
@@ -1,37 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby -wW1
|
2
|
-
|
3
|
-
$: << '../lib'
|
4
|
-
$: << '../ext'
|
5
|
-
|
6
|
-
require 'stringio'
|
7
|
-
require 'ox'
|
8
|
-
|
9
|
-
class Sample < ::Ox::Sax
|
10
|
-
def start_element(name); puts "start: #{name}"; end
|
11
|
-
def end_element(name); puts "end: #{name}"; end
|
12
|
-
def attr(name, value); puts " #{name} => #{value}"; end
|
13
|
-
def text(value); puts "text #{value}"; end
|
14
|
-
end
|
15
|
-
|
16
|
-
io = StringIO.new(%{
|
17
|
-
<top name="sample">
|
18
|
-
<middle name="second">
|
19
|
-
<bottom name="third"/>
|
20
|
-
</middle>
|
21
|
-
</top>
|
22
|
-
})
|
23
|
-
|
24
|
-
handler = Sample.new()
|
25
|
-
Ox.sax_parse(handler, io)
|
26
|
-
|
27
|
-
# outputs
|
28
|
-
# start: top
|
29
|
-
# name => sample
|
30
|
-
# start: middle
|
31
|
-
# name => second
|
32
|
-
# start: bottom
|
33
|
-
# name => third
|
34
|
-
# end: bottom
|
35
|
-
# end: middle
|
36
|
-
# end: top
|
37
|
-
|