ox 2.0.0 → 2.0.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +121 -128
- data/ext/ox/attr.h +109 -0
- data/ext/ox/err.c +64 -0
- data/ext/ox/err.h +61 -0
- data/ext/ox/gen_load.c +32 -33
- data/ext/ox/helper.h +116 -0
- data/ext/ox/obj_load.c +551 -518
- data/ext/ox/ox.c +43 -20
- data/ext/ox/ox.h +9 -47
- data/ext/ox/parse.c +146 -70
- data/ext/ox/sax.c +19 -1
- data/ext/ox/sax_buf.c +5 -4
- data/ext/ox/sax_has.h +2 -0
- data/ext/ox/sax_stack.h +3 -3
- data/ext/ox/type.h +63 -0
- data/lib/ox/sax.rb +12 -6
- data/lib/ox/version.rb +1 -1
- metadata +8 -4
data/ext/ox/ox.c
CHANGED
@@ -38,7 +38,7 @@
|
|
38
38
|
#include "sax.h"
|
39
39
|
|
40
40
|
/* maximum to allocate on the stack, arbitrary limit */
|
41
|
-
#define SMALL_XML
|
41
|
+
#define SMALL_XML 4096
|
42
42
|
|
43
43
|
typedef struct _YesNoOpt {
|
44
44
|
VALUE sym;
|
@@ -57,6 +57,7 @@ ID ox_at_value_id;
|
|
57
57
|
ID ox_attr_id;
|
58
58
|
ID ox_attr_value_id;
|
59
59
|
ID ox_attributes_id;
|
60
|
+
ID ox_attrs_done_id;
|
60
61
|
ID ox_beg_id;
|
61
62
|
ID ox_cdata_id;
|
62
63
|
ID ox_comment_id;
|
@@ -384,21 +385,26 @@ to_obj(VALUE self, VALUE ruby_xml) {
|
|
384
385
|
size_t len;
|
385
386
|
VALUE obj;
|
386
387
|
struct _Options options = ox_default_options;
|
388
|
+
struct _Err err;
|
387
389
|
|
390
|
+
err_init(&err);
|
388
391
|
Check_Type(ruby_xml, T_STRING);
|
389
392
|
/* the xml string gets modified so make a copy of it */
|
390
393
|
len = RSTRING_LEN(ruby_xml) + 1;
|
394
|
+
x = defuse_bom(StringValuePtr(ruby_xml), &options);
|
391
395
|
if (SMALL_XML < len) {
|
392
396
|
xml = ALLOC_N(char, len);
|
393
397
|
} else {
|
394
398
|
xml = ALLOCA_N(char, len);
|
395
399
|
}
|
396
|
-
memcpy(xml,
|
397
|
-
|
398
|
-
obj = ox_parse(x, ox_obj_callbacks, 0, &options);
|
400
|
+
memcpy(xml, x, len);
|
401
|
+
obj = ox_parse(xml, ox_obj_callbacks, 0, &options, &err);
|
399
402
|
if (SMALL_XML < len) {
|
400
403
|
xfree(xml);
|
401
404
|
}
|
405
|
+
if (err_has(&err)) {
|
406
|
+
ox_err_raise(&err);
|
407
|
+
}
|
402
408
|
return obj;
|
403
409
|
}
|
404
410
|
|
@@ -415,26 +421,31 @@ to_gen(VALUE self, VALUE ruby_xml) {
|
|
415
421
|
size_t len;
|
416
422
|
VALUE obj;
|
417
423
|
struct _Options options = ox_default_options;
|
424
|
+
struct _Err err;
|
418
425
|
|
426
|
+
err_init(&err);
|
419
427
|
Check_Type(ruby_xml, T_STRING);
|
420
428
|
/* the xml string gets modified so make a copy of it */
|
421
429
|
len = RSTRING_LEN(ruby_xml) + 1;
|
430
|
+
x = defuse_bom(StringValuePtr(ruby_xml), &options);
|
422
431
|
if (SMALL_XML < len) {
|
423
432
|
xml = ALLOC_N(char, len);
|
424
433
|
} else {
|
425
434
|
xml = ALLOCA_N(char, len);
|
426
435
|
}
|
427
|
-
memcpy(xml,
|
428
|
-
|
429
|
-
obj = ox_parse(x, ox_gen_callbacks, 0, &options);
|
436
|
+
memcpy(xml, x, len);
|
437
|
+
obj = ox_parse(xml, ox_gen_callbacks, 0, &options, &err);
|
430
438
|
if (SMALL_XML < len) {
|
431
439
|
xfree(xml);
|
432
440
|
}
|
441
|
+
if (err_has(&err)) {
|
442
|
+
ox_err_raise(&err);
|
443
|
+
}
|
433
444
|
return obj;
|
434
445
|
}
|
435
446
|
|
436
447
|
static VALUE
|
437
|
-
load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
|
448
|
+
load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
|
438
449
|
VALUE obj;
|
439
450
|
struct _Options options = ox_default_options;
|
440
451
|
|
@@ -499,19 +510,19 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
|
|
499
510
|
xml = defuse_bom(xml, &options);
|
500
511
|
switch (options.mode) {
|
501
512
|
case ObjMode:
|
502
|
-
obj = ox_parse(xml, ox_obj_callbacks, 0, &options);
|
513
|
+
obj = ox_parse(xml, ox_obj_callbacks, 0, &options, err);
|
503
514
|
break;
|
504
515
|
case GenMode:
|
505
|
-
obj = ox_parse(xml, ox_gen_callbacks, 0, &options);
|
516
|
+
obj = ox_parse(xml, ox_gen_callbacks, 0, &options, err);
|
506
517
|
break;
|
507
518
|
case LimMode:
|
508
|
-
obj = ox_parse(xml, ox_limited_callbacks, 0, &options);
|
519
|
+
obj = ox_parse(xml, ox_limited_callbacks, 0, &options, err);
|
509
520
|
break;
|
510
521
|
case NoMode:
|
511
|
-
obj = ox_parse(xml, ox_nomode_callbacks, 0, &options);
|
522
|
+
obj = ox_parse(xml, ox_nomode_callbacks, 0, &options, err);
|
512
523
|
break;
|
513
524
|
default:
|
514
|
-
obj = ox_parse(xml, ox_gen_callbacks, 0, &options);
|
525
|
+
obj = ox_parse(xml, ox_gen_callbacks, 0, &options, err);
|
515
526
|
break;
|
516
527
|
}
|
517
528
|
return obj;
|
@@ -541,7 +552,9 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
541
552
|
size_t len;
|
542
553
|
VALUE obj;
|
543
554
|
VALUE encoding;
|
544
|
-
|
555
|
+
struct _Err err;
|
556
|
+
|
557
|
+
err_init(&err);
|
545
558
|
Check_Type(*argv, T_STRING);
|
546
559
|
/* the xml string gets modified so make a copy of it */
|
547
560
|
len = RSTRING_LEN(*argv) + 1;
|
@@ -562,10 +575,13 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
562
575
|
encoding = Qnil;
|
563
576
|
#endif
|
564
577
|
memcpy(xml, StringValuePtr(*argv), len);
|
565
|
-
obj = load(xml, argc - 1, argv + 1, self, encoding);
|
578
|
+
obj = load(xml, argc - 1, argv + 1, self, encoding, &err);
|
566
579
|
if (SMALL_XML < len) {
|
567
580
|
xfree(xml);
|
568
581
|
}
|
582
|
+
if (err_has(&err)) {
|
583
|
+
ox_err_raise(&err);
|
584
|
+
}
|
569
585
|
return obj;
|
570
586
|
}
|
571
587
|
|
@@ -594,7 +610,9 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
594
610
|
FILE *f;
|
595
611
|
size_t len;
|
596
612
|
VALUE obj;
|
597
|
-
|
613
|
+
struct _Err err;
|
614
|
+
|
615
|
+
err_init(&err);
|
598
616
|
Check_Type(*argv, T_STRING);
|
599
617
|
path = StringValuePtr(*argv);
|
600
618
|
if (0 == (f = fopen(path, "r"))) {
|
@@ -609,15 +627,19 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
609
627
|
}
|
610
628
|
fseek(f, 0, SEEK_SET);
|
611
629
|
if (len != fread(xml, 1, len, f)) {
|
612
|
-
|
613
|
-
|
630
|
+
ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
|
631
|
+
obj = Qnil;
|
632
|
+
} else {
|
633
|
+
xml[len] = '\0';
|
634
|
+
obj = load(xml, argc - 1, argv + 1, self, Qnil, &err);
|
614
635
|
}
|
615
636
|
fclose(f);
|
616
|
-
xml[len] = '\0';
|
617
|
-
obj = load(xml, argc - 1, argv + 1, self, Qnil);
|
618
637
|
if (SMALL_XML < len) {
|
619
638
|
xfree(xml);
|
620
639
|
}
|
640
|
+
if (err_has(&err)) {
|
641
|
+
ox_err_raise(&err);
|
642
|
+
}
|
621
643
|
return obj;
|
622
644
|
}
|
623
645
|
|
@@ -832,6 +854,7 @@ void Init_ox() {
|
|
832
854
|
ox_attr_id = rb_intern("attr");
|
833
855
|
ox_attr_value_id = rb_intern("attr_value");
|
834
856
|
ox_attributes_id = rb_intern("@attributes");
|
857
|
+
ox_attrs_done_id = rb_intern("attrs_done");
|
835
858
|
ox_beg_id = rb_intern("@beg");
|
836
859
|
ox_cdata_id = rb_intern("cdata");
|
837
860
|
ox_comment_id = rb_intern("comment");
|
data/ext/ox/ox.h
CHANGED
@@ -59,11 +59,14 @@ enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
|
|
59
59
|
|
60
60
|
#include "cache.h"
|
61
61
|
|
62
|
+
#include "err.h"
|
63
|
+
#include "type.h"
|
64
|
+
#include "attr.h"
|
65
|
+
#include "helper.h"
|
66
|
+
|
62
67
|
#define raise_error(msg, xml, current) _ox_raise_error(msg, xml, current, __FILE__, __LINE__)
|
63
68
|
|
64
69
|
#define MAX_TEXT_LEN 4096
|
65
|
-
#define MAX_ATTRS 1024
|
66
|
-
#define MAX_DEPTH 1024
|
67
70
|
|
68
71
|
#define SILENT 0
|
69
72
|
#define TRACE 1
|
@@ -115,46 +118,6 @@ typedef enum {
|
|
115
118
|
NoMode = 0
|
116
119
|
} LoadMode;
|
117
120
|
|
118
|
-
typedef enum {
|
119
|
-
NoCode = 0,
|
120
|
-
ArrayCode = 'a',
|
121
|
-
String64Code = 'b', /* base64 encoded String */
|
122
|
-
ClassCode = 'c',
|
123
|
-
Symbol64Code = 'd', /* base64 encoded Symbol */
|
124
|
-
DateCode = 'D',
|
125
|
-
ExceptionCode = 'e',
|
126
|
-
FloatCode = 'f',
|
127
|
-
RegexpCode = 'g',
|
128
|
-
HashCode = 'h',
|
129
|
-
FixnumCode = 'i',
|
130
|
-
BignumCode = 'j',
|
131
|
-
KeyCode = 'k', /* indicates the value is a hash key, kind of a hack */
|
132
|
-
RationalCode = 'l',
|
133
|
-
SymbolCode = 'm',
|
134
|
-
FalseClassCode = 'n',
|
135
|
-
ObjectCode = 'o',
|
136
|
-
RefCode = 'p',
|
137
|
-
RangeCode = 'r',
|
138
|
-
StringCode = 's',
|
139
|
-
TimeCode = 't',
|
140
|
-
StructCode = 'u',
|
141
|
-
ComplexCode = 'v',
|
142
|
-
RawCode = 'x',
|
143
|
-
TrueClassCode = 'y',
|
144
|
-
NilClassCode = 'z',
|
145
|
-
} Type;
|
146
|
-
|
147
|
-
typedef struct _Attr {
|
148
|
-
const char *name;
|
149
|
-
const char *value;
|
150
|
-
} *Attr;
|
151
|
-
|
152
|
-
typedef struct _Helper {
|
153
|
-
ID var; /* Object var ID */
|
154
|
-
VALUE obj; /* object created or Qundef if not appropriate */
|
155
|
-
Type type; /* type of object in obj */
|
156
|
-
} *Helper;
|
157
|
-
|
158
121
|
typedef struct _PInfo *PInfo;
|
159
122
|
|
160
123
|
typedef struct _ParseCallbacks {
|
@@ -197,8 +160,8 @@ typedef struct _Options {
|
|
197
160
|
|
198
161
|
/* parse information structure */
|
199
162
|
struct _PInfo {
|
200
|
-
struct
|
201
|
-
|
163
|
+
struct _HelperStack helpers;
|
164
|
+
struct _Err err;
|
202
165
|
char *str; /* buffer being read from */
|
203
166
|
char *s; /* current position in buffer */
|
204
167
|
VALUE obj;
|
@@ -209,7 +172,7 @@ struct _PInfo {
|
|
209
172
|
char last; /* last character read, rarely set */
|
210
173
|
};
|
211
174
|
|
212
|
-
extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options);
|
175
|
+
extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err);
|
213
176
|
extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
|
214
177
|
|
215
178
|
extern void ox_sax_define(void);
|
@@ -228,6 +191,7 @@ extern ID ox_at_line_id;
|
|
228
191
|
extern ID ox_at_value_id;
|
229
192
|
extern ID ox_attr_id;
|
230
193
|
extern ID ox_attr_value_id;
|
194
|
+
extern ID ox_attrs_done_id;
|
231
195
|
extern ID ox_attributes_id;
|
232
196
|
extern ID ox_beg_id;
|
233
197
|
extern ID ox_cdata_id;
|
@@ -273,11 +237,9 @@ extern VALUE ox_utf8_encoding;
|
|
273
237
|
extern void *ox_utf8_encoding;
|
274
238
|
#endif
|
275
239
|
|
276
|
-
extern VALUE ox_arg_error_class;
|
277
240
|
extern VALUE ox_date_class;
|
278
241
|
extern VALUE ox_empty_string;
|
279
242
|
extern VALUE ox_encoding_sym;
|
280
|
-
extern VALUE ox_parse_error_class;
|
281
243
|
extern VALUE ox_stringio_class;
|
282
244
|
extern VALUE ox_struct_class;
|
283
245
|
extern VALUE ox_time_class;
|
data/ext/ox/parse.c
CHANGED
@@ -35,6 +35,9 @@
|
|
35
35
|
|
36
36
|
#include "ruby.h"
|
37
37
|
#include "ox.h"
|
38
|
+
#include "err.h"
|
39
|
+
#include "attr.h"
|
40
|
+
#include "helper.h"
|
38
41
|
|
39
42
|
static void read_instruction(PInfo pi);
|
40
43
|
static void read_doctype(PInfo pi);
|
@@ -99,20 +102,22 @@ next_white(PInfo pi) {
|
|
99
102
|
}
|
100
103
|
|
101
104
|
VALUE
|
102
|
-
ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
|
105
|
+
ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err) {
|
103
106
|
struct _PInfo pi;
|
104
107
|
int body_read = 0;
|
105
108
|
|
106
109
|
if (0 == xml) {
|
107
|
-
|
110
|
+
set_error(err, "Invalid arg, xml string can not be null", xml, 0);
|
111
|
+
return Qnil;
|
108
112
|
}
|
109
113
|
if (DEBUG <= options->trace) {
|
110
114
|
printf("Parsing xml:\n%s\n", xml);
|
111
115
|
}
|
112
116
|
/* initialize parse info */
|
117
|
+
helper_stack_init(&pi.helpers);
|
118
|
+
err_init(&pi.err);
|
113
119
|
pi.str = xml;
|
114
120
|
pi.s = xml;
|
115
|
-
pi.h = 0;
|
116
121
|
pi.pcb = pcb;
|
117
122
|
pi.obj = Qnil;
|
118
123
|
pi.circ_array = 0;
|
@@ -127,7 +132,9 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
|
|
127
132
|
break;
|
128
133
|
}
|
129
134
|
if ('<' != *pi.s) { /* all top level entities start with < */
|
130
|
-
|
135
|
+
set_error(err, "invalid format, expected <", pi.str, pi.s);
|
136
|
+
helper_stack_cleanup(&pi.helpers);
|
137
|
+
return Qnil;
|
131
138
|
}
|
132
139
|
pi.s++; /* past < */
|
133
140
|
switch (*pi.s) {
|
@@ -138,11 +145,15 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
|
|
138
145
|
case '!': /* comment or doctype */
|
139
146
|
pi.s++;
|
140
147
|
if ('\0' == *pi.s) {
|
141
|
-
|
148
|
+
set_error(err, "invalid format, DOCTYPE or comment not terminated", pi.str, pi.s);
|
149
|
+
helper_stack_cleanup(&pi.helpers);
|
150
|
+
return Qnil;
|
142
151
|
} else if ('-' == *pi.s) {
|
143
152
|
pi.s++; /* skip - */
|
144
153
|
if ('-' != *pi.s) {
|
145
|
-
|
154
|
+
set_error(err, "invalid format, bad comment format", pi.str, pi.s);
|
155
|
+
helper_stack_cleanup(&pi.helpers);
|
156
|
+
return Qnil;
|
146
157
|
} else {
|
147
158
|
pi.s++; /* skip second - */
|
148
159
|
read_comment(&pi);
|
@@ -151,17 +162,27 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
|
|
151
162
|
pi.s += 7;
|
152
163
|
read_doctype(&pi);
|
153
164
|
} else {
|
154
|
-
|
165
|
+
set_error(err, "invalid format, DOCTYPE or comment expected", pi.str, pi.s);
|
166
|
+
helper_stack_cleanup(&pi.helpers);
|
167
|
+
return Qnil;
|
155
168
|
}
|
156
169
|
break;
|
157
170
|
case '\0':
|
158
|
-
|
171
|
+
set_error(err, "invalid format, document not terminated", pi.str, pi.s);
|
172
|
+
helper_stack_cleanup(&pi.helpers);
|
173
|
+
return Qnil;
|
159
174
|
default:
|
160
175
|
read_element(&pi);
|
161
176
|
body_read = 1;
|
162
177
|
break;
|
163
178
|
}
|
179
|
+
if (err_has(&pi.err)) {
|
180
|
+
*err = pi.err;
|
181
|
+
helper_stack_cleanup(&pi.helpers);
|
182
|
+
return Qnil;
|
183
|
+
}
|
164
184
|
}
|
185
|
+
helper_stack_cleanup(&pi.helpers);
|
165
186
|
return pi.obj;
|
166
187
|
}
|
167
188
|
|
@@ -191,20 +212,25 @@ gather_content(const char *src, char *content, size_t len) {
|
|
191
212
|
static void
|
192
213
|
read_instruction(PInfo pi) {
|
193
214
|
char content[1024];
|
194
|
-
struct
|
195
|
-
|
215
|
+
struct _AttrStack attrs;
|
216
|
+
char *attr_name;
|
217
|
+
char *attr_value;
|
196
218
|
char *target;
|
197
219
|
char *end;
|
198
220
|
char c;
|
199
221
|
char *cend;
|
200
222
|
int attrs_ok = 1;
|
201
223
|
|
224
|
+
|
202
225
|
*content = '\0';
|
203
|
-
|
204
|
-
target = read_name_token(pi)
|
226
|
+
attr_stack_init(&attrs);
|
227
|
+
if (0 == (target = read_name_token(pi))) {
|
228
|
+
return;
|
229
|
+
}
|
205
230
|
end = pi->s;
|
206
231
|
if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) {
|
207
|
-
|
232
|
+
set_error(&pi->err, "processing instruction content too large or not terminated", pi->str, pi->s);
|
233
|
+
return;
|
208
234
|
}
|
209
235
|
next_non_white(pi);
|
210
236
|
c = *pi->s;
|
@@ -213,10 +239,15 @@ read_instruction(PInfo pi) {
|
|
213
239
|
while ('?' != c) {
|
214
240
|
pi->last = 0;
|
215
241
|
if ('\0' == *pi->s) {
|
216
|
-
|
242
|
+
attr_stack_cleanup(&attrs);
|
243
|
+
set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s);
|
244
|
+
return;
|
217
245
|
}
|
218
246
|
next_non_white(pi);
|
219
|
-
|
247
|
+
if (0 == (attr_name = read_name_token(pi))) {
|
248
|
+
attr_stack_cleanup(&attrs);
|
249
|
+
return;
|
250
|
+
}
|
220
251
|
end = pi->s;
|
221
252
|
next_non_white(pi);
|
222
253
|
if ('=' != *pi->s++) {
|
@@ -226,12 +257,11 @@ read_instruction(PInfo pi) {
|
|
226
257
|
*end = '\0'; /* terminate name */
|
227
258
|
/* read value */
|
228
259
|
next_non_white(pi);
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
attrs_ok = 0;
|
233
|
-
break;
|
260
|
+
if (0 == (attr_value = read_quoted_value(pi))) {
|
261
|
+
attr_stack_cleanup(&attrs);
|
262
|
+
return;
|
234
263
|
}
|
264
|
+
attr_stack_push(&attrs, attr_name, attr_value);
|
235
265
|
next_non_white(pi);
|
236
266
|
if ('\0' == pi->last) {
|
237
267
|
c = *pi->s;
|
@@ -247,18 +277,21 @@ read_instruction(PInfo pi) {
|
|
247
277
|
}
|
248
278
|
if (attrs_ok) {
|
249
279
|
if ('>' != *pi->s++) {
|
250
|
-
|
280
|
+
attr_stack_cleanup(&attrs);
|
281
|
+
set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s);
|
282
|
+
return;
|
251
283
|
}
|
252
284
|
} else {
|
253
285
|
pi->s = cend + 1;
|
254
286
|
}
|
255
287
|
if (0 != pi->pcb->instruct) {
|
256
288
|
if (attrs_ok) {
|
257
|
-
pi->pcb->instruct(pi, target, attrs, 0);
|
289
|
+
pi->pcb->instruct(pi, target, attrs.head, 0);
|
258
290
|
} else {
|
259
|
-
pi->pcb->instruct(pi, target, attrs, content);
|
291
|
+
pi->pcb->instruct(pi, target, attrs.head, content);
|
260
292
|
}
|
261
293
|
}
|
294
|
+
attr_stack_cleanup(&attrs);
|
262
295
|
}
|
263
296
|
|
264
297
|
/* Entered after the "<!DOCTYPE" sequence plus the first character after
|
@@ -275,7 +308,8 @@ read_doctype(PInfo pi) {
|
|
275
308
|
while (1) {
|
276
309
|
c = *pi->s++;
|
277
310
|
if ('\0' == c) {
|
278
|
-
|
311
|
+
set_error(&pi->err, "invalid format, prolog not terminated", pi->str, pi->s);
|
312
|
+
return;
|
279
313
|
} else if ('<' == c) {
|
280
314
|
depth++;
|
281
315
|
} else if ('>' == c) {
|
@@ -306,7 +340,8 @@ read_comment(PInfo pi) {
|
|
306
340
|
comment = pi->s;
|
307
341
|
end = strstr(pi->s, "-->");
|
308
342
|
if (0 == end) {
|
309
|
-
|
343
|
+
set_error(&pi->err, "invalid format, comment not terminated", pi->str, pi->s);
|
344
|
+
return;
|
310
345
|
}
|
311
346
|
for (s = end - 1; pi->s < s && !done; s--) {
|
312
347
|
switch(*s) {
|
@@ -334,8 +369,9 @@ read_comment(PInfo pi) {
|
|
334
369
|
*/
|
335
370
|
static char*
|
336
371
|
read_element(PInfo pi) {
|
337
|
-
struct
|
338
|
-
|
372
|
+
struct _AttrStack attrs;
|
373
|
+
const char *attr_name;
|
374
|
+
const char *attr_value;
|
339
375
|
char *name;
|
340
376
|
char *ename;
|
341
377
|
char *end;
|
@@ -344,7 +380,10 @@ read_element(PInfo pi) {
|
|
344
380
|
int hasChildren = 0;
|
345
381
|
int done = 0;
|
346
382
|
|
347
|
-
|
383
|
+
attr_stack_init(&attrs);
|
384
|
+
if (0 == (ename = read_name_token(pi))) {
|
385
|
+
return 0;
|
386
|
+
}
|
348
387
|
end = pi->s;
|
349
388
|
elen = end - ename;
|
350
389
|
next_non_white(pi);
|
@@ -355,13 +394,15 @@ read_element(PInfo pi) {
|
|
355
394
|
pi->s++;
|
356
395
|
if ('>' != *pi->s) {
|
357
396
|
/*printf("*** '%s' ***\n", pi->s); */
|
358
|
-
|
397
|
+
attr_stack_cleanup(&attrs);
|
398
|
+
set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
|
399
|
+
return 0;
|
359
400
|
}
|
360
401
|
pi->s++; /* past > */
|
361
|
-
|
362
|
-
pi->pcb->add_element(pi, ename, attrs, hasChildren);
|
402
|
+
pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
|
363
403
|
pi->pcb->end_element(pi, ename);
|
364
404
|
|
405
|
+
attr_stack_cleanup(&attrs);
|
365
406
|
return 0;
|
366
407
|
}
|
367
408
|
/* read attribute names until the close (/ or >) is reached */
|
@@ -373,31 +414,37 @@ read_element(PInfo pi) {
|
|
373
414
|
pi->last = 0;
|
374
415
|
switch (c) {
|
375
416
|
case '\0':
|
376
|
-
|
417
|
+
attr_stack_cleanup(&attrs);
|
418
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
419
|
+
return 0;
|
377
420
|
case '/':
|
378
421
|
/* Element with just attributes. */
|
379
422
|
pi->s++;
|
380
423
|
if ('>' != *pi->s) {
|
381
|
-
|
424
|
+
attr_stack_cleanup(&attrs);
|
425
|
+
set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
|
426
|
+
return 0;
|
382
427
|
}
|
383
428
|
pi->s++;
|
384
|
-
|
385
|
-
pi->pcb->add_element(pi, ename, attrs, hasChildren);
|
429
|
+
pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
|
386
430
|
pi->pcb->end_element(pi, ename);
|
387
431
|
|
432
|
+
attr_stack_cleanup(&attrs);
|
388
433
|
return 0;
|
389
434
|
case '>':
|
390
435
|
/* has either children or a value */
|
391
436
|
pi->s++;
|
392
437
|
hasChildren = 1;
|
393
438
|
done = 1;
|
394
|
-
|
395
|
-
pi->pcb->add_element(pi, ename, attrs, hasChildren);
|
439
|
+
pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
|
396
440
|
break;
|
397
441
|
default:
|
398
442
|
/* Attribute name so it's an element and the attribute will be */
|
399
443
|
/* added to it. */
|
400
|
-
|
444
|
+
if (0 == (attr_name = read_name_token(pi))) {
|
445
|
+
attr_stack_cleanup(&attrs);
|
446
|
+
return 0;
|
447
|
+
}
|
401
448
|
end = pi->s;
|
402
449
|
next_non_white(pi);
|
403
450
|
if ('=' != *pi->s++) {
|
@@ -405,29 +452,29 @@ read_element(PInfo pi) {
|
|
405
452
|
pi->s--;
|
406
453
|
pi->last = *pi->s;
|
407
454
|
*end = '\0'; /* terminate name */
|
408
|
-
|
409
|
-
|
410
|
-
if (MAX_ATTRS <= (ap - attrs)) {
|
411
|
-
raise_error("too many attributes", pi->str, pi->s);
|
412
|
-
}
|
455
|
+
attr_value = "";
|
456
|
+
attr_stack_push(&attrs, attr_name, attr_value);
|
413
457
|
break;
|
414
458
|
} else {
|
415
|
-
|
459
|
+
attr_stack_cleanup(&attrs);
|
460
|
+
set_error(&pi->err, "invalid format, no attribute value", pi->str, pi->s);
|
461
|
+
return 0;
|
416
462
|
}
|
417
463
|
}
|
418
464
|
*end = '\0'; /* terminate name */
|
419
465
|
/* read value */
|
420
466
|
next_non_white(pi);
|
421
|
-
|
422
|
-
|
423
|
-
if (0 != collapse_special(pi, (char*)ap->value)) {
|
424
|
-
raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
|
425
|
-
}
|
467
|
+
if (0 == (attr_value = read_quoted_value(pi))) {
|
468
|
+
return 0;
|
426
469
|
}
|
427
|
-
|
428
|
-
|
429
|
-
|
470
|
+
if (0 != strchr(attr_value, '&')) {
|
471
|
+
if (0 != collapse_special(pi, (char*)attr_value)) {
|
472
|
+
attr_stack_cleanup(&attrs);
|
473
|
+
set_error(&pi->err, "invalid format, special character does not end with a semicolon", pi->str, pi->s);
|
474
|
+
return 0;
|
475
|
+
}
|
430
476
|
}
|
477
|
+
attr_stack_push(&attrs, attr_name, attr_value);
|
431
478
|
break;
|
432
479
|
}
|
433
480
|
if ('\0' == pi->last) {
|
@@ -448,7 +495,9 @@ read_element(PInfo pi) {
|
|
448
495
|
next_non_white(pi);
|
449
496
|
c = *pi->s++;
|
450
497
|
if ('\0' == c) {
|
451
|
-
|
498
|
+
attr_stack_cleanup(&attrs);
|
499
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
500
|
+
return 0;
|
452
501
|
}
|
453
502
|
if ('<' == c) {
|
454
503
|
char *slash;
|
@@ -465,7 +514,9 @@ read_element(PInfo pi) {
|
|
465
514
|
pi->s += 7;
|
466
515
|
read_cdata(pi);
|
467
516
|
} else {
|
468
|
-
|
517
|
+
attr_stack_cleanup(&attrs);
|
518
|
+
set_error(&pi->err, "invalid format, invalid comment or CDATA format", pi->str, pi->s);
|
519
|
+
return 0;
|
469
520
|
}
|
470
521
|
break;
|
471
522
|
case '?': /* processing instruction */
|
@@ -475,21 +526,28 @@ read_element(PInfo pi) {
|
|
475
526
|
case '/':
|
476
527
|
slash = pi->s;
|
477
528
|
pi->s++;
|
478
|
-
name = read_name_token(pi)
|
529
|
+
if (0 == (name = read_name_token(pi))) {
|
530
|
+
attr_stack_cleanup(&attrs);
|
531
|
+
return 0;
|
532
|
+
}
|
479
533
|
end = pi->s;
|
480
534
|
next_non_white(pi);
|
481
535
|
c = *pi->s;
|
482
536
|
*end = '\0';
|
483
537
|
if (0 != strcmp(name, ename)) {
|
538
|
+
attr_stack_cleanup(&attrs);
|
484
539
|
if (TolerantEffort == pi->options->effort) {
|
485
540
|
pi->pcb->end_element(pi, ename);
|
486
541
|
return name;
|
487
542
|
} else {
|
488
|
-
|
543
|
+
set_error(&pi->err, "invalid format, elements overlap", pi->str, pi->s);
|
544
|
+
return 0;
|
489
545
|
}
|
490
546
|
}
|
491
547
|
if ('>' != c) {
|
492
|
-
|
548
|
+
attr_stack_cleanup(&attrs);
|
549
|
+
set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
|
550
|
+
return 0;
|
493
551
|
}
|
494
552
|
if (first && start != slash - 1) {
|
495
553
|
/* some white space between start and here so add as text */
|
@@ -498,18 +556,22 @@ read_element(PInfo pi) {
|
|
498
556
|
}
|
499
557
|
pi->s++;
|
500
558
|
pi->pcb->end_element(pi, ename);
|
559
|
+
attr_stack_cleanup(&attrs);
|
501
560
|
return 0;
|
502
561
|
case '\0':
|
562
|
+
attr_stack_cleanup(&attrs);
|
503
563
|
if (TolerantEffort == pi->options->effort) {
|
504
564
|
return 0;
|
505
565
|
} else {
|
506
|
-
|
566
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
567
|
+
return 0;
|
507
568
|
}
|
508
569
|
default:
|
509
570
|
first = 0;
|
510
571
|
/* a child element */
|
511
572
|
// Child closed with mismatched name.
|
512
573
|
if (0 != (name = read_element(pi))) {
|
574
|
+
attr_stack_cleanup(&attrs);
|
513
575
|
if (0 == strcmp(name, ename)) {
|
514
576
|
pi->s++;
|
515
577
|
pi->pcb->end_element(pi, ename);
|
@@ -518,6 +580,8 @@ read_element(PInfo pi) {
|
|
518
580
|
pi->pcb->end_element(pi, ename);
|
519
581
|
return name;
|
520
582
|
}
|
583
|
+
} else if (err_has(&pi->err)) {
|
584
|
+
return 0;
|
521
585
|
}
|
522
586
|
break;
|
523
587
|
}
|
@@ -534,11 +598,13 @@ read_element(PInfo pi) {
|
|
534
598
|
/* close tag after text so treat as a value */
|
535
599
|
pi->s += elen + 3;
|
536
600
|
pi->pcb->end_element(pi, ename);
|
601
|
+
attr_stack_cleanup(&attrs);
|
537
602
|
return 0;
|
538
603
|
}
|
539
604
|
}
|
540
605
|
}
|
541
606
|
}
|
607
|
+
attr_stack_cleanup(&attrs);
|
542
608
|
return 0;
|
543
609
|
}
|
544
610
|
|
@@ -559,7 +625,8 @@ read_text(PInfo pi) {
|
|
559
625
|
pi->s--;
|
560
626
|
break;
|
561
627
|
case '\0':
|
562
|
-
|
628
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
629
|
+
return;
|
563
630
|
default:
|
564
631
|
if (end <= (b + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
|
565
632
|
unsigned long size;
|
@@ -621,7 +688,8 @@ read_reduced_text(PInfo pi) {
|
|
621
688
|
pi->s--;
|
622
689
|
break;
|
623
690
|
case '\0':
|
624
|
-
|
691
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
692
|
+
return;
|
625
693
|
default:
|
626
694
|
if (end <= (b + spc + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
|
627
695
|
unsigned long size;
|
@@ -682,7 +750,8 @@ read_name_token(PInfo pi) {
|
|
682
750
|
return start;
|
683
751
|
case '\0':
|
684
752
|
/* documents never terminate after a name token */
|
685
|
-
|
753
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
754
|
+
return 0;
|
686
755
|
break; /* to avoid warnings */
|
687
756
|
default:
|
688
757
|
break;
|
@@ -699,7 +768,8 @@ read_cdata(PInfo pi) {
|
|
699
768
|
start = pi->s;
|
700
769
|
end = strstr(pi->s, "]]>");
|
701
770
|
if (end == 0) {
|
702
|
-
|
771
|
+
set_error(&pi->err, "invalid format, CDATA not terminated", pi->str, pi->s);
|
772
|
+
return;
|
703
773
|
}
|
704
774
|
*end = '\0';
|
705
775
|
pi->s = end + 3;
|
@@ -740,19 +810,22 @@ read_quoted_value(PInfo pi) {
|
|
740
810
|
value = pi->s;
|
741
811
|
for (; *pi->s != term; pi->s++) {
|
742
812
|
if ('\0' == *pi->s) {
|
743
|
-
|
813
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
814
|
+
return 0;
|
744
815
|
}
|
745
816
|
}
|
746
817
|
*pi->s = '\0'; /* terminate value */
|
747
818
|
pi->s++; /* move past quote */
|
748
819
|
} else if (StrictEffort == pi->options->effort) {
|
749
|
-
|
820
|
+
set_error(&pi->err, "invalid format, expected a quote character", pi->str, pi->s);
|
821
|
+
return 0;
|
750
822
|
} else if (TolerantEffort == pi->options->effort) {
|
751
823
|
value = pi->s;
|
752
824
|
for (; 1; pi->s++) {
|
753
825
|
switch (*pi->s) {
|
754
826
|
case '\0':
|
755
|
-
|
827
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
828
|
+
return 0;
|
756
829
|
case ' ':
|
757
830
|
case '/':
|
758
831
|
case '>':
|
@@ -772,7 +845,8 @@ read_quoted_value(PInfo pi) {
|
|
772
845
|
value = pi->s;
|
773
846
|
next_white(pi);
|
774
847
|
if ('\0' == *pi->s) {
|
775
|
-
|
848
|
+
set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
|
849
|
+
return 0;
|
776
850
|
}
|
777
851
|
*pi->s++ = '\0'; /* terminate value */
|
778
852
|
}
|
@@ -912,8 +986,9 @@ read_coded_chars(PInfo pi, char *text) {
|
|
912
986
|
*text++ = '&';
|
913
987
|
return text;
|
914
988
|
} else {
|
915
|
-
/*
|
916
|
-
|
989
|
+
/*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
|
990
|
+
set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
|
991
|
+
return 0;
|
917
992
|
}
|
918
993
|
pi->s = s;
|
919
994
|
}
|
@@ -994,8 +1069,9 @@ collapse_special(PInfo pi, char *str) {
|
|
994
1069
|
pi->options->rb_enc = ox_utf8_encoding;
|
995
1070
|
b = ucs_to_utf8_chars(b, u);
|
996
1071
|
} else {
|
997
|
-
/*
|
998
|
-
|
1072
|
+
/* set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
|
1073
|
+
set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
|
1074
|
+
return 0;
|
999
1075
|
}
|
1000
1076
|
s = end + 1;
|
1001
1077
|
} else {
|