ox 2.0.0 → 2.0.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

data/ext/ox/ox.c CHANGED
@@ -38,7 +38,7 @@
38
38
  #include "sax.h"
39
39
 
40
40
  /* maximum to allocate on the stack, arbitrary limit */
41
- #define SMALL_XML 65536
41
+ #define SMALL_XML 4096
42
42
 
43
43
  typedef struct _YesNoOpt {
44
44
  VALUE sym;
@@ -57,6 +57,7 @@ ID ox_at_value_id;
57
57
  ID ox_attr_id;
58
58
  ID ox_attr_value_id;
59
59
  ID ox_attributes_id;
60
+ ID ox_attrs_done_id;
60
61
  ID ox_beg_id;
61
62
  ID ox_cdata_id;
62
63
  ID ox_comment_id;
@@ -384,21 +385,26 @@ to_obj(VALUE self, VALUE ruby_xml) {
384
385
  size_t len;
385
386
  VALUE obj;
386
387
  struct _Options options = ox_default_options;
388
+ struct _Err err;
387
389
 
390
+ err_init(&err);
388
391
  Check_Type(ruby_xml, T_STRING);
389
392
  /* the xml string gets modified so make a copy of it */
390
393
  len = RSTRING_LEN(ruby_xml) + 1;
394
+ x = defuse_bom(StringValuePtr(ruby_xml), &options);
391
395
  if (SMALL_XML < len) {
392
396
  xml = ALLOC_N(char, len);
393
397
  } else {
394
398
  xml = ALLOCA_N(char, len);
395
399
  }
396
- memcpy(xml, StringValuePtr(ruby_xml), len);
397
- x = defuse_bom(xml, &options);
398
- obj = ox_parse(x, ox_obj_callbacks, 0, &options);
400
+ memcpy(xml, x, len);
401
+ obj = ox_parse(xml, ox_obj_callbacks, 0, &options, &err);
399
402
  if (SMALL_XML < len) {
400
403
  xfree(xml);
401
404
  }
405
+ if (err_has(&err)) {
406
+ ox_err_raise(&err);
407
+ }
402
408
  return obj;
403
409
  }
404
410
 
@@ -415,26 +421,31 @@ to_gen(VALUE self, VALUE ruby_xml) {
415
421
  size_t len;
416
422
  VALUE obj;
417
423
  struct _Options options = ox_default_options;
424
+ struct _Err err;
418
425
 
426
+ err_init(&err);
419
427
  Check_Type(ruby_xml, T_STRING);
420
428
  /* the xml string gets modified so make a copy of it */
421
429
  len = RSTRING_LEN(ruby_xml) + 1;
430
+ x = defuse_bom(StringValuePtr(ruby_xml), &options);
422
431
  if (SMALL_XML < len) {
423
432
  xml = ALLOC_N(char, len);
424
433
  } else {
425
434
  xml = ALLOCA_N(char, len);
426
435
  }
427
- memcpy(xml, StringValuePtr(ruby_xml), len);
428
- x = defuse_bom(xml, &options);
429
- obj = ox_parse(x, ox_gen_callbacks, 0, &options);
436
+ memcpy(xml, x, len);
437
+ obj = ox_parse(xml, ox_gen_callbacks, 0, &options, &err);
430
438
  if (SMALL_XML < len) {
431
439
  xfree(xml);
432
440
  }
441
+ if (err_has(&err)) {
442
+ ox_err_raise(&err);
443
+ }
433
444
  return obj;
434
445
  }
435
446
 
436
447
  static VALUE
437
- load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
448
+ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
438
449
  VALUE obj;
439
450
  struct _Options options = ox_default_options;
440
451
 
@@ -499,19 +510,19 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
499
510
  xml = defuse_bom(xml, &options);
500
511
  switch (options.mode) {
501
512
  case ObjMode:
502
- obj = ox_parse(xml, ox_obj_callbacks, 0, &options);
513
+ obj = ox_parse(xml, ox_obj_callbacks, 0, &options, err);
503
514
  break;
504
515
  case GenMode:
505
- obj = ox_parse(xml, ox_gen_callbacks, 0, &options);
516
+ obj = ox_parse(xml, ox_gen_callbacks, 0, &options, err);
506
517
  break;
507
518
  case LimMode:
508
- obj = ox_parse(xml, ox_limited_callbacks, 0, &options);
519
+ obj = ox_parse(xml, ox_limited_callbacks, 0, &options, err);
509
520
  break;
510
521
  case NoMode:
511
- obj = ox_parse(xml, ox_nomode_callbacks, 0, &options);
522
+ obj = ox_parse(xml, ox_nomode_callbacks, 0, &options, err);
512
523
  break;
513
524
  default:
514
- obj = ox_parse(xml, ox_gen_callbacks, 0, &options);
525
+ obj = ox_parse(xml, ox_gen_callbacks, 0, &options, err);
515
526
  break;
516
527
  }
517
528
  return obj;
@@ -541,7 +552,9 @@ load_str(int argc, VALUE *argv, VALUE self) {
541
552
  size_t len;
542
553
  VALUE obj;
543
554
  VALUE encoding;
544
-
555
+ struct _Err err;
556
+
557
+ err_init(&err);
545
558
  Check_Type(*argv, T_STRING);
546
559
  /* the xml string gets modified so make a copy of it */
547
560
  len = RSTRING_LEN(*argv) + 1;
@@ -562,10 +575,13 @@ load_str(int argc, VALUE *argv, VALUE self) {
562
575
  encoding = Qnil;
563
576
  #endif
564
577
  memcpy(xml, StringValuePtr(*argv), len);
565
- obj = load(xml, argc - 1, argv + 1, self, encoding);
578
+ obj = load(xml, argc - 1, argv + 1, self, encoding, &err);
566
579
  if (SMALL_XML < len) {
567
580
  xfree(xml);
568
581
  }
582
+ if (err_has(&err)) {
583
+ ox_err_raise(&err);
584
+ }
569
585
  return obj;
570
586
  }
571
587
 
@@ -594,7 +610,9 @@ load_file(int argc, VALUE *argv, VALUE self) {
594
610
  FILE *f;
595
611
  size_t len;
596
612
  VALUE obj;
597
-
613
+ struct _Err err;
614
+
615
+ err_init(&err);
598
616
  Check_Type(*argv, T_STRING);
599
617
  path = StringValuePtr(*argv);
600
618
  if (0 == (f = fopen(path, "r"))) {
@@ -609,15 +627,19 @@ load_file(int argc, VALUE *argv, VALUE self) {
609
627
  }
610
628
  fseek(f, 0, SEEK_SET);
611
629
  if (len != fread(xml, 1, len, f)) {
612
- fclose(f);
613
- rb_raise(rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
630
+ ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
631
+ obj = Qnil;
632
+ } else {
633
+ xml[len] = '\0';
634
+ obj = load(xml, argc - 1, argv + 1, self, Qnil, &err);
614
635
  }
615
636
  fclose(f);
616
- xml[len] = '\0';
617
- obj = load(xml, argc - 1, argv + 1, self, Qnil);
618
637
  if (SMALL_XML < len) {
619
638
  xfree(xml);
620
639
  }
640
+ if (err_has(&err)) {
641
+ ox_err_raise(&err);
642
+ }
621
643
  return obj;
622
644
  }
623
645
 
@@ -832,6 +854,7 @@ void Init_ox() {
832
854
  ox_attr_id = rb_intern("attr");
833
855
  ox_attr_value_id = rb_intern("attr_value");
834
856
  ox_attributes_id = rb_intern("@attributes");
857
+ ox_attrs_done_id = rb_intern("attrs_done");
835
858
  ox_beg_id = rb_intern("@beg");
836
859
  ox_cdata_id = rb_intern("cdata");
837
860
  ox_comment_id = rb_intern("comment");
data/ext/ox/ox.h CHANGED
@@ -59,11 +59,14 @@ enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
59
59
 
60
60
  #include "cache.h"
61
61
 
62
+ #include "err.h"
63
+ #include "type.h"
64
+ #include "attr.h"
65
+ #include "helper.h"
66
+
62
67
  #define raise_error(msg, xml, current) _ox_raise_error(msg, xml, current, __FILE__, __LINE__)
63
68
 
64
69
  #define MAX_TEXT_LEN 4096
65
- #define MAX_ATTRS 1024
66
- #define MAX_DEPTH 1024
67
70
 
68
71
  #define SILENT 0
69
72
  #define TRACE 1
@@ -115,46 +118,6 @@ typedef enum {
115
118
  NoMode = 0
116
119
  } LoadMode;
117
120
 
118
- typedef enum {
119
- NoCode = 0,
120
- ArrayCode = 'a',
121
- String64Code = 'b', /* base64 encoded String */
122
- ClassCode = 'c',
123
- Symbol64Code = 'd', /* base64 encoded Symbol */
124
- DateCode = 'D',
125
- ExceptionCode = 'e',
126
- FloatCode = 'f',
127
- RegexpCode = 'g',
128
- HashCode = 'h',
129
- FixnumCode = 'i',
130
- BignumCode = 'j',
131
- KeyCode = 'k', /* indicates the value is a hash key, kind of a hack */
132
- RationalCode = 'l',
133
- SymbolCode = 'm',
134
- FalseClassCode = 'n',
135
- ObjectCode = 'o',
136
- RefCode = 'p',
137
- RangeCode = 'r',
138
- StringCode = 's',
139
- TimeCode = 't',
140
- StructCode = 'u',
141
- ComplexCode = 'v',
142
- RawCode = 'x',
143
- TrueClassCode = 'y',
144
- NilClassCode = 'z',
145
- } Type;
146
-
147
- typedef struct _Attr {
148
- const char *name;
149
- const char *value;
150
- } *Attr;
151
-
152
- typedef struct _Helper {
153
- ID var; /* Object var ID */
154
- VALUE obj; /* object created or Qundef if not appropriate */
155
- Type type; /* type of object in obj */
156
- } *Helper;
157
-
158
121
  typedef struct _PInfo *PInfo;
159
122
 
160
123
  typedef struct _ParseCallbacks {
@@ -197,8 +160,8 @@ typedef struct _Options {
197
160
 
198
161
  /* parse information structure */
199
162
  struct _PInfo {
200
- struct _Helper helpers[MAX_DEPTH];
201
- Helper h; /* current helper or 0 if not set */
163
+ struct _HelperStack helpers;
164
+ struct _Err err;
202
165
  char *str; /* buffer being read from */
203
166
  char *s; /* current position in buffer */
204
167
  VALUE obj;
@@ -209,7 +172,7 @@ struct _PInfo {
209
172
  char last; /* last character read, rarely set */
210
173
  };
211
174
 
212
- extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options);
175
+ extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err);
213
176
  extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
214
177
 
215
178
  extern void ox_sax_define(void);
@@ -228,6 +191,7 @@ extern ID ox_at_line_id;
228
191
  extern ID ox_at_value_id;
229
192
  extern ID ox_attr_id;
230
193
  extern ID ox_attr_value_id;
194
+ extern ID ox_attrs_done_id;
231
195
  extern ID ox_attributes_id;
232
196
  extern ID ox_beg_id;
233
197
  extern ID ox_cdata_id;
@@ -273,11 +237,9 @@ extern VALUE ox_utf8_encoding;
273
237
  extern void *ox_utf8_encoding;
274
238
  #endif
275
239
 
276
- extern VALUE ox_arg_error_class;
277
240
  extern VALUE ox_date_class;
278
241
  extern VALUE ox_empty_string;
279
242
  extern VALUE ox_encoding_sym;
280
- extern VALUE ox_parse_error_class;
281
243
  extern VALUE ox_stringio_class;
282
244
  extern VALUE ox_struct_class;
283
245
  extern VALUE ox_time_class;
data/ext/ox/parse.c CHANGED
@@ -35,6 +35,9 @@
35
35
 
36
36
  #include "ruby.h"
37
37
  #include "ox.h"
38
+ #include "err.h"
39
+ #include "attr.h"
40
+ #include "helper.h"
38
41
 
39
42
  static void read_instruction(PInfo pi);
40
43
  static void read_doctype(PInfo pi);
@@ -99,20 +102,22 @@ next_white(PInfo pi) {
99
102
  }
100
103
 
101
104
  VALUE
102
- ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
105
+ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options, Err err) {
103
106
  struct _PInfo pi;
104
107
  int body_read = 0;
105
108
 
106
109
  if (0 == xml) {
107
- raise_error("Invalid arg, xml string can not be null", xml, 0);
110
+ set_error(err, "Invalid arg, xml string can not be null", xml, 0);
111
+ return Qnil;
108
112
  }
109
113
  if (DEBUG <= options->trace) {
110
114
  printf("Parsing xml:\n%s\n", xml);
111
115
  }
112
116
  /* initialize parse info */
117
+ helper_stack_init(&pi.helpers);
118
+ err_init(&pi.err);
113
119
  pi.str = xml;
114
120
  pi.s = xml;
115
- pi.h = 0;
116
121
  pi.pcb = pcb;
117
122
  pi.obj = Qnil;
118
123
  pi.circ_array = 0;
@@ -127,7 +132,9 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
127
132
  break;
128
133
  }
129
134
  if ('<' != *pi.s) { /* all top level entities start with < */
130
- raise_error("invalid format, expected <", pi.str, pi.s);
135
+ set_error(err, "invalid format, expected <", pi.str, pi.s);
136
+ helper_stack_cleanup(&pi.helpers);
137
+ return Qnil;
131
138
  }
132
139
  pi.s++; /* past < */
133
140
  switch (*pi.s) {
@@ -138,11 +145,15 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
138
145
  case '!': /* comment or doctype */
139
146
  pi.s++;
140
147
  if ('\0' == *pi.s) {
141
- raise_error("invalid format, DOCTYPE or comment not terminated", pi.str, pi.s);
148
+ set_error(err, "invalid format, DOCTYPE or comment not terminated", pi.str, pi.s);
149
+ helper_stack_cleanup(&pi.helpers);
150
+ return Qnil;
142
151
  } else if ('-' == *pi.s) {
143
152
  pi.s++; /* skip - */
144
153
  if ('-' != *pi.s) {
145
- raise_error("invalid format, bad comment format", pi.str, pi.s);
154
+ set_error(err, "invalid format, bad comment format", pi.str, pi.s);
155
+ helper_stack_cleanup(&pi.helpers);
156
+ return Qnil;
146
157
  } else {
147
158
  pi.s++; /* skip second - */
148
159
  read_comment(&pi);
@@ -151,17 +162,27 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
151
162
  pi.s += 7;
152
163
  read_doctype(&pi);
153
164
  } else {
154
- raise_error("invalid format, DOCTYPE or comment expected", pi.str, pi.s);
165
+ set_error(err, "invalid format, DOCTYPE or comment expected", pi.str, pi.s);
166
+ helper_stack_cleanup(&pi.helpers);
167
+ return Qnil;
155
168
  }
156
169
  break;
157
170
  case '\0':
158
- raise_error("invalid format, document not terminated", pi.str, pi.s);
171
+ set_error(err, "invalid format, document not terminated", pi.str, pi.s);
172
+ helper_stack_cleanup(&pi.helpers);
173
+ return Qnil;
159
174
  default:
160
175
  read_element(&pi);
161
176
  body_read = 1;
162
177
  break;
163
178
  }
179
+ if (err_has(&pi.err)) {
180
+ *err = pi.err;
181
+ helper_stack_cleanup(&pi.helpers);
182
+ return Qnil;
183
+ }
164
184
  }
185
+ helper_stack_cleanup(&pi.helpers);
165
186
  return pi.obj;
166
187
  }
167
188
 
@@ -191,20 +212,25 @@ gather_content(const char *src, char *content, size_t len) {
191
212
  static void
192
213
  read_instruction(PInfo pi) {
193
214
  char content[1024];
194
- struct _Attr attrs[MAX_ATTRS + 1];
195
- Attr a = attrs;
215
+ struct _AttrStack attrs;
216
+ char *attr_name;
217
+ char *attr_value;
196
218
  char *target;
197
219
  char *end;
198
220
  char c;
199
221
  char *cend;
200
222
  int attrs_ok = 1;
201
223
 
224
+
202
225
  *content = '\0';
203
- memset(attrs, 0, sizeof(attrs));
204
- target = read_name_token(pi);
226
+ attr_stack_init(&attrs);
227
+ if (0 == (target = read_name_token(pi))) {
228
+ return;
229
+ }
205
230
  end = pi->s;
206
231
  if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) {
207
- raise_error("processing instruction content too large or not terminated", pi->str, pi->s);
232
+ set_error(&pi->err, "processing instruction content too large or not terminated", pi->str, pi->s);
233
+ return;
208
234
  }
209
235
  next_non_white(pi);
210
236
  c = *pi->s;
@@ -213,10 +239,15 @@ read_instruction(PInfo pi) {
213
239
  while ('?' != c) {
214
240
  pi->last = 0;
215
241
  if ('\0' == *pi->s) {
216
- raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
242
+ attr_stack_cleanup(&attrs);
243
+ set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s);
244
+ return;
217
245
  }
218
246
  next_non_white(pi);
219
- a->name = read_name_token(pi);
247
+ if (0 == (attr_name = read_name_token(pi))) {
248
+ attr_stack_cleanup(&attrs);
249
+ return;
250
+ }
220
251
  end = pi->s;
221
252
  next_non_white(pi);
222
253
  if ('=' != *pi->s++) {
@@ -226,12 +257,11 @@ read_instruction(PInfo pi) {
226
257
  *end = '\0'; /* terminate name */
227
258
  /* read value */
228
259
  next_non_white(pi);
229
- a->value = read_quoted_value(pi);
230
- a++;
231
- if (MAX_ATTRS <= (a - attrs)) {
232
- attrs_ok = 0;
233
- break;
260
+ if (0 == (attr_value = read_quoted_value(pi))) {
261
+ attr_stack_cleanup(&attrs);
262
+ return;
234
263
  }
264
+ attr_stack_push(&attrs, attr_name, attr_value);
235
265
  next_non_white(pi);
236
266
  if ('\0' == pi->last) {
237
267
  c = *pi->s;
@@ -247,18 +277,21 @@ read_instruction(PInfo pi) {
247
277
  }
248
278
  if (attrs_ok) {
249
279
  if ('>' != *pi->s++) {
250
- raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
280
+ attr_stack_cleanup(&attrs);
281
+ set_error(&pi->err, "invalid format, processing instruction not terminated", pi->str, pi->s);
282
+ return;
251
283
  }
252
284
  } else {
253
285
  pi->s = cend + 1;
254
286
  }
255
287
  if (0 != pi->pcb->instruct) {
256
288
  if (attrs_ok) {
257
- pi->pcb->instruct(pi, target, attrs, 0);
289
+ pi->pcb->instruct(pi, target, attrs.head, 0);
258
290
  } else {
259
- pi->pcb->instruct(pi, target, attrs, content);
291
+ pi->pcb->instruct(pi, target, attrs.head, content);
260
292
  }
261
293
  }
294
+ attr_stack_cleanup(&attrs);
262
295
  }
263
296
 
264
297
  /* Entered after the "<!DOCTYPE" sequence plus the first character after
@@ -275,7 +308,8 @@ read_doctype(PInfo pi) {
275
308
  while (1) {
276
309
  c = *pi->s++;
277
310
  if ('\0' == c) {
278
- raise_error("invalid format, prolog not terminated", pi->str, pi->s);
311
+ set_error(&pi->err, "invalid format, prolog not terminated", pi->str, pi->s);
312
+ return;
279
313
  } else if ('<' == c) {
280
314
  depth++;
281
315
  } else if ('>' == c) {
@@ -306,7 +340,8 @@ read_comment(PInfo pi) {
306
340
  comment = pi->s;
307
341
  end = strstr(pi->s, "-->");
308
342
  if (0 == end) {
309
- raise_error("invalid format, comment not terminated", pi->str, pi->s);
343
+ set_error(&pi->err, "invalid format, comment not terminated", pi->str, pi->s);
344
+ return;
310
345
  }
311
346
  for (s = end - 1; pi->s < s && !done; s--) {
312
347
  switch(*s) {
@@ -334,8 +369,9 @@ read_comment(PInfo pi) {
334
369
  */
335
370
  static char*
336
371
  read_element(PInfo pi) {
337
- struct _Attr attrs[MAX_ATTRS];
338
- Attr ap = attrs;
372
+ struct _AttrStack attrs;
373
+ const char *attr_name;
374
+ const char *attr_value;
339
375
  char *name;
340
376
  char *ename;
341
377
  char *end;
@@ -344,7 +380,10 @@ read_element(PInfo pi) {
344
380
  int hasChildren = 0;
345
381
  int done = 0;
346
382
 
347
- ename = read_name_token(pi);
383
+ attr_stack_init(&attrs);
384
+ if (0 == (ename = read_name_token(pi))) {
385
+ return 0;
386
+ }
348
387
  end = pi->s;
349
388
  elen = end - ename;
350
389
  next_non_white(pi);
@@ -355,13 +394,15 @@ read_element(PInfo pi) {
355
394
  pi->s++;
356
395
  if ('>' != *pi->s) {
357
396
  /*printf("*** '%s' ***\n", pi->s); */
358
- raise_error("invalid format, element not closed", pi->str, pi->s);
397
+ attr_stack_cleanup(&attrs);
398
+ set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
399
+ return 0;
359
400
  }
360
401
  pi->s++; /* past > */
361
- ap->name = 0;
362
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
402
+ pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
363
403
  pi->pcb->end_element(pi, ename);
364
404
 
405
+ attr_stack_cleanup(&attrs);
365
406
  return 0;
366
407
  }
367
408
  /* read attribute names until the close (/ or >) is reached */
@@ -373,31 +414,37 @@ read_element(PInfo pi) {
373
414
  pi->last = 0;
374
415
  switch (c) {
375
416
  case '\0':
376
- raise_error("invalid format, document not terminated", pi->str, pi->s);
417
+ attr_stack_cleanup(&attrs);
418
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
419
+ return 0;
377
420
  case '/':
378
421
  /* Element with just attributes. */
379
422
  pi->s++;
380
423
  if ('>' != *pi->s) {
381
- raise_error("invalid format, element not closed", pi->str, pi->s);
424
+ attr_stack_cleanup(&attrs);
425
+ set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
426
+ return 0;
382
427
  }
383
428
  pi->s++;
384
- ap->name = 0;
385
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
429
+ pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
386
430
  pi->pcb->end_element(pi, ename);
387
431
 
432
+ attr_stack_cleanup(&attrs);
388
433
  return 0;
389
434
  case '>':
390
435
  /* has either children or a value */
391
436
  pi->s++;
392
437
  hasChildren = 1;
393
438
  done = 1;
394
- ap->name = 0;
395
- pi->pcb->add_element(pi, ename, attrs, hasChildren);
439
+ pi->pcb->add_element(pi, ename, attrs.head, hasChildren);
396
440
  break;
397
441
  default:
398
442
  /* Attribute name so it's an element and the attribute will be */
399
443
  /* added to it. */
400
- ap->name = read_name_token(pi);
444
+ if (0 == (attr_name = read_name_token(pi))) {
445
+ attr_stack_cleanup(&attrs);
446
+ return 0;
447
+ }
401
448
  end = pi->s;
402
449
  next_non_white(pi);
403
450
  if ('=' != *pi->s++) {
@@ -405,29 +452,29 @@ read_element(PInfo pi) {
405
452
  pi->s--;
406
453
  pi->last = *pi->s;
407
454
  *end = '\0'; /* terminate name */
408
- ap->value = "";
409
- ap++;
410
- if (MAX_ATTRS <= (ap - attrs)) {
411
- raise_error("too many attributes", pi->str, pi->s);
412
- }
455
+ attr_value = "";
456
+ attr_stack_push(&attrs, attr_name, attr_value);
413
457
  break;
414
458
  } else {
415
- raise_error("invalid format, no attribute value", pi->str, pi->s);
459
+ attr_stack_cleanup(&attrs);
460
+ set_error(&pi->err, "invalid format, no attribute value", pi->str, pi->s);
461
+ return 0;
416
462
  }
417
463
  }
418
464
  *end = '\0'; /* terminate name */
419
465
  /* read value */
420
466
  next_non_white(pi);
421
- ap->value = read_quoted_value(pi);
422
- if (0 != strchr(ap->value, '&')) {
423
- if (0 != collapse_special(pi, (char*)ap->value)) {
424
- raise_error("invalid format, special character does not end with a semicolon", pi->str, pi->s);
425
- }
467
+ if (0 == (attr_value = read_quoted_value(pi))) {
468
+ return 0;
426
469
  }
427
- ap++;
428
- if (MAX_ATTRS <= (ap - attrs)) {
429
- raise_error("too many attributes", pi->str, pi->s);
470
+ if (0 != strchr(attr_value, '&')) {
471
+ if (0 != collapse_special(pi, (char*)attr_value)) {
472
+ attr_stack_cleanup(&attrs);
473
+ set_error(&pi->err, "invalid format, special character does not end with a semicolon", pi->str, pi->s);
474
+ return 0;
475
+ }
430
476
  }
477
+ attr_stack_push(&attrs, attr_name, attr_value);
431
478
  break;
432
479
  }
433
480
  if ('\0' == pi->last) {
@@ -448,7 +495,9 @@ read_element(PInfo pi) {
448
495
  next_non_white(pi);
449
496
  c = *pi->s++;
450
497
  if ('\0' == c) {
451
- raise_error("invalid format, document not terminated", pi->str, pi->s);
498
+ attr_stack_cleanup(&attrs);
499
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
500
+ return 0;
452
501
  }
453
502
  if ('<' == c) {
454
503
  char *slash;
@@ -465,7 +514,9 @@ read_element(PInfo pi) {
465
514
  pi->s += 7;
466
515
  read_cdata(pi);
467
516
  } else {
468
- raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
517
+ attr_stack_cleanup(&attrs);
518
+ set_error(&pi->err, "invalid format, invalid comment or CDATA format", pi->str, pi->s);
519
+ return 0;
469
520
  }
470
521
  break;
471
522
  case '?': /* processing instruction */
@@ -475,21 +526,28 @@ read_element(PInfo pi) {
475
526
  case '/':
476
527
  slash = pi->s;
477
528
  pi->s++;
478
- name = read_name_token(pi);
529
+ if (0 == (name = read_name_token(pi))) {
530
+ attr_stack_cleanup(&attrs);
531
+ return 0;
532
+ }
479
533
  end = pi->s;
480
534
  next_non_white(pi);
481
535
  c = *pi->s;
482
536
  *end = '\0';
483
537
  if (0 != strcmp(name, ename)) {
538
+ attr_stack_cleanup(&attrs);
484
539
  if (TolerantEffort == pi->options->effort) {
485
540
  pi->pcb->end_element(pi, ename);
486
541
  return name;
487
542
  } else {
488
- raise_error("invalid format, elements overlap", pi->str, pi->s);
543
+ set_error(&pi->err, "invalid format, elements overlap", pi->str, pi->s);
544
+ return 0;
489
545
  }
490
546
  }
491
547
  if ('>' != c) {
492
- raise_error("invalid format, element not closed", pi->str, pi->s);
548
+ attr_stack_cleanup(&attrs);
549
+ set_error(&pi->err, "invalid format, element not closed", pi->str, pi->s);
550
+ return 0;
493
551
  }
494
552
  if (first && start != slash - 1) {
495
553
  /* some white space between start and here so add as text */
@@ -498,18 +556,22 @@ read_element(PInfo pi) {
498
556
  }
499
557
  pi->s++;
500
558
  pi->pcb->end_element(pi, ename);
559
+ attr_stack_cleanup(&attrs);
501
560
  return 0;
502
561
  case '\0':
562
+ attr_stack_cleanup(&attrs);
503
563
  if (TolerantEffort == pi->options->effort) {
504
564
  return 0;
505
565
  } else {
506
- raise_error("invalid format, document not terminated", pi->str, pi->s);
566
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
567
+ return 0;
507
568
  }
508
569
  default:
509
570
  first = 0;
510
571
  /* a child element */
511
572
  // Child closed with mismatched name.
512
573
  if (0 != (name = read_element(pi))) {
574
+ attr_stack_cleanup(&attrs);
513
575
  if (0 == strcmp(name, ename)) {
514
576
  pi->s++;
515
577
  pi->pcb->end_element(pi, ename);
@@ -518,6 +580,8 @@ read_element(PInfo pi) {
518
580
  pi->pcb->end_element(pi, ename);
519
581
  return name;
520
582
  }
583
+ } else if (err_has(&pi->err)) {
584
+ return 0;
521
585
  }
522
586
  break;
523
587
  }
@@ -534,11 +598,13 @@ read_element(PInfo pi) {
534
598
  /* close tag after text so treat as a value */
535
599
  pi->s += elen + 3;
536
600
  pi->pcb->end_element(pi, ename);
601
+ attr_stack_cleanup(&attrs);
537
602
  return 0;
538
603
  }
539
604
  }
540
605
  }
541
606
  }
607
+ attr_stack_cleanup(&attrs);
542
608
  return 0;
543
609
  }
544
610
 
@@ -559,7 +625,8 @@ read_text(PInfo pi) {
559
625
  pi->s--;
560
626
  break;
561
627
  case '\0':
562
- raise_error("invalid format, document not terminated", pi->str, pi->s);
628
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
629
+ return;
563
630
  default:
564
631
  if (end <= (b + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
565
632
  unsigned long size;
@@ -621,7 +688,8 @@ read_reduced_text(PInfo pi) {
621
688
  pi->s--;
622
689
  break;
623
690
  case '\0':
624
- raise_error("invalid format, document not terminated", pi->str, pi->s);
691
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
692
+ return;
625
693
  default:
626
694
  if (end <= (b + spc + (('&' == c) ? 7 : 0))) { /* extra 8 for special just in case it is sequence of bytes */
627
695
  unsigned long size;
@@ -682,7 +750,8 @@ read_name_token(PInfo pi) {
682
750
  return start;
683
751
  case '\0':
684
752
  /* documents never terminate after a name token */
685
- raise_error("invalid format, document not terminated", pi->str, pi->s);
753
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
754
+ return 0;
686
755
  break; /* to avoid warnings */
687
756
  default:
688
757
  break;
@@ -699,7 +768,8 @@ read_cdata(PInfo pi) {
699
768
  start = pi->s;
700
769
  end = strstr(pi->s, "]]>");
701
770
  if (end == 0) {
702
- raise_error("invalid format, CDATA not terminated", pi->str, pi->s);
771
+ set_error(&pi->err, "invalid format, CDATA not terminated", pi->str, pi->s);
772
+ return;
703
773
  }
704
774
  *end = '\0';
705
775
  pi->s = end + 3;
@@ -740,19 +810,22 @@ read_quoted_value(PInfo pi) {
740
810
  value = pi->s;
741
811
  for (; *pi->s != term; pi->s++) {
742
812
  if ('\0' == *pi->s) {
743
- raise_error("invalid format, document not terminated", pi->str, pi->s);
813
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
814
+ return 0;
744
815
  }
745
816
  }
746
817
  *pi->s = '\0'; /* terminate value */
747
818
  pi->s++; /* move past quote */
748
819
  } else if (StrictEffort == pi->options->effort) {
749
- raise_error("invalid format, expected a quote character", pi->str, pi->s);
820
+ set_error(&pi->err, "invalid format, expected a quote character", pi->str, pi->s);
821
+ return 0;
750
822
  } else if (TolerantEffort == pi->options->effort) {
751
823
  value = pi->s;
752
824
  for (; 1; pi->s++) {
753
825
  switch (*pi->s) {
754
826
  case '\0':
755
- raise_error("invalid format, document not terminated", pi->str, pi->s);
827
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
828
+ return 0;
756
829
  case ' ':
757
830
  case '/':
758
831
  case '>':
@@ -772,7 +845,8 @@ read_quoted_value(PInfo pi) {
772
845
  value = pi->s;
773
846
  next_white(pi);
774
847
  if ('\0' == *pi->s) {
775
- raise_error("invalid format, document not terminated", pi->str, pi->s);
848
+ set_error(&pi->err, "invalid format, document not terminated", pi->str, pi->s);
849
+ return 0;
776
850
  }
777
851
  *pi->s++ = '\0'; /* terminate value */
778
852
  }
@@ -912,8 +986,9 @@ read_coded_chars(PInfo pi, char *text) {
912
986
  *text++ = '&';
913
987
  return text;
914
988
  } else {
915
- /*raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
916
- raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
989
+ /*set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s); */
990
+ set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
991
+ return 0;
917
992
  }
918
993
  pi->s = s;
919
994
  }
@@ -994,8 +1069,9 @@ collapse_special(PInfo pi, char *str) {
994
1069
  pi->options->rb_enc = ox_utf8_encoding;
995
1070
  b = ucs_to_utf8_chars(b, u);
996
1071
  } else {
997
- /* raise_error("Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
998
- raise_error("Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
1072
+ /* set_error(&pi->err, "Invalid encoding, need UTF-8 or UTF-16 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);*/
1073
+ set_error(&pi->err, "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.", pi->str, pi->s);
1074
+ return 0;
999
1075
  }
1000
1076
  s = end + 1;
1001
1077
  } else {