ox 1.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

Files changed (52) hide show
  1. data/LICENSE +27 -0
  2. data/README +153 -0
  3. data/ext/ox/base64.c +123 -0
  4. data/ext/ox/base64.h +44 -0
  5. data/ext/ox/cache.c +148 -0
  6. data/ext/ox/cache.h +43 -0
  7. data/ext/ox/cache8.c +80 -0
  8. data/ext/ox/cache8.h +43 -0
  9. data/ext/ox/cache8_test.c +69 -0
  10. data/ext/ox/cache_test.c +69 -0
  11. data/ext/ox/dump.c +901 -0
  12. data/ext/ox/extconf.rb +7 -0
  13. data/ext/ox/gen_load.c +196 -0
  14. data/ext/ox/obj_load.c +802 -0
  15. data/ext/ox/ox.c +456 -0
  16. data/ext/ox/ox.h +190 -0
  17. data/ext/ox/parse.c +629 -0
  18. data/lib/ox.rb +97 -0
  19. data/lib/ox/cdata.rb +12 -0
  20. data/lib/ox/comment.rb +13 -0
  21. data/lib/ox/doctype.rb +13 -0
  22. data/lib/ox/document.rb +20 -0
  23. data/lib/ox/element.rb +67 -0
  24. data/lib/ox/node.rb +24 -0
  25. data/test/Sample.graffle +2318 -0
  26. data/test/cache16_test.rb +17 -0
  27. data/test/cache8_test.rb +17 -0
  28. data/test/cache_test.rb +17 -0
  29. data/test/files.rb +34 -0
  30. data/test/func.rb +228 -0
  31. data/test/gen_sample.rb +22 -0
  32. data/test/obj_sample.rb +19 -0
  33. data/test/ox/change.rb +16 -0
  34. data/test/ox/dir.rb +21 -0
  35. data/test/ox/doc.rb +39 -0
  36. data/test/ox/file.rb +33 -0
  37. data/test/ox/group.rb +18 -0
  38. data/test/ox/hasprops.rb +18 -0
  39. data/test/ox/layer.rb +14 -0
  40. data/test/ox/line.rb +22 -0
  41. data/test/ox/oval.rb +12 -0
  42. data/test/ox/rect.rb +12 -0
  43. data/test/ox/shape.rb +37 -0
  44. data/test/ox/text.rb +23 -0
  45. data/test/perf_gen.rb +193 -0
  46. data/test/perf_mars.rb +97 -0
  47. data/test/perf_obj.rb +201 -0
  48. data/test/perf_pod.rb +88 -0
  49. data/test/perf_write.rb +80 -0
  50. data/test/sample.rb +62 -0
  51. data/test/test.rb +70 -0
  52. metadata +106 -0
data/ext/ox/ox.h ADDED
@@ -0,0 +1,190 @@
1
+ /* ox.h
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #ifndef __OX_H__
32
+ #define __OX_H__
33
+
34
+ #if defined(__cplusplus)
35
+ extern "C" {
36
+ #if 0
37
+ } /* satisfy cc-mode */
38
+ #endif
39
+ #endif
40
+
41
+ #include "ruby/encoding.h"
42
+ #include "cache.h"
43
+
44
/* Raise a parse error; the C file and line of the call site are passed on to
 * _raise_error() together with the message and the two text positions.
 */
#define raise_error(msg, xml, current) _raise_error(msg, xml, current, __FILE__, __LINE__)

/* Fixed capacities. */
#define MAX_TEXT_LEN	4096
#define MAX_ATTRS	1024
#define MAX_DEPTH	1024

/* Trace levels, in increasing order of verbosity. */
#define SILENT	0
#define TRACE	1
#define DEBUG	2
53
+
54
/* Usage tags, numbered consecutively from 1.
 * NOTE(review): the meaning of each tag is defined by the code that consumes
 * them, which is not part of this header.
 */
typedef enum {
    UseObj       = 1,
    UseAttr      = 2,
    UseAttrSet   = 3,
    UseArray     = 4,
    UseAMember   = 5,
    UseHash      = 6,
    UseHashKey   = 7,
    UseHashVal   = 8,
    UseRange     = 9,
    UseRangeAttr = 10,
    UseRaw       = 11,
} Use;
67
+
68
/* Single character type codes. Every code other than NoCode is a lower case
 * ASCII letter.
 */
typedef enum {
    NoCode         = 0,
    ArrayCode      = 'a',
    Base64Code     = 'b',
    ClassCode      = 'c',
    FloatCode      = 'f',
    RegexpCode     = 'g',
    HashCode       = 'h',
    FixnumCode     = 'i',
    BignumCode     = 'j',
    KeyCode        = 'k', /* marks the value as a hash key, kind of a hack */
    RationalCode   = 'l',
    SymbolCode     = 'm',
    FalseClassCode = 'n',
    ObjectCode     = 'o',
    RefCode        = 'p',
    RangeCode      = 'r',
    StringCode     = 's',
    TimeCode       = 't',
    StructCode     = 'u',
    ComplexCode    = 'v',
    RawCode        = 'x',
    TrueClassCode  = 'y',
    NilClassCode   = 'z',
} Type;
93
+
94
/* One attribute as a name/value pair of C strings. The parser hands arrays of
 * these to the add_element callback, terminated by an entry whose name is 0.
 */
typedef struct _Attr {
    const char *name;   /* attribute name, 0 marks the end of an array */
    const char *value;  /* attribute value */
} *Attr;
98
+
99
+ typedef struct _Helper {
100
+ ID var; /* Object var ID */
101
+ VALUE obj; /* object created or Qundef if not appropriate */
102
+ Type type; /* type of object in obj */
103
+ } *Helper;
104
+
105
+ typedef struct _PInfo *PInfo;
106
+
107
+ typedef struct _ParseCallbacks {
108
+ void (*add_prolog)(PInfo pi, const char *version, const char *encoding, const char *standalone);
109
+ void (*add_doctype)(PInfo pi, const char *docType);
110
+ void (*add_comment)(PInfo pi, const char *comment);
111
+ void (*add_cdata)(PInfo pi, const char *cdata, size_t len);
112
+ void (*add_text)(PInfo pi, char *text, int closed);
113
+ void (*add_element)(PInfo pi, const char *ename, Attr attrs, int hasChildren);
114
+ void (*end_element)(PInfo pi, const char *ename);
115
+ } *ParseCallbacks;
116
+
117
+ typedef struct _CircArray {
118
+ VALUE obj_array[1024];
119
+ VALUE *objs;
120
+ unsigned long size; // allocated size or initial array size
121
+ unsigned long cnt;
122
+ } *CircArray;
123
+
124
+ /* parse information structure */
125
+ struct _PInfo {
126
+ struct _Helper helpers[MAX_DEPTH];
127
+ Helper h; /* current helper or 0 if not set */
128
+ char *str; /* buffer being read from */
129
+ char *s; /* current position in buffer */
130
+ VALUE obj;
131
+ ParseCallbacks pcb;
132
+ CircArray circ_array;
133
+ rb_encoding *encoding;
134
+ unsigned long id; /* set for text types when cirs_array is set */
135
+ int trace;
136
+ int best_effort;
137
+ };
138
+
139
+ extern VALUE parse(char *xml, ParseCallbacks pcb, char **endp, int trace, int best_effort);
140
+ extern void _raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
141
+
142
+ extern char* write_obj_to_str(VALUE obj, int indent, int xsd_date, int circular);
143
+ extern void write_obj_to_file(VALUE obj, const char *path, int indent, int xsd_date, int circular);
144
+
145
+ extern VALUE Ox;
146
+
147
+ extern ID at_id;
148
+ extern ID attributes_id;
149
+ extern ID beg_id;
150
+ extern ID den_id;
151
+ extern ID end_id;
152
+ extern ID excl_id;
153
+ extern ID inspect_id;
154
+ extern ID keys_id;
155
+ extern ID local_id;
156
+ extern ID nodes_id;
157
+ extern ID num_id;
158
+ extern ID parse_id;
159
+ extern ID to_c_id;
160
+ extern ID to_s_id;
161
+ extern ID tv_sec_id;
162
+ extern ID tv_usec_id;
163
+ extern ID value_id;
164
+
165
+ extern VALUE empty_string;
166
+ extern VALUE encoding_sym;
167
+ extern VALUE standalone_sym;
168
+ extern VALUE struct_class;
169
+ extern VALUE time_class;
170
+ extern VALUE version_sym;
171
+ extern VALUE zero_fixnum;
172
+
173
+ extern VALUE ox_document_clas;
174
+ extern VALUE ox_element_clas;
175
+ extern VALUE ox_text_clas;
176
+ extern VALUE ox_comment_clas;
177
+ extern VALUE ox_doctype_clas;
178
+ extern VALUE ox_cdata_clas;
179
+
180
+ extern Cache symbol_cache;
181
+ extern Cache class_cache;
182
+ extern Cache attr_cache;
183
+
184
+ #if defined(__cplusplus)
185
+ #if 0
186
+ { /* satisfy cc-mode */
187
+ #endif
188
+ } /* extern "C" { */
189
+ #endif
190
+ #endif /* __OX_H__ */
data/ext/ox/parse.c ADDED
@@ -0,0 +1,629 @@
1
+ /* parse.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #include <stdlib.h>
32
+ #include <errno.h>
33
+ #include <stdio.h>
34
+ #include <string.h>
35
+
36
+ #include "ruby.h"
37
+ #include "ox.h"
38
+
39
+ static void read_prolog(PInfo pi);
40
+ static void read_doctype(PInfo pi);
41
+ static void read_comment(PInfo pi);
42
+ static void read_element(PInfo pi);
43
+ static void read_text(PInfo pi);
44
+ static void read_cdata(PInfo pi);
45
+ static char* read_name_token(PInfo pi);
46
+ static char* read_quoted_value(PInfo pi);
47
+ static int read_coded_char(PInfo pi);
48
+ static void next_non_white(PInfo pi);
49
+
50
+ static int validateProlog = 1;
51
+
52
/* This XML parser is a single pass, destructive, callback parser. It is a
 * single pass parser since it makes only one pass over the characters in the
 * XML document string. It is destructive because it re-uses the content of
 * the string for values in the callbacks and places \0 characters at various
 * places to mark the end of tokens and strings. It is a callback parser like
 * a SAX parser because it uses callbacks when document elements are
 * encountered.
 *
 * Parsing is very tolerant. Lack of headers and even misspelled element
 * endings are passed over without raising an error. A best attempt is made in
 * all cases to parse the string.
 */
64
+
65
+ inline static void
66
+ next_non_white(PInfo pi) {
67
+ for (; 1; pi->s++) {
68
+ switch(*pi->s) {
69
+ case ' ':
70
+ case '\t':
71
+ case '\f':
72
+ case '\n':
73
+ case '\r':
74
+ break;
75
+ default:
76
+ return;
77
+ }
78
+ }
79
+ }
80
+
81
+ VALUE
82
+ parse(char *xml, ParseCallbacks pcb, char **endp, int trace, int best_effort) {
83
+ struct _PInfo pi;
84
+ int body_read = 0;
85
+
86
+ if (0 == xml) {
87
+ raise_error("Invalid arg, xml string can not be null", xml, 0);
88
+ }
89
+ if (DEBUG <= trace) {
90
+ printf("Parsing xml:\n%s\n", xml);
91
+ }
92
+ /* initialize parse info */
93
+ pi.str = xml;
94
+ pi.s = xml;
95
+ pi.h = 0;
96
+ pi.pcb = pcb;
97
+ pi.obj = Qnil;
98
+ pi.circ_array = 0;
99
+ pi.encoding = 0;
100
+ pi.trace = trace;
101
+ pi.best_effort = best_effort;
102
+ while (1) {
103
+ next_non_white(&pi); // skip white space
104
+ if ('\0' == *pi.s) {
105
+ break;
106
+ }
107
+ if (body_read && 0 != endp) {
108
+ *endp = pi.s;
109
+ break;
110
+ }
111
+ if ('<' != *pi.s) { // all top level entities start with <
112
+ raise_error("invalid format, expected <", pi.str, pi.s);
113
+ }
114
+ pi.s++; // past <
115
+ switch (*pi.s) {
116
+ case '?': // prolog
117
+ pi.s++;
118
+ read_prolog(&pi);
119
+ break;
120
+ case '!': /* comment or doctype */
121
+ pi.s++;
122
+ if ('\0' == *pi.s) {
123
+ raise_error("invalid format, DOCTYPE or comment not terminated", pi.str, pi.s);
124
+ } else if ('-' == *pi.s) {
125
+ pi.s++; // skip -
126
+ if ('-' != *pi.s) {
127
+ raise_error("invalid format, bad comment format", pi.str, pi.s);
128
+ } else {
129
+ pi.s++; // skip second -
130
+ read_comment(&pi);
131
+ }
132
+ } else if (0 == strncmp("DOCTYPE", pi.s, 7)) {
133
+ pi.s += 7;
134
+ read_doctype(&pi);
135
+ } else {
136
+ raise_error("invalid format, DOCTYPE or comment expected", pi.str, pi.s);
137
+ }
138
+ break;
139
+ case '\0':
140
+ raise_error("invalid format, document not terminated", pi.str, pi.s);
141
+ default:
142
+ read_element(&pi);
143
+ body_read = 1;
144
+ break;
145
+ }
146
+ }
147
+ return pi.obj;
148
+ }
149
+
150
+ /* Entered after the "<?" sequence. Ready to read the rest.
151
+ */
152
+ static void
153
+ read_prolog(PInfo pi) {
154
+ char *version = 0;
155
+ char *encoding = 0;
156
+ char *standalone = 0;
157
+ char *name;
158
+ char *end;
159
+ char c;
160
+
161
+ // skip xml string
162
+ if (0 != strncasecmp("xml", pi->s, 3)) {
163
+ raise_error("invalid format, expected 'xml'", pi->str, pi->s);
164
+ }
165
+ pi->s += 3; // past xml
166
+ /* looking for ?> to terminate the prolog */
167
+ while ('?' != *pi->s) {
168
+ if ('\0' == *pi->s) {
169
+ raise_error("invalid format, prolog not terminated", pi->str, pi->s);
170
+ }
171
+ name = read_name_token(pi);
172
+ end = pi->s;
173
+ next_non_white(pi);
174
+ c = *pi->s;
175
+ *end = '\0'; // terminate name
176
+ if ('=' == c) {
177
+ // Figure out what the token is, read a value for it, and check
178
+ // against supported values.
179
+ pi->s++;
180
+ next_non_white(pi);
181
+ if (0 == strcasecmp("version", name)) {
182
+ version = read_quoted_value(pi);
183
+ if (validateProlog &&
184
+ (0 != strcmp("1.0", version) &&
185
+ 0 != strcmp("1.1", version))) {
186
+ raise_error("invalid format, wrong XML version", pi->str, pi->s);
187
+ }
188
+ } else if (0 == strcasecmp("encoding", name)) {
189
+ encoding = read_quoted_value(pi);
190
+ /*
191
+ if (validateProlog && 0 != strcasecmp("UTF-8", encoding)) {
192
+ raise_error("invalid format, only UTF-8 supported", pi->str, pi->s);
193
+ }
194
+ */
195
+ } else if (0 == strcasecmp("standalone", name)) {
196
+ standalone = read_quoted_value(pi);
197
+ if (validateProlog && 0 != strcmp("yes", standalone)) {
198
+ raise_error("invalid format, only standalone XML supported", pi->str, pi->s);
199
+ }
200
+ } else {
201
+ raise_error("invalid format, unknown prolog attribute", pi->str, pi->s);
202
+ }
203
+ } else if ('?' == c) {
204
+ pi->s++;
205
+ if ('>' != *pi->s++) {
206
+ raise_error("invalid format, prolog not terminated", pi->str, pi->s);
207
+ }
208
+ return;
209
+ } else {
210
+ raise_error("invalid format, prolog format error", pi->str, pi->s);
211
+ }
212
+ }
213
+ if ('\0' == pi->s) {
214
+ raise_error("invalid format, prolog not terminated", pi->str, pi->s);
215
+ }
216
+ if ('?' == *pi->s) {
217
+ pi->s++;
218
+ }
219
+ if ('>' != *pi->s++) {
220
+ raise_error("invalid format, prolog not terminated", pi->str, pi->s);
221
+ }
222
+ if (0 != pi->pcb->add_prolog) {
223
+ pi->pcb->add_prolog(pi, version, encoding, standalone);
224
+ }
225
+ }
226
+
227
+ /* Entered after the "<!DOCTYPE" sequence plus the first character after
228
+ * that. Ready to read the rest. Returns error code.
229
+ */
230
+ static void
231
+ read_doctype(PInfo pi) {
232
+ char *docType;
233
+ int depth = 1;
234
+ char c;
235
+
236
+ next_non_white(pi);
237
+ docType = pi->s;
238
+ while (1) {
239
+ c = *pi->s++;
240
+ if ('\0' == c) {
241
+ raise_error("invalid format, prolog not terminated", pi->str, pi->s);
242
+ } else if ('<' == c) {
243
+ depth++;
244
+ } else if ('>' == c) {
245
+ depth--;
246
+ if (0 == depth) { /* done, at the end */
247
+ break;
248
+ }
249
+ }
250
+ }
251
+ *pi->s = '\0';
252
+ pi->s++;
253
+ if (0 != pi->pcb->add_doctype) {
254
+ pi->pcb->add_doctype(pi, docType);
255
+ }
256
+ }
257
+
258
+ /* Entered after "<!--". Returns error code.
259
+ */
260
+ static void
261
+ read_comment(PInfo pi) {
262
+ char *end;
263
+ char *s;
264
+ char *comment;
265
+ int done = 0;
266
+
267
+ next_non_white(pi);
268
+ comment = pi->s;
269
+ end = strstr(pi->s, "-->");
270
+ if (0 == end) {
271
+ raise_error("invalid format, comment not terminated", pi->str, pi->s);
272
+ }
273
+ for (s = end - 1; pi->s < s && !done; s--) {
274
+ switch(*s) {
275
+ case ' ':
276
+ case '\t':
277
+ case '\f':
278
+ case '\n':
279
+ case '\r':
280
+ break;
281
+ default:
282
+ *(s + 1) = '\0';
283
+ done = 1;
284
+ break;
285
+ }
286
+ }
287
+ *end = '\0'; // in case the comment was blank
288
+ pi->s = end + 3;
289
+ if (0 != pi->pcb->add_comment) {
290
+ pi->pcb->add_comment(pi, comment);
291
+ }
292
+ }
293
+
294
+ /* Entered after the '<' and the first character after that. Returns status
295
+ * code.
296
+ */
297
+ static void
298
+ read_element(PInfo pi) {
299
+ struct _Attr attrs[MAX_ATTRS];
300
+ Attr ap = attrs;
301
+ char *name;
302
+ char *ename;
303
+ char *end;
304
+ char c;
305
+ long elen;
306
+ int hasChildren = 0;
307
+ int done = 0;
308
+
309
+ ename = read_name_token(pi);
310
+ end = pi->s;
311
+ elen = end - ename;
312
+ next_non_white(pi);
313
+ c = *pi->s;
314
+ *end = '\0';
315
+ if ('/' == c) {
316
+ /* empty element, no attributes and no children */
317
+ pi->s++;
318
+ if ('>' != *pi->s) {
319
+ printf("*** '%s'***\n", pi->s);
320
+ raise_error("invalid format, element not closed", pi->str, pi->s);
321
+ }
322
+ pi->s++; /* past > */
323
+ ap->name = 0;
324
+ pi->pcb->add_element(pi, ename, attrs, hasChildren);
325
+ pi->pcb->end_element(pi, ename);
326
+
327
+ return;
328
+ }
329
+ /* read attribute names until the close (/ or >) is reached */
330
+ while (!done) {
331
+ if ('\0' == c) {
332
+ next_non_white(pi);
333
+ c = *pi->s;
334
+ }
335
+ switch (c) {
336
+ case '\0':
337
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
338
+ case '/':
339
+ // Element with just attributes.
340
+ pi->s++;
341
+ if ('>' != *pi->s) {
342
+ raise_error("invalid format, element not closed", pi->str, pi->s);
343
+ }
344
+ pi->s++;
345
+ ap->name = 0;
346
+ pi->pcb->add_element(pi, ename, attrs, hasChildren);
347
+ pi->pcb->end_element(pi, ename);
348
+
349
+ return;
350
+ case '>':
351
+ // has either children or a value
352
+ pi->s++;
353
+ hasChildren = 1;
354
+ done = 1;
355
+ ap->name = 0;
356
+ pi->pcb->add_element(pi, ename, attrs, hasChildren);
357
+ break;
358
+ default:
359
+ // Attribute name so it's an element and the attribute will be
360
+ // added to it.
361
+ ap->name = read_name_token(pi);
362
+ end = pi->s;
363
+ next_non_white(pi);
364
+ if ('=' != *pi->s++) {
365
+ raise_error("invalid format, no attribute value", pi->str, pi->s);
366
+ }
367
+ *end = '\0'; // terminate name
368
+ // read value
369
+ next_non_white(pi);
370
+ ap->value = read_quoted_value(pi);
371
+ ap++;
372
+ if (MAX_ATTRS <= (ap - attrs)) {
373
+ raise_error("too many attributes", pi->str, pi->s);
374
+ }
375
+ break;
376
+ }
377
+ c = '\0';
378
+ }
379
+ if (hasChildren) {
380
+ char *start;
381
+
382
+ done = 0;
383
+ // read children
384
+ while (!done) {
385
+ start = pi->s;
386
+ next_non_white(pi);
387
+ c = *pi->s++;
388
+ if ('\0' == c) {
389
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
390
+ }
391
+ if ('<' == c) {
392
+ switch (*pi->s) {
393
+ case '!': /* better be a comment or CDATA */
394
+ pi->s++;
395
+ if ('-' == *pi->s && '-' == *(pi->s + 1)) {
396
+ pi->s += 2;
397
+ read_comment(pi);
398
+ } else if (0 == strncmp("[CDATA[", pi->s, 7)) {
399
+ pi->s += 7;
400
+ read_cdata(pi);
401
+ } else {
402
+ raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
403
+ }
404
+ break;
405
+ case '/':
406
+ pi->s++;
407
+ name = read_name_token(pi);
408
+ end = pi->s;
409
+ next_non_white(pi);
410
+ c = *pi->s;
411
+ *end = '\0';
412
+ if (0 != strcmp(name, ename)) {
413
+ raise_error("invalid format, elements overlap", pi->str, pi->s);
414
+ }
415
+ if ('>' != c) {
416
+ raise_error("invalid format, element not closed", pi->str, pi->s);
417
+ }
418
+ pi->s++;
419
+ pi->pcb->end_element(pi, ename);
420
+ return;
421
+ case '\0':
422
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
423
+ default:
424
+ // a child element
425
+ read_element(pi);
426
+ break;
427
+ }
428
+ } else { // read as TEXT
429
+ pi->s = start;
430
+ //pi->s--;
431
+ read_text(pi);
432
+ // to exit read_text with no errors the next character must be <
433
+ if ('/' == *(pi->s + 1) &&
434
+ 0 == strncmp(ename, pi->s + 2, elen) &&
435
+ '>' == *(pi->s + elen + 2)) {
436
+ // close tag after text so treat as a value
437
+ pi->s += elen + 3;
438
+ pi->pcb->end_element(pi, ename);
439
+ return;
440
+ }
441
+ }
442
+ }
443
+ }
444
+ }
445
+
446
+ static void
447
+ read_text(PInfo pi) {
448
+ char buf[MAX_TEXT_LEN];
449
+ char *b = buf;
450
+ char *alloc_buf = 0;
451
+ char *end = b + sizeof(buf) - 2;
452
+ char c;
453
+ int spc = 0;
454
+ int done = 0;
455
+
456
+ while (!done) {
457
+ c = *pi->s++;
458
+ switch(c) {
459
+ case ' ':
460
+ case '\t':
461
+ case '\f':
462
+ case '\n':
463
+ case '\r':
464
+ spc = 1;
465
+ break;
466
+ case '<':
467
+ done = 1;
468
+ pi->s--;
469
+ break;
470
+ case '\0':
471
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
472
+ default:
473
+ if ('&' == c) {
474
+ c = read_coded_char(pi);
475
+ }
476
+ if (end <= b + spc) {
477
+ unsigned long size;
478
+
479
+ if (0 != alloc_buf) {
480
+ size = sizeof(buf) * 2;
481
+ if (0 == (alloc_buf = (char*)malloc(size))) {
482
+ raise_error("text too long", pi->str, pi->s);
483
+ }
484
+ memcpy(alloc_buf, buf, b - buf);
485
+ b = alloc_buf + (b - buf);
486
+ } else {
487
+ unsigned long pos = b - alloc_buf;
488
+
489
+ size = (end - alloc_buf) * 2;
490
+ if (0 == (alloc_buf = (char*)realloc(alloc_buf, size))) {
491
+ raise_error("text too long", pi->str, pi->s);
492
+ }
493
+ b = alloc_buf + pos;
494
+ }
495
+ end = alloc_buf + size;
496
+ }
497
+ if (spc) {
498
+ *b++ = ' ';
499
+ }
500
+ spc = 0;
501
+ *b++ = c;
502
+ break;
503
+ }
504
+ }
505
+ *b = '\0';
506
+ pi->pcb->add_text(pi, buf, ('/' == *(pi->s + 1)));
507
+ if (0 != alloc_buf) {
508
+ free(alloc_buf);
509
+ }
510
+ }
511
+
512
+ static char*
513
+ read_name_token(PInfo pi) {
514
+ char *start;
515
+
516
+ next_non_white(pi);
517
+ start = pi->s;
518
+ for (; 1; pi->s++) {
519
+ switch (*pi->s) {
520
+ case ' ':
521
+ case '\t':
522
+ case '\f':
523
+ case '?':
524
+ case '=':
525
+ case '/':
526
+ case '>':
527
+ case '\n':
528
+ case '\r':
529
+ return start;
530
+ case '\0':
531
+ // documents never terminate after a name token
532
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
533
+ break; // to avoid warnings
534
+ default:
535
+ break;
536
+ }
537
+ }
538
+ return start;
539
+ }
540
+
541
+ static void
542
+ read_cdata(PInfo pi) {
543
+ char *start;
544
+ char *end;
545
+
546
+ start = pi->s;
547
+ end = strstr(pi->s, "]]>");
548
+ if (end == 0) {
549
+ raise_error("invalid format, CDATA not terminated", pi->str, pi->s);
550
+ }
551
+ *end = '\0';
552
+ pi->s = end + 3;
553
+ if (0 != pi->pcb->add_cdata) {
554
+ pi->pcb->add_cdata(pi, start, end - start);
555
+ }
556
+ }
557
+
558
+ /* Assume the value starts immediately and goes until the quote character is
559
+ * reached again. Do not read the character after the terminating quote.
560
+ */
561
+ static char*
562
+ read_quoted_value(PInfo pi) {
563
+ char *value;
564
+
565
+ if ('"' != *pi->s) {
566
+ raise_error("invalid format, expected a quote character", pi->str, pi->s);
567
+ }
568
+ pi->s++; // skip quote character
569
+ value = pi->s;
570
+ for (; *pi->s != '"'; pi->s++) {
571
+ if ('\0' == *pi->s) {
572
+ raise_error("invalid format, document not terminated", pi->str, pi->s);
573
+ }
574
+ }
575
+ *pi->s = '\0'; // terminate value
576
+ pi->s++; // move past quote
577
+
578
+ return value;
579
+ }
580
+
581
+ static int
582
+ read_coded_char(PInfo pi) {
583
+ char *b, buf[8];
584
+ char *end = buf + sizeof(buf);
585
+ char *s;
586
+ int c;
587
+
588
+ for (b = buf, s = pi->s; b < end; b++, s++) {
589
+ if (';' == *s) {
590
+ *b = '\0';
591
+ s++;
592
+ break;
593
+ }
594
+ *b = *s;
595
+ }
596
+ if (b > end) {
597
+ return *pi->s;
598
+ }
599
+ if ('#' == *buf) {
600
+ c = (int)strtol(buf + 1, &end, 10);
601
+ if (0 >= c || '\0' != *end) {
602
+ return *pi->s;
603
+ }
604
+ pi->s = s;
605
+
606
+ return c;
607
+ }
608
+ if (0 == strcasecmp(buf, "nbsp")) {
609
+ pi->s = s;
610
+ return ' ';
611
+ } else if (0 == strcasecmp(buf, "lt")) {
612
+ pi->s = s;
613
+ return '<';
614
+ } else if (0 == strcasecmp(buf, "gt")) {
615
+ pi->s = s;
616
+ return '>';
617
+ } else if (0 == strcasecmp(buf, "amp")) {
618
+ pi->s = s;
619
+ return '&';
620
+ } else if (0 == strcasecmp(buf, "quot")) {
621
+ pi->s = s;
622
+ return '"';
623
+ } else if (0 == strcasecmp(buf, "apos")) {
624
+ pi->s = s;
625
+ return '\'';
626
+ }
627
+ return *pi->s;
628
+ }
629
+