ox 1.0.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

Files changed (52) hide show
  1. data/LICENSE +27 -0
  2. data/README +153 -0
  3. data/ext/ox/base64.c +123 -0
  4. data/ext/ox/base64.h +44 -0
  5. data/ext/ox/cache.c +148 -0
  6. data/ext/ox/cache.h +43 -0
  7. data/ext/ox/cache8.c +80 -0
  8. data/ext/ox/cache8.h +43 -0
  9. data/ext/ox/cache8_test.c +69 -0
  10. data/ext/ox/cache_test.c +69 -0
  11. data/ext/ox/dump.c +901 -0
  12. data/ext/ox/extconf.rb +7 -0
  13. data/ext/ox/gen_load.c +196 -0
  14. data/ext/ox/obj_load.c +802 -0
  15. data/ext/ox/ox.c +456 -0
  16. data/ext/ox/ox.h +190 -0
  17. data/ext/ox/parse.c +629 -0
  18. data/lib/ox.rb +97 -0
  19. data/lib/ox/cdata.rb +12 -0
  20. data/lib/ox/comment.rb +13 -0
  21. data/lib/ox/doctype.rb +13 -0
  22. data/lib/ox/document.rb +20 -0
  23. data/lib/ox/element.rb +67 -0
  24. data/lib/ox/node.rb +24 -0
  25. data/test/Sample.graffle +2318 -0
  26. data/test/cache16_test.rb +17 -0
  27. data/test/cache8_test.rb +17 -0
  28. data/test/cache_test.rb +17 -0
  29. data/test/files.rb +34 -0
  30. data/test/func.rb +228 -0
  31. data/test/gen_sample.rb +22 -0
  32. data/test/obj_sample.rb +19 -0
  33. data/test/ox/change.rb +16 -0
  34. data/test/ox/dir.rb +21 -0
  35. data/test/ox/doc.rb +39 -0
  36. data/test/ox/file.rb +33 -0
  37. data/test/ox/group.rb +18 -0
  38. data/test/ox/hasprops.rb +18 -0
  39. data/test/ox/layer.rb +14 -0
  40. data/test/ox/line.rb +22 -0
  41. data/test/ox/oval.rb +12 -0
  42. data/test/ox/rect.rb +12 -0
  43. data/test/ox/shape.rb +37 -0
  44. data/test/ox/text.rb +23 -0
  45. data/test/perf_gen.rb +193 -0
  46. data/test/perf_mars.rb +97 -0
  47. data/test/perf_obj.rb +201 -0
  48. data/test/perf_pod.rb +88 -0
  49. data/test/perf_write.rb +80 -0
  50. data/test/sample.rb +62 -0
  51. data/test/test.rb +70 -0
  52. metadata +106 -0
data/ext/ox/extconf.rb ADDED
@@ -0,0 +1,7 @@
1
# Build configuration for the ox C extension: generates the Makefile via mkmf.
require 'mkmf'

# Compile with all warnings enabled.
$CPPFLAGS += ' -Wall'

ext_name = 'ox'
dir_config(ext_name)
create_makefile(ext_name)
data/ext/ox/gen_load.c ADDED
@@ -0,0 +1,196 @@
1
+ /* gen_load.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #include <stdlib.h>
32
+ #include <errno.h>
33
+ #include <stdio.h>
34
+ #include <string.h>
35
+ #include <stdarg.h>
36
+
37
+ #include "ruby.h"
38
+ #include "ox.h"
39
+
40
+ static void add_prolog(PInfo pi, const char *version, const char *encoding, const char *standalone);
41
+ static void add_doctype(PInfo pi, const char *docType);
42
+ static void add_comment(PInfo pi, const char *comment);
43
+ static void add_cdata(PInfo pi, const char *cdata, size_t len);
44
+ static void add_text(PInfo pi, char *text, int closed);
45
+ static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren);
46
+ static void end_element(PInfo pi, const char *ename);
47
+
48
+ struct _ParseCallbacks _ox_gen_callbacks = {
49
+ add_prolog,
50
+ add_doctype,
51
+ add_comment,
52
+ add_cdata,
53
+ add_text,
54
+ add_element,
55
+ end_element,
56
+ };
57
+
58
+ ParseCallbacks ox_gen_callbacks = &_ox_gen_callbacks;
59
+
60
+ struct _ParseCallbacks _ox_limited_callbacks = {
61
+ 0,
62
+ 0,
63
+ 0,
64
+ 0,
65
+ add_text,
66
+ add_element,
67
+ end_element,
68
+ };
69
+
70
+ ParseCallbacks ox_limited_callbacks = &_ox_limited_callbacks;
71
+
72
+ static void
73
+ add_prolog(PInfo pi, const char *version, const char *encoding, const char *standalone) {
74
+ VALUE doc;
75
+ VALUE ah;
76
+ VALUE nodes;
77
+
78
+ if (0 != pi->h) { // top level object
79
+ rb_raise(rb_eEncodingError, "Prolog must be the first element in an XML document.\n");
80
+ }
81
+ pi->h = pi->helpers;
82
+ doc = rb_obj_alloc(ox_document_clas);
83
+ ah = rb_hash_new();
84
+ if (0 != version) {
85
+ rb_hash_aset(ah, version_sym, rb_str_new2(version));
86
+ }
87
+ if (0 != encoding) {
88
+ rb_hash_aset(ah, encoding_sym, rb_str_new2(encoding));
89
+ pi->encoding = rb_enc_find(encoding);
90
+ }
91
+ if (0 != standalone) {
92
+ rb_hash_aset(ah, standalone_sym, rb_str_new2(standalone));
93
+ }
94
+ nodes = rb_ary_new();
95
+ rb_ivar_set(doc, attributes_id, ah);
96
+ rb_ivar_set(doc, nodes_id, nodes);
97
+ pi->h->obj = nodes;
98
+ pi->obj = doc;
99
+ }
100
+
101
+ static void
102
+ add_doctype(PInfo pi, const char *docType) {
103
+ VALUE n = rb_obj_alloc(ox_doctype_clas);
104
+ VALUE s = rb_str_new2(docType);
105
+
106
+ if (0 != pi->encoding) {
107
+ rb_enc_associate(s, pi->encoding);
108
+ }
109
+ rb_ivar_set(n, value_id, s);
110
+ rb_ary_push(pi->h->obj, n);
111
+ }
112
+
113
+ static void
114
+ add_comment(PInfo pi, const char *comment) {
115
+ VALUE n = rb_obj_alloc(ox_comment_clas);
116
+ VALUE s = rb_str_new2(comment);
117
+
118
+ if (0 != pi->encoding) {
119
+ rb_enc_associate(s, pi->encoding);
120
+ }
121
+ rb_ivar_set(n, value_id, s);
122
+ rb_ary_push(pi->h->obj, n);
123
+ }
124
+
125
+ static void
126
+ add_cdata(PInfo pi, const char *cdata, size_t len) {
127
+ VALUE n = rb_obj_alloc(ox_cdata_clas);
128
+ VALUE s = rb_str_new2(cdata);
129
+
130
+ if (0 != pi->encoding) {
131
+ rb_enc_associate(s, pi->encoding);
132
+ }
133
+ rb_ivar_set(n, value_id, s);
134
+ rb_ary_push(pi->h->obj, n);
135
+ }
136
+
137
+ static void
138
+ add_text(PInfo pi, char *text, int closed) {
139
+ VALUE s = rb_str_new2(text);
140
+
141
+ if (0 != pi->encoding) {
142
+ rb_enc_associate(s, pi->encoding);
143
+ }
144
+ rb_ary_push(pi->h->obj, s);
145
+ }
146
+
147
+ static void
148
+ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
149
+ VALUE e;
150
+ VALUE s = rb_str_new2(ename);
151
+
152
+ if (0 != pi->encoding) {
153
+ rb_enc_associate(s, pi->encoding);
154
+ }
155
+ e = rb_obj_alloc(ox_element_clas);
156
+ rb_ivar_set(e, value_id, s);
157
+ if (0 != attrs->name) {
158
+ VALUE ah = rb_hash_new();
159
+
160
+ for (; 0 != attrs->name; attrs++) {
161
+ VALUE sym;
162
+ VALUE *slot;
163
+
164
+ if (Qundef == (sym = ox_cache_get(symbol_cache, attrs->name, &slot))) {
165
+ sym = ID2SYM(rb_intern(attrs->name));
166
+ *slot = sym;
167
+ }
168
+ s = rb_str_new2(attrs->value);
169
+ if (0 != pi->encoding) {
170
+ rb_enc_associate(s, pi->encoding);
171
+ }
172
+ rb_hash_aset(ah, sym, s);
173
+ }
174
+ rb_ivar_set(e, attributes_id, ah);
175
+ }
176
+ if (0 == pi->h) { // top level object
177
+ pi->h = pi->helpers;
178
+ pi->obj = e;
179
+ } else {
180
+ rb_ary_push(pi->h->obj, e);
181
+ pi->h++;
182
+ }
183
+ if (hasChildren) {
184
+ VALUE nodes = rb_ary_new();
185
+
186
+ rb_ivar_set(e, nodes_id, nodes);
187
+ pi->h->obj = nodes;
188
+ }
189
+ }
190
+
191
+ static void
192
+ end_element(PInfo pi, const char *ename) {
193
+ if (0 != pi->h && pi->helpers <= pi->h) {
194
+ pi->h--;
195
+ }
196
+ }
data/ext/ox/obj_load.c ADDED
@@ -0,0 +1,802 @@
1
+ /* obj_load.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ *
5
+ * Redistribution and use in source and binary forms, with or without
6
+ * modification, are permitted provided that the following conditions are met:
7
+ *
8
+ * - Redistributions of source code must retain the above copyright notice, this
9
+ * list of conditions and the following disclaimer.
10
+ *
11
+ * - Redistributions in binary form must reproduce the above copyright notice,
12
+ * this list of conditions and the following disclaimer in the documentation
13
+ * and/or other materials provided with the distribution.
14
+ *
15
+ * - Neither the name of Peter Ohler nor the names of its contributors may be
16
+ * used to endorse or promote products derived from this software without
17
+ * specific prior written permission.
18
+ *
19
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
20
+ * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22
+ * DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
23
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
25
+ * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
26
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
27
+ * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29
+ */
30
+
31
+ #include <stdlib.h>
32
+ #include <errno.h>
33
+ #include <stdio.h>
34
+ #include <string.h>
35
+ #include <stdarg.h>
36
+ #include <time.h>
37
+
38
+ #include "ruby.h"
39
+ #include "ruby/oniguruma.h"
40
+ #include "base64.h"
41
+ #include "ox.h"
42
+
43
+ static void add_prolog(PInfo pi, const char *version, const char *encoding, const char *standalone);
44
+ static void add_text(PInfo pi, char *text, int closed);
45
+ static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren);
46
+ static void end_element(PInfo pi, const char *ename);
47
+
48
+ static VALUE parse_time(const char *text, VALUE clas);
49
+ static VALUE parse_xsd_time(const char *text, VALUE clas);
50
+ static VALUE parse_double_time(const char *text, VALUE clas);
51
+ static VALUE parse_regexp(const char *text);
52
+
53
+ static VALUE get_var_sym_from_attrs(Attr a);
54
+ static VALUE get_obj_from_attrs(Attr a, int best_effort);
55
+ static VALUE get_class_from_attrs(Attr a, int best_effort);
56
+ static unsigned long get_id_from_attrs(PInfo pi, Attr a);
57
+ static CircArray circ_array_new(void);
58
+ static void circ_array_free(CircArray ca);
59
+ static void circ_array_set(CircArray ca, VALUE obj, unsigned long id);
60
+ static VALUE circ_array_get(CircArray ca, unsigned long id);
61
+
62
+ static void debug_stack(PInfo pi, const char *comment);
63
+ static void fill_indent(PInfo pi, char *buf, size_t size);
64
+
65
+
66
+ struct _ParseCallbacks _ox_obj_callbacks = {
67
+ add_prolog,
68
+ 0, // add_doctype,
69
+ 0, // add_comment,
70
+ 0, // add_cdata,
71
+ add_text,
72
+ add_element,
73
+ end_element,
74
+ };
75
+
76
+ ParseCallbacks ox_obj_callbacks = &_ox_obj_callbacks;
77
+
78
+ extern ParseCallbacks ox_gen_callbacks;
79
+
80
+
81
+ inline static ID
82
+ name2var(const char *name) {
83
+ VALUE *slot;
84
+ ID var_id;
85
+
86
+ if ('0' <= *name && *name <= '9') {
87
+ var_id = INT2NUM(atoi(name));
88
+ } else if (Qundef == (var_id = ox_cache_get(attr_cache, name, &slot))) {
89
+ var_id = rb_intern(name);
90
+ *slot = var_id;
91
+ }
92
+ return var_id;
93
+ }
94
+
95
+ inline static VALUE
96
+ classname2class(const char *name, int best_effort) {
97
+ VALUE *slot;
98
+ VALUE clas;
99
+
100
+ if (Qundef == (clas = ox_cache_get(class_cache, name, &slot))) {
101
+ char class_name[1024];
102
+ char *s;
103
+ const char *n = name;
104
+ ID ci;
105
+
106
+ clas = rb_cObject;
107
+ for (s = class_name; '\0' != *n; n++) {
108
+ if (':' == *n) {
109
+ *s = '\0';
110
+ n++;
111
+ ci = rb_intern(class_name);
112
+ if (!best_effort || rb_const_defined(clas, ci)) {
113
+ clas = rb_const_get(clas, ci);
114
+ } else {
115
+ return Qundef;
116
+ }
117
+ s = class_name;
118
+ } else {
119
+ *s++ = *n;
120
+ }
121
+ }
122
+ *s = '\0';
123
+ ci = rb_intern(class_name);
124
+ if (!best_effort || rb_const_defined(clas, ci)) {
125
+ clas = rb_const_get(clas, ci);
126
+ } else {
127
+ return Qundef;
128
+ }
129
+ //clas = rb_const_get(clas, rb_intern(class_name));
130
+ *slot = clas;
131
+ }
132
+ return clas;
133
+ }
134
+
135
+ inline static VALUE
136
+ classname2obj(const char *name, int best_effort) {
137
+ VALUE clas = classname2class(name, best_effort);
138
+
139
+ if (Qundef == clas) {
140
+ return Qnil;
141
+ } else {
142
+ return rb_obj_alloc(clas);
143
+ }
144
+ }
145
+
146
+ inline static VALUE
147
+ structname2obj(const char *name) {
148
+ VALUE ost;
149
+ const char *s = name;
150
+
151
+ for (; 1; s++) {
152
+ if ('\0' == *s) {
153
+ s = name;
154
+ break;
155
+ } else if (':' == *s) {
156
+ s += 2;
157
+ break;
158
+ }
159
+ }
160
+ ost = rb_const_get(struct_class, rb_intern(s));
161
+
162
+ return rb_struct_alloc_noinit(ost);
163
+ }
164
+
165
+ // 2010-07-09T10:47:45.895826162+09:00
166
+ inline static VALUE
167
+ parse_time(const char *text, VALUE clas) {
168
+ VALUE t;
169
+
170
+ if (Qnil == (t = parse_double_time(text, clas)) &&
171
+ Qnil == (t = parse_xsd_time(text, clas))) {
172
+ VALUE args[1];
173
+
174
+ //printf("**** time parse\n");
175
+ *args = rb_str_new2(text);
176
+ t = rb_funcall2(time_class, parse_id, 1, args);
177
+ }
178
+ return t;
179
+ }
180
+
181
+ static VALUE
182
+ get_var_sym_from_attrs(Attr a) {
183
+ for (; 0 != a->name; a++) {
184
+ if ('a' == *a->name && '\0' == *(a->name + 1)) {
185
+ return name2var(a->value);
186
+ }
187
+ }
188
+ return Qundef;
189
+ }
190
+
191
+ static VALUE
192
+ get_obj_from_attrs(Attr a, int best_effort) {
193
+ for (; 0 != a->name; a++) {
194
+ if ('c' == *a->name && '\0' == *(a->name + 1)) {
195
+ return classname2obj(a->value, best_effort);
196
+ }
197
+ }
198
+ return Qundef;
199
+ }
200
+
201
+ static VALUE
202
+ get_struct_from_attrs(Attr a) {
203
+ for (; 0 != a->name; a++) {
204
+ if ('c' == *a->name && '\0' == *(a->name + 1)) {
205
+ return structname2obj(a->value);
206
+ }
207
+ }
208
+ return Qundef;
209
+ }
210
+
211
+ static VALUE
212
+ get_class_from_attrs(Attr a, int best_effort) {
213
+ for (; 0 != a->name; a++) {
214
+ if ('c' == *a->name && '\0' == *(a->name + 1)) {
215
+ return classname2class(a->value, best_effort);
216
+ }
217
+ }
218
+ return Qundef;
219
+ }
220
+
221
+ static unsigned long
222
+ get_id_from_attrs(PInfo pi, Attr a) {
223
+ for (; 0 != a->name; a++) {
224
+ if ('i' == *a->name && '\0' == *(a->name + 1)) {
225
+ unsigned long id = 0;
226
+ const char *text = a->value;
227
+ char c;
228
+
229
+ for (; '\0' != *text; text++) {
230
+ c = *text;
231
+ if ('0' <= c && c <= '9') {
232
+ id = id * 10 + (c - '0');
233
+ } else {
234
+ raise_error("bad number format", pi->str, pi->s);
235
+ }
236
+ }
237
+ return id;
238
+ }
239
+ }
240
+ return 0;
241
+ }
242
+
243
+ static CircArray
244
+ circ_array_new() {
245
+ CircArray ca;
246
+
247
+ if (0 == (ca = (CircArray)malloc(sizeof(struct _CircArray)))) {
248
+ rb_raise(rb_eNoMemError, "not enough memory\n");
249
+ }
250
+ ca->objs = ca->obj_array;
251
+ ca->size = sizeof(ca->obj_array) / sizeof(VALUE);
252
+ ca->cnt = 0;
253
+
254
+ return ca;
255
+ }
256
+
257
+ static void
258
+ circ_array_free(CircArray ca) {
259
+ if (ca->objs != ca->obj_array) {
260
+ free(ca->objs);
261
+ }
262
+ free(ca);
263
+ }
264
+
265
+ static void
266
+ circ_array_set(CircArray ca, VALUE obj, unsigned long id) {
267
+ if (0 < id) {
268
+ unsigned long i;
269
+
270
+ if (ca->size < id) {
271
+ unsigned long cnt = id + 512;
272
+
273
+ if (ca->objs == ca->obj_array) {
274
+ if (0 == (ca->objs = (VALUE*)malloc(sizeof(VALUE) * cnt))) {
275
+ rb_raise(rb_eNoMemError, "not enough memory\n");
276
+ }
277
+ memcpy(ca->objs, ca->obj_array, sizeof(VALUE) * ca->cnt);
278
+ } else {
279
+ if (0 == (ca->objs = (VALUE*)realloc(ca->objs, sizeof(VALUE) * cnt))) {
280
+ rb_raise(rb_eNoMemError, "not enough memory\n");
281
+ }
282
+ }
283
+ ca->size = cnt;
284
+ }
285
+ id--;
286
+ for (i = ca->cnt; i < id; i++) {
287
+ ca->objs[i] = Qundef;
288
+ }
289
+ ca->objs[id] = obj;
290
+ if (ca->cnt <= id) {
291
+ ca->cnt = id + 1;
292
+ }
293
+ }
294
+ }
295
+
296
+ static VALUE
297
+ circ_array_get(CircArray ca, unsigned long id) {
298
+ VALUE obj = Qundef;
299
+
300
+ if (id <= ca->cnt) {
301
+ obj = ca->objs[id - 1];
302
+ }
303
+ return obj;
304
+ }
305
+
306
+ static VALUE
307
+ parse_regexp(const char *text) {
308
+ const char *te;
309
+ int options = 0;
310
+
311
+ te = text + strlen(text) - 1;
312
+ for (; text < te && '/' != *te; te--) {
313
+ switch (*te) {
314
+ case 'i': options |= ONIG_OPTION_IGNORECASE; break;
315
+ case 'm': options |= ONIG_OPTION_MULTILINE; break;
316
+ case 'x': options |= ONIG_OPTION_EXTEND; break;
317
+ default: break;
318
+ }
319
+ }
320
+ return rb_reg_new(text + 1, te - text - 1, options);
321
+ }
322
+
323
+ static void
324
+ add_prolog(PInfo pi, const char *version, const char *encoding, const char *standalone) {
325
+ if (0 != encoding) {
326
+ pi->encoding = rb_enc_find(encoding);
327
+ }
328
+ }
329
+
330
+ static void
331
+ add_text(PInfo pi, char *text, int closed) {
332
+ if (!closed) {
333
+ raise_error("Text not closed", pi->str, pi->s);
334
+ }
335
+ if (DEBUG <= pi->trace) {
336
+ char indent[128];
337
+
338
+ fill_indent(pi, indent, sizeof(indent));
339
+ printf("%s '%s' to type %c\n", indent, text, pi->h->type);
340
+ }
341
+ switch (pi->h->type) {
342
+ case NoCode:
343
+ case StringCode:
344
+ pi->h->obj = rb_str_new2(text);
345
+ if (0 != pi->encoding) {
346
+ rb_enc_associate(pi->h->obj, pi->encoding);
347
+ }
348
+ if (0 != pi->circ_array) {
349
+ circ_array_set(pi->circ_array, pi->h->obj, (unsigned long)pi->id);
350
+ }
351
+ break;
352
+ case FixnumCode:
353
+ {
354
+ long n = 0;
355
+ char c;
356
+ int neg = 0;
357
+
358
+ if ('-' == *text) {
359
+ neg = 1;
360
+ text++;
361
+ }
362
+ for (; '\0' != *text; text++) {
363
+ c = *text;
364
+ if ('0' <= c && c <= '9') {
365
+ n = n * 10 + (c - '0');
366
+ } else {
367
+ raise_error("bad number format", pi->str, pi->s);
368
+ }
369
+ }
370
+ if (neg) {
371
+ n = -n;
372
+ }
373
+ pi->h->obj = LONG2FIX(n);
374
+ break;
375
+ }
376
+ case FloatCode:
377
+ pi->h->obj = rb_float_new(strtod(text, 0));
378
+ break;
379
+ case SymbolCode:
380
+ {
381
+ VALUE sym;
382
+ VALUE *slot;
383
+
384
+ if (Qundef == (sym = ox_cache_get(symbol_cache, text, &slot))) {
385
+ sym = ID2SYM(rb_intern(text));
386
+ *slot = sym;
387
+ }
388
+ pi->h->obj = sym;
389
+ break;
390
+ }
391
+ case TimeCode:
392
+ pi->h->obj = parse_time(text, time_class);
393
+ break;
394
+ case Base64Code:
395
+ {
396
+ char buf[1024];
397
+ char *str = buf;
398
+ unsigned long str_size = b64_orig_size(text);
399
+ VALUE v;
400
+
401
+ if (sizeof(buf) <= str_size) {
402
+ if (0 == (str = (char*)malloc(str_size + 1))) {
403
+ rb_raise(rb_eNoMemError, "not enough memory\n");
404
+ }
405
+ }
406
+ from_base64(text, (u_char*)str);
407
+ v = rb_str_new(str, str_size);
408
+ if (0 != pi->encoding) {
409
+ rb_enc_associate(v, pi->encoding);
410
+ }
411
+ if (0 != pi->circ_array) {
412
+ circ_array_set(pi->circ_array, v, (unsigned long)pi->h->obj);
413
+ }
414
+ pi->h->obj = v;
415
+ if (sizeof(buf) <= str_size) {
416
+ free(str);
417
+ }
418
+ break;
419
+ }
420
+ case RegexpCode:
421
+ if ('/' == *text) {
422
+ pi->h->obj = parse_regexp(text);
423
+ } else {
424
+ char buf[1024];
425
+ char *str = buf;
426
+ unsigned long str_size = b64_orig_size(text);
427
+
428
+ if (sizeof(buf) <= str_size) {
429
+ if (0 == (str = (char*)malloc(str_size + 1))) {
430
+ rb_raise(rb_eNoMemError, "not enough memory\n");
431
+ }
432
+ }
433
+ from_base64(text, (u_char*)str);
434
+ pi->h->obj = parse_regexp(str);
435
+ if (sizeof(buf) <= str_size) {
436
+ free(str);
437
+ }
438
+ }
439
+ break;
440
+ case BignumCode:
441
+ pi->h->obj = rb_cstr_to_inum(text, 10, 1);
442
+ break;
443
+ default:
444
+ pi->h->obj = Qnil;
445
+ break;
446
+ }
447
+ }
448
+
449
+ static void
450
+ add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren) {
451
+ Attr a;
452
+ Helper h;
453
+ unsigned long id;
454
+
455
+ if (TRACE <= pi->trace) {
456
+ char buf[1024];
457
+ char indent[128];
458
+ char *s = buf;
459
+ char *end = buf + sizeof(buf) - 2;
460
+
461
+ s += snprintf(s, end - s, " <%s%s", (hasChildren) ? "" : "/", ename);
462
+ for (a = attrs; 0 != a->name; a++) {
463
+ s += snprintf(s, end - s, " %s=%s", a->name, a->value);
464
+ }
465
+ *s++ = '>';
466
+ *s++ = '\0';
467
+ if (DEBUG <= pi->trace) {
468
+ debug_stack(pi, buf);
469
+ } else {
470
+ fill_indent(pi, indent, sizeof(indent));
471
+ printf("%s%s\n", indent, buf);
472
+ }
473
+ }
474
+ if (0 == pi->h) { // top level object
475
+ pi->h = pi->helpers;
476
+ if (0 != (id = get_id_from_attrs(pi, attrs))) {
477
+ pi->circ_array = circ_array_new();
478
+ }
479
+ } else {
480
+ pi->h++;
481
+ }
482
+ if ('\0' != ename[1]) {
483
+ raise_error("Invalid element name", pi->str, pi->s);
484
+ }
485
+ h = pi->h;
486
+ h->type = *ename;
487
+ h->var = get_var_sym_from_attrs(attrs);
488
+ switch (h->type) {
489
+ case NilClassCode:
490
+ h->obj = Qnil;
491
+ break;
492
+ case TrueClassCode:
493
+ h->obj = Qtrue;
494
+ break;
495
+ case FalseClassCode:
496
+ h->obj = Qfalse;
497
+ break;
498
+ case StringCode:
499
+ // h->obj will be replaced by add_text if it is called
500
+ h->obj = empty_string;
501
+ if (0 != pi->circ_array) {
502
+ pi->id = get_id_from_attrs(pi, attrs);
503
+ circ_array_set(pi->circ_array, h->obj, pi->id);
504
+ }
505
+ break;
506
+ case FixnumCode:
507
+ case FloatCode:
508
+ case SymbolCode:
509
+ case RegexpCode:
510
+ case BignumCode:
511
+ case ComplexCode:
512
+ case TimeCode:
513
+ case RationalCode: // sub elements read next
514
+ // value will be read in the following add_text
515
+ h->obj = Qundef;
516
+ break;
517
+ case Base64Code:
518
+ h->obj = Qundef;
519
+ if (0 != pi->circ_array) {
520
+ pi->id = get_id_from_attrs(pi, attrs);
521
+ }
522
+ break;
523
+ case ArrayCode:
524
+ h->obj = rb_ary_new();
525
+ if (0 != pi->circ_array) {
526
+ circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs));
527
+ }
528
+ break;
529
+ case HashCode:
530
+ h->obj = rb_hash_new();
531
+ if (0 != pi->circ_array) {
532
+ circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs));
533
+ }
534
+ break;
535
+ case RangeCode:
536
+ h->obj = rb_range_new(zero_fixnum, zero_fixnum, Qfalse);
537
+ break;
538
+ case RawCode:
539
+ if (hasChildren) {
540
+ h->obj = parse(pi->s, ox_gen_callbacks, &pi->s, pi->trace, pi->best_effort);
541
+ if (0 != pi->circ_array) {
542
+ circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs));
543
+ }
544
+ } else {
545
+ h->obj = Qnil;
546
+ }
547
+ break;
548
+ case ObjectCode:
549
+ h->obj = get_obj_from_attrs(attrs, pi->best_effort);
550
+ if (0 != pi->circ_array && Qnil != h->obj) {
551
+ circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs));
552
+ }
553
+ break;
554
+ case StructCode:
555
+ h->obj = get_struct_from_attrs(attrs);
556
+ if (0 != pi->circ_array) {
557
+ circ_array_set(pi->circ_array, h->obj, get_id_from_attrs(pi, attrs));
558
+ }
559
+ break;
560
+ case ClassCode:
561
+ h->obj = get_class_from_attrs(attrs, pi->best_effort);
562
+ break;
563
+ case RefCode:
564
+ h->obj = Qundef;
565
+ if (0 != pi->circ_array) {
566
+ h->obj = circ_array_get(pi->circ_array, get_id_from_attrs(pi, attrs));
567
+ }
568
+ if (Qundef == h->obj) {
569
+ raise_error("Invalid circular reference", pi->str, pi->s);
570
+ }
571
+ break;
572
+ default:
573
+ raise_error("Invalid element name", pi->str, pi->s);
574
+ break;
575
+ }
576
+ if (DEBUG <= pi->trace) {
577
+ debug_stack(pi, " -----------");
578
+ }
579
+ }
580
+
581
+ static void
582
+ end_element(PInfo pi, const char *ename) {
583
+ if (TRACE <= pi->trace) {
584
+ char indent[128];
585
+
586
+ if (DEBUG <= pi->trace) {
587
+ char buf[1024];
588
+
589
+ snprintf(buf, sizeof(buf) - 1, "</%s>", ename);
590
+ debug_stack(pi, buf);
591
+ } else {
592
+ fill_indent(pi, indent, sizeof(indent));
593
+ printf("%s</%s>\n", indent, ename);
594
+ }
595
+ }
596
+ if (0 != pi->h && pi->helpers <= pi->h) {
597
+ Helper h = pi->h;
598
+
599
+ if (empty_string == h->obj) {
600
+ // special catch for empty strings
601
+ h->obj = rb_str_new2("");
602
+ }
603
+ pi->obj = h->obj;
604
+ pi->h--;
605
+ if (pi->helpers <= pi->h) {
606
+ switch (pi->h->type) {
607
+ case ArrayCode:
608
+ rb_ary_push(pi->h->obj, h->obj);
609
+ break;
610
+ case ObjectCode:
611
+ if (Qnil != pi->h->obj) {
612
+ rb_ivar_set(pi->h->obj, h->var, h->obj);
613
+ }
614
+ break;
615
+ case StructCode:
616
+ rb_struct_aset(pi->h->obj, h->var, h->obj);
617
+ break;
618
+ case HashCode:
619
+ h->type = KeyCode;
620
+ pi->h++;
621
+ break;
622
+ case RangeCode:
623
+ if (beg_id == h->var) {
624
+ RSTRUCT(pi->h->obj)->as.ary[0] = h->obj;
625
+ } else if (end_id == h->var) {
626
+ RSTRUCT(pi->h->obj)->as.ary[1] = h->obj;
627
+ } else if (excl_id == h->var) {
628
+ RSTRUCT(pi->h->obj)->as.ary[2] = h->obj;
629
+ } else {
630
+ raise_error("Invalid range attribute", pi->str, pi->s);
631
+ }
632
+ break;
633
+ case KeyCode:
634
+ rb_hash_aset((pi->h - 1)->obj, pi->h->obj, h->obj);
635
+ pi->h--;
636
+ break;
637
+ case ComplexCode:
638
+ if (Qundef == pi->h->obj) {
639
+ pi->h->obj = h->obj;
640
+ } else {
641
+ pi->h->obj = rb_complex_new(pi->h->obj, h->obj);
642
+ }
643
+ break;
644
+ case RationalCode:
645
+ if (Qundef == pi->h->obj) {
646
+ pi->h->obj = h->obj;
647
+ } else {
648
+ pi->h->obj = rb_rational_new(pi->h->obj, h->obj);
649
+ }
650
+ break;
651
+ default:
652
+ raise_error("Corrupt parse stack, container is wrong type", pi->str, pi->s);
653
+ break;
654
+ }
655
+ }
656
+ }
657
+ if (0 != pi->circ_array && pi->helpers > pi->h) {
658
+ circ_array_free(pi->circ_array);
659
+ pi->circ_array = 0;
660
+ }
661
+ if (DEBUG <= pi->trace) {
662
+ debug_stack(pi, " ----------");
663
+ }
664
+ }
665
+
666
+ static VALUE
667
+ parse_double_time(const char *text, VALUE clas) {
668
+ VALUE args[2];
669
+ long v = 0;
670
+ long v2 = 0;
671
+ const char *dot = 0;
672
+ char c;
673
+
674
+ for (; '.' != *text; text++) {
675
+ c = *text;
676
+ if (c < '0' || '9' < c) {
677
+ return Qnil;
678
+ }
679
+ v = 10 * v + (long)(c - '0');
680
+ }
681
+ dot = text++;
682
+ for (; '\0' != *text && text - dot <= 6; text++) {
683
+ c = *text;
684
+ if (c < '0' || '9' < c) {
685
+ return Qnil;
686
+ }
687
+ v2 = 10 * v2 + (long)(c - '0');
688
+ }
689
+ for (; text - dot <= 6; text++) {
690
+ v2 *= 10;
691
+ }
692
+ args[0] = INT2FIX(v);
693
+ args[1] = INT2FIX(v2);
694
+
695
+ return rb_funcall2(clas, at_id, 2, args);
696
+ }
697
+
698
+ typedef struct _Tp {
699
+ int cnt;
700
+ char end;
701
+ char alt;
702
+ } *Tp;
703
+
704
+ static VALUE
705
+ parse_xsd_time(const char *text, VALUE clas) {
706
+ VALUE args[2];
707
+ long cargs[10];
708
+ long *cp = cargs;
709
+ long v;
710
+ int i;
711
+ char c;
712
+ struct _Tp tpa[10] = { { 4, '-', '-' },
713
+ { 2, '-', '-' },
714
+ { 2, 'T', 'T' },
715
+ { 2, ':', ':' },
716
+ { 2, ':', ':' },
717
+ { 2, '.', '.' },
718
+ { 9, '+', '-' },
719
+ { 2, ':', ':' },
720
+ { 2, '\0', '\0' },
721
+ { 0, '\0', '\0' } };
722
+ Tp tp = tpa;
723
+ struct tm tm;
724
+
725
+ for (; 0 != tp->cnt; tp++) {
726
+ for (i = tp->cnt, v = 0; 0 < i ; text++, i--) {
727
+ c = *text;
728
+ if (c < '0' || '9' < c) {
729
+ if (tp->end == c || tp->alt == c) {
730
+ break;
731
+ }
732
+ return Qnil;
733
+ }
734
+ v = 10 * v + (long)(c - '0');
735
+ }
736
+ c = *text++;
737
+ if (tp->end != c && tp->alt != c) {
738
+ return Qnil;
739
+ }
740
+ *cp++ = v;
741
+ }
742
+ tm.tm_year = (int)cargs[0] - 1900;
743
+ tm.tm_mon = (int)cargs[1] - 1;
744
+ tm.tm_mday = (int)cargs[2];
745
+ tm.tm_hour = (int)cargs[3];
746
+ tm.tm_min = (int)cargs[4];
747
+ tm.tm_sec = (int)cargs[5];
748
+
749
+ args[0] = INT2FIX(mktime(&tm));
750
+ args[1] = INT2FIX(cargs[6]);
751
+
752
+ return rb_funcall2(clas, at_id, 2, args);
753
+ }
754
+
755
+ // debug functions
756
+ static void
757
+ fill_indent(PInfo pi, char *buf, size_t size) {
758
+ if (0 != pi->h) {
759
+ size_t cnt = pi->h - pi->helpers + 1;
760
+
761
+ if (size < cnt + 1) {
762
+ cnt = size - 1;
763
+ }
764
+ memset(buf, ' ', cnt);
765
+ buf += cnt;
766
+ }
767
+ *buf = '\0';
768
+ }
769
+
770
+ static void
771
+ debug_stack(PInfo pi, const char *comment) {
772
+ char indent[128];
773
+ Helper h;
774
+
775
+ fill_indent(pi, indent, sizeof(indent));
776
+ printf("%s%s\n", indent, comment);
777
+ if (0 != pi->h) {
778
+ for (h = pi->helpers; h <= pi->h; h++) {
779
+ const char *clas = "---";
780
+ const char *key = "---";
781
+
782
+ if (Qundef != h->obj) {
783
+ VALUE c = rb_obj_class(h->obj);
784
+
785
+ clas = rb_class2name(c);
786
+ }
787
+ if (Qundef != h->var) {
788
+ if (HashCode == h->type) {
789
+ VALUE v;
790
+
791
+ v = rb_funcall2(h->var, rb_intern("to_s"), 0, 0);
792
+ key = StringValuePtr(v);
793
+ } else if (ObjectCode == (h - 1)->type || RangeCode == (h - 1)->type || StructCode == (h - 1)->type) {
794
+ key = rb_id2name(h->var);
795
+ } else {
796
+ printf("%s*** corrupt stack ***\n", indent);
797
+ }
798
+ }
799
+ printf("%s [%c] %s : %s\n", indent, h->type, clas, key);
800
+ }
801
+ }
802
+ }