ox-bundlecachetest 2.14.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +751 -0
  3. data/LICENSE +21 -0
  4. data/README.md +351 -0
  5. data/ext/ox/attr.h +78 -0
  6. data/ext/ox/base64.c +105 -0
  7. data/ext/ox/base64.h +18 -0
  8. data/ext/ox/buf.h +162 -0
  9. data/ext/ox/builder.c +948 -0
  10. data/ext/ox/cache.c +351 -0
  11. data/ext/ox/cache.h +21 -0
  12. data/ext/ox/cache8.c +106 -0
  13. data/ext/ox/cache8.h +23 -0
  14. data/ext/ox/dump.c +1260 -0
  15. data/ext/ox/err.c +46 -0
  16. data/ext/ox/err.h +36 -0
  17. data/ext/ox/extconf.rb +47 -0
  18. data/ext/ox/gen_load.c +342 -0
  19. data/ext/ox/hash_load.c +309 -0
  20. data/ext/ox/helper.h +84 -0
  21. data/ext/ox/intern.c +157 -0
  22. data/ext/ox/intern.h +25 -0
  23. data/ext/ox/obj_load.c +809 -0
  24. data/ext/ox/ox.c +1649 -0
  25. data/ext/ox/ox.h +245 -0
  26. data/ext/ox/parse.c +1197 -0
  27. data/ext/ox/sax.c +1570 -0
  28. data/ext/ox/sax.h +69 -0
  29. data/ext/ox/sax_as.c +270 -0
  30. data/ext/ox/sax_buf.c +209 -0
  31. data/ext/ox/sax_buf.h +204 -0
  32. data/ext/ox/sax_hint.c +207 -0
  33. data/ext/ox/sax_hint.h +40 -0
  34. data/ext/ox/sax_stack.h +113 -0
  35. data/ext/ox/slotcache.c +158 -0
  36. data/ext/ox/slotcache.h +19 -0
  37. data/ext/ox/special.c +390 -0
  38. data/ext/ox/special.h +14 -0
  39. data/ext/ox/type.h +39 -0
  40. data/lib/ox/bag.rb +103 -0
  41. data/lib/ox/cdata.rb +10 -0
  42. data/lib/ox/comment.rb +11 -0
  43. data/lib/ox/doctype.rb +11 -0
  44. data/lib/ox/document.rb +28 -0
  45. data/lib/ox/element.rb +464 -0
  46. data/lib/ox/error.rb +25 -0
  47. data/lib/ox/hasattrs.rb +54 -0
  48. data/lib/ox/instruct.rb +34 -0
  49. data/lib/ox/node.rb +23 -0
  50. data/lib/ox/raw.rb +12 -0
  51. data/lib/ox/sax.rb +97 -0
  52. data/lib/ox/version.rb +4 -0
  53. data/lib/ox/xmlrpc_adapter.rb +33 -0
  54. data/lib/ox.rb +79 -0
  55. metadata +128 -0
data/ext/ox/ox.c ADDED
@@ -0,0 +1,1649 @@
1
+ /* ox.c
2
+ * Copyright (c) 2011, Peter Ohler
3
+ * All rights reserved.
4
+ */
5
+
6
+ #include "ox.h"
7
+
8
+ #include <errno.h>
9
+ #include <stdbool.h>
10
+ #include <stdint.h>
11
+ #include <stdio.h>
12
+ #include <stdlib.h>
13
+ #include <string.h>
14
+
15
+ #include "intern.h"
16
+ #include "ruby.h"
17
+ #include "sax.h"
18
+
19
+ /* maximum to allocate on the stack, arbitrary limit */
20
+ #define SMALL_XML 4096
21
+ #define WITH_CACHE_TESTS 0
22
+
/* Pairs a Ruby option Symbol with the address of the char field holding that
 * option's setting. set_def_opts() builds an array of these, terminated by an
 * entry whose attr is 0, and stores Yes, No, or NotSet through attr. */
typedef struct _yesNoOpt {
    VALUE sym; /* option key looked up in the options Hash */
    char *attr; /* field that receives the Yes/No/NotSet value */
} *YesNoOpt;
27
+
28
+ void Init_ox();
29
+
/* Module handle. Defined with external linkage and initialised to Qnil here.
 * NOTE(review): presumably assigned in Init_ox(); that code is outside the
 * portion of the file reviewed. */
VALUE Ox = Qnil;

/* Ruby IDs with external linkage (no `static`), so other translation units of
 * the extension can reference them. They are only defined here; the code that
 * interns them is outside the portion of the file reviewed. */
ID ox_abort_id;
ID ox_at_column_id;
ID ox_at_content_id;
ID ox_at_id;
ID ox_at_line_id;
ID ox_at_pos_id;
ID ox_at_value_id;
ID ox_attr_id;
ID ox_attr_value_id;
ID ox_attributes_id;
ID ox_attrs_done_id;
ID ox_beg_id;
ID ox_bigdecimal_id;
ID ox_call_id;
ID ox_cdata_id;
ID ox_comment_id;
ID ox_den_id;
ID ox_doctype_id;
ID ox_end_element_id;
ID ox_end_id;
ID ox_end_instruct_id;
ID ox_error_id;
ID ox_excl_id;
ID ox_external_encoding_id;
ID ox_fileno_id;
ID ox_force_encoding_id;
ID ox_inspect_id;
ID ox_instruct_id;
ID ox_jd_id;
ID ox_keys_id;
ID ox_local_id;
ID ox_mesg_id;
ID ox_message_id;
ID ox_new_id;
ID ox_nodes_id;
ID ox_num_id;
ID ox_parse_id;
ID ox_pos_id;
ID ox_read_id;
ID ox_readpartial_id;
ID ox_start_element_id;
ID ox_string_id;
ID ox_text_id;
ID ox_to_c_id;
ID ox_value_id;

/* Option-key Symbols with external linkage. ox_encoding_sym and ox_indent_sym
 * are used as Hash keys by get_def_opts() and set_def_opts() in this file. */
VALUE ox_encoding_sym;
VALUE ox_version_sym;
VALUE ox_standalone_sym;
VALUE ox_indent_sym;
VALUE ox_size_sym;

/* Shared Ruby values. NOTE(review): names suggest a cached empty String and
 * Fixnum zero - confirm where they are initialised. */
VALUE ox_empty_string;
VALUE ox_zero_fixnum;
VALUE ox_sym_bank; // Array

/* Ruby class handles with external linkage. ox_parse_error_class is the class
 * raised by rb_raise() throughout this file. */
VALUE ox_arg_error_class;
VALUE ox_bag_clas;
VALUE ox_cdata_clas;
VALUE ox_comment_clas;
VALUE ox_raw_clas;
VALUE ox_date_class;
VALUE ox_doctype_clas;
VALUE ox_document_clas;
VALUE ox_element_clas;
VALUE ox_instruct_clas;
VALUE ox_parse_error_class;
VALUE ox_stringio_class;
VALUE ox_struct_class;
VALUE ox_syntax_error_class;
VALUE ox_time_class;

/* Starts out as 0 (no cache). NOTE(review): presumably created during
 * extension initialisation - not visible in the reviewed portion. */
SlotCache ox_class_cache = 0;
105
+
/* File-local Symbols used as option keys and option values by the option
 * getters/setters in this file (for example mode_sym is the key and
 * object_sym/generic_sym/... are its accepted values). They are only declared
 * here; the code that assigns them is outside the reviewed portion. */
static VALUE abort_sym;
static VALUE active_sym;
static VALUE attr_key_mod_sym;
static VALUE auto_define_sym;
static VALUE auto_sym;
static VALUE block_sym;
static VALUE circular_sym;
static VALUE convert_special_sym;
static VALUE effort_sym;
static VALUE generic_sym;
static VALUE hash_no_attrs_sym;
static VALUE hash_sym;
static VALUE inactive_sym;
static VALUE invalid_replace_sym;
static VALUE limited_sym;
static VALUE margin_sym;
static VALUE mode_sym;
static VALUE nest_ok_sym;
static VALUE no_empty_sym;
static VALUE object_sym;
static VALUE off_sym;
static VALUE opt_format_sym;
static VALUE optimized_sym;
static VALUE overlay_sym;
static VALUE skip_none_sym;
static VALUE skip_off_sym;
static VALUE skip_return_sym;
static VALUE skip_sym;
static VALUE skip_white_sym;
static VALUE smart_sym;
static VALUE strict_sym;
static VALUE strip_namespace_sym;
static VALUE symbolize_keys_sym;
static VALUE symbolize_sym;
static VALUE tolerant_sym;
static VALUE trace_sym;
static VALUE with_cdata_sym;
static VALUE with_dtd_sym;
static VALUE with_instruct_sym;
static VALUE with_xml_sym;
static VALUE xsd_date_sym;
static VALUE element_key_mod_sym;

/* File-local method IDs; not referenced in the reviewed portion. */
static ID encoding_id;
static ID has_key_id;

/* Encoding handle that defuse_bom() stores into the options when it finds a
 * UTF-8 byte order mark. Starts out as 0; the assignment is outside the
 * reviewed portion. */
rb_encoding *ox_utf8_encoding = 0;
153
+
154
+ struct _options ox_default_options = {
155
+ {'\0'}, // encoding
156
+ {'\0'}, // margin
157
+ 2, // indent
158
+ 0, // trace
159
+ 0, // margin_len
160
+ No, // with_dtd
161
+ No, // with_xml
162
+ No, // with_instruct
163
+ No, // circular
164
+ No, // xsd_date
165
+ NoMode, // mode
166
+ StrictEffort, // effort
167
+ Yes, // sym_keys
168
+ SpcSkip, // skip
169
+ No, // smart
170
+ true, // convert_special
171
+ No, // allow_invalid
172
+ false, // no_empty
173
+ false, // with_cdata
174
+ {'\0'}, // inv_repl
175
+ {'\0'}, // strip_ns
176
+ NULL, // html_hints
177
+ Qnil, // attr_key_mod;
178
+ Qnil, // element_key_mod;
179
+ 0 // rb_enc
180
+ };
181
+
182
+ extern ParseCallbacks ox_obj_callbacks;
183
+ extern ParseCallbacks ox_gen_callbacks;
184
+ extern ParseCallbacks ox_limited_callbacks;
185
+ extern ParseCallbacks ox_nomode_callbacks;
186
+ extern ParseCallbacks ox_hash_callbacks;
187
+ extern ParseCallbacks ox_hash_cdata_callbacks;
188
+ extern ParseCallbacks ox_hash_no_attrs_callbacks;
189
+ extern ParseCallbacks ox_hash_no_attrs_cdata_callbacks;
190
+
191
+ static void parse_dump_options(VALUE ropts, Options copts);
192
+
193
+ static char *defuse_bom(char *xml, Options options) {
194
+ switch ((uint8_t)*xml) {
195
+ case 0xEF: // UTF-8
196
+ if (0xBB == (uint8_t)xml[1] && 0xBF == (uint8_t)xml[2]) {
197
+ options->rb_enc = ox_utf8_encoding;
198
+ xml += 3;
199
+ } else {
200
+ rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
201
+ }
202
+ break;
203
+ #if 0
204
+ case 0xFE: // UTF-16BE
205
+ if (0xFF == (uint8_t)xml[1]) {
206
+ options->rb_enc = ox_utf16be_encoding;
207
+ xml += 2;
208
+ } else {
209
+ rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
210
+ }
211
+ break;
212
+ case 0xFF: // UTF-16LE or UTF-32LE
213
+ if (0xFE == (uint8_t)xml[1]) {
214
+ if (0x00 == (uint8_t)xml[2] && 0x00 == (uint8_t)xml[3]) {
215
+ options->rb_enc = ox_utf32le_encoding;
216
+ xml += 4;
217
+ } else {
218
+ options->rb_enc = ox_utf16le_encoding;
219
+ xml += 2;
220
+ }
221
+ } else {
222
+ rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
223
+ }
224
+ break;
225
+ case 0x00: // UTF-32BE
226
+ if (0x00 == (uint8_t)xml[1] && 0xFE == (uint8_t)xml[2] && 0xFF == (uint8_t)xml[3]) {
227
+ options->rb_enc = ox_utf32be_encoding;
228
+ xml += 4;
229
+ } else {
230
+ rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
231
+ }
232
+ break;
233
+ #endif
234
+ default:
235
+ // Let it fail if there is a BOM that is not UTF-8. Other BOM options
236
+ // are not ASCII compatible.
237
+ break;
238
+ }
239
+ return xml;
240
+ }
241
+
242
+ static VALUE hints_to_overlay(Hints hints) {
243
+ volatile VALUE overlay = rb_hash_new();
244
+ Hint h;
245
+ int i;
246
+ VALUE ov;
247
+
248
+ for (i = hints->size, h = hints->hints; 0 < i; i--, h++) {
249
+ switch (h->overlay) {
250
+ case InactiveOverlay: ov = inactive_sym; break;
251
+ case BlockOverlay: ov = block_sym; break;
252
+ case OffOverlay: ov = off_sym; break;
253
+ case AbortOverlay: ov = abort_sym; break;
254
+ case NestOverlay: ov = nest_ok_sym; break;
255
+ case ActiveOverlay:
256
+ default: ov = active_sym; break;
257
+ }
258
+ rb_hash_aset(overlay, rb_str_new2(h->name), ov);
259
+ }
260
+ return overlay;
261
+ }
262
+
263
+ /* call-seq: default_options() => Hash
264
+ *
265
+ * Returns the default load and dump options as a Hash. The options are
266
+ * - _:margin_ [String] left margin to inset when dumping
267
+ * - _:indent_ [Fixnum] number of spaces to indent each element in an XML document
268
+ * - _:trace_ [Fixnum] trace level where 0 is silent
269
+ * - _:encoding_ [String] character encoding for the XML file
270
+ * - _:with_dtd_ [true|false|nil] include DTD in the dump
271
+ * - _:with_instruct_ [true|false|nil] include instructions in the dump
272
+ * - _:with_xml_ [true|false|nil] include XML prolog in the dump
273
+ * - _:circular_ [true|false|nil] support circular references while dumping
274
+ * - _:xsd_date_ [true|false|nil] use XSD date format instead of decimal format
275
+ * - _:mode_ [:object|:generic|:limited|:hash|:hash_no_attrs|nil] load method to use for XML
276
+ * - _:effort_ [:strict|:tolerant|:auto_define] set the tolerance level for loading
277
+ * - _:symbolize_keys_ [true|false|nil] symbolize element attribute keys or leave as Strings
278
+ * - _:element_key_mod_ [Proc|nil] converts element keys on parse if not nil
279
+ * - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil
280
+ * - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
281
+ * - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
282
+ * - _:convert_special_ [true|false|nil] flag indicating special characters like &lt; are converted with the SAX parser
283
+ * - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway
284
+ * as hex. A string, limited to 10 characters will replace the invalid character with the replace.
285
+ * - _:no_empty_ [true|false|nil] flag indicating there should be no empty elements in a dump
286
+ * - _:with_cdata_ [true|false] includes cdata in hash_load results
287
+ * - _:strip_namespace_ [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will
288
+ * strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
289
+ * - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
290
+ * - _:active_ - make the normal callback for the element
291
+ * - _:nest_ok_ - active but the nesting check is ignored
292
+ * - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
293
+ * - _:block_ - block this and all children callbacks
294
+ * - _:off_ - block this element and its children unless the child element is active
295
+ * - _:abort_ - abort the html processing and return
296
+ *
297
+ * *return* [Hash] all current option settings.
298
+ *
299
+ * Note that an indent of less than zero will result in a tight one line output
300
+ * unless the text in the XML fields contain new line characters.
301
+ */
302
+ static VALUE get_def_opts(VALUE self) {
303
+ VALUE opts = rb_hash_new();
304
+ int elen = (int)strlen(ox_default_options.encoding);
305
+
306
+ rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_str_new(ox_default_options.encoding, elen));
307
+ rb_hash_aset(opts, margin_sym, rb_str_new(ox_default_options.margin, ox_default_options.margin_len));
308
+ rb_hash_aset(opts, ox_indent_sym, INT2FIX(ox_default_options.indent));
309
+ rb_hash_aset(opts, trace_sym, INT2FIX(ox_default_options.trace));
310
+ rb_hash_aset(opts,
311
+ with_dtd_sym,
312
+ (Yes == ox_default_options.with_dtd) ? Qtrue : ((No == ox_default_options.with_dtd) ? Qfalse : Qnil));
313
+ rb_hash_aset(opts,
314
+ with_xml_sym,
315
+ (Yes == ox_default_options.with_xml) ? Qtrue : ((No == ox_default_options.with_xml) ? Qfalse : Qnil));
316
+ rb_hash_aset(
317
+ opts,
318
+ with_instruct_sym,
319
+ (Yes == ox_default_options.with_instruct) ? Qtrue : ((No == ox_default_options.with_instruct) ? Qfalse : Qnil));
320
+ rb_hash_aset(opts,
321
+ circular_sym,
322
+ (Yes == ox_default_options.circular) ? Qtrue : ((No == ox_default_options.circular) ? Qfalse : Qnil));
323
+ rb_hash_aset(opts,
324
+ xsd_date_sym,
325
+ (Yes == ox_default_options.xsd_date) ? Qtrue : ((No == ox_default_options.xsd_date) ? Qfalse : Qnil));
326
+ rb_hash_aset(opts,
327
+ symbolize_keys_sym,
328
+ (Yes == ox_default_options.sym_keys) ? Qtrue : ((No == ox_default_options.sym_keys) ? Qfalse : Qnil));
329
+ rb_hash_aset(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);
330
+ rb_hash_aset(opts, element_key_mod_sym, ox_default_options.element_key_mod);
331
+ rb_hash_aset(opts,
332
+ smart_sym,
333
+ (Yes == ox_default_options.smart) ? Qtrue : ((No == ox_default_options.smart) ? Qfalse : Qnil));
334
+ rb_hash_aset(opts, convert_special_sym, (ox_default_options.convert_special) ? Qtrue : Qfalse);
335
+ rb_hash_aset(opts, no_empty_sym, (ox_default_options.no_empty) ? Qtrue : Qfalse);
336
+ rb_hash_aset(opts, with_cdata_sym, (ox_default_options.with_cdata) ? Qtrue : Qfalse);
337
+ switch (ox_default_options.mode) {
338
+ case ObjMode: rb_hash_aset(opts, mode_sym, object_sym); break;
339
+ case GenMode: rb_hash_aset(opts, mode_sym, generic_sym); break;
340
+ case LimMode: rb_hash_aset(opts, mode_sym, limited_sym); break;
341
+ case HashMode: rb_hash_aset(opts, mode_sym, hash_sym); break;
342
+ case HashNoAttrMode: rb_hash_aset(opts, mode_sym, hash_no_attrs_sym); break;
343
+ case NoMode:
344
+ default: rb_hash_aset(opts, mode_sym, Qnil); break;
345
+ }
346
+ switch (ox_default_options.effort) {
347
+ case StrictEffort: rb_hash_aset(opts, effort_sym, strict_sym); break;
348
+ case TolerantEffort: rb_hash_aset(opts, effort_sym, tolerant_sym); break;
349
+ case AutoEffort: rb_hash_aset(opts, effort_sym, auto_define_sym); break;
350
+ case NoEffort:
351
+ default: rb_hash_aset(opts, effort_sym, Qnil); break;
352
+ }
353
+ switch (ox_default_options.skip) {
354
+ case OffSkip: rb_hash_aset(opts, skip_sym, skip_off_sym); break;
355
+ case NoSkip: rb_hash_aset(opts, skip_sym, skip_none_sym); break;
356
+ case CrSkip: rb_hash_aset(opts, skip_sym, skip_return_sym); break;
357
+ case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
358
+ default: rb_hash_aset(opts, skip_sym, Qnil); break;
359
+ }
360
+ if (Yes == ox_default_options.allow_invalid) {
361
+ rb_hash_aset(opts, invalid_replace_sym, Qnil);
362
+ } else {
363
+ rb_hash_aset(opts,
364
+ invalid_replace_sym,
365
+ rb_str_new(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl));
366
+ }
367
+ if ('\0' == *ox_default_options.strip_ns) {
368
+ rb_hash_aset(opts, strip_namespace_sym, Qfalse);
369
+ } else if ('*' == *ox_default_options.strip_ns && '\0' == ox_default_options.strip_ns[1]) {
370
+ rb_hash_aset(opts, strip_namespace_sym, Qtrue);
371
+ } else {
372
+ rb_hash_aset(opts,
373
+ strip_namespace_sym,
374
+ rb_str_new(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns)));
375
+ }
376
+ if (NULL == ox_default_options.html_hints) {
377
+ // rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_hints_html()));
378
+ rb_hash_aset(opts, overlay_sym, Qnil);
379
+ } else {
380
+ rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_default_options.html_hints));
381
+ }
382
+ return opts;
383
+ }
384
+
385
+ static int set_overlay(VALUE key, VALUE value, VALUE ctx) {
386
+ Hints hints = (Hints)ctx;
387
+ Hint hint;
388
+
389
+ if (NULL != (hint = ox_hint_find(hints, StringValuePtr(key)))) {
390
+ if (active_sym == value) {
391
+ hint->overlay = ActiveOverlay;
392
+ } else if (inactive_sym == value) {
393
+ hint->overlay = InactiveOverlay;
394
+ } else if (block_sym == value) {
395
+ hint->overlay = BlockOverlay;
396
+ } else if (nest_ok_sym == value) {
397
+ hint->overlay = NestOverlay;
398
+ } else if (off_sym == value) {
399
+ hint->overlay = OffOverlay;
400
+ } else if (abort_sym == value) {
401
+ hint->overlay = AbortOverlay;
402
+ }
403
+ }
404
+ return ST_CONTINUE;
405
+ }
406
+
407
+ /* call-seq: sax_html_overlay() => Hash
408
+ *
409
+ * Returns an overlay hash that can be modified and used as an overlay in the
410
+ * default options or in the sax_html() function call. Values for the keys are:
411
+ * - _:active_ - make the normal callback for the element
412
+ * - _:nest_ok_ - active but ignore nest check
413
+ * - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
414
+ * - _:block_ - block this and all children callbacks
415
+ * - _:off_ - block this element and its children unless the child element is active
416
+ * - _:abort_ - abort the html processing and return
417
+ *
418
+ * *return* [Hash] default SAX HTML settings
419
+ */
420
+ static VALUE sax_html_overlay(VALUE self) {
421
+ return hints_to_overlay(ox_hints_html());
422
+ }
423
+
424
+ /* call-seq: default_options=(opts)
425
+ *
426
+ * Sets the default options for load and dump.
427
+ * - +opts+ [Hash] opts options to change
428
+ * - _:margin_ [String] left margin to inset when dumping
429
+ * - _:indent_ [Fixnum] number of spaces to indent each element in an XML document
430
+ * - _:trace_ [Fixnum] trace level where 0 is silent
431
+ * - _:encoding_ [String] character encoding for the XML file
432
+ * - _:with_dtd_ [true|false|nil] include DTD in the dump
433
+ * - _:with_instruct_ [true|false|nil] include instructions in the dump
434
+ * - _:with_xml_ [true|false|nil] include XML prolog in the dump
435
+ * - _:circular_ [true|false|nil] support circular references while dumping
436
+ * - _:xsd_date_ [true|false|nil] use XSD date format instead of decimal format
437
+ * - _:mode_ [:object|:generic|:limited|:hash|:hash_no_attrs|nil] load method to use for XML
438
+ * - _:effort_ [:strict|:tolerant|:auto_define] set the tolerance level for loading
439
+ * - _:symbolize_keys_ [true|false|nil] symbolize element attribute keys or leave as Strings
440
+ * - _:element_key_mod_ [Proc|nil] converts element keys on parse if not nil
441
+ * - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil
442
+ * - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
443
+ * - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
444
+ * - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include
445
+ * anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
446
+ * - _:strip_namespace_ [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
447
+ * will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
448
+ * - _:with_cdata_ [true|false] includes cdata in hash_load results
449
+ * - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
450
+ * - _:active_ - make the normal callback for the element
451
+ * - _:nest_ok_ - active but ignore nest check
452
+ * - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
453
+ * - _:block_ - block this and all children callbacks
454
+ * - _:off_ - block this element and its children unless the child element is active
455
+ * - _:abort_ - abort the html processing and return
456
+ *
457
+ * *return* [nil]
458
+ */
459
+ static VALUE set_def_opts(VALUE self, VALUE opts) {
460
+ struct _yesNoOpt ynos[] = {{with_xml_sym, &ox_default_options.with_xml},
461
+ {with_dtd_sym, &ox_default_options.with_dtd},
462
+ {with_instruct_sym, &ox_default_options.with_instruct},
463
+ {xsd_date_sym, &ox_default_options.xsd_date},
464
+ {circular_sym, &ox_default_options.circular},
465
+ {symbolize_keys_sym, &ox_default_options.sym_keys},
466
+ {smart_sym, &ox_default_options.smart},
467
+ {Qnil, 0}};
468
+ YesNoOpt o;
469
+ VALUE v;
470
+
471
+ Check_Type(opts, T_HASH);
472
+
473
+ v = rb_hash_aref(opts, ox_encoding_sym);
474
+ if (Qnil == v) {
475
+ *ox_default_options.encoding = '\0';
476
+ } else {
477
+ Check_Type(v, T_STRING);
478
+ strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
479
+ ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
480
+ }
481
+
482
+ v = rb_hash_aref(opts, ox_indent_sym);
483
+ if (Qnil != v) {
484
+ Check_Type(v, T_FIXNUM);
485
+ ox_default_options.indent = FIX2INT(v);
486
+ }
487
+
488
+ v = rb_hash_aref(opts, trace_sym);
489
+ if (Qnil != v) {
490
+ Check_Type(v, T_FIXNUM);
491
+ ox_default_options.trace = FIX2INT(v);
492
+ }
493
+
494
+ v = rb_hash_aref(opts, mode_sym);
495
+ if (Qnil == v) {
496
+ ox_default_options.mode = NoMode;
497
+ } else if (object_sym == v) {
498
+ ox_default_options.mode = ObjMode;
499
+ } else if (generic_sym == v) {
500
+ ox_default_options.mode = GenMode;
501
+ } else if (limited_sym == v) {
502
+ ox_default_options.mode = LimMode;
503
+ } else if (hash_sym == v) {
504
+ ox_default_options.mode = HashMode;
505
+ } else if (hash_no_attrs_sym == v) {
506
+ ox_default_options.mode = HashNoAttrMode;
507
+ } else {
508
+ rb_raise(ox_parse_error_class, ":mode must be :object, :generic, :limited, :hash, :hash_no_attrs, or nil.\n");
509
+ }
510
+
511
+ v = rb_hash_aref(opts, effort_sym);
512
+ if (Qnil == v) {
513
+ ox_default_options.effort = NoEffort;
514
+ } else if (strict_sym == v) {
515
+ ox_default_options.effort = StrictEffort;
516
+ } else if (tolerant_sym == v) {
517
+ ox_default_options.effort = TolerantEffort;
518
+ } else if (auto_define_sym == v) {
519
+ ox_default_options.effort = AutoEffort;
520
+ } else {
521
+ rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
522
+ }
523
+
524
+ v = rb_hash_aref(opts, skip_sym);
525
+ if (Qnil == v) {
526
+ ox_default_options.skip = NoSkip;
527
+ } else if (skip_off_sym == v) {
528
+ ox_default_options.skip = OffSkip;
529
+ } else if (skip_none_sym == v) {
530
+ ox_default_options.skip = NoSkip;
531
+ } else if (skip_return_sym == v) {
532
+ ox_default_options.skip = CrSkip;
533
+ } else if (skip_white_sym == v) {
534
+ ox_default_options.skip = SpcSkip;
535
+ } else {
536
+ rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, :skip_off, or nil.\n");
537
+ }
538
+
539
+ v = rb_hash_lookup(opts, convert_special_sym);
540
+ if (Qnil == v) {
541
+ // no change
542
+ } else if (Qtrue == v) {
543
+ ox_default_options.convert_special = 1;
544
+ } else if (Qfalse == v) {
545
+ ox_default_options.convert_special = 0;
546
+ } else {
547
+ rb_raise(ox_parse_error_class, ":convert_special must be true or false.\n");
548
+ }
549
+
550
+ v = rb_hash_lookup(opts, no_empty_sym);
551
+ if (Qnil == v) {
552
+ // no change
553
+ } else if (Qtrue == v) {
554
+ ox_default_options.no_empty = 1;
555
+ } else if (Qfalse == v) {
556
+ ox_default_options.no_empty = 0;
557
+ } else {
558
+ rb_raise(ox_parse_error_class, ":no_empty must be true or false.\n");
559
+ }
560
+
561
+ v = rb_hash_aref(opts, invalid_replace_sym);
562
+ if (Qnil == v) {
563
+ ox_default_options.allow_invalid = Yes;
564
+ } else {
565
+ long slen;
566
+
567
+ Check_Type(v, T_STRING);
568
+ slen = RSTRING_LEN(v);
569
+ if (sizeof(ox_default_options.inv_repl) - 2 < (size_t)slen) {
570
+ rb_raise(ox_parse_error_class,
571
+ ":invalid_replace can be no longer than %d characters.",
572
+ (int)sizeof(ox_default_options.inv_repl) - 2);
573
+ }
574
+ strncpy(ox_default_options.inv_repl + 1, StringValuePtr(v), sizeof(ox_default_options.inv_repl) - 1);
575
+ ox_default_options.inv_repl[sizeof(ox_default_options.inv_repl) - 1] = '\0';
576
+ *ox_default_options.inv_repl = (char)slen;
577
+ ox_default_options.allow_invalid = No;
578
+ }
579
+
580
+ v = rb_hash_aref(opts, strip_namespace_sym);
581
+ if (Qfalse == v) {
582
+ *ox_default_options.strip_ns = '\0';
583
+ } else if (Qtrue == v) {
584
+ *ox_default_options.strip_ns = '*';
585
+ ox_default_options.strip_ns[1] = '\0';
586
+ } else if (Qnil != v) {
587
+ long slen;
588
+
589
+ Check_Type(v, T_STRING);
590
+ slen = RSTRING_LEN(v);
591
+ if (sizeof(ox_default_options.strip_ns) - 1 < (size_t)slen) {
592
+ rb_raise(ox_parse_error_class,
593
+ ":strip_namespace can be no longer than %d characters.",
594
+ (int)sizeof(ox_default_options.strip_ns) - 1);
595
+ }
596
+ strncpy(ox_default_options.strip_ns, StringValuePtr(v), sizeof(ox_default_options.strip_ns) - 1);
597
+ ox_default_options.strip_ns[sizeof(ox_default_options.strip_ns) - 1] = '\0';
598
+ }
599
+
600
+ v = rb_hash_aref(opts, margin_sym);
601
+ if (Qnil != v) {
602
+ long slen;
603
+
604
+ Check_Type(v, T_STRING);
605
+ slen = RSTRING_LEN(v);
606
+ if (sizeof(ox_default_options.margin) - 1 < (size_t)slen) {
607
+ rb_raise(ox_parse_error_class,
608
+ ":margin can be no longer than %d characters.",
609
+ (int)sizeof(ox_default_options.margin) - 1);
610
+ }
611
+ strncpy(ox_default_options.margin, StringValuePtr(v), sizeof(ox_default_options.margin) - 1);
612
+ ox_default_options.margin[sizeof(ox_default_options.margin) - 1] = '\0';
613
+ ox_default_options.margin_len = strlen(ox_default_options.margin);
614
+ }
615
+
616
+ for (o = ynos; 0 != o->attr; o++) {
617
+ v = rb_hash_lookup(opts, o->sym);
618
+ if (Qnil == v) {
619
+ *o->attr = NotSet;
620
+ } else if (Qtrue == v) {
621
+ *o->attr = Yes;
622
+ } else if (Qfalse == v) {
623
+ *o->attr = No;
624
+ } else {
625
+ rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
626
+ }
627
+ }
628
+ v = rb_hash_aref(opts, overlay_sym);
629
+ if (Qnil == v) {
630
+ ox_hints_destroy(ox_default_options.html_hints);
631
+ ox_default_options.html_hints = NULL;
632
+ } else {
633
+ int cnt;
634
+
635
+ Check_Type(v, T_HASH);
636
+ cnt = (int)RHASH_SIZE(v);
637
+ if (0 == cnt) {
638
+ ox_hints_destroy(ox_default_options.html_hints);
639
+ ox_default_options.html_hints = NULL;
640
+ } else {
641
+ ox_hints_destroy(ox_default_options.html_hints);
642
+ ox_default_options.html_hints = ox_hints_dup(ox_hints_html());
643
+ rb_hash_foreach(v, set_overlay, (VALUE)ox_default_options.html_hints);
644
+ }
645
+ }
646
+ if (Qnil != (v = rb_hash_lookup(opts, with_cdata_sym))) {
647
+ ox_default_options.with_cdata = (Qtrue == v);
648
+ }
649
+
650
+ ox_default_options.element_key_mod = rb_hash_lookup2(opts, element_key_mod_sym, ox_default_options.element_key_mod);
651
+ ox_default_options.attr_key_mod = rb_hash_lookup2(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);
652
+
653
+ return Qnil;
654
+ }
655
+
656
+ /* call-seq: parse_obj(xml) => Object
657
+ *
658
+ * Parses an XML document String that is in the object format and returns an
659
+ * Object of the type represented by the XML. This function expects an
660
+ * optimized XML formatted String. For other formats use the more generic
661
+ * Ox.load() method. Raises an exception if the XML is malformed or the
662
+ * classes specified in the file are not valid.
663
+ * - +xml+ [String] XML String in optimized Object format.
664
+ * *return* [Object] deserialized Object.
665
+ */
666
+ static VALUE to_obj(VALUE self, VALUE ruby_xml) {
667
+ char *xml, *x;
668
+ size_t len;
669
+ VALUE obj;
670
+ struct _options options = ox_default_options;
671
+ struct _err err;
672
+
673
+ err_init(&err);
674
+ Check_Type(ruby_xml, T_STRING);
675
+ /* the xml string gets modified so make a copy of it */
676
+ len = RSTRING_LEN(ruby_xml) + 1;
677
+ x = defuse_bom(StringValuePtr(ruby_xml), &options);
678
+ if (SMALL_XML < len) {
679
+ xml = ALLOC_N(char, len);
680
+ } else {
681
+ xml = ALLOCA_N(char, len);
682
+ }
683
+ memcpy(xml, x, len);
684
+ rb_gc_disable();
685
+ obj = ox_parse(xml, len - 1, ox_obj_callbacks, 0, &options, &err);
686
+ if (SMALL_XML < len) {
687
+ xfree(xml);
688
+ }
689
+ RB_GC_GUARD(obj);
690
+ rb_gc_enable();
691
+ if (err_has(&err)) {
692
+ ox_err_raise(&err);
693
+ }
694
+ return obj;
695
+ }
696
+
697
+ /* call-seq: parse(xml) => Ox::Document or Ox::Element
698
+ *
699
+ * Parses an XML document String into an Ox::Document or Ox::Element.
700
+ * - +xml+ [String] xml XML String
701
+ * *return* [Ox::Document or Ox::Element] parsed XML document.
702
+ *
703
+ * _raise_ [Exception] if the XML is malformed.
704
+ */
705
+ static VALUE to_gen(VALUE self, VALUE ruby_xml) {
706
+ char *xml, *x;
707
+ size_t len;
708
+ VALUE obj;
709
+ struct _options options = ox_default_options;
710
+ struct _err err;
711
+
712
+ err_init(&err);
713
+ Check_Type(ruby_xml, T_STRING);
714
+ /* the xml string gets modified so make a copy of it */
715
+ len = RSTRING_LEN(ruby_xml) + 1;
716
+ x = defuse_bom(StringValuePtr(ruby_xml), &options);
717
+ if (SMALL_XML < len) {
718
+ xml = ALLOC_N(char, len);
719
+ } else {
720
+ xml = ALLOCA_N(char, len);
721
+ }
722
+ memcpy(xml, x, len);
723
+ obj = ox_parse(xml, len - 1, ox_gen_callbacks, 0, &options, &err);
724
+ if (SMALL_XML < len) {
725
+ xfree(xml);
726
+ }
727
+ if (err_has(&err)) {
728
+ ox_err_raise(&err);
729
+ }
730
+ return obj;
731
+ }
732
+
733
+ static int load_options_cb(VALUE k, VALUE v, VALUE opts) {
734
+ Options copts = (Options)opts;
735
+
736
+ if (mode_sym == k) {
737
+ if (object_sym == v) {
738
+ copts->mode = ObjMode;
739
+ } else if (optimized_sym == v) {
740
+ copts->mode = ObjMode;
741
+ } else if (generic_sym == v) {
742
+ copts->mode = GenMode;
743
+ } else if (limited_sym == v) {
744
+ copts->mode = LimMode;
745
+ } else if (hash_sym == v) {
746
+ copts->mode = HashMode;
747
+ } else if (hash_no_attrs_sym == v) {
748
+ copts->mode = HashNoAttrMode;
749
+ } else {
750
+ rb_raise(ox_parse_error_class, ":mode must be :generic, :object, :limited, :hash, :hash_no_attrs.\n");
751
+ }
752
+ } else if (effort_sym == k) {
753
+ if (auto_define_sym == v) {
754
+ copts->effort = AutoEffort;
755
+ } else if (tolerant_sym == v) {
756
+ copts->effort = TolerantEffort;
757
+ } else if (strict_sym == v) {
758
+ copts->effort = StrictEffort;
759
+ } else {
760
+ rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
761
+ }
762
+ } else if (skip_sym == k) {
763
+ if (skip_none_sym == v) {
764
+ copts->skip = NoSkip;
765
+ } else if (skip_off_sym == v) {
766
+ copts->skip = OffSkip;
767
+ } else if (skip_return_sym == v) {
768
+ copts->skip = CrSkip;
769
+ } else if (skip_white_sym == v) {
770
+ copts->skip = SpcSkip;
771
+ } else {
772
+ rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or :skip_off.\n");
773
+ }
774
+ } else if (trace_sym == k) {
775
+ Check_Type(v, T_FIXNUM);
776
+ copts->trace = FIX2INT(v);
777
+ } else if (symbolize_keys_sym == k) {
778
+ copts->sym_keys = (Qfalse == v) ? No : Yes;
779
+ } else if (element_key_mod_sym == k) {
780
+ copts->element_key_mod = v;
781
+ } else if (attr_key_mod_sym == k) {
782
+ copts->attr_key_mod = v;
783
+ } else if (convert_special_sym == k) {
784
+ copts->convert_special = (Qfalse != v);
785
+ } else if (no_empty_sym == k) {
786
+ copts->no_empty = (Qfalse != v);
787
+ } else if (invalid_replace_sym == k) {
788
+ if (Qnil == v) {
789
+ copts->allow_invalid = Yes;
790
+ } else {
791
+ long slen;
792
+
793
+ Check_Type(v, T_STRING);
794
+ slen = RSTRING_LEN(v);
795
+ if (sizeof(copts->inv_repl) - 2 < (size_t)slen) {
796
+ rb_raise(ox_parse_error_class,
797
+ ":invalid_replace can be no longer than %d characters.",
798
+ (int)sizeof(copts->inv_repl) - 2);
799
+ }
800
+ strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
801
+ copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
802
+ *copts->inv_repl = (char)slen;
803
+ copts->allow_invalid = No;
804
+ }
805
+ } else if (strip_namespace_sym == k) {
806
+ if (Qfalse == v) {
807
+ *copts->strip_ns = '\0';
808
+ } else if (Qtrue == v) {
809
+ *copts->strip_ns = '*';
810
+ copts->strip_ns[1] = '\0';
811
+ } else if (Qnil != v) {
812
+ long slen;
813
+
814
+ Check_Type(v, T_STRING);
815
+ slen = RSTRING_LEN(v);
816
+ if (sizeof(copts->strip_ns) - 1 < (size_t)slen) {
817
+ rb_raise(ox_parse_error_class,
818
+ ":strip_namespace can be no longer than %d characters.",
819
+ (int)sizeof(copts->strip_ns) - 1);
820
+ }
821
+ strncpy(copts->strip_ns, StringValuePtr(v), sizeof(copts->strip_ns) - 1);
822
+ copts->strip_ns[sizeof(copts->strip_ns) - 1] = '\0';
823
+ }
824
+ } else if (margin_sym == k) {
825
+ long slen;
826
+
827
+ Check_Type(v, T_STRING);
828
+ slen = RSTRING_LEN(v);
829
+ if (sizeof(copts->margin) - 1 < (size_t)slen) {
830
+ rb_raise(ox_parse_error_class,
831
+ ":margin can be no longer than %d characters.",
832
+ (int)sizeof(copts->margin) - 1);
833
+ }
834
+ strncpy(copts->margin, StringValuePtr(v), sizeof(copts->margin) - 1);
835
+ copts->margin[sizeof(copts->margin) - 1] = '\0';
836
+ copts->margin_len = strlen(copts->margin);
837
+ } else if (with_cdata_sym == k) {
838
+ copts->with_cdata = (Qtrue == v);
839
+ }
840
+
841
+ return ST_CONTINUE;
842
+ }
843
+
844
+ static VALUE load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
845
+ VALUE obj;
846
+ struct _options options = ox_default_options;
847
+
848
+ if (1 == argc && rb_cHash == rb_obj_class(*argv)) {
849
+ rb_hash_foreach(*argv, load_options_cb, (VALUE)&options);
850
+ }
851
+ if ('\0' == *options.encoding) {
852
+ if (Qnil != encoding) {
853
+ options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding));
854
+ } else {
855
+ options.rb_enc = 0;
856
+ }
857
+ } else if (0 == options.rb_enc) {
858
+ options.rb_enc = rb_enc_find(options.encoding);
859
+ }
860
+ xml = defuse_bom(xml, &options);
861
+ switch (options.mode) {
862
+ case ObjMode:
863
+ rb_gc_disable();
864
+ obj = ox_parse(xml, len, ox_obj_callbacks, 0, &options, err);
865
+ RB_GC_GUARD(obj);
866
+ rb_gc_enable();
867
+ break;
868
+ case GenMode: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
869
+ case LimMode: obj = ox_parse(xml, len, ox_limited_callbacks, 0, &options, err); break;
870
+ case HashMode:
871
+ if (options.with_cdata) {
872
+ obj = ox_parse(xml, len, ox_hash_cdata_callbacks, 0, &options, err);
873
+ } else {
874
+ obj = ox_parse(xml, len, ox_hash_callbacks, 0, &options, err);
875
+ }
876
+ break;
877
+ case HashNoAttrMode:
878
+ if (options.with_cdata) {
879
+ obj = ox_parse(xml, len, ox_hash_no_attrs_cdata_callbacks, 0, &options, err);
880
+ } else {
881
+ obj = ox_parse(xml, len, ox_hash_no_attrs_callbacks, 0, &options, err);
882
+ }
883
+ break;
884
+ case NoMode: obj = ox_parse(xml, len, ox_nomode_callbacks, 0, &options, err); break;
885
+ default: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
886
+ }
887
+ return obj;
888
+ }
889
+
890
+ /* call-seq: load(xml, options) => Ox::Document or Ox::Element or Object
891
+ *
892
+ * Parses an XML document String into an Ox::Document, or Ox::Element, or
893
+ * Object depending on the options. Raises an exception if the XML is malformed
894
+ * or the classes specified are not valid. If a block is given it will be called
895
+ * on the completion of each complete top level entity with that entity as its
896
+ * only argument.
897
+ *
898
+ * - +xml+ [String] XML String
899
+ * - +options+ [Hash] load options
900
+ * - *:mode* [:object|:generic|:limited] format expected
901
+ * - _:object_ - object format
902
+ * - _:generic_ - read as a generic XML file
903
+ * - _:limited_ - read as a generic XML file but with callbacks on text and elements events only
904
+ * - _:hash_ - read and convert to a Hash and core class objects only
905
+ * - _:hash_no_attrs_ - read and convert to a Hash and core class objects only without capturing attributes
906
+ * - *:effort* [:strict|:tolerant|:auto_define] effort to use when an undefined class is encountered, default: :strict
907
+ * - _:strict_ - raise an NameError for missing classes and modules
908
+ * - _:tolerant_ - return nil for missing classes and modules
909
+ * - _:auto_define_ - auto define missing classes and modules
910
+ * - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
911
+ * - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
912
+ * - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
913
+ * anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
914
+ * - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
915
+ * strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
916
+ * - *:with_cdata* [true|false] if true cdata is included in hash_load output otherwise it is not.
917
+ */
918
+ static VALUE load_str(int argc, VALUE *argv, VALUE self) {
919
+ char *xml;
920
+ size_t len;
921
+ VALUE obj;
922
+ VALUE encoding;
923
+ struct _err err;
924
+
925
+ err_init(&err);
926
+ Check_Type(*argv, T_STRING);
927
+ /* the xml string gets modified so make a copy of it */
928
+ len = RSTRING_LEN(*argv) + 1;
929
+ if (SMALL_XML < len) {
930
+ xml = ALLOC_N(char, len);
931
+ } else {
932
+ xml = ALLOCA_N(char, len);
933
+ }
934
+ encoding = rb_obj_encoding(*argv);
935
+ memcpy(xml, StringValuePtr(*argv), len);
936
+ xml[len - 1] = '\0';
937
+ obj = load(xml, len - 1, argc - 1, argv + 1, self, encoding, &err);
938
+ if (SMALL_XML < len) {
939
+ xfree(xml);
940
+ }
941
+ if (err_has(&err)) {
942
+ ox_err_raise(&err);
943
+ }
944
+ return obj;
945
+ }
946
+
947
+ /* call-seq: load_file(file_path, options) => Ox::Document or Ox::Element or Object
948
+ *
949
+ * Parses an XML document from a file into an Ox::Document, or Ox::Element,
950
+ * or Object depending on the options. Raises an exception if the XML is
951
+ * malformed or the classes specified are not valid.
952
+ * - +file_path+ [String] file path to read the XML document from
953
+ * - +options+ [Hash] load options
954
+ * - *:mode* [:object|:generic|:limited] format expected
955
+ * - _:object_ - object format
956
+ * - _:generic_ - read as a generic XML file
957
+ * - _:limited_ - read as a generic XML file but with callbacks on text and elements events only
958
+ * - _:hash_ - read and convert to a Hash and core class objects only
959
+ * - _:hash_no_attrs_ - read and convert to a Hash and core class objects only without capturing attributes
960
+ * - *:effort* [:strict|:tolerant|:auto_define] effort to use when an undefined class is encountered, default: :strict
961
+ * - _:strict_ - raise an NameError for missing classes and modules
962
+ * - _:tolerant_ - return nil for missing classes and modules
963
+ * - _:auto_define_ - auto define missing classes and modules
964
+ * - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
965
+ * - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
966
+ * - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
967
+ * anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
968
+ * - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
969
+ * strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
970
+ */
971
+ static VALUE load_file(int argc, VALUE *argv, VALUE self) {
972
+ char *path;
973
+ char *xml;
974
+ FILE *f;
975
+ off_t len;
976
+ VALUE obj;
977
+ struct _err err;
978
+
979
+ err_init(&err);
980
+ Check_Type(*argv, T_STRING);
981
+ path = StringValuePtr(*argv);
982
+ if (0 == (f = fopen(path, "r"))) {
983
+ rb_raise(rb_eIOError, "%s\n", strerror(errno));
984
+ }
985
+ fseek(f, 0, SEEK_END);
986
+ len = ftello(f);
987
+ if (SMALL_XML < len) {
988
+ xml = ALLOC_N(char, len + 1);
989
+ } else {
990
+ xml = ALLOCA_N(char, len + 1);
991
+ }
992
+ fseek(f, 0, SEEK_SET);
993
+ if ((size_t)len != fread(xml, 1, len, f)) {
994
+ ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
995
+ obj = Qnil;
996
+ } else {
997
+ xml[len] = '\0';
998
+ obj = load(xml, len, argc - 1, argv + 1, self, Qnil, &err);
999
+ }
1000
+ fclose(f);
1001
+ if (SMALL_XML < len) {
1002
+ xfree(xml);
1003
+ }
1004
+ if (err_has(&err)) {
1005
+ ox_err_raise(&err);
1006
+ }
1007
+ return obj;
1008
+ }
1009
+
1010
+ /* call-seq: sax_parse(handler, io, options)
1011
+ *
1012
+ * Parses an IO stream or file containing an XML document. Raises an exception
1013
+ * if the XML is malformed or the classes specified are not valid.
1014
+ * - +handler+ [Ox::Sax] SAX (responds to OX::Sax methods) like handler
1015
+ * - +io+ [IO|String] IO Object to read from
1016
+ * - +options+ [Hash] options parse options
1017
+ * - *:convert_special* [true|false] flag indicating special characters like &lt; are converted
1018
+ * - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
1019
+ * - *:smart* [true|false] flag indicating the parser uses hints if available (use with html)
1020
+ * - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white
1021
+ * space into a single space. Default (skip space)
1022
+ * - *:strip_namespace* [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
1023
+ * will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
1024
+ */
1025
+ static VALUE sax_parse(int argc, VALUE *argv, VALUE self) {
1026
+ struct _saxOptions options;
1027
+
1028
+ options.symbolize = (No != ox_default_options.sym_keys);
1029
+ options.convert_special = ox_default_options.convert_special;
1030
+ options.smart = (Yes == ox_default_options.smart);
1031
+ options.skip = ox_default_options.skip;
1032
+ options.hints = NULL;
1033
+ strcpy(options.strip_ns, ox_default_options.strip_ns);
1034
+
1035
+ if (argc < 2) {
1036
+ rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
1037
+ }
1038
+ if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
1039
+ VALUE h = argv[2];
1040
+ VALUE v;
1041
+
1042
+ if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
1043
+ options.convert_special = (Qtrue == v);
1044
+ }
1045
+ if (Qnil != (v = rb_hash_lookup(h, smart_sym))) {
1046
+ options.smart = (Qtrue == v);
1047
+ }
1048
+ if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
1049
+ options.symbolize = (Qtrue == v);
1050
+ }
1051
+ if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
1052
+ if (skip_return_sym == v) {
1053
+ options.skip = CrSkip;
1054
+ } else if (skip_white_sym == v) {
1055
+ options.skip = SpcSkip;
1056
+ } else if (skip_none_sym == v) {
1057
+ options.skip = NoSkip;
1058
+ } else if (skip_off_sym == v) {
1059
+ options.skip = OffSkip;
1060
+ }
1061
+ }
1062
+ if (Qnil != (v = rb_hash_lookup(h, strip_namespace_sym))) {
1063
+ if (Qfalse == v) {
1064
+ *options.strip_ns = '\0';
1065
+ } else if (Qtrue == v) {
1066
+ *options.strip_ns = '*';
1067
+ options.strip_ns[1] = '\0';
1068
+ } else {
1069
+ long slen;
1070
+
1071
+ Check_Type(v, T_STRING);
1072
+ slen = RSTRING_LEN(v);
1073
+ if (sizeof(options.strip_ns) - 1 < (size_t)slen) {
1074
+ rb_raise(ox_parse_error_class,
1075
+ ":strip_namespace can be no longer than %d characters.",
1076
+ (int)sizeof(options.strip_ns) - 1);
1077
+ }
1078
+ strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
1079
+ options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
1080
+ }
1081
+ }
1082
+ }
1083
+ ox_sax_parse(argv[0], argv[1], &options);
1084
+
1085
+ return Qnil;
1086
+ }
1087
+
1088
+ /* call-seq: sax_html(handler, io, options)
1089
+ *
1090
+ * Parses an IO stream or file containing an XML document. Raises an exception
1091
+ * if the XML is malformed or the classes specified are not valid.
1092
+ * - +handler+ [Ox::Sax] SAX (responds to OX::Sax methods) like handler
1093
+ * - +io+ [IO|String] IO Object to read from
1094
+ * - +options+ [Hash] options parse options
1095
+ * - *:convert_special* [true|false] flag indicating special characters like &lt; are converted
1096
+ * - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
1097
+ * - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white
1098
+ * space into a single space. Default (skip space)
1099
+ * - *:overlay* [Hash] a Hash of keys that match html element names and values that are one of
1100
+ * - _:active_ - make the normal callback for the element
1101
+ * - _:nest_ok_ - active but ignore nest check
1102
+ * - _:inactive_ - do not make the element start, end, or attribute callbacks for this element only
1103
+ * - _:block_ - block this and all children callbacks
1104
+ * - _:off_ - block this element and its children unless the child element is active
1105
+ * - _:abort_ - abort the html processing and return
1106
+ */
1107
+ static VALUE sax_html(int argc, VALUE *argv, VALUE self) {
1108
+ struct _saxOptions options;
1109
+ bool free_hints = false;
1110
+
1111
+ options.symbolize = (No != ox_default_options.sym_keys);
1112
+ options.convert_special = ox_default_options.convert_special;
1113
+ options.smart = true;
1114
+ options.skip = ox_default_options.skip;
1115
+ options.hints = ox_default_options.html_hints;
1116
+ if (NULL == options.hints) {
1117
+ options.hints = ox_hints_html();
1118
+ }
1119
+ *options.strip_ns = '\0';
1120
+
1121
+ if (argc < 2) {
1122
+ rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_html.\n");
1123
+ }
1124
+ if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
1125
+ volatile VALUE h = argv[2];
1126
+ volatile VALUE v;
1127
+
1128
+ if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
1129
+ options.convert_special = (Qtrue == v);
1130
+ }
1131
+ if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
1132
+ options.symbolize = (Qtrue == v);
1133
+ }
1134
+ if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
1135
+ if (skip_return_sym == v) {
1136
+ options.skip = CrSkip;
1137
+ } else if (skip_white_sym == v) {
1138
+ options.skip = SpcSkip;
1139
+ } else if (skip_none_sym == v) {
1140
+ options.skip = NoSkip;
1141
+ } else if (skip_off_sym == v) {
1142
+ options.skip = OffSkip;
1143
+ }
1144
+ }
1145
+ if (Qnil != (v = rb_hash_lookup(h, overlay_sym))) {
1146
+ int cnt;
1147
+
1148
+ Check_Type(v, T_HASH);
1149
+ cnt = (int)RHASH_SIZE(v);
1150
+ if (0 == cnt) {
1151
+ options.hints = ox_hints_html();
1152
+ } else {
1153
+ options.hints = ox_hints_dup(options.hints);
1154
+ free_hints = true;
1155
+ rb_hash_foreach(v, set_overlay, (VALUE)options.hints);
1156
+ }
1157
+ }
1158
+ }
1159
+ ox_sax_parse(argv[0], argv[1], &options);
1160
+ if (free_hints) {
1161
+ ox_hints_destroy(options.hints);
1162
+ }
1163
+ return Qnil;
1164
+ }
1165
+
1166
+ static void parse_dump_options(VALUE ropts, Options copts) {
1167
+ struct _yesNoOpt ynos[] = {{with_xml_sym, &copts->with_xml},
1168
+ {with_dtd_sym, &copts->with_dtd},
1169
+ {with_instruct_sym, &copts->with_instruct},
1170
+ {xsd_date_sym, &copts->xsd_date},
1171
+ {circular_sym, &copts->circular},
1172
+ {Qnil, 0}};
1173
+ YesNoOpt o;
1174
+
1175
+ if (rb_cHash == rb_obj_class(ropts)) {
1176
+ VALUE v;
1177
+
1178
+ if (Qnil != (v = rb_hash_lookup(ropts, ox_indent_sym))) {
1179
+ if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
1180
+ rb_raise(ox_parse_error_class, ":indent must be a Fixnum.\n");
1181
+ }
1182
+ copts->indent = NUM2INT(v);
1183
+ }
1184
+ if (Qnil != (v = rb_hash_lookup(ropts, trace_sym))) {
1185
+ if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
1186
+ rb_raise(ox_parse_error_class, ":trace must be a Fixnum.\n");
1187
+ }
1188
+ copts->trace = NUM2INT(v);
1189
+ }
1190
+ if (Qnil != (v = rb_hash_lookup(ropts, ox_encoding_sym))) {
1191
+ if (rb_cString != rb_obj_class(v)) {
1192
+ rb_raise(ox_parse_error_class, ":encoding must be a String.\n");
1193
+ }
1194
+ strncpy(copts->encoding, StringValuePtr(v), sizeof(copts->encoding) - 1);
1195
+ }
1196
+ if (Qnil != (v = rb_hash_lookup(ropts, no_empty_sym))) {
1197
+ copts->no_empty = (v == Qtrue);
1198
+ }
1199
+ if (Qnil != (v = rb_hash_lookup(ropts, effort_sym))) {
1200
+ if (auto_define_sym == v) {
1201
+ copts->effort = AutoEffort;
1202
+ } else if (tolerant_sym == v) {
1203
+ copts->effort = TolerantEffort;
1204
+ } else if (strict_sym == v) {
1205
+ copts->effort = StrictEffort;
1206
+ } else {
1207
+ rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
1208
+ }
1209
+ }
1210
+ v = rb_hash_lookup(ropts, invalid_replace_sym);
1211
+ if (Qnil == v) {
1212
+ if (Qtrue == rb_funcall(ropts, has_key_id, 1, invalid_replace_sym)) {
1213
+ copts->allow_invalid = Yes;
1214
+ }
1215
+ } else {
1216
+ long slen;
1217
+
1218
+ Check_Type(v, T_STRING);
1219
+ slen = RSTRING_LEN(v);
1220
+ if (sizeof(copts->inv_repl) - 2 < (size_t)slen) {
1221
+ rb_raise(ox_parse_error_class,
1222
+ ":invalid_replace can be no longer than %d characters.",
1223
+ (int)sizeof(copts->inv_repl) - 2);
1224
+ }
1225
+ strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
1226
+ copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
1227
+ *copts->inv_repl = (char)slen;
1228
+ copts->allow_invalid = No;
1229
+ }
1230
+ v = rb_hash_lookup(ropts, margin_sym);
1231
+ if (Qnil != v) {
1232
+ long slen;
1233
+
1234
+ Check_Type(v, T_STRING);
1235
+ slen = RSTRING_LEN(v);
1236
+ if (sizeof(copts->margin) - 2 < (size_t)slen) {
1237
+ rb_raise(ox_parse_error_class,
1238
+ ":margin can be no longer than %d characters.",
1239
+ (int)sizeof(copts->margin) - 2);
1240
+ }
1241
+ strncpy(copts->margin, StringValuePtr(v), sizeof(copts->margin) - 1);
1242
+ copts->margin[sizeof(copts->margin) - 1] = '\0';
1243
+ copts->margin_len = (char)slen;
1244
+ }
1245
+
1246
+ for (o = ynos; 0 != o->attr; o++) {
1247
+ if (Qnil != (v = rb_hash_lookup(ropts, o->sym))) {
1248
+ VALUE c = rb_obj_class(v);
1249
+
1250
+ if (rb_cTrueClass == c) {
1251
+ *o->attr = Yes;
1252
+ } else if (rb_cFalseClass == c) {
1253
+ *o->attr = No;
1254
+ } else {
1255
+ rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
1256
+ }
1257
+ }
1258
+ }
1259
+ }
1260
+ }
1261
+
1262
+ /* call-seq: dump(obj, options) => xml-string
1263
+ *
1264
+ * Dumps an Object (obj) to a string.
1265
+ * - +obj+ [Object] Object to serialize as an XML document String
1266
+ * - +options+ [Hash] formatting options
1267
+ * - *:indent* [Fixnum] format expected
1268
+ * - *:no_empty* [true|false] if true don't output empty elements
1269
+ * - *:xsd_date* [true|false] use XSD date format if true, default: false
1270
+ * - *:circular* [true|false] allow circular references, default: false
1271
+ * - *:effort* [:strict|:tolerant] effort to use when an undumpable object (e.g., IO) is encountered, default:
1272
+ * :strict
1273
+ * - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
1274
+ * - _:tolerant_ - replaces undumpable objects with nil
1275
+ * - *:with_dtd* [true|false|nil] include DTD in the dump
1276
+ * - *:with_instruct* [true|false|nil] include instructions in the dump
1277
+ * - *:with_xml* [true|false|nil] include XML prolog in the dump
1278
+ *
1279
+ * Note that an indent of less than zero will result in a tight one line output
1280
+ * unless the text in the XML fields contain new line characters.
1281
+ */
1282
+ static VALUE dump(int argc, VALUE *argv, VALUE self) {
1283
+ char *xml;
1284
+ struct _options copts = ox_default_options;
1285
+ VALUE rstr;
1286
+
1287
+ if (2 == argc) {
1288
+ parse_dump_options(argv[1], &copts);
1289
+ }
1290
+ if (0 == (xml = ox_write_obj_to_str(*argv, &copts))) {
1291
+ rb_raise(rb_eNoMemError, "Not enough memory.\n");
1292
+ }
1293
+ rstr = rb_str_new2(xml);
1294
+ if ('\0' != *copts.encoding) {
1295
+ rb_enc_associate(rstr, rb_enc_find(copts.encoding));
1296
+ }
1297
+ xfree(xml);
1298
+
1299
+ return rstr;
1300
+ }
1301
+
1302
+ /* call-seq: to_xml(obj, options) => xml-string
1303
+ *
1304
+ * Dumps an Object (obj) to a string.
1305
+ * - +obj+ [Object] Object to serialize as an XML document String
1306
+ * - +options+ [Hash] formatting options
1307
+ * - *:indent* [Fixnum] format expected
1308
+ * - *:no_empty* [true|false] if true don't output empty elements
1309
+ * - *:xsd_date* [true|false] use XSD date format if true, default: false
1310
+ * - *:circular* [true|false] allow circular references, default: false
1311
+ * - *:effort* [:strict|:tolerant] effort to use when an undumpable object (e.g., IO) is encountered, default:
1312
+ * :strict
1313
+ * - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
1314
+ * - _:tolerant_ - replaces undumpable objects with nil
1315
+ * - *:with_dtd* [true|false|nil] include DTD in the dump
1316
+ * - *:with_instruct* [true|false|nil] include instructions in the dump
1317
+ * - *:with_xml* [true|false|nil] include XML prolog in the dump
1318
+ *
1319
+ * Note that an indent of less than zero will result in a tight one line output
1320
+ * unless the text in the XML fields contain new line characters.
1321
+ */
1322
+ static VALUE to_xml(int argc, VALUE *argv, VALUE self) {
1323
+ return dump(argc, argv, self);
1324
+ }
1325
+
1326
+ /* call-seq: to_file(file_path, obj, options) => Object
1327
+ *
1328
+ * Dumps an Object to the specified file.
1329
+ * - +file_path+ [String] file path to write the XML document to
1330
+ * - +obj+ [Object] Object to serialize as an XML document String
1331
+ * - +options+ [Hash] formatting options
1332
+ * - *:indent* [Fixnum] format expected
1333
+ * - *:xsd_date* [true|false] use XSD date format if true, default: false
1334
+ * - *:circular* [true|false] allow circular references, default: false
1335
+ * - *:effort* [:strict|:tolerant] effort to use when an undumpable object (e.g., IO) is encountered, default:
1336
+ * :strict
1337
+ * - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
1338
+ * - _:tolerant_ - replaces undumpable objects with nil
1339
+ * - *:with_dtd* [true|false|nil] include DTD in the dump
1340
+ * - *:with_instruct* [true|false|nil] include instructions in the dump
1341
+ * - *:with_xml* [true|false|nil] include XML prolog in the dump
1342
+ *
1343
+ * Note that an indent of less than zero will result in a tight one line output
1344
+ * unless the text in the XML fields contain new line characters.
1345
+ */
1346
+ static VALUE to_file(int argc, VALUE *argv, VALUE self) {
1347
+ struct _options copts = ox_default_options;
1348
+
1349
+ if (3 == argc) {
1350
+ parse_dump_options(argv[2], &copts);
1351
+ }
1352
+ Check_Type(*argv, T_STRING);
1353
+ ox_write_obj_to_file(argv[1], StringValuePtr(*argv), &copts);
1354
+
1355
+ return Qnil;
1356
+ }
1357
+
1358
+ #if WITH_CACHE_TESTS
1359
+ extern void ox_cache_test(void);
1360
+
1361
+ static VALUE cache_test(VALUE self) {
1362
+ ox_cache_test();
1363
+ return Qnil;
1364
+ }
1365
+
1366
+ extern void ox_cache8_test(void);
1367
+
1368
+ static VALUE cache8_test(VALUE self) {
1369
+ ox_cache8_test();
1370
+ return Qnil;
1371
+ }
1372
+ #endif
1373
+
1374
+ void Init_ox(void) {
1375
+ #if HAVE_RB_EXT_RACTOR_SAFE
1376
+ rb_ext_ractor_safe(true);
1377
+ #endif
1378
+ Ox = rb_define_module("Ox");
1379
+
1380
+ rb_define_module_function(Ox, "default_options", get_def_opts, 0);
1381
+ rb_define_module_function(Ox, "default_options=", set_def_opts, 1);
1382
+
1383
+ rb_define_module_function(Ox, "parse_obj", to_obj, 1);
1384
+ rb_define_module_function(Ox, "parse", to_gen, 1);
1385
+ rb_define_module_function(Ox, "load", load_str, -1);
1386
+ rb_define_module_function(Ox, "sax_parse", sax_parse, -1);
1387
+ rb_define_module_function(Ox, "sax_html", sax_html, -1);
1388
+
1389
+ rb_define_module_function(Ox, "to_xml", to_xml, -1);
1390
+ rb_define_module_function(Ox, "dump", dump, -1);
1391
+
1392
+ rb_define_module_function(Ox, "load_file", load_file, -1);
1393
+ rb_define_module_function(Ox, "to_file", to_file, -1);
1394
+
1395
+ rb_define_module_function(Ox, "sax_html_overlay", sax_html_overlay, 0);
1396
+
1397
+ ox_init_builder(Ox);
1398
+
1399
+ rb_require("time");
1400
+ rb_require("date");
1401
+ // rb_require("bigdecimal");
1402
+ rb_require("stringio");
1403
+
1404
+ ox_abort_id = rb_intern("abort");
1405
+ ox_at_column_id = rb_intern("@column");
1406
+ ox_at_content_id = rb_intern("@content");
1407
+ ox_at_id = rb_intern("at");
1408
+ ox_at_line_id = rb_intern("@line");
1409
+ ox_at_pos_id = rb_intern("@pos");
1410
+ ox_at_value_id = rb_intern("@value");
1411
+ ox_attr_id = rb_intern("attr");
1412
+ ox_attr_value_id = rb_intern("attr_value");
1413
+ ox_attributes_id = rb_intern("@attributes");
1414
+ ox_attrs_done_id = rb_intern("attrs_done");
1415
+ ox_beg_id = rb_intern("@beg");
1416
+ ox_bigdecimal_id = rb_intern("BigDecimal");
1417
+ ox_call_id = rb_intern("call");
1418
+ ox_cdata_id = rb_intern("cdata");
1419
+ ox_comment_id = rb_intern("comment");
1420
+ ox_den_id = rb_intern("@den");
1421
+ ox_doctype_id = rb_intern("doctype");
1422
+ ox_end_element_id = rb_intern("end_element");
1423
+ ox_end_id = rb_intern("@end");
1424
+ ox_end_instruct_id = rb_intern("end_instruct");
1425
+ ox_error_id = rb_intern("error");
1426
+ ox_excl_id = rb_intern("@excl");
1427
+ ox_external_encoding_id = rb_intern("external_encoding");
1428
+ ox_fileno_id = rb_intern("fileno");
1429
+ ox_force_encoding_id = rb_intern("force_encoding");
1430
+ ox_inspect_id = rb_intern("inspect");
1431
+ ox_instruct_id = rb_intern("instruct");
1432
+ ox_jd_id = rb_intern("jd");
1433
+ ox_keys_id = rb_intern("keys");
1434
+ ox_local_id = rb_intern("local");
1435
+ ox_mesg_id = rb_intern("mesg");
1436
+ ox_message_id = rb_intern("message");
1437
+ ox_nodes_id = rb_intern("@nodes");
1438
+ ox_new_id = rb_intern("new");
1439
+ ox_num_id = rb_intern("@num");
1440
+ ox_parse_id = rb_intern("parse");
1441
+ ox_pos_id = rb_intern("pos");
1442
+ ox_read_id = rb_intern("read");
1443
+ ox_readpartial_id = rb_intern("readpartial");
1444
+ ox_start_element_id = rb_intern("start_element");
1445
+ ox_string_id = rb_intern("string");
1446
+ ox_text_id = rb_intern("text");
1447
+ ox_to_c_id = rb_intern("to_c");
1448
+ ox_value_id = rb_intern("value");
1449
+
1450
+ encoding_id = rb_intern("encoding");
1451
+ has_key_id = rb_intern("has_key?");
1452
+
1453
+ rb_require("ox/version");
1454
+ rb_require("ox/error");
1455
+ rb_require("ox/hasattrs");
1456
+ rb_require("ox/node");
1457
+ rb_require("ox/comment");
1458
+ rb_require("ox/instruct");
1459
+ rb_require("ox/cdata");
1460
+ rb_require("ox/doctype");
1461
+ rb_require("ox/element");
1462
+ rb_require("ox/document");
1463
+ rb_require("ox/bag");
1464
+ rb_require("ox/sax");
1465
+
1466
+ ox_time_class = rb_const_get(rb_cObject, rb_intern("Time"));
1467
+ ox_date_class = rb_const_get(rb_cObject, rb_intern("Date"));
1468
+ ox_parse_error_class = rb_const_get_at(Ox, rb_intern("ParseError"));
1469
+ ox_syntax_error_class = rb_const_get_at(Ox, rb_intern("SyntaxError"));
1470
+ ox_arg_error_class = rb_const_get_at(Ox, rb_intern("ArgError"));
1471
+ ox_struct_class = rb_const_get(rb_cObject, rb_intern("Struct"));
1472
+ ox_stringio_class = rb_const_get(rb_cObject, rb_intern("StringIO"));
1473
+
1474
+ abort_sym = ID2SYM(rb_intern("abort"));
1475
+ rb_gc_register_address(&abort_sym);
1476
+ active_sym = ID2SYM(rb_intern("active"));
1477
+ rb_gc_register_address(&active_sym);
1478
+ attr_key_mod_sym = ID2SYM(rb_intern("attr_key_mod"));
1479
+ rb_gc_register_address(&attr_key_mod_sym);
1480
+ auto_define_sym = ID2SYM(rb_intern("auto_define"));
1481
+ rb_gc_register_address(&auto_define_sym);
1482
+ auto_sym = ID2SYM(rb_intern("auto"));
1483
+ rb_gc_register_address(&auto_sym);
1484
+ block_sym = ID2SYM(rb_intern("block"));
1485
+ rb_gc_register_address(&block_sym);
1486
+ circular_sym = ID2SYM(rb_intern("circular"));
1487
+ rb_gc_register_address(&circular_sym);
1488
+ convert_special_sym = ID2SYM(rb_intern("convert_special"));
1489
+ rb_gc_register_address(&convert_special_sym);
1490
+ effort_sym = ID2SYM(rb_intern("effort"));
1491
+ rb_gc_register_address(&effort_sym);
1492
+ element_key_mod_sym = ID2SYM(rb_intern("element_key_mod"));
1493
+ rb_gc_register_address(&element_key_mod_sym);
1494
+ generic_sym = ID2SYM(rb_intern("generic"));
1495
+ rb_gc_register_address(&generic_sym);
1496
+ hash_no_attrs_sym = ID2SYM(rb_intern("hash_no_attrs"));
1497
+ rb_gc_register_address(&hash_no_attrs_sym);
1498
+ hash_sym = ID2SYM(rb_intern("hash"));
1499
+ rb_gc_register_address(&hash_sym);
1500
+ inactive_sym = ID2SYM(rb_intern("inactive"));
1501
+ rb_gc_register_address(&inactive_sym);
1502
+ invalid_replace_sym = ID2SYM(rb_intern("invalid_replace"));
1503
+ rb_gc_register_address(&invalid_replace_sym);
1504
+ limited_sym = ID2SYM(rb_intern("limited"));
1505
+ rb_gc_register_address(&limited_sym);
1506
+ margin_sym = ID2SYM(rb_intern("margin"));
1507
+ rb_gc_register_address(&margin_sym);
1508
+ mode_sym = ID2SYM(rb_intern("mode"));
1509
+ rb_gc_register_address(&mode_sym);
1510
+ nest_ok_sym = ID2SYM(rb_intern("nest_ok"));
1511
+ rb_gc_register_address(&nest_ok_sym);
1512
+ no_empty_sym = ID2SYM(rb_intern("no_empty"));
1513
+ rb_gc_register_address(&no_empty_sym);
1514
+ object_sym = ID2SYM(rb_intern("object"));
1515
+ rb_gc_register_address(&object_sym);
1516
+ off_sym = ID2SYM(rb_intern("off"));
1517
+ rb_gc_register_address(&off_sym);
1518
+ opt_format_sym = ID2SYM(rb_intern("opt_format"));
1519
+ rb_gc_register_address(&opt_format_sym);
1520
+ optimized_sym = ID2SYM(rb_intern("optimized"));
1521
+ rb_gc_register_address(&optimized_sym);
1522
+ overlay_sym = ID2SYM(rb_intern("overlay"));
1523
+ rb_gc_register_address(&overlay_sym);
1524
+ ox_encoding_sym = ID2SYM(rb_intern("encoding"));
1525
+ rb_gc_register_address(&ox_encoding_sym);
1526
+ ox_indent_sym = ID2SYM(rb_intern("indent"));
1527
+ rb_gc_register_address(&ox_indent_sym);
1528
+ ox_size_sym = ID2SYM(rb_intern("size"));
1529
+ rb_gc_register_address(&ox_size_sym);
1530
+ ox_standalone_sym = ID2SYM(rb_intern("standalone"));
1531
+ rb_gc_register_address(&ox_standalone_sym);
1532
+ ox_version_sym = ID2SYM(rb_intern("version"));
1533
+ rb_gc_register_address(&ox_version_sym);
1534
+ skip_none_sym = ID2SYM(rb_intern("skip_none"));
1535
+ rb_gc_register_address(&skip_none_sym);
1536
+ skip_off_sym = ID2SYM(rb_intern("skip_off"));
1537
+ rb_gc_register_address(&skip_off_sym);
1538
+ skip_return_sym = ID2SYM(rb_intern("skip_return"));
1539
+ rb_gc_register_address(&skip_return_sym);
1540
+ skip_sym = ID2SYM(rb_intern("skip"));
1541
+ rb_gc_register_address(&skip_sym);
1542
+ skip_white_sym = ID2SYM(rb_intern("skip_white"));
1543
+ rb_gc_register_address(&skip_white_sym);
1544
+ smart_sym = ID2SYM(rb_intern("smart"));
1545
+ rb_gc_register_address(&smart_sym);
1546
+ strict_sym = ID2SYM(rb_intern("strict"));
1547
+ rb_gc_register_address(&strict_sym);
1548
+ strip_namespace_sym = ID2SYM(rb_intern("strip_namespace"));
1549
+ rb_gc_register_address(&strip_namespace_sym);
1550
+ symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys"));
1551
+ rb_gc_register_address(&symbolize_keys_sym);
1552
+ symbolize_sym = ID2SYM(rb_intern("symbolize"));
1553
+ rb_gc_register_address(&symbolize_sym);
1554
+ tolerant_sym = ID2SYM(rb_intern("tolerant"));
1555
+ rb_gc_register_address(&tolerant_sym);
1556
+ trace_sym = ID2SYM(rb_intern("trace"));
1557
+ rb_gc_register_address(&trace_sym);
1558
+ with_cdata_sym = ID2SYM(rb_intern("with_cdata"));
1559
+ rb_gc_register_address(&with_cdata_sym);
1560
+ with_dtd_sym = ID2SYM(rb_intern("with_dtd"));
1561
+ rb_gc_register_address(&with_dtd_sym);
1562
+ with_instruct_sym = ID2SYM(rb_intern("with_instructions"));
1563
+ rb_gc_register_address(&with_instruct_sym);
1564
+ with_xml_sym = ID2SYM(rb_intern("with_xml"));
1565
+ rb_gc_register_address(&with_xml_sym);
1566
+ xsd_date_sym = ID2SYM(rb_intern("xsd_date"));
1567
+ rb_gc_register_address(&xsd_date_sym);
1568
+
1569
+ ox_empty_string = rb_str_new2("");
1570
+ rb_gc_register_address(&ox_empty_string);
1571
+ ox_zero_fixnum = INT2NUM(0);
1572
+ rb_gc_register_address(&ox_zero_fixnum);
1573
+ ox_sym_bank = rb_ary_new();
1574
+ rb_gc_register_address(&ox_sym_bank);
1575
+
1576
+ ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
1577
+ ox_element_clas = rb_const_get_at(Ox, rb_intern("Element"));
1578
+ ox_instruct_clas = rb_const_get_at(Ox, rb_intern("Instruct"));
1579
+ ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment"));
1580
+ ox_raw_clas = rb_const_get_at(Ox, rb_intern("Raw"));
1581
+ ox_doctype_clas = rb_const_get_at(Ox, rb_intern("DocType"));
1582
+ ox_cdata_clas = rb_const_get_at(Ox, rb_intern("CData"));
1583
+ ox_bag_clas = rb_const_get_at(Ox, rb_intern("Bag"));
1584
+
1585
+ // Classes can move in more recent versions so register them all.
1586
+ rb_gc_register_address(&Ox);
1587
+ rb_gc_register_address(&ox_arg_error_class);
1588
+ rb_gc_register_address(&ox_bag_clas);
1589
+ rb_gc_register_address(&ox_bag_clas);
1590
+ rb_gc_register_address(&ox_cdata_clas);
1591
+ rb_gc_register_address(&ox_cdata_clas);
1592
+ rb_gc_register_address(&ox_comment_clas);
1593
+ rb_gc_register_address(&ox_comment_clas);
1594
+ rb_gc_register_address(&ox_date_class);
1595
+ rb_gc_register_address(&ox_doctype_clas);
1596
+ rb_gc_register_address(&ox_doctype_clas);
1597
+ rb_gc_register_address(&ox_document_clas);
1598
+ rb_gc_register_address(&ox_document_clas);
1599
+ rb_gc_register_address(&ox_element_clas);
1600
+ rb_gc_register_address(&ox_element_clas);
1601
+ rb_gc_register_address(&ox_encoding_sym);
1602
+ rb_gc_register_address(&ox_indent_sym);
1603
+ rb_gc_register_address(&ox_instruct_clas);
1604
+ rb_gc_register_address(&ox_instruct_clas);
1605
+ rb_gc_register_address(&ox_parse_error_class);
1606
+ rb_gc_register_address(&ox_raw_clas);
1607
+ rb_gc_register_address(&ox_raw_clas);
1608
+ rb_gc_register_address(&ox_size_sym);
1609
+ rb_gc_register_address(&ox_standalone_sym);
1610
+ rb_gc_register_address(&ox_stringio_class);
1611
+ rb_gc_register_address(&ox_struct_class);
1612
+ rb_gc_register_address(&ox_syntax_error_class);
1613
+ rb_gc_register_address(&ox_time_class);
1614
+ rb_gc_register_address(&ox_version_sym);
1615
+
1616
+ slot_cache_new(&ox_class_cache);
1617
+
1618
+ ox_sax_define();
1619
+ ox_hash_init();
1620
+
1621
+ #if WITH_CACHE_TESTS
1622
+ // space added to stop yardoc from trying to document
1623
+ rb_define _module_function(Ox, "cache_test", cache_test, 0);
1624
+ rb_define _module_function(Ox, "cache8_test", cache8_test, 0);
1625
+ #endif
1626
+
1627
+ ox_utf8_encoding = rb_enc_find("UTF-8");
1628
+ }
1629
+
1630
+ #if __GNUC__ > 4
1631
+ _Noreturn void
1632
+ #else
1633
+ void
1634
+ #endif
1635
+ _ox_raise_error(const char *msg, const char *xml, const char *current, const char *file, int line) {
1636
+ int xline = 1;
1637
+ int col = 1;
1638
+
1639
+ for (; xml < current && '\n' != *current; current--) {
1640
+ col++;
1641
+ }
1642
+ for (; xml < current; current--) {
1643
+ if ('\n' == *current) {
1644
+ xline++;
1645
+ }
1646
+ }
1647
+ rb_gc_enable();
1648
+ rb_raise(ox_parse_error_class, "%s at line %d, column %d [%s:%d]\n", msg, xline, col, file, line);
1649
+ }