ox 2.14.3 → 2.14.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -1
- data/README.md +1 -1
- data/ext/ox/builder.c +8 -8
- data/ext/ox/cache.c +320 -131
- data/ext/ox/cache.h +15 -13
- data/ext/ox/dump.c +2 -2
- data/ext/ox/extconf.rb +5 -2
- data/ext/ox/gen_load.c +8 -76
- data/ext/ox/hash_load.c +0 -4
- data/ext/ox/intern.c +158 -0
- data/ext/ox/intern.h +25 -0
- data/ext/ox/obj_load.c +12 -85
- data/ext/ox/ox.c +1018 -931
- data/ext/ox/ox.h +188 -210
- data/ext/ox/oxcache.c +160 -0
- data/ext/ox/oxcache.h +19 -0
- data/ext/ox/parse.c +72 -31
- data/ext/ox/sax.c +1093 -1279
- data/ext/ox/sax.h +45 -31
- data/ext/ox/sax_as.c +3 -5
- data/ext/ox/sax_buf.c +7 -16
- data/lib/ox/version.rb +1 -1
- metadata +11 -5
- data/ext/ox/sax_has.h +0 -53
data/ext/ox/ox.c
CHANGED
@@ -3,214 +3,211 @@
|
|
3
3
|
* All rights reserved.
|
4
4
|
*/
|
5
5
|
|
6
|
-
#include
|
6
|
+
#include "ox.h"
|
7
|
+
|
7
8
|
#include <errno.h>
|
8
|
-
#include <stdint.h>
|
9
9
|
#include <stdbool.h>
|
10
|
+
#include <stdint.h>
|
10
11
|
#include <stdio.h>
|
12
|
+
#include <stdlib.h>
|
11
13
|
#include <string.h>
|
12
14
|
|
15
|
+
#include "intern.h"
|
13
16
|
#include "ruby.h"
|
14
|
-
#include "ox.h"
|
15
17
|
#include "sax.h"
|
16
18
|
|
17
19
|
/* maximum to allocate on the stack, arbitrary limit */
|
18
|
-
#define SMALL_XML
|
19
|
-
#define WITH_CACHE_TESTS
|
20
|
+
#define SMALL_XML 4096
|
21
|
+
#define WITH_CACHE_TESTS 0
|
20
22
|
|
21
23
|
typedef struct _yesNoOpt {
|
22
|
-
VALUE
|
23
|
-
char
|
24
|
-
} *YesNoOpt;
|
24
|
+
VALUE sym;
|
25
|
+
char *attr;
|
26
|
+
} * YesNoOpt;
|
25
27
|
|
26
28
|
void Init_ox();
|
27
29
|
|
28
|
-
VALUE
|
29
|
-
|
30
|
-
ID
|
31
|
-
ID
|
32
|
-
ID
|
33
|
-
ID
|
34
|
-
ID
|
35
|
-
ID
|
36
|
-
ID
|
37
|
-
ID
|
38
|
-
ID
|
39
|
-
ID
|
40
|
-
ID
|
41
|
-
ID
|
42
|
-
ID
|
43
|
-
ID
|
44
|
-
ID
|
45
|
-
ID
|
46
|
-
ID
|
47
|
-
ID
|
48
|
-
ID
|
49
|
-
ID
|
50
|
-
ID
|
51
|
-
ID
|
52
|
-
ID
|
53
|
-
ID
|
54
|
-
ID
|
55
|
-
ID
|
56
|
-
ID
|
57
|
-
ID
|
58
|
-
ID
|
59
|
-
ID
|
60
|
-
ID
|
61
|
-
ID
|
62
|
-
ID
|
63
|
-
ID
|
64
|
-
ID
|
65
|
-
ID
|
66
|
-
ID
|
67
|
-
ID
|
68
|
-
ID
|
69
|
-
ID
|
70
|
-
ID
|
71
|
-
ID
|
72
|
-
ID
|
73
|
-
ID
|
74
|
-
ID
|
75
|
-
ID
|
76
|
-
ID
|
77
|
-
ID
|
78
|
-
ID
|
79
|
-
ID
|
80
|
-
|
81
|
-
VALUE
|
82
|
-
VALUE
|
83
|
-
VALUE
|
84
|
-
VALUE
|
85
|
-
VALUE
|
86
|
-
|
87
|
-
VALUE
|
88
|
-
VALUE
|
89
|
-
VALUE
|
90
|
-
|
91
|
-
VALUE
|
92
|
-
VALUE
|
93
|
-
VALUE
|
94
|
-
VALUE
|
95
|
-
VALUE
|
96
|
-
VALUE
|
97
|
-
VALUE
|
98
|
-
VALUE
|
99
|
-
VALUE
|
100
|
-
VALUE
|
101
|
-
VALUE
|
102
|
-
VALUE
|
103
|
-
VALUE
|
104
|
-
VALUE
|
105
|
-
VALUE
|
106
|
-
VALUE
|
107
|
-
|
108
|
-
Cache
|
109
|
-
Cache
|
110
|
-
Cache
|
111
|
-
|
112
|
-
static VALUE
|
113
|
-
static VALUE
|
114
|
-
static VALUE
|
115
|
-
static VALUE
|
116
|
-
static VALUE
|
117
|
-
static VALUE
|
118
|
-
static VALUE
|
119
|
-
static VALUE
|
120
|
-
static VALUE
|
121
|
-
static VALUE
|
122
|
-
static VALUE
|
123
|
-
static VALUE
|
124
|
-
static VALUE
|
125
|
-
static VALUE
|
126
|
-
static VALUE
|
127
|
-
static VALUE
|
128
|
-
static VALUE
|
129
|
-
static VALUE
|
130
|
-
static VALUE
|
131
|
-
static VALUE
|
132
|
-
static VALUE
|
133
|
-
static VALUE
|
134
|
-
static VALUE
|
135
|
-
static VALUE
|
136
|
-
static VALUE
|
137
|
-
static VALUE
|
138
|
-
static VALUE
|
139
|
-
static VALUE
|
140
|
-
static VALUE
|
141
|
-
static VALUE
|
142
|
-
static VALUE
|
143
|
-
static VALUE
|
144
|
-
static VALUE
|
145
|
-
static VALUE
|
146
|
-
static VALUE
|
147
|
-
static VALUE
|
148
|
-
static VALUE
|
149
|
-
static VALUE
|
150
|
-
static VALUE
|
151
|
-
static VALUE
|
152
|
-
static VALUE
|
153
|
-
static VALUE
|
154
|
-
|
155
|
-
static ID
|
156
|
-
static ID
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
No,
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
NULL, // html_hints
|
187
|
-
Qnil, // attr_key_mod;
|
188
|
-
Qnil, // element_key_mod;
|
189
|
-
0 // rb_enc
|
30
|
+
VALUE Ox = Qnil;
|
31
|
+
|
32
|
+
ID ox_abort_id;
|
33
|
+
ID ox_at_column_id;
|
34
|
+
ID ox_at_content_id;
|
35
|
+
ID ox_at_id;
|
36
|
+
ID ox_at_line_id;
|
37
|
+
ID ox_at_pos_id;
|
38
|
+
ID ox_at_value_id;
|
39
|
+
ID ox_attr_id;
|
40
|
+
ID ox_attr_value_id;
|
41
|
+
ID ox_attributes_id;
|
42
|
+
ID ox_attrs_done_id;
|
43
|
+
ID ox_beg_id;
|
44
|
+
ID ox_bigdecimal_id;
|
45
|
+
ID ox_call_id;
|
46
|
+
ID ox_cdata_id;
|
47
|
+
ID ox_comment_id;
|
48
|
+
ID ox_den_id;
|
49
|
+
ID ox_doctype_id;
|
50
|
+
ID ox_end_element_id;
|
51
|
+
ID ox_end_id;
|
52
|
+
ID ox_end_instruct_id;
|
53
|
+
ID ox_error_id;
|
54
|
+
ID ox_excl_id;
|
55
|
+
ID ox_external_encoding_id;
|
56
|
+
ID ox_fileno_id;
|
57
|
+
ID ox_force_encoding_id;
|
58
|
+
ID ox_inspect_id;
|
59
|
+
ID ox_instruct_id;
|
60
|
+
ID ox_jd_id;
|
61
|
+
ID ox_keys_id;
|
62
|
+
ID ox_local_id;
|
63
|
+
ID ox_mesg_id;
|
64
|
+
ID ox_message_id;
|
65
|
+
ID ox_new_id;
|
66
|
+
ID ox_nodes_id;
|
67
|
+
ID ox_num_id;
|
68
|
+
ID ox_parse_id;
|
69
|
+
ID ox_pos_id;
|
70
|
+
ID ox_read_id;
|
71
|
+
ID ox_readpartial_id;
|
72
|
+
ID ox_start_element_id;
|
73
|
+
ID ox_string_id;
|
74
|
+
ID ox_text_id;
|
75
|
+
ID ox_to_c_id;
|
76
|
+
ID ox_to_s_id;
|
77
|
+
ID ox_to_sym_id;
|
78
|
+
ID ox_tv_nsec_id;
|
79
|
+
ID ox_tv_sec_id;
|
80
|
+
ID ox_tv_usec_id;
|
81
|
+
ID ox_value_id;
|
82
|
+
|
83
|
+
VALUE ox_encoding_sym;
|
84
|
+
VALUE ox_version_sym;
|
85
|
+
VALUE ox_standalone_sym;
|
86
|
+
VALUE ox_indent_sym;
|
87
|
+
VALUE ox_size_sym;
|
88
|
+
|
89
|
+
VALUE ox_empty_string;
|
90
|
+
VALUE ox_zero_fixnum;
|
91
|
+
VALUE ox_sym_bank; // Array
|
92
|
+
|
93
|
+
VALUE ox_arg_error_class;
|
94
|
+
VALUE ox_bag_clas;
|
95
|
+
VALUE ox_bigdecimal_class;
|
96
|
+
VALUE ox_cdata_clas;
|
97
|
+
VALUE ox_comment_clas;
|
98
|
+
VALUE ox_raw_clas;
|
99
|
+
VALUE ox_date_class;
|
100
|
+
VALUE ox_doctype_clas;
|
101
|
+
VALUE ox_document_clas;
|
102
|
+
VALUE ox_element_clas;
|
103
|
+
VALUE ox_instruct_clas;
|
104
|
+
VALUE ox_parse_error_class;
|
105
|
+
VALUE ox_stringio_class;
|
106
|
+
VALUE ox_struct_class;
|
107
|
+
VALUE ox_syntax_error_class;
|
108
|
+
VALUE ox_time_class;
|
109
|
+
|
110
|
+
Cache ox_symbol_cache = 0;
|
111
|
+
Cache ox_class_cache = 0;
|
112
|
+
Cache ox_attr_cache = 0;
|
113
|
+
|
114
|
+
static VALUE abort_sym;
|
115
|
+
static VALUE active_sym;
|
116
|
+
static VALUE attr_key_mod_sym;
|
117
|
+
static VALUE auto_define_sym;
|
118
|
+
static VALUE auto_sym;
|
119
|
+
static VALUE block_sym;
|
120
|
+
static VALUE circular_sym;
|
121
|
+
static VALUE convert_special_sym;
|
122
|
+
static VALUE effort_sym;
|
123
|
+
static VALUE generic_sym;
|
124
|
+
static VALUE hash_no_attrs_sym;
|
125
|
+
static VALUE hash_sym;
|
126
|
+
static VALUE inactive_sym;
|
127
|
+
static VALUE invalid_replace_sym;
|
128
|
+
static VALUE limited_sym;
|
129
|
+
static VALUE margin_sym;
|
130
|
+
static VALUE mode_sym;
|
131
|
+
static VALUE nest_ok_sym;
|
132
|
+
static VALUE no_empty_sym;
|
133
|
+
static VALUE object_sym;
|
134
|
+
static VALUE off_sym;
|
135
|
+
static VALUE opt_format_sym;
|
136
|
+
static VALUE optimized_sym;
|
137
|
+
static VALUE overlay_sym;
|
138
|
+
static VALUE skip_none_sym;
|
139
|
+
static VALUE skip_off_sym;
|
140
|
+
static VALUE skip_return_sym;
|
141
|
+
static VALUE skip_sym;
|
142
|
+
static VALUE skip_white_sym;
|
143
|
+
static VALUE smart_sym;
|
144
|
+
static VALUE strict_sym;
|
145
|
+
static VALUE strip_namespace_sym;
|
146
|
+
static VALUE symbolize_keys_sym;
|
147
|
+
static VALUE symbolize_sym;
|
148
|
+
static VALUE tolerant_sym;
|
149
|
+
static VALUE trace_sym;
|
150
|
+
static VALUE with_cdata_sym;
|
151
|
+
static VALUE with_dtd_sym;
|
152
|
+
static VALUE with_instruct_sym;
|
153
|
+
static VALUE with_xml_sym;
|
154
|
+
static VALUE xsd_date_sym;
|
155
|
+
static VALUE element_key_mod_sym;
|
156
|
+
|
157
|
+
static ID encoding_id;
|
158
|
+
static ID has_key_id;
|
159
|
+
|
160
|
+
rb_encoding *ox_utf8_encoding = 0;
|
161
|
+
|
162
|
+
struct _options ox_default_options = {
|
163
|
+
{'\0'}, // encoding
|
164
|
+
{'\0'}, // margin
|
165
|
+
2, // indent
|
166
|
+
0, // trace
|
167
|
+
0, // margin_len
|
168
|
+
No, // with_dtd
|
169
|
+
No, // with_xml
|
170
|
+
No, // with_instruct
|
171
|
+
No, // circular
|
172
|
+
No, // xsd_date
|
173
|
+
NoMode, // mode
|
174
|
+
StrictEffort, // effort
|
175
|
+
Yes, // sym_keys
|
176
|
+
SpcSkip, // skip
|
177
|
+
No, // smart
|
178
|
+
true, // convert_special
|
179
|
+
No, // allow_invalid
|
180
|
+
false, // no_empty
|
181
|
+
false, // with_cdata
|
182
|
+
{'\0'}, // inv_repl
|
183
|
+
{'\0'}, // strip_ns
|
184
|
+
NULL, // html_hints
|
185
|
+
Qnil, // attr_key_mod;
|
186
|
+
Qnil, // element_key_mod;
|
187
|
+
0 // rb_enc
|
190
188
|
};
|
191
189
|
|
192
|
-
extern ParseCallbacks
|
193
|
-
extern ParseCallbacks
|
194
|
-
extern ParseCallbacks
|
195
|
-
extern ParseCallbacks
|
196
|
-
extern ParseCallbacks
|
197
|
-
extern ParseCallbacks
|
198
|
-
extern ParseCallbacks
|
199
|
-
extern ParseCallbacks
|
190
|
+
extern ParseCallbacks ox_obj_callbacks;
|
191
|
+
extern ParseCallbacks ox_gen_callbacks;
|
192
|
+
extern ParseCallbacks ox_limited_callbacks;
|
193
|
+
extern ParseCallbacks ox_nomode_callbacks;
|
194
|
+
extern ParseCallbacks ox_hash_callbacks;
|
195
|
+
extern ParseCallbacks ox_hash_cdata_callbacks;
|
196
|
+
extern ParseCallbacks ox_hash_no_attrs_callbacks;
|
197
|
+
extern ParseCallbacks ox_hash_no_attrs_cdata_callbacks;
|
200
198
|
|
201
|
-
static void
|
199
|
+
static void parse_dump_options(VALUE ropts, Options copts);
|
202
200
|
|
203
|
-
static char*
|
204
|
-
defuse_bom(char *xml, Options options) {
|
201
|
+
static char *defuse_bom(char *xml, Options options) {
|
205
202
|
switch ((uint8_t)*xml) {
|
206
|
-
case 0xEF:
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
203
|
+
case 0xEF: // UTF-8
|
204
|
+
if (0xBB == (uint8_t)xml[1] && 0xBF == (uint8_t)xml[2]) {
|
205
|
+
options->rb_enc = ox_utf8_encoding;
|
206
|
+
xml += 3;
|
207
|
+
} else {
|
208
|
+
rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
|
209
|
+
}
|
210
|
+
break;
|
214
211
|
#if 0
|
215
212
|
case 0xFE: // UTF-16BE
|
216
213
|
if (0xFF == (uint8_t)xml[1]) {
|
@@ -243,31 +240,30 @@ defuse_bom(char *xml, Options options) {
|
|
243
240
|
break;
|
244
241
|
#endif
|
245
242
|
default:
|
246
|
-
|
247
|
-
|
248
|
-
|
243
|
+
// Let it fail if there is a BOM that is not UTF-8. Other BOM options
|
244
|
+
// are not ASCII compatible.
|
245
|
+
break;
|
249
246
|
}
|
250
247
|
return xml;
|
251
248
|
}
|
252
249
|
|
253
|
-
static VALUE
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
VALUE ov;
|
250
|
+
static VALUE hints_to_overlay(Hints hints) {
|
251
|
+
volatile VALUE overlay = rb_hash_new();
|
252
|
+
Hint h;
|
253
|
+
int i;
|
254
|
+
VALUE ov;
|
259
255
|
|
260
256
|
for (i = hints->size, h = hints->hints; 0 < i; i--, h++) {
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
257
|
+
switch (h->overlay) {
|
258
|
+
case InactiveOverlay: ov = inactive_sym; break;
|
259
|
+
case BlockOverlay: ov = block_sym; break;
|
260
|
+
case OffOverlay: ov = off_sym; break;
|
261
|
+
case AbortOverlay: ov = abort_sym; break;
|
262
|
+
case NestOverlay: ov = nest_ok_sym; break;
|
263
|
+
case ActiveOverlay:
|
264
|
+
default: ov = active_sym; break;
|
265
|
+
}
|
266
|
+
rb_hash_aset(overlay, rb_str_new2(h->name), ov);
|
271
267
|
}
|
272
268
|
return overlay;
|
273
269
|
}
|
@@ -292,10 +288,12 @@ hints_to_overlay(Hints hints) {
|
|
292
288
|
* - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
|
293
289
|
* - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
|
294
290
|
* - _:convert_special_ [true|false|nil] flag indicating special characters like < are converted with the SAX parser
|
295
|
-
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway
|
291
|
+
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway
|
292
|
+
* as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
296
293
|
* - _:no_empty_ [true|false|nil] flag indicating there should be no empty elements in a dump
|
297
294
|
* - _:with_cdata_ [true|false] includes cdata in hash_load results
|
298
|
-
* - _:strip_namespace_ [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will
|
295
|
+
* - _:strip_namespace_ [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will
|
296
|
+
* strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
299
297
|
* - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
|
300
298
|
* - _:active_ - make the normal callback for the element
|
301
299
|
* - _:nest_ok_ - active but the nesting check is ignored
|
@@ -309,90 +307,107 @@ hints_to_overlay(Hints hints) {
|
|
309
307
|
* Note that an indent of less than zero will result in a tight one line output
|
310
308
|
* unless the text in the XML fields contain new line characters.
|
311
309
|
*/
|
312
|
-
static VALUE
|
313
|
-
|
314
|
-
|
315
|
-
int elen = (int)strlen(ox_default_options.encoding);
|
310
|
+
static VALUE get_def_opts(VALUE self) {
|
311
|
+
VALUE opts = rb_hash_new();
|
312
|
+
int elen = (int)strlen(ox_default_options.encoding);
|
316
313
|
|
317
314
|
rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_str_new(ox_default_options.encoding, elen));
|
318
315
|
rb_hash_aset(opts, margin_sym, rb_str_new(ox_default_options.margin, ox_default_options.margin_len));
|
319
316
|
rb_hash_aset(opts, ox_indent_sym, INT2FIX(ox_default_options.indent));
|
320
317
|
rb_hash_aset(opts, trace_sym, INT2FIX(ox_default_options.trace));
|
321
|
-
rb_hash_aset(opts,
|
322
|
-
|
323
|
-
|
324
|
-
rb_hash_aset(opts,
|
325
|
-
|
326
|
-
|
318
|
+
rb_hash_aset(opts,
|
319
|
+
with_dtd_sym,
|
320
|
+
(Yes == ox_default_options.with_dtd) ? Qtrue : ((No == ox_default_options.with_dtd) ? Qfalse : Qnil));
|
321
|
+
rb_hash_aset(opts,
|
322
|
+
with_xml_sym,
|
323
|
+
(Yes == ox_default_options.with_xml) ? Qtrue : ((No == ox_default_options.with_xml) ? Qfalse : Qnil));
|
324
|
+
rb_hash_aset(
|
325
|
+
opts,
|
326
|
+
with_instruct_sym,
|
327
|
+
(Yes == ox_default_options.with_instruct) ? Qtrue : ((No == ox_default_options.with_instruct) ? Qfalse : Qnil));
|
328
|
+
rb_hash_aset(opts,
|
329
|
+
circular_sym,
|
330
|
+
(Yes == ox_default_options.circular) ? Qtrue : ((No == ox_default_options.circular) ? Qfalse : Qnil));
|
331
|
+
rb_hash_aset(opts,
|
332
|
+
xsd_date_sym,
|
333
|
+
(Yes == ox_default_options.xsd_date) ? Qtrue : ((No == ox_default_options.xsd_date) ? Qfalse : Qnil));
|
334
|
+
rb_hash_aset(opts,
|
335
|
+
symbolize_keys_sym,
|
336
|
+
(Yes == ox_default_options.sym_keys) ? Qtrue : ((No == ox_default_options.sym_keys) ? Qfalse : Qnil));
|
327
337
|
rb_hash_aset(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);
|
328
338
|
rb_hash_aset(opts, element_key_mod_sym, ox_default_options.element_key_mod);
|
329
|
-
rb_hash_aset(opts,
|
339
|
+
rb_hash_aset(opts,
|
340
|
+
smart_sym,
|
341
|
+
(Yes == ox_default_options.smart) ? Qtrue : ((No == ox_default_options.smart) ? Qfalse : Qnil));
|
330
342
|
rb_hash_aset(opts, convert_special_sym, (ox_default_options.convert_special) ? Qtrue : Qfalse);
|
331
343
|
rb_hash_aset(opts, no_empty_sym, (ox_default_options.no_empty) ? Qtrue : Qfalse);
|
332
344
|
rb_hash_aset(opts, with_cdata_sym, (ox_default_options.with_cdata) ? Qtrue : Qfalse);
|
333
345
|
switch (ox_default_options.mode) {
|
334
|
-
case ObjMode:
|
335
|
-
case GenMode:
|
336
|
-
case LimMode:
|
337
|
-
case HashMode:
|
338
|
-
case HashNoAttrMode:
|
346
|
+
case ObjMode: rb_hash_aset(opts, mode_sym, object_sym); break;
|
347
|
+
case GenMode: rb_hash_aset(opts, mode_sym, generic_sym); break;
|
348
|
+
case LimMode: rb_hash_aset(opts, mode_sym, limited_sym); break;
|
349
|
+
case HashMode: rb_hash_aset(opts, mode_sym, hash_sym); break;
|
350
|
+
case HashNoAttrMode: rb_hash_aset(opts, mode_sym, hash_no_attrs_sym); break;
|
339
351
|
case NoMode:
|
340
|
-
default:
|
352
|
+
default: rb_hash_aset(opts, mode_sym, Qnil); break;
|
341
353
|
}
|
342
354
|
switch (ox_default_options.effort) {
|
343
|
-
case StrictEffort:
|
344
|
-
case TolerantEffort:
|
345
|
-
case AutoEffort:
|
355
|
+
case StrictEffort: rb_hash_aset(opts, effort_sym, strict_sym); break;
|
356
|
+
case TolerantEffort: rb_hash_aset(opts, effort_sym, tolerant_sym); break;
|
357
|
+
case AutoEffort: rb_hash_aset(opts, effort_sym, auto_define_sym); break;
|
346
358
|
case NoEffort:
|
347
|
-
default:
|
359
|
+
default: rb_hash_aset(opts, effort_sym, Qnil); break;
|
348
360
|
}
|
349
361
|
switch (ox_default_options.skip) {
|
350
|
-
case OffSkip:
|
351
|
-
case NoSkip:
|
352
|
-
case CrSkip:
|
353
|
-
case SpcSkip:
|
354
|
-
default:
|
362
|
+
case OffSkip: rb_hash_aset(opts, skip_sym, skip_off_sym); break;
|
363
|
+
case NoSkip: rb_hash_aset(opts, skip_sym, skip_none_sym); break;
|
364
|
+
case CrSkip: rb_hash_aset(opts, skip_sym, skip_return_sym); break;
|
365
|
+
case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
|
366
|
+
default: rb_hash_aset(opts, skip_sym, Qnil); break;
|
355
367
|
}
|
356
368
|
if (Yes == ox_default_options.allow_invalid) {
|
357
|
-
|
369
|
+
rb_hash_aset(opts, invalid_replace_sym, Qnil);
|
358
370
|
} else {
|
359
|
-
|
371
|
+
rb_hash_aset(opts,
|
372
|
+
invalid_replace_sym,
|
373
|
+
rb_str_new(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl));
|
360
374
|
}
|
361
375
|
if ('\0' == *ox_default_options.strip_ns) {
|
362
|
-
|
376
|
+
rb_hash_aset(opts, strip_namespace_sym, Qfalse);
|
363
377
|
} else if ('*' == *ox_default_options.strip_ns && '\0' == ox_default_options.strip_ns[1]) {
|
364
|
-
|
378
|
+
rb_hash_aset(opts, strip_namespace_sym, Qtrue);
|
365
379
|
} else {
|
366
|
-
|
380
|
+
rb_hash_aset(opts,
|
381
|
+
strip_namespace_sym,
|
382
|
+
rb_str_new(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns)));
|
367
383
|
}
|
368
384
|
if (NULL == ox_default_options.html_hints) {
|
369
|
-
|
370
|
-
|
385
|
+
// rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_hints_html()));
|
386
|
+
rb_hash_aset(opts, overlay_sym, Qnil);
|
371
387
|
} else {
|
372
|
-
|
388
|
+
rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_default_options.html_hints));
|
373
389
|
}
|
374
390
|
return opts;
|
375
391
|
}
|
376
392
|
|
377
|
-
static int
|
378
|
-
|
379
|
-
|
380
|
-
Hint hint;
|
393
|
+
static int set_overlay(VALUE key, VALUE value, VALUE ctx) {
|
394
|
+
Hints hints = (Hints)ctx;
|
395
|
+
Hint hint;
|
381
396
|
|
382
397
|
if (NULL != (hint = ox_hint_find(hints, StringValuePtr(key)))) {
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
398
|
+
if (active_sym == value) {
|
399
|
+
hint->overlay = ActiveOverlay;
|
400
|
+
} else if (inactive_sym == value) {
|
401
|
+
hint->overlay = InactiveOverlay;
|
402
|
+
} else if (block_sym == value) {
|
403
|
+
hint->overlay = BlockOverlay;
|
404
|
+
} else if (nest_ok_sym == value) {
|
405
|
+
hint->overlay = NestOverlay;
|
406
|
+
} else if (off_sym == value) {
|
407
|
+
hint->overlay = OffOverlay;
|
408
|
+
} else if (abort_sym == value) {
|
409
|
+
hint->overlay = AbortOverlay;
|
410
|
+
}
|
396
411
|
}
|
397
412
|
return ST_CONTINUE;
|
398
413
|
}
|
@@ -410,8 +425,7 @@ set_overlay(VALUE key, VALUE value, VALUE ctx) {
|
|
410
425
|
*
|
411
426
|
* *return* [Hash] default SAX HTML settings
|
412
427
|
*/
|
413
|
-
static VALUE
|
414
|
-
sax_html_overlay(VALUE self) {
|
428
|
+
static VALUE sax_html_overlay(VALUE self) {
|
415
429
|
return hints_to_overlay(ox_hints_html());
|
416
430
|
}
|
417
431
|
|
@@ -435,8 +449,10 @@ sax_html_overlay(VALUE self) {
|
|
435
449
|
* - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil
|
436
450
|
* - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
|
437
451
|
* - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
|
438
|
-
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
439
|
-
*
|
452
|
+
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
453
|
+
* anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
454
|
+
* - _:strip_namespace_ [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
|
455
|
+
* will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
440
456
|
* - _:with_cdata_ [true|false] includes cdata in hash_load results
|
441
457
|
* - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
|
442
458
|
* - _:active_ - make the normal callback for the element
|
@@ -448,201 +464,199 @@ sax_html_overlay(VALUE self) {
|
|
448
464
|
*
|
449
465
|
* *return* [nil]
|
450
466
|
*/
|
451
|
-
static VALUE
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
};
|
463
|
-
YesNoOpt o;
|
464
|
-
VALUE v;
|
467
|
+
static VALUE set_def_opts(VALUE self, VALUE opts) {
|
468
|
+
struct _yesNoOpt ynos[] = {{with_xml_sym, &ox_default_options.with_xml},
|
469
|
+
{with_dtd_sym, &ox_default_options.with_dtd},
|
470
|
+
{with_instruct_sym, &ox_default_options.with_instruct},
|
471
|
+
{xsd_date_sym, &ox_default_options.xsd_date},
|
472
|
+
{circular_sym, &ox_default_options.circular},
|
473
|
+
{symbolize_keys_sym, &ox_default_options.sym_keys},
|
474
|
+
{smart_sym, &ox_default_options.smart},
|
475
|
+
{Qnil, 0}};
|
476
|
+
YesNoOpt o;
|
477
|
+
VALUE v;
|
465
478
|
|
466
479
|
Check_Type(opts, T_HASH);
|
467
480
|
|
468
481
|
v = rb_hash_aref(opts, ox_encoding_sym);
|
469
482
|
if (Qnil == v) {
|
470
|
-
|
483
|
+
*ox_default_options.encoding = '\0';
|
471
484
|
} else {
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
|
476
|
-
#endif
|
485
|
+
Check_Type(v, T_STRING);
|
486
|
+
strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
|
487
|
+
ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
|
477
488
|
}
|
478
489
|
|
479
490
|
v = rb_hash_aref(opts, ox_indent_sym);
|
480
491
|
if (Qnil != v) {
|
481
|
-
|
482
|
-
|
492
|
+
Check_Type(v, T_FIXNUM);
|
493
|
+
ox_default_options.indent = FIX2INT(v);
|
483
494
|
}
|
484
495
|
|
485
496
|
v = rb_hash_aref(opts, trace_sym);
|
486
497
|
if (Qnil != v) {
|
487
|
-
|
488
|
-
|
498
|
+
Check_Type(v, T_FIXNUM);
|
499
|
+
ox_default_options.trace = FIX2INT(v);
|
489
500
|
}
|
490
501
|
|
491
502
|
v = rb_hash_aref(opts, mode_sym);
|
492
503
|
if (Qnil == v) {
|
493
|
-
|
504
|
+
ox_default_options.mode = NoMode;
|
494
505
|
} else if (object_sym == v) {
|
495
|
-
|
506
|
+
ox_default_options.mode = ObjMode;
|
496
507
|
} else if (generic_sym == v) {
|
497
|
-
|
508
|
+
ox_default_options.mode = GenMode;
|
498
509
|
} else if (limited_sym == v) {
|
499
|
-
|
510
|
+
ox_default_options.mode = LimMode;
|
500
511
|
} else if (hash_sym == v) {
|
501
|
-
|
512
|
+
ox_default_options.mode = HashMode;
|
502
513
|
} else if (hash_no_attrs_sym == v) {
|
503
|
-
|
514
|
+
ox_default_options.mode = HashNoAttrMode;
|
504
515
|
} else {
|
505
|
-
|
516
|
+
rb_raise(ox_parse_error_class, ":mode must be :object, :generic, :limited, :hash, :hash_no_attrs, or nil.\n");
|
506
517
|
}
|
507
518
|
|
508
519
|
v = rb_hash_aref(opts, effort_sym);
|
509
520
|
if (Qnil == v) {
|
510
|
-
|
521
|
+
ox_default_options.effort = NoEffort;
|
511
522
|
} else if (strict_sym == v) {
|
512
|
-
|
523
|
+
ox_default_options.effort = StrictEffort;
|
513
524
|
} else if (tolerant_sym == v) {
|
514
|
-
|
525
|
+
ox_default_options.effort = TolerantEffort;
|
515
526
|
} else if (auto_define_sym == v) {
|
516
|
-
|
527
|
+
ox_default_options.effort = AutoEffort;
|
517
528
|
} else {
|
518
|
-
|
529
|
+
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
|
519
530
|
}
|
520
531
|
|
521
532
|
v = rb_hash_aref(opts, skip_sym);
|
522
533
|
if (Qnil == v) {
|
523
|
-
|
534
|
+
ox_default_options.skip = NoSkip;
|
524
535
|
} else if (skip_off_sym == v) {
|
525
|
-
|
536
|
+
ox_default_options.skip = OffSkip;
|
526
537
|
} else if (skip_none_sym == v) {
|
527
|
-
|
538
|
+
ox_default_options.skip = NoSkip;
|
528
539
|
} else if (skip_return_sym == v) {
|
529
|
-
|
540
|
+
ox_default_options.skip = CrSkip;
|
530
541
|
} else if (skip_white_sym == v) {
|
531
|
-
|
542
|
+
ox_default_options.skip = SpcSkip;
|
532
543
|
} else {
|
533
|
-
|
544
|
+
rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, :skip_off, or nil.\n");
|
534
545
|
}
|
535
546
|
|
536
547
|
v = rb_hash_lookup(opts, convert_special_sym);
|
537
548
|
if (Qnil == v) {
|
538
|
-
|
549
|
+
// no change
|
539
550
|
} else if (Qtrue == v) {
|
540
|
-
|
551
|
+
ox_default_options.convert_special = 1;
|
541
552
|
} else if (Qfalse == v) {
|
542
|
-
|
553
|
+
ox_default_options.convert_special = 0;
|
543
554
|
} else {
|
544
|
-
|
555
|
+
rb_raise(ox_parse_error_class, ":convert_special must be true or false.\n");
|
545
556
|
}
|
546
557
|
|
547
558
|
v = rb_hash_lookup(opts, no_empty_sym);
|
548
559
|
if (Qnil == v) {
|
549
|
-
|
560
|
+
// no change
|
550
561
|
} else if (Qtrue == v) {
|
551
|
-
|
562
|
+
ox_default_options.no_empty = 1;
|
552
563
|
} else if (Qfalse == v) {
|
553
|
-
|
564
|
+
ox_default_options.no_empty = 0;
|
554
565
|
} else {
|
555
|
-
|
566
|
+
rb_raise(ox_parse_error_class, ":no_empty must be true or false.\n");
|
556
567
|
}
|
557
568
|
|
558
569
|
v = rb_hash_aref(opts, invalid_replace_sym);
|
559
570
|
if (Qnil == v) {
|
560
|
-
|
571
|
+
ox_default_options.allow_invalid = Yes;
|
561
572
|
} else {
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
573
|
+
long slen;
|
574
|
+
|
575
|
+
Check_Type(v, T_STRING);
|
576
|
+
slen = RSTRING_LEN(v);
|
577
|
+
if (sizeof(ox_default_options.inv_repl) - 2 < (size_t)slen) {
|
578
|
+
rb_raise(ox_parse_error_class,
|
579
|
+
":invalid_replace can be no longer than %d characters.",
|
580
|
+
(int)sizeof(ox_default_options.inv_repl) - 2);
|
581
|
+
}
|
582
|
+
strncpy(ox_default_options.inv_repl + 1, StringValuePtr(v), sizeof(ox_default_options.inv_repl) - 1);
|
583
|
+
ox_default_options.inv_repl[sizeof(ox_default_options.inv_repl) - 1] = '\0';
|
584
|
+
*ox_default_options.inv_repl = (char)slen;
|
585
|
+
ox_default_options.allow_invalid = No;
|
574
586
|
}
|
575
587
|
|
576
588
|
v = rb_hash_aref(opts, strip_namespace_sym);
|
577
589
|
if (Qfalse == v) {
|
578
|
-
|
590
|
+
*ox_default_options.strip_ns = '\0';
|
579
591
|
} else if (Qtrue == v) {
|
580
|
-
|
581
|
-
|
592
|
+
*ox_default_options.strip_ns = '*';
|
593
|
+
ox_default_options.strip_ns[1] = '\0';
|
582
594
|
} else if (Qnil != v) {
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
595
|
+
long slen;
|
596
|
+
|
597
|
+
Check_Type(v, T_STRING);
|
598
|
+
slen = RSTRING_LEN(v);
|
599
|
+
if (sizeof(ox_default_options.strip_ns) - 1 < (size_t)slen) {
|
600
|
+
rb_raise(ox_parse_error_class,
|
601
|
+
":strip_namespace can be no longer than %d characters.",
|
602
|
+
(int)sizeof(ox_default_options.strip_ns) - 1);
|
603
|
+
}
|
604
|
+
strncpy(ox_default_options.strip_ns, StringValuePtr(v), sizeof(ox_default_options.strip_ns) - 1);
|
605
|
+
ox_default_options.strip_ns[sizeof(ox_default_options.strip_ns) - 1] = '\0';
|
593
606
|
}
|
594
607
|
|
595
608
|
v = rb_hash_aref(opts, margin_sym);
|
596
609
|
if (Qnil != v) {
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
610
|
+
long slen;
|
611
|
+
|
612
|
+
Check_Type(v, T_STRING);
|
613
|
+
slen = RSTRING_LEN(v);
|
614
|
+
if (sizeof(ox_default_options.margin) - 1 < (size_t)slen) {
|
615
|
+
rb_raise(ox_parse_error_class,
|
616
|
+
":margin can be no longer than %d characters.",
|
617
|
+
(int)sizeof(ox_default_options.margin) - 1);
|
618
|
+
}
|
619
|
+
strncpy(ox_default_options.margin, StringValuePtr(v), sizeof(ox_default_options.margin) - 1);
|
620
|
+
ox_default_options.margin[sizeof(ox_default_options.margin) - 1] = '\0';
|
621
|
+
ox_default_options.margin_len = strlen(ox_default_options.margin);
|
608
622
|
}
|
609
623
|
|
610
624
|
for (o = ynos; 0 != o->attr; o++) {
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
625
|
+
v = rb_hash_lookup(opts, o->sym);
|
626
|
+
if (Qnil == v) {
|
627
|
+
*o->attr = NotSet;
|
628
|
+
} else if (Qtrue == v) {
|
629
|
+
*o->attr = Yes;
|
630
|
+
} else if (Qfalse == v) {
|
631
|
+
*o->attr = No;
|
632
|
+
} else {
|
633
|
+
rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
|
634
|
+
}
|
621
635
|
}
|
622
636
|
v = rb_hash_aref(opts, overlay_sym);
|
623
637
|
if (Qnil == v) {
|
624
|
-
|
625
|
-
|
638
|
+
ox_hints_destroy(ox_default_options.html_hints);
|
639
|
+
ox_default_options.html_hints = NULL;
|
626
640
|
} else {
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
641
|
+
int cnt;
|
642
|
+
|
643
|
+
Check_Type(v, T_HASH);
|
644
|
+
cnt = (int)RHASH_SIZE(v);
|
645
|
+
if (0 == cnt) {
|
646
|
+
ox_hints_destroy(ox_default_options.html_hints);
|
647
|
+
ox_default_options.html_hints = NULL;
|
648
|
+
} else {
|
649
|
+
ox_hints_destroy(ox_default_options.html_hints);
|
650
|
+
ox_default_options.html_hints = ox_hints_dup(ox_hints_html());
|
651
|
+
rb_hash_foreach(v, set_overlay, (VALUE)ox_default_options.html_hints);
|
652
|
+
}
|
639
653
|
}
|
640
654
|
if (Qnil != (v = rb_hash_lookup(opts, with_cdata_sym))) {
|
641
|
-
|
655
|
+
ox_default_options.with_cdata = (Qtrue == v);
|
642
656
|
}
|
643
657
|
|
644
658
|
ox_default_options.element_key_mod = rb_hash_lookup2(opts, element_key_mod_sym, ox_default_options.element_key_mod);
|
645
|
-
ox_default_options.attr_key_mod
|
659
|
+
ox_default_options.attr_key_mod = rb_hash_lookup2(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);
|
646
660
|
|
647
661
|
return Qnil;
|
648
662
|
}
|
@@ -657,23 +671,22 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
657
671
|
* - +xml+ [String] XML String in optimized Object format.
|
658
672
|
* *return* [Object] deserialized Object.
|
659
673
|
*/
|
660
|
-
static VALUE
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
struct
|
666
|
-
struct _err err;
|
674
|
+
static VALUE to_obj(VALUE self, VALUE ruby_xml) {
|
675
|
+
char *xml, *x;
|
676
|
+
size_t len;
|
677
|
+
VALUE obj;
|
678
|
+
struct _options options = ox_default_options;
|
679
|
+
struct _err err;
|
667
680
|
|
668
681
|
err_init(&err);
|
669
682
|
Check_Type(ruby_xml, T_STRING);
|
670
683
|
/* the xml string gets modified so make a copy of it */
|
671
684
|
len = RSTRING_LEN(ruby_xml) + 1;
|
672
|
-
x
|
685
|
+
x = defuse_bom(StringValuePtr(ruby_xml), &options);
|
673
686
|
if (SMALL_XML < len) {
|
674
|
-
|
687
|
+
xml = ALLOC_N(char, len);
|
675
688
|
} else {
|
676
|
-
|
689
|
+
xml = ALLOCA_N(char, len);
|
677
690
|
}
|
678
691
|
memcpy(xml, x, len);
|
679
692
|
#ifdef RB_GC_GUARD
|
@@ -681,14 +694,14 @@ to_obj(VALUE self, VALUE ruby_xml) {
|
|
681
694
|
#endif
|
682
695
|
obj = ox_parse(xml, len - 1, ox_obj_callbacks, 0, &options, &err);
|
683
696
|
if (SMALL_XML < len) {
|
684
|
-
|
697
|
+
xfree(xml);
|
685
698
|
}
|
686
699
|
#ifdef RB_GC_GUARD
|
687
700
|
RB_GC_GUARD(obj);
|
688
701
|
rb_gc_enable();
|
689
702
|
#endif
|
690
703
|
if (err_has(&err)) {
|
691
|
-
|
704
|
+
ox_err_raise(&err);
|
692
705
|
}
|
693
706
|
return obj;
|
694
707
|
}
|
@@ -701,207 +714,198 @@ to_obj(VALUE self, VALUE ruby_xml) {
|
|
701
714
|
*
|
702
715
|
* _raise_ [Exception] if the XML is malformed.
|
703
716
|
*/
|
704
|
-
static VALUE
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
struct
|
710
|
-
struct _err err;
|
717
|
+
static VALUE to_gen(VALUE self, VALUE ruby_xml) {
|
718
|
+
char *xml, *x;
|
719
|
+
size_t len;
|
720
|
+
VALUE obj;
|
721
|
+
struct _options options = ox_default_options;
|
722
|
+
struct _err err;
|
711
723
|
|
712
724
|
err_init(&err);
|
713
725
|
Check_Type(ruby_xml, T_STRING);
|
714
726
|
/* the xml string gets modified so make a copy of it */
|
715
727
|
len = RSTRING_LEN(ruby_xml) + 1;
|
716
|
-
x
|
728
|
+
x = defuse_bom(StringValuePtr(ruby_xml), &options);
|
717
729
|
if (SMALL_XML < len) {
|
718
|
-
|
730
|
+
xml = ALLOC_N(char, len);
|
719
731
|
} else {
|
720
|
-
|
732
|
+
xml = ALLOCA_N(char, len);
|
721
733
|
}
|
722
734
|
memcpy(xml, x, len);
|
723
735
|
obj = ox_parse(xml, len - 1, ox_gen_callbacks, 0, &options, &err);
|
724
736
|
if (SMALL_XML < len) {
|
725
|
-
|
737
|
+
xfree(xml);
|
726
738
|
}
|
727
739
|
if (err_has(&err)) {
|
728
|
-
|
740
|
+
ox_err_raise(&err);
|
729
741
|
}
|
730
742
|
return obj;
|
731
743
|
}
|
732
744
|
|
733
|
-
static VALUE
|
734
|
-
|
735
|
-
|
736
|
-
struct _options options = ox_default_options;
|
745
|
+
static VALUE load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
|
746
|
+
VALUE obj;
|
747
|
+
struct _options options = ox_default_options;
|
737
748
|
|
738
749
|
if (1 == argc && rb_cHash == rb_obj_class(*argv)) {
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
750
|
+
VALUE h = *argv;
|
751
|
+
VALUE v;
|
752
|
+
|
753
|
+
if (Qnil != (v = rb_hash_lookup(h, mode_sym))) {
|
754
|
+
if (object_sym == v) {
|
755
|
+
options.mode = ObjMode;
|
756
|
+
} else if (optimized_sym == v) {
|
757
|
+
options.mode = ObjMode;
|
758
|
+
} else if (generic_sym == v) {
|
759
|
+
options.mode = GenMode;
|
760
|
+
} else if (limited_sym == v) {
|
761
|
+
options.mode = LimMode;
|
762
|
+
} else if (hash_sym == v) {
|
763
|
+
options.mode = HashMode;
|
764
|
+
} else if (hash_no_attrs_sym == v) {
|
765
|
+
options.mode = HashNoAttrMode;
|
766
|
+
} else {
|
767
|
+
rb_raise(ox_parse_error_class, ":mode must be :generic, :object, :limited, :hash, :hash_no_attrs.\n");
|
768
|
+
}
|
769
|
+
}
|
770
|
+
if (Qnil != (v = rb_hash_lookup(h, effort_sym))) {
|
771
|
+
if (auto_define_sym == v) {
|
772
|
+
options.effort = AutoEffort;
|
773
|
+
} else if (tolerant_sym == v) {
|
774
|
+
options.effort = TolerantEffort;
|
775
|
+
} else if (strict_sym == v) {
|
776
|
+
options.effort = StrictEffort;
|
777
|
+
} else {
|
778
|
+
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
|
779
|
+
}
|
780
|
+
}
|
781
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
782
|
+
if (skip_none_sym == v) {
|
783
|
+
options.skip = NoSkip;
|
784
|
+
} else if (skip_off_sym == v) {
|
785
|
+
options.skip = OffSkip;
|
786
|
+
} else if (skip_return_sym == v) {
|
787
|
+
options.skip = CrSkip;
|
788
|
+
} else if (skip_white_sym == v) {
|
789
|
+
options.skip = SpcSkip;
|
790
|
+
} else {
|
791
|
+
rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or :skip_off.\n");
|
792
|
+
}
|
793
|
+
}
|
794
|
+
|
795
|
+
if (Qnil != (v = rb_hash_lookup(h, trace_sym))) {
|
796
|
+
Check_Type(v, T_FIXNUM);
|
797
|
+
options.trace = FIX2INT(v);
|
798
|
+
}
|
799
|
+
if (Qnil != (v = rb_hash_lookup(h, symbolize_keys_sym))) {
|
800
|
+
options.sym_keys = (Qfalse == v) ? No : Yes;
|
801
|
+
}
|
802
|
+
options.element_key_mod = rb_hash_lookup2(h, element_key_mod_sym, options.element_key_mod);
|
803
|
+
options.attr_key_mod = rb_hash_lookup2(h, attr_key_mod_sym, options.attr_key_mod);
|
804
|
+
|
805
|
+
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
806
|
+
options.convert_special = (Qfalse != v);
|
807
|
+
}
|
808
|
+
if (Qnil != (v = rb_hash_lookup(h, no_empty_sym))) {
|
809
|
+
options.no_empty = (Qfalse != v);
|
810
|
+
}
|
811
|
+
|
812
|
+
v = rb_hash_lookup(h, invalid_replace_sym);
|
813
|
+
if (Qnil == v) {
|
814
|
+
if (Qtrue == rb_funcall(h, has_key_id, 1, invalid_replace_sym)) {
|
815
|
+
options.allow_invalid = Yes;
|
816
|
+
}
|
817
|
+
} else {
|
818
|
+
long slen;
|
819
|
+
|
820
|
+
Check_Type(v, T_STRING);
|
821
|
+
slen = RSTRING_LEN(v);
|
822
|
+
if (sizeof(options.inv_repl) - 2 < (size_t)slen) {
|
823
|
+
rb_raise(ox_parse_error_class,
|
824
|
+
":invalid_replace can be no longer than %d characters.",
|
825
|
+
(int)sizeof(options.inv_repl) - 2);
|
826
|
+
}
|
827
|
+
strncpy(options.inv_repl + 1, StringValuePtr(v), sizeof(options.inv_repl) - 1);
|
828
|
+
options.inv_repl[sizeof(options.inv_repl) - 1] = '\0';
|
829
|
+
*options.inv_repl = (char)slen;
|
830
|
+
options.allow_invalid = No;
|
831
|
+
}
|
832
|
+
v = rb_hash_lookup(h, strip_namespace_sym);
|
833
|
+
if (Qfalse == v) {
|
834
|
+
*options.strip_ns = '\0';
|
835
|
+
} else if (Qtrue == v) {
|
836
|
+
*options.strip_ns = '*';
|
837
|
+
options.strip_ns[1] = '\0';
|
838
|
+
} else if (Qnil != v) {
|
839
|
+
long slen;
|
840
|
+
|
841
|
+
Check_Type(v, T_STRING);
|
842
|
+
slen = RSTRING_LEN(v);
|
843
|
+
if (sizeof(options.strip_ns) - 1 < (size_t)slen) {
|
844
|
+
rb_raise(ox_parse_error_class,
|
845
|
+
":strip_namespace can be no longer than %d characters.",
|
846
|
+
(int)sizeof(options.strip_ns) - 1);
|
847
|
+
}
|
848
|
+
strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
|
849
|
+
options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
|
850
|
+
}
|
851
|
+
v = rb_hash_lookup(h, margin_sym);
|
852
|
+
if (Qnil != v) {
|
853
|
+
long slen;
|
854
|
+
|
855
|
+
Check_Type(v, T_STRING);
|
856
|
+
slen = RSTRING_LEN(v);
|
857
|
+
if (sizeof(options.margin) - 1 < (size_t)slen) {
|
858
|
+
rb_raise(ox_parse_error_class,
|
859
|
+
":margin can be no longer than %d characters.",
|
860
|
+
(int)sizeof(options.margin) - 1);
|
861
|
+
}
|
862
|
+
strncpy(options.margin, StringValuePtr(v), sizeof(options.margin) - 1);
|
863
|
+
options.margin[sizeof(options.margin) - 1] = '\0';
|
864
|
+
options.margin_len = strlen(options.margin);
|
865
|
+
}
|
866
|
+
if (Qnil != (v = rb_hash_lookup(h, with_cdata_sym))) {
|
867
|
+
options.with_cdata = (Qtrue == v);
|
868
|
+
}
|
855
869
|
}
|
856
|
-
#if HAVE_RB_ENC_FIND
|
857
870
|
if ('\0' == *options.encoding) {
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
871
|
+
if (Qnil != encoding) {
|
872
|
+
options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding));
|
873
|
+
} else {
|
874
|
+
options.rb_enc = 0;
|
875
|
+
}
|
863
876
|
} else if (0 == options.rb_enc) {
|
864
|
-
|
877
|
+
options.rb_enc = rb_enc_find(options.encoding);
|
865
878
|
}
|
866
|
-
#endif
|
867
879
|
xml = defuse_bom(xml, &options);
|
868
880
|
switch (options.mode) {
|
869
881
|
case ObjMode:
|
870
882
|
#ifdef RB_GC_GUARD
|
871
|
-
|
883
|
+
rb_gc_disable();
|
872
884
|
#endif
|
873
|
-
|
885
|
+
obj = ox_parse(xml, len, ox_obj_callbacks, 0, &options, err);
|
874
886
|
#ifdef RB_GC_GUARD
|
875
|
-
|
876
|
-
|
887
|
+
RB_GC_GUARD(obj);
|
888
|
+
rb_gc_enable();
|
877
889
|
#endif
|
878
|
-
|
879
|
-
case GenMode:
|
880
|
-
|
881
|
-
break;
|
882
|
-
case LimMode:
|
883
|
-
obj = ox_parse(xml, len, ox_limited_callbacks, 0, &options, err);
|
884
|
-
break;
|
890
|
+
break;
|
891
|
+
case GenMode: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
|
892
|
+
case LimMode: obj = ox_parse(xml, len, ox_limited_callbacks, 0, &options, err); break;
|
885
893
|
case HashMode:
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
894
|
+
if (options.with_cdata) {
|
895
|
+
obj = ox_parse(xml, len, ox_hash_cdata_callbacks, 0, &options, err);
|
896
|
+
} else {
|
897
|
+
obj = ox_parse(xml, len, ox_hash_callbacks, 0, &options, err);
|
898
|
+
}
|
899
|
+
break;
|
892
900
|
case HashNoAttrMode:
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
case NoMode:
|
900
|
-
|
901
|
-
break;
|
902
|
-
default:
|
903
|
-
obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err);
|
904
|
-
break;
|
901
|
+
if (options.with_cdata) {
|
902
|
+
obj = ox_parse(xml, len, ox_hash_no_attrs_cdata_callbacks, 0, &options, err);
|
903
|
+
} else {
|
904
|
+
obj = ox_parse(xml, len, ox_hash_no_attrs_callbacks, 0, &options, err);
|
905
|
+
}
|
906
|
+
break;
|
907
|
+
case NoMode: obj = ox_parse(xml, len, ox_nomode_callbacks, 0, &options, err); break;
|
908
|
+
default: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
|
905
909
|
}
|
906
910
|
return obj;
|
907
911
|
}
|
@@ -928,26 +932,27 @@ load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, E
|
|
928
932
|
* - _:auto_define_ - auto define missing classes and modules
|
929
933
|
* - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
|
930
934
|
* - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
|
931
|
-
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
932
|
-
*
|
935
|
+
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
936
|
+
* anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
937
|
+
* - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
|
938
|
+
* strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
933
939
|
* - *:with_cdata* [true|false] if true cdata is included in hash_load output otherwise it is not.
|
934
940
|
*/
|
935
|
-
static VALUE
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
VALUE
|
940
|
-
|
941
|
-
struct _err err;
|
941
|
+
static VALUE load_str(int argc, VALUE *argv, VALUE self) {
|
942
|
+
char *xml;
|
943
|
+
size_t len;
|
944
|
+
VALUE obj;
|
945
|
+
VALUE encoding;
|
946
|
+
struct _err err;
|
942
947
|
|
943
948
|
err_init(&err);
|
944
949
|
Check_Type(*argv, T_STRING);
|
945
950
|
/* the xml string gets modified so make a copy of it */
|
946
951
|
len = RSTRING_LEN(*argv) + 1;
|
947
952
|
if (SMALL_XML < len) {
|
948
|
-
|
953
|
+
xml = ALLOC_N(char, len);
|
949
954
|
} else {
|
950
|
-
|
955
|
+
xml = ALLOCA_N(char, len);
|
951
956
|
}
|
952
957
|
#if HAVE_RB_OBJ_ENCODING
|
953
958
|
encoding = rb_obj_encoding(*argv);
|
@@ -956,12 +961,12 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
956
961
|
#endif
|
957
962
|
memcpy(xml, StringValuePtr(*argv), len);
|
958
963
|
xml[len - 1] = '\0';
|
959
|
-
obj
|
964
|
+
obj = load(xml, len - 1, argc - 1, argv + 1, self, encoding, &err);
|
960
965
|
if (SMALL_XML < len) {
|
961
|
-
|
966
|
+
xfree(xml);
|
962
967
|
}
|
963
968
|
if (err_has(&err)) {
|
964
|
-
|
969
|
+
ox_err_raise(&err);
|
965
970
|
}
|
966
971
|
return obj;
|
967
972
|
}
|
@@ -985,45 +990,46 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
985
990
|
* - _:auto_define_ - auto define missing classes and modules
|
986
991
|
* - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
|
987
992
|
* - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
|
988
|
-
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
989
|
-
*
|
993
|
+
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
994
|
+
* anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
995
|
+
* - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
|
996
|
+
* strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
990
997
|
*/
|
991
|
-
static VALUE
|
992
|
-
|
993
|
-
char
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
struct _err err;
|
998
|
+
static VALUE load_file(int argc, VALUE *argv, VALUE self) {
|
999
|
+
char *path;
|
1000
|
+
char *xml;
|
1001
|
+
FILE *f;
|
1002
|
+
off_t len;
|
1003
|
+
VALUE obj;
|
1004
|
+
struct _err err;
|
999
1005
|
|
1000
1006
|
err_init(&err);
|
1001
1007
|
Check_Type(*argv, T_STRING);
|
1002
1008
|
path = StringValuePtr(*argv);
|
1003
1009
|
if (0 == (f = fopen(path, "r"))) {
|
1004
|
-
|
1010
|
+
rb_raise(rb_eIOError, "%s\n", strerror(errno));
|
1005
1011
|
}
|
1006
1012
|
fseek(f, 0, SEEK_END);
|
1007
1013
|
len = ftello(f);
|
1008
1014
|
if (SMALL_XML < len) {
|
1009
|
-
|
1015
|
+
xml = ALLOC_N(char, len + 1);
|
1010
1016
|
} else {
|
1011
|
-
|
1017
|
+
xml = ALLOCA_N(char, len + 1);
|
1012
1018
|
}
|
1013
1019
|
fseek(f, 0, SEEK_SET);
|
1014
1020
|
if ((size_t)len != fread(xml, 1, len, f)) {
|
1015
|
-
|
1016
|
-
|
1021
|
+
ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
|
1022
|
+
obj = Qnil;
|
1017
1023
|
} else {
|
1018
|
-
|
1019
|
-
|
1024
|
+
xml[len] = '\0';
|
1025
|
+
obj = load(xml, len, argc - 1, argv + 1, self, Qnil, &err);
|
1020
1026
|
}
|
1021
1027
|
fclose(f);
|
1022
1028
|
if (SMALL_XML < len) {
|
1023
|
-
|
1029
|
+
xfree(xml);
|
1024
1030
|
}
|
1025
1031
|
if (err_has(&err)) {
|
1026
|
-
|
1032
|
+
ox_err_raise(&err);
|
1027
1033
|
}
|
1028
1034
|
return obj;
|
1029
1035
|
}
|
@@ -1038,66 +1044,68 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
1038
1044
|
* - *:convert_special* [true|false] flag indicating special characters like < are converted
|
1039
1045
|
* - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
|
1040
1046
|
* - *:smart* [true|false] flag indicating the parser uses hints if available (use with html)
|
1041
|
-
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collpase white
|
1042
|
-
*
|
1047
|
+
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collpase white
|
1048
|
+
* space into a single space. Default (skip space)
|
1049
|
+
* - *:strip_namespace* [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
|
1050
|
+
* will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
1043
1051
|
*/
|
1044
|
-
static VALUE
|
1045
|
-
|
1046
|
-
struct _saxOptions options;
|
1052
|
+
static VALUE sax_parse(int argc, VALUE *argv, VALUE self) {
|
1053
|
+
struct _saxOptions options;
|
1047
1054
|
|
1048
|
-
options.symbolize
|
1055
|
+
options.symbolize = (No != ox_default_options.sym_keys);
|
1049
1056
|
options.convert_special = ox_default_options.convert_special;
|
1050
|
-
options.smart
|
1051
|
-
options.skip
|
1052
|
-
options.hints
|
1057
|
+
options.smart = (Yes == ox_default_options.smart);
|
1058
|
+
options.skip = ox_default_options.skip;
|
1059
|
+
options.hints = NULL;
|
1053
1060
|
strcpy(options.strip_ns, ox_default_options.strip_ns);
|
1054
1061
|
|
1055
1062
|
if (argc < 2) {
|
1056
|
-
|
1063
|
+
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
|
1057
1064
|
}
|
1058
1065
|
if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1066
|
+
VALUE h = argv[2];
|
1067
|
+
VALUE v;
|
1068
|
+
|
1069
|
+
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
1070
|
+
options.convert_special = (Qtrue == v);
|
1071
|
+
}
|
1072
|
+
if (Qnil != (v = rb_hash_lookup(h, smart_sym))) {
|
1073
|
+
options.smart = (Qtrue == v);
|
1074
|
+
}
|
1075
|
+
if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
|
1076
|
+
options.symbolize = (Qtrue == v);
|
1077
|
+
}
|
1078
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
1079
|
+
if (skip_return_sym == v) {
|
1080
|
+
options.skip = CrSkip;
|
1081
|
+
} else if (skip_white_sym == v) {
|
1082
|
+
options.skip = SpcSkip;
|
1083
|
+
} else if (skip_none_sym == v) {
|
1084
|
+
options.skip = NoSkip;
|
1085
|
+
} else if (skip_off_sym == v) {
|
1086
|
+
options.skip = OffSkip;
|
1087
|
+
}
|
1088
|
+
}
|
1089
|
+
if (Qnil != (v = rb_hash_lookup(h, strip_namespace_sym))) {
|
1090
|
+
if (Qfalse == v) {
|
1091
|
+
*options.strip_ns = '\0';
|
1092
|
+
} else if (Qtrue == v) {
|
1093
|
+
*options.strip_ns = '*';
|
1094
|
+
options.strip_ns[1] = '\0';
|
1095
|
+
} else {
|
1096
|
+
long slen;
|
1097
|
+
|
1098
|
+
Check_Type(v, T_STRING);
|
1099
|
+
slen = RSTRING_LEN(v);
|
1100
|
+
if (sizeof(options.strip_ns) - 1 < (size_t)slen) {
|
1101
|
+
rb_raise(ox_parse_error_class,
|
1102
|
+
":strip_namespace can be no longer than %d characters.",
|
1103
|
+
(int)sizeof(options.strip_ns) - 1);
|
1104
|
+
}
|
1105
|
+
strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
|
1106
|
+
options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
|
1107
|
+
}
|
1108
|
+
}
|
1101
1109
|
}
|
1102
1110
|
ox_sax_parse(argv[0], argv[1], &options);
|
1103
1111
|
|
@@ -1113,7 +1121,8 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
1113
1121
|
* - +options+ [Hash] options parse options
|
1114
1122
|
* - *:convert_special* [true|false] flag indicating special characters like < are converted
|
1115
1123
|
* - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
|
1116
|
-
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white
|
1124
|
+
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white
|
1125
|
+
* space into a single space. Default (skip space)
|
1117
1126
|
* - *:overlay* [Hash] a Hash of keys that match html element names and values that are one of
|
1118
1127
|
* - _:active_ - make the normal callback for the element
|
1119
1128
|
* - _:nest_ok_ - active but ignore nest check
|
@@ -1122,168 +1131,166 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
1122
1131
|
* - _:off_ - block this element and it's children unless the child element is active
|
1123
1132
|
* - _:abort_ - abort the html processing and return
|
1124
1133
|
*/
|
1125
|
-
static VALUE
|
1126
|
-
|
1127
|
-
|
1128
|
-
bool free_hints = false;
|
1134
|
+
static VALUE sax_html(int argc, VALUE *argv, VALUE self) {
|
1135
|
+
struct _saxOptions options;
|
1136
|
+
bool free_hints = false;
|
1129
1137
|
|
1130
|
-
options.symbolize
|
1138
|
+
options.symbolize = (No != ox_default_options.sym_keys);
|
1131
1139
|
options.convert_special = ox_default_options.convert_special;
|
1132
|
-
options.smart
|
1133
|
-
options.skip
|
1134
|
-
options.hints
|
1140
|
+
options.smart = true;
|
1141
|
+
options.skip = ox_default_options.skip;
|
1142
|
+
options.hints = ox_default_options.html_hints;
|
1135
1143
|
if (NULL == options.hints) {
|
1136
|
-
|
1144
|
+
options.hints = ox_hints_html();
|
1137
1145
|
}
|
1138
1146
|
*options.strip_ns = '\0';
|
1139
1147
|
|
1140
1148
|
if (argc < 2) {
|
1141
|
-
|
1149
|
+
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_html.\n");
|
1142
1150
|
}
|
1143
1151
|
if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
|
1152
|
+
volatile VALUE h = argv[2];
|
1153
|
+
volatile VALUE v;
|
1154
|
+
|
1155
|
+
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
1156
|
+
options.convert_special = (Qtrue == v);
|
1157
|
+
}
|
1158
|
+
if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
|
1159
|
+
options.symbolize = (Qtrue == v);
|
1160
|
+
}
|
1161
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
1162
|
+
if (skip_return_sym == v) {
|
1163
|
+
options.skip = CrSkip;
|
1164
|
+
} else if (skip_white_sym == v) {
|
1165
|
+
options.skip = SpcSkip;
|
1166
|
+
} else if (skip_none_sym == v) {
|
1167
|
+
options.skip = NoSkip;
|
1168
|
+
} else if (skip_off_sym == v) {
|
1169
|
+
options.skip = OffSkip;
|
1170
|
+
}
|
1171
|
+
}
|
1172
|
+
if (Qnil != (v = rb_hash_lookup(h, overlay_sym))) {
|
1173
|
+
int cnt;
|
1174
|
+
|
1175
|
+
Check_Type(v, T_HASH);
|
1176
|
+
cnt = (int)RHASH_SIZE(v);
|
1177
|
+
if (0 == cnt) {
|
1178
|
+
options.hints = ox_hints_html();
|
1179
|
+
} else {
|
1180
|
+
options.hints = ox_hints_dup(options.hints);
|
1181
|
+
free_hints = true;
|
1182
|
+
rb_hash_foreach(v, set_overlay, (VALUE)options.hints);
|
1183
|
+
}
|
1184
|
+
}
|
1177
1185
|
}
|
1178
1186
|
ox_sax_parse(argv[0], argv[1], &options);
|
1179
1187
|
if (free_hints) {
|
1180
|
-
|
1188
|
+
ox_hints_destroy(options.hints);
|
1181
1189
|
}
|
1182
1190
|
return Qnil;
|
1183
1191
|
}
|
1184
1192
|
|
1185
|
-
static void
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
{ Qnil, 0 }
|
1194
|
-
};
|
1195
|
-
YesNoOpt o;
|
1193
|
+
static void parse_dump_options(VALUE ropts, Options copts) {
|
1194
|
+
struct _yesNoOpt ynos[] = {{with_xml_sym, &copts->with_xml},
|
1195
|
+
{with_dtd_sym, &copts->with_dtd},
|
1196
|
+
{with_instruct_sym, &copts->with_instruct},
|
1197
|
+
{xsd_date_sym, &copts->xsd_date},
|
1198
|
+
{circular_sym, &copts->circular},
|
1199
|
+
{Qnil, 0}};
|
1200
|
+
YesNoOpt o;
|
1196
1201
|
|
1197
1202
|
if (rb_cHash == rb_obj_class(ropts)) {
|
1198
|
-
|
1203
|
+
VALUE v;
|
1199
1204
|
|
1200
|
-
|
1205
|
+
if (Qnil != (v = rb_hash_lookup(ropts, ox_indent_sym))) {
|
1201
1206
|
#ifdef RUBY_INTEGER_UNIFICATION
|
1202
|
-
|
1207
|
+
if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
|
1203
1208
|
#else
|
1204
|
-
|
1209
|
+
if (rb_cFixnum != rb_obj_class(v)) {
|
1205
1210
|
#endif
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1211
|
+
rb_raise(ox_parse_error_class, ":indent must be a Fixnum.\n");
|
1212
|
+
}
|
1213
|
+
copts->indent = NUM2INT(v);
|
1214
|
+
}
|
1215
|
+
if (Qnil != (v = rb_hash_lookup(ropts, trace_sym))) {
|
1211
1216
|
#ifdef RUBY_INTEGER_UNIFICATION
|
1212
|
-
|
1217
|
+
if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
|
1213
1218
|
#else
|
1214
|
-
|
1219
|
+
if (rb_cFixnum != rb_obj_class(v)) {
|
1215
1220
|
#endif
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1221
|
+
rb_raise(ox_parse_error_class, ":trace must be a Fixnum.\n");
|
1222
|
+
}
|
1223
|
+
copts->trace = NUM2INT(v);
|
1224
|
+
}
|
1225
|
+
if (Qnil != (v = rb_hash_lookup(ropts, ox_encoding_sym))) {
|
1226
|
+
if (rb_cString != rb_obj_class(v)) {
|
1227
|
+
rb_raise(ox_parse_error_class, ":encoding must be a String.\n");
|
1228
|
+
}
|
1229
|
+
strncpy(copts->encoding, StringValuePtr(v), sizeof(copts->encoding) - 1);
|
1230
|
+
}
|
1231
|
+
if (Qnil != (v = rb_hash_lookup(ropts, no_empty_sym))) {
|
1232
|
+
copts->no_empty = (v == Qtrue);
|
1233
|
+
}
|
1234
|
+
if (Qnil != (v = rb_hash_lookup(ropts, effort_sym))) {
|
1235
|
+
if (auto_define_sym == v) {
|
1236
|
+
copts->effort = AutoEffort;
|
1237
|
+
} else if (tolerant_sym == v) {
|
1238
|
+
copts->effort = TolerantEffort;
|
1239
|
+
} else if (strict_sym == v) {
|
1240
|
+
copts->effort = StrictEffort;
|
1241
|
+
} else {
|
1242
|
+
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
|
1243
|
+
}
|
1244
|
+
}
|
1245
|
+
v = rb_hash_lookup(ropts, invalid_replace_sym);
|
1246
|
+
if (Qnil == v) {
|
1247
|
+
if (Qtrue == rb_funcall(ropts, has_key_id, 1, invalid_replace_sym)) {
|
1248
|
+
copts->allow_invalid = Yes;
|
1249
|
+
}
|
1250
|
+
} else {
|
1251
|
+
long slen;
|
1252
|
+
|
1253
|
+
Check_Type(v, T_STRING);
|
1254
|
+
slen = RSTRING_LEN(v);
|
1255
|
+
if (sizeof(copts->inv_repl) - 2 < (size_t)slen) {
|
1256
|
+
rb_raise(ox_parse_error_class,
|
1257
|
+
":invalid_replace can be no longer than %d characters.",
|
1258
|
+
(int)sizeof(copts->inv_repl) - 2);
|
1259
|
+
}
|
1260
|
+
strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
|
1261
|
+
copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
|
1262
|
+
*copts->inv_repl = (char)slen;
|
1263
|
+
copts->allow_invalid = No;
|
1264
|
+
}
|
1265
|
+
v = rb_hash_lookup(ropts, margin_sym);
|
1266
|
+
if (Qnil != v) {
|
1267
|
+
long slen;
|
1268
|
+
|
1269
|
+
Check_Type(v, T_STRING);
|
1270
|
+
slen = RSTRING_LEN(v);
|
1271
|
+
if (sizeof(copts->margin) - 2 < (size_t)slen) {
|
1272
|
+
rb_raise(ox_parse_error_class,
|
1273
|
+
":margin can be no longer than %d characters.",
|
1274
|
+
(int)sizeof(copts->margin) - 2);
|
1275
|
+
}
|
1276
|
+
strncpy(copts->margin, StringValuePtr(v), sizeof(copts->margin) - 1);
|
1277
|
+
copts->margin[sizeof(copts->margin) - 1] = '\0';
|
1278
|
+
copts->margin_len = (char)slen;
|
1279
|
+
}
|
1280
|
+
|
1281
|
+
for (o = ynos; 0 != o->attr; o++) {
|
1282
|
+
if (Qnil != (v = rb_hash_lookup(ropts, o->sym))) {
|
1283
|
+
VALUE c = rb_obj_class(v);
|
1284
|
+
|
1285
|
+
if (rb_cTrueClass == c) {
|
1286
|
+
*o->attr = Yes;
|
1287
|
+
} else if (rb_cFalseClass == c) {
|
1288
|
+
*o->attr = No;
|
1289
|
+
} else {
|
1290
|
+
rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
|
1291
|
+
}
|
1292
|
+
}
|
1293
|
+
}
|
1287
1294
|
}
|
1288
1295
|
}
|
1289
1296
|
|
@@ -1296,31 +1303,29 @@ parse_dump_options(VALUE ropts, Options copts) {
|
|
1296
1303
|
* - *:no_empty* [true|false] if true don't output empty elements
|
1297
1304
|
* - *:xsd_date* [true|false] use XSD date format if true, default: false
|
1298
1305
|
* - *:circular* [true|false] allow circular references, default: false
|
1299
|
-
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1306
|
+
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1307
|
+
* :strict
|
1300
1308
|
* - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
|
1301
1309
|
* - _:tolerant_ - replaces undumplable objects with nil
|
1302
1310
|
*
|
1303
1311
|
* Note that an indent of less than zero will result in a tight one line output
|
1304
1312
|
* unless the text in the XML fields contain new line characters.
|
1305
1313
|
*/
|
1306
|
-
static VALUE
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
VALUE rstr;
|
1314
|
+
static VALUE dump(int argc, VALUE *argv, VALUE self) {
|
1315
|
+
char *xml;
|
1316
|
+
struct _options copts = ox_default_options;
|
1317
|
+
VALUE rstr;
|
1311
1318
|
|
1312
1319
|
if (2 == argc) {
|
1313
|
-
|
1320
|
+
parse_dump_options(argv[1], &copts);
|
1314
1321
|
}
|
1315
1322
|
if (0 == (xml = ox_write_obj_to_str(*argv, &copts))) {
|
1316
|
-
|
1323
|
+
rb_raise(rb_eNoMemError, "Not enough memory.\n");
|
1317
1324
|
}
|
1318
1325
|
rstr = rb_str_new2(xml);
|
1319
|
-
#if HAVE_RB_ENC_ASSOCIATE
|
1320
1326
|
if ('\0' != *copts.encoding) {
|
1321
|
-
|
1327
|
+
rb_enc_associate(rstr, rb_enc_find(copts.encoding));
|
1322
1328
|
}
|
1323
|
-
#endif
|
1324
1329
|
xfree(xml);
|
1325
1330
|
|
1326
1331
|
return rstr;
|
@@ -1335,15 +1340,15 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
1335
1340
|
* - *:no_empty* [true|false] if true don't output empty elements
|
1336
1341
|
* - *:xsd_date* [true|false] use XSD date format if true, default: false
|
1337
1342
|
* - *:circular* [true|false] allow circular references, default: false
|
1338
|
-
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1343
|
+
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1344
|
+
* :strict
|
1339
1345
|
* - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
|
1340
1346
|
* - _:tolerant_ - replaces undumplable objects with nil
|
1341
1347
|
*
|
1342
1348
|
* Note that an indent of less than zero will result in a tight one line output
|
1343
1349
|
* unless the text in the XML fields contain new line characters.
|
1344
1350
|
*/
|
1345
|
-
static VALUE
|
1346
|
-
to_xml(int argc, VALUE *argv, VALUE self) {
|
1351
|
+
static VALUE to_xml(int argc, VALUE *argv, VALUE self) {
|
1347
1352
|
return dump(argc, argv, self);
|
1348
1353
|
}
|
1349
1354
|
|
@@ -1356,19 +1361,19 @@ to_xml(int argc, VALUE *argv, VALUE self) {
|
|
1356
1361
|
* - *:indent* [Fixnum] format expected
|
1357
1362
|
* - *:xsd_date* [true|false] use XSD date format if true, default: false
|
1358
1363
|
* - *:circular* [true|false] allow circular references, default: false
|
1359
|
-
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1364
|
+
* - *:effort* [:strict|:tolerant] effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1365
|
+
* :strict
|
1360
1366
|
* - _:strict_ - raise a NotImplementedError if an undumpable object is encountered
|
1361
1367
|
* - _:tolerant_ - replaces undumpable objects with nil
|
1362
1368
|
*
|
1363
1369
|
* Note that an indent of less than zero will result in a tight one line output
|
1364
1370
|
* unless the text in the XML fields contains newline characters.
|
1365
1371
|
*/
|
1366
|
-
static VALUE
|
1367
|
-
|
1368
|
-
struct _options copts = ox_default_options;
|
1372
|
+
static VALUE to_file(int argc, VALUE *argv, VALUE self) {
|
1373
|
+
struct _options copts = ox_default_options;
|
1369
1374
|
|
1370
1375
|
if (3 == argc) {
|
1371
|
-
|
1376
|
+
parse_dump_options(argv[2], &copts);
|
1372
1377
|
}
|
1373
1378
|
Check_Type(*argv, T_STRING);
|
1374
1379
|
ox_write_obj_to_file(argv[1], StringValuePtr(*argv), &copts);
|
@@ -1377,24 +1382,25 @@ to_file(int argc, VALUE *argv, VALUE self) {
|
|
1377
1382
|
}
|
1378
1383
|
|
1379
1384
|
#if WITH_CACHE_TESTS
|
1380
|
-
extern void
|
1385
|
+
extern void ox_cache_test(void);
|
1381
1386
|
|
1382
|
-
static VALUE
|
1383
|
-
cache_test(VALUE self) {
|
1387
|
+
static VALUE cache_test(VALUE self) {
|
1384
1388
|
ox_cache_test();
|
1385
1389
|
return Qnil;
|
1386
1390
|
}
|
1387
1391
|
|
1388
|
-
extern void
|
1392
|
+
extern void ox_cache8_test(void);
|
1389
1393
|
|
1390
|
-
static VALUE
|
1391
|
-
cache8_test(VALUE self) {
|
1394
|
+
static VALUE cache8_test(VALUE self) {
|
1392
1395
|
ox_cache8_test();
|
1393
1396
|
return Qnil;
|
1394
1397
|
}
|
1395
1398
|
#endif
|
1396
1399
|
|
1397
1400
|
void Init_ox() {
|
1401
|
+
#if HAVE_RB_EXT_RACTOR_SAFE
|
1402
|
+
rb_ext_ractor_safe(true);
|
1403
|
+
#endif
|
1398
1404
|
Ox = rb_define_module("Ox");
|
1399
1405
|
|
1400
1406
|
rb_define_module_function(Ox, "default_options", get_def_opts, 0);
|
@@ -1421,59 +1427,59 @@ void Init_ox() {
|
|
1421
1427
|
rb_require("bigdecimal");
|
1422
1428
|
rb_require("stringio");
|
1423
1429
|
|
1424
|
-
ox_abort_id
|
1425
|
-
ox_at_column_id
|
1426
|
-
ox_at_content_id
|
1427
|
-
ox_at_id
|
1428
|
-
ox_at_line_id
|
1429
|
-
ox_at_pos_id
|
1430
|
-
ox_at_value_id
|
1431
|
-
ox_attr_id
|
1432
|
-
ox_attr_value_id
|
1433
|
-
ox_attributes_id
|
1434
|
-
ox_attrs_done_id
|
1435
|
-
ox_beg_id
|
1436
|
-
ox_bigdecimal_id
|
1437
|
-
ox_call_id
|
1438
|
-
ox_cdata_id
|
1439
|
-
ox_comment_id
|
1440
|
-
ox_den_id
|
1441
|
-
ox_doctype_id
|
1442
|
-
ox_end_element_id
|
1443
|
-
ox_end_id
|
1444
|
-
ox_end_instruct_id
|
1445
|
-
ox_error_id
|
1446
|
-
ox_excl_id
|
1430
|
+
ox_abort_id = rb_intern("abort");
|
1431
|
+
ox_at_column_id = rb_intern("@column");
|
1432
|
+
ox_at_content_id = rb_intern("@content");
|
1433
|
+
ox_at_id = rb_intern("at");
|
1434
|
+
ox_at_line_id = rb_intern("@line");
|
1435
|
+
ox_at_pos_id = rb_intern("@pos");
|
1436
|
+
ox_at_value_id = rb_intern("@value");
|
1437
|
+
ox_attr_id = rb_intern("attr");
|
1438
|
+
ox_attr_value_id = rb_intern("attr_value");
|
1439
|
+
ox_attributes_id = rb_intern("@attributes");
|
1440
|
+
ox_attrs_done_id = rb_intern("attrs_done");
|
1441
|
+
ox_beg_id = rb_intern("@beg");
|
1442
|
+
ox_bigdecimal_id = rb_intern("BigDecimal");
|
1443
|
+
ox_call_id = rb_intern("call");
|
1444
|
+
ox_cdata_id = rb_intern("cdata");
|
1445
|
+
ox_comment_id = rb_intern("comment");
|
1446
|
+
ox_den_id = rb_intern("@den");
|
1447
|
+
ox_doctype_id = rb_intern("doctype");
|
1448
|
+
ox_end_element_id = rb_intern("end_element");
|
1449
|
+
ox_end_id = rb_intern("@end");
|
1450
|
+
ox_end_instruct_id = rb_intern("end_instruct");
|
1451
|
+
ox_error_id = rb_intern("error");
|
1452
|
+
ox_excl_id = rb_intern("@excl");
|
1447
1453
|
ox_external_encoding_id = rb_intern("external_encoding");
|
1448
|
-
ox_fileno_id
|
1449
|
-
ox_force_encoding_id
|
1450
|
-
ox_inspect_id
|
1451
|
-
ox_instruct_id
|
1452
|
-
ox_jd_id
|
1453
|
-
ox_keys_id
|
1454
|
-
ox_local_id
|
1455
|
-
ox_mesg_id
|
1456
|
-
ox_message_id
|
1457
|
-
ox_nodes_id
|
1458
|
-
ox_new_id
|
1459
|
-
ox_num_id
|
1460
|
-
ox_parse_id
|
1461
|
-
ox_pos_id
|
1462
|
-
ox_read_id
|
1463
|
-
ox_readpartial_id
|
1464
|
-
ox_start_element_id
|
1465
|
-
ox_string_id
|
1466
|
-
ox_text_id
|
1467
|
-
ox_to_c_id
|
1468
|
-
ox_to_s_id
|
1469
|
-
ox_to_sym_id
|
1470
|
-
ox_tv_nsec_id
|
1471
|
-
ox_tv_sec_id
|
1472
|
-
ox_tv_usec_id
|
1473
|
-
ox_value_id
|
1454
|
+
ox_fileno_id = rb_intern("fileno");
|
1455
|
+
ox_force_encoding_id = rb_intern("force_encoding");
|
1456
|
+
ox_inspect_id = rb_intern("inspect");
|
1457
|
+
ox_instruct_id = rb_intern("instruct");
|
1458
|
+
ox_jd_id = rb_intern("jd");
|
1459
|
+
ox_keys_id = rb_intern("keys");
|
1460
|
+
ox_local_id = rb_intern("local");
|
1461
|
+
ox_mesg_id = rb_intern("mesg");
|
1462
|
+
ox_message_id = rb_intern("message");
|
1463
|
+
ox_nodes_id = rb_intern("@nodes");
|
1464
|
+
ox_new_id = rb_intern("new");
|
1465
|
+
ox_num_id = rb_intern("@num");
|
1466
|
+
ox_parse_id = rb_intern("parse");
|
1467
|
+
ox_pos_id = rb_intern("pos");
|
1468
|
+
ox_read_id = rb_intern("read");
|
1469
|
+
ox_readpartial_id = rb_intern("readpartial");
|
1470
|
+
ox_start_element_id = rb_intern("start_element");
|
1471
|
+
ox_string_id = rb_intern("string");
|
1472
|
+
ox_text_id = rb_intern("text");
|
1473
|
+
ox_to_c_id = rb_intern("to_c");
|
1474
|
+
ox_to_s_id = rb_intern("to_s");
|
1475
|
+
ox_to_sym_id = rb_intern("to_sym");
|
1476
|
+
ox_tv_nsec_id = rb_intern("tv_nsec");
|
1477
|
+
ox_tv_sec_id = rb_intern("tv_sec");
|
1478
|
+
ox_tv_usec_id = rb_intern("tv_usec");
|
1479
|
+
ox_value_id = rb_intern("value");
|
1474
1480
|
|
1475
1481
|
encoding_id = rb_intern("encoding");
|
1476
|
-
has_key_id
|
1482
|
+
has_key_id = rb_intern("has_key?");
|
1477
1483
|
|
1478
1484
|
rb_require("ox/version");
|
1479
1485
|
rb_require("ox/error");
|
@@ -1488,81 +1494,164 @@ void Init_ox() {
|
|
1488
1494
|
rb_require("ox/bag");
|
1489
1495
|
rb_require("ox/sax");
|
1490
1496
|
|
1491
|
-
ox_time_class
|
1492
|
-
ox_date_class
|
1493
|
-
ox_parse_error_class
|
1497
|
+
ox_time_class = rb_const_get(rb_cObject, rb_intern("Time"));
|
1498
|
+
ox_date_class = rb_const_get(rb_cObject, rb_intern("Date"));
|
1499
|
+
ox_parse_error_class = rb_const_get_at(Ox, rb_intern("ParseError"));
|
1494
1500
|
ox_syntax_error_class = rb_const_get_at(Ox, rb_intern("SyntaxError"));
|
1495
|
-
ox_arg_error_class
|
1496
|
-
ox_struct_class
|
1497
|
-
ox_stringio_class
|
1498
|
-
ox_bigdecimal_class
|
1499
|
-
|
1500
|
-
abort_sym = ID2SYM(rb_intern("abort"));
|
1501
|
-
|
1502
|
-
|
1503
|
-
|
1504
|
-
|
1505
|
-
|
1506
|
-
|
1507
|
-
|
1508
|
-
|
1509
|
-
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1501
|
+
ox_arg_error_class = rb_const_get_at(Ox, rb_intern("ArgError"));
|
1502
|
+
ox_struct_class = rb_const_get(rb_cObject, rb_intern("Struct"));
|
1503
|
+
ox_stringio_class = rb_const_get(rb_cObject, rb_intern("StringIO"));
|
1504
|
+
ox_bigdecimal_class = rb_const_get(rb_cObject, rb_intern("BigDecimal"));
|
1505
|
+
|
1506
|
+
abort_sym = ID2SYM(rb_intern("abort"));
|
1507
|
+
rb_gc_register_address(&abort_sym);
|
1508
|
+
active_sym = ID2SYM(rb_intern("active"));
|
1509
|
+
rb_gc_register_address(&active_sym);
|
1510
|
+
attr_key_mod_sym = ID2SYM(rb_intern("attr_key_mod"));
|
1511
|
+
rb_gc_register_address(&attr_key_mod_sym);
|
1512
|
+
auto_define_sym = ID2SYM(rb_intern("auto_define"));
|
1513
|
+
rb_gc_register_address(&auto_define_sym);
|
1514
|
+
auto_sym = ID2SYM(rb_intern("auto"));
|
1515
|
+
rb_gc_register_address(&auto_sym);
|
1516
|
+
block_sym = ID2SYM(rb_intern("block"));
|
1517
|
+
rb_gc_register_address(&block_sym);
|
1518
|
+
circular_sym = ID2SYM(rb_intern("circular"));
|
1519
|
+
rb_gc_register_address(&circular_sym);
|
1520
|
+
convert_special_sym = ID2SYM(rb_intern("convert_special"));
|
1521
|
+
rb_gc_register_address(&convert_special_sym);
|
1522
|
+
effort_sym = ID2SYM(rb_intern("effort"));
|
1523
|
+
rb_gc_register_address(&effort_sym);
|
1524
|
+
element_key_mod_sym = ID2SYM(rb_intern("element_key_mod"));
|
1525
|
+
rb_gc_register_address(&element_key_mod_sym);
|
1526
|
+
generic_sym = ID2SYM(rb_intern("generic"));
|
1527
|
+
rb_gc_register_address(&generic_sym);
|
1528
|
+
hash_no_attrs_sym = ID2SYM(rb_intern("hash_no_attrs"));
|
1529
|
+
rb_gc_register_address(&hash_no_attrs_sym);
|
1530
|
+
hash_sym = ID2SYM(rb_intern("hash"));
|
1531
|
+
rb_gc_register_address(&hash_sym);
|
1532
|
+
inactive_sym = ID2SYM(rb_intern("inactive"));
|
1533
|
+
rb_gc_register_address(&inactive_sym);
|
1534
|
+
invalid_replace_sym = ID2SYM(rb_intern("invalid_replace"));
|
1535
|
+
rb_gc_register_address(&invalid_replace_sym);
|
1536
|
+
limited_sym = ID2SYM(rb_intern("limited"));
|
1537
|
+
rb_gc_register_address(&limited_sym);
|
1538
|
+
margin_sym = ID2SYM(rb_intern("margin"));
|
1539
|
+
rb_gc_register_address(&margin_sym);
|
1540
|
+
mode_sym = ID2SYM(rb_intern("mode"));
|
1541
|
+
rb_gc_register_address(&mode_sym);
|
1542
|
+
nest_ok_sym = ID2SYM(rb_intern("nest_ok"));
|
1543
|
+
rb_gc_register_address(&nest_ok_sym);
|
1544
|
+
no_empty_sym = ID2SYM(rb_intern("no_empty"));
|
1545
|
+
rb_gc_register_address(&no_empty_sym);
|
1546
|
+
object_sym = ID2SYM(rb_intern("object"));
|
1547
|
+
rb_gc_register_address(&object_sym);
|
1548
|
+
off_sym = ID2SYM(rb_intern("off"));
|
1549
|
+
rb_gc_register_address(&off_sym);
|
1550
|
+
opt_format_sym = ID2SYM(rb_intern("opt_format"));
|
1551
|
+
rb_gc_register_address(&opt_format_sym);
|
1552
|
+
optimized_sym = ID2SYM(rb_intern("optimized"));
|
1553
|
+
rb_gc_register_address(&optimized_sym);
|
1554
|
+
overlay_sym = ID2SYM(rb_intern("overlay"));
|
1555
|
+
rb_gc_register_address(&overlay_sym);
|
1556
|
+
ox_encoding_sym = ID2SYM(rb_intern("encoding"));
|
1557
|
+
rb_gc_register_address(&ox_encoding_sym);
|
1558
|
+
ox_indent_sym = ID2SYM(rb_intern("indent"));
|
1559
|
+
rb_gc_register_address(&ox_indent_sym);
|
1560
|
+
ox_size_sym = ID2SYM(rb_intern("size"));
|
1561
|
+
rb_gc_register_address(&ox_size_sym);
|
1562
|
+
ox_standalone_sym = ID2SYM(rb_intern("standalone"));
|
1563
|
+
rb_gc_register_address(&ox_standalone_sym);
|
1564
|
+
ox_version_sym = ID2SYM(rb_intern("version"));
|
1565
|
+
rb_gc_register_address(&ox_version_sym);
|
1566
|
+
skip_none_sym = ID2SYM(rb_intern("skip_none"));
|
1567
|
+
rb_gc_register_address(&skip_none_sym);
|
1568
|
+
skip_off_sym = ID2SYM(rb_intern("skip_off"));
|
1569
|
+
rb_gc_register_address(&skip_off_sym);
|
1570
|
+
skip_return_sym = ID2SYM(rb_intern("skip_return"));
|
1571
|
+
rb_gc_register_address(&skip_return_sym);
|
1572
|
+
skip_sym = ID2SYM(rb_intern("skip"));
|
1573
|
+
rb_gc_register_address(&skip_sym);
|
1574
|
+
skip_white_sym = ID2SYM(rb_intern("skip_white"));
|
1575
|
+
rb_gc_register_address(&skip_white_sym);
|
1576
|
+
smart_sym = ID2SYM(rb_intern("smart"));
|
1577
|
+
rb_gc_register_address(&smart_sym);
|
1578
|
+
strict_sym = ID2SYM(rb_intern("strict"));
|
1579
|
+
rb_gc_register_address(&strict_sym);
|
1580
|
+
strip_namespace_sym = ID2SYM(rb_intern("strip_namespace"));
|
1581
|
+
rb_gc_register_address(&strip_namespace_sym);
|
1582
|
+
symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys"));
|
1583
|
+
rb_gc_register_address(&symbolize_keys_sym);
|
1584
|
+
symbolize_sym = ID2SYM(rb_intern("symbolize"));
|
1585
|
+
rb_gc_register_address(&symbolize_sym);
|
1586
|
+
tolerant_sym = ID2SYM(rb_intern("tolerant"));
|
1587
|
+
rb_gc_register_address(&tolerant_sym);
|
1588
|
+
trace_sym = ID2SYM(rb_intern("trace"));
|
1589
|
+
rb_gc_register_address(&trace_sym);
|
1590
|
+
with_cdata_sym = ID2SYM(rb_intern("with_cdata"));
|
1591
|
+
rb_gc_register_address(&with_cdata_sym);
|
1592
|
+
with_dtd_sym = ID2SYM(rb_intern("with_dtd"));
|
1593
|
+
rb_gc_register_address(&with_dtd_sym);
|
1594
|
+
with_instruct_sym = ID2SYM(rb_intern("with_instructions"));
|
1595
|
+
rb_gc_register_address(&with_instruct_sym);
|
1596
|
+
with_xml_sym = ID2SYM(rb_intern("with_xml"));
|
1597
|
+
rb_gc_register_address(&with_xml_sym);
|
1598
|
+
xsd_date_sym = ID2SYM(rb_intern("xsd_date"));
|
1599
|
+
rb_gc_register_address(&xsd_date_sym);
|
1600
|
+
|
1601
|
+
ox_empty_string = rb_str_new2("");
|
1602
|
+
rb_gc_register_address(&ox_empty_string);
|
1603
|
+
ox_zero_fixnum = INT2NUM(0);
|
1604
|
+
rb_gc_register_address(&ox_zero_fixnum);
|
1605
|
+
ox_sym_bank = rb_ary_new();
|
1606
|
+
rb_gc_register_address(&ox_sym_bank);
|
1551
1607
|
|
1552
1608
|
ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
|
1553
|
-
ox_element_clas
|
1609
|
+
ox_element_clas = rb_const_get_at(Ox, rb_intern("Element"));
|
1554
1610
|
ox_instruct_clas = rb_const_get_at(Ox, rb_intern("Instruct"));
|
1555
|
-
ox_comment_clas
|
1556
|
-
ox_raw_clas
|
1557
|
-
ox_doctype_clas
|
1558
|
-
ox_cdata_clas
|
1559
|
-
ox_bag_clas
|
1611
|
+
ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment"));
|
1612
|
+
ox_raw_clas = rb_const_get_at(Ox, rb_intern("Raw"));
|
1613
|
+
ox_doctype_clas = rb_const_get_at(Ox, rb_intern("DocType"));
|
1614
|
+
ox_cdata_clas = rb_const_get_at(Ox, rb_intern("CData"));
|
1615
|
+
ox_bag_clas = rb_const_get_at(Ox, rb_intern("Bag"));
|
1616
|
+
|
1617
|
+
// Classes can move in more recent versions so register them all.
|
1618
|
+
rb_gc_register_address(&Ox);
|
1619
|
+
rb_gc_register_address(&ox_arg_error_class);
|
1620
|
+
rb_gc_register_address(&ox_bag_clas);
|
1621
|
+
rb_gc_register_address(&ox_bag_clas);
|
1622
|
+
rb_gc_register_address(&ox_bigdecimal_class);
|
1623
|
+
rb_gc_register_address(&ox_cdata_clas);
|
1624
|
+
rb_gc_register_address(&ox_cdata_clas);
|
1625
|
+
rb_gc_register_address(&ox_comment_clas);
|
1626
|
+
rb_gc_register_address(&ox_comment_clas);
|
1627
|
+
rb_gc_register_address(&ox_date_class);
|
1628
|
+
rb_gc_register_address(&ox_doctype_clas);
|
1629
|
+
rb_gc_register_address(&ox_doctype_clas);
|
1630
|
+
rb_gc_register_address(&ox_document_clas);
|
1631
|
+
rb_gc_register_address(&ox_document_clas);
|
1632
|
+
rb_gc_register_address(&ox_element_clas);
|
1633
|
+
rb_gc_register_address(&ox_element_clas);
|
1634
|
+
rb_gc_register_address(&ox_encoding_sym);
|
1635
|
+
rb_gc_register_address(&ox_indent_sym);
|
1636
|
+
rb_gc_register_address(&ox_instruct_clas);
|
1637
|
+
rb_gc_register_address(&ox_instruct_clas);
|
1638
|
+
rb_gc_register_address(&ox_parse_error_class);
|
1639
|
+
rb_gc_register_address(&ox_raw_clas);
|
1640
|
+
rb_gc_register_address(&ox_raw_clas);
|
1641
|
+
rb_gc_register_address(&ox_size_sym);
|
1642
|
+
rb_gc_register_address(&ox_standalone_sym);
|
1643
|
+
rb_gc_register_address(&ox_stringio_class);
|
1644
|
+
rb_gc_register_address(&ox_struct_class);
|
1645
|
+
rb_gc_register_address(&ox_syntax_error_class);
|
1646
|
+
rb_gc_register_address(&ox_time_class);
|
1647
|
+
rb_gc_register_address(&ox_version_sym);
|
1560
1648
|
|
1561
1649
|
ox_cache_new(&ox_symbol_cache);
|
1562
1650
|
ox_cache_new(&ox_class_cache);
|
1563
1651
|
ox_cache_new(&ox_attr_cache);
|
1564
1652
|
|
1565
1653
|
ox_sax_define();
|
1654
|
+
ox_hash_init();
|
1566
1655
|
|
1567
1656
|
#if WITH_CACHE_TESTS
|
1568
1657
|
// space added to stop yardoc from trying to document
|
@@ -1570,9 +1659,7 @@ void Init_ox() {
|
|
1570
1659
|
rb_define _module_function(Ox, "cache8_test", cache8_test, 0);
|
1571
1660
|
#endif
|
1572
1661
|
|
1573
|
-
#if HAVE_RB_ENC_FIND
|
1574
1662
|
ox_utf8_encoding = rb_enc_find("UTF-8");
|
1575
|
-
#endif
|
1576
1663
|
}
|
1577
1664
|
|
1578
1665
|
#if __GNUC__ > 4
|
@@ -1580,17 +1667,17 @@ _Noreturn void
|
|
1580
1667
|
#else
|
1581
1668
|
void
|
1582
1669
|
#endif
|
1583
|
-
_ox_raise_error(const char *msg, const char *xml, const char *current, const char*
|
1584
|
-
int
|
1585
|
-
int
|
1670
|
+
_ox_raise_error(const char *msg, const char *xml, const char *current, const char *file, int line) {
|
1671
|
+
int xline = 1;
|
1672
|
+
int col = 1;
|
1586
1673
|
|
1587
1674
|
for (; xml < current && '\n' != *current; current--) {
|
1588
|
-
|
1675
|
+
col++;
|
1589
1676
|
}
|
1590
1677
|
for (; xml < current; current--) {
|
1591
|
-
|
1592
|
-
|
1593
|
-
|
1678
|
+
if ('\n' == *current) {
|
1679
|
+
xline++;
|
1680
|
+
}
|
1594
1681
|
}
|
1595
1682
|
#ifdef RB_GC_GUARD
|
1596
1683
|
rb_gc_enable();
|