ox 2.14.6 → 2.14.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +26 -0
- data/ext/ox/builder.c +0 -4
- data/ext/ox/cache.c +309 -131
- data/ext/ox/cache.h +10 -10
- data/ext/ox/dump.c +2 -2
- data/ext/ox/extconf.rb +4 -2
- data/ext/ox/gen_load.c +5 -73
- data/ext/ox/hash_load.c +0 -4
- data/ext/ox/intern.c +153 -0
- data/ext/ox/intern.h +25 -0
- data/ext/ox/obj_load.c +14 -86
- data/ext/ox/ox.c +1015 -935
- data/ext/ox/ox.h +186 -210
- data/ext/ox/parse.c +72 -31
- data/ext/ox/sax.c +1100 -1276
- data/ext/ox/sax.h +45 -31
- data/ext/ox/sax_as.c +3 -5
- data/ext/ox/sax_buf.c +7 -16
- data/ext/ox/slotcache.c +158 -0
- data/ext/ox/slotcache.h +19 -0
- data/lib/ox/version.rb +1 -1
- metadata +7 -4
- data/ext/ox/sax_has.h +0 -53
data/ext/ox/ox.c
CHANGED
@@ -3,214 +3,209 @@
|
|
3
3
|
* All rights reserved.
|
4
4
|
*/
|
5
5
|
|
6
|
-
#include
|
6
|
+
#include "ox.h"
|
7
|
+
|
7
8
|
#include <errno.h>
|
8
|
-
#include <stdint.h>
|
9
9
|
#include <stdbool.h>
|
10
|
+
#include <stdint.h>
|
10
11
|
#include <stdio.h>
|
12
|
+
#include <stdlib.h>
|
11
13
|
#include <string.h>
|
12
14
|
|
15
|
+
#include "intern.h"
|
13
16
|
#include "ruby.h"
|
14
|
-
#include "ox.h"
|
15
17
|
#include "sax.h"
|
16
18
|
|
17
19
|
/* maximum to allocate on the stack, arbitrary limit */
|
18
|
-
#define SMALL_XML
|
19
|
-
#define WITH_CACHE_TESTS
|
20
|
+
#define SMALL_XML 4096
|
21
|
+
#define WITH_CACHE_TESTS 0
|
20
22
|
|
21
23
|
typedef struct _yesNoOpt {
|
22
|
-
VALUE
|
23
|
-
char
|
24
|
-
} *YesNoOpt;
|
24
|
+
VALUE sym;
|
25
|
+
char *attr;
|
26
|
+
} * YesNoOpt;
|
25
27
|
|
26
28
|
void Init_ox();
|
27
29
|
|
28
|
-
VALUE
|
29
|
-
|
30
|
-
ID
|
31
|
-
ID
|
32
|
-
ID
|
33
|
-
ID
|
34
|
-
ID
|
35
|
-
ID
|
36
|
-
ID
|
37
|
-
ID
|
38
|
-
ID
|
39
|
-
ID
|
40
|
-
ID
|
41
|
-
ID
|
42
|
-
ID
|
43
|
-
ID
|
44
|
-
ID
|
45
|
-
ID
|
46
|
-
ID
|
47
|
-
ID
|
48
|
-
ID
|
49
|
-
ID
|
50
|
-
ID
|
51
|
-
ID
|
52
|
-
ID
|
53
|
-
ID
|
54
|
-
ID
|
55
|
-
ID
|
56
|
-
ID
|
57
|
-
ID
|
58
|
-
ID
|
59
|
-
ID
|
60
|
-
ID
|
61
|
-
ID
|
62
|
-
ID
|
63
|
-
ID
|
64
|
-
ID
|
65
|
-
ID
|
66
|
-
ID
|
67
|
-
ID
|
68
|
-
ID
|
69
|
-
ID
|
70
|
-
ID
|
71
|
-
ID
|
72
|
-
ID
|
73
|
-
ID
|
74
|
-
ID
|
75
|
-
ID
|
76
|
-
ID
|
77
|
-
ID
|
78
|
-
ID
|
79
|
-
ID
|
80
|
-
|
81
|
-
VALUE
|
82
|
-
VALUE
|
83
|
-
VALUE
|
84
|
-
VALUE
|
85
|
-
VALUE
|
86
|
-
|
87
|
-
VALUE
|
88
|
-
VALUE
|
89
|
-
VALUE
|
90
|
-
|
91
|
-
VALUE
|
92
|
-
VALUE
|
93
|
-
VALUE
|
94
|
-
VALUE
|
95
|
-
VALUE
|
96
|
-
VALUE
|
97
|
-
VALUE
|
98
|
-
VALUE
|
99
|
-
VALUE
|
100
|
-
VALUE
|
101
|
-
VALUE
|
102
|
-
VALUE
|
103
|
-
VALUE
|
104
|
-
VALUE
|
105
|
-
VALUE
|
106
|
-
VALUE
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
111
|
-
|
112
|
-
static VALUE
|
113
|
-
static VALUE
|
114
|
-
static VALUE
|
115
|
-
static VALUE
|
116
|
-
static VALUE
|
117
|
-
static VALUE
|
118
|
-
static VALUE
|
119
|
-
static VALUE
|
120
|
-
static VALUE
|
121
|
-
static VALUE
|
122
|
-
static VALUE
|
123
|
-
static VALUE
|
124
|
-
static VALUE
|
125
|
-
static VALUE
|
126
|
-
static VALUE
|
127
|
-
static VALUE
|
128
|
-
static VALUE
|
129
|
-
static VALUE
|
130
|
-
static VALUE
|
131
|
-
static VALUE
|
132
|
-
static VALUE
|
133
|
-
static VALUE
|
134
|
-
static VALUE
|
135
|
-
static VALUE
|
136
|
-
static VALUE
|
137
|
-
static VALUE
|
138
|
-
static VALUE
|
139
|
-
static VALUE
|
140
|
-
static VALUE
|
141
|
-
static VALUE
|
142
|
-
static VALUE
|
143
|
-
static VALUE
|
144
|
-
static VALUE
|
145
|
-
static VALUE
|
146
|
-
static VALUE
|
147
|
-
static VALUE
|
148
|
-
static VALUE
|
149
|
-
static VALUE
|
150
|
-
static VALUE
|
151
|
-
static VALUE
|
152
|
-
|
153
|
-
static
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
No,
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
{ '\0' }, // inv_repl
|
185
|
-
{ '\0' }, // strip_ns
|
186
|
-
NULL, // html_hints
|
187
|
-
Qnil, // attr_key_mod;
|
188
|
-
Qnil, // element_key_mod;
|
189
|
-
0 // rb_enc
|
30
|
+
VALUE Ox = Qnil;
|
31
|
+
|
32
|
+
ID ox_abort_id;
|
33
|
+
ID ox_at_column_id;
|
34
|
+
ID ox_at_content_id;
|
35
|
+
ID ox_at_id;
|
36
|
+
ID ox_at_line_id;
|
37
|
+
ID ox_at_pos_id;
|
38
|
+
ID ox_at_value_id;
|
39
|
+
ID ox_attr_id;
|
40
|
+
ID ox_attr_value_id;
|
41
|
+
ID ox_attributes_id;
|
42
|
+
ID ox_attrs_done_id;
|
43
|
+
ID ox_beg_id;
|
44
|
+
ID ox_bigdecimal_id;
|
45
|
+
ID ox_call_id;
|
46
|
+
ID ox_cdata_id;
|
47
|
+
ID ox_comment_id;
|
48
|
+
ID ox_den_id;
|
49
|
+
ID ox_doctype_id;
|
50
|
+
ID ox_end_element_id;
|
51
|
+
ID ox_end_id;
|
52
|
+
ID ox_end_instruct_id;
|
53
|
+
ID ox_error_id;
|
54
|
+
ID ox_excl_id;
|
55
|
+
ID ox_external_encoding_id;
|
56
|
+
ID ox_fileno_id;
|
57
|
+
ID ox_force_encoding_id;
|
58
|
+
ID ox_inspect_id;
|
59
|
+
ID ox_instruct_id;
|
60
|
+
ID ox_jd_id;
|
61
|
+
ID ox_keys_id;
|
62
|
+
ID ox_local_id;
|
63
|
+
ID ox_mesg_id;
|
64
|
+
ID ox_message_id;
|
65
|
+
ID ox_new_id;
|
66
|
+
ID ox_nodes_id;
|
67
|
+
ID ox_num_id;
|
68
|
+
ID ox_parse_id;
|
69
|
+
ID ox_pos_id;
|
70
|
+
ID ox_read_id;
|
71
|
+
ID ox_readpartial_id;
|
72
|
+
ID ox_start_element_id;
|
73
|
+
ID ox_string_id;
|
74
|
+
ID ox_text_id;
|
75
|
+
ID ox_to_c_id;
|
76
|
+
ID ox_to_s_id;
|
77
|
+
ID ox_to_sym_id;
|
78
|
+
ID ox_tv_nsec_id;
|
79
|
+
ID ox_tv_sec_id;
|
80
|
+
ID ox_tv_usec_id;
|
81
|
+
ID ox_value_id;
|
82
|
+
|
83
|
+
VALUE ox_encoding_sym;
|
84
|
+
VALUE ox_version_sym;
|
85
|
+
VALUE ox_standalone_sym;
|
86
|
+
VALUE ox_indent_sym;
|
87
|
+
VALUE ox_size_sym;
|
88
|
+
|
89
|
+
VALUE ox_empty_string;
|
90
|
+
VALUE ox_zero_fixnum;
|
91
|
+
VALUE ox_sym_bank; // Array
|
92
|
+
|
93
|
+
VALUE ox_arg_error_class;
|
94
|
+
VALUE ox_bag_clas;
|
95
|
+
VALUE ox_bigdecimal_class;
|
96
|
+
VALUE ox_cdata_clas;
|
97
|
+
VALUE ox_comment_clas;
|
98
|
+
VALUE ox_raw_clas;
|
99
|
+
VALUE ox_date_class;
|
100
|
+
VALUE ox_doctype_clas;
|
101
|
+
VALUE ox_document_clas;
|
102
|
+
VALUE ox_element_clas;
|
103
|
+
VALUE ox_instruct_clas;
|
104
|
+
VALUE ox_parse_error_class;
|
105
|
+
VALUE ox_stringio_class;
|
106
|
+
VALUE ox_struct_class;
|
107
|
+
VALUE ox_syntax_error_class;
|
108
|
+
VALUE ox_time_class;
|
109
|
+
|
110
|
+
SlotCache ox_class_cache = 0;
|
111
|
+
|
112
|
+
static VALUE abort_sym;
|
113
|
+
static VALUE active_sym;
|
114
|
+
static VALUE attr_key_mod_sym;
|
115
|
+
static VALUE auto_define_sym;
|
116
|
+
static VALUE auto_sym;
|
117
|
+
static VALUE block_sym;
|
118
|
+
static VALUE circular_sym;
|
119
|
+
static VALUE convert_special_sym;
|
120
|
+
static VALUE effort_sym;
|
121
|
+
static VALUE generic_sym;
|
122
|
+
static VALUE hash_no_attrs_sym;
|
123
|
+
static VALUE hash_sym;
|
124
|
+
static VALUE inactive_sym;
|
125
|
+
static VALUE invalid_replace_sym;
|
126
|
+
static VALUE limited_sym;
|
127
|
+
static VALUE margin_sym;
|
128
|
+
static VALUE mode_sym;
|
129
|
+
static VALUE nest_ok_sym;
|
130
|
+
static VALUE no_empty_sym;
|
131
|
+
static VALUE object_sym;
|
132
|
+
static VALUE off_sym;
|
133
|
+
static VALUE opt_format_sym;
|
134
|
+
static VALUE optimized_sym;
|
135
|
+
static VALUE overlay_sym;
|
136
|
+
static VALUE skip_none_sym;
|
137
|
+
static VALUE skip_off_sym;
|
138
|
+
static VALUE skip_return_sym;
|
139
|
+
static VALUE skip_sym;
|
140
|
+
static VALUE skip_white_sym;
|
141
|
+
static VALUE smart_sym;
|
142
|
+
static VALUE strict_sym;
|
143
|
+
static VALUE strip_namespace_sym;
|
144
|
+
static VALUE symbolize_keys_sym;
|
145
|
+
static VALUE symbolize_sym;
|
146
|
+
static VALUE tolerant_sym;
|
147
|
+
static VALUE trace_sym;
|
148
|
+
static VALUE with_cdata_sym;
|
149
|
+
static VALUE with_dtd_sym;
|
150
|
+
static VALUE with_instruct_sym;
|
151
|
+
static VALUE with_xml_sym;
|
152
|
+
static VALUE xsd_date_sym;
|
153
|
+
static VALUE element_key_mod_sym;
|
154
|
+
|
155
|
+
static ID encoding_id;
|
156
|
+
static ID has_key_id;
|
157
|
+
|
158
|
+
rb_encoding *ox_utf8_encoding = 0;
|
159
|
+
|
160
|
+
struct _options ox_default_options = {
|
161
|
+
{'\0'}, // encoding
|
162
|
+
{'\0'}, // margin
|
163
|
+
2, // indent
|
164
|
+
0, // trace
|
165
|
+
0, // margin_len
|
166
|
+
No, // with_dtd
|
167
|
+
No, // with_xml
|
168
|
+
No, // with_instruct
|
169
|
+
No, // circular
|
170
|
+
No, // xsd_date
|
171
|
+
NoMode, // mode
|
172
|
+
StrictEffort, // effort
|
173
|
+
Yes, // sym_keys
|
174
|
+
SpcSkip, // skip
|
175
|
+
No, // smart
|
176
|
+
true, // convert_special
|
177
|
+
No, // allow_invalid
|
178
|
+
false, // no_empty
|
179
|
+
false, // with_cdata
|
180
|
+
{'\0'}, // inv_repl
|
181
|
+
{'\0'}, // strip_ns
|
182
|
+
NULL, // html_hints
|
183
|
+
Qnil, // attr_key_mod;
|
184
|
+
Qnil, // element_key_mod;
|
185
|
+
0 // rb_enc
|
190
186
|
};
|
191
187
|
|
192
|
-
extern ParseCallbacks
|
193
|
-
extern ParseCallbacks
|
194
|
-
extern ParseCallbacks
|
195
|
-
extern ParseCallbacks
|
196
|
-
extern ParseCallbacks
|
197
|
-
extern ParseCallbacks
|
198
|
-
extern ParseCallbacks
|
199
|
-
extern ParseCallbacks
|
188
|
+
extern ParseCallbacks ox_obj_callbacks;
|
189
|
+
extern ParseCallbacks ox_gen_callbacks;
|
190
|
+
extern ParseCallbacks ox_limited_callbacks;
|
191
|
+
extern ParseCallbacks ox_nomode_callbacks;
|
192
|
+
extern ParseCallbacks ox_hash_callbacks;
|
193
|
+
extern ParseCallbacks ox_hash_cdata_callbacks;
|
194
|
+
extern ParseCallbacks ox_hash_no_attrs_callbacks;
|
195
|
+
extern ParseCallbacks ox_hash_no_attrs_cdata_callbacks;
|
200
196
|
|
201
|
-
static void
|
197
|
+
static void parse_dump_options(VALUE ropts, Options copts);
|
202
198
|
|
203
|
-
static char*
|
204
|
-
defuse_bom(char *xml, Options options) {
|
199
|
+
static char *defuse_bom(char *xml, Options options) {
|
205
200
|
switch ((uint8_t)*xml) {
|
206
|
-
case 0xEF:
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
201
|
+
case 0xEF: // UTF-8
|
202
|
+
if (0xBB == (uint8_t)xml[1] && 0xBF == (uint8_t)xml[2]) {
|
203
|
+
options->rb_enc = ox_utf8_encoding;
|
204
|
+
xml += 3;
|
205
|
+
} else {
|
206
|
+
rb_raise(ox_parse_error_class, "Invalid BOM in XML string.\n");
|
207
|
+
}
|
208
|
+
break;
|
214
209
|
#if 0
|
215
210
|
case 0xFE: // UTF-16BE
|
216
211
|
if (0xFF == (uint8_t)xml[1]) {
|
@@ -243,31 +238,30 @@ defuse_bom(char *xml, Options options) {
|
|
243
238
|
break;
|
244
239
|
#endif
|
245
240
|
default:
|
246
|
-
|
247
|
-
|
248
|
-
|
241
|
+
// Let it fail if there is a BOM that is not UTF-8. Other BOM options
|
242
|
+
// are not ASCII compatible.
|
243
|
+
break;
|
249
244
|
}
|
250
245
|
return xml;
|
251
246
|
}
|
252
247
|
|
253
|
-
static VALUE
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
VALUE ov;
|
248
|
+
static VALUE hints_to_overlay(Hints hints) {
|
249
|
+
volatile VALUE overlay = rb_hash_new();
|
250
|
+
Hint h;
|
251
|
+
int i;
|
252
|
+
VALUE ov;
|
259
253
|
|
260
254
|
for (i = hints->size, h = hints->hints; 0 < i; i--, h++) {
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
255
|
+
switch (h->overlay) {
|
256
|
+
case InactiveOverlay: ov = inactive_sym; break;
|
257
|
+
case BlockOverlay: ov = block_sym; break;
|
258
|
+
case OffOverlay: ov = off_sym; break;
|
259
|
+
case AbortOverlay: ov = abort_sym; break;
|
260
|
+
case NestOverlay: ov = nest_ok_sym; break;
|
261
|
+
case ActiveOverlay:
|
262
|
+
default: ov = active_sym; break;
|
263
|
+
}
|
264
|
+
rb_hash_aset(overlay, rb_str_new2(h->name), ov);
|
271
265
|
}
|
272
266
|
return overlay;
|
273
267
|
}
|
@@ -292,10 +286,12 @@ hints_to_overlay(Hints hints) {
|
|
292
286
|
* - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
|
293
287
|
* - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
|
294
288
|
* - _:convert_special_ [true|false|nil] flag indicating special characters like < are converted with the SAX parser
|
295
|
-
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway
|
289
|
+
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include anyway
|
290
|
+
* as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
296
291
|
* - _:no_empty_ [true|false|nil] flag indicating there should be no empty elements in a dump
|
297
292
|
* - _:with_cdata_ [true|false] includes cdata in hash_load results
|
298
|
-
* - _:strip_namespace_ [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will
|
293
|
+
* - _:strip_namespace_ [String|true|false] false or "" results in no namespace stripping. A string of "*" or true will
|
294
|
+
* strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
299
295
|
* - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
|
300
296
|
* - _:active_ - make the normal callback for the element
|
301
297
|
* - _:nest_ok_ - active but the nesting check is ignored
|
@@ -309,90 +305,107 @@ hints_to_overlay(Hints hints) {
|
|
309
305
|
* Note that an indent of less than zero will result in a tight one line output
|
310
306
|
* unless the text in the XML fields contain new line characters.
|
311
307
|
*/
|
312
|
-
static VALUE
|
313
|
-
|
314
|
-
|
315
|
-
int elen = (int)strlen(ox_default_options.encoding);
|
308
|
+
static VALUE get_def_opts(VALUE self) {
|
309
|
+
VALUE opts = rb_hash_new();
|
310
|
+
int elen = (int)strlen(ox_default_options.encoding);
|
316
311
|
|
317
312
|
rb_hash_aset(opts, ox_encoding_sym, (0 == elen) ? Qnil : rb_str_new(ox_default_options.encoding, elen));
|
318
313
|
rb_hash_aset(opts, margin_sym, rb_str_new(ox_default_options.margin, ox_default_options.margin_len));
|
319
314
|
rb_hash_aset(opts, ox_indent_sym, INT2FIX(ox_default_options.indent));
|
320
315
|
rb_hash_aset(opts, trace_sym, INT2FIX(ox_default_options.trace));
|
321
|
-
rb_hash_aset(opts,
|
322
|
-
|
323
|
-
|
324
|
-
rb_hash_aset(opts,
|
325
|
-
|
326
|
-
|
316
|
+
rb_hash_aset(opts,
|
317
|
+
with_dtd_sym,
|
318
|
+
(Yes == ox_default_options.with_dtd) ? Qtrue : ((No == ox_default_options.with_dtd) ? Qfalse : Qnil));
|
319
|
+
rb_hash_aset(opts,
|
320
|
+
with_xml_sym,
|
321
|
+
(Yes == ox_default_options.with_xml) ? Qtrue : ((No == ox_default_options.with_xml) ? Qfalse : Qnil));
|
322
|
+
rb_hash_aset(
|
323
|
+
opts,
|
324
|
+
with_instruct_sym,
|
325
|
+
(Yes == ox_default_options.with_instruct) ? Qtrue : ((No == ox_default_options.with_instruct) ? Qfalse : Qnil));
|
326
|
+
rb_hash_aset(opts,
|
327
|
+
circular_sym,
|
328
|
+
(Yes == ox_default_options.circular) ? Qtrue : ((No == ox_default_options.circular) ? Qfalse : Qnil));
|
329
|
+
rb_hash_aset(opts,
|
330
|
+
xsd_date_sym,
|
331
|
+
(Yes == ox_default_options.xsd_date) ? Qtrue : ((No == ox_default_options.xsd_date) ? Qfalse : Qnil));
|
332
|
+
rb_hash_aset(opts,
|
333
|
+
symbolize_keys_sym,
|
334
|
+
(Yes == ox_default_options.sym_keys) ? Qtrue : ((No == ox_default_options.sym_keys) ? Qfalse : Qnil));
|
327
335
|
rb_hash_aset(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);
|
328
336
|
rb_hash_aset(opts, element_key_mod_sym, ox_default_options.element_key_mod);
|
329
|
-
rb_hash_aset(opts,
|
337
|
+
rb_hash_aset(opts,
|
338
|
+
smart_sym,
|
339
|
+
(Yes == ox_default_options.smart) ? Qtrue : ((No == ox_default_options.smart) ? Qfalse : Qnil));
|
330
340
|
rb_hash_aset(opts, convert_special_sym, (ox_default_options.convert_special) ? Qtrue : Qfalse);
|
331
341
|
rb_hash_aset(opts, no_empty_sym, (ox_default_options.no_empty) ? Qtrue : Qfalse);
|
332
342
|
rb_hash_aset(opts, with_cdata_sym, (ox_default_options.with_cdata) ? Qtrue : Qfalse);
|
333
343
|
switch (ox_default_options.mode) {
|
334
|
-
case ObjMode:
|
335
|
-
case GenMode:
|
336
|
-
case LimMode:
|
337
|
-
case HashMode:
|
338
|
-
case HashNoAttrMode:
|
344
|
+
case ObjMode: rb_hash_aset(opts, mode_sym, object_sym); break;
|
345
|
+
case GenMode: rb_hash_aset(opts, mode_sym, generic_sym); break;
|
346
|
+
case LimMode: rb_hash_aset(opts, mode_sym, limited_sym); break;
|
347
|
+
case HashMode: rb_hash_aset(opts, mode_sym, hash_sym); break;
|
348
|
+
case HashNoAttrMode: rb_hash_aset(opts, mode_sym, hash_no_attrs_sym); break;
|
339
349
|
case NoMode:
|
340
|
-
default:
|
350
|
+
default: rb_hash_aset(opts, mode_sym, Qnil); break;
|
341
351
|
}
|
342
352
|
switch (ox_default_options.effort) {
|
343
|
-
case StrictEffort:
|
344
|
-
case TolerantEffort:
|
345
|
-
case AutoEffort:
|
353
|
+
case StrictEffort: rb_hash_aset(opts, effort_sym, strict_sym); break;
|
354
|
+
case TolerantEffort: rb_hash_aset(opts, effort_sym, tolerant_sym); break;
|
355
|
+
case AutoEffort: rb_hash_aset(opts, effort_sym, auto_define_sym); break;
|
346
356
|
case NoEffort:
|
347
|
-
default:
|
357
|
+
default: rb_hash_aset(opts, effort_sym, Qnil); break;
|
348
358
|
}
|
349
359
|
switch (ox_default_options.skip) {
|
350
|
-
case OffSkip:
|
351
|
-
case NoSkip:
|
352
|
-
case CrSkip:
|
353
|
-
case SpcSkip:
|
354
|
-
default:
|
360
|
+
case OffSkip: rb_hash_aset(opts, skip_sym, skip_off_sym); break;
|
361
|
+
case NoSkip: rb_hash_aset(opts, skip_sym, skip_none_sym); break;
|
362
|
+
case CrSkip: rb_hash_aset(opts, skip_sym, skip_return_sym); break;
|
363
|
+
case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
|
364
|
+
default: rb_hash_aset(opts, skip_sym, Qnil); break;
|
355
365
|
}
|
356
366
|
if (Yes == ox_default_options.allow_invalid) {
|
357
|
-
|
367
|
+
rb_hash_aset(opts, invalid_replace_sym, Qnil);
|
358
368
|
} else {
|
359
|
-
|
369
|
+
rb_hash_aset(opts,
|
370
|
+
invalid_replace_sym,
|
371
|
+
rb_str_new(ox_default_options.inv_repl + 1, (int)*ox_default_options.inv_repl));
|
360
372
|
}
|
361
373
|
if ('\0' == *ox_default_options.strip_ns) {
|
362
|
-
|
374
|
+
rb_hash_aset(opts, strip_namespace_sym, Qfalse);
|
363
375
|
} else if ('*' == *ox_default_options.strip_ns && '\0' == ox_default_options.strip_ns[1]) {
|
364
|
-
|
376
|
+
rb_hash_aset(opts, strip_namespace_sym, Qtrue);
|
365
377
|
} else {
|
366
|
-
|
378
|
+
rb_hash_aset(opts,
|
379
|
+
strip_namespace_sym,
|
380
|
+
rb_str_new(ox_default_options.strip_ns, strlen(ox_default_options.strip_ns)));
|
367
381
|
}
|
368
382
|
if (NULL == ox_default_options.html_hints) {
|
369
|
-
|
370
|
-
|
383
|
+
// rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_hints_html()));
|
384
|
+
rb_hash_aset(opts, overlay_sym, Qnil);
|
371
385
|
} else {
|
372
|
-
|
386
|
+
rb_hash_aset(opts, overlay_sym, hints_to_overlay(ox_default_options.html_hints));
|
373
387
|
}
|
374
388
|
return opts;
|
375
389
|
}
|
376
390
|
|
377
|
-
static int
|
378
|
-
|
379
|
-
|
380
|
-
Hint hint;
|
391
|
+
static int set_overlay(VALUE key, VALUE value, VALUE ctx) {
|
392
|
+
Hints hints = (Hints)ctx;
|
393
|
+
Hint hint;
|
381
394
|
|
382
395
|
if (NULL != (hint = ox_hint_find(hints, StringValuePtr(key)))) {
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
388
|
-
|
389
|
-
|
390
|
-
|
391
|
-
|
392
|
-
|
393
|
-
|
394
|
-
|
395
|
-
|
396
|
+
if (active_sym == value) {
|
397
|
+
hint->overlay = ActiveOverlay;
|
398
|
+
} else if (inactive_sym == value) {
|
399
|
+
hint->overlay = InactiveOverlay;
|
400
|
+
} else if (block_sym == value) {
|
401
|
+
hint->overlay = BlockOverlay;
|
402
|
+
} else if (nest_ok_sym == value) {
|
403
|
+
hint->overlay = NestOverlay;
|
404
|
+
} else if (off_sym == value) {
|
405
|
+
hint->overlay = OffOverlay;
|
406
|
+
} else if (abort_sym == value) {
|
407
|
+
hint->overlay = AbortOverlay;
|
408
|
+
}
|
396
409
|
}
|
397
410
|
return ST_CONTINUE;
|
398
411
|
}
|
@@ -410,8 +423,7 @@ set_overlay(VALUE key, VALUE value, VALUE ctx) {
|
|
410
423
|
*
|
411
424
|
* *return* [Hash] default SAX HTML settings
|
412
425
|
*/
|
413
|
-
static VALUE
|
414
|
-
sax_html_overlay(VALUE self) {
|
426
|
+
static VALUE sax_html_overlay(VALUE self) {
|
415
427
|
return hints_to_overlay(ox_hints_html());
|
416
428
|
}
|
417
429
|
|
@@ -435,8 +447,10 @@ sax_html_overlay(VALUE self) {
|
|
435
447
|
* - _:attr_key_mod_ [Proc|nil] converts attribute keys on parse if not nil
|
436
448
|
* - _:skip_ [:skip_none|:skip_return|:skip_white|:skip_off] determines how to handle white space in text
|
437
449
|
* - _:smart_ [true|false|nil] flag indicating the SAX parser uses hints if available (use with html)
|
438
|
-
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
439
|
-
*
|
450
|
+
* - _:invalid_replace_ [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
451
|
+
* anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
452
|
+
* - _:strip_namespace_ [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
|
453
|
+
* will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
440
454
|
* - _:with_cdata_ [true|false] includes cdata in hash_load results
|
441
455
|
* - _:overlay_ [Hash] a Hash of keys that match html element names and values that are one of
|
442
456
|
* - _:active_ - make the normal callback for the element
|
@@ -448,201 +462,199 @@ sax_html_overlay(VALUE self) {
|
|
448
462
|
*
|
449
463
|
* *return* [nil]
|
450
464
|
*/
|
451
|
-
static VALUE
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
};
|
463
|
-
YesNoOpt o;
|
464
|
-
VALUE v;
|
465
|
+
static VALUE set_def_opts(VALUE self, VALUE opts) {
|
466
|
+
struct _yesNoOpt ynos[] = {{with_xml_sym, &ox_default_options.with_xml},
|
467
|
+
{with_dtd_sym, &ox_default_options.with_dtd},
|
468
|
+
{with_instruct_sym, &ox_default_options.with_instruct},
|
469
|
+
{xsd_date_sym, &ox_default_options.xsd_date},
|
470
|
+
{circular_sym, &ox_default_options.circular},
|
471
|
+
{symbolize_keys_sym, &ox_default_options.sym_keys},
|
472
|
+
{smart_sym, &ox_default_options.smart},
|
473
|
+
{Qnil, 0}};
|
474
|
+
YesNoOpt o;
|
475
|
+
VALUE v;
|
465
476
|
|
466
477
|
Check_Type(opts, T_HASH);
|
467
478
|
|
468
479
|
v = rb_hash_aref(opts, ox_encoding_sym);
|
469
480
|
if (Qnil == v) {
|
470
|
-
|
481
|
+
*ox_default_options.encoding = '\0';
|
471
482
|
} else {
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
|
476
|
-
#endif
|
483
|
+
Check_Type(v, T_STRING);
|
484
|
+
strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
|
485
|
+
ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
|
477
486
|
}
|
478
487
|
|
479
488
|
v = rb_hash_aref(opts, ox_indent_sym);
|
480
489
|
if (Qnil != v) {
|
481
|
-
|
482
|
-
|
490
|
+
Check_Type(v, T_FIXNUM);
|
491
|
+
ox_default_options.indent = FIX2INT(v);
|
483
492
|
}
|
484
493
|
|
485
494
|
v = rb_hash_aref(opts, trace_sym);
|
486
495
|
if (Qnil != v) {
|
487
|
-
|
488
|
-
|
496
|
+
Check_Type(v, T_FIXNUM);
|
497
|
+
ox_default_options.trace = FIX2INT(v);
|
489
498
|
}
|
490
499
|
|
491
500
|
v = rb_hash_aref(opts, mode_sym);
|
492
501
|
if (Qnil == v) {
|
493
|
-
|
502
|
+
ox_default_options.mode = NoMode;
|
494
503
|
} else if (object_sym == v) {
|
495
|
-
|
504
|
+
ox_default_options.mode = ObjMode;
|
496
505
|
} else if (generic_sym == v) {
|
497
|
-
|
506
|
+
ox_default_options.mode = GenMode;
|
498
507
|
} else if (limited_sym == v) {
|
499
|
-
|
508
|
+
ox_default_options.mode = LimMode;
|
500
509
|
} else if (hash_sym == v) {
|
501
|
-
|
510
|
+
ox_default_options.mode = HashMode;
|
502
511
|
} else if (hash_no_attrs_sym == v) {
|
503
|
-
|
512
|
+
ox_default_options.mode = HashNoAttrMode;
|
504
513
|
} else {
|
505
|
-
|
514
|
+
rb_raise(ox_parse_error_class, ":mode must be :object, :generic, :limited, :hash, :hash_no_attrs, or nil.\n");
|
506
515
|
}
|
507
516
|
|
508
517
|
v = rb_hash_aref(opts, effort_sym);
|
509
518
|
if (Qnil == v) {
|
510
|
-
|
519
|
+
ox_default_options.effort = NoEffort;
|
511
520
|
} else if (strict_sym == v) {
|
512
|
-
|
521
|
+
ox_default_options.effort = StrictEffort;
|
513
522
|
} else if (tolerant_sym == v) {
|
514
|
-
|
523
|
+
ox_default_options.effort = TolerantEffort;
|
515
524
|
} else if (auto_define_sym == v) {
|
516
|
-
|
525
|
+
ox_default_options.effort = AutoEffort;
|
517
526
|
} else {
|
518
|
-
|
527
|
+
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
|
519
528
|
}
|
520
529
|
|
521
530
|
v = rb_hash_aref(opts, skip_sym);
|
522
531
|
if (Qnil == v) {
|
523
|
-
|
532
|
+
ox_default_options.skip = NoSkip;
|
524
533
|
} else if (skip_off_sym == v) {
|
525
|
-
|
534
|
+
ox_default_options.skip = OffSkip;
|
526
535
|
} else if (skip_none_sym == v) {
|
527
|
-
|
536
|
+
ox_default_options.skip = NoSkip;
|
528
537
|
} else if (skip_return_sym == v) {
|
529
|
-
|
538
|
+
ox_default_options.skip = CrSkip;
|
530
539
|
} else if (skip_white_sym == v) {
|
531
|
-
|
540
|
+
ox_default_options.skip = SpcSkip;
|
532
541
|
} else {
|
533
|
-
|
542
|
+
rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, :skip_off, or nil.\n");
|
534
543
|
}
|
535
544
|
|
536
545
|
v = rb_hash_lookup(opts, convert_special_sym);
|
537
546
|
if (Qnil == v) {
|
538
|
-
|
547
|
+
// no change
|
539
548
|
} else if (Qtrue == v) {
|
540
|
-
|
549
|
+
ox_default_options.convert_special = 1;
|
541
550
|
} else if (Qfalse == v) {
|
542
|
-
|
551
|
+
ox_default_options.convert_special = 0;
|
543
552
|
} else {
|
544
|
-
|
553
|
+
rb_raise(ox_parse_error_class, ":convert_special must be true or false.\n");
|
545
554
|
}
|
546
555
|
|
547
556
|
v = rb_hash_lookup(opts, no_empty_sym);
|
548
557
|
if (Qnil == v) {
|
549
|
-
|
558
|
+
// no change
|
550
559
|
} else if (Qtrue == v) {
|
551
|
-
|
560
|
+
ox_default_options.no_empty = 1;
|
552
561
|
} else if (Qfalse == v) {
|
553
|
-
|
562
|
+
ox_default_options.no_empty = 0;
|
554
563
|
} else {
|
555
|
-
|
564
|
+
rb_raise(ox_parse_error_class, ":no_empty must be true or false.\n");
|
556
565
|
}
|
557
566
|
|
558
567
|
v = rb_hash_aref(opts, invalid_replace_sym);
|
559
568
|
if (Qnil == v) {
|
560
|
-
|
569
|
+
ox_default_options.allow_invalid = Yes;
|
561
570
|
} else {
|
562
|
-
|
563
|
-
|
564
|
-
|
565
|
-
|
566
|
-
|
567
|
-
|
568
|
-
|
569
|
-
|
570
|
-
|
571
|
-
|
572
|
-
|
573
|
-
|
571
|
+
long slen;
|
572
|
+
|
573
|
+
Check_Type(v, T_STRING);
|
574
|
+
slen = RSTRING_LEN(v);
|
575
|
+
if (sizeof(ox_default_options.inv_repl) - 2 < (size_t)slen) {
|
576
|
+
rb_raise(ox_parse_error_class,
|
577
|
+
":invalid_replace can be no longer than %d characters.",
|
578
|
+
(int)sizeof(ox_default_options.inv_repl) - 2);
|
579
|
+
}
|
580
|
+
strncpy(ox_default_options.inv_repl + 1, StringValuePtr(v), sizeof(ox_default_options.inv_repl) - 1);
|
581
|
+
ox_default_options.inv_repl[sizeof(ox_default_options.inv_repl) - 1] = '\0';
|
582
|
+
*ox_default_options.inv_repl = (char)slen;
|
583
|
+
ox_default_options.allow_invalid = No;
|
574
584
|
}
|
575
585
|
|
576
586
|
v = rb_hash_aref(opts, strip_namespace_sym);
|
577
587
|
if (Qfalse == v) {
|
578
|
-
|
588
|
+
*ox_default_options.strip_ns = '\0';
|
579
589
|
} else if (Qtrue == v) {
|
580
|
-
|
581
|
-
|
590
|
+
*ox_default_options.strip_ns = '*';
|
591
|
+
ox_default_options.strip_ns[1] = '\0';
|
582
592
|
} else if (Qnil != v) {
|
583
|
-
|
584
|
-
|
585
|
-
|
586
|
-
|
587
|
-
|
588
|
-
|
589
|
-
|
590
|
-
|
591
|
-
|
592
|
-
|
593
|
+
long slen;
|
594
|
+
|
595
|
+
Check_Type(v, T_STRING);
|
596
|
+
slen = RSTRING_LEN(v);
|
597
|
+
if (sizeof(ox_default_options.strip_ns) - 1 < (size_t)slen) {
|
598
|
+
rb_raise(ox_parse_error_class,
|
599
|
+
":strip_namespace can be no longer than %d characters.",
|
600
|
+
(int)sizeof(ox_default_options.strip_ns) - 1);
|
601
|
+
}
|
602
|
+
strncpy(ox_default_options.strip_ns, StringValuePtr(v), sizeof(ox_default_options.strip_ns) - 1);
|
603
|
+
ox_default_options.strip_ns[sizeof(ox_default_options.strip_ns) - 1] = '\0';
|
593
604
|
}
|
594
605
|
|
595
606
|
v = rb_hash_aref(opts, margin_sym);
|
596
607
|
if (Qnil != v) {
|
597
|
-
|
598
|
-
|
599
|
-
|
600
|
-
|
601
|
-
|
602
|
-
|
603
|
-
|
604
|
-
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
+
long slen;
|
609
|
+
|
610
|
+
Check_Type(v, T_STRING);
|
611
|
+
slen = RSTRING_LEN(v);
|
612
|
+
if (sizeof(ox_default_options.margin) - 1 < (size_t)slen) {
|
613
|
+
rb_raise(ox_parse_error_class,
|
614
|
+
":margin can be no longer than %d characters.",
|
615
|
+
(int)sizeof(ox_default_options.margin) - 1);
|
616
|
+
}
|
617
|
+
strncpy(ox_default_options.margin, StringValuePtr(v), sizeof(ox_default_options.margin) - 1);
|
618
|
+
ox_default_options.margin[sizeof(ox_default_options.margin) - 1] = '\0';
|
619
|
+
ox_default_options.margin_len = strlen(ox_default_options.margin);
|
608
620
|
}
|
609
621
|
|
610
622
|
for (o = ynos; 0 != o->attr; o++) {
|
611
|
-
|
612
|
-
|
613
|
-
|
614
|
-
|
615
|
-
|
616
|
-
|
617
|
-
|
618
|
-
|
619
|
-
|
620
|
-
|
623
|
+
v = rb_hash_lookup(opts, o->sym);
|
624
|
+
if (Qnil == v) {
|
625
|
+
*o->attr = NotSet;
|
626
|
+
} else if (Qtrue == v) {
|
627
|
+
*o->attr = Yes;
|
628
|
+
} else if (Qfalse == v) {
|
629
|
+
*o->attr = No;
|
630
|
+
} else {
|
631
|
+
rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
|
632
|
+
}
|
621
633
|
}
|
622
634
|
v = rb_hash_aref(opts, overlay_sym);
|
623
635
|
if (Qnil == v) {
|
624
|
-
|
625
|
-
|
636
|
+
ox_hints_destroy(ox_default_options.html_hints);
|
637
|
+
ox_default_options.html_hints = NULL;
|
626
638
|
} else {
|
627
|
-
|
628
|
-
|
629
|
-
|
630
|
-
|
631
|
-
|
632
|
-
|
633
|
-
|
634
|
-
|
635
|
-
|
636
|
-
|
637
|
-
|
638
|
-
|
639
|
+
int cnt;
|
640
|
+
|
641
|
+
Check_Type(v, T_HASH);
|
642
|
+
cnt = (int)RHASH_SIZE(v);
|
643
|
+
if (0 == cnt) {
|
644
|
+
ox_hints_destroy(ox_default_options.html_hints);
|
645
|
+
ox_default_options.html_hints = NULL;
|
646
|
+
} else {
|
647
|
+
ox_hints_destroy(ox_default_options.html_hints);
|
648
|
+
ox_default_options.html_hints = ox_hints_dup(ox_hints_html());
|
649
|
+
rb_hash_foreach(v, set_overlay, (VALUE)ox_default_options.html_hints);
|
650
|
+
}
|
639
651
|
}
|
640
652
|
if (Qnil != (v = rb_hash_lookup(opts, with_cdata_sym))) {
|
641
|
-
|
653
|
+
ox_default_options.with_cdata = (Qtrue == v);
|
642
654
|
}
|
643
655
|
|
644
656
|
ox_default_options.element_key_mod = rb_hash_lookup2(opts, element_key_mod_sym, ox_default_options.element_key_mod);
|
645
|
-
ox_default_options.attr_key_mod
|
657
|
+
ox_default_options.attr_key_mod = rb_hash_lookup2(opts, attr_key_mod_sym, ox_default_options.attr_key_mod);
|
646
658
|
|
647
659
|
return Qnil;
|
648
660
|
}
|
@@ -657,23 +669,22 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
657
669
|
* - +xml+ [String] XML String in optimized Object format.
|
658
670
|
* *return* [Object] deserialized Object.
|
659
671
|
*/
|
660
|
-
static VALUE
|
661
|
-
|
662
|
-
|
663
|
-
|
664
|
-
|
665
|
-
struct
|
666
|
-
struct _err err;
|
672
|
+
static VALUE to_obj(VALUE self, VALUE ruby_xml) {
|
673
|
+
char *xml, *x;
|
674
|
+
size_t len;
|
675
|
+
VALUE obj;
|
676
|
+
struct _options options = ox_default_options;
|
677
|
+
struct _err err;
|
667
678
|
|
668
679
|
err_init(&err);
|
669
680
|
Check_Type(ruby_xml, T_STRING);
|
670
681
|
/* the xml string gets modified so make a copy of it */
|
671
682
|
len = RSTRING_LEN(ruby_xml) + 1;
|
672
|
-
x
|
683
|
+
x = defuse_bom(StringValuePtr(ruby_xml), &options);
|
673
684
|
if (SMALL_XML < len) {
|
674
|
-
|
685
|
+
xml = ALLOC_N(char, len);
|
675
686
|
} else {
|
676
|
-
|
687
|
+
xml = ALLOCA_N(char, len);
|
677
688
|
}
|
678
689
|
memcpy(xml, x, len);
|
679
690
|
#ifdef RB_GC_GUARD
|
@@ -681,14 +692,14 @@ to_obj(VALUE self, VALUE ruby_xml) {
|
|
681
692
|
#endif
|
682
693
|
obj = ox_parse(xml, len - 1, ox_obj_callbacks, 0, &options, &err);
|
683
694
|
if (SMALL_XML < len) {
|
684
|
-
|
695
|
+
xfree(xml);
|
685
696
|
}
|
686
697
|
#ifdef RB_GC_GUARD
|
687
698
|
RB_GC_GUARD(obj);
|
688
699
|
rb_gc_enable();
|
689
700
|
#endif
|
690
701
|
if (err_has(&err)) {
|
691
|
-
|
702
|
+
ox_err_raise(&err);
|
692
703
|
}
|
693
704
|
return obj;
|
694
705
|
}
|
@@ -701,207 +712,198 @@ to_obj(VALUE self, VALUE ruby_xml) {
|
|
701
712
|
*
|
702
713
|
* _raise_ [Exception] if the XML is malformed.
|
703
714
|
*/
|
704
|
-
static VALUE
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
struct
|
710
|
-
struct _err err;
|
715
|
+
static VALUE to_gen(VALUE self, VALUE ruby_xml) {
|
716
|
+
char *xml, *x;
|
717
|
+
size_t len;
|
718
|
+
VALUE obj;
|
719
|
+
struct _options options = ox_default_options;
|
720
|
+
struct _err err;
|
711
721
|
|
712
722
|
err_init(&err);
|
713
723
|
Check_Type(ruby_xml, T_STRING);
|
714
724
|
/* the xml string gets modified so make a copy of it */
|
715
725
|
len = RSTRING_LEN(ruby_xml) + 1;
|
716
|
-
x
|
726
|
+
x = defuse_bom(StringValuePtr(ruby_xml), &options);
|
717
727
|
if (SMALL_XML < len) {
|
718
|
-
|
728
|
+
xml = ALLOC_N(char, len);
|
719
729
|
} else {
|
720
|
-
|
730
|
+
xml = ALLOCA_N(char, len);
|
721
731
|
}
|
722
732
|
memcpy(xml, x, len);
|
723
733
|
obj = ox_parse(xml, len - 1, ox_gen_callbacks, 0, &options, &err);
|
724
734
|
if (SMALL_XML < len) {
|
725
|
-
|
735
|
+
xfree(xml);
|
726
736
|
}
|
727
737
|
if (err_has(&err)) {
|
728
|
-
|
738
|
+
ox_err_raise(&err);
|
729
739
|
}
|
730
740
|
return obj;
|
731
741
|
}
|
732
742
|
|
733
|
-
static VALUE
|
734
|
-
|
735
|
-
|
736
|
-
struct _options options = ox_default_options;
|
743
|
+
static VALUE load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
|
744
|
+
VALUE obj;
|
745
|
+
struct _options options = ox_default_options;
|
737
746
|
|
738
747
|
if (1 == argc && rb_cHash == rb_obj_class(*argv)) {
|
739
|
-
|
740
|
-
|
741
|
-
|
742
|
-
|
743
|
-
|
744
|
-
|
745
|
-
|
746
|
-
|
747
|
-
|
748
|
-
|
749
|
-
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
756
|
-
|
757
|
-
|
758
|
-
|
759
|
-
|
760
|
-
|
761
|
-
|
762
|
-
|
763
|
-
|
764
|
-
|
765
|
-
|
766
|
-
|
767
|
-
|
768
|
-
|
769
|
-
|
770
|
-
|
771
|
-
|
772
|
-
|
773
|
-
|
774
|
-
|
775
|
-
|
776
|
-
|
777
|
-
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
|
785
|
-
|
786
|
-
|
787
|
-
|
788
|
-
|
789
|
-
|
790
|
-
|
791
|
-
|
792
|
-
|
793
|
-
|
794
|
-
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
801
|
-
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
812
|
-
|
813
|
-
|
814
|
-
|
815
|
-
|
816
|
-
|
817
|
-
|
818
|
-
|
819
|
-
|
820
|
-
|
821
|
-
|
822
|
-
|
823
|
-
|
824
|
-
|
825
|
-
|
826
|
-
|
827
|
-
|
828
|
-
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
833
|
-
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
|
841
|
-
|
842
|
-
|
843
|
-
|
844
|
-
|
845
|
-
|
846
|
-
|
847
|
-
|
848
|
-
|
849
|
-
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
854
|
-
|
748
|
+
VALUE h = *argv;
|
749
|
+
VALUE v;
|
750
|
+
|
751
|
+
if (Qnil != (v = rb_hash_lookup(h, mode_sym))) {
|
752
|
+
if (object_sym == v) {
|
753
|
+
options.mode = ObjMode;
|
754
|
+
} else if (optimized_sym == v) {
|
755
|
+
options.mode = ObjMode;
|
756
|
+
} else if (generic_sym == v) {
|
757
|
+
options.mode = GenMode;
|
758
|
+
} else if (limited_sym == v) {
|
759
|
+
options.mode = LimMode;
|
760
|
+
} else if (hash_sym == v) {
|
761
|
+
options.mode = HashMode;
|
762
|
+
} else if (hash_no_attrs_sym == v) {
|
763
|
+
options.mode = HashNoAttrMode;
|
764
|
+
} else {
|
765
|
+
rb_raise(ox_parse_error_class, ":mode must be :generic, :object, :limited, :hash, :hash_no_attrs.\n");
|
766
|
+
}
|
767
|
+
}
|
768
|
+
if (Qnil != (v = rb_hash_lookup(h, effort_sym))) {
|
769
|
+
if (auto_define_sym == v) {
|
770
|
+
options.effort = AutoEffort;
|
771
|
+
} else if (tolerant_sym == v) {
|
772
|
+
options.effort = TolerantEffort;
|
773
|
+
} else if (strict_sym == v) {
|
774
|
+
options.effort = StrictEffort;
|
775
|
+
} else {
|
776
|
+
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
|
777
|
+
}
|
778
|
+
}
|
779
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
780
|
+
if (skip_none_sym == v) {
|
781
|
+
options.skip = NoSkip;
|
782
|
+
} else if (skip_off_sym == v) {
|
783
|
+
options.skip = OffSkip;
|
784
|
+
} else if (skip_return_sym == v) {
|
785
|
+
options.skip = CrSkip;
|
786
|
+
} else if (skip_white_sym == v) {
|
787
|
+
options.skip = SpcSkip;
|
788
|
+
} else {
|
789
|
+
rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or :skip_off.\n");
|
790
|
+
}
|
791
|
+
}
|
792
|
+
|
793
|
+
if (Qnil != (v = rb_hash_lookup(h, trace_sym))) {
|
794
|
+
Check_Type(v, T_FIXNUM);
|
795
|
+
options.trace = FIX2INT(v);
|
796
|
+
}
|
797
|
+
if (Qnil != (v = rb_hash_lookup(h, symbolize_keys_sym))) {
|
798
|
+
options.sym_keys = (Qfalse == v) ? No : Yes;
|
799
|
+
}
|
800
|
+
options.element_key_mod = rb_hash_lookup2(h, element_key_mod_sym, options.element_key_mod);
|
801
|
+
options.attr_key_mod = rb_hash_lookup2(h, attr_key_mod_sym, options.attr_key_mod);
|
802
|
+
|
803
|
+
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
804
|
+
options.convert_special = (Qfalse != v);
|
805
|
+
}
|
806
|
+
if (Qnil != (v = rb_hash_lookup(h, no_empty_sym))) {
|
807
|
+
options.no_empty = (Qfalse != v);
|
808
|
+
}
|
809
|
+
|
810
|
+
v = rb_hash_lookup(h, invalid_replace_sym);
|
811
|
+
if (Qnil == v) {
|
812
|
+
if (Qtrue == rb_funcall(h, has_key_id, 1, invalid_replace_sym)) {
|
813
|
+
options.allow_invalid = Yes;
|
814
|
+
}
|
815
|
+
} else {
|
816
|
+
long slen;
|
817
|
+
|
818
|
+
Check_Type(v, T_STRING);
|
819
|
+
slen = RSTRING_LEN(v);
|
820
|
+
if (sizeof(options.inv_repl) - 2 < (size_t)slen) {
|
821
|
+
rb_raise(ox_parse_error_class,
|
822
|
+
":invalid_replace can be no longer than %d characters.",
|
823
|
+
(int)sizeof(options.inv_repl) - 2);
|
824
|
+
}
|
825
|
+
strncpy(options.inv_repl + 1, StringValuePtr(v), sizeof(options.inv_repl) - 1);
|
826
|
+
options.inv_repl[sizeof(options.inv_repl) - 1] = '\0';
|
827
|
+
*options.inv_repl = (char)slen;
|
828
|
+
options.allow_invalid = No;
|
829
|
+
}
|
830
|
+
v = rb_hash_lookup(h, strip_namespace_sym);
|
831
|
+
if (Qfalse == v) {
|
832
|
+
*options.strip_ns = '\0';
|
833
|
+
} else if (Qtrue == v) {
|
834
|
+
*options.strip_ns = '*';
|
835
|
+
options.strip_ns[1] = '\0';
|
836
|
+
} else if (Qnil != v) {
|
837
|
+
long slen;
|
838
|
+
|
839
|
+
Check_Type(v, T_STRING);
|
840
|
+
slen = RSTRING_LEN(v);
|
841
|
+
if (sizeof(options.strip_ns) - 1 < (size_t)slen) {
|
842
|
+
rb_raise(ox_parse_error_class,
|
843
|
+
":strip_namespace can be no longer than %d characters.",
|
844
|
+
(int)sizeof(options.strip_ns) - 1);
|
845
|
+
}
|
846
|
+
strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
|
847
|
+
options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
|
848
|
+
}
|
849
|
+
v = rb_hash_lookup(h, margin_sym);
|
850
|
+
if (Qnil != v) {
|
851
|
+
long slen;
|
852
|
+
|
853
|
+
Check_Type(v, T_STRING);
|
854
|
+
slen = RSTRING_LEN(v);
|
855
|
+
if (sizeof(options.margin) - 1 < (size_t)slen) {
|
856
|
+
rb_raise(ox_parse_error_class,
|
857
|
+
":margin can be no longer than %d characters.",
|
858
|
+
(int)sizeof(options.margin) - 1);
|
859
|
+
}
|
860
|
+
strncpy(options.margin, StringValuePtr(v), sizeof(options.margin) - 1);
|
861
|
+
options.margin[sizeof(options.margin) - 1] = '\0';
|
862
|
+
options.margin_len = strlen(options.margin);
|
863
|
+
}
|
864
|
+
if (Qnil != (v = rb_hash_lookup(h, with_cdata_sym))) {
|
865
|
+
options.with_cdata = (Qtrue == v);
|
866
|
+
}
|
855
867
|
}
|
856
|
-
#if HAVE_RB_ENC_FIND
|
857
868
|
if ('\0' == *options.encoding) {
|
858
|
-
|
859
|
-
|
860
|
-
|
861
|
-
|
862
|
-
|
869
|
+
if (Qnil != encoding) {
|
870
|
+
options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding));
|
871
|
+
} else {
|
872
|
+
options.rb_enc = 0;
|
873
|
+
}
|
863
874
|
} else if (0 == options.rb_enc) {
|
864
|
-
|
875
|
+
options.rb_enc = rb_enc_find(options.encoding);
|
865
876
|
}
|
866
|
-
#endif
|
867
877
|
xml = defuse_bom(xml, &options);
|
868
878
|
switch (options.mode) {
|
869
879
|
case ObjMode:
|
870
880
|
#ifdef RB_GC_GUARD
|
871
|
-
|
881
|
+
rb_gc_disable();
|
872
882
|
#endif
|
873
|
-
|
883
|
+
obj = ox_parse(xml, len, ox_obj_callbacks, 0, &options, err);
|
874
884
|
#ifdef RB_GC_GUARD
|
875
|
-
|
876
|
-
|
885
|
+
RB_GC_GUARD(obj);
|
886
|
+
rb_gc_enable();
|
877
887
|
#endif
|
878
|
-
|
879
|
-
case GenMode:
|
880
|
-
|
881
|
-
break;
|
882
|
-
case LimMode:
|
883
|
-
obj = ox_parse(xml, len, ox_limited_callbacks, 0, &options, err);
|
884
|
-
break;
|
888
|
+
break;
|
889
|
+
case GenMode: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
|
890
|
+
case LimMode: obj = ox_parse(xml, len, ox_limited_callbacks, 0, &options, err); break;
|
885
891
|
case HashMode:
|
886
|
-
|
887
|
-
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
892
|
+
if (options.with_cdata) {
|
893
|
+
obj = ox_parse(xml, len, ox_hash_cdata_callbacks, 0, &options, err);
|
894
|
+
} else {
|
895
|
+
obj = ox_parse(xml, len, ox_hash_callbacks, 0, &options, err);
|
896
|
+
}
|
897
|
+
break;
|
892
898
|
case HashNoAttrMode:
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
case NoMode:
|
900
|
-
|
901
|
-
break;
|
902
|
-
default:
|
903
|
-
obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err);
|
904
|
-
break;
|
899
|
+
if (options.with_cdata) {
|
900
|
+
obj = ox_parse(xml, len, ox_hash_no_attrs_cdata_callbacks, 0, &options, err);
|
901
|
+
} else {
|
902
|
+
obj = ox_parse(xml, len, ox_hash_no_attrs_callbacks, 0, &options, err);
|
903
|
+
}
|
904
|
+
break;
|
905
|
+
case NoMode: obj = ox_parse(xml, len, ox_nomode_callbacks, 0, &options, err); break;
|
906
|
+
default: obj = ox_parse(xml, len, ox_gen_callbacks, 0, &options, err); break;
|
905
907
|
}
|
906
908
|
return obj;
|
907
909
|
}
|
@@ -928,26 +930,27 @@ load(char *xml, size_t len, int argc, VALUE *argv, VALUE self, VALUE encoding, E
|
|
928
930
|
* - _:auto_define_ - auto define missing classes and modules
|
929
931
|
* - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
|
930
932
|
* - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
|
931
|
-
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
932
|
-
*
|
933
|
+
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
934
|
+
* anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
935
|
+
* - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
|
936
|
+
* strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
933
937
|
* - *:with_cdata* [true|false] if true cdata is included in hash_load output otherwise it is not.
|
934
938
|
*/
|
935
|
-
static VALUE
|
936
|
-
|
937
|
-
|
938
|
-
|
939
|
-
VALUE
|
940
|
-
|
941
|
-
struct _err err;
|
939
|
+
static VALUE load_str(int argc, VALUE *argv, VALUE self) {
|
940
|
+
char *xml;
|
941
|
+
size_t len;
|
942
|
+
VALUE obj;
|
943
|
+
VALUE encoding;
|
944
|
+
struct _err err;
|
942
945
|
|
943
946
|
err_init(&err);
|
944
947
|
Check_Type(*argv, T_STRING);
|
945
948
|
/* the xml string gets modified so make a copy of it */
|
946
949
|
len = RSTRING_LEN(*argv) + 1;
|
947
950
|
if (SMALL_XML < len) {
|
948
|
-
|
951
|
+
xml = ALLOC_N(char, len);
|
949
952
|
} else {
|
950
|
-
|
953
|
+
xml = ALLOCA_N(char, len);
|
951
954
|
}
|
952
955
|
#if HAVE_RB_OBJ_ENCODING
|
953
956
|
encoding = rb_obj_encoding(*argv);
|
@@ -956,12 +959,12 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
956
959
|
#endif
|
957
960
|
memcpy(xml, StringValuePtr(*argv), len);
|
958
961
|
xml[len - 1] = '\0';
|
959
|
-
obj
|
962
|
+
obj = load(xml, len - 1, argc - 1, argv + 1, self, encoding, &err);
|
960
963
|
if (SMALL_XML < len) {
|
961
|
-
|
964
|
+
xfree(xml);
|
962
965
|
}
|
963
966
|
if (err_has(&err)) {
|
964
|
-
|
967
|
+
ox_err_raise(&err);
|
965
968
|
}
|
966
969
|
return obj;
|
967
970
|
}
|
@@ -985,45 +988,46 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
985
988
|
* - _:auto_define_ - auto define missing classes and modules
|
986
989
|
* - *:trace* [Fixnum] trace level as a Fixnum, default: 0 (silent)
|
987
990
|
* - *:symbolize_keys* [true|false|nil] symbolize element attribute keys or leave as Strings
|
988
|
-
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
989
|
-
*
|
991
|
+
* - *:invalid_replace* [nil|String] replacement string for invalid XML characters on dump. nil indicates include
|
992
|
+
* anyway as hex. A string, limited to 10 characters will replace the invalid character with the replace.
|
993
|
+
* - *:strip_namespace* [String|true|false] "" or false result in no namespace stripping. A string of "*" or true will
|
994
|
+
* strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
990
995
|
*/
|
991
|
-
static VALUE
|
992
|
-
|
993
|
-
char
|
994
|
-
|
995
|
-
|
996
|
-
|
997
|
-
|
998
|
-
struct _err err;
|
996
|
+
static VALUE load_file(int argc, VALUE *argv, VALUE self) {
|
997
|
+
char *path;
|
998
|
+
char *xml;
|
999
|
+
FILE *f;
|
1000
|
+
off_t len;
|
1001
|
+
VALUE obj;
|
1002
|
+
struct _err err;
|
999
1003
|
|
1000
1004
|
err_init(&err);
|
1001
1005
|
Check_Type(*argv, T_STRING);
|
1002
1006
|
path = StringValuePtr(*argv);
|
1003
1007
|
if (0 == (f = fopen(path, "r"))) {
|
1004
|
-
|
1008
|
+
rb_raise(rb_eIOError, "%s\n", strerror(errno));
|
1005
1009
|
}
|
1006
1010
|
fseek(f, 0, SEEK_END);
|
1007
1011
|
len = ftello(f);
|
1008
1012
|
if (SMALL_XML < len) {
|
1009
|
-
|
1013
|
+
xml = ALLOC_N(char, len + 1);
|
1010
1014
|
} else {
|
1011
|
-
|
1015
|
+
xml = ALLOCA_N(char, len + 1);
|
1012
1016
|
}
|
1013
1017
|
fseek(f, 0, SEEK_SET);
|
1014
1018
|
if ((size_t)len != fread(xml, 1, len, f)) {
|
1015
|
-
|
1016
|
-
|
1019
|
+
ox_err_set(&err, rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
|
1020
|
+
obj = Qnil;
|
1017
1021
|
} else {
|
1018
|
-
|
1019
|
-
|
1022
|
+
xml[len] = '\0';
|
1023
|
+
obj = load(xml, len, argc - 1, argv + 1, self, Qnil, &err);
|
1020
1024
|
}
|
1021
1025
|
fclose(f);
|
1022
1026
|
if (SMALL_XML < len) {
|
1023
|
-
|
1027
|
+
xfree(xml);
|
1024
1028
|
}
|
1025
1029
|
if (err_has(&err)) {
|
1026
|
-
|
1030
|
+
ox_err_raise(&err);
|
1027
1031
|
}
|
1028
1032
|
return obj;
|
1029
1033
|
}
|
@@ -1038,66 +1042,68 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
1038
1042
|
* - *:convert_special* [true|false] flag indicating special characters like < are converted
|
1039
1043
|
* - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
|
1040
1044
|
* - *:smart* [true|false] flag indicating the parser uses hints if available (use with html)
|
1041
|
-
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collpase white
|
1042
|
-
*
|
1045
|
+
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collpase white
|
1046
|
+
* space into a single space. Default (skip space)
|
1047
|
+
* - *:strip_namespace* [nil|String|true|false] "" or false result in no namespace stripping. A string of "*" or true
|
1048
|
+
* will strip all namespaces. Any other non-empty string indicates that matching namespaces will be stripped.
|
1043
1049
|
*/
|
1044
|
-
static VALUE
|
1045
|
-
|
1046
|
-
struct _saxOptions options;
|
1050
|
+
static VALUE sax_parse(int argc, VALUE *argv, VALUE self) {
|
1051
|
+
struct _saxOptions options;
|
1047
1052
|
|
1048
|
-
options.symbolize
|
1053
|
+
options.symbolize = (No != ox_default_options.sym_keys);
|
1049
1054
|
options.convert_special = ox_default_options.convert_special;
|
1050
|
-
options.smart
|
1051
|
-
options.skip
|
1052
|
-
options.hints
|
1055
|
+
options.smart = (Yes == ox_default_options.smart);
|
1056
|
+
options.skip = ox_default_options.skip;
|
1057
|
+
options.hints = NULL;
|
1053
1058
|
strcpy(options.strip_ns, ox_default_options.strip_ns);
|
1054
1059
|
|
1055
1060
|
if (argc < 2) {
|
1056
|
-
|
1061
|
+
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
|
1057
1062
|
}
|
1058
1063
|
if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
|
1059
|
-
|
1060
|
-
|
1061
|
-
|
1062
|
-
|
1063
|
-
|
1064
|
-
|
1065
|
-
|
1066
|
-
|
1067
|
-
|
1068
|
-
|
1069
|
-
|
1070
|
-
|
1071
|
-
|
1072
|
-
|
1073
|
-
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
|
1078
|
-
|
1079
|
-
|
1080
|
-
|
1081
|
-
|
1082
|
-
|
1083
|
-
|
1084
|
-
|
1085
|
-
|
1086
|
-
|
1087
|
-
|
1088
|
-
|
1089
|
-
|
1090
|
-
|
1091
|
-
|
1092
|
-
|
1093
|
-
|
1094
|
-
|
1095
|
-
|
1096
|
-
|
1097
|
-
|
1098
|
-
|
1099
|
-
|
1100
|
-
|
1064
|
+
VALUE h = argv[2];
|
1065
|
+
VALUE v;
|
1066
|
+
|
1067
|
+
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
1068
|
+
options.convert_special = (Qtrue == v);
|
1069
|
+
}
|
1070
|
+
if (Qnil != (v = rb_hash_lookup(h, smart_sym))) {
|
1071
|
+
options.smart = (Qtrue == v);
|
1072
|
+
}
|
1073
|
+
if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
|
1074
|
+
options.symbolize = (Qtrue == v);
|
1075
|
+
}
|
1076
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
1077
|
+
if (skip_return_sym == v) {
|
1078
|
+
options.skip = CrSkip;
|
1079
|
+
} else if (skip_white_sym == v) {
|
1080
|
+
options.skip = SpcSkip;
|
1081
|
+
} else if (skip_none_sym == v) {
|
1082
|
+
options.skip = NoSkip;
|
1083
|
+
} else if (skip_off_sym == v) {
|
1084
|
+
options.skip = OffSkip;
|
1085
|
+
}
|
1086
|
+
}
|
1087
|
+
if (Qnil != (v = rb_hash_lookup(h, strip_namespace_sym))) {
|
1088
|
+
if (Qfalse == v) {
|
1089
|
+
*options.strip_ns = '\0';
|
1090
|
+
} else if (Qtrue == v) {
|
1091
|
+
*options.strip_ns = '*';
|
1092
|
+
options.strip_ns[1] = '\0';
|
1093
|
+
} else {
|
1094
|
+
long slen;
|
1095
|
+
|
1096
|
+
Check_Type(v, T_STRING);
|
1097
|
+
slen = RSTRING_LEN(v);
|
1098
|
+
if (sizeof(options.strip_ns) - 1 < (size_t)slen) {
|
1099
|
+
rb_raise(ox_parse_error_class,
|
1100
|
+
":strip_namespace can be no longer than %d characters.",
|
1101
|
+
(int)sizeof(options.strip_ns) - 1);
|
1102
|
+
}
|
1103
|
+
strncpy(options.strip_ns, StringValuePtr(v), sizeof(options.strip_ns) - 1);
|
1104
|
+
options.strip_ns[sizeof(options.strip_ns) - 1] = '\0';
|
1105
|
+
}
|
1106
|
+
}
|
1101
1107
|
}
|
1102
1108
|
ox_sax_parse(argv[0], argv[1], &options);
|
1103
1109
|
|
@@ -1113,7 +1119,8 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
1113
1119
|
* - +options+ [Hash] options parse options
|
1114
1120
|
* - *:convert_special* [true|false] flag indicating special characters like < are converted
|
1115
1121
|
* - *:symbolize* [true|false] flag indicating the parser symbolize element and attribute names
|
1116
|
-
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white
|
1122
|
+
* - *:skip* [:skip_none|:skip_return|:skip_white|:skip_off] flag indicating the parser skips \\r or collapse white
|
1123
|
+
* space into a single space. Default (skip space)
|
1117
1124
|
* - *:overlay* [Hash] a Hash of keys that match html element names and values that are one of
|
1118
1125
|
* - _:active_ - make the normal callback for the element
|
1119
1126
|
* - _:nest_ok_ - active but ignore nest check
|
@@ -1122,168 +1129,166 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
1122
1129
|
* - _:off_ - block this element and it's children unless the child element is active
|
1123
1130
|
* - _:abort_ - abort the html processing and return
|
1124
1131
|
*/
|
1125
|
-
static VALUE
|
1126
|
-
|
1127
|
-
|
1128
|
-
bool free_hints = false;
|
1132
|
+
static VALUE sax_html(int argc, VALUE *argv, VALUE self) {
|
1133
|
+
struct _saxOptions options;
|
1134
|
+
bool free_hints = false;
|
1129
1135
|
|
1130
|
-
options.symbolize
|
1136
|
+
options.symbolize = (No != ox_default_options.sym_keys);
|
1131
1137
|
options.convert_special = ox_default_options.convert_special;
|
1132
|
-
options.smart
|
1133
|
-
options.skip
|
1134
|
-
options.hints
|
1138
|
+
options.smart = true;
|
1139
|
+
options.skip = ox_default_options.skip;
|
1140
|
+
options.hints = ox_default_options.html_hints;
|
1135
1141
|
if (NULL == options.hints) {
|
1136
|
-
|
1142
|
+
options.hints = ox_hints_html();
|
1137
1143
|
}
|
1138
1144
|
*options.strip_ns = '\0';
|
1139
1145
|
|
1140
1146
|
if (argc < 2) {
|
1141
|
-
|
1147
|
+
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_html.\n");
|
1142
1148
|
}
|
1143
1149
|
if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
|
1144
|
-
|
1145
|
-
|
1146
|
-
|
1147
|
-
|
1148
|
-
|
1149
|
-
|
1150
|
-
|
1151
|
-
|
1152
|
-
|
1153
|
-
|
1154
|
-
|
1155
|
-
|
1156
|
-
|
1157
|
-
|
1158
|
-
|
1159
|
-
|
1160
|
-
|
1161
|
-
|
1162
|
-
|
1163
|
-
|
1164
|
-
|
1165
|
-
|
1166
|
-
|
1167
|
-
|
1168
|
-
|
1169
|
-
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1173
|
-
|
1174
|
-
|
1175
|
-
|
1176
|
-
|
1150
|
+
volatile VALUE h = argv[2];
|
1151
|
+
volatile VALUE v;
|
1152
|
+
|
1153
|
+
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
1154
|
+
options.convert_special = (Qtrue == v);
|
1155
|
+
}
|
1156
|
+
if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
|
1157
|
+
options.symbolize = (Qtrue == v);
|
1158
|
+
}
|
1159
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
1160
|
+
if (skip_return_sym == v) {
|
1161
|
+
options.skip = CrSkip;
|
1162
|
+
} else if (skip_white_sym == v) {
|
1163
|
+
options.skip = SpcSkip;
|
1164
|
+
} else if (skip_none_sym == v) {
|
1165
|
+
options.skip = NoSkip;
|
1166
|
+
} else if (skip_off_sym == v) {
|
1167
|
+
options.skip = OffSkip;
|
1168
|
+
}
|
1169
|
+
}
|
1170
|
+
if (Qnil != (v = rb_hash_lookup(h, overlay_sym))) {
|
1171
|
+
int cnt;
|
1172
|
+
|
1173
|
+
Check_Type(v, T_HASH);
|
1174
|
+
cnt = (int)RHASH_SIZE(v);
|
1175
|
+
if (0 == cnt) {
|
1176
|
+
options.hints = ox_hints_html();
|
1177
|
+
} else {
|
1178
|
+
options.hints = ox_hints_dup(options.hints);
|
1179
|
+
free_hints = true;
|
1180
|
+
rb_hash_foreach(v, set_overlay, (VALUE)options.hints);
|
1181
|
+
}
|
1182
|
+
}
|
1177
1183
|
}
|
1178
1184
|
ox_sax_parse(argv[0], argv[1], &options);
|
1179
1185
|
if (free_hints) {
|
1180
|
-
|
1186
|
+
ox_hints_destroy(options.hints);
|
1181
1187
|
}
|
1182
1188
|
return Qnil;
|
1183
1189
|
}
|
1184
1190
|
|
1185
|
-
static void
|
1186
|
-
|
1187
|
-
|
1188
|
-
|
1189
|
-
|
1190
|
-
|
1191
|
-
|
1192
|
-
|
1193
|
-
{ Qnil, 0 }
|
1194
|
-
};
|
1195
|
-
YesNoOpt o;
|
1191
|
+
static void parse_dump_options(VALUE ropts, Options copts) {
|
1192
|
+
struct _yesNoOpt ynos[] = {{with_xml_sym, &copts->with_xml},
|
1193
|
+
{with_dtd_sym, &copts->with_dtd},
|
1194
|
+
{with_instruct_sym, &copts->with_instruct},
|
1195
|
+
{xsd_date_sym, &copts->xsd_date},
|
1196
|
+
{circular_sym, &copts->circular},
|
1197
|
+
{Qnil, 0}};
|
1198
|
+
YesNoOpt o;
|
1196
1199
|
|
1197
1200
|
if (rb_cHash == rb_obj_class(ropts)) {
|
1198
|
-
|
1201
|
+
VALUE v;
|
1199
1202
|
|
1200
|
-
|
1203
|
+
if (Qnil != (v = rb_hash_lookup(ropts, ox_indent_sym))) {
|
1201
1204
|
#ifdef RUBY_INTEGER_UNIFICATION
|
1202
|
-
|
1205
|
+
if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
|
1203
1206
|
#else
|
1204
|
-
|
1207
|
+
if (rb_cFixnum != rb_obj_class(v)) {
|
1205
1208
|
#endif
|
1206
|
-
|
1207
|
-
|
1208
|
-
|
1209
|
-
|
1210
|
-
|
1209
|
+
rb_raise(ox_parse_error_class, ":indent must be a Fixnum.\n");
|
1210
|
+
}
|
1211
|
+
copts->indent = NUM2INT(v);
|
1212
|
+
}
|
1213
|
+
if (Qnil != (v = rb_hash_lookup(ropts, trace_sym))) {
|
1211
1214
|
#ifdef RUBY_INTEGER_UNIFICATION
|
1212
|
-
|
1215
|
+
if (rb_cInteger != rb_obj_class(v) && T_FIXNUM != rb_type(v)) {
|
1213
1216
|
#else
|
1214
|
-
|
1217
|
+
if (rb_cFixnum != rb_obj_class(v)) {
|
1215
1218
|
#endif
|
1216
|
-
|
1217
|
-
|
1218
|
-
|
1219
|
-
|
1220
|
-
|
1221
|
-
|
1222
|
-
|
1223
|
-
|
1224
|
-
|
1225
|
-
|
1226
|
-
|
1227
|
-
|
1228
|
-
|
1229
|
-
|
1230
|
-
|
1231
|
-
|
1232
|
-
|
1233
|
-
|
1234
|
-
|
1235
|
-
|
1236
|
-
|
1237
|
-
|
1238
|
-
|
1239
|
-
|
1240
|
-
|
1241
|
-
|
1242
|
-
|
1243
|
-
|
1244
|
-
|
1245
|
-
|
1246
|
-
|
1247
|
-
|
1248
|
-
|
1249
|
-
|
1250
|
-
|
1251
|
-
|
1252
|
-
|
1253
|
-
|
1254
|
-
|
1255
|
-
|
1256
|
-
|
1257
|
-
|
1258
|
-
|
1259
|
-
|
1260
|
-
|
1261
|
-
|
1262
|
-
|
1263
|
-
|
1264
|
-
|
1265
|
-
|
1266
|
-
|
1267
|
-
|
1268
|
-
|
1269
|
-
|
1270
|
-
|
1271
|
-
|
1272
|
-
|
1273
|
-
|
1274
|
-
|
1275
|
-
|
1276
|
-
|
1277
|
-
|
1278
|
-
|
1279
|
-
|
1280
|
-
|
1281
|
-
|
1282
|
-
|
1283
|
-
|
1284
|
-
|
1285
|
-
|
1286
|
-
|
1219
|
+
rb_raise(ox_parse_error_class, ":trace must be a Fixnum.\n");
|
1220
|
+
}
|
1221
|
+
copts->trace = NUM2INT(v);
|
1222
|
+
}
|
1223
|
+
if (Qnil != (v = rb_hash_lookup(ropts, ox_encoding_sym))) {
|
1224
|
+
if (rb_cString != rb_obj_class(v)) {
|
1225
|
+
rb_raise(ox_parse_error_class, ":encoding must be a String.\n");
|
1226
|
+
}
|
1227
|
+
strncpy(copts->encoding, StringValuePtr(v), sizeof(copts->encoding) - 1);
|
1228
|
+
}
|
1229
|
+
if (Qnil != (v = rb_hash_lookup(ropts, no_empty_sym))) {
|
1230
|
+
copts->no_empty = (v == Qtrue);
|
1231
|
+
}
|
1232
|
+
if (Qnil != (v = rb_hash_lookup(ropts, effort_sym))) {
|
1233
|
+
if (auto_define_sym == v) {
|
1234
|
+
copts->effort = AutoEffort;
|
1235
|
+
} else if (tolerant_sym == v) {
|
1236
|
+
copts->effort = TolerantEffort;
|
1237
|
+
} else if (strict_sym == v) {
|
1238
|
+
copts->effort = StrictEffort;
|
1239
|
+
} else {
|
1240
|
+
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
|
1241
|
+
}
|
1242
|
+
}
|
1243
|
+
v = rb_hash_lookup(ropts, invalid_replace_sym);
|
1244
|
+
if (Qnil == v) {
|
1245
|
+
if (Qtrue == rb_funcall(ropts, has_key_id, 1, invalid_replace_sym)) {
|
1246
|
+
copts->allow_invalid = Yes;
|
1247
|
+
}
|
1248
|
+
} else {
|
1249
|
+
long slen;
|
1250
|
+
|
1251
|
+
Check_Type(v, T_STRING);
|
1252
|
+
slen = RSTRING_LEN(v);
|
1253
|
+
if (sizeof(copts->inv_repl) - 2 < (size_t)slen) {
|
1254
|
+
rb_raise(ox_parse_error_class,
|
1255
|
+
":invalid_replace can be no longer than %d characters.",
|
1256
|
+
(int)sizeof(copts->inv_repl) - 2);
|
1257
|
+
}
|
1258
|
+
strncpy(copts->inv_repl + 1, StringValuePtr(v), sizeof(copts->inv_repl) - 1);
|
1259
|
+
copts->inv_repl[sizeof(copts->inv_repl) - 1] = '\0';
|
1260
|
+
*copts->inv_repl = (char)slen;
|
1261
|
+
copts->allow_invalid = No;
|
1262
|
+
}
|
1263
|
+
v = rb_hash_lookup(ropts, margin_sym);
|
1264
|
+
if (Qnil != v) {
|
1265
|
+
long slen;
|
1266
|
+
|
1267
|
+
Check_Type(v, T_STRING);
|
1268
|
+
slen = RSTRING_LEN(v);
|
1269
|
+
if (sizeof(copts->margin) - 2 < (size_t)slen) {
|
1270
|
+
rb_raise(ox_parse_error_class,
|
1271
|
+
":margin can be no longer than %d characters.",
|
1272
|
+
(int)sizeof(copts->margin) - 2);
|
1273
|
+
}
|
1274
|
+
strncpy(copts->margin, StringValuePtr(v), sizeof(copts->margin) - 1);
|
1275
|
+
copts->margin[sizeof(copts->margin) - 1] = '\0';
|
1276
|
+
copts->margin_len = (char)slen;
|
1277
|
+
}
|
1278
|
+
|
1279
|
+
for (o = ynos; 0 != o->attr; o++) {
|
1280
|
+
if (Qnil != (v = rb_hash_lookup(ropts, o->sym))) {
|
1281
|
+
VALUE c = rb_obj_class(v);
|
1282
|
+
|
1283
|
+
if (rb_cTrueClass == c) {
|
1284
|
+
*o->attr = Yes;
|
1285
|
+
} else if (rb_cFalseClass == c) {
|
1286
|
+
*o->attr = No;
|
1287
|
+
} else {
|
1288
|
+
rb_raise(ox_parse_error_class, "%s must be true or false.\n", rb_id2name(SYM2ID(o->sym)));
|
1289
|
+
}
|
1290
|
+
}
|
1291
|
+
}
|
1287
1292
|
}
|
1288
1293
|
}
|
1289
1294
|
|
@@ -1296,31 +1301,29 @@ parse_dump_options(VALUE ropts, Options copts) {
|
|
1296
1301
|
* - *:no_empty* [true|false] if true don't output empty elements
|
1297
1302
|
* - *:xsd_date* [true|false] use XSD date format if true, default: false
|
1298
1303
|
* - *:circular* [true|false] allow circular references, default: false
|
1299
|
-
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1304
|
+
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1305
|
+
* :strict
|
1300
1306
|
* - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
|
1301
1307
|
* - _:tolerant_ - replaces undumplable objects with nil
|
1302
1308
|
*
|
1303
1309
|
* Note that an indent of less than zero will result in a tight one line output
|
1304
1310
|
* unless the text in the XML fields contain new line characters.
|
1305
1311
|
*/
|
1306
|
-
static VALUE
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
VALUE rstr;
|
1312
|
+
static VALUE dump(int argc, VALUE *argv, VALUE self) {
|
1313
|
+
char *xml;
|
1314
|
+
struct _options copts = ox_default_options;
|
1315
|
+
VALUE rstr;
|
1311
1316
|
|
1312
1317
|
if (2 == argc) {
|
1313
|
-
|
1318
|
+
parse_dump_options(argv[1], &copts);
|
1314
1319
|
}
|
1315
1320
|
if (0 == (xml = ox_write_obj_to_str(*argv, &copts))) {
|
1316
|
-
|
1321
|
+
rb_raise(rb_eNoMemError, "Not enough memory.\n");
|
1317
1322
|
}
|
1318
1323
|
rstr = rb_str_new2(xml);
|
1319
|
-
#if HAVE_RB_ENC_ASSOCIATE
|
1320
1324
|
if ('\0' != *copts.encoding) {
|
1321
|
-
|
1325
|
+
rb_enc_associate(rstr, rb_enc_find(copts.encoding));
|
1322
1326
|
}
|
1323
|
-
#endif
|
1324
1327
|
xfree(xml);
|
1325
1328
|
|
1326
1329
|
return rstr;
|
@@ -1335,15 +1338,15 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
1335
1338
|
* - *:no_empty* [true|false] if true don't output empty elements
|
1336
1339
|
* - *:xsd_date* [true|false] use XSD date format if true, default: false
|
1337
1340
|
* - *:circular* [true|false] allow circular references, default: false
|
1338
|
-
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1341
|
+
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1342
|
+
* :strict
|
1339
1343
|
* - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
|
1340
1344
|
* - _:tolerant_ - replaces undumplable objects with nil
|
1341
1345
|
*
|
1342
1346
|
* Note that an indent of less than zero will result in a tight one line output
|
1343
1347
|
* unless the text in the XML fields contain new line characters.
|
1344
1348
|
*/
|
1345
|
-
static VALUE
|
1346
|
-
to_xml(int argc, VALUE *argv, VALUE self) {
|
1349
|
+
static VALUE to_xml(int argc, VALUE *argv, VALUE self) {
|
1347
1350
|
return dump(argc, argv, self);
|
1348
1351
|
}
|
1349
1352
|
|
@@ -1356,19 +1359,19 @@ to_xml(int argc, VALUE *argv, VALUE self) {
|
|
1356
1359
|
* - *:indent* [Fixnum] format expected
|
1357
1360
|
* - *:xsd_date* [true|false] use XSD date format if true, default: false
|
1358
1361
|
* - *:circular* [true|false] allow circular references, default: false
|
1359
|
-
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1362
|
+
* - *:strict|:tolerant]* [ :effort effort to use when an undumpable object (e.g., IO) is encountered, default:
|
1363
|
+
* :strict
|
1360
1364
|
* - _:strict_ - raise an NotImplementedError if an undumpable object is encountered
|
1361
1365
|
* - _:tolerant_ - replaces undumplable objects with nil
|
1362
1366
|
*
|
1363
1367
|
* Note that an indent of less than zero will result in a tight one line output
|
1364
1368
|
* unless the text in the XML fields contain new line characters.
|
1365
1369
|
*/
|
1366
|
-
static VALUE
|
1367
|
-
|
1368
|
-
struct _options copts = ox_default_options;
|
1370
|
+
static VALUE to_file(int argc, VALUE *argv, VALUE self) {
|
1371
|
+
struct _options copts = ox_default_options;
|
1369
1372
|
|
1370
1373
|
if (3 == argc) {
|
1371
|
-
|
1374
|
+
parse_dump_options(argv[2], &copts);
|
1372
1375
|
}
|
1373
1376
|
Check_Type(*argv, T_STRING);
|
1374
1377
|
ox_write_obj_to_file(argv[1], StringValuePtr(*argv), &copts);
|
@@ -1377,18 +1380,16 @@ to_file(int argc, VALUE *argv, VALUE self) {
|
|
1377
1380
|
}
|
1378
1381
|
|
1379
1382
|
#if WITH_CACHE_TESTS
|
1380
|
-
extern void
|
1383
|
+
extern void ox_cache_test(void);
|
1381
1384
|
|
1382
|
-
static VALUE
|
1383
|
-
cache_test(VALUE self) {
|
1385
|
+
static VALUE cache_test(VALUE self) {
|
1384
1386
|
ox_cache_test();
|
1385
1387
|
return Qnil;
|
1386
1388
|
}
|
1387
1389
|
|
1388
|
-
extern void
|
1390
|
+
extern void ox_cache8_test(void);
|
1389
1391
|
|
1390
|
-
static VALUE
|
1391
|
-
cache8_test(VALUE self) {
|
1392
|
+
static VALUE cache8_test(VALUE self) {
|
1392
1393
|
ox_cache8_test();
|
1393
1394
|
return Qnil;
|
1394
1395
|
}
|
@@ -1424,59 +1425,59 @@ void Init_ox() {
|
|
1424
1425
|
rb_require("bigdecimal");
|
1425
1426
|
rb_require("stringio");
|
1426
1427
|
|
1427
|
-
ox_abort_id
|
1428
|
-
ox_at_column_id
|
1429
|
-
ox_at_content_id
|
1430
|
-
ox_at_id
|
1431
|
-
ox_at_line_id
|
1432
|
-
ox_at_pos_id
|
1433
|
-
ox_at_value_id
|
1434
|
-
ox_attr_id
|
1435
|
-
ox_attr_value_id
|
1436
|
-
ox_attributes_id
|
1437
|
-
ox_attrs_done_id
|
1438
|
-
ox_beg_id
|
1439
|
-
ox_bigdecimal_id
|
1440
|
-
ox_call_id
|
1441
|
-
ox_cdata_id
|
1442
|
-
ox_comment_id
|
1443
|
-
ox_den_id
|
1444
|
-
ox_doctype_id
|
1445
|
-
ox_end_element_id
|
1446
|
-
ox_end_id
|
1447
|
-
ox_end_instruct_id
|
1448
|
-
ox_error_id
|
1449
|
-
ox_excl_id
|
1428
|
+
ox_abort_id = rb_intern("abort");
|
1429
|
+
ox_at_column_id = rb_intern("@column");
|
1430
|
+
ox_at_content_id = rb_intern("@content");
|
1431
|
+
ox_at_id = rb_intern("at");
|
1432
|
+
ox_at_line_id = rb_intern("@line");
|
1433
|
+
ox_at_pos_id = rb_intern("@pos");
|
1434
|
+
ox_at_value_id = rb_intern("@value");
|
1435
|
+
ox_attr_id = rb_intern("attr");
|
1436
|
+
ox_attr_value_id = rb_intern("attr_value");
|
1437
|
+
ox_attributes_id = rb_intern("@attributes");
|
1438
|
+
ox_attrs_done_id = rb_intern("attrs_done");
|
1439
|
+
ox_beg_id = rb_intern("@beg");
|
1440
|
+
ox_bigdecimal_id = rb_intern("BigDecimal");
|
1441
|
+
ox_call_id = rb_intern("call");
|
1442
|
+
ox_cdata_id = rb_intern("cdata");
|
1443
|
+
ox_comment_id = rb_intern("comment");
|
1444
|
+
ox_den_id = rb_intern("@den");
|
1445
|
+
ox_doctype_id = rb_intern("doctype");
|
1446
|
+
ox_end_element_id = rb_intern("end_element");
|
1447
|
+
ox_end_id = rb_intern("@end");
|
1448
|
+
ox_end_instruct_id = rb_intern("end_instruct");
|
1449
|
+
ox_error_id = rb_intern("error");
|
1450
|
+
ox_excl_id = rb_intern("@excl");
|
1450
1451
|
ox_external_encoding_id = rb_intern("external_encoding");
|
1451
|
-
ox_fileno_id
|
1452
|
-
ox_force_encoding_id
|
1453
|
-
ox_inspect_id
|
1454
|
-
ox_instruct_id
|
1455
|
-
ox_jd_id
|
1456
|
-
ox_keys_id
|
1457
|
-
ox_local_id
|
1458
|
-
ox_mesg_id
|
1459
|
-
ox_message_id
|
1460
|
-
ox_nodes_id
|
1461
|
-
ox_new_id
|
1462
|
-
ox_num_id
|
1463
|
-
ox_parse_id
|
1464
|
-
ox_pos_id
|
1465
|
-
ox_read_id
|
1466
|
-
ox_readpartial_id
|
1467
|
-
ox_start_element_id
|
1468
|
-
ox_string_id
|
1469
|
-
ox_text_id
|
1470
|
-
ox_to_c_id
|
1471
|
-
ox_to_s_id
|
1472
|
-
ox_to_sym_id
|
1473
|
-
ox_tv_nsec_id
|
1474
|
-
ox_tv_sec_id
|
1475
|
-
ox_tv_usec_id
|
1476
|
-
ox_value_id
|
1452
|
+
ox_fileno_id = rb_intern("fileno");
|
1453
|
+
ox_force_encoding_id = rb_intern("force_encoding");
|
1454
|
+
ox_inspect_id = rb_intern("inspect");
|
1455
|
+
ox_instruct_id = rb_intern("instruct");
|
1456
|
+
ox_jd_id = rb_intern("jd");
|
1457
|
+
ox_keys_id = rb_intern("keys");
|
1458
|
+
ox_local_id = rb_intern("local");
|
1459
|
+
ox_mesg_id = rb_intern("mesg");
|
1460
|
+
ox_message_id = rb_intern("message");
|
1461
|
+
ox_nodes_id = rb_intern("@nodes");
|
1462
|
+
ox_new_id = rb_intern("new");
|
1463
|
+
ox_num_id = rb_intern("@num");
|
1464
|
+
ox_parse_id = rb_intern("parse");
|
1465
|
+
ox_pos_id = rb_intern("pos");
|
1466
|
+
ox_read_id = rb_intern("read");
|
1467
|
+
ox_readpartial_id = rb_intern("readpartial");
|
1468
|
+
ox_start_element_id = rb_intern("start_element");
|
1469
|
+
ox_string_id = rb_intern("string");
|
1470
|
+
ox_text_id = rb_intern("text");
|
1471
|
+
ox_to_c_id = rb_intern("to_c");
|
1472
|
+
ox_to_s_id = rb_intern("to_s");
|
1473
|
+
ox_to_sym_id = rb_intern("to_sym");
|
1474
|
+
ox_tv_nsec_id = rb_intern("tv_nsec");
|
1475
|
+
ox_tv_sec_id = rb_intern("tv_sec");
|
1476
|
+
ox_tv_usec_id = rb_intern("tv_usec");
|
1477
|
+
ox_value_id = rb_intern("value");
|
1477
1478
|
|
1478
1479
|
encoding_id = rb_intern("encoding");
|
1479
|
-
has_key_id
|
1480
|
+
has_key_id = rb_intern("has_key?");
|
1480
1481
|
|
1481
1482
|
rb_require("ox/version");
|
1482
1483
|
rb_require("ox/error");
|
@@ -1491,81 +1492,162 @@ void Init_ox() {
|
|
1491
1492
|
rb_require("ox/bag");
|
1492
1493
|
rb_require("ox/sax");
|
1493
1494
|
|
1494
|
-
ox_time_class
|
1495
|
-
ox_date_class
|
1496
|
-
ox_parse_error_class
|
1495
|
+
ox_time_class = rb_const_get(rb_cObject, rb_intern("Time"));
|
1496
|
+
ox_date_class = rb_const_get(rb_cObject, rb_intern("Date"));
|
1497
|
+
ox_parse_error_class = rb_const_get_at(Ox, rb_intern("ParseError"));
|
1497
1498
|
ox_syntax_error_class = rb_const_get_at(Ox, rb_intern("SyntaxError"));
|
1498
|
-
ox_arg_error_class
|
1499
|
-
ox_struct_class
|
1500
|
-
ox_stringio_class
|
1501
|
-
ox_bigdecimal_class
|
1502
|
-
|
1503
|
-
abort_sym = ID2SYM(rb_intern("abort"));
|
1504
|
-
|
1505
|
-
|
1506
|
-
|
1507
|
-
|
1508
|
-
|
1509
|
-
|
1510
|
-
|
1511
|
-
|
1512
|
-
|
1513
|
-
|
1514
|
-
|
1515
|
-
|
1516
|
-
|
1517
|
-
|
1518
|
-
|
1519
|
-
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
|
1532
|
-
|
1533
|
-
|
1534
|
-
|
1535
|
-
|
1536
|
-
|
1537
|
-
|
1538
|
-
|
1539
|
-
|
1540
|
-
|
1541
|
-
|
1542
|
-
|
1543
|
-
|
1544
|
-
|
1545
|
-
|
1546
|
-
|
1547
|
-
|
1548
|
-
|
1549
|
-
|
1550
|
-
|
1551
|
-
|
1552
|
-
|
1553
|
-
|
1499
|
+
ox_arg_error_class = rb_const_get_at(Ox, rb_intern("ArgError"));
|
1500
|
+
ox_struct_class = rb_const_get(rb_cObject, rb_intern("Struct"));
|
1501
|
+
ox_stringio_class = rb_const_get(rb_cObject, rb_intern("StringIO"));
|
1502
|
+
ox_bigdecimal_class = rb_const_get(rb_cObject, rb_intern("BigDecimal"));
|
1503
|
+
|
1504
|
+
abort_sym = ID2SYM(rb_intern("abort"));
|
1505
|
+
rb_gc_register_address(&abort_sym);
|
1506
|
+
active_sym = ID2SYM(rb_intern("active"));
|
1507
|
+
rb_gc_register_address(&active_sym);
|
1508
|
+
attr_key_mod_sym = ID2SYM(rb_intern("attr_key_mod"));
|
1509
|
+
rb_gc_register_address(&attr_key_mod_sym);
|
1510
|
+
auto_define_sym = ID2SYM(rb_intern("auto_define"));
|
1511
|
+
rb_gc_register_address(&auto_define_sym);
|
1512
|
+
auto_sym = ID2SYM(rb_intern("auto"));
|
1513
|
+
rb_gc_register_address(&auto_sym);
|
1514
|
+
block_sym = ID2SYM(rb_intern("block"));
|
1515
|
+
rb_gc_register_address(&block_sym);
|
1516
|
+
circular_sym = ID2SYM(rb_intern("circular"));
|
1517
|
+
rb_gc_register_address(&circular_sym);
|
1518
|
+
convert_special_sym = ID2SYM(rb_intern("convert_special"));
|
1519
|
+
rb_gc_register_address(&convert_special_sym);
|
1520
|
+
effort_sym = ID2SYM(rb_intern("effort"));
|
1521
|
+
rb_gc_register_address(&effort_sym);
|
1522
|
+
element_key_mod_sym = ID2SYM(rb_intern("element_key_mod"));
|
1523
|
+
rb_gc_register_address(&element_key_mod_sym);
|
1524
|
+
generic_sym = ID2SYM(rb_intern("generic"));
|
1525
|
+
rb_gc_register_address(&generic_sym);
|
1526
|
+
hash_no_attrs_sym = ID2SYM(rb_intern("hash_no_attrs"));
|
1527
|
+
rb_gc_register_address(&hash_no_attrs_sym);
|
1528
|
+
hash_sym = ID2SYM(rb_intern("hash"));
|
1529
|
+
rb_gc_register_address(&hash_sym);
|
1530
|
+
inactive_sym = ID2SYM(rb_intern("inactive"));
|
1531
|
+
rb_gc_register_address(&inactive_sym);
|
1532
|
+
invalid_replace_sym = ID2SYM(rb_intern("invalid_replace"));
|
1533
|
+
rb_gc_register_address(&invalid_replace_sym);
|
1534
|
+
limited_sym = ID2SYM(rb_intern("limited"));
|
1535
|
+
rb_gc_register_address(&limited_sym);
|
1536
|
+
margin_sym = ID2SYM(rb_intern("margin"));
|
1537
|
+
rb_gc_register_address(&margin_sym);
|
1538
|
+
mode_sym = ID2SYM(rb_intern("mode"));
|
1539
|
+
rb_gc_register_address(&mode_sym);
|
1540
|
+
nest_ok_sym = ID2SYM(rb_intern("nest_ok"));
|
1541
|
+
rb_gc_register_address(&nest_ok_sym);
|
1542
|
+
no_empty_sym = ID2SYM(rb_intern("no_empty"));
|
1543
|
+
rb_gc_register_address(&no_empty_sym);
|
1544
|
+
object_sym = ID2SYM(rb_intern("object"));
|
1545
|
+
rb_gc_register_address(&object_sym);
|
1546
|
+
off_sym = ID2SYM(rb_intern("off"));
|
1547
|
+
rb_gc_register_address(&off_sym);
|
1548
|
+
opt_format_sym = ID2SYM(rb_intern("opt_format"));
|
1549
|
+
rb_gc_register_address(&opt_format_sym);
|
1550
|
+
optimized_sym = ID2SYM(rb_intern("optimized"));
|
1551
|
+
rb_gc_register_address(&optimized_sym);
|
1552
|
+
overlay_sym = ID2SYM(rb_intern("overlay"));
|
1553
|
+
rb_gc_register_address(&overlay_sym);
|
1554
|
+
ox_encoding_sym = ID2SYM(rb_intern("encoding"));
|
1555
|
+
rb_gc_register_address(&ox_encoding_sym);
|
1556
|
+
ox_indent_sym = ID2SYM(rb_intern("indent"));
|
1557
|
+
rb_gc_register_address(&ox_indent_sym);
|
1558
|
+
ox_size_sym = ID2SYM(rb_intern("size"));
|
1559
|
+
rb_gc_register_address(&ox_size_sym);
|
1560
|
+
ox_standalone_sym = ID2SYM(rb_intern("standalone"));
|
1561
|
+
rb_gc_register_address(&ox_standalone_sym);
|
1562
|
+
ox_version_sym = ID2SYM(rb_intern("version"));
|
1563
|
+
rb_gc_register_address(&ox_version_sym);
|
1564
|
+
skip_none_sym = ID2SYM(rb_intern("skip_none"));
|
1565
|
+
rb_gc_register_address(&skip_none_sym);
|
1566
|
+
skip_off_sym = ID2SYM(rb_intern("skip_off"));
|
1567
|
+
rb_gc_register_address(&skip_off_sym);
|
1568
|
+
skip_return_sym = ID2SYM(rb_intern("skip_return"));
|
1569
|
+
rb_gc_register_address(&skip_return_sym);
|
1570
|
+
skip_sym = ID2SYM(rb_intern("skip"));
|
1571
|
+
rb_gc_register_address(&skip_sym);
|
1572
|
+
skip_white_sym = ID2SYM(rb_intern("skip_white"));
|
1573
|
+
rb_gc_register_address(&skip_white_sym);
|
1574
|
+
smart_sym = ID2SYM(rb_intern("smart"));
|
1575
|
+
rb_gc_register_address(&smart_sym);
|
1576
|
+
strict_sym = ID2SYM(rb_intern("strict"));
|
1577
|
+
rb_gc_register_address(&strict_sym);
|
1578
|
+
strip_namespace_sym = ID2SYM(rb_intern("strip_namespace"));
|
1579
|
+
rb_gc_register_address(&strip_namespace_sym);
|
1580
|
+
symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys"));
|
1581
|
+
rb_gc_register_address(&symbolize_keys_sym);
|
1582
|
+
symbolize_sym = ID2SYM(rb_intern("symbolize"));
|
1583
|
+
rb_gc_register_address(&symbolize_sym);
|
1584
|
+
tolerant_sym = ID2SYM(rb_intern("tolerant"));
|
1585
|
+
rb_gc_register_address(&tolerant_sym);
|
1586
|
+
trace_sym = ID2SYM(rb_intern("trace"));
|
1587
|
+
rb_gc_register_address(&trace_sym);
|
1588
|
+
with_cdata_sym = ID2SYM(rb_intern("with_cdata"));
|
1589
|
+
rb_gc_register_address(&with_cdata_sym);
|
1590
|
+
with_dtd_sym = ID2SYM(rb_intern("with_dtd"));
|
1591
|
+
rb_gc_register_address(&with_dtd_sym);
|
1592
|
+
with_instruct_sym = ID2SYM(rb_intern("with_instructions"));
|
1593
|
+
rb_gc_register_address(&with_instruct_sym);
|
1594
|
+
with_xml_sym = ID2SYM(rb_intern("with_xml"));
|
1595
|
+
rb_gc_register_address(&with_xml_sym);
|
1596
|
+
xsd_date_sym = ID2SYM(rb_intern("xsd_date"));
|
1597
|
+
rb_gc_register_address(&xsd_date_sym);
|
1598
|
+
|
1599
|
+
ox_empty_string = rb_str_new2("");
|
1600
|
+
rb_gc_register_address(&ox_empty_string);
|
1601
|
+
ox_zero_fixnum = INT2NUM(0);
|
1602
|
+
rb_gc_register_address(&ox_zero_fixnum);
|
1603
|
+
ox_sym_bank = rb_ary_new();
|
1604
|
+
rb_gc_register_address(&ox_sym_bank);
|
1554
1605
|
|
1555
1606
|
ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
|
1556
|
-
ox_element_clas
|
1607
|
+
ox_element_clas = rb_const_get_at(Ox, rb_intern("Element"));
|
1557
1608
|
ox_instruct_clas = rb_const_get_at(Ox, rb_intern("Instruct"));
|
1558
|
-
ox_comment_clas
|
1559
|
-
ox_raw_clas
|
1560
|
-
ox_doctype_clas
|
1561
|
-
ox_cdata_clas
|
1562
|
-
ox_bag_clas
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1609
|
+
ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment"));
|
1610
|
+
ox_raw_clas = rb_const_get_at(Ox, rb_intern("Raw"));
|
1611
|
+
ox_doctype_clas = rb_const_get_at(Ox, rb_intern("DocType"));
|
1612
|
+
ox_cdata_clas = rb_const_get_at(Ox, rb_intern("CData"));
|
1613
|
+
ox_bag_clas = rb_const_get_at(Ox, rb_intern("Bag"));
|
1614
|
+
|
1615
|
+
// Classes can move in more recent versions so register them all.
|
1616
|
+
rb_gc_register_address(&Ox);
|
1617
|
+
rb_gc_register_address(&ox_arg_error_class);
|
1618
|
+
rb_gc_register_address(&ox_bag_clas);
|
1619
|
+
rb_gc_register_address(&ox_bag_clas);
|
1620
|
+
rb_gc_register_address(&ox_bigdecimal_class);
|
1621
|
+
rb_gc_register_address(&ox_cdata_clas);
|
1622
|
+
rb_gc_register_address(&ox_cdata_clas);
|
1623
|
+
rb_gc_register_address(&ox_comment_clas);
|
1624
|
+
rb_gc_register_address(&ox_comment_clas);
|
1625
|
+
rb_gc_register_address(&ox_date_class);
|
1626
|
+
rb_gc_register_address(&ox_doctype_clas);
|
1627
|
+
rb_gc_register_address(&ox_doctype_clas);
|
1628
|
+
rb_gc_register_address(&ox_document_clas);
|
1629
|
+
rb_gc_register_address(&ox_document_clas);
|
1630
|
+
rb_gc_register_address(&ox_element_clas);
|
1631
|
+
rb_gc_register_address(&ox_element_clas);
|
1632
|
+
rb_gc_register_address(&ox_encoding_sym);
|
1633
|
+
rb_gc_register_address(&ox_indent_sym);
|
1634
|
+
rb_gc_register_address(&ox_instruct_clas);
|
1635
|
+
rb_gc_register_address(&ox_instruct_clas);
|
1636
|
+
rb_gc_register_address(&ox_parse_error_class);
|
1637
|
+
rb_gc_register_address(&ox_raw_clas);
|
1638
|
+
rb_gc_register_address(&ox_raw_clas);
|
1639
|
+
rb_gc_register_address(&ox_size_sym);
|
1640
|
+
rb_gc_register_address(&ox_standalone_sym);
|
1641
|
+
rb_gc_register_address(&ox_stringio_class);
|
1642
|
+
rb_gc_register_address(&ox_struct_class);
|
1643
|
+
rb_gc_register_address(&ox_syntax_error_class);
|
1644
|
+
rb_gc_register_address(&ox_time_class);
|
1645
|
+
rb_gc_register_address(&ox_version_sym);
|
1646
|
+
|
1647
|
+
slot_cache_new(&ox_class_cache);
|
1567
1648
|
|
1568
1649
|
ox_sax_define();
|
1650
|
+
ox_hash_init();
|
1569
1651
|
|
1570
1652
|
#if WITH_CACHE_TESTS
|
1571
1653
|
// space added to stop yardoc from trying to document
|
@@ -1573,9 +1655,7 @@ void Init_ox() {
|
|
1573
1655
|
rb_define _module_function(Ox, "cache8_test", cache8_test, 0);
|
1574
1656
|
#endif
|
1575
1657
|
|
1576
|
-
#if HAVE_RB_ENC_FIND
|
1577
1658
|
ox_utf8_encoding = rb_enc_find("UTF-8");
|
1578
|
-
#endif
|
1579
1659
|
}
|
1580
1660
|
|
1581
1661
|
#if __GNUC__ > 4
|
@@ -1583,17 +1663,17 @@ _Noreturn void
|
|
1583
1663
|
#else
|
1584
1664
|
void
|
1585
1665
|
#endif
|
1586
|
-
_ox_raise_error(const char *msg, const char *xml, const char *current, const char*
|
1587
|
-
int
|
1588
|
-
int
|
1666
|
+
_ox_raise_error(const char *msg, const char *xml, const char *current, const char *file, int line) {
|
1667
|
+
int xline = 1;
|
1668
|
+
int col = 1;
|
1589
1669
|
|
1590
1670
|
for (; xml < current && '\n' != *current; current--) {
|
1591
|
-
|
1671
|
+
col++;
|
1592
1672
|
}
|
1593
1673
|
for (; xml < current; current--) {
|
1594
|
-
|
1595
|
-
|
1596
|
-
|
1674
|
+
if ('\n' == *current) {
|
1675
|
+
xline++;
|
1676
|
+
}
|
1597
1677
|
}
|
1598
1678
|
#ifdef RB_GC_GUARD
|
1599
1679
|
rb_gc_enable();
|