ox 2.14.6 → 2.14.7

Sign up to get free protection for your applications and to get access to all the features.
data/ext/ox/sax.c CHANGED
@@ -4,66 +4,63 @@
4
4
  */
5
5
 
6
6
  #include <ctype.h>
7
- #include <stdlib.h>
8
7
  #include <errno.h>
9
8
  #include <stdio.h>
9
+ #include <stdlib.h>
10
10
  #include <strings.h>
11
11
  #include <sys/types.h>
12
12
  #if HAVE_SYS_UIO_H
13
13
  #include <sys/uio.h>
14
14
  #endif
15
- #include <unistd.h>
16
15
  #include <time.h>
16
+ #include <unistd.h>
17
17
 
18
+ #include "intern.h"
19
+ #include "ox.h"
18
20
  #include "ruby.h"
19
- #if HAVE_RB_ENC_ASSOCIATE
20
21
  #include "ruby/encoding.h"
21
- #endif
22
- #include "ox.h"
23
22
  #include "sax.h"
24
- #include "sax_stack.h"
25
23
  #include "sax_buf.h"
24
+ #include "sax_stack.h"
26
25
  #include "special.h"
27
26
 
28
- #define NAME_MISMATCH 1
27
+ #define NAME_MISMATCH 1
29
28
 
30
- #define START_STATE 1
31
- #define BODY_STATE 2
32
- #define AFTER_STATE 3
29
+ #define START_STATE 1
30
+ #define BODY_STATE 2
31
+ #define AFTER_STATE 3
33
32
 
34
33
  // error prefixes
35
- #define BAD_BOM "Bad BOM: "
36
- #define NO_TERM "Not Terminated: "
37
- #define INVALID_FORMAT "Invalid Format: "
38
- #define CASE_ERROR "Case Error: "
39
- #define OUT_OF_ORDER "Out of Order: "
40
- #define WRONG_CHAR "Unexpected Character: "
41
- #define EL_MISMATCH "Start End Mismatch: "
42
- #define INV_ELEMENT "Invalid Element: "
43
-
44
- #define UTF8_STR "UTF-8"
45
-
46
- static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
47
- static void parse(SaxDrive dr);
34
+ #define BAD_BOM "Bad BOM: "
35
+ #define NO_TERM "Not Terminated: "
36
+ #define INVALID_FORMAT "Invalid Format: "
37
+ #define CASE_ERROR "Case Error: "
38
+ #define OUT_OF_ORDER "Out of Order: "
39
+ #define WRONG_CHAR "Unexpected Character: "
40
+ #define EL_MISMATCH "Start End Mismatch: "
41
+ #define INV_ELEMENT "Invalid Element: "
42
+
43
+ #define UTF8_STR "UTF-8"
44
+
45
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
46
+ static void parse(SaxDrive dr);
48
47
  // All read functions should return the next character after the 'thing' that was read and leave dr->cur one after that.
49
- static char read_instruction(SaxDrive dr);
50
- static char read_doctype(SaxDrive dr);
51
- static char read_cdata(SaxDrive dr);
52
- static char read_comment(SaxDrive dr);
53
- static char read_element_start(SaxDrive dr);
54
- static char read_element_end(SaxDrive dr);
55
- static char read_text(SaxDrive dr);
56
- static char read_jump(SaxDrive dr, const char *pat);
57
- static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
58
- static char read_name_token(SaxDrive dr);
59
- static char read_quoted_value(SaxDrive dr);
60
-
61
- static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h);
62
-
63
- static void hint_clear_empty(SaxDrive dr);
64
- static Nv hint_try_close(SaxDrive dr, const char *name);
65
-
66
- VALUE ox_sax_value_class = Qnil;
48
+ static char read_instruction(SaxDrive dr);
49
+ static char read_doctype(SaxDrive dr);
50
+ static char read_cdata(SaxDrive dr);
51
+ static char read_comment(SaxDrive dr);
52
+ static char read_element_start(SaxDrive dr);
53
+ static char read_element_end(SaxDrive dr);
54
+ static char read_text(SaxDrive dr);
55
+ static char read_jump(SaxDrive dr, const char *pat);
56
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
57
+ static char read_name_token(SaxDrive dr);
58
+ static char read_quoted_value(SaxDrive dr);
59
+
60
+ static void hint_clear_empty(SaxDrive dr);
61
+ static Nv hint_try_close(SaxDrive dr, const char *name);
62
+
63
+ VALUE ox_sax_value_class = Qnil;
67
64
 
68
65
  static VALUE protect_parse(VALUE drp) {
69
66
  parse((SaxDrive)drp);
@@ -71,562 +68,561 @@ static VALUE protect_parse(VALUE drp) {
71
68
  return Qnil;
72
69
  }
73
70
 
74
- #if HAVE_RB_ENC_ASSOCIATE
75
- static int
76
- str_is_ascii(const char *s) {
77
- for (; '\0' != *s; s++) {
78
- if (*s < ' ' || '~' < *s) {
79
- return 0;
80
- }
81
- }
82
- return 1;
83
- }
84
- #endif
85
-
86
71
  VALUE
87
- str2sym(SaxDrive dr, const char *str, const char **strp) {
88
- VALUE *slot;
89
- VALUE sym;
72
+ str2sym(SaxDrive dr, const char *str, size_t len, const char **strp) {
73
+ VALUE sym;
90
74
 
91
75
  if (dr->options.symbolize) {
92
- if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
93
- #if HAVE_RB_ENC_ASSOCIATE
94
- if (0 != dr->encoding && !str_is_ascii(str)) {
95
- VALUE rstr = rb_str_new2(str);
96
-
97
- // TBD if sym can be pinned down then use this all the time
98
- rb_enc_associate(rstr, dr->encoding);
99
- sym = rb_funcall(rstr, ox_to_sym_id, 0);
100
- *slot = Qundef;
101
- } else {
102
- sym = ID2SYM(rb_intern(str));
103
- *slot = sym;
104
- }
105
- #else
106
- sym = ID2SYM(rb_intern(str));
107
- *slot = sym;
108
- #endif
109
- }
76
+ sym = ox_sym_intern(str, len, strp);
110
77
  } else {
111
- sym = rb_str_new2(str);
112
- #if HAVE_RB_ENC_ASSOCIATE
113
- if (0 != dr->encoding) {
114
- rb_enc_associate(sym, dr->encoding);
115
- }
116
- #endif
117
- if (0 != strp) {
118
- *strp = StringValuePtr(sym);
119
- }
78
+ sym = dr->get_name(str, len, dr->encoding, strp);
120
79
  }
121
80
  return sym;
122
81
  }
123
82
 
124
- void
125
- ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
83
+ void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
126
84
  #if HAVE_RB_EXT_RACTOR_SAFE
127
85
  rb_ext_ractor_safe(true);
128
86
  #endif
129
- struct _saxDrive dr;
130
- int line = 0;
87
+ struct _saxDrive dr;
88
+ int line = 0;
131
89
 
132
90
  sax_drive_init(&dr, handler, io, options);
133
- #if 0
134
- printf("*** sax_parse with these flags\n");
135
- printf(" has_instruct = %s\n", dr.has.instruct ? "true" : "false");
136
- printf(" has_end_instruct = %s\n", dr.has.end_instruct ? "true" : "false");
137
- printf(" has_attr = %s\n", dr.has.attr ? "true" : "false");
138
- printf(" has_attr_value = %s\n", dr.has.attr_value ? "true" : "false");
139
- printf(" has_attrs_done = %s\n", dr.has.attrs_done ? "true" : "false");
140
- printf(" has_doctype = %s\n", dr.has.doctype ? "true" : "false");
141
- printf(" has_comment = %s\n", dr.has.comment ? "true" : "false");
142
- printf(" has_cdata = %s\n", dr.has.cdata ? "true" : "false");
143
- printf(" has_text = %s\n", dr.has.text ? "true" : "false");
144
- printf(" has_value = %s\n", dr.has.value ? "true" : "false");
145
- printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false");
146
- printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false");
147
- printf(" has_error = %s\n", dr.has.error ? "true" : "false");
148
- printf(" has_pos = %s\n", dr.has.pos ? "true" : "false");
149
- printf(" has_line = %s\n", dr.has.line ? "true" : "false");
150
- printf(" has_column = %s\n", dr.has.column ? "true" : "false");
151
- #endif
152
- //parse(&dr);
153
91
  rb_protect(protect_parse, (VALUE)&dr, &line);
154
92
  ox_sax_drive_cleanup(&dr);
155
93
  if (0 != line) {
156
- rb_jump_tag(line);
94
+ rb_jump_tag(line);
95
+ }
96
+ }
97
+
98
+ static void set_long_noop(VALUE handler, long pos) {
99
+ }
100
+
101
+ static void set_pos(VALUE handler, long pos) {
102
+ rb_ivar_set(handler, ox_at_pos_id, LONG2NUM(pos));
103
+ }
104
+
105
+ static void set_line(VALUE handler, long line) {
106
+ rb_ivar_set(handler, ox_at_line_id, LONG2NUM(line));
107
+ }
108
+
109
+ static void set_col(VALUE handler, long col) {
110
+ rb_ivar_set(handler, ox_at_column_id, LONG2NUM(col));
111
+ }
112
+
113
+ static void attr_noop(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
114
+ }
115
+
116
+ static void attr_text(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
117
+ VALUE args[2];
118
+
119
+ args[0] = name;
120
+ if (dr->options.convert_special) {
121
+ ox_sax_collapse_special(dr, value, pos, line, col);
122
+ }
123
+ args[1] = rb_str_new2(value);
124
+ if (0 != dr->encoding) {
125
+ rb_enc_associate(args[1], dr->encoding);
126
+ }
127
+ dr->set_pos(dr->handler, pos);
128
+ dr->set_line(dr->handler, line);
129
+ dr->set_col(dr->handler, col);
130
+ rb_funcall2(dr->handler, ox_attr_id, 2, args);
131
+ }
132
+
133
+ static void attr_value(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
134
+ VALUE args[2];
135
+
136
+ dr->set_pos(dr->handler, pos);
137
+ dr->set_line(dr->handler, line);
138
+ dr->set_col(dr->handler, col);
139
+ args[0] = name;
140
+ args[1] = dr->value_obj;
141
+ rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
142
+ }
143
+
144
+ static void attrs_done_noop(VALUE handler) {
145
+ }
146
+
147
+ static void attrs_done(VALUE handler) {
148
+ rb_funcall(handler, ox_attrs_done_id, 0);
149
+ }
150
+
151
+ static VALUE instruct_noop(SaxDrive dr, const char *target, long pos, long line, long col) {
152
+ return Qnil;
153
+ }
154
+
155
+ static VALUE instruct(SaxDrive dr, const char *target, long pos, long line, long col) {
156
+ VALUE arg = rb_str_new2(target);
157
+
158
+ dr->set_pos(dr->handler, pos);
159
+ dr->set_line(dr->handler, line);
160
+ dr->set_col(dr->handler, col);
161
+ rb_funcall(dr->handler, ox_instruct_id, 1, arg);
162
+
163
+ return arg;
164
+ }
165
+
166
+ static VALUE instruct_just_value(SaxDrive dr, const char *target, long pos, long line, long col) {
167
+ return rb_str_new2(target);
168
+ }
169
+
170
+ static void end_instruct_noop(SaxDrive dr, VALUE target, long pos, long line, long col) {
171
+ }
172
+
173
+ static void end_instruct(SaxDrive dr, VALUE target, long pos, long line, long col) {
174
+ dr->set_pos(dr->handler, pos);
175
+ dr->set_line(dr->handler, line);
176
+ dr->set_col(dr->handler, col);
177
+ rb_funcall(dr->handler, ox_end_instruct_id, 1, target);
178
+ }
179
+
180
+ static void dr_loc_noop(SaxDrive dr, long pos, long line, long col) {
181
+ }
182
+
183
+ static void comment(SaxDrive dr, long pos, long line, long col) {
184
+ if (!dr->blocked) {
185
+ Nv parent = stack_peek(&dr->stack);
186
+ Hint h = ox_hint_find(dr->options.hints, "!--");
187
+
188
+ if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
189
+ (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
190
+ VALUE arg = rb_str_new2(dr->buf.str);
191
+
192
+ if (0 != dr->encoding) {
193
+ rb_enc_associate(arg, dr->encoding);
194
+ }
195
+ dr->set_pos(dr->handler, pos);
196
+ dr->set_line(dr->handler, line);
197
+ dr->set_col(dr->handler, col);
198
+ rb_funcall(dr->handler, ox_comment_id, 1, arg);
199
+ }
200
+ }
201
+ }
202
+
203
+ static void cdata(SaxDrive dr, long pos, long line, long col) {
204
+ Nv parent = stack_peek(&dr->stack);
205
+
206
+ if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
207
+ VALUE arg = rb_str_new2(dr->buf.str);
208
+
209
+ if (0 != dr->encoding) {
210
+ rb_enc_associate(arg, dr->encoding);
211
+ }
212
+ dr->set_pos(dr->handler, pos);
213
+ dr->set_line(dr->handler, line);
214
+ dr->set_col(dr->handler, col);
215
+ rb_funcall(dr->handler, ox_cdata_id, 1, arg);
216
+ }
217
+ }
218
+
219
+ static void doctype(SaxDrive dr, long pos, long line, long col) {
220
+ dr->set_pos(dr->handler, pos);
221
+ dr->set_line(dr->handler, line);
222
+ dr->set_col(dr->handler, col);
223
+ rb_funcall(dr->handler, ox_doctype_id, 1, rb_str_new2(dr->buf.str));
224
+ }
225
+
226
+ static void error_noop(SaxDrive dr, const char *msg, long pos, long line, long col) {
227
+ }
228
+
229
+ static void error(SaxDrive dr, const char *msg, long pos, long line, long col) {
230
+ VALUE args[3];
231
+
232
+ args[0] = rb_str_new2(msg);
233
+ args[1] = LONG2NUM(line);
234
+ args[2] = LONG2NUM(col);
235
+ dr->set_pos(dr->handler, pos);
236
+ dr->set_line(dr->handler, line);
237
+ dr->set_col(dr->handler, col);
238
+ rb_funcall2(dr->handler, ox_error_id, 3, args);
239
+ }
240
+
241
+ static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
242
+ if (dr->has_end_element && 0 >= dr->blocked &&
243
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
244
+ dr->set_pos(dr->handler, pos);
245
+ dr->set_line(dr->handler, line);
246
+ dr->set_col(dr->handler, col);
247
+ rb_funcall(dr->handler, ox_end_element_id, 1, name);
248
+ }
249
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
250
+ dr->blocked--;
157
251
  }
158
252
  }
159
253
 
160
- static void
161
- sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
254
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
162
255
  ox_sax_buf_init(&dr->buf, io);
163
256
  dr->buf.dr = dr;
164
257
  stack_init(&dr->stack);
165
- dr->handler = handler;
258
+ dr->handler = handler;
166
259
  dr->value_obj = Data_Wrap_Struct(ox_sax_value_class, 0, 0, dr);
167
260
  rb_gc_register_address(&dr->value_obj);
168
261
  dr->options = *options;
169
- dr->err = 0;
262
+ dr->err = 0;
170
263
  dr->blocked = 0;
171
- dr->abort = false;
172
- has_init(&dr->has, handler);
173
- #if HAVE_RB_ENC_FIND
264
+ dr->abort = false;
265
+
266
+ dr->set_pos = (Qtrue == rb_ivar_defined(handler, ox_at_pos_id)) ? set_pos : set_long_noop;
267
+ dr->set_line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id)) ? set_line : set_long_noop;
268
+ dr->set_col = (Qtrue == rb_ivar_defined(handler, ox_at_column_id)) ? set_col : set_long_noop;
269
+ if (rb_respond_to(handler, ox_attr_value_id)) {
270
+ dr->attr_cb = attr_value;
271
+ dr->want_attr_name = true;
272
+ } else if (rb_respond_to(handler, ox_attr_id)) {
273
+ dr->attr_cb = attr_text;
274
+ dr->want_attr_name = true;
275
+ } else {
276
+ dr->attr_cb = attr_noop;
277
+ dr->want_attr_name = false;
278
+ }
279
+ dr->attrs_done = rb_respond_to(handler, ox_attrs_done_id) ? attrs_done : attrs_done_noop;
280
+ dr->instruct = rb_respond_to(handler, ox_instruct_id) ? instruct : instruct_noop;
281
+ dr->end_instruct = rb_respond_to(handler, ox_end_instruct_id) ? end_instruct : end_instruct_noop;
282
+ if (rb_respond_to(handler, ox_end_instruct_id) && !rb_respond_to(handler, ox_instruct_id)) {
283
+ dr->instruct = instruct_just_value;
284
+ }
285
+ dr->doctype = rb_respond_to(handler, ox_doctype_id) ? doctype : dr_loc_noop;
286
+ dr->comment = rb_respond_to(handler, ox_comment_id) ? comment : dr_loc_noop;
287
+ dr->cdata = rb_respond_to(handler, ox_cdata_id) ? cdata : dr_loc_noop;
288
+ dr->error = rb_respond_to(handler, ox_error_id) ? error : error_noop;
289
+
290
+ dr->has_text = rb_respond_to(handler, ox_text_id);
291
+ dr->has_value = rb_respond_to(handler, ox_value_id);
292
+ dr->has_start_element = rb_respond_to(handler, ox_start_element_id);
293
+ dr->has_end_element = rb_respond_to(handler, ox_end_element_id);
294
+
174
295
  if ('\0' == *ox_default_options.encoding) {
175
- VALUE encoding;
176
-
177
- dr->encoding = 0;
178
- if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
179
- int e = rb_enc_get_index(encoding);
180
- if (0 <= e) {
181
- dr->encoding = rb_enc_from_index(e);
182
- }
183
- }
296
+ VALUE encoding;
297
+
298
+ dr->encoding = 0;
299
+ if (rb_respond_to(io, ox_external_encoding_id) &&
300
+ Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
301
+ int e = rb_enc_get_index(encoding);
302
+ if (0 <= e) {
303
+ dr->encoding = rb_enc_from_index(e);
304
+ }
305
+ }
184
306
  } else {
185
307
  dr->encoding = rb_enc_find(ox_default_options.encoding);
186
308
  }
187
- #else
188
- dr->encoding = 0;
189
- #endif
309
+ dr->utf8 = (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding);
310
+ if (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding) { // UTF-8
311
+ dr->get_name = dr->options.symbolize ? ox_utf8_sym : ox_utf8_name; // TBD UTF8 sym?
312
+ } else {
313
+ dr->get_name = dr->options.symbolize ? ox_enc_sym : ox_enc_name;
314
+ }
190
315
  }
191
316
 
192
- void
193
- ox_sax_drive_cleanup(SaxDrive dr) {
317
+ void ox_sax_drive_cleanup(SaxDrive dr) {
194
318
  rb_gc_unregister_address(&dr->value_obj);
195
319
  buf_cleanup(&dr->buf);
196
320
  stack_cleanup(&dr->stack);
197
321
  }
198
322
 
199
- static void
200
- ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
201
- if (dr->has.error) {
202
- VALUE args[3];
203
-
204
- args[0] = rb_str_new2(msg);
205
- args[1] = LONG2NUM(line);
206
- args[2] = LONG2NUM(col);
207
- if (dr->has.pos) {
208
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
209
- }
210
- if (dr->has.pos) {
211
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
212
- }
213
- if (dr->has.line) {
214
- rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
215
- }
216
- if (dr->has.column) {
217
- rb_ivar_set(dr->handler, ox_at_column_id, args[2]);
218
- }
219
- rb_funcall2(dr->handler, ox_error_id, 3, args);
220
- }
323
+ static void ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
324
+ dr->error(dr, msg, pos, line, col);
221
325
  }
222
326
 
223
- void
224
- ox_sax_drive_error(SaxDrive dr, const char *msg) {
327
+ void ox_sax_drive_error(SaxDrive dr, const char *msg) {
225
328
  ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
226
329
  }
227
330
 
228
- static char
229
- skipBOM(SaxDrive dr) {
230
- char c = buf_get(&dr->buf);
331
+ static char skipBOM(SaxDrive dr) {
332
+ char c = buf_get(&dr->buf);
231
333
 
232
334
  if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
233
- if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
234
- #if HAVE_RB_ENC_FIND
235
- dr->encoding = ox_utf8_encoding;
236
- #else
237
- dr->encoding = UTF8_STR;
238
- #endif
239
- c = buf_get(&dr->buf);
240
- } else {
241
- ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
242
- c = '\0';
243
- }
335
+ if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
336
+ dr->encoding = ox_utf8_encoding;
337
+ c = buf_get(&dr->buf);
338
+ } else {
339
+ ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
340
+ c = '\0';
341
+ }
244
342
  }
245
343
  return c;
246
344
  }
247
345
 
248
- static void
249
- parse(SaxDrive dr) {
250
- char c = skipBOM(dr);
251
- int state = START_STATE;
252
- Nv parent;
346
+ static void parse(SaxDrive dr) {
347
+ char c = skipBOM(dr);
348
+ int state = START_STATE;
349
+ Nv parent;
253
350
 
254
351
  while ('\0' != c) {
255
- buf_protect(&dr->buf);
256
- if ('<' == c) {
257
- c = buf_get(&dr->buf);
258
- switch (c) {
259
- case '?': /* instructions (xml or otherwise) */
260
- c = read_instruction(dr);
261
- break;
262
- case '!': /* comment or doctype */
263
- buf_protect(&dr->buf);
264
- c = buf_get(&dr->buf);
265
- if ('\0' == c) {
266
- ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
267
-
268
- goto DONE;
269
- } else if ('-' == c) {
270
- c = buf_get(&dr->buf); /* skip first - and get next character */
271
- if ('-' != c) {
272
- ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
273
- } else {
274
- c = buf_get(&dr->buf); /* skip second - */
275
- }
276
- c = read_comment(dr);
277
- } else {
278
- int i;
279
- int spaced = 0;
280
- off_t pos = dr->buf.pos + 1;
281
- off_t line = dr->buf.line;
282
- off_t col = dr->buf.col + 1;
283
-
284
- if (is_white(c)) {
285
- spaced = 1;
286
- c = buf_next_non_white(&dr->buf);
287
- }
288
- dr->buf.str = dr->buf.tail - 1;
289
- for (i = 7; 0 < i; i--) {
290
- c = buf_get(&dr->buf);
291
- }
292
- if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
293
- if (spaced) {
294
- ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
295
- }
296
- if (START_STATE != state) {
297
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
298
- }
299
- c = read_doctype(dr);
300
- } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
301
- if (!dr->options.smart) {
302
- ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
303
- }
304
- if (START_STATE != state) {
305
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
306
- }
307
- c = read_doctype(dr);
308
- } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
309
- if (spaced) {
310
- ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
311
- }
312
- c = read_cdata(dr);
313
- } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
314
- if (!dr->options.smart) {
315
- ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
316
- }
317
- c = read_cdata(dr);
318
- } else {
319
- Nv parent = stack_peek(&dr->stack);
320
-
321
- if (0 != parent) {
322
- parent->childCnt++;
323
- }
324
- ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
325
- c = read_name_token(dr);
326
- if ('>' == c) {
327
- c = buf_get(&dr->buf);
328
- }
329
- }
330
- }
331
- break;
332
- case '/': /* element end */
333
- parent = stack_peek(&dr->stack);
334
- if (0 != parent && 0 == parent->childCnt && dr->has.text && !dr->blocked) {
335
- VALUE args[1];
336
- off_t pos = dr->buf.pos;
337
- off_t line = dr->buf.line;
338
- off_t col = dr->buf.col - 1;
339
-
340
- args[0] = rb_str_new2("");
341
- #if HAVE_RB_ENC_ASSOCIATE
342
- if (0 != dr->encoding) {
343
- rb_enc_associate(args[0], dr->encoding);
344
- }
345
- #endif
346
- if (dr->has.pos) {
347
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
348
- }
349
- if (dr->has.line) {
350
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
351
- }
352
- if (dr->has.column) {
353
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
354
- }
355
- rb_funcall2(dr->handler, ox_text_id, 1, args);
356
- }
357
- c = read_element_end(dr);
358
- if (0 == stack_peek(&dr->stack)) {
359
- state = AFTER_STATE;
360
- }
361
- break;
362
- case '\0':
363
- goto DONE;
364
- default:
365
- buf_backup(&dr->buf);
366
- if (AFTER_STATE == state) {
367
- ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
368
- }
369
- state = BODY_STATE;
370
- c = read_element_start(dr);
371
- if (0 == stack_peek(&dr->stack)) {
372
- state = AFTER_STATE;
373
- }
374
- break;
375
- }
376
- } else {
377
- buf_reset(&dr->buf);
378
- c = read_text(dr);
379
- }
380
- }
381
- DONE:
352
+ buf_protect(&dr->buf);
353
+ if ('<' == c) {
354
+ c = buf_get(&dr->buf);
355
+ switch (c) {
356
+ case '?': /* instructions (xml or otherwise) */ c = read_instruction(dr); break;
357
+ case '!': /* comment or doctype */
358
+ buf_protect(&dr->buf);
359
+ c = buf_get(&dr->buf);
360
+ if ('\0' == c) {
361
+ ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
362
+
363
+ goto DONE;
364
+ } else if ('-' == c) {
365
+ c = buf_get(&dr->buf); /* skip first - and get next character */
366
+ if ('-' != c) {
367
+ ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
368
+ } else {
369
+ c = buf_get(&dr->buf); /* skip second - */
370
+ }
371
+ c = read_comment(dr);
372
+ } else {
373
+ int i;
374
+ int spaced = 0;
375
+ off_t pos = dr->buf.pos + 1;
376
+ off_t line = dr->buf.line;
377
+ off_t col = dr->buf.col + 1;
378
+
379
+ if (is_white(c)) {
380
+ spaced = 1;
381
+ c = buf_next_non_white(&dr->buf);
382
+ }
383
+ dr->buf.str = dr->buf.tail - 1;
384
+ for (i = 7; 0 < i; i--) {
385
+ c = buf_get(&dr->buf);
386
+ }
387
+ if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
388
+ if (spaced) {
389
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
390
+ }
391
+ if (START_STATE != state) {
392
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
393
+ }
394
+ c = read_doctype(dr);
395
+ } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
396
+ if (!dr->options.smart) {
397
+ ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
398
+ }
399
+ if (START_STATE != state) {
400
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
401
+ }
402
+ c = read_doctype(dr);
403
+ } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
404
+ if (spaced) {
405
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
406
+ }
407
+ c = read_cdata(dr);
408
+ } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
409
+ if (!dr->options.smart) {
410
+ ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
411
+ }
412
+ c = read_cdata(dr);
413
+ } else {
414
+ Nv parent = stack_peek(&dr->stack);
415
+
416
+ if (0 != parent) {
417
+ parent->childCnt++;
418
+ }
419
+ ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
420
+ c = read_name_token(dr);
421
+ if ('>' == c) {
422
+ c = buf_get(&dr->buf);
423
+ }
424
+ }
425
+ }
426
+ break;
427
+ case '/': /* element end */
428
+ parent = stack_peek(&dr->stack);
429
+ if (0 != parent && 0 == parent->childCnt && dr->has_text && !dr->blocked) {
430
+ VALUE args[1];
431
+ args[0] = rb_str_new2("");
432
+ if (0 != dr->encoding) {
433
+ rb_enc_associate(args[0], dr->encoding);
434
+ }
435
+ dr->set_pos(dr->handler, dr->buf.pos);
436
+ dr->set_line(dr->handler, dr->buf.line);
437
+ dr->set_col(dr->handler, dr->buf.col);
438
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
439
+ }
440
+ c = read_element_end(dr);
441
+ if (0 == stack_peek(&dr->stack)) {
442
+ state = AFTER_STATE;
443
+ }
444
+ break;
445
+ case '\0': goto DONE;
446
+ default:
447
+ buf_backup(&dr->buf);
448
+ if (AFTER_STATE == state) {
449
+ ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
450
+ }
451
+ state = BODY_STATE;
452
+ c = read_element_start(dr);
453
+ if (0 == stack_peek(&dr->stack)) {
454
+ state = AFTER_STATE;
455
+ }
456
+ break;
457
+ }
458
+ } else {
459
+ buf_reset(&dr->buf);
460
+ c = read_text(dr);
461
+ }
462
+ }
463
+ DONE:
382
464
  if (dr->abort) {
383
- return;
465
+ return;
384
466
  }
385
467
  if (dr->stack.head < dr->stack.tail) {
386
- char msg[256];
387
- Nv sp;
388
-
389
- if (dr->has.pos) {
390
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(dr->buf.pos));
391
- }
392
- if (dr->has.line) {
393
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
394
- }
395
- if (dr->has.column) {
396
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(dr->buf.col));
397
- }
398
- for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
399
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
400
- ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
401
- if (dr->has.end_element && 0 >= dr->blocked &&
402
- (NULL == sp->hint || ActiveOverlay == sp->hint->overlay || NestOverlay == sp->hint->overlay)) {
403
- VALUE args[1];
404
-
405
- args[0] = sp->val;
406
- rb_funcall2(dr->handler, ox_end_element_id, 1, args);
407
- }
408
- if (dr->blocked && NULL != sp->hint && BlockOverlay == sp->hint->overlay) {
409
- dr->blocked--;
410
- }
468
+ char msg[256];
469
+ Nv sp;
470
+
471
+ for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
472
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
473
+ ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
474
+ end_element_cb(dr, sp->val, dr->buf.pos, dr->buf.line, dr->buf.col, sp->hint);
411
475
  }
412
476
  }
413
477
  }
414
478
 
415
- static void
416
- read_content(SaxDrive dr, char *content, size_t len) {
417
- char c;
418
- char *end = content + len;
479
+ static void read_content(SaxDrive dr, char *content, size_t len) {
480
+ char c;
481
+ char *end = content + len;
419
482
 
420
483
  while ('\0' != (c = buf_get(&dr->buf))) {
421
- if (end <= content) {
422
- *content = '\0';
423
- ox_sax_drive_error(dr, "processing instruction content too large");
424
- return;
425
- }
426
- if ('?' == c) {
427
- if ('\0' == (c = buf_get(&dr->buf))) {
428
- ox_sax_drive_error(dr, NO_TERM "document not terminated");
429
- }
430
- if ('>' == c) {
431
- *content = '\0';
432
- return;
433
- } else {
434
- *content++ = c;
435
- }
436
- } else {
437
- *content++ = c;
438
- }
484
+ if (end <= content) {
485
+ *content = '\0';
486
+ ox_sax_drive_error(dr, "processing instruction content too large");
487
+ return;
488
+ }
489
+ if ('?' == c) {
490
+ if ('\0' == (c = buf_get(&dr->buf))) {
491
+ ox_sax_drive_error(dr, NO_TERM "document not terminated");
492
+ }
493
+ if ('>' == c) {
494
+ *content = '\0';
495
+ return;
496
+ } else {
497
+ *content++ = c;
498
+ }
499
+ } else {
500
+ *content++ = c;
501
+ }
439
502
  }
440
503
  *content = '\0';
441
504
  }
442
505
 
443
506
  /* Entered after the "<?" sequence. Ready to read the rest.
444
507
  */
445
- static char
446
- read_instruction(SaxDrive dr) {
447
- char content[4096];
448
- char c;
449
- int coff;
450
- VALUE target = Qnil;
451
- int is_xml;
452
- off_t pos = dr->buf.pos - 1;
453
- off_t line = dr->buf.line;
454
- off_t col = dr->buf.col - 1;
508
+ static char read_instruction(SaxDrive dr) {
509
+ char content[4096];
510
+ char c;
511
+ int coff;
512
+ VALUE target = Qnil;
513
+ int is_xml;
514
+ off_t pos = dr->buf.pos - 1;
515
+ off_t line = dr->buf.line;
516
+ off_t col = dr->buf.col - 1;
455
517
 
456
518
  buf_protect(&dr->buf);
457
519
  if ('\0' == (c = read_name_token(dr))) {
458
520
  return c;
459
521
  }
460
522
  is_xml = (0 == (dr->options.smart ? strcasecmp("xml", dr->buf.str) : strcmp("xml", dr->buf.str)));
461
- if (dr->has.instruct || dr->has.end_instruct) {
462
- target = rb_str_new2(dr->buf.str);
463
- }
464
- if (dr->has.instruct) {
465
- VALUE args[1];
466
-
467
- if (dr->has.pos) {
468
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
469
- }
470
- if (dr->has.line) {
471
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
472
- }
473
- if (dr->has.column) {
474
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
475
- }
476
- args[0] = target;
477
- rb_funcall2(dr->handler, ox_instruct_id, 1, args);
478
- }
523
+
524
+ target = dr->instruct(dr, dr->buf.str, pos, line, col);
479
525
  buf_protect(&dr->buf);
480
- pos = dr->buf.pos;
526
+ pos = dr->buf.pos;
481
527
  line = dr->buf.line;
482
- col = dr->buf.col;
528
+ col = dr->buf.col;
483
529
  read_content(dr, content, sizeof(content) - 1);
484
530
  coff = (int)(dr->buf.tail - dr->buf.head);
485
531
  buf_reset(&dr->buf);
486
532
  dr->err = 0;
487
- c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
488
- if (dr->has.attrs_done) {
489
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
490
- }
533
+ c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
534
+ dr->attrs_done(dr->handler);
491
535
  if (dr->err) {
492
- if (dr->has.text) {
493
- VALUE args[1];
494
-
495
- if (dr->options.convert_special) {
496
- ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
497
- }
498
- args[0] = rb_str_new2(content);
499
- #if HAVE_RB_ENC_ASSOCIATE
500
- if (0 != dr->encoding) {
501
- rb_enc_associate(args[0], dr->encoding);
502
- }
503
- #endif
504
- if (dr->has.line) {
505
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
506
- }
507
- if (dr->has.pos) {
508
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
509
- }
510
- if (dr->has.column) {
511
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
512
- }
513
- rb_funcall2(dr->handler, ox_text_id, 1, args);
514
- }
515
- dr->buf.tail = dr->buf.head + coff;
516
- c = buf_get(&dr->buf);
536
+ if (dr->has_text) {
537
+ VALUE args[1];
538
+
539
+ if (dr->options.convert_special) {
540
+ ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
541
+ }
542
+ args[0] = rb_str_new2(content);
543
+ if (0 != dr->encoding) {
544
+ rb_enc_associate(args[0], dr->encoding);
545
+ }
546
+ dr->set_pos(dr->handler, pos);
547
+ dr->set_line(dr->handler, line);
548
+ dr->set_col(dr->handler, col);
549
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
550
+ }
551
+ dr->buf.tail = dr->buf.head + coff;
552
+ c = buf_get(&dr->buf);
517
553
  } else {
518
- pos = dr->buf.pos;
519
- line = dr->buf.line;
520
- col = dr->buf.col;
521
- c = buf_next_non_white(&dr->buf);
522
- if ('>' == c) {
523
- c = buf_get(&dr->buf);
524
- } else {
525
- ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
526
- if ('>' == c) {
527
- c = buf_get(&dr->buf);
528
- }
529
- }
530
- }
531
- if (dr->has.end_instruct) {
532
- VALUE args[1];
533
-
534
- if (dr->has.pos) {
535
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
536
- }
537
- if (dr->has.line) {
538
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
539
- }
540
- if (dr->has.column) {
541
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
542
- }
543
- args[0] = target;
544
- rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
554
+ pos = dr->buf.pos;
555
+ line = dr->buf.line;
556
+ col = dr->buf.col;
557
+ c = buf_next_non_white(&dr->buf);
558
+ if ('>' == c) {
559
+ c = buf_get(&dr->buf);
560
+ } else {
561
+ ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
562
+ if ('>' == c) {
563
+ c = buf_get(&dr->buf);
564
+ }
565
+ }
545
566
  }
546
- dr->buf.str = 0;
567
+ dr->end_instruct(dr, target, pos, line, col);
568
+ dr->buf.str = NULL;
547
569
 
548
570
  return c;
549
571
  }
550
572
 
551
- static char
552
- read_delimited(SaxDrive dr, char end) {
553
- char c;
573
+ static char read_delimited(SaxDrive dr, char end) {
574
+ char c;
554
575
 
555
576
  if ('"' == end || '\'' == end) {
556
- while (end != (c = buf_get(&dr->buf))) {
557
- if ('\0' == c) {
558
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
559
- return c;
560
- }
561
- }
577
+ while (end != (c = buf_get(&dr->buf))) {
578
+ if ('\0' == c) {
579
+ ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
580
+ return c;
581
+ }
582
+ }
562
583
  } else {
563
- while (1) {
564
- c = buf_get(&dr->buf);
565
- if (end == c) {
566
- return c;
567
- }
568
- switch (c) {
569
- case '\0':
570
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
571
- return c;
572
- case '"':
573
- c = read_delimited(dr, c);
574
- break;
575
- case '\'':
576
- c = read_delimited(dr, c);
577
- break;
578
- case '[':
579
- c = read_delimited(dr, ']');
580
- break;
581
- case '<':
582
- c = read_delimited(dr, '>');
583
- break;
584
- default:
585
- break;
586
- }
587
- }
584
+ while (1) {
585
+ c = buf_get(&dr->buf);
586
+ if (end == c) {
587
+ return c;
588
+ }
589
+ switch (c) {
590
+ case '\0': ox_sax_drive_error(dr, NO_TERM "doctype not terminated"); return c;
591
+ case '"': c = read_delimited(dr, c); break;
592
+ case '\'': c = read_delimited(dr, c); break;
593
+ case '[': c = read_delimited(dr, ']'); break;
594
+ case '<': c = read_delimited(dr, '>'); break;
595
+ default: break;
596
+ }
597
+ }
588
598
  }
589
599
  return c;
590
600
  }
591
601
 
592
602
  /* Entered after the "<!DOCTYPE " sequence. Ready to read the rest.
593
603
  */
594
- static char
595
- read_doctype(SaxDrive dr) {
596
- long pos = (long)(dr->buf.pos - 9);
597
- long line = (long)(dr->buf.line);
598
- long col = (long)(dr->buf.col - 9);
599
- char *s;
600
- Nv parent = stack_peek(&dr->stack);
604
+ static char read_doctype(SaxDrive dr) {
605
+ long pos = (long)(dr->buf.pos - 9);
606
+ long line = (long)(dr->buf.line);
607
+ long col = (long)(dr->buf.col - 9);
608
+ char *s;
609
+ Nv parent = stack_peek(&dr->stack);
601
610
 
602
611
  buf_backup(&dr->buf); /* back up to the start in case the doctype is empty */
603
612
  buf_protect(&dr->buf);
604
613
  read_delimited(dr, '>');
605
614
  if (dr->options.smart && 0 == dr->options.hints) {
606
- for (s = dr->buf.str; is_white(*s); s++) { }
607
- if (0 == strncasecmp("HTML", s, 4)) {
608
- dr->options.hints = ox_hints_html();
609
- }
615
+ for (s = dr->buf.str; is_white(*s); s++) {
616
+ }
617
+ if (0 == strncasecmp("HTML", s, 4)) {
618
+ dr->options.hints = ox_hints_html();
619
+ }
610
620
  }
611
621
  *(dr->buf.tail - 1) = '\0';
612
622
  if (0 != parent) {
613
- parent->childCnt++;
614
- }
615
- if (dr->has.doctype) {
616
- VALUE args[1];
617
-
618
- if (dr->has.pos) {
619
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
620
- }
621
- if (dr->has.line) {
622
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
623
- }
624
- if (dr->has.column) {
625
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
626
- }
627
- args[0] = rb_str_new2(dr->buf.str);
628
- rb_funcall2(dr->handler, ox_doctype_id, 1, args);
623
+ parent->childCnt++;
629
624
  }
625
+ dr->doctype(dr, pos, line, col);
630
626
  dr->buf.str = 0;
631
627
 
632
628
  return buf_get(&dr->buf);
@@ -634,89 +630,65 @@ read_doctype(SaxDrive dr) {
634
630
 
635
631
  /* Entered after the "<![CDATA[" sequence. Ready to read the rest.
636
632
  */
637
- static char
638
- read_cdata(SaxDrive dr) {
639
- char c;
640
- char zero = '\0';
641
- int end = 0;
642
- long pos = (long)(dr->buf.pos - 9);
643
- long line = (long)(dr->buf.line);
644
- long col = (long)(dr->buf.col - 9);
645
- struct _checkPt cp = CHECK_PT_INIT;
646
- Nv parent = stack_peek(&dr->stack);
633
+ static char read_cdata(SaxDrive dr) {
634
+ char c;
635
+ char zero = '\0';
636
+ int end = 0;
637
+ long pos = (long)(dr->buf.pos - 9);
638
+ long line = (long)(dr->buf.line);
639
+ long col = (long)(dr->buf.col - 9);
640
+ struct _checkPt cp = CHECK_PT_INIT;
641
+ Nv parent = stack_peek(&dr->stack);
647
642
 
648
643
  // TBD check parent overlay
649
644
  if (0 != parent) {
650
- parent->childCnt++;
645
+ parent->childCnt++;
651
646
  }
652
647
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
653
648
  buf_protect(&dr->buf);
654
649
  while (1) {
655
650
  c = buf_get(&dr->buf);
656
- switch (c) {
657
- case ']':
658
- end++;
659
- break;
660
- case '>':
651
+ switch (c) {
652
+ case ']': end++; break;
653
+ case '>':
661
654
  if (2 <= end) {
662
655
  *(dr->buf.tail - 3) = '\0';
663
- c = buf_get(&dr->buf);
656
+ c = buf_get(&dr->buf);
664
657
  goto CB;
665
658
  }
666
- if (!buf_checkset(&cp)) {
667
- buf_checkpoint(&dr->buf, &cp);
668
- }
659
+ if (!buf_checkset(&cp)) {
660
+ buf_checkpoint(&dr->buf, &cp);
661
+ }
669
662
  end = 0;
670
- break;
671
- case '<':
672
- if (!buf_checkset(&cp)) {
673
- buf_checkpoint(&dr->buf, &cp);
674
- }
675
- end = 0;
676
- break;
677
- case '\0':
678
- if (buf_checkset(&cp)) {
679
- c = buf_checkback(&dr->buf, &cp);
680
- ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
681
- zero = c;
682
- *(dr->buf.tail - 1) = '\0';
683
- goto CB;
684
- }
663
+ break;
664
+ case '<':
665
+ if (!buf_checkset(&cp)) {
666
+ buf_checkpoint(&dr->buf, &cp);
667
+ }
668
+ end = 0;
669
+ break;
670
+ case '\0':
671
+ if (buf_checkset(&cp)) {
672
+ c = buf_checkback(&dr->buf, &cp);
673
+ ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
674
+ zero = c;
675
+ *(dr->buf.tail - 1) = '\0';
676
+ goto CB;
677
+ }
685
678
  ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
686
679
  return '\0';
687
- default:
688
- if (1 < end && !buf_checkset(&cp)) {
689
- buf_checkpoint(&dr->buf, &cp);
690
- }
691
- end = 0;
692
- break;
693
- }
694
- }
695
- CB:
696
- if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
697
- if (dr->has.cdata) {
698
- VALUE args[1];
699
-
700
- args[0] = rb_str_new2(dr->buf.str);
701
- #if HAVE_RB_ENC_ASSOCIATE
702
- if (0 != dr->encoding) {
703
- rb_enc_associate(args[0], dr->encoding);
704
- }
705
- #endif
706
- if (dr->has.pos) {
707
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
708
- }
709
- if (dr->has.line) {
710
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
711
- }
712
- if (dr->has.column) {
713
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
714
- }
715
- rb_funcall2(dr->handler, ox_cdata_id, 1, args);
716
- }
680
+ default:
681
+ if (1 < end && !buf_checkset(&cp)) {
682
+ buf_checkpoint(&dr->buf, &cp);
683
+ }
684
+ end = 0;
685
+ break;
686
+ }
717
687
  }
688
+ CB:
689
+ dr->cdata(dr, pos, line, col);
718
690
  if ('\0' != zero) {
719
- *(dr->buf.tail - 1) = zero;
691
+ *(dr->buf.tail - 1) = zero;
720
692
  }
721
693
  dr->buf.str = 0;
722
694
 
@@ -725,88 +697,60 @@ read_cdata(SaxDrive dr) {
725
697
 
726
698
  /* Entered after the "<!--" sequence. Ready to read the rest.
727
699
  */
728
- static char
729
- read_comment(SaxDrive dr) {
730
- char c;
731
- char zero = '\0';
732
- int end = 0;
733
- long pos = (long)(dr->buf.pos - 4);
734
- long line = (long)(dr->buf.line);
735
- long col = (long)(dr->buf.col - 4);
736
- struct _checkPt cp = CHECK_PT_INIT;
700
+ static char read_comment(SaxDrive dr) {
701
+ char c;
702
+ char zero = '\0';
703
+ int end = 0;
704
+ long pos = (long)(dr->buf.pos - 4);
705
+ long line = (long)(dr->buf.line);
706
+ long col = (long)(dr->buf.col - 4);
707
+ struct _checkPt cp = CHECK_PT_INIT;
737
708
 
738
709
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
739
710
  buf_protect(&dr->buf);
740
711
  while (1) {
741
712
  c = buf_get(&dr->buf);
742
- switch (c) {
743
- case '-':
744
- end++;
745
- break;
746
- case '>':
713
+ switch (c) {
714
+ case '-': end++; break;
715
+ case '>':
747
716
  if (2 <= end) {
748
717
  *(dr->buf.tail - 3) = '\0';
749
- c = buf_get(&dr->buf);
718
+ c = buf_get(&dr->buf);
750
719
  goto CB;
751
720
  }
752
- if (!buf_checkset(&cp)) {
753
- buf_checkpoint(&dr->buf, &cp);
754
- }
721
+ if (!buf_checkset(&cp)) {
722
+ buf_checkpoint(&dr->buf, &cp);
723
+ }
724
+ end = 0;
725
+ break;
726
+ case '<':
727
+ if (!buf_checkset(&cp)) {
728
+ buf_checkpoint(&dr->buf, &cp);
729
+ }
755
730
  end = 0;
756
- break;
757
- case '<':
758
- if (!buf_checkset(&cp)) {
759
- buf_checkpoint(&dr->buf, &cp);
760
- }
761
- end = 0;
762
- break;
763
- case '\0':
764
- if (buf_checkset(&cp)) {
765
- c = buf_checkback(&dr->buf, &cp);
766
- ox_sax_drive_error(dr, NO_TERM "comment not terminated");
767
- zero = c;
768
- *(dr->buf.tail - 1) = '\0';
769
- goto CB;
770
- }
731
+ break;
732
+ case '\0':
733
+ if (buf_checkset(&cp)) {
734
+ c = buf_checkback(&dr->buf, &cp);
735
+ ox_sax_drive_error(dr, NO_TERM "comment not terminated");
736
+ zero = c;
737
+ *(dr->buf.tail - 1) = '\0';
738
+ goto CB;
739
+ }
771
740
  ox_sax_drive_error(dr, NO_TERM "comment not terminated");
772
741
  return '\0';
773
- default:
774
- if (1 < end && !buf_checkset(&cp)) {
775
- buf_checkpoint(&dr->buf, &cp);
776
- }
777
- end = 0;
778
- break;
779
- }
780
- }
781
- CB:
782
- if (dr->has.comment && !dr->blocked) {
783
- VALUE args[1];
784
- Nv parent = stack_peek(&dr->stack);
785
- Hint h = ox_hint_find(dr->options.hints, "!--");
786
-
787
- if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
788
- (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
789
-
790
- args[0] = rb_str_new2(dr->buf.str);
791
- #if HAVE_RB_ENC_ASSOCIATE
792
- if (0 != dr->encoding) {
793
- rb_enc_associate(args[0], dr->encoding);
794
- }
795
- #endif
796
- if (dr->has.pos) {
797
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
798
- }
799
- if (dr->has.line) {
800
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
801
- }
802
- if (dr->has.column) {
803
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
804
- }
805
- rb_funcall2(dr->handler, ox_comment_id, 1, args);
806
- }
742
+ default:
743
+ if (1 < end && !buf_checkset(&cp)) {
744
+ buf_checkpoint(&dr->buf, &cp);
745
+ }
746
+ end = 0;
747
+ break;
748
+ }
807
749
  }
750
+ CB:
751
+ dr->comment(dr, pos, line, col);
808
752
  if ('\0' != zero) {
809
- *(dr->buf.tail - 1) = zero;
753
+ *(dr->buf.tail - 1) = zero;
810
754
  }
811
755
  dr->buf.str = 0;
812
756
 
@@ -816,106 +760,115 @@ read_comment(SaxDrive dr) {
816
760
  /* Entered after the '<' and the first character after that. Returns status
817
761
  * code.
818
762
  */
819
- static char
820
- read_element_start(SaxDrive dr) {
821
- const char *ename = 0;
822
- volatile VALUE name = Qnil;
823
- char c;
824
- int closed;
825
- long pos = (long)(dr->buf.pos);
826
- long line = (long)(dr->buf.line);
827
- long col = (long)(dr->buf.col);
828
- Hint h = NULL;
829
- int stackless = 0;
830
- Nv parent = stack_peek(&dr->stack);
763
+ static char read_element_start(SaxDrive dr) {
764
+ const char *ename = 0;
765
+ volatile VALUE name = Qnil;
766
+ char c;
767
+ int closed;
768
+ long pos = (long)(dr->buf.pos);
769
+ long line = (long)(dr->buf.line);
770
+ long col = (long)(dr->buf.col);
771
+ Hint h = NULL;
772
+ int stackless = 0;
773
+ Nv parent = stack_peek(&dr->stack);
831
774
 
832
775
  if ('\0' == (c = read_name_token(dr))) {
833
776
  return '\0';
834
777
  }
835
778
  if ('\0' == *dr->buf.str) {
836
- char msg[256];
779
+ char msg[256];
837
780
 
838
- snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
839
- ox_sax_drive_error_at(dr, msg, pos, line, col);
781
+ snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
782
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
840
783
 
841
- return buf_get(&dr->buf);
784
+ return buf_get(&dr->buf);
842
785
  }
843
786
  if (0 != parent) {
844
- parent->childCnt++;
787
+ parent->childCnt++;
845
788
  }
846
- if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) && 0 == strcasecmp("html", dr->buf.str)) {
847
- dr->options.hints = ox_hints_html();
789
+ if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) &&
790
+ 0 == strcasecmp("html", dr->buf.str)) {
791
+ dr->options.hints = ox_hints_html();
848
792
  }
849
793
  if (NULL != dr->options.hints) {
850
- hint_clear_empty(dr);
851
- h = ox_hint_find(dr->options.hints, dr->buf.str);
852
- if (NULL == h) {
853
- char msg[256];
854
-
855
- snprintf(msg, sizeof(msg), "%s%s is not a valid element type for a %s document type.", INV_ELEMENT, dr->buf.str, dr->options.hints->name);
856
- ox_sax_drive_error(dr, msg);
857
- } else {
858
- Nv top_nv = stack_peek(&dr->stack);
859
-
860
- if (AbortOverlay == h->overlay) {
861
- if (rb_respond_to(dr->handler, ox_abort_id)) {
862
- VALUE args[1];
863
-
864
- args[0] = str2sym(dr, dr->buf.str, NULL);
865
- rb_funcall2(dr->handler, ox_abort_id, 1, args);
866
- }
867
- dr->abort = true;
868
- return '\0';
869
- }
870
- if (BlockOverlay == h->overlay) {
871
- dr->blocked++;
872
- }
873
- if (h->empty) {
874
- stackless = 1;
875
- }
876
- if (0 != top_nv) {
877
- char msg[256];
878
-
879
- if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
880
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be nested in a %s document, closing previous.",
881
- INV_ELEMENT, dr->buf.str, dr->options.hints->name);
882
- ox_sax_drive_error(dr, msg);
883
- stack_pop(&dr->stack);
884
- end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
885
- top_nv = stack_peek(&dr->stack);
886
- }
887
- if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
888
- const char **p;
889
- int ok = 0;
890
-
891
- for (p = h->parents; 0 != *p; p++) {
892
- if (0 == strcasecmp(*p, top_nv->name)) {
893
- ok = 1;
894
- break;
895
- }
896
- }
897
- if (!ok) {
898
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be a child of a %s in a %s document.",
899
- INV_ELEMENT, h->name, top_nv->name, dr->options.hints->name);
900
- ox_sax_drive_error(dr, msg);
901
- }
902
- }
903
- }
904
- }
905
- }
906
- name = str2sym(dr, dr->buf.str, &ename);
907
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
908
- VALUE args[1];
909
-
910
- if (dr->has.pos) {
911
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
912
- }
913
- if (dr->has.line) {
914
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
915
- }
916
- if (dr->has.column) {
917
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
918
- }
794
+ hint_clear_empty(dr);
795
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
796
+ if (NULL == h) {
797
+ char msg[256];
798
+
799
+ snprintf(msg,
800
+ sizeof(msg),
801
+ "%s%s is not a valid element type for a %s document type.",
802
+ INV_ELEMENT,
803
+ dr->buf.str,
804
+ dr->options.hints->name);
805
+ ox_sax_drive_error(dr, msg);
806
+ } else {
807
+ Nv top_nv = stack_peek(&dr->stack);
808
+
809
+ if (AbortOverlay == h->overlay) {
810
+ if (rb_respond_to(dr->handler, ox_abort_id)) {
811
+ VALUE args[1];
812
+
813
+ args[0] = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, NULL);
814
+ rb_funcall2(dr->handler, ox_abort_id, 1, args);
815
+ }
816
+ dr->abort = true;
817
+ return '\0';
818
+ }
819
+ if (BlockOverlay == h->overlay) {
820
+ dr->blocked++;
821
+ }
822
+ if (h->empty) {
823
+ stackless = 1;
824
+ }
825
+ if (0 != top_nv) {
826
+ char msg[256];
827
+
828
+ if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
829
+ snprintf(msg,
830
+ sizeof(msg) - 1,
831
+ "%s%s can not be nested in a %s document, closing previous.",
832
+ INV_ELEMENT,
833
+ dr->buf.str,
834
+ dr->options.hints->name);
835
+ ox_sax_drive_error(dr, msg);
836
+ stack_pop(&dr->stack);
837
+ end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
838
+ top_nv = stack_peek(&dr->stack);
839
+ }
840
+ if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
841
+ const char **p;
842
+ int ok = 0;
843
+
844
+ for (p = h->parents; 0 != *p; p++) {
845
+ if (0 == strcasecmp(*p, top_nv->name)) {
846
+ ok = 1;
847
+ break;
848
+ }
849
+ }
850
+ if (!ok) {
851
+ snprintf(msg,
852
+ sizeof(msg) - 1,
853
+ "%s%s can not be a child of a %s in a %s document.",
854
+ INV_ELEMENT,
855
+ h->name,
856
+ top_nv->name,
857
+ dr->options.hints->name);
858
+ ox_sax_drive_error(dr, msg);
859
+ }
860
+ }
861
+ }
862
+ }
863
+ }
864
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, &ename);
865
+ if (dr->has_start_element && 0 >= dr->blocked &&
866
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
867
+ VALUE args[1];
868
+
869
+ dr->set_pos(dr->handler, pos);
870
+ dr->set_line(dr->handler, line);
871
+ dr->set_col(dr->handler, col);
919
872
  args[0] = name;
920
873
  rb_funcall2(dr->handler, ox_start_element_id, 1, args);
921
874
  }
@@ -924,362 +877,302 @@ read_element_start(SaxDrive dr) {
924
877
  } else if ('>' == c) {
925
878
  closed = 0;
926
879
  } else {
927
- buf_protect(&dr->buf);
880
+ buf_protect(&dr->buf);
928
881
  c = read_attrs(dr, c, '/', '>', 0, 0, h);
929
- if (is_white(c)) {
930
- c = buf_next_non_white(&dr->buf);
931
- }
932
- closed = ('/' == c);
882
+ if (is_white(c)) {
883
+ c = buf_next_non_white(&dr->buf);
884
+ }
885
+ closed = ('/' == c);
933
886
  }
934
- if (dr->has.attrs_done && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
935
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
887
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
888
+ dr->attrs_done(dr->handler);
936
889
  }
937
890
  if (closed) {
938
- c = buf_next_non_white(&dr->buf);
939
- pos = dr->buf.pos;
940
- line = dr->buf.line;
941
- col = dr->buf.col;
942
- end_element_cb(dr, name, pos, line, col, h);
891
+ c = buf_next_non_white(&dr->buf);
892
+
893
+ end_element_cb(dr, name, dr->buf.pos, dr->buf.line, dr->buf.col, h);
943
894
  } else if (stackless) {
944
- end_element_cb(dr, name, pos, line, col, h);
895
+ end_element_cb(dr, name, pos, line, col, h);
945
896
  } else if (NULL != h && h->jump) {
946
- stack_push(&dr->stack, ename, name, h);
947
- if ('>' != c) {
948
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
949
- return c;
950
- }
951
- read_jump(dr, h->name);
952
- return '<';
897
+ stack_push(&dr->stack, ename, name, h);
898
+ if ('>' != c) {
899
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
900
+ return c;
901
+ }
902
+ read_jump(dr, h->name);
903
+ return '<';
953
904
  } else {
954
- stack_push(&dr->stack, ename, name, h);
905
+ stack_push(&dr->stack, ename, name, h);
955
906
  }
956
907
  if ('>' != c) {
957
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
958
- return c;
908
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
909
+ return c;
959
910
  }
960
911
  dr->buf.str = 0;
961
912
 
962
913
  return buf_get(&dr->buf);
963
914
  }
964
915
 
965
- static Nv
966
- stack_rev_find(SaxDrive dr, const char *name) {
967
- Nv nv;
916
+ static Nv stack_rev_find(SaxDrive dr, const char *name) {
917
+ Nv nv;
968
918
 
969
919
  for (nv = dr->stack.tail - 1; dr->stack.head <= nv; nv--) {
970
- if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
971
- return nv;
972
- }
920
+ if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
921
+ return nv;
922
+ }
973
923
  }
974
924
  return 0;
975
925
  }
976
926
 
977
- static char
978
- read_element_end(SaxDrive dr) {
979
- VALUE name = Qnil;
980
- char c;
981
- long pos = (long)(dr->buf.pos - 1);
982
- long line = (long)(dr->buf.line);
983
- long col = (long)(dr->buf.col - 1);
984
- Nv nv;
985
- Hint h = NULL;
927
+ static char read_element_end(SaxDrive dr) {
928
+ VALUE name = Qnil;
929
+ char c;
930
+ long pos = (long)(dr->buf.pos - 1);
931
+ long line = (long)(dr->buf.line);
932
+ long col = (long)(dr->buf.col - 1);
933
+ Nv nv;
934
+ Hint h = NULL;
986
935
 
987
936
  if ('\0' == (c = read_name_token(dr))) {
988
937
  return '\0';
989
938
  }
990
939
  if (is_white(c)) {
991
- c = buf_next_non_white(&dr->buf);
940
+ c = buf_next_non_white(&dr->buf);
992
941
  }
993
942
  // c should be > and current is one past so read another char
994
- c = buf_get(&dr->buf);
943
+ c = buf_get(&dr->buf);
995
944
  nv = stack_peek(&dr->stack);
996
- if (0 != nv &&
997
- 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
998
- name = nv->val;
999
- h = nv->hint;
1000
- stack_pop(&dr->stack);
945
+ if (0 != nv && 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
946
+ name = nv->val;
947
+ h = nv->hint;
948
+ stack_pop(&dr->stack);
1001
949
  } else {
1002
- // Mismatched start and end
1003
- char msg[256];
1004
- Nv match = stack_rev_find(dr, dr->buf.str);
1005
-
1006
- if (0 == match) {
1007
- // Not found so open and close element.
1008
- h = ox_hint_find(dr->options.hints, dr->buf.str);
1009
- if (NULL != h && h->empty) {
1010
- // Just close normally
1011
- name = str2sym(dr, dr->buf.str, 0);
1012
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
1013
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1014
- return c;
1015
- } else {
1016
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
1017
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1018
- name = str2sym(dr, dr->buf.str, 0);
1019
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1020
- VALUE args[1];
1021
-
1022
- if (dr->has.pos) {
1023
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1024
- }
1025
- if (dr->has.line) {
1026
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1027
- }
1028
- if (dr->has.column) {
1029
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1030
- }
1031
- args[0] = name;
1032
- rb_funcall2(dr->handler, ox_start_element_id, 1, args);
1033
- }
1034
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1035
- dr->blocked--;
1036
- }
1037
- }
1038
- } else {
1039
- // Found a match so close all up to the found element in stack.
1040
- Nv n2;
1041
-
1042
- if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
1043
- name = n2->val;
1044
- h = n2->hint;
1045
- } else {
1046
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, nv->name);
1047
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1048
- if (dr->has.pos) {
1049
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1050
- }
1051
- if (dr->has.line) {
1052
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1053
- }
1054
- if (dr->has.column) {
1055
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1056
- }
1057
- for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1058
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == nv->hint || ActiveOverlay == nv->hint->overlay || NestOverlay == nv->hint->overlay)) {
1059
- rb_funcall(dr->handler, ox_end_element_id, 1, nv->val);
1060
- }
1061
- if (NULL != nv->hint && BlockOverlay == nv->hint->overlay && 0 < dr->blocked) {
1062
- dr->blocked--;
1063
- }
1064
- }
1065
- name = nv->val;
1066
- h = nv->hint;
1067
- }
1068
- }
950
+ // Mismatched start and end
951
+ char msg[256];
952
+ Nv match = stack_rev_find(dr, dr->buf.str);
953
+
954
+ if (0 == match) {
955
+ // Not found so open and close element.
956
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
957
+ if (NULL != h && h->empty) {
958
+ // Just close normally
959
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
960
+ snprintf(msg,
961
+ sizeof(msg) - 1,
962
+ "%selement '%s' should not have a separate close element",
963
+ EL_MISMATCH,
964
+ dr->buf.str);
965
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
966
+ return c;
967
+ } else {
968
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
969
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
970
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
971
+ if (dr->has_start_element && 0 >= dr->blocked &&
972
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
973
+ VALUE args[1];
974
+
975
+ dr->set_pos(dr->handler, pos);
976
+ dr->set_line(dr->handler, line);
977
+ dr->set_col(dr->handler, col);
978
+ args[0] = name;
979
+ rb_funcall2(dr->handler, ox_start_element_id, 1, args);
980
+ }
981
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
982
+ dr->blocked--;
983
+ }
984
+ }
985
+ } else {
986
+ // Found a match so close all up to the found element in stack.
987
+ Nv n2;
988
+
989
+ if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
990
+ name = n2->val;
991
+ h = n2->hint;
992
+ } else {
993
+ snprintf(msg,
994
+ sizeof(msg) - 1,
995
+ "%selement '%s' close does not match '%s' open",
996
+ EL_MISMATCH,
997
+ dr->buf.str,
998
+ nv->name);
999
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
1000
+ for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1001
+ end_element_cb(dr, nv->val, pos, line, col, nv->hint);
1002
+ }
1003
+ name = nv->val;
1004
+ h = nv->hint;
1005
+ }
1006
+ }
1069
1007
  }
1070
1008
  end_element_cb(dr, name, pos, line, col, h);
1071
1009
 
1072
1010
  return c;
1073
1011
  }
1074
1012
 
1075
- static char
1076
- read_text(SaxDrive dr) {
1077
- VALUE args[1];
1078
- char c;
1079
- long pos = (long)(dr->buf.pos);
1080
- long line = (long)(dr->buf.line);
1081
- long col = (long)(dr->buf.col - 1);
1082
- Nv parent = stack_peek(&dr->stack);
1083
- int allWhite = 1;
1013
+ static char read_text(SaxDrive dr) {
1014
+ VALUE args[1];
1015
+ char c;
1016
+ long pos = (long)(dr->buf.pos);
1017
+ long line = (long)(dr->buf.line);
1018
+ long col = (long)(dr->buf.col - 1);
1019
+ Nv parent = stack_peek(&dr->stack);
1020
+ int allWhite = 1;
1084
1021
 
1085
1022
  buf_backup(&dr->buf);
1086
1023
  buf_protect(&dr->buf);
1087
1024
  while ('<' != (c = buf_get(&dr->buf))) {
1088
- switch(c) {
1089
- case ' ':
1090
- case '\t':
1091
- case '\f':
1092
- case '\n':
1093
- case '\r':
1094
- break;
1095
- case '\0':
1096
- if (allWhite) {
1097
- return c;
1098
- }
1025
+ switch (c) {
1026
+ case ' ':
1027
+ case '\t':
1028
+ case '\f':
1029
+ case '\n':
1030
+ case '\r': break;
1031
+ case '\0':
1032
+ if (allWhite) {
1033
+ return c;
1034
+ }
1099
1035
  ox_sax_drive_error(dr, NO_TERM "text not terminated");
1100
- goto END_OF_BUF;
1101
- break;
1102
- default:
1103
- allWhite = 0;
1104
- break;
1105
- }
1106
- }
1107
- END_OF_BUF:
1036
+ goto END_OF_BUF;
1037
+ break;
1038
+ default: allWhite = 0; break;
1039
+ }
1040
+ }
1041
+ END_OF_BUF:
1108
1042
  if ('\0' != c) {
1109
- *(dr->buf.tail - 1) = '\0';
1043
+ *(dr->buf.tail - 1) = '\0';
1110
1044
  }
1111
1045
  if (allWhite) {
1112
- int isEnd = ('/' == buf_get(&dr->buf));
1113
-
1114
- buf_backup(&dr->buf);
1115
- if (dr->has.text &&
1116
- ((NoSkip == dr->options.skip && !isEnd) ||
1117
- (OffSkip == dr->options.skip))) {
1118
- args[0] = rb_str_new2(dr->buf.str);
1119
- #if HAVE_RB_ENC_ASSOCIATE
1120
- if (0 != dr->encoding) {
1121
- rb_enc_associate(args[0], dr->encoding);
1122
- }
1123
- #endif
1124
- if (dr->has.pos) {
1125
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1126
- }
1127
- if (dr->has.line) {
1128
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1129
- }
1130
- if (dr->has.column) {
1131
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1132
- }
1133
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1134
- }
1135
- if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1136
- return c;
1137
- }
1046
+ int isEnd = ('/' == buf_get(&dr->buf));
1047
+
1048
+ buf_backup(&dr->buf);
1049
+ if (dr->has_text && ((NoSkip == dr->options.skip && !isEnd) || (OffSkip == dr->options.skip))) {
1050
+ args[0] = rb_str_new2(dr->buf.str);
1051
+ if (0 != dr->encoding) {
1052
+ rb_enc_associate(args[0], dr->encoding);
1053
+ }
1054
+ dr->set_pos(dr->handler, pos);
1055
+ dr->set_line(dr->handler, line);
1056
+ dr->set_col(dr->handler, col);
1057
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1058
+ }
1059
+ if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1060
+ return c;
1061
+ }
1138
1062
  }
1139
1063
  if (0 != parent) {
1140
- parent->childCnt++;
1064
+ parent->childCnt++;
1141
1065
  }
1142
1066
  if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
1143
- if (dr->has.value) {
1144
- if (dr->has.pos) {
1145
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1146
- }
1147
- if (dr->has.line) {
1148
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1149
- }
1150
- if (dr->has.column) {
1151
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1152
- }
1153
- *args = dr->value_obj;
1154
- rb_funcall2(dr->handler, ox_value_id, 1, args);
1155
- } else if (dr->has.text) {
1156
- if (dr->options.convert_special) {
1157
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1158
- }
1159
- switch (dr->options.skip) {
1160
- case CrSkip:
1161
- buf_collapse_return(dr->buf.str);
1162
- break;
1163
- case SpcSkip:
1164
- buf_collapse_white(dr->buf.str);
1165
- break;
1166
- default:
1167
- break;
1168
- }
1169
- args[0] = rb_str_new2(dr->buf.str);
1170
- #if HAVE_RB_ENC_ASSOCIATE
1171
- if (0 != dr->encoding) {
1172
- rb_enc_associate(args[0], dr->encoding);
1173
- }
1174
- #endif
1175
- if (dr->has.pos) {
1176
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1177
- }
1178
- if (dr->has.line) {
1179
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1180
- }
1181
- if (dr->has.column) {
1182
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1183
- }
1184
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1185
- }
1067
+ if (dr->has_value) {
1068
+ dr->set_pos(dr->handler, pos);
1069
+ dr->set_line(dr->handler, line);
1070
+ dr->set_col(dr->handler, col);
1071
+ *args = dr->value_obj;
1072
+ rb_funcall2(dr->handler, ox_value_id, 1, args);
1073
+ } else if (dr->has_text) {
1074
+ if (dr->options.convert_special) {
1075
+ ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1076
+ }
1077
+ switch (dr->options.skip) {
1078
+ case CrSkip: buf_collapse_return(dr->buf.str); break;
1079
+ case SpcSkip: buf_collapse_white(dr->buf.str); break;
1080
+ default: break;
1081
+ }
1082
+ args[0] = rb_str_new2(dr->buf.str);
1083
+ if (0 != dr->encoding) {
1084
+ rb_enc_associate(args[0], dr->encoding);
1085
+ }
1086
+ dr->set_pos(dr->handler, pos);
1087
+ dr->set_line(dr->handler, line);
1088
+ dr->set_col(dr->handler, col);
1089
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1090
+ }
1186
1091
  }
1187
1092
  dr->buf.str = 0;
1188
1093
 
1189
1094
  return c;
1190
1095
  }
1191
1096
 
1192
- static int
1193
- read_jump_term(Buf buf, const char *pat) {
1194
- struct _checkPt cp;
1097
+ static int read_jump_term(Buf buf, const char *pat) {
1098
+ struct _checkPt cp;
1195
1099
 
1196
- buf_checkpoint(buf, &cp); // right after <
1100
+ buf_checkpoint(buf, &cp); // right after <
1197
1101
  if ('/' != buf_next_non_white(buf)) {
1198
- return 0;
1102
+ return 0;
1199
1103
  }
1200
1104
  if (*pat != tolower(buf_next_non_white(buf))) {
1201
- return 0;
1105
+ return 0;
1202
1106
  }
1203
1107
  for (pat++; '\0' != *pat; pat++) {
1204
- if (*pat != tolower(buf_get(buf))) {
1205
- return 0;
1206
- }
1108
+ if (*pat != tolower(buf_get(buf))) {
1109
+ return 0;
1110
+ }
1207
1111
  }
1208
1112
  if ('>' != buf_next_non_white(buf)) {
1209
- return 0;
1113
+ return 0;
1210
1114
  }
1211
1115
  buf_checkback(buf, &cp);
1212
1116
  return 1;
1213
1117
  }
1214
1118
 
1215
- static char
1216
- read_jump(SaxDrive dr, const char *pat) {
1217
- VALUE args[1];
1218
- char c;
1219
- long pos = (long)(dr->buf.pos);
1220
- long line = (long)(dr->buf.line);
1221
- long col = (long)(dr->buf.col - 1);
1222
- Nv parent = stack_peek(&dr->stack);
1119
+ static char read_jump(SaxDrive dr, const char *pat) {
1120
+ VALUE args[1];
1121
+ char c;
1122
+ long pos = (long)(dr->buf.pos);
1123
+ long line = (long)(dr->buf.line);
1124
+ long col = (long)(dr->buf.col - 1);
1125
+ Nv parent = stack_peek(&dr->stack);
1223
1126
 
1224
1127
  buf_protect(&dr->buf);
1225
1128
  while (1) {
1226
- c = buf_get(&dr->buf);
1227
- switch(c) {
1228
- case '<':
1229
- if (read_jump_term(&dr->buf, pat)) {
1230
- goto END_OF_BUF;
1231
- break;
1232
- }
1233
- break;
1234
- case '\0':
1129
+ c = buf_get(&dr->buf);
1130
+ switch (c) {
1131
+ case '<':
1132
+ if (read_jump_term(&dr->buf, pat)) {
1133
+ goto END_OF_BUF;
1134
+ break;
1135
+ }
1136
+ break;
1137
+ case '\0':
1235
1138
  ox_sax_drive_error(dr, NO_TERM "not terminated");
1236
- goto END_OF_BUF;
1237
- break;
1238
- default:
1239
- break;
1240
- }
1139
+ goto END_OF_BUF;
1140
+ break;
1141
+ default: break;
1142
+ }
1241
1143
  }
1242
- END_OF_BUF:
1144
+ END_OF_BUF:
1243
1145
  if ('\0' != c) {
1244
- *(dr->buf.tail - 1) = '\0';
1146
+ *(dr->buf.tail - 1) = '\0';
1245
1147
  }
1246
1148
  if (0 != parent) {
1247
- parent->childCnt++;
1149
+ parent->childCnt++;
1248
1150
  }
1249
1151
  // TBD check parent overlay
1250
- if (dr->has.text && !dr->blocked) {
1152
+ if (dr->has_text && !dr->blocked) {
1251
1153
  args[0] = rb_str_new2(dr->buf.str);
1252
- #if HAVE_RB_ENC_ASSOCIATE
1253
1154
  if (0 != dr->encoding) {
1254
1155
  rb_enc_associate(args[0], dr->encoding);
1255
1156
  }
1256
- #endif
1257
- if (dr->has.pos) {
1258
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1259
- }
1260
- if (dr->has.line) {
1261
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1262
- }
1263
- if (dr->has.column) {
1264
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1265
- }
1157
+ dr->set_pos(dr->handler, pos);
1158
+ dr->set_line(dr->handler, line);
1159
+ dr->set_col(dr->handler, col);
1266
1160
  rb_funcall2(dr->handler, ox_text_id, 1, args);
1267
1161
  }
1268
1162
  dr->buf.str = 0;
1269
1163
  if ('\0' != c) {
1270
- *(dr->buf.tail - 1) = '<';
1164
+ *(dr->buf.tail - 1) = '<';
1271
1165
  }
1272
1166
  return c;
1273
1167
  }
1274
1168
 
1275
- static char
1276
- read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1277
- VALUE name = Qnil;
1278
- int is_encoding = 0;
1279
- off_t pos;
1280
- off_t line;
1281
- off_t col;
1282
- char *attr_value;
1169
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1170
+ VALUE name = Qnil;
1171
+ int is_encoding = 0;
1172
+ off_t pos;
1173
+ off_t line;
1174
+ off_t col;
1175
+ char *attr_value;
1283
1176
 
1284
1177
  // already protected by caller
1285
1178
  dr->buf.str = dr->buf.tail;
@@ -1287,94 +1180,52 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1287
1180
  c = buf_next_non_white(&dr->buf);
1288
1181
  }
1289
1182
  while (termc != c && term2 != c) {
1290
- buf_backup(&dr->buf);
1183
+ buf_backup(&dr->buf);
1291
1184
  if ('\0' == c) {
1292
- ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1293
- return '\0';
1185
+ ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1186
+ return '\0';
1294
1187
  }
1295
- pos = dr->buf.pos + 1;
1296
- line = dr->buf.line;
1297
- col = dr->buf.col + 1;
1188
+ pos = dr->buf.pos + 1;
1189
+ line = dr->buf.line;
1190
+ col = dr->buf.col + 1;
1298
1191
  if ('\0' == (c = read_name_token(dr))) {
1299
- ox_sax_drive_error(dr, NO_TERM "error reading token");
1300
- return '\0';
1192
+ ox_sax_drive_error(dr, NO_TERM "error reading token");
1193
+ return '\0';
1301
1194
  }
1302
1195
  if (is_xml && 0 == strcasecmp("encoding", dr->buf.str)) {
1303
1196
  is_encoding = 1;
1304
1197
  }
1305
- if (dr->has.attr || dr->has.attr_value) {
1306
- name = str2sym(dr, dr->buf.str, 0);
1198
+ if (dr->want_attr_name) {
1199
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, 0);
1307
1200
  }
1308
1201
  if (is_white(c)) {
1309
1202
  c = buf_next_non_white(&dr->buf);
1310
1203
  }
1311
1204
  if ('=' != c) {
1312
- if (eq_req) {
1313
- dr->err = 1;
1314
- return c;
1315
- } else {
1316
- ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1317
- attr_value = (char*)"";
1318
- }
1205
+ if (eq_req) {
1206
+ dr->err = 1;
1207
+ return c;
1208
+ } else {
1209
+ ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1210
+ attr_value = (char *)"";
1211
+ }
1319
1212
  } else {
1320
- pos = dr->buf.pos + 1;
1321
- line = dr->buf.line;
1322
- col = dr->buf.col + 1;
1323
- c = read_quoted_value(dr);
1324
- attr_value = dr->buf.str;
1325
- if (is_encoding) {
1326
- #if HAVE_RB_ENC_FIND
1327
- dr->encoding = rb_enc_find(dr->buf.str);
1328
- #else
1329
- dr->encoding = dr->buf.str;
1330
- #endif
1331
- is_encoding = 0;
1332
- }
1333
- }
1334
- if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1335
- if (dr->has.attr_value) {
1336
- VALUE args[2];
1337
-
1338
- if (dr->has.pos) {
1339
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1340
- }
1341
- if (dr->has.line) {
1342
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1343
- }
1344
- if (dr->has.column) {
1345
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1346
- }
1347
- args[0] = name;
1348
- args[1] = dr->value_obj;
1349
- rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
1350
- } else if (dr->has.attr) {
1351
- VALUE args[2];
1352
-
1353
- args[0] = name;
1354
- if (dr->options.convert_special) {
1355
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1356
- }
1357
- args[1] = rb_str_new2(attr_value);
1358
- #if HAVE_RB_ENC_ASSOCIATE
1359
- if (0 != dr->encoding) {
1360
- rb_enc_associate(args[1], dr->encoding);
1361
- }
1362
- #endif
1363
- if (dr->has.pos) {
1364
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1365
- }
1366
- if (dr->has.line) {
1367
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1368
- }
1369
- if (dr->has.column) {
1370
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1371
- }
1372
- rb_funcall2(dr->handler, ox_attr_id, 2, args);
1373
- }
1374
- }
1375
- if (is_white(c)) {
1376
- c = buf_next_non_white(&dr->buf);
1377
- }
1213
+ pos = dr->buf.pos + 1;
1214
+ line = dr->buf.line;
1215
+ col = dr->buf.col + 1;
1216
+ c = read_quoted_value(dr);
1217
+ attr_value = dr->buf.str;
1218
+ if (is_encoding) {
1219
+ dr->encoding = rb_enc_find(dr->buf.str);
1220
+ is_encoding = 0;
1221
+ }
1222
+ }
1223
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1224
+ dr->attr_cb(dr, name, attr_value, pos, line, col);
1225
+ }
1226
+ if (is_white(c)) {
1227
+ c = buf_next_non_white(&dr->buf);
1228
+ }
1378
1229
  }
1379
1230
  dr->buf.str = 0;
1380
1231
 
@@ -1384,66 +1235,62 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1384
1235
  /* The character after the word is returned. dr->buf.tail is one past
1385
1236
  * that. dr->buf.str will point to the token which will be '\0' terminated.
1386
1237
  */
1387
- static char
1388
- read_name_token(SaxDrive dr) {
1389
- char c;
1238
+ static char read_name_token(SaxDrive dr) {
1239
+ char c;
1390
1240
 
1391
1241
  dr->buf.str = dr->buf.tail;
1392
- c = buf_get(&dr->buf);
1242
+ c = buf_get(&dr->buf);
1393
1243
  if (is_white(c)) {
1394
- c = buf_next_non_white(&dr->buf);
1244
+ c = buf_next_non_white(&dr->buf);
1395
1245
  dr->buf.str = dr->buf.tail - 1;
1396
1246
  }
1397
1247
  while (1) {
1398
- switch (c) {
1399
- case ' ':
1400
- case '\t':
1401
- case '\f':
1402
- case '?':
1403
- case '=':
1404
- case '/':
1405
- case '>':
1406
- case '<':
1407
- case '\n':
1408
- case '\r':
1409
- *(dr->buf.tail - 1) = '\0';
1410
- return c;
1411
- case '\0':
1248
+ switch (c) {
1249
+ case ' ':
1250
+ case '\t':
1251
+ case '\f':
1252
+ case '?':
1253
+ case '=':
1254
+ case '/':
1255
+ case '>':
1256
+ case '<':
1257
+ case '\n':
1258
+ case '\r': *(dr->buf.tail - 1) = '\0'; return c;
1259
+ case '\0':
1412
1260
  /* documents never terminate after a name token */
1413
1261
  ox_sax_drive_error(dr, NO_TERM "document not terminated");
1414
1262
  return '\0';
1415
- case ':':
1416
- if ('\0' == *dr->options.strip_ns) {
1417
- break;
1418
- } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1419
- dr->buf.str = dr->buf.tail;
1420
- } else if (dr->options.smart && 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1421
- dr->buf.str = dr->buf.tail;
1422
- } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1423
- dr->buf.str = dr->buf.tail;
1424
- }
1425
- break;
1426
- default:
1427
- break;
1428
- }
1263
+ case ':':
1264
+ if ('\0' == *dr->options.strip_ns) {
1265
+ break;
1266
+ } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1267
+ dr->buf.str = dr->buf.tail;
1268
+ } else if (dr->options.smart &&
1269
+ 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1270
+ dr->buf.str = dr->buf.tail;
1271
+ } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1272
+ dr->buf.str = dr->buf.tail;
1273
+ }
1274
+ break;
1275
+ default: break;
1276
+ }
1429
1277
  c = buf_get(&dr->buf);
1430
1278
  }
1431
1279
  return '\0';
1432
1280
  }
1433
1281
 
1434
- /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one past
1435
- * that. dr->buf.str will point to the token which will be '\0' terminated.
1282
+ /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one
1283
+ * past that. dr->buf.str will point to the token which will be '\0' terminated.
1436
1284
  */
1437
- static char
1438
- read_quoted_value(SaxDrive dr) {
1439
- char c;
1285
+ static char read_quoted_value(SaxDrive dr) {
1286
+ char c;
1440
1287
 
1441
1288
  c = buf_get(&dr->buf);
1442
1289
  if (is_white(c)) {
1443
1290
  c = buf_next_non_white(&dr->buf);
1444
1291
  }
1445
1292
  if ('"' == c || '\'' == c) {
1446
- char term = c;
1293
+ char term = c;
1447
1294
 
1448
1295
  dr->buf.str = dr->buf.tail;
1449
1296
  while (term != (c = buf_get(&dr->buf))) {
@@ -1452,186 +1299,171 @@ read_quoted_value(SaxDrive dr) {
1452
1299
  return '\0';
1453
1300
  }
1454
1301
  }
1455
- // dr->buf.tail is one past quote char
1456
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1457
- c = buf_get(&dr->buf);
1458
- return c;
1302
+ // dr->buf.tail is one past quote char
1303
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1304
+ c = buf_get(&dr->buf);
1305
+ return c;
1459
1306
  }
1460
1307
  // not quoted, look for something that terminates the string
1461
1308
  dr->buf.str = dr->buf.tail - 1;
1462
1309
  ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes");
1463
1310
  while ('\0' != (c = buf_get(&dr->buf))) {
1464
- switch (c) {
1465
- case ' ':
1466
- //case '/':
1467
- case '>':
1468
- case '?': // for instructions
1469
- case '\t':
1470
- case '\n':
1471
- case '\r':
1472
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1473
- // dr->buf.tail is in the correct position, one after the word terminator
1474
- return c;
1475
- default:
1476
- break;
1477
- }
1478
- }
1479
- return '\0'; // should never get here
1311
+ switch (c) {
1312
+ case ' ':
1313
+ // case '/':
1314
+ case '>':
1315
+ case '?': // for instructions
1316
+ case '\t':
1317
+ case '\n':
1318
+ case '\r':
1319
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1320
+ // dr->buf.tail is in the correct position, one after the word terminator
1321
+ return c;
1322
+ default: break;
1323
+ }
1324
+ }
1325
+ return '\0'; // should never get here
1480
1326
  }
1481
1327
 
1482
/* Parses the hexadecimal digits of a numeric character reference.
 *
 * b points at the first digit (just past "&#x"). Digits are read up to the
 * terminating ';'.
 *
 * Returns a pointer to the ';' and stores the value in *up on success.
 * Returns NULL and leaves *up untouched if
 *  - a character that is not a hex digit is met before the ';' (this
 *    includes the '\0' at the end of the string),
 *  - there is no digit at all before the ';', or
 *  - the value does not fit in 64 bits.
 */
static char *read_hex_uint64(char *b, uint64_t *up) {
    const char *first = b;
    uint64_t    u     = 0;

    for (; ';' != *b; b++) {
        char     c = *b;
        uint64_t digit;

        if ('0' <= c && c <= '9') {
            digit = (uint64_t)(c - '0');
        } else if ('a' <= c && c <= 'f') {
            digit = (uint64_t)(c - 'a' + 10);
        } else if ('A' <= c && c <= 'F') {
            digit = (uint64_t)(c - 'A' + 10);
        } else {
            return NULL;
        }
        if (0 != (u >> 60)) {
            // The top nibble is in use; another shift would drop bits.
            return NULL;
        }
        u = (u << 4) | digit;
    }
    if (first == b) {
        return NULL;
    }
    *up = u;

    return b;
}
1503
1348
 
1504
/* Parses the decimal digits of a numeric character reference.
 *
 * b points at the first digit (just past "&#"). Digits are read up to the
 * terminating ';'.
 *
 * Returns a pointer to the ';' and stores the value in *up on success.
 * Returns NULL and leaves *up untouched if
 *  - a character that is not a decimal digit is met before the ';' (this
 *    includes the '\0' at the end of the string),
 *  - there is no digit at all before the ';', or
 *  - the value does not fit in 64 bits.
 */
static char *read_10_uint64(char *b, uint64_t *up) {
    const char *first = b;
    uint64_t    u     = 0;

    for (; ';' != *b; b++) {
        char     c = *b;
        uint64_t digit;

        if (c < '0' || '9' < c) {
            return NULL;
        }
        digit = (uint64_t)(c - '0');
        if (u > (UINT64_MAX - digit) / 10) {
            // u * 10 + digit would wrap around.
            return NULL;
        }
        u = (u * 10) + digit;
    }
    if (first == b) {
        return NULL;
    }
    *up = u;

    return b;
}
1521
1365
 
1522
- int
1523
- ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1524
- char *s = str;
1525
- char *b = str;
1366
+ int ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1367
+ char *s = str;
1368
+ char *b = str;
1526
1369
 
1527
1370
  while ('\0' != *s) {
1528
1371
  if ('&' == *s) {
1529
- int c = 0;
1530
- char *end;
1372
+ int c = 0;
1373
+ char *end;
1531
1374
 
1532
1375
  s++;
1533
1376
  if ('#' == *s) {
1534
- uint64_t u = 0;
1535
- char x;
1536
-
1537
- s++;
1538
- if ('x' == *s || 'X' == *s) {
1539
- x = *s;
1540
- s++;
1541
- end = read_hex_uint64(s, &u);
1542
- } else {
1543
- x = '\0';
1544
- end = read_10_uint64(s, &u);
1545
- }
1546
- if (0 == end) {
1547
- ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1548
- *b++ = '&';
1549
- *b++ = '#';
1550
- if ('\0' != x) {
1551
- *b++ = x;
1552
- }
1553
- continue;
1554
- }
1555
- if (u <= 0x000000000000007FULL) {
1556
- *b++ = (char)u;
1557
- #if HAVE_RB_ENC_FIND
1558
- } else if (ox_utf8_encoding == dr->encoding) {
1559
- b = ox_ucs_to_utf8_chars(b, u);
1560
- } else if (0 == dr->encoding) {
1561
- dr->encoding = ox_utf8_encoding;
1562
- b = ox_ucs_to_utf8_chars(b, u);
1563
- #else
1564
- } else if (0 == dr->encoding) {
1565
- dr->encoding = UTF8_STR;
1566
- b = ox_ucs_to_utf8_chars(b, u);
1567
- } else if (0 == strcasecmp(UTF8_STR, dr->encoding)) {
1568
- b = ox_ucs_to_utf8_chars(b, u);
1569
- #endif
1570
- } else {
1571
- b = ox_ucs_to_utf8_chars(b, u);
1572
- /*
1573
- ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.");
1574
- *b++ = '&';
1575
- *b++ = '#';
1576
- if ('\0' != x) {
1577
- *b++ = x;
1578
- }
1579
- continue;
1580
- */
1581
- }
1582
- s = end + 1;
1583
- continue;
1377
+ uint64_t u = 0;
1378
+ char x;
1379
+
1380
+ s++;
1381
+ if ('x' == *s || 'X' == *s) {
1382
+ x = *s;
1383
+ s++;
1384
+ end = read_hex_uint64(s, &u);
1385
+ } else {
1386
+ x = '\0';
1387
+ end = read_10_uint64(s, &u);
1388
+ }
1389
+ if (0 == end) {
1390
+ ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1391
+ *b++ = '&';
1392
+ *b++ = '#';
1393
+ if ('\0' != x) {
1394
+ *b++ = x;
1395
+ }
1396
+ continue;
1397
+ }
1398
+ if (u <= 0x000000000000007FULL) {
1399
+ *b++ = (char)u;
1400
+ } else if (ox_utf8_encoding == dr->encoding) {
1401
+ b = ox_ucs_to_utf8_chars(b, u);
1402
+ } else if (0 == dr->encoding) {
1403
+ dr->encoding = ox_utf8_encoding;
1404
+ b = ox_ucs_to_utf8_chars(b, u);
1405
+ } else {
1406
+ b = ox_ucs_to_utf8_chars(b, u);
1407
+ /*
1408
+ ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character
1409
+ sequences."); *b++ = '&'; *b++ = '#'; if ('\0' != x) { *b++ = x;
1410
+ }
1411
+ continue;
1412
+ */
1413
+ }
1414
+ s = end + 1;
1415
+ continue;
1584
1416
  } else if (0 == strncasecmp(s, "lt;", 3)) {
1585
1417
  c = '<';
1586
1418
  s += 3;
1587
- col += 3;
1419
+ col += 3;
1588
1420
  } else if (0 == strncasecmp(s, "gt;", 3)) {
1589
1421
  c = '>';
1590
1422
  s += 3;
1591
- col += 3;
1423
+ col += 3;
1592
1424
  } else if (0 == strncasecmp(s, "amp;", 4)) {
1593
1425
  c = '&';
1594
1426
  s += 4;
1595
- col += 4;
1427
+ col += 4;
1596
1428
  } else if (0 == strncasecmp(s, "quot;", 5)) {
1597
1429
  c = '"';
1598
1430
  s += 5;
1599
- col += 5;
1431
+ col += 5;
1600
1432
  } else if (0 == strncasecmp(s, "apos;", 5)) {
1601
1433
  c = '\'';
1602
1434
  s += 5;
1603
1435
  } else {
1604
- char key[16];
1605
- char *k = key;
1606
- char *kend = key + sizeof(key) - 1;
1607
- char *bn;
1608
- char *s2 = s;
1609
-
1610
- for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1611
- if (kend <= k) {
1612
- k = key;
1613
- break;
1614
- }
1615
- *k = *s2;
1616
- }
1617
- *k = '\0';
1618
- if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1619
- ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1620
- c = '&';
1621
- } else {
1622
- b = bn;
1623
- s = s2 + 1;
1624
- continue;
1625
- }
1436
+ char key[16];
1437
+ char *k = key;
1438
+ char *kend = key + sizeof(key) - 1;
1439
+ char *bn;
1440
+ char *s2 = s;
1441
+
1442
+ for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1443
+ if (kend <= k) {
1444
+ k = key;
1445
+ break;
1446
+ }
1447
+ *k = *s2;
1448
+ }
1449
+ *k = '\0';
1450
+ if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1451
+ ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1452
+ c = '&';
1453
+ } else {
1454
+ b = bn;
1455
+ s = s2 + 1;
1456
+ continue;
1457
+ }
1626
1458
  }
1627
1459
  *b++ = (char)c;
1628
- col++;
1460
+ col++;
1629
1461
  } else {
1630
- if ('\n' == *s) {
1631
- line++;
1632
- col = 0;
1633
- }
1634
- col++;
1462
+ if ('\n' == *s) {
1463
+ line++;
1464
+ col = 0;
1465
+ }
1466
+ col++;
1635
1467
  *b++ = *s++;
1636
1468
  }
1637
1469
  }
@@ -1640,64 +1472,43 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1640
1472
  return 0;
1641
1473
  }
1642
1474
 
1643
- static void
1644
- hint_clear_empty(SaxDrive dr) {
1645
- Nv nv;
1475
+ static void hint_clear_empty(SaxDrive dr) {
1476
+ Nv nv;
1646
1477
 
1647
1478
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1648
- if (0 == nv->hint) {
1649
- break;
1650
- }
1651
- if (nv->hint->empty) {
1652
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1653
- stack_pop(&dr->stack);
1654
- } else {
1655
- break;
1656
- }
1479
+ if (0 == nv->hint) {
1480
+ break;
1481
+ }
1482
+ if (nv->hint->empty) {
1483
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1484
+ stack_pop(&dr->stack);
1485
+ } else {
1486
+ break;
1487
+ }
1657
1488
  }
1658
1489
  }
1659
1490
 
1660
- static Nv
1661
- hint_try_close(SaxDrive dr, const char *name) {
1662
- Hint h = ox_hint_find(dr->options.hints, name);
1663
- Nv nv;
1491
+ static Nv hint_try_close(SaxDrive dr, const char *name) {
1492
+ Hint h = ox_hint_find(dr->options.hints, name);
1493
+ Nv nv;
1664
1494
 
1665
1495
  if (0 == h) {
1666
- return 0;
1496
+ return 0;
1667
1497
  }
1668
1498
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1669
- if (0 == strcasecmp(name, nv->name)) {
1670
- stack_pop(&dr->stack);
1671
- return nv;
1672
- }
1673
- if (0 == nv->hint) {
1674
- break;
1675
- }
1676
- if (nv->hint->empty) {
1677
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1678
- dr->stack.tail = nv;
1679
- } else {
1680
- break;
1681
- }
1499
+ if (0 == strcasecmp(name, nv->name)) {
1500
+ stack_pop(&dr->stack);
1501
+ return nv;
1502
+ }
1503
+ if (0 == nv->hint) {
1504
+ break;
1505
+ }
1506
+ if (nv->hint->empty) {
1507
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1508
+ dr->stack.tail = nv;
1509
+ } else {
1510
+ break;
1511
+ }
1682
1512
  }
1683
1513
  return 0;
1684
1514
  }
1685
-
1686
- static void
1687
- end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
1688
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1689
- if (dr->has.pos) {
1690
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1691
- }
1692
- if (dr->has.line) {
1693
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1694
- }
1695
- if (dr->has.column) {
1696
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1697
- }
1698
- rb_funcall(dr->handler, ox_end_element_id, 1, name);
1699
- }
1700
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1701
- dr->blocked--;
1702
- }
1703
- }