ox 2.14.3 → 2.14.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/ext/ox/sax.c CHANGED
@@ -4,66 +4,63 @@
4
4
  */
5
5
 
6
6
  #include <ctype.h>
7
- #include <stdlib.h>
8
7
  #include <errno.h>
9
8
  #include <stdio.h>
9
+ #include <stdlib.h>
10
10
  #include <strings.h>
11
11
  #include <sys/types.h>
12
12
  #if HAVE_SYS_UIO_H
13
13
  #include <sys/uio.h>
14
14
  #endif
15
- #include <unistd.h>
16
15
  #include <time.h>
16
+ #include <unistd.h>
17
17
 
18
+ #include "intern.h"
19
+ #include "ox.h"
18
20
  #include "ruby.h"
19
- #if HAVE_RB_ENC_ASSOCIATE
20
21
  #include "ruby/encoding.h"
21
- #endif
22
- #include "ox.h"
23
22
  #include "sax.h"
24
- #include "sax_stack.h"
25
23
  #include "sax_buf.h"
24
+ #include "sax_stack.h"
26
25
  #include "special.h"
27
26
 
28
- #define NAME_MISMATCH 1
27
+ #define NAME_MISMATCH 1
29
28
 
30
- #define START_STATE 1
31
- #define BODY_STATE 2
32
- #define AFTER_STATE 3
29
+ #define START_STATE 1
30
+ #define BODY_STATE 2
31
+ #define AFTER_STATE 3
33
32
 
34
33
  // error prefixes
35
- #define BAD_BOM "Bad BOM: "
36
- #define NO_TERM "Not Terminated: "
37
- #define INVALID_FORMAT "Invalid Format: "
38
- #define CASE_ERROR "Case Error: "
39
- #define OUT_OF_ORDER "Out of Order: "
40
- #define WRONG_CHAR "Unexpected Character: "
41
- #define EL_MISMATCH "Start End Mismatch: "
42
- #define INV_ELEMENT "Invalid Element: "
43
-
44
- #define UTF8_STR "UTF-8"
45
-
46
- static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
47
- static void parse(SaxDrive dr);
34
+ #define BAD_BOM "Bad BOM: "
35
+ #define NO_TERM "Not Terminated: "
36
+ #define INVALID_FORMAT "Invalid Format: "
37
+ #define CASE_ERROR "Case Error: "
38
+ #define OUT_OF_ORDER "Out of Order: "
39
+ #define WRONG_CHAR "Unexpected Character: "
40
+ #define EL_MISMATCH "Start End Mismatch: "
41
+ #define INV_ELEMENT "Invalid Element: "
42
+
43
+ #define UTF8_STR "UTF-8"
44
+
45
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
46
+ static void parse(SaxDrive dr);
48
47
  // All read functions should return the next character after the 'thing' that was read and leave dr->cur one after that.
49
- static char read_instruction(SaxDrive dr);
50
- static char read_doctype(SaxDrive dr);
51
- static char read_cdata(SaxDrive dr);
52
- static char read_comment(SaxDrive dr);
53
- static char read_element_start(SaxDrive dr);
54
- static char read_element_end(SaxDrive dr);
55
- static char read_text(SaxDrive dr);
56
- static char read_jump(SaxDrive dr, const char *pat);
57
- static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
58
- static char read_name_token(SaxDrive dr);
59
- static char read_quoted_value(SaxDrive dr);
60
-
61
- static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h);
62
-
63
- static void hint_clear_empty(SaxDrive dr);
64
- static Nv hint_try_close(SaxDrive dr, const char *name);
65
-
66
- VALUE ox_sax_value_class = Qnil;
48
+ static char read_instruction(SaxDrive dr);
49
+ static char read_doctype(SaxDrive dr);
50
+ static char read_cdata(SaxDrive dr);
51
+ static char read_comment(SaxDrive dr);
52
+ static char read_element_start(SaxDrive dr);
53
+ static char read_element_end(SaxDrive dr);
54
+ static char read_text(SaxDrive dr);
55
+ static char read_jump(SaxDrive dr, const char *pat);
56
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
57
+ static char read_name_token(SaxDrive dr);
58
+ static char read_quoted_value(SaxDrive dr);
59
+
60
+ static void hint_clear_empty(SaxDrive dr);
61
+ static Nv hint_try_close(SaxDrive dr, const char *name);
62
+
63
+ VALUE ox_sax_value_class = Qnil;
67
64
 
68
65
  static VALUE protect_parse(VALUE drp) {
69
66
  parse((SaxDrive)drp);
@@ -71,559 +68,561 @@ static VALUE protect_parse(VALUE drp) {
71
68
  return Qnil;
72
69
  }
73
70
 
74
- #if HAVE_RB_ENC_ASSOCIATE
75
- static int
76
- str_is_ascii(const char *s) {
77
- for (; '\0' != *s; s++) {
78
- if (*s < ' ' || '~' < *s) {
79
- return 0;
80
- }
81
- }
82
- return 1;
83
- }
84
- #endif
85
-
86
71
  VALUE
87
- str2sym(SaxDrive dr, const char *str, const char **strp) {
88
- VALUE *slot;
89
- VALUE sym;
72
+ str2sym(SaxDrive dr, const char *str, size_t len, const char **strp) {
73
+ VALUE sym;
90
74
 
91
75
  if (dr->options.symbolize) {
92
- if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
93
- #if HAVE_RB_ENC_ASSOCIATE
94
- if (0 != dr->encoding && !str_is_ascii(str)) {
95
- VALUE rstr = rb_str_new2(str);
96
-
97
- // TBD if sym can be pinned down then use this all the time
98
- rb_enc_associate(rstr, dr->encoding);
99
- sym = rb_funcall(rstr, ox_to_sym_id, 0);
100
- *slot = Qundef;
101
- } else {
102
- sym = ID2SYM(rb_intern(str));
103
- *slot = sym;
104
- }
105
- #else
106
- sym = ID2SYM(rb_intern(str));
107
- *slot = sym;
108
- #endif
109
- }
76
+ sym = ox_sym_intern(str, len, strp);
110
77
  } else {
111
- sym = rb_str_new2(str);
112
- #if HAVE_RB_ENC_ASSOCIATE
113
- if (0 != dr->encoding) {
114
- rb_enc_associate(sym, dr->encoding);
115
- }
116
- #endif
117
- if (0 != strp) {
118
- *strp = StringValuePtr(sym);
119
- }
78
+ sym = dr->get_name(str, len, dr->encoding, strp);
120
79
  }
121
80
  return sym;
122
81
  }
123
82
 
124
- void
125
- ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
126
- struct _saxDrive dr;
127
- int line = 0;
83
+ void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
84
+ #if HAVE_RB_EXT_RACTOR_SAFE
85
+ rb_ext_ractor_safe(true);
86
+ #endif
87
+ struct _saxDrive dr;
88
+ int line = 0;
128
89
 
129
90
  sax_drive_init(&dr, handler, io, options);
130
- #if 0
131
- printf("*** sax_parse with these flags\n");
132
- printf(" has_instruct = %s\n", dr.has.instruct ? "true" : "false");
133
- printf(" has_end_instruct = %s\n", dr.has.end_instruct ? "true" : "false");
134
- printf(" has_attr = %s\n", dr.has.attr ? "true" : "false");
135
- printf(" has_attr_value = %s\n", dr.has.attr_value ? "true" : "false");
136
- printf(" has_attrs_done = %s\n", dr.has.attrs_done ? "true" : "false");
137
- printf(" has_doctype = %s\n", dr.has.doctype ? "true" : "false");
138
- printf(" has_comment = %s\n", dr.has.comment ? "true" : "false");
139
- printf(" has_cdata = %s\n", dr.has.cdata ? "true" : "false");
140
- printf(" has_text = %s\n", dr.has.text ? "true" : "false");
141
- printf(" has_value = %s\n", dr.has.value ? "true" : "false");
142
- printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false");
143
- printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false");
144
- printf(" has_error = %s\n", dr.has.error ? "true" : "false");
145
- printf(" has_pos = %s\n", dr.has.pos ? "true" : "false");
146
- printf(" has_line = %s\n", dr.has.line ? "true" : "false");
147
- printf(" has_column = %s\n", dr.has.column ? "true" : "false");
148
- #endif
149
- //parse(&dr);
150
91
  rb_protect(protect_parse, (VALUE)&dr, &line);
151
92
  ox_sax_drive_cleanup(&dr);
152
93
  if (0 != line) {
153
- rb_jump_tag(line);
94
+ rb_jump_tag(line);
95
+ }
96
+ }
97
+
98
+ static void set_long_noop(VALUE handler, long pos) {
99
+ }
100
+
101
+ static void set_pos(VALUE handler, long pos) {
102
+ rb_ivar_set(handler, ox_at_pos_id, LONG2NUM(pos));
103
+ }
104
+
105
+ static void set_line(VALUE handler, long line) {
106
+ rb_ivar_set(handler, ox_at_line_id, LONG2NUM(line));
107
+ }
108
+
109
+ static void set_col(VALUE handler, long col) {
110
+ rb_ivar_set(handler, ox_at_column_id, LONG2NUM(col));
111
+ }
112
+
113
+ static void attr_noop(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
114
+ }
115
+
116
+ static void attr_text(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
117
+ VALUE args[2];
118
+
119
+ args[0] = name;
120
+ if (dr->options.convert_special) {
121
+ ox_sax_collapse_special(dr, value, pos, line, col);
122
+ }
123
+ args[1] = rb_str_new2(value);
124
+ if (0 != dr->encoding) {
125
+ rb_enc_associate(args[1], dr->encoding);
126
+ }
127
+ dr->set_pos(dr->handler, pos);
128
+ dr->set_line(dr->handler, line);
129
+ dr->set_col(dr->handler, col);
130
+ rb_funcall2(dr->handler, ox_attr_id, 2, args);
131
+ }
132
+
133
+ static void attr_value(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
134
+ VALUE args[2];
135
+
136
+ dr->set_pos(dr->handler, pos);
137
+ dr->set_line(dr->handler, line);
138
+ dr->set_col(dr->handler, col);
139
+ args[0] = name;
140
+ args[1] = dr->value_obj;
141
+ rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
142
+ }
143
+
144
+ static void attrs_done_noop(VALUE handler) {
145
+ }
146
+
147
+ static void attrs_done(VALUE handler) {
148
+ rb_funcall(handler, ox_attrs_done_id, 0);
149
+ }
150
+
151
+ static VALUE instruct_noop(SaxDrive dr, const char *target, long pos, long line, long col) {
152
+ return Qnil;
153
+ }
154
+
155
+ static VALUE instruct(SaxDrive dr, const char *target, long pos, long line, long col) {
156
+ VALUE arg = rb_str_new2(target);
157
+
158
+ dr->set_pos(dr->handler, pos);
159
+ dr->set_line(dr->handler, line);
160
+ dr->set_col(dr->handler, col);
161
+ rb_funcall(dr->handler, ox_instruct_id, 1, arg);
162
+
163
+ return arg;
164
+ }
165
+
166
+ static VALUE instruct_just_value(SaxDrive dr, const char *target, long pos, long line, long col) {
167
+ return rb_str_new2(target);
168
+ }
169
+
170
+ static void end_instruct_noop(SaxDrive dr, VALUE target, long pos, long line, long col) {
171
+ }
172
+
173
+ static void end_instruct(SaxDrive dr, VALUE target, long pos, long line, long col) {
174
+ dr->set_pos(dr->handler, pos);
175
+ dr->set_line(dr->handler, line);
176
+ dr->set_col(dr->handler, col);
177
+ rb_funcall(dr->handler, ox_end_instruct_id, 1, target);
178
+ }
179
+
180
+ static void dr_loc_noop(SaxDrive dr, long pos, long line, long col) {
181
+ }
182
+
183
+ static void comment(SaxDrive dr, long pos, long line, long col) {
184
+ if (!dr->blocked) {
185
+ Nv parent = stack_peek(&dr->stack);
186
+ Hint h = ox_hint_find(dr->options.hints, "!--");
187
+
188
+ if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
189
+ (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
190
+ VALUE arg = rb_str_new2(dr->buf.str);
191
+
192
+ if (0 != dr->encoding) {
193
+ rb_enc_associate(arg, dr->encoding);
194
+ }
195
+ dr->set_pos(dr->handler, pos);
196
+ dr->set_line(dr->handler, line);
197
+ dr->set_col(dr->handler, col);
198
+ rb_funcall(dr->handler, ox_comment_id, 1, arg);
199
+ }
200
+ }
201
+ }
202
+
203
+ static void cdata(SaxDrive dr, long pos, long line, long col) {
204
+ Nv parent = stack_peek(&dr->stack);
205
+
206
+ if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
207
+ VALUE arg = rb_str_new2(dr->buf.str);
208
+
209
+ if (0 != dr->encoding) {
210
+ rb_enc_associate(arg, dr->encoding);
211
+ }
212
+ dr->set_pos(dr->handler, pos);
213
+ dr->set_line(dr->handler, line);
214
+ dr->set_col(dr->handler, col);
215
+ rb_funcall(dr->handler, ox_cdata_id, 1, arg);
216
+ }
217
+ }
218
+
219
+ static void doctype(SaxDrive dr, long pos, long line, long col) {
220
+ dr->set_pos(dr->handler, pos);
221
+ dr->set_line(dr->handler, line);
222
+ dr->set_col(dr->handler, col);
223
+ rb_funcall(dr->handler, ox_doctype_id, 1, rb_str_new2(dr->buf.str));
224
+ }
225
+
226
+ static void error_noop(SaxDrive dr, const char *msg, long pos, long line, long col) {
227
+ }
228
+
229
+ static void error(SaxDrive dr, const char *msg, long pos, long line, long col) {
230
+ VALUE args[3];
231
+
232
+ args[0] = rb_str_new2(msg);
233
+ args[1] = LONG2NUM(line);
234
+ args[2] = LONG2NUM(col);
235
+ dr->set_pos(dr->handler, pos);
236
+ dr->set_line(dr->handler, line);
237
+ dr->set_col(dr->handler, col);
238
+ rb_funcall2(dr->handler, ox_error_id, 3, args);
239
+ }
240
+
241
+ static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
242
+ if (dr->has_end_element && 0 >= dr->blocked &&
243
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
244
+ dr->set_pos(dr->handler, pos);
245
+ dr->set_line(dr->handler, line);
246
+ dr->set_col(dr->handler, col);
247
+ rb_funcall(dr->handler, ox_end_element_id, 1, name);
248
+ }
249
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
250
+ dr->blocked--;
154
251
  }
155
252
  }
156
253
 
157
- static void
158
- sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
254
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
159
255
  ox_sax_buf_init(&dr->buf, io);
160
256
  dr->buf.dr = dr;
161
257
  stack_init(&dr->stack);
162
- dr->handler = handler;
258
+ dr->handler = handler;
163
259
  dr->value_obj = Data_Wrap_Struct(ox_sax_value_class, 0, 0, dr);
164
260
  rb_gc_register_address(&dr->value_obj);
165
261
  dr->options = *options;
166
- dr->err = 0;
262
+ dr->err = 0;
167
263
  dr->blocked = 0;
168
- dr->abort = false;
169
- has_init(&dr->has, handler);
170
- #if HAVE_RB_ENC_FIND
264
+ dr->abort = false;
265
+
266
+ dr->set_pos = (Qtrue == rb_ivar_defined(handler, ox_at_pos_id)) ? set_pos : set_long_noop;
267
+ dr->set_line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id)) ? set_line : set_long_noop;
268
+ dr->set_col = (Qtrue == rb_ivar_defined(handler, ox_at_column_id)) ? set_col : set_long_noop;
269
+ if (rb_respond_to(handler, ox_attr_value_id)) {
270
+ dr->attr_cb = attr_value;
271
+ dr->want_attr_name = true;
272
+ } else if (rb_respond_to(handler, ox_attr_id)) {
273
+ dr->attr_cb = attr_text;
274
+ dr->want_attr_name = true;
275
+ } else {
276
+ dr->attr_cb = attr_noop;
277
+ dr->want_attr_name = false;
278
+ }
279
+ dr->attrs_done = rb_respond_to(handler, ox_attrs_done_id) ? attrs_done : attrs_done_noop;
280
+ dr->instruct = rb_respond_to(handler, ox_instruct_id) ? instruct : instruct_noop;
281
+ dr->end_instruct = rb_respond_to(handler, ox_end_instruct_id) ? end_instruct : end_instruct_noop;
282
+ if (rb_respond_to(handler, ox_end_instruct_id) && !rb_respond_to(handler, ox_instruct_id)) {
283
+ dr->instruct = instruct_just_value;
284
+ }
285
+ dr->doctype = rb_respond_to(handler, ox_doctype_id) ? doctype : dr_loc_noop;
286
+ dr->comment = rb_respond_to(handler, ox_comment_id) ? comment : dr_loc_noop;
287
+ dr->cdata = rb_respond_to(handler, ox_cdata_id) ? cdata : dr_loc_noop;
288
+ dr->error = rb_respond_to(handler, ox_error_id) ? error : error_noop;
289
+
290
+ dr->has_text = rb_respond_to(handler, ox_text_id);
291
+ dr->has_value = rb_respond_to(handler, ox_value_id);
292
+ dr->has_start_element = rb_respond_to(handler, ox_start_element_id);
293
+ dr->has_end_element = rb_respond_to(handler, ox_end_element_id);
294
+
171
295
  if ('\0' == *ox_default_options.encoding) {
172
- VALUE encoding;
173
-
174
- dr->encoding = 0;
175
- if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
176
- int e = rb_enc_get_index(encoding);
177
- if (0 <= e) {
178
- dr->encoding = rb_enc_from_index(e);
179
- }
180
- }
296
+ VALUE encoding;
297
+
298
+ dr->encoding = 0;
299
+ if (rb_respond_to(io, ox_external_encoding_id) &&
300
+ Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
301
+ int e = rb_enc_get_index(encoding);
302
+ if (0 <= e) {
303
+ dr->encoding = rb_enc_from_index(e);
304
+ }
305
+ }
181
306
  } else {
182
307
  dr->encoding = rb_enc_find(ox_default_options.encoding);
183
308
  }
184
- #else
185
- dr->encoding = 0;
186
- #endif
309
+ dr->utf8 = (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding);
310
+ if (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding) { // UTF-8
311
+ dr->get_name = dr->options.symbolize ? ox_utf8_sym : ox_utf8_name; // TBD UTF8 sym?
312
+ } else {
313
+ dr->get_name = dr->options.symbolize ? ox_enc_sym : ox_enc_name;
314
+ }
187
315
  }
188
316
 
189
- void
190
- ox_sax_drive_cleanup(SaxDrive dr) {
317
+ void ox_sax_drive_cleanup(SaxDrive dr) {
191
318
  rb_gc_unregister_address(&dr->value_obj);
192
319
  buf_cleanup(&dr->buf);
193
320
  stack_cleanup(&dr->stack);
194
321
  }
195
322
 
196
- static void
197
- ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
198
- if (dr->has.error) {
199
- VALUE args[3];
200
-
201
- args[0] = rb_str_new2(msg);
202
- args[1] = LONG2NUM(line);
203
- args[2] = LONG2NUM(col);
204
- if (dr->has.pos) {
205
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
206
- }
207
- if (dr->has.pos) {
208
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
209
- }
210
- if (dr->has.line) {
211
- rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
212
- }
213
- if (dr->has.column) {
214
- rb_ivar_set(dr->handler, ox_at_column_id, args[2]);
215
- }
216
- rb_funcall2(dr->handler, ox_error_id, 3, args);
217
- }
323
+ static void ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
324
+ dr->error(dr, msg, pos, line, col);
218
325
  }
219
326
 
220
- void
221
- ox_sax_drive_error(SaxDrive dr, const char *msg) {
327
+ void ox_sax_drive_error(SaxDrive dr, const char *msg) {
222
328
  ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
223
329
  }
224
330
 
225
- static char
226
- skipBOM(SaxDrive dr) {
227
- char c = buf_get(&dr->buf);
331
+ static char skipBOM(SaxDrive dr) {
332
+ char c = buf_get(&dr->buf);
228
333
 
229
334
  if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
230
- if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
231
- #if HAVE_RB_ENC_FIND
232
- dr->encoding = ox_utf8_encoding;
233
- #else
234
- dr->encoding = UTF8_STR;
235
- #endif
236
- c = buf_get(&dr->buf);
237
- } else {
238
- ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
239
- c = '\0';
240
- }
335
+ if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
336
+ dr->encoding = ox_utf8_encoding;
337
+ c = buf_get(&dr->buf);
338
+ } else {
339
+ ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
340
+ c = '\0';
341
+ }
241
342
  }
242
343
  return c;
243
344
  }
244
345
 
245
- static void
246
- parse(SaxDrive dr) {
247
- char c = skipBOM(dr);
248
- int state = START_STATE;
249
- Nv parent;
346
+ static void parse(SaxDrive dr) {
347
+ char c = skipBOM(dr);
348
+ int state = START_STATE;
349
+ Nv parent;
250
350
 
251
351
  while ('\0' != c) {
252
- buf_protect(&dr->buf);
253
- if ('<' == c) {
254
- c = buf_get(&dr->buf);
255
- switch (c) {
256
- case '?': /* instructions (xml or otherwise) */
257
- c = read_instruction(dr);
258
- break;
259
- case '!': /* comment or doctype */
260
- buf_protect(&dr->buf);
261
- c = buf_get(&dr->buf);
262
- if ('\0' == c) {
263
- ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
264
-
265
- goto DONE;
266
- } else if ('-' == c) {
267
- c = buf_get(&dr->buf); /* skip first - and get next character */
268
- if ('-' != c) {
269
- ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
270
- } else {
271
- c = buf_get(&dr->buf); /* skip second - */
272
- }
273
- c = read_comment(dr);
274
- } else {
275
- int i;
276
- int spaced = 0;
277
- off_t pos = dr->buf.pos + 1;
278
- off_t line = dr->buf.line;
279
- off_t col = dr->buf.col + 1;
280
-
281
- if (is_white(c)) {
282
- spaced = 1;
283
- c = buf_next_non_white(&dr->buf);
284
- }
285
- dr->buf.str = dr->buf.tail - 1;
286
- for (i = 7; 0 < i; i--) {
287
- c = buf_get(&dr->buf);
288
- }
289
- if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
290
- if (spaced) {
291
- ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
292
- }
293
- if (START_STATE != state) {
294
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
295
- }
296
- c = read_doctype(dr);
297
- } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
298
- if (!dr->options.smart) {
299
- ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
300
- }
301
- if (START_STATE != state) {
302
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
303
- }
304
- c = read_doctype(dr);
305
- } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
306
- if (spaced) {
307
- ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
308
- }
309
- c = read_cdata(dr);
310
- } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
311
- if (!dr->options.smart) {
312
- ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
313
- }
314
- c = read_cdata(dr);
315
- } else {
316
- Nv parent = stack_peek(&dr->stack);
317
-
318
- if (0 != parent) {
319
- parent->childCnt++;
320
- }
321
- ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
322
- c = read_name_token(dr);
323
- if ('>' == c) {
324
- c = buf_get(&dr->buf);
325
- }
326
- }
327
- }
328
- break;
329
- case '/': /* element end */
330
- parent = stack_peek(&dr->stack);
331
- if (0 != parent && 0 == parent->childCnt && dr->has.text && !dr->blocked) {
332
- VALUE args[1];
333
- off_t pos = dr->buf.pos;
334
- off_t line = dr->buf.line;
335
- off_t col = dr->buf.col - 1;
336
-
337
- args[0] = rb_str_new2("");
338
- #if HAVE_RB_ENC_ASSOCIATE
339
- if (0 != dr->encoding) {
340
- rb_enc_associate(args[0], dr->encoding);
341
- }
342
- #endif
343
- if (dr->has.pos) {
344
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
345
- }
346
- if (dr->has.line) {
347
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
348
- }
349
- if (dr->has.column) {
350
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
351
- }
352
- rb_funcall2(dr->handler, ox_text_id, 1, args);
353
- }
354
- c = read_element_end(dr);
355
- if (0 == stack_peek(&dr->stack)) {
356
- state = AFTER_STATE;
357
- }
358
- break;
359
- case '\0':
360
- goto DONE;
361
- default:
362
- buf_backup(&dr->buf);
363
- if (AFTER_STATE == state) {
364
- ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
365
- }
366
- state = BODY_STATE;
367
- c = read_element_start(dr);
368
- if (0 == stack_peek(&dr->stack)) {
369
- state = AFTER_STATE;
370
- }
371
- break;
372
- }
373
- } else {
374
- buf_reset(&dr->buf);
375
- c = read_text(dr);
376
- }
377
- }
378
- DONE:
352
+ buf_protect(&dr->buf);
353
+ if ('<' == c) {
354
+ c = buf_get(&dr->buf);
355
+ switch (c) {
356
+ case '?': /* instructions (xml or otherwise) */ c = read_instruction(dr); break;
357
+ case '!': /* comment or doctype */
358
+ buf_protect(&dr->buf);
359
+ c = buf_get(&dr->buf);
360
+ if ('\0' == c) {
361
+ ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
362
+
363
+ goto DONE;
364
+ } else if ('-' == c) {
365
+ c = buf_get(&dr->buf); /* skip first - and get next character */
366
+ if ('-' != c) {
367
+ ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
368
+ } else {
369
+ c = buf_get(&dr->buf); /* skip second - */
370
+ }
371
+ c = read_comment(dr);
372
+ } else {
373
+ int i;
374
+ int spaced = 0;
375
+ off_t pos = dr->buf.pos + 1;
376
+ off_t line = dr->buf.line;
377
+ off_t col = dr->buf.col + 1;
378
+
379
+ if (is_white(c)) {
380
+ spaced = 1;
381
+ c = buf_next_non_white(&dr->buf);
382
+ }
383
+ dr->buf.str = dr->buf.tail - 1;
384
+ for (i = 7; 0 < i; i--) {
385
+ c = buf_get(&dr->buf);
386
+ }
387
+ if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
388
+ if (spaced) {
389
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
390
+ }
391
+ if (START_STATE != state) {
392
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
393
+ }
394
+ c = read_doctype(dr);
395
+ } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
396
+ if (!dr->options.smart) {
397
+ ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
398
+ }
399
+ if (START_STATE != state) {
400
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
401
+ }
402
+ c = read_doctype(dr);
403
+ } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
404
+ if (spaced) {
405
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
406
+ }
407
+ c = read_cdata(dr);
408
+ } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
409
+ if (!dr->options.smart) {
410
+ ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
411
+ }
412
+ c = read_cdata(dr);
413
+ } else {
414
+ Nv parent = stack_peek(&dr->stack);
415
+
416
+ if (0 != parent) {
417
+ parent->childCnt++;
418
+ }
419
+ ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
420
+ c = read_name_token(dr);
421
+ if ('>' == c) {
422
+ c = buf_get(&dr->buf);
423
+ }
424
+ }
425
+ }
426
+ break;
427
+ case '/': /* element end */
428
+ parent = stack_peek(&dr->stack);
429
+ if (0 != parent && 0 == parent->childCnt && dr->has_text && !dr->blocked) {
430
+ VALUE args[1];
431
+ args[0] = rb_str_new2("");
432
+ if (0 != dr->encoding) {
433
+ rb_enc_associate(args[0], dr->encoding);
434
+ }
435
+ dr->set_pos(dr->handler, dr->buf.pos);
436
+ dr->set_line(dr->handler, dr->buf.line);
437
+ dr->set_col(dr->handler, dr->buf.col);
438
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
439
+ }
440
+ c = read_element_end(dr);
441
+ if (0 == stack_peek(&dr->stack)) {
442
+ state = AFTER_STATE;
443
+ }
444
+ break;
445
+ case '\0': goto DONE;
446
+ default:
447
+ buf_backup(&dr->buf);
448
+ if (AFTER_STATE == state) {
449
+ ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
450
+ }
451
+ state = BODY_STATE;
452
+ c = read_element_start(dr);
453
+ if (0 == stack_peek(&dr->stack)) {
454
+ state = AFTER_STATE;
455
+ }
456
+ break;
457
+ }
458
+ } else {
459
+ buf_reset(&dr->buf);
460
+ c = read_text(dr);
461
+ }
462
+ }
463
+ DONE:
379
464
  if (dr->abort) {
380
- return;
465
+ return;
381
466
  }
382
467
  if (dr->stack.head < dr->stack.tail) {
383
- char msg[256];
384
- Nv sp;
385
-
386
- if (dr->has.pos) {
387
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(dr->buf.pos));
388
- }
389
- if (dr->has.line) {
390
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
391
- }
392
- if (dr->has.column) {
393
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(dr->buf.col));
394
- }
395
- for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
396
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
397
- ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
398
- if (dr->has.end_element && 0 >= dr->blocked &&
399
- (NULL == sp->hint || ActiveOverlay == sp->hint->overlay || NestOverlay == sp->hint->overlay)) {
400
- VALUE args[1];
401
-
402
- args[0] = sp->val;
403
- rb_funcall2(dr->handler, ox_end_element_id, 1, args);
404
- }
405
- if (dr->blocked && NULL != sp->hint && BlockOverlay == sp->hint->overlay) {
406
- dr->blocked--;
407
- }
468
+ char msg[256];
469
+ Nv sp;
470
+
471
+ for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
472
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
473
+ ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
474
+ end_element_cb(dr, sp->val, dr->buf.pos, dr->buf.line, dr->buf.col, sp->hint);
408
475
  }
409
476
  }
410
477
  }
411
478
 
412
- static void
413
- read_content(SaxDrive dr, char *content, size_t len) {
414
- char c;
415
- char *end = content + len;
479
+ static void read_content(SaxDrive dr, char *content, size_t len) {
480
+ char c;
481
+ char *end = content + len;
416
482
 
417
483
  while ('\0' != (c = buf_get(&dr->buf))) {
418
- if (end <= content) {
419
- *content = '\0';
420
- ox_sax_drive_error(dr, "processing instruction content too large");
421
- return;
422
- }
423
- if ('?' == c) {
424
- if ('\0' == (c = buf_get(&dr->buf))) {
425
- ox_sax_drive_error(dr, NO_TERM "document not terminated");
426
- }
427
- if ('>' == c) {
428
- *content = '\0';
429
- return;
430
- } else {
431
- *content++ = c;
432
- }
433
- } else {
434
- *content++ = c;
435
- }
484
+ if (end <= content) {
485
+ *content = '\0';
486
+ ox_sax_drive_error(dr, "processing instruction content too large");
487
+ return;
488
+ }
489
+ if ('?' == c) {
490
+ if ('\0' == (c = buf_get(&dr->buf))) {
491
+ ox_sax_drive_error(dr, NO_TERM "document not terminated");
492
+ }
493
+ if ('>' == c) {
494
+ *content = '\0';
495
+ return;
496
+ } else {
497
+ *content++ = c;
498
+ }
499
+ } else {
500
+ *content++ = c;
501
+ }
436
502
  }
437
503
  *content = '\0';
438
504
  }
439
505
 
440
506
  /* Entered after the "<?" sequence. Ready to read the rest.
441
507
  */
442
- static char
443
- read_instruction(SaxDrive dr) {
444
- char content[4096];
445
- char c;
446
- int coff;
447
- VALUE target = Qnil;
448
- int is_xml;
449
- off_t pos = dr->buf.pos - 1;
450
- off_t line = dr->buf.line;
451
- off_t col = dr->buf.col - 1;
508
+ static char read_instruction(SaxDrive dr) {
509
+ char content[4096];
510
+ char c;
511
+ int coff;
512
+ VALUE target = Qnil;
513
+ int is_xml;
514
+ off_t pos = dr->buf.pos - 1;
515
+ off_t line = dr->buf.line;
516
+ off_t col = dr->buf.col - 1;
452
517
 
453
518
  buf_protect(&dr->buf);
454
519
  if ('\0' == (c = read_name_token(dr))) {
455
520
  return c;
456
521
  }
457
522
  is_xml = (0 == (dr->options.smart ? strcasecmp("xml", dr->buf.str) : strcmp("xml", dr->buf.str)));
458
- if (dr->has.instruct || dr->has.end_instruct) {
459
- target = rb_str_new2(dr->buf.str);
460
- }
461
- if (dr->has.instruct) {
462
- VALUE args[1];
463
-
464
- if (dr->has.pos) {
465
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
466
- }
467
- if (dr->has.line) {
468
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
469
- }
470
- if (dr->has.column) {
471
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
472
- }
473
- args[0] = target;
474
- rb_funcall2(dr->handler, ox_instruct_id, 1, args);
475
- }
523
+
524
+ target = dr->instruct(dr, dr->buf.str, pos, line, col);
476
525
  buf_protect(&dr->buf);
477
- pos = dr->buf.pos;
526
+ pos = dr->buf.pos;
478
527
  line = dr->buf.line;
479
- col = dr->buf.col;
528
+ col = dr->buf.col;
480
529
  read_content(dr, content, sizeof(content) - 1);
481
530
  coff = (int)(dr->buf.tail - dr->buf.head);
482
531
  buf_reset(&dr->buf);
483
532
  dr->err = 0;
484
- c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
485
- if (dr->has.attrs_done) {
486
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
487
- }
533
+ c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
534
+ dr->attrs_done(dr->handler);
488
535
  if (dr->err) {
489
- if (dr->has.text) {
490
- VALUE args[1];
491
-
492
- if (dr->options.convert_special) {
493
- ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
494
- }
495
- args[0] = rb_str_new2(content);
496
- #if HAVE_RB_ENC_ASSOCIATE
497
- if (0 != dr->encoding) {
498
- rb_enc_associate(args[0], dr->encoding);
499
- }
500
- #endif
501
- if (dr->has.line) {
502
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
503
- }
504
- if (dr->has.pos) {
505
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
506
- }
507
- if (dr->has.column) {
508
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
509
- }
510
- rb_funcall2(dr->handler, ox_text_id, 1, args);
511
- }
512
- dr->buf.tail = dr->buf.head + coff;
513
- c = buf_get(&dr->buf);
536
+ if (dr->has_text) {
537
+ VALUE args[1];
538
+
539
+ if (dr->options.convert_special) {
540
+ ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
541
+ }
542
+ args[0] = rb_str_new2(content);
543
+ if (0 != dr->encoding) {
544
+ rb_enc_associate(args[0], dr->encoding);
545
+ }
546
+ dr->set_pos(dr->handler, pos);
547
+ dr->set_line(dr->handler, line);
548
+ dr->set_col(dr->handler, col);
549
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
550
+ }
551
+ dr->buf.tail = dr->buf.head + coff;
552
+ c = buf_get(&dr->buf);
514
553
  } else {
515
- pos = dr->buf.pos;
516
- line = dr->buf.line;
517
- col = dr->buf.col;
518
- c = buf_next_non_white(&dr->buf);
519
- if ('>' == c) {
520
- c = buf_get(&dr->buf);
521
- } else {
522
- ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
523
- if ('>' == c) {
524
- c = buf_get(&dr->buf);
525
- }
526
- }
527
- }
528
- if (dr->has.end_instruct) {
529
- VALUE args[1];
530
-
531
- if (dr->has.pos) {
532
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
533
- }
534
- if (dr->has.line) {
535
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
536
- }
537
- if (dr->has.column) {
538
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
539
- }
540
- args[0] = target;
541
- rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
554
+ pos = dr->buf.pos;
555
+ line = dr->buf.line;
556
+ col = dr->buf.col;
557
+ c = buf_next_non_white(&dr->buf);
558
+ if ('>' == c) {
559
+ c = buf_get(&dr->buf);
560
+ } else {
561
+ ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
562
+ if ('>' == c) {
563
+ c = buf_get(&dr->buf);
564
+ }
565
+ }
542
566
  }
543
- dr->buf.str = 0;
567
+ dr->end_instruct(dr, target, pos, line, col);
568
+ dr->buf.str = NULL;
544
569
 
545
570
  return c;
546
571
  }
547
572
 
548
- static char
549
- read_delimited(SaxDrive dr, char end) {
550
- char c;
573
+ static char read_delimited(SaxDrive dr, char end) {
574
+ char c;
551
575
 
552
576
  if ('"' == end || '\'' == end) {
553
- while (end != (c = buf_get(&dr->buf))) {
554
- if ('\0' == c) {
555
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
556
- return c;
557
- }
558
- }
577
+ while (end != (c = buf_get(&dr->buf))) {
578
+ if ('\0' == c) {
579
+ ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
580
+ return c;
581
+ }
582
+ }
559
583
  } else {
560
- while (1) {
561
- c = buf_get(&dr->buf);
562
- if (end == c) {
563
- return c;
564
- }
565
- switch (c) {
566
- case '\0':
567
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
568
- return c;
569
- case '"':
570
- c = read_delimited(dr, c);
571
- break;
572
- case '\'':
573
- c = read_delimited(dr, c);
574
- break;
575
- case '[':
576
- c = read_delimited(dr, ']');
577
- break;
578
- case '<':
579
- c = read_delimited(dr, '>');
580
- break;
581
- default:
582
- break;
583
- }
584
- }
584
+ while (1) {
585
+ c = buf_get(&dr->buf);
586
+ if (end == c) {
587
+ return c;
588
+ }
589
+ switch (c) {
590
+ case '\0': ox_sax_drive_error(dr, NO_TERM "doctype not terminated"); return c;
591
+ case '"': c = read_delimited(dr, c); break;
592
+ case '\'': c = read_delimited(dr, c); break;
593
+ case '[': c = read_delimited(dr, ']'); break;
594
+ case '<': c = read_delimited(dr, '>'); break;
595
+ default: break;
596
+ }
597
+ }
585
598
  }
586
599
  return c;
587
600
  }
588
601
 
589
602
  /* Entered after the "<!DOCTYPE " sequence. Ready to read the rest.
590
603
  */
591
- static char
592
- read_doctype(SaxDrive dr) {
593
- long pos = (long)(dr->buf.pos - 9);
594
- long line = (long)(dr->buf.line);
595
- long col = (long)(dr->buf.col - 9);
596
- char *s;
597
- Nv parent = stack_peek(&dr->stack);
604
+ static char read_doctype(SaxDrive dr) {
605
+ long pos = (long)(dr->buf.pos - 9);
606
+ long line = (long)(dr->buf.line);
607
+ long col = (long)(dr->buf.col - 9);
608
+ char *s;
609
+ Nv parent = stack_peek(&dr->stack);
598
610
 
599
611
  buf_backup(&dr->buf); /* back up to the start in case the doctype is empty */
600
612
  buf_protect(&dr->buf);
601
613
  read_delimited(dr, '>');
602
614
  if (dr->options.smart && 0 == dr->options.hints) {
603
- for (s = dr->buf.str; is_white(*s); s++) { }
604
- if (0 == strncasecmp("HTML", s, 4)) {
605
- dr->options.hints = ox_hints_html();
606
- }
615
+ for (s = dr->buf.str; is_white(*s); s++) {
616
+ }
617
+ if (0 == strncasecmp("HTML", s, 4)) {
618
+ dr->options.hints = ox_hints_html();
619
+ }
607
620
  }
608
621
  *(dr->buf.tail - 1) = '\0';
609
622
  if (0 != parent) {
610
- parent->childCnt++;
611
- }
612
- if (dr->has.doctype) {
613
- VALUE args[1];
614
-
615
- if (dr->has.pos) {
616
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
617
- }
618
- if (dr->has.line) {
619
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
620
- }
621
- if (dr->has.column) {
622
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
623
- }
624
- args[0] = rb_str_new2(dr->buf.str);
625
- rb_funcall2(dr->handler, ox_doctype_id, 1, args);
623
+ parent->childCnt++;
626
624
  }
625
+ dr->doctype(dr, pos, line, col);
627
626
  dr->buf.str = 0;
628
627
 
629
628
  return buf_get(&dr->buf);
@@ -631,89 +630,65 @@ read_doctype(SaxDrive dr) {
631
630
 
632
631
  /* Entered after the "<![CDATA[" sequence. Ready to read the rest.
633
632
  */
634
- static char
635
- read_cdata(SaxDrive dr) {
636
- char c;
637
- char zero = '\0';
638
- int end = 0;
639
- long pos = (long)(dr->buf.pos - 9);
640
- long line = (long)(dr->buf.line);
641
- long col = (long)(dr->buf.col - 9);
642
- struct _checkPt cp = CHECK_PT_INIT;
643
- Nv parent = stack_peek(&dr->stack);
633
+ static char read_cdata(SaxDrive dr) {
634
+ char c;
635
+ char zero = '\0';
636
+ int end = 0;
637
+ long pos = (long)(dr->buf.pos - 9);
638
+ long line = (long)(dr->buf.line);
639
+ long col = (long)(dr->buf.col - 9);
640
+ struct _checkPt cp = CHECK_PT_INIT;
641
+ Nv parent = stack_peek(&dr->stack);
644
642
 
645
643
  // TBD check parent overlay
646
644
  if (0 != parent) {
647
- parent->childCnt++;
645
+ parent->childCnt++;
648
646
  }
649
647
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
650
648
  buf_protect(&dr->buf);
651
649
  while (1) {
652
650
  c = buf_get(&dr->buf);
653
- switch (c) {
654
- case ']':
655
- end++;
656
- break;
657
- case '>':
651
+ switch (c) {
652
+ case ']': end++; break;
653
+ case '>':
658
654
  if (2 <= end) {
659
655
  *(dr->buf.tail - 3) = '\0';
660
- c = buf_get(&dr->buf);
656
+ c = buf_get(&dr->buf);
661
657
  goto CB;
662
658
  }
663
- if (!buf_checkset(&cp)) {
664
- buf_checkpoint(&dr->buf, &cp);
665
- }
659
+ if (!buf_checkset(&cp)) {
660
+ buf_checkpoint(&dr->buf, &cp);
661
+ }
662
+ end = 0;
663
+ break;
664
+ case '<':
665
+ if (!buf_checkset(&cp)) {
666
+ buf_checkpoint(&dr->buf, &cp);
667
+ }
666
668
  end = 0;
667
- break;
668
- case '<':
669
- if (!buf_checkset(&cp)) {
670
- buf_checkpoint(&dr->buf, &cp);
671
- }
672
- end = 0;
673
- break;
674
- case '\0':
675
- if (buf_checkset(&cp)) {
676
- c = buf_checkback(&dr->buf, &cp);
677
- ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
678
- zero = c;
679
- *(dr->buf.tail - 1) = '\0';
680
- goto CB;
681
- }
669
+ break;
670
+ case '\0':
671
+ if (buf_checkset(&cp)) {
672
+ c = buf_checkback(&dr->buf, &cp);
673
+ ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
674
+ zero = c;
675
+ *(dr->buf.tail - 1) = '\0';
676
+ goto CB;
677
+ }
682
678
  ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
683
679
  return '\0';
684
- default:
685
- if (1 < end && !buf_checkset(&cp)) {
686
- buf_checkpoint(&dr->buf, &cp);
687
- }
688
- end = 0;
689
- break;
690
- }
691
- }
692
- CB:
693
- if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
694
- if (dr->has.cdata) {
695
- VALUE args[1];
696
-
697
- args[0] = rb_str_new2(dr->buf.str);
698
- #if HAVE_RB_ENC_ASSOCIATE
699
- if (0 != dr->encoding) {
700
- rb_enc_associate(args[0], dr->encoding);
701
- }
702
- #endif
703
- if (dr->has.pos) {
704
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
705
- }
706
- if (dr->has.line) {
707
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
708
- }
709
- if (dr->has.column) {
710
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
711
- }
712
- rb_funcall2(dr->handler, ox_cdata_id, 1, args);
713
- }
680
+ default:
681
+ if (1 < end && !buf_checkset(&cp)) {
682
+ buf_checkpoint(&dr->buf, &cp);
683
+ }
684
+ end = 0;
685
+ break;
686
+ }
714
687
  }
688
+ CB:
689
+ dr->cdata(dr, pos, line, col);
715
690
  if ('\0' != zero) {
716
- *(dr->buf.tail - 1) = zero;
691
+ *(dr->buf.tail - 1) = zero;
717
692
  }
718
693
  dr->buf.str = 0;
719
694
 
@@ -722,88 +697,60 @@ read_cdata(SaxDrive dr) {
722
697
 
723
698
  /* Entered after the "<!--" sequence. Ready to read the rest.
724
699
  */
725
- static char
726
- read_comment(SaxDrive dr) {
727
- char c;
728
- char zero = '\0';
729
- int end = 0;
730
- long pos = (long)(dr->buf.pos - 4);
731
- long line = (long)(dr->buf.line);
732
- long col = (long)(dr->buf.col - 4);
733
- struct _checkPt cp = CHECK_PT_INIT;
700
+ static char read_comment(SaxDrive dr) {
701
+ char c;
702
+ char zero = '\0';
703
+ int end = 0;
704
+ long pos = (long)(dr->buf.pos - 4);
705
+ long line = (long)(dr->buf.line);
706
+ long col = (long)(dr->buf.col - 4);
707
+ struct _checkPt cp = CHECK_PT_INIT;
734
708
 
735
709
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
736
710
  buf_protect(&dr->buf);
737
711
  while (1) {
738
712
  c = buf_get(&dr->buf);
739
- switch (c) {
740
- case '-':
741
- end++;
742
- break;
743
- case '>':
713
+ switch (c) {
714
+ case '-': end++; break;
715
+ case '>':
744
716
  if (2 <= end) {
745
717
  *(dr->buf.tail - 3) = '\0';
746
- c = buf_get(&dr->buf);
718
+ c = buf_get(&dr->buf);
747
719
  goto CB;
748
720
  }
749
- if (!buf_checkset(&cp)) {
750
- buf_checkpoint(&dr->buf, &cp);
751
- }
721
+ if (!buf_checkset(&cp)) {
722
+ buf_checkpoint(&dr->buf, &cp);
723
+ }
752
724
  end = 0;
753
- break;
754
- case '<':
755
- if (!buf_checkset(&cp)) {
756
- buf_checkpoint(&dr->buf, &cp);
757
- }
758
- end = 0;
759
- break;
760
- case '\0':
761
- if (buf_checkset(&cp)) {
762
- c = buf_checkback(&dr->buf, &cp);
763
- ox_sax_drive_error(dr, NO_TERM "comment not terminated");
764
- zero = c;
765
- *(dr->buf.tail - 1) = '\0';
766
- goto CB;
767
- }
725
+ break;
726
+ case '<':
727
+ if (!buf_checkset(&cp)) {
728
+ buf_checkpoint(&dr->buf, &cp);
729
+ }
730
+ end = 0;
731
+ break;
732
+ case '\0':
733
+ if (buf_checkset(&cp)) {
734
+ c = buf_checkback(&dr->buf, &cp);
735
+ ox_sax_drive_error(dr, NO_TERM "comment not terminated");
736
+ zero = c;
737
+ *(dr->buf.tail - 1) = '\0';
738
+ goto CB;
739
+ }
768
740
  ox_sax_drive_error(dr, NO_TERM "comment not terminated");
769
741
  return '\0';
770
- default:
771
- if (1 < end && !buf_checkset(&cp)) {
772
- buf_checkpoint(&dr->buf, &cp);
773
- }
774
- end = 0;
775
- break;
776
- }
777
- }
778
- CB:
779
- if (dr->has.comment && !dr->blocked) {
780
- VALUE args[1];
781
- Nv parent = stack_peek(&dr->stack);
782
- Hint h = ox_hint_find(dr->options.hints, "!--");
783
-
784
- if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
785
- (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
786
-
787
- args[0] = rb_str_new2(dr->buf.str);
788
- #if HAVE_RB_ENC_ASSOCIATE
789
- if (0 != dr->encoding) {
790
- rb_enc_associate(args[0], dr->encoding);
791
- }
792
- #endif
793
- if (dr->has.pos) {
794
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
795
- }
796
- if (dr->has.line) {
797
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
798
- }
799
- if (dr->has.column) {
800
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
801
- }
802
- rb_funcall2(dr->handler, ox_comment_id, 1, args);
803
- }
742
+ default:
743
+ if (1 < end && !buf_checkset(&cp)) {
744
+ buf_checkpoint(&dr->buf, &cp);
745
+ }
746
+ end = 0;
747
+ break;
748
+ }
804
749
  }
750
+ CB:
751
+ dr->comment(dr, pos, line, col);
805
752
  if ('\0' != zero) {
806
- *(dr->buf.tail - 1) = zero;
753
+ *(dr->buf.tail - 1) = zero;
807
754
  }
808
755
  dr->buf.str = 0;
809
756
 
@@ -813,106 +760,115 @@ read_comment(SaxDrive dr) {
813
760
  /* Entered after the '<' and the first character after that. Returns status
814
761
  * code.
815
762
  */
816
- static char
817
- read_element_start(SaxDrive dr) {
818
- const char *ename = 0;
819
- volatile VALUE name = Qnil;
820
- char c;
821
- int closed;
822
- long pos = (long)(dr->buf.pos);
823
- long line = (long)(dr->buf.line);
824
- long col = (long)(dr->buf.col);
825
- Hint h = NULL;
826
- int stackless = 0;
827
- Nv parent = stack_peek(&dr->stack);
763
+ static char read_element_start(SaxDrive dr) {
764
+ const char *ename = 0;
765
+ volatile VALUE name = Qnil;
766
+ char c;
767
+ int closed;
768
+ long pos = (long)(dr->buf.pos);
769
+ long line = (long)(dr->buf.line);
770
+ long col = (long)(dr->buf.col);
771
+ Hint h = NULL;
772
+ int stackless = 0;
773
+ Nv parent = stack_peek(&dr->stack);
828
774
 
829
775
  if ('\0' == (c = read_name_token(dr))) {
830
776
  return '\0';
831
777
  }
832
778
  if ('\0' == *dr->buf.str) {
833
- char msg[256];
779
+ char msg[256];
834
780
 
835
- snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
836
- ox_sax_drive_error_at(dr, msg, pos, line, col);
781
+ snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
782
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
837
783
 
838
- return buf_get(&dr->buf);
784
+ return buf_get(&dr->buf);
839
785
  }
840
786
  if (0 != parent) {
841
- parent->childCnt++;
787
+ parent->childCnt++;
842
788
  }
843
- if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) && 0 == strcasecmp("html", dr->buf.str)) {
844
- dr->options.hints = ox_hints_html();
789
+ if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) &&
790
+ 0 == strcasecmp("html", dr->buf.str)) {
791
+ dr->options.hints = ox_hints_html();
845
792
  }
846
793
  if (NULL != dr->options.hints) {
847
- hint_clear_empty(dr);
848
- h = ox_hint_find(dr->options.hints, dr->buf.str);
849
- if (NULL == h) {
850
- char msg[256];
851
-
852
- snprintf(msg, sizeof(msg), "%s%s is not a valid element type for a %s document type.", INV_ELEMENT, dr->buf.str, dr->options.hints->name);
853
- ox_sax_drive_error(dr, msg);
854
- } else {
855
- Nv top_nv = stack_peek(&dr->stack);
856
-
857
- if (AbortOverlay == h->overlay) {
858
- if (rb_respond_to(dr->handler, ox_abort_id)) {
859
- VALUE args[1];
860
-
861
- args[0] = str2sym(dr, dr->buf.str, NULL);
862
- rb_funcall2(dr->handler, ox_abort_id, 1, args);
863
- }
864
- dr->abort = true;
865
- return '\0';
866
- }
867
- if (BlockOverlay == h->overlay) {
868
- dr->blocked++;
869
- }
870
- if (h->empty) {
871
- stackless = 1;
872
- }
873
- if (0 != top_nv) {
874
- char msg[256];
875
-
876
- if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
877
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be nested in a %s document, closing previous.",
878
- INV_ELEMENT, dr->buf.str, dr->options.hints->name);
879
- ox_sax_drive_error(dr, msg);
880
- stack_pop(&dr->stack);
881
- end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
882
- top_nv = stack_peek(&dr->stack);
883
- }
884
- if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
885
- const char **p;
886
- int ok = 0;
887
-
888
- for (p = h->parents; 0 != *p; p++) {
889
- if (0 == strcasecmp(*p, top_nv->name)) {
890
- ok = 1;
891
- break;
892
- }
893
- }
894
- if (!ok) {
895
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be a child of a %s in a %s document.",
896
- INV_ELEMENT, h->name, top_nv->name, dr->options.hints->name);
897
- ox_sax_drive_error(dr, msg);
898
- }
899
- }
900
- }
901
- }
902
- }
903
- name = str2sym(dr, dr->buf.str, &ename);
904
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
905
- VALUE args[1];
906
-
907
- if (dr->has.pos) {
908
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
909
- }
910
- if (dr->has.line) {
911
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
912
- }
913
- if (dr->has.column) {
914
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
915
- }
794
+ hint_clear_empty(dr);
795
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
796
+ if (NULL == h) {
797
+ char msg[256];
798
+
799
+ snprintf(msg,
800
+ sizeof(msg),
801
+ "%s%s is not a valid element type for a %s document type.",
802
+ INV_ELEMENT,
803
+ dr->buf.str,
804
+ dr->options.hints->name);
805
+ ox_sax_drive_error(dr, msg);
806
+ } else {
807
+ Nv top_nv = stack_peek(&dr->stack);
808
+
809
+ if (AbortOverlay == h->overlay) {
810
+ if (rb_respond_to(dr->handler, ox_abort_id)) {
811
+ VALUE args[1];
812
+
813
+ args[0] = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, NULL);
814
+ rb_funcall2(dr->handler, ox_abort_id, 1, args);
815
+ }
816
+ dr->abort = true;
817
+ return '\0';
818
+ }
819
+ if (BlockOverlay == h->overlay) {
820
+ dr->blocked++;
821
+ }
822
+ if (h->empty) {
823
+ stackless = 1;
824
+ }
825
+ if (0 != top_nv) {
826
+ char msg[256];
827
+
828
+ if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
829
+ snprintf(msg,
830
+ sizeof(msg) - 1,
831
+ "%s%s can not be nested in a %s document, closing previous.",
832
+ INV_ELEMENT,
833
+ dr->buf.str,
834
+ dr->options.hints->name);
835
+ ox_sax_drive_error(dr, msg);
836
+ stack_pop(&dr->stack);
837
+ end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
838
+ top_nv = stack_peek(&dr->stack);
839
+ }
840
+ if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
841
+ const char **p;
842
+ int ok = 0;
843
+
844
+ for (p = h->parents; 0 != *p; p++) {
845
+ if (0 == strcasecmp(*p, top_nv->name)) {
846
+ ok = 1;
847
+ break;
848
+ }
849
+ }
850
+ if (!ok) {
851
+ snprintf(msg,
852
+ sizeof(msg) - 1,
853
+ "%s%s can not be a child of a %s in a %s document.",
854
+ INV_ELEMENT,
855
+ h->name,
856
+ top_nv->name,
857
+ dr->options.hints->name);
858
+ ox_sax_drive_error(dr, msg);
859
+ }
860
+ }
861
+ }
862
+ }
863
+ }
864
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, &ename);
865
+ if (dr->has_start_element && 0 >= dr->blocked &&
866
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
867
+ VALUE args[1];
868
+
869
+ dr->set_pos(dr->handler, pos);
870
+ dr->set_line(dr->handler, line);
871
+ dr->set_col(dr->handler, col);
916
872
  args[0] = name;
917
873
  rb_funcall2(dr->handler, ox_start_element_id, 1, args);
918
874
  }
@@ -921,362 +877,302 @@ read_element_start(SaxDrive dr) {
921
877
  } else if ('>' == c) {
922
878
  closed = 0;
923
879
  } else {
924
- buf_protect(&dr->buf);
880
+ buf_protect(&dr->buf);
925
881
  c = read_attrs(dr, c, '/', '>', 0, 0, h);
926
- if (is_white(c)) {
927
- c = buf_next_non_white(&dr->buf);
928
- }
929
- closed = ('/' == c);
882
+ if (is_white(c)) {
883
+ c = buf_next_non_white(&dr->buf);
884
+ }
885
+ closed = ('/' == c);
930
886
  }
931
- if (dr->has.attrs_done && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
932
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
887
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
888
+ dr->attrs_done(dr->handler);
933
889
  }
934
890
  if (closed) {
935
- c = buf_next_non_white(&dr->buf);
936
- pos = dr->buf.pos;
937
- line = dr->buf.line;
938
- col = dr->buf.col;
939
- end_element_cb(dr, name, pos, line, col, h);
891
+ c = buf_next_non_white(&dr->buf);
892
+
893
+ end_element_cb(dr, name, dr->buf.pos, dr->buf.line, dr->buf.col, h);
940
894
  } else if (stackless) {
941
- end_element_cb(dr, name, pos, line, col, h);
895
+ end_element_cb(dr, name, pos, line, col, h);
942
896
  } else if (NULL != h && h->jump) {
943
- stack_push(&dr->stack, ename, name, h);
944
- if ('>' != c) {
945
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
946
- return c;
947
- }
948
- read_jump(dr, h->name);
949
- return '<';
897
+ stack_push(&dr->stack, ename, name, h);
898
+ if ('>' != c) {
899
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
900
+ return c;
901
+ }
902
+ read_jump(dr, h->name);
903
+ return '<';
950
904
  } else {
951
- stack_push(&dr->stack, ename, name, h);
905
+ stack_push(&dr->stack, ename, name, h);
952
906
  }
953
907
  if ('>' != c) {
954
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
955
- return c;
908
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
909
+ return c;
956
910
  }
957
911
  dr->buf.str = 0;
958
912
 
959
913
  return buf_get(&dr->buf);
960
914
  }
961
915
 
962
- static Nv
963
- stack_rev_find(SaxDrive dr, const char *name) {
964
- Nv nv;
916
+ static Nv stack_rev_find(SaxDrive dr, const char *name) {
917
+ Nv nv;
965
918
 
966
919
  for (nv = dr->stack.tail - 1; dr->stack.head <= nv; nv--) {
967
- if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
968
- return nv;
969
- }
920
+ if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
921
+ return nv;
922
+ }
970
923
  }
971
924
  return 0;
972
925
  }
973
926
 
974
- static char
975
- read_element_end(SaxDrive dr) {
976
- VALUE name = Qnil;
977
- char c;
978
- long pos = (long)(dr->buf.pos - 1);
979
- long line = (long)(dr->buf.line);
980
- long col = (long)(dr->buf.col - 1);
981
- Nv nv;
982
- Hint h = NULL;
927
+ static char read_element_end(SaxDrive dr) {
928
+ VALUE name = Qnil;
929
+ char c;
930
+ long pos = (long)(dr->buf.pos - 1);
931
+ long line = (long)(dr->buf.line);
932
+ long col = (long)(dr->buf.col - 1);
933
+ Nv nv;
934
+ Hint h = NULL;
983
935
 
984
936
  if ('\0' == (c = read_name_token(dr))) {
985
937
  return '\0';
986
938
  }
987
939
  if (is_white(c)) {
988
- c = buf_next_non_white(&dr->buf);
940
+ c = buf_next_non_white(&dr->buf);
989
941
  }
990
942
  // c should be > and current is one past so read another char
991
- c = buf_get(&dr->buf);
943
+ c = buf_get(&dr->buf);
992
944
  nv = stack_peek(&dr->stack);
993
- if (0 != nv &&
994
- 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
995
- name = nv->val;
996
- h = nv->hint;
997
- stack_pop(&dr->stack);
945
+ if (0 != nv && 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
946
+ name = nv->val;
947
+ h = nv->hint;
948
+ stack_pop(&dr->stack);
998
949
  } else {
999
- // Mismatched start and end
1000
- char msg[256];
1001
- Nv match = stack_rev_find(dr, dr->buf.str);
1002
-
1003
- if (0 == match) {
1004
- // Not found so open and close element.
1005
- h = ox_hint_find(dr->options.hints, dr->buf.str);
1006
- if (NULL != h && h->empty) {
1007
- // Just close normally
1008
- name = str2sym(dr, dr->buf.str, 0);
1009
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
1010
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1011
- return c;
1012
- } else {
1013
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
1014
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1015
- name = str2sym(dr, dr->buf.str, 0);
1016
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1017
- VALUE args[1];
1018
-
1019
- if (dr->has.pos) {
1020
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1021
- }
1022
- if (dr->has.line) {
1023
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1024
- }
1025
- if (dr->has.column) {
1026
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1027
- }
1028
- args[0] = name;
1029
- rb_funcall2(dr->handler, ox_start_element_id, 1, args);
1030
- }
1031
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1032
- dr->blocked--;
1033
- }
1034
- }
1035
- } else {
1036
- // Found a match so close all up to the found element in stack.
1037
- Nv n2;
1038
-
1039
- if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
1040
- name = n2->val;
1041
- h = n2->hint;
1042
- } else {
1043
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, nv->name);
1044
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1045
- if (dr->has.pos) {
1046
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1047
- }
1048
- if (dr->has.line) {
1049
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1050
- }
1051
- if (dr->has.column) {
1052
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1053
- }
1054
- for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1055
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == nv->hint || ActiveOverlay == nv->hint->overlay || NestOverlay == nv->hint->overlay)) {
1056
- rb_funcall(dr->handler, ox_end_element_id, 1, nv->val);
1057
- }
1058
- if (NULL != nv->hint && BlockOverlay == nv->hint->overlay && 0 < dr->blocked) {
1059
- dr->blocked--;
1060
- }
1061
- }
1062
- name = nv->val;
1063
- h = nv->hint;
1064
- }
1065
- }
950
+ // Mismatched start and end
951
+ char msg[256];
952
+ Nv match = stack_rev_find(dr, dr->buf.str);
953
+
954
+ if (0 == match) {
955
+ // Not found so open and close element.
956
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
957
+ if (NULL != h && h->empty) {
958
+ // Just close normally
959
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
960
+ snprintf(msg,
961
+ sizeof(msg) - 1,
962
+ "%selement '%s' should not have a separate close element",
963
+ EL_MISMATCH,
964
+ dr->buf.str);
965
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
966
+ return c;
967
+ } else {
968
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
969
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
970
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
971
+ if (dr->has_start_element && 0 >= dr->blocked &&
972
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
973
+ VALUE args[1];
974
+
975
+ dr->set_pos(dr->handler, pos);
976
+ dr->set_line(dr->handler, line);
977
+ dr->set_col(dr->handler, col);
978
+ args[0] = name;
979
+ rb_funcall2(dr->handler, ox_start_element_id, 1, args);
980
+ }
981
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
982
+ dr->blocked--;
983
+ }
984
+ }
985
+ } else {
986
+ // Found a match so close all up to the found element in stack.
987
+ Nv n2;
988
+
989
+ if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
990
+ name = n2->val;
991
+ h = n2->hint;
992
+ } else {
993
+ snprintf(msg,
994
+ sizeof(msg) - 1,
995
+ "%selement '%s' close does not match '%s' open",
996
+ EL_MISMATCH,
997
+ dr->buf.str,
998
+ nv->name);
999
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
1000
+ for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1001
+ end_element_cb(dr, nv->val, pos, line, col, nv->hint);
1002
+ }
1003
+ name = nv->val;
1004
+ h = nv->hint;
1005
+ }
1006
+ }
1066
1007
  }
1067
1008
  end_element_cb(dr, name, pos, line, col, h);
1068
1009
 
1069
1010
  return c;
1070
1011
  }
1071
1012
 
1072
- static char
1073
- read_text(SaxDrive dr) {
1074
- VALUE args[1];
1075
- char c;
1076
- long pos = (long)(dr->buf.pos);
1077
- long line = (long)(dr->buf.line);
1078
- long col = (long)(dr->buf.col - 1);
1079
- Nv parent = stack_peek(&dr->stack);
1080
- int allWhite = 1;
1013
+ static char read_text(SaxDrive dr) {
1014
+ VALUE args[1];
1015
+ char c;
1016
+ long pos = (long)(dr->buf.pos);
1017
+ long line = (long)(dr->buf.line);
1018
+ long col = (long)(dr->buf.col - 1);
1019
+ Nv parent = stack_peek(&dr->stack);
1020
+ int allWhite = 1;
1081
1021
 
1082
1022
  buf_backup(&dr->buf);
1083
1023
  buf_protect(&dr->buf);
1084
1024
  while ('<' != (c = buf_get(&dr->buf))) {
1085
- switch(c) {
1086
- case ' ':
1087
- case '\t':
1088
- case '\f':
1089
- case '\n':
1090
- case '\r':
1091
- break;
1092
- case '\0':
1093
- if (allWhite) {
1094
- return c;
1095
- }
1025
+ switch (c) {
1026
+ case ' ':
1027
+ case '\t':
1028
+ case '\f':
1029
+ case '\n':
1030
+ case '\r': break;
1031
+ case '\0':
1032
+ if (allWhite) {
1033
+ return c;
1034
+ }
1096
1035
  ox_sax_drive_error(dr, NO_TERM "text not terminated");
1097
- goto END_OF_BUF;
1098
- break;
1099
- default:
1100
- allWhite = 0;
1101
- break;
1102
- }
1103
- }
1104
- END_OF_BUF:
1036
+ goto END_OF_BUF;
1037
+ break;
1038
+ default: allWhite = 0; break;
1039
+ }
1040
+ }
1041
+ END_OF_BUF:
1105
1042
  if ('\0' != c) {
1106
- *(dr->buf.tail - 1) = '\0';
1043
+ *(dr->buf.tail - 1) = '\0';
1107
1044
  }
1108
1045
  if (allWhite) {
1109
- int isEnd = ('/' == buf_get(&dr->buf));
1110
-
1111
- buf_backup(&dr->buf);
1112
- if (dr->has.text &&
1113
- ((NoSkip == dr->options.skip && !isEnd) ||
1114
- (OffSkip == dr->options.skip))) {
1115
- args[0] = rb_str_new2(dr->buf.str);
1116
- #if HAVE_RB_ENC_ASSOCIATE
1117
- if (0 != dr->encoding) {
1118
- rb_enc_associate(args[0], dr->encoding);
1119
- }
1120
- #endif
1121
- if (dr->has.pos) {
1122
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1123
- }
1124
- if (dr->has.line) {
1125
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1126
- }
1127
- if (dr->has.column) {
1128
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1129
- }
1130
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1131
- }
1132
- if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1133
- return c;
1134
- }
1046
+ int isEnd = ('/' == buf_get(&dr->buf));
1047
+
1048
+ buf_backup(&dr->buf);
1049
+ if (dr->has_text && ((NoSkip == dr->options.skip && !isEnd) || (OffSkip == dr->options.skip))) {
1050
+ args[0] = rb_str_new2(dr->buf.str);
1051
+ if (0 != dr->encoding) {
1052
+ rb_enc_associate(args[0], dr->encoding);
1053
+ }
1054
+ dr->set_pos(dr->handler, pos);
1055
+ dr->set_line(dr->handler, line);
1056
+ dr->set_col(dr->handler, col);
1057
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1058
+ }
1059
+ if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1060
+ return c;
1061
+ }
1135
1062
  }
1136
1063
  if (0 != parent) {
1137
- parent->childCnt++;
1064
+ parent->childCnt++;
1138
1065
  }
1139
1066
  if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
1140
- if (dr->has.value) {
1141
- if (dr->has.pos) {
1142
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1143
- }
1144
- if (dr->has.line) {
1145
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1146
- }
1147
- if (dr->has.column) {
1148
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1149
- }
1150
- *args = dr->value_obj;
1151
- rb_funcall2(dr->handler, ox_value_id, 1, args);
1152
- } else if (dr->has.text) {
1153
- if (dr->options.convert_special) {
1154
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1155
- }
1156
- switch (dr->options.skip) {
1157
- case CrSkip:
1158
- buf_collapse_return(dr->buf.str);
1159
- break;
1160
- case SpcSkip:
1161
- buf_collapse_white(dr->buf.str);
1162
- break;
1163
- default:
1164
- break;
1165
- }
1166
- args[0] = rb_str_new2(dr->buf.str);
1167
- #if HAVE_RB_ENC_ASSOCIATE
1168
- if (0 != dr->encoding) {
1169
- rb_enc_associate(args[0], dr->encoding);
1170
- }
1171
- #endif
1172
- if (dr->has.pos) {
1173
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1174
- }
1175
- if (dr->has.line) {
1176
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1177
- }
1178
- if (dr->has.column) {
1179
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1180
- }
1181
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1182
- }
1067
+ if (dr->has_value) {
1068
+ dr->set_pos(dr->handler, pos);
1069
+ dr->set_line(dr->handler, line);
1070
+ dr->set_col(dr->handler, col);
1071
+ *args = dr->value_obj;
1072
+ rb_funcall2(dr->handler, ox_value_id, 1, args);
1073
+ } else if (dr->has_text) {
1074
+ if (dr->options.convert_special) {
1075
+ ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1076
+ }
1077
+ switch (dr->options.skip) {
1078
+ case CrSkip: buf_collapse_return(dr->buf.str); break;
1079
+ case SpcSkip: buf_collapse_white(dr->buf.str); break;
1080
+ default: break;
1081
+ }
1082
+ args[0] = rb_str_new2(dr->buf.str);
1083
+ if (0 != dr->encoding) {
1084
+ rb_enc_associate(args[0], dr->encoding);
1085
+ }
1086
+ dr->set_pos(dr->handler, pos);
1087
+ dr->set_line(dr->handler, line);
1088
+ dr->set_col(dr->handler, col);
1089
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1090
+ }
1183
1091
  }
1184
1092
  dr->buf.str = 0;
1185
1093
 
1186
1094
  return c;
1187
1095
  }
1188
1096
 
1189
- static int
1190
- read_jump_term(Buf buf, const char *pat) {
1191
- struct _checkPt cp;
1097
+ static int read_jump_term(Buf buf, const char *pat) {
1098
+ struct _checkPt cp;
1192
1099
 
1193
- buf_checkpoint(buf, &cp); // right after <
1100
+ buf_checkpoint(buf, &cp); // right after <
1194
1101
  if ('/' != buf_next_non_white(buf)) {
1195
- return 0;
1102
+ return 0;
1196
1103
  }
1197
1104
  if (*pat != tolower(buf_next_non_white(buf))) {
1198
- return 0;
1105
+ return 0;
1199
1106
  }
1200
1107
  for (pat++; '\0' != *pat; pat++) {
1201
- if (*pat != tolower(buf_get(buf))) {
1202
- return 0;
1203
- }
1108
+ if (*pat != tolower(buf_get(buf))) {
1109
+ return 0;
1110
+ }
1204
1111
  }
1205
1112
  if ('>' != buf_next_non_white(buf)) {
1206
- return 0;
1113
+ return 0;
1207
1114
  }
1208
1115
  buf_checkback(buf, &cp);
1209
1116
  return 1;
1210
1117
  }
1211
1118
 
1212
- static char
1213
- read_jump(SaxDrive dr, const char *pat) {
1214
- VALUE args[1];
1215
- char c;
1216
- long pos = (long)(dr->buf.pos);
1217
- long line = (long)(dr->buf.line);
1218
- long col = (long)(dr->buf.col - 1);
1219
- Nv parent = stack_peek(&dr->stack);
1119
+ static char read_jump(SaxDrive dr, const char *pat) {
1120
+ VALUE args[1];
1121
+ char c;
1122
+ long pos = (long)(dr->buf.pos);
1123
+ long line = (long)(dr->buf.line);
1124
+ long col = (long)(dr->buf.col - 1);
1125
+ Nv parent = stack_peek(&dr->stack);
1220
1126
 
1221
1127
  buf_protect(&dr->buf);
1222
1128
  while (1) {
1223
- c = buf_get(&dr->buf);
1224
- switch(c) {
1225
- case '<':
1226
- if (read_jump_term(&dr->buf, pat)) {
1227
- goto END_OF_BUF;
1228
- break;
1229
- }
1230
- break;
1231
- case '\0':
1129
+ c = buf_get(&dr->buf);
1130
+ switch (c) {
1131
+ case '<':
1132
+ if (read_jump_term(&dr->buf, pat)) {
1133
+ goto END_OF_BUF;
1134
+ break;
1135
+ }
1136
+ break;
1137
+ case '\0':
1232
1138
  ox_sax_drive_error(dr, NO_TERM "not terminated");
1233
- goto END_OF_BUF;
1234
- break;
1235
- default:
1236
- break;
1237
- }
1139
+ goto END_OF_BUF;
1140
+ break;
1141
+ default: break;
1142
+ }
1238
1143
  }
1239
- END_OF_BUF:
1144
+ END_OF_BUF:
1240
1145
  if ('\0' != c) {
1241
- *(dr->buf.tail - 1) = '\0';
1146
+ *(dr->buf.tail - 1) = '\0';
1242
1147
  }
1243
1148
  if (0 != parent) {
1244
- parent->childCnt++;
1149
+ parent->childCnt++;
1245
1150
  }
1246
1151
  // TBD check parent overlay
1247
- if (dr->has.text && !dr->blocked) {
1152
+ if (dr->has_text && !dr->blocked) {
1248
1153
  args[0] = rb_str_new2(dr->buf.str);
1249
- #if HAVE_RB_ENC_ASSOCIATE
1250
1154
  if (0 != dr->encoding) {
1251
1155
  rb_enc_associate(args[0], dr->encoding);
1252
1156
  }
1253
- #endif
1254
- if (dr->has.pos) {
1255
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1256
- }
1257
- if (dr->has.line) {
1258
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1259
- }
1260
- if (dr->has.column) {
1261
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1262
- }
1157
+ dr->set_pos(dr->handler, pos);
1158
+ dr->set_line(dr->handler, line);
1159
+ dr->set_col(dr->handler, col);
1263
1160
  rb_funcall2(dr->handler, ox_text_id, 1, args);
1264
1161
  }
1265
1162
  dr->buf.str = 0;
1266
1163
  if ('\0' != c) {
1267
- *(dr->buf.tail - 1) = '<';
1164
+ *(dr->buf.tail - 1) = '<';
1268
1165
  }
1269
1166
  return c;
1270
1167
  }
1271
1168
 
1272
- static char
1273
- read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1274
- VALUE name = Qnil;
1275
- int is_encoding = 0;
1276
- off_t pos;
1277
- off_t line;
1278
- off_t col;
1279
- char *attr_value;
1169
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1170
+ VALUE name = Qnil;
1171
+ int is_encoding = 0;
1172
+ off_t pos;
1173
+ off_t line;
1174
+ off_t col;
1175
+ char *attr_value;
1280
1176
 
1281
1177
  // already protected by caller
1282
1178
  dr->buf.str = dr->buf.tail;
@@ -1284,94 +1180,52 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1284
1180
  c = buf_next_non_white(&dr->buf);
1285
1181
  }
1286
1182
  while (termc != c && term2 != c) {
1287
- buf_backup(&dr->buf);
1183
+ buf_backup(&dr->buf);
1288
1184
  if ('\0' == c) {
1289
- ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1290
- return '\0';
1185
+ ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1186
+ return '\0';
1291
1187
  }
1292
- pos = dr->buf.pos + 1;
1293
- line = dr->buf.line;
1294
- col = dr->buf.col + 1;
1188
+ pos = dr->buf.pos + 1;
1189
+ line = dr->buf.line;
1190
+ col = dr->buf.col + 1;
1295
1191
  if ('\0' == (c = read_name_token(dr))) {
1296
- ox_sax_drive_error(dr, NO_TERM "error reading token");
1297
- return '\0';
1192
+ ox_sax_drive_error(dr, NO_TERM "error reading token");
1193
+ return '\0';
1298
1194
  }
1299
1195
  if (is_xml && 0 == strcasecmp("encoding", dr->buf.str)) {
1300
1196
  is_encoding = 1;
1301
1197
  }
1302
- if (dr->has.attr || dr->has.attr_value) {
1303
- name = str2sym(dr, dr->buf.str, 0);
1198
+ if (dr->want_attr_name) {
1199
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, 0);
1304
1200
  }
1305
1201
  if (is_white(c)) {
1306
1202
  c = buf_next_non_white(&dr->buf);
1307
1203
  }
1308
1204
  if ('=' != c) {
1309
- if (eq_req) {
1310
- dr->err = 1;
1311
- return c;
1312
- } else {
1313
- ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1314
- attr_value = (char*)"";
1315
- }
1205
+ if (eq_req) {
1206
+ dr->err = 1;
1207
+ return c;
1208
+ } else {
1209
+ ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1210
+ attr_value = (char *)"";
1211
+ }
1316
1212
  } else {
1317
- pos = dr->buf.pos + 1;
1318
- line = dr->buf.line;
1319
- col = dr->buf.col + 1;
1320
- c = read_quoted_value(dr);
1321
- attr_value = dr->buf.str;
1322
- if (is_encoding) {
1323
- #if HAVE_RB_ENC_FIND
1324
- dr->encoding = rb_enc_find(dr->buf.str);
1325
- #else
1326
- dr->encoding = dr->buf.str;
1327
- #endif
1328
- is_encoding = 0;
1329
- }
1330
- }
1331
- if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1332
- if (dr->has.attr_value) {
1333
- VALUE args[2];
1334
-
1335
- if (dr->has.pos) {
1336
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1337
- }
1338
- if (dr->has.line) {
1339
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1340
- }
1341
- if (dr->has.column) {
1342
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1343
- }
1344
- args[0] = name;
1345
- args[1] = dr->value_obj;
1346
- rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
1347
- } else if (dr->has.attr) {
1348
- VALUE args[2];
1349
-
1350
- args[0] = name;
1351
- if (dr->options.convert_special) {
1352
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1353
- }
1354
- args[1] = rb_str_new2(attr_value);
1355
- #if HAVE_RB_ENC_ASSOCIATE
1356
- if (0 != dr->encoding) {
1357
- rb_enc_associate(args[1], dr->encoding);
1358
- }
1359
- #endif
1360
- if (dr->has.pos) {
1361
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1362
- }
1363
- if (dr->has.line) {
1364
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1365
- }
1366
- if (dr->has.column) {
1367
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1368
- }
1369
- rb_funcall2(dr->handler, ox_attr_id, 2, args);
1370
- }
1371
- }
1372
- if (is_white(c)) {
1373
- c = buf_next_non_white(&dr->buf);
1374
- }
1213
+ pos = dr->buf.pos + 1;
1214
+ line = dr->buf.line;
1215
+ col = dr->buf.col + 1;
1216
+ c = read_quoted_value(dr);
1217
+ attr_value = dr->buf.str;
1218
+ if (is_encoding) {
1219
+ dr->encoding = rb_enc_find(dr->buf.str);
1220
+ is_encoding = 0;
1221
+ }
1222
+ }
1223
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1224
+ dr->attr_cb(dr, name, attr_value, pos, line, col);
1225
+ }
1226
+ if (is_white(c)) {
1227
+ c = buf_next_non_white(&dr->buf);
1228
+ }
1375
1229
  }
1376
1230
  dr->buf.str = 0;
1377
1231
 
@@ -1381,66 +1235,62 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1381
1235
  /* The character after the word is returned. dr->buf.tail is one past
1382
1236
  * that. dr->buf.str will point to the token which will be '\0' terminated.
1383
1237
  */
1384
- static char
1385
- read_name_token(SaxDrive dr) {
1386
- char c;
1238
+ static char read_name_token(SaxDrive dr) {
1239
+ char c;
1387
1240
 
1388
1241
  dr->buf.str = dr->buf.tail;
1389
- c = buf_get(&dr->buf);
1242
+ c = buf_get(&dr->buf);
1390
1243
  if (is_white(c)) {
1391
- c = buf_next_non_white(&dr->buf);
1244
+ c = buf_next_non_white(&dr->buf);
1392
1245
  dr->buf.str = dr->buf.tail - 1;
1393
1246
  }
1394
1247
  while (1) {
1395
- switch (c) {
1396
- case ' ':
1397
- case '\t':
1398
- case '\f':
1399
- case '?':
1400
- case '=':
1401
- case '/':
1402
- case '>':
1403
- case '<':
1404
- case '\n':
1405
- case '\r':
1406
- *(dr->buf.tail - 1) = '\0';
1407
- return c;
1408
- case '\0':
1248
+ switch (c) {
1249
+ case ' ':
1250
+ case '\t':
1251
+ case '\f':
1252
+ case '?':
1253
+ case '=':
1254
+ case '/':
1255
+ case '>':
1256
+ case '<':
1257
+ case '\n':
1258
+ case '\r': *(dr->buf.tail - 1) = '\0'; return c;
1259
+ case '\0':
1409
1260
  /* documents never terminate after a name token */
1410
1261
  ox_sax_drive_error(dr, NO_TERM "document not terminated");
1411
1262
  return '\0';
1412
- case ':':
1413
- if ('\0' == *dr->options.strip_ns) {
1414
- break;
1415
- } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1416
- dr->buf.str = dr->buf.tail;
1417
- } else if (dr->options.smart && 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1418
- dr->buf.str = dr->buf.tail;
1419
- } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1420
- dr->buf.str = dr->buf.tail;
1421
- }
1422
- break;
1423
- default:
1424
- break;
1425
- }
1263
+ case ':':
1264
+ if ('\0' == *dr->options.strip_ns) {
1265
+ break;
1266
+ } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1267
+ dr->buf.str = dr->buf.tail;
1268
+ } else if (dr->options.smart &&
1269
+ 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1270
+ dr->buf.str = dr->buf.tail;
1271
+ } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1272
+ dr->buf.str = dr->buf.tail;
1273
+ }
1274
+ break;
1275
+ default: break;
1276
+ }
1426
1277
  c = buf_get(&dr->buf);
1427
1278
  }
1428
1279
  return '\0';
1429
1280
  }
1430
1281
 
1431
- /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one past
1432
- * that. dr->buf.str will point to the token which will be '\0' terminated.
1282
+ /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one
1283
+ * past that. dr->buf.str will point to the token which will be '\0' terminated.
1433
1284
  */
1434
- static char
1435
- read_quoted_value(SaxDrive dr) {
1436
- char c;
1285
+ static char read_quoted_value(SaxDrive dr) {
1286
+ char c;
1437
1287
 
1438
1288
  c = buf_get(&dr->buf);
1439
1289
  if (is_white(c)) {
1440
1290
  c = buf_next_non_white(&dr->buf);
1441
1291
  }
1442
1292
  if ('"' == c || '\'' == c) {
1443
- char term = c;
1293
+ char term = c;
1444
1294
 
1445
1295
  dr->buf.str = dr->buf.tail;
1446
1296
  while (term != (c = buf_get(&dr->buf))) {
@@ -1449,186 +1299,171 @@ read_quoted_value(SaxDrive dr) {
1449
1299
  return '\0';
1450
1300
  }
1451
1301
  }
1452
- // dr->buf.tail is one past quote char
1453
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1454
- c = buf_get(&dr->buf);
1455
- return c;
1302
+ // dr->buf.tail is one past quote char
1303
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1304
+ c = buf_get(&dr->buf);
1305
+ return c;
1456
1306
  }
1457
1307
  // not quoted, look for something that terminates the string
1458
1308
  dr->buf.str = dr->buf.tail - 1;
1459
1309
  ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes");
1460
1310
  while ('\0' != (c = buf_get(&dr->buf))) {
1461
- switch (c) {
1462
- case ' ':
1463
- //case '/':
1464
- case '>':
1465
- case '?': // for instructions
1466
- case '\t':
1467
- case '\n':
1468
- case '\r':
1469
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1470
- // dr->buf.tail is in the correct position, one after the word terminator
1471
- return c;
1472
- default:
1473
- break;
1474
- }
1475
- }
1476
- return '\0'; // should never get here
1311
+ switch (c) {
1312
+ case ' ':
1313
+ // case '/':
1314
+ case '>':
1315
+ case '?': // for instructions
1316
+ case '\t':
1317
+ case '\n':
1318
+ case '\r':
1319
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1320
+ // dr->buf.tail is in the correct position, one after the word terminator
1321
+ return c;
1322
+ default: break;
1323
+ }
1324
+ }
1325
+ return '\0'; // should never get here
1477
1326
  }
1478
1327
 
1479
/* Parses the hexadecimal digits starting at b, up to a terminating ';', into
 * *up.
 *
 * Returns a pointer to the ';' on success. Returns NULL, leaving *up
 * untouched, if a character that is not a hex digit is met before the ';'
 * (this includes the end of the string), if there are no digits at all, or
 * if the value does not fit in 64 bits. The last two cases used to be
 * accepted: an empty run produced 0 and an oversized value silently lost
 * its high bits.
 */
static char *read_hex_uint64(char *b, uint64_t *up) {
    uint64_t u     = 0;
    char    *start = b;

    for (; ';' != *b; b++) {
        char     c = *b;
        uint64_t digit;

        if ('0' <= c && c <= '9') {
            digit = (uint64_t)(c - '0');
        } else if ('a' <= c && c <= 'f') {
            digit = (uint64_t)(c - 'a' + 10);
        } else if ('A' <= c && c <= 'F') {
            digit = (uint64_t)(c - 'A' + 10);
        } else {
            return NULL;
        }
        if (0 != (u >> 60)) {
            // the next shift would push set bits out of the top
            return NULL;
        }
        u = (u << 4) | digit;
    }
    if (b == start) {
        return NULL;  // no digits before the ';'
    }
    *up = u;

    return b;
}
1500
1348
 
1501
/* Parses the decimal digits starting at b, up to a terminating ';', into
 * *up.
 *
 * Returns a pointer to the ';' on success. Returns NULL, leaving *up
 * untouched, if a character that is not a decimal digit is met before the
 * ';' (this includes the end of the string), if there are no digits at all,
 * or if the value does not fit in 64 bits. The last two cases used to be
 * accepted: an empty run produced 0 and an oversized value wrapped around.
 */
static char *read_10_uint64(char *b, uint64_t *up) {
    const uint64_t max   = ~(uint64_t)0;
    uint64_t       u     = 0;
    char          *start = b;

    for (; ';' != *b; b++) {
        char     c = *b;
        uint64_t digit;

        if (c < '0' || '9' < c) {
            return NULL;
        }
        digit = (uint64_t)(c - '0');
        if ((max - digit) / 10 < u) {
            // u * 10 + digit would not fit in 64 bits
            return NULL;
        }
        u = (u * 10) + digit;
    }
    if (b == start) {
        return NULL;  // no digits before the ';'
    }
    *up = u;

    return b;
}
1518
1365
 
1519
- int
1520
- ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1521
- char *s = str;
1522
- char *b = str;
1366
+ int ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1367
+ char *s = str;
1368
+ char *b = str;
1523
1369
 
1524
1370
  while ('\0' != *s) {
1525
1371
  if ('&' == *s) {
1526
- int c = 0;
1527
- char *end;
1372
+ int c = 0;
1373
+ char *end;
1528
1374
 
1529
1375
  s++;
1530
1376
  if ('#' == *s) {
1531
- uint64_t u = 0;
1532
- char x;
1533
-
1534
- s++;
1535
- if ('x' == *s || 'X' == *s) {
1536
- x = *s;
1537
- s++;
1538
- end = read_hex_uint64(s, &u);
1539
- } else {
1540
- x = '\0';
1541
- end = read_10_uint64(s, &u);
1542
- }
1543
- if (0 == end) {
1544
- ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1545
- *b++ = '&';
1546
- *b++ = '#';
1547
- if ('\0' != x) {
1548
- *b++ = x;
1549
- }
1550
- continue;
1551
- }
1552
- if (u <= 0x000000000000007FULL) {
1553
- *b++ = (char)u;
1554
- #if HAVE_RB_ENC_FIND
1555
- } else if (ox_utf8_encoding == dr->encoding) {
1556
- b = ox_ucs_to_utf8_chars(b, u);
1557
- } else if (0 == dr->encoding) {
1558
- dr->encoding = ox_utf8_encoding;
1559
- b = ox_ucs_to_utf8_chars(b, u);
1560
- #else
1561
- } else if (0 == dr->encoding) {
1562
- dr->encoding = UTF8_STR;
1563
- b = ox_ucs_to_utf8_chars(b, u);
1564
- } else if (0 == strcasecmp(UTF8_STR, dr->encoding)) {
1565
- b = ox_ucs_to_utf8_chars(b, u);
1566
- #endif
1567
- } else {
1568
- b = ox_ucs_to_utf8_chars(b, u);
1569
- /*
1570
- ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.");
1571
- *b++ = '&';
1572
- *b++ = '#';
1573
- if ('\0' != x) {
1574
- *b++ = x;
1575
- }
1576
- continue;
1577
- */
1578
- }
1579
- s = end + 1;
1580
- continue;
1377
+ uint64_t u = 0;
1378
+ char x;
1379
+
1380
+ s++;
1381
+ if ('x' == *s || 'X' == *s) {
1382
+ x = *s;
1383
+ s++;
1384
+ end = read_hex_uint64(s, &u);
1385
+ } else {
1386
+ x = '\0';
1387
+ end = read_10_uint64(s, &u);
1388
+ }
1389
+ if (0 == end) {
1390
+ ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1391
+ *b++ = '&';
1392
+ *b++ = '#';
1393
+ if ('\0' != x) {
1394
+ *b++ = x;
1395
+ }
1396
+ continue;
1397
+ }
1398
+ if (u <= 0x000000000000007FULL) {
1399
+ *b++ = (char)u;
1400
+ } else if (ox_utf8_encoding == dr->encoding) {
1401
+ b = ox_ucs_to_utf8_chars(b, u);
1402
+ } else if (0 == dr->encoding) {
1403
+ dr->encoding = ox_utf8_encoding;
1404
+ b = ox_ucs_to_utf8_chars(b, u);
1405
+ } else {
1406
+ b = ox_ucs_to_utf8_chars(b, u);
1407
+ /*
1408
+ ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character
1409
+ sequences."); *b++ = '&'; *b++ = '#'; if ('\0' != x) { *b++ = x;
1410
+ }
1411
+ continue;
1412
+ */
1413
+ }
1414
+ s = end + 1;
1415
+ continue;
1581
1416
  } else if (0 == strncasecmp(s, "lt;", 3)) {
1582
1417
  c = '<';
1583
1418
  s += 3;
1584
- col += 3;
1419
+ col += 3;
1585
1420
  } else if (0 == strncasecmp(s, "gt;", 3)) {
1586
1421
  c = '>';
1587
1422
  s += 3;
1588
- col += 3;
1423
+ col += 3;
1589
1424
  } else if (0 == strncasecmp(s, "amp;", 4)) {
1590
1425
  c = '&';
1591
1426
  s += 4;
1592
- col += 4;
1427
+ col += 4;
1593
1428
  } else if (0 == strncasecmp(s, "quot;", 5)) {
1594
1429
  c = '"';
1595
1430
  s += 5;
1596
- col += 5;
1431
+ col += 5;
1597
1432
  } else if (0 == strncasecmp(s, "apos;", 5)) {
1598
1433
  c = '\'';
1599
1434
  s += 5;
1600
1435
  } else {
1601
- char key[16];
1602
- char *k = key;
1603
- char *kend = key + sizeof(key) - 1;
1604
- char *bn;
1605
- char *s2 = s;
1606
-
1607
- for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1608
- if (kend <= k) {
1609
- k = key;
1610
- break;
1611
- }
1612
- *k = *s2;
1613
- }
1614
- *k = '\0';
1615
- if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1616
- ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1617
- c = '&';
1618
- } else {
1619
- b = bn;
1620
- s = s2 + 1;
1621
- continue;
1622
- }
1436
+ char key[16];
1437
+ char *k = key;
1438
+ char *kend = key + sizeof(key) - 1;
1439
+ char *bn;
1440
+ char *s2 = s;
1441
+
1442
+ for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1443
+ if (kend <= k) {
1444
+ k = key;
1445
+ break;
1446
+ }
1447
+ *k = *s2;
1448
+ }
1449
+ *k = '\0';
1450
+ if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1451
+ ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1452
+ c = '&';
1453
+ } else {
1454
+ b = bn;
1455
+ s = s2 + 1;
1456
+ continue;
1457
+ }
1623
1458
  }
1624
1459
  *b++ = (char)c;
1625
- col++;
1460
+ col++;
1626
1461
  } else {
1627
- if ('\n' == *s) {
1628
- line++;
1629
- col = 0;
1630
- }
1631
- col++;
1462
+ if ('\n' == *s) {
1463
+ line++;
1464
+ col = 0;
1465
+ }
1466
+ col++;
1632
1467
  *b++ = *s++;
1633
1468
  }
1634
1469
  }
@@ -1637,64 +1472,43 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1637
1472
  return 0;
1638
1473
  }
1639
1474
 
1640
- static void
1641
- hint_clear_empty(SaxDrive dr) {
1642
- Nv nv;
1475
+ static void hint_clear_empty(SaxDrive dr) {
1476
+ Nv nv;
1643
1477
 
1644
1478
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1645
- if (0 == nv->hint) {
1646
- break;
1647
- }
1648
- if (nv->hint->empty) {
1649
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1650
- stack_pop(&dr->stack);
1651
- } else {
1652
- break;
1653
- }
1479
+ if (0 == nv->hint) {
1480
+ break;
1481
+ }
1482
+ if (nv->hint->empty) {
1483
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1484
+ stack_pop(&dr->stack);
1485
+ } else {
1486
+ break;
1487
+ }
1654
1488
  }
1655
1489
  }
1656
1490
 
1657
- static Nv
1658
- hint_try_close(SaxDrive dr, const char *name) {
1659
- Hint h = ox_hint_find(dr->options.hints, name);
1660
- Nv nv;
1491
+ static Nv hint_try_close(SaxDrive dr, const char *name) {
1492
+ Hint h = ox_hint_find(dr->options.hints, name);
1493
+ Nv nv;
1661
1494
 
1662
1495
  if (0 == h) {
1663
- return 0;
1496
+ return 0;
1664
1497
  }
1665
1498
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1666
- if (0 == strcasecmp(name, nv->name)) {
1667
- stack_pop(&dr->stack);
1668
- return nv;
1669
- }
1670
- if (0 == nv->hint) {
1671
- break;
1672
- }
1673
- if (nv->hint->empty) {
1674
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1675
- dr->stack.tail = nv;
1676
- } else {
1677
- break;
1678
- }
1499
+ if (0 == strcasecmp(name, nv->name)) {
1500
+ stack_pop(&dr->stack);
1501
+ return nv;
1502
+ }
1503
+ if (0 == nv->hint) {
1504
+ break;
1505
+ }
1506
+ if (nv->hint->empty) {
1507
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1508
+ dr->stack.tail = nv;
1509
+ } else {
1510
+ break;
1511
+ }
1679
1512
  }
1680
1513
  return 0;
1681
1514
  }
1682
-
1683
- static void
1684
- end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
1685
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1686
- if (dr->has.pos) {
1687
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1688
- }
1689
- if (dr->has.line) {
1690
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1691
- }
1692
- if (dr->has.column) {
1693
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1694
- }
1695
- rb_funcall(dr->handler, ox_end_element_id, 1, name);
1696
- }
1697
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1698
- dr->blocked--;
1699
- }
1700
- }