ox 2.14.6 → 2.14.9

This diff shows the changes between two publicly available versions of this package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the differences between those package versions as they appear in their respective public registries.
data/ext/ox/sax.c CHANGED
@@ -4,66 +4,63 @@
4
4
  */
5
5
 
6
6
  #include <ctype.h>
7
- #include <stdlib.h>
8
7
  #include <errno.h>
9
8
  #include <stdio.h>
9
+ #include <stdlib.h>
10
10
  #include <strings.h>
11
11
  #include <sys/types.h>
12
12
  #if HAVE_SYS_UIO_H
13
13
  #include <sys/uio.h>
14
14
  #endif
15
- #include <unistd.h>
16
15
  #include <time.h>
16
+ #include <unistd.h>
17
17
 
18
+ #include "intern.h"
19
+ #include "ox.h"
18
20
  #include "ruby.h"
19
- #if HAVE_RB_ENC_ASSOCIATE
20
21
  #include "ruby/encoding.h"
21
- #endif
22
- #include "ox.h"
23
22
  #include "sax.h"
24
- #include "sax_stack.h"
25
23
  #include "sax_buf.h"
24
+ #include "sax_stack.h"
26
25
  #include "special.h"
27
26
 
28
- #define NAME_MISMATCH 1
27
+ #define NAME_MISMATCH 1
29
28
 
30
- #define START_STATE 1
31
- #define BODY_STATE 2
32
- #define AFTER_STATE 3
29
+ #define START_STATE 1
30
+ #define BODY_STATE 2
31
+ #define AFTER_STATE 3
33
32
 
34
33
  // error prefixes
35
- #define BAD_BOM "Bad BOM: "
36
- #define NO_TERM "Not Terminated: "
37
- #define INVALID_FORMAT "Invalid Format: "
38
- #define CASE_ERROR "Case Error: "
39
- #define OUT_OF_ORDER "Out of Order: "
40
- #define WRONG_CHAR "Unexpected Character: "
41
- #define EL_MISMATCH "Start End Mismatch: "
42
- #define INV_ELEMENT "Invalid Element: "
43
-
44
- #define UTF8_STR "UTF-8"
45
-
46
- static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
47
- static void parse(SaxDrive dr);
34
+ #define BAD_BOM "Bad BOM: "
35
+ #define NO_TERM "Not Terminated: "
36
+ #define INVALID_FORMAT "Invalid Format: "
37
+ #define CASE_ERROR "Case Error: "
38
+ #define OUT_OF_ORDER "Out of Order: "
39
+ #define WRONG_CHAR "Unexpected Character: "
40
+ #define EL_MISMATCH "Start End Mismatch: "
41
+ #define INV_ELEMENT "Invalid Element: "
42
+
43
+ #define UTF8_STR "UTF-8"
44
+
45
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
46
+ static void parse(SaxDrive dr);
48
47
  // All read functions should return the next character after the 'thing' that was read and leave dr->cur one after that.
49
- static char read_instruction(SaxDrive dr);
50
- static char read_doctype(SaxDrive dr);
51
- static char read_cdata(SaxDrive dr);
52
- static char read_comment(SaxDrive dr);
53
- static char read_element_start(SaxDrive dr);
54
- static char read_element_end(SaxDrive dr);
55
- static char read_text(SaxDrive dr);
56
- static char read_jump(SaxDrive dr, const char *pat);
57
- static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
58
- static char read_name_token(SaxDrive dr);
59
- static char read_quoted_value(SaxDrive dr);
60
-
61
- static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h);
62
-
63
- static void hint_clear_empty(SaxDrive dr);
64
- static Nv hint_try_close(SaxDrive dr, const char *name);
65
-
66
- VALUE ox_sax_value_class = Qnil;
48
+ static char read_instruction(SaxDrive dr);
49
+ static char read_doctype(SaxDrive dr);
50
+ static char read_cdata(SaxDrive dr);
51
+ static char read_comment(SaxDrive dr);
52
+ static char read_element_start(SaxDrive dr);
53
+ static char read_element_end(SaxDrive dr);
54
+ static char read_text(SaxDrive dr);
55
+ static char read_jump(SaxDrive dr, const char *pat);
56
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
57
+ static char read_name_token(SaxDrive dr);
58
+ static char read_quoted_value(SaxDrive dr);
59
+
60
+ static void hint_clear_empty(SaxDrive dr);
61
+ static Nv hint_try_close(SaxDrive dr, const char *name);
62
+
63
+ VALUE ox_sax_value_class = Qnil;
67
64
 
68
65
  static VALUE protect_parse(VALUE drp) {
69
66
  parse((SaxDrive)drp);
@@ -71,562 +68,561 @@ static VALUE protect_parse(VALUE drp) {
71
68
  return Qnil;
72
69
  }
73
70
 
74
- #if HAVE_RB_ENC_ASSOCIATE
75
- static int
76
- str_is_ascii(const char *s) {
77
- for (; '\0' != *s; s++) {
78
- if (*s < ' ' || '~' < *s) {
79
- return 0;
80
- }
81
- }
82
- return 1;
83
- }
84
- #endif
85
-
86
71
  VALUE
87
- str2sym(SaxDrive dr, const char *str, const char **strp) {
88
- VALUE *slot;
89
- VALUE sym;
72
+ str2sym(SaxDrive dr, const char *str, size_t len, const char **strp) {
73
+ VALUE sym;
90
74
 
91
75
  if (dr->options.symbolize) {
92
- if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
93
- #if HAVE_RB_ENC_ASSOCIATE
94
- if (0 != dr->encoding && !str_is_ascii(str)) {
95
- VALUE rstr = rb_str_new2(str);
96
-
97
- // TBD if sym can be pinned down then use this all the time
98
- rb_enc_associate(rstr, dr->encoding);
99
- sym = rb_funcall(rstr, ox_to_sym_id, 0);
100
- *slot = Qundef;
101
- } else {
102
- sym = ID2SYM(rb_intern(str));
103
- *slot = sym;
104
- }
105
- #else
106
- sym = ID2SYM(rb_intern(str));
107
- *slot = sym;
108
- #endif
109
- }
76
+ sym = ox_sym_intern(str, len, strp);
110
77
  } else {
111
- sym = rb_str_new2(str);
112
- #if HAVE_RB_ENC_ASSOCIATE
113
- if (0 != dr->encoding) {
114
- rb_enc_associate(sym, dr->encoding);
115
- }
116
- #endif
117
- if (0 != strp) {
118
- *strp = StringValuePtr(sym);
119
- }
78
+ sym = dr->get_name(str, len, dr->encoding, strp);
120
79
  }
121
80
  return sym;
122
81
  }
123
82
 
124
- void
125
- ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
83
+ void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
126
84
  #if HAVE_RB_EXT_RACTOR_SAFE
127
85
  rb_ext_ractor_safe(true);
128
86
  #endif
129
- struct _saxDrive dr;
130
- int line = 0;
87
+ struct _saxDrive dr;
88
+ int line = 0;
131
89
 
132
90
  sax_drive_init(&dr, handler, io, options);
133
- #if 0
134
- printf("*** sax_parse with these flags\n");
135
- printf(" has_instruct = %s\n", dr.has.instruct ? "true" : "false");
136
- printf(" has_end_instruct = %s\n", dr.has.end_instruct ? "true" : "false");
137
- printf(" has_attr = %s\n", dr.has.attr ? "true" : "false");
138
- printf(" has_attr_value = %s\n", dr.has.attr_value ? "true" : "false");
139
- printf(" has_attrs_done = %s\n", dr.has.attrs_done ? "true" : "false");
140
- printf(" has_doctype = %s\n", dr.has.doctype ? "true" : "false");
141
- printf(" has_comment = %s\n", dr.has.comment ? "true" : "false");
142
- printf(" has_cdata = %s\n", dr.has.cdata ? "true" : "false");
143
- printf(" has_text = %s\n", dr.has.text ? "true" : "false");
144
- printf(" has_value = %s\n", dr.has.value ? "true" : "false");
145
- printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false");
146
- printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false");
147
- printf(" has_error = %s\n", dr.has.error ? "true" : "false");
148
- printf(" has_pos = %s\n", dr.has.pos ? "true" : "false");
149
- printf(" has_line = %s\n", dr.has.line ? "true" : "false");
150
- printf(" has_column = %s\n", dr.has.column ? "true" : "false");
151
- #endif
152
- //parse(&dr);
153
91
  rb_protect(protect_parse, (VALUE)&dr, &line);
154
92
  ox_sax_drive_cleanup(&dr);
155
93
  if (0 != line) {
156
- rb_jump_tag(line);
94
+ rb_jump_tag(line);
95
+ }
96
+ }
97
+
98
+ static void set_long_noop(VALUE handler, long pos) {
99
+ }
100
+
101
+ static void set_pos(VALUE handler, long pos) {
102
+ rb_ivar_set(handler, ox_at_pos_id, LONG2NUM(pos));
103
+ }
104
+
105
+ static void set_line(VALUE handler, long line) {
106
+ rb_ivar_set(handler, ox_at_line_id, LONG2NUM(line));
107
+ }
108
+
109
+ static void set_col(VALUE handler, long col) {
110
+ rb_ivar_set(handler, ox_at_column_id, LONG2NUM(col));
111
+ }
112
+
113
+ static void attr_noop(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
114
+ }
115
+
116
+ static void attr_text(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
117
+ VALUE args[2];
118
+
119
+ args[0] = name;
120
+ if (dr->options.convert_special) {
121
+ ox_sax_collapse_special(dr, value, pos, line, col);
122
+ }
123
+ args[1] = rb_str_new2(value);
124
+ if (0 != dr->encoding) {
125
+ rb_enc_associate(args[1], dr->encoding);
126
+ }
127
+ dr->set_pos(dr->handler, pos);
128
+ dr->set_line(dr->handler, line);
129
+ dr->set_col(dr->handler, col);
130
+ rb_funcall2(dr->handler, ox_attr_id, 2, args);
131
+ }
132
+
133
+ static void attr_value(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
134
+ VALUE args[2];
135
+
136
+ dr->set_pos(dr->handler, pos);
137
+ dr->set_line(dr->handler, line);
138
+ dr->set_col(dr->handler, col);
139
+ args[0] = name;
140
+ args[1] = dr->value_obj;
141
+ rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
142
+ }
143
+
144
+ static void attrs_done_noop(VALUE handler) {
145
+ }
146
+
147
+ static void attrs_done(VALUE handler) {
148
+ rb_funcall(handler, ox_attrs_done_id, 0);
149
+ }
150
+
151
+ static VALUE instruct_noop(SaxDrive dr, const char *target, long pos, long line, long col) {
152
+ return Qnil;
153
+ }
154
+
155
+ static VALUE instruct(SaxDrive dr, const char *target, long pos, long line, long col) {
156
+ VALUE arg = rb_str_new2(target);
157
+
158
+ dr->set_pos(dr->handler, pos);
159
+ dr->set_line(dr->handler, line);
160
+ dr->set_col(dr->handler, col);
161
+ rb_funcall(dr->handler, ox_instruct_id, 1, arg);
162
+
163
+ return arg;
164
+ }
165
+
166
+ static VALUE instruct_just_value(SaxDrive dr, const char *target, long pos, long line, long col) {
167
+ return rb_str_new2(target);
168
+ }
169
+
170
+ static void end_instruct_noop(SaxDrive dr, VALUE target, long pos, long line, long col) {
171
+ }
172
+
173
+ static void end_instruct(SaxDrive dr, VALUE target, long pos, long line, long col) {
174
+ dr->set_pos(dr->handler, pos);
175
+ dr->set_line(dr->handler, line);
176
+ dr->set_col(dr->handler, col);
177
+ rb_funcall(dr->handler, ox_end_instruct_id, 1, target);
178
+ }
179
+
180
+ static void dr_loc_noop(SaxDrive dr, long pos, long line, long col) {
181
+ }
182
+
183
+ static void comment(SaxDrive dr, long pos, long line, long col) {
184
+ if (!dr->blocked) {
185
+ Nv parent = stack_peek(&dr->stack);
186
+ Hint h = ox_hint_find(dr->options.hints, "!--");
187
+
188
+ if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
189
+ (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
190
+ VALUE arg = rb_str_new2(dr->buf.str);
191
+
192
+ if (0 != dr->encoding) {
193
+ rb_enc_associate(arg, dr->encoding);
194
+ }
195
+ dr->set_pos(dr->handler, pos);
196
+ dr->set_line(dr->handler, line);
197
+ dr->set_col(dr->handler, col);
198
+ rb_funcall(dr->handler, ox_comment_id, 1, arg);
199
+ }
200
+ }
201
+ }
202
+
203
+ static void cdata(SaxDrive dr, long pos, long line, long col) {
204
+ Nv parent = stack_peek(&dr->stack);
205
+
206
+ if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
207
+ VALUE arg = rb_str_new2(dr->buf.str);
208
+
209
+ if (0 != dr->encoding) {
210
+ rb_enc_associate(arg, dr->encoding);
211
+ }
212
+ dr->set_pos(dr->handler, pos);
213
+ dr->set_line(dr->handler, line);
214
+ dr->set_col(dr->handler, col);
215
+ rb_funcall(dr->handler, ox_cdata_id, 1, arg);
216
+ }
217
+ }
218
+
219
+ static void doctype(SaxDrive dr, long pos, long line, long col) {
220
+ dr->set_pos(dr->handler, pos);
221
+ dr->set_line(dr->handler, line);
222
+ dr->set_col(dr->handler, col);
223
+ rb_funcall(dr->handler, ox_doctype_id, 1, rb_str_new2(dr->buf.str));
224
+ }
225
+
226
+ static void error_noop(SaxDrive dr, const char *msg, long pos, long line, long col) {
227
+ }
228
+
229
+ static void error(SaxDrive dr, const char *msg, long pos, long line, long col) {
230
+ VALUE args[3];
231
+
232
+ args[0] = rb_str_new2(msg);
233
+ args[1] = LONG2NUM(line);
234
+ args[2] = LONG2NUM(col);
235
+ dr->set_pos(dr->handler, pos);
236
+ dr->set_line(dr->handler, line);
237
+ dr->set_col(dr->handler, col);
238
+ rb_funcall2(dr->handler, ox_error_id, 3, args);
239
+ }
240
+
241
+ static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
242
+ if (dr->has_end_element && 0 >= dr->blocked &&
243
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
244
+ dr->set_pos(dr->handler, pos);
245
+ dr->set_line(dr->handler, line);
246
+ dr->set_col(dr->handler, col);
247
+ rb_funcall(dr->handler, ox_end_element_id, 1, name);
248
+ }
249
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
250
+ dr->blocked--;
157
251
  }
158
252
  }
159
253
 
160
- static void
161
- sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
254
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
162
255
  ox_sax_buf_init(&dr->buf, io);
163
256
  dr->buf.dr = dr;
164
257
  stack_init(&dr->stack);
165
- dr->handler = handler;
258
+ dr->handler = handler;
166
259
  dr->value_obj = Data_Wrap_Struct(ox_sax_value_class, 0, 0, dr);
167
260
  rb_gc_register_address(&dr->value_obj);
168
261
  dr->options = *options;
169
- dr->err = 0;
262
+ dr->err = 0;
170
263
  dr->blocked = 0;
171
- dr->abort = false;
172
- has_init(&dr->has, handler);
173
- #if HAVE_RB_ENC_FIND
174
- if ('\0' == *ox_default_options.encoding) {
175
- VALUE encoding;
264
+ dr->abort = false;
265
+
266
+ dr->set_pos = (Qtrue == rb_ivar_defined(handler, ox_at_pos_id)) ? set_pos : set_long_noop;
267
+ dr->set_line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id)) ? set_line : set_long_noop;
268
+ dr->set_col = (Qtrue == rb_ivar_defined(handler, ox_at_column_id)) ? set_col : set_long_noop;
269
+ if (rb_respond_to(handler, ox_attr_value_id)) {
270
+ dr->attr_cb = attr_value;
271
+ dr->want_attr_name = true;
272
+ } else if (rb_respond_to(handler, ox_attr_id)) {
273
+ dr->attr_cb = attr_text;
274
+ dr->want_attr_name = true;
275
+ } else {
276
+ dr->attr_cb = attr_noop;
277
+ dr->want_attr_name = false;
278
+ }
279
+ dr->attrs_done = rb_respond_to(handler, ox_attrs_done_id) ? attrs_done : attrs_done_noop;
280
+ dr->instruct = rb_respond_to(handler, ox_instruct_id) ? instruct : instruct_noop;
281
+ dr->end_instruct = rb_respond_to(handler, ox_end_instruct_id) ? end_instruct : end_instruct_noop;
282
+ if (rb_respond_to(handler, ox_end_instruct_id) && !rb_respond_to(handler, ox_instruct_id)) {
283
+ dr->instruct = instruct_just_value;
284
+ }
285
+ dr->doctype = rb_respond_to(handler, ox_doctype_id) ? doctype : dr_loc_noop;
286
+ dr->comment = rb_respond_to(handler, ox_comment_id) ? comment : dr_loc_noop;
287
+ dr->cdata = rb_respond_to(handler, ox_cdata_id) ? cdata : dr_loc_noop;
288
+ dr->error = rb_respond_to(handler, ox_error_id) ? error : error_noop;
176
289
 
177
- dr->encoding = 0;
178
- if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
179
- int e = rb_enc_get_index(encoding);
180
- if (0 <= e) {
181
- dr->encoding = rb_enc_from_index(e);
182
- }
183
- }
290
+ dr->has_text = rb_respond_to(handler, ox_text_id);
291
+ dr->has_value = rb_respond_to(handler, ox_value_id);
292
+ dr->has_start_element = rb_respond_to(handler, ox_start_element_id);
293
+ dr->has_end_element = rb_respond_to(handler, ox_end_element_id);
294
+
295
+ if ('\0' == *ox_default_options.encoding) {
296
+ VALUE encoding;
297
+
298
+ dr->encoding = 0;
299
+ if (rb_respond_to(io, ox_external_encoding_id) &&
300
+ Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
301
+ int e = rb_enc_get_index(encoding);
302
+ if (0 <= e) {
303
+ dr->encoding = rb_enc_from_index(e);
304
+ }
305
+ }
184
306
  } else {
185
307
  dr->encoding = rb_enc_find(ox_default_options.encoding);
186
308
  }
187
- #else
188
- dr->encoding = 0;
189
- #endif
309
+ dr->utf8 = (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding);
310
+ if (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding) { // UTF-8
311
+ dr->get_name = dr->options.symbolize ? ox_utf8_sym : ox_utf8_name; // TBD UTF8 sym?
312
+ } else {
313
+ dr->get_name = dr->options.symbolize ? ox_enc_sym : ox_enc_name;
314
+ }
190
315
  }
191
316
 
192
- void
193
- ox_sax_drive_cleanup(SaxDrive dr) {
317
+ void ox_sax_drive_cleanup(SaxDrive dr) {
194
318
  rb_gc_unregister_address(&dr->value_obj);
195
319
  buf_cleanup(&dr->buf);
196
320
  stack_cleanup(&dr->stack);
197
321
  }
198
322
 
199
- static void
200
- ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
201
- if (dr->has.error) {
202
- VALUE args[3];
203
-
204
- args[0] = rb_str_new2(msg);
205
- args[1] = LONG2NUM(line);
206
- args[2] = LONG2NUM(col);
207
- if (dr->has.pos) {
208
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
209
- }
210
- if (dr->has.pos) {
211
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
212
- }
213
- if (dr->has.line) {
214
- rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
215
- }
216
- if (dr->has.column) {
217
- rb_ivar_set(dr->handler, ox_at_column_id, args[2]);
218
- }
219
- rb_funcall2(dr->handler, ox_error_id, 3, args);
220
- }
323
+ static void ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
324
+ dr->error(dr, msg, pos, line, col);
221
325
  }
222
326
 
223
- void
224
- ox_sax_drive_error(SaxDrive dr, const char *msg) {
327
+ void ox_sax_drive_error(SaxDrive dr, const char *msg) {
225
328
  ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
226
329
  }
227
330
 
228
- static char
229
- skipBOM(SaxDrive dr) {
230
- char c = buf_get(&dr->buf);
331
+ static char skipBOM(SaxDrive dr) {
332
+ char c = buf_get(&dr->buf);
231
333
 
232
334
  if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
233
- if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
234
- #if HAVE_RB_ENC_FIND
235
- dr->encoding = ox_utf8_encoding;
236
- #else
237
- dr->encoding = UTF8_STR;
238
- #endif
239
- c = buf_get(&dr->buf);
240
- } else {
241
- ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
242
- c = '\0';
243
- }
335
+ if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
336
+ dr->encoding = ox_utf8_encoding;
337
+ c = buf_get(&dr->buf);
338
+ } else {
339
+ ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
340
+ c = '\0';
341
+ }
244
342
  }
245
343
  return c;
246
344
  }
247
345
 
248
- static void
249
- parse(SaxDrive dr) {
250
- char c = skipBOM(dr);
251
- int state = START_STATE;
252
- Nv parent;
346
+ static void parse(SaxDrive dr) {
347
+ char c = skipBOM(dr);
348
+ int state = START_STATE;
349
+ Nv parent;
253
350
 
254
351
  while ('\0' != c) {
255
- buf_protect(&dr->buf);
256
- if ('<' == c) {
257
- c = buf_get(&dr->buf);
258
- switch (c) {
259
- case '?': /* instructions (xml or otherwise) */
260
- c = read_instruction(dr);
261
- break;
262
- case '!': /* comment or doctype */
263
- buf_protect(&dr->buf);
264
- c = buf_get(&dr->buf);
265
- if ('\0' == c) {
266
- ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
267
-
268
- goto DONE;
269
- } else if ('-' == c) {
270
- c = buf_get(&dr->buf); /* skip first - and get next character */
271
- if ('-' != c) {
272
- ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
273
- } else {
274
- c = buf_get(&dr->buf); /* skip second - */
275
- }
276
- c = read_comment(dr);
277
- } else {
278
- int i;
279
- int spaced = 0;
280
- off_t pos = dr->buf.pos + 1;
281
- off_t line = dr->buf.line;
282
- off_t col = dr->buf.col + 1;
283
-
284
- if (is_white(c)) {
285
- spaced = 1;
286
- c = buf_next_non_white(&dr->buf);
287
- }
288
- dr->buf.str = dr->buf.tail - 1;
289
- for (i = 7; 0 < i; i--) {
290
- c = buf_get(&dr->buf);
291
- }
292
- if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
293
- if (spaced) {
294
- ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
295
- }
296
- if (START_STATE != state) {
297
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
298
- }
299
- c = read_doctype(dr);
300
- } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
301
- if (!dr->options.smart) {
302
- ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
303
- }
304
- if (START_STATE != state) {
305
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
306
- }
307
- c = read_doctype(dr);
308
- } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
309
- if (spaced) {
310
- ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
311
- }
312
- c = read_cdata(dr);
313
- } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
314
- if (!dr->options.smart) {
315
- ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
316
- }
317
- c = read_cdata(dr);
318
- } else {
319
- Nv parent = stack_peek(&dr->stack);
320
-
321
- if (0 != parent) {
322
- parent->childCnt++;
323
- }
324
- ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
325
- c = read_name_token(dr);
326
- if ('>' == c) {
327
- c = buf_get(&dr->buf);
328
- }
329
- }
330
- }
331
- break;
332
- case '/': /* element end */
333
- parent = stack_peek(&dr->stack);
334
- if (0 != parent && 0 == parent->childCnt && dr->has.text && !dr->blocked) {
335
- VALUE args[1];
336
- off_t pos = dr->buf.pos;
337
- off_t line = dr->buf.line;
338
- off_t col = dr->buf.col - 1;
339
-
340
- args[0] = rb_str_new2("");
341
- #if HAVE_RB_ENC_ASSOCIATE
342
- if (0 != dr->encoding) {
343
- rb_enc_associate(args[0], dr->encoding);
344
- }
345
- #endif
346
- if (dr->has.pos) {
347
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
348
- }
349
- if (dr->has.line) {
350
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
351
- }
352
- if (dr->has.column) {
353
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
354
- }
355
- rb_funcall2(dr->handler, ox_text_id, 1, args);
356
- }
357
- c = read_element_end(dr);
358
- if (0 == stack_peek(&dr->stack)) {
359
- state = AFTER_STATE;
360
- }
361
- break;
362
- case '\0':
363
- goto DONE;
364
- default:
365
- buf_backup(&dr->buf);
366
- if (AFTER_STATE == state) {
367
- ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
368
- }
369
- state = BODY_STATE;
370
- c = read_element_start(dr);
371
- if (0 == stack_peek(&dr->stack)) {
372
- state = AFTER_STATE;
373
- }
374
- break;
375
- }
376
- } else {
377
- buf_reset(&dr->buf);
378
- c = read_text(dr);
379
- }
352
+ buf_protect(&dr->buf);
353
+ if ('<' == c) {
354
+ c = buf_get(&dr->buf);
355
+ switch (c) {
356
+ case '?': /* instructions (xml or otherwise) */ c = read_instruction(dr); break;
357
+ case '!': /* comment or doctype */
358
+ buf_protect(&dr->buf);
359
+ c = buf_get(&dr->buf);
360
+ if ('\0' == c) {
361
+ ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
362
+
363
+ goto DONE;
364
+ } else if ('-' == c) {
365
+ c = buf_get(&dr->buf); /* skip first - and get next character */
366
+ if ('-' != c) {
367
+ ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
368
+ } else {
369
+ c = buf_get(&dr->buf); /* skip second - */
370
+ }
371
+ c = read_comment(dr);
372
+ } else {
373
+ int i;
374
+ int spaced = 0;
375
+ off_t pos = dr->buf.pos + 1;
376
+ off_t line = dr->buf.line;
377
+ off_t col = dr->buf.col + 1;
378
+
379
+ if (is_white(c)) {
380
+ spaced = 1;
381
+ c = buf_next_non_white(&dr->buf);
382
+ }
383
+ dr->buf.str = dr->buf.tail - 1;
384
+ for (i = 7; 0 < i; i--) {
385
+ c = buf_get(&dr->buf);
386
+ }
387
+ if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
388
+ if (spaced) {
389
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
390
+ }
391
+ if (START_STATE != state) {
392
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
393
+ }
394
+ c = read_doctype(dr);
395
+ } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
396
+ if (!dr->options.smart) {
397
+ ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
398
+ }
399
+ if (START_STATE != state) {
400
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
401
+ }
402
+ c = read_doctype(dr);
403
+ } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
404
+ if (spaced) {
405
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
406
+ }
407
+ c = read_cdata(dr);
408
+ } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
409
+ if (!dr->options.smart) {
410
+ ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
411
+ }
412
+ c = read_cdata(dr);
413
+ } else {
414
+ Nv parent = stack_peek(&dr->stack);
415
+
416
+ if (0 != parent) {
417
+ parent->childCnt++;
418
+ }
419
+ ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
420
+ c = read_name_token(dr);
421
+ if ('>' == c) {
422
+ c = buf_get(&dr->buf);
423
+ }
424
+ }
425
+ }
426
+ break;
427
+ case '/': /* element end */
428
+ parent = stack_peek(&dr->stack);
429
+ if (0 != parent && 0 == parent->childCnt && dr->has_text && !dr->blocked) {
430
+ VALUE args[1];
431
+ args[0] = rb_str_new2("");
432
+ if (0 != dr->encoding) {
433
+ rb_enc_associate(args[0], dr->encoding);
434
+ }
435
+ dr->set_pos(dr->handler, dr->buf.pos);
436
+ dr->set_line(dr->handler, dr->buf.line);
437
+ dr->set_col(dr->handler, dr->buf.col);
438
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
439
+ }
440
+ c = read_element_end(dr);
441
+ if (0 == stack_peek(&dr->stack)) {
442
+ state = AFTER_STATE;
443
+ }
444
+ break;
445
+ case '\0': goto DONE;
446
+ default:
447
+ buf_backup(&dr->buf);
448
+ if (AFTER_STATE == state) {
449
+ ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
450
+ }
451
+ state = BODY_STATE;
452
+ c = read_element_start(dr);
453
+ if (0 == stack_peek(&dr->stack)) {
454
+ state = AFTER_STATE;
455
+ }
456
+ break;
457
+ }
458
+ } else {
459
+ buf_reset(&dr->buf);
460
+ c = read_text(dr);
461
+ }
380
462
  }
381
- DONE:
463
+ DONE:
382
464
  if (dr->abort) {
383
- return;
465
+ return;
384
466
  }
385
467
  if (dr->stack.head < dr->stack.tail) {
386
- char msg[256];
387
- Nv sp;
388
-
389
- if (dr->has.pos) {
390
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(dr->buf.pos));
391
- }
392
- if (dr->has.line) {
393
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
394
- }
395
- if (dr->has.column) {
396
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(dr->buf.col));
397
- }
398
- for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
399
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
400
- ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
401
- if (dr->has.end_element && 0 >= dr->blocked &&
402
- (NULL == sp->hint || ActiveOverlay == sp->hint->overlay || NestOverlay == sp->hint->overlay)) {
403
- VALUE args[1];
404
-
405
- args[0] = sp->val;
406
- rb_funcall2(dr->handler, ox_end_element_id, 1, args);
407
- }
408
- if (dr->blocked && NULL != sp->hint && BlockOverlay == sp->hint->overlay) {
409
- dr->blocked--;
410
- }
468
+ char msg[256];
469
+ Nv sp;
470
+
471
+ for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
472
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
473
+ ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
474
+ end_element_cb(dr, sp->val, dr->buf.pos, dr->buf.line, dr->buf.col, sp->hint);
411
475
  }
412
476
  }
413
477
  }
414
478
 
415
- static void
416
- read_content(SaxDrive dr, char *content, size_t len) {
417
- char c;
418
- char *end = content + len;
479
+ static void read_content(SaxDrive dr, char *content, size_t len) {
480
+ char c;
481
+ char *end = content + len;
419
482
 
420
483
  while ('\0' != (c = buf_get(&dr->buf))) {
421
- if (end <= content) {
422
- *content = '\0';
423
- ox_sax_drive_error(dr, "processing instruction content too large");
424
- return;
425
- }
426
- if ('?' == c) {
427
- if ('\0' == (c = buf_get(&dr->buf))) {
428
- ox_sax_drive_error(dr, NO_TERM "document not terminated");
429
- }
430
- if ('>' == c) {
431
- *content = '\0';
432
- return;
433
- } else {
434
- *content++ = c;
435
- }
436
- } else {
437
- *content++ = c;
438
- }
484
+ if (end <= content) {
485
+ *content = '\0';
486
+ ox_sax_drive_error(dr, "processing instruction content too large");
487
+ return;
488
+ }
489
+ if ('?' == c) {
490
+ if ('\0' == (c = buf_get(&dr->buf))) {
491
+ ox_sax_drive_error(dr, NO_TERM "document not terminated");
492
+ }
493
+ if ('>' == c) {
494
+ *content = '\0';
495
+ return;
496
+ } else {
497
+ *content++ = c;
498
+ }
499
+ } else {
500
+ *content++ = c;
501
+ }
439
502
  }
440
503
  *content = '\0';
441
504
  }
442
505
 
443
506
  /* Entered after the "<?" sequence. Ready to read the rest.
444
507
  */
445
- static char
446
- read_instruction(SaxDrive dr) {
447
- char content[4096];
448
- char c;
449
- int coff;
450
- VALUE target = Qnil;
451
- int is_xml;
452
- off_t pos = dr->buf.pos - 1;
453
- off_t line = dr->buf.line;
454
- off_t col = dr->buf.col - 1;
508
+ static char read_instruction(SaxDrive dr) {
509
+ char content[4096];
510
+ char c;
511
+ int coff;
512
+ VALUE target = Qnil;
513
+ int is_xml;
514
+ off_t pos = dr->buf.pos - 1;
515
+ off_t line = dr->buf.line;
516
+ off_t col = dr->buf.col - 1;
455
517
 
456
518
  buf_protect(&dr->buf);
457
519
  if ('\0' == (c = read_name_token(dr))) {
458
520
  return c;
459
521
  }
460
522
  is_xml = (0 == (dr->options.smart ? strcasecmp("xml", dr->buf.str) : strcmp("xml", dr->buf.str)));
461
- if (dr->has.instruct || dr->has.end_instruct) {
462
- target = rb_str_new2(dr->buf.str);
463
- }
464
- if (dr->has.instruct) {
465
- VALUE args[1];
466
-
467
- if (dr->has.pos) {
468
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
469
- }
470
- if (dr->has.line) {
471
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
472
- }
473
- if (dr->has.column) {
474
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
475
- }
476
- args[0] = target;
477
- rb_funcall2(dr->handler, ox_instruct_id, 1, args);
478
- }
523
+
524
+ target = dr->instruct(dr, dr->buf.str, pos, line, col);
479
525
  buf_protect(&dr->buf);
480
- pos = dr->buf.pos;
526
+ pos = dr->buf.pos;
481
527
  line = dr->buf.line;
482
- col = dr->buf.col;
528
+ col = dr->buf.col;
483
529
  read_content(dr, content, sizeof(content) - 1);
484
530
  coff = (int)(dr->buf.tail - dr->buf.head);
485
531
  buf_reset(&dr->buf);
486
532
  dr->err = 0;
487
- c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
488
- if (dr->has.attrs_done) {
489
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
490
- }
533
+ c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
534
+ dr->attrs_done(dr->handler);
491
535
  if (dr->err) {
492
- if (dr->has.text) {
493
- VALUE args[1];
536
+ if (dr->has_text) {
537
+ VALUE args[1];
494
538
 
495
- if (dr->options.convert_special) {
496
- ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
497
- }
498
- args[0] = rb_str_new2(content);
499
- #if HAVE_RB_ENC_ASSOCIATE
500
- if (0 != dr->encoding) {
501
- rb_enc_associate(args[0], dr->encoding);
502
- }
503
- #endif
504
- if (dr->has.line) {
505
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
506
- }
507
- if (dr->has.pos) {
508
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
509
- }
510
- if (dr->has.column) {
511
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
512
- }
513
- rb_funcall2(dr->handler, ox_text_id, 1, args);
514
- }
515
- dr->buf.tail = dr->buf.head + coff;
516
- c = buf_get(&dr->buf);
539
+ if (dr->options.convert_special) {
540
+ ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
541
+ }
542
+ args[0] = rb_str_new2(content);
543
+ if (0 != dr->encoding) {
544
+ rb_enc_associate(args[0], dr->encoding);
545
+ }
546
+ dr->set_pos(dr->handler, pos);
547
+ dr->set_line(dr->handler, line);
548
+ dr->set_col(dr->handler, col);
549
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
550
+ }
551
+ dr->buf.tail = dr->buf.head + coff;
552
+ c = buf_get(&dr->buf);
517
553
  } else {
518
- pos = dr->buf.pos;
519
- line = dr->buf.line;
520
- col = dr->buf.col;
521
- c = buf_next_non_white(&dr->buf);
522
- if ('>' == c) {
523
- c = buf_get(&dr->buf);
524
- } else {
525
- ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
526
- if ('>' == c) {
527
- c = buf_get(&dr->buf);
528
- }
529
- }
530
- }
531
- if (dr->has.end_instruct) {
532
- VALUE args[1];
533
-
534
- if (dr->has.pos) {
535
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
536
- }
537
- if (dr->has.line) {
538
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
539
- }
540
- if (dr->has.column) {
541
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
542
- }
543
- args[0] = target;
544
- rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
554
+ pos = dr->buf.pos;
555
+ line = dr->buf.line;
556
+ col = dr->buf.col;
557
+ c = buf_next_non_white(&dr->buf);
558
+ if ('>' == c) {
559
+ c = buf_get(&dr->buf);
560
+ } else {
561
+ ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
562
+ if ('>' == c) {
563
+ c = buf_get(&dr->buf);
564
+ }
565
+ }
545
566
  }
546
- dr->buf.str = 0;
567
+ dr->end_instruct(dr, target, pos, line, col);
568
+ dr->buf.str = NULL;
547
569
 
548
570
  return c;
549
571
  }
550
572
 
551
- static char
552
- read_delimited(SaxDrive dr, char end) {
553
- char c;
573
+ static char read_delimited(SaxDrive dr, char end) {
574
+ char c;
554
575
 
555
576
  if ('"' == end || '\'' == end) {
556
- while (end != (c = buf_get(&dr->buf))) {
557
- if ('\0' == c) {
558
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
559
- return c;
560
- }
561
- }
577
+ while (end != (c = buf_get(&dr->buf))) {
578
+ if ('\0' == c) {
579
+ ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
580
+ return c;
581
+ }
582
+ }
562
583
  } else {
563
- while (1) {
564
- c = buf_get(&dr->buf);
565
- if (end == c) {
566
- return c;
567
- }
568
- switch (c) {
569
- case '\0':
570
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
571
- return c;
572
- case '"':
573
- c = read_delimited(dr, c);
574
- break;
575
- case '\'':
576
- c = read_delimited(dr, c);
577
- break;
578
- case '[':
579
- c = read_delimited(dr, ']');
580
- break;
581
- case '<':
582
- c = read_delimited(dr, '>');
583
- break;
584
- default:
585
- break;
586
- }
587
- }
584
+ while (1) {
585
+ c = buf_get(&dr->buf);
586
+ if (end == c) {
587
+ return c;
588
+ }
589
+ switch (c) {
590
+ case '\0': ox_sax_drive_error(dr, NO_TERM "doctype not terminated"); return c;
591
+ case '"': c = read_delimited(dr, c); break;
592
+ case '\'': c = read_delimited(dr, c); break;
593
+ case '[': c = read_delimited(dr, ']'); break;
594
+ case '<': c = read_delimited(dr, '>'); break;
595
+ default: break;
596
+ }
597
+ }
588
598
  }
589
599
  return c;
590
600
  }
591
601
 
592
602
  /* Entered after the "<!DOCTYPE " sequence. Ready to read the rest.
593
603
  */
594
- static char
595
- read_doctype(SaxDrive dr) {
596
- long pos = (long)(dr->buf.pos - 9);
597
- long line = (long)(dr->buf.line);
598
- long col = (long)(dr->buf.col - 9);
599
- char *s;
600
- Nv parent = stack_peek(&dr->stack);
604
+ static char read_doctype(SaxDrive dr) {
605
+ long pos = (long)(dr->buf.pos - 9);
606
+ long line = (long)(dr->buf.line);
607
+ long col = (long)(dr->buf.col - 9);
608
+ char *s;
609
+ Nv parent = stack_peek(&dr->stack);
601
610
 
602
611
  buf_backup(&dr->buf); /* back up to the start in case the doctype is empty */
603
612
  buf_protect(&dr->buf);
604
613
  read_delimited(dr, '>');
605
614
  if (dr->options.smart && 0 == dr->options.hints) {
606
- for (s = dr->buf.str; is_white(*s); s++) { }
607
- if (0 == strncasecmp("HTML", s, 4)) {
608
- dr->options.hints = ox_hints_html();
609
- }
615
+ for (s = dr->buf.str; is_white(*s); s++) {
616
+ }
617
+ if (0 == strncasecmp("HTML", s, 4)) {
618
+ dr->options.hints = ox_hints_html();
619
+ }
610
620
  }
611
621
  *(dr->buf.tail - 1) = '\0';
612
622
  if (0 != parent) {
613
- parent->childCnt++;
614
- }
615
- if (dr->has.doctype) {
616
- VALUE args[1];
617
-
618
- if (dr->has.pos) {
619
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
620
- }
621
- if (dr->has.line) {
622
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
623
- }
624
- if (dr->has.column) {
625
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
626
- }
627
- args[0] = rb_str_new2(dr->buf.str);
628
- rb_funcall2(dr->handler, ox_doctype_id, 1, args);
623
+ parent->childCnt++;
629
624
  }
625
+ dr->doctype(dr, pos, line, col);
630
626
  dr->buf.str = 0;
631
627
 
632
628
  return buf_get(&dr->buf);
@@ -634,89 +630,65 @@ read_doctype(SaxDrive dr) {
634
630
 
635
631
  /* Entered after the "<![CDATA[" sequence. Ready to read the rest.
636
632
  */
637
- static char
638
- read_cdata(SaxDrive dr) {
639
- char c;
640
- char zero = '\0';
641
- int end = 0;
642
- long pos = (long)(dr->buf.pos - 9);
643
- long line = (long)(dr->buf.line);
644
- long col = (long)(dr->buf.col - 9);
645
- struct _checkPt cp = CHECK_PT_INIT;
646
- Nv parent = stack_peek(&dr->stack);
633
+ static char read_cdata(SaxDrive dr) {
634
+ char c;
635
+ char zero = '\0';
636
+ int end = 0;
637
+ long pos = (long)(dr->buf.pos - 9);
638
+ long line = (long)(dr->buf.line);
639
+ long col = (long)(dr->buf.col - 9);
640
+ struct _checkPt cp = CHECK_PT_INIT;
641
+ Nv parent = stack_peek(&dr->stack);
647
642
 
648
643
  // TBD check parent overlay
649
644
  if (0 != parent) {
650
- parent->childCnt++;
645
+ parent->childCnt++;
651
646
  }
652
647
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
653
648
  buf_protect(&dr->buf);
654
649
  while (1) {
655
650
  c = buf_get(&dr->buf);
656
- switch (c) {
657
- case ']':
658
- end++;
659
- break;
660
- case '>':
651
+ switch (c) {
652
+ case ']': end++; break;
653
+ case '>':
661
654
  if (2 <= end) {
662
655
  *(dr->buf.tail - 3) = '\0';
663
- c = buf_get(&dr->buf);
656
+ c = buf_get(&dr->buf);
664
657
  goto CB;
665
658
  }
666
- if (!buf_checkset(&cp)) {
667
- buf_checkpoint(&dr->buf, &cp);
668
- }
659
+ if (!buf_checkset(&cp)) {
660
+ buf_checkpoint(&dr->buf, &cp);
661
+ }
669
662
  end = 0;
670
- break;
671
- case '<':
672
- if (!buf_checkset(&cp)) {
673
- buf_checkpoint(&dr->buf, &cp);
674
- }
675
- end = 0;
676
- break;
677
- case '\0':
678
- if (buf_checkset(&cp)) {
679
- c = buf_checkback(&dr->buf, &cp);
680
- ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
681
- zero = c;
682
- *(dr->buf.tail - 1) = '\0';
683
- goto CB;
684
- }
663
+ break;
664
+ case '<':
665
+ if (!buf_checkset(&cp)) {
666
+ buf_checkpoint(&dr->buf, &cp);
667
+ }
668
+ end = 0;
669
+ break;
670
+ case '\0':
671
+ if (buf_checkset(&cp)) {
672
+ c = buf_checkback(&dr->buf, &cp);
673
+ ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
674
+ zero = c;
675
+ *(dr->buf.tail - 1) = '\0';
676
+ goto CB;
677
+ }
685
678
  ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
686
679
  return '\0';
687
- default:
688
- if (1 < end && !buf_checkset(&cp)) {
689
- buf_checkpoint(&dr->buf, &cp);
690
- }
691
- end = 0;
692
- break;
693
- }
694
- }
695
- CB:
696
- if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
697
- if (dr->has.cdata) {
698
- VALUE args[1];
699
-
700
- args[0] = rb_str_new2(dr->buf.str);
701
- #if HAVE_RB_ENC_ASSOCIATE
702
- if (0 != dr->encoding) {
703
- rb_enc_associate(args[0], dr->encoding);
704
- }
705
- #endif
706
- if (dr->has.pos) {
707
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
708
- }
709
- if (dr->has.line) {
710
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
711
- }
712
- if (dr->has.column) {
713
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
714
- }
715
- rb_funcall2(dr->handler, ox_cdata_id, 1, args);
716
- }
680
+ default:
681
+ if (1 < end && !buf_checkset(&cp)) {
682
+ buf_checkpoint(&dr->buf, &cp);
683
+ }
684
+ end = 0;
685
+ break;
686
+ }
717
687
  }
688
+ CB:
689
+ dr->cdata(dr, pos, line, col);
718
690
  if ('\0' != zero) {
719
- *(dr->buf.tail - 1) = zero;
691
+ *(dr->buf.tail - 1) = zero;
720
692
  }
721
693
  dr->buf.str = 0;
722
694
 
@@ -725,88 +697,60 @@ read_cdata(SaxDrive dr) {
725
697
 
726
698
  /* Entered after the "<!--" sequence. Ready to read the rest.
727
699
  */
728
- static char
729
- read_comment(SaxDrive dr) {
730
- char c;
731
- char zero = '\0';
732
- int end = 0;
733
- long pos = (long)(dr->buf.pos - 4);
734
- long line = (long)(dr->buf.line);
735
- long col = (long)(dr->buf.col - 4);
736
- struct _checkPt cp = CHECK_PT_INIT;
700
+ static char read_comment(SaxDrive dr) {
701
+ char c;
702
+ char zero = '\0';
703
+ int end = 0;
704
+ long pos = (long)(dr->buf.pos - 4);
705
+ long line = (long)(dr->buf.line);
706
+ long col = (long)(dr->buf.col - 4);
707
+ struct _checkPt cp = CHECK_PT_INIT;
737
708
 
738
709
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
739
710
  buf_protect(&dr->buf);
740
711
  while (1) {
741
712
  c = buf_get(&dr->buf);
742
- switch (c) {
743
- case '-':
744
- end++;
745
- break;
746
- case '>':
713
+ switch (c) {
714
+ case '-': end++; break;
715
+ case '>':
747
716
  if (2 <= end) {
748
717
  *(dr->buf.tail - 3) = '\0';
749
- c = buf_get(&dr->buf);
718
+ c = buf_get(&dr->buf);
750
719
  goto CB;
751
720
  }
752
- if (!buf_checkset(&cp)) {
753
- buf_checkpoint(&dr->buf, &cp);
754
- }
721
+ if (!buf_checkset(&cp)) {
722
+ buf_checkpoint(&dr->buf, &cp);
723
+ }
755
724
  end = 0;
756
- break;
757
- case '<':
758
- if (!buf_checkset(&cp)) {
759
- buf_checkpoint(&dr->buf, &cp);
760
- }
761
- end = 0;
762
- break;
763
- case '\0':
764
- if (buf_checkset(&cp)) {
765
- c = buf_checkback(&dr->buf, &cp);
766
- ox_sax_drive_error(dr, NO_TERM "comment not terminated");
767
- zero = c;
768
- *(dr->buf.tail - 1) = '\0';
769
- goto CB;
770
- }
725
+ break;
726
+ case '<':
727
+ if (!buf_checkset(&cp)) {
728
+ buf_checkpoint(&dr->buf, &cp);
729
+ }
730
+ end = 0;
731
+ break;
732
+ case '\0':
733
+ if (buf_checkset(&cp)) {
734
+ c = buf_checkback(&dr->buf, &cp);
735
+ ox_sax_drive_error(dr, NO_TERM "comment not terminated");
736
+ zero = c;
737
+ *(dr->buf.tail - 1) = '\0';
738
+ goto CB;
739
+ }
771
740
  ox_sax_drive_error(dr, NO_TERM "comment not terminated");
772
741
  return '\0';
773
- default:
774
- if (1 < end && !buf_checkset(&cp)) {
775
- buf_checkpoint(&dr->buf, &cp);
776
- }
777
- end = 0;
778
- break;
779
- }
780
- }
781
- CB:
782
- if (dr->has.comment && !dr->blocked) {
783
- VALUE args[1];
784
- Nv parent = stack_peek(&dr->stack);
785
- Hint h = ox_hint_find(dr->options.hints, "!--");
786
-
787
- if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
788
- (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
789
-
790
- args[0] = rb_str_new2(dr->buf.str);
791
- #if HAVE_RB_ENC_ASSOCIATE
792
- if (0 != dr->encoding) {
793
- rb_enc_associate(args[0], dr->encoding);
794
- }
795
- #endif
796
- if (dr->has.pos) {
797
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
798
- }
799
- if (dr->has.line) {
800
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
801
- }
802
- if (dr->has.column) {
803
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
804
- }
805
- rb_funcall2(dr->handler, ox_comment_id, 1, args);
806
- }
742
+ default:
743
+ if (1 < end && !buf_checkset(&cp)) {
744
+ buf_checkpoint(&dr->buf, &cp);
745
+ }
746
+ end = 0;
747
+ break;
748
+ }
807
749
  }
750
+ CB:
751
+ dr->comment(dr, pos, line, col);
808
752
  if ('\0' != zero) {
809
- *(dr->buf.tail - 1) = zero;
753
+ *(dr->buf.tail - 1) = zero;
810
754
  }
811
755
  dr->buf.str = 0;
812
756
 
@@ -816,106 +760,115 @@ read_comment(SaxDrive dr) {
816
760
  /* Entered after the '<' and the first character after that. Returns status
817
761
  * code.
818
762
  */
819
- static char
820
- read_element_start(SaxDrive dr) {
821
- const char *ename = 0;
822
- volatile VALUE name = Qnil;
823
- char c;
824
- int closed;
825
- long pos = (long)(dr->buf.pos);
826
- long line = (long)(dr->buf.line);
827
- long col = (long)(dr->buf.col);
828
- Hint h = NULL;
829
- int stackless = 0;
830
- Nv parent = stack_peek(&dr->stack);
763
+ static char read_element_start(SaxDrive dr) {
764
+ const char *ename = 0;
765
+ volatile VALUE name = Qnil;
766
+ char c;
767
+ int closed;
768
+ long pos = (long)(dr->buf.pos);
769
+ long line = (long)(dr->buf.line);
770
+ long col = (long)(dr->buf.col);
771
+ Hint h = NULL;
772
+ int stackless = 0;
773
+ Nv parent = stack_peek(&dr->stack);
831
774
 
832
775
  if ('\0' == (c = read_name_token(dr))) {
833
776
  return '\0';
834
777
  }
835
778
  if ('\0' == *dr->buf.str) {
836
- char msg[256];
779
+ char msg[256];
837
780
 
838
- snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
839
- ox_sax_drive_error_at(dr, msg, pos, line, col);
781
+ snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
782
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
840
783
 
841
- return buf_get(&dr->buf);
784
+ return buf_get(&dr->buf);
842
785
  }
843
786
  if (0 != parent) {
844
- parent->childCnt++;
787
+ parent->childCnt++;
845
788
  }
846
- if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) && 0 == strcasecmp("html", dr->buf.str)) {
847
- dr->options.hints = ox_hints_html();
789
+ if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) &&
790
+ 0 == strcasecmp("html", dr->buf.str)) {
791
+ dr->options.hints = ox_hints_html();
848
792
  }
849
793
  if (NULL != dr->options.hints) {
850
- hint_clear_empty(dr);
851
- h = ox_hint_find(dr->options.hints, dr->buf.str);
852
- if (NULL == h) {
853
- char msg[256];
854
-
855
- snprintf(msg, sizeof(msg), "%s%s is not a valid element type for a %s document type.", INV_ELEMENT, dr->buf.str, dr->options.hints->name);
856
- ox_sax_drive_error(dr, msg);
857
- } else {
858
- Nv top_nv = stack_peek(&dr->stack);
859
-
860
- if (AbortOverlay == h->overlay) {
861
- if (rb_respond_to(dr->handler, ox_abort_id)) {
862
- VALUE args[1];
863
-
864
- args[0] = str2sym(dr, dr->buf.str, NULL);
865
- rb_funcall2(dr->handler, ox_abort_id, 1, args);
866
- }
867
- dr->abort = true;
868
- return '\0';
869
- }
870
- if (BlockOverlay == h->overlay) {
871
- dr->blocked++;
872
- }
873
- if (h->empty) {
874
- stackless = 1;
875
- }
876
- if (0 != top_nv) {
877
- char msg[256];
878
-
879
- if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
880
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be nested in a %s document, closing previous.",
881
- INV_ELEMENT, dr->buf.str, dr->options.hints->name);
882
- ox_sax_drive_error(dr, msg);
883
- stack_pop(&dr->stack);
884
- end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
885
- top_nv = stack_peek(&dr->stack);
886
- }
887
- if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
888
- const char **p;
889
- int ok = 0;
890
-
891
- for (p = h->parents; 0 != *p; p++) {
892
- if (0 == strcasecmp(*p, top_nv->name)) {
893
- ok = 1;
894
- break;
895
- }
896
- }
897
- if (!ok) {
898
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be a child of a %s in a %s document.",
899
- INV_ELEMENT, h->name, top_nv->name, dr->options.hints->name);
900
- ox_sax_drive_error(dr, msg);
901
- }
902
- }
903
- }
904
- }
905
- }
906
- name = str2sym(dr, dr->buf.str, &ename);
907
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
908
- VALUE args[1];
909
-
910
- if (dr->has.pos) {
911
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
912
- }
913
- if (dr->has.line) {
914
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
915
- }
916
- if (dr->has.column) {
917
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
918
- }
794
+ hint_clear_empty(dr);
795
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
796
+ if (NULL == h) {
797
+ char msg[256];
798
+
799
+ snprintf(msg,
800
+ sizeof(msg),
801
+ "%s%s is not a valid element type for a %s document type.",
802
+ INV_ELEMENT,
803
+ dr->buf.str,
804
+ dr->options.hints->name);
805
+ ox_sax_drive_error(dr, msg);
806
+ } else {
807
+ Nv top_nv = stack_peek(&dr->stack);
808
+
809
+ if (AbortOverlay == h->overlay) {
810
+ if (rb_respond_to(dr->handler, ox_abort_id)) {
811
+ VALUE args[1];
812
+
813
+ args[0] = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, NULL);
814
+ rb_funcall2(dr->handler, ox_abort_id, 1, args);
815
+ }
816
+ dr->abort = true;
817
+ return '\0';
818
+ }
819
+ if (BlockOverlay == h->overlay) {
820
+ dr->blocked++;
821
+ }
822
+ if (h->empty) {
823
+ stackless = 1;
824
+ }
825
+ if (0 != top_nv) {
826
+ char msg[256];
827
+
828
+ if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
829
+ snprintf(msg,
830
+ sizeof(msg) - 1,
831
+ "%s%s can not be nested in a %s document, closing previous.",
832
+ INV_ELEMENT,
833
+ dr->buf.str,
834
+ dr->options.hints->name);
835
+ ox_sax_drive_error(dr, msg);
836
+ stack_pop(&dr->stack);
837
+ end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
838
+ top_nv = stack_peek(&dr->stack);
839
+ }
840
+ if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
841
+ const char **p;
842
+ int ok = 0;
843
+
844
+ for (p = h->parents; 0 != *p; p++) {
845
+ if (0 == strcasecmp(*p, top_nv->name)) {
846
+ ok = 1;
847
+ break;
848
+ }
849
+ }
850
+ if (!ok) {
851
+ snprintf(msg,
852
+ sizeof(msg) - 1,
853
+ "%s%s can not be a child of a %s in a %s document.",
854
+ INV_ELEMENT,
855
+ h->name,
856
+ top_nv->name,
857
+ dr->options.hints->name);
858
+ ox_sax_drive_error(dr, msg);
859
+ }
860
+ }
861
+ }
862
+ }
863
+ }
864
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, &ename);
865
+ if (dr->has_start_element && 0 >= dr->blocked &&
866
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
867
+ VALUE args[1];
868
+
869
+ dr->set_pos(dr->handler, pos);
870
+ dr->set_line(dr->handler, line);
871
+ dr->set_col(dr->handler, col);
919
872
  args[0] = name;
920
873
  rb_funcall2(dr->handler, ox_start_element_id, 1, args);
921
874
  }
@@ -924,362 +877,302 @@ read_element_start(SaxDrive dr) {
924
877
  } else if ('>' == c) {
925
878
  closed = 0;
926
879
  } else {
927
- buf_protect(&dr->buf);
880
+ buf_protect(&dr->buf);
928
881
  c = read_attrs(dr, c, '/', '>', 0, 0, h);
929
- if (is_white(c)) {
930
- c = buf_next_non_white(&dr->buf);
931
- }
932
- closed = ('/' == c);
882
+ if (is_white(c)) {
883
+ c = buf_next_non_white(&dr->buf);
884
+ }
885
+ closed = ('/' == c);
933
886
  }
934
- if (dr->has.attrs_done && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
935
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
887
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
888
+ dr->attrs_done(dr->handler);
936
889
  }
937
890
  if (closed) {
938
- c = buf_next_non_white(&dr->buf);
939
- pos = dr->buf.pos;
940
- line = dr->buf.line;
941
- col = dr->buf.col;
942
- end_element_cb(dr, name, pos, line, col, h);
891
+ c = buf_next_non_white(&dr->buf);
892
+
893
+ end_element_cb(dr, name, dr->buf.pos, dr->buf.line, dr->buf.col, h);
943
894
  } else if (stackless) {
944
- end_element_cb(dr, name, pos, line, col, h);
895
+ end_element_cb(dr, name, pos, line, col, h);
945
896
  } else if (NULL != h && h->jump) {
946
- stack_push(&dr->stack, ename, name, h);
947
- if ('>' != c) {
948
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
949
- return c;
950
- }
951
- read_jump(dr, h->name);
952
- return '<';
897
+ stack_push(&dr->stack, ename, name, h);
898
+ if ('>' != c) {
899
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
900
+ return c;
901
+ }
902
+ read_jump(dr, h->name);
903
+ return '<';
953
904
  } else {
954
- stack_push(&dr->stack, ename, name, h);
905
+ stack_push(&dr->stack, ename, name, h);
955
906
  }
956
907
  if ('>' != c) {
957
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
958
- return c;
908
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
909
+ return c;
959
910
  }
960
911
  dr->buf.str = 0;
961
912
 
962
913
  return buf_get(&dr->buf);
963
914
  }
964
915
 
965
- static Nv
966
- stack_rev_find(SaxDrive dr, const char *name) {
967
- Nv nv;
916
+ static Nv stack_rev_find(SaxDrive dr, const char *name) {
917
+ Nv nv;
968
918
 
969
919
  for (nv = dr->stack.tail - 1; dr->stack.head <= nv; nv--) {
970
- if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
971
- return nv;
972
- }
920
+ if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
921
+ return nv;
922
+ }
973
923
  }
974
924
  return 0;
975
925
  }
976
926
 
977
- static char
978
- read_element_end(SaxDrive dr) {
979
- VALUE name = Qnil;
980
- char c;
981
- long pos = (long)(dr->buf.pos - 1);
982
- long line = (long)(dr->buf.line);
983
- long col = (long)(dr->buf.col - 1);
984
- Nv nv;
985
- Hint h = NULL;
927
+ static char read_element_end(SaxDrive dr) {
928
+ VALUE name = Qnil;
929
+ char c;
930
+ long pos = (long)(dr->buf.pos - 1);
931
+ long line = (long)(dr->buf.line);
932
+ long col = (long)(dr->buf.col - 1);
933
+ Nv nv;
934
+ Hint h = NULL;
986
935
 
987
936
  if ('\0' == (c = read_name_token(dr))) {
988
937
  return '\0';
989
938
  }
990
939
  if (is_white(c)) {
991
- c = buf_next_non_white(&dr->buf);
940
+ c = buf_next_non_white(&dr->buf);
992
941
  }
993
942
  // c should be > and current is one past so read another char
994
- c = buf_get(&dr->buf);
943
+ c = buf_get(&dr->buf);
995
944
  nv = stack_peek(&dr->stack);
996
- if (0 != nv &&
997
- 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
998
- name = nv->val;
999
- h = nv->hint;
1000
- stack_pop(&dr->stack);
945
+ if (0 != nv && 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
946
+ name = nv->val;
947
+ h = nv->hint;
948
+ stack_pop(&dr->stack);
1001
949
  } else {
1002
- // Mismatched start and end
1003
- char msg[256];
1004
- Nv match = stack_rev_find(dr, dr->buf.str);
1005
-
1006
- if (0 == match) {
1007
- // Not found so open and close element.
1008
- h = ox_hint_find(dr->options.hints, dr->buf.str);
1009
- if (NULL != h && h->empty) {
1010
- // Just close normally
1011
- name = str2sym(dr, dr->buf.str, 0);
1012
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
1013
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1014
- return c;
1015
- } else {
1016
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
1017
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1018
- name = str2sym(dr, dr->buf.str, 0);
1019
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1020
- VALUE args[1];
1021
-
1022
- if (dr->has.pos) {
1023
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1024
- }
1025
- if (dr->has.line) {
1026
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1027
- }
1028
- if (dr->has.column) {
1029
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1030
- }
1031
- args[0] = name;
1032
- rb_funcall2(dr->handler, ox_start_element_id, 1, args);
1033
- }
1034
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1035
- dr->blocked--;
1036
- }
1037
- }
1038
- } else {
1039
- // Found a match so close all up to the found element in stack.
1040
- Nv n2;
1041
-
1042
- if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
1043
- name = n2->val;
1044
- h = n2->hint;
1045
- } else {
1046
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, nv->name);
1047
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1048
- if (dr->has.pos) {
1049
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1050
- }
1051
- if (dr->has.line) {
1052
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1053
- }
1054
- if (dr->has.column) {
1055
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1056
- }
1057
- for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1058
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == nv->hint || ActiveOverlay == nv->hint->overlay || NestOverlay == nv->hint->overlay)) {
1059
- rb_funcall(dr->handler, ox_end_element_id, 1, nv->val);
1060
- }
1061
- if (NULL != nv->hint && BlockOverlay == nv->hint->overlay && 0 < dr->blocked) {
1062
- dr->blocked--;
1063
- }
1064
- }
1065
- name = nv->val;
1066
- h = nv->hint;
1067
- }
1068
- }
950
+ // Mismatched start and end
951
+ char msg[256];
952
+ Nv match = stack_rev_find(dr, dr->buf.str);
953
+
954
+ if (0 == match) {
955
+ // Not found so open and close element.
956
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
957
+ if (NULL != h && h->empty) {
958
+ // Just close normally
959
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
960
+ snprintf(msg,
961
+ sizeof(msg) - 1,
962
+ "%selement '%s' should not have a separate close element",
963
+ EL_MISMATCH,
964
+ dr->buf.str);
965
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
966
+ return c;
967
+ } else {
968
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
969
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
970
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
971
+ if (dr->has_start_element && 0 >= dr->blocked &&
972
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
973
+ VALUE args[1];
974
+
975
+ dr->set_pos(dr->handler, pos);
976
+ dr->set_line(dr->handler, line);
977
+ dr->set_col(dr->handler, col);
978
+ args[0] = name;
979
+ rb_funcall2(dr->handler, ox_start_element_id, 1, args);
980
+ }
981
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
982
+ dr->blocked--;
983
+ }
984
+ }
985
+ } else {
986
+ // Found a match so close all up to the found element in stack.
987
+ Nv n2;
988
+
989
+ if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
990
+ name = n2->val;
991
+ h = n2->hint;
992
+ } else {
993
+ snprintf(msg,
994
+ sizeof(msg) - 1,
995
+ "%selement '%s' close does not match '%s' open",
996
+ EL_MISMATCH,
997
+ dr->buf.str,
998
+ nv->name);
999
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
1000
+ for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1001
+ end_element_cb(dr, nv->val, pos, line, col, nv->hint);
1002
+ }
1003
+ name = nv->val;
1004
+ h = nv->hint;
1005
+ }
1006
+ }
1069
1007
  }
1070
1008
  end_element_cb(dr, name, pos, line, col, h);
1071
1009
 
1072
1010
  return c;
1073
1011
  }
1074
1012
 
1075
- static char
1076
- read_text(SaxDrive dr) {
1077
- VALUE args[1];
1078
- char c;
1079
- long pos = (long)(dr->buf.pos);
1080
- long line = (long)(dr->buf.line);
1081
- long col = (long)(dr->buf.col - 1);
1082
- Nv parent = stack_peek(&dr->stack);
1083
- int allWhite = 1;
1013
+ static char read_text(SaxDrive dr) {
1014
+ VALUE args[1];
1015
+ char c;
1016
+ long pos = (long)(dr->buf.pos);
1017
+ long line = (long)(dr->buf.line);
1018
+ long col = (long)(dr->buf.col - 1);
1019
+ Nv parent = stack_peek(&dr->stack);
1020
+ int allWhite = 1;
1084
1021
 
1085
1022
  buf_backup(&dr->buf);
1086
1023
  buf_protect(&dr->buf);
1087
1024
  while ('<' != (c = buf_get(&dr->buf))) {
1088
- switch(c) {
1089
- case ' ':
1090
- case '\t':
1091
- case '\f':
1092
- case '\n':
1093
- case '\r':
1094
- break;
1095
- case '\0':
1096
- if (allWhite) {
1097
- return c;
1098
- }
1025
+ switch (c) {
1026
+ case ' ':
1027
+ case '\t':
1028
+ case '\f':
1029
+ case '\n':
1030
+ case '\r': break;
1031
+ case '\0':
1032
+ if (allWhite) {
1033
+ return c;
1034
+ }
1099
1035
  ox_sax_drive_error(dr, NO_TERM "text not terminated");
1100
- goto END_OF_BUF;
1101
- break;
1102
- default:
1103
- allWhite = 0;
1104
- break;
1105
- }
1036
+ goto END_OF_BUF;
1037
+ break;
1038
+ default: allWhite = 0; break;
1039
+ }
1106
1040
  }
1107
- END_OF_BUF:
1041
+ END_OF_BUF:
1108
1042
  if ('\0' != c) {
1109
- *(dr->buf.tail - 1) = '\0';
1043
+ *(dr->buf.tail - 1) = '\0';
1110
1044
  }
1111
1045
  if (allWhite) {
1112
- int isEnd = ('/' == buf_get(&dr->buf));
1113
-
1114
- buf_backup(&dr->buf);
1115
- if (dr->has.text &&
1116
- ((NoSkip == dr->options.skip && !isEnd) ||
1117
- (OffSkip == dr->options.skip))) {
1118
- args[0] = rb_str_new2(dr->buf.str);
1119
- #if HAVE_RB_ENC_ASSOCIATE
1120
- if (0 != dr->encoding) {
1121
- rb_enc_associate(args[0], dr->encoding);
1122
- }
1123
- #endif
1124
- if (dr->has.pos) {
1125
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1126
- }
1127
- if (dr->has.line) {
1128
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1129
- }
1130
- if (dr->has.column) {
1131
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1132
- }
1133
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1134
- }
1135
- if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1136
- return c;
1137
- }
1046
+ int isEnd = ('/' == buf_get(&dr->buf));
1047
+
1048
+ buf_backup(&dr->buf);
1049
+ if (dr->has_text && ((NoSkip == dr->options.skip && !isEnd) || (OffSkip == dr->options.skip))) {
1050
+ args[0] = rb_str_new2(dr->buf.str);
1051
+ if (0 != dr->encoding) {
1052
+ rb_enc_associate(args[0], dr->encoding);
1053
+ }
1054
+ dr->set_pos(dr->handler, pos);
1055
+ dr->set_line(dr->handler, line);
1056
+ dr->set_col(dr->handler, col);
1057
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1058
+ }
1059
+ if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1060
+ return c;
1061
+ }
1138
1062
  }
1139
1063
  if (0 != parent) {
1140
- parent->childCnt++;
1064
+ parent->childCnt++;
1141
1065
  }
1142
1066
  if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
1143
- if (dr->has.value) {
1144
- if (dr->has.pos) {
1145
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1146
- }
1147
- if (dr->has.line) {
1148
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1149
- }
1150
- if (dr->has.column) {
1151
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1152
- }
1153
- *args = dr->value_obj;
1154
- rb_funcall2(dr->handler, ox_value_id, 1, args);
1155
- } else if (dr->has.text) {
1156
- if (dr->options.convert_special) {
1157
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1158
- }
1159
- switch (dr->options.skip) {
1160
- case CrSkip:
1161
- buf_collapse_return(dr->buf.str);
1162
- break;
1163
- case SpcSkip:
1164
- buf_collapse_white(dr->buf.str);
1165
- break;
1166
- default:
1167
- break;
1168
- }
1169
- args[0] = rb_str_new2(dr->buf.str);
1170
- #if HAVE_RB_ENC_ASSOCIATE
1171
- if (0 != dr->encoding) {
1172
- rb_enc_associate(args[0], dr->encoding);
1173
- }
1174
- #endif
1175
- if (dr->has.pos) {
1176
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1177
- }
1178
- if (dr->has.line) {
1179
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1180
- }
1181
- if (dr->has.column) {
1182
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1183
- }
1184
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1185
- }
1067
+ if (dr->has_value) {
1068
+ dr->set_pos(dr->handler, pos);
1069
+ dr->set_line(dr->handler, line);
1070
+ dr->set_col(dr->handler, col);
1071
+ *args = dr->value_obj;
1072
+ rb_funcall2(dr->handler, ox_value_id, 1, args);
1073
+ } else if (dr->has_text) {
1074
+ if (dr->options.convert_special) {
1075
+ ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1076
+ }
1077
+ switch (dr->options.skip) {
1078
+ case CrSkip: buf_collapse_return(dr->buf.str); break;
1079
+ case SpcSkip: buf_collapse_white(dr->buf.str); break;
1080
+ default: break;
1081
+ }
1082
+ args[0] = rb_str_new2(dr->buf.str);
1083
+ if (0 != dr->encoding) {
1084
+ rb_enc_associate(args[0], dr->encoding);
1085
+ }
1086
+ dr->set_pos(dr->handler, pos);
1087
+ dr->set_line(dr->handler, line);
1088
+ dr->set_col(dr->handler, col);
1089
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1090
+ }
1186
1091
  }
1187
1092
  dr->buf.str = 0;
1188
1093
 
1189
1094
  return c;
1190
1095
  }
1191
1096
 
1192
- static int
1193
- read_jump_term(Buf buf, const char *pat) {
1194
- struct _checkPt cp;
1097
+ static int read_jump_term(Buf buf, const char *pat) {
1098
+ struct _checkPt cp;
1195
1099
 
1196
- buf_checkpoint(buf, &cp); // right after <
1100
+ buf_checkpoint(buf, &cp); // right after <
1197
1101
  if ('/' != buf_next_non_white(buf)) {
1198
- return 0;
1102
+ return 0;
1199
1103
  }
1200
1104
  if (*pat != tolower(buf_next_non_white(buf))) {
1201
- return 0;
1105
+ return 0;
1202
1106
  }
1203
1107
  for (pat++; '\0' != *pat; pat++) {
1204
- if (*pat != tolower(buf_get(buf))) {
1205
- return 0;
1206
- }
1108
+ if (*pat != tolower(buf_get(buf))) {
1109
+ return 0;
1110
+ }
1207
1111
  }
1208
1112
  if ('>' != buf_next_non_white(buf)) {
1209
- return 0;
1113
+ return 0;
1210
1114
  }
1211
1115
  buf_checkback(buf, &cp);
1212
1116
  return 1;
1213
1117
  }
1214
1118
 
1215
- static char
1216
- read_jump(SaxDrive dr, const char *pat) {
1217
- VALUE args[1];
1218
- char c;
1219
- long pos = (long)(dr->buf.pos);
1220
- long line = (long)(dr->buf.line);
1221
- long col = (long)(dr->buf.col - 1);
1222
- Nv parent = stack_peek(&dr->stack);
1119
+ static char read_jump(SaxDrive dr, const char *pat) {
1120
+ VALUE args[1];
1121
+ char c;
1122
+ long pos = (long)(dr->buf.pos);
1123
+ long line = (long)(dr->buf.line);
1124
+ long col = (long)(dr->buf.col - 1);
1125
+ Nv parent = stack_peek(&dr->stack);
1223
1126
 
1224
1127
  buf_protect(&dr->buf);
1225
1128
  while (1) {
1226
- c = buf_get(&dr->buf);
1227
- switch(c) {
1228
- case '<':
1229
- if (read_jump_term(&dr->buf, pat)) {
1230
- goto END_OF_BUF;
1231
- break;
1232
- }
1233
- break;
1234
- case '\0':
1129
+ c = buf_get(&dr->buf);
1130
+ switch (c) {
1131
+ case '<':
1132
+ if (read_jump_term(&dr->buf, pat)) {
1133
+ goto END_OF_BUF;
1134
+ break;
1135
+ }
1136
+ break;
1137
+ case '\0':
1235
1138
  ox_sax_drive_error(dr, NO_TERM "not terminated");
1236
- goto END_OF_BUF;
1237
- break;
1238
- default:
1239
- break;
1240
- }
1139
+ goto END_OF_BUF;
1140
+ break;
1141
+ default: break;
1142
+ }
1241
1143
  }
1242
- END_OF_BUF:
1144
+ END_OF_BUF:
1243
1145
  if ('\0' != c) {
1244
- *(dr->buf.tail - 1) = '\0';
1146
+ *(dr->buf.tail - 1) = '\0';
1245
1147
  }
1246
1148
  if (0 != parent) {
1247
- parent->childCnt++;
1149
+ parent->childCnt++;
1248
1150
  }
1249
1151
  // TBD check parent overlay
1250
- if (dr->has.text && !dr->blocked) {
1152
+ if (dr->has_text && !dr->blocked) {
1251
1153
  args[0] = rb_str_new2(dr->buf.str);
1252
- #if HAVE_RB_ENC_ASSOCIATE
1253
1154
  if (0 != dr->encoding) {
1254
1155
  rb_enc_associate(args[0], dr->encoding);
1255
1156
  }
1256
- #endif
1257
- if (dr->has.pos) {
1258
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1259
- }
1260
- if (dr->has.line) {
1261
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1262
- }
1263
- if (dr->has.column) {
1264
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1265
- }
1157
+ dr->set_pos(dr->handler, pos);
1158
+ dr->set_line(dr->handler, line);
1159
+ dr->set_col(dr->handler, col);
1266
1160
  rb_funcall2(dr->handler, ox_text_id, 1, args);
1267
1161
  }
1268
1162
  dr->buf.str = 0;
1269
1163
  if ('\0' != c) {
1270
- *(dr->buf.tail - 1) = '<';
1164
+ *(dr->buf.tail - 1) = '<';
1271
1165
  }
1272
1166
  return c;
1273
1167
  }
1274
1168
 
1275
- static char
1276
- read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1277
- VALUE name = Qnil;
1278
- int is_encoding = 0;
1279
- off_t pos;
1280
- off_t line;
1281
- off_t col;
1282
- char *attr_value;
1169
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1170
+ VALUE name = Qnil;
1171
+ int is_encoding = 0;
1172
+ off_t pos;
1173
+ off_t line;
1174
+ off_t col;
1175
+ char *attr_value;
1283
1176
 
1284
1177
  // already protected by caller
1285
1178
  dr->buf.str = dr->buf.tail;
@@ -1287,94 +1180,52 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1287
1180
  c = buf_next_non_white(&dr->buf);
1288
1181
  }
1289
1182
  while (termc != c && term2 != c) {
1290
- buf_backup(&dr->buf);
1183
+ buf_backup(&dr->buf);
1291
1184
  if ('\0' == c) {
1292
- ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1293
- return '\0';
1185
+ ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1186
+ return '\0';
1294
1187
  }
1295
- pos = dr->buf.pos + 1;
1296
- line = dr->buf.line;
1297
- col = dr->buf.col + 1;
1188
+ pos = dr->buf.pos + 1;
1189
+ line = dr->buf.line;
1190
+ col = dr->buf.col + 1;
1298
1191
  if ('\0' == (c = read_name_token(dr))) {
1299
- ox_sax_drive_error(dr, NO_TERM "error reading token");
1300
- return '\0';
1192
+ ox_sax_drive_error(dr, NO_TERM "error reading token");
1193
+ return '\0';
1301
1194
  }
1302
1195
  if (is_xml && 0 == strcasecmp("encoding", dr->buf.str)) {
1303
1196
  is_encoding = 1;
1304
1197
  }
1305
- if (dr->has.attr || dr->has.attr_value) {
1306
- name = str2sym(dr, dr->buf.str, 0);
1198
+ if (dr->want_attr_name) {
1199
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, 0);
1307
1200
  }
1308
1201
  if (is_white(c)) {
1309
1202
  c = buf_next_non_white(&dr->buf);
1310
1203
  }
1311
1204
  if ('=' != c) {
1312
- if (eq_req) {
1313
- dr->err = 1;
1314
- return c;
1315
- } else {
1316
- ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1317
- attr_value = (char*)"";
1318
- }
1205
+ if (eq_req) {
1206
+ dr->err = 1;
1207
+ return c;
1208
+ } else {
1209
+ ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1210
+ attr_value = (char *)"";
1211
+ }
1319
1212
  } else {
1320
- pos = dr->buf.pos + 1;
1321
- line = dr->buf.line;
1322
- col = dr->buf.col + 1;
1323
- c = read_quoted_value(dr);
1324
- attr_value = dr->buf.str;
1325
- if (is_encoding) {
1326
- #if HAVE_RB_ENC_FIND
1327
- dr->encoding = rb_enc_find(dr->buf.str);
1328
- #else
1329
- dr->encoding = dr->buf.str;
1330
- #endif
1331
- is_encoding = 0;
1332
- }
1333
- }
1334
- if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1335
- if (dr->has.attr_value) {
1336
- VALUE args[2];
1337
-
1338
- if (dr->has.pos) {
1339
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1340
- }
1341
- if (dr->has.line) {
1342
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1343
- }
1344
- if (dr->has.column) {
1345
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1346
- }
1347
- args[0] = name;
1348
- args[1] = dr->value_obj;
1349
- rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
1350
- } else if (dr->has.attr) {
1351
- VALUE args[2];
1352
-
1353
- args[0] = name;
1354
- if (dr->options.convert_special) {
1355
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1356
- }
1357
- args[1] = rb_str_new2(attr_value);
1358
- #if HAVE_RB_ENC_ASSOCIATE
1359
- if (0 != dr->encoding) {
1360
- rb_enc_associate(args[1], dr->encoding);
1361
- }
1362
- #endif
1363
- if (dr->has.pos) {
1364
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1365
- }
1366
- if (dr->has.line) {
1367
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1368
- }
1369
- if (dr->has.column) {
1370
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1371
- }
1372
- rb_funcall2(dr->handler, ox_attr_id, 2, args);
1373
- }
1374
- }
1375
- if (is_white(c)) {
1376
- c = buf_next_non_white(&dr->buf);
1377
- }
1213
+ pos = dr->buf.pos + 1;
1214
+ line = dr->buf.line;
1215
+ col = dr->buf.col + 1;
1216
+ c = read_quoted_value(dr);
1217
+ attr_value = dr->buf.str;
1218
+ if (is_encoding) {
1219
+ dr->encoding = rb_enc_find(dr->buf.str);
1220
+ is_encoding = 0;
1221
+ }
1222
+ }
1223
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1224
+ dr->attr_cb(dr, name, attr_value, pos, line, col);
1225
+ }
1226
+ if (is_white(c)) {
1227
+ c = buf_next_non_white(&dr->buf);
1228
+ }
1378
1229
  }
1379
1230
  dr->buf.str = 0;
1380
1231
 
@@ -1384,66 +1235,62 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1384
1235
  /* The character after the word is returned. dr->buf.tail is one past
1385
1236
  * that. dr->buf.str will point to the token which will be '\0' terminated.
1386
1237
  */
1387
- static char
1388
- read_name_token(SaxDrive dr) {
1389
- char c;
1238
+ static char read_name_token(SaxDrive dr) {
1239
+ char c;
1390
1240
 
1391
1241
  dr->buf.str = dr->buf.tail;
1392
- c = buf_get(&dr->buf);
1242
+ c = buf_get(&dr->buf);
1393
1243
  if (is_white(c)) {
1394
- c = buf_next_non_white(&dr->buf);
1244
+ c = buf_next_non_white(&dr->buf);
1395
1245
  dr->buf.str = dr->buf.tail - 1;
1396
1246
  }
1397
1247
  while (1) {
1398
- switch (c) {
1399
- case ' ':
1400
- case '\t':
1401
- case '\f':
1402
- case '?':
1403
- case '=':
1404
- case '/':
1405
- case '>':
1406
- case '<':
1407
- case '\n':
1408
- case '\r':
1409
- *(dr->buf.tail - 1) = '\0';
1410
- return c;
1411
- case '\0':
1248
+ switch (c) {
1249
+ case ' ':
1250
+ case '\t':
1251
+ case '\f':
1252
+ case '?':
1253
+ case '=':
1254
+ case '/':
1255
+ case '>':
1256
+ case '<':
1257
+ case '\n':
1258
+ case '\r': *(dr->buf.tail - 1) = '\0'; return c;
1259
+ case '\0':
1412
1260
  /* documents never terminate after a name token */
1413
1261
  ox_sax_drive_error(dr, NO_TERM "document not terminated");
1414
1262
  return '\0';
1415
- case ':':
1416
- if ('\0' == *dr->options.strip_ns) {
1417
- break;
1418
- } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1419
- dr->buf.str = dr->buf.tail;
1420
- } else if (dr->options.smart && 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1421
- dr->buf.str = dr->buf.tail;
1422
- } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1423
- dr->buf.str = dr->buf.tail;
1424
- }
1425
- break;
1426
- default:
1427
- break;
1428
- }
1263
+ case ':':
1264
+ if ('\0' == *dr->options.strip_ns) {
1265
+ break;
1266
+ } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1267
+ dr->buf.str = dr->buf.tail;
1268
+ } else if (dr->options.smart &&
1269
+ 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1270
+ dr->buf.str = dr->buf.tail;
1271
+ } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1272
+ dr->buf.str = dr->buf.tail;
1273
+ }
1274
+ break;
1275
+ default: break;
1276
+ }
1429
1277
  c = buf_get(&dr->buf);
1430
1278
  }
1431
1279
  return '\0';
1432
1280
  }
1433
1281
 
1434
- /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one past
1435
- * that. dr->buf.str will point to the token which will be '\0' terminated.
1282
+ /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one
1283
+ * past that. dr->buf.str will point to the token which will be '\0' terminated.
1436
1284
  */
1437
- static char
1438
- read_quoted_value(SaxDrive dr) {
1439
- char c;
1285
+ static char read_quoted_value(SaxDrive dr) {
1286
+ char c;
1440
1287
 
1441
1288
  c = buf_get(&dr->buf);
1442
1289
  if (is_white(c)) {
1443
1290
  c = buf_next_non_white(&dr->buf);
1444
1291
  }
1445
1292
  if ('"' == c || '\'' == c) {
1446
- char term = c;
1293
+ char term = c;
1447
1294
 
1448
1295
  dr->buf.str = dr->buf.tail;
1449
1296
  while (term != (c = buf_get(&dr->buf))) {
@@ -1452,187 +1299,185 @@ read_quoted_value(SaxDrive dr) {
1452
1299
  return '\0';
1453
1300
  }
1454
1301
  }
1455
- // dr->buf.tail is one past quote char
1456
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1457
- c = buf_get(&dr->buf);
1458
- return c;
1302
+ // dr->buf.tail is one past quote char
1303
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1304
+ c = buf_get(&dr->buf);
1305
+ return c;
1459
1306
  }
1460
1307
  // not quoted, look for something that terminates the string
1461
1308
  dr->buf.str = dr->buf.tail - 1;
1462
1309
  ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes");
1463
1310
  while ('\0' != (c = buf_get(&dr->buf))) {
1464
- switch (c) {
1465
- case ' ':
1466
- //case '/':
1467
- case '>':
1468
- case '?': // for instructions
1469
- case '\t':
1470
- case '\n':
1471
- case '\r':
1472
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1473
- // dr->buf.tail is in the correct position, one after the word terminator
1474
- return c;
1475
- default:
1476
- break;
1477
- }
1311
+ switch (c) {
1312
+ case ' ':
1313
+ // case '/':
1314
+ case '>':
1315
+ case '?': // for instructions
1316
+ case '\t':
1317
+ case '\n':
1318
+ case '\r':
1319
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1320
+ // dr->buf.tail is in the correct position, one after the word terminator
1321
+ return c;
1322
+ default: break;
1323
+ }
1478
1324
  }
1479
- return '\0'; // should never get here
1325
+ return '\0'; // should never get here
1480
1326
  }
1481
1327
 
1482
- static char*
1483
- read_hex_uint64(char *b, uint64_t *up) {
1484
- uint64_t u = 0;
1485
- char c;
1328
+ static char *read_hex_uint64(char *b, uint64_t *up) {
1329
+ uint64_t u = 0;
1330
+ char c;
1486
1331
 
1487
1332
  for (; ';' != *b; b++) {
1488
- c = *b;
1489
- if ('0' <= c && c <= '9') {
1490
- u = (u << 4) | (uint64_t)(c - '0');
1491
- } else if ('a' <= c && c <= 'f') {
1492
- u = (u << 4) | (uint64_t)(c - 'a' + 10);
1493
- } else if ('A' <= c && c <= 'F') {
1494
- u = (u << 4) | (uint64_t)(c - 'A' + 10);
1495
- } else {
1496
- return 0;
1497
- }
1333
+ c = *b;
1334
+ if ('0' <= c && c <= '9') {
1335
+ u = (u << 4) | (uint64_t)(c - '0');
1336
+ } else if ('a' <= c && c <= 'f') {
1337
+ u = (u << 4) | (uint64_t)(c - 'a' + 10);
1338
+ } else if ('A' <= c && c <= 'F') {
1339
+ u = (u << 4) | (uint64_t)(c - 'A' + 10);
1340
+ } else {
1341
+ return 0;
1342
+ }
1498
1343
  }
1499
1344
  *up = u;
1500
1345
 
1501
1346
  return b;
1502
1347
  }
1503
1348
 
1504
- static char*
1505
- read_10_uint64(char *b, uint64_t *up) {
1506
- uint64_t u = 0;
1507
- char c;
1349
+ static char *read_10_uint64(char *b, uint64_t *up) {
1350
+ uint64_t u = 0;
1351
+ char c;
1508
1352
 
1509
1353
  for (; ';' != *b; b++) {
1510
- c = *b;
1511
- if ('0' <= c && c <= '9') {
1512
- u = (u * 10) + (uint64_t)(c - '0');
1513
- } else {
1514
- return 0;
1515
- }
1354
+ c = *b;
1355
+ if ('0' <= c && c <= '9') {
1356
+ u = (u * 10) + (uint64_t)(c - '0');
1357
+ } else {
1358
+ return 0;
1359
+ }
1516
1360
  }
1517
1361
  *up = u;
1518
1362
 
1519
1363
  return b;
1520
1364
  }
1521
1365
 
1522
- int
1523
- ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1524
- char *s = str;
1525
- char *b = str;
1366
+ int ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1367
+ char *s = str;
1368
+ char *b = str;
1526
1369
 
1527
1370
  while ('\0' != *s) {
1528
- if ('&' == *s) {
1529
- int c = 0;
1530
- char *end;
1371
+ switch (*s) {
1372
+ case '&': {
1373
+ int c = 0;
1374
+ char *end;
1531
1375
 
1532
1376
  s++;
1533
1377
  if ('#' == *s) {
1534
- uint64_t u = 0;
1535
- char x;
1536
-
1537
- s++;
1538
- if ('x' == *s || 'X' == *s) {
1539
- x = *s;
1540
- s++;
1541
- end = read_hex_uint64(s, &u);
1542
- } else {
1543
- x = '\0';
1544
- end = read_10_uint64(s, &u);
1545
- }
1546
- if (0 == end) {
1547
- ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1548
- *b++ = '&';
1549
- *b++ = '#';
1550
- if ('\0' != x) {
1551
- *b++ = x;
1552
- }
1553
- continue;
1554
- }
1555
- if (u <= 0x000000000000007FULL) {
1556
- *b++ = (char)u;
1557
- #if HAVE_RB_ENC_FIND
1558
- } else if (ox_utf8_encoding == dr->encoding) {
1559
- b = ox_ucs_to_utf8_chars(b, u);
1560
- } else if (0 == dr->encoding) {
1561
- dr->encoding = ox_utf8_encoding;
1562
- b = ox_ucs_to_utf8_chars(b, u);
1563
- #else
1564
- } else if (0 == dr->encoding) {
1565
- dr->encoding = UTF8_STR;
1566
- b = ox_ucs_to_utf8_chars(b, u);
1567
- } else if (0 == strcasecmp(UTF8_STR, dr->encoding)) {
1568
- b = ox_ucs_to_utf8_chars(b, u);
1569
- #endif
1570
- } else {
1571
- b = ox_ucs_to_utf8_chars(b, u);
1572
- /*
1573
- ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.");
1574
- *b++ = '&';
1575
- *b++ = '#';
1576
- if ('\0' != x) {
1577
- *b++ = x;
1578
- }
1579
- continue;
1580
- */
1581
- }
1582
- s = end + 1;
1583
- continue;
1378
+ uint64_t u = 0;
1379
+ char x;
1380
+
1381
+ s++;
1382
+ if ('x' == *s || 'X' == *s) {
1383
+ x = *s;
1384
+ s++;
1385
+ end = read_hex_uint64(s, &u);
1386
+ } else {
1387
+ x = '\0';
1388
+ end = read_10_uint64(s, &u);
1389
+ }
1390
+ if (0 == end) {
1391
+ ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1392
+ *b++ = '&';
1393
+ *b++ = '#';
1394
+ if ('\0' != x) {
1395
+ *b++ = x;
1396
+ }
1397
+ continue;
1398
+ }
1399
+ if (u <= 0x000000000000007FULL) {
1400
+ *b++ = (char)u;
1401
+ } else if (ox_utf8_encoding == dr->encoding) {
1402
+ b = ox_ucs_to_utf8_chars(b, u);
1403
+ } else if (0 == dr->encoding) {
1404
+ dr->encoding = ox_utf8_encoding;
1405
+ b = ox_ucs_to_utf8_chars(b, u);
1406
+ } else {
1407
+ b = ox_ucs_to_utf8_chars(b, u);
1408
+ /*
1409
+ ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character
1410
+ sequences."); *b++ = '&'; *b++ = '#'; if ('\0' != x) { *b++ = x;
1411
+ }
1412
+ continue;
1413
+ */
1414
+ }
1415
+ s = end + 1;
1416
+ continue;
1584
1417
  } else if (0 == strncasecmp(s, "lt;", 3)) {
1585
1418
  c = '<';
1586
1419
  s += 3;
1587
- col += 3;
1420
+ col += 3;
1588
1421
  } else if (0 == strncasecmp(s, "gt;", 3)) {
1589
1422
  c = '>';
1590
1423
  s += 3;
1591
- col += 3;
1424
+ col += 3;
1592
1425
  } else if (0 == strncasecmp(s, "amp;", 4)) {
1593
1426
  c = '&';
1594
1427
  s += 4;
1595
- col += 4;
1428
+ col += 4;
1596
1429
  } else if (0 == strncasecmp(s, "quot;", 5)) {
1597
1430
  c = '"';
1598
1431
  s += 5;
1599
- col += 5;
1432
+ col += 5;
1600
1433
  } else if (0 == strncasecmp(s, "apos;", 5)) {
1601
1434
  c = '\'';
1602
1435
  s += 5;
1603
1436
  } else {
1604
- char key[16];
1605
- char *k = key;
1606
- char *kend = key + sizeof(key) - 1;
1607
- char *bn;
1608
- char *s2 = s;
1609
-
1610
- for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1611
- if (kend <= k) {
1612
- k = key;
1613
- break;
1614
- }
1615
- *k = *s2;
1616
- }
1617
- *k = '\0';
1618
- if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1619
- ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1620
- c = '&';
1621
- } else {
1622
- b = bn;
1623
- s = s2 + 1;
1624
- continue;
1625
- }
1437
+ char key[16];
1438
+ char *k = key;
1439
+ char *kend = key + sizeof(key) - 1;
1440
+ char *bn;
1441
+ char *s2 = s;
1442
+
1443
+ for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1444
+ if (kend <= k) {
1445
+ k = key;
1446
+ break;
1447
+ }
1448
+ *k = *s2;
1449
+ }
1450
+ *k = '\0';
1451
+ if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1452
+ ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1453
+ c = '&';
1454
+ } else {
1455
+ b = bn;
1456
+ s = s2 + 1;
1457
+ continue;
1458
+ }
1626
1459
  }
1627
1460
  *b++ = (char)c;
1628
- col++;
1629
- } else {
1630
- if ('\n' == *s) {
1631
- line++;
1632
- col = 0;
1461
+ col++;
1462
+ break;
1463
+ }
1464
+ case '\r':
1465
+ s++;
1466
+ if ('\n' == *s) {
1467
+ continue;
1633
1468
  }
1634
- col++;
1469
+ line++;
1470
+ col = 1;
1471
+ *b++ = '\n';
1472
+ break;
1473
+ case '\n':
1474
+ line++;
1475
+ col = 0;
1476
+ // fall through
1477
+ default:
1478
+ col++;
1635
1479
  *b++ = *s++;
1480
+ break;
1636
1481
  }
1637
1482
  }
1638
1483
  *b = '\0';
@@ -1640,64 +1485,43 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1640
1485
  return 0;
1641
1486
  }
1642
1487
 
1643
- static void
1644
- hint_clear_empty(SaxDrive dr) {
1645
- Nv nv;
1488
+ static void hint_clear_empty(SaxDrive dr) {
1489
+ Nv nv;
1646
1490
 
1647
1491
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1648
- if (0 == nv->hint) {
1649
- break;
1650
- }
1651
- if (nv->hint->empty) {
1652
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1653
- stack_pop(&dr->stack);
1654
- } else {
1655
- break;
1656
- }
1492
+ if (0 == nv->hint) {
1493
+ break;
1494
+ }
1495
+ if (nv->hint->empty) {
1496
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1497
+ stack_pop(&dr->stack);
1498
+ } else {
1499
+ break;
1500
+ }
1657
1501
  }
1658
1502
  }
1659
1503
 
1660
- static Nv
1661
- hint_try_close(SaxDrive dr, const char *name) {
1662
- Hint h = ox_hint_find(dr->options.hints, name);
1663
- Nv nv;
1504
+ static Nv hint_try_close(SaxDrive dr, const char *name) {
1505
+ Hint h = ox_hint_find(dr->options.hints, name);
1506
+ Nv nv;
1664
1507
 
1665
1508
  if (0 == h) {
1666
- return 0;
1509
+ return 0;
1667
1510
  }
1668
1511
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1669
- if (0 == strcasecmp(name, nv->name)) {
1670
- stack_pop(&dr->stack);
1671
- return nv;
1672
- }
1673
- if (0 == nv->hint) {
1674
- break;
1675
- }
1676
- if (nv->hint->empty) {
1677
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1678
- dr->stack.tail = nv;
1679
- } else {
1680
- break;
1681
- }
1512
+ if (0 == strcasecmp(name, nv->name)) {
1513
+ stack_pop(&dr->stack);
1514
+ return nv;
1515
+ }
1516
+ if (0 == nv->hint) {
1517
+ break;
1518
+ }
1519
+ if (nv->hint->empty) {
1520
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1521
+ dr->stack.tail = nv;
1522
+ } else {
1523
+ break;
1524
+ }
1682
1525
  }
1683
1526
  return 0;
1684
1527
  }
1685
-
1686
- static void
1687
- end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
1688
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1689
- if (dr->has.pos) {
1690
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1691
- }
1692
- if (dr->has.line) {
1693
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1694
- }
1695
- if (dr->has.column) {
1696
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1697
- }
1698
- rb_funcall(dr->handler, ox_end_element_id, 1, name);
1699
- }
1700
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1701
- dr->blocked--;
1702
- }
1703
- }