ox 2.14.5 → 2.14.9

Sign up to get free protection for your applications and to get access to all the features.
data/ext/ox/sax.c CHANGED
@@ -4,66 +4,63 @@
4
4
  */
5
5
 
6
6
  #include <ctype.h>
7
- #include <stdlib.h>
8
7
  #include <errno.h>
9
8
  #include <stdio.h>
9
+ #include <stdlib.h>
10
10
  #include <strings.h>
11
11
  #include <sys/types.h>
12
12
  #if HAVE_SYS_UIO_H
13
13
  #include <sys/uio.h>
14
14
  #endif
15
- #include <unistd.h>
16
15
  #include <time.h>
16
+ #include <unistd.h>
17
17
 
18
+ #include "intern.h"
19
+ #include "ox.h"
18
20
  #include "ruby.h"
19
- #if HAVE_RB_ENC_ASSOCIATE
20
21
  #include "ruby/encoding.h"
21
- #endif
22
- #include "ox.h"
23
22
  #include "sax.h"
24
- #include "sax_stack.h"
25
23
  #include "sax_buf.h"
24
+ #include "sax_stack.h"
26
25
  #include "special.h"
27
26
 
28
- #define NAME_MISMATCH 1
27
+ #define NAME_MISMATCH 1
29
28
 
30
- #define START_STATE 1
31
- #define BODY_STATE 2
32
- #define AFTER_STATE 3
29
+ #define START_STATE 1
30
+ #define BODY_STATE 2
31
+ #define AFTER_STATE 3
33
32
 
34
33
  // error prefixes
35
- #define BAD_BOM "Bad BOM: "
36
- #define NO_TERM "Not Terminated: "
37
- #define INVALID_FORMAT "Invalid Format: "
38
- #define CASE_ERROR "Case Error: "
39
- #define OUT_OF_ORDER "Out of Order: "
40
- #define WRONG_CHAR "Unexpected Character: "
41
- #define EL_MISMATCH "Start End Mismatch: "
42
- #define INV_ELEMENT "Invalid Element: "
43
-
44
- #define UTF8_STR "UTF-8"
45
-
46
- static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
47
- static void parse(SaxDrive dr);
34
+ #define BAD_BOM "Bad BOM: "
35
+ #define NO_TERM "Not Terminated: "
36
+ #define INVALID_FORMAT "Invalid Format: "
37
+ #define CASE_ERROR "Case Error: "
38
+ #define OUT_OF_ORDER "Out of Order: "
39
+ #define WRONG_CHAR "Unexpected Character: "
40
+ #define EL_MISMATCH "Start End Mismatch: "
41
+ #define INV_ELEMENT "Invalid Element: "
42
+
43
+ #define UTF8_STR "UTF-8"
44
+
45
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
46
+ static void parse(SaxDrive dr);
48
47
  // All read functions should return the next character after the 'thing' that was read and leave dr->cur one after that.
49
- static char read_instruction(SaxDrive dr);
50
- static char read_doctype(SaxDrive dr);
51
- static char read_cdata(SaxDrive dr);
52
- static char read_comment(SaxDrive dr);
53
- static char read_element_start(SaxDrive dr);
54
- static char read_element_end(SaxDrive dr);
55
- static char read_text(SaxDrive dr);
56
- static char read_jump(SaxDrive dr, const char *pat);
57
- static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
58
- static char read_name_token(SaxDrive dr);
59
- static char read_quoted_value(SaxDrive dr);
60
-
61
- static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h);
62
-
63
- static void hint_clear_empty(SaxDrive dr);
64
- static Nv hint_try_close(SaxDrive dr, const char *name);
65
-
66
- VALUE ox_sax_value_class = Qnil;
48
+ static char read_instruction(SaxDrive dr);
49
+ static char read_doctype(SaxDrive dr);
50
+ static char read_cdata(SaxDrive dr);
51
+ static char read_comment(SaxDrive dr);
52
+ static char read_element_start(SaxDrive dr);
53
+ static char read_element_end(SaxDrive dr);
54
+ static char read_text(SaxDrive dr);
55
+ static char read_jump(SaxDrive dr, const char *pat);
56
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h);
57
+ static char read_name_token(SaxDrive dr);
58
+ static char read_quoted_value(SaxDrive dr);
59
+
60
+ static void hint_clear_empty(SaxDrive dr);
61
+ static Nv hint_try_close(SaxDrive dr, const char *name);
62
+
63
+ VALUE ox_sax_value_class = Qnil;
67
64
 
68
65
  static VALUE protect_parse(VALUE drp) {
69
66
  parse((SaxDrive)drp);
@@ -71,559 +68,561 @@ static VALUE protect_parse(VALUE drp) {
71
68
  return Qnil;
72
69
  }
73
70
 
74
- #if HAVE_RB_ENC_ASSOCIATE
75
- static int
76
- str_is_ascii(const char *s) {
77
- for (; '\0' != *s; s++) {
78
- if (*s < ' ' || '~' < *s) {
79
- return 0;
80
- }
81
- }
82
- return 1;
83
- }
84
- #endif
85
-
86
71
  VALUE
87
- str2sym(SaxDrive dr, const char *str, const char **strp) {
88
- VALUE *slot;
89
- VALUE sym;
72
+ str2sym(SaxDrive dr, const char *str, size_t len, const char **strp) {
73
+ VALUE sym;
90
74
 
91
75
  if (dr->options.symbolize) {
92
- if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
93
- #if HAVE_RB_ENC_ASSOCIATE
94
- if (0 != dr->encoding && !str_is_ascii(str)) {
95
- VALUE rstr = rb_str_new2(str);
96
-
97
- // TBD if sym can be pinned down then use this all the time
98
- rb_enc_associate(rstr, dr->encoding);
99
- sym = rb_funcall(rstr, ox_to_sym_id, 0);
100
- *slot = Qundef;
101
- } else {
102
- sym = ID2SYM(rb_intern(str));
103
- *slot = sym;
104
- }
105
- #else
106
- sym = ID2SYM(rb_intern(str));
107
- *slot = sym;
108
- #endif
109
- }
76
+ sym = ox_sym_intern(str, len, strp);
110
77
  } else {
111
- sym = rb_str_new2(str);
112
- #if HAVE_RB_ENC_ASSOCIATE
113
- if (0 != dr->encoding) {
114
- rb_enc_associate(sym, dr->encoding);
115
- }
116
- #endif
117
- if (0 != strp) {
118
- *strp = StringValuePtr(sym);
119
- }
78
+ sym = dr->get_name(str, len, dr->encoding, strp);
120
79
  }
121
80
  return sym;
122
81
  }
123
82
 
124
- void
125
- ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
126
- struct _saxDrive dr;
127
- int line = 0;
83
+ void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
84
+ #if HAVE_RB_EXT_RACTOR_SAFE
85
+ rb_ext_ractor_safe(true);
86
+ #endif
87
+ struct _saxDrive dr;
88
+ int line = 0;
128
89
 
129
90
  sax_drive_init(&dr, handler, io, options);
130
- #if 0
131
- printf("*** sax_parse with these flags\n");
132
- printf(" has_instruct = %s\n", dr.has.instruct ? "true" : "false");
133
- printf(" has_end_instruct = %s\n", dr.has.end_instruct ? "true" : "false");
134
- printf(" has_attr = %s\n", dr.has.attr ? "true" : "false");
135
- printf(" has_attr_value = %s\n", dr.has.attr_value ? "true" : "false");
136
- printf(" has_attrs_done = %s\n", dr.has.attrs_done ? "true" : "false");
137
- printf(" has_doctype = %s\n", dr.has.doctype ? "true" : "false");
138
- printf(" has_comment = %s\n", dr.has.comment ? "true" : "false");
139
- printf(" has_cdata = %s\n", dr.has.cdata ? "true" : "false");
140
- printf(" has_text = %s\n", dr.has.text ? "true" : "false");
141
- printf(" has_value = %s\n", dr.has.value ? "true" : "false");
142
- printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false");
143
- printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false");
144
- printf(" has_error = %s\n", dr.has.error ? "true" : "false");
145
- printf(" has_pos = %s\n", dr.has.pos ? "true" : "false");
146
- printf(" has_line = %s\n", dr.has.line ? "true" : "false");
147
- printf(" has_column = %s\n", dr.has.column ? "true" : "false");
148
- #endif
149
- //parse(&dr);
150
91
  rb_protect(protect_parse, (VALUE)&dr, &line);
151
92
  ox_sax_drive_cleanup(&dr);
152
93
  if (0 != line) {
153
- rb_jump_tag(line);
94
+ rb_jump_tag(line);
95
+ }
96
+ }
97
+
98
+ static void set_long_noop(VALUE handler, long pos) {
99
+ }
100
+
101
+ static void set_pos(VALUE handler, long pos) {
102
+ rb_ivar_set(handler, ox_at_pos_id, LONG2NUM(pos));
103
+ }
104
+
105
+ static void set_line(VALUE handler, long line) {
106
+ rb_ivar_set(handler, ox_at_line_id, LONG2NUM(line));
107
+ }
108
+
109
+ static void set_col(VALUE handler, long col) {
110
+ rb_ivar_set(handler, ox_at_column_id, LONG2NUM(col));
111
+ }
112
+
113
+ static void attr_noop(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
114
+ }
115
+
116
+ static void attr_text(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
117
+ VALUE args[2];
118
+
119
+ args[0] = name;
120
+ if (dr->options.convert_special) {
121
+ ox_sax_collapse_special(dr, value, pos, line, col);
122
+ }
123
+ args[1] = rb_str_new2(value);
124
+ if (0 != dr->encoding) {
125
+ rb_enc_associate(args[1], dr->encoding);
126
+ }
127
+ dr->set_pos(dr->handler, pos);
128
+ dr->set_line(dr->handler, line);
129
+ dr->set_col(dr->handler, col);
130
+ rb_funcall2(dr->handler, ox_attr_id, 2, args);
131
+ }
132
+
133
+ static void attr_value(SaxDrive dr, VALUE name, char *value, long pos, long line, long col) {
134
+ VALUE args[2];
135
+
136
+ dr->set_pos(dr->handler, pos);
137
+ dr->set_line(dr->handler, line);
138
+ dr->set_col(dr->handler, col);
139
+ args[0] = name;
140
+ args[1] = dr->value_obj;
141
+ rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
142
+ }
143
+
144
+ static void attrs_done_noop(VALUE handler) {
145
+ }
146
+
147
+ static void attrs_done(VALUE handler) {
148
+ rb_funcall(handler, ox_attrs_done_id, 0);
149
+ }
150
+
151
+ static VALUE instruct_noop(SaxDrive dr, const char *target, long pos, long line, long col) {
152
+ return Qnil;
153
+ }
154
+
155
+ static VALUE instruct(SaxDrive dr, const char *target, long pos, long line, long col) {
156
+ VALUE arg = rb_str_new2(target);
157
+
158
+ dr->set_pos(dr->handler, pos);
159
+ dr->set_line(dr->handler, line);
160
+ dr->set_col(dr->handler, col);
161
+ rb_funcall(dr->handler, ox_instruct_id, 1, arg);
162
+
163
+ return arg;
164
+ }
165
+
166
+ static VALUE instruct_just_value(SaxDrive dr, const char *target, long pos, long line, long col) {
167
+ return rb_str_new2(target);
168
+ }
169
+
170
+ static void end_instruct_noop(SaxDrive dr, VALUE target, long pos, long line, long col) {
171
+ }
172
+
173
+ static void end_instruct(SaxDrive dr, VALUE target, long pos, long line, long col) {
174
+ dr->set_pos(dr->handler, pos);
175
+ dr->set_line(dr->handler, line);
176
+ dr->set_col(dr->handler, col);
177
+ rb_funcall(dr->handler, ox_end_instruct_id, 1, target);
178
+ }
179
+
180
+ static void dr_loc_noop(SaxDrive dr, long pos, long line, long col) {
181
+ }
182
+
183
+ static void comment(SaxDrive dr, long pos, long line, long col) {
184
+ if (!dr->blocked) {
185
+ Nv parent = stack_peek(&dr->stack);
186
+ Hint h = ox_hint_find(dr->options.hints, "!--");
187
+
188
+ if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
189
+ (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
190
+ VALUE arg = rb_str_new2(dr->buf.str);
191
+
192
+ if (0 != dr->encoding) {
193
+ rb_enc_associate(arg, dr->encoding);
194
+ }
195
+ dr->set_pos(dr->handler, pos);
196
+ dr->set_line(dr->handler, line);
197
+ dr->set_col(dr->handler, col);
198
+ rb_funcall(dr->handler, ox_comment_id, 1, arg);
199
+ }
200
+ }
201
+ }
202
+
203
+ static void cdata(SaxDrive dr, long pos, long line, long col) {
204
+ Nv parent = stack_peek(&dr->stack);
205
+
206
+ if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
207
+ VALUE arg = rb_str_new2(dr->buf.str);
208
+
209
+ if (0 != dr->encoding) {
210
+ rb_enc_associate(arg, dr->encoding);
211
+ }
212
+ dr->set_pos(dr->handler, pos);
213
+ dr->set_line(dr->handler, line);
214
+ dr->set_col(dr->handler, col);
215
+ rb_funcall(dr->handler, ox_cdata_id, 1, arg);
216
+ }
217
+ }
218
+
219
+ static void doctype(SaxDrive dr, long pos, long line, long col) {
220
+ dr->set_pos(dr->handler, pos);
221
+ dr->set_line(dr->handler, line);
222
+ dr->set_col(dr->handler, col);
223
+ rb_funcall(dr->handler, ox_doctype_id, 1, rb_str_new2(dr->buf.str));
224
+ }
225
+
226
+ static void error_noop(SaxDrive dr, const char *msg, long pos, long line, long col) {
227
+ }
228
+
229
+ static void error(SaxDrive dr, const char *msg, long pos, long line, long col) {
230
+ VALUE args[3];
231
+
232
+ args[0] = rb_str_new2(msg);
233
+ args[1] = LONG2NUM(line);
234
+ args[2] = LONG2NUM(col);
235
+ dr->set_pos(dr->handler, pos);
236
+ dr->set_line(dr->handler, line);
237
+ dr->set_col(dr->handler, col);
238
+ rb_funcall2(dr->handler, ox_error_id, 3, args);
239
+ }
240
+
241
+ static void end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
242
+ if (dr->has_end_element && 0 >= dr->blocked &&
243
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
244
+ dr->set_pos(dr->handler, pos);
245
+ dr->set_line(dr->handler, line);
246
+ dr->set_col(dr->handler, col);
247
+ rb_funcall(dr->handler, ox_end_element_id, 1, name);
248
+ }
249
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
250
+ dr->blocked--;
154
251
  }
155
252
  }
156
253
 
157
- static void
158
- sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
254
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
159
255
  ox_sax_buf_init(&dr->buf, io);
160
256
  dr->buf.dr = dr;
161
257
  stack_init(&dr->stack);
162
- dr->handler = handler;
258
+ dr->handler = handler;
163
259
  dr->value_obj = Data_Wrap_Struct(ox_sax_value_class, 0, 0, dr);
164
260
  rb_gc_register_address(&dr->value_obj);
165
261
  dr->options = *options;
166
- dr->err = 0;
262
+ dr->err = 0;
167
263
  dr->blocked = 0;
168
- dr->abort = false;
169
- has_init(&dr->has, handler);
170
- #if HAVE_RB_ENC_FIND
171
- if ('\0' == *ox_default_options.encoding) {
172
- VALUE encoding;
264
+ dr->abort = false;
265
+
266
+ dr->set_pos = (Qtrue == rb_ivar_defined(handler, ox_at_pos_id)) ? set_pos : set_long_noop;
267
+ dr->set_line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id)) ? set_line : set_long_noop;
268
+ dr->set_col = (Qtrue == rb_ivar_defined(handler, ox_at_column_id)) ? set_col : set_long_noop;
269
+ if (rb_respond_to(handler, ox_attr_value_id)) {
270
+ dr->attr_cb = attr_value;
271
+ dr->want_attr_name = true;
272
+ } else if (rb_respond_to(handler, ox_attr_id)) {
273
+ dr->attr_cb = attr_text;
274
+ dr->want_attr_name = true;
275
+ } else {
276
+ dr->attr_cb = attr_noop;
277
+ dr->want_attr_name = false;
278
+ }
279
+ dr->attrs_done = rb_respond_to(handler, ox_attrs_done_id) ? attrs_done : attrs_done_noop;
280
+ dr->instruct = rb_respond_to(handler, ox_instruct_id) ? instruct : instruct_noop;
281
+ dr->end_instruct = rb_respond_to(handler, ox_end_instruct_id) ? end_instruct : end_instruct_noop;
282
+ if (rb_respond_to(handler, ox_end_instruct_id) && !rb_respond_to(handler, ox_instruct_id)) {
283
+ dr->instruct = instruct_just_value;
284
+ }
285
+ dr->doctype = rb_respond_to(handler, ox_doctype_id) ? doctype : dr_loc_noop;
286
+ dr->comment = rb_respond_to(handler, ox_comment_id) ? comment : dr_loc_noop;
287
+ dr->cdata = rb_respond_to(handler, ox_cdata_id) ? cdata : dr_loc_noop;
288
+ dr->error = rb_respond_to(handler, ox_error_id) ? error : error_noop;
173
289
 
174
- dr->encoding = 0;
175
- if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
176
- int e = rb_enc_get_index(encoding);
177
- if (0 <= e) {
178
- dr->encoding = rb_enc_from_index(e);
179
- }
180
- }
290
+ dr->has_text = rb_respond_to(handler, ox_text_id);
291
+ dr->has_value = rb_respond_to(handler, ox_value_id);
292
+ dr->has_start_element = rb_respond_to(handler, ox_start_element_id);
293
+ dr->has_end_element = rb_respond_to(handler, ox_end_element_id);
294
+
295
+ if ('\0' == *ox_default_options.encoding) {
296
+ VALUE encoding;
297
+
298
+ dr->encoding = 0;
299
+ if (rb_respond_to(io, ox_external_encoding_id) &&
300
+ Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
301
+ int e = rb_enc_get_index(encoding);
302
+ if (0 <= e) {
303
+ dr->encoding = rb_enc_from_index(e);
304
+ }
305
+ }
181
306
  } else {
182
307
  dr->encoding = rb_enc_find(ox_default_options.encoding);
183
308
  }
184
- #else
185
- dr->encoding = 0;
186
- #endif
309
+ dr->utf8 = (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding);
310
+ if (NULL == dr->encoding || rb_utf8_encoding() == dr->encoding) { // UTF-8
311
+ dr->get_name = dr->options.symbolize ? ox_utf8_sym : ox_utf8_name; // TBD UTF8 sym?
312
+ } else {
313
+ dr->get_name = dr->options.symbolize ? ox_enc_sym : ox_enc_name;
314
+ }
187
315
  }
188
316
 
189
- void
190
- ox_sax_drive_cleanup(SaxDrive dr) {
317
+ void ox_sax_drive_cleanup(SaxDrive dr) {
191
318
  rb_gc_unregister_address(&dr->value_obj);
192
319
  buf_cleanup(&dr->buf);
193
320
  stack_cleanup(&dr->stack);
194
321
  }
195
322
 
196
- static void
197
- ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
198
- if (dr->has.error) {
199
- VALUE args[3];
200
-
201
- args[0] = rb_str_new2(msg);
202
- args[1] = LONG2NUM(line);
203
- args[2] = LONG2NUM(col);
204
- if (dr->has.pos) {
205
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
206
- }
207
- if (dr->has.pos) {
208
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
209
- }
210
- if (dr->has.line) {
211
- rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
212
- }
213
- if (dr->has.column) {
214
- rb_ivar_set(dr->handler, ox_at_column_id, args[2]);
215
- }
216
- rb_funcall2(dr->handler, ox_error_id, 3, args);
217
- }
323
+ static void ox_sax_drive_error_at(SaxDrive dr, const char *msg, off_t pos, off_t line, off_t col) {
324
+ dr->error(dr, msg, pos, line, col);
218
325
  }
219
326
 
220
- void
221
- ox_sax_drive_error(SaxDrive dr, const char *msg) {
327
+ void ox_sax_drive_error(SaxDrive dr, const char *msg) {
222
328
  ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
223
329
  }
224
330
 
225
- static char
226
- skipBOM(SaxDrive dr) {
227
- char c = buf_get(&dr->buf);
331
+ static char skipBOM(SaxDrive dr) {
332
+ char c = buf_get(&dr->buf);
228
333
 
229
334
  if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
230
- if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
231
- #if HAVE_RB_ENC_FIND
232
- dr->encoding = ox_utf8_encoding;
233
- #else
234
- dr->encoding = UTF8_STR;
235
- #endif
236
- c = buf_get(&dr->buf);
237
- } else {
238
- ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
239
- c = '\0';
240
- }
335
+ if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
336
+ dr->encoding = ox_utf8_encoding;
337
+ c = buf_get(&dr->buf);
338
+ } else {
339
+ ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
340
+ c = '\0';
341
+ }
241
342
  }
242
343
  return c;
243
344
  }
244
345
 
245
- static void
246
- parse(SaxDrive dr) {
247
- char c = skipBOM(dr);
248
- int state = START_STATE;
249
- Nv parent;
346
+ static void parse(SaxDrive dr) {
347
+ char c = skipBOM(dr);
348
+ int state = START_STATE;
349
+ Nv parent;
250
350
 
251
351
  while ('\0' != c) {
252
- buf_protect(&dr->buf);
253
- if ('<' == c) {
254
- c = buf_get(&dr->buf);
255
- switch (c) {
256
- case '?': /* instructions (xml or otherwise) */
257
- c = read_instruction(dr);
258
- break;
259
- case '!': /* comment or doctype */
260
- buf_protect(&dr->buf);
261
- c = buf_get(&dr->buf);
262
- if ('\0' == c) {
263
- ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
264
-
265
- goto DONE;
266
- } else if ('-' == c) {
267
- c = buf_get(&dr->buf); /* skip first - and get next character */
268
- if ('-' != c) {
269
- ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
270
- } else {
271
- c = buf_get(&dr->buf); /* skip second - */
272
- }
273
- c = read_comment(dr);
274
- } else {
275
- int i;
276
- int spaced = 0;
277
- off_t pos = dr->buf.pos + 1;
278
- off_t line = dr->buf.line;
279
- off_t col = dr->buf.col + 1;
280
-
281
- if (is_white(c)) {
282
- spaced = 1;
283
- c = buf_next_non_white(&dr->buf);
284
- }
285
- dr->buf.str = dr->buf.tail - 1;
286
- for (i = 7; 0 < i; i--) {
287
- c = buf_get(&dr->buf);
288
- }
289
- if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
290
- if (spaced) {
291
- ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
292
- }
293
- if (START_STATE != state) {
294
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
295
- }
296
- c = read_doctype(dr);
297
- } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
298
- if (!dr->options.smart) {
299
- ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
300
- }
301
- if (START_STATE != state) {
302
- ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
303
- }
304
- c = read_doctype(dr);
305
- } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
306
- if (spaced) {
307
- ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
308
- }
309
- c = read_cdata(dr);
310
- } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
311
- if (!dr->options.smart) {
312
- ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
313
- }
314
- c = read_cdata(dr);
315
- } else {
316
- Nv parent = stack_peek(&dr->stack);
317
-
318
- if (0 != parent) {
319
- parent->childCnt++;
320
- }
321
- ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
322
- c = read_name_token(dr);
323
- if ('>' == c) {
324
- c = buf_get(&dr->buf);
325
- }
326
- }
327
- }
328
- break;
329
- case '/': /* element end */
330
- parent = stack_peek(&dr->stack);
331
- if (0 != parent && 0 == parent->childCnt && dr->has.text && !dr->blocked) {
332
- VALUE args[1];
333
- off_t pos = dr->buf.pos;
334
- off_t line = dr->buf.line;
335
- off_t col = dr->buf.col - 1;
336
-
337
- args[0] = rb_str_new2("");
338
- #if HAVE_RB_ENC_ASSOCIATE
339
- if (0 != dr->encoding) {
340
- rb_enc_associate(args[0], dr->encoding);
341
- }
342
- #endif
343
- if (dr->has.pos) {
344
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
345
- }
346
- if (dr->has.line) {
347
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
348
- }
349
- if (dr->has.column) {
350
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
351
- }
352
- rb_funcall2(dr->handler, ox_text_id, 1, args);
353
- }
354
- c = read_element_end(dr);
355
- if (0 == stack_peek(&dr->stack)) {
356
- state = AFTER_STATE;
357
- }
358
- break;
359
- case '\0':
360
- goto DONE;
361
- default:
362
- buf_backup(&dr->buf);
363
- if (AFTER_STATE == state) {
364
- ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
365
- }
366
- state = BODY_STATE;
367
- c = read_element_start(dr);
368
- if (0 == stack_peek(&dr->stack)) {
369
- state = AFTER_STATE;
370
- }
371
- break;
372
- }
373
- } else {
374
- buf_reset(&dr->buf);
375
- c = read_text(dr);
376
- }
352
+ buf_protect(&dr->buf);
353
+ if ('<' == c) {
354
+ c = buf_get(&dr->buf);
355
+ switch (c) {
356
+ case '?': /* instructions (xml or otherwise) */ c = read_instruction(dr); break;
357
+ case '!': /* comment or doctype */
358
+ buf_protect(&dr->buf);
359
+ c = buf_get(&dr->buf);
360
+ if ('\0' == c) {
361
+ ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
362
+
363
+ goto DONE;
364
+ } else if ('-' == c) {
365
+ c = buf_get(&dr->buf); /* skip first - and get next character */
366
+ if ('-' != c) {
367
+ ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
368
+ } else {
369
+ c = buf_get(&dr->buf); /* skip second - */
370
+ }
371
+ c = read_comment(dr);
372
+ } else {
373
+ int i;
374
+ int spaced = 0;
375
+ off_t pos = dr->buf.pos + 1;
376
+ off_t line = dr->buf.line;
377
+ off_t col = dr->buf.col + 1;
378
+
379
+ if (is_white(c)) {
380
+ spaced = 1;
381
+ c = buf_next_non_white(&dr->buf);
382
+ }
383
+ dr->buf.str = dr->buf.tail - 1;
384
+ for (i = 7; 0 < i; i--) {
385
+ c = buf_get(&dr->buf);
386
+ }
387
+ if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
388
+ if (spaced) {
389
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
390
+ }
391
+ if (START_STATE != state) {
392
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
393
+ }
394
+ c = read_doctype(dr);
395
+ } else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
396
+ if (!dr->options.smart) {
397
+ ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
398
+ }
399
+ if (START_STATE != state) {
400
+ ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
401
+ }
402
+ c = read_doctype(dr);
403
+ } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
404
+ if (spaced) {
405
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
406
+ }
407
+ c = read_cdata(dr);
408
+ } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
409
+ if (!dr->options.smart) {
410
+ ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
411
+ }
412
+ c = read_cdata(dr);
413
+ } else {
414
+ Nv parent = stack_peek(&dr->stack);
415
+
416
+ if (0 != parent) {
417
+ parent->childCnt++;
418
+ }
419
+ ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
420
+ c = read_name_token(dr);
421
+ if ('>' == c) {
422
+ c = buf_get(&dr->buf);
423
+ }
424
+ }
425
+ }
426
+ break;
427
+ case '/': /* element end */
428
+ parent = stack_peek(&dr->stack);
429
+ if (0 != parent && 0 == parent->childCnt && dr->has_text && !dr->blocked) {
430
+ VALUE args[1];
431
+ args[0] = rb_str_new2("");
432
+ if (0 != dr->encoding) {
433
+ rb_enc_associate(args[0], dr->encoding);
434
+ }
435
+ dr->set_pos(dr->handler, dr->buf.pos);
436
+ dr->set_line(dr->handler, dr->buf.line);
437
+ dr->set_col(dr->handler, dr->buf.col);
438
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
439
+ }
440
+ c = read_element_end(dr);
441
+ if (0 == stack_peek(&dr->stack)) {
442
+ state = AFTER_STATE;
443
+ }
444
+ break;
445
+ case '\0': goto DONE;
446
+ default:
447
+ buf_backup(&dr->buf);
448
+ if (AFTER_STATE == state) {
449
+ ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
450
+ }
451
+ state = BODY_STATE;
452
+ c = read_element_start(dr);
453
+ if (0 == stack_peek(&dr->stack)) {
454
+ state = AFTER_STATE;
455
+ }
456
+ break;
457
+ }
458
+ } else {
459
+ buf_reset(&dr->buf);
460
+ c = read_text(dr);
461
+ }
377
462
  }
378
- DONE:
463
+ DONE:
379
464
  if (dr->abort) {
380
- return;
465
+ return;
381
466
  }
382
467
  if (dr->stack.head < dr->stack.tail) {
383
- char msg[256];
384
- Nv sp;
385
-
386
- if (dr->has.pos) {
387
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(dr->buf.pos));
388
- }
389
- if (dr->has.line) {
390
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
391
- }
392
- if (dr->has.column) {
393
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(dr->buf.col));
394
- }
395
- for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
396
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
397
- ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
398
- if (dr->has.end_element && 0 >= dr->blocked &&
399
- (NULL == sp->hint || ActiveOverlay == sp->hint->overlay || NestOverlay == sp->hint->overlay)) {
400
- VALUE args[1];
401
-
402
- args[0] = sp->val;
403
- rb_funcall2(dr->handler, ox_end_element_id, 1, args);
404
- }
405
- if (dr->blocked && NULL != sp->hint && BlockOverlay == sp->hint->overlay) {
406
- dr->blocked--;
407
- }
468
+ char msg[256];
469
+ Nv sp;
470
+
471
+ for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
472
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
473
+ ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
474
+ end_element_cb(dr, sp->val, dr->buf.pos, dr->buf.line, dr->buf.col, sp->hint);
408
475
  }
409
476
  }
410
477
  }
411
478
 
412
- static void
413
- read_content(SaxDrive dr, char *content, size_t len) {
414
- char c;
415
- char *end = content + len;
479
+ static void read_content(SaxDrive dr, char *content, size_t len) {
480
+ char c;
481
+ char *end = content + len;
416
482
 
417
483
  while ('\0' != (c = buf_get(&dr->buf))) {
418
- if (end <= content) {
419
- *content = '\0';
420
- ox_sax_drive_error(dr, "processing instruction content too large");
421
- return;
422
- }
423
- if ('?' == c) {
424
- if ('\0' == (c = buf_get(&dr->buf))) {
425
- ox_sax_drive_error(dr, NO_TERM "document not terminated");
426
- }
427
- if ('>' == c) {
428
- *content = '\0';
429
- return;
430
- } else {
431
- *content++ = c;
432
- }
433
- } else {
434
- *content++ = c;
435
- }
484
+ if (end <= content) {
485
+ *content = '\0';
486
+ ox_sax_drive_error(dr, "processing instruction content too large");
487
+ return;
488
+ }
489
+ if ('?' == c) {
490
+ if ('\0' == (c = buf_get(&dr->buf))) {
491
+ ox_sax_drive_error(dr, NO_TERM "document not terminated");
492
+ }
493
+ if ('>' == c) {
494
+ *content = '\0';
495
+ return;
496
+ } else {
497
+ *content++ = c;
498
+ }
499
+ } else {
500
+ *content++ = c;
501
+ }
436
502
  }
437
503
  *content = '\0';
438
504
  }
439
505
 
440
506
  /* Entered after the "<?" sequence. Ready to read the rest.
441
507
  */
442
- static char
443
- read_instruction(SaxDrive dr) {
444
- char content[4096];
445
- char c;
446
- int coff;
447
- VALUE target = Qnil;
448
- int is_xml;
449
- off_t pos = dr->buf.pos - 1;
450
- off_t line = dr->buf.line;
451
- off_t col = dr->buf.col - 1;
508
+ static char read_instruction(SaxDrive dr) {
509
+ char content[4096];
510
+ char c;
511
+ int coff;
512
+ VALUE target = Qnil;
513
+ int is_xml;
514
+ off_t pos = dr->buf.pos - 1;
515
+ off_t line = dr->buf.line;
516
+ off_t col = dr->buf.col - 1;
452
517
 
453
518
  buf_protect(&dr->buf);
454
519
  if ('\0' == (c = read_name_token(dr))) {
455
520
  return c;
456
521
  }
457
522
  is_xml = (0 == (dr->options.smart ? strcasecmp("xml", dr->buf.str) : strcmp("xml", dr->buf.str)));
458
- if (dr->has.instruct || dr->has.end_instruct) {
459
- target = rb_str_new2(dr->buf.str);
460
- }
461
- if (dr->has.instruct) {
462
- VALUE args[1];
463
-
464
- if (dr->has.pos) {
465
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
466
- }
467
- if (dr->has.line) {
468
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
469
- }
470
- if (dr->has.column) {
471
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
472
- }
473
- args[0] = target;
474
- rb_funcall2(dr->handler, ox_instruct_id, 1, args);
475
- }
523
+
524
+ target = dr->instruct(dr, dr->buf.str, pos, line, col);
476
525
  buf_protect(&dr->buf);
477
- pos = dr->buf.pos;
526
+ pos = dr->buf.pos;
478
527
  line = dr->buf.line;
479
- col = dr->buf.col;
528
+ col = dr->buf.col;
480
529
  read_content(dr, content, sizeof(content) - 1);
481
530
  coff = (int)(dr->buf.tail - dr->buf.head);
482
531
  buf_reset(&dr->buf);
483
532
  dr->err = 0;
484
- c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
485
- if (dr->has.attrs_done) {
486
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
487
- }
533
+ c = read_attrs(dr, c, '?', '?', is_xml, 1, NULL);
534
+ dr->attrs_done(dr->handler);
488
535
  if (dr->err) {
489
- if (dr->has.text) {
490
- VALUE args[1];
536
+ if (dr->has_text) {
537
+ VALUE args[1];
491
538
 
492
- if (dr->options.convert_special) {
493
- ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
494
- }
495
- args[0] = rb_str_new2(content);
496
- #if HAVE_RB_ENC_ASSOCIATE
497
- if (0 != dr->encoding) {
498
- rb_enc_associate(args[0], dr->encoding);
499
- }
500
- #endif
501
- if (dr->has.line) {
502
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
503
- }
504
- if (dr->has.pos) {
505
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
506
- }
507
- if (dr->has.column) {
508
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
509
- }
510
- rb_funcall2(dr->handler, ox_text_id, 1, args);
511
- }
512
- dr->buf.tail = dr->buf.head + coff;
513
- c = buf_get(&dr->buf);
539
+ if (dr->options.convert_special) {
540
+ ox_sax_collapse_special(dr, content, (int)pos, (int)line, (int)col);
541
+ }
542
+ args[0] = rb_str_new2(content);
543
+ if (0 != dr->encoding) {
544
+ rb_enc_associate(args[0], dr->encoding);
545
+ }
546
+ dr->set_pos(dr->handler, pos);
547
+ dr->set_line(dr->handler, line);
548
+ dr->set_col(dr->handler, col);
549
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
550
+ }
551
+ dr->buf.tail = dr->buf.head + coff;
552
+ c = buf_get(&dr->buf);
514
553
  } else {
515
- pos = dr->buf.pos;
516
- line = dr->buf.line;
517
- col = dr->buf.col;
518
- c = buf_next_non_white(&dr->buf);
519
- if ('>' == c) {
520
- c = buf_get(&dr->buf);
521
- } else {
522
- ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
523
- if ('>' == c) {
524
- c = buf_get(&dr->buf);
525
- }
526
- }
527
- }
528
- if (dr->has.end_instruct) {
529
- VALUE args[1];
530
-
531
- if (dr->has.pos) {
532
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
533
- }
534
- if (dr->has.line) {
535
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
536
- }
537
- if (dr->has.column) {
538
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
539
- }
540
- args[0] = target;
541
- rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
554
+ pos = dr->buf.pos;
555
+ line = dr->buf.line;
556
+ col = dr->buf.col;
557
+ c = buf_next_non_white(&dr->buf);
558
+ if ('>' == c) {
559
+ c = buf_get(&dr->buf);
560
+ } else {
561
+ ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
562
+ if ('>' == c) {
563
+ c = buf_get(&dr->buf);
564
+ }
565
+ }
542
566
  }
543
- dr->buf.str = 0;
567
+ dr->end_instruct(dr, target, pos, line, col);
568
+ dr->buf.str = NULL;
544
569
 
545
570
  return c;
546
571
  }
547
572
 
548
- static char
549
- read_delimited(SaxDrive dr, char end) {
550
- char c;
573
+ static char read_delimited(SaxDrive dr, char end) {
574
+ char c;
551
575
 
552
576
  if ('"' == end || '\'' == end) {
553
- while (end != (c = buf_get(&dr->buf))) {
554
- if ('\0' == c) {
555
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
556
- return c;
557
- }
558
- }
577
+ while (end != (c = buf_get(&dr->buf))) {
578
+ if ('\0' == c) {
579
+ ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
580
+ return c;
581
+ }
582
+ }
559
583
  } else {
560
- while (1) {
561
- c = buf_get(&dr->buf);
562
- if (end == c) {
563
- return c;
564
- }
565
- switch (c) {
566
- case '\0':
567
- ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
568
- return c;
569
- case '"':
570
- c = read_delimited(dr, c);
571
- break;
572
- case '\'':
573
- c = read_delimited(dr, c);
574
- break;
575
- case '[':
576
- c = read_delimited(dr, ']');
577
- break;
578
- case '<':
579
- c = read_delimited(dr, '>');
580
- break;
581
- default:
582
- break;
583
- }
584
- }
584
+ while (1) {
585
+ c = buf_get(&dr->buf);
586
+ if (end == c) {
587
+ return c;
588
+ }
589
+ switch (c) {
590
+ case '\0': ox_sax_drive_error(dr, NO_TERM "doctype not terminated"); return c;
591
+ case '"': c = read_delimited(dr, c); break;
592
+ case '\'': c = read_delimited(dr, c); break;
593
+ case '[': c = read_delimited(dr, ']'); break;
594
+ case '<': c = read_delimited(dr, '>'); break;
595
+ default: break;
596
+ }
597
+ }
585
598
  }
586
599
  return c;
587
600
  }
588
601
 
589
602
  /* Entered after the "<!DOCTYPE " sequence. Ready to read the rest.
590
603
  */
591
- static char
592
- read_doctype(SaxDrive dr) {
593
- long pos = (long)(dr->buf.pos - 9);
594
- long line = (long)(dr->buf.line);
595
- long col = (long)(dr->buf.col - 9);
596
- char *s;
597
- Nv parent = stack_peek(&dr->stack);
604
+ static char read_doctype(SaxDrive dr) {
605
+ long pos = (long)(dr->buf.pos - 9);
606
+ long line = (long)(dr->buf.line);
607
+ long col = (long)(dr->buf.col - 9);
608
+ char *s;
609
+ Nv parent = stack_peek(&dr->stack);
598
610
 
599
611
  buf_backup(&dr->buf); /* back up to the start in case the doctype is empty */
600
612
  buf_protect(&dr->buf);
601
613
  read_delimited(dr, '>');
602
614
  if (dr->options.smart && 0 == dr->options.hints) {
603
- for (s = dr->buf.str; is_white(*s); s++) { }
604
- if (0 == strncasecmp("HTML", s, 4)) {
605
- dr->options.hints = ox_hints_html();
606
- }
615
+ for (s = dr->buf.str; is_white(*s); s++) {
616
+ }
617
+ if (0 == strncasecmp("HTML", s, 4)) {
618
+ dr->options.hints = ox_hints_html();
619
+ }
607
620
  }
608
621
  *(dr->buf.tail - 1) = '\0';
609
622
  if (0 != parent) {
610
- parent->childCnt++;
611
- }
612
- if (dr->has.doctype) {
613
- VALUE args[1];
614
-
615
- if (dr->has.pos) {
616
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
617
- }
618
- if (dr->has.line) {
619
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
620
- }
621
- if (dr->has.column) {
622
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
623
- }
624
- args[0] = rb_str_new2(dr->buf.str);
625
- rb_funcall2(dr->handler, ox_doctype_id, 1, args);
623
+ parent->childCnt++;
626
624
  }
625
+ dr->doctype(dr, pos, line, col);
627
626
  dr->buf.str = 0;
628
627
 
629
628
  return buf_get(&dr->buf);
@@ -631,89 +630,65 @@ read_doctype(SaxDrive dr) {
631
630
 
632
631
  /* Entered after the "<![CDATA[" sequence. Ready to read the rest.
633
632
  */
634
- static char
635
- read_cdata(SaxDrive dr) {
636
- char c;
637
- char zero = '\0';
638
- int end = 0;
639
- long pos = (long)(dr->buf.pos - 9);
640
- long line = (long)(dr->buf.line);
641
- long col = (long)(dr->buf.col - 9);
642
- struct _checkPt cp = CHECK_PT_INIT;
643
- Nv parent = stack_peek(&dr->stack);
633
+ static char read_cdata(SaxDrive dr) {
634
+ char c;
635
+ char zero = '\0';
636
+ int end = 0;
637
+ long pos = (long)(dr->buf.pos - 9);
638
+ long line = (long)(dr->buf.line);
639
+ long col = (long)(dr->buf.col - 9);
640
+ struct _checkPt cp = CHECK_PT_INIT;
641
+ Nv parent = stack_peek(&dr->stack);
644
642
 
645
643
  // TBD check parent overlay
646
644
  if (0 != parent) {
647
- parent->childCnt++;
645
+ parent->childCnt++;
648
646
  }
649
647
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
650
648
  buf_protect(&dr->buf);
651
649
  while (1) {
652
650
  c = buf_get(&dr->buf);
653
- switch (c) {
654
- case ']':
655
- end++;
656
- break;
657
- case '>':
651
+ switch (c) {
652
+ case ']': end++; break;
653
+ case '>':
658
654
  if (2 <= end) {
659
655
  *(dr->buf.tail - 3) = '\0';
660
- c = buf_get(&dr->buf);
656
+ c = buf_get(&dr->buf);
661
657
  goto CB;
662
658
  }
663
- if (!buf_checkset(&cp)) {
664
- buf_checkpoint(&dr->buf, &cp);
665
- }
659
+ if (!buf_checkset(&cp)) {
660
+ buf_checkpoint(&dr->buf, &cp);
661
+ }
666
662
  end = 0;
667
- break;
668
- case '<':
669
- if (!buf_checkset(&cp)) {
670
- buf_checkpoint(&dr->buf, &cp);
671
- }
672
- end = 0;
673
- break;
674
- case '\0':
675
- if (buf_checkset(&cp)) {
676
- c = buf_checkback(&dr->buf, &cp);
677
- ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
678
- zero = c;
679
- *(dr->buf.tail - 1) = '\0';
680
- goto CB;
681
- }
663
+ break;
664
+ case '<':
665
+ if (!buf_checkset(&cp)) {
666
+ buf_checkpoint(&dr->buf, &cp);
667
+ }
668
+ end = 0;
669
+ break;
670
+ case '\0':
671
+ if (buf_checkset(&cp)) {
672
+ c = buf_checkback(&dr->buf, &cp);
673
+ ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
674
+ zero = c;
675
+ *(dr->buf.tail - 1) = '\0';
676
+ goto CB;
677
+ }
682
678
  ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
683
679
  return '\0';
684
- default:
685
- if (1 < end && !buf_checkset(&cp)) {
686
- buf_checkpoint(&dr->buf, &cp);
687
- }
688
- end = 0;
689
- break;
690
- }
691
- }
692
- CB:
693
- if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
694
- if (dr->has.cdata) {
695
- VALUE args[1];
696
-
697
- args[0] = rb_str_new2(dr->buf.str);
698
- #if HAVE_RB_ENC_ASSOCIATE
699
- if (0 != dr->encoding) {
700
- rb_enc_associate(args[0], dr->encoding);
701
- }
702
- #endif
703
- if (dr->has.pos) {
704
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
705
- }
706
- if (dr->has.line) {
707
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
708
- }
709
- if (dr->has.column) {
710
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
711
- }
712
- rb_funcall2(dr->handler, ox_cdata_id, 1, args);
713
- }
680
+ default:
681
+ if (1 < end && !buf_checkset(&cp)) {
682
+ buf_checkpoint(&dr->buf, &cp);
683
+ }
684
+ end = 0;
685
+ break;
686
+ }
714
687
  }
688
+ CB:
689
+ dr->cdata(dr, pos, line, col);
715
690
  if ('\0' != zero) {
716
- *(dr->buf.tail - 1) = zero;
691
+ *(dr->buf.tail - 1) = zero;
717
692
  }
718
693
  dr->buf.str = 0;
719
694
 
@@ -722,88 +697,60 @@ read_cdata(SaxDrive dr) {
722
697
 
723
698
  /* Entered after the "<!--" sequence. Ready to read the rest.
724
699
  */
725
- static char
726
- read_comment(SaxDrive dr) {
727
- char c;
728
- char zero = '\0';
729
- int end = 0;
730
- long pos = (long)(dr->buf.pos - 4);
731
- long line = (long)(dr->buf.line);
732
- long col = (long)(dr->buf.col - 4);
733
- struct _checkPt cp = CHECK_PT_INIT;
700
+ static char read_comment(SaxDrive dr) {
701
+ char c;
702
+ char zero = '\0';
703
+ int end = 0;
704
+ long pos = (long)(dr->buf.pos - 4);
705
+ long line = (long)(dr->buf.line);
706
+ long col = (long)(dr->buf.col - 4);
707
+ struct _checkPt cp = CHECK_PT_INIT;
734
708
 
735
709
  buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
736
710
  buf_protect(&dr->buf);
737
711
  while (1) {
738
712
  c = buf_get(&dr->buf);
739
- switch (c) {
740
- case '-':
741
- end++;
742
- break;
743
- case '>':
713
+ switch (c) {
714
+ case '-': end++; break;
715
+ case '>':
744
716
  if (2 <= end) {
745
717
  *(dr->buf.tail - 3) = '\0';
746
- c = buf_get(&dr->buf);
718
+ c = buf_get(&dr->buf);
747
719
  goto CB;
748
720
  }
749
- if (!buf_checkset(&cp)) {
750
- buf_checkpoint(&dr->buf, &cp);
751
- }
721
+ if (!buf_checkset(&cp)) {
722
+ buf_checkpoint(&dr->buf, &cp);
723
+ }
752
724
  end = 0;
753
- break;
754
- case '<':
755
- if (!buf_checkset(&cp)) {
756
- buf_checkpoint(&dr->buf, &cp);
757
- }
758
- end = 0;
759
- break;
760
- case '\0':
761
- if (buf_checkset(&cp)) {
762
- c = buf_checkback(&dr->buf, &cp);
763
- ox_sax_drive_error(dr, NO_TERM "comment not terminated");
764
- zero = c;
765
- *(dr->buf.tail - 1) = '\0';
766
- goto CB;
767
- }
725
+ break;
726
+ case '<':
727
+ if (!buf_checkset(&cp)) {
728
+ buf_checkpoint(&dr->buf, &cp);
729
+ }
730
+ end = 0;
731
+ break;
732
+ case '\0':
733
+ if (buf_checkset(&cp)) {
734
+ c = buf_checkback(&dr->buf, &cp);
735
+ ox_sax_drive_error(dr, NO_TERM "comment not terminated");
736
+ zero = c;
737
+ *(dr->buf.tail - 1) = '\0';
738
+ goto CB;
739
+ }
768
740
  ox_sax_drive_error(dr, NO_TERM "comment not terminated");
769
741
  return '\0';
770
- default:
771
- if (1 < end && !buf_checkset(&cp)) {
772
- buf_checkpoint(&dr->buf, &cp);
773
- }
774
- end = 0;
775
- break;
776
- }
777
- }
778
- CB:
779
- if (dr->has.comment && !dr->blocked) {
780
- VALUE args[1];
781
- Nv parent = stack_peek(&dr->stack);
782
- Hint h = ox_hint_find(dr->options.hints, "!--");
783
-
784
- if (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay ||
785
- (NULL != h && (ActiveOverlay == h->overlay || ActiveOverlay == h->overlay))) {
786
-
787
- args[0] = rb_str_new2(dr->buf.str);
788
- #if HAVE_RB_ENC_ASSOCIATE
789
- if (0 != dr->encoding) {
790
- rb_enc_associate(args[0], dr->encoding);
791
- }
792
- #endif
793
- if (dr->has.pos) {
794
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
795
- }
796
- if (dr->has.line) {
797
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
798
- }
799
- if (dr->has.column) {
800
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
801
- }
802
- rb_funcall2(dr->handler, ox_comment_id, 1, args);
803
- }
742
+ default:
743
+ if (1 < end && !buf_checkset(&cp)) {
744
+ buf_checkpoint(&dr->buf, &cp);
745
+ }
746
+ end = 0;
747
+ break;
748
+ }
804
749
  }
750
+ CB:
751
+ dr->comment(dr, pos, line, col);
805
752
  if ('\0' != zero) {
806
- *(dr->buf.tail - 1) = zero;
753
+ *(dr->buf.tail - 1) = zero;
807
754
  }
808
755
  dr->buf.str = 0;
809
756
 
@@ -813,106 +760,115 @@ read_comment(SaxDrive dr) {
813
760
  /* Entered after the '<' and the first character after that. Returns status
814
761
  * code.
815
762
  */
816
- static char
817
- read_element_start(SaxDrive dr) {
818
- const char *ename = 0;
819
- volatile VALUE name = Qnil;
820
- char c;
821
- int closed;
822
- long pos = (long)(dr->buf.pos);
823
- long line = (long)(dr->buf.line);
824
- long col = (long)(dr->buf.col);
825
- Hint h = NULL;
826
- int stackless = 0;
827
- Nv parent = stack_peek(&dr->stack);
763
+ static char read_element_start(SaxDrive dr) {
764
+ const char *ename = 0;
765
+ volatile VALUE name = Qnil;
766
+ char c;
767
+ int closed;
768
+ long pos = (long)(dr->buf.pos);
769
+ long line = (long)(dr->buf.line);
770
+ long col = (long)(dr->buf.col);
771
+ Hint h = NULL;
772
+ int stackless = 0;
773
+ Nv parent = stack_peek(&dr->stack);
828
774
 
829
775
  if ('\0' == (c = read_name_token(dr))) {
830
776
  return '\0';
831
777
  }
832
778
  if ('\0' == *dr->buf.str) {
833
- char msg[256];
779
+ char msg[256];
834
780
 
835
- snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
836
- ox_sax_drive_error_at(dr, msg, pos, line, col);
781
+ snprintf(msg, sizeof(msg) - 1, "%sempty element", INVALID_FORMAT);
782
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
837
783
 
838
- return buf_get(&dr->buf);
784
+ return buf_get(&dr->buf);
839
785
  }
840
786
  if (0 != parent) {
841
- parent->childCnt++;
787
+ parent->childCnt++;
842
788
  }
843
- if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) && 0 == strcasecmp("html", dr->buf.str)) {
844
- dr->options.hints = ox_hints_html();
789
+ if (dr->options.smart && 0 == dr->options.hints && stack_empty(&dr->stack) &&
790
+ 0 == strcasecmp("html", dr->buf.str)) {
791
+ dr->options.hints = ox_hints_html();
845
792
  }
846
793
  if (NULL != dr->options.hints) {
847
- hint_clear_empty(dr);
848
- h = ox_hint_find(dr->options.hints, dr->buf.str);
849
- if (NULL == h) {
850
- char msg[256];
851
-
852
- snprintf(msg, sizeof(msg), "%s%s is not a valid element type for a %s document type.", INV_ELEMENT, dr->buf.str, dr->options.hints->name);
853
- ox_sax_drive_error(dr, msg);
854
- } else {
855
- Nv top_nv = stack_peek(&dr->stack);
856
-
857
- if (AbortOverlay == h->overlay) {
858
- if (rb_respond_to(dr->handler, ox_abort_id)) {
859
- VALUE args[1];
860
-
861
- args[0] = str2sym(dr, dr->buf.str, NULL);
862
- rb_funcall2(dr->handler, ox_abort_id, 1, args);
863
- }
864
- dr->abort = true;
865
- return '\0';
866
- }
867
- if (BlockOverlay == h->overlay) {
868
- dr->blocked++;
869
- }
870
- if (h->empty) {
871
- stackless = 1;
872
- }
873
- if (0 != top_nv) {
874
- char msg[256];
875
-
876
- if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
877
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be nested in a %s document, closing previous.",
878
- INV_ELEMENT, dr->buf.str, dr->options.hints->name);
879
- ox_sax_drive_error(dr, msg);
880
- stack_pop(&dr->stack);
881
- end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
882
- top_nv = stack_peek(&dr->stack);
883
- }
884
- if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
885
- const char **p;
886
- int ok = 0;
887
-
888
- for (p = h->parents; 0 != *p; p++) {
889
- if (0 == strcasecmp(*p, top_nv->name)) {
890
- ok = 1;
891
- break;
892
- }
893
- }
894
- if (!ok) {
895
- snprintf(msg, sizeof(msg) - 1, "%s%s can not be a child of a %s in a %s document.",
896
- INV_ELEMENT, h->name, top_nv->name, dr->options.hints->name);
897
- ox_sax_drive_error(dr, msg);
898
- }
899
- }
900
- }
901
- }
902
- }
903
- name = str2sym(dr, dr->buf.str, &ename);
904
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
905
- VALUE args[1];
906
-
907
- if (dr->has.pos) {
908
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
909
- }
910
- if (dr->has.line) {
911
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
912
- }
913
- if (dr->has.column) {
914
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
915
- }
794
+ hint_clear_empty(dr);
795
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
796
+ if (NULL == h) {
797
+ char msg[256];
798
+
799
+ snprintf(msg,
800
+ sizeof(msg),
801
+ "%s%s is not a valid element type for a %s document type.",
802
+ INV_ELEMENT,
803
+ dr->buf.str,
804
+ dr->options.hints->name);
805
+ ox_sax_drive_error(dr, msg);
806
+ } else {
807
+ Nv top_nv = stack_peek(&dr->stack);
808
+
809
+ if (AbortOverlay == h->overlay) {
810
+ if (rb_respond_to(dr->handler, ox_abort_id)) {
811
+ VALUE args[1];
812
+
813
+ args[0] = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, NULL);
814
+ rb_funcall2(dr->handler, ox_abort_id, 1, args);
815
+ }
816
+ dr->abort = true;
817
+ return '\0';
818
+ }
819
+ if (BlockOverlay == h->overlay) {
820
+ dr->blocked++;
821
+ }
822
+ if (h->empty) {
823
+ stackless = 1;
824
+ }
825
+ if (0 != top_nv) {
826
+ char msg[256];
827
+
828
+ if (!h->nest && NestOverlay != h->overlay && 0 == strcasecmp(top_nv->name, h->name)) {
829
+ snprintf(msg,
830
+ sizeof(msg) - 1,
831
+ "%s%s can not be nested in a %s document, closing previous.",
832
+ INV_ELEMENT,
833
+ dr->buf.str,
834
+ dr->options.hints->name);
835
+ ox_sax_drive_error(dr, msg);
836
+ stack_pop(&dr->stack);
837
+ end_element_cb(dr, top_nv->val, pos, line, col, top_nv->hint);
838
+ top_nv = stack_peek(&dr->stack);
839
+ }
840
+ if (NULL != top_nv && 0 != h->parents && NestOverlay != h->overlay) {
841
+ const char **p;
842
+ int ok = 0;
843
+
844
+ for (p = h->parents; 0 != *p; p++) {
845
+ if (0 == strcasecmp(*p, top_nv->name)) {
846
+ ok = 1;
847
+ break;
848
+ }
849
+ }
850
+ if (!ok) {
851
+ snprintf(msg,
852
+ sizeof(msg) - 1,
853
+ "%s%s can not be a child of a %s in a %s document.",
854
+ INV_ELEMENT,
855
+ h->name,
856
+ top_nv->name,
857
+ dr->options.hints->name);
858
+ ox_sax_drive_error(dr, msg);
859
+ }
860
+ }
861
+ }
862
+ }
863
+ }
864
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, &ename);
865
+ if (dr->has_start_element && 0 >= dr->blocked &&
866
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
867
+ VALUE args[1];
868
+
869
+ dr->set_pos(dr->handler, pos);
870
+ dr->set_line(dr->handler, line);
871
+ dr->set_col(dr->handler, col);
916
872
  args[0] = name;
917
873
  rb_funcall2(dr->handler, ox_start_element_id, 1, args);
918
874
  }
@@ -921,362 +877,302 @@ read_element_start(SaxDrive dr) {
921
877
  } else if ('>' == c) {
922
878
  closed = 0;
923
879
  } else {
924
- buf_protect(&dr->buf);
880
+ buf_protect(&dr->buf);
925
881
  c = read_attrs(dr, c, '/', '>', 0, 0, h);
926
- if (is_white(c)) {
927
- c = buf_next_non_white(&dr->buf);
928
- }
929
- closed = ('/' == c);
882
+ if (is_white(c)) {
883
+ c = buf_next_non_white(&dr->buf);
884
+ }
885
+ closed = ('/' == c);
930
886
  }
931
- if (dr->has.attrs_done && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
932
- rb_funcall(dr->handler, ox_attrs_done_id, 0);
887
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
888
+ dr->attrs_done(dr->handler);
933
889
  }
934
890
  if (closed) {
935
- c = buf_next_non_white(&dr->buf);
936
- pos = dr->buf.pos;
937
- line = dr->buf.line;
938
- col = dr->buf.col;
939
- end_element_cb(dr, name, pos, line, col, h);
891
+ c = buf_next_non_white(&dr->buf);
892
+
893
+ end_element_cb(dr, name, dr->buf.pos, dr->buf.line, dr->buf.col, h);
940
894
  } else if (stackless) {
941
- end_element_cb(dr, name, pos, line, col, h);
895
+ end_element_cb(dr, name, pos, line, col, h);
942
896
  } else if (NULL != h && h->jump) {
943
- stack_push(&dr->stack, ename, name, h);
944
- if ('>' != c) {
945
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
946
- return c;
947
- }
948
- read_jump(dr, h->name);
949
- return '<';
897
+ stack_push(&dr->stack, ename, name, h);
898
+ if ('>' != c) {
899
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
900
+ return c;
901
+ }
902
+ read_jump(dr, h->name);
903
+ return '<';
950
904
  } else {
951
- stack_push(&dr->stack, ename, name, h);
905
+ stack_push(&dr->stack, ename, name, h);
952
906
  }
953
907
  if ('>' != c) {
954
- ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
955
- return c;
908
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
909
+ return c;
956
910
  }
957
911
  dr->buf.str = 0;
958
912
 
959
913
  return buf_get(&dr->buf);
960
914
  }
961
915
 
962
- static Nv
963
- stack_rev_find(SaxDrive dr, const char *name) {
964
- Nv nv;
916
+ static Nv stack_rev_find(SaxDrive dr, const char *name) {
917
+ Nv nv;
965
918
 
966
919
  for (nv = dr->stack.tail - 1; dr->stack.head <= nv; nv--) {
967
- if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
968
- return nv;
969
- }
920
+ if (0 == (dr->options.smart ? strcasecmp(name, nv->name) : strcmp(name, nv->name))) {
921
+ return nv;
922
+ }
970
923
  }
971
924
  return 0;
972
925
  }
973
926
 
974
- static char
975
- read_element_end(SaxDrive dr) {
976
- VALUE name = Qnil;
977
- char c;
978
- long pos = (long)(dr->buf.pos - 1);
979
- long line = (long)(dr->buf.line);
980
- long col = (long)(dr->buf.col - 1);
981
- Nv nv;
982
- Hint h = NULL;
927
+ static char read_element_end(SaxDrive dr) {
928
+ VALUE name = Qnil;
929
+ char c;
930
+ long pos = (long)(dr->buf.pos - 1);
931
+ long line = (long)(dr->buf.line);
932
+ long col = (long)(dr->buf.col - 1);
933
+ Nv nv;
934
+ Hint h = NULL;
983
935
 
984
936
  if ('\0' == (c = read_name_token(dr))) {
985
937
  return '\0';
986
938
  }
987
939
  if (is_white(c)) {
988
- c = buf_next_non_white(&dr->buf);
940
+ c = buf_next_non_white(&dr->buf);
989
941
  }
990
942
  // c should be > and current is one past so read another char
991
- c = buf_get(&dr->buf);
943
+ c = buf_get(&dr->buf);
992
944
  nv = stack_peek(&dr->stack);
993
- if (0 != nv &&
994
- 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
995
- name = nv->val;
996
- h = nv->hint;
997
- stack_pop(&dr->stack);
945
+ if (0 != nv && 0 == (dr->options.smart ? strcasecmp(dr->buf.str, nv->name) : strcmp(dr->buf.str, nv->name))) {
946
+ name = nv->val;
947
+ h = nv->hint;
948
+ stack_pop(&dr->stack);
998
949
  } else {
999
- // Mismatched start and end
1000
- char msg[256];
1001
- Nv match = stack_rev_find(dr, dr->buf.str);
1002
-
1003
- if (0 == match) {
1004
- // Not found so open and close element.
1005
- h = ox_hint_find(dr->options.hints, dr->buf.str);
1006
- if (NULL != h && h->empty) {
1007
- // Just close normally
1008
- name = str2sym(dr, dr->buf.str, 0);
1009
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
1010
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1011
- return c;
1012
- } else {
1013
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
1014
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1015
- name = str2sym(dr, dr->buf.str, 0);
1016
- if (dr->has.start_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1017
- VALUE args[1];
1018
-
1019
- if (dr->has.pos) {
1020
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1021
- }
1022
- if (dr->has.line) {
1023
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1024
- }
1025
- if (dr->has.column) {
1026
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1027
- }
1028
- args[0] = name;
1029
- rb_funcall2(dr->handler, ox_start_element_id, 1, args);
1030
- }
1031
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1032
- dr->blocked--;
1033
- }
1034
- }
1035
- } else {
1036
- // Found a match so close all up to the found element in stack.
1037
- Nv n2;
1038
-
1039
- if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
1040
- name = n2->val;
1041
- h = n2->hint;
1042
- } else {
1043
- snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, nv->name);
1044
- ox_sax_drive_error_at(dr, msg, pos, line, col);
1045
- if (dr->has.pos) {
1046
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1047
- }
1048
- if (dr->has.line) {
1049
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1050
- }
1051
- if (dr->has.column) {
1052
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1053
- }
1054
- for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1055
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == nv->hint || ActiveOverlay == nv->hint->overlay || NestOverlay == nv->hint->overlay)) {
1056
- rb_funcall(dr->handler, ox_end_element_id, 1, nv->val);
1057
- }
1058
- if (NULL != nv->hint && BlockOverlay == nv->hint->overlay && 0 < dr->blocked) {
1059
- dr->blocked--;
1060
- }
1061
- }
1062
- name = nv->val;
1063
- h = nv->hint;
1064
- }
1065
- }
950
+ // Mismatched start and end
951
+ char msg[256];
952
+ Nv match = stack_rev_find(dr, dr->buf.str);
953
+
954
+ if (0 == match) {
955
+ // Not found so open and close element.
956
+ h = ox_hint_find(dr->options.hints, dr->buf.str);
957
+ if (NULL != h && h->empty) {
958
+ // Just close normally
959
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
960
+ snprintf(msg,
961
+ sizeof(msg) - 1,
962
+ "%selement '%s' should not have a separate close element",
963
+ EL_MISMATCH,
964
+ dr->buf.str);
965
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
966
+ return c;
967
+ } else {
968
+ snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
969
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
970
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 2, 0);
971
+ if (dr->has_start_element && 0 >= dr->blocked &&
972
+ (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
973
+ VALUE args[1];
974
+
975
+ dr->set_pos(dr->handler, pos);
976
+ dr->set_line(dr->handler, line);
977
+ dr->set_col(dr->handler, col);
978
+ args[0] = name;
979
+ rb_funcall2(dr->handler, ox_start_element_id, 1, args);
980
+ }
981
+ if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
982
+ dr->blocked--;
983
+ }
984
+ }
985
+ } else {
986
+ // Found a match so close all up to the found element in stack.
987
+ Nv n2;
988
+
989
+ if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
990
+ name = n2->val;
991
+ h = n2->hint;
992
+ } else {
993
+ snprintf(msg,
994
+ sizeof(msg) - 1,
995
+ "%selement '%s' close does not match '%s' open",
996
+ EL_MISMATCH,
997
+ dr->buf.str,
998
+ nv->name);
999
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
1000
+ for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
1001
+ end_element_cb(dr, nv->val, pos, line, col, nv->hint);
1002
+ }
1003
+ name = nv->val;
1004
+ h = nv->hint;
1005
+ }
1006
+ }
1066
1007
  }
1067
1008
  end_element_cb(dr, name, pos, line, col, h);
1068
1009
 
1069
1010
  return c;
1070
1011
  }
1071
1012
 
1072
- static char
1073
- read_text(SaxDrive dr) {
1074
- VALUE args[1];
1075
- char c;
1076
- long pos = (long)(dr->buf.pos);
1077
- long line = (long)(dr->buf.line);
1078
- long col = (long)(dr->buf.col - 1);
1079
- Nv parent = stack_peek(&dr->stack);
1080
- int allWhite = 1;
1013
+ static char read_text(SaxDrive dr) {
1014
+ VALUE args[1];
1015
+ char c;
1016
+ long pos = (long)(dr->buf.pos);
1017
+ long line = (long)(dr->buf.line);
1018
+ long col = (long)(dr->buf.col - 1);
1019
+ Nv parent = stack_peek(&dr->stack);
1020
+ int allWhite = 1;
1081
1021
 
1082
1022
  buf_backup(&dr->buf);
1083
1023
  buf_protect(&dr->buf);
1084
1024
  while ('<' != (c = buf_get(&dr->buf))) {
1085
- switch(c) {
1086
- case ' ':
1087
- case '\t':
1088
- case '\f':
1089
- case '\n':
1090
- case '\r':
1091
- break;
1092
- case '\0':
1093
- if (allWhite) {
1094
- return c;
1095
- }
1025
+ switch (c) {
1026
+ case ' ':
1027
+ case '\t':
1028
+ case '\f':
1029
+ case '\n':
1030
+ case '\r': break;
1031
+ case '\0':
1032
+ if (allWhite) {
1033
+ return c;
1034
+ }
1096
1035
  ox_sax_drive_error(dr, NO_TERM "text not terminated");
1097
- goto END_OF_BUF;
1098
- break;
1099
- default:
1100
- allWhite = 0;
1101
- break;
1102
- }
1036
+ goto END_OF_BUF;
1037
+ break;
1038
+ default: allWhite = 0; break;
1039
+ }
1103
1040
  }
1104
- END_OF_BUF:
1041
+ END_OF_BUF:
1105
1042
  if ('\0' != c) {
1106
- *(dr->buf.tail - 1) = '\0';
1043
+ *(dr->buf.tail - 1) = '\0';
1107
1044
  }
1108
1045
  if (allWhite) {
1109
- int isEnd = ('/' == buf_get(&dr->buf));
1110
-
1111
- buf_backup(&dr->buf);
1112
- if (dr->has.text &&
1113
- ((NoSkip == dr->options.skip && !isEnd) ||
1114
- (OffSkip == dr->options.skip))) {
1115
- args[0] = rb_str_new2(dr->buf.str);
1116
- #if HAVE_RB_ENC_ASSOCIATE
1117
- if (0 != dr->encoding) {
1118
- rb_enc_associate(args[0], dr->encoding);
1119
- }
1120
- #endif
1121
- if (dr->has.pos) {
1122
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1123
- }
1124
- if (dr->has.line) {
1125
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1126
- }
1127
- if (dr->has.column) {
1128
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1129
- }
1130
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1131
- }
1132
- if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1133
- return c;
1134
- }
1046
+ int isEnd = ('/' == buf_get(&dr->buf));
1047
+
1048
+ buf_backup(&dr->buf);
1049
+ if (dr->has_text && ((NoSkip == dr->options.skip && !isEnd) || (OffSkip == dr->options.skip))) {
1050
+ args[0] = rb_str_new2(dr->buf.str);
1051
+ if (0 != dr->encoding) {
1052
+ rb_enc_associate(args[0], dr->encoding);
1053
+ }
1054
+ dr->set_pos(dr->handler, pos);
1055
+ dr->set_line(dr->handler, line);
1056
+ dr->set_col(dr->handler, col);
1057
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1058
+ }
1059
+ if (!isEnd || 0 == parent || 0 < parent->childCnt) {
1060
+ return c;
1061
+ }
1135
1062
  }
1136
1063
  if (0 != parent) {
1137
- parent->childCnt++;
1064
+ parent->childCnt++;
1138
1065
  }
1139
1066
  if (!dr->blocked && (NULL == parent || NULL == parent->hint || OffOverlay != parent->hint->overlay)) {
1140
- if (dr->has.value) {
1141
- if (dr->has.pos) {
1142
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1143
- }
1144
- if (dr->has.line) {
1145
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1146
- }
1147
- if (dr->has.column) {
1148
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1149
- }
1150
- *args = dr->value_obj;
1151
- rb_funcall2(dr->handler, ox_value_id, 1, args);
1152
- } else if (dr->has.text) {
1153
- if (dr->options.convert_special) {
1154
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1155
- }
1156
- switch (dr->options.skip) {
1157
- case CrSkip:
1158
- buf_collapse_return(dr->buf.str);
1159
- break;
1160
- case SpcSkip:
1161
- buf_collapse_white(dr->buf.str);
1162
- break;
1163
- default:
1164
- break;
1165
- }
1166
- args[0] = rb_str_new2(dr->buf.str);
1167
- #if HAVE_RB_ENC_ASSOCIATE
1168
- if (0 != dr->encoding) {
1169
- rb_enc_associate(args[0], dr->encoding);
1170
- }
1171
- #endif
1172
- if (dr->has.pos) {
1173
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1174
- }
1175
- if (dr->has.line) {
1176
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1177
- }
1178
- if (dr->has.column) {
1179
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1180
- }
1181
- rb_funcall2(dr->handler, ox_text_id, 1, args);
1182
- }
1067
+ if (dr->has_value) {
1068
+ dr->set_pos(dr->handler, pos);
1069
+ dr->set_line(dr->handler, line);
1070
+ dr->set_col(dr->handler, col);
1071
+ *args = dr->value_obj;
1072
+ rb_funcall2(dr->handler, ox_value_id, 1, args);
1073
+ } else if (dr->has_text) {
1074
+ if (dr->options.convert_special) {
1075
+ ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1076
+ }
1077
+ switch (dr->options.skip) {
1078
+ case CrSkip: buf_collapse_return(dr->buf.str); break;
1079
+ case SpcSkip: buf_collapse_white(dr->buf.str); break;
1080
+ default: break;
1081
+ }
1082
+ args[0] = rb_str_new2(dr->buf.str);
1083
+ if (0 != dr->encoding) {
1084
+ rb_enc_associate(args[0], dr->encoding);
1085
+ }
1086
+ dr->set_pos(dr->handler, pos);
1087
+ dr->set_line(dr->handler, line);
1088
+ dr->set_col(dr->handler, col);
1089
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1090
+ }
1183
1091
  }
1184
1092
  dr->buf.str = 0;
1185
1093
 
1186
1094
  return c;
1187
1095
  }
1188
1096
 
1189
- static int
1190
- read_jump_term(Buf buf, const char *pat) {
1191
- struct _checkPt cp;
1097
+ static int read_jump_term(Buf buf, const char *pat) {
1098
+ struct _checkPt cp;
1192
1099
 
1193
- buf_checkpoint(buf, &cp); // right after <
1100
+ buf_checkpoint(buf, &cp); // right after <
1194
1101
  if ('/' != buf_next_non_white(buf)) {
1195
- return 0;
1102
+ return 0;
1196
1103
  }
1197
1104
  if (*pat != tolower(buf_next_non_white(buf))) {
1198
- return 0;
1105
+ return 0;
1199
1106
  }
1200
1107
  for (pat++; '\0' != *pat; pat++) {
1201
- if (*pat != tolower(buf_get(buf))) {
1202
- return 0;
1203
- }
1108
+ if (*pat != tolower(buf_get(buf))) {
1109
+ return 0;
1110
+ }
1204
1111
  }
1205
1112
  if ('>' != buf_next_non_white(buf)) {
1206
- return 0;
1113
+ return 0;
1207
1114
  }
1208
1115
  buf_checkback(buf, &cp);
1209
1116
  return 1;
1210
1117
  }
1211
1118
 
1212
- static char
1213
- read_jump(SaxDrive dr, const char *pat) {
1214
- VALUE args[1];
1215
- char c;
1216
- long pos = (long)(dr->buf.pos);
1217
- long line = (long)(dr->buf.line);
1218
- long col = (long)(dr->buf.col - 1);
1219
- Nv parent = stack_peek(&dr->stack);
1119
+ static char read_jump(SaxDrive dr, const char *pat) {
1120
+ VALUE args[1];
1121
+ char c;
1122
+ long pos = (long)(dr->buf.pos);
1123
+ long line = (long)(dr->buf.line);
1124
+ long col = (long)(dr->buf.col - 1);
1125
+ Nv parent = stack_peek(&dr->stack);
1220
1126
 
1221
1127
  buf_protect(&dr->buf);
1222
1128
  while (1) {
1223
- c = buf_get(&dr->buf);
1224
- switch(c) {
1225
- case '<':
1226
- if (read_jump_term(&dr->buf, pat)) {
1227
- goto END_OF_BUF;
1228
- break;
1229
- }
1230
- break;
1231
- case '\0':
1129
+ c = buf_get(&dr->buf);
1130
+ switch (c) {
1131
+ case '<':
1132
+ if (read_jump_term(&dr->buf, pat)) {
1133
+ goto END_OF_BUF;
1134
+ break;
1135
+ }
1136
+ break;
1137
+ case '\0':
1232
1138
  ox_sax_drive_error(dr, NO_TERM "not terminated");
1233
- goto END_OF_BUF;
1234
- break;
1235
- default:
1236
- break;
1237
- }
1139
+ goto END_OF_BUF;
1140
+ break;
1141
+ default: break;
1142
+ }
1238
1143
  }
1239
- END_OF_BUF:
1144
+ END_OF_BUF:
1240
1145
  if ('\0' != c) {
1241
- *(dr->buf.tail - 1) = '\0';
1146
+ *(dr->buf.tail - 1) = '\0';
1242
1147
  }
1243
1148
  if (0 != parent) {
1244
- parent->childCnt++;
1149
+ parent->childCnt++;
1245
1150
  }
1246
1151
  // TBD check parent overlay
1247
- if (dr->has.text && !dr->blocked) {
1152
+ if (dr->has_text && !dr->blocked) {
1248
1153
  args[0] = rb_str_new2(dr->buf.str);
1249
- #if HAVE_RB_ENC_ASSOCIATE
1250
1154
  if (0 != dr->encoding) {
1251
1155
  rb_enc_associate(args[0], dr->encoding);
1252
1156
  }
1253
- #endif
1254
- if (dr->has.pos) {
1255
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1256
- }
1257
- if (dr->has.line) {
1258
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1259
- }
1260
- if (dr->has.column) {
1261
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1262
- }
1157
+ dr->set_pos(dr->handler, pos);
1158
+ dr->set_line(dr->handler, line);
1159
+ dr->set_col(dr->handler, col);
1263
1160
  rb_funcall2(dr->handler, ox_text_id, 1, args);
1264
1161
  }
1265
1162
  dr->buf.str = 0;
1266
1163
  if ('\0' != c) {
1267
- *(dr->buf.tail - 1) = '<';
1164
+ *(dr->buf.tail - 1) = '<';
1268
1165
  }
1269
1166
  return c;
1270
1167
  }
1271
1168
 
1272
- static char
1273
- read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1274
- VALUE name = Qnil;
1275
- int is_encoding = 0;
1276
- off_t pos;
1277
- off_t line;
1278
- off_t col;
1279
- char *attr_value;
1169
+ static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req, Hint h) {
1170
+ VALUE name = Qnil;
1171
+ int is_encoding = 0;
1172
+ off_t pos;
1173
+ off_t line;
1174
+ off_t col;
1175
+ char *attr_value;
1280
1176
 
1281
1177
  // already protected by caller
1282
1178
  dr->buf.str = dr->buf.tail;
@@ -1284,94 +1180,52 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1284
1180
  c = buf_next_non_white(&dr->buf);
1285
1181
  }
1286
1182
  while (termc != c && term2 != c) {
1287
- buf_backup(&dr->buf);
1183
+ buf_backup(&dr->buf);
1288
1184
  if ('\0' == c) {
1289
- ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1290
- return '\0';
1185
+ ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1186
+ return '\0';
1291
1187
  }
1292
- pos = dr->buf.pos + 1;
1293
- line = dr->buf.line;
1294
- col = dr->buf.col + 1;
1188
+ pos = dr->buf.pos + 1;
1189
+ line = dr->buf.line;
1190
+ col = dr->buf.col + 1;
1295
1191
  if ('\0' == (c = read_name_token(dr))) {
1296
- ox_sax_drive_error(dr, NO_TERM "error reading token");
1297
- return '\0';
1192
+ ox_sax_drive_error(dr, NO_TERM "error reading token");
1193
+ return '\0';
1298
1194
  }
1299
1195
  if (is_xml && 0 == strcasecmp("encoding", dr->buf.str)) {
1300
1196
  is_encoding = 1;
1301
1197
  }
1302
- if (dr->has.attr || dr->has.attr_value) {
1303
- name = str2sym(dr, dr->buf.str, 0);
1198
+ if (dr->want_attr_name) {
1199
+ name = str2sym(dr, dr->buf.str, dr->buf.tail - dr->buf.str - 1, 0);
1304
1200
  }
1305
1201
  if (is_white(c)) {
1306
1202
  c = buf_next_non_white(&dr->buf);
1307
1203
  }
1308
1204
  if ('=' != c) {
1309
- if (eq_req) {
1310
- dr->err = 1;
1311
- return c;
1312
- } else {
1313
- ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1314
- attr_value = (char*)"";
1315
- }
1205
+ if (eq_req) {
1206
+ dr->err = 1;
1207
+ return c;
1208
+ } else {
1209
+ ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
1210
+ attr_value = (char *)"";
1211
+ }
1316
1212
  } else {
1317
- pos = dr->buf.pos + 1;
1318
- line = dr->buf.line;
1319
- col = dr->buf.col + 1;
1320
- c = read_quoted_value(dr);
1321
- attr_value = dr->buf.str;
1322
- if (is_encoding) {
1323
- #if HAVE_RB_ENC_FIND
1324
- dr->encoding = rb_enc_find(dr->buf.str);
1325
- #else
1326
- dr->encoding = dr->buf.str;
1327
- #endif
1328
- is_encoding = 0;
1329
- }
1330
- }
1331
- if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1332
- if (dr->has.attr_value) {
1333
- VALUE args[2];
1334
-
1335
- if (dr->has.pos) {
1336
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1337
- }
1338
- if (dr->has.line) {
1339
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1340
- }
1341
- if (dr->has.column) {
1342
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1343
- }
1344
- args[0] = name;
1345
- args[1] = dr->value_obj;
1346
- rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
1347
- } else if (dr->has.attr) {
1348
- VALUE args[2];
1349
-
1350
- args[0] = name;
1351
- if (dr->options.convert_special) {
1352
- ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1353
- }
1354
- args[1] = rb_str_new2(attr_value);
1355
- #if HAVE_RB_ENC_ASSOCIATE
1356
- if (0 != dr->encoding) {
1357
- rb_enc_associate(args[1], dr->encoding);
1358
- }
1359
- #endif
1360
- if (dr->has.pos) {
1361
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1362
- }
1363
- if (dr->has.line) {
1364
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1365
- }
1366
- if (dr->has.column) {
1367
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1368
- }
1369
- rb_funcall2(dr->handler, ox_attr_id, 2, args);
1370
- }
1371
- }
1372
- if (is_white(c)) {
1373
- c = buf_next_non_white(&dr->buf);
1374
- }
1213
+ pos = dr->buf.pos + 1;
1214
+ line = dr->buf.line;
1215
+ col = dr->buf.col + 1;
1216
+ c = read_quoted_value(dr);
1217
+ attr_value = dr->buf.str;
1218
+ if (is_encoding) {
1219
+ dr->encoding = rb_enc_find(dr->buf.str);
1220
+ is_encoding = 0;
1221
+ }
1222
+ }
1223
+ if (0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1224
+ dr->attr_cb(dr, name, attr_value, pos, line, col);
1225
+ }
1226
+ if (is_white(c)) {
1227
+ c = buf_next_non_white(&dr->buf);
1228
+ }
1375
1229
  }
1376
1230
  dr->buf.str = 0;
1377
1231
 
@@ -1381,66 +1235,62 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req,
1381
1235
  /* The character after the word is returned. dr->buf.tail is one past
1382
1236
  * that. dr->buf.str will point to the token which will be '\0' terminated.
1383
1237
  */
1384
- static char
1385
- read_name_token(SaxDrive dr) {
1386
- char c;
1238
+ static char read_name_token(SaxDrive dr) {
1239
+ char c;
1387
1240
 
1388
1241
  dr->buf.str = dr->buf.tail;
1389
- c = buf_get(&dr->buf);
1242
+ c = buf_get(&dr->buf);
1390
1243
  if (is_white(c)) {
1391
- c = buf_next_non_white(&dr->buf);
1244
+ c = buf_next_non_white(&dr->buf);
1392
1245
  dr->buf.str = dr->buf.tail - 1;
1393
1246
  }
1394
1247
  while (1) {
1395
- switch (c) {
1396
- case ' ':
1397
- case '\t':
1398
- case '\f':
1399
- case '?':
1400
- case '=':
1401
- case '/':
1402
- case '>':
1403
- case '<':
1404
- case '\n':
1405
- case '\r':
1406
- *(dr->buf.tail - 1) = '\0';
1407
- return c;
1408
- case '\0':
1248
+ switch (c) {
1249
+ case ' ':
1250
+ case '\t':
1251
+ case '\f':
1252
+ case '?':
1253
+ case '=':
1254
+ case '/':
1255
+ case '>':
1256
+ case '<':
1257
+ case '\n':
1258
+ case '\r': *(dr->buf.tail - 1) = '\0'; return c;
1259
+ case '\0':
1409
1260
  /* documents never terminate after a name token */
1410
1261
  ox_sax_drive_error(dr, NO_TERM "document not terminated");
1411
1262
  return '\0';
1412
- case ':':
1413
- if ('\0' == *dr->options.strip_ns) {
1414
- break;
1415
- } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1416
- dr->buf.str = dr->buf.tail;
1417
- } else if (dr->options.smart && 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1418
- dr->buf.str = dr->buf.tail;
1419
- } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1420
- dr->buf.str = dr->buf.tail;
1421
- }
1422
- break;
1423
- default:
1424
- break;
1425
- }
1263
+ case ':':
1264
+ if ('\0' == *dr->options.strip_ns) {
1265
+ break;
1266
+ } else if ('*' == *dr->options.strip_ns && '\0' == dr->options.strip_ns[1]) {
1267
+ dr->buf.str = dr->buf.tail;
1268
+ } else if (dr->options.smart &&
1269
+ 0 == strncasecmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1270
+ dr->buf.str = dr->buf.tail;
1271
+ } else if (0 == strncmp(dr->options.strip_ns, dr->buf.str, dr->buf.tail - dr->buf.str - 1)) {
1272
+ dr->buf.str = dr->buf.tail;
1273
+ }
1274
+ break;
1275
+ default: break;
1276
+ }
1426
1277
  c = buf_get(&dr->buf);
1427
1278
  }
1428
1279
  return '\0';
1429
1280
  }
1430
1281
 
1431
- /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one past
1432
- * that. dr->buf.str will point to the token which will be '\0' terminated.
1282
+ /* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one
1283
+ * past that. dr->buf.str will point to the token which will be '\0' terminated.
1433
1284
  */
1434
- static char
1435
- read_quoted_value(SaxDrive dr) {
1436
- char c;
1285
+ static char read_quoted_value(SaxDrive dr) {
1286
+ char c;
1437
1287
 
1438
1288
  c = buf_get(&dr->buf);
1439
1289
  if (is_white(c)) {
1440
1290
  c = buf_next_non_white(&dr->buf);
1441
1291
  }
1442
1292
  if ('"' == c || '\'' == c) {
1443
- char term = c;
1293
+ char term = c;
1444
1294
 
1445
1295
  dr->buf.str = dr->buf.tail;
1446
1296
  while (term != (c = buf_get(&dr->buf))) {
@@ -1449,187 +1299,185 @@ read_quoted_value(SaxDrive dr) {
1449
1299
  return '\0';
1450
1300
  }
1451
1301
  }
1452
- // dr->buf.tail is one past quote char
1453
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1454
- c = buf_get(&dr->buf);
1455
- return c;
1302
+ // dr->buf.tail is one past quote char
1303
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1304
+ c = buf_get(&dr->buf);
1305
+ return c;
1456
1306
  }
1457
1307
  // not quoted, look for something that terminates the string
1458
1308
  dr->buf.str = dr->buf.tail - 1;
1459
1309
  ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes");
1460
1310
  while ('\0' != (c = buf_get(&dr->buf))) {
1461
- switch (c) {
1462
- case ' ':
1463
- //case '/':
1464
- case '>':
1465
- case '?': // for instructions
1466
- case '\t':
1467
- case '\n':
1468
- case '\r':
1469
- *(dr->buf.tail - 1) = '\0'; /* terminate value */
1470
- // dr->buf.tail is in the correct position, one after the word terminator
1471
- return c;
1472
- default:
1473
- break;
1474
- }
1311
+ switch (c) {
1312
+ case ' ':
1313
+ // case '/':
1314
+ case '>':
1315
+ case '?': // for instructions
1316
+ case '\t':
1317
+ case '\n':
1318
+ case '\r':
1319
+ *(dr->buf.tail - 1) = '\0'; /* terminate value */
1320
+ // dr->buf.tail is in the correct position, one after the word terminator
1321
+ return c;
1322
+ default: break;
1323
+ }
1475
1324
  }
1476
- return '\0'; // should never get here
1325
+ return '\0'; // should never get here
1477
1326
  }
1478
1327
 
1479
- static char*
1480
- read_hex_uint64(char *b, uint64_t *up) {
1481
- uint64_t u = 0;
1482
- char c;
1328
+ static char *read_hex_uint64(char *b, uint64_t *up) {
1329
+ uint64_t u = 0;
1330
+ char c;
1483
1331
 
1484
1332
  for (; ';' != *b; b++) {
1485
- c = *b;
1486
- if ('0' <= c && c <= '9') {
1487
- u = (u << 4) | (uint64_t)(c - '0');
1488
- } else if ('a' <= c && c <= 'f') {
1489
- u = (u << 4) | (uint64_t)(c - 'a' + 10);
1490
- } else if ('A' <= c && c <= 'F') {
1491
- u = (u << 4) | (uint64_t)(c - 'A' + 10);
1492
- } else {
1493
- return 0;
1494
- }
1333
+ c = *b;
1334
+ if ('0' <= c && c <= '9') {
1335
+ u = (u << 4) | (uint64_t)(c - '0');
1336
+ } else if ('a' <= c && c <= 'f') {
1337
+ u = (u << 4) | (uint64_t)(c - 'a' + 10);
1338
+ } else if ('A' <= c && c <= 'F') {
1339
+ u = (u << 4) | (uint64_t)(c - 'A' + 10);
1340
+ } else {
1341
+ return 0;
1342
+ }
1495
1343
  }
1496
1344
  *up = u;
1497
1345
 
1498
1346
  return b;
1499
1347
  }
1500
1348
 
1501
- static char*
1502
- read_10_uint64(char *b, uint64_t *up) {
1503
- uint64_t u = 0;
1504
- char c;
1349
+ static char *read_10_uint64(char *b, uint64_t *up) {
1350
+ uint64_t u = 0;
1351
+ char c;
1505
1352
 
1506
1353
  for (; ';' != *b; b++) {
1507
- c = *b;
1508
- if ('0' <= c && c <= '9') {
1509
- u = (u * 10) + (uint64_t)(c - '0');
1510
- } else {
1511
- return 0;
1512
- }
1354
+ c = *b;
1355
+ if ('0' <= c && c <= '9') {
1356
+ u = (u * 10) + (uint64_t)(c - '0');
1357
+ } else {
1358
+ return 0;
1359
+ }
1513
1360
  }
1514
1361
  *up = u;
1515
1362
 
1516
1363
  return b;
1517
1364
  }
1518
1365
 
1519
- int
1520
- ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1521
- char *s = str;
1522
- char *b = str;
1366
+ int ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1367
+ char *s = str;
1368
+ char *b = str;
1523
1369
 
1524
1370
  while ('\0' != *s) {
1525
- if ('&' == *s) {
1526
- int c = 0;
1527
- char *end;
1371
+ switch (*s) {
1372
+ case '&': {
1373
+ int c = 0;
1374
+ char *end;
1528
1375
 
1529
1376
  s++;
1530
1377
  if ('#' == *s) {
1531
- uint64_t u = 0;
1532
- char x;
1533
-
1534
- s++;
1535
- if ('x' == *s || 'X' == *s) {
1536
- x = *s;
1537
- s++;
1538
- end = read_hex_uint64(s, &u);
1539
- } else {
1540
- x = '\0';
1541
- end = read_10_uint64(s, &u);
1542
- }
1543
- if (0 == end) {
1544
- ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1545
- *b++ = '&';
1546
- *b++ = '#';
1547
- if ('\0' != x) {
1548
- *b++ = x;
1549
- }
1550
- continue;
1551
- }
1552
- if (u <= 0x000000000000007FULL) {
1553
- *b++ = (char)u;
1554
- #if HAVE_RB_ENC_FIND
1555
- } else if (ox_utf8_encoding == dr->encoding) {
1556
- b = ox_ucs_to_utf8_chars(b, u);
1557
- } else if (0 == dr->encoding) {
1558
- dr->encoding = ox_utf8_encoding;
1559
- b = ox_ucs_to_utf8_chars(b, u);
1560
- #else
1561
- } else if (0 == dr->encoding) {
1562
- dr->encoding = UTF8_STR;
1563
- b = ox_ucs_to_utf8_chars(b, u);
1564
- } else if (0 == strcasecmp(UTF8_STR, dr->encoding)) {
1565
- b = ox_ucs_to_utf8_chars(b, u);
1566
- #endif
1567
- } else {
1568
- b = ox_ucs_to_utf8_chars(b, u);
1569
- /*
1570
- ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character sequences.");
1571
- *b++ = '&';
1572
- *b++ = '#';
1573
- if ('\0' != x) {
1574
- *b++ = x;
1575
- }
1576
- continue;
1577
- */
1578
- }
1579
- s = end + 1;
1580
- continue;
1378
+ uint64_t u = 0;
1379
+ char x;
1380
+
1381
+ s++;
1382
+ if ('x' == *s || 'X' == *s) {
1383
+ x = *s;
1384
+ s++;
1385
+ end = read_hex_uint64(s, &u);
1386
+ } else {
1387
+ x = '\0';
1388
+ end = read_10_uint64(s, &u);
1389
+ }
1390
+ if (0 == end) {
1391
+ ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
1392
+ *b++ = '&';
1393
+ *b++ = '#';
1394
+ if ('\0' != x) {
1395
+ *b++ = x;
1396
+ }
1397
+ continue;
1398
+ }
1399
+ if (u <= 0x000000000000007FULL) {
1400
+ *b++ = (char)u;
1401
+ } else if (ox_utf8_encoding == dr->encoding) {
1402
+ b = ox_ucs_to_utf8_chars(b, u);
1403
+ } else if (0 == dr->encoding) {
1404
+ dr->encoding = ox_utf8_encoding;
1405
+ b = ox_ucs_to_utf8_chars(b, u);
1406
+ } else {
1407
+ b = ox_ucs_to_utf8_chars(b, u);
1408
+ /*
1409
+ ox_sax_drive_error(dr, NO_TERM "Invalid encoding, need UTF-8 encoding to parse &#nnnn; character
1410
+ sequences."); *b++ = '&'; *b++ = '#'; if ('\0' != x) { *b++ = x;
1411
+ }
1412
+ continue;
1413
+ */
1414
+ }
1415
+ s = end + 1;
1416
+ continue;
1581
1417
  } else if (0 == strncasecmp(s, "lt;", 3)) {
1582
1418
  c = '<';
1583
1419
  s += 3;
1584
- col += 3;
1420
+ col += 3;
1585
1421
  } else if (0 == strncasecmp(s, "gt;", 3)) {
1586
1422
  c = '>';
1587
1423
  s += 3;
1588
- col += 3;
1424
+ col += 3;
1589
1425
  } else if (0 == strncasecmp(s, "amp;", 4)) {
1590
1426
  c = '&';
1591
1427
  s += 4;
1592
- col += 4;
1428
+ col += 4;
1593
1429
  } else if (0 == strncasecmp(s, "quot;", 5)) {
1594
1430
  c = '"';
1595
1431
  s += 5;
1596
- col += 5;
1432
+ col += 5;
1597
1433
  } else if (0 == strncasecmp(s, "apos;", 5)) {
1598
1434
  c = '\'';
1599
1435
  s += 5;
1600
1436
  } else {
1601
- char key[16];
1602
- char *k = key;
1603
- char *kend = key + sizeof(key) - 1;
1604
- char *bn;
1605
- char *s2 = s;
1606
-
1607
- for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1608
- if (kend <= k) {
1609
- k = key;
1610
- break;
1611
- }
1612
- *k = *s2;
1613
- }
1614
- *k = '\0';
1615
- if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1616
- ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1617
- c = '&';
1618
- } else {
1619
- b = bn;
1620
- s = s2 + 1;
1621
- continue;
1622
- }
1437
+ char key[16];
1438
+ char *k = key;
1439
+ char *kend = key + sizeof(key) - 1;
1440
+ char *bn;
1441
+ char *s2 = s;
1442
+
1443
+ for (; ';' != *s2 && '\0' != *s2; s2++, k++) {
1444
+ if (kend <= k) {
1445
+ k = key;
1446
+ break;
1447
+ }
1448
+ *k = *s2;
1449
+ }
1450
+ *k = '\0';
1451
+ if ('\0' == *key || NULL == (bn = ox_entity_lookup(b, key))) {
1452
+ ox_sax_drive_error_at(dr, INVALID_FORMAT "Invalid special character sequence", pos, line, col);
1453
+ c = '&';
1454
+ } else {
1455
+ b = bn;
1456
+ s = s2 + 1;
1457
+ continue;
1458
+ }
1623
1459
  }
1624
1460
  *b++ = (char)c;
1625
- col++;
1626
- } else {
1627
- if ('\n' == *s) {
1628
- line++;
1629
- col = 0;
1461
+ col++;
1462
+ break;
1463
+ }
1464
+ case '\r':
1465
+ s++;
1466
+ if ('\n' == *s) {
1467
+ continue;
1630
1468
  }
1631
- col++;
1469
+ line++;
1470
+ col = 1;
1471
+ *b++ = '\n';
1472
+ break;
1473
+ case '\n':
1474
+ line++;
1475
+ col = 0;
1476
+ // fall through
1477
+ default:
1478
+ col++;
1632
1479
  *b++ = *s++;
1480
+ break;
1633
1481
  }
1634
1482
  }
1635
1483
  *b = '\0';
@@ -1637,64 +1485,43 @@ ox_sax_collapse_special(SaxDrive dr, char *str, long pos, long line, long col) {
1637
1485
  return 0;
1638
1486
  }
1639
1487
 
1640
- static void
1641
- hint_clear_empty(SaxDrive dr) {
1642
- Nv nv;
1488
+ static void hint_clear_empty(SaxDrive dr) {
1489
+ Nv nv;
1643
1490
 
1644
1491
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1645
- if (0 == nv->hint) {
1646
- break;
1647
- }
1648
- if (nv->hint->empty) {
1649
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1650
- stack_pop(&dr->stack);
1651
- } else {
1652
- break;
1653
- }
1492
+ if (0 == nv->hint) {
1493
+ break;
1494
+ }
1495
+ if (nv->hint->empty) {
1496
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1497
+ stack_pop(&dr->stack);
1498
+ } else {
1499
+ break;
1500
+ }
1654
1501
  }
1655
1502
  }
1656
1503
 
1657
- static Nv
1658
- hint_try_close(SaxDrive dr, const char *name) {
1659
- Hint h = ox_hint_find(dr->options.hints, name);
1660
- Nv nv;
1504
+ static Nv hint_try_close(SaxDrive dr, const char *name) {
1505
+ Hint h = ox_hint_find(dr->options.hints, name);
1506
+ Nv nv;
1661
1507
 
1662
1508
  if (0 == h) {
1663
- return 0;
1509
+ return 0;
1664
1510
  }
1665
1511
  for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
1666
- if (0 == strcasecmp(name, nv->name)) {
1667
- stack_pop(&dr->stack);
1668
- return nv;
1669
- }
1670
- if (0 == nv->hint) {
1671
- break;
1672
- }
1673
- if (nv->hint->empty) {
1674
- end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1675
- dr->stack.tail = nv;
1676
- } else {
1677
- break;
1678
- }
1512
+ if (0 == strcasecmp(name, nv->name)) {
1513
+ stack_pop(&dr->stack);
1514
+ return nv;
1515
+ }
1516
+ if (0 == nv->hint) {
1517
+ break;
1518
+ }
1519
+ if (nv->hint->empty) {
1520
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col, nv->hint);
1521
+ dr->stack.tail = nv;
1522
+ } else {
1523
+ break;
1524
+ }
1679
1525
  }
1680
1526
  return 0;
1681
1527
  }
1682
-
1683
- static void
1684
- end_element_cb(SaxDrive dr, VALUE name, long pos, long line, long col, Hint h) {
1685
- if (dr->has.end_element && 0 >= dr->blocked && (NULL == h || ActiveOverlay == h->overlay || NestOverlay == h->overlay)) {
1686
- if (dr->has.pos) {
1687
- rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1688
- }
1689
- if (dr->has.line) {
1690
- rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1691
- }
1692
- if (dr->has.column) {
1693
- rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1694
- }
1695
- rb_funcall(dr->handler, ox_end_element_id, 1, name);
1696
- }
1697
- if (NULL != h && BlockOverlay == h->overlay && 0 < dr->blocked) {
1698
- dr->blocked--;
1699
- }
1700
- }