ox 2.2.0 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +7 -13
- data/ext/ox/ox.c +2 -0
- data/ext/ox/ox.h +1 -0
- data/ext/ox/sax.c +202 -35
- data/ext/ox/sax.h +1 -1
- data/ext/ox/sax_as.c +1 -1
- data/ext/ox/sax_buf.c +2 -0
- data/ext/ox/sax_buf.h +13 -3
- data/ext/ox/sax_has.h +2 -0
- data/ext/ox/sax_hint.c +121 -121
- data/ext/ox/sax_hint.h +1 -0
- data/lib/ox/sax.rb +3 -1
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bec620893f1af26eaf853fec54ac033da9516689
|
4
|
+
data.tar.gz: 26fd103eb9d31e42ce45bf5371b51740d91d7b21
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64abf65cab8bd13621831043039a8e4e8ccf7e1bf839bf061a443fb5e9cd7e5d7d6a1334cc36ab5caa9bb71034c03ddcd0f40b0f1d9630920a73be8828230b7c
|
7
|
+
data.tar.gz: 78332e2f9ef6892242bb8ed4a84a4353a122e8980f660575c719520a83aca5be0d8c6f376b1dd9628b168c7ddc347403c59afd783a175d7fbed6f76be407a2b5
|
data/README.md
CHANGED
@@ -34,25 +34,19 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## Release Notes
|
36
36
|
|
37
|
-
### Current Release 2.2.
|
37
|
+
### Current Release 2.2.1
|
38
38
|
|
39
|
-
- Added
|
40
|
-
|
41
|
-
- Added the SAX smart option to the default options.
|
42
|
-
|
43
|
-
- Other SAX options are now taken from the defaults if not specified.
|
39
|
+
- Added support for handling script elements in HTML.
|
44
40
|
|
45
|
-
|
41
|
+
- Added support in the SAX parser for reporting the position (character offset from the start of the input) through the handler's @pos attribute.
|
46
42
|
|
47
|
-
|
48
|
-
parser and an IO.pipe.
|
43
|
+
### Release 2.2.0
|
49
44
|
|
50
|
-
|
45
|
+
- Added the SAX convert_special option to the default options.
|
51
46
|
|
52
|
-
-
|
47
|
+
- Added the SAX smart option to the default options.
|
53
48
|
|
54
|
-
-
|
55
|
-
collects Symbols.
|
49
|
+
- Other SAX options are now taken from the defaults if not specified.
|
56
50
|
|
57
51
|
## Description
|
58
52
|
|
data/ext/ox/ox.c
CHANGED
@@ -29,6 +29,7 @@ ID ox_at_column_id;
|
|
29
29
|
ID ox_at_content_id;
|
30
30
|
ID ox_at_id;
|
31
31
|
ID ox_at_line_id;
|
32
|
+
ID ox_at_pos_id;
|
32
33
|
ID ox_at_value_id;
|
33
34
|
ID ox_attr_id;
|
34
35
|
ID ox_attr_value_id;
|
@@ -910,6 +911,7 @@ void Init_ox() {
|
|
910
911
|
ox_at_content_id = rb_intern("@content");
|
911
912
|
ox_at_id = rb_intern("at");
|
912
913
|
ox_at_line_id = rb_intern("@line");
|
914
|
+
ox_at_pos_id = rb_intern("@pos");
|
913
915
|
ox_at_value_id = rb_intern("@value");
|
914
916
|
ox_attr_id = rb_intern("attr");
|
915
917
|
ox_attr_value_id = rb_intern("attr_value");
|
data/ext/ox/ox.h
CHANGED
data/ext/ox/sax.c
CHANGED
@@ -49,11 +49,12 @@ static char read_comment(SaxDrive dr);
|
|
49
49
|
static char read_element_start(SaxDrive dr);
|
50
50
|
static char read_element_end(SaxDrive dr);
|
51
51
|
static char read_text(SaxDrive dr);
|
52
|
+
static char read_jump(SaxDrive dr, const char *pat);
|
52
53
|
static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req);
|
53
54
|
static char read_name_token(SaxDrive dr);
|
54
55
|
static char read_quoted_value(SaxDrive dr);
|
55
56
|
|
56
|
-
static void end_element_cb(SaxDrive dr, VALUE name, int line, int col);
|
57
|
+
static void end_element_cb(SaxDrive dr, VALUE name, int pos, int line, int col);
|
57
58
|
|
58
59
|
static void hint_clear_empty(SaxDrive dr);
|
59
60
|
static Nv hint_try_close(SaxDrive dr, const char *name);
|
@@ -155,6 +156,7 @@ ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
|
|
155
156
|
printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false");
|
156
157
|
printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false");
|
157
158
|
printf(" has_error = %s\n", dr.has.error ? "true" : "false");
|
159
|
+
printf(" has_pos = %s\n", dr.has.pos ? "true" : "false");
|
158
160
|
printf(" has_line = %s\n", dr.has.line ? "true" : "false");
|
159
161
|
printf(" has_column = %s\n", dr.has.column ? "true" : "false");
|
160
162
|
#endif
|
@@ -217,13 +219,19 @@ ox_sax_drive_cleanup(SaxDrive dr) {
|
|
217
219
|
}
|
218
220
|
|
219
221
|
static void
|
220
|
-
ox_sax_drive_error_at(SaxDrive dr, const char *msg, int line, int col) {
|
222
|
+
ox_sax_drive_error_at(SaxDrive dr, const char *msg, int pos, int line, int col) {
|
221
223
|
if (dr->has.error) {
|
222
224
|
VALUE args[3];
|
223
225
|
|
224
226
|
args[0] = rb_str_new2(msg);
|
225
227
|
args[1] = LONG2NUM(line);
|
226
228
|
args[2] = LONG2NUM(col);
|
229
|
+
if (dr->has.pos) {
|
230
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
231
|
+
}
|
232
|
+
if (dr->has.pos) {
|
233
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
234
|
+
}
|
227
235
|
if (dr->has.line) {
|
228
236
|
rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
|
229
237
|
}
|
@@ -236,7 +244,7 @@ ox_sax_drive_error_at(SaxDrive dr, const char *msg, int line, int col) {
|
|
236
244
|
|
237
245
|
void
|
238
246
|
ox_sax_drive_error(SaxDrive dr, const char *msg) {
|
239
|
-
ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
|
247
|
+
ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
|
240
248
|
}
|
241
249
|
|
242
250
|
static char
|
@@ -292,8 +300,9 @@ parse(SaxDrive dr) {
|
|
292
300
|
} else {
|
293
301
|
int i;
|
294
302
|
int spaced = 0;
|
303
|
+
int pos = dr->buf.pos + 1;
|
295
304
|
int line = dr->buf.line;
|
296
|
-
int col = dr->buf.col;
|
305
|
+
int col = dr->buf.col + 1;
|
297
306
|
|
298
307
|
if (is_white(c)) {
|
299
308
|
spaced = 1;
|
@@ -305,7 +314,7 @@ parse(SaxDrive dr) {
|
|
305
314
|
}
|
306
315
|
if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
|
307
316
|
if (spaced) {
|
308
|
-
ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", line, col);
|
317
|
+
ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
|
309
318
|
}
|
310
319
|
if (START_STATE != state) {
|
311
320
|
ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
|
@@ -319,7 +328,7 @@ parse(SaxDrive dr) {
|
|
319
328
|
c = read_doctype(dr);
|
320
329
|
} else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
|
321
330
|
if (spaced) {
|
322
|
-
ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", line, col);
|
331
|
+
ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
|
323
332
|
}
|
324
333
|
c = read_cdata(dr);
|
325
334
|
} else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
|
@@ -331,7 +340,7 @@ parse(SaxDrive dr) {
|
|
331
340
|
if (0 != parent) {
|
332
341
|
parent->childCnt++;
|
333
342
|
}
|
334
|
-
ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", line, col);
|
343
|
+
ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
|
335
344
|
c = read_name_token(dr);
|
336
345
|
if ('>' == c) {
|
337
346
|
c = buf_get(&dr->buf);
|
@@ -341,8 +350,9 @@ parse(SaxDrive dr) {
|
|
341
350
|
break;
|
342
351
|
case '/': /* element end */
|
343
352
|
parent = stack_peek(&dr->stack);
|
344
|
-
if (0 != parent && 0 == parent->childCnt) {
|
353
|
+
if (0 != parent && 0 == parent->childCnt && dr->has.text) {
|
345
354
|
VALUE args[1];
|
355
|
+
int pos = dr->buf.pos;
|
346
356
|
int line = dr->buf.line;
|
347
357
|
int col = dr->buf.col - 1;
|
348
358
|
|
@@ -356,6 +366,9 @@ parse(SaxDrive dr) {
|
|
356
366
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
357
367
|
}
|
358
368
|
#endif
|
369
|
+
if (dr->has.pos) {
|
370
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
371
|
+
}
|
359
372
|
if (dr->has.line) {
|
360
373
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
361
374
|
}
|
@@ -393,6 +406,9 @@ parse(SaxDrive dr) {
|
|
393
406
|
char msg[256];
|
394
407
|
Nv sp;
|
395
408
|
|
409
|
+
if (dr->has.pos) {
|
410
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(dr->buf.pos));
|
411
|
+
}
|
396
412
|
if (dr->has.line) {
|
397
413
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
|
398
414
|
}
|
@@ -401,7 +417,7 @@ parse(SaxDrive dr) {
|
|
401
417
|
}
|
402
418
|
for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
|
403
419
|
snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
|
404
|
-
ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
|
420
|
+
ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
|
405
421
|
if (dr->has.end_element) {
|
406
422
|
VALUE args[1];
|
407
423
|
|
@@ -448,6 +464,7 @@ read_instruction(SaxDrive dr) {
|
|
448
464
|
char *cend;
|
449
465
|
VALUE target = Qnil;
|
450
466
|
int is_xml;
|
467
|
+
int pos = dr->buf.pos - 1;
|
451
468
|
int line = dr->buf.line;
|
452
469
|
int col = dr->buf.col - 1;
|
453
470
|
|
@@ -462,6 +479,9 @@ read_instruction(SaxDrive dr) {
|
|
462
479
|
if (dr->has.instruct) {
|
463
480
|
VALUE args[1];
|
464
481
|
|
482
|
+
if (dr->has.pos) {
|
483
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
484
|
+
}
|
465
485
|
if (dr->has.line) {
|
466
486
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
467
487
|
}
|
@@ -472,6 +492,7 @@ read_instruction(SaxDrive dr) {
|
|
472
492
|
rb_funcall2(dr->handler, ox_instruct_id, 1, args);
|
473
493
|
}
|
474
494
|
buf_protect(&dr->buf);
|
495
|
+
pos = dr->buf.pos;
|
475
496
|
line = dr->buf.line;
|
476
497
|
col = dr->buf.col;
|
477
498
|
read_content(dr, content, sizeof(content) - 1);
|
@@ -487,7 +508,7 @@ read_instruction(SaxDrive dr) {
|
|
487
508
|
VALUE args[1];
|
488
509
|
|
489
510
|
if (dr->options.convert_special) {
|
490
|
-
ox_sax_collapse_special(dr, content, line, col);
|
511
|
+
ox_sax_collapse_special(dr, content, pos, line, col);
|
491
512
|
}
|
492
513
|
args[0] = rb_str_new2(content);
|
493
514
|
#if HAS_ENCODING_SUPPORT
|
@@ -502,6 +523,9 @@ read_instruction(SaxDrive dr) {
|
|
502
523
|
if (dr->has.line) {
|
503
524
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
504
525
|
}
|
526
|
+
if (dr->has.pos) {
|
527
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
528
|
+
}
|
505
529
|
if (dr->has.column) {
|
506
530
|
rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
|
507
531
|
}
|
@@ -510,13 +534,14 @@ read_instruction(SaxDrive dr) {
|
|
510
534
|
dr->buf.tail = cend;
|
511
535
|
c = buf_get(&dr->buf);
|
512
536
|
} else {
|
537
|
+
pos = dr->buf.pos;
|
513
538
|
line = dr->buf.line;
|
514
539
|
col = dr->buf.col;
|
515
540
|
c = buf_next_non_white(&dr->buf);
|
516
541
|
if ('>' == c) {
|
517
542
|
c = buf_get(&dr->buf);
|
518
543
|
} else {
|
519
|
-
ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", line, col);
|
544
|
+
ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
|
520
545
|
if ('>' == c) {
|
521
546
|
c = buf_get(&dr->buf);
|
522
547
|
}
|
@@ -525,6 +550,9 @@ read_instruction(SaxDrive dr) {
|
|
525
550
|
if (dr->has.end_instruct) {
|
526
551
|
VALUE args[1];
|
527
552
|
|
553
|
+
if (dr->has.pos) {
|
554
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
555
|
+
}
|
528
556
|
if (dr->has.line) {
|
529
557
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
530
558
|
}
|
@@ -580,16 +608,17 @@ read_delimited(SaxDrive dr, char end) {
|
|
580
608
|
return c;
|
581
609
|
}
|
582
610
|
|
583
|
-
/* Entered after the "<!DOCTYPE" sequence. Ready to read the rest.
|
611
|
+
/* Entered after the "<!DOCTYPE " sequence. Ready to read the rest.
|
584
612
|
*/
|
585
613
|
static char
|
586
614
|
read_doctype(SaxDrive dr) {
|
615
|
+
int pos = dr->buf.pos - 9;
|
587
616
|
int line = dr->buf.line;
|
588
|
-
int col = dr->buf.col -
|
617
|
+
int col = dr->buf.col - 9;
|
589
618
|
char *s;
|
590
619
|
Nv parent = stack_peek(&dr->stack);
|
591
620
|
|
592
|
-
buf_backup(&dr->buf); /* back up to the start in case the
|
621
|
+
buf_backup(&dr->buf); /* back up to the start in case the doctype is empty */
|
593
622
|
buf_protect(&dr->buf);
|
594
623
|
read_delimited(dr, '>');
|
595
624
|
if (dr->options.smart && 0 == dr->hints) {
|
@@ -605,6 +634,9 @@ read_doctype(SaxDrive dr) {
|
|
605
634
|
if (dr->has.doctype) {
|
606
635
|
VALUE args[1];
|
607
636
|
|
637
|
+
if (dr->has.pos) {
|
638
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
639
|
+
}
|
608
640
|
if (dr->has.line) {
|
609
641
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
610
642
|
}
|
@@ -626,8 +658,9 @@ read_cdata(SaxDrive dr) {
|
|
626
658
|
char c;
|
627
659
|
char zero = '\0';
|
628
660
|
int end = 0;
|
661
|
+
int pos = dr->buf.pos - 9;
|
629
662
|
int line = dr->buf.line;
|
630
|
-
int col = dr->buf.col -
|
663
|
+
int col = dr->buf.col - 9;
|
631
664
|
struct _CheckPt cp = CHECK_PT_INIT;
|
632
665
|
Nv parent = stack_peek(&dr->stack);
|
633
666
|
|
@@ -691,6 +724,9 @@ read_cdata(SaxDrive dr) {
|
|
691
724
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
692
725
|
}
|
693
726
|
#endif
|
727
|
+
if (dr->has.pos) {
|
728
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
729
|
+
}
|
694
730
|
if (dr->has.line) {
|
695
731
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
696
732
|
}
|
@@ -714,6 +750,7 @@ read_comment(SaxDrive dr) {
|
|
714
750
|
char c;
|
715
751
|
char zero = '\0';
|
716
752
|
int end = 0;
|
753
|
+
int pos = dr->buf.pos - 4;
|
717
754
|
int line = dr->buf.line;
|
718
755
|
int col = dr->buf.col - 4;
|
719
756
|
struct _CheckPt cp = CHECK_PT_INIT;
|
@@ -775,6 +812,9 @@ read_comment(SaxDrive dr) {
|
|
775
812
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
776
813
|
}
|
777
814
|
#endif
|
815
|
+
if (dr->has.pos) {
|
816
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
817
|
+
}
|
778
818
|
if (dr->has.line) {
|
779
819
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
780
820
|
}
|
@@ -800,13 +840,13 @@ read_element_start(SaxDrive dr) {
|
|
800
840
|
volatile VALUE name = Qnil;
|
801
841
|
char c;
|
802
842
|
int closed;
|
843
|
+
int pos = dr->buf.pos;
|
803
844
|
int line = dr->buf.line;
|
804
|
-
int col = dr->buf.col
|
845
|
+
int col = dr->buf.col;
|
805
846
|
Hint h = 0;
|
806
847
|
int stackless = 0;
|
807
848
|
Nv parent = stack_peek(&dr->stack);
|
808
849
|
|
809
|
-
|
810
850
|
if ('\0' == (c = read_name_token(dr))) {
|
811
851
|
return '\0';
|
812
852
|
}
|
@@ -838,7 +878,7 @@ read_element_start(SaxDrive dr) {
|
|
838
878
|
INV_ELEMENT, dr->buf.str, dr->hints->name);
|
839
879
|
ox_sax_drive_error(dr, msg);
|
840
880
|
stack_pop(&dr->stack);
|
841
|
-
end_element_cb(dr, top_nv->val, line, col);
|
881
|
+
end_element_cb(dr, top_nv->val, pos, line, col);
|
842
882
|
top_nv = stack_peek(&dr->stack);
|
843
883
|
}
|
844
884
|
if (0 != h->parents) {
|
@@ -864,6 +904,9 @@ read_element_start(SaxDrive dr) {
|
|
864
904
|
if (dr->has.start_element) {
|
865
905
|
VALUE args[1];
|
866
906
|
|
907
|
+
if (dr->has.pos) {
|
908
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
909
|
+
}
|
867
910
|
if (dr->has.line) {
|
868
911
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
869
912
|
}
|
@@ -890,11 +933,20 @@ read_element_start(SaxDrive dr) {
|
|
890
933
|
}
|
891
934
|
if (closed) {
|
892
935
|
c = buf_next_non_white(&dr->buf);
|
936
|
+
pos = dr->buf.pos;
|
893
937
|
line = dr->buf.line;
|
894
|
-
col = dr->buf.col
|
895
|
-
end_element_cb(dr, name, line, col);
|
938
|
+
col = dr->buf.col;
|
939
|
+
end_element_cb(dr, name, pos, line, col);
|
896
940
|
} else if (stackless) {
|
897
|
-
end_element_cb(dr, name, line, col);
|
941
|
+
end_element_cb(dr, name, pos, line, col);
|
942
|
+
} else if (0 != h && h->jump) {
|
943
|
+
stack_push(&dr->stack, ename, name, h);
|
944
|
+
if ('>' != c) {
|
945
|
+
ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
|
946
|
+
return c;
|
947
|
+
}
|
948
|
+
read_jump(dr, h->name);
|
949
|
+
return '<';
|
898
950
|
} else {
|
899
951
|
stack_push(&dr->stack, ename, name, h);
|
900
952
|
}
|
@@ -923,13 +975,17 @@ static char
|
|
923
975
|
read_element_end(SaxDrive dr) {
|
924
976
|
VALUE name = Qnil;
|
925
977
|
char c;
|
978
|
+
int pos = dr->buf.pos - 1;
|
926
979
|
int line = dr->buf.line;
|
927
|
-
int col = dr->buf.col -
|
980
|
+
int col = dr->buf.col - 1;
|
928
981
|
Nv nv;
|
929
982
|
|
930
983
|
if ('\0' == (c = read_name_token(dr))) {
|
931
984
|
return '\0';
|
932
985
|
}
|
986
|
+
if (is_white(c)) {
|
987
|
+
c = buf_next_non_white(&dr->buf);
|
988
|
+
}
|
933
989
|
// c should be > and current is one past so read another char
|
934
990
|
c = buf_get(&dr->buf);
|
935
991
|
nv = stack_peek(&dr->stack);
|
@@ -949,15 +1005,18 @@ read_element_end(SaxDrive dr) {
|
|
949
1005
|
// Just close normally
|
950
1006
|
name = str2sym(dr, dr->buf.str, 0);
|
951
1007
|
snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
|
952
|
-
ox_sax_drive_error_at(dr, msg, line, col);
|
1008
|
+
ox_sax_drive_error_at(dr, msg, pos, line, col);
|
953
1009
|
return c;
|
954
1010
|
} else {
|
955
1011
|
snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
|
956
|
-
ox_sax_drive_error_at(dr, msg, line, col);
|
1012
|
+
ox_sax_drive_error_at(dr, msg, pos, line, col);
|
957
1013
|
name = str2sym(dr, dr->buf.str, 0);
|
958
1014
|
if (dr->has.start_element) {
|
959
1015
|
VALUE args[1];
|
960
1016
|
|
1017
|
+
if (dr->has.pos) {
|
1018
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1019
|
+
}
|
961
1020
|
if (dr->has.line) {
|
962
1021
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
963
1022
|
}
|
@@ -976,7 +1035,10 @@ read_element_end(SaxDrive dr) {
|
|
976
1035
|
name = n2->val;
|
977
1036
|
} else {
|
978
1037
|
snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, nv->name);
|
979
|
-
ox_sax_drive_error_at(dr, msg, line, col);
|
1038
|
+
ox_sax_drive_error_at(dr, msg, pos, line, col);
|
1039
|
+
if (dr->has.pos) {
|
1040
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1041
|
+
}
|
980
1042
|
if (dr->has.line) {
|
981
1043
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
982
1044
|
}
|
@@ -992,7 +1054,7 @@ read_element_end(SaxDrive dr) {
|
|
992
1054
|
}
|
993
1055
|
}
|
994
1056
|
}
|
995
|
-
end_element_cb(dr, name, line, col);
|
1057
|
+
end_element_cb(dr, name, pos, line, col);
|
996
1058
|
|
997
1059
|
return c;
|
998
1060
|
}
|
@@ -1001,6 +1063,7 @@ static char
|
|
1001
1063
|
read_text(SaxDrive dr) {
|
1002
1064
|
VALUE args[1];
|
1003
1065
|
char c;
|
1066
|
+
int pos = dr->buf.pos;
|
1004
1067
|
int line = dr->buf.line;
|
1005
1068
|
int col = dr->buf.col - 1;
|
1006
1069
|
Nv parent = stack_peek(&dr->stack);
|
@@ -1044,6 +1107,9 @@ read_text(SaxDrive dr) {
|
|
1044
1107
|
parent->childCnt++;
|
1045
1108
|
}
|
1046
1109
|
if (dr->has.value) {
|
1110
|
+
if (dr->has.pos) {
|
1111
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1112
|
+
}
|
1047
1113
|
if (dr->has.line) {
|
1048
1114
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
1049
1115
|
}
|
@@ -1054,7 +1120,7 @@ read_text(SaxDrive dr) {
|
|
1054
1120
|
rb_funcall2(dr->handler, ox_value_id, 1, args);
|
1055
1121
|
} else if (dr->has.text) {
|
1056
1122
|
if (dr->options.convert_special) {
|
1057
|
-
ox_sax_collapse_special(dr, dr->buf.str, line, col);
|
1123
|
+
ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
|
1058
1124
|
}
|
1059
1125
|
switch (dr->options.skip) {
|
1060
1126
|
case CrSkip:
|
@@ -1076,6 +1142,9 @@ read_text(SaxDrive dr) {
|
|
1076
1142
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1077
1143
|
}
|
1078
1144
|
#endif
|
1145
|
+
if (dr->has.pos) {
|
1146
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1147
|
+
}
|
1079
1148
|
if (dr->has.line) {
|
1080
1149
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
1081
1150
|
}
|
@@ -1089,10 +1158,97 @@ read_text(SaxDrive dr) {
|
|
1089
1158
|
return c;
|
1090
1159
|
}
|
1091
1160
|
|
1161
|
+
static int
|
1162
|
+
read_jump_term(Buf buf, const char *pat) {
|
1163
|
+
struct _CheckPt cp;
|
1164
|
+
|
1165
|
+
buf_checkpoint(buf, &cp); // right after <
|
1166
|
+
if ('/' != buf_next_non_white(buf)) {
|
1167
|
+
return 0;
|
1168
|
+
}
|
1169
|
+
if (*pat != buf_next_non_white(buf)) {
|
1170
|
+
return 0;
|
1171
|
+
}
|
1172
|
+
for (pat++; '\0' != *pat; pat++) {
|
1173
|
+
if (*pat != buf_get(buf)) {
|
1174
|
+
return 0;
|
1175
|
+
}
|
1176
|
+
}
|
1177
|
+
if ('>' != buf_next_non_white(buf)) {
|
1178
|
+
return 0;
|
1179
|
+
}
|
1180
|
+
buf_checkback(buf, &cp);
|
1181
|
+
return 1;
|
1182
|
+
}
|
1183
|
+
|
1184
|
+
static char
|
1185
|
+
read_jump(SaxDrive dr, const char *pat) {
|
1186
|
+
VALUE args[1];
|
1187
|
+
char c;
|
1188
|
+
int pos = dr->buf.pos;
|
1189
|
+
int line = dr->buf.line;
|
1190
|
+
int col = dr->buf.col - 1;
|
1191
|
+
Nv parent = stack_peek(&dr->stack);
|
1192
|
+
|
1193
|
+
buf_protect(&dr->buf);
|
1194
|
+
while (1) {
|
1195
|
+
c = buf_get(&dr->buf);
|
1196
|
+
switch(c) {
|
1197
|
+
case '<':
|
1198
|
+
if (read_jump_term(&dr->buf, pat)) {
|
1199
|
+
goto END_OF_BUF;
|
1200
|
+
break;
|
1201
|
+
}
|
1202
|
+
break;
|
1203
|
+
case '\0':
|
1204
|
+
ox_sax_drive_error(dr, NO_TERM "not terminated");
|
1205
|
+
goto END_OF_BUF;
|
1206
|
+
break;
|
1207
|
+
default:
|
1208
|
+
break;
|
1209
|
+
}
|
1210
|
+
}
|
1211
|
+
END_OF_BUF:
|
1212
|
+
if ('\0' != c) {
|
1213
|
+
*(dr->buf.tail - 1) = '\0';
|
1214
|
+
}
|
1215
|
+
if (0 != parent) {
|
1216
|
+
parent->childCnt++;
|
1217
|
+
}
|
1218
|
+
if (dr->has.text) {
|
1219
|
+
args[0] = rb_str_new2(dr->buf.str);
|
1220
|
+
#if HAS_ENCODING_SUPPORT
|
1221
|
+
if (0 != dr->encoding) {
|
1222
|
+
rb_enc_associate(args[0], dr->encoding);
|
1223
|
+
}
|
1224
|
+
#elif HAS_PRIVATE_ENCODING
|
1225
|
+
if (Qnil != dr->encoding) {
|
1226
|
+
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1227
|
+
}
|
1228
|
+
#endif
|
1229
|
+
if (dr->has.pos) {
|
1230
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1231
|
+
}
|
1232
|
+
if (dr->has.line) {
|
1233
|
+
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
1234
|
+
}
|
1235
|
+
if (dr->has.column) {
|
1236
|
+
rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
|
1237
|
+
}
|
1238
|
+
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
1239
|
+
}
|
1240
|
+
dr->buf.str = 0;
|
1241
|
+
if ('\0' != c) {
|
1242
|
+
*(dr->buf.tail - 1) = '<';
|
1243
|
+
}
|
1244
|
+
return c;
|
1245
|
+
}
|
1246
|
+
|
1092
1247
|
static char
|
1093
1248
|
read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req) {
|
1094
1249
|
VALUE name = Qnil;
|
1095
1250
|
int is_encoding = 0;
|
1251
|
+
int pos;
|
1096
1252
|
int line;
|
1097
1253
|
int col;
|
1098
1254
|
char *attr_value;
|
@@ -1108,8 +1264,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
|
|
1108
1264
|
ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
|
1109
1265
|
return '\0';
|
1110
1266
|
}
|
1267
|
+
pos = dr->buf.pos + 1;
|
1111
1268
|
line = dr->buf.line;
|
1112
|
-
col = dr->buf.col;
|
1269
|
+
col = dr->buf.col + 1;
|
1113
1270
|
if ('\0' == (c = read_name_token(dr))) {
|
1114
1271
|
ox_sax_drive_error(dr, NO_TERM "error reading token");
|
1115
1272
|
return '\0';
|
@@ -1132,8 +1289,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
|
|
1132
1289
|
attr_value = (char*)"";
|
1133
1290
|
}
|
1134
1291
|
} else {
|
1292
|
+
pos = dr->buf.pos + 1;
|
1135
1293
|
line = dr->buf.line;
|
1136
|
-
col = dr->buf.col;
|
1294
|
+
col = dr->buf.col + 1;
|
1137
1295
|
c = read_quoted_value(dr);
|
1138
1296
|
attr_value = dr->buf.str;
|
1139
1297
|
if (is_encoding) {
|
@@ -1150,6 +1308,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
|
|
1150
1308
|
if (dr->has.attr_value) {
|
1151
1309
|
VALUE args[2];
|
1152
1310
|
|
1311
|
+
if (dr->has.pos) {
|
1312
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1313
|
+
}
|
1153
1314
|
if (dr->has.line) {
|
1154
1315
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
1155
1316
|
}
|
@@ -1163,7 +1324,7 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
|
|
1163
1324
|
VALUE args[2];
|
1164
1325
|
|
1165
1326
|
args[0] = name;
|
1166
|
-
ox_sax_collapse_special(dr, dr->buf.str, line, col);
|
1327
|
+
ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
|
1167
1328
|
args[1] = rb_str_new2(attr_value);
|
1168
1329
|
#if HAS_ENCODING_SUPPORT
|
1169
1330
|
if (0 != dr->encoding) {
|
@@ -1174,6 +1335,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
|
|
1174
1335
|
rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
|
1175
1336
|
}
|
1176
1337
|
#endif
|
1338
|
+
if (dr->has.pos) {
|
1339
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1340
|
+
}
|
1177
1341
|
if (dr->has.line) {
|
1178
1342
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
1179
1343
|
}
|
@@ -1319,7 +1483,7 @@ read_10_uint64(char *b, uint64_t *up) {
|
|
1319
1483
|
}
|
1320
1484
|
|
1321
1485
|
int
|
1322
|
-
ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
|
1486
|
+
ox_sax_collapse_special(SaxDrive dr, char *str, int pos, int line, int col) {
|
1323
1487
|
char *s = str;
|
1324
1488
|
char *b = str;
|
1325
1489
|
|
@@ -1407,7 +1571,7 @@ ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
|
|
1407
1571
|
c = '\'';
|
1408
1572
|
s += 5;
|
1409
1573
|
} else {
|
1410
|
-
ox_sax_drive_error_at(dr, NO_TERM "special character does not end with a semicolon", line, col);
|
1574
|
+
ox_sax_drive_error_at(dr, NO_TERM "special character does not end with a semicolon", pos, line, col);
|
1411
1575
|
c = '&';
|
1412
1576
|
}
|
1413
1577
|
*b++ = (char)c;
|
@@ -1435,7 +1599,7 @@ hint_clear_empty(SaxDrive dr) {
|
|
1435
1599
|
break;
|
1436
1600
|
}
|
1437
1601
|
if (nv->hint->empty) {
|
1438
|
-
end_element_cb(dr, nv->val, dr->buf.line, dr->buf.col);
|
1602
|
+
end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col);
|
1439
1603
|
stack_pop(&dr->stack);
|
1440
1604
|
} else {
|
1441
1605
|
break;
|
@@ -1460,7 +1624,7 @@ hint_try_close(SaxDrive dr, const char *name) {
|
|
1460
1624
|
break;
|
1461
1625
|
}
|
1462
1626
|
if (nv->hint->empty) {
|
1463
|
-
end_element_cb(dr, nv->val, dr->buf.line, dr->buf.col);
|
1627
|
+
end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col);
|
1464
1628
|
dr->stack.tail = nv;
|
1465
1629
|
} else {
|
1466
1630
|
break;
|
@@ -1470,8 +1634,11 @@ hint_try_close(SaxDrive dr, const char *name) {
|
|
1470
1634
|
}
|
1471
1635
|
|
1472
1636
|
static void
|
1473
|
-
end_element_cb(SaxDrive dr, VALUE name, int line, int col) {
|
1637
|
+
end_element_cb(SaxDrive dr, VALUE name, int pos, int line, int col) {
|
1474
1638
|
if (dr->has.end_element) {
|
1639
|
+
if (dr->has.pos) {
|
1640
|
+
rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
|
1641
|
+
}
|
1475
1642
|
if (dr->has.line) {
|
1476
1643
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
1477
1644
|
}
|
data/ext/ox/sax.h
CHANGED
@@ -41,7 +41,7 @@ extern void ox_collapse_return(char *str);
|
|
41
41
|
extern void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options);
|
42
42
|
extern void ox_sax_drive_cleanup(SaxDrive dr);
|
43
43
|
extern void ox_sax_drive_error(SaxDrive dr, const char *msg);
|
44
|
-
extern int ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col);
|
44
|
+
extern int ox_sax_collapse_special(SaxDrive dr, char *str, int pos, int line, int col);
|
45
45
|
|
46
46
|
extern VALUE ox_sax_value_class;
|
47
47
|
|
data/ext/ox/sax_as.c
CHANGED
@@ -119,7 +119,7 @@ sax_value_as_s(VALUE self) {
|
|
119
119
|
return Qnil;
|
120
120
|
}
|
121
121
|
if (dr->options.convert_special) {
|
122
|
-
ox_sax_collapse_special(dr, dr->buf.str, dr->buf.line, dr->buf.col);
|
122
|
+
ox_sax_collapse_special(dr, dr->buf.str, dr->buf.pos, dr->buf.line, dr->buf.col);
|
123
123
|
}
|
124
124
|
switch (dr->options.skip) {
|
125
125
|
case CrSkip:
|
data/ext/ox/sax_buf.c
CHANGED
data/ext/ox/sax_buf.h
CHANGED
@@ -14,8 +14,10 @@ typedef struct _Buf {
|
|
14
14
|
char *read_end; /* one past last character read */
|
15
15
|
char *pro; /* protection start, buffer can not slide past this point */
|
16
16
|
char *str; /* start of current string being read */
|
17
|
+
int pos;
|
17
18
|
int line;
|
18
19
|
int col;
|
20
|
+
int pro_pos;
|
19
21
|
int pro_line;
|
20
22
|
int pro_col;
|
21
23
|
int (*read_func)(struct _Buf *buf);
|
@@ -29,12 +31,13 @@ typedef struct _Buf {
|
|
29
31
|
|
30
32
|
typedef struct _CheckPt {
|
31
33
|
int pro_dif;
|
34
|
+
int pos;
|
32
35
|
int line;
|
33
36
|
int col;
|
34
37
|
char c;
|
35
38
|
} *CheckPt;
|
36
39
|
|
37
|
-
#define CHECK_PT_INIT { -1, 0, 0, '\0' }
|
40
|
+
#define CHECK_PT_INIT { -1, 0, 0, 0, '\0' }
|
38
41
|
|
39
42
|
extern void ox_sax_buf_init(Buf buf, VALUE io);
|
40
43
|
extern int ox_sax_buf_read(Buf buf);
|
@@ -50,9 +53,11 @@ buf_get(Buf buf) {
|
|
50
53
|
if ('\n' == *buf->tail) {
|
51
54
|
buf->line++;
|
52
55
|
buf->col = 0;
|
56
|
+
} else {
|
57
|
+
buf->col++;
|
53
58
|
}
|
54
|
-
buf->
|
55
|
-
|
59
|
+
buf->pos++;
|
60
|
+
|
56
61
|
return *buf->tail++;
|
57
62
|
}
|
58
63
|
|
@@ -60,6 +65,7 @@ static inline void
|
|
60
65
|
buf_backup(Buf buf) {
|
61
66
|
buf->tail--;
|
62
67
|
buf->col--;
|
68
|
+
buf->pos--;
|
63
69
|
if (0 >= buf->col) {
|
64
70
|
buf->line--;
|
65
71
|
// allow col to be negative since we never backup twice in a row
|
@@ -70,6 +76,7 @@ static inline void
|
|
70
76
|
buf_protect(Buf buf) {
|
71
77
|
buf->pro = buf->tail;
|
72
78
|
buf->str = buf->tail; // can't have str before pro
|
79
|
+
buf->pro_pos = buf->pos;
|
73
80
|
buf->pro_line = buf->line;
|
74
81
|
buf->pro_col = buf->col;
|
75
82
|
}
|
@@ -77,6 +84,7 @@ buf_protect(Buf buf) {
|
|
77
84
|
static inline void
|
78
85
|
buf_reset(Buf buf) {
|
79
86
|
buf->tail = buf->pro;
|
87
|
+
buf->pos = buf->pro_pos;
|
80
88
|
buf->line = buf->pro_line;
|
81
89
|
buf->col = buf->pro_col;
|
82
90
|
}
|
@@ -152,6 +160,7 @@ is_white(char c) {
|
|
152
160
|
static inline void
|
153
161
|
buf_checkpoint(Buf buf, CheckPt cp) {
|
154
162
|
cp->pro_dif = (int)(buf->tail - buf->pro);
|
163
|
+
cp->pos = buf->pos;
|
155
164
|
cp->line = buf->line;
|
156
165
|
cp->col = buf->col;
|
157
166
|
cp->c = *(buf->tail - 1);
|
@@ -165,6 +174,7 @@ buf_checkset(CheckPt cp) {
|
|
165
174
|
static inline char
|
166
175
|
buf_checkback(Buf buf, CheckPt cp) {
|
167
176
|
buf->tail = buf->pro + cp->pro_dif;
|
177
|
+
buf->pos = cp->pos;
|
168
178
|
buf->line = cp->line;
|
169
179
|
buf->col = cp->col;
|
170
180
|
return cp->c;
|
data/ext/ox/sax_has.h
CHANGED
@@ -20,6 +20,7 @@ typedef struct _Has {
|
|
20
20
|
int start_element;
|
21
21
|
int end_element;
|
22
22
|
int error;
|
23
|
+
int pos;
|
23
24
|
int line;
|
24
25
|
int column;
|
25
26
|
} *Has;
|
@@ -44,6 +45,7 @@ has_init(Has has, VALUE handler) {
|
|
44
45
|
has->start_element = respond_to(handler, ox_start_element_id);
|
45
46
|
has->end_element = respond_to(handler, ox_end_element_id);
|
46
47
|
has->error = respond_to(handler, ox_error_id);
|
48
|
+
has->pos = (Qtrue == rb_ivar_defined(handler, ox_at_pos_id));
|
47
49
|
has->line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id));
|
48
50
|
has->column = (Qtrue == rb_ivar_defined(handler, ox_at_column_id));
|
49
51
|
}
|
data/ext/ox/sax_hint.c
CHANGED
@@ -26,127 +26,127 @@ static const char *table_0[] = { "table", 0 };
|
|
26
26
|
static const char *tr_0[] = { "tr", 0 };
|
27
27
|
|
28
28
|
static struct _Hint html_hint_array[] = {
|
29
|
-
{ "a", 0, 0, 0 },
|
30
|
-
{ "abbr", 0, 0, 0 },
|
31
|
-
{ "acronym", 0, 0, 0 },
|
32
|
-
{ "address", 0, 0, 0 },
|
33
|
-
{ "applet", 0, 0, 0 },
|
34
|
-
{ "area", 1, 0, map_0 },
|
35
|
-
{ "article", 0, 0, 0 },
|
36
|
-
{ "aside", 0, 0, 0 },
|
37
|
-
{ "audio", 0, 0, 0 },
|
38
|
-
{ "b", 0, 0, 0 },
|
39
|
-
{ "base", 1, 0, head_0 },
|
40
|
-
{ "basefont", 1, 0, head_0 },
|
41
|
-
{ "bdi", 0, 0, 0 },
|
42
|
-
{ "bdo", 0, 1, 0 },
|
43
|
-
{ "big", 0, 0, 0 },
|
44
|
-
{ "blockquote", 0, 0, 0 },
|
45
|
-
{ "body", 0, 0, html_0 },
|
46
|
-
{ "br", 1, 0, 0 },
|
47
|
-
{ "button", 0, 0, 0 },
|
48
|
-
{ "canvas", 0, 0, 0 },
|
49
|
-
{ "caption", 0, 0, table_0 },
|
50
|
-
{ "center", 0, 0, 0 },
|
51
|
-
{ "cite", 0, 0, 0 },
|
52
|
-
{ "code", 0, 0, 0 },
|
53
|
-
{ "col", 1, 0, colgroup_0 },
|
54
|
-
{ "colgroup", 0, 0, 0 },
|
55
|
-
{ "command", 1, 0, 0 },
|
56
|
-
{ "datalist", 0, 0, 0 },
|
57
|
-
{ "dd", 0, 0, dl_0 },
|
58
|
-
{ "del", 0, 0, 0 },
|
59
|
-
{ "details", 0, 0, 0 },
|
60
|
-
{ "dfn", 0, 0, 0 },
|
61
|
-
{ "dialog", 0, 0, dt_th_0 },
|
62
|
-
{ "dir", 0, 0, 0 },
|
63
|
-
{ "div", 0, 1, 0 },
|
64
|
-
{ "dl", 0, 0, 0 },
|
65
|
-
{ "dt", 0, 1, dl_0 },
|
66
|
-
{ "em", 0, 0, 0 },
|
67
|
-
{ "embed", 1, 0, 0 },
|
68
|
-
{ "fieldset", 0, 0, 0 },
|
69
|
-
{ "figcaption", 0, 0, figure_0 },
|
70
|
-
{ "figure", 0, 0, 0 },
|
71
|
-
{ "font", 0, 1, 0 },
|
72
|
-
{ "footer", 0, 0, 0 },
|
73
|
-
{ "form", 0, 0, 0 },
|
74
|
-
{ "frame", 1, 0, frameset_0 },
|
75
|
-
{ "frameset", 0, 0, 0 },
|
76
|
-
{ "h1", 0, 0, 0 },
|
77
|
-
{ "h2", 0, 0, 0 },
|
78
|
-
{ "h3", 0, 0, 0 },
|
79
|
-
{ "h4", 0, 0, 0 },
|
80
|
-
{ "h5", 0, 0, 0 },
|
81
|
-
{ "h6", 0, 0, 0 },
|
82
|
-
{ "head", 0, 0, html_0 },
|
83
|
-
{ "header", 0, 0, 0 },
|
84
|
-
{ "hgroup", 0, 0, 0 },
|
85
|
-
{ "hr", 1, 0, 0 },
|
86
|
-
{ "html", 0, 0, 0 },
|
87
|
-
{ "i", 0, 0, 0 },
|
88
|
-
{ "iframe", 1, 0, 0 },
|
89
|
-
{ "img", 1, 0, 0 },
|
90
|
-
{ "input", 1, 0, 0 }, // somewhere under a form_0
|
91
|
-
{ "ins", 0, 0, 0 },
|
92
|
-
{ "kbd", 0, 0, 0 },
|
93
|
-
{ "keygen", 1, 0, 0 },
|
94
|
-
{ "label", 0, 0, 0 }, // somewhere under a form_0
|
95
|
-
{ "legend", 0, 0, fieldset_0 },
|
96
|
-
{ "li", 0, 0, ol_ul_menu_0 },
|
97
|
-
{ "link", 1, 0, head_0 },
|
98
|
-
{ "map", 0, 0, 0 },
|
99
|
-
{ "mark", 0, 0, 0 },
|
100
|
-
{ "menu", 0, 0, 0 },
|
101
|
-
{ "meta", 1, 0, head_0 },
|
102
|
-
{ "meter", 0, 0, 0 },
|
103
|
-
{ "nav", 0, 0, 0 },
|
104
|
-
{ "noframes", 0, 0, 0 },
|
105
|
-
{ "noscript", 0, 0, 0 },
|
106
|
-
{ "object", 0, 0, 0 },
|
107
|
-
{ "ol", 0, 1, 0 },
|
108
|
-
{ "optgroup", 0, 0, 0 },
|
109
|
-
{ "option", 0, 0, optgroup_select_datalist_0 },
|
110
|
-
{ "output", 0, 0, 0 },
|
111
|
-
{ "p", 0, 0, 0 },
|
112
|
-
{ "param", 1, 0, 0 },
|
113
|
-
{ "pre", 0, 0, 0 },
|
114
|
-
{ "progress", 0, 0, 0 },
|
115
|
-
{ "q", 0, 0, 0 },
|
116
|
-
{ "rp", 0, 0, ruby_0 },
|
117
|
-
{ "rt", 0, 0, ruby_0 },
|
118
|
-
{ "ruby", 0, 0, 0 },
|
119
|
-
{ "s", 0, 0, 0 },
|
120
|
-
{ "samp", 0, 0, 0 },
|
121
|
-
{ "script", 0, 0, 0 },
|
122
|
-
{ "section", 0, 1, 0 },
|
123
|
-
{ "select", 0, 0, 0 },
|
124
|
-
{ "small", 0, 0, 0 },
|
125
|
-
{ "source", 0, 0, audio_video_0 },
|
126
|
-
{ "span", 0, 1, 0 },
|
127
|
-
{ "strike", 0, 0, 0 },
|
128
|
-
{ "strong", 0, 0, 0 },
|
129
|
-
{ "style", 0, 0, 0 },
|
130
|
-
{ "sub", 0, 0, 0 },
|
131
|
-
{ "summary", 0, 0, details_0 },
|
132
|
-
{ "sup", 0, 0, 0 },
|
133
|
-
{ "table", 0, 0, 0 },
|
134
|
-
{ "tbody", 0, 0, table_0 },
|
135
|
-
{ "td", 0, 0, tr_0 },
|
136
|
-
{ "textarea", 0, 0, 0 },
|
137
|
-
{ "tfoot", 0, 0, table_0 },
|
138
|
-
{ "th", 0, 0, tr_0 },
|
139
|
-
{ "thead", 0, 0, table_0 },
|
140
|
-
{ "time", 0, 0, 0 },
|
141
|
-
{ "title", 0, 0, head_0 },
|
142
|
-
{ "tr", 0, 0, table_0 },
|
143
|
-
{ "track", 1, 0, audio_video_0 },
|
144
|
-
{ "tt", 0, 0, 0 },
|
145
|
-
{ "u", 0, 0, 0 },
|
146
|
-
{ "ul", 0, 0, 0 },
|
147
|
-
{ "var", 0, 0, 0 },
|
148
|
-
{ "video", 0, 0, 0 },
|
149
|
-
{ "wbr", 1, 0, 0 },
|
29
|
+
{ "a", 0, 0, 0, 0 },
|
30
|
+
{ "abbr", 0, 0, 0, 0 },
|
31
|
+
{ "acronym", 0, 0, 0, 0 },
|
32
|
+
{ "address", 0, 0, 0, 0 },
|
33
|
+
{ "applet", 0, 0, 0, 0 },
|
34
|
+
{ "area", 1, 0, 0, map_0 },
|
35
|
+
{ "article", 0, 0, 0, 0 },
|
36
|
+
{ "aside", 0, 0, 0, 0 },
|
37
|
+
{ "audio", 0, 0, 0, 0 },
|
38
|
+
{ "b", 0, 0, 0, 0 },
|
39
|
+
{ "base", 1, 0, 0, head_0 },
|
40
|
+
{ "basefont", 1, 0, 0, head_0 },
|
41
|
+
{ "bdi", 0, 0, 0, 0 },
|
42
|
+
{ "bdo", 0, 1, 0, 0 },
|
43
|
+
{ "big", 0, 0, 0, 0 },
|
44
|
+
{ "blockquote", 0, 0, 0, 0 },
|
45
|
+
{ "body", 0, 0, 0, html_0 },
|
46
|
+
{ "br", 1, 0, 0, 0 },
|
47
|
+
{ "button", 0, 0, 0, 0 },
|
48
|
+
{ "canvas", 0, 0, 0, 0 },
|
49
|
+
{ "caption", 0, 0, 0, table_0 },
|
50
|
+
{ "center", 0, 0, 0, 0 },
|
51
|
+
{ "cite", 0, 0, 0, 0 },
|
52
|
+
{ "code", 0, 0, 0, 0 },
|
53
|
+
{ "col", 1, 0, 0, colgroup_0 },
|
54
|
+
{ "colgroup", 0, 0, 0, 0 },
|
55
|
+
{ "command", 1, 0, 0, 0 },
|
56
|
+
{ "datalist", 0, 0, 0, 0 },
|
57
|
+
{ "dd", 0, 0, 0, dl_0 },
|
58
|
+
{ "del", 0, 0, 0, 0 },
|
59
|
+
{ "details", 0, 0, 0, 0 },
|
60
|
+
{ "dfn", 0, 0, 0, 0 },
|
61
|
+
{ "dialog", 0, 0, 0, dt_th_0 },
|
62
|
+
{ "dir", 0, 0, 0, 0 },
|
63
|
+
{ "div", 0, 1, 0, 0 },
|
64
|
+
{ "dl", 0, 0, 0, 0 },
|
65
|
+
{ "dt", 0, 1, 0, dl_0 },
|
66
|
+
{ "em", 0, 0, 0, 0 },
|
67
|
+
{ "embed", 1, 0, 0, 0 },
|
68
|
+
{ "fieldset", 0, 0, 0, 0 },
|
69
|
+
{ "figcaption", 0, 0, 0, figure_0 },
|
70
|
+
{ "figure", 0, 0, 0, 0 },
|
71
|
+
{ "font", 0, 1, 0, 0 },
|
72
|
+
{ "footer", 0, 0, 0, 0 },
|
73
|
+
{ "form", 0, 0, 0, 0 },
|
74
|
+
{ "frame", 1, 0, 0, frameset_0 },
|
75
|
+
{ "frameset", 0, 0, 0, 0 },
|
76
|
+
{ "h1", 0, 0, 0, 0 },
|
77
|
+
{ "h2", 0, 0, 0, 0 },
|
78
|
+
{ "h3", 0, 0, 0, 0 },
|
79
|
+
{ "h4", 0, 0, 0, 0 },
|
80
|
+
{ "h5", 0, 0, 0, 0 },
|
81
|
+
{ "h6", 0, 0, 0, 0 },
|
82
|
+
{ "head", 0, 0, 0, html_0 },
|
83
|
+
{ "header", 0, 0, 0, 0 },
|
84
|
+
{ "hgroup", 0, 0, 0, 0 },
|
85
|
+
{ "hr", 1, 0, 0, 0 },
|
86
|
+
{ "html", 0, 0, 0, 0 },
|
87
|
+
{ "i", 0, 0, 0, 0 },
|
88
|
+
{ "iframe", 1, 0, 0, 0 },
|
89
|
+
{ "img", 1, 0, 0, 0 },
|
90
|
+
{ "input", 1, 0, 0, 0 }, // somewhere under a form_0
|
91
|
+
{ "ins", 0, 0, 0, 0 },
|
92
|
+
{ "kbd", 0, 0, 0, 0 },
|
93
|
+
{ "keygen", 1, 0, 0, 0 },
|
94
|
+
{ "label", 0, 0, 0, 0 }, // somewhere under a form_0
|
95
|
+
{ "legend", 0, 0, 0, fieldset_0 },
|
96
|
+
{ "li", 0, 0, 0, ol_ul_menu_0 },
|
97
|
+
{ "link", 1, 0, 0, head_0 },
|
98
|
+
{ "map", 0, 0, 0, 0 },
|
99
|
+
{ "mark", 0, 0, 0, 0 },
|
100
|
+
{ "menu", 0, 0, 0, 0 },
|
101
|
+
{ "meta", 1, 0, 0, head_0 },
|
102
|
+
{ "meter", 0, 0, 0, 0 },
|
103
|
+
{ "nav", 0, 0, 0, 0 },
|
104
|
+
{ "noframes", 0, 0, 0, 0 },
|
105
|
+
{ "noscript", 0, 0, 0, 0 },
|
106
|
+
{ "object", 0, 0, 0, 0 },
|
107
|
+
{ "ol", 0, 1, 0, 0 },
|
108
|
+
{ "optgroup", 0, 0, 0, 0 },
|
109
|
+
{ "option", 0, 0, 0, optgroup_select_datalist_0 },
|
110
|
+
{ "output", 0, 0, 0, 0 },
|
111
|
+
{ "p", 0, 0, 0, 0 },
|
112
|
+
{ "param", 1, 0, 0, 0 },
|
113
|
+
{ "pre", 0, 0, 0, 0 },
|
114
|
+
{ "progress", 0, 0, 0, 0 },
|
115
|
+
{ "q", 0, 0, 0, 0 },
|
116
|
+
{ "rp", 0, 0, 0, ruby_0 },
|
117
|
+
{ "rt", 0, 0, 0, ruby_0 },
|
118
|
+
{ "ruby", 0, 0, 0, 0 },
|
119
|
+
{ "s", 0, 0, 0, 0 },
|
120
|
+
{ "samp", 0, 0, 0, 0 },
|
121
|
+
{ "script", 0, 0, 1, 0 },
|
122
|
+
{ "section", 0, 1, 0, 0 },
|
123
|
+
{ "select", 0, 0, 0, 0 },
|
124
|
+
{ "small", 0, 0, 0, 0 },
|
125
|
+
{ "source", 0, 0, 0, audio_video_0 },
|
126
|
+
{ "span", 0, 1, 0, 0 },
|
127
|
+
{ "strike", 0, 0, 0, 0 },
|
128
|
+
{ "strong", 0, 0, 0, 0 },
|
129
|
+
{ "style", 0, 0, 0, 0 },
|
130
|
+
{ "sub", 0, 0, 0, 0 },
|
131
|
+
{ "summary", 0, 0, 0, details_0 },
|
132
|
+
{ "sup", 0, 0, 0, 0 },
|
133
|
+
{ "table", 0, 0, 0, 0 },
|
134
|
+
{ "tbody", 0, 0, 0, table_0 },
|
135
|
+
{ "td", 0, 0, 0, tr_0 },
|
136
|
+
{ "textarea", 0, 0, 0, 0 },
|
137
|
+
{ "tfoot", 0, 0, 0, table_0 },
|
138
|
+
{ "th", 0, 0, 0, tr_0 },
|
139
|
+
{ "thead", 0, 0, 0, table_0 },
|
140
|
+
{ "time", 0, 0, 0, 0 },
|
141
|
+
{ "title", 0, 0, 0, head_0 },
|
142
|
+
{ "tr", 0, 0, 0, table_0 },
|
143
|
+
{ "track", 1, 0, 0, audio_video_0 },
|
144
|
+
{ "tt", 0, 0, 0, 0 },
|
145
|
+
{ "u", 0, 0, 0, 0 },
|
146
|
+
{ "ul", 0, 0, 0, 0 },
|
147
|
+
{ "var", 0, 0, 0, 0 },
|
148
|
+
{ "video", 0, 0, 0, 0 },
|
149
|
+
{ "wbr", 1, 0, 0, 0 },
|
150
150
|
};
|
151
151
|
static struct _Hints html_hints = {
|
152
152
|
"HTML",
|
data/ext/ox/sax_hint.h
CHANGED
data/lib/ox/sax.rb
CHANGED
@@ -48,10 +48,12 @@ module Ox
|
|
48
48
|
# Initializing @line in the initializer will cause that variable to be updated
|
49
49
|
# before each callback with the XML line number. The same is true for the
|
50
50
|
# @column but it will be updated with the column in the XML file that is the
|
51
|
-
# start of the element or node just read.
|
51
|
+
# start of the element or node just read. @pos if defined will hold the number
|
52
|
+
# of bytes from the start of the document.
|
52
53
|
class Sax
|
53
54
|
# Create a new instance of the Sax handler class.
|
54
55
|
def initialize()
|
56
|
+
#@pos = nil
|
55
57
|
#@line = nil
|
56
58
|
#@column = nil
|
57
59
|
end
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.2.0
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ohler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-07-30 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "A fast XML parser and object serializer that uses only standard C lib.\n
|
14
14
|
\ \nOptimized XML (Ox), as the name implies was written to provide speed
|