ox 2.2.0 → 2.2.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a72bd41c796380d832d6fe08b7b7e77aeca0bbe4
4
- data.tar.gz: 5aa7c7ef218a7a97cda25bd1586e133f168e91af
3
+ metadata.gz: bec620893f1af26eaf853fec54ac033da9516689
4
+ data.tar.gz: 26fd103eb9d31e42ce45bf5371b51740d91d7b21
5
5
  SHA512:
6
- metadata.gz: 715a10def5ec9ab769de09a7ac0ed6e7cbd56b48cb75a8e5f347a3143cfb3128df8224bf2a7b3d7d598fd297ef294f3120a5de3ad153e4a175d970005bb5f9a6
7
- data.tar.gz: d6edb43235036f091a8150b998a5ce4115c19317203cc853d60f3e34f77b3971896c4bd87fae16b74ce9e3ce6f950664eec7fb42232bd88c12329e93f887bbc0
6
+ metadata.gz: 64abf65cab8bd13621831043039a8e4e8ccf7e1bf839bf061a443fb5e9cd7e5d7d6a1334cc36ab5caa9bb71034c03ddcd0f40b0f1d9630920a73be8828230b7c
7
+ data.tar.gz: 78332e2f9ef6892242bb8ed4a84a4353a122e8980f660575c719520a83aca5be0d8c6f376b1dd9628b168c7ddc347403c59afd783a175d7fbed6f76be407a2b5
data/README.md CHANGED
@@ -34,25 +34,19 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## Release Notes
36
36
 
37
- ### Current Release 2.2.0
37
+ ### Current Release 2.2.1
38
38
 
39
- - Added the SAX convert_special option to the default options.
40
-
41
- - Added the SAX smart option to the default options.
42
-
43
- - Other SAX options are now taken from the defaults if not specified.
39
+ - Added support to handle script elements in html.
44
40
 
45
- ### Release 2.1.8
41
+ - Added support for position from start for the sax parser.
46
42
 
47
- - Fixed a bug that caused all input to be read before parsing with the sax
48
- parser and an IO.pipe.
43
+ ### Release 2.2.0
49
44
 
50
- ### Release 2.1.7
45
+ - Added the SAX convert_special option to the default options.
51
46
 
52
- - Empty elements such as <foo></foo> are now called back with empty text.
47
+ - Added the SAX smart option to the default options.
53
48
 
54
- - Fixed GC problem that occurs with the new GC in Ruby 2.2 that garbage
55
- collects Symbols.
49
+ - Other SAX options are now taken from the defaults if not specified.
56
50
 
57
51
  ## Description
58
52
 
@@ -29,6 +29,7 @@ ID ox_at_column_id;
29
29
  ID ox_at_content_id;
30
30
  ID ox_at_id;
31
31
  ID ox_at_line_id;
32
+ ID ox_at_pos_id;
32
33
  ID ox_at_value_id;
33
34
  ID ox_attr_id;
34
35
  ID ox_attr_value_id;
@@ -910,6 +911,7 @@ void Init_ox() {
910
911
  ox_at_content_id = rb_intern("@content");
911
912
  ox_at_id = rb_intern("at");
912
913
  ox_at_line_id = rb_intern("@line");
914
+ ox_at_pos_id = rb_intern("@pos");
913
915
  ox_at_value_id = rb_intern("@value");
914
916
  ox_attr_id = rb_intern("attr");
915
917
  ox_attr_value_id = rb_intern("attr_value");
@@ -172,6 +172,7 @@ extern ID ox_at_column_id;
172
172
  extern ID ox_at_content_id;
173
173
  extern ID ox_at_id;
174
174
  extern ID ox_at_line_id;
175
+ extern ID ox_at_pos_id;
175
176
  extern ID ox_at_value_id;
176
177
  extern ID ox_attr_id;
177
178
  extern ID ox_attr_value_id;
@@ -49,11 +49,12 @@ static char read_comment(SaxDrive dr);
49
49
  static char read_element_start(SaxDrive dr);
50
50
  static char read_element_end(SaxDrive dr);
51
51
  static char read_text(SaxDrive dr);
52
+ static char read_jump(SaxDrive dr, const char *pat);
52
53
  static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req);
53
54
  static char read_name_token(SaxDrive dr);
54
55
  static char read_quoted_value(SaxDrive dr);
55
56
 
56
- static void end_element_cb(SaxDrive dr, VALUE name, int line, int col);
57
+ static void end_element_cb(SaxDrive dr, VALUE name, int pos, int line, int col);
57
58
 
58
59
  static void hint_clear_empty(SaxDrive dr);
59
60
  static Nv hint_try_close(SaxDrive dr, const char *name);
@@ -155,6 +156,7 @@ ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
155
156
  printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false");
156
157
  printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false");
157
158
  printf(" has_error = %s\n", dr.has.error ? "true" : "false");
159
+ printf(" has_pos = %s\n", dr.has.pos ? "true" : "false");
158
160
  printf(" has_line = %s\n", dr.has.line ? "true" : "false");
159
161
  printf(" has_column = %s\n", dr.has.column ? "true" : "false");
160
162
  #endif
@@ -217,13 +219,19 @@ ox_sax_drive_cleanup(SaxDrive dr) {
217
219
  }
218
220
 
219
221
  static void
220
- ox_sax_drive_error_at(SaxDrive dr, const char *msg, int line, int col) {
222
+ ox_sax_drive_error_at(SaxDrive dr, const char *msg, int pos, int line, int col) {
221
223
  if (dr->has.error) {
222
224
  VALUE args[3];
223
225
 
224
226
  args[0] = rb_str_new2(msg);
225
227
  args[1] = LONG2NUM(line);
226
228
  args[2] = LONG2NUM(col);
229
+ if (dr->has.pos) {
230
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
231
+ }
232
+ if (dr->has.pos) {
233
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
234
+ }
227
235
  if (dr->has.line) {
228
236
  rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
229
237
  }
@@ -236,7 +244,7 @@ ox_sax_drive_error_at(SaxDrive dr, const char *msg, int line, int col) {
236
244
 
237
245
  void
238
246
  ox_sax_drive_error(SaxDrive dr, const char *msg) {
239
- ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
247
+ ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
240
248
  }
241
249
 
242
250
  static char
@@ -292,8 +300,9 @@ parse(SaxDrive dr) {
292
300
  } else {
293
301
  int i;
294
302
  int spaced = 0;
303
+ int pos = dr->buf.pos + 1;
295
304
  int line = dr->buf.line;
296
- int col = dr->buf.col;
305
+ int col = dr->buf.col + 1;
297
306
 
298
307
  if (is_white(c)) {
299
308
  spaced = 1;
@@ -305,7 +314,7 @@ parse(SaxDrive dr) {
305
314
  }
306
315
  if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
307
316
  if (spaced) {
308
- ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", line, col);
317
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", pos, line, col);
309
318
  }
310
319
  if (START_STATE != state) {
311
320
  ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
@@ -319,7 +328,7 @@ parse(SaxDrive dr) {
319
328
  c = read_doctype(dr);
320
329
  } else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
321
330
  if (spaced) {
322
- ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", line, col);
331
+ ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", pos, line, col);
323
332
  }
324
333
  c = read_cdata(dr);
325
334
  } else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
@@ -331,7 +340,7 @@ parse(SaxDrive dr) {
331
340
  if (0 != parent) {
332
341
  parent->childCnt++;
333
342
  }
334
- ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", line, col);
343
+ ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", pos, line, col);
335
344
  c = read_name_token(dr);
336
345
  if ('>' == c) {
337
346
  c = buf_get(&dr->buf);
@@ -341,8 +350,9 @@ parse(SaxDrive dr) {
341
350
  break;
342
351
  case '/': /* element end */
343
352
  parent = stack_peek(&dr->stack);
344
- if (0 != parent && 0 == parent->childCnt) {
353
+ if (0 != parent && 0 == parent->childCnt && dr->has.text) {
345
354
  VALUE args[1];
355
+ int pos = dr->buf.pos;
346
356
  int line = dr->buf.line;
347
357
  int col = dr->buf.col - 1;
348
358
 
@@ -356,6 +366,9 @@ parse(SaxDrive dr) {
356
366
  rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
357
367
  }
358
368
  #endif
369
+ if (dr->has.pos) {
370
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
371
+ }
359
372
  if (dr->has.line) {
360
373
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
361
374
  }
@@ -393,6 +406,9 @@ parse(SaxDrive dr) {
393
406
  char msg[256];
394
407
  Nv sp;
395
408
 
409
+ if (dr->has.pos) {
410
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(dr->buf.pos));
411
+ }
396
412
  if (dr->has.line) {
397
413
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(dr->buf.line));
398
414
  }
@@ -401,7 +417,7 @@ parse(SaxDrive dr) {
401
417
  }
402
418
  for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
403
419
  snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
404
- ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
420
+ ox_sax_drive_error_at(dr, msg, dr->buf.pos, dr->buf.line, dr->buf.col);
405
421
  if (dr->has.end_element) {
406
422
  VALUE args[1];
407
423
 
@@ -448,6 +464,7 @@ read_instruction(SaxDrive dr) {
448
464
  char *cend;
449
465
  VALUE target = Qnil;
450
466
  int is_xml;
467
+ int pos = dr->buf.pos - 1;
451
468
  int line = dr->buf.line;
452
469
  int col = dr->buf.col - 1;
453
470
 
@@ -462,6 +479,9 @@ read_instruction(SaxDrive dr) {
462
479
  if (dr->has.instruct) {
463
480
  VALUE args[1];
464
481
 
482
+ if (dr->has.pos) {
483
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
484
+ }
465
485
  if (dr->has.line) {
466
486
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
467
487
  }
@@ -472,6 +492,7 @@ read_instruction(SaxDrive dr) {
472
492
  rb_funcall2(dr->handler, ox_instruct_id, 1, args);
473
493
  }
474
494
  buf_protect(&dr->buf);
495
+ pos = dr->buf.pos;
475
496
  line = dr->buf.line;
476
497
  col = dr->buf.col;
477
498
  read_content(dr, content, sizeof(content) - 1);
@@ -487,7 +508,7 @@ read_instruction(SaxDrive dr) {
487
508
  VALUE args[1];
488
509
 
489
510
  if (dr->options.convert_special) {
490
- ox_sax_collapse_special(dr, content, line, col);
511
+ ox_sax_collapse_special(dr, content, pos, line, col);
491
512
  }
492
513
  args[0] = rb_str_new2(content);
493
514
  #if HAS_ENCODING_SUPPORT
@@ -502,6 +523,9 @@ read_instruction(SaxDrive dr) {
502
523
  if (dr->has.line) {
503
524
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
504
525
  }
526
+ if (dr->has.pos) {
527
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
528
+ }
505
529
  if (dr->has.column) {
506
530
  rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
507
531
  }
@@ -510,13 +534,14 @@ read_instruction(SaxDrive dr) {
510
534
  dr->buf.tail = cend;
511
535
  c = buf_get(&dr->buf);
512
536
  } else {
537
+ pos = dr->buf.pos;
513
538
  line = dr->buf.line;
514
539
  col = dr->buf.col;
515
540
  c = buf_next_non_white(&dr->buf);
516
541
  if ('>' == c) {
517
542
  c = buf_get(&dr->buf);
518
543
  } else {
519
- ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", line, col);
544
+ ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", pos, line, col);
520
545
  if ('>' == c) {
521
546
  c = buf_get(&dr->buf);
522
547
  }
@@ -525,6 +550,9 @@ read_instruction(SaxDrive dr) {
525
550
  if (dr->has.end_instruct) {
526
551
  VALUE args[1];
527
552
 
553
+ if (dr->has.pos) {
554
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
555
+ }
528
556
  if (dr->has.line) {
529
557
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
530
558
  }
@@ -580,16 +608,17 @@ read_delimited(SaxDrive dr, char end) {
580
608
  return c;
581
609
  }
582
610
 
583
- /* Entered after the "<!DOCTYPE" sequence. Ready to read the rest.
611
+ /* Entered after the "<!DOCTYPE " sequence. Ready to read the rest.
584
612
  */
585
613
  static char
586
614
  read_doctype(SaxDrive dr) {
615
+ int pos = dr->buf.pos - 9;
587
616
  int line = dr->buf.line;
588
- int col = dr->buf.col - 10;
617
+ int col = dr->buf.col - 9;
589
618
  char *s;
590
619
  Nv parent = stack_peek(&dr->stack);
591
620
 
592
- buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
621
+ buf_backup(&dr->buf); /* back up to the start in case the doctype is empty */
593
622
  buf_protect(&dr->buf);
594
623
  read_delimited(dr, '>');
595
624
  if (dr->options.smart && 0 == dr->hints) {
@@ -605,6 +634,9 @@ read_doctype(SaxDrive dr) {
605
634
  if (dr->has.doctype) {
606
635
  VALUE args[1];
607
636
 
637
+ if (dr->has.pos) {
638
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
639
+ }
608
640
  if (dr->has.line) {
609
641
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
610
642
  }
@@ -626,8 +658,9 @@ read_cdata(SaxDrive dr) {
626
658
  char c;
627
659
  char zero = '\0';
628
660
  int end = 0;
661
+ int pos = dr->buf.pos - 9;
629
662
  int line = dr->buf.line;
630
- int col = dr->buf.col - 10;
663
+ int col = dr->buf.col - 9;
631
664
  struct _CheckPt cp = CHECK_PT_INIT;
632
665
  Nv parent = stack_peek(&dr->stack);
633
666
 
@@ -691,6 +724,9 @@ read_cdata(SaxDrive dr) {
691
724
  rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
692
725
  }
693
726
  #endif
727
+ if (dr->has.pos) {
728
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
729
+ }
694
730
  if (dr->has.line) {
695
731
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
696
732
  }
@@ -714,6 +750,7 @@ read_comment(SaxDrive dr) {
714
750
  char c;
715
751
  char zero = '\0';
716
752
  int end = 0;
753
+ int pos = dr->buf.pos - 4;
717
754
  int line = dr->buf.line;
718
755
  int col = dr->buf.col - 4;
719
756
  struct _CheckPt cp = CHECK_PT_INIT;
@@ -775,6 +812,9 @@ read_comment(SaxDrive dr) {
775
812
  rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
776
813
  }
777
814
  #endif
815
+ if (dr->has.pos) {
816
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
817
+ }
778
818
  if (dr->has.line) {
779
819
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
780
820
  }
@@ -800,13 +840,13 @@ read_element_start(SaxDrive dr) {
800
840
  volatile VALUE name = Qnil;
801
841
  char c;
802
842
  int closed;
843
+ int pos = dr->buf.pos;
803
844
  int line = dr->buf.line;
804
- int col = dr->buf.col - 1;
845
+ int col = dr->buf.col;
805
846
  Hint h = 0;
806
847
  int stackless = 0;
807
848
  Nv parent = stack_peek(&dr->stack);
808
849
 
809
-
810
850
  if ('\0' == (c = read_name_token(dr))) {
811
851
  return '\0';
812
852
  }
@@ -838,7 +878,7 @@ read_element_start(SaxDrive dr) {
838
878
  INV_ELEMENT, dr->buf.str, dr->hints->name);
839
879
  ox_sax_drive_error(dr, msg);
840
880
  stack_pop(&dr->stack);
841
- end_element_cb(dr, top_nv->val, line, col);
881
+ end_element_cb(dr, top_nv->val, pos, line, col);
842
882
  top_nv = stack_peek(&dr->stack);
843
883
  }
844
884
  if (0 != h->parents) {
@@ -864,6 +904,9 @@ read_element_start(SaxDrive dr) {
864
904
  if (dr->has.start_element) {
865
905
  VALUE args[1];
866
906
 
907
+ if (dr->has.pos) {
908
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
909
+ }
867
910
  if (dr->has.line) {
868
911
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
869
912
  }
@@ -890,11 +933,20 @@ read_element_start(SaxDrive dr) {
890
933
  }
891
934
  if (closed) {
892
935
  c = buf_next_non_white(&dr->buf);
936
+ pos = dr->buf.pos;
893
937
  line = dr->buf.line;
894
- col = dr->buf.col - 1;
895
- end_element_cb(dr, name, line, col);
938
+ col = dr->buf.col;
939
+ end_element_cb(dr, name, pos, line, col);
896
940
  } else if (stackless) {
897
- end_element_cb(dr, name, line, col);
941
+ end_element_cb(dr, name, pos, line, col);
942
+ } else if (0 != h && h->jump) {
943
+ stack_push(&dr->stack, ename, name, h);
944
+ if ('>' != c) {
945
+ ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
946
+ return c;
947
+ }
948
+ read_jump(dr, h->name);
949
+ return '<';
898
950
  } else {
899
951
  stack_push(&dr->stack, ename, name, h);
900
952
  }
@@ -923,13 +975,17 @@ static char
923
975
  read_element_end(SaxDrive dr) {
924
976
  VALUE name = Qnil;
925
977
  char c;
978
+ int pos = dr->buf.pos - 1;
926
979
  int line = dr->buf.line;
927
- int col = dr->buf.col - 2;
980
+ int col = dr->buf.col - 1;
928
981
  Nv nv;
929
982
 
930
983
  if ('\0' == (c = read_name_token(dr))) {
931
984
  return '\0';
932
985
  }
986
+ if (is_white(c)) {
987
+ c = buf_next_non_white(&dr->buf);
988
+ }
933
989
  // c should be > and current is one past so read another char
934
990
  c = buf_get(&dr->buf);
935
991
  nv = stack_peek(&dr->stack);
@@ -949,15 +1005,18 @@ read_element_end(SaxDrive dr) {
949
1005
  // Just close normally
950
1006
  name = str2sym(dr, dr->buf.str, 0);
951
1007
  snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
952
- ox_sax_drive_error_at(dr, msg, line, col);
1008
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
953
1009
  return c;
954
1010
  } else {
955
1011
  snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
956
- ox_sax_drive_error_at(dr, msg, line, col);
1012
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
957
1013
  name = str2sym(dr, dr->buf.str, 0);
958
1014
  if (dr->has.start_element) {
959
1015
  VALUE args[1];
960
1016
 
1017
+ if (dr->has.pos) {
1018
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1019
+ }
961
1020
  if (dr->has.line) {
962
1021
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
963
1022
  }
@@ -976,7 +1035,10 @@ read_element_end(SaxDrive dr) {
976
1035
  name = n2->val;
977
1036
  } else {
978
1037
  snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, nv->name);
979
- ox_sax_drive_error_at(dr, msg, line, col);
1038
+ ox_sax_drive_error_at(dr, msg, pos, line, col);
1039
+ if (dr->has.pos) {
1040
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1041
+ }
980
1042
  if (dr->has.line) {
981
1043
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
982
1044
  }
@@ -992,7 +1054,7 @@ read_element_end(SaxDrive dr) {
992
1054
  }
993
1055
  }
994
1056
  }
995
- end_element_cb(dr, name, line, col);
1057
+ end_element_cb(dr, name, pos, line, col);
996
1058
 
997
1059
  return c;
998
1060
  }
@@ -1001,6 +1063,7 @@ static char
1001
1063
  read_text(SaxDrive dr) {
1002
1064
  VALUE args[1];
1003
1065
  char c;
1066
+ int pos = dr->buf.pos;
1004
1067
  int line = dr->buf.line;
1005
1068
  int col = dr->buf.col - 1;
1006
1069
  Nv parent = stack_peek(&dr->stack);
@@ -1044,6 +1107,9 @@ read_text(SaxDrive dr) {
1044
1107
  parent->childCnt++;
1045
1108
  }
1046
1109
  if (dr->has.value) {
1110
+ if (dr->has.pos) {
1111
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1112
+ }
1047
1113
  if (dr->has.line) {
1048
1114
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1049
1115
  }
@@ -1054,7 +1120,7 @@ read_text(SaxDrive dr) {
1054
1120
  rb_funcall2(dr->handler, ox_value_id, 1, args);
1055
1121
  } else if (dr->has.text) {
1056
1122
  if (dr->options.convert_special) {
1057
- ox_sax_collapse_special(dr, dr->buf.str, line, col);
1123
+ ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1058
1124
  }
1059
1125
  switch (dr->options.skip) {
1060
1126
  case CrSkip:
@@ -1076,6 +1142,9 @@ read_text(SaxDrive dr) {
1076
1142
  rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1077
1143
  }
1078
1144
  #endif
1145
+ if (dr->has.pos) {
1146
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1147
+ }
1079
1148
  if (dr->has.line) {
1080
1149
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1081
1150
  }
@@ -1089,10 +1158,97 @@ read_text(SaxDrive dr) {
1089
1158
  return c;
1090
1159
  }
1091
1160
 
1161
+ static int
1162
+ read_jump_term(Buf buf, const char *pat) {
1163
+ struct _CheckPt cp;
1164
+
1165
+ buf_checkpoint(buf, &cp); // right after <
1166
+ if ('/' != buf_next_non_white(buf)) {
1167
+ return 0;
1168
+ }
1169
+ if (*pat != buf_next_non_white(buf)) {
1170
+ return 0;
1171
+ }
1172
+ for (pat++; '\0' != *pat; pat++) {
1173
+ if (*pat != buf_get(buf)) {
1174
+ return 0;
1175
+ }
1176
+ }
1177
+ if ('>' != buf_next_non_white(buf)) {
1178
+ return 0;
1179
+ }
1180
+ buf_checkback(buf, &cp);
1181
+ return 1;
1182
+ }
1183
+
1184
+ static char
1185
+ read_jump(SaxDrive dr, const char *pat) {
1186
+ VALUE args[1];
1187
+ char c;
1188
+ int pos = dr->buf.pos;
1189
+ int line = dr->buf.line;
1190
+ int col = dr->buf.col - 1;
1191
+ Nv parent = stack_peek(&dr->stack);
1192
+
1193
+ buf_protect(&dr->buf);
1194
+ while (1) {
1195
+ c = buf_get(&dr->buf);
1196
+ switch(c) {
1197
+ case '<':
1198
+ if (read_jump_term(&dr->buf, pat)) {
1199
+ goto END_OF_BUF;
1200
+ break;
1201
+ }
1202
+ break;
1203
+ case '\0':
1204
+ ox_sax_drive_error(dr, NO_TERM "not terminated");
1205
+ goto END_OF_BUF;
1206
+ break;
1207
+ default:
1208
+ break;
1209
+ }
1210
+ }
1211
+ END_OF_BUF:
1212
+ if ('\0' != c) {
1213
+ *(dr->buf.tail - 1) = '\0';
1214
+ }
1215
+ if (0 != parent) {
1216
+ parent->childCnt++;
1217
+ }
1218
+ if (dr->has.text) {
1219
+ args[0] = rb_str_new2(dr->buf.str);
1220
+ #if HAS_ENCODING_SUPPORT
1221
+ if (0 != dr->encoding) {
1222
+ rb_enc_associate(args[0], dr->encoding);
1223
+ }
1224
+ #elif HAS_PRIVATE_ENCODING
1225
+ if (Qnil != dr->encoding) {
1226
+ rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
1227
+ }
1228
+ #endif
1229
+ if (dr->has.pos) {
1230
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1231
+ }
1232
+ if (dr->has.line) {
1233
+ rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1234
+ }
1235
+ if (dr->has.column) {
1236
+ rb_ivar_set(dr->handler, ox_at_column_id, LONG2NUM(col));
1237
+ }
1238
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
1239
+ }
1240
+ dr->buf.str = 0;
1241
+ if ('\0' != c) {
1242
+ *(dr->buf.tail - 1) = '<';
1243
+ }
1244
+ return c;
1245
+ }
1246
+
1092
1247
  static char
1093
1248
  read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req) {
1094
1249
  VALUE name = Qnil;
1095
1250
  int is_encoding = 0;
1251
+ int pos;
1096
1252
  int line;
1097
1253
  int col;
1098
1254
  char *attr_value;
@@ -1108,8 +1264,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
1108
1264
  ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
1109
1265
  return '\0';
1110
1266
  }
1267
+ pos = dr->buf.pos + 1;
1111
1268
  line = dr->buf.line;
1112
- col = dr->buf.col;
1269
+ col = dr->buf.col + 1;
1113
1270
  if ('\0' == (c = read_name_token(dr))) {
1114
1271
  ox_sax_drive_error(dr, NO_TERM "error reading token");
1115
1272
  return '\0';
@@ -1132,8 +1289,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
1132
1289
  attr_value = (char*)"";
1133
1290
  }
1134
1291
  } else {
1292
+ pos = dr->buf.pos + 1;
1135
1293
  line = dr->buf.line;
1136
- col = dr->buf.col;
1294
+ col = dr->buf.col + 1;
1137
1295
  c = read_quoted_value(dr);
1138
1296
  attr_value = dr->buf.str;
1139
1297
  if (is_encoding) {
@@ -1150,6 +1308,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
1150
1308
  if (dr->has.attr_value) {
1151
1309
  VALUE args[2];
1152
1310
 
1311
+ if (dr->has.pos) {
1312
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1313
+ }
1153
1314
  if (dr->has.line) {
1154
1315
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1155
1316
  }
@@ -1163,7 +1324,7 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
1163
1324
  VALUE args[2];
1164
1325
 
1165
1326
  args[0] = name;
1166
- ox_sax_collapse_special(dr, dr->buf.str, line, col);
1327
+ ox_sax_collapse_special(dr, dr->buf.str, pos, line, col);
1167
1328
  args[1] = rb_str_new2(attr_value);
1168
1329
  #if HAS_ENCODING_SUPPORT
1169
1330
  if (0 != dr->encoding) {
@@ -1174,6 +1335,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req)
1174
1335
  rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
1175
1336
  }
1176
1337
  #endif
1338
+ if (dr->has.pos) {
1339
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1340
+ }
1177
1341
  if (dr->has.line) {
1178
1342
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1179
1343
  }
@@ -1319,7 +1483,7 @@ read_10_uint64(char *b, uint64_t *up) {
1319
1483
  }
1320
1484
 
1321
1485
  int
1322
- ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
1486
+ ox_sax_collapse_special(SaxDrive dr, char *str, int pos, int line, int col) {
1323
1487
  char *s = str;
1324
1488
  char *b = str;
1325
1489
 
@@ -1407,7 +1571,7 @@ ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
1407
1571
  c = '\'';
1408
1572
  s += 5;
1409
1573
  } else {
1410
- ox_sax_drive_error_at(dr, NO_TERM "special character does not end with a semicolon", line, col);
1574
+ ox_sax_drive_error_at(dr, NO_TERM "special character does not end with a semicolon", pos, line, col);
1411
1575
  c = '&';
1412
1576
  }
1413
1577
  *b++ = (char)c;
@@ -1435,7 +1599,7 @@ hint_clear_empty(SaxDrive dr) {
1435
1599
  break;
1436
1600
  }
1437
1601
  if (nv->hint->empty) {
1438
- end_element_cb(dr, nv->val, dr->buf.line, dr->buf.col);
1602
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col);
1439
1603
  stack_pop(&dr->stack);
1440
1604
  } else {
1441
1605
  break;
@@ -1460,7 +1624,7 @@ hint_try_close(SaxDrive dr, const char *name) {
1460
1624
  break;
1461
1625
  }
1462
1626
  if (nv->hint->empty) {
1463
- end_element_cb(dr, nv->val, dr->buf.line, dr->buf.col);
1627
+ end_element_cb(dr, nv->val, dr->buf.pos, dr->buf.line, dr->buf.col);
1464
1628
  dr->stack.tail = nv;
1465
1629
  } else {
1466
1630
  break;
@@ -1470,8 +1634,11 @@ hint_try_close(SaxDrive dr, const char *name) {
1470
1634
  }
1471
1635
 
1472
1636
  static void
1473
- end_element_cb(SaxDrive dr, VALUE name, int line, int col) {
1637
+ end_element_cb(SaxDrive dr, VALUE name, int pos, int line, int col) {
1474
1638
  if (dr->has.end_element) {
1639
+ if (dr->has.pos) {
1640
+ rb_ivar_set(dr->handler, ox_at_pos_id, LONG2NUM(pos));
1641
+ }
1475
1642
  if (dr->has.line) {
1476
1643
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
1477
1644
  }
@@ -41,7 +41,7 @@ extern void ox_collapse_return(char *str);
41
41
  extern void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options);
42
42
  extern void ox_sax_drive_cleanup(SaxDrive dr);
43
43
  extern void ox_sax_drive_error(SaxDrive dr, const char *msg);
44
- extern int ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col);
44
+ extern int ox_sax_collapse_special(SaxDrive dr, char *str, int pos, int line, int col);
45
45
 
46
46
  extern VALUE ox_sax_value_class;
47
47
 
@@ -119,7 +119,7 @@ sax_value_as_s(VALUE self) {
119
119
  return Qnil;
120
120
  }
121
121
  if (dr->options.convert_special) {
122
- ox_sax_collapse_special(dr, dr->buf.str, dr->buf.line, dr->buf.col);
122
+ ox_sax_collapse_special(dr, dr->buf.str, dr->buf.pos, dr->buf.line, dr->buf.col);
123
123
  }
124
124
  switch (dr->options.skip) {
125
125
  case CrSkip:
@@ -57,8 +57,10 @@ ox_sax_buf_init(Buf buf, VALUE io) {
57
57
  buf->read_end = buf->head;
58
58
  buf->pro = 0;
59
59
  buf->str = 0;
60
+ buf->pos = 0;
60
61
  buf->line = 1;
61
62
  buf->col = 0;
63
+ buf->pro_pos = 1;
62
64
  buf->pro_line = 1;
63
65
  buf->pro_col = 0;
64
66
  buf->dr = 0;
@@ -14,8 +14,10 @@ typedef struct _Buf {
14
14
  char *read_end; /* one past last character read */
15
15
  char *pro; /* protection start, buffer can not slide past this point */
16
16
  char *str; /* start of current string being read */
17
+ int pos;
17
18
  int line;
18
19
  int col;
20
+ int pro_pos;
19
21
  int pro_line;
20
22
  int pro_col;
21
23
  int (*read_func)(struct _Buf *buf);
@@ -29,12 +31,13 @@ typedef struct _Buf {
29
31
 
30
32
  typedef struct _CheckPt {
31
33
  int pro_dif;
34
+ int pos;
32
35
  int line;
33
36
  int col;
34
37
  char c;
35
38
  } *CheckPt;
36
39
 
37
- #define CHECK_PT_INIT { -1, 0, 0, '\0' }
40
+ #define CHECK_PT_INIT { -1, 0, 0, 0, '\0' }
38
41
 
39
42
  extern void ox_sax_buf_init(Buf buf, VALUE io);
40
43
  extern int ox_sax_buf_read(Buf buf);
@@ -50,9 +53,11 @@ buf_get(Buf buf) {
50
53
  if ('\n' == *buf->tail) {
51
54
  buf->line++;
52
55
  buf->col = 0;
56
+ } else {
57
+ buf->col++;
53
58
  }
54
- buf->col++;
55
-
59
+ buf->pos++;
60
+
56
61
  return *buf->tail++;
57
62
  }
58
63
 
@@ -60,6 +65,7 @@ static inline void
60
65
  buf_backup(Buf buf) {
61
66
  buf->tail--;
62
67
  buf->col--;
68
+ buf->pos--;
63
69
  if (0 >= buf->col) {
64
70
  buf->line--;
65
71
  // allow col to be negative since we never backup twice in a row
@@ -70,6 +76,7 @@ static inline void
70
76
  buf_protect(Buf buf) {
71
77
  buf->pro = buf->tail;
72
78
  buf->str = buf->tail; // can't have str before pro
79
+ buf->pro_pos = buf->pos;
73
80
  buf->pro_line = buf->line;
74
81
  buf->pro_col = buf->col;
75
82
  }
@@ -77,6 +84,7 @@ buf_protect(Buf buf) {
77
84
  static inline void
78
85
  buf_reset(Buf buf) {
79
86
  buf->tail = buf->pro;
87
+ buf->pos = buf->pro_pos;
80
88
  buf->line = buf->pro_line;
81
89
  buf->col = buf->pro_col;
82
90
  }
@@ -152,6 +160,7 @@ is_white(char c) {
152
160
  static inline void
153
161
  buf_checkpoint(Buf buf, CheckPt cp) {
154
162
  cp->pro_dif = (int)(buf->tail - buf->pro);
163
+ cp->pos = buf->pos;
155
164
  cp->line = buf->line;
156
165
  cp->col = buf->col;
157
166
  cp->c = *(buf->tail - 1);
@@ -165,6 +174,7 @@ buf_checkset(CheckPt cp) {
165
174
  static inline char
166
175
  buf_checkback(Buf buf, CheckPt cp) {
167
176
  buf->tail = buf->pro + cp->pro_dif;
177
+ buf->pos = cp->pos;
168
178
  buf->line = cp->line;
169
179
  buf->col = cp->col;
170
180
  return cp->c;
@@ -20,6 +20,7 @@ typedef struct _Has {
20
20
  int start_element;
21
21
  int end_element;
22
22
  int error;
23
+ int pos;
23
24
  int line;
24
25
  int column;
25
26
  } *Has;
@@ -44,6 +45,7 @@ has_init(Has has, VALUE handler) {
44
45
  has->start_element = respond_to(handler, ox_start_element_id);
45
46
  has->end_element = respond_to(handler, ox_end_element_id);
46
47
  has->error = respond_to(handler, ox_error_id);
48
+ has->pos = (Qtrue == rb_ivar_defined(handler, ox_at_pos_id));
47
49
  has->line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id));
48
50
  has->column = (Qtrue == rb_ivar_defined(handler, ox_at_column_id));
49
51
  }
@@ -26,127 +26,127 @@ static const char *table_0[] = { "table", 0 };
26
26
  static const char *tr_0[] = { "tr", 0 };
27
27
 
28
28
  static struct _Hint html_hint_array[] = {
29
- { "a", 0, 0, 0 },
30
- { "abbr", 0, 0, 0 },
31
- { "acronym", 0, 0, 0 },
32
- { "address", 0, 0, 0 },
33
- { "applet", 0, 0, 0 },
34
- { "area", 1, 0, map_0 },
35
- { "article", 0, 0, 0 },
36
- { "aside", 0, 0, 0 },
37
- { "audio", 0, 0, 0 },
38
- { "b", 0, 0, 0 },
39
- { "base", 1, 0, head_0 },
40
- { "basefont", 1, 0, head_0 },
41
- { "bdi", 0, 0, 0 },
42
- { "bdo", 0, 1, 0 },
43
- { "big", 0, 0, 0 },
44
- { "blockquote", 0, 0, 0 },
45
- { "body", 0, 0, html_0 },
46
- { "br", 1, 0, 0 },
47
- { "button", 0, 0, 0 },
48
- { "canvas", 0, 0, 0 },
49
- { "caption", 0, 0, table_0 },
50
- { "center", 0, 0, 0 },
51
- { "cite", 0, 0, 0 },
52
- { "code", 0, 0, 0 },
53
- { "col", 1, 0, colgroup_0 },
54
- { "colgroup", 0, 0, 0 },
55
- { "command", 1, 0, 0 },
56
- { "datalist", 0, 0, 0 },
57
- { "dd", 0, 0, dl_0 },
58
- { "del", 0, 0, 0 },
59
- { "details", 0, 0, 0 },
60
- { "dfn", 0, 0, 0 },
61
- { "dialog", 0, 0, dt_th_0 },
62
- { "dir", 0, 0, 0 },
63
- { "div", 0, 1, 0 },
64
- { "dl", 0, 0, 0 },
65
- { "dt", 0, 1, dl_0 },
66
- { "em", 0, 0, 0 },
67
- { "embed", 1, 0, 0 },
68
- { "fieldset", 0, 0, 0 },
69
- { "figcaption", 0, 0, figure_0 },
70
- { "figure", 0, 0, 0 },
71
- { "font", 0, 1, 0 },
72
- { "footer", 0, 0, 0 },
73
- { "form", 0, 0, 0 },
74
- { "frame", 1, 0, frameset_0 },
75
- { "frameset", 0, 0, 0 },
76
- { "h1", 0, 0, 0 },
77
- { "h2", 0, 0, 0 },
78
- { "h3", 0, 0, 0 },
79
- { "h4", 0, 0, 0 },
80
- { "h5", 0, 0, 0 },
81
- { "h6", 0, 0, 0 },
82
- { "head", 0, 0, html_0 },
83
- { "header", 0, 0, 0 },
84
- { "hgroup", 0, 0, 0 },
85
- { "hr", 1, 0, 0 },
86
- { "html", 0, 0, 0 },
87
- { "i", 0, 0, 0 },
88
- { "iframe", 1, 0, 0 },
89
- { "img", 1, 0, 0 },
90
- { "input", 1, 0, 0 }, // somewhere under a form_0
91
- { "ins", 0, 0, 0 },
92
- { "kbd", 0, 0, 0 },
93
- { "keygen", 1, 0, 0 },
94
- { "label", 0, 0, 0 }, // somewhere under a form_0
95
- { "legend", 0, 0, fieldset_0 },
96
- { "li", 0, 0, ol_ul_menu_0 },
97
- { "link", 1, 0, head_0 },
98
- { "map", 0, 0, 0 },
99
- { "mark", 0, 0, 0 },
100
- { "menu", 0, 0, 0 },
101
- { "meta", 1, 0, head_0 },
102
- { "meter", 0, 0, 0 },
103
- { "nav", 0, 0, 0 },
104
- { "noframes", 0, 0, 0 },
105
- { "noscript", 0, 0, 0 },
106
- { "object", 0, 0, 0 },
107
- { "ol", 0, 1, 0 },
108
- { "optgroup", 0, 0, 0 },
109
- { "option", 0, 0, optgroup_select_datalist_0 },
110
- { "output", 0, 0, 0 },
111
- { "p", 0, 0, 0 },
112
- { "param", 1, 0, 0 },
113
- { "pre", 0, 0, 0 },
114
- { "progress", 0, 0, 0 },
115
- { "q", 0, 0, 0 },
116
- { "rp", 0, 0, ruby_0 },
117
- { "rt", 0, 0, ruby_0 },
118
- { "ruby", 0, 0, 0 },
119
- { "s", 0, 0, 0 },
120
- { "samp", 0, 0, 0 },
121
- { "script", 0, 0, 0 },
122
- { "section", 0, 1, 0 },
123
- { "select", 0, 0, 0 },
124
- { "small", 0, 0, 0 },
125
- { "source", 0, 0, audio_video_0 },
126
- { "span", 0, 1, 0 },
127
- { "strike", 0, 0, 0 },
128
- { "strong", 0, 0, 0 },
129
- { "style", 0, 0, 0 },
130
- { "sub", 0, 0, 0 },
131
- { "summary", 0, 0, details_0 },
132
- { "sup", 0, 0, 0 },
133
- { "table", 0, 0, 0 },
134
- { "tbody", 0, 0, table_0 },
135
- { "td", 0, 0, tr_0 },
136
- { "textarea", 0, 0, 0 },
137
- { "tfoot", 0, 0, table_0 },
138
- { "th", 0, 0, tr_0 },
139
- { "thead", 0, 0, table_0 },
140
- { "time", 0, 0, 0 },
141
- { "title", 0, 0, head_0 },
142
- { "tr", 0, 0, table_0 },
143
- { "track", 1, 0, audio_video_0 },
144
- { "tt", 0, 0, 0 },
145
- { "u", 0, 0, 0 },
146
- { "ul", 0, 0, 0 },
147
- { "var", 0, 0, 0 },
148
- { "video", 0, 0, 0 },
149
- { "wbr", 1, 0, 0 },
29
+ { "a", 0, 0, 0, 0 },
30
+ { "abbr", 0, 0, 0, 0 },
31
+ { "acronym", 0, 0, 0, 0 },
32
+ { "address", 0, 0, 0, 0 },
33
+ { "applet", 0, 0, 0, 0 },
34
+ { "area", 1, 0, 0, map_0 },
35
+ { "article", 0, 0, 0, 0 },
36
+ { "aside", 0, 0, 0, 0 },
37
+ { "audio", 0, 0, 0, 0 },
38
+ { "b", 0, 0, 0, 0 },
39
+ { "base", 1, 0, 0, head_0 },
40
+ { "basefont", 1, 0, 0, head_0 },
41
+ { "bdi", 0, 0, 0, 0 },
42
+ { "bdo", 0, 1, 0, 0 },
43
+ { "big", 0, 0, 0, 0 },
44
+ { "blockquote", 0, 0, 0, 0 },
45
+ { "body", 0, 0, 0, html_0 },
46
+ { "br", 1, 0, 0, 0 },
47
+ { "button", 0, 0, 0, 0 },
48
+ { "canvas", 0, 0, 0, 0 },
49
+ { "caption", 0, 0, 0, table_0 },
50
+ { "center", 0, 0, 0, 0 },
51
+ { "cite", 0, 0, 0, 0 },
52
+ { "code", 0, 0, 0, 0 },
53
+ { "col", 1, 0, 0, colgroup_0 },
54
+ { "colgroup", 0, 0, 0, 0 },
55
+ { "command", 1, 0, 0, 0 },
56
+ { "datalist", 0, 0, 0, 0 },
57
+ { "dd", 0, 0, 0, dl_0 },
58
+ { "del", 0, 0, 0, 0 },
59
+ { "details", 0, 0, 0, 0 },
60
+ { "dfn", 0, 0, 0, 0 },
61
+ { "dialog", 0, 0, 0, dt_th_0 },
62
+ { "dir", 0, 0, 0, 0 },
63
+ { "div", 0, 1, 0, 0 },
64
+ { "dl", 0, 0, 0, 0 },
65
+ { "dt", 0, 1, 0, dl_0 },
66
+ { "em", 0, 0, 0, 0 },
67
+ { "embed", 1, 0, 0, 0 },
68
+ { "fieldset", 0, 0, 0, 0 },
69
+ { "figcaption", 0, 0, 0, figure_0 },
70
+ { "figure", 0, 0, 0, 0 },
71
+ { "font", 0, 1, 0, 0 },
72
+ { "footer", 0, 0, 0, 0 },
73
+ { "form", 0, 0, 0, 0 },
74
+ { "frame", 1, 0, 0, frameset_0 },
75
+ { "frameset", 0, 0, 0, 0 },
76
+ { "h1", 0, 0, 0, 0 },
77
+ { "h2", 0, 0, 0, 0 },
78
+ { "h3", 0, 0, 0, 0 },
79
+ { "h4", 0, 0, 0, 0 },
80
+ { "h5", 0, 0, 0, 0 },
81
+ { "h6", 0, 0, 0, 0 },
82
+ { "head", 0, 0, 0, html_0 },
83
+ { "header", 0, 0, 0, 0 },
84
+ { "hgroup", 0, 0, 0, 0 },
85
+ { "hr", 1, 0, 0, 0 },
86
+ { "html", 0, 0, 0, 0 },
87
+ { "i", 0, 0, 0, 0 },
88
+ { "iframe", 1, 0, 0, 0 },
89
+ { "img", 1, 0, 0, 0 },
90
+ { "input", 1, 0, 0, 0 }, // somewhere under a form_0
91
+ { "ins", 0, 0, 0, 0 },
92
+ { "kbd", 0, 0, 0, 0 },
93
+ { "keygen", 1, 0, 0, 0 },
94
+ { "label", 0, 0, 0, 0 }, // somewhere under a form_0
95
+ { "legend", 0, 0, 0, fieldset_0 },
96
+ { "li", 0, 0, 0, ol_ul_menu_0 },
97
+ { "link", 1, 0, 0, head_0 },
98
+ { "map", 0, 0, 0, 0 },
99
+ { "mark", 0, 0, 0, 0 },
100
+ { "menu", 0, 0, 0, 0 },
101
+ { "meta", 1, 0, 0, head_0 },
102
+ { "meter", 0, 0, 0, 0 },
103
+ { "nav", 0, 0, 0, 0 },
104
+ { "noframes", 0, 0, 0, 0 },
105
+ { "noscript", 0, 0, 0, 0 },
106
+ { "object", 0, 0, 0, 0 },
107
+ { "ol", 0, 1, 0, 0 },
108
+ { "optgroup", 0, 0, 0, 0 },
109
+ { "option", 0, 0, 0, optgroup_select_datalist_0 },
110
+ { "output", 0, 0, 0, 0 },
111
+ { "p", 0, 0, 0, 0 },
112
+ { "param", 1, 0, 0, 0 },
113
+ { "pre", 0, 0, 0, 0 },
114
+ { "progress", 0, 0, 0, 0 },
115
+ { "q", 0, 0, 0, 0 },
116
+ { "rp", 0, 0, 0, ruby_0 },
117
+ { "rt", 0, 0, 0, ruby_0 },
118
+ { "ruby", 0, 0, 0, 0 },
119
+ { "s", 0, 0, 0, 0 },
120
+ { "samp", 0, 0, 0, 0 },
121
+ { "script", 0, 0, 1, 0 },
122
+ { "section", 0, 1, 0, 0 },
123
+ { "select", 0, 0, 0, 0 },
124
+ { "small", 0, 0, 0, 0 },
125
+ { "source", 0, 0, 0, audio_video_0 },
126
+ { "span", 0, 1, 0, 0 },
127
+ { "strike", 0, 0, 0, 0 },
128
+ { "strong", 0, 0, 0, 0 },
129
+ { "style", 0, 0, 0, 0 },
130
+ { "sub", 0, 0, 0, 0 },
131
+ { "summary", 0, 0, 0, details_0 },
132
+ { "sup", 0, 0, 0, 0 },
133
+ { "table", 0, 0, 0, 0 },
134
+ { "tbody", 0, 0, 0, table_0 },
135
+ { "td", 0, 0, 0, tr_0 },
136
+ { "textarea", 0, 0, 0, 0 },
137
+ { "tfoot", 0, 0, 0, table_0 },
138
+ { "th", 0, 0, 0, tr_0 },
139
+ { "thead", 0, 0, 0, table_0 },
140
+ { "time", 0, 0, 0, 0 },
141
+ { "title", 0, 0, 0, head_0 },
142
+ { "tr", 0, 0, 0, table_0 },
143
+ { "track", 1, 0, 0, audio_video_0 },
144
+ { "tt", 0, 0, 0, 0 },
145
+ { "u", 0, 0, 0, 0 },
146
+ { "ul", 0, 0, 0, 0 },
147
+ { "var", 0, 0, 0, 0 },
148
+ { "video", 0, 0, 0, 0 },
149
+ { "wbr", 1, 0, 0, 0 },
150
150
  };
151
151
  static struct _Hints html_hints = {
152
152
  "HTML",
@@ -10,6 +10,7 @@ typedef struct _Hint {
10
10
  const char *name;
11
11
  char empty; // must be closed or close auto it, not error
12
12
  char nest; // nesting allowed
13
+ char jump; // jump to end <script> ... </script>
13
14
  const char **parents;
14
15
  } *Hint;
15
16
 
@@ -48,10 +48,12 @@ module Ox
48
48
  # Initializing @line in the initializer will cause that variable to be updated
49
49
  # before each callback with the XML line number. The same is true for the
50
50
  # @column but it will be updated with the column in the XML file that is the
51
- # start of the element or node just read.
51
+ # start of the element or node just read. @pos if defined will hold the number
52
+ # of bytes from the start of the document.
52
53
  class Sax
53
54
  # Create a new instance of the Sax handler class.
54
55
  def initialize()
56
+ #@pos = nil
55
57
  #@line = nil
56
58
  #@column = nil
57
59
  end
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '2.2.0'
4
+ VERSION = '2.2.1'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.0
4
+ version: 2.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ohler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-04-20 00:00:00.000000000 Z
11
+ date: 2015-07-30 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "A fast XML parser and object serializer that uses only standard C lib.\n
14
14
  \ \nOptimized XML (Ox), as the name implies was written to provide speed