ox 1.5.4 → 1.5.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

Files changed (61) hide show
  1. data/README.md +21 -3
  2. data/ext/ox/dump.c +64 -42
  3. data/ext/ox/extconf.rb +34 -2
  4. data/ext/ox/gen_load.c +13 -13
  5. data/ext/ox/obj_load.c +31 -28
  6. data/ext/ox/ox.c +41 -31
  7. data/ext/ox/ox.h +20 -13
  8. data/ext/ox/parse.c +1 -0
  9. data/ext/ox/sax.c +323 -26
  10. data/lib/ox.rb +1 -2
  11. data/lib/ox/element.rb +1 -1
  12. data/lib/ox/sax.rb +24 -11
  13. data/lib/ox/version.rb +1 -1
  14. metadata +4 -50
  15. data/test/Sample.graffle +0 -2318
  16. data/test/bench.rb +0 -53
  17. data/test/bug1.rb +0 -24
  18. data/test/bug2.rb +0 -38
  19. data/test/bug3.rb +0 -21
  20. data/test/cache16_test.rb +0 -17
  21. data/test/cache8_test.rb +0 -17
  22. data/test/cache_test.rb +0 -17
  23. data/test/files.rb +0 -29
  24. data/test/func.rb +0 -652
  25. data/test/gen_sample.rb +0 -22
  26. data/test/obj_sample.rb +0 -19
  27. data/test/ox/change.rb +0 -16
  28. data/test/ox/dir.rb +0 -21
  29. data/test/ox/doc.rb +0 -39
  30. data/test/ox/file.rb +0 -33
  31. data/test/ox/group.rb +0 -18
  32. data/test/ox/hasprops.rb +0 -18
  33. data/test/ox/layer.rb +0 -14
  34. data/test/ox/line.rb +0 -22
  35. data/test/ox/oval.rb +0 -12
  36. data/test/ox/rect.rb +0 -12
  37. data/test/ox/shape.rb +0 -37
  38. data/test/ox/text.rb +0 -23
  39. data/test/parse_cmp.rb +0 -261
  40. data/test/perf.rb +0 -91
  41. data/test/perf_gen.rb +0 -237
  42. data/test/perf_mars.rb +0 -114
  43. data/test/perf_obj.rb +0 -124
  44. data/test/perf_pod.rb +0 -88
  45. data/test/perf_sax.rb +0 -233
  46. data/test/perf_write.rb +0 -80
  47. data/test/sample.rb +0 -55
  48. data/test/sample/change.rb +0 -14
  49. data/test/sample/dir.rb +0 -19
  50. data/test/sample/doc.rb +0 -36
  51. data/test/sample/file.rb +0 -48
  52. data/test/sample/group.rb +0 -16
  53. data/test/sample/hasprops.rb +0 -16
  54. data/test/sample/layer.rb +0 -12
  55. data/test/sample/line.rb +0 -20
  56. data/test/sample/oval.rb +0 -10
  57. data/test/sample/rect.rb +0 -10
  58. data/test/sample/shape.rb +0 -35
  59. data/test/sample/text.rb +0 -20
  60. data/test/sax_test.rb +0 -468
  61. data/test/test.rb +0 -70
data/ext/ox/ox.c CHANGED
@@ -49,7 +49,9 @@ void Init_ox();
49
49
  VALUE Ox = Qnil;
50
50
 
51
51
  ID ox_at_id;
52
+ ID ox_at_value_id;
52
53
  ID ox_attr_id;
54
+ ID ox_attr_value_id;
53
55
  ID ox_attributes_id;
54
56
  ID ox_beg_id;
55
57
  ID ox_cdata_id;
@@ -74,11 +76,13 @@ ID ox_parse_id;
74
76
  ID ox_read_id;
75
77
  ID ox_readpartial_id;
76
78
  ID ox_start_element_id;
79
+ ID ox_string_id;
77
80
  ID ox_text_id;
78
81
  ID ox_to_c_id;
79
82
  ID ox_to_s_id;
80
83
  ID ox_to_sym_id;
81
84
  ID ox_tv_sec_id;
85
+ ID ox_tv_nsec_id;
82
86
  ID ox_tv_usec_id;
83
87
  ID ox_value_id;
84
88
 
@@ -96,6 +100,7 @@ VALUE ox_bag_clas;
96
100
  VALUE ox_struct_class;
97
101
  VALUE ox_time_class;
98
102
  VALUE ox_date_class;
103
+ VALUE ox_stringio_class;
99
104
 
100
105
  Cache ox_symbol_cache = 0;
101
106
  Cache ox_class_cache = 0;
@@ -476,7 +481,7 @@ load_file(int argc, VALUE *argv, VALUE self) {
476
481
  fseek(f, 0, SEEK_SET);
477
482
  if (len != fread(xml, 1, len, f)) {
478
483
  fclose(f);
479
- rb_raise(rb_eLoadError, "Failed to read %ld bytes from %s.\n", len, path);
484
+ rb_raise(rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
480
485
  }
481
486
  fclose(f);
482
487
  xml[len] = '\0';
@@ -601,7 +606,7 @@ dump(int argc, VALUE *argv, VALUE self) {
601
606
  rb_raise(rb_eNoMemError, "Not enough memory.\n");
602
607
  }
603
608
  rstr = rb_str_new2(xml);
604
- #ifdef ENCODING_INLINE_MAX
609
+ #if HAS_ENCODING_SUPPORT
605
610
  if ('\0' != *copts.encoding) {
606
611
  rb_enc_associate(rstr, rb_enc_find(copts.encoding));
607
612
  }
@@ -654,10 +659,7 @@ cache8_test(VALUE self) {
654
659
  }
655
660
 
656
661
  void Init_ox() {
657
- VALUE keep = Qnil;
658
-
659
662
  Ox = rb_define_module("Ox");
660
- keep = rb_cv_get(Ox, "@@keep"); // needed to stop GC from deleting and reusing VALUEs
661
663
 
662
664
  rb_define_module_function(Ox, "default_options", get_def_opts, 0);
663
665
  rb_define_module_function(Ox, "default_options=", set_def_opts, 1);
@@ -674,9 +676,13 @@ void Init_ox() {
674
676
  rb_define_module_function(Ox, "to_file", to_file, -1);
675
677
 
676
678
  rb_require("time");
679
+ rb_require("date");
680
+ rb_require("stringio");
677
681
 
678
682
  ox_at_id = rb_intern("at");
683
+ ox_at_value_id = rb_intern("@value");
679
684
  ox_attr_id = rb_intern("attr");
685
+ ox_attr_value_id = rb_intern("attr_value");
680
686
  ox_attributes_id = rb_intern("@attributes");
681
687
  ox_beg_id = rb_intern("@beg");
682
688
  ox_cdata_id = rb_intern("cdata");
@@ -701,43 +707,45 @@ void Init_ox() {
701
707
  ox_readpartial_id = rb_intern("readpartial");
702
708
  ox_read_id = rb_intern("read");
703
709
  ox_start_element_id = rb_intern("start_element");
710
+ ox_string_id = rb_intern("string");
704
711
  ox_text_id = rb_intern("text");
712
+ ox_value_id = rb_intern("value");
705
713
  ox_to_c_id = rb_intern("to_c");
706
714
  ox_to_s_id = rb_intern("to_s");
707
715
  ox_to_sym_id = rb_intern("to_sym");
708
716
  ox_tv_sec_id = rb_intern("tv_sec");
717
+ ox_tv_nsec_id = rb_intern("tv_nsec");
709
718
  ox_tv_usec_id = rb_intern("tv_usec");
710
- ox_value_id = rb_intern("@value");
711
719
 
712
720
  ox_time_class = rb_const_get(rb_cObject, rb_intern("Time"));
713
721
  ox_date_class = rb_const_get(rb_cObject, rb_intern("Date"));
714
722
  ox_struct_class = rb_const_get(rb_cObject, rb_intern("Struct"));
723
+ ox_stringio_class = rb_const_get(rb_cObject, rb_intern("StringIO"));
724
+
725
+ ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_gc_register_address(&ox_encoding_sym);
726
+ indent_sym = ID2SYM(rb_intern("indent")); rb_gc_register_address(&indent_sym);
727
+ xsd_date_sym = ID2SYM(rb_intern("xsd_date")); rb_gc_register_address(&xsd_date_sym);
728
+ opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_gc_register_address(&opt_format_sym);
729
+ mode_sym = ID2SYM(rb_intern("mode")); rb_gc_register_address(&mode_sym);
730
+ auto_sym = ID2SYM(rb_intern("auto")); rb_gc_register_address(&auto_sym);
731
+ optimized_sym = ID2SYM(rb_intern("optimized")); rb_gc_register_address(&optimized_sym);
732
+ object_sym = ID2SYM(rb_intern("object")); rb_gc_register_address(&object_sym);
733
+ circular_sym = ID2SYM(rb_intern("circular")); rb_gc_register_address(&circular_sym);
734
+ generic_sym = ID2SYM(rb_intern("generic")); rb_gc_register_address(&generic_sym);
735
+ limited_sym = ID2SYM(rb_intern("limited")); rb_gc_register_address(&limited_sym);
736
+ trace_sym = ID2SYM(rb_intern("trace")); rb_gc_register_address(&trace_sym);
737
+ effort_sym = ID2SYM(rb_intern("effort")); rb_gc_register_address(&effort_sym);
738
+ strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym);
739
+ tolerant_sym = ID2SYM(rb_intern("tolerant")); rb_gc_register_address(&tolerant_sym);
740
+ auto_define_sym = ID2SYM(rb_intern("auto_define")); rb_gc_register_address(&auto_define_sym);
741
+ with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_gc_register_address(&with_dtd_sym);
742
+ with_instruct_sym = ID2SYM(rb_intern("with_instructions")); rb_gc_register_address(&with_instruct_sym);
743
+ with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_gc_register_address(&with_xml_sym);
744
+ convert_special_sym = ID2SYM(rb_intern("convert_special")); rb_gc_register_address(&convert_special_sym);
745
+
746
+ ox_empty_string = rb_str_new2(""); rb_gc_register_address(&ox_empty_string);
747
+ ox_zero_fixnum = INT2NUM(0); rb_gc_register_address(&ox_zero_fixnum);
715
748
 
716
- ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_ary_push(keep, ox_encoding_sym);
717
- indent_sym = ID2SYM(rb_intern("indent")); rb_ary_push(keep, indent_sym);
718
- xsd_date_sym = ID2SYM(rb_intern("xsd_date")); rb_ary_push(keep, xsd_date_sym);
719
- opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_ary_push(keep, opt_format_sym);
720
- mode_sym = ID2SYM(rb_intern("mode")); rb_ary_push(keep, mode_sym);
721
- auto_sym = ID2SYM(rb_intern("auto")); rb_ary_push(keep, auto_sym);
722
- optimized_sym = ID2SYM(rb_intern("optimized")); rb_ary_push(keep, optimized_sym);
723
- object_sym = ID2SYM(rb_intern("object")); rb_ary_push(keep, object_sym);
724
- circular_sym = ID2SYM(rb_intern("circular")); rb_ary_push(keep, circular_sym);
725
- generic_sym = ID2SYM(rb_intern("generic")); rb_ary_push(keep, generic_sym);
726
- limited_sym = ID2SYM(rb_intern("limited")); rb_ary_push(keep, limited_sym);
727
- trace_sym = ID2SYM(rb_intern("trace")); rb_ary_push(keep, trace_sym);
728
- effort_sym = ID2SYM(rb_intern("effort")); rb_ary_push(keep, effort_sym);
729
- strict_sym = ID2SYM(rb_intern("strict")); rb_ary_push(keep, strict_sym);
730
- tolerant_sym = ID2SYM(rb_intern("tolerant")); rb_ary_push(keep, tolerant_sym);
731
- auto_define_sym = ID2SYM(rb_intern("auto_define")); rb_ary_push(keep, auto_define_sym);
732
- with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_ary_push(keep, with_dtd_sym);
733
- with_instruct_sym = ID2SYM(rb_intern("with_instructions")); rb_ary_push(keep, with_instruct_sym);
734
- with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_ary_push(keep, with_xml_sym);
735
- convert_special_sym = ID2SYM(rb_intern("convert_special")); rb_ary_push(keep, convert_special_sym);
736
-
737
- ox_empty_string = rb_str_new2(""); rb_ary_push(keep, ox_empty_string);
738
- ox_zero_fixnum = INT2NUM(0); rb_ary_push(keep, ox_zero_fixnum);
739
-
740
- //rb_require("node"); // generic xml node classes
741
749
  ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
742
750
  ox_element_clas = rb_const_get_at(Ox, rb_intern("Element"));
743
751
  ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment"));
@@ -749,6 +757,8 @@ void Init_ox() {
749
757
  ox_cache_new(&ox_class_cache);
750
758
  ox_cache_new(&ox_attr_cache);
751
759
 
760
+ ox_sax_define();
761
+
752
762
  rb_define_module_function(Ox, "cache_test", cache_test, 0);
753
763
  rb_define_module_function(Ox, "cache8_test", cache8_test, 0);
754
764
  }
data/ext/ox/ox.h CHANGED
@@ -38,26 +38,26 @@ extern "C" {
38
38
  #endif
39
39
  #endif
40
40
 
41
+ #define RSTRING_NOT_MODIFIED
42
+
41
43
  #include "ruby.h"
42
- #ifdef HAVE_RUBY_ENCODING_H
43
- // HAVE_RUBY_ENCODING_H defined for Ruby 1.9
44
+ #if HAS_ENCODING_SUPPORT
44
45
  #include "ruby/encoding.h"
45
46
  #endif
46
- #include "cache.h"
47
47
 
48
- #ifdef JRUBY
49
- #define NO_RSTRUCT 1
48
+ #ifdef RUBINIUS_RUBY
49
+ #undef T_COMPLEX
50
+ enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
51
+ #else
52
+ #if HAS_TOP_LEVEL_ST_H
53
+ // A top-level st.h is only found on Travis; all other environments use ruby/st.h. Which one is present seems to vary depending on the Travis machine picked up.
54
+ #include "st.h"
55
+ #else
56
+ #include "ruby/st.h"
50
57
  #endif
51
-
52
- #if (defined RBX_Qnil && !defined RUBINIUS)
53
- #define RUBINIUS
54
58
  #endif
55
59
 
56
- #ifdef RUBINIUS
57
- #undef T_RATIONAL
58
- #undef T_COMPLEX
59
- #define NO_RSTRUCT 1
60
- #endif
60
+ #include "cache.h"
61
61
 
62
62
  #define raise_error(msg, xml, current) _ox_raise_error(msg, xml, current, __FILE__, __LINE__)
63
63
 
@@ -210,6 +210,8 @@ extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Eff
210
210
  extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
211
211
 
212
212
  extern void ox_sax_parse(VALUE handler, VALUE io, int convert);
213
+ extern void ox_sax_define(void);
214
+
213
215
 
214
216
  extern char* ox_write_obj_to_str(VALUE obj, Options copts);
215
217
  extern void ox_write_obj_to_file(VALUE obj, const char *path, Options copts);
@@ -219,7 +221,9 @@ extern struct _Options ox_default_options;
219
221
  extern VALUE Ox;
220
222
 
221
223
  extern ID ox_at_id;
224
+ extern ID ox_at_value_id;
222
225
  extern ID ox_attr_id;
226
+ extern ID ox_attr_value_id;
223
227
  extern ID ox_attributes_id;
224
228
  extern ID ox_beg_id;
225
229
  extern ID ox_cdata_id;
@@ -244,11 +248,13 @@ extern ID ox_parse_id;
244
248
  extern ID ox_read_id;
245
249
  extern ID ox_readpartial_id;
246
250
  extern ID ox_start_element_id;
251
+ extern ID ox_string_id;
247
252
  extern ID ox_text_id;
248
253
  extern ID ox_to_c_id;
249
254
  extern ID ox_to_s_id;
250
255
  extern ID ox_to_sym_id;
251
256
  extern ID ox_tv_sec_id;
257
+ extern ID ox_tv_nsec_id;
252
258
  extern ID ox_tv_usec_id;
253
259
  extern ID ox_value_id;
254
260
 
@@ -257,6 +263,7 @@ extern VALUE ox_empty_string;
257
263
  extern VALUE ox_encoding_sym;
258
264
  extern VALUE ox_struct_class;
259
265
  extern VALUE ox_time_class;
266
+ extern VALUE ox_stringio_class;
260
267
  extern VALUE ox_zero_fixnum;
261
268
 
262
269
  extern VALUE ox_document_clas;
data/ext/ox/parse.c CHANGED
@@ -200,6 +200,7 @@ read_instruction(PInfo pi) {
200
200
  if (MAX_ATTRS <= (a - attrs)) {
201
201
  raise_error("too many attributes", pi->str, pi->s);
202
202
  }
203
+ next_non_white(pi);
203
204
  }
204
205
  if ('?' == *pi->s) {
205
206
  pi->s++;
data/ext/ox/sax.c CHANGED
@@ -31,10 +31,11 @@
31
31
  #include <stdlib.h>
32
32
  #include <errno.h>
33
33
  #include <stdio.h>
34
- #include <string.h>
34
+ #include <strings.h>
35
35
  #include <sys/types.h>
36
36
  #include <sys/uio.h>
37
37
  #include <unistd.h>
38
+ #include <time.h>
38
39
 
39
40
  #include "ruby.h"
40
41
  #include "ox.h"
@@ -50,22 +51,26 @@ typedef struct _SaxDrive {
50
51
  int line;
51
52
  int col;
52
53
  VALUE handler;
54
+ VALUE value_obj;
53
55
  int (*read_func)(struct _SaxDrive *dr);
54
56
  int convert_special;
55
57
  union {
56
- int fd;
57
- VALUE io;
58
+ int fd;
59
+ VALUE io;
60
+ const char *in_str;
58
61
  };
59
62
  int has_instruct;
60
63
  int has_attr;
64
+ int has_attr_value;
61
65
  int has_doctype;
62
66
  int has_comment;
63
67
  int has_cdata;
64
68
  int has_text;
69
+ int has_value;
65
70
  int has_start_element;
66
71
  int has_end_element;
67
72
  int has_error;
68
- #ifdef HAVE_RUBY_ENCODING_H
73
+ #if HAS_ENCODING_SUPPORT
69
74
  rb_encoding *encoding;
70
75
  #endif
71
76
  } *SaxDrive;
@@ -91,8 +96,14 @@ static VALUE rescue_cb(VALUE rdr, VALUE err);
91
96
  static VALUE io_cb(VALUE rdr);
92
97
  static VALUE partial_io_cb(VALUE rdr);
93
98
  static int read_from_io(SaxDrive dr);
99
+ #ifndef JRUBY_RUBY
94
100
  static int read_from_fd(SaxDrive dr);
101
+ #endif
95
102
  static int read_from_io_partial(SaxDrive dr);
103
+ static int read_from_str(SaxDrive dr);
104
+
105
+ static VALUE sax_value_class;
106
+
96
107
 
97
108
  static inline char
98
109
  sax_drive_get(SaxDrive dr) {
@@ -176,7 +187,7 @@ str2sym(const char *str, SaxDrive dr) {
176
187
  VALUE sym;
177
188
 
178
189
  if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot))) {
179
- #ifdef HAVE_RUBY_ENCODING_H
190
+ #if HAS_ENCODING_SUPPORT
180
191
  if (0 != dr->encoding) {
181
192
  VALUE rstr = rb_str_new2(str);
182
193
 
@@ -203,10 +214,12 @@ ox_sax_parse(VALUE handler, VALUE io, int convert) {
203
214
  printf("*** sax_parse with these flags\n");
204
215
  printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
205
216
  printf(" has_attr = %s\n", dr.has_attr ? "true" : "false");
217
+ printf(" has_attr_value = %s\n", dr.has_attr_value ? "true" : "false");
206
218
  printf(" has_doctype = %s\n", dr.has_doctype ? "true" : "false");
207
219
  printf(" has_comment = %s\n", dr.has_comment ? "true" : "false");
208
220
  printf(" has_cdata = %s\n", dr.has_cdata ? "true" : "false");
209
221
  printf(" has_text = %s\n", dr.has_text ? "true" : "false");
222
+ printf(" has_value = %s\n", dr.has_value ? "true" : "false");
210
223
  printf(" has_start_element = %s\n", dr.has_start_element ? "true" : "false");
211
224
  printf(" has_end_element = %s\n", dr.has_end_element ? "true" : "false");
212
225
  printf(" has_error = %s\n", dr.has_error ? "true" : "false");
@@ -215,9 +228,34 @@ ox_sax_parse(VALUE handler, VALUE io, int convert) {
215
228
  sax_drive_cleanup(&dr);
216
229
  }
217
230
 
231
+ inline static int
232
+ respond_to(VALUE obj, ID method) {
233
+ #ifdef JRUBY_RUBY
234
+ // There is a bug in JRuby where rb_respond_to() returns true (1) even if
235
+ // a method is private.
236
+ {
237
+ VALUE args[1];
238
+
239
+ *args = ID2SYM(method);
240
+ return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args));
241
+ }
242
+ #else
243
+ return rb_respond_to(obj, method);
244
+ #endif
245
+ }
246
+
218
247
  static void
219
248
  sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
220
- if (rb_respond_to(io, ox_readpartial_id)) {
249
+ if (ox_stringio_class == rb_obj_class(io)) {
250
+ VALUE s = rb_funcall2(io, ox_string_id, 0, 0);
251
+
252
+ dr->read_func = read_from_str;
253
+ dr->in_str = StringValuePtr(s);
254
+ } else if (rb_respond_to(io, ox_readpartial_id)) {
255
+ #ifdef JRUBY_RUBY
256
+ dr->read_func = read_from_io_partial;
257
+ dr->io = io;
258
+ #else
221
259
  VALUE rfd;
222
260
 
223
261
  if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) {
@@ -227,7 +265,12 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
227
265
  dr->read_func = read_from_io_partial;
228
266
  dr->io = io;
229
267
  }
268
+ #endif
230
269
  } else if (rb_respond_to(io, ox_read_id)) {
270
+ #ifdef JRUBY_RUBY
271
+ dr->read_func = read_from_io;
272
+ dr->io = io;
273
+ #else
231
274
  VALUE rfd;
232
275
 
233
276
  if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) {
@@ -237,6 +280,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
237
280
  dr->read_func = read_from_io;
238
281
  dr->io = io;
239
282
  }
283
+ #endif
240
284
  } else {
241
285
  rb_raise(rb_eArgError, "sax_parser io argument must respond to readpartial() or read().\n");
242
286
  }
@@ -249,17 +293,21 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
249
293
  dr->line = 1;
250
294
  dr->col = 0;
251
295
  dr->handler = handler;
296
+ dr->value_obj = rb_data_object_alloc(sax_value_class, dr, 0, 0);
297
+ rb_gc_register_address(&dr->value_obj);
252
298
  dr->convert_special = convert;
253
- dr->has_instruct = rb_respond_to(handler, ox_instruct_id);
254
- dr->has_attr = rb_respond_to(handler, ox_attr_id);
255
- dr->has_doctype = rb_respond_to(handler, ox_doctype_id);
256
- dr->has_comment = rb_respond_to(handler, ox_comment_id);
257
- dr->has_cdata = rb_respond_to(handler, ox_cdata_id);
258
- dr->has_text = rb_respond_to(handler, ox_text_id);
259
- dr->has_start_element = rb_respond_to(handler, ox_start_element_id);
260
- dr->has_end_element = rb_respond_to(handler, ox_end_element_id);
261
- dr->has_error = rb_respond_to(handler, ox_error_id);
262
- #ifdef HAVE_RUBY_ENCODING_H
299
+ dr->has_instruct = respond_to(handler, ox_instruct_id);
300
+ dr->has_attr = respond_to(handler, ox_attr_id);
301
+ dr->has_attr_value = respond_to(handler, ox_attr_value_id);
302
+ dr->has_doctype = respond_to(handler, ox_doctype_id);
303
+ dr->has_comment = respond_to(handler, ox_comment_id);
304
+ dr->has_cdata = respond_to(handler, ox_cdata_id);
305
+ dr->has_text = respond_to(handler, ox_text_id);
306
+ dr->has_value = respond_to(handler, ox_value_id);
307
+ dr->has_start_element = respond_to(handler, ox_start_element_id);
308
+ dr->has_end_element = respond_to(handler, ox_end_element_id);
309
+ dr->has_error = respond_to(handler, ox_error_id);
310
+ #if HAS_ENCODING_SUPPORT
263
311
  if ('\0' == *ox_default_options.encoding) {
264
312
  dr->encoding = 0;
265
313
  } else {
@@ -270,6 +318,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
270
318
 
271
319
  static void
272
320
  sax_drive_cleanup(SaxDrive dr) {
321
+ rb_gc_unregister_address(&dr->value_obj);
273
322
  if (dr->base_buf != dr->buf) {
274
323
  xfree(dr->buf);
275
324
  }
@@ -505,7 +554,7 @@ read_cdata(SaxDrive dr) {
505
554
  VALUE args[1];
506
555
 
507
556
  args[0] = rb_str_new2(dr->str);
508
- #ifdef HAVE_RUBY_ENCODING_H
557
+ #if HAS_ENCODING_SUPPORT
509
558
  if (0 != dr->encoding) {
510
559
  rb_enc_associate(args[0], dr->encoding);
511
560
  }
@@ -549,7 +598,7 @@ read_comment(SaxDrive dr) {
549
598
  VALUE args[1];
550
599
 
551
600
  args[0] = rb_str_new2(dr->str);
552
- #ifdef HAVE_RUBY_ENCODING_H
601
+ #if HAS_ENCODING_SUPPORT
553
602
  if (0 != dr->encoding) {
554
603
  rb_enc_associate(args[0], dr->encoding);
555
604
  }
@@ -573,7 +622,6 @@ read_element(SaxDrive dr) {
573
622
  if ('\0' == (c = read_name_token(dr))) {
574
623
  return -1;
575
624
  }
576
- // TBD encode is needed
577
625
  name = str2sym(dr->str, dr);
578
626
  if (dr->has_start_element) {
579
627
  VALUE args[1];
@@ -637,16 +685,21 @@ read_text(SaxDrive dr) {
637
685
  }
638
686
  }
639
687
  *(dr->cur - 1) = '\0';
640
- if (dr->has_text) {
688
+ if (dr->has_value) {
641
689
  VALUE args[1];
642
-
690
+
691
+ *args = dr->value_obj;
692
+ rb_funcall2(dr->handler, ox_value_id, 1, args);
693
+ } else if (dr->has_text) {
694
+ VALUE args[1];
695
+
643
696
  if (dr->convert_special) {
644
697
  if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
645
698
  sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
646
699
  }
647
700
  }
648
701
  args[0] = rb_str_new2(dr->str);
649
- #ifdef HAVE_RUBY_ENCODING_H
702
+ #if HAS_ENCODING_SUPPORT
650
703
  if (0 != dr->encoding) {
651
704
  rb_enc_associate(args[0], dr->encoding);
652
705
  }
@@ -677,7 +730,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
677
730
  if (is_xml && 0 == strcmp("encoding", dr->str)) {
678
731
  is_encoding = 1;
679
732
  }
680
- if (dr->has_attr) {
733
+ // TBD use symbol cache
734
+ if (dr->has_attr || dr->has_attr_value) {
681
735
  name = str2sym(dr->str, dr);
682
736
  }
683
737
  if (is_white(c)) {
@@ -690,12 +744,18 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
690
744
  if (0 != read_quoted_value(dr)) {
691
745
  return -1;
692
746
  }
693
- #ifdef HAVE_RUBY_ENCODING_H
747
+ #if HAS_ENCODING_SUPPORT
694
748
  if (is_encoding) {
695
749
  dr->encoding = rb_enc_find(dr->str);
696
750
  }
697
751
  #endif
698
- if (dr->has_attr) {
752
+ if (dr->has_attr_value) {
753
+ VALUE args[2];
754
+
755
+ args[0] = name;
756
+ args[1] = dr->value_obj;
757
+ rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
758
+ } else if (dr->has_attr) {
699
759
  VALUE args[2];
700
760
 
701
761
  args[0] = name;
@@ -703,7 +763,7 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
703
763
  sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
704
764
  }
705
765
  args[1] = rb_str_new2(dr->str);
706
- #ifdef HAVE_RUBY_ENCODING_H
766
+ #if HAS_ENCODING_SUPPORT
707
767
  if (0 != dr->encoding) {
708
768
  rb_enc_associate(args[1], dr->encoding);
709
769
  }
@@ -781,12 +841,21 @@ read_quoted_value(SaxDrive dr) {
781
841
 
782
842
  static VALUE
783
843
  rescue_cb(VALUE rdr, VALUE err) {
844
+ #ifndef JRUBY_RUBY
845
+ // JRuby seems to play by a different set of rules. It passes in a Fixnum
846
+ // instead of an error like other Rubies. For now assume all errors are
847
+ // EOF and deal with the results further down the line.
848
+ #if (defined(RUBINIUS_RUBY) || (1 == RUBY_VERSION_MAJOR && 8 == RUBY_VERSION_MINOR))
849
+ if (rb_obj_class(err) != rb_eTypeError) {
850
+ #else
784
851
  if (rb_obj_class(err) != rb_eEOFError) {
852
+ #endif
785
853
  SaxDrive dr = (SaxDrive)rdr;
786
854
 
787
855
  sax_drive_cleanup(dr);
788
856
  rb_raise(err, "at line %d, column %d\n", dr->line, dr->col);
789
857
  }
858
+ #endif
790
859
  return Qfalse;
791
860
  }
792
861
 
@@ -839,6 +908,7 @@ read_from_io(SaxDrive dr) {
839
908
  return (Qfalse == rb_rescue(io_cb, (VALUE)dr, rescue_cb, (VALUE)dr));
840
909
  }
841
910
 
911
+ #ifndef JRUBY_RUBY
842
912
  static int
843
913
  read_from_fd(SaxDrive dr) {
844
914
  ssize_t cnt;
@@ -853,7 +923,26 @@ read_from_fd(SaxDrive dr) {
853
923
  }
854
924
  return 0;
855
925
  }
926
+ #endif
927
+
928
+ static int
929
+ read_from_str(SaxDrive dr) {
930
+ size_t max = dr->buf_end - dr->cur - 1;
931
+ char *s;
932
+ long cnt;
933
+
934
+ if ('\0' == *dr->in_str) {
935
+ // done
936
+ return -1;
937
+ }
938
+ s = stpncpy(dr->cur, dr->in_str, max);
939
+ *s = '\0';
940
+ cnt = s - dr->cur;
941
+ dr->in_str += cnt;
942
+ dr->read_end = dr->cur + cnt;
856
943
 
944
+ return 0;
945
+ }
857
946
 
858
947
  static int
859
948
  collapse_special(char *str) {
@@ -911,3 +1000,211 @@ collapse_special(char *str) {
911
1000
 
912
1001
  return 0;
913
1002
  }
1003
+
1004
+ static VALUE
1005
+ parse_double_time(const char *text) {
1006
+ long v = 0;
1007
+ long v2 = 0;
1008
+ const char *dot = 0;
1009
+ char c;
1010
+
1011
+ for (; '.' != *text; text++) {
1012
+ c = *text;
1013
+ if (c < '0' || '9' < c) {
1014
+ return Qnil;
1015
+ }
1016
+ v = 10 * v + (long)(c - '0');
1017
+ }
1018
+ dot = text++;
1019
+ for (; '\0' != *text && text - dot <= 6; text++) {
1020
+ c = *text;
1021
+ if (c < '0' || '9' < c) {
1022
+ return Qnil;
1023
+ }
1024
+ v2 = 10 * v2 + (long)(c - '0');
1025
+ }
1026
+ for (; text - dot <= 9; text++) {
1027
+ v2 *= 10;
1028
+ }
1029
+ #if HAS_NANO_TIME
1030
+ return rb_time_nano_new(v, v2);
1031
+ #else
1032
+ return rb_time_new(v, v2 / 1000);
1033
+ #endif
1034
+ }
1035
+
1036
+ typedef struct _Tp {
1037
+ int cnt;
1038
+ char end;
1039
+ char alt;
1040
+ } *Tp;
1041
+
1042
+ static VALUE
1043
+ parse_xsd_time(const char *text) {
1044
+ long cargs[10];
1045
+ long *cp = cargs;
1046
+ long v;
1047
+ int i;
1048
+ char c = '\0';
1049
+ struct _Tp tpa[10] = { { 4, '-', '-' },
1050
+ { 2, '-', '-' },
1051
+ { 2, 'T', ' ' },
1052
+ { 2, ':', ':' },
1053
+ { 2, ':', ':' },
1054
+ { 2, '.', '.' },
1055
+ { 9, '+', '-' },
1056
+ { 2, ':', ':' },
1057
+ { 2, '\0', '\0' },
1058
+ { 0, '\0', '\0' } };
1059
+ Tp tp = tpa;
1060
+ struct tm tm;
1061
+
1062
+ memset(cargs, 0, sizeof(cargs));
1063
+ for (; 0 != tp->cnt; tp++) {
1064
+ for (i = tp->cnt, v = 0; 0 < i ; text++, i--) {
1065
+ c = *text;
1066
+ if (c < '0' || '9' < c) {
1067
+ if ('\0' == c || tp->end == c || tp->alt == c) {
1068
+ break;
1069
+ }
1070
+ return Qnil;
1071
+ }
1072
+ v = 10 * v + (long)(c - '0');
1073
+ }
1074
+ if ('\0' == c) {
1075
+ break;
1076
+ }
1077
+ c = *text++;
1078
+ if (tp->end != c && tp->alt != c) {
1079
+ return Qnil;
1080
+ }
1081
+ *cp++ = v;
1082
+ }
1083
+ tm.tm_year = (int)cargs[0] - 1900;
1084
+ tm.tm_mon = (int)cargs[1] - 1;
1085
+ tm.tm_mday = (int)cargs[2];
1086
+ tm.tm_hour = (int)cargs[3];
1087
+ tm.tm_min = (int)cargs[4];
1088
+ tm.tm_sec = (int)cargs[5];
1089
+ #if HAS_NANO_TIME
1090
+ return rb_time_nano_new(mktime(&tm), cargs[6]);
1091
+ #else
1092
+ return rb_time_new(mktime(&tm), cargs[6] / 1000);
1093
+ #endif
1094
+ }
1095
+
1096
+ static VALUE
1097
+ sax_value_as_s(VALUE self) {
1098
+ SaxDrive dr = DATA_PTR(self);
1099
+ VALUE rs;
1100
+
1101
+ if ('\0' == *dr->str) {
1102
+ return Qnil;
1103
+ }
1104
+ if (dr->convert_special) {
1105
+ if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
1106
+ sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
1107
+ }
1108
+ }
1109
+ rs = rb_str_new2(dr->str);
1110
+ #if HAS_ENCODING_SUPPORT
1111
+ if (0 != dr->encoding) {
1112
+ rb_enc_associate(rs, dr->encoding);
1113
+ }
1114
+ #endif
1115
+ return rs;
1116
+ }
1117
+
1118
+ static VALUE
1119
+ sax_value_as_sym(VALUE self) {
1120
+ SaxDrive dr = DATA_PTR(self);
1121
+
1122
+ if ('\0' == *dr->str) {
1123
+ return Qnil;
1124
+ }
1125
+ return str2sym(dr->str, dr);
1126
+ }
1127
+
1128
+ static VALUE
1129
+ sax_value_as_f(VALUE self) {
1130
+ SaxDrive dr = DATA_PTR(self);
1131
+
1132
+ if ('\0' == *dr->str) {
1133
+ return Qnil;
1134
+ }
1135
+ return rb_float_new(strtod(dr->str, 0));
1136
+ }
1137
+
1138
+ static VALUE
1139
+ sax_value_as_i(VALUE self) {
1140
+ SaxDrive dr = DATA_PTR(self);
1141
+ const char *s = dr->str;
1142
+ long n = 0;
1143
+ int neg = 0;
1144
+
1145
+ if ('\0' == *s) {
1146
+ return Qnil;
1147
+ }
1148
+ if ('-' == *s) {
1149
+ neg = 1;
1150
+ s++;
1151
+ } else if ('+' == *s) {
1152
+ s++;
1153
+ }
1154
+ for (; '\0' != *s; s++) {
1155
+ if ('0' <= *s && *s <= '9') {
1156
+ n = n * 10 + (*s - '0');
1157
+ } else {
1158
+ rb_raise(rb_eArgError, "Not a valid Fixnum.\n");
1159
+ }
1160
+ }
1161
+ if (neg) {
1162
+ n = -n;
1163
+ }
1164
+ return LONG2NUM(n);
1165
+ }
1166
+
1167
+ static VALUE
1168
+ sax_value_as_time(VALUE self) {
1169
+ SaxDrive dr = DATA_PTR(self);
1170
+ const char *str = dr->str;
1171
+ VALUE t;
1172
+
1173
+ if ('\0' == *str) {
1174
+ return Qnil;
1175
+ }
1176
+ if (Qnil == (t = parse_double_time(str)) &&
1177
+ Qnil == (t = parse_xsd_time(str))) {
1178
+ VALUE args[1];
1179
+
1180
+ //printf("**** time parse\n");
1181
+ *args = rb_str_new2(str);
1182
+ t = rb_funcall2(ox_time_class, ox_parse_id, 1, args);
1183
+ }
1184
+ return t;
1185
+ }
1186
+
1187
+ static VALUE
1188
+ sax_value_as_bool(VALUE self) {
1189
+ return (0 == strcasecmp("true", ((SaxDrive)DATA_PTR(self))->str)) ? Qtrue : Qfalse;
1190
+ }
1191
+
1192
+ static VALUE
1193
+ sax_value_empty(VALUE self) {
1194
+ return ('\0' == *((SaxDrive)DATA_PTR(self))->str) ? Qtrue : Qfalse;
1195
+ }
1196
+
1197
+ void
1198
+ ox_sax_define() {
1199
+ VALUE sax_module = rb_const_get_at(Ox, rb_intern("Sax"));
1200
+
1201
+ sax_value_class = rb_define_class_under(sax_module, "Value", rb_cObject);
1202
+
1203
+ rb_define_method(sax_value_class, "as_s", sax_value_as_s, 0);
1204
+ rb_define_method(sax_value_class, "as_sym", sax_value_as_sym, 0);
1205
+ rb_define_method(sax_value_class, "as_i", sax_value_as_i, 0);
1206
+ rb_define_method(sax_value_class, "as_f", sax_value_as_f, 0);
1207
+ rb_define_method(sax_value_class, "as_time", sax_value_as_time, 0);
1208
+ rb_define_method(sax_value_class, "as_bool", sax_value_as_bool, 0);
1209
+ rb_define_method(sax_value_class, "empty?", sax_value_empty, 0);
1210
+ }