ox 1.5.4 → 1.5.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +21 -3
- data/ext/ox/dump.c +64 -42
- data/ext/ox/extconf.rb +34 -2
- data/ext/ox/gen_load.c +13 -13
- data/ext/ox/obj_load.c +31 -28
- data/ext/ox/ox.c +41 -31
- data/ext/ox/ox.h +20 -13
- data/ext/ox/parse.c +1 -0
- data/ext/ox/sax.c +323 -26
- data/lib/ox.rb +1 -2
- data/lib/ox/element.rb +1 -1
- data/lib/ox/sax.rb +24 -11
- data/lib/ox/version.rb +1 -1
- metadata +4 -50
- data/test/Sample.graffle +0 -2318
- data/test/bench.rb +0 -53
- data/test/bug1.rb +0 -24
- data/test/bug2.rb +0 -38
- data/test/bug3.rb +0 -21
- data/test/cache16_test.rb +0 -17
- data/test/cache8_test.rb +0 -17
- data/test/cache_test.rb +0 -17
- data/test/files.rb +0 -29
- data/test/func.rb +0 -652
- data/test/gen_sample.rb +0 -22
- data/test/obj_sample.rb +0 -19
- data/test/ox/change.rb +0 -16
- data/test/ox/dir.rb +0 -21
- data/test/ox/doc.rb +0 -39
- data/test/ox/file.rb +0 -33
- data/test/ox/group.rb +0 -18
- data/test/ox/hasprops.rb +0 -18
- data/test/ox/layer.rb +0 -14
- data/test/ox/line.rb +0 -22
- data/test/ox/oval.rb +0 -12
- data/test/ox/rect.rb +0 -12
- data/test/ox/shape.rb +0 -37
- data/test/ox/text.rb +0 -23
- data/test/parse_cmp.rb +0 -261
- data/test/perf.rb +0 -91
- data/test/perf_gen.rb +0 -237
- data/test/perf_mars.rb +0 -114
- data/test/perf_obj.rb +0 -124
- data/test/perf_pod.rb +0 -88
- data/test/perf_sax.rb +0 -233
- data/test/perf_write.rb +0 -80
- data/test/sample.rb +0 -55
- data/test/sample/change.rb +0 -14
- data/test/sample/dir.rb +0 -19
- data/test/sample/doc.rb +0 -36
- data/test/sample/file.rb +0 -48
- data/test/sample/group.rb +0 -16
- data/test/sample/hasprops.rb +0 -16
- data/test/sample/layer.rb +0 -12
- data/test/sample/line.rb +0 -20
- data/test/sample/oval.rb +0 -10
- data/test/sample/rect.rb +0 -10
- data/test/sample/shape.rb +0 -35
- data/test/sample/text.rb +0 -20
- data/test/sax_test.rb +0 -468
- data/test/test.rb +0 -70
data/ext/ox/ox.c
CHANGED
@@ -49,7 +49,9 @@ void Init_ox();
|
|
49
49
|
VALUE Ox = Qnil;
|
50
50
|
|
51
51
|
ID ox_at_id;
|
52
|
+
ID ox_at_value_id;
|
52
53
|
ID ox_attr_id;
|
54
|
+
ID ox_attr_value_id;
|
53
55
|
ID ox_attributes_id;
|
54
56
|
ID ox_beg_id;
|
55
57
|
ID ox_cdata_id;
|
@@ -74,11 +76,13 @@ ID ox_parse_id;
|
|
74
76
|
ID ox_read_id;
|
75
77
|
ID ox_readpartial_id;
|
76
78
|
ID ox_start_element_id;
|
79
|
+
ID ox_string_id;
|
77
80
|
ID ox_text_id;
|
78
81
|
ID ox_to_c_id;
|
79
82
|
ID ox_to_s_id;
|
80
83
|
ID ox_to_sym_id;
|
81
84
|
ID ox_tv_sec_id;
|
85
|
+
ID ox_tv_nsec_id;
|
82
86
|
ID ox_tv_usec_id;
|
83
87
|
ID ox_value_id;
|
84
88
|
|
@@ -96,6 +100,7 @@ VALUE ox_bag_clas;
|
|
96
100
|
VALUE ox_struct_class;
|
97
101
|
VALUE ox_time_class;
|
98
102
|
VALUE ox_date_class;
|
103
|
+
VALUE ox_stringio_class;
|
99
104
|
|
100
105
|
Cache ox_symbol_cache = 0;
|
101
106
|
Cache ox_class_cache = 0;
|
@@ -476,7 +481,7 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
476
481
|
fseek(f, 0, SEEK_SET);
|
477
482
|
if (len != fread(xml, 1, len, f)) {
|
478
483
|
fclose(f);
|
479
|
-
rb_raise(rb_eLoadError, "Failed to read %ld bytes from %s.\n", len, path);
|
484
|
+
rb_raise(rb_eLoadError, "Failed to read %ld bytes from %s.\n", (long)len, path);
|
480
485
|
}
|
481
486
|
fclose(f);
|
482
487
|
xml[len] = '\0';
|
@@ -601,7 +606,7 @@ dump(int argc, VALUE *argv, VALUE self) {
|
|
601
606
|
rb_raise(rb_eNoMemError, "Not enough memory.\n");
|
602
607
|
}
|
603
608
|
rstr = rb_str_new2(xml);
|
604
|
-
#
|
609
|
+
#if HAS_ENCODING_SUPPORT
|
605
610
|
if ('\0' != *copts.encoding) {
|
606
611
|
rb_enc_associate(rstr, rb_enc_find(copts.encoding));
|
607
612
|
}
|
@@ -654,10 +659,7 @@ cache8_test(VALUE self) {
|
|
654
659
|
}
|
655
660
|
|
656
661
|
void Init_ox() {
|
657
|
-
VALUE keep = Qnil;
|
658
|
-
|
659
662
|
Ox = rb_define_module("Ox");
|
660
|
-
keep = rb_cv_get(Ox, "@@keep"); // needed to stop GC from deleting and reusing VALUEs
|
661
663
|
|
662
664
|
rb_define_module_function(Ox, "default_options", get_def_opts, 0);
|
663
665
|
rb_define_module_function(Ox, "default_options=", set_def_opts, 1);
|
@@ -674,9 +676,13 @@ void Init_ox() {
|
|
674
676
|
rb_define_module_function(Ox, "to_file", to_file, -1);
|
675
677
|
|
676
678
|
rb_require("time");
|
679
|
+
rb_require("date");
|
680
|
+
rb_require("stringio");
|
677
681
|
|
678
682
|
ox_at_id = rb_intern("at");
|
683
|
+
ox_at_value_id = rb_intern("@value");
|
679
684
|
ox_attr_id = rb_intern("attr");
|
685
|
+
ox_attr_value_id = rb_intern("attr_value");
|
680
686
|
ox_attributes_id = rb_intern("@attributes");
|
681
687
|
ox_beg_id = rb_intern("@beg");
|
682
688
|
ox_cdata_id = rb_intern("cdata");
|
@@ -701,43 +707,45 @@ void Init_ox() {
|
|
701
707
|
ox_readpartial_id = rb_intern("readpartial");
|
702
708
|
ox_read_id = rb_intern("read");
|
703
709
|
ox_start_element_id = rb_intern("start_element");
|
710
|
+
ox_string_id = rb_intern("string");
|
704
711
|
ox_text_id = rb_intern("text");
|
712
|
+
ox_value_id = rb_intern("value");
|
705
713
|
ox_to_c_id = rb_intern("to_c");
|
706
714
|
ox_to_s_id = rb_intern("to_s");
|
707
715
|
ox_to_sym_id = rb_intern("to_sym");
|
708
716
|
ox_tv_sec_id = rb_intern("tv_sec");
|
717
|
+
ox_tv_nsec_id = rb_intern("tv_nsec");
|
709
718
|
ox_tv_usec_id = rb_intern("tv_usec");
|
710
|
-
ox_value_id = rb_intern("@value");
|
711
719
|
|
712
720
|
ox_time_class = rb_const_get(rb_cObject, rb_intern("Time"));
|
713
721
|
ox_date_class = rb_const_get(rb_cObject, rb_intern("Date"));
|
714
722
|
ox_struct_class = rb_const_get(rb_cObject, rb_intern("Struct"));
|
723
|
+
ox_stringio_class = rb_const_get(rb_cObject, rb_intern("StringIO"));
|
724
|
+
|
725
|
+
ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_gc_register_address(&ox_encoding_sym);
|
726
|
+
indent_sym = ID2SYM(rb_intern("indent")); rb_gc_register_address(&indent_sym);
|
727
|
+
xsd_date_sym = ID2SYM(rb_intern("xsd_date")); rb_gc_register_address(&xsd_date_sym);
|
728
|
+
opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_gc_register_address(&opt_format_sym);
|
729
|
+
mode_sym = ID2SYM(rb_intern("mode")); rb_gc_register_address(&mode_sym);
|
730
|
+
auto_sym = ID2SYM(rb_intern("auto")); rb_gc_register_address(&auto_sym);
|
731
|
+
optimized_sym = ID2SYM(rb_intern("optimized")); rb_gc_register_address(&optimized_sym);
|
732
|
+
object_sym = ID2SYM(rb_intern("object")); rb_gc_register_address(&object_sym);
|
733
|
+
circular_sym = ID2SYM(rb_intern("circular")); rb_gc_register_address(&circular_sym);
|
734
|
+
generic_sym = ID2SYM(rb_intern("generic")); rb_gc_register_address(&generic_sym);
|
735
|
+
limited_sym = ID2SYM(rb_intern("limited")); rb_gc_register_address(&limited_sym);
|
736
|
+
trace_sym = ID2SYM(rb_intern("trace")); rb_gc_register_address(&trace_sym);
|
737
|
+
effort_sym = ID2SYM(rb_intern("effort")); rb_gc_register_address(&effort_sym);
|
738
|
+
strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym);
|
739
|
+
tolerant_sym = ID2SYM(rb_intern("tolerant")); rb_gc_register_address(&tolerant_sym);
|
740
|
+
auto_define_sym = ID2SYM(rb_intern("auto_define")); rb_gc_register_address(&auto_define_sym);
|
741
|
+
with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_gc_register_address(&with_dtd_sym);
|
742
|
+
with_instruct_sym = ID2SYM(rb_intern("with_instructions")); rb_gc_register_address(&with_instruct_sym);
|
743
|
+
with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_gc_register_address(&with_xml_sym);
|
744
|
+
convert_special_sym = ID2SYM(rb_intern("convert_special")); rb_gc_register_address(&convert_special_sym);
|
745
|
+
|
746
|
+
ox_empty_string = rb_str_new2(""); rb_gc_register_address(&ox_empty_string);
|
747
|
+
ox_zero_fixnum = INT2NUM(0); rb_gc_register_address(&ox_zero_fixnum);
|
715
748
|
|
716
|
-
ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_ary_push(keep, ox_encoding_sym);
|
717
|
-
indent_sym = ID2SYM(rb_intern("indent")); rb_ary_push(keep, indent_sym);
|
718
|
-
xsd_date_sym = ID2SYM(rb_intern("xsd_date")); rb_ary_push(keep, xsd_date_sym);
|
719
|
-
opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_ary_push(keep, opt_format_sym);
|
720
|
-
mode_sym = ID2SYM(rb_intern("mode")); rb_ary_push(keep, mode_sym);
|
721
|
-
auto_sym = ID2SYM(rb_intern("auto")); rb_ary_push(keep, auto_sym);
|
722
|
-
optimized_sym = ID2SYM(rb_intern("optimized")); rb_ary_push(keep, optimized_sym);
|
723
|
-
object_sym = ID2SYM(rb_intern("object")); rb_ary_push(keep, object_sym);
|
724
|
-
circular_sym = ID2SYM(rb_intern("circular")); rb_ary_push(keep, circular_sym);
|
725
|
-
generic_sym = ID2SYM(rb_intern("generic")); rb_ary_push(keep, generic_sym);
|
726
|
-
limited_sym = ID2SYM(rb_intern("limited")); rb_ary_push(keep, limited_sym);
|
727
|
-
trace_sym = ID2SYM(rb_intern("trace")); rb_ary_push(keep, trace_sym);
|
728
|
-
effort_sym = ID2SYM(rb_intern("effort")); rb_ary_push(keep, effort_sym);
|
729
|
-
strict_sym = ID2SYM(rb_intern("strict")); rb_ary_push(keep, strict_sym);
|
730
|
-
tolerant_sym = ID2SYM(rb_intern("tolerant")); rb_ary_push(keep, tolerant_sym);
|
731
|
-
auto_define_sym = ID2SYM(rb_intern("auto_define")); rb_ary_push(keep, auto_define_sym);
|
732
|
-
with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_ary_push(keep, with_dtd_sym);
|
733
|
-
with_instruct_sym = ID2SYM(rb_intern("with_instructions")); rb_ary_push(keep, with_instruct_sym);
|
734
|
-
with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_ary_push(keep, with_xml_sym);
|
735
|
-
convert_special_sym = ID2SYM(rb_intern("convert_special")); rb_ary_push(keep, convert_special_sym);
|
736
|
-
|
737
|
-
ox_empty_string = rb_str_new2(""); rb_ary_push(keep, ox_empty_string);
|
738
|
-
ox_zero_fixnum = INT2NUM(0); rb_ary_push(keep, ox_zero_fixnum);
|
739
|
-
|
740
|
-
//rb_require("node"); // generic xml node classes
|
741
749
|
ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
|
742
750
|
ox_element_clas = rb_const_get_at(Ox, rb_intern("Element"));
|
743
751
|
ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment"));
|
@@ -749,6 +757,8 @@ void Init_ox() {
|
|
749
757
|
ox_cache_new(&ox_class_cache);
|
750
758
|
ox_cache_new(&ox_attr_cache);
|
751
759
|
|
760
|
+
ox_sax_define();
|
761
|
+
|
752
762
|
rb_define_module_function(Ox, "cache_test", cache_test, 0);
|
753
763
|
rb_define_module_function(Ox, "cache8_test", cache8_test, 0);
|
754
764
|
}
|
data/ext/ox/ox.h
CHANGED
@@ -38,26 +38,26 @@ extern "C" {
|
|
38
38
|
#endif
|
39
39
|
#endif
|
40
40
|
|
41
|
+
#define RSTRING_NOT_MODIFIED
|
42
|
+
|
41
43
|
#include "ruby.h"
|
42
|
-
#
|
43
|
-
// HAVE_RUBY_ENCODING_H defined for Ruby 1.9
|
44
|
+
#if HAS_ENCODING_SUPPORT
|
44
45
|
#include "ruby/encoding.h"
|
45
46
|
#endif
|
46
|
-
#include "cache.h"
|
47
47
|
|
48
|
-
#ifdef
|
49
|
-
#
|
48
|
+
#ifdef RUBINIUS_RUBY
|
49
|
+
#undef T_COMPLEX
|
50
|
+
enum st_retval {ST_CONTINUE = 0, ST_STOP = 1, ST_DELETE = 2, ST_CHECK};
|
51
|
+
#else
|
52
|
+
#if HAS_TOP_LEVEL_ST_H
|
53
|
+
// st.h is found at the top level only on Travis; everywhere else it lives under ruby/. This seems to vary depending on which Travis machine is picked up.
|
54
|
+
#include "st.h"
|
55
|
+
#else
|
56
|
+
#include "ruby/st.h"
|
50
57
|
#endif
|
51
|
-
|
52
|
-
#if (defined RBX_Qnil && !defined RUBINIUS)
|
53
|
-
#define RUBINIUS
|
54
58
|
#endif
|
55
59
|
|
56
|
-
#
|
57
|
-
#undef T_RATIONAL
|
58
|
-
#undef T_COMPLEX
|
59
|
-
#define NO_RSTRUCT 1
|
60
|
-
#endif
|
60
|
+
#include "cache.h"
|
61
61
|
|
62
62
|
#define raise_error(msg, xml, current) _ox_raise_error(msg, xml, current, __FILE__, __LINE__)
|
63
63
|
|
@@ -210,6 +210,8 @@ extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Eff
|
|
210
210
|
extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
|
211
211
|
|
212
212
|
extern void ox_sax_parse(VALUE handler, VALUE io, int convert);
|
213
|
+
extern void ox_sax_define(void);
|
214
|
+
|
213
215
|
|
214
216
|
extern char* ox_write_obj_to_str(VALUE obj, Options copts);
|
215
217
|
extern void ox_write_obj_to_file(VALUE obj, const char *path, Options copts);
|
@@ -219,7 +221,9 @@ extern struct _Options ox_default_options;
|
|
219
221
|
extern VALUE Ox;
|
220
222
|
|
221
223
|
extern ID ox_at_id;
|
224
|
+
extern ID ox_at_value_id;
|
222
225
|
extern ID ox_attr_id;
|
226
|
+
extern ID ox_attr_value_id;
|
223
227
|
extern ID ox_attributes_id;
|
224
228
|
extern ID ox_beg_id;
|
225
229
|
extern ID ox_cdata_id;
|
@@ -244,11 +248,13 @@ extern ID ox_parse_id;
|
|
244
248
|
extern ID ox_read_id;
|
245
249
|
extern ID ox_readpartial_id;
|
246
250
|
extern ID ox_start_element_id;
|
251
|
+
extern ID ox_string_id;
|
247
252
|
extern ID ox_text_id;
|
248
253
|
extern ID ox_to_c_id;
|
249
254
|
extern ID ox_to_s_id;
|
250
255
|
extern ID ox_to_sym_id;
|
251
256
|
extern ID ox_tv_sec_id;
|
257
|
+
extern ID ox_tv_nsec_id;
|
252
258
|
extern ID ox_tv_usec_id;
|
253
259
|
extern ID ox_value_id;
|
254
260
|
|
@@ -257,6 +263,7 @@ extern VALUE ox_empty_string;
|
|
257
263
|
extern VALUE ox_encoding_sym;
|
258
264
|
extern VALUE ox_struct_class;
|
259
265
|
extern VALUE ox_time_class;
|
266
|
+
extern VALUE ox_stringio_class;
|
260
267
|
extern VALUE ox_zero_fixnum;
|
261
268
|
|
262
269
|
extern VALUE ox_document_clas;
|
data/ext/ox/parse.c
CHANGED
data/ext/ox/sax.c
CHANGED
@@ -31,10 +31,11 @@
|
|
31
31
|
#include <stdlib.h>
|
32
32
|
#include <errno.h>
|
33
33
|
#include <stdio.h>
|
34
|
-
#include <
|
34
|
+
#include <strings.h>
|
35
35
|
#include <sys/types.h>
|
36
36
|
#include <sys/uio.h>
|
37
37
|
#include <unistd.h>
|
38
|
+
#include <time.h>
|
38
39
|
|
39
40
|
#include "ruby.h"
|
40
41
|
#include "ox.h"
|
@@ -50,22 +51,26 @@ typedef struct _SaxDrive {
|
|
50
51
|
int line;
|
51
52
|
int col;
|
52
53
|
VALUE handler;
|
54
|
+
VALUE value_obj;
|
53
55
|
int (*read_func)(struct _SaxDrive *dr);
|
54
56
|
int convert_special;
|
55
57
|
union {
|
56
|
-
int
|
57
|
-
VALUE
|
58
|
+
int fd;
|
59
|
+
VALUE io;
|
60
|
+
const char *in_str;
|
58
61
|
};
|
59
62
|
int has_instruct;
|
60
63
|
int has_attr;
|
64
|
+
int has_attr_value;
|
61
65
|
int has_doctype;
|
62
66
|
int has_comment;
|
63
67
|
int has_cdata;
|
64
68
|
int has_text;
|
69
|
+
int has_value;
|
65
70
|
int has_start_element;
|
66
71
|
int has_end_element;
|
67
72
|
int has_error;
|
68
|
-
#
|
73
|
+
#if HAS_ENCODING_SUPPORT
|
69
74
|
rb_encoding *encoding;
|
70
75
|
#endif
|
71
76
|
} *SaxDrive;
|
@@ -91,8 +96,14 @@ static VALUE rescue_cb(VALUE rdr, VALUE err);
|
|
91
96
|
static VALUE io_cb(VALUE rdr);
|
92
97
|
static VALUE partial_io_cb(VALUE rdr);
|
93
98
|
static int read_from_io(SaxDrive dr);
|
99
|
+
#ifndef JRUBY_RUBY
|
94
100
|
static int read_from_fd(SaxDrive dr);
|
101
|
+
#endif
|
95
102
|
static int read_from_io_partial(SaxDrive dr);
|
103
|
+
static int read_from_str(SaxDrive dr);
|
104
|
+
|
105
|
+
static VALUE sax_value_class;
|
106
|
+
|
96
107
|
|
97
108
|
static inline char
|
98
109
|
sax_drive_get(SaxDrive dr) {
|
@@ -176,7 +187,7 @@ str2sym(const char *str, SaxDrive dr) {
|
|
176
187
|
VALUE sym;
|
177
188
|
|
178
189
|
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot))) {
|
179
|
-
#
|
190
|
+
#if HAS_ENCODING_SUPPORT
|
180
191
|
if (0 != dr->encoding) {
|
181
192
|
VALUE rstr = rb_str_new2(str);
|
182
193
|
|
@@ -203,10 +214,12 @@ ox_sax_parse(VALUE handler, VALUE io, int convert) {
|
|
203
214
|
printf("*** sax_parse with these flags\n");
|
204
215
|
printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
|
205
216
|
printf(" has_attr = %s\n", dr.has_attr ? "true" : "false");
|
217
|
+
printf(" has_attr_value = %s\n", dr.has_attr_value ? "true" : "false");
|
206
218
|
printf(" has_doctype = %s\n", dr.has_doctype ? "true" : "false");
|
207
219
|
printf(" has_comment = %s\n", dr.has_comment ? "true" : "false");
|
208
220
|
printf(" has_cdata = %s\n", dr.has_cdata ? "true" : "false");
|
209
221
|
printf(" has_text = %s\n", dr.has_text ? "true" : "false");
|
222
|
+
printf(" has_value = %s\n", dr.has_value ? "true" : "false");
|
210
223
|
printf(" has_start_element = %s\n", dr.has_start_element ? "true" : "false");
|
211
224
|
printf(" has_end_element = %s\n", dr.has_end_element ? "true" : "false");
|
212
225
|
printf(" has_error = %s\n", dr.has_error ? "true" : "false");
|
@@ -215,9 +228,34 @@ ox_sax_parse(VALUE handler, VALUE io, int convert) {
|
|
215
228
|
sax_drive_cleanup(&dr);
|
216
229
|
}
|
217
230
|
|
231
|
+
inline static int
|
232
|
+
respond_to(VALUE obj, ID method) {
|
233
|
+
#ifdef JRUBY_RUBY
|
234
|
+
// There is a bug in JRuby where rb_respond_to() returns true (1) even if
|
235
|
+
// a method is private.
|
236
|
+
{
|
237
|
+
VALUE args[1];
|
238
|
+
|
239
|
+
*args = ID2SYM(method);
|
240
|
+
return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args));
|
241
|
+
}
|
242
|
+
#else
|
243
|
+
return rb_respond_to(obj, method);
|
244
|
+
#endif
|
245
|
+
}
|
246
|
+
|
218
247
|
static void
|
219
248
|
sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
220
|
-
if (
|
249
|
+
if (ox_stringio_class == rb_obj_class(io)) {
|
250
|
+
VALUE s = rb_funcall2(io, ox_string_id, 0, 0);
|
251
|
+
|
252
|
+
dr->read_func = read_from_str;
|
253
|
+
dr->in_str = StringValuePtr(s);
|
254
|
+
} else if (rb_respond_to(io, ox_readpartial_id)) {
|
255
|
+
#ifdef JRUBY_RUBY
|
256
|
+
dr->read_func = read_from_io_partial;
|
257
|
+
dr->io = io;
|
258
|
+
#else
|
221
259
|
VALUE rfd;
|
222
260
|
|
223
261
|
if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) {
|
@@ -227,7 +265,12 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
227
265
|
dr->read_func = read_from_io_partial;
|
228
266
|
dr->io = io;
|
229
267
|
}
|
268
|
+
#endif
|
230
269
|
} else if (rb_respond_to(io, ox_read_id)) {
|
270
|
+
#ifdef JRUBY_RUBY
|
271
|
+
dr->read_func = read_from_io;
|
272
|
+
dr->io = io;
|
273
|
+
#else
|
231
274
|
VALUE rfd;
|
232
275
|
|
233
276
|
if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) {
|
@@ -237,6 +280,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
237
280
|
dr->read_func = read_from_io;
|
238
281
|
dr->io = io;
|
239
282
|
}
|
283
|
+
#endif
|
240
284
|
} else {
|
241
285
|
rb_raise(rb_eArgError, "sax_parser io argument must respond to readpartial() or read().\n");
|
242
286
|
}
|
@@ -249,17 +293,21 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
249
293
|
dr->line = 1;
|
250
294
|
dr->col = 0;
|
251
295
|
dr->handler = handler;
|
296
|
+
dr->value_obj = rb_data_object_alloc(sax_value_class, dr, 0, 0);
|
297
|
+
rb_gc_register_address(&dr->value_obj);
|
252
298
|
dr->convert_special = convert;
|
253
|
-
dr->has_instruct =
|
254
|
-
dr->has_attr =
|
255
|
-
dr->
|
256
|
-
dr->
|
257
|
-
dr->
|
258
|
-
dr->
|
259
|
-
dr->
|
260
|
-
dr->
|
261
|
-
dr->
|
262
|
-
|
299
|
+
dr->has_instruct = respond_to(handler, ox_instruct_id);
|
300
|
+
dr->has_attr = respond_to(handler, ox_attr_id);
|
301
|
+
dr->has_attr_value = respond_to(handler, ox_attr_value_id);
|
302
|
+
dr->has_doctype = respond_to(handler, ox_doctype_id);
|
303
|
+
dr->has_comment = respond_to(handler, ox_comment_id);
|
304
|
+
dr->has_cdata = respond_to(handler, ox_cdata_id);
|
305
|
+
dr->has_text = respond_to(handler, ox_text_id);
|
306
|
+
dr->has_value = respond_to(handler, ox_value_id);
|
307
|
+
dr->has_start_element = respond_to(handler, ox_start_element_id);
|
308
|
+
dr->has_end_element = respond_to(handler, ox_end_element_id);
|
309
|
+
dr->has_error = respond_to(handler, ox_error_id);
|
310
|
+
#if HAS_ENCODING_SUPPORT
|
263
311
|
if ('\0' == *ox_default_options.encoding) {
|
264
312
|
dr->encoding = 0;
|
265
313
|
} else {
|
@@ -270,6 +318,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
270
318
|
|
271
319
|
static void
|
272
320
|
sax_drive_cleanup(SaxDrive dr) {
|
321
|
+
rb_gc_unregister_address(&dr->value_obj);
|
273
322
|
if (dr->base_buf != dr->buf) {
|
274
323
|
xfree(dr->buf);
|
275
324
|
}
|
@@ -505,7 +554,7 @@ read_cdata(SaxDrive dr) {
|
|
505
554
|
VALUE args[1];
|
506
555
|
|
507
556
|
args[0] = rb_str_new2(dr->str);
|
508
|
-
#
|
557
|
+
#if HAS_ENCODING_SUPPORT
|
509
558
|
if (0 != dr->encoding) {
|
510
559
|
rb_enc_associate(args[0], dr->encoding);
|
511
560
|
}
|
@@ -549,7 +598,7 @@ read_comment(SaxDrive dr) {
|
|
549
598
|
VALUE args[1];
|
550
599
|
|
551
600
|
args[0] = rb_str_new2(dr->str);
|
552
|
-
#
|
601
|
+
#if HAS_ENCODING_SUPPORT
|
553
602
|
if (0 != dr->encoding) {
|
554
603
|
rb_enc_associate(args[0], dr->encoding);
|
555
604
|
}
|
@@ -573,7 +622,6 @@ read_element(SaxDrive dr) {
|
|
573
622
|
if ('\0' == (c = read_name_token(dr))) {
|
574
623
|
return -1;
|
575
624
|
}
|
576
|
-
// TBD encode is needed
|
577
625
|
name = str2sym(dr->str, dr);
|
578
626
|
if (dr->has_start_element) {
|
579
627
|
VALUE args[1];
|
@@ -637,16 +685,21 @@ read_text(SaxDrive dr) {
|
|
637
685
|
}
|
638
686
|
}
|
639
687
|
*(dr->cur - 1) = '\0';
|
640
|
-
if (dr->
|
688
|
+
if (dr->has_value) {
|
641
689
|
VALUE args[1];
|
642
|
-
|
690
|
+
|
691
|
+
*args = dr->value_obj;
|
692
|
+
rb_funcall2(dr->handler, ox_value_id, 1, args);
|
693
|
+
} else if (dr->has_text) {
|
694
|
+
VALUE args[1];
|
695
|
+
|
643
696
|
if (dr->convert_special) {
|
644
697
|
if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
|
645
698
|
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
646
699
|
}
|
647
700
|
}
|
648
701
|
args[0] = rb_str_new2(dr->str);
|
649
|
-
#
|
702
|
+
#if HAS_ENCODING_SUPPORT
|
650
703
|
if (0 != dr->encoding) {
|
651
704
|
rb_enc_associate(args[0], dr->encoding);
|
652
705
|
}
|
@@ -677,7 +730,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
677
730
|
if (is_xml && 0 == strcmp("encoding", dr->str)) {
|
678
731
|
is_encoding = 1;
|
679
732
|
}
|
680
|
-
|
733
|
+
// TBD use symbol cache
|
734
|
+
if (dr->has_attr || dr->has_attr_value) {
|
681
735
|
name = str2sym(dr->str, dr);
|
682
736
|
}
|
683
737
|
if (is_white(c)) {
|
@@ -690,12 +744,18 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
690
744
|
if (0 != read_quoted_value(dr)) {
|
691
745
|
return -1;
|
692
746
|
}
|
693
|
-
#
|
747
|
+
#if HAS_ENCODING_SUPPORT
|
694
748
|
if (is_encoding) {
|
695
749
|
dr->encoding = rb_enc_find(dr->str);
|
696
750
|
}
|
697
751
|
#endif
|
698
|
-
if (dr->
|
752
|
+
if (dr->has_attr_value) {
|
753
|
+
VALUE args[2];
|
754
|
+
|
755
|
+
args[0] = name;
|
756
|
+
args[1] = dr->value_obj;
|
757
|
+
rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
|
758
|
+
} else if (dr->has_attr) {
|
699
759
|
VALUE args[2];
|
700
760
|
|
701
761
|
args[0] = name;
|
@@ -703,7 +763,7 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
703
763
|
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
704
764
|
}
|
705
765
|
args[1] = rb_str_new2(dr->str);
|
706
|
-
#
|
766
|
+
#if HAS_ENCODING_SUPPORT
|
707
767
|
if (0 != dr->encoding) {
|
708
768
|
rb_enc_associate(args[1], dr->encoding);
|
709
769
|
}
|
@@ -781,12 +841,21 @@ read_quoted_value(SaxDrive dr) {
|
|
781
841
|
|
782
842
|
static VALUE
|
783
843
|
rescue_cb(VALUE rdr, VALUE err) {
|
844
|
+
#ifndef JRUBY_RUBY
|
845
|
+
// JRuby seems to play by a different set of rules. It passes in a Fixnum
|
846
|
+
// instead of an error like other Rubies. For now assume all errors are
|
847
|
+
// EOF and deal with the results further down the line.
|
848
|
+
#if (defined(RUBINIUS_RUBY) || (1 == RUBY_VERSION_MAJOR && 8 == RUBY_VERSION_MINOR))
|
849
|
+
if (rb_obj_class(err) != rb_eTypeError) {
|
850
|
+
#else
|
784
851
|
if (rb_obj_class(err) != rb_eEOFError) {
|
852
|
+
#endif
|
785
853
|
SaxDrive dr = (SaxDrive)rdr;
|
786
854
|
|
787
855
|
sax_drive_cleanup(dr);
|
788
856
|
rb_raise(err, "at line %d, column %d\n", dr->line, dr->col);
|
789
857
|
}
|
858
|
+
#endif
|
790
859
|
return Qfalse;
|
791
860
|
}
|
792
861
|
|
@@ -839,6 +908,7 @@ read_from_io(SaxDrive dr) {
|
|
839
908
|
return (Qfalse == rb_rescue(io_cb, (VALUE)dr, rescue_cb, (VALUE)dr));
|
840
909
|
}
|
841
910
|
|
911
|
+
#ifndef JRUBY_RUBY
|
842
912
|
static int
|
843
913
|
read_from_fd(SaxDrive dr) {
|
844
914
|
ssize_t cnt;
|
@@ -853,7 +923,26 @@ read_from_fd(SaxDrive dr) {
|
|
853
923
|
}
|
854
924
|
return 0;
|
855
925
|
}
|
926
|
+
#endif
|
927
|
+
|
928
|
+
static int
|
929
|
+
read_from_str(SaxDrive dr) {
|
930
|
+
size_t max = dr->buf_end - dr->cur - 1;
|
931
|
+
char *s;
|
932
|
+
long cnt;
|
933
|
+
|
934
|
+
if ('\0' == *dr->in_str) {
|
935
|
+
// done
|
936
|
+
return -1;
|
937
|
+
}
|
938
|
+
s = stpncpy(dr->cur, dr->in_str, max);
|
939
|
+
*s = '\0';
|
940
|
+
cnt = s - dr->cur;
|
941
|
+
dr->in_str += cnt;
|
942
|
+
dr->read_end = dr->cur + cnt;
|
856
943
|
|
944
|
+
return 0;
|
945
|
+
}
|
857
946
|
|
858
947
|
static int
|
859
948
|
collapse_special(char *str) {
|
@@ -911,3 +1000,211 @@ collapse_special(char *str) {
|
|
911
1000
|
|
912
1001
|
return 0;
|
913
1002
|
}
|
1003
|
+
|
1004
|
+
static VALUE
|
1005
|
+
parse_double_time(const char *text) {
|
1006
|
+
long v = 0;
|
1007
|
+
long v2 = 0;
|
1008
|
+
const char *dot = 0;
|
1009
|
+
char c;
|
1010
|
+
|
1011
|
+
for (; '.' != *text; text++) {
|
1012
|
+
c = *text;
|
1013
|
+
if (c < '0' || '9' < c) {
|
1014
|
+
return Qnil;
|
1015
|
+
}
|
1016
|
+
v = 10 * v + (long)(c - '0');
|
1017
|
+
}
|
1018
|
+
dot = text++;
|
1019
|
+
for (; '\0' != *text && text - dot <= 6; text++) {
|
1020
|
+
c = *text;
|
1021
|
+
if (c < '0' || '9' < c) {
|
1022
|
+
return Qnil;
|
1023
|
+
}
|
1024
|
+
v2 = 10 * v2 + (long)(c - '0');
|
1025
|
+
}
|
1026
|
+
for (; text - dot <= 9; text++) {
|
1027
|
+
v2 *= 10;
|
1028
|
+
}
|
1029
|
+
#if HAS_NANO_TIME
|
1030
|
+
return rb_time_nano_new(v, v2);
|
1031
|
+
#else
|
1032
|
+
return rb_time_new(v, v2 / 1000);
|
1033
|
+
#endif
|
1034
|
+
}
|
1035
|
+
|
1036
|
+
typedef struct _Tp {
|
1037
|
+
int cnt;
|
1038
|
+
char end;
|
1039
|
+
char alt;
|
1040
|
+
} *Tp;
|
1041
|
+
|
1042
|
+
static VALUE
|
1043
|
+
parse_xsd_time(const char *text) {
|
1044
|
+
long cargs[10];
|
1045
|
+
long *cp = cargs;
|
1046
|
+
long v;
|
1047
|
+
int i;
|
1048
|
+
char c = '\0';
|
1049
|
+
struct _Tp tpa[10] = { { 4, '-', '-' },
|
1050
|
+
{ 2, '-', '-' },
|
1051
|
+
{ 2, 'T', ' ' },
|
1052
|
+
{ 2, ':', ':' },
|
1053
|
+
{ 2, ':', ':' },
|
1054
|
+
{ 2, '.', '.' },
|
1055
|
+
{ 9, '+', '-' },
|
1056
|
+
{ 2, ':', ':' },
|
1057
|
+
{ 2, '\0', '\0' },
|
1058
|
+
{ 0, '\0', '\0' } };
|
1059
|
+
Tp tp = tpa;
|
1060
|
+
struct tm tm;
|
1061
|
+
|
1062
|
+
memset(cargs, 0, sizeof(cargs));
|
1063
|
+
for (; 0 != tp->cnt; tp++) {
|
1064
|
+
for (i = tp->cnt, v = 0; 0 < i ; text++, i--) {
|
1065
|
+
c = *text;
|
1066
|
+
if (c < '0' || '9' < c) {
|
1067
|
+
if ('\0' == c || tp->end == c || tp->alt == c) {
|
1068
|
+
break;
|
1069
|
+
}
|
1070
|
+
return Qnil;
|
1071
|
+
}
|
1072
|
+
v = 10 * v + (long)(c - '0');
|
1073
|
+
}
|
1074
|
+
if ('\0' == c) {
|
1075
|
+
break;
|
1076
|
+
}
|
1077
|
+
c = *text++;
|
1078
|
+
if (tp->end != c && tp->alt != c) {
|
1079
|
+
return Qnil;
|
1080
|
+
}
|
1081
|
+
*cp++ = v;
|
1082
|
+
}
|
1083
|
+
tm.tm_year = (int)cargs[0] - 1900;
|
1084
|
+
tm.tm_mon = (int)cargs[1] - 1;
|
1085
|
+
tm.tm_mday = (int)cargs[2];
|
1086
|
+
tm.tm_hour = (int)cargs[3];
|
1087
|
+
tm.tm_min = (int)cargs[4];
|
1088
|
+
tm.tm_sec = (int)cargs[5];
|
1089
|
+
#if HAS_NANO_TIME
|
1090
|
+
return rb_time_nano_new(mktime(&tm), cargs[6]);
|
1091
|
+
#else
|
1092
|
+
return rb_time_new(mktime(&tm), cargs[6] / 1000);
|
1093
|
+
#endif
|
1094
|
+
}
|
1095
|
+
|
1096
|
+
static VALUE
|
1097
|
+
sax_value_as_s(VALUE self) {
|
1098
|
+
SaxDrive dr = DATA_PTR(self);
|
1099
|
+
VALUE rs;
|
1100
|
+
|
1101
|
+
if ('\0' == *dr->str) {
|
1102
|
+
return Qnil;
|
1103
|
+
}
|
1104
|
+
if (dr->convert_special) {
|
1105
|
+
if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
|
1106
|
+
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
1107
|
+
}
|
1108
|
+
}
|
1109
|
+
rs = rb_str_new2(dr->str);
|
1110
|
+
#if HAS_ENCODING_SUPPORT
|
1111
|
+
if (0 != dr->encoding) {
|
1112
|
+
rb_enc_associate(rs, dr->encoding);
|
1113
|
+
}
|
1114
|
+
#endif
|
1115
|
+
return rs;
|
1116
|
+
}
|
1117
|
+
|
1118
|
+
static VALUE
|
1119
|
+
sax_value_as_sym(VALUE self) {
|
1120
|
+
SaxDrive dr = DATA_PTR(self);
|
1121
|
+
|
1122
|
+
if ('\0' == *dr->str) {
|
1123
|
+
return Qnil;
|
1124
|
+
}
|
1125
|
+
return str2sym(dr->str, dr);
|
1126
|
+
}
|
1127
|
+
|
1128
|
+
static VALUE
|
1129
|
+
sax_value_as_f(VALUE self) {
|
1130
|
+
SaxDrive dr = DATA_PTR(self);
|
1131
|
+
|
1132
|
+
if ('\0' == *dr->str) {
|
1133
|
+
return Qnil;
|
1134
|
+
}
|
1135
|
+
return rb_float_new(strtod(dr->str, 0));
|
1136
|
+
}
|
1137
|
+
|
1138
|
+
static VALUE
|
1139
|
+
sax_value_as_i(VALUE self) {
|
1140
|
+
SaxDrive dr = DATA_PTR(self);
|
1141
|
+
const char *s = dr->str;
|
1142
|
+
long n = 0;
|
1143
|
+
int neg = 0;
|
1144
|
+
|
1145
|
+
if ('\0' == *s) {
|
1146
|
+
return Qnil;
|
1147
|
+
}
|
1148
|
+
if ('-' == *s) {
|
1149
|
+
neg = 1;
|
1150
|
+
s++;
|
1151
|
+
} else if ('+' == *s) {
|
1152
|
+
s++;
|
1153
|
+
}
|
1154
|
+
for (; '\0' != *s; s++) {
|
1155
|
+
if ('0' <= *s && *s <= '9') {
|
1156
|
+
n = n * 10 + (*s - '0');
|
1157
|
+
} else {
|
1158
|
+
rb_raise(rb_eArgError, "Not a valid Fixnum.\n");
|
1159
|
+
}
|
1160
|
+
}
|
1161
|
+
if (neg) {
|
1162
|
+
n = -n;
|
1163
|
+
}
|
1164
|
+
return LONG2NUM(n);
|
1165
|
+
}
|
1166
|
+
|
1167
|
+
static VALUE
|
1168
|
+
sax_value_as_time(VALUE self) {
|
1169
|
+
SaxDrive dr = DATA_PTR(self);
|
1170
|
+
const char *str = dr->str;
|
1171
|
+
VALUE t;
|
1172
|
+
|
1173
|
+
if ('\0' == *str) {
|
1174
|
+
return Qnil;
|
1175
|
+
}
|
1176
|
+
if (Qnil == (t = parse_double_time(str)) &&
|
1177
|
+
Qnil == (t = parse_xsd_time(str))) {
|
1178
|
+
VALUE args[1];
|
1179
|
+
|
1180
|
+
//printf("**** time parse\n");
|
1181
|
+
*args = rb_str_new2(str);
|
1182
|
+
t = rb_funcall2(ox_time_class, ox_parse_id, 1, args);
|
1183
|
+
}
|
1184
|
+
return t;
|
1185
|
+
}
|
1186
|
+
|
1187
|
+
static VALUE
|
1188
|
+
sax_value_as_bool(VALUE self) {
|
1189
|
+
return (0 == strcasecmp("true", ((SaxDrive)DATA_PTR(self))->str)) ? Qtrue : Qfalse;
|
1190
|
+
}
|
1191
|
+
|
1192
|
+
static VALUE
|
1193
|
+
sax_value_empty(VALUE self) {
|
1194
|
+
return ('\0' == *((SaxDrive)DATA_PTR(self))->str) ? Qtrue : Qfalse;
|
1195
|
+
}
|
1196
|
+
|
1197
|
+
void
|
1198
|
+
ox_sax_define() {
|
1199
|
+
VALUE sax_module = rb_const_get_at(Ox, rb_intern("Sax"));
|
1200
|
+
|
1201
|
+
sax_value_class = rb_define_class_under(sax_module, "Value", rb_cObject);
|
1202
|
+
|
1203
|
+
rb_define_method(sax_value_class, "as_s", sax_value_as_s, 0);
|
1204
|
+
rb_define_method(sax_value_class, "as_sym", sax_value_as_sym, 0);
|
1205
|
+
rb_define_method(sax_value_class, "as_i", sax_value_as_i, 0);
|
1206
|
+
rb_define_method(sax_value_class, "as_f", sax_value_as_f, 0);
|
1207
|
+
rb_define_method(sax_value_class, "as_time", sax_value_as_time, 0);
|
1208
|
+
rb_define_method(sax_value_class, "as_bool", sax_value_as_bool, 0);
|
1209
|
+
rb_define_method(sax_value_class, "empty?", sax_value_empty, 0);
|
1210
|
+
}
|