ox 1.2.15 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/{README.rdoc → README.md} +92 -64
- data/ext/ox/dump.c +7 -7
- data/ext/ox/ox.c +43 -10
- data/ext/ox/ox.h +11 -0
- data/ext/ox/sax.c +758 -0
- data/lib/ox.rb +1 -0
- data/lib/ox/sax.rb +72 -0
- data/lib/ox/version.rb +1 -1
- data/test/func.rb +24 -1
- data/test/perf_sax.rb +233 -0
- data/test/sax_test.rb +381 -0
- metadata +19 -7
data/{README.rdoc → README.md}
RENAMED
@@ -1,17 +1,38 @@
|
|
1
|
-
|
1
|
+
# Ox gem
|
2
|
+
A fast XML parser and Object marshaller as a Ruby gem.
|
3
|
+
|
4
|
+
## <a name="installation">Installation</a>
|
5
|
+
gem install ox
|
6
|
+
|
7
|
+
## <a name="source">Source</a>
|
2
8
|
|
3
9
|
*GitHub* *repo*: https://github.com/ohler55/ox
|
4
10
|
|
5
11
|
*RubyGems* *repo*: https://rubygems.org/gems/ox
|
6
12
|
|
7
|
-
|
13
|
+
## <a name="follow">Follow @oxgem on Twitter</a>
|
14
|
+
|
15
|
+
[Follow @oxgem on Twitter](http://twitter.com/#!/oxgem) for announcements and news about the Ox gem.
|
16
|
+
|
17
|
+
## <a name="build_status">Build Status</a>
|
18
|
+
|
19
|
+
[![Build Status](http://travis-ci.org/ohler55/ox.png)](http://travis-ci.org/ohler55/ox)
|
20
|
+
|
21
|
+
## <a name="release">Release Notes</a>
|
22
|
+
|
23
|
+
### Release 1.3.0
|
24
|
+
|
25
|
+
- fixed Mutex dump bug
|
26
|
+
- added SAX parser, 30+ times faster than Nokogiri and 10+ times faster than LibXML
|
27
|
+
|
28
|
+
## <a name="description">Description</a>
|
8
29
|
|
9
30
|
Optimized XML (Ox), as the name implies, was written to provide speed-optimized
|
10
31
|
XML handling. It was designed to be an alternative to Nokogiri in generic XML
|
11
32
|
parsing and as an alternative to Marshal for Object serialization.
|
12
33
|
|
13
|
-
Nokogiri
|
14
|
-
|
34
|
+
Unlike Nokogiri, Ox is self-contained. Ox uses nothing other than standard C
|
35
|
+
libraries so version issues with libXml are not an issue.
|
15
36
|
|
16
37
|
Marshal uses a binary format for serializing Objects. That binary format
|
17
38
|
changes with releases, making Marshal-dumped Objects incompatible between some
|
@@ -25,9 +46,9 @@ It is possible to write an XML serialization gem with Nokogiri but writing
|
|
25
46
|
such a package in Ruby results in a module significantly slower than
|
26
47
|
Marshal. This is what triggered the start of Ox development.
|
27
48
|
|
28
|
-
Ox handles XML documents in
|
29
|
-
|
30
|
-
replacement for Nokogiri and for Marshal.
|
49
|
+
Ox handles XML documents in three ways. It is a generic XML parser and writer,
|
50
|
+
a fast Object / XML marshaller, and a stream SAX parser. Ox was written for
|
51
|
+
speed as a replacement for Nokogiri, Ruby LibXML, and for Marshal.
|
31
52
|
|
32
53
|
As an XML parser it is 2 or more times faster than Nokogiri and as a generic
|
33
54
|
XML writer it is as much as 20 times faster than Nokogiri. Of course different
|
@@ -36,53 +57,60 @@ files may result in slightly different times.
|
|
36
57
|
As an Object serializer Ox is up to 6 times faster than the standard Ruby
|
37
58
|
Marshal.dump() and up to 3 times faster than Marshal.load().
|
38
59
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
60
|
+
The SAX-like stream parser is over 30 times faster than Nokogiri and more than
|
61
|
+
10 times faster than LibXML when using a trivial Ruby-side set of
|
62
|
+
callbacks. Unlike Nokogiri and LibXML, Ox can be tuned to use only the SAX
|
63
|
+
callbacks that are of interest to the caller. (See the perf_sax.rb file for an
|
64
|
+
example.)
|
65
|
+
|
66
|
+
Ox is compatible with Ruby 1.8.7, 1.9.2, JRuby, and RBX.
|
67
|
+
|
68
|
+
### Object Dump Sample:
|
69
|
+
|
70
|
+
require 'ox'
|
71
|
+
|
72
|
+
class Sample
|
73
|
+
attr_accessor :a, :b, :c
|
74
|
+
|
75
|
+
def initialize(a, b, c)
|
76
|
+
@a = a
|
77
|
+
@b = b
|
78
|
+
@c = c
|
79
|
+
end
|
51
80
|
end
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
=== Object XML format
|
81
|
+
|
82
|
+
# Create Object
|
83
|
+
obj = Sample.new(1, "bee", ['x', :y, 7.0])
|
84
|
+
# Now dump the Object to an XML String.
|
85
|
+
xml = Ox.dump(obj)
|
86
|
+
# Convert the object back into a Sample Object.
|
87
|
+
obj2 = Ox.parse_obj(xml)
|
88
|
+
|
89
|
+
### Generic XML Writing and Parsing:
|
90
|
+
|
91
|
+
require 'ox'
|
92
|
+
|
93
|
+
doc = Ox::Document.new(:version => '1.0')
|
94
|
+
|
95
|
+
top = Ox::Element.new('top')
|
96
|
+
top[:name] = 'sample'
|
97
|
+
doc << top
|
98
|
+
|
99
|
+
mid = Ox::Element.new('middle')
|
100
|
+
mid[:name] = 'second'
|
101
|
+
top << mid
|
102
|
+
|
103
|
+
bot = Ox::Element.new('bottom')
|
104
|
+
bot[:name] = 'third'
|
105
|
+
mid << bot
|
106
|
+
|
107
|
+
xml = Ox.dump(doc)
|
108
|
+
puts xml
|
109
|
+
doc2 = Ox.parse(xml)
|
110
|
+
puts "Same? #{doc == doc2}"
|
111
|
+
|
112
|
+
|
113
|
+
### Object XML format
|
86
114
|
|
87
115
|
The XML format used for Object encoding follows the structure of the
|
88
116
|
Object. Each XML element is encoded so that the XML element name is a type
|
@@ -127,25 +155,25 @@ interpreter.)
|
|
127
155
|
Values are encoded as the text portion of an element or in the sub-elements
|
128
156
|
of the principal. For example, a Fixnum is encoded as:
|
129
157
|
|
130
|
-
|
158
|
+
<i>123</i>
|
131
159
|
|
132
160
|
An Array has sub-elements and is encoded similar to this example.
|
133
161
|
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
162
|
+
<a>
|
163
|
+
<i>1</i>
|
164
|
+
<s>abc</s>
|
165
|
+
</a>
|
138
166
|
|
139
167
|
A Hash is encoded with an even number of elements where the first element is
|
140
168
|
the key and the second is the value. This is repeated for each entry in the
|
141
169
|
Hash. An example of encoding { 1 => 'one', 2 => 'two' } is:
|
142
170
|
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
171
|
+
<h>
|
172
|
+
<i>1</i>
|
173
|
+
<s>one</s>
|
174
|
+
<i>2</i>
|
175
|
+
<s>two</s>
|
176
|
+
</h>
|
149
177
|
|
150
178
|
Strings with characters not allowed in XML are base64-encoded and will be
|
151
179
|
converted back into a String when loaded.
|
data/ext/ox/dump.c
CHANGED
@@ -829,14 +829,14 @@ static int
|
|
829
829
|
dump_var(ID key, VALUE value, Out out) {
|
830
830
|
if (T_DATA == rb_type(value) && rb_cTime != rb_obj_class(value)) {
|
831
831
|
/* There is a secret recipe that keeps Exception mesg attributes as a
|
832
|
-
* T_DATA until it is needed. StringValue()
|
833
|
-
*
|
834
|
-
*
|
835
|
-
* callback but it isn't. A
|
836
|
-
*
|
837
|
-
* this will fail.
|
832
|
+
* T_DATA until it is needed. StringValue() or a safer method of
|
833
|
+
* calling to_s() makes the value needed and it is converted to a
|
834
|
+
* regular Ruby Object. It might seem reasonable to expect that this
|
835
|
+
* would be done before calling the foreach callback but it isn't. A
|
836
|
+
* slight hack fixes the inconsistency. If the var is not something
|
837
|
+
* that can be represented as a String then this will fail.
|
838
838
|
*/
|
839
|
-
|
839
|
+
rb_funcall(value, to_s_id, 0);
|
840
840
|
}
|
841
841
|
dump_obj(key, value, out->depth, out);
|
842
842
|
|
data/ext/ox/ox.c
CHANGED
@@ -48,15 +48,24 @@ VALUE Ox = Qnil;
|
|
48
48
|
ID at_id;
|
49
49
|
ID attributes_id;
|
50
50
|
ID beg_id;
|
51
|
+
ID cdata_id;
|
52
|
+
ID comment_id;
|
51
53
|
ID den_id;
|
54
|
+
ID doctype_id;
|
55
|
+
ID end_element_id;
|
52
56
|
ID end_id;
|
57
|
+
ID error_id;
|
53
58
|
ID excl_id;
|
54
59
|
ID inspect_id;
|
60
|
+
ID instruct_id;
|
55
61
|
ID keys_id;
|
56
62
|
ID local_id;
|
57
63
|
ID nodes_id;
|
58
64
|
ID num_id;
|
59
65
|
ID parse_id;
|
66
|
+
ID readpartial_id;
|
67
|
+
ID start_element_id;
|
68
|
+
ID text_id;
|
60
69
|
ID to_c_id;
|
61
70
|
ID to_s_id;
|
62
71
|
ID tv_sec_id;
|
@@ -140,7 +149,7 @@ static VALUE
|
|
140
149
|
get_def_opts(VALUE self) {
|
141
150
|
VALUE opts = rb_hash_new();
|
142
151
|
int elen = (int)strlen(default_options.encoding);
|
143
|
-
|
152
|
+
|
144
153
|
rb_hash_aset(opts, encoding_sym, (0 == elen) ? Qnil : rb_str_new(default_options.encoding, elen));
|
145
154
|
rb_hash_aset(opts, indent_sym, INT2FIX(default_options.indent));
|
146
155
|
rb_hash_aset(opts, trace_sym, INT2FIX(default_options.trace));
|
@@ -434,6 +443,20 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
434
443
|
return load(xml, argc - 1, argv + 1, self);
|
435
444
|
}
|
436
445
|
|
446
|
+
/* call-seq: sax_parse(handler, io)
|
447
|
+
*
|
448
|
+
* Parses an IO stream or file containing an XML document. Raises an exception
|
449
|
+
* if the XML is malformed or the classes specified are not valid.
|
450
|
+
* @param [Ox::Sax] handler SAX-like handler (responds to Ox::Sax methods)
|
451
|
+
* @param [IO] io IO-like Object to read from; must respond to readpartial()
|
452
|
+
*/
|
453
|
+
static VALUE
|
454
|
+
sax_parse(VALUE self, VALUE handler, VALUE io) {
|
455
|
+
ox_sax_parse(handler, io);
|
456
|
+
|
457
|
+
return Qnil;
|
458
|
+
}
|
459
|
+
|
437
460
|
static void
|
438
461
|
parse_dump_options(VALUE ropts, Options copts) {
|
439
462
|
struct _YesNoOpt ynos[] = {
|
@@ -583,6 +606,7 @@ void Init_ox() {
|
|
583
606
|
rb_define_module_function(Ox, "parse_obj", to_obj, 1);
|
584
607
|
rb_define_module_function(Ox, "parse", to_gen, 1);
|
585
608
|
rb_define_module_function(Ox, "load", load_str, -1);
|
609
|
+
rb_define_module_function(Ox, "sax_parse", sax_parse, 2);
|
586
610
|
|
587
611
|
rb_define_module_function(Ox, "to_xml", dump, -1);
|
588
612
|
rb_define_module_function(Ox, "dump", dump, -1);
|
@@ -591,23 +615,32 @@ void Init_ox() {
|
|
591
615
|
rb_define_module_function(Ox, "to_file", to_file, -1);
|
592
616
|
|
593
617
|
rb_require("time");
|
594
|
-
parse_id = rb_intern("parse");
|
595
|
-
local_id = rb_intern("local");
|
596
618
|
at_id = rb_intern("at");
|
597
|
-
|
619
|
+
attributes_id = rb_intern("@attributes");
|
598
620
|
beg_id = rb_intern("@beg");
|
599
|
-
|
621
|
+
cdata_id = rb_intern("cdata");
|
622
|
+
comment_id = rb_intern("comment");
|
600
623
|
den_id = rb_intern("@den");
|
624
|
+
doctype_id = rb_intern("doctype");
|
625
|
+
end_element_id = rb_intern("end_element");
|
626
|
+
end_id = rb_intern("@end");
|
627
|
+
error_id = rb_intern("error");
|
601
628
|
excl_id = rb_intern("@excl");
|
602
|
-
|
629
|
+
inspect_id = rb_intern("inspect");
|
630
|
+
instruct_id = rb_intern("instruct");
|
631
|
+
keys_id = rb_intern("keys");
|
632
|
+
local_id = rb_intern("local");
|
603
633
|
nodes_id = rb_intern("@nodes");
|
604
634
|
num_id = rb_intern("@num");
|
605
|
-
|
606
|
-
|
607
|
-
|
608
|
-
|
635
|
+
parse_id = rb_intern("parse");
|
636
|
+
readpartial_id = rb_intern("readpartial");
|
637
|
+
start_element_id = rb_intern("start_element");
|
638
|
+
text_id = rb_intern("text");
|
609
639
|
to_c_id = rb_intern("to_c");
|
610
640
|
to_s_id = rb_intern("to_s");
|
641
|
+
tv_sec_id = rb_intern("tv_sec");
|
642
|
+
tv_usec_id = rb_intern("tv_usec");
|
643
|
+
value_id = rb_intern("@value");
|
611
644
|
|
612
645
|
time_class = rb_const_get(rb_cObject, rb_intern("Time"));
|
613
646
|
struct_class = rb_const_get(rb_cObject, rb_intern("Struct"));
|
data/ext/ox/ox.h
CHANGED
@@ -198,6 +198,8 @@ typedef struct _Options {
|
|
198
198
|
extern VALUE parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort);
|
199
199
|
extern void _raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
|
200
200
|
|
201
|
+
extern void ox_sax_parse(VALUE handler, VALUE io);
|
202
|
+
|
201
203
|
extern char* write_obj_to_str(VALUE obj, Options copts);
|
202
204
|
extern void write_obj_to_file(VALUE obj, const char *path, Options copts);
|
203
205
|
|
@@ -206,15 +208,24 @@ extern VALUE Ox;
|
|
206
208
|
extern ID at_id;
|
207
209
|
extern ID attributes_id;
|
208
210
|
extern ID beg_id;
|
211
|
+
extern ID cdata_id;
|
212
|
+
extern ID comment_id;
|
209
213
|
extern ID den_id;
|
214
|
+
extern ID doctype_id;
|
215
|
+
extern ID end_element_id;
|
210
216
|
extern ID end_id;
|
217
|
+
extern ID error_id;
|
211
218
|
extern ID excl_id;
|
212
219
|
extern ID inspect_id;
|
220
|
+
extern ID instruct_id;
|
213
221
|
extern ID keys_id;
|
214
222
|
extern ID local_id;
|
215
223
|
extern ID nodes_id;
|
216
224
|
extern ID num_id;
|
217
225
|
extern ID parse_id;
|
226
|
+
extern ID readpartial_id;
|
227
|
+
extern ID start_element_id;
|
228
|
+
extern ID text_id;
|
218
229
|
extern ID to_c_id;
|
219
230
|
extern ID to_s_id;
|
220
231
|
extern ID tv_sec_id;
|
data/ext/ox/sax.c
ADDED
@@ -0,0 +1,758 @@
|
|
1
|
+
/* sax.c
|
2
|
+
* Copyright (c) 2011, Peter Ohler
|
3
|
+
* All rights reserved.
|
4
|
+
*
|
5
|
+
* Redistribution and use in source and binary forms, with or without
|
6
|
+
* modification, are permitted provided that the following conditions are met:
|
7
|
+
*
|
8
|
+
* - Redistributions of source code must retain the above copyright notice, this
|
9
|
+
* list of conditions and the following disclaimer.
|
10
|
+
*
|
11
|
+
* - Redistributions in binary form must reproduce the above copyright notice,
|
12
|
+
* this list of conditions and the following disclaimer in the documentation
|
13
|
+
* and/or other materials provided with the distribution.
|
14
|
+
*
|
15
|
+
* - Neither the name of Peter Ohler nor the names of its contributors may be
|
16
|
+
* used to endorse or promote products derived from this software without
|
17
|
+
* specific prior written permission.
|
18
|
+
*
|
19
|
+
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
|
+
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
|
+
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
22
|
+
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
|
23
|
+
* FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
24
|
+
* DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
|
25
|
+
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
|
26
|
+
* CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
|
27
|
+
* OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28
|
+
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29
|
+
*/
|
30
|
+
|
31
|
+
#include <stdlib.h>
|
32
|
+
#include <errno.h>
|
33
|
+
#include <stdio.h>
|
34
|
+
#include <string.h>
|
35
|
+
#include <sys/types.h>
|
36
|
+
#include <sys/uio.h>
|
37
|
+
#include <unistd.h>
|
38
|
+
|
39
|
+
#include "ruby.h"
|
40
|
+
#include "ox.h"
|
41
|
+
|
42
|
+
typedef struct _SaxDrive {
|
43
|
+
char base_buf[0x00010000];
|
44
|
+
//char base_buf[0x00000010];
|
45
|
+
char *buf;
|
46
|
+
char *buf_end;
|
47
|
+
char *cur;
|
48
|
+
char *read_end; // one past last character read
|
49
|
+
char *str; // start of current string being read
|
50
|
+
int line;
|
51
|
+
int col;
|
52
|
+
VALUE handler;
|
53
|
+
int (*read_func)(struct _SaxDrive *dr);
|
54
|
+
union {
|
55
|
+
int fd;
|
56
|
+
VALUE io;
|
57
|
+
};
|
58
|
+
int has_instruct;
|
59
|
+
int has_doctype;
|
60
|
+
int has_comment;
|
61
|
+
int has_cdata;
|
62
|
+
int has_text;
|
63
|
+
int has_start_element;
|
64
|
+
int has_end_element;
|
65
|
+
int has_error;
|
66
|
+
rb_encoding *encoding;
|
67
|
+
} *SaxDrive;
|
68
|
+
|
69
|
+
static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io);
|
70
|
+
static void sax_drive_cleanup(SaxDrive dr);
|
71
|
+
static int sax_drive_read(SaxDrive dr);
|
72
|
+
static void sax_drive_error(SaxDrive dr, const char *msg, int critical);
|
73
|
+
|
74
|
+
static int read_children(SaxDrive dr, int first);
|
75
|
+
static int read_instruction(SaxDrive dr);
|
76
|
+
static int read_doctype(SaxDrive dr);
|
77
|
+
static int read_cdata(SaxDrive dr);
|
78
|
+
static int read_comment(SaxDrive dr);
|
79
|
+
static int read_element(SaxDrive dr);
|
80
|
+
static int read_text(SaxDrive dr);
|
81
|
+
static int read_attrs(SaxDrive dr, VALUE *attrs, char c, char termc, char term2);
|
82
|
+
static char read_name_token(SaxDrive dr);
|
83
|
+
static int read_quoted_value(SaxDrive dr);
|
84
|
+
|
85
|
+
static VALUE io_cb(VALUE rdr);
|
86
|
+
static int read_from_io(SaxDrive dr);
|
87
|
+
static int read_from_fd(SaxDrive dr);
|
88
|
+
|
89
|
+
static inline char
|
90
|
+
sax_drive_get(SaxDrive dr) {
|
91
|
+
if (dr->read_end <= dr->cur) {
|
92
|
+
if (0 != sax_drive_read(dr)) {
|
93
|
+
return 0;
|
94
|
+
}
|
95
|
+
}
|
96
|
+
if ('\n' == *dr->cur) {
|
97
|
+
dr->line++;
|
98
|
+
dr->col = 0;
|
99
|
+
}
|
100
|
+
dr->col++;
|
101
|
+
|
102
|
+
return *dr->cur++;
|
103
|
+
}
|
104
|
+
|
105
|
+
/* Starts by reading a character so it is safe to use with an empty or
|
106
|
+
* compacted buffer.
|
107
|
+
*/
|
108
|
+
inline static char
|
109
|
+
next_non_white(SaxDrive dr) {
|
110
|
+
char c;
|
111
|
+
|
112
|
+
while ('\0' != (c = sax_drive_get(dr))) {
|
113
|
+
switch(c) {
|
114
|
+
case ' ':
|
115
|
+
case '\t':
|
116
|
+
case '\f':
|
117
|
+
case '\n':
|
118
|
+
case '\r':
|
119
|
+
break;
|
120
|
+
default:
|
121
|
+
return c;
|
122
|
+
}
|
123
|
+
}
|
124
|
+
return '\0';
|
125
|
+
}
|
126
|
+
|
127
|
+
inline static int
|
128
|
+
is_white(char c) {
|
129
|
+
switch(c) {
|
130
|
+
case ' ':
|
131
|
+
case '\t':
|
132
|
+
case '\f':
|
133
|
+
case '\n':
|
134
|
+
case '\r':
|
135
|
+
return 1;
|
136
|
+
default:
|
137
|
+
break;
|
138
|
+
}
|
139
|
+
return 0;
|
140
|
+
}
|
141
|
+
|
142
|
+
inline static VALUE
|
143
|
+
str2sym(const char *str) {
|
144
|
+
VALUE *slot;
|
145
|
+
VALUE sym;
|
146
|
+
|
147
|
+
if (Qundef == (sym = ox_cache_get(symbol_cache, str, &slot))) {
|
148
|
+
sym = ID2SYM(rb_intern(str));
|
149
|
+
*slot = sym;
|
150
|
+
}
|
151
|
+
return sym;
|
152
|
+
}
|
153
|
+
|
154
|
+
|
155
|
+
void
|
156
|
+
ox_sax_parse(VALUE handler, VALUE io) {
|
157
|
+
struct _SaxDrive dr;
|
158
|
+
|
159
|
+
sax_drive_init(&dr, handler, io);
|
160
|
+
#if 0
|
161
|
+
printf("*** sax_parse with these flags\n");
|
162
|
+
printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
|
163
|
+
printf(" has_doctype = %s\n", dr.has_doctype ? "true" : "false");
|
164
|
+
printf(" has_comment = %s\n", dr.has_comment ? "true" : "false");
|
165
|
+
printf(" has_cdata = %s\n", dr.has_cdata ? "true" : "false");
|
166
|
+
printf(" has_text = %s\n", dr.has_text ? "true" : "false");
|
167
|
+
printf(" has_start_element = %s\n", dr.has_start_element ? "true" : "false");
|
168
|
+
printf(" has_end_element = %s\n", dr.has_end_element ? "true" : "false");
|
169
|
+
printf(" has_error = %s\n", dr.has_error ? "true" : "false");
|
170
|
+
#endif
|
171
|
+
read_children(&dr, 1);
|
172
|
+
sax_drive_cleanup(&dr);
|
173
|
+
}
|
174
|
+
|
175
|
+
static void
|
176
|
+
sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
|
177
|
+
if (rb_respond_to(io, readpartial_id)) {
|
178
|
+
VALUE rfd;
|
179
|
+
|
180
|
+
if (rb_respond_to(io, rb_intern("fileno")) && Qnil != (rfd = rb_funcall(io, rb_intern("fileno"), 0))) {
|
181
|
+
dr->read_func = read_from_fd;
|
182
|
+
dr->fd = FIX2INT(rfd);
|
183
|
+
} else {
|
184
|
+
dr->read_func = read_from_io;
|
185
|
+
dr->io = io;
|
186
|
+
}
|
187
|
+
} else {
|
188
|
+
rb_raise(rb_eArgError, "sax_parser io argument must respond to readpartial().\n");
|
189
|
+
}
|
190
|
+
dr->buf = dr->base_buf;
|
191
|
+
*dr->buf = '\0';
|
192
|
+
dr->buf_end = dr->buf + sizeof(dr->base_buf) - 1; // 1 less to make debugging easier
|
193
|
+
dr->cur = dr->buf;
|
194
|
+
dr->read_end = dr->buf;
|
195
|
+
dr->str = 0;
|
196
|
+
dr->line = 1;
|
197
|
+
dr->col = 0;
|
198
|
+
dr->handler = handler;
|
199
|
+
dr->has_instruct = rb_respond_to(handler, instruct_id);
|
200
|
+
dr->has_doctype = rb_respond_to(handler, doctype_id);
|
201
|
+
dr->has_comment = rb_respond_to(handler, comment_id);
|
202
|
+
dr->has_cdata = rb_respond_to(handler, cdata_id);
|
203
|
+
dr->has_text = rb_respond_to(handler, text_id);
|
204
|
+
dr->has_start_element = rb_respond_to(handler, start_element_id);
|
205
|
+
dr->has_end_element = rb_respond_to(handler, end_element_id);
|
206
|
+
dr->has_error = rb_respond_to(handler, error_id);
|
207
|
+
dr->encoding = 0;
|
208
|
+
}
|
209
|
+
|
210
|
+
static void
|
211
|
+
sax_drive_cleanup(SaxDrive dr) {
|
212
|
+
if (dr->base_buf != dr->buf) {
|
213
|
+
free(dr->buf);
|
214
|
+
}
|
215
|
+
}
|
216
|
+
|
217
|
+
static int
|
218
|
+
sax_drive_read(SaxDrive dr) {
|
219
|
+
int err;
|
220
|
+
size_t shift = 0;
|
221
|
+
|
222
|
+
if (dr->buf < dr->cur) {
|
223
|
+
if (0 == dr->str) {
|
224
|
+
shift = dr->cur - dr->buf;
|
225
|
+
} else {
|
226
|
+
shift = dr->str - dr->buf;
|
227
|
+
}
|
228
|
+
//printf("\n*** shift: %lu\n", shift);
|
229
|
+
if (0 == shift) { // no space left so allocate more
|
230
|
+
char *old = dr->buf;
|
231
|
+
size_t size = dr->buf_end - dr->buf;
|
232
|
+
|
233
|
+
if (dr->buf == dr->base_buf) {
|
234
|
+
if (0 == (dr->buf = (char*)malloc(size * 2))) {
|
235
|
+
rb_raise(rb_eNoMemError, "Could not allocate memory for large element.\n");
|
236
|
+
}
|
237
|
+
memcpy(dr->buf, old, size);
|
238
|
+
} else {
|
239
|
+
if (0 == (dr->buf = (char*)realloc(dr->buf, size * 2))) {
|
240
|
+
rb_raise(rb_eNoMemError, "Could not allocate memory for large element.\n");
|
241
|
+
}
|
242
|
+
}
|
243
|
+
dr->buf_end = dr->buf + size * 2;
|
244
|
+
dr->cur = dr->buf + (dr->cur - old);
|
245
|
+
dr->read_end = dr->buf + (dr->read_end - old);
|
246
|
+
if (0 != dr->str) {
|
247
|
+
dr->str = dr->buf + (dr->str - old);
|
248
|
+
}
|
249
|
+
} else {
|
250
|
+
memmove(dr->buf, dr->buf + shift, dr->read_end - (dr->buf + shift));
|
251
|
+
dr->cur -= shift;
|
252
|
+
dr->read_end -= shift;
|
253
|
+
if (0 != dr->str) {
|
254
|
+
dr->str -= shift;
|
255
|
+
}
|
256
|
+
}
|
257
|
+
}
|
258
|
+
err = dr->read_func(dr);
|
259
|
+
*dr->read_end = '\0';
|
260
|
+
|
261
|
+
return err;
|
262
|
+
}
|
263
|
+
|
264
|
+
static void
|
265
|
+
sax_drive_error(SaxDrive dr, const char *msg, int critical) {
|
266
|
+
if (dr->has_error) {
|
267
|
+
VALUE args[3];
|
268
|
+
|
269
|
+
args[0] = rb_str_new2(msg);
|
270
|
+
args[1] = INT2FIX(dr->line);
|
271
|
+
args[2] = INT2FIX(dr->col);
|
272
|
+
rb_funcall2(dr->handler, error_id, 3, args);
|
273
|
+
} else if (critical) {
|
274
|
+
sax_drive_cleanup(dr);
|
275
|
+
rb_raise(rb_eSyntaxError, "%s at line %d, column %d\n", msg, dr->line, dr->col);
|
276
|
+
}
|
277
|
+
}
|
278
|
+
|
279
|
+
static int
|
280
|
+
read_children(SaxDrive dr, int first) {
|
281
|
+
int err = 0;
|
282
|
+
int element_read = !first;
|
283
|
+
int doctype_read = !first;
|
284
|
+
char c;
|
285
|
+
|
286
|
+
while (!err) {
|
287
|
+
dr->str = dr->cur; // protect the start
|
288
|
+
if ('\0' == (c = next_non_white(dr))) {
|
289
|
+
if (!first) {
|
290
|
+
sax_drive_error(dr, "invalid format, element not terminated", 1);
|
291
|
+
err = 1;
|
292
|
+
}
|
293
|
+
break; // normal completion if first
|
294
|
+
}
|
295
|
+
if ('<' != c) {
|
296
|
+
if (first) { // all top level entities start with <
|
297
|
+
sax_drive_error(dr, "invalid format, expected <", 1);
|
298
|
+
break; // unrecoverable
|
299
|
+
}
|
300
|
+
if (0 != (err = read_text(dr))) { // finished when < is reached
|
301
|
+
break;
|
302
|
+
}
|
303
|
+
}
|
304
|
+
dr->str = dr->cur; // protect the start for elements
|
305
|
+
c = sax_drive_get(dr);
|
306
|
+
switch (c) {
|
307
|
+
case '?': // instructions (xml or otherwise)
|
308
|
+
if (!first || element_read || doctype_read) {
|
309
|
+
sax_drive_error(dr, "invalid format, instruction must come before elements", 0);
|
310
|
+
}
|
311
|
+
err = read_instruction(dr);
|
312
|
+
break;
|
313
|
+
case '!': // comment or doctype
|
314
|
+
dr->str = dr->cur;
|
315
|
+
c = sax_drive_get(dr);
|
316
|
+
if ('\0' == c) {
|
317
|
+
sax_drive_error(dr, "invalid format, DOCTYPE or comment not terminated", 1);
|
318
|
+
err = 1;
|
319
|
+
} else if ('-' == c) {
|
320
|
+
c = sax_drive_get(dr); // skip first - and get next character
|
321
|
+
if ('-' != c) {
|
322
|
+
sax_drive_error(dr, "invalid format, bad comment format", 1);
|
323
|
+
err = 1;
|
324
|
+
} else {
|
325
|
+
c = sax_drive_get(dr); // skip second -
|
326
|
+
err = read_comment(dr);
|
327
|
+
}
|
328
|
+
} else {
|
329
|
+
int i;
|
330
|
+
|
331
|
+
for (i = 7; 0 < i; i--) {
|
332
|
+
sax_drive_get(dr);
|
333
|
+
}
|
334
|
+
if (0 == strncmp("DOCTYPE", dr->str, 7)) {
|
335
|
+
if (element_read || !first) {
|
336
|
+
sax_drive_error(dr, "invalid format, DOCTYPE can not come after an element", 0);
|
337
|
+
}
|
338
|
+
doctype_read = 1;
|
339
|
+
err = read_doctype(dr);
|
340
|
+
} else if (0 == strncmp("[CDATA[", dr->str, 7)) {
|
341
|
+
err = read_cdata(dr);
|
342
|
+
} else {
|
343
|
+
sax_drive_error(dr, "invalid format, DOCTYPE or comment expected", 1);
|
344
|
+
err = 1;
|
345
|
+
}
|
346
|
+
}
|
347
|
+
break;
|
348
|
+
case '/': // element end
|
349
|
+
return ('\0' == read_name_token(dr));
|
350
|
+
break;
|
351
|
+
case '\0':
|
352
|
+
sax_drive_error(dr, "invalid format, document not terminated", 1);
|
353
|
+
err = 1;
|
354
|
+
break;
|
355
|
+
default:
|
356
|
+
dr->cur--; // safe since no read occurred after getting last character
|
357
|
+
if (first && element_read) {
|
358
|
+
sax_drive_error(dr, "invalid format, multiple top level elements", 0);
|
359
|
+
}
|
360
|
+
err = read_element(dr);
|
361
|
+
element_read = 1;
|
362
|
+
break;
|
363
|
+
}
|
364
|
+
}
|
365
|
+
return err;
|
366
|
+
}
|
367
|
+
|
368
|
+
/* Entered after the "<?" sequence. Ready to read the rest.
|
369
|
+
*/
|
370
|
+
static int
|
371
|
+
read_instruction(SaxDrive dr) {
|
372
|
+
VALUE target = Qnil;
|
373
|
+
VALUE attrs = Qnil;
|
374
|
+
char c;
|
375
|
+
|
376
|
+
if ('\0' == (c = read_name_token(dr))) {
|
377
|
+
return -1;
|
378
|
+
}
|
379
|
+
if (dr->has_instruct) {
|
380
|
+
target = rb_str_new2(dr->str);
|
381
|
+
}
|
382
|
+
if (0 != read_attrs(dr, &attrs, c, '?', '?')) {
|
383
|
+
return -1;
|
384
|
+
}
|
385
|
+
c = next_non_white(dr);
|
386
|
+
if ('>' != c) {
|
387
|
+
sax_drive_error(dr, "invalid format, instruction not terminated", 1);
|
388
|
+
return -1;
|
389
|
+
}
|
390
|
+
if (0 != dr->has_instruct) {
|
391
|
+
VALUE args[2];
|
392
|
+
|
393
|
+
args[0] = target;
|
394
|
+
args[1] = attrs;
|
395
|
+
rb_funcall2(dr->handler, instruct_id, 2, args);
|
396
|
+
}
|
397
|
+
dr->str = 0;
|
398
|
+
|
399
|
+
return 0;
|
400
|
+
}
|
401
|
+
|
402
|
+
/* Entered after the "<!DOCTYPE" sequence. Ready to read the rest.
|
403
|
+
*/
|
404
|
+
static int
|
405
|
+
read_doctype(SaxDrive dr) {
|
406
|
+
char c;
|
407
|
+
|
408
|
+
dr->str = dr->cur - 1; // mark the start
|
409
|
+
while ('>' != (c = sax_drive_get(dr))) {
|
410
|
+
if ('\0' == c) {
|
411
|
+
sax_drive_error(dr, "invalid format, doctype terminated unexpectedly", 1);
|
412
|
+
return -1;
|
413
|
+
}
|
414
|
+
}
|
415
|
+
*(dr->cur - 1) = '\0';
|
416
|
+
if (dr->has_doctype) {
|
417
|
+
VALUE args[1];
|
418
|
+
|
419
|
+
args[0] = rb_str_new2(dr->str);
|
420
|
+
rb_funcall2(dr->handler, doctype_id, 1, args);
|
421
|
+
}
|
422
|
+
dr->str = 0;
|
423
|
+
|
424
|
+
return 0;
|
425
|
+
}
|
426
|
+
|
427
|
+
/* Entered after the "<![CDATA[" sequence. Ready to read the rest.
|
428
|
+
*/
|
429
|
+
static int
|
430
|
+
read_cdata(SaxDrive dr) {
|
431
|
+
char c;
|
432
|
+
int end = 0;
|
433
|
+
|
434
|
+
dr->str = dr->cur - 1; // mark the start
|
435
|
+
while (1) {
|
436
|
+
c = sax_drive_get(dr);
|
437
|
+
if (']' == c) {
|
438
|
+
end++;
|
439
|
+
} else if ('>' == c) {
|
440
|
+
if (2 <= end) {
|
441
|
+
*(dr->cur - 3) = '\0';
|
442
|
+
break;
|
443
|
+
}
|
444
|
+
end = 0;
|
445
|
+
} else if ('\0' == c) {
|
446
|
+
sax_drive_error(dr, "invalid format, cdata terminated unexpectedly", 1);
|
447
|
+
return -1;
|
448
|
+
} else {
|
449
|
+
end = 0;
|
450
|
+
}
|
451
|
+
}
|
452
|
+
if (dr->has_cdata) {
|
453
|
+
VALUE args[1];
|
454
|
+
|
455
|
+
args[0] = rb_str_new2(dr->str);
|
456
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
457
|
+
if (0 != dr->encoding) {
|
458
|
+
rb_enc_associate(args[0], dr->encoding);
|
459
|
+
}
|
460
|
+
#endif
|
461
|
+
rb_funcall2(dr->handler, cdata_id, 1, args);
|
462
|
+
}
|
463
|
+
dr->str = 0;
|
464
|
+
|
465
|
+
return 0;
|
466
|
+
}
|
467
|
+
|
468
|
+
/* Entered after the "<!--" sequence. Ready to read the rest.
|
469
|
+
*/
|
470
|
+
static int
|
471
|
+
read_comment(SaxDrive dr) {
|
472
|
+
char c;
|
473
|
+
int end = 0;
|
474
|
+
|
475
|
+
dr->str = dr->cur - 1; // mark the start
|
476
|
+
while (1) {
|
477
|
+
c = sax_drive_get(dr);
|
478
|
+
if ('-' == c) {
|
479
|
+
if (end) {
|
480
|
+
*(dr->cur - 2) = '\0';
|
481
|
+
break;
|
482
|
+
} else {
|
483
|
+
end = 1;
|
484
|
+
}
|
485
|
+
} else if ('\0' == c) {
|
486
|
+
sax_drive_error(dr, "invalid format, comment terminated unexpectedly", 1);
|
487
|
+
return -1;
|
488
|
+
} else {
|
489
|
+
end = 0;
|
490
|
+
}
|
491
|
+
}
|
492
|
+
c = sax_drive_get(dr);
|
493
|
+
if ('>' != c) {
|
494
|
+
sax_drive_error(dr, "invalid format, comment terminated unexpectedly", 1);
|
495
|
+
}
|
496
|
+
if (dr->has_comment) {
|
497
|
+
VALUE args[1];
|
498
|
+
|
499
|
+
args[0] = rb_str_new2(dr->str);
|
500
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
501
|
+
if (0 != dr->encoding) {
|
502
|
+
rb_enc_associate(args[0], dr->encoding);
|
503
|
+
}
|
504
|
+
#endif
|
505
|
+
rb_funcall2(dr->handler, comment_id, 1, args);
|
506
|
+
}
|
507
|
+
dr->str = 0;
|
508
|
+
|
509
|
+
return 0;
|
510
|
+
}
|
511
|
+
|
512
|
+
/* Entered after the '<' and the first character after that. Returns status
|
513
|
+
* code.
|
514
|
+
*/
|
515
|
+
static int
|
516
|
+
read_element(SaxDrive dr) {
|
517
|
+
VALUE name = Qnil;
|
518
|
+
VALUE attrs = Qnil;
|
519
|
+
char c;
|
520
|
+
int closed;
|
521
|
+
|
522
|
+
if ('\0' == (c = read_name_token(dr))) {
|
523
|
+
return -1;
|
524
|
+
}
|
525
|
+
name = str2sym(dr->str);
|
526
|
+
if ('/' == c) {
|
527
|
+
closed = 1;
|
528
|
+
} else if ('>' == c) {
|
529
|
+
closed = 0;
|
530
|
+
} else {
|
531
|
+
if (0 != read_attrs(dr, &attrs, c, '/', '>')) {
|
532
|
+
return -1;
|
533
|
+
}
|
534
|
+
closed = ('/' == *(dr->cur - 1));
|
535
|
+
}
|
536
|
+
if (closed) {
|
537
|
+
c = next_non_white(dr);
|
538
|
+
if ('>' != c) {
|
539
|
+
sax_drive_error(dr, "invalid format, element not closed", 1);
|
540
|
+
return -1;
|
541
|
+
}
|
542
|
+
}
|
543
|
+
if (0 != dr->has_start_element) {
|
544
|
+
VALUE args[2];
|
545
|
+
|
546
|
+
args[0] = name;
|
547
|
+
args[1] = attrs;
|
548
|
+
rb_funcall2(dr->handler, start_element_id, 2, args);
|
549
|
+
if (closed && dr->has_end_element) {
|
550
|
+
rb_funcall2(dr->handler, end_element_id, 1, args);
|
551
|
+
}
|
552
|
+
}
|
553
|
+
if (!closed) {
|
554
|
+
if (0 != read_children(dr, 0)) {
|
555
|
+
return -1;
|
556
|
+
}
|
557
|
+
if (0 != strcmp(dr->str, rb_id2name(SYM2ID(name)))) {
|
558
|
+
sax_drive_error(dr, "invalid format, element start and end names do not match", 1);
|
559
|
+
return -1;
|
560
|
+
}
|
561
|
+
if (0 != dr->has_end_element) {
|
562
|
+
VALUE args[1];
|
563
|
+
|
564
|
+
args[0] = name;
|
565
|
+
rb_funcall2(dr->handler, end_element_id, 1, args);
|
566
|
+
}
|
567
|
+
}
|
568
|
+
dr->str = 0;
|
569
|
+
|
570
|
+
return 0;
|
571
|
+
}
|
572
|
+
|
573
|
+
static int
|
574
|
+
read_text(SaxDrive dr) {
|
575
|
+
char c;
|
576
|
+
|
577
|
+
dr->str = dr->cur - 1; // mark the start
|
578
|
+
while ('<' != (c = sax_drive_get(dr))) {
|
579
|
+
if ('\0' == c) {
|
580
|
+
sax_drive_error(dr, "invalid format, text terminated unexpectedly", 1);
|
581
|
+
return -1;
|
582
|
+
}
|
583
|
+
}
|
584
|
+
*(dr->cur - 1) = '\0';
|
585
|
+
if (dr->has_text) {
|
586
|
+
VALUE args[1];
|
587
|
+
|
588
|
+
args[0] = rb_str_new2(dr->str);
|
589
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
590
|
+
if (0 != dr->encoding) {
|
591
|
+
rb_enc_associate(args[0], dr->encoding);
|
592
|
+
}
|
593
|
+
#endif
|
594
|
+
rb_funcall2(dr->handler, text_id, 1, args);
|
595
|
+
}
|
596
|
+
return 0;
|
597
|
+
}
|
598
|
+
|
599
|
+
static int
|
600
|
+
read_attrs(SaxDrive dr, VALUE *attrs, char c, char termc, char term2) {
|
601
|
+
VALUE name = Qnil;
|
602
|
+
int is_encoding = 0;
|
603
|
+
|
604
|
+
dr->str = dr->cur; // lock it down
|
605
|
+
if (is_white(c)) {
|
606
|
+
c = next_non_white(dr);
|
607
|
+
}
|
608
|
+
while (termc != c && term2 != c) {
|
609
|
+
dr->cur--;
|
610
|
+
if ('\0' == c) {
|
611
|
+
sax_drive_error(dr, "invalid format, processing instruction not terminated", 1);
|
612
|
+
return -1;
|
613
|
+
}
|
614
|
+
if ('\0' == (c = read_name_token(dr))) {
|
615
|
+
return -1;
|
616
|
+
}
|
617
|
+
if ('?' == termc && 0 == strcmp("encoding", dr->str)) {
|
618
|
+
is_encoding = 1;
|
619
|
+
}
|
620
|
+
if (dr->has_instruct) {
|
621
|
+
name = str2sym(dr->str);
|
622
|
+
}
|
623
|
+
if (is_white(c)) {
|
624
|
+
c = next_non_white(dr);
|
625
|
+
}
|
626
|
+
if ('=' != c) {
|
627
|
+
sax_drive_error(dr, "invalid format, no attribute value", 1);
|
628
|
+
return -1;
|
629
|
+
}
|
630
|
+
if (0 != read_quoted_value(dr)) {
|
631
|
+
return -1;
|
632
|
+
}
|
633
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
634
|
+
if (is_encoding) {
|
635
|
+
dr->encoding = rb_enc_find(dr->str);
|
636
|
+
}
|
637
|
+
#endif
|
638
|
+
if (dr->has_instruct) {
|
639
|
+
VALUE rstr = rb_str_new2(dr->str);
|
640
|
+
|
641
|
+
if (Qnil == *attrs) {
|
642
|
+
*attrs = rb_hash_new();
|
643
|
+
}
|
644
|
+
#ifdef HAVE_RUBY_ENCODING_H
|
645
|
+
if (0 != dr->encoding) {
|
646
|
+
rb_enc_associate(rstr, dr->encoding);
|
647
|
+
}
|
648
|
+
#endif
|
649
|
+
rb_hash_aset(*attrs, name, rstr);
|
650
|
+
}
|
651
|
+
c = next_non_white(dr);
|
652
|
+
}
|
653
|
+
return 0;
|
654
|
+
}
|
655
|
+
|
656
|
+
static char
|
657
|
+
read_name_token(SaxDrive dr) {
|
658
|
+
char c;
|
659
|
+
|
660
|
+
dr->str = dr->cur; // make sure the start doesn't get compacted out
|
661
|
+
c = sax_drive_get(dr);
|
662
|
+
if (is_white(c)) {
|
663
|
+
c = next_non_white(dr);
|
664
|
+
dr->str = dr->cur - 1;
|
665
|
+
}
|
666
|
+
while (1) {
|
667
|
+
switch (c) {
|
668
|
+
case ' ':
|
669
|
+
case '\t':
|
670
|
+
case '\f':
|
671
|
+
case '?':
|
672
|
+
case '=':
|
673
|
+
case '/':
|
674
|
+
case '>':
|
675
|
+
case '\n':
|
676
|
+
case '\r':
|
677
|
+
*(dr->cur - 1) = '\0';
|
678
|
+
return c;
|
679
|
+
case '\0':
|
680
|
+
// documents never terminate after a name token
|
681
|
+
sax_drive_error(dr, "invalid format, document not terminated", 1);
|
682
|
+
return '\0';
|
683
|
+
default:
|
684
|
+
break;
|
685
|
+
}
|
686
|
+
c = sax_drive_get(dr);
|
687
|
+
}
|
688
|
+
return '\0';
|
689
|
+
}
|
690
|
+
|
691
|
+
static int
|
692
|
+
read_quoted_value(SaxDrive dr) {
|
693
|
+
char c;
|
694
|
+
|
695
|
+
dr->str = dr->cur;
|
696
|
+
c = sax_drive_get(dr);
|
697
|
+
if (is_white(c)) {
|
698
|
+
c = next_non_white(dr);
|
699
|
+
}
|
700
|
+
if ('"' != c) {
|
701
|
+
sax_drive_error(dr, "invalid format, attibute value not in quotes", 1);
|
702
|
+
return -1;
|
703
|
+
}
|
704
|
+
dr->str = dr->cur;
|
705
|
+
while ('"' != (c = sax_drive_get(dr))) {
|
706
|
+
if ('\0' == c) {
|
707
|
+
sax_drive_error(dr, "invalid format, quoted value not terminated", 1);
|
708
|
+
return -1;
|
709
|
+
}
|
710
|
+
}
|
711
|
+
*(dr->cur - 1) = '\0'; // terminate value
|
712
|
+
|
713
|
+
return 0;
|
714
|
+
}
|
715
|
+
|
716
|
+
static int
|
717
|
+
read_from_io(SaxDrive dr) {
|
718
|
+
int ex = 0;
|
719
|
+
|
720
|
+
rb_protect(io_cb, (VALUE)dr, &ex);
|
721
|
+
// printf("*** io_cb exception = %d\n", ex);
|
722
|
+
// An error code of 6 is always returned not matter what kind of Exception is raised.
|
723
|
+
return ex;
|
724
|
+
}
|
725
|
+
|
726
|
+
static VALUE
|
727
|
+
io_cb(VALUE rdr) {
|
728
|
+
SaxDrive dr = (SaxDrive)rdr;
|
729
|
+
VALUE args[1];
|
730
|
+
VALUE rstr;
|
731
|
+
char *str;
|
732
|
+
size_t cnt;
|
733
|
+
|
734
|
+
args[0] = SIZET2NUM(dr->buf_end - dr->cur);
|
735
|
+
rstr = rb_funcall2(dr->io, readpartial_id, 1, args);
|
736
|
+
str = StringValuePtr(rstr);
|
737
|
+
cnt = strlen(str);
|
738
|
+
//printf("*** read %lu bytes, str: '%s'\n", cnt, str);
|
739
|
+
strcpy(dr->cur, str);
|
740
|
+
dr->read_end = dr->cur + cnt;
|
741
|
+
|
742
|
+
return Qnil;
|
743
|
+
}
|
744
|
+
|
745
|
+
static int
|
746
|
+
read_from_fd(SaxDrive dr) {
|
747
|
+
ssize_t cnt;
|
748
|
+
size_t max = dr->buf_end - dr->cur;
|
749
|
+
|
750
|
+
cnt = read(dr->fd, dr->cur, max);
|
751
|
+
if (cnt < 0) {
|
752
|
+
sax_drive_error(dr, "failed to read from file", 1);
|
753
|
+
return -1;
|
754
|
+
} else if (0 != cnt) {
|
755
|
+
dr->read_end = dr->cur + cnt;
|
756
|
+
}
|
757
|
+
return 0;
|
758
|
+
}
|