ox 1.9.4 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/README.md +21 -17
- data/ext/ox/cache.c +1 -2
- data/ext/ox/dump.c +11 -11
- data/ext/ox/extconf.rb +4 -2
- data/ext/ox/obj_load.c +1 -10
- data/ext/ox/ox.c +26 -11
- data/ext/ox/ox.h +1 -2
- data/ext/ox/sax.c +713 -1087
- data/ext/ox/sax.h +116 -0
- data/ext/ox/sax_as.c +254 -0
- data/ext/ox/sax_buf.c +262 -0
- data/ext/ox/sax_buf.h +198 -0
- data/ext/ox/sax_has.h +85 -0
- data/ext/ox/sax_hint.c +217 -0
- data/ext/ox/sax_hint.h +50 -0
- data/ext/ox/sax_stack.h +108 -0
- data/lib/ox/version.rb +1 -1
- metadata +19 -14
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 31bc59694575361e4f784ffc8eb8b37fdbb08057
|
4
|
+
data.tar.gz: 9332c70a159d96aa167cc738ac0a0e5bbb2ff6e9
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8ee7dccef40d6141d12b1d8eee8a5b92e5d0ad4b04a0c253086c4c333961da1894eb6a450031bf6e4476f6d62c0fb4392793786ae9fdcc2dd84db7597ac8b2bf
|
7
|
+
data.tar.gz: 22bed155c466594191d54ac1ed6db28d6a442c54b03836bf94378590af164fc2d2f67713d44ecd4cfedad1c2aec3bb06df5ca029eff746753b2f8715e2f6f8c9
|
data/README.md
CHANGED
@@ -34,31 +34,29 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
-
### Release
|
37
|
+
### Release 2.0.0
|
38
38
|
|
39
|
-
- SAX
|
39
|
+
- The SAX parser went through a significant re-write. The options have changed. It is now 15% faster on large files and
|
40
|
+
much better at recovering from errors. So much so that the tolerant option was removed and is now the default and
|
41
|
+
only behavior. A smart option was added, however. The smart option recognizes a file as an HTML file and will apply a
|
42
|
+
simple set of validation rules that allow the HTML to be parsed more reasonably. Errors will cause callbacks but the
|
43
|
+
parsing continues with the best guess as to how to recover. Rubymaniac has helped with testing and prompted the
|
44
|
+
rewrite to support parsing HTML pages.
|
40
45
|
|
41
|
-
|
46
|
+
- HTML is now supported with the SAX parser. The parser knows some tags like \<br\> or \<img\> do not have to be
|
47
|
+
closed. Other hints as to how to parse and when to raise errors are also included. The parser does its best to
|
48
|
+
continue parsing even after errors.
|
42
49
|
|
43
|
-
-
|
50
|
+
- Added a symbolize option to the SAX parser. This option, if set to false, will use strings instead of symbols for
|
51
|
+
element and attribute names.
|
44
52
|
|
45
|
-
-
|
46
|
-
|
47
|
-
at least parsed results are returned.
|
48
|
-
|
49
|
-
- Attribute values need not be quoted or they can be quoted with single
|
50
|
-
quotes or there can be no =value are all.
|
51
|
-
|
52
|
-
- Elements not terminated will be terminated by the next element
|
53
|
-
termination. This effect goes up until a match is found on the element
|
54
|
-
name.
|
55
|
-
|
56
|
-
- SAX parser also given a :tolerant option with the same tolerance as the string parser.
|
53
|
+
- A contrib directory was added for people to submit useful bits of code that can be used with Ox. The first
|
54
|
+
contributor is Notezen with a nice way of building XML.
|
57
55
|
|
58
56
|
## <a name="description">Description</a>
|
59
57
|
|
60
58
|
Optimized XML (Ox), as the name implies, was written to provide speed-optimized
|
61
|
-
XML handling. It was designed to be an alternative to Nokogiri and other Ruby
|
59
|
+
XML and now HTML handling. It was designed to be an alternative to Nokogiri and other Ruby
|
62
60
|
XML parsers in generic XML parsing and as an alternative to Marshal for Object
|
63
61
|
serialization.
|
64
62
|
|
@@ -99,6 +97,7 @@ Ox is compatible with Ruby 1.8.7, 1.9.2, JRuby, and RBX.
|
|
99
97
|
|
100
98
|
### Object Dump Sample:
|
101
99
|
|
100
|
+
```ruby
|
102
101
|
require 'ox'
|
103
102
|
|
104
103
|
class Sample
|
@@ -117,9 +116,11 @@ Ox is compatible with Ruby 1.8.7, 1.9.2, JRuby, and RBX.
|
|
117
116
|
xml = Ox.dump(obj)
|
118
117
|
# Convert the object back into a Sample Object.
|
119
118
|
obj2 = Ox.parse_obj(xml)
|
119
|
+
```
|
120
120
|
|
121
121
|
### Generic XML Writing and Parsing:
|
122
122
|
|
123
|
+
```ruby
|
123
124
|
require 'ox'
|
124
125
|
|
125
126
|
doc = Ox::Document.new(:version => '1.0')
|
@@ -148,9 +149,11 @@ Ox is compatible with Ruby 1.8.7, 1.9.2, JRuby, and RBX.
|
|
148
149
|
doc2 = Ox.parse(xml)
|
149
150
|
puts "Same? #{doc == doc2}"
|
150
151
|
# true
|
152
|
+
```
|
151
153
|
|
152
154
|
### SAX XML Parsing:
|
153
155
|
|
156
|
+
```ruby
|
154
157
|
require 'stringio'
|
155
158
|
require 'ox'
|
156
159
|
|
@@ -181,6 +184,7 @@ Ox is compatible with Ruby 1.8.7, 1.9.2, JRuby, and RBX.
|
|
181
184
|
# end: bottom
|
182
185
|
# end: middle
|
183
186
|
# end: top
|
187
|
+
```
|
184
188
|
|
185
189
|
### Object XML format
|
186
190
|
|
data/ext/ox/cache.c
CHANGED
@@ -63,7 +63,7 @@ ox_cache_new(Cache *cache) {
|
|
63
63
|
*cache = ALLOC(struct _Cache);
|
64
64
|
(*cache)->key = 0;
|
65
65
|
(*cache)->value = Qundef;
|
66
|
-
|
66
|
+
memset((*cache)->slots, 0, sizeof((*cache)->slots));
|
67
67
|
}
|
68
68
|
|
69
69
|
VALUE
|
@@ -131,7 +131,6 @@ ox_cache_get(Cache cache, const char *key, VALUE **slot, char **keyp) {
|
|
131
131
|
*slot = &cache->value;
|
132
132
|
if (0 != keyp) {
|
133
133
|
if (0 == cache->key) {
|
134
|
-
// TBD bug somewhere
|
135
134
|
printf("*** Error: failed to set the key for %s\n", key);
|
136
135
|
*keyp = 0;
|
137
136
|
} else {
|
data/ext/ox/dump.c
CHANGED
@@ -595,9 +595,9 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
|
|
595
595
|
e.closed = (0 >= cnt);
|
596
596
|
out->w_start(out, &e);
|
597
597
|
if (!e.closed) {
|
598
|
-
VALUE
|
599
|
-
int
|
600
|
-
int
|
598
|
+
const VALUE *np = RARRAY_PTR(obj);
|
599
|
+
int i;
|
600
|
+
int d2 = depth + 1;
|
601
601
|
|
602
602
|
for (i = cnt; 0 < i; i--, np++) {
|
603
603
|
dump_obj(0, *np, d2, out);
|
@@ -835,10 +835,10 @@ dump_obj(ID aid, VALUE obj, unsigned int depth, Out out) {
|
|
835
835
|
e.closed = (0 >= cnt);
|
836
836
|
out->w_start(out, &e);
|
837
837
|
if (0 < cnt) {
|
838
|
-
VALUE
|
839
|
-
ID
|
840
|
-
unsigned int
|
841
|
-
int
|
838
|
+
const VALUE *np = RARRAY_PTR(vars);
|
839
|
+
ID vid;
|
840
|
+
unsigned int od = out->depth;
|
841
|
+
int i;
|
842
842
|
|
843
843
|
out->depth = depth + 1;
|
844
844
|
for (i = cnt; 0 < i; i--, np++) {
|
@@ -1105,9 +1105,9 @@ dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
|
|
1105
1105
|
int indent_needed = 1;
|
1106
1106
|
|
1107
1107
|
if (0 < cnt) {
|
1108
|
-
VALUE
|
1109
|
-
VALUE
|
1110
|
-
int
|
1108
|
+
const VALUE *np = RARRAY_PTR(obj);
|
1109
|
+
VALUE clas;
|
1110
|
+
int d2 = depth + 1;
|
1111
1111
|
|
1112
1112
|
for (; 0 < cnt; cnt--, np++) {
|
1113
1113
|
clas = rb_obj_class(*np);
|
@@ -1117,7 +1117,7 @@ dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
|
|
1117
1117
|
dump_gen_instruct(*np, d2, out);
|
1118
1118
|
indent_needed = (1 == cnt) ? 0 : 1;
|
1119
1119
|
} else if (rb_cString == clas) {
|
1120
|
-
dump_str_value(out, StringValuePtr(*np), RSTRING_LEN(*np));
|
1120
|
+
dump_str_value(out, StringValuePtr(*(VALUE*)np), RSTRING_LEN(*np));
|
1121
1121
|
indent_needed = (1 == cnt) ? 0 : 1;
|
1122
1122
|
} else if (ox_comment_clas == clas) {
|
1123
1123
|
dump_gen_val_node(*np, d2, "<!-- ", 5, " -->", 4, out);
|
data/ext/ox/extconf.rb
CHANGED
@@ -4,7 +4,7 @@ extension_name = 'ox'
|
|
4
4
|
dir_config(extension_name)
|
5
5
|
|
6
6
|
parts = RUBY_DESCRIPTION.split(' ')
|
7
|
-
type = parts[0]
|
7
|
+
type = parts[0].downcase()
|
8
8
|
type = 'ree' if 'ruby' == type && RUBY_DESCRIPTION.include?('Ruby Enterprise Edition')
|
9
9
|
platform = RUBY_PLATFORM
|
10
10
|
version = RUBY_VERSION.split('.')
|
@@ -21,8 +21,10 @@ dflags = {
|
|
21
21
|
#'HAS_RB_TIME_TIMESPEC' => ('ruby' == type && ('1.9.3' == RUBY_VERSION || '2' <= version[0])) ? 1 : 0,
|
22
22
|
'HAS_TM_GMTOFF' => ('ruby' == type && (('1' == version[0] && '9' == version[1]) || '2' <= version[0]) &&
|
23
23
|
!(platform.include?('cygwin') || platform.include?('solaris') || platform.include?('linux') || RUBY_PLATFORM =~ /(win|w)32$/)) ? 1 : 0,
|
24
|
-
'HAS_ENCODING_SUPPORT' => (('ruby' == type || 'rubinius' == type) &&
|
24
|
+
'HAS_ENCODING_SUPPORT' => (('ruby' == type || 'rubinius' == type || 'macruby' == type) &&
|
25
25
|
(('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0,
|
26
|
+
'HAS_ONIG' => (('ruby' == type || 'jruby' == type || 'rubinius' == type) &&
|
27
|
+
(('1' == version[0] && '9' == version[1]) || '2' <= version[0])) ? 1 : 0,
|
26
28
|
'HAS_PRIVATE_ENCODING' => ('jruby' == type && '1' == version[0] && '9' == version[1]) ? 1 : 0,
|
27
29
|
'HAS_NANO_TIME' => ('ruby' == type && ('1' == version[0] && '9' == version[1]) || '2' <= version[0]) ? 1 : 0,
|
28
30
|
'HAS_RSTRUCT' => ('ruby' == type || 'ree' == type) ? 1 : 0,
|
data/ext/ox/obj_load.c
CHANGED
@@ -382,16 +382,7 @@ parse_regexp(const char *text) {
|
|
382
382
|
int options = 0;
|
383
383
|
|
384
384
|
te = text + strlen(text) - 1;
|
385
|
-
#if
|
386
|
-
for (; text < te && '/' != *te; te--) {
|
387
|
-
switch (*te) {
|
388
|
-
case 'i': options |= ONIG_OPTION_IGNORECASE; break;
|
389
|
-
case 'm': options |= ONIG_OPTION_MULTILINE; break;
|
390
|
-
case 'x': options |= ONIG_OPTION_EXTEND; break;
|
391
|
-
default: break;
|
392
|
-
}
|
393
|
-
}
|
394
|
-
#elif HAS_PRIVATE_ENCODING
|
385
|
+
#if HAS_ONIG
|
395
386
|
for (; text < te && '/' != *te; te--) {
|
396
387
|
switch (*te) {
|
397
388
|
case 'i': options |= ONIG_OPTION_IGNORECASE; break;
|
data/ext/ox/ox.c
CHANGED
@@ -35,6 +35,7 @@
|
|
35
35
|
|
36
36
|
#include "ruby.h"
|
37
37
|
#include "ox.h"
|
38
|
+
#include "sax.h"
|
38
39
|
|
39
40
|
/* maximum to allocate on the stack, arbitrary limit */
|
40
41
|
#define SMALL_XML 65536
|
@@ -128,8 +129,9 @@ static VALUE object_sym;
|
|
128
129
|
static VALUE opt_format_sym;
|
129
130
|
static VALUE optimized_sym;
|
130
131
|
static VALUE strict_sym;
|
131
|
-
static VALUE
|
132
|
+
static VALUE smart_sym;
|
132
133
|
static VALUE symbolize_keys_sym;
|
134
|
+
static VALUE symbolize_sym;
|
133
135
|
static VALUE tolerant_sym;
|
134
136
|
static VALUE trace_sym;
|
135
137
|
static VALUE with_dtd_sym;
|
@@ -306,7 +308,7 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
306
308
|
} else {
|
307
309
|
Check_Type(v, T_STRING);
|
308
310
|
strncpy(ox_default_options.encoding, StringValuePtr(v), sizeof(ox_default_options.encoding) - 1);
|
309
|
-
#
|
311
|
+
#if HAS_ENCODING_SUPPORT
|
310
312
|
ox_default_options.rb_enc = rb_enc_find(ox_default_options.encoding);
|
311
313
|
#elif HAS_PRIVATE_ENCODING
|
312
314
|
ox_default_options.rb_enc = rb_str_new2(ox_default_options.encoding);
|
@@ -472,7 +474,7 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding) {
|
|
472
474
|
options.sym_keys = (Qfalse == v) ? No : Yes;
|
473
475
|
}
|
474
476
|
}
|
475
|
-
#
|
477
|
+
#if HAS_ENCODING_SUPPORT
|
476
478
|
if ('\0' == *options.encoding) {
|
477
479
|
if (Qnil != encoding) {
|
478
480
|
options.rb_enc = rb_enc_from_index(rb_enc_get_index(encoding));
|
@@ -548,8 +550,12 @@ load_str(int argc, VALUE *argv, VALUE self) {
|
|
548
550
|
} else {
|
549
551
|
xml = ALLOCA_N(char, len);
|
550
552
|
}
|
551
|
-
#
|
553
|
+
#if HAS_ENCODING_SUPPORT
|
554
|
+
#ifdef MACRUBY_RUBY
|
555
|
+
encoding = rb_funcall(*argv, rb_intern("encoding"), 0);
|
556
|
+
#else
|
552
557
|
encoding = rb_obj_encoding(*argv);
|
558
|
+
#endif
|
553
559
|
#elif HAS_PRIVATE_ENCODING
|
554
560
|
encoding = rb_funcall(*argv, rb_intern("encoding"), 0);
|
555
561
|
#else
|
@@ -623,12 +629,16 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
623
629
|
* @param [IO|String] io IO Object to read from
|
624
630
|
* @param [Hash] options parse options
|
625
631
|
* @param [true|false] :convert_special flag indicating special characters like < are converted
|
626
|
-
* @param [true|false] :
|
632
|
+
* @param [true|false] :symbolize flag indicating the parser symbolize element and attribute names
|
633
|
+
* @param [true|false] :smart flag indicating the parser use hints if available (use with html)
|
627
634
|
*/
|
628
635
|
static VALUE
|
629
636
|
sax_parse(int argc, VALUE *argv, VALUE self) {
|
630
|
-
|
631
|
-
|
637
|
+
struct _SaxOptions options;
|
638
|
+
|
639
|
+
options.symbolize = 1;
|
640
|
+
options.convert_special = 0;
|
641
|
+
options.smart = 0;
|
632
642
|
|
633
643
|
if (argc < 2) {
|
634
644
|
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
|
@@ -638,13 +648,16 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
638
648
|
VALUE v;
|
639
649
|
|
640
650
|
if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
|
641
|
-
|
651
|
+
options.convert_special = (Qtrue == v);
|
652
|
+
}
|
653
|
+
if (Qnil != (v = rb_hash_lookup(h, smart_sym))) {
|
654
|
+
options.smart = (Qtrue == v);
|
642
655
|
}
|
643
|
-
if (Qnil != (v = rb_hash_lookup(h,
|
644
|
-
|
656
|
+
if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
|
657
|
+
options.symbolize = (Qtrue == v);
|
645
658
|
}
|
646
659
|
}
|
647
|
-
ox_sax_parse(argv[0], argv[1],
|
660
|
+
ox_sax_parse(argv[0], argv[1], &options);
|
648
661
|
|
649
662
|
return Qnil;
|
650
663
|
}
|
@@ -875,8 +888,10 @@ void Init_ox() {
|
|
875
888
|
opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_gc_register_address(&opt_format_sym);
|
876
889
|
optimized_sym = ID2SYM(rb_intern("optimized")); rb_gc_register_address(&optimized_sym);
|
877
890
|
ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_gc_register_address(&ox_encoding_sym);
|
891
|
+
smart_sym = ID2SYM(rb_intern("smart")); rb_gc_register_address(&smart_sym);
|
878
892
|
strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym);
|
879
893
|
symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys")); rb_gc_register_address(&symbolize_keys_sym);
|
894
|
+
symbolize_sym = ID2SYM(rb_intern("symbolize")); rb_gc_register_address(&symbolize_sym);
|
880
895
|
tolerant_sym = ID2SYM(rb_intern("tolerant")); rb_gc_register_address(&tolerant_sym);
|
881
896
|
trace_sym = ID2SYM(rb_intern("trace")); rb_gc_register_address(&trace_sym);
|
882
897
|
with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_gc_register_address(&with_dtd_sym);
|
data/ext/ox/ox.h
CHANGED
@@ -186,7 +186,7 @@ typedef struct _Options {
|
|
186
186
|
char mode; /* LoadMode */
|
187
187
|
char effort; /* Effort */
|
188
188
|
char sym_keys; /* symbolize keys */
|
189
|
-
#
|
189
|
+
#if HAS_ENCODING_SUPPORT
|
190
190
|
rb_encoding *rb_enc;
|
191
191
|
#elif HAS_PRIVATE_ENCODING
|
192
192
|
VALUE rb_enc;
|
@@ -212,7 +212,6 @@ struct _PInfo {
|
|
212
212
|
extern VALUE ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options);
|
213
213
|
extern void _ox_raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
|
214
214
|
|
215
|
-
extern void ox_sax_parse(VALUE handler, VALUE io, int convert, int tolerant);
|
216
215
|
extern void ox_sax_define(void);
|
217
216
|
|
218
217
|
extern char* ox_write_obj_to_str(VALUE obj, Options copts);
|
data/ext/ox/sax.c
CHANGED
@@ -1,21 +1,21 @@
|
|
1
1
|
/* sax.c
|
2
2
|
* Copyright (c) 2011, Peter Ohler
|
3
3
|
* All rights reserved.
|
4
|
-
*
|
4
|
+
*
|
5
5
|
* Redistribution and use in source and binary forms, with or without
|
6
6
|
* modification, are permitted provided that the following conditions are met:
|
7
|
-
*
|
7
|
+
*
|
8
8
|
* - Redistributions of source code must retain the above copyright notice, this
|
9
9
|
* list of conditions and the following disclaimer.
|
10
|
-
*
|
10
|
+
*
|
11
11
|
* - Redistributions in binary form must reproduce the above copyright notice,
|
12
12
|
* this list of conditions and the following disclaimer in the documentation
|
13
13
|
* and/or other materials provided with the distribution.
|
14
|
-
*
|
14
|
+
*
|
15
15
|
* - Neither the name of Peter Ohler nor the names of its contributors may be
|
16
16
|
* used to endorse or promote products derived from this software without
|
17
17
|
* specific prior written permission.
|
18
|
-
*
|
18
|
+
*
|
19
19
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
20
20
|
* AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
21
21
|
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
@@ -34,93 +34,59 @@
|
|
34
34
|
#include <strings.h>
|
35
35
|
#include <sys/types.h>
|
36
36
|
#if NEEDS_UIO
|
37
|
-
#include <sys/uio.h>
|
37
|
+
#include <sys/uio.h>
|
38
38
|
#endif
|
39
39
|
#include <unistd.h>
|
40
40
|
#include <time.h>
|
41
41
|
|
42
42
|
#include "ruby.h"
|
43
43
|
#include "ox.h"
|
44
|
+
#include "sax.h"
|
45
|
+
#include "sax_stack.h"
|
46
|
+
#include "sax_buf.h"
|
44
47
|
|
45
48
|
#define NAME_MISMATCH 1
|
46
49
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
int has_cdata;
|
73
|
-
int has_text;
|
74
|
-
int has_value;
|
75
|
-
int has_start_element;
|
76
|
-
int has_end_element;
|
77
|
-
int has_error;
|
78
|
-
int has_line;
|
79
|
-
int has_column;
|
80
|
-
#if HAS_ENCODING_SUPPORT
|
81
|
-
rb_encoding *encoding;
|
82
|
-
#elif HAS_PRIVATE_ENCODING
|
83
|
-
VALUE encoding;
|
84
|
-
#endif
|
85
|
-
} *SaxDrive;
|
86
|
-
|
87
|
-
#ifdef NEEDS_STPCPY
|
88
|
-
char *stpncpy(char *dest, const char *src, size_t n);
|
89
|
-
#endif
|
90
|
-
static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert, int tolerant);
|
91
|
-
static void sax_drive_cleanup(SaxDrive dr);
|
92
|
-
static int sax_drive_read(SaxDrive dr);
|
93
|
-
static void sax_drive_error(SaxDrive dr, const char *msg, int critical);
|
94
|
-
|
95
|
-
static int read_children(SaxDrive dr, int first);
|
96
|
-
static int read_instruction(SaxDrive dr);
|
97
|
-
static int read_doctype(SaxDrive dr);
|
98
|
-
static int read_cdata(SaxDrive dr);
|
99
|
-
static int read_comment(SaxDrive dr);
|
100
|
-
static int read_element(SaxDrive dr);
|
101
|
-
static int read_text(SaxDrive dr);
|
102
|
-
static const char* read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml);
|
50
|
+
#define START_STATE 1
|
51
|
+
#define BODY_STATE 2
|
52
|
+
#define AFTER_STATE 3
|
53
|
+
|
54
|
+
// error prefixes
|
55
|
+
#define BAD_BOM "Bad BOM: "
|
56
|
+
#define NO_TERM "Not Terminated: "
|
57
|
+
#define INVALID_FORMAT "Invalid Format: "
|
58
|
+
#define CASE_ERROR "Case Error: "
|
59
|
+
#define OUT_OF_ORDER "Out of Order: "
|
60
|
+
#define WRONG_CHAR "Unexpected Character: "
|
61
|
+
#define EL_MISMATCH "Start End Mismatch: "
|
62
|
+
#define INV_ELEMENT "Invalid Element: "
|
63
|
+
|
64
|
+
static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options);
|
65
|
+
static void parse(SaxDrive dr);
|
66
|
+
// All read functions should return the next character after the 'thing' that was read and leave dr->cur one after that.
|
67
|
+
static char read_instruction(SaxDrive dr);
|
68
|
+
static char read_doctype(SaxDrive dr);
|
69
|
+
static char read_cdata(SaxDrive dr);
|
70
|
+
static char read_comment(SaxDrive dr);
|
71
|
+
static char read_element_start(SaxDrive dr);
|
72
|
+
static char read_element_end(SaxDrive dr);
|
73
|
+
static char read_text(SaxDrive dr);
|
74
|
+
static char read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req);
|
103
75
|
static char read_name_token(SaxDrive dr);
|
104
|
-
static
|
105
|
-
static int collapse_special(char *str, int tolerant);
|
106
|
-
|
107
|
-
static VALUE rescue_cb(VALUE rdr, VALUE err);
|
108
|
-
static VALUE io_cb(VALUE rdr);
|
109
|
-
static VALUE partial_io_cb(VALUE rdr);
|
110
|
-
static int read_from_io(SaxDrive dr);
|
111
|
-
#ifndef JRUBY_RUBY
|
112
|
-
static int read_from_fd(SaxDrive dr);
|
113
|
-
#endif
|
114
|
-
static int read_from_io_partial(SaxDrive dr);
|
115
|
-
static int read_from_str(SaxDrive dr);
|
76
|
+
static char read_quoted_value(SaxDrive dr);
|
116
77
|
|
117
|
-
static VALUE
|
78
|
+
static void end_element_cb(SaxDrive dr, VALUE name, int line, int col);
|
79
|
+
|
80
|
+
static void hint_clear_empty(SaxDrive dr);
|
81
|
+
static Nv hint_try_close(SaxDrive dr, const char *name);
|
82
|
+
|
83
|
+
VALUE ox_sax_value_class = Qnil;
|
118
84
|
|
119
85
|
/* This is only for CentOS 5.4 with Ruby 1.9.3-p0 and for OS X 10.6 and Solaris 10. */
|
120
86
|
#ifdef NEEDS_STPCPY
|
121
87
|
char *stpncpy(char *dest, const char *src, size_t n) {
|
122
88
|
size_t cnt = strlen(src) + 1;
|
123
|
-
|
89
|
+
|
124
90
|
if (n < cnt) {
|
125
91
|
cnt = n;
|
126
92
|
}
|
@@ -130,246 +96,54 @@ char *stpncpy(char *dest, const char *src, size_t n) {
|
|
130
96
|
}
|
131
97
|
#endif
|
132
98
|
|
133
|
-
static inline char
|
134
|
-
sax_drive_get(SaxDrive dr) {
|
135
|
-
if (dr->read_end <= dr->cur) {
|
136
|
-
if (0 != sax_drive_read(dr)) {
|
137
|
-
return 0;
|
138
|
-
}
|
139
|
-
}
|
140
|
-
if ('\n' == *dr->cur) {
|
141
|
-
dr->line++;
|
142
|
-
dr->col = 0;
|
143
|
-
}
|
144
|
-
dr->col++;
|
145
|
-
|
146
|
-
return *dr->cur++;
|
147
|
-
}
|
148
|
-
|
149
|
-
static inline void
|
150
|
-
backup(SaxDrive dr) {
|
151
|
-
dr->cur--;
|
152
|
-
dr->col--; // should reverse wrap but not worth it
|
153
|
-
}
|
154
|
-
|
155
|
-
static inline void
|
156
|
-
reset_reader(SaxDrive dr, char *cur, int line, int col) {
|
157
|
-
dr->cur = cur;
|
158
|
-
dr->line = line;
|
159
|
-
dr->col = col;
|
160
|
-
}
|
161
|
-
|
162
|
-
|
163
|
-
/* Starts by reading a character so it is safe to use with an empty or
|
164
|
-
* compacted buffer.
|
165
|
-
*/
|
166
|
-
inline static char
|
167
|
-
next_non_white(SaxDrive dr) {
|
168
|
-
char c;
|
169
|
-
|
170
|
-
while ('\0' != (c = sax_drive_get(dr))) {
|
171
|
-
switch(c) {
|
172
|
-
case ' ':
|
173
|
-
case '\t':
|
174
|
-
case '\f':
|
175
|
-
case '\n':
|
176
|
-
case '\r':
|
177
|
-
break;
|
178
|
-
default:
|
179
|
-
return c;
|
180
|
-
}
|
181
|
-
}
|
182
|
-
return '\0';
|
183
|
-
}
|
184
|
-
|
185
|
-
/* Starts by reading a character so it is safe to use with an empty or
|
186
|
-
* compacted buffer.
|
187
|
-
*/
|
188
|
-
inline static char
|
189
|
-
next_white(SaxDrive dr) {
|
190
|
-
char c;
|
191
|
-
|
192
|
-
while ('\0' != (c = sax_drive_get(dr))) {
|
193
|
-
switch(c) {
|
194
|
-
case ' ':
|
195
|
-
case '\t':
|
196
|
-
case '\f':
|
197
|
-
case '\n':
|
198
|
-
case '\r':
|
199
|
-
case '\0':
|
200
|
-
return c;
|
201
|
-
default:
|
202
|
-
break;
|
203
|
-
}
|
204
|
-
}
|
205
|
-
return '\0';
|
206
|
-
}
|
207
|
-
|
208
|
-
inline static int
|
209
|
-
is_white(char c) {
|
210
|
-
switch(c) {
|
211
|
-
case ' ':
|
212
|
-
case '\t':
|
213
|
-
case '\f':
|
214
|
-
case '\n':
|
215
|
-
case '\r':
|
216
|
-
return 1;
|
217
|
-
default:
|
218
|
-
break;
|
219
|
-
}
|
220
|
-
return 0;
|
221
|
-
}
|
222
|
-
|
223
|
-
inline static VALUE
|
224
|
-
str2sym(const char *str, SaxDrive dr, char **strp) {
|
225
|
-
VALUE *slot;
|
226
|
-
VALUE sym;
|
227
|
-
|
228
|
-
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, str, &slot, strp))) {
|
229
|
-
#if HAS_ENCODING_SUPPORT
|
230
|
-
if (0 != dr->encoding) {
|
231
|
-
VALUE rstr = rb_str_new2(str);
|
232
|
-
|
233
|
-
rb_enc_associate(rstr, dr->encoding);
|
234
|
-
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
235
|
-
} else {
|
236
|
-
sym = ID2SYM(rb_intern(str));
|
237
|
-
}
|
238
|
-
#elif HAS_PRIVATE_ENCODING
|
239
|
-
if (Qnil != dr->encoding) {
|
240
|
-
VALUE rstr = rb_str_new2(str);
|
241
|
-
|
242
|
-
rb_funcall(rstr, ox_force_encoding_id, 1, dr->encoding);
|
243
|
-
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
244
|
-
} else {
|
245
|
-
sym = ID2SYM(rb_intern(str));
|
246
|
-
}
|
247
|
-
#else
|
248
|
-
sym = ID2SYM(rb_intern(str));
|
249
|
-
#endif
|
250
|
-
*slot = sym;
|
251
|
-
}
|
252
|
-
return sym;
|
253
|
-
}
|
254
|
-
|
255
|
-
|
256
99
|
void
|
257
|
-
ox_sax_parse(VALUE handler, VALUE io,
|
100
|
+
ox_sax_parse(VALUE handler, VALUE io, SaxOptions options) {
|
258
101
|
struct _SaxDrive dr;
|
259
|
-
|
260
|
-
sax_drive_init(&dr, handler, io,
|
102
|
+
|
103
|
+
sax_drive_init(&dr, handler, io, options);
|
261
104
|
#if 0
|
262
105
|
printf("*** sax_parse with these flags\n");
|
263
|
-
printf(" has_instruct = %s\n", dr.
|
264
|
-
printf(" has_end_instruct = %s\n", dr.
|
265
|
-
printf(" has_attr = %s\n", dr.
|
266
|
-
printf(" has_attr_value = %s\n", dr.
|
267
|
-
printf(" has_doctype = %s\n", dr.
|
268
|
-
printf(" has_comment = %s\n", dr.
|
269
|
-
printf(" has_cdata = %s\n", dr.
|
270
|
-
printf(" has_text = %s\n", dr.
|
271
|
-
printf(" has_value = %s\n", dr.
|
272
|
-
printf(" has_start_element = %s\n", dr.
|
273
|
-
printf(" has_end_element = %s\n", dr.
|
274
|
-
printf(" has_error = %s\n", dr.
|
275
|
-
printf(" has_line = %s\n", dr.
|
276
|
-
printf(" has_column = %s\n", dr.
|
277
|
-
#endif
|
278
|
-
read_children(&dr, 1);
|
279
|
-
sax_drive_cleanup(&dr);
|
280
|
-
}
|
281
|
-
|
282
|
-
inline static int
|
283
|
-
respond_to(VALUE obj, ID method) {
|
284
|
-
#ifdef JRUBY_RUBY
|
285
|
-
/* There is a bug in JRuby where rb_respond_to() returns true (1) even if
|
286
|
-
* a method is private. */
|
287
|
-
{
|
288
|
-
VALUE args[1];
|
289
|
-
|
290
|
-
*args = ID2SYM(method);
|
291
|
-
return (Qtrue == rb_funcall2(obj, rb_intern("respond_to?"), 1, args));
|
292
|
-
}
|
293
|
-
#else
|
294
|
-
return rb_respond_to(obj, method);
|
106
|
+
printf(" has_instruct = %s\n", dr.has.instruct ? "true" : "false");
|
107
|
+
printf(" has_end_instruct = %s\n", dr.has.end_instruct ? "true" : "false");
|
108
|
+
printf(" has_attr = %s\n", dr.has.attr ? "true" : "false");
|
109
|
+
printf(" has_attr_value = %s\n", dr.has.attr_value ? "true" : "false");
|
110
|
+
printf(" has_doctype = %s\n", dr.has.doctype ? "true" : "false");
|
111
|
+
printf(" has_comment = %s\n", dr.has.comment ? "true" : "false");
|
112
|
+
printf(" has_cdata = %s\n", dr.has.cdata ? "true" : "false");
|
113
|
+
printf(" has_text = %s\n", dr.has.text ? "true" : "false");
|
114
|
+
printf(" has_value = %s\n", dr.has.value ? "true" : "false");
|
115
|
+
printf(" has_start_element = %s\n", dr.has.start_element ? "true" : "false");
|
116
|
+
printf(" has_end_element = %s\n", dr.has.end_element ? "true" : "false");
|
117
|
+
printf(" has_error = %s\n", dr.has.error ? "true" : "false");
|
118
|
+
printf(" has_line = %s\n", dr.has.line ? "true" : "false");
|
119
|
+
printf(" has_column = %s\n", dr.has.column ? "true" : "false");
|
295
120
|
#endif
|
121
|
+
parse(&dr);
|
122
|
+
ox_sax_drive_cleanup(&dr);
|
296
123
|
}
|
297
124
|
|
298
125
|
static void
|
299
|
-
sax_drive_init(SaxDrive dr, VALUE handler, VALUE io,
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
dr->read_func = read_from_str;
|
304
|
-
dr->in_str = StringValuePtr(s);
|
305
|
-
} else if (rb_respond_to(io, ox_readpartial_id)) {
|
306
|
-
#ifdef JRUBY_RUBY
|
307
|
-
dr->read_func = read_from_io_partial;
|
308
|
-
dr->io = io;
|
309
|
-
#else
|
310
|
-
VALUE rfd;
|
311
|
-
|
312
|
-
if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) {
|
313
|
-
dr->read_func = read_from_fd;
|
314
|
-
dr->fd = FIX2INT(rfd);
|
315
|
-
} else {
|
316
|
-
dr->read_func = read_from_io_partial;
|
317
|
-
dr->io = io;
|
318
|
-
}
|
319
|
-
#endif
|
320
|
-
} else if (rb_respond_to(io, ox_read_id)) {
|
321
|
-
#ifdef JRUBY_RUBY
|
322
|
-
dr->read_func = read_from_io;
|
323
|
-
dr->io = io;
|
324
|
-
#else
|
325
|
-
VALUE rfd;
|
326
|
-
|
327
|
-
if (rb_respond_to(io, ox_fileno_id) && Qnil != (rfd = rb_funcall(io, ox_fileno_id, 0))) {
|
328
|
-
dr->read_func = read_from_fd;
|
329
|
-
dr->fd = FIX2INT(rfd);
|
330
|
-
} else {
|
331
|
-
dr->read_func = read_from_io;
|
332
|
-
dr->io = io;
|
333
|
-
}
|
334
|
-
#endif
|
335
|
-
} else {
|
336
|
-
rb_raise(ox_arg_error_class, "sax_parser io argument must respond to readpartial() or read().\n");
|
337
|
-
}
|
338
|
-
dr->buf = dr->base_buf;
|
339
|
-
*dr->buf = '\0';
|
340
|
-
dr->buf_end = dr->buf + sizeof(dr->base_buf) - 1; /* 1 less to make debugging easier */
|
341
|
-
dr->cur = dr->buf;
|
342
|
-
dr->read_end = dr->buf;
|
343
|
-
dr->str = 0;
|
344
|
-
dr->line = 1;
|
345
|
-
dr->col = 0;
|
126
|
+
sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, SaxOptions options) {
|
127
|
+
ox_sax_buf_init(&dr->buf, io);
|
128
|
+
dr->buf.dr = dr;
|
129
|
+
stack_init(&dr->stack);
|
346
130
|
dr->handler = handler;
|
347
|
-
dr->value_obj = rb_data_object_alloc(
|
131
|
+
dr->value_obj = rb_data_object_alloc(ox_sax_value_class, dr, 0, 0);
|
348
132
|
rb_gc_register_address(&dr->value_obj);
|
349
|
-
dr->
|
350
|
-
dr->
|
351
|
-
dr->
|
352
|
-
dr->
|
353
|
-
dr->has_attr = respond_to(handler, ox_attr_id);
|
354
|
-
dr->has_attr_value = respond_to(handler, ox_attr_value_id);
|
355
|
-
dr->has_doctype = respond_to(handler, ox_doctype_id);
|
356
|
-
dr->has_comment = respond_to(handler, ox_comment_id);
|
357
|
-
dr->has_cdata = respond_to(handler, ox_cdata_id);
|
358
|
-
dr->has_text = respond_to(handler, ox_text_id);
|
359
|
-
dr->has_value = respond_to(handler, ox_value_id);
|
360
|
-
dr->has_start_element = respond_to(handler, ox_start_element_id);
|
361
|
-
dr->has_end_element = respond_to(handler, ox_end_element_id);
|
362
|
-
dr->has_error = respond_to(handler, ox_error_id);
|
363
|
-
dr->has_line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id));
|
364
|
-
dr->has_column = (Qtrue == rb_ivar_defined(handler, ox_at_column_id));
|
133
|
+
dr->options = *options;
|
134
|
+
dr->hints = 0;
|
135
|
+
dr->err = 0;
|
136
|
+
has_init(&dr->has, handler);
|
365
137
|
#if HAS_ENCODING_SUPPORT
|
366
138
|
if ('\0' == *ox_default_options.encoding) {
|
367
139
|
VALUE encoding;
|
368
140
|
|
141
|
+
dr->encoding = 0;
|
369
142
|
if (rb_respond_to(io, ox_external_encoding_id) && Qnil != (encoding = rb_funcall(io, ox_external_encoding_id, 0))) {
|
370
|
-
|
371
|
-
|
372
|
-
|
143
|
+
int e = rb_enc_get_index(encoding);
|
144
|
+
if (0 <= e) {
|
145
|
+
dr->encoding = rb_enc_from_index(e);
|
146
|
+
}
|
373
147
|
}
|
374
148
|
} else {
|
375
149
|
dr->encoding = rb_enc_find(ox_default_options.encoding);
|
@@ -389,190 +163,178 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert, int tolerant)
|
|
389
163
|
#endif
|
390
164
|
}
|
391
165
|
|
392
|
-
|
393
|
-
|
166
|
+
void
|
167
|
+
ox_sax_drive_cleanup(SaxDrive dr) {
|
394
168
|
rb_gc_unregister_address(&dr->value_obj);
|
395
|
-
|
396
|
-
|
397
|
-
}
|
398
|
-
}
|
399
|
-
|
400
|
-
static int
|
401
|
-
sax_drive_read(SaxDrive dr) {
|
402
|
-
int err;
|
403
|
-
size_t shift = 0;
|
404
|
-
|
405
|
-
if (dr->buf < dr->cur) {
|
406
|
-
if (0 == dr->str) {
|
407
|
-
shift = dr->cur - dr->buf;
|
408
|
-
} else {
|
409
|
-
shift = dr->str - dr->buf;
|
410
|
-
}
|
411
|
-
/*printf("\n*** shift: %lu\n", shift); */
|
412
|
-
if (0 == shift) { /* no space left so allocate more */
|
413
|
-
char *old = dr->buf;
|
414
|
-
size_t size = dr->buf_end - dr->buf;
|
415
|
-
|
416
|
-
if (dr->buf == dr->base_buf) {
|
417
|
-
dr->buf = ALLOC_N(char, size * 2);
|
418
|
-
memcpy(dr->buf, old, size);
|
419
|
-
} else {
|
420
|
-
REALLOC_N(dr->buf, char, size * 2);
|
421
|
-
}
|
422
|
-
dr->buf_end = dr->buf + size * 2;
|
423
|
-
dr->cur = dr->buf + (dr->cur - old);
|
424
|
-
dr->read_end = dr->buf + (dr->read_end - old);
|
425
|
-
if (0 != dr->str) {
|
426
|
-
dr->str = dr->buf + (dr->str - old);
|
427
|
-
}
|
428
|
-
} else {
|
429
|
-
memmove(dr->buf, dr->buf + shift, dr->read_end - (dr->buf + shift));
|
430
|
-
dr->cur -= shift;
|
431
|
-
dr->read_end -= shift;
|
432
|
-
if (0 != dr->str) {
|
433
|
-
dr->str -= shift;
|
434
|
-
}
|
435
|
-
}
|
436
|
-
}
|
437
|
-
err = dr->read_func(dr);
|
438
|
-
*dr->read_end = '\0';
|
439
|
-
|
440
|
-
return err;
|
169
|
+
buf_cleanup(&dr->buf);
|
170
|
+
stack_cleanup(&dr->stack);
|
441
171
|
}
|
442
172
|
|
443
173
|
static void
|
444
|
-
|
445
|
-
if (dr->
|
174
|
+
ox_sax_drive_error_at(SaxDrive dr, const char *msg, int line, int col) {
|
175
|
+
if (dr->has.error) {
|
446
176
|
VALUE args[3];
|
447
177
|
|
448
178
|
args[0] = rb_str_new2(msg);
|
449
|
-
args[1] = INT2FIX(
|
450
|
-
args[2] = INT2FIX(
|
451
|
-
if (dr->
|
179
|
+
args[1] = INT2FIX(line);
|
180
|
+
args[2] = INT2FIX(col);
|
181
|
+
if (dr->has.line) {
|
452
182
|
rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
|
453
183
|
}
|
454
|
-
if (dr->
|
184
|
+
if (dr->has.column) {
|
455
185
|
rb_ivar_set(dr->handler, ox_at_column_id, args[2]);
|
456
186
|
}
|
457
187
|
rb_funcall2(dr->handler, ox_error_id, 3, args);
|
458
|
-
} else if (critical) {
|
459
|
-
sax_drive_cleanup(dr);
|
460
|
-
rb_raise(ox_parse_error_class, "%s at line %d, column %d\n", msg, dr->line, dr->col);
|
461
188
|
}
|
462
189
|
}
|
463
190
|
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
if (first) {
|
476
|
-
if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
|
477
|
-
if (0xBB == (uint8_t)sax_drive_get(dr) && 0xBF == (uint8_t)sax_drive_get(dr)) {
|
191
|
+
void
|
192
|
+
ox_sax_drive_error(SaxDrive dr, const char *msg) {
|
193
|
+
ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
|
194
|
+
}
|
195
|
+
|
196
|
+
static char
|
197
|
+
skipBOM(SaxDrive dr) {
|
198
|
+
char c = buf_get(&dr->buf);
|
199
|
+
|
200
|
+
if (0xEF == (uint8_t)c) { /* only UTF8 is supported */
|
201
|
+
if (0xBB == (uint8_t)buf_get(&dr->buf) && 0xBF == (uint8_t)buf_get(&dr->buf)) {
|
478
202
|
#if HAS_ENCODING_SUPPORT
|
479
|
-
|
203
|
+
dr->encoding = ox_utf8_encoding;
|
480
204
|
#elif HAS_PRIVATE_ENCODING
|
481
|
-
|
205
|
+
dr->encoding = ox_utf8_encoding;
|
482
206
|
#endif
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
}
|
207
|
+
c = buf_get(&dr->buf);
|
208
|
+
} else {
|
209
|
+
ox_sax_drive_error(dr, BAD_BOM "invalid BOM or a binary file.");
|
210
|
+
c = '\0';
|
488
211
|
}
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
494
|
-
|
212
|
+
}
|
213
|
+
return c;
|
214
|
+
}
|
215
|
+
|
216
|
+
static void
|
217
|
+
parse(SaxDrive dr) {
|
218
|
+
char c = skipBOM(dr);
|
219
|
+
int state = START_STATE;
|
220
|
+
|
221
|
+
while ('\0' != c) {
|
222
|
+
buf_protect(&dr->buf);
|
223
|
+
if (is_white(c) && '\0' == (c = buf_next_non_white(&dr->buf))) {
|
224
|
+
break;
|
495
225
|
}
|
496
|
-
if ('<'
|
497
|
-
|
498
|
-
|
499
|
-
|
500
|
-
|
501
|
-
|
502
|
-
|
503
|
-
|
504
|
-
|
505
|
-
|
506
|
-
|
507
|
-
|
508
|
-
|
509
|
-
|
510
|
-
|
511
|
-
|
512
|
-
|
513
|
-
|
514
|
-
|
515
|
-
|
516
|
-
err = 1;
|
517
|
-
} else if ('-' == c) {
|
518
|
-
c = sax_drive_get(dr); /* skip first - and get next character */
|
519
|
-
if ('-' != c) {
|
520
|
-
sax_drive_error(dr, "invalid format, bad comment format", 1);
|
521
|
-
err = 1;
|
226
|
+
if ('<' == c) {
|
227
|
+
c = buf_get(&dr->buf);
|
228
|
+
switch (c) {
|
229
|
+
case '?': /* instructions (xml or otherwise) */
|
230
|
+
c = read_instruction(dr);
|
231
|
+
break;
|
232
|
+
case '!': /* comment or doctype */
|
233
|
+
buf_protect(&dr->buf);
|
234
|
+
c = buf_get(&dr->buf);
|
235
|
+
if ('\0' == c) {
|
236
|
+
ox_sax_drive_error(dr, NO_TERM "DOCTYPE or comment not terminated");
|
237
|
+
goto DONE;
|
238
|
+
} else if ('-' == c) {
|
239
|
+
c = buf_get(&dr->buf); /* skip first - and get next character */
|
240
|
+
if ('-' != c) {
|
241
|
+
ox_sax_drive_error(dr, INVALID_FORMAT "bad comment format, expected <!--");
|
242
|
+
} else {
|
243
|
+
c = buf_get(&dr->buf); /* skip second - */
|
244
|
+
}
|
245
|
+
c = read_comment(dr);
|
522
246
|
} else {
|
523
|
-
|
524
|
-
|
247
|
+
int i;
|
248
|
+
int spaced = 0;
|
249
|
+
int line = dr->buf.line;
|
250
|
+
int col = dr->buf.col;
|
251
|
+
|
252
|
+
if (is_white(c)) {
|
253
|
+
spaced = 1;
|
254
|
+
c = buf_next_non_white(&dr->buf);
|
255
|
+
}
|
256
|
+
dr->buf.str = dr->buf.tail - 1;
|
257
|
+
for (i = 7; 0 < i; i--) {
|
258
|
+
c = buf_get(&dr->buf);
|
259
|
+
}
|
260
|
+
if (0 == strncmp("DOCTYPE", dr->buf.str, 7)) {
|
261
|
+
if (spaced) {
|
262
|
+
ox_sax_drive_error_at(dr, WRONG_CHAR "<!DOCTYPE can not included spaces", line, col);
|
263
|
+
}
|
264
|
+
if (START_STATE != state) {
|
265
|
+
ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
|
266
|
+
}
|
267
|
+
c = read_doctype(dr);
|
268
|
+
} else if (0 == strncasecmp("DOCTYPE", dr->buf.str, 7)) {
|
269
|
+
ox_sax_drive_error(dr, CASE_ERROR "expected DOCTYPE all in caps");
|
270
|
+
if (START_STATE != state) {
|
271
|
+
ox_sax_drive_error(dr, OUT_OF_ORDER "DOCTYPE can not come after an element");
|
272
|
+
}
|
273
|
+
c = read_doctype(dr);
|
274
|
+
} else if (0 == strncmp("[CDATA[", dr->buf.str, 7)) {
|
275
|
+
if (spaced) {
|
276
|
+
ox_sax_drive_error_at(dr, WRONG_CHAR "<![CDATA[ can not included spaces", line, col);
|
277
|
+
}
|
278
|
+
c = read_cdata(dr);
|
279
|
+
} else if (0 == strncasecmp("[CDATA[", dr->buf.str, 7)) {
|
280
|
+
ox_sax_drive_error(dr, CASE_ERROR "expected CDATA all in caps");
|
281
|
+
c = read_cdata(dr);
|
282
|
+
} else {
|
283
|
+
ox_sax_drive_error_at(dr, WRONG_CHAR "DOCTYPE, CDATA, or comment expected", line, col);
|
284
|
+
c = read_name_token(dr);
|
285
|
+
if ('>' == c) {
|
286
|
+
c = buf_get(&dr->buf);
|
287
|
+
}
|
288
|
+
}
|
525
289
|
}
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
531
|
-
|
532
|
-
|
533
|
-
|
534
|
-
|
535
|
-
|
536
|
-
|
537
|
-
|
538
|
-
|
539
|
-
|
540
|
-
|
541
|
-
|
542
|
-
|
543
|
-
|
544
|
-
|
545
|
-
|
546
|
-
line = dr->line;
|
547
|
-
col = dr->col;
|
548
|
-
err = ('\0' == read_name_token(dr));
|
549
|
-
dr->line = line;
|
550
|
-
dr->col = col;
|
551
|
-
if (first && dr->tolerant) {
|
552
|
-
sax_drive_error(dr, "invalid format, unmatched element end", 0);
|
553
|
-
} else {
|
554
|
-
return err;
|
290
|
+
break;
|
291
|
+
case '/': /* element end */
|
292
|
+
c = read_element_end(dr);
|
293
|
+
if (0 == stack_peek(&dr->stack)) {
|
294
|
+
state = AFTER_STATE;
|
295
|
+
}
|
296
|
+
break;
|
297
|
+
case '\0':
|
298
|
+
goto DONE;
|
299
|
+
default:
|
300
|
+
buf_backup(&dr->buf);
|
301
|
+
if (AFTER_STATE == state) {
|
302
|
+
ox_sax_drive_error(dr, OUT_OF_ORDER "multiple top level elements");
|
303
|
+
}
|
304
|
+
state = BODY_STATE;
|
305
|
+
c = read_element_start(dr);
|
306
|
+
if (0 == stack_peek(&dr->stack)) {
|
307
|
+
state = AFTER_STATE;
|
308
|
+
}
|
309
|
+
break;
|
555
310
|
}
|
556
|
-
|
557
|
-
|
558
|
-
|
559
|
-
err = 1;
|
560
|
-
break;
|
561
|
-
default:
|
562
|
-
backup(dr); /* safe since no read occurred after getting last character */
|
563
|
-
if (first && element_read && !dr->tolerant) {
|
564
|
-
sax_drive_error(dr, "invalid format, multiple top level elements", 0);
|
565
|
-
}
|
566
|
-
err = read_element(dr);
|
567
|
-
if (NAME_MISMATCH == err && dr->tolerant && first) {
|
568
|
-
// must have been a end element with no matching start
|
569
|
-
err = 0;
|
570
|
-
}
|
571
|
-
element_read = 1;
|
572
|
-
break;
|
311
|
+
} else {
|
312
|
+
buf_reset(&dr->buf);
|
313
|
+
c = read_text(dr);
|
573
314
|
}
|
574
315
|
}
|
575
|
-
|
316
|
+
DONE:
|
317
|
+
if (dr->stack.head < dr->stack.tail) {
|
318
|
+
char msg[256];
|
319
|
+
Nv sp;
|
320
|
+
|
321
|
+
if (dr->has.line) {
|
322
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->buf.line));
|
323
|
+
}
|
324
|
+
if (dr->has.column) {
|
325
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->buf.col));
|
326
|
+
}
|
327
|
+
for (sp = dr->stack.tail - 1; dr->stack.head <= sp; sp--) {
|
328
|
+
snprintf(msg, sizeof(msg) - 1, "%selement '%s' not closed", EL_MISMATCH, sp->name);
|
329
|
+
ox_sax_drive_error_at(dr, msg, dr->buf.line, dr->buf.col);
|
330
|
+
if (dr->has.end_element) {
|
331
|
+
VALUE args[1];
|
332
|
+
|
333
|
+
args[0] = sp->val;
|
334
|
+
rb_funcall2(dr->handler, ox_end_element_id, 1, args);
|
335
|
+
}
|
336
|
+
}
|
337
|
+
}
|
576
338
|
}
|
577
339
|
|
578
340
|
static void
|
@@ -580,13 +342,14 @@ read_content(SaxDrive dr, char *content, size_t len) {
|
|
580
342
|
char c;
|
581
343
|
char *end = content + len;
|
582
344
|
|
583
|
-
while ('\0' != (c =
|
345
|
+
while ('\0' != (c = buf_get(&dr->buf))) {
|
584
346
|
if (end < content) {
|
585
|
-
|
347
|
+
ox_sax_drive_error(dr, "processing instruction content too large");
|
348
|
+
return;
|
586
349
|
}
|
587
350
|
if ('?' == c) {
|
588
|
-
if ('\0' == (c =
|
589
|
-
|
351
|
+
if ('\0' == (c = buf_get(&dr->buf))) {
|
352
|
+
ox_sax_drive_error(dr, NO_TERM "document not terminated");
|
590
353
|
}
|
591
354
|
if ('>' == c) {
|
592
355
|
*content = '\0';
|
@@ -603,50 +366,50 @@ read_content(SaxDrive dr, char *content, size_t len) {
|
|
603
366
|
|
604
367
|
/* Entered after the "<?" sequence. Ready to read the rest.
|
605
368
|
*/
|
606
|
-
static
|
369
|
+
static char
|
607
370
|
read_instruction(SaxDrive dr) {
|
608
371
|
char content[1024];
|
609
372
|
char c;
|
610
373
|
char *cend;
|
611
|
-
const char *err;
|
612
374
|
VALUE target = Qnil;
|
613
375
|
int is_xml;
|
614
|
-
int line = dr->line;
|
615
|
-
int col = dr->col - 1;
|
376
|
+
int line = dr->buf.line;
|
377
|
+
int col = dr->buf.col - 1;
|
616
378
|
|
379
|
+
buf_protect(&dr->buf);
|
617
380
|
if ('\0' == (c = read_name_token(dr))) {
|
618
|
-
return
|
381
|
+
return c;
|
619
382
|
}
|
620
|
-
is_xml = (0 == strcmp("xml", dr->str));
|
621
|
-
if (dr->
|
622
|
-
target = rb_str_new2(dr->str);
|
383
|
+
is_xml = (0 == strcmp("xml", dr->buf.str));
|
384
|
+
if (dr->has.instruct || dr->has.end_instruct) {
|
385
|
+
target = rb_str_new2(dr->buf.str);
|
623
386
|
}
|
624
|
-
if (dr->
|
387
|
+
if (dr->has.instruct) {
|
625
388
|
VALUE args[1];
|
626
389
|
|
627
|
-
if (dr->
|
390
|
+
if (dr->has.line) {
|
628
391
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
629
392
|
}
|
630
|
-
if (dr->
|
393
|
+
if (dr->has.column) {
|
631
394
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
632
395
|
}
|
633
396
|
args[0] = target;
|
634
397
|
rb_funcall2(dr->handler, ox_instruct_id, 1, args);
|
635
398
|
}
|
636
|
-
dr->
|
637
|
-
line = dr->line;
|
638
|
-
col = dr->col;
|
399
|
+
buf_protect(&dr->buf);
|
400
|
+
line = dr->buf.line;
|
401
|
+
col = dr->buf.col;
|
639
402
|
read_content(dr, content, sizeof(content) - 1);
|
640
|
-
cend = dr->
|
641
|
-
|
642
|
-
|
643
|
-
|
403
|
+
cend = dr->buf.tail;
|
404
|
+
buf_reset(&dr->buf);
|
405
|
+
dr->err = 0;
|
406
|
+
c = read_attrs(dr, c, '?', '?', is_xml, 1);
|
407
|
+
if (dr->err) {
|
408
|
+
if (dr->has.text) {
|
644
409
|
VALUE args[1];
|
645
410
|
|
646
|
-
if (dr->convert_special) {
|
647
|
-
|
648
|
-
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
649
|
-
}
|
411
|
+
if (dr->options.convert_special) {
|
412
|
+
ox_sax_collapse_special(dr, content, line, col);
|
650
413
|
}
|
651
414
|
args[0] = rb_str_new2(content);
|
652
415
|
#if HAS_ENCODING_SUPPORT
|
@@ -658,106 +421,144 @@ read_instruction(SaxDrive dr) {
|
|
658
421
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
659
422
|
}
|
660
423
|
#endif
|
661
|
-
if (dr->
|
424
|
+
if (dr->has.line) {
|
662
425
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
663
426
|
}
|
664
|
-
if (dr->
|
427
|
+
if (dr->has.column) {
|
665
428
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
666
429
|
}
|
667
430
|
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
668
431
|
}
|
669
|
-
dr->
|
432
|
+
dr->buf.tail = cend;
|
433
|
+
c = buf_get(&dr->buf);
|
670
434
|
} else {
|
671
|
-
line = dr->line;
|
672
|
-
col = dr->col;
|
673
|
-
c =
|
674
|
-
if ('>'
|
675
|
-
|
676
|
-
|
435
|
+
line = dr->buf.line;
|
436
|
+
col = dr->buf.col;
|
437
|
+
c = buf_next_non_white(&dr->buf);
|
438
|
+
if ('>' == c) {
|
439
|
+
c = buf_get(&dr->buf);
|
440
|
+
} else {
|
441
|
+
ox_sax_drive_error_at(dr, NO_TERM "instruction not terminated", line, col);
|
442
|
+
if ('>' == c) {
|
443
|
+
c = buf_get(&dr->buf);
|
444
|
+
}
|
677
445
|
}
|
678
446
|
}
|
679
|
-
if (dr->
|
447
|
+
if (dr->has.end_instruct) {
|
680
448
|
VALUE args[1];
|
681
449
|
|
682
|
-
if (dr->
|
450
|
+
if (dr->has.line) {
|
683
451
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
684
452
|
}
|
685
|
-
if (dr->
|
453
|
+
if (dr->has.column) {
|
686
454
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
687
455
|
}
|
688
456
|
args[0] = target;
|
689
457
|
rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
|
690
458
|
}
|
691
|
-
dr->str = 0;
|
459
|
+
dr->buf.str = 0;
|
692
460
|
|
693
|
-
return
|
461
|
+
return c;
|
694
462
|
}
|
695
463
|
|
696
464
|
/* Entered after the "<!DOCTYPE" sequence. Ready to read the rest.
|
697
465
|
*/
|
698
|
-
static
|
466
|
+
static char
|
699
467
|
read_doctype(SaxDrive dr) {
|
700
468
|
char c;
|
701
|
-
int line = dr->line;
|
702
|
-
int col = dr->col - 10;
|
469
|
+
int line = dr->buf.line;
|
470
|
+
int col = dr->buf.col - 10;
|
471
|
+
char *s;
|
703
472
|
|
704
|
-
dr->
|
705
|
-
|
473
|
+
buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
|
474
|
+
buf_protect(&dr->buf);
|
475
|
+
while ('>' != (c = buf_get(&dr->buf))) {
|
706
476
|
if ('\0' == c) {
|
707
|
-
|
708
|
-
return
|
477
|
+
ox_sax_drive_error(dr, NO_TERM "doctype not terminated");
|
478
|
+
return c;
|
709
479
|
}
|
710
480
|
}
|
711
|
-
|
712
|
-
|
481
|
+
if (dr->options.smart && 0 == dr->hints) {
|
482
|
+
for (s = dr->buf.str; is_white(*s); s++) { }
|
483
|
+
if (0 == strncasecmp("HTML", s, 4)) {
|
484
|
+
dr->hints = ox_hints_html();
|
485
|
+
}
|
486
|
+
}
|
487
|
+
*(dr->buf.tail - 1) = '\0';
|
488
|
+
if (dr->has.doctype) {
|
713
489
|
VALUE args[1];
|
714
490
|
|
715
|
-
if (dr->
|
491
|
+
if (dr->has.line) {
|
716
492
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
717
493
|
}
|
718
|
-
if (dr->
|
494
|
+
if (dr->has.column) {
|
719
495
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
720
496
|
}
|
721
|
-
args[0] = rb_str_new2(dr->str);
|
497
|
+
args[0] = rb_str_new2(dr->buf.str);
|
722
498
|
rb_funcall2(dr->handler, ox_doctype_id, 1, args);
|
723
499
|
}
|
724
|
-
dr->str = 0;
|
500
|
+
dr->buf.str = 0;
|
725
501
|
|
726
|
-
return
|
502
|
+
return buf_get(&dr->buf);
|
727
503
|
}
|
728
504
|
|
729
505
|
/* Entered after the "<![CDATA[" sequence. Ready to read the rest.
|
730
506
|
*/
|
731
|
-
static
|
507
|
+
static char
|
732
508
|
read_cdata(SaxDrive dr) {
|
733
|
-
char
|
734
|
-
int
|
735
|
-
int
|
736
|
-
int
|
737
|
-
|
738
|
-
|
739
|
-
dr->
|
509
|
+
char c;
|
510
|
+
int end = 0;
|
511
|
+
int line = dr->buf.line;
|
512
|
+
int col = dr->buf.col - 10;
|
513
|
+
struct _CheckPt cp = CHECK_PT_INIT;
|
514
|
+
|
515
|
+
buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
|
516
|
+
buf_protect(&dr->buf);
|
740
517
|
while (1) {
|
741
|
-
c =
|
742
|
-
|
518
|
+
c = buf_get(&dr->buf);
|
519
|
+
switch (c) {
|
520
|
+
case ']':
|
743
521
|
end++;
|
744
|
-
|
522
|
+
break;
|
523
|
+
case '>':
|
745
524
|
if (2 <= end) {
|
746
|
-
*(dr->
|
747
|
-
|
525
|
+
*(dr->buf.tail - 3) = '\0';
|
526
|
+
c = buf_get(&dr->buf);
|
527
|
+
goto CB;
|
748
528
|
}
|
529
|
+
if (!buf_checkset(&cp)) {
|
530
|
+
buf_checkpoint(&dr->buf, &cp);
|
531
|
+
}
|
749
532
|
end = 0;
|
750
|
-
|
751
|
-
|
752
|
-
|
753
|
-
|
754
|
-
|
755
|
-
|
533
|
+
break;
|
534
|
+
case '<':
|
535
|
+
if (!buf_checkset(&cp)) {
|
536
|
+
buf_checkpoint(&dr->buf, &cp);
|
537
|
+
}
|
538
|
+
end = 0;
|
539
|
+
break;
|
540
|
+
case '\0':
|
541
|
+
if (buf_checkset(&cp)) {
|
542
|
+
c = buf_checkback(&dr->buf, &cp);
|
543
|
+
ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
|
544
|
+
*(dr->buf.tail - 1) = '\0';
|
545
|
+
goto CB;
|
546
|
+
}
|
547
|
+
ox_sax_drive_error(dr, NO_TERM "CDATA not terminated");
|
548
|
+
return '\0';
|
549
|
+
default:
|
550
|
+
if (1 < end && !buf_checkset(&cp)) {
|
551
|
+
buf_checkpoint(&dr->buf, &cp);
|
552
|
+
}
|
553
|
+
end = 0;
|
554
|
+
break;
|
555
|
+
}
|
756
556
|
}
|
757
|
-
|
557
|
+
CB:
|
558
|
+
if (dr->has.cdata) {
|
758
559
|
VALUE args[1];
|
759
560
|
|
760
|
-
args[0] = rb_str_new2(dr->str);
|
561
|
+
args[0] = rb_str_new2(dr->buf.str);
|
761
562
|
#if HAS_ENCODING_SUPPORT
|
762
563
|
if (0 != dr->encoding) {
|
763
564
|
rb_enc_associate(args[0], dr->encoding);
|
@@ -767,53 +568,76 @@ read_cdata(SaxDrive dr) {
|
|
767
568
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
768
569
|
}
|
769
570
|
#endif
|
770
|
-
if (dr->
|
571
|
+
if (dr->has.line) {
|
771
572
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
772
573
|
}
|
773
|
-
if (dr->
|
574
|
+
if (dr->has.column) {
|
774
575
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
775
576
|
}
|
776
577
|
rb_funcall2(dr->handler, ox_cdata_id, 1, args);
|
777
578
|
}
|
778
|
-
dr->str = 0;
|
579
|
+
dr->buf.str = 0;
|
779
580
|
|
780
|
-
return
|
581
|
+
return c;
|
781
582
|
}
|
782
583
|
|
783
584
|
/* Entered after the "<!--" sequence. Ready to read the rest.
|
784
585
|
*/
|
785
|
-
static
|
586
|
+
static char
|
786
587
|
read_comment(SaxDrive dr) {
|
787
|
-
char
|
788
|
-
int
|
789
|
-
int
|
790
|
-
int
|
791
|
-
|
792
|
-
|
588
|
+
char c;
|
589
|
+
int end = 0;
|
590
|
+
int line = dr->buf.line;
|
591
|
+
int col = dr->buf.col - 4;
|
592
|
+
struct _CheckPt cp = CHECK_PT_INIT;
|
593
|
+
|
594
|
+
buf_backup(&dr->buf); /* back up to the start in case the cdata is empty */
|
595
|
+
buf_protect(&dr->buf);
|
793
596
|
while (1) {
|
794
|
-
c =
|
795
|
-
|
796
|
-
|
797
|
-
|
798
|
-
|
799
|
-
|
800
|
-
|
597
|
+
c = buf_get(&dr->buf);
|
598
|
+
switch (c) {
|
599
|
+
case '-':
|
600
|
+
end++;
|
601
|
+
break;
|
602
|
+
case '>':
|
603
|
+
if (2 <= end) {
|
604
|
+
*(dr->buf.tail - 3) = '\0';
|
605
|
+
c = buf_get(&dr->buf);
|
606
|
+
goto CB;
|
801
607
|
}
|
802
|
-
|
803
|
-
|
804
|
-
|
805
|
-
} else {
|
608
|
+
if (!buf_checkset(&cp)) {
|
609
|
+
buf_checkpoint(&dr->buf, &cp);
|
610
|
+
}
|
806
611
|
end = 0;
|
807
|
-
|
808
|
-
|
809
|
-
|
810
|
-
|
811
|
-
|
612
|
+
break;
|
613
|
+
case '<':
|
614
|
+
if (!buf_checkset(&cp)) {
|
615
|
+
buf_checkpoint(&dr->buf, &cp);
|
616
|
+
}
|
617
|
+
end = 0;
|
618
|
+
break;
|
619
|
+
case '\0':
|
620
|
+
if (buf_checkset(&cp)) {
|
621
|
+
c = buf_checkback(&dr->buf, &cp);
|
622
|
+
ox_sax_drive_error(dr, NO_TERM "comment not terminated");
|
623
|
+
*(dr->buf.tail - 1) = '\0';
|
624
|
+
goto CB;
|
625
|
+
}
|
626
|
+
ox_sax_drive_error(dr, NO_TERM "comment not terminated");
|
627
|
+
return '\0';
|
628
|
+
default:
|
629
|
+
if (1 < end && !buf_checkset(&cp)) {
|
630
|
+
buf_checkpoint(&dr->buf, &cp);
|
631
|
+
}
|
632
|
+
end = 0;
|
633
|
+
break;
|
634
|
+
}
|
812
635
|
}
|
813
|
-
|
636
|
+
CB:
|
637
|
+
if (dr->has.comment) {
|
814
638
|
VALUE args[1];
|
815
639
|
|
816
|
-
args[0] = rb_str_new2(dr->str);
|
640
|
+
args[0] = rb_str_new2(dr->buf.str);
|
817
641
|
#if HAS_ENCODING_SUPPORT
|
818
642
|
if (0 != dr->encoding) {
|
819
643
|
rb_enc_associate(args[0], dr->encoding);
|
@@ -823,44 +647,91 @@ read_comment(SaxDrive dr) {
|
|
823
647
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
824
648
|
}
|
825
649
|
#endif
|
826
|
-
if (dr->
|
650
|
+
if (dr->has.line) {
|
827
651
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
828
652
|
}
|
829
|
-
if (dr->
|
653
|
+
if (dr->has.column) {
|
830
654
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
831
655
|
}
|
832
656
|
rb_funcall2(dr->handler, ox_comment_id, 1, args);
|
833
657
|
}
|
834
|
-
dr->str = 0;
|
658
|
+
dr->buf.str = 0;
|
835
659
|
|
836
|
-
return
|
660
|
+
return c;
|
837
661
|
}
|
838
662
|
|
839
663
|
/* Entered after the '<' and the first character after that. Returns status
|
840
664
|
* code.
|
841
665
|
*/
|
842
|
-
static
|
843
|
-
|
666
|
+
static char
|
667
|
+
read_element_start(SaxDrive dr) {
|
844
668
|
char *ename = 0;
|
845
669
|
VALUE name = Qnil;
|
846
|
-
const char *err;
|
847
670
|
char c;
|
848
671
|
int closed;
|
849
|
-
int line = dr->line;
|
850
|
-
int col = dr->col - 1;
|
851
|
-
|
672
|
+
int line = dr->buf.line;
|
673
|
+
int col = dr->buf.col - 1;
|
674
|
+
Hint h = 0;
|
675
|
+
int stackless = 0;
|
852
676
|
|
853
677
|
if ('\0' == (c = read_name_token(dr))) {
|
854
|
-
return
|
678
|
+
return '\0';
|
679
|
+
}
|
680
|
+
if (dr->options.smart && 0 == dr->hints && stack_empty(&dr->stack) && 0 == strcasecmp("html", dr->buf.str)) {
|
681
|
+
dr->hints = ox_hints_html();
|
682
|
+
}
|
683
|
+
if (0 != dr->hints) {
|
684
|
+
hint_clear_empty(dr);
|
685
|
+
h = ox_hint_find(dr->hints, dr->buf.str);
|
686
|
+
if (0 == h) {
|
687
|
+
char msg[100];
|
688
|
+
|
689
|
+
sprintf(msg, "%s%s is not a valid element type for a %s document type.", INV_ELEMENT, dr->buf.str, dr->hints->name);
|
690
|
+
ox_sax_drive_error(dr, msg);
|
691
|
+
} else {
|
692
|
+
Nv top_nv = stack_peek(&dr->stack);
|
693
|
+
|
694
|
+
if (h->empty) {
|
695
|
+
stackless = 1;
|
696
|
+
}
|
697
|
+
if (0 != top_nv) {
|
698
|
+
char msg[256];
|
699
|
+
|
700
|
+
if (!h->nest && 0 == strcasecmp(top_nv->name, h->name)) {
|
701
|
+
snprintf(msg, sizeof(msg) - 1, "%s%s can not be nested in a %s document, closing previous.",
|
702
|
+
INV_ELEMENT, dr->buf.str, dr->hints->name);
|
703
|
+
ox_sax_drive_error(dr, msg);
|
704
|
+
stack_pop(&dr->stack);
|
705
|
+
end_element_cb(dr, top_nv->val, line, col);
|
706
|
+
top_nv = stack_peek(&dr->stack);
|
707
|
+
}
|
708
|
+
if (0 != h->parents) {
|
709
|
+
const char **p;
|
710
|
+
int ok = 0;
|
711
|
+
|
712
|
+
for (p = h->parents; 0 != *p; p++) {
|
713
|
+
if (0 == strcasecmp(*p, top_nv->name)) {
|
714
|
+
ok = 1;
|
715
|
+
break;
|
716
|
+
}
|
717
|
+
}
|
718
|
+
if (!ok) {
|
719
|
+
snprintf(msg, sizeof(msg) - 1, "%s%s can not be a child of a %s in a %s document.",
|
720
|
+
INV_ELEMENT, h->name, top_nv->name, dr->hints->name);
|
721
|
+
ox_sax_drive_error(dr, msg);
|
722
|
+
}
|
723
|
+
}
|
724
|
+
}
|
725
|
+
}
|
855
726
|
}
|
856
|
-
name = str2sym(dr
|
857
|
-
if (dr->
|
727
|
+
name = str2sym(dr, dr->buf.str, &ename);
|
728
|
+
if (dr->has.start_element) {
|
858
729
|
VALUE args[1];
|
859
730
|
|
860
|
-
if (dr->
|
731
|
+
if (dr->has.line) {
|
861
732
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
862
733
|
}
|
863
|
-
if (dr->
|
734
|
+
if (dr->has.column) {
|
864
735
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
865
736
|
}
|
866
737
|
args[0] = name;
|
@@ -871,142 +742,158 @@ read_element(SaxDrive dr) {
|
|
871
742
|
} else if ('>' == c) {
|
872
743
|
closed = 0;
|
873
744
|
} else {
|
874
|
-
|
875
|
-
|
876
|
-
|
877
|
-
|
878
|
-
|
745
|
+
buf_protect(&dr->buf);
|
746
|
+
c = read_attrs(dr, c, '/', '>', 0, 0);
|
747
|
+
if (is_white(c)) {
|
748
|
+
c = buf_next_non_white(&dr->buf);
|
749
|
+
}
|
750
|
+
closed = ('/' == c);
|
879
751
|
}
|
880
752
|
if (closed) {
|
881
|
-
|
882
|
-
|
883
|
-
|
884
|
-
|
885
|
-
|
753
|
+
c = buf_next_non_white(&dr->buf);
|
754
|
+
line = dr->buf.line;
|
755
|
+
col = dr->buf.col - 1;
|
756
|
+
end_element_cb(dr, name, line, col);
|
757
|
+
} else if (stackless) {
|
758
|
+
end_element_cb(dr, name, line, col);
|
759
|
+
} else {
|
760
|
+
stack_push(&dr->stack, ename, name, h);
|
886
761
|
}
|
887
|
-
if (
|
888
|
-
|
889
|
-
|
890
|
-
|
891
|
-
|
762
|
+
if ('>' != c) {
|
763
|
+
ox_sax_drive_error(dr, WRONG_CHAR "element not closed");
|
764
|
+
return c;
|
765
|
+
}
|
766
|
+
dr->buf.str = 0;
|
892
767
|
|
893
|
-
|
894
|
-
|
895
|
-
|
896
|
-
|
897
|
-
|
898
|
-
|
899
|
-
|
900
|
-
|
901
|
-
|
768
|
+
return buf_get(&dr->buf);
|
769
|
+
}
|
770
|
+
|
771
|
+
static Nv
|
772
|
+
stack_rev_find(NStack stack, const char *name) {
|
773
|
+
Nv nv;
|
774
|
+
|
775
|
+
for (nv = stack->tail - 1; stack->head <= nv; nv--) {
|
776
|
+
if (0 == strcmp(name, nv->name)) {
|
777
|
+
return nv;
|
778
|
+
}
|
779
|
+
}
|
780
|
+
return 0;
|
781
|
+
}
|
782
|
+
|
783
|
+
static char
|
784
|
+
read_element_end(SaxDrive dr) {
|
785
|
+
VALUE name = Qnil;
|
786
|
+
char c;
|
787
|
+
int line = dr->buf.line;
|
788
|
+
int col = dr->buf.col - 2;
|
789
|
+
Nv nv;
|
790
|
+
|
791
|
+
if ('\0' == (c = read_name_token(dr))) {
|
792
|
+
return '\0';
|
793
|
+
}
|
794
|
+
// c should be > and current is one past so read another char
|
795
|
+
c = buf_get(&dr->buf);
|
796
|
+
nv = stack_peek(&dr->stack);
|
797
|
+
if (0 != nv && 0 == strcmp(dr->buf.str, nv->name)) {
|
798
|
+
name = nv->val;
|
799
|
+
stack_pop(&dr->stack);
|
902
800
|
} else {
|
903
|
-
|
904
|
-
|
905
|
-
|
801
|
+
// Mismatched start and end
|
802
|
+
char msg[256];
|
803
|
+
Nv match = stack_rev_find(&dr->stack, dr->buf.str);
|
804
|
+
|
805
|
+
if (0 == match) {
|
806
|
+
// Not found so open and close element.
|
807
|
+
char *ename = 0;
|
808
|
+
Hint h = ox_hint_find(dr->hints, dr->buf.str);
|
809
|
+
|
810
|
+
if (0 != h && h->empty) {
|
811
|
+
// Just close normally
|
812
|
+
name = str2sym(dr, dr->buf.str, &ename);
|
813
|
+
snprintf(msg, sizeof(msg) - 1, "%selement '%s' should not have a separate close element", EL_MISMATCH, dr->buf.str);
|
814
|
+
ox_sax_drive_error_at(dr, msg, line, col);
|
815
|
+
return c;
|
816
|
+
} else {
|
817
|
+
snprintf(msg, sizeof(msg) - 1, "%selement '%s' closed but not opened", EL_MISMATCH, dr->buf.str);
|
818
|
+
ox_sax_drive_error_at(dr, msg, line, col);
|
819
|
+
name = str2sym(dr, dr->buf.str, &ename);
|
820
|
+
if (dr->has.start_element) {
|
906
821
|
VALUE args[1];
|
907
822
|
|
908
|
-
if (dr->
|
823
|
+
if (dr->has.line) {
|
909
824
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
910
825
|
}
|
911
|
-
if (dr->
|
912
|
-
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col
|
826
|
+
if (dr->has.column) {
|
827
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
913
828
|
}
|
914
829
|
args[0] = name;
|
915
|
-
rb_funcall2(dr->handler,
|
830
|
+
rb_funcall2(dr->handler, ox_start_element_id, 1, args);
|
916
831
|
}
|
917
|
-
if (0 == strcmp(dr->str, ename)) {
|
918
|
-
return 0;
|
919
|
-
} else {
|
920
|
-
return NAME_MISMATCH;
|
921
|
-
}
|
922
|
-
} else {
|
923
|
-
return -1;
|
924
|
-
}
|
925
|
-
}
|
926
|
-
line = dr->line;
|
927
|
-
col = dr->col;
|
928
|
-
// read_children reads up to the end of the terminating element name
|
929
|
-
dr->col += (uint32_t)(dr->cur - dr->str);
|
930
|
-
if (0 != ename && 0 != strcmp(ename, dr->str)) {
|
931
|
-
if (dr->has_line) {
|
932
|
-
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
933
|
-
}
|
934
|
-
if (dr->has_column) {
|
935
|
-
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
936
832
|
}
|
937
|
-
|
938
|
-
//
|
939
|
-
|
940
|
-
if (0 != dr->has_end_element) {
|
941
|
-
VALUE args[1];
|
833
|
+
} else {
|
834
|
+
// Found a match so close all up to the found element in stack.
|
835
|
+
Nv n2;
|
942
836
|
|
943
|
-
|
944
|
-
|
945
|
-
|
946
|
-
|
947
|
-
|
837
|
+
if (0 != (n2 = hint_try_close(dr, dr->buf.str))) {
|
838
|
+
name = n2->val;
|
839
|
+
} else {
|
840
|
+
snprintf(msg, sizeof(msg) - 1, "%selement '%s' close does not match '%s' open", EL_MISMATCH, dr->buf.str, nv->name);
|
841
|
+
ox_sax_drive_error_at(dr, msg, line, col);
|
842
|
+
if (dr->has.line) {
|
843
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
844
|
+
}
|
845
|
+
if (dr->has.column) {
|
846
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
847
|
+
}
|
848
|
+
for (nv = stack_pop(&dr->stack); match < nv; nv = stack_pop(&dr->stack)) {
|
849
|
+
if (dr->has.end_element) {
|
850
|
+
rb_funcall(dr->handler, ox_end_element_id, 1, nv->val);
|
948
851
|
}
|
949
|
-
args[0] = name;
|
950
|
-
rb_funcall2(dr->handler, ox_end_element_id, 1, args);
|
951
852
|
}
|
952
|
-
|
953
|
-
} else {
|
954
|
-
return -1;
|
853
|
+
name = nv->val;
|
955
854
|
}
|
956
855
|
}
|
957
|
-
if (0 != dr->has_end_element) {
|
958
|
-
VALUE args[1];
|
959
|
-
|
960
|
-
if (dr->has_line) {
|
961
|
-
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
962
|
-
}
|
963
|
-
if (dr->has_column) {
|
964
|
-
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col - 2));
|
965
|
-
}
|
966
|
-
args[0] = name;
|
967
|
-
rb_funcall2(dr->handler, ox_end_element_id, 1, args);
|
968
|
-
}
|
969
856
|
}
|
970
|
-
dr
|
857
|
+
end_element_cb(dr, name, line, col);
|
971
858
|
|
972
|
-
return
|
859
|
+
return c;
|
973
860
|
}
|
974
861
|
|
975
|
-
static
|
862
|
+
static char
|
976
863
|
read_text(SaxDrive dr) {
|
977
864
|
char c;
|
978
|
-
int line = dr->line;
|
979
|
-
int col = dr->col - 1;
|
865
|
+
int line = dr->buf.line;
|
866
|
+
int col = dr->buf.col - 1;
|
980
867
|
|
981
|
-
|
982
|
-
|
983
|
-
while ('<' != (c =
|
868
|
+
buf_backup(&dr->buf);
|
869
|
+
buf_protect(&dr->buf);
|
870
|
+
while ('<' != (c = buf_get(&dr->buf))) {
|
984
871
|
if ('\0' == c) {
|
985
|
-
|
986
|
-
|
872
|
+
ox_sax_drive_error(dr, NO_TERM "text not terminated");
|
873
|
+
break;
|
987
874
|
}
|
988
875
|
}
|
989
|
-
|
990
|
-
|
876
|
+
if ('\0' != c) {
|
877
|
+
*(dr->buf.tail - 1) = '\0';
|
878
|
+
}
|
879
|
+
if (dr->has.value) {
|
991
880
|
VALUE args[1];
|
992
881
|
|
993
|
-
if (dr->
|
882
|
+
if (dr->has.line) {
|
994
883
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
995
884
|
}
|
996
|
-
if (dr->
|
885
|
+
if (dr->has.column) {
|
997
886
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
998
887
|
}
|
999
888
|
*args = dr->value_obj;
|
1000
889
|
rb_funcall2(dr->handler, ox_value_id, 1, args);
|
1001
|
-
} else if (dr->
|
890
|
+
} else if (dr->has.text) {
|
1002
891
|
VALUE args[1];
|
1003
892
|
|
1004
|
-
if (dr->convert_special) {
|
1005
|
-
|
1006
|
-
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
1007
|
-
}
|
893
|
+
if (dr->options.convert_special) {
|
894
|
+
ox_sax_collapse_special(dr, dr->buf.str, line, col);
|
1008
895
|
}
|
1009
|
-
args[0] = rb_str_new2(dr->str);
|
896
|
+
args[0] = rb_str_new2(dr->buf.str);
|
1010
897
|
#if HAS_ENCODING_SUPPORT
|
1011
898
|
if (0 != dr->encoding) {
|
1012
899
|
rb_enc_associate(args[0], dr->encoding);
|
@@ -1016,93 +903,92 @@ read_text(SaxDrive dr) {
|
|
1016
903
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
1017
904
|
}
|
1018
905
|
#endif
|
1019
|
-
if (dr->
|
906
|
+
if (dr->has.line) {
|
1020
907
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
1021
908
|
}
|
1022
|
-
if (dr->
|
909
|
+
if (dr->has.column) {
|
1023
910
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
1024
911
|
}
|
1025
912
|
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
1026
913
|
}
|
1027
|
-
dr->str = 0;
|
914
|
+
dr->buf.str = 0;
|
1028
915
|
|
1029
|
-
return
|
916
|
+
return c;
|
1030
917
|
}
|
1031
918
|
|
1032
|
-
static
|
1033
|
-
read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
919
|
+
static char
|
920
|
+
read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml, int eq_req) {
|
1034
921
|
VALUE name = Qnil;
|
1035
922
|
int is_encoding = 0;
|
1036
923
|
int line;
|
1037
924
|
int col;
|
1038
|
-
char last;
|
1039
925
|
char *attr_value;
|
1040
|
-
|
1041
|
-
|
926
|
+
|
927
|
+
// already protected by caller
|
928
|
+
dr->buf.str = dr->buf.tail;
|
1042
929
|
if (is_white(c)) {
|
1043
|
-
c =
|
930
|
+
c = buf_next_non_white(&dr->buf);
|
1044
931
|
}
|
1045
932
|
while (termc != c && term2 != c) {
|
1046
|
-
|
1047
|
-
line = dr->line;
|
1048
|
-
col = dr->col;
|
933
|
+
buf_backup(&dr->buf);
|
1049
934
|
if ('\0' == c) {
|
1050
|
-
|
935
|
+
ox_sax_drive_error(dr, NO_TERM "attributes not terminated");
|
936
|
+
return '\0';
|
1051
937
|
}
|
938
|
+
line = dr->buf.line;
|
939
|
+
col = dr->buf.col;
|
1052
940
|
if ('\0' == (c = read_name_token(dr))) {
|
1053
|
-
|
941
|
+
ox_sax_drive_error(dr, NO_TERM "error reading token");
|
942
|
+
return '\0';
|
1054
943
|
}
|
1055
|
-
if (is_xml && 0 == strcasecmp("encoding", dr->str)) {
|
944
|
+
if (is_xml && 0 == strcasecmp("encoding", dr->buf.str)) {
|
1056
945
|
is_encoding = 1;
|
1057
946
|
}
|
1058
|
-
|
1059
|
-
|
1060
|
-
name = str2sym(dr->str, dr, 0);
|
947
|
+
if (dr->has.attr || dr->has.attr_value) {
|
948
|
+
name = str2sym(dr, dr->buf.str, 0);
|
1061
949
|
}
|
1062
950
|
if (is_white(c)) {
|
1063
|
-
c =
|
951
|
+
c = buf_next_non_white(&dr->buf);
|
1064
952
|
}
|
1065
|
-
last = '\0';
|
1066
953
|
if ('=' != c) {
|
1067
|
-
if (
|
1068
|
-
|
1069
|
-
|
954
|
+
if (eq_req) {
|
955
|
+
dr->err = 1;
|
956
|
+
return c;
|
1070
957
|
} else {
|
1071
|
-
|
958
|
+
ox_sax_drive_error(dr, WRONG_CHAR "no attribute value");
|
959
|
+
attr_value = (char*)"";
|
1072
960
|
}
|
1073
961
|
} else {
|
1074
|
-
|
1075
|
-
|
1076
|
-
|
1077
|
-
attr_value = dr->str;
|
962
|
+
line = dr->buf.line;
|
963
|
+
col = dr->buf.col;
|
964
|
+
c = read_quoted_value(dr);
|
965
|
+
attr_value = dr->buf.str;
|
1078
966
|
if (is_encoding) {
|
1079
967
|
#if HAS_ENCODING_SUPPORT
|
1080
|
-
dr->encoding = rb_enc_find(dr->str);
|
968
|
+
dr->encoding = rb_enc_find(dr->buf.str);
|
1081
969
|
#elif HAS_PRIVATE_ENCODING
|
1082
|
-
dr->encoding = rb_str_new2(dr->str);
|
970
|
+
dr->encoding = rb_str_new2(dr->buf.str);
|
1083
971
|
#endif
|
1084
972
|
is_encoding = 0;
|
1085
973
|
}
|
1086
974
|
}
|
1087
|
-
if (dr->
|
975
|
+
if (dr->has.attr_value) {
|
1088
976
|
VALUE args[2];
|
1089
977
|
|
1090
|
-
if (dr->
|
978
|
+
if (dr->has.line) {
|
1091
979
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
1092
980
|
}
|
1093
|
-
if (dr->
|
981
|
+
if (dr->has.column) {
|
1094
982
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
1095
983
|
}
|
1096
984
|
args[0] = name;
|
1097
985
|
args[1] = dr->value_obj;
|
1098
986
|
rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
|
1099
|
-
} else if (dr->
|
987
|
+
} else if (dr->has.attr) {
|
1100
988
|
VALUE args[2];
|
1101
989
|
|
1102
990
|
args[0] = name;
|
1103
|
-
|
1104
|
-
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
1105
|
-
}
|
991
|
+
ox_sax_collapse_special(dr, dr->buf.str, line, col);
|
1106
992
|
args[1] = rb_str_new2(attr_value);
|
1107
993
|
#if HAS_ENCODING_SUPPORT
|
1108
994
|
if (0 != dr->encoding) {
|
@@ -1113,34 +999,35 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
1113
999
|
rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
|
1114
1000
|
}
|
1115
1001
|
#endif
|
1116
|
-
if (dr->
|
1002
|
+
if (dr->has.line) {
|
1117
1003
|
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
1118
1004
|
}
|
1119
|
-
if (dr->
|
1005
|
+
if (dr->has.column) {
|
1120
1006
|
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
1121
1007
|
}
|
1122
1008
|
rb_funcall2(dr->handler, ox_attr_id, 2, args);
|
1123
1009
|
}
|
1124
|
-
if (
|
1125
|
-
c =
|
1126
|
-
} else {
|
1127
|
-
c = next_non_white(dr);
|
1010
|
+
if (is_white(c)) {
|
1011
|
+
c = buf_next_non_white(&dr->buf);
|
1128
1012
|
}
|
1129
1013
|
}
|
1130
|
-
dr->str = 0;
|
1014
|
+
dr->buf.str = 0;
|
1131
1015
|
|
1132
|
-
return
|
1016
|
+
return c;
|
1133
1017
|
}
|
1134
1018
|
|
1019
|
+
/* The character after the word is returned. dr->buf.tail is one past that. dr->buf.str will point to the
|
1020
|
+
* token which will be '\0' terminated.
|
1021
|
+
*/
|
1135
1022
|
static char
|
1136
1023
|
read_name_token(SaxDrive dr) {
|
1137
1024
|
char c;
|
1138
1025
|
|
1139
|
-
dr->str = dr->
|
1140
|
-
c =
|
1026
|
+
dr->buf.str = dr->buf.tail;
|
1027
|
+
c = buf_get(&dr->buf);
|
1141
1028
|
if (is_white(c)) {
|
1142
|
-
c =
|
1143
|
-
dr->str = dr->
|
1029
|
+
c = buf_next_non_white(&dr->buf);
|
1030
|
+
dr->buf.str = dr->buf.tail - 1;
|
1144
1031
|
}
|
1145
1032
|
while (1) {
|
1146
1033
|
switch (c) {
|
@@ -1151,179 +1038,73 @@ read_name_token(SaxDrive dr) {
|
|
1151
1038
|
case '=':
|
1152
1039
|
case '/':
|
1153
1040
|
case '>':
|
1041
|
+
case '<':
|
1154
1042
|
case '\n':
|
1155
1043
|
case '\r':
|
1156
|
-
*(dr->
|
1044
|
+
*(dr->buf.tail - 1) = '\0';
|
1157
1045
|
return c;
|
1158
1046
|
case '\0':
|
1159
1047
|
/* documents never terminate after a name token */
|
1160
|
-
|
1048
|
+
ox_sax_drive_error(dr, NO_TERM "document not terminated");
|
1161
1049
|
return '\0';
|
1162
1050
|
default:
|
1163
1051
|
break;
|
1164
1052
|
}
|
1165
|
-
c =
|
1053
|
+
c = buf_get(&dr->buf);
|
1166
1054
|
}
|
1167
1055
|
return '\0';
|
1168
1056
|
}
|
1169
1057
|
|
1170
|
-
|
1171
|
-
|
1172
|
-
|
1058
|
+
/* The character after the quote or if there is no quote, the character after the word is returned. dr->buf.tail is one past
|
1059
|
+
* that. dr->buf.str will point to the token which will be '\0' terminated.
|
1060
|
+
*/
|
1061
|
+
static char
|
1062
|
+
read_quoted_value(SaxDrive dr) {
|
1063
|
+
char c;
|
1173
1064
|
|
1174
|
-
|
1175
|
-
c = sax_drive_get(dr);
|
1065
|
+
c = buf_get(&dr->buf);
|
1176
1066
|
if (is_white(c)) {
|
1177
|
-
c =
|
1067
|
+
c = buf_next_non_white(&dr->buf);
|
1178
1068
|
}
|
1179
1069
|
if ('"' == c || '\'' == c) {
|
1180
1070
|
char term = c;
|
1181
1071
|
|
1182
|
-
dr->str = dr->
|
1183
|
-
while (term != (c =
|
1072
|
+
dr->buf.str = dr->buf.tail;
|
1073
|
+
while (term != (c = buf_get(&dr->buf))) {
|
1184
1074
|
if ('\0' == c) {
|
1185
|
-
|
1186
|
-
return
|
1075
|
+
ox_sax_drive_error(dr, NO_TERM "quoted value not terminated");
|
1076
|
+
return '\0';
|
1187
1077
|
}
|
1188
1078
|
}
|
1189
|
-
|
1190
|
-
dr->
|
1191
|
-
|
1192
|
-
|
1193
|
-
case '\0':
|
1194
|
-
sax_drive_error(dr, "invalid format, non quoted value not terminated", 1);
|
1195
|
-
case ' ':
|
1196
|
-
case '/':
|
1197
|
-
case '>':
|
1198
|
-
case '?': // for instructions
|
1199
|
-
case '\t':
|
1200
|
-
case '\n':
|
1201
|
-
case '\r':
|
1202
|
-
*last = c;
|
1203
|
-
*(dr->cur - 1) = '\0'; /* terminate value */
|
1204
|
-
return 0;
|
1205
|
-
default:
|
1206
|
-
break;
|
1207
|
-
}
|
1208
|
-
}
|
1209
|
-
} else {
|
1210
|
-
dr->str = dr->cur - 1;
|
1211
|
-
if ('\0' == (c = next_white(dr))) {
|
1212
|
-
sax_drive_error(dr, "invalid format, attibute value not in quotes", 1);
|
1213
|
-
}
|
1214
|
-
}
|
1215
|
-
*(dr->cur - 1) = '\0'; /* terminate value */
|
1216
|
-
|
1217
|
-
return 0;
|
1218
|
-
}
|
1219
|
-
|
1220
|
-
static VALUE
|
1221
|
-
rescue_cb(VALUE rdr, VALUE err) {
|
1222
|
-
#ifndef JRUBY_RUBY
|
1223
|
-
/* JRuby seems to play by a different set of rules. It passes in a Fixnum
|
1224
|
-
* instead of an error like other Rubies. For now assume all errors are
|
1225
|
-
* EOF and deal with the results further down the line. */
|
1226
|
-
#if (defined(RUBINIUS_RUBY) || (1 == RUBY_VERSION_MAJOR && 8 == RUBY_VERSION_MINOR))
|
1227
|
-
if (rb_obj_class(err) != rb_eTypeError) {
|
1228
|
-
#else
|
1229
|
-
if (rb_obj_class(err) != rb_eEOFError) {
|
1230
|
-
#endif
|
1231
|
-
SaxDrive dr = (SaxDrive)rdr;
|
1232
|
-
|
1233
|
-
sax_drive_cleanup(dr);
|
1234
|
-
rb_raise(err, "at line %d, column %d\n", dr->line, dr->col);
|
1235
|
-
}
|
1236
|
-
#endif
|
1237
|
-
return Qfalse;
|
1238
|
-
}
|
1239
|
-
|
1240
|
-
static VALUE
|
1241
|
-
partial_io_cb(VALUE rdr) {
|
1242
|
-
SaxDrive dr = (SaxDrive)rdr;
|
1243
|
-
VALUE args[1];
|
1244
|
-
VALUE rstr;
|
1245
|
-
char *str;
|
1246
|
-
size_t cnt;
|
1247
|
-
|
1248
|
-
args[0] = ULONG2NUM(dr->buf_end - dr->cur);
|
1249
|
-
rstr = rb_funcall2(dr->io, ox_readpartial_id, 1, args);
|
1250
|
-
str = StringValuePtr(rstr);
|
1251
|
-
cnt = strlen(str);
|
1252
|
-
/*printf("*** read %lu bytes, str: '%s'\n", cnt, str); */
|
1253
|
-
strcpy(dr->cur, str);
|
1254
|
-
dr->read_end = dr->cur + cnt;
|
1255
|
-
|
1256
|
-
return Qtrue;
|
1257
|
-
}
|
1258
|
-
|
1259
|
-
static VALUE
|
1260
|
-
io_cb(VALUE rdr) {
|
1261
|
-
SaxDrive dr = (SaxDrive)rdr;
|
1262
|
-
VALUE args[1];
|
1263
|
-
VALUE rstr;
|
1264
|
-
char *str;
|
1265
|
-
size_t cnt;
|
1266
|
-
|
1267
|
-
args[0] = ULONG2NUM(dr->buf_end - dr->cur);
|
1268
|
-
/*args[0] = SIZET2NUM(dr->buf_end - dr->cur); */
|
1269
|
-
rstr = rb_funcall2(dr->io, ox_read_id, 1, args);
|
1270
|
-
str = StringValuePtr(rstr);
|
1271
|
-
cnt = strlen(str);
|
1272
|
-
/*printf("*** read %lu bytes, str: '%s'\n", cnt, str); */
|
1273
|
-
strcpy(dr->cur, str);
|
1274
|
-
dr->read_end = dr->cur + cnt;
|
1275
|
-
|
1276
|
-
return Qtrue;
|
1277
|
-
}
|
1278
|
-
|
1279
|
-
static int
|
1280
|
-
read_from_io_partial(SaxDrive dr) {
|
1281
|
-
return (Qfalse == rb_rescue(partial_io_cb, (VALUE)dr, rescue_cb, (VALUE)dr));
|
1282
|
-
}
|
1283
|
-
|
1284
|
-
static int
|
1285
|
-
read_from_io(SaxDrive dr) {
|
1286
|
-
return (Qfalse == rb_rescue(io_cb, (VALUE)dr, rescue_cb, (VALUE)dr));
|
1287
|
-
}
|
1288
|
-
|
1289
|
-
#ifndef JRUBY_RUBY
|
1290
|
-
static int
|
1291
|
-
read_from_fd(SaxDrive dr) {
|
1292
|
-
ssize_t cnt;
|
1293
|
-
size_t max = dr->buf_end - dr->cur;
|
1294
|
-
|
1295
|
-
cnt = read(dr->fd, dr->cur, max);
|
1296
|
-
if (cnt < 0) {
|
1297
|
-
sax_drive_error(dr, "failed to read from file", 1);
|
1298
|
-
return -1;
|
1299
|
-
} else if (0 != cnt) {
|
1300
|
-
dr->read_end = dr->cur + cnt;
|
1079
|
+
// dr->buf.tail is one past quote char
|
1080
|
+
*(dr->buf.tail - 1) = '\0'; /* terminate value */
|
1081
|
+
c = buf_get(&dr->buf);
|
1082
|
+
return c;
|
1301
1083
|
}
|
1302
|
-
|
1303
|
-
|
1304
|
-
|
1305
|
-
|
1306
|
-
|
1307
|
-
|
1308
|
-
|
1309
|
-
|
1310
|
-
|
1311
|
-
|
1312
|
-
|
1313
|
-
|
1314
|
-
|
1084
|
+
// not quoted, look for something that terminates the string
|
1085
|
+
dr->buf.str = dr->buf.tail - 1;
|
1086
|
+
ox_sax_drive_error(dr, WRONG_CHAR "attribute value not in quotes");
|
1087
|
+
while ('\0' != (c = buf_get(&dr->buf))) {
|
1088
|
+
switch (c) {
|
1089
|
+
case ' ':
|
1090
|
+
case '/':
|
1091
|
+
case '>':
|
1092
|
+
case '?': // for instructions
|
1093
|
+
case '\t':
|
1094
|
+
case '\n':
|
1095
|
+
case '\r':
|
1096
|
+
*(dr->buf.tail - 1) = '\0'; /* terminate value */
|
1097
|
+
// dr->buf.tail is in the correct position, one after the word terminator
|
1098
|
+
return c;
|
1099
|
+
default:
|
1100
|
+
break;
|
1101
|
+
}
|
1315
1102
|
}
|
1316
|
-
|
1317
|
-
*s = '\0';
|
1318
|
-
cnt = s - dr->cur;
|
1319
|
-
dr->in_str += cnt;
|
1320
|
-
dr->read_end = dr->cur + cnt;
|
1321
|
-
|
1322
|
-
return 0;
|
1103
|
+
return '\0'; // should never get here
|
1323
1104
|
}
|
1324
1105
|
|
1325
|
-
|
1326
|
-
|
1106
|
+
int
|
1107
|
+
ox_sax_collapse_special(SaxDrive dr, char *str, int line, int col) {
|
1327
1108
|
char *s = str;
|
1328
1109
|
char *b = str;
|
1329
1110
|
|
@@ -1332,7 +1113,7 @@ collapse_special(char *str, int tolerant) {
|
|
1332
1113
|
int c;
|
1333
1114
|
char *end;
|
1334
1115
|
int x = 0;
|
1335
|
-
|
1116
|
+
|
1336
1117
|
s++;
|
1337
1118
|
if ('#' == *s) {
|
1338
1119
|
s++;
|
@@ -1344,46 +1125,47 @@ collapse_special(char *str, int tolerant) {
|
|
1344
1125
|
c = (int)strtol(s, &end, 10);
|
1345
1126
|
}
|
1346
1127
|
if (';' != *end) {
|
1347
|
-
|
1348
|
-
|
1349
|
-
|
1350
|
-
|
1351
|
-
|
1352
|
-
}
|
1353
|
-
continue;
|
1128
|
+
ox_sax_drive_error(dr, NO_TERM "special character does not end with a semicolon");
|
1129
|
+
*b++ = '&';
|
1130
|
+
*b++ = '#';
|
1131
|
+
if (x) {
|
1132
|
+
*b++ = *(s - 1);
|
1354
1133
|
}
|
1355
|
-
|
1134
|
+
continue;
|
1356
1135
|
}
|
1136
|
+
col += (int)(end - s);
|
1357
1137
|
s = end + 1;
|
1358
1138
|
} else if (0 == strncasecmp(s, "lt;", 3)) {
|
1359
1139
|
c = '<';
|
1360
1140
|
s += 3;
|
1141
|
+
col += 3;
|
1361
1142
|
} else if (0 == strncasecmp(s, "gt;", 3)) {
|
1362
1143
|
c = '>';
|
1363
1144
|
s += 3;
|
1145
|
+
col += 3;
|
1364
1146
|
} else if (0 == strncasecmp(s, "amp;", 4)) {
|
1365
1147
|
c = '&';
|
1366
1148
|
s += 4;
|
1149
|
+
col += 4;
|
1367
1150
|
} else if (0 == strncasecmp(s, "quot;", 5)) {
|
1368
1151
|
c = '"';
|
1369
1152
|
s += 5;
|
1153
|
+
col += 5;
|
1370
1154
|
} else if (0 == strncasecmp(s, "apos;", 5)) {
|
1371
1155
|
c = '\'';
|
1372
1156
|
s += 5;
|
1373
|
-
} else if (tolerant) {
|
1374
|
-
*b++ = '&';
|
1375
|
-
continue;
|
1376
1157
|
} else {
|
1377
|
-
|
1378
|
-
|
1379
|
-
if ('\0' == *s) {
|
1380
|
-
return EDOM;
|
1381
|
-
}
|
1382
|
-
}
|
1383
|
-
s++;
|
1158
|
+
ox_sax_drive_error_at(dr, NO_TERM "special character does not end with a semicolon", line, col);
|
1159
|
+
c = '&';
|
1384
1160
|
}
|
1385
1161
|
*b++ = (char)c;
|
1162
|
+
col++;
|
1386
1163
|
} else {
|
1164
|
+
if ('\n' == *s) {
|
1165
|
+
line++;
|
1166
|
+
col = 0;
|
1167
|
+
}
|
1168
|
+
col++;
|
1387
1169
|
*b++ = *s++;
|
1388
1170
|
}
|
1389
1171
|
}
|
@@ -1392,214 +1174,58 @@ collapse_special(char *str, int tolerant) {
|
|
1392
1174
|
return 0;
|
1393
1175
|
}
|
1394
1176
|
|
1395
|
-
static
|
1396
|
-
|
1397
|
-
|
1398
|
-
long v2 = 0;
|
1399
|
-
const char *dot = 0;
|
1400
|
-
char c;
|
1401
|
-
|
1402
|
-
for (; '.' != *text; text++) {
|
1403
|
-
c = *text;
|
1404
|
-
if (c < '0' || '9' < c) {
|
1405
|
-
return Qnil;
|
1406
|
-
}
|
1407
|
-
v = 10 * v + (long)(c - '0');
|
1408
|
-
}
|
1409
|
-
dot = text++;
|
1410
|
-
for (; '\0' != *text && text - dot <= 6; text++) {
|
1411
|
-
c = *text;
|
1412
|
-
if (c < '0' || '9' < c) {
|
1413
|
-
return Qnil;
|
1414
|
-
}
|
1415
|
-
v2 = 10 * v2 + (long)(c - '0');
|
1416
|
-
}
|
1417
|
-
for (; text - dot <= 9; text++) {
|
1418
|
-
v2 *= 10;
|
1419
|
-
}
|
1420
|
-
#if HAS_NANO_TIME
|
1421
|
-
return rb_time_nano_new(v, v2);
|
1422
|
-
#else
|
1423
|
-
return rb_time_new(v, v2 / 1000);
|
1424
|
-
#endif
|
1425
|
-
}
|
1177
|
+
static void
|
1178
|
+
hint_clear_empty(SaxDrive dr) {
|
1179
|
+
Nv nv;
|
1426
1180
|
|
1427
|
-
|
1428
|
-
|
1429
|
-
char end;
|
1430
|
-
char alt;
|
1431
|
-
} *Tp;
|
1432
|
-
|
1433
|
-
static VALUE
|
1434
|
-
parse_xsd_time(const char *text) {
|
1435
|
-
long cargs[10];
|
1436
|
-
long *cp = cargs;
|
1437
|
-
long v;
|
1438
|
-
int i;
|
1439
|
-
char c = '\0';
|
1440
|
-
struct _Tp tpa[10] = { { 4, '-', '-' },
|
1441
|
-
{ 2, '-', '-' },
|
1442
|
-
{ 2, 'T', ' ' },
|
1443
|
-
{ 2, ':', ':' },
|
1444
|
-
{ 2, ':', ':' },
|
1445
|
-
{ 2, '.', '.' },
|
1446
|
-
{ 9, '+', '-' },
|
1447
|
-
{ 2, ':', ':' },
|
1448
|
-
{ 2, '\0', '\0' },
|
1449
|
-
{ 0, '\0', '\0' } };
|
1450
|
-
Tp tp = tpa;
|
1451
|
-
struct tm tm;
|
1452
|
-
|
1453
|
-
memset(cargs, 0, sizeof(cargs));
|
1454
|
-
for (; 0 != tp->cnt; tp++) {
|
1455
|
-
for (i = tp->cnt, v = 0; 0 < i ; text++, i--) {
|
1456
|
-
c = *text;
|
1457
|
-
if (c < '0' || '9' < c) {
|
1458
|
-
if ('\0' == c || tp->end == c || tp->alt == c) {
|
1459
|
-
break;
|
1460
|
-
}
|
1461
|
-
return Qnil;
|
1462
|
-
}
|
1463
|
-
v = 10 * v + (long)(c - '0');
|
1464
|
-
}
|
1465
|
-
if ('\0' == c) {
|
1181
|
+
for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
|
1182
|
+
if (0 == nv->hint) {
|
1466
1183
|
break;
|
1467
1184
|
}
|
1468
|
-
|
1469
|
-
|
1470
|
-
|
1471
|
-
|
1472
|
-
|
1473
|
-
}
|
1474
|
-
tm.tm_year = (int)cargs[0] - 1900;
|
1475
|
-
tm.tm_mon = (int)cargs[1] - 1;
|
1476
|
-
tm.tm_mday = (int)cargs[2];
|
1477
|
-
tm.tm_hour = (int)cargs[3];
|
1478
|
-
tm.tm_min = (int)cargs[4];
|
1479
|
-
tm.tm_sec = (int)cargs[5];
|
1480
|
-
#if HAS_NANO_TIME
|
1481
|
-
return rb_time_nano_new(mktime(&tm), cargs[6]);
|
1482
|
-
#else
|
1483
|
-
return rb_time_new(mktime(&tm), cargs[6] / 1000);
|
1484
|
-
#endif
|
1485
|
-
}
|
1486
|
-
|
1487
|
-
static VALUE
|
1488
|
-
sax_value_as_s(VALUE self) {
|
1489
|
-
SaxDrive dr = DATA_PTR(self);
|
1490
|
-
VALUE rs;
|
1491
|
-
|
1492
|
-
if ('\0' == *dr->str) {
|
1493
|
-
return Qnil;
|
1494
|
-
}
|
1495
|
-
if (dr->convert_special) {
|
1496
|
-
if (0 != collapse_special(dr->str, dr->tolerant) && 0 != strchr(dr->str, '&')) {
|
1497
|
-
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
1185
|
+
if (nv->hint->empty) {
|
1186
|
+
end_element_cb(dr, nv->val, dr->buf.line, dr->buf.col);
|
1187
|
+
stack_pop(&dr->stack);
|
1188
|
+
} else {
|
1189
|
+
break;
|
1498
1190
|
}
|
1499
1191
|
}
|
1500
|
-
rs = rb_str_new2(dr->str);
|
1501
|
-
#if HAS_ENCODING_SUPPORT
|
1502
|
-
if (0 != dr->encoding) {
|
1503
|
-
rb_enc_associate(rs, dr->encoding);
|
1504
|
-
}
|
1505
|
-
#elif HAS_PRIVATE_ENCODING
|
1506
|
-
if (Qnil != dr->encoding) {
|
1507
|
-
rb_funcall(rs, ox_force_encoding_id, 1, dr->encoding);
|
1508
|
-
}
|
1509
|
-
#endif
|
1510
|
-
return rs;
|
1511
1192
|
}
|
1512
1193
|
|
1513
|
-
static
|
1514
|
-
|
1515
|
-
|
1194
|
+
static Nv
|
1195
|
+
hint_try_close(SaxDrive dr, const char *name) {
|
1196
|
+
Hint h = ox_hint_find(dr->hints, name);
|
1197
|
+
Nv nv;
|
1516
1198
|
|
1517
|
-
if (
|
1518
|
-
return
|
1199
|
+
if (0 == h) {
|
1200
|
+
return 0;
|
1519
1201
|
}
|
1520
|
-
|
1521
|
-
|
1522
|
-
|
1523
|
-
|
1524
|
-
|
1525
|
-
|
1526
|
-
|
1527
|
-
|
1528
|
-
|
1529
|
-
|
1530
|
-
|
1531
|
-
}
|
1532
|
-
|
1533
|
-
static VALUE
|
1534
|
-
sax_value_as_i(VALUE self) {
|
1535
|
-
SaxDrive dr = DATA_PTR(self);
|
1536
|
-
const char *s = dr->str;
|
1537
|
-
long n = 0;
|
1538
|
-
int neg = 0;
|
1539
|
-
|
1540
|
-
if ('\0' == *s) {
|
1541
|
-
return Qnil;
|
1542
|
-
}
|
1543
|
-
if ('-' == *s) {
|
1544
|
-
neg = 1;
|
1545
|
-
s++;
|
1546
|
-
} else if ('+' == *s) {
|
1547
|
-
s++;
|
1548
|
-
}
|
1549
|
-
for (; '\0' != *s; s++) {
|
1550
|
-
if ('0' <= *s && *s <= '9') {
|
1551
|
-
n = n * 10 + (*s - '0');
|
1202
|
+
for (nv = stack_peek(&dr->stack); 0 != nv; nv = stack_peek(&dr->stack)) {
|
1203
|
+
if (0 == strcasecmp(name, nv->name)) {
|
1204
|
+
stack_pop(&dr->stack);
|
1205
|
+
return nv;
|
1206
|
+
}
|
1207
|
+
if (0 == nv->hint) {
|
1208
|
+
break;
|
1209
|
+
}
|
1210
|
+
if (nv->hint->empty) {
|
1211
|
+
end_element_cb(dr, nv->val, dr->buf.line, dr->buf.col);
|
1212
|
+
dr->stack.tail = nv;
|
1552
1213
|
} else {
|
1553
|
-
|
1214
|
+
break;
|
1554
1215
|
}
|
1555
1216
|
}
|
1556
|
-
|
1557
|
-
n = -n;
|
1558
|
-
}
|
1559
|
-
return LONG2NUM(n);
|
1217
|
+
return 0;
|
1560
1218
|
}
|
1561
1219
|
|
1562
|
-
static
|
1563
|
-
|
1564
|
-
|
1565
|
-
|
1566
|
-
|
1567
|
-
|
1568
|
-
|
1569
|
-
|
1570
|
-
|
1571
|
-
|
1572
|
-
Qnil == (t = parse_xsd_time(str))) {
|
1573
|
-
VALUE args[1];
|
1574
|
-
|
1575
|
-
/*printf("**** time parse\n"); */
|
1576
|
-
*args = rb_str_new2(str);
|
1577
|
-
t = rb_funcall2(ox_time_class, ox_parse_id, 1, args);
|
1220
|
+
static void
|
1221
|
+
end_element_cb(SaxDrive dr, VALUE name, int line, int col) {
|
1222
|
+
if (dr->has.end_element) {
|
1223
|
+
if (dr->has.line) {
|
1224
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(line));
|
1225
|
+
}
|
1226
|
+
if (dr->has.column) {
|
1227
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(col));
|
1228
|
+
}
|
1229
|
+
rb_funcall(dr->handler, ox_end_element_id, 1, name);
|
1578
1230
|
}
|
1579
|
-
return t;
|
1580
|
-
}
|
1581
|
-
|
1582
|
-
static VALUE
|
1583
|
-
sax_value_as_bool(VALUE self) {
|
1584
|
-
return (0 == strcasecmp("true", ((SaxDrive)DATA_PTR(self))->str)) ? Qtrue : Qfalse;
|
1585
|
-
}
|
1586
|
-
|
1587
|
-
static VALUE
|
1588
|
-
sax_value_empty(VALUE self) {
|
1589
|
-
return ('\0' == *((SaxDrive)DATA_PTR(self))->str) ? Qtrue : Qfalse;
|
1590
|
-
}
|
1591
|
-
|
1592
|
-
void
|
1593
|
-
ox_sax_define() {
|
1594
|
-
VALUE sax_module = rb_const_get_at(Ox, rb_intern("Sax"));
|
1595
|
-
|
1596
|
-
sax_value_class = rb_define_class_under(sax_module, "Value", rb_cObject);
|
1597
|
-
|
1598
|
-
rb_define_method(sax_value_class, "as_s", sax_value_as_s, 0);
|
1599
|
-
rb_define_method(sax_value_class, "as_sym", sax_value_as_sym, 0);
|
1600
|
-
rb_define_method(sax_value_class, "as_i", sax_value_as_i, 0);
|
1601
|
-
rb_define_method(sax_value_class, "as_f", sax_value_as_f, 0);
|
1602
|
-
rb_define_method(sax_value_class, "as_time", sax_value_as_time, 0);
|
1603
|
-
rb_define_method(sax_value_class, "as_bool", sax_value_as_bool, 0);
|
1604
|
-
rb_define_method(sax_value_class, "empty?", sax_value_empty, 0);
|
1605
1231
|
}
|