ox 1.7.1 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +4 -0
- data/ext/ox/dump.c +37 -0
- data/ext/ox/gen_load.c +74 -7
- data/ext/ox/obj_load.c +2 -2
- data/ext/ox/ox.c +9 -3
- data/ext/ox/ox.h +4 -1
- data/ext/ox/parse.c +51 -9
- data/ext/ox/sax.c +108 -43
- data/lib/ox.rb +2 -0
- data/lib/ox/element.rb +3 -25
- data/lib/ox/hasattrs.rb +49 -0
- data/lib/ox/instruct.rb +38 -0
- data/lib/ox/sax.rb +4 -0
- data/lib/ox/version.rb +1 -1
- metadata +4 -2
data/README.md
CHANGED
@@ -34,6 +34,10 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
+
### Release 1.8.0
|
38
|
+
|
39
|
+
- Added more complete support for processing instructions in both the generic parser and the sax parser. This change includes an additional sax handler callback for the end of instruction processing.
|
40
|
+
|
37
41
|
### Release 1.7.1
|
38
42
|
|
39
43
|
- Pulled in sharpyfox's changes to make Ox work with Windows. (issue #24)
|
data/ext/ox/dump.c
CHANGED
@@ -77,6 +77,7 @@ static void dump_first_obj(VALUE obj, Out out);
|
|
77
77
|
static void dump_obj(ID aid, VALUE obj, unsigned int depth, Out out);
|
78
78
|
static void dump_gen_doc(VALUE obj, unsigned int depth, Out out);
|
79
79
|
static void dump_gen_element(VALUE obj, unsigned int depth, Out out);
|
80
|
+
static void dump_gen_instruct(VALUE obj, unsigned int depth, Out out);
|
80
81
|
static int dump_gen_attr(VALUE key, VALUE value, Out out);
|
81
82
|
static int dump_gen_nodes(VALUE obj, unsigned int depth, Out out);
|
82
83
|
static void dump_gen_val_node(VALUE obj, unsigned int depth,
|
@@ -1064,6 +1065,40 @@ dump_gen_element(VALUE obj, unsigned int depth, Out out) {
|
|
1064
1065
|
*out->cur = '\0';
|
1065
1066
|
}
|
1066
1067
|
|
1068
|
+
static void
|
1069
|
+
dump_gen_instruct(VALUE obj, unsigned int depth, Out out) {
|
1070
|
+
VALUE rname = rb_attr_get(obj, ox_at_value_id);
|
1071
|
+
VALUE attrs = rb_attr_get(obj, ox_attributes_id);
|
1072
|
+
VALUE rcontent = rb_attr_get(obj, ox_at_content_id);
|
1073
|
+
const char *name = StringValuePtr(rname);
|
1074
|
+
const char *content = 0;
|
1075
|
+
long nlen = RSTRING_LEN(rname);
|
1076
|
+
long clen = 0;
|
1077
|
+
size_t size;
|
1078
|
+
|
1079
|
+
if (T_STRING == rb_type(rcontent)) {
|
1080
|
+
content = StringValuePtr(rcontent);
|
1081
|
+
clen = RSTRING_LEN(rcontent);
|
1082
|
+
size = 4 + nlen + clen;
|
1083
|
+
} else {
|
1084
|
+
size = 4 + nlen;
|
1085
|
+
}
|
1086
|
+
if (out->end - out->cur <= (long)size) {
|
1087
|
+
grow(out, size);
|
1088
|
+
}
|
1089
|
+
*out->cur++ = '<';
|
1090
|
+
*out->cur++ = '?';
|
1091
|
+
fill_value(out, name, nlen);
|
1092
|
+
if (0 != content) {
|
1093
|
+
fill_value(out, content, clen);
|
1094
|
+
} else if (Qnil != attrs) {
|
1095
|
+
rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out);
|
1096
|
+
}
|
1097
|
+
*out->cur++ = '?';
|
1098
|
+
*out->cur++ = '>';
|
1099
|
+
*out->cur = '\0';
|
1100
|
+
}
|
1101
|
+
|
1067
1102
|
static int
|
1068
1103
|
dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
|
1069
1104
|
long cnt = RARRAY_LEN(obj);
|
@@ -1078,6 +1113,8 @@ dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
|
|
1078
1113
|
clas = rb_obj_class(*np);
|
1079
1114
|
if (ox_element_clas == clas) {
|
1080
1115
|
dump_gen_element(*np, d2, out);
|
1116
|
+
} else if (ox_instruct_clas == clas) {
|
1117
|
+
dump_gen_instruct(*np, d2, out);
|
1081
1118
|
} else if (rb_cString == clas) {
|
1082
1119
|
dump_str_value(out, StringValuePtr(*np), RSTRING_LEN(*np));
|
1083
1120
|
indent_needed = (1 == cnt) ? 0 : 1;
|
data/ext/ox/gen_load.c
CHANGED
@@ -37,16 +37,17 @@
|
|
37
37
|
#include "ruby.h"
|
38
38
|
#include "ox.h"
|
39
39
|
|
40
|
-
static void instruct(PInfo pi, const char *target, Attr attrs);
|
40
|
+
static void instruct(PInfo pi, const char *target, Attr attrs, const char *content);
|
41
41
|
static void create_doc(PInfo pi);
|
42
42
|
static void create_prolog_doc(PInfo pi, const char *target, Attr attrs);
|
43
|
-
static void nomode_instruct(PInfo pi, const char *target, Attr attrs);
|
43
|
+
static void nomode_instruct(PInfo pi, const char *target, Attr attrs, const char *content);
|
44
44
|
static void add_doctype(PInfo pi, const char *docType);
|
45
45
|
static void add_comment(PInfo pi, const char *comment);
|
46
46
|
static void add_cdata(PInfo pi, const char *cdata, size_t len);
|
47
47
|
static void add_text(PInfo pi, char *text, int closed);
|
48
48
|
static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren);
|
49
49
|
static void end_element(PInfo pi, const char *ename);
|
50
|
+
static void add_instruct(PInfo pi, const char *name, Attr attrs, const char *content);
|
50
51
|
|
51
52
|
extern ParseCallbacks ox_obj_callbacks;
|
52
53
|
|
@@ -152,7 +153,7 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
|
|
152
153
|
}
|
153
154
|
|
154
155
|
static void
|
155
|
-
instruct(PInfo pi, const char *target, Attr attrs) {
|
156
|
+
instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
|
156
157
|
if (0 == strcmp("xml", target)) {
|
157
158
|
create_prolog_doc(pi, target, attrs);
|
158
159
|
} else if (0 == strcmp("ox", target)) {
|
@@ -165,14 +166,12 @@ instruct(PInfo pi, const char *target, Attr attrs) {
|
|
165
166
|
/* ignore other instructions */
|
166
167
|
}
|
167
168
|
} else {
|
168
|
-
|
169
|
-
printf("Processing instruction %s ignored.\n", target);
|
170
|
-
}
|
169
|
+
add_instruct(pi, target, attrs, content);
|
171
170
|
}
|
172
171
|
}
|
173
172
|
|
174
173
|
static void
|
175
|
-
nomode_instruct(PInfo pi, const char *target, Attr attrs) {
|
174
|
+
nomode_instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
|
176
175
|
if (0 == strcmp("xml", target)) {
|
177
176
|
create_prolog_doc(pi, target, attrs);
|
178
177
|
} else if (0 == strcmp("ox", target)) {
|
@@ -344,3 +343,71 @@ end_element(PInfo pi, const char *ename) {
|
|
344
343
|
pi->h--;
|
345
344
|
}
|
346
345
|
}
|
346
|
+
|
347
|
+
static void
|
348
|
+
add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
|
349
|
+
VALUE inst;
|
350
|
+
VALUE s = rb_str_new2(name);
|
351
|
+
VALUE c = Qnil;
|
352
|
+
|
353
|
+
if (0 != content) {
|
354
|
+
c = rb_str_new2(content);
|
355
|
+
}
|
356
|
+
#if HAS_ENCODING_SUPPORT
|
357
|
+
if (0 != pi->options->rb_enc) {
|
358
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
359
|
+
if (0 != content) {
|
360
|
+
rb_enc_associate(c, pi->options->rb_enc);
|
361
|
+
}
|
362
|
+
}
|
363
|
+
#endif
|
364
|
+
inst = rb_obj_alloc(ox_instruct_clas);
|
365
|
+
rb_ivar_set(inst, ox_at_value_id, s);
|
366
|
+
if (0 != content) {
|
367
|
+
rb_ivar_set(inst, ox_at_content_id, c);
|
368
|
+
} else if (0 != attrs->name) {
|
369
|
+
VALUE ah = rb_hash_new();
|
370
|
+
|
371
|
+
for (; 0 != attrs->name; attrs++) {
|
372
|
+
VALUE sym;
|
373
|
+
VALUE *slot;
|
374
|
+
|
375
|
+
if (Yes == pi->options->sym_keys) {
|
376
|
+
if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot))) {
|
377
|
+
#if HAS_ENCODING_SUPPORT
|
378
|
+
if (0 != pi->options->rb_enc) {
|
379
|
+
VALUE rstr = rb_str_new2(attrs->name);
|
380
|
+
|
381
|
+
rb_enc_associate(rstr, pi->options->rb_enc);
|
382
|
+
sym = rb_funcall(rstr, ox_to_sym_id, 0);
|
383
|
+
} else {
|
384
|
+
sym = ID2SYM(rb_intern(attrs->name));
|
385
|
+
}
|
386
|
+
#else
|
387
|
+
sym = ID2SYM(rb_intern(attrs->name));
|
388
|
+
#endif
|
389
|
+
*slot = sym;
|
390
|
+
}
|
391
|
+
} else {
|
392
|
+
sym = rb_str_new2(attrs->name);
|
393
|
+
#if HAS_ENCODING_SUPPORT
|
394
|
+
if (0 != pi->options->rb_enc) {
|
395
|
+
rb_enc_associate(sym, pi->options->rb_enc);
|
396
|
+
}
|
397
|
+
#endif
|
398
|
+
}
|
399
|
+
s = rb_str_new2(attrs->value);
|
400
|
+
#if HAS_ENCODING_SUPPORT
|
401
|
+
if (0 != pi->options->rb_enc) {
|
402
|
+
rb_enc_associate(s, pi->options->rb_enc);
|
403
|
+
}
|
404
|
+
#endif
|
405
|
+
rb_hash_aset(ah, sym, s);
|
406
|
+
}
|
407
|
+
rb_ivar_set(inst, ox_attributes_id, ah);
|
408
|
+
}
|
409
|
+
if (0 == pi->h) { /* top level object */
|
410
|
+
create_doc(pi);
|
411
|
+
}
|
412
|
+
rb_ary_push(pi->h->obj, inst);
|
413
|
+
}
|
data/ext/ox/obj_load.c
CHANGED
@@ -39,7 +39,7 @@
|
|
39
39
|
#include "base64.h"
|
40
40
|
#include "ox.h"
|
41
41
|
|
42
|
-
static void instruct(PInfo pi, const char *target, Attr attrs);
|
42
|
+
static void instruct(PInfo pi, const char *target, Attr attrs, const char *content);
|
43
43
|
static void add_text(PInfo pi, char *text, int closed);
|
44
44
|
static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren);
|
45
45
|
static void end_element(PInfo pi, const char *ename);
|
@@ -394,7 +394,7 @@ parse_regexp(const char *text) {
|
|
394
394
|
}
|
395
395
|
|
396
396
|
static void
|
397
|
-
instruct(PInfo pi, const char *target, Attr attrs) {
|
397
|
+
instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
|
398
398
|
if (0 == strcmp("xml", target)) {
|
399
399
|
#if HAS_ENCODING_SUPPORT
|
400
400
|
for (; 0 != attrs->name; attrs++) {
|
data/ext/ox/ox.c
CHANGED
@@ -48,6 +48,7 @@ void Init_ox();
|
|
48
48
|
|
49
49
|
VALUE Ox = Qnil;
|
50
50
|
|
51
|
+
ID ox_at_content_id;
|
51
52
|
ID ox_at_id;
|
52
53
|
ID ox_at_value_id;
|
53
54
|
ID ox_attr_id;
|
@@ -60,6 +61,7 @@ ID ox_den_id;
|
|
60
61
|
ID ox_doctype_id;
|
61
62
|
ID ox_end_element_id;
|
62
63
|
ID ox_end_id;
|
64
|
+
ID ox_end_instruct_id;
|
63
65
|
ID ox_error_id;
|
64
66
|
ID ox_excl_id;
|
65
67
|
ID ox_external_encoding_id;
|
@@ -92,16 +94,17 @@ VALUE ox_encoding_sym;
|
|
92
94
|
VALUE ox_empty_string;
|
93
95
|
VALUE ox_zero_fixnum;
|
94
96
|
|
97
|
+
VALUE ox_bag_clas;
|
95
98
|
VALUE ox_cdata_clas;
|
96
99
|
VALUE ox_comment_clas;
|
100
|
+
VALUE ox_date_class;
|
97
101
|
VALUE ox_doctype_clas;
|
98
102
|
VALUE ox_document_clas;
|
99
103
|
VALUE ox_element_clas;
|
100
|
-
VALUE
|
104
|
+
VALUE ox_instruct_clas;
|
105
|
+
VALUE ox_stringio_class;
|
101
106
|
VALUE ox_struct_class;
|
102
107
|
VALUE ox_time_class;
|
103
|
-
VALUE ox_date_class;
|
104
|
-
VALUE ox_stringio_class;
|
105
108
|
|
106
109
|
Cache ox_symbol_cache = 0;
|
107
110
|
Cache ox_class_cache = 0;
|
@@ -780,11 +783,13 @@ void Init_ox() {
|
|
780
783
|
ox_beg_id = rb_intern("@beg");
|
781
784
|
ox_cdata_id = rb_intern("cdata");
|
782
785
|
ox_comment_id = rb_intern("comment");
|
786
|
+
ox_at_content_id = rb_intern("@content");
|
783
787
|
ox_den_id = rb_intern("@den");
|
784
788
|
ox_doctype_id = rb_intern("doctype");
|
785
789
|
ox_external_encoding_id = rb_intern("external_encoding");
|
786
790
|
ox_end_element_id = rb_intern("end_element");
|
787
791
|
ox_end_id = rb_intern("@end");
|
792
|
+
ox_end_instruct_id = rb_intern("end_instruct");
|
788
793
|
ox_error_id = rb_intern("error");
|
789
794
|
ox_excl_id = rb_intern("@excl");
|
790
795
|
ox_fileno_id = rb_intern("fileno");
|
@@ -843,6 +848,7 @@ void Init_ox() {
|
|
843
848
|
|
844
849
|
ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
|
845
850
|
ox_element_clas = rb_const_get_at(Ox, rb_intern("Element"));
|
851
|
+
ox_instruct_clas = rb_const_get_at(Ox, rb_intern("Instruct"));
|
846
852
|
ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment"));
|
847
853
|
ox_doctype_clas = rb_const_get_at(Ox, rb_intern("DocType"));
|
848
854
|
ox_cdata_clas = rb_const_get_at(Ox, rb_intern("CData"));
|
data/ext/ox/ox.h
CHANGED
@@ -158,7 +158,7 @@ typedef struct _Helper {
|
|
158
158
|
typedef struct _PInfo *PInfo;
|
159
159
|
|
160
160
|
typedef struct _ParseCallbacks {
|
161
|
-
void (*instruct)(PInfo pi, const char *target, Attr attrs);
|
161
|
+
void (*instruct)(PInfo pi, const char *target, Attr attrs, const char *content);
|
162
162
|
void (*add_doctype)(PInfo pi, const char *docType);
|
163
163
|
void (*add_comment)(PInfo pi, const char *comment);
|
164
164
|
void (*add_cdata)(PInfo pi, const char *cdata, size_t len);
|
@@ -220,6 +220,7 @@ extern struct _Options ox_default_options;
|
|
220
220
|
|
221
221
|
extern VALUE Ox;
|
222
222
|
|
223
|
+
extern ID ox_at_content_id;
|
223
224
|
extern ID ox_at_id;
|
224
225
|
extern ID ox_at_value_id;
|
225
226
|
extern ID ox_attr_id;
|
@@ -232,6 +233,7 @@ extern ID ox_den_id;
|
|
232
233
|
extern ID ox_doctype_id;
|
233
234
|
extern ID ox_end_element_id;
|
234
235
|
extern ID ox_end_id;
|
236
|
+
extern ID ox_end_instruct_id;
|
235
237
|
extern ID ox_error_id;
|
236
238
|
extern ID ox_excl_id;
|
237
239
|
extern ID ox_external_encoding_id;
|
@@ -275,6 +277,7 @@ extern VALUE ox_zero_fixnum;
|
|
275
277
|
|
276
278
|
extern VALUE ox_document_clas;
|
277
279
|
extern VALUE ox_element_clas;
|
280
|
+
extern VALUE ox_instruct_clas;
|
278
281
|
extern VALUE ox_bag_clas;
|
279
282
|
extern VALUE ox_comment_clas;
|
280
283
|
extern VALUE ox_doctype_clas;
|
data/ext/ox/parse.c
CHANGED
@@ -68,7 +68,7 @@ static int collapse_special(PInfo pi, char *str);
|
|
68
68
|
inline static void
|
69
69
|
next_non_white(PInfo pi) {
|
70
70
|
for (; 1; pi->s++) {
|
71
|
-
switch(*pi->s) {
|
71
|
+
switch (*pi->s) {
|
72
72
|
case ' ':
|
73
73
|
case '\t':
|
74
74
|
case '\f':
|
@@ -84,7 +84,7 @@ next_non_white(PInfo pi) {
|
|
84
84
|
inline static void
|
85
85
|
next_white(PInfo pi) {
|
86
86
|
for (; 1; pi->s++) {
|
87
|
-
switch(*pi->s) {
|
87
|
+
switch (*pi->s) {
|
88
88
|
case ' ':
|
89
89
|
case '\t':
|
90
90
|
case '\f':
|
@@ -131,7 +131,7 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
|
|
131
131
|
}
|
132
132
|
pi.s++; /* past < */
|
133
133
|
switch (*pi.s) {
|
134
|
-
case '?': /*
|
134
|
+
case '?': /* processing instruction */
|
135
135
|
pi.s++;
|
136
136
|
read_instruction(&pi);
|
137
137
|
break;
|
@@ -165,19 +165,47 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
|
|
165
165
|
return pi.obj;
|
166
166
|
}
|
167
167
|
|
168
|
+
static char*
|
169
|
+
gather_content(const char *src, char *content, size_t len) {
|
170
|
+
for (; 0 < len; src++, content++, len--) {
|
171
|
+
switch (*src) {
|
172
|
+
case '?':
|
173
|
+
if ('>' == *(src + 1)) {
|
174
|
+
*content = '\0';
|
175
|
+
return (char*)(src + 1);
|
176
|
+
}
|
177
|
+
*content = *src;
|
178
|
+
break;
|
179
|
+
case '\0':
|
180
|
+
return 0;
|
181
|
+
default:
|
182
|
+
*content = *src;
|
183
|
+
break;
|
184
|
+
}
|
185
|
+
}
|
186
|
+
return 0;
|
187
|
+
}
|
188
|
+
|
168
189
|
/* Entered after the "<?" sequence. Ready to read the rest.
|
169
190
|
*/
|
170
191
|
static void
|
171
192
|
read_instruction(PInfo pi) {
|
193
|
+
char content[1024];
|
172
194
|
struct _Attr attrs[MAX_ATTRS + 1];
|
173
195
|
Attr a = attrs;
|
174
196
|
char *target;
|
175
197
|
char *end;
|
176
198
|
char c;
|
177
|
-
|
199
|
+
char *cend;
|
200
|
+
int attrs_ok = 1;
|
201
|
+
|
202
|
+
*content = '\0';
|
178
203
|
memset(attrs, 0, sizeof(attrs));
|
179
204
|
target = read_name_token(pi);
|
180
205
|
end = pi->s;
|
206
|
+
if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) {
|
207
|
+
raise_error("processing instruction content too large or not terminated", pi->str, pi->s);
|
208
|
+
}
|
181
209
|
next_non_white(pi);
|
182
210
|
c = *pi->s;
|
183
211
|
*end = '\0'; /* terminate name */
|
@@ -191,7 +219,8 @@ read_instruction(PInfo pi) {
|
|
191
219
|
end = pi->s;
|
192
220
|
next_non_white(pi);
|
193
221
|
if ('=' != *pi->s++) {
|
194
|
-
|
222
|
+
attrs_ok = 0;
|
223
|
+
break;
|
195
224
|
}
|
196
225
|
*end = '\0'; /* terminate name */
|
197
226
|
/* read value */
|
@@ -199,7 +228,8 @@ read_instruction(PInfo pi) {
|
|
199
228
|
a->value = read_quoted_value(pi);
|
200
229
|
a++;
|
201
230
|
if (MAX_ATTRS <= (a - attrs)) {
|
202
|
-
|
231
|
+
attrs_ok = 0;
|
232
|
+
break;
|
203
233
|
}
|
204
234
|
next_non_white(pi);
|
205
235
|
}
|
@@ -209,11 +239,19 @@ read_instruction(PInfo pi) {
|
|
209
239
|
} else {
|
210
240
|
pi->s++;
|
211
241
|
}
|
212
|
-
if (
|
213
|
-
|
242
|
+
if (attrs_ok) {
|
243
|
+
if ('>' != *pi->s++) {
|
244
|
+
raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
|
245
|
+
}
|
246
|
+
} else {
|
247
|
+
pi->s = cend + 1;
|
214
248
|
}
|
215
249
|
if (0 != pi->pcb->instruct) {
|
216
|
-
|
250
|
+
if (attrs_ok) {
|
251
|
+
pi->pcb->instruct(pi, target, attrs, 0);
|
252
|
+
} else {
|
253
|
+
pi->pcb->instruct(pi, target, attrs, content);
|
254
|
+
}
|
217
255
|
}
|
218
256
|
}
|
219
257
|
|
@@ -404,6 +442,10 @@ read_element(PInfo pi) {
|
|
404
442
|
raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
|
405
443
|
}
|
406
444
|
break;
|
445
|
+
case '?': /* processing instruction */
|
446
|
+
pi->s++;
|
447
|
+
read_instruction(pi);
|
448
|
+
break;
|
407
449
|
case '/':
|
408
450
|
slash = pi->s;
|
409
451
|
pi->s++;
|
data/ext/ox/sax.c
CHANGED
@@ -61,6 +61,7 @@ typedef struct _SaxDrive {
|
|
61
61
|
const char *in_str;
|
62
62
|
};
|
63
63
|
int has_instruct;
|
64
|
+
int has_end_instruct;
|
64
65
|
int has_attr;
|
65
66
|
int has_attr_value;
|
66
67
|
int has_doctype;
|
@@ -76,34 +77,34 @@ typedef struct _SaxDrive {
|
|
76
77
|
#endif
|
77
78
|
} *SaxDrive;
|
78
79
|
|
79
|
-
static void
|
80
|
-
static void
|
81
|
-
static int
|
82
|
-
static void
|
83
|
-
|
84
|
-
static int
|
85
|
-
static int
|
86
|
-
static int
|
87
|
-
static int
|
88
|
-
static int
|
89
|
-
static int
|
90
|
-
static int
|
91
|
-
static
|
92
|
-
static char
|
93
|
-
static int
|
94
|
-
static int
|
95
|
-
|
96
|
-
static VALUE
|
97
|
-
static VALUE
|
98
|
-
static VALUE
|
99
|
-
static int
|
80
|
+
static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert);
|
81
|
+
static void sax_drive_cleanup(SaxDrive dr);
|
82
|
+
static int sax_drive_read(SaxDrive dr);
|
83
|
+
static void sax_drive_error(SaxDrive dr, const char *msg, int critical);
|
84
|
+
|
85
|
+
static int read_children(SaxDrive dr, int first);
|
86
|
+
static int read_instruction(SaxDrive dr);
|
87
|
+
static int read_doctype(SaxDrive dr);
|
88
|
+
static int read_cdata(SaxDrive dr);
|
89
|
+
static int read_comment(SaxDrive dr);
|
90
|
+
static int read_element(SaxDrive dr);
|
91
|
+
static int read_text(SaxDrive dr);
|
92
|
+
static const char* read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml);
|
93
|
+
static char read_name_token(SaxDrive dr);
|
94
|
+
static int read_quoted_value(SaxDrive dr);
|
95
|
+
static int collapse_special(char *str);
|
96
|
+
|
97
|
+
static VALUE rescue_cb(VALUE rdr, VALUE err);
|
98
|
+
static VALUE io_cb(VALUE rdr);
|
99
|
+
static VALUE partial_io_cb(VALUE rdr);
|
100
|
+
static int read_from_io(SaxDrive dr);
|
100
101
|
#ifndef JRUBY_RUBY
|
101
|
-
static int
|
102
|
+
static int read_from_fd(SaxDrive dr);
|
102
103
|
#endif
|
103
|
-
static int
|
104
|
-
static int
|
104
|
+
static int read_from_io_partial(SaxDrive dr);
|
105
|
+
static int read_from_str(SaxDrive dr);
|
105
106
|
|
106
|
-
static VALUE
|
107
|
+
static VALUE sax_value_class;
|
107
108
|
|
108
109
|
/* This is only for CentOS 5.4 with Ruby 1.9.3-p0 and for OS X 10.6. */
|
109
110
|
#ifdef NEEDS_STPCPY
|
@@ -227,6 +228,7 @@ ox_sax_parse(VALUE handler, VALUE io, int convert) {
|
|
227
228
|
#if 0
|
228
229
|
printf("*** sax_parse with these flags\n");
|
229
230
|
printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
|
231
|
+
printf(" has_end_instruct = %s\n", dr.has_end_instruct ? "true" : "false");
|
230
232
|
printf(" has_attr = %s\n", dr.has_attr ? "true" : "false");
|
231
233
|
printf(" has_attr_value = %s\n", dr.has_attr_value ? "true" : "false");
|
232
234
|
printf(" has_doctype = %s\n", dr.has_doctype ? "true" : "false");
|
@@ -311,6 +313,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
311
313
|
rb_gc_register_address(&dr->value_obj);
|
312
314
|
dr->convert_special = convert;
|
313
315
|
dr->has_instruct = respond_to(handler, ox_instruct_id);
|
316
|
+
dr->has_end_instruct = respond_to(handler, ox_end_instruct_id);
|
314
317
|
dr->has_attr = respond_to(handler, ox_attr_id);
|
315
318
|
dr->has_attr_value = respond_to(handler, ox_attr_value_id);
|
316
319
|
dr->has_doctype = respond_to(handler, ox_doctype_id);
|
@@ -444,9 +447,6 @@ read_children(SaxDrive dr, int first) {
|
|
444
447
|
c = sax_drive_get(dr);
|
445
448
|
switch (c) {
|
446
449
|
case '?': /* instructions (xml or otherwise) */
|
447
|
-
if (!first || element_read || doctype_read) {
|
448
|
-
sax_drive_error(dr, "invalid format, instruction must come before elements", 0);
|
449
|
-
}
|
450
450
|
err = read_instruction(dr);
|
451
451
|
break;
|
452
452
|
case '!': /* comment or doctype */
|
@@ -504,28 +504,88 @@ read_children(SaxDrive dr, int first) {
|
|
504
504
|
return err;
|
505
505
|
}
|
506
506
|
|
507
|
+
static void
|
508
|
+
read_content(SaxDrive dr, char *content, size_t len) {
|
509
|
+
char c;
|
510
|
+
char *end = content + len;
|
511
|
+
|
512
|
+
while ('\0' != (c = sax_drive_get(dr))) {
|
513
|
+
if (end < content) {
|
514
|
+
sax_drive_error(dr, "processing instruction content too large", 1);
|
515
|
+
}
|
516
|
+
if ('?' == c) {
|
517
|
+
if ('\0' == (c = sax_drive_get(dr))) {
|
518
|
+
sax_drive_error(dr, "invalid format, document not terminated", 1);
|
519
|
+
}
|
520
|
+
if ('>' == c) {
|
521
|
+
*content = '\0';
|
522
|
+
return;
|
523
|
+
} else {
|
524
|
+
*content++ = c;
|
525
|
+
}
|
526
|
+
} else {
|
527
|
+
*content++ = c;
|
528
|
+
}
|
529
|
+
}
|
530
|
+
*content = '\0';
|
531
|
+
}
|
532
|
+
|
507
533
|
/* Entered after the "<?" sequence. Ready to read the rest.
|
508
534
|
*/
|
509
535
|
static int
|
510
536
|
read_instruction(SaxDrive dr) {
|
537
|
+
char content[1024];
|
511
538
|
char c;
|
539
|
+
char *cend;
|
540
|
+
const char *err;
|
541
|
+
VALUE target = Qnil;
|
512
542
|
|
513
543
|
if ('\0' == (c = read_name_token(dr))) {
|
514
544
|
return -1;
|
515
545
|
}
|
546
|
+
if (dr->has_instruct || dr->has_end_instruct) {
|
547
|
+
target = rb_str_new2(dr->str);
|
548
|
+
}
|
516
549
|
if (dr->has_instruct) {
|
517
550
|
VALUE args[1];
|
518
551
|
|
519
|
-
args[0] =
|
552
|
+
args[0] = target;
|
520
553
|
rb_funcall2(dr->handler, ox_instruct_id, 1, args);
|
521
554
|
}
|
522
|
-
|
523
|
-
|
555
|
+
dr->str = dr->cur; /* make sure the start doesn't get compacted out */
|
556
|
+
read_content(dr, content, sizeof(content) - 1);
|
557
|
+
cend = dr->cur;
|
558
|
+
dr->cur = dr->str;
|
559
|
+
if (0 != (err = read_attrs(dr, c, '?', '?', (0 == strcmp("xml", dr->str))))) {
|
560
|
+
if (dr->has_text) {
|
561
|
+
VALUE args[1];
|
562
|
+
|
563
|
+
if (dr->convert_special) {
|
564
|
+
if (0 != collapse_special(content)) {
|
565
|
+
sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
|
566
|
+
}
|
567
|
+
}
|
568
|
+
args[0] = rb_str_new2(content);
|
569
|
+
#if HAS_ENCODING_SUPPORT
|
570
|
+
if (0 != dr->encoding) {
|
571
|
+
rb_enc_associate(args[0], dr->encoding);
|
572
|
+
}
|
573
|
+
#endif
|
574
|
+
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
575
|
+
}
|
576
|
+
dr->cur = cend;
|
577
|
+
} else {
|
578
|
+
c = next_non_white(dr);
|
579
|
+
if ('>' != c) {
|
580
|
+
sax_drive_error(dr, "invalid format, instruction not terminated", 1);
|
581
|
+
return -1;
|
582
|
+
}
|
524
583
|
}
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
584
|
+
if (dr->has_end_instruct) {
|
585
|
+
VALUE args[1];
|
586
|
+
|
587
|
+
args[0] = target;
|
588
|
+
rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
|
529
589
|
}
|
530
590
|
dr->str = 0;
|
531
591
|
|
@@ -649,6 +709,7 @@ read_comment(SaxDrive dr) {
|
|
649
709
|
static int
|
650
710
|
read_element(SaxDrive dr) {
|
651
711
|
VALUE name = Qnil;
|
712
|
+
const char *err;
|
652
713
|
char c;
|
653
714
|
int closed;
|
654
715
|
|
@@ -667,7 +728,8 @@ read_element(SaxDrive dr) {
|
|
667
728
|
} else if ('>' == c) {
|
668
729
|
closed = 0;
|
669
730
|
} else {
|
670
|
-
if (0 != read_attrs(dr, c, '/', '>', 0)) {
|
731
|
+
if (0 != (err = read_attrs(dr, c, '/', '>', 0))) {
|
732
|
+
sax_drive_error(dr, err, 1);
|
671
733
|
return -1;
|
672
734
|
}
|
673
735
|
closed = ('/' == *(dr->cur - 1));
|
@@ -739,10 +801,12 @@ read_text(SaxDrive dr) {
|
|
739
801
|
#endif
|
740
802
|
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
741
803
|
}
|
804
|
+
dr->str = 0;
|
805
|
+
|
742
806
|
return 0;
|
743
807
|
}
|
744
808
|
|
745
|
-
static
|
809
|
+
static const char*
|
746
810
|
read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
747
811
|
VALUE name = Qnil;
|
748
812
|
int is_encoding = 0;
|
@@ -754,11 +818,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
754
818
|
while (termc != c && term2 != c) {
|
755
819
|
dr->cur--;
|
756
820
|
if ('\0' == c) {
|
757
|
-
|
758
|
-
return -1;
|
821
|
+
return "invalid format, attributes not terminated";
|
759
822
|
}
|
760
823
|
if ('\0' == (c = read_name_token(dr))) {
|
761
|
-
return
|
824
|
+
return "error reading tolen";
|
762
825
|
}
|
763
826
|
if (is_xml && 0 == strcmp("encoding", dr->str)) {
|
764
827
|
is_encoding = 1;
|
@@ -771,11 +834,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
771
834
|
c = next_non_white(dr);
|
772
835
|
}
|
773
836
|
if ('=' != c) {
|
774
|
-
|
775
|
-
return -1;
|
837
|
+
return "invalid format, no attribute value";
|
776
838
|
}
|
777
839
|
if (0 != read_quoted_value(dr)) {
|
778
|
-
return
|
840
|
+
return "error reading quoted value";
|
779
841
|
}
|
780
842
|
if (is_encoding) {
|
781
843
|
#if HAS_ENCODING_SUPPORT
|
@@ -806,6 +868,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
806
868
|
}
|
807
869
|
c = next_non_white(dr);
|
808
870
|
}
|
871
|
+
dr->str = 0;
|
872
|
+
|
809
873
|
return 0;
|
810
874
|
}
|
811
875
|
|
@@ -870,6 +934,7 @@ read_quoted_value(SaxDrive dr) {
|
|
870
934
|
}
|
871
935
|
}
|
872
936
|
*(dr->cur - 1) = '\0'; /* terminate value */
|
937
|
+
|
873
938
|
return 0;
|
874
939
|
}
|
875
940
|
|
data/lib/ox.rb
CHANGED
data/lib/ox/element.rb
CHANGED
@@ -35,6 +35,7 @@ module Ox
|
|
35
35
|
# => "58"
|
36
36
|
|
37
37
|
class Element < Node
|
38
|
+
include HasAttrs
|
38
39
|
|
39
40
|
# Creates a new Element with the specified name.
|
40
41
|
# @param [String] name name of the Element
|
@@ -61,29 +62,6 @@ module Ox
|
|
61
62
|
@nodes << node
|
62
63
|
end
|
63
64
|
|
64
|
-
# Returns all the attributes of the Element as a Hash.
|
65
|
-
# @return [Hash] all attributes and attribute values.
|
66
|
-
def attributes
|
67
|
-
@attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
|
68
|
-
@attributes
|
69
|
-
end
|
70
|
-
|
71
|
-
# Returns the value of an attribute.
|
72
|
-
# @param [Symbol|String] attr attribute name or key to return the value for
|
73
|
-
def [](attr)
|
74
|
-
return nil unless instance_variable_defined?(:@attributes) and @attributes.is_a?(Hash)
|
75
|
-
@attributes[attr] or (attr.is_a?(String) ? @attributes[attr.to_sym] : @attributes[attr.to_s])
|
76
|
-
end
|
77
|
-
|
78
|
-
# Adds or set an attribute of the Element.
|
79
|
-
# @param [Symbol|String] attr attribute name or key
|
80
|
-
# @param [Object] value value for the attribute
|
81
|
-
def []=(attr, value)
|
82
|
-
raise "argument to [] must be a Symbol or a String." unless attr.is_a?(Symbol) or attr.is_a?(String)
|
83
|
-
@attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
|
84
|
-
@attributes[attr] = value.to_s
|
85
|
-
end
|
86
|
-
|
87
65
|
# Returns true if this Object and other are of the same type and have the
|
88
66
|
# equivalent value and the equivalent elements otherwise false is returned.
|
89
67
|
# @param [Object] other Object compare _self_ to.
|
@@ -156,7 +134,7 @@ module Ox
|
|
156
134
|
ids = id.to_s
|
157
135
|
i = args[0].to_i # will be 0 if no arg or parsing fails
|
158
136
|
@nodes.each do |n|
|
159
|
-
if n.is_a?(Element) && (n.value == id || n.value == ids)
|
137
|
+
if (n.is_a?(Element) || n.is_a?(Instruct)) && (n.value == id || n.value == ids)
|
160
138
|
return n if 0 == i
|
161
139
|
i -= 1
|
162
140
|
end
|
@@ -165,7 +143,7 @@ module Ox
|
|
165
143
|
return @attributes[id] if @attributes.has_key?(id)
|
166
144
|
return @attributes[ids] if @attributes.has_key?(ids)
|
167
145
|
end
|
168
|
-
raise NoMethodError.new("#{
|
146
|
+
raise NoMethodError.new("#{ids} not found", name)
|
169
147
|
end
|
170
148
|
|
171
149
|
# @param [Array] path array of steps in a path
|
data/lib/ox/hasattrs.rb
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
|
2
|
+
module Ox
|
3
|
+
|
4
|
+
# An Object that includes the HasAttrs module can have attributes which are a Hash of String values and either String
|
5
|
+
# or Symbol keys.
|
6
|
+
#
|
7
|
+
# To access the attributes there are several options. One is to walk the attributes. The easiest for simple regularly
|
8
|
+
# formatted XML is to reference the attributes simply by name.
|
9
|
+
|
10
|
+
module HasAttrs
|
11
|
+
# Returns all the attributes of the object as a Hash.
|
12
|
+
# @return [Hash] all attributes and attribute values.
|
13
|
+
def attributes
|
14
|
+
@attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
|
15
|
+
@attributes
|
16
|
+
end
|
17
|
+
|
18
|
+
# Returns the value of an attribute.
|
19
|
+
# @param [Symbol|String] attr attribute name or key to return the value for
|
20
|
+
def [](attr)
|
21
|
+
return nil unless instance_variable_defined?(:@attributes) and @attributes.is_a?(Hash)
|
22
|
+
@attributes[attr] or (attr.is_a?(String) ? @attributes[attr.to_sym] : @attributes[attr.to_s])
|
23
|
+
end
|
24
|
+
|
25
|
+
# Adds or sets an attribute of the object.
|
26
|
+
# @param [Symbol|String] attr attribute name or key
|
27
|
+
# @param [Object] value value for the attribute
|
28
|
+
def []=(attr, value)
|
29
|
+
raise "argument to [] must be a Symbol or a String." unless attr.is_a?(Symbol) or attr.is_a?(String)
|
30
|
+
@attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
|
31
|
+
@attributes[attr] = value.to_s
|
32
|
+
end
|
33
|
+
|
34
|
+
# Handles the 'easy' API that allows navigating a simple XML by
|
35
|
+
# referencing attributes by name.
|
36
|
+
# @param [Symbol] id element or attribute name
|
37
|
+
# @return [String|nil] the attribute value
|
38
|
+
# @raise [NoMethodError] if no match is found
|
39
|
+
def method_missing(id, *args, &block)
|
40
|
+
ids = id.to_s
|
41
|
+
if instance_variable_defined?(:@attributes)
|
42
|
+
return @attributes[id] if @attributes.has_key?(id)
|
43
|
+
return @attributes[ids] if @attributes.has_key?(ids)
|
44
|
+
end
|
45
|
+
raise NoMethodError.new("#{ids} not found", name)
|
46
|
+
end
|
47
|
+
|
48
|
+
end # HasAttrs
|
49
|
+
end # Ox
|
data/lib/ox/instruct.rb
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
|
2
|
+
module Ox
|
3
|
+
|
4
|
+
# An Instruct represents a processing instruction of an XML document. It has a target, attributes, and a value or
|
5
|
+
# content. The content will be all characters with the exception of the target. If the content follows a regular
|
6
|
+
# attribute format then the attributes will be set to the parsed values. If it does not follow the attribute format
|
7
|
+
# then the attributes will be empty.
|
8
|
+
|
9
|
+
class Instruct < Node
|
10
|
+
include HasAttrs
|
11
|
+
|
12
|
+
# The content of the processing instruction.
|
13
|
+
attr_accessor :content
|
14
|
+
|
15
|
+
# Creates a new Instruct with the specified name.
|
16
|
+
# @param [String] name name of the Instruct
|
17
|
+
def initialize(name)
|
18
|
+
super
|
19
|
+
@attributes = nil
|
20
|
+
@content = nil
|
21
|
+
end
|
22
|
+
alias target value
|
23
|
+
|
24
|
+
# Returns true if this Object and other are of the same type and have the
|
25
|
+
# equivalent value, attributes, and content; otherwise false is returned.
|
26
|
+
# @param [Object] other Object compare _self_ to.
|
27
|
+
# @return [Boolean] true if both Objects are equivalent, otherwise false.
|
28
|
+
def eql?(other)
|
29
|
+
return false if (other.nil? or self.class != other.class)
|
30
|
+
return false unless super(other)
|
31
|
+
return false unless self.attributes == other.attributes
|
32
|
+
return false unless self.content == other.content
|
33
|
+
true
|
34
|
+
end
|
35
|
+
alias == eql?
|
36
|
+
|
37
|
+
end # Instruct
|
38
|
+
end # Ox
|
data/lib/ox/sax.rb
CHANGED
@@ -32,6 +32,7 @@ module Ox
|
|
32
32
|
# public. The same is true for attr() and attr_value().
|
33
33
|
#
|
34
34
|
# def instruct(target); end
|
35
|
+
# def end_instruct(target); end
|
35
36
|
# def attr(name, str); end
|
36
37
|
# def attr_value(name, value); end
|
37
38
|
# def doctype(str); end
|
@@ -55,6 +56,9 @@ module Ox
|
|
55
56
|
def instruct(target)
|
56
57
|
end
|
57
58
|
|
59
|
+
def end_instruct(target)
|
60
|
+
end
|
61
|
+
|
58
62
|
def attr(name, str)
|
59
63
|
end
|
60
64
|
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.8.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-12-
|
12
|
+
date: 2012-12-11 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "A fast XML parser and object serializer that uses only standard C
|
15
15
|
lib.\n \nOptimized XML (Ox), as the name implies was written to provide
|
@@ -29,6 +29,8 @@ files:
|
|
29
29
|
- lib/ox/doctype.rb
|
30
30
|
- lib/ox/document.rb
|
31
31
|
- lib/ox/element.rb
|
32
|
+
- lib/ox/hasattrs.rb
|
33
|
+
- lib/ox/instruct.rb
|
32
34
|
- lib/ox/invalidpath.rb
|
33
35
|
- lib/ox/node.rb
|
34
36
|
- lib/ox/sax.rb
|