ox 1.7.1 → 1.8.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

data/README.md CHANGED
@@ -34,6 +34,10 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## <a name="release">Release Notes</a>
36
36
 
37
+ ### Release 1.8.0
38
+
39
+ - Added more complete support for processing instructions in both the generic parser and in the sax parser. This change includes an additional sax handler callback for the end of the instruction processing.
40
+
37
41
  ### Release 1.7.1
38
42
 
39
43
  - Pulled in sharpyfox's changes to make Ox work with Windows. (issue #24)
@@ -77,6 +77,7 @@ static void dump_first_obj(VALUE obj, Out out);
77
77
  static void dump_obj(ID aid, VALUE obj, unsigned int depth, Out out);
78
78
  static void dump_gen_doc(VALUE obj, unsigned int depth, Out out);
79
79
  static void dump_gen_element(VALUE obj, unsigned int depth, Out out);
80
+ static void dump_gen_instruct(VALUE obj, unsigned int depth, Out out);
80
81
  static int dump_gen_attr(VALUE key, VALUE value, Out out);
81
82
  static int dump_gen_nodes(VALUE obj, unsigned int depth, Out out);
82
83
  static void dump_gen_val_node(VALUE obj, unsigned int depth,
@@ -1064,6 +1065,40 @@ dump_gen_element(VALUE obj, unsigned int depth, Out out) {
1064
1065
  *out->cur = '\0';
1065
1066
  }
1066
1067
 
1068
+ static void
1069
+ dump_gen_instruct(VALUE obj, unsigned int depth, Out out) {
1070
+ VALUE rname = rb_attr_get(obj, ox_at_value_id);
1071
+ VALUE attrs = rb_attr_get(obj, ox_attributes_id);
1072
+ VALUE rcontent = rb_attr_get(obj, ox_at_content_id);
1073
+ const char *name = StringValuePtr(rname);
1074
+ const char *content = 0;
1075
+ long nlen = RSTRING_LEN(rname);
1076
+ long clen = 0;
1077
+ size_t size;
1078
+
1079
+ if (T_STRING == rb_type(rcontent)) {
1080
+ content = StringValuePtr(rcontent);
1081
+ clen = RSTRING_LEN(rcontent);
1082
+ size = 4 + nlen + clen;
1083
+ } else {
1084
+ size = 4 + nlen;
1085
+ }
1086
+ if (out->end - out->cur <= (long)size) {
1087
+ grow(out, size);
1088
+ }
1089
+ *out->cur++ = '<';
1090
+ *out->cur++ = '?';
1091
+ fill_value(out, name, nlen);
1092
+ if (0 != content) {
1093
+ fill_value(out, content, clen);
1094
+ } else if (Qnil != attrs) {
1095
+ rb_hash_foreach(attrs, dump_gen_attr, (VALUE)out);
1096
+ }
1097
+ *out->cur++ = '?';
1098
+ *out->cur++ = '>';
1099
+ *out->cur = '\0';
1100
+ }
1101
+
1067
1102
  static int
1068
1103
  dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
1069
1104
  long cnt = RARRAY_LEN(obj);
@@ -1078,6 +1113,8 @@ dump_gen_nodes(VALUE obj, unsigned int depth, Out out) {
1078
1113
  clas = rb_obj_class(*np);
1079
1114
  if (ox_element_clas == clas) {
1080
1115
  dump_gen_element(*np, d2, out);
1116
+ } else if (ox_instruct_clas == clas) {
1117
+ dump_gen_instruct(*np, d2, out);
1081
1118
  } else if (rb_cString == clas) {
1082
1119
  dump_str_value(out, StringValuePtr(*np), RSTRING_LEN(*np));
1083
1120
  indent_needed = (1 == cnt) ? 0 : 1;
@@ -37,16 +37,17 @@
37
37
  #include "ruby.h"
38
38
  #include "ox.h"
39
39
 
40
- static void instruct(PInfo pi, const char *target, Attr attrs);
40
+ static void instruct(PInfo pi, const char *target, Attr attrs, const char *content);
41
41
  static void create_doc(PInfo pi);
42
42
  static void create_prolog_doc(PInfo pi, const char *target, Attr attrs);
43
- static void nomode_instruct(PInfo pi, const char *target, Attr attrs);
43
+ static void nomode_instruct(PInfo pi, const char *target, Attr attrs, const char *content);
44
44
  static void add_doctype(PInfo pi, const char *docType);
45
45
  static void add_comment(PInfo pi, const char *comment);
46
46
  static void add_cdata(PInfo pi, const char *cdata, size_t len);
47
47
  static void add_text(PInfo pi, char *text, int closed);
48
48
  static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren);
49
49
  static void end_element(PInfo pi, const char *ename);
50
+ static void add_instruct(PInfo pi, const char *name, Attr attrs, const char *content);
50
51
 
51
52
  extern ParseCallbacks ox_obj_callbacks;
52
53
 
@@ -152,7 +153,7 @@ create_prolog_doc(PInfo pi, const char *target, Attr attrs) {
152
153
  }
153
154
 
154
155
  static void
155
- instruct(PInfo pi, const char *target, Attr attrs) {
156
+ instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
156
157
  if (0 == strcmp("xml", target)) {
157
158
  create_prolog_doc(pi, target, attrs);
158
159
  } else if (0 == strcmp("ox", target)) {
@@ -165,14 +166,12 @@ instruct(PInfo pi, const char *target, Attr attrs) {
165
166
  /* ignore other instructions */
166
167
  }
167
168
  } else {
168
- if (TRACE <= pi->options->trace) {
169
- printf("Processing instruction %s ignored.\n", target);
170
- }
169
+ add_instruct(pi, target, attrs, content);
171
170
  }
172
171
  }
173
172
 
174
173
  static void
175
- nomode_instruct(PInfo pi, const char *target, Attr attrs) {
174
+ nomode_instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
176
175
  if (0 == strcmp("xml", target)) {
177
176
  create_prolog_doc(pi, target, attrs);
178
177
  } else if (0 == strcmp("ox", target)) {
@@ -344,3 +343,71 @@ end_element(PInfo pi, const char *ename) {
344
343
  pi->h--;
345
344
  }
346
345
  }
346
+
347
+ static void
348
+ add_instruct(PInfo pi, const char *name, Attr attrs, const char *content) {
349
+ VALUE inst;
350
+ VALUE s = rb_str_new2(name);
351
+ VALUE c = Qnil;
352
+
353
+ if (0 != content) {
354
+ c = rb_str_new2(content);
355
+ }
356
+ #if HAS_ENCODING_SUPPORT
357
+ if (0 != pi->options->rb_enc) {
358
+ rb_enc_associate(s, pi->options->rb_enc);
359
+ if (0 != content) {
360
+ rb_enc_associate(c, pi->options->rb_enc);
361
+ }
362
+ }
363
+ #endif
364
+ inst = rb_obj_alloc(ox_instruct_clas);
365
+ rb_ivar_set(inst, ox_at_value_id, s);
366
+ if (0 != content) {
367
+ rb_ivar_set(inst, ox_at_content_id, c);
368
+ } else if (0 != attrs->name) {
369
+ VALUE ah = rb_hash_new();
370
+
371
+ for (; 0 != attrs->name; attrs++) {
372
+ VALUE sym;
373
+ VALUE *slot;
374
+
375
+ if (Yes == pi->options->sym_keys) {
376
+ if (Qundef == (sym = ox_cache_get(ox_symbol_cache, attrs->name, &slot))) {
377
+ #if HAS_ENCODING_SUPPORT
378
+ if (0 != pi->options->rb_enc) {
379
+ VALUE rstr = rb_str_new2(attrs->name);
380
+
381
+ rb_enc_associate(rstr, pi->options->rb_enc);
382
+ sym = rb_funcall(rstr, ox_to_sym_id, 0);
383
+ } else {
384
+ sym = ID2SYM(rb_intern(attrs->name));
385
+ }
386
+ #else
387
+ sym = ID2SYM(rb_intern(attrs->name));
388
+ #endif
389
+ *slot = sym;
390
+ }
391
+ } else {
392
+ sym = rb_str_new2(attrs->name);
393
+ #if HAS_ENCODING_SUPPORT
394
+ if (0 != pi->options->rb_enc) {
395
+ rb_enc_associate(sym, pi->options->rb_enc);
396
+ }
397
+ #endif
398
+ }
399
+ s = rb_str_new2(attrs->value);
400
+ #if HAS_ENCODING_SUPPORT
401
+ if (0 != pi->options->rb_enc) {
402
+ rb_enc_associate(s, pi->options->rb_enc);
403
+ }
404
+ #endif
405
+ rb_hash_aset(ah, sym, s);
406
+ }
407
+ rb_ivar_set(inst, ox_attributes_id, ah);
408
+ }
409
+ if (0 == pi->h) { /* top level object */
410
+ create_doc(pi);
411
+ }
412
+ rb_ary_push(pi->h->obj, inst);
413
+ }
@@ -39,7 +39,7 @@
39
39
  #include "base64.h"
40
40
  #include "ox.h"
41
41
 
42
- static void instruct(PInfo pi, const char *target, Attr attrs);
42
+ static void instruct(PInfo pi, const char *target, Attr attrs, const char *content);
43
43
  static void add_text(PInfo pi, char *text, int closed);
44
44
  static void add_element(PInfo pi, const char *ename, Attr attrs, int hasChildren);
45
45
  static void end_element(PInfo pi, const char *ename);
@@ -394,7 +394,7 @@ parse_regexp(const char *text) {
394
394
  }
395
395
 
396
396
  static void
397
- instruct(PInfo pi, const char *target, Attr attrs) {
397
+ instruct(PInfo pi, const char *target, Attr attrs, const char *content) {
398
398
  if (0 == strcmp("xml", target)) {
399
399
  #if HAS_ENCODING_SUPPORT
400
400
  for (; 0 != attrs->name; attrs++) {
@@ -48,6 +48,7 @@ void Init_ox();
48
48
 
49
49
  VALUE Ox = Qnil;
50
50
 
51
+ ID ox_at_content_id;
51
52
  ID ox_at_id;
52
53
  ID ox_at_value_id;
53
54
  ID ox_attr_id;
@@ -60,6 +61,7 @@ ID ox_den_id;
60
61
  ID ox_doctype_id;
61
62
  ID ox_end_element_id;
62
63
  ID ox_end_id;
64
+ ID ox_end_instruct_id;
63
65
  ID ox_error_id;
64
66
  ID ox_excl_id;
65
67
  ID ox_external_encoding_id;
@@ -92,16 +94,17 @@ VALUE ox_encoding_sym;
92
94
  VALUE ox_empty_string;
93
95
  VALUE ox_zero_fixnum;
94
96
 
97
+ VALUE ox_bag_clas;
95
98
  VALUE ox_cdata_clas;
96
99
  VALUE ox_comment_clas;
100
+ VALUE ox_date_class;
97
101
  VALUE ox_doctype_clas;
98
102
  VALUE ox_document_clas;
99
103
  VALUE ox_element_clas;
100
- VALUE ox_bag_clas;
104
+ VALUE ox_instruct_clas;
105
+ VALUE ox_stringio_class;
101
106
  VALUE ox_struct_class;
102
107
  VALUE ox_time_class;
103
- VALUE ox_date_class;
104
- VALUE ox_stringio_class;
105
108
 
106
109
  Cache ox_symbol_cache = 0;
107
110
  Cache ox_class_cache = 0;
@@ -780,11 +783,13 @@ void Init_ox() {
780
783
  ox_beg_id = rb_intern("@beg");
781
784
  ox_cdata_id = rb_intern("cdata");
782
785
  ox_comment_id = rb_intern("comment");
786
+ ox_at_content_id = rb_intern("@content");
783
787
  ox_den_id = rb_intern("@den");
784
788
  ox_doctype_id = rb_intern("doctype");
785
789
  ox_external_encoding_id = rb_intern("external_encoding");
786
790
  ox_end_element_id = rb_intern("end_element");
787
791
  ox_end_id = rb_intern("@end");
792
+ ox_end_instruct_id = rb_intern("end_instruct");
788
793
  ox_error_id = rb_intern("error");
789
794
  ox_excl_id = rb_intern("@excl");
790
795
  ox_fileno_id = rb_intern("fileno");
@@ -843,6 +848,7 @@ void Init_ox() {
843
848
 
844
849
  ox_document_clas = rb_const_get_at(Ox, rb_intern("Document"));
845
850
  ox_element_clas = rb_const_get_at(Ox, rb_intern("Element"));
851
+ ox_instruct_clas = rb_const_get_at(Ox, rb_intern("Instruct"));
846
852
  ox_comment_clas = rb_const_get_at(Ox, rb_intern("Comment"));
847
853
  ox_doctype_clas = rb_const_get_at(Ox, rb_intern("DocType"));
848
854
  ox_cdata_clas = rb_const_get_at(Ox, rb_intern("CData"));
@@ -158,7 +158,7 @@ typedef struct _Helper {
158
158
  typedef struct _PInfo *PInfo;
159
159
 
160
160
  typedef struct _ParseCallbacks {
161
- void (*instruct)(PInfo pi, const char *target, Attr attrs);
161
+ void (*instruct)(PInfo pi, const char *target, Attr attrs, const char *content);
162
162
  void (*add_doctype)(PInfo pi, const char *docType);
163
163
  void (*add_comment)(PInfo pi, const char *comment);
164
164
  void (*add_cdata)(PInfo pi, const char *cdata, size_t len);
@@ -220,6 +220,7 @@ extern struct _Options ox_default_options;
220
220
 
221
221
  extern VALUE Ox;
222
222
 
223
+ extern ID ox_at_content_id;
223
224
  extern ID ox_at_id;
224
225
  extern ID ox_at_value_id;
225
226
  extern ID ox_attr_id;
@@ -232,6 +233,7 @@ extern ID ox_den_id;
232
233
  extern ID ox_doctype_id;
233
234
  extern ID ox_end_element_id;
234
235
  extern ID ox_end_id;
236
+ extern ID ox_end_instruct_id;
235
237
  extern ID ox_error_id;
236
238
  extern ID ox_excl_id;
237
239
  extern ID ox_external_encoding_id;
@@ -275,6 +277,7 @@ extern VALUE ox_zero_fixnum;
275
277
 
276
278
  extern VALUE ox_document_clas;
277
279
  extern VALUE ox_element_clas;
280
+ extern VALUE ox_instruct_clas;
278
281
  extern VALUE ox_bag_clas;
279
282
  extern VALUE ox_comment_clas;
280
283
  extern VALUE ox_doctype_clas;
@@ -68,7 +68,7 @@ static int collapse_special(PInfo pi, char *str);
68
68
  inline static void
69
69
  next_non_white(PInfo pi) {
70
70
  for (; 1; pi->s++) {
71
- switch(*pi->s) {
71
+ switch (*pi->s) {
72
72
  case ' ':
73
73
  case '\t':
74
74
  case '\f':
@@ -84,7 +84,7 @@ next_non_white(PInfo pi) {
84
84
  inline static void
85
85
  next_white(PInfo pi) {
86
86
  for (; 1; pi->s++) {
87
- switch(*pi->s) {
87
+ switch (*pi->s) {
88
88
  case ' ':
89
89
  case '\t':
90
90
  case '\f':
@@ -131,7 +131,7 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
131
131
  }
132
132
  pi.s++; /* past < */
133
133
  switch (*pi.s) {
134
- case '?': /* prolog */
134
+ case '?': /* processing instruction */
135
135
  pi.s++;
136
136
  read_instruction(&pi);
137
137
  break;
@@ -165,19 +165,47 @@ ox_parse(char *xml, ParseCallbacks pcb, char **endp, Options options) {
165
165
  return pi.obj;
166
166
  }
167
167
 
168
/* Copies the text of a processing instruction from src into content up to,
 * but not including, the terminating "?>".
 *
 * src     - NUL-terminated input positioned just after the instruction target
 * content - destination buffer
 * len     - maximum number of bytes that may be written to content,
 *           including the terminating NUL
 *
 * Returns a pointer to the '>' of the terminating "?>" on success, with
 * content NUL-terminated. Returns 0 if the input ends before "?>" is seen or
 * if the content does not fit in len bytes. On failure content is reset to an
 * empty string (when len is non-zero) so the caller never sees an
 * unterminated buffer.
 */
static char*
gather_content(const char *src, char *content, size_t len) {
    char        *start = content;

    for (; 0 < len; src++, content++, len--) {
        switch (*src) {
        case '\0':
            /* input ended before the "?>" terminator */
            *start = '\0';
            return 0;
        case '?':
            if ('>' == *(src + 1)) {
                *content = '\0';
                return (char*)(src + 1);
            }
            /* fall through */
        default:
            /* a lone '?' is ordinary content, copied like any other byte */
            *content = *src;
            break;
        }
    }
    /* Ran out of room. Only touch the buffer if it had any capacity. */
    if (start != content) {
        *start = '\0';
    }
    return 0;
}
188
+
168
189
  /* Entered after the "<?" sequence. Ready to read the rest.
169
190
  */
170
191
  static void
171
192
  read_instruction(PInfo pi) {
193
+ char content[1024];
172
194
  struct _Attr attrs[MAX_ATTRS + 1];
173
195
  Attr a = attrs;
174
196
  char *target;
175
197
  char *end;
176
198
  char c;
177
-
199
+ char *cend;
200
+ int attrs_ok = 1;
201
+
202
+ *content = '\0';
178
203
  memset(attrs, 0, sizeof(attrs));
179
204
  target = read_name_token(pi);
180
205
  end = pi->s;
206
+ if (0 == (cend = gather_content(pi->s, content, sizeof(content) - 1))) {
207
+ raise_error("processing instruction content too large or not terminated", pi->str, pi->s);
208
+ }
181
209
  next_non_white(pi);
182
210
  c = *pi->s;
183
211
  *end = '\0'; /* terminate name */
@@ -191,7 +219,8 @@ read_instruction(PInfo pi) {
191
219
  end = pi->s;
192
220
  next_non_white(pi);
193
221
  if ('=' != *pi->s++) {
194
- raise_error("invalid format, no attribute value", pi->str, pi->s);
222
+ attrs_ok = 0;
223
+ break;
195
224
  }
196
225
  *end = '\0'; /* terminate name */
197
226
  /* read value */
@@ -199,7 +228,8 @@ read_instruction(PInfo pi) {
199
228
  a->value = read_quoted_value(pi);
200
229
  a++;
201
230
  if (MAX_ATTRS <= (a - attrs)) {
202
- raise_error("too many attributes", pi->str, pi->s);
231
+ attrs_ok = 0;
232
+ break;
203
233
  }
204
234
  next_non_white(pi);
205
235
  }
@@ -209,11 +239,19 @@ read_instruction(PInfo pi) {
209
239
  } else {
210
240
  pi->s++;
211
241
  }
212
- if ('>' != *pi->s++) {
213
- raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
242
+ if (attrs_ok) {
243
+ if ('>' != *pi->s++) {
244
+ raise_error("invalid format, processing instruction not terminated", pi->str, pi->s);
245
+ }
246
+ } else {
247
+ pi->s = cend + 1;
214
248
  }
215
249
  if (0 != pi->pcb->instruct) {
216
- pi->pcb->instruct(pi, target, attrs);
250
+ if (attrs_ok) {
251
+ pi->pcb->instruct(pi, target, attrs, 0);
252
+ } else {
253
+ pi->pcb->instruct(pi, target, attrs, content);
254
+ }
217
255
  }
218
256
  }
219
257
 
@@ -404,6 +442,10 @@ read_element(PInfo pi) {
404
442
  raise_error("invalid format, invalid comment or CDATA format", pi->str, pi->s);
405
443
  }
406
444
  break;
445
+ case '?': /* processing instruction */
446
+ pi->s++;
447
+ read_instruction(pi);
448
+ break;
407
449
  case '/':
408
450
  slash = pi->s;
409
451
  pi->s++;
@@ -61,6 +61,7 @@ typedef struct _SaxDrive {
61
61
  const char *in_str;
62
62
  };
63
63
  int has_instruct;
64
+ int has_end_instruct;
64
65
  int has_attr;
65
66
  int has_attr_value;
66
67
  int has_doctype;
@@ -76,34 +77,34 @@ typedef struct _SaxDrive {
76
77
  #endif
77
78
  } *SaxDrive;
78
79
 
79
- static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert);
80
- static void sax_drive_cleanup(SaxDrive dr);
81
- static int sax_drive_read(SaxDrive dr);
82
- static void sax_drive_error(SaxDrive dr, const char *msg, int critical);
83
-
84
- static int read_children(SaxDrive dr, int first);
85
- static int read_instruction(SaxDrive dr);
86
- static int read_doctype(SaxDrive dr);
87
- static int read_cdata(SaxDrive dr);
88
- static int read_comment(SaxDrive dr);
89
- static int read_element(SaxDrive dr);
90
- static int read_text(SaxDrive dr);
91
- static int read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml);
92
- static char read_name_token(SaxDrive dr);
93
- static int read_quoted_value(SaxDrive dr);
94
- static int collapse_special(char *str);
95
-
96
- static VALUE rescue_cb(VALUE rdr, VALUE err);
97
- static VALUE io_cb(VALUE rdr);
98
- static VALUE partial_io_cb(VALUE rdr);
99
- static int read_from_io(SaxDrive dr);
80
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert);
81
+ static void sax_drive_cleanup(SaxDrive dr);
82
+ static int sax_drive_read(SaxDrive dr);
83
+ static void sax_drive_error(SaxDrive dr, const char *msg, int critical);
84
+
85
+ static int read_children(SaxDrive dr, int first);
86
+ static int read_instruction(SaxDrive dr);
87
+ static int read_doctype(SaxDrive dr);
88
+ static int read_cdata(SaxDrive dr);
89
+ static int read_comment(SaxDrive dr);
90
+ static int read_element(SaxDrive dr);
91
+ static int read_text(SaxDrive dr);
92
+ static const char* read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml);
93
+ static char read_name_token(SaxDrive dr);
94
+ static int read_quoted_value(SaxDrive dr);
95
+ static int collapse_special(char *str);
96
+
97
+ static VALUE rescue_cb(VALUE rdr, VALUE err);
98
+ static VALUE io_cb(VALUE rdr);
99
+ static VALUE partial_io_cb(VALUE rdr);
100
+ static int read_from_io(SaxDrive dr);
100
101
  #ifndef JRUBY_RUBY
101
- static int read_from_fd(SaxDrive dr);
102
+ static int read_from_fd(SaxDrive dr);
102
103
  #endif
103
- static int read_from_io_partial(SaxDrive dr);
104
- static int read_from_str(SaxDrive dr);
104
+ static int read_from_io_partial(SaxDrive dr);
105
+ static int read_from_str(SaxDrive dr);
105
106
 
106
- static VALUE sax_value_class;
107
+ static VALUE sax_value_class;
107
108
 
108
109
  /* This is only for CentOS 5.4 with Ruby 1.9.3-p0 and for OS X 10.6. */
109
110
  #ifdef NEEDS_STPCPY
@@ -227,6 +228,7 @@ ox_sax_parse(VALUE handler, VALUE io, int convert) {
227
228
  #if 0
228
229
  printf("*** sax_parse with these flags\n");
229
230
  printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
231
+ printf(" has_end_instruct = %s\n", dr.has_end_instruct ? "true" : "false");
230
232
  printf(" has_attr = %s\n", dr.has_attr ? "true" : "false");
231
233
  printf(" has_attr_value = %s\n", dr.has_attr_value ? "true" : "false");
232
234
  printf(" has_doctype = %s\n", dr.has_doctype ? "true" : "false");
@@ -311,6 +313,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
311
313
  rb_gc_register_address(&dr->value_obj);
312
314
  dr->convert_special = convert;
313
315
  dr->has_instruct = respond_to(handler, ox_instruct_id);
316
+ dr->has_end_instruct = respond_to(handler, ox_end_instruct_id);
314
317
  dr->has_attr = respond_to(handler, ox_attr_id);
315
318
  dr->has_attr_value = respond_to(handler, ox_attr_value_id);
316
319
  dr->has_doctype = respond_to(handler, ox_doctype_id);
@@ -444,9 +447,6 @@ read_children(SaxDrive dr, int first) {
444
447
  c = sax_drive_get(dr);
445
448
  switch (c) {
446
449
  case '?': /* instructions (xml or otherwise) */
447
- if (!first || element_read || doctype_read) {
448
- sax_drive_error(dr, "invalid format, instruction must come before elements", 0);
449
- }
450
450
  err = read_instruction(dr);
451
451
  break;
452
452
  case '!': /* comment or doctype */
@@ -504,28 +504,88 @@ read_children(SaxDrive dr, int first) {
504
504
  return err;
505
505
  }
506
506
 
507
+ static void
508
+ read_content(SaxDrive dr, char *content, size_t len) {
509
+ char c;
510
+ char *end = content + len;
511
+
512
+ while ('\0' != (c = sax_drive_get(dr))) {
513
+ if (end < content) {
514
+ sax_drive_error(dr, "processing instruction content too large", 1);
515
+ }
516
+ if ('?' == c) {
517
+ if ('\0' == (c = sax_drive_get(dr))) {
518
+ sax_drive_error(dr, "invalid format, document not terminated", 1);
519
+ }
520
+ if ('>' == c) {
521
+ *content = '\0';
522
+ return;
523
+ } else {
524
+ *content++ = c;
525
+ }
526
+ } else {
527
+ *content++ = c;
528
+ }
529
+ }
530
+ *content = '\0';
531
+ }
532
+
507
533
  /* Entered after the "<?" sequence. Ready to read the rest.
508
534
  */
509
535
  static int
510
536
  read_instruction(SaxDrive dr) {
537
+ char content[1024];
511
538
  char c;
539
+ char *cend;
540
+ const char *err;
541
+ VALUE target = Qnil;
512
542
 
513
543
  if ('\0' == (c = read_name_token(dr))) {
514
544
  return -1;
515
545
  }
546
+ if (dr->has_instruct || dr->has_end_instruct) {
547
+ target = rb_str_new2(dr->str);
548
+ }
516
549
  if (dr->has_instruct) {
517
550
  VALUE args[1];
518
551
 
519
- args[0] = rb_str_new2(dr->str);
552
+ args[0] = target;
520
553
  rb_funcall2(dr->handler, ox_instruct_id, 1, args);
521
554
  }
522
- if (0 != read_attrs(dr, c, '?', '?', (0 == strcmp("xml", dr->str)))) {
523
- return -1;
555
+ dr->str = dr->cur; /* make sure the start doesn't get compacted out */
556
+ read_content(dr, content, sizeof(content) - 1);
557
+ cend = dr->cur;
558
+ dr->cur = dr->str;
559
+ if (0 != (err = read_attrs(dr, c, '?', '?', (0 == strcmp("xml", dr->str))))) {
560
+ if (dr->has_text) {
561
+ VALUE args[1];
562
+
563
+ if (dr->convert_special) {
564
+ if (0 != collapse_special(content)) {
565
+ sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
566
+ }
567
+ }
568
+ args[0] = rb_str_new2(content);
569
+ #if HAS_ENCODING_SUPPORT
570
+ if (0 != dr->encoding) {
571
+ rb_enc_associate(args[0], dr->encoding);
572
+ }
573
+ #endif
574
+ rb_funcall2(dr->handler, ox_text_id, 1, args);
575
+ }
576
+ dr->cur = cend;
577
+ } else {
578
+ c = next_non_white(dr);
579
+ if ('>' != c) {
580
+ sax_drive_error(dr, "invalid format, instruction not terminated", 1);
581
+ return -1;
582
+ }
524
583
  }
525
- c = next_non_white(dr);
526
- if ('>' != c) {
527
- sax_drive_error(dr, "invalid format, instruction not terminated", 1);
528
- return -1;
584
+ if (dr->has_end_instruct) {
585
+ VALUE args[1];
586
+
587
+ args[0] = target;
588
+ rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
529
589
  }
530
590
  dr->str = 0;
531
591
 
@@ -649,6 +709,7 @@ read_comment(SaxDrive dr) {
649
709
  static int
650
710
  read_element(SaxDrive dr) {
651
711
  VALUE name = Qnil;
712
+ const char *err;
652
713
  char c;
653
714
  int closed;
654
715
 
@@ -667,7 +728,8 @@ read_element(SaxDrive dr) {
667
728
  } else if ('>' == c) {
668
729
  closed = 0;
669
730
  } else {
670
- if (0 != read_attrs(dr, c, '/', '>', 0)) {
731
+ if (0 != (err = read_attrs(dr, c, '/', '>', 0))) {
732
+ sax_drive_error(dr, err, 1);
671
733
  return -1;
672
734
  }
673
735
  closed = ('/' == *(dr->cur - 1));
@@ -739,10 +801,12 @@ read_text(SaxDrive dr) {
739
801
  #endif
740
802
  rb_funcall2(dr->handler, ox_text_id, 1, args);
741
803
  }
804
+ dr->str = 0;
805
+
742
806
  return 0;
743
807
  }
744
808
 
745
- static int
809
+ static const char*
746
810
  read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
747
811
  VALUE name = Qnil;
748
812
  int is_encoding = 0;
@@ -754,11 +818,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
754
818
  while (termc != c && term2 != c) {
755
819
  dr->cur--;
756
820
  if ('\0' == c) {
757
- sax_drive_error(dr, "invalid format, processing instruction not terminated", 1);
758
- return -1;
821
+ return "invalid format, attributes not terminated";
759
822
  }
760
823
  if ('\0' == (c = read_name_token(dr))) {
761
- return -1;
824
+ return "error reading tolen";
762
825
  }
763
826
  if (is_xml && 0 == strcmp("encoding", dr->str)) {
764
827
  is_encoding = 1;
@@ -771,11 +834,10 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
771
834
  c = next_non_white(dr);
772
835
  }
773
836
  if ('=' != c) {
774
- sax_drive_error(dr, "invalid format, no attribute value", 1);
775
- return -1;
837
+ return "invalid format, no attribute value";
776
838
  }
777
839
  if (0 != read_quoted_value(dr)) {
778
- return -1;
840
+ return "error reading quoted value";
779
841
  }
780
842
  if (is_encoding) {
781
843
  #if HAS_ENCODING_SUPPORT
@@ -806,6 +868,8 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
806
868
  }
807
869
  c = next_non_white(dr);
808
870
  }
871
+ dr->str = 0;
872
+
809
873
  return 0;
810
874
  }
811
875
 
@@ -870,6 +934,7 @@ read_quoted_value(SaxDrive dr) {
870
934
  }
871
935
  }
872
936
  *(dr->cur - 1) = '\0'; /* terminate value */
937
+
873
938
  return 0;
874
939
  }
875
940
 
data/lib/ox.rb CHANGED
@@ -88,8 +88,10 @@ end
88
88
 
89
89
  require 'ox/version'
90
90
  require 'ox/invalidpath'
91
+ require 'ox/hasattrs'
91
92
  require 'ox/node'
92
93
  require 'ox/comment'
94
+ require 'ox/instruct'
93
95
  require 'ox/cdata'
94
96
  require 'ox/doctype'
95
97
  require 'ox/element'
@@ -35,6 +35,7 @@ module Ox
35
35
  # => "58"
36
36
 
37
37
  class Element < Node
38
+ include HasAttrs
38
39
 
39
40
  # Creates a new Element with the specified name.
40
41
  # @param [String] name name of the Element
@@ -61,29 +62,6 @@ module Ox
61
62
  @nodes << node
62
63
  end
63
64
 
64
- # Returns all the attributes of the Element as a Hash.
65
- # @return [Hash] all attributes and attribute values.
66
- def attributes
67
- @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
68
- @attributes
69
- end
70
-
71
- # Returns the value of an attribute.
72
- # @param [Symbol|String] attr attribute name or key to return the value for
73
- def [](attr)
74
- return nil unless instance_variable_defined?(:@attributes) and @attributes.is_a?(Hash)
75
- @attributes[attr] or (attr.is_a?(String) ? @attributes[attr.to_sym] : @attributes[attr.to_s])
76
- end
77
-
78
- # Adds or set an attribute of the Element.
79
- # @param [Symbol|String] attr attribute name or key
80
- # @param [Object] value value for the attribute
81
- def []=(attr, value)
82
- raise "argument to [] must be a Symbol or a String." unless attr.is_a?(Symbol) or attr.is_a?(String)
83
- @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
84
- @attributes[attr] = value.to_s
85
- end
86
-
87
65
  # Returns true if this Object and other are of the same type and have the
88
66
  # equivalent value and the equivalent elements otherwise false is returned.
89
67
  # @param [Object] other Object compare _self_ to.
@@ -156,7 +134,7 @@ module Ox
156
134
  ids = id.to_s
157
135
  i = args[0].to_i # will be 0 if no arg or parsing fails
158
136
  @nodes.each do |n|
159
- if n.is_a?(Element) && (n.value == id || n.value == ids)
137
+ if (n.is_a?(Element) || n.is_a?(Instruct)) && (n.value == id || n.value == ids)
160
138
  return n if 0 == i
161
139
  i -= 1
162
140
  end
@@ -165,7 +143,7 @@ module Ox
165
143
  return @attributes[id] if @attributes.has_key?(id)
166
144
  return @attributes[ids] if @attributes.has_key?(ids)
167
145
  end
168
- raise NoMethodError.new("#{name} not found", name)
146
+ raise NoMethodError.new("#{ids} not found", name)
169
147
  end
170
148
 
171
149
  # @param [Array] path array of steps in a path
@@ -0,0 +1,49 @@
1
+
2
module Ox

  # An Object that includes the HasAttrs module can have attributes which are a Hash of String values and either String
  # or Symbol keys.
  #
  # To access the attributes there are several options. One is to walk the attributes. The easiest for simple regularly
  # formatted XML is to reference the attributes simply by name.

  module HasAttrs
    # Returns all the attributes of the object as a Hash. The Hash is created
    # on first access if it does not exist yet or is nil.
    # @return [Hash] all attributes and attribute values.
    def attributes
      @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
      @attributes
    end

    # Returns the value of an attribute. A String key falls back to the
    # equivalent Symbol key and any other key falls back to its String form.
    # @param [Symbol|String] attr attribute name or key to return the value for
    # @return [String|nil] the attribute value or nil if there is none
    def [](attr)
      return nil unless instance_variable_defined?(:@attributes) and @attributes.is_a?(Hash)
      @attributes[attr] or (attr.is_a?(String) ? @attributes[attr.to_sym] : @attributes[attr.to_s])
    end

    # Adds or sets an attribute of the object. The value is stored as a String.
    # @param [Symbol|String] attr attribute name or key
    # @param [Object] value value for the attribute
    # @raise [RuntimeError] if attr is neither a Symbol nor a String
    def []=(attr, value)
      raise "argument to [] must be a Symbol or a String." unless attr.is_a?(Symbol) or attr.is_a?(String)
      @attributes = { } if !instance_variable_defined?(:@attributes) or @attributes.nil?
      @attributes[attr] = value.to_s
    end

    # Handles the 'easy' API that allows navigating a simple XML by
    # referencing attributes by name.
    # @param [Symbol] id attribute name
    # @return [String|nil] the attribute value
    # @raise [NoMethodError] if no match is found
    def method_missing(id, *args, &block)
      ids = id.to_s
      # @attributes may be defined but nil, so check for a Hash the same way
      # [] does instead of calling has_key? on nil.
      if instance_variable_defined?(:@attributes) and @attributes.is_a?(Hash)
        return @attributes[id] if @attributes.has_key?(id)
        return @attributes[ids] if @attributes.has_key?(ids)
      end
      # Report the missing method itself as NoMethodError#name. The including
      # object is not guaranteed to respond to #name, and calling an undefined
      # #name here would re-enter method_missing.
      raise NoMethodError.new("#{ids} not found", id)
    end

  end # HasAttrs
end # Ox
@@ -0,0 +1,38 @@
1
+
2
module Ox

  # An Instruct represents a processing instruction of an XML document. It has a target, attributes, and a value or
  # content. The content will be all characters with the exception of the target. If the content follows a regular
  # attribute format then the attributes will be set to the parsed values. If it does not follow the attribute format
  # then the attributes will be empty.

  class Instruct < Node
    include HasAttrs

    # The content of the processing instruction.
    attr_accessor :content

    # Creates a new Instruct with the specified name. Attributes and content
    # both start out as nil.
    # @param [String] name name of the Instruct
    def initialize(name)
      super
      @content = nil
      @attributes = nil
    end
    alias target value

    # Compares this Instruct with another object. They are equivalent when
    # they are of the same class, the superclass comparison succeeds, and
    # both the attributes and the content are equal.
    # @param [Object] other Object compare _self_ to.
    # @return [Boolean] true if both Objects are equivalent, otherwise false.
    def eql?(other)
      # A nil other has class NilClass and is rejected by the class check.
      return false unless other.class == self.class
      return false unless super(other)
      (attributes == other.attributes && content == other.content) ? true : false
    end
    alias == eql?

  end # Instruct
end # Ox
@@ -32,6 +32,7 @@ module Ox
32
32
  # public. The same is true for attr() and attr_value().
33
33
  #
34
34
  # def instruct(target); end
35
+ # def end_instruct(target); end
35
36
  # def attr(name, str); end
36
37
  # def attr_value(name, value); end
37
38
  # def doctype(str); end
@@ -55,6 +56,9 @@ module Ox
55
56
  def instruct(target)
56
57
  end
57
58
 
59
+ def end_instruct(target)
60
+ end
61
+
58
62
  def attr(name, str)
59
63
  end
60
64
 
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '1.7.1'
4
+ VERSION = '1.8.0'
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.1
4
+ version: 1.8.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2012-12-06 00:00:00.000000000 Z
12
+ date: 2012-12-11 00:00:00.000000000 Z
13
13
  dependencies: []
14
14
  description: ! "A fast XML parser and object serializer that uses only standard C
15
15
  lib.\n \nOptimized XML (Ox), as the name implies was written to provide
@@ -29,6 +29,8 @@ files:
29
29
  - lib/ox/doctype.rb
30
30
  - lib/ox/document.rb
31
31
  - lib/ox/element.rb
32
+ - lib/ox/hasattrs.rb
33
+ - lib/ox/instruct.rb
32
34
  - lib/ox/invalidpath.rb
33
35
  - lib/ox/node.rb
34
36
  - lib/ox/sax.rb