ox 1.3.2 → 1.3.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

data/README.md CHANGED
@@ -26,9 +26,12 @@ A fast XML parser and Object marshaller as a Ruby gem.
26
26
 
27
27
  ## <a name="release">Release Notes</a>
28
28
 
29
- ### Release 1.3.2
29
+ ### Release 1.3.3
30
+
31
+ - Added an option to the SAX parser to convert special characters.
32
+
33
+ - The default options encoding is now used as the default for SAX parsing.
30
34
 
31
- - Changed SAX parser API for element and instruction attributes
32
35
 
33
36
  ## <a name="description">Description</a>
34
37
 
@@ -94,26 +97,66 @@ Ox is compatible with Ruby 1.8.7, 1.9.2, JRuby, and RBX.
94
97
  ### Generic XML Writing and Parsing:
95
98
 
96
99
  require 'ox'
97
-
100
+
98
101
  doc = Ox::Document.new(:version => '1.0')
99
-
102
+
100
103
  top = Ox::Element.new('top')
101
104
  top[:name] = 'sample'
102
105
  doc << top
103
-
106
+
104
107
  mid = Ox::Element.new('middle')
105
108
  mid[:name] = 'second'
106
109
  top << mid
107
-
110
+
108
111
  bot = Ox::Element.new('bottom')
109
112
  bot[:name] = 'third'
110
113
  mid << bot
111
-
114
+
112
115
  xml = Ox.dump(doc)
113
- puts xml
116
+
117
+ # xml =
118
+ # <top name="sample">
119
+ # <middle name="second">
120
+ # <bottom name="third"/>
121
+ # </middle>
122
+ # </top>
123
+
114
124
  doc2 = Ox.parse(xml)
115
125
  puts "Same? #{doc == doc2}"
126
+ # prints "Same? true"
127
+
128
+ ### SAX XML Parsing:
116
129
 
130
+ require 'stringio'
131
+ require 'ox'
132
+
133
+ class Sample < ::Ox::Sax
134
+ def start_element(name); puts "start: #{name}"; end
135
+ def end_element(name); puts "end: #{name}"; end
136
+ def attr(name, value); puts " #{name} => #{value}"; end
137
+ def text(value); puts "text #{value}"; end
138
+ end
139
+
140
+ io = StringIO.new(%{
141
+ <top name="sample">
142
+ <middle name="second">
143
+ <bottom name="third"/>
144
+ </middle>
145
+ </top>
146
+ })
147
+
148
+ handler = Sample.new()
149
+ Ox.sax_parse(handler, io)
150
+ # outputs
151
+ # start: top
152
+ # name => sample
153
+ # start: middle
154
+ # name => second
155
+ # start: bottom
156
+ # name => third
157
+ # end: bottom
158
+ # end: middle
159
+ # end: top
117
160
 
118
161
  ### Object XML format
119
162
 
data/ext/ox/ox.c CHANGED
@@ -94,6 +94,7 @@ VALUE trace_sym;
94
94
  VALUE strict_sym;
95
95
  VALUE with_dtd_sym;
96
96
  VALUE with_instruct_sym;
97
+ VALUE convert_special_sym;
97
98
  VALUE with_xml_sym;
98
99
  VALUE empty_string;
99
100
  VALUE zero_fixnum;
@@ -111,7 +112,7 @@ Cache symbol_cache = 0;
111
112
  Cache class_cache = 0;
112
113
  Cache attr_cache = 0;
113
114
 
114
- static struct _Options default_options = {
115
+ struct _Options default_options = {
115
116
  { '\0' }, // encoding
116
117
  2, // indent
117
118
  0, // trace
@@ -444,16 +445,31 @@ load_file(int argc, VALUE *argv, VALUE self) {
444
445
  return load(xml, argc - 1, argv + 1, self);
445
446
  }
446
447
 
447
- /* call-seq: sax_parse(handler, io)
448
+ /* call-seq: sax_parse(handler, io, options)
448
449
  *
449
450
  * Parses an IO stream or file containing an XML document. Raises an exception
450
451
  * if the XML is malformed or the classes specified are not valid.
451
452
  * @param [Ox::Sax] handler SAX-like handler (responds to Ox::Sax methods)
452
453
  * @param [IO|String] io IO Object to read from
454
+ * @param [Hash] options parse options
455
+ * @param [true|false] :convert_special flag indicating whether special characters like &lt; are converted
453
456
  */
454
457
  static VALUE
455
- sax_parse(VALUE self, VALUE handler, VALUE io) {
456
- ox_sax_parse(handler, io);
458
+ sax_parse(int argc, VALUE *argv, VALUE self) {
459
+ int convert = 0;
460
+
461
+ if (argc < 2) {
462
+ rb_raise(rb_eArgError, "Wrong number of arguments to sax_parse.\n");
463
+ }
464
+ if (3 <= argc && rb_cHash == rb_obj_class(argv[2])) {
465
+ VALUE h = argv[2];
466
+ VALUE v;
467
+
468
+ if (Qnil != (v = rb_hash_lookup(h, convert_special_sym))) {
469
+ convert = (Qtrue == v);
470
+ }
471
+ }
472
+ ox_sax_parse(argv[0], argv[1], convert);
457
473
 
458
474
  return Qnil;
459
475
  }
@@ -607,7 +623,7 @@ void Init_ox() {
607
623
  rb_define_module_function(Ox, "parse_obj", to_obj, 1);
608
624
  rb_define_module_function(Ox, "parse", to_gen, 1);
609
625
  rb_define_module_function(Ox, "load", load_str, -1);
610
- rb_define_module_function(Ox, "sax_parse", sax_parse, 2);
626
+ rb_define_module_function(Ox, "sax_parse", sax_parse, -1);
611
627
 
612
628
  rb_define_module_function(Ox, "to_xml", dump, -1);
613
629
  rb_define_module_function(Ox, "dump", dump, -1);
@@ -668,6 +684,7 @@ void Init_ox() {
668
684
  with_dtd_sym = ID2SYM(rb_intern("with_dtd")); rb_ary_push(keep, with_dtd_sym);
669
685
  with_instruct_sym = ID2SYM(rb_intern("with_instructions")); rb_ary_push(keep, with_instruct_sym);
670
686
  with_xml_sym = ID2SYM(rb_intern("with_xml")); rb_ary_push(keep, with_xml_sym);
687
+ convert_special_sym = ID2SYM(rb_intern("convert_special")); rb_ary_push(keep, convert_special_sym);
671
688
 
672
689
  empty_string = rb_str_new2(""); rb_ary_push(keep, empty_string);
673
690
  zero_fixnum = INT2NUM(0); rb_ary_push(keep, zero_fixnum);
data/ext/ox/ox.h CHANGED
@@ -198,11 +198,13 @@ typedef struct _Options {
198
198
  extern VALUE parse(char *xml, ParseCallbacks pcb, char **endp, int trace, Effort effort);
199
199
  extern void _raise_error(const char *msg, const char *xml, const char *current, const char* file, int line);
200
200
 
201
- extern void ox_sax_parse(VALUE handler, VALUE io);
201
+ extern void ox_sax_parse(VALUE handler, VALUE io, int convert);
202
202
 
203
203
  extern char* write_obj_to_str(VALUE obj, Options copts);
204
204
  extern void write_obj_to_file(VALUE obj, const char *path, Options copts);
205
205
 
206
+ extern struct _Options default_options;
207
+
206
208
  extern VALUE Ox;
207
209
 
208
210
  extern ID at_id;
data/ext/ox/sax.c CHANGED
@@ -51,6 +51,7 @@ typedef struct _SaxDrive {
51
51
  int col;
52
52
  VALUE handler;
53
53
  int (*read_func)(struct _SaxDrive *dr);
54
+ int convert_special;
54
55
  union {
55
56
  int fd;
56
57
  VALUE io;
@@ -69,7 +70,7 @@ typedef struct _SaxDrive {
69
70
  #endif
70
71
  } *SaxDrive;
71
72
 
72
- static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io);
73
+ static void sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert);
73
74
  static void sax_drive_cleanup(SaxDrive dr);
74
75
  static int sax_drive_read(SaxDrive dr);
75
76
  static void sax_drive_error(SaxDrive dr, const char *msg, int critical);
@@ -84,6 +85,7 @@ static int read_text(SaxDrive dr);
84
85
  static int read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml);
85
86
  static char read_name_token(SaxDrive dr);
86
87
  static int read_quoted_value(SaxDrive dr);
88
+ static int collapse_special(char *str);
87
89
 
88
90
  static VALUE io_cb(VALUE rdr);
89
91
  static int read_from_io(SaxDrive dr);
@@ -156,10 +158,10 @@ str2sym(const char *str) {
156
158
 
157
159
 
158
160
  void
159
- ox_sax_parse(VALUE handler, VALUE io) {
161
+ ox_sax_parse(VALUE handler, VALUE io, int convert) {
160
162
  struct _SaxDrive dr;
161
163
 
162
- sax_drive_init(&dr, handler, io);
164
+ sax_drive_init(&dr, handler, io, convert);
163
165
  #if 0
164
166
  printf("*** sax_parse with these flags\n");
165
167
  printf(" has_instruct = %s\n", dr.has_instruct ? "true" : "false");
@@ -177,7 +179,7 @@ ox_sax_parse(VALUE handler, VALUE io) {
177
179
  }
178
180
 
179
181
  static void
180
- sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
182
+ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
181
183
  if (rb_respond_to(io, readpartial_id)) {
182
184
  VALUE rfd;
183
185
 
@@ -200,6 +202,7 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
200
202
  dr->line = 1;
201
203
  dr->col = 0;
202
204
  dr->handler = handler;
205
+ dr->convert_special = convert;
203
206
  dr->has_instruct = rb_respond_to(handler, instruct_id);
204
207
  dr->has_attr = rb_respond_to(handler, attr_id);
205
208
  dr->has_doctype = rb_respond_to(handler, doctype_id);
@@ -210,7 +213,11 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io) {
210
213
  dr->has_end_element = rb_respond_to(handler, end_element_id);
211
214
  dr->has_error = rb_respond_to(handler, error_id);
212
215
  #ifdef HAVE_RUBY_ENCODING_H
213
- dr->encoding = 0;
216
+ if ('\0' == *default_options.encoding) {
217
+ dr->encoding = 0;
218
+ } else {
219
+ dr->encoding = rb_enc_find(default_options.encoding);
220
+ }
214
221
  #endif
215
222
  }
216
223
 
@@ -586,8 +593,13 @@ read_text(SaxDrive dr) {
586
593
  }
587
594
  *(dr->cur - 1) = '\0';
588
595
  if (dr->has_text) {
589
- VALUE args[1];
590
-
596
+ VALUE args[1];
597
+
598
+ if (dr->convert_special) {
599
+ if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
600
+ sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
601
+ }
602
+ }
591
603
  args[0] = rb_str_new2(dr->str);
592
604
  #ifdef HAVE_RUBY_ENCODING_H
593
605
  if (0 != dr->encoding) {
@@ -642,6 +654,9 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
642
654
  VALUE args[2];
643
655
 
644
656
  args[0] = name;
657
+ if (0 != collapse_special(dr->str) && 0 != strchr(dr->str, '&')) {
658
+ sax_drive_error(dr, "invalid format, special character does not end with a semicolon", 0);
659
+ }
645
660
  args[1] = rb_str_new2(dr->str);
646
661
  #ifdef HAVE_RUBY_ENCODING_H
647
662
  if (0 != dr->encoding) {
@@ -758,3 +773,56 @@ read_from_fd(SaxDrive dr) {
758
773
  }
759
774
  return 0;
760
775
  }
776
+
777
+
778
+ static int
779
+ collapse_special(char *str) {
780
+ char *s = str;
781
+ char *b = str;
782
+
783
+ while ('\0' != *s) {
784
+ if ('&' == *s) {
785
+ int c;
786
+ char *end;
787
+
788
+ s++;
789
+ if ('#' == *s) {
790
+ s++;
791
+ c = (int)strtol(s, &end, 10);
792
+ if (';' != *end) {
793
+ return EDOM;
794
+ }
795
+ s = end + 1;
796
+ } else if (0 == strncasecmp(s, "lt;", 3)) {
797
+ c = '<';
798
+ s += 3;
799
+ } else if (0 == strncasecmp(s, "gt;", 3)) {
800
+ c = '>';
801
+ s += 3;
802
+ } else if (0 == strncasecmp(s, "amp;", 4)) {
803
+ c = '&';
804
+ s += 4;
805
+ } else if (0 == strncasecmp(s, "quot;", 5)) {
806
+ c = '"';
807
+ s += 5;
808
+ } else if (0 == strncasecmp(s, "apos;", 5)) {
809
+ c = '\'';
810
+ s += 5;
811
+ } else {
812
+ c = '?';
813
+ while (';' != *s++) {
814
+ if ('\0' == *s) {
815
+ return EDOM;
816
+ }
817
+ }
818
+ s++;
819
+ }
820
+ *b++ = (char)c;
821
+ } else {
822
+ *b++ = *s++;
823
+ }
824
+ }
825
+ *b = '\0';
826
+
827
+ return 0;
828
+ }
data/lib/ox/document.rb CHANGED
@@ -16,5 +16,15 @@ module Ox
16
16
  @attributes[:standalone] = prolog[:standalone] unless prolog[:standalone].nil?
17
17
  end
18
18
 
19
+ # Returns the first Element in the document.
20
+ def root()
21
+ unless @nodes.nil?
22
+ @nodes.each do |n|
23
+ return n if n.is_a?(::Ox::Element)
24
+ end
25
+ end
26
+ nil
27
+ end
28
+
19
29
  end # Document
20
30
  end # Ox
data/lib/ox/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '1.3.2'
4
+ VERSION = '1.3.3'
5
5
  end
data/test/perf_sax.rb CHANGED
@@ -14,7 +14,6 @@ end
14
14
 
15
15
  require 'optparse'
16
16
  require 'ox'
17
- require 'sample'
18
17
  require 'files'
19
18
  begin
20
19
  require 'nokogiri'
@@ -0,0 +1,37 @@
1
+ #!/usr/bin/env ruby -wW1
2
+
3
+ $: << '../lib'
4
+ $: << '../ext'
5
+
6
+ require 'stringio'
7
+ require 'ox'
8
+
9
+ class Sample < ::Ox::Sax
10
+ def start_element(name); puts "start: #{name}"; end
11
+ def end_element(name); puts "end: #{name}"; end
12
+ def attr(name, value); puts " #{name} => #{value}"; end
13
+ def text(value); puts "text #{value}"; end
14
+ end
15
+
16
+ io = StringIO.new(%{
17
+ <top name="sample">
18
+ <middle name="second">
19
+ <bottom name="third"/>
20
+ </middle>
21
+ </top>
22
+ })
23
+
24
+ handler = Sample.new()
25
+ Ox.sax_parse(handler, io)
26
+
27
+ # outputs
28
+ # start: top
29
+ # name => sample
30
+ # start: middle
31
+ # name => second
32
+ # start: bottom
33
+ # name => third
34
+ # end: bottom
35
+ # end: middle
36
+ # end: top
37
+
data/test/sax_test.rb CHANGED
@@ -92,10 +92,14 @@ class Func < ::Test::Unit::TestCase
92
92
  [:end_element, :top]])
93
93
  end
94
94
 
95
- def parse_compare(xml, expected, handler_class=AllSax)
95
+ def parse_compare(xml, expected, handler_class=AllSax, special=false)
96
96
  handler = handler_class.new()
97
97
  input = StringIO.new(xml)
98
- Ox.sax_parse(handler, input)
98
+ if special
99
+ Ox.sax_parse(handler, input, :convert_special => true)
100
+ else
101
+ Ox.sax_parse(handler, input)
102
+ end
99
103
  assert_equal(expected, handler.calls)
100
104
  end
101
105
 
@@ -245,6 +249,15 @@ encoding = "UTF-8" ?>},
245
249
  ])
246
250
  end
247
251
 
252
+ def test_sax_special
253
+ parse_compare(%{<top name="A&amp;Z">This is &lt;some&gt; text.</top>},
254
+ [[:start_element, :top],
255
+ [:attr, :name, 'A&Z'],
256
+ [:text, "This is <some> text."],
257
+ [:end_element, :top]
258
+ ], AllSax, true)
259
+ end
260
+
248
261
  def test_sax_text_no_term
249
262
  parse_compare(%{<top>This is some text.},
250
263
  [[:start_element, :top],
metadata CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
5
5
  segments:
6
6
  - 1
7
7
  - 3
8
- - 2
9
- version: 1.3.2
8
+ - 3
9
+ version: 1.3.3
10
10
  platform: ruby
11
11
  authors:
12
12
  - Peter Ohler
@@ -14,7 +14,7 @@ autorequire:
14
14
  bindir: bin
15
15
  cert_chain: []
16
16
 
17
- date: 2011-09-21 00:00:00 +09:00
17
+ date: 2011-09-24 00:00:00 +09:00
18
18
  default_executable:
19
19
  dependencies: []
20
20
 
@@ -82,6 +82,7 @@ files:
82
82
  - test/perf_sax.rb
83
83
  - test/perf_write.rb
84
84
  - test/sample.rb
85
+ - test/sax_example.rb
85
86
  - test/sax_test.rb
86
87
  - test/test.rb
87
88
  - test/Sample.graffle