ox 2.1.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of ox might be problematic. Click here for more details.

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 4dfc6cdcfabac2cccd4fee65e58d77bd341d0dad
4
- data.tar.gz: 6550189e77b12edc3fa82dc70484f629c0d33961
3
+ metadata.gz: 6e77323356f912ddf4b3db190f9b8224a084a361
4
+ data.tar.gz: 6f8d0822eb29ca69f431f3a5cc40b13634bf44a4
5
5
  SHA512:
6
- metadata.gz: 3ea6d87ceb49d76762801b680f4b75f5715ed5fe1ad3c65aa39023cc4e64ac05b5928ed0c06770ddc32b754d797032979fc5df8ddeca2f6274d77f5d79a04ce2
7
- data.tar.gz: ff66b7c9f54457f89b5287dfe78fa2cfdf4d1c12bff7c67f3932e30323f6066f3bc98d1a2ce15338186e24258a4eb981b6b8d1a14fffbaedb10d467378f9e7a6
6
+ metadata.gz: d242bc1b433db4811d0ef565dca6b3429f48589f73f60e1285566824d7d0c916339112b3bcf1fb079515069e3cdd4cd03b27e9aa86572e2a243808dfa43ad78a
7
+ data.tar.gz: d3040cd0a7146bad8bead518413dc15e1fd863ce4cf697cd1326758134bf0babae038e9a5daaf36388f8f99aab0e36e23f30fd487e1511f2257f8a97b1ae944e
data/README.md CHANGED
@@ -24,7 +24,7 @@ A fast XML parser and Object marshaller as a Ruby gem.
24
24
 
25
25
  ## Links of Interest
26
26
 
27
- [Ruby XML Gem Comparison](http://www.ohler.com/dev/xml_with_ruby/xml_with_ruby.html) for a perfomance comparison between Ox, Nokogiri, and LibXML.
27
+ [Ruby XML Gem Comparison](http://www.ohler.com/dev/xml_with_ruby/xml_with_ruby.html) for a performance comparison between Ox, Nokogiri, and LibXML.
28
28
 
29
29
  [Fast Ruby XML Serialization](http://www.ohler.com/dev/ruby_object_xml_serialization/ruby_object_xml_serialization.html) to see how Ox can be used as a faster replacement for Marshal.
30
30
 
@@ -34,6 +34,13 @@ A fast XML parser and Object marshaller as a Ruby gem.
34
34
 
35
35
  ## Release Notes
36
36
 
37
+ ### Release 2.1.2
38
+
39
+ - Added skip option to parsing. This allows white space to be collapsed in two
40
+ different ways.
41
+
42
+ - Added respond_to? method for easy access method checking.
43
+
37
44
  ### Release 2.1.1
38
45
 
39
46
  - Worked around a module reset and clear that occurs on some Rubies.
@@ -132,8 +132,12 @@ static VALUE mode_sym;
132
132
  static VALUE object_sym;
133
133
  static VALUE opt_format_sym;
134
134
  static VALUE optimized_sym;
135
- static VALUE strict_sym;
135
+ static VALUE skip_none_sym;
136
+ static VALUE skip_return_sym;
137
+ static VALUE skip_sym;
138
+ static VALUE skip_white_sym;
136
139
  static VALUE smart_sym;
140
+ static VALUE strict_sym;
137
141
  static VALUE symbolize_keys_sym;
138
142
  static VALUE symbolize_sym;
139
143
  static VALUE tolerant_sym;
@@ -163,6 +167,7 @@ struct _Options ox_default_options = {
163
167
  NoMode, /* mode */
164
168
  StrictEffort, /* effort */
165
169
  Yes, /* sym_keys */
170
+ NoSkip, /* skip */
166
171
  #if HAS_PRIVATE_ENCODING
167
172
  Qnil /* rb_enc */
168
173
  #else
@@ -240,6 +245,7 @@ defuse_bom(char *xml, Options options) {
240
245
  * - mode: [:object|:generic|:limited|nil] load method to use for XML
241
246
  * - effort: [:strict|:tolerant|:auto_define] set the tolerance level for loading
242
247
  * - symbolize_keys: [true|false|nil] symbolize element attribute keys or leave as Strings
248
+ * - skip: [:skip_none|:skip_return|:skip_white] determines how to handle white space in text
243
249
  * @return [Hash] all current option settings.
244
250
  *
245
251
  * Note that an indent of less than zero will result in a tight one line output
@@ -273,6 +279,12 @@ get_def_opts(VALUE self) {
273
279
  case NoEffort:
274
280
  default: rb_hash_aset(opts, effort_sym, Qnil); break;
275
281
  }
282
+ switch (ox_default_options.skip) {
283
+ case NoSkip: rb_hash_aset(opts, skip_sym, skip_none_sym); break;
284
+ case CrSkip: rb_hash_aset(opts, skip_sym, skip_return_sym); break;
285
+ case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
286
+ default: rb_hash_aset(opts, skip_sym, Qnil); break;
287
+ }
276
288
  return opts;
277
289
  }
278
290
 
@@ -291,6 +303,7 @@ get_def_opts(VALUE self) {
291
303
  * @param [:object|:generic|:limited|nil] :mode load method to use for XML
292
304
  * @param [:strict|:tolerant|:auto_define] :effort set the tolerance level for loading
293
305
  * @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
306
+ * @param [:skip_none|:skip_return|:skip_white] :skip determines how to handle white space in text
294
307
  * @return [nil]
295
308
  */
296
309
  static VALUE
@@ -360,6 +373,20 @@ set_def_opts(VALUE self, VALUE opts) {
360
373
  } else {
361
374
  rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
362
375
  }
376
+
377
+ v = rb_hash_aref(opts, skip_sym);
378
+ if (Qnil == v) {
379
+ ox_default_options.skip = NoSkip;
380
+ } else if (skip_none_sym == v) {
381
+ ox_default_options.skip = NoSkip;
382
+ } else if (skip_return_sym == v) {
383
+ ox_default_options.skip = CrSkip;
384
+ } else if (skip_white_sym == v) {
385
+ ox_default_options.skip = SpcSkip;
386
+ } else {
387
+ rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or nil.\n");
388
+ }
389
+
363
390
  for (o = ynos; 0 != o->attr; o++) {
364
391
  v = rb_hash_lookup(opts, o->sym);
365
392
  if (Qnil == v) {
@@ -490,6 +517,18 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
490
517
  rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
491
518
  }
492
519
  }
520
+ if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
521
+ if (skip_none_sym == v) {
522
+ options.skip = NoSkip;
523
+ } else if (skip_return_sym == v) {
524
+ options.skip = CrSkip;
525
+ } else if (skip_white_sym == v) {
526
+ options.skip = SpcSkip;
527
+ } else {
528
+ rb_raise(ox_parse_error_class, ":effort must be :skip_none, :skip_return, or :skip_white.\n");
529
+ }
530
+ }
531
+
493
532
  if (Qnil != (v = rb_hash_lookup(h, trace_sym))) {
494
533
  Check_Type(v, T_FIXNUM);
495
534
  options.trace = FIX2INT(v);
@@ -672,7 +711,8 @@ load_file(int argc, VALUE *argv, VALUE self) {
672
711
  * @param [Hash] options parse options
673
712
  * @param [true|false] :convert_special flag indicating special characters like < are converted
674
713
  * @param [true|false] :symbolize flag indicating the parser symbolize element and attribute names
675
- * @param [true|false] :smart flag indicating the parser use hints if available (use with html)
714
+ * @param [true|false] :smart flag indicating the parser uses hints if available (use with html)
715
+ * @param [:skip_return|:skip_white] :skip flag indicating the parser skips \r or collapses white space into a single space. The default is to skip nothing.
676
716
  */
677
717
  static VALUE
678
718
  sax_parse(int argc, VALUE *argv, VALUE self) {
@@ -681,6 +721,7 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
681
721
  options.symbolize = 1;
682
722
  options.convert_special = 0;
683
723
  options.smart = 0;
724
+ options.skip = NoSkip;
684
725
 
685
726
  if (argc < 2) {
686
727
  rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
@@ -698,6 +739,13 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
698
739
  if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
699
740
  options.symbolize = (Qtrue == v);
700
741
  }
742
+ if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
743
+ if (skip_return_sym == v) {
744
+ options.skip = CrSkip;
745
+ } else if (skip_white_sym == v) {
746
+ options.skip = SpcSkip;
747
+ }
748
+ }
701
749
  }
702
750
  ox_sax_parse(argv[0], argv[1], &options);
703
751
 
@@ -953,6 +1001,10 @@ void Init_ox() {
953
1001
  opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_gc_register_address(&opt_format_sym);
954
1002
  optimized_sym = ID2SYM(rb_intern("optimized")); rb_gc_register_address(&optimized_sym);
955
1003
  ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_gc_register_address(&ox_encoding_sym);
1004
+ skip_none_sym = ID2SYM(rb_intern("skip_none")); rb_gc_register_address(&skip_none_sym);
1005
+ skip_return_sym = ID2SYM(rb_intern("skip_return")); rb_gc_register_address(&skip_return_sym);
1006
+ skip_sym = ID2SYM(rb_intern("skip")); rb_gc_register_address(&skip_sym);
1007
+ skip_white_sym = ID2SYM(rb_intern("skip_white")); rb_gc_register_address(&skip_white_sym);
956
1008
  smart_sym = ID2SYM(rb_intern("smart")); rb_gc_register_address(&smart_sym);
957
1009
  strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym);
958
1010
  symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys")); rb_gc_register_address(&symbolize_keys_sym);
@@ -118,6 +118,12 @@ typedef enum {
118
118
  NoMode = 0
119
119
  } LoadMode;
120
120
 
121
+ typedef enum {
122
+ NoSkip = 'n',
123
+ CrSkip = 'r',
124
+ SpcSkip = 's',
125
+ } SkipMode;
126
+
121
127
  typedef struct _PInfo *PInfo;
122
128
 
123
129
  typedef struct _ParseCallbacks {
@@ -149,6 +155,7 @@ typedef struct _Options {
149
155
  char mode; /* LoadMode */
150
156
  char effort; /* Effort */
151
157
  char sym_keys; /* symbolize keys */
158
+ char skip; /* skip mode */
152
159
  #if HAS_ENCODING_SUPPORT
153
160
  rb_encoding *rb_enc;
154
161
  #elif HAS_PRIVATE_ENCODING
@@ -68,6 +68,21 @@ static int collapse_special(PInfo pi, char *str);
68
68
  * all cases to parse the string.
69
69
  */
70
70
 
71
+ inline static int
72
+ is_white(char c) {
73
+ switch (c) {
74
+ case ' ':
75
+ case '\t':
76
+ case '\f':
77
+ case '\n':
78
+ case '\r':
79
+ return 1;
80
+ default:
81
+ return 0;
82
+ }
83
+ }
84
+
85
+
71
86
  inline static void
72
87
  next_non_white(PInfo pi) {
73
88
  for (; 1; pi->s++) {
@@ -678,7 +693,28 @@ read_text(PInfo pi) {
678
693
  return;
679
694
  }
680
695
  } else {
681
- *b++ = c;
696
+ switch (pi->options->skip) {
697
+ case CrSkip:
698
+ if (buf != b && '\n' == c && '\r' == *(b - 1)) {
699
+ *(b - 1) = '\n';
700
+ } else {
701
+ *b++ = c;
702
+ }
703
+ break;
704
+ case SpcSkip:
705
+ if (is_white(c)) {
706
+ if (buf == b || ' ' != *(b - 1)) {
707
+ *b++ = ' ';
708
+ }
709
+ } else {
710
+ *b++ = c;
711
+ }
712
+ break;
713
+ case NoSkip:
714
+ default:
715
+ *b++ = c;
716
+ break;
717
+ }
682
718
  }
683
719
  break;
684
720
  }
@@ -907,6 +907,7 @@ read_element_end(SaxDrive dr) {
907
907
 
908
908
  static char
909
909
  read_text(SaxDrive dr) {
910
+ VALUE args[1];
910
911
  char c;
911
912
  int line = dr->buf.line;
912
913
  int col = dr->buf.col - 1;
@@ -923,8 +924,6 @@ read_text(SaxDrive dr) {
923
924
  *(dr->buf.tail - 1) = '\0';
924
925
  }
925
926
  if (dr->has.value) {
926
- VALUE args[1];
927
-
928
927
  if (dr->has.line) {
929
928
  rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
930
929
  }
@@ -934,11 +933,19 @@ read_text(SaxDrive dr) {
934
933
  *args = dr->value_obj;
935
934
  rb_funcall2(dr->handler, ox_value_id, 1, args);
936
935
  } else if (dr->has.text) {
937
- VALUE args[1];
938
-
939
936
  if (dr->options.convert_special) {
940
937
  ox_sax_collapse_special(dr, dr->buf.str, line, col);
941
938
  }
939
+ switch (dr->options.skip) {
940
+ case CrSkip:
941
+ buf_collapse_return(dr->buf.str);
942
+ break;
943
+ case SpcSkip:
944
+ buf_collapse_white(dr->buf.str);
945
+ break;
946
+ default:
947
+ break;
948
+ }
942
949
  args[0] = rb_str_new2(dr->buf.str);
943
950
  #if HAS_ENCODING_SUPPORT
944
951
  if (0 != dr->encoding) {
@@ -35,11 +35,13 @@
35
35
  #include "sax_has.h"
36
36
  #include "sax_stack.h"
37
37
  #include "sax_hint.h"
38
+ #include "ox.h"
38
39
 
39
40
  typedef struct _SaxOptions {
40
41
  int symbolize;
41
42
  int convert_special;
42
43
  int smart;
44
+ SkipMode skip;
43
45
  } *SaxOptions;
44
46
 
45
47
  typedef struct _SaxDrive {
@@ -60,6 +62,7 @@ typedef struct _SaxDrive {
60
62
  #endif
61
63
  } *SaxDrive;
62
64
 
65
+ extern void ox_collapse_return(char *str);
63
66
  extern void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options);
64
67
  extern void ox_sax_drive_cleanup(SaxDrive dr);
65
68
  extern void ox_sax_drive_error(SaxDrive dr, const char *msg);
@@ -146,6 +146,16 @@ sax_value_as_s(VALUE self) {
146
146
  if (dr->options.convert_special) {
147
147
  ox_sax_collapse_special(dr, dr->buf.str, dr->buf.line, dr->buf.col);
148
148
  }
149
+ switch (dr->options.skip) {
150
+ case CrSkip:
151
+ buf_collapse_return(dr->buf.str);
152
+ break;
153
+ case SpcSkip:
154
+ buf_collapse_white(dr->buf.str);
155
+ break;
156
+ default:
157
+ break;
158
+ }
149
159
  rs = rb_str_new2(dr->buf.str);
150
160
  #if HAS_ENCODING_SUPPORT
151
161
  if (0 != dr->encoding) {
@@ -1,4 +1,4 @@
1
- /* sax_stack.h
1
+ /* sax_buf.h
2
2
  * Copyright (c) 2011, Peter Ohler
3
3
  * All rights reserved.
4
4
  *
@@ -195,4 +195,43 @@ buf_checkback(Buf buf, CheckPt cp) {
195
195
  return cp->c;
196
196
  }
197
197
 
198
+ static inline void
199
+ buf_collapse_return(char *str) {
200
+ char *s = str;
201
+ char *back = str;
202
+
203
+ for (; '\0' != *s; s++) {
204
+ if (back != str && '\n' == *s && '\r' == *(back - 1)) {
205
+ *(back - 1) = '\n';
206
+ } else {
207
+ *back++ = *s;
208
+ }
209
+ }
210
+ *back = '\0';
211
+ }
212
+
213
+ static inline void
214
+ buf_collapse_white(char *str) {
215
+ char *s = str;
216
+ char *back = str;
217
+
218
+ for (; '\0' != *s; s++) {
219
+ switch(*s) {
220
+ case ' ':
221
+ case '\t':
222
+ case '\f':
223
+ case '\n':
224
+ case '\r':
225
+ if (back == str || ' ' != *(back - 1)) {
226
+ *back++ = ' ';
227
+ }
228
+ break;
229
+ default:
230
+ *back++ = *s;
231
+ break;
232
+ }
233
+ }
234
+ *back = '\0';
235
+ }
236
+
198
237
  #endif /* __OX_SAX_BUF_H__ */
@@ -133,11 +133,13 @@ module Ox
133
133
  # @return [Element|Node|String|nil] the element, attribute value, or Node identified by the name
134
134
  # @raise [NoMethodError] if no match is found
135
135
  def method_missing(id, *args, &block)
136
+ has_some = false
136
137
  ids = id.to_s
137
138
  i = args[0].to_i # will be 0 if no arg or parsing fails
138
139
  nodes.each do |n|
139
140
  if (n.is_a?(Element) || n.is_a?(Instruct)) && (n.value == id || n.value == ids)
140
141
  return n if 0 == i
142
+ has_some = true
141
143
  i -= 1
142
144
  end
143
145
  end
@@ -145,9 +147,27 @@ module Ox
145
147
  return @attributes[id] if @attributes.has_key?(id)
146
148
  return @attributes[ids] if @attributes.has_key?(ids)
147
149
  end
150
+ return nil if has_some
148
151
  raise NoMethodError.new("#{ids} not found", name)
149
152
  end
150
153
 
154
+ # @param [String|Symbol] id identifier of the attribute or method
155
+ # @param inc_all [Boolean] ignored
156
+ # @return true if the element has a member that matches the provided name.
157
+ def respond_to?(id, inc_all=false)
158
+ return true if super
159
+ id_str = id.to_s
160
+ id_sym = id.to_sym
161
+ nodes.each do |n|
162
+ return true if n.value == id_str || n.value == id_sym
163
+ end
164
+ if instance_variable_defined?(:@attributes)
165
+ return true if @attributes.has_key?(id_str)
166
+ return true if @attributes.has_key?(id_sym)
167
+ end
168
+ false
169
+ end
170
+
151
171
  # @param [Array] path array of steps in a path
152
172
  # @param [Array] found matching nodes
153
173
  def alocate(path, found)
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Ox
3
3
  # Current version of the module.
4
- VERSION = '2.1.1'
4
+ VERSION = '2.1.2'
5
5
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ox
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 2.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Peter Ohler
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-12 00:00:00.000000000 Z
11
+ date: 2014-07-17 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: "A fast XML parser and object serializer that uses only standard C lib.\n
14
14
  \ \nOptimized XML (Ox), as the name implies was written to provide speed
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
93
93
  version: '0'
94
94
  requirements: []
95
95
  rubyforge_project: ox
96
- rubygems_version: 2.2.0
96
+ rubygems_version: 2.2.2
97
97
  signing_key:
98
98
  specification_version: 4
99
99
  summary: A fast XML parser and object serializer.