ox 2.1.1 → 2.1.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/README.md +8 -1
- data/ext/ox/ox.c +54 -2
- data/ext/ox/ox.h +7 -0
- data/ext/ox/parse.c +37 -1
- data/ext/ox/sax.c +11 -4
- data/ext/ox/sax.h +3 -0
- data/ext/ox/sax_as.c +10 -0
- data/ext/ox/sax_buf.h +40 -1
- data/lib/ox/element.rb +20 -0
- data/lib/ox/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6e77323356f912ddf4b3db190f9b8224a084a361
|
4
|
+
data.tar.gz: 6f8d0822eb29ca69f431f3a5cc40b13634bf44a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d242bc1b433db4811d0ef565dca6b3429f48589f73f60e1285566824d7d0c916339112b3bcf1fb079515069e3cdd4cd03b27e9aa86572e2a243808dfa43ad78a
|
7
|
+
data.tar.gz: d3040cd0a7146bad8bead518413dc15e1fd863ce4cf697cd1326758134bf0babae038e9a5daaf36388f8f99aab0e36e23f30fd487e1511f2257f8a97b1ae944e
|
data/README.md
CHANGED
@@ -24,7 +24,7 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
24
24
|
|
25
25
|
## Links of Interest
|
26
26
|
|
27
|
-
[Ruby XML Gem Comparison](http://www.ohler.com/dev/xml_with_ruby/xml_with_ruby.html) for a
|
27
|
+
[Ruby XML Gem Comparison](http://www.ohler.com/dev/xml_with_ruby/xml_with_ruby.html) for a performance comparison between Ox, Nokogiri, and LibXML.
|
28
28
|
|
29
29
|
[Fast Ruby XML Serialization](http://www.ohler.com/dev/ruby_object_xml_serialization/ruby_object_xml_serialization.html) to see how Ox can be used as a faster replacement for Marshal.
|
30
30
|
|
@@ -34,6 +34,13 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## Release Notes
|
36
36
|
|
37
|
+
### Release 2.1.2
|
38
|
+
|
39
|
+
- Added skip option to parsing. This allows white space to be collapsed in two
|
40
|
+
different ways.
|
41
|
+
|
42
|
+
- Added respond_to? method for easy access method checking.
|
43
|
+
|
37
44
|
### Release 2.1.1
|
38
45
|
|
39
46
|
- Worked around a module reset and clear that occurs on some Rubies.
|
data/ext/ox/ox.c
CHANGED
@@ -132,8 +132,12 @@ static VALUE mode_sym;
|
|
132
132
|
static VALUE object_sym;
|
133
133
|
static VALUE opt_format_sym;
|
134
134
|
static VALUE optimized_sym;
|
135
|
-
static VALUE
|
135
|
+
static VALUE skip_none_sym;
|
136
|
+
static VALUE skip_return_sym;
|
137
|
+
static VALUE skip_sym;
|
138
|
+
static VALUE skip_white_sym;
|
136
139
|
static VALUE smart_sym;
|
140
|
+
static VALUE strict_sym;
|
137
141
|
static VALUE symbolize_keys_sym;
|
138
142
|
static VALUE symbolize_sym;
|
139
143
|
static VALUE tolerant_sym;
|
@@ -163,6 +167,7 @@ struct _Options ox_default_options = {
|
|
163
167
|
NoMode, /* mode */
|
164
168
|
StrictEffort, /* effort */
|
165
169
|
Yes, /* sym_keys */
|
170
|
+
NoSkip, /* skip */
|
166
171
|
#if HAS_PRIVATE_ENCODING
|
167
172
|
Qnil /* rb_enc */
|
168
173
|
#else
|
@@ -240,6 +245,7 @@ defuse_bom(char *xml, Options options) {
|
|
240
245
|
* - mode: [:object|:generic|:limited|nil] load method to use for XML
|
241
246
|
* - effort: [:strict|:tolerant|:auto_define] set the tolerance level for loading
|
242
247
|
* - symbolize_keys: [true|false|nil] symbolize element attribute keys or leave as Strings
|
248
|
+
* - skip: [:skip_none|:skip_return|:skip_white] determines how to handle white space in text
|
243
249
|
* @return [Hash] all current option settings.
|
244
250
|
*
|
245
251
|
* Note that an indent of less than zero will result in a tight one line output
|
@@ -273,6 +279,12 @@ get_def_opts(VALUE self) {
|
|
273
279
|
case NoEffort:
|
274
280
|
default: rb_hash_aset(opts, effort_sym, Qnil); break;
|
275
281
|
}
|
282
|
+
switch (ox_default_options.skip) {
|
283
|
+
case NoSkip: rb_hash_aset(opts, skip_sym, skip_none_sym); break;
|
284
|
+
case CrSkip: rb_hash_aset(opts, skip_sym, skip_return_sym); break;
|
285
|
+
case SpcSkip: rb_hash_aset(opts, skip_sym, skip_white_sym); break;
|
286
|
+
default: rb_hash_aset(opts, skip_sym, Qnil); break;
|
287
|
+
}
|
276
288
|
return opts;
|
277
289
|
}
|
278
290
|
|
@@ -291,6 +303,7 @@ get_def_opts(VALUE self) {
|
|
291
303
|
* @param [:object|:generic|:limited|nil] :mode load method to use for XML
|
292
304
|
* @param [:strict|:tolerant|:auto_define] :effort set the tolerance level for loading
|
293
305
|
* @param [true|false|nil] :symbolize_keys symbolize element attribute keys or leave as Strings
|
306
|
+
* @param [:skip_none|:skip_return|:skip_white] :skip determines how to handle white space in text
|
294
307
|
* @return [nil]
|
295
308
|
*/
|
296
309
|
static VALUE
|
@@ -360,6 +373,20 @@ set_def_opts(VALUE self, VALUE opts) {
|
|
360
373
|
} else {
|
361
374
|
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, :auto_define, or nil.\n");
|
362
375
|
}
|
376
|
+
|
377
|
+
v = rb_hash_aref(opts, skip_sym);
|
378
|
+
if (Qnil == v) {
|
379
|
+
ox_default_options.skip = NoSkip;
|
380
|
+
} else if (skip_none_sym == v) {
|
381
|
+
ox_default_options.skip = NoSkip;
|
382
|
+
} else if (skip_return_sym == v) {
|
383
|
+
ox_default_options.skip = CrSkip;
|
384
|
+
} else if (skip_white_sym == v) {
|
385
|
+
ox_default_options.skip = SpcSkip;
|
386
|
+
} else {
|
387
|
+
rb_raise(ox_parse_error_class, ":skip must be :skip_none, :skip_return, :skip_white, or nil.\n");
|
388
|
+
}
|
389
|
+
|
363
390
|
for (o = ynos; 0 != o->attr; o++) {
|
364
391
|
v = rb_hash_lookup(opts, o->sym);
|
365
392
|
if (Qnil == v) {
|
@@ -490,6 +517,18 @@ load(char *xml, int argc, VALUE *argv, VALUE self, VALUE encoding, Err err) {
|
|
490
517
|
rb_raise(ox_parse_error_class, ":effort must be :strict, :tolerant, or :auto_define.\n");
|
491
518
|
}
|
492
519
|
}
|
520
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
521
|
+
if (skip_none_sym == v) {
|
522
|
+
options.skip = NoSkip;
|
523
|
+
} else if (skip_return_sym == v) {
|
524
|
+
options.skip = CrSkip;
|
525
|
+
} else if (skip_white_sym == v) {
|
526
|
+
options.skip = SpcSkip;
|
527
|
+
} else {
|
528
|
+
rb_raise(ox_parse_error_class, ":effort must be :skip_none, :skip_return, or :skip_white.\n");
|
529
|
+
}
|
530
|
+
}
|
531
|
+
|
493
532
|
if (Qnil != (v = rb_hash_lookup(h, trace_sym))) {
|
494
533
|
Check_Type(v, T_FIXNUM);
|
495
534
|
options.trace = FIX2INT(v);
|
@@ -672,7 +711,8 @@ load_file(int argc, VALUE *argv, VALUE self) {
|
|
672
711
|
* @param [Hash] options parse options
|
673
712
|
* @param [true|false] :convert_special flag indicating special characters like < are converted
|
674
713
|
* @param [true|false] :symbolize flag indicating the parser symbolize element and attribute names
|
675
|
-
* @param [true|false] :smart flag indicating the parser
|
714
|
+
* @param [true|false] :smart flag indicating the parser uses hints if available (use with html)
|
715
|
+
* @param [:skip_return|:skip_white] :skip flag indicating the parser skips \r or collapses white space into a single space. Default is to skip nothing.
|
676
716
|
*/
|
677
717
|
static VALUE
|
678
718
|
sax_parse(int argc, VALUE *argv, VALUE self) {
|
@@ -681,6 +721,7 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
681
721
|
options.symbolize = 1;
|
682
722
|
options.convert_special = 0;
|
683
723
|
options.smart = 0;
|
724
|
+
options.skip = NoSkip;
|
684
725
|
|
685
726
|
if (argc < 2) {
|
686
727
|
rb_raise(ox_parse_error_class, "Wrong number of arguments to sax_parse.\n");
|
@@ -698,6 +739,13 @@ sax_parse(int argc, VALUE *argv, VALUE self) {
|
|
698
739
|
if (Qnil != (v = rb_hash_lookup(h, symbolize_sym))) {
|
699
740
|
options.symbolize = (Qtrue == v);
|
700
741
|
}
|
742
|
+
if (Qnil != (v = rb_hash_lookup(h, skip_sym))) {
|
743
|
+
if (skip_return_sym == v) {
|
744
|
+
options.skip = CrSkip;
|
745
|
+
} else if (skip_white_sym == v) {
|
746
|
+
options.skip = SpcSkip;
|
747
|
+
}
|
748
|
+
}
|
701
749
|
}
|
702
750
|
ox_sax_parse(argv[0], argv[1], &options);
|
703
751
|
|
@@ -953,6 +1001,10 @@ void Init_ox() {
|
|
953
1001
|
opt_format_sym = ID2SYM(rb_intern("opt_format")); rb_gc_register_address(&opt_format_sym);
|
954
1002
|
optimized_sym = ID2SYM(rb_intern("optimized")); rb_gc_register_address(&optimized_sym);
|
955
1003
|
ox_encoding_sym = ID2SYM(rb_intern("encoding")); rb_gc_register_address(&ox_encoding_sym);
|
1004
|
+
skip_none_sym = ID2SYM(rb_intern("skip_none")); rb_gc_register_address(&skip_none_sym);
|
1005
|
+
skip_return_sym = ID2SYM(rb_intern("skip_return")); rb_gc_register_address(&skip_return_sym);
|
1006
|
+
skip_sym = ID2SYM(rb_intern("skip")); rb_gc_register_address(&skip_sym);
|
1007
|
+
skip_white_sym = ID2SYM(rb_intern("skip_white")); rb_gc_register_address(&skip_white_sym);
|
956
1008
|
smart_sym = ID2SYM(rb_intern("smart")); rb_gc_register_address(&smart_sym);
|
957
1009
|
strict_sym = ID2SYM(rb_intern("strict")); rb_gc_register_address(&strict_sym);
|
958
1010
|
symbolize_keys_sym = ID2SYM(rb_intern("symbolize_keys")); rb_gc_register_address(&symbolize_keys_sym);
|
data/ext/ox/ox.h
CHANGED
@@ -118,6 +118,12 @@ typedef enum {
|
|
118
118
|
NoMode = 0
|
119
119
|
} LoadMode;
|
120
120
|
|
121
|
+
typedef enum {
|
122
|
+
NoSkip = 'n',
|
123
|
+
CrSkip = 'r',
|
124
|
+
SpcSkip = 's',
|
125
|
+
} SkipMode;
|
126
|
+
|
121
127
|
typedef struct _PInfo *PInfo;
|
122
128
|
|
123
129
|
typedef struct _ParseCallbacks {
|
@@ -149,6 +155,7 @@ typedef struct _Options {
|
|
149
155
|
char mode; /* LoadMode */
|
150
156
|
char effort; /* Effort */
|
151
157
|
char sym_keys; /* symbolize keys */
|
158
|
+
char skip; /* skip mode */
|
152
159
|
#if HAS_ENCODING_SUPPORT
|
153
160
|
rb_encoding *rb_enc;
|
154
161
|
#elif HAS_PRIVATE_ENCODING
|
data/ext/ox/parse.c
CHANGED
@@ -68,6 +68,21 @@ static int collapse_special(PInfo pi, char *str);
|
|
68
68
|
* all cases to parse the string.
|
69
69
|
*/
|
70
70
|
|
71
|
+
inline static int
|
72
|
+
is_white(char c) {
|
73
|
+
switch (c) {
|
74
|
+
case ' ':
|
75
|
+
case '\t':
|
76
|
+
case '\f':
|
77
|
+
case '\n':
|
78
|
+
case '\r':
|
79
|
+
return 1;
|
80
|
+
default:
|
81
|
+
return 0;
|
82
|
+
}
|
83
|
+
}
|
84
|
+
|
85
|
+
|
71
86
|
inline static void
|
72
87
|
next_non_white(PInfo pi) {
|
73
88
|
for (; 1; pi->s++) {
|
@@ -678,7 +693,28 @@ read_text(PInfo pi) {
|
|
678
693
|
return;
|
679
694
|
}
|
680
695
|
} else {
|
681
|
-
|
696
|
+
switch (pi->options->skip) {
|
697
|
+
case CrSkip:
|
698
|
+
if (buf != b && '\n' == c && '\r' == *(b - 1)) {
|
699
|
+
*(b - 1) = '\n';
|
700
|
+
} else {
|
701
|
+
*b++ = c;
|
702
|
+
}
|
703
|
+
break;
|
704
|
+
case SpcSkip:
|
705
|
+
if (is_white(c)) {
|
706
|
+
if (buf == b || ' ' != *(b - 1)) {
|
707
|
+
*b++ = ' ';
|
708
|
+
}
|
709
|
+
} else {
|
710
|
+
*b++ = c;
|
711
|
+
}
|
712
|
+
break;
|
713
|
+
case NoSkip:
|
714
|
+
default:
|
715
|
+
*b++ = c;
|
716
|
+
break;
|
717
|
+
}
|
682
718
|
}
|
683
719
|
break;
|
684
720
|
}
|
data/ext/ox/sax.c
CHANGED
@@ -907,6 +907,7 @@ read_element_end(SaxDrive dr) {
|
|
907
907
|
|
908
908
|
static char
|
909
909
|
read_text(SaxDrive dr) {
|
910
|
+
VALUE args[1];
|
910
911
|
char c;
|
911
912
|
int line = dr->buf.line;
|
912
913
|
int col = dr->buf.col - 1;
|
@@ -923,8 +924,6 @@ read_text(SaxDrive dr) {
|
|
923
924
|
*(dr->buf.tail - 1) = '\0';
|
924
925
|
}
|
925
926
|
if (dr->has.value) {
|
926
|
-
VALUE args[1];
|
927
|
-
|
928
927
|
if (dr->has.line) {
|
929
928
|
rb_ivar_set(dr->handler, ox_at_line_id, LONG2NUM(line));
|
930
929
|
}
|
@@ -934,11 +933,19 @@ read_text(SaxDrive dr) {
|
|
934
933
|
*args = dr->value_obj;
|
935
934
|
rb_funcall2(dr->handler, ox_value_id, 1, args);
|
936
935
|
} else if (dr->has.text) {
|
937
|
-
VALUE args[1];
|
938
|
-
|
939
936
|
if (dr->options.convert_special) {
|
940
937
|
ox_sax_collapse_special(dr, dr->buf.str, line, col);
|
941
938
|
}
|
939
|
+
switch (dr->options.skip) {
|
940
|
+
case CrSkip:
|
941
|
+
buf_collapse_return(dr->buf.str);
|
942
|
+
break;
|
943
|
+
case SpcSkip:
|
944
|
+
buf_collapse_white(dr->buf.str);
|
945
|
+
break;
|
946
|
+
default:
|
947
|
+
break;
|
948
|
+
}
|
942
949
|
args[0] = rb_str_new2(dr->buf.str);
|
943
950
|
#if HAS_ENCODING_SUPPORT
|
944
951
|
if (0 != dr->encoding) {
|
data/ext/ox/sax.h
CHANGED
@@ -35,11 +35,13 @@
|
|
35
35
|
#include "sax_has.h"
|
36
36
|
#include "sax_stack.h"
|
37
37
|
#include "sax_hint.h"
|
38
|
+
#include "ox.h"
|
38
39
|
|
39
40
|
typedef struct _SaxOptions {
|
40
41
|
int symbolize;
|
41
42
|
int convert_special;
|
42
43
|
int smart;
|
44
|
+
SkipMode skip;
|
43
45
|
} *SaxOptions;
|
44
46
|
|
45
47
|
typedef struct _SaxDrive {
|
@@ -60,6 +62,7 @@ typedef struct _SaxDrive {
|
|
60
62
|
#endif
|
61
63
|
} *SaxDrive;
|
62
64
|
|
65
|
+
extern void ox_collapse_return(char *str);
|
63
66
|
extern void ox_sax_parse(VALUE handler, VALUE io, SaxOptions options);
|
64
67
|
extern void ox_sax_drive_cleanup(SaxDrive dr);
|
65
68
|
extern void ox_sax_drive_error(SaxDrive dr, const char *msg);
|
data/ext/ox/sax_as.c
CHANGED
@@ -146,6 +146,16 @@ sax_value_as_s(VALUE self) {
|
|
146
146
|
if (dr->options.convert_special) {
|
147
147
|
ox_sax_collapse_special(dr, dr->buf.str, dr->buf.line, dr->buf.col);
|
148
148
|
}
|
149
|
+
switch (dr->options.skip) {
|
150
|
+
case CrSkip:
|
151
|
+
buf_collapse_return(dr->buf.str);
|
152
|
+
break;
|
153
|
+
case SpcSkip:
|
154
|
+
buf_collapse_white(dr->buf.str);
|
155
|
+
break;
|
156
|
+
default:
|
157
|
+
break;
|
158
|
+
}
|
149
159
|
rs = rb_str_new2(dr->buf.str);
|
150
160
|
#if HAS_ENCODING_SUPPORT
|
151
161
|
if (0 != dr->encoding) {
|
data/ext/ox/sax_buf.h
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
/*
|
1
|
+
/* sax_buf.h
|
2
2
|
* Copyright (c) 2011, Peter Ohler
|
3
3
|
* All rights reserved.
|
4
4
|
*
|
@@ -195,4 +195,43 @@ buf_checkback(Buf buf, CheckPt cp) {
|
|
195
195
|
return cp->c;
|
196
196
|
}
|
197
197
|
|
198
|
+
static inline void
|
199
|
+
buf_collapse_return(char *str) {
|
200
|
+
char *s = str;
|
201
|
+
char *back = str;
|
202
|
+
|
203
|
+
for (; '\0' != *s; s++) {
|
204
|
+
if (back != str && '\n' == *s && '\r' == *(back - 1)) {
|
205
|
+
*(back - 1) = '\n';
|
206
|
+
} else {
|
207
|
+
*back++ = *s;
|
208
|
+
}
|
209
|
+
}
|
210
|
+
*back = '\0';
|
211
|
+
}
|
212
|
+
|
213
|
+
static inline void
|
214
|
+
buf_collapse_white(char *str) {
|
215
|
+
char *s = str;
|
216
|
+
char *back = str;
|
217
|
+
|
218
|
+
for (; '\0' != *s; s++) {
|
219
|
+
switch(*s) {
|
220
|
+
case ' ':
|
221
|
+
case '\t':
|
222
|
+
case '\f':
|
223
|
+
case '\n':
|
224
|
+
case '\r':
|
225
|
+
if (back == str || ' ' != *(back - 1)) {
|
226
|
+
*back++ = ' ';
|
227
|
+
}
|
228
|
+
break;
|
229
|
+
default:
|
230
|
+
*back++ = *s;
|
231
|
+
break;
|
232
|
+
}
|
233
|
+
}
|
234
|
+
*back = '\0';
|
235
|
+
}
|
236
|
+
|
198
237
|
#endif /* __OX_SAX_BUF_H__ */
|
data/lib/ox/element.rb
CHANGED
@@ -133,11 +133,13 @@ module Ox
|
|
133
133
|
# @return [Element|Node|String|nil] the element, attribute value, or Node identified by the name
|
134
134
|
# @raise [NoMethodError] if no match is found
|
135
135
|
def method_missing(id, *args, &block)
|
136
|
+
has_some = false
|
136
137
|
ids = id.to_s
|
137
138
|
i = args[0].to_i # will be 0 if no arg or parsing fails
|
138
139
|
nodes.each do |n|
|
139
140
|
if (n.is_a?(Element) || n.is_a?(Instruct)) && (n.value == id || n.value == ids)
|
140
141
|
return n if 0 == i
|
142
|
+
has_some = true
|
141
143
|
i -= 1
|
142
144
|
end
|
143
145
|
end
|
@@ -145,9 +147,27 @@ module Ox
|
|
145
147
|
return @attributes[id] if @attributes.has_key?(id)
|
146
148
|
return @attributes[ids] if @attributes.has_key?(ids)
|
147
149
|
end
|
150
|
+
return nil if has_some
|
148
151
|
raise NoMethodError.new("#{ids} not found", name)
|
149
152
|
end
|
150
153
|
|
154
|
+
# @param [String|Symbol] id identifier of the attribute or method
|
155
|
+
# @param inc_all [Boolean] ignored
|
156
|
+
# @return true if the element has a member that matches the provided name.
|
157
|
+
def respond_to?(id, inc_all=false)
|
158
|
+
return true if super
|
159
|
+
id_str = id.to_s
|
160
|
+
id_sym = id.to_sym
|
161
|
+
nodes.each do |n|
|
162
|
+
return true if n.value == id_str || n.value == id_sym
|
163
|
+
end
|
164
|
+
if instance_variable_defined?(:@attributes)
|
165
|
+
return true if @attributes.has_key?(id_str)
|
166
|
+
return true if @attributes.has_key?(id_sym)
|
167
|
+
end
|
168
|
+
false
|
169
|
+
end
|
170
|
+
|
151
171
|
# @param [Array] path array of steps in a path
|
152
172
|
# @param [Array] found matching nodes
|
153
173
|
def alocate(path, found)
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.1
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Peter Ohler
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-
|
11
|
+
date: 2014-07-17 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: "A fast XML parser and object serializer that uses only standard C lib.\n
|
14
14
|
\ \nOptimized XML (Ox), as the name implies was written to provide speed
|
@@ -93,7 +93,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
93
93
|
version: '0'
|
94
94
|
requirements: []
|
95
95
|
rubyforge_project: ox
|
96
|
-
rubygems_version: 2.2.
|
96
|
+
rubygems_version: 2.2.2
|
97
97
|
signing_key:
|
98
98
|
specification_version: 4
|
99
99
|
summary: A fast XML parser and object serializer.
|