ox 1.8.9 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of ox might be problematic. Click here for more details.
- data/README.md +7 -0
- data/ext/ox/ox.c +10 -6
- data/ext/ox/ox.h +2 -0
- data/ext/ox/sax.c +96 -0
- data/lib/ox/element.rb +20 -3
- data/lib/ox/sax.rb +5 -0
- data/lib/ox/version.rb +1 -1
- metadata +2 -2
data/README.md
CHANGED
@@ -34,6 +34,13 @@ A fast XML parser and Object marshaller as a Ruby gem.
|
|
34
34
|
|
35
35
|
## <a name="release">Release Notes</a>
|
36
36
|
|
37
|
+
### Release 1.9.0
|
38
|
+
|
39
|
+
- Added a new feature to Ox::Element.locate() that allows filtering by node Class.
|
40
|
+
|
41
|
+
- Added feature to the Sax parser. If @line is defined in the handler it is set to the line number of the xml file
|
42
|
+
before making callbacks. The same goes for @column but it is updated with the column.
|
43
|
+
|
37
44
|
### Release 1.8.9
|
38
45
|
|
39
46
|
- Fixed bug in element start and end name checking.
|
data/ext/ox/ox.c
CHANGED
@@ -48,8 +48,10 @@ void Init_ox();
|
|
48
48
|
|
49
49
|
VALUE Ox = Qnil;
|
50
50
|
|
51
|
+
ID ox_at_column_id;
|
51
52
|
ID ox_at_content_id;
|
52
53
|
ID ox_at_id;
|
54
|
+
ID ox_at_line_id;
|
53
55
|
ID ox_at_value_id;
|
54
56
|
ID ox_attr_id;
|
55
57
|
ID ox_attr_value_id;
|
@@ -85,8 +87,8 @@ ID ox_text_id;
|
|
85
87
|
ID ox_to_c_id;
|
86
88
|
ID ox_to_s_id;
|
87
89
|
ID ox_to_sym_id;
|
88
|
-
ID ox_tv_sec_id;
|
89
90
|
ID ox_tv_nsec_id;
|
91
|
+
ID ox_tv_sec_id;
|
90
92
|
ID ox_tv_usec_id;
|
91
93
|
ID ox_value_id;
|
92
94
|
|
@@ -804,7 +806,10 @@ void Init_ox() {
|
|
804
806
|
rb_require("date");
|
805
807
|
rb_require("stringio");
|
806
808
|
|
809
|
+
ox_at_column_id = rb_intern("@column");
|
810
|
+
ox_at_content_id = rb_intern("@content");
|
807
811
|
ox_at_id = rb_intern("at");
|
812
|
+
ox_at_line_id = rb_intern("@line");
|
808
813
|
ox_at_value_id = rb_intern("@value");
|
809
814
|
ox_attr_id = rb_intern("attr");
|
810
815
|
ox_attr_value_id = rb_intern("attr_value");
|
@@ -812,15 +817,14 @@ void Init_ox() {
|
|
812
817
|
ox_beg_id = rb_intern("@beg");
|
813
818
|
ox_cdata_id = rb_intern("cdata");
|
814
819
|
ox_comment_id = rb_intern("comment");
|
815
|
-
ox_at_content_id = rb_intern("@content");
|
816
820
|
ox_den_id = rb_intern("@den");
|
817
821
|
ox_doctype_id = rb_intern("doctype");
|
818
|
-
ox_external_encoding_id = rb_intern("external_encoding");
|
819
822
|
ox_end_element_id = rb_intern("end_element");
|
820
823
|
ox_end_id = rb_intern("@end");
|
821
824
|
ox_end_instruct_id = rb_intern("end_instruct");
|
822
825
|
ox_error_id = rb_intern("error");
|
823
826
|
ox_excl_id = rb_intern("@excl");
|
827
|
+
ox_external_encoding_id = rb_intern("external_encoding");
|
824
828
|
ox_fileno_id = rb_intern("fileno");
|
825
829
|
ox_force_encoding_id = rb_intern("force_encoding");
|
826
830
|
ox_inspect_id = rb_intern("inspect");
|
@@ -833,18 +837,18 @@ void Init_ox() {
|
|
833
837
|
ox_nodes_id = rb_intern("@nodes");
|
834
838
|
ox_num_id = rb_intern("@num");
|
835
839
|
ox_parse_id = rb_intern("parse");
|
836
|
-
ox_readpartial_id = rb_intern("readpartial");
|
837
840
|
ox_read_id = rb_intern("read");
|
841
|
+
ox_readpartial_id = rb_intern("readpartial");
|
838
842
|
ox_start_element_id = rb_intern("start_element");
|
839
843
|
ox_string_id = rb_intern("string");
|
840
844
|
ox_text_id = rb_intern("text");
|
841
|
-
ox_value_id = rb_intern("value");
|
842
845
|
ox_to_c_id = rb_intern("to_c");
|
843
846
|
ox_to_s_id = rb_intern("to_s");
|
844
847
|
ox_to_sym_id = rb_intern("to_sym");
|
845
|
-
ox_tv_sec_id = rb_intern("tv_sec");
|
846
848
|
ox_tv_nsec_id = rb_intern("tv_nsec");
|
849
|
+
ox_tv_sec_id = rb_intern("tv_sec");
|
847
850
|
ox_tv_usec_id = rb_intern("tv_usec");
|
851
|
+
ox_value_id = rb_intern("value");
|
848
852
|
|
849
853
|
ox_time_class = rb_const_get(rb_cObject, rb_intern("Time"));
|
850
854
|
ox_date_class = rb_const_get(rb_cObject, rb_intern("Date"));
|
data/ext/ox/ox.h
CHANGED
@@ -222,8 +222,10 @@ extern struct _Options ox_default_options;
|
|
222
222
|
|
223
223
|
extern VALUE Ox;
|
224
224
|
|
225
|
+
extern ID ox_at_column_id;
|
225
226
|
extern ID ox_at_content_id;
|
226
227
|
extern ID ox_at_id;
|
228
|
+
extern ID ox_at_line_id;
|
227
229
|
extern ID ox_at_value_id;
|
228
230
|
extern ID ox_attr_id;
|
229
231
|
extern ID ox_attr_value_id;
|
data/ext/ox/sax.c
CHANGED
@@ -72,6 +72,8 @@ typedef struct _SaxDrive {
|
|
72
72
|
int has_start_element;
|
73
73
|
int has_end_element;
|
74
74
|
int has_error;
|
75
|
+
int has_line;
|
76
|
+
int has_column;
|
75
77
|
#if HAS_ENCODING_SUPPORT
|
76
78
|
rb_encoding *encoding;
|
77
79
|
#elif HAS_PRIVATE_ENCODING
|
@@ -253,6 +255,8 @@ ox_sax_parse(VALUE handler, VALUE io, int convert) {
|
|
253
255
|
printf(" has_start_element = %s\n", dr.has_start_element ? "true" : "false");
|
254
256
|
printf(" has_end_element = %s\n", dr.has_end_element ? "true" : "false");
|
255
257
|
printf(" has_error = %s\n", dr.has_error ? "true" : "false");
|
258
|
+
printf(" has_line = %s\n", dr.has_line ? "true" : "false");
|
259
|
+
printf(" has_column = %s\n", dr.has_column ? "true" : "false");
|
256
260
|
#endif
|
257
261
|
read_children(&dr, 1);
|
258
262
|
sax_drive_cleanup(&dr);
|
@@ -338,6 +342,8 @@ sax_drive_init(SaxDrive dr, VALUE handler, VALUE io, int convert) {
|
|
338
342
|
dr->has_start_element = respond_to(handler, ox_start_element_id);
|
339
343
|
dr->has_end_element = respond_to(handler, ox_end_element_id);
|
340
344
|
dr->has_error = respond_to(handler, ox_error_id);
|
345
|
+
dr->has_line = (Qtrue == rb_ivar_defined(handler, ox_at_line_id));
|
346
|
+
dr->has_column = (Qtrue == rb_ivar_defined(handler, ox_at_column_id));
|
341
347
|
#if HAS_ENCODING_SUPPORT
|
342
348
|
if ('\0' == *ox_default_options.encoding) {
|
343
349
|
VALUE encoding;
|
@@ -424,6 +430,12 @@ sax_drive_error(SaxDrive dr, const char *msg, int critical) {
|
|
424
430
|
args[0] = rb_str_new2(msg);
|
425
431
|
args[1] = INT2FIX(dr->line);
|
426
432
|
args[2] = INT2FIX(dr->col);
|
433
|
+
if (dr->has_line) {
|
434
|
+
rb_ivar_set(dr->handler, ox_at_line_id, args[1]);
|
435
|
+
}
|
436
|
+
if (dr->has_column) {
|
437
|
+
rb_ivar_set(dr->handler, ox_at_column_id, args[2]);
|
438
|
+
}
|
427
439
|
rb_funcall2(dr->handler, ox_error_id, 3, args);
|
428
440
|
} else if (critical) {
|
429
441
|
sax_drive_cleanup(dr);
|
@@ -577,6 +589,12 @@ read_instruction(SaxDrive dr) {
|
|
577
589
|
if (dr->has_instruct) {
|
578
590
|
VALUE args[1];
|
579
591
|
|
592
|
+
if (dr->has_line) {
|
593
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
594
|
+
}
|
595
|
+
if (dr->has_column) {
|
596
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
597
|
+
}
|
580
598
|
args[0] = target;
|
581
599
|
rb_funcall2(dr->handler, ox_instruct_id, 1, args);
|
582
600
|
}
|
@@ -603,6 +621,12 @@ read_instruction(SaxDrive dr) {
|
|
603
621
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
604
622
|
}
|
605
623
|
#endif
|
624
|
+
if (dr->has_line) {
|
625
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
626
|
+
}
|
627
|
+
if (dr->has_column) {
|
628
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
629
|
+
}
|
606
630
|
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
607
631
|
}
|
608
632
|
dr->cur = cend;
|
@@ -616,6 +640,12 @@ read_instruction(SaxDrive dr) {
|
|
616
640
|
if (dr->has_end_instruct) {
|
617
641
|
VALUE args[1];
|
618
642
|
|
643
|
+
if (dr->has_line) {
|
644
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
645
|
+
}
|
646
|
+
if (dr->has_column) {
|
647
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
648
|
+
}
|
619
649
|
args[0] = target;
|
620
650
|
rb_funcall2(dr->handler, ox_end_instruct_id, 1, args);
|
621
651
|
}
|
@@ -641,6 +671,12 @@ read_doctype(SaxDrive dr) {
|
|
641
671
|
if (dr->has_doctype) {
|
642
672
|
VALUE args[1];
|
643
673
|
|
674
|
+
if (dr->has_line) {
|
675
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
676
|
+
}
|
677
|
+
if (dr->has_column) {
|
678
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
679
|
+
}
|
644
680
|
args[0] = rb_str_new2(dr->str);
|
645
681
|
rb_funcall2(dr->handler, ox_doctype_id, 1, args);
|
646
682
|
}
|
@@ -688,6 +724,12 @@ read_cdata(SaxDrive dr) {
|
|
688
724
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
689
725
|
}
|
690
726
|
#endif
|
727
|
+
if (dr->has_line) {
|
728
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
729
|
+
}
|
730
|
+
if (dr->has_column) {
|
731
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
732
|
+
}
|
691
733
|
rb_funcall2(dr->handler, ox_cdata_id, 1, args);
|
692
734
|
}
|
693
735
|
dr->str = 0;
|
@@ -736,6 +778,12 @@ read_comment(SaxDrive dr) {
|
|
736
778
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
737
779
|
}
|
738
780
|
#endif
|
781
|
+
if (dr->has_line) {
|
782
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
783
|
+
}
|
784
|
+
if (dr->has_column) {
|
785
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
786
|
+
}
|
739
787
|
rb_funcall2(dr->handler, ox_comment_id, 1, args);
|
740
788
|
}
|
741
789
|
dr->str = 0;
|
@@ -761,6 +809,12 @@ read_element(SaxDrive dr) {
|
|
761
809
|
if (dr->has_start_element) {
|
762
810
|
VALUE args[1];
|
763
811
|
|
812
|
+
if (dr->has_line) {
|
813
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
814
|
+
}
|
815
|
+
if (dr->has_column) {
|
816
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
817
|
+
}
|
764
818
|
args[0] = name;
|
765
819
|
rb_funcall2(dr->handler, ox_start_element_id, 1, args);
|
766
820
|
}
|
@@ -786,6 +840,12 @@ read_element(SaxDrive dr) {
|
|
786
840
|
if (dr->has_end_element) {
|
787
841
|
VALUE args[1];
|
788
842
|
|
843
|
+
if (dr->has_line) {
|
844
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
845
|
+
}
|
846
|
+
if (dr->has_column) {
|
847
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
848
|
+
}
|
789
849
|
args[0] = name;
|
790
850
|
rb_funcall2(dr->handler, ox_end_element_id, 1, args);
|
791
851
|
}
|
@@ -794,6 +854,12 @@ read_element(SaxDrive dr) {
|
|
794
854
|
return -1;
|
795
855
|
}
|
796
856
|
if (0 != ename && 0 != strcmp(ename, dr->str)) {
|
857
|
+
if (dr->has_line) {
|
858
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
859
|
+
}
|
860
|
+
if (dr->has_column) {
|
861
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
862
|
+
}
|
797
863
|
//printf("*** ename: %s close: %s\n", ename, dr->str);
|
798
864
|
sax_drive_error(dr, "invalid format, element start and end names do not match", 1);
|
799
865
|
return -1;
|
@@ -801,6 +867,12 @@ read_element(SaxDrive dr) {
|
|
801
867
|
if (0 != dr->has_end_element) {
|
802
868
|
VALUE args[1];
|
803
869
|
|
870
|
+
if (dr->has_line) {
|
871
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
872
|
+
}
|
873
|
+
if (dr->has_column) {
|
874
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
875
|
+
}
|
804
876
|
args[0] = name;
|
805
877
|
rb_funcall2(dr->handler, ox_end_element_id, 1, args);
|
806
878
|
}
|
@@ -826,6 +898,12 @@ read_text(SaxDrive dr) {
|
|
826
898
|
if (dr->has_value) {
|
827
899
|
VALUE args[1];
|
828
900
|
|
901
|
+
if (dr->has_line) {
|
902
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
903
|
+
}
|
904
|
+
if (dr->has_column) {
|
905
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
906
|
+
}
|
829
907
|
*args = dr->value_obj;
|
830
908
|
rb_funcall2(dr->handler, ox_value_id, 1, args);
|
831
909
|
} else if (dr->has_text) {
|
@@ -846,6 +924,12 @@ read_text(SaxDrive dr) {
|
|
846
924
|
rb_funcall(args[0], ox_force_encoding_id, 1, dr->encoding);
|
847
925
|
}
|
848
926
|
#endif
|
927
|
+
if (dr->has_line) {
|
928
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
929
|
+
}
|
930
|
+
if (dr->has_column) {
|
931
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
932
|
+
}
|
849
933
|
rb_funcall2(dr->handler, ox_text_id, 1, args);
|
850
934
|
}
|
851
935
|
dr->str = 0;
|
@@ -897,6 +981,12 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
897
981
|
if (dr->has_attr_value) {
|
898
982
|
VALUE args[2];
|
899
983
|
|
984
|
+
if (dr->has_line) {
|
985
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
986
|
+
}
|
987
|
+
if (dr->has_column) {
|
988
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
989
|
+
}
|
900
990
|
args[0] = name;
|
901
991
|
args[1] = dr->value_obj;
|
902
992
|
rb_funcall2(dr->handler, ox_attr_value_id, 2, args);
|
@@ -917,6 +1007,12 @@ read_attrs(SaxDrive dr, char c, char termc, char term2, int is_xml) {
|
|
917
1007
|
rb_funcall(args[1], ox_force_encoding_id, 1, dr->encoding);
|
918
1008
|
}
|
919
1009
|
#endif
|
1010
|
+
if (dr->has_line) {
|
1011
|
+
rb_ivar_set(dr->handler, ox_at_line_id, INT2FIX(dr->line));
|
1012
|
+
}
|
1013
|
+
if (dr->has_column) {
|
1014
|
+
rb_ivar_set(dr->handler, ox_at_column_id, INT2FIX(dr->col));
|
1015
|
+
}
|
920
1016
|
rb_funcall2(dr->handler, ox_attr_id, 2, args);
|
921
1017
|
}
|
922
1018
|
c = next_non_white(dr);
|
data/lib/ox/element.rb
CHANGED
@@ -106,9 +106,9 @@ module Ox
|
|
106
106
|
# should be matched. Note that unlike XPath, the element index starts at 0
|
107
107
|
# similar to Ruby but contrary to XPath.
|
108
108
|
#
|
109
|
-
# Element names can also be wildcard characters. A * indicates any
|
110
|
-
#
|
111
|
-
#
|
109
|
+
# Element names can also be wildcard characters. A * indicates any descendant should be followed. A ? indicates any
|
110
|
+
# single Element can match the wildcard. A ^ character followed by the name of a Class will match any node of the
|
111
|
+
# specified class. Valid class names are Element, Comment, String (or Text), CData, DocType.
|
112
112
|
#
|
113
113
|
# Examples are:
|
114
114
|
# * <code>element.locate("Family/Pete/*")</code> returns all children of the Pete Element.
|
@@ -116,6 +116,7 @@ module Ox
|
|
116
116
|
# * <code>element.locate("Family/?[<3]")</code> returns the first 3 elements in the Family Element.
|
117
117
|
# * <code>element.locate("Family/?/@age")</code> returns the age attribute for each child in the Family Element.
|
118
118
|
# * <code>element.locate("Family/*/@type")</code> returns the type attribute value for descendants of the Family.
|
119
|
+
# * <code>element.locate("Family/^Comment")</code> returns any comments that are a child of Family.
|
119
120
|
#
|
120
121
|
# @param [String] path path to the Nodes to locate
|
121
122
|
def locate(path)
|
@@ -178,6 +179,22 @@ module Ox
|
|
178
179
|
end
|
179
180
|
if '?' == name or '*' == name
|
180
181
|
match = nodes
|
182
|
+
elsif '^' == name[0..0] # 1.8.7 thinks name[0] is a fixnum
|
183
|
+
case name[1..-1]
|
184
|
+
when 'Element'
|
185
|
+
match = nodes.select { |e| e.is_a?(Element) }
|
186
|
+
when 'String', 'Text'
|
187
|
+
match = nodes.select { |e| e.is_a?(String) }
|
188
|
+
when 'Comment'
|
189
|
+
match = nodes.select { |e| e.is_a?(Comment) }
|
190
|
+
when 'CData'
|
191
|
+
match = nodes.select { |e| e.is_a?(CData) }
|
192
|
+
when 'DocType'
|
193
|
+
match = nodes.select { |e| e.is_a?(DocType) }
|
194
|
+
else
|
195
|
+
#puts "*** no match on #{name}"
|
196
|
+
match = []
|
197
|
+
end
|
181
198
|
else
|
182
199
|
match = nodes.select { |e| e.is_a?(Element) and name == e.name }
|
183
200
|
end
|
data/lib/ox/sax.rb
CHANGED
@@ -43,9 +43,14 @@ module Ox
|
|
43
43
|
# def start_element(name); end
|
44
44
|
# def end_element(name); end
|
45
45
|
#
|
46
|
+
# Initializing @line in the initializer will cause that variable to be updated before each callback with the XML line
|
47
|
+
# number. The same is true for the @column but it will be updated with the column in the XML file that is the end of
|
48
|
+
# the element or node just read. Note that this is the end, not the start, of the node, attribute, or text.
|
46
49
|
class Sax
|
47
50
|
# Create a new instance of the Sax handler class.
|
48
51
|
def initialize()
|
52
|
+
#@line = nil
|
53
|
+
#@column = nil
|
49
54
|
end
|
50
55
|
|
51
56
|
# To make the desired methods active while parsing the desired method
|
data/lib/ox/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ox
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.8.9
|
4
|
+
version: 1.9.0
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-02-
|
12
|
+
date: 2013-02-25 00:00:00.000000000 Z
|
13
13
|
dependencies: []
|
14
14
|
description: ! "A fast XML parser and object serializer that uses only standard C
|
15
15
|
lib.\n \nOptimized XML (Ox), as the name implies was written to provide
|