fastxml 0.1.91

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
File without changes
data/README ADDED
@@ -0,0 +1,9 @@
1
+ = FastXml, a simple, fast XML library built on libxml and libxslt
2
+
3
+ == Overview
4
+
5
+ FastXml is:
6
+
7
+ # not standalone, it *requires libxml* and *libxslt*
8
+ # it aims to be the fastest XML parsing library available for Ruby
9
+ # it provides an Hpricot-like syntax for XML parsing and XSLT processing
data/ext/Makefile ADDED
@@ -0,0 +1,153 @@
1
+
2
SHELL = /bin/sh

#### Start of system configuration section. ####

# All assignments below deliberately use recursive `=`: several values refer
# to variables that are only defined further down (e.g. rubylibdir -> libdir,
# docdir -> datarootdir), so they must be expanded at use time.

srcdir = .
topdir = /opt/local/lib/ruby/1.8/i686-darwin9.2.2
hdrdir = $(topdir)
VPATH = $(srcdir):$(topdir):$(hdrdir)
prefix = $(DESTDIR)/opt/local
exec_prefix = $(prefix)
sitedir = $(prefix)/lib/ruby/site_ruby
rubylibdir = $(libdir)/ruby/$(ruby_version)
docdir = $(datarootdir)/doc/$(PACKAGE)
dvidir = $(docdir)
datarootdir = $(prefix)/share
archdir = $(rubylibdir)/$(arch)
sbindir = $(exec_prefix)/sbin
psdir = $(docdir)
vendordir = $(prefix)/lib/ruby/vendor_ruby
localedir = $(datarootdir)/locale
htmldir = $(docdir)
datadir = $(datarootdir)
includedir = $(prefix)/include
infodir = $(datarootdir)/info
sysconfdir = $(prefix)/etc
mandir = $(DESTDIR)/opt/local/share/man
libdir = $(exec_prefix)/lib
sharedstatedir = $(prefix)/com
oldincludedir = $(DESTDIR)/usr/include
pdfdir = $(docdir)
sitearchdir = $(sitelibdir)/$(sitearch)
vendorarchdir = $(vendorlibdir)/$(vendorarch)
bindir = $(exec_prefix)/bin
localstatedir = $(prefix)/var
vendorlibdir = $(vendordir)/$(ruby_version)
sitelibdir = $(sitedir)/$(ruby_version)
libexecdir = $(exec_prefix)/libexec

CC = /usr/bin/gcc-4.0
LIBRUBY = $(LIBRUBY_SO)
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static

RUBY_EXTCONF_H =
# Repeated -fno-common flags collapsed; the flag set itself is unchanged.
CFLAGS = -fno-common -O2 -pipe -Wall -I/opt/local/include/libxml2
# INCFLAGS comes first on the compile line, so it decides which libxml2
# headers win.  Use the /opt/local copy so the headers match the libxml2
# that LIBPATH/DLDFLAGS link against (/opt/local/lib), not the system copy.
INCFLAGS = -I. -I$(topdir) -I/opt/local/include/libxml2
CPPFLAGS = -I/opt/local/include
CXXFLAGS = $(CFLAGS)
DLDFLAGS = -L. -L/opt/local/lib -lxml2 -lz -lpthread -liconv -lm
# Link with the same compiler driver that compiles the objects.
LDSHARED = $(CC) -dynamic -bundle -undefined suppress -flat_namespace
AR = ar
EXEEXT =

RUBY_INSTALL_NAME = ruby
RUBY_SO_NAME = ruby
arch = i686-darwin9.2.2
sitearch = i686-darwin9.2.2
vendorarch = i686-darwin9.2.2
ruby_version = 1.8
ruby = /opt/local/bin/ruby
RUBY = $(ruby)
RM = rm -f
MAKEDIRS = mkdir -p
INSTALL = /usr/bin/install
INSTALL_PROG = $(INSTALL) -m 0755
INSTALL_DATA = $(INSTALL) -m 644
COPY = cp

#### End of system configuration section. ####

preload =

libpath = . $(libdir)
LIBPATH = -L"." -L"$(libdir)"
DEFFILE =

CLEANFILES = mkmf.log
DISTCLEANFILES =

extout =
extout_prefix =
target_prefix =
LOCAL_LIBS =
# Each library is listed once; libxslt precedes libxml2 because it depends on it.
LIBS = $(LIBRUBYARG_SHARED) -lxslt -lxml2 -lpthread -ldl -lobjc
SRCS = fastxml.c fastxml_attrlist.c fastxml_doc.c fastxml_node.c fastxml_nodelist.c
OBJS = fastxml.o fastxml_attrlist.o fastxml_doc.o fastxml_node.o fastxml_nodelist.o
TARGET = fastxml
DLLIB = $(TARGET).bundle
EXTSTATIC =
STATIC_LIB =

RUBYCOMMONDIR = $(sitedir)$(target_prefix)
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
RUBYARCHDIR = $(sitearchdir)$(target_prefix)

TARGET_SO = $(DLLIB)
CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
101
+
102
# None of these names is a file the build produces; declaring them phony keeps
# a stray file called e.g. `clean` or `install` from silently disabling them.
.PHONY: all static clean distclean realclean
.PHONY: install install-so install-rb install-rb-default
.PHONY: pre-install-rb pre-install-rb-default
.PHONY: site-install site-install-so site-install-rb

all: $(DLLIB)
static: $(STATIC_LIB)

# Removal is best-effort: `-` ignores failures and `@` keeps the output quiet.
clean:
	@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)

distclean: clean
	@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
	@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)

realclean: distclean
install: install-so install-rb

install-so: $(RUBYARCHDIR)
install-so: $(RUBYARCHDIR)/$(DLLIB)
# Create the destination directory inside the recipe as well, so the copy does
# not depend on the order in which install-so's prerequisites are built.
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
	$(MAKEDIRS) $(RUBYARCHDIR)
	$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
install-rb: pre-install-rb install-rb-default
install-rb-default: pre-install-rb-default
pre-install-rb: Makefile
pre-install-rb-default: Makefile
$(RUBYARCHDIR):
	$(MAKEDIRS) $@

site-install: site-install-so site-install-rb
site-install-so: install-so
site-install-rb: install-rb

.SUFFIXES: .c .m .cc .cxx .cpp .C .o

.cc.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.cxx.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.cpp.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.C.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.c.o:
	$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<

# Remove any stale bundle first so a failed link never leaves an old one behind.
$(DLLIB): $(OBJS)
	@-$(RM) $@
	$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)

# ruby.h and defines.h are located through VPATH ($(topdir)).
$(OBJS): ruby.h defines.h

# fastxml.c includes these project headers directly; rebuild it when they change.
fastxml.o: fastxml.h fastxml_node.h fastxml_doc.h fastxml_nodelist.h fastxml_attrlist.h
data/ext/extconf.rb ADDED
@@ -0,0 +1,13 @@
1
# Build configuration for the fastxml C extension.
#
# Locates the libxml2 and libxslt headers and libraries, adds the flags
# reported by xml2-config (when available) and writes the Makefile.
# Aborts with an explicit message when a required header or library is
# missing, instead of generating a Makefile that cannot compile.
require 'mkmf'

ext_name = 'fastxml'
dir_config ext_name

# Directories probed in addition to the compiler's defaults.
include_roots = %w[/usr/include /usr/local/include /opt/local/include]
lib_dirs      = %w[/usr/lib /usr/local/lib /opt/local/lib]

# Runs xml2-config with the given flag and returns its output, or an empty
# string when the tool cannot be executed.
def xml2_config(flag)
  `xml2-config #{flag}`.chomp
rescue SystemCallError
  ''
end

# libxml2 headers are included as <libxml/...> and live under <root>/libxml2.
libxml2_dirs = include_roots.map { |root| File.join(root, 'libxml2') }
unless find_header('libxml/tree.h', *libxml2_dirs)
  abort 'fastxml: libxml2 headers (libxml/tree.h) not found'
end

# libxslt headers are included as <libxslt/...>, so the search directory is
# the include root itself, not <root>/libxslt.
unless find_header('libxslt/xslt.h', *include_roots)
  abort 'fastxml: libxslt headers (libxslt/xslt.h) not found'
end

# One probe per library, so each -l flag is added to the link line only once.
unless find_library('xml2', 'xmlInitParser', *lib_dirs)
  abort 'fastxml: libxml2 library not found'
end

# Probe libxslt with one of its own symbols; xmlInitParser belongs to libxml2.
unless find_library('xslt', 'xsltParseStylesheetFile', *lib_dirs)
  abort 'fastxml: libxslt library not found'
end

$LDFLAGS << ' %s' % xml2_config('--libs')
$CFLAGS << ' -Wall %s' % xml2_config('--cflags')

create_makefile ext_name
Binary file
data/ext/fastxml.c ADDED
@@ -0,0 +1,280 @@
1
+ /*
2
+ * $Id$
3
+ */
4
+ #define fastxml_c
5
+ #include "fastxml.h"
6
+ #include "fastxml_node.h"
7
+ #include "fastxml_doc.h"
8
+ #include "fastxml_nodelist.h"
9
+ #include "fastxml_attrlist.h"
10
+
11
/* Class objects for FastXml::Doc, FastXml::Node, FastXml::NodeList and
 * FastXml::AttrList; assigned in Init_fastxml(). */
+ VALUE rb_cFastXmlDoc;
12
+ VALUE rb_cFastXmlNode;
13
+ VALUE rb_cFastXmlNodeList;
14
+ VALUE rb_cFastXmlAttrList;
15
/* Symbols :validate, :forgiving and :html; assigned in Init_fastxml(). */
+ VALUE rb_sValidateDtd;
16
+ VALUE rb_sForgivingParse;
17
+ VALUE rb_sHtmlParse;
18
/* Interned method names "readlines" and "to_s"; assigned in Init_fastxml(). */
+ ID s_readlines;
19
+ ID s_to_s;
20
+
21
+ void Init_fastxml()
22
+ {
23
+ if (xmlHasFeature(XML_WITH_TREE) == 0)
24
+ rb_raise( rb_eRuntimeError, "libxml not built with tree support" );
25
+
26
+ if (xmlHasFeature(XML_WITH_XPATH) == 0)
27
+ rb_raise( rb_eRuntimeError, "libxml not built with xpath support" );
28
+
29
+ s_readlines = rb_intern("readlines");
30
+ s_to_s = rb_intern("to_s");
31
+
32
+ xmlInitParser();
33
+ xmlXPathInit();
34
+ VALUE rb_mFastXml = rb_define_module( "FastXml" );
35
+ rb_define_const( rb_mFastXml, "LIBXML_VERSION", rb_str_new2( LIBXML_DOTTED_VERSION ) );
36
+ rb_cFastXmlDoc = rb_define_class_under( rb_mFastXml, "Doc", rb_cObject );
37
+ rb_cFastXmlNode = rb_define_class_under( rb_mFastXml, "Node", rb_cObject );
38
+ rb_cFastXmlNodeList = rb_define_class_under( rb_mFastXml, "NodeList", rb_cObject );
39
+ rb_cFastXmlAttrList = rb_define_class_under( rb_mFastXml, "AttrList", rb_cObject );
40
+
41
+ /* setting symbols */
42
+ rb_sValidateDtd = ID2SYM( rb_intern("validate") );
43
+ rb_sForgivingParse = ID2SYM( rb_intern("forgiving") );
44
+ rb_sHtmlParse = ID2SYM( rb_intern("html") );
45
+
46
+ /* Doc */
47
+ rb_define_method( rb_cFastXmlDoc, "initialize", fastxml_doc_initialize, -1 );
48
+ rb_define_method( rb_cFastXmlDoc, "search", fastxml_doc_search, 1 );
49
+ rb_define_method( rb_cFastXmlDoc, "to_s", fastxml_doc_to_s, 0 );
50
+ rb_define_method( rb_cFastXmlDoc, "root", fastxml_doc_root, 0 );
51
+ rb_define_method( rb_cFastXmlDoc, "transform", fastxml_doc_transform, 1 );
52
+ rb_define_method( rb_cFastXmlDoc, "stylesheet=", fastxml_doc_stylesheet_set, 1 );
53
+ rb_define_method( rb_cFastXmlDoc, "stylesheet", fastxml_doc_stylesheet, 0 );
54
+ rb_define_method( rb_cFastXmlDoc, "children", fastxml_doc_children, 0 );
55
+ rb_define_method( rb_cFastXmlDoc, "inspect", fastxml_doc_inspect, 0 );
56
+
57
+ /* Node */
58
+ rb_define_method( rb_cFastXmlNode, "initialize", fastxml_node_initialize, 0 );
59
+ rb_define_method( rb_cFastXmlNode, "search", fastxml_node_search, 1 );
60
+ rb_define_method( rb_cFastXmlNode, "to_s", fastxml_node_to_s, 0 );
61
+ rb_define_method( rb_cFastXmlNode, "name", fastxml_node_name, 0 );
62
+ rb_define_method( rb_cFastXmlNode, "content", fastxml_node_value, 0 );
63
+ rb_define_method( rb_cFastXmlNode, "content=", fastxml_node_value_set, 1 );
64
+ rb_define_method( rb_cFastXmlNode, "inner_xml", fastxml_node_innerxml, 0 );
65
+ rb_define_method( rb_cFastXmlNode, "xpath", fastxml_node_xpath, 0 );
66
+ rb_define_method( rb_cFastXmlNode, "attr", fastxml_node_attr, 0 );
67
+ rb_define_method( rb_cFastXmlNode, "children", fastxml_node_children, 0 );
68
+ rb_define_method( rb_cFastXmlNode, "next", fastxml_node_next, 0 );
69
+ rb_define_method( rb_cFastXmlNode, "prev", fastxml_node_prev, 0 );
70
+ rb_define_method( rb_cFastXmlNode, "parent", fastxml_node_parent, 0 );
71
+ rb_define_method( rb_cFastXmlNode, "inspect", fastxml_node_inspect, 0 );
72
+
73
+ /* NodeList */
74
+ rb_include_module( rb_cFastXmlNodeList, rb_mEnumerable );
75
+ rb_define_method( rb_cFastXmlNodeList, "initialize", fastxml_nodelist_initialize, 0 );
76
+ rb_define_method( rb_cFastXmlNodeList, "length", fastxml_nodelist_length, 0 );
77
+ rb_define_method( rb_cFastXmlNodeList, "each", fastxml_nodelist_each, 0 );
78
+ rb_define_method( rb_cFastXmlNodeList, "entry", fastxml_nodelist_entry, 1 );
79
+ rb_define_method( rb_cFastXmlNodeList, "to_ary", fastxml_nodelist_entry, 0 );
80
+
81
+ /* AttrList */
82
+ rb_include_module( rb_cFastXmlAttrList, rb_mEnumerable );
83
+ rb_define_method( rb_cFastXmlAttrList, "initialize", fastxml_attrlist_initialize, 0 );
84
+ rb_define_method( rb_cFastXmlAttrList, "[]", fastxml_attrlist_indexer, 1 );
85
+ rb_define_method( rb_cFastXmlAttrList, "[]=", fastxml_attrlist_indexer_set, 2 );
86
+
87
+ rb_require( "lib/fastxml_lib" );
88
+
89
+ }
90
+
91
+
92
+
93
+
94
+ void fastxml_data_mark( fxml_data_t *data )
95
+ {
96
+ /* do nothing */
97
+ }
98
+
99
+ void fastxml_data_free( fxml_data_t *data )
100
+ {
101
+ if (data != NULL)
102
+ {
103
+ if (data->xpath_obj != NULL)
104
+ xmlXPathFreeObject( data->xpath_obj );
105
+
106
+ if (data->doc != NULL && data->node == NULL && data->list == NULL && data->xpath_obj == NULL)
107
+ xmlFreeDoc( data->doc );
108
+
109
+ // the doc free will cleanup the nodes
110
+
111
+ data->xpath_obj = NULL;
112
+ data->list = NULL;
113
+ data->doc = NULL;
114
+ data->node = NULL;
115
+ free( data );
116
+ }
117
+ data = NULL;
118
+ }
119
+
120
+ VALUE fastxml_data_alloc( VALUE klass )
121
+ {
122
+ return Qnil;
123
+ }
124
+
125
+ VALUE fastxml_raw_node_to_my_obj(xmlNodePtr cur, fxml_data_t *chld)
126
+ {
127
+ VALUE dv_chld, new_tmp;
128
+ chld->node = cur;
129
+ chld->doc = cur->doc;
130
+
131
+ new_tmp = rb_class_new_instance( 0, 0, rb_cFastXmlNode );
132
+ dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, chld );
133
+ rb_iv_set( new_tmp, "@lxml_doc", dv_chld );
134
+
135
+ return new_tmp;
136
+ }
137
+
138
+ VALUE fastxml_raw_node_to_obj(xmlNodePtr cur)
139
+ {
140
+ fxml_data_t *chld = ALLOC(fxml_data_t);
141
+ memset( chld, 0, sizeof(fxml_data_t) );
142
+ return fastxml_raw_node_to_my_obj( cur, chld );
143
+ }
144
+
145
+ VALUE fastxml_nodelist_to_obj(xmlNodePtr root, int len)
146
+ {
147
+ VALUE ret, dv_chld;
148
+ xmlNodePtr cur = root;
149
+ fxml_data_t *ndlst = ALLOC(fxml_data_t);
150
+ memset( ndlst, 0, sizeof(fxml_data_t) );
151
+
152
+ ndlst->list_len = len;
153
+ ndlst->list = cur;
154
+ ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList );
155
+ dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst );
156
+ rb_iv_set( ret, "@lxml_doc", dv_chld );
157
+
158
+ return ret;
159
+ }
160
+
161
+ VALUE fastxml_nodeset_to_obj(xmlXPathObjectPtr raw_xpath_obj, fxml_data_t *data)
162
+ {
163
+ VALUE ret, dv_chld;
164
+ fxml_data_t *ndlst = ALLOC(fxml_data_t);
165
+ memset( ndlst, 0, sizeof(fxml_data_t) );
166
+
167
+ ndlst->xpath_obj = raw_xpath_obj;
168
+ ndlst->list_len = raw_xpath_obj->nodesetval->nodeNr;
169
+ ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList );
170
+ dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst );
171
+ rb_iv_set( ret, "@lxml_doc", dv_chld );
172
+
173
+ return ret;
174
+ }
175
+
176
+ VALUE munge_xpath_namespace( VALUE orig_expr, xmlChar *root_ns )
177
+ {
178
+ VALUE path_bits = rb_str_split( orig_expr, "/" );
179
+ VALUE ns_prefix = rb_str_new2( (const char*)root_ns );
180
+ VALUE ns_indic = rb_str_new2( ":" );
181
+ VALUE slash = rb_str_new2( "/" );
182
+ VALUE path_bit, str_idx;
183
+ VALUE ret_ary = rb_ary_new();
184
+ long i;
185
+
186
+ rb_str_append( ns_prefix, ns_indic );
187
+ for (i=0; i<RARRAY(path_bits)->len; i++) {
188
+ path_bit = RARRAY(path_bits)->ptr[i];
189
+
190
+ if (RSTRING(path_bit)->len > 0) {
191
+ str_idx = rb_funcall( path_bit, rb_intern( "index" ), 1, ns_indic );
192
+ if (str_idx == Qnil || str_idx == Qfalse) // didn't find the :, so it looks like we don't have a namespace
193
+ path_bit = rb_str_plus( ns_prefix, path_bit );
194
+ }
195
+
196
+ rb_ary_push( ret_ary, path_bit );
197
+ }
198
+
199
+ return rb_ary_join( ret_ary, slash );
200
+ }
201
+
202
+ VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath, VALUE blk)
203
+ {
204
+ VALUE ret, dv, xpath_s;
205
+ xmlXPathCompExprPtr xpath_xpr;
206
+ xmlXPathContextPtr xpath_ctx;
207
+ xmlXPathObjectPtr xpath_obj;
208
+ fxml_data_t *data;
209
+ xmlChar *xpath_expr;
210
+ xmlNodePtr root = NULL;
211
+ xmlNsPtr *ns_list = NULL;
212
+ xmlNsPtr *cur_ns = NULL;
213
+ xmlChar *root_ns = NULL;
214
+ int ns_cnt = 0;
215
+
216
+ if (NIL_P(raw_xpath))
217
+ rb_raise(rb_eArgError, "nil passed as xpath");
218
+
219
+ dv = rb_iv_get( self, "@lxml_doc" );
220
+ Data_Get_Struct( dv, fxml_data_t, data );
221
+
222
+ xpath_ctx = xmlXPathNewContext( data->doc );
223
+ if (xpath_ctx == NULL)
224
+ rb_raise( rb_eRuntimeError, "unable to create xpath context" );
225
+
226
+ root = data->node;
227
+ if (root == NULL)
228
+ root = xmlDocGetRootElement( data->doc );
229
+
230
+ xpath_ctx->node = root;
231
+ cur_ns = ns_list = xmlGetNsList( data->doc, root );
232
+ while (cur_ns != NULL && (*cur_ns) != NULL) {
233
+ xmlXPathRegisterNs( xpath_ctx, (*cur_ns)->prefix, (*cur_ns)->href );
234
+ cur_ns++;
235
+ }
236
+
237
+ if (ns_list != NULL) {
238
+ xpath_ctx->namespaces = ns_list;
239
+ xpath_ctx->nsNr = ns_cnt;
240
+ }
241
+
242
+ xpath_s = rb_obj_as_string( raw_xpath );
243
+ if (root->ns != NULL) { // we have a base namespace, this is going to get "interesting"
244
+ root_ns = (xmlChar*)root->ns->prefix;
245
+ if (root_ns == NULL)
246
+ root_ns = (xmlChar*)"myFunkyLittleRootNsNotToBeUseByAnyoneElseIHope";
247
+ // alternatives? how do other xpath processors handle root/default namespaces?
248
+
249
+ xmlXPathRegisterNs( xpath_ctx, root_ns, root->ns->href );
250
+ // need to update the xpath expression
251
+ xpath_s = munge_xpath_namespace( xpath_s, root_ns );
252
+ xpath_ctx->nsNr++;
253
+ }
254
+
255
+ xpath_expr = (xmlChar*)RSTRING(xpath_s)->ptr;
256
+ xpath_xpr = xmlXPathCompile( xpath_expr );
257
+ if (xpath_xpr == NULL) {
258
+ xmlXPathFreeContext( xpath_ctx );
259
+ xmlFree( ns_list );
260
+ rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );
261
+ }
262
+
263
+ xpath_obj = xmlXPathCompiledEval( xpath_xpr, xpath_ctx );
264
+ if (xpath_obj == NULL) {
265
+ rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );
266
+ xmlXPathFreeCompExpr( xpath_xpr );
267
+ xmlXPathFreeContext( xpath_ctx );
268
+ xmlFree( ns_list );
269
+ return Qnil;
270
+ }
271
+
272
+ ret = fastxml_nodeset_to_obj( xpath_obj, data );
273
+
274
+ xmlFree( ns_list );
275
+ xmlXPathFreeCompExpr( xpath_xpr );
276
+ xmlXPathFreeContext( xpath_ctx );
277
+
278
+ return ret;
279
+ }
280
+