fastxml 0.1.91

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
File without changes
data/README ADDED
@@ -0,0 +1,9 @@
1
+ = FastXml, a simple, fast XML library using libxml and libxslt
2
+
3
+ == Overview
4
+
5
+ FastXml is:
6
+
7
+ # not standalone, it *requires libxml* and *libxslt*
8
+ # it attempts to provide the speediest XML parsing library available for Ruby
9
+ # it provides an Hpricot-like syntax for XML parsing and XSLT processing
data/ext/Makefile ADDED
@@ -0,0 +1,153 @@
1
+
2
SHELL = /bin/sh

#### Start of system configuration section. ####

# Every assignment in this section is a recursive (`=`) one, so a variable may
# refer to another that is only defined further down.

# Where the sources and the Ruby headers live.
srcdir = .
topdir = /opt/local/lib/ruby/1.8/i686-darwin9.2.2
hdrdir = $(topdir)
VPATH = $(srcdir):$(topdir):$(hdrdir)

# Installation roots.
prefix = $(DESTDIR)/opt/local
exec_prefix = $(prefix)

# Directories derived from $(exec_prefix).
bindir = $(exec_prefix)/bin
sbindir = $(exec_prefix)/sbin
libdir = $(exec_prefix)/lib
libexecdir = $(exec_prefix)/libexec

# Directories derived from $(prefix).
includedir = $(prefix)/include
sysconfdir = $(prefix)/etc
sharedstatedir = $(prefix)/com
localstatedir = $(prefix)/var
datarootdir = $(prefix)/share

# Directories derived from $(datarootdir).
datadir = $(datarootdir)
infodir = $(datarootdir)/info
localedir = $(datarootdir)/locale
docdir = $(datarootdir)/doc/$(PACKAGE)

# Documentation formats all share $(docdir).
dvidir = $(docdir)
htmldir = $(docdir)
pdfdir = $(docdir)
psdir = $(docdir)

# Directories spelled out in full rather than derived from $(prefix).
mandir = $(DESTDIR)/opt/local/share/man
oldincludedir = $(DESTDIR)/usr/include

# Ruby library trees: core, site and vendor.
rubylibdir = $(libdir)/ruby/$(ruby_version)
archdir = $(rubylibdir)/$(arch)
sitedir = $(prefix)/lib/ruby/site_ruby
sitelibdir = $(sitedir)/$(ruby_version)
sitearchdir = $(sitelibdir)/$(sitearch)
vendordir = $(prefix)/lib/ruby/vendor_ruby
vendorlibdir = $(vendordir)/$(ruby_version)
vendorarchdir = $(vendorlibdir)/$(vendorarch)
39
+
40
# Compiler and the names of the Ruby libraries to link against.
CC = /usr/bin/gcc-4.0
LIBRUBY = $(LIBRUBY_SO)
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static

RUBY_EXTCONF_H =

# Compile flags.  Repeated -fno-common switches have been collapsed.
CFLAGS = -fno-common -O2 -pipe -Wall -I/opt/local/include/libxml2

# INCFLAGS comes first on the compile line, so it decides which libxml2
# headers win.  It must name the same installation that DLDFLAGS links
# against (/opt/local/lib); it previously pointed at /usr/include/libxml2,
# which compiled against one libxml2 and linked against another.
INCFLAGS = -I. -I$(topdir) -I$(srcdir) -I/opt/local/include/libxml2
CPPFLAGS = -I/opt/local/include
CXXFLAGS = $(CFLAGS)

# Link flags.  The duplicated -L/opt/local/lib has been collapsed.
DLDFLAGS = -L. -L/opt/local/lib -lxml2 -lz -lpthread -liconv -lm

# Link with the same driver that compiled the objects instead of whatever
# `cc` happens to resolve to on PATH.
LDSHARED = $(CC) -dynamic -bundle -undefined suppress -flat_namespace
AR = ar
EXEEXT =
55
+
56
# Identity of the Ruby installation this Makefile was generated for.
RUBY_INSTALL_NAME = ruby
RUBY_SO_NAME = ruby
ruby_version = 1.8
ruby = /opt/local/bin/ruby
RUBY = $(ruby)

# Architecture tags used to build the arch-specific install directories.
arch = i686-darwin9.2.2
sitearch = i686-darwin9.2.2
vendorarch = i686-darwin9.2.2

# File-system helpers used by the recipes.
RM = rm -f
COPY = cp
MAKEDIRS = mkdir -p
INSTALL = /usr/bin/install
INSTALL_PROG = $(INSTALL) -m 0755
INSTALL_DATA = $(INSTALL) -m 644

#### End of system configuration section. ####
72
+
73
preload =

# Library search path, as a plain list and as linker switches.
libpath = . $(libdir)
LIBPATH = -L"." -L"$(libdir)"
DEFFILE =

# Extra files removed by `clean` and `distclean`.
CLEANFILES = mkmf.log
DISTCLEANFILES =

extout =
extout_prefix =
target_prefix =

# What gets built, and from what.
TARGET = fastxml
DLLIB = $(TARGET).bundle
TARGET_SO = $(DLLIB)
EXTSTATIC =
STATIC_LIB =
SRCS = fastxml.c fastxml_attrlist.c fastxml_doc.c fastxml_node.c fastxml_nodelist.c
# One object per source file, in the same order.
OBJS = $(SRCS:.c=.o)

# Libraries handed to the linker after the objects.
LOCAL_LIBS =
LIBS = $(LIBRUBYARG_SHARED) -lxslt -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc

# Install destinations (site tree, optionally below $(target_prefix)).
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
RUBYARCHDIR = $(sitearchdir)$(target_prefix)

# Glob patterns removed by `clean`; the shell expands them in the recipe.
CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
101
+
102
# These targets name actions, not files.  Declaring them phony stops a stray
# file called e.g. `clean` or `install` from silently disabling the target.
.PHONY: all static clean distclean realclean
.PHONY: install install-so install-rb install-rb-default
.PHONY: pre-install-rb pre-install-rb-default
.PHONY: site-install site-install-so site-install-rb

# Default goal: build the loadable bundle.
all: $(DLLIB)
static: $(STATIC_LIB)

# rm -f already tolerates missing files; the leading `-` additionally keeps a
# failing removal from aborting the clean.
clean:
	@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)

distclean: clean
	@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
	@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)

realclean: distclean
install: install-so install-rb

install-so: $(RUBYARCHDIR)
install-so: $(RUBYARCHDIR)/$(DLLIB)
# The destination directory is created inside the recipe as well, so that
# this rule is correct when requested directly and under `make -j`, where the
# separate $(RUBYARCHDIR) prerequisite of install-so may not have run yet.
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
	$(MAKEDIRS) $(RUBYARCHDIR)
	$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
install-rb: pre-install-rb install-rb-default
install-rb-default: pre-install-rb-default
pre-install-rb: Makefile
pre-install-rb-default: Makefile
$(RUBYARCHDIR):
	$(MAKEDIRS) $@

site-install: site-install-so site-install-rb
site-install-so: install-so
site-install-rb: install-rb

# Suffix rules are kept (rather than GNU pattern rules) so the file still
# works with non-GNU make implementations.
.SUFFIXES: .c .m .cc .cxx .cpp .C .o

.cc.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.cxx.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.cpp.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.C.o:
	$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<

.c.o:
	$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<

# Remove any previous bundle first so a failed link never leaves a stale one.
$(DLLIB): $(OBJS)
	@-$(RM) $@
	$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)

# ruby.h and defines.h are located through VPATH.
$(OBJS): ruby.h defines.h
data/ext/extconf.rb ADDED
@@ -0,0 +1,13 @@
1
# Build configuration for the fastxml C extension.
#
# Locates the libxml2 and libxslt headers and libraries, then writes the
# Makefile.  Generation stops with a clear message when a dependency is
# missing, instead of producing a Makefile that cannot compile.
require 'mkmf'

ext_name = 'fastxml'
dir_config ext_name

include_roots = ['/usr/include', '/usr/local/include', '/opt/local/include']
lib_dirs      = ['/usr/lib', '/usr/local/lib', '/opt/local/lib']

# libxml2 installs its headers one level down (<root>/libxml2/libxml/tree.h),
# so the directory to add is <root>/libxml2.
libxml_include_dirs = include_roots.map { |dir| File.join(dir, 'libxml2') }
unless find_header('libxml/tree.h', *libxml_include_dirs)
  abort 'fastxml: libxml2 headers (libxml/tree.h) not found'
end

# libxslt headers live directly at <root>/libxslt/xslt.h, so the directory to
# add is <root> itself, not <root>/libxslt.
unless find_header('libxslt/xslt.h', *include_roots)
  abort 'fastxml: libxslt headers (libxslt/xslt.h) not found'
end

# find_library already appends -l<lib> on success, so no separate
# have_library call is needed (it only added the library a second time).
unless find_library('xml2', 'xmlInitParser', *lib_dirs)
  abort 'fastxml: libxml2 library not found'
end

# Probe libxslt for one of its own symbols; xmlInitParser belongs to libxml2
# and would resolve even against a broken libxslt.
unless find_library('xslt', 'xsltParseStylesheetFile', *lib_dirs)
  abort 'fastxml: libxslt library not found'
end

# Ask xml2-config for extra flags; a missing tool simply contributes nothing.
xml2_config = lambda do |flag|
  begin
    `xml2-config #{flag}`.chomp
  rescue SystemCallError
    ''
  end
end

$LDFLAGS << ' %s' % xml2_config.call('--libs')
$CFLAGS << ' -Wall %s' % xml2_config.call('--cflags')

create_makefile ext_name
Binary file
data/ext/fastxml.c ADDED
@@ -0,0 +1,280 @@
1
+ /*
2
+ * $Id$
3
+ */
4
+ #define fastxml_c
5
+ #include "fastxml.h"
6
+ #include "fastxml_node.h"
7
+ #include "fastxml_doc.h"
8
+ #include "fastxml_nodelist.h"
9
+ #include "fastxml_attrlist.h"
10
+
11
+ VALUE rb_cFastXmlDoc;
12
+ VALUE rb_cFastXmlNode;
13
+ VALUE rb_cFastXmlNodeList;
14
+ VALUE rb_cFastXmlAttrList;
15
+ VALUE rb_sValidateDtd;
16
+ VALUE rb_sForgivingParse;
17
+ VALUE rb_sHtmlParse;
18
+ ID s_readlines;
19
+ ID s_to_s;
20
+
21
+ void Init_fastxml()
22
+ {
23
+ if (xmlHasFeature(XML_WITH_TREE) == 0)
24
+ rb_raise( rb_eRuntimeError, "libxml not built with tree support" );
25
+
26
+ if (xmlHasFeature(XML_WITH_XPATH) == 0)
27
+ rb_raise( rb_eRuntimeError, "libxml not built with xpath support" );
28
+
29
+ s_readlines = rb_intern("readlines");
30
+ s_to_s = rb_intern("to_s");
31
+
32
+ xmlInitParser();
33
+ xmlXPathInit();
34
+ VALUE rb_mFastXml = rb_define_module( "FastXml" );
35
+ rb_define_const( rb_mFastXml, "LIBXML_VERSION", rb_str_new2( LIBXML_DOTTED_VERSION ) );
36
+ rb_cFastXmlDoc = rb_define_class_under( rb_mFastXml, "Doc", rb_cObject );
37
+ rb_cFastXmlNode = rb_define_class_under( rb_mFastXml, "Node", rb_cObject );
38
+ rb_cFastXmlNodeList = rb_define_class_under( rb_mFastXml, "NodeList", rb_cObject );
39
+ rb_cFastXmlAttrList = rb_define_class_under( rb_mFastXml, "AttrList", rb_cObject );
40
+
41
+ /* setting symbols */
42
+ rb_sValidateDtd = ID2SYM( rb_intern("validate") );
43
+ rb_sForgivingParse = ID2SYM( rb_intern("forgiving") );
44
+ rb_sHtmlParse = ID2SYM( rb_intern("html") );
45
+
46
+ /* Doc */
47
+ rb_define_method( rb_cFastXmlDoc, "initialize", fastxml_doc_initialize, -1 );
48
+ rb_define_method( rb_cFastXmlDoc, "search", fastxml_doc_search, 1 );
49
+ rb_define_method( rb_cFastXmlDoc, "to_s", fastxml_doc_to_s, 0 );
50
+ rb_define_method( rb_cFastXmlDoc, "root", fastxml_doc_root, 0 );
51
+ rb_define_method( rb_cFastXmlDoc, "transform", fastxml_doc_transform, 1 );
52
+ rb_define_method( rb_cFastXmlDoc, "stylesheet=", fastxml_doc_stylesheet_set, 1 );
53
+ rb_define_method( rb_cFastXmlDoc, "stylesheet", fastxml_doc_stylesheet, 0 );
54
+ rb_define_method( rb_cFastXmlDoc, "children", fastxml_doc_children, 0 );
55
+ rb_define_method( rb_cFastXmlDoc, "inspect", fastxml_doc_inspect, 0 );
56
+
57
+ /* Node */
58
+ rb_define_method( rb_cFastXmlNode, "initialize", fastxml_node_initialize, 0 );
59
+ rb_define_method( rb_cFastXmlNode, "search", fastxml_node_search, 1 );
60
+ rb_define_method( rb_cFastXmlNode, "to_s", fastxml_node_to_s, 0 );
61
+ rb_define_method( rb_cFastXmlNode, "name", fastxml_node_name, 0 );
62
+ rb_define_method( rb_cFastXmlNode, "content", fastxml_node_value, 0 );
63
+ rb_define_method( rb_cFastXmlNode, "content=", fastxml_node_value_set, 1 );
64
+ rb_define_method( rb_cFastXmlNode, "inner_xml", fastxml_node_innerxml, 0 );
65
+ rb_define_method( rb_cFastXmlNode, "xpath", fastxml_node_xpath, 0 );
66
+ rb_define_method( rb_cFastXmlNode, "attr", fastxml_node_attr, 0 );
67
+ rb_define_method( rb_cFastXmlNode, "children", fastxml_node_children, 0 );
68
+ rb_define_method( rb_cFastXmlNode, "next", fastxml_node_next, 0 );
69
+ rb_define_method( rb_cFastXmlNode, "prev", fastxml_node_prev, 0 );
70
+ rb_define_method( rb_cFastXmlNode, "parent", fastxml_node_parent, 0 );
71
+ rb_define_method( rb_cFastXmlNode, "inspect", fastxml_node_inspect, 0 );
72
+
73
+ /* NodeList */
74
+ rb_include_module( rb_cFastXmlNodeList, rb_mEnumerable );
75
+ rb_define_method( rb_cFastXmlNodeList, "initialize", fastxml_nodelist_initialize, 0 );
76
+ rb_define_method( rb_cFastXmlNodeList, "length", fastxml_nodelist_length, 0 );
77
+ rb_define_method( rb_cFastXmlNodeList, "each", fastxml_nodelist_each, 0 );
78
+ rb_define_method( rb_cFastXmlNodeList, "entry", fastxml_nodelist_entry, 1 );
79
+ rb_define_method( rb_cFastXmlNodeList, "to_ary", fastxml_nodelist_entry, 0 );
80
+
81
+ /* AttrList */
82
+ rb_include_module( rb_cFastXmlAttrList, rb_mEnumerable );
83
+ rb_define_method( rb_cFastXmlAttrList, "initialize", fastxml_attrlist_initialize, 0 );
84
+ rb_define_method( rb_cFastXmlAttrList, "[]", fastxml_attrlist_indexer, 1 );
85
+ rb_define_method( rb_cFastXmlAttrList, "[]=", fastxml_attrlist_indexer_set, 2 );
86
+
87
+ rb_require( "lib/fastxml_lib" );
88
+
89
+ }
90
+
91
+
92
+
93
+
94
+ void fastxml_data_mark( fxml_data_t *data )
95
+ {
96
+ /* do nothing */
97
+ }
98
+
99
+ void fastxml_data_free( fxml_data_t *data )
100
+ {
101
+ if (data != NULL)
102
+ {
103
+ if (data->xpath_obj != NULL)
104
+ xmlXPathFreeObject( data->xpath_obj );
105
+
106
+ if (data->doc != NULL && data->node == NULL && data->list == NULL && data->xpath_obj == NULL)
107
+ xmlFreeDoc( data->doc );
108
+
109
+ // the doc free will cleanup the nodes
110
+
111
+ data->xpath_obj = NULL;
112
+ data->list = NULL;
113
+ data->doc = NULL;
114
+ data->node = NULL;
115
+ free( data );
116
+ }
117
+ data = NULL;
118
+ }
119
+
120
+ VALUE fastxml_data_alloc( VALUE klass )
121
+ {
122
+ return Qnil;
123
+ }
124
+
125
+ VALUE fastxml_raw_node_to_my_obj(xmlNodePtr cur, fxml_data_t *chld)
126
+ {
127
+ VALUE dv_chld, new_tmp;
128
+ chld->node = cur;
129
+ chld->doc = cur->doc;
130
+
131
+ new_tmp = rb_class_new_instance( 0, 0, rb_cFastXmlNode );
132
+ dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, chld );
133
+ rb_iv_set( new_tmp, "@lxml_doc", dv_chld );
134
+
135
+ return new_tmp;
136
+ }
137
+
138
+ VALUE fastxml_raw_node_to_obj(xmlNodePtr cur)
139
+ {
140
+ fxml_data_t *chld = ALLOC(fxml_data_t);
141
+ memset( chld, 0, sizeof(fxml_data_t) );
142
+ return fastxml_raw_node_to_my_obj( cur, chld );
143
+ }
144
+
145
+ VALUE fastxml_nodelist_to_obj(xmlNodePtr root, int len)
146
+ {
147
+ VALUE ret, dv_chld;
148
+ xmlNodePtr cur = root;
149
+ fxml_data_t *ndlst = ALLOC(fxml_data_t);
150
+ memset( ndlst, 0, sizeof(fxml_data_t) );
151
+
152
+ ndlst->list_len = len;
153
+ ndlst->list = cur;
154
+ ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList );
155
+ dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst );
156
+ rb_iv_set( ret, "@lxml_doc", dv_chld );
157
+
158
+ return ret;
159
+ }
160
+
161
+ VALUE fastxml_nodeset_to_obj(xmlXPathObjectPtr raw_xpath_obj, fxml_data_t *data)
162
+ {
163
+ VALUE ret, dv_chld;
164
+ fxml_data_t *ndlst = ALLOC(fxml_data_t);
165
+ memset( ndlst, 0, sizeof(fxml_data_t) );
166
+
167
+ ndlst->xpath_obj = raw_xpath_obj;
168
+ ndlst->list_len = raw_xpath_obj->nodesetval->nodeNr;
169
+ ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList );
170
+ dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst );
171
+ rb_iv_set( ret, "@lxml_doc", dv_chld );
172
+
173
+ return ret;
174
+ }
175
+
176
+ VALUE munge_xpath_namespace( VALUE orig_expr, xmlChar *root_ns )
177
+ {
178
+ VALUE path_bits = rb_str_split( orig_expr, "/" );
179
+ VALUE ns_prefix = rb_str_new2( (const char*)root_ns );
180
+ VALUE ns_indic = rb_str_new2( ":" );
181
+ VALUE slash = rb_str_new2( "/" );
182
+ VALUE path_bit, str_idx;
183
+ VALUE ret_ary = rb_ary_new();
184
+ long i;
185
+
186
+ rb_str_append( ns_prefix, ns_indic );
187
+ for (i=0; i<RARRAY(path_bits)->len; i++) {
188
+ path_bit = RARRAY(path_bits)->ptr[i];
189
+
190
+ if (RSTRING(path_bit)->len > 0) {
191
+ str_idx = rb_funcall( path_bit, rb_intern( "index" ), 1, ns_indic );
192
+ if (str_idx == Qnil || str_idx == Qfalse) // didn't find the :, so it looks like we don't have a namespace
193
+ path_bit = rb_str_plus( ns_prefix, path_bit );
194
+ }
195
+
196
+ rb_ary_push( ret_ary, path_bit );
197
+ }
198
+
199
+ return rb_ary_join( ret_ary, slash );
200
+ }
201
+
202
+ VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath, VALUE blk)
203
+ {
204
+ VALUE ret, dv, xpath_s;
205
+ xmlXPathCompExprPtr xpath_xpr;
206
+ xmlXPathContextPtr xpath_ctx;
207
+ xmlXPathObjectPtr xpath_obj;
208
+ fxml_data_t *data;
209
+ xmlChar *xpath_expr;
210
+ xmlNodePtr root = NULL;
211
+ xmlNsPtr *ns_list = NULL;
212
+ xmlNsPtr *cur_ns = NULL;
213
+ xmlChar *root_ns = NULL;
214
+ int ns_cnt = 0;
215
+
216
+ if (NIL_P(raw_xpath))
217
+ rb_raise(rb_eArgError, "nil passed as xpath");
218
+
219
+ dv = rb_iv_get( self, "@lxml_doc" );
220
+ Data_Get_Struct( dv, fxml_data_t, data );
221
+
222
+ xpath_ctx = xmlXPathNewContext( data->doc );
223
+ if (xpath_ctx == NULL)
224
+ rb_raise( rb_eRuntimeError, "unable to create xpath context" );
225
+
226
+ root = data->node;
227
+ if (root == NULL)
228
+ root = xmlDocGetRootElement( data->doc );
229
+
230
+ xpath_ctx->node = root;
231
+ cur_ns = ns_list = xmlGetNsList( data->doc, root );
232
+ while (cur_ns != NULL && (*cur_ns) != NULL) {
233
+ xmlXPathRegisterNs( xpath_ctx, (*cur_ns)->prefix, (*cur_ns)->href );
234
+ cur_ns++;
235
+ }
236
+
237
+ if (ns_list != NULL) {
238
+ xpath_ctx->namespaces = ns_list;
239
+ xpath_ctx->nsNr = ns_cnt;
240
+ }
241
+
242
+ xpath_s = rb_obj_as_string( raw_xpath );
243
+ if (root->ns != NULL) { // we have a base namespace, this is going to get "interesting"
244
+ root_ns = (xmlChar*)root->ns->prefix;
245
+ if (root_ns == NULL)
246
+ root_ns = (xmlChar*)"myFunkyLittleRootNsNotToBeUseByAnyoneElseIHope";
247
+ // alternatives? how do other xpath processors handle root/default namespaces?
248
+
249
+ xmlXPathRegisterNs( xpath_ctx, root_ns, root->ns->href );
250
+ // need to update the xpath expression
251
+ xpath_s = munge_xpath_namespace( xpath_s, root_ns );
252
+ xpath_ctx->nsNr++;
253
+ }
254
+
255
+ xpath_expr = (xmlChar*)RSTRING(xpath_s)->ptr;
256
+ xpath_xpr = xmlXPathCompile( xpath_expr );
257
+ if (xpath_xpr == NULL) {
258
+ xmlXPathFreeContext( xpath_ctx );
259
+ xmlFree( ns_list );
260
+ rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );
261
+ }
262
+
263
+ xpath_obj = xmlXPathCompiledEval( xpath_xpr, xpath_ctx );
264
+ if (xpath_obj == NULL) {
265
+ rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );
266
+ xmlXPathFreeCompExpr( xpath_xpr );
267
+ xmlXPathFreeContext( xpath_ctx );
268
+ xmlFree( ns_list );
269
+ return Qnil;
270
+ }
271
+
272
+ ret = fastxml_nodeset_to_obj( xpath_obj, data );
273
+
274
+ xmlFree( ns_list );
275
+ xmlXPathFreeCompExpr( xpath_xpr );
276
+ xmlXPathFreeContext( xpath_ctx );
277
+
278
+ return ret;
279
+ }
280
+