fastxml 0.1.91
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +0 -0
- data/README +9 -0
- data/ext/Makefile +153 -0
- data/ext/extconf.rb +13 -0
- data/ext/fastxml.bundle +0 -0
- data/ext/fastxml.c +280 -0
- data/ext/fastxml.h +61 -0
- data/ext/fastxml.o +0 -0
- data/ext/fastxml_attrlist.c +60 -0
- data/ext/fastxml_attrlist.h +11 -0
- data/ext/fastxml_attrlist.o +0 -0
- data/ext/fastxml_doc.c +190 -0
- data/ext/fastxml_doc.h +16 -0
- data/ext/fastxml_doc.o +0 -0
- data/ext/fastxml_node.c +240 -0
- data/ext/fastxml_node.h +21 -0
- data/ext/fastxml_node.o +0 -0
- data/ext/fastxml_nodelist.c +146 -0
- data/ext/fastxml_nodelist.h +13 -0
- data/ext/fastxml_nodelist.o +0 -0
- data/ext/mkmf.log +119 -0
- data/lib/fastxml_lib.rb +93 -0
- data/specs/basic_html_spec.rb +70 -0
- data/specs/basic_parsing_spec.rb +48 -0
- data/specs/fastxml_doc_spec.rb +82 -0
- data/specs/fastxml_node_spec.rb +110 -0
- data/specs/fastxml_nodelist_spec.rb +46 -0
- data/test_data/cnn_main.html +1348 -0
- data/test_data/hasno_feed.html +364 -0
- data/test_data/hasno_feed.xml +301 -0
- data/test_data/hasno_test.xml +301 -0
- data/test_data/labels.xml +23 -0
- data/test_data/simple.xml +10 -0
- data/test_data/twitter_public.html +949 -0
- data/test_data/unicode.xml +10596 -0
- data/test_data/xslspec.xml +24682 -0
- metadata +114 -0
data/LICENSE
ADDED
File without changes
|
data/README
ADDED
@@ -0,0 +1,9 @@
|
|
1
|
+
= FastXml, a simple fast xml library using libxml and libxslt
|
2
|
+
|
3
|
+
== Overview
|
4
|
+
|
5
|
+
FastXml is:
|
6
|
+
|
7
|
+
# not standalone, it *requires libxml* and *libxslt*
|
8
|
+
# it attempts to provide the speediest xml parsing library available for ruby
|
9
|
+
# it provides an hpricot-like syntax for xml parsing and xslt processing
|
data/ext/Makefile
ADDED
@@ -0,0 +1,153 @@
|
|
1
|
+
|
2
|
+
SHELL = /bin/sh
|
3
|
+
|
4
|
+
#### Start of system configuration section. ####
|
5
|
+
|
6
|
+
srcdir = .
|
7
|
+
topdir = /opt/local/lib/ruby/1.8/i686-darwin9.2.2
|
8
|
+
hdrdir = $(topdir)
|
9
|
+
VPATH = $(srcdir):$(topdir):$(hdrdir)
|
10
|
+
prefix = $(DESTDIR)/opt/local
|
11
|
+
exec_prefix = $(prefix)
|
12
|
+
sitedir = $(prefix)/lib/ruby/site_ruby
|
13
|
+
rubylibdir = $(libdir)/ruby/$(ruby_version)
|
14
|
+
docdir = $(datarootdir)/doc/$(PACKAGE)
|
15
|
+
dvidir = $(docdir)
|
16
|
+
datarootdir = $(prefix)/share
|
17
|
+
archdir = $(rubylibdir)/$(arch)
|
18
|
+
sbindir = $(exec_prefix)/sbin
|
19
|
+
psdir = $(docdir)
|
20
|
+
vendordir = $(prefix)/lib/ruby/vendor_ruby
|
21
|
+
localedir = $(datarootdir)/locale
|
22
|
+
htmldir = $(docdir)
|
23
|
+
datadir = $(datarootdir)
|
24
|
+
includedir = $(prefix)/include
|
25
|
+
infodir = $(datarootdir)/info
|
26
|
+
sysconfdir = $(prefix)/etc
|
27
|
+
mandir = $(DESTDIR)/opt/local/share/man
|
28
|
+
libdir = $(exec_prefix)/lib
|
29
|
+
sharedstatedir = $(prefix)/com
|
30
|
+
oldincludedir = $(DESTDIR)/usr/include
|
31
|
+
pdfdir = $(docdir)
|
32
|
+
sitearchdir = $(sitelibdir)/$(sitearch)
|
33
|
+
vendorarchdir = $(vendorlibdir)/$(vendorarch)
|
34
|
+
bindir = $(exec_prefix)/bin
|
35
|
+
localstatedir = $(prefix)/var
|
36
|
+
vendorlibdir = $(vendordir)/$(ruby_version)
|
37
|
+
sitelibdir = $(sitedir)/$(ruby_version)
|
38
|
+
libexecdir = $(exec_prefix)/libexec
|
39
|
+
|
40
|
+
CC = /usr/bin/gcc-4.0
|
41
|
+
LIBRUBY = $(LIBRUBY_SO)
|
42
|
+
LIBRUBY_A = lib$(RUBY_SO_NAME)-static.a
|
43
|
+
LIBRUBYARG_SHARED = -l$(RUBY_SO_NAME)
|
44
|
+
LIBRUBYARG_STATIC = -l$(RUBY_SO_NAME)-static
|
45
|
+
|
46
|
+
RUBY_EXTCONF_H =
|
47
|
+
CFLAGS = -fno-common -O2 -fno-common -pipe -fno-common -Wall -I/opt/local/include/libxml2
|
48
|
+
INCFLAGS = -I. -I. -I/opt/local/lib/ruby/1.8/i686-darwin9.2.2 -I. -I/usr/include/libxml2
|
49
|
+
CPPFLAGS = -I/opt/local/include
|
50
|
+
CXXFLAGS = $(CFLAGS)
|
51
|
+
DLDFLAGS = -L. -L/opt/local/lib -L/opt/local/lib -lxml2 -lz -lpthread -liconv -lm
|
52
|
+
LDSHARED = cc -dynamic -bundle -undefined suppress -flat_namespace
|
53
|
+
AR = ar
|
54
|
+
EXEEXT =
|
55
|
+
|
56
|
+
RUBY_INSTALL_NAME = ruby
|
57
|
+
RUBY_SO_NAME = ruby
|
58
|
+
arch = i686-darwin9.2.2
|
59
|
+
sitearch = i686-darwin9.2.2
|
60
|
+
vendorarch = i686-darwin9.2.2
|
61
|
+
ruby_version = 1.8
|
62
|
+
ruby = /opt/local/bin/ruby
|
63
|
+
RUBY = $(ruby)
|
64
|
+
RM = rm -f
|
65
|
+
MAKEDIRS = mkdir -p
|
66
|
+
INSTALL = /usr/bin/install
|
67
|
+
INSTALL_PROG = $(INSTALL) -m 0755
|
68
|
+
INSTALL_DATA = $(INSTALL) -m 644
|
69
|
+
COPY = cp
|
70
|
+
|
71
|
+
#### End of system configuration section. ####
|
72
|
+
|
73
|
+
preload =
|
74
|
+
|
75
|
+
libpath = . $(libdir)
|
76
|
+
LIBPATH = -L"." -L"$(libdir)"
|
77
|
+
DEFFILE =
|
78
|
+
|
79
|
+
CLEANFILES = mkmf.log
|
80
|
+
DISTCLEANFILES =
|
81
|
+
|
82
|
+
extout =
|
83
|
+
extout_prefix =
|
84
|
+
target_prefix =
|
85
|
+
LOCAL_LIBS =
|
86
|
+
LIBS = $(LIBRUBYARG_SHARED) -lxslt -lxslt -lxml2 -lxml2 -lpthread -ldl -lobjc
|
87
|
+
SRCS = fastxml.c fastxml_attrlist.c fastxml_doc.c fastxml_node.c fastxml_nodelist.c
|
88
|
+
OBJS = fastxml.o fastxml_attrlist.o fastxml_doc.o fastxml_node.o fastxml_nodelist.o
|
89
|
+
TARGET = fastxml
|
90
|
+
DLLIB = $(TARGET).bundle
|
91
|
+
EXTSTATIC =
|
92
|
+
STATIC_LIB =
|
93
|
+
|
94
|
+
RUBYCOMMONDIR = $(sitedir)$(target_prefix)
|
95
|
+
RUBYLIBDIR = $(sitelibdir)$(target_prefix)
|
96
|
+
RUBYARCHDIR = $(sitearchdir)$(target_prefix)
|
97
|
+
|
98
|
+
TARGET_SO = $(DLLIB)
|
99
|
+
CLEANLIBS = $(TARGET).bundle $(TARGET).il? $(TARGET).tds $(TARGET).map
|
100
|
+
CLEANOBJS = *.o *.a *.s[ol] *.pdb *.exp *.bak
|
101
|
+
|
102
|
+
all: $(DLLIB)
|
103
|
+
static: $(STATIC_LIB)
|
104
|
+
|
105
|
+
clean:
|
106
|
+
@-$(RM) $(CLEANLIBS) $(CLEANOBJS) $(CLEANFILES)
|
107
|
+
|
108
|
+
distclean: clean
|
109
|
+
@-$(RM) Makefile $(RUBY_EXTCONF_H) conftest.* mkmf.log
|
110
|
+
@-$(RM) core ruby$(EXEEXT) *~ $(DISTCLEANFILES)
|
111
|
+
|
112
|
+
realclean: distclean
|
113
|
+
install: install-so install-rb
|
114
|
+
|
115
|
+
install-so: $(RUBYARCHDIR)
|
116
|
+
install-so: $(RUBYARCHDIR)/$(DLLIB)
|
117
|
+
$(RUBYARCHDIR)/$(DLLIB): $(DLLIB)
|
118
|
+
$(INSTALL_PROG) $(DLLIB) $(RUBYARCHDIR)
|
119
|
+
install-rb: pre-install-rb install-rb-default
|
120
|
+
install-rb-default: pre-install-rb-default
|
121
|
+
pre-install-rb: Makefile
|
122
|
+
pre-install-rb-default: Makefile
|
123
|
+
$(RUBYARCHDIR):
|
124
|
+
$(MAKEDIRS) $@
|
125
|
+
|
126
|
+
site-install: site-install-so site-install-rb
|
127
|
+
site-install-so: install-so
|
128
|
+
site-install-rb: install-rb
|
129
|
+
|
130
|
+
.SUFFIXES: .c .m .cc .cxx .cpp .C .o
|
131
|
+
|
132
|
+
.cc.o:
|
133
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
134
|
+
|
135
|
+
.cxx.o:
|
136
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
137
|
+
|
138
|
+
.cpp.o:
|
139
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
140
|
+
|
141
|
+
.C.o:
|
142
|
+
$(CXX) $(INCFLAGS) $(CPPFLAGS) $(CXXFLAGS) -c $<
|
143
|
+
|
144
|
+
.c.o:
|
145
|
+
$(CC) $(INCFLAGS) $(CPPFLAGS) $(CFLAGS) -c $<
|
146
|
+
|
147
|
+
$(DLLIB): $(OBJS)
|
148
|
+
@-$(RM) $@
|
149
|
+
$(LDSHARED) -o $@ $(OBJS) $(LIBPATH) $(DLDFLAGS) $(LOCAL_LIBS) $(LIBS)
|
150
|
+
|
151
|
+
|
152
|
+
|
153
|
+
$(OBJS): ruby.h defines.h
|
data/ext/extconf.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# Build configuration for the fastxml C extension: locates the libxml2 and
# libxslt headers/libraries and writes the Makefile.
require 'mkmf'

ext_name = 'fastxml'
dir_config ext_name

# The sources include <libxml/...>; libxml2 installs that directory beneath
# an include/libxml2 prefix, so those prefixes are the candidate -I paths.
find_header( 'libxml/tree.h', "/usr/include/libxml2", "/usr/local/include/libxml2", "/opt/local/include/libxml2" )

# The header is requested as 'libxslt/xslt.h', so the candidate paths must be
# the directories that CONTAIN libxslt/, not the libxslt directory itself.
find_header( 'libxslt/xslt.h', "/usr/include", "/usr/local/include", "/opt/local/include" )

find_library( 'xml2', "xmlInitParser", "/usr/lib", "/usr/local/lib", "/opt/local/lib" )
have_library( 'xml2', 'xmlInitParser', 'libxml/parser.h' )

# Probe libxslt with one of its own symbols; xmlInitParser belongs to libxml2
# and says nothing about whether libxslt is usable.
find_library( 'xslt', "xsltParseStylesheetFile", "/usr/lib", "/usr/local/lib", "/opt/local/lib" )
have_library( 'xslt', 'xsltParseStylesheetFile', 'libxslt/xslt.h' )

# Append whatever flags the installed libxml2 reports for itself.
$LDFLAGS << ' %s' % (`xml2-config --libs`).chomp
$CFLAGS << ' -Wall %s' % (`xml2-config --cflags`).chomp

create_makefile ext_name
|
data/ext/fastxml.bundle
ADDED
Binary file
|
data/ext/fastxml.c
ADDED
@@ -0,0 +1,280 @@
|
|
1
|
+
/*
|
2
|
+
* $Id$
|
3
|
+
*/
|
4
|
+
#define fastxml_c
|
5
|
+
#include "fastxml.h"
|
6
|
+
#include "fastxml_node.h"
|
7
|
+
#include "fastxml_doc.h"
|
8
|
+
#include "fastxml_nodelist.h"
|
9
|
+
#include "fastxml_attrlist.h"
|
10
|
+
|
11
|
+
VALUE rb_cFastXmlDoc;
|
12
|
+
VALUE rb_cFastXmlNode;
|
13
|
+
VALUE rb_cFastXmlNodeList;
|
14
|
+
VALUE rb_cFastXmlAttrList;
|
15
|
+
VALUE rb_sValidateDtd;
|
16
|
+
VALUE rb_sForgivingParse;
|
17
|
+
VALUE rb_sHtmlParse;
|
18
|
+
ID s_readlines;
|
19
|
+
ID s_to_s;
|
20
|
+
|
21
|
+
void Init_fastxml()
|
22
|
+
{
|
23
|
+
if (xmlHasFeature(XML_WITH_TREE) == 0)
|
24
|
+
rb_raise( rb_eRuntimeError, "libxml not built with tree support" );
|
25
|
+
|
26
|
+
if (xmlHasFeature(XML_WITH_XPATH) == 0)
|
27
|
+
rb_raise( rb_eRuntimeError, "libxml not built with xpath support" );
|
28
|
+
|
29
|
+
s_readlines = rb_intern("readlines");
|
30
|
+
s_to_s = rb_intern("to_s");
|
31
|
+
|
32
|
+
xmlInitParser();
|
33
|
+
xmlXPathInit();
|
34
|
+
VALUE rb_mFastXml = rb_define_module( "FastXml" );
|
35
|
+
rb_define_const( rb_mFastXml, "LIBXML_VERSION", rb_str_new2( LIBXML_DOTTED_VERSION ) );
|
36
|
+
rb_cFastXmlDoc = rb_define_class_under( rb_mFastXml, "Doc", rb_cObject );
|
37
|
+
rb_cFastXmlNode = rb_define_class_under( rb_mFastXml, "Node", rb_cObject );
|
38
|
+
rb_cFastXmlNodeList = rb_define_class_under( rb_mFastXml, "NodeList", rb_cObject );
|
39
|
+
rb_cFastXmlAttrList = rb_define_class_under( rb_mFastXml, "AttrList", rb_cObject );
|
40
|
+
|
41
|
+
/* setting symbols */
|
42
|
+
rb_sValidateDtd = ID2SYM( rb_intern("validate") );
|
43
|
+
rb_sForgivingParse = ID2SYM( rb_intern("forgiving") );
|
44
|
+
rb_sHtmlParse = ID2SYM( rb_intern("html") );
|
45
|
+
|
46
|
+
/* Doc */
|
47
|
+
rb_define_method( rb_cFastXmlDoc, "initialize", fastxml_doc_initialize, -1 );
|
48
|
+
rb_define_method( rb_cFastXmlDoc, "search", fastxml_doc_search, 1 );
|
49
|
+
rb_define_method( rb_cFastXmlDoc, "to_s", fastxml_doc_to_s, 0 );
|
50
|
+
rb_define_method( rb_cFastXmlDoc, "root", fastxml_doc_root, 0 );
|
51
|
+
rb_define_method( rb_cFastXmlDoc, "transform", fastxml_doc_transform, 1 );
|
52
|
+
rb_define_method( rb_cFastXmlDoc, "stylesheet=", fastxml_doc_stylesheet_set, 1 );
|
53
|
+
rb_define_method( rb_cFastXmlDoc, "stylesheet", fastxml_doc_stylesheet, 0 );
|
54
|
+
rb_define_method( rb_cFastXmlDoc, "children", fastxml_doc_children, 0 );
|
55
|
+
rb_define_method( rb_cFastXmlDoc, "inspect", fastxml_doc_inspect, 0 );
|
56
|
+
|
57
|
+
/* Node */
|
58
|
+
rb_define_method( rb_cFastXmlNode, "initialize", fastxml_node_initialize, 0 );
|
59
|
+
rb_define_method( rb_cFastXmlNode, "search", fastxml_node_search, 1 );
|
60
|
+
rb_define_method( rb_cFastXmlNode, "to_s", fastxml_node_to_s, 0 );
|
61
|
+
rb_define_method( rb_cFastXmlNode, "name", fastxml_node_name, 0 );
|
62
|
+
rb_define_method( rb_cFastXmlNode, "content", fastxml_node_value, 0 );
|
63
|
+
rb_define_method( rb_cFastXmlNode, "content=", fastxml_node_value_set, 1 );
|
64
|
+
rb_define_method( rb_cFastXmlNode, "inner_xml", fastxml_node_innerxml, 0 );
|
65
|
+
rb_define_method( rb_cFastXmlNode, "xpath", fastxml_node_xpath, 0 );
|
66
|
+
rb_define_method( rb_cFastXmlNode, "attr", fastxml_node_attr, 0 );
|
67
|
+
rb_define_method( rb_cFastXmlNode, "children", fastxml_node_children, 0 );
|
68
|
+
rb_define_method( rb_cFastXmlNode, "next", fastxml_node_next, 0 );
|
69
|
+
rb_define_method( rb_cFastXmlNode, "prev", fastxml_node_prev, 0 );
|
70
|
+
rb_define_method( rb_cFastXmlNode, "parent", fastxml_node_parent, 0 );
|
71
|
+
rb_define_method( rb_cFastXmlNode, "inspect", fastxml_node_inspect, 0 );
|
72
|
+
|
73
|
+
/* NodeList */
|
74
|
+
rb_include_module( rb_cFastXmlNodeList, rb_mEnumerable );
|
75
|
+
rb_define_method( rb_cFastXmlNodeList, "initialize", fastxml_nodelist_initialize, 0 );
|
76
|
+
rb_define_method( rb_cFastXmlNodeList, "length", fastxml_nodelist_length, 0 );
|
77
|
+
rb_define_method( rb_cFastXmlNodeList, "each", fastxml_nodelist_each, 0 );
|
78
|
+
rb_define_method( rb_cFastXmlNodeList, "entry", fastxml_nodelist_entry, 1 );
|
79
|
+
rb_define_method( rb_cFastXmlNodeList, "to_ary", fastxml_nodelist_entry, 0 );
|
80
|
+
|
81
|
+
/* AttrList */
|
82
|
+
rb_include_module( rb_cFastXmlAttrList, rb_mEnumerable );
|
83
|
+
rb_define_method( rb_cFastXmlAttrList, "initialize", fastxml_attrlist_initialize, 0 );
|
84
|
+
rb_define_method( rb_cFastXmlAttrList, "[]", fastxml_attrlist_indexer, 1 );
|
85
|
+
rb_define_method( rb_cFastXmlAttrList, "[]=", fastxml_attrlist_indexer_set, 2 );
|
86
|
+
|
87
|
+
rb_require( "lib/fastxml_lib" );
|
88
|
+
|
89
|
+
}
|
90
|
+
|
91
|
+
|
92
|
+
|
93
|
+
|
94
|
+
/*
 * Mark callback handed to Data_Wrap_Struct for fxml_data_t wrappers.
 * Deliberately empty: it marks nothing.
 */
void fastxml_data_mark( fxml_data_t *data )
{
    (void)data; /* unused */
}
|
98
|
+
|
99
|
+
/*
 * Free callback handed to Data_Wrap_Struct for fxml_data_t wrappers.
 *
 * Releases the xpath result object if one is attached.  The libxml document
 * is freed only when the struct carries nothing but a document pointer (no
 * node, no list, no xpath object).  The struct itself is then released.
 * A NULL pointer is ignored.
 */
void fastxml_data_free( fxml_data_t *data )
{
    if (data == NULL)
        return;

    if (data->xpath_obj != NULL)
        xmlXPathFreeObject( data->xpath_obj );

    /* xpath_obj is still non-NULL here if it was set, which keeps an xpath
     * result holder from ever freeing the document. */
    if (data->doc != NULL && data->node == NULL && data->list == NULL && data->xpath_obj == NULL)
        xmlFreeDoc( data->doc );

    /* the doc free will cleanup the nodes */

    data->xpath_obj = NULL;
    data->list = NULL;
    data->doc = NULL;
    data->node = NULL;

    /* The structs wrapped in this file come from ALLOC(), so they go back
     * through Ruby's allocator rather than plain free(). */
    xfree( data );
}
|
119
|
+
|
120
|
+
/*
 * Allocation stub: ignores the class argument and always returns nil.
 */
VALUE fastxml_data_alloc( VALUE klass )
{
    (void)klass; /* unused */
    return Qnil;
}
|
124
|
+
|
125
|
+
/*
 * Builds a FastXml::Node for the libxml node `cur`, using the caller-supplied
 * holder `chld`.
 *
 * The holder is pointed at `cur` and at cur's document, wrapped with the
 * fastxml_data_mark / fastxml_data_free callbacks, and stored in the new
 * node's @lxml_doc instance variable.  Returns the new node object.
 */
VALUE fastxml_raw_node_to_my_obj(xmlNodePtr cur, fxml_data_t *chld)
{
    VALUE node_obj, wrapped;

    chld->node = cur;
    chld->doc = cur->doc;

    node_obj = rb_class_new_instance( 0, 0, rb_cFastXmlNode );
    wrapped = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, chld );
    rb_iv_set( node_obj, "@lxml_doc", wrapped );

    return node_obj;
}
|
137
|
+
|
138
|
+
/*
 * Convenience form of fastxml_raw_node_to_my_obj(): allocates and zeroes a
 * fresh holder, then wraps `cur` with it.  Returns the new FastXml::Node.
 */
VALUE fastxml_raw_node_to_obj(xmlNodePtr cur)
{
    fxml_data_t *holder = ALLOC(fxml_data_t);

    memset( holder, 0, sizeof *holder );

    return fastxml_raw_node_to_my_obj( cur, holder );
}
|
144
|
+
|
145
|
+
/*
 * Builds a FastXml::NodeList over a raw libxml node pointer.
 *
 * root - stored as the holder's list pointer
 * len  - stored as the holder's list length
 *
 * The zeroed holder is wrapped with the fastxml_data_mark / fastxml_data_free
 * callbacks and stored in the new list's @lxml_doc instance variable.
 */
VALUE fastxml_nodelist_to_obj(xmlNodePtr root, int len)
{
    VALUE list_obj, wrapped;
    fxml_data_t *holder = ALLOC(fxml_data_t);

    memset( holder, 0, sizeof *holder );
    holder->list = root;
    holder->list_len = len;

    list_obj = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList );
    wrapped = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, holder );
    rb_iv_set( list_obj, "@lxml_doc", wrapped );

    return list_obj;
}
|
160
|
+
|
161
|
+
/*
 * Builds a FastXml::NodeList backed by an xpath result object.
 *
 * raw_xpath_obj - xpath result; it is stored in the holder, and
 *                 fastxml_data_free() later releases it with
 *                 xmlXPathFreeObject()
 * data          - accepted but not used
 *
 * The list length is the node-set size.  A result without a node set
 * (nodesetval == NULL, e.g. a numeric or string result) yields length 0
 * instead of dereferencing a NULL pointer.
 */
VALUE fastxml_nodeset_to_obj(xmlXPathObjectPtr raw_xpath_obj, fxml_data_t *data)
{
    VALUE ret, dv_chld;
    fxml_data_t *ndlst = ALLOC(fxml_data_t);

    (void)data; /* unused */
    memset( ndlst, 0, sizeof *ndlst );

    ndlst->xpath_obj = raw_xpath_obj;
    if (raw_xpath_obj->nodesetval != NULL)
        ndlst->list_len = raw_xpath_obj->nodesetval->nodeNr;
    else
        ndlst->list_len = 0;

    ret = rb_class_new_instance( 0, 0, rb_cFastXmlNodeList );
    dv_chld = Data_Wrap_Struct( rb_cObject, fastxml_data_mark, fastxml_data_free, ndlst );
    rb_iv_set( ret, "@lxml_doc", dv_chld );

    return ret;
}
|
175
|
+
|
176
|
+
/*
 * Returns non-zero when a non-empty '/'-separated piece of an xpath
 * expression looks like a plain name test that can take a namespace prefix.
 *
 * Rejected: pieces starting with '.' (".", "..") or '@' (attributes), and
 * pieces with a '(' before any '[' predicate (node-type tests such as text()
 * or the opening of a function call such as "count(").
 */
static int fastxml_step_is_name_test( VALUE step )
{
    const char *txt = RSTRING(step)->ptr;
    long len = RSTRING(step)->len;
    long i;

    if (len == 0 || txt[0] == '.' || txt[0] == '@')
        return 0;

    for (i = 0; i < len && txt[i] != '['; i++) {
        if (txt[i] == '(')
            return 0;
    }

    return 1;
}

/*
 * Rewrites an xpath expression so that unqualified element steps carry the
 * prefix `root_ns`.
 *
 * The expression is split on "/".  Each non-empty piece that looks like a
 * name test and contains no ':' gets "<root_ns>:" prepended; every other
 * piece (empty, already qualified, ".", "..", "@attr", "text()", ...) is
 * kept as is.  The pieces are joined with "/" again and returned as a new
 * string.
 */
VALUE munge_xpath_namespace( VALUE orig_expr, xmlChar *root_ns )
{
    VALUE path_bits = rb_str_split( orig_expr, "/" );
    VALUE ns_prefix = rb_str_new2( (const char*)root_ns );
    VALUE ns_indic = rb_str_new2( ":" );
    VALUE slash = rb_str_new2( "/" );
    VALUE path_bit, str_idx;
    VALUE ret_ary = rb_ary_new();
    ID index_id = rb_intern( "index" );
    long i;

    rb_str_append( ns_prefix, ns_indic );
    for (i=0; i<RARRAY(path_bits)->len; i++) {
        path_bit = RARRAY(path_bits)->ptr[i];

        if (fastxml_step_is_name_test( path_bit )) {
            str_idx = rb_funcall( path_bit, index_id, 1, ns_indic );
            /* didn't find the ':', so the step has no namespace yet */
            if (str_idx == Qnil || str_idx == Qfalse)
                path_bit = rb_str_plus( ns_prefix, path_bit );
        }

        rb_ary_push( ret_ary, path_bit );
    }

    return rb_ary_join( ret_ary, slash );
}
|
201
|
+
|
202
|
+
/*
 * Evaluates an xpath expression against the node or document held in self's
 * @lxml_doc and returns the result as a FastXml::NodeList.
 *
 * self      - object whose @lxml_doc wraps an fxml_data_t
 * raw_xpath - expression; converted with rb_obj_as_string
 * blk       - accepted but not used
 *
 * The context node is the holder's node, or the document root element when
 * no node is set.  Every namespace in scope at that node is registered with
 * the xpath context.  When the context node itself has a namespace, that
 * namespace is registered too (under a made-up prefix if it has none) and
 * the expression is rewritten by munge_xpath_namespace() so unqualified
 * steps use it.
 *
 * Raises ArgumentError for a nil expression, and RuntimeError when the
 * context cannot be created, when there is no node to search from, or when
 * the expression fails to compile or evaluate.  All libxml resources
 * acquired here are released before raising.
 */
VALUE fastxml_xpath_search(VALUE self, VALUE raw_xpath, VALUE blk)
{
    VALUE ret, dv, xpath_s;
    xmlXPathCompExprPtr xpath_xpr;
    xmlXPathContextPtr xpath_ctx;
    xmlXPathObjectPtr xpath_obj;
    fxml_data_t *data;
    xmlChar *xpath_expr;
    xmlNodePtr root = NULL;
    xmlNsPtr *ns_list = NULL;
    xmlNsPtr *cur_ns = NULL;
    xmlChar *root_ns = NULL;
    int ns_cnt = 0;

    (void)blk; /* unused */

    if (NIL_P(raw_xpath))
        rb_raise(rb_eArgError, "nil passed as xpath");

    /* Convert first: to_s may raise, and nothing has been acquired yet. */
    xpath_s = rb_obj_as_string( raw_xpath );

    dv = rb_iv_get( self, "@lxml_doc" );
    Data_Get_Struct( dv, fxml_data_t, data );

    xpath_ctx = xmlXPathNewContext( data->doc );
    if (xpath_ctx == NULL)
        rb_raise( rb_eRuntimeError, "unable to create xpath context" );

    root = data->node;
    if (root == NULL)
        root = xmlDocGetRootElement( data->doc );

    if (root == NULL) {
        /* No node and no root element: nothing to search from. */
        xmlXPathFreeContext( xpath_ctx );
        rb_raise( rb_eRuntimeError, "no root node to evaluate xpath against" );
    }

    xpath_ctx->node = root;
    cur_ns = ns_list = xmlGetNsList( data->doc, root );
    while (cur_ns != NULL && (*cur_ns) != NULL) {
        xmlXPathRegisterNs( xpath_ctx, (*cur_ns)->prefix, (*cur_ns)->href );
        cur_ns++;
        ns_cnt++;
    }

    if (ns_list != NULL) {
        /* nsNr must describe the array it accompanies */
        xpath_ctx->namespaces = ns_list;
        xpath_ctx->nsNr = ns_cnt;
    }

    if (root->ns != NULL) { /* we have a base namespace, this is going to get "interesting" */
        root_ns = (xmlChar*)root->ns->prefix;
        if (root_ns == NULL)
            root_ns = (xmlChar*)"myFunkyLittleRootNsNotToBeUseByAnyoneElseIHope";
        /* alternatives? how do other xpath processors handle root/default namespaces? */

        xmlXPathRegisterNs( xpath_ctx, root_ns, root->ns->href );
        /* need to update the xpath expression */
        xpath_s = munge_xpath_namespace( xpath_s, root_ns );
    }

    xpath_expr = (xmlChar*)RSTRING(xpath_s)->ptr;
    xpath_xpr = xmlXPathCompile( xpath_expr );
    if (xpath_xpr == NULL) {
        xmlXPathFreeContext( xpath_ctx );
        xmlFree( ns_list );
        rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );
    }

    xpath_obj = xmlXPathCompiledEval( xpath_xpr, xpath_ctx );

    /* The compiled expression, context and namespace list are no longer
     * needed whether or not evaluation succeeded; release them before any
     * call that can raise, because rb_raise does not return. */
    xmlXPathFreeCompExpr( xpath_xpr );
    xmlXPathFreeContext( xpath_ctx );
    xmlFree( ns_list );

    if (xpath_obj == NULL)
        rb_raise( rb_eRuntimeError, "unable to evaluate xpath expression" );

    ret = fastxml_nodeset_to_obj( xpath_obj, data );

    return ret;
}
|
280
|
+
|