lxml 6.0.0__cp310-cp310-musllinux_1_2_armv7l.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/libcharset.h +45 -0
- lxml/includes/extlibs/localcharset.h +137 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libxml/HTMLparser.h +339 -0
- lxml/includes/libxml/HTMLtree.h +148 -0
- lxml/includes/libxml/SAX.h +18 -0
- lxml/includes/libxml/SAX2.h +170 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +115 -0
- lxml/includes/libxml/catalog.h +183 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +79 -0
- lxml/includes/libxml/dict.h +82 -0
- lxml/includes/libxml/encoding.h +307 -0
- lxml/includes/libxml/entities.h +147 -0
- lxml/includes/libxml/globals.h +25 -0
- lxml/includes/libxml/hash.h +251 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +16 -0
- lxml/includes/libxml/nanohttp.h +98 -0
- lxml/includes/libxml/parser.h +1633 -0
- lxml/includes/libxml/parserInternals.h +591 -0
- lxml/includes/libxml/relaxng.h +224 -0
- lxml/includes/libxml/schemasInternals.h +959 -0
- lxml/includes/libxml/schematron.h +143 -0
- lxml/includes/libxml/threads.h +81 -0
- lxml/includes/libxml/tree.h +1326 -0
- lxml/includes/libxml/uri.h +106 -0
- lxml/includes/libxml/valid.h +485 -0
- lxml/includes/libxml/xinclude.h +141 -0
- lxml/includes/libxml/xlink.h +193 -0
- lxml/includes/libxml/xmlIO.h +419 -0
- lxml/includes/libxml/xmlautomata.h +163 -0
- lxml/includes/libxml/xmlerror.h +962 -0
- lxml/includes/libxml/xmlexports.h +96 -0
- lxml/includes/libxml/xmlmemory.h +188 -0
- lxml/includes/libxml/xmlmodule.h +61 -0
- lxml/includes/libxml/xmlreader.h +444 -0
- lxml/includes/libxml/xmlregexp.h +116 -0
- lxml/includes/libxml/xmlsave.h +111 -0
- lxml/includes/libxml/xmlschemas.h +254 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +15 -0
- lxml/includes/libxml/xmlversion.h +332 -0
- lxml/includes/libxml/xmlwriter.h +489 -0
- lxml/includes/libxml/xpath.h +569 -0
- lxml/includes/libxml/xpathInternals.h +639 -0
- lxml/includes/libxml/xpointer.h +48 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/pattern.h +84 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1995 -0
- lxml/includes/libxslt/xsltconfig.h +146 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +174 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/objectify.pyx
ADDED
@@ -0,0 +1,2149 @@
|
|
1
|
+
# cython: binding=True
|
2
|
+
# cython: auto_pickle=False
|
3
|
+
# cython: language_level=3
|
4
|
+
|
5
|
+
"""
|
6
|
+
The ``lxml.objectify`` module implements a Python object API for XML.
|
7
|
+
It is based on `lxml.etree`.
|
8
|
+
"""
|
9
|
+
|
10
|
+
cimport cython
|
11
|
+
|
12
|
+
from lxml.includes.etreepublic cimport _Document, _Element, ElementBase, ElementClassLookup
|
13
|
+
from lxml.includes.etreepublic cimport elementFactory, import_lxml__etree, textOf, pyunicode
|
14
|
+
from lxml.includes.tree cimport const_xmlChar, _xcstr
|
15
|
+
from lxml cimport python
|
16
|
+
from lxml.includes cimport tree
|
17
|
+
|
18
|
+
cimport lxml.includes.etreepublic as cetree
|
19
|
+
cimport libc.string as cstring_h # not to be confused with stdlib 'string'
|
20
|
+
from libc.string cimport const_char
|
21
|
+
from libc cimport limits
|
22
|
+
|
23
|
+
__all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker',
|
24
|
+
'FloatElement', 'IntElement', 'NoneElement',
|
25
|
+
'NumberElement', 'ObjectPath', 'ObjectifiedDataElement',
|
26
|
+
'ObjectifiedElement', 'ObjectifyElementClassLookup',
|
27
|
+
'PYTYPE_ATTRIBUTE', 'PyType', 'StringElement', 'SubElement',
|
28
|
+
'XML', 'annotate', 'deannotate', 'dump', 'enable_recursive_str',
|
29
|
+
'fromstring', 'getRegisteredTypes', 'makeparser', 'parse',
|
30
|
+
'pyannotate', 'pytypename', 'set_default_parser',
|
31
|
+
'set_pytype_attribute_tag', 'xsiannotate']
|
32
|
+
|
33
|
+
cdef object etree
|
34
|
+
from lxml import etree
|
35
|
+
# initialize C-API of lxml.etree
|
36
|
+
import_lxml__etree()
|
37
|
+
|
38
|
+
__version__ = etree.__version__
|
39
|
+
|
40
|
+
cdef object _float_is_inf, _float_is_nan
|
41
|
+
from math import isinf as _float_is_inf, isnan as _float_is_nan
|
42
|
+
|
43
|
+
cdef object re
|
44
|
+
import re
|
45
|
+
|
46
|
+
cdef tuple IGNORABLE_ERRORS = (ValueError, TypeError)
|
47
|
+
cdef object is_special_method = re.compile('__.*__$').match
|
48
|
+
|
49
|
+
|
50
|
+
cdef object _typename(object t):
    """Return the unqualified type name of ``t`` as a unicode string.

    The name reported by ``python._fqtypename()`` is cut after its last
    '.' character; a name without any dot is returned unchanged.
    """
    cdef const_char* c_full_name = python._fqtypename(t)
    c_last_dot = cstring_h.strrchr(c_full_name, c'.')
    if c_last_dot is NULL:
        # no package/module prefix to strip
        return pyunicode(<const_xmlChar*>c_full_name)
    return pyunicode(<const_xmlChar*>(c_last_dot + 1))
|
57
|
+
|
58
|
+
|
59
|
+
# namespace/name for "pytype" hint attribute
#
# Namespace and attribute name are each declared in three forms: a Python
# object, a bytes object and a C string pointer.  All of them are filled in
# by set_pytype_attribute_tag().
cdef:
    object PYTYPE_NAMESPACE
    bytes PYTYPE_NAMESPACE_UTF8
    const_xmlChar* _PYTYPE_NAMESPACE

    object PYTYPE_ATTRIBUTE_NAME
    bytes PYTYPE_ATTRIBUTE_NAME_UTF8
    const_xmlChar* _PYTYPE_ATTRIBUTE_NAME

# module-level (Python visible) name of the attribute; also set by
# set_pytype_attribute_tag()
PYTYPE_ATTRIBUTE = None

cdef unicode TREE_PYTYPE_NAME = "TREE"
|
71
|
+
|
72
|
+
cdef tuple _unicodeAndUtf8(s):
    """Return the pair ``(s, s encoded as UTF-8 bytes)``."""
    utf8_bytes = python.PyUnicode_AsUTF8String(s)
    return (s, utf8_bytes)
|
74
|
+
|
75
|
+
def set_pytype_attribute_tag(attribute_tag=None):
    """set_pytype_attribute_tag(attribute_tag=None)
    Change name and namespace of the XML attribute that holds Python type
    information.

    Do not use this unless you know what you are doing.

    Calling the function without an argument (or with None) restores the
    default.

    Default: "{http://codespeak.net/lxml/objectify/pytype}pytype"
    """
    global PYTYPE_ATTRIBUTE, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME
    global PYTYPE_NAMESPACE, PYTYPE_NAMESPACE_UTF8
    global PYTYPE_ATTRIBUTE_NAME, PYTYPE_ATTRIBUTE_NAME_UTF8
    if attribute_tag is not None:
        # split the user provided tag, then derive the unicode forms from
        # the byte strings
        (PYTYPE_NAMESPACE_UTF8,
         PYTYPE_ATTRIBUTE_NAME_UTF8) = cetree.getNsTag(attribute_tag)
        PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8.decode('utf8')
        PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8.decode('utf8')
    else:
        # reset to the built-in default
        (PYTYPE_NAMESPACE,
         PYTYPE_NAMESPACE_UTF8) = _unicodeAndUtf8(
            "http://codespeak.net/lxml/objectify/pytype")
        (PYTYPE_ATTRIBUTE_NAME,
         PYTYPE_ATTRIBUTE_NAME_UTF8) = _unicodeAndUtf8("pytype")

    # the C pointers refer to the module-level bytes objects assigned above
    _PYTYPE_NAMESPACE = PYTYPE_NAMESPACE_UTF8
    _PYTYPE_ATTRIBUTE_NAME = PYTYPE_ATTRIBUTE_NAME_UTF8
    PYTYPE_ATTRIBUTE = cetree.namespacedNameFromNsName(
        _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
|
104
|
+
|
105
|
+
set_pytype_attribute_tag()
|
106
|
+
|
107
|
+
|
108
|
+
# namespaces for XML Schema
# (each one as unicode string, as UTF-8 bytes and as C string pointer)
cdef object XML_SCHEMA_NS, XML_SCHEMA_NS_UTF8
(XML_SCHEMA_NS,
 XML_SCHEMA_NS_UTF8) = _unicodeAndUtf8("http://www.w3.org/2001/XMLSchema")
cdef const_xmlChar* _XML_SCHEMA_NS = _xcstr(XML_SCHEMA_NS_UTF8)

cdef object XML_SCHEMA_INSTANCE_NS, XML_SCHEMA_INSTANCE_NS_UTF8
(XML_SCHEMA_INSTANCE_NS,
 XML_SCHEMA_INSTANCE_NS_UTF8) = _unicodeAndUtf8(
    "http://www.w3.org/2001/XMLSchema-instance")
cdef const_xmlChar* _XML_SCHEMA_INSTANCE_NS = _xcstr(XML_SCHEMA_INSTANCE_NS_UTF8)

# fully qualified names of the "nil" and "type" attributes in the
# XML Schema instance namespace
cdef object XML_SCHEMA_INSTANCE_NIL_ATTR = "{%s}nil" % XML_SCHEMA_INSTANCE_NS
cdef object XML_SCHEMA_INSTANCE_TYPE_ATTR = "{%s}type" % XML_SCHEMA_INSTANCE_NS
|
121
|
+
|
122
|
+
|
123
|
+
################################################################################
|
124
|
+
# Element class for the main API
|
125
|
+
|
126
|
+
cdef class ObjectifiedElement(ElementBase):
    """Main XML Element class.

    Child elements are exposed as object attributes.  When several children
    share the same name, they can be reached through a list index.  Example::

        >>> root = XML("<root><c1><c2>0</c2><c2>1</c2></c1></root>")
        >>> second_c2 = root.c1.c2[1]
        >>> print(second_c2.text)
        1

    Note that you cannot (and must not) instantiate this class or its
    subclasses.
    """
    def __iter__(self):
        """Iterate over self and all siblings with the same tag.
        """
        parent = self.getparent()
        if parent is not None:
            return etree.ElementChildIterator(parent, tag=self.tag)
        # an element without parent has no siblings
        return iter([self])

    def __str__(self):
        if not __RECURSIVE_STR:
            return textOf(self._c_node) or ''
        return _dump(self, 0)

    # pickle support for objectified Element
    def __reduce__(self):
        serialised = etree.tostring(self)
        return (fromstring, (serialised,))

    @property
    def text(self):
        return textOf(self._c_node)

    @property
    def __dict__(self):
        """A fake implementation for __dict__ to support dir() etc.

        Only the first child found for each name ends up in the result.
        """
        cdef _Element child
        cdef dict first_by_name = {}
        c_ns = tree._getNs(self._c_node)
        if c_ns is NULL:
            tag = None
        else:
            tag = "{%s}*" % pyunicode(c_ns)
        for child in etree.ElementChildIterator(self, tag=tag):
            if c_ns is NULL and tree._getNs(child._c_node) is not NULL:
                # self has no namespace => skip namespaced children
                continue
            first_by_name.setdefault(pyunicode(child._c_node.name), child)
        return first_by_name

    def __len__(self):
        """Count self and siblings with the same tag.
        """
        return _countSiblings(self._c_node)

    def countchildren(self):
        """countchildren(self)

        Return the number of children of this element, regardless of their
        name.
        """
        # copied from etree
        cdef Py_ssize_t count = 0
        cdef tree.xmlNode* c_child = self._c_node.children
        while c_child is not NULL:
            if tree._isElement(c_child):
                count += 1
            c_child = c_child.next
        return count

    def getchildren(self):
        """getchildren(self)

        Returns a sequence of all direct children.  The elements are
        returned in document order.
        """
        cdef tree.xmlNode* c_child = self._c_node.children
        children = []
        while c_child is not NULL:
            if tree._isElement(c_child):
                children.append(cetree.elementFactory(self._doc, c_child))
            c_child = c_child.next
        return children

    def __getattr__(self, tag):
        """Return the (first) child with the given tag name.  If no namespace
        is provided, the child will be looked up in the same one as self.
        """
        return _lookupChildOrRaise(self, tag)

    def __setattr__(self, tag, value):
        """Set the value of the (first) child with the given tag name.  If no
        namespace is provided, the child will be looked up in the same one as
        self.
        """
        cdef _Element element
        # properties are looked up /after/ __setattr__, so we must emulate them
        if tag == 'text' or tag == 'pyval':
            # read-only !
            raise TypeError(f"attribute '{tag}' of '{_typename(self)}' objects is not writable")
        if tag == 'tail':
            cetree.setTailText(self._c_node, value)
            return
        if tag == 'tag':
            ElementBase.tag.__set__(self, value)
            return
        if tag == 'base':
            ElementBase.base.__set__(self, value)
            return
        # anything else addresses a child element
        tag = _buildChildTag(self, tag)
        element = _lookupChild(self, tag)
        if element is not None:
            _replaceElement(element, value)
        else:
            _appendValue(self, tag, value)

    def __delattr__(self, tag):
        doomed_child = _lookupChildOrRaise(self, tag)
        self.remove(doomed_child)

    def addattr(self, tag, value):
        """addattr(self, tag, value)

        Add a child value to the element.

        As opposed to append(), it sets a data value, not an element.
        """
        child_tag = _buildChildTag(self, tag)
        _appendValue(self, child_tag, value)

    def __getitem__(self, key):
        """Return a sibling, counting from the first child of the parent.  The
        method behaves like both a dict and a sequence.

        * If argument is an integer, returns the sibling at that position.

        * If argument is a string, does the same as getattr().  This can be
          used to provide namespaces for element lookup, or to look up
          children with special names (``text`` etc.).

        * If argument is a slice object, returns the matching slice.
        """
        cdef tree.xmlNode* c_self_node
        cdef tree.xmlNode* c_parent
        cdef tree.xmlNode* c_node
        cdef Py_ssize_t c_index
        if python._isString(key):
            return _lookupChildOrRaise(self, key)
        if isinstance(key, slice):
            return list(self)[key]
        # normal item access
        c_index = key   # raises TypeError if necessary
        c_self_node = self._c_node
        c_parent = c_self_node.parent
        if c_parent is NULL:
            # without a parent, only self can be addressed
            if c_index != 0 and c_index != -1:
                raise IndexError(unicode(key))
            return self
        # negative indices are searched backwards from the last child
        c_node = c_parent.last if c_index < 0 else c_parent.children
        c_node = _findFollowingSibling(
            c_node, tree._getNs(c_self_node), c_self_node.name, c_index)
        if c_node is NULL:
            raise IndexError(unicode(key))
        return elementFactory(self._doc, c_node)

    def __setitem__(self, key, value):
        """Set the value of a sibling, counting from the first child of the
        parent.  Implements key assignment, item assignment and slice
        assignment.

        * If argument is an integer, sets the sibling at that position.

        * If argument is a string, does the same as setattr().  This is used
          to provide namespaces for element lookup.

        * If argument is a sequence (list, tuple, etc.), assign the contained
          items to the siblings.
        """
        cdef _Element element
        cdef tree.xmlNode* c_node
        if python._isString(key):
            key = _buildChildTag(self, key)
            element = _lookupChild(self, key)
            if element is not None:
                _replaceElement(element, value)
            else:
                _appendValue(self, key, value)
            return

        if self._c_node.parent is NULL:
            # the 'root[i] = ...' case
            raise TypeError("assignment to root element is invalid")

        if isinstance(key, slice):
            # slice assignment
            _setSlice(key, self, value)
            return

        # normal index assignment
        if key < 0:
            c_node = self._c_node.parent.last
        else:
            c_node = self._c_node.parent.children
        c_node = _findFollowingSibling(
            c_node, tree._getNs(self._c_node), self._c_node.name, key)
        if c_node is NULL:
            raise IndexError(unicode(key))
        element = elementFactory(self._doc, c_node)
        _replaceElement(element, value)

    def __delitem__(self, key):
        parent = self.getparent()
        if parent is None:
            raise TypeError("deleting items not supported by root element")
        if not isinstance(key, slice):
            # normal index deletion
            sibling = self.__getitem__(key)
            parent.remove(sibling)
        else:
            # slice deletion
            del_items = list(self)[key]
            remove = parent.remove
            for el in del_items:
                remove(el)

    def descendantpaths(self, prefix=None):
        """descendantpaths(self, prefix=None)

        Returns a list of object path expressions for all descendants.
        """
        if not (prefix is None or python._isString(prefix)):
            # a non-string prefix is joined into a dotted path
            prefix = '.'.join(prefix)
        return _build_descendant_paths(self._c_node, prefix)
|
368
|
+
|
369
|
+
|
370
|
+
cdef inline bint _tagMatches(tree.xmlNode* c_node, const_xmlChar* c_href, const_xmlChar* c_name):
    """Check whether ``c_node`` carries the given name and namespace.

    The name is compared by pointer, not by content.  A NULL ``c_href``
    accepts any namespace; a node without namespace only matches the empty
    ``c_href`` string.
    """
    if c_node.name != c_name:
        return False
    if c_href is NULL:
        return True
    c_node_href = tree._getNs(c_node)
    if c_node_href is NULL:
        # node has no namespace => only the empty href matches
        return c_href[0] == c'\0'
    return tree.xmlStrcmp(c_node_href, c_href) == 0
|
379
|
+
|
380
|
+
|
381
|
+
cdef Py_ssize_t _countSiblings(tree.xmlNode* c_start_node):
    """Count ``c_start_node`` plus all of its element siblings (in both
    directions) that match its name and namespace.
    """
    cdef tree.xmlNode* c_sibling
    cdef Py_ssize_t total = 1  # the start node itself
    c_name = c_start_node.name
    c_ns = tree._getNs(c_start_node)

    # walk forwards ...
    c_sibling = c_start_node.next
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and _tagMatches(c_sibling, c_ns, c_name):
            total += 1
        c_sibling = c_sibling.next

    # ... and backwards
    c_sibling = c_start_node.prev
    while c_sibling is not NULL:
        if c_sibling.type == tree.XML_ELEMENT_NODE and _tagMatches(c_sibling, c_ns, c_name):
            total += 1
        c_sibling = c_sibling.prev
    return total
|
400
|
+
|
401
|
+
cdef tree.xmlNode* _findFollowingSibling(tree.xmlNode* c_node,
                                         const_xmlChar* href, const_xmlChar* name,
                                         Py_ssize_t index):
    """Starting at ``c_node``, return the matching element at position
    ``index`` or NULL if there is none.

    A non-negative ``index`` walks forwards and counts matches from 0.  A
    negative ``index`` walks backwards, with -1 denoting the first match
    found in that direction.
    """
    cdef tree.xmlNode* (*advance)(tree.xmlNode*)
    if index < 0:
        # map -1, -2, ... to 0, 1, ... and reverse the direction
        index = -1 - index
        advance = cetree.previousElement
    else:
        advance = cetree.nextElement
    while c_node is not NULL:
        if c_node.type == tree.XML_ELEMENT_NODE and _tagMatches(c_node, href, name):
            if index == 0:
                return c_node
            index -= 1
        c_node = advance(c_node)
    return NULL
|
418
|
+
|
419
|
+
cdef object _lookupChild(_Element parent, tag):
    """Return the first child of ``parent`` that matches ``tag``, or None.

    If ``tag`` carries no namespace, the namespace of ``parent`` is used
    (or the empty namespace if ``parent`` has none).
    """
    cdef tree.xmlNode* c_match
    cdef tree.xmlNode* c_parent_node = parent._c_node
    ns, tag = cetree.getNsTagWithEmptyNs(tag)
    c_tag_len = len(<bytes> tag)
    if c_tag_len > limits.INT_MAX:
        # too long to be passed to the dict lookup below as a C int
        return None
    c_tag = tree.xmlDictExists(
        c_parent_node.doc.dict, _xcstr(tag), <int> c_tag_len)
    if c_tag is NULL:
        return None # not in the hash map => not in the tree
    if ns is not None:
        c_href = _xcstr(ns)
    else:
        # either inherit ns from parent or use empty (i.e. no) namespace
        c_href = tree._getNs(c_parent_node) or <const_xmlChar*>''
    c_match = _findFollowingSibling(c_parent_node.children, c_href, c_tag, 0)
    if c_match is not NULL:
        return elementFactory(parent._doc, c_match)
    return None
|
440
|
+
|
441
|
+
cdef object _lookupChildOrRaise(_Element parent, tag):
    """Like ``_lookupChild()``, but raise AttributeError instead of
    returning None when no child matches.
    """
    child = _lookupChild(parent, tag)
    if child is not None:
        return child
    raise AttributeError("no such child: " + _buildChildTag(parent, tag))
|
446
|
+
|
447
|
+
cdef object _buildChildTag(_Element parent, tag):
    """Build the namespaced tag name for a child of ``parent``.

    A ``tag`` without namespace gets the namespace of ``parent``.
    """
    cdef const_xmlChar* c_href
    ns, tag = cetree.getNsTag(tag)
    c_tag = _xcstr(tag)
    if ns is None:
        c_href = tree._getNs(parent._c_node)
    else:
        c_href = _xcstr(ns)
    return cetree.namespacedNameFromNsName(c_href, c_tag)
|
452
|
+
|
453
|
+
cdef _replaceElement(_Element element, value):
    """Replace ``element`` in its parent by a new element built from ``value``.

    * list/tuple: delegated to slice assignment on ``element``
    * element: a deep copy of it, renamed to the tag of ``element``
    * anything else: a new element with the same tag that holds the value
    """
    cdef _Element replacement
    if isinstance(value, (list, tuple)):
        element[:] = value
        return
    if isinstance(value, _Element):
        # deep copy the new element
        replacement = cetree.deepcopyNodeToDocument(
            element._doc, (<_Element>value)._c_node)
        replacement.tag = element.tag
    else:
        replacement = element.makeelement(element.tag)
        _setElementValue(replacement, value)
    element.getparent().replace(element, replacement)
|
467
|
+
|
468
|
+
cdef _appendValue(_Element parent, tag, value):
    """Append ``value`` to ``parent`` as new child(ren) named ``tag``.

    Lists and tuples are appended item by item.  Elements are deep copied
    and renamed to ``tag``.  Any other value is stored in a newly created
    element.
    """
    cdef _Element appended
    if isinstance(value, (list, tuple)):
        for entry in value:
            _appendValue(parent, tag, entry)
        return
    if isinstance(value, _Element):
        # deep copy the new element
        appended = cetree.deepcopyNodeToDocument(
            parent._doc, (<_Element>value)._c_node)
        appended.tag = tag
    else:
        appended = cetree.makeElement(
            tag, parent._doc, None, None, None, None, None)
        _setElementValue(appended, value)
    cetree.appendChildToElement(parent, appended)
|
484
|
+
|
485
|
+
cdef _setElementValue(_Element element, value):
    """Store ``value`` in ``element`` and update its type hint attributes.

    * element: delegated to ``_replaceElement()``
    * None: the xsi:nil attribute is set to "true"
    * anything else: xsi:nil is removed, the pytype attribute is set if a
      type is registered under the value's type name (removed otherwise),
      and the string form of the value becomes the text content
    """
    if isinstance(value, _Element):
        _replaceElement(element, value)
        return
    if value is None:
        cetree.setAttributeValue(
            element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
    else:
        cetree.delAttributeFromNsName(
            element._c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil")
        is_string = python._isString(value)
        pytype_name = "str" if is_string else _typename(value)
        py_type = <PyType>_PYTYPE_DICT.get(pytype_name)
        if not is_string:
            # strings are stored as they are, other values get serialised
            if py_type is None:
                value = unicode(value)
            else:
                value = py_type.stringify(value)
        if py_type is None:
            cetree.delAttributeFromNsName(
                element._c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)
    cetree.setNodeText(element._c_node, value)
|
511
|
+
|
512
|
+
cdef _setSlice(sliceobject, _Element target, items):
    """Implement slice assignment on the same-tag siblings of ``target``.

    The siblings selected by ``sliceobject`` are replaced one by one with
    new elements built from ``items``: elements are deep copied and renamed
    to the tag of ``target``, other values are wrapped in new elements.
    Surplus old siblings are removed and surplus new elements are inserted.

    Raises ValueError for a zero step, or when an extended slice
    (step != 1) is assigned a sequence of a different length.
    """
    cdef _Element parent
    cdef tree.xmlNode* c_node
    cdef Py_ssize_t c_step, pos
    # collect existing slice
    if (<slice>sliceobject).step is None:
        c_step = 1
    else:
        c_step = (<slice>sliceobject).step
    if c_step == 0:
        raise ValueError, "Invalid slice"
    cdef list del_items = target[sliceobject]

    # collect new values
    new_items = []
    tag = target.tag
    for item in items:
        if isinstance(item, _Element):
            # deep copy the new element
            new_element = cetree.deepcopyNodeToDocument(
                target._doc, (<_Element>item)._c_node)
            new_element.tag = tag
        else:
            new_element = cetree.makeElement(
                tag, target._doc, None, None, None, None, None)
            _setElementValue(new_element, item)
        new_items.append(new_element)

    # sanity check - raise what a list would raise
    if c_step != 1 and len(del_items) != len(new_items):
        raise ValueError, \
            f"attempt to assign sequence of size {len(new_items)} to extended slice of size {len(del_items)}"

    # replace existing items
    pos = 0
    parent = target.getparent()
    replace = parent.replace
    while pos < len(new_items) and pos < len(del_items):
        replace(del_items[pos], new_items[pos])
        pos += 1
    # remove leftover items
    if pos < len(del_items):
        remove = parent.remove
        while pos < len(del_items):
            remove(del_items[pos])
            pos += 1
    # append remaining new items
    if pos < len(new_items):
        # the sanity check above guarantees (step == 1)
        if pos > 0:
            item = new_items[pos-1]
        else:
            # Nothing was replaced, so the insertion point must be derived
            # from the slice start.  A slice without start (e.g. "[:0]")
            # has start None, which cannot be compared with an int or be
            # decremented; treat it like an explicit start of 0.
            start = (<slice>sliceobject).start
            if start is None:
                start = 0
            if start > 0:
                c_node = parent._c_node.children
            else:
                c_node = parent._c_node.last
            c_node = _findFollowingSibling(
                c_node, tree._getNs(target._c_node), target._c_node.name,
                start - 1)
            if c_node is NULL:
                # no sibling to insert after => append to the parent
                while pos < len(new_items):
                    cetree.appendChildToElement(parent, new_items[pos])
                    pos += 1
                return
            item = cetree.elementFactory(parent._doc, c_node)
        # chain the remaining items, each one right after its predecessor
        while pos < len(new_items):
            add = item.addnext
            item = new_items[pos]
            add(item)
            pos += 1
|
582
|
+
|
583
|
+
################################################################################
|
584
|
+
# Data type support in subclasses
|
585
|
+
|
586
|
+
cdef class ObjectifiedDataElement(ObjectifiedElement):
    """Base class of all data type Elements.  Subclasses should override
    the 'pyval' property and possibly the __str__ method.
    """
    @property
    def pyval(self):
        return textOf(self._c_node)

    def __str__(self):
        text = textOf(self._c_node)
        return text if text else ''

    def __repr__(self):
        text = textOf(self._c_node)
        return text if text else ''

    def _setText(self, s):
        """For use in subclasses only.  Don't use unless you know what you are
        doing.
        """
        cetree.setNodeText(self._c_node, s)
|
605
|
+
|
606
|
+
|
607
|
+
cdef class NumberElement(ObjectifiedDataElement):
    """Data element whose text is interpreted as a number.

    The Python value is produced by calling the parser function stored in
    ``_parse_value`` on the element text (see ``_parseNumber()``).  All
    conversions, comparisons and arithmetic operators below work on that
    parsed value, not on the element object itself.
    """
    cdef object _parse_value

    def _setValueParser(self, function):
        """Set the function that parses the Python value from a string.

        Do not use this unless you know what you are doing.
        """
        self._parse_value = function

    @property
    def pyval(self):
        """The parsed Python value of the element text."""
        return _parseNumber(self)

    # -- conversions ---------------------------------------------------------

    def __int__(self):
        number = _parseNumber(self)
        return int(number)

    def __float__(self):
        number = _parseNumber(self)
        return float(number)

    def __complex__(self):
        number = _parseNumber(self)
        return complex(number)

    def __str__(self):
        number = _parseNumber(self)
        return unicode(number)

    def __repr__(self):
        number = _parseNumber(self)
        return repr(number)

    def __oct__(self):
        number = _parseNumber(self)
        return oct(number)

    def __hex__(self):
        number = _parseNumber(self)
        return hex(number)

    # -- comparison and hashing ----------------------------------------------

    def __richcmp__(self, other, int op):
        # compare by Python value on both sides
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        number = _parseNumber(self)
        return hash(number)

    # -- binary arithmetic ---------------------------------------------------
    # Both operands go through _numericValueOf(), which unwraps number
    # elements and anything else exposing ``pyval``.

    def __add__(self, other):
        return _numericValueOf(self) + _numericValueOf(other)

    def __radd__(self, other):
        return _numericValueOf(other) + _numericValueOf(self)

    def __sub__(self, other):
        return _numericValueOf(self) - _numericValueOf(other)

    def __rsub__(self, other):
        return _numericValueOf(other) - _numericValueOf(self)

    def __mul__(self, other):
        return _numericValueOf(self) * _numericValueOf(other)

    def __rmul__(self, other):
        return _numericValueOf(other) * _numericValueOf(self)

    def __div__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rdiv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __truediv__(self, other):
        return _numericValueOf(self) / _numericValueOf(other)

    def __rtruediv__(self, other):
        return _numericValueOf(other) / _numericValueOf(self)

    def __floordiv__(self, other):
        return _numericValueOf(self) // _numericValueOf(other)

    def __rfloordiv__(self, other):
        return _numericValueOf(other) // _numericValueOf(self)

    def __mod__(self, other):
        return _numericValueOf(self) % _numericValueOf(other)

    def __rmod__(self, other):
        return _numericValueOf(other) % _numericValueOf(self)

    def __divmod__(self, other):
        return divmod(_numericValueOf(self), _numericValueOf(other))

    def __rdivmod__(self, other):
        return divmod(_numericValueOf(other), _numericValueOf(self))

    def __pow__(self, other, modulo):
        base = _numericValueOf(self)
        exponent = _numericValueOf(other)
        if modulo is not None:
            return pow(base, exponent, modulo)
        return base ** exponent

    def __rpow__(self, other, modulo):
        base = _numericValueOf(other)
        exponent = _numericValueOf(self)
        if modulo is not None:
            return pow(base, exponent, modulo)
        return base ** exponent

    # -- unary operators and truth value -------------------------------------

    def __neg__(self):
        return - _numericValueOf(self)

    def __pos__(self):
        return + _numericValueOf(self)

    def __abs__(self):
        return abs(_numericValueOf(self))

    def __bool__(self):
        return bool(_numericValueOf(self))

    def __invert__(self):
        return ~ _numericValueOf(self)

    # -- bit operations ------------------------------------------------------

    def __lshift__(self, other):
        return _numericValueOf(self) << _numericValueOf(other)

    def __rlshift__(self, other):
        return _numericValueOf(other) << _numericValueOf(self)

    def __rshift__(self, other):
        return _numericValueOf(self) >> _numericValueOf(other)

    def __rrshift__(self, other):
        return _numericValueOf(other) >> _numericValueOf(self)

    def __and__(self, other):
        return _numericValueOf(self) & _numericValueOf(other)

    def __rand__(self, other):
        return _numericValueOf(other) & _numericValueOf(self)

    def __or__(self, other):
        return _numericValueOf(self) | _numericValueOf(other)

    def __ror__(self, other):
        return _numericValueOf(other) | _numericValueOf(self)

    def __xor__(self, other):
        return _numericValueOf(self) ^ _numericValueOf(other)

    def __rxor__(self, other):
        return _numericValueOf(other) ^ _numericValueOf(self)
|
752
|
+
|
753
|
+
|
754
|
+
cdef class IntElement(NumberElement):
    """Number element whose text is parsed with ``int``."""

    def _init(self):
        # install the builtin int() as value parser
        self._parse_value = int

    def __index__(self):
        number = _parseNumber(self)
        return int(number)
|
760
|
+
|
761
|
+
|
762
|
+
cdef class FloatElement(NumberElement):
    """Number element whose text is parsed with ``float``."""

    def _init(self):
        # install the builtin float() as value parser
        self._parse_value = float
|
765
|
+
|
766
|
+
|
767
|
+
cdef class StringElement(ObjectifiedDataElement):
    """String data class.

    Note that this class does *not* support the sequence protocol of strings:
    len(), iter(), str_attr[0], str_attr[0:1], etc. are *not* supported.
    Instead, use the .text attribute to get a 'real' string.
    """
    @property
    def pyval(self):
        """The element text, with missing text mapped to ``''``."""
        text = textOf(self._c_node)
        return text or ''

    def __repr__(self):
        text = textOf(self._c_node)
        return repr(text or '')

    def strlen(self):
        """Return the length of the element text (0 if there is no text)."""
        text = textOf(self._c_node)
        return 0 if text is None else len(text)

    def __bool__(self):
        text = textOf(self._c_node)
        return bool(text)

    def __richcmp__(self, other, int op):
        # compare by Python value on both sides
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        text = textOf(self._c_node)
        return hash(text or '')

    def __add__(self, other):
        left = _strValueOf(self)
        right = _strValueOf(other)
        return left + right

    def __radd__(self, other):
        right = _strValueOf(self)
        left = _strValueOf(other)
        return left + right

    def __mul__(self, other):
        # Either operand may be the StringElement here, so check both sides
        # and repeat the text of whichever one it is.
        if isinstance(self, StringElement):
            return (textOf((<StringElement>self)._c_node) or '') * _numericValueOf(other)
        if isinstance(other, StringElement):
            return _numericValueOf(self) * (textOf((<StringElement>other)._c_node) or '')
        return NotImplemented

    def __rmul__(self, other):
        return _numericValueOf(other) * (textOf((<StringElement>self)._c_node) or '')

    def __mod__(self, other):
        template = _strValueOf(self) or ''
        return template % other

    def __int__(self):
        text = textOf(self._c_node)
        return int(text)

    def __float__(self):
        text = textOf(self._c_node)
        return float(text)

    def __complex__(self):
        text = textOf(self._c_node)
        return complex(text)
|
829
|
+
|
830
|
+
|
831
|
+
cdef class NoneElement(ObjectifiedDataElement):
    """Data element that behaves like Python's ``None``."""

    def __str__(self):
        return "None"

    def __repr__(self):
        return "None"

    def __bool__(self):
        return False

    def __richcmp__(self, other, int op):
        # A real None on either side compares as None against None.
        if other is None or self is None:
            return python.PyObject_RichCompare(None, None, op)
        # Otherwise substitute None for whichever operand is the NoneElement.
        if not isinstance(self, NoneElement):
            return python.PyObject_RichCompare(self, None, op)
        return python.PyObject_RichCompare(None, other, op)

    def __hash__(self):
        return hash(None)

    @property
    def pyval(self):
        """Always ``None``."""
        return None
|
855
|
+
|
856
|
+
|
857
|
+
cdef class BoolElement(IntElement):
    """Boolean type based on string values: 'true' or 'false'.

    Note that this inherits from IntElement to mimic the behaviour of
    Python's bool type.
    """
    def _init(self):
        self._parse_value = _parseBool  # wraps as Python callable

    def __bool__(self):
        text = textOf(self._c_node)
        return _parseBool(text)

    def __int__(self):
        text = textOf(self._c_node)
        return 0 + _parseBool(text)

    def __float__(self):
        text = textOf(self._c_node)
        return 0.0 + _parseBool(text)

    def __richcmp__(self, other, int op):
        # compare by Python value on both sides
        return _richcmpPyvals(self, other, op)

    def __hash__(self):
        text = textOf(self._c_node)
        return hash(_parseBool(text))

    def __str__(self):
        text = textOf(self._c_node)
        return unicode(_parseBool(text))

    def __repr__(self):
        text = textOf(self._c_node)
        return repr(_parseBool(text))

    @property
    def pyval(self):
        """The element text parsed by ``_parseBool()``."""
        text = textOf(self._c_node)
        return _parseBool(text)
|
890
|
+
|
891
|
+
|
892
|
+
cdef _checkBool(s):
    # Type check for boolean text: raises ValueError unless ``s`` is one of
    # the literals accepted by __parseBoolAsInt().  None is rejected as well.
    cdef int value
    if s is None:
        value = -1
    else:
        value = __parseBoolAsInt(s)
    if value == -1:
        raise ValueError
|
898
|
+
|
899
|
+
|
900
|
+
cdef bint _parseBool(s) except -1:
    # Convert boolean text to a C truth value.  None counts as False; any
    # text not accepted by __parseBoolAsInt() raises ValueError.
    cdef int value
    if s is None:
        return False
    value = __parseBoolAsInt(s)
    if value != -1:
        return value
    raise ValueError(f"Invalid boolean value: '{s}'")
|
908
|
+
|
909
|
+
|
910
|
+
cdef inline int __parseBoolAsInt(text) except -2:
    # Map the literals 'false'/'0' to 0 and 'true'/'1' to 1.
    # Returns -1 for anything else (-2 is reserved for exceptions).
    if text == 'false':
        return 0
    if text == 'true':
        return 1
    if text == '0':
        return 0
    if text == '1':
        return 1
    return -1
|
920
|
+
|
921
|
+
|
922
|
+
cdef object _parseNumber(NumberElement element):
    # Run the element's configured value parser on its text content.
    text = textOf(element._c_node)
    return element._parse_value(text)
|
924
|
+
|
925
|
+
|
926
|
+
# States of the number syntax checker implemented in _checkNumber().
cdef enum NumberParserState:
    NPS_SPACE_PRE = 0    # initial state / leading whitespace
    NPS_SIGN = 1         # leading '+' or '-' seen
    NPS_DIGITS = 2       # inside the integer digits
    NPS_POINT_LEAD = 3   # '.' seen before any digit
    NPS_POINT = 4        # '.' seen after integer digits
    NPS_FRACTION = 5     # inside the digits after the point
    NPS_EXP = 6          # exponent marker 'E' seen
    NPS_EXP_SIGN = 7     # sign after the exponent marker
    NPS_DIGITS_EXP = 8   # inside the exponent digits
    NPS_SPACE_TAIL = 9   # trailing whitespace after a complete value
    NPS_INF1 = 20        # 'i' of "inf"
    NPS_INF2 = 21        # 'in' of "inf"
    NPS_INF3 = 22        # complete "inf"
    NPS_NAN1 = 23        # 'n' of "nan"
    NPS_NAN2 = 24        # 'na' of "nan"
    NPS_NAN3 = 25        # complete "nan"
    NPS_ERROR = 99       # invalid input, parsing stops
|
944
|
+
|
945
|
+
|
946
|
+
# Fused string type: lets _checkNumber() be specialised for both byte
# strings and unicode strings.
ctypedef fused bytes_unicode:
    bytes
    unicode
|
949
|
+
|
950
|
+
|
951
|
+
cdef _checkNumber(bytes_unicode s, bint allow_float):
    # Validate that ``s`` is spelled like a number by running a small state
    # machine over its characters.  Returns None on success and raises
    # ValueError otherwise.  With allow_float=False, the decimal point, the
    # exponent marker and the INF/NaN spellings are rejected.
    cdef Py_UCS4 c
    cdef NumberParserState state = NPS_SPACE_PRE

    for c in s:
        if c in '0123456789':
            # A digit either continues the current digit run or opens the
            # one that follows a sign, a point or an exponent marker.
            if state in (NPS_DIGITS, NPS_FRACTION, NPS_DIGITS_EXP):
                pass
            elif state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_DIGITS
            elif state in (NPS_POINT_LEAD, NPS_POINT):
                state = NPS_FRACTION
            elif state in (NPS_EXP, NPS_EXP_SIGN):
                state = NPS_DIGITS_EXP
            else:
                state = NPS_ERROR
        elif c == '.':
            if not allow_float:
                state = NPS_ERROR
            elif state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_POINT_LEAD
            elif state == NPS_DIGITS:
                state = NPS_POINT
            else:
                state = NPS_ERROR
        elif c in '-+':
            if state == NPS_SPACE_PRE:
                state = NPS_SIGN
            elif state == NPS_EXP:
                state = NPS_EXP_SIGN
            else:
                state = NPS_ERROR
        elif c == 'E':
            # Only the upper case exponent marker is matched here.
            if not allow_float:
                state = NPS_ERROR
            elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION):
                state = NPS_EXP
            else:
                state = NPS_ERROR
        # Allow INF and NaN. XMLSchema requires case, we don't, like Python.
        elif c in 'iI':
            if allow_float and state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_INF1
            else:
                state = NPS_ERROR
        elif c in 'fF':
            if state == NPS_INF2:
                state = NPS_INF3
            else:
                state = NPS_ERROR
        elif c in 'aA':
            if state == NPS_NAN1:
                state = NPS_NAN2
            else:
                state = NPS_ERROR
        elif c in 'nN':
            # Python also allows [+-]NaN, so let's accept that.
            if state in (NPS_SPACE_PRE, NPS_SIGN):
                state = NPS_NAN1 if allow_float else NPS_ERROR
            elif state == NPS_NAN2:
                state = NPS_NAN3
            elif state == NPS_INF1:
                state = NPS_INF2
            else:
                state = NPS_ERROR
        # Allow spaces around text values.
        else:
            if c.isspace() if (bytes_unicode is unicode) else c in b'\x09\x0a\x0b\x0c\x0d\x20':
                if state in (NPS_SPACE_PRE, NPS_SPACE_TAIL):
                    pass
                elif state in (NPS_DIGITS, NPS_POINT, NPS_FRACTION, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3):
                    state = NPS_SPACE_TAIL
                else:
                    state = NPS_ERROR
            else:
                state = NPS_ERROR

        if state == NPS_ERROR:
            break

    # Only states that mark a complete value are acceptable at the end.
    if state not in (NPS_DIGITS, NPS_FRACTION, NPS_POINT, NPS_DIGITS_EXP, NPS_INF3, NPS_NAN3, NPS_SPACE_TAIL):
        raise ValueError
|
1025
|
+
|
1026
|
+
|
1027
|
+
cdef _checkInt(s):
    # Type check for integer text: number syntax without float features.
    text = <unicode>s
    return _checkNumber(text, allow_float=False)
|
1029
|
+
|
1030
|
+
|
1031
|
+
cdef _checkFloat(s):
    # Type check for float text: number syntax including float features.
    text = <unicode>s
    return _checkNumber(text, allow_float=True)
|
1033
|
+
|
1034
|
+
|
1035
|
+
cdef object _strValueOf(obj):
    # Coerce an operand to a string: strings pass through unchanged,
    # elements contribute their text ('' if missing), None becomes '',
    # and everything else is converted with unicode().
    if python._isString(obj):
        return obj
    if isinstance(obj, _Element):
        text = textOf((<_Element>obj)._c_node)
        return text or ''
    return '' if obj is None else unicode(obj)
|
1043
|
+
|
1044
|
+
|
1045
|
+
cdef object _numericValueOf(obj):
    # Unwrap an operand for arithmetic: number elements are parsed, other
    # objects contribute their ``pyval`` if they have one, and anything
    # else is returned unchanged.
    if isinstance(obj, NumberElement):
        return _parseNumber(<NumberElement>obj)
    try:
        # not always numeric, but Python will raise the right exception
        value = obj.pyval
    except AttributeError:
        return obj
    return value
|
1054
|
+
|
1055
|
+
|
1056
|
+
cdef _richcmpPyvals(left, right, int op):
    # Rich comparison on Python values: each side is replaced by its
    # ``pyval`` attribute when it has one.
    left_value = getattr(left, 'pyval', left)
    right_value = getattr(right, 'pyval', right)
    return python.PyObject_RichCompare(left_value, right_value, op)
|
1060
|
+
|
1061
|
+
|
1062
|
+
################################################################################
|
1063
|
+
# Python type registry
|
1064
|
+
|
1065
|
+
cdef class PyType:
    """PyType(self, name, type_check, type_class, stringify=None)
    User defined type.

    Named type that contains a type check function, a type class that
    inherits from ObjectifiedDataElement and an optional "stringification"
    function.  The type check must take a string as argument and raise
    ValueError or TypeError if it cannot handle the string value.  It may be
    None in which case it is not considered for type guessing.  For registered
    named types, the 'stringify' function (or unicode() if None) is used to
    convert a Python object with type name 'name' to the string representation
    stored in the XML tree.

    Example::

        PyType('int', int, MyIntClass).register()

    Note that the order in which types are registered matters.  The first
    matching type will be used.
    """
    cdef readonly object name
    cdef readonly object type_check
    cdef readonly object stringify
    cdef object _type
    cdef list _schema_types

    def __init__(self, name, type_check, type_class, stringify=None):
        # normalise the type name to a unicode string
        if isinstance(name, bytes):
            name = (<bytes>name).decode('ascii')
        elif not isinstance(name, unicode):
            raise TypeError("Type name must be a string")
        if type_check is not None and not callable(type_check):
            raise TypeError("Type check function must be callable (or None)")
        # only the tree type may use a class outside the data element hierarchy
        if name != TREE_PYTYPE_NAME and \
                not issubclass(type_class, ObjectifiedDataElement):
            raise TypeError(
                "Data classes must inherit from ObjectifiedDataElement")
        self.name = name
        self._type = type_class
        self.type_check = type_check
        self.stringify = unicode if stringify is None else stringify
        self._schema_types = []

    def __repr__(self):
        return "PyType(%s, %s)" % (self.name, self._type.__name__)

    def register(self, before=None, after=None):
        """register(self, before=None, after=None)

        Register the type.

        The additional keyword arguments 'before' and 'after' accept a
        sequence of type names that must appear before/after the new type in
        the type list.  If any of them is not currently known, it is simply
        ignored.  Raises ValueError if the dependencies cannot be fulfilled.
        """
        if self.name == TREE_PYTYPE_NAME:
            raise ValueError("Cannot register tree type")
        if self.type_check is not None:
            # drop an earlier entry that uses the same check function
            for item in _TYPE_CHECKS:
                if item[0] is self.type_check:
                    _TYPE_CHECKS.remove(item)
                    break
            entry = (self.type_check, self)
            first_pos = 0
            last_pos = -1
            if before or after:
                if before is None:
                    before = ()
                if after is None:
                    after = ()
                # last_pos: first entry named in 'before';
                # first_pos: position just after the last entry named in 'after'
                for index, (check, pytype) in enumerate(_TYPE_CHECKS):
                    if last_pos == -1 and pytype.name in before:
                        last_pos = index
                    if pytype.name in after:
                        first_pos = index + 1
            if last_pos == -1:
                _TYPE_CHECKS.append(entry)
            elif first_pos > last_pos:
                raise ValueError("inconsistent before/after dependencies")
            else:
                _TYPE_CHECKS.insert(last_pos, entry)

        _PYTYPE_DICT[self.name] = self
        for xs_type in self._schema_types:
            _SCHEMA_TYPE_DICT[xs_type] = self

    def unregister(self):
        "unregister(self)"
        if _PYTYPE_DICT.get(self.name) is self:
            del _PYTYPE_DICT[self.name]
        # iterate over a copy, entries are deleted while looping
        for xs_type, pytype in list(_SCHEMA_TYPE_DICT.items()):
            if pytype is self:
                del _SCHEMA_TYPE_DICT[xs_type]
        if self.type_check is not None:
            try:
                _TYPE_CHECKS.remove((self.type_check, self))
            except ValueError:
                # no entry in the type check list, nothing to remove
                pass

    property xmlSchemaTypes:
        """The list of XML Schema datatypes this Python type maps to.

        Note that this must be set before registering the type!
        """
        def __get__(self):
            return self._schema_types
        def __set__(self, types):
            self._schema_types = [unicode(xs_type) for xs_type in types]
|
1176
|
+
|
1177
|
+
|
1178
|
+
cdef dict _PYTYPE_DICT = {}
|
1179
|
+
cdef dict _SCHEMA_TYPE_DICT = {}
|
1180
|
+
cdef list _TYPE_CHECKS = []
|
1181
|
+
|
1182
|
+
cdef unicode _xml_bool(value):
    # Stringify a truth value as the literal "true" or "false".
    if value:
        return "true"
    return "false"
|
1184
|
+
|
1185
|
+
cdef unicode _xml_float(value):
    # Stringify a float: infinities become "INF"/"-INF", not-a-number
    # becomes "NaN", everything else uses repr().
    if _float_is_inf(value):
        return "INF" if value > 0 else "-INF"
    if _float_is_nan(value):
        return "NaN"
    text = repr(value)
    return unicode(text)
|
1193
|
+
|
1194
|
+
cdef _pytypename(obj):
    # All string objects map to the type name "str"; other objects use
    # whatever _typename() reports.
    if python._isString(obj):
        return "str"
    return _typename(obj)
|
1196
|
+
|
1197
|
+
def pytypename(obj):
    """pytypename(obj)

    Find the name of the corresponding PyType for a Python object.
    """
    name = _pytypename(obj)
    return name
|
1203
|
+
|
1204
|
+
cdef _registerPyTypes():
    # Create and register the built-in PyTypes.  The registration order is
    # significant because type guessing picks the first matching check.
    int_type = PyType('int', _checkInt, IntElement)  # wraps functions for Python
    int_type.xmlSchemaTypes = (
        "integer", "int", "short", "byte", "unsignedShort",
        "unsignedByte", "nonPositiveInteger",
        "negativeInteger", "long", "nonNegativeInteger",
        "unsignedLong", "unsignedInt", "positiveInteger",
    )
    int_type.register()

    # 'long' type just for backwards compatibility
    long_type = PyType('long', None, IntElement)
    long_type.register()

    float_type = PyType('float', _checkFloat, FloatElement, _xml_float)  # wraps functions for Python
    float_type.xmlSchemaTypes = ("double", "float")
    float_type.register()

    bool_type = PyType('bool', _checkBool, BoolElement, _xml_bool)  # wraps functions for Python
    bool_type.xmlSchemaTypes = ("boolean",)
    bool_type.register()

    str_type = PyType('str', None, StringElement)
    str_type.xmlSchemaTypes = (
        "string", "normalizedString", "token", "language",
        "Name", "NCName", "ID", "IDREF", "ENTITY",
        "NMTOKEN",
    )
    str_type.register()

    # since lxml 2.0
    none_type = PyType('NoneType', None, NoneElement)
    none_type.register()

    # backwards compatibility
    legacy_none_type = PyType('none', None, NoneElement)
    legacy_none_type.register()
|
1237
|
+
|
1238
|
+
# non-registered PyType for inner tree elements
|
1239
|
+
cdef PyType TREE_PYTYPE = PyType(TREE_PYTYPE_NAME, None, ObjectifiedElement)
|
1240
|
+
|
1241
|
+
_registerPyTypes()
|
1242
|
+
|
1243
|
+
def getRegisteredTypes():
    """getRegisteredTypes()

    Returns a list of the currently registered PyType objects.

    To add a new type, retrieve this list and call unregister() for all
    entries.  Then add the new type at a suitable position (possibly replacing
    an existing one) and call register() for all entries.

    This is necessary if the new type interferes with the type check functions
    of existing ones (normally only int/float/bool) and must be tried before
    other types.  To add a type that is not yet parsable by the current type
    check functions, you can simply register() it, which will append it to the
    end of the type list.
    """
    cdef list types = []
    cdef set known = set()
    # types with a check function first, in type check order
    for check, pytype in _TYPE_CHECKS:
        name = pytype.name
        if name in known:
            continue
        known.add(name)
        types.append(pytype)
    # then the remaining named types that have not been listed yet
    for pytype in _PYTYPE_DICT.values():
        name = pytype.name
        if name in known:
            continue
        known.add(name)
        types.append(pytype)
    return types
|
1271
|
+
|
1272
|
+
cdef PyType _guessPyType(value, PyType defaulttype):
    # Return the first registered PyType whose check function accepts
    # ``value``.  None maps to None; without a match, ``defaulttype`` is used.
    if value is None:
        return None
    for type_check, tested_pytype in _TYPE_CHECKS:
        try:
            type_check(value)
        except IGNORABLE_ERRORS:
            # could not be parsed as the specified type => ignore
            continue
        else:
            return <PyType>tested_pytype
    return defaulttype
|
1283
|
+
|
1284
|
+
cdef object _guessElementClass(tree.xmlNode* c_node):
    # Guess the element class from the node text: no text yields None,
    # empty text yields StringElement, otherwise the class of the first
    # registered type whose check accepts the text (None if none does).
    value = textOf(c_node)
    if value is None:
        return None
    if value == '':
        return StringElement

    for type_check, pytype in _TYPE_CHECKS:
        try:
            type_check(value)
        except IGNORABLE_ERRORS:
            continue
        else:
            return (<PyType>pytype)._type
    return None
|
1298
|
+
|
1299
|
+
################################################################################
|
1300
|
+
# adapted ElementMaker supports registered PyTypes
|
1301
|
+
|
1302
|
+
@cython.final
@cython.internal
cdef class _ObjectifyElementMakerCaller:
    # Callable bound to a single tag; calling it builds one element from the
    # given children and attributes.
    cdef object _tag
    cdef object _nsmap
    cdef object _element_factory
    cdef bint _annotate

    def __call__(self, *children, **attrib):
        "__call__(self, *children, **attrib)"
        cdef _ObjectifyElementMakerCaller elementMaker
        cdef _Element element
        cdef _Element childElement
        cdef bint has_children
        cdef bint has_string_value
        if self._element_factory is not None:
            element = self._element_factory(self._tag, attrib, self._nsmap)
        else:
            element = _makeElement(self._tag, None, attrib, self._nsmap)

        pytype_name = None
        has_children = False
        has_string_value = False
        for child in children:
            if child is None:
                # a single None child marks the element as xsi:nil
                if len(children) == 1:
                    cetree.setAttributeValue(
                        element, XML_SCHEMA_INSTANCE_NIL_ATTR, "true")
            elif python._isString(child):
                _add_text(element, child)
                has_string_value = True
            elif isinstance(child, _Element):
                cetree.appendChildToElement(element, <_Element>child)
                has_children = True
            elif isinstance(child, _ObjectifyElementMakerCaller):
                # an uncalled element maker adds an empty child with its tag
                elementMaker = <_ObjectifyElementMakerCaller>child
                if elementMaker._element_factory is not None:
                    childElement = elementMaker._element_factory(
                        elementMaker._tag)
                    cetree.appendChildToElement(element, childElement)
                else:
                    cetree.makeSubElement(element, elementMaker._tag,
                                          None, None, None, None)
                has_children = True
            elif isinstance(child, dict):
                for name, value in child.items():
                    # keyword arguments in attrib take precedence
                    if name in attrib:
                        continue
                    pytype = _PYTYPE_DICT.get(_typename(value))
                    if pytype is not None:
                        value = (<PyType>pytype).stringify(value)
                    elif not python._isString(value):
                        value = unicode(value)
                    cetree.setAttributeValue(element, name, value)
            else:
                if pytype_name is not None:
                    # concatenation always makes the result a string
                    has_string_value = True
                pytype_name = _typename(child)
                pytype = _PYTYPE_DICT.get(_typename(child))
                if pytype is None:
                    has_string_value = True
                    child = unicode(child)
                    _add_text(element, child)
                else:
                    _add_text(element, (<PyType>pytype).stringify(child))

        # annotate leaf elements with the Python type of their value
        if self._annotate and not has_children:
            if has_string_value:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, "str")
            elif pytype_name is not None:
                cetree.setAttributeValue(element, PYTYPE_ATTRIBUTE, pytype_name)

        return element
|
1377
|
+
|
1378
|
+
cdef _add_text(_Element elem, text):
    # add text to the tree in construction, either as element text or
    # tail text, depending on the current tree state
    cdef tree.xmlNode* c_child
    c_child = cetree.findChildBackwards(elem._c_node, 0)
    if c_child is NULL:
        # no child found: extend the text of the element itself
        old = cetree.textOf(elem._c_node)
        if old is not None:
            text = old + text
        cetree.setNodeText(elem._c_node, text)
    else:
        # extend the tail text of the child that was found
        old = cetree.tailOf(c_child)
        if old is not None:
            text = old + text
        cetree.setTailText(c_child, text)
|
1393
|
+
|
1394
|
+
cdef class ElementMaker:
    """ElementMaker(self, namespace=None, nsmap=None, annotate=True, makeelement=None)

    An ElementMaker that can be used for constructing trees.

    Example::

      >>> M = ElementMaker(annotate=False)
      >>> attributes = {'class': 'par'}
      >>> html = M.html( M.body( M.p('hello', attributes, M.br, 'objectify', style="font-weight: bold") ) )

      >>> from lxml.etree import tostring
      >>> print(tostring(html, method='html').decode('ascii'))
      <html><body><p style="font-weight: bold" class="par">hello<br>objectify</p></body></html>

    To create tags that are not valid Python identifiers, call the factory
    directly and pass the tag name as first argument::

      >>> root = M('tricky-tag', 'some text')
      >>> print(root.tag)
      tricky-tag
      >>> print(root.text)
      some text

    Note that this module has a predefined ElementMaker instance called ``E``.
    """
    cdef object _makeelement
    cdef object _namespace
    cdef object _nsmap
    cdef bint _annotate
    cdef dict _cache

    def __init__(self, *, namespace=None, nsmap=None, annotate=True,
                 makeelement=None):
        """Configure the element maker.

        ``namespace`` is prepended (in ``{ns}`` form) to tags that do not
        start with ``{``.  ``nsmap`` defaults to the module's default
        namespace map when annotating and to an empty mapping otherwise.
        ``makeelement`` must be callable or None; a non-callable value
        raises TypeError.
        """
        if nsmap is None:
            nsmap = _DEFAULT_NSMAP if annotate else {}
        self._nsmap = nsmap
        self._namespace = None if namespace is None else "{%s}" % namespace
        self._annotate = annotate
        if makeelement is not None:
            if not callable(makeelement):
                raise TypeError(
                    f"argument of 'makeelement' parameter must be callable, got {type(makeelement)}")
            self._makeelement = makeelement
        else:
            self._makeelement = None
        self._cache = {}

    @cython.final
    cdef _build_element_maker(self, tag, bint caching):
        # Create the per-tag callable, copying this maker's configuration.
        cdef _ObjectifyElementMakerCaller element_maker
        element_maker = _ObjectifyElementMakerCaller.__new__(_ObjectifyElementMakerCaller)
        if self._namespace is not None and tag[0] != "{":
            element_maker._tag = self._namespace + tag
        else:
            element_maker._tag = tag
        element_maker._nsmap = self._nsmap
        element_maker._annotate = self._annotate
        element_maker._element_factory = self._makeelement
        if caching:
            # keep the cache bounded: start over once it grows past 200 entries
            if len(self._cache) > 200:
                self._cache.clear()
            self._cache[tag] = element_maker
        return element_maker

    def __getattr__(self, tag):
        """Return the (cached) element maker for the tag named ``tag``.

        Raises AttributeError for uncached special method names, so that
        protocol probes such as ``__deepcopy__`` or ``__reduce_ex__`` are
        not answered with an element factory.  Such tags can still be
        created by calling the maker directly, e.g. ``E('__tag__')``.
        """
        element_maker = self._cache.get(tag)
        if element_maker is None:
            # __call__() deliberately never caches special method names, so
            # a cache miss on such a name must not build (and cache) a maker
            # here either.  NOTE(review): is_special_method is defined
            # elsewhere in this module; it is presumed to match dunder names.
            if is_special_method(tag):
                raise AttributeError(tag)
            return self._build_element_maker(tag, caching=True)
        return element_maker

    def __call__(self, tag, *args, **kwargs):
        """Build an element with the given tag, children and attributes."""
        element_maker = self._cache.get(tag)
        if element_maker is None:
            element_maker = self._build_element_maker(
                tag, caching=not is_special_method(tag))
        return element_maker(*args, **kwargs)
|
1470
|
+
|
1471
|
+
################################################################################
|
1472
|
+
# Recursive element dumping
|
1473
|
+
|
1474
|
+
cdef bint __RECURSIVE_STR = 0 # default: off
|
1475
|
+
|
1476
|
+
def enable_recursive_str(on=True):
    """enable_recursive_str(on=True)

    Switch the recursively generated tree representation for str(element),
    based on objectify.dump(element), on or off.
    """
    global __RECURSIVE_STR
    flag = on
    __RECURSIVE_STR = flag
|
1484
|
+
|
1485
|
+
def dump(_Element element not None):
    """dump(_Element element not None)

    Return a recursively generated string representation of an element.
    """
    text = _dump(element, 0)
    return text
|
1491
|
+
|
1492
|
+
cdef object _dump(_Element element, int indent):
    # Build the dump of ``element`` and its subtree: one line for the
    # element, one line per attribute, then the children one level deeper.
    indentstr = " " * indent
    if isinstance(element, ObjectifiedDataElement):
        value = repr(element)
    else:
        value = textOf(element._c_node)
        if value is not None:
            # whitespace-only text is shown as None
            value = repr(value) if value.strip() else None
    parts = [f"{indentstr}{element.tag} = {value} [{_typename(element)}]\n"]
    xsi_ns = "{%s}" % XML_SCHEMA_INSTANCE_NS
    pytype_ns = "{%s}" % PYTYPE_NAMESPACE
    for name, value in sorted(cetree.iterattributes(element, 3)):
        if '{' in name:
            if name == PYTYPE_ATTRIBUTE:
                # the tree type annotation is not listed
                if value == TREE_PYTYPE_NAME:
                    continue
                name = name.replace(pytype_ns, 'py:')
            name = name.replace(xsi_ns, 'xsi:')
        parts.append(f"{indentstr} * {name} = {value!r}\n")

    indent += 1
    for child in element.iterchildren():
        parts.append(_dump(child, indent))
    result = "".join(parts)
    if indent == 1:
        return result[:-1]  # strip last '\n'
    return result
|
1523
|
+
|
1524
|
+
|
1525
|
+
################################################################################
|
1526
|
+
# Pickle support for objectified ElementTree
|
1527
|
+
|
1528
|
+
def __unpickleElementTree(data):
    # Reconstructor used for pickled trees: parse the serialised XML in
    # 'data' with this module's fromstring() and wrap the resulting root
    # element in an ElementTree.
    return etree.ElementTree(fromstring(data))
|
1530
|
+
|
1531
|
+
cdef _setupPickle(elementTreeReduceFunction):
    # Register 'elementTreeReduceFunction' with copyreg as the reduction
    # function for etree._ElementTree, with __unpickleElementTree as the
    # matching constructor.
    import copyreg
    copyreg.pickle(etree._ElementTree,
                   elementTreeReduceFunction, __unpickleElementTree)
|
1535
|
+
|
1536
|
+
def pickleReduceElementTree(obj):
    # Reduction function: the tree is pickled as its serialised XML
    # (etree.tostring) and rebuilt through __unpickleElementTree.
    return __unpickleElementTree, (etree.tostring(obj),)

# Register the reduction function once at import time, then drop the
# module-level name again.
_setupPickle(pickleReduceElementTree)
del pickleReduceElementTree
|
1541
|
+
|
1542
|
+
################################################################################
|
1543
|
+
# Element class lookup
|
1544
|
+
|
1545
|
+
cdef class ObjectifyElementClassLookup(ElementClassLookup):
    """ObjectifyElementClassLookup(self, tree_class=None, empty_data_class=None)
    Element class lookup method that uses the objectify classes.
    """
    cdef object empty_data_class
    cdef object tree_class
    def __init__(self, tree_class=None, empty_data_class=None):
        """Lookup mechanism for objectify.

        Both arguments are optional replacements for the default Element
        classes and should be subclasses of ObjectifiedElement and
        ObjectifiedDataElement respectively.

        'tree_class' is the class used for inner tree elements (default:
        ObjectifiedElement).  'empty_data_class' is the class used for
        empty data elements (default: StringElement).
        """
        self._lookup_function = _lookupElementClass
        self.tree_class = (
            ObjectifiedElement if tree_class is None else tree_class)
        self.empty_data_class = (
            StringElement if empty_data_class is None else empty_data_class)
|
1567
|
+
|
1568
|
+
# Lookup function installed by ObjectifyElementClassLookup.  'state' is the
# lookup instance, 'c_node' the libxml2 node to find a Python class for.
# The checks run in a fixed priority order and the first match wins:
#   1. node has child elements        -> lookup.tree_class
#   2. xsi:nil="true"                 -> NoneElement
#   3. py:pytype attribute            -> tree class or registered PyType class
#   4. xsi:type attribute             -> registered schema type class
#   5. guess from the text content    -> _guessElementClass()
#   6. root element without a match   -> lookup.tree_class
#   7. anything else                  -> lookup.empty_data_class
cdef object _lookupElementClass(state, _Document doc, tree.xmlNode* c_node):
    cdef ObjectifyElementClassLookup lookup
    lookup = <ObjectifyElementClassLookup>state
    # if element has children => no data class
    if cetree.hasChild(c_node):
        return lookup.tree_class

    # if element is defined as xsi:nil, return NoneElement class
    if "true" == cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil"):
        return NoneElement

    # check for Python type hint
    value = cetree.attributeValueFromNsName(
        c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
    if value is not None:
        if value == TREE_PYTYPE_NAME:
            return lookup.tree_class
        py_type = <PyType>_PYTYPE_DICT.get(value)
        if py_type is not None:
            return py_type._type
        # unknown 'pytype' value => try to figure it out ourselves, just go on

    # check for XML Schema type hint
    value = cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")

    if value is not None:
        schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
        if schema_type is None and ':' in value:
            # retry with the namespace prefix stripped from the type name
            prefix, value = value.split(':', 1)
            schema_type = <PyType>_SCHEMA_TYPE_DICT.get(value)
        if schema_type is not None:
            return schema_type._type

    # otherwise determine class based on text content type
    el_class = _guessElementClass(c_node)
    if el_class is not None:
        return el_class

    # if element is a root node => default to tree node
    if c_node.parent is NULL or not tree._isElement(c_node.parent):
        return lookup.tree_class

    return lookup.empty_data_class
|
1613
|
+
|
1614
|
+
|
1615
|
+
################################################################################
|
1616
|
+
# Type annotations
|
1617
|
+
|
1618
|
+
cdef PyType _check_type(tree.xmlNode* c_node, PyType pytype):
    # Return 'pytype' if its type_check() accepts the text of 'c_node',
    # otherwise None.  A None 'pytype' is passed through as None.
    if pytype is None:
        return None
    text = textOf(c_node)
    try:
        pytype.type_check(text)
    except IGNORABLE_ERRORS:
        # could not be parsed as the specified type => ignore
        return None
    return pytype
|
1629
|
+
|
1630
|
+
def pyannotate(element_or_tree, *, ignore_old=False, ignore_xsi=False,
               empty_pytype=None):
    """pyannotate(element_or_tree, ignore_old=False, ignore_xsi=False, empty_pytype=None)

    Recursively annotates the elements of an XML tree with 'pytype'
    attributes.

    If the 'ignore_old' keyword argument is True (default: False), current
    'pytype' attributes will be ignored and replaced.  Otherwise, they will
    be checked and only replaced if they no longer fit the current text value.

    Setting the keyword argument ``ignore_xsi`` to True makes the function
    additionally ignore existing ``xsi:type`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'str', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate_xsi=0, annotate_pytype=1; 'ignore_old' refers to old pytype
    # attributes here, so it is passed in the ignore_pytype position.
    _annotate(element, 0, 1, ignore_xsi, ignore_old, None, empty_pytype)
|
1652
|
+
|
1653
|
+
def xsiannotate(element_or_tree, *, ignore_old=False, ignore_pytype=False,
                empty_type=None):
    """xsiannotate(element_or_tree, ignore_old=False, ignore_pytype=False, empty_type=None)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    attributes.

    If the 'ignore_old' keyword argument is True (default: False), current
    'xsi:type' attributes will be ignored and replaced.  Otherwise, they will
    be checked and only replaced if they no longer fit the current text value.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    Setting the keyword argument ``ignore_pytype`` to True makes the function
    additionally ignore existing ``pytype`` annotations.  The default is to
    use them as a type hint.

    The default annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # annotate_xsi=1, annotate_pytype=0; 'ignore_old' refers to old xsi:type
    # attributes here, so it is passed in the ignore_xsi position.
    _annotate(element, 1, 0, ignore_old, ignore_pytype, empty_type, None)
|
1680
|
+
|
1681
|
+
def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
             empty_pytype=None, empty_type=None, annotate_xsi=0,
             annotate_pytype=1):
    """annotate(element_or_tree, ignore_old=True, ignore_xsi=False, empty_pytype=None, empty_type=None, annotate_xsi=0, annotate_pytype=1)

    Recursively annotates the elements of an XML tree with 'xsi:type'
    and/or 'py:pytype' attributes.

    If the 'ignore_old' keyword argument is True (the default), current
    'py:pytype' attributes will be ignored for the type annotation.  Set to
    False if you want to reuse existing 'py:pytype' information (iff
    appropriate for the element text value).

    If the 'ignore_xsi' keyword argument is False (the default), existing
    'xsi:type' attributes will be used for the type annotation, if they fit the
    element text values.

    Note that the mapping from Python types to XSI types is usually ambiguous.
    Currently, only the first XSI type name in the corresponding PyType
    definition will be used for annotation.  Thus, you should consider naming
    the widest type first if you define additional types.

    The default 'py:pytype' annotation of empty elements can be set with the
    ``empty_pytype`` keyword argument.  Pass 'str', for example, to make
    string values the default.

    The default 'xsi:type' annotation of empty elements can be set with the
    ``empty_type`` keyword argument.  The default is not to annotate empty
    elements.  Pass 'string', for example, to make string values the default.
    If both ``empty_type`` and ``empty_pytype`` are given, the type for empty
    elements is looked up from ``empty_type``.

    The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
    (default: 1) control which kind(s) of annotation to use.
    """
    cdef _Element element
    element = cetree.rootNodeOrRaise(element_or_tree)
    # 'ignore_old' refers to old py:pytype attributes (ignore_pytype position)
    _annotate(element, annotate_xsi, annotate_pytype, ignore_xsi,
              ignore_old, empty_type, empty_pytype)
|
1718
|
+
|
1719
|
+
|
1720
|
+
# Shared implementation of pyannotate(), xsiannotate() and annotate().
# Resolves the default type for empty elements once, then calls
# _annotate_element() for every element node of the subtree at 'element'.
cdef _annotate(_Element element, bint annotate_xsi, bint annotate_pytype,
               bint ignore_xsi, bint ignore_pytype,
               empty_type_name, empty_pytype_name):
    cdef _Document doc
    cdef tree.xmlNode* c_node
    cdef PyType empty_pytype, StrType, NoneType

    # nothing to write => nothing to do
    if not annotate_xsi and not annotate_pytype:
        return

    # The XSD type name takes precedence over the pytype name when both
    # are given; byte string names are decoded as ASCII before the lookup.
    if empty_type_name is not None:
        if isinstance(empty_type_name, bytes):
            empty_type_name = (<bytes>empty_type_name).decode("ascii")
        empty_pytype = <PyType>_SCHEMA_TYPE_DICT.get(empty_type_name)
    elif empty_pytype_name is not None:
        if isinstance(empty_pytype_name, bytes):
            empty_pytype_name = (<bytes>empty_pytype_name).decode("ascii")
        empty_pytype = <PyType>_PYTYPE_DICT.get(empty_pytype_name)
    else:
        empty_pytype = None

    # looked up once here and handed to every _annotate_element() call
    StrType = <PyType>_PYTYPE_DICT.get('str')
    NoneType = <PyType>_PYTYPE_DICT.get('NoneType')

    doc = element._doc
    c_node = element._c_node
    # tree traversal macros; only element nodes are annotated
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        _annotate_element(c_node, doc, annotate_xsi, annotate_pytype,
                          ignore_xsi, ignore_pytype,
                          empty_type_name, empty_pytype, StrType, NoneType)
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
|
1752
|
+
|
1753
|
+
# Annotate a single element node.  The type is determined in this order:
# xsi:nil="true", a still-valid xsi:type (unless ignore_xsi), a still-valid
# py:pytype (unless ignore_pytype), a guess from the text content, and
# finally the configured default for empty elements.  The xsi:type and/or
# py:pytype attributes are then written or removed as requested.
# Returns 0; -1 is reserved for propagating exceptions.
cdef int _annotate_element(tree.xmlNode* c_node, _Document doc,
                           bint annotate_xsi, bint annotate_pytype,
                           bint ignore_xsi, bint ignore_pytype,
                           empty_type_name, PyType empty_pytype,
                           PyType StrType, PyType NoneType) except -1:
    cdef tree.xmlNs* c_ns
    cdef PyType pytype = None
    typename = None
    istree = 0

    # if element is defined as xsi:nil, represent it as None
    if cetree.attributeValueFromNsName(
        c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"nil") == "true":
        pytype = NoneType

    if pytype is None and not ignore_xsi:
        # check that old xsi type value is valid
        typename = cetree.attributeValueFromNsName(
            c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        if typename is not None:
            pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is None and ':' in typename:
                # retry without the namespace prefix
                prefix, typename = typename.split(':', 1)
                pytype = <PyType>_SCHEMA_TYPE_DICT.get(typename)
            if pytype is not None and pytype is not StrType:
                # StrType does not have a typecheck but is the default
                # anyway, so just accept it if given as type
                # information
                pytype = _check_type(c_node, pytype)
                if pytype is None:
                    # old xsi:type no longer fits the text => forget it
                    typename = None

    if pytype is None and not ignore_pytype:
        # check that old pytype value is valid
        old_pytypename = cetree.attributeValueFromNsName(
            c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        if old_pytypename is not None:
            if old_pytypename == TREE_PYTYPE_NAME:
                if not cetree.hasChild(c_node):
                    # only case where we should keep it,
                    # everything else is clear enough
                    pytype = TREE_PYTYPE
            else:
                if old_pytypename == 'none':
                    # transition from lxml 1.x
                    old_pytypename = "NoneType"
                pytype = <PyType>_PYTYPE_DICT.get(old_pytypename)
                if pytype is not None and pytype is not StrType:
                    # StrType does not have a typecheck but is the
                    # default anyway, so just accept it if given as
                    # type information
                    pytype = _check_type(c_node, pytype)

    if pytype is None:
        # try to guess type
        if not cetree.hasChild(c_node):
            # element has no children => data class
            pytype = _guessPyType(textOf(c_node), StrType)
        else:
            istree = 1

    if pytype is None:
        # use default type for empty elements
        if cetree.hasText(c_node):
            pytype = StrType
        else:
            pytype = empty_pytype
            if typename is None:
                typename = empty_type_name

    # make sure the xsi:type name agrees with the chosen pytype
    if pytype is not None:
        if typename is None:
            if not istree:
                if pytype._schema_types:
                    # pytype->xsi:type is a 1:n mapping
                    # simply take the first
                    typename = pytype._schema_types[0]
        elif typename not in pytype._schema_types:
            typename = pytype._schema_types[0]

    if annotate_xsi:
        if typename is None or istree:
            # no XSD type applies => remove a stale xsi:type attribute
            cetree.delAttributeFromNsName(
                c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>"type")
        else:
            # update or create attribute
            typename_utf8 = cetree.utf8(typename)
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_NS, <unsigned char*>'xsd')
            if c_ns is not NULL:
                # rewrite the type name so that its prefix matches the
                # prefix the XSD namespace has on this node
                if b':' in typename_utf8:
                    prefix, name = typename_utf8.split(b':', 1)
                    if c_ns.prefix is NULL or c_ns.prefix[0] == c'\0':
                        typename_utf8 = name
                    elif tree.xmlStrcmp(_xcstr(prefix), c_ns.prefix) != 0:
                        typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + name
                elif c_ns.prefix is not NULL and c_ns.prefix[0] != c'\0':
                    typename_utf8 = (<unsigned char*>c_ns.prefix) + b':' + typename_utf8
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
            tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"type", _xcstr(typename_utf8))

    if annotate_pytype:
        if pytype is None:
            # delete attribute if it exists
            cetree.delAttributeFromNsName(
                c_node, _PYTYPE_NAMESPACE, _PYTYPE_ATTRIBUTE_NAME)
        else:
            # update or create attribute
            c_ns = cetree.findOrBuildNodeNsPrefix(
                doc, c_node, _PYTYPE_NAMESPACE, <unsigned char*>'py')
            pytype_name = cetree.utf8(pytype.name)
            tree.xmlSetNsProp(c_node, c_ns, _PYTYPE_ATTRIBUTE_NAME,
                              _xcstr(pytype_name))
            if pytype is NoneType:
                # a None value is additionally marked with xsi:nil="true"
                c_ns = cetree.findOrBuildNodeNsPrefix(
                    doc, c_node, _XML_SCHEMA_INSTANCE_NS, <unsigned char*>'xsi')
                tree.xmlSetNsProp(c_node, c_ns, <unsigned char*>"nil", <unsigned char*>"true")

    return 0
|
1873
|
+
|
1874
|
+
# module-level bindings of the etree helpers used by deannotate()
cdef object _strip_attributes = etree.strip_attributes
cdef object _cleanup_namespaces = etree.cleanup_namespaces

def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
               bint xsi_nil=False, bint cleanup_namespaces=False):
    """deannotate(element_or_tree, pytype=True, xsi=True, xsi_nil=False, cleanup_namespaces=False)

    Recursively de-annotate the elements of an XML tree by removing
    'py:pytype' and/or 'xsi:type' attributes and/or 'xsi:nil' attributes.

    Each kind of attribute has its own keyword switch:

    * 'pytype' (default: True) removes 'py:pytype' attributes
    * 'xsi' (default: True) removes 'xsi:type' attributes
    * 'xsi_nil' (default: False) removes 'xsi:nil' attributes

    Namespace declarations are left alone by default.  Pass the option
    ``cleanup_namespaces=True`` to also remove unused namespace
    declarations from the tree.
    """
    cdef list attribute_names = [
        attr_name
        for enabled, attr_name in (
            (pytype, PYTYPE_ATTRIBUTE),
            (xsi, XML_SCHEMA_INSTANCE_TYPE_ATTR),
            (xsi_nil, XML_SCHEMA_INSTANCE_NIL_ATTR),
        )
        if enabled
    ]

    _strip_attributes(element_or_tree, *attribute_names)
    if cleanup_namespaces:
        _cleanup_namespaces(element_or_tree)
|
1906
|
+
|
1907
|
+
################################################################################
|
1908
|
+
# Module level parser setup
|
1909
|
+
|
1910
|
+
# Parser created once at import time: an XMLParser that removes blank text
# and looks up element classes through ObjectifyElementClassLookup.
cdef object __DEFAULT_PARSER
__DEFAULT_PARSER = etree.XMLParser(remove_blank_text=True)
__DEFAULT_PARSER.set_element_class_lookup( ObjectifyElementClassLookup() )

# The parser currently used by the module-level factory functions;
# replaced by set_default_parser().
cdef object objectify_parser
objectify_parser = __DEFAULT_PARSER
|
1916
|
+
|
1917
|
+
def set_default_parser(new_parser = None):
    """set_default_parser(new_parser = None)

    Replace the default parser used by objectify's Element() and
    fromstring() functions.

    ``new_parser`` must be an etree.XMLParser instance; any other object
    raises a TypeError.

    Calling the function without arguments (or with None) resets the
    default to the original parser.
    """
    global objectify_parser
    if new_parser is None:
        objectify_parser = __DEFAULT_PARSER
        return
    if not isinstance(new_parser, etree.XMLParser):
        raise TypeError("parser must inherit from lxml.etree.XMLParser")
    objectify_parser = new_parser
|
1934
|
+
|
1935
|
+
def makeparser(**kw):
    """makeparser(remove_blank_text=True, **kw)

    Create a new XML parser for objectify trees.

    All keyword arguments supported by ``etree.XMLParser()`` are accepted
    and passed through.  Unlike a plain XMLParser, this parser removes
    blank text unless you pass the ``remove_blank_text`` boolean keyword
    option yourself.
    """
    kw.setdefault('remove_blank_text', True)
    objectify_xml_parser = etree.XMLParser(**kw)
    objectify_xml_parser.set_element_class_lookup(
        ObjectifyElementClassLookup())
    return objectify_xml_parser
|
1950
|
+
|
1951
|
+
cdef _Element _makeElement(tag, text, attrib, nsmap):
    # Create a new element through the current module-level parser
    # (objectify_parser), passing tag, text, attributes and namespace
    # mapping on to cetree.makeElement().
    return cetree.makeElement(tag, None, objectify_parser, text, None, attrib, nsmap)
|
1953
|
+
|
1954
|
+
################################################################################
|
1955
|
+
# Module level factory functions
|
1956
|
+
|
1957
|
+
cdef object _fromstring
_fromstring = etree.fromstring

SubElement = etree.SubElement

def fromstring(xml, parser=None, *, base_url=None):
    """fromstring(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree fromstring() function
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
|
1977
|
+
|
1978
|
+
def XML(xml, parser=None, *, base_url=None):
    """XML(xml, parser=None, base_url=None)

    Objectify specific version of the lxml.etree XML() literal factory
    that uses the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword argument sets the original base URL of the
    document, which supports relative paths when looking up external
    entities (DTD, XInclude, ...).
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _fromstring(xml, chosen_parser, base_url=base_url)
|
1993
|
+
|
1994
|
+
cdef object _parse
_parse = etree.parse

def parse(f, parser=None, *, base_url=None):
    """parse(f, parser=None, base_url=None)

    Parse a file or file-like object with the objectify parser.

    A different parser can be passed as second argument.

    The ``base_url`` keyword sets a URL for the document when parsing
    from a file-like object.  This is needed when looking up external
    entities (DTD, XInclude, ...) with relative paths.
    """
    chosen_parser = objectify_parser if parser is None else parser
    return _parse(f, chosen_parser, base_url=base_url)
|
2011
|
+
|
2012
|
+
# Prefix -> namespace mapping used by Element() and DataElement() when the
# caller does not pass an nsmap.  DataElement() also compares against this
# object by identity, so it must stay a single shared dict.
cdef dict _DEFAULT_NSMAP = {
    "py" : PYTYPE_NAMESPACE,
    "xsi" : XML_SCHEMA_INSTANCE_NS,
    "xsd" : XML_SCHEMA_NS
}

# Module-level ElementMaker instance created with default arguments.
E = ElementMaker()
|
2019
|
+
|
2020
|
+
def Element(_tag, attrib=None, nsmap=None, *, _pytype=None, **_attributes):
    """Element(_tag, attrib=None, nsmap=None, _pytype=None, **_attributes)

    Objectify specific version of the lxml.etree Element() factory that
    always creates a structural (tree) element.

    Attributes can be passed as the ``attrib`` mapping and/or as keyword
    arguments; keyword arguments override entries of ``attrib``.  The
    mapping passed as ``attrib`` is copied and never modified.

    The 'py:pytype' attribute of the new element is set to ``_pytype``,
    or to the tree pytype name if ``_pytype`` is None.  If ``nsmap`` is
    None, the default objectify namespace mapping is used.

    NOTE: requires parser based element class lookup activated in lxml.etree!
    """
    if attrib is not None:
        # Always merge into a private dict: the pytype attribute is added
        # below and must not leak into the caller's mapping (which could
        # even be the live attribute proxy of another element).
        merged = dict(attrib)
        merged.update(_attributes)
        _attributes = merged
    if _pytype is None:
        _pytype = TREE_PYTYPE_NAME
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    _attributes[PYTYPE_ATTRIBUTE] = _pytype
    return _makeElement(_tag, None, _attributes, nsmap)
|
2039
|
+
|
2040
|
+
def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
                **_attributes):
    """DataElement(_value, attrib=None, nsmap=None, _pytype=None, _xsi=None, **_attributes)

    Create a new element from a Python value and XML attributes taken from
    keyword arguments or a dictionary passed as second argument.

    Automatically adds a 'pytype' attribute for the Python type of the value,
    if the type can be identified.  If '_pytype' or '_xsi' are among the
    keyword arguments, they will be used instead.

    If the _value argument is an ObjectifiedDataElement instance, its py:pytype,
    xsi:type and other attributes and nsmap are reused unless they are redefined
    in attrib and/or keyword arguments.

    The mapping passed as ``attrib`` is copied and never modified.

    Raises ValueError if ``_xsi`` names an XSD type but the namespace
    mapping does not provide the XML Schema namespace for it.  Errors
    raised by the type check of a registered ``_pytype`` propagate to the
    caller.
    """
    if nsmap is None:
        nsmap = _DEFAULT_NSMAP
    if attrib is not None and attrib:
        # Always merge into a private dict: type annotation attributes are
        # added below and must not leak into the caller's mapping.
        attrib = dict(attrib)
        attrib.update(_attributes)
        _attributes = attrib
    if isinstance(_value, ObjectifiedElement):
        if _pytype is None:
            if _xsi is None and not _attributes and nsmap is _DEFAULT_NSMAP:
                # special case: no change!
                return _value.__copy__()
    if isinstance(_value, ObjectifiedDataElement):
        # reuse existing nsmap unless redefined in nsmap parameter
        temp = _value.nsmap
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(nsmap)
            nsmap = temp
        # reuse existing attributes unless redefined in attrib/_attributes
        temp = _value.attrib
        if temp is not None and temp:
            temp = dict(temp)
            temp.update(_attributes)
            _attributes = temp
        # reuse existing xsi:type or py:pytype attributes, unless provided as
        # arguments
        if _xsi is None and _pytype is None:
            _xsi = _attributes.get(XML_SCHEMA_INSTANCE_TYPE_ATTR)
            _pytype = _attributes.get(PYTYPE_ATTRIBUTE)

    if _xsi is not None:
        # normalise the xsi:type value so that it carries the prefix that
        # the XML Schema namespace has in nsmap
        if ':' in _xsi:
            prefix, name = _xsi.split(':', 1)
            ns = nsmap.get(prefix)
            if ns != XML_SCHEMA_NS:
                raise ValueError, "XSD types require the XSD namespace"
        elif nsmap is _DEFAULT_NSMAP:
            name = _xsi
            _xsi = 'xsd:' + _xsi
        else:
            name = _xsi
            for prefix, ns in nsmap.items():
                if ns == XML_SCHEMA_NS:
                    if prefix is not None and prefix:
                        _xsi = prefix + ':' + _xsi
                    break
            else:
                raise ValueError, "XSD types require the XSD namespace"
        _attributes[XML_SCHEMA_INSTANCE_TYPE_ATTR] = _xsi
        if _pytype is None:
            # allow using unregistered or even wrong xsi:type names
            py_type = <PyType>_SCHEMA_TYPE_DICT.get(_xsi)
            if py_type is None:
                py_type = <PyType>_SCHEMA_TYPE_DICT.get(name)
            if py_type is not None:
                _pytype = py_type.name

    if _pytype is None:
        _pytype = _pytypename(_value)

    # convert the Python value into the text of the new element
    if _value is None and _pytype != "str":
        _pytype = _pytype or "NoneType"
        strval = None
    elif python._isString(_value):
        strval = _value
    elif isinstance(_value, bool):
        if _value:
            strval = "true"
        else:
            strval = "false"
    else:
        py_type = <PyType>_PYTYPE_DICT.get(_pytype)
        stringify = unicode if py_type is None else py_type.stringify
        strval = stringify(_value)

    if _pytype is not None:
        if _pytype == "NoneType" or _pytype == "none":
            # None values carry no text and are marked with xsi:nil
            strval = None
            _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true"
        else:
            # check if type information from arguments is valid
            py_type = <PyType>_PYTYPE_DICT.get(_pytype)
            if py_type is not None:
                if py_type.type_check is not None:
                    py_type.type_check(strval)
                _attributes[PYTYPE_ATTRIBUTE] = _pytype

    return _makeElement("value", strval, _attributes, nsmap)
|
2144
|
+
|
2145
|
+
|
2146
|
+
################################################################################
|
2147
|
+
# ObjectPath
|
2148
|
+
|
2149
|
+
include "objectpath.pxi"
|