lxml 6.0.0__cp39-cp39-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cp39-win_arm64.pyd +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cp39-win_arm64.pyd +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cp39-win_arm64.pyd +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cp39-win_arm64.pyd +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cp39-win_arm64.pyd +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libexslt/libexslt.h +29 -0
- lxml/includes/libxml/HTMLparser.h +320 -0
- lxml/includes/libxml/HTMLtree.h +147 -0
- lxml/includes/libxml/SAX.h +204 -0
- lxml/includes/libxml/SAX2.h +173 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +128 -0
- lxml/includes/libxml/catalog.h +182 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +217 -0
- lxml/includes/libxml/dict.h +81 -0
- lxml/includes/libxml/encoding.h +233 -0
- lxml/includes/libxml/entities.h +151 -0
- lxml/includes/libxml/globals.h +529 -0
- lxml/includes/libxml/hash.h +236 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +186 -0
- lxml/includes/libxml/nanohttp.h +81 -0
- lxml/includes/libxml/parser.h +1265 -0
- lxml/includes/libxml/parserInternals.h +662 -0
- lxml/includes/libxml/pattern.h +100 -0
- lxml/includes/libxml/relaxng.h +218 -0
- lxml/includes/libxml/schemasInternals.h +958 -0
- lxml/includes/libxml/schematron.h +142 -0
- lxml/includes/libxml/threads.h +94 -0
- lxml/includes/libxml/tree.h +1314 -0
- lxml/includes/libxml/uri.h +94 -0
- lxml/includes/libxml/valid.h +448 -0
- lxml/includes/libxml/xinclude.h +129 -0
- lxml/includes/libxml/xlink.h +189 -0
- lxml/includes/libxml/xmlIO.h +369 -0
- lxml/includes/libxml/xmlautomata.h +146 -0
- lxml/includes/libxml/xmlerror.h +919 -0
- lxml/includes/libxml/xmlexports.h +50 -0
- lxml/includes/libxml/xmlmemory.h +228 -0
- lxml/includes/libxml/xmlmodule.h +57 -0
- lxml/includes/libxml/xmlreader.h +428 -0
- lxml/includes/libxml/xmlregexp.h +222 -0
- lxml/includes/libxml/xmlsave.h +88 -0
- lxml/includes/libxml/xmlschemas.h +246 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +202 -0
- lxml/includes/libxml/xmlversion.h +526 -0
- lxml/includes/libxml/xmlwriter.h +488 -0
- lxml/includes/libxml/xpath.h +575 -0
- lxml/includes/libxml/xpathInternals.h +632 -0
- lxml/includes/libxml/xpointer.h +137 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/libxslt.h +36 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/trio.h +216 -0
- lxml/includes/libxslt/triodef.h +220 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/win32config.h +51 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1992 -0
- lxml/includes/libxslt/xsltconfig.h +179 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cp39-win_arm64.pyd +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cp39-win_arm64.pyd +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +177 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/proxy.pxi
ADDED
@@ -0,0 +1,619 @@
|
|
1
|
+
# Proxy functions and low level node allocation stuff
|
2
|
+
|
3
|
+
# Proxies represent elements, their reference is stored in the C
|
4
|
+
# structure of the respective node to avoid multiple instantiation of
|
5
|
+
# the Python class.
|
6
|
+
|
7
|
+
@cython.linetrace(False)
@cython.profile(False)
cdef inline _Element getProxy(xmlNode* c_node):
    """Return the proxy stored on ``c_node``, or None.

    The proxy reference is read from the ``_private`` slot of the node.
    None is returned for a NULL node and for a node whose slot is empty.
    """
    if c_node is NULL or c_node._private is NULL:
        return None
    return <_Element>c_node._private
|
17
|
+
|
18
|
+
|
19
|
+
@cython.linetrace(False)
@cython.profile(False)
cdef inline bint hasProxy(xmlNode* c_node):
    """Tell whether the ``_private`` slot of ``c_node`` is occupied.

    Note that ``c_node`` is dereferenced without a NULL check.
    """
    return c_node._private is not NULL
|
25
|
+
|
26
|
+
|
27
|
+
@cython.linetrace(False)
@cython.profile(False)
cdef inline int _registerProxy(_Element proxy, _Document doc,
                               xmlNode* c_node) except -1:
    """Bind ``proxy`` to ``c_node`` and to the document ``doc``.

    Asserts that the node does not carry a proxy yet.  Returns 0.
    """
    assert c_node._private is NULL, "double registering proxy!"
    # proxy -> document / node
    proxy._doc = doc
    proxy._c_node = c_node
    # node -> proxy
    c_node._private = <void*>proxy
    return 0
|
39
|
+
|
40
|
+
|
41
|
+
@cython.linetrace(False)
@cython.profile(False)
cdef inline int _unregisterProxy(_Element proxy) except -1:
    """Detach ``proxy`` from the node it is registered on.

    Asserts that the ``_private`` slot of the node refers to this very
    proxy and then clears the slot.  Returns 0.
    """
    cdef xmlNode* c_proxied_node
    c_proxied_node = proxy._c_node
    assert c_proxied_node._private is <void*>proxy, "Tried to unregister unknown proxy"
    c_proxied_node._private = NULL
    return 0
|
50
|
+
|
51
|
+
|
52
|
+
################################################################################
|
53
|
+
# temporarily make a node the root node of its document
|
54
|
+
|
55
|
+
cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL:
    """Return a document that has ``c_node`` as its root node.

    Delegates to ``_plainFakeRootDoc()`` with ``with_siblings`` set to true.
    """
    cdef xmlDoc* c_fake_doc = _plainFakeRootDoc(c_base_doc, c_node, True)
    return c_fake_doc
|
57
|
+
|
58
|
+
cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
                               bint with_siblings) except NULL:
    """Build a temporary document that has the given node as root node.

    If ``c_node`` already is the root element of ``c_base_doc`` (and, unless
    ``with_siblings`` is set, has no siblings), ``c_base_doc`` itself is
    returned.  Otherwise, a shallow copy of the document is created with a
    shallow copy of ``c_node`` as root.  The copied root borrows the children
    of the original node, and their parent pointers are diverted to it.  The
    original node is remembered in the ``_private`` slot of the new document.

    Note that copy and original must not be modified during its lifetime!!
    Always call _destroyFakeDoc() after use!

    Raises MemoryError if the root node cannot be copied.
    """
    cdef xmlNode* c_child
    cdef xmlNode* c_root
    cdef xmlNode* c_new_root
    cdef xmlDoc* c_doc
    if with_siblings or (c_node.prev is NULL and c_node.next is NULL):
        c_root = tree.xmlDocGetRootElement(c_base_doc)
        if c_root is c_node:
            # already the root node, no siblings
            return c_base_doc

    c_doc = _copyDoc(c_base_doc, 0)  # non recursive!
    c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2)  # non recursive!
    if c_new_root is NULL:
        # the copy failed: release the half-built document instead of
        # dereferencing a NULL root node below
        tree.xmlFreeDoc(c_doc)
        raise MemoryError()
    tree.xmlDocSetRootElement(c_doc, c_new_root)
    _copyParentNamespaces(c_node, c_new_root)

    # let the new root borrow the children of the original node
    c_new_root.children = c_node.children
    c_new_root.last = c_node.last
    c_new_root.next = c_new_root.prev = NULL

    # store original node
    c_doc._private = c_node

    # divert parent pointers of children
    c_child = c_new_root.children
    while c_child is not NULL:
        c_child.parent = c_new_root
        c_child = c_child.next

    c_doc.children = c_new_root
    return c_doc
|
93
|
+
|
94
|
+
cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc) noexcept:
    """Delete a temporary document.

    Does nothing if ``c_doc`` is ``c_base_doc`` itself.  Otherwise, the
    children of the document's root element are re-parented to the node
    stored in the ``_private`` slot of ``c_doc`` before the document is freed.
    """
    cdef xmlNode* c_current
    cdef xmlNode* c_original
    cdef xmlNode* c_fake_root
    if c_doc is c_base_doc:
        return

    c_fake_root = tree.xmlDocGetRootElement(c_doc)
    c_original = <xmlNode*>c_doc._private

    # restore parent pointers of children
    c_current = c_fake_root.children
    while c_current is not NULL:
        c_current.parent = c_original
        c_current = c_current.next

    # prevent recursive removal of children
    c_fake_root.last = NULL
    c_fake_root.children = NULL
    tree.xmlFreeDoc(c_doc)
|
113
|
+
|
114
|
+
cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element):
    """Special element factory for cases where we need to create a fake
    root document, but still need to instantiate arbitrary nodes from
    it.  If we instantiate the fake root node, things will turn bad
    when it's destroyed.

    Instead, if we are asked to instantiate the fake root node, we
    instantiate the original node instead.  The original node is taken
    from the ``_private`` slot of the document that owns ``c_element``.
    """
    cdef xmlDoc* c_owner_doc = c_element.doc
    if (c_owner_doc is not doc._c_doc
            and c_owner_doc._private is not NULL
            and c_element is c_owner_doc.children):
        # this is the fake root => substitute the node it stands in for
        c_element = <xmlNode*>c_owner_doc._private
    return _elementFactory(doc, c_element)
|
129
|
+
|
130
|
+
################################################################################
|
131
|
+
# support for freeing tree elements when proxy objects are destroyed
|
132
|
+
|
133
|
+
cdef int attemptDeallocation(xmlNode* c_node) noexcept:
    """Attempt deallocation of c_node (or higher up in tree).

    Returns 1 if a subtree was freed and 0 otherwise, including the case
    that ``c_node`` is NULL.
    """
    cdef xmlNode* c_subtree_top
    if c_node is NULL:
        # could be we actually aren't referring to the tree at all
        return 0
    c_subtree_top = getDeallocationTop(c_node)
    if c_subtree_top is NULL:
        return 0
    _removeText(c_subtree_top.next)  # tail
    tree.xmlFreeNode(c_subtree_top)
    return 1
|
148
|
+
|
149
|
+
cdef xmlNode* getDeallocationTop(xmlNode* c_node) noexcept:
    """Return the top of the tree that can be deallocated, or NULL.

    NULL is returned if ``c_node`` or one of its ancestors has a proxy, if
    an ancestor is a document node, or if the topmost ancestor, one of its
    element siblings or any subtree checked by ``canDeallocateChildNodes()``
    still has a proxy.
    """
    cdef xmlNode* c_sibling
    if hasProxy(c_node):
        return NULL

    # climb up to the topmost ancestor
    while c_node.parent is not NULL:
        c_node = c_node.parent
        if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
            # if we're still attached to the document, don't deallocate
            return NULL
        if hasProxy(c_node):
            return NULL

    # see whether we have children to deallocate
    if not canDeallocateChildNodes(c_node):
        return NULL

    # see whether we have siblings to deallocate: first backwards ...
    c_sibling = c_node.prev
    while c_sibling is not NULL:
        if _isElement(c_sibling) and (
                hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling)):
            return NULL
        c_sibling = c_sibling.prev

    # ... then forwards
    c_sibling = c_node.next
    while c_sibling is not NULL:
        if _isElement(c_sibling) and (
                hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling)):
            return NULL
        c_sibling = c_sibling.next

    return c_node
|
185
|
+
|
186
|
+
cdef int canDeallocateChildNodes(xmlNode* c_parent) noexcept:
    """Return 0 if a node visited below ``c_parent`` has a proxy, else 1.

    The traversal is done by the BEGIN/END_FOR_EACH_ELEMENT_FROM macro pair
    (defined outside of this file), starting at the first child.
    """
    cdef xmlNode* c_descendant = c_parent.children
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_descendant, 1)
    if c_descendant._private is not NULL:
        # a proxy still refers to this node
        return 0
    tree.END_FOR_EACH_ELEMENT_FROM(c_descendant)
    return 1
|
194
|
+
|
195
|
+
################################################################################
|
196
|
+
# fix _Document references and namespaces when a node changes documents
|
197
|
+
|
198
|
+
cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) noexcept nogil:
    """Copy the namespaces of all ancestors of c_from_node to c_to_node.

    Walks up the parent chain for as long as the parent is an element,
    an XInclude node or an XML document node, and declares each namespace
    found in the parent's ``nsDef`` list on ``c_to_node``.
    """
    cdef xmlNs* c_decl
    cdef xmlNode* c_ancestor = c_from_node.parent
    while c_ancestor is not NULL and (
            tree._isElementOrXInclude(c_ancestor) or
            c_ancestor.type == tree.XML_DOCUMENT_NODE):
        c_decl = c_ancestor.nsDef
        while c_decl is not NULL:
            # libxml2 will check if the prefix is already defined
            tree.xmlNewNs(c_to_node, c_decl.href, c_decl.prefix)
            c_decl = c_decl.next
        c_ancestor = c_ancestor.parent
|
214
|
+
|
215
|
+
|
216
|
+
ctypedef struct _ns_update_map:
    # One namespace cache entry: nodes that refer to 'old' get 'new'
    # assigned as their namespace instead.
    xmlNs* old
    xmlNs* new
|
219
|
+
|
220
|
+
|
221
|
+
ctypedef struct _nscache:
    # Growable array of namespace mappings.
    _ns_update_map* ns_map  # entry array, (re-)allocated by _growNsCache()
    size_t size             # number of allocated entries
    size_t last             # index of the next free entry (== entries in use)
|
225
|
+
|
226
|
+
|
227
|
+
cdef int _growNsCache(_nscache* c_ns_cache) except -1:
    """Enlarge the entry array of the namespace cache.

    An empty cache is grown to 20 entries, any other cache is doubled in
    size.  On allocation failure, the old array is freed, the array
    pointer is reset to NULL and MemoryError is raised.  Returns 0.
    """
    cdef _ns_update_map* c_new_map
    cdef size_t new_size
    if c_ns_cache.size:
        new_size = c_ns_cache.size * 2
    else:
        new_size = 20
    c_ns_cache.size = new_size

    c_new_map = <_ns_update_map*> python.lxml_realloc(
        c_ns_cache.ns_map, c_ns_cache.size, sizeof(_ns_update_map))
    if c_new_map is NULL:
        python.lxml_free(c_ns_cache.ns_map)
        c_ns_cache.ns_map = NULL
        raise MemoryError()

    c_ns_cache.ns_map = c_new_map
    return 0
|
241
|
+
|
242
|
+
|
243
|
+
cdef inline int _appendToNsCache(_nscache* c_ns_cache,
                                 xmlNs* c_old_ns, xmlNs* c_new_ns) except -1:
    """Append the mapping ``c_old_ns`` -> ``c_new_ns`` to the cache.

    Grows the cache first if all allocated entries are in use.
    Returns 0; errors raised while growing the cache are propagated.
    """
    cdef size_t slot = c_ns_cache.last
    if slot >= c_ns_cache.size:
        _growNsCache(c_ns_cache)
    c_ns_cache.ns_map[slot] = _ns_update_map(old=c_old_ns, new=c_new_ns)
    c_ns_cache.last = slot + 1
    return 0
|
249
|
+
|
250
|
+
|
251
|
+
cdef int _stripRedundantNamespaceDeclarations(xmlNode* c_element, _nscache* c_ns_cache,
                                              xmlNs** c_del_ns_list) except -1:
    """Removes namespace declarations from an element that are already
    defined in its parents.  Does not free the xmlNs's, just prepends
    them to the c_del_ns_list.

    Every declaration of the element is also recorded in the namespace
    cache: kept ones map to themselves, stripped ones map to the
    declaration found via the parent.
    """
    cdef xmlNs* c_known_ns
    cdef xmlNs* c_decl
    # use a xmlNs** to handle assignments to "c_element.nsDef" correctly
    cdef xmlNs** c_link = &c_element.nsDef
    while c_link[0] is not NULL:
        c_decl = c_link[0]
        c_known_ns = tree.xmlSearchNsByHref(
            c_element.doc, c_element.parent, c_decl.href)
        if c_known_ns is not NULL:
            # known namespace href => cache mapping and strip old ns
            _appendToNsCache(c_ns_cache, c_decl, c_known_ns)
            # cut the declaration out and prepend it to the garbage chain
            c_link[0] = c_decl.next
            c_decl.next = c_del_ns_list[0]
            c_del_ns_list[0] = c_decl
        else:
            # new namespace href => keep and cache the ns declaration
            _appendToNsCache(c_ns_cache, c_decl, c_decl)
            c_link = &c_decl.next
    return 0
|
278
|
+
|
279
|
+
|
280
|
+
cdef void _cleanUpFromNamespaceAdaptation(xmlNode* c_start_node,
                                          _nscache* c_ns_cache, xmlNs* c_del_ns_list) noexcept:
    """Release the namespace cache and re-attach stripped declarations.

    Try to recover from exceptions with really bad timing.  We were in the
    middle of ripping out xmlNS-es and likely ran out of memory.  Try to fix
    up the tree by re-adding the original xmlNs declarations (which might
    still be used in some places).

    The entry array of the cache is freed and the cache is reset to its
    empty state.  The chain ``c_del_ns_list`` is appended to the ``nsDef``
    list of ``c_start_node``.
    """
    cdef xmlNs* c_ns
    if c_ns_cache.ns_map:
        python.lxml_free(c_ns_cache.ns_map)
        # reset the cache so that no dangling pointer is left behind,
        # the same way _growNsCache() does it on failure
        c_ns_cache.ns_map = NULL
        c_ns_cache.size = 0
        c_ns_cache.last = 0
    if c_del_ns_list:
        if not c_start_node.nsDef:
            c_start_node.nsDef = c_del_ns_list
        else:
            # find the end of the declaration list and append the chain
            c_ns = c_start_node.nsDef
            while c_ns.next:
                c_ns = c_ns.next
            c_ns.next = c_del_ns_list
|
296
|
+
|
297
|
+
|
298
|
+
cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
                            xmlNode* c_element) except -1:
    """Fix the xmlNs pointers of a node and its subtree that were moved.

    Originally copied from libxml2's xmlReconciliateNs().  Expects
    libxml2 doc pointers of node to be correct already, but fixes
    _Document references.

    For each node in the subtree, we do this:

    1) Remove redundant declarations of namespace that are already
       defined in its parents.

    2) Replace namespaces that are *not* defined on the node or its
       parents by the equivalent namespace declarations that *are*
       defined on the node or its parents (possibly using a different
       prefix).  If a namespace is unknown, declare a new one on the
       node.

    3) Reassign the names of tags and attribute from the dict of the
       target document *iff* it is different from the dict used in the
       source subtree.

    4) Set the Document reference to the new Document (if different).
       This is done on backtracking to keep the original Document
       alive as long as possible, until all its elements are updated.

    Note that the namespace declarations are removed from the tree in
    step 1), but freed only after the complete subtree was traversed
    and all occurrences were replaced by tree-internal pointers.

    Returns 0.  Does nothing if ``c_element`` is neither an element nor
    an XInclude node.
    """
    cdef xmlNode* c_start_node
    cdef xmlNode* c_node
    cdef _Element proxy
    cdef _nscache c_ns_cache = [NULL, 0, 0]
    cdef xmlNs* c_del_ns_list = NULL
    # plain C counter, matches the 'size_t' parameter of fixElementDocument()
    cdef size_t proxy_count = 0

    if not tree._isElementOrXInclude(c_element):
        return 0

    c_start_node = c_element

    tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
    if tree._isElementOrXInclude(c_element):
        if hasProxy(c_element):
            proxy_count += 1

        # 1) cut out namespaces defined here that are already known by
        #    the ancestors
        if c_element.nsDef is not NULL:
            try:
                _stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list)
            except:
                _cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list)
                raise

        # 2) make sure the namespaces of an element and its attributes
        #    are declared in this document (i.e. on the node or its parents)
        if c_element.ns is not NULL:
            _fixCNs(doc, c_start_node, c_element, &c_ns_cache, c_del_ns_list)

        c_node = <xmlNode*>c_element.properties
        while c_node is not NULL:
            if c_node.ns is not NULL:
                _fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list)
            c_node = c_node.next

    tree.END_FOR_EACH_FROM(c_element)

    # free now unused namespace declarations
    if c_del_ns_list is not NULL:
        tree.xmlFreeNsList(c_del_ns_list)

    # cleanup
    if c_ns_cache.ns_map is not NULL:
        python.lxml_free(c_ns_cache.ns_map)

    # 3) fix the names in the tree if we moved it from a different thread
    if doc._c_doc.dict is not c_source_doc.dict:
        fixThreadDictNames(c_start_node, c_source_doc.dict, doc._c_doc.dict)

    # 4) fix _Document references
    #    (and potentially deallocate the source document)
    if proxy_count > 0:
        if proxy_count == 1 and c_start_node._private is not NULL:
            # the only proxy in the subtree sits on the start node itself
            proxy = getProxy(c_start_node)
            if proxy is not None:
                if proxy._doc is not doc:
                    proxy._doc = doc
            else:
                fixElementDocument(c_start_node, doc, proxy_count)
        else:
            fixElementDocument(c_start_node, doc, proxy_count)

    return 0
|
396
|
+
|
397
|
+
|
398
|
+
cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc) noexcept:
    """Adaptation of 'xmlSetTreeDoc()' that deep-fixes the document links iteratively.
    It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42

    For element nodes, the attributes and the nodes below them are
    re-linked as well.  ID attributes are removed from the ID table of the
    document that the node belonged to so far.
    """
    cdef tree.xmlAttr* c_attr
    tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_attr = <tree.xmlAttr*>c_node.properties
        while c_attr is not NULL:
            if c_attr.atype == tree.XML_ATTRIBUTE_ID:
                # 'c_node.doc' still refers to the old document here
                tree.xmlRemoveID(c_node.doc, c_attr)
            c_attr.doc = c_doc
            _fixDocChildren(c_attr.children, c_doc)
            c_attr = c_attr.next
    # Set doc link for all nodes, not only elements.
    c_node.doc = c_doc
    tree.END_FOR_EACH_FROM(c_node)
|
414
|
+
|
415
|
+
|
416
|
+
cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc) noexcept:
    """Set the ``doc`` link of ``c_child``, its following siblings and all
    nodes below them to ``c_doc``.  Descends recursively into children.
    """
    cdef xmlNode* c_current = c_child
    while c_current is not NULL:
        c_current.doc = c_doc
        if c_current.children is not NULL:
            _fixDocChildren(c_current.children, c_doc)
        c_current = c_current.next
|
422
|
+
|
423
|
+
|
424
|
+
cdef int _fixCNs(_Document doc, xmlNode* c_start_node, xmlNode* c_node,
                 _nscache* c_ns_cache, xmlNs* c_del_ns_list) except -1:
    """Replace the namespace of ``c_node`` by one known to the new document.

    ``c_node`` is an element or attribute node with a non-NULL ``ns``.
    The replacement is taken from the namespace cache if it has a usable
    mapping for the current namespace.  Otherwise, it is looked up or
    created through ``doc._findOrBuildNodeNs()`` on ``c_start_node`` and
    the mapping from the original namespace is added to the cache.

    On any exception in the lookup path, the namespace adaptation is
    cleaned up via ``_cleanUpFromNamespaceAdaptation()`` and the exception
    is re-raised.  Returns 0.
    """
    cdef xmlNs* c_ns = NULL
    # remember the original namespace, 'c_node.ns' gets overwritten below
    cdef xmlNs* c_old_ns = c_node.ns
    cdef bint is_prefixed_attr = (c_node.type == tree.XML_ATTRIBUTE_NODE and c_node.ns.prefix)

    for ns_map in c_ns_cache.ns_map[:c_ns_cache.last]:
        if c_node.ns is ns_map.old:
            if is_prefixed_attr and not ns_map.new.prefix:
                # avoid dropping prefix from attributes
                continue
            c_ns = ns_map.new
            break

    if c_ns:
        c_node.ns = c_ns
    else:
        # not in cache or not acceptable
        # => find a replacement from this document
        try:
            c_ns = doc._findOrBuildNodeNs(
                c_start_node, c_old_ns.href, c_old_ns.prefix,
                c_node.type == tree.XML_ATTRIBUTE_NODE)
            c_node.ns = c_ns
            # map the *original* namespace to its replacement so that other
            # nodes that share it can be served from the cache
            _appendToNsCache(c_ns_cache, c_old_ns, c_ns)
        except:
            _cleanUpFromNamespaceAdaptation(c_start_node, c_ns_cache, c_del_ns_list)
            raise
    return 0
|
452
|
+
|
453
|
+
|
454
|
+
cdef int fixElementDocument(xmlNode* c_element, _Document doc,
                            size_t proxy_count) except -1:
    """Point the proxies found in the subtree of ``c_element`` to ``doc``.

    ``proxy_count`` is decremented for each proxy found, and the traversal
    stops as soon as it reaches zero.  Returns 0.
    """
    cdef xmlNode* c_current = c_element
    cdef _Element proxy = None  # init-to-None required due to fake-loop below
    tree.BEGIN_FOR_EACH_FROM(c_element, c_current, 1)
    if c_current._private is not NULL:
        proxy = getProxy(c_current)
        if proxy is not None:
            if proxy._doc is not doc:
                proxy._doc = doc
            proxy_count -= 1
            if proxy_count == 0:
                # all proxies were updated, no need to look any further
                return 0
    tree.END_FOR_EACH_FROM(c_current)
    return 0
|
468
|
+
|
469
|
+
|
470
|
+
cdef void fixThreadDictNames(xmlNode* c_element,
                             tree.xmlDict* c_src_dict,
                             tree.xmlDict* c_dict) noexcept nogil:
    """Re-assign the names of tags and attributes from ``c_dict``.

    This should only be called when the element is based on a
    different libxml2 tag name dictionary.

    For a document node, the namespaces on the node, the external and
    internal DTD subsets and all children are processed.  For an element
    or XInclude node, the node and its subtree are processed.  Other node
    types are left alone.
    """
    cdef xmlNode* c_child
    if c_element.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
        # may define "xml" namespace
        fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
        if c_element.doc.extSubset:
            fixThreadDictNamesForDtd(c_element.doc.extSubset, c_src_dict, c_dict)
        if c_element.doc.intSubset:
            fixThreadDictNamesForDtd(c_element.doc.intSubset, c_src_dict, c_dict)
        c_child = c_element.children
        while c_child is not NULL:
            fixThreadDictNamesForNode(c_child, c_src_dict, c_dict)
            c_child = c_child.next
    elif tree._isElementOrXInclude(c_element):
        fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)
|
491
|
+
|
492
|
+
|
493
|
+
cdef inline void _fixThreadDictPtr(const_xmlChar** c_ptr,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) noexcept nogil:
    """Replace the string at ``c_ptr[0]`` by its counterpart in ``c_dict``.

    Only strings that ``xmlDictOwns()`` reports for ``c_src_dict`` are
    replaced.  The pointer is left untouched if the string or the source
    dict is NULL, or if the lookup in ``c_dict`` returns NULL.
    """
    cdef const_xmlChar* c_interned = c_ptr[0]
    if c_interned is NULL or c_src_dict is NULL:
        return
    if not tree.xmlDictOwns(c_src_dict, c_interned):
        return
    # return value can be NULL on memory error, but we don't handle that here
    c_interned = tree.xmlDictLookup(c_dict, c_interned, -1)
    if c_interned is not NULL:
        c_ptr[0] = c_interned
|
502
|
+
|
503
|
+
|
504
|
+
cdef void fixThreadDictNamesForNode(xmlNode* c_element,
                                    tree.xmlDict* c_src_dict,
                                    tree.xmlDict* c_dict) noexcept nogil:
    """Move the dict strings of the nodes visited from ``c_element`` to
    ``c_dict``.

    Elements and XInclude start nodes get their attributes, namespace
    declarations and name fixed, text nodes their content.  Comments are
    skipped, all other nodes get their name fixed.
    """
    cdef xmlNode* c_current = c_element
    tree.BEGIN_FOR_EACH_FROM(c_element, c_current, 1)
    if c_current.type == tree.XML_ELEMENT_NODE or \
            c_current.type == tree.XML_XINCLUDE_START:
        fixThreadDictNamesForAttributes(
            c_current.properties, c_src_dict, c_dict)
        fixThreadDictNsForNode(c_current, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_current.name, c_src_dict, c_dict)
    elif c_current.type == tree.XML_TEXT_NODE:
        # libxml2's SAX2 parser interns some indentation space
        fixThreadDictContentForNode(c_current, c_src_dict, c_dict)
    elif c_current.type != tree.XML_COMMENT_NODE:
        # every node type except comments: fix the name
        _fixThreadDictPtr(&c_current.name, c_src_dict, c_dict)
    tree.END_FOR_EACH_FROM(c_current)
|
522
|
+
|
523
|
+
|
524
|
+
cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
                                                 tree.xmlDict* c_src_dict,
                                                 tree.xmlDict* c_dict) noexcept nogil:
    """Move names and values of an attribute chain to ``c_dict``.

    Walks the chain starting at ``c_attr`` via the ``next`` links.  The
    name of each node is fixed unless it is a text or comment node, and
    the content of each of its children is fixed.
    """
    cdef xmlNode* c_value
    cdef xmlNode* c_current = <xmlNode*>c_attr
    while c_current is not NULL:
        if c_current.type != tree.XML_TEXT_NODE and \
                c_current.type != tree.XML_COMMENT_NODE:
            _fixThreadDictPtr(&c_current.name, c_src_dict, c_dict)
        # libxml2 keeps some (!) attribute values in the dict
        c_value = c_current.children
        while c_value is not NULL:
            fixThreadDictContentForNode(c_value, c_src_dict, c_dict)
            c_value = c_value.next
        c_current = c_current.next
|
538
|
+
|
539
|
+
|
540
|
+
cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
                                             tree.xmlDict* c_src_dict,
                                             tree.xmlDict* c_dict) noexcept nogil:
    """Move the content string of ``c_node`` from ``c_src_dict`` to ``c_dict``.

    Nothing happens if the content is NULL, if it points at the node's own
    ``properties`` field, if there is no source dict, or if the source dict
    does not own the content string.
    """
    if c_node.content is not NULL and \
            c_node.content is not <xmlChar*>&c_node.properties:
        # xmlDictOwns() signals an error with -1 (a true value) for a NULL
        # dict, so rule that case out first, like _fixThreadDictPtr() does
        if c_src_dict and tree.xmlDictOwns(c_src_dict, c_node.content):
            # result can be NULL on memory error, but we don't handle that here
            c_node.content = <xmlChar*>tree.xmlDictLookup(c_dict, c_node.content, -1)
|
548
|
+
|
549
|
+
|
550
|
+
cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
                                        tree.xmlDict* c_src_dict,
                                        tree.xmlDict* c_dict) noexcept nogil:
    """Move the href and prefix strings of all namespace declarations in
    the ``nsDef`` list of ``c_node`` to ``c_dict``.
    """
    cdef xmlNs* c_decl
    c_decl = c_node.nsDef
    while c_decl is not NULL:
        _fixThreadDictPtr(&c_decl.href, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_decl.prefix, c_src_dict, c_dict)
        c_decl = c_decl.next
|
558
|
+
|
559
|
+
|
560
|
+
cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) noexcept nogil:
    """Move the dict strings of the declarations in ``c_dtd`` to ``c_dict``.

    Handles the direct children of the DTD: for element declarations, the
    name and prefix of the content model and the strings of the attribute
    declarations (chained via ``nexth``); for entity declarations, the
    name, external ID, system ID and content.
    """
    cdef xmlNode* c_decl
    cdef tree.xmlElement* c_elem_decl
    cdef tree.xmlAttribute* c_attr_decl
    cdef tree.xmlEntity* c_entity_decl

    c_decl = c_dtd.children
    while c_decl is not NULL:
        if c_decl.type == tree.XML_ELEMENT_DECL:
            c_elem_decl = <tree.xmlElement*>c_decl
            if c_elem_decl.content is not NULL:
                _fixThreadDictPtr(&c_elem_decl.content.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_elem_decl.content.prefix, c_src_dict, c_dict)
            c_attr_decl = c_elem_decl.attributes
            while c_attr_decl is not NULL:
                _fixThreadDictPtr(&c_attr_decl.defaultValue, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attr_decl.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attr_decl.prefix, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attr_decl.elem, c_src_dict, c_dict)
                c_attr_decl = c_attr_decl.nexth
        elif c_decl.type == tree.XML_ENTITY_DECL:
            c_entity_decl = <tree.xmlEntity*>c_decl
            _fixThreadDictPtr(&c_entity_decl.name, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity_decl.ExternalID, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity_decl.SystemID, c_src_dict, c_dict)
            _fixThreadDictPtr(<const_xmlChar**>&c_entity_decl.content, c_src_dict, c_dict)
        c_decl = c_decl.next
|
589
|
+
|
590
|
+
|
591
|
+
################################################################################
|
592
|
+
# adopt an xmlDoc from an external libxml2 document source
|
593
|
+
|
594
|
+
cdef _Document _adoptForeignDoc(xmlDoc* c_doc, _BaseParser parser=None, bint is_owned=True):
    """Convert and wrap an externally produced xmlDoc for use in lxml.

    If ``is_owned`` is true, the '_private' pointers of the nodes in the
    document are reset to NULL to prevent accidental dereference into lxml
    proxy objects.  Otherwise, a deep copy of the document is wrapped and
    the original is left alone.

    Raises ValueError for a NULL document and for a node that is neither
    an XML nor an HTML document; in the latter case, an owned document is
    freed first.  Raises MemoryError if the copy fails.
    """
    cdef xmlNode* c_node
    if c_doc is NULL:
        raise ValueError("Illegal document provided: NULL")
    if c_doc.type != tree.XML_DOCUMENT_NODE and \
            c_doc.type != tree.XML_HTML_DOCUMENT_NODE:
        # read the type before the document is (potentially) freed
        doc_type = c_doc.type
        if is_owned:
            tree.xmlFreeDoc(c_doc)
        raise ValueError(f"Illegal document provided: expected XML or HTML, found {doc_type}")

    if is_owned:
        c_node = <xmlNode*>c_doc
        tree.BEGIN_FOR_EACH_FROM(<xmlNode*>c_doc, c_node, 1)
        c_node._private = NULL
        tree.END_FOR_EACH_FROM(c_node)
    else:
        # create a fresh copy that lxml owns
        c_doc = tree.xmlCopyDoc(c_doc, 1)
        if c_doc is NULL:
            raise MemoryError()

    return _documentFactory(c_doc, parser)
|