lxml 6.0.0__cp39-cp39-manylinux_2_31_armv7l.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cpython-39-arm-linux-gnueabihf.so +0 -0
  4. lxml/_elementpath.py +343 -0
  5. lxml/apihelpers.pxi +1801 -0
  6. lxml/builder.cpython-39-arm-linux-gnueabihf.so +0 -0
  7. lxml/builder.py +243 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +36 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cpython-39-arm-linux-gnueabihf.so +0 -0
  16. lxml/etree.h +244 -0
  17. lxml/etree.pyx +3853 -0
  18. lxml/etree_api.h +204 -0
  19. lxml/extensions.pxi +830 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1927 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_difflib.cpython-39-arm-linux-gnueabihf.so +0 -0
  24. lxml/html/_difflib.py +2106 -0
  25. lxml/html/_html5builder.py +100 -0
  26. lxml/html/_setmixin.py +56 -0
  27. lxml/html/builder.py +173 -0
  28. lxml/html/clean.py +21 -0
  29. lxml/html/defs.py +135 -0
  30. lxml/html/diff.cpython-39-arm-linux-gnueabihf.so +0 -0
  31. lxml/html/diff.py +972 -0
  32. lxml/html/formfill.py +299 -0
  33. lxml/html/html5parser.py +260 -0
  34. lxml/html/soupparser.py +314 -0
  35. lxml/html/usedoctest.py +13 -0
  36. lxml/includes/__init__.pxd +0 -0
  37. lxml/includes/__init__.py +0 -0
  38. lxml/includes/c14n.pxd +25 -0
  39. lxml/includes/config.pxd +3 -0
  40. lxml/includes/dtdvalid.pxd +18 -0
  41. lxml/includes/etree_defs.h +379 -0
  42. lxml/includes/etreepublic.pxd +237 -0
  43. lxml/includes/extlibs/__init__.py +0 -0
  44. lxml/includes/extlibs/libcharset.h +45 -0
  45. lxml/includes/extlibs/localcharset.h +137 -0
  46. lxml/includes/extlibs/zconf.h +543 -0
  47. lxml/includes/extlibs/zlib.h +1938 -0
  48. lxml/includes/htmlparser.pxd +56 -0
  49. lxml/includes/libexslt/__init__.py +0 -0
  50. lxml/includes/libexslt/exslt.h +108 -0
  51. lxml/includes/libexslt/exsltconfig.h +70 -0
  52. lxml/includes/libexslt/exsltexports.h +63 -0
  53. lxml/includes/libxml/HTMLparser.h +339 -0
  54. lxml/includes/libxml/HTMLtree.h +148 -0
  55. lxml/includes/libxml/SAX.h +18 -0
  56. lxml/includes/libxml/SAX2.h +170 -0
  57. lxml/includes/libxml/__init__.py +0 -0
  58. lxml/includes/libxml/c14n.h +115 -0
  59. lxml/includes/libxml/catalog.h +183 -0
  60. lxml/includes/libxml/chvalid.h +230 -0
  61. lxml/includes/libxml/debugXML.h +79 -0
  62. lxml/includes/libxml/dict.h +82 -0
  63. lxml/includes/libxml/encoding.h +307 -0
  64. lxml/includes/libxml/entities.h +147 -0
  65. lxml/includes/libxml/globals.h +25 -0
  66. lxml/includes/libxml/hash.h +251 -0
  67. lxml/includes/libxml/list.h +137 -0
  68. lxml/includes/libxml/nanoftp.h +16 -0
  69. lxml/includes/libxml/nanohttp.h +98 -0
  70. lxml/includes/libxml/parser.h +1633 -0
  71. lxml/includes/libxml/parserInternals.h +591 -0
  72. lxml/includes/libxml/relaxng.h +224 -0
  73. lxml/includes/libxml/schemasInternals.h +959 -0
  74. lxml/includes/libxml/schematron.h +143 -0
  75. lxml/includes/libxml/threads.h +81 -0
  76. lxml/includes/libxml/tree.h +1326 -0
  77. lxml/includes/libxml/uri.h +106 -0
  78. lxml/includes/libxml/valid.h +485 -0
  79. lxml/includes/libxml/xinclude.h +141 -0
  80. lxml/includes/libxml/xlink.h +193 -0
  81. lxml/includes/libxml/xmlIO.h +419 -0
  82. lxml/includes/libxml/xmlautomata.h +163 -0
  83. lxml/includes/libxml/xmlerror.h +962 -0
  84. lxml/includes/libxml/xmlexports.h +96 -0
  85. lxml/includes/libxml/xmlmemory.h +188 -0
  86. lxml/includes/libxml/xmlmodule.h +61 -0
  87. lxml/includes/libxml/xmlreader.h +444 -0
  88. lxml/includes/libxml/xmlregexp.h +116 -0
  89. lxml/includes/libxml/xmlsave.h +111 -0
  90. lxml/includes/libxml/xmlschemas.h +254 -0
  91. lxml/includes/libxml/xmlschemastypes.h +152 -0
  92. lxml/includes/libxml/xmlstring.h +140 -0
  93. lxml/includes/libxml/xmlunicode.h +15 -0
  94. lxml/includes/libxml/xmlversion.h +332 -0
  95. lxml/includes/libxml/xmlwriter.h +489 -0
  96. lxml/includes/libxml/xpath.h +569 -0
  97. lxml/includes/libxml/xpathInternals.h +639 -0
  98. lxml/includes/libxml/xpointer.h +48 -0
  99. lxml/includes/libxslt/__init__.py +0 -0
  100. lxml/includes/libxslt/attributes.h +39 -0
  101. lxml/includes/libxslt/documents.h +93 -0
  102. lxml/includes/libxslt/extensions.h +262 -0
  103. lxml/includes/libxslt/extra.h +72 -0
  104. lxml/includes/libxslt/functions.h +78 -0
  105. lxml/includes/libxslt/imports.h +75 -0
  106. lxml/includes/libxslt/keys.h +53 -0
  107. lxml/includes/libxslt/namespaces.h +68 -0
  108. lxml/includes/libxslt/numbersInternals.h +73 -0
  109. lxml/includes/libxslt/pattern.h +84 -0
  110. lxml/includes/libxslt/preproc.h +43 -0
  111. lxml/includes/libxslt/security.h +104 -0
  112. lxml/includes/libxslt/templates.h +77 -0
  113. lxml/includes/libxslt/transform.h +207 -0
  114. lxml/includes/libxslt/variables.h +118 -0
  115. lxml/includes/libxslt/xslt.h +110 -0
  116. lxml/includes/libxslt/xsltInternals.h +1995 -0
  117. lxml/includes/libxslt/xsltconfig.h +146 -0
  118. lxml/includes/libxslt/xsltexports.h +64 -0
  119. lxml/includes/libxslt/xsltlocale.h +44 -0
  120. lxml/includes/libxslt/xsltutils.h +343 -0
  121. lxml/includes/lxml-version.h +3 -0
  122. lxml/includes/relaxng.pxd +64 -0
  123. lxml/includes/schematron.pxd +34 -0
  124. lxml/includes/tree.pxd +492 -0
  125. lxml/includes/uri.pxd +5 -0
  126. lxml/includes/xinclude.pxd +22 -0
  127. lxml/includes/xmlerror.pxd +852 -0
  128. lxml/includes/xmlparser.pxd +303 -0
  129. lxml/includes/xmlschema.pxd +35 -0
  130. lxml/includes/xpath.pxd +136 -0
  131. lxml/includes/xslt.pxd +190 -0
  132. lxml/isoschematron/__init__.py +348 -0
  133. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  134. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  135. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  136. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  137. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  138. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  142. lxml/iterparse.pxi +438 -0
  143. lxml/lxml.etree.h +244 -0
  144. lxml/lxml.etree_api.h +204 -0
  145. lxml/nsclasses.pxi +281 -0
  146. lxml/objectify.cpython-39-arm-linux-gnueabihf.so +0 -0
  147. lxml/objectify.pyx +2149 -0
  148. lxml/objectpath.pxi +332 -0
  149. lxml/parser.pxi +2059 -0
  150. lxml/parsertarget.pxi +180 -0
  151. lxml/proxy.pxi +619 -0
  152. lxml/public-api.pxi +178 -0
  153. lxml/pyclasslookup.py +3 -0
  154. lxml/readonlytree.pxi +565 -0
  155. lxml/relaxng.pxi +165 -0
  156. lxml/sax.cpython-39-arm-linux-gnueabihf.so +0 -0
  157. lxml/sax.py +286 -0
  158. lxml/saxparser.pxi +875 -0
  159. lxml/schematron.pxi +173 -0
  160. lxml/serializer.pxi +1849 -0
  161. lxml/usedoctest.py +13 -0
  162. lxml/xinclude.pxi +67 -0
  163. lxml/xmlerror.pxi +1654 -0
  164. lxml/xmlid.pxi +179 -0
  165. lxml/xmlschema.pxi +215 -0
  166. lxml/xpath.pxi +487 -0
  167. lxml/xslt.pxi +957 -0
  168. lxml/xsltext.pxi +242 -0
  169. lxml-6.0.0.dist-info/METADATA +163 -0
  170. lxml-6.0.0.dist-info/RECORD +174 -0
  171. lxml-6.0.0.dist-info/WHEEL +5 -0
  172. lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
  173. lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
  174. lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/proxy.pxi ADDED
@@ -0,0 +1,619 @@
1
+ # Proxy functions and low level node allocation stuff
2
+
3
+ # Proxies represent elements, their reference is stored in the C
4
+ # structure of the respective node to avoid multiple instantiation of
5
+ # the Python class.
6
+
7
@cython.linetrace(False)
@cython.profile(False)
cdef inline _Element getProxy(xmlNode* c_node):
    """Return the Python proxy stored on a C node, or None.

    None is returned when ``c_node`` is NULL or when its ``_private``
    slot is empty; otherwise the slot is cast back to ``_Element``.
    """
    if c_node is NULL or c_node._private is NULL:
        return None
    return <_Element>c_node._private
17
+
18
+
19
@cython.linetrace(False)
@cython.profile(False)
cdef inline bint hasProxy(xmlNode* c_node):
    """Tell whether the node's ``_private`` slot currently holds a pointer.

    ``c_node`` is dereferenced without a NULL check, so callers must pass
    a valid node.
    """
    return c_node._private is not NULL
25
+
26
+
27
@cython.linetrace(False)
@cython.profile(False)
cdef inline int _registerProxy(_Element proxy, _Document doc,
                               xmlNode* c_node) except -1:
    """Bind ``proxy`` to ``c_node`` and to its owning ``doc``.

    Asserts that the node has no proxy yet, then stores the document and
    node on the proxy and a back pointer to the proxy in the node's
    ``_private`` slot.  Returns 0.
    """
    # same condition as "not hasProxy(c_node)"
    assert c_node._private is NULL, "double registering proxy!"
    proxy._doc = doc
    proxy._c_node = c_node
    c_node._private = <void*>proxy
    return 0
39
+
40
+
41
@cython.linetrace(False)
@cython.profile(False)
cdef inline int _unregisterProxy(_Element proxy) except -1:
    """Detach ``proxy`` from the C node it is registered on.

    Asserts that the node's ``_private`` slot points at this very proxy
    and then clears the slot.  Returns 0.
    """
    cdef xmlNode* c_proxied = proxy._c_node
    assert c_proxied._private is <void*>proxy, "Tried to unregister unknown proxy"
    c_proxied._private = NULL
    return 0
50
+
51
+
52
+ ################################################################################
53
+ # temporarily make a node the root node of its document
54
+
55
cdef xmlDoc* _fakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node) except NULL:
    """Shortcut for ``_plainFakeRootDoc()`` with ``with_siblings`` enabled."""
    return _plainFakeRootDoc(c_base_doc, c_node, True)
57
+
58
cdef xmlDoc* _plainFakeRootDoc(xmlDoc* c_base_doc, xmlNode* c_node,
                               bint with_siblings) except NULL:
    """Build a temporary document that has ``c_node`` as its root node.

    If ``c_node`` already is the root element of ``c_base_doc`` (checked
    when ``with_siblings`` is set or the node has no siblings),
    ``c_base_doc`` itself is returned.  Otherwise a shallow copy of the
    document and of the node is created and the *original* children are
    temporarily re-parented onto the copied root.

    Note that copy and original must not be modified during the lifetime
    of the fake document!  Always call ``_destroyFakeDoc()`` after use.

    Raises MemoryError if the root node cannot be copied.
    """
    cdef xmlNode* c_child
    cdef xmlNode* c_root
    cdef xmlNode* c_new_root
    cdef xmlDoc* c_doc
    if with_siblings or (c_node.prev is NULL and c_node.next is NULL):
        c_root = tree.xmlDocGetRootElement(c_base_doc)
        if c_root is c_node:
            # already the root node, no siblings
            return c_base_doc

    c_doc = _copyDoc(c_base_doc, 0)                    # non recursive!
    c_new_root = tree.xmlDocCopyNode(c_node, c_doc, 2)  # non recursive!
    if c_new_root is NULL:
        # libxml2 reports a failed copy with NULL; do not dereference it
        # below and do not leak the freshly copied document
        tree.xmlFreeDoc(c_doc)
        raise MemoryError()
    tree.xmlDocSetRootElement(c_doc, c_new_root)
    _copyParentNamespaces(c_node, c_new_root)

    # borrow the children of the original node
    c_new_root.children = c_node.children
    c_new_root.last = c_node.last
    c_new_root.next = c_new_root.prev = NULL

    # store original node so that _destroyFakeDoc() can restore the links
    c_doc._private = c_node

    # divert parent pointers of children
    c_child = c_new_root.children
    while c_child is not NULL:
        c_child.parent = c_new_root
        c_child = c_child.next

    c_doc.children = c_new_root
    return c_doc
93
+
94
cdef void _destroyFakeDoc(xmlDoc* c_base_doc, xmlDoc* c_doc) noexcept:
    """Free a temporary document and hand its borrowed children back.

    Does nothing when ``c_doc`` is ``c_base_doc`` itself.  Otherwise the
    children of the fake root get their parent pointer reset to the node
    stored in ``c_doc._private`` and the fake document is freed.
    """
    cdef xmlNode* c_fake_root
    cdef xmlNode* c_original
    cdef xmlNode* c_cur
    if c_doc is c_base_doc:
        return
    c_fake_root = tree.xmlDocGetRootElement(c_doc)

    # give the children back to the node they were borrowed from
    c_original = <xmlNode*>c_doc._private
    c_cur = c_fake_root.children
    while c_cur is not NULL:
        c_cur.parent = c_original
        c_cur = c_cur.next

    # unlink the children first so that freeing the document skips them
    c_fake_root.children = NULL
    c_fake_root.last = NULL
    tree.xmlFreeDoc(c_doc)
113
+
114
cdef _Element _fakeDocElementFactory(_Document doc, xmlNode* c_element):
    """Element factory that is safe to use on nodes of a fake root document.

    Instantiating the fake root node itself would turn bad once the fake
    document gets destroyed.  So, when ``c_element`` is the first child of
    a foreign document that carries a ``_private`` pointer, the node
    stored in that pointer is instantiated in its place.
    """
    cdef xmlDoc* c_node_doc = c_element.doc
    if (c_node_doc is not doc._c_doc
            and c_node_doc._private is not NULL
            and c_element is c_node_doc.children):
        # swap the fake root for the original node
        c_element = <xmlNode*>c_node_doc._private
    return _elementFactory(doc, c_element)
129
+
130
+ ################################################################################
131
+ # support for freeing tree elements when proxy objects are destroyed
132
+
133
cdef int attemptDeallocation(xmlNode* c_node) noexcept:
    """Try to free ``c_node`` or the top of the subtree that contains it.

    Returns 1 if a subtree was freed and 0 otherwise (NULL node, or
    ``getDeallocationTop()`` found nothing that may be freed).
    """
    cdef xmlNode* c_top
    # could be we actually aren't referring to the tree at all
    if c_node is NULL:
        return 0
    c_top = getDeallocationTop(c_node)
    if c_top is NULL:
        return 0
    _removeText(c_top.next)  # tail
    tree.xmlFreeNode(c_top)
    return 1
148
+
149
cdef xmlNode* getDeallocationTop(xmlNode* c_node) noexcept:
    """Find the topmost node above ``c_node`` that may be freed, or NULL.

    NULL is returned as soon as a proxy is found on the node, on one of
    its ancestors, on an element sibling of the top node, or below any of
    them, and also when the ancestor chain ends in a document node.
    """
    cdef xmlNode* c_sibling
    if hasProxy(c_node):
        return NULL

    # climb to the top of the subtree
    while c_node.parent is not NULL:
        c_node = c_node.parent
        if c_node.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
            # still attached to a document => must not be freed
            return NULL
        if hasProxy(c_node):
            return NULL

    # the subtree below the top node must be free of proxies
    if not canDeallocateChildNodes(c_node):
        return NULL

    # the same goes for all element siblings, to the left ...
    c_sibling = c_node.prev
    while c_sibling is not NULL:
        if _isElement(c_sibling):
            if hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling):
                return NULL
        c_sibling = c_sibling.prev

    # ... and to the right
    c_sibling = c_node.next
    while c_sibling is not NULL:
        if _isElement(c_sibling):
            if hasProxy(c_sibling) or not canDeallocateChildNodes(c_sibling):
                return NULL
        c_sibling = c_sibling.next
    return c_node
185
+
186
cdef int canDeallocateChildNodes(xmlNode* c_parent) noexcept:
    """Check the nodes below ``c_parent`` for registered proxies.

    Returns 0 as soon as ``hasProxy()`` reports a proxy on a visited node,
    1 if the traversal finishes without finding one.
    """
    cdef xmlNode* c_node
    c_node = c_parent.children
    # NOTE(review): the BEGIN/END pair expands to a traversal macro defined
    # elsewhere; presumably it visits the element nodes below c_parent,
    # starting at (and including) c_node -- confirm against the macro.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_parent, c_node, 1)
    if hasProxy(c_node):
        return 0
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
    return 1
194
+
195
+ ################################################################################
196
+ # fix _Document references and namespaces when a node changes documents
197
+
198
cdef void _copyParentNamespaces(xmlNode* c_from_node, xmlNode* c_to_node) noexcept nogil:
    """Copy the namespaces of all ancestors of c_from_node to c_to_node.

    Walks up the parent chain for as long as the parent is an element,
    an XInclude node or an XML document node, and re-declares every
    namespace definition found there on ``c_to_node``.
    """
    cdef xmlNode* c_parent
    cdef xmlNs* c_new_ns
    c_parent = c_from_node.parent
    while c_parent and (tree._isElementOrXInclude(c_parent) or
                        c_parent.type == tree.XML_DOCUMENT_NODE):
        c_new_ns = c_parent.nsDef
        while c_new_ns:
            # libxml2 will check if the prefix is already defined
            tree.xmlNewNs(c_to_node, c_new_ns.href, c_new_ns.prefix)
            c_new_ns = c_new_ns.next
        c_parent = c_parent.parent
214
+
215
+
216
# One entry of the namespace replacement cache: nodes whose 'ns' pointer
# is 'old' get it replaced by 'new' (see _fixCNs()).
ctypedef struct _ns_update_map:
    xmlNs* old
    xmlNs* new
219
+
220
+
221
# Growable array of namespace replacement entries.
#   ns_map - the entry array, allocated by _growNsCache()
#   size   - number of entries the array was allocated for
#   last   - index of the next free entry (= number of entries in use)
ctypedef struct _nscache:
    _ns_update_map* ns_map
    size_t size
    size_t last
225
+
226
+
227
cdef int _growNsCache(_nscache* c_ns_cache) except -1:
    """Enlarge the entry array of the namespace cache.

    The capacity starts at 20 entries and doubles on every further call.
    If the reallocation fails, the old array is freed, the array pointer
    is reset to NULL and MemoryError is raised.  Returns 0 on success.
    """
    cdef _ns_update_map* c_resized
    c_ns_cache.size = 20 if c_ns_cache.size == 0 else c_ns_cache.size * 2
    c_resized = <_ns_update_map*> python.lxml_realloc(
        c_ns_cache.ns_map, c_ns_cache.size, sizeof(_ns_update_map))
    if c_resized is NULL:
        python.lxml_free(c_ns_cache.ns_map)
        c_ns_cache.ns_map = NULL
        raise MemoryError()
    c_ns_cache.ns_map = c_resized
    return 0
241
+
242
+
243
cdef inline int _appendToNsCache(_nscache* c_ns_cache,
                                 xmlNs* c_old_ns, xmlNs* c_new_ns) except -1:
    """Append the mapping ``c_old_ns`` -> ``c_new_ns`` to the cache.

    Grows the cache first when it is full; an allocation failure in
    ``_growNsCache()`` propagates as an exception.  Returns 0.
    """
    cdef size_t slot
    if c_ns_cache.last >= c_ns_cache.size:
        _growNsCache(c_ns_cache)
    slot = c_ns_cache.last
    c_ns_cache.ns_map[slot] = _ns_update_map(old=c_old_ns, new=c_new_ns)
    c_ns_cache.last = slot + 1
    return 0
249
+
250
+
251
cdef int _stripRedundantNamespaceDeclarations(xmlNode* c_element, _nscache* c_ns_cache,
                                              xmlNs** c_del_ns_list) except -1:
    """Removes namespace declarations from an element that are already
    defined in its parents.  Does not free the xmlNs's, just prepends
    them to the c_del_ns_list.

    Every declaration on the element is recorded in ``c_ns_cache``: kept
    declarations map to themselves, stripped ones map to the declaration
    found via the parent.  Returns 0; errors raised while appending to
    the cache propagate to the caller.
    """
    cdef xmlNs* c_ns
    cdef xmlNs* c_ns_next
    cdef xmlNs** c_nsdef
    # use a xmlNs** to handle assignments to "c_element.nsDef" correctly:
    # c_nsdef always points at the link field that refers to the current
    # declaration, so unlinking works the same for the list head and for
    # entries further down the list
    c_nsdef = &c_element.nsDef
    while c_nsdef[0] is not NULL:
        # look the href up starting at the parent, i.e. ignoring the
        # declarations on c_element itself
        c_ns = tree.xmlSearchNsByHref(
            c_element.doc, c_element.parent, c_nsdef[0].href)
        if c_ns is NULL:
            # new namespace href => keep and cache the ns declaration
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_nsdef[0])
            c_nsdef = &c_nsdef[0].next
        else:
            # known namespace href => cache mapping and strip old ns
            _appendToNsCache(c_ns_cache, c_nsdef[0], c_ns)
            # cut the current declaration out of the list and prepend it
            # to the garbage chain; c_nsdef is not advanced because it now
            # refers to the successor
            c_ns_next = c_nsdef[0].next
            c_nsdef[0].next = c_del_ns_list[0]
            c_del_ns_list[0] = c_nsdef[0]
            c_nsdef[0] = c_ns_next
    return 0
278
+
279
+
280
cdef void _cleanUpFromNamespaceAdaptation(xmlNode* c_start_node,
                                          _nscache* c_ns_cache, xmlNs* c_del_ns_list) noexcept:
    """Best-effort recovery after a failure in the middle of namespace fixing.

    We were in the middle of ripping out xmlNs-es and likely ran out of
    memory.  The cache array is released and the declarations that were
    already cut out are re-attached to the end of the namespace list of
    ``c_start_node``, since they might still be in use in some places.
    """
    cdef xmlNs* c_tail
    if c_ns_cache.ns_map is not NULL:
        python.lxml_free(c_ns_cache.ns_map)
    if c_del_ns_list is NULL:
        return
    if c_start_node.nsDef is NULL:
        c_start_node.nsDef = c_del_ns_list
        return
    # find the last declaration and chain the removed ones behind it
    c_tail = c_start_node.nsDef
    while c_tail.next is not NULL:
        c_tail = c_tail.next
    c_tail.next = c_del_ns_list
296
+
297
+
298
cdef int moveNodeToDocument(_Document doc, xmlDoc* c_source_doc,
                            xmlNode* c_element) except -1:
    """Fix the xmlNs pointers of a node and its subtree that were moved.

    Originally copied from libxml2's xmlReconciliateNs().  Expects
    libxml2 doc pointers of node to be correct already, but fixes
    _Document references.

    For each node in the subtree, we do this:

    1) Remove redundant declarations of namespace that are already
       defined in its parents.

    2) Replace namespaces that are *not* defined on the node or its
       parents by the equivalent namespace declarations that *are*
       defined on the node or its parents (possibly using a different
       prefix).  If a namespace is unknown, declare a new one on the
       node.

    3) Reassign the names of tags and attribute from the dict of the
       target document *iff* it is different from the dict used in the
       source subtree.

    4) Set the Document reference to the new Document (if different).
       This is done on backtracking to keep the original Document
       alive as long as possible, until all its elements are updated.

    Note that the namespace declarations are removed from the tree in
    step 1), but freed only after the complete subtree was traversed
    and all occurrences were replaced by tree-internal pointers.

    Returns 0.  Nodes that are neither elements nor XInclude nodes are
    left untouched.  Exceptions raised during steps 1) and 2) propagate
    after the removed namespace declarations were re-attached to the
    start node.
    """
    cdef xmlNode* c_start_node
    cdef xmlNode* c_node
    cdef xmlDoc* c_doc = doc._c_doc
    cdef _nscache c_ns_cache = [NULL, 0, 0]
    cdef xmlNs* c_del_ns_list = NULL
    # C counter, same type as the parameter of fixElementDocument()
    cdef size_t proxy_count = 0
    cdef _Element proxy

    if not tree._isElementOrXInclude(c_element):
        return 0

    c_start_node = c_element

    tree.BEGIN_FOR_EACH_FROM(c_element, c_element, 1)
    if tree._isElementOrXInclude(c_element):
        if hasProxy(c_element):
            proxy_count += 1

        # 1) cut out namespaces defined here that are already known by
        #    the ancestors
        if c_element.nsDef is not NULL:
            try:
                _stripRedundantNamespaceDeclarations(c_element, &c_ns_cache, &c_del_ns_list)
            except:
                _cleanUpFromNamespaceAdaptation(c_start_node, &c_ns_cache, c_del_ns_list)
                raise

        # 2) make sure the namespaces of an element and its attributes
        #    are declared in this document (i.e. on the node or its parents)
        if c_element.ns is not NULL:
            _fixCNs(doc, c_start_node, c_element, &c_ns_cache, c_del_ns_list)

        c_node = <xmlNode*>c_element.properties
        while c_node is not NULL:
            if c_node.ns is not NULL:
                _fixCNs(doc, c_start_node, c_node, &c_ns_cache, c_del_ns_list)
            c_node = c_node.next

    tree.END_FOR_EACH_FROM(c_element)

    # free now unused namespace declarations
    if c_del_ns_list is not NULL:
        tree.xmlFreeNsList(c_del_ns_list)

    # cleanup
    if c_ns_cache.ns_map is not NULL:
        python.lxml_free(c_ns_cache.ns_map)

    # 3) fix the names in the tree if we moved it from a different thread
    if c_doc.dict is not c_source_doc.dict:
        fixThreadDictNames(c_start_node, c_source_doc.dict, c_doc.dict)

    # 4) fix _Document references
    #    (and potentially deallocate the source document)
    if proxy_count > 0:
        if proxy_count == 1 and c_start_node._private is not NULL:
            # the only proxy in the subtree may be the one of the start node
            proxy = getProxy(c_start_node)
            if proxy is not None:
                if proxy._doc is not doc:
                    proxy._doc = doc
            else:
                fixElementDocument(c_start_node, doc, proxy_count)
        else:
            fixElementDocument(c_start_node, doc, proxy_count)

    return 0
396
+
397
+
398
cdef void _setTreeDoc(xmlNode* c_node, xmlDoc* c_doc) noexcept:
    """Adaptation of 'xmlSetTreeDoc()' that deep-fixes the document links iteratively.
    It avoids https://gitlab.gnome.org/GNOME/libxml2/issues/42

    For element nodes, the attributes and their child nodes are relinked
    to ``c_doc`` as well; ID attributes are unregistered from the old
    document before their link is changed.
    """
    # NOTE: the traversal macro uses the parameter c_node itself as its
    # running node variable.
    tree.BEGIN_FOR_EACH_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_attr = <tree.xmlAttr*>c_node.properties
        while c_attr:
            if c_attr.atype == tree.XML_ATTRIBUTE_ID:
                # c_node.doc still refers to the old document at this point
                tree.xmlRemoveID(c_node.doc, c_attr)
            c_attr.doc = c_doc
            _fixDocChildren(c_attr.children, c_doc)
            c_attr = c_attr.next
    # Set doc link for all nodes, not only elements.
    c_node.doc = c_doc
    tree.END_FOR_EACH_FROM(c_node)
414
+
415
+
416
cdef inline void _fixDocChildren(xmlNode* c_child, xmlDoc* c_doc) noexcept:
    """Point ``c_child``, its following siblings and everything below them
    at ``c_doc``.  Descends recursively into child lists.
    """
    cdef xmlNode* c_cur = c_child
    while c_cur is not NULL:
        c_cur.doc = c_doc
        if c_cur.children is not NULL:
            _fixDocChildren(c_cur.children, c_doc)
        c_cur = c_cur.next
422
+
423
+
424
cdef int _fixCNs(_Document doc, xmlNode* c_start_node, xmlNode* c_node,
                 _nscache* c_ns_cache, xmlNs* c_del_ns_list) except -1:
    """Replace the namespace of ``c_node`` by one that is declared in ``doc``.

    ``c_node`` is an element or attribute node with a non-NULL ``ns``.
    The replacement is taken from ``c_ns_cache`` if the current namespace
    pointer is already known there.  Otherwise it is looked up (or
    created) through ``doc._findOrBuildNodeNs()`` relative to
    ``c_start_node`` and the mapping is added to the cache.

    Returns 0.  If the lookup or the cache update raises, the partially
    adapted tree is repaired with ``_cleanUpFromNamespaceAdaptation()``
    and the exception is re-raised.
    """
    cdef xmlNs* c_ns = NULL
    # remember the original namespace: it is the lookup key of the cache
    cdef xmlNs* c_old_ns = c_node.ns
    cdef bint is_prefixed_attr = (c_node.type == tree.XML_ATTRIBUTE_NODE and c_old_ns.prefix)

    for ns_map in c_ns_cache.ns_map[:c_ns_cache.last]:
        if c_old_ns is ns_map.old:
            if is_prefixed_attr and not ns_map.new.prefix:
                # avoid dropping prefix from attributes
                continue
            c_ns = ns_map.new
            break

    if c_ns:
        c_node.ns = c_ns
    else:
        # not in cache or not acceptable
        # => find a replacement from this document
        try:
            c_ns = doc._findOrBuildNodeNs(
                c_start_node, c_old_ns.href, c_old_ns.prefix,
                c_node.type == tree.XML_ATTRIBUTE_NODE)
            c_node.ns = c_ns
            # Cache "old => new".  The key must be the namespace the node
            # had *before* the assignment above; keying on c_node.ns here
            # would store "new => new", which later nodes that still carry
            # the old pointer can never hit.
            _appendToNsCache(c_ns_cache, c_old_ns, c_ns)
        except:
            _cleanUpFromNamespaceAdaptation(c_start_node, c_ns_cache, c_del_ns_list)
            raise
    return 0
452
+
453
+
454
cdef int fixElementDocument(xmlNode* c_element, _Document doc,
                            size_t proxy_count) except -1:
    """Point the proxies found in the traversal from ``c_element`` at ``doc``.

    ``proxy_count`` is decremented for each proxy found; the traversal
    returns early with 0 once it reaches zero.
    """
    cdef xmlNode* c_node = c_element
    cdef _Element proxy = None # init-to-None required due to fake-loop below
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node._private is not NULL:
        proxy = getProxy(c_node)
        if proxy is not None:
            if proxy._doc is not doc:
                proxy._doc = doc
            # count down so that the rest of the tree can be skipped once
            # all expected proxies were seen
            proxy_count -= 1
            if proxy_count == 0:
                return 0
    tree.END_FOR_EACH_FROM(c_node)
468
+
469
+
470
cdef void fixThreadDictNames(xmlNode* c_element,
                             tree.xmlDict* c_src_dict,
                             tree.xmlDict* c_dict) noexcept nogil:
    """Re-assign the names of tags and attributes to the dict ``c_dict``.

    This should only be called when the element is based on a different
    libxml2 tag name dictionary.  For a document node, the namespaces of
    the node, both DTD subsets and all top-level children are processed;
    for an element or XInclude node, only that subtree.
    """
    cdef xmlNode* c_child
    if c_element.type in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
        # may define "xml" namespace
        fixThreadDictNsForNode(c_element, c_src_dict, c_dict)
        if c_element.doc.extSubset:
            fixThreadDictNamesForDtd(c_element.doc.extSubset, c_src_dict, c_dict)
        if c_element.doc.intSubset:
            fixThreadDictNamesForDtd(c_element.doc.intSubset, c_src_dict, c_dict)
        c_child = c_element.children
        while c_child is not NULL:
            fixThreadDictNamesForNode(c_child, c_src_dict, c_dict)
            c_child = c_child.next
    elif tree._isElementOrXInclude(c_element):
        fixThreadDictNamesForNode(c_element, c_src_dict, c_dict)
491
+
492
+
493
cdef inline void _fixThreadDictPtr(const_xmlChar** c_ptr,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) noexcept nogil:
    """Replace a string owned by ``c_src_dict`` by its twin in ``c_dict``.

    The pointer is left alone if it is NULL, if there is no source dict,
    if the source dict does not own the string, or if the lookup in the
    target dict fails.
    """
    c_str = c_ptr[0]
    if not c_str or not c_src_dict:
        return
    if not tree.xmlDictOwns(c_src_dict, c_str):
        return
    # return value can be NULL on memory error, but we don't handle that here
    c_str = tree.xmlDictLookup(c_dict, c_str, -1)
    if c_str:
        c_ptr[0] = c_str
502
+
503
+
504
cdef void fixThreadDictNamesForNode(xmlNode* c_element,
                                    tree.xmlDict* c_src_dict,
                                    tree.xmlDict* c_dict) noexcept nogil:
    """Move the dict-owned strings of the nodes visited from ``c_element``
    over to ``c_dict``.

    Elements and XInclude start nodes get their attributes, namespace
    declarations and name fixed, text nodes their content, and any other
    node type except comments its name.
    """
    cdef xmlNode* c_node = c_element
    tree.BEGIN_FOR_EACH_FROM(c_element, c_node, 1)
    if c_node.type in (tree.XML_ELEMENT_NODE, tree.XML_XINCLUDE_START):
        fixThreadDictNamesForAttributes(
            c_node.properties, c_src_dict, c_dict)
        fixThreadDictNsForNode(c_node, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
    elif c_node.type == tree.XML_TEXT_NODE:
        # libxml2's SAX2 parser interns some indentation space
        fixThreadDictContentForNode(c_node, c_src_dict, c_dict)
    elif c_node.type == tree.XML_COMMENT_NODE:
        pass  # don't touch c_node.name
    else:
        _fixThreadDictPtr(&c_node.name, c_src_dict, c_dict)
    tree.END_FOR_EACH_FROM(c_node)
522
+
523
+
524
cdef inline void fixThreadDictNamesForAttributes(tree.xmlAttr* c_attr,
                                                 tree.xmlDict* c_src_dict,
                                                 tree.xmlDict* c_dict) noexcept nogil:
    """Fix the names and the value nodes of ``c_attr`` and of all
    attributes that follow it in the list.
    """
    cdef xmlNode* c_value
    cdef xmlNode* c_cur = <xmlNode*>c_attr
    while c_cur is not NULL:
        if c_cur.type != tree.XML_TEXT_NODE and c_cur.type != tree.XML_COMMENT_NODE:
            _fixThreadDictPtr(&c_cur.name, c_src_dict, c_dict)
            # libxml2 keeps some (!) attribute values in the dict
            c_value = c_cur.children
            while c_value is not NULL:
                fixThreadDictContentForNode(c_value, c_src_dict, c_dict)
                c_value = c_value.next
        c_cur = c_cur.next
538
+
539
+
540
cdef inline void fixThreadDictContentForNode(xmlNode* c_node,
                                             tree.xmlDict* c_src_dict,
                                             tree.xmlDict* c_dict) noexcept nogil:
    """Move the content string of ``c_node`` to ``c_dict`` if it is owned
    by ``c_src_dict``.

    Uses the same guards as ``_fixThreadDictPtr()``: nothing happens
    without a source dict, and the content is kept as is when the lookup
    in the target dict fails.
    """
    cdef const_xmlChar* c_interned
    if c_node.content is NULL:
        return
    if c_node.content is <xmlChar*>&c_node.properties:
        # content is stored inside the node struct itself, not in a dict
        return
    # xmlDictOwns() answers -1 (i.e. true) for a NULL dict, so test the
    # dict pointer explicitly instead of relying on the return value
    if c_src_dict is NULL or not tree.xmlDictOwns(c_src_dict, c_node.content):
        return
    # result can be NULL on memory error; keep the old content in that
    # case rather than dropping the text
    c_interned = tree.xmlDictLookup(c_dict, c_node.content, -1)
    if c_interned is not NULL:
        c_node.content = <xmlChar*>c_interned
548
+
549
+
550
cdef inline void fixThreadDictNsForNode(xmlNode* c_node,
                                        tree.xmlDict* c_src_dict,
                                        tree.xmlDict* c_dict) noexcept nogil:
    """Fix href and prefix of every namespace declared on ``c_node``."""
    cdef xmlNs* c_decl
    c_decl = c_node.nsDef
    while c_decl is not NULL:
        _fixThreadDictPtr(&c_decl.href, c_src_dict, c_dict)
        _fixThreadDictPtr(&c_decl.prefix, c_src_dict, c_dict)
        c_decl = c_decl.next
558
+
559
+
560
cdef void fixThreadDictNamesForDtd(tree.xmlDtd* c_dtd,
                                   tree.xmlDict* c_src_dict,
                                   tree.xmlDict* c_dict) noexcept nogil:
    """Fix the dict-owned strings of the declarations in a DTD.

    Only the direct children of ``c_dtd`` are visited.  Element
    declarations get the name/prefix of their content model and the
    fields of their attribute declarations fixed, entity declarations
    their name, IDs and content.  Other declaration types are skipped.
    """
    cdef xmlNode* c_node
    cdef tree.xmlElement* c_element
    cdef tree.xmlAttribute* c_attribute
    cdef tree.xmlEntity* c_entity

    c_node = c_dtd.children
    while c_node:
        if c_node.type == tree.XML_ELEMENT_DECL:
            c_element = <tree.xmlElement*>c_node
            if c_element.content:
                # only the top node of the content model is handled here
                _fixThreadDictPtr(&c_element.content.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_element.content.prefix, c_src_dict, c_dict)
            c_attribute = c_element.attributes
            while c_attribute:
                _fixThreadDictPtr(&c_attribute.defaultValue, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.name, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.prefix, c_src_dict, c_dict)
                _fixThreadDictPtr(&c_attribute.elem, c_src_dict, c_dict)
                # the attribute declarations are chained through 'nexth'
                c_attribute = c_attribute.nexth
        elif c_node.type == tree.XML_ENTITY_DECL:
            c_entity = <tree.xmlEntity*>c_node
            _fixThreadDictPtr(&c_entity.name, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity.ExternalID, c_src_dict, c_dict)
            _fixThreadDictPtr(&c_entity.SystemID, c_src_dict, c_dict)
            _fixThreadDictPtr(<const_xmlChar**>&c_entity.content, c_src_dict, c_dict)
        c_node = c_node.next
589
+
590
+
591
+ ################################################################################
592
+ # adopt an xmlDoc from an external libxml2 document source
593
+
594
cdef _Document _adoptForeignDoc(xmlDoc* c_doc, _BaseParser parser=None, bint is_owned=True):
    """Convert and wrap an externally produced xmlDoc for use in lxml.
    Assures that all '_private' pointers are NULL to prevent accidental
    dereference into lxml proxy objects.

    If ``is_owned`` is true, the document is used directly (and freed
    here if it is rejected); otherwise a deep copy is made and the
    original is left untouched.

    Raises ValueError for a NULL document or a node that is not an XML
    or HTML document, and MemoryError if the copy fails.
    """
    if c_doc is NULL:
        raise ValueError("Illegal document provided: NULL")
    if c_doc.type not in (tree.XML_DOCUMENT_NODE, tree.XML_HTML_DOCUMENT_NODE):
        # read the type before the document is potentially freed
        doc_type = c_doc.type
        if is_owned:
            tree.xmlFreeDoc(c_doc)
        raise ValueError(f"Illegal document provided: expected XML or HTML, found {doc_type}")

    cdef xmlNode* c_node = <xmlNode*>c_doc

    if is_owned:
        # reset the '_private' pointer of the nodes visited by the traversal
        tree.BEGIN_FOR_EACH_FROM(<xmlNode*>c_doc, c_node, 1)
        c_node._private = NULL
        tree.END_FOR_EACH_FROM(c_node)
    else:
        # create a fresh copy that lxml owns
        c_doc = tree.xmlCopyDoc(c_doc, 1)
        if c_doc is NULL:
            raise MemoryError()

    return _documentFactory(c_doc, parser)