lxml 6.0.0__cp310-cp310-musllinux_1_2_armv7l.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cpython-310-arm-linux-gnueabihf.so +0 -0
  4. lxml/_elementpath.py +343 -0
  5. lxml/apihelpers.pxi +1801 -0
  6. lxml/builder.cpython-310-arm-linux-gnueabihf.so +0 -0
  7. lxml/builder.py +243 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +36 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cpython-310-arm-linux-gnueabihf.so +0 -0
  16. lxml/etree.h +244 -0
  17. lxml/etree.pyx +3853 -0
  18. lxml/etree_api.h +204 -0
  19. lxml/extensions.pxi +830 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1927 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_difflib.cpython-310-arm-linux-gnueabihf.so +0 -0
  24. lxml/html/_difflib.py +2106 -0
  25. lxml/html/_html5builder.py +100 -0
  26. lxml/html/_setmixin.py +56 -0
  27. lxml/html/builder.py +173 -0
  28. lxml/html/clean.py +21 -0
  29. lxml/html/defs.py +135 -0
  30. lxml/html/diff.cpython-310-arm-linux-gnueabihf.so +0 -0
  31. lxml/html/diff.py +972 -0
  32. lxml/html/formfill.py +299 -0
  33. lxml/html/html5parser.py +260 -0
  34. lxml/html/soupparser.py +314 -0
  35. lxml/html/usedoctest.py +13 -0
  36. lxml/includes/__init__.pxd +0 -0
  37. lxml/includes/__init__.py +0 -0
  38. lxml/includes/c14n.pxd +25 -0
  39. lxml/includes/config.pxd +3 -0
  40. lxml/includes/dtdvalid.pxd +18 -0
  41. lxml/includes/etree_defs.h +379 -0
  42. lxml/includes/etreepublic.pxd +237 -0
  43. lxml/includes/extlibs/__init__.py +0 -0
  44. lxml/includes/extlibs/libcharset.h +45 -0
  45. lxml/includes/extlibs/localcharset.h +137 -0
  46. lxml/includes/extlibs/zconf.h +543 -0
  47. lxml/includes/extlibs/zlib.h +1938 -0
  48. lxml/includes/htmlparser.pxd +56 -0
  49. lxml/includes/libexslt/__init__.py +0 -0
  50. lxml/includes/libexslt/exslt.h +108 -0
  51. lxml/includes/libexslt/exsltconfig.h +70 -0
  52. lxml/includes/libexslt/exsltexports.h +63 -0
  53. lxml/includes/libxml/HTMLparser.h +339 -0
  54. lxml/includes/libxml/HTMLtree.h +148 -0
  55. lxml/includes/libxml/SAX.h +18 -0
  56. lxml/includes/libxml/SAX2.h +170 -0
  57. lxml/includes/libxml/__init__.py +0 -0
  58. lxml/includes/libxml/c14n.h +115 -0
  59. lxml/includes/libxml/catalog.h +183 -0
  60. lxml/includes/libxml/chvalid.h +230 -0
  61. lxml/includes/libxml/debugXML.h +79 -0
  62. lxml/includes/libxml/dict.h +82 -0
  63. lxml/includes/libxml/encoding.h +307 -0
  64. lxml/includes/libxml/entities.h +147 -0
  65. lxml/includes/libxml/globals.h +25 -0
  66. lxml/includes/libxml/hash.h +251 -0
  67. lxml/includes/libxml/list.h +137 -0
  68. lxml/includes/libxml/nanoftp.h +16 -0
  69. lxml/includes/libxml/nanohttp.h +98 -0
  70. lxml/includes/libxml/parser.h +1633 -0
  71. lxml/includes/libxml/parserInternals.h +591 -0
  72. lxml/includes/libxml/relaxng.h +224 -0
  73. lxml/includes/libxml/schemasInternals.h +959 -0
  74. lxml/includes/libxml/schematron.h +143 -0
  75. lxml/includes/libxml/threads.h +81 -0
  76. lxml/includes/libxml/tree.h +1326 -0
  77. lxml/includes/libxml/uri.h +106 -0
  78. lxml/includes/libxml/valid.h +485 -0
  79. lxml/includes/libxml/xinclude.h +141 -0
  80. lxml/includes/libxml/xlink.h +193 -0
  81. lxml/includes/libxml/xmlIO.h +419 -0
  82. lxml/includes/libxml/xmlautomata.h +163 -0
  83. lxml/includes/libxml/xmlerror.h +962 -0
  84. lxml/includes/libxml/xmlexports.h +96 -0
  85. lxml/includes/libxml/xmlmemory.h +188 -0
  86. lxml/includes/libxml/xmlmodule.h +61 -0
  87. lxml/includes/libxml/xmlreader.h +444 -0
  88. lxml/includes/libxml/xmlregexp.h +116 -0
  89. lxml/includes/libxml/xmlsave.h +111 -0
  90. lxml/includes/libxml/xmlschemas.h +254 -0
  91. lxml/includes/libxml/xmlschemastypes.h +152 -0
  92. lxml/includes/libxml/xmlstring.h +140 -0
  93. lxml/includes/libxml/xmlunicode.h +15 -0
  94. lxml/includes/libxml/xmlversion.h +332 -0
  95. lxml/includes/libxml/xmlwriter.h +489 -0
  96. lxml/includes/libxml/xpath.h +569 -0
  97. lxml/includes/libxml/xpathInternals.h +639 -0
  98. lxml/includes/libxml/xpointer.h +48 -0
  99. lxml/includes/libxslt/__init__.py +0 -0
  100. lxml/includes/libxslt/attributes.h +39 -0
  101. lxml/includes/libxslt/documents.h +93 -0
  102. lxml/includes/libxslt/extensions.h +262 -0
  103. lxml/includes/libxslt/extra.h +72 -0
  104. lxml/includes/libxslt/functions.h +78 -0
  105. lxml/includes/libxslt/imports.h +75 -0
  106. lxml/includes/libxslt/keys.h +53 -0
  107. lxml/includes/libxslt/namespaces.h +68 -0
  108. lxml/includes/libxslt/numbersInternals.h +73 -0
  109. lxml/includes/libxslt/pattern.h +84 -0
  110. lxml/includes/libxslt/preproc.h +43 -0
  111. lxml/includes/libxslt/security.h +104 -0
  112. lxml/includes/libxslt/templates.h +77 -0
  113. lxml/includes/libxslt/transform.h +207 -0
  114. lxml/includes/libxslt/variables.h +118 -0
  115. lxml/includes/libxslt/xslt.h +110 -0
  116. lxml/includes/libxslt/xsltInternals.h +1995 -0
  117. lxml/includes/libxslt/xsltconfig.h +146 -0
  118. lxml/includes/libxslt/xsltexports.h +64 -0
  119. lxml/includes/libxslt/xsltlocale.h +44 -0
  120. lxml/includes/libxslt/xsltutils.h +343 -0
  121. lxml/includes/lxml-version.h +3 -0
  122. lxml/includes/relaxng.pxd +64 -0
  123. lxml/includes/schematron.pxd +34 -0
  124. lxml/includes/tree.pxd +492 -0
  125. lxml/includes/uri.pxd +5 -0
  126. lxml/includes/xinclude.pxd +22 -0
  127. lxml/includes/xmlerror.pxd +852 -0
  128. lxml/includes/xmlparser.pxd +303 -0
  129. lxml/includes/xmlschema.pxd +35 -0
  130. lxml/includes/xpath.pxd +136 -0
  131. lxml/includes/xslt.pxd +190 -0
  132. lxml/isoschematron/__init__.py +348 -0
  133. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  134. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  135. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  136. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  137. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  138. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  142. lxml/iterparse.pxi +438 -0
  143. lxml/lxml.etree.h +244 -0
  144. lxml/lxml.etree_api.h +204 -0
  145. lxml/nsclasses.pxi +281 -0
  146. lxml/objectify.cpython-310-arm-linux-gnueabihf.so +0 -0
  147. lxml/objectify.pyx +2149 -0
  148. lxml/objectpath.pxi +332 -0
  149. lxml/parser.pxi +2059 -0
  150. lxml/parsertarget.pxi +180 -0
  151. lxml/proxy.pxi +619 -0
  152. lxml/public-api.pxi +178 -0
  153. lxml/pyclasslookup.py +3 -0
  154. lxml/readonlytree.pxi +565 -0
  155. lxml/relaxng.pxi +165 -0
  156. lxml/sax.cpython-310-arm-linux-gnueabihf.so +0 -0
  157. lxml/sax.py +286 -0
  158. lxml/saxparser.pxi +875 -0
  159. lxml/schematron.pxi +173 -0
  160. lxml/serializer.pxi +1849 -0
  161. lxml/usedoctest.py +13 -0
  162. lxml/xinclude.pxi +67 -0
  163. lxml/xmlerror.pxi +1654 -0
  164. lxml/xmlid.pxi +179 -0
  165. lxml/xmlschema.pxi +215 -0
  166. lxml/xpath.pxi +487 -0
  167. lxml/xslt.pxi +957 -0
  168. lxml/xsltext.pxi +242 -0
  169. lxml-6.0.0.dist-info/METADATA +163 -0
  170. lxml-6.0.0.dist-info/RECORD +174 -0
  171. lxml-6.0.0.dist-info/WHEEL +5 -0
  172. lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
  173. lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
  174. lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/serializer.pxi ADDED
@@ -0,0 +1,1849 @@
1
+ # XML serialization and output functions
2
+
3
+ cdef object GzipFile
4
+ from gzip import GzipFile
5
+
6
+
7
cdef class SerialisationError(LxmlError):
    """Raised when libxml2 reports an error while serialising a tree.
    """
10
+
11
+
12
# Output formats that the serialiser can produce; _findOutputMethod()
# maps the user visible method names onto these values.
cdef enum _OutputMethods:
    OUTPUT_METHOD_XML
    OUTPUT_METHOD_HTML
    OUTPUT_METHOD_TEXT
16
+
17
+
18
cdef int _findOutputMethod(method) except -1:
    """Translate a method name into one of the ``_OutputMethods`` values.

    ``None`` selects XML output.  Any other value is lower-cased and must
    then equal "xml", "html" or "text"; otherwise ``ValueError`` is raised
    (the message shows the lower-cased name).
    """
    cdef int c_method
    if method is None:
        return OUTPUT_METHOD_XML

    method = method.lower()
    if method == "xml":
        c_method = OUTPUT_METHOD_XML
    elif method == "html":
        c_method = OUTPUT_METHOD_HTML
    elif method == "text":
        c_method = OUTPUT_METHOD_TEXT
    else:
        raise ValueError(f"unknown output method {method!r}")
    return c_method
29
+
30
+
31
cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
    """Return the text content of ``c_node``, optionally followed by its tail.

    The content is collected into a temporary libxml2 buffer.  If
    ``encoding`` is the ``unicode`` type, the result is a decoded string.
    If it is ``None`` or names UTF-8, the raw buffer bytes are returned.
    For any other encoding, the text is decoded from UTF-8 and re-encoded
    with strict error handling.

    Raises ``MemoryError`` if the buffer cannot be created and
    ``SerialisationError`` if collecting the content fails.
    """
    cdef bint needs_conversion
    cdef const_xmlChar* c_text
    cdef xmlNode* c_text_node
    cdef tree.xmlBuffer* c_buffer
    cdef int error_result
    cdef int c_len

    c_buffer = tree.xmlBufferCreate()
    if c_buffer is NULL:
        raise MemoryError()

    with nogil:
        error_result = tree.xmlNodeBufGetContent(c_buffer, c_node)
        if with_tail:
            # append the content of the text nodes that follow the node
            c_text_node = _textNodeOrSkip(c_node.next)
            while c_text_node is not NULL:
                tree.xmlBufferWriteChar(c_buffer, <const_char*>c_text_node.content)
                c_text_node = _textNodeOrSkip(c_text_node.next)
        c_text = tree.xmlBufferContent(c_buffer)

    if error_result < 0 or c_text is NULL:
        tree.xmlBufferFree(c_buffer)
        raise SerialisationError, "Error during serialisation (out of memory?)"

    try:
        c_len = tree.xmlBufferLength(c_buffer)

        if encoding is unicode:
            needs_conversion = True
        elif encoding is None:
            needs_conversion = False
        else:
            # Python prefers lower case encoding names
            encoding = encoding.lower()
            if encoding in ('utf8', 'utf-8'):
                needs_conversion = False
            elif encoding == 'ascii':
                # if this is true, encoding will raise an error below
                needs_conversion = isutf8l(c_text, c_len)
            else:
                needs_conversion = True

        if not needs_conversion:
            text = (<unsigned char*>c_text)[:c_len]
        else:
            text = (<const_char*>c_text)[:c_len].decode('utf8')
            if encoding is not unicode:
                encoding = _utf8(encoding)
                text = python.PyUnicode_AsEncodedString(
                    text, encoding, 'strict')
    finally:
        tree.xmlBufferFree(c_buffer)
    return text
81
+
82
+
83
cdef _tostring(_Element element, encoding, doctype, method,
               bint write_xml_declaration, bint write_complete_document,
               bint pretty_print, bint with_tail, int standalone):
    """Serialise ``element`` into an in-memory string.

    Returns ``None`` for a ``None`` element.  For the "text" method the
    work is delegated to ``_textToString()``.  Otherwise the tree is written
    into a libxml2 output buffer; the result is decoded from UTF-8 when
    ``encoding`` is the ``unicode`` type and returned as bytes in all other
    cases.

    Raises ``LookupError`` for an encoding that libxml2 does not know,
    ``MemoryError`` if the output buffer cannot be allocated, and whatever
    ``_raiseSerialisationError()`` raises when writing or closing fails.
    """
    cdef tree.xmlOutputBuffer* c_buffer
    cdef tree.xmlBuf* c_result_buffer
    cdef tree.xmlCharEncodingHandler* c_handler
    cdef const_char* c_enc
    cdef const_xmlChar* c_doctype
    cdef int c_method
    cdef int error_result

    if element is None:
        return None
    _assertValidNode(element)

    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        return _textToString(element._c_node, encoding, with_tail)

    if encoding is None or encoding is unicode:
        c_enc = NULL
    else:
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)

    if doctype is None:
        c_doctype = NULL
    else:
        doctype = _utf8(doctype)
        c_doctype = _xcstr(doctype)

    # We must both look up the encoding handler here *and* pass the
    # encoding name on to the output functions below.
    c_handler = tree.xmlFindCharEncodingHandler(c_enc)
    if c_handler is NULL and c_enc is not NULL:
        if encoding is not None:
            encoding = encoding.decode('UTF-8')
        raise LookupError, f"unknown encoding: '{encoding}'"

    c_buffer = tree.xmlAllocOutputBuffer(c_handler)
    if c_buffer is NULL:
        tree.xmlCharEncCloseFunc(c_handler)
        raise MemoryError()

    with nogil:
        _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_doctype, c_method,
                           write_xml_declaration, write_complete_document,
                           pretty_print, with_tail, standalone)
        tree.xmlOutputBufferFlush(c_buffer)
        # read from the conversion buffer if there is one
        if c_buffer.conv is NULL:
            c_result_buffer = c_buffer.buffer
        else:
            c_result_buffer = c_buffer.conv

    error_result = c_buffer.error
    if error_result != xmlerror.XML_ERR_OK:
        tree.xmlOutputBufferClose(c_buffer)
        _raiseSerialisationError(error_result)

    try:
        if encoding is not unicode:
            result = <bytes>(<unsigned char*>tree.xmlBufContent(
                c_result_buffer))[:tree.xmlBufUse(c_result_buffer)]
        else:
            result = (<unsigned char*>tree.xmlBufContent(
                c_result_buffer))[:tree.xmlBufUse(c_result_buffer)].decode('UTF-8')
    finally:
        # always release the output buffer, remember how closing went
        error_result = tree.xmlOutputBufferClose(c_buffer)
    if error_result == -1:
        _raiseSerialisationError(error_result)
    return result
152
+
153
cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments, inclusive_ns_prefixes):
    """Return the C14N serialisation of an element or tree as bytes.

    For an ``_Element``, a document obtained from ``_plainFakeRootDoc()`` is
    serialised; for anything else, the document returned by
    ``_documentOrRaise()`` is used.  ``inclusive_ns_prefixes`` is converted
    to a C array only if it is non-empty.

    Raises ``C14NError`` if libxml2 reports a failure.
    """
    cdef xmlDoc* c_doc
    cdef xmlChar* c_buffer = NULL
    cdef int byte_count = -1
    cdef bytes result
    cdef _Document doc
    cdef xmlChar **c_inclusive_ns_prefixes = NULL

    if isinstance(element_or_tree, _Element):
        _assertValidNode(<_Element>element_or_tree)
        doc = (<_Element>element_or_tree)._doc
        c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0)
    else:
        doc = _documentOrRaise(element_or_tree)
        _assertValidDoc(doc)
        c_doc = doc._c_doc

    try:
        # Convert the prefixes inside the try block so that a failure here
        # still runs _destroyFakeDoc() in the finally clause.
        if inclusive_ns_prefixes:
            c_inclusive_ns_prefixes = _convert_ns_prefixes(
                c_doc.dict, inclusive_ns_prefixes)
        with nogil:
            byte_count = c14n.xmlC14NDocDumpMemory(
                c_doc, NULL, exclusive, c_inclusive_ns_prefixes, with_comments, &c_buffer)

    finally:
        _destroyFakeDoc(doc._c_doc, c_doc)
        if c_inclusive_ns_prefixes is not NULL:
            python.lxml_free(c_inclusive_ns_prefixes)

    if byte_count < 0 or c_buffer is NULL:
        if c_buffer is not NULL:
            tree.xmlFree(c_buffer)
        raise C14NError, "C14N failed"
    try:
        result = c_buffer[:byte_count]
    finally:
        tree.xmlFree(c_buffer)
    return result
191
+
192
cdef _raiseSerialisationError(int error_result):
    """Raise the Python exception that corresponds to ``error_result``.

    ``XML_ERR_NO_MEMORY`` becomes ``MemoryError``.  Every other code raises
    ``SerialisationError`` with the name that ``ErrorTypes._getName()``
    returns, or with "unknown error <code>" if there is no name.
    """
    if error_result == xmlerror.XML_ERR_NO_MEMORY:
        raise MemoryError()
    error_name = ErrorTypes._getName(error_result)
    if error_name is not None:
        message = error_name
    else:
        message = f"unknown error {error_result}"
    raise SerialisationError, message
199
+
200
+ ############################################################
201
+ # low-level serialisation functions
202
+
203
cdef void _writeDoctype(tree.xmlOutputBuffer* c_buffer,
                        const_xmlChar* c_doctype) noexcept nogil:
    # Write the given doctype string verbatim, followed by a newline.
    cdef int c_doctype_len = tree.xmlStrlen(c_doctype)
    tree.xmlOutputBufferWrite(c_buffer, c_doctype_len, <const_char*>c_doctype)
    tree.xmlOutputBufferWriteString(c_buffer, "\n")
208
+
209
cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
                             xmlNode* c_node, const_char* encoding, const_xmlChar* c_doctype,
                             int c_method, bint write_xml_declaration,
                             bint write_complete_document,
                             bint pretty_print, bint with_tail,
                             int standalone) noexcept nogil:
    # Write, in this order: the XML declaration, the siblings preceding the
    # internal DTD subset, the doctype (either the given string or the DTD
    # of the document), the siblings preceding the node, the node itself,
    # its tail and the siblings following it.  Which parts get written
    # depends on the flags.  Errors are reported through c_buffer.error.
    cdef xmlNode* c_out_node
    cdef xmlDoc* c_doc = c_node.doc
    cdef bint is_document_child

    if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
        _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)

    # comments/processing instructions before doctype declaration
    if write_complete_document and not c_buffer.error and c_doc.intSubset is not NULL:
        _writePrevSiblings(c_buffer, <xmlNode*>c_doc.intSubset, encoding, pretty_print)

    if c_doctype is not NULL:
        _writeDoctype(c_buffer, c_doctype)
    # write internal DTD subset, preceding PIs/comments, etc.
    if write_complete_document and not c_buffer.error:
        if c_doctype is NULL:
            _writeDtdToBuffer(c_buffer, c_doc, c_node.name, c_method, encoding)
        _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)

    is_document_child = (
        c_node.parent is not NULL and
        c_node.parent.type == tree.XML_DOCUMENT_NODE)

    c_out_node = c_node
    if not is_document_child:
        # copy the node and add namespaces from parents
        # this is required to make libxml write them
        c_out_node = tree.xmlCopyNode(c_node, 2)
        if c_out_node is NULL:
            c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
            return
        _copyParentNamespaces(c_node, c_out_node)

        # let the copy borrow the tree links of the original node
        c_out_node.parent = c_node.parent
        c_out_node.children = c_node.children
        c_out_node.last = c_node.last

    # write node
    if c_method != OUTPUT_METHOD_HTML:
        tree.xmlNodeDumpOutput(
            c_buffer, c_doc, c_out_node, 0, pretty_print, encoding)
    else:
        tree.htmlNodeDumpFormatOutput(
            c_buffer, c_doc, c_out_node, encoding, pretty_print)

    if c_out_node is not c_node:
        # clean up: detach the borrowed children before freeing the copy
        c_out_node.last = NULL
        c_out_node.children = NULL
        tree.xmlFreeNode(c_out_node)

    if c_buffer.error:
        return

    # write tail, trailing comments, etc.
    if with_tail:
        _writeTail(c_buffer, c_node, encoding, c_method, pretty_print)
    if write_complete_document:
        _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
    if pretty_print:
        tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
269
+
270
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
                                    const_xmlChar* version, const_char* encoding,
                                    int standalone) noexcept nogil:
    # Write "<?xml version='...' encoding='...'?>" plus a newline.
    # A NULL version is written as "1.0".  standalone == 0 adds
    # standalone='no', standalone == 1 adds standalone='yes', any other
    # value omits the standalone attribute.
    cdef const_char* c_closing
    cdef int c_closing_len

    if version is NULL:
        version = <unsigned char*>"1.0"

    if standalone == 0:
        c_closing = "' standalone='no'?>\n"
        c_closing_len = 20
    elif standalone == 1:
        c_closing = "' standalone='yes'?>\n"
        c_closing_len = 21
    else:
        c_closing = "'?>\n"
        c_closing_len = 4

    tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
    tree.xmlOutputBufferWriteString(c_buffer, <const_char*>version)
    tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
    tree.xmlOutputBufferWriteString(c_buffer, encoding)
    tree.xmlOutputBufferWrite(c_buffer, c_closing_len, c_closing)
285
+
286
cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
                            xmlDoc* c_doc, const_xmlChar* c_root_name,
                            int c_method, const_char* encoding) noexcept nogil:
    # Write the <!DOCTYPE ...> declaration of the internal DTD subset of
    # c_doc.  Nothing is written if there is no named internal subset or
    # if its name does not match the root element name.
    cdef tree.xmlDtd* c_dtd
    cdef xmlNode* c_child
    cdef char* quotechar
    cdef bint name_differs

    c_dtd = c_doc.intSubset
    if c_dtd is NULL or c_dtd.name is NULL:
        return

    # Name in document type declaration must match the root element tag.
    # For XML, case sensitive match, for HTML insensitive.
    if c_method == OUTPUT_METHOD_HTML:
        name_differs = tree.xmlStrcasecmp(c_root_name, c_dtd.name) != 0
    else:
        name_differs = tree.xmlStrcmp(c_root_name, c_dtd.name) != 0
    if name_differs:
        return

    tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
    tree.xmlOutputBufferWriteString(c_buffer, <const_char*>c_dtd.name)

    # treat empty identifiers like missing ones
    cdef const_xmlChar* public_id = c_dtd.ExternalID
    cdef const_xmlChar* sys_url = c_dtd.SystemID
    if public_id is not NULL and public_id[0] == b'\0':
        public_id = NULL
    if sys_url is not NULL and sys_url[0] == b'\0':
        sys_url = NULL

    if public_id is not NULL:
        tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
        tree.xmlOutputBufferWriteString(c_buffer, <const_char*>public_id)
        if sys_url is not NULL:
            tree.xmlOutputBufferWrite(c_buffer, 2, '" ')
        else:
            tree.xmlOutputBufferWrite(c_buffer, 1, '"')
    elif sys_url is not NULL:
        tree.xmlOutputBufferWrite(c_buffer, 8, ' SYSTEM ')

    if sys_url is not NULL:
        # use single quotes if the URL itself contains a double quote
        if tree.xmlStrchr(sys_url, b'"') is not NULL:
            quotechar = '\''
        else:
            quotechar = '"'
        tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)
        tree.xmlOutputBufferWriteString(c_buffer, <const_char*>sys_url)
        tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)

    if (c_dtd.entities is NULL and c_dtd.elements is NULL and
            c_dtd.attributes is NULL and c_dtd.notations is NULL and
            c_dtd.pentities is NULL):
        # no internal declarations, close the doctype right away
        tree.xmlOutputBufferWrite(c_buffer, 2, '>\n')
        return

    tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n')
    if c_dtd.notations is not NULL and not c_buffer.error:
        # dump the notations into a temporary buffer and copy them over
        c_buf = tree.xmlBufferCreate()
        if not c_buf:
            c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
            return
        tree.xmlDumpNotationTable(c_buf, <tree.xmlNotationTable*>c_dtd.notations)
        tree.xmlOutputBufferWrite(
            c_buffer, tree.xmlBufferLength(c_buf),
            <const_char*>tree.xmlBufferContent(c_buf))
        tree.xmlBufferFree(c_buf)

    c_child = c_dtd.children
    while c_child is not NULL and not c_buffer.error:
        tree.xmlNodeDumpOutput(c_buffer, c_child.doc, c_child, 0, 0, encoding)
        c_child = c_child.next
    tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
356
+
357
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                     const_char* encoding, int c_method, bint pretty_print) noexcept nogil:
    "Write the text and CDATA nodes that directly follow the node."
    cdef xmlNode* c_tail = c_node.next
    while c_tail is not NULL and not c_buffer.error:
        if (c_tail.type != tree.XML_TEXT_NODE and
                c_tail.type != tree.XML_CDATA_SECTION_NODE):
            # the tail ends at the first node that is not text
            break
        if c_method != OUTPUT_METHOD_HTML:
            tree.xmlNodeDumpOutput(
                c_buffer, c_tail.doc, c_tail, 0, pretty_print, encoding)
        else:
            tree.htmlNodeDumpFormatOutput(
                c_buffer, c_tail.doc, c_tail, encoding, pretty_print)
        c_tail = c_tail.next
370
+
371
cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                             const_char* encoding, bint pretty_print) noexcept nogil:
    # Write the run of processing instructions and comments that directly
    # precedes c_node, in document order.  Does nothing if the parent of
    # c_node is an element.
    cdef xmlNode* c_current
    if c_node.parent is not NULL and _isElement(c_node.parent):
        return

    # we are at a root node, so walk back to the first PI/comment sibling
    c_current = c_node
    while c_current.prev is not NULL:
        if (c_current.prev.type != tree.XML_PI_NODE and
                c_current.prev.type != tree.XML_COMMENT_NODE):
            break
        c_current = c_current.prev

    # then write forward until we are back at the node itself
    while c_current is not c_node and not c_buffer.error:
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_current, 0,
                               pretty_print, encoding)
        if pretty_print:
            tree.xmlOutputBufferWriteString(c_buffer, "\n")
        c_current = c_current.next
388
+
389
cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                             const_char* encoding, bint pretty_print) noexcept nogil:
    # Write the run of processing instructions and comments that directly
    # follows c_node.  Does nothing if the parent of c_node is an element.
    cdef xmlNode* c_current
    if c_node.parent is not NULL and _isElement(c_node.parent):
        return

    # we are at a root node, so add PI and comment siblings
    c_current = c_node.next
    while not c_buffer.error and c_current is not NULL:
        if (c_current.type != tree.XML_PI_NODE and
                c_current.type != tree.XML_COMMENT_NODE):
            break
        if pretty_print:
            tree.xmlOutputBufferWriteString(c_buffer, "\n")
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_current, 0,
                               pretty_print, encoding)
        c_current = c_current.next
404
+
405
+
406
+ # copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent())
407
cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
    """Write a NUL-terminated attribute value with XML escaping applied.

    Newline, carriage return and tab are written as character references,
    and the characters '"', '<', '>' and '&' as their named entities.
    Runs of other bytes are written unchanged.  A NULL string writes
    nothing.
    """
    cdef const char *base
    cdef const char *cur
    cdef const char *c_escape
    cdef int c_escape_len
    cdef char c_char

    if string == NULL:
        return

    base = cur = <const char*>string
    while cur[0] != 0:
        c_char = cur[0]
        if c_char == b'\n':
            c_escape = "&#10;"
            c_escape_len = 5
        elif c_char == b'\r':
            c_escape = "&#13;"
            c_escape_len = 5
        elif c_char == b'\t':
            c_escape = "&#9;"
            c_escape_len = 4
        elif c_char == b'"':
            c_escape = "&quot;"
            c_escape_len = 6
        elif c_char == b'<':
            c_escape = "&lt;"
            c_escape_len = 4
        elif c_char == b'>':
            c_escape = "&gt;"
            c_escape_len = 4
        elif c_char == b'&':
            c_escape = "&amp;"
            c_escape_len = 5
        else:
            # Leave further encoding and escaping to the buffer encoder.
            cur += 1
            continue

        # flush the pending unescaped run, then write the replacement
        if base != cur:
            tree.xmlOutputBufferWrite(buf, cur - base, base)
        tree.xmlOutputBufferWrite(buf, c_escape_len, c_escape)
        cur += 1
        base = cur

    # write whatever follows the last escaped character
    if base != cur:
        tree.xmlOutputBufferWrite(buf, cur - base, base)
477
+
478
+
479
cdef void _write_cdata_section(tree.xmlOutputBuffer* buf, const char* c_data, const char* c_end):
    # Write the bytes in [c_data, c_end) wrapped in "<![CDATA[" ... "]]>".
    # The write function takes an int length, so longer data is written
    # in chunks of at most INT_MAX bytes.
    cdef Py_ssize_t c_remaining = c_end - c_data

    tree.xmlOutputBufferWrite(buf, 9, "<![CDATA[")
    while c_remaining > limits.INT_MAX:
        tree.xmlOutputBufferWrite(buf, limits.INT_MAX, c_data)
        c_data += limits.INT_MAX
        c_remaining -= limits.INT_MAX
    tree.xmlOutputBufferWrite(buf, <int>c_remaining, c_data)
    tree.xmlOutputBufferWrite(buf, 3, "]]>")
486
+
487
+
488
cdef _write_cdata_string(tree.xmlOutputBuffer* buf, bytes bstring):
    """Write ``bstring`` as one or more CDATA sections.

    The data is split after every "]]" that is followed by ">", so that the
    terminator "]]>" never appears inside a single section.  At least one
    section is always written, even for empty input.
    """
    cdef const char* c_data = bstring
    cdef const char* c_end = c_data + len(bstring)
    cdef const char* c_pos = c_data
    cdef bint nothing_written = True

    while True:
        c_pos = <const char*> cstring_h.memchr(c_pos, b']', c_end - c_pos)
        if not c_pos:
            break
        c_pos += 1
        if c_pos >= c_end:
            # The ']' was the last byte of the data.  Stop here: advancing
            # further would move c_pos past c_end and make the length
            # passed to memchr() negative, i.e. a huge size_t.
            break
        next_char = c_pos[0]
        c_pos += 1
        if next_char != b']':
            continue
        # Found ']]', c_pos points to next character.
        while c_pos[0] == b']':
            c_pos += 1
        if c_pos[0] != b'>':
            if c_pos == c_end:
                break
            # c_pos[0] is neither ']' nor '>', continue with next character.
            c_pos += 1
            continue

        # Write section up to ']]' and start next block at trailing '>'.
        _write_cdata_section(buf, c_data, c_pos)
        nothing_written = False
        c_data = c_pos
        c_pos += 1

    if nothing_written or c_data < c_end:
        _write_cdata_section(buf, c_data, c_end)
521
+
522
+
523
+ ############################################################
524
+ # output to file-like objects
525
+
526
+ cdef object io_open
527
+ from io import open as io_open
528
+
529
+ cdef object gzip
530
+ import gzip
531
+
532
+ cdef object getwriter
533
+ from codecs import getwriter
534
+ cdef object utf8_writer = getwriter('utf8')
535
+
536
+ cdef object contextmanager
537
+ from contextlib import contextmanager
538
+
539
+ cdef object _open_utf8_file
540
+
541
@contextmanager
def _open_utf8_file(file, compression=0):
    """Context manager that yields a writer for UTF-8 text output.

    ``file`` may be a path or a file-like object.  With a non-zero
    ``compression``, the output goes through a ``GzipFile`` opened at that
    compression level.  Files and gzip streams opened here are closed on
    exit; a file-like object that was passed in is only wrapped.
    """
    file = _getFSPathOrObject(file)
    is_path = _isString(file)

    if compression:
        if is_path:
            gzip_file = gzip.GzipFile(file, mode='wb', compresslevel=compression)
        else:
            gzip_file = gzip.GzipFile(fileobj=file, mode='wb', compresslevel=compression)
        with gzip_file as zf:
            yield utf8_writer(zf)
    elif is_path:
        with io_open(file, 'w', encoding='utf8') as f:
            yield f
    else:
        yield utf8_writer(file)
557
+
558
+
559
@cython.final
@cython.internal
cdef class _FilelikeWriter:
    """Adapter that lets a libxml2 output buffer write to a Python
    file-like object, optionally through a ``GzipFile`` wrapper.

    Exceptions raised by the file-like object are stored in the exception
    context and reported to libxml2 as a ``-1`` return value.
    """
    cdef object _filelike
    cdef object _close_filelike
    cdef _ExceptionContext _exc_context
    cdef _ErrorLog error_log

    def __cinit__(self, filelike, exc_context=None, compression=None, close=False):
        use_gzip = compression is not None and compression > 0
        if use_gzip:
            filelike = GzipFile(
                fileobj=filelike, mode='wb', compresslevel=compression)
        if use_gzip or close:
            # remember what to call when the output buffer gets closed
            self._close_filelike = filelike.close
        self._filelike = filelike
        self._exc_context = _ExceptionContext() if exc_context is None else exc_context
        self.error_log = _ErrorLog()

    cdef tree.xmlOutputBuffer* _createOutputBuffer(
            self, tree.xmlCharEncodingHandler* enchandler) except NULL:
        """Create a libxml2 output buffer that writes through this object.

        Raises ``IOError`` if libxml2 cannot create the buffer.
        """
        cdef tree.xmlOutputBuffer* c_out
        c_out = tree.xmlOutputBufferCreateIO(
            <tree.xmlOutputWriteCallback>_writeFilelikeWriter, _closeFilelikeWriter,
            <python.PyObject*>self, enchandler)
        if c_out is NULL:
            raise IOError, "Could not create I/O writer context."
        return c_out

    cdef int write(self, char* c_buffer, int size) noexcept:
        """Write ``size`` bytes to the file-like object.

        Returns ``size`` on success and -1 after storing the exception on
        failure.
        """
        try:
            if self._filelike is None:
                raise IOError, "File is already closed"
            self._filelike.write(<bytes>c_buffer[:size])
        except:
            size = -1
            self._exc_context._store_raised()
        finally:
            return size  # and swallow any further exceptions

    cdef int close(self) noexcept:
        """Run the registered close callback, if any, and drop the file
        reference.

        Returns 0 on success and -1 after storing the exception on failure.
        """
        retval = 0
        try:
            # Only call close() if a callback was registered in __cinit__;
            # otherwise the file is left open and merely forgotten.
            if self._close_filelike is not None:
                self._close_filelike()
            self._filelike = None
        except:
            retval = -1
            self._exc_context._store_raised()
        finally:
            return retval  # and swallow any further exceptions
615
+
616
cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int length) noexcept:
    # libxml2 write callback: ctxt is the _FilelikeWriter to write to.
    cdef _FilelikeWriter writer = <_FilelikeWriter>ctxt
    return writer.write(c_buffer, length)
618
+
619
cdef int _closeFilelikeWriter(void* ctxt) noexcept:
    # libxml2 close callback: ctxt is the _FilelikeWriter to close.
    cdef _FilelikeWriter writer = <_FilelikeWriter>ctxt
    return writer.close()
621
+
622
cdef _tofilelike(f, _Element element, encoding, doctype, method,
                 bint write_xml_declaration, bint write_doctype,
                 bint pretty_print, bint with_tail, int standalone,
                 int compression):
    """Serialise ``element`` to a file path or file-like object ``f``.

    For the "text" method, the text content is produced by
    ``_textToString()``, gzip-compressed in memory if ``compression`` is
    non-zero, and written in one go.  For the other methods, the tree is
    written through the output buffer created by ``_create_output_buffer()``.

    Re-raises an exception stored by the file-like writer, and otherwise
    raises through ``_raiseSerialisationError()`` if serialisation reports
    an error.
    """
    cdef _FilelikeWriter writer = None
    cdef tree.xmlOutputBuffer* c_buffer
    cdef const_char* c_enc
    cdef const_xmlChar* c_doctype
    cdef int c_method
    cdef int error_result

    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        data = _textToString(element._c_node, encoding, with_tail)
        if compression:
            bytes_out = BytesIO()
            with GzipFile(fileobj=bytes_out, mode='wb', compresslevel=compression) as gzip_file:
                gzip_file.write(data)
            data = bytes_out.getvalue()
        f = _getFSPathOrObject(f)
        if _isString(f):
            filename8 = _encodeFilename(f)
            with open(filename8, 'wb') as out_file:
                out_file.write(data)
        else:
            f.write(data)
        return

    if encoding is None:
        c_enc = NULL
    else:
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)
    if doctype is None:
        c_doctype = NULL
    else:
        doctype = _utf8(doctype)
        c_doctype = _xcstr(doctype)

    writer = _create_output_buffer(f, c_enc, compression, &c_buffer, close=False)
    if writer is None:
        # No Python level writer is involved, so the GIL can be released.
        with nogil:
            error_result = _serialise_node(
                c_buffer, c_doctype, c_enc, element._c_node, c_method,
                write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
    else:
        # Output goes through the _FilelikeWriter callbacks: keep the GIL.
        error_result = _serialise_node(
            c_buffer, c_doctype, c_enc, element._c_node, c_method,
            write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)

    if writer is not None:
        writer._exc_context._raise_if_stored()
    if error_result != xmlerror.XML_ERR_OK:
        _raiseSerialisationError(error_result)
676
+
677
+
678
cdef int _serialise_node(tree.xmlOutputBuffer* c_buffer, const_xmlChar* c_doctype,
                         const_char* c_enc, xmlNode* c_node, int c_method,
                         bint write_xml_declaration, bint write_doctype, bint pretty_print,
                         bint with_tail, int standalone) noexcept nogil:
    # Write the node to c_buffer and close the buffer in all cases.
    # Returns the error code of the buffer if writing failed, -1 if only
    # closing failed, and XML_ERR_OK otherwise.
    cdef int write_error
    cdef int close_result

    _writeNodeToBuffer(
        c_buffer, c_node, c_enc, c_doctype, c_method,
        write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)

    # read the error state before the buffer is closed
    write_error = c_buffer.error
    close_result = tree.xmlOutputBufferClose(c_buffer)

    if write_error != xmlerror.XML_ERR_OK:
        return write_error
    if close_result == -1:
        return -1
    return xmlerror.XML_ERR_OK
693
+
694
+
695
cdef _FilelikeWriter _create_output_buffer(
        f, const_char* c_enc, int c_compression,
        tree.xmlOutputBuffer** c_buffer_ret, bint close):
    # Create a libxml2 output buffer for the target 'f' and store it in
    # 'c_buffer_ret[0]'.
    #
    # 'f' is first passed through _getFSPathOrObject().  If the result is a
    # string, a filename based buffer is created and None is returned.  If it
    # has a 'write' attribute, it gets wrapped in a _FilelikeWriter, which is
    # returned.  Anything else raises TypeError.
    #
    # Raises LookupError if no encoding handler is found for 'c_enc'.
    # If anything fails after the handler lookup, the handler is closed again
    # before the exception is re-raised.
    cdef tree.xmlOutputBuffer* c_buffer
    cdef _FilelikeWriter writer
    cdef bytes filename8
    enchandler = tree.xmlFindCharEncodingHandler(c_enc)
    if enchandler is NULL:
        raise LookupError(
            f"unknown encoding: '{c_enc.decode('UTF-8') if c_enc is not NULL else u''}'")
    try:
        f = _getFSPathOrObject(f)

        if c_compression and not HAS_ZLIB_COMPRESSION and _isString(f):
            # Let "_FilelikeWriter" fall back to Python's GzipFile.
            # We opened the file ourselves here, so force 'close' to True.
            f = open(f, mode="wb")
            close = True

        if _isString(f):
            filename8 = _encodeFilename(f)
            if b'%' in filename8 and (
                    # Exclude absolute Windows paths and file:// URLs.
                    # NOTE(review): as written, names starting with 'file://' DO get
                    # escaped (second operand of the 'or'), which reads differently
                    # from the comment above - confirm the intent.
                    _isFilePath(<const xmlChar*>filename8) not in (NO_FILE_PATH, ABS_WIN_FILE_PATH)
                    or filename8[:7].lower() == b'file://'):
                # A file path (not a URL) containing the '%' URL escape character.
                # libxml2 uses URL-unescaping on these, so escape the path before passing it in.
                filename8 = filename8.replace(b'%', b'%25')
            c_buffer = tree.xmlOutputBufferCreateFilename(
                _cstr(filename8), enchandler, c_compression)
            if c_buffer is NULL:
                python.PyErr_SetFromErrno(IOError)  # raises IOError
            # No Python level writer is involved when writing to a filename.
            writer = None
        elif hasattr(f, 'write'):
            writer = _FilelikeWriter(f, compression=c_compression, close=close)
            c_buffer = writer._createOutputBuffer(enchandler)
        else:
            raise TypeError(
                f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    except:
        # No buffer was handed out, so release the encoding handler ourselves.
        tree.xmlCharEncCloseFunc(enchandler)
        raise
    c_buffer_ret[0] = c_buffer
    return writer
738
+
739
cdef xmlChar **_convert_ns_prefixes(tree.xmlDict* c_dict, ns_prefixes) except NULL:
    # Build a NULL-terminated C array of namespace prefix pointers from the
    # Python sequence 'ns_prefixes'.
    #
    # Only prefixes that are found in 'c_dict' are stored; the stored pointers
    # are the ones returned by xmlDictExists(), i.e. they refer into the dict.
    # The array itself is allocated with lxml_malloc() and is returned to the
    # caller on success; on error it is freed here before re-raising.
    #
    # Raises MemoryError if the allocation fails and ValueError if a prefix
    # is longer than INT_MAX bytes in UTF-8.
    cdef size_t i, num_ns_prefixes = len(ns_prefixes)
    # Need to allocate one extra memory block to handle last NULL entry
    c_ns_prefixes = <xmlChar **>python.lxml_malloc(num_ns_prefixes + 1, sizeof(xmlChar*))
    if not c_ns_prefixes:
        raise MemoryError()
    i = 0
    try:
        for prefix in ns_prefixes:
            prefix_utf = _utf8(prefix)
            c_prefix_len = len(prefix_utf)
            # The length is passed to libxml2 as a C int below.
            if c_prefix_len > limits.INT_MAX:
                raise ValueError("Prefix too long")
            c_prefix = tree.xmlDictExists(c_dict, _xcstr(prefix_utf), <int> c_prefix_len)
            if c_prefix:
                # unknown prefixes do not need to get serialised
                c_ns_prefixes[i] = <xmlChar*>c_prefix
                i += 1
    except:
        python.lxml_free(c_ns_prefixes)
        raise

    c_ns_prefixes[i] = NULL  # append end marker
    return c_ns_prefixes
763
+
764
cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
                     int compression, inclusive_ns_prefixes):
    # Write the C14N serialisation of 'element' to 'f'.
    #
    # 'f' is passed through _getFSPathOrObject() and may then be a string
    # (written via xmlC14NDocSave) or an object with a 'write' attribute
    # (written via a _FilelikeWriter and xmlC14NDocSaveTo).  Anything else
    # raises TypeError.
    #
    # Raises C14NError if libxml2 reports a negative result; when a writer was
    # used and its error log is non-empty, the first logged message is used.
    # Exceptions stored by the writer are re-raised first.
    cdef _FilelikeWriter writer = None
    cdef tree.xmlOutputBuffer* c_buffer
    cdef xmlChar **c_inclusive_ns_prefixes = NULL
    cdef char* c_filename
    cdef xmlDoc* c_base_doc
    cdef xmlDoc* c_doc
    cdef int bytes_count, error = 0

    c_base_doc = element._c_node.doc
    # The document obtained here is released again via _destroyFakeDoc()
    # in the 'finally' clause below.
    c_doc = _fakeRootDoc(c_base_doc, element._c_node)
    try:
        c_inclusive_ns_prefixes = (
            _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes)
            if inclusive_ns_prefixes else NULL)

        f = _getFSPathOrObject(f)

        close = False
        if compression and not HAS_ZLIB_COMPRESSION and _isString(f):
            # Let "_FilelikeWriter" fall back to Python's GzipFile.
            # We opened the file ourselves, so ask the writer to close it.
            f = open(f, mode="wb")
            close = True

        if _isString(f):
            filename8 = _encodeFilename(f)
            c_filename = _cstr(filename8)
            with nogil:
                error = c14n.xmlC14NDocSave(
                    c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                    with_comments, c_filename, compression)
        elif hasattr(f, 'write'):
            writer = _FilelikeWriter(f, compression=compression, close=close)
            c_buffer = writer._createOutputBuffer(NULL)
            try:
                with writer.error_log:
                    bytes_count = c14n.xmlC14NDocSaveTo(
                        c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                        with_comments, c_buffer)
            finally:
                # Always close the output buffer, even if serialising raised.
                error = tree.xmlOutputBufferClose(c_buffer)
            # A serialisation failure takes precedence over the close result;
            # otherwise any close result other than -1 counts as success.
            if bytes_count < 0:
                error = bytes_count
            elif error != -1:
                error = xmlerror.XML_ERR_OK
        else:
            raise TypeError(f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    finally:
        _destroyFakeDoc(c_base_doc, c_doc)
        if c_inclusive_ns_prefixes is not NULL:
            python.lxml_free(c_inclusive_ns_prefixes)

    if writer is not None:
        writer._exc_context._raise_if_stored()

    if error < 0:
        message = "C14N failed"
        if writer is not None:
            errors = writer.error_log
            if len(errors):
                message = errors[0].message
        raise C14NError(message)
827
+
828
+
829
+ # C14N 2.0
830
+
831
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.
    If *out* is provided, the function returns None.

    Either *xml_data* (an XML string, tree or Element) or *from_file*
    (a file path or file-like object) must be provided as input.
    If both are given, *xml_data* is used and *from_file* is ignored.
    A ``ValueError`` is raised if neither is given.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect the output in memory if the caller did not pass a target.
    sio = None
    if out is None:
        sio = out = StringIO()

    target = C14NWriterTarget(out.write, **options)

    if xml_data is not None and not isinstance(xml_data, basestring):
        # Not a string => walk the given tree/Element instead of parsing.
        _tree_to_target(xml_data, target)
        return sio.getvalue() if sio is not None else None

    cdef _FeedParser parser = XMLParser(
        target=target,
        attribute_defaults=True,
        collect_ids=False,
    )

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    elif from_file is not None:
        try:
            _parseDocument(from_file, parser, base_url=None)
        except _TargetParserResult:
            # Deliberately ignored: the output was already written to the target.
            pass

    return sio.getvalue() if sio is not None else None
873
+
874
+
875
cdef _tree_to_target(element, target):
    # Replay the tree below 'element' as parser target calls on 'target'
    # and return whatever 'target.close()' returns.
    walker = iterwalk(element, events=('start', 'end', 'start-ns', 'comment', 'pi'))
    for event, item in walker:
        if event == 'start-ns':
            # Namespace events carry a tuple and never have text attached.
            target.start_ns(*item)
            continue

        if event == 'start':
            target.start(item.tag, item.attrib)
            pending_text = item.text
        elif event == 'end':
            target.end(item.tag)
            pending_text = item.tail
        elif event == 'comment':
            target.comment(item.text)
            pending_text = item.tail
        elif event == 'pi':
            target.pi(item.target, item.text)
            pending_text = item.tail
        else:
            pending_text = None

        # Forward the text that follows the event, if there is any.
        if pending_text:
            target.data(pending_text)
    return target.close()
896
+
897
+
898
# Bound 'match' method of a compiled pattern that accepts text of the form
# "<word chars>:<word chars>"; it returns a match object or None.
# Used to decide whether text or an attribute value is treated as a prefixed name.
cdef object _looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
899
+
900
+
901
cdef class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The first argument, *write*, is a callable that is called with each
    piece of serialised text output.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    # Callable that receives the serialised output text.
    cdef object _write
    # Text chunks collected by data() until the next _flush().
    cdef list _data
    # Tag names whose text content may be a prefixed name, or None.
    cdef set _qname_aware_tags
    # Bound set.intersection of the qname aware attribute names, or None.
    cdef object _find_qname_aware_attrs
    # Stack of lists of (uri, prefix) pairs that were written as declarations.
    cdef list _declared_ns_stack
    # Stack of lists of (uri, prefix) pairs received via start_ns().
    cdef list _ns_stack
    # Maps namespace URIs to generated prefixes (used with rewrite_prefixes).
    cdef dict _prefix_map
    # Stack of xml:space "preserve" flags, one entry per open element plus
    # an initial False.
    cdef list _preserve_space
    # (tag, attrs, new_namespaces) of a start tag that is delayed until its
    # text content is known, or None.
    cdef tuple _pending_start
    cdef set _exclude_tags
    cdef set _exclude_attrs
    # Nesting depth inside an excluded element; 0 when not inside one.
    cdef Py_ssize_t _ignored_depth
    cdef bint _with_comments
    cdef bint _strip_text
    cdef bint _rewrite_prefixes
    # True once the first start tag was written.
    cdef bint _root_seen
    # True once the outermost element was closed.
    cdef bint _root_done

    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        # Empty or missing collections are normalised to None.
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(_DEFAULT_NAMESPACE_PREFIXES_ITEMS)
        self._ns_stack.append([])
        self._prefix_map = {}
        self._preserve_space = [False]
        self._pending_start = None
        self._ignored_depth = 0
        self._root_seen = False
        self._root_done = False

    def _iter_namespaces(self, ns_stack):
        # Yield the (uri, prefix) pairs of 'ns_stack', visiting the most
        # recently pushed level first.
        for namespaces in reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    cdef _resolve_prefix_name(self, prefixed_name):
        # Convert "prefix:name" into "{uri}name" using the namespaces received
        # via start_ns().  Raises ValueError if the prefix is not found.
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    cdef _qname(self, qname, uri=None):
        # Return a tuple (prefixed name, local name, uri) for 'qname'.
        # Without 'uri', 'qname' is split as "{uri}tag" (or taken as a plain
        # tag with an empty URI).  May append a new (uri, prefix) pair to the
        # top of the declared namespace stack.
        # Raises ValueError if no prefix can be found for a non-empty URI.
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # Prefixes seen on the way are remembered so that a pair whose prefix
        # was already encountered in a more recent entry is not used.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        if not uri:
            # As soon as a default namespace is defined,
            # anything that has no namespace (and thus, no prefix) goes there.
            return tag, tag, uri

        raise ValueError(f'Namespace "{uri}" of name "{tag}" is not declared in scope')

    def data(self, data):
        """Collect text content; ignored while inside an excluded element."""
        if not self._ignored_depth:
            self._data.append(data)

    cdef _flush(self):
        # Write out the collected text (escaped) and, if a start tag was
        # delayed, write that start tag first.
        cdef unicode data = ''.join(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None
            qname_text = data if ':' in data and _looks_like_prefix_name(data) else None
            self._start(tag, attrs, new_namespaces, qname_text)
            if qname_text is not None:
                # _start() already wrote the text in its resolved form.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Record a namespace declaration as a (uri, prefix) pair."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        if self._data:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an opening tag.

        Excluded tags and everything nested inside them only increase the
        ignore depth.  For qname aware tags, writing is delayed until the
        text content has been seen.
        """
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        if self._data:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    cdef _start(self, tag, attrs, new_namespaces, qname_text=None):
        # Write the start tag including namespace declarations and attributes.
        # 'new_namespaces' is the list that start() pushed onto the declared
        # namespace stack; _qname() may append to it.  'qname_text', if given,
        # is prefixed-name text content that is written right after the tag.
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        # Maps prefixed names found in text/attribute values to "{uri}name".
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parsed_qnames = {n: self._qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    # Replace the value by its prefixed form from parsed_qnames.
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Handle a closing tag and pop the per-element state."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        if self._data:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial entry is left once the outermost element is closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment, unless comments are disabled or currently ignored."""
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        # After the root element, a newline precedes the comment;
        # before the root element, a newline follows it (see below).
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction, unless currently ignored."""
        if self._ignored_depth:
            return
        # Same newline handling around the root element as in comment().
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')

    def close(self):
        """Finish the target; always returns None."""
        return None
1173
+
1174
+
1175
cdef _raise_serialization_error(text):
    # Always raises TypeError, naming the offending value and its type.
    type_name = type(text).__name__
    raise TypeError("cannot serialize %r (type %s)" % (text, type_name))
1177
+
1178
+
1179
cdef unicode _escape_cdata_c14n(stext):
    # Escape character data: '&', '<', '>' and carriage return are replaced
    # by references.  Text without any of these is returned unchanged.
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t chunk_start = 0, index = 0
    cdef list pieces = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for index, ch in enumerate(text):
        if ch == '\r':
            replacement = '&#xD;'
        elif ch == '>':
            replacement = '&gt;'
        elif ch == '<':
            replacement = '&lt;'
        elif ch == '&':
            replacement = '&amp;'
        else:
            continue

        # The output list is only created on the first character to escape.
        if pieces is None:
            pieces = []
        # Copy the unescaped run that precedes this character, if any.
        if chunk_start < index:
            pieces.append(text[chunk_start:index])
        pieces.append(replacement)
        chunk_start = index + 1

    if pieces is None:
        return text
    # Copy whatever follows the last escaped character.
    if chunk_start <= index:
        pieces.append(text[chunk_start:index + 1])
    return ''.join(pieces)
1214
+
1215
+
1216
cdef unicode _escape_attrib_c14n(stext):
    # Escape an attribute value: '&', '<', '"', tab, newline and carriage
    # return are replaced by references.  Text without any of these is
    # returned unchanged.
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t chunk_start = 0, index = 0
    cdef list pieces = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for index, ch in enumerate(text):
        if ch == '\t':
            replacement = '&#x9;'
        elif ch == '\n':
            replacement = '&#xA;'
        elif ch == '\r':
            replacement = '&#xD;'
        elif ch == '"':
            replacement = '&quot;'
        elif ch == '<':
            replacement = '&lt;'
        elif ch == '&':
            replacement = '&amp;'
        else:
            continue

        # The output list is only created on the first character to escape.
        if pieces is None:
            pieces = []
        # Copy the unescaped run that precedes this character, if any.
        if chunk_start < index:
            pieces.append(text[chunk_start:index])
        pieces.append(replacement)
        chunk_start = index + 1

    if pieces is None:
        return text
    # Copy whatever follows the last escaped character.
    if chunk_start <= index:
        pieces.append(text[chunk_start:index + 1])
    return ''.join(pieces)
1255
+
1256
+
1257
+ # incremental serialisation
1258
+
1259
cdef class xmlfile:
    """xmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    A simple mechanism for incremental XML serialisation.

    Usage example::

         with xmlfile("somefile.xml", encoding='utf-8') as xf:
             xf.write_declaration(standalone=True)
             xf.write_doctype('<!DOCTYPE root SYSTEM "some.dtd">')

             # generate an element (the root element)
             with xf.element('root'):
                  # write a complete Element into the open root element
                  xf.write(etree.Element('test'))

                  # generate and write more Elements, e.g. through iterparse
                  for element in generate_some_elements():
                      # serialise generated elements into the XML file
                      xf.write(element)

                  # or write multiple Elements or strings at once
                  xf.write(etree.Element('start'), "text", etree.Element('end'))

    If 'output_file' is a file(-like) object, passing ``close=True`` will
    close it when exiting the context manager.  By default, it is left
    to the owner to do that.  When a file path is used, lxml will take care
    of opening and closing the file itself.  Also, when a compression level
    is set, lxml will deliberately close the file to make sure all data gets
    compressed and written.

    Setting ``buffered=False`` will flush the output after each operation,
    such as opening or closing an ``xf.element()`` block or calling
    ``xf.write()``.  Alternatively, calling ``xf.flush()`` can be used to
    explicitly flush any pending output when buffering is enabled.

    The object can also be used with ``async with``, in which case the
    output file must be an object with a ``.write()`` method, not a file path.
    """
    cdef object output_file
    cdef bytes encoding
    # Writer handed out by __enter__(), reset to None in __exit__().
    cdef _IncrementalFileWriter writer
    # Writer handed out by __aenter__(), reset to None in __aexit__().
    cdef _AsyncIncrementalFileWriter async_writer
    cdef int compresslevel
    cdef bint close
    cdef bint buffered
    # Output method constant; OUTPUT_METHOD_XML unless changed by a subclass.
    cdef int method

    def __init__(self, output_file not None, encoding=None, compression=None,
                 close=False, buffered=True):
        self.output_file = output_file
        self.encoding = _utf8orNone(encoding)
        # A compression of None is stored as level 0.
        self.compresslevel = compression or 0
        self.close = close
        self.buffered = buffered
        self.method = OUTPUT_METHOD_XML

    def __enter__(self):
        # Create and return the incremental writer for the output file.
        assert self.output_file is not None
        self.writer = _IncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        return self.writer

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.writer is not None:
            old_writer, self.writer = self.writer, None
            # Closing errors are only raised when leaving without an exception.
            raise_on_error = exc_type is None
            old_writer._close(raise_on_error)
            if self.close:
                # Drop our reference; __enter__() asserts that it is set.
                self.output_file = None

    async def __aenter__(self):
        # Create and return the asynchronous incremental writer.
        # Raises TypeError for string targets and for objects without 'write'.
        assert self.output_file is not None
        if isinstance(self.output_file, basestring):
            raise TypeError("Cannot asynchronously write to a plain file")
        if not hasattr(self.output_file, 'write'):
            raise TypeError("Output file needs an async .write() method")
        self.async_writer = _AsyncIncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        return self.async_writer

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.async_writer is not None:
            old_writer, self.async_writer = self.async_writer, None
            # Closing errors are only raised when leaving without an exception.
            raise_on_error = exc_type is None
            await old_writer._close(raise_on_error)
            if self.close:
                # Drop our reference; __aenter__() asserts that it is set.
                self.output_file = None
1346
+
1347
+
1348
cdef class htmlfile(xmlfile):
    """htmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    A simple mechanism for incremental HTML serialisation.  Works the same as
    xmlfile.
    """
    def __init__(self, *args, **kwargs):
        # Same setup as xmlfile, then switch the output method to HTML.
        super().__init__(*args, **kwargs)
        self.method = OUTPUT_METHOD_HTML
1357
+
1358
+
1359
# Progress states of _IncrementalFileWriter.  The values are ordered and
# the writer compares them with '>' and '>='.
cdef enum _IncrementalFileWriterStatus:
    WRITER_STARTING = 0      # initial state, set when the writer is created
    WRITER_DECL_WRITTEN = 1  # set by write_declaration() without a doctype
    WRITER_DTD_WRITTEN = 2   # set once a doctype was written
    WRITER_IN_ELEMENT = 3    # set when a start tag was written
    WRITER_FINISHED = 4      # set when the last open element was closed
1365
+
1366
+
1367
+ @cython.final
1368
+ @cython.internal
1369
+ cdef class _IncrementalFileWriter:
1370
+ cdef tree.xmlOutputBuffer* _c_out
1371
+ cdef bytes _encoding
1372
+ cdef const_char* _c_encoding
1373
+ cdef _FilelikeWriter _target
1374
+ cdef list _element_stack
1375
+ cdef int _status
1376
+ cdef int _method
1377
+ cdef bint _buffered
1378
+
1379
+ def __cinit__(self, outfile, bytes encoding, int compresslevel, bint close,
1380
+ bint buffered, int method):
1381
+ self._status = WRITER_STARTING
1382
+ self._element_stack = []
1383
+ if encoding is None:
1384
+ # We always need a document encoding to make the attribute serialisation
1385
+ # of libxml2 identical to ours.
1386
+ encoding = b'ASCII'
1387
+ self._encoding = encoding
1388
+ self._c_encoding = _cstr(encoding)
1389
+ self._buffered = buffered
1390
+ self._target = _create_output_buffer(
1391
+ outfile, self._c_encoding, compresslevel, &self._c_out, close)
1392
+ self._method = method
1393
+
1394
+ def __dealloc__(self):
1395
+ if self._c_out is not NULL:
1396
+ tree.xmlOutputBufferClose(self._c_out)
1397
+
1398
+ def write_declaration(self, version=None, standalone=None, doctype=None):
1399
+ """write_declaration(self, version=None, standalone=None, doctype=None)
1400
+
1401
+ Write an XML declaration and (optionally) a doctype into the file.
1402
+ """
1403
+ assert self._c_out is not NULL
1404
+ cdef const_xmlChar* c_version
1405
+ cdef int c_standalone
1406
+ if self._method != OUTPUT_METHOD_XML:
1407
+ raise LxmlSyntaxError("only XML documents have declarations")
1408
+ if self._status >= WRITER_DECL_WRITTEN:
1409
+ raise LxmlSyntaxError("XML declaration already written")
1410
+ version = _utf8orNone(version)
1411
+ c_version = _xcstr(version) if version is not None else NULL
1412
+ doctype = _utf8orNone(doctype)
1413
+ if standalone is None:
1414
+ c_standalone = -1
1415
+ else:
1416
+ c_standalone = 1 if standalone else 0
1417
+ _writeDeclarationToBuffer(self._c_out, c_version, self._c_encoding, c_standalone)
1418
+ if doctype is not None:
1419
+ _writeDoctype(self._c_out, _xcstr(doctype))
1420
+ self._status = WRITER_DTD_WRITTEN
1421
+ else:
1422
+ self._status = WRITER_DECL_WRITTEN
1423
+ if not self._buffered:
1424
+ tree.xmlOutputBufferFlush(self._c_out)
1425
+ self._handle_error(self._c_out.error)
1426
+
1427
+ def write_doctype(self, doctype):
1428
+ """write_doctype(self, doctype)
1429
+
1430
+ Writes the given doctype declaration verbatimly into the file.
1431
+ """
1432
+ assert self._c_out is not NULL
1433
+ if doctype is None:
1434
+ return
1435
+ if self._status >= WRITER_DTD_WRITTEN:
1436
+ raise LxmlSyntaxError("DOCTYPE already written or cannot write it here")
1437
+ doctype = _utf8(doctype)
1438
+ _writeDoctype(self._c_out, _xcstr(doctype))
1439
+ self._status = WRITER_DTD_WRITTEN
1440
+ if not self._buffered:
1441
+ tree.xmlOutputBufferFlush(self._c_out)
1442
+ self._handle_error(self._c_out.error)
1443
+
1444
+ def method(self, method):
1445
+ """method(self, method)
1446
+
1447
+ Returns a context manager that overrides and restores the output method.
1448
+ method is one of (None, 'xml', 'html') where None means 'xml'.
1449
+ """
1450
+ assert self._c_out is not NULL
1451
+ c_method = self._method if method is None else _findOutputMethod(method)
1452
+ return _MethodChanger(self, c_method)
1453
+
1454
+ def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
1455
+ """element(self, tag, attrib=None, nsmap=None, method, **_extra)
1456
+
1457
+ Returns a context manager that writes an opening and closing tag.
1458
+ method is one of (None, 'xml', 'html') where None means 'xml'.
1459
+ """
1460
+ assert self._c_out is not NULL
1461
+ attributes = []
1462
+ if attrib is not None:
1463
+ for name, value in _iter_attrib(attrib):
1464
+ if name not in _extra:
1465
+ ns, name = _getNsTag(name)
1466
+ attributes.append((ns, name, _utf8(value)))
1467
+ if _extra:
1468
+ for name, value in _extra.iteritems():
1469
+ ns, name = _getNsTag(name)
1470
+ attributes.append((ns, name, _utf8(value)))
1471
+ reversed_nsmap = {}
1472
+ if nsmap:
1473
+ for prefix, ns in nsmap.items():
1474
+ if prefix is not None:
1475
+ prefix = _utf8(prefix)
1476
+ _prefixValidOrRaise(prefix)
1477
+ reversed_nsmap[_utf8(ns)] = prefix
1478
+ ns, name = _getNsTag(tag)
1479
+
1480
+ c_method = self._method if method is None else _findOutputMethod(method)
1481
+
1482
+ return _FileWriterElement(self, (ns, name, attributes, reversed_nsmap), c_method)
1483
+
1484
+ cdef _write_qname(self, bytes name, bytes prefix):
1485
+ if prefix: # empty bytes for no prefix (not None to allow sorting)
1486
+ tree.xmlOutputBufferWrite(self._c_out, len(prefix), _cstr(prefix))
1487
+ tree.xmlOutputBufferWrite(self._c_out, 1, ':')
1488
+ tree.xmlOutputBufferWrite(self._c_out, len(name), _cstr(name))
1489
+
1490
+ cdef _write_start_element(self, element_config):
1491
+ if self._status > WRITER_IN_ELEMENT:
1492
+ raise LxmlSyntaxError("cannot append trailing element to complete XML document")
1493
+ ns, name, attributes, nsmap = element_config
1494
+ flat_namespace_map, new_namespaces = self._collect_namespaces(nsmap)
1495
+ prefix = self._find_prefix(ns, flat_namespace_map, new_namespaces)
1496
+ tree.xmlOutputBufferWrite(self._c_out, 1, '<')
1497
+ self._write_qname(name, prefix)
1498
+
1499
+ self._write_attributes_and_namespaces(
1500
+ attributes, flat_namespace_map, new_namespaces)
1501
+
1502
+ tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1503
+ if not self._buffered:
1504
+ tree.xmlOutputBufferFlush(self._c_out)
1505
+ self._handle_error(self._c_out.error)
1506
+
1507
+ self._element_stack.append((ns, name, prefix, flat_namespace_map))
1508
+ self._status = WRITER_IN_ELEMENT
1509
+
1510
+ cdef _write_attributes_and_namespaces(self, list attributes,
1511
+ dict flat_namespace_map,
1512
+ list new_namespaces):
1513
+ if attributes:
1514
+ # _find_prefix() may append to new_namespaces => build them first
1515
+ attributes = [
1516
+ (self._find_prefix(ns, flat_namespace_map, new_namespaces), name, value)
1517
+ for ns, name, value in attributes ]
1518
+ if new_namespaces:
1519
+ new_namespaces.sort()
1520
+ self._write_attributes_list(new_namespaces)
1521
+ if attributes:
1522
+ self._write_attributes_list(attributes)
1523
+
1524
+ cdef _write_attributes_list(self, list attributes):
1525
+ for prefix, name, value in attributes:
1526
+ tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
1527
+ self._write_qname(name, prefix)
1528
+ tree.xmlOutputBufferWrite(self._c_out, 2, '="')
1529
+ _write_attr_string(self._c_out, _cstr(value))
1530
+
1531
+ tree.xmlOutputBufferWrite(self._c_out, 1, '"')
1532
+
1533
+ cdef _write_end_element(self, element_config):
1534
+ if self._status != WRITER_IN_ELEMENT:
1535
+ raise LxmlSyntaxError("not in an element")
1536
+ if not self._element_stack or self._element_stack[-1][:2] != element_config[:2]:
1537
+ raise LxmlSyntaxError("inconsistent exit action in context manager")
1538
+
1539
+ # If previous write operations failed, the context manager exit might still call us.
1540
+ # That is ok, but we stop writing closing tags and handling errors in that case.
1541
+ # For all non-I/O errors, we continue writing closing tags if we can.
1542
+ ok_to_write = self._c_out.error == xmlerror.XML_ERR_OK
1543
+
1544
+ name, prefix = self._element_stack.pop()[1:3]
1545
+ if ok_to_write:
1546
+ tree.xmlOutputBufferWrite(self._c_out, 2, '</')
1547
+ self._write_qname(name, prefix)
1548
+ tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1549
+
1550
+ if not self._element_stack:
1551
+ self._status = WRITER_FINISHED
1552
+ if ok_to_write:
1553
+ if not self._buffered:
1554
+ tree.xmlOutputBufferFlush(self._c_out)
1555
+ self._handle_error(self._c_out.error)
1556
+
1557
cdef _find_prefix(self, bytes href, dict flat_namespaces_map, list new_namespaces):
    """Return the prefix mapped to ``href``, creating an ``nsN`` prefix if needed.

    A newly created prefix is recorded in ``flat_namespaces_map`` and a
    matching ``(b'xmlns', prefix, href)`` entry is appended to
    ``new_namespaces``.  Returns None when ``href`` is None.
    """
    if href is None:
        return None
    try:
        # the stored prefix may itself be None, so look it up directly
        return flat_namespaces_map[href]
    except KeyError:
        pass

    # need to create a new prefix: take the first 'nsN' that is not in use
    taken = flat_namespaces_map.values()
    counter = 0
    candidate = _utf8('ns%d' % counter)
    while candidate in taken:
        counter += 1
        candidate = _utf8('ns%d' % counter)

    new_namespaces.append((b'xmlns', candidate, href))
    flat_namespaces_map[href] = candidate
    return candidate
1572
+
1573
cdef _collect_namespaces(self, dict nsmap):
    """Build the namespace state for a new element from its ``nsmap``.

    Returns a tuple ``(flat_namespaces_map, new_namespaces)``: the first maps
    each namespace to a prefix, including mappings taken over from the
    innermost open element; the second lists the declarations defined by
    ``nsmap`` itself as sortable 3-tuples.
    """
    cdef dict parent_namespaces
    declarations = []
    known = {}
    for ns, prefix in nsmap.iteritems():
        known[ns] = prefix
        # use empty bytes rather than None to allow sorting
        declarations.append(
            (b'', b'xmlns', ns) if prefix is None else (b'xmlns', prefix, ns))

    # merge in flat namespace map of parent
    if self._element_stack:
        parent_namespaces = <dict>self._element_stack[-1][-1]
        for ns, prefix in parent_namespaces.iteritems():
            # unknown or empty prefix => prefer a 'real' prefix
            if known.get(ns) is None:
                known[ns] = prefix
    return known, declarations
1590
+
1591
def write(self, *args, bint with_tail=True, bint pretty_print=False, method=None):
    """write(self, *args, with_tail=True, pretty_print=False, method=None)

    Write subtrees or strings into the file.

    If method is not None, it should be one of ('html', 'xml', 'text')
    to temporarily override the output method.

    Strings, CDATA objects and elements are accepted; None values are
    skipped and any other type raises TypeError.  Strings containing
    non-whitespace characters are only accepted inside an open element,
    whereas whitespace-only strings may also be written before the first
    element has been opened.  Writing any string after the document is
    complete raises LxmlSyntaxError.
    """
    assert self._c_out is not NULL
    c_method = self._method if method is None else _findOutputMethod(method)

    for content in args:
        if _isString(content):
            if self._status != WRITER_IN_ELEMENT:
                if self._status > WRITER_IN_ELEMENT or content.strip():
                    raise LxmlSyntaxError("not in an element")
            bstring = _utf8(content)
            if not bstring:
                continue

            if self._element_stack:
                ns, name, _, _ = self._element_stack[-1]
            else:
                # Whitespace written outside of any element: there is no
                # enclosing tag to inspect, so do not index the empty stack.
                ns = name = None

            if (c_method == OUTPUT_METHOD_HTML and
                    ns in (None, b'http://www.w3.org/1999/xhtml') and
                    name in (b'script', b'style')):
                # content of HTML <script>/<style> is written without escaping
                tree.xmlOutputBufferWrite(self._c_out, len(bstring), _cstr(bstring))
            else:
                tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(bstring), NULL)

        elif isinstance(content, CDATA):
            if self._status > WRITER_IN_ELEMENT:
                raise LxmlSyntaxError("not in an element")
            _write_cdata_string(self._c_out, (<CDATA>content)._utf8_data)

        elif iselement(content):
            if self._status > WRITER_IN_ELEMENT:
                raise LxmlSyntaxError("cannot append trailing element to complete XML document")
            _writeNodeToBuffer(self._c_out, (<_Element>content)._c_node,
                               self._c_encoding, NULL, c_method,
                               False, False, pretty_print, with_tail, False)
            if (<_Element>content)._c_node.type == tree.XML_ELEMENT_NODE:
                # an element written at the top level completes the document
                if not self._element_stack:
                    self._status = WRITER_FINISHED

        elif content is not None:
            raise TypeError(
                f"got invalid input value of type {type(content)}, expected string, CDATA or Element")

        # report output errors after each written item
        self._handle_error(self._c_out.error)

    if not self._buffered:
        tree.xmlOutputBufferFlush(self._c_out)
        self._handle_error(self._c_out.error)
1644
+
1645
def flush(self):
    """flush(self)

    Push whatever is still pending in the current output buffer out to
    the stream, then report any error recorded on the buffer.
    """
    assert self._c_out is not NULL
    tree.xmlOutputBufferFlush(self._c_out)
    # the flush result is read back from the buffer's error field
    self._handle_error(self._c_out.error)
1653
+
1654
cdef _close(self, bint raise_on_error):
    """Close the output buffer and reset the writer state.

    With ``raise_on_error`` set, LxmlSyntaxError is raised (before anything
    is closed) if no content was written or tags are still open, and any
    output error is reported after the writer state has been reset.
    """
    if raise_on_error:
        if self._status < WRITER_IN_ELEMENT:
            raise LxmlSyntaxError("no content written")
        if self._element_stack:
            raise LxmlSyntaxError("pending open tags on close")

    # read the buffer's error state before closing it
    error_result = self._c_out.error
    close_result = tree.xmlOutputBufferClose(self._c_out)
    if error_result == xmlerror.XML_ERR_OK and close_result == -1:
        # no earlier error recorded, so report the failed close instead
        error_result = close_result

    self._status = WRITER_FINISHED
    self._c_out = NULL
    del self._element_stack[:]
    if raise_on_error:
        self._handle_error(error_result)
1672
+
1673
cdef _handle_error(self, int error_result):
    """Raise an exception unless ``error_result`` is ``XML_ERR_OK``.

    When a target is set, its exception context is asked to raise a stored
    exception first; otherwise a serialisation error is raised.
    """
    if error_result == xmlerror.XML_ERR_OK:
        return
    if self._target is not None:
        self._target._exc_context._raise_if_stored()
    _raiseSerialisationError(error_result)
1678
+
1679
+
1680
@cython.final
@cython.internal
cdef class _AsyncDataWriter:
    # In-memory sink: written chunks pile up in a list until collected.
    cdef list _data

    def __cinit__(self):
        self._data = []

    cdef bytes collect(self):
        """Return all pending chunks joined together and empty the list."""
        joined = b''.join(self._data)
        self._data.clear()
        return joined

    def write(self, data):
        """Append ``data`` to the list of pending chunks."""
        self._data.append(data)

    def close(self):
        """Do nothing."""
        pass
1697
+
1698
+
1699
@cython.final
@cython.internal
cdef class _AsyncIncrementalFileWriter:
    # Async front end: output is produced by a synchronous
    # _IncrementalFileWriter that writes into an in-memory _AsyncDataWriter;
    # the collected bytes are then passed to the awaitable ``write()`` of
    # the wrapped output file.
    cdef _IncrementalFileWriter _writer
    cdef _AsyncDataWriter _buffer
    cdef object _async_outfile
    cdef int _flush_after_writes
    cdef bint _should_close
    cdef bint _buffered

    def __cinit__(self, async_outfile, bytes encoding, int compresslevel, bint close,
                  bint buffered, int method):
        self._async_outfile = async_outfile
        self._should_close = close
        self._buffered = buffered
        # in buffered mode, data is handed on once more than this many chunks are pending
        self._flush_after_writes = 20
        self._buffer = _AsyncDataWriter()
        # the inner writer always runs unbuffered against the in-memory sink
        self._writer = _IncrementalFileWriter(
            self._buffer, encoding, compresslevel, close=True, buffered=False, method=method)

    cdef bytes _flush(self):
        """Return the pending bytes if they should be written now, else None."""
        if self._buffered and len(self._buffer._data) <= self._flush_after_writes:
            return None
        return self._buffer.collect()

    async def flush(self):
        """Flush the inner writer and write all pending bytes to the output file."""
        self._writer.flush()
        pending = self._buffer.collect()
        if pending:
            await self._async_outfile.write(pending)

    async def write_declaration(self, version=None, standalone=None, doctype=None):
        """Forward to the inner writer's ``write_declaration()``, then pass on due output."""
        self._writer.write_declaration(version, standalone, doctype)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    async def write_doctype(self, doctype):
        """Forward to the inner writer's ``write_doctype()``, then pass on due output."""
        self._writer.write_doctype(doctype)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    async def write(self, *args, with_tail=True, pretty_print=False, method=None):
        """Forward to the inner writer's ``write()``, then pass on due output."""
        self._writer.write(*args, with_tail=with_tail, pretty_print=pretty_print, method=method)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    def method(self, method):
        """Return the result of the inner writer's ``method()`` call."""
        return self._writer.method(method)

    def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
        """Wrap the inner writer's element context manager in an async one."""
        sync_element = self._writer.element(tag, attrib, nsmap, method, **_extra)
        return _AsyncFileWriterElement(sync_element, self)

    async def _close(self, bint raise_on_error):
        """Close the inner writer, write the remaining bytes and optionally close the file."""
        self._writer._close(raise_on_error)
        remaining = self._buffer.collect()
        if remaining:
            await self._async_outfile.write(remaining)
        if self._should_close:
            await self._async_outfile.close()
1762
+
1763
+
1764
@cython.final
@cython.internal
cdef class _AsyncFileWriterElement:
    # Async context manager around a synchronous _FileWriterElement: after
    # entering or leaving the element, output that is due is written to the
    # async output file of the owning writer.
    cdef _FileWriterElement _element_writer
    cdef _AsyncIncrementalFileWriter _writer

    def __cinit__(self, _FileWriterElement element_writer not None,
                  _AsyncIncrementalFileWriter writer not None):
        self._writer = writer
        self._element_writer = element_writer

    async def __aenter__(self):
        """Enter the wrapped element and pass on any output that is due."""
        self._element_writer.__enter__()
        pending = self._writer._flush()
        if pending:
            await self._writer._async_outfile.write(pending)

    async def __aexit__(self, *args):
        """Leave the wrapped element and pass on any output that is due."""
        self._element_writer.__exit__(*args)
        pending = self._writer._flush()
        if pending:
            await self._writer._async_outfile.write(pending)
1786
+
1787
+
1788
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _FileWriterElement:
    # Context manager for one element: entering writes the start tag,
    # leaving writes the end tag.  The writer's output method is switched
    # on entry and set back after the end tag has been written.
    cdef _IncrementalFileWriter _writer
    cdef object _element
    cdef int _new_method
    cdef int _old_method

    def __cinit__(self, _IncrementalFileWriter writer not None, element_config, int method):
        # remember the method the writer had when this object was created
        self._old_method = writer._method
        self._new_method = method
        self._element = element_config
        self._writer = writer

    def __enter__(self):
        """Switch the writer to the new method and write the start tag."""
        self._writer._method = self._new_method
        self._writer._write_start_element(self._element)

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Write the end tag, then restore the writer's previous method."""
        self._writer._write_end_element(self._element)
        self._writer._method = self._old_method
1810
+
1811
+
1812
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _MethodChanger:
    # Context manager that switches the writer's output method for the
    # duration of a ``with`` / ``async with`` block.  The two flags record
    # whether the manager has already been entered or exited.
    cdef _IncrementalFileWriter _writer
    cdef int _new_method
    cdef int _old_method
    cdef bint _entered
    cdef bint _exited

    def __cinit__(self, _IncrementalFileWriter writer not None, int method):
        self._entered = False
        self._exited = False
        self._writer = writer
        self._new_method = method
        # the method to go back to is the one active at creation time
        self._old_method = writer._method

    def __enter__(self):
        """Activate the new method; entering a second time is an error."""
        if self._entered:
            raise LxmlSyntaxError("Inconsistent enter action in context manager")
        self._writer._method = self._new_method
        self._entered = True

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Restore the old method.

        Raises LxmlSyntaxError on a second exit, or if the writer's method
        is no longer the one this manager set.
        """
        if self._exited:
            raise LxmlSyntaxError("Inconsistent exit action in context manager")
        if self._writer._method != self._new_method:
            raise LxmlSyntaxError("Method changed outside of context manager")
        self._writer._method = self._old_method
        self._exited = True

    async def __aenter__(self):
        # for your async convenience
        return self.__enter__()

    async def __aexit__(self, *args):
        # for your async convenience
        return self.__exit__(*args)