lxml 5.3.2__cp310-cp310-win32.win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (175)
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cp310-win32.pyd +0 -0
  4. lxml/_elementpath.py +341 -0
  5. lxml/apihelpers.pxi +1793 -0
  6. lxml/builder.cp310-win32.pyd +0 -0
  7. lxml/builder.py +232 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +90 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cp310-win32.pyd +0 -0
  16. lxml/etree.h +248 -0
  17. lxml/etree.pyx +3732 -0
  18. lxml/etree_api.h +195 -0
  19. lxml/extensions.pxi +833 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1923 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_html5builder.py +100 -0
  24. lxml/html/_setmixin.py +56 -0
  25. lxml/html/builder.py +133 -0
  26. lxml/html/clean.py +21 -0
  27. lxml/html/defs.py +135 -0
  28. lxml/html/diff.cp310-win32.pyd +0 -0
  29. lxml/html/diff.py +878 -0
  30. lxml/html/formfill.py +299 -0
  31. lxml/html/html5parser.py +260 -0
  32. lxml/html/soupparser.py +314 -0
  33. lxml/html/usedoctest.py +13 -0
  34. lxml/includes/__init__.pxd +0 -0
  35. lxml/includes/__init__.py +0 -0
  36. lxml/includes/c14n.pxd +25 -0
  37. lxml/includes/config.pxd +3 -0
  38. lxml/includes/dtdvalid.pxd +18 -0
  39. lxml/includes/etree_defs.h +379 -0
  40. lxml/includes/etreepublic.pxd +237 -0
  41. lxml/includes/extlibs/__init__.py +0 -0
  42. lxml/includes/extlibs/zconf.h +543 -0
  43. lxml/includes/extlibs/zlib.h +1938 -0
  44. lxml/includes/htmlparser.pxd +56 -0
  45. lxml/includes/libexslt/__init__.py +0 -0
  46. lxml/includes/libexslt/exslt.h +108 -0
  47. lxml/includes/libexslt/exsltconfig.h +70 -0
  48. lxml/includes/libexslt/exsltexports.h +63 -0
  49. lxml/includes/libexslt/libexslt.h +29 -0
  50. lxml/includes/libxml/HTMLparser.h +320 -0
  51. lxml/includes/libxml/HTMLtree.h +147 -0
  52. lxml/includes/libxml/SAX.h +204 -0
  53. lxml/includes/libxml/SAX2.h +173 -0
  54. lxml/includes/libxml/__init__.py +0 -0
  55. lxml/includes/libxml/c14n.h +128 -0
  56. lxml/includes/libxml/catalog.h +182 -0
  57. lxml/includes/libxml/chvalid.h +230 -0
  58. lxml/includes/libxml/debugXML.h +217 -0
  59. lxml/includes/libxml/dict.h +81 -0
  60. lxml/includes/libxml/encoding.h +233 -0
  61. lxml/includes/libxml/entities.h +151 -0
  62. lxml/includes/libxml/globals.h +529 -0
  63. lxml/includes/libxml/hash.h +236 -0
  64. lxml/includes/libxml/list.h +137 -0
  65. lxml/includes/libxml/nanoftp.h +186 -0
  66. lxml/includes/libxml/nanohttp.h +81 -0
  67. lxml/includes/libxml/parser.h +1265 -0
  68. lxml/includes/libxml/parserInternals.h +662 -0
  69. lxml/includes/libxml/pattern.h +100 -0
  70. lxml/includes/libxml/relaxng.h +218 -0
  71. lxml/includes/libxml/schemasInternals.h +958 -0
  72. lxml/includes/libxml/schematron.h +142 -0
  73. lxml/includes/libxml/threads.h +94 -0
  74. lxml/includes/libxml/tree.h +1314 -0
  75. lxml/includes/libxml/uri.h +94 -0
  76. lxml/includes/libxml/valid.h +448 -0
  77. lxml/includes/libxml/xinclude.h +129 -0
  78. lxml/includes/libxml/xlink.h +189 -0
  79. lxml/includes/libxml/xmlIO.h +369 -0
  80. lxml/includes/libxml/xmlautomata.h +146 -0
  81. lxml/includes/libxml/xmlerror.h +919 -0
  82. lxml/includes/libxml/xmlexports.h +50 -0
  83. lxml/includes/libxml/xmlmemory.h +228 -0
  84. lxml/includes/libxml/xmlmodule.h +57 -0
  85. lxml/includes/libxml/xmlreader.h +428 -0
  86. lxml/includes/libxml/xmlregexp.h +222 -0
  87. lxml/includes/libxml/xmlsave.h +88 -0
  88. lxml/includes/libxml/xmlschemas.h +246 -0
  89. lxml/includes/libxml/xmlschemastypes.h +152 -0
  90. lxml/includes/libxml/xmlstring.h +140 -0
  91. lxml/includes/libxml/xmlunicode.h +202 -0
  92. lxml/includes/libxml/xmlversion.h +526 -0
  93. lxml/includes/libxml/xmlwriter.h +488 -0
  94. lxml/includes/libxml/xpath.h +575 -0
  95. lxml/includes/libxml/xpathInternals.h +632 -0
  96. lxml/includes/libxml/xpointer.h +137 -0
  97. lxml/includes/libxslt/__init__.py +0 -0
  98. lxml/includes/libxslt/attributes.h +39 -0
  99. lxml/includes/libxslt/documents.h +93 -0
  100. lxml/includes/libxslt/extensions.h +262 -0
  101. lxml/includes/libxslt/extra.h +72 -0
  102. lxml/includes/libxslt/functions.h +78 -0
  103. lxml/includes/libxslt/imports.h +75 -0
  104. lxml/includes/libxslt/keys.h +53 -0
  105. lxml/includes/libxslt/libxslt.h +36 -0
  106. lxml/includes/libxslt/namespaces.h +68 -0
  107. lxml/includes/libxslt/numbersInternals.h +73 -0
  108. lxml/includes/libxslt/preproc.h +43 -0
  109. lxml/includes/libxslt/security.h +104 -0
  110. lxml/includes/libxslt/templates.h +77 -0
  111. lxml/includes/libxslt/transform.h +207 -0
  112. lxml/includes/libxslt/trio.h +216 -0
  113. lxml/includes/libxslt/triodef.h +220 -0
  114. lxml/includes/libxslt/variables.h +118 -0
  115. lxml/includes/libxslt/win32config.h +51 -0
  116. lxml/includes/libxslt/xslt.h +110 -0
  117. lxml/includes/libxslt/xsltInternals.h +1992 -0
  118. lxml/includes/libxslt/xsltconfig.h +179 -0
  119. lxml/includes/libxslt/xsltexports.h +64 -0
  120. lxml/includes/libxslt/xsltlocale.h +44 -0
  121. lxml/includes/libxslt/xsltutils.h +343 -0
  122. lxml/includes/lxml-version.h +3 -0
  123. lxml/includes/relaxng.pxd +64 -0
  124. lxml/includes/schematron.pxd +34 -0
  125. lxml/includes/tree.pxd +494 -0
  126. lxml/includes/uri.pxd +5 -0
  127. lxml/includes/xinclude.pxd +22 -0
  128. lxml/includes/xmlerror.pxd +852 -0
  129. lxml/includes/xmlparser.pxd +265 -0
  130. lxml/includes/xmlschema.pxd +35 -0
  131. lxml/includes/xpath.pxd +136 -0
  132. lxml/includes/xslt.pxd +190 -0
  133. lxml/isoschematron/__init__.py +348 -0
  134. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  135. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  136. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  137. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  138. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  142. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  143. lxml/iterparse.pxi +438 -0
  144. lxml/lxml.etree.h +248 -0
  145. lxml/lxml.etree_api.h +195 -0
  146. lxml/nsclasses.pxi +281 -0
  147. lxml/objectify.cp310-win32.pyd +0 -0
  148. lxml/objectify.pyx +2145 -0
  149. lxml/objectpath.pxi +332 -0
  150. lxml/parser.pxi +2000 -0
  151. lxml/parsertarget.pxi +180 -0
  152. lxml/proxy.pxi +619 -0
  153. lxml/public-api.pxi +178 -0
  154. lxml/pyclasslookup.py +3 -0
  155. lxml/readonlytree.pxi +565 -0
  156. lxml/relaxng.pxi +165 -0
  157. lxml/sax.cp310-win32.pyd +0 -0
  158. lxml/sax.py +275 -0
  159. lxml/saxparser.pxi +875 -0
  160. lxml/schematron.pxi +168 -0
  161. lxml/serializer.pxi +1781 -0
  162. lxml/usedoctest.py +13 -0
  163. lxml/xinclude.pxi +67 -0
  164. lxml/xmlerror.pxi +1654 -0
  165. lxml/xmlid.pxi +179 -0
  166. lxml/xmlschema.pxi +215 -0
  167. lxml/xpath.pxi +487 -0
  168. lxml/xslt.pxi +950 -0
  169. lxml/xsltext.pxi +242 -0
  170. lxml-5.3.2.dist-info/METADATA +100 -0
  171. lxml-5.3.2.dist-info/RECORD +175 -0
  172. lxml-5.3.2.dist-info/WHEEL +5 -0
  173. lxml-5.3.2.dist-info/licenses/LICENSE.txt +29 -0
  174. lxml-5.3.2.dist-info/licenses/LICENSES.txt +29 -0
  175. lxml-5.3.2.dist-info/top_level.txt +1 -0
lxml/serializer.pxi ADDED
@@ -0,0 +1,1781 @@
1
+ # XML serialization and output functions
2
+
3
+ cdef object GzipFile
4
+ from gzip import GzipFile
5
+
6
+
cdef class SerialisationError(LxmlError):
    """Raised when serialising a tree fails with a libxml2 error.
    """
10
+
11
+
# Output methods supported by the serialiser.  _findOutputMethod() maps the
# method names "xml", "html" and "text" onto these values.
cdef enum _OutputMethods:
    OUTPUT_METHOD_XML
    OUTPUT_METHOD_HTML
    OUTPUT_METHOD_TEXT
16
+
17
+
cdef int _findOutputMethod(method) except -1:
    """Translate an output method name into its _OutputMethods value.

    ``None`` selects XML.  Otherwise the name is lower-cased and must be one
    of "xml", "html" or "text"; anything else raises ValueError (the message
    shows the lower-cased name).
    """
    if method is None:
        return OUTPUT_METHOD_XML

    name = method.lower()
    if name == "xml":
        return OUTPUT_METHOD_XML
    elif name == "html":
        return OUTPUT_METHOD_HTML
    elif name == "text":
        return OUTPUT_METHOD_TEXT
    else:
        raise ValueError(f"unknown output method {name!r}")
29
+
30
+
cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
    """Return the text content of ``c_node`` for the "text" output method.

    The content is collected into a temporary libxml2 buffer.  If
    ``with_tail`` is set, the content of the text nodes that
    _textNodeOrSkip() finds after the node is appended as well.

    ``encoding`` selects the result type:
    * ``unicode`` (the type): a decoded text string
    * ``None``, 'utf8' or 'utf-8': the raw buffer bytes
    * anything else: the text re-encoded with that codec ('strict' errors)

    Raises MemoryError if the buffer cannot be allocated and
    SerialisationError if libxml2 fails to collect the content.
    """
    cdef bint needs_conversion
    cdef const_xmlChar* c_text
    cdef xmlNode* c_text_node
    cdef tree.xmlBuffer* c_buffer
    cdef int error_result
    cdef int c_length

    c_buffer = tree.xmlBufferCreate()
    if c_buffer is NULL:
        raise MemoryError()

    with nogil:
        error_result = tree.xmlNodeBufGetContent(c_buffer, c_node)
        if with_tail:
            c_text_node = _textNodeOrSkip(c_node.next)
            while c_text_node is not NULL:
                tree.xmlBufferWriteChar(c_buffer, <const_char*>c_text_node.content)
                c_text_node = _textNodeOrSkip(c_text_node.next)
        c_text = tree.xmlBufferContent(c_buffer)

    if error_result < 0 or c_text is NULL:
        tree.xmlBufferFree(c_buffer)
        raise SerialisationError("Error during serialisation (out of memory?)")

    # From here on, the buffer is released by the 'finally' clause.
    try:
        c_length = tree.xmlBufferLength(c_buffer)

        if encoding is unicode:
            needs_conversion = True
        elif encoding is None:
            needs_conversion = False
        else:
            # Python prefers lower case encoding names
            encoding = encoding.lower()
            if encoding in ('utf8', 'utf-8'):
                needs_conversion = False
            elif encoding == 'ascii':
                # Only convert when isutf8l() flags the content; the
                # conversion below is then expected to raise.
                needs_conversion = isutf8l(c_text, c_length) != 0
            else:
                needs_conversion = True

        if not needs_conversion:
            text = (<unsigned char*>c_text)[:c_length]
        else:
            text = (<const_char*>c_text)[:c_length].decode('utf8')
            if encoding is not unicode:
                encoding = _utf8(encoding)
                text = python.PyUnicode_AsEncodedString(
                    text, encoding, 'strict')
    finally:
        tree.xmlBufferFree(c_buffer)
    return text
81
+
82
+
cdef _tostring(_Element element, encoding, doctype, method,
               bint write_xml_declaration, bint write_complete_document,
               bint pretty_print, bint with_tail, int standalone):
    """Serialise an element into an in-memory string.

    Returns None for a None element.  The "text" method is delegated to
    _textToString().  Otherwise the tree is written into a libxml2 output
    buffer and the result is returned as a text string when ``encoding`` is
    the ``unicode`` type, or as bytes in all other cases.

    Raises LookupError for an encoding that libxml2 does not know,
    MemoryError if the output buffer cannot be allocated, and whatever
    _raiseSerialisationError() raises when writing or closing fails.
    """
    cdef tree.xmlOutputBuffer* c_buffer
    cdef tree.xmlBuf* c_result_buffer
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef const_char* c_enc = NULL
    cdef const_xmlChar* c_version
    cdef const_xmlChar* c_doctype = NULL
    cdef int c_method
    cdef int error_result

    if element is None:
        return None
    _assertValidNode(element)

    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        return _textToString(element._c_node, encoding, with_tail)

    if encoding is not None and encoding is not unicode:
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)
    if doctype is not None:
        doctype = _utf8(doctype)
        c_doctype = _xcstr(doctype)

    # The encoding is needed twice: to look up the encoding handler here
    # and again as an argument while writing the output.
    enchandler = tree.xmlFindCharEncodingHandler(c_enc)
    if enchandler is NULL and c_enc is not NULL:
        if encoding is not None:
            encoding = encoding.decode('UTF-8')
        raise LookupError(f"unknown encoding: '{encoding}'")

    c_buffer = tree.xmlAllocOutputBuffer(enchandler)
    if c_buffer is NULL:
        tree.xmlCharEncCloseFunc(enchandler)
        raise MemoryError()

    with nogil:
        _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_doctype, c_method,
                           write_xml_declaration, write_complete_document,
                           pretty_print, with_tail, standalone)
        tree.xmlOutputBufferFlush(c_buffer)
        # Read the result from the conversion buffer if there is one,
        # otherwise from the plain buffer.
        if c_buffer.conv is NULL:
            c_result_buffer = c_buffer.buffer
        else:
            c_result_buffer = c_buffer.conv

    error_result = c_buffer.error
    if error_result != xmlerror.XML_ERR_OK:
        tree.xmlOutputBufferClose(c_buffer)
        _raiseSerialisationError(error_result)

    try:
        if encoding is unicode:
            result = (<unsigned char*>tree.xmlBufContent(
                c_result_buffer))[:tree.xmlBufUse(c_result_buffer)].decode('UTF-8')
        else:
            result = <bytes>(<unsigned char*>tree.xmlBufContent(
                c_result_buffer))[:tree.xmlBufUse(c_result_buffer)]
    finally:
        # Always close (and thereby free) the output buffer.
        error_result = tree.xmlOutputBufferClose(c_buffer)
    if error_result == -1:
        _raiseSerialisationError(error_result)
    return result
152
+
cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments, inclusive_ns_prefixes):
    """Serialise an element or a tree to C14N (canonical XML) bytes.

    For an _Element, the document to serialise is obtained from
    _plainFakeRootDoc(); for anything else, the document returned by
    _documentOrRaise() is used directly.  ``inclusive_ns_prefixes``, if
    non-empty, is converted with _convert_ns_prefixes() and passed on to
    libxml2.

    Raises C14NError if libxml2 reports a failure or returns no buffer.
    """
    cdef xmlDoc* c_doc
    cdef xmlChar* c_buffer = NULL
    cdef int byte_count = -1
    cdef bytes result
    cdef _Document doc
    cdef xmlChar **c_inclusive_ns_prefixes = NULL

    if isinstance(element_or_tree, _Element):
        _assertValidNode(<_Element>element_or_tree)
        doc = (<_Element>element_or_tree)._doc
        c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0)
    else:
        doc = _documentOrRaise(element_or_tree)
        _assertValidDoc(doc)
        c_doc = doc._c_doc

    try:
        # Convert the prefixes inside the 'try' block: the conversion can
        # raise, and _destroyFakeDoc() must still run in that case.
        if inclusive_ns_prefixes:
            c_inclusive_ns_prefixes = _convert_ns_prefixes(
                c_doc.dict, inclusive_ns_prefixes)
        with nogil:
            byte_count = c14n.xmlC14NDocDumpMemory(
                c_doc, NULL, exclusive, c_inclusive_ns_prefixes, with_comments, &c_buffer)

    finally:
        _destroyFakeDoc(doc._c_doc, c_doc)
        if c_inclusive_ns_prefixes is not NULL:
            python.lxml_free(c_inclusive_ns_prefixes)

    if byte_count < 0 or c_buffer is NULL:
        if c_buffer is not NULL:
            tree.xmlFree(c_buffer)
        raise C14NError, "C14N failed"
    try:
        # Copy the libxml2 buffer into a Python bytes object.
        result = c_buffer[:byte_count]
    finally:
        tree.xmlFree(c_buffer)
    return result
191
+
cdef _raiseSerialisationError(int error_result):
    """Raise the exception that corresponds to a libxml2 error code.

    XML_ERR_NO_MEMORY becomes MemoryError.  Every other code raises
    SerialisationError, using the name from ErrorTypes._getName() as the
    message or "unknown error <code>" if no name is found.
    """
    if error_result == xmlerror.XML_ERR_NO_MEMORY:
        raise MemoryError()

    error_name = ErrorTypes._getName(error_result)
    raise SerialisationError(
        error_name if error_name is not None
        else f"unknown error {error_result}")
199
+
200
+ ############################################################
201
+ # low-level serialisation functions
202
+
cdef void _writeDoctype(tree.xmlOutputBuffer* c_buffer,
                        const_xmlChar* c_doctype) noexcept nogil:
    # Write the given doctype string verbatim, followed by a newline.
    cdef int c_doctype_len = tree.xmlStrlen(c_doctype)
    tree.xmlOutputBufferWrite(c_buffer, c_doctype_len, <const_char*>c_doctype)
    tree.xmlOutputBufferWriteString(c_buffer, "\n")
208
+
# Serialise 'c_node' into 'c_buffer'.
#
# Depending on the flags, this writes in order: the XML declaration (XML
# method only), the siblings preceding the internal DTD subset, the
# user-provided doctype string or the internal DTD subset, the siblings
# preceding the node, the node itself, its tail, and the siblings
# following it.  Failures are reported through 'c_buffer.error'.
cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
                             xmlNode* c_node, const_char* encoding, const_xmlChar* c_doctype,
                             int c_method, bint write_xml_declaration,
                             bint write_complete_document,
                             bint pretty_print, bint with_tail,
                             int standalone) noexcept nogil:
    cdef xmlNode* c_nsdecl_node
    cdef xmlDoc* c_doc = c_node.doc
    if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
        _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)

    # comments/processing instructions before doctype declaration
    if write_complete_document and not c_buffer.error and c_doc.intSubset:
        _writePrevSiblings(c_buffer, <xmlNode*>c_doc.intSubset, encoding, pretty_print)

    # an explicitly provided doctype string replaces the internal DTD subset
    if c_doctype:
        _writeDoctype(c_buffer, c_doctype)
    # write internal DTD subset, preceding PIs/comments, etc.
    if write_complete_document and not c_buffer.error:
        if c_doctype is NULL:
            _writeDtdToBuffer(c_buffer, c_doc, c_node.name, c_method, encoding)
        _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)

    c_nsdecl_node = c_node
    if not c_node.parent or c_node.parent.type != tree.XML_DOCUMENT_NODE:
        # copy the node and add namespaces from parents
        # this is required to make libxml write them
        # (xmlCopyNode() mode 2: per the libxml2 docs, copies properties
        # and namespaces but not the children)
        c_nsdecl_node = tree.xmlCopyNode(c_node, 2)
        if not c_nsdecl_node:
            c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
            return
        _copyParentNamespaces(c_node, c_nsdecl_node)

        # let the copy temporarily share the tree links of the original
        c_nsdecl_node.parent = c_node.parent
        c_nsdecl_node.children = c_node.children
        c_nsdecl_node.last = c_node.last

    # write node
    if c_method == OUTPUT_METHOD_HTML:
        tree.htmlNodeDumpFormatOutput(
            c_buffer, c_doc, c_nsdecl_node, encoding, pretty_print)
    else:
        tree.xmlNodeDumpOutput(
            c_buffer, c_doc, c_nsdecl_node, 0, pretty_print, encoding)

    if c_nsdecl_node is not c_node:
        # clean up
        # detach the borrowed children before freeing the copy so that
        # they stay owned by the original node
        c_nsdecl_node.children = c_nsdecl_node.last = NULL
        tree.xmlFreeNode(c_nsdecl_node)

    if c_buffer.error:
        return

    # write tail, trailing comments, etc.
    if with_tail:
        _writeTail(c_buffer, c_node, encoding, c_method, pretty_print)
    if write_complete_document:
        _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
    if pretty_print:
        tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
269
+
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
                                    const_xmlChar* version, const_char* encoding,
                                    int standalone) noexcept nogil:
    # Write the "<?xml version='...' encoding='...' ...?>" declaration.
    # A NULL version defaults to "1.0".  'standalone' adds standalone='yes'
    # for 1 and standalone='no' for 0; any other value omits the attribute.
    # The explicit lengths are the byte lengths of the literals.
    if version is NULL:
        version = <unsigned char*>"1.0"

    tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
    tree.xmlOutputBufferWriteString(c_buffer, <const_char*>version)
    tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
    tree.xmlOutputBufferWriteString(c_buffer, encoding)

    if standalone == 1:
        tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n")
    elif standalone == 0:
        tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n")
    else:
        tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
285
+
# Write the <!DOCTYPE ...> declaration for the internal DTD subset of
# 'c_doc', including the subset content if there is any.
#
# Nothing is written if the document has no internal subset, if the subset
# has no name, or if its name does not match 'c_root_name'.
cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
                            xmlDoc* c_doc, const_xmlChar* c_root_name,
                            int c_method, const_char* encoding) noexcept nogil:
    cdef tree.xmlDtd* c_dtd
    cdef xmlNode* c_node
    cdef char* quotechar
    c_dtd = c_doc.intSubset
    if not c_dtd or not c_dtd.name:
        return

    # Name in document type declaration must match the root element tag.
    # For XML, case sensitive match, for HTML insensitive.
    if c_method == OUTPUT_METHOD_HTML:
        if tree.xmlStrcasecmp(c_root_name, c_dtd.name) != 0:
            return
    else:
        if tree.xmlStrcmp(c_root_name, c_dtd.name) != 0:
            return

    tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
    tree.xmlOutputBufferWriteString(c_buffer, <const_char*>c_dtd.name)

    # treat empty identifiers like missing ones
    cdef const_xmlChar* public_id = c_dtd.ExternalID
    cdef const_xmlChar* sys_url = c_dtd.SystemID
    if public_id and public_id[0] == b'\0':
        public_id = NULL
    if sys_url and sys_url[0] == b'\0':
        sys_url = NULL

    if public_id:
        tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
        tree.xmlOutputBufferWriteString(c_buffer, <const_char*>public_id)
        if sys_url:
            tree.xmlOutputBufferWrite(c_buffer, 2, '" ')
        else:
            tree.xmlOutputBufferWrite(c_buffer, 1, '"')
    elif sys_url:
        tree.xmlOutputBufferWrite(c_buffer, 8, ' SYSTEM ')

    if sys_url:
        # quote with ' if the system URL itself contains a " character
        if tree.xmlStrchr(sys_url, b'"'):
            quotechar = '\''
        else:
            quotechar = '"'
        tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)
        tree.xmlOutputBufferWriteString(c_buffer, <const_char*>sys_url)
        tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)

    # no subset content at all: close the declaration without a [...] part
    if (not c_dtd.entities and not c_dtd.elements and
            not c_dtd.attributes and not c_dtd.notations and
            not c_dtd.pentities):
        tree.xmlOutputBufferWrite(c_buffer, 2, '>\n')
        return

    tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n')
    if c_dtd.notations and not c_buffer.error:
        # notations are dumped into a temporary buffer and copied over
        c_buf = tree.xmlBufferCreate()
        if not c_buf:
            c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
            return
        tree.xmlDumpNotationTable(c_buf, <tree.xmlNotationTable*>c_dtd.notations)
        tree.xmlOutputBufferWrite(
            c_buffer, tree.xmlBufferLength(c_buf),
            <const_char*>tree.xmlBufferContent(c_buf))
        tree.xmlBufferFree(c_buf)
    # write the child nodes of the DTD one by one, stopping on error
    c_node = c_dtd.children
    while c_node and not c_buffer.error:
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
        c_node = c_node.next
    tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
356
+
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                     const_char* encoding, int c_method, bint pretty_print) noexcept nogil:
    "Write the text and CDATA nodes that directly follow the element."
    cdef xmlNode* c_tail = c_node.next
    while c_tail and not c_buffer.error:
        if (c_tail.type != tree.XML_TEXT_NODE and
                c_tail.type != tree.XML_CDATA_SECTION_NODE):
            # the tail ends at the first node that is not text or CDATA
            break
        if c_method == OUTPUT_METHOD_HTML:
            tree.htmlNodeDumpFormatOutput(
                c_buffer, c_tail.doc, c_tail, encoding, pretty_print)
        else:
            tree.xmlNodeDumpOutput(
                c_buffer, c_tail.doc, c_tail, 0, pretty_print, encoding)
        c_tail = c_tail.next
370
+
cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                             const_char* encoding, bint pretty_print) noexcept nogil:
    # Write the processing instructions and comments that directly precede
    # a root-level node, in document order.  Does nothing if the parent of
    # the node is an element.
    cdef xmlNode* c_current
    if c_node.parent and _isElement(c_node.parent):
        return

    # we are at a root node: walk back to the first PI/comment sibling
    c_current = c_node
    while c_current.prev and c_current.prev.type in (
            tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
        c_current = c_current.prev

    # then write forward up to (but not including) the node itself
    while c_current is not c_node and not c_buffer.error:
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_current, 0,
                               pretty_print, encoding)
        if pretty_print:
            tree.xmlOutputBufferWriteString(c_buffer, "\n")
        c_current = c_current.next
388
+
cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                             const_char* encoding, bint pretty_print) noexcept nogil:
    # Write the processing instructions and comments that directly follow
    # a root-level node.  Does nothing if the parent of the node is an
    # element.
    cdef xmlNode* c_current
    if c_node.parent and _isElement(c_node.parent):
        return

    # we are at a root node, so add PI and comment siblings
    c_current = c_node.next
    while not c_buffer.error and c_current and c_current.type in (
            tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
        if pretty_print:
            # here the newline goes before each sibling
            tree.xmlOutputBufferWriteString(c_buffer, "\n")
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_current, 0,
                               pretty_print, encoding)
        c_current = c_current.next
404
+
405
+
406
+ # copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent())
cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
    """Write a NUL-terminated attribute value to ``buf`` with escaping.

    Newline, carriage return and tab are written as numeric character
    references, and the characters ``"``, ``<``, ``>`` and ``&`` as named
    entities.  All other bytes are passed through in runs.  A NULL string
    writes nothing.
    """
    cdef const char *run_start
    cdef const char *pos
    cdef const char *c_escape
    cdef int c_escape_len

    if string == NULL:
        return

    run_start = pos = <const char*>string
    while pos[0] != 0:
        if pos[0] == b'\n':
            c_escape = "&#10;"
            c_escape_len = 5
        elif pos[0] == b'\r':
            c_escape = "&#13;"
            c_escape_len = 5
        elif pos[0] == b'\t':
            c_escape = "&#9;"
            c_escape_len = 4
        elif pos[0] == b'"':
            c_escape = "&quot;"
            c_escape_len = 6
        elif pos[0] == b'<':
            c_escape = "&lt;"
            c_escape_len = 4
        elif pos[0] == b'>':
            c_escape = "&gt;"
            c_escape_len = 4
        elif pos[0] == b'&':
            c_escape = "&amp;"
            c_escape_len = 5
        else:
            # Leave further encoding and escaping to the buffer encoder.
            pos += 1
            continue

        # Flush the pending run of plain bytes, then write the escape.
        if run_start != pos:
            tree.xmlOutputBufferWrite(buf, pos - run_start, run_start)
        tree.xmlOutputBufferWrite(buf, c_escape_len, c_escape)
        pos += 1
        run_start = pos

    # Flush whatever is left after the last escaped character.
    if run_start != pos:
        tree.xmlOutputBufferWrite(buf, pos - run_start, run_start)
477
+
478
+
479
+ ############################################################
480
+ # output to file-like objects
481
+
482
+ cdef object io_open
483
+ from io import open as io_open
484
+
485
+ cdef object gzip
486
+ import gzip
487
+
488
+ cdef object getwriter
489
+ from codecs import getwriter
490
+ cdef object utf8_writer = getwriter('utf8')
491
+
492
+ cdef object contextmanager
493
+ from contextlib import contextmanager
494
+
495
+ cdef object _open_utf8_file
496
+
@contextmanager
def _open_utf8_file(file, compression=0):
    """Context manager that yields a writer producing UTF-8 output.

    ``file`` is first passed through _getFSPathOrObject().  A string result
    is opened as a file path, anything else is used as a file object.  With
    a non-zero ``compression``, the output goes through a GzipFile using
    that compression level.
    """
    file = _getFSPathOrObject(file)
    is_path = _isString(file)

    if compression:
        if is_path:
            gzip_file = gzip.GzipFile(file, mode='wb', compresslevel=compression)
        else:
            gzip_file = gzip.GzipFile(fileobj=file, mode='wb', compresslevel=compression)
        with gzip_file as zf:
            yield utf8_writer(zf)
    elif is_path:
        with io_open(file, 'w', encoding='utf8') as f:
            yield f
    else:
        # A file object passed in by the caller is wrapped but not closed.
        yield utf8_writer(file)
513
+
514
+
@cython.final
@cython.internal
cdef class _FilelikeWriter:
    """Adapter that lets libxml2 write its output to a Python file-like object.

    Exceptions raised by the file-like object are stored in the exception
    context instead of propagating through the C callbacks.
    """
    cdef object _filelike
    # bound close() method to call on close(), or None to leave the file open
    cdef object _close_filelike
    cdef _ExceptionContext _exc_context
    cdef _ErrorLog error_log

    def __cinit__(self, filelike, exc_context=None, compression=None, close=False):
        compress = compression is not None and compression > 0
        if compress:
            # The GzipFile wrapper is created here, so it is always closed.
            filelike = GzipFile(
                fileobj=filelike, mode='wb', compresslevel=compression)
        if compress or close:
            self._close_filelike = filelike.close
        self._filelike = filelike
        self._exc_context = _ExceptionContext() if exc_context is None else exc_context
        self.error_log = _ErrorLog()

    cdef tree.xmlOutputBuffer* _createOutputBuffer(
            self, tree.xmlCharEncodingHandler* enchandler) except NULL:
        """Create a libxml2 output buffer that writes through this object.

        Raises IOError if libxml2 cannot create the buffer.
        """
        cdef tree.xmlOutputBuffer* c_buffer = tree.xmlOutputBufferCreateIO(
            <tree.xmlOutputWriteCallback>_writeFilelikeWriter, _closeFilelikeWriter,
            <python.PyObject*>self, enchandler)
        if c_buffer is NULL:
            raise IOError("Could not create I/O writer context.")
        return c_buffer

    cdef int write(self, char* c_buffer, int size) noexcept:
        """Write ``size`` bytes to the file-like object.

        Returns ``size`` on success, or -1 after storing the exception.
        """
        try:
            if self._filelike is None:
                raise IOError("File is already closed")
            self._filelike.write(<bytes>c_buffer[:size])
        except:
            size = -1
            self._exc_context._store_raised()
        finally:
            return size  # and swallow any further exceptions

    cdef int close(self) noexcept:
        """Call the stored close function, if any, and drop the file reference.

        Returns 0 on success, or -1 after storing the exception.
        """
        retval = 0
        try:
            if self._close_filelike is not None:
                self._close_filelike()
            # we should not close the file here as we didn't open it
            self._filelike = None
        except:
            retval = -1
            self._exc_context._store_raised()
        finally:
            return retval  # and swallow any further exceptions
570
+
# Write callback registered with xmlOutputBufferCreateIO(): 'ctxt' is the
# _FilelikeWriter that was passed as the I/O context.
cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int length) noexcept:
    return (<_FilelikeWriter>ctxt).write(c_buffer, length)
573
+
# Close callback registered with xmlOutputBufferCreateIO(): 'ctxt' is the
# _FilelikeWriter that was passed as the I/O context.
cdef int _closeFilelikeWriter(void* ctxt) noexcept:
    return (<_FilelikeWriter>ctxt).close()
576
+
cdef _tofilelike(f, _Element element, encoding, doctype, method,
                 bint write_xml_declaration, bint write_doctype,
                 bint pretty_print, bint with_tail, int standalone,
                 int compression):
    """Serialise an element to a file path or file-like object.

    For the "text" method, the text from _textToString() is (optionally
    gzip-compressed and) written in one go.  For the other methods, the
    output buffer from _create_output_buffer() is filled and closed by
    _serialise_node().

    Exceptions stored by a file-like writer are re-raised; other failures
    are raised through _raiseSerialisationError().
    """
    cdef _FilelikeWriter writer = None
    cdef tree.xmlOutputBuffer* c_buffer
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef const_char* c_enc = NULL
    cdef const_xmlChar* c_doctype = NULL
    cdef int error_result

    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        data = _textToString(element._c_node, encoding, with_tail)
        if compression:
            # compress in memory first, then write the result below
            bytes_out = BytesIO()
            with GzipFile(fileobj=bytes_out, mode='wb', compresslevel=compression) as gzip_file:
                gzip_file.write(data)
            data = bytes_out.getvalue()
        f = _getFSPathOrObject(f)
        if not _isString(f):
            f.write(data)
        else:
            filename8 = _encodeFilename(f)
            with open(filename8, 'wb') as out_file:
                out_file.write(data)
        return

    if encoding is not None:
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)
    if doctype is not None:
        doctype = _utf8(doctype)
        c_doctype = _xcstr(doctype)

    writer = _create_output_buffer(f, c_enc, compression, &c_buffer, close=False)
    if writer is None:
        # no Python writer object is involved, so the GIL can be released
        with nogil:
            error_result = _serialise_node(
                c_buffer, c_doctype, c_enc, element._c_node, c_method,
                write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
    else:
        # writing calls back into the Python writer object: keep the GIL
        error_result = _serialise_node(
            c_buffer, c_doctype, c_enc, element._c_node, c_method,
            write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
        writer._exc_context._raise_if_stored()

    if error_result != xmlerror.XML_ERR_OK:
        _raiseSerialisationError(error_result)
631
+
632
+
cdef int _serialise_node(tree.xmlOutputBuffer* c_buffer, const_xmlChar* c_doctype,
                         const_char* c_enc, xmlNode* c_node, int c_method,
                         bint write_xml_declaration, bint write_doctype, bint pretty_print,
                         bint with_tail, int standalone) noexcept nogil:
    # Write the node into the output buffer and close the buffer in all
    # cases.  Returns the write error if there was one, -1 if only closing
    # failed, and XML_ERR_OK otherwise.
    cdef int write_error
    cdef int close_result

    _writeNodeToBuffer(
        c_buffer, c_node, c_enc, c_doctype, c_method,
        write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)

    # read the error state before the buffer is closed
    write_error = c_buffer.error
    close_result = tree.xmlOutputBufferClose(c_buffer)

    if write_error != xmlerror.XML_ERR_OK:
        return write_error
    if close_result == -1:
        return close_result
    return xmlerror.XML_ERR_OK
648
+
649
+
cdef _FilelikeWriter _create_output_buffer(
        f, const_char* c_enc, int c_compression,
        tree.xmlOutputBuffer** c_buffer_ret, bint close):
    """Create a libxml2 output buffer for a file path or file-like object.

    The buffer is stored in ``c_buffer_ret[0]``.  Returns the
    _FilelikeWriter used for objects with a ``write`` attribute, or None
    when the target is a file path that libxml2 opens itself.

    Raises LookupError for an unknown encoding, TypeError if ``f`` is
    neither a path nor has a ``write`` attribute, and IOError if the buffer
    cannot be created.
    """
    cdef tree.xmlOutputBuffer* c_buffer
    cdef _FilelikeWriter writer
    cdef bytes filename8
    enchandler = tree.xmlFindCharEncodingHandler(c_enc)
    if enchandler is NULL:
        raise LookupError(
            f"unknown encoding: '{c_enc.decode('UTF-8') if c_enc is not NULL else u''}'")
    # from here on, any failure must release the encoding handler again
    try:
        f = _getFSPathOrObject(f)
        if _isString(f):
            filename8 = _encodeFilename(f)
            if b'%' in filename8 and (
                    # Exclude absolute Windows paths and file:// URLs.
                    _isFilePath(<const xmlChar*>filename8) not in (NO_FILE_PATH, ABS_WIN_FILE_PATH)
                    or filename8[:7].lower() == b'file://'):
                # A file path (not a URL) containing the '%' URL escape character.
                # libxml2 uses URL-unescaping on these, so escape the path before passing it in.
                filename8 = filename8.replace(b'%', b'%25')
            c_buffer = tree.xmlOutputBufferCreateFilename(
                _cstr(filename8), enchandler, c_compression)
            if c_buffer is NULL:
                python.PyErr_SetFromErrno(IOError)  # raises IOError
            writer = None
        elif hasattr(f, 'write'):
            writer = _FilelikeWriter(f, compression=c_compression, close=close)
            c_buffer = writer._createOutputBuffer(enchandler)
        else:
            raise TypeError(
                f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    except:
        tree.xmlCharEncCloseFunc(enchandler)
        raise
    c_buffer_ret[0] = c_buffer
    return writer
687
+
688
cdef xmlChar **_convert_ns_prefixes(tree.xmlDict* c_dict, ns_prefixes) except NULL:
    # Build a NULL-terminated C array of namespace prefix strings.
    #
    # Each prefix in 'ns_prefixes' is looked up in 'c_dict'; only prefixes
    # found there are stored in the array, as pointers to the dict's strings.
    # The array itself is allocated with lxml_malloc(), and the error path
    # below releases it with lxml_free().
    # Raises MemoryError if the allocation fails.
    cdef size_t i, num_ns_prefixes = len(ns_prefixes)
    # Need to allocate one extra memory block to handle last NULL entry
    c_ns_prefixes = <xmlChar **>python.lxml_malloc(num_ns_prefixes + 1, sizeof(xmlChar*))
    if not c_ns_prefixes:
        raise MemoryError()
    i = 0
    try:
        for prefix in ns_prefixes:
            prefix_utf = _utf8(prefix)
            c_prefix = tree.xmlDictExists(c_dict, _xcstr(prefix_utf), len(prefix_utf))
            if c_prefix:
                # unknown prefixes do not need to get serialised
                c_ns_prefixes[i] = <xmlChar*>c_prefix
                i += 1
    except:
        # Do not leak the array if converting a prefix failed.
        python.lxml_free(c_ns_prefixes)
        raise

    c_ns_prefixes[i] = NULL  # append end marker
    return c_ns_prefixes
709
+
710
cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
                     int compression, inclusive_ns_prefixes):
    # Write the C14N serialisation of 'element' to 'f'.
    #
    # 'f' may be a file path or an object with a '.write()' method.
    # Raises TypeError for any other kind of 'f', and C14NError if the
    # serialisation reports a negative result.  When writing through a
    # file-like object, the first entry of the writer's error log (if any)
    # is used as the C14NError message, and an exception stored in the
    # writer's exception context is re-raised first.
    cdef _FilelikeWriter writer = None
    cdef tree.xmlOutputBuffer* c_buffer
    cdef xmlChar **c_inclusive_ns_prefixes = NULL
    cdef char* c_filename
    cdef xmlDoc* c_base_doc
    cdef xmlDoc* c_doc
    cdef int bytes_count, error = 0

    c_base_doc = element._c_node.doc
    c_doc = _fakeRootDoc(c_base_doc, element._c_node)
    try:
        c_inclusive_ns_prefixes = (
            _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes)
            if inclusive_ns_prefixes else NULL)

        f = _getFSPathOrObject(f)
        if _isString(f):
            filename8 = _encodeFilename(f)
            c_filename = _cstr(filename8)
            with nogil:
                error = c14n.xmlC14NDocSave(
                    c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                    with_comments, c_filename, compression)
        elif hasattr(f, 'write'):
            writer = _FilelikeWriter(f, compression=compression)
            c_buffer = writer._createOutputBuffer(NULL)
            try:
                with writer.error_log:
                    bytes_count = c14n.xmlC14NDocSaveTo(
                        c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                        with_comments, c_buffer)
            finally:
                # The buffer is closed whether or not serialisation succeeded.
                error = tree.xmlOutputBufferClose(c_buffer)
            if bytes_count < 0:
                # A serialisation failure takes precedence over the close result.
                error = bytes_count
            elif error != -1:
                error = xmlerror.XML_ERR_OK
        else:
            raise TypeError(f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    finally:
        _destroyFakeDoc(c_base_doc, c_doc)
        if c_inclusive_ns_prefixes is not NULL:
            python.lxml_free(c_inclusive_ns_prefixes)

    if writer is not None:
        writer._exc_context._raise_if_stored()

    if error < 0:
        message = "C14N failed"
        if writer is not None:
            errors = writer.error_log
            if len(errors):
                message = errors[0].message
        raise C14NError(message)
766
+
767
+
768
+ # C14N 2.0
769
+
770
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.

    Either *xml_data* (an XML string, tree or Element) or *from_file*
    (a file path or file-like object) must be provided as input.
    If both are given, *xml_data* is used and *from_file* is ignored.
    A ``ValueError`` is raised if neither is given.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    sio = None
    if out is None:
        sio = out = StringIO()

    target = C14NWriterTarget(out.write, **options)

    # Already parsed input (tree or Element): walk it instead of parsing.
    if xml_data is not None and not isinstance(xml_data, basestring):
        _tree_to_target(xml_data, target)
        return sio.getvalue() if sio is not None else None

    cdef _FeedParser parser = XMLParser(
        target=target,
        attribute_defaults=True,
        collect_ids=False,
    )

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    elif from_file is not None:
        try:
            _parseDocument(from_file, parser, base_url=None)
        except _TargetParserResult:
            # Only the output written by the target is of interest here,
            # so the exception is deliberately discarded.
            pass

    return sio.getvalue() if sio is not None else None
812
+
813
+
814
cdef _tree_to_target(element, target):
    # Replay an in-memory tree as parser events on 'target'
    # and return the result of 'target.close()'.
    walker = iterwalk(element, events=('start', 'end', 'start-ns', 'comment', 'pi'))
    for event, item in walker:
        if event == 'start-ns':
            # 'item' is unpacked into the arguments of start_ns();
            # namespace events carry no text.
            target.start_ns(*item)
            continue

        if event == 'start':
            target.start(item.tag, item.attrib)
            trailing_text = item.text
        elif event == 'end':
            target.end(item.tag)
            trailing_text = item.tail
        elif event == 'comment':
            target.comment(item.text)
            trailing_text = item.tail
        elif event == 'pi':
            target.pi(item.target, item.text)
            trailing_text = item.tail
        else:
            trailing_text = None

        if trailing_text:
            target.data(trailing_text)
    return target.close()
835
+
836
+
837
# Matcher for strings of the shape "prefix:name" (word characters on both sides
# of a single colon).  Used by C14NWriterTarget to spot candidate QNames in the
# text of qname-aware tags and in the values of qname-aware attributes.
cdef object _looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
838
+
839
+
840
cdef class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    cdef object _write
    cdef list _data
    cdef set _qname_aware_tags
    cdef object _find_qname_aware_attrs
    cdef list _declared_ns_stack
    cdef list _ns_stack
    cdef dict _prefix_map
    cdef list _preserve_space
    # (tag, attrs, new_namespaces) of a qname-aware element whose start tag
    # has been deferred until its text content is known; None otherwise.
    cdef tuple _pending_start
    cdef set _exclude_tags
    cdef set _exclude_attrs
    cdef Py_ssize_t _ignored_depth
    cdef bint _with_comments
    cdef bint _strip_text
    cdef bint _rewrite_prefixes
    cdef bint _root_seen
    cdef bint _root_done

    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(_DEFAULT_NAMESPACE_PREFIXES_ITEMS)
        self._ns_stack.append([])
        self._prefix_map = {}
        self._preserve_space = [False]
        self._pending_start = None
        self._ignored_depth = 0
        self._root_seen = False
        self._root_done = False

    def _iter_namespaces(self, ns_stack):
        """Yield the (uri, prefix) pairs of 'ns_stack', innermost level first."""
        for namespaces in reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    cdef _resolve_prefix_name(self, prefixed_name):
        # Map "prefix:name" to "{uri}name" using the user declared namespaces.
        # Raises ValueError if the prefix is not declared in scope.
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    cdef _qname(self, qname, uri=None):
        # Return (serialised name, local name, uri) for 'qname', adding a
        # namespace declaration to the current element level if needed.
        # Raises ValueError if the namespace has no prefix in scope.
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        if not uri:
            # As soon as a default namespace is defined,
            # anything that has no namespace (and thus, no prefix) goes there.
            return tag, tag, uri

        raise ValueError(f'Namespace "{uri}" of name "{tag}" is not declared in scope')

    def data(self, data):
        """Collect text content unless inside an excluded element."""
        if not self._ignored_depth:
            self._data.append(data)

    cdef _flush(self):
        # Write the collected text, preceded by a deferred start tag if any.
        cdef unicode data = ''.join(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None
            qname_text = data if ':' in data and _looks_like_prefix_name(data) else None
            self._start(tag, attrs, new_namespaces, qname_text)
            if qname_text is not None:
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Register a user declared namespace prefix for the next element."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        # A deferred start tag must also be written first, even without text,
        # so that the declaration ends up on the right namespace level.
        if self._data or self._pending_start is not None:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle the start of an element."""
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        # Also flush when a qname-aware parent is still waiting for its text:
        # without this, a parent that has no text never gets its start tag.
        if self._data or self._pending_start is not None:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    cdef _start(self, tag, attrs, new_namespaces, qname_text=None):
        # Write the start tag of an element, including its namespace
        # declarations and attributes, and push the per-element state.
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parsed_qnames = {n: self._qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Handle the end of an element."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        # An empty qname-aware element still has its start tag pending here.
        # Flush it so that the end tag and the stack pops below are balanced.
        if self._data or self._pending_start is not None:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Write a comment if comments are enabled."""
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # A deferred start tag must be written before the comment it contains.
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Write a processing instruction."""
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # A deferred start tag must be written before the PI it contains.
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')

    def close(self):
        """Finish the serialisation; there is no result value."""
        return None
1112
+
1113
+
1114
cdef _raise_serialization_error(text):
    # Raise a TypeError that reports the repr and the type name of 'text'.
    type_name = type(text).__name__
    raise TypeError("cannot serialize %r (type %s)" % (text, type_name))
1116
+
1117
+
1118
cdef unicode _escape_cdata_c14n(stext):
    # Escape character data: '&', '<', '>' and carriage return are replaced
    # by their entity / character references.  The input string is returned
    # unchanged if nothing needs escaping.
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t copied_up_to = 0, index = 0
    cdef list parts = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for index, ch in enumerate(text):
        if ch == '&':
            replacement = '&amp;'
        elif ch == '<':
            replacement = '&lt;'
        elif ch == '>':
            replacement = '&gt;'
        elif ch == '\r':
            replacement = '&#xD;'
        else:
            continue

        # Lazily create the output list on the first character to escape.
        if parts is None:
            parts = []
        if index > copied_up_to:
            parts.append(text[copied_up_to:index])
        parts.append(replacement)
        copied_up_to = index + 1

    if parts is None:
        return text
    # Copy whatever follows the last escaped character.
    if copied_up_to < len(text):
        parts.append(text[copied_up_to:])
    return ''.join(parts)
1153
+
1154
+
1155
cdef unicode _escape_attrib_c14n(stext):
    # Escape an attribute value: '&', '<', '"', tab, newline and carriage
    # return are replaced by their entity / character references.  The input
    # string is returned unchanged if nothing needs escaping.
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t copied_up_to = 0, index = 0
    cdef list parts = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for index, ch in enumerate(text):
        if ch == '&':
            replacement = '&amp;'
        elif ch == '<':
            replacement = '&lt;'
        elif ch == '"':
            replacement = '&quot;'
        elif ch == '\t':
            replacement = '&#x9;'
        elif ch == '\n':
            replacement = '&#xA;'
        elif ch == '\r':
            replacement = '&#xD;'
        else:
            continue

        # Lazily create the output list on the first character to escape.
        if parts is None:
            parts = []
        if index > copied_up_to:
            parts.append(text[copied_up_to:index])
        parts.append(replacement)
        copied_up_to = index + 1

    if parts is None:
        return text
    # Copy whatever follows the last escaped character.
    if copied_up_to < len(text):
        parts.append(text[copied_up_to:])
    return ''.join(parts)
1194
+
1195
+
1196
+ # incremental serialisation
1197
+
1198
cdef class xmlfile:
    """xmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    Context manager for writing an XML document piece by piece.

    Usage example::

         with xmlfile("somefile.xml", encoding='utf-8') as xf:
             xf.write_declaration(standalone=True)
             xf.write_doctype('<!DOCTYPE root SYSTEM "some.dtd">')

             # generate an element (the root element)
             with xf.element('root'):
                  # write a complete Element into the open root element
                  xf.write(etree.Element('test'))

                  # generate and write more Elements, e.g. through iterparse
                  for element in generate_some_elements():
                      # serialise generated elements into the XML file
                      xf.write(element)

                  # or write multiple Elements or strings at once
                  xf.write(etree.Element('start'), "text", etree.Element('end'))

    When 'output_file' is a file(-like) object, it is only closed on exit
    from the context manager if ``close=True`` was passed; otherwise closing
    it is left to its owner.  When a file path is given, lxml opens and
    closes the file itself.  If a compression level is set, lxml will
    deliberately close the file to make sure all data gets compressed and
    written.

    With ``buffered=False``, the output is flushed after each operation,
    such as opening or closing an ``xf.element()`` block or calling
    ``xf.write()``.  With buffering enabled, ``xf.flush()`` can be called
    to explicitly flush any pending output.
    """
    cdef object output_file
    cdef bytes encoding
    cdef _IncrementalFileWriter writer
    cdef _AsyncIncrementalFileWriter async_writer
    cdef int compresslevel
    cdef bint close
    cdef bint buffered
    cdef int method

    def __init__(self, output_file not None, encoding=None, compression=None,
                 close=False, buffered=True):
        self.output_file = output_file
        self.encoding = _utf8orNone(encoding)
        # 'None' (and any other false value) means no compression.
        self.compresslevel = compression or 0
        self.close = close
        self.buffered = buffered
        self.method = OUTPUT_METHOD_XML

    def __enter__(self):
        assert self.output_file is not None
        self.writer = _IncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        return self.writer

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.writer is None:
            return
        # Detach the writer before closing it.
        closing_writer, self.writer = self.writer, None
        # Errors on close are only raised if the block exited without exception.
        closing_writer._close(exc_type is None)
        if self.close:
            self.output_file = None

    async def __aenter__(self):
        assert self.output_file is not None
        if isinstance(self.output_file, basestring):
            raise TypeError("Cannot asynchronously write to a plain file")
        if not hasattr(self.output_file, 'write'):
            raise TypeError("Output file needs an async .write() method")
        self.async_writer = _AsyncIncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        return self.async_writer

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.async_writer is None:
            return
        # Detach the writer before closing it.
        closing_writer, self.async_writer = self.async_writer, None
        # Errors on close are only raised if the block exited without exception.
        await closing_writer._close(exc_type is None)
        if self.close:
            self.output_file = None
1285
+
1286
+
1287
cdef class htmlfile(xmlfile):
    """htmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    A simple mechanism for incremental HTML serialisation.  Works the same as
    xmlfile.
    """
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        # Replace the XML output method set by the base class initialiser.
        self.method = OUTPUT_METHOD_HTML
1296
+
1297
+
1298
# Progress states of an _IncrementalFileWriter.
# The numeric order matters: the writer compares states with '<', '>' and '>='.
cdef enum _IncrementalFileWriterStatus:
    WRITER_STARTING = 0      # initial state, set on writer creation
    WRITER_DECL_WRITTEN = 1  # XML declaration written, without a doctype
    WRITER_DTD_WRITTEN = 2   # doctype written
    WRITER_IN_ELEMENT = 3    # at least one start tag has been written
    WRITER_FINISHED = 4      # root element completed, or writer closed
1304
+
1305
+
1306
+ @cython.final
1307
+ @cython.internal
1308
+ cdef class _IncrementalFileWriter:
1309
+ cdef tree.xmlOutputBuffer* _c_out
1310
+ cdef bytes _encoding
1311
+ cdef const_char* _c_encoding
1312
+ cdef _FilelikeWriter _target
1313
+ cdef list _element_stack
1314
+ cdef int _status
1315
+ cdef int _method
1316
+ cdef bint _buffered
1317
+
1318
+ def __cinit__(self, outfile, bytes encoding, int compresslevel, bint close,
1319
+ bint buffered, int method):
1320
+ self._status = WRITER_STARTING
1321
+ self._element_stack = []
1322
+ if encoding is None:
1323
+ # We always need a document encoding to make the attribute serialisation
1324
+ # of libxml2 identical to ours.
1325
+ encoding = b'ASCII'
1326
+ self._encoding = encoding
1327
+ self._c_encoding = _cstr(encoding)
1328
+ self._buffered = buffered
1329
+ self._target = _create_output_buffer(
1330
+ outfile, self._c_encoding, compresslevel, &self._c_out, close)
1331
+ self._method = method
1332
+
1333
+ def __dealloc__(self):
1334
+ if self._c_out is not NULL:
1335
+ tree.xmlOutputBufferClose(self._c_out)
1336
+
1337
+ def write_declaration(self, version=None, standalone=None, doctype=None):
1338
+ """write_declaration(self, version=None, standalone=None, doctype=None)
1339
+
1340
+ Write an XML declaration and (optionally) a doctype into the file.
1341
+ """
1342
+ assert self._c_out is not NULL
1343
+ cdef const_xmlChar* c_version
1344
+ cdef int c_standalone
1345
+ if self._method != OUTPUT_METHOD_XML:
1346
+ raise LxmlSyntaxError("only XML documents have declarations")
1347
+ if self._status >= WRITER_DECL_WRITTEN:
1348
+ raise LxmlSyntaxError("XML declaration already written")
1349
+ version = _utf8orNone(version)
1350
+ c_version = _xcstr(version) if version is not None else NULL
1351
+ doctype = _utf8orNone(doctype)
1352
+ if standalone is None:
1353
+ c_standalone = -1
1354
+ else:
1355
+ c_standalone = 1 if standalone else 0
1356
+ _writeDeclarationToBuffer(self._c_out, c_version, self._c_encoding, c_standalone)
1357
+ if doctype is not None:
1358
+ _writeDoctype(self._c_out, _xcstr(doctype))
1359
+ self._status = WRITER_DTD_WRITTEN
1360
+ else:
1361
+ self._status = WRITER_DECL_WRITTEN
1362
+ if not self._buffered:
1363
+ tree.xmlOutputBufferFlush(self._c_out)
1364
+ self._handle_error(self._c_out.error)
1365
+
1366
+ def write_doctype(self, doctype):
1367
+ """write_doctype(self, doctype)
1368
+
1369
+ Writes the given doctype declaration verbatimly into the file.
1370
+ """
1371
+ assert self._c_out is not NULL
1372
+ if doctype is None:
1373
+ return
1374
+ if self._status >= WRITER_DTD_WRITTEN:
1375
+ raise LxmlSyntaxError("DOCTYPE already written or cannot write it here")
1376
+ doctype = _utf8(doctype)
1377
+ _writeDoctype(self._c_out, _xcstr(doctype))
1378
+ self._status = WRITER_DTD_WRITTEN
1379
+ if not self._buffered:
1380
+ tree.xmlOutputBufferFlush(self._c_out)
1381
+ self._handle_error(self._c_out.error)
1382
+
1383
+ def method(self, method):
1384
+ """method(self, method)
1385
+
1386
+ Returns a context manager that overrides and restores the output method.
1387
+ method is one of (None, 'xml', 'html') where None means 'xml'.
1388
+ """
1389
+ assert self._c_out is not NULL
1390
+ c_method = self._method if method is None else _findOutputMethod(method)
1391
+ return _MethodChanger(self, c_method)
1392
+
1393
+ def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
1394
+ """element(self, tag, attrib=None, nsmap=None, method, **_extra)
1395
+
1396
+ Returns a context manager that writes an opening and closing tag.
1397
+ method is one of (None, 'xml', 'html') where None means 'xml'.
1398
+ """
1399
+ assert self._c_out is not NULL
1400
+ attributes = []
1401
+ if attrib is not None:
1402
+ for name, value in _iter_attrib(attrib):
1403
+ if name not in _extra:
1404
+ ns, name = _getNsTag(name)
1405
+ attributes.append((ns, name, _utf8(value)))
1406
+ if _extra:
1407
+ for name, value in _extra.iteritems():
1408
+ ns, name = _getNsTag(name)
1409
+ attributes.append((ns, name, _utf8(value)))
1410
+ reversed_nsmap = {}
1411
+ if nsmap:
1412
+ for prefix, ns in nsmap.items():
1413
+ if prefix is not None:
1414
+ prefix = _utf8(prefix)
1415
+ _prefixValidOrRaise(prefix)
1416
+ reversed_nsmap[_utf8(ns)] = prefix
1417
+ ns, name = _getNsTag(tag)
1418
+
1419
+ c_method = self._method if method is None else _findOutputMethod(method)
1420
+
1421
+ return _FileWriterElement(self, (ns, name, attributes, reversed_nsmap), c_method)
1422
+
1423
+ cdef _write_qname(self, bytes name, bytes prefix):
1424
+ if prefix: # empty bytes for no prefix (not None to allow sorting)
1425
+ tree.xmlOutputBufferWrite(self._c_out, len(prefix), _cstr(prefix))
1426
+ tree.xmlOutputBufferWrite(self._c_out, 1, ':')
1427
+ tree.xmlOutputBufferWrite(self._c_out, len(name), _cstr(name))
1428
+
1429
+ cdef _write_start_element(self, element_config):
1430
+ if self._status > WRITER_IN_ELEMENT:
1431
+ raise LxmlSyntaxError("cannot append trailing element to complete XML document")
1432
+ ns, name, attributes, nsmap = element_config
1433
+ flat_namespace_map, new_namespaces = self._collect_namespaces(nsmap)
1434
+ prefix = self._find_prefix(ns, flat_namespace_map, new_namespaces)
1435
+ tree.xmlOutputBufferWrite(self._c_out, 1, '<')
1436
+ self._write_qname(name, prefix)
1437
+
1438
+ self._write_attributes_and_namespaces(
1439
+ attributes, flat_namespace_map, new_namespaces)
1440
+
1441
+ tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1442
+ if not self._buffered:
1443
+ tree.xmlOutputBufferFlush(self._c_out)
1444
+ self._handle_error(self._c_out.error)
1445
+
1446
+ self._element_stack.append((ns, name, prefix, flat_namespace_map))
1447
+ self._status = WRITER_IN_ELEMENT
1448
+
1449
+ cdef _write_attributes_and_namespaces(self, list attributes,
1450
+ dict flat_namespace_map,
1451
+ list new_namespaces):
1452
+ if attributes:
1453
+ # _find_prefix() may append to new_namespaces => build them first
1454
+ attributes = [
1455
+ (self._find_prefix(ns, flat_namespace_map, new_namespaces), name, value)
1456
+ for ns, name, value in attributes ]
1457
+ if new_namespaces:
1458
+ new_namespaces.sort()
1459
+ self._write_attributes_list(new_namespaces)
1460
+ if attributes:
1461
+ self._write_attributes_list(attributes)
1462
+
1463
+ cdef _write_attributes_list(self, list attributes):
1464
+ for prefix, name, value in attributes:
1465
+ tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
1466
+ self._write_qname(name, prefix)
1467
+ tree.xmlOutputBufferWrite(self._c_out, 2, '="')
1468
+ _write_attr_string(self._c_out, _cstr(value))
1469
+
1470
+ tree.xmlOutputBufferWrite(self._c_out, 1, '"')
1471
+
1472
+ cdef _write_end_element(self, element_config):
1473
+ if self._status != WRITER_IN_ELEMENT:
1474
+ raise LxmlSyntaxError("not in an element")
1475
+ if not self._element_stack or self._element_stack[-1][:2] != element_config[:2]:
1476
+ raise LxmlSyntaxError("inconsistent exit action in context manager")
1477
+
1478
+ # If previous write operations failed, the context manager exit might still call us.
1479
+ # That is ok, but we stop writing closing tags and handling errors in that case.
1480
+ # For all non-I/O errors, we continue writing closing tags if we can.
1481
+ ok_to_write = self._c_out.error == xmlerror.XML_ERR_OK
1482
+
1483
+ name, prefix = self._element_stack.pop()[1:3]
1484
+ if ok_to_write:
1485
+ tree.xmlOutputBufferWrite(self._c_out, 2, '</')
1486
+ self._write_qname(name, prefix)
1487
+ tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1488
+
1489
+ if not self._element_stack:
1490
+ self._status = WRITER_FINISHED
1491
+ if ok_to_write:
1492
+ if not self._buffered:
1493
+ tree.xmlOutputBufferFlush(self._c_out)
1494
+ self._handle_error(self._c_out.error)
1495
+
1496
+ cdef _find_prefix(self, bytes href, dict flat_namespaces_map, list new_namespaces):
1497
+ if href is None:
1498
+ return None
1499
+ if href in flat_namespaces_map:
1500
+ return flat_namespaces_map[href]
1501
+ # need to create a new prefix
1502
+ prefixes = flat_namespaces_map.values()
1503
+ i = 0
1504
+ while True:
1505
+ prefix = _utf8('ns%d' % i)
1506
+ if prefix not in prefixes:
1507
+ new_namespaces.append((b'xmlns', prefix, href))
1508
+ flat_namespaces_map[href] = prefix
1509
+ return prefix
1510
+ i += 1
1511
+
1512
+ cdef _collect_namespaces(self, dict nsmap):
1513
+ new_namespaces = []
1514
+ flat_namespaces_map = {}
1515
+ for ns, prefix in nsmap.iteritems():
1516
+ flat_namespaces_map[ns] = prefix
1517
+ if prefix is None:
1518
+ # use empty bytes rather than None to allow sorting
1519
+ new_namespaces.append((b'', b'xmlns', ns))
1520
+ else:
1521
+ new_namespaces.append((b'xmlns', prefix, ns))
1522
+ # merge in flat namespace map of parent
1523
+ if self._element_stack:
1524
+ for ns, prefix in (<dict>self._element_stack[-1][-1]).iteritems():
1525
+ if flat_namespaces_map.get(ns) is None:
1526
+ # unknown or empty prefix => prefer a 'real' prefix
1527
+ flat_namespaces_map[ns] = prefix
1528
+ return flat_namespaces_map, new_namespaces
1529
+
1530
def write(self, *args, bint with_tail=True, bint pretty_print=False, method=None):
    """write(self, *args, with_tail=True, pretty_print=False, method=None)

    Write subtrees or strings into the file.

    If method is not None, it should be one of ('html', 'xml', 'text')
    to temporarily override the output method.

    The positional arguments are processed in order:

    * Strings are written as text.  While no element is open, only
      whitespace-only strings are accepted, and only as long as the
      document has not been finished; otherwise ``LxmlSyntaxError``
      is raised.
    * Elements are serialised as complete subtrees.  Appending one after
      the document was finished raises ``LxmlSyntaxError``.
    * ``None`` is ignored.
    * Any other value raises ``TypeError``.
    """
    assert self._c_out is not NULL
    c_method = self._method if method is None else _findOutputMethod(method)

    for content in args:
        if _isString(content):
            if self._status != WRITER_IN_ELEMENT:
                if self._status > WRITER_IN_ELEMENT or content.strip():
                    raise LxmlSyntaxError("not in an element")
            bstring = _utf8(content)
            if not bstring:
                continue

            if self._element_stack:
                ns, name, _, _ = self._element_stack[-1]
            else:
                # Whitespace-only text may be written while no element is
                # open.  There is no enclosing element to look at then, so
                # do not index into the (empty) stack.
                ns = name = None

            if (c_method == OUTPUT_METHOD_HTML and
                    ns in (None, b'http://www.w3.org/1999/xhtml') and
                    name in (b'script', b'style')):
                # Text inside HTML <script>/<style> is written unescaped.
                tree.xmlOutputBufferWrite(self._c_out, len(bstring), _cstr(bstring))
            else:
                tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(bstring), NULL)

        elif iselement(content):
            if self._status > WRITER_IN_ELEMENT:
                raise LxmlSyntaxError("cannot append trailing element to complete XML document")
            _writeNodeToBuffer(self._c_out, (<_Element>content)._c_node,
                               self._c_encoding, NULL, c_method,
                               False, False, pretty_print, with_tail, False)
            if (<_Element>content)._c_node.type == tree.XML_ELEMENT_NODE:
                if not self._element_stack:
                    # An element written while no element is open
                    # finishes the document.
                    self._status = WRITER_FINISHED

        elif content is not None:
            raise TypeError(
                f"got invalid input value of type {type(content)}, expected string or Element")
        # Check the buffer's error state after each written item.
        self._handle_error(self._c_out.error)
    if not self._buffered:
        tree.xmlOutputBufferFlush(self._c_out)
        self._handle_error(self._c_out.error)
1576
+
1577
def flush(self):
    """flush(self)

    Push whatever the output buffer currently holds out to the stream.

    The buffer's error state is checked afterwards and reported through
    ``_handle_error()``.
    """
    assert self._c_out is not NULL
    tree.xmlOutputBufferFlush(self._c_out)
    self._handle_error(self._c_out.error)
1585
+
1586
cdef _close(self, bint raise_on_error):
    # Close the output buffer and reset the writer state.
    #
    # With raise_on_error set, an unfinished document (nothing written yet,
    # or elements still open) is rejected before anything is closed, and a
    # failure of the buffer is reported via _handle_error() at the end.
    # Without it, the buffer is closed and the state reset silently.
    if raise_on_error and self._status < WRITER_IN_ELEMENT:
        raise LxmlSyntaxError("no content written")
    if raise_on_error and self._element_stack:
        raise LxmlSyntaxError("pending open tags on close")

    # Read the buffer's own error state before closing it.
    error_result = self._c_out.error
    close_result = tree.xmlOutputBufferClose(self._c_out)
    if error_result == xmlerror.XML_ERR_OK and close_result == -1:
        # No earlier error, but closing itself failed.
        error_result = close_result

    self._status = WRITER_FINISHED
    self._c_out = NULL
    del self._element_stack[:]

    if raise_on_error:
        self._handle_error(error_result)
1604
+
1605
cdef _handle_error(self, int error_result):
    # Turn a non-OK result code into an exception; XML_ERR_OK is a no-op.
    if error_result == xmlerror.XML_ERR_OK:
        return
    if self._target is not None:
        # Let the target's exception context raise what it has stored
        # before falling back to the generic serialisation error.
        self._target._exc_context._raise_if_stored()
    _raiseSerialisationError(error_result)
1610
+
1611
+
1612
@cython.final
@cython.internal
cdef class _AsyncDataWriter:
    # Minimal in-memory sink with write()/close() methods.  Written chunks
    # are kept in a list until collect() joins and hands them out.
    cdef list _data

    def __cinit__(self):
        self._data = []

    cdef bytes collect(self):
        # Join all chunks received so far and empty the list in place.
        joined = b''.join(self._data)
        self._data.clear()
        return joined

    def write(self, data):
        self._data.append(data)

    def close(self):
        # Nothing to do on close.
        pass
1629
+
1630
+
1631
@cython.final
@cython.internal
cdef class _AsyncIncrementalFileWriter:
    # Async front end for _IncrementalFileWriter.
    #
    # The wrapped synchronous writer serialises into an in-memory
    # _AsyncDataWriter.  After each operation the collected bytes (if any)
    # are passed on to the async output file with 'await'.
    cdef _IncrementalFileWriter _writer
    cdef _AsyncDataWriter _buffer
    cdef object _async_outfile
    cdef int _flush_after_writes
    cdef bint _should_close
    cdef bint _buffered

    def __cinit__(self, async_outfile, bytes encoding, int compresslevel, bint close,
                  bint buffered, int method):
        self._async_outfile = async_outfile
        self._should_close = close
        self._buffered = buffered
        # In buffered mode, output is handed on once more than this many
        # chunks have accumulated (see _flush()).
        self._flush_after_writes = 20
        self._buffer = _AsyncDataWriter()
        # The inner writer itself runs unbuffered; buffering is decided here.
        self._writer = _IncrementalFileWriter(
            self._buffer, encoding, compresslevel, close=True, buffered=False, method=method)

    cdef bytes _flush(self):
        # Return the collected output, or None while buffering is enabled
        # and not enough chunks have piled up yet.
        if self._buffered and len(self._buffer._data) <= self._flush_after_writes:
            return None
        return self._buffer.collect()

    async def _forward(self, data):
        # Pass a chunk on to the async output file; None/empty is skipped.
        if data:
            await self._async_outfile.write(data)

    async def flush(self):
        # Unconditional flush: collect everything, regardless of buffering.
        self._writer.flush()
        await self._forward(self._buffer.collect())

    async def write_declaration(self, version=None, standalone=None, doctype=None):
        self._writer.write_declaration(version, standalone, doctype)
        await self._forward(self._flush())

    async def write_doctype(self, doctype):
        self._writer.write_doctype(doctype)
        await self._forward(self._flush())

    async def write(self, *args, with_tail=True, pretty_print=False, method=None):
        self._writer.write(*args, with_tail=with_tail, pretty_print=pretty_print, method=method)
        await self._forward(self._flush())

    def method(self, method):
        return self._writer.method(method)

    def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
        # Wrap the synchronous element context manager in its async variant.
        sync_element = self._writer.element(tag, attrib, nsmap, method, **_extra)
        return _AsyncFileWriterElement(sync_element, self)

    async def _close(self, bint raise_on_error):
        # Close the inner writer, push out what it left behind, then close
        # the async output file if that was requested.
        self._writer._close(raise_on_error)
        await self._forward(self._buffer.collect())
        if self._should_close:
            await self._async_outfile.close()
1694
+
1695
+
1696
@cython.final
@cython.internal
cdef class _AsyncFileWriterElement:
    # Async context manager around a synchronous _FileWriterElement.
    # Entering/leaving delegates to the wrapped element writer and then
    # forwards whatever output the async writer is ready to hand out.
    cdef _FileWriterElement _element_writer
    cdef _AsyncIncrementalFileWriter _writer

    def __cinit__(self, _FileWriterElement element_writer not None,
                  _AsyncIncrementalFileWriter writer not None):
        self._writer = writer
        self._element_writer = element_writer

    async def __aenter__(self):
        self._element_writer.__enter__()
        pending = self._writer._flush()
        if not pending:
            return
        await self._writer._async_outfile.write(pending)

    async def __aexit__(self, *args):
        self._element_writer.__exit__(*args)
        pending = self._writer._flush()
        if not pending:
            return
        await self._writer._async_outfile.write(pending)
1718
+
1719
+
1720
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _FileWriterElement:
    # Context manager for one element of an _IncrementalFileWriter:
    # entering writes the start of the element, leaving writes its end.
    # The writer's output method is switched to the requested one for the
    # duration and set back to the value seen at construction time.
    cdef _IncrementalFileWriter _writer
    cdef object _element
    cdef int _new_method
    cdef int _old_method

    def __cinit__(self, _IncrementalFileWriter writer not None, element_config, int method):
        self._writer = writer
        self._element = element_config
        # Remember both the method to use inside the element and the one
        # to restore afterwards.
        self._old_method = writer._method
        self._new_method = method

    def __enter__(self):
        # Switch the method first, then write the start of the element.
        self._writer._method = self._new_method
        self._writer._write_start_element(self._element)

    def __exit__(self, exc_type, exc_val, exc_tb):
        # Write the end of the element, then restore the previous method.
        self._writer._write_end_element(self._element)
        self._writer._method = self._old_method
1742
+
1743
+
1744
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _MethodChanger:
    # One-shot context manager that overrides the output method of an
    # _IncrementalFileWriter and restores the previous value on exit.
    # It works both as a regular and as an async context manager.
    cdef _IncrementalFileWriter _writer
    cdef int _new_method
    cdef int _old_method
    cdef bint _entered
    cdef bint _exited

    def __cinit__(self, _IncrementalFileWriter writer not None, int method):
        self._writer = writer
        self._old_method = writer._method
        self._new_method = method
        self._entered = self._exited = False

    def __enter__(self):
        # Entering a second time is an error.
        if not self._entered:
            self._writer._method = self._new_method
            self._entered = True
            return
        raise LxmlSyntaxError("Inconsistent enter action in context manager")

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._exited:
            raise LxmlSyntaxError("Inconsistent exit action in context manager")
        elif self._writer._method != self._new_method:
            # Someone else replaced the method in the meantime.
            raise LxmlSyntaxError("Method changed outside of context manager")
        self._writer._method = self._old_method
        self._exited = True

    async def __aenter__(self):
        # async variant, delegates to the synchronous implementation
        return self.__enter__()

    async def __aexit__(self, *args):
        # async variant, delegates to the synchronous implementation
        return self.__exit__(*args)