lxml 5.2.0__cp310-cp310-win32.whl → 5.2.2__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. lxml/ElementInclude.py +244 -244
  2. lxml/__init__.py +22 -22
  3. lxml/_elementpath.cp310-win32.pyd +0 -0
  4. lxml/_elementpath.py +341 -341
  5. lxml/apihelpers.pxi +1793 -1793
  6. lxml/builder.cp310-win32.pyd +0 -0
  7. lxml/builder.py +232 -232
  8. lxml/classlookup.pxi +580 -580
  9. lxml/cleanup.pxi +215 -215
  10. lxml/cssselect.py +101 -101
  11. lxml/debug.pxi +90 -90
  12. lxml/docloader.pxi +178 -178
  13. lxml/doctestcompare.py +488 -488
  14. lxml/dtd.pxi +478 -478
  15. lxml/etree.cp310-win32.pyd +0 -0
  16. lxml/etree.h +6 -6
  17. lxml/etree.pyx +3732 -3711
  18. lxml/extensions.pxi +833 -833
  19. lxml/html/ElementSoup.py +10 -10
  20. lxml/html/__init__.py +1923 -1923
  21. lxml/html/_diffcommand.py +86 -86
  22. lxml/html/_html5builder.py +100 -100
  23. lxml/html/_setmixin.py +56 -56
  24. lxml/html/builder.py +133 -133
  25. lxml/html/clean.py +21 -21
  26. lxml/html/defs.py +135 -135
  27. lxml/html/diff.cp310-win32.pyd +0 -0
  28. lxml/html/diff.py +878 -878
  29. lxml/html/formfill.py +299 -299
  30. lxml/html/html5parser.py +260 -260
  31. lxml/html/soupparser.py +314 -314
  32. lxml/html/usedoctest.py +13 -13
  33. lxml/includes/c14n.pxd +25 -25
  34. lxml/includes/config.pxd +3 -3
  35. lxml/includes/dtdvalid.pxd +18 -18
  36. lxml/includes/etree_defs.h +379 -379
  37. lxml/includes/etreepublic.pxd +237 -237
  38. lxml/includes/htmlparser.pxd +56 -56
  39. lxml/includes/lxml-version.h +1 -1
  40. lxml/includes/relaxng.pxd +64 -64
  41. lxml/includes/schematron.pxd +34 -34
  42. lxml/includes/tree.pxd +494 -494
  43. lxml/includes/uri.pxd +5 -5
  44. lxml/includes/xinclude.pxd +22 -22
  45. lxml/includes/xmlerror.pxd +852 -852
  46. lxml/includes/xmlparser.pxd +265 -265
  47. lxml/includes/xmlschema.pxd +35 -35
  48. lxml/includes/xpath.pxd +136 -136
  49. lxml/includes/xslt.pxd +190 -190
  50. lxml/isoschematron/__init__.py +348 -348
  51. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -709
  52. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -75
  53. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +312 -312
  54. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1159 -1159
  55. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +54 -54
  56. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -1796
  57. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -588
  58. lxml/iterparse.pxi +438 -438
  59. lxml/lxml.etree.h +6 -6
  60. lxml/nsclasses.pxi +281 -281
  61. lxml/objectify.cp310-win32.pyd +0 -0
  62. lxml/objectify.pyx +2145 -2145
  63. lxml/objectpath.pxi +332 -332
  64. lxml/parser.pxi +1994 -1994
  65. lxml/parsertarget.pxi +180 -180
  66. lxml/proxy.pxi +619 -619
  67. lxml/public-api.pxi +178 -178
  68. lxml/pyclasslookup.py +3 -3
  69. lxml/readonlytree.pxi +565 -565
  70. lxml/relaxng.pxi +165 -165
  71. lxml/sax.cp310-win32.pyd +0 -0
  72. lxml/sax.py +275 -275
  73. lxml/saxparser.pxi +875 -875
  74. lxml/schematron.pxi +168 -168
  75. lxml/serializer.pxi +1871 -1871
  76. lxml/usedoctest.py +13 -13
  77. lxml/xinclude.pxi +67 -67
  78. lxml/xmlerror.pxi +1654 -1654
  79. lxml/xmlid.pxi +179 -179
  80. lxml/xmlschema.pxi +215 -215
  81. lxml/xpath.pxi +487 -487
  82. lxml/xslt.pxi +950 -950
  83. lxml/xsltext.pxi +242 -242
  84. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSE.txt +29 -29
  85. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSES.txt +29 -29
  86. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/METADATA +9 -17
  87. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/RECORD +89 -89
  88. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/WHEEL +0 -0
  89. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/top_level.txt +0 -0
lxml/serializer.pxi CHANGED
@@ -1,1871 +1,1871 @@
1
- # XML serialization and output functions
2
-
3
- cdef object GzipFile
4
- from gzip import GzipFile
5
-
6
-
7
- cdef class SerialisationError(LxmlError):
8
- """A libxml2 error that occurred during serialisation.
9
- """
10
-
11
-
12
- cdef enum _OutputMethods:
13
- OUTPUT_METHOD_XML
14
- OUTPUT_METHOD_HTML
15
- OUTPUT_METHOD_TEXT
16
-
17
-
18
- cdef int _findOutputMethod(method) except -1:
19
- if method is None:
20
- return OUTPUT_METHOD_XML
21
- method = method.lower()
22
- if method == "xml":
23
- return OUTPUT_METHOD_XML
24
- if method == "html":
25
- return OUTPUT_METHOD_HTML
26
- if method == "text":
27
- return OUTPUT_METHOD_TEXT
28
- raise ValueError(f"unknown output method {method!r}")
29
-
30
-
31
- cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
32
- cdef bint needs_conversion
33
- cdef const_xmlChar* c_text
34
- cdef xmlNode* c_text_node
35
- cdef tree.xmlBuffer* c_buffer
36
- cdef int error_result
37
-
38
- c_buffer = tree.xmlBufferCreate()
39
- if c_buffer is NULL:
40
- raise MemoryError()
41
-
42
- with nogil:
43
- error_result = tree.xmlNodeBufGetContent(c_buffer, c_node)
44
- if with_tail:
45
- c_text_node = _textNodeOrSkip(c_node.next)
46
- while c_text_node is not NULL:
47
- tree.xmlBufferWriteChar(c_buffer, <const_char*>c_text_node.content)
48
- c_text_node = _textNodeOrSkip(c_text_node.next)
49
- c_text = tree.xmlBufferContent(c_buffer)
50
-
51
- if error_result < 0 or c_text is NULL:
52
- tree.xmlBufferFree(c_buffer)
53
- raise SerialisationError, "Error during serialisation (out of memory?)"
54
-
55
- try:
56
- needs_conversion = 0
57
- if encoding is unicode:
58
- needs_conversion = 1
59
- elif encoding is not None:
60
- # Python prefers lower case encoding names
61
- encoding = encoding.lower()
62
- if encoding not in ('utf8', 'utf-8'):
63
- if encoding == 'ascii':
64
- if isutf8l(c_text, tree.xmlBufferLength(c_buffer)):
65
- # will raise a decode error below
66
- needs_conversion = 1
67
- else:
68
- needs_conversion = 1
69
-
70
- if needs_conversion:
71
- text = (<const_char*>c_text)[:tree.xmlBufferLength(c_buffer)].decode('utf8')
72
- if encoding is not unicode:
73
- encoding = _utf8(encoding)
74
- text = python.PyUnicode_AsEncodedString(
75
- text, encoding, 'strict')
76
- else:
77
- text = (<unsigned char*>c_text)[:tree.xmlBufferLength(c_buffer)]
78
- finally:
79
- tree.xmlBufferFree(c_buffer)
80
- return text
81
-
82
-
83
- cdef _tostring(_Element element, encoding, doctype, method,
84
- bint write_xml_declaration, bint write_complete_document,
85
- bint pretty_print, bint with_tail, int standalone):
86
- """Serialize an element to an encoded string representation of its XML
87
- tree.
88
- """
89
- cdef tree.xmlOutputBuffer* c_buffer
90
- cdef tree.xmlBuf* c_result_buffer
91
- cdef tree.xmlCharEncodingHandler* enchandler
92
- cdef const_char* c_enc
93
- cdef const_xmlChar* c_version
94
- cdef const_xmlChar* c_doctype
95
- cdef int c_method
96
- cdef int error_result
97
- if element is None:
98
- return None
99
- _assertValidNode(element)
100
- c_method = _findOutputMethod(method)
101
- if c_method == OUTPUT_METHOD_TEXT:
102
- return _textToString(element._c_node, encoding, with_tail)
103
- if encoding is None or encoding is unicode:
104
- c_enc = NULL
105
- else:
106
- encoding = _utf8(encoding)
107
- c_enc = _cstr(encoding)
108
- if doctype is None:
109
- c_doctype = NULL
110
- else:
111
- doctype = _utf8(doctype)
112
- c_doctype = _xcstr(doctype)
113
- # it is necessary to *and* find the encoding handler *and* use
114
- # encoding during output
115
- enchandler = tree.xmlFindCharEncodingHandler(c_enc)
116
- if enchandler is NULL and c_enc is not NULL:
117
- if encoding is not None:
118
- encoding = encoding.decode('UTF-8')
119
- raise LookupError, f"unknown encoding: '{encoding}'"
120
- c_buffer = tree.xmlAllocOutputBuffer(enchandler)
121
- if c_buffer is NULL:
122
- tree.xmlCharEncCloseFunc(enchandler)
123
- raise MemoryError()
124
-
125
- with nogil:
126
- _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_doctype, c_method,
127
- write_xml_declaration, write_complete_document,
128
- pretty_print, with_tail, standalone)
129
- tree.xmlOutputBufferFlush(c_buffer)
130
- if c_buffer.conv is not NULL:
131
- c_result_buffer = c_buffer.conv
132
- else:
133
- c_result_buffer = c_buffer.buffer
134
-
135
- error_result = c_buffer.error
136
- if error_result != xmlerror.XML_ERR_OK:
137
- tree.xmlOutputBufferClose(c_buffer)
138
- _raiseSerialisationError(error_result)
139
-
140
- try:
141
- if encoding is unicode:
142
- result = (<unsigned char*>tree.xmlBufContent(
143
- c_result_buffer))[:tree.xmlBufUse(c_result_buffer)].decode('UTF-8')
144
- else:
145
- result = <bytes>(<unsigned char*>tree.xmlBufContent(
146
- c_result_buffer))[:tree.xmlBufUse(c_result_buffer)]
147
- finally:
148
- error_result = tree.xmlOutputBufferClose(c_buffer)
149
- if error_result == -1:
150
- _raiseSerialisationError(error_result)
151
- return result
152
-
153
- cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments, inclusive_ns_prefixes):
154
- cdef xmlDoc* c_doc
155
- cdef xmlChar* c_buffer = NULL
156
- cdef int byte_count = -1
157
- cdef bytes result
158
- cdef _Document doc
159
- cdef _Element element
160
- cdef xmlChar **c_inclusive_ns_prefixes
161
-
162
- if isinstance(element_or_tree, _Element):
163
- _assertValidNode(<_Element>element_or_tree)
164
- doc = (<_Element>element_or_tree)._doc
165
- c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0)
166
- else:
167
- doc = _documentOrRaise(element_or_tree)
168
- _assertValidDoc(doc)
169
- c_doc = doc._c_doc
170
-
171
- c_inclusive_ns_prefixes = _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes) if inclusive_ns_prefixes else NULL
172
- try:
173
- with nogil:
174
- byte_count = c14n.xmlC14NDocDumpMemory(
175
- c_doc, NULL, exclusive, c_inclusive_ns_prefixes, with_comments, &c_buffer)
176
-
177
- finally:
178
- _destroyFakeDoc(doc._c_doc, c_doc)
179
- if c_inclusive_ns_prefixes is not NULL:
180
- python.lxml_free(c_inclusive_ns_prefixes)
181
-
182
- if byte_count < 0 or c_buffer is NULL:
183
- if c_buffer is not NULL:
184
- tree.xmlFree(c_buffer)
185
- raise C14NError, "C14N failed"
186
- try:
187
- result = c_buffer[:byte_count]
188
- finally:
189
- tree.xmlFree(c_buffer)
190
- return result
191
-
192
- cdef _raiseSerialisationError(int error_result):
193
- if error_result == xmlerror.XML_ERR_NO_MEMORY:
194
- raise MemoryError()
195
- message = ErrorTypes._getName(error_result)
196
- if message is None:
197
- message = f"unknown error {error_result}"
198
- raise SerialisationError, message
199
-
200
- ############################################################
201
- # low-level serialisation functions
202
-
203
- cdef void _writeDoctype(tree.xmlOutputBuffer* c_buffer,
204
- const_xmlChar* c_doctype) noexcept nogil:
205
- tree.xmlOutputBufferWrite(c_buffer, tree.xmlStrlen(c_doctype),
206
- <const_char*>c_doctype)
207
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
208
-
209
- cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
210
- xmlNode* c_node, const_char* encoding, const_xmlChar* c_doctype,
211
- int c_method, bint write_xml_declaration,
212
- bint write_complete_document,
213
- bint pretty_print, bint with_tail,
214
- int standalone) noexcept nogil:
215
- cdef xmlNode* c_nsdecl_node
216
- cdef xmlDoc* c_doc = c_node.doc
217
- if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
218
- _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)
219
-
220
- # comments/processing instructions before doctype declaration
221
- if write_complete_document and not c_buffer.error and c_doc.intSubset:
222
- _writePrevSiblings(c_buffer, <xmlNode*>c_doc.intSubset, encoding, pretty_print)
223
-
224
- if c_doctype:
225
- _writeDoctype(c_buffer, c_doctype)
226
- # write internal DTD subset, preceding PIs/comments, etc.
227
- if write_complete_document and not c_buffer.error:
228
- if c_doctype is NULL:
229
- _writeDtdToBuffer(c_buffer, c_doc, c_node.name, c_method, encoding)
230
- _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)
231
-
232
- c_nsdecl_node = c_node
233
- if not c_node.parent or c_node.parent.type != tree.XML_DOCUMENT_NODE:
234
- # copy the node and add namespaces from parents
235
- # this is required to make libxml write them
236
- c_nsdecl_node = tree.xmlCopyNode(c_node, 2)
237
- if not c_nsdecl_node:
238
- c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
239
- return
240
- _copyParentNamespaces(c_node, c_nsdecl_node)
241
-
242
- c_nsdecl_node.parent = c_node.parent
243
- c_nsdecl_node.children = c_node.children
244
- c_nsdecl_node.last = c_node.last
245
-
246
- # write node
247
- if c_method == OUTPUT_METHOD_HTML:
248
- tree.htmlNodeDumpFormatOutput(
249
- c_buffer, c_doc, c_nsdecl_node, encoding, pretty_print)
250
- else:
251
- tree.xmlNodeDumpOutput(
252
- c_buffer, c_doc, c_nsdecl_node, 0, pretty_print, encoding)
253
-
254
- if c_nsdecl_node is not c_node:
255
- # clean up
256
- c_nsdecl_node.children = c_nsdecl_node.last = NULL
257
- tree.xmlFreeNode(c_nsdecl_node)
258
-
259
- if c_buffer.error:
260
- return
261
-
262
- # write tail, trailing comments, etc.
263
- if with_tail:
264
- _writeTail(c_buffer, c_node, encoding, c_method, pretty_print)
265
- if write_complete_document:
266
- _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
267
- if pretty_print:
268
- tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
269
-
270
- cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
271
- const_xmlChar* version, const_char* encoding,
272
- int standalone) noexcept nogil:
273
- if version is NULL:
274
- version = <unsigned char*>"1.0"
275
- tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
276
- tree.xmlOutputBufferWriteString(c_buffer, <const_char*>version)
277
- tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
278
- tree.xmlOutputBufferWriteString(c_buffer, encoding)
279
- if standalone == 0:
280
- tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n")
281
- elif standalone == 1:
282
- tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n")
283
- else:
284
- tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
285
-
286
- cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
287
- xmlDoc* c_doc, const_xmlChar* c_root_name,
288
- int c_method, const_char* encoding) noexcept nogil:
289
- cdef tree.xmlDtd* c_dtd
290
- cdef xmlNode* c_node
291
- cdef char* quotechar
292
- c_dtd = c_doc.intSubset
293
- if not c_dtd or not c_dtd.name:
294
- return
295
-
296
- # Name in document type declaration must match the root element tag.
297
- # For XML, case sensitive match, for HTML insensitive.
298
- if c_method == OUTPUT_METHOD_HTML:
299
- if tree.xmlStrcasecmp(c_root_name, c_dtd.name) != 0:
300
- return
301
- else:
302
- if tree.xmlStrcmp(c_root_name, c_dtd.name) != 0:
303
- return
304
-
305
- tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
306
- tree.xmlOutputBufferWriteString(c_buffer, <const_char*>c_dtd.name)
307
-
308
- cdef const_xmlChar* public_id = c_dtd.ExternalID
309
- cdef const_xmlChar* sys_url = c_dtd.SystemID
310
- if public_id and public_id[0] == b'\0':
311
- public_id = NULL
312
- if sys_url and sys_url[0] == b'\0':
313
- sys_url = NULL
314
-
315
- if public_id:
316
- tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
317
- tree.xmlOutputBufferWriteString(c_buffer, <const_char*>public_id)
318
- if sys_url:
319
- tree.xmlOutputBufferWrite(c_buffer, 2, '" ')
320
- else:
321
- tree.xmlOutputBufferWrite(c_buffer, 1, '"')
322
- elif sys_url:
323
- tree.xmlOutputBufferWrite(c_buffer, 8, ' SYSTEM ')
324
-
325
- if sys_url:
326
- if tree.xmlStrchr(sys_url, b'"'):
327
- quotechar = '\''
328
- else:
329
- quotechar = '"'
330
- tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)
331
- tree.xmlOutputBufferWriteString(c_buffer, <const_char*>sys_url)
332
- tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)
333
-
334
- if (not c_dtd.entities and not c_dtd.elements and
335
- not c_dtd.attributes and not c_dtd.notations and
336
- not c_dtd.pentities):
337
- tree.xmlOutputBufferWrite(c_buffer, 2, '>\n')
338
- return
339
-
340
- tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n')
341
- if c_dtd.notations and not c_buffer.error:
342
- c_buf = tree.xmlBufferCreate()
343
- if not c_buf:
344
- c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
345
- return
346
- tree.xmlDumpNotationTable(c_buf, <tree.xmlNotationTable*>c_dtd.notations)
347
- tree.xmlOutputBufferWrite(
348
- c_buffer, tree.xmlBufferLength(c_buf),
349
- <const_char*>tree.xmlBufferContent(c_buf))
350
- tree.xmlBufferFree(c_buf)
351
- c_node = c_dtd.children
352
- while c_node and not c_buffer.error:
353
- tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
354
- c_node = c_node.next
355
- tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
356
-
357
- cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
358
- const_char* encoding, int c_method, bint pretty_print) noexcept nogil:
359
- "Write the element tail."
360
- c_node = c_node.next
361
- while c_node and not c_buffer.error and c_node.type in (
362
- tree.XML_TEXT_NODE, tree.XML_CDATA_SECTION_NODE):
363
- if c_method == OUTPUT_METHOD_HTML:
364
- tree.htmlNodeDumpFormatOutput(
365
- c_buffer, c_node.doc, c_node, encoding, pretty_print)
366
- else:
367
- tree.xmlNodeDumpOutput(
368
- c_buffer, c_node.doc, c_node, 0, pretty_print, encoding)
369
- c_node = c_node.next
370
-
371
- cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
372
- const_char* encoding, bint pretty_print) noexcept nogil:
373
- cdef xmlNode* c_sibling
374
- if c_node.parent and _isElement(c_node.parent):
375
- return
376
- # we are at a root node, so add PI and comment siblings
377
- c_sibling = c_node
378
- while c_sibling.prev and \
379
- (c_sibling.prev.type == tree.XML_PI_NODE or
380
- c_sibling.prev.type == tree.XML_COMMENT_NODE):
381
- c_sibling = c_sibling.prev
382
- while c_sibling is not c_node and not c_buffer.error:
383
- tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
384
- pretty_print, encoding)
385
- if pretty_print:
386
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
387
- c_sibling = c_sibling.next
388
-
389
- cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
390
- const_char* encoding, bint pretty_print) noexcept nogil:
391
- cdef xmlNode* c_sibling
392
- if c_node.parent and _isElement(c_node.parent):
393
- return
394
- # we are at a root node, so add PI and comment siblings
395
- c_sibling = c_node.next
396
- while not c_buffer.error and c_sibling and \
397
- (c_sibling.type == tree.XML_PI_NODE or
398
- c_sibling.type == tree.XML_COMMENT_NODE):
399
- if pretty_print:
400
- tree.xmlOutputBufferWriteString(c_buffer, "\n")
401
- tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
402
- pretty_print, encoding)
403
- c_sibling = c_sibling.next
404
-
405
-
406
- # copied and adapted from libxml2
407
- cdef unsigned char *xmlSerializeHexCharRef(unsigned char *out, int val) noexcept:
408
- cdef xmlChar *ptr
409
- cdef const xmlChar* hexdigits = b"0123456789ABCDEF"
410
-
411
- out[0] = b'&'
412
- out += 1
413
- out[0] = b'#'
414
- out += 1
415
- out[0] = b'x'
416
- out += 1
417
-
418
- if val < 0x10:
419
- ptr = out
420
- elif val < 0x100:
421
- ptr = out + 1
422
- elif val < 0x1000:
423
- ptr = out + 2
424
- elif val < 0x10000:
425
- ptr = out + 3
426
- elif val < 0x100000:
427
- ptr = out + 4
428
- else:
429
- ptr = out + 5
430
-
431
- out = ptr + 1
432
- while val > 0:
433
- ptr[0] = hexdigits[val & 0xF]
434
- ptr -= 1
435
- val >>= 4
436
-
437
- out[0] = b';'
438
- out += 1
439
- out[0] = 0
440
-
441
- return out
442
-
443
-
444
- # copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent())
445
- cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
446
- cdef const char *base
447
- cdef const char *cur
448
- cdef const unsigned char *ucur
449
-
450
- cdef unsigned char tmp[12]
451
- cdef int val = 0
452
- cdef int l
453
-
454
- if string == NULL:
455
- return
456
-
457
- base = cur = <const char*>string
458
- while cur[0] != 0:
459
- if cur[0] == b'\n':
460
- if base != cur:
461
- tree.xmlOutputBufferWrite(buf, cur - base, base)
462
-
463
- tree.xmlOutputBufferWrite(buf, 5, "&#10;")
464
- cur += 1
465
- base = cur
466
-
467
- elif cur[0] == b'\r':
468
- if base != cur:
469
- tree.xmlOutputBufferWrite(buf, cur - base, base)
470
-
471
- tree.xmlOutputBufferWrite(buf, 5, "&#13;")
472
- cur += 1
473
- base = cur
474
-
475
- elif cur[0] == b'\t':
476
- if base != cur:
477
- tree.xmlOutputBufferWrite(buf, cur - base, base)
478
-
479
- tree.xmlOutputBufferWrite(buf, 4, "&#9;")
480
- cur += 1
481
- base = cur
482
-
483
- elif cur[0] == b'"':
484
- if base != cur:
485
- tree.xmlOutputBufferWrite(buf, cur - base, base)
486
-
487
- tree.xmlOutputBufferWrite(buf, 6, "&quot;")
488
- cur += 1
489
- base = cur
490
-
491
- elif cur[0] == b'<':
492
- if base != cur:
493
- tree.xmlOutputBufferWrite(buf, cur - base, base)
494
-
495
- tree.xmlOutputBufferWrite(buf, 4, "&lt;")
496
- cur += 1
497
- base = cur
498
-
499
- elif cur[0] == b'>':
500
- if base != cur:
501
- tree.xmlOutputBufferWrite(buf, cur - base, base)
502
-
503
- tree.xmlOutputBufferWrite(buf, 4, "&gt;")
504
- cur += 1
505
- base = cur
506
- elif cur[0] == b'&':
507
- if base != cur:
508
- tree.xmlOutputBufferWrite(buf, cur - base, base)
509
-
510
- tree.xmlOutputBufferWrite(buf, 5, "&amp;")
511
- cur += 1
512
- base = cur
513
-
514
- elif (<const unsigned char>cur[0] >= 0x80) and (cur[1] != 0):
515
-
516
- if base != cur:
517
- tree.xmlOutputBufferWrite(buf, cur - base, base)
518
-
519
- ucur = <const unsigned char *>cur
520
-
521
- if ucur[0] < 0xC0:
522
- # invalid UTF-8 sequence
523
- val = ucur[0]
524
- l = 1
525
-
526
- elif ucur[0] < 0xE0:
527
- val = (ucur[0]) & 0x1F
528
- val <<= 6
529
- val |= (ucur[1]) & 0x3F
530
- l = 2
531
-
532
- elif (ucur[0] < 0xF0) and (ucur[2] != 0):
533
- val = (ucur[0]) & 0x0F
534
- val <<= 6
535
- val |= (ucur[1]) & 0x3F
536
- val <<= 6
537
- val |= (ucur[2]) & 0x3F
538
- l = 3
539
-
540
- elif (ucur[0] < 0xF8) and (ucur[2] != 0) and (ucur[3] != 0):
541
- val = (ucur[0]) & 0x07
542
- val <<= 6
543
- val |= (ucur[1]) & 0x3F
544
- val <<= 6
545
- val |= (ucur[2]) & 0x3F
546
- val <<= 6
547
- val |= (ucur[3]) & 0x3F
548
- l = 4
549
- else:
550
- # invalid UTF-8 sequence
551
- val = ucur[0]
552
- l = 1
553
-
554
- if (l == 1) or (not tree.xmlIsCharQ(val)):
555
- raise ValueError(f"Invalid character: {val:X}")
556
-
557
- # We could do multiple things here. Just save
558
- # as a char ref
559
- xmlSerializeHexCharRef(tmp, val)
560
- tree.xmlOutputBufferWrite(buf, len(tmp), <const char*> tmp)
561
- cur += l
562
- base = cur
563
-
564
- else:
565
- cur += 1
566
-
567
- if base != cur:
568
- tree.xmlOutputBufferWrite(buf, cur - base, base)
569
-
570
-
571
- ############################################################
572
- # output to file-like objects
573
-
574
- cdef object io_open
575
- from io import open
576
-
577
- cdef object gzip
578
- import gzip
579
-
580
- cdef object getwriter
581
- from codecs import getwriter
582
- cdef object utf8_writer = getwriter('utf8')
583
-
584
- cdef object contextmanager
585
- from contextlib import contextmanager
586
-
587
- cdef object _open_utf8_file
588
-
589
- @contextmanager
590
- def _open_utf8_file(file, compression=0):
591
- file = _getFSPathOrObject(file)
592
- if _isString(file):
593
- if compression:
594
- with gzip.GzipFile(file, mode='wb', compresslevel=compression) as zf:
595
- yield utf8_writer(zf)
596
- else:
597
- with io_open(file, 'w', encoding='utf8') as f:
598
- yield f
599
- else:
600
- if compression:
601
- with gzip.GzipFile(fileobj=file, mode='wb', compresslevel=compression) as zf:
602
- yield utf8_writer(zf)
603
- else:
604
- yield utf8_writer(file)
605
-
606
-
607
- @cython.final
608
- @cython.internal
609
- cdef class _FilelikeWriter:
610
- cdef object _filelike
611
- cdef object _close_filelike
612
- cdef _ExceptionContext _exc_context
613
- cdef _ErrorLog error_log
614
- def __cinit__(self, filelike, exc_context=None, compression=None, close=False):
615
- if compression is not None and compression > 0:
616
- filelike = GzipFile(
617
- fileobj=filelike, mode='wb', compresslevel=compression)
618
- self._close_filelike = filelike.close
619
- elif close:
620
- self._close_filelike = filelike.close
621
- self._filelike = filelike
622
- if exc_context is None:
623
- self._exc_context = _ExceptionContext()
624
- else:
625
- self._exc_context = exc_context
626
- self.error_log = _ErrorLog()
627
-
628
- cdef tree.xmlOutputBuffer* _createOutputBuffer(
629
- self, tree.xmlCharEncodingHandler* enchandler) except NULL:
630
- cdef tree.xmlOutputBuffer* c_buffer
631
- c_buffer = tree.xmlOutputBufferCreateIO(
632
- <tree.xmlOutputWriteCallback>_writeFilelikeWriter, _closeFilelikeWriter,
633
- <python.PyObject*>self, enchandler)
634
- if c_buffer is NULL:
635
- raise IOError, "Could not create I/O writer context."
636
- return c_buffer
637
-
638
- cdef int write(self, char* c_buffer, int size) noexcept:
639
- try:
640
- if self._filelike is None:
641
- raise IOError, "File is already closed"
642
- py_buffer = <bytes>c_buffer[:size]
643
- self._filelike.write(py_buffer)
644
- except:
645
- size = -1
646
- self._exc_context._store_raised()
647
- finally:
648
- return size # and swallow any further exceptions
649
-
650
- cdef int close(self) noexcept:
651
- retval = 0
652
- try:
653
- if self._close_filelike is not None:
654
- self._close_filelike()
655
- # we should not close the file here as we didn't open it
656
- self._filelike = None
657
- except:
658
- retval = -1
659
- self._exc_context._store_raised()
660
- finally:
661
- return retval # and swallow any further exceptions
662
-
663
- cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int length) noexcept:
664
- return (<_FilelikeWriter>ctxt).write(c_buffer, length)
665
-
666
- cdef int _closeFilelikeWriter(void* ctxt) noexcept:
667
- return (<_FilelikeWriter>ctxt).close()
668
-
669
- cdef _tofilelike(f, _Element element, encoding, doctype, method,
670
- bint write_xml_declaration, bint write_doctype,
671
- bint pretty_print, bint with_tail, int standalone,
672
- int compression):
673
- cdef _FilelikeWriter writer = None
674
- cdef tree.xmlOutputBuffer* c_buffer
675
- cdef tree.xmlCharEncodingHandler* enchandler
676
- cdef const_char* c_enc
677
- cdef const_xmlChar* c_doctype
678
- cdef int error_result
679
-
680
- c_method = _findOutputMethod(method)
681
- if c_method == OUTPUT_METHOD_TEXT:
682
- data = _textToString(element._c_node, encoding, with_tail)
683
- if compression:
684
- bytes_out = BytesIO()
685
- with GzipFile(fileobj=bytes_out, mode='wb', compresslevel=compression) as gzip_file:
686
- gzip_file.write(data)
687
- data = bytes_out.getvalue()
688
- f = _getFSPathOrObject(f)
689
- if _isString(f):
690
- filename8 = _encodeFilename(f)
691
- with open(filename8, 'wb') as f:
692
- f.write(data)
693
- else:
694
- f.write(data)
695
- return
696
-
697
- if encoding is None:
698
- c_enc = NULL
699
- else:
700
- encoding = _utf8(encoding)
701
- c_enc = _cstr(encoding)
702
- if doctype is None:
703
- c_doctype = NULL
704
- else:
705
- doctype = _utf8(doctype)
706
- c_doctype = _xcstr(doctype)
707
-
708
- writer = _create_output_buffer(f, c_enc, compression, &c_buffer, close=False)
709
- if writer is None:
710
- with nogil:
711
- error_result = _serialise_node(
712
- c_buffer, c_doctype, c_enc, element._c_node, c_method,
713
- write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
714
- else:
715
- error_result = _serialise_node(
716
- c_buffer, c_doctype, c_enc, element._c_node, c_method,
717
- write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
718
-
719
- if writer is not None:
720
- writer._exc_context._raise_if_stored()
721
- if error_result != xmlerror.XML_ERR_OK:
722
- _raiseSerialisationError(error_result)
723
-
724
-
725
- cdef int _serialise_node(tree.xmlOutputBuffer* c_buffer, const_xmlChar* c_doctype,
726
- const_char* c_enc, xmlNode* c_node, int c_method,
727
- bint write_xml_declaration, bint write_doctype, bint pretty_print,
728
- bint with_tail, int standalone) noexcept nogil:
729
- _writeNodeToBuffer(
730
- c_buffer, c_node, c_enc, c_doctype, c_method,
731
- write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
732
- error_result = c_buffer.error
733
- if error_result == xmlerror.XML_ERR_OK:
734
- error_result = tree.xmlOutputBufferClose(c_buffer)
735
- if error_result != -1:
736
- error_result = xmlerror.XML_ERR_OK
737
- else:
738
- tree.xmlOutputBufferClose(c_buffer)
739
- return error_result
740
-
741
-
742
- cdef _FilelikeWriter _create_output_buffer(
743
- f, const_char* c_enc, int c_compression,
744
- tree.xmlOutputBuffer** c_buffer_ret, bint close):
745
- cdef tree.xmlOutputBuffer* c_buffer
746
- cdef _FilelikeWriter writer
747
- cdef bytes filename8
748
- enchandler = tree.xmlFindCharEncodingHandler(c_enc)
749
- if enchandler is NULL:
750
- raise LookupError(
751
- f"unknown encoding: '{c_enc.decode('UTF-8') if c_enc is not NULL else u''}'")
752
- try:
753
- f = _getFSPathOrObject(f)
754
- if _isString(f):
755
- filename8 = _encodeFilename(f)
756
- if b'%' in filename8 and (
757
- # Exclude absolute Windows paths and file:// URLs.
758
- _isFilePath(<const xmlChar*>filename8) not in (NO_FILE_PATH, ABS_WIN_FILE_PATH)
759
- or filename8[:7].lower() == b'file://'):
760
- # A file path (not a URL) containing the '%' URL escape character.
761
- # libxml2 uses URL-unescaping on these, so escape the path before passing it in.
762
- filename8 = filename8.replace(b'%', b'%25')
763
- c_buffer = tree.xmlOutputBufferCreateFilename(
764
- _cstr(filename8), enchandler, c_compression)
765
- if c_buffer is NULL:
766
- python.PyErr_SetFromErrno(IOError) # raises IOError
767
- writer = None
768
- elif hasattr(f, 'write'):
769
- writer = _FilelikeWriter(f, compression=c_compression, close=close)
770
- c_buffer = writer._createOutputBuffer(enchandler)
771
- else:
772
- raise TypeError(
773
- f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
774
- except:
775
- tree.xmlCharEncCloseFunc(enchandler)
776
- raise
777
- c_buffer_ret[0] = c_buffer
778
- return writer
779
-
780
- cdef xmlChar **_convert_ns_prefixes(tree.xmlDict* c_dict, ns_prefixes) except NULL:
781
- cdef size_t i, num_ns_prefixes = len(ns_prefixes)
782
- # Need to allocate one extra memory block to handle last NULL entry
783
- c_ns_prefixes = <xmlChar **>python.lxml_malloc(num_ns_prefixes + 1, sizeof(xmlChar*))
784
- if not c_ns_prefixes:
785
- raise MemoryError()
786
- i = 0
787
- try:
788
- for prefix in ns_prefixes:
789
- prefix_utf = _utf8(prefix)
790
- c_prefix = tree.xmlDictExists(c_dict, _xcstr(prefix_utf), len(prefix_utf))
791
- if c_prefix:
792
- # unknown prefixes do not need to get serialised
793
- c_ns_prefixes[i] = <xmlChar*>c_prefix
794
- i += 1
795
- except:
796
- python.lxml_free(c_ns_prefixes)
797
- raise
798
-
799
- c_ns_prefixes[i] = NULL # append end marker
800
- return c_ns_prefixes
801
-
802
- cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
803
- int compression, inclusive_ns_prefixes):
804
- cdef _FilelikeWriter writer = None
805
- cdef tree.xmlOutputBuffer* c_buffer
806
- cdef xmlChar **c_inclusive_ns_prefixes = NULL
807
- cdef char* c_filename
808
- cdef xmlDoc* c_base_doc
809
- cdef xmlDoc* c_doc
810
- cdef int bytes_count, error = 0
811
-
812
- c_base_doc = element._c_node.doc
813
- c_doc = _fakeRootDoc(c_base_doc, element._c_node)
814
- try:
815
- c_inclusive_ns_prefixes = (
816
- _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes)
817
- if inclusive_ns_prefixes else NULL)
818
-
819
- f = _getFSPathOrObject(f)
820
- if _isString(f):
821
- filename8 = _encodeFilename(f)
822
- c_filename = _cstr(filename8)
823
- with nogil:
824
- error = c14n.xmlC14NDocSave(
825
- c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
826
- with_comments, c_filename, compression)
827
- elif hasattr(f, 'write'):
828
- writer = _FilelikeWriter(f, compression=compression)
829
- c_buffer = writer._createOutputBuffer(NULL)
830
- try:
831
- with writer.error_log:
832
- bytes_count = c14n.xmlC14NDocSaveTo(
833
- c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
834
- with_comments, c_buffer)
835
- finally:
836
- error = tree.xmlOutputBufferClose(c_buffer)
837
- if bytes_count < 0:
838
- error = bytes_count
839
- elif error != -1:
840
- error = xmlerror.XML_ERR_OK
841
- else:
842
- raise TypeError(f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
843
- finally:
844
- _destroyFakeDoc(c_base_doc, c_doc)
845
- if c_inclusive_ns_prefixes is not NULL:
846
- python.lxml_free(c_inclusive_ns_prefixes)
847
-
848
- if writer is not None:
849
- writer._exc_context._raise_if_stored()
850
-
851
- if error < 0:
852
- message = "C14N failed"
853
- if writer is not None:
854
- errors = writer.error_log
855
- if len(errors):
856
- message = errors[0].message
857
- raise C14NError(message)
858
-
859
-
860
- # C14N 2.0
861
-
862
- def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
863
- """Convert XML to its C14N 2.0 serialised form.
864
-
865
- If *out* is provided, it must be a file or file-like object that receives
866
- the serialised canonical XML output (text, not bytes) through its ``.write()``
867
- method. To write to a file, open it in text mode with encoding "utf-8".
868
- If *out* is not provided, this function returns the output as text string.
869
-
870
- Either *xml_data* (an XML string, tree or Element) or *file*
871
- (a file path or file-like object) must be provided as input.
872
-
873
- The configuration options are the same as for the ``C14NWriterTarget``.
874
- """
875
- if xml_data is None and from_file is None:
876
- raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")
877
-
878
- sio = None
879
- if out is None:
880
- sio = out = StringIO()
881
-
882
- target = C14NWriterTarget(out.write, **options)
883
-
884
- if xml_data is not None and not isinstance(xml_data, basestring):
885
- _tree_to_target(xml_data, target)
886
- return sio.getvalue() if sio is not None else None
887
-
888
- cdef _FeedParser parser = XMLParser(
889
- target=target,
890
- attribute_defaults=True,
891
- collect_ids=False,
892
- )
893
-
894
- if xml_data is not None:
895
- parser.feed(xml_data)
896
- parser.close()
897
- elif from_file is not None:
898
- try:
899
- _parseDocument(from_file, parser, base_url=None)
900
- except _TargetParserResult:
901
- pass
902
-
903
- return sio.getvalue() if sio is not None else None
904
-
905
-
906
- cdef _tree_to_target(element, target):
907
- for event, elem in iterwalk(element, events=('start', 'end', 'start-ns', 'comment', 'pi')):
908
- text = None
909
- if event == 'start':
910
- target.start(elem.tag, elem.attrib)
911
- text = elem.text
912
- elif event == 'end':
913
- target.end(elem.tag)
914
- text = elem.tail
915
- elif event == 'start-ns':
916
- target.start_ns(*elem)
917
- continue
918
- elif event == 'comment':
919
- target.comment(elem.text)
920
- text = elem.tail
921
- elif event == 'pi':
922
- target.pi(elem.target, elem.text)
923
- text = elem.tail
924
- if text:
925
- target.data(text)
926
- return target.close()
927
-
928
-
929
- cdef object _looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
930
-
931
-
932
- cdef class C14NWriterTarget:
933
- """
934
- Canonicalization writer target for the XMLParser.
935
-
936
- Serialises parse events to XML C14N 2.0.
937
-
938
- Configuration options:
939
-
940
- - *with_comments*: set to true to include comments
941
- - *strip_text*: set to true to strip whitespace before and after text content
942
- - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
943
- - *qname_aware_tags*: a set of qname aware tag names in which prefixes
944
- should be replaced in text content
945
- - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
946
- should be replaced in text content
947
- - *exclude_attrs*: a set of attribute names that should not be serialised
948
- - *exclude_tags*: a set of tag names that should not be serialised
949
- """
950
- cdef object _write
951
- cdef list _data
952
- cdef set _qname_aware_tags
953
- cdef object _find_qname_aware_attrs
954
- cdef list _declared_ns_stack
955
- cdef list _ns_stack
956
- cdef dict _prefix_map
957
- cdef list _preserve_space
958
- cdef tuple _pending_start
959
- cdef set _exclude_tags
960
- cdef set _exclude_attrs
961
- cdef Py_ssize_t _ignored_depth
962
- cdef bint _with_comments
963
- cdef bint _strip_text
964
- cdef bint _rewrite_prefixes
965
- cdef bint _root_seen
966
- cdef bint _root_done
967
-
968
- def __init__(self, write, *,
969
- with_comments=False, strip_text=False, rewrite_prefixes=False,
970
- qname_aware_tags=None, qname_aware_attrs=None,
971
- exclude_attrs=None, exclude_tags=None):
972
- self._write = write
973
- self._data = []
974
- self._with_comments = with_comments
975
- self._strip_text = strip_text
976
- self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
977
- self._exclude_tags = set(exclude_tags) if exclude_tags else None
978
-
979
- self._rewrite_prefixes = rewrite_prefixes
980
- if qname_aware_tags:
981
- self._qname_aware_tags = set(qname_aware_tags)
982
- else:
983
- self._qname_aware_tags = None
984
- if qname_aware_attrs:
985
- self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
986
- else:
987
- self._find_qname_aware_attrs = None
988
-
989
- # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
990
- self._declared_ns_stack = [[
991
- ("http://www.w3.org/XML/1998/namespace", "xml"),
992
- ]]
993
- # Stack with user declared namespace prefixes as (uri, prefix) pairs.
994
- self._ns_stack = []
995
- if not rewrite_prefixes:
996
- self._ns_stack.append(_DEFAULT_NAMESPACE_PREFIXES_ITEMS)
997
- self._ns_stack.append([])
998
- self._prefix_map = {}
999
- self._preserve_space = [False]
1000
- self._pending_start = None
1001
- self._ignored_depth = 0
1002
- self._root_seen = False
1003
- self._root_done = False
1004
-
1005
- def _iter_namespaces(self, ns_stack):
1006
- for namespaces in reversed(ns_stack):
1007
- if namespaces: # almost no element declares new namespaces
1008
- yield from namespaces
1009
-
1010
- cdef _resolve_prefix_name(self, prefixed_name):
1011
- prefix, name = prefixed_name.split(':', 1)
1012
- for uri, p in self._iter_namespaces(self._ns_stack):
1013
- if p == prefix:
1014
- return f'{{{uri}}}{name}'
1015
- raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')
1016
-
1017
- cdef _qname(self, qname, uri=None):
1018
- if uri is None:
1019
- uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
1020
- else:
1021
- tag = qname
1022
-
1023
- prefixes_seen = set()
1024
- for u, prefix in self._iter_namespaces(self._declared_ns_stack):
1025
- if u == uri and prefix not in prefixes_seen:
1026
- return f'{prefix}:{tag}' if prefix else tag, tag, uri
1027
- prefixes_seen.add(prefix)
1028
-
1029
- # Not declared yet => add new declaration.
1030
- if self._rewrite_prefixes:
1031
- if uri in self._prefix_map:
1032
- prefix = self._prefix_map[uri]
1033
- else:
1034
- prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
1035
- self._declared_ns_stack[-1].append((uri, prefix))
1036
- return f'{prefix}:{tag}', tag, uri
1037
-
1038
- if not uri and '' not in prefixes_seen:
1039
- # No default namespace declared => no prefix needed.
1040
- return tag, tag, uri
1041
-
1042
- for u, prefix in self._iter_namespaces(self._ns_stack):
1043
- if u == uri:
1044
- self._declared_ns_stack[-1].append((uri, prefix))
1045
- return f'{prefix}:{tag}' if prefix else tag, tag, uri
1046
-
1047
- if not uri:
1048
- # As soon as a default namespace is defined,
1049
- # anything that has no namespace (and thus, no prefix) goes there.
1050
- return tag, tag, uri
1051
-
1052
- raise ValueError(f'Namespace "{uri}" of name "{tag}" is not declared in scope')
1053
-
1054
- def data(self, data):
1055
- if not self._ignored_depth:
1056
- self._data.append(data)
1057
-
1058
- cdef _flush(self):
1059
- cdef unicode data = ''.join(self._data)
1060
- del self._data[:]
1061
- if self._strip_text and not self._preserve_space[-1]:
1062
- data = data.strip()
1063
- if self._pending_start is not None:
1064
- (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None
1065
- qname_text = data if ':' in data and _looks_like_prefix_name(data) else None
1066
- self._start(tag, attrs, new_namespaces, qname_text)
1067
- if qname_text is not None:
1068
- return
1069
- if data and self._root_seen:
1070
- self._write(_escape_cdata_c14n(data))
1071
-
1072
- def start_ns(self, prefix, uri):
1073
- if self._ignored_depth:
1074
- return
1075
- # we may have to resolve qnames in text content
1076
- if self._data:
1077
- self._flush()
1078
- self._ns_stack[-1].append((uri, prefix))
1079
-
1080
- def start(self, tag, attrs):
1081
- if self._exclude_tags is not None and (
1082
- self._ignored_depth or tag in self._exclude_tags):
1083
- self._ignored_depth += 1
1084
- return
1085
- if self._data:
1086
- self._flush()
1087
-
1088
- new_namespaces = []
1089
- self._declared_ns_stack.append(new_namespaces)
1090
-
1091
- if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
1092
- # Need to parse text first to see if it requires a prefix declaration.
1093
- self._pending_start = (tag, attrs, new_namespaces)
1094
- return
1095
- self._start(tag, attrs, new_namespaces)
1096
-
1097
- cdef _start(self, tag, attrs, new_namespaces, qname_text=None):
1098
- if self._exclude_attrs is not None and attrs:
1099
- attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}
1100
-
1101
- qnames = {tag, *attrs}
1102
- resolved_names = {}
1103
-
1104
- # Resolve prefixes in attribute and tag text.
1105
- if qname_text is not None:
1106
- qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
1107
- qnames.add(qname)
1108
- if self._find_qname_aware_attrs is not None and attrs:
1109
- qattrs = self._find_qname_aware_attrs(attrs)
1110
- if qattrs:
1111
- for attr_name in qattrs:
1112
- value = attrs[attr_name]
1113
- if _looks_like_prefix_name(value):
1114
- qname = resolved_names[value] = self._resolve_prefix_name(value)
1115
- qnames.add(qname)
1116
- else:
1117
- qattrs = None
1118
- else:
1119
- qattrs = None
1120
-
1121
- # Assign prefixes in lexicographical order of used URIs.
1122
- parsed_qnames = {n: self._qname(n) for n in sorted(
1123
- qnames, key=lambda n: n.split('}', 1))}
1124
-
1125
- # Write namespace declarations in prefix order ...
1126
- if new_namespaces:
1127
- attr_list = [
1128
- ('xmlns:' + prefix if prefix else 'xmlns', uri)
1129
- for uri, prefix in new_namespaces
1130
- ]
1131
- attr_list.sort()
1132
- else:
1133
- # almost always empty
1134
- attr_list = []
1135
-
1136
- # ... followed by attributes in URI+name order
1137
- if attrs:
1138
- for k, v in sorted(attrs.items()):
1139
- if qattrs is not None and k in qattrs and v in resolved_names:
1140
- v = parsed_qnames[resolved_names[v]][0]
1141
- attr_qname, attr_name, uri = parsed_qnames[k]
1142
- # No prefix for attributes in default ('') namespace.
1143
- attr_list.append((attr_qname if uri else attr_name, v))
1144
-
1145
- # Honour xml:space attributes.
1146
- space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
1147
- self._preserve_space.append(
1148
- space_behaviour == 'preserve' if space_behaviour
1149
- else self._preserve_space[-1])
1150
-
1151
- # Write the tag.
1152
- write = self._write
1153
- write('<' + parsed_qnames[tag][0])
1154
- if attr_list:
1155
- write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
1156
- write('>')
1157
-
1158
- # Write the resolved qname text content.
1159
- if qname_text is not None:
1160
- write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))
1161
-
1162
- self._root_seen = True
1163
- self._ns_stack.append([])
1164
-
1165
- def end(self, tag):
1166
- if self._ignored_depth:
1167
- self._ignored_depth -= 1
1168
- return
1169
- if self._data:
1170
- self._flush()
1171
- self._write(f'</{self._qname(tag)[0]}>')
1172
- self._preserve_space.pop()
1173
- self._root_done = len(self._preserve_space) == 1
1174
- self._declared_ns_stack.pop()
1175
- self._ns_stack.pop()
1176
-
1177
- def comment(self, text):
1178
- if not self._with_comments:
1179
- return
1180
- if self._ignored_depth:
1181
- return
1182
- if self._root_done:
1183
- self._write('\n')
1184
- elif self._root_seen and self._data:
1185
- self._flush()
1186
- self._write(f'<!--{_escape_cdata_c14n(text)}-->')
1187
- if not self._root_seen:
1188
- self._write('\n')
1189
-
1190
- def pi(self, target, data):
1191
- if self._ignored_depth:
1192
- return
1193
- if self._root_done:
1194
- self._write('\n')
1195
- elif self._root_seen and self._data:
1196
- self._flush()
1197
- self._write(
1198
- f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
1199
- if not self._root_seen:
1200
- self._write('\n')
1201
-
1202
- def close(self):
1203
- return None
1204
-
1205
-
1206
- cdef _raise_serialization_error(text):
1207
- raise TypeError("cannot serialize %r (type %s)" % (text, type(text).__name__))
1208
-
1209
-
1210
- cdef unicode _escape_cdata_c14n(stext):
1211
- # escape character data
1212
- cdef unicode text
1213
- cdef Py_UCS4 ch
1214
- cdef Py_ssize_t start = 0, pos = 0
1215
- cdef list substrings = None
1216
- try:
1217
- text = unicode(stext)
1218
- except (TypeError, AttributeError):
1219
- return _raise_serialization_error(stext)
1220
-
1221
- for pos, ch in enumerate(text):
1222
- if ch == '&':
1223
- escape = '&amp;'
1224
- elif ch == '<':
1225
- escape = '&lt;'
1226
- elif ch == '>':
1227
- escape = '&gt;'
1228
- elif ch == '\r':
1229
- escape = '&#xD;'
1230
- else:
1231
- continue
1232
-
1233
- if substrings is None:
1234
- substrings = []
1235
- if pos > start:
1236
- substrings.append(text[start:pos])
1237
- substrings.append(escape)
1238
- start = pos + 1
1239
-
1240
- if substrings is None:
1241
- return text
1242
- if pos >= start:
1243
- substrings.append(text[start:pos+1])
1244
- return ''.join(substrings)
1245
-
1246
-
1247
- cdef unicode _escape_attrib_c14n(stext):
1248
- # escape attribute value
1249
- cdef unicode text
1250
- cdef Py_UCS4 ch
1251
- cdef Py_ssize_t start = 0, pos = 0
1252
- cdef list substrings = None
1253
- try:
1254
- text = unicode(stext)
1255
- except (TypeError, AttributeError):
1256
- return _raise_serialization_error(stext)
1257
-
1258
- for pos, ch in enumerate(text):
1259
- if ch == '&':
1260
- escape = '&amp;'
1261
- elif ch == '<':
1262
- escape = '&lt;'
1263
- elif ch == '"':
1264
- escape = '&quot;'
1265
- elif ch == '\t':
1266
- escape = '&#x9;'
1267
- elif ch == '\n':
1268
- escape = '&#xA;'
1269
- elif ch == '\r':
1270
- escape = '&#xD;'
1271
- else:
1272
- continue
1273
-
1274
- if substrings is None:
1275
- substrings = []
1276
- if pos > start:
1277
- substrings.append(text[start:pos])
1278
- substrings.append(escape)
1279
- start = pos + 1
1280
-
1281
- if substrings is None:
1282
- return text
1283
- if pos >= start:
1284
- substrings.append(text[start:pos+1])
1285
- return ''.join(substrings)
1286
-
1287
-
1288
- # incremental serialisation
1289
-
1290
- cdef class xmlfile:
1291
- """xmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)
1292
-
1293
- A simple mechanism for incremental XML serialisation.
1294
-
1295
- Usage example::
1296
-
1297
- with xmlfile("somefile.xml", encoding='utf-8') as xf:
1298
- xf.write_declaration(standalone=True)
1299
- xf.write_doctype('<!DOCTYPE root SYSTEM "some.dtd">')
1300
-
1301
- # generate an element (the root element)
1302
- with xf.element('root'):
1303
- # write a complete Element into the open root element
1304
- xf.write(etree.Element('test'))
1305
-
1306
- # generate and write more Elements, e.g. through iterparse
1307
- for element in generate_some_elements():
1308
- # serialise generated elements into the XML file
1309
- xf.write(element)
1310
-
1311
- # or write multiple Elements or strings at once
1312
- xf.write(etree.Element('start'), "text", etree.Element('end'))
1313
-
1314
- If 'output_file' is a file(-like) object, passing ``close=True`` will
1315
- close it when exiting the context manager. By default, it is left
1316
- to the owner to do that. When a file path is used, lxml will take care
1317
- of opening and closing the file itself. Also, when a compression level
1318
- is set, lxml will deliberately close the file to make sure all data gets
1319
- compressed and written.
1320
-
1321
- Setting ``buffered=False`` will flush the output after each operation,
1322
- such as opening or closing an ``xf.element()`` block or calling
1323
- ``xf.write()``. Alternatively, calling ``xf.flush()`` can be used to
1324
- explicitly flush any pending output when buffering is enabled.
1325
- """
1326
- cdef object output_file
1327
- cdef bytes encoding
1328
- cdef _IncrementalFileWriter writer
1329
- cdef _AsyncIncrementalFileWriter async_writer
1330
- cdef int compresslevel
1331
- cdef bint close
1332
- cdef bint buffered
1333
- cdef int method
1334
-
1335
- def __init__(self, output_file not None, encoding=None, compression=None,
1336
- close=False, buffered=True):
1337
- self.output_file = output_file
1338
- self.encoding = _utf8orNone(encoding)
1339
- self.compresslevel = compression or 0
1340
- self.close = close
1341
- self.buffered = buffered
1342
- self.method = OUTPUT_METHOD_XML
1343
-
1344
- def __enter__(self):
1345
- assert self.output_file is not None
1346
- self.writer = _IncrementalFileWriter(
1347
- self.output_file, self.encoding, self.compresslevel,
1348
- self.close, self.buffered, self.method)
1349
- return self.writer
1350
-
1351
- def __exit__(self, exc_type, exc_val, exc_tb):
1352
- if self.writer is not None:
1353
- old_writer, self.writer = self.writer, None
1354
- raise_on_error = exc_type is None
1355
- old_writer._close(raise_on_error)
1356
- if self.close:
1357
- self.output_file = None
1358
-
1359
- async def __aenter__(self):
1360
- assert self.output_file is not None
1361
- if isinstance(self.output_file, basestring):
1362
- raise TypeError("Cannot asynchronously write to a plain file")
1363
- if not hasattr(self.output_file, 'write'):
1364
- raise TypeError("Output file needs an async .write() method")
1365
- self.async_writer = _AsyncIncrementalFileWriter(
1366
- self.output_file, self.encoding, self.compresslevel,
1367
- self.close, self.buffered, self.method)
1368
- return self.async_writer
1369
-
1370
- async def __aexit__(self, exc_type, exc_val, exc_tb):
1371
- if self.async_writer is not None:
1372
- old_writer, self.async_writer = self.async_writer, None
1373
- raise_on_error = exc_type is None
1374
- await old_writer._close(raise_on_error)
1375
- if self.close:
1376
- self.output_file = None
1377
-
1378
-
1379
- cdef class htmlfile(xmlfile):
1380
- """htmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)
1381
-
1382
- A simple mechanism for incremental HTML serialisation. Works the same as
1383
- xmlfile.
1384
- """
1385
- def __init__(self, *args, **kwargs):
1386
- super().__init__(*args, **kwargs)
1387
- self.method = OUTPUT_METHOD_HTML
1388
-
1389
-
1390
- cdef enum _IncrementalFileWriterStatus:
1391
- WRITER_STARTING = 0
1392
- WRITER_DECL_WRITTEN = 1
1393
- WRITER_DTD_WRITTEN = 2
1394
- WRITER_IN_ELEMENT = 3
1395
- WRITER_FINISHED = 4
1396
-
1397
-
1398
- @cython.final
1399
- @cython.internal
1400
- cdef class _IncrementalFileWriter:
1401
- cdef tree.xmlOutputBuffer* _c_out
1402
- cdef bytes _encoding
1403
- cdef const_char* _c_encoding
1404
- cdef _FilelikeWriter _target
1405
- cdef list _element_stack
1406
- cdef int _status
1407
- cdef int _method
1408
- cdef bint _buffered
1409
-
1410
- def __cinit__(self, outfile, bytes encoding, int compresslevel, bint close,
1411
- bint buffered, int method):
1412
- self._status = WRITER_STARTING
1413
- self._element_stack = []
1414
- if encoding is None:
1415
- encoding = b'ASCII'
1416
- self._encoding = encoding
1417
- self._c_encoding = _cstr(encoding) if encoding is not None else NULL
1418
- self._buffered = buffered
1419
- self._target = _create_output_buffer(
1420
- outfile, self._c_encoding, compresslevel, &self._c_out, close)
1421
- self._method = method
1422
-
1423
- def __dealloc__(self):
1424
- if self._c_out is not NULL:
1425
- tree.xmlOutputBufferClose(self._c_out)
1426
-
1427
- def write_declaration(self, version=None, standalone=None, doctype=None):
1428
- """write_declaration(self, version=None, standalone=None, doctype=None)
1429
-
1430
- Write an XML declaration and (optionally) a doctype into the file.
1431
- """
1432
- assert self._c_out is not NULL
1433
- cdef const_xmlChar* c_version
1434
- cdef int c_standalone
1435
- if self._method != OUTPUT_METHOD_XML:
1436
- raise LxmlSyntaxError("only XML documents have declarations")
1437
- if self._status >= WRITER_DECL_WRITTEN:
1438
- raise LxmlSyntaxError("XML declaration already written")
1439
- version = _utf8orNone(version)
1440
- c_version = _xcstr(version) if version is not None else NULL
1441
- doctype = _utf8orNone(doctype)
1442
- if standalone is None:
1443
- c_standalone = -1
1444
- else:
1445
- c_standalone = 1 if standalone else 0
1446
- _writeDeclarationToBuffer(self._c_out, c_version, self._c_encoding, c_standalone)
1447
- if doctype is not None:
1448
- _writeDoctype(self._c_out, _xcstr(doctype))
1449
- self._status = WRITER_DTD_WRITTEN
1450
- else:
1451
- self._status = WRITER_DECL_WRITTEN
1452
- if not self._buffered:
1453
- tree.xmlOutputBufferFlush(self._c_out)
1454
- self._handle_error(self._c_out.error)
1455
-
1456
- def write_doctype(self, doctype):
1457
- """write_doctype(self, doctype)
1458
-
1459
- Writes the given doctype declaration verbatimly into the file.
1460
- """
1461
- assert self._c_out is not NULL
1462
- if doctype is None:
1463
- return
1464
- if self._status >= WRITER_DTD_WRITTEN:
1465
- raise LxmlSyntaxError("DOCTYPE already written or cannot write it here")
1466
- doctype = _utf8(doctype)
1467
- _writeDoctype(self._c_out, _xcstr(doctype))
1468
- self._status = WRITER_DTD_WRITTEN
1469
- if not self._buffered:
1470
- tree.xmlOutputBufferFlush(self._c_out)
1471
- self._handle_error(self._c_out.error)
1472
-
1473
- def method(self, method):
1474
- """method(self, method)
1475
-
1476
- Returns a context manager that overrides and restores the output method.
1477
- method is one of (None, 'xml', 'html') where None means 'xml'.
1478
- """
1479
- assert self._c_out is not NULL
1480
- c_method = self._method if method is None else _findOutputMethod(method)
1481
- return _MethodChanger(self, c_method)
1482
-
1483
- def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
1484
- """element(self, tag, attrib=None, nsmap=None, method, **_extra)
1485
-
1486
- Returns a context manager that writes an opening and closing tag.
1487
- method is one of (None, 'xml', 'html') where None means 'xml'.
1488
- """
1489
- assert self._c_out is not NULL
1490
- attributes = []
1491
- if attrib is not None:
1492
- for name, value in _iter_attrib(attrib):
1493
- if name not in _extra:
1494
- ns, name = _getNsTag(name)
1495
- attributes.append((ns, name, _utf8(value)))
1496
- if _extra:
1497
- for name, value in _extra.iteritems():
1498
- ns, name = _getNsTag(name)
1499
- attributes.append((ns, name, _utf8(value)))
1500
- reversed_nsmap = {}
1501
- if nsmap:
1502
- for prefix, ns in nsmap.items():
1503
- if prefix is not None:
1504
- prefix = _utf8(prefix)
1505
- _prefixValidOrRaise(prefix)
1506
- reversed_nsmap[_utf8(ns)] = prefix
1507
- ns, name = _getNsTag(tag)
1508
-
1509
- c_method = self._method if method is None else _findOutputMethod(method)
1510
-
1511
- return _FileWriterElement(self, (ns, name, attributes, reversed_nsmap), c_method)
1512
-
1513
- cdef _write_qname(self, bytes name, bytes prefix):
1514
- if prefix: # empty bytes for no prefix (not None to allow sorting)
1515
- tree.xmlOutputBufferWrite(self._c_out, len(prefix), _cstr(prefix))
1516
- tree.xmlOutputBufferWrite(self._c_out, 1, ':')
1517
- tree.xmlOutputBufferWrite(self._c_out, len(name), _cstr(name))
1518
-
1519
- cdef _write_start_element(self, element_config):
1520
- if self._status > WRITER_IN_ELEMENT:
1521
- raise LxmlSyntaxError("cannot append trailing element to complete XML document")
1522
- ns, name, attributes, nsmap = element_config
1523
- flat_namespace_map, new_namespaces = self._collect_namespaces(nsmap)
1524
- prefix = self._find_prefix(ns, flat_namespace_map, new_namespaces)
1525
- tree.xmlOutputBufferWrite(self._c_out, 1, '<')
1526
- self._write_qname(name, prefix)
1527
-
1528
- self._write_attributes_and_namespaces(
1529
- attributes, flat_namespace_map, new_namespaces)
1530
-
1531
- tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1532
- if not self._buffered:
1533
- tree.xmlOutputBufferFlush(self._c_out)
1534
- self._handle_error(self._c_out.error)
1535
-
1536
- self._element_stack.append((ns, name, prefix, flat_namespace_map))
1537
- self._status = WRITER_IN_ELEMENT
1538
-
1539
- cdef _write_attributes_and_namespaces(self, list attributes,
1540
- dict flat_namespace_map,
1541
- list new_namespaces):
1542
- if attributes:
1543
- # _find_prefix() may append to new_namespaces => build them first
1544
- attributes = [
1545
- (self._find_prefix(ns, flat_namespace_map, new_namespaces), name, value)
1546
- for ns, name, value in attributes ]
1547
- if new_namespaces:
1548
- new_namespaces.sort()
1549
- self._write_attributes_list(new_namespaces)
1550
- if attributes:
1551
- self._write_attributes_list(attributes)
1552
-
1553
- cdef _write_attributes_list(self, list attributes):
1554
- for prefix, name, value in attributes:
1555
- tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
1556
- self._write_qname(name, prefix)
1557
- tree.xmlOutputBufferWrite(self._c_out, 2, '="')
1558
- _write_attr_string(self._c_out, _cstr(value))
1559
-
1560
- tree.xmlOutputBufferWrite(self._c_out, 1, '"')
1561
-
1562
- cdef _write_end_element(self, element_config):
1563
- if self._status != WRITER_IN_ELEMENT:
1564
- raise LxmlSyntaxError("not in an element")
1565
- if not self._element_stack or self._element_stack[-1][:2] != element_config[:2]:
1566
- raise LxmlSyntaxError("inconsistent exit action in context manager")
1567
-
1568
- # If previous write operations failed, the context manager exit might still call us.
1569
- # That is ok, but we stop writing closing tags and handling errors in that case.
1570
- # For all non-I/O errors, we continue writing closing tags if we can.
1571
- ok_to_write = self._c_out.error == xmlerror.XML_ERR_OK
1572
-
1573
- name, prefix = self._element_stack.pop()[1:3]
1574
- if ok_to_write:
1575
- tree.xmlOutputBufferWrite(self._c_out, 2, '</')
1576
- self._write_qname(name, prefix)
1577
- tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1578
-
1579
- if not self._element_stack:
1580
- self._status = WRITER_FINISHED
1581
- if ok_to_write:
1582
- if not self._buffered:
1583
- tree.xmlOutputBufferFlush(self._c_out)
1584
- self._handle_error(self._c_out.error)
1585
-
1586
- cdef _find_prefix(self, bytes href, dict flat_namespaces_map, list new_namespaces):
1587
- if href is None:
1588
- return None
1589
- if href in flat_namespaces_map:
1590
- return flat_namespaces_map[href]
1591
- # need to create a new prefix
1592
- prefixes = flat_namespaces_map.values()
1593
- i = 0
1594
- while True:
1595
- prefix = _utf8('ns%d' % i)
1596
- if prefix not in prefixes:
1597
- new_namespaces.append((b'xmlns', prefix, href))
1598
- flat_namespaces_map[href] = prefix
1599
- return prefix
1600
- i += 1
1601
-
1602
- cdef _collect_namespaces(self, dict nsmap):
1603
- new_namespaces = []
1604
- flat_namespaces_map = {}
1605
- for ns, prefix in nsmap.iteritems():
1606
- flat_namespaces_map[ns] = prefix
1607
- if prefix is None:
1608
- # use empty bytes rather than None to allow sorting
1609
- new_namespaces.append((b'', b'xmlns', ns))
1610
- else:
1611
- new_namespaces.append((b'xmlns', prefix, ns))
1612
- # merge in flat namespace map of parent
1613
- if self._element_stack:
1614
- for ns, prefix in (<dict>self._element_stack[-1][-1]).iteritems():
1615
- if flat_namespaces_map.get(ns) is None:
1616
- # unknown or empty prefix => prefer a 'real' prefix
1617
- flat_namespaces_map[ns] = prefix
1618
- return flat_namespaces_map, new_namespaces
1619
-
1620
- def write(self, *args, bint with_tail=True, bint pretty_print=False, method=None):
1621
- """write(self, *args, with_tail=True, pretty_print=False, method=None)
1622
-
1623
- Write subtrees or strings into the file.
1624
-
1625
- If method is not None, it should be one of ('html', 'xml', 'text')
1626
- to temporarily override the output method.
1627
- """
1628
- assert self._c_out is not NULL
1629
- c_method = self._method if method is None else _findOutputMethod(method)
1630
-
1631
- for content in args:
1632
- if _isString(content):
1633
- if self._status != WRITER_IN_ELEMENT:
1634
- if self._status > WRITER_IN_ELEMENT or content.strip():
1635
- raise LxmlSyntaxError("not in an element")
1636
- bstring = _utf8(content)
1637
- if not bstring:
1638
- continue
1639
-
1640
- ns, name, _, _ = self._element_stack[-1]
1641
- if (c_method == OUTPUT_METHOD_HTML and
1642
- ns in (None, b'http://www.w3.org/1999/xhtml') and
1643
- name in (b'script', b'style')):
1644
- tree.xmlOutputBufferWrite(self._c_out, len(bstring), _cstr(bstring))
1645
-
1646
- else:
1647
- tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(bstring), NULL)
1648
-
1649
- elif iselement(content):
1650
- if self._status > WRITER_IN_ELEMENT:
1651
- raise LxmlSyntaxError("cannot append trailing element to complete XML document")
1652
- _writeNodeToBuffer(self._c_out, (<_Element>content)._c_node,
1653
- self._c_encoding, NULL, c_method,
1654
- False, False, pretty_print, with_tail, False)
1655
- if (<_Element>content)._c_node.type == tree.XML_ELEMENT_NODE:
1656
- if not self._element_stack:
1657
- self._status = WRITER_FINISHED
1658
-
1659
- elif content is not None:
1660
- raise TypeError(
1661
- f"got invalid input value of type {type(content)}, expected string or Element")
1662
- self._handle_error(self._c_out.error)
1663
- if not self._buffered:
1664
- tree.xmlOutputBufferFlush(self._c_out)
1665
- self._handle_error(self._c_out.error)
1666
-
1667
- def flush(self):
1668
- """flush(self)
1669
-
1670
- Write any pending content of the current output buffer to the stream.
1671
- """
1672
- assert self._c_out is not NULL
1673
- tree.xmlOutputBufferFlush(self._c_out)
1674
- self._handle_error(self._c_out.error)
1675
-
1676
- cdef _close(self, bint raise_on_error):
1677
- if raise_on_error:
1678
- if self._status < WRITER_IN_ELEMENT:
1679
- raise LxmlSyntaxError("no content written")
1680
- if self._element_stack:
1681
- raise LxmlSyntaxError("pending open tags on close")
1682
- error_result = self._c_out.error
1683
- if error_result == xmlerror.XML_ERR_OK:
1684
- error_result = tree.xmlOutputBufferClose(self._c_out)
1685
- if error_result != -1:
1686
- error_result = xmlerror.XML_ERR_OK
1687
- else:
1688
- tree.xmlOutputBufferClose(self._c_out)
1689
- self._status = WRITER_FINISHED
1690
- self._c_out = NULL
1691
- del self._element_stack[:]
1692
- if raise_on_error:
1693
- self._handle_error(error_result)
1694
-
1695
- cdef _handle_error(self, int error_result):
1696
- if error_result != xmlerror.XML_ERR_OK:
1697
- if self._target is not None:
1698
- self._target._exc_context._raise_if_stored()
1699
- _raiseSerialisationError(error_result)
1700
-
1701
-
1702
- @cython.final
1703
- @cython.internal
1704
- cdef class _AsyncDataWriter:
1705
- cdef list _data
1706
- def __cinit__(self):
1707
- self._data = []
1708
-
1709
- cdef bytes collect(self):
1710
- data = b''.join(self._data)
1711
- del self._data[:]
1712
- return data
1713
-
1714
- def write(self, data):
1715
- self._data.append(data)
1716
-
1717
- def close(self):
1718
- pass
1719
-
1720
-
1721
- @cython.final
1722
- @cython.internal
1723
- cdef class _AsyncIncrementalFileWriter:
1724
- cdef _IncrementalFileWriter _writer
1725
- cdef _AsyncDataWriter _buffer
1726
- cdef object _async_outfile
1727
- cdef int _flush_after_writes
1728
- cdef bint _should_close
1729
- cdef bint _buffered
1730
-
1731
- def __cinit__(self, async_outfile, bytes encoding, int compresslevel, bint close,
1732
- bint buffered, int method):
1733
- self._flush_after_writes = 20
1734
- self._async_outfile = async_outfile
1735
- self._should_close = close
1736
- self._buffered = buffered
1737
- self._buffer = _AsyncDataWriter()
1738
- self._writer = _IncrementalFileWriter(
1739
- self._buffer, encoding, compresslevel, close=True, buffered=False, method=method)
1740
-
1741
- cdef bytes _flush(self):
1742
- if not self._buffered or len(self._buffer._data) > self._flush_after_writes:
1743
- return self._buffer.collect()
1744
- return None
1745
-
1746
- async def flush(self):
1747
- self._writer.flush()
1748
- data = self._buffer.collect()
1749
- if data:
1750
- await self._async_outfile.write(data)
1751
-
1752
- async def write_declaration(self, version=None, standalone=None, doctype=None):
1753
- self._writer.write_declaration(version, standalone, doctype)
1754
- data = self._flush()
1755
- if data:
1756
- await self._async_outfile.write(data)
1757
-
1758
- async def write_doctype(self, doctype):
1759
- self._writer.write_doctype(doctype)
1760
- data = self._flush()
1761
- if data:
1762
- await self._async_outfile.write(data)
1763
-
1764
- async def write(self, *args, with_tail=True, pretty_print=False, method=None):
1765
- self._writer.write(*args, with_tail=with_tail, pretty_print=pretty_print, method=method)
1766
- data = self._flush()
1767
- if data:
1768
- await self._async_outfile.write(data)
1769
-
1770
- def method(self, method):
1771
- return self._writer.method(method)
1772
-
1773
- def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
1774
- element_writer = self._writer.element(tag, attrib, nsmap, method, **_extra)
1775
- return _AsyncFileWriterElement(element_writer, self)
1776
-
1777
- async def _close(self, bint raise_on_error):
1778
- self._writer._close(raise_on_error)
1779
- data = self._buffer.collect()
1780
- if data:
1781
- await self._async_outfile.write(data)
1782
- if self._should_close:
1783
- await self._async_outfile.close()
1784
-
1785
-
1786
- @cython.final
1787
- @cython.internal
1788
- cdef class _AsyncFileWriterElement:
1789
- cdef _FileWriterElement _element_writer
1790
- cdef _AsyncIncrementalFileWriter _writer
1791
-
1792
- def __cinit__(self, _FileWriterElement element_writer not None,
1793
- _AsyncIncrementalFileWriter writer not None):
1794
- self._element_writer = element_writer
1795
- self._writer = writer
1796
-
1797
- async def __aenter__(self):
1798
- self._element_writer.__enter__()
1799
- data = self._writer._flush()
1800
- if data:
1801
- await self._writer._async_outfile.write(data)
1802
-
1803
- async def __aexit__(self, *args):
1804
- self._element_writer.__exit__(*args)
1805
- data = self._writer._flush()
1806
- if data:
1807
- await self._writer._async_outfile.write(data)
1808
-
1809
-
1810
- @cython.final
1811
- @cython.internal
1812
- @cython.freelist(8)
1813
- cdef class _FileWriterElement:
1814
- cdef _IncrementalFileWriter _writer
1815
- cdef object _element
1816
- cdef int _new_method
1817
- cdef int _old_method
1818
-
1819
- def __cinit__(self, _IncrementalFileWriter writer not None, element_config, int method):
1820
- self._writer = writer
1821
- self._element = element_config
1822
- self._new_method = method
1823
- self._old_method = writer._method
1824
-
1825
- def __enter__(self):
1826
- self._writer._method = self._new_method
1827
- self._writer._write_start_element(self._element)
1828
-
1829
- def __exit__(self, exc_type, exc_val, exc_tb):
1830
- self._writer._write_end_element(self._element)
1831
- self._writer._method = self._old_method
1832
-
1833
-
1834
- @cython.final
1835
- @cython.internal
1836
- @cython.freelist(8)
1837
- cdef class _MethodChanger:
1838
- cdef _IncrementalFileWriter _writer
1839
- cdef int _new_method
1840
- cdef int _old_method
1841
- cdef bint _entered
1842
- cdef bint _exited
1843
-
1844
- def __cinit__(self, _IncrementalFileWriter writer not None, int method):
1845
- self._writer = writer
1846
- self._new_method = method
1847
- self._old_method = writer._method
1848
- self._entered = False
1849
- self._exited = False
1850
-
1851
- def __enter__(self):
1852
- if self._entered:
1853
- raise LxmlSyntaxError("Inconsistent enter action in context manager")
1854
- self._writer._method = self._new_method
1855
- self._entered = True
1856
-
1857
- def __exit__(self, exc_type, exc_val, exc_tb):
1858
- if self._exited:
1859
- raise LxmlSyntaxError("Inconsistent exit action in context manager")
1860
- if self._writer._method != self._new_method:
1861
- raise LxmlSyntaxError("Method changed outside of context manager")
1862
- self._writer._method = self._old_method
1863
- self._exited = True
1864
-
1865
- async def __aenter__(self):
1866
- # for your async convenience
1867
- return self.__enter__()
1868
-
1869
- async def __aexit__(self, *args):
1870
- # for your async convenience
1871
- return self.__exit__(*args)
1
+ # XML serialization and output functions
2
+
3
+ cdef object GzipFile
4
+ from gzip import GzipFile
5
+
6
+
7
+ cdef class SerialisationError(LxmlError):
8
+ """A libxml2 error that occurred during serialisation.
9
+ """
10
+
11
+
12
+ cdef enum _OutputMethods:
13
+ OUTPUT_METHOD_XML
14
+ OUTPUT_METHOD_HTML
15
+ OUTPUT_METHOD_TEXT
16
+
17
+
18
+ cdef int _findOutputMethod(method) except -1:
19
+ if method is None:
20
+ return OUTPUT_METHOD_XML
21
+ method = method.lower()
22
+ if method == "xml":
23
+ return OUTPUT_METHOD_XML
24
+ if method == "html":
25
+ return OUTPUT_METHOD_HTML
26
+ if method == "text":
27
+ return OUTPUT_METHOD_TEXT
28
+ raise ValueError(f"unknown output method {method!r}")
29
+
30
+
31
+ cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
32
+ cdef bint needs_conversion
33
+ cdef const_xmlChar* c_text
34
+ cdef xmlNode* c_text_node
35
+ cdef tree.xmlBuffer* c_buffer
36
+ cdef int error_result
37
+
38
+ c_buffer = tree.xmlBufferCreate()
39
+ if c_buffer is NULL:
40
+ raise MemoryError()
41
+
42
+ with nogil:
43
+ error_result = tree.xmlNodeBufGetContent(c_buffer, c_node)
44
+ if with_tail:
45
+ c_text_node = _textNodeOrSkip(c_node.next)
46
+ while c_text_node is not NULL:
47
+ tree.xmlBufferWriteChar(c_buffer, <const_char*>c_text_node.content)
48
+ c_text_node = _textNodeOrSkip(c_text_node.next)
49
+ c_text = tree.xmlBufferContent(c_buffer)
50
+
51
+ if error_result < 0 or c_text is NULL:
52
+ tree.xmlBufferFree(c_buffer)
53
+ raise SerialisationError, "Error during serialisation (out of memory?)"
54
+
55
+ try:
56
+ needs_conversion = 0
57
+ if encoding is unicode:
58
+ needs_conversion = 1
59
+ elif encoding is not None:
60
+ # Python prefers lower case encoding names
61
+ encoding = encoding.lower()
62
+ if encoding not in ('utf8', 'utf-8'):
63
+ if encoding == 'ascii':
64
+ if isutf8l(c_text, tree.xmlBufferLength(c_buffer)):
65
+ # will raise a decode error below
66
+ needs_conversion = 1
67
+ else:
68
+ needs_conversion = 1
69
+
70
+ if needs_conversion:
71
+ text = (<const_char*>c_text)[:tree.xmlBufferLength(c_buffer)].decode('utf8')
72
+ if encoding is not unicode:
73
+ encoding = _utf8(encoding)
74
+ text = python.PyUnicode_AsEncodedString(
75
+ text, encoding, 'strict')
76
+ else:
77
+ text = (<unsigned char*>c_text)[:tree.xmlBufferLength(c_buffer)]
78
+ finally:
79
+ tree.xmlBufferFree(c_buffer)
80
+ return text
81
+
82
+
83
+ cdef _tostring(_Element element, encoding, doctype, method,
84
+ bint write_xml_declaration, bint write_complete_document,
85
+ bint pretty_print, bint with_tail, int standalone):
86
+ """Serialize an element to an encoded string representation of its XML
87
+ tree.
88
+ """
89
+ cdef tree.xmlOutputBuffer* c_buffer
90
+ cdef tree.xmlBuf* c_result_buffer
91
+ cdef tree.xmlCharEncodingHandler* enchandler
92
+ cdef const_char* c_enc
93
+ cdef const_xmlChar* c_version
94
+ cdef const_xmlChar* c_doctype
95
+ cdef int c_method
96
+ cdef int error_result
97
+ if element is None:
98
+ return None
99
+ _assertValidNode(element)
100
+ c_method = _findOutputMethod(method)
101
+ if c_method == OUTPUT_METHOD_TEXT:
102
+ return _textToString(element._c_node, encoding, with_tail)
103
+ if encoding is None or encoding is unicode:
104
+ c_enc = NULL
105
+ else:
106
+ encoding = _utf8(encoding)
107
+ c_enc = _cstr(encoding)
108
+ if doctype is None:
109
+ c_doctype = NULL
110
+ else:
111
+ doctype = _utf8(doctype)
112
+ c_doctype = _xcstr(doctype)
113
+ # it is necessary to *and* find the encoding handler *and* use
114
+ # encoding during output
115
+ enchandler = tree.xmlFindCharEncodingHandler(c_enc)
116
+ if enchandler is NULL and c_enc is not NULL:
117
+ if encoding is not None:
118
+ encoding = encoding.decode('UTF-8')
119
+ raise LookupError, f"unknown encoding: '{encoding}'"
120
+ c_buffer = tree.xmlAllocOutputBuffer(enchandler)
121
+ if c_buffer is NULL:
122
+ tree.xmlCharEncCloseFunc(enchandler)
123
+ raise MemoryError()
124
+
125
+ with nogil:
126
+ _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_doctype, c_method,
127
+ write_xml_declaration, write_complete_document,
128
+ pretty_print, with_tail, standalone)
129
+ tree.xmlOutputBufferFlush(c_buffer)
130
+ if c_buffer.conv is not NULL:
131
+ c_result_buffer = c_buffer.conv
132
+ else:
133
+ c_result_buffer = c_buffer.buffer
134
+
135
+ error_result = c_buffer.error
136
+ if error_result != xmlerror.XML_ERR_OK:
137
+ tree.xmlOutputBufferClose(c_buffer)
138
+ _raiseSerialisationError(error_result)
139
+
140
+ try:
141
+ if encoding is unicode:
142
+ result = (<unsigned char*>tree.xmlBufContent(
143
+ c_result_buffer))[:tree.xmlBufUse(c_result_buffer)].decode('UTF-8')
144
+ else:
145
+ result = <bytes>(<unsigned char*>tree.xmlBufContent(
146
+ c_result_buffer))[:tree.xmlBufUse(c_result_buffer)]
147
+ finally:
148
+ error_result = tree.xmlOutputBufferClose(c_buffer)
149
+ if error_result == -1:
150
+ _raiseSerialisationError(error_result)
151
+ return result
152
+
153
+ cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments, inclusive_ns_prefixes):
154
+ cdef xmlDoc* c_doc
155
+ cdef xmlChar* c_buffer = NULL
156
+ cdef int byte_count = -1
157
+ cdef bytes result
158
+ cdef _Document doc
159
+ cdef _Element element
160
+ cdef xmlChar **c_inclusive_ns_prefixes
161
+
162
+ if isinstance(element_or_tree, _Element):
163
+ _assertValidNode(<_Element>element_or_tree)
164
+ doc = (<_Element>element_or_tree)._doc
165
+ c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0)
166
+ else:
167
+ doc = _documentOrRaise(element_or_tree)
168
+ _assertValidDoc(doc)
169
+ c_doc = doc._c_doc
170
+
171
+ c_inclusive_ns_prefixes = _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes) if inclusive_ns_prefixes else NULL
172
+ try:
173
+ with nogil:
174
+ byte_count = c14n.xmlC14NDocDumpMemory(
175
+ c_doc, NULL, exclusive, c_inclusive_ns_prefixes, with_comments, &c_buffer)
176
+
177
+ finally:
178
+ _destroyFakeDoc(doc._c_doc, c_doc)
179
+ if c_inclusive_ns_prefixes is not NULL:
180
+ python.lxml_free(c_inclusive_ns_prefixes)
181
+
182
+ if byte_count < 0 or c_buffer is NULL:
183
+ if c_buffer is not NULL:
184
+ tree.xmlFree(c_buffer)
185
+ raise C14NError, "C14N failed"
186
+ try:
187
+ result = c_buffer[:byte_count]
188
+ finally:
189
+ tree.xmlFree(c_buffer)
190
+ return result
191
+
192
+ cdef _raiseSerialisationError(int error_result):
193
+ if error_result == xmlerror.XML_ERR_NO_MEMORY:
194
+ raise MemoryError()
195
+ message = ErrorTypes._getName(error_result)
196
+ if message is None:
197
+ message = f"unknown error {error_result}"
198
+ raise SerialisationError, message
199
+
200
+ ############################################################
201
+ # low-level serialisation functions
202
+
203
+ cdef void _writeDoctype(tree.xmlOutputBuffer* c_buffer,
204
+ const_xmlChar* c_doctype) noexcept nogil:
205
+ tree.xmlOutputBufferWrite(c_buffer, tree.xmlStrlen(c_doctype),
206
+ <const_char*>c_doctype)
207
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
208
+
209
+ cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
210
+ xmlNode* c_node, const_char* encoding, const_xmlChar* c_doctype,
211
+ int c_method, bint write_xml_declaration,
212
+ bint write_complete_document,
213
+ bint pretty_print, bint with_tail,
214
+ int standalone) noexcept nogil:
215
+ cdef xmlNode* c_nsdecl_node
216
+ cdef xmlDoc* c_doc = c_node.doc
217
+ if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
218
+ _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)
219
+
220
+ # comments/processing instructions before doctype declaration
221
+ if write_complete_document and not c_buffer.error and c_doc.intSubset:
222
+ _writePrevSiblings(c_buffer, <xmlNode*>c_doc.intSubset, encoding, pretty_print)
223
+
224
+ if c_doctype:
225
+ _writeDoctype(c_buffer, c_doctype)
226
+ # write internal DTD subset, preceding PIs/comments, etc.
227
+ if write_complete_document and not c_buffer.error:
228
+ if c_doctype is NULL:
229
+ _writeDtdToBuffer(c_buffer, c_doc, c_node.name, c_method, encoding)
230
+ _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)
231
+
232
+ c_nsdecl_node = c_node
233
+ if not c_node.parent or c_node.parent.type != tree.XML_DOCUMENT_NODE:
234
+ # copy the node and add namespaces from parents
235
+ # this is required to make libxml write them
236
+ c_nsdecl_node = tree.xmlCopyNode(c_node, 2)
237
+ if not c_nsdecl_node:
238
+ c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
239
+ return
240
+ _copyParentNamespaces(c_node, c_nsdecl_node)
241
+
242
+ c_nsdecl_node.parent = c_node.parent
243
+ c_nsdecl_node.children = c_node.children
244
+ c_nsdecl_node.last = c_node.last
245
+
246
+ # write node
247
+ if c_method == OUTPUT_METHOD_HTML:
248
+ tree.htmlNodeDumpFormatOutput(
249
+ c_buffer, c_doc, c_nsdecl_node, encoding, pretty_print)
250
+ else:
251
+ tree.xmlNodeDumpOutput(
252
+ c_buffer, c_doc, c_nsdecl_node, 0, pretty_print, encoding)
253
+
254
+ if c_nsdecl_node is not c_node:
255
+ # clean up
256
+ c_nsdecl_node.children = c_nsdecl_node.last = NULL
257
+ tree.xmlFreeNode(c_nsdecl_node)
258
+
259
+ if c_buffer.error:
260
+ return
261
+
262
+ # write tail, trailing comments, etc.
263
+ if with_tail:
264
+ _writeTail(c_buffer, c_node, encoding, c_method, pretty_print)
265
+ if write_complete_document:
266
+ _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
267
+ if pretty_print:
268
+ tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
269
+
270
+ cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
271
+ const_xmlChar* version, const_char* encoding,
272
+ int standalone) noexcept nogil:
273
+ if version is NULL:
274
+ version = <unsigned char*>"1.0"
275
+ tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
276
+ tree.xmlOutputBufferWriteString(c_buffer, <const_char*>version)
277
+ tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
278
+ tree.xmlOutputBufferWriteString(c_buffer, encoding)
279
+ if standalone == 0:
280
+ tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n")
281
+ elif standalone == 1:
282
+ tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n")
283
+ else:
284
+ tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
285
+
286
+ cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
287
+ xmlDoc* c_doc, const_xmlChar* c_root_name,
288
+ int c_method, const_char* encoding) noexcept nogil:
289
+ cdef tree.xmlDtd* c_dtd
290
+ cdef xmlNode* c_node
291
+ cdef char* quotechar
292
+ c_dtd = c_doc.intSubset
293
+ if not c_dtd or not c_dtd.name:
294
+ return
295
+
296
+ # Name in document type declaration must match the root element tag.
297
+ # For XML, case sensitive match, for HTML insensitive.
298
+ if c_method == OUTPUT_METHOD_HTML:
299
+ if tree.xmlStrcasecmp(c_root_name, c_dtd.name) != 0:
300
+ return
301
+ else:
302
+ if tree.xmlStrcmp(c_root_name, c_dtd.name) != 0:
303
+ return
304
+
305
+ tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
306
+ tree.xmlOutputBufferWriteString(c_buffer, <const_char*>c_dtd.name)
307
+
308
+ cdef const_xmlChar* public_id = c_dtd.ExternalID
309
+ cdef const_xmlChar* sys_url = c_dtd.SystemID
310
+ if public_id and public_id[0] == b'\0':
311
+ public_id = NULL
312
+ if sys_url and sys_url[0] == b'\0':
313
+ sys_url = NULL
314
+
315
+ if public_id:
316
+ tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
317
+ tree.xmlOutputBufferWriteString(c_buffer, <const_char*>public_id)
318
+ if sys_url:
319
+ tree.xmlOutputBufferWrite(c_buffer, 2, '" ')
320
+ else:
321
+ tree.xmlOutputBufferWrite(c_buffer, 1, '"')
322
+ elif sys_url:
323
+ tree.xmlOutputBufferWrite(c_buffer, 8, ' SYSTEM ')
324
+
325
+ if sys_url:
326
+ if tree.xmlStrchr(sys_url, b'"'):
327
+ quotechar = '\''
328
+ else:
329
+ quotechar = '"'
330
+ tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)
331
+ tree.xmlOutputBufferWriteString(c_buffer, <const_char*>sys_url)
332
+ tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)
333
+
334
+ if (not c_dtd.entities and not c_dtd.elements and
335
+ not c_dtd.attributes and not c_dtd.notations and
336
+ not c_dtd.pentities):
337
+ tree.xmlOutputBufferWrite(c_buffer, 2, '>\n')
338
+ return
339
+
340
+ tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n')
341
+ if c_dtd.notations and not c_buffer.error:
342
+ c_buf = tree.xmlBufferCreate()
343
+ if not c_buf:
344
+ c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
345
+ return
346
+ tree.xmlDumpNotationTable(c_buf, <tree.xmlNotationTable*>c_dtd.notations)
347
+ tree.xmlOutputBufferWrite(
348
+ c_buffer, tree.xmlBufferLength(c_buf),
349
+ <const_char*>tree.xmlBufferContent(c_buf))
350
+ tree.xmlBufferFree(c_buf)
351
+ c_node = c_dtd.children
352
+ while c_node and not c_buffer.error:
353
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
354
+ c_node = c_node.next
355
+ tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
356
+
357
+ cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
358
+ const_char* encoding, int c_method, bint pretty_print) noexcept nogil:
359
+ "Write the element tail."
360
+ c_node = c_node.next
361
+ while c_node and not c_buffer.error and c_node.type in (
362
+ tree.XML_TEXT_NODE, tree.XML_CDATA_SECTION_NODE):
363
+ if c_method == OUTPUT_METHOD_HTML:
364
+ tree.htmlNodeDumpFormatOutput(
365
+ c_buffer, c_node.doc, c_node, encoding, pretty_print)
366
+ else:
367
+ tree.xmlNodeDumpOutput(
368
+ c_buffer, c_node.doc, c_node, 0, pretty_print, encoding)
369
+ c_node = c_node.next
370
+
371
+ cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
372
+ const_char* encoding, bint pretty_print) noexcept nogil:
373
+ cdef xmlNode* c_sibling
374
+ if c_node.parent and _isElement(c_node.parent):
375
+ return
376
+ # we are at a root node, so add PI and comment siblings
377
+ c_sibling = c_node
378
+ while c_sibling.prev and \
379
+ (c_sibling.prev.type == tree.XML_PI_NODE or
380
+ c_sibling.prev.type == tree.XML_COMMENT_NODE):
381
+ c_sibling = c_sibling.prev
382
+ while c_sibling is not c_node and not c_buffer.error:
383
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
384
+ pretty_print, encoding)
385
+ if pretty_print:
386
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
387
+ c_sibling = c_sibling.next
388
+
389
+ cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
390
+ const_char* encoding, bint pretty_print) noexcept nogil:
391
+ cdef xmlNode* c_sibling
392
+ if c_node.parent and _isElement(c_node.parent):
393
+ return
394
+ # we are at a root node, so add PI and comment siblings
395
+ c_sibling = c_node.next
396
+ while not c_buffer.error and c_sibling and \
397
+ (c_sibling.type == tree.XML_PI_NODE or
398
+ c_sibling.type == tree.XML_COMMENT_NODE):
399
+ if pretty_print:
400
+ tree.xmlOutputBufferWriteString(c_buffer, "\n")
401
+ tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_sibling, 0,
402
+ pretty_print, encoding)
403
+ c_sibling = c_sibling.next
404
+
405
+
406
+ # copied and adapted from libxml2
407
+ cdef unsigned char *xmlSerializeHexCharRef(unsigned char *out, int val) noexcept:
408
+ cdef xmlChar *ptr
409
+ cdef const xmlChar* hexdigits = b"0123456789ABCDEF"
410
+
411
+ out[0] = b'&'
412
+ out += 1
413
+ out[0] = b'#'
414
+ out += 1
415
+ out[0] = b'x'
416
+ out += 1
417
+
418
+ if val < 0x10:
419
+ ptr = out
420
+ elif val < 0x100:
421
+ ptr = out + 1
422
+ elif val < 0x1000:
423
+ ptr = out + 2
424
+ elif val < 0x10000:
425
+ ptr = out + 3
426
+ elif val < 0x100000:
427
+ ptr = out + 4
428
+ else:
429
+ ptr = out + 5
430
+
431
+ out = ptr + 1
432
+ while val > 0:
433
+ ptr[0] = hexdigits[val & 0xF]
434
+ ptr -= 1
435
+ val >>= 4
436
+
437
+ out[0] = b';'
438
+ out += 1
439
+ out[0] = 0
440
+
441
+ return out
442
+
443
+
444
+ # copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent())
445
+ cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
446
+ cdef const char *base
447
+ cdef const char *cur
448
+ cdef const unsigned char *ucur
449
+
450
+ cdef unsigned char tmp[12]
451
+ cdef int val = 0
452
+ cdef int l
453
+
454
+ if string == NULL:
455
+ return
456
+
457
+ base = cur = <const char*>string
458
+ while cur[0] != 0:
459
+ if cur[0] == b'\n':
460
+ if base != cur:
461
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
462
+
463
+ tree.xmlOutputBufferWrite(buf, 5, "&#10;")
464
+ cur += 1
465
+ base = cur
466
+
467
+ elif cur[0] == b'\r':
468
+ if base != cur:
469
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
470
+
471
+ tree.xmlOutputBufferWrite(buf, 5, "&#13;")
472
+ cur += 1
473
+ base = cur
474
+
475
+ elif cur[0] == b'\t':
476
+ if base != cur:
477
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
478
+
479
+ tree.xmlOutputBufferWrite(buf, 4, "&#9;")
480
+ cur += 1
481
+ base = cur
482
+
483
+ elif cur[0] == b'"':
484
+ if base != cur:
485
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
486
+
487
+ tree.xmlOutputBufferWrite(buf, 6, "&quot;")
488
+ cur += 1
489
+ base = cur
490
+
491
+ elif cur[0] == b'<':
492
+ if base != cur:
493
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
494
+
495
+ tree.xmlOutputBufferWrite(buf, 4, "&lt;")
496
+ cur += 1
497
+ base = cur
498
+
499
+ elif cur[0] == b'>':
500
+ if base != cur:
501
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
502
+
503
+ tree.xmlOutputBufferWrite(buf, 4, "&gt;")
504
+ cur += 1
505
+ base = cur
506
+ elif cur[0] == b'&':
507
+ if base != cur:
508
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
509
+
510
+ tree.xmlOutputBufferWrite(buf, 5, "&amp;")
511
+ cur += 1
512
+ base = cur
513
+
514
+ elif (<const unsigned char>cur[0] >= 0x80) and (cur[1] != 0):
515
+
516
+ if base != cur:
517
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
518
+
519
+ ucur = <const unsigned char *>cur
520
+
521
+ if ucur[0] < 0xC0:
522
+ # invalid UTF-8 sequence
523
+ val = ucur[0]
524
+ l = 1
525
+
526
+ elif ucur[0] < 0xE0:
527
+ val = (ucur[0]) & 0x1F
528
+ val <<= 6
529
+ val |= (ucur[1]) & 0x3F
530
+ l = 2
531
+
532
+ elif (ucur[0] < 0xF0) and (ucur[2] != 0):
533
+ val = (ucur[0]) & 0x0F
534
+ val <<= 6
535
+ val |= (ucur[1]) & 0x3F
536
+ val <<= 6
537
+ val |= (ucur[2]) & 0x3F
538
+ l = 3
539
+
540
+ elif (ucur[0] < 0xF8) and (ucur[2] != 0) and (ucur[3] != 0):
541
+ val = (ucur[0]) & 0x07
542
+ val <<= 6
543
+ val |= (ucur[1]) & 0x3F
544
+ val <<= 6
545
+ val |= (ucur[2]) & 0x3F
546
+ val <<= 6
547
+ val |= (ucur[3]) & 0x3F
548
+ l = 4
549
+ else:
550
+ # invalid UTF-8 sequence
551
+ val = ucur[0]
552
+ l = 1
553
+
554
+ if (l == 1) or (not tree.xmlIsCharQ(val)):
555
+ raise ValueError(f"Invalid character: {val:X}")
556
+
557
+ # We could do multiple things here. Just save
558
+ # as a char ref
559
+ xmlSerializeHexCharRef(tmp, val)
560
+ tree.xmlOutputBufferWrite(buf, len(tmp), <const char*> tmp)
561
+ cur += l
562
+ base = cur
563
+
564
+ else:
565
+ cur += 1
566
+
567
+ if base != cur:
568
+ tree.xmlOutputBufferWrite(buf, cur - base, base)
569
+
570
+
571
+ ############################################################
572
+ # output to file-like objects
573
+
574
+ cdef object io_open
575
+ from io import open as io_open
576
+
577
+ cdef object gzip
578
+ import gzip
579
+
580
+ cdef object getwriter
581
+ from codecs import getwriter
582
+ cdef object utf8_writer = getwriter('utf8')
583
+
584
+ cdef object contextmanager
585
+ from contextlib import contextmanager
586
+
587
+ cdef object _open_utf8_file
588
+
589
+ @contextmanager
590
+ def _open_utf8_file(file, compression=0):
591
+ file = _getFSPathOrObject(file)
592
+ if _isString(file):
593
+ if compression:
594
+ with gzip.GzipFile(file, mode='wb', compresslevel=compression) as zf:
595
+ yield utf8_writer(zf)
596
+ else:
597
+ with io_open(file, 'w', encoding='utf8') as f:
598
+ yield f
599
+ else:
600
+ if compression:
601
+ with gzip.GzipFile(fileobj=file, mode='wb', compresslevel=compression) as zf:
602
+ yield utf8_writer(zf)
603
+ else:
604
+ yield utf8_writer(file)
605
+
606
+
607
+ @cython.final
608
+ @cython.internal
609
+ cdef class _FilelikeWriter:
610
+ cdef object _filelike
611
+ cdef object _close_filelike
612
+ cdef _ExceptionContext _exc_context
613
+ cdef _ErrorLog error_log
614
+ def __cinit__(self, filelike, exc_context=None, compression=None, close=False):
615
+ if compression is not None and compression > 0:
616
+ filelike = GzipFile(
617
+ fileobj=filelike, mode='wb', compresslevel=compression)
618
+ self._close_filelike = filelike.close
619
+ elif close:
620
+ self._close_filelike = filelike.close
621
+ self._filelike = filelike
622
+ if exc_context is None:
623
+ self._exc_context = _ExceptionContext()
624
+ else:
625
+ self._exc_context = exc_context
626
+ self.error_log = _ErrorLog()
627
+
628
+ cdef tree.xmlOutputBuffer* _createOutputBuffer(
629
+ self, tree.xmlCharEncodingHandler* enchandler) except NULL:
630
+ cdef tree.xmlOutputBuffer* c_buffer
631
+ c_buffer = tree.xmlOutputBufferCreateIO(
632
+ <tree.xmlOutputWriteCallback>_writeFilelikeWriter, _closeFilelikeWriter,
633
+ <python.PyObject*>self, enchandler)
634
+ if c_buffer is NULL:
635
+ raise IOError, "Could not create I/O writer context."
636
+ return c_buffer
637
+
638
+ cdef int write(self, char* c_buffer, int size) noexcept:
639
+ try:
640
+ if self._filelike is None:
641
+ raise IOError, "File is already closed"
642
+ py_buffer = <bytes>c_buffer[:size]
643
+ self._filelike.write(py_buffer)
644
+ except:
645
+ size = -1
646
+ self._exc_context._store_raised()
647
+ finally:
648
+ return size # and swallow any further exceptions
649
+
650
+ cdef int close(self) noexcept:
651
+ retval = 0
652
+ try:
653
+ if self._close_filelike is not None:
654
+ self._close_filelike()
655
+ # we should not close the file here as we didn't open it
656
+ self._filelike = None
657
+ except:
658
+ retval = -1
659
+ self._exc_context._store_raised()
660
+ finally:
661
+ return retval # and swallow any further exceptions
662
+
663
+ cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int length) noexcept:
664
+ return (<_FilelikeWriter>ctxt).write(c_buffer, length)
665
+
666
+ cdef int _closeFilelikeWriter(void* ctxt) noexcept:
667
+ return (<_FilelikeWriter>ctxt).close()
668
+
669
+ cdef _tofilelike(f, _Element element, encoding, doctype, method,
670
+ bint write_xml_declaration, bint write_doctype,
671
+ bint pretty_print, bint with_tail, int standalone,
672
+ int compression):
673
+ cdef _FilelikeWriter writer = None
674
+ cdef tree.xmlOutputBuffer* c_buffer
675
+ cdef tree.xmlCharEncodingHandler* enchandler
676
+ cdef const_char* c_enc
677
+ cdef const_xmlChar* c_doctype
678
+ cdef int error_result
679
+
680
+ c_method = _findOutputMethod(method)
681
+ if c_method == OUTPUT_METHOD_TEXT:
682
+ data = _textToString(element._c_node, encoding, with_tail)
683
+ if compression:
684
+ bytes_out = BytesIO()
685
+ with GzipFile(fileobj=bytes_out, mode='wb', compresslevel=compression) as gzip_file:
686
+ gzip_file.write(data)
687
+ data = bytes_out.getvalue()
688
+ f = _getFSPathOrObject(f)
689
+ if _isString(f):
690
+ filename8 = _encodeFilename(f)
691
+ with open(filename8, 'wb') as f:
692
+ f.write(data)
693
+ else:
694
+ f.write(data)
695
+ return
696
+
697
+ if encoding is None:
698
+ c_enc = NULL
699
+ else:
700
+ encoding = _utf8(encoding)
701
+ c_enc = _cstr(encoding)
702
+ if doctype is None:
703
+ c_doctype = NULL
704
+ else:
705
+ doctype = _utf8(doctype)
706
+ c_doctype = _xcstr(doctype)
707
+
708
+ writer = _create_output_buffer(f, c_enc, compression, &c_buffer, close=False)
709
+ if writer is None:
710
+ with nogil:
711
+ error_result = _serialise_node(
712
+ c_buffer, c_doctype, c_enc, element._c_node, c_method,
713
+ write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
714
+ else:
715
+ error_result = _serialise_node(
716
+ c_buffer, c_doctype, c_enc, element._c_node, c_method,
717
+ write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
718
+
719
+ if writer is not None:
720
+ writer._exc_context._raise_if_stored()
721
+ if error_result != xmlerror.XML_ERR_OK:
722
+ _raiseSerialisationError(error_result)
723
+
724
+
725
+ cdef int _serialise_node(tree.xmlOutputBuffer* c_buffer, const_xmlChar* c_doctype,
726
+ const_char* c_enc, xmlNode* c_node, int c_method,
727
+ bint write_xml_declaration, bint write_doctype, bint pretty_print,
728
+ bint with_tail, int standalone) noexcept nogil:
729
+ _writeNodeToBuffer(
730
+ c_buffer, c_node, c_enc, c_doctype, c_method,
731
+ write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
732
+ error_result = c_buffer.error
733
+ if error_result == xmlerror.XML_ERR_OK:
734
+ error_result = tree.xmlOutputBufferClose(c_buffer)
735
+ if error_result != -1:
736
+ error_result = xmlerror.XML_ERR_OK
737
+ else:
738
+ tree.xmlOutputBufferClose(c_buffer)
739
+ return error_result
740
+
741
+
742
cdef _FilelikeWriter _create_output_buffer(
        f, const_char* c_enc, int c_compression,
        tree.xmlOutputBuffer** c_buffer_ret, bint close):
    """Create a libxml2 output buffer for a file path or a file-like object.

    The new buffer is stored in ``c_buffer_ret[0]``.  The return value is the
    ``_FilelikeWriter`` that wraps *f* when *f* has a ``.write()`` method, or
    None when *f* is a file path that libxml2 opens by itself.

    Raises LookupError for an unknown encoding, TypeError if *f* is neither a
    path nor an object with ``.write()``, and IOError (from ``errno``) if the
    file cannot be opened.  If anything fails after the encoding handler was
    looked up, the handler is closed before the exception propagates.
    """
    cdef tree.xmlOutputBuffer* c_out
    cdef _FilelikeWriter target
    cdef bytes path_bytes
    cdef bint escape_percent
    encoder = tree.xmlFindCharEncodingHandler(c_enc)
    if encoder is NULL:
        raise LookupError(
            f"unknown encoding: '{c_enc.decode('UTF-8') if c_enc is not NULL else u''}'")
    try:
        f = _getFSPathOrObject(f)
        if _isString(f):
            # Path based output: libxml2 opens the file, no Python writer needed.
            target = None
            path_bytes = _encodeFilename(f)
            if b'%' in path_bytes:
                # libxml2 URL-unescapes the names it is given, so a literal '%'
                # must be passed as '%25'.  This is done for everything that
                # _isFilePath() does not classify as NO_FILE_PATH or
                # ABS_WIN_FILE_PATH, and additionally for 'file://' URLs.
                escape_percent = _isFilePath(<const xmlChar*>path_bytes) not in (
                    NO_FILE_PATH, ABS_WIN_FILE_PATH)
                if not escape_percent:
                    escape_percent = path_bytes[:7].lower() == b'file://'
                if escape_percent:
                    path_bytes = path_bytes.replace(b'%', b'%25')
            c_out = tree.xmlOutputBufferCreateFilename(
                _cstr(path_bytes), encoder, c_compression)
            if c_out is NULL:
                python.PyErr_SetFromErrno(IOError)  # raises IOError
        elif hasattr(f, 'write'):
            # File-like output: route the libxml2 buffer through a Python writer.
            target = _FilelikeWriter(f, compression=c_compression, close=close)
            c_out = target._createOutputBuffer(encoder)
        else:
            raise TypeError(
                f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    except:
        # No buffer took over the encoding handler => release it ourselves.
        tree.xmlCharEncCloseFunc(encoder)
        raise
    c_buffer_ret[0] = c_out
    return target
779
+
780
cdef xmlChar **_convert_ns_prefixes(tree.xmlDict* c_dict, ns_prefixes) except NULL:
    """Build a NULL-terminated C array of the prefixes found in *c_dict*.

    Each entry of *ns_prefixes* is converted with ``_utf8()`` and looked up in
    the libxml2 dictionary.  Prefixes that the dictionary does not contain are
    left out.  The array is allocated with ``python.lxml_malloc()`` and must be
    released by the caller with ``python.lxml_free()``.

    Raises MemoryError if the allocation fails.
    """
    cdef size_t used = 0
    cdef size_t total = len(ns_prefixes)
    # One slot more than there are prefixes, for the terminating NULL entry.
    c_result = <xmlChar **>python.lxml_malloc(total + 1, sizeof(xmlChar*))
    if c_result is NULL:
        raise MemoryError()
    try:
        for prefix in ns_prefixes:
            prefix_utf = _utf8(prefix)
            c_known = tree.xmlDictExists(c_dict, _xcstr(prefix_utf), len(prefix_utf))
            if c_known is NULL:
                # unknown prefixes do not need to get serialised
                continue
            c_result[used] = <xmlChar*>c_known
            used += 1
    except:
        python.lxml_free(c_result)
        raise

    c_result[used] = NULL  # end marker
    return c_result
801
+
802
cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
                     int compression, inclusive_ns_prefixes):
    """Write the C14N serialisation of *element* to a file path or file-like object.

    *element* is serialised through a temporary fake-root document that is
    destroyed again afterwards.  For a file path, libxml2 writes the file
    directly; for an object with ``.write()``, the output goes through a
    ``_FilelikeWriter``.

    Raises TypeError if *f* is neither of the two, re-raises any exception
    stored by the writer, and raises C14NError if libxml2 reports a failure
    (using the first logged error message if the writer collected one).
    """
    cdef _FilelikeWriter writer = None
    cdef tree.xmlOutputBuffer* c_buffer
    cdef xmlChar **c_inclusive_ns_prefixes = NULL
    cdef char* c_filename
    cdef xmlDoc* c_base_doc
    cdef xmlDoc* c_doc
    cdef int bytes_count, error = 0

    c_base_doc = element._c_node.doc
    c_doc = _fakeRootDoc(c_base_doc, element._c_node)
    try:
        if inclusive_ns_prefixes:
            c_inclusive_ns_prefixes = _convert_ns_prefixes(
                c_doc.dict, inclusive_ns_prefixes)

        f = _getFSPathOrObject(f)
        if _isString(f):
            filename8 = _encodeFilename(f)
            c_filename = _cstr(filename8)
            with nogil:
                error = c14n.xmlC14NDocSave(
                    c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                    with_comments, c_filename, compression)
        elif hasattr(f, 'write'):
            writer = _FilelikeWriter(f, compression=compression)
            c_buffer = writer._createOutputBuffer(NULL)
            try:
                with writer.error_log:
                    bytes_count = c14n.xmlC14NDocSaveTo(
                        c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                        with_comments, c_buffer)
            finally:
                # Always close the buffer, also when serialisation raised.
                error = tree.xmlOutputBufferClose(c_buffer)
            if bytes_count < 0:
                # A serialisation failure takes precedence over the close result.
                error = bytes_count
            elif error != -1:
                # Anything but -1 from closing the buffer counts as success.
                error = xmlerror.XML_ERR_OK
        else:
            raise TypeError(f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    finally:
        _destroyFakeDoc(c_base_doc, c_doc)
        if c_inclusive_ns_prefixes is not NULL:
            python.lxml_free(c_inclusive_ns_prefixes)

    if writer is not None:
        writer._exc_context._raise_if_stored()

    if error >= 0:
        return

    message = "C14N failed"
    if writer is not None:
        errors = writer.error_log
        if len(errors):
            message = errors[0].message
    raise C14NError(message)
858
+
859
+
860
+ # C14N 2.0
861
+
862
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.
    Otherwise, it returns None.

    Either *xml_data* (an XML string, tree or Element) or *from_file*
    (a file path or file-like object) must be provided as input.
    If both are given, *xml_data* is used and *from_file* is ignored.
    A ValueError is raised if neither is given.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    cdef _FeedParser parser

    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Collect the output ourselves only if the caller did not pass a sink.
    sio = None
    if out is None:
        sio = out = StringIO()

    target = C14NWriterTarget(out.write, **options)

    if xml_data is not None and not isinstance(xml_data, basestring):
        # Already parsed (tree or Element): replay it as parse events.
        _tree_to_target(xml_data, target)
        return sio.getvalue() if sio is not None else None

    parser = XMLParser(
        target=target,
        attribute_defaults=True,
        collect_ids=False,
    )

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    elif from_file is not None:
        try:
            _parseDocument(from_file, parser, base_url=None)
        except _TargetParserResult:
            # Only signals the end of target parsing; the result is not needed.
            pass

    return sio.getvalue() if sio is not None else None
904
+
905
+
906
cdef _tree_to_target(element, target):
    """Replay an in-memory tree as parser target events.

    Walks *element* with ``iterwalk()`` and calls the matching *target* method
    for each event, followed by ``target.data()`` for the non-empty text that
    follows the event in the document (``.text`` after a start tag, ``.tail``
    after an end tag, comment or processing instruction).

    Returns the result of ``target.close()``.
    """
    walk_events = ('start', 'end', 'start-ns', 'comment', 'pi')
    for event, node in iterwalk(element, events=walk_events):
        if event == 'start-ns':
            # Here, 'node' is the (prefix, uri) pair; no text follows it.
            target.start_ns(*node)
            continue

        if event == 'start':
            target.start(node.tag, node.attrib)
            following_text = node.text
        elif event == 'end':
            target.end(node.tag)
            following_text = node.tail
        elif event == 'comment':
            target.comment(node.text)
            following_text = node.tail
        elif event == 'pi':
            target.pi(node.target, node.text)
            following_text = node.tail
        else:
            following_text = None

        if following_text:
            target.data(following_text)
    return target.close()
927
+
928
+
929
# Bound ``.match`` of a pattern for text of the exact form "prefix:name",
# i.e. two runs of word characters separated by a single colon.
cdef object _looks_like_prefix_name = re.compile(
    r'^\w+:\w+$', re.UNICODE,
).match
930
+
931
+
932
cdef class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    The first argument, *write*, is a callable that receives each piece of
    serialised output as a text string.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
                          should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
                           should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    cdef object _write                    # output callable
    cdef list _data                       # text chunks collected since the last flush
    cdef set _qname_aware_tags
    cdef object _find_qname_aware_attrs   # bound set.intersection, or None
    cdef list _declared_ns_stack
    cdef list _ns_stack
    cdef dict _prefix_map                 # uri -> generated prefix (rewrite_prefixes)
    cdef list _preserve_space             # xml:space state, one entry per open element
    cdef tuple _pending_start             # deferred (tag, attrs, new_namespaces), or None
    cdef set _exclude_tags
    cdef set _exclude_attrs
    cdef Py_ssize_t _ignored_depth        # nesting depth inside an excluded tag
    cdef bint _with_comments
    cdef bint _strip_text
    cdef bint _rewrite_prefixes
    cdef bint _root_seen
    cdef bint _root_done

    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(_DEFAULT_NAMESPACE_PREFIXES_ITEMS)
        self._ns_stack.append([])
        self._prefix_map = {}
        self._preserve_space = [False]
        self._pending_start = None
        self._ignored_depth = 0
        self._root_seen = False
        self._root_done = False

    def _iter_namespaces(self, ns_stack):
        """Yield the (uri, prefix) pairs of *ns_stack*, innermost level first."""
        for namespaces in reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    cdef _resolve_prefix_name(self, prefixed_name):
        """Convert "prefix:name" to "{uri}name" using the prefixes in scope.

        Raises ValueError if the prefix is not declared.
        """
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    cdef _qname(self, qname, uri=None):
        """Return ``(serialised name, local name, uri)`` for *qname*.

        *qname* is either in "{uri}name" notation or, if *uri* is given, a plain
        local name.  If the namespace has no declaration in the output yet, a
        declaration is added to the innermost level of the declared namespaces.

        Raises ValueError if the namespace URI cannot be mapped to a prefix.
        """
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # Look for a declaration that was not overridden by an inner one.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        if not uri:
            # As soon as a default namespace is defined,
            # anything that has no namespace (and thus, no prefix) goes there.
            return tag, tag, uri

        raise ValueError(f'Namespace "{uri}" of name "{tag}" is not declared in scope')

    def data(self, data):
        """Collect text content, unless inside an excluded tag."""
        if not self._ignored_depth:
            self._data.append(data)

    cdef _flush(self):
        """Write out a deferred start tag (if any) and the collected text."""
        cdef unicode data = ''.join(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None
            qname_text = data if ':' in data and _looks_like_prefix_name(data) else None
            self._start(tag, attrs, new_namespaces, qname_text)
            if qname_text is not None:
                # _start() has already written the resolved qname text.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        """Record a namespace declaration for the element that starts next."""
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        # A deferred start tag must also be written first, even without text,
        # so that its namespace level exists before the new prefix is added.
        if self._data or self._pending_start is not None:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        """Handle an opening tag."""
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        # Flush also for a deferred parent start tag that received no text,
        # otherwise the parent tag would never be written.
        if self._data or self._pending_start is not None:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    cdef _start(self, tag, attrs, new_namespaces, qname_text=None):
        """Serialise a start tag with its namespace declarations and attributes.

        If *qname_text* is given, it is the "prefix:name" text content of a
        qname aware tag.  It is written directly after the tag, with the prefix
        that the output uses for its namespace.
        """
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parsed_qnames = {n: self._qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        """Handle a closing tag."""
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        # An empty qname aware element still has its start tag pending here.
        # Write it before the end tag so that the stacks popped below are the
        # ones that _start() pushed for this element.
        if self._data or self._pending_start is not None:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial entry is left => the root element was closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        """Serialise a comment, if comments are enabled."""
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # Keep document order: a deferred start tag and any collected
            # text come before the comment.
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        """Serialise a processing instruction."""
        if self._ignored_depth:
            return
        if self._root_done:
            self._write('\n')
        elif self._pending_start is not None or (self._root_seen and self._data):
            # Keep document order: a deferred start tag and any collected
            # text come before the processing instruction.
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')

    def close(self):
        """Finish the target.  Always returns None."""
        return None
1204
+
1205
+
1206
cdef _raise_serialization_error(text):
    """Raise the TypeError that reports *text* as not serialisable."""
    type_name = type(text).__name__
    raise TypeError("cannot serialize %r (type %s)" % (text, type_name))
1208
+
1209
+
1210
cdef unicode _escape_cdata_c14n(stext):
    """Escape character data for C14N output.

    Replaces '&', '<', '>' and carriage return by '&amp;', '&lt;', '&gt;' and
    '&#xD;'.  Text without any of these characters is returned as is.
    Raises TypeError (via ``_raise_serialization_error``) if *stext* cannot be
    converted to a text string.
    """
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t copied_to = 0, index = 0
    cdef list pieces = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for index, ch in enumerate(text):
        if ch == '&':
            replacement = '&amp;'
        elif ch == '<':
            replacement = '&lt;'
        elif ch == '>':
            replacement = '&gt;'
        elif ch == '\r':
            replacement = '&#xD;'
        else:
            continue

        # The list is only created once the first special character is found.
        if pieces is None:
            pieces = []
        if copied_to < index:
            # unescaped text between the previous and the current special character
            pieces.append(text[copied_to:index])
        pieces.append(replacement)
        copied_to = index + 1

    if pieces is None:
        # nothing to escape
        return text
    if copied_to < len(text):
        # unescaped text after the last special character
        pieces.append(text[copied_to:])
    return ''.join(pieces)
1245
+
1246
+
1247
cdef unicode _escape_attrib_c14n(stext):
    """Escape an attribute value for C14N output.

    Replaces '&', '<', '"', tab, newline and carriage return by '&amp;',
    '&lt;', '&quot;', '&#x9;', '&#xA;' and '&#xD;'.  Text without any of these
    characters is returned as is.  Raises TypeError (via
    ``_raise_serialization_error``) if *stext* cannot be converted to a text
    string.
    """
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t copied_to = 0, index = 0
    cdef list pieces = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for index, ch in enumerate(text):
        if ch == '&':
            replacement = '&amp;'
        elif ch == '<':
            replacement = '&lt;'
        elif ch == '"':
            replacement = '&quot;'
        elif ch == '\t':
            replacement = '&#x9;'
        elif ch == '\n':
            replacement = '&#xA;'
        elif ch == '\r':
            replacement = '&#xD;'
        else:
            continue

        # The list is only created once the first special character is found.
        if pieces is None:
            pieces = []
        if copied_to < index:
            # unescaped text between the previous and the current special character
            pieces.append(text[copied_to:index])
        pieces.append(replacement)
        copied_to = index + 1

    if pieces is None:
        # nothing to escape
        return text
    if copied_to < len(text):
        # unescaped text after the last special character
        pieces.append(text[copied_to:])
    return ''.join(pieces)
1286
+
1287
+
1288
+ # incremental serialisation
1289
+
1290
cdef class xmlfile:
    """xmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    A context manager for writing an XML document piece by piece.

    Usage example::

         with xmlfile("somefile.xml", encoding='utf-8') as xf:
             xf.write_declaration(standalone=True)
             xf.write_doctype('<!DOCTYPE root SYSTEM "some.dtd">')

             # generate an element (the root element)
             with xf.element('root'):
                  # write a complete Element into the open root element
                  xf.write(etree.Element('test'))

                  # generate and write more Elements, e.g. through iterparse
                  for element in generate_some_elements():
                      # serialise generated elements into the XML file
                      xf.write(element)

                  # or write multiple Elements or strings at once
                  xf.write(etree.Element('start'), "text", etree.Element('end'))

    If 'output_file' is a file(-like) object, passing ``close=True`` will
    close it when exiting the context manager.  By default, it is left
    to the owner to do that.  When a file path is used, lxml will take care
    of opening and closing the file itself.  Also, when a compression level
    is set, lxml will deliberately close the file to make sure all data gets
    compressed and written.

    Setting ``buffered=False`` will flush the output after each operation,
    such as opening or closing an ``xf.element()`` block or calling
    ``xf.write()``.  Alternatively, calling ``xf.flush()`` can be used to
    explicitly flush any pending output when buffering is enabled.
    """
    cdef object output_file
    cdef bytes encoding
    cdef _IncrementalFileWriter writer
    cdef _AsyncIncrementalFileWriter async_writer
    cdef int compresslevel
    cdef bint close
    cdef bint buffered
    cdef int method

    def __init__(self, output_file not None, encoding=None, compression=None,
                 close=False, buffered=True):
        self.method = OUTPUT_METHOD_XML
        self.output_file = output_file
        self.encoding = _utf8orNone(encoding)
        # No compression level given => 0.
        self.compresslevel = compression or 0
        self.close = close
        self.buffered = buffered

    def __enter__(self):
        """Create the incremental writer and return it."""
        cdef _IncrementalFileWriter new_writer
        assert self.output_file is not None
        new_writer = _IncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        self.writer = new_writer
        return new_writer

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Close the writer; only report its errors if the block did not raise."""
        cdef _IncrementalFileWriter finished
        if self.writer is None:
            return
        finished = self.writer
        self.writer = None
        finished._close(exc_type is None)
        if self.close:
            # Drop the reference to the output file once it is closed.
            self.output_file = None

    async def __aenter__(self):
        """Create the asynchronous incremental writer and return it."""
        assert self.output_file is not None
        if isinstance(self.output_file, basestring):
            raise TypeError("Cannot asynchronously write to a plain file")
        if not hasattr(self.output_file, 'write'):
            raise TypeError("Output file needs an async .write() method")
        new_writer = _AsyncIncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        self.async_writer = new_writer
        return new_writer

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """Close the async writer; only report its errors if the block did not raise."""
        if self.async_writer is None:
            return
        finished = self.async_writer
        self.async_writer = None
        await finished._close(exc_type is None)
        if self.close:
            # Drop the reference to the output file once it is closed.
            self.output_file = None
1377
+
1378
+
1379
cdef class htmlfile(xmlfile):
    """htmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    Incremental serialisation like ``xmlfile``, but with the HTML output
    method.  Takes the same arguments as ``xmlfile``.
    """
    def __init__(self, *args, **kwargs):
        xmlfile.__init__(self, *args, **kwargs)
        # Replace the XML output method that the base class configured.
        self.method = OUTPUT_METHOD_HTML
1388
+
1389
+
1390
# Progress states of an _IncrementalFileWriter.
# The numeric order is significant: the writer compares states
# with '<', '>' and '>='.
cdef enum _IncrementalFileWriterStatus:
    WRITER_STARTING = 0       # nothing written yet
    WRITER_DECL_WRITTEN = 1   # XML declaration written
    WRITER_DTD_WRITTEN = 2    # DOCTYPE written
    WRITER_IN_ELEMENT = 3     # an element was opened
    WRITER_FINISHED = 4       # top-level element completed, or writer closed
1396
+
1397
+
1398
+ @cython.final
1399
+ @cython.internal
1400
+ cdef class _IncrementalFileWriter:
1401
+ cdef tree.xmlOutputBuffer* _c_out
1402
+ cdef bytes _encoding
1403
+ cdef const_char* _c_encoding
1404
+ cdef _FilelikeWriter _target
1405
+ cdef list _element_stack
1406
+ cdef int _status
1407
+ cdef int _method
1408
+ cdef bint _buffered
1409
+
1410
+ def __cinit__(self, outfile, bytes encoding, int compresslevel, bint close,
1411
+ bint buffered, int method):
1412
+ self._status = WRITER_STARTING
1413
+ self._element_stack = []
1414
+ if encoding is None:
1415
+ encoding = b'ASCII'
1416
+ self._encoding = encoding
1417
+ self._c_encoding = _cstr(encoding) if encoding is not None else NULL
1418
+ self._buffered = buffered
1419
+ self._target = _create_output_buffer(
1420
+ outfile, self._c_encoding, compresslevel, &self._c_out, close)
1421
+ self._method = method
1422
+
1423
+ def __dealloc__(self):
1424
+ if self._c_out is not NULL:
1425
+ tree.xmlOutputBufferClose(self._c_out)
1426
+
1427
+ def write_declaration(self, version=None, standalone=None, doctype=None):
1428
+ """write_declaration(self, version=None, standalone=None, doctype=None)
1429
+
1430
+ Write an XML declaration and (optionally) a doctype into the file.
1431
+ """
1432
+ assert self._c_out is not NULL
1433
+ cdef const_xmlChar* c_version
1434
+ cdef int c_standalone
1435
+ if self._method != OUTPUT_METHOD_XML:
1436
+ raise LxmlSyntaxError("only XML documents have declarations")
1437
+ if self._status >= WRITER_DECL_WRITTEN:
1438
+ raise LxmlSyntaxError("XML declaration already written")
1439
+ version = _utf8orNone(version)
1440
+ c_version = _xcstr(version) if version is not None else NULL
1441
+ doctype = _utf8orNone(doctype)
1442
+ if standalone is None:
1443
+ c_standalone = -1
1444
+ else:
1445
+ c_standalone = 1 if standalone else 0
1446
+ _writeDeclarationToBuffer(self._c_out, c_version, self._c_encoding, c_standalone)
1447
+ if doctype is not None:
1448
+ _writeDoctype(self._c_out, _xcstr(doctype))
1449
+ self._status = WRITER_DTD_WRITTEN
1450
+ else:
1451
+ self._status = WRITER_DECL_WRITTEN
1452
+ if not self._buffered:
1453
+ tree.xmlOutputBufferFlush(self._c_out)
1454
+ self._handle_error(self._c_out.error)
1455
+
1456
+ def write_doctype(self, doctype):
1457
+ """write_doctype(self, doctype)
1458
+
1459
+ Writes the given doctype declaration verbatimly into the file.
1460
+ """
1461
+ assert self._c_out is not NULL
1462
+ if doctype is None:
1463
+ return
1464
+ if self._status >= WRITER_DTD_WRITTEN:
1465
+ raise LxmlSyntaxError("DOCTYPE already written or cannot write it here")
1466
+ doctype = _utf8(doctype)
1467
+ _writeDoctype(self._c_out, _xcstr(doctype))
1468
+ self._status = WRITER_DTD_WRITTEN
1469
+ if not self._buffered:
1470
+ tree.xmlOutputBufferFlush(self._c_out)
1471
+ self._handle_error(self._c_out.error)
1472
+
1473
+ def method(self, method):
1474
+ """method(self, method)
1475
+
1476
+ Returns a context manager that overrides and restores the output method.
1477
+ method is one of (None, 'xml', 'html') where None means 'xml'.
1478
+ """
1479
+ assert self._c_out is not NULL
1480
+ c_method = self._method if method is None else _findOutputMethod(method)
1481
+ return _MethodChanger(self, c_method)
1482
+
1483
+ def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
1484
+ """element(self, tag, attrib=None, nsmap=None, method, **_extra)
1485
+
1486
+ Returns a context manager that writes an opening and closing tag.
1487
+ method is one of (None, 'xml', 'html') where None means 'xml'.
1488
+ """
1489
+ assert self._c_out is not NULL
1490
+ attributes = []
1491
+ if attrib is not None:
1492
+ for name, value in _iter_attrib(attrib):
1493
+ if name not in _extra:
1494
+ ns, name = _getNsTag(name)
1495
+ attributes.append((ns, name, _utf8(value)))
1496
+ if _extra:
1497
+ for name, value in _extra.iteritems():
1498
+ ns, name = _getNsTag(name)
1499
+ attributes.append((ns, name, _utf8(value)))
1500
+ reversed_nsmap = {}
1501
+ if nsmap:
1502
+ for prefix, ns in nsmap.items():
1503
+ if prefix is not None:
1504
+ prefix = _utf8(prefix)
1505
+ _prefixValidOrRaise(prefix)
1506
+ reversed_nsmap[_utf8(ns)] = prefix
1507
+ ns, name = _getNsTag(tag)
1508
+
1509
+ c_method = self._method if method is None else _findOutputMethod(method)
1510
+
1511
+ return _FileWriterElement(self, (ns, name, attributes, reversed_nsmap), c_method)
1512
+
1513
+ cdef _write_qname(self, bytes name, bytes prefix):
1514
+ if prefix: # empty bytes for no prefix (not None to allow sorting)
1515
+ tree.xmlOutputBufferWrite(self._c_out, len(prefix), _cstr(prefix))
1516
+ tree.xmlOutputBufferWrite(self._c_out, 1, ':')
1517
+ tree.xmlOutputBufferWrite(self._c_out, len(name), _cstr(name))
1518
+
1519
+ cdef _write_start_element(self, element_config):
1520
+ if self._status > WRITER_IN_ELEMENT:
1521
+ raise LxmlSyntaxError("cannot append trailing element to complete XML document")
1522
+ ns, name, attributes, nsmap = element_config
1523
+ flat_namespace_map, new_namespaces = self._collect_namespaces(nsmap)
1524
+ prefix = self._find_prefix(ns, flat_namespace_map, new_namespaces)
1525
+ tree.xmlOutputBufferWrite(self._c_out, 1, '<')
1526
+ self._write_qname(name, prefix)
1527
+
1528
+ self._write_attributes_and_namespaces(
1529
+ attributes, flat_namespace_map, new_namespaces)
1530
+
1531
+ tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1532
+ if not self._buffered:
1533
+ tree.xmlOutputBufferFlush(self._c_out)
1534
+ self._handle_error(self._c_out.error)
1535
+
1536
+ self._element_stack.append((ns, name, prefix, flat_namespace_map))
1537
+ self._status = WRITER_IN_ELEMENT
1538
+
1539
+ cdef _write_attributes_and_namespaces(self, list attributes,
1540
+ dict flat_namespace_map,
1541
+ list new_namespaces):
1542
+ if attributes:
1543
+ # _find_prefix() may append to new_namespaces => build them first
1544
+ attributes = [
1545
+ (self._find_prefix(ns, flat_namespace_map, new_namespaces), name, value)
1546
+ for ns, name, value in attributes ]
1547
+ if new_namespaces:
1548
+ new_namespaces.sort()
1549
+ self._write_attributes_list(new_namespaces)
1550
+ if attributes:
1551
+ self._write_attributes_list(attributes)
1552
+
1553
+ cdef _write_attributes_list(self, list attributes):
1554
+ for prefix, name, value in attributes:
1555
+ tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
1556
+ self._write_qname(name, prefix)
1557
+ tree.xmlOutputBufferWrite(self._c_out, 2, '="')
1558
+ _write_attr_string(self._c_out, _cstr(value))
1559
+
1560
+ tree.xmlOutputBufferWrite(self._c_out, 1, '"')
1561
+
1562
+ cdef _write_end_element(self, element_config):
1563
+ if self._status != WRITER_IN_ELEMENT:
1564
+ raise LxmlSyntaxError("not in an element")
1565
+ if not self._element_stack or self._element_stack[-1][:2] != element_config[:2]:
1566
+ raise LxmlSyntaxError("inconsistent exit action in context manager")
1567
+
1568
+ # If previous write operations failed, the context manager exit might still call us.
1569
+ # That is ok, but we stop writing closing tags and handling errors in that case.
1570
+ # For all non-I/O errors, we continue writing closing tags if we can.
1571
+ ok_to_write = self._c_out.error == xmlerror.XML_ERR_OK
1572
+
1573
+ name, prefix = self._element_stack.pop()[1:3]
1574
+ if ok_to_write:
1575
+ tree.xmlOutputBufferWrite(self._c_out, 2, '</')
1576
+ self._write_qname(name, prefix)
1577
+ tree.xmlOutputBufferWrite(self._c_out, 1, '>')
1578
+
1579
+ if not self._element_stack:
1580
+ self._status = WRITER_FINISHED
1581
+ if ok_to_write:
1582
+ if not self._buffered:
1583
+ tree.xmlOutputBufferFlush(self._c_out)
1584
+ self._handle_error(self._c_out.error)
1585
+
1586
+ cdef _find_prefix(self, bytes href, dict flat_namespaces_map, list new_namespaces):
1587
+ if href is None:
1588
+ return None
1589
+ if href in flat_namespaces_map:
1590
+ return flat_namespaces_map[href]
1591
+ # need to create a new prefix
1592
+ prefixes = flat_namespaces_map.values()
1593
+ i = 0
1594
+ while True:
1595
+ prefix = _utf8('ns%d' % i)
1596
+ if prefix not in prefixes:
1597
+ new_namespaces.append((b'xmlns', prefix, href))
1598
+ flat_namespaces_map[href] = prefix
1599
+ return prefix
1600
+ i += 1
1601
+
1602
+ cdef _collect_namespaces(self, dict nsmap):
1603
+ new_namespaces = []
1604
+ flat_namespaces_map = {}
1605
+ for ns, prefix in nsmap.iteritems():
1606
+ flat_namespaces_map[ns] = prefix
1607
+ if prefix is None:
1608
+ # use empty bytes rather than None to allow sorting
1609
+ new_namespaces.append((b'', b'xmlns', ns))
1610
+ else:
1611
+ new_namespaces.append((b'xmlns', prefix, ns))
1612
+ # merge in flat namespace map of parent
1613
+ if self._element_stack:
1614
+ for ns, prefix in (<dict>self._element_stack[-1][-1]).iteritems():
1615
+ if flat_namespaces_map.get(ns) is None:
1616
+ # unknown or empty prefix => prefer a 'real' prefix
1617
+ flat_namespaces_map[ns] = prefix
1618
+ return flat_namespaces_map, new_namespaces
1619
+
1620
def write(self, *args, bint with_tail=True, bint pretty_print=False, method=None):
    """write(self, *args, with_tail=True, pretty_print=False, method=None)

    Write subtrees or strings into the file.

    If method is not None, it should be one of ('html', 'xml', 'text')
    to temporarily override the output method.

    Strings are written escaped, except inside (X)HTML ``script`` and
    ``style`` elements when the HTML output method is active, where they
    are written verbatim.  Whitespace-only strings are also accepted
    before the root element has been opened.  ``None`` arguments are
    silently skipped.

    Raises LxmlSyntaxError for non-whitespace text outside of an element,
    for any string written after the writer has finished, and for an
    element appended after the document is complete.  Raises TypeError
    for arguments that are neither strings, elements nor None.
    """
    assert self._c_out is not NULL
    c_method = self._method if method is None else _findOutputMethod(method)

    for content in args:
        if _isString(content):
            if self._status != WRITER_IN_ELEMENT:
                # Outside of an element, only whitespace is tolerated,
                # and only as long as the document is not finished yet.
                if self._status > WRITER_IN_ELEMENT or content.strip():
                    raise LxmlSyntaxError("not in an element")
            bstring = _utf8(content)
            if not bstring:
                continue

            if self._element_stack:
                ns, name, _, _ = self._element_stack[-1]
            else:
                # Whitespace written before the root element: there is no
                # enclosing element to inspect, so indexing the empty stack
                # would raise IndexError.  Treat it as plain escaped text.
                ns = name = None

            if (c_method == OUTPUT_METHOD_HTML and
                    ns in (None, b'http://www.w3.org/1999/xhtml') and
                    name in (b'script', b'style')):
                # raw text content of <script>/<style> must not be escaped
                tree.xmlOutputBufferWrite(self._c_out, len(bstring), _cstr(bstring))
            else:
                tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(bstring), NULL)

        elif iselement(content):
            if self._status > WRITER_IN_ELEMENT:
                raise LxmlSyntaxError("cannot append trailing element to complete XML document")
            _writeNodeToBuffer(self._c_out, (<_Element>content)._c_node,
                               self._c_encoding, NULL, c_method,
                               False, False, pretty_print, with_tail, False)
            if (<_Element>content)._c_node.type == tree.XML_ELEMENT_NODE:
                if not self._element_stack:
                    # a top-level element completes the document
                    self._status = WRITER_FINISHED

        elif content is not None:
            raise TypeError(
                f"got invalid input value of type {type(content)}, expected string or Element")
        self._handle_error(self._c_out.error)
    if not self._buffered:
        tree.xmlOutputBufferFlush(self._c_out)
        self._handle_error(self._c_out.error)
1666
+
1667
def flush(self):
    """flush(self)

    Push everything that is still held in the current output buffer
    out to the underlying stream.

    Any error recorded on the output buffer afterwards is raised via
    ``_handle_error()``.
    """
    assert self._c_out is not NULL
    tree.xmlOutputBufferFlush(self._c_out)
    # surface any error that the buffer recorded
    self._handle_error(self._c_out.error)
1675
+
1676
cdef _close(self, bint raise_on_error):
    # Close the output buffer and reset the writer state.
    #
    # With raise_on_error set, an incomplete document (nothing written, or
    # elements still open) is rejected before closing, and any buffer/close
    # error is raised afterwards.  Without it, the buffer is closed and all
    # errors are ignored.
    if raise_on_error:
        if self._status < WRITER_IN_ELEMENT:
            raise LxmlSyntaxError("no content written")
        if self._element_stack:
            raise LxmlSyntaxError("pending open tags on close")

    # Remember the buffer's own error state before closing it.
    error_result = self._c_out.error
    close_result = tree.xmlOutputBufferClose(self._c_out)
    # A failed close (-1) is only reported if no earlier error was recorded.
    if error_result == xmlerror.XML_ERR_OK and close_result == -1:
        error_result = -1

    self._status = WRITER_FINISHED
    self._c_out = NULL
    del self._element_stack[:]

    if raise_on_error:
        self._handle_error(error_result)
1694
+
1695
cdef _handle_error(self, int error_result):
    # Raise an exception for a non-OK serialisation result code.
    if error_result == xmlerror.XML_ERR_OK:
        return
    # Prefer an exception stored on the write target over the generic
    # serialisation error.
    if self._target is not None:
        self._target._exc_context._raise_if_stored()
    _raiseSerialisationError(error_result)
1700
+
1701
+
1702
@cython.final
@cython.internal
cdef class _AsyncDataWriter:
    # Minimal file-like sink that accumulates written chunks in a list
    # until they are collected.
    cdef list _data

    def __cinit__(self):
        self._data = []

    cdef bytes collect(self):
        # Join all pending chunks into one bytes object and empty the list
        # in place.
        chunks = self._data
        joined = b''.join(chunks)
        del chunks[:]
        return joined

    def write(self, data):
        # Only stores the chunk; nothing is written anywhere yet.
        self._data.append(data)

    def close(self):
        # Nothing to release.
        pass
1719
+
1720
+
1721
@cython.final
@cython.internal
cdef class _AsyncIncrementalFileWriter:
    # Async front-end for _IncrementalFileWriter: the wrapped writer
    # serialises into an in-memory _AsyncDataWriter, and the collected bytes
    # are then awaited out to the async output file.
    cdef _IncrementalFileWriter _writer
    cdef _AsyncDataWriter _buffer
    cdef object _async_outfile
    cdef int _flush_after_writes
    cdef bint _should_close
    cdef bint _buffered

    def __cinit__(self, async_outfile, bytes encoding, int compresslevel, bint close,
                  bint buffered, int method):
        self._async_outfile = async_outfile
        self._should_close = close
        self._buffered = buffered
        # in buffered mode, collect once more than this many chunks are pending
        self._flush_after_writes = 20
        self._buffer = _AsyncDataWriter()
        self._writer = _IncrementalFileWriter(
            self._buffer, encoding, compresslevel, close=True, buffered=False, method=method)

    cdef bytes _flush(self):
        # Return the pending bytes if they should be written out now,
        # otherwise None.
        if self._buffered and len(self._buffer._data) <= self._flush_after_writes:
            return None
        return self._buffer.collect()

    async def flush(self):
        # Unconditionally flush the wrapped writer and pass on whatever
        # it produced.
        self._writer.flush()
        pending = self._buffer.collect()
        if pending:
            await self._async_outfile.write(pending)

    async def write_declaration(self, version=None, standalone=None, doctype=None):
        self._writer.write_declaration(version, standalone, doctype)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    async def write_doctype(self, doctype):
        self._writer.write_doctype(doctype)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    async def write(self, *args, with_tail=True, pretty_print=False, method=None):
        self._writer.write(*args, with_tail=with_tail, pretty_print=pretty_print, method=method)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    def method(self, method):
        # Delegates directly to the wrapped writer.
        return self._writer.method(method)

    def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
        # Wrap the synchronous element context manager in its async counterpart.
        sync_element = self._writer.element(tag, attrib, nsmap, method, **_extra)
        return _AsyncFileWriterElement(sync_element, self)

    async def _close(self, bint raise_on_error):
        # Close the wrapped writer, write out the remaining bytes and, if
        # requested at construction time, close the async output file.
        self._writer._close(raise_on_error)
        remaining = self._buffer.collect()
        if remaining:
            await self._async_outfile.write(remaining)
        if self._should_close:
            await self._async_outfile.close()
1784
+
1785
+
1786
@cython.final
@cython.internal
cdef class _AsyncFileWriterElement:
    # Async context manager that wraps a synchronous _FileWriterElement and
    # forwards the bytes it produces to the async output file.
    cdef _FileWriterElement _element_writer
    cdef _AsyncIncrementalFileWriter _writer

    def __cinit__(self, _FileWriterElement element_writer not None,
                  _AsyncIncrementalFileWriter writer not None):
        self._writer = writer
        self._element_writer = element_writer

    async def __aenter__(self):
        # Enter the wrapped element, then write out whatever is due.
        self._element_writer.__enter__()
        pending = self._writer._flush()
        if pending:
            await self._writer._async_outfile.write(pending)

    async def __aexit__(self, *args):
        # Exit the wrapped element, then write out whatever is due.
        self._element_writer.__exit__(*args)
        pending = self._writer._flush()
        if pending:
            await self._writer._async_outfile.write(pending)
1808
+
1809
+
1810
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _FileWriterElement:
    # Context manager that writes an element's start tag on entry and its
    # end tag on exit, switching the writer's output method in between.
    cdef _IncrementalFileWriter _writer
    cdef object _element
    cdef int _new_method
    cdef int _old_method

    def __cinit__(self, _IncrementalFileWriter writer not None, element_config, int method):
        self._writer = writer
        # remember the writer's current method so that __exit__ can restore it
        self._old_method = writer._method
        self._new_method = method
        self._element = element_config

    def __enter__(self):
        cdef _IncrementalFileWriter writer = self._writer
        # switch the method first, then write the start tag
        writer._method = self._new_method
        writer._write_start_element(self._element)

    def __exit__(self, exc_type, exc_val, exc_tb):
        cdef _IncrementalFileWriter writer = self._writer
        # write the end tag first, then restore the previous method
        writer._write_end_element(self._element)
        writer._method = self._old_method
1832
+
1833
+
1834
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _MethodChanger:
    # Single-use context manager that temporarily switches the output method
    # of an _IncrementalFileWriter and restores the previous one on exit.
    cdef _IncrementalFileWriter _writer
    cdef int _new_method
    cdef int _old_method
    cdef bint _entered
    cdef bint _exited

    def __cinit__(self, _IncrementalFileWriter writer not None, int method):
        self._entered = False
        self._exited = False
        self._writer = writer
        # the method to restore is captured at construction time
        self._old_method = writer._method
        self._new_method = method

    def __enter__(self):
        # entering twice is an error
        if self._entered:
            raise LxmlSyntaxError("Inconsistent enter action in context manager")
        self._entered = True
        self._writer._method = self._new_method

    def __exit__(self, exc_type, exc_val, exc_tb):
        cdef _IncrementalFileWriter writer = self._writer
        # exiting twice is an error
        if self._exited:
            raise LxmlSyntaxError("Inconsistent exit action in context manager")
        # the writer must still be using the method that this manager set
        if writer._method != self._new_method:
            raise LxmlSyntaxError("Method changed outside of context manager")
        writer._method = self._old_method
        self._exited = True

    async def __aenter__(self):
        # async variant, delegates to the synchronous implementation
        return self.__enter__()

    async def __aexit__(self, *args):
        # async variant, delegates to the synchronous implementation
        return self.__exit__(*args)