lxml 6.0.0__cp311-cp311-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cp311-win_arm64.pyd +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cp311-win_arm64.pyd +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cp311-win_arm64.pyd +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cp311-win_arm64.pyd +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cp311-win_arm64.pyd +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libexslt/libexslt.h +29 -0
- lxml/includes/libxml/HTMLparser.h +320 -0
- lxml/includes/libxml/HTMLtree.h +147 -0
- lxml/includes/libxml/SAX.h +204 -0
- lxml/includes/libxml/SAX2.h +173 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +128 -0
- lxml/includes/libxml/catalog.h +182 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +217 -0
- lxml/includes/libxml/dict.h +81 -0
- lxml/includes/libxml/encoding.h +233 -0
- lxml/includes/libxml/entities.h +151 -0
- lxml/includes/libxml/globals.h +529 -0
- lxml/includes/libxml/hash.h +236 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +186 -0
- lxml/includes/libxml/nanohttp.h +81 -0
- lxml/includes/libxml/parser.h +1265 -0
- lxml/includes/libxml/parserInternals.h +662 -0
- lxml/includes/libxml/pattern.h +100 -0
- lxml/includes/libxml/relaxng.h +218 -0
- lxml/includes/libxml/schemasInternals.h +958 -0
- lxml/includes/libxml/schematron.h +142 -0
- lxml/includes/libxml/threads.h +94 -0
- lxml/includes/libxml/tree.h +1314 -0
- lxml/includes/libxml/uri.h +94 -0
- lxml/includes/libxml/valid.h +448 -0
- lxml/includes/libxml/xinclude.h +129 -0
- lxml/includes/libxml/xlink.h +189 -0
- lxml/includes/libxml/xmlIO.h +369 -0
- lxml/includes/libxml/xmlautomata.h +146 -0
- lxml/includes/libxml/xmlerror.h +919 -0
- lxml/includes/libxml/xmlexports.h +50 -0
- lxml/includes/libxml/xmlmemory.h +228 -0
- lxml/includes/libxml/xmlmodule.h +57 -0
- lxml/includes/libxml/xmlreader.h +428 -0
- lxml/includes/libxml/xmlregexp.h +222 -0
- lxml/includes/libxml/xmlsave.h +88 -0
- lxml/includes/libxml/xmlschemas.h +246 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +202 -0
- lxml/includes/libxml/xmlversion.h +526 -0
- lxml/includes/libxml/xmlwriter.h +488 -0
- lxml/includes/libxml/xpath.h +575 -0
- lxml/includes/libxml/xpathInternals.h +632 -0
- lxml/includes/libxml/xpointer.h +137 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/libxslt.h +36 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/trio.h +216 -0
- lxml/includes/libxslt/triodef.h +220 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/win32config.h +51 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1992 -0
- lxml/includes/libxslt/xsltconfig.h +179 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cp311-win_arm64.pyd +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cp311-win_arm64.pyd +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +177 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/serializer.pxi
ADDED
@@ -0,0 +1,1849 @@
|
|
1
|
+
# XML serialization and output functions
|
2
|
+
|
3
|
+
cdef object GzipFile
|
4
|
+
from gzip import GzipFile
|
5
|
+
|
6
|
+
|
7
|
+
cdef class SerialisationError(LxmlError):
    """A libxml2 error that occurred during serialisation.

    Raised by the serialisation helpers in this file, e.g. when the output
    buffer reports an error code after a node was written to it.
    """
|
10
|
+
|
11
|
+
|
12
|
+
# Internal codes for the serialisation methods that _findOutputMethod()
# resolves the user-provided method name to.
cdef enum _OutputMethods:
    OUTPUT_METHOD_XML   # "xml", also chosen when no method is given
    OUTPUT_METHOD_HTML  # "html"
    OUTPUT_METHOD_TEXT  # "text"
|
16
|
+
|
17
|
+
|
18
|
+
cdef int _findOutputMethod(method) except -1:
    """Translate a serialisation method name into its _OutputMethods code.

    ``None`` selects XML.  Names are matched case-insensitively against
    "xml", "html" and "text"; anything else raises ValueError.
    """
    cdef int c_method
    if method is None:
        return OUTPUT_METHOD_XML

    name = method.lower()
    if name == "xml":
        c_method = OUTPUT_METHOD_XML
    elif name == "html":
        c_method = OUTPUT_METHOD_HTML
    elif name == "text":
        c_method = OUTPUT_METHOD_TEXT
    else:
        # the message reports the lower-cased name
        raise ValueError(f"unknown output method {name!r}")
    return c_method
|
29
|
+
|
30
|
+
|
31
|
+
cdef _textToString(xmlNode* c_node, encoding, bint with_tail):
    """Return the text content of ``c_node`` (method="text" serialisation).

    ``encoding`` may be ``None`` (the raw buffer bytes are returned), the
    ``unicode`` type (a decoded text string is returned) or an encoding
    name (the text is re-encoded unless the name is 'utf8'/'utf-8').
    With ``with_tail``, the content of the following text nodes, as
    selected by _textNodeOrSkip(), is appended.

    Raises MemoryError if the temporary buffer cannot be created and
    SerialisationError if collecting the content fails.
    """
    cdef bint needs_conversion
    cdef const_xmlChar* c_text
    cdef xmlNode* c_text_node
    cdef tree.xmlBuffer* c_buffer
    cdef int error_result

    c_buffer = tree.xmlBufferCreate()
    if c_buffer is NULL:
        raise MemoryError()

    with nogil:
        error_result = tree.xmlNodeBufGetContent(c_buffer, c_node)
        if with_tail:
            c_text_node = _textNodeOrSkip(c_node.next)
            while c_text_node is not NULL:
                tree.xmlBufferWriteChar(c_buffer, <const_char*>c_text_node.content)
                c_text_node = _textNodeOrSkip(c_text_node.next)
        c_text = tree.xmlBufferContent(c_buffer)

    if error_result < 0 or c_text is NULL:
        # not inside the try-finally below yet, so free the buffer by hand
        tree.xmlBufferFree(c_buffer)
        raise SerialisationError, "Error during serialisation (out of memory?)"

    try:
        needs_conversion = 0
        if encoding is unicode:
            needs_conversion = 1
        elif encoding is not None:
            # Python prefers lower case encoding names
            encoding = encoding.lower()
            if encoding not in ('utf8', 'utf-8'):
                if encoding == 'ascii':
                    # NOTE(review): isutf8l() presumably reports non-ASCII
                    # bytes in the buffer - confirm against its definition.
                    if isutf8l(c_text, tree.xmlBufferLength(c_buffer)):
                        # will raise an encode error below
                        needs_conversion = 1
                else:
                    needs_conversion = 1

        if needs_conversion:
            # the buffer content is decoded as UTF-8 first ...
            text = (<const_char*>c_text)[:tree.xmlBufferLength(c_buffer)].decode('utf8')
            if encoding is not unicode:
                # ... and then strictly re-encoded into the requested encoding
                encoding = _utf8(encoding)
                text = python.PyUnicode_AsEncodedString(
                    text, encoding, 'strict')
        else:
            # no conversion needed: copy the buffer bytes as they are
            text = (<unsigned char*>c_text)[:tree.xmlBufferLength(c_buffer)]
    finally:
        tree.xmlBufferFree(c_buffer)
    return text
|
81
|
+
|
82
|
+
|
83
|
+
cdef _tostring(_Element element, encoding, doctype, method,
               bint write_xml_declaration, bint write_complete_document,
               bint pretty_print, bint with_tail, int standalone):
    """Serialize an element to an encoded string representation of its XML
    tree.

    Returns None for a None element.  The "text" method is delegated to
    _textToString().  If ``encoding`` is the ``unicode`` type, the result
    is decoded from UTF-8 into a text string; otherwise bytes are returned.

    Raises LookupError for an encoding that libxml2 has no handler for,
    MemoryError if the output buffer cannot be allocated, and whatever
    _raiseSerialisationError() raises if the buffer reports an error.
    """
    cdef tree.xmlOutputBuffer* c_buffer
    cdef tree.xmlBuf* c_result_buffer
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef const_char* c_enc
    cdef const_xmlChar* c_version
    cdef const_xmlChar* c_doctype
    cdef int c_method
    cdef int error_result
    if element is None:
        return None
    _assertValidNode(element)
    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        return _textToString(element._c_node, encoding, with_tail)
    if encoding is None or encoding is unicode:
        c_enc = NULL
    else:
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)
    if doctype is None:
        c_doctype = NULL
    else:
        doctype = _utf8(doctype)
        c_doctype = _xcstr(doctype)
    # it is necessary to *both* find the encoding handler *and* use
    # encoding during output
    enchandler = tree.xmlFindCharEncodingHandler(c_enc)
    if enchandler is NULL and c_enc is not NULL:
        if encoding is not None:
            # 'encoding' was converted to bytes above, turn it back into
            # text for the error message
            encoding = encoding.decode('UTF-8')
        raise LookupError, f"unknown encoding: '{encoding}'"
    c_buffer = tree.xmlAllocOutputBuffer(enchandler)
    if c_buffer is NULL:
        tree.xmlCharEncCloseFunc(enchandler)
        raise MemoryError()

    with nogil:
        _writeNodeToBuffer(c_buffer, element._c_node, c_enc, c_doctype, c_method,
                           write_xml_declaration, write_complete_document,
                           pretty_print, with_tail, standalone)
        tree.xmlOutputBufferFlush(c_buffer)
        # read the result from the conversion buffer if there is one,
        # otherwise from the plain output buffer
        if c_buffer.conv is not NULL:
            c_result_buffer = c_buffer.conv
        else:
            c_result_buffer = c_buffer.buffer

    error_result = c_buffer.error
    if error_result != xmlerror.XML_ERR_OK:
        tree.xmlOutputBufferClose(c_buffer)
        _raiseSerialisationError(error_result)

    try:
        if encoding is unicode:
            result = (<unsigned char*>tree.xmlBufContent(
                c_result_buffer))[:tree.xmlBufUse(c_result_buffer)].decode('UTF-8')
        else:
            result = <bytes>(<unsigned char*>tree.xmlBufContent(
                c_result_buffer))[:tree.xmlBufUse(c_result_buffer)]
    finally:
        # always release the output buffer, even if copying the result failed
        error_result = tree.xmlOutputBufferClose(c_buffer)
    if error_result == -1:
        _raiseSerialisationError(error_result)
    return result
|
152
|
+
|
153
|
+
cdef bytes _tostringC14N(element_or_tree, bint exclusive, bint with_comments, inclusive_ns_prefixes):
    """Serialise an element or a document to canonical XML (C14N) bytes.

    For an _Element, a fake root document is set up around the node for
    the duration of the call; for anything else, the document is looked
    up with _documentOrRaise().  ``inclusive_ns_prefixes``, if non-empty,
    is converted to a C array and passed on to libxml2.

    Raises C14NError if libxml2 reports a failure or returns no data.
    """
    cdef xmlDoc* c_doc
    cdef xmlChar* c_buffer = NULL
    cdef int byte_count = -1
    cdef bytes result
    cdef _Document doc
    cdef xmlChar **c_inclusive_ns_prefixes = NULL

    if isinstance(element_or_tree, _Element):
        _assertValidNode(<_Element>element_or_tree)
        doc = (<_Element>element_or_tree)._doc
        c_doc = _plainFakeRootDoc(doc._c_doc, (<_Element>element_or_tree)._c_node, 0)
    else:
        doc = _documentOrRaise(element_or_tree)
        _assertValidDoc(doc)
        c_doc = doc._c_doc

    try:
        # Convert the prefixes inside the try block: if the conversion
        # raises, the (possibly fake) document must still be cleaned up
        # by the finally clause below.
        if inclusive_ns_prefixes:
            c_inclusive_ns_prefixes = _convert_ns_prefixes(
                c_doc.dict, inclusive_ns_prefixes)
        with nogil:
            byte_count = c14n.xmlC14NDocDumpMemory(
                c_doc, NULL, exclusive, c_inclusive_ns_prefixes, with_comments, &c_buffer)

    finally:
        _destroyFakeDoc(doc._c_doc, c_doc)
        if c_inclusive_ns_prefixes is not NULL:
            python.lxml_free(c_inclusive_ns_prefixes)

    if byte_count < 0 or c_buffer is NULL:
        if c_buffer is not NULL:
            tree.xmlFree(c_buffer)
        raise C14NError, "C14N failed"
    try:
        # copy the C buffer into a Python bytes object before freeing it
        result = c_buffer[:byte_count]
    finally:
        tree.xmlFree(c_buffer)
    return result
|
191
|
+
|
192
|
+
cdef _raiseSerialisationError(int error_result):
    """Raise the exception that corresponds to a serialisation error code.

    XML_ERR_NO_MEMORY becomes a MemoryError.  Every other code becomes a
    SerialisationError that carries the error name from ErrorTypes, or a
    generic "unknown error" message if the code has no name.
    """
    if error_result == xmlerror.XML_ERR_NO_MEMORY:
        raise MemoryError()
    error_name = ErrorTypes._getName(error_result)
    if error_name is not None:
        raise SerialisationError, error_name
    raise SerialisationError, f"unknown error {error_result}"
|
199
|
+
|
200
|
+
############################################################
|
201
|
+
# low-level serialisation functions
|
202
|
+
|
203
|
+
cdef void _writeDoctype(tree.xmlOutputBuffer* c_buffer,
                        const_xmlChar* c_doctype) noexcept nogil:
    # Write the user-provided doctype string, followed by a newline.
    cdef int c_len = tree.xmlStrlen(c_doctype)
    tree.xmlOutputBufferWrite(c_buffer, c_len, <const_char*>c_doctype)
    tree.xmlOutputBufferWriteString(c_buffer, "\n")
|
208
|
+
|
209
|
+
cdef void _writeNodeToBuffer(tree.xmlOutputBuffer* c_buffer,
                             xmlNode* c_node, const_char* encoding, const_xmlChar* c_doctype,
                             int c_method, bint write_xml_declaration,
                             bint write_complete_document,
                             bint pretty_print, bint with_tail,
                             int standalone) noexcept nogil:
    # Serialise c_node into c_buffer in this order: XML declaration,
    # doctype/DTD and preceding siblings, the node itself, then tail
    # text and following siblings.
    # Errors are not returned; they are left in c_buffer.error for the
    # caller to check.
    cdef xmlNode* c_nsdecl_node
    cdef xmlDoc* c_doc = c_node.doc
    # the declaration is only written for the XML method
    if write_xml_declaration and c_method == OUTPUT_METHOD_XML:
        _writeDeclarationToBuffer(c_buffer, c_doc.version, encoding, standalone)

    # comments/processing instructions before doctype declaration
    if write_complete_document and not c_buffer.error and c_doc.intSubset:
        _writePrevSiblings(c_buffer, <xmlNode*>c_doc.intSubset, encoding, pretty_print)

    if c_doctype:
        _writeDoctype(c_buffer, c_doctype)
    # write internal DTD subset, preceding PIs/comments, etc.
    if write_complete_document and not c_buffer.error:
        # an explicitly passed doctype replaces the document's own DTD
        if c_doctype is NULL:
            _writeDtdToBuffer(c_buffer, c_doc, c_node.name, c_method, encoding)
        _writePrevSiblings(c_buffer, c_node, encoding, pretty_print)

    c_nsdecl_node = c_node
    if not c_node.parent or c_node.parent.type != tree.XML_DOCUMENT_NODE:
        # copy the node and add namespaces from parents
        # this is required to make libxml write them
        c_nsdecl_node = tree.xmlCopyNode(c_node, 2)
        if not c_nsdecl_node:
            c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
            return
        _copyParentNamespaces(c_node, c_nsdecl_node)

        # let the copy borrow the original's parent and children so that
        # the complete subtree gets written
        c_nsdecl_node.parent = c_node.parent
        c_nsdecl_node.children = c_node.children
        c_nsdecl_node.last = c_node.last

    # write node
    if c_method == OUTPUT_METHOD_HTML:
        tree.htmlNodeDumpFormatOutput(
            c_buffer, c_doc, c_nsdecl_node, encoding, pretty_print)
    else:
        tree.xmlNodeDumpOutput(
            c_buffer, c_doc, c_nsdecl_node, 0, pretty_print, encoding)

    if c_nsdecl_node is not c_node:
        # clean up
        # detach the borrowed children first so that freeing the copy
        # does not touch the original subtree
        c_nsdecl_node.children = c_nsdecl_node.last = NULL
        tree.xmlFreeNode(c_nsdecl_node)

    if c_buffer.error:
        return

    # write tail, trailing comments, etc.
    if with_tail:
        _writeTail(c_buffer, c_node, encoding, c_method, pretty_print)
    if write_complete_document:
        _writeNextSiblings(c_buffer, c_node, encoding, pretty_print)
    if pretty_print:
        tree.xmlOutputBufferWrite(c_buffer, 1, "\n")
|
269
|
+
|
270
|
+
cdef void _writeDeclarationToBuffer(tree.xmlOutputBuffer* c_buffer,
                                    const_xmlChar* version, const_char* encoding,
                                    int standalone) noexcept nogil:
    # Write "<?xml version='...' encoding='...' [standalone='...']?>\n".
    # A NULL version falls back to "1.0".  standalone == 0 writes 'no',
    # standalone == 1 writes 'yes', any other value omits the attribute.
    # The integer passed to xmlOutputBufferWrite() is the byte length of
    # the literal that follows it.
    if version is NULL:
        version = <unsigned char*>"1.0"
    tree.xmlOutputBufferWrite(c_buffer, 15, "<?xml version='")
    tree.xmlOutputBufferWriteString(c_buffer, <const_char*>version)
    tree.xmlOutputBufferWrite(c_buffer, 12, "' encoding='")
    tree.xmlOutputBufferWriteString(c_buffer, encoding)
    if standalone == 0:
        tree.xmlOutputBufferWrite(c_buffer, 20, "' standalone='no'?>\n")
    elif standalone == 1:
        tree.xmlOutputBufferWrite(c_buffer, 21, "' standalone='yes'?>\n")
    else:
        tree.xmlOutputBufferWrite(c_buffer, 4, "'?>\n")
|
285
|
+
|
286
|
+
cdef void _writeDtdToBuffer(tree.xmlOutputBuffer* c_buffer,
                            xmlDoc* c_doc, const_xmlChar* c_root_name,
                            int c_method, const_char* encoding) noexcept nogil:
    # Write the <!DOCTYPE ...> declaration of the document's internal
    # subset, including its content if it has any.  Nothing is written
    # if there is no named internal subset or if its name does not match
    # the root element name.
    cdef tree.xmlDtd* c_dtd
    cdef xmlNode* c_node
    cdef char* quotechar
    c_dtd = c_doc.intSubset
    if not c_dtd or not c_dtd.name:
        return

    # Name in document type declaration must match the root element tag.
    # For XML, case sensitive match, for HTML insensitive.
    if c_method == OUTPUT_METHOD_HTML:
        if tree.xmlStrcasecmp(c_root_name, c_dtd.name) != 0:
            return
    else:
        if tree.xmlStrcmp(c_root_name, c_dtd.name) != 0:
            return

    tree.xmlOutputBufferWrite(c_buffer, 10, "<!DOCTYPE ")
    tree.xmlOutputBufferWriteString(c_buffer, <const_char*>c_dtd.name)

    cdef const_xmlChar* public_id = c_dtd.ExternalID
    cdef const_xmlChar* sys_url = c_dtd.SystemID
    # treat empty IDs like missing ones
    if public_id and public_id[0] == b'\0':
        public_id = NULL
    if sys_url and sys_url[0] == b'\0':
        sys_url = NULL

    if public_id:
        tree.xmlOutputBufferWrite(c_buffer, 9, ' PUBLIC "')
        tree.xmlOutputBufferWriteString(c_buffer, <const_char*>public_id)
        if sys_url:
            tree.xmlOutputBufferWrite(c_buffer, 2, '" ')
        else:
            tree.xmlOutputBufferWrite(c_buffer, 1, '"')
    elif sys_url:
        tree.xmlOutputBufferWrite(c_buffer, 8, ' SYSTEM ')

    if sys_url:
        # quote the system URL with whichever quote character it does
        # not contain itself
        if tree.xmlStrchr(sys_url, b'"'):
            quotechar = '\''
        else:
            quotechar = '"'
        tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)
        tree.xmlOutputBufferWriteString(c_buffer, <const_char*>sys_url)
        tree.xmlOutputBufferWrite(c_buffer, 1, quotechar)

    # an empty internal subset: close the declaration right away
    if (not c_dtd.entities and not c_dtd.elements and
           not c_dtd.attributes and not c_dtd.notations and
           not c_dtd.pentities):
        tree.xmlOutputBufferWrite(c_buffer, 2, '>\n')
        return

    tree.xmlOutputBufferWrite(c_buffer, 3, ' [\n')
    if c_dtd.notations and not c_buffer.error:
        # the notation table is dumped into a temporary buffer and then
        # copied into the output buffer
        c_buf = tree.xmlBufferCreate()
        if not c_buf:
            c_buffer.error = xmlerror.XML_ERR_NO_MEMORY
            return
        tree.xmlDumpNotationTable(c_buf, <tree.xmlNotationTable*>c_dtd.notations)
        tree.xmlOutputBufferWrite(
            c_buffer, tree.xmlBufferLength(c_buf),
            <const_char*>tree.xmlBufferContent(c_buf))
        tree.xmlBufferFree(c_buf)
    # write the child nodes of the DTD one by one, stopping on error
    c_node = c_dtd.children
    while c_node and not c_buffer.error:
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_node, 0, 0, encoding)
        c_node = c_node.next
    tree.xmlOutputBufferWrite(c_buffer, 3, "]>\n")
|
356
|
+
|
357
|
+
cdef void _writeTail(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                     const_char* encoding, int c_method, bint pretty_print) noexcept nogil:
    "Write the element tail."
    # The tail is the run of text/CDATA nodes that directly follows the
    # element.  Stop at the first other node type or on a buffer error.
    cdef xmlNode* c_tail = c_node.next
    while c_tail and not c_buffer.error:
        if c_tail.type not in (tree.XML_TEXT_NODE, tree.XML_CDATA_SECTION_NODE):
            break
        if c_method != OUTPUT_METHOD_HTML:
            tree.xmlNodeDumpOutput(
                c_buffer, c_tail.doc, c_tail, 0, pretty_print, encoding)
        else:
            tree.htmlNodeDumpFormatOutput(
                c_buffer, c_tail.doc, c_tail, encoding, pretty_print)
        c_tail = c_tail.next
|
370
|
+
|
371
|
+
cdef void _writePrevSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                             const_char* encoding, bint pretty_print) noexcept nogil:
    # Write the processing instructions and comments that directly
    # precede c_node, in document order.  Does nothing if the parent of
    # c_node is an element.
    cdef xmlNode* c_current
    cdef xmlNode* c_before
    if c_node.parent and _isElement(c_node.parent):
        return

    # walk backwards to the first PI/comment of the preceding run
    c_current = c_node
    c_before = c_current.prev
    while c_before and c_before.type in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
        c_current = c_before
        c_before = c_current.prev

    # then write forwards until c_node itself is reached
    while c_current is not c_node:
        if c_buffer.error:
            break
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_current, 0,
                               pretty_print, encoding)
        if pretty_print:
            tree.xmlOutputBufferWriteString(c_buffer, "\n")
        c_current = c_current.next
|
388
|
+
|
389
|
+
cdef void _writeNextSiblings(tree.xmlOutputBuffer* c_buffer, xmlNode* c_node,
                             const_char* encoding, bint pretty_print) noexcept nogil:
    # Write the processing instructions and comments that directly
    # follow c_node.  Does nothing if the parent of c_node is an element.
    cdef xmlNode* c_following
    if c_node.parent and _isElement(c_node.parent):
        return

    c_following = c_node.next
    while c_following and not c_buffer.error:
        if c_following.type not in (tree.XML_PI_NODE, tree.XML_COMMENT_NODE):
            break
        # when pretty printing, the newline goes *before* each sibling
        if pretty_print:
            tree.xmlOutputBufferWriteString(c_buffer, "\n")
        tree.xmlNodeDumpOutput(c_buffer, c_node.doc, c_following, 0,
                               pretty_print, encoding)
        c_following = c_following.next
|
404
|
+
|
405
|
+
|
406
|
+
# copied and adapted from libxml2 (xmlBufAttrSerializeTxtContent())
|
407
|
+
cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
    """Write a NUL-terminated attribute value to ``buf``, escaping markup.

    Newline, carriage return and tab are written as numeric character
    references; '"', '<', '>' and '&' are written as named entities.
    All other bytes are copied unchanged, in runs that are as long as
    possible.  A NULL ``string`` writes nothing.
    """
    cdef const char *base
    cdef const char *cur
    cdef const char *escaped
    cdef int escaped_len

    if string == NULL:
        return

    # 'base' marks the start of the pending run of unescaped bytes,
    # 'cur' is the byte currently being inspected.
    base = cur = <const char*>string
    while cur[0] != 0:
        # The length must match the byte length of the escape sequence
        # exactly, since xmlOutputBufferWrite() copies that many bytes.
        if cur[0] == b'\n':
            escaped = "&#10;"
            escaped_len = 5
        elif cur[0] == b'\r':
            escaped = "&#13;"
            escaped_len = 5
        elif cur[0] == b'\t':
            escaped = "&#9;"
            escaped_len = 4
        elif cur[0] == b'"':
            escaped = "&quot;"
            escaped_len = 6
        elif cur[0] == b'<':
            escaped = "&lt;"
            escaped_len = 4
        elif cur[0] == b'>':
            escaped = "&gt;"
            escaped_len = 4
        elif cur[0] == b'&':
            escaped = "&amp;"
            escaped_len = 5
        else:
            # Leave further encoding and escaping to the buffer encoder.
            cur += 1
            continue

        # flush the pending unescaped run, then write the escape sequence
        if base != cur:
            tree.xmlOutputBufferWrite(buf, cur - base, base)
        tree.xmlOutputBufferWrite(buf, escaped_len, escaped)
        cur += 1
        base = cur

    # flush whatever is left after the last escaped character
    if base != cur:
        tree.xmlOutputBufferWrite(buf, cur - base, base)
|
477
|
+
|
478
|
+
|
479
|
+
cdef void _write_cdata_section(tree.xmlOutputBuffer* buf, const char* c_data, const char* c_end):
    # Write the bytes in [c_data, c_end) wrapped in one CDATA section.
    # xmlOutputBufferWrite() gets an INT_MAX length at most per call, so
    # larger data is written in INT_MAX sized chunks.
    cdef const char* c_chunk = c_data
    tree.xmlOutputBufferWrite(buf, 9, "<![CDATA[")
    while True:
        if c_end - c_chunk <= limits.INT_MAX:
            break
        tree.xmlOutputBufferWrite(buf, limits.INT_MAX, c_chunk)
        c_chunk += limits.INT_MAX
    # the remainder is known to fit into a single write
    tree.xmlOutputBufferWrite(buf, c_end - c_chunk, c_chunk)
    tree.xmlOutputBufferWrite(buf, 3, "]]>")
|
486
|
+
|
487
|
+
|
488
|
+
cdef _write_cdata_string(tree.xmlOutputBuffer* buf, bytes bstring):
    """Write ``bstring`` to ``buf`` as one or more CDATA sections.

    Wherever the data contains ']]>', the current section is closed
    after the brackets and a new one is started at the '>', so that the
    terminator sequence never appears inside a section.  Data without
    ']]>' (including empty data) is written as a single section.
    """
    cdef const char* c_data = bstring
    cdef const char* c_end = c_data + len(bstring)
    cdef const char* c_pos = c_data
    cdef bint nothing_written = True

    while True:
        c_pos = <const char*> cstring_h.memchr(c_pos, b']', c_end - c_pos)
        if not c_pos:
            break
        c_pos += 1
        if c_pos >= c_end:
            # The ']' was the last byte of the data.  Stop here: advancing
            # further would move c_pos past c_end and hand a negative
            # (i.e. huge unsigned) length to memchr() above.
            break
        next_char = c_pos[0]
        c_pos += 1
        if next_char != b']':
            continue
        # Found ']]', c_pos points to next character.
        # Reading c_pos[0] at c_end is safe since bytes objects are
        # NUL-terminated.
        while c_pos[0] == b']':
            c_pos += 1
        if c_pos[0] != b'>':
            if c_pos == c_end:
                break
            # c_pos[0] is neither ']' nor '>', continue with next character.
            c_pos += 1
            continue

        # Write section up to ']]' and start next block at trailing '>'.
        _write_cdata_section(buf, c_data, c_pos)
        nothing_written = False
        c_data = c_pos
        c_pos += 1

    # write the remaining data, or an empty section if there was no data
    if nothing_written or c_data < c_end:
        _write_cdata_section(buf, c_data, c_end)
|
521
|
+
|
522
|
+
|
523
|
+
############################################################
|
524
|
+
# output to file-like objects
|
525
|
+
|
526
|
+
cdef object io_open
|
527
|
+
from io import open as io_open
|
528
|
+
|
529
|
+
cdef object gzip
|
530
|
+
import gzip
|
531
|
+
|
532
|
+
cdef object getwriter
|
533
|
+
from codecs import getwriter
|
534
|
+
cdef object utf8_writer = getwriter('utf8')
|
535
|
+
|
536
|
+
cdef object contextmanager
|
537
|
+
from contextlib import contextmanager
|
538
|
+
|
539
|
+
cdef object _open_utf8_file
|
540
|
+
|
541
|
+
@contextmanager
def _open_utf8_file(file, compression=0):
    """Context manager that yields a writer for UTF-8 encoded text output.

    ``file`` is resolved with _getFSPathOrObject().  String results are
    opened as files by this function, anything else is written to as a
    file-like object.  A non-zero ``compression`` wraps the output in a
    GzipFile with that compression level.
    """
    target = _getFSPathOrObject(file)
    is_path = _isString(target)
    if compression:
        if is_path:
            gzip_file = gzip.GzipFile(target, mode='wb', compresslevel=compression)
        else:
            gzip_file = gzip.GzipFile(fileobj=target, mode='wb', compresslevel=compression)
        # the gzip stream takes bytes, so encode the text on the way in
        with gzip_file as zf:
            yield utf8_writer(zf)
    elif is_path:
        with io_open(target, 'w', encoding='utf8') as text_file:
            yield text_file
    else:
        # a file-like object passed in by the caller is not closed here
        yield utf8_writer(target)
|
557
|
+
|
558
|
+
|
559
|
+
@cython.final
@cython.internal
cdef class _FilelikeWriter:
    """Adapter that lets a libxml2 output buffer write to a Python
    file-like object.

    Exceptions raised by the file-like object are stored in the exception
    context instead of being propagated, and are reported to libxml2 as a
    return value of -1.
    """
    # the object that receives the data, set to None once closed
    cdef object _filelike
    # bound close() method to call on close, or None to leave the file open
    cdef object _close_filelike
    # collects exceptions raised inside the write/close callbacks
    cdef _ExceptionContext _exc_context
    cdef _ErrorLog error_log

    def __cinit__(self, filelike, exc_context=None, compression=None, close=False):
        if compression is not None and compression > 0:
            # the gzip wrapper is created here, so it is always closed again
            filelike = GzipFile(
                fileobj=filelike, mode='wb', compresslevel=compression)
            self._close_filelike = filelike.close
        elif close:
            self._close_filelike = filelike.close
        self._filelike = filelike
        if exc_context is None:
            self._exc_context = _ExceptionContext()
        else:
            self._exc_context = exc_context
        self.error_log = _ErrorLog()

    cdef tree.xmlOutputBuffer* _createOutputBuffer(
            self, tree.xmlCharEncodingHandler* enchandler) except NULL:
        # Create a libxml2 output buffer that calls back into this object
        # for writing and closing.  Raises IOError if libxml2 fails to
        # create it.
        cdef tree.xmlOutputBuffer* c_buffer
        c_buffer = tree.xmlOutputBufferCreateIO(
            <tree.xmlOutputWriteCallback>_writeFilelikeWriter, _closeFilelikeWriter,
            <python.PyObject*>self, enchandler)
        if c_buffer is NULL:
            raise IOError, "Could not create I/O writer context."
        return c_buffer

    cdef int write(self, char* c_buffer, int size) noexcept:
        # Write 'size' bytes from c_buffer to the file-like object.
        # Returns 'size' on success and -1 on failure, in which case the
        # exception is stored in the exception context.
        try:
            if self._filelike is None:
                raise IOError, "File is already closed"
            py_buffer = <bytes>c_buffer[:size]
            self._filelike.write(py_buffer)
        except:
            # this is a C callback: store the exception rather than raise it
            size = -1
            self._exc_context._store_raised()
        finally:
            return size  # and swallow any further exceptions

    cdef int close(self) noexcept:
        # Call the close method if one was registered and drop the
        # reference to the file-like object.  Returns 0 on success and -1
        # on failure, in which case the exception is stored.
        retval = 0
        try:
            if self._close_filelike is not None:
                self._close_filelike()
            # we should not close the file here as we didn't open it
            self._filelike = None
        except:
            retval = -1
            self._exc_context._store_raised()
        finally:
            return retval  # and swallow any further exceptions
|
615
|
+
|
616
|
+
cdef int _writeFilelikeWriter(void* ctxt, char* c_buffer, int length) noexcept:
    # Write callback for libxml2: 'ctxt' is the _FilelikeWriter that was
    # registered together with this function.
    cdef _FilelikeWriter writer = <_FilelikeWriter>ctxt
    return writer.write(c_buffer, length)
|
618
|
+
|
619
|
+
cdef int _closeFilelikeWriter(void* ctxt) noexcept:
    # Close callback for libxml2: 'ctxt' is the _FilelikeWriter that was
    # registered together with this function.
    cdef _FilelikeWriter writer = <_FilelikeWriter>ctxt
    return writer.close()
|
621
|
+
|
622
|
+
cdef _tofilelike(f, _Element element, encoding, doctype, method,
                 bint write_xml_declaration, bint write_doctype,
                 bint pretty_print, bint with_tail, int standalone,
                 int compression):
    """Serialise ``element`` to ``f``, a file path or a file-like object.

    The "text" method builds the complete output in memory (gzip
    compressed if ``compression`` is non-zero) and writes it in one go.
    All other methods stream the output through a libxml2 output buffer
    obtained from _create_output_buffer().

    Re-raises exceptions that were stored by the writer, and raises via
    _raiseSerialisationError() if the serialisation reports an error.
    """
    cdef _FilelikeWriter writer = None
    cdef tree.xmlOutputBuffer* c_buffer
    cdef tree.xmlCharEncodingHandler* enchandler
    cdef const_char* c_enc
    cdef const_xmlChar* c_doctype
    cdef int error_result

    c_method = _findOutputMethod(method)
    if c_method == OUTPUT_METHOD_TEXT:
        data = _textToString(element._c_node, encoding, with_tail)
        if compression:
            bytes_out = BytesIO()
            with GzipFile(fileobj=bytes_out, mode='wb', compresslevel=compression) as gzip_file:
                gzip_file.write(data)
            data = bytes_out.getvalue()
        f = _getFSPathOrObject(f)
        if _isString(f):
            filename8 = _encodeFilename(f)
            # note: 'f' is rebound to the opened file inside this block
            with open(filename8, 'wb') as f:
                f.write(data)
        else:
            f.write(data)
        return

    if encoding is None:
        c_enc = NULL
    else:
        encoding = _utf8(encoding)
        c_enc = _cstr(encoding)
    if doctype is None:
        c_doctype = NULL
    else:
        doctype = _utf8(doctype)
        c_doctype = _xcstr(doctype)

    writer = _create_output_buffer(f, c_enc, compression, &c_buffer, close=False)
    if writer is None:
        # No Python-level writer is involved, so the GIL can be released
        # while serialising.
        with nogil:
            error_result = _serialise_node(
                c_buffer, c_doctype, c_enc, element._c_node, c_method,
                write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
    else:
        # The writer calls the write() method of a Python object, so the
        # GIL is kept.
        error_result = _serialise_node(
            c_buffer, c_doctype, c_enc, element._c_node, c_method,
            write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)

    # exceptions from the file-like object take precedence over the
    # libxml2 error code
    if writer is not None:
        writer._exc_context._raise_if_stored()
    if error_result != xmlerror.XML_ERR_OK:
        _raiseSerialisationError(error_result)
|
676
|
+
|
677
|
+
|
678
|
+
cdef int _serialise_node(tree.xmlOutputBuffer* c_buffer, const_xmlChar* c_doctype,
                         const_char* c_enc, xmlNode* c_node, int c_method,
                         bint write_xml_declaration, bint write_doctype, bint pretty_print,
                         bint with_tail, int standalone) noexcept nogil:
    # Write the node into the buffer, then close the buffer in every case.
    # Returns the buffer's error code if writing failed, -1 if closing
    # failed, and XML_ERR_OK otherwise.
    cdef int status
    _writeNodeToBuffer(
        c_buffer, c_node, c_enc, c_doctype, c_method,
        write_xml_declaration, write_doctype, pretty_print, with_tail, standalone)
    status = c_buffer.error
    if status != xmlerror.XML_ERR_OK:
        tree.xmlOutputBufferClose(c_buffer)
        return status
    if tree.xmlOutputBufferClose(c_buffer) == -1:
        return -1
    return xmlerror.XML_ERR_OK
|
693
|
+
|
694
|
+
|
695
|
+
cdef _FilelikeWriter _create_output_buffer(
        f, const_char* c_enc, int c_compression,
        tree.xmlOutputBuffer** c_buffer_ret, bint close):
    """Create a libxml2 output buffer for a file path or file-like object.

    The new buffer is stored in ``c_buffer_ret[0]``.  Returns the
    ``_FilelikeWriter`` wrapping ``f`` when ``f`` has a ``.write()`` method,
    or None when the buffer writes directly to a file name.

    Raises LookupError for an unknown encoding, TypeError if ``f`` is
    neither a string path nor an object with ``.write()``, and IOError
    if the buffer cannot be created.
    """
    cdef tree.xmlOutputBuffer* c_buffer
    cdef _FilelikeWriter writer
    cdef bytes filename8
    enchandler = tree.xmlFindCharEncodingHandler(c_enc)
    if enchandler is NULL:
        raise LookupError(
            f"unknown encoding: '{c_enc.decode('UTF-8') if c_enc is not NULL else u''}'")
    try:
        f = _getFSPathOrObject(f)

        if c_compression and not HAS_ZLIB_COMPRESSION and _isString(f):
            # Let "_FilelikeWriter" fall back to Python's GzipFile.
            # We opened the file ourselves here, so the writer must close it.
            f = open(f, mode="wb")
            close = True

        if _isString(f):
            filename8 = _encodeFilename(f)
            if b'%' in filename8 and (
                    # Exclude absolute Windows paths and file:// URLs.
                    _isFilePath(<const xmlChar*>filename8) not in (NO_FILE_PATH, ABS_WIN_FILE_PATH)
                    or filename8[:7].lower() == b'file://'):
                # A file path (not a URL) containing the '%' URL escape character.
                # libxml2 uses URL-unescaping on these, so escape the path before passing it in.
                filename8 = filename8.replace(b'%', b'%25')
            c_buffer = tree.xmlOutputBufferCreateFilename(
                _cstr(filename8), enchandler, c_compression)
            if c_buffer is NULL:
                python.PyErr_SetFromErrno(IOError)  # raises IOError
            writer = None
        elif hasattr(f, 'write'):
            writer = _FilelikeWriter(f, compression=c_compression, close=close)
            c_buffer = writer._createOutputBuffer(enchandler)
        else:
            raise TypeError(
                f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    except:
        # No buffer was created, so release the encoding handler here before re-raising.
        tree.xmlCharEncCloseFunc(enchandler)
        raise
    c_buffer_ret[0] = c_buffer
    return writer
|
738
|
+
|
739
|
+
cdef xmlChar **_convert_ns_prefixes(tree.xmlDict* c_dict, ns_prefixes) except NULL:
    """Build a NULL-terminated C array of the prefixes found in ``c_dict``.

    Prefixes that do not exist in the dictionary are left out.  The array
    is allocated with ``lxml_malloc`` and is freed here only when an
    exception occurs; otherwise it is returned to the caller.
    """
    cdef size_t n_found = 0
    cdef size_t num_ns_prefixes = len(ns_prefixes)
    # One extra slot is needed for the terminating NULL entry.
    c_ns_prefixes = <xmlChar **>python.lxml_malloc(num_ns_prefixes + 1, sizeof(xmlChar*))
    if c_ns_prefixes is NULL:
        raise MemoryError()
    try:
        for prefix in ns_prefixes:
            prefix_utf = _utf8(prefix)
            c_prefix_len = len(prefix_utf)
            if c_prefix_len > limits.INT_MAX:
                raise ValueError("Prefix too long")
            c_prefix = tree.xmlDictExists(c_dict, _xcstr(prefix_utf), <int> c_prefix_len)
            if c_prefix is NULL:
                # unknown prefixes do not need to get serialised
                continue
            c_ns_prefixes[n_found] = <xmlChar*>c_prefix
            n_found += 1
    except:
        python.lxml_free(c_ns_prefixes)
        raise

    c_ns_prefixes[n_found] = NULL  # append end marker
    return c_ns_prefixes
|
763
|
+
|
764
|
+
cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
                     int compression, inclusive_ns_prefixes):
    """Write the C14N serialisation of ``element`` to ``f``.

    ``f`` may be a file path or an object with a ``.write()`` method;
    anything else raises TypeError.  Raises C14NError when libxml2 reports
    a failure, or the exception stored by the file-like writer, if any.
    """
    cdef _FilelikeWriter writer = None
    cdef tree.xmlOutputBuffer* c_buffer
    cdef xmlChar **c_inclusive_ns_prefixes = NULL
    cdef char* c_filename
    cdef xmlDoc* c_base_doc
    cdef xmlDoc* c_doc
    cdef int bytes_count, error = 0

    c_base_doc = element._c_node.doc
    # Serialise from a temporary document rooted at the element;
    # it is destroyed again in the 'finally' block below.
    c_doc = _fakeRootDoc(c_base_doc, element._c_node)
    try:
        c_inclusive_ns_prefixes = (
            _convert_ns_prefixes(c_doc.dict, inclusive_ns_prefixes)
            if inclusive_ns_prefixes else NULL)

        f = _getFSPathOrObject(f)

        close = False
        if compression and not HAS_ZLIB_COMPRESSION and _isString(f):
            # Let "_FilelikeWriter" fall back to Python's GzipFile.
            f = open(f, mode="wb")
            close = True

        if _isString(f):
            filename8 = _encodeFilename(f)
            c_filename = _cstr(filename8)
            with nogil:
                error = c14n.xmlC14NDocSave(
                    c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                    with_comments, c_filename, compression)
        elif hasattr(f, 'write'):
            writer = _FilelikeWriter(f, compression=compression, close=close)
            c_buffer = writer._createOutputBuffer(NULL)
            try:
                with writer.error_log:
                    bytes_count = c14n.xmlC14NDocSaveTo(
                        c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
                        with_comments, c_buffer)
            finally:
                # Always close the buffer, even if saving failed.
                error = tree.xmlOutputBufferClose(c_buffer)
            if bytes_count < 0:
                # A failed save takes precedence over the close result.
                error = bytes_count
            elif error != -1:
                error = xmlerror.XML_ERR_OK
        else:
            raise TypeError(f"File or filename expected, got '{python._fqtypename(f).decode('UTF-8')}'")
    finally:
        _destroyFakeDoc(c_base_doc, c_doc)
        if c_inclusive_ns_prefixes is not NULL:
            python.lxml_free(c_inclusive_ns_prefixes)

    # Exceptions raised inside the write/close callbacks were stored; re-raise them first.
    if writer is not None:
        writer._exc_context._raise_if_stored()

    if error < 0:
        message = "C14N failed"
        if writer is not None:
            # Prefer the first message collected in the writer's error log.
            errors = writer.error_log
            if len(errors):
                message = errors[0].message
        raise C14NError(message)
|
827
|
+
|
828
|
+
|
829
|
+
# C14N 2.0
|
830
|
+
|
831
|
+
def canonicalize(xml_data=None, *, out=None, from_file=None, **options):
    """Convert XML to its C14N 2.0 serialised form.

    If *out* is provided, it must be a file or file-like object that receives
    the serialised canonical XML output (text, not bytes) through its ``.write()``
    method.  To write to a file, open it in text mode with encoding "utf-8".
    If *out* is not provided, this function returns the output as text string.

    Either *xml_data* (an XML string, tree or Element) or *from_file*
    (a file path or file-like object) must be provided as input.
    A ValueError is raised if both are missing.

    The configuration options are the same as for the ``C14NWriterTarget``.
    """
    if xml_data is None and from_file is None:
        raise ValueError("Either 'xml_data' or 'from_file' must be provided as input")

    # Without an explicit output, collect the text in memory and return it.
    return_text = out is None
    if return_text:
        out = StringIO()

    target = C14NWriterTarget(out.write, **options)

    if xml_data is not None and not isinstance(xml_data, basestring):
        # Not a string: walk the tree and feed its events to the target.
        _tree_to_target(xml_data, target)
        return out.getvalue() if return_text else None

    cdef _FeedParser parser = XMLParser(
        target=target,
        attribute_defaults=True,
        collect_ids=False,
    )

    if xml_data is not None:
        parser.feed(xml_data)
        parser.close()
    else:
        # 'from_file' cannot be None here, see the check at the top.
        try:
            _parseDocument(from_file, parser, base_url=None)
        except _TargetParserResult:
            pass

    return out.getvalue() if return_text else None
|
873
|
+
|
874
|
+
|
875
|
+
cdef _tree_to_target(element, target):
    """Replay the tree below ``element`` as parser events on ``target``.

    Calls ``start_ns()``, ``start()``, ``end()``, ``comment()``, ``pi()``
    and ``data()`` on the target and returns the result of ``target.close()``.
    """
    for event, node in iterwalk(element, events=('start', 'end', 'start-ns', 'comment', 'pi')):
        if event == 'start-ns':
            # For this event, the item is unpacked into the arguments of start_ns().
            target.start_ns(*node)
            continue

        if event == 'start':
            target.start(node.tag, node.attrib)
            following_text = node.text
        elif event == 'end':
            target.end(node.tag)
            following_text = node.tail
        elif event == 'comment':
            target.comment(node.text)
            following_text = node.tail
        elif event == 'pi':
            target.pi(node.target, node.text)
            following_text = node.tail
        else:
            following_text = None

        if following_text:
            target.data(following_text)
    return target.close()
|
896
|
+
|
897
|
+
|
898
|
+
# Matcher for text of the form "prefix:name": two runs of word characters
# separated by a single colon.  Used to detect QName-like text and attribute values.
cdef object _looks_like_prefix_name = re.compile(r'^\w+:\w+$', re.UNICODE).match
|
899
|
+
|
900
|
+
|
901
|
+
cdef class C14NWriterTarget:
    """
    Canonicalization writer target for the XMLParser.

    Serialises parse events to XML C14N 2.0.

    Configuration options:

    - *with_comments*: set to true to include comments
    - *strip_text*: set to true to strip whitespace before and after text content
    - *rewrite_prefixes*: set to true to replace namespace prefixes by "n{number}"
    - *qname_aware_tags*: a set of qname aware tag names in which prefixes
      should be replaced in text content
    - *qname_aware_attrs*: a set of qname aware attribute names in which prefixes
      should be replaced in text content
    - *exclude_attrs*: a set of attribute names that should not be serialised
    - *exclude_tags*: a set of tag names that should not be serialised
    """
    # Callable that receives each chunk of serialised text.
    cdef object _write
    # Text chunks collected by data() until the next flush.
    cdef list _data
    cdef set _qname_aware_tags
    # Bound 'intersection' method of the qname aware attribute set, or None.
    cdef object _find_qname_aware_attrs
    cdef list _declared_ns_stack
    cdef list _ns_stack
    # Maps namespace URIs to generated "n{number}" prefixes (rewrite_prefixes mode).
    cdef dict _prefix_map
    # One entry per open element, plus an initial False.
    cdef list _preserve_space
    # (tag, attrs, new_namespaces) of a qname aware start tag awaiting its text.
    cdef tuple _pending_start
    cdef set _exclude_tags
    cdef set _exclude_attrs
    # Nesting depth inside an excluded element; 0 when not ignoring.
    cdef Py_ssize_t _ignored_depth
    cdef bint _with_comments
    cdef bint _strip_text
    cdef bint _rewrite_prefixes
    # Set once the first start tag has been written.
    cdef bint _root_seen
    # Set once the last open element has been closed.
    cdef bint _root_done

    def __init__(self, write, *,
                 with_comments=False, strip_text=False, rewrite_prefixes=False,
                 qname_aware_tags=None, qname_aware_attrs=None,
                 exclude_attrs=None, exclude_tags=None):
        self._write = write
        self._data = []
        self._with_comments = with_comments
        self._strip_text = strip_text
        # Empty or missing exclusion sets are stored as None to allow cheap checks.
        self._exclude_attrs = set(exclude_attrs) if exclude_attrs else None
        self._exclude_tags = set(exclude_tags) if exclude_tags else None

        self._rewrite_prefixes = rewrite_prefixes
        if qname_aware_tags:
            self._qname_aware_tags = set(qname_aware_tags)
        else:
            self._qname_aware_tags = None
        if qname_aware_attrs:
            self._find_qname_aware_attrs = set(qname_aware_attrs).intersection
        else:
            self._find_qname_aware_attrs = None

        # Stack with globally and newly declared namespaces as (uri, prefix) pairs.
        self._declared_ns_stack = [[
            ("http://www.w3.org/XML/1998/namespace", "xml"),
        ]]
        # Stack with user declared namespace prefixes as (uri, prefix) pairs.
        self._ns_stack = []
        if not rewrite_prefixes:
            self._ns_stack.append(_DEFAULT_NAMESPACE_PREFIXES_ITEMS)
        self._ns_stack.append([])
        self._prefix_map = {}
        self._preserve_space = [False]
        self._pending_start = None
        self._ignored_depth = 0
        self._root_seen = False
        self._root_done = False

    def _iter_namespaces(self, ns_stack):
        # Yield the (uri, prefix) pairs of the stack, innermost level first.
        for namespaces in reversed(ns_stack):
            if namespaces:  # almost no element declares new namespaces
                yield from namespaces

    cdef _resolve_prefix_name(self, prefixed_name):
        # Turn "prefix:name" into "{uri}name" using the user declared namespaces.
        # Raises ValueError if the prefix is not declared.
        prefix, name = prefixed_name.split(':', 1)
        for uri, p in self._iter_namespaces(self._ns_stack):
            if p == prefix:
                return f'{{{uri}}}{name}'
        raise ValueError(f'Prefix {prefix} of QName "{prefixed_name}" is not declared in scope')

    cdef _qname(self, qname, uri=None):
        # Return a tuple (serialised name, local name, namespace URI) for 'qname',
        # adding a namespace declaration to the current level where needed.
        # Raises ValueError if the namespace has no prefix in scope.
        if uri is None:
            uri, tag = qname[1:].rsplit('}', 1) if qname[:1] == '{' else ('', qname)
        else:
            tag = qname

        # Look for an already written declaration, ignoring prefixes that
        # were seen before with a different URI on an inner level.
        prefixes_seen = set()
        for u, prefix in self._iter_namespaces(self._declared_ns_stack):
            if u == uri and prefix not in prefixes_seen:
                return f'{prefix}:{tag}' if prefix else tag, tag, uri
            prefixes_seen.add(prefix)

        # Not declared yet => add new declaration.
        if self._rewrite_prefixes:
            if uri in self._prefix_map:
                prefix = self._prefix_map[uri]
            else:
                prefix = self._prefix_map[uri] = f'n{len(self._prefix_map)}'
            self._declared_ns_stack[-1].append((uri, prefix))
            return f'{prefix}:{tag}', tag, uri

        if not uri and '' not in prefixes_seen:
            # No default namespace declared => no prefix needed.
            return tag, tag, uri

        for u, prefix in self._iter_namespaces(self._ns_stack):
            if u == uri:
                self._declared_ns_stack[-1].append((uri, prefix))
                return f'{prefix}:{tag}' if prefix else tag, tag, uri

        if not uri:
            # As soon as a default namespace is defined,
            # anything that has no namespace (and thus, no prefix) goes there.
            return tag, tag, uri

        raise ValueError(f'Namespace "{uri}" of name "{tag}" is not declared in scope')

    def data(self, data):
        # Collect text unless we are inside an excluded element.
        if not self._ignored_depth:
            self._data.append(data)

    cdef _flush(self):
        # Write out the collected text, preceded by a pending start tag, if any.
        cdef unicode data = ''.join(self._data)
        del self._data[:]
        if self._strip_text and not self._preserve_space[-1]:
            data = data.strip()
        if self._pending_start is not None:
            (tag, attrs, new_namespaces), self._pending_start = self._pending_start, None
            qname_text = data if ':' in data and _looks_like_prefix_name(data) else None
            self._start(tag, attrs, new_namespaces, qname_text)
            if qname_text is not None:
                # _start() has already written the resolved qname text.
                return
        if data and self._root_seen:
            self._write(_escape_cdata_c14n(data))

    def start_ns(self, prefix, uri):
        if self._ignored_depth:
            return
        # we may have to resolve qnames in text content
        if self._data:
            self._flush()
        self._ns_stack[-1].append((uri, prefix))

    def start(self, tag, attrs):
        # Excluded tags and everything nested inside them only increase the depth counter.
        if self._exclude_tags is not None and (
                self._ignored_depth or tag in self._exclude_tags):
            self._ignored_depth += 1
            return
        if self._data:
            self._flush()

        new_namespaces = []
        self._declared_ns_stack.append(new_namespaces)

        if self._qname_aware_tags is not None and tag in self._qname_aware_tags:
            # Need to parse text first to see if it requires a prefix declaration.
            self._pending_start = (tag, attrs, new_namespaces)
            return
        self._start(tag, attrs, new_namespaces)

    cdef _start(self, tag, attrs, new_namespaces, qname_text=None):
        # Write the start tag with its namespace declarations and attributes,
        # followed by the resolved 'qname_text', if given.
        if self._exclude_attrs is not None and attrs:
            attrs = {k: v for k, v in attrs.items() if k not in self._exclude_attrs}

        qnames = {tag, *attrs}
        resolved_names = {}

        # Resolve prefixes in attribute and tag text.
        if qname_text is not None:
            qname = resolved_names[qname_text] = self._resolve_prefix_name(qname_text)
            qnames.add(qname)
        if self._find_qname_aware_attrs is not None and attrs:
            qattrs = self._find_qname_aware_attrs(attrs)
            if qattrs:
                for attr_name in qattrs:
                    value = attrs[attr_name]
                    if _looks_like_prefix_name(value):
                        qname = resolved_names[value] = self._resolve_prefix_name(value)
                        qnames.add(qname)
            else:
                qattrs = None
        else:
            qattrs = None

        # Assign prefixes in lexicographical order of used URIs.
        parsed_qnames = {n: self._qname(n) for n in sorted(
            qnames, key=lambda n: n.split('}', 1))}

        # Write namespace declarations in prefix order ...
        if new_namespaces:
            attr_list = [
                ('xmlns:' + prefix if prefix else 'xmlns', uri)
                for uri, prefix in new_namespaces
            ]
            attr_list.sort()
        else:
            # almost always empty
            attr_list = []

        # ... followed by attributes in URI+name order
        if attrs:
            for k, v in sorted(attrs.items()):
                if qattrs is not None and k in qattrs and v in resolved_names:
                    v = parsed_qnames[resolved_names[v]][0]
                attr_qname, attr_name, uri = parsed_qnames[k]
                # No prefix for attributes in default ('') namespace.
                attr_list.append((attr_qname if uri else attr_name, v))

        # Honour xml:space attributes.
        space_behaviour = attrs.get('{http://www.w3.org/XML/1998/namespace}space')
        self._preserve_space.append(
            space_behaviour == 'preserve' if space_behaviour
            else self._preserve_space[-1])

        # Write the tag.
        write = self._write
        write('<' + parsed_qnames[tag][0])
        if attr_list:
            write(''.join([f' {k}="{_escape_attrib_c14n(v)}"' for k, v in attr_list]))
        write('>')

        # Write the resolved qname text content.
        if qname_text is not None:
            write(_escape_cdata_c14n(parsed_qnames[resolved_names[qname_text]][0]))

        self._root_seen = True
        self._ns_stack.append([])

    def end(self, tag):
        if self._ignored_depth:
            self._ignored_depth -= 1
            return
        if self._data:
            self._flush()
        self._write(f'</{self._qname(tag)[0]}>')
        self._preserve_space.pop()
        # Only the initial entry is left once the root element has been closed.
        self._root_done = len(self._preserve_space) == 1
        self._declared_ns_stack.pop()
        self._ns_stack.pop()

    def comment(self, text):
        if not self._with_comments:
            return
        if self._ignored_depth:
            return
        # Comments after the root element are preceded by a newline,
        # comments before the root element are followed by one.
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(f'<!--{_escape_cdata_c14n(text)}-->')
        if not self._root_seen:
            self._write('\n')

    def pi(self, target, data):
        if self._ignored_depth:
            return
        # Same newline placement around the root element as in comment().
        if self._root_done:
            self._write('\n')
        elif self._root_seen and self._data:
            self._flush()
        self._write(
            f'<?{target} {_escape_cdata_c14n(data)}?>' if data else f'<?{target}?>')
        if not self._root_seen:
            self._write('\n')

    def close(self):
        # Nothing to finalise; all output goes through the write callable.
        return None
|
1173
|
+
|
1174
|
+
|
1175
|
+
cdef _raise_serialization_error(text):
    # Always raises TypeError, naming the offending value and its type.
    type_name = type(text).__name__
    message = "cannot serialize %r (type %s)" % (text, type_name)
    raise TypeError(message)
|
1177
|
+
|
1178
|
+
|
1179
|
+
cdef unicode _escape_cdata_c14n(stext):
    """Escape character data for C14N output.

    Replaces '&', '<', '>' and carriage return by the character references
    '&amp;', '&lt;', '&gt;' and '&#xD;'.  All other characters are copied
    unchanged.  Returns the input text itself if nothing needs escaping.

    Raises TypeError (via ``_raise_serialization_error``) if *stext* cannot
    be converted to a text string.
    """
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t start = 0, pos = 0
    cdef list substrings = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for pos, ch in enumerate(text):
        if ch == '&':
            escape = '&amp;'
        elif ch == '<':
            escape = '&lt;'
        elif ch == '>':
            escape = '&gt;'
        elif ch == '\r':
            escape = '&#xD;'
        else:
            continue

        # Only start collecting pieces when the first escape is needed.
        if substrings is None:
            substrings = []
        if pos > start:
            # Copy the unescaped run before the current character.
            substrings.append(text[start:pos])
        substrings.append(escape)
        start = pos + 1

    if substrings is None:
        return text
    if pos >= start:
        # Copy the unescaped tail after the last escaped character.
        substrings.append(text[start:pos+1])
    return ''.join(substrings)
|
1214
|
+
|
1215
|
+
|
1216
|
+
cdef unicode _escape_attrib_c14n(stext):
    """Escape an attribute value for C14N output.

    Replaces '&', '<', '"', tab, newline and carriage return by the
    character references '&amp;', '&lt;', '&quot;', '&#x9;', '&#xA;' and
    '&#xD;'.  All other characters (including '>') are copied unchanged.
    Returns the input text itself if nothing needs escaping.

    Raises TypeError (via ``_raise_serialization_error``) if *stext* cannot
    be converted to a text string.
    """
    cdef unicode text
    cdef Py_UCS4 ch
    cdef Py_ssize_t start = 0, pos = 0
    cdef list substrings = None
    try:
        text = unicode(stext)
    except (TypeError, AttributeError):
        return _raise_serialization_error(stext)

    for pos, ch in enumerate(text):
        if ch == '&':
            escape = '&amp;'
        elif ch == '<':
            escape = '&lt;'
        elif ch == '"':
            escape = '&quot;'
        elif ch == '\t':
            escape = '&#x9;'
        elif ch == '\n':
            escape = '&#xA;'
        elif ch == '\r':
            escape = '&#xD;'
        else:
            continue

        # Only start collecting pieces when the first escape is needed.
        if substrings is None:
            substrings = []
        if pos > start:
            # Copy the unescaped run before the current character.
            substrings.append(text[start:pos])
        substrings.append(escape)
        start = pos + 1

    if substrings is None:
        return text
    if pos >= start:
        # Copy the unescaped tail after the last escaped character.
        substrings.append(text[start:pos+1])
    return ''.join(substrings)
|
1255
|
+
|
1256
|
+
|
1257
|
+
# incremental serialisation
|
1258
|
+
|
1259
|
+
cdef class xmlfile:
    """xmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    A simple mechanism for incremental XML serialisation.

    Usage example::

         with xmlfile("somefile.xml", encoding='utf-8') as xf:
             xf.write_declaration(standalone=True)
             xf.write_doctype('<!DOCTYPE root SYSTEM "some.dtd">')

             # generate an element (the root element)
             with xf.element('root'):
                  # write a complete Element into the open root element
                  xf.write(etree.Element('test'))

                  # generate and write more Elements, e.g. through iterparse
                  for element in generate_some_elements():
                      # serialise generated elements into the XML file
                      xf.write(element)

                  # or write multiple Elements or strings at once
                  xf.write(etree.Element('start'), "text", etree.Element('end'))

    If 'output_file' is a file(-like) object, passing ``close=True`` will
    close it when exiting the context manager.  By default, it is left
    to the owner to do that.  When a file path is used, lxml will take care
    of opening and closing the file itself.  Also, when a compression level
    is set, lxml will deliberately close the file to make sure all data gets
    compressed and written.

    Setting ``buffered=False`` will flush the output after each operation,
    such as opening or closing an ``xf.element()`` block or calling
    ``xf.write()``.  Alternatively, calling ``xf.flush()`` can be used to
    explicitly flush any pending output when buffering is enabled.
    """
    cdef object output_file
    cdef bytes encoding
    cdef _IncrementalFileWriter writer
    cdef _AsyncIncrementalFileWriter async_writer
    cdef int compresslevel
    cdef bint close
    cdef bint buffered
    cdef int method

    def __init__(self, output_file not None, encoding=None, compression=None,
                 close=False, buffered=True):
        self.output_file = output_file
        self.encoding = _utf8orNone(encoding)
        # A missing (or otherwise false) compression level means "no compression".
        self.compresslevel = compression if compression else 0
        self.close = close
        self.buffered = buffered
        self.method = OUTPUT_METHOD_XML

    def __enter__(self):
        assert self.output_file is not None
        writer = _IncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        self.writer = writer
        return self.writer

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self.writer is None:
            return
        # Detach the writer before closing it.
        finished_writer, self.writer = self.writer, None
        # Only report close errors if the block did not already fail.
        finished_writer._close(exc_type is None)
        if self.close:
            self.output_file = None

    async def __aenter__(self):
        assert self.output_file is not None
        if isinstance(self.output_file, basestring):
            raise TypeError("Cannot asynchronously write to a plain file")
        if not hasattr(self.output_file, 'write'):
            raise TypeError("Output file needs an async .write() method")
        async_writer = _AsyncIncrementalFileWriter(
            self.output_file, self.encoding, self.compresslevel,
            self.close, self.buffered, self.method)
        self.async_writer = async_writer
        return self.async_writer

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        if self.async_writer is None:
            return
        # Detach the writer before closing it.
        finished_writer, self.async_writer = self.async_writer, None
        # Only report close errors if the block did not already fail.
        await finished_writer._close(exc_type is None)
        if self.close:
            self.output_file = None
|
1346
|
+
|
1347
|
+
|
1348
|
+
cdef class htmlfile(xmlfile):
    """htmlfile(self, output_file, encoding=None, compression=None, close=False, buffered=True)

    A simple mechanism for incremental HTML serialisation.  Works the same as
    xmlfile.
    """
    def __init__(self, *args, **kwargs):
        # Take the same arguments as xmlfile, then switch the output method to HTML.
        super().__init__(*args, **kwargs)
        self.method = OUTPUT_METHOD_HTML
|
1357
|
+
|
1358
|
+
|
1359
|
+
cdef enum _IncrementalFileWriterStatus:
    # Status values of the incremental file writer.  The numeric order matters:
    # the writer compares its status with ">=" to reject out-of-order writes.
    WRITER_STARTING = 0      # initial status of a new writer
    WRITER_DECL_WRITTEN = 1  # set after writing the XML declaration
    WRITER_DTD_WRITTEN = 2   # set after writing a doctype
    WRITER_IN_ELEMENT = 3    # presumably: inside an open element -- confirm against the writer
    WRITER_FINISHED = 4      # presumably: output is complete -- confirm against the writer
|
1365
|
+
|
1366
|
+
|
1367
|
+
@cython.final
|
1368
|
+
@cython.internal
|
1369
|
+
cdef class _IncrementalFileWriter:
|
1370
|
+
cdef tree.xmlOutputBuffer* _c_out
|
1371
|
+
cdef bytes _encoding
|
1372
|
+
cdef const_char* _c_encoding
|
1373
|
+
cdef _FilelikeWriter _target
|
1374
|
+
cdef list _element_stack
|
1375
|
+
cdef int _status
|
1376
|
+
cdef int _method
|
1377
|
+
cdef bint _buffered
|
1378
|
+
|
1379
|
+
def __cinit__(self, outfile, bytes encoding, int compresslevel, bint close,
|
1380
|
+
bint buffered, int method):
|
1381
|
+
self._status = WRITER_STARTING
|
1382
|
+
self._element_stack = []
|
1383
|
+
if encoding is None:
|
1384
|
+
# We always need a document encoding to make the attribute serialisation
|
1385
|
+
# of libxml2 identical to ours.
|
1386
|
+
encoding = b'ASCII'
|
1387
|
+
self._encoding = encoding
|
1388
|
+
self._c_encoding = _cstr(encoding)
|
1389
|
+
self._buffered = buffered
|
1390
|
+
self._target = _create_output_buffer(
|
1391
|
+
outfile, self._c_encoding, compresslevel, &self._c_out, close)
|
1392
|
+
self._method = method
|
1393
|
+
|
1394
|
+
def __dealloc__(self):
|
1395
|
+
if self._c_out is not NULL:
|
1396
|
+
tree.xmlOutputBufferClose(self._c_out)
|
1397
|
+
|
1398
|
+
def write_declaration(self, version=None, standalone=None, doctype=None):
|
1399
|
+
"""write_declaration(self, version=None, standalone=None, doctype=None)
|
1400
|
+
|
1401
|
+
Write an XML declaration and (optionally) a doctype into the file.
|
1402
|
+
"""
|
1403
|
+
assert self._c_out is not NULL
|
1404
|
+
cdef const_xmlChar* c_version
|
1405
|
+
cdef int c_standalone
|
1406
|
+
if self._method != OUTPUT_METHOD_XML:
|
1407
|
+
raise LxmlSyntaxError("only XML documents have declarations")
|
1408
|
+
if self._status >= WRITER_DECL_WRITTEN:
|
1409
|
+
raise LxmlSyntaxError("XML declaration already written")
|
1410
|
+
version = _utf8orNone(version)
|
1411
|
+
c_version = _xcstr(version) if version is not None else NULL
|
1412
|
+
doctype = _utf8orNone(doctype)
|
1413
|
+
if standalone is None:
|
1414
|
+
c_standalone = -1
|
1415
|
+
else:
|
1416
|
+
c_standalone = 1 if standalone else 0
|
1417
|
+
_writeDeclarationToBuffer(self._c_out, c_version, self._c_encoding, c_standalone)
|
1418
|
+
if doctype is not None:
|
1419
|
+
_writeDoctype(self._c_out, _xcstr(doctype))
|
1420
|
+
self._status = WRITER_DTD_WRITTEN
|
1421
|
+
else:
|
1422
|
+
self._status = WRITER_DECL_WRITTEN
|
1423
|
+
if not self._buffered:
|
1424
|
+
tree.xmlOutputBufferFlush(self._c_out)
|
1425
|
+
self._handle_error(self._c_out.error)
|
1426
|
+
|
1427
|
+
def write_doctype(self, doctype):
|
1428
|
+
"""write_doctype(self, doctype)
|
1429
|
+
|
1430
|
+
Writes the given doctype declaration verbatimly into the file.
|
1431
|
+
"""
|
1432
|
+
assert self._c_out is not NULL
|
1433
|
+
if doctype is None:
|
1434
|
+
return
|
1435
|
+
if self._status >= WRITER_DTD_WRITTEN:
|
1436
|
+
raise LxmlSyntaxError("DOCTYPE already written or cannot write it here")
|
1437
|
+
doctype = _utf8(doctype)
|
1438
|
+
_writeDoctype(self._c_out, _xcstr(doctype))
|
1439
|
+
self._status = WRITER_DTD_WRITTEN
|
1440
|
+
if not self._buffered:
|
1441
|
+
tree.xmlOutputBufferFlush(self._c_out)
|
1442
|
+
self._handle_error(self._c_out.error)
|
1443
|
+
|
1444
|
+
def method(self, method):
    """method(self, method)

    Returns a context manager that overrides and restores the output method.
    method is one of (None, 'xml', 'html'); None selects the writer's
    current output method.
    """
    assert self._c_out is not NULL
    if method is None:
        c_method = self._method
    else:
        c_method = _findOutputMethod(method)
    return _MethodChanger(self, c_method)
|
1453
|
+
|
1454
|
+
def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
    """element(self, tag, attrib=None, nsmap=None, method, **_extra)

    Returns a context manager that writes an opening and closing tag.
    method is one of (None, 'xml', 'html'); None selects the writer's
    current output method.

    Attributes are taken from ``attrib`` and from the keyword arguments.
    An ``attrib`` entry is skipped when a keyword argument of the same
    name exists, so the keyword value wins.
    """
    assert self._c_out is not NULL

    attributes = []
    if attrib is not None:
        for attr_name, attr_value in _iter_attrib(attrib):
            if attr_name in _extra:
                # overridden by the keyword argument handled below
                continue
            attr_ns, attr_local = _getNsTag(attr_name)
            attributes.append((attr_ns, attr_local, _utf8(attr_value)))
    for attr_name, attr_value in _extra.items():
        attr_ns, attr_local = _getNsTag(attr_name)
        attributes.append((attr_ns, attr_local, _utf8(attr_value)))

    # The writer works with a namespace-URI => prefix mapping, i.e. the
    # reverse of the user-provided prefix => URI mapping.
    reversed_nsmap = {}
    if nsmap:
        for prefix, uri in nsmap.items():
            if prefix is not None:
                prefix = _utf8(prefix)
                _prefixValidOrRaise(prefix)
            reversed_nsmap[_utf8(uri)] = prefix

    ns, name = _getNsTag(tag)

    if method is None:
        c_method = self._method
    else:
        c_method = _findOutputMethod(method)

    element_config = (ns, name, attributes, reversed_nsmap)
    return _FileWriterElement(self, element_config, c_method)
|
1483
|
+
|
1484
|
+
cdef _write_qname(self, bytes name, bytes prefix):
    # Write "prefix:name" to the output buffer, or only "name" when the
    # prefix is empty or None.  Empty bytes (rather than None) are used for
    # "no prefix" so that tuples containing it stay sortable.
    if prefix is not None and len(prefix) > 0:
        tree.xmlOutputBufferWrite(self._c_out, len(prefix), _cstr(prefix))
        tree.xmlOutputBufferWrite(self._c_out, 1, ':')
    tree.xmlOutputBufferWrite(self._c_out, len(name), _cstr(name))
|
1489
|
+
|
1490
|
+
cdef _write_start_element(self, element_config):
    # Serialise the opening tag described by 'element_config', a tuple of
    # (namespace, name, attributes, namespace => prefix map), then push the
    # element onto the stack and mark the writer as being inside an element.
    if self._status > WRITER_IN_ELEMENT:
        raise LxmlSyntaxError("cannot append trailing element to complete XML document")

    ns, name, attributes, nsmap = element_config
    ns_prefixes, ns_declarations = self._collect_namespaces(nsmap)
    prefix = self._find_prefix(ns, ns_prefixes, ns_declarations)

    tree.xmlOutputBufferWrite(self._c_out, 1, '<')
    self._write_qname(name, prefix)
    self._write_attributes_and_namespaces(attributes, ns_prefixes, ns_declarations)
    tree.xmlOutputBufferWrite(self._c_out, 1, '>')

    if not self._buffered:
        tree.xmlOutputBufferFlush(self._c_out)
    self._handle_error(self._c_out.error)

    # Only reached when no error was raised above.
    self._element_stack.append((ns, name, prefix, ns_prefixes))
    self._status = WRITER_IN_ELEMENT
|
1509
|
+
|
1510
|
+
cdef _write_attributes_and_namespaces(self, list attributes,
                                      dict flat_namespace_map,
                                      list new_namespaces):
    # Write the namespace declarations (sorted) followed by the attributes.
    #
    # Attribute prefixes are resolved before anything is written because
    # _find_prefix() may append further declarations to new_namespaces.
    prefixed_attributes = []
    if attributes:
        for ns, name, value in attributes:
            prefix = self._find_prefix(ns, flat_namespace_map, new_namespaces)
            prefixed_attributes.append((prefix, name, value))

    if new_namespaces:
        new_namespaces.sort()
        self._write_attributes_list(new_namespaces)
    if prefixed_attributes:
        self._write_attributes_list(prefixed_attributes)
|
1523
|
+
|
1524
|
+
cdef _write_attributes_list(self, list attributes):
    # Write each (prefix, name, value) triple as ' prefix:name="value"'.
    # The value goes through _write_attr_string() rather than being
    # written raw.
    for attr_prefix, attr_name, attr_value in attributes:
        tree.xmlOutputBufferWrite(self._c_out, 1, ' ')
        self._write_qname(attr_name, attr_prefix)
        tree.xmlOutputBufferWrite(self._c_out, 2, '="')
        _write_attr_string(self._c_out, _cstr(attr_value))
        tree.xmlOutputBufferWrite(self._c_out, 1, '"')
|
1532
|
+
|
1533
|
+
cdef _write_end_element(self, element_config):
    # Write the closing tag for the innermost open element, which must
    # match 'element_config' in its (namespace, name) pair.
    if self._status != WRITER_IN_ELEMENT:
        raise LxmlSyntaxError("not in an element")

    open_elements = self._element_stack
    if not open_elements:
        raise LxmlSyntaxError("inconsistent exit action in context manager")
    if open_elements[-1][:2] != element_config[:2]:
        raise LxmlSyntaxError("inconsistent exit action in context manager")

    # If previous write operations failed, the context manager exit might
    # still call us.  That is ok, but in that case we neither write the
    # closing tag nor do any error handling.  For all non-I/O errors, we
    # continue writing closing tags if we can.
    ok_to_write = self._c_out.error == xmlerror.XML_ERR_OK

    closed = open_elements.pop()
    name = closed[1]
    prefix = closed[2]
    if ok_to_write:
        tree.xmlOutputBufferWrite(self._c_out, 2, '</')
        self._write_qname(name, prefix)
        tree.xmlOutputBufferWrite(self._c_out, 1, '>')

    # Closing the outermost element completes the document.
    if not open_elements:
        self._status = WRITER_FINISHED

    if not ok_to_write:
        return
    if not self._buffered:
        tree.xmlOutputBufferFlush(self._c_out)
    self._handle_error(self._c_out.error)
|
1556
|
+
|
1557
|
+
cdef _find_prefix(self, bytes href, dict flat_namespaces_map, list new_namespaces):
    # Return the prefix mapped to namespace 'href'.
    #
    # None is returned for a None namespace.  For an unmapped namespace,
    # the first unused 'ns<N>' prefix (N = 0, 1, ...) is registered in
    # flat_namespaces_map and a matching declaration is appended to
    # new_namespaces.
    if href is None:
        return None
    if href in flat_namespaces_map:
        return flat_namespaces_map[href]

    # need to create a new prefix
    taken_prefixes = flat_namespaces_map.values()
    counter = 0
    candidate = _utf8('ns%d' % counter)
    while candidate in taken_prefixes:
        counter += 1
        candidate = _utf8('ns%d' % counter)

    new_namespaces.append((b'xmlns', candidate, href))
    flat_namespaces_map[href] = candidate
    return candidate
|
1572
|
+
|
1573
|
+
cdef _collect_namespaces(self, dict nsmap):
    # Build the effective namespace => prefix map for a new element and
    # the list of namespace declarations it has to write.
    #
    # Returns (flat_namespaces_map, new_namespaces); the declarations are
    # (prefix, name, value) triples as taken by _write_attributes_list().
    declarations = []
    effective_map = {}
    for uri, prefix in nsmap.items():
        effective_map[uri] = prefix
        if prefix is None:
            # default namespace: use empty bytes rather than None as the
            # first item to allow sorting
            declarations.append((b'', b'xmlns', uri))
        else:
            declarations.append((b'xmlns', prefix, uri))

    # merge in flat namespace map of parent
    if self._element_stack:
        parent_map = <dict>self._element_stack[-1][-1]
        for uri, prefix in parent_map.items():
            # unknown or empty prefix => prefer a 'real' prefix
            if effective_map.get(uri) is None:
                effective_map[uri] = prefix

    return effective_map, declarations
|
1590
|
+
|
1591
|
+
def write(self, *args, bint with_tail=True, bint pretty_print=False, method=None):
    """write(self, *args, with_tail=True, pretty_print=False, method=None)

    Write subtrees or strings into the file.

    Each positional argument may be a string, a CDATA object, an Element
    or None (which is skipped).  Anything else raises TypeError.

    If method is not None, it should be one of ('html', 'xml', 'text')
    to temporarily override the output method.

    Raises LxmlSyntaxError when content is written where it is not allowed:
    non-whitespace text outside of an element, or any string, CDATA or
    element after the document was completed.
    """
    assert self._c_out is not NULL
    c_method = self._method if method is None else _findOutputMethod(method)

    for content in args:
        if _isString(content):
            if self._status != WRITER_IN_ELEMENT:
                # Outside of an element, only whitespace is accepted, and
                # only as long as the document is not complete yet.
                if self._status > WRITER_IN_ELEMENT or content.strip():
                    raise LxmlSyntaxError("not in an element")
            bstring = _utf8(content)
            if not bstring:
                continue

            # Whitespace written before the root element reaches this point
            # with an empty element stack, so there is no enclosing element
            # to inspect.  Indexing the stack unconditionally would raise
            # an IndexError for input that the check above accepted.
            if self._element_stack:
                ns, name, _, _ = self._element_stack[-1]
            else:
                ns = name = None

            if (c_method == OUTPUT_METHOD_HTML and
                    ns in (None, b'http://www.w3.org/1999/xhtml') and
                    name in (b'script', b'style')):
                # HTML <script>/<style> content is written without escaping.
                tree.xmlOutputBufferWrite(self._c_out, len(bstring), _cstr(bstring))
            else:
                tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(bstring), NULL)

        elif isinstance(content, CDATA):
            if self._status > WRITER_IN_ELEMENT:
                raise LxmlSyntaxError("not in an element")
            _write_cdata_string(self._c_out, (<CDATA>content)._utf8_data)

        elif iselement(content):
            if self._status > WRITER_IN_ELEMENT:
                raise LxmlSyntaxError("cannot append trailing element to complete XML document")
            _writeNodeToBuffer(self._c_out, (<_Element>content)._c_node,
                               self._c_encoding, NULL, c_method,
                               False, False, pretty_print, with_tail, False)
            if (<_Element>content)._c_node.type == tree.XML_ELEMENT_NODE:
                # An element node written at the top level is the root
                # element, which completes the document.
                if not self._element_stack:
                    self._status = WRITER_FINISHED

        elif content is not None:
            raise TypeError(
                f"got invalid input value of type {type(content)}, expected string, CDATA or Element")

        self._handle_error(self._c_out.error)

    if not self._buffered:
        tree.xmlOutputBufferFlush(self._c_out)
        self._handle_error(self._c_out.error)
|
1644
|
+
|
1645
|
+
def flush(self):
    """flush(self)

    Push whatever the output buffer currently holds out to the stream and
    raise if the buffer reports an error afterwards.
    """
    assert self._c_out is not NULL
    tree.xmlOutputBufferFlush(self._c_out)
    # Report any error state of the buffer after flushing.
    self._handle_error(self._c_out.error)
|
1653
|
+
|
1654
|
+
cdef _close(self, bint raise_on_error):
    # Close the output buffer and reset the writer state.
    #
    # With raise_on_error set, an incomplete document (nothing written or
    # elements still open) raises before anything is closed, and an error
    # found while closing is raised at the end.
    if raise_on_error:
        if self._status < WRITER_IN_ELEMENT:
            raise LxmlSyntaxError("no content written")
        if self._element_stack:
            raise LxmlSyntaxError("pending open tags on close")

    # The buffer is closed in any case.  An error that was already pending
    # is kept as is; otherwise a close result of -1 is reported and every
    # other result counts as success.
    error_result = self._c_out.error
    close_result = tree.xmlOutputBufferClose(self._c_out)
    if error_result == xmlerror.XML_ERR_OK and close_result == -1:
        error_result = -1

    self._status = WRITER_FINISHED
    self._c_out = NULL
    self._element_stack.clear()

    if raise_on_error:
        self._handle_error(error_result)
|
1672
|
+
|
1673
|
+
cdef _handle_error(self, int error_result):
    # Raise for a non-OK error code; do nothing for XML_ERR_OK.
    if error_result == xmlerror.XML_ERR_OK:
        return
    # An exception stored by the output target takes precedence over the
    # generic serialisation error.
    if self._target is not None:
        self._target._exc_context._raise_if_stored()
    _raiseSerialisationError(error_result)
|
1678
|
+
|
1679
|
+
|
1680
|
+
@cython.final
@cython.internal
cdef class _AsyncDataWriter:
    # File-like sink that only records the chunks written to it, so that
    # they can be fetched later via collect().
    cdef list _data

    def __cinit__(self):
        self._data = []

    cdef bytes collect(self):
        # Return all recorded chunks joined together and forget them.
        # The list is emptied in place, not replaced.
        joined = b''.join(self._data)
        self._data.clear()
        return joined

    def write(self, data):
        self._data.append(data)

    def close(self):
        # Nothing to release here.
        pass
|
1697
|
+
|
1698
|
+
|
1699
|
+
@cython.final
@cython.internal
cdef class _AsyncIncrementalFileWriter:
    # Async wrapper around _IncrementalFileWriter.
    #
    # The wrapped writer serialises into an in-memory _AsyncDataWriter.
    # The collected bytes are then passed on to the async output file by
    # awaiting its write() method.
    cdef _IncrementalFileWriter _writer
    cdef _AsyncDataWriter _buffer
    cdef object _async_outfile
    cdef int _flush_after_writes
    cdef bint _should_close
    cdef bint _buffered

    def __cinit__(self, async_outfile, bytes encoding, int compresslevel, bint close,
                  bint buffered, int method):
        self._async_outfile = async_outfile
        self._should_close = close
        self._buffered = buffered
        # In buffered mode, data is passed on once more than this many
        # chunks have piled up (see _flush()).
        self._flush_after_writes = 20
        self._buffer = _AsyncDataWriter()
        # The inner writer is always created unbuffered; buffering is
        # decided here, in _flush().
        self._writer = _IncrementalFileWriter(
            self._buffer, encoding, compresslevel, close=True, buffered=False, method=method)

    cdef bytes _flush(self):
        # Return the collected data if it is time to pass it on, else None.
        if self._buffered and len(self._buffer._data) <= self._flush_after_writes:
            return None
        return self._buffer.collect()

    async def flush(self):
        self._writer.flush()
        pending = self._buffer.collect()
        if pending:
            await self._async_outfile.write(pending)

    async def write_declaration(self, version=None, standalone=None, doctype=None):
        self._writer.write_declaration(version, standalone, doctype)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    async def write_doctype(self, doctype):
        self._writer.write_doctype(doctype)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    async def write(self, *args, with_tail=True, pretty_print=False, method=None):
        self._writer.write(*args, with_tail=with_tail, pretty_print=pretty_print, method=method)
        pending = self._flush()
        if pending:
            await self._async_outfile.write(pending)

    def method(self, method):
        return self._writer.method(method)

    def element(self, tag, attrib=None, nsmap=None, method=None, **_extra):
        sync_element = self._writer.element(tag, attrib, nsmap, method, **_extra)
        return _AsyncFileWriterElement(sync_element, self)

    async def _close(self, bint raise_on_error):
        self._writer._close(raise_on_error)
        # Pass on whatever is left, regardless of the buffering mode.
        pending = self._buffer.collect()
        if pending:
            await self._async_outfile.write(pending)
        if self._should_close:
            await self._async_outfile.close()
|
1762
|
+
|
1763
|
+
|
1764
|
+
@cython.final
@cython.internal
cdef class _AsyncFileWriterElement:
    # Async context manager that delegates to a synchronous
    # _FileWriterElement and then passes the produced data on to the
    # async output file of the owning writer.
    cdef _FileWriterElement _element_writer
    cdef _AsyncIncrementalFileWriter _writer

    def __cinit__(self, _FileWriterElement element_writer not None,
                  _AsyncIncrementalFileWriter writer not None):
        self._writer = writer
        self._element_writer = element_writer

    async def __aenter__(self):
        self._element_writer.__enter__()
        pending = self._writer._flush()
        if pending:
            await self._writer._async_outfile.write(pending)

    async def __aexit__(self, *args):
        self._element_writer.__exit__(*args)
        pending = self._writer._flush()
        if pending:
            await self._writer._async_outfile.write(pending)
|
1786
|
+
|
1787
|
+
|
1788
|
+
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _FileWriterElement:
    # Context manager that writes the start tag of an element on entry and
    # its end tag on exit, switching the writer's output method in between.
    cdef _IncrementalFileWriter _writer
    cdef object _element
    cdef int _new_method
    cdef int _old_method

    def __cinit__(self, _IncrementalFileWriter writer not None, element_config, int method):
        self._writer = writer
        self._element = element_config
        # The method to restore is read at creation time, not on entry.
        self._old_method = writer._method
        self._new_method = method

    def __enter__(self):
        writer = self._writer
        writer._method = self._new_method
        writer._write_start_element(self._element)

    def __exit__(self, exc_type, exc_val, exc_tb):
        writer = self._writer
        writer._write_end_element(self._element)
        # Only reached if writing the end tag did not raise.
        writer._method = self._old_method
|
1810
|
+
|
1811
|
+
|
1812
|
+
@cython.final
@cython.internal
@cython.freelist(8)
cdef class _MethodChanger:
    # Single-use context manager that switches the output method of a
    # writer on entry and switches it back on exit.
    cdef _IncrementalFileWriter _writer
    cdef int _new_method
    cdef int _old_method
    cdef bint _entered
    cdef bint _exited

    def __cinit__(self, _IncrementalFileWriter writer not None, int method):
        self._writer = writer
        # The method to restore is read at creation time, not on entry.
        self._old_method = writer._method
        self._new_method = method
        self._entered = False
        self._exited = False

    def __enter__(self):
        if self._entered:
            raise LxmlSyntaxError("Inconsistent enter action in context manager")
        self._entered = True
        self._writer._method = self._new_method

    def __exit__(self, exc_type, exc_val, exc_tb):
        if self._exited:
            raise LxmlSyntaxError("Inconsistent exit action in context manager")
        # Refuse to restore if the method is no longer the one set on entry.
        if self._writer._method != self._new_method:
            raise LxmlSyntaxError("Method changed outside of context manager")
        self._exited = True
        self._writer._method = self._old_method

    async def __aenter__(self):
        # async variant, simply delegates to the synchronous protocol
        return self.__enter__()

    async def __aexit__(self, *args):
        # async variant, simply delegates to the synchronous protocol
        return self.__exit__(*args)
|