lxml 6.0.0__cp311-cp311-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cp311-win_arm64.pyd +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cp311-win_arm64.pyd +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cp311-win_arm64.pyd +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cp311-win_arm64.pyd +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cp311-win_arm64.pyd +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libexslt/libexslt.h +29 -0
- lxml/includes/libxml/HTMLparser.h +320 -0
- lxml/includes/libxml/HTMLtree.h +147 -0
- lxml/includes/libxml/SAX.h +204 -0
- lxml/includes/libxml/SAX2.h +173 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +128 -0
- lxml/includes/libxml/catalog.h +182 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +217 -0
- lxml/includes/libxml/dict.h +81 -0
- lxml/includes/libxml/encoding.h +233 -0
- lxml/includes/libxml/entities.h +151 -0
- lxml/includes/libxml/globals.h +529 -0
- lxml/includes/libxml/hash.h +236 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +186 -0
- lxml/includes/libxml/nanohttp.h +81 -0
- lxml/includes/libxml/parser.h +1265 -0
- lxml/includes/libxml/parserInternals.h +662 -0
- lxml/includes/libxml/pattern.h +100 -0
- lxml/includes/libxml/relaxng.h +218 -0
- lxml/includes/libxml/schemasInternals.h +958 -0
- lxml/includes/libxml/schematron.h +142 -0
- lxml/includes/libxml/threads.h +94 -0
- lxml/includes/libxml/tree.h +1314 -0
- lxml/includes/libxml/uri.h +94 -0
- lxml/includes/libxml/valid.h +448 -0
- lxml/includes/libxml/xinclude.h +129 -0
- lxml/includes/libxml/xlink.h +189 -0
- lxml/includes/libxml/xmlIO.h +369 -0
- lxml/includes/libxml/xmlautomata.h +146 -0
- lxml/includes/libxml/xmlerror.h +919 -0
- lxml/includes/libxml/xmlexports.h +50 -0
- lxml/includes/libxml/xmlmemory.h +228 -0
- lxml/includes/libxml/xmlmodule.h +57 -0
- lxml/includes/libxml/xmlreader.h +428 -0
- lxml/includes/libxml/xmlregexp.h +222 -0
- lxml/includes/libxml/xmlsave.h +88 -0
- lxml/includes/libxml/xmlschemas.h +246 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +202 -0
- lxml/includes/libxml/xmlversion.h +526 -0
- lxml/includes/libxml/xmlwriter.h +488 -0
- lxml/includes/libxml/xpath.h +575 -0
- lxml/includes/libxml/xpathInternals.h +632 -0
- lxml/includes/libxml/xpointer.h +137 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/libxslt.h +36 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/trio.h +216 -0
- lxml/includes/libxslt/triodef.h +220 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/win32config.h +51 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1992 -0
- lxml/includes/libxslt/xsltconfig.h +179 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cp311-win_arm64.pyd +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cp311-win_arm64.pyd +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +177 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/relaxng.pxi
ADDED
@@ -0,0 +1,165 @@
|
|
1
|
+
# support for RelaxNG validation
|
2
|
+
from lxml.includes cimport relaxng
|
3
|
+
|
4
|
+
# Optional dependency: the "rnc2rng" package is used below to convert
# RelaxNG compact syntax (.rnc) into XML syntax.  When it cannot be
# imported, the name is bound to None and _require_rnc2rng() raises.
cdef object _rnc2rng
try:
    import rnc2rng as _rnc2rng
except ImportError:
    _rnc2rng = None
|
9
|
+
|
10
|
+
|
11
|
+
cdef int _require_rnc2rng() except -1:
    # Guard for code paths that need the optional "rnc2rng" module.
    # Returns 0 when the module was imported; otherwise raises
    # RelaxNGParseError (signalled to C callers through the -1 return).
    if _rnc2rng is not None:
        return 0
    raise RelaxNGParseError(
        'compact syntax not supported (please install rnc2rng)')
|
16
|
+
|
17
|
+
|
18
|
+
cdef class RelaxNGError(LxmlError):
    """Base class for RelaxNG errors.

    Catch this to handle both schema parsing and validation failures.
    """
|
21
|
+
|
22
|
+
cdef class RelaxNGParseError(RelaxNGError):
    """Error while parsing an XML document as RelaxNG.

    Also raised when no schema source is given, or when a compact syntax
    schema is requested but the ``rnc2rng`` package is not installed.
    """
|
25
|
+
|
26
|
+
cdef class RelaxNGValidateError(RelaxNGError):
    """Error while validating an XML document with a RelaxNG schema.

    Raised when the validation run itself reports an internal error,
    not when a document is merely invalid.
    """
|
29
|
+
|
30
|
+
|
31
|
+
################################################################################
|
32
|
+
# RelaxNG
|
33
|
+
|
34
|
+
cdef class RelaxNG(_Validator):
    """RelaxNG(self, etree=None, file=None)
    Build a Relax NG validator from a schema document.

    The schema is passed either as an Element or ElementTree (``etree``),
    or as a file object or filename through the ``file`` keyword argument.
    Sources whose name ends in ``.rnc`` are treated as compact syntax and
    are converted with the optional ``rnc2rng`` package.
    """
    # Parsed schema handle: NULL until __init__ succeeds, freed in __dealloc__.
    cdef relaxng.xmlRelaxNG* _c_schema

    def __cinit__(self):
        self._c_schema = NULL

    def __init__(self, etree=None, *, file=None):
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* fake_c_doc = NULL
        cdef relaxng.xmlRelaxNGParserCtxt* parser_ctxt = NULL
        _Validator.__init__(self)

        # Step 1: create a schema parser context from whichever source was given.
        if etree is not None:
            # In-memory tree: parse from a temporary document rooted at the
            # given node (torn down again with _destroyFakeDoc below).
            doc = _documentOrRaise(etree)
            root_node = _rootNodeOrRaise(etree)
            fake_c_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
            parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(fake_c_doc)
        elif file is None:
            raise RelaxNGParseError("No tree or file given")
        elif _isString(file):
            if file[-4:].lower() == '.rnc':
                # Compact syntax given by filename: convert to XML first.
                _require_rnc2rng()
                rng_bytes = _utf8(_rnc2rng.dumps(_rnc2rng.load(file)))
                doc = _parseMemoryDocument(rng_bytes, parser=None, url=file)
                parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc)
            else:
                # XML syntax given by filename: let the schema parser load it.
                doc = None
                encoded_path = _encodeFilename(file)
                with self._error_log:
                    previous_loader = _register_document_loader()
                    parser_ctxt = relaxng.xmlRelaxNGNewParserCtxt(_cstr(encoded_path))
                    _reset_document_loader(previous_loader)
        elif (_getFilenameForFile(file) or '')[-4:].lower() == '.rnc':
            # Compact syntax given as a file object with a ".rnc" name.
            _require_rnc2rng()
            rng_bytes = _utf8(_rnc2rng.dumps(_rnc2rng.load(file)))
            doc = _parseMemoryDocument(
                rng_bytes, parser=None, url=_getFilenameForFile(file))
            parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc)
        else:
            # Any other file object: parse it as an XML document.
            doc = _parseDocument(file, parser=None, base_url=None)
            parser_ctxt = relaxng.xmlRelaxNGNewDocParserCtxt(doc._c_doc)

        if parser_ctxt is NULL:
            if fake_c_doc is not NULL:
                _destroyFakeDoc(doc._c_doc, fake_c_doc)
            raise RelaxNGParseError(
                self._error_log._buildExceptionMessage(
                    "Document is not parsable as Relax NG"),
                self._error_log)

        # Step 2: parse the schema while errors are routed into our error log.
        # Need a cast here because older libxml2 releases do not use 'const' in the functype.
        relaxng.xmlRelaxNGSetParserStructuredErrors(
            parser_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
        _connectGenericErrorLog(self._error_log, xmlerror.XML_FROM_RELAXNGP)
        self._c_schema = relaxng.xmlRelaxNGParse(parser_ctxt)
        _connectGenericErrorLog(None)

        # Step 3: release the temporary resources, then report a failed parse.
        relaxng.xmlRelaxNGFreeParserCtxt(parser_ctxt)
        if fake_c_doc is not NULL:
            _destroyFakeDoc(doc._c_doc, fake_c_doc)
        if self._c_schema is NULL:
            raise RelaxNGParseError(
                self._error_log._buildExceptionMessage(
                    "Document is not valid Relax NG"),
                self._error_log)

    def __dealloc__(self):
        relaxng.xmlRelaxNGFree(self._c_schema)

    def __call__(self, etree):
        """__call__(self, etree)

        Validate doc using Relax NG.

        Returns true if document is valid, false if not.
        Raises RelaxNGValidateError if the validation run reports an
        internal error."""
        cdef _Document doc
        cdef _Element root_node
        cdef xmlDoc* c_validation_doc
        cdef relaxng.xmlRelaxNGValidCtxt* valid_ctxt
        cdef int result_code

        assert self._c_schema is not NULL, "RelaxNG instance not initialised"
        doc = _documentOrRaise(etree)
        root_node = _rootNodeOrRaise(etree)

        valid_ctxt = relaxng.xmlRelaxNGNewValidCtxt(self._c_schema)
        if valid_ctxt is NULL:
            raise MemoryError()

        try:
            self._error_log.clear()
            # Need a cast here because older libxml2 releases do not use 'const' in the functype.
            relaxng.xmlRelaxNGSetValidStructuredErrors(
                valid_ctxt, <xmlerror.xmlStructuredErrorFunc> _receiveError, <void*>self._error_log)
            _connectGenericErrorLog(self._error_log, xmlerror.XML_FROM_RELAXNGV)
            c_validation_doc = _fakeRootDoc(doc._c_doc, root_node._c_node)
            # The validation call itself runs without holding the GIL.
            with nogil:
                result_code = relaxng.xmlRelaxNGValidateDoc(valid_ctxt, c_validation_doc)
            _destroyFakeDoc(doc._c_doc, c_validation_doc)
        finally:
            # Always disconnect the log and free the context, even on error.
            _connectGenericErrorLog(None)
            relaxng.xmlRelaxNGFreeValidCtxt(valid_ctxt)

        if result_code == -1:
            raise RelaxNGValidateError(
                "Internal error in Relax NG validation",
                self._error_log)
        return result_code == 0

    @classmethod
    def from_rnc_string(cls, src, base_url=None):
        """Parse a RelaxNG schema in compact syntax from a text string

        Requires the rnc2rng package to be installed.

        Passing the source URL or file path of the source as 'base_url'
        will enable resolving resource references relative to the source.
        """
        _require_rnc2rng()
        rng_xml = _rnc2rng.dumps(_rnc2rng.loads(src))
        rng_bytes = utf8(rng_xml)
        schema_doc = _parseMemoryDocument(rng_bytes, parser=None, url=base_url)
        return cls(schema_doc)
|
Binary file
|
lxml/sax.py
ADDED
@@ -0,0 +1,286 @@
|
|
1
|
+
# cython: language_level=2
|
2
|
+
|
3
|
+
"""
|
4
|
+
SAX-based adapter to copy trees from/to the Python standard library.
|
5
|
+
|
6
|
+
Use the `ElementTreeContentHandler` class to build an ElementTree from
|
7
|
+
SAX events.
|
8
|
+
|
9
|
+
Use the `ElementTreeProducer` class or the `saxify()` function to fire
|
10
|
+
the SAX events of an ElementTree against a SAX ContentHandler.
|
11
|
+
|
12
|
+
See https://lxml.de/sax.html
|
13
|
+
"""
|
14
|
+
|
15
|
+
|
16
|
+
from xml.sax.handler import ContentHandler
|
17
|
+
from lxml import etree
|
18
|
+
from lxml.etree import ElementTree, SubElement
|
19
|
+
from lxml.etree import Comment, ProcessingInstruction
|
20
|
+
|
21
|
+
# "_GenericAlias" is what ElementTreeContentHandler.__class_getitem__ returns.
# Prefer the real implementation from the standard library when available.
try:
    from types import GenericAlias as _GenericAlias
except ImportError:
    # Python 3.8 - we only need this as return value from "__class_getitem__"
    def _GenericAlias(cls, item):
        # Fallback: a plain string such as "ClassName[ItemName]".
        return f"{cls.__name__}[{item.__name__}]"
|
27
|
+
|
28
|
+
|
29
|
+
class SaxError(etree.LxmlError):
    """General SAX error.

    Raised by ElementTreeContentHandler when an end-element event does
    not match the element that is currently open.
    """
|
32
|
+
|
33
|
+
|
34
|
+
def _getNsTag(tag):
|
35
|
+
if tag[0] == '{':
|
36
|
+
return tuple(tag[1:].split('}', 1))
|
37
|
+
else:
|
38
|
+
return None, tag
|
39
|
+
|
40
|
+
|
41
|
+
class ElementTreeContentHandler(ContentHandler):
    """Build an lxml ElementTree from SAX events.

    Feed the handler with SAX events (namespace-aware ``startElementNS`` /
    ``endElementNS`` or plain ``startElement`` / ``endElement``) and read
    the result from the ``etree`` property afterwards.

    ``makeelement`` is an optional factory called as
    ``makeelement(tag, attrib, nsmap)`` to create the root element;
    it defaults to ``etree.Element``.
    """
    def __init__(self, makeelement=None):
        ContentHandler.__init__(self)
        self._root = None
        # Processing instructions seen before the root element exists.
        self._root_siblings = []
        # Currently open elements, innermost last.
        self._element_stack = []
        self._default_ns = None
        # prefix -> stack of URIs; the None prefix is the default namespace.
        self._ns_mapping = { None : [None] }
        # Prefix mappings declared since the last start-element event.
        self._new_mappings = {}
        if makeelement is None:
            makeelement = etree.Element
        self._makeelement = makeelement

    def _get_etree(self):
        "Contains the generated ElementTree after parsing is finished."
        return ElementTree(self._root)

    etree = property(_get_etree, doc=_get_etree.__doc__)

    def setDocumentLocator(self, locator):
        pass

    def startDocument(self):
        pass

    def endDocument(self):
        pass

    def startPrefixMapping(self, prefix, uri):
        """Record a prefix declaration for the next element to be started."""
        self._new_mappings[prefix] = uri
        try:
            self._ns_mapping[prefix].append(uri)
        except KeyError:
            self._ns_mapping[prefix] = [uri]
        if prefix is None:
            self._default_ns = uri

    def endPrefixMapping(self, prefix):
        """Drop the innermost declaration of ``prefix``."""
        ns_uri_list = self._ns_mapping[prefix]
        ns_uri_list.pop()
        if prefix is None:
            # Fall back to the enclosing default namespace.
            self._default_ns = ns_uri_list[-1]

    def _buildTag(self, ns_name_tuple):
        """Return the ``{namespace}local`` tag for a ``(uri, local)`` pair.

        A pair without a namespace URI gets the current default
        namespace, if there is one.
        """
        ns_uri, local_name = ns_name_tuple
        if ns_uri:
            el_tag = "{%s}%s" % ns_name_tuple
        elif self._default_ns:
            el_tag = "{%s}%s" % (self._default_ns, local_name)
        else:
            el_tag = local_name
        return el_tag

    def startElementNS(self, ns_name, qname, attributes=None):
        """Open a new element, creating the root on the first call.

        ``attributes`` maps ``(namespace_uri, local_name)`` tuples to
        values and must provide ``items()``.
        """
        el_name = self._buildTag(ns_name)
        if attributes:
            attrs = {}
            for name_tuple, value in attributes.items():
                if name_tuple[0]:
                    attr_name = "{%s}%s" % name_tuple
                else:
                    attr_name = name_tuple[1]
                attrs[attr_name] = value
        else:
            attrs = None

        element_stack = self._element_stack
        if self._root is None:
            element = self._root = \
                      self._makeelement(el_name, attrs, self._new_mappings)
            # Attach processing instructions that preceded the root.  They are
            # dropped if the element type cannot carry root siblings.
            if self._root_siblings and hasattr(element, 'addprevious'):
                for sibling in self._root_siblings:
                    element.addprevious(sibling)
            del self._root_siblings[:]
        else:
            element = SubElement(element_stack[-1], el_name,
                                 attrs, self._new_mappings)
        element_stack.append(element)

        self._new_mappings.clear()

    def processingInstruction(self, target, data):
        """Add a processing instruction at the current position.

        Before the root element it is buffered until the root exists;
        inside an open element it becomes a child of that element;
        after the root element has been closed it is appended as a
        trailing sibling of the root.
        """
        pi = ProcessingInstruction(target, data)
        if self._root is None:
            self._root_siblings.append(pi)
        elif self._element_stack:
            self._element_stack[-1].append(pi)
        elif hasattr(self._root, 'addnext') and hasattr(self._root, 'getnext'):
            # The root is already closed, so there is no open element to
            # append to.  Add the PI after the last existing sibling of the
            # root to keep document order.  As with leading siblings, it is
            # dropped if the element type cannot carry root siblings.
            last_sibling = self._root
            following = last_sibling.getnext()
            while following is not None:
                last_sibling = following
                following = last_sibling.getnext()
            last_sibling.addnext(pi)

    def endElementNS(self, ns_name, qname):
        """Close the innermost open element.

        Raises SaxError if ``ns_name`` does not match that element's tag.
        """
        element = self._element_stack.pop()
        el_tag = self._buildTag(ns_name)
        if el_tag != element.tag:
            raise SaxError("Unexpected element closed: " + el_tag)

    def startElement(self, name, attributes=None):
        # Non-namespace event: re-key the attributes as (None, name) tuples
        # and delegate to the namespace-aware handler.
        if attributes:
            attributes = {(None, k): v for k, v in attributes.items()}
        self.startElementNS((None, name), name, attributes)

    def endElement(self, name):
        self.endElementNS((None, name), name)

    def characters(self, data):
        """Append text to the innermost open element."""
        last_element = self._element_stack[-1]
        try:
            # if there already is a child element, we must append to its tail
            last_element = last_element[-1]
            last_element.tail = (last_element.tail or '') + data
        except IndexError:
            # otherwise: append to the text
            last_element.text = (last_element.text or '') + data

    ignorableWhitespace = characters

    # Allow subscripting sax.ElementTreeContentHandler in type annotations (PEP 560)
    def __class_getitem__(cls, item):
        return _GenericAlias(cls, item)
|
165
|
+
|
166
|
+
|
167
|
+
class ElementTreeProducer:
    """Produces SAX events for an element and children.

    The events are fired against the given SAX content handler when
    ``saxify()`` is called.
    """
    def __init__(self, element_or_tree, content_handler):
        from xml.sax.xmlreader import AttributesNSImpl as attr_class

        # Accept either a tree (which has getroot()) or an element.
        try:
            root = element_or_tree.getroot()
        except AttributeError:
            root = element_or_tree
        self._element = root
        self._content_handler = content_handler
        self._attr_class = attr_class
        # Shared attributes object for elements without attributes.
        self._empty_attributes = attr_class({}, {})

    def saxify(self):
        """Fire the SAX events for the element and its subtree.

        Processing instructions directly before and after the element are
        included, wrapped in startDocument() / endDocument().
        """
        handler = self._content_handler
        handler.startDocument()

        root = self._element
        if hasattr(root, 'getprevious'):
            # Walk backwards over preceding PIs, then emit in document order.
            leading = []
            node = root.getprevious()
            while getattr(node, 'tag', None) is ProcessingInstruction:
                leading.append(node)
                node = node.getprevious()
            for node in reversed(leading):
                self._recursive_saxify(node, {})

        self._recursive_saxify(root, {})

        if hasattr(root, 'getnext'):
            node = root.getnext()
            while getattr(node, 'tag', None) is ProcessingInstruction:
                self._recursive_saxify(node, {})
                node = node.getnext()

        handler.endDocument()

    def _recursive_saxify(self, element, parent_nsmap):
        """Emit the events for ``element``, its subtree and its tail text.

        ``parent_nsmap`` is the namespace mapping of the enclosing element;
        only prefixes that differ from it are announced as new mappings.
        """
        handler = self._content_handler
        tag = element.tag
        is_pi = tag is ProcessingInstruction
        if is_pi or tag is Comment:
            # Comments produce no event of their own, only their tail text.
            if is_pi:
                handler.processingInstruction(element.target, element.text)
            trailing_text = element.tail
            if trailing_text:
                handler.characters(trailing_text)
            return

        nsmap = element.nsmap
        if nsmap == parent_nsmap:
            declared = []
        else:
            # There have been updates to the namespace
            declared = [
                (prefix, ns_uri) for prefix, ns_uri in nsmap.items()
                if parent_nsmap.get(prefix) != ns_uri
            ]

        attribute_items = element.items()
        if not attribute_items:
            sax_attributes = self._empty_attributes
        else:
            values_by_name = {}
            qnames_by_name = {}
            for raw_name, attr_value in attribute_items:
                name_pair = _getNsTag(raw_name)
                values_by_name[name_pair] = attr_value
                qnames_by_name[name_pair] = self._build_qname(
                    name_pair[0], name_pair[1], nsmap,
                    preferred_prefix=None, is_attribute=True)
            sax_attributes = self._attr_class(values_by_name, qnames_by_name)

        ns_uri, local_name = _getNsTag(tag)
        qname = self._build_qname(
            ns_uri, local_name, nsmap, element.prefix, is_attribute=False)
        ns_name = (ns_uri, local_name)

        for prefix, uri in declared:
            handler.startPrefixMapping(prefix, uri)
        handler.startElementNS(ns_name, qname, sax_attributes)

        leading_text = element.text
        if leading_text:
            handler.characters(leading_text)
        for child in element:
            self._recursive_saxify(child, nsmap)

        handler.endElementNS((ns_uri, local_name), qname)
        for prefix, uri in declared:
            handler.endPrefixMapping(prefix)

        trailing_text = element.tail
        if trailing_text:
            handler.characters(trailing_text)

    def _build_qname(self, ns_uri, local_name, nsmap, preferred_prefix, is_attribute):
        """Return the qualified name (``prefix:local`` or ``local``).

        Elements keep ``preferred_prefix`` when it maps to ``ns_uri``.
        Otherwise, and always for attributes, the alphabetically first
        non-default prefix mapped to ``ns_uri`` is used.  Without any such
        prefix the bare local name is returned.
        """
        if ns_uri is None:
            return local_name

        if not is_attribute and nsmap.get(preferred_prefix) == ns_uri:
            prefix = preferred_prefix
        else:
            # Pick the first matching prefix, in alphabetical order.
            prefix = min(
                (pfx for pfx, uri in nsmap.items()
                 if pfx is not None and uri == ns_uri),
                default=None)

        # A None prefix means the default namespace.
        return local_name if prefix is None else prefix + ':' + local_name
|
280
|
+
|
281
|
+
|
282
|
+
def saxify(element_or_tree, content_handler):
    """Generate SAX events from an XML tree and fire them against a SAX
    ContentHandler, in a single call.

    Returns whatever ``ElementTreeProducer.saxify()`` returns.
    """
    producer = ElementTreeProducer(element_or_tree, content_handler)
    return producer.saxify()
|