lxml 6.0.0__cp310-cp310-musllinux_1_2_armv7l.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/libcharset.h +45 -0
- lxml/includes/extlibs/localcharset.h +137 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libxml/HTMLparser.h +339 -0
- lxml/includes/libxml/HTMLtree.h +148 -0
- lxml/includes/libxml/SAX.h +18 -0
- lxml/includes/libxml/SAX2.h +170 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +115 -0
- lxml/includes/libxml/catalog.h +183 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +79 -0
- lxml/includes/libxml/dict.h +82 -0
- lxml/includes/libxml/encoding.h +307 -0
- lxml/includes/libxml/entities.h +147 -0
- lxml/includes/libxml/globals.h +25 -0
- lxml/includes/libxml/hash.h +251 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +16 -0
- lxml/includes/libxml/nanohttp.h +98 -0
- lxml/includes/libxml/parser.h +1633 -0
- lxml/includes/libxml/parserInternals.h +591 -0
- lxml/includes/libxml/relaxng.h +224 -0
- lxml/includes/libxml/schemasInternals.h +959 -0
- lxml/includes/libxml/schematron.h +143 -0
- lxml/includes/libxml/threads.h +81 -0
- lxml/includes/libxml/tree.h +1326 -0
- lxml/includes/libxml/uri.h +106 -0
- lxml/includes/libxml/valid.h +485 -0
- lxml/includes/libxml/xinclude.h +141 -0
- lxml/includes/libxml/xlink.h +193 -0
- lxml/includes/libxml/xmlIO.h +419 -0
- lxml/includes/libxml/xmlautomata.h +163 -0
- lxml/includes/libxml/xmlerror.h +962 -0
- lxml/includes/libxml/xmlexports.h +96 -0
- lxml/includes/libxml/xmlmemory.h +188 -0
- lxml/includes/libxml/xmlmodule.h +61 -0
- lxml/includes/libxml/xmlreader.h +444 -0
- lxml/includes/libxml/xmlregexp.h +116 -0
- lxml/includes/libxml/xmlsave.h +111 -0
- lxml/includes/libxml/xmlschemas.h +254 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +15 -0
- lxml/includes/libxml/xmlversion.h +332 -0
- lxml/includes/libxml/xmlwriter.h +489 -0
- lxml/includes/libxml/xpath.h +569 -0
- lxml/includes/libxml/xpathInternals.h +639 -0
- lxml/includes/libxml/xpointer.h +48 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/pattern.h +84 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1995 -0
- lxml/includes/libxslt/xsltconfig.h +146 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +174 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/saxparser.pxi
ADDED
@@ -0,0 +1,875 @@
|
|
1
|
+
# SAX-like interfaces
|
2
|
+
|
3
|
+
class XMLSyntaxAssertionError(XMLSyntaxError, AssertionError):
    """Syntax error that is also an ``AssertionError``.

    The double inheritance exists for ElementTree / backwards compatibility
    reasons.  A future version may replace this class by a plain
    XMLSyntaxError.
    """
    def __init__(self, message):
        # Only a message is available here; the remaining positional
        # arguments are fixed placeholders.
        # NOTE(review): presumably (code, line, column) -- confirm against
        # XMLSyntaxError.__init__.
        placeholders = (None, 0, 1)
        XMLSyntaxError.__init__(self, message, *placeholders)
|
12
|
+
|
13
|
+
|
14
|
+
# Bit flags describing which SAX events a parser target subscribes to.
# Each value occupies its own bit so that flags can be OR-ed into a mask.
ctypedef enum _SaxParserEvents:
    SAX_EVENT_START    = 0x01
    SAX_EVENT_END      = 0x02
    SAX_EVENT_DATA     = 0x04
    SAX_EVENT_DOCTYPE  = 0x08
    SAX_EVENT_PI       = 0x10
    SAX_EVENT_COMMENT  = 0x20
    SAX_EVENT_START_NS = 0x40
    SAX_EVENT_END_NS   = 0x80
|
23
|
+
|
24
|
+
# Bit flags describing which parse events get collected into the events
# list.  Each value occupies its own bit so that flags can be OR-ed together.
ctypedef enum _ParseEventFilter:
    PARSE_EVENT_FILTER_START    = 0x01
    PARSE_EVENT_FILTER_END      = 0x02
    PARSE_EVENT_FILTER_START_NS = 0x04
    PARSE_EVENT_FILTER_END_NS   = 0x08
    PARSE_EVENT_FILTER_COMMENT  = 0x10
    PARSE_EVENT_FILTER_PI       = 0x20
|
31
|
+
|
32
|
+
|
33
|
+
cdef int _buildParseEventFilter(events) except -1:
    """Translate an iterable of event names into a PARSE_EVENT_FILTER_* mask.

    Recognised names are 'start', 'end', 'start-ns', 'end-ns', 'comment'
    and 'pi'.  Any other item raises ValueError.
    """
    cdef int mask = 0
    cdef int flag
    # Ordered (name, flag) table; names are compared with '==' in this order.
    known_events = (
        ('start', PARSE_EVENT_FILTER_START),
        ('end', PARSE_EVENT_FILTER_END),
        ('start-ns', PARSE_EVENT_FILTER_START_NS),
        ('end-ns', PARSE_EVENT_FILTER_END_NS),
        ('comment', PARSE_EVENT_FILTER_COMMENT),
        ('pi', PARSE_EVENT_FILTER_PI),
    )
    for event in events:
        for name, flag in known_events:
            if event == name:
                mask |= flag
                break
        else:
            raise ValueError(f"invalid event name '{event}'")
    return mask
|
51
|
+
|
52
|
+
|
53
|
+
cdef class _SaxParserTarget:
    # Mask of SAX_EVENT_* flags; the SAX wiring code tests it to decide
    # which of the handlers below get called.
    cdef int _sax_event_filter

    # Every handler here is a do-nothing default: the object-returning ones
    # yield None, the int-returning ones report success with 0.

    cdef _handleSaxStart(self, tag, attrib, nsmap):
        pass

    cdef _handleSaxEnd(self, tag):
        pass

    cdef int _handleSaxData(self, data) except -1:
        return 0

    cdef int _handleSaxDoctype(self, root_tag, public_id, system_id) except -1:
        return 0

    cdef _handleSaxPi(self, target, data):
        pass

    cdef _handleSaxComment(self, comment):
        pass

    cdef _handleSaxStartNs(self, prefix, uri):
        pass

    cdef _handleSaxEndNs(self, prefix):
        pass
|
72
|
+
|
73
|
+
|
74
|
+
#@cython.final
@cython.internal
@cython.no_gc_clear  # Required because parent class uses it - Cython bug.
cdef class _SaxParserContext(_ParserContext):
    """Parser context that hooks into the SAX2 callbacks of a libxml2 parser.

    Depending on configuration, the callbacks either forward events to a
    parser target (``_connectTarget``) or collect parse events into
    ``events_iterator`` (``_connectEvents``).  A configured target takes
    precedence, see ``_initParserContext``.
    """
    cdef _SaxParserTarget _target
    cdef _BaseParser _parser
    # Callbacks found in the SAX handler struct before this class replaced
    # them (or NULL, see _connectTarget).
    cdef xmlparser.startElementNsSAX2Func _origSaxStart
    cdef xmlparser.endElementNsSAX2Func _origSaxEnd
    cdef xmlparser.startElementSAXFunc _origSaxStartNoNs
    cdef xmlparser.endElementSAXFunc _origSaxEndNoNs
    cdef xmlparser.charactersSAXFunc _origSaxData
    # NOTE(review): _origSaxCData is not assigned in the visible part of this file.
    cdef xmlparser.cdataBlockSAXFunc _origSaxCData
    cdef xmlparser.internalSubsetSAXFunc _origSaxDoctype
    cdef xmlparser.commentSAXFunc _origSaxComment
    cdef xmlparser.processingInstructionSAXFunc _origSaxPI
    cdef xmlparser.startDocumentSAXFunc _origSaxStartDocument

    # for event collecting
    cdef int _event_filter          # mask of PARSE_EVENT_FILTER_* flags
    cdef list _ns_stack             # per-element namespace declarations (or None)
    cdef list _node_stack           # elements awaiting their 'end' event
    cdef _ParseEventsIterator events_iterator

    # for iterparse
    cdef _Element _root
    cdef _MultiTagMatcher _matcher

    def __cinit__(self, _BaseParser parser):
        self._ns_stack = []
        self._node_stack = []
        self._parser = parser
        self.events_iterator = _ParseEventsIterator()

    cdef void _setSaxParserTarget(self, _SaxParserTarget target) noexcept:
        # Store the target that _initParserContext() will later connect.
        self._target = target

    cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        # Run the base class setup first, then install our SAX hooks.
        # A target wins over event collection; with neither configured,
        # the SAX callbacks are left untouched.
        _ParserContext._initParserContext(self, c_ctxt)
        if self._target is not None:
            self._connectTarget(c_ctxt)
        elif self._event_filter:
            self._connectEvents(c_ctxt)

    cdef void _connectTarget(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        """Wrap original SAX2 callbacks to call into parser target.

        For each event kind, both the remembered original callback and the
        live SAX slot are first set to NULL.  A handler is then installed
        only if the target's ``_sax_event_filter`` subscribes to the event.
        """
        sax = c_ctxt.sax
        self._origSaxStart = sax.startElementNs = NULL
        self._origSaxStartNoNs = sax.startElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_START |
                                             SAX_EVENT_START_NS |
                                             SAX_EVENT_END_NS):
            # intercept => overwrite orig callback
            # FIXME: also intercept on when collecting END events
            # NOTE(review): XML_SAX2_MAGIC presumably marks a SAX2 (namespace
            # aware) handler struct -- confirm in the libxml2 documentation.
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.startElementNs = _handleSaxTargetStart
            if self._target._sax_event_filter & SAX_EVENT_START:
                sax.startElement = _handleSaxTargetStartNoNs

        self._origSaxEnd = sax.endElementNs = NULL
        self._origSaxEndNoNs = sax.endElement = NULL
        if self._target._sax_event_filter & (SAX_EVENT_END |
                                             SAX_EVENT_END_NS):
            if sax.initialized == xmlparser.XML_SAX2_MAGIC:
                sax.endElementNs = _handleSaxEnd
            if self._target._sax_event_filter & SAX_EVENT_END:
                sax.endElement = _handleSaxEndNoNs

        # Character data and CDATA blocks share one handler.
        self._origSaxData = sax.characters = sax.cdataBlock = NULL
        if self._target._sax_event_filter & SAX_EVENT_DATA:
            sax.characters = sax.cdataBlock = _handleSaxData

        # doctype propagation is always required for entity replacement
        # (hence the original callback is kept in place unless overridden)
        self._origSaxDoctype = sax.internalSubset
        if self._target._sax_event_filter & SAX_EVENT_DOCTYPE:
            sax.internalSubset = _handleSaxTargetDoctype

        self._origSaxPI = sax.processingInstruction = NULL
        if self._target._sax_event_filter & SAX_EVENT_PI:
            sax.processingInstruction = _handleSaxTargetPI

        self._origSaxComment = sax.comment = NULL
        if self._target._sax_event_filter & SAX_EVENT_COMMENT:
            sax.comment = _handleSaxTargetComment

        # enforce entity replacement
        sax.reference = NULL
        c_ctxt.replaceEntities = 1

    cdef void _connectEvents(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        """Wrap original SAX2 callbacks to collect parse events without parser target.

        Unlike ``_connectTarget``, the original callbacks are remembered
        (not cleared) so that the wrapping handlers can call them.
        """
        sax = c_ctxt.sax
        # startDocument is always wrapped, regardless of the event filter.
        self._origSaxStartDocument = sax.startDocument
        sax.startDocument = _handleSaxStartDocument

        # only override "start" event handler if needed
        # NOTE(review): when reached via _initParserContext(), _event_filter is
        # non-zero, so the '== 0' tests below only matter for other callers.
        self._origSaxStart = sax.startElementNs
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_START_NS |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.startElementNs = <xmlparser.startElementNsSAX2Func>_handleSaxStart

        self._origSaxStartNoNs = sax.startElement
        if self._event_filter == 0 or c_ctxt.html or \
               self._event_filter & (PARSE_EVENT_FILTER_START |
                                     PARSE_EVENT_FILTER_END):
            sax.startElement = <xmlparser.startElementSAXFunc>_handleSaxStartNoNs

        # only override "end" event handler if needed
        self._origSaxEnd = sax.endElementNs
        if self._event_filter == 0 or \
               self._event_filter & (PARSE_EVENT_FILTER_END |
                                     PARSE_EVENT_FILTER_END_NS):
            sax.endElementNs = <xmlparser.endElementNsSAX2Func>_handleSaxEnd

        self._origSaxEndNoNs = sax.endElement
        if self._event_filter == 0 or \
               self._event_filter & PARSE_EVENT_FILTER_END:
            sax.endElement = <xmlparser.endElementSAXFunc>_handleSaxEndNoNs

        self._origSaxComment = sax.comment
        if self._event_filter & PARSE_EVENT_FILTER_COMMENT:
            sax.comment = <xmlparser.commentSAXFunc>_handleSaxComment

        self._origSaxPI = sax.processingInstruction
        if self._event_filter & PARSE_EVENT_FILTER_PI:
            sax.processingInstruction = <xmlparser.processingInstructionSAXFunc>_handleSaxPIEvent

    cdef _setEventFilter(self, events, tag):
        # Build the event mask from the event names (ValueError on unknown
        # names).  A tag matcher is only created when events are collected
        # and a tag other than None / '*' was given.
        self._event_filter = _buildParseEventFilter(events)
        if not self._event_filter or tag is None or tag == '*':
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)

    cdef int startDocument(self, xmlDoc* c_doc) except -1:
        # Create the document wrapper for c_doc; the parser reference is
        # dropped even if the factory call raises.
        try:
            self._doc = _documentFactory(c_doc, self._parser)
        finally:
            self._parser = None  # clear circular reference ASAP
        if self._matcher is not None:
            self._matcher.cacheTags(self._doc, force_into_dict=True)
        return 0

    cdef int pushEvent(self, event, xmlNode* c_node) except -1:
        # Append (event, element-for-c_node) to the collected events.
        cdef _Element root
        if self._root is None:
            # Remember the root element once the document has one that is
            # an element node.
            root = self._doc.getroot()
            if root is not None and root._c_node.type == tree.XML_ELEMENT_NODE:
                self._root = root
        node = _elementFactory(self._doc, c_node)
        self.events_iterator._events.append( (event, node) )
        return 0

    cdef int flushEvents(self) except -1:
        # Emit 'end' events for all elements still on the node stack, each
        # followed by its pending namespace end events, then drain whatever
        # is left on the namespace stack.
        events = self.events_iterator._events
        while self._node_stack:
            events.append( ('end', self._node_stack.pop()) )
            _pushSaxNsEndEvents(self)
        # NOTE(review): this loop relies on _pushSaxNsEndEvents() popping one
        # stack entry per call.  It returns early without popping when no
        # end-ns handling is requested; entries are only pushed when it is
        # requested, so this should hold unless the filters change mid-parse.
        while self._ns_stack:
            _pushSaxNsEndEvents(self)

    cdef void _handleSaxException(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
        # Called from the 'except:' branch of the SAX callbacks: flag an
        # error on the parser context (unless one is already recorded),
        # stop the parser and store the raised exception via _store_raised().
        if c_ctxt.errNo == xmlerror.XML_ERR_OK:
            c_ctxt.errNo = xmlerror.XML_ERR_INTERNAL_ERROR
        # stop parsing immediately
        c_ctxt.wellFormed = 0
        c_ctxt.disableSAX = 1
        c_ctxt.instate = xmlparser.XML_PARSER_EOF
        self._store_raised()
|
249
|
+
|
250
|
+
|
251
|
+
@cython.final
@cython.internal
cdef class _ParseEventsIterator:
    """Reusable iterator over a growing list of parse events.

    Already consumed entries are removed from the front of the list lazily.
    When the list runs dry, StopIteration is raised and the read position
    is back at zero, so iteration can resume after new events were appended.
    """
    cdef list _events
    cdef int _event_index

    def __cinit__(self):
        self._event_index = 0
        self._events = []

    def __iter__(self):
        return self

    def __next__(self):
        cdef int pos = self._event_index
        pending = self._events
        # Compact once 1024 entries were consumed, or once at least half of
        # the list has been read (which includes the exhausted case).
        if pos >= 1024 or 2 * pos >= len(pending):
            if pos:
                del pending[:pos]
                pos = 0
                self._event_index = 0
            if pos >= len(pending):
                raise StopIteration
        result = pending[pos]
        self._event_index = pos + 1
        return result
|
278
|
+
|
279
|
+
|
280
|
+
cdef list _build_prefix_uri_list(_SaxParserContext context, int c_nb_namespaces,
                                 const_xmlChar** c_namespaces):
    """Return the declared namespaces as a list of (prefix, uri) tuples.

    ``c_namespaces`` is read as ``c_nb_namespaces`` consecutive
    (prefix, uri) pointer pairs.  ``context`` is not used here.
    """
    cdef int pair_index
    cdef list declared = []
    for pair_index in range(c_nb_namespaces):
        declared.append((
            funicodeOrEmpty(c_namespaces[2 * pair_index]),
            funicode(c_namespaces[2 * pair_index + 1]),
        ))
    return declared
|
289
|
+
|
290
|
+
|
291
|
+
# SAX2 startElementNs callback for event collection without a parser target
# (installed by _SaxParserContext._connectEvents).  It queues 'start-ns'
# events, calls the original callback, then records the namespace stack
# entry and the 'start' event / node stack entry as requested by the filter.
cdef void _handleSaxStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) noexcept with gil:
    # NOTE(review): 'i' and 'c_len' are declared but not used in this function.
    cdef int i
    cdef size_t c_len
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    # Nothing to do without an attached context or once the parser stopped.
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    cdef int event_filter = context._event_filter
    try:
        # The (prefix, uri) list is only built if some namespace event
        # ('start-ns' or 'end-ns') was requested.
        if (c_nb_namespaces and
                event_filter & (PARSE_EVENT_FILTER_START_NS |
                                PARSE_EVENT_FILTER_END_NS)):
            declared_namespaces = _build_prefix_uri_list(
                context, c_nb_namespaces, c_namespaces)
            if event_filter & PARSE_EVENT_FILTER_START_NS:
                for prefix_uri_tuple in declared_namespaces:
                    context.events_iterator._events.append(("start-ns", prefix_uri_tuple))
        else:
            declared_namespaces = None

        # Delegate to the callback that was installed before ours.
        context._origSaxStart(c_ctxt, c_localname, c_prefix, c_namespace,
                              c_nb_namespaces, c_namespaces, c_nb_attributes,
                              c_nb_defaulted, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
            # The HTML parser in libxml2 reports the missing opening tags when it finds
            # misplaced ones, but with tag names from C string constants that ignore the
            # parser dict.  Thus, we need to intern the name ourselves.
            c_localname = tree.xmlDictLookup(c_ctxt.dict, c_localname, -1)
            if c_localname is NULL:
                raise MemoryError()

        # One stack entry (possibly None) per element; _pushSaxNsEndEvents()
        # pops one entry per call.
        if event_filter & PARSE_EVENT_FILTER_END_NS:
            context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_END |
                           PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, c_namespace, c_localname, None)
    except:
        # Flag the error on the parser and store the exception.
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
337
|
+
|
338
|
+
|
339
|
+
# SAX2 startElementNs callback used when parsing into a parser target
# (installed by _SaxParserContext._connectTarget).  It reports namespace
# declarations and the element start to the target and, if requested by the
# context's event filter, also records the corresponding parse events.
cdef void _handleSaxTargetStart(
        void* ctxt, const_xmlChar* c_localname, const_xmlChar* c_prefix,
        const_xmlChar* c_namespace, int c_nb_namespaces,
        const_xmlChar** c_namespaces,
        int c_nb_attributes, int c_nb_defaulted,
        const_xmlChar** c_attributes) noexcept with gil:
    cdef int i
    cdef size_t c_len
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    # Nothing to do without an attached context or once the parser stopped.
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private

    cdef int event_filter = context._event_filter
    cdef int sax_event_filter = context._target._sax_event_filter
    try:
        if c_nb_namespaces:
            declared_namespaces = _build_prefix_uri_list(
                context, c_nb_namespaces, c_namespaces)

            # Collected event first, then the target callback.
            if event_filter & PARSE_EVENT_FILTER_START_NS:
                for prefix_uri_tuple in declared_namespaces:
                    context.events_iterator._events.append(("start-ns", prefix_uri_tuple))

            if sax_event_filter & SAX_EVENT_START_NS:
                for prefix, uri in declared_namespaces:
                    context._target._handleSaxStartNs(prefix, uri)
        else:
            declared_namespaces = None

        if sax_event_filter & SAX_EVENT_START:
            if c_nb_defaulted > 0:
                # only add default attributes if we asked for them
                if c_ctxt.loadsubset & xmlparser.XML_COMPLETE_ATTRS == 0:
                    c_nb_attributes -= c_nb_defaulted
            if c_nb_attributes == 0:
                attrib = IMMUTABLE_EMPTY_MAPPING
            else:
                attrib = {}
                # Attributes are read in groups of five pointers: [0] and [2]
                # feed the name (passed as namespace=[2], local name=[0]),
                # [3] and [4] delimit the value bytes.  [1] is not read here.
                for i in xrange(c_nb_attributes):
                    name = _namespacedNameFromNsName(
                        c_attributes[2], c_attributes[0])
                    if c_attributes[3] is NULL:
                        value = ''
                    else:
                        c_len = c_attributes[4] - c_attributes[3]
                        value = c_attributes[3][:c_len].decode('utf8')
                    attrib[name] = value
                    c_attributes += 5

            nsmap = dict(declared_namespaces) if c_nb_namespaces else IMMUTABLE_EMPTY_MAPPING

            element = _callTargetSaxStart(
                context, c_ctxt,
                _namespacedNameFromNsName(c_namespace, c_localname),
                attrib, nsmap)
        else:
            element = None

        # One stack entry (possibly None) per element when either the event
        # filter or the target wants namespace end notifications.
        if (event_filter & PARSE_EVENT_FILTER_END_NS or
                sax_event_filter & SAX_EVENT_END_NS):
            context._ns_stack.append(declared_namespaces)
        if event_filter & (PARSE_EVENT_FILTER_END |
                           PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, c_namespace,
                               c_localname, element)
    except:
        # Flag the error on the parser and store the exception.
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
409
|
+
|
410
|
+
|
411
|
+
# SAX1-style startElement callback for event collection without a parser
# target (installed by _SaxParserContext._connectEvents).  It calls the
# original callback and then records the 'start' event / node stack entry
# as requested by the event filter.
cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name,
                              const_xmlChar** c_attributes) noexcept with gil:
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    # Nothing to do without an attached context or once the parser stopped.
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        # Delegate to the callback that was installed before ours.
        context._origSaxStartNoNs(c_ctxt, c_name, c_attributes)
        if c_ctxt.html:
            _fixHtmlDictNodeNames(c_ctxt.dict, c_ctxt.node)
            # The HTML parser in libxml2 reports the missing opening tags when it finds
            # misplaced ones, but with tag names from C string constants that ignore the
            # parser dict.  Thus, we need to intern the name ourselves.
            c_name = tree.xmlDictLookup(c_ctxt.dict, c_name, -1)
            if c_name is NULL:
                raise MemoryError()
        if context._event_filter & (PARSE_EVENT_FILTER_END |
                                    PARSE_EVENT_FILTER_START):
            # No namespace is available in this callback, hence NULL.
            _pushSaxStartEvent(context, c_ctxt, NULL, c_name, None)
    except:
        # Flag the error on the parser and store the exception.
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
434
|
+
|
435
|
+
|
436
|
+
# SAX1-style startElement callback used when parsing into a parser target
# (installed by _SaxParserContext._connectTarget when the target subscribes
# to start events).  It builds the attribute mapping, calls the target and
# records a 'start' event if the context's event filter asks for it.
cdef void _handleSaxTargetStartNoNs(void* ctxt, const_xmlChar* c_name,
                                    const_xmlChar** c_attributes) noexcept with gil:
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    # Nothing to do without an attached context or once the parser stopped.
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        if c_attributes is NULL:
            attrib = IMMUTABLE_EMPTY_MAPPING
        else:
            attrib = {}
            # The array is read as alternating name / value pointers until a
            # NULL name is found; a NULL value becomes an empty string.
            while c_attributes[0] is not NULL:
                name = funicode(c_attributes[0])
                attrib[name] = funicodeOrEmpty(c_attributes[1])
                c_attributes += 2
        # No namespace information here, so the nsmap is always empty.
        element = _callTargetSaxStart(
            context, c_ctxt, funicode(c_name),
            attrib, IMMUTABLE_EMPTY_MAPPING)
        if context._event_filter & (PARSE_EVENT_FILTER_END |
                                    PARSE_EVENT_FILTER_START):
            _pushSaxStartEvent(context, c_ctxt, NULL, c_name, element)
    except:
        # Flag the error on the parser and store the exception.
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
461
|
+
|
462
|
+
|
463
|
+
cdef _callTargetSaxStart(_SaxParserContext context,
                         xmlparser.xmlParserCtxt* c_ctxt,
                         tag, attrib, nsmap):
    """Call the target's start handler and return whatever it returned.

    If the result is an ``_Element`` and the parser has a current input,
    the input's line number is copied onto the element's C node, capped
    at 65535.
    """
    result = context._target._handleSaxStart(tag, attrib, nsmap)
    if result is None or c_ctxt.input is NULL:
        return result
    if not isinstance(result, _Element):
        return result
    if c_ctxt.input.line < 65535:
        (<_Element>result)._c_node.line = <unsigned short>c_ctxt.input.line
    else:
        (<_Element>result)._c_node.line = 65535
    return result
|
473
|
+
|
474
|
+
|
475
|
+
cdef int _pushSaxStartEvent(_SaxParserContext context,
                            xmlparser.xmlParserCtxt* c_ctxt,
                            const_xmlChar* c_href,
                            const_xmlChar* c_name, node) except -1:
    """Record the start of an element for event collection.

    Does nothing if a tag matcher is configured and rejects the tag.
    Without a parser target, a missing ``node`` is created from
    ``c_ctxt.node``.  A 'start' event is queued if requested, and, again
    only without a target, the node is stacked for its later 'end' event.
    """
    cdef bint targetless
    if (context._matcher is not None and
            not context._matcher.matchesNsTag(c_href, c_name)):
        return 0

    targetless = context._target is None
    if targetless and node is None:
        assert context._doc is not None
        node = _elementFactory(context._doc, c_ctxt.node)
    if context._event_filter & PARSE_EVENT_FILTER_START:
        context.events_iterator._events.append(('start', node))
    if targetless and context._event_filter & PARSE_EVENT_FILTER_END:
        context._node_stack.append(node)
    return 0
|
490
|
+
|
491
|
+
|
492
|
+
# SAX2 endElementNs callback, shared by target parsing and event collection
# (installed by both _connectTarget and _connectEvents).  It notifies the
# target or calls the original callback, then records the 'end' event and
# the namespace end events.
cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname,
                        const_xmlChar* c_prefix,
                        const_xmlChar* c_namespace) noexcept with gil:
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    # Nothing to do without an attached context or once the parser stopped.
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        if context._target is not None:
            # With a target, this handler may have been installed for
            # end-ns handling only, so the END flag is checked again here.
            if context._target._sax_event_filter & SAX_EVENT_END:
                node = context._target._handleSaxEnd(
                    _namespacedNameFromNsName(c_namespace, c_localname))
            else:
                node = None
        else:
            # Without a target, delegate to the callback installed before ours.
            context._origSaxEnd(c_ctxt, c_localname, c_prefix, c_namespace)
            node = None
        _pushSaxEndEvent(context, c_namespace, c_localname, node)
        _pushSaxNsEndEvents(context)
    except:
        # Flag the error on the parser and store the exception.
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
515
|
+
|
516
|
+
|
517
|
+
# SAX1-style endElement callback, shared by target parsing and event
# collection.  It notifies the target or calls the original callback, then
# records the 'end' event.  No namespace end events are produced here.
cdef void _handleSaxEndNoNs(void* ctxt, const_xmlChar* c_name) noexcept with gil:
    c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    # Nothing to do without an attached context or once the parser stopped.
    if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
        return
    context = <_SaxParserContext>c_ctxt._private
    try:
        if context._target is not None:
            # In target mode, _connectTarget installs this handler only when
            # the target subscribes to end events, hence no flag check here.
            node = context._target._handleSaxEnd(funicode(c_name))
        else:
            # Without a target, delegate to the callback installed before ours.
            context._origSaxEndNoNs(c_ctxt, c_name)
            node = None
        _pushSaxEndEvent(context, NULL, c_name, node)
    except:
        # Flag the error on the parser and store the exception.
        context._handleSaxException(c_ctxt)
    finally:
        return  # swallow any further exceptions
|
533
|
+
|
534
|
+
|
535
|
+
cdef int _pushSaxNsEndEvents(_SaxParserContext context) except -1:
    """Report the end of the namespace declarations of one element.

    Pops one entry from the context's namespace stack and, walking its
    declarations in reverse order, calls the target's end-ns handler
    and/or queues an ('end-ns', None) event, depending on what was
    requested.  Returns without touching the stack if neither was requested.
    """
    cdef bint want_events = context._event_filter & PARSE_EVENT_FILTER_END_NS
    cdef bint want_target_calls = False
    if context._target is not None:
        want_target_calls = context._target._sax_event_filter & SAX_EVENT_END_NS
    if not (want_events or want_target_calls):
        return 0

    cdef list closing = context._ns_stack.pop()
    if closing is None:
        # The element did not declare any namespaces.
        return 0

    cdef tuple declaration
    for declaration in reversed(closing):
        if want_target_calls:
            context._target._handleSaxEndNs(declaration[0])
        if want_events:
            context.events_iterator._events.append(('end-ns', None))

    return 0
|
555
|
+
|
556
|
+
|
557
|
+
cdef int _pushSaxEndEvent(_SaxParserContext context,
                          const_xmlChar* c_href,
                          const_xmlChar* c_name, node) except -1:
    """Queue an 'end' event for an element, if such events are requested.

    Nothing happens when 'end' events are filtered out or when a configured
    tag matcher rejects the tag.  Without a parser target, the node is taken
    from the context's node stack instead of the ``node`` argument.
    """
    if not (context._event_filter & PARSE_EVENT_FILTER_END):
        return 0
    if (context._matcher is not None and
            not context._matcher.matchesNsTag(c_href, c_name)):
        return 0
    if context._target is None:
        node = context._node_stack.pop()
    context.events_iterator._events.append(('end', node))
    return 0
|
567
|
+
|
568
|
+
|
569
|
+
cdef void _handleSaxData(void* ctxt, const_xmlChar* c_data, int data_len) noexcept with gil:
    # Text / CDATA callback; only ever installed when parsing with a target.
    # Decodes the UTF-8 data and passes it to the target's data handler.
    cdef xmlparser.xmlParserCtxt* parser_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if parser_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(parser_ctxt):
        return
    cdef _SaxParserContext sax_context = <_SaxParserContext>parser_ctxt._private
    try:
        text = c_data[:data_len].decode('utf8')
        sax_context._target._handleSaxData(text)
    except:
        sax_context._handleSaxException(parser_ctxt)
    finally:
        return  # swallow any further exceptions
|
582
|
+
|
583
|
+
|
584
|
+
cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name,
                                  const_xmlChar* c_public,
                                  const_xmlChar* c_system) noexcept with gil:
    # Doctype callback; only ever installed when parsing with a target.
    # Passes the three strings (None where NULL) to the target's handler.
    cdef xmlparser.xmlParserCtxt* parser_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if parser_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(parser_ctxt):
        return
    cdef _SaxParserContext sax_context = <_SaxParserContext>parser_ctxt._private
    try:
        root_tag = funicodeOrNone(c_name)
        public_id = funicodeOrNone(c_public)
        system_id = funicodeOrNone(c_system)
        sax_context._target._handleSaxDoctype(root_tag, public_id, system_id)
    except:
        sax_context._handleSaxException(parser_ctxt)
    finally:
        return  # swallow any further exceptions
|
601
|
+
|
602
|
+
|
603
|
+
cdef void _handleSaxStartDocument(void* ctxt) noexcept with gil:
    # SAX start-document callback: runs the saved original handler first,
    # then notifies the Python-side context about the parser's document.
    cdef xmlparser.xmlParserCtxt* parser_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if parser_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(parser_ctxt):
        # no Python-side context attached, or the parser was stopped
        return
    sax_context = <_SaxParserContext>parser_ctxt._private
    sax_context._origSaxStartDocument(ctxt)
    try:
        # myDoc is read only after the original handler has run
        sax_context.startDocument(parser_ctxt.myDoc)
    except:
        sax_context._handleSaxException(parser_ctxt)
    finally:
        return  # swallow any further exceptions
|
616
|
+
|
617
|
+
|
618
|
+
cdef void _handleSaxTargetPI(void* ctxt, const_xmlChar* c_target,
                             const_xmlChar* c_data) noexcept with gil:
    # SAX processing-instruction callback for target parsing: passes the PI
    # to the parser target and, if 'pi' events are enabled, queues whatever
    # the target returned as a ('pi', ...) event.
    # can only be called if parsing with a target
    cdef xmlparser.xmlParserCtxt* parser_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if parser_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(parser_ctxt):
        # no Python-side context attached, or the parser was stopped
        return
    sax_context = <_SaxParserContext>parser_ctxt._private
    try:
        pi_target = funicodeOrNone(c_target)
        pi_data = funicodeOrEmpty(c_data)
        pi = sax_context._target._handleSaxPi(pi_target, pi_data)
        if sax_context._event_filter & PARSE_EVENT_FILTER_PI:
            sax_context.events_iterator._events.append(('pi', pi))
    except:
        sax_context._handleSaxException(parser_ctxt)
    finally:
        return  # swallow any further exceptions
|
635
|
+
|
636
|
+
|
637
|
+
cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target,
                            const_xmlChar* data) noexcept with gil:
    # SAX processing-instruction callback for event collection: runs the
    # saved original handler, then pushes a 'pi' event for the node located
    # by _findLastEventNode().
    # can only be called when collecting pi events
    cdef xmlparser.xmlParserCtxt* parser_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if parser_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(parser_ctxt):
        # no Python-side context attached, or the parser was stopped
        return
    sax_context = <_SaxParserContext>parser_ctxt._private
    sax_context._origSaxPI(ctxt, target, data)
    c_pi_node = _findLastEventNode(parser_ctxt)
    if c_pi_node is NULL:
        # nothing to report
        return
    try:
        sax_context.pushEvent('pi', c_pi_node)
    except:
        sax_context._handleSaxException(parser_ctxt)
    finally:
        return  # swallow any further exceptions
|
654
|
+
|
655
|
+
|
656
|
+
cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) noexcept with gil:
    # SAX comment callback for target parsing: passes the comment text to the
    # parser target and, if 'comment' events are enabled, queues whatever the
    # target returned as a ('comment', ...) event.
    # can only be called if parsing with a target
    cdef xmlparser.xmlParserCtxt* parser_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if parser_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(parser_ctxt):
        # no Python-side context attached, or the parser was stopped
        return
    sax_context = <_SaxParserContext>parser_ctxt._private
    try:
        comment_text = funicodeOrEmpty(c_data)
        comment = sax_context._target._handleSaxComment(comment_text)
        if sax_context._event_filter & PARSE_EVENT_FILTER_COMMENT:
            sax_context.events_iterator._events.append(('comment', comment))
    except:
        sax_context._handleSaxException(parser_ctxt)
    finally:
        return  # swallow any further exceptions
|
670
|
+
|
671
|
+
|
672
|
+
cdef void _handleSaxComment(void* ctxt, const_xmlChar* text) noexcept with gil:
    # SAX comment callback for event collection: runs the saved original
    # handler, then pushes a 'comment' event for the node located by
    # _findLastEventNode().
    # can only be called when collecting comment events
    cdef xmlparser.xmlParserCtxt* parser_ctxt = <xmlparser.xmlParserCtxt*>ctxt
    if parser_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(parser_ctxt):
        # no Python-side context attached, or the parser was stopped
        return
    sax_context = <_SaxParserContext>parser_ctxt._private
    sax_context._origSaxComment(ctxt, text)
    c_comment_node = _findLastEventNode(parser_ctxt)
    if c_comment_node is NULL:
        # nothing to report
        return
    try:
        sax_context.pushEvent('comment', c_comment_node)
    except:
        sax_context._handleSaxException(parser_ctxt)
    finally:
        return  # swallow any further exceptions
|
688
|
+
|
689
|
+
|
690
|
+
cdef inline xmlNode* _findLastEventNode(xmlparser.xmlParserCtxt* c_ctxt):
    # Locate the node that the preceding comment/PI handler produced.
    # this mimics what libxml2 creates for comments/PIs
    if c_ctxt.inSubset == 1:
        # inSubset == 1: take the last node of the document's intSubset
        return c_ctxt.myDoc.intSubset.last
    if c_ctxt.inSubset == 2:
        # inSubset == 2: take the last node of the document's extSubset
        return c_ctxt.myDoc.extSubset.last
    c_current = c_ctxt.node
    if c_current is NULL:
        # no current node: take the document's last child
        return c_ctxt.myDoc.last
    if c_current.type == tree.XML_ELEMENT_NODE:
        # current node is an element: take its last child
        return c_current.last
    # current node is not an element: take its next sibling
    return c_current.next
|
702
|
+
|
703
|
+
|
704
|
+
############################################################
|
705
|
+
## ET compatible XML tree builder
|
706
|
+
############################################################
|
707
|
+
|
708
|
+
cdef class TreeBuilder(_SaxParserTarget):
|
709
|
+
"""TreeBuilder(self, element_factory=None, parser=None,
|
710
|
+
comment_factory=None, pi_factory=None,
|
711
|
+
insert_comments=True, insert_pis=True)
|
712
|
+
|
713
|
+
Parser target that builds a tree from parse event callbacks.
|
714
|
+
|
715
|
+
The factory arguments can be used to influence the creation of
|
716
|
+
elements, comments and processing instructions.
|
717
|
+
|
718
|
+
By default, comments and processing instructions are inserted into
|
719
|
+
the tree, but they can be ignored by passing the respective flags.
|
720
|
+
|
721
|
+
The final tree is returned by the ``close()`` method.
|
722
|
+
"""
|
723
|
+
cdef _BaseParser _parser
|
724
|
+
cdef object _factory
|
725
|
+
cdef object _comment_factory
|
726
|
+
cdef object _pi_factory
|
727
|
+
cdef list _data
|
728
|
+
cdef list _element_stack
|
729
|
+
cdef object _element_stack_pop
|
730
|
+
cdef _Element _last # may be None
|
731
|
+
cdef bint _in_tail
|
732
|
+
cdef bint _insert_comments
|
733
|
+
cdef bint _insert_pis
|
734
|
+
|
735
|
+
def __init__(self, *, element_factory=None, parser=None,
             comment_factory=None, pi_factory=None,
             bint insert_comments=True, bint insert_pis=True):
    # All arguments are keyword-only.  Missing comment/PI factories fall
    # back to the module's Comment / ProcessingInstruction.
    self._sax_event_filter = (
        SAX_EVENT_START | SAX_EVENT_END | SAX_EVENT_DATA |
        SAX_EVENT_PI | SAX_EVENT_COMMENT)

    # parsing state
    self._data = []  # pending text chunks
    self._element_stack = []  # currently open elements
    self._element_stack_pop = self._element_stack.pop  # cached bound method
    self._last = None  # most recently started/closed node
    self._in_tail = 0  # true if we're after an end tag

    # node factories
    self._factory = element_factory
    if comment_factory is None:
        comment_factory = Comment
    self._comment_factory = comment_factory
    if pi_factory is None:
        pi_factory = ProcessingInstruction
    self._pi_factory = pi_factory

    # whether comments / PIs get attached to the tree
    self._insert_comments = insert_comments
    self._insert_pis = insert_pis

    self._parser = parser
|
752
|
+
|
753
|
+
@cython.final
cdef int _flush(self) except -1:
    # Move the buffered text chunks onto the last node: into its tail when
    # we are after an end tag, into its text otherwise.  The buffer is
    # emptied even when there is no last node to receive the text.
    if not self._data:
        return 0
    if self._last is not None:
        text = "".join(self._data)
        if self._in_tail:
            assert self._last.tail is None, "internal error (tail)"
            self._last.tail = text
        else:
            assert self._last.text is None, "internal error (text)"
            self._last.text = text
    self._data.clear()
    return 0
|
766
|
+
|
767
|
+
# internal SAX event handlers
|
768
|
+
|
769
|
+
@cython.final
cdef _handleSaxStart(self, tag, attrib, nsmap):
    # Open a new element, make it the last node and push it onto the stack
    # of open elements.  Returns the new element.
    self._flush()
    open_elements = self._element_stack
    if self._factory is None:
        # no user factory: build the element directly
        if open_elements:
            self._last = _makeSubElement(
                open_elements[-1], tag, None, None, attrib, nsmap, None)
        else:
            self._last = _makeElement(
                tag, NULL, None, self._parser, None, None, attrib, nsmap, None)
    else:
        # the user factory only receives tag and attributes (no nsmap)
        self._last = self._factory(tag, attrib)
        if open_elements:
            _appendChild(open_elements[-1], self._last)
    open_elements.append(self._last)
    self._in_tail = 0
    return self._last
|
785
|
+
|
786
|
+
@cython.final
cdef _handleSaxEnd(self, tag):
    # Close the innermost open element: flush pending text, pop the element
    # off the stack and make it the last node.  'tag' is not inspected here.
    self._flush()
    closed = self._element_stack_pop()
    self._last = closed
    # text arriving from now on belongs to the closed element's tail
    self._in_tail = 1
    return self._last
|
792
|
+
|
793
|
+
@cython.final
cdef int _handleSaxData(self, data) except -1:
    # Buffer a chunk of text; it is attached to a node on the next _flush().
    self._data.append(data)
    return 0
|
796
|
+
|
797
|
+
@cython.final
cdef _handleSaxPi(self, target, data):
    # Create a processing instruction via the PI factory and return it.
    #
    # When PI insertion is enabled, pending text is flushed, the PI becomes
    # the last node, it is appended to the innermost open element (if any),
    # and following text is routed to its tail.
    #
    # The created PI is returned in both cases.  Returning self._last here
    # would hand back a stale, unrelated node (or None) whenever insertion
    # is disabled, because self._last is only updated in the branch below.
    elem = self._pi_factory(target, data)
    if self._insert_pis:
        self._flush()
        self._last = elem
        if self._element_stack:
            _appendChild(self._element_stack[-1], self._last)
        self._in_tail = 1
    return elem
|
807
|
+
|
808
|
+
@cython.final
cdef _handleSaxComment(self, comment):
    # Create a comment via the comment factory and return it.  The comment
    # is only attached to the tree when comment insertion is enabled.
    new_comment = self._comment_factory(comment)
    if not self._insert_comments:
        return new_comment
    self._flush()
    self._last = new_comment
    if self._element_stack:
        _appendChild(self._element_stack[-1], self._last)
    # following text belongs to the comment's tail
    self._in_tail = 1
    return new_comment
|
818
|
+
|
819
|
+
# Python level event handlers
|
820
|
+
|
821
|
+
def close(self):
    """close(self)

    Returns the last node handled by the builder, which is the toplevel
    element once all elements have been closed.

    Raises XMLSyntaxAssertionError if elements are still open or if no
    node was ever created.
    """
    if self._element_stack:
        raise XMLSyntaxAssertionError("missing end tags")
    root = self._last
    # TODO: this does not necessarily seem like an error case. Why not just return None?
    if root is None:
        raise XMLSyntaxAssertionError("missing toplevel element")
    return root
|
833
|
+
|
834
|
+
def data(self, data):
    """data(self, data)

    Adds text to the current element by appending it to the builder's
    pending text buffer.  The value should be either an 8-bit string
    containing ASCII text, or a Unicode string.
    """
    self._handleSaxData(data)
|
841
|
+
|
842
|
+
def start(self, tag, attrs, nsmap=None):
    """start(self, tag, attrs, nsmap=None)

    Opens a new element and returns it.  A missing ``nsmap`` is replaced
    by an empty immutable mapping.
    """
    effective_nsmap = IMMUTABLE_EMPTY_MAPPING if nsmap is None else nsmap
    return self._handleSaxStart(tag, attrs, effective_nsmap)
|
850
|
+
|
851
|
+
def end(self, tag):
    """end(self, tag)

    Closes the current element and returns it.  Asserts that the closed
    element's tag equals ``tag``.
    """
    closed = self._handleSaxEnd(tag)
    # the check runs after the element has already been popped
    assert self._last.tag == tag,\
           f"end tag mismatch (expected {self._last.tag}, got {tag})"
    return closed
|
860
|
+
|
861
|
+
def pi(self, target, data=None):
    """pi(self, target, data=None)

    Builds a processing instruction with the PI factory and attaches it
    to the tree unless PI insertion is disabled.  Returns the result of
    the internal PI handler.
    """
    result = self._handleSaxPi(target, data)
    return result
|
868
|
+
|
869
|
+
def comment(self, comment):
    """comment(self, comment)

    Builds a comment with the comment factory, attaches it to the tree
    unless comment insertion is disabled, and returns it.
    """
    created = self._handleSaxComment(comment)
    return created
|