lxml 6.0.0__cp310-cp310-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cp310-win_arm64.pyd +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cp310-win_arm64.pyd +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cp310-win_arm64.pyd +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cp310-win_arm64.pyd +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cp310-win_arm64.pyd +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libexslt/libexslt.h +29 -0
- lxml/includes/libxml/HTMLparser.h +320 -0
- lxml/includes/libxml/HTMLtree.h +147 -0
- lxml/includes/libxml/SAX.h +204 -0
- lxml/includes/libxml/SAX2.h +173 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +128 -0
- lxml/includes/libxml/catalog.h +182 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +217 -0
- lxml/includes/libxml/dict.h +81 -0
- lxml/includes/libxml/encoding.h +233 -0
- lxml/includes/libxml/entities.h +151 -0
- lxml/includes/libxml/globals.h +529 -0
- lxml/includes/libxml/hash.h +236 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +186 -0
- lxml/includes/libxml/nanohttp.h +81 -0
- lxml/includes/libxml/parser.h +1265 -0
- lxml/includes/libxml/parserInternals.h +662 -0
- lxml/includes/libxml/pattern.h +100 -0
- lxml/includes/libxml/relaxng.h +218 -0
- lxml/includes/libxml/schemasInternals.h +958 -0
- lxml/includes/libxml/schematron.h +142 -0
- lxml/includes/libxml/threads.h +94 -0
- lxml/includes/libxml/tree.h +1314 -0
- lxml/includes/libxml/uri.h +94 -0
- lxml/includes/libxml/valid.h +448 -0
- lxml/includes/libxml/xinclude.h +129 -0
- lxml/includes/libxml/xlink.h +189 -0
- lxml/includes/libxml/xmlIO.h +369 -0
- lxml/includes/libxml/xmlautomata.h +146 -0
- lxml/includes/libxml/xmlerror.h +919 -0
- lxml/includes/libxml/xmlexports.h +50 -0
- lxml/includes/libxml/xmlmemory.h +228 -0
- lxml/includes/libxml/xmlmodule.h +57 -0
- lxml/includes/libxml/xmlreader.h +428 -0
- lxml/includes/libxml/xmlregexp.h +222 -0
- lxml/includes/libxml/xmlsave.h +88 -0
- lxml/includes/libxml/xmlschemas.h +246 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +202 -0
- lxml/includes/libxml/xmlversion.h +526 -0
- lxml/includes/libxml/xmlwriter.h +488 -0
- lxml/includes/libxml/xpath.h +575 -0
- lxml/includes/libxml/xpathInternals.h +632 -0
- lxml/includes/libxml/xpointer.h +137 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/libxslt.h +36 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/trio.h +216 -0
- lxml/includes/libxslt/triodef.h +220 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/win32config.h +51 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1992 -0
- lxml/includes/libxslt/xsltconfig.h +179 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cp310-win_arm64.pyd +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cp310-win_arm64.pyd +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +177 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/iterparse.pxi
ADDED
@@ -0,0 +1,438 @@
|
|
1
|
+
# iterparse -- event-driven parsing
|
2
|
+
|
3
|
+
DEF __ITERPARSE_CHUNK_SIZE = 32768
|
4
|
+
|
5
|
+
cdef class iterparse:
    """iterparse(self, source, events=("end",), tag=None, \
                  attribute_defaults=False, dtd_validation=False, \
                  load_dtd=False, no_network=True, remove_blank_text=False, \
                  compact=True, resolve_entities=True, remove_comments=False, \
                  remove_pis=False, strip_cdata=True, encoding=None, \
                  html=False, recover=None, huge_tree=False, collect_ids=True, \
                  schema=None)

    Incremental parser.

    Parses XML into a tree and generates tuples (event, element) in a
    SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns',
    'end-ns'.

    For 'start' and 'end', ``element`` is the Element that the parser just
    found opening or closing.  For 'start-ns', it is a tuple (prefix, URI) of
    a new namespace declaration.  For 'end-ns', it is simply None.  Note that
    all start and end events are guaranteed to be properly nested.

    The keyword argument ``events`` specifies a sequence of event type names
    that should be generated.  By default, only 'end' events will be
    generated.

    The additional ``tag`` argument restricts the 'start' and 'end' events to
    those elements that match the given tag.  The ``tag`` argument can also be
    a sequence of tags to allow matching more than one tag.  By default,
    events are generated for all elements.  Note that the 'start-ns' and
    'end-ns' events are not impacted by this restriction.

    The other keyword arguments in the constructor are mainly based on the
    libxml2 parser configuration.  A DTD will also be loaded if validation or
    attribute default values are requested.

    Available boolean keyword arguments:
     - attribute_defaults: read default attributes from DTD
     - dtd_validation: validate (if DTD is available)
     - load_dtd: use DTD for parsing
     - no_network: prevent network access for related files
     - remove_blank_text: discard blank text nodes
     - remove_comments: discard comments
     - remove_pis: discard processing instructions
     - strip_cdata: replace CDATA sections by normal text content (default:
                    True for XML, ignored otherwise)
     - compact: save memory for short text content (default: True)
     - resolve_entities: replace entities by their text value (default: True)
     - huge_tree: disable security restrictions and support very deep trees
                  and very long text content (only affects libxml2 2.7+)
     - html: parse input as HTML (default: XML)
     - recover: try hard to parse through broken input (default: True for HTML,
                False otherwise)
     - collect_ids: forwarded to the XML parser (default: True), not used
                    when ``html`` is true

    Other keyword arguments:
     - encoding: override the document encoding
     - schema: an XMLSchema to validate against
    """
    cdef _FeedParser _parser
    cdef object _tag
    cdef object _events
    cdef readonly object root
    cdef object _source
    cdef object _filename
    cdef object _error
    cdef bint _close_source_after_read

    def __init__(self, source, events=("end",), *, tag=None,
                 attribute_defaults=False, dtd_validation=False,
                 load_dtd=False, no_network=True, remove_blank_text=False,
                 compact=True, resolve_entities=True, remove_comments=False,
                 remove_pis=False, strip_cdata=True, encoding=None,
                 html=False, recover=None, huge_tree=False, collect_ids=True,
                 XMLSchema schema=None):
        if not hasattr(source, 'read'):
            # Not file-like: treat it as a path and open it ourselves,
            # which makes us responsible for closing it again.
            source = _getFSPathOrObject(source)
            self._filename = source
            self._source = open(source, 'rb')
            self._close_source_after_read = True
        else:
            # File-like object owned by the caller: never close it.
            self._filename = _getFilenameForFile(source)
            self._source = source
            self._close_source_after_read = False

        if recover is None:
            recover = html

        try:
            if html:
                # make sure we're not looking for namespaces
                events = [event for event in events
                          if event not in ('start-ns', 'end-ns')]
                parser = HTMLPullParser(
                    events,
                    tag=tag,
                    recover=recover,
                    base_url=self._filename,
                    encoding=encoding,
                    remove_blank_text=remove_blank_text,
                    remove_comments=remove_comments,
                    remove_pis=remove_pis,
                    no_network=no_network,
                    target=None,  # TODO
                    schema=schema,
                    compact=compact)
            else:
                parser = XMLPullParser(
                    events,
                    tag=tag,
                    recover=recover,
                    base_url=self._filename,
                    encoding=encoding,
                    attribute_defaults=attribute_defaults,
                    dtd_validation=dtd_validation,
                    load_dtd=load_dtd,
                    no_network=no_network,
                    schema=schema,
                    huge_tree=huge_tree,
                    remove_blank_text=remove_blank_text,
                    resolve_entities=resolve_entities,
                    remove_comments=remove_comments,
                    remove_pis=remove_pis,
                    strip_cdata=strip_cdata,
                    # Honour the caller's choice instead of forcing True.
                    collect_ids=collect_ids,
                    target=None,  # TODO
                    compact=compact)
            pending_events = parser.read_events()
        except BaseException:
            # Do not leave a file that we opened ourselves dangling when
            # the parser cannot be set up (e.g. invalid event names).
            self._close_source()
            raise

        self._events = pending_events
        self._parser = parser

    @property
    def error_log(self):
        """The error log of the last (or current) parser run.
        """
        return self._parser.feed_error_log

    @property
    def resolvers(self):
        """The custom resolver registry of the last (or current) parser run.
        """
        return self._parser.resolvers

    @property
    def version(self):
        """The version of the underlying XML parser."""
        return self._parser.version

    def set_element_class_lookup(self, ElementClassLookup lookup = None):
        """set_element_class_lookup(self, lookup = None)

        Set a lookup scheme for element classes generated from this parser.

        Reset it by passing None or nothing.
        """
        self._parser.set_element_class_lookup(lookup)

    def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
        """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)

        Creates a new element associated with this parser.

        All arguments are forwarded to the ``makeelement()`` method of the
        underlying parser and its result is returned.
        """
        return self._parser.makeelement(
            _tag, attrib=attrib, nsmap=nsmap, **_extra)

    @cython.final
    cdef _close_source(self):
        # Drop the reference to the input source and close it if (and only
        # if) it was opened by this object.  Safe to call repeatedly.
        if self._source is None:
            return
        if not self._close_source_after_read:
            self._source = None
            return
        try:
            close = self._source.close
        except AttributeError:
            close = None
        finally:
            # Forget the source before calling close() so that a failing
            # close() cannot be retried on the same object.
            self._source = None
        if close is not None:
            close()

    def __iter__(self):
        return self

    def __next__(self):
        # 1) Serve events that are already queued.
        try:
            return next(self._events)
        except StopIteration:
            pass
        context = <_SaxParserContext>self._parser._getPushParserContext()
        # 2) Feed more input until an event shows up or the input ends.
        if self._source is not None:
            done = False
            while not done:
                try:
                    done = self._read_more_events(context)
                    return next(self._events)
                except StopIteration:
                    pass  # no events yet
                except Exception as e:
                    # Remember the failure, but first hand out any events
                    # that were collected before it occurred.
                    self._error = e
                    self._close_source()
                    try:
                        return next(self._events)
                    except StopIteration:
                        break
        # nothing left to read or return
        if self._error is not None:
            error = self._error
            self._error = None
            raise error
        if (context._validator is not None
                and not context._validator.isvalid()):
            _raiseParseError(context._c_ctxt, self._filename,
                             context._error_log)
        # no errors => all done
        raise StopIteration

    @cython.final
    cdef bint _read_more_events(self, _SaxParserContext context) except -123:
        # Read one chunk from the source and feed it to the parser.
        # Returns True once the end of the input was reached and the parser
        # was closed (which sets ``self.root``), False otherwise.
        data = self._source.read(__ITERPARSE_CHUNK_SIZE)
        if not isinstance(data, bytes):
            self._close_source()
            raise TypeError("reading file objects must return bytes objects")
        if not data:
            try:
                self.root = self._parser.close()
            finally:
                self._close_source()
            return True
        self._parser.feed(data)
        return False
|
230
|
+
|
231
|
+
|
232
|
+
# States of the ``iterwalk.skip_subtree()`` handshake.  The numeric values
# spell out the implicit 0-based numbering of the declaration order.
cdef enum _IterwalkSkipStates:
    IWSKIP_NEXT_IS_START = 0   # a 'start'/'start-ns' event was queued
    IWSKIP_SKIP_NEXT = 1       # skip_subtree() was requested and accepted
    IWSKIP_CAN_SKIP = 2        # a start event was just returned to the user
    IWSKIP_CANNOT_SKIP = 3     # skip requests are currently ignored
|
237
|
+
|
238
|
+
|
239
|
+
cdef class iterwalk:
    """iterwalk(self, element_or_tree, events=("end",), tag=None)

    A tree walker that generates events from an existing tree as if it
    was parsing XML data with ``iterparse()``.

    Just as for ``iterparse()``, the ``tag`` argument can be a single tag or a
    sequence of tags.

    After receiving a 'start' or 'start-ns' event, the children and
    descendants of the current element can be excluded from iteration
    by calling the ``skip_subtree()`` method.
    """
    cdef _MultiTagMatcher _matcher
    cdef list _node_stack
    cdef list _events
    cdef object _pop_event
    cdef object _include_siblings
    cdef int _index
    cdef int _event_filter
    cdef _IterwalkSkipStates _skip_state

    def __init__(self, element_or_tree, events=("end",), tag=None):
        cdef _Element root
        cdef int ns_count
        root = _rootNodeOrRaise(element_or_tree)
        self._event_filter = _buildParseEventFilter(events)
        if tag is None or tag == '*':
            # no tag restriction => no matcher needed
            self._matcher = None
        else:
            self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)
        self._node_stack = []
        self._events = []
        self._pop_event = self._events.pop
        # skip requests are ignored until a start event was handed out
        self._skip_state = IWSKIP_CANNOT_SKIP

        if not self._event_filter:
            # no events requested => nothing to walk
            self._index = -1
            return

        self._index = 0
        if self._matcher is not None and self._event_filter & PARSE_EVENT_FILTER_START:
            self._matcher.cacheTags(root._doc)

        # For an ElementTree, the comments/PIs in front of the root element
        # are reported first, in document order.
        if self._event_filter & (PARSE_EVENT_FILTER_COMMENT | PARSE_EVENT_FILTER_PI):
            if isinstance(element_or_tree, _ElementTree):
                self._include_siblings = root
                for sibling in reversed(list(root.itersiblings(preceding=True))):
                    if self._event_filter & PARSE_EVENT_FILTER_COMMENT and sibling.tag is Comment:
                        self._events.append(('comment', sibling))
                    elif self._event_filter & PARSE_EVENT_FILTER_PI and sibling.tag is PI:
                        self._events.append(('pi', sibling))

        ns_count = self._start_node(root)
        self._node_stack.append((root, ns_count))

    def __iter__(self):
        return self

    def __next__(self):
        cdef xmlNode* c_next
        cdef _Element current
        cdef _Element child
        cdef int ns_count = 0
        if self._events:
            return self._next_event()
        if self._matcher is not None and self._index >= 0:
            current = self._node_stack[self._index][0]
            self._matcher.cacheTags(current._doc)

        # Walk on until at least one event got queued or the stack is empty.
        while self._index >= 0:
            current = self._node_stack[self._index][0]

            if self._skip_state == IWSKIP_SKIP_NEXT:
                # the user asked not to descend into this element
                c_next = NULL
            else:
                c_next = self._process_non_elements(
                    current._doc, _findChildForwards(current._c_node, 0))
            self._skip_state = IWSKIP_CANNOT_SKIP

            while c_next is NULL:
                # No (more) children: close the element on top of the stack
                # and continue with its next sibling.
                self._index -= 1
                current = self._end_node()
                if self._index < 0:
                    break
                c_next = self._process_non_elements(
                    current._doc, _nextElement(current._c_node))

            if c_next is not NULL:
                child = _elementFactory(current._doc, c_next)
                if self._event_filter & (PARSE_EVENT_FILTER_START |
                                         PARSE_EVENT_FILTER_START_NS):
                    ns_count = self._start_node(child)
                elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
                    ns_count = _countNsDefs(child._c_node)
                self._node_stack.append((child, ns_count))
                self._index += 1
            if self._events:
                return self._next_event()

        if self._include_siblings is not None:
            # Report the comments/PIs that follow the root element, once.
            current, self._include_siblings = self._include_siblings, None
            self._process_non_elements(current._doc, _nextElement(current._c_node))
            if self._events:
                return self._next_event()

        raise StopIteration

    @cython.final
    cdef xmlNode* _process_non_elements(self, _Document doc, xmlNode* c_node):
        # Step over comment and PI nodes starting at c_node, queueing an
        # event for each one that the event filter selects.  Returns the
        # first node that is an element, NULL, or any other node type.
        while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
            if c_node.type == tree.XML_COMMENT_NODE:
                event_name = "comment"
                selected = self._event_filter & PARSE_EVENT_FILTER_COMMENT
            elif c_node.type == tree.XML_PI_NODE:
                event_name = "pi"
                selected = self._event_filter & PARSE_EVENT_FILTER_PI
            else:
                break
            if selected:
                self._events.append(
                    (event_name, _elementFactory(doc, c_node)))
            c_node = _nextElement(c_node)
        return c_node

    @cython.final
    cdef _next_event(self):
        # Pop the oldest queued event.  If it is the start event announced
        # by _start_node(), skipping the subtree becomes possible.
        if (self._skip_state == IWSKIP_NEXT_IS_START
                and self._events[0][0] in ('start', 'start-ns')):
            self._skip_state = IWSKIP_CAN_SKIP
        return self._pop_event(0)

    def skip_subtree(self):
        """Prevent descending into the current subtree.
        Instead, the next returned event will be the 'end' event of the current element
        (if included), ignoring any children or descendants.

        This has no effect right after an 'end' or 'end-ns' event.
        """
        if self._skip_state == IWSKIP_CAN_SKIP:
            self._skip_state = IWSKIP_SKIP_NEXT

    @cython.final
    cdef int _start_node(self, _Element node) except -1:
        # Queue the 'start-ns'/'start' events of ``node`` as selected by the
        # event filter.  Returns the namespace declaration count to store
        # with the node (0 unless namespace events were requested).
        cdef int ns_count = 0
        if self._event_filter & PARSE_EVENT_FILTER_START_NS:
            ns_count = _appendStartNsEvents(node._c_node, self._events)
            if self._events:
                self._skip_state = IWSKIP_NEXT_IS_START
        elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
            ns_count = _countNsDefs(node._c_node)
        if self._event_filter & PARSE_EVENT_FILTER_START:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append(("start", node))
                self._skip_state = IWSKIP_NEXT_IS_START
        return ns_count

    @cython.final
    cdef _Element _end_node(self):
        # Pop the top of the node stack, queue its 'end' event and one
        # 'end-ns' event per counted namespace declaration (as selected by
        # the event filter), and return the popped element.
        cdef _Element node
        cdef int ns_count
        node, ns_count = self._node_stack.pop()
        if self._event_filter & PARSE_EVENT_FILTER_END:
            if self._matcher is None or self._matcher.matches(node._c_node):
                self._events.append(("end", node))
        if self._event_filter & PARSE_EVENT_FILTER_END_NS and ns_count:
            self._events.extend([("end-ns", None)] * ns_count)
        return node
|
413
|
+
|
414
|
+
|
415
|
+
cdef int _countNsDefs(xmlNode* c_node) noexcept:
    # Count the namespace declarations on c_node that carry an href.
    cdef int n_defs = 0
    cdef xmlNs* c_ns = c_node.nsDef
    while c_ns is not NULL:
        if c_ns.href is not NULL:
            n_defs += 1
        c_ns = c_ns.next
    return n_defs
|
424
|
+
|
425
|
+
|
426
|
+
cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:
    # Append one ("start-ns", (prefix, href)) event to event_list for each
    # namespace declaration on c_node that has an href, in declaration order.
    # Returns the number of events appended.
    cdef int n_events = 0
    cdef xmlNs* c_ns = c_node.nsDef
    while c_ns is not NULL:
        if c_ns.href:
            event_list.append(
                ("start-ns",
                 (funicodeOrEmpty(c_ns.prefix), funicode(c_ns.href))))
            n_events += 1
        c_ns = c_ns.next
    return n_events
|
lxml/lxml.etree.h
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
/* Generated by Cython 3.1.2 */
|
2
|
+
|
3
|
+
#ifndef __PYX_HAVE__lxml__etree
|
4
|
+
#define __PYX_HAVE__lxml__etree
|
5
|
+
|
6
|
+
#include "Python.h"
|
7
|
+
struct LxmlDocument;
|
8
|
+
struct LxmlElement;
|
9
|
+
struct LxmlElementTree;
|
10
|
+
struct LxmlElementTagMatcher;
|
11
|
+
struct LxmlElementIterator;
|
12
|
+
struct LxmlElementBase;
|
13
|
+
struct LxmlElementClassLookup;
|
14
|
+
struct LxmlFallbackElementClassLookup;
|
15
|
+
|
16
|
+
/* "lxml/etree.pyx":451
|
17
|
+
*
|
18
|
+
* # type of a function that steps from node to node
|
19
|
+
* ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
|
20
|
+
*
|
21
|
+
*
|
22
|
+
*/
|
23
|
+
typedef xmlNode *(*_node_to_node_function)(xmlNode *);
|
24
|
+
|
25
|
+
/* "lxml/etree.pyx":465
|
26
|
+
* # Public Python API
|
27
|
+
*
|
28
|
+
* @cython.final # <<<<<<<<<<<<<<
|
29
|
+
* @cython.freelist(8)
|
30
|
+
* cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
|
31
|
+
*/
|
32
|
+
struct LxmlDocument {
|
33
|
+
PyObject_HEAD
|
34
|
+
struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
|
35
|
+
int _ns_counter;
|
36
|
+
PyObject *_prefix_tail;
|
37
|
+
xmlDoc *_c_doc;
|
38
|
+
struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
|
39
|
+
};
|
40
|
+
|
41
|
+
/* "lxml/etree.pyx":817
|
42
|
+
*
|
43
|
+
*
|
44
|
+
* @cython.no_gc_clear # <<<<<<<<<<<<<<
|
45
|
+
* cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
46
|
+
* """Element class.
|
47
|
+
*/
|
48
|
+
struct LxmlElement {
|
49
|
+
PyObject_HEAD
|
50
|
+
struct LxmlDocument *_doc;
|
51
|
+
xmlNode *_c_node;
|
52
|
+
PyObject *_tag;
|
53
|
+
};
|
54
|
+
|
55
|
+
/* "lxml/etree.pyx":1991
|
56
|
+
*
|
57
|
+
*
|
58
|
+
* cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
|
59
|
+
* object LxmlElementTree ]:
|
60
|
+
* cdef _Document _doc
|
61
|
+
*/
|
62
|
+
struct LxmlElementTree {
|
63
|
+
PyObject_HEAD
|
64
|
+
struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
|
65
|
+
struct LxmlDocument *_doc;
|
66
|
+
struct LxmlElement *_context_node;
|
67
|
+
};
|
68
|
+
|
69
|
+
/* "lxml/etree.pyx":2765
|
70
|
+
*
|
71
|
+
*
|
72
|
+
* cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
|
73
|
+
* type LxmlElementTagMatcherType ]:
|
74
|
+
* """
|
75
|
+
*/
|
76
|
+
struct LxmlElementTagMatcher {
|
77
|
+
PyObject_HEAD
|
78
|
+
struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
|
79
|
+
PyObject *_pystrings;
|
80
|
+
int _node_type;
|
81
|
+
char *_href;
|
82
|
+
char *_name;
|
83
|
+
};
|
84
|
+
|
85
|
+
/* "lxml/etree.pyx":2796
|
86
|
+
* self._name = NULL
|
87
|
+
*
|
88
|
+
* cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
|
89
|
+
* object LxmlElementIterator, type LxmlElementIteratorType ]:
|
90
|
+
* """
|
91
|
+
*/
|
92
|
+
struct LxmlElementIterator {
|
93
|
+
struct LxmlElementTagMatcher __pyx_base;
|
94
|
+
struct LxmlElement *_node;
|
95
|
+
_node_to_node_function _next_element;
|
96
|
+
};
|
97
|
+
|
98
|
+
/* "src/lxml/classlookup.pxi":6
|
99
|
+
* # Custom Element classes
|
100
|
+
*
|
101
|
+
* cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
|
102
|
+
* object LxmlElementBase ]:
|
103
|
+
* """ElementBase(*children, attrib=None, nsmap=None, **_extra)
|
104
|
+
*/
|
105
|
+
struct LxmlElementBase {
|
106
|
+
struct LxmlElement __pyx_base;
|
107
|
+
};
|
108
|
+
|
109
|
+
/* "src/lxml/classlookup.pxi":210
|
110
|
+
* # Element class lookup
|
111
|
+
*
|
112
|
+
* ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
|
113
|
+
*
|
114
|
+
* # class to store element class lookup functions
|
115
|
+
*/
|
116
|
+
typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
|
117
|
+
|
118
|
+
/* "src/lxml/classlookup.pxi":213
|
119
|
+
*
|
120
|
+
* # class to store element class lookup functions
|
121
|
+
* cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
|
122
|
+
* object LxmlElementClassLookup ]:
|
123
|
+
* """ElementClassLookup(self)
|
124
|
+
*/
|
125
|
+
struct LxmlElementClassLookup {
|
126
|
+
PyObject_HEAD
|
127
|
+
_element_class_lookup_function _lookup_function;
|
128
|
+
};
|
129
|
+
|
130
|
+
/* "src/lxml/classlookup.pxi":221
|
131
|
+
*
|
132
|
+
*
|
133
|
+
* cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
|
134
|
+
* [ type LxmlFallbackElementClassLookupType,
|
135
|
+
* object LxmlFallbackElementClassLookup ]:
|
136
|
+
*/
|
137
|
+
struct LxmlFallbackElementClassLookup {
|
138
|
+
struct LxmlElementClassLookup __pyx_base;
|
139
|
+
struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
|
140
|
+
struct LxmlElementClassLookup *fallback;
|
141
|
+
_element_class_lookup_function _fallback_function;
|
142
|
+
};
|
143
|
+
|
144
|
+
#ifndef __PYX_HAVE_API__lxml__etree
|
145
|
+
|
146
|
+
#ifdef CYTHON_EXTERN_C
|
147
|
+
#undef __PYX_EXTERN_C
|
148
|
+
#define __PYX_EXTERN_C CYTHON_EXTERN_C
|
149
|
+
#elif defined(__PYX_EXTERN_C)
|
150
|
+
#ifdef _MSC_VER
|
151
|
+
#pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.")
|
152
|
+
#else
|
153
|
+
#warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.
|
154
|
+
#endif
|
155
|
+
#else
|
156
|
+
#ifdef __cplusplus
|
157
|
+
#define __PYX_EXTERN_C extern "C"
|
158
|
+
#else
|
159
|
+
#define __PYX_EXTERN_C extern
|
160
|
+
#endif
|
161
|
+
#endif
|
162
|
+
|
163
|
+
#ifndef DL_IMPORT
|
164
|
+
#define DL_IMPORT(_T) _T
|
165
|
+
#endif
|
166
|
+
|
167
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType;
|
168
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType;
|
169
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType;
|
170
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType;
|
171
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType;
|
172
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType;
|
173
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType;
|
174
|
+
__PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType;
|
175
|
+
|
176
|
+
__PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *);
|
177
|
+
__PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *);
|
178
|
+
__PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *);
|
179
|
+
__PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int);
|
180
|
+
__PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *);
|
181
|
+
__PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
182
|
+
__PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
|
183
|
+
__PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *);
|
184
|
+
__PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *);
|
185
|
+
__PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *);
|
186
|
+
__PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *);
|
187
|
+
__PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *);
|
188
|
+
__PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *);
|
189
|
+
__PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *);
|
190
|
+
__PYX_EXTERN_C int hasText(xmlNode *);
|
191
|
+
__PYX_EXTERN_C int hasTail(xmlNode *);
|
192
|
+
__PYX_EXTERN_C PyObject *textOf(xmlNode *);
|
193
|
+
__PYX_EXTERN_C PyObject *tailOf(xmlNode *);
|
194
|
+
__PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *);
|
195
|
+
__PYX_EXTERN_C int setTailText(xmlNode *, PyObject *);
|
196
|
+
__PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *);
|
197
|
+
__PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
198
|
+
__PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
199
|
+
__PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int);
|
200
|
+
__PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int);
|
201
|
+
__PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
|
202
|
+
__PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *);
|
203
|
+
__PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
|
204
|
+
__PYX_EXTERN_C int hasChild(xmlNode *);
|
205
|
+
__PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t);
|
206
|
+
__PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t);
|
207
|
+
__PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t);
|
208
|
+
__PYX_EXTERN_C xmlNode *nextElement(xmlNode *);
|
209
|
+
__PYX_EXTERN_C xmlNode *previousElement(xmlNode *);
|
210
|
+
__PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *);
|
211
|
+
__PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *);
|
212
|
+
__PYX_EXTERN_C PyObject *pyunicode(const xmlChar *);
|
213
|
+
__PYX_EXTERN_C PyObject *utf8(PyObject *);
|
214
|
+
__PYX_EXTERN_C PyObject *getNsTag(PyObject *);
|
215
|
+
__PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *);
|
216
|
+
__PYX_EXTERN_C PyObject *namespacedName(xmlNode *);
|
217
|
+
__PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *);
|
218
|
+
__PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *);
|
219
|
+
__PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *);
|
220
|
+
__PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *);
|
221
|
+
|
222
|
+
#endif /* !__PYX_HAVE_API__lxml__etree */
|
223
|
+
|
224
|
+
/* WARNING: the interface of the module init function changed in CPython 3.5. */
|
225
|
+
/* It now returns a PyModuleDef instance instead of a PyModule instance. */
|
226
|
+
|
227
|
+
/* WARNING: Use PyImport_AppendInittab("etree", PyInit_etree) instead of calling PyInit_etree directly from Python 3.5 */
|
228
|
+
PyMODINIT_FUNC PyInit_etree(void);
|
229
|
+
|
230
|
+
#if PY_VERSION_HEX >= 0x03050000 && (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201402L))
|
231
|
+
#if defined(__cplusplus) && __cplusplus >= 201402L
|
232
|
+
[[deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")]] inline
|
233
|
+
#elif defined(__GNUC__) || defined(__clang__)
|
234
|
+
__attribute__ ((__deprecated__("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly."), __unused__)) __inline__
|
235
|
+
#elif defined(_MSC_VER)
|
236
|
+
__declspec(deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")) __inline
|
237
|
+
#endif
|
238
|
+
static PyObject* __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyObject* res) {
|
239
|
+
return res;
|
240
|
+
}
|
241
|
+
#define PyInit_etree() __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyInit_etree())
|
242
|
+
#endif
|
243
|
+
|
244
|
+
#endif /* !__PYX_HAVE__lxml__etree */
|