lxml 6.0.0__cp312-cp312-macosx_10_13_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cpython-312-darwin.so +0 -0
  4. lxml/_elementpath.py +343 -0
  5. lxml/apihelpers.pxi +1801 -0
  6. lxml/builder.cpython-312-darwin.so +0 -0
  7. lxml/builder.py +243 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +36 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cpython-312-darwin.so +0 -0
  16. lxml/etree.h +244 -0
  17. lxml/etree.pyx +3853 -0
  18. lxml/etree_api.h +204 -0
  19. lxml/extensions.pxi +830 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1927 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_difflib.cpython-312-darwin.so +0 -0
  24. lxml/html/_difflib.py +2106 -0
  25. lxml/html/_html5builder.py +100 -0
  26. lxml/html/_setmixin.py +56 -0
  27. lxml/html/builder.py +173 -0
  28. lxml/html/clean.py +21 -0
  29. lxml/html/defs.py +135 -0
  30. lxml/html/diff.cpython-312-darwin.so +0 -0
  31. lxml/html/diff.py +972 -0
  32. lxml/html/formfill.py +299 -0
  33. lxml/html/html5parser.py +260 -0
  34. lxml/html/soupparser.py +314 -0
  35. lxml/html/usedoctest.py +13 -0
  36. lxml/includes/__init__.pxd +0 -0
  37. lxml/includes/__init__.py +0 -0
  38. lxml/includes/c14n.pxd +25 -0
  39. lxml/includes/config.pxd +3 -0
  40. lxml/includes/dtdvalid.pxd +18 -0
  41. lxml/includes/etree_defs.h +379 -0
  42. lxml/includes/etreepublic.pxd +237 -0
  43. lxml/includes/extlibs/__init__.py +0 -0
  44. lxml/includes/extlibs/libcharset.h +45 -0
  45. lxml/includes/extlibs/localcharset.h +137 -0
  46. lxml/includes/extlibs/zconf.h +543 -0
  47. lxml/includes/extlibs/zlib.h +1938 -0
  48. lxml/includes/htmlparser.pxd +56 -0
  49. lxml/includes/libexslt/__init__.py +0 -0
  50. lxml/includes/libexslt/exslt.h +108 -0
  51. lxml/includes/libexslt/exsltconfig.h +70 -0
  52. lxml/includes/libexslt/exsltexports.h +63 -0
  53. lxml/includes/libxml/HTMLparser.h +339 -0
  54. lxml/includes/libxml/HTMLtree.h +148 -0
  55. lxml/includes/libxml/SAX.h +18 -0
  56. lxml/includes/libxml/SAX2.h +170 -0
  57. lxml/includes/libxml/__init__.py +0 -0
  58. lxml/includes/libxml/c14n.h +115 -0
  59. lxml/includes/libxml/catalog.h +183 -0
  60. lxml/includes/libxml/chvalid.h +230 -0
  61. lxml/includes/libxml/debugXML.h +79 -0
  62. lxml/includes/libxml/dict.h +82 -0
  63. lxml/includes/libxml/encoding.h +307 -0
  64. lxml/includes/libxml/entities.h +147 -0
  65. lxml/includes/libxml/globals.h +25 -0
  66. lxml/includes/libxml/hash.h +251 -0
  67. lxml/includes/libxml/list.h +137 -0
  68. lxml/includes/libxml/nanoftp.h +16 -0
  69. lxml/includes/libxml/nanohttp.h +98 -0
  70. lxml/includes/libxml/parser.h +1633 -0
  71. lxml/includes/libxml/parserInternals.h +591 -0
  72. lxml/includes/libxml/relaxng.h +224 -0
  73. lxml/includes/libxml/schemasInternals.h +959 -0
  74. lxml/includes/libxml/schematron.h +143 -0
  75. lxml/includes/libxml/threads.h +81 -0
  76. lxml/includes/libxml/tree.h +1326 -0
  77. lxml/includes/libxml/uri.h +106 -0
  78. lxml/includes/libxml/valid.h +485 -0
  79. lxml/includes/libxml/xinclude.h +141 -0
  80. lxml/includes/libxml/xlink.h +193 -0
  81. lxml/includes/libxml/xmlIO.h +419 -0
  82. lxml/includes/libxml/xmlautomata.h +163 -0
  83. lxml/includes/libxml/xmlerror.h +962 -0
  84. lxml/includes/libxml/xmlexports.h +96 -0
  85. lxml/includes/libxml/xmlmemory.h +188 -0
  86. lxml/includes/libxml/xmlmodule.h +61 -0
  87. lxml/includes/libxml/xmlreader.h +444 -0
  88. lxml/includes/libxml/xmlregexp.h +116 -0
  89. lxml/includes/libxml/xmlsave.h +111 -0
  90. lxml/includes/libxml/xmlschemas.h +254 -0
  91. lxml/includes/libxml/xmlschemastypes.h +152 -0
  92. lxml/includes/libxml/xmlstring.h +140 -0
  93. lxml/includes/libxml/xmlunicode.h +15 -0
  94. lxml/includes/libxml/xmlversion.h +332 -0
  95. lxml/includes/libxml/xmlwriter.h +489 -0
  96. lxml/includes/libxml/xpath.h +569 -0
  97. lxml/includes/libxml/xpathInternals.h +639 -0
  98. lxml/includes/libxml/xpointer.h +48 -0
  99. lxml/includes/libxslt/__init__.py +0 -0
  100. lxml/includes/libxslt/attributes.h +39 -0
  101. lxml/includes/libxslt/documents.h +93 -0
  102. lxml/includes/libxslt/extensions.h +262 -0
  103. lxml/includes/libxslt/extra.h +72 -0
  104. lxml/includes/libxslt/functions.h +78 -0
  105. lxml/includes/libxslt/imports.h +75 -0
  106. lxml/includes/libxslt/keys.h +53 -0
  107. lxml/includes/libxslt/namespaces.h +68 -0
  108. lxml/includes/libxslt/numbersInternals.h +73 -0
  109. lxml/includes/libxslt/pattern.h +84 -0
  110. lxml/includes/libxslt/preproc.h +43 -0
  111. lxml/includes/libxslt/security.h +104 -0
  112. lxml/includes/libxslt/templates.h +77 -0
  113. lxml/includes/libxslt/transform.h +207 -0
  114. lxml/includes/libxslt/variables.h +118 -0
  115. lxml/includes/libxslt/xslt.h +110 -0
  116. lxml/includes/libxslt/xsltInternals.h +1995 -0
  117. lxml/includes/libxslt/xsltconfig.h +146 -0
  118. lxml/includes/libxslt/xsltexports.h +64 -0
  119. lxml/includes/libxslt/xsltlocale.h +44 -0
  120. lxml/includes/libxslt/xsltutils.h +343 -0
  121. lxml/includes/lxml-version.h +3 -0
  122. lxml/includes/relaxng.pxd +64 -0
  123. lxml/includes/schematron.pxd +34 -0
  124. lxml/includes/tree.pxd +492 -0
  125. lxml/includes/uri.pxd +5 -0
  126. lxml/includes/xinclude.pxd +22 -0
  127. lxml/includes/xmlerror.pxd +852 -0
  128. lxml/includes/xmlparser.pxd +303 -0
  129. lxml/includes/xmlschema.pxd +35 -0
  130. lxml/includes/xpath.pxd +136 -0
  131. lxml/includes/xslt.pxd +190 -0
  132. lxml/isoschematron/__init__.py +348 -0
  133. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  134. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  135. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  136. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  137. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  138. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  142. lxml/iterparse.pxi +438 -0
  143. lxml/lxml.etree.h +244 -0
  144. lxml/lxml.etree_api.h +204 -0
  145. lxml/nsclasses.pxi +281 -0
  146. lxml/objectify.cpython-312-darwin.so +0 -0
  147. lxml/objectify.pyx +2149 -0
  148. lxml/objectpath.pxi +332 -0
  149. lxml/parser.pxi +2059 -0
  150. lxml/parsertarget.pxi +180 -0
  151. lxml/proxy.pxi +619 -0
  152. lxml/public-api.pxi +178 -0
  153. lxml/pyclasslookup.py +3 -0
  154. lxml/readonlytree.pxi +565 -0
  155. lxml/relaxng.pxi +165 -0
  156. lxml/sax.cpython-312-darwin.so +0 -0
  157. lxml/sax.py +286 -0
  158. lxml/saxparser.pxi +875 -0
  159. lxml/schematron.pxi +173 -0
  160. lxml/serializer.pxi +1849 -0
  161. lxml/usedoctest.py +13 -0
  162. lxml/xinclude.pxi +67 -0
  163. lxml/xmlerror.pxi +1654 -0
  164. lxml/xmlid.pxi +179 -0
  165. lxml/xmlschema.pxi +215 -0
  166. lxml/xpath.pxi +487 -0
  167. lxml/xslt.pxi +957 -0
  168. lxml/xsltext.pxi +242 -0
  169. lxml-6.0.0.dist-info/METADATA +163 -0
  170. lxml-6.0.0.dist-info/RECORD +174 -0
  171. lxml-6.0.0.dist-info/WHEEL +6 -0
  172. lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
  173. lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
  174. lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/iterparse.pxi ADDED
@@ -0,0 +1,438 @@
1
+ # iterparse -- event-driven parsing
2
+
3
+ DEF __ITERPARSE_CHUNK_SIZE = 32768
4
+
5
+ cdef class iterparse:
6
+ """iterparse(self, source, events=("end",), tag=None, \
7
+ attribute_defaults=False, dtd_validation=False, \
8
+ load_dtd=False, no_network=True, remove_blank_text=False, \
9
+ remove_comments=False, remove_pis=False, encoding=None, \
10
+ html=False, recover=None, huge_tree=False, schema=None)
11
+
12
+ Incremental parser.
13
+
14
+ Parses XML into a tree and generates tuples (event, element) in a
15
+ SAX-like fashion. ``event`` is any of 'start', 'end', 'start-ns',
16
+ 'end-ns'.
17
+
18
+ For 'start' and 'end', ``element`` is the Element that the parser just
19
+ found opening or closing. For 'start-ns', it is a tuple (prefix, URI) of
20
+ a new namespace declaration. For 'end-ns', it is simply None. Note that
21
+ all start and end events are guaranteed to be properly nested.
22
+
23
+ The keyword argument ``events`` specifies a sequence of event type names
24
+ that should be generated. By default, only 'end' events will be
25
+ generated.
26
+
27
+ The additional ``tag`` argument restricts the 'start' and 'end' events to
28
+ those elements that match the given tag. The ``tag`` argument can also be
29
+ a sequence of tags to allow matching more than one tag. By default,
30
+ events are generated for all elements. Note that the 'start-ns' and
31
+ 'end-ns' events are not impacted by this restriction.
32
+
33
+ The other keyword arguments in the constructor are mainly based on the
34
+ libxml2 parser configuration. A DTD will also be loaded if validation or
35
+ attribute default values are requested.
36
+
37
+ Available boolean keyword arguments:
38
+ - attribute_defaults: read default attributes from DTD
39
+ - dtd_validation: validate (if DTD is available)
40
+ - load_dtd: use DTD for parsing
41
+ - no_network: prevent network access for related files
42
+ - remove_blank_text: discard blank text nodes
43
+ - remove_comments: discard comments
44
+ - remove_pis: discard processing instructions
45
+ - strip_cdata: replace CDATA sections by normal text content (default:
46
+ True for XML, ignored otherwise)
47
+ - compact: save memory for short text content (default: True)
48
+ - resolve_entities: replace entities by their text value (default: True)
49
+ - huge_tree: disable security restrictions and support very deep trees
50
+ and very long text content (only affects libxml2 2.7+)
51
+ - html: parse input as HTML (default: XML)
52
+ - recover: try hard to parse through broken input (default: True for HTML,
53
+ False otherwise)
54
+
55
+ Other keyword arguments:
56
+ - encoding: override the document encoding
57
+ - schema: an XMLSchema to validate against
58
+ """
59
+ cdef _FeedParser _parser
60
+ cdef object _tag
61
+ cdef object _events
62
+ cdef readonly object root
63
+ cdef object _source
64
+ cdef object _filename
65
+ cdef object _error
66
+ cdef bint _close_source_after_read
67
+
68
+ def __init__(self, source, events=("end",), *, tag=None,
69
+ attribute_defaults=False, dtd_validation=False,
70
+ load_dtd=False, no_network=True, remove_blank_text=False,
71
+ compact=True, resolve_entities=True, remove_comments=False,
72
+ remove_pis=False, strip_cdata=True, encoding=None,
73
+ html=False, recover=None, huge_tree=False, collect_ids=True,
74
+ XMLSchema schema=None):
75
+ if not hasattr(source, 'read'):  # not file-like: treat it as a path and open it here
76
+ source = _getFSPathOrObject(source)
77
+ self._filename = source
78
+ self._source = open(source, 'rb')
79
+ self._close_source_after_read = True  # we opened the file, so _close_source() must close it
80
+ else:
81
+ self._filename = _getFilenameForFile(source)
82
+ self._source = source
83
+ self._close_source_after_read = False  # file object belongs to the caller
84
+
85
+ if recover is None:
86
+ recover = html  # default: recover only when parsing HTML
87
+
88
+ if html:
89
+ # make sure we're not looking for namespaces
90
+ events = [event for event in events
91
+ if event not in ('start-ns', 'end-ns')]
92
+ parser = HTMLPullParser(
93
+ events,
94
+ tag=tag,
95
+ recover=recover,
96
+ base_url=self._filename,
97
+ encoding=encoding,
98
+ remove_blank_text=remove_blank_text,
99
+ remove_comments=remove_comments,
100
+ remove_pis=remove_pis,
101
+ no_network=no_network,
102
+ target=None, # TODO
103
+ schema=schema,
104
+ compact=compact)
105
+ else:
106
+ parser = XMLPullParser(
107
+ events,
108
+ tag=tag,
109
+ recover=recover,
110
+ base_url=self._filename,
111
+ encoding=encoding,
112
+ attribute_defaults=attribute_defaults,
113
+ dtd_validation=dtd_validation,
114
+ load_dtd=load_dtd,
115
+ no_network=no_network,
116
+ schema=schema,
117
+ huge_tree=huge_tree,
118
+ remove_blank_text=remove_blank_text,
119
+ resolve_entities=resolve_entities,
120
+ remove_comments=remove_comments,
121
+ remove_pis=remove_pis,
122
+ strip_cdata=strip_cdata,
123
+ collect_ids=collect_ids,  # forward the caller's choice instead of forcing True
124
+ target=None, # TODO
125
+ compact=compact)
126
+
127
+ self._events = parser.read_events()  # event iterator consumed by __next__()
128
+ self._parser = parser
129
+
130
+ @property
131
+ def error_log(self):
132
+ """The error log of the last (or current) parser run.
133
+ """
134
+ return self._parser.feed_error_log
135
+
136
+ @property
137
+ def resolvers(self):
138
+ """The custom resolver registry of the last (or current) parser run.
139
+ """
140
+ return self._parser.resolvers
141
+
142
+ @property
143
+ def version(self):
144
+ """The version of the underlying XML parser."""
145
+ return self._parser.version
146
+
147
+ def set_element_class_lookup(self, ElementClassLookup lookup = None):
148
+ """set_element_class_lookup(self, lookup = None)
149
+
150
+ Set a lookup scheme for element classes generated from this parser.
151
+
152
+ Reset it by passing None or nothing.
153
+ """
154
+ self._parser.set_element_class_lookup(lookup)
155
+
156
+ def makeelement(self, _tag, attrib=None, nsmap=None, **_extra):
157
+ """makeelement(self, _tag, attrib=None, nsmap=None, **_extra)
158
+
159
+ Creates a new element associated with this parser.
160
+ """
161
+ return self._parser.makeelement(
162
+ _tag, attrib=attrib, nsmap=nsmap, **_extra)  # pass the caller's attrib/nsmap through and return the element
163
+
164
+ @cython.final
165
+ cdef _close_source(self):
166
+ if self._source is None:  # already released, nothing to do
167
+ return
168
+ if not self._close_source_after_read:  # file object was passed in by the caller (see __init__)
169
+ self._source = None  # drop our reference only; close() is not called on it
170
+ return
171
+ try:
172
+ close = self._source.close
173
+ except AttributeError:
174
+ close = None  # source has no close attribute
175
+ finally:
176
+ self._source = None  # forget the source before close() runs
177
+ if close is not None:
178
+ close()
179
+
180
+ def __iter__(self):
181
+ return self
182
+
183
+ def __next__(self):
184
+ try:
185
+ return next(self._events)  # serve events that are already queued
186
+ except StopIteration:
187
+ pass  # queue is empty: try to read more input below
188
+ context = <_SaxParserContext>self._parser._getPushParserContext()
189
+ if self._source is not None:  # _close_source() resets this to None
190
+ done = False
191
+ while not done:
192
+ try:
193
+ done = self._read_more_events(context)  # True once the source returned no more data
194
+ return next(self._events)
195
+ except StopIteration:
196
+ pass # no events yet
197
+ except Exception as e:
198
+ self._error = e  # keep the failure; it is raised after pending events are handed out
199
+ self._close_source()
200
+ try:
201
+ return next(self._events)  # events produced before the failure come first
202
+ except StopIteration:
203
+ break
204
+ # nothing left to read or return
205
+ if self._error is not None:
206
+ error = self._error
207
+ self._error = None  # clear it so the stored error is raised only once
208
+ raise error
209
+ if (context._validator is not None
210
+ and not context._validator.isvalid()):
211
+ _raiseParseError(context._c_ctxt, self._filename,
212
+ context._error_log)
213
+ # no errors => all done
214
+ raise StopIteration
215
+
216
+ @cython.final
217
+ cdef bint _read_more_events(self, _SaxParserContext context) except -123:  # True: input exhausted; False: one chunk was fed. NOTE(review): 'context' is not used in this body
218
+ data = self._source.read(__ITERPARSE_CHUNK_SIZE)  # request one chunk (DEF __ITERPARSE_CHUNK_SIZE = 32768)
219
+ if not isinstance(data, bytes):
220
+ self._close_source()
221
+ raise TypeError("reading file objects must return bytes objects")
222
+ if not data:  # empty read: end of input
223
+ try:
224
+ self.root = self._parser.close()  # finish parsing and publish the result as .root
225
+ finally:
226
+ self._close_source()  # release the source even if close() raised
227
+ return True
228
+ self._parser.feed(data)  # push the chunk into the parser
229
+ return False
230
+
231
+
232
+ cdef enum _IterwalkSkipStates:
233
+ IWSKIP_NEXT_IS_START
234
+ IWSKIP_SKIP_NEXT
235
+ IWSKIP_CAN_SKIP
236
+ IWSKIP_CANNOT_SKIP
237
+
238
+
239
+ cdef class iterwalk:
240
+ """iterwalk(self, element_or_tree, events=("end",), tag=None)
241
+
242
+ A tree walker that generates events from an existing tree as if it
243
+ was parsing XML data with ``iterparse()``.
244
+
245
+ Just as for ``iterparse()``, the ``tag`` argument can be a single tag or a
246
+ sequence of tags.
247
+
248
+ After receiving a 'start' or 'start-ns' event, the children and
249
+ descendants of the current element can be excluded from iteration
250
+ by calling the ``skip_subtree()`` method.
251
+ """
252
+ cdef _MultiTagMatcher _matcher
253
+ cdef list _node_stack
254
+ cdef list _events
255
+ cdef object _pop_event
256
+ cdef object _include_siblings
257
+ cdef int _index
258
+ cdef int _event_filter
259
+ cdef _IterwalkSkipStates _skip_state
260
+
261
+ def __init__(self, element_or_tree, events=("end",), tag=None):
262
+ cdef _Element root
263
+ cdef int ns_count
264
+ root = _rootNodeOrRaise(element_or_tree)
265
+ self._event_filter = _buildParseEventFilter(events)
266
+ if tag is None or tag == '*':
267
+ self._matcher = None  # None is treated as "matches everything" in _start_node()/_end_node()
268
+ else:
269
+ self._matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag)
270
+ self._node_stack = []  # holds (node, ns_count) pairs
271
+ self._events = []  # queue of pending (event, value) tuples
272
+ self._pop_event = self._events.pop  # bound method, called as _pop_event(0) in _next_event()
273
+ self._skip_state = IWSKIP_CANNOT_SKIP # ignore all skip requests by default
274
+
275
+ if self._event_filter:
276
+ self._index = 0  # position of the current node in _node_stack
277
+ if self._matcher is not None and self._event_filter & PARSE_EVENT_FILTER_START:
278
+ self._matcher.cacheTags(root._doc)
279
+
280
+ # When processing an ElementTree, add events for the preceding comments/PIs.
281
+ if self._event_filter & (PARSE_EVENT_FILTER_COMMENT | PARSE_EVENT_FILTER_PI):
282
+ if isinstance(element_or_tree, _ElementTree):
283
+ self._include_siblings = root  # lets __next__() also report what follows the root
284
+ for elem in list(root.itersiblings(preceding=True))[::-1]:  # [::-1] reverses the order returned by itersiblings()
285
+ if self._event_filter & PARSE_EVENT_FILTER_COMMENT and elem.tag is Comment:
286
+ self._events.append(('comment', elem))
287
+ elif self._event_filter & PARSE_EVENT_FILTER_PI and elem.tag is PI:
288
+ self._events.append(('pi', elem))
289
+
290
+ ns_count = self._start_node(root)  # queues the root's 'start-ns'/'start' events if requested
291
+ self._node_stack.append( (root, ns_count) )
292
+ else:
293
+ self._index = -1  # empty event filter: __next__() raises StopIteration right away
294
+
295
+ def __iter__(self):
296
+ return self
297
+
298
+ def __next__(self):
299
+ cdef xmlNode* c_child
300
+ cdef _Element node
301
+ cdef _Element next_node
302
+ cdef int ns_count = 0
303
+ if self._events:
304
+ return self._next_event()  # hand out queued events first
305
+ if self._matcher is not None and self._index >= 0:
306
+ node = self._node_stack[self._index][0]
307
+ self._matcher.cacheTags(node._doc)
308
+
309
+ # find next node
310
+ while self._index >= 0:
311
+ node = self._node_stack[self._index][0]  # node at the current stack position
312
+
313
+ if self._skip_state == IWSKIP_SKIP_NEXT:
314
+ c_child = NULL  # skip_subtree() was requested: do not look for children
315
+ else:
316
+ c_child = self._process_non_elements(
317
+ node._doc, _findChildForwards(node._c_node, 0))
318
+ self._skip_state = IWSKIP_CANNOT_SKIP  # reset the skip state
319
+
320
+ while c_child is NULL:
321
+ # back off through parents
322
+ self._index -= 1
323
+ node = self._end_node()  # pops the node and queues its 'end'/'end-ns' events
324
+ if self._index < 0:
325
+ break  # the last stack entry was popped
326
+ c_child = self._process_non_elements(
327
+ node._doc, _nextElement(node._c_node))  # continue after the node that was just popped
328
+
329
+ if c_child is not NULL:
330
+ next_node = _elementFactory(node._doc, c_child)
331
+ if self._event_filter & (PARSE_EVENT_FILTER_START |
332
+ PARSE_EVENT_FILTER_START_NS):
333
+ ns_count = self._start_node(next_node)
334
+ elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
335
+ ns_count = _countNsDefs(next_node._c_node)
336
+ self._node_stack.append( (next_node, ns_count) )  # ns_count is read back by _end_node()
337
+ self._index += 1
338
+ if self._events:
339
+ return self._next_event()
340
+
341
+ if self._include_siblings is not None:
342
+ node, self._include_siblings = self._include_siblings, None  # cleared here, so this branch runs once
343
+ self._process_non_elements(node._doc, _nextElement(node._c_node))  # queues requested comment/PI events after the root
344
+ if self._events:
345
+ return self._next_event()
346
+
347
+ raise StopIteration
348
+
349
+ @cython.final
350
+ cdef xmlNode* _process_non_elements(self, _Document doc, xmlNode* c_node):  # queue comment/PI events from c_node on; return the node where the scan stopped (may be NULL)
351
+ while c_node is not NULL and c_node.type != tree.XML_ELEMENT_NODE:
352
+ if c_node.type == tree.XML_COMMENT_NODE:
353
+ if self._event_filter & PARSE_EVENT_FILTER_COMMENT:  # report comments only when requested
354
+ self._events.append(
355
+ ("comment", _elementFactory(doc, c_node)))
356
+ c_node = _nextElement(c_node)
357
+ elif c_node.type == tree.XML_PI_NODE:
358
+ if self._event_filter & PARSE_EVENT_FILTER_PI:  # report processing instructions only when requested
359
+ self._events.append(
360
+ ("pi", _elementFactory(doc, c_node)))
361
+ c_node = _nextElement(c_node)
362
+ else:
363
+ break  # any other node type ends the scan
364
+ return c_node
365
+
366
+ @cython.final
367
+ cdef _next_event(self):  # remove and return the oldest queued event
368
+ if self._skip_state == IWSKIP_NEXT_IS_START:
369
+ if self._events[0][0] in ('start', 'start-ns'):  # the event name is the first tuple item
370
+ self._skip_state = IWSKIP_CAN_SKIP  # the only state in which skip_subtree() takes effect
371
+ return self._pop_event(0)  # _pop_event is self._events.pop, bound in __init__
372
+
373
+ def skip_subtree(self):
374
+ """Prevent descending into the current subtree.
375
+ Instead, the next returned event will be the 'end' event of the current element
376
+ (if included), ignoring any children or descendants.
377
+
378
+ This has no effect right after an 'end' or 'end-ns' event.
379
+ """
380
+ if self._skip_state == IWSKIP_CAN_SKIP:  # set by _next_event() when it hands out a 'start'/'start-ns' event
381
+ self._skip_state = IWSKIP_SKIP_NEXT  # __next__() then skips the child lookup for the current node
382
+
383
+ @cython.final
384
+ cdef int _start_node(self, _Element node) except -1:  # queue the requested 'start-ns'/'start' events for node; return its namespace count
385
+ cdef int ns_count
386
+ if self._event_filter & PARSE_EVENT_FILTER_START_NS:
387
+ ns_count = _appendStartNsEvents(node._c_node, self._events)  # appends the events and returns how many
388
+ if self._events:
389
+ self._skip_state = IWSKIP_NEXT_IS_START
390
+ elif self._event_filter & PARSE_EVENT_FILTER_END_NS:
391
+ ns_count = _countNsDefs(node._c_node)  # no 'start-ns' events, but _end_node() needs the count
392
+ else:
393
+ ns_count = 0
394
+ if self._event_filter & PARSE_EVENT_FILTER_START:
395
+ if self._matcher is None or self._matcher.matches(node._c_node):  # no tag filter, or the node matches it
396
+ self._events.append( ("start", node) )
397
+ self._skip_state = IWSKIP_NEXT_IS_START
398
+ return ns_count  # callers store this next to the node on _node_stack
399
+
400
+ @cython.final
401
+ cdef _Element _end_node(self):  # pop the top stack entry, queue its 'end'/'end-ns' events, return the node
402
+ cdef _Element node
403
+ cdef int i, ns_count
404
+ node, ns_count = self._node_stack.pop()  # the (node, ns_count) pair pushed when the node was entered
405
+ if self._event_filter & PARSE_EVENT_FILTER_END:
406
+ if self._matcher is None or self._matcher.matches(node._c_node):  # no tag filter, or the node matches it
407
+ self._events.append( ("end", node) )
408
+ if self._event_filter & PARSE_EVENT_FILTER_END_NS and ns_count:
409
+ event = ("end-ns", None)  # one tuple object, appended ns_count times
410
+ for i in range(ns_count):
411
+ self._events.append(event)
412
+ return node
413
+
414
+
415
+ cdef int _countNsDefs(xmlNode* c_node) noexcept:  # count the nsDef entries of c_node whose href is not NULL
416
+ cdef xmlNs* c_ns
417
+ cdef int count
418
+ count = 0
419
+ c_ns = c_node.nsDef
420
+ while c_ns is not NULL:
421
+ count += (c_ns.href is not NULL)  # the comparison contributes 1 or 0
422
+ c_ns = c_ns.next
423
+ return count
424
+
425
+
426
+ cdef int _appendStartNsEvents(xmlNode* c_node, list event_list) except -1:  # append a ("start-ns", (prefix, href)) event per nsDef entry with an href; return how many
427
+ cdef xmlNs* c_ns
428
+ cdef int count
429
+ count = 0
430
+ c_ns = c_node.nsDef
431
+ while c_ns is not NULL:
432
+ if c_ns.href:  # entries without an href are skipped
433
+ ns_tuple = (funicodeOrEmpty(c_ns.prefix),
434
+ funicode(c_ns.href))
435
+ event_list.append( ("start-ns", ns_tuple) )
436
+ count += 1
437
+ c_ns = c_ns.next
438
+ return count
lxml/lxml.etree.h ADDED
@@ -0,0 +1,244 @@
1
+ /* Generated by Cython 3.1.2 */
2
+
3
+ #ifndef __PYX_HAVE__lxml__etree
4
+ #define __PYX_HAVE__lxml__etree
5
+
6
+ #include "Python.h"
7
+ struct LxmlDocument;
8
+ struct LxmlElement;
9
+ struct LxmlElementTree;
10
+ struct LxmlElementTagMatcher;
11
+ struct LxmlElementIterator;
12
+ struct LxmlElementBase;
13
+ struct LxmlElementClassLookup;
14
+ struct LxmlFallbackElementClassLookup;
15
+
16
+ /* "lxml/etree.pyx":451
17
+ *
18
+ * # type of a function that steps from node to node
19
+ * ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
20
+ *
21
+ *
22
+ */
23
+ typedef xmlNode *(*_node_to_node_function)(xmlNode *);
24
+
25
+ /* "lxml/etree.pyx":465
26
+ * # Public Python API
27
+ *
28
+ * @cython.final # <<<<<<<<<<<<<<
29
+ * @cython.freelist(8)
30
+ * cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
31
+ */
32
+ struct LxmlDocument {
33
+ PyObject_HEAD
34
+ struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
35
+ int _ns_counter;
36
+ PyObject *_prefix_tail;
37
+ xmlDoc *_c_doc;
38
+ struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
39
+ };
40
+
41
+ /* "lxml/etree.pyx":817
42
+ *
43
+ *
44
+ * @cython.no_gc_clear # <<<<<<<<<<<<<<
45
+ * cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
46
+ * """Element class.
47
+ */
48
+ struct LxmlElement {
49
+ PyObject_HEAD
50
+ struct LxmlDocument *_doc;
51
+ xmlNode *_c_node;
52
+ PyObject *_tag;
53
+ };
54
+
55
+ /* "lxml/etree.pyx":1991
56
+ *
57
+ *
58
+ * cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
59
+ * object LxmlElementTree ]:
60
+ * cdef _Document _doc
61
+ */
62
+ struct LxmlElementTree {
63
+ PyObject_HEAD
64
+ struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
65
+ struct LxmlDocument *_doc;
66
+ struct LxmlElement *_context_node;
67
+ };
68
+
69
+ /* "lxml/etree.pyx":2765
70
+ *
71
+ *
72
+ * cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
73
+ * type LxmlElementTagMatcherType ]:
74
+ * """
75
+ */
76
+ struct LxmlElementTagMatcher {
77
+ PyObject_HEAD
78
+ struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
79
+ PyObject *_pystrings;
80
+ int _node_type;
81
+ char *_href;
82
+ char *_name;
83
+ };
84
+
85
+ /* "lxml/etree.pyx":2796
86
+ * self._name = NULL
87
+ *
88
+ * cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
89
+ * object LxmlElementIterator, type LxmlElementIteratorType ]:
90
+ * """
91
+ */
92
+ struct LxmlElementIterator {
93
+ struct LxmlElementTagMatcher __pyx_base;
94
+ struct LxmlElement *_node;
95
+ _node_to_node_function _next_element;
96
+ };
97
+
98
+ /* "src/lxml/classlookup.pxi":6
99
+ * # Custom Element classes
100
+ *
101
+ * cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
102
+ * object LxmlElementBase ]:
103
+ * """ElementBase(*children, attrib=None, nsmap=None, **_extra)
104
+ */
105
+ struct LxmlElementBase {
106
+ struct LxmlElement __pyx_base;
107
+ };
108
+
109
+ /* "src/lxml/classlookup.pxi":210
110
+ * # Element class lookup
111
+ *
112
+ * ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
113
+ *
114
+ * # class to store element class lookup functions
115
+ */
116
+ typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
117
+
118
+ /* "src/lxml/classlookup.pxi":213
119
+ *
120
+ * # class to store element class lookup functions
121
+ * cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
122
+ * object LxmlElementClassLookup ]:
123
+ * """ElementClassLookup(self)
124
+ */
125
+ struct LxmlElementClassLookup {
126
+ PyObject_HEAD
127
+ _element_class_lookup_function _lookup_function;
128
+ };
129
+
130
+ /* "src/lxml/classlookup.pxi":221
131
+ *
132
+ *
133
+ * cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
134
+ * [ type LxmlFallbackElementClassLookupType,
135
+ * object LxmlFallbackElementClassLookup ]:
136
+ */
137
+ struct LxmlFallbackElementClassLookup {
138
+ struct LxmlElementClassLookup __pyx_base;
139
+ struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
140
+ struct LxmlElementClassLookup *fallback;
141
+ _element_class_lookup_function _fallback_function;
142
+ };
143
+
144
+ #ifndef __PYX_HAVE_API__lxml__etree
145
+
146
+ #ifdef CYTHON_EXTERN_C
147
+ #undef __PYX_EXTERN_C
148
+ #define __PYX_EXTERN_C CYTHON_EXTERN_C
149
+ #elif defined(__PYX_EXTERN_C)
150
+ #ifdef _MSC_VER
151
+ #pragma message ("Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.")
152
+ #else
153
+ #warning Please do not define the '__PYX_EXTERN_C' macro externally. Use 'CYTHON_EXTERN_C' instead.
154
+ #endif
155
+ #else
156
+ #ifdef __cplusplus
157
+ #define __PYX_EXTERN_C extern "C"
158
+ #else
159
+ #define __PYX_EXTERN_C extern
160
+ #endif
161
+ #endif
162
+
163
+ #ifndef DL_IMPORT
164
+ #define DL_IMPORT(_T) _T
165
+ #endif
166
+
167
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlDocumentType;
168
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementType;
169
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTreeType;
170
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementTagMatcherType;
171
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementIteratorType;
172
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementBaseType;
173
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlElementClassLookupType;
174
+ __PYX_EXTERN_C DL_IMPORT(PyTypeObject) LxmlFallbackElementClassLookupType;
175
+
176
+ __PYX_EXTERN_C struct LxmlElement *deepcopyNodeToDocument(struct LxmlDocument *, xmlNode *);
177
+ __PYX_EXTERN_C struct LxmlElementTree *elementTreeFactory(struct LxmlElement *);
178
+ __PYX_EXTERN_C struct LxmlElementTree *newElementTree(struct LxmlElement *, PyObject *);
179
+ __PYX_EXTERN_C struct LxmlElementTree *adoptExternalDocument(xmlDoc *, PyObject *, int);
180
+ __PYX_EXTERN_C struct LxmlElement *elementFactory(struct LxmlDocument *, xmlNode *);
181
+ __PYX_EXTERN_C struct LxmlElement *makeElement(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
182
+ __PYX_EXTERN_C struct LxmlElement *makeSubElement(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *);
183
+ __PYX_EXTERN_C void setElementClassLookupFunction(_element_class_lookup_function, PyObject *);
184
+ __PYX_EXTERN_C PyObject *lookupDefaultElementClass(PyObject *, PyObject *, xmlNode *);
185
+ __PYX_EXTERN_C PyObject *lookupNamespaceElementClass(PyObject *, PyObject *, xmlNode *);
186
+ __PYX_EXTERN_C PyObject *callLookupFallback(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *);
187
+ __PYX_EXTERN_C int tagMatches(xmlNode *, const xmlChar *, const xmlChar *);
188
+ __PYX_EXTERN_C struct LxmlDocument *documentOrRaise(PyObject *);
189
+ __PYX_EXTERN_C struct LxmlElement *rootNodeOrRaise(PyObject *);
190
+ __PYX_EXTERN_C int hasText(xmlNode *);
191
+ __PYX_EXTERN_C int hasTail(xmlNode *);
192
+ __PYX_EXTERN_C PyObject *textOf(xmlNode *);
193
+ __PYX_EXTERN_C PyObject *tailOf(xmlNode *);
194
+ __PYX_EXTERN_C int setNodeText(xmlNode *, PyObject *);
195
+ __PYX_EXTERN_C int setTailText(xmlNode *, PyObject *);
196
+ __PYX_EXTERN_C PyObject *attributeValue(xmlNode *, xmlAttr *);
197
+ __PYX_EXTERN_C PyObject *attributeValueFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
198
+ __PYX_EXTERN_C PyObject *getAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
199
+ __PYX_EXTERN_C PyObject *iterattributes(struct LxmlElement *, int);
200
+ __PYX_EXTERN_C PyObject *collectAttributes(xmlNode *, int);
201
+ __PYX_EXTERN_C int setAttributeValue(struct LxmlElement *, PyObject *, PyObject *);
202
+ __PYX_EXTERN_C int delAttribute(struct LxmlElement *, PyObject *);
203
+ __PYX_EXTERN_C int delAttributeFromNsName(xmlNode *, const xmlChar *, const xmlChar *);
204
+ __PYX_EXTERN_C int hasChild(xmlNode *);
205
+ __PYX_EXTERN_C xmlNode *findChild(xmlNode *, Py_ssize_t);
206
+ __PYX_EXTERN_C xmlNode *findChildForwards(xmlNode *, Py_ssize_t);
207
+ __PYX_EXTERN_C xmlNode *findChildBackwards(xmlNode *, Py_ssize_t);
208
+ __PYX_EXTERN_C xmlNode *nextElement(xmlNode *);
209
+ __PYX_EXTERN_C xmlNode *previousElement(xmlNode *);
210
+ __PYX_EXTERN_C void appendChild(struct LxmlElement *, struct LxmlElement *);
211
+ __PYX_EXTERN_C int appendChildToElement(struct LxmlElement *, struct LxmlElement *);
212
+ __PYX_EXTERN_C PyObject *pyunicode(const xmlChar *);
213
+ __PYX_EXTERN_C PyObject *utf8(PyObject *);
214
+ __PYX_EXTERN_C PyObject *getNsTag(PyObject *);
215
+ __PYX_EXTERN_C PyObject *getNsTagWithEmptyNs(PyObject *);
216
+ __PYX_EXTERN_C PyObject *namespacedName(xmlNode *);
217
+ __PYX_EXTERN_C PyObject *namespacedNameFromNsName(const xmlChar *, const xmlChar *);
218
+ __PYX_EXTERN_C void iteratorStoreNext(struct LxmlElementIterator *, struct LxmlElement *);
219
+ __PYX_EXTERN_C void initTagMatch(struct LxmlElementTagMatcher *, PyObject *);
220
+ __PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *);
221
+
222
+ #endif /* !__PYX_HAVE_API__lxml__etree */
223
+
224
+ /* WARNING: the interface of the module init function changed in CPython 3.5. */
225
+ /* It now returns a PyModuleDef instance instead of a PyModule instance. */
226
+
227
+ /* WARNING: Use PyImport_AppendInittab("etree", PyInit_etree) instead of calling PyInit_etree directly from Python 3.5 */
228
+ PyMODINIT_FUNC PyInit_etree(void);
229
+
230
+ #if PY_VERSION_HEX >= 0x03050000 && (defined(__GNUC__) || defined(__clang__) || defined(_MSC_VER) || (defined(__cplusplus) && __cplusplus >= 201402L))
231
+ #if defined(__cplusplus) && __cplusplus >= 201402L
232
+ [[deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")]] inline
233
+ #elif defined(__GNUC__) || defined(__clang__)
234
+ __attribute__ ((__deprecated__("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly."), __unused__)) __inline__
235
+ #elif defined(_MSC_VER)
236
+ __declspec(deprecated("Use PyImport_AppendInittab(\"etree\", PyInit_etree) instead of calling PyInit_etree directly.")) __inline
237
+ #endif
238
+ static PyObject* __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyObject* res) {
239
+ return res;
240
+ }
241
+ #define PyInit_etree() __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyInit_etree())
242
+ #endif
243
+
244
+ #endif /* !__PYX_HAVE__lxml__etree */