lxml 5.3.2__cp311-cp311-win32.whl → 6.0.0__cp311-cp311-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/__init__.py +1 -1
- lxml/_elementpath.cp311-win32.pyd +0 -0
- lxml/_elementpath.py +3 -1
- lxml/apihelpers.pxi +25 -17
- lxml/builder.cp311-win32.pyd +0 -0
- lxml/builder.py +11 -0
- lxml/debug.pxi +0 -54
- lxml/etree.cp311-win32.pyd +0 -0
- lxml/etree.h +244 -248
- lxml/etree.pyx +154 -33
- lxml/etree_api.h +204 -195
- lxml/extensions.pxi +3 -6
- lxml/html/__init__.py +7 -3
- lxml/html/_difflib.cp311-win32.pyd +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/builder.py +40 -0
- lxml/html/defs.py +3 -3
- lxml/html/diff.cp311-win32.pyd +0 -0
- lxml/html/diff.py +406 -312
- lxml/includes/etree_defs.h +6 -6
- lxml/includes/lxml-version.h +1 -1
- lxml/includes/tree.pxd +10 -12
- lxml/includes/xmlparser.pxd +46 -8
- lxml/lxml.etree.h +24 -28
- lxml/lxml.etree_api.h +59 -50
- lxml/objectify.cp311-win32.pyd +0 -0
- lxml/objectify.pyx +11 -7
- lxml/parser.pxi +106 -47
- lxml/sax.cp311-win32.pyd +0 -0
- lxml/sax.py +11 -0
- lxml/saxparser.pxi +14 -14
- lxml/schematron.pxi +8 -3
- lxml/serializer.pxi +71 -3
- lxml/xslt.pxi +10 -3
- lxml-6.0.0.dist-info/METADATA +163 -0
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/RECORD +40 -38
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/WHEEL +1 -1
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSE.txt +3 -1
- lxml-5.3.2.dist-info/METADATA +0 -100
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSES.txt +0 -0
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/top_level.txt +0 -0
lxml/etree.pyx
CHANGED
@@ -19,6 +19,7 @@ __all__ = [
|
|
19
19
|
'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', 'HTMLParser',
|
20
20
|
'ICONV_COMPILED_VERSION',
|
21
21
|
'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
|
22
|
+
'LIBXML_FEATURES',
|
22
23
|
'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION',
|
23
24
|
'LXML_VERSION',
|
24
25
|
'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
|
@@ -299,6 +300,101 @@ cdef extern from *:
|
|
299
300
|
ICONV_COMPILED_VERSION = __unpackIntVersion(LIBICONV_HEX_VERSION, base=0x100)[:2]
|
300
301
|
|
301
302
|
|
303
|
+
cdef extern from "libxml/xmlversion.h":
|
304
|
+
"""
|
305
|
+
static const char* const _lxml_lib_features[] = {
|
306
|
+
#ifdef LIBXML_HTML_ENABLED
|
307
|
+
"html",
|
308
|
+
#endif
|
309
|
+
#ifdef LIBXML_FTP_ENABLED
|
310
|
+
"ftp",
|
311
|
+
#endif
|
312
|
+
#ifdef LIBXML_HTTP_ENABLED
|
313
|
+
"http",
|
314
|
+
#endif
|
315
|
+
#ifdef LIBXML_CATALOG_ENABLED
|
316
|
+
"catalog",
|
317
|
+
#endif
|
318
|
+
#ifdef LIBXML_XPATH_ENABLED
|
319
|
+
"xpath",
|
320
|
+
#endif
|
321
|
+
#ifdef LIBXML_ICONV_ENABLED
|
322
|
+
"iconv",
|
323
|
+
#endif
|
324
|
+
#ifdef LIBXML_ICU_ENABLED
|
325
|
+
"icu",
|
326
|
+
#endif
|
327
|
+
#ifdef LIBXML_REGEXP_ENABLED
|
328
|
+
"regexp",
|
329
|
+
#endif
|
330
|
+
#ifdef LIBXML_SCHEMAS_ENABLED
|
331
|
+
"xmlschema",
|
332
|
+
#endif
|
333
|
+
#ifdef LIBXML_SCHEMATRON_ENABLED
|
334
|
+
"schematron",
|
335
|
+
#endif
|
336
|
+
#ifdef LIBXML_ZLIB_ENABLED
|
337
|
+
"zlib",
|
338
|
+
#endif
|
339
|
+
#ifdef LIBXML_LZMA_ENABLED
|
340
|
+
"lzma",
|
341
|
+
#endif
|
342
|
+
0
|
343
|
+
};
|
344
|
+
"""
|
345
|
+
const char* const* _LXML_LIB_FEATURES "_lxml_lib_features"
|
346
|
+
|
347
|
+
|
348
|
+
cdef set _copy_lib_features():
|
349
|
+
features = set()
|
350
|
+
feature = _LXML_LIB_FEATURES
|
351
|
+
while feature[0]:
|
352
|
+
features.add(feature[0].decode('ASCII'))
|
353
|
+
feature += 1
|
354
|
+
return features
|
355
|
+
|
356
|
+
LIBXML_COMPILED_FEATURES = _copy_lib_features()
|
357
|
+
LIBXML_FEATURES = {
|
358
|
+
feature_name for feature_id, feature_name in [
|
359
|
+
#XML_WITH_THREAD = 1
|
360
|
+
#XML_WITH_TREE = 2
|
361
|
+
#XML_WITH_OUTPUT = 3
|
362
|
+
#XML_WITH_PUSH = 4
|
363
|
+
#XML_WITH_READER = 5
|
364
|
+
#XML_WITH_PATTERN = 6
|
365
|
+
#XML_WITH_WRITER = 7
|
366
|
+
#XML_WITH_SAX1 = 8
|
367
|
+
(xmlparser.XML_WITH_FTP, "ftp"), # XML_WITH_FTP = 9
|
368
|
+
(xmlparser.XML_WITH_HTTP, "http"), # XML_WITH_HTTP = 10
|
369
|
+
#XML_WITH_VALID = 11
|
370
|
+
(xmlparser.XML_WITH_HTML, "html"), # XML_WITH_HTML = 12
|
371
|
+
#XML_WITH_LEGACY = 13
|
372
|
+
#XML_WITH_C14N = 14
|
373
|
+
(xmlparser.XML_WITH_CATALOG, "catalog"), # XML_WITH_CATALOG = 15
|
374
|
+
(xmlparser.XML_WITH_XPATH, "xpath"), # XML_WITH_XPATH = 16
|
375
|
+
#XML_WITH_XPTR = 17
|
376
|
+
#XML_WITH_XINCLUDE = 18
|
377
|
+
(xmlparser.XML_WITH_ICONV, "iconv"), # XML_WITH_ICONV = 19
|
378
|
+
#XML_WITH_ISO8859X = 20
|
379
|
+
#XML_WITH_UNICODE = 21
|
380
|
+
(xmlparser.XML_WITH_REGEXP, "regexp"), # XML_WITH_REGEXP = 22
|
381
|
+
#XML_WITH_AUTOMATA = 23
|
382
|
+
#XML_WITH_EXPR = 24
|
383
|
+
(xmlparser.XML_WITH_SCHEMAS, "xmlschema"), # XML_WITH_SCHEMAS = 25
|
384
|
+
(xmlparser.XML_WITH_SCHEMATRON, "schematron"), # XML_WITH_SCHEMATRON = 26
|
385
|
+
#XML_WITH_MODULES = 27
|
386
|
+
#XML_WITH_DEBUG = 28
|
387
|
+
#XML_WITH_DEBUG_MEM = 29
|
388
|
+
#XML_WITH_DEBUG_RUN = 30 # unused
|
389
|
+
(xmlparser.XML_WITH_ZLIB, "zlib"), # XML_WITH_ZLIB = 31
|
390
|
+
(xmlparser.XML_WITH_ICU, "icu"), # XML_WITH_ICU = 32
|
391
|
+
(xmlparser.XML_WITH_LZMA, "lzma"), # XML_WITH_LZMA = 33
|
392
|
+
] if xmlparser.xmlHasFeature(feature_id)
|
393
|
+
}
|
394
|
+
|
395
|
+
cdef bint HAS_ZLIB_COMPRESSION = xmlparser.xmlHasFeature(xmlparser.XML_WITH_ZLIB)
|
396
|
+
|
397
|
+
|
302
398
|
# class for temporary storage of Python references,
|
303
399
|
# used e.g. for XPath results
|
304
400
|
@cython.final
|
@@ -519,13 +615,15 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
|
|
519
615
|
c_ns = self._findOrBuildNodeNs(c_node, c_href, NULL, 0)
|
520
616
|
tree.xmlSetNs(c_node, c_ns)
|
521
617
|
|
618
|
+
|
522
619
|
cdef tuple __initPrefixCache():
|
523
620
|
cdef int i
|
524
621
|
return tuple([ python.PyBytes_FromFormat("ns%d", i)
|
525
|
-
for i in range(
|
622
|
+
for i in range(26) ])
|
526
623
|
|
527
624
|
cdef tuple _PREFIX_CACHE = __initPrefixCache()
|
528
625
|
|
626
|
+
|
529
627
|
cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
|
530
628
|
cdef _Document result
|
531
629
|
result = _Document.__new__(_Document)
|
@@ -1637,11 +1735,6 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
|
1637
1735
|
return CSSSelector(expr, translator=translator)(self)
|
1638
1736
|
|
1639
1737
|
|
1640
|
-
cdef extern from "includes/etree_defs.h":
|
1641
|
-
# macro call to 't->tp_new()' for fast instantiation
|
1642
|
-
cdef object NEW_ELEMENT "PY_NEW" (object t)
|
1643
|
-
|
1644
|
-
|
1645
1738
|
@cython.linetrace(False)
|
1646
1739
|
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
|
1647
1740
|
cdef _Element result
|
@@ -1651,12 +1744,15 @@ cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
|
|
1651
1744
|
if c_node is NULL:
|
1652
1745
|
return None
|
1653
1746
|
|
1654
|
-
element_class = LOOKUP_ELEMENT_CLASS(
|
1747
|
+
element_class = <type> LOOKUP_ELEMENT_CLASS(
|
1655
1748
|
ELEMENT_CLASS_LOOKUP_STATE, doc, c_node)
|
1749
|
+
if type(element_class) is not type:
|
1750
|
+
if not isinstance(element_class, type):
|
1751
|
+
raise TypeError(f"Element class is not a type, got {type(element_class)}")
|
1656
1752
|
if hasProxy(c_node):
|
1657
1753
|
# prevent re-entry race condition - we just called into Python
|
1658
1754
|
return getProxy(c_node)
|
1659
|
-
result =
|
1755
|
+
result = element_class.__new__(element_class)
|
1660
1756
|
if hasProxy(c_node):
|
1661
1757
|
# prevent re-entry race condition - we just called into Python
|
1662
1758
|
result._c_node = NULL
|
@@ -3082,18 +3178,34 @@ cdef xmlNode* _createEntity(xmlDoc* c_doc, const_xmlChar* name) noexcept:
|
|
3082
3178
|
|
3083
3179
|
# module-level API for ElementTree
|
3084
3180
|
|
3085
|
-
|
3181
|
+
from abc import ABC
|
3182
|
+
|
3183
|
+
class Element(ABC):
|
3086
3184
|
"""Element(_tag, attrib=None, nsmap=None, **_extra)
|
3087
3185
|
|
3088
|
-
Element factory
|
3186
|
+
Element factory, as a class.
|
3187
|
+
|
3188
|
+
An instance of this class is an object implementing the
|
3089
3189
|
Element interface.
|
3090
3190
|
|
3191
|
+
>>> element = Element("test")
|
3192
|
+
>>> type(element)
|
3193
|
+
<class 'lxml.etree._Element'>
|
3194
|
+
>>> isinstance(element, Element)
|
3195
|
+
True
|
3196
|
+
>>> issubclass(_Element, Element)
|
3197
|
+
True
|
3198
|
+
|
3091
3199
|
Also look at the `_Element.makeelement()` and
|
3092
3200
|
`_BaseParser.makeelement()` methods, which provide a faster way to
|
3093
3201
|
create an Element within a specific document or parser context.
|
3094
3202
|
"""
|
3095
|
-
|
3096
|
-
|
3203
|
+
def __new__(cls, _tag, attrib=None, nsmap=None, **_extra):
|
3204
|
+
return _makeElement(_tag, NULL, None, None, None, None,
|
3205
|
+
attrib, nsmap, _extra)
|
3206
|
+
|
3207
|
+
# Register _Element as a virtual subclass of Element
|
3208
|
+
Element.register(_Element)
|
3097
3209
|
|
3098
3210
|
|
3099
3211
|
def Comment(text=None):
|
@@ -3205,32 +3317,41 @@ def SubElement(_Element _parent not None, _tag,
|
|
3205
3317
|
"""
|
3206
3318
|
return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra)
|
3207
3319
|
|
3320
|
+
from typing import Generic, TypeVar
|
3208
3321
|
|
3209
|
-
|
3210
|
-
"""ElementTree(element=None, file=None, parser=None)
|
3322
|
+
T = TypeVar("T")
|
3211
3323
|
|
3212
|
-
|
3213
|
-
|
3214
|
-
|
3215
|
-
cdef xmlNode* c_node
|
3216
|
-
cdef xmlNode* c_node_copy
|
3217
|
-
cdef xmlDoc* c_doc
|
3218
|
-
cdef _ElementTree etree
|
3219
|
-
cdef _Document doc
|
3324
|
+
class ElementTree(ABC, Generic[T]):
|
3325
|
+
def __new__(cls, _Element element=None, *, file=None, _BaseParser parser=None):
|
3326
|
+
"""ElementTree(element=None, file=None, parser=None)
|
3220
3327
|
|
3221
|
-
|
3222
|
-
|
3223
|
-
|
3224
|
-
|
3225
|
-
|
3226
|
-
|
3227
|
-
|
3228
|
-
|
3229
|
-
|
3230
|
-
|
3328
|
+
ElementTree wrapper class.
|
3329
|
+
"""
|
3330
|
+
cdef xmlNode* c_next
|
3331
|
+
cdef xmlNode* c_node
|
3332
|
+
cdef xmlNode* c_node_copy
|
3333
|
+
cdef xmlDoc* c_doc
|
3334
|
+
cdef _ElementTree etree
|
3335
|
+
cdef _Document doc
|
3336
|
+
|
3337
|
+
if element is not None:
|
3338
|
+
doc = element._doc
|
3339
|
+
elif file is not None:
|
3340
|
+
try:
|
3341
|
+
doc = _parseDocument(file, parser, None)
|
3342
|
+
except _TargetParserResult as result_container:
|
3343
|
+
return result_container.result
|
3344
|
+
else:
|
3345
|
+
c_doc = _newXMLDoc()
|
3346
|
+
doc = _documentFactory(c_doc, parser)
|
3347
|
+
|
3348
|
+
return _elementTreeFactory(doc, element)
|
3231
3349
|
|
3232
|
-
|
3350
|
+
# Register _ElementTree as a virtual subclass of ElementTree
|
3351
|
+
ElementTree.register(_ElementTree)
|
3233
3352
|
|
3353
|
+
# Remove "ABC" and typing helpers from module dict
|
3354
|
+
del ABC, Generic, TypeVar, T
|
3234
3355
|
|
3235
3356
|
def HTML(text, _BaseParser parser=None, *, base_url=None):
|
3236
3357
|
"""HTML(text, parser=None, base_url=None)
|