lxml 5.3.2__cp311-cp311-win32.whl → 6.0.0__cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
lxml/etree.pyx CHANGED
@@ -19,6 +19,7 @@ __all__ = [
19
19
  'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', 'HTMLParser',
20
20
  'ICONV_COMPILED_VERSION',
21
21
  'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
22
+ 'LIBXML_FEATURES',
22
23
  'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION',
23
24
  'LXML_VERSION',
24
25
  'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
@@ -299,6 +300,101 @@ cdef extern from *:
299
300
  ICONV_COMPILED_VERSION = __unpackIntVersion(LIBICONV_HEX_VERSION, base=0x100)[:2]
300
301
 
301
302
 
303
+ cdef extern from "libxml/xmlversion.h":
304
+ """
305
+ static const char* const _lxml_lib_features[] = {
306
+ #ifdef LIBXML_HTML_ENABLED
307
+ "html",
308
+ #endif
309
+ #ifdef LIBXML_FTP_ENABLED
310
+ "ftp",
311
+ #endif
312
+ #ifdef LIBXML_HTTP_ENABLED
313
+ "http",
314
+ #endif
315
+ #ifdef LIBXML_CATALOG_ENABLED
316
+ "catalog",
317
+ #endif
318
+ #ifdef LIBXML_XPATH_ENABLED
319
+ "xpath",
320
+ #endif
321
+ #ifdef LIBXML_ICONV_ENABLED
322
+ "iconv",
323
+ #endif
324
+ #ifdef LIBXML_ICU_ENABLED
325
+ "icu",
326
+ #endif
327
+ #ifdef LIBXML_REGEXP_ENABLED
328
+ "regexp",
329
+ #endif
330
+ #ifdef LIBXML_SCHEMAS_ENABLED
331
+ "xmlschema",
332
+ #endif
333
+ #ifdef LIBXML_SCHEMATRON_ENABLED
334
+ "schematron",
335
+ #endif
336
+ #ifdef LIBXML_ZLIB_ENABLED
337
+ "zlib",
338
+ #endif
339
+ #ifdef LIBXML_LZMA_ENABLED
340
+ "lzma",
341
+ #endif
342
+ 0
343
+ };
344
+ """
345
+ const char* const* _LXML_LIB_FEATURES "_lxml_lib_features"
346
+
347
+
348
+ cdef set _copy_lib_features():
349
+ features = set()
350
+ feature = _LXML_LIB_FEATURES
351
+ while feature[0]:
352
+ features.add(feature[0].decode('ASCII'))
353
+ feature += 1
354
+ return features
355
+
356
+ LIBXML_COMPILED_FEATURES = _copy_lib_features()
357
+ LIBXML_FEATURES = {
358
+ feature_name for feature_id, feature_name in [
359
+ #XML_WITH_THREAD = 1
360
+ #XML_WITH_TREE = 2
361
+ #XML_WITH_OUTPUT = 3
362
+ #XML_WITH_PUSH = 4
363
+ #XML_WITH_READER = 5
364
+ #XML_WITH_PATTERN = 6
365
+ #XML_WITH_WRITER = 7
366
+ #XML_WITH_SAX1 = 8
367
+ (xmlparser.XML_WITH_FTP, "ftp"), # XML_WITH_FTP = 9
368
+ (xmlparser.XML_WITH_HTTP, "http"), # XML_WITH_HTTP = 10
369
+ #XML_WITH_VALID = 11
370
+ (xmlparser.XML_WITH_HTML, "html"), # XML_WITH_HTML = 12
371
+ #XML_WITH_LEGACY = 13
372
+ #XML_WITH_C14N = 14
373
+ (xmlparser.XML_WITH_CATALOG, "catalog"), # XML_WITH_CATALOG = 15
374
+ (xmlparser.XML_WITH_XPATH, "xpath"), # XML_WITH_XPATH = 16
375
+ #XML_WITH_XPTR = 17
376
+ #XML_WITH_XINCLUDE = 18
377
+ (xmlparser.XML_WITH_ICONV, "iconv"), # XML_WITH_ICONV = 19
378
+ #XML_WITH_ISO8859X = 20
379
+ #XML_WITH_UNICODE = 21
380
+ (xmlparser.XML_WITH_REGEXP, "regexp"), # XML_WITH_REGEXP = 22
381
+ #XML_WITH_AUTOMATA = 23
382
+ #XML_WITH_EXPR = 24
383
+ (xmlparser.XML_WITH_SCHEMAS, "xmlschema"), # XML_WITH_SCHEMAS = 25
384
+ (xmlparser.XML_WITH_SCHEMATRON, "schematron"), # XML_WITH_SCHEMATRON = 26
385
+ #XML_WITH_MODULES = 27
386
+ #XML_WITH_DEBUG = 28
387
+ #XML_WITH_DEBUG_MEM = 29
388
+ #XML_WITH_DEBUG_RUN = 30 # unused
389
+ (xmlparser.XML_WITH_ZLIB, "zlib"), # XML_WITH_ZLIB = 31
390
+ (xmlparser.XML_WITH_ICU, "icu"), # XML_WITH_ICU = 32
391
+ (xmlparser.XML_WITH_LZMA, "lzma"), # XML_WITH_LZMA = 33
392
+ ] if xmlparser.xmlHasFeature(feature_id)
393
+ }
394
+
395
+ cdef bint HAS_ZLIB_COMPRESSION = xmlparser.xmlHasFeature(xmlparser.XML_WITH_ZLIB)
396
+
397
+
302
398
  # class for temporary storage of Python references,
303
399
  # used e.g. for XPath results
304
400
  @cython.final
@@ -519,13 +615,15 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
519
615
  c_ns = self._findOrBuildNodeNs(c_node, c_href, NULL, 0)
520
616
  tree.xmlSetNs(c_node, c_ns)
521
617
 
618
+
522
619
  cdef tuple __initPrefixCache():
523
620
  cdef int i
524
621
  return tuple([ python.PyBytes_FromFormat("ns%d", i)
525
- for i in range(30) ])
622
+ for i in range(26) ])
526
623
 
527
624
  cdef tuple _PREFIX_CACHE = __initPrefixCache()
528
625
 
626
+
529
627
  cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
530
628
  cdef _Document result
531
629
  result = _Document.__new__(_Document)
@@ -1637,11 +1735,6 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
1637
1735
  return CSSSelector(expr, translator=translator)(self)
1638
1736
 
1639
1737
 
1640
- cdef extern from "includes/etree_defs.h":
1641
- # macro call to 't->tp_new()' for fast instantiation
1642
- cdef object NEW_ELEMENT "PY_NEW" (object t)
1643
-
1644
-
1645
1738
  @cython.linetrace(False)
1646
1739
  cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
1647
1740
  cdef _Element result
@@ -1651,12 +1744,15 @@ cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
1651
1744
  if c_node is NULL:
1652
1745
  return None
1653
1746
 
1654
- element_class = LOOKUP_ELEMENT_CLASS(
1747
+ element_class = <type> LOOKUP_ELEMENT_CLASS(
1655
1748
  ELEMENT_CLASS_LOOKUP_STATE, doc, c_node)
1749
+ if type(element_class) is not type:
1750
+ if not isinstance(element_class, type):
1751
+ raise TypeError(f"Element class is not a type, got {type(element_class)}")
1656
1752
  if hasProxy(c_node):
1657
1753
  # prevent re-entry race condition - we just called into Python
1658
1754
  return getProxy(c_node)
1659
- result = NEW_ELEMENT(element_class)
1755
+ result = element_class.__new__(element_class)
1660
1756
  if hasProxy(c_node):
1661
1757
  # prevent re-entry race condition - we just called into Python
1662
1758
  result._c_node = NULL
@@ -3082,18 +3178,34 @@ cdef xmlNode* _createEntity(xmlDoc* c_doc, const_xmlChar* name) noexcept:
3082
3178
 
3083
3179
  # module-level API for ElementTree
3084
3180
 
3085
- def Element(_tag, attrib=None, nsmap=None, **_extra):
3181
+ from abc import ABC
3182
+
3183
+ class Element(ABC):
3086
3184
  """Element(_tag, attrib=None, nsmap=None, **_extra)
3087
3185
 
3088
- Element factory. This function returns an object implementing the
3186
+ Element factory, as a class.
3187
+
3188
+ An instance of this class is an object implementing the
3089
3189
  Element interface.
3090
3190
 
3191
+ >>> element = Element("test")
3192
+ >>> type(element)
3193
+ <class 'lxml.etree._Element'>
3194
+ >>> isinstance(element, Element)
3195
+ True
3196
+ >>> issubclass(_Element, Element)
3197
+ True
3198
+
3091
3199
  Also look at the `_Element.makeelement()` and
3092
3200
  `_BaseParser.makeelement()` methods, which provide a faster way to
3093
3201
  create an Element within a specific document or parser context.
3094
3202
  """
3095
- return _makeElement(_tag, NULL, None, None, None, None,
3096
- attrib, nsmap, _extra)
3203
+ def __new__(cls, _tag, attrib=None, nsmap=None, **_extra):
3204
+ return _makeElement(_tag, NULL, None, None, None, None,
3205
+ attrib, nsmap, _extra)
3206
+
3207
+ # Register _Element as a virtual subclass of Element
3208
+ Element.register(_Element)
3097
3209
 
3098
3210
 
3099
3211
  def Comment(text=None):
@@ -3205,32 +3317,41 @@ def SubElement(_Element _parent not None, _tag,
3205
3317
  """
3206
3318
  return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra)
3207
3319
 
3320
+ from typing import Generic, TypeVar
3208
3321
 
3209
- def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None):
3210
- """ElementTree(element=None, file=None, parser=None)
3322
+ T = TypeVar("T")
3211
3323
 
3212
- ElementTree wrapper class.
3213
- """
3214
- cdef xmlNode* c_next
3215
- cdef xmlNode* c_node
3216
- cdef xmlNode* c_node_copy
3217
- cdef xmlDoc* c_doc
3218
- cdef _ElementTree etree
3219
- cdef _Document doc
3324
+ class ElementTree(ABC, Generic[T]):
3325
+ def __new__(cls, _Element element=None, *, file=None, _BaseParser parser=None):
3326
+ """ElementTree(element=None, file=None, parser=None)
3220
3327
 
3221
- if element is not None:
3222
- doc = element._doc
3223
- elif file is not None:
3224
- try:
3225
- doc = _parseDocument(file, parser, None)
3226
- except _TargetParserResult as result_container:
3227
- return result_container.result
3228
- else:
3229
- c_doc = _newXMLDoc()
3230
- doc = _documentFactory(c_doc, parser)
3328
+ ElementTree wrapper class.
3329
+ """
3330
+ cdef xmlNode* c_next
3331
+ cdef xmlNode* c_node
3332
+ cdef xmlNode* c_node_copy
3333
+ cdef xmlDoc* c_doc
3334
+ cdef _ElementTree etree
3335
+ cdef _Document doc
3336
+
3337
+ if element is not None:
3338
+ doc = element._doc
3339
+ elif file is not None:
3340
+ try:
3341
+ doc = _parseDocument(file, parser, None)
3342
+ except _TargetParserResult as result_container:
3343
+ return result_container.result
3344
+ else:
3345
+ c_doc = _newXMLDoc()
3346
+ doc = _documentFactory(c_doc, parser)
3347
+
3348
+ return _elementTreeFactory(doc, element)
3231
3349
 
3232
- return _elementTreeFactory(doc, element)
3350
+ # Register _ElementTree as a virtual subclass of ElementTree
3351
+ ElementTree.register(_ElementTree)
3233
3352
 
3353
+ # Remove "ABC" and typing helpers from module dict
3354
+ del ABC, Generic, TypeVar, T
3234
3355
 
3235
3356
  def HTML(text, _BaseParser parser=None, *, base_url=None):
3236
3357
  """HTML(text, parser=None, base_url=None)