lxml 5.3.2__cp39-cp39-macosx_10_9_universal2.whl → 6.0.0__cp39-cp39-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. lxml/__init__.py +1 -1
  2. lxml/_elementpath.cpython-39-darwin.so +0 -0
  3. lxml/_elementpath.py +3 -1
  4. lxml/apihelpers.pxi +25 -17
  5. lxml/builder.cpython-39-darwin.so +0 -0
  6. lxml/builder.py +11 -0
  7. lxml/debug.pxi +0 -54
  8. lxml/etree.cpython-39-darwin.so +0 -0
  9. lxml/etree.h +24 -28
  10. lxml/etree.pyx +154 -33
  11. lxml/etree_api.h +59 -50
  12. lxml/extensions.pxi +3 -6
  13. lxml/html/__init__.py +7 -3
  14. lxml/html/_difflib.cpython-39-darwin.so +0 -0
  15. lxml/html/_difflib.py +2106 -0
  16. lxml/html/builder.py +40 -0
  17. lxml/html/defs.py +3 -3
  18. lxml/html/diff.cpython-39-darwin.so +0 -0
  19. lxml/html/diff.py +406 -312
  20. lxml/includes/etree_defs.h +6 -6
  21. lxml/includes/libexslt/exsltconfig.h +3 -3
  22. lxml/includes/libxml/HTMLparser.h +41 -45
  23. lxml/includes/libxml/HTMLtree.h +1 -0
  24. lxml/includes/libxml/SAX.h +2 -186
  25. lxml/includes/libxml/SAX2.h +2 -3
  26. lxml/includes/libxml/c14n.h +1 -12
  27. lxml/includes/libxml/catalog.h +1 -0
  28. lxml/includes/libxml/debugXML.h +0 -138
  29. lxml/includes/libxml/encoding.h +131 -59
  30. lxml/includes/libxml/entities.h +12 -20
  31. lxml/includes/libxml/globals.h +0 -16
  32. lxml/includes/libxml/hash.h +19 -0
  33. lxml/includes/libxml/list.h +2 -2
  34. lxml/includes/libxml/nanoftp.h +3 -173
  35. lxml/includes/libxml/nanohttp.h +17 -0
  36. lxml/includes/libxml/parser.h +505 -256
  37. lxml/includes/libxml/parserInternals.h +26 -98
  38. lxml/includes/libxml/relaxng.h +7 -2
  39. lxml/includes/libxml/threads.h +0 -6
  40. lxml/includes/libxml/tree.h +61 -97
  41. lxml/includes/libxml/uri.h +11 -0
  42. lxml/includes/libxml/valid.h +49 -14
  43. lxml/includes/libxml/xinclude.h +12 -0
  44. lxml/includes/libxml/xlink.h +4 -0
  45. lxml/includes/libxml/xmlIO.h +33 -35
  46. lxml/includes/libxml/xmlautomata.h +19 -2
  47. lxml/includes/libxml/xmlerror.h +32 -18
  48. lxml/includes/libxml/xmlexports.h +61 -15
  49. lxml/includes/libxml/xmlmemory.h +27 -64
  50. lxml/includes/libxml/xmlmodule.h +4 -0
  51. lxml/includes/libxml/xmlreader.h +13 -3
  52. lxml/includes/libxml/xmlregexp.h +7 -106
  53. lxml/includes/libxml/xmlsave.h +15 -1
  54. lxml/includes/libxml/xmlschemas.h +10 -5
  55. lxml/includes/libxml/xmlunicode.h +3 -190
  56. lxml/includes/libxml/xmlversion.h +15 -194
  57. lxml/includes/libxml/xmlwriter.h +1 -0
  58. lxml/includes/libxml/xpath.h +9 -15
  59. lxml/includes/libxml/xpathInternals.h +9 -3
  60. lxml/includes/libxml/xpointer.h +1 -91
  61. lxml/includes/libxslt/xsltconfig.h +6 -6
  62. lxml/includes/lxml-version.h +1 -1
  63. lxml/includes/tree.pxd +10 -12
  64. lxml/includes/xmlparser.pxd +46 -8
  65. lxml/lxml.etree.h +24 -28
  66. lxml/lxml.etree_api.h +59 -50
  67. lxml/objectify.cpython-39-darwin.so +0 -0
  68. lxml/objectify.pyx +11 -7
  69. lxml/parser.pxi +106 -47
  70. lxml/sax.cpython-39-darwin.so +0 -0
  71. lxml/sax.py +11 -0
  72. lxml/saxparser.pxi +14 -14
  73. lxml/schematron.pxi +8 -3
  74. lxml/serializer.pxi +71 -3
  75. lxml/xslt.pxi +10 -3
  76. lxml-6.0.0.dist-info/METADATA +163 -0
  77. {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/RECORD +81 -79
  78. {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/WHEEL +2 -1
  79. {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSE.txt +3 -1
  80. lxml-5.3.2.dist-info/METADATA +0 -100
  81. {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSES.txt +0 -0
  82. {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/top_level.txt +0 -0
lxml/__init__.py CHANGED
@@ -1,6 +1,6 @@
1
1
  # this is a package
2
2
 
3
- __version__ = "5.3.2"
3
+ __version__ = "6.0.0"
4
4
 
5
5
 
6
6
  def get_include():
Binary file
lxml/_elementpath.py CHANGED
@@ -1,4 +1,4 @@
1
- # cython: language_level=2
1
+ # cython: language_level=3
2
2
 
3
3
  #
4
4
  # ElementTree
@@ -85,6 +85,8 @@ def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
85
85
  yield ttype, "{%s}%s" % (namespaces[prefix], uri)
86
86
  except KeyError:
87
87
  raise SyntaxError("prefix %r not found in prefix map" % prefix)
88
+ elif tag.isdecimal():
89
+ yield token # index
88
90
  elif default_namespace and not parsing_attribute:
89
91
  yield ttype, "{%s}%s" % (default_namespace, tag)
90
92
  else:
lxml/apihelpers.pxi CHANGED
@@ -439,7 +439,7 @@ cdef int _removeUnusedNamespaceDeclarations(xmlNode* c_element, set prefixes_to_
439
439
  c_nsdef = c_nsdef.next
440
440
  c_nsdef.next = c_nsdef.next.next
441
441
  tree.xmlFreeNs(c_ns_list[i].ns)
442
-
442
+
443
443
  if c_ns_list is not NULL:
444
444
  python.lxml_free(c_ns_list)
445
445
  return 0
@@ -685,7 +685,7 @@ cdef unicode _collectText(xmlNode* c_node):
685
685
  """Collect all text nodes and return them as a unicode string.
686
686
 
687
687
  Start collecting at c_node.
688
-
688
+
689
689
  If there was no text to collect, return None
690
690
  """
691
691
  cdef Py_ssize_t scount
@@ -845,7 +845,7 @@ cdef inline xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index) noexcept:
845
845
  return _findChildBackwards(c_node, -index - 1)
846
846
  else:
847
847
  return _findChildForwards(c_node, index)
848
-
848
+
849
849
  cdef inline xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index) noexcept:
850
850
  """Return child element of c_node with index, or return NULL if not found.
851
851
  """
@@ -876,7 +876,7 @@ cdef inline xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index) noex
876
876
  c += 1
877
877
  c_child = c_child.prev
878
878
  return NULL
879
-
879
+
880
880
  cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node) noexcept nogil:
881
881
  """Return the node if it's a text node. Skip over ignorable nodes in a
882
882
  series of text nodes. Return NULL if a non-ignorable node is found.
@@ -1031,23 +1031,31 @@ cdef Py_ssize_t _mapTagsToQnameMatchArray(xmlDoc* c_doc, list ns_tags,
1031
1031
  Note that each qname struct in the array owns its href byte string object
1032
1032
  if it is not NULL.
1033
1033
  """
1034
- cdef Py_ssize_t count = 0, i
1034
+ cdef Py_ssize_t count = 0, i, c_tag_len
1035
1035
  cdef bytes ns, tag
1036
+ cdef const_xmlChar* c_tag
1037
+
1036
1038
  for ns, tag in ns_tags:
1037
1039
  if tag is None:
1038
- c_tag = <const_xmlChar*>NULL
1039
- elif force_into_dict:
1040
- c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), len(tag))
1041
- if c_tag is NULL:
1042
- # clean up before raising the error
1043
- for i in xrange(count):
1044
- cpython.ref.Py_XDECREF(c_ns_tags[i].href)
1045
- raise MemoryError()
1040
+ c_tag = <const_xmlChar*> NULL
1046
1041
  else:
1047
- c_tag = tree.xmlDictExists(c_doc.dict, _xcstr(tag), len(tag))
1048
- if c_tag is NULL:
1049
- # not in the dict => not in the document
1042
+ c_tag_len = len(tag)
1043
+ if c_tag_len > limits.INT_MAX:
1044
+ # too long, not in the dict => not in the document
1050
1045
  continue
1046
+ elif force_into_dict:
1047
+ c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), <int> c_tag_len)
1048
+ if c_tag is NULL:
1049
+ # clean up before raising the error
1050
+ for i in xrange(count):
1051
+ cpython.ref.Py_XDECREF(c_ns_tags[i].href)
1052
+ raise MemoryError()
1053
+ else:
1054
+ c_tag = tree.xmlDictExists(c_doc.dict, _xcstr(tag), <int> c_tag_len)
1055
+ if c_tag is NULL:
1056
+ # not in the dict => not in the document
1057
+ continue
1058
+
1051
1059
  c_ns_tags[count].c_name = c_tag
1052
1060
  if ns is None:
1053
1061
  c_ns_tags[count].href = NULL
@@ -1095,7 +1103,7 @@ cdef int _removeSiblings(xmlNode* c_element, tree.xmlElementType node_type, bint
1095
1103
 
1096
1104
  cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target) noexcept:
1097
1105
  cdef xmlNode* c_next
1098
- # tail support: look for any text nodes trailing this node and
1106
+ # tail support: look for any text nodes trailing this node and
1099
1107
  # move them too
1100
1108
  c_tail = _textNodeOrSkip(c_tail)
1101
1109
  while c_tail is not NULL:
Binary file
lxml/builder.py CHANGED
@@ -45,6 +45,13 @@ _QName = ET.QName
45
45
 
46
46
  from functools import partial
47
47
 
48
+ try:
49
+ from types import GenericAlias as _GenericAlias
50
+ except ImportError:
51
+ # Python 3.8 - we only need this as return value from "__class_getitem__"
52
+ def _GenericAlias(cls, item):
53
+ return f"{cls.__name__}[{item.__name__}]"
54
+
48
55
  try:
49
56
  basestring
50
57
  except NameError:
@@ -227,6 +234,10 @@ class ElementMaker:
227
234
  def __getattr__(self, tag):
228
235
  return partial(self, tag)
229
236
 
237
+ # Allow subscripting ElementMaker in type annotations (PEP 560)
238
+ def __class_getitem__(cls, item):
239
+ return _GenericAlias(cls, item)
240
+
230
241
 
231
242
  # create factory object
232
243
  E = ElementMaker()
lxml/debug.pxi CHANGED
@@ -32,59 +32,5 @@ cdef class _MemDebug:
32
32
  raise MemoryError()
33
33
  return tree.xmlDictSize(c_dict)
34
34
 
35
- def dump(self, output_file=None, byte_count=None):
36
- """dump(self, output_file=None, byte_count=None)
37
-
38
- Dumps the current memory blocks allocated by libxml2 to a file.
39
-
40
- The optional parameter 'output_file' specifies the file path. It defaults
41
- to the file ".memorylist" in the current directory.
42
-
43
- The optional parameter 'byte_count' limits the number of bytes in the dump.
44
- Note that this parameter is ignored when lxml is compiled against a libxml2
45
- version before 2.7.0.
46
- """
47
- cdef Py_ssize_t c_count
48
- if output_file is None:
49
- output_file = b'.memorylist'
50
- elif isinstance(output_file, unicode):
51
- output_file.encode(sys.getfilesystemencoding())
52
-
53
- f = stdio.fopen(output_file, "w")
54
- if f is NULL:
55
- raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}")
56
- try:
57
- if byte_count is None:
58
- tree.xmlMemDisplay(f)
59
- else:
60
- c_count = byte_count
61
- tree.xmlMemDisplayLast(f, c_count)
62
- finally:
63
- stdio.fclose(f)
64
-
65
- def show(self, output_file=None, block_count=None):
66
- """show(self, output_file=None, block_count=None)
67
-
68
- Dumps the current memory blocks allocated by libxml2 to a file.
69
- The output file format is suitable for line diffing.
70
-
71
- The optional parameter 'output_file' specifies the file path. It defaults
72
- to the file ".memorydump" in the current directory.
73
-
74
- The optional parameter 'block_count' limits the number of blocks
75
- in the dump.
76
- """
77
- if output_file is None:
78
- output_file = b'.memorydump'
79
- elif isinstance(output_file, unicode):
80
- output_file.encode(sys.getfilesystemencoding())
81
-
82
- f = stdio.fopen(output_file, "w")
83
- if f is NULL:
84
- raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}")
85
- try:
86
- tree.xmlMemShow(f, block_count if block_count is not None else tree.xmlMemBlocks())
87
- finally:
88
- stdio.fclose(f)
89
35
 
90
36
  memory_debugger = _MemDebug()
Binary file
lxml/etree.h CHANGED
@@ -1,4 +1,4 @@
1
- /* Generated by Cython 3.0.12 */
1
+ /* Generated by Cython 3.1.2 */
2
2
 
3
3
  #ifndef __PYX_HAVE__lxml__etree
4
4
  #define __PYX_HAVE__lxml__etree
@@ -13,22 +13,22 @@ struct LxmlElementBase;
13
13
  struct LxmlElementClassLookup;
14
14
  struct LxmlFallbackElementClassLookup;
15
15
 
16
- /* "lxml/etree.pyx":355
16
+ /* "lxml/etree.pyx":451
17
17
  *
18
18
  * # type of a function that steps from node to node
19
19
  * ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
20
20
  *
21
21
  *
22
- */
22
+ */
23
23
  typedef xmlNode *(*_node_to_node_function)(xmlNode *);
24
24
 
25
- /* "lxml/etree.pyx":371
26
- * @cython.final
27
- * @cython.freelist(8)
28
- * cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<<
29
- * """Internal base class to reference a libxml document.
25
+ /* "lxml/etree.pyx":465
26
+ * # Public Python API
30
27
  *
31
- */
28
+ * @cython.final # <<<<<<<<<<<<<<
29
+ * @cython.freelist(8)
30
+ * cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
31
+ */
32
32
  struct LxmlDocument {
33
33
  PyObject_HEAD
34
34
  struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
@@ -38,13 +38,13 @@ struct LxmlDocument {
38
38
  struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
39
39
  };
40
40
 
41
- /* "lxml/etree.pyx":720
41
+ /* "lxml/etree.pyx":817
42
42
  *
43
- * @cython.no_gc_clear
44
- * cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<<
45
- * """Element class.
46
43
  *
47
- */
44
+ * @cython.no_gc_clear # <<<<<<<<<<<<<<
45
+ * cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
46
+ * """Element class.
47
+ */
48
48
  struct LxmlElement {
49
49
  PyObject_HEAD
50
50
  struct LxmlDocument *_doc;
@@ -52,13 +52,13 @@ struct LxmlElement {
52
52
  PyObject *_tag;
53
53
  };
54
54
 
55
- /* "lxml/etree.pyx":1895
55
+ /* "lxml/etree.pyx":1991
56
56
  *
57
57
  *
58
58
  * cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
59
59
  * object LxmlElementTree ]:
60
60
  * cdef _Document _doc
61
- */
61
+ */
62
62
  struct LxmlElementTree {
63
63
  PyObject_HEAD
64
64
  struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
@@ -66,13 +66,13 @@ struct LxmlElementTree {
66
66
  struct LxmlElement *_context_node;
67
67
  };
68
68
 
69
- /* "lxml/etree.pyx":2669
69
+ /* "lxml/etree.pyx":2765
70
70
  *
71
71
  *
72
72
  * cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
73
73
  * type LxmlElementTagMatcherType ]:
74
74
  * """
75
- */
75
+ */
76
76
  struct LxmlElementTagMatcher {
77
77
  PyObject_HEAD
78
78
  struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
@@ -82,13 +82,13 @@ struct LxmlElementTagMatcher {
82
82
  char *_name;
83
83
  };
84
84
 
85
- /* "lxml/etree.pyx":2700
85
+ /* "lxml/etree.pyx":2796
86
86
  * self._name = NULL
87
87
  *
88
88
  * cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
89
89
  * object LxmlElementIterator, type LxmlElementIteratorType ]:
90
90
  * """
91
- */
91
+ */
92
92
  struct LxmlElementIterator {
93
93
  struct LxmlElementTagMatcher __pyx_base;
94
94
  struct LxmlElement *_node;
@@ -101,7 +101,7 @@ struct LxmlElementIterator {
101
101
  * cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
102
102
  * object LxmlElementBase ]:
103
103
  * """ElementBase(*children, attrib=None, nsmap=None, **_extra)
104
- */
104
+ */
105
105
  struct LxmlElementBase {
106
106
  struct LxmlElement __pyx_base;
107
107
  };
@@ -112,7 +112,7 @@ struct LxmlElementBase {
112
112
  * ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
113
113
  *
114
114
  * # class to store element class lookup functions
115
- */
115
+ */
116
116
  typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
117
117
 
118
118
  /* "src/lxml/classlookup.pxi":213
@@ -121,7 +121,7 @@ typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocum
121
121
  * cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
122
122
  * object LxmlElementClassLookup ]:
123
123
  * """ElementClassLookup(self)
124
- */
124
+ */
125
125
  struct LxmlElementClassLookup {
126
126
  PyObject_HEAD
127
127
  _element_class_lookup_function _lookup_function;
@@ -133,7 +133,7 @@ struct LxmlElementClassLookup {
133
133
  * cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
134
134
  * [ type LxmlFallbackElementClassLookupType,
135
135
  * object LxmlFallbackElementClassLookup ]:
136
- */
136
+ */
137
137
  struct LxmlFallbackElementClassLookup {
138
138
  struct LxmlElementClassLookup __pyx_base;
139
139
  struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
@@ -224,9 +224,6 @@ __PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *,
224
224
  /* WARNING: the interface of the module init function changed in CPython 3.5. */
225
225
  /* It now returns a PyModuleDef instance instead of a PyModule instance. */
226
226
 
227
- #if PY_MAJOR_VERSION < 3
228
- PyMODINIT_FUNC initetree(void);
229
- #else
230
227
  /* WARNING: Use PyImport_AppendInittab("etree", PyInit_etree) instead of calling PyInit_etree directly from Python 3.5 */
231
228
  PyMODINIT_FUNC PyInit_etree(void);
232
229
 
@@ -243,6 +240,5 @@ static PyObject* __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyObject* res) {
243
240
  }
244
241
  #define PyInit_etree() __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyInit_etree())
245
242
  #endif
246
- #endif
247
243
 
248
244
  #endif /* !__PYX_HAVE__lxml__etree */
lxml/etree.pyx CHANGED
@@ -19,6 +19,7 @@ __all__ = [
19
19
  'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', 'HTMLParser',
20
20
  'ICONV_COMPILED_VERSION',
21
21
  'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
22
+ 'LIBXML_FEATURES',
22
23
  'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION',
23
24
  'LXML_VERSION',
24
25
  'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
@@ -299,6 +300,101 @@ cdef extern from *:
299
300
  ICONV_COMPILED_VERSION = __unpackIntVersion(LIBICONV_HEX_VERSION, base=0x100)[:2]
300
301
 
301
302
 
303
+ cdef extern from "libxml/xmlversion.h":
304
+ """
305
+ static const char* const _lxml_lib_features[] = {
306
+ #ifdef LIBXML_HTML_ENABLED
307
+ "html",
308
+ #endif
309
+ #ifdef LIBXML_FTP_ENABLED
310
+ "ftp",
311
+ #endif
312
+ #ifdef LIBXML_HTTP_ENABLED
313
+ "http",
314
+ #endif
315
+ #ifdef LIBXML_CATALOG_ENABLED
316
+ "catalog",
317
+ #endif
318
+ #ifdef LIBXML_XPATH_ENABLED
319
+ "xpath",
320
+ #endif
321
+ #ifdef LIBXML_ICONV_ENABLED
322
+ "iconv",
323
+ #endif
324
+ #ifdef LIBXML_ICU_ENABLED
325
+ "icu",
326
+ #endif
327
+ #ifdef LIBXML_REGEXP_ENABLED
328
+ "regexp",
329
+ #endif
330
+ #ifdef LIBXML_SCHEMAS_ENABLED
331
+ "xmlschema",
332
+ #endif
333
+ #ifdef LIBXML_SCHEMATRON_ENABLED
334
+ "schematron",
335
+ #endif
336
+ #ifdef LIBXML_ZLIB_ENABLED
337
+ "zlib",
338
+ #endif
339
+ #ifdef LIBXML_LZMA_ENABLED
340
+ "lzma",
341
+ #endif
342
+ 0
343
+ };
344
+ """
345
+ const char* const* _LXML_LIB_FEATURES "_lxml_lib_features"
346
+
347
+
348
+ cdef set _copy_lib_features():
349
+ features = set()
350
+ feature = _LXML_LIB_FEATURES
351
+ while feature[0]:
352
+ features.add(feature[0].decode('ASCII'))
353
+ feature += 1
354
+ return features
355
+
356
+ LIBXML_COMPILED_FEATURES = _copy_lib_features()
357
+ LIBXML_FEATURES = {
358
+ feature_name for feature_id, feature_name in [
359
+ #XML_WITH_THREAD = 1
360
+ #XML_WITH_TREE = 2
361
+ #XML_WITH_OUTPUT = 3
362
+ #XML_WITH_PUSH = 4
363
+ #XML_WITH_READER = 5
364
+ #XML_WITH_PATTERN = 6
365
+ #XML_WITH_WRITER = 7
366
+ #XML_WITH_SAX1 = 8
367
+ (xmlparser.XML_WITH_FTP, "ftp"), # XML_WITH_FTP = 9
368
+ (xmlparser.XML_WITH_HTTP, "http"), # XML_WITH_HTTP = 10
369
+ #XML_WITH_VALID = 11
370
+ (xmlparser.XML_WITH_HTML, "html"), # XML_WITH_HTML = 12
371
+ #XML_WITH_LEGACY = 13
372
+ #XML_WITH_C14N = 14
373
+ (xmlparser.XML_WITH_CATALOG, "catalog"), # XML_WITH_CATALOG = 15
374
+ (xmlparser.XML_WITH_XPATH, "xpath"), # XML_WITH_XPATH = 16
375
+ #XML_WITH_XPTR = 17
376
+ #XML_WITH_XINCLUDE = 18
377
+ (xmlparser.XML_WITH_ICONV, "iconv"), # XML_WITH_ICONV = 19
378
+ #XML_WITH_ISO8859X = 20
379
+ #XML_WITH_UNICODE = 21
380
+ (xmlparser.XML_WITH_REGEXP, "regexp"), # XML_WITH_REGEXP = 22
381
+ #XML_WITH_AUTOMATA = 23
382
+ #XML_WITH_EXPR = 24
383
+ (xmlparser.XML_WITH_SCHEMAS, "xmlschema"), # XML_WITH_SCHEMAS = 25
384
+ (xmlparser.XML_WITH_SCHEMATRON, "schematron"), # XML_WITH_SCHEMATRON = 26
385
+ #XML_WITH_MODULES = 27
386
+ #XML_WITH_DEBUG = 28
387
+ #XML_WITH_DEBUG_MEM = 29
388
+ #XML_WITH_DEBUG_RUN = 30 # unused
389
+ (xmlparser.XML_WITH_ZLIB, "zlib"), # XML_WITH_ZLIB = 31
390
+ (xmlparser.XML_WITH_ICU, "icu"), # XML_WITH_ICU = 32
391
+ (xmlparser.XML_WITH_LZMA, "lzma"), # XML_WITH_LZMA = 33
392
+ ] if xmlparser.xmlHasFeature(feature_id)
393
+ }
394
+
395
+ cdef bint HAS_ZLIB_COMPRESSION = xmlparser.xmlHasFeature(xmlparser.XML_WITH_ZLIB)
396
+
397
+
302
398
  # class for temporary storage of Python references,
303
399
  # used e.g. for XPath results
304
400
  @cython.final
@@ -519,13 +615,15 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
519
615
  c_ns = self._findOrBuildNodeNs(c_node, c_href, NULL, 0)
520
616
  tree.xmlSetNs(c_node, c_ns)
521
617
 
618
+
522
619
  cdef tuple __initPrefixCache():
523
620
  cdef int i
524
621
  return tuple([ python.PyBytes_FromFormat("ns%d", i)
525
- for i in range(30) ])
622
+ for i in range(26) ])
526
623
 
527
624
  cdef tuple _PREFIX_CACHE = __initPrefixCache()
528
625
 
626
+
529
627
  cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
530
628
  cdef _Document result
531
629
  result = _Document.__new__(_Document)
@@ -1637,11 +1735,6 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
1637
1735
  return CSSSelector(expr, translator=translator)(self)
1638
1736
 
1639
1737
 
1640
- cdef extern from "includes/etree_defs.h":
1641
- # macro call to 't->tp_new()' for fast instantiation
1642
- cdef object NEW_ELEMENT "PY_NEW" (object t)
1643
-
1644
-
1645
1738
  @cython.linetrace(False)
1646
1739
  cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
1647
1740
  cdef _Element result
@@ -1651,12 +1744,15 @@ cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
1651
1744
  if c_node is NULL:
1652
1745
  return None
1653
1746
 
1654
- element_class = LOOKUP_ELEMENT_CLASS(
1747
+ element_class = <type> LOOKUP_ELEMENT_CLASS(
1655
1748
  ELEMENT_CLASS_LOOKUP_STATE, doc, c_node)
1749
+ if type(element_class) is not type:
1750
+ if not isinstance(element_class, type):
1751
+ raise TypeError(f"Element class is not a type, got {type(element_class)}")
1656
1752
  if hasProxy(c_node):
1657
1753
  # prevent re-entry race condition - we just called into Python
1658
1754
  return getProxy(c_node)
1659
- result = NEW_ELEMENT(element_class)
1755
+ result = element_class.__new__(element_class)
1660
1756
  if hasProxy(c_node):
1661
1757
  # prevent re-entry race condition - we just called into Python
1662
1758
  result._c_node = NULL
@@ -3082,18 +3178,34 @@ cdef xmlNode* _createEntity(xmlDoc* c_doc, const_xmlChar* name) noexcept:
3082
3178
 
3083
3179
  # module-level API for ElementTree
3084
3180
 
3085
- def Element(_tag, attrib=None, nsmap=None, **_extra):
3181
+ from abc import ABC
3182
+
3183
+ class Element(ABC):
3086
3184
  """Element(_tag, attrib=None, nsmap=None, **_extra)
3087
3185
 
3088
- Element factory. This function returns an object implementing the
3186
+ Element factory, as a class.
3187
+
3188
+ An instance of this class is an object implementing the
3089
3189
  Element interface.
3090
3190
 
3191
+ >>> element = Element("test")
3192
+ >>> type(element)
3193
+ <class 'lxml.etree._Element'>
3194
+ >>> isinstance(element, Element)
3195
+ True
3196
+ >>> issubclass(_Element, Element)
3197
+ True
3198
+
3091
3199
  Also look at the `_Element.makeelement()` and
3092
3200
  `_BaseParser.makeelement()` methods, which provide a faster way to
3093
3201
  create an Element within a specific document or parser context.
3094
3202
  """
3095
- return _makeElement(_tag, NULL, None, None, None, None,
3096
- attrib, nsmap, _extra)
3203
+ def __new__(cls, _tag, attrib=None, nsmap=None, **_extra):
3204
+ return _makeElement(_tag, NULL, None, None, None, None,
3205
+ attrib, nsmap, _extra)
3206
+
3207
+ # Register _Element as a virtual subclass of Element
3208
+ Element.register(_Element)
3097
3209
 
3098
3210
 
3099
3211
  def Comment(text=None):
@@ -3205,32 +3317,41 @@ def SubElement(_Element _parent not None, _tag,
3205
3317
  """
3206
3318
  return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra)
3207
3319
 
3320
+ from typing import Generic, TypeVar
3208
3321
 
3209
- def ElementTree(_Element element=None, *, file=None, _BaseParser parser=None):
3210
- """ElementTree(element=None, file=None, parser=None)
3322
+ T = TypeVar("T")
3211
3323
 
3212
- ElementTree wrapper class.
3213
- """
3214
- cdef xmlNode* c_next
3215
- cdef xmlNode* c_node
3216
- cdef xmlNode* c_node_copy
3217
- cdef xmlDoc* c_doc
3218
- cdef _ElementTree etree
3219
- cdef _Document doc
3324
+ class ElementTree(ABC, Generic[T]):
3325
+ def __new__(cls, _Element element=None, *, file=None, _BaseParser parser=None):
3326
+ """ElementTree(element=None, file=None, parser=None)
3220
3327
 
3221
- if element is not None:
3222
- doc = element._doc
3223
- elif file is not None:
3224
- try:
3225
- doc = _parseDocument(file, parser, None)
3226
- except _TargetParserResult as result_container:
3227
- return result_container.result
3228
- else:
3229
- c_doc = _newXMLDoc()
3230
- doc = _documentFactory(c_doc, parser)
3328
+ ElementTree wrapper class.
3329
+ """
3330
+ cdef xmlNode* c_next
3331
+ cdef xmlNode* c_node
3332
+ cdef xmlNode* c_node_copy
3333
+ cdef xmlDoc* c_doc
3334
+ cdef _ElementTree etree
3335
+ cdef _Document doc
3336
+
3337
+ if element is not None:
3338
+ doc = element._doc
3339
+ elif file is not None:
3340
+ try:
3341
+ doc = _parseDocument(file, parser, None)
3342
+ except _TargetParserResult as result_container:
3343
+ return result_container.result
3344
+ else:
3345
+ c_doc = _newXMLDoc()
3346
+ doc = _documentFactory(c_doc, parser)
3347
+
3348
+ return _elementTreeFactory(doc, element)
3231
3349
 
3232
- return _elementTreeFactory(doc, element)
3350
+ # Register _ElementTree as a virtual subclass of ElementTree
3351
+ ElementTree.register(_ElementTree)
3233
3352
 
3353
+ # Remove "ABC" and typing helpers from module dict
3354
+ del ABC, Generic, TypeVar, T
3234
3355
 
3235
3356
  def HTML(text, _BaseParser parser=None, *, base_url=None):
3236
3357
  """HTML(text, parser=None, base_url=None)