lxml 5.3.2__cp313-cp313-macosx_10_13_universal2.whl → 6.0.0__cp313-cp313-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/__init__.py +1 -1
- lxml/_elementpath.cpython-313-darwin.so +0 -0
- lxml/_elementpath.py +3 -1
- lxml/apihelpers.pxi +25 -17
- lxml/builder.cpython-313-darwin.so +0 -0
- lxml/builder.py +11 -0
- lxml/debug.pxi +0 -54
- lxml/etree.cpython-313-darwin.so +0 -0
- lxml/etree.h +24 -28
- lxml/etree.pyx +154 -33
- lxml/etree_api.h +59 -50
- lxml/extensions.pxi +3 -6
- lxml/html/__init__.py +7 -3
- lxml/html/_difflib.cpython-313-darwin.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/builder.py +40 -0
- lxml/html/defs.py +3 -3
- lxml/html/diff.cpython-313-darwin.so +0 -0
- lxml/html/diff.py +406 -312
- lxml/includes/etree_defs.h +6 -6
- lxml/includes/libexslt/exsltconfig.h +3 -3
- lxml/includes/libxml/HTMLparser.h +41 -45
- lxml/includes/libxml/HTMLtree.h +1 -0
- lxml/includes/libxml/SAX.h +2 -186
- lxml/includes/libxml/SAX2.h +2 -3
- lxml/includes/libxml/c14n.h +1 -12
- lxml/includes/libxml/catalog.h +1 -0
- lxml/includes/libxml/debugXML.h +0 -138
- lxml/includes/libxml/encoding.h +131 -59
- lxml/includes/libxml/entities.h +12 -20
- lxml/includes/libxml/globals.h +0 -16
- lxml/includes/libxml/hash.h +19 -0
- lxml/includes/libxml/list.h +2 -2
- lxml/includes/libxml/nanoftp.h +3 -173
- lxml/includes/libxml/nanohttp.h +17 -0
- lxml/includes/libxml/parser.h +505 -256
- lxml/includes/libxml/parserInternals.h +26 -98
- lxml/includes/libxml/relaxng.h +7 -2
- lxml/includes/libxml/threads.h +0 -6
- lxml/includes/libxml/tree.h +61 -97
- lxml/includes/libxml/uri.h +11 -0
- lxml/includes/libxml/valid.h +49 -14
- lxml/includes/libxml/xinclude.h +12 -0
- lxml/includes/libxml/xlink.h +4 -0
- lxml/includes/libxml/xmlIO.h +33 -35
- lxml/includes/libxml/xmlautomata.h +19 -2
- lxml/includes/libxml/xmlerror.h +32 -18
- lxml/includes/libxml/xmlexports.h +61 -15
- lxml/includes/libxml/xmlmemory.h +27 -64
- lxml/includes/libxml/xmlmodule.h +4 -0
- lxml/includes/libxml/xmlreader.h +13 -3
- lxml/includes/libxml/xmlregexp.h +7 -106
- lxml/includes/libxml/xmlsave.h +15 -1
- lxml/includes/libxml/xmlschemas.h +10 -5
- lxml/includes/libxml/xmlunicode.h +3 -190
- lxml/includes/libxml/xmlversion.h +15 -194
- lxml/includes/libxml/xmlwriter.h +1 -0
- lxml/includes/libxml/xpath.h +9 -15
- lxml/includes/libxml/xpathInternals.h +9 -3
- lxml/includes/libxml/xpointer.h +1 -91
- lxml/includes/libxslt/xsltconfig.h +6 -6
- lxml/includes/lxml-version.h +1 -1
- lxml/includes/tree.pxd +10 -12
- lxml/includes/xmlparser.pxd +46 -8
- lxml/lxml.etree.h +24 -28
- lxml/lxml.etree_api.h +59 -50
- lxml/objectify.cpython-313-darwin.so +0 -0
- lxml/objectify.pyx +11 -7
- lxml/parser.pxi +106 -47
- lxml/sax.cpython-313-darwin.so +0 -0
- lxml/sax.py +11 -0
- lxml/saxparser.pxi +14 -14
- lxml/schematron.pxi +8 -3
- lxml/serializer.pxi +71 -3
- lxml/xslt.pxi +10 -3
- lxml-6.0.0.dist-info/METADATA +163 -0
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/RECORD +81 -79
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/WHEEL +2 -1
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSE.txt +3 -1
- lxml-5.3.2.dist-info/METADATA +0 -100
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSES.txt +0 -0
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/top_level.txt +0 -0
lxml/__init__.py
CHANGED
Binary file
|
lxml/_elementpath.py
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# cython: language_level=
|
1
|
+
# cython: language_level=3
|
2
2
|
|
3
3
|
#
|
4
4
|
# ElementTree
|
@@ -85,6 +85,8 @@ def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
|
|
85
85
|
yield ttype, "{%s}%s" % (namespaces[prefix], uri)
|
86
86
|
except KeyError:
|
87
87
|
raise SyntaxError("prefix %r not found in prefix map" % prefix)
|
88
|
+
elif tag.isdecimal():
|
89
|
+
yield token # index
|
88
90
|
elif default_namespace and not parsing_attribute:
|
89
91
|
yield ttype, "{%s}%s" % (default_namespace, tag)
|
90
92
|
else:
|
lxml/apihelpers.pxi
CHANGED
@@ -439,7 +439,7 @@ cdef int _removeUnusedNamespaceDeclarations(xmlNode* c_element, set prefixes_to_
|
|
439
439
|
c_nsdef = c_nsdef.next
|
440
440
|
c_nsdef.next = c_nsdef.next.next
|
441
441
|
tree.xmlFreeNs(c_ns_list[i].ns)
|
442
|
-
|
442
|
+
|
443
443
|
if c_ns_list is not NULL:
|
444
444
|
python.lxml_free(c_ns_list)
|
445
445
|
return 0
|
@@ -685,7 +685,7 @@ cdef unicode _collectText(xmlNode* c_node):
|
|
685
685
|
"""Collect all text nodes and return them as a unicode string.
|
686
686
|
|
687
687
|
Start collecting at c_node.
|
688
|
-
|
688
|
+
|
689
689
|
If there was no text to collect, return None
|
690
690
|
"""
|
691
691
|
cdef Py_ssize_t scount
|
@@ -845,7 +845,7 @@ cdef inline xmlNode* _findChild(xmlNode* c_node, Py_ssize_t index) noexcept:
|
|
845
845
|
return _findChildBackwards(c_node, -index - 1)
|
846
846
|
else:
|
847
847
|
return _findChildForwards(c_node, index)
|
848
|
-
|
848
|
+
|
849
849
|
cdef inline xmlNode* _findChildForwards(xmlNode* c_node, Py_ssize_t index) noexcept:
|
850
850
|
"""Return child element of c_node with index, or return NULL if not found.
|
851
851
|
"""
|
@@ -876,7 +876,7 @@ cdef inline xmlNode* _findChildBackwards(xmlNode* c_node, Py_ssize_t index) noex
|
|
876
876
|
c += 1
|
877
877
|
c_child = c_child.prev
|
878
878
|
return NULL
|
879
|
-
|
879
|
+
|
880
880
|
cdef inline xmlNode* _textNodeOrSkip(xmlNode* c_node) noexcept nogil:
|
881
881
|
"""Return the node if it's a text node. Skip over ignorable nodes in a
|
882
882
|
series of text nodes. Return NULL if a non-ignorable node is found.
|
@@ -1031,23 +1031,31 @@ cdef Py_ssize_t _mapTagsToQnameMatchArray(xmlDoc* c_doc, list ns_tags,
|
|
1031
1031
|
Note that each qname struct in the array owns its href byte string object
|
1032
1032
|
if it is not NULL.
|
1033
1033
|
"""
|
1034
|
-
cdef Py_ssize_t count = 0, i
|
1034
|
+
cdef Py_ssize_t count = 0, i, c_tag_len
|
1035
1035
|
cdef bytes ns, tag
|
1036
|
+
cdef const_xmlChar* c_tag
|
1037
|
+
|
1036
1038
|
for ns, tag in ns_tags:
|
1037
1039
|
if tag is None:
|
1038
|
-
c_tag = <const_xmlChar*>NULL
|
1039
|
-
elif force_into_dict:
|
1040
|
-
c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), len(tag))
|
1041
|
-
if c_tag is NULL:
|
1042
|
-
# clean up before raising the error
|
1043
|
-
for i in xrange(count):
|
1044
|
-
cpython.ref.Py_XDECREF(c_ns_tags[i].href)
|
1045
|
-
raise MemoryError()
|
1040
|
+
c_tag = <const_xmlChar*> NULL
|
1046
1041
|
else:
|
1047
|
-
|
1048
|
-
if
|
1049
|
-
# not in the dict => not in the document
|
1042
|
+
c_tag_len = len(tag)
|
1043
|
+
if c_tag_len > limits.INT_MAX:
|
1044
|
+
# too long, not in the dict => not in the document
|
1050
1045
|
continue
|
1046
|
+
elif force_into_dict:
|
1047
|
+
c_tag = tree.xmlDictLookup(c_doc.dict, _xcstr(tag), <int> c_tag_len)
|
1048
|
+
if c_tag is NULL:
|
1049
|
+
# clean up before raising the error
|
1050
|
+
for i in xrange(count):
|
1051
|
+
cpython.ref.Py_XDECREF(c_ns_tags[i].href)
|
1052
|
+
raise MemoryError()
|
1053
|
+
else:
|
1054
|
+
c_tag = tree.xmlDictExists(c_doc.dict, _xcstr(tag), <int> c_tag_len)
|
1055
|
+
if c_tag is NULL:
|
1056
|
+
# not in the dict => not in the document
|
1057
|
+
continue
|
1058
|
+
|
1051
1059
|
c_ns_tags[count].c_name = c_tag
|
1052
1060
|
if ns is None:
|
1053
1061
|
c_ns_tags[count].href = NULL
|
@@ -1095,7 +1103,7 @@ cdef int _removeSiblings(xmlNode* c_element, tree.xmlElementType node_type, bint
|
|
1095
1103
|
|
1096
1104
|
cdef void _moveTail(xmlNode* c_tail, xmlNode* c_target) noexcept:
|
1097
1105
|
cdef xmlNode* c_next
|
1098
|
-
# tail support: look for any text nodes trailing this node and
|
1106
|
+
# tail support: look for any text nodes trailing this node and
|
1099
1107
|
# move them too
|
1100
1108
|
c_tail = _textNodeOrSkip(c_tail)
|
1101
1109
|
while c_tail is not NULL:
|
Binary file
|
lxml/builder.py
CHANGED
@@ -45,6 +45,13 @@ _QName = ET.QName
|
|
45
45
|
|
46
46
|
from functools import partial
|
47
47
|
|
48
|
+
try:
|
49
|
+
from types import GenericAlias as _GenericAlias
|
50
|
+
except ImportError:
|
51
|
+
# Python 3.8 - we only need this as return value from "__class_getitem__"
|
52
|
+
def _GenericAlias(cls, item):
|
53
|
+
return f"{cls.__name__}[{item.__name__}]"
|
54
|
+
|
48
55
|
try:
|
49
56
|
basestring
|
50
57
|
except NameError:
|
@@ -227,6 +234,10 @@ class ElementMaker:
|
|
227
234
|
def __getattr__(self, tag):
|
228
235
|
return partial(self, tag)
|
229
236
|
|
237
|
+
# Allow subscripting ElementMaker in type annotions (PEP 560)
|
238
|
+
def __class_getitem__(cls, item):
|
239
|
+
return _GenericAlias(cls, item)
|
240
|
+
|
230
241
|
|
231
242
|
# create factory object
|
232
243
|
E = ElementMaker()
|
lxml/debug.pxi
CHANGED
@@ -32,59 +32,5 @@ cdef class _MemDebug:
|
|
32
32
|
raise MemoryError()
|
33
33
|
return tree.xmlDictSize(c_dict)
|
34
34
|
|
35
|
-
def dump(self, output_file=None, byte_count=None):
|
36
|
-
"""dump(self, output_file=None, byte_count=None)
|
37
|
-
|
38
|
-
Dumps the current memory blocks allocated by libxml2 to a file.
|
39
|
-
|
40
|
-
The optional parameter 'output_file' specifies the file path. It defaults
|
41
|
-
to the file ".memorylist" in the current directory.
|
42
|
-
|
43
|
-
The optional parameter 'byte_count' limits the number of bytes in the dump.
|
44
|
-
Note that this parameter is ignored when lxml is compiled against a libxml2
|
45
|
-
version before 2.7.0.
|
46
|
-
"""
|
47
|
-
cdef Py_ssize_t c_count
|
48
|
-
if output_file is None:
|
49
|
-
output_file = b'.memorylist'
|
50
|
-
elif isinstance(output_file, unicode):
|
51
|
-
output_file.encode(sys.getfilesystemencoding())
|
52
|
-
|
53
|
-
f = stdio.fopen(output_file, "w")
|
54
|
-
if f is NULL:
|
55
|
-
raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}")
|
56
|
-
try:
|
57
|
-
if byte_count is None:
|
58
|
-
tree.xmlMemDisplay(f)
|
59
|
-
else:
|
60
|
-
c_count = byte_count
|
61
|
-
tree.xmlMemDisplayLast(f, c_count)
|
62
|
-
finally:
|
63
|
-
stdio.fclose(f)
|
64
|
-
|
65
|
-
def show(self, output_file=None, block_count=None):
|
66
|
-
"""show(self, output_file=None, block_count=None)
|
67
|
-
|
68
|
-
Dumps the current memory blocks allocated by libxml2 to a file.
|
69
|
-
The output file format is suitable for line diffing.
|
70
|
-
|
71
|
-
The optional parameter 'output_file' specifies the file path. It defaults
|
72
|
-
to the file ".memorydump" in the current directory.
|
73
|
-
|
74
|
-
The optional parameter 'block_count' limits the number of blocks
|
75
|
-
in the dump.
|
76
|
-
"""
|
77
|
-
if output_file is None:
|
78
|
-
output_file = b'.memorydump'
|
79
|
-
elif isinstance(output_file, unicode):
|
80
|
-
output_file.encode(sys.getfilesystemencoding())
|
81
|
-
|
82
|
-
f = stdio.fopen(output_file, "w")
|
83
|
-
if f is NULL:
|
84
|
-
raise IOError(f"Failed to create file {output_file.decode(sys.getfilesystemencoding())}")
|
85
|
-
try:
|
86
|
-
tree.xmlMemShow(f, block_count if block_count is not None else tree.xmlMemBlocks())
|
87
|
-
finally:
|
88
|
-
stdio.fclose(f)
|
89
35
|
|
90
36
|
memory_debugger = _MemDebug()
|
lxml/etree.cpython-313-darwin.so
CHANGED
Binary file
|
lxml/etree.h
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
/* Generated by Cython 3.
|
1
|
+
/* Generated by Cython 3.1.2 */
|
2
2
|
|
3
3
|
#ifndef __PYX_HAVE__lxml__etree
|
4
4
|
#define __PYX_HAVE__lxml__etree
|
@@ -13,22 +13,22 @@ struct LxmlElementBase;
|
|
13
13
|
struct LxmlElementClassLookup;
|
14
14
|
struct LxmlFallbackElementClassLookup;
|
15
15
|
|
16
|
-
/* "lxml/etree.pyx":
|
16
|
+
/* "lxml/etree.pyx":451
|
17
17
|
*
|
18
18
|
* # type of a function that steps from node to node
|
19
19
|
* ctypedef public xmlNode* (*_node_to_node_function)(xmlNode*) # <<<<<<<<<<<<<<
|
20
20
|
*
|
21
21
|
*
|
22
|
-
|
22
|
+
*/
|
23
23
|
typedef xmlNode *(*_node_to_node_function)(xmlNode *);
|
24
24
|
|
25
|
-
/* "lxml/etree.pyx":
|
26
|
-
*
|
27
|
-
* @cython.freelist(8)
|
28
|
-
* cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]: # <<<<<<<<<<<<<<
|
29
|
-
* """Internal base class to reference a libxml document.
|
25
|
+
/* "lxml/etree.pyx":465
|
26
|
+
* # Public Python API
|
30
27
|
*
|
31
|
-
|
28
|
+
* @cython.final # <<<<<<<<<<<<<<
|
29
|
+
* @cython.freelist(8)
|
30
|
+
* cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
|
31
|
+
*/
|
32
32
|
struct LxmlDocument {
|
33
33
|
PyObject_HEAD
|
34
34
|
struct __pyx_vtabstruct_4lxml_5etree__Document *__pyx_vtab;
|
@@ -38,13 +38,13 @@ struct LxmlDocument {
|
|
38
38
|
struct __pyx_obj_4lxml_5etree__BaseParser *_parser;
|
39
39
|
};
|
40
40
|
|
41
|
-
/* "lxml/etree.pyx":
|
41
|
+
/* "lxml/etree.pyx":817
|
42
42
|
*
|
43
|
-
* @cython.no_gc_clear
|
44
|
-
* cdef public class _Element [ type LxmlElementType, object LxmlElement ]: # <<<<<<<<<<<<<<
|
45
|
-
* """Element class.
|
46
43
|
*
|
47
|
-
|
44
|
+
* @cython.no_gc_clear # <<<<<<<<<<<<<<
|
45
|
+
* cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
46
|
+
* """Element class.
|
47
|
+
*/
|
48
48
|
struct LxmlElement {
|
49
49
|
PyObject_HEAD
|
50
50
|
struct LxmlDocument *_doc;
|
@@ -52,13 +52,13 @@ struct LxmlElement {
|
|
52
52
|
PyObject *_tag;
|
53
53
|
};
|
54
54
|
|
55
|
-
/* "lxml/etree.pyx":
|
55
|
+
/* "lxml/etree.pyx":1991
|
56
56
|
*
|
57
57
|
*
|
58
58
|
* cdef public class _ElementTree [ type LxmlElementTreeType, # <<<<<<<<<<<<<<
|
59
59
|
* object LxmlElementTree ]:
|
60
60
|
* cdef _Document _doc
|
61
|
-
|
61
|
+
*/
|
62
62
|
struct LxmlElementTree {
|
63
63
|
PyObject_HEAD
|
64
64
|
struct __pyx_vtabstruct_4lxml_5etree__ElementTree *__pyx_vtab;
|
@@ -66,13 +66,13 @@ struct LxmlElementTree {
|
|
66
66
|
struct LxmlElement *_context_node;
|
67
67
|
};
|
68
68
|
|
69
|
-
/* "lxml/etree.pyx":
|
69
|
+
/* "lxml/etree.pyx":2765
|
70
70
|
*
|
71
71
|
*
|
72
72
|
* cdef public class _ElementTagMatcher [ object LxmlElementTagMatcher, # <<<<<<<<<<<<<<
|
73
73
|
* type LxmlElementTagMatcherType ]:
|
74
74
|
* """
|
75
|
-
|
75
|
+
*/
|
76
76
|
struct LxmlElementTagMatcher {
|
77
77
|
PyObject_HEAD
|
78
78
|
struct __pyx_vtabstruct_4lxml_5etree__ElementTagMatcher *__pyx_vtab;
|
@@ -82,13 +82,13 @@ struct LxmlElementTagMatcher {
|
|
82
82
|
char *_name;
|
83
83
|
};
|
84
84
|
|
85
|
-
/* "lxml/etree.pyx":
|
85
|
+
/* "lxml/etree.pyx":2796
|
86
86
|
* self._name = NULL
|
87
87
|
*
|
88
88
|
* cdef public class _ElementIterator(_ElementTagMatcher) [ # <<<<<<<<<<<<<<
|
89
89
|
* object LxmlElementIterator, type LxmlElementIteratorType ]:
|
90
90
|
* """
|
91
|
-
|
91
|
+
*/
|
92
92
|
struct LxmlElementIterator {
|
93
93
|
struct LxmlElementTagMatcher __pyx_base;
|
94
94
|
struct LxmlElement *_node;
|
@@ -101,7 +101,7 @@ struct LxmlElementIterator {
|
|
101
101
|
* cdef public class ElementBase(_Element) [ type LxmlElementBaseType, # <<<<<<<<<<<<<<
|
102
102
|
* object LxmlElementBase ]:
|
103
103
|
* """ElementBase(*children, attrib=None, nsmap=None, **_extra)
|
104
|
-
|
104
|
+
*/
|
105
105
|
struct LxmlElementBase {
|
106
106
|
struct LxmlElement __pyx_base;
|
107
107
|
};
|
@@ -112,7 +112,7 @@ struct LxmlElementBase {
|
|
112
112
|
* ctypedef public object (*_element_class_lookup_function)(object, _Document, xmlNode*) # <<<<<<<<<<<<<<
|
113
113
|
*
|
114
114
|
* # class to store element class lookup functions
|
115
|
-
|
115
|
+
*/
|
116
116
|
typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocument *, xmlNode *);
|
117
117
|
|
118
118
|
/* "src/lxml/classlookup.pxi":213
|
@@ -121,7 +121,7 @@ typedef PyObject *(*_element_class_lookup_function)(PyObject *, struct LxmlDocum
|
|
121
121
|
* cdef public class ElementClassLookup [ type LxmlElementClassLookupType, # <<<<<<<<<<<<<<
|
122
122
|
* object LxmlElementClassLookup ]:
|
123
123
|
* """ElementClassLookup(self)
|
124
|
-
|
124
|
+
*/
|
125
125
|
struct LxmlElementClassLookup {
|
126
126
|
PyObject_HEAD
|
127
127
|
_element_class_lookup_function _lookup_function;
|
@@ -133,7 +133,7 @@ struct LxmlElementClassLookup {
|
|
133
133
|
* cdef public class FallbackElementClassLookup(ElementClassLookup) \ # <<<<<<<<<<<<<<
|
134
134
|
* [ type LxmlFallbackElementClassLookupType,
|
135
135
|
* object LxmlFallbackElementClassLookup ]:
|
136
|
-
|
136
|
+
*/
|
137
137
|
struct LxmlFallbackElementClassLookup {
|
138
138
|
struct LxmlElementClassLookup __pyx_base;
|
139
139
|
struct __pyx_vtabstruct_4lxml_5etree_FallbackElementClassLookup *__pyx_vtab;
|
@@ -224,9 +224,6 @@ __PYX_EXTERN_C xmlNs *findOrBuildNodeNsPrefix(struct LxmlDocument *, xmlNode *,
|
|
224
224
|
/* WARNING: the interface of the module init function changed in CPython 3.5. */
|
225
225
|
/* It now returns a PyModuleDef instance instead of a PyModule instance. */
|
226
226
|
|
227
|
-
#if PY_MAJOR_VERSION < 3
|
228
|
-
PyMODINIT_FUNC initetree(void);
|
229
|
-
#else
|
230
227
|
/* WARNING: Use PyImport_AppendInittab("etree", PyInit_etree) instead of calling PyInit_etree directly from Python 3.5 */
|
231
228
|
PyMODINIT_FUNC PyInit_etree(void);
|
232
229
|
|
@@ -243,6 +240,5 @@ static PyObject* __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyObject* res) {
|
|
243
240
|
}
|
244
241
|
#define PyInit_etree() __PYX_WARN_IF_PyInit_etree_INIT_CALLED(PyInit_etree())
|
245
242
|
#endif
|
246
|
-
#endif
|
247
243
|
|
248
244
|
#endif /* !__PYX_HAVE__lxml__etree */
|
lxml/etree.pyx
CHANGED
@@ -19,6 +19,7 @@ __all__ = [
|
|
19
19
|
'FallbackElementClassLookup', 'FunctionNamespace', 'HTML', 'HTMLParser',
|
20
20
|
'ICONV_COMPILED_VERSION',
|
21
21
|
'LIBXML_COMPILED_VERSION', 'LIBXML_VERSION',
|
22
|
+
'LIBXML_FEATURES',
|
22
23
|
'LIBXSLT_COMPILED_VERSION', 'LIBXSLT_VERSION',
|
23
24
|
'LXML_VERSION',
|
24
25
|
'LxmlError', 'LxmlRegistryError', 'LxmlSyntaxError',
|
@@ -299,6 +300,101 @@ cdef extern from *:
|
|
299
300
|
ICONV_COMPILED_VERSION = __unpackIntVersion(LIBICONV_HEX_VERSION, base=0x100)[:2]
|
300
301
|
|
301
302
|
|
303
|
+
cdef extern from "libxml/xmlversion.h":
|
304
|
+
"""
|
305
|
+
static const char* const _lxml_lib_features[] = {
|
306
|
+
#ifdef LIBXML_HTML_ENABLED
|
307
|
+
"html",
|
308
|
+
#endif
|
309
|
+
#ifdef LIBXML_FTP_ENABLED
|
310
|
+
"ftp",
|
311
|
+
#endif
|
312
|
+
#ifdef LIBXML_HTTP_ENABLED
|
313
|
+
"http",
|
314
|
+
#endif
|
315
|
+
#ifdef LIBXML_CATALOG_ENABLED
|
316
|
+
"catalog",
|
317
|
+
#endif
|
318
|
+
#ifdef LIBXML_XPATH_ENABLED
|
319
|
+
"xpath",
|
320
|
+
#endif
|
321
|
+
#ifdef LIBXML_ICONV_ENABLED
|
322
|
+
"iconv",
|
323
|
+
#endif
|
324
|
+
#ifdef LIBXML_ICU_ENABLED
|
325
|
+
"icu",
|
326
|
+
#endif
|
327
|
+
#ifdef LIBXML_REGEXP_ENABLED
|
328
|
+
"regexp",
|
329
|
+
#endif
|
330
|
+
#ifdef LIBXML_SCHEMAS_ENABLED
|
331
|
+
"xmlschema",
|
332
|
+
#endif
|
333
|
+
#ifdef LIBXML_SCHEMATRON_ENABLED
|
334
|
+
"schematron",
|
335
|
+
#endif
|
336
|
+
#ifdef LIBXML_ZLIB_ENABLED
|
337
|
+
"zlib",
|
338
|
+
#endif
|
339
|
+
#ifdef LIBXML_LZMA_ENABLED
|
340
|
+
"lzma",
|
341
|
+
#endif
|
342
|
+
0
|
343
|
+
};
|
344
|
+
"""
|
345
|
+
const char* const* _LXML_LIB_FEATURES "_lxml_lib_features"
|
346
|
+
|
347
|
+
|
348
|
+
cdef set _copy_lib_features():
|
349
|
+
features = set()
|
350
|
+
feature = _LXML_LIB_FEATURES
|
351
|
+
while feature[0]:
|
352
|
+
features.add(feature[0].decode('ASCII'))
|
353
|
+
feature += 1
|
354
|
+
return features
|
355
|
+
|
356
|
+
LIBXML_COMPILED_FEATURES = _copy_lib_features()
|
357
|
+
LIBXML_FEATURES = {
|
358
|
+
feature_name for feature_id, feature_name in [
|
359
|
+
#XML_WITH_THREAD = 1
|
360
|
+
#XML_WITH_TREE = 2
|
361
|
+
#XML_WITH_OUTPUT = 3
|
362
|
+
#XML_WITH_PUSH = 4
|
363
|
+
#XML_WITH_READER = 5
|
364
|
+
#XML_WITH_PATTERN = 6
|
365
|
+
#XML_WITH_WRITER = 7
|
366
|
+
#XML_WITH_SAX1 = 8
|
367
|
+
(xmlparser.XML_WITH_FTP, "ftp"), # XML_WITH_FTP = 9
|
368
|
+
(xmlparser.XML_WITH_HTTP, "http"), # XML_WITH_HTTP = 10
|
369
|
+
#XML_WITH_VALID = 11
|
370
|
+
(xmlparser.XML_WITH_HTML, "html"), # XML_WITH_HTML = 12
|
371
|
+
#XML_WITH_LEGACY = 13
|
372
|
+
#XML_WITH_C14N = 14
|
373
|
+
(xmlparser.XML_WITH_CATALOG, "catalog"), # XML_WITH_CATALOG = 15
|
374
|
+
(xmlparser.XML_WITH_XPATH, "xpath"), # XML_WITH_XPATH = 16
|
375
|
+
#XML_WITH_XPTR = 17
|
376
|
+
#XML_WITH_XINCLUDE = 18
|
377
|
+
(xmlparser.XML_WITH_ICONV, "iconv"), # XML_WITH_ICONV = 19
|
378
|
+
#XML_WITH_ISO8859X = 20
|
379
|
+
#XML_WITH_UNICODE = 21
|
380
|
+
(xmlparser.XML_WITH_REGEXP, "regexp"), # XML_WITH_REGEXP = 22
|
381
|
+
#XML_WITH_AUTOMATA = 23
|
382
|
+
#XML_WITH_EXPR = 24
|
383
|
+
(xmlparser.XML_WITH_SCHEMAS, "xmlschema"), # XML_WITH_SCHEMAS = 25
|
384
|
+
(xmlparser.XML_WITH_SCHEMATRON, "schematron"), # XML_WITH_SCHEMATRON = 26
|
385
|
+
#XML_WITH_MODULES = 27
|
386
|
+
#XML_WITH_DEBUG = 28
|
387
|
+
#XML_WITH_DEBUG_MEM = 29
|
388
|
+
#XML_WITH_DEBUG_RUN = 30 # unused
|
389
|
+
(xmlparser.XML_WITH_ZLIB, "zlib"), # XML_WITH_ZLIB = 31
|
390
|
+
(xmlparser.XML_WITH_ICU, "icu"), # XML_WITH_ICU = 32
|
391
|
+
(xmlparser.XML_WITH_LZMA, "lzma"), # XML_WITH_LZMA = 33
|
392
|
+
] if xmlparser.xmlHasFeature(feature_id)
|
393
|
+
}
|
394
|
+
|
395
|
+
cdef bint HAS_ZLIB_COMPRESSION = xmlparser.xmlHasFeature(xmlparser.XML_WITH_ZLIB)
|
396
|
+
|
397
|
+
|
302
398
|
# class for temporary storage of Python references,
|
303
399
|
# used e.g. for XPath results
|
304
400
|
@cython.final
|
@@ -519,13 +615,15 @@ cdef public class _Document [ type LxmlDocumentType, object LxmlDocument ]:
|
|
519
615
|
c_ns = self._findOrBuildNodeNs(c_node, c_href, NULL, 0)
|
520
616
|
tree.xmlSetNs(c_node, c_ns)
|
521
617
|
|
618
|
+
|
522
619
|
cdef tuple __initPrefixCache():
|
523
620
|
cdef int i
|
524
621
|
return tuple([ python.PyBytes_FromFormat("ns%d", i)
|
525
|
-
for i in range(
|
622
|
+
for i in range(26) ])
|
526
623
|
|
527
624
|
cdef tuple _PREFIX_CACHE = __initPrefixCache()
|
528
625
|
|
626
|
+
|
529
627
|
cdef _Document _documentFactory(xmlDoc* c_doc, _BaseParser parser):
|
530
628
|
cdef _Document result
|
531
629
|
result = _Document.__new__(_Document)
|
@@ -1637,11 +1735,6 @@ cdef public class _Element [ type LxmlElementType, object LxmlElement ]:
|
|
1637
1735
|
return CSSSelector(expr, translator=translator)(self)
|
1638
1736
|
|
1639
1737
|
|
1640
|
-
cdef extern from "includes/etree_defs.h":
|
1641
|
-
# macro call to 't->tp_new()' for fast instantiation
|
1642
|
-
cdef object NEW_ELEMENT "PY_NEW" (object t)
|
1643
|
-
|
1644
|
-
|
1645
1738
|
@cython.linetrace(False)
|
1646
1739
|
cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
|
1647
1740
|
cdef _Element result
|
@@ -1651,12 +1744,15 @@ cdef _Element _elementFactory(_Document doc, xmlNode* c_node):
|
|
1651
1744
|
if c_node is NULL:
|
1652
1745
|
return None
|
1653
1746
|
|
1654
|
-
element_class = LOOKUP_ELEMENT_CLASS(
|
1747
|
+
element_class = <type> LOOKUP_ELEMENT_CLASS(
|
1655
1748
|
ELEMENT_CLASS_LOOKUP_STATE, doc, c_node)
|
1749
|
+
if type(element_class) is not type:
|
1750
|
+
if not isinstance(element_class, type):
|
1751
|
+
raise TypeError(f"Element class is not a type, got {type(element_class)}")
|
1656
1752
|
if hasProxy(c_node):
|
1657
1753
|
# prevent re-entry race condition - we just called into Python
|
1658
1754
|
return getProxy(c_node)
|
1659
|
-
result =
|
1755
|
+
result = element_class.__new__(element_class)
|
1660
1756
|
if hasProxy(c_node):
|
1661
1757
|
# prevent re-entry race condition - we just called into Python
|
1662
1758
|
result._c_node = NULL
|
@@ -3082,18 +3178,34 @@ cdef xmlNode* _createEntity(xmlDoc* c_doc, const_xmlChar* name) noexcept:
|
|
3082
3178
|
|
3083
3179
|
# module-level API for ElementTree
|
3084
3180
|
|
3085
|
-
|
3181
|
+
from abc import ABC
|
3182
|
+
|
3183
|
+
class Element(ABC):
|
3086
3184
|
"""Element(_tag, attrib=None, nsmap=None, **_extra)
|
3087
3185
|
|
3088
|
-
Element factory
|
3186
|
+
Element factory, as a class.
|
3187
|
+
|
3188
|
+
An instance of this class is an object implementing the
|
3089
3189
|
Element interface.
|
3090
3190
|
|
3191
|
+
>>> element = Element("test")
|
3192
|
+
>>> type(element)
|
3193
|
+
<class 'lxml.etree._Element'>
|
3194
|
+
>>> isinstance(element, Element)
|
3195
|
+
True
|
3196
|
+
>>> issubclass(_Element, Element)
|
3197
|
+
True
|
3198
|
+
|
3091
3199
|
Also look at the `_Element.makeelement()` and
|
3092
3200
|
`_BaseParser.makeelement()` methods, which provide a faster way to
|
3093
3201
|
create an Element within a specific document or parser context.
|
3094
3202
|
"""
|
3095
|
-
|
3096
|
-
|
3203
|
+
def __new__(cls, _tag, attrib=None, nsmap=None, **_extra):
|
3204
|
+
return _makeElement(_tag, NULL, None, None, None, None,
|
3205
|
+
attrib, nsmap, _extra)
|
3206
|
+
|
3207
|
+
# Register _Element as a virtual subclass of Element
|
3208
|
+
Element.register(_Element)
|
3097
3209
|
|
3098
3210
|
|
3099
3211
|
def Comment(text=None):
|
@@ -3205,32 +3317,41 @@ def SubElement(_Element _parent not None, _tag,
|
|
3205
3317
|
"""
|
3206
3318
|
return _makeSubElement(_parent, _tag, None, None, attrib, nsmap, _extra)
|
3207
3319
|
|
3320
|
+
from typing import Generic, TypeVar
|
3208
3321
|
|
3209
|
-
|
3210
|
-
"""ElementTree(element=None, file=None, parser=None)
|
3322
|
+
T = TypeVar("T")
|
3211
3323
|
|
3212
|
-
|
3213
|
-
|
3214
|
-
|
3215
|
-
cdef xmlNode* c_node
|
3216
|
-
cdef xmlNode* c_node_copy
|
3217
|
-
cdef xmlDoc* c_doc
|
3218
|
-
cdef _ElementTree etree
|
3219
|
-
cdef _Document doc
|
3324
|
+
class ElementTree(ABC, Generic[T]):
|
3325
|
+
def __new__(cls, _Element element=None, *, file=None, _BaseParser parser=None):
|
3326
|
+
"""ElementTree(element=None, file=None, parser=None)
|
3220
3327
|
|
3221
|
-
|
3222
|
-
|
3223
|
-
|
3224
|
-
|
3225
|
-
|
3226
|
-
|
3227
|
-
|
3228
|
-
|
3229
|
-
|
3230
|
-
|
3328
|
+
ElementTree wrapper class.
|
3329
|
+
"""
|
3330
|
+
cdef xmlNode* c_next
|
3331
|
+
cdef xmlNode* c_node
|
3332
|
+
cdef xmlNode* c_node_copy
|
3333
|
+
cdef xmlDoc* c_doc
|
3334
|
+
cdef _ElementTree etree
|
3335
|
+
cdef _Document doc
|
3336
|
+
|
3337
|
+
if element is not None:
|
3338
|
+
doc = element._doc
|
3339
|
+
elif file is not None:
|
3340
|
+
try:
|
3341
|
+
doc = _parseDocument(file, parser, None)
|
3342
|
+
except _TargetParserResult as result_container:
|
3343
|
+
return result_container.result
|
3344
|
+
else:
|
3345
|
+
c_doc = _newXMLDoc()
|
3346
|
+
doc = _documentFactory(c_doc, parser)
|
3347
|
+
|
3348
|
+
return _elementTreeFactory(doc, element)
|
3231
3349
|
|
3232
|
-
|
3350
|
+
# Register _ElementTree as a virtual subclass of ElementTree
|
3351
|
+
ElementTree.register(_ElementTree)
|
3233
3352
|
|
3353
|
+
# Remove "ABC" and typing helpers from module dict
|
3354
|
+
del ABC, Generic, TypeVar, T
|
3234
3355
|
|
3235
3356
|
def HTML(text, _BaseParser parser=None, *, base_url=None):
|
3236
3357
|
"""HTML(text, parser=None, base_url=None)
|