lxml 5.4.0__cp310-cp310-macosx_10_9_universal2.whl → 6.0.0__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/__init__.py +1 -1
- lxml/_elementpath.cpython-310-darwin.so +0 -0
- lxml/_elementpath.py +3 -1
- lxml/apihelpers.pxi +25 -17
- lxml/builder.cpython-310-darwin.so +0 -0
- lxml/builder.py +11 -0
- lxml/debug.pxi +0 -54
- lxml/etree.cpython-310-darwin.so +0 -0
- lxml/etree.h +24 -28
- lxml/etree.pyx +154 -33
- lxml/etree_api.h +59 -50
- lxml/extensions.pxi +3 -6
- lxml/html/__init__.py +7 -3
- lxml/html/_difflib.cpython-310-darwin.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/builder.py +40 -0
- lxml/html/defs.py +3 -3
- lxml/html/diff.cpython-310-darwin.so +0 -0
- lxml/html/diff.py +406 -312
- lxml/includes/etree_defs.h +6 -6
- lxml/includes/libxml/HTMLparser.h +33 -30
- lxml/includes/libxml/HTMLtree.h +1 -0
- lxml/includes/libxml/SAX.h +2 -186
- lxml/includes/libxml/SAX2.h +2 -3
- lxml/includes/libxml/catalog.h +1 -0
- lxml/includes/libxml/debugXML.h +0 -138
- lxml/includes/libxml/encoding.h +124 -61
- lxml/includes/libxml/entities.h +0 -19
- lxml/includes/libxml/globals.h +0 -16
- lxml/includes/libxml/nanoftp.h +3 -173
- lxml/includes/libxml/parser.h +474 -231
- lxml/includes/libxml/parserInternals.h +21 -101
- lxml/includes/libxml/relaxng.h +7 -2
- lxml/includes/libxml/threads.h +0 -6
- lxml/includes/libxml/tree.h +29 -85
- lxml/includes/libxml/valid.h +20 -12
- lxml/includes/libxml/xinclude.h +5 -0
- lxml/includes/libxml/xlink.h +4 -0
- lxml/includes/libxml/xmlIO.h +15 -34
- lxml/includes/libxml/xmlautomata.h +19 -2
- lxml/includes/libxml/xmlerror.h +18 -18
- lxml/includes/libxml/xmlexports.h +6 -56
- lxml/includes/libxml/xmlmemory.h +19 -19
- lxml/includes/libxml/xmlmodule.h +4 -0
- lxml/includes/libxml/xmlreader.h +11 -3
- lxml/includes/libxml/xmlregexp.h +7 -106
- lxml/includes/libxml/xmlsave.h +11 -2
- lxml/includes/libxml/xmlschemas.h +10 -5
- lxml/includes/libxml/xmlunicode.h +3 -354
- lxml/includes/libxml/xmlversion.h +19 -34
- lxml/includes/libxml/xpath.h +5 -15
- lxml/includes/libxml/xpathInternals.h +9 -3
- lxml/includes/libxml/xpointer.h +1 -91
- lxml/includes/lxml-version.h +1 -1
- lxml/includes/tree.pxd +10 -12
- lxml/includes/xmlparser.pxd +46 -8
- lxml/lxml.etree.h +24 -28
- lxml/lxml.etree_api.h +59 -50
- lxml/objectify.cpython-310-darwin.so +0 -0
- lxml/objectify.pyx +11 -7
- lxml/parser.pxi +106 -47
- lxml/sax.cpython-310-darwin.so +0 -0
- lxml/sax.py +11 -0
- lxml/saxparser.pxi +14 -14
- lxml/schematron.pxi +8 -3
- lxml/serializer.pxi +71 -3
- lxml/xslt.pxi +10 -3
- lxml-6.0.0.dist-info/METADATA +163 -0
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/RECORD +73 -71
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/WHEEL +2 -1
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSE.txt +3 -1
- lxml-5.4.0.dist-info/METADATA +0 -96
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSES.txt +0 -0
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/top_level.txt +0 -0
lxml/lxml.etree_api.h
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
/* Generated by Cython 3.
|
1
|
+
/* Generated by Cython 3.1.2 */
|
2
2
|
|
3
3
|
#ifndef __PYX_HAVE_API__lxml__etree
|
4
4
|
#define __PYX_HAVE_API__lxml__etree
|
@@ -98,19 +98,26 @@ static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatche
|
|
98
98
|
#define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
|
99
99
|
static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
|
100
100
|
#define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
|
101
|
-
|
102
|
-
|
103
|
-
|
101
|
+
static int __Pyx_ImportFunction_3_1_2(PyObject *module, const char *funcname, void (**f)(void), const char *sig);
|
102
|
+
|
103
|
+
#ifndef __PYX_HAVE_RT_ImportFunction_3_1_2
|
104
|
+
#define __PYX_HAVE_RT_ImportFunction_3_1_2
|
105
|
+
static int __Pyx_ImportFunction_3_1_2(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
|
104
106
|
PyObject *d = 0;
|
105
107
|
PyObject *cobj = 0;
|
106
108
|
union {
|
107
109
|
void (*fp)(void);
|
108
110
|
void *p;
|
109
111
|
} tmp;
|
110
|
-
d = PyObject_GetAttrString(module,
|
112
|
+
d = PyObject_GetAttrString(module, "__pyx_capi__");
|
111
113
|
if (!d)
|
112
114
|
goto bad;
|
115
|
+
#if (defined(Py_LIMITED_API) && Py_LIMITED_API >= 0x030d0000) || (!defined(Py_LIMITED_API) && PY_VERSION_HEX >= 0x030d0000)
|
116
|
+
PyDict_GetItemStringRef(d, funcname, &cobj);
|
117
|
+
#else
|
113
118
|
cobj = PyDict_GetItemString(d, funcname);
|
119
|
+
Py_XINCREF(cobj);
|
120
|
+
#endif
|
114
121
|
if (!cobj) {
|
115
122
|
PyErr_Format(PyExc_ImportError,
|
116
123
|
"%.200s does not export expected C function %.200s",
|
@@ -128,9 +135,11 @@ static int __Pyx_ImportFunction_3_0_12(PyObject *module, const char *funcname, v
|
|
128
135
|
if (!(*f))
|
129
136
|
goto bad;
|
130
137
|
Py_DECREF(d);
|
138
|
+
Py_DECREF(cobj);
|
131
139
|
return 0;
|
132
140
|
bad:
|
133
141
|
Py_XDECREF(d);
|
142
|
+
Py_XDECREF(cobj);
|
134
143
|
return -1;
|
135
144
|
}
|
136
145
|
#endif
|
@@ -140,51 +149,51 @@ static int import_lxml__etree(void) {
|
|
140
149
|
PyObject *module = 0;
|
141
150
|
module = PyImport_ImportModule("lxml.etree");
|
142
151
|
if (!module) goto bad;
|
143
|
-
if (
|
144
|
-
if (
|
145
|
-
if (
|
146
|
-
if (
|
147
|
-
if (
|
148
|
-
if (
|
149
|
-
if (
|
150
|
-
if (
|
151
|
-
if (
|
152
|
-
if (
|
153
|
-
if (
|
154
|
-
if (
|
155
|
-
if (
|
156
|
-
if (
|
157
|
-
if (
|
158
|
-
if (
|
159
|
-
if (
|
160
|
-
if (
|
161
|
-
if (
|
162
|
-
if (
|
163
|
-
if (
|
164
|
-
if (
|
165
|
-
if (
|
166
|
-
if (
|
167
|
-
if (
|
168
|
-
if (
|
169
|
-
if (
|
170
|
-
if (
|
171
|
-
if (
|
172
|
-
if (
|
173
|
-
if (
|
174
|
-
if (
|
175
|
-
if (
|
176
|
-
if (
|
177
|
-
if (
|
178
|
-
if (
|
179
|
-
if (
|
180
|
-
if (
|
181
|
-
if (
|
182
|
-
if (
|
183
|
-
if (
|
184
|
-
if (
|
185
|
-
if (
|
186
|
-
if (
|
187
|
-
if (
|
152
|
+
if (__Pyx_ImportFunction_3_1_2(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
153
|
+
if (__Pyx_ImportFunction_3_1_2(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
|
154
|
+
if (__Pyx_ImportFunction_3_1_2(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
|
155
|
+
if (__Pyx_ImportFunction_3_1_2(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
|
156
|
+
if (__Pyx_ImportFunction_3_1_2(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
157
|
+
if (__Pyx_ImportFunction_3_1_2(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
158
|
+
if (__Pyx_ImportFunction_3_1_2(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
|
159
|
+
if (__Pyx_ImportFunction_3_1_2(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
|
160
|
+
if (__Pyx_ImportFunction_3_1_2(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
161
|
+
if (__Pyx_ImportFunction_3_1_2(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
|
162
|
+
if (__Pyx_ImportFunction_3_1_2(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
|
163
|
+
if (__Pyx_ImportFunction_3_1_2(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
164
|
+
if (__Pyx_ImportFunction_3_1_2(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
|
165
|
+
if (__Pyx_ImportFunction_3_1_2(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
|
166
|
+
if (__Pyx_ImportFunction_3_1_2(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
|
167
|
+
if (__Pyx_ImportFunction_3_1_2(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
|
168
|
+
if (__Pyx_ImportFunction_3_1_2(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
169
|
+
if (__Pyx_ImportFunction_3_1_2(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
|
170
|
+
if (__Pyx_ImportFunction_3_1_2(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
171
|
+
if (__Pyx_ImportFunction_3_1_2(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
|
172
|
+
if (__Pyx_ImportFunction_3_1_2(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
|
173
|
+
if (__Pyx_ImportFunction_3_1_2(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
174
|
+
if (__Pyx_ImportFunction_3_1_2(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
175
|
+
if (__Pyx_ImportFunction_3_1_2(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
|
176
|
+
if (__Pyx_ImportFunction_3_1_2(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
|
177
|
+
if (__Pyx_ImportFunction_3_1_2(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
|
178
|
+
if (__Pyx_ImportFunction_3_1_2(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
|
179
|
+
if (__Pyx_ImportFunction_3_1_2(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
180
|
+
if (__Pyx_ImportFunction_3_1_2(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
|
181
|
+
if (__Pyx_ImportFunction_3_1_2(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
182
|
+
if (__Pyx_ImportFunction_3_1_2(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
183
|
+
if (__Pyx_ImportFunction_3_1_2(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
|
184
|
+
if (__Pyx_ImportFunction_3_1_2(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
185
|
+
if (__Pyx_ImportFunction_3_1_2(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
|
186
|
+
if (__Pyx_ImportFunction_3_1_2(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
187
|
+
if (__Pyx_ImportFunction_3_1_2(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
|
188
|
+
if (__Pyx_ImportFunction_3_1_2(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
|
189
|
+
if (__Pyx_ImportFunction_3_1_2(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
|
190
|
+
if (__Pyx_ImportFunction_3_1_2(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
|
191
|
+
if (__Pyx_ImportFunction_3_1_2(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
|
192
|
+
if (__Pyx_ImportFunction_3_1_2(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
|
193
|
+
if (__Pyx_ImportFunction_3_1_2(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
|
194
|
+
if (__Pyx_ImportFunction_3_1_2(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
|
195
|
+
if (__Pyx_ImportFunction_3_1_2(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
|
196
|
+
if (__Pyx_ImportFunction_3_1_2(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
|
188
197
|
Py_DECREF(module); module = 0;
|
189
198
|
return 0;
|
190
199
|
bad:
|
Binary file
|
lxml/objectify.pyx
CHANGED
@@ -18,6 +18,7 @@ from lxml.includes cimport tree
|
|
18
18
|
cimport lxml.includes.etreepublic as cetree
|
19
19
|
cimport libc.string as cstring_h # not to be confused with stdlib 'string'
|
20
20
|
from libc.string cimport const_char
|
21
|
+
from libc cimport limits
|
21
22
|
|
22
23
|
__all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker',
|
23
24
|
'FloatElement', 'IntElement', 'NoneElement',
|
@@ -420,8 +421,11 @@ cdef object _lookupChild(_Element parent, tag):
|
|
420
421
|
cdef tree.xmlNode* c_node
|
421
422
|
c_node = parent._c_node
|
422
423
|
ns, tag = cetree.getNsTagWithEmptyNs(tag)
|
424
|
+
c_tag_len = len(<bytes> tag)
|
425
|
+
if c_tag_len > limits.INT_MAX:
|
426
|
+
return None
|
423
427
|
c_tag = tree.xmlDictExists(
|
424
|
-
c_node.doc.dict, _xcstr(tag),
|
428
|
+
c_node.doc.dict, _xcstr(tag), <int> c_tag_len)
|
425
429
|
if c_tag is NULL:
|
426
430
|
return None # not in the hash map => not in the tree
|
427
431
|
if ns is None:
|
@@ -1283,7 +1287,7 @@ cdef object _guessElementClass(tree.xmlNode* c_node):
|
|
1283
1287
|
return None
|
1284
1288
|
if value == '':
|
1285
1289
|
return StringElement
|
1286
|
-
|
1290
|
+
|
1287
1291
|
for type_check, pytype in _TYPE_CHECKS:
|
1288
1292
|
try:
|
1289
1293
|
type_check(value)
|
@@ -1689,8 +1693,8 @@ def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
|
|
1689
1693
|
|
1690
1694
|
If the 'ignore_xsi' keyword argument is False (the default), existing
|
1691
1695
|
'xsi:type' attributes will be used for the type annotation, if they fit the
|
1692
|
-
element text values.
|
1693
|
-
|
1696
|
+
element text values.
|
1697
|
+
|
1694
1698
|
Note that the mapping from Python types to XSI types is usually ambiguous.
|
1695
1699
|
Currently, only the first XSI type name in the corresponding PyType
|
1696
1700
|
definition will be used for annotation. Thus, you should consider naming
|
@@ -1705,7 +1709,7 @@ def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
|
|
1705
1709
|
elements. Pass 'string', for example, to make string values the default.
|
1706
1710
|
|
1707
1711
|
The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
|
1708
|
-
(default: 1) control which kind(s) of annotation to use.
|
1712
|
+
(default: 1) control which kind(s) of annotation to use.
|
1709
1713
|
"""
|
1710
1714
|
cdef _Element element
|
1711
1715
|
element = cetree.rootNodeOrRaise(element_or_tree)
|
@@ -1878,7 +1882,7 @@ def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
|
|
1878
1882
|
and/or 'xsi:type' attributes and/or 'xsi:nil' attributes.
|
1879
1883
|
|
1880
1884
|
If the 'pytype' keyword argument is True (the default), 'py:pytype'
|
1881
|
-
attributes will be removed. If the 'xsi' keyword argument is True (the
|
1885
|
+
attributes will be removed. If the 'xsi' keyword argument is True (the
|
1882
1886
|
default), 'xsi:type' attributes will be removed.
|
1883
1887
|
If the 'xsi_nil' keyword argument is True (default: False), 'xsi:nil'
|
1884
1888
|
attributes will be removed.
|
@@ -2124,7 +2128,7 @@ def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
|
|
2124
2128
|
stringify = unicode if py_type is None else py_type.stringify
|
2125
2129
|
strval = stringify(_value)
|
2126
2130
|
|
2127
|
-
if _pytype is not None:
|
2131
|
+
if _pytype is not None:
|
2128
2132
|
if _pytype == "NoneType" or _pytype == "none":
|
2129
2133
|
strval = None
|
2130
2134
|
_attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true"
|
lxml/parser.pxi
CHANGED
@@ -3,6 +3,14 @@
|
|
3
3
|
from lxml.includes cimport xmlparser
|
4
4
|
from lxml.includes cimport htmlparser
|
5
5
|
|
6
|
+
cdef object _GenericAlias
|
7
|
+
try:
|
8
|
+
from types import GenericAlias as _GenericAlias
|
9
|
+
except ImportError:
|
10
|
+
# Python 3.8 - we only need this as return value from "__class_getitem__"
|
11
|
+
def _GenericAlias(cls, item):
|
12
|
+
return f"{cls.__name__}[{item.__name__}]"
|
13
|
+
|
6
14
|
|
7
15
|
class ParseError(LxmlSyntaxError):
|
8
16
|
"""Syntax error while parsing an XML document.
|
@@ -53,7 +61,6 @@ cdef class _ParserDictionaryContext:
|
|
53
61
|
cdef list _implied_parser_contexts
|
54
62
|
|
55
63
|
def __cinit__(self):
|
56
|
-
self._c_dict = NULL
|
57
64
|
self._implied_parser_contexts = []
|
58
65
|
|
59
66
|
def __dealloc__(self):
|
@@ -295,9 +302,7 @@ cdef class _FileReaderContext:
|
|
295
302
|
self._filelike = filelike
|
296
303
|
self._close_file_after_read = close_file
|
297
304
|
self._encoding = encoding
|
298
|
-
if url is None:
|
299
|
-
self._c_url = NULL
|
300
|
-
else:
|
305
|
+
if url is not None:
|
301
306
|
url = _encodeFilename(url)
|
302
307
|
self._c_url = _cstr(url)
|
303
308
|
self._url = url
|
@@ -419,8 +424,6 @@ cdef class _FileReaderContext:
|
|
419
424
|
cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) noexcept with gil:
|
420
425
|
return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size)
|
421
426
|
|
422
|
-
cdef int _readFileParser(void* ctxt, char* c_buffer, int c_size) noexcept nogil:
|
423
|
-
return stdio.fread(c_buffer, 1, c_size, <stdio.FILE*>ctxt)
|
424
427
|
|
425
428
|
############################################################
|
426
429
|
## support for custom document loaders
|
@@ -542,11 +545,8 @@ cdef class _ParserContext(_ResolverContext):
|
|
542
545
|
cdef bint _collect_ids
|
543
546
|
|
544
547
|
def __cinit__(self):
|
545
|
-
self._c_ctxt = NULL
|
546
548
|
self._collect_ids = True
|
547
|
-
if
|
548
|
-
self._lock = NULL
|
549
|
-
else:
|
549
|
+
if config.ENABLE_THREADING:
|
550
550
|
self._lock = python.PyThread_allocate_lock()
|
551
551
|
self._error_log = _ErrorLog()
|
552
552
|
|
@@ -573,6 +573,9 @@ cdef class _ParserContext(_ResolverContext):
|
|
573
573
|
return context
|
574
574
|
|
575
575
|
cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
|
576
|
+
"""
|
577
|
+
Connects the libxml2-level context to the lxml-level parser context.
|
578
|
+
"""
|
576
579
|
self._c_ctxt = c_ctxt
|
577
580
|
c_ctxt._private = <void*>self
|
578
581
|
|
@@ -597,6 +600,12 @@ cdef class _ParserContext(_ResolverContext):
|
|
597
600
|
raise ParserError, "parser locking failed"
|
598
601
|
self._error_log.clear()
|
599
602
|
self._doc = None
|
603
|
+
# Connect the lxml error log with libxml2's error handling. In the case of parsing
|
604
|
+
# HTML, ctxt->sax is not set to null, so this always works. The libxml2 function
|
605
|
+
# that does this is htmlInitParserCtxt in HTMLparser.c. For HTML (and possibly XML
|
606
|
+
# too), libxml2's SAX's serror is set to be the place where errors are sent when
|
607
|
+
# schannel is set to ctxt->sax->serror in xmlCtxtErrMemory in libxml2's
|
608
|
+
# parserInternals.c.
|
600
609
|
# Need a cast here because older libxml2 releases do not use 'const' in the functype.
|
601
610
|
self._c_ctxt.sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError
|
602
611
|
self._orig_loader = _register_document_loader() if set_document_loader else NULL
|
@@ -642,6 +651,9 @@ cdef _initParserContext(_ParserContext context,
|
|
642
651
|
context._initParserContext(c_ctxt)
|
643
652
|
|
644
653
|
cdef void _forwardParserError(xmlparser.xmlParserCtxt* _parser_context, const xmlerror.xmlError* error) noexcept with gil:
|
654
|
+
"""
|
655
|
+
Add an error created by libxml2 to the lxml-level error_log.
|
656
|
+
"""
|
645
657
|
(<_ParserContext>_parser_context._private)._error_log._receive(error)
|
646
658
|
|
647
659
|
cdef void _receiveParserError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
|
@@ -687,6 +699,8 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
|
|
687
699
|
xmlparser.xmlParserCtxt* c_ctxt,
|
688
700
|
xmlDoc* result, filename,
|
689
701
|
bint recover, bint free_doc) except NULL:
|
702
|
+
# The C-level argument xmlDoc* result is passed in as NULL if the parser was not able
|
703
|
+
# to parse the document.
|
690
704
|
cdef bint well_formed
|
691
705
|
if result is not NULL:
|
692
706
|
__GLOBAL_PARSER_CONTEXT.initDocDict(result)
|
@@ -698,6 +712,9 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
|
|
698
712
|
c_ctxt.myDoc = NULL
|
699
713
|
|
700
714
|
if result is not NULL:
|
715
|
+
# "wellFormed" in libxml2 is 0 if the parser found fatal errors. It still returns a
|
716
|
+
# parse result document if 'recover=True'. Here, we determine if we can present
|
717
|
+
# the document to the user or consider it incorrect or broken enough to raise an error.
|
701
718
|
if (context._validator is not None and
|
702
719
|
not context._validator.isvalid()):
|
703
720
|
well_formed = 0 # actually not 'valid', but anyway ...
|
@@ -901,6 +918,9 @@ cdef class _BaseParser:
|
|
901
918
|
return self._push_parser_context
|
902
919
|
|
903
920
|
cdef _ParserContext _createContext(self, target, events_to_collect):
|
921
|
+
"""
|
922
|
+
This method creates and configures the lxml-level parser.
|
923
|
+
"""
|
904
924
|
cdef _SaxParserContext sax_context
|
905
925
|
if target is not None:
|
906
926
|
sax_context = _TargetParserContext(self)
|
@@ -947,6 +967,9 @@ cdef class _BaseParser:
|
|
947
967
|
return 0
|
948
968
|
|
949
969
|
cdef xmlparser.xmlParserCtxt* _newParserCtxt(self) except NULL:
|
970
|
+
"""
|
971
|
+
Create and initialise a libxml2-level parser context.
|
972
|
+
"""
|
950
973
|
cdef xmlparser.xmlParserCtxt* c_ctxt
|
951
974
|
if self._for_html:
|
952
975
|
c_ctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5)
|
@@ -1106,8 +1129,7 @@ cdef class _BaseParser:
|
|
1106
1129
|
finally:
|
1107
1130
|
context.cleanup()
|
1108
1131
|
|
1109
|
-
cdef xmlDoc* _parseDoc(self, char* c_text, int c_len,
|
1110
|
-
char* c_filename) except NULL:
|
1132
|
+
cdef xmlDoc* _parseDoc(self, const char* c_text, int c_len, char* c_filename) except NULL:
|
1111
1133
|
"""Parse document, share dictionary if possible.
|
1112
1134
|
"""
|
1113
1135
|
cdef _ParserContext context
|
@@ -1440,7 +1462,7 @@ cdef class _FeedParser(_BaseParser):
|
|
1440
1462
|
else:
|
1441
1463
|
error = 0
|
1442
1464
|
|
1443
|
-
if not pctxt.wellFormed and pctxt
|
1465
|
+
if not pctxt.wellFormed and xmlparser.xmlCtxtIsStopped(pctxt) and context._has_raised():
|
1444
1466
|
# propagate Python exceptions immediately
|
1445
1467
|
recover = 0
|
1446
1468
|
error = 1
|
@@ -1477,7 +1499,7 @@ cdef class _FeedParser(_BaseParser):
|
|
1477
1499
|
else:
|
1478
1500
|
xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
|
1479
1501
|
|
1480
|
-
if (pctxt.recovery and not pctxt
|
1502
|
+
if (pctxt.recovery and not xmlparser.xmlCtxtIsStopped(pctxt) and
|
1481
1503
|
isinstance(context, _SaxParserContext)):
|
1482
1504
|
# apply any left-over 'end' events
|
1483
1505
|
(<_SaxParserContext>context).flushEvents()
|
@@ -1529,7 +1551,8 @@ cdef int _htmlCtxtResetPush(xmlparser.xmlParserCtxt* c_ctxt,
|
|
1529
1551
|
return error
|
1530
1552
|
|
1531
1553
|
# fix libxml2 setup for HTML
|
1532
|
-
|
1554
|
+
if tree.LIBXML_VERSION < 21400:
|
1555
|
+
c_ctxt.progressive = 1 # TODO: remove
|
1533
1556
|
c_ctxt.html = 1
|
1534
1557
|
htmlparser.htmlCtxtUseOptions(c_ctxt, parse_options)
|
1535
1558
|
|
@@ -1547,10 +1570,15 @@ _XML_DEFAULT_PARSE_OPTIONS = (
|
|
1547
1570
|
xmlparser.XML_PARSE_NONET |
|
1548
1571
|
xmlparser.XML_PARSE_COMPACT |
|
1549
1572
|
xmlparser.XML_PARSE_BIG_LINES
|
1550
|
-
|
1573
|
+
)
|
1551
1574
|
|
1552
1575
|
cdef class XMLParser(_FeedParser):
|
1553
|
-
"""XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False,
|
1576
|
+
"""XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, \
|
1577
|
+
load_dtd=False, no_network=True, decompress=False, ns_clean=False, \
|
1578
|
+
recover=False, schema: XMLSchema =None, huge_tree=False, \
|
1579
|
+
remove_blank_text=False, resolve_entities=True, \
|
1580
|
+
remove_comments=False, remove_pis=False, strip_cdata=True, \
|
1581
|
+
collect_ids=True, target=None, compact=True)
|
1554
1582
|
|
1555
1583
|
The XML parser.
|
1556
1584
|
|
@@ -1572,6 +1600,8 @@ cdef class XMLParser(_FeedParser):
|
|
1572
1600
|
- dtd_validation - validate against a DTD referenced by the document
|
1573
1601
|
- load_dtd - use DTD for parsing
|
1574
1602
|
- no_network - prevent network access for related files (default: True)
|
1603
|
+
- decompress - automatically decompress gzip input
|
1604
|
+
(default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
|
1575
1605
|
- ns_clean - clean up redundant namespace declarations
|
1576
1606
|
- recover - try hard to parse through broken XML
|
1577
1607
|
- remove_blank_text - discard blank text nodes that appear ignorable
|
@@ -1579,9 +1609,10 @@ cdef class XMLParser(_FeedParser):
|
|
1579
1609
|
- remove_pis - discard processing instructions
|
1580
1610
|
- strip_cdata - replace CDATA sections by normal text content (default: True)
|
1581
1611
|
- compact - save memory for short text content (default: True)
|
1582
|
-
- collect_ids - use a hash table of XML IDs for fast access
|
1612
|
+
- collect_ids - use a hash table of XML IDs for fast access
|
1613
|
+
(default: True, always True with DTD validation)
|
1583
1614
|
- huge_tree - disable security restrictions and support very deep trees
|
1584
|
-
and very long text content
|
1615
|
+
and very long text content
|
1585
1616
|
|
1586
1617
|
Other keyword arguments:
|
1587
1618
|
|
@@ -1598,7 +1629,7 @@ cdef class XMLParser(_FeedParser):
|
|
1598
1629
|
apply to the default parser.
|
1599
1630
|
"""
|
1600
1631
|
def __init__(self, *, encoding=None, attribute_defaults=False,
|
1601
|
-
dtd_validation=False, load_dtd=False, no_network=True,
|
1632
|
+
dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
|
1602
1633
|
ns_clean=False, recover=False, XMLSchema schema=None,
|
1603
1634
|
huge_tree=False, remove_blank_text=False, resolve_entities='internal',
|
1604
1635
|
remove_comments=False, remove_pis=False, strip_cdata=True,
|
@@ -1638,6 +1669,10 @@ cdef class XMLParser(_FeedParser):
|
|
1638
1669
|
remove_comments, remove_pis, strip_cdata,
|
1639
1670
|
collect_ids, target, encoding, resolve_external)
|
1640
1671
|
|
1672
|
+
# Allow subscripting XMLParser in type annotations (PEP 560)
|
1673
|
+
def __class_getitem__(cls, item):
|
1674
|
+
return _GenericAlias(cls, item)
|
1675
|
+
|
1641
1676
|
|
1642
1677
|
cdef class XMLPullParser(XMLParser):
|
1643
1678
|
"""XMLPullParser(self, events=None, *, tag=None, **kwargs)
|
@@ -1670,7 +1705,7 @@ cdef class XMLPullParser(XMLParser):
|
|
1670
1705
|
|
1671
1706
|
cdef class ETCompatXMLParser(XMLParser):
|
1672
1707
|
"""ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \
|
1673
|
-
dtd_validation=False, load_dtd=False, no_network=True, \
|
1708
|
+
dtd_validation=False, load_dtd=False, no_network=True, decompress=False, \
|
1674
1709
|
ns_clean=False, recover=False, schema=None, \
|
1675
1710
|
huge_tree=False, remove_blank_text=False, resolve_entities=True, \
|
1676
1711
|
remove_comments=True, remove_pis=True, strip_cdata=True, \
|
@@ -1684,7 +1719,7 @@ cdef class ETCompatXMLParser(XMLParser):
|
|
1684
1719
|
and thus ignores comments and processing instructions.
|
1685
1720
|
"""
|
1686
1721
|
def __init__(self, *, encoding=None, attribute_defaults=False,
|
1687
|
-
dtd_validation=False, load_dtd=False, no_network=True,
|
1722
|
+
dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
|
1688
1723
|
ns_clean=False, recover=False, schema=None,
|
1689
1724
|
huge_tree=False, remove_blank_text=False, resolve_entities=True,
|
1690
1725
|
remove_comments=True, remove_pis=True, strip_cdata=True,
|
@@ -1694,6 +1729,7 @@ cdef class ETCompatXMLParser(XMLParser):
|
|
1694
1729
|
dtd_validation=dtd_validation,
|
1695
1730
|
load_dtd=load_dtd,
|
1696
1731
|
no_network=no_network,
|
1732
|
+
decompress=decompress,
|
1697
1733
|
ns_clean=ns_clean,
|
1698
1734
|
recover=recover,
|
1699
1735
|
remove_blank_text=remove_blank_text,
|
@@ -1705,7 +1741,8 @@ cdef class ETCompatXMLParser(XMLParser):
|
|
1705
1741
|
strip_cdata=strip_cdata,
|
1706
1742
|
target=target,
|
1707
1743
|
encoding=encoding,
|
1708
|
-
schema=schema
|
1744
|
+
schema=schema,
|
1745
|
+
)
|
1709
1746
|
|
1710
1747
|
# ET 1.2 compatible name
|
1711
1748
|
XMLTreeBuilder = ETCompatXMLParser
|
@@ -1752,7 +1789,7 @@ cdef object _UNUSED = object()
|
|
1752
1789
|
cdef class HTMLParser(_FeedParser):
|
1753
1790
|
"""HTMLParser(self, encoding=None, remove_blank_text=False, \
|
1754
1791
|
remove_comments=False, remove_pis=False, \
|
1755
|
-
no_network=True, target=None, schema: XMLSchema =None, \
|
1792
|
+
no_network=True, decompress=False, target=None, schema: XMLSchema =None, \
|
1756
1793
|
recover=True, compact=True, collect_ids=True, huge_tree=False)
|
1757
1794
|
|
1758
1795
|
The HTML parser.
|
@@ -1766,6 +1803,8 @@ cdef class HTMLParser(_FeedParser):
|
|
1766
1803
|
|
1767
1804
|
- recover - try hard to parse through broken HTML (default: True)
|
1768
1805
|
- no_network - prevent network access for related files (default: True)
|
1806
|
+
- decompress - automatically decompress gzip input
|
1807
|
+
(default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
|
1769
1808
|
- remove_blank_text - discard empty text nodes that are ignorable (i.e. not actual text content)
|
1770
1809
|
- remove_comments - discard comments
|
1771
1810
|
- remove_pis - discard processing instructions
|
@@ -1773,7 +1812,7 @@ cdef class HTMLParser(_FeedParser):
|
|
1773
1812
|
- default_doctype - add a default doctype even if it is not found in the HTML (default: True)
|
1774
1813
|
- collect_ids - use a hash table of XML IDs for fast access (default: True)
|
1775
1814
|
- huge_tree - disable security restrictions and support very deep trees
|
1776
|
-
and very long text content
|
1815
|
+
and very long text content
|
1777
1816
|
|
1778
1817
|
Other keyword arguments:
|
1779
1818
|
|
@@ -1786,7 +1825,7 @@ cdef class HTMLParser(_FeedParser):
|
|
1786
1825
|
"""
|
1787
1826
|
def __init__(self, *, encoding=None, remove_blank_text=False,
|
1788
1827
|
remove_comments=False, remove_pis=False, strip_cdata=_UNUSED,
|
1789
|
-
no_network=True, target=None, XMLSchema schema=None,
|
1828
|
+
no_network=True, decompress=False, target=None, XMLSchema schema=None,
|
1790
1829
|
recover=True, compact=True, default_doctype=True,
|
1791
1830
|
collect_ids=True, huge_tree=False):
|
1792
1831
|
cdef int parse_options
|
@@ -1813,6 +1852,10 @@ cdef class HTMLParser(_FeedParser):
|
|
1813
1852
|
remove_comments, remove_pis, strip_cdata,
|
1814
1853
|
collect_ids, target, encoding)
|
1815
1854
|
|
1855
|
+
# Allow subscripting HTMLParser in type annotations (PEP 560)
|
1856
|
+
def __class_getitem__(cls, item):
|
1857
|
+
return _GenericAlias(cls, item)
|
1858
|
+
|
1816
1859
|
|
1817
1860
|
cdef HTMLParser __DEFAULT_HTML_PARSER
|
1818
1861
|
__DEFAULT_HTML_PARSER = HTMLParser()
|
@@ -1853,8 +1896,6 @@ cdef class HTMLPullParser(HTMLParser):
|
|
1853
1896
|
|
1854
1897
|
cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
|
1855
1898
|
cdef char* c_filename
|
1856
|
-
cdef char* c_text
|
1857
|
-
cdef Py_ssize_t c_len
|
1858
1899
|
if parser is None:
|
1859
1900
|
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
|
1860
1901
|
if not filename:
|
@@ -1862,36 +1903,56 @@ cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
|
|
1862
1903
|
else:
|
1863
1904
|
filename_utf = _encodeFilenameUTF8(filename)
|
1864
1905
|
c_filename = _cstr(filename_utf)
|
1865
|
-
if isinstance(text,
|
1866
|
-
|
1867
|
-
|
1868
|
-
|
1869
|
-
|
1870
|
-
|
1871
|
-
|
1872
|
-
|
1873
|
-
|
1874
|
-
|
1875
|
-
|
1906
|
+
if isinstance(text, bytes):
|
1907
|
+
return _parseDoc_bytes(<bytes> text, filename, c_filename, parser)
|
1908
|
+
elif isinstance(text, unicode):
|
1909
|
+
return _parseDoc_unicode(<unicode> text, filename, c_filename, parser)
|
1910
|
+
else:
|
1911
|
+
return _parseDoc_charbuffer(text, filename, c_filename, parser)
|
1912
|
+
|
1913
|
+
|
1914
|
+
cdef xmlDoc* _parseDoc_unicode(unicode text, filename, char* c_filename, _BaseParser parser) except NULL:
|
1915
|
+
cdef Py_ssize_t c_len
|
1916
|
+
if python.PyUnicode_IS_READY(text):
|
1917
|
+
# PEP-393 Unicode string
|
1918
|
+
c_len = python.PyUnicode_GET_LENGTH(text) * python.PyUnicode_KIND(text)
|
1876
1919
|
else:
|
1877
|
-
|
1878
|
-
|
1879
|
-
|
1880
|
-
|
1881
|
-
|
1882
|
-
|
1920
|
+
# old Py_UNICODE string
|
1921
|
+
c_len = python.PyUnicode_GET_DATA_SIZE(text)
|
1922
|
+
if c_len > limits.INT_MAX:
|
1923
|
+
return parser._parseDocFromFilelike(
|
1924
|
+
StringIO(text), filename, None)
|
1925
|
+
return parser._parseUnicodeDoc(text, c_filename)
|
1926
|
+
|
1927
|
+
|
1928
|
+
cdef xmlDoc* _parseDoc_bytes(bytes text, filename, char* c_filename, _BaseParser parser) except NULL:
|
1929
|
+
cdef Py_ssize_t c_len = len(text)
|
1930
|
+
if c_len > limits.INT_MAX:
|
1931
|
+
return parser._parseDocFromFilelike(BytesIO(text), filename, None)
|
1932
|
+
return parser._parseDoc(text, c_len, c_filename)
|
1933
|
+
|
1934
|
+
|
1935
|
+
cdef xmlDoc* _parseDoc_charbuffer(text, filename, char* c_filename, _BaseParser parser) except NULL:
|
1936
|
+
cdef const unsigned char[::1] data = memoryview(text).cast('B') # cast to 'unsigned char' buffer
|
1937
|
+
cdef Py_ssize_t c_len = len(data)
|
1938
|
+
if c_len > limits.INT_MAX:
|
1939
|
+
return parser._parseDocFromFilelike(BytesIO(text), filename, None)
|
1940
|
+
return parser._parseDoc(<const char*>&data[0], c_len, c_filename)
|
1941
|
+
|
1883
1942
|
|
1884
1943
|
cdef xmlDoc* _parseDocFromFile(filename8, _BaseParser parser) except NULL:
|
1885
1944
|
if parser is None:
|
1886
1945
|
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
|
1887
1946
|
return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename8))
|
1888
1947
|
|
1948
|
+
|
1889
1949
|
cdef xmlDoc* _parseDocFromFilelike(source, filename,
|
1890
1950
|
_BaseParser parser) except NULL:
|
1891
1951
|
if parser is None:
|
1892
1952
|
parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
|
1893
1953
|
return (<_BaseParser>parser)._parseDocFromFilelike(source, filename, None)
|
1894
1954
|
|
1955
|
+
|
1895
1956
|
cdef xmlDoc* _newXMLDoc() except NULL:
|
1896
1957
|
cdef xmlDoc* result
|
1897
1958
|
result = tree.xmlNewDoc(NULL)
|
@@ -1990,8 +2051,6 @@ cdef _Document _parseMemoryDocument(text, url, _BaseParser parser):
|
|
1990
2051
|
raise ValueError(
|
1991
2052
|
"Unicode strings with encoding declaration are not supported. "
|
1992
2053
|
"Please use bytes input or XML fragments without declaration.")
|
1993
|
-
elif not isinstance(text, bytes):
|
1994
|
-
raise ValueError, "can only parse strings"
|
1995
2054
|
c_doc = _parseDoc(text, url, parser)
|
1996
2055
|
return _documentFactory(c_doc, parser)
|
1997
2056
|
|
lxml/sax.cpython-310-darwin.so
CHANGED
Binary file
|
lxml/sax.py
CHANGED
@@ -18,6 +18,13 @@ from lxml import etree
|
|
18
18
|
from lxml.etree import ElementTree, SubElement
|
19
19
|
from lxml.etree import Comment, ProcessingInstruction
|
20
20
|
|
21
|
+
try:
|
22
|
+
from types import GenericAlias as _GenericAlias
|
23
|
+
except ImportError:
|
24
|
+
# Python 3.8 - we only need this as return value from "__class_getitem__"
|
25
|
+
def _GenericAlias(cls, item):
|
26
|
+
return f"{cls.__name__}[{item.__name__}]"
|
27
|
+
|
21
28
|
|
22
29
|
class SaxError(etree.LxmlError):
|
23
30
|
"""General SAX error.
|
@@ -152,6 +159,10 @@ class ElementTreeContentHandler(ContentHandler):
|
|
152
159
|
|
153
160
|
ignorableWhitespace = characters
|
154
161
|
|
162
|
+
# Allow subscripting sax.ElementTreeContentHandler in type annotations (PEP 560)
|
163
|
+
def __class_getitem__(cls, item):
|
164
|
+
return _GenericAlias(cls, item)
|
165
|
+
|
155
166
|
|
156
167
|
class ElementTreeProducer:
|
157
168
|
"""Produces SAX events for an element and children.
|