PyPI - lxml - Versions diffs - 5.4.0__cp310-cp310-macosx_10_9_universal2.whl → 6.0.0__cp310-cp310-macosx_10_9_universal2.whl - Mend

lxml 5.4.0__cp310-cp310-macosx_10_9_universal2.whl → 6.0.0__cp310-cp310-macosx_10_9_universal2.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (74)

lxml/__init__.py +1 -1
lxml/_elementpath.cpython-310-darwin.so +0 -0
lxml/_elementpath.py +3 -1
lxml/apihelpers.pxi +25 -17
lxml/builder.cpython-310-darwin.so +0 -0
lxml/builder.py +11 -0
lxml/debug.pxi +0 -54
lxml/etree.cpython-310-darwin.so +0 -0
lxml/etree.h +24 -28
lxml/etree.pyx +154 -33
lxml/etree_api.h +59 -50
lxml/extensions.pxi +3 -6
lxml/html/__init__.py +7 -3
lxml/html/_difflib.cpython-310-darwin.so +0 -0
lxml/html/_difflib.py +2106 -0
lxml/html/builder.py +40 -0
lxml/html/defs.py +3 -3
lxml/html/diff.cpython-310-darwin.so +0 -0
lxml/html/diff.py +406 -312
lxml/includes/etree_defs.h +6 -6
lxml/includes/libxml/HTMLparser.h +33 -30
lxml/includes/libxml/HTMLtree.h +1 -0
lxml/includes/libxml/SAX.h +2 -186
lxml/includes/libxml/SAX2.h +2 -3
lxml/includes/libxml/catalog.h +1 -0
lxml/includes/libxml/debugXML.h +0 -138
lxml/includes/libxml/encoding.h +124 -61
lxml/includes/libxml/entities.h +0 -19
lxml/includes/libxml/globals.h +0 -16
lxml/includes/libxml/nanoftp.h +3 -173
lxml/includes/libxml/parser.h +474 -231
lxml/includes/libxml/parserInternals.h +21 -101
lxml/includes/libxml/relaxng.h +7 -2
lxml/includes/libxml/threads.h +0 -6
lxml/includes/libxml/tree.h +29 -85
lxml/includes/libxml/valid.h +20 -12
lxml/includes/libxml/xinclude.h +5 -0
lxml/includes/libxml/xlink.h +4 -0
lxml/includes/libxml/xmlIO.h +15 -34
lxml/includes/libxml/xmlautomata.h +19 -2
lxml/includes/libxml/xmlerror.h +18 -18
lxml/includes/libxml/xmlexports.h +6 -56
lxml/includes/libxml/xmlmemory.h +19 -19
lxml/includes/libxml/xmlmodule.h +4 -0
lxml/includes/libxml/xmlreader.h +11 -3
lxml/includes/libxml/xmlregexp.h +7 -106
lxml/includes/libxml/xmlsave.h +11 -2
lxml/includes/libxml/xmlschemas.h +10 -5
lxml/includes/libxml/xmlunicode.h +3 -354
lxml/includes/libxml/xmlversion.h +19 -34
lxml/includes/libxml/xpath.h +5 -15
lxml/includes/libxml/xpathInternals.h +9 -3
lxml/includes/libxml/xpointer.h +1 -91
lxml/includes/lxml-version.h +1 -1
lxml/includes/tree.pxd +10 -12
lxml/includes/xmlparser.pxd +46 -8
lxml/lxml.etree.h +24 -28
lxml/lxml.etree_api.h +59 -50
lxml/objectify.cpython-310-darwin.so +0 -0
lxml/objectify.pyx +11 -7
lxml/parser.pxi +106 -47
lxml/sax.cpython-310-darwin.so +0 -0
lxml/sax.py +11 -0
lxml/saxparser.pxi +14 -14
lxml/schematron.pxi +8 -3
lxml/serializer.pxi +71 -3
lxml/xslt.pxi +10 -3
lxml-6.0.0.dist-info/METADATA +163 -0
{lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/RECORD +73 -71
{lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/WHEEL +2 -1
{lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSE.txt +3 -1
lxml-5.4.0.dist-info/METADATA +0 -96
{lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSES.txt +0 -0
{lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/top_level.txt +0 -0

lxml/lxml.etree_api.h CHANGED Viewed

@@ -1,4 +1,4 @@
-/* Generated by Cython 3.0.12 */
+/* Generated by Cython 3.1.2 */
 #ifndef __PYX_HAVE_API__lxml__etree
 #define __PYX_HAVE_API__lxml__etree
@@ -98,19 +98,26 @@ static void (*__pyx_api_f_4lxml_5etree_initTagMatch)(struct LxmlElementTagMatche
 #define initTagMatch __pyx_api_f_4lxml_5etree_initTagMatch
 static xmlNs *(*__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix)(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *) = 0;
 #define findOrBuildNodeNsPrefix __pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix
-#ifndef __PYX_HAVE_RT_ImportFunction_3_0_12
-#define __PYX_HAVE_RT_ImportFunction_3_0_12
-static int __Pyx_ImportFunction_3_0_12(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
+static int __Pyx_ImportFunction_3_1_2(PyObject *module, const char *funcname, void (**f)(void), const char *sig);
+#ifndef __PYX_HAVE_RT_ImportFunction_3_1_2
+#define __PYX_HAVE_RT_ImportFunction_3_1_2
+static int __Pyx_ImportFunction_3_1_2(PyObject *module, const char *funcname, void (**f)(void), const char *sig) {
     PyObject *d = 0;
     PyObject *cobj = 0;
     union {
         void (*fp)(void);
         void *p;
     } tmp;
-    d = PyObject_GetAttrString(module, (char *)"__pyx_capi__");
+    d = PyObject_GetAttrString(module, "__pyx_capi__");
     if (!d)
         goto bad;
+#if (defined(Py_LIMITED_API) && Py_LIMITED_API >= 0x030d0000) || (!defined(Py_LIMITED_API) && PY_VERSION_HEX >= 0x030d0000)
+    PyDict_GetItemStringRef(d, funcname, &cobj);
+#else
     cobj = PyDict_GetItemString(d, funcname);
+    Py_XINCREF(cobj);
+#endif
     if (!cobj) {
         PyErr_Format(PyExc_ImportError,
             "%.200s does not export expected C function %.200s",
@@ -128,9 +135,11 @@ static int __Pyx_ImportFunction_3_0_12(PyObject *module, const char *funcname, v
     if (!(*f))
         goto bad;
     Py_DECREF(d);
+    Py_DECREF(cobj);
     return 0;
 bad:
     Py_XDECREF(d);
+    Py_XDECREF(cobj);
     return -1;
 }
 #endif
@@ -140,51 +149,51 @@ static int import_lxml__etree(void) {
   PyObject *module = 0;
   module = PyImport_ImportModule("lxml.etree");
   if (!module) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
-  if (__Pyx_ImportFunction_3_0_12(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "deepcopyNodeToDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_deepcopyNodeToDocument, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "elementTreeFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementTreeFactory, "struct LxmlElementTree *(struct LxmlElement *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "newElementTree", (void (**)(void))&__pyx_api_f_4lxml_5etree_newElementTree, "struct LxmlElementTree *(struct LxmlElement *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "adoptExternalDocument", (void (**)(void))&__pyx_api_f_4lxml_5etree_adoptExternalDocument, "struct LxmlElementTree *(xmlDoc *, PyObject *, int)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "elementFactory", (void (**)(void))&__pyx_api_f_4lxml_5etree_elementFactory, "struct LxmlElement *(struct LxmlDocument *, xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "makeElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeElement, "struct LxmlElement *(PyObject *, struct LxmlDocument *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "makeSubElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_makeSubElement, "struct LxmlElement *(struct LxmlElement *, PyObject *, PyObject *, PyObject *, PyObject *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "setElementClassLookupFunction", (void (**)(void))&__pyx_api_f_4lxml_5etree_setElementClassLookupFunction, "void (_element_class_lookup_function, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "lookupDefaultElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupDefaultElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "lookupNamespaceElementClass", (void (**)(void))&__pyx_api_f_4lxml_5etree_lookupNamespaceElementClass, "PyObject *(PyObject *, PyObject *, xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "callLookupFallback", (void (**)(void))&__pyx_api_f_4lxml_5etree_callLookupFallback, "PyObject *(struct LxmlFallbackElementClassLookup *, struct LxmlDocument *, xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "tagMatches", (void (**)(void))&__pyx_api_f_4lxml_5etree_tagMatches, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "documentOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_documentOrRaise, "struct LxmlDocument *(PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "rootNodeOrRaise", (void (**)(void))&__pyx_api_f_4lxml_5etree_rootNodeOrRaise, "struct LxmlElement *(PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "hasText", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasText, "int (xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "hasTail", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasTail, "int (xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "textOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_textOf, "PyObject *(xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "tailOf", (void (**)(void))&__pyx_api_f_4lxml_5etree_tailOf, "PyObject *(xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "setNodeText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setNodeText, "int (xmlNode *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "setTailText", (void (**)(void))&__pyx_api_f_4lxml_5etree_setTailText, "int (xmlNode *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "attributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValue, "PyObject *(xmlNode *, xmlAttr *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "attributeValueFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_attributeValueFromNsName, "PyObject *(xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "getAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_getAttributeValue, "PyObject *(struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "iterattributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_iterattributes, "PyObject *(struct LxmlElement *, int)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "collectAttributes", (void (**)(void))&__pyx_api_f_4lxml_5etree_collectAttributes, "PyObject *(xmlNode *, int)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "setAttributeValue", (void (**)(void))&__pyx_api_f_4lxml_5etree_setAttributeValue, "int (struct LxmlElement *, PyObject *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "delAttribute", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttribute, "int (struct LxmlElement *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "delAttributeFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_delAttributeFromNsName, "int (xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "hasChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_hasChild, "int (xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "findChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChild, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "findChildForwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildForwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "findChildBackwards", (void (**)(void))&__pyx_api_f_4lxml_5etree_findChildBackwards, "xmlNode *(xmlNode *, Py_ssize_t)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "nextElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_nextElement, "xmlNode *(xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "previousElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_previousElement, "xmlNode *(xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "appendChild", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChild, "void (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "appendChildToElement", (void (**)(void))&__pyx_api_f_4lxml_5etree_appendChildToElement, "int (struct LxmlElement *, struct LxmlElement *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "pyunicode", (void (**)(void))&__pyx_api_f_4lxml_5etree_pyunicode, "PyObject *(const xmlChar *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "utf8", (void (**)(void))&__pyx_api_f_4lxml_5etree_utf8, "PyObject *(PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "getNsTag", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTag, "PyObject *(PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "getNsTagWithEmptyNs", (void (**)(void))&__pyx_api_f_4lxml_5etree_getNsTagWithEmptyNs, "PyObject *(PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "namespacedName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedName, "PyObject *(xmlNode *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "namespacedNameFromNsName", (void (**)(void))&__pyx_api_f_4lxml_5etree_namespacedNameFromNsName, "PyObject *(const xmlChar *, const xmlChar *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "iteratorStoreNext", (void (**)(void))&__pyx_api_f_4lxml_5etree_iteratorStoreNext, "void (struct LxmlElementIterator *, struct LxmlElement *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "initTagMatch", (void (**)(void))&__pyx_api_f_4lxml_5etree_initTagMatch, "void (struct LxmlElementTagMatcher *, PyObject *)") < 0) goto bad;
+  if (__Pyx_ImportFunction_3_1_2(module, "findOrBuildNodeNsPrefix", (void (**)(void))&__pyx_api_f_4lxml_5etree_findOrBuildNodeNsPrefix, "xmlNs *(struct LxmlDocument *, xmlNode *, const xmlChar *, const xmlChar *)") < 0) goto bad;
   Py_DECREF(module); module = 0;
   return 0;
   bad:

lxml/objectify.cpython-310-darwin.so CHANGED Viewed

Binary file

lxml/objectify.pyx CHANGED Viewed

@@ -18,6 +18,7 @@ from lxml.includes cimport tree
 cimport lxml.includes.etreepublic as cetree
 cimport libc.string as cstring_h   # not to be confused with stdlib 'string'
 from libc.string cimport const_char
+from libc cimport limits
 __all__ = ['BoolElement', 'DataElement', 'E', 'Element', 'ElementMaker',
            'FloatElement', 'IntElement', 'NoneElement',
@@ -420,8 +421,11 @@ cdef object _lookupChild(_Element parent, tag):
     cdef tree.xmlNode* c_node
     c_node = parent._c_node
     ns, tag = cetree.getNsTagWithEmptyNs(tag)
+    c_tag_len = len(<bytes> tag)
+    if c_tag_len > limits.INT_MAX:
+        return None
     c_tag = tree.xmlDictExists(
-        c_node.doc.dict, _xcstr(tag), python.PyBytes_GET_SIZE(tag))
+        c_node.doc.dict, _xcstr(tag), <int> c_tag_len)
     if c_tag is NULL:
         return None # not in the hash map => not in the tree
     if ns is None:
@@ -1283,7 +1287,7 @@ cdef object _guessElementClass(tree.xmlNode* c_node):
         return None
     if value == '':
         return StringElement
     for type_check, pytype in _TYPE_CHECKS:
         try:
             type_check(value)
@@ -1689,8 +1693,8 @@ def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
     If the 'ignore_xsi' keyword argument is False (the default), existing
     'xsi:type' attributes will be used for the type annotation, if they fit the
-    element text values.
+    element text values.
     Note that the mapping from Python types to XSI types is usually ambiguous.
     Currently, only the first XSI type name in the corresponding PyType
     definition will be used for annotation.  Thus, you should consider naming
@@ -1705,7 +1709,7 @@ def annotate(element_or_tree, *, ignore_old=True, ignore_xsi=False,
     elements.  Pass 'string', for example, to make string values the default.
     The keyword arguments 'annotate_xsi' (default: 0) and 'annotate_pytype'
-    (default: 1) control which kind(s) of annotation to use.
+    (default: 1) control which kind(s) of annotation to use.
     """
     cdef _Element  element
     element = cetree.rootNodeOrRaise(element_or_tree)
@@ -1878,7 +1882,7 @@ def deannotate(element_or_tree, *, bint pytype=True, bint xsi=True,
     and/or 'xsi:type' attributes and/or 'xsi:nil' attributes.
     If the 'pytype' keyword argument is True (the default), 'py:pytype'
-    attributes will be removed. If the 'xsi' keyword argument is True (the
+    attributes will be removed. If the 'xsi' keyword argument is True (the
     default), 'xsi:type' attributes will be removed.
     If the 'xsi_nil' keyword argument is True (default: False), 'xsi:nil'
     attributes will be removed.
@@ -2124,7 +2128,7 @@ def DataElement(_value, attrib=None, nsmap=None, *, _pytype=None, _xsi=None,
         stringify = unicode if py_type is None else py_type.stringify
         strval = stringify(_value)
-    if _pytype is not None:
+    if _pytype is not None:
         if _pytype == "NoneType" or _pytype == "none":
             strval = None
             _attributes[XML_SCHEMA_INSTANCE_NIL_ATTR] = "true"

lxml/parser.pxi CHANGED Viewed

@@ -3,6 +3,14 @@
 from lxml.includes cimport xmlparser
 from lxml.includes cimport htmlparser
+cdef object _GenericAlias
+try:
+    from types import GenericAlias as _GenericAlias
+except ImportError:
+    # Python 3.8 - we only need this as return value from "__class_getitem__"
+    def _GenericAlias(cls, item):
+        return f"{cls.__name__}[{item.__name__}]"
 class ParseError(LxmlSyntaxError):
     """Syntax error while parsing an XML document.
@@ -53,7 +61,6 @@ cdef class _ParserDictionaryContext:
     cdef list _implied_parser_contexts
     def __cinit__(self):
-        self._c_dict = NULL
         self._implied_parser_contexts = []
     def __dealloc__(self):
@@ -295,9 +302,7 @@ cdef class _FileReaderContext:
         self._filelike = filelike
         self._close_file_after_read = close_file
         self._encoding = encoding
-        if url is None:
-            self._c_url = NULL
-        else:
+        if url is not None:
             url = _encodeFilename(url)
             self._c_url = _cstr(url)
         self._url = url
@@ -419,8 +424,6 @@ cdef class _FileReaderContext:
 cdef int _readFilelikeParser(void* ctxt, char* c_buffer, int c_size) noexcept with gil:
     return (<_FileReaderContext>ctxt).copyToBuffer(c_buffer, c_size)
-cdef int _readFileParser(void* ctxt, char* c_buffer, int c_size) noexcept nogil:
-    return stdio.fread(c_buffer, 1,  c_size, <stdio.FILE*>ctxt)
 ############################################################
 ## support for custom document loaders
@@ -542,11 +545,8 @@ cdef class _ParserContext(_ResolverContext):
     cdef bint _collect_ids
     def __cinit__(self):
-        self._c_ctxt = NULL
         self._collect_ids = True
-        if not config.ENABLE_THREADING:
-            self._lock = NULL
-        else:
+        if config.ENABLE_THREADING:
             self._lock = python.PyThread_allocate_lock()
         self._error_log = _ErrorLog()
@@ -573,6 +573,9 @@ cdef class _ParserContext(_ResolverContext):
         return context
     cdef void _initParserContext(self, xmlparser.xmlParserCtxt* c_ctxt) noexcept:
+        """
+        Connects the libxml2-level context to the lxml-level parser context.
+        """
         self._c_ctxt = c_ctxt
         c_ctxt._private = <void*>self
@@ -597,6 +600,12 @@ cdef class _ParserContext(_ResolverContext):
                 raise ParserError, "parser locking failed"
         self._error_log.clear()
         self._doc = None
+        # Connect the lxml error log with libxml2's error handling. In the case of parsing
+        # HTML, ctxt->sax is not set to null, so this always works. The libxml2 function
+        # that does this is htmlInitParserCtxt in HTMLparser.c. For HTML (and possibly XML
+        # too), libxml2's SAX's serror is set to be the place where errors are sent when
+        # schannel is set to ctxt->sax->serror in xmlCtxtErrMemory in libxml2's
+        # parserInternals.c.
         # Need a cast here because older libxml2 releases do not use 'const' in the functype.
         self._c_ctxt.sax.serror = <xmlerror.xmlStructuredErrorFunc> _receiveParserError
         self._orig_loader = _register_document_loader() if set_document_loader else NULL
@@ -642,6 +651,9 @@ cdef _initParserContext(_ParserContext context,
         context._initParserContext(c_ctxt)
 cdef void _forwardParserError(xmlparser.xmlParserCtxt* _parser_context, const xmlerror.xmlError* error) noexcept with gil:
+    """
+    Add an error created by libxml2 to the lxml-level error_log.
+    """
     (<_ParserContext>_parser_context._private)._error_log._receive(error)
 cdef void _receiveParserError(void* c_context, const xmlerror.xmlError* error) noexcept nogil:
@@ -687,6 +699,8 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
                                 xmlparser.xmlParserCtxt* c_ctxt,
                                 xmlDoc* result, filename,
                                 bint recover, bint free_doc) except NULL:
+    # The C-level argument xmlDoc* result is passed in as NULL if the parser was not able
+    # to parse the document.
     cdef bint well_formed
     if result is not NULL:
         __GLOBAL_PARSER_CONTEXT.initDocDict(result)
@@ -698,6 +712,9 @@ cdef xmlDoc* _handleParseResult(_ParserContext context,
         c_ctxt.myDoc = NULL
     if result is not NULL:
+        # "wellFormed" in libxml2 is 0 if the parser found fatal errors. It still returns a
+        # parse result document if 'recover=True'. Here, we determine if we can present
+        # the document to the user or consider it incorrect or broken enough to raise an error.
         if (context._validator is not None and
                 not context._validator.isvalid()):
             well_formed = 0  # actually not 'valid', but anyway ...
@@ -901,6 +918,9 @@ cdef class _BaseParser:
         return self._push_parser_context
     cdef _ParserContext _createContext(self, target, events_to_collect):
+        """
+        This method creates and configures the lxml-level parser.
+        """
         cdef _SaxParserContext sax_context
         if target is not None:
             sax_context = _TargetParserContext(self)
@@ -947,6 +967,9 @@ cdef class _BaseParser:
         return 0
     cdef xmlparser.xmlParserCtxt* _newParserCtxt(self) except NULL:
+        """
+        Create and initialise a libxml2-level parser context.
+        """
         cdef xmlparser.xmlParserCtxt* c_ctxt
         if self._for_html:
             c_ctxt = htmlparser.htmlCreateMemoryParserCtxt('dummy', 5)
@@ -1106,8 +1129,7 @@ cdef class _BaseParser:
         finally:
             context.cleanup()
-    cdef xmlDoc* _parseDoc(self, char* c_text, int c_len,
-                           char* c_filename) except NULL:
+    cdef xmlDoc* _parseDoc(self, const char* c_text, int c_len, char* c_filename) except NULL:
         """Parse document, share dictionary if possible.
         """
         cdef _ParserContext context
@@ -1440,7 +1462,7 @@ cdef class _FeedParser(_BaseParser):
                 else:
                     error = 0
-        if not pctxt.wellFormed and pctxt.disableSAX and context._has_raised():
+        if not pctxt.wellFormed and xmlparser.xmlCtxtIsStopped(pctxt) and context._has_raised():
             # propagate Python exceptions immediately
             recover = 0
             error = 1
@@ -1477,7 +1499,7 @@ cdef class _FeedParser(_BaseParser):
         else:
             xmlparser.xmlParseChunk(pctxt, NULL, 0, 1)
-        if (pctxt.recovery and not pctxt.disableSAX and
+        if (pctxt.recovery and not xmlparser.xmlCtxtIsStopped(pctxt) and
                 isinstance(context, _SaxParserContext)):
             # apply any left-over 'end' events
             (<_SaxParserContext>context).flushEvents()
@@ -1529,7 +1551,8 @@ cdef int _htmlCtxtResetPush(xmlparser.xmlParserCtxt* c_ctxt,
         return error
     # fix libxml2 setup for HTML
-    c_ctxt.progressive = 1
+    if tree.LIBXML_VERSION < 21400:
+        c_ctxt.progressive = 1  # TODO: remove
     c_ctxt.html = 1
     htmlparser.htmlCtxtUseOptions(c_ctxt, parse_options)
@@ -1547,10 +1570,15 @@ _XML_DEFAULT_PARSE_OPTIONS = (
     xmlparser.XML_PARSE_NONET   |
     xmlparser.XML_PARSE_COMPACT |
     xmlparser.XML_PARSE_BIG_LINES
-    )
+)
 cdef class XMLParser(_FeedParser):
-    """XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, load_dtd=False, no_network=True, ns_clean=False, recover=False, schema: XMLSchema =None, huge_tree=False, remove_blank_text=False, resolve_entities=True, remove_comments=False, remove_pis=False, strip_cdata=True, collect_ids=True, target=None, compact=True)
+    """XMLParser(self, encoding=None, attribute_defaults=False, dtd_validation=False, \
+                 load_dtd=False, no_network=True, decompress=False, ns_clean=False, \
+                 recover=False, schema: XMLSchema =None, huge_tree=False, \
+                 remove_blank_text=False, resolve_entities='internal', \
+                 remove_comments=False, remove_pis=False, strip_cdata=True, \
+                 collect_ids=True, target=None, compact=True)
     The XML parser.
@@ -1572,6 +1600,8 @@ cdef class XMLParser(_FeedParser):
     - dtd_validation     - validate against a DTD referenced by the document
     - load_dtd           - use DTD for parsing
     - no_network         - prevent network access for related files (default: True)
+    - decompress         - automatically decompress gzip input
+                           (default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
     - ns_clean           - clean up redundant namespace declarations
     - recover            - try hard to parse through broken XML
     - remove_blank_text  - discard blank text nodes that appear ignorable
@@ -1579,9 +1609,10 @@ cdef class XMLParser(_FeedParser):
     - remove_pis         - discard processing instructions
     - strip_cdata        - replace CDATA sections by normal text content (default: True)
     - compact            - save memory for short text content (default: True)
-    - collect_ids        - use a hash table of XML IDs for fast access (default: True, always True with DTD validation)
+    - collect_ids        - use a hash table of XML IDs for fast access
+                           (default: True, always True with DTD validation)
     - huge_tree          - disable security restrictions and support very deep trees
-                           and very long text content (only affects libxml2 2.7+)
+                           and very long text content
     Other keyword arguments:
@@ -1598,7 +1629,7 @@ cdef class XMLParser(_FeedParser):
     apply to the default parser.
     """
     def __init__(self, *, encoding=None, attribute_defaults=False,
-                 dtd_validation=False, load_dtd=False, no_network=True,
+                 dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
                  ns_clean=False, recover=False, XMLSchema schema=None,
                  huge_tree=False, remove_blank_text=False, resolve_entities='internal',
                  remove_comments=False, remove_pis=False, strip_cdata=True,
@@ -1638,6 +1669,10 @@ cdef class XMLParser(_FeedParser):
                              remove_comments, remove_pis, strip_cdata,
                              collect_ids, target, encoding, resolve_external)
+    # Allow subscripting XMLParser in type annotations (PEP 560)
+    def __class_getitem__(cls, item):
+        return _GenericAlias(cls, item)
 cdef class XMLPullParser(XMLParser):
     """XMLPullParser(self, events=None, *, tag=None, **kwargs)
@@ -1670,7 +1705,7 @@ cdef class XMLPullParser(XMLParser):
 cdef class ETCompatXMLParser(XMLParser):
     """ETCompatXMLParser(self, encoding=None, attribute_defaults=False, \
-                 dtd_validation=False, load_dtd=False, no_network=True, \
+                 dtd_validation=False, load_dtd=False, no_network=True, decompress=False, \
                  ns_clean=False, recover=False, schema=None, \
                  huge_tree=False, remove_blank_text=False, resolve_entities=True, \
                  remove_comments=True, remove_pis=True, strip_cdata=True, \
@@ -1684,7 +1719,7 @@ cdef class ETCompatXMLParser(XMLParser):
     and thus ignores comments and processing instructions.
     """
     def __init__(self, *, encoding=None, attribute_defaults=False,
-                 dtd_validation=False, load_dtd=False, no_network=True,
+                 dtd_validation=False, load_dtd=False, no_network=True, decompress=False,
                  ns_clean=False, recover=False, schema=None,
                  huge_tree=False, remove_blank_text=False, resolve_entities=True,
                  remove_comments=True, remove_pis=True, strip_cdata=True,
@@ -1694,6 +1729,7 @@ cdef class ETCompatXMLParser(XMLParser):
                            dtd_validation=dtd_validation,
                            load_dtd=load_dtd,
                            no_network=no_network,
+                           decompress=decompress,
                            ns_clean=ns_clean,
                            recover=recover,
                            remove_blank_text=remove_blank_text,
@@ -1705,7 +1741,8 @@ cdef class ETCompatXMLParser(XMLParser):
                            strip_cdata=strip_cdata,
                            target=target,
                            encoding=encoding,
-                           schema=schema)
+                           schema=schema,
+                           )
 # ET 1.2 compatible name
 XMLTreeBuilder = ETCompatXMLParser
@@ -1752,7 +1789,7 @@ cdef object _UNUSED = object()
 cdef class HTMLParser(_FeedParser):
     """HTMLParser(self, encoding=None, remove_blank_text=False, \
                    remove_comments=False, remove_pis=False, \
-                   no_network=True, target=None, schema: XMLSchema =None, \
+                   no_network=True, decompress=False, target=None, schema: XMLSchema =None, \
                    recover=True, compact=True, collect_ids=True, huge_tree=False)
     The HTML parser.
@@ -1766,6 +1803,8 @@ cdef class HTMLParser(_FeedParser):
     - recover            - try hard to parse through broken HTML (default: True)
     - no_network         - prevent network access for related files (default: True)
+    - decompress         - automatically decompress gzip input
+                           (default: False, changed in lxml 6.0, disabling only affects libxml2 2.15+)
     - remove_blank_text  - discard empty text nodes that are ignorable (i.e. not actual text content)
     - remove_comments    - discard comments
     - remove_pis         - discard processing instructions
@@ -1773,7 +1812,7 @@ cdef class HTMLParser(_FeedParser):
     - default_doctype    - add a default doctype even if it is not found in the HTML (default: True)
     - collect_ids        - use a hash table of XML IDs for fast access (default: True)
     - huge_tree          - disable security restrictions and support very deep trees
-                           and very long text content (only affects libxml2 2.7+)
+                           and very long text content
     Other keyword arguments:
@@ -1786,7 +1825,7 @@ cdef class HTMLParser(_FeedParser):
     """
     def __init__(self, *, encoding=None, remove_blank_text=False,
                  remove_comments=False, remove_pis=False, strip_cdata=_UNUSED,
-                 no_network=True, target=None, XMLSchema schema=None,
+                 no_network=True, decompress=False, target=None, XMLSchema schema=None,
                  recover=True, compact=True, default_doctype=True,
                  collect_ids=True, huge_tree=False):
         cdef int parse_options
@@ -1813,6 +1852,10 @@ cdef class HTMLParser(_FeedParser):
                              remove_comments, remove_pis, strip_cdata,
                              collect_ids, target, encoding)
+    # Allow subscripting HTMLParser in type annotations (PEP 560)
+    def __class_getitem__(cls, item):
+        return _GenericAlias(cls, item)
 cdef HTMLParser __DEFAULT_HTML_PARSER
 __DEFAULT_HTML_PARSER = HTMLParser()
@@ -1853,8 +1896,6 @@ cdef class HTMLPullParser(HTMLParser):
 cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
     cdef char* c_filename
-    cdef char* c_text
-    cdef Py_ssize_t c_len
     if parser is None:
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
     if not filename:
@@ -1862,36 +1903,56 @@ cdef xmlDoc* _parseDoc(text, filename, _BaseParser parser) except NULL:
     else:
         filename_utf = _encodeFilenameUTF8(filename)
         c_filename = _cstr(filename_utf)
-    if isinstance(text, unicode):
-        if python.PyUnicode_IS_READY(text):
-            # PEP-393 Unicode string
-            c_len = python.PyUnicode_GET_LENGTH(text) * python.PyUnicode_KIND(text)
-        else:
-            # old Py_UNICODE string
-            c_len = python.PyUnicode_GET_DATA_SIZE(text)
-        if c_len > limits.INT_MAX:
-            return (<_BaseParser>parser)._parseDocFromFilelike(
-                StringIO(text), filename, None)
-        return (<_BaseParser>parser)._parseUnicodeDoc(text, c_filename)
+    if isinstance(text, bytes):
+        return _parseDoc_bytes(<bytes> text, filename, c_filename, parser)
+    elif isinstance(text, unicode):
+        return _parseDoc_unicode(<unicode> text, filename, c_filename, parser)
+    else:
+        return _parseDoc_charbuffer(text, filename, c_filename, parser)
+cdef xmlDoc* _parseDoc_unicode(unicode text, filename, char* c_filename, _BaseParser parser) except NULL:
+    cdef Py_ssize_t c_len
+    if python.PyUnicode_IS_READY(text):
+        # PEP-393 Unicode string
+        c_len = python.PyUnicode_GET_LENGTH(text) * python.PyUnicode_KIND(text)
     else:
-        c_len = python.PyBytes_GET_SIZE(text)
-        if c_len > limits.INT_MAX:
-            return (<_BaseParser>parser)._parseDocFromFilelike(
-                BytesIO(text), filename, None)
-        c_text = _cstr(text)
-        return (<_BaseParser>parser)._parseDoc(c_text, c_len, c_filename)
+        # old Py_UNICODE string
+        c_len = python.PyUnicode_GET_DATA_SIZE(text)
+    if c_len > limits.INT_MAX:
+        return parser._parseDocFromFilelike(
+            StringIO(text), filename, None)
+    return parser._parseUnicodeDoc(text, c_filename)
+cdef xmlDoc* _parseDoc_bytes(bytes text, filename, char* c_filename, _BaseParser parser) except NULL:
+    cdef Py_ssize_t c_len = len(text)
+    if c_len > limits.INT_MAX:
+        return parser._parseDocFromFilelike(BytesIO(text), filename, None)
+    return parser._parseDoc(text, c_len, c_filename)
+cdef xmlDoc* _parseDoc_charbuffer(text, filename, char* c_filename, _BaseParser parser) except NULL:
+    cdef const unsigned char[::1] data = memoryview(text).cast('B')  # cast to 'unsigned char' buffer
+    cdef Py_ssize_t c_len = len(data)
+    if c_len > limits.INT_MAX:
+        return parser._parseDocFromFilelike(BytesIO(text), filename, None)
+    return parser._parseDoc(<const char*>&data[0], c_len, c_filename)
 cdef xmlDoc* _parseDocFromFile(filename8, _BaseParser parser) except NULL:
     if parser is None:
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
     return (<_BaseParser>parser)._parseDocFromFile(_cstr(filename8))
 cdef xmlDoc* _parseDocFromFilelike(source, filename,
                                    _BaseParser parser) except NULL:
     if parser is None:
         parser = __GLOBAL_PARSER_CONTEXT.getDefaultParser()
     return (<_BaseParser>parser)._parseDocFromFilelike(source, filename, None)
 cdef xmlDoc* _newXMLDoc() except NULL:
     cdef xmlDoc* result
     result = tree.xmlNewDoc(NULL)
@@ -1990,8 +2051,6 @@ cdef _Document _parseMemoryDocument(text, url, _BaseParser parser):
             raise ValueError(
                 "Unicode strings with encoding declaration are not supported. "
                 "Please use bytes input or XML fragments without declaration.")
-    elif not isinstance(text, bytes):
-        raise ValueError, "can only parse strings"
     c_doc = _parseDoc(text, url, parser)
     return _documentFactory(c_doc, parser)

lxml/sax.cpython-310-darwin.so CHANGED Viewed

Binary file

lxml/sax.py CHANGED Viewed

@@ -18,6 +18,13 @@ from lxml import etree
 from lxml.etree import ElementTree, SubElement
 from lxml.etree import Comment, ProcessingInstruction
+try:
+    from types import GenericAlias as _GenericAlias
+except ImportError:
+    # Python 3.8 - we only need this as return value from "__class_getitem__"
+    def _GenericAlias(cls, item):
+        return f"{cls.__name__}[{item.__name__}]"
 class SaxError(etree.LxmlError):
     """General SAX error.
@@ -152,6 +159,10 @@ class ElementTreeContentHandler(ContentHandler):
     ignorableWhitespace = characters
+    # Allow subscripting sax.ElementTreeContentHandler in type annotations (PEP 560)
+    def __class_getitem__(cls, item):
+        return _GenericAlias(cls, item)
 class ElementTreeProducer:
     """Produces SAX events for an element and children.