lxml 5.4.0__cp310-cp310-macosx_10_9_universal2.whl → 6.0.0__cp310-cp310-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/__init__.py +1 -1
- lxml/_elementpath.cpython-310-darwin.so +0 -0
- lxml/_elementpath.py +3 -1
- lxml/apihelpers.pxi +25 -17
- lxml/builder.cpython-310-darwin.so +0 -0
- lxml/builder.py +11 -0
- lxml/debug.pxi +0 -54
- lxml/etree.cpython-310-darwin.so +0 -0
- lxml/etree.h +24 -28
- lxml/etree.pyx +154 -33
- lxml/etree_api.h +59 -50
- lxml/extensions.pxi +3 -6
- lxml/html/__init__.py +7 -3
- lxml/html/_difflib.cpython-310-darwin.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/builder.py +40 -0
- lxml/html/defs.py +3 -3
- lxml/html/diff.cpython-310-darwin.so +0 -0
- lxml/html/diff.py +406 -312
- lxml/includes/etree_defs.h +6 -6
- lxml/includes/libxml/HTMLparser.h +33 -30
- lxml/includes/libxml/HTMLtree.h +1 -0
- lxml/includes/libxml/SAX.h +2 -186
- lxml/includes/libxml/SAX2.h +2 -3
- lxml/includes/libxml/catalog.h +1 -0
- lxml/includes/libxml/debugXML.h +0 -138
- lxml/includes/libxml/encoding.h +124 -61
- lxml/includes/libxml/entities.h +0 -19
- lxml/includes/libxml/globals.h +0 -16
- lxml/includes/libxml/nanoftp.h +3 -173
- lxml/includes/libxml/parser.h +474 -231
- lxml/includes/libxml/parserInternals.h +21 -101
- lxml/includes/libxml/relaxng.h +7 -2
- lxml/includes/libxml/threads.h +0 -6
- lxml/includes/libxml/tree.h +29 -85
- lxml/includes/libxml/valid.h +20 -12
- lxml/includes/libxml/xinclude.h +5 -0
- lxml/includes/libxml/xlink.h +4 -0
- lxml/includes/libxml/xmlIO.h +15 -34
- lxml/includes/libxml/xmlautomata.h +19 -2
- lxml/includes/libxml/xmlerror.h +18 -18
- lxml/includes/libxml/xmlexports.h +6 -56
- lxml/includes/libxml/xmlmemory.h +19 -19
- lxml/includes/libxml/xmlmodule.h +4 -0
- lxml/includes/libxml/xmlreader.h +11 -3
- lxml/includes/libxml/xmlregexp.h +7 -106
- lxml/includes/libxml/xmlsave.h +11 -2
- lxml/includes/libxml/xmlschemas.h +10 -5
- lxml/includes/libxml/xmlunicode.h +3 -354
- lxml/includes/libxml/xmlversion.h +19 -34
- lxml/includes/libxml/xpath.h +5 -15
- lxml/includes/libxml/xpathInternals.h +9 -3
- lxml/includes/libxml/xpointer.h +1 -91
- lxml/includes/lxml-version.h +1 -1
- lxml/includes/tree.pxd +10 -12
- lxml/includes/xmlparser.pxd +46 -8
- lxml/lxml.etree.h +24 -28
- lxml/lxml.etree_api.h +59 -50
- lxml/objectify.cpython-310-darwin.so +0 -0
- lxml/objectify.pyx +11 -7
- lxml/parser.pxi +106 -47
- lxml/sax.cpython-310-darwin.so +0 -0
- lxml/sax.py +11 -0
- lxml/saxparser.pxi +14 -14
- lxml/schematron.pxi +8 -3
- lxml/serializer.pxi +71 -3
- lxml/xslt.pxi +10 -3
- lxml-6.0.0.dist-info/METADATA +163 -0
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/RECORD +73 -71
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/WHEEL +2 -1
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSE.txt +3 -1
- lxml-5.4.0.dist-info/METADATA +0 -96
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSES.txt +0 -0
- {lxml-5.4.0.dist-info → lxml-6.0.0.dist-info}/top_level.txt +0 -0
lxml/saxparser.pxi
CHANGED
@@ -217,7 +217,7 @@ cdef class _SaxParserContext(_ParserContext):
|
|
217
217
|
finally:
|
218
218
|
self._parser = None # clear circular reference ASAP
|
219
219
|
if self._matcher is not None:
|
220
|
-
self._matcher.cacheTags(self._doc, True)
|
220
|
+
self._matcher.cacheTags(self._doc, force_into_dict=True)
|
221
221
|
return 0
|
222
222
|
|
223
223
|
cdef int pushEvent(self, event, xmlNode* c_node) except -1:
|
@@ -297,7 +297,7 @@ cdef void _handleSaxStart(
|
|
297
297
|
cdef int i
|
298
298
|
cdef size_t c_len
|
299
299
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
300
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
300
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
301
301
|
return
|
302
302
|
context = <_SaxParserContext>c_ctxt._private
|
303
303
|
cdef int event_filter = context._event_filter
|
@@ -345,7 +345,7 @@ cdef void _handleSaxTargetStart(
|
|
345
345
|
cdef int i
|
346
346
|
cdef size_t c_len
|
347
347
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
348
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
348
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
349
349
|
return
|
350
350
|
context = <_SaxParserContext>c_ctxt._private
|
351
351
|
|
@@ -411,7 +411,7 @@ cdef void _handleSaxTargetStart(
|
|
411
411
|
cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name,
|
412
412
|
const_xmlChar** c_attributes) noexcept with gil:
|
413
413
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
414
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
414
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
415
415
|
return
|
416
416
|
context = <_SaxParserContext>c_ctxt._private
|
417
417
|
try:
|
@@ -436,7 +436,7 @@ cdef void _handleSaxStartNoNs(void* ctxt, const_xmlChar* c_name,
|
|
436
436
|
cdef void _handleSaxTargetStartNoNs(void* ctxt, const_xmlChar* c_name,
|
437
437
|
const_xmlChar** c_attributes) noexcept with gil:
|
438
438
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
439
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
439
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
440
440
|
return
|
441
441
|
context = <_SaxParserContext>c_ctxt._private
|
442
442
|
try:
|
@@ -493,7 +493,7 @@ cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname,
|
|
493
493
|
const_xmlChar* c_prefix,
|
494
494
|
const_xmlChar* c_namespace) noexcept with gil:
|
495
495
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
496
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
496
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
497
497
|
return
|
498
498
|
context = <_SaxParserContext>c_ctxt._private
|
499
499
|
try:
|
@@ -516,7 +516,7 @@ cdef void _handleSaxEnd(void* ctxt, const_xmlChar* c_localname,
|
|
516
516
|
|
517
517
|
cdef void _handleSaxEndNoNs(void* ctxt, const_xmlChar* c_name) noexcept with gil:
|
518
518
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
519
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
519
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
520
520
|
return
|
521
521
|
context = <_SaxParserContext>c_ctxt._private
|
522
522
|
try:
|
@@ -569,7 +569,7 @@ cdef int _pushSaxEndEvent(_SaxParserContext context,
|
|
569
569
|
cdef void _handleSaxData(void* ctxt, const_xmlChar* c_data, int data_len) noexcept with gil:
|
570
570
|
# can only be called if parsing with a target
|
571
571
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
572
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
572
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
573
573
|
return
|
574
574
|
context = <_SaxParserContext>c_ctxt._private
|
575
575
|
try:
|
@@ -586,7 +586,7 @@ cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name,
|
|
586
586
|
const_xmlChar* c_system) noexcept with gil:
|
587
587
|
# can only be called if parsing with a target
|
588
588
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
589
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
589
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
590
590
|
return
|
591
591
|
context = <_SaxParserContext>c_ctxt._private
|
592
592
|
try:
|
@@ -602,7 +602,7 @@ cdef void _handleSaxTargetDoctype(void* ctxt, const_xmlChar* c_name,
|
|
602
602
|
|
603
603
|
cdef void _handleSaxStartDocument(void* ctxt) noexcept with gil:
|
604
604
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
605
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
605
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
606
606
|
return
|
607
607
|
context = <_SaxParserContext>c_ctxt._private
|
608
608
|
context._origSaxStartDocument(ctxt)
|
@@ -619,7 +619,7 @@ cdef void _handleSaxTargetPI(void* ctxt, const_xmlChar* c_target,
|
|
619
619
|
const_xmlChar* c_data) noexcept with gil:
|
620
620
|
# can only be called if parsing with a target
|
621
621
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
622
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
622
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
623
623
|
return
|
624
624
|
context = <_SaxParserContext>c_ctxt._private
|
625
625
|
try:
|
@@ -638,7 +638,7 @@ cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target,
|
|
638
638
|
const_xmlChar* data) noexcept with gil:
|
639
639
|
# can only be called when collecting pi events
|
640
640
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
641
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
641
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
642
642
|
return
|
643
643
|
context = <_SaxParserContext>c_ctxt._private
|
644
644
|
context._origSaxPI(ctxt, target, data)
|
@@ -656,7 +656,7 @@ cdef void _handleSaxPIEvent(void* ctxt, const_xmlChar* target,
|
|
656
656
|
cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) noexcept with gil:
|
657
657
|
# can only be called if parsing with a target
|
658
658
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
659
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
659
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
660
660
|
return
|
661
661
|
context = <_SaxParserContext>c_ctxt._private
|
662
662
|
try:
|
@@ -672,7 +672,7 @@ cdef void _handleSaxTargetComment(void* ctxt, const_xmlChar* c_data) noexcept wi
|
|
672
672
|
cdef void _handleSaxComment(void* ctxt, const_xmlChar* text) noexcept with gil:
|
673
673
|
# can only be called when collecting comment events
|
674
674
|
c_ctxt = <xmlparser.xmlParserCtxt*>ctxt
|
675
|
-
if c_ctxt._private is NULL or c_ctxt.disableSAX:
|
675
|
+
if c_ctxt._private is NULL or xmlparser.xmlCtxtIsStopped(c_ctxt):
|
676
676
|
return
|
677
677
|
context = <_SaxParserContext>c_ctxt._private
|
678
678
|
context._origSaxComment(ctxt, text)
|
lxml/schematron.pxi
CHANGED
@@ -69,9 +69,6 @@ cdef class Schematron(_Validator):
|
|
69
69
|
"""
|
70
70
|
cdef schematron.xmlSchematron* _c_schema
|
71
71
|
cdef xmlDoc* _c_schema_doc
|
72
|
-
def __cinit__(self):
|
73
|
-
self._c_schema = NULL
|
74
|
-
self._c_schema_doc = NULL
|
75
72
|
|
76
73
|
def __init__(self, etree=None, *, file=None):
|
77
74
|
cdef _Document doc
|
@@ -83,6 +80,14 @@ cdef class Schematron(_Validator):
|
|
83
80
|
if not config.ENABLE_SCHEMATRON:
|
84
81
|
raise SchematronError, \
|
85
82
|
"lxml.etree was compiled without Schematron support."
|
83
|
+
|
84
|
+
import warnings
|
85
|
+
warnings.warn(
|
86
|
+
"The (non-ISO) Schematron feature is deprecated and will be removed from libxml2 and lxml. "
|
87
|
+
"Use 'lxml.isoschematron' instead.",
|
88
|
+
DeprecationWarning,
|
89
|
+
)
|
90
|
+
|
86
91
|
if etree is not None:
|
87
92
|
doc = _documentOrRaise(etree)
|
88
93
|
root_node = _rootNodeOrRaise(etree)
|
lxml/serializer.pxi
CHANGED
@@ -476,6 +476,50 @@ cdef _write_attr_string(tree.xmlOutputBuffer* buf, const char *string):
|
|
476
476
|
tree.xmlOutputBufferWrite(buf, cur - base, base)
|
477
477
|
|
478
478
|
|
479
|
+
cdef void _write_cdata_section(tree.xmlOutputBuffer* buf, const char* c_data, const char* c_end):
|
480
|
+
tree.xmlOutputBufferWrite(buf, 9, "<![CDATA[")
|
481
|
+
while c_end - c_data > limits.INT_MAX:
|
482
|
+
tree.xmlOutputBufferWrite(buf, limits.INT_MAX, c_data)
|
483
|
+
c_data += limits.INT_MAX
|
484
|
+
tree.xmlOutputBufferWrite(buf, c_end - c_data, c_data)
|
485
|
+
tree.xmlOutputBufferWrite(buf, 3, "]]>")
|
486
|
+
|
487
|
+
|
488
|
+
cdef _write_cdata_string(tree.xmlOutputBuffer* buf, bytes bstring):
|
489
|
+
cdef const char* c_data = bstring
|
490
|
+
cdef const char* c_end = c_data + len(bstring)
|
491
|
+
cdef const char* c_pos = c_data
|
492
|
+
cdef bint nothing_written = True
|
493
|
+
|
494
|
+
while True:
|
495
|
+
c_pos = <const char*> cstring_h.memchr(c_pos, b']', c_end - c_pos)
|
496
|
+
if not c_pos:
|
497
|
+
break
|
498
|
+
c_pos += 1
|
499
|
+
next_char = c_pos[0]
|
500
|
+
c_pos += 1
|
501
|
+
if next_char != b']':
|
502
|
+
continue
|
503
|
+
# Found ']]', c_pos points to next character.
|
504
|
+
while c_pos[0] == b']':
|
505
|
+
c_pos += 1
|
506
|
+
if c_pos[0] != b'>':
|
507
|
+
if c_pos == c_end:
|
508
|
+
break
|
509
|
+
# c_pos[0] is neither ']' nor '>', continue with next character.
|
510
|
+
c_pos += 1
|
511
|
+
continue
|
512
|
+
|
513
|
+
# Write section up to ']]' and start next block at trailing '>'.
|
514
|
+
_write_cdata_section(buf, c_data, c_pos)
|
515
|
+
nothing_written = False
|
516
|
+
c_data = c_pos
|
517
|
+
c_pos += 1
|
518
|
+
|
519
|
+
if nothing_written or c_data < c_end:
|
520
|
+
_write_cdata_section(buf, c_data, c_end)
|
521
|
+
|
522
|
+
|
479
523
|
############################################################
|
480
524
|
# output to file-like objects
|
481
525
|
|
@@ -519,6 +563,7 @@ cdef class _FilelikeWriter:
|
|
519
563
|
cdef object _close_filelike
|
520
564
|
cdef _ExceptionContext _exc_context
|
521
565
|
cdef _ErrorLog error_log
|
566
|
+
|
522
567
|
def __cinit__(self, filelike, exc_context=None, compression=None, close=False):
|
523
568
|
if compression is not None and compression > 0:
|
524
569
|
filelike = GzipFile(
|
@@ -659,6 +704,12 @@ cdef _FilelikeWriter _create_output_buffer(
|
|
659
704
|
f"unknown encoding: '{c_enc.decode('UTF-8') if c_enc is not NULL else u''}'")
|
660
705
|
try:
|
661
706
|
f = _getFSPathOrObject(f)
|
707
|
+
|
708
|
+
if c_compression and not HAS_ZLIB_COMPRESSION and _isString(f):
|
709
|
+
# Let "_FilelikeWriter" fall back to Python's GzipFile.
|
710
|
+
f = open(f, mode="wb")
|
711
|
+
close = True
|
712
|
+
|
662
713
|
if _isString(f):
|
663
714
|
filename8 = _encodeFilename(f)
|
664
715
|
if b'%' in filename8 and (
|
@@ -695,7 +746,10 @@ cdef xmlChar **_convert_ns_prefixes(tree.xmlDict* c_dict, ns_prefixes) except NU
|
|
695
746
|
try:
|
696
747
|
for prefix in ns_prefixes:
|
697
748
|
prefix_utf = _utf8(prefix)
|
698
|
-
|
749
|
+
c_prefix_len = len(prefix_utf)
|
750
|
+
if c_prefix_len > limits.INT_MAX:
|
751
|
+
raise ValueError("Prefix too long")
|
752
|
+
c_prefix = tree.xmlDictExists(c_dict, _xcstr(prefix_utf), <int> c_prefix_len)
|
699
753
|
if c_prefix:
|
700
754
|
# unknown prefixes do not need to get serialised
|
701
755
|
c_ns_prefixes[i] = <xmlChar*>c_prefix
|
@@ -725,6 +779,13 @@ cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
|
|
725
779
|
if inclusive_ns_prefixes else NULL)
|
726
780
|
|
727
781
|
f = _getFSPathOrObject(f)
|
782
|
+
|
783
|
+
close = False
|
784
|
+
if compression and not HAS_ZLIB_COMPRESSION and _isString(f):
|
785
|
+
# Let "_FilelikeWriter" fall back to Python's GzipFile.
|
786
|
+
f = open(f, mode="wb")
|
787
|
+
close = True
|
788
|
+
|
728
789
|
if _isString(f):
|
729
790
|
filename8 = _encodeFilename(f)
|
730
791
|
c_filename = _cstr(filename8)
|
@@ -733,7 +794,7 @@ cdef _tofilelikeC14N(f, _Element element, bint exclusive, bint with_comments,
|
|
733
794
|
c_doc, NULL, exclusive, c_inclusive_ns_prefixes,
|
734
795
|
with_comments, c_filename, compression)
|
735
796
|
elif hasattr(f, 'write'):
|
736
|
-
writer = _FilelikeWriter(f, compression=compression)
|
797
|
+
writer = _FilelikeWriter(f, compression=compression, close=close)
|
737
798
|
c_buffer = writer._createOutputBuffer(NULL)
|
738
799
|
try:
|
739
800
|
with writer.error_log:
|
@@ -1556,6 +1617,11 @@ cdef class _IncrementalFileWriter:
|
|
1556
1617
|
else:
|
1557
1618
|
tree.xmlOutputBufferWriteEscape(self._c_out, _xcstr(bstring), NULL)
|
1558
1619
|
|
1620
|
+
elif isinstance(content, CDATA):
|
1621
|
+
if self._status > WRITER_IN_ELEMENT:
|
1622
|
+
raise LxmlSyntaxError("not in an element")
|
1623
|
+
_write_cdata_string(self._c_out, (<CDATA>content)._utf8_data)
|
1624
|
+
|
1559
1625
|
elif iselement(content):
|
1560
1626
|
if self._status > WRITER_IN_ELEMENT:
|
1561
1627
|
raise LxmlSyntaxError("cannot append trailing element to complete XML document")
|
@@ -1568,8 +1634,10 @@ cdef class _IncrementalFileWriter:
|
|
1568
1634
|
|
1569
1635
|
elif content is not None:
|
1570
1636
|
raise TypeError(
|
1571
|
-
f"got invalid input value of type {type(content)}, expected string or Element")
|
1637
|
+
f"got invalid input value of type {type(content)}, expected string, CDATA or Element")
|
1638
|
+
|
1572
1639
|
self._handle_error(self._c_out.error)
|
1640
|
+
|
1573
1641
|
if not self._buffered:
|
1574
1642
|
tree.xmlOutputBufferFlush(self._c_out)
|
1575
1643
|
self._handle_error(self._c_out.error)
|
lxml/xslt.pxi
CHANGED
@@ -664,9 +664,16 @@ cdef _convert_xslt_parameters(xslt.xsltTransformContext* transform_ctxt,
|
|
664
664
|
v = (<XPath>value)._path
|
665
665
|
else:
|
666
666
|
v = _utf8(value)
|
667
|
-
|
667
|
+
|
668
|
+
c_len = len(k)
|
669
|
+
if c_len > limits.INT_MAX:
|
670
|
+
raise ValueError("Parameter name too long")
|
671
|
+
params[i] = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(k), <int> c_len)
|
668
672
|
i += 1
|
669
|
-
|
673
|
+
c_len = len(v)
|
674
|
+
if c_len > limits.INT_MAX:
|
675
|
+
raise ValueError("Parameter value too long")
|
676
|
+
params[i] = <const_char*> tree.xmlDictLookup(c_dict, _xcstr(v), <int> c_len)
|
670
677
|
i += 1
|
671
678
|
except:
|
672
679
|
python.lxml_free(params)
|
@@ -732,7 +739,7 @@ cdef class _XSLTResultTree(_ElementTree):
|
|
732
739
|
raise XSLTSaveError("No document to serialise")
|
733
740
|
c_compression = compression or 0
|
734
741
|
xslt.LXML_GET_XSLT_ENCODING(c_encoding, self._xslt._c_style)
|
735
|
-
writer = _create_output_buffer(file, <const_char*>c_encoding,
|
742
|
+
writer = _create_output_buffer(file, <const_char*>c_encoding, c_compression, &c_buffer, close=False)
|
736
743
|
if writer is None:
|
737
744
|
with nogil:
|
738
745
|
r = xslt.xsltSaveResultTo(c_buffer, doc._c_doc, self._xslt._c_style)
|
@@ -0,0 +1,163 @@
|
|
1
|
+
Metadata-Version: 2.4
|
2
|
+
Name: lxml
|
3
|
+
Version: 6.0.0
|
4
|
+
Summary: Powerful and Pythonic XML processing library combining libxml2/libxslt with the ElementTree API.
|
5
|
+
Home-page: https://lxml.de/
|
6
|
+
Author: lxml dev team
|
7
|
+
Author-email: lxml@lxml.de
|
8
|
+
Maintainer: lxml dev team
|
9
|
+
Maintainer-email: lxml@lxml.de
|
10
|
+
License: BSD-3-Clause
|
11
|
+
Project-URL: Source, https://github.com/lxml/lxml
|
12
|
+
Project-URL: Bug Tracker, https://bugs.launchpad.net/lxml
|
13
|
+
Classifier: Development Status :: 5 - Production/Stable
|
14
|
+
Classifier: Intended Audience :: Developers
|
15
|
+
Classifier: Intended Audience :: Information Technology
|
16
|
+
Classifier: License :: OSI Approved :: BSD License
|
17
|
+
Classifier: Programming Language :: Cython
|
18
|
+
Classifier: Programming Language :: Python :: 3
|
19
|
+
Classifier: Programming Language :: Python :: 3.8
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
21
|
+
Classifier: Programming Language :: Python :: 3.10
|
22
|
+
Classifier: Programming Language :: Python :: 3.11
|
23
|
+
Classifier: Programming Language :: Python :: 3.12
|
24
|
+
Classifier: Programming Language :: Python :: 3.13
|
25
|
+
Classifier: Programming Language :: C
|
26
|
+
Classifier: Operating System :: OS Independent
|
27
|
+
Classifier: Topic :: Text Processing :: Markup :: HTML
|
28
|
+
Classifier: Topic :: Text Processing :: Markup :: XML
|
29
|
+
Classifier: Topic :: Software Development :: Libraries :: Python Modules
|
30
|
+
Requires-Python: >=3.8
|
31
|
+
License-File: LICENSE.txt
|
32
|
+
License-File: LICENSES.txt
|
33
|
+
Provides-Extra: source
|
34
|
+
Provides-Extra: cssselect
|
35
|
+
Requires-Dist: cssselect>=0.7; extra == "cssselect"
|
36
|
+
Provides-Extra: html5
|
37
|
+
Requires-Dist: html5lib; extra == "html5"
|
38
|
+
Provides-Extra: htmlsoup
|
39
|
+
Requires-Dist: BeautifulSoup4; extra == "htmlsoup"
|
40
|
+
Provides-Extra: html-clean
|
41
|
+
Requires-Dist: lxml_html_clean; extra == "html-clean"
|
42
|
+
Dynamic: author
|
43
|
+
Dynamic: author-email
|
44
|
+
Dynamic: classifier
|
45
|
+
Dynamic: description
|
46
|
+
Dynamic: home-page
|
47
|
+
Dynamic: license
|
48
|
+
Dynamic: license-file
|
49
|
+
Dynamic: maintainer
|
50
|
+
Dynamic: maintainer-email
|
51
|
+
Dynamic: project-url
|
52
|
+
Dynamic: provides-extra
|
53
|
+
Dynamic: requires-python
|
54
|
+
Dynamic: summary
|
55
|
+
|
56
|
+
lxml is a Pythonic, mature binding for the libxml2 and libxslt libraries.
|
57
|
+
It provides safe and convenient access to these libraries using the
|
58
|
+
ElementTree API.
|
59
|
+
|
60
|
+
It extends the ElementTree API significantly to offer support for XPath,
|
61
|
+
RelaxNG, XML Schema, XSLT, C14N and much more.
|
62
|
+
|
63
|
+
To contact the project, go to the `project home page <https://lxml.de/>`_
|
64
|
+
or see our bug tracker at https://launchpad.net/lxml
|
65
|
+
|
66
|
+
In case you want to use the current in-development version of lxml,
|
67
|
+
you can get it from the github repository at
|
68
|
+
https://github.com/lxml/lxml . Note that this requires Cython to
|
69
|
+
build the sources, see the build instructions on the project home page.
|
70
|
+
|
71
|
+
|
72
|
+
After an official release of a new stable series, bug fixes may become available at
|
73
|
+
https://github.com/lxml/lxml/tree/lxml-6.0 .
|
74
|
+
Running ``pip install https://github.com/lxml/lxml/archive/refs/heads/lxml-6.0.tar.gz``
|
75
|
+
will install the unreleased branch state as soon as a maintenance branch has been established.
|
76
|
+
Note that this requires Cython to be installed at an appropriate version for the build.
|
77
|
+
|
78
|
+
6.0.0 (2025-06-26)
|
79
|
+
==================
|
80
|
+
|
81
|
+
Features added
|
82
|
+
--------------
|
83
|
+
|
84
|
+
* GH#463: ``lxml.html.diff`` is faster and provides structurally better diffs.
|
85
|
+
Original patch by Steven Fernandez.
|
86
|
+
|
87
|
+
* GH#405: The factories ``Element`` and ``ElementTree`` can now be used in type hints.
|
88
|
+
|
89
|
+
* GH#448: Parsing from ``memoryview`` and other buffers is supported to allow zero-copy parsing.
|
90
|
+
|
91
|
+
* GH#437: ``lxml.html.builder`` was missing several HTML5 tag names.
|
92
|
+
Patch by Nick Tarleton.
|
93
|
+
|
94
|
+
* GH#458: ``CDATA`` can now be written into the incremental ``xmlfile()`` writer.
|
95
|
+
Original patch by Lane Shaw.
|
96
|
+
|
97
|
+
* A new parser option ``decompress=False`` was added that controls the automatic
|
98
|
+
input decompression when using libxml2 2.15.0 or later. Disabling this option
|
99
|
+
by default will effectively prevent decompression bombs when handling untrusted
|
100
|
+
input. Code that depends on automatic decompression must enable this option.
|
101
|
+
Note that libxml2 2.15.0 was not released yet, so this option currently has no
|
102
|
+
effect but can already be used.
|
103
|
+
|
104
|
+
* The set of compile time / runtime supported libxml2 feature names is available as
|
105
|
+
``etree.LIBXML_COMPILED_FEATURES`` and ``etree.LIBXML_FEATURES``.
|
106
|
+
This currently includes
|
107
|
+
``catalog``, ``ftp``, ``html``, ``http``, ``iconv``, ``icu``,
|
108
|
+
``lzma``, ``regexp``, ``schematron``, ``xmlschema``, ``xpath``, ``zlib``.
|
109
|
+
|
110
|
+
Bugs fixed
|
111
|
+
----------
|
112
|
+
|
113
|
+
* GH#353: Predicates in ``.find*()`` could mishandle tag indices if a default namespace is provided.
|
114
|
+
Original patch by Luise K.
|
115
|
+
|
116
|
+
* GH#272: The ``head`` and ``body`` properties of ``lxml.html`` elements failed if no such element
|
117
|
+
was found. They now return ``None`` instead.
|
118
|
+
Original patch by FVolral.
|
119
|
+
|
120
|
+
* Tag names provided by code (API, not data) that are longer than ``INT_MAX``
|
121
|
+
could be truncated or mishandled in other ways.
|
122
|
+
|
123
|
+
* ``.text_content()`` on ``lxml.html`` elements accidentally returned a "smart string"
|
124
|
+
without additional information. It now returns a plain string.
|
125
|
+
|
126
|
+
* LP#2109931: When building lxml with coverage reporting, it now disables the ``sys.monitoring``
|
127
|
+
support due to the lack of support in https://github.com/nedbat/coveragepy/issues/1790
|
128
|
+
|
129
|
+
Other changes
|
130
|
+
-------------
|
131
|
+
|
132
|
+
* Support for Python < 3.8 was removed.
|
133
|
+
|
134
|
+
* Parsing directly from zlib (or lzma) compressed data is now considered an optional
|
135
|
+
feature in lxml. It may get removed from libxml2 at some point for security reasons
|
136
|
+
(compression bombs) and is therefore no longer guaranteed to be available in lxml.
|
137
|
+
|
138
|
+
As of this release, zlib support is still normally available in the binary wheels
|
139
|
+
but may get disabled or removed in later (x.y.0) releases. To test the availability,
|
140
|
+
use ``"zlib" in etree.LIBXML_FEATURES``.
|
141
|
+
|
142
|
+
* The ``Schematron`` class is deprecated and will become non-functional in a future lxml version.
|
143
|
+
The feature will soon be removed from libxml2 and stop being available.
|
144
|
+
|
145
|
+
* GH#438: Wheels include the ``arm7l`` target.
|
146
|
+
|
147
|
+
* GH#465: Windows wheels include the ``arm64`` target.
|
148
|
+
Patch by Finn Womack.
|
149
|
+
|
150
|
+
* Binary wheels use the library versions libxml2 2.14.4 and libxslt 1.1.43.
|
151
|
+
Note that this disables direct HTTP and FTP support for parsing from URLs.
|
152
|
+
Use Python URL request tools instead (which usually also support HTTPS).
|
153
|
+
To test the availability, use ``"http" in etree.LIBXML_FEATURES``.
|
154
|
+
|
155
|
+
* Windows binary wheels use the library versions libxml2 2.11.9, libxslt 1.1.39 and libiconv 1.17.
|
156
|
+
They are now based on VS-2022.
|
157
|
+
|
158
|
+
* Built using Cython 3.1.2.
|
159
|
+
|
160
|
+
* The debug methods ``MemDebug.dump()`` and ``MemDebug.show()`` were removed completely.
|
161
|
+
libxml2 2.13.0 discarded this feature.
|
162
|
+
|
163
|
+
|