lxml 5.3.2__cp39-cp39-macosx_10_9_universal2.whl → 6.0.0__cp39-cp39-macosx_10_9_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/__init__.py +1 -1
- lxml/_elementpath.cpython-39-darwin.so +0 -0
- lxml/_elementpath.py +3 -1
- lxml/apihelpers.pxi +25 -17
- lxml/builder.cpython-39-darwin.so +0 -0
- lxml/builder.py +11 -0
- lxml/debug.pxi +0 -54
- lxml/etree.cpython-39-darwin.so +0 -0
- lxml/etree.h +24 -28
- lxml/etree.pyx +154 -33
- lxml/etree_api.h +59 -50
- lxml/extensions.pxi +3 -6
- lxml/html/__init__.py +7 -3
- lxml/html/_difflib.cpython-39-darwin.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/builder.py +40 -0
- lxml/html/defs.py +3 -3
- lxml/html/diff.cpython-39-darwin.so +0 -0
- lxml/html/diff.py +406 -312
- lxml/includes/etree_defs.h +6 -6
- lxml/includes/libexslt/exsltconfig.h +3 -3
- lxml/includes/libxml/HTMLparser.h +41 -45
- lxml/includes/libxml/HTMLtree.h +1 -0
- lxml/includes/libxml/SAX.h +2 -186
- lxml/includes/libxml/SAX2.h +2 -3
- lxml/includes/libxml/c14n.h +1 -12
- lxml/includes/libxml/catalog.h +1 -0
- lxml/includes/libxml/debugXML.h +0 -138
- lxml/includes/libxml/encoding.h +131 -59
- lxml/includes/libxml/entities.h +12 -20
- lxml/includes/libxml/globals.h +0 -16
- lxml/includes/libxml/hash.h +19 -0
- lxml/includes/libxml/list.h +2 -2
- lxml/includes/libxml/nanoftp.h +3 -173
- lxml/includes/libxml/nanohttp.h +17 -0
- lxml/includes/libxml/parser.h +505 -256
- lxml/includes/libxml/parserInternals.h +26 -98
- lxml/includes/libxml/relaxng.h +7 -2
- lxml/includes/libxml/threads.h +0 -6
- lxml/includes/libxml/tree.h +61 -97
- lxml/includes/libxml/uri.h +11 -0
- lxml/includes/libxml/valid.h +49 -14
- lxml/includes/libxml/xinclude.h +12 -0
- lxml/includes/libxml/xlink.h +4 -0
- lxml/includes/libxml/xmlIO.h +33 -35
- lxml/includes/libxml/xmlautomata.h +19 -2
- lxml/includes/libxml/xmlerror.h +32 -18
- lxml/includes/libxml/xmlexports.h +61 -15
- lxml/includes/libxml/xmlmemory.h +27 -64
- lxml/includes/libxml/xmlmodule.h +4 -0
- lxml/includes/libxml/xmlreader.h +13 -3
- lxml/includes/libxml/xmlregexp.h +7 -106
- lxml/includes/libxml/xmlsave.h +15 -1
- lxml/includes/libxml/xmlschemas.h +10 -5
- lxml/includes/libxml/xmlunicode.h +3 -190
- lxml/includes/libxml/xmlversion.h +15 -194
- lxml/includes/libxml/xmlwriter.h +1 -0
- lxml/includes/libxml/xpath.h +9 -15
- lxml/includes/libxml/xpathInternals.h +9 -3
- lxml/includes/libxml/xpointer.h +1 -91
- lxml/includes/libxslt/xsltconfig.h +6 -6
- lxml/includes/lxml-version.h +1 -1
- lxml/includes/tree.pxd +10 -12
- lxml/includes/xmlparser.pxd +46 -8
- lxml/lxml.etree.h +24 -28
- lxml/lxml.etree_api.h +59 -50
- lxml/objectify.cpython-39-darwin.so +0 -0
- lxml/objectify.pyx +11 -7
- lxml/parser.pxi +106 -47
- lxml/sax.cpython-39-darwin.so +0 -0
- lxml/sax.py +11 -0
- lxml/saxparser.pxi +14 -14
- lxml/schematron.pxi +8 -3
- lxml/serializer.pxi +71 -3
- lxml/xslt.pxi +10 -3
- lxml-6.0.0.dist-info/METADATA +163 -0
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/RECORD +81 -79
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/WHEEL +2 -1
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSE.txt +3 -1
- lxml-5.3.2.dist-info/METADATA +0 -100
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/licenses/LICENSES.txt +0 -0
- {lxml-5.3.2.dist-info → lxml-6.0.0.dist-info}/top_level.txt +0 -0
lxml/html/builder.py
CHANGED
@@ -41,31 +41,44 @@ ACRONYM = E.acronym #:
|
|
41
41
|
ADDRESS = E.address #: information on author
|
42
42
|
APPLET = E.applet #: Java applet (DEPRECATED)
|
43
43
|
AREA = E.area #: client-side image map area
|
44
|
+
ARTICLE = E.article #: self-contained article
|
45
|
+
ASIDE = E.aside #: indirectly-related content
|
46
|
+
AUDIO = E.audio #: embedded audio file
|
44
47
|
B = E.b #: bold text style
|
45
48
|
BASE = E.base #: document base URI
|
46
49
|
BASEFONT = E.basefont #: base font size (DEPRECATED)
|
50
|
+
BDI = E.bdi #: isolate bidirectional text
|
47
51
|
BDO = E.bdo #: I18N BiDi over-ride
|
48
52
|
BIG = E.big #: large text style
|
49
53
|
BLOCKQUOTE = E.blockquote #: long quotation
|
50
54
|
BODY = E.body #: document body
|
51
55
|
BR = E.br #: forced line break
|
52
56
|
BUTTON = E.button #: push button
|
57
|
+
CANVAS = E.canvas #: scriptable graphics container
|
53
58
|
CAPTION = E.caption #: table caption
|
54
59
|
CENTER = E.center #: shorthand for DIV align=center (DEPRECATED)
|
55
60
|
CITE = E.cite #: citation
|
56
61
|
CODE = E.code #: computer code fragment
|
57
62
|
COL = E.col #: table column
|
58
63
|
COLGROUP = E.colgroup #: table column group
|
64
|
+
DATA = E.data #: machine-readable translation
|
65
|
+
DATALIST = E.datalist #: list of options for an input
|
59
66
|
DD = E.dd #: definition description
|
60
67
|
DEL = getattr(E, 'del') #: deleted text
|
68
|
+
DETAILS = E.details #: expandable section
|
61
69
|
DFN = E.dfn #: instance definition
|
70
|
+
DIALOG = E.dialog #: dialog box
|
62
71
|
DIR = E.dir #: directory list (DEPRECATED)
|
63
72
|
DIV = E.div #: generic language/style container
|
64
73
|
DL = E.dl #: definition list
|
65
74
|
DT = E.dt #: definition term
|
66
75
|
EM = E.em #: emphasis
|
76
|
+
EMBED = E.embed #: embedded external content
|
67
77
|
FIELDSET = E.fieldset #: form control group
|
78
|
+
FIGCAPTION = E.figcaption #: figure caption
|
79
|
+
FIGURE = E.figure #: self-contained, possibly-captioned content
|
68
80
|
FONT = E.font #: local change to font (DEPRECATED)
|
81
|
+
FOOTER = E.footer #: footer for nearest ancestor
|
69
82
|
FORM = E.form #: interactive form
|
70
83
|
FRAME = E.frame #: subwindow
|
71
84
|
FRAMESET = E.frameset #: window subdivision
|
@@ -76,6 +89,8 @@ H4 = E.h4 #: heading
|
|
76
89
|
H5 = E.h5 #: heading
|
77
90
|
H6 = E.h6 #: heading
|
78
91
|
HEAD = E.head #: document head
|
92
|
+
HEADER = E.header #: heading content
|
93
|
+
HGROUP = E.hgroup #: heading group
|
79
94
|
HR = E.hr #: horizontal rule
|
80
95
|
HTML = E.html #: document root element
|
81
96
|
I = E.i #: italic text style
|
@@ -89,43 +104,68 @@ LABEL = E.label #: form field label text
|
|
89
104
|
LEGEND = E.legend #: fieldset legend
|
90
105
|
LI = E.li #: list item
|
91
106
|
LINK = E.link #: a media-independent link
|
107
|
+
MAIN = E.main #: main content
|
92
108
|
MAP = E.map #: client-side image map
|
109
|
+
MARK = E.mark #: marked/highlighted text
|
110
|
+
MARQUEE = E.marquee #: scrolling text
|
93
111
|
MENU = E.menu #: menu list (DEPRECATED)
|
94
112
|
META = E.meta #: generic metainformation
|
113
|
+
METER = E.meter #: numerical value display
|
114
|
+
NAV = E.nav #: navigation section
|
115
|
+
NOBR = E.nobr #: prevent wrapping
|
95
116
|
NOFRAMES = E.noframes #: alternate content container for non frame-based rendering
|
96
117
|
NOSCRIPT = E.noscript #: alternate content container for non script-based rendering
|
97
118
|
OBJECT = E.object #: generic embedded object
|
98
119
|
OL = E.ol #: ordered list
|
99
120
|
OPTGROUP = E.optgroup #: option group
|
100
121
|
OPTION = E.option #: selectable choice
|
122
|
+
OUTPUT = E.output #: result of a calculation
|
101
123
|
P = E.p #: paragraph
|
102
124
|
PARAM = E.param #: named property value
|
125
|
+
PICTURE = E.picture #: picture with multiple sources
|
126
|
+
PORTAL = E.portal #: embedded preview
|
103
127
|
PRE = E.pre #: preformatted text
|
128
|
+
PROGRESS = E.progress #: progress bar
|
104
129
|
Q = E.q #: short inline quotation
|
130
|
+
RB = E.rb #: ruby base text
|
131
|
+
RP = E.rp #: ruby parentheses
|
132
|
+
RT = E.rt #: ruby text component
|
133
|
+
RTC = E.rtc #: ruby semantic annotation
|
134
|
+
RUBY = E.ruby #: ruby annotations
|
105
135
|
S = E.s #: strike-through text style (DEPRECATED)
|
106
136
|
SAMP = E.samp #: sample program output, scripts, etc.
|
107
137
|
SCRIPT = E.script #: script statements
|
138
|
+
SEARCH = E.search #: set of form controls for a search
|
139
|
+
SECTION = E.section #: generic standalone section
|
108
140
|
SELECT = E.select #: option selector
|
141
|
+
SLOT = E.slot #: placeholder for JS use
|
109
142
|
SMALL = E.small #: small text style
|
143
|
+
SOURCE = E.source #: source for picture/audio/video element
|
110
144
|
SPAN = E.span #: generic language/style container
|
111
145
|
STRIKE = E.strike #: strike-through text (DEPRECATED)
|
112
146
|
STRONG = E.strong #: strong emphasis
|
113
147
|
STYLE = E.style #: style info
|
114
148
|
SUB = E.sub #: subscript
|
149
|
+
SUMMARY = E.summary #: summary for <details>
|
115
150
|
SUP = E.sup #: superscript
|
116
151
|
TABLE = E.table #:
|
117
152
|
TBODY = E.tbody #: table body
|
118
153
|
TD = E.td #: table data cell
|
154
|
+
TEMPLATE = E.template #: fragment for JS use
|
119
155
|
TEXTAREA = E.textarea #: multi-line text field
|
120
156
|
TFOOT = E.tfoot #: table footer
|
121
157
|
TH = E.th #: table header cell
|
122
158
|
THEAD = E.thead #: table header
|
159
|
+
TIME = E.time #: date/time
|
123
160
|
TITLE = E.title #: document title
|
124
161
|
TR = E.tr #: table row
|
162
|
+
TRACK = E.track #: audio/video track
|
125
163
|
TT = E.tt #: teletype or monospaced text style
|
126
164
|
U = E.u #: underlined text style (DEPRECATED)
|
127
165
|
UL = E.ul #: unordered list
|
128
166
|
VAR = E.var #: instance of a variable or program argument
|
167
|
+
VIDEO = E.video #: embedded video file
|
168
|
+
WBR = E.wbr #: word break
|
129
169
|
|
130
170
|
# attributes (only reserved words are included here)
|
131
171
|
ATTR = dict
|
lxml/html/defs.py
CHANGED
@@ -4,13 +4,13 @@
|
|
4
4
|
|
5
5
|
"""
|
6
6
|
Data taken from https://www.w3.org/TR/html401/index/elements.html
|
7
|
-
and https://
|
7
|
+
and https://html.spec.whatwg.org/multipage/syntax.html#elements-2
|
8
8
|
for html5_tags.
|
9
9
|
"""
|
10
10
|
|
11
11
|
empty_tags = frozenset([
|
12
|
-
'area', 'base', 'basefont', 'br', 'col', 'frame', 'hr',
|
13
|
-
'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track'])
|
12
|
+
'area', 'base', 'basefont', 'br', 'col', 'embed', 'frame', 'hr',
|
13
|
+
'img', 'input', 'isindex', 'link', 'meta', 'param', 'source', 'track', 'wbr'])
|
14
14
|
|
15
15
|
deprecated_tags = frozenset([
|
16
16
|
'applet', 'basefont', 'center', 'dir', 'font', 'isindex',
|
Binary file
|