lxml 6.0.0__cp312-cp312-macosx_10_13_universal2.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cpython-312-darwin.so +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cpython-312-darwin.so +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cpython-312-darwin.so +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cpython-312-darwin.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cpython-312-darwin.so +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/libcharset.h +45 -0
- lxml/includes/extlibs/localcharset.h +137 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libxml/HTMLparser.h +339 -0
- lxml/includes/libxml/HTMLtree.h +148 -0
- lxml/includes/libxml/SAX.h +18 -0
- lxml/includes/libxml/SAX2.h +170 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +115 -0
- lxml/includes/libxml/catalog.h +183 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +79 -0
- lxml/includes/libxml/dict.h +82 -0
- lxml/includes/libxml/encoding.h +307 -0
- lxml/includes/libxml/entities.h +147 -0
- lxml/includes/libxml/globals.h +25 -0
- lxml/includes/libxml/hash.h +251 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +16 -0
- lxml/includes/libxml/nanohttp.h +98 -0
- lxml/includes/libxml/parser.h +1633 -0
- lxml/includes/libxml/parserInternals.h +591 -0
- lxml/includes/libxml/relaxng.h +224 -0
- lxml/includes/libxml/schemasInternals.h +959 -0
- lxml/includes/libxml/schematron.h +143 -0
- lxml/includes/libxml/threads.h +81 -0
- lxml/includes/libxml/tree.h +1326 -0
- lxml/includes/libxml/uri.h +106 -0
- lxml/includes/libxml/valid.h +485 -0
- lxml/includes/libxml/xinclude.h +141 -0
- lxml/includes/libxml/xlink.h +193 -0
- lxml/includes/libxml/xmlIO.h +419 -0
- lxml/includes/libxml/xmlautomata.h +163 -0
- lxml/includes/libxml/xmlerror.h +962 -0
- lxml/includes/libxml/xmlexports.h +96 -0
- lxml/includes/libxml/xmlmemory.h +188 -0
- lxml/includes/libxml/xmlmodule.h +61 -0
- lxml/includes/libxml/xmlreader.h +444 -0
- lxml/includes/libxml/xmlregexp.h +116 -0
- lxml/includes/libxml/xmlsave.h +111 -0
- lxml/includes/libxml/xmlschemas.h +254 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +15 -0
- lxml/includes/libxml/xmlversion.h +332 -0
- lxml/includes/libxml/xmlwriter.h +489 -0
- lxml/includes/libxml/xpath.h +569 -0
- lxml/includes/libxml/xpathInternals.h +639 -0
- lxml/includes/libxml/xpointer.h +48 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/pattern.h +84 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1995 -0
- lxml/includes/libxslt/xsltconfig.h +146 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cpython-312-darwin.so +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cpython-312-darwin.so +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +174 -0
- lxml-6.0.0.dist-info/WHEEL +6 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/html/soupparser.py
ADDED
@@ -0,0 +1,314 @@
|
|
1
|
+
"""External interface to the BeautifulSoup HTML parser.
|
2
|
+
"""
|
3
|
+
|
4
|
+
__all__ = ["fromstring", "parse", "convert_tree"]
|
5
|
+
|
6
|
+
import re
|
7
|
+
from lxml import etree, html
|
8
|
+
|
9
|
+
try:
|
10
|
+
from bs4 import (
|
11
|
+
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
12
|
+
Declaration, Doctype)
|
13
|
+
_DECLARATION_OR_DOCTYPE = (Declaration, Doctype)
|
14
|
+
except ImportError:
|
15
|
+
from BeautifulSoup import (
|
16
|
+
BeautifulSoup, Tag, Comment, ProcessingInstruction, NavigableString,
|
17
|
+
Declaration)
|
18
|
+
_DECLARATION_OR_DOCTYPE = Declaration
|
19
|
+
|
20
|
+
|
21
|
+
def fromstring(data, beautifulsoup=None, makeelement=None, **bsargs):
    """Build an Element tree from a string of HTML via BeautifulSoup.

    The return value is the root ``<html>`` Element of the resulting tree.

    Two keywords allow customisation: `beautifulsoup` selects a different
    BeautifulSoup parser, and `makeelement` selects a different Element
    factory function.  When they are left at ``None``, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.  Any further keyword arguments are handed on to the parser.
    """
    html_root = _parse(data, beautifulsoup, makeelement, **bsargs)
    return html_root
|
34
|
+
|
35
|
+
|
36
|
+
def parse(file, beautifulsoup=None, makeelement=None, **bsargs):
    """Parse a file into an ElementTree using the BeautifulSoup parser.

    `file` is either an object with a ``read`` attribute, which is passed
    to the parser as is and left open for the caller to manage, or any
    value accepted by the builtin ``open()``.  In the latter case the file
    is opened here and closed again as soon as parsing has finished.

    You can pass a different BeautifulSoup parser through the
    `beautifulsoup` keyword, and a different Element factory function
    through the `makeelement` keyword.  By default, the standard
    ``BeautifulSoup`` class and the default factory of `lxml.html` are
    used.  Remaining keyword arguments are forwarded to the parser.

    Returns an ``etree.ElementTree`` wrapping the parsed root element.
    """
    if hasattr(file, 'read'):
        root = _parse(file, beautifulsoup, makeelement, **bsargs)
    else:
        # We opened this handle ourselves, so we must not leak it.  Closing
        # right after _parse() relies on the parser class consuming the
        # source before it returns (BeautifulSoup reads file-like markup
        # in its constructor).
        with open(file) as source:
            root = _parse(source, beautifulsoup, makeelement, **bsargs)
    return etree.ElementTree(root)
|
49
|
+
|
50
|
+
|
51
|
+
def convert_tree(beautiful_soup_tree, makeelement=None):
    """Convert a BeautifulSoup tree to a list of Element trees.

    Returns a list instead of a single root Element to support
    HTML-like soup with more than one root element.  Each returned
    Element has been detached from the temporary root it was built under.

    You can pass a different Element factory through the `makeelement`
    keyword.
    """
    root = _convert_tree(beautiful_soup_tree, makeelement)
    # list(root) is the supported replacement for the deprecated
    # Element.getchildren().  Taking the snapshot first also makes it safe
    # to detach the children while looping over them.
    children = list(root)
    for child in children:
        root.remove(child)
    return children
|
65
|
+
|
66
|
+
|
67
|
+
# helpers
|
68
|
+
|
69
|
+
def _parse(source, beautifulsoup, makeelement, **bsargs):
    """Run `source` through BeautifulSoup and return an ``<html>`` Element.

    `beautifulsoup` is the parser class to call (``None`` selects the
    imported ``BeautifulSoup``), `makeelement` is passed on to the tree
    conversion, and `bsargs` are the keyword arguments for the parser.
    Parser defaults are only filled in when the caller did not set them.
    """
    parser_class = BeautifulSoup if beautifulsoup is None else beautifulsoup

    # bs3 is recognised by its HTML_ENTITIES attribute.
    if hasattr(parser_class, "HTML_ENTITIES"):
        bsargs.setdefault('convertEntities', 'html')
    # bs4 is recognised by DEFAULT_BUILDER_FEATURES; default to the
    # Python html parser.
    if hasattr(parser_class, "DEFAULT_BUILDER_FEATURES"):
        bsargs.setdefault('features', 'html.parser')

    soup = parser_class(source, **bsargs)
    wrapper = _convert_tree(soup, makeelement)

    # from ET: wrap the document in a html root element, if necessary
    has_single_html_child = len(wrapper) == 1 and wrapper[0].tag == "html"
    if has_single_html_child:
        result = wrapper[0]
    else:
        wrapper.tag = "html"
        result = wrapper
    return result
|
85
|
+
|
86
|
+
|
87
|
+
# Bound ``match`` of a case-insensitive pattern for an HTML DOCTYPE
# declaration.  Leading whitespace, "<" and "!" are skipped; the two
# optional groups capture the first and second quoted string (quotes
# included) that follow ``DOCTYPE HTML [PUBLIC]``.
_parse_doctype_declaration = re.compile(
    (
        r'(?:\s|[<!])*DOCTYPE\s*HTML'
        r'(?:\s+PUBLIC)?(?:\s+(\'[^\']*\'|"[^"]*"))?'
        r'(?:\s+(\'[^\']*\'|"[^"]*"))?'
    ),
    flags=re.IGNORECASE,
).match
|
92
|
+
|
93
|
+
|
94
|
+
class _PseudoTag:
    """Minimal imitation of ``BeautifulSoup.Tag``.

    It carries only the ``name``, ``attrs`` and ``contents`` attributes
    and supports iteration over its contents.  The name is always
    ``'html'`` and the attribute list is always empty.
    """

    def __init__(self, contents):
        self.contents = contents
        self.attrs = []
        self.name = 'html'

    def __iter__(self):
        # Iterating the pseudo tag walks the nodes it was given.
        return iter(self.contents)
|
103
|
+
|
104
|
+
|
105
|
+
def _convert_tree(beautiful_soup_tree, makeelement):
    """Convert a BeautifulSoup tree into an Element tree with one root.

    All top-level ``Tag`` nodes end up below a single ``<html>`` element:
    the first top-level tag named "html" if there is one, otherwise a
    synthetic ``_PseudoTag``.  Convertible top-level nodes before the
    first and after the last tag are attached as preceding/following
    siblings of that root.  If a declaration/doctype node is found and
    matches the DOCTYPE pattern, its two quoted values are stored in the
    tree's ``docinfo``.

    `makeelement` is the factory for the root element; ``None`` selects
    ``html.html_parser.makeelement``.  Returns the converted root Element.
    """
    if makeelement is None:
        makeelement = html.html_parser.makeelement

    # Split the tree into three parts:
    # i) everything before the root element: document type
    #    declaration, comments, processing instructions, whitespace
    # ii) the root(s),
    # iii) everything after the root: comments, processing
    #      instructions, whitespace
    first_element_idx = last_element_idx = None
    html_root = declaration = None
    for i, e in enumerate(beautiful_soup_tree):
        if isinstance(e, Tag):
            if first_element_idx is None:
                first_element_idx = i
            last_element_idx = i
            # Only the first top-level <html> tag is remembered.
            if html_root is None and e.name and e.name.lower() == 'html':
                html_root = e
        elif declaration is None and isinstance(e, _DECLARATION_OR_DOCTYPE):
            # Likewise, only the first declaration/doctype node is kept.
            declaration = e

    # For a nice, well-formatted document, the variable roots below is
    # a list consisting of a single <html> element. However, the document
    # may be a soup like '<meta><head><title>Hello</head><body>Hi
    # all<\p>'. In this example roots is a list containing meta, head
    # and body elements.
    if first_element_idx is None:
        # No tag at all: every top-level node is treated as root content.
        pre_root = post_root = []
        roots = beautiful_soup_tree.contents
    else:
        pre_root = beautiful_soup_tree.contents[:first_element_idx]
        roots = beautiful_soup_tree.contents[first_element_idx:last_element_idx+1]
        post_root = beautiful_soup_tree.contents[last_element_idx+1:]

    # Reorganize so that there is one <html> root...
    if html_root is not None:
        # ... use existing one if possible, ...
        # Sibling roots before/after the <html> tag are folded into its
        # contents, keeping their relative order.
        i = roots.index(html_root)
        html_root.contents = roots[:i] + html_root.contents + roots[i+1:]
    else:
        # ... otherwise create a new one.
        html_root = _PseudoTag(roots)

    convert_node = _init_node_converters(makeelement)

    # Process pre_root
    res_root = convert_node(html_root)
    prev = res_root
    # Walk backwards so each converted node can be inserted directly
    # before the previously inserted one.
    for e in reversed(pre_root):
        converted = convert_node(e)
        if converted is not None:
            prev.addprevious(converted)
            prev = converted

    # ditto for post_root
    prev = res_root
    for e in post_root:
        converted = convert_node(e)
        if converted is not None:
            prev.addnext(converted)
            prev = converted

    if declaration is not None:
        try:
            # bs4 provides full Doctype string
            doctype_string = declaration.output_ready()
        except AttributeError:
            # No output_ready() on this node: fall back to its string value.
            doctype_string = declaration.string

        match = _parse_doctype_declaration(doctype_string)
        if not match:
            # Something is wrong if we end up in here. Since soupparser should
            # tolerate errors, do not raise Exception, just let it pass.
            pass
        else:
            external_id, sys_uri = match.groups()
            docinfo = res_root.getroottree().docinfo
            # strip quotes and update DOCTYPE values (any of None, '', '...')
            docinfo.public_id = external_id and external_id[1:-1]
            docinfo.system_url = sys_uri and sys_uri[1:-1]

    return res_root
|
188
|
+
|
189
|
+
|
190
|
+
def _init_node_converters(makeelement):
    """Build and return the ``convert_node(bs_node, parent=None)`` function.

    The returned function dispatches on the type of a BeautifulSoup node
    to one of the handlers registered below.  `makeelement` is the factory
    used for tags converted without a parent; tags with a parent are
    created through ``etree.SubElement``.
    """
    # Maps a node type to its handler.  It also serves as a cache for
    # types resolved through isinstance(), including a cached None for
    # types that have no handler.
    converters = {}
    # Registered types in registration order, used for the isinstance()
    # fallback lookup.
    ordered_node_types = []

    def converter(*types):
        # Decorator factory: registers the decorated handler for `types`.
        def add(handler):
            for t in types:
                converters[t] = handler
                ordered_node_types.append(t)
            return handler
        return add

    def find_best_converter(node):
        # First registered type that `node` is an instance of wins.
        for t in ordered_node_types:
            if isinstance(node, t):
                return converters[t]
        return None

    def convert_node(bs_node, parent=None):
        # duplicated in convert_tag() below
        try:
            handler = converters[type(bs_node)]
        except KeyError:
            # Unknown exact type: resolve once via isinstance() and cache
            # the outcome (possibly None) under the exact type.
            handler = converters[type(bs_node)] = find_best_converter(bs_node)
        if handler is None:
            return None
        return handler(bs_node, parent)

    def map_attrs(bs_attrs):
        # Returns a plain dict of attribute name -> unescaped value.
        if isinstance(bs_attrs, dict):  # bs4
            attribs = {}
            for k, v in bs_attrs.items():
                if isinstance(v, list):
                    # List values are flattened to a space-separated string.
                    v = " ".join(v)
                attribs[k] = unescape(v)
        else:
            # Otherwise the attributes are iterated as (name, value) pairs.
            attribs = {k: unescape(v) for k, v in bs_attrs}
        return attribs

    def append_text(parent, text):
        # Text goes into .text while the parent has no children yet,
        # otherwise onto the tail of its last child.
        if len(parent) == 0:
            parent.text = (parent.text or '') + text
        else:
            parent[-1].tail = (parent[-1].tail or '') + text

    # converters are tried in order of their definition
    # NOTE(review): NavigableString is registered last, presumably because
    # the more specific string node types derive from it -- confirm
    # against the BeautifulSoup class hierarchy.

    @converter(Tag, _PseudoTag)
    def convert_tag(bs_node, parent):
        attrs = bs_node.attrs
        if parent is not None:
            attribs = map_attrs(attrs) if attrs else None
            res = etree.SubElement(parent, bs_node.name, attrib=attribs)
        else:
            attribs = map_attrs(attrs) if attrs else {}
            res = makeelement(bs_node.name, attrib=attribs)

        for child in bs_node:
            # avoid double recursion by inlining convert_node(), see above
            try:
                handler = converters[type(child)]
            except KeyError:
                pass
            else:
                # Known type: call its handler directly.  A cached None
                # means the child is skipped.
                if handler is not None:
                    handler(child, res)
                continue
            # Type not seen before: take the slow path, which also caches it.
            convert_node(child, res)
        return res

    @converter(Comment)
    def convert_comment(bs_node, parent):
        res = html.HtmlComment(bs_node)
        if parent is not None:
            parent.append(res)
        return res

    @converter(ProcessingInstruction)
    def convert_pi(bs_node, parent):
        if bs_node.endswith('?'):
            # The PI is of XML style (<?as df?>) but BeautifulSoup
            # interpreted it as being SGML style (<?as df>). Fix.
            bs_node = bs_node[:-1]
        # Split at the first space into at most two constructor arguments.
        res = etree.ProcessingInstruction(*bs_node.split(' ', 1))
        if parent is not None:
            parent.append(res)
        return res

    @converter(NavigableString)
    def convert_text(bs_node, parent):
        # Text never becomes a node of its own; without a parent it is
        # dropped.
        if parent is not None:
            append_text(parent, unescape(bs_node))
        return None

    return convert_node
|
285
|
+
|
286
|
+
|
287
|
+
# copied from ET's ElementSoup
|
288
|
+
|
289
|
+
# The Python 2 fallback (``htmlentitydefs``) was unreachable on the
# supported Python 3 interpreters, so the table is imported directly.
from html.entities import name2codepoint


# Bound ``sub`` that rewrites every ``&name;`` style reference in a string.
handle_entities = re.compile(r"&(\w+);").sub


# Retained only as a backward-compatible module attribute; on Python 3 the
# former ``unichr`` probe always resolved to ``chr``.
unichr = chr


def _unescape_entity(m):
    """Return the character for a matched ``&name;`` reference.

    References whose name is not in ``name2codepoint`` are returned
    unchanged.
    """
    try:
        return chr(name2codepoint[m.group(1)])
    except KeyError:
        return m.group(0)  # use as is


def unescape(string):
    """Replace named HTML entity references in `string` by their characters.

    A falsy argument (``None`` or an empty string) yields ``''``.  Unknown
    entity names are left as is.  Only ``&name;`` references with a
    word-character name are handled.
    """
    if not string:
        return ''
    # work around oddities in BeautifulSoup's entity handling
    return handle_entities(_unescape_entity, string)
|
lxml/html/usedoctest.py
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
"""Doctest module for HTML comparison.
|
2
|
+
|
3
|
+
Usage::
|
4
|
+
|
5
|
+
>>> import lxml.html.usedoctest
|
6
|
+
>>> # now do your HTML doctests ...
|
7
|
+
|
8
|
+
See `lxml.doctestcompare`.
|
9
|
+
"""
|
10
|
+
|
11
|
+
from lxml import doctestcompare
|
12
|
+
|
13
|
+
# Runs as an import side effect: per the module docstring, importing this
# module is what switches doctests to HTML comparison.
# NOTE(review): the exact effect of ``html=True`` and ``del_module`` is
# defined in lxml.doctestcompare.temp_install -- confirm there.
doctestcompare.temp_install(html=True, del_module=__name__)
|
File without changes
|
File without changes
|
lxml/includes/c14n.pxd
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
from lxml.includes.tree cimport xmlDoc, xmlOutputBuffer, xmlChar
|
2
|
+
from lxml.includes.xpath cimport xmlNodeSet
|
3
|
+
|
4
|
+
# External declarations from libxml2's "libxml/c14n.h", declared nogil.
# All three functions take the same first five arguments and differ only
# in the trailing output argument(s).
# NOTE(review): return value and argument semantics are defined by
# libxml2 -- confirm against its C14N documentation.
cdef extern from "libxml/c14n.h" nogil:
    # Output argument: a pointer to an xmlChar* (`doc_txt_ptr`).
    cdef int xmlC14NDocDumpMemory(xmlDoc* doc,
                                  xmlNodeSet* nodes,
                                  int exclusive,
                                  xmlChar** inclusive_ns_prefixes,
                                  int with_comments,
                                  xmlChar** doc_txt_ptr)

    # Output arguments: a file name plus a compression setting.
    cdef int xmlC14NDocSave(xmlDoc* doc,
                            xmlNodeSet* nodes,
                            int exclusive,
                            xmlChar** inclusive_ns_prefixes,
                            int with_comments,
                            char* filename,
                            int compression)

    # Output argument: an xmlOutputBuffer.
    cdef int xmlC14NDocSaveTo(xmlDoc* doc,
                              xmlNodeSet* nodes,
                              int exclusive,
                              xmlChar** inclusive_ns_prefixes,
                              int with_comments,
                              xmlOutputBuffer* buffer)
|
lxml/includes/config.pxd
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
from lxml.includes cimport tree
|
2
|
+
from lxml.includes.tree cimport xmlDoc, xmlDtd
|
3
|
+
|
4
|
+
# External declarations from libxml2's "libxml/valid.h", declared nogil.
# NOTE(review): runtime semantics are defined by libxml2 -- confirm
# against its validation documentation.
cdef extern from "libxml/valid.h" nogil:
    # Variadic callback types taking a context pointer and a message.
    ctypedef void (*xmlValidityErrorFunc)(void * ctx, const char * msg, ...) noexcept
    ctypedef void (*xmlValidityWarningFunc)(void * ctx, const char * msg, ...) noexcept

    # Only the fields declared here are visible to Cython code.
    ctypedef struct xmlValidCtxt:
        void *userData
        xmlValidityErrorFunc error
        xmlValidityWarningFunc warning

    # Allocation and release of a validation context.
    cdef xmlValidCtxt* xmlNewValidCtxt()
    cdef void xmlFreeValidCtxt(xmlValidCtxt* cur)

    cdef int xmlValidateDtd(xmlValidCtxt* ctxt, xmlDoc* doc, xmlDtd* dtd)
    # Takes a DTD and an element name; returns a tree.xmlElement pointer.
    cdef tree.xmlElement* xmlGetDtdElementDesc(
        xmlDtd* dtd, tree.const_xmlChar* name)
|