lxml 6.0.0__cp310-cp310-win_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cp310-win_arm64.pyd +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cp310-win_arm64.pyd +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cp310-win_arm64.pyd +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cp310-win_arm64.pyd +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cp310-win_arm64.pyd +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libexslt/libexslt.h +29 -0
- lxml/includes/libxml/HTMLparser.h +320 -0
- lxml/includes/libxml/HTMLtree.h +147 -0
- lxml/includes/libxml/SAX.h +204 -0
- lxml/includes/libxml/SAX2.h +173 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +128 -0
- lxml/includes/libxml/catalog.h +182 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +217 -0
- lxml/includes/libxml/dict.h +81 -0
- lxml/includes/libxml/encoding.h +233 -0
- lxml/includes/libxml/entities.h +151 -0
- lxml/includes/libxml/globals.h +529 -0
- lxml/includes/libxml/hash.h +236 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +186 -0
- lxml/includes/libxml/nanohttp.h +81 -0
- lxml/includes/libxml/parser.h +1265 -0
- lxml/includes/libxml/parserInternals.h +662 -0
- lxml/includes/libxml/pattern.h +100 -0
- lxml/includes/libxml/relaxng.h +218 -0
- lxml/includes/libxml/schemasInternals.h +958 -0
- lxml/includes/libxml/schematron.h +142 -0
- lxml/includes/libxml/threads.h +94 -0
- lxml/includes/libxml/tree.h +1314 -0
- lxml/includes/libxml/uri.h +94 -0
- lxml/includes/libxml/valid.h +448 -0
- lxml/includes/libxml/xinclude.h +129 -0
- lxml/includes/libxml/xlink.h +189 -0
- lxml/includes/libxml/xmlIO.h +369 -0
- lxml/includes/libxml/xmlautomata.h +146 -0
- lxml/includes/libxml/xmlerror.h +919 -0
- lxml/includes/libxml/xmlexports.h +50 -0
- lxml/includes/libxml/xmlmemory.h +228 -0
- lxml/includes/libxml/xmlmodule.h +57 -0
- lxml/includes/libxml/xmlreader.h +428 -0
- lxml/includes/libxml/xmlregexp.h +222 -0
- lxml/includes/libxml/xmlsave.h +88 -0
- lxml/includes/libxml/xmlschemas.h +246 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +202 -0
- lxml/includes/libxml/xmlversion.h +526 -0
- lxml/includes/libxml/xmlwriter.h +488 -0
- lxml/includes/libxml/xpath.h +575 -0
- lxml/includes/libxml/xpathInternals.h +632 -0
- lxml/includes/libxml/xpointer.h +137 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/libxslt.h +36 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/trio.h +216 -0
- lxml/includes/libxslt/triodef.h +220 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/win32config.h +51 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1992 -0
- lxml/includes/libxslt/xsltconfig.h +179 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cp310-win_arm64.pyd +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cp310-win_arm64.pyd +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +177 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/doctestcompare.py
ADDED
@@ -0,0 +1,488 @@
|
|
1
|
+
"""
|
2
|
+
lxml-based doctest output comparison.
|
3
|
+
|
4
|
+
Note: normally, you should just import the `lxml.usedoctest` and
|
5
|
+
`lxml.html.usedoctest` modules from within a doctest, instead of this
|
6
|
+
one::
|
7
|
+
|
8
|
+
>>> import lxml.usedoctest # for XML output
|
9
|
+
|
10
|
+
>>> import lxml.html.usedoctest # for HTML output
|
11
|
+
|
12
|
+
To use this module directly, you must call ``lxml.doctestcompare.install()``,
|
13
|
+
which will cause doctest to use this in all subsequent calls.
|
14
|
+
|
15
|
+
This changes the way output is checked and comparisons are made for
|
16
|
+
XML or HTML-like content.
|
17
|
+
|
18
|
+
XML or HTML content is noticed because the example starts with ``<``
|
19
|
+
(it's HTML if it starts with ``<html``). You can also use the
|
20
|
+
``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
|
21
|
+
|
22
|
+
Some rough wildcard-like things are allowed. Whitespace is generally
|
23
|
+
ignored (except in attributes). In text (attributes and text in the
|
24
|
+
body) you can use ``...`` as a wildcard. In an example it also
|
25
|
+
matches any trailing tags in the element, though it does not match
|
26
|
+
leading tags. You may create a tag ``<any>`` or include an ``any``
|
27
|
+
attribute in the tag. An ``any`` tag matches any tag, while the
|
28
|
+
attribute matches any and all attributes.
|
29
|
+
|
30
|
+
When a match fails, the reformatted example and gotten text are
|
31
|
+
displayed (indented), and a rough diff-like output is given. Anything
|
32
|
+
marked with ``+`` is in the output but wasn't supposed to be, and
|
33
|
+
similarly ``-`` means it's in the example but wasn't in the output.
|
34
|
+
|
35
|
+
You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
|
36
|
+
"""
|
37
|
+
|
38
|
+
from lxml import etree
|
39
|
+
import sys
|
40
|
+
import re
|
41
|
+
import doctest
|
42
|
+
try:
|
43
|
+
from html import escape as html_escape
|
44
|
+
except ImportError:
|
45
|
+
from cgi import escape as html_escape
|
46
|
+
|
47
|
+
__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
|
48
|
+
'LHTMLOutputChecker', 'install', 'temp_install']
|
49
|
+
|
50
|
+
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
|
51
|
+
PARSE_XML = doctest.register_optionflag('PARSE_XML')
|
52
|
+
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')
|
53
|
+
|
54
|
+
OutputChecker = doctest.OutputChecker
|
55
|
+
|
56
|
+
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
|
61
|
+
|
62
|
+
def norm_whitespace(v):
    """Replace every match of ``_norm_whitespace_re`` in ``v`` with one space."""
    collapsed = _norm_whitespace_re.sub(' ', v)
    return collapsed
|
64
|
+
|
65
|
+
# Shared HTML parser used by html_fromstring(); created with recover=False
# and remove_blank_text=True.
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)
|
66
|
+
|
67
|
+
def html_fromstring(html):
    """Parse ``html`` with the module-level ``_html_parser`` and return the result."""
    return etree.fromstring(html, parser=_html_parser)
|
69
|
+
|
70
|
+
# We use this to distinguish repr()s from elements:
|
71
|
+
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
|
72
|
+
# Matches runs of two or more spaces, tabs or newlines; norm_whitespace()
# replaces each match with a single space.
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
|
73
|
+
|
74
|
+
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares markup as parsed trees.

    When both the expected and the actual output look like XML/HTML (or a
    ``PARSE_XML`` / ``PARSE_HTML`` flag forces it), both are parsed and the
    trees are compared element by element, with ``...``, ``<any>`` and the
    ``any`` attribute acting as wildcards.  Anything else is delegated to
    the stock ``doctest.OutputChecker``.
    """

    # Tags rendered without content or an end tag when formatting HTML.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used for markup that is not recognisably HTML."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if ``got`` matches ``want``.

        Falls back to the plain doctest comparison when no parser applies;
        returns False when either side fails to parse.
        """
        # NOTE: temp_install() transplants this function's code object onto
        # the check_output of the checker doctest is already using, so it
        # runs with that function's globals (temp_install only injects
        # ``etree`` into the doctest module).  Keep this body free of other
        # module-level names, closures and zero-argument super().
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Pick the parser for this example, or None to skip markup parsing.

        Precedence: ``NOPARSE_MARKUP`` (no parsing), ``PARSE_HTML``,
        ``PARSE_XML``, then sniffing: both texts starting with ``<html``
        (case-insensitively) select the HTML parser, and both merely looking
        like markup select ``get_default_parser()``.
        """
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            return html_fromstring
        if PARSE_XML & optionflags:
            return etree.XML
        # Lower-case *both* sides so that ``<HTML>`` in the actual output is
        # sniffed the same way as in the example.
        if (want.strip().lower().startswith('<html')
                and got.strip().lower().startswith('<html')):
            return html_fromstring
        if self._looks_like_markup(want) and self._looks_like_markup(got):
            return self.get_default_parser()
        return None

    def _looks_like_markup(self, s):
        """Return True if ``s`` starts with ``<`` and is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare two elements; return True if they match.

        Tag, text and tail must match (with wildcards).  Attributes must
        match unless ``want`` carries an ``any`` attribute.  Children are
        compared pairwise unless ``want`` is childless with text ``...``.
        """
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            want_count = len(want_children)
            got_count = len(got_children)
            index = 0
            while index < want_count or index < got_count:
                if index >= want_count or index >= got_count:
                    return False
                want_child = want_children[index]
                if not self.compare_docs(want_child, got_children[index]):
                    return False
                index += 1
                # A ``...`` tail on the last compared example child acts as
                # a wildcard for the rest of the element.  The original code
                # only stopped when the actual output ran out of children;
                # also stopping when the example runs out is what lets
                # ``...`` match trailing tags in the output.
                if want_child.tail == '...' and (
                        index >= got_count or index >= want_count):
                    break
        return True

    def text_compare(self, want, got, strip):
        """Return True if ``got`` matches ``want``, treating ``...`` as a wildcard.

        ``None`` is treated as the empty string.  With ``strip`` true, both
        texts are whitespace-normalised and stripped first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Escape everything, then turn the escaped ellipsis back into '.*'.
        pattern = '^%s$' % re.escape(want).replace(r'\.\.\.', '.*')
        return re.search(pattern, got) is not None

    def tag_compare(self, want, got):
        """Return True if tag ``got`` satisfies the expected tag ``want``.

        ``any`` matches every tag; a ``{...}`` prefix ignores the namespace.
        Non-string tags are compared with ``==``.
        """
        if want == 'any':
            return True
        if (not isinstance(want, (str, bytes))
                or not isinstance(got, (str, bytes))):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        return want == got

    def output_difference(self, example, got, optionflags):
        """Return a description of how ``got`` differs from ``example.want``.

        When markup parsing does not apply, or either side fails to parse,
        the stock doctest difference is returned (prefixed by the parse
        errors, if any).  Otherwise the result shows both formatted trees
        and a rough diff.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError as e:
                errors.append('In example: %s' % e)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError as e:
                errors.append('In actual output: %s' % e)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if ``el`` should be rendered as a content-less HTML tag."""
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Render element ``doc`` and its subtree as indented text.

        ``prefix`` is emitted directly before the opening tag of ``doc``
        only; it is not propagated to children.
        """
        pad = ' ' * indent
        parts = [pad, prefix, self.format_tag(doc)]
        if not len(doc):
            # No children: tag, text, end tag and tail share one line.
            if not self.html_empty_tag(doc, html):
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        if not self.html_empty_tag(doc, html):
            parts.append('\n')
            if strip(doc.text):
                parts.append(pad)
                parts.append(self.format_text(doc.text))
                parts.append('\n')
            for el in doc:
                parts.append(self.format_doc(el, html, indent + 2))
            parts.append(pad)
            parts.append(self.format_end_tag(doc))
            parts.append('\n')
        if strip(doc.tail):
            parts.append(pad)
            parts.append(self.format_text(doc.tail))
            parts.append('\n')
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return ``text`` HTML-escaped (quotes included); ``None`` gives ''."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the opening tag of ``el`` with attributes sorted by name."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        attrs = ['%s="%s"' % (name, self.format_text(value, False))
                 for name, value in sorted(el.attrib.items())]
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the closing tag of ``el`` (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Render a rough diff of the trees rooted at ``want`` and ``got``.

        Children are paired positionally; surplus children of ``got`` are
        marked ``+`` and surplus children of ``want`` are marked ``-``.
        """
        pad = ' ' * indent
        parts = [pad, self.collect_diff_tag(want, got)]
        if not len(want) and not len(got):
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.append(pad)
            parts.append(self.collect_diff_text(want.text, got.text))
            parts.append('\n')
        want_children = list(want)
        got_children = list(got)
        paired = min(len(want_children), len(got_children))
        for want_child, got_child in zip(want_children, got_children):
            parts.append(self.collect_diff(
                want_child, got_child, html, indent + 2))
        # At most one of these two slices is non-empty.
        for extra in got_children[paired:]:
            parts.append(self.format_doc(extra, html, indent + 2, '+'))
        for missing in want_children[paired:]:
            parts.append(self.format_doc(missing, html, indent + 2, '-'))
        parts.append(pad)
        parts.append(self.collect_diff_end_tag(want, got))
        parts.append('\n')
        if strip(want.tail) or strip(got.tail):
            parts.append(pad)
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Render the opening tag for the diff, flagging tag/attribute changes."""
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Render the closing tag for the diff, flagging a tag mismatch."""
        # Use tag_compare() like collect_diff_tag() does, so that ``any`` and
        # ``{...}`` wildcards are not reported as end-tag mismatches.
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Render ``got`` if it matches ``want``, else ``want (got: got)``."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
|
361
|
+
|
362
|
+
class LHTMLOutputChecker(LXMLOutputChecker):
    """Variant of ``LXMLOutputChecker`` that defaults to the HTML parser."""

    def get_default_parser(self):
        """Return ``html_fromstring`` as the parser for markup-like output."""
        return html_fromstring
|
365
|
+
|
366
|
+
def install(html=False):
    """
    Make doctest use this module's checker for all subsequent doctests.

    With a true ``html`` the HTML-defaulting checker is installed;
    otherwise the XML-defaulting one is.
    """
    doctest.OutputChecker = LHTMLOutputChecker if html else LXMLOutputChecker
|
377
|
+
|
378
|
+
def temp_install(html=False, del_module=None):
    """
    Enable this checker for the currently running doctest only; call it
    from *inside* that doctest.

    With a true ``html`` the HTML parser is the default, otherwise the XML
    parser.  ``del_module`` is handed to ``_RestoreChecker``, which removes
    the module of that name from ``sys.modules`` once the outcome has been
    recorded.
    """
    checker_class = LHTMLOutputChecker if html else LXMLOutputChecker
    frame = _find_doctest_frame()
    runner = frame.f_locals['self']
    replacement = checker_class()
    previous = runner._checker
    runner._checker = replacement
    # The doctest run loop keeps a local ``check`` that is a bound method of
    # the previous checker.  The frame's locals cannot be rebound from here,
    # so the function behind that method is modified in place instead: its
    # ``__code__`` is swapped for our check_output's code.  _RestoreChecker
    # undoes the swap when the runner's __record_outcome is next called,
    # which marks the end of the run.
    runner_check = frame.f_locals['check'].__func__
    replacement_check = replacement.check_output.__func__
    # The transplanted code still runs with the old function's globals, which
    # cannot be patched; ``etree`` is the only global it needs from here.
    doctest.etree = etree
    _RestoreChecker(runner, previous, replacement,
                    runner_check, replacement_check,
                    del_module)
|
412
|
+
|
413
|
+
class _RestoreChecker:
    """Installs the temporary check_output code and undoes it afterwards.

    On construction the code object of ``check_func`` is replaced with that
    of ``clone_func`` and the instance takes the place of the runner's
    ``__record_outcome``.  When the runner calls it, everything is restored
    and the call is forwarded to the original ``__record_outcome``.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # Hooks read by the transplanted check_output code.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Remember ``check_func``'s code/globals and swap in the clone's code."""
        target = self.check_func
        self.func_code = target.__code__
        self.func_globals = target.__globals__
        target.__code__ = self.clone_func.__code__

    def uninstall_clone(self):
        """Put the remembered code object back on ``check_func``."""
        self.check_func.__code__ = self.func_code

    def install_dt_self(self):
        """Replace the runner's ``__record_outcome`` with this instance."""
        runner = self.dt_self
        self.prev_func = runner._DocTestRunner__record_outcome
        runner._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's original ``__record_outcome``."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Drop ``del_module`` from ``sys.modules`` and from its parent package."""
        name = self.del_module
        if not name:
            return
        import sys
        del sys.modules[name]
        package, dot, leaf = name.rpartition('.')
        if dot:
            delattr(sys.modules[package], leaf)

    def __call__(self, *args, **kw):
        """Undo every temporary change, then forward to ``__record_outcome``."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        for hook in ('_temp_override_self', '_temp_call_super_check_output'):
            delattr(self.checker, hook)
        outcome = self.prev_func(*args, **kw)
        self.uninstall_module()
        return outcome

    def call_super(self, *args, **kw):
        """Call the original ``check_func`` code, then reinstall the clone."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            self.install_clone()
|
458
|
+
|
459
|
+
def _find_doctest_frame():
    """Return the nearest calling frame that has a local named ``BOOM``.

    Raises LookupError when no frame on the call stack has one.
    """
    import sys
    candidate = sys._getframe(1)
    while candidate:
        # Sign of doctest
        if 'BOOM' in candidate.f_locals:
            return candidate
        candidate = candidate.f_back
    raise LookupError(
        "Could not find doctest (only use this function *inside* a doctest)")
|
470
|
+
|
471
|
+
# Self-tests picked up by doctest.testmod().  The examples call print() as a
# function so that they are valid Python 3 source.
__test__ = {
    'basic': '''
    >>> temp_install()
    >>> print("""<xml a="1" b="2">stuff</xml>""")
    <xml b="2" a="1">...</xml>
    >>> print("""<xml xmlns="http://example.com"><tag attr="bar" /></xml>""")
    <xml xmlns="...">
      <tag attr="..." />
    </xml>
    >>> print("""<xml>blahblahblah<foo /></xml>""") # doctest: +NOPARSE_MARKUP, +ELLIPSIS
    <xml>...foo /></xml>
    '''}
|
483
|
+
|
484
|
+
if __name__ == '__main__':
    # Run this module's doctests; ``doctest`` is already imported at the top
    # of the file.
    doctest.testmod()
|
487
|
+
|
488
|
+
|