lxml 6.0.0__cp310-cp310-manylinux_2_31_armv7l.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cpython-310-arm-linux-gnueabihf.so +0 -0
  4. lxml/_elementpath.py +343 -0
  5. lxml/apihelpers.pxi +1801 -0
  6. lxml/builder.cpython-310-arm-linux-gnueabihf.so +0 -0
  7. lxml/builder.py +243 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +36 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cpython-310-arm-linux-gnueabihf.so +0 -0
  16. lxml/etree.h +244 -0
  17. lxml/etree.pyx +3853 -0
  18. lxml/etree_api.h +204 -0
  19. lxml/extensions.pxi +830 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1927 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_difflib.cpython-310-arm-linux-gnueabihf.so +0 -0
  24. lxml/html/_difflib.py +2106 -0
  25. lxml/html/_html5builder.py +100 -0
  26. lxml/html/_setmixin.py +56 -0
  27. lxml/html/builder.py +173 -0
  28. lxml/html/clean.py +21 -0
  29. lxml/html/defs.py +135 -0
  30. lxml/html/diff.cpython-310-arm-linux-gnueabihf.so +0 -0
  31. lxml/html/diff.py +972 -0
  32. lxml/html/formfill.py +299 -0
  33. lxml/html/html5parser.py +260 -0
  34. lxml/html/soupparser.py +314 -0
  35. lxml/html/usedoctest.py +13 -0
  36. lxml/includes/__init__.pxd +0 -0
  37. lxml/includes/__init__.py +0 -0
  38. lxml/includes/c14n.pxd +25 -0
  39. lxml/includes/config.pxd +3 -0
  40. lxml/includes/dtdvalid.pxd +18 -0
  41. lxml/includes/etree_defs.h +379 -0
  42. lxml/includes/etreepublic.pxd +237 -0
  43. lxml/includes/extlibs/__init__.py +0 -0
  44. lxml/includes/extlibs/libcharset.h +45 -0
  45. lxml/includes/extlibs/localcharset.h +137 -0
  46. lxml/includes/extlibs/zconf.h +543 -0
  47. lxml/includes/extlibs/zlib.h +1938 -0
  48. lxml/includes/htmlparser.pxd +56 -0
  49. lxml/includes/libexslt/__init__.py +0 -0
  50. lxml/includes/libexslt/exslt.h +108 -0
  51. lxml/includes/libexslt/exsltconfig.h +70 -0
  52. lxml/includes/libexslt/exsltexports.h +63 -0
  53. lxml/includes/libxml/HTMLparser.h +339 -0
  54. lxml/includes/libxml/HTMLtree.h +148 -0
  55. lxml/includes/libxml/SAX.h +18 -0
  56. lxml/includes/libxml/SAX2.h +170 -0
  57. lxml/includes/libxml/__init__.py +0 -0
  58. lxml/includes/libxml/c14n.h +115 -0
  59. lxml/includes/libxml/catalog.h +183 -0
  60. lxml/includes/libxml/chvalid.h +230 -0
  61. lxml/includes/libxml/debugXML.h +79 -0
  62. lxml/includes/libxml/dict.h +82 -0
  63. lxml/includes/libxml/encoding.h +307 -0
  64. lxml/includes/libxml/entities.h +147 -0
  65. lxml/includes/libxml/globals.h +25 -0
  66. lxml/includes/libxml/hash.h +251 -0
  67. lxml/includes/libxml/list.h +137 -0
  68. lxml/includes/libxml/nanoftp.h +16 -0
  69. lxml/includes/libxml/nanohttp.h +98 -0
  70. lxml/includes/libxml/parser.h +1633 -0
  71. lxml/includes/libxml/parserInternals.h +591 -0
  72. lxml/includes/libxml/relaxng.h +224 -0
  73. lxml/includes/libxml/schemasInternals.h +959 -0
  74. lxml/includes/libxml/schematron.h +143 -0
  75. lxml/includes/libxml/threads.h +81 -0
  76. lxml/includes/libxml/tree.h +1326 -0
  77. lxml/includes/libxml/uri.h +106 -0
  78. lxml/includes/libxml/valid.h +485 -0
  79. lxml/includes/libxml/xinclude.h +141 -0
  80. lxml/includes/libxml/xlink.h +193 -0
  81. lxml/includes/libxml/xmlIO.h +419 -0
  82. lxml/includes/libxml/xmlautomata.h +163 -0
  83. lxml/includes/libxml/xmlerror.h +962 -0
  84. lxml/includes/libxml/xmlexports.h +96 -0
  85. lxml/includes/libxml/xmlmemory.h +188 -0
  86. lxml/includes/libxml/xmlmodule.h +61 -0
  87. lxml/includes/libxml/xmlreader.h +444 -0
  88. lxml/includes/libxml/xmlregexp.h +116 -0
  89. lxml/includes/libxml/xmlsave.h +111 -0
  90. lxml/includes/libxml/xmlschemas.h +254 -0
  91. lxml/includes/libxml/xmlschemastypes.h +152 -0
  92. lxml/includes/libxml/xmlstring.h +140 -0
  93. lxml/includes/libxml/xmlunicode.h +15 -0
  94. lxml/includes/libxml/xmlversion.h +332 -0
  95. lxml/includes/libxml/xmlwriter.h +489 -0
  96. lxml/includes/libxml/xpath.h +569 -0
  97. lxml/includes/libxml/xpathInternals.h +639 -0
  98. lxml/includes/libxml/xpointer.h +48 -0
  99. lxml/includes/libxslt/__init__.py +0 -0
  100. lxml/includes/libxslt/attributes.h +39 -0
  101. lxml/includes/libxslt/documents.h +93 -0
  102. lxml/includes/libxslt/extensions.h +262 -0
  103. lxml/includes/libxslt/extra.h +72 -0
  104. lxml/includes/libxslt/functions.h +78 -0
  105. lxml/includes/libxslt/imports.h +75 -0
  106. lxml/includes/libxslt/keys.h +53 -0
  107. lxml/includes/libxslt/namespaces.h +68 -0
  108. lxml/includes/libxslt/numbersInternals.h +73 -0
  109. lxml/includes/libxslt/pattern.h +84 -0
  110. lxml/includes/libxslt/preproc.h +43 -0
  111. lxml/includes/libxslt/security.h +104 -0
  112. lxml/includes/libxslt/templates.h +77 -0
  113. lxml/includes/libxslt/transform.h +207 -0
  114. lxml/includes/libxslt/variables.h +118 -0
  115. lxml/includes/libxslt/xslt.h +110 -0
  116. lxml/includes/libxslt/xsltInternals.h +1995 -0
  117. lxml/includes/libxslt/xsltconfig.h +146 -0
  118. lxml/includes/libxslt/xsltexports.h +64 -0
  119. lxml/includes/libxslt/xsltlocale.h +44 -0
  120. lxml/includes/libxslt/xsltutils.h +343 -0
  121. lxml/includes/lxml-version.h +3 -0
  122. lxml/includes/relaxng.pxd +64 -0
  123. lxml/includes/schematron.pxd +34 -0
  124. lxml/includes/tree.pxd +492 -0
  125. lxml/includes/uri.pxd +5 -0
  126. lxml/includes/xinclude.pxd +22 -0
  127. lxml/includes/xmlerror.pxd +852 -0
  128. lxml/includes/xmlparser.pxd +303 -0
  129. lxml/includes/xmlschema.pxd +35 -0
  130. lxml/includes/xpath.pxd +136 -0
  131. lxml/includes/xslt.pxd +190 -0
  132. lxml/isoschematron/__init__.py +348 -0
  133. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  134. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  135. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  136. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  137. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  138. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  142. lxml/iterparse.pxi +438 -0
  143. lxml/lxml.etree.h +244 -0
  144. lxml/lxml.etree_api.h +204 -0
  145. lxml/nsclasses.pxi +281 -0
  146. lxml/objectify.cpython-310-arm-linux-gnueabihf.so +0 -0
  147. lxml/objectify.pyx +2149 -0
  148. lxml/objectpath.pxi +332 -0
  149. lxml/parser.pxi +2059 -0
  150. lxml/parsertarget.pxi +180 -0
  151. lxml/proxy.pxi +619 -0
  152. lxml/public-api.pxi +178 -0
  153. lxml/pyclasslookup.py +3 -0
  154. lxml/readonlytree.pxi +565 -0
  155. lxml/relaxng.pxi +165 -0
  156. lxml/sax.cpython-310-arm-linux-gnueabihf.so +0 -0
  157. lxml/sax.py +286 -0
  158. lxml/saxparser.pxi +875 -0
  159. lxml/schematron.pxi +173 -0
  160. lxml/serializer.pxi +1849 -0
  161. lxml/usedoctest.py +13 -0
  162. lxml/xinclude.pxi +67 -0
  163. lxml/xmlerror.pxi +1654 -0
  164. lxml/xmlid.pxi +179 -0
  165. lxml/xmlschema.pxi +215 -0
  166. lxml/xpath.pxi +487 -0
  167. lxml/xslt.pxi +957 -0
  168. lxml/xsltext.pxi +242 -0
  169. lxml-6.0.0.dist-info/METADATA +163 -0
  170. lxml-6.0.0.dist-info/RECORD +174 -0
  171. lxml-6.0.0.dist-info/WHEEL +5 -0
  172. lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
  173. lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
  174. lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/doctestcompare.py ADDED
@@ -0,0 +1,488 @@
1
+ """
2
+ lxml-based doctest output comparison.
3
+
4
+ Note: normally, you should just import the `lxml.usedoctest` and
5
+ `lxml.html.usedoctest` modules from within a doctest, instead of this
6
+ one::
7
+
8
+ >>> import lxml.usedoctest # for XML output
9
+
10
+ >>> import lxml.html.usedoctest # for HTML output
11
+
12
+ To use this module directly, you must call ``lxml.doctestcompare.install()``,
13
+ which will cause doctest to use this in all subsequent calls.
14
+
15
+ This changes the way output is checked and comparisons are made for
16
+ XML or HTML-like content.
17
+
18
+ XML or HTML content is noticed because the example starts with ``<``
19
+ (it's HTML if it starts with ``<html``). You can also use the
20
+ ``PARSE_HTML`` and ``PARSE_XML`` flags to force parsing.
21
+
22
+ Some rough wildcard-like things are allowed. Whitespace is generally
23
+ ignored (except in attributes). In text (attributes and text in the
24
+ body) you can use ``...`` as a wildcard. In an example it also
25
+ matches any trailing tags in the element, though it does not match
26
+ leading tags. You may create a tag ``<any>`` or include an ``any``
27
+ attribute in the tag. An ``any`` tag matches any tag, while the
28
+ attribute matches any and all attributes.
29
+
30
+ When a match fails, the reformatted example and gotten text are
31
+ displayed (indented), and a rough diff-like output is given. Anything
32
+ marked with ``+`` is in the output but wasn't supposed to be, and
33
+ similarly ``-`` means it's in the example but wasn't in the output.
34
+
35
+ You can disable parsing on one line with ``# doctest:+NOPARSE_MARKUP``
36
+ """
37
+
38
+ from lxml import etree
39
+ import sys
40
+ import re
41
+ import doctest
42
+ try:
43
+ from html import escape as html_escape
44
+ except ImportError:
45
+ from cgi import escape as html_escape
46
+
47
# Public names exported by ``from lxml.doctestcompare import *``.
__all__ = ['PARSE_HTML', 'PARSE_XML', 'NOPARSE_MARKUP', 'LXMLOutputChecker',
           'LHTMLOutputChecker', 'install', 'temp_install']

# Doctest option flags understood by the checkers in this module:
# PARSE_HTML / PARSE_XML force a parser, NOPARSE_MARKUP disables parsing.
PARSE_HTML = doctest.register_optionflag('PARSE_HTML')
PARSE_XML = doctest.register_optionflag('PARSE_XML')
NOPARSE_MARKUP = doctest.register_optionflag('NOPARSE_MARKUP')

# Reference to the stock checker, captured at import time; install() later
# rebinds ``doctest.OutputChecker``, so the base class is pinned here.
OutputChecker = doctest.OutputChecker
55
+
56
def strip(v):
    """Return ``v.strip()``, passing ``None`` through unchanged."""
    return None if v is None else v.strip()
61
+
62
def norm_whitespace(v):
    """Replace each match of ``_norm_whitespace_re`` in *v* with one space."""
    collapsed = _norm_whitespace_re.sub(' ', v)
    return collapsed
64
+
65
# Shared strict HTML parser: no error recovery, blank text nodes removed.
_html_parser = etree.HTMLParser(recover=False, remove_blank_text=True)


def html_fromstring(html):
    """Parse *html* with the module's shared HTML parser and return the root."""
    root = etree.fromstring(html, parser=_html_parser)
    return root
69
+
70
# We use this to distinguish repr()s from elements: text that opens with
# ``<`` and then contains `` at `` or `` object `` before any ``>`` (for
# example ``<Element a at 0x...>``) is treated as a repr, not markup.
_repr_re = re.compile(r'^<[^>]+ (at|object) ')
# Matches a run of two or more spaces, tabs or newlines.
_norm_whitespace_re = re.compile(r'[ \t\n][ \t\n]+')
73
+
74
class LXMLOutputChecker(OutputChecker):
    """Doctest output checker that compares XML/HTML output as parsed trees.

    When both the expected and the actual output look like markup (or a
    ``PARSE_HTML`` / ``PARSE_XML`` flag is set), they are parsed and compared
    element by element, honouring the ``...`` text wildcard, the ``any`` tag
    and the ``any`` attribute.  Otherwise the plain doctest comparison is
    used.
    """

    # Tags rendered without content or an end tag when formatting HTML.
    empty_tags = (
        'param', 'img', 'area', 'br', 'basefont', 'input',
        'base', 'meta', 'link', 'col')

    def get_default_parser(self):
        """Return the parser used for markup when no flag forces one."""
        return etree.XML

    def check_output(self, want, got, optionflags):
        """Return True if *got* matches *want*, comparing markup structurally.

        Falls back to the stock doctest comparison when no parser applies.
        Output that fails to parse never matches.
        """
        # NOTE: temp_install() copies this method's code object into the
        # check function of the running doctest, where it executes with the
        # doctest module's globals.  Keep the body restricted to names that
        # exist there (``OutputChecker`` and the ``etree`` injected by
        # temp_install()) and keep the statements as they are.
        alt_self = getattr(self, '_temp_override_self', None)
        if alt_self is not None:
            # Running as the transplanted code: delegate to the real checker.
            super_method = self._temp_call_super_check_output
            self = alt_self
        else:
            super_method = OutputChecker.check_output
        parser = self.get_parser(want, got, optionflags)
        if not parser:
            return super_method(
                self, want, got, optionflags)
        try:
            want_doc = parser(want)
        except etree.XMLSyntaxError:
            return False
        try:
            got_doc = parser(got)
        except etree.XMLSyntaxError:
            return False
        return self.compare_docs(want_doc, got_doc)

    def get_parser(self, want, got, optionflags):
        """Pick the parser for this comparison, or None for a plain compare.

        Precedence: ``NOPARSE_MARKUP`` (no parsing), ``PARSE_HTML``,
        ``PARSE_XML``, both texts starting with ``<html`` (case-insensitive),
        then both texts looking like markup (default parser).
        """
        if NOPARSE_MARKUP & optionflags:
            return None
        if PARSE_HTML & optionflags:
            return html_fromstring
        if PARSE_XML & optionflags:
            return etree.XML
        # Both sides are lower-cased so ``<HTML>`` is detected in the actual
        # output just as it is in the example.
        if (want.strip().lower().startswith('<html')
                and got.strip().lower().startswith('<html')):
            return html_fromstring
        if self._looks_like_markup(want) and self._looks_like_markup(got):
            return self.get_default_parser()
        return None

    def _looks_like_markup(self, s):
        """Return true if *s* starts with ``<`` and is not an object repr."""
        s = s.strip()
        return (s.startswith('<')
                and not _repr_re.search(s))

    def compare_docs(self, want, got):
        """Recursively compare element *got* against the expected *want*."""
        if not self.tag_compare(want.tag, got.tag):
            return False
        if not self.text_compare(want.text, got.text, True):
            return False
        if not self.text_compare(want.tail, got.tail, True):
            return False
        # An ``any`` attribute on the expected element matches all attributes.
        if 'any' not in want.attrib:
            want_keys = sorted(want.attrib.keys())
            got_keys = sorted(got.attrib.keys())
            if want_keys != got_keys:
                return False
            for key in want_keys:
                if not self.text_compare(want.attrib[key], got.attrib[key], False):
                    return False
        # A childless expected element whose text is ``...`` matches any
        # content, so children are only compared otherwise.
        if want.text != '...' or len(want):
            want_children = list(want)
            got_children = list(got)
            while want_children or got_children:
                if not want_children or not got_children:
                    return False
                want_first = want_children.pop(0)
                got_first = got_children.pop(0)
                if not self.compare_docs(want_first, got_first):
                    return False
                # NOTE(review): this stops once the actual children are
                # exhausted and the expected child's tail is ``...``; confirm
                # that this is the intended "trailing tags" wildcard.
                if not got_children and want_first.tail == '...':
                    break
        return True

    def text_compare(self, want, got, strip):
        """Return True if *got* matches *want*, where ``...`` is a wildcard.

        None is treated as the empty string.  If *strip* is true, whitespace
        is normalised and stripped on both sides first.
        """
        want = want or ''
        got = got or ''
        if strip:
            want = norm_whitespace(want).strip()
            got = norm_whitespace(got).strip()
        # Escape everything, then turn the escaped ``...`` back into ``.*``.
        pattern = '^%s$' % re.escape(want)
        pattern = pattern.replace(r'\.\.\.', '.*')
        return bool(re.search(pattern, got))

    def tag_compare(self, want, got):
        """Return True if tag *got* matches the expected tag *want*.

        ``any`` matches every tag; a ``{...}`` prefix matches any namespace.
        Non-string tags are compared for plain equality.
        """
        if want == 'any':
            return True
        if (not isinstance(want, (str, bytes))
                or not isinstance(got, (str, bytes))):
            return want == got
        want = want or ''
        got = got or ''
        if want.startswith('{...}'):
            # Ellipsis on the namespace
            return want.split('}')[-1] == got.split('}')[-1]
        return want == got

    def output_difference(self, example, got, optionflags):
        """Return a description of how *got* differs from ``example.want``.

        For parsed markup this is the formatted expected tree, the formatted
        actual tree and a rough diff.  If parsing is not applicable or fails,
        the stock doctest difference is returned, preceded by any parse
        errors.
        """
        want = example.want
        parser = self.get_parser(want, got, optionflags)
        errors = []
        if parser is not None:
            try:
                want_doc = parser(want)
            except etree.XMLSyntaxError as exc:
                errors.append('In example: %s' % exc)
            try:
                got_doc = parser(got)
            except etree.XMLSyntaxError as exc:
                errors.append('In actual output: %s' % exc)
        if parser is None or errors:
            value = OutputChecker.output_difference(
                self, example, got, optionflags)
            if errors:
                errors.append(value)
                return '\n'.join(errors)
            return value
        html = parser is html_fromstring
        diff_parts = ['Expected:',
                      self.format_doc(want_doc, html, 2),
                      'Got:',
                      self.format_doc(got_doc, html, 2),
                      'Diff:',
                      self.collect_diff(want_doc, got_doc, html, 2)]
        return '\n'.join(diff_parts)

    def html_empty_tag(self, el, html=True):
        """Return True if *el* should be rendered as a content-less HTML tag."""
        if not html:
            return False
        if el.tag not in self.empty_tags:
            return False
        if el.text or len(el):
            # This shouldn't happen (contents in an empty tag)
            return False
        return True

    def format_doc(self, doc, html, indent, prefix=''):
        """Return *doc* as indented text; *prefix* is put before the start tag."""
        pad = ' ' * indent
        parts = [pad, prefix, self.format_tag(doc)]
        empty = self.html_empty_tag(doc, html)
        if not len(doc):
            # No children: tag, text, end tag and tail share one line.
            if not empty:
                if strip(doc.text):
                    parts.append(self.format_text(doc.text))
                parts.append(self.format_end_tag(doc))
            if strip(doc.tail):
                parts.append(self.format_text(doc.tail))
            parts.append('\n')
            return ''.join(parts)
        if not empty:
            parts.append('\n')
            if strip(doc.text):
                parts.extend((pad, self.format_text(doc.text), '\n'))
            for el in doc:
                parts.append(self.format_doc(el, html, indent + 2))
            parts.extend((pad, self.format_end_tag(doc), '\n'))
        if strip(doc.tail):
            parts.extend((pad, self.format_text(doc.tail), '\n'))
        return ''.join(parts)

    def format_text(self, text, strip=True):
        """Return *text* HTML-escaped (quotes included), optionally stripped."""
        if text is None:
            return ''
        if strip:
            text = text.strip()
        return html_escape(text, 1)

    def format_tag(self, el):
        """Return the start tag of *el* with its attributes sorted by name."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '<!--'
        attrs = ['%s="%s"' % (name, self.format_text(value, False))
                 for name, value in sorted(el.attrib.items())]
        if not attrs:
            return '<%s>' % el.tag
        return '<%s %s>' % (el.tag, ' '.join(attrs))

    def format_end_tag(self, el):
        """Return the end tag of *el* (``-->`` for comments)."""
        if isinstance(el, etree.CommentBase):
            # FIXME: probably PIs should be handled specially too?
            return '-->'
        return '</%s>' % el.tag

    def collect_diff(self, want, got, html, indent):
        """Return an indented, diff-like rendering of *want* versus *got*.

        Children present only in the output are prefixed with ``+``, those
        present only in the example with ``-``.
        """
        pad = ' ' * indent
        parts = [pad, self.collect_diff_tag(want, got)]
        if not len(want) and not len(got):
            # Neither side has children: render everything on one line.
            if not self.html_empty_tag(got, html):
                parts.append(self.collect_diff_text(want.text, got.text))
                parts.append(self.collect_diff_end_tag(want, got))
            parts.append(self.collect_diff_text(want.tail, got.tail))
            parts.append('\n')
            return ''.join(parts)
        parts.append('\n')
        if strip(want.text) or strip(got.text):
            parts.extend(
                (pad, self.collect_diff_text(want.text, got.text), '\n'))
        want_children = list(want)
        got_children = list(got)
        # Pair children by position; leftovers on either side are reported.
        for index in range(max(len(want_children), len(got_children))):
            if index >= len(want_children):
                parts.append(
                    self.format_doc(got_children[index], html, indent + 2, '+'))
            elif index >= len(got_children):
                parts.append(
                    self.format_doc(want_children[index], html, indent + 2, '-'))
            else:
                parts.append(self.collect_diff(
                    want_children[index], got_children[index], html, indent + 2))
        parts.extend((pad, self.collect_diff_end_tag(want, got), '\n'))
        if strip(want.tail) or strip(got.tail):
            parts.extend(
                (pad, self.collect_diff_text(want.tail, got.tail), '\n'))
        return ''.join(parts)

    def collect_diff_tag(self, want, got):
        """Return the start tag for the diff, marking tag/attribute mismatches."""
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        attrs = []
        # With an ``any`` tag or attribute, attribute differences are ignored.
        match_any = want.tag == 'any' or 'any' in want.attrib
        for name, value in sorted(got.attrib.items()):
            if name not in want.attrib and not match_any:
                attrs.append('+%s="%s"' % (name, self.format_text(value, False)))
            else:
                if name in want.attrib:
                    text = self.collect_diff_text(want.attrib[name], value, False)
                else:
                    text = self.format_text(value, False)
                attrs.append('%s="%s"' % (name, text))
        if not match_any:
            for name, value in sorted(want.attrib.items()):
                if name in got.attrib:
                    continue
                attrs.append('-%s="%s"' % (name, self.format_text(value, False)))
        if attrs:
            tag = '<%s %s>' % (tag, ' '.join(attrs))
        else:
            tag = '<%s>' % tag
        return tag

    def collect_diff_end_tag(self, want, got):
        """Return the end tag for the diff, marking a tag mismatch.

        Uses tag_compare(), like collect_diff_tag(), so that the ``any`` and
        ``{...}`` wildcards are not reported as mismatches.
        """
        if not self.tag_compare(want.tag, got.tag):
            tag = '%s (got: %s)' % (want.tag, got.tag)
        else:
            tag = got.tag
        return '</%s>' % tag

    def collect_diff_text(self, want, got, strip=True):
        """Return *got* escaped, or ``want (got: got)`` when the texts differ."""
        if self.text_compare(want, got, strip):
            if not got:
                return ''
            return self.format_text(got, strip)
        text = '%s (got: %s)' % (want, got)
        return self.format_text(text, strip)
361
+
362
class LHTMLOutputChecker(LXMLOutputChecker):
    """Output checker whose default parser for markup is the HTML parser."""

    def get_default_parser(self):
        """Return the module's HTML parsing function."""
        return html_fromstring
365
+
366
def install(html=False):
    """
    Install doctestcompare for all future doctests.

    This rebinds ``doctest.OutputChecker``.  With a true *html* the
    HTML-defaulting checker is installed; otherwise the XML-defaulting
    checker is.
    """
    checker_class = LHTMLOutputChecker if html else LXMLOutputChecker
    doctest.OutputChecker = checker_class
377
+
378
def temp_install(html=False, del_module=None):
    """
    Use this *inside* a doctest to enable this checker for this
    doctest only.

    If html is true, then by default the HTML parser will be used;
    otherwise the XML parser is used.

    If del_module is given, it is handed to the restore hook, which
    deletes that module name from ``sys.modules`` (and from its parent
    package, for a dotted name) after the runner's outcome-recording
    hook has been called.
    """
    if html:
        Checker = LHTMLOutputChecker
    else:
        Checker = LXMLOutputChecker
    # Locate the running doctest frame and the runner object it belongs to.
    frame = _find_doctest_frame()
    dt_self = frame.f_locals['self']
    checker = Checker()
    old_checker = dt_self._checker
    dt_self._checker = checker
    # The unfortunate thing is that there is a local variable 'check'
    # in the function that runs the doctests, that is a bound method
    # into the output checker.  We have to update that.  We can't
    # modify the frame, so we have to modify the object in place.  The
    # only way to do this is to actually change the __code__
    # attribute of the method.  We change it, and then wait for
    # __record_outcome to be run, which signals the end of the __run
    # method, at which point we restore the previous check_output
    # implementation.
    check_func = frame.f_locals['check'].__func__
    checker_check_func = checker.check_output.__func__
    # Because we can't patch up the function's globals, this is the only
    # global in check_output that we care about:
    doctest.etree = etree
    # The helper installs itself on construction; no reference is kept here.
    _RestoreChecker(dt_self, old_checker, checker,
                    check_func, checker_check_func,
                    del_module)
412
+
413
class _RestoreChecker:
    """Temporarily swap in a checker's code and undo the swap afterwards.

    On construction the code object of ``check_func`` is replaced by that of
    ``clone_func`` and the instance takes the place of the runner's
    ``__record_outcome`` hook.  When that hook is called, everything is
    restored, the original hook is invoked, and the optional module is
    removed.
    """

    def __init__(self, dt_self, old_checker, new_checker, check_func, clone_func,
                 del_module):
        self.dt_self = dt_self
        self.checker = old_checker
        # Markers read by the transplanted check_output code to find the
        # replacement checker and the way back to the original implementation.
        self.checker._temp_call_super_check_output = self.call_super
        self.checker._temp_override_self = new_checker
        self.check_func = check_func
        self.clone_func = clone_func
        self.del_module = del_module
        self.install_clone()
        self.install_dt_self()

    def install_clone(self):
        """Save the original code object and replace it with the clone's."""
        self.func_code = self.check_func.__code__
        self.func_globals = self.check_func.__globals__
        self.check_func.__code__ = self.clone_func.__code__

    def uninstall_clone(self):
        """Put the saved original code object back."""
        self.check_func.__code__ = self.func_code

    def install_dt_self(self):
        """Install this object as the runner's outcome-recording hook."""
        self.prev_func = self.dt_self._DocTestRunner__record_outcome
        self.dt_self._DocTestRunner__record_outcome = self

    def uninstall_dt_self(self):
        """Restore the runner's previous outcome-recording hook."""
        self.dt_self._DocTestRunner__record_outcome = self.prev_func

    def uninstall_module(self):
        """Remove ``del_module`` from sys.modules and from its parent package."""
        if self.del_module:
            import sys
            del sys.modules[self.del_module]
            if '.' in self.del_module:
                package, module = self.del_module.rsplit('.', 1)
                package_mod = sys.modules[package]
                delattr(package_mod, module)

    def __call__(self, *args, **kw):
        """Undo all patches, then delegate to the original outcome hook."""
        self.uninstall_clone()
        self.uninstall_dt_self()
        del self.checker._temp_override_self
        del self.checker._temp_call_super_check_output
        result = self.prev_func(*args, **kw)
        # The module is removed only after the original hook has run.
        self.uninstall_module()
        return result

    def call_super(self, *args, **kw):
        """Call the original check function with its own code restored."""
        self.uninstall_clone()
        try:
            return self.check_func(*args, **kw)
        finally:
            # Re-apply the clone so later checks keep using the new checker.
            self.install_clone()
458
+
459
+ def _find_doctest_frame():
460
+ import sys
461
+ frame = sys._getframe(1)
462
+ while frame:
463
+ l = frame.f_locals
464
+ if 'BOOM' in l:
465
+ # Sign of doctest
466
+ return frame
467
+ frame = frame.f_back
468
+ raise LookupError(
469
+ "Could not find doctest (only use this function *inside* a doctest)")
470
+
471
+ __test__ = {
472
+ 'basic': '''
473
+ >>> temp_install()
474
+ >>> print """<xml a="1" b="2">stuff</xml>"""
475
+ <xml b="2" a="1">...</xml>
476
+ >>> print """<xml xmlns="http://example.com"><tag attr="bar" /></xml>"""
477
+ <xml xmlns="...">
478
+ <tag attr="..." />
479
+ </xml>
480
+ >>> print """<xml>blahblahblah<foo /></xml>""" # doctest: +NOPARSE_MARKUP, +ELLIPSIS
481
+ <xml>...foo /></xml>
482
+ '''}
483
+
484
# When executed as a script, run this module's doctests, which include the
# ``__test__`` examples.
if __name__ == '__main__':
    import doctest
    doctest.testmod()
487
+
488
+