lxml 5.2.0__cp310-cp310-win32.whl → 5.2.2__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89)
  1. lxml/ElementInclude.py +244 -244
  2. lxml/__init__.py +22 -22
  3. lxml/_elementpath.cp310-win32.pyd +0 -0
  4. lxml/_elementpath.py +341 -341
  5. lxml/apihelpers.pxi +1793 -1793
  6. lxml/builder.cp310-win32.pyd +0 -0
  7. lxml/builder.py +232 -232
  8. lxml/classlookup.pxi +580 -580
  9. lxml/cleanup.pxi +215 -215
  10. lxml/cssselect.py +101 -101
  11. lxml/debug.pxi +90 -90
  12. lxml/docloader.pxi +178 -178
  13. lxml/doctestcompare.py +488 -488
  14. lxml/dtd.pxi +478 -478
  15. lxml/etree.cp310-win32.pyd +0 -0
  16. lxml/etree.h +6 -6
  17. lxml/etree.pyx +3732 -3711
  18. lxml/extensions.pxi +833 -833
  19. lxml/html/ElementSoup.py +10 -10
  20. lxml/html/__init__.py +1923 -1923
  21. lxml/html/_diffcommand.py +86 -86
  22. lxml/html/_html5builder.py +100 -100
  23. lxml/html/_setmixin.py +56 -56
  24. lxml/html/builder.py +133 -133
  25. lxml/html/clean.py +21 -21
  26. lxml/html/defs.py +135 -135
  27. lxml/html/diff.cp310-win32.pyd +0 -0
  28. lxml/html/diff.py +878 -878
  29. lxml/html/formfill.py +299 -299
  30. lxml/html/html5parser.py +260 -260
  31. lxml/html/soupparser.py +314 -314
  32. lxml/html/usedoctest.py +13 -13
  33. lxml/includes/c14n.pxd +25 -25
  34. lxml/includes/config.pxd +3 -3
  35. lxml/includes/dtdvalid.pxd +18 -18
  36. lxml/includes/etree_defs.h +379 -379
  37. lxml/includes/etreepublic.pxd +237 -237
  38. lxml/includes/htmlparser.pxd +56 -56
  39. lxml/includes/lxml-version.h +1 -1
  40. lxml/includes/relaxng.pxd +64 -64
  41. lxml/includes/schematron.pxd +34 -34
  42. lxml/includes/tree.pxd +494 -494
  43. lxml/includes/uri.pxd +5 -5
  44. lxml/includes/xinclude.pxd +22 -22
  45. lxml/includes/xmlerror.pxd +852 -852
  46. lxml/includes/xmlparser.pxd +265 -265
  47. lxml/includes/xmlschema.pxd +35 -35
  48. lxml/includes/xpath.pxd +136 -136
  49. lxml/includes/xslt.pxd +190 -190
  50. lxml/isoschematron/__init__.py +348 -348
  51. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -709
  52. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -75
  53. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +312 -312
  54. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1159 -1159
  55. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +54 -54
  56. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -1796
  57. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -588
  58. lxml/iterparse.pxi +438 -438
  59. lxml/lxml.etree.h +6 -6
  60. lxml/nsclasses.pxi +281 -281
  61. lxml/objectify.cp310-win32.pyd +0 -0
  62. lxml/objectify.pyx +2145 -2145
  63. lxml/objectpath.pxi +332 -332
  64. lxml/parser.pxi +1994 -1994
  65. lxml/parsertarget.pxi +180 -180
  66. lxml/proxy.pxi +619 -619
  67. lxml/public-api.pxi +178 -178
  68. lxml/pyclasslookup.py +3 -3
  69. lxml/readonlytree.pxi +565 -565
  70. lxml/relaxng.pxi +165 -165
  71. lxml/sax.cp310-win32.pyd +0 -0
  72. lxml/sax.py +275 -275
  73. lxml/saxparser.pxi +875 -875
  74. lxml/schematron.pxi +168 -168
  75. lxml/serializer.pxi +1871 -1871
  76. lxml/usedoctest.py +13 -13
  77. lxml/xinclude.pxi +67 -67
  78. lxml/xmlerror.pxi +1654 -1654
  79. lxml/xmlid.pxi +179 -179
  80. lxml/xmlschema.pxi +215 -215
  81. lxml/xpath.pxi +487 -487
  82. lxml/xslt.pxi +950 -950
  83. lxml/xsltext.pxi +242 -242
  84. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSE.txt +29 -29
  85. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSES.txt +29 -29
  86. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/METADATA +9 -17
  87. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/RECORD +89 -89
  88. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/WHEEL +0 -0
  89. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/top_level.txt +0 -0
@@ -1,348 +1,348 @@
1
- """The ``lxml.isoschematron`` package implements ISO Schematron support on top
2
- of the pure-xslt 'skeleton' implementation.
3
- """
4
-
5
- import sys
6
- import os.path
7
- from lxml import etree as _etree # due to validator __init__ signature
8
-
9
-
10
- # some compat stuff, borrowed from lxml.html
11
- try:
12
- unicode
13
- except NameError:
14
- # Python 3
15
- unicode = str
16
- try:
17
- basestring
18
- except NameError:
19
- # Python 3
20
- basestring = str
21
-
22
-
23
- __all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
24
- 'iso_abstract_expand', 'iso_svrl_for_xslt1',
25
- 'svrl_validation_errors', 'schematron_schema_valid',
26
- 'stylesheet_params', 'Schematron']
27
-
28
-
29
- # some namespaces
30
- #FIXME: Maybe lxml should provide a dedicated place for common namespace
31
- #FIXME: definitions?
32
- XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
33
- RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
34
- SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
35
- SVRL_NS = "http://purl.oclc.org/dsdl/svrl"
36
-
37
-
38
- # some helpers
39
- _schematron_root = '{%s}schema' % SCHEMATRON_NS
40
- _xml_schema_root = '{%s}schema' % XML_SCHEMA_NS
41
- _resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
42
-
43
-
44
- # the iso-schematron skeleton implementation steps aka xsl transformations
45
- extract_xsd = _etree.XSLT(_etree.parse(
46
- os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
47
- extract_rng = _etree.XSLT(_etree.parse(
48
- os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
49
- iso_dsdl_include = _etree.XSLT(_etree.parse(
50
- os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
51
- 'iso_dsdl_include.xsl')))
52
- iso_abstract_expand = _etree.XSLT(_etree.parse(
53
- os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
54
- 'iso_abstract_expand.xsl')))
55
- iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
56
- os.path.join(_resources_dir,
57
- 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')))
58
-
59
-
60
- # svrl result accessors
61
- svrl_validation_errors = _etree.XPath(
62
- '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})
63
-
64
- # RelaxNG validator for schematron schemas
65
- schematron_schema_valid_supported = False
66
- try:
67
- schematron_schema_valid = _etree.RelaxNG(
68
- file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
69
- schematron_schema_valid_supported = True
70
- except _etree.RelaxNGParseError:
71
- # Some distributions delete the file due to licensing issues.
72
- def schematron_schema_valid(arg):
73
- raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng")
74
-
75
-
76
- def stylesheet_params(**kwargs):
77
- """Convert keyword args to a dictionary of stylesheet parameters.
78
- XSL stylesheet parameters must be XPath expressions, i.e.:
79
-
80
- * string expressions, like "'5'"
81
- * simple (number) expressions, like "5"
82
- * valid XPath expressions, like "/a/b/text()"
83
-
84
- This function converts native Python keyword arguments to stylesheet
85
- parameters following these rules:
86
- If an arg is a string wrap it with XSLT.strparam().
87
- If an arg is an XPath object use its path string.
88
- If arg is None raise TypeError.
89
- Else convert arg to string.
90
- """
91
- result = {}
92
- for key, val in kwargs.items():
93
- if isinstance(val, basestring):
94
- val = _etree.XSLT.strparam(val)
95
- elif val is None:
96
- raise TypeError('None not allowed as a stylesheet parameter')
97
- elif not isinstance(val, _etree.XPath):
98
- val = unicode(val)
99
- result[key] = val
100
- return result
101
-
102
-
103
- # helper function for use in Schematron __init__
104
- def _stylesheet_param_dict(paramsDict, kwargsDict):
105
- """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as
106
- stylesheet arguments.
107
- kwargsDict entries with a value of None are ignored.
108
- """
109
- # beware of changing mutable default arg
110
- paramsDict = dict(paramsDict)
111
- for k, v in kwargsDict.items():
112
- if v is not None: # None values do not override
113
- paramsDict[k] = v
114
- paramsDict = stylesheet_params(**paramsDict)
115
- return paramsDict
116
-
117
-
118
- class Schematron(_etree._Validator):
119
- """An ISO Schematron validator.
120
-
121
- Pass a root Element or an ElementTree to turn it into a validator.
122
- Alternatively, pass a filename as keyword argument 'file' to parse from
123
- the file system.
124
-
125
- Schematron is a less well known, but very powerful schema language.
126
- The main idea is to use the capabilities of XPath to put restrictions on
127
- the structure and the content of XML documents.
128
-
129
- The standard behaviour is to fail on ``failed-assert`` findings only
130
- (``ASSERTS_ONLY``). To change this, you can either pass a report filter
131
- function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
132
- or a custom ``XPath`` object), or subclass isoschematron.Schematron for
133
- complete control of the validation process.
134
-
135
- Built on the Schematron language 'reference' skeleton pure-xslt
136
- implementation, the validator is created as an XSLT 1.0 stylesheet using
137
- these steps:
138
-
139
- 0) (Extract from XML Schema or RelaxNG schema)
140
- 1) Process inclusions
141
- 2) Process abstract patterns
142
- 3) Compile the schematron schema to XSLT
143
-
144
- The ``include`` and ``expand`` keyword arguments can be used to switch off
145
- steps 1) and 2).
146
- To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
147
- keyword arguments ``include_params``, ``expand_params`` or
148
- ``compile_params``.
149
- For convenience, the compile-step parameter ``phase`` is also exposed as a
150
- keyword argument ``phase``. This takes precedence if the parameter is also
151
- given in the parameter dictionary.
152
-
153
- If ``store_schematron`` is set to True, the (included-and-expanded)
154
- schematron document tree is stored and available through the ``schematron``
155
- property.
156
- If ``store_xslt`` is set to True, the validation XSLT document tree will be
157
- stored and can be retrieved through the ``validator_xslt`` property.
158
- With ``store_report`` set to True (default: False), the resulting validation
159
- report document gets stored and can be accessed as the ``validation_report``
160
- property.
161
-
162
- If ``validate_schema`` is set to False, the validation of the schema file
163
- itself is disabled. Validation happens by default after building the full
164
- schema, unless the schema validation file cannot be found at import time,
165
- in which case the validation gets disabled. Some lxml distributions exclude
166
- this file due to licensing issues. ISO-Schematron validation can then still
167
- be used normally, but the schemas themselves cannot be validated.
168
-
169
- Here is a usage example::
170
-
171
- >>> from lxml import etree
172
- >>> from lxml.isoschematron import Schematron
173
-
174
- >>> schematron = Schematron(etree.XML('''
175
- ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
176
- ... <pattern id="id_only_attribute">
177
- ... <title>id is the only permitted attribute name</title>
178
- ... <rule context="*">
179
- ... <report test="@*[not(name()='id')]">Attribute
180
- ... <name path="@*[not(name()='id')]"/> is forbidden<name/>
181
- ... </report>
182
- ... </rule>
183
- ... </pattern>
184
- ... </schema>'''),
185
- ... error_finder=Schematron.ASSERTS_AND_REPORTS)
186
-
187
- >>> xml = etree.XML('''
188
- ... <AAA name="aaa">
189
- ... <BBB id="bbb"/>
190
- ... <CCC color="ccc"/>
191
- ... </AAA>
192
- ... ''')
193
-
194
- >>> schematron.validate(xml)
195
- False
196
-
197
- >>> xml = etree.XML('''
198
- ... <AAA id="aaa">
199
- ... <BBB id="bbb"/>
200
- ... <CCC/>
201
- ... </AAA>
202
- ... ''')
203
-
204
- >>> schematron.validate(xml)
205
- True
206
- """
207
-
208
- # libxml2 error categorization for validation errors
209
- _domain = _etree.ErrorDomains.SCHEMATRONV
210
- _level = _etree.ErrorLevels.ERROR
211
- _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT
212
-
213
- # convenience definitions for common behaviours
214
- ASSERTS_ONLY = svrl_validation_errors # Default
215
- ASSERTS_AND_REPORTS = _etree.XPath(
216
- '//svrl:failed-assert | //svrl:successful-report',
217
- namespaces={'svrl': SVRL_NS})
218
-
219
- def _extract(self, element):
220
- """Extract embedded schematron schema from non-schematron host schema.
221
- This method will only be called by __init__ if the given schema document
222
- is not a schematron schema by itself.
223
- Must return a schematron schema document tree or None.
224
- """
225
- schematron = None
226
- if element.tag == _xml_schema_root:
227
- schematron = self._extract_xsd(element)
228
- elif element.nsmap[element.prefix] == RELAXNG_NS:
229
- # RelaxNG does not have a single unique root element
230
- schematron = self._extract_rng(element)
231
- return schematron
232
-
233
- # customization points
234
- # etree.XSLT objects that provide the extract, include, expand, compile
235
- # steps
236
- _extract_xsd = extract_xsd
237
- _extract_rng = extract_rng
238
- _include = iso_dsdl_include
239
- _expand = iso_abstract_expand
240
- _compile = iso_svrl_for_xslt1
241
-
242
- # etree.xpath object that determines input document validity when applied to
243
- # the svrl result report; must return a list of result elements (empty if
244
- # valid)
245
- _validation_errors = ASSERTS_ONLY
246
-
247
- def __init__(self, etree=None, file=None, include=True, expand=True,
248
- include_params={}, expand_params={}, compile_params={},
249
- store_schematron=False, store_xslt=False, store_report=False,
250
- phase=None, error_finder=ASSERTS_ONLY,
251
- validate_schema=schematron_schema_valid_supported):
252
- super().__init__()
253
-
254
- self._store_report = store_report
255
- self._schematron = None
256
- self._validator_xslt = None
257
- self._validation_report = None
258
- if error_finder is not self.ASSERTS_ONLY:
259
- self._validation_errors = error_finder
260
-
261
- # parse schema document, may be a schematron schema or an XML Schema or
262
- # a RelaxNG schema with embedded schematron rules
263
- root = None
264
- try:
265
- if etree is not None:
266
- if _etree.iselement(etree):
267
- root = etree
268
- else:
269
- root = etree.getroot()
270
- elif file is not None:
271
- root = _etree.parse(file).getroot()
272
- except Exception:
273
- raise _etree.SchematronParseError(
274
- "No tree or file given: %s" % sys.exc_info()[1])
275
- if root is None:
276
- raise ValueError("Empty tree")
277
- if root.tag == _schematron_root:
278
- schematron = root
279
- else:
280
- schematron = self._extract(root)
281
- if schematron is None:
282
- raise _etree.SchematronParseError(
283
- "Document is not a schematron schema or schematron-extractable")
284
- # perform the iso-schematron skeleton implementation steps to get a
285
- # validating xslt
286
- if include:
287
- schematron = self._include(schematron, **include_params)
288
- if expand:
289
- schematron = self._expand(schematron, **expand_params)
290
- if validate_schema and not schematron_schema_valid(schematron):
291
- raise _etree.SchematronParseError(
292
- "invalid schematron schema: %s" %
293
- schematron_schema_valid.error_log)
294
- if store_schematron:
295
- self._schematron = schematron
296
- # add new compile keyword args here if exposing them
297
- compile_kwargs = {'phase': phase}
298
- compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
299
- validator_xslt = self._compile(schematron, **compile_params)
300
- if store_xslt:
301
- self._validator_xslt = validator_xslt
302
- self._validator = _etree.XSLT(validator_xslt)
303
-
304
- def __call__(self, etree):
305
- """Validate doc using Schematron.
306
-
307
- Returns true if document is valid, false if not.
308
- """
309
- self._clear_error_log()
310
- result = self._validator(etree)
311
- if self._store_report:
312
- self._validation_report = result
313
- errors = self._validation_errors(result)
314
- if errors:
315
- if _etree.iselement(etree):
316
- fname = etree.getroottree().docinfo.URL or '<file>'
317
- else:
318
- fname = etree.docinfo.URL or '<file>'
319
- for error in errors:
320
- # Does svrl report the line number, anywhere? Don't think so.
321
- self._append_log_message(
322
- domain=self._domain, type=self._error_type,
323
- level=self._level, line=0,
324
- message=_etree.tostring(error, encoding='unicode'),
325
- filename=fname)
326
- return False
327
- return True
328
-
329
- @property
330
- def schematron(self):
331
- """ISO-schematron schema document (None if object has been initialized
332
- with store_schematron=False).
333
- """
334
- return self._schematron
335
-
336
- @property
337
- def validator_xslt(self):
338
- """ISO-schematron skeleton implementation XSLT validator document (None
339
- if object has been initialized with store_xslt=False).
340
- """
341
- return self._validator_xslt
342
-
343
- @property
344
- def validation_report(self):
345
- """ISO-schematron validation result report (None if result-storing has
346
- been turned off).
347
- """
348
- return self._validation_report
1
+ """The ``lxml.isoschematron`` package implements ISO Schematron support on top
2
+ of the pure-xslt 'skeleton' implementation.
3
+ """
4
+
5
+ import sys
6
+ import os.path
7
+ from lxml import etree as _etree # due to validator __init__ signature
8
+
9
+
10
+ # some compat stuff, borrowed from lxml.html
11
+ try:
12
+ unicode
13
+ except NameError:
14
+ # Python 3
15
+ unicode = str
16
+ try:
17
+ basestring
18
+ except NameError:
19
+ # Python 3
20
+ basestring = str
21
+
22
+
23
+ __all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
24
+ 'iso_abstract_expand', 'iso_svrl_for_xslt1',
25
+ 'svrl_validation_errors', 'schematron_schema_valid',
26
+ 'stylesheet_params', 'Schematron']
27
+
28
+
29
+ # some namespaces
30
+ #FIXME: Maybe lxml should provide a dedicated place for common namespace
31
+ #FIXME: definitions?
32
+ XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
33
+ RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
34
+ SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
35
+ SVRL_NS = "http://purl.oclc.org/dsdl/svrl"
36
+
37
+
38
+ # some helpers
39
+ _schematron_root = '{%s}schema' % SCHEMATRON_NS
40
+ _xml_schema_root = '{%s}schema' % XML_SCHEMA_NS
41
+ _resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
42
+
43
+
44
+ # the iso-schematron skeleton implementation steps aka xsl transformations
45
+ extract_xsd = _etree.XSLT(_etree.parse(
46
+ os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
47
+ extract_rng = _etree.XSLT(_etree.parse(
48
+ os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
49
+ iso_dsdl_include = _etree.XSLT(_etree.parse(
50
+ os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
51
+ 'iso_dsdl_include.xsl')))
52
+ iso_abstract_expand = _etree.XSLT(_etree.parse(
53
+ os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
54
+ 'iso_abstract_expand.xsl')))
55
+ iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
56
+ os.path.join(_resources_dir,
57
+ 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')))
58
+
59
+
60
+ # svrl result accessors
61
+ svrl_validation_errors = _etree.XPath(
62
+ '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})
63
+
64
+ # RelaxNG validator for schematron schemas
65
+ schematron_schema_valid_supported = False
66
+ try:
67
+ schematron_schema_valid = _etree.RelaxNG(
68
+ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
69
+ schematron_schema_valid_supported = True
70
+ except _etree.RelaxNGParseError:
71
+ # Some distributions delete the file due to licensing issues.
72
+ def schematron_schema_valid(arg):
73
+ raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng")
74
+
75
+
76
+ def stylesheet_params(**kwargs):
77
+ """Convert keyword args to a dictionary of stylesheet parameters.
78
+ XSL stylesheet parameters must be XPath expressions, i.e.:
79
+
80
+ * string expressions, like "'5'"
81
+ * simple (number) expressions, like "5"
82
+ * valid XPath expressions, like "/a/b/text()"
83
+
84
+ This function converts native Python keyword arguments to stylesheet
85
+ parameters following these rules:
86
+ If an arg is a string wrap it with XSLT.strparam().
87
+ If an arg is an XPath object use its path string.
88
+ If arg is None raise TypeError.
89
+ Else convert arg to string.
90
+ """
91
+ result = {}
92
+ for key, val in kwargs.items():
93
+ if isinstance(val, basestring):
94
+ val = _etree.XSLT.strparam(val)
95
+ elif val is None:
96
+ raise TypeError('None not allowed as a stylesheet parameter')
97
+ elif not isinstance(val, _etree.XPath):
98
+ val = unicode(val)
99
+ result[key] = val
100
+ return result
101
+
102
+
103
+ # helper function for use in Schematron __init__
104
+ def _stylesheet_param_dict(paramsDict, kwargsDict):
105
+ """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as
106
+ stylesheet arguments.
107
+ kwargsDict entries with a value of None are ignored.
108
+ """
109
+ # beware of changing mutable default arg
110
+ paramsDict = dict(paramsDict)
111
+ for k, v in kwargsDict.items():
112
+ if v is not None: # None values do not override
113
+ paramsDict[k] = v
114
+ paramsDict = stylesheet_params(**paramsDict)
115
+ return paramsDict
116
+
117
+
118
+ class Schematron(_etree._Validator):
119
+ """An ISO Schematron validator.
120
+
121
+ Pass a root Element or an ElementTree to turn it into a validator.
122
+ Alternatively, pass a filename as keyword argument 'file' to parse from
123
+ the file system.
124
+
125
+ Schematron is a less well known, but very powerful schema language.
126
+ The main idea is to use the capabilities of XPath to put restrictions on
127
+ the structure and the content of XML documents.
128
+
129
+ The standard behaviour is to fail on ``failed-assert`` findings only
130
+ (``ASSERTS_ONLY``). To change this, you can either pass a report filter
131
+ function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
132
+ or a custom ``XPath`` object), or subclass isoschematron.Schematron for
133
+ complete control of the validation process.
134
+
135
+ Built on the Schematron language 'reference' skeleton pure-xslt
136
+ implementation, the validator is created as an XSLT 1.0 stylesheet using
137
+ these steps:
138
+
139
+ 0) (Extract from XML Schema or RelaxNG schema)
140
+ 1) Process inclusions
141
+ 2) Process abstract patterns
142
+ 3) Compile the schematron schema to XSLT
143
+
144
+ The ``include`` and ``expand`` keyword arguments can be used to switch off
145
+ steps 1) and 2).
146
+ To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
147
+ keyword arguments ``include_params``, ``expand_params`` or
148
+ ``compile_params``.
149
+ For convenience, the compile-step parameter ``phase`` is also exposed as a
150
+ keyword argument ``phase``. This takes precedence if the parameter is also
151
+ given in the parameter dictionary.
152
+
153
+ If ``store_schematron`` is set to True, the (included-and-expanded)
154
+ schematron document tree is stored and available through the ``schematron``
155
+ property.
156
+ If ``store_xslt`` is set to True, the validation XSLT document tree will be
157
+ stored and can be retrieved through the ``validator_xslt`` property.
158
+ With ``store_report`` set to True (default: False), the resulting validation
159
+ report document gets stored and can be accessed as the ``validation_report``
160
+ property.
161
+
162
+ If ``validate_schema`` is set to False, the validation of the schema file
163
+ itself is disabled. Validation happens by default after building the full
164
+ schema, unless the schema validation file cannot be found at import time,
165
+ in which case the validation gets disabled. Some lxml distributions exclude
166
+ this file due to licensing issues. ISO-Schematron validation can then still
167
+ be used normally, but the schemas themselves cannot be validated.
168
+
169
+ Here is a usage example::
170
+
171
+ >>> from lxml import etree
172
+ >>> from lxml.isoschematron import Schematron
173
+
174
+ >>> schematron = Schematron(etree.XML('''
175
+ ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
176
+ ... <pattern id="id_only_attribute">
177
+ ... <title>id is the only permitted attribute name</title>
178
+ ... <rule context="*">
179
+ ... <report test="@*[not(name()='id')]">Attribute
180
+ ... <name path="@*[not(name()='id')]"/> is forbidden<name/>
181
+ ... </report>
182
+ ... </rule>
183
+ ... </pattern>
184
+ ... </schema>'''),
185
+ ... error_finder=Schematron.ASSERTS_AND_REPORTS)
186
+
187
+ >>> xml = etree.XML('''
188
+ ... <AAA name="aaa">
189
+ ... <BBB id="bbb"/>
190
+ ... <CCC color="ccc"/>
191
+ ... </AAA>
192
+ ... ''')
193
+
194
+ >>> schematron.validate(xml)
195
+ False
196
+
197
+ >>> xml = etree.XML('''
198
+ ... <AAA id="aaa">
199
+ ... <BBB id="bbb"/>
200
+ ... <CCC/>
201
+ ... </AAA>
202
+ ... ''')
203
+
204
+ >>> schematron.validate(xml)
205
+ True
206
+ """
207
+
208
+ # libxml2 error categorization for validation errors
209
+ _domain = _etree.ErrorDomains.SCHEMATRONV
210
+ _level = _etree.ErrorLevels.ERROR
211
+ _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT
212
+
213
+ # convenience definitions for common behaviours
214
+ ASSERTS_ONLY = svrl_validation_errors # Default
215
+ ASSERTS_AND_REPORTS = _etree.XPath(
216
+ '//svrl:failed-assert | //svrl:successful-report',
217
+ namespaces={'svrl': SVRL_NS})
218
+
219
+ def _extract(self, element):
220
+ """Extract embedded schematron schema from non-schematron host schema.
221
+ This method will only be called by __init__ if the given schema document
222
+ is not a schematron schema by itself.
223
+ Must return a schematron schema document tree or None.
224
+ """
225
+ schematron = None
226
+ if element.tag == _xml_schema_root:
227
+ schematron = self._extract_xsd(element)
228
+ elif element.nsmap[element.prefix] == RELAXNG_NS:
229
+ # RelaxNG does not have a single unique root element
230
+ schematron = self._extract_rng(element)
231
+ return schematron
232
+
233
+ # customization points
234
+ # etree.XSLT objects that provide the extract, include, expand, compile
235
+ # steps
236
+ _extract_xsd = extract_xsd
237
+ _extract_rng = extract_rng
238
+ _include = iso_dsdl_include
239
+ _expand = iso_abstract_expand
240
+ _compile = iso_svrl_for_xslt1
241
+
242
+ # etree.xpath object that determines input document validity when applied to
243
+ # the svrl result report; must return a list of result elements (empty if
244
+ # valid)
245
+ _validation_errors = ASSERTS_ONLY
246
+
247
+ def __init__(self, etree=None, file=None, include=True, expand=True,
248
+ include_params={}, expand_params={}, compile_params={},
249
+ store_schematron=False, store_xslt=False, store_report=False,
250
+ phase=None, error_finder=ASSERTS_ONLY,
251
+ validate_schema=schematron_schema_valid_supported):
252
+ super().__init__()
253
+
254
+ self._store_report = store_report
255
+ self._schematron = None
256
+ self._validator_xslt = None
257
+ self._validation_report = None
258
+ if error_finder is not self.ASSERTS_ONLY:
259
+ self._validation_errors = error_finder
260
+
261
+ # parse schema document, may be a schematron schema or an XML Schema or
262
+ # a RelaxNG schema with embedded schematron rules
263
+ root = None
264
+ try:
265
+ if etree is not None:
266
+ if _etree.iselement(etree):
267
+ root = etree
268
+ else:
269
+ root = etree.getroot()
270
+ elif file is not None:
271
+ root = _etree.parse(file).getroot()
272
+ except Exception:
273
+ raise _etree.SchematronParseError(
274
+ "No tree or file given: %s" % sys.exc_info()[1])
275
+ if root is None:
276
+ raise ValueError("Empty tree")
277
+ if root.tag == _schematron_root:
278
+ schematron = root
279
+ else:
280
+ schematron = self._extract(root)
281
+ if schematron is None:
282
+ raise _etree.SchematronParseError(
283
+ "Document is not a schematron schema or schematron-extractable")
284
+ # perform the iso-schematron skeleton implementation steps to get a
285
+ # validating xslt
286
+ if include:
287
+ schematron = self._include(schematron, **include_params)
288
+ if expand:
289
+ schematron = self._expand(schematron, **expand_params)
290
+ if validate_schema and not schematron_schema_valid(schematron):
291
+ raise _etree.SchematronParseError(
292
+ "invalid schematron schema: %s" %
293
+ schematron_schema_valid.error_log)
294
+ if store_schematron:
295
+ self._schematron = schematron
296
+ # add new compile keyword args here if exposing them
297
+ compile_kwargs = {'phase': phase}
298
+ compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
299
+ validator_xslt = self._compile(schematron, **compile_params)
300
+ if store_xslt:
301
+ self._validator_xslt = validator_xslt
302
+ self._validator = _etree.XSLT(validator_xslt)
303
+
304
+ def __call__(self, etree):
305
+ """Validate doc using Schematron.
306
+
307
+ Returns true if document is valid, false if not.
308
+ """
309
+ self._clear_error_log()
310
+ result = self._validator(etree)
311
+ if self._store_report:
312
+ self._validation_report = result
313
+ errors = self._validation_errors(result)
314
+ if errors:
315
+ if _etree.iselement(etree):
316
+ fname = etree.getroottree().docinfo.URL or '<file>'
317
+ else:
318
+ fname = etree.docinfo.URL or '<file>'
319
+ for error in errors:
320
+ # Does svrl report the line number, anywhere? Don't think so.
321
+ self._append_log_message(
322
+ domain=self._domain, type=self._error_type,
323
+ level=self._level, line=0,
324
+ message=_etree.tostring(error, encoding='unicode'),
325
+ filename=fname)
326
+ return False
327
+ return True
328
+
329
+ @property
330
+ def schematron(self):
331
+ """ISO-schematron schema document (None if object has been initialized
332
+ with store_schematron=False).
333
+ """
334
+ return self._schematron
335
+
336
+ @property
337
+ def validator_xslt(self):
338
+ """ISO-schematron skeleton implementation XSLT validator document (None
339
+ if object has been initialized with store_xslt=False).
340
+ """
341
+ return self._validator_xslt
342
+
343
+ @property
344
+ def validation_report(self):
345
+ """ISO-schematron validation result report (None if result-storing has
346
+ been turned off).
347
+ """
348
+ return self._validation_report