lxml 6.0.0__cp310-cp310-win_arm64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (177) hide show
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cp310-win_arm64.pyd +0 -0
  4. lxml/_elementpath.py +343 -0
  5. lxml/apihelpers.pxi +1801 -0
  6. lxml/builder.cp310-win_arm64.pyd +0 -0
  7. lxml/builder.py +243 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +36 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cp310-win_arm64.pyd +0 -0
  16. lxml/etree.h +244 -0
  17. lxml/etree.pyx +3853 -0
  18. lxml/etree_api.h +204 -0
  19. lxml/extensions.pxi +830 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1927 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_difflib.cp310-win_arm64.pyd +0 -0
  24. lxml/html/_difflib.py +2106 -0
  25. lxml/html/_html5builder.py +100 -0
  26. lxml/html/_setmixin.py +56 -0
  27. lxml/html/builder.py +173 -0
  28. lxml/html/clean.py +21 -0
  29. lxml/html/defs.py +135 -0
  30. lxml/html/diff.cp310-win_arm64.pyd +0 -0
  31. lxml/html/diff.py +972 -0
  32. lxml/html/formfill.py +299 -0
  33. lxml/html/html5parser.py +260 -0
  34. lxml/html/soupparser.py +314 -0
  35. lxml/html/usedoctest.py +13 -0
  36. lxml/includes/__init__.pxd +0 -0
  37. lxml/includes/__init__.py +0 -0
  38. lxml/includes/c14n.pxd +25 -0
  39. lxml/includes/config.pxd +3 -0
  40. lxml/includes/dtdvalid.pxd +18 -0
  41. lxml/includes/etree_defs.h +379 -0
  42. lxml/includes/etreepublic.pxd +237 -0
  43. lxml/includes/extlibs/__init__.py +0 -0
  44. lxml/includes/extlibs/zconf.h +543 -0
  45. lxml/includes/extlibs/zlib.h +1938 -0
  46. lxml/includes/htmlparser.pxd +56 -0
  47. lxml/includes/libexslt/__init__.py +0 -0
  48. lxml/includes/libexslt/exslt.h +108 -0
  49. lxml/includes/libexslt/exsltconfig.h +70 -0
  50. lxml/includes/libexslt/exsltexports.h +63 -0
  51. lxml/includes/libexslt/libexslt.h +29 -0
  52. lxml/includes/libxml/HTMLparser.h +320 -0
  53. lxml/includes/libxml/HTMLtree.h +147 -0
  54. lxml/includes/libxml/SAX.h +204 -0
  55. lxml/includes/libxml/SAX2.h +173 -0
  56. lxml/includes/libxml/__init__.py +0 -0
  57. lxml/includes/libxml/c14n.h +128 -0
  58. lxml/includes/libxml/catalog.h +182 -0
  59. lxml/includes/libxml/chvalid.h +230 -0
  60. lxml/includes/libxml/debugXML.h +217 -0
  61. lxml/includes/libxml/dict.h +81 -0
  62. lxml/includes/libxml/encoding.h +233 -0
  63. lxml/includes/libxml/entities.h +151 -0
  64. lxml/includes/libxml/globals.h +529 -0
  65. lxml/includes/libxml/hash.h +236 -0
  66. lxml/includes/libxml/list.h +137 -0
  67. lxml/includes/libxml/nanoftp.h +186 -0
  68. lxml/includes/libxml/nanohttp.h +81 -0
  69. lxml/includes/libxml/parser.h +1265 -0
  70. lxml/includes/libxml/parserInternals.h +662 -0
  71. lxml/includes/libxml/pattern.h +100 -0
  72. lxml/includes/libxml/relaxng.h +218 -0
  73. lxml/includes/libxml/schemasInternals.h +958 -0
  74. lxml/includes/libxml/schematron.h +142 -0
  75. lxml/includes/libxml/threads.h +94 -0
  76. lxml/includes/libxml/tree.h +1314 -0
  77. lxml/includes/libxml/uri.h +94 -0
  78. lxml/includes/libxml/valid.h +448 -0
  79. lxml/includes/libxml/xinclude.h +129 -0
  80. lxml/includes/libxml/xlink.h +189 -0
  81. lxml/includes/libxml/xmlIO.h +369 -0
  82. lxml/includes/libxml/xmlautomata.h +146 -0
  83. lxml/includes/libxml/xmlerror.h +919 -0
  84. lxml/includes/libxml/xmlexports.h +50 -0
  85. lxml/includes/libxml/xmlmemory.h +228 -0
  86. lxml/includes/libxml/xmlmodule.h +57 -0
  87. lxml/includes/libxml/xmlreader.h +428 -0
  88. lxml/includes/libxml/xmlregexp.h +222 -0
  89. lxml/includes/libxml/xmlsave.h +88 -0
  90. lxml/includes/libxml/xmlschemas.h +246 -0
  91. lxml/includes/libxml/xmlschemastypes.h +152 -0
  92. lxml/includes/libxml/xmlstring.h +140 -0
  93. lxml/includes/libxml/xmlunicode.h +202 -0
  94. lxml/includes/libxml/xmlversion.h +526 -0
  95. lxml/includes/libxml/xmlwriter.h +488 -0
  96. lxml/includes/libxml/xpath.h +575 -0
  97. lxml/includes/libxml/xpathInternals.h +632 -0
  98. lxml/includes/libxml/xpointer.h +137 -0
  99. lxml/includes/libxslt/__init__.py +0 -0
  100. lxml/includes/libxslt/attributes.h +39 -0
  101. lxml/includes/libxslt/documents.h +93 -0
  102. lxml/includes/libxslt/extensions.h +262 -0
  103. lxml/includes/libxslt/extra.h +72 -0
  104. lxml/includes/libxslt/functions.h +78 -0
  105. lxml/includes/libxslt/imports.h +75 -0
  106. lxml/includes/libxslt/keys.h +53 -0
  107. lxml/includes/libxslt/libxslt.h +36 -0
  108. lxml/includes/libxslt/namespaces.h +68 -0
  109. lxml/includes/libxslt/numbersInternals.h +73 -0
  110. lxml/includes/libxslt/preproc.h +43 -0
  111. lxml/includes/libxslt/security.h +104 -0
  112. lxml/includes/libxslt/templates.h +77 -0
  113. lxml/includes/libxslt/transform.h +207 -0
  114. lxml/includes/libxslt/trio.h +216 -0
  115. lxml/includes/libxslt/triodef.h +220 -0
  116. lxml/includes/libxslt/variables.h +118 -0
  117. lxml/includes/libxslt/win32config.h +51 -0
  118. lxml/includes/libxslt/xslt.h +110 -0
  119. lxml/includes/libxslt/xsltInternals.h +1992 -0
  120. lxml/includes/libxslt/xsltconfig.h +179 -0
  121. lxml/includes/libxslt/xsltexports.h +64 -0
  122. lxml/includes/libxslt/xsltlocale.h +44 -0
  123. lxml/includes/libxslt/xsltutils.h +343 -0
  124. lxml/includes/lxml-version.h +3 -0
  125. lxml/includes/relaxng.pxd +64 -0
  126. lxml/includes/schematron.pxd +34 -0
  127. lxml/includes/tree.pxd +492 -0
  128. lxml/includes/uri.pxd +5 -0
  129. lxml/includes/xinclude.pxd +22 -0
  130. lxml/includes/xmlerror.pxd +852 -0
  131. lxml/includes/xmlparser.pxd +303 -0
  132. lxml/includes/xmlschema.pxd +35 -0
  133. lxml/includes/xpath.pxd +136 -0
  134. lxml/includes/xslt.pxd +190 -0
  135. lxml/isoschematron/__init__.py +348 -0
  136. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  137. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  138. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  142. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  143. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  144. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  145. lxml/iterparse.pxi +438 -0
  146. lxml/lxml.etree.h +244 -0
  147. lxml/lxml.etree_api.h +204 -0
  148. lxml/nsclasses.pxi +281 -0
  149. lxml/objectify.cp310-win_arm64.pyd +0 -0
  150. lxml/objectify.pyx +2149 -0
  151. lxml/objectpath.pxi +332 -0
  152. lxml/parser.pxi +2059 -0
  153. lxml/parsertarget.pxi +180 -0
  154. lxml/proxy.pxi +619 -0
  155. lxml/public-api.pxi +178 -0
  156. lxml/pyclasslookup.py +3 -0
  157. lxml/readonlytree.pxi +565 -0
  158. lxml/relaxng.pxi +165 -0
  159. lxml/sax.cp310-win_arm64.pyd +0 -0
  160. lxml/sax.py +286 -0
  161. lxml/saxparser.pxi +875 -0
  162. lxml/schematron.pxi +173 -0
  163. lxml/serializer.pxi +1849 -0
  164. lxml/usedoctest.py +13 -0
  165. lxml/xinclude.pxi +67 -0
  166. lxml/xmlerror.pxi +1654 -0
  167. lxml/xmlid.pxi +179 -0
  168. lxml/xmlschema.pxi +215 -0
  169. lxml/xpath.pxi +487 -0
  170. lxml/xslt.pxi +957 -0
  171. lxml/xsltext.pxi +242 -0
  172. lxml-6.0.0.dist-info/METADATA +163 -0
  173. lxml-6.0.0.dist-info/RECORD +177 -0
  174. lxml-6.0.0.dist-info/WHEEL +5 -0
  175. lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
  176. lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
  177. lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/xpath.pxi ADDED
@@ -0,0 +1,487 @@
1
+ # XPath evaluation
2
+
3
# Raised by XPath.__init__() (through _build_parse_error()) when the
# expression cannot be compiled.  Derives from both the generic syntax error
# type and the XPath error type, so callers may catch either.
class XPathSyntaxError(LxmlSyntaxError, XPathError):
    pass
5
+
6
+ ################################################################################
7
+ # XPath
8
+
9
# Error codes used by _build_parse_error() (and as a fallback by
# _build_eval_error()) to select the log entries that describe a
# malformed expression.
cdef object _XPATH_SYNTAX_ERRORS = (
    xmlerror.XML_XPATH_NUMBER_ERROR,
    xmlerror.XML_XPATH_UNFINISHED_LITERAL_ERROR,
    xmlerror.XML_XPATH_VARIABLE_REF_ERROR,
    xmlerror.XML_XPATH_INVALID_PREDICATE_ERROR,
    xmlerror.XML_XPATH_UNCLOSED_ERROR,
    xmlerror.XML_XPATH_INVALID_CHAR_ERROR,
)

# Error codes used by _build_eval_error() to select the log entries that
# describe a failure while evaluating an expression.
cdef object _XPATH_EVAL_ERRORS = (
    xmlerror.XML_XPATH_UNDEF_VARIABLE_ERROR,
    xmlerror.XML_XPATH_UNDEF_PREFIX_ERROR,
    xmlerror.XML_XPATH_UNKNOWN_FUNC_ERROR,
    xmlerror.XML_XPATH_INVALID_OPERAND,
    xmlerror.XML_XPATH_INVALID_TYPE,
    xmlerror.XML_XPATH_INVALID_ARITY,
    xmlerror.XML_XPATH_INVALID_CTXT_SIZE,
    xmlerror.XML_XPATH_INVALID_CTXT_POSITION,
)
28
+
29
cdef int _register_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Registration callback passed to registerLocalFunctions() and
    # registerGlobalFunctions(): binds the shared _xpath_function_call
    # dispatcher to the given (optionally namespaced) function name and
    # returns the libxml2 status code.
    cdef xpath.xmlXPathContext* c_ctxt = <xpath.xmlXPathContext*>ctxt
    if ns_utf is not None:
        return xpath.xmlXPathRegisterFuncNS(
            c_ctxt, _xcstr(name_utf), _xcstr(ns_utf), _xpath_function_call)
    return xpath.xmlXPathRegisterFunc(
        c_ctxt, _xcstr(name_utf), _xpath_function_call)
38
+
39
cdef int _unregister_xpath_function(void* ctxt, name_utf, ns_utf) noexcept:
    # Counterpart of _register_xpath_function(): registering NULL under the
    # same (optionally namespaced) name removes the function binding.
    cdef xpath.xmlXPathContext* c_ctxt = <xpath.xmlXPathContext*>ctxt
    if ns_utf is not None:
        return xpath.xmlXPathRegisterFuncNS(
            c_ctxt, _xcstr(name_utf), _xcstr(ns_utf), NULL)
    return xpath.xmlXPathRegisterFunc(c_ctxt, _xcstr(name_utf), NULL)
46
+
47
+
48
@cython.final
@cython.internal
cdef class _XPathContext(_BaseContext):
    # Context object used by the XPath evaluators in this file.  On top of
    # _BaseContext it remembers an optional mapping of XPath variables that
    # is registered each time the context is bound to a document.
    cdef object _variables

    def __init__(self, namespaces, extensions, error_log, enable_regexp, variables,
                 build_smart_strings):
        self._variables = variables
        _BaseContext.__init__(
            self, namespaces, extensions, error_log, enable_regexp,
            build_smart_strings)

    cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
        # Attach the XPath context and register what is local to this
        # evaluator: its own namespaces and extension functions.
        self._set_xpath_context(xpathCtxt)
        # This would be a good place to set up the XPath parser dict, but
        # we cannot use the current thread dict as we do not know which
        # thread will execute the XPath evaluator - so, no dict for now.
        self.registerLocalNamespaces()
        self.registerLocalFunctions(xpathCtxt, _register_xpath_function)

    cdef register_context(self, _Document doc):
        # Per-evaluation setup: bind the document, then register the global
        # namespaces/functions, the EXSLT functions and the stored variables.
        self._register_context(doc)
        self.registerGlobalNamespaces()
        self.registerGlobalFunctions(self._xpathCtxt, _register_xpath_function)
        self.registerExsltFunctions()
        if self._variables is not None:
            self.registerVariables(self._variables)

    cdef unregister_context(self):
        # Per-evaluation teardown: undo register_context() and drop all
        # variables that were registered on the XPath context.
        self.unregisterGlobalFunctions(
            self._xpathCtxt, _unregister_xpath_function)
        self.unregisterGlobalNamespaces()
        xpath.xmlXPathRegisteredVariablesCleanup(self._xpathCtxt)
        self._cleanup_context()

    cdef void registerExsltFunctions(self) noexcept:
        # With older libxslt versions we'd only execute dummy functions
        # anyway, so registration is skipped entirely there.
        if xslt.LIBXSLT_VERSION >= 10125:
            tree.xmlHashScan(
                self._xpathCtxt.nsHash, _registerExsltFunctionsForNamespaces,
                self._xpathCtxt)

    cdef registerVariables(self, variable_dict):
        # Register every name -> value pair of the mapping as XPath variable.
        for var_name, var_value in variable_dict.items():
            self.registerVariable(var_name, var_value)

    cdef registerVariable(self, name, value):
        # Register a single XPath variable; the value is converted with
        # _wrapXPathObject() before it is handed to libxml2.
        name_utf = self._to_utf(name)
        xpath.xmlXPathRegisterVariable(
            self._xpathCtxt, _xcstr(name_utf), _wrapXPathObject(value, None, None))
99
+
100
+
101
cdef void _registerExsltFunctionsForNamespaces(
        void* _c_href, void* _ctxt, const_xmlChar* c_prefix) noexcept:
    # xmlHashScan() callback used by _XPathContext.registerExsltFunctions():
    # called with a namespace URI and its prefix, it registers the matching
    # EXSLT function set (date, sets, math or strings) under that prefix.
    # Any other namespace is ignored.
    cdef const_xmlChar* ns_href = <const_xmlChar*> _c_href
    cdef xpath.xmlXPathContext* xpath_ctxt = <xpath.xmlXPathContext*> _ctxt

    if not tree.xmlStrcmp(ns_href, xslt.EXSLT_DATE_NAMESPACE):
        xslt.exsltDateXpathCtxtRegister(xpath_ctxt, c_prefix)
    elif not tree.xmlStrcmp(ns_href, xslt.EXSLT_SETS_NAMESPACE):
        xslt.exsltSetsXpathCtxtRegister(xpath_ctxt, c_prefix)
    elif not tree.xmlStrcmp(ns_href, xslt.EXSLT_MATH_NAMESPACE):
        xslt.exsltMathXpathCtxtRegister(xpath_ctxt, c_prefix)
    elif not tree.xmlStrcmp(ns_href, xslt.EXSLT_STRINGS_NAMESPACE):
        xslt.exsltStrXpathCtxtRegister(xpath_ctxt, c_prefix)
114
+
115
+
116
cdef class _XPathEvaluatorBase:
    # Common base class of the XPath evaluators in this file.  It owns the
    # libxml2 XPath context (freed in __dealloc__), the _XPathContext
    # wrapper, the error log that feeds the exceptions built below and,
    # if threading is enabled, the lock that subclasses take around each
    # evaluation.
    cdef xpath.xmlXPathContext* _xpathCtxt
    cdef _XPathContext _context
    cdef python.PyThread_type_lock _eval_lock
    cdef _ErrorLog _error_log

    def __cinit__(self):
        self._xpathCtxt = NULL
        if config.ENABLE_THREADING:
            self._eval_lock = python.PyThread_allocate_lock()
            if self._eval_lock is NULL:
                raise MemoryError()
        self._error_log = _ErrorLog()

    def __init__(self, namespaces, extensions, enable_regexp,
                 smart_strings):
        # No variables are stored on the context here; they are passed
        # per call by the subclasses.
        self._context = _XPathContext(
            namespaces, extensions, self._error_log,
            enable_regexp, None, smart_strings)

    @property
    def error_log(self):
        # Hand out a copy of the evaluator's error log.
        assert self._error_log is not None, "XPath evaluator not initialised"
        return self._error_log.copy()

    def __dealloc__(self):
        if self._xpathCtxt is not NULL:
            xpath.xmlXPathFreeContext(self._xpathCtxt)
        if config.ENABLE_THREADING and self._eval_lock is not NULL:
            python.PyThread_free_lock(self._eval_lock)

    cdef set_context(self, xpath.xmlXPathContext* xpathCtxt):
        # Remember the XPath context and pass it on to the wrapper.
        self._xpathCtxt = xpathCtxt
        self._context.set_context(xpathCtxt)

    cdef bint _checkAbsolutePath(self, char* path) noexcept:
        # True if the first character after leading blanks/tabs is '/'.
        if path is NULL:
            return 0
        while path[0] == c' ' or path[0] == c'\t':
            path += 1
        return path[0] == c'/'

    @cython.final
    cdef int _lock(self) except -1:
        # Acquire the evaluation lock (blocking, with the GIL released).
        # Does nothing when threading is disabled or no lock exists.
        cdef int acquired
        if not config.ENABLE_THREADING or self._eval_lock == NULL:
            return 0
        with nogil:
            acquired = python.PyThread_acquire_lock(
                self._eval_lock, python.WAIT_LOCK)
        if acquired == 0:
            raise XPathError("XPath evaluator locking failed")
        return 0

    @cython.final
    cdef void _unlock(self) noexcept:
        # Release the evaluation lock taken by _lock().
        if config.ENABLE_THREADING and self._eval_lock != NULL:
            python.PyThread_release_lock(self._eval_lock)

    cdef _build_parse_error(self):
        # Build (not raise) an XPathSyntaxError.  Log entries with a known
        # syntax error code provide the message if there are any, otherwise
        # the complete log is used with a generic default text.
        cdef _BaseErrorLog relevant
        message = None
        relevant = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
        if relevant:
            message = relevant._buildExceptionMessage(None)
        if message is None:
            message = self._error_log._buildExceptionMessage(
                "Error in xpath expression")
        return XPathSyntaxError(message, self._error_log)

    cdef _build_eval_error(self):
        # Build (not raise) an XPathEvalError.  Evaluation error entries are
        # preferred, syntax error entries are the second choice, and the
        # complete log with a generic default text is the last resort.
        cdef _BaseErrorLog relevant
        message = None
        relevant = self._error_log.filter_types(_XPATH_EVAL_ERRORS)
        if not relevant:
            relevant = self._error_log.filter_types(_XPATH_SYNTAX_ERRORS)
        if relevant:
            message = relevant._buildExceptionMessage(None)
        if message is None:
            message = self._error_log._buildExceptionMessage(
                "Error in xpath expression")
        return XPathEvalError(message, self._error_log)

    cdef object _handle_result(self, xpath.xmlXPathObject* xpathObj, _Document doc):
        # Turn the raw evaluation result into a Python object.  The XPath
        # object is always freed here and the temporary references held by
        # the context are released on every path.
        cdef _XPathContext context = self._context

        if context._exc._has_raised():
            # An exception was stored during evaluation: clean up and
            # re-raise it instead of returning a result.
            if xpathObj is not NULL:
                _freeXPathObject(xpathObj)
                xpathObj = NULL
            context._release_temp_refs()
            context._exc._raise_if_stored()

        if xpathObj is NULL:
            # No result and no stored exception: report from the error log.
            context._release_temp_refs()
            raise self._build_eval_error()

        try:
            return _unwrapXPathObject(xpathObj, doc, context)
        finally:
            _freeXPathObject(xpathObj)
            context._release_temp_refs()
219
+
220
+
221
cdef class XPathElementEvaluator(_XPathEvaluatorBase):
    """XPathElementEvaluator(self, element, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    Create an XPath evaluator for an element.

    Absolute XPath expressions (starting with '/') will be evaluated against
    the ElementTree as returned by getroottree().

    Additional namespace declarations can be passed with the
    ``namespaces`` keyword argument.  EXSLT regular expression support
    can be disabled with the ``regexp`` boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    cdef _Element _element

    def __init__(self, _Element element not None, *, namespaces=None,
                 extensions=None, regexp=True, smart_strings=True):
        cdef xpath.xmlXPathContext* xpathCtxt
        cdef _Document doc
        _assertValidNode(element)
        _assertValidDoc(element._doc)
        self._element = element
        doc = element._doc
        _XPathEvaluatorBase.__init__(
            self, namespaces, extensions, regexp, smart_strings)
        xpathCtxt = xpath.xmlXPathNewContext(doc._c_doc)
        if xpathCtxt is NULL:
            raise MemoryError()
        self.set_context(xpathCtxt)

    def register_namespace(self, prefix, uri):
        """Register a single prefix -> uri mapping with the XPath context.
        """
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        self._context.addNamespace(prefix, uri)

    def register_namespaces(self, namespaces):
        """Register all entries of a prefix -> uri dict.
        """
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        for ns_prefix, ns_uri in namespaces.items():
            self._context.addNamespace(ns_prefix, ns_uri)

    def __call__(self, _path, **_variables):
        """__call__(self, _path, **_variables)

        Evaluate an XPath expression on the document.

        Variables may be provided as keyword arguments.  Note that namespaces
        are currently not supported for variables.

        Absolute XPath expressions (starting with '/') will be evaluated
        against the ElementTree as returned by getroottree().
        """
        cdef xpath.xmlXPathObject* xpathObj
        cdef _Document doc
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        path_utf = _utf8(_path)
        doc = self._element._doc

        self._lock()
        # The element this evaluator was created for is the context node.
        self._xpathCtxt.node = self._element._c_node
        try:
            self._context.register_context(doc)
            self._context.registerVariables(_variables)
            c_path = _xcstr(path_utf)
            with nogil:
                xpathObj = xpath.xmlXPathEvalExpression(
                    c_path, self._xpathCtxt)
            return self._handle_result(xpathObj, doc)
        finally:
            # Always undo the registration and release the lock.
            self._context.unregister_context()
            self._unlock()
296
+
297
+
298
cdef class XPathDocumentEvaluator(XPathElementEvaluator):
    """XPathDocumentEvaluator(self, etree, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    Create an XPath evaluator for an ElementTree.

    Additional namespace declarations can be passed with the
    ``namespaces`` keyword argument.  EXSLT regular expression support
    can be disabled with the ``regexp`` boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    def __init__(self, _ElementTree etree not None, *, namespaces=None,
                 extensions=None, regexp=True, smart_strings=True):
        # The tree's context node plays the role of the evaluator element.
        XPathElementEvaluator.__init__(
            self, etree._context_node,
            namespaces=namespaces, extensions=extensions,
            regexp=regexp, smart_strings=smart_strings)

    def __call__(self, _path, **_variables):
        """__call__(self, _path, **_variables)

        Evaluate an XPath expression on the document.

        Variables may be provided as keyword arguments.  Note that namespaces
        are currently not supported for variables.
        """
        cdef xpath.xmlXPathObject* xpathObj
        cdef xmlDoc* c_doc
        cdef _Document doc
        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        path_utf = _utf8(_path)
        doc = self._element._doc

        self._lock()
        try:
            self._context.register_context(doc)
            # Evaluate against the document obtained from _fakeRootDoc()
            # for the context node; it is given back to _destroyFakeDoc()
            # once the evaluation is done.
            c_doc = _fakeRootDoc(doc._c_doc, self._element._c_node)
            try:
                self._context.registerVariables(_variables)
                c_path = _xcstr(path_utf)
                with nogil:
                    self._xpathCtxt.doc = c_doc
                    self._xpathCtxt.node = tree.xmlDocGetRootElement(c_doc)
                    xpathObj = xpath.xmlXPathEvalExpression(
                        c_path, self._xpathCtxt)
                return self._handle_result(xpathObj, doc)
            finally:
                _destroyFakeDoc(doc._c_doc, c_doc)
                self._context.unregister_context()
        finally:
            self._unlock()
350
+
351
+
352
def XPathEvaluator(etree_or_element, *, namespaces=None, extensions=None,
                   regexp=True, smart_strings=True):
    """XPathEvaluator(etree_or_element, namespaces=None, extensions=None, regexp=True, smart_strings=True)

    Creates an XPath evaluator for an ElementTree or an Element.

    An ElementTree argument yields an ``XPathDocumentEvaluator``, anything
    else is passed to ``XPathElementEvaluator``.  The resulting object can
    be called with an XPath expression as argument and XPath variables
    provided as keyword arguments.

    Additional namespace declarations can be passed with the
    ``namespaces`` keyword argument.  EXSLT regular expression support
    can be disabled with the ``regexp`` boolean keyword (defaults to
    True).  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    if isinstance(etree_or_element, _ElementTree):
        evaluator_class = XPathDocumentEvaluator
    else:
        evaluator_class = XPathElementEvaluator
    return evaluator_class(
        etree_or_element, namespaces=namespaces, extensions=extensions,
        regexp=regexp, smart_strings=smart_strings)
375
+
376
+
377
cdef class XPath(_XPathEvaluatorBase):
    """XPath(self, path, namespaces=None, extensions=None, regexp=True, smart_strings=True)
    A compiled XPath expression that can be called on Elements and ElementTrees.

    Besides the XPath expression, you can pass prefix-namespace
    mappings and extension functions to the constructor through the
    keyword arguments ``namespaces`` and ``extensions``.  EXSLT
    regular expression support can be disabled with the ``regexp``
    boolean keyword (defaults to True).  Smart strings will be
    returned for string results unless you pass
    ``smart_strings=False``.
    """
    cdef xpath.xmlXPathCompExpr* _xpath
    cdef bytes _path

    def __cinit__(self):
        self._xpath = NULL

    def __init__(self, path, *, namespaces=None, extensions=None,
                 regexp=True, smart_strings=True):
        cdef xpath.xmlXPathContext* xpathCtxt
        _XPathEvaluatorBase.__init__(
            self, namespaces, extensions, regexp, smart_strings)
        self._path = _utf8(path)
        # The expression is compiled in a context without a document; the
        # document and context node are set on each call.
        xpathCtxt = xpath.xmlXPathNewContext(NULL)
        if xpathCtxt is NULL:
            raise MemoryError()
        self.set_context(xpathCtxt)
        self._xpath = xpath.xmlXPathCtxtCompile(xpathCtxt, _xcstr(self._path))
        if self._xpath is NULL:
            raise self._build_parse_error()

    def __call__(self, _etree_or_element, **_variables):
        "__call__(self, _etree_or_element, **_variables)"
        cdef xpath.xmlXPathObject* xpathObj
        cdef _Document document
        cdef _Element element

        assert self._xpathCtxt is not NULL, "XPath context not initialised"
        document = _documentOrRaise(_etree_or_element)
        element = _rootNodeOrRaise(_etree_or_element)

        self._lock()
        # Point the shared XPath context at the target of this call.
        self._xpathCtxt.doc = document._c_doc
        self._xpathCtxt.node = element._c_node

        try:
            self._context.register_context(document)
            self._context.registerVariables(_variables)
            with nogil:
                xpathObj = xpath.xmlXPathCompiledEval(
                    self._xpath, self._xpathCtxt)
            return self._handle_result(xpathObj, document)
        finally:
            # Always undo the registration and release the lock.
            self._context.unregister_context()
            self._unlock()

    @property
    def path(self):
        """The literal XPath expression.
        """
        return self._path.decode('UTF-8')

    def __dealloc__(self):
        if self._xpath is not NULL:
            xpath.xmlXPathFreeCompExpr(self._xpath)

    def __repr__(self):
        # The repr is simply the expression text.
        return self.path
446
+
447
+
448
# Removes XPath string literals ("..." or '...') from a UTF-8 encoded path.
cdef object _replace_strings = re.compile(b'("[^"]*")|(\'[^\']*\')').sub
# Finds all '{uri}' namespace markers in a UTF-8 encoded path.
cdef object _find_namespaces = re.compile(b'({[^}]+})').findall
# Splits a UTF-8 encoded path at its string literals.  The pattern has a
# single capturing group, so the result alternates between text outside of
# literals (even indices) and the literals themselves (odd indices).
cdef object _split_strings = re.compile(b'("[^"]*"|\'[^\']*\')').split

cdef class ETXPath(XPath):
    """ETXPath(self, path, extensions=None, regexp=True, smart_strings=True)
    Special XPath class that supports the ElementTree {uri} notation for namespaces.

    Note that this class does not accept the ``namespaces`` keyword
    argument.  All namespaces must be passed as part of the path
    string.  Smart strings will be returned for string results unless
    you pass ``smart_strings=False``.
    """
    def __init__(self, path, *, extensions=None, regexp=True,
                 smart_strings=True):
        path, namespaces = self._nsextract_path(path)
        XPath.__init__(self, path, namespaces=namespaces,
                       extensions=extensions, regexp=regexp,
                       smart_strings=smart_strings)

    cdef _nsextract_path(self, path):
        # Replace each distinct '{uri}' marker by a generated prefix
        # ('__xpp01:', '__xpp02:', ...) and return the rewritten path
        # together with the prefix -> uri mapping to register.
        #
        # Markers are looked up in the path with its string literals
        # removed, and the rewrite is applied only to the text outside of
        # string literals, so a literal such as "{uri}text" keeps its value.
        cdef dict namespaces = {}
        cdef dict replacements = {}  # '{uri}' marker -> b'prefix:'
        cdef list segments
        cdef int i = 1

        path_utf = _utf8(path)
        stripped_path = _replace_strings(b'', path_utf)  # remove string literals
        for namespace_def in _find_namespaces(stripped_path):
            if namespace_def in replacements:
                continue
            prefix = python.PyBytes_FromFormat("__xpp%02d", i)
            i += 1
            namespace = namespace_def[1:-1]  # remove '{}'
            namespaces[prefix.decode('utf8')] = (<bytes>namespace).decode('utf8')
            replacements[namespace_def] = prefix + b':'

        if replacements:
            segments = _split_strings(path_utf)
            # Even indices hold the text between string literals; the
            # literals at the odd indices are left untouched.
            for idx in range(0, len(segments), 2):
                segment = segments[idx]
                for namespace_def, prefix_str in replacements.items():
                    segment = segment.replace(namespace_def, prefix_str)
                segments[idx] = segment
            path_utf = b''.join(segments)

        path = path_utf.decode('utf8')
        return path, namespaces