lxml 5.2.0__cp310-cp310-win32.whl → 5.2.2__cp310-cp310-win32.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -244
- lxml/__init__.py +22 -22
- lxml/_elementpath.cp310-win32.pyd +0 -0
- lxml/_elementpath.py +341 -341
- lxml/apihelpers.pxi +1793 -1793
- lxml/builder.cp310-win32.pyd +0 -0
- lxml/builder.py +232 -232
- lxml/classlookup.pxi +580 -580
- lxml/cleanup.pxi +215 -215
- lxml/cssselect.py +101 -101
- lxml/debug.pxi +90 -90
- lxml/docloader.pxi +178 -178
- lxml/doctestcompare.py +488 -488
- lxml/dtd.pxi +478 -478
- lxml/etree.cp310-win32.pyd +0 -0
- lxml/etree.h +6 -6
- lxml/etree.pyx +3732 -3711
- lxml/extensions.pxi +833 -833
- lxml/html/ElementSoup.py +10 -10
- lxml/html/__init__.py +1923 -1923
- lxml/html/_diffcommand.py +86 -86
- lxml/html/_html5builder.py +100 -100
- lxml/html/_setmixin.py +56 -56
- lxml/html/builder.py +133 -133
- lxml/html/clean.py +21 -21
- lxml/html/defs.py +135 -135
- lxml/html/diff.cp310-win32.pyd +0 -0
- lxml/html/diff.py +878 -878
- lxml/html/formfill.py +299 -299
- lxml/html/html5parser.py +260 -260
- lxml/html/soupparser.py +314 -314
- lxml/html/usedoctest.py +13 -13
- lxml/includes/c14n.pxd +25 -25
- lxml/includes/config.pxd +3 -3
- lxml/includes/dtdvalid.pxd +18 -18
- lxml/includes/etree_defs.h +379 -379
- lxml/includes/etreepublic.pxd +237 -237
- lxml/includes/htmlparser.pxd +56 -56
- lxml/includes/lxml-version.h +1 -1
- lxml/includes/relaxng.pxd +64 -64
- lxml/includes/schematron.pxd +34 -34
- lxml/includes/tree.pxd +494 -494
- lxml/includes/uri.pxd +5 -5
- lxml/includes/xinclude.pxd +22 -22
- lxml/includes/xmlerror.pxd +852 -852
- lxml/includes/xmlparser.pxd +265 -265
- lxml/includes/xmlschema.pxd +35 -35
- lxml/includes/xpath.pxd +136 -136
- lxml/includes/xslt.pxd +190 -190
- lxml/isoschematron/__init__.py +348 -348
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -709
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -75
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +312 -312
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1159 -1159
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +54 -54
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -1796
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -588
- lxml/iterparse.pxi +438 -438
- lxml/lxml.etree.h +6 -6
- lxml/nsclasses.pxi +281 -281
- lxml/objectify.cp310-win32.pyd +0 -0
- lxml/objectify.pyx +2145 -2145
- lxml/objectpath.pxi +332 -332
- lxml/parser.pxi +1994 -1994
- lxml/parsertarget.pxi +180 -180
- lxml/proxy.pxi +619 -619
- lxml/public-api.pxi +178 -178
- lxml/pyclasslookup.py +3 -3
- lxml/readonlytree.pxi +565 -565
- lxml/relaxng.pxi +165 -165
- lxml/sax.cp310-win32.pyd +0 -0
- lxml/sax.py +275 -275
- lxml/saxparser.pxi +875 -875
- lxml/schematron.pxi +168 -168
- lxml/serializer.pxi +1871 -1871
- lxml/usedoctest.py +13 -13
- lxml/xinclude.pxi +67 -67
- lxml/xmlerror.pxi +1654 -1654
- lxml/xmlid.pxi +179 -179
- lxml/xmlschema.pxi +215 -215
- lxml/xpath.pxi +487 -487
- lxml/xslt.pxi +950 -950
- lxml/xsltext.pxi +242 -242
- {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSE.txt +29 -29
- {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSES.txt +29 -29
- {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/METADATA +9 -17
- {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/RECORD +89 -89
- {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/WHEEL +0 -0
- {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/top_level.txt +0 -0
lxml/_elementpath.py
CHANGED
@@ -1,341 +1,341 @@
|
|
1
|
-
# cython: language_level=2
|
2
|
-
|
3
|
-
#
|
4
|
-
# ElementTree
|
5
|
-
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
|
6
|
-
#
|
7
|
-
# limited xpath support for element trees
|
8
|
-
#
|
9
|
-
# history:
|
10
|
-
# 2003-05-23 fl created
|
11
|
-
# 2003-05-28 fl added support for // etc
|
12
|
-
# 2003-08-27 fl fixed parsing of periods in element names
|
13
|
-
# 2007-09-10 fl new selection engine
|
14
|
-
# 2007-09-12 fl fixed parent selector
|
15
|
-
# 2007-09-13 fl added iterfind; changed findall to return a list
|
16
|
-
# 2007-11-30 fl added namespaces support
|
17
|
-
# 2009-10-30 fl added child element value filter
|
18
|
-
#
|
19
|
-
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
|
20
|
-
#
|
21
|
-
# fredrik@pythonware.com
|
22
|
-
# http://www.pythonware.com
|
23
|
-
#
|
24
|
-
# --------------------------------------------------------------------
|
25
|
-
# The ElementTree toolkit is
|
26
|
-
#
|
27
|
-
# Copyright (c) 1999-2009 by Fredrik Lundh
|
28
|
-
#
|
29
|
-
# By obtaining, using, and/or copying this software and/or its
|
30
|
-
# associated documentation, you agree that you have read, understood,
|
31
|
-
# and will comply with the following terms and conditions:
|
32
|
-
#
|
33
|
-
# Permission to use, copy, modify, and distribute this software and
|
34
|
-
# its associated documentation for any purpose and without fee is
|
35
|
-
# hereby granted, provided that the above copyright notice appears in
|
36
|
-
# all copies, and that both that copyright notice and this permission
|
37
|
-
# notice appear in supporting documentation, and that the name of
|
38
|
-
# Secret Labs AB or the author not be used in advertising or publicity
|
39
|
-
# pertaining to distribution of the software without specific, written
|
40
|
-
# prior permission.
|
41
|
-
#
|
42
|
-
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
43
|
-
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
44
|
-
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
45
|
-
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
46
|
-
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
47
|
-
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
48
|
-
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
49
|
-
# OF THIS SOFTWARE.
|
50
|
-
# --------------------------------------------------------------------
|
51
|
-
|
52
|
-
##
|
53
|
-
# Implementation module for XPath support. There's usually no reason
|
54
|
-
# to import this module directly; the <b>ElementTree</b> does this for
|
55
|
-
# you, if needed.
|
56
|
-
##
|
57
|
-
|
58
|
-
|
59
|
-
import re
|
60
|
-
|
61
|
-
xpath_tokenizer_re = re.compile(
|
62
|
-
"("
|
63
|
-
"'[^']*'|\"[^\"]*\"|"
|
64
|
-
"::|"
|
65
|
-
"//?|"
|
66
|
-
r"\.\.|"
|
67
|
-
r"\(\)|"
|
68
|
-
r"[/.*:\[\]\(\)@=])|"
|
69
|
-
r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
|
70
|
-
r"\s+"
|
71
|
-
)
|
72
|
-
|
73
|
-
def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
|
74
|
-
# ElementTree uses '', lxml used None originally.
|
75
|
-
default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None
|
76
|
-
parsing_attribute = False
|
77
|
-
for token in xpath_tokenizer_re.findall(pattern):
|
78
|
-
ttype, tag = token
|
79
|
-
if tag and tag[0] != "{":
|
80
|
-
if ":" in tag and with_prefixes:
|
81
|
-
prefix, uri = tag.split(":", 1)
|
82
|
-
try:
|
83
|
-
if not namespaces:
|
84
|
-
raise KeyError
|
85
|
-
yield ttype, "{%s}%s" % (namespaces[prefix], uri)
|
86
|
-
except KeyError:
|
87
|
-
raise SyntaxError("prefix %r not found in prefix map" % prefix)
|
88
|
-
elif default_namespace and not parsing_attribute:
|
89
|
-
yield ttype, "{%s}%s" % (default_namespace, tag)
|
90
|
-
else:
|
91
|
-
yield token
|
92
|
-
parsing_attribute = False
|
93
|
-
else:
|
94
|
-
yield token
|
95
|
-
parsing_attribute = ttype == '@'
|
96
|
-
|
97
|
-
|
98
|
-
def prepare_child(next, token):
|
99
|
-
tag = token[1]
|
100
|
-
def select(result):
|
101
|
-
for elem in result:
|
102
|
-
yield from elem.iterchildren(tag)
|
103
|
-
return select
|
104
|
-
|
105
|
-
def prepare_star(next, token):
|
106
|
-
def select(result):
|
107
|
-
for elem in result:
|
108
|
-
yield from elem.iterchildren('*')
|
109
|
-
return select
|
110
|
-
|
111
|
-
def prepare_self(next, token):
|
112
|
-
def select(result):
|
113
|
-
return result
|
114
|
-
return select
|
115
|
-
|
116
|
-
def prepare_descendant(next, token):
|
117
|
-
token = next()
|
118
|
-
if token[0] == "*":
|
119
|
-
tag = "*"
|
120
|
-
elif not token[0]:
|
121
|
-
tag = token[1]
|
122
|
-
else:
|
123
|
-
raise SyntaxError("invalid descendant")
|
124
|
-
def select(result):
|
125
|
-
for elem in result:
|
126
|
-
yield from elem.iterdescendants(tag)
|
127
|
-
return select
|
128
|
-
|
129
|
-
def prepare_parent(next, token):
|
130
|
-
def select(result):
|
131
|
-
for elem in result:
|
132
|
-
parent = elem.getparent()
|
133
|
-
if parent is not None:
|
134
|
-
yield parent
|
135
|
-
return select
|
136
|
-
|
137
|
-
def prepare_predicate(next, token):
|
138
|
-
# FIXME: replace with real parser!!! refs:
|
139
|
-
# http://effbot.org/zone/simple-iterator-parser.htm
|
140
|
-
# http://javascript.crockford.com/tdop/tdop.html
|
141
|
-
signature = ''
|
142
|
-
predicate = []
|
143
|
-
while 1:
|
144
|
-
token = next()
|
145
|
-
if token[0] == "]":
|
146
|
-
break
|
147
|
-
if token == ('', ''):
|
148
|
-
# ignore whitespace
|
149
|
-
continue
|
150
|
-
if token[0] and token[0][:1] in "'\"":
|
151
|
-
token = "'", token[0][1:-1]
|
152
|
-
signature += token[0] or "-"
|
153
|
-
predicate.append(token[1])
|
154
|
-
|
155
|
-
# use signature to determine predicate type
|
156
|
-
if signature == "@-":
|
157
|
-
# [@attribute] predicate
|
158
|
-
key = predicate[1]
|
159
|
-
def select(result):
|
160
|
-
for elem in result:
|
161
|
-
if elem.get(key) is not None:
|
162
|
-
yield elem
|
163
|
-
return select
|
164
|
-
if signature == "@-='":
|
165
|
-
# [@attribute='value']
|
166
|
-
key = predicate[1]
|
167
|
-
value = predicate[-1]
|
168
|
-
def select(result):
|
169
|
-
for elem in result:
|
170
|
-
if elem.get(key) == value:
|
171
|
-
yield elem
|
172
|
-
return select
|
173
|
-
if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
|
174
|
-
# [tag]
|
175
|
-
tag = predicate[0]
|
176
|
-
def select(result):
|
177
|
-
for elem in result:
|
178
|
-
for _ in elem.iterchildren(tag):
|
179
|
-
yield elem
|
180
|
-
break
|
181
|
-
return select
|
182
|
-
if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])):
|
183
|
-
# [.='value'] or [tag='value']
|
184
|
-
tag = predicate[0]
|
185
|
-
value = predicate[-1]
|
186
|
-
if tag:
|
187
|
-
def select(result):
|
188
|
-
for elem in result:
|
189
|
-
for e in elem.iterchildren(tag):
|
190
|
-
if "".join(e.itertext()) == value:
|
191
|
-
yield elem
|
192
|
-
break
|
193
|
-
else:
|
194
|
-
def select(result):
|
195
|
-
for elem in result:
|
196
|
-
if "".join(elem.itertext()) == value:
|
197
|
-
yield elem
|
198
|
-
return select
|
199
|
-
if signature == "-" or signature == "-()" or signature == "-()-":
|
200
|
-
# [index] or [last()] or [last()-index]
|
201
|
-
if signature == "-":
|
202
|
-
# [index]
|
203
|
-
index = int(predicate[0]) - 1
|
204
|
-
if index < 0:
|
205
|
-
if index == -1:
|
206
|
-
raise SyntaxError(
|
207
|
-
"indices in path predicates are 1-based, not 0-based")
|
208
|
-
else:
|
209
|
-
raise SyntaxError("path index >= 1 expected")
|
210
|
-
else:
|
211
|
-
if predicate[0] != "last":
|
212
|
-
raise SyntaxError("unsupported function")
|
213
|
-
if signature == "-()-":
|
214
|
-
try:
|
215
|
-
index = int(predicate[2]) - 1
|
216
|
-
except ValueError:
|
217
|
-
raise SyntaxError("unsupported expression")
|
218
|
-
else:
|
219
|
-
index = -1
|
220
|
-
def select(result):
|
221
|
-
for elem in result:
|
222
|
-
parent = elem.getparent()
|
223
|
-
if parent is None:
|
224
|
-
continue
|
225
|
-
try:
|
226
|
-
# FIXME: what if the selector is "*" ?
|
227
|
-
elems = list(parent.iterchildren(elem.tag))
|
228
|
-
if elems[index] is elem:
|
229
|
-
yield elem
|
230
|
-
except IndexError:
|
231
|
-
pass
|
232
|
-
return select
|
233
|
-
raise SyntaxError("invalid predicate")
|
234
|
-
|
235
|
-
ops = {
|
236
|
-
"": prepare_child,
|
237
|
-
"*": prepare_star,
|
238
|
-
".": prepare_self,
|
239
|
-
"..": prepare_parent,
|
240
|
-
"//": prepare_descendant,
|
241
|
-
"[": prepare_predicate,
|
242
|
-
}
|
243
|
-
|
244
|
-
|
245
|
-
# --------------------------------------------------------------------
|
246
|
-
|
247
|
-
_cache = {}
|
248
|
-
|
249
|
-
|
250
|
-
def _build_path_iterator(path, namespaces, with_prefixes=True):
|
251
|
-
"""compile selector pattern"""
|
252
|
-
if path[-1:] == "/":
|
253
|
-
path += "*" # implicit all (FIXME: keep this?)
|
254
|
-
|
255
|
-
cache_key = (path,)
|
256
|
-
if namespaces:
|
257
|
-
# lxml originally used None for the default namespace but ElementTree uses the
|
258
|
-
# more convenient (all-strings-dict) empty string, so we support both here,
|
259
|
-
# preferring the more convenient '', as long as they aren't ambiguous.
|
260
|
-
if None in namespaces:
|
261
|
-
if '' in namespaces and namespaces[None] != namespaces['']:
|
262
|
-
raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
|
263
|
-
namespaces[None], namespaces['']))
|
264
|
-
cache_key += (namespaces[None],) + tuple(sorted(
|
265
|
-
item for item in namespaces.items() if item[0] is not None))
|
266
|
-
else:
|
267
|
-
cache_key += tuple(sorted(namespaces.items()))
|
268
|
-
|
269
|
-
try:
|
270
|
-
return _cache[cache_key]
|
271
|
-
except KeyError:
|
272
|
-
pass
|
273
|
-
if len(_cache) > 100:
|
274
|
-
_cache.clear()
|
275
|
-
|
276
|
-
if path[:1] == "/":
|
277
|
-
raise SyntaxError("cannot use absolute path on element")
|
278
|
-
stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
|
279
|
-
try:
|
280
|
-
_next = stream.next
|
281
|
-
except AttributeError:
|
282
|
-
# Python 3
|
283
|
-
_next = stream.__next__
|
284
|
-
try:
|
285
|
-
token = _next()
|
286
|
-
except StopIteration:
|
287
|
-
raise SyntaxError("empty path expression")
|
288
|
-
selector = []
|
289
|
-
while 1:
|
290
|
-
try:
|
291
|
-
selector.append(ops[token[0]](_next, token))
|
292
|
-
except StopIteration:
|
293
|
-
raise SyntaxError("invalid path")
|
294
|
-
try:
|
295
|
-
token = _next()
|
296
|
-
if token[0] == "/":
|
297
|
-
token = _next()
|
298
|
-
except StopIteration:
|
299
|
-
break
|
300
|
-
_cache[cache_key] = selector
|
301
|
-
return selector
|
302
|
-
|
303
|
-
|
304
|
-
##
|
305
|
-
# Iterate over the matching nodes
|
306
|
-
|
307
|
-
def iterfind(elem, path, namespaces=None, with_prefixes=True):
|
308
|
-
selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
|
309
|
-
result = iter((elem,))
|
310
|
-
for select in selector:
|
311
|
-
result = select(result)
|
312
|
-
return result
|
313
|
-
|
314
|
-
|
315
|
-
##
|
316
|
-
# Find first matching object.
|
317
|
-
|
318
|
-
def find(elem, path, namespaces=None, with_prefixes=True):
|
319
|
-
it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
|
320
|
-
try:
|
321
|
-
return next(it)
|
322
|
-
except StopIteration:
|
323
|
-
return None
|
324
|
-
|
325
|
-
|
326
|
-
##
|
327
|
-
# Find all matching objects.
|
328
|
-
|
329
|
-
def findall(elem, path, namespaces=None, with_prefixes=True):
|
330
|
-
return list(iterfind(elem, path, namespaces))
|
331
|
-
|
332
|
-
|
333
|
-
##
|
334
|
-
# Find text for first matching object.
|
335
|
-
|
336
|
-
def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
|
337
|
-
el = find(elem, path, namespaces, with_prefixes=with_prefixes)
|
338
|
-
if el is None:
|
339
|
-
return default
|
340
|
-
else:
|
341
|
-
return el.text or ''
|
1
|
+
# cython: language_level=2
|
2
|
+
|
3
|
+
#
|
4
|
+
# ElementTree
|
5
|
+
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
|
6
|
+
#
|
7
|
+
# limited xpath support for element trees
|
8
|
+
#
|
9
|
+
# history:
|
10
|
+
# 2003-05-23 fl created
|
11
|
+
# 2003-05-28 fl added support for // etc
|
12
|
+
# 2003-08-27 fl fixed parsing of periods in element names
|
13
|
+
# 2007-09-10 fl new selection engine
|
14
|
+
# 2007-09-12 fl fixed parent selector
|
15
|
+
# 2007-09-13 fl added iterfind; changed findall to return a list
|
16
|
+
# 2007-11-30 fl added namespaces support
|
17
|
+
# 2009-10-30 fl added child element value filter
|
18
|
+
#
|
19
|
+
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
|
20
|
+
#
|
21
|
+
# fredrik@pythonware.com
|
22
|
+
# http://www.pythonware.com
|
23
|
+
#
|
24
|
+
# --------------------------------------------------------------------
|
25
|
+
# The ElementTree toolkit is
|
26
|
+
#
|
27
|
+
# Copyright (c) 1999-2009 by Fredrik Lundh
|
28
|
+
#
|
29
|
+
# By obtaining, using, and/or copying this software and/or its
|
30
|
+
# associated documentation, you agree that you have read, understood,
|
31
|
+
# and will comply with the following terms and conditions:
|
32
|
+
#
|
33
|
+
# Permission to use, copy, modify, and distribute this software and
|
34
|
+
# its associated documentation for any purpose and without fee is
|
35
|
+
# hereby granted, provided that the above copyright notice appears in
|
36
|
+
# all copies, and that both that copyright notice and this permission
|
37
|
+
# notice appear in supporting documentation, and that the name of
|
38
|
+
# Secret Labs AB or the author not be used in advertising or publicity
|
39
|
+
# pertaining to distribution of the software without specific, written
|
40
|
+
# prior permission.
|
41
|
+
#
|
42
|
+
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
43
|
+
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
44
|
+
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
45
|
+
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
46
|
+
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
47
|
+
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
48
|
+
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
49
|
+
# OF THIS SOFTWARE.
|
50
|
+
# --------------------------------------------------------------------
|
51
|
+
|
52
|
+
##
|
53
|
+
# Implementation module for XPath support. There's usually no reason
|
54
|
+
# to import this module directly; the <b>ElementTree</b> does this for
|
55
|
+
# you, if needed.
|
56
|
+
##
|
57
|
+
|
58
|
+
|
59
|
+
import re
|
60
|
+
|
61
|
+
xpath_tokenizer_re = re.compile(
|
62
|
+
"("
|
63
|
+
"'[^']*'|\"[^\"]*\"|"
|
64
|
+
"::|"
|
65
|
+
"//?|"
|
66
|
+
r"\.\.|"
|
67
|
+
r"\(\)|"
|
68
|
+
r"[/.*:\[\]\(\)@=])|"
|
69
|
+
r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
|
70
|
+
r"\s+"
|
71
|
+
)
|
72
|
+
|
73
|
+
def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
|
74
|
+
# ElementTree uses '', lxml used None originally.
|
75
|
+
default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None
|
76
|
+
parsing_attribute = False
|
77
|
+
for token in xpath_tokenizer_re.findall(pattern):
|
78
|
+
ttype, tag = token
|
79
|
+
if tag and tag[0] != "{":
|
80
|
+
if ":" in tag and with_prefixes:
|
81
|
+
prefix, uri = tag.split(":", 1)
|
82
|
+
try:
|
83
|
+
if not namespaces:
|
84
|
+
raise KeyError
|
85
|
+
yield ttype, "{%s}%s" % (namespaces[prefix], uri)
|
86
|
+
except KeyError:
|
87
|
+
raise SyntaxError("prefix %r not found in prefix map" % prefix)
|
88
|
+
elif default_namespace and not parsing_attribute:
|
89
|
+
yield ttype, "{%s}%s" % (default_namespace, tag)
|
90
|
+
else:
|
91
|
+
yield token
|
92
|
+
parsing_attribute = False
|
93
|
+
else:
|
94
|
+
yield token
|
95
|
+
parsing_attribute = ttype == '@'
|
96
|
+
|
97
|
+
|
98
|
+
def prepare_child(next, token):
|
99
|
+
tag = token[1]
|
100
|
+
def select(result):
|
101
|
+
for elem in result:
|
102
|
+
yield from elem.iterchildren(tag)
|
103
|
+
return select
|
104
|
+
|
105
|
+
def prepare_star(next, token):
|
106
|
+
def select(result):
|
107
|
+
for elem in result:
|
108
|
+
yield from elem.iterchildren('*')
|
109
|
+
return select
|
110
|
+
|
111
|
+
def prepare_self(next, token):
|
112
|
+
def select(result):
|
113
|
+
return result
|
114
|
+
return select
|
115
|
+
|
116
|
+
def prepare_descendant(next, token):
|
117
|
+
token = next()
|
118
|
+
if token[0] == "*":
|
119
|
+
tag = "*"
|
120
|
+
elif not token[0]:
|
121
|
+
tag = token[1]
|
122
|
+
else:
|
123
|
+
raise SyntaxError("invalid descendant")
|
124
|
+
def select(result):
|
125
|
+
for elem in result:
|
126
|
+
yield from elem.iterdescendants(tag)
|
127
|
+
return select
|
128
|
+
|
129
|
+
def prepare_parent(next, token):
|
130
|
+
def select(result):
|
131
|
+
for elem in result:
|
132
|
+
parent = elem.getparent()
|
133
|
+
if parent is not None:
|
134
|
+
yield parent
|
135
|
+
return select
|
136
|
+
|
137
|
+
def prepare_predicate(next, token):
|
138
|
+
# FIXME: replace with real parser!!! refs:
|
139
|
+
# http://effbot.org/zone/simple-iterator-parser.htm
|
140
|
+
# http://javascript.crockford.com/tdop/tdop.html
|
141
|
+
signature = ''
|
142
|
+
predicate = []
|
143
|
+
while 1:
|
144
|
+
token = next()
|
145
|
+
if token[0] == "]":
|
146
|
+
break
|
147
|
+
if token == ('', ''):
|
148
|
+
# ignore whitespace
|
149
|
+
continue
|
150
|
+
if token[0] and token[0][:1] in "'\"":
|
151
|
+
token = "'", token[0][1:-1]
|
152
|
+
signature += token[0] or "-"
|
153
|
+
predicate.append(token[1])
|
154
|
+
|
155
|
+
# use signature to determine predicate type
|
156
|
+
if signature == "@-":
|
157
|
+
# [@attribute] predicate
|
158
|
+
key = predicate[1]
|
159
|
+
def select(result):
|
160
|
+
for elem in result:
|
161
|
+
if elem.get(key) is not None:
|
162
|
+
yield elem
|
163
|
+
return select
|
164
|
+
if signature == "@-='":
|
165
|
+
# [@attribute='value']
|
166
|
+
key = predicate[1]
|
167
|
+
value = predicate[-1]
|
168
|
+
def select(result):
|
169
|
+
for elem in result:
|
170
|
+
if elem.get(key) == value:
|
171
|
+
yield elem
|
172
|
+
return select
|
173
|
+
if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
|
174
|
+
# [tag]
|
175
|
+
tag = predicate[0]
|
176
|
+
def select(result):
|
177
|
+
for elem in result:
|
178
|
+
for _ in elem.iterchildren(tag):
|
179
|
+
yield elem
|
180
|
+
break
|
181
|
+
return select
|
182
|
+
if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])):
|
183
|
+
# [.='value'] or [tag='value']
|
184
|
+
tag = predicate[0]
|
185
|
+
value = predicate[-1]
|
186
|
+
if tag:
|
187
|
+
def select(result):
|
188
|
+
for elem in result:
|
189
|
+
for e in elem.iterchildren(tag):
|
190
|
+
if "".join(e.itertext()) == value:
|
191
|
+
yield elem
|
192
|
+
break
|
193
|
+
else:
|
194
|
+
def select(result):
|
195
|
+
for elem in result:
|
196
|
+
if "".join(elem.itertext()) == value:
|
197
|
+
yield elem
|
198
|
+
return select
|
199
|
+
if signature == "-" or signature == "-()" or signature == "-()-":
|
200
|
+
# [index] or [last()] or [last()-index]
|
201
|
+
if signature == "-":
|
202
|
+
# [index]
|
203
|
+
index = int(predicate[0]) - 1
|
204
|
+
if index < 0:
|
205
|
+
if index == -1:
|
206
|
+
raise SyntaxError(
|
207
|
+
"indices in path predicates are 1-based, not 0-based")
|
208
|
+
else:
|
209
|
+
raise SyntaxError("path index >= 1 expected")
|
210
|
+
else:
|
211
|
+
if predicate[0] != "last":
|
212
|
+
raise SyntaxError("unsupported function")
|
213
|
+
if signature == "-()-":
|
214
|
+
try:
|
215
|
+
index = int(predicate[2]) - 1
|
216
|
+
except ValueError:
|
217
|
+
raise SyntaxError("unsupported expression")
|
218
|
+
else:
|
219
|
+
index = -1
|
220
|
+
def select(result):
|
221
|
+
for elem in result:
|
222
|
+
parent = elem.getparent()
|
223
|
+
if parent is None:
|
224
|
+
continue
|
225
|
+
try:
|
226
|
+
# FIXME: what if the selector is "*" ?
|
227
|
+
elems = list(parent.iterchildren(elem.tag))
|
228
|
+
if elems[index] is elem:
|
229
|
+
yield elem
|
230
|
+
except IndexError:
|
231
|
+
pass
|
232
|
+
return select
|
233
|
+
raise SyntaxError("invalid predicate")
|
234
|
+
|
235
|
+
ops = {
|
236
|
+
"": prepare_child,
|
237
|
+
"*": prepare_star,
|
238
|
+
".": prepare_self,
|
239
|
+
"..": prepare_parent,
|
240
|
+
"//": prepare_descendant,
|
241
|
+
"[": prepare_predicate,
|
242
|
+
}
|
243
|
+
|
244
|
+
|
245
|
+
# --------------------------------------------------------------------
|
246
|
+
|
247
|
+
_cache = {}
|
248
|
+
|
249
|
+
|
250
|
+
def _build_path_iterator(path, namespaces, with_prefixes=True):
|
251
|
+
"""compile selector pattern"""
|
252
|
+
if path[-1:] == "/":
|
253
|
+
path += "*" # implicit all (FIXME: keep this?)
|
254
|
+
|
255
|
+
cache_key = (path,)
|
256
|
+
if namespaces:
|
257
|
+
# lxml originally used None for the default namespace but ElementTree uses the
|
258
|
+
# more convenient (all-strings-dict) empty string, so we support both here,
|
259
|
+
# preferring the more convenient '', as long as they aren't ambiguous.
|
260
|
+
if None in namespaces:
|
261
|
+
if '' in namespaces and namespaces[None] != namespaces['']:
|
262
|
+
raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
|
263
|
+
namespaces[None], namespaces['']))
|
264
|
+
cache_key += (namespaces[None],) + tuple(sorted(
|
265
|
+
item for item in namespaces.items() if item[0] is not None))
|
266
|
+
else:
|
267
|
+
cache_key += tuple(sorted(namespaces.items()))
|
268
|
+
|
269
|
+
try:
|
270
|
+
return _cache[cache_key]
|
271
|
+
except KeyError:
|
272
|
+
pass
|
273
|
+
if len(_cache) > 100:
|
274
|
+
_cache.clear()
|
275
|
+
|
276
|
+
if path[:1] == "/":
|
277
|
+
raise SyntaxError("cannot use absolute path on element")
|
278
|
+
stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
|
279
|
+
try:
|
280
|
+
_next = stream.next
|
281
|
+
except AttributeError:
|
282
|
+
# Python 3
|
283
|
+
_next = stream.__next__
|
284
|
+
try:
|
285
|
+
token = _next()
|
286
|
+
except StopIteration:
|
287
|
+
raise SyntaxError("empty path expression")
|
288
|
+
selector = []
|
289
|
+
while 1:
|
290
|
+
try:
|
291
|
+
selector.append(ops[token[0]](_next, token))
|
292
|
+
except StopIteration:
|
293
|
+
raise SyntaxError("invalid path")
|
294
|
+
try:
|
295
|
+
token = _next()
|
296
|
+
if token[0] == "/":
|
297
|
+
token = _next()
|
298
|
+
except StopIteration:
|
299
|
+
break
|
300
|
+
_cache[cache_key] = selector
|
301
|
+
return selector
|
302
|
+
|
303
|
+
|
304
|
+
##
|
305
|
+
# Iterate over the matching nodes
|
306
|
+
|
307
|
+
def iterfind(elem, path, namespaces=None, with_prefixes=True):
|
308
|
+
selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
|
309
|
+
result = iter((elem,))
|
310
|
+
for select in selector:
|
311
|
+
result = select(result)
|
312
|
+
return result
|
313
|
+
|
314
|
+
|
315
|
+
##
|
316
|
+
# Find first matching object.
|
317
|
+
|
318
|
+
def find(elem, path, namespaces=None, with_prefixes=True):
|
319
|
+
it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
|
320
|
+
try:
|
321
|
+
return next(it)
|
322
|
+
except StopIteration:
|
323
|
+
return None
|
324
|
+
|
325
|
+
|
326
|
+
##
|
327
|
+
# Find all matching objects.
|
328
|
+
|
329
|
+
def findall(elem, path, namespaces=None, with_prefixes=True):
|
330
|
+
return list(iterfind(elem, path, namespaces))
|
331
|
+
|
332
|
+
|
333
|
+
##
|
334
|
+
# Find text for first matching object.
|
335
|
+
|
336
|
+
def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
|
337
|
+
el = find(elem, path, namespaces, with_prefixes=with_prefixes)
|
338
|
+
if el is None:
|
339
|
+
return default
|
340
|
+
else:
|
341
|
+
return el.text or ''
|