lxml 5.2.0__cp310-cp310-win32.whl → 5.2.2__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. lxml/ElementInclude.py +244 -244
  2. lxml/__init__.py +22 -22
  3. lxml/_elementpath.cp310-win32.pyd +0 -0
  4. lxml/_elementpath.py +341 -341
  5. lxml/apihelpers.pxi +1793 -1793
  6. lxml/builder.cp310-win32.pyd +0 -0
  7. lxml/builder.py +232 -232
  8. lxml/classlookup.pxi +580 -580
  9. lxml/cleanup.pxi +215 -215
  10. lxml/cssselect.py +101 -101
  11. lxml/debug.pxi +90 -90
  12. lxml/docloader.pxi +178 -178
  13. lxml/doctestcompare.py +488 -488
  14. lxml/dtd.pxi +478 -478
  15. lxml/etree.cp310-win32.pyd +0 -0
  16. lxml/etree.h +6 -6
  17. lxml/etree.pyx +3732 -3711
  18. lxml/extensions.pxi +833 -833
  19. lxml/html/ElementSoup.py +10 -10
  20. lxml/html/__init__.py +1923 -1923
  21. lxml/html/_diffcommand.py +86 -86
  22. lxml/html/_html5builder.py +100 -100
  23. lxml/html/_setmixin.py +56 -56
  24. lxml/html/builder.py +133 -133
  25. lxml/html/clean.py +21 -21
  26. lxml/html/defs.py +135 -135
  27. lxml/html/diff.cp310-win32.pyd +0 -0
  28. lxml/html/diff.py +878 -878
  29. lxml/html/formfill.py +299 -299
  30. lxml/html/html5parser.py +260 -260
  31. lxml/html/soupparser.py +314 -314
  32. lxml/html/usedoctest.py +13 -13
  33. lxml/includes/c14n.pxd +25 -25
  34. lxml/includes/config.pxd +3 -3
  35. lxml/includes/dtdvalid.pxd +18 -18
  36. lxml/includes/etree_defs.h +379 -379
  37. lxml/includes/etreepublic.pxd +237 -237
  38. lxml/includes/htmlparser.pxd +56 -56
  39. lxml/includes/lxml-version.h +1 -1
  40. lxml/includes/relaxng.pxd +64 -64
  41. lxml/includes/schematron.pxd +34 -34
  42. lxml/includes/tree.pxd +494 -494
  43. lxml/includes/uri.pxd +5 -5
  44. lxml/includes/xinclude.pxd +22 -22
  45. lxml/includes/xmlerror.pxd +852 -852
  46. lxml/includes/xmlparser.pxd +265 -265
  47. lxml/includes/xmlschema.pxd +35 -35
  48. lxml/includes/xpath.pxd +136 -136
  49. lxml/includes/xslt.pxd +190 -190
  50. lxml/isoschematron/__init__.py +348 -348
  51. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -709
  52. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -75
  53. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +312 -312
  54. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1159 -1159
  55. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +54 -54
  56. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -1796
  57. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -588
  58. lxml/iterparse.pxi +438 -438
  59. lxml/lxml.etree.h +6 -6
  60. lxml/nsclasses.pxi +281 -281
  61. lxml/objectify.cp310-win32.pyd +0 -0
  62. lxml/objectify.pyx +2145 -2145
  63. lxml/objectpath.pxi +332 -332
  64. lxml/parser.pxi +1994 -1994
  65. lxml/parsertarget.pxi +180 -180
  66. lxml/proxy.pxi +619 -619
  67. lxml/public-api.pxi +178 -178
  68. lxml/pyclasslookup.py +3 -3
  69. lxml/readonlytree.pxi +565 -565
  70. lxml/relaxng.pxi +165 -165
  71. lxml/sax.cp310-win32.pyd +0 -0
  72. lxml/sax.py +275 -275
  73. lxml/saxparser.pxi +875 -875
  74. lxml/schematron.pxi +168 -168
  75. lxml/serializer.pxi +1871 -1871
  76. lxml/usedoctest.py +13 -13
  77. lxml/xinclude.pxi +67 -67
  78. lxml/xmlerror.pxi +1654 -1654
  79. lxml/xmlid.pxi +179 -179
  80. lxml/xmlschema.pxi +215 -215
  81. lxml/xpath.pxi +487 -487
  82. lxml/xslt.pxi +950 -950
  83. lxml/xsltext.pxi +242 -242
  84. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSE.txt +29 -29
  85. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSES.txt +29 -29
  86. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/METADATA +9 -17
  87. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/RECORD +89 -89
  88. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/WHEEL +0 -0
  89. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/top_level.txt +0 -0
lxml/_elementpath.py CHANGED
@@ -1,341 +1,341 @@
1
- # cython: language_level=2
2
-
3
- #
4
- # ElementTree
5
- # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
6
- #
7
- # limited xpath support for element trees
8
- #
9
- # history:
10
- # 2003-05-23 fl created
11
- # 2003-05-28 fl added support for // etc
12
- # 2003-08-27 fl fixed parsing of periods in element names
13
- # 2007-09-10 fl new selection engine
14
- # 2007-09-12 fl fixed parent selector
15
- # 2007-09-13 fl added iterfind; changed findall to return a list
16
- # 2007-11-30 fl added namespaces support
17
- # 2009-10-30 fl added child element value filter
18
- #
19
- # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
20
- #
21
- # fredrik@pythonware.com
22
- # http://www.pythonware.com
23
- #
24
- # --------------------------------------------------------------------
25
- # The ElementTree toolkit is
26
- #
27
- # Copyright (c) 1999-2009 by Fredrik Lundh
28
- #
29
- # By obtaining, using, and/or copying this software and/or its
30
- # associated documentation, you agree that you have read, understood,
31
- # and will comply with the following terms and conditions:
32
- #
33
- # Permission to use, copy, modify, and distribute this software and
34
- # its associated documentation for any purpose and without fee is
35
- # hereby granted, provided that the above copyright notice appears in
36
- # all copies, and that both that copyright notice and this permission
37
- # notice appear in supporting documentation, and that the name of
38
- # Secret Labs AB or the author not be used in advertising or publicity
39
- # pertaining to distribution of the software without specific, written
40
- # prior permission.
41
- #
42
- # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
43
- # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
44
- # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
45
- # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
46
- # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
47
- # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
48
- # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
49
- # OF THIS SOFTWARE.
50
- # --------------------------------------------------------------------
51
-
52
- ##
53
- # Implementation module for XPath support. There's usually no reason
54
- # to import this module directly; the <b>ElementTree</b> does this for
55
- # you, if needed.
56
- ##
57
-
58
-
59
- import re
60
-
61
- xpath_tokenizer_re = re.compile(
62
- "("
63
- "'[^']*'|\"[^\"]*\"|"
64
- "::|"
65
- "//?|"
66
- r"\.\.|"
67
- r"\(\)|"
68
- r"[/.*:\[\]\(\)@=])|"
69
- r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
70
- r"\s+"
71
- )
72
-
73
- def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
74
- # ElementTree uses '', lxml used None originally.
75
- default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None
76
- parsing_attribute = False
77
- for token in xpath_tokenizer_re.findall(pattern):
78
- ttype, tag = token
79
- if tag and tag[0] != "{":
80
- if ":" in tag and with_prefixes:
81
- prefix, uri = tag.split(":", 1)
82
- try:
83
- if not namespaces:
84
- raise KeyError
85
- yield ttype, "{%s}%s" % (namespaces[prefix], uri)
86
- except KeyError:
87
- raise SyntaxError("prefix %r not found in prefix map" % prefix)
88
- elif default_namespace and not parsing_attribute:
89
- yield ttype, "{%s}%s" % (default_namespace, tag)
90
- else:
91
- yield token
92
- parsing_attribute = False
93
- else:
94
- yield token
95
- parsing_attribute = ttype == '@'
96
-
97
-
98
- def prepare_child(next, token):
99
- tag = token[1]
100
- def select(result):
101
- for elem in result:
102
- yield from elem.iterchildren(tag)
103
- return select
104
-
105
- def prepare_star(next, token):
106
- def select(result):
107
- for elem in result:
108
- yield from elem.iterchildren('*')
109
- return select
110
-
111
- def prepare_self(next, token):
112
- def select(result):
113
- return result
114
- return select
115
-
116
- def prepare_descendant(next, token):
117
- token = next()
118
- if token[0] == "*":
119
- tag = "*"
120
- elif not token[0]:
121
- tag = token[1]
122
- else:
123
- raise SyntaxError("invalid descendant")
124
- def select(result):
125
- for elem in result:
126
- yield from elem.iterdescendants(tag)
127
- return select
128
-
129
- def prepare_parent(next, token):
130
- def select(result):
131
- for elem in result:
132
- parent = elem.getparent()
133
- if parent is not None:
134
- yield parent
135
- return select
136
-
137
- def prepare_predicate(next, token):
138
- # FIXME: replace with real parser!!! refs:
139
- # http://effbot.org/zone/simple-iterator-parser.htm
140
- # http://javascript.crockford.com/tdop/tdop.html
141
- signature = ''
142
- predicate = []
143
- while 1:
144
- token = next()
145
- if token[0] == "]":
146
- break
147
- if token == ('', ''):
148
- # ignore whitespace
149
- continue
150
- if token[0] and token[0][:1] in "'\"":
151
- token = "'", token[0][1:-1]
152
- signature += token[0] or "-"
153
- predicate.append(token[1])
154
-
155
- # use signature to determine predicate type
156
- if signature == "@-":
157
- # [@attribute] predicate
158
- key = predicate[1]
159
- def select(result):
160
- for elem in result:
161
- if elem.get(key) is not None:
162
- yield elem
163
- return select
164
- if signature == "@-='":
165
- # [@attribute='value']
166
- key = predicate[1]
167
- value = predicate[-1]
168
- def select(result):
169
- for elem in result:
170
- if elem.get(key) == value:
171
- yield elem
172
- return select
173
- if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
174
- # [tag]
175
- tag = predicate[0]
176
- def select(result):
177
- for elem in result:
178
- for _ in elem.iterchildren(tag):
179
- yield elem
180
- break
181
- return select
182
- if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])):
183
- # [.='value'] or [tag='value']
184
- tag = predicate[0]
185
- value = predicate[-1]
186
- if tag:
187
- def select(result):
188
- for elem in result:
189
- for e in elem.iterchildren(tag):
190
- if "".join(e.itertext()) == value:
191
- yield elem
192
- break
193
- else:
194
- def select(result):
195
- for elem in result:
196
- if "".join(elem.itertext()) == value:
197
- yield elem
198
- return select
199
- if signature == "-" or signature == "-()" or signature == "-()-":
200
- # [index] or [last()] or [last()-index]
201
- if signature == "-":
202
- # [index]
203
- index = int(predicate[0]) - 1
204
- if index < 0:
205
- if index == -1:
206
- raise SyntaxError(
207
- "indices in path predicates are 1-based, not 0-based")
208
- else:
209
- raise SyntaxError("path index >= 1 expected")
210
- else:
211
- if predicate[0] != "last":
212
- raise SyntaxError("unsupported function")
213
- if signature == "-()-":
214
- try:
215
- index = int(predicate[2]) - 1
216
- except ValueError:
217
- raise SyntaxError("unsupported expression")
218
- else:
219
- index = -1
220
- def select(result):
221
- for elem in result:
222
- parent = elem.getparent()
223
- if parent is None:
224
- continue
225
- try:
226
- # FIXME: what if the selector is "*" ?
227
- elems = list(parent.iterchildren(elem.tag))
228
- if elems[index] is elem:
229
- yield elem
230
- except IndexError:
231
- pass
232
- return select
233
- raise SyntaxError("invalid predicate")
234
-
235
- ops = {
236
- "": prepare_child,
237
- "*": prepare_star,
238
- ".": prepare_self,
239
- "..": prepare_parent,
240
- "//": prepare_descendant,
241
- "[": prepare_predicate,
242
- }
243
-
244
-
245
- # --------------------------------------------------------------------
246
-
247
- _cache = {}
248
-
249
-
250
- def _build_path_iterator(path, namespaces, with_prefixes=True):
251
- """compile selector pattern"""
252
- if path[-1:] == "/":
253
- path += "*" # implicit all (FIXME: keep this?)
254
-
255
- cache_key = (path,)
256
- if namespaces:
257
- # lxml originally used None for the default namespace but ElementTree uses the
258
- # more convenient (all-strings-dict) empty string, so we support both here,
259
- # preferring the more convenient '', as long as they aren't ambiguous.
260
- if None in namespaces:
261
- if '' in namespaces and namespaces[None] != namespaces['']:
262
- raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
263
- namespaces[None], namespaces['']))
264
- cache_key += (namespaces[None],) + tuple(sorted(
265
- item for item in namespaces.items() if item[0] is not None))
266
- else:
267
- cache_key += tuple(sorted(namespaces.items()))
268
-
269
- try:
270
- return _cache[cache_key]
271
- except KeyError:
272
- pass
273
- if len(_cache) > 100:
274
- _cache.clear()
275
-
276
- if path[:1] == "/":
277
- raise SyntaxError("cannot use absolute path on element")
278
- stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
279
- try:
280
- _next = stream.next
281
- except AttributeError:
282
- # Python 3
283
- _next = stream.__next__
284
- try:
285
- token = _next()
286
- except StopIteration:
287
- raise SyntaxError("empty path expression")
288
- selector = []
289
- while 1:
290
- try:
291
- selector.append(ops[token[0]](_next, token))
292
- except StopIteration:
293
- raise SyntaxError("invalid path")
294
- try:
295
- token = _next()
296
- if token[0] == "/":
297
- token = _next()
298
- except StopIteration:
299
- break
300
- _cache[cache_key] = selector
301
- return selector
302
-
303
-
304
- ##
305
- # Iterate over the matching nodes
306
-
307
- def iterfind(elem, path, namespaces=None, with_prefixes=True):
308
- selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
309
- result = iter((elem,))
310
- for select in selector:
311
- result = select(result)
312
- return result
313
-
314
-
315
- ##
316
- # Find first matching object.
317
-
318
- def find(elem, path, namespaces=None, with_prefixes=True):
319
- it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
320
- try:
321
- return next(it)
322
- except StopIteration:
323
- return None
324
-
325
-
326
- ##
327
- # Find all matching objects.
328
-
329
- def findall(elem, path, namespaces=None, with_prefixes=True):
330
- return list(iterfind(elem, path, namespaces))
331
-
332
-
333
- ##
334
- # Find text for first matching object.
335
-
336
- def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
337
- el = find(elem, path, namespaces, with_prefixes=with_prefixes)
338
- if el is None:
339
- return default
340
- else:
341
- return el.text or ''
1
+ # cython: language_level=2
2
+
3
+ #
4
+ # ElementTree
5
+ # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
6
+ #
7
+ # limited xpath support for element trees
8
+ #
9
+ # history:
10
+ # 2003-05-23 fl created
11
+ # 2003-05-28 fl added support for // etc
12
+ # 2003-08-27 fl fixed parsing of periods in element names
13
+ # 2007-09-10 fl new selection engine
14
+ # 2007-09-12 fl fixed parent selector
15
+ # 2007-09-13 fl added iterfind; changed findall to return a list
16
+ # 2007-11-30 fl added namespaces support
17
+ # 2009-10-30 fl added child element value filter
18
+ #
19
+ # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
20
+ #
21
+ # fredrik@pythonware.com
22
+ # http://www.pythonware.com
23
+ #
24
+ # --------------------------------------------------------------------
25
+ # The ElementTree toolkit is
26
+ #
27
+ # Copyright (c) 1999-2009 by Fredrik Lundh
28
+ #
29
+ # By obtaining, using, and/or copying this software and/or its
30
+ # associated documentation, you agree that you have read, understood,
31
+ # and will comply with the following terms and conditions:
32
+ #
33
+ # Permission to use, copy, modify, and distribute this software and
34
+ # its associated documentation for any purpose and without fee is
35
+ # hereby granted, provided that the above copyright notice appears in
36
+ # all copies, and that both that copyright notice and this permission
37
+ # notice appear in supporting documentation, and that the name of
38
+ # Secret Labs AB or the author not be used in advertising or publicity
39
+ # pertaining to distribution of the software without specific, written
40
+ # prior permission.
41
+ #
42
+ # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
43
+ # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
44
+ # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
45
+ # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
46
+ # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
47
+ # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
48
+ # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
49
+ # OF THIS SOFTWARE.
50
+ # --------------------------------------------------------------------
51
+
52
+ ##
53
+ # Implementation module for XPath support. There's usually no reason
54
+ # to import this module directly; the <b>ElementTree</b> does this for
55
+ # you, if needed.
56
+ ##
57
+
58
+
59
+ import re
60
+
61
+ xpath_tokenizer_re = re.compile(
62
+ "("
63
+ "'[^']*'|\"[^\"]*\"|"
64
+ "::|"
65
+ "//?|"
66
+ r"\.\.|"
67
+ r"\(\)|"
68
+ r"[/.*:\[\]\(\)@=])|"
69
+ r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
70
+ r"\s+"
71
+ )
72
+
73
+ def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
74
+ # ElementTree uses '', lxml used None originally.
75
+ default_namespace = (namespaces.get(None) or namespaces.get('')) if namespaces else None
76
+ parsing_attribute = False
77
+ for token in xpath_tokenizer_re.findall(pattern):
78
+ ttype, tag = token
79
+ if tag and tag[0] != "{":
80
+ if ":" in tag and with_prefixes:
81
+ prefix, uri = tag.split(":", 1)
82
+ try:
83
+ if not namespaces:
84
+ raise KeyError
85
+ yield ttype, "{%s}%s" % (namespaces[prefix], uri)
86
+ except KeyError:
87
+ raise SyntaxError("prefix %r not found in prefix map" % prefix)
88
+ elif default_namespace and not parsing_attribute:
89
+ yield ttype, "{%s}%s" % (default_namespace, tag)
90
+ else:
91
+ yield token
92
+ parsing_attribute = False
93
+ else:
94
+ yield token
95
+ parsing_attribute = ttype == '@'
96
+
97
+
98
+ def prepare_child(next, token):
99
+ tag = token[1]
100
+ def select(result):
101
+ for elem in result:
102
+ yield from elem.iterchildren(tag)
103
+ return select
104
+
105
+ def prepare_star(next, token):
106
+ def select(result):
107
+ for elem in result:
108
+ yield from elem.iterchildren('*')
109
+ return select
110
+
111
+ def prepare_self(next, token):
112
+ def select(result):
113
+ return result
114
+ return select
115
+
116
+ def prepare_descendant(next, token):
117
+ token = next()
118
+ if token[0] == "*":
119
+ tag = "*"
120
+ elif not token[0]:
121
+ tag = token[1]
122
+ else:
123
+ raise SyntaxError("invalid descendant")
124
+ def select(result):
125
+ for elem in result:
126
+ yield from elem.iterdescendants(tag)
127
+ return select
128
+
129
+ def prepare_parent(next, token):
130
+ def select(result):
131
+ for elem in result:
132
+ parent = elem.getparent()
133
+ if parent is not None:
134
+ yield parent
135
+ return select
136
+
137
+ def prepare_predicate(next, token):
138
+ # FIXME: replace with real parser!!! refs:
139
+ # http://effbot.org/zone/simple-iterator-parser.htm
140
+ # http://javascript.crockford.com/tdop/tdop.html
141
+ signature = ''
142
+ predicate = []
143
+ while 1:
144
+ token = next()
145
+ if token[0] == "]":
146
+ break
147
+ if token == ('', ''):
148
+ # ignore whitespace
149
+ continue
150
+ if token[0] and token[0][:1] in "'\"":
151
+ token = "'", token[0][1:-1]
152
+ signature += token[0] or "-"
153
+ predicate.append(token[1])
154
+
155
+ # use signature to determine predicate type
156
+ if signature == "@-":
157
+ # [@attribute] predicate
158
+ key = predicate[1]
159
+ def select(result):
160
+ for elem in result:
161
+ if elem.get(key) is not None:
162
+ yield elem
163
+ return select
164
+ if signature == "@-='":
165
+ # [@attribute='value']
166
+ key = predicate[1]
167
+ value = predicate[-1]
168
+ def select(result):
169
+ for elem in result:
170
+ if elem.get(key) == value:
171
+ yield elem
172
+ return select
173
+ if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
174
+ # [tag]
175
+ tag = predicate[0]
176
+ def select(result):
177
+ for elem in result:
178
+ for _ in elem.iterchildren(tag):
179
+ yield elem
180
+ break
181
+ return select
182
+ if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])):
183
+ # [.='value'] or [tag='value']
184
+ tag = predicate[0]
185
+ value = predicate[-1]
186
+ if tag:
187
+ def select(result):
188
+ for elem in result:
189
+ for e in elem.iterchildren(tag):
190
+ if "".join(e.itertext()) == value:
191
+ yield elem
192
+ break
193
+ else:
194
+ def select(result):
195
+ for elem in result:
196
+ if "".join(elem.itertext()) == value:
197
+ yield elem
198
+ return select
199
+ if signature == "-" or signature == "-()" or signature == "-()-":
200
+ # [index] or [last()] or [last()-index]
201
+ if signature == "-":
202
+ # [index]
203
+ index = int(predicate[0]) - 1
204
+ if index < 0:
205
+ if index == -1:
206
+ raise SyntaxError(
207
+ "indices in path predicates are 1-based, not 0-based")
208
+ else:
209
+ raise SyntaxError("path index >= 1 expected")
210
+ else:
211
+ if predicate[0] != "last":
212
+ raise SyntaxError("unsupported function")
213
+ if signature == "-()-":
214
+ try:
215
+ index = int(predicate[2]) - 1
216
+ except ValueError:
217
+ raise SyntaxError("unsupported expression")
218
+ else:
219
+ index = -1
220
+ def select(result):
221
+ for elem in result:
222
+ parent = elem.getparent()
223
+ if parent is None:
224
+ continue
225
+ try:
226
+ # FIXME: what if the selector is "*" ?
227
+ elems = list(parent.iterchildren(elem.tag))
228
+ if elems[index] is elem:
229
+ yield elem
230
+ except IndexError:
231
+ pass
232
+ return select
233
+ raise SyntaxError("invalid predicate")
234
+
235
+ ops = {
236
+ "": prepare_child,
237
+ "*": prepare_star,
238
+ ".": prepare_self,
239
+ "..": prepare_parent,
240
+ "//": prepare_descendant,
241
+ "[": prepare_predicate,
242
+ }
243
+
244
+
245
+ # --------------------------------------------------------------------
246
+
247
+ _cache = {}
248
+
249
+
250
+ def _build_path_iterator(path, namespaces, with_prefixes=True):
251
+ """compile selector pattern"""
252
+ if path[-1:] == "/":
253
+ path += "*" # implicit all (FIXME: keep this?)
254
+
255
+ cache_key = (path,)
256
+ if namespaces:
257
+ # lxml originally used None for the default namespace but ElementTree uses the
258
+ # more convenient (all-strings-dict) empty string, so we support both here,
259
+ # preferring the more convenient '', as long as they aren't ambiguous.
260
+ if None in namespaces:
261
+ if '' in namespaces and namespaces[None] != namespaces['']:
262
+ raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
263
+ namespaces[None], namespaces['']))
264
+ cache_key += (namespaces[None],) + tuple(sorted(
265
+ item for item in namespaces.items() if item[0] is not None))
266
+ else:
267
+ cache_key += tuple(sorted(namespaces.items()))
268
+
269
+ try:
270
+ return _cache[cache_key]
271
+ except KeyError:
272
+ pass
273
+ if len(_cache) > 100:
274
+ _cache.clear()
275
+
276
+ if path[:1] == "/":
277
+ raise SyntaxError("cannot use absolute path on element")
278
+ stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
279
+ try:
280
+ _next = stream.next
281
+ except AttributeError:
282
+ # Python 3
283
+ _next = stream.__next__
284
+ try:
285
+ token = _next()
286
+ except StopIteration:
287
+ raise SyntaxError("empty path expression")
288
+ selector = []
289
+ while 1:
290
+ try:
291
+ selector.append(ops[token[0]](_next, token))
292
+ except StopIteration:
293
+ raise SyntaxError("invalid path")
294
+ try:
295
+ token = _next()
296
+ if token[0] == "/":
297
+ token = _next()
298
+ except StopIteration:
299
+ break
300
+ _cache[cache_key] = selector
301
+ return selector
302
+
303
+
304
+ ##
305
+ # Iterate over the matching nodes
306
+
307
+ def iterfind(elem, path, namespaces=None, with_prefixes=True):
308
+ selector = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
309
+ result = iter((elem,))
310
+ for select in selector:
311
+ result = select(result)
312
+ return result
313
+
314
+
315
+ ##
316
+ # Find first matching object.
317
+
318
+ def find(elem, path, namespaces=None, with_prefixes=True):
319
+ it = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
320
+ try:
321
+ return next(it)
322
+ except StopIteration:
323
+ return None
324
+
325
+
326
+ ##
327
+ # Find all matching objects.
328
+
329
+ def findall(elem, path, namespaces=None, with_prefixes=True):
330
+ return list(iterfind(elem, path, namespaces))
331
+
332
+
333
+ ##
334
+ # Find text for first matching object.
335
+
336
+ def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
337
+ el = find(elem, path, namespaces, with_prefixes=with_prefixes)
338
+ if el is None:
339
+ return default
340
+ else:
341
+ return el.text or ''