lxml 6.0.0__cp310-cp310-musllinux_1_2_armv7l.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- lxml/ElementInclude.py +244 -0
- lxml/__init__.py +22 -0
- lxml/_elementpath.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/_elementpath.py +343 -0
- lxml/apihelpers.pxi +1801 -0
- lxml/builder.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/builder.py +243 -0
- lxml/classlookup.pxi +580 -0
- lxml/cleanup.pxi +215 -0
- lxml/cssselect.py +101 -0
- lxml/debug.pxi +36 -0
- lxml/docloader.pxi +178 -0
- lxml/doctestcompare.py +488 -0
- lxml/dtd.pxi +479 -0
- lxml/etree.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/etree.h +244 -0
- lxml/etree.pyx +3853 -0
- lxml/etree_api.h +204 -0
- lxml/extensions.pxi +830 -0
- lxml/html/ElementSoup.py +10 -0
- lxml/html/__init__.py +1927 -0
- lxml/html/_diffcommand.py +86 -0
- lxml/html/_difflib.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/html/_difflib.py +2106 -0
- lxml/html/_html5builder.py +100 -0
- lxml/html/_setmixin.py +56 -0
- lxml/html/builder.py +173 -0
- lxml/html/clean.py +21 -0
- lxml/html/defs.py +135 -0
- lxml/html/diff.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/html/diff.py +972 -0
- lxml/html/formfill.py +299 -0
- lxml/html/html5parser.py +260 -0
- lxml/html/soupparser.py +314 -0
- lxml/html/usedoctest.py +13 -0
- lxml/includes/__init__.pxd +0 -0
- lxml/includes/__init__.py +0 -0
- lxml/includes/c14n.pxd +25 -0
- lxml/includes/config.pxd +3 -0
- lxml/includes/dtdvalid.pxd +18 -0
- lxml/includes/etree_defs.h +379 -0
- lxml/includes/etreepublic.pxd +237 -0
- lxml/includes/extlibs/__init__.py +0 -0
- lxml/includes/extlibs/libcharset.h +45 -0
- lxml/includes/extlibs/localcharset.h +137 -0
- lxml/includes/extlibs/zconf.h +543 -0
- lxml/includes/extlibs/zlib.h +1938 -0
- lxml/includes/htmlparser.pxd +56 -0
- lxml/includes/libexslt/__init__.py +0 -0
- lxml/includes/libexslt/exslt.h +108 -0
- lxml/includes/libexslt/exsltconfig.h +70 -0
- lxml/includes/libexslt/exsltexports.h +63 -0
- lxml/includes/libxml/HTMLparser.h +339 -0
- lxml/includes/libxml/HTMLtree.h +148 -0
- lxml/includes/libxml/SAX.h +18 -0
- lxml/includes/libxml/SAX2.h +170 -0
- lxml/includes/libxml/__init__.py +0 -0
- lxml/includes/libxml/c14n.h +115 -0
- lxml/includes/libxml/catalog.h +183 -0
- lxml/includes/libxml/chvalid.h +230 -0
- lxml/includes/libxml/debugXML.h +79 -0
- lxml/includes/libxml/dict.h +82 -0
- lxml/includes/libxml/encoding.h +307 -0
- lxml/includes/libxml/entities.h +147 -0
- lxml/includes/libxml/globals.h +25 -0
- lxml/includes/libxml/hash.h +251 -0
- lxml/includes/libxml/list.h +137 -0
- lxml/includes/libxml/nanoftp.h +16 -0
- lxml/includes/libxml/nanohttp.h +98 -0
- lxml/includes/libxml/parser.h +1633 -0
- lxml/includes/libxml/parserInternals.h +591 -0
- lxml/includes/libxml/relaxng.h +224 -0
- lxml/includes/libxml/schemasInternals.h +959 -0
- lxml/includes/libxml/schematron.h +143 -0
- lxml/includes/libxml/threads.h +81 -0
- lxml/includes/libxml/tree.h +1326 -0
- lxml/includes/libxml/uri.h +106 -0
- lxml/includes/libxml/valid.h +485 -0
- lxml/includes/libxml/xinclude.h +141 -0
- lxml/includes/libxml/xlink.h +193 -0
- lxml/includes/libxml/xmlIO.h +419 -0
- lxml/includes/libxml/xmlautomata.h +163 -0
- lxml/includes/libxml/xmlerror.h +962 -0
- lxml/includes/libxml/xmlexports.h +96 -0
- lxml/includes/libxml/xmlmemory.h +188 -0
- lxml/includes/libxml/xmlmodule.h +61 -0
- lxml/includes/libxml/xmlreader.h +444 -0
- lxml/includes/libxml/xmlregexp.h +116 -0
- lxml/includes/libxml/xmlsave.h +111 -0
- lxml/includes/libxml/xmlschemas.h +254 -0
- lxml/includes/libxml/xmlschemastypes.h +152 -0
- lxml/includes/libxml/xmlstring.h +140 -0
- lxml/includes/libxml/xmlunicode.h +15 -0
- lxml/includes/libxml/xmlversion.h +332 -0
- lxml/includes/libxml/xmlwriter.h +489 -0
- lxml/includes/libxml/xpath.h +569 -0
- lxml/includes/libxml/xpathInternals.h +639 -0
- lxml/includes/libxml/xpointer.h +48 -0
- lxml/includes/libxslt/__init__.py +0 -0
- lxml/includes/libxslt/attributes.h +39 -0
- lxml/includes/libxslt/documents.h +93 -0
- lxml/includes/libxslt/extensions.h +262 -0
- lxml/includes/libxslt/extra.h +72 -0
- lxml/includes/libxslt/functions.h +78 -0
- lxml/includes/libxslt/imports.h +75 -0
- lxml/includes/libxslt/keys.h +53 -0
- lxml/includes/libxslt/namespaces.h +68 -0
- lxml/includes/libxslt/numbersInternals.h +73 -0
- lxml/includes/libxslt/pattern.h +84 -0
- lxml/includes/libxslt/preproc.h +43 -0
- lxml/includes/libxslt/security.h +104 -0
- lxml/includes/libxslt/templates.h +77 -0
- lxml/includes/libxslt/transform.h +207 -0
- lxml/includes/libxslt/variables.h +118 -0
- lxml/includes/libxslt/xslt.h +110 -0
- lxml/includes/libxslt/xsltInternals.h +1995 -0
- lxml/includes/libxslt/xsltconfig.h +146 -0
- lxml/includes/libxslt/xsltexports.h +64 -0
- lxml/includes/libxslt/xsltlocale.h +44 -0
- lxml/includes/libxslt/xsltutils.h +343 -0
- lxml/includes/lxml-version.h +3 -0
- lxml/includes/relaxng.pxd +64 -0
- lxml/includes/schematron.pxd +34 -0
- lxml/includes/tree.pxd +492 -0
- lxml/includes/uri.pxd +5 -0
- lxml/includes/xinclude.pxd +22 -0
- lxml/includes/xmlerror.pxd +852 -0
- lxml/includes/xmlparser.pxd +303 -0
- lxml/includes/xmlschema.pxd +35 -0
- lxml/includes/xpath.pxd +136 -0
- lxml/includes/xslt.pxd +190 -0
- lxml/isoschematron/__init__.py +348 -0
- lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
- lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
- lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
- lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
- lxml/iterparse.pxi +438 -0
- lxml/lxml.etree.h +244 -0
- lxml/lxml.etree_api.h +204 -0
- lxml/nsclasses.pxi +281 -0
- lxml/objectify.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/objectify.pyx +2149 -0
- lxml/objectpath.pxi +332 -0
- lxml/parser.pxi +2059 -0
- lxml/parsertarget.pxi +180 -0
- lxml/proxy.pxi +619 -0
- lxml/public-api.pxi +178 -0
- lxml/pyclasslookup.py +3 -0
- lxml/readonlytree.pxi +565 -0
- lxml/relaxng.pxi +165 -0
- lxml/sax.cpython-310-arm-linux-gnueabihf.so +0 -0
- lxml/sax.py +286 -0
- lxml/saxparser.pxi +875 -0
- lxml/schematron.pxi +173 -0
- lxml/serializer.pxi +1849 -0
- lxml/usedoctest.py +13 -0
- lxml/xinclude.pxi +67 -0
- lxml/xmlerror.pxi +1654 -0
- lxml/xmlid.pxi +179 -0
- lxml/xmlschema.pxi +215 -0
- lxml/xpath.pxi +487 -0
- lxml/xslt.pxi +957 -0
- lxml/xsltext.pxi +242 -0
- lxml-6.0.0.dist-info/METADATA +163 -0
- lxml-6.0.0.dist-info/RECORD +174 -0
- lxml-6.0.0.dist-info/WHEEL +5 -0
- lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
- lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
- lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/ElementInclude.py
ADDED
@@ -0,0 +1,244 @@
|
|
1
|
+
#
|
2
|
+
# ElementTree
|
3
|
+
# $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
|
4
|
+
#
|
5
|
+
# limited xinclude support for element trees
|
6
|
+
#
|
7
|
+
# history:
|
8
|
+
# 2003-08-15 fl created
|
9
|
+
# 2003-11-14 fl fixed default loader
|
10
|
+
#
|
11
|
+
# Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
|
12
|
+
#
|
13
|
+
# fredrik@pythonware.com
|
14
|
+
# http://www.pythonware.com
|
15
|
+
#
|
16
|
+
# --------------------------------------------------------------------
|
17
|
+
# The ElementTree toolkit is
|
18
|
+
#
|
19
|
+
# Copyright (c) 1999-2004 by Fredrik Lundh
|
20
|
+
#
|
21
|
+
# By obtaining, using, and/or copying this software and/or its
|
22
|
+
# associated documentation, you agree that you have read, understood,
|
23
|
+
# and will comply with the following terms and conditions:
|
24
|
+
#
|
25
|
+
# Permission to use, copy, modify, and distribute this software and
|
26
|
+
# its associated documentation for any purpose and without fee is
|
27
|
+
# hereby granted, provided that the above copyright notice appears in
|
28
|
+
# all copies, and that both that copyright notice and this permission
|
29
|
+
# notice appear in supporting documentation, and that the name of
|
30
|
+
# Secret Labs AB or the author not be used in advertising or publicity
|
31
|
+
# pertaining to distribution of the software without specific, written
|
32
|
+
# prior permission.
|
33
|
+
#
|
34
|
+
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
35
|
+
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
36
|
+
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
37
|
+
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
38
|
+
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
39
|
+
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
40
|
+
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
41
|
+
# OF THIS SOFTWARE.
|
42
|
+
# --------------------------------------------------------------------
|
43
|
+
|
44
|
+
"""
|
45
|
+
Limited XInclude support for the ElementTree package.
|
46
|
+
|
47
|
+
While lxml.etree has full support for XInclude (see
|
48
|
+
`etree.ElementTree.xinclude()`), this module provides a simpler, pure
|
49
|
+
Python, ElementTree compatible implementation that supports a simple
|
50
|
+
form of custom URL resolvers.
|
51
|
+
"""
|
52
|
+
|
53
|
+
from lxml import etree
|
54
|
+
try:
|
55
|
+
from urlparse import urljoin
|
56
|
+
from urllib2 import urlopen
|
57
|
+
except ImportError:
|
58
|
+
# Python 3
|
59
|
+
from urllib.parse import urljoin
|
60
|
+
from urllib.request import urlopen
|
61
|
+
|
62
|
+
# Clark-notation prefix of the XInclude namespace.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Qualified tag names derived from the namespace prefix.  The "*" variant is
# the wildcard passed to ``iter()`` to find every element in the namespace.
XINCLUDE_INCLUDE = "%sinclude" % XINCLUDE
XINCLUDE_FALLBACK = "%sfallback" % XINCLUDE
XINCLUDE_ITER_TAG = "%s*" % XINCLUDE

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6
|
70
|
+
|
71
|
+
|
72
|
+
##
|
73
|
+
# Fatal include error.
|
74
|
+
|
75
|
+
class FatalIncludeError(etree.LxmlSyntaxError):
    """Raised when an XInclude directive is malformed or cannot be resolved."""
|
77
|
+
|
78
|
+
|
79
|
+
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when the maximum XInclude nesting depth has been reached."""
|
81
|
+
|
82
|
+
|
83
|
+
##
|
84
|
+
# ET compatible default loader.
|
85
|
+
# This loader reads an included resource from disk.
|
86
|
+
#
|
87
|
+
# @param href Resource reference.
|
88
|
+
# @param parse Parse mode. Either "xml" or "text".
|
89
|
+
# @param encoding Optional text encoding.
|
90
|
+
# @return The expanded resource. If the parse mode is "xml", this
|
91
|
+
# is an ElementTree instance. If the parse mode is "text", this
|
92
|
+
# is a Unicode string. If the loader fails, it can return None
|
93
|
+
# or raise an IOError exception.
|
94
|
+
# @throws IOError If the loader fails to load the resource.
|
95
|
+
|
96
|
+
def default_loader(href, parse, encoding=None):
    """Load an included resource from the local file system.

    :param href: path of the resource; it is passed to ``open()``.
    :param parse: parse mode.  ``"xml"`` parses the file as XML, any other
        value reads it as text.
    :param encoding: text encoding used in text mode; UTF-8 is used when
        it is empty or ``None``.
    :return: the root element of the parsed document in XML mode,
        otherwise the decoded file content as a string.
    :raises IOError: if the file cannot be opened or read.
    """
    # The context manager guarantees that the file is closed even when
    # parsing or reading fails.
    with open(href, 'rb') as source:
        if parse == "xml":
            return etree.parse(source).getroot()
        raw = source.read()
    return raw.decode(encoding or 'utf-8')
|
107
|
+
|
108
|
+
|
109
|
+
##
|
110
|
+
# Default loader used by lxml.etree - handles custom resolvers properly
|
111
|
+
#
|
112
|
+
|
113
|
+
def _lxml_default_loader(href, parse, encoding=None, parser=None):
    """Load an included resource for lxml, honouring the given parser.

    :param href: location of the resource.  In text mode, a value
        containing ``"://"`` is opened with ``urlopen()``, anything else
        with ``open()``.
    :param parse: parse mode.  ``"xml"`` parses ``href`` with
        ``etree.parse()``, any other value reads it as text.
    :param encoding: text encoding used in text mode; UTF-8 is used when
        it is empty or ``None``.
    :param parser: parser passed on to ``etree.parse()`` in XML mode.
    :return: the root element in XML mode, otherwise the decoded text.
    """
    if parse == "xml":
        return etree.parse(href, parser).getroot()

    stream = urlopen(href) if "://" in href else open(href, 'rb')
    try:
        raw = stream.read()
    finally:
        # Release the file/connection even if reading fails.
        stream.close()
    return raw.decode(encoding or 'utf-8')
|
127
|
+
|
128
|
+
|
129
|
+
##
|
130
|
+
# Wrapper for ET compatibility - drops the parser
|
131
|
+
|
132
|
+
def _wrap_et_loader(loader):
    """Adapt an ElementTree style loader to the internal loader signature.

    The returned callable additionally accepts a ``parser`` argument, which
    is discarded before delegating to ``loader(href, parse, encoding)``.
    """
    def load(href, parse, encoding=None, parser=None):
        # 'parser' is accepted for call compatibility only.
        loaded = loader(href, parse, encoding)
        return loaded
    return load
|
136
|
+
|
137
|
+
|
138
|
+
##
|
139
|
+
# Expand XInclude directives.
|
140
|
+
#
|
141
|
+
# @param elem Root element.
|
142
|
+
# @param loader Optional resource loader. If omitted, it defaults
|
143
|
+
# to {@link default_loader}. If given, it should be a callable
|
144
|
+
# that implements the same interface as <b>default_loader</b>.
|
145
|
+
# @param base_url The base URL of the original file, to resolve
|
146
|
+
# relative include file references.
|
147
|
+
# @param max_depth The maximum number of recursive inclusions.
|
148
|
+
# Limited to reduce the risk of malicious content explosion.
|
149
|
+
# Pass None to disable the limitation.
|
150
|
+
# @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
|
151
|
+
# @throws FatalIncludeError If the function fails to include a given
|
152
|
+
# resource, or if the tree contains malformed XInclude elements.
|
153
|
+
# @throws IOError If the function fails to load a given resource.
|
154
|
+
# @returns the node or its replacement if it was an XInclude node
|
155
|
+
|
156
|
+
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand XInclude directives in ``elem``.

    :param elem: an element, or an object with a ``getroot()`` method
        (such as an ElementTree), whose root is then processed.
    :param loader: optional resource loader called as
        ``loader(href, parse, encoding)``.
    :param base_url: base URL used to resolve relative ``href`` values.
        When ``None``, the ``docinfo.URL`` of the tree is used if the tree
        provides ``docinfo``.
    :param max_depth: maximum number of nested inclusions, or ``None`` to
        disable the limit.
    :return: the value returned by ``_include()``: the processed element,
        or its replacement if the element itself was an XInclude node.
    :raises ValueError: if ``max_depth`` is negative.
    """
    if max_depth is None:
        # A negative counter never reaches the 0 cut-off used by _include().
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    is_tree = hasattr(elem, 'getroot')
    if base_url is None:
        tree = elem if is_tree else elem.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL
    if is_tree:
        elem = elem.getroot()

    # Hand the result back so that a replaced root node is not lost.
    return _include(elem, loader, base_url, max_depth)
|
174
|
+
|
175
|
+
|
176
|
+
def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Recursively expand the XInclude elements found below ``elem``.

    :param elem: element whose subtree (including itself) is processed.
    :param loader: optional ElementTree style loader; when ``None``,
        ``_lxml_default_loader`` is used.
    :param base_url: base URL that ``href`` attributes are joined with.
    :param max_depth: remaining inclusion depth; an XML include is refused
        when this is exactly 0.
    :param _parent_hrefs: set of hrefs currently being included, used to
        detect recursive includes.
    :return: ``elem``, or the included node / text if ``elem`` itself was
        an ``xi:include`` element without a parent.
    :raises LimitedRecursiveIncludeError: if ``max_depth`` is 0 when an
        XML include is encountered.
    :raises FatalIncludeError: on recursive includes, unloadable
        resources, unknown parse modes and misplaced XInclude elements.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Collect the matches up front: the loop below replaces and removes
    # elements of the tree it would otherwise be iterating over.
    include_elements = list(elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                    )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                    )
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                    )
                predecessor = e.getprevious()
                if predecessor is not None:
                    # Removing 'e' below also drops its tail, so carry the
                    # tail over explicitly, as the parent.text branch does.
                    predecessor.tail = (predecessor.tail or "") + text + (e.tail or "")
                elif parent is None:
                    return text  # replaced the root node!
                else:
                    parent.text = (parent.text or "") + text + (e.tail or "")
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
            )
    return elem
|
lxml/__init__.py
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# this is a package
|
2
|
+
|
3
|
+
__version__ = "6.0.0"
|
4
|
+
|
5
|
+
|
6
|
+
def get_include():
    """
    Returns a list of header include paths (for lxml itself, libxml2
    and libxslt) needed to compile C code against lxml if it was built
    with statically linked libraries.

    The list starts with the package's 'includes' directory and the
    package directory itself, followed by every sub-directory found
    directly inside 'includes'.
    """
    import os
    package_dir = __path__[0]
    include_root = os.path.join(package_dir, 'includes')

    candidates = (
        os.path.join(include_root, entry)
        for entry in os.listdir(include_root)
    )
    return [include_root, package_dir] + [
        candidate for candidate in candidates if os.path.isdir(candidate)
    ]
|
Binary file
|
lxml/_elementpath.py
ADDED
@@ -0,0 +1,343 @@
|
|
1
|
+
# cython: language_level=3
|
2
|
+
|
3
|
+
#
|
4
|
+
# ElementTree
|
5
|
+
# $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
|
6
|
+
#
|
7
|
+
# limited xpath support for element trees
|
8
|
+
#
|
9
|
+
# history:
|
10
|
+
# 2003-05-23 fl created
|
11
|
+
# 2003-05-28 fl added support for // etc
|
12
|
+
# 2003-08-27 fl fixed parsing of periods in element names
|
13
|
+
# 2007-09-10 fl new selection engine
|
14
|
+
# 2007-09-12 fl fixed parent selector
|
15
|
+
# 2007-09-13 fl added iterfind; changed findall to return a list
|
16
|
+
# 2007-11-30 fl added namespaces support
|
17
|
+
# 2009-10-30 fl added child element value filter
|
18
|
+
#
|
19
|
+
# Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
|
20
|
+
#
|
21
|
+
# fredrik@pythonware.com
|
22
|
+
# http://www.pythonware.com
|
23
|
+
#
|
24
|
+
# --------------------------------------------------------------------
|
25
|
+
# The ElementTree toolkit is
|
26
|
+
#
|
27
|
+
# Copyright (c) 1999-2009 by Fredrik Lundh
|
28
|
+
#
|
29
|
+
# By obtaining, using, and/or copying this software and/or its
|
30
|
+
# associated documentation, you agree that you have read, understood,
|
31
|
+
# and will comply with the following terms and conditions:
|
32
|
+
#
|
33
|
+
# Permission to use, copy, modify, and distribute this software and
|
34
|
+
# its associated documentation for any purpose and without fee is
|
35
|
+
# hereby granted, provided that the above copyright notice appears in
|
36
|
+
# all copies, and that both that copyright notice and this permission
|
37
|
+
# notice appear in supporting documentation, and that the name of
|
38
|
+
# Secret Labs AB or the author not be used in advertising or publicity
|
39
|
+
# pertaining to distribution of the software without specific, written
|
40
|
+
# prior permission.
|
41
|
+
#
|
42
|
+
# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
|
43
|
+
# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
|
44
|
+
# ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
|
45
|
+
# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
|
46
|
+
# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
|
47
|
+
# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
|
48
|
+
# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
|
49
|
+
# OF THIS SOFTWARE.
|
50
|
+
# --------------------------------------------------------------------
|
51
|
+
|
52
|
+
##
|
53
|
+
# Implementation module for XPath support. There's usually no reason
|
54
|
+
# to import this module directly; the <b>ElementTree</b> does this for
|
55
|
+
# you, if needed.
|
56
|
+
##
|
57
|
+
|
58
|
+
|
59
|
+
import re
|
60
|
+
|
61
|
+
xpath_tokenizer_re = re.compile(
    # Group 1: quoted string literals and operator tokens.
    "("
    "'[^']*'|\"[^\"]*\"|"  # single- or double-quoted string
    "::|"
    "//?|"                 # '/' or '//'
    r"\.\.|"
    r"\(\)|"
    r"[/.*:\[\]\(\)@=])|"  # any remaining single-character operator
    # Group 2: a name, optionally preceded by a '{namespace}' part.
    r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)|"
    # Whitespace matches without filling either group.
    r"\s+"
)
|
72
|
+
|
73
|
+
def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
    """Split a path expression into ``(operator, tag)`` token pairs.

    Names of the form ``prefix:name`` are rewritten to ``{uri}name`` using
    ``namespaces`` when ``with_prefixes`` is true.  Other unqualified names
    are put into the default namespace (the ``None`` or ``''`` entry of
    ``namespaces``) unless they are decimal numbers or directly follow an
    ``@`` token.

    Raises SyntaxError if a prefix is not found in ``namespaces``.
    """
    # ElementTree uses '', lxml used None originally.
    if namespaces:
        default_namespace = namespaces.get(None) or namespaces.get('')
    else:
        default_namespace = None

    after_at = False  # True while the previous token was '@'
    for token in xpath_tokenizer_re.findall(pattern):
        ttype, tag = token
        if not tag or tag[0] == "{":
            # Operator, whitespace, or an already qualified name.
            yield token
            after_at = ttype == '@'
            continue

        if with_prefixes and ":" in tag:
            prefix, local_name = tag.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                namespace_uri = namespaces[prefix]
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
            yield ttype, "{%s}%s" % (namespace_uri, local_name)
        elif tag.isdecimal() or after_at or not default_namespace:
            # Index, attribute name, or nothing to qualify the name with.
            yield token
        else:
            yield ttype, "{%s}%s" % (default_namespace, tag)
        after_at = False
|
98
|
+
|
99
|
+
|
100
|
+
def prepare_child(next, token):
    """Build a selector yielding the children named by ``token[1]``."""
    child_tag = token[1]

    def select(result):
        for parent in result:
            for child in parent.iterchildren(child_tag):
                yield child

    return select
|
106
|
+
|
107
|
+
def prepare_star(next, token):
    """Build a selector yielding all children matched by the '*' wildcard."""
    def select(result):
        for parent in result:
            for child in parent.iterchildren('*'):
                yield child

    return select
|
112
|
+
|
113
|
+
def prepare_self(next, token):
    """Build the selector for '.', which passes its input through unchanged."""
    def select(result):
        # The current node set is the result.
        return result

    return select
|
117
|
+
|
118
|
+
def prepare_descendant(next, token):
    """Build a selector for '//', yielding matching descendants.

    The token following '//' is consumed with ``next()`` and must be
    either '*' or a tag name; anything else raises SyntaxError.
    """
    following = next()
    kind = following[0]
    if kind == "*":
        tag = "*"
    elif kind:
        # Any other operator cannot follow '//'.
        raise SyntaxError("invalid descendant")
    else:
        tag = following[1]

    def select(result):
        for ancestor in result:
            for descendant in ancestor.iterdescendants(tag):
                yield descendant

    return select
|
130
|
+
|
131
|
+
def prepare_parent(next, token):
    """Build a selector for '..', yielding the parent of each element."""
    def select(result):
        for child in result:
            owner = child.getparent()
            if owner is None:
                # Elements without a parent contribute nothing.
                continue
            yield owner

    return select
|
138
|
+
|
139
|
+
def prepare_predicate(next, token):
    """Build a selector for a ``[...]`` predicate.

    Tokens are consumed with ``next()`` up to the closing ``]`` and
    reduced to a signature string that decides the predicate type.
    Supported forms are ``[@attribute]``, ``[@attribute='value']``,
    ``[tag]``, ``[.='value']``, ``[tag='value']``, ``[index]``,
    ``[last()]`` and ``[last()-index]``.

    Raises SyntaxError for any other predicate.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    kinds = []
    predicate = []
    while True:
        token = next()
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # Quoted literal: record it as "'" and strip the quotes.
            token = "'", token[0][1:-1]
        kinds.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(kinds)

    def names_tag():
        # A leading name is a tag unless it looks like an integer.
        return not re.match(r"-?\d+$", predicate[0])

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        attr_name = predicate[1]

        def select(result):
            for elem in result:
                if elem.get(attr_name) is not None:
                    yield elem
        return select

    if signature == "@-='":
        # [@attribute='value']
        attr_name, expected = predicate[1], predicate[-1]

        def select(result):
            for elem in result:
                if elem.get(attr_name) == expected:
                    yield elem
        return select

    if signature == "-" and names_tag():
        # [tag]
        child_tag = predicate[0]

        def select(result):
            for elem in result:
                if any(True for _ in elem.iterchildren(child_tag)):
                    yield elem
        return select

    if signature == ".='" or (signature == "-='" and names_tag()):
        # [.='value'] or [tag='value']
        child_tag, expected = predicate[0], predicate[-1]
        if child_tag:
            def select(result):
                for elem in result:
                    if any("".join(child.itertext()) == expected
                           for child in elem.iterchildren(child_tag)):
                        yield elem
        else:
            def select(result):
                for elem in result:
                    if "".join(elem.itertext()) == expected:
                        yield elem
        return select

    if signature in ("-", "-()", "-()-"):
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]
            index = int(predicate[0]) - 1
            if index == -1:
                raise SyntaxError(
                    "indices in path predicates are 1-based, not 0-based")
            if index < 0:
                raise SyntaxError("path index >= 1 expected")
        elif predicate[0] != "last":
            raise SyntaxError("unsupported function")
        elif signature == "-()-":
            try:
                index = int(predicate[2]) - 1
            except ValueError:
                raise SyntaxError("unsupported expression")
        else:
            index = -1

        def select(result):
            for elem in result:
                parent = elem.getparent()
                if parent is None:
                    continue
                try:
                    # FIXME: what if the selector is "*" ?
                    candidate = list(parent.iterchildren(elem.tag))[index]
                except IndexError:
                    continue
                if candidate is elem:
                    yield elem
        return select

    raise SyntaxError("invalid predicate")
|
236
|
+
|
237
|
+
# Maps the operator part of a path token to the factory that builds the
# selector for that step.
ops = {
    "": prepare_child,        # plain tag name
    "*": prepare_star,        # any child
    ".": prepare_self,        # current element
    "..": prepare_parent,     # parent element
    "//": prepare_descendant, # descendants
    "[": prepare_predicate,   # start of a [...] predicate
}


# --------------------------------------------------------------------

# Cache of compiled selector lists, filled by _build_path_iterator().
_cache = {}
|
250
|
+
|
251
|
+
|
252
|
+
def _build_path_iterator(path, namespaces, with_prefixes=True):
    """compile selector pattern

    Tokenizes ``path`` and turns it into a list of selector callables
    using the ``ops`` table.  Results are stored in ``_cache``.

    :param path: the path expression; a trailing "/" implies "/*".
    :param namespaces: optional prefix-to-URI mapping.  The default
        namespace may be given under the key ``None`` or ``''``.
    :param with_prefixes: passed on to ``xpath_tokenizer()``.
    :return: list of selector callables, one per path step.
    :raises ValueError: if ``None`` and ``''`` map to different
        default namespaces.
    :raises SyntaxError: for absolute, empty or incomplete paths.
    """
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    # 'with_prefixes' changes how the tokenizer resolves "prefix:name"
    # tags, so it has to be part of the key; otherwise a selector compiled
    # for one setting would be returned for the other.
    cache_key = (path, bool(with_prefixes))
    if namespaces:
        # lxml originally used None for the default namespace but ElementTree uses the
        # more convenient (all-strings-dict) empty string, so we support both here,
        # preferring the more convenient '', as long as they aren't ambiguous.
        if None in namespaces:
            if '' in namespaces and namespaces[None] != namespaces['']:
                raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
                    namespaces[None], namespaces['']))
            # None cannot be sorted together with string keys.
            cache_key += (namespaces[None],) + tuple(sorted(
                item for item in namespaces.items() if item[0] is not None))
        else:
            cache_key += tuple(sorted(namespaces.items()))

    try:
        return _cache[cache_key]
    except KeyError:
        pass
    if len(_cache) > 100:
        # Crude size limit: start over instead of evicting single entries.
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")
    _next = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes)).__next__
    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")
    selector = []
    while True:
        try:
            # The factory may consume further tokens through _next.
            selector.append(ops[token[0]](_next, token))
        except StopIteration:
            raise SyntaxError("invalid path")
        try:
            token = _next()
            if token[0] == "/":
                # Skip the step separator.
                token = _next()
        except StopIteration:
            break
    _cache[cache_key] = selector
    return selector
|
304
|
+
|
305
|
+
|
306
|
+
##
|
307
|
+
# Iterate over the matching nodes
|
308
|
+
|
309
|
+
def iterfind(elem, path, namespaces=None, with_prefixes=True):
    """Return an iterator over the elements matching ``path`` from ``elem``.

    Each compiled step of the path is applied in turn to the result of
    the previous one, starting with ``elem`` alone.
    """
    steps = _build_path_iterator(path, namespaces, with_prefixes=with_prefixes)
    matches = iter((elem,))
    for step in steps:
        matches = step(matches)
    return matches
|
315
|
+
|
316
|
+
|
317
|
+
##
|
318
|
+
# Find first matching object.
|
319
|
+
|
320
|
+
def find(elem, path, namespaces=None, with_prefixes=True):
    """Return the first element matching ``path``, or None if none matches."""
    matches = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
    return next(matches, None)
|
326
|
+
|
327
|
+
|
328
|
+
##
|
329
|
+
# Find all matching objects.
|
330
|
+
|
331
|
+
def findall(elem, path, namespaces=None, with_prefixes=True):
    """Return a list of all elements matching ``path`` from ``elem``.

    ``namespaces`` and ``with_prefixes`` are passed on to ``iterfind()``.
    """
    # Forward 'with_prefixes' like find() and findtext() do; it used to be
    # dropped here, so findall() always resolved prefixes.
    return list(iterfind(elem, path, namespaces, with_prefixes=with_prefixes))
|
333
|
+
|
334
|
+
|
335
|
+
##
|
336
|
+
# Find text for first matching object.
|
337
|
+
|
338
|
+
def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
    """Return the text of the first element matching ``path``.

    Returns ``default`` when nothing matches, and an empty string when
    the matching element has no text.
    """
    match = find(elem, path, namespaces, with_prefixes=with_prefixes)
    if match is not None:
        return match.text or ''
    return default
|