lxml 6.0.0__cp39-cp39-musllinux_1_2_armv7l.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cpython-39-arm-linux-gnueabihf.so +0 -0
  4. lxml/_elementpath.py +343 -0
  5. lxml/apihelpers.pxi +1801 -0
  6. lxml/builder.cpython-39-arm-linux-gnueabihf.so +0 -0
  7. lxml/builder.py +243 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +36 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cpython-39-arm-linux-gnueabihf.so +0 -0
  16. lxml/etree.h +244 -0
  17. lxml/etree.pyx +3853 -0
  18. lxml/etree_api.h +204 -0
  19. lxml/extensions.pxi +830 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1927 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_difflib.cpython-39-arm-linux-gnueabihf.so +0 -0
  24. lxml/html/_difflib.py +2106 -0
  25. lxml/html/_html5builder.py +100 -0
  26. lxml/html/_setmixin.py +56 -0
  27. lxml/html/builder.py +173 -0
  28. lxml/html/clean.py +21 -0
  29. lxml/html/defs.py +135 -0
  30. lxml/html/diff.cpython-39-arm-linux-gnueabihf.so +0 -0
  31. lxml/html/diff.py +972 -0
  32. lxml/html/formfill.py +299 -0
  33. lxml/html/html5parser.py +260 -0
  34. lxml/html/soupparser.py +314 -0
  35. lxml/html/usedoctest.py +13 -0
  36. lxml/includes/__init__.pxd +0 -0
  37. lxml/includes/__init__.py +0 -0
  38. lxml/includes/c14n.pxd +25 -0
  39. lxml/includes/config.pxd +3 -0
  40. lxml/includes/dtdvalid.pxd +18 -0
  41. lxml/includes/etree_defs.h +379 -0
  42. lxml/includes/etreepublic.pxd +237 -0
  43. lxml/includes/extlibs/__init__.py +0 -0
  44. lxml/includes/extlibs/libcharset.h +45 -0
  45. lxml/includes/extlibs/localcharset.h +137 -0
  46. lxml/includes/extlibs/zconf.h +543 -0
  47. lxml/includes/extlibs/zlib.h +1938 -0
  48. lxml/includes/htmlparser.pxd +56 -0
  49. lxml/includes/libexslt/__init__.py +0 -0
  50. lxml/includes/libexslt/exslt.h +108 -0
  51. lxml/includes/libexslt/exsltconfig.h +70 -0
  52. lxml/includes/libexslt/exsltexports.h +63 -0
  53. lxml/includes/libxml/HTMLparser.h +339 -0
  54. lxml/includes/libxml/HTMLtree.h +148 -0
  55. lxml/includes/libxml/SAX.h +18 -0
  56. lxml/includes/libxml/SAX2.h +170 -0
  57. lxml/includes/libxml/__init__.py +0 -0
  58. lxml/includes/libxml/c14n.h +115 -0
  59. lxml/includes/libxml/catalog.h +183 -0
  60. lxml/includes/libxml/chvalid.h +230 -0
  61. lxml/includes/libxml/debugXML.h +79 -0
  62. lxml/includes/libxml/dict.h +82 -0
  63. lxml/includes/libxml/encoding.h +307 -0
  64. lxml/includes/libxml/entities.h +147 -0
  65. lxml/includes/libxml/globals.h +25 -0
  66. lxml/includes/libxml/hash.h +251 -0
  67. lxml/includes/libxml/list.h +137 -0
  68. lxml/includes/libxml/nanoftp.h +16 -0
  69. lxml/includes/libxml/nanohttp.h +98 -0
  70. lxml/includes/libxml/parser.h +1633 -0
  71. lxml/includes/libxml/parserInternals.h +591 -0
  72. lxml/includes/libxml/relaxng.h +224 -0
  73. lxml/includes/libxml/schemasInternals.h +959 -0
  74. lxml/includes/libxml/schematron.h +143 -0
  75. lxml/includes/libxml/threads.h +81 -0
  76. lxml/includes/libxml/tree.h +1326 -0
  77. lxml/includes/libxml/uri.h +106 -0
  78. lxml/includes/libxml/valid.h +485 -0
  79. lxml/includes/libxml/xinclude.h +141 -0
  80. lxml/includes/libxml/xlink.h +193 -0
  81. lxml/includes/libxml/xmlIO.h +419 -0
  82. lxml/includes/libxml/xmlautomata.h +163 -0
  83. lxml/includes/libxml/xmlerror.h +962 -0
  84. lxml/includes/libxml/xmlexports.h +96 -0
  85. lxml/includes/libxml/xmlmemory.h +188 -0
  86. lxml/includes/libxml/xmlmodule.h +61 -0
  87. lxml/includes/libxml/xmlreader.h +444 -0
  88. lxml/includes/libxml/xmlregexp.h +116 -0
  89. lxml/includes/libxml/xmlsave.h +111 -0
  90. lxml/includes/libxml/xmlschemas.h +254 -0
  91. lxml/includes/libxml/xmlschemastypes.h +152 -0
  92. lxml/includes/libxml/xmlstring.h +140 -0
  93. lxml/includes/libxml/xmlunicode.h +15 -0
  94. lxml/includes/libxml/xmlversion.h +332 -0
  95. lxml/includes/libxml/xmlwriter.h +489 -0
  96. lxml/includes/libxml/xpath.h +569 -0
  97. lxml/includes/libxml/xpathInternals.h +639 -0
  98. lxml/includes/libxml/xpointer.h +48 -0
  99. lxml/includes/libxslt/__init__.py +0 -0
  100. lxml/includes/libxslt/attributes.h +39 -0
  101. lxml/includes/libxslt/documents.h +93 -0
  102. lxml/includes/libxslt/extensions.h +262 -0
  103. lxml/includes/libxslt/extra.h +72 -0
  104. lxml/includes/libxslt/functions.h +78 -0
  105. lxml/includes/libxslt/imports.h +75 -0
  106. lxml/includes/libxslt/keys.h +53 -0
  107. lxml/includes/libxslt/namespaces.h +68 -0
  108. lxml/includes/libxslt/numbersInternals.h +73 -0
  109. lxml/includes/libxslt/pattern.h +84 -0
  110. lxml/includes/libxslt/preproc.h +43 -0
  111. lxml/includes/libxslt/security.h +104 -0
  112. lxml/includes/libxslt/templates.h +77 -0
  113. lxml/includes/libxslt/transform.h +207 -0
  114. lxml/includes/libxslt/variables.h +118 -0
  115. lxml/includes/libxslt/xslt.h +110 -0
  116. lxml/includes/libxslt/xsltInternals.h +1995 -0
  117. lxml/includes/libxslt/xsltconfig.h +146 -0
  118. lxml/includes/libxslt/xsltexports.h +64 -0
  119. lxml/includes/libxslt/xsltlocale.h +44 -0
  120. lxml/includes/libxslt/xsltutils.h +343 -0
  121. lxml/includes/lxml-version.h +3 -0
  122. lxml/includes/relaxng.pxd +64 -0
  123. lxml/includes/schematron.pxd +34 -0
  124. lxml/includes/tree.pxd +492 -0
  125. lxml/includes/uri.pxd +5 -0
  126. lxml/includes/xinclude.pxd +22 -0
  127. lxml/includes/xmlerror.pxd +852 -0
  128. lxml/includes/xmlparser.pxd +303 -0
  129. lxml/includes/xmlschema.pxd +35 -0
  130. lxml/includes/xpath.pxd +136 -0
  131. lxml/includes/xslt.pxd +190 -0
  132. lxml/isoschematron/__init__.py +348 -0
  133. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  134. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  135. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  136. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  137. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  138. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  142. lxml/iterparse.pxi +438 -0
  143. lxml/lxml.etree.h +244 -0
  144. lxml/lxml.etree_api.h +204 -0
  145. lxml/nsclasses.pxi +281 -0
  146. lxml/objectify.cpython-39-arm-linux-gnueabihf.so +0 -0
  147. lxml/objectify.pyx +2149 -0
  148. lxml/objectpath.pxi +332 -0
  149. lxml/parser.pxi +2059 -0
  150. lxml/parsertarget.pxi +180 -0
  151. lxml/proxy.pxi +619 -0
  152. lxml/public-api.pxi +178 -0
  153. lxml/pyclasslookup.py +3 -0
  154. lxml/readonlytree.pxi +565 -0
  155. lxml/relaxng.pxi +165 -0
  156. lxml/sax.cpython-39-arm-linux-gnueabihf.so +0 -0
  157. lxml/sax.py +286 -0
  158. lxml/saxparser.pxi +875 -0
  159. lxml/schematron.pxi +173 -0
  160. lxml/serializer.pxi +1849 -0
  161. lxml/usedoctest.py +13 -0
  162. lxml/xinclude.pxi +67 -0
  163. lxml/xmlerror.pxi +1654 -0
  164. lxml/xmlid.pxi +179 -0
  165. lxml/xmlschema.pxi +215 -0
  166. lxml/xpath.pxi +487 -0
  167. lxml/xslt.pxi +957 -0
  168. lxml/xsltext.pxi +242 -0
  169. lxml-6.0.0.dist-info/METADATA +163 -0
  170. lxml-6.0.0.dist-info/RECORD +174 -0
  171. lxml-6.0.0.dist-info/WHEEL +5 -0
  172. lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
  173. lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
  174. lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/ElementInclude.py ADDED
@@ -0,0 +1,244 @@
1
+ #
2
+ # ElementTree
3
+ # $Id: ElementInclude.py 1862 2004-06-18 07:31:02Z Fredrik $
4
+ #
5
+ # limited xinclude support for element trees
6
+ #
7
+ # history:
8
+ # 2003-08-15 fl created
9
+ # 2003-11-14 fl fixed default loader
10
+ #
11
+ # Copyright (c) 2003-2004 by Fredrik Lundh. All rights reserved.
12
+ #
13
+ # fredrik@pythonware.com
14
+ # http://www.pythonware.com
15
+ #
16
+ # --------------------------------------------------------------------
17
+ # The ElementTree toolkit is
18
+ #
19
+ # Copyright (c) 1999-2004 by Fredrik Lundh
20
+ #
21
+ # By obtaining, using, and/or copying this software and/or its
22
+ # associated documentation, you agree that you have read, understood,
23
+ # and will comply with the following terms and conditions:
24
+ #
25
+ # Permission to use, copy, modify, and distribute this software and
26
+ # its associated documentation for any purpose and without fee is
27
+ # hereby granted, provided that the above copyright notice appears in
28
+ # all copies, and that both that copyright notice and this permission
29
+ # notice appear in supporting documentation, and that the name of
30
+ # Secret Labs AB or the author not be used in advertising or publicity
31
+ # pertaining to distribution of the software without specific, written
32
+ # prior permission.
33
+ #
34
+ # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
35
+ # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
36
+ # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
37
+ # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
38
+ # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
39
+ # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
40
+ # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
41
+ # OF THIS SOFTWARE.
42
+ # --------------------------------------------------------------------
43
+
44
+ """
45
+ Limited XInclude support for the ElementTree package.
46
+
47
+ While lxml.etree has full support for XInclude (see
48
+ `etree.ElementTree.xinclude()`), this module provides a simpler, pure
49
+ Python, ElementTree compatible implementation that supports a simple
50
+ form of custom URL resolvers.
51
+ """
52
+
53
+ from lxml import etree
54
+ try:
55
+ from urlparse import urljoin
56
+ from urllib2 import urlopen
57
+ except ImportError:
58
+ # Python 3
59
+ from urllib.parse import urljoin
60
+ from urllib.request import urlopen
61
+
62
# Clark-notation prefix shared by every element of the XInclude namespace.
XINCLUDE = "{http://www.w3.org/2001/XInclude}"

# Qualified tags of the two supported elements, plus the wildcard tag that is
# handed to iter() to find every element of the XInclude namespace.
XINCLUDE_INCLUDE = "%sinclude" % XINCLUDE
XINCLUDE_FALLBACK = "%sfallback" % XINCLUDE
XINCLUDE_ITER_TAG = "%s*" % XINCLUDE

# For security reasons, the inclusion depth is limited to this read-only value by default.
DEFAULT_MAX_INCLUSION_DEPTH = 6
70
+
71
+
72
+ ##
73
+ # Fatal include error.
74
+
75
class FatalIncludeError(etree.LxmlSyntaxError):
    """Raised when an XInclude directive cannot be processed."""
77
+
78
+
79
class LimitedRecursiveIncludeError(FatalIncludeError):
    """Raised when the maximum inclusion depth has been used up."""
81
+
82
+
83
+ ##
84
+ # ET compatible default loader.
85
+ # This loader reads an included resource from disk.
86
+ #
87
+ # @param href Resource reference.
88
+ # @param parse Parse mode. Either "xml" or "text".
89
+ # @param encoding Optional text encoding.
90
+ # @return The expanded resource. If the parse mode is "xml", this
91
+ # is an ElementTree instance. If the parse mode is "text", this
92
+ # is a Unicode string. If the loader fails, it can return None
93
+ # or raise an IOError exception.
94
+ # @throws IOError If the loader fails to load the resource.
95
+
96
def default_loader(href, parse, encoding=None):
    """ElementTree compatible loader that reads an included resource from disk.

    href -- path of the resource; it is opened in binary mode.
    parse -- parse mode; "xml" parses the file, any other value reads it
        as text.
    encoding -- optional text encoding for text mode, 'utf-8' if not given.

    Returns the root element of the parsed document in "xml" mode, and the
    decoded file content otherwise.  Errors from opening, parsing or
    decoding the file are propagated to the caller.
    """
    # The context manager closes the file even when parsing or reading fails.
    with open(href, 'rb') as source:
        if parse == "xml":
            return etree.parse(source).getroot()
        data = source.read()
    return data.decode(encoding or 'utf-8')
107
+
108
+
109
+ ##
110
+ # Default loader used by lxml.etree - handles custom resolvers properly
111
+ #
112
+
113
+ def _lxml_default_loader(href, parse, encoding=None, parser=None):
114
+ if parse == "xml":
115
+ data = etree.parse(href, parser).getroot()
116
+ else:
117
+ if "://" in href:
118
+ f = urlopen(href)
119
+ else:
120
+ f = open(href, 'rb')
121
+ data = f.read()
122
+ f.close()
123
+ if not encoding:
124
+ encoding = 'utf-8'
125
+ data = data.decode(encoding)
126
+ return data
127
+
128
+
129
+ ##
130
+ # Wrapper for ET compatibility - drops the parser
131
+
132
+ def _wrap_et_loader(loader):
133
+ def load(href, parse, encoding=None, parser=None):
134
+ return loader(href, parse, encoding)
135
+ return load
136
+
137
+
138
+ ##
139
+ # Expand XInclude directives.
140
+ #
141
+ # @param elem Root element.
142
+ # @param loader Optional resource loader. If omitted, it defaults
143
+ # to {@link default_loader}. If given, it should be a callable
144
+ # that implements the same interface as <b>default_loader</b>.
145
+ # @param base_url The base URL of the original file, to resolve
146
+ # relative include file references.
147
+ # @param max_depth The maximum number of recursive inclusions.
148
+ # Limited to reduce the risk of malicious content explosion.
149
+ # Pass None to disable the limitation.
150
+ # @throws LimitedRecursiveIncludeError If the {@link max_depth} was exceeded.
151
+ # @throws FatalIncludeError If the function fails to include a given
152
+ # resource, or if the tree contains malformed XInclude elements.
153
+ # @throws IOError If the function fails to load a given resource.
154
+ # @returns the node or its replacement if it was an XInclude node
155
+
156
def include(elem, loader=None, base_url=None,
            max_depth=DEFAULT_MAX_INCLUSION_DEPTH):
    """Expand the XInclude directives in *elem*.

    elem -- root element, or a tree object providing getroot().
    loader -- optional ElementTree compatible resource loader, called as
        loader(href, parse, encoding).  The built-in loader is used if
        omitted.
    base_url -- base URL for resolving relative references.  If omitted,
        it is taken from the 'docinfo.URL' of the tree, when available.
    max_depth -- maximum number of recursive inclusions, or None to
        disable the limitation.

    Returns the processed element, or its replacement if the root itself
    was an XInclude element.

    Raises ValueError for a negative 'max_depth'.  FatalIncludeError and
    LimitedRecursiveIncludeError are raised by the expansion itself.
    """
    if max_depth is None:
        # A negative counter never reaches 0, i.e. the depth is unlimited.
        max_depth = -1
    elif max_depth < 0:
        raise ValueError("expected non-negative depth or None for 'max_depth', got %r" % max_depth)

    if base_url is None:
        if hasattr(elem, 'getroot'):
            tree = elem
            elem = elem.getroot()
        else:
            tree = elem.getroottree()
        if hasattr(tree, 'docinfo'):
            base_url = tree.docinfo.URL
    elif hasattr(elem, 'getroot'):
        elem = elem.getroot()

    # Hand the result back: when the root is an xi:include element, the
    # replacement is only reachable through this return value.
    return _include(elem, loader, base_url, max_depth)
174
+
175
+
176
def _include(elem, loader=None, base_url=None,
             max_depth=DEFAULT_MAX_INCLUSION_DEPTH, _parent_hrefs=None):
    """Expand the XInclude directives found in the subtree of *elem*.

    elem -- element whose subtree is processed.
    loader -- optional ElementTree compatible loader; it is wrapped so that
        it can be called with an additional parser argument.
    base_url -- base URL that relative 'href' values are joined with.
    max_depth -- remaining number of nested inclusions; an include in "xml"
        mode fails once this is 0.  Negative values never reach 0.
    _parent_hrefs -- hrefs of the documents currently being included, used
        to detect recursive includes.

    Returns *elem*, or the included node or text if the processed
    xi:include element has no parent.

    Raises FatalIncludeError for recursive includes, resources that cannot
    be loaded, unknown parse modes, misplaced xi:fallback elements and
    unknown elements in the XInclude namespace, and
    LimitedRecursiveIncludeError when 'max_depth' is used up.
    """
    if loader is not None:
        load_include = _wrap_et_loader(loader)
    else:
        load_include = _lxml_default_loader

    if _parent_hrefs is None:
        _parent_hrefs = set()

    parser = elem.getroottree().parser

    # Collect the matches up front: the tree is modified while they are
    # being processed.
    include_elements = list(
        elem.iter(XINCLUDE_ITER_TAG))

    for e in include_elements:
        if e.tag == XINCLUDE_INCLUDE:
            # process xinclude directive
            href = urljoin(base_url, e.get("href"))
            parse = e.get("parse", "xml")
            parent = e.getparent()
            if parse == "xml":
                if href in _parent_hrefs:
                    raise FatalIncludeError(
                        "recursive include of %r detected" % href
                        )
                if max_depth == 0:
                    raise LimitedRecursiveIncludeError(
                        "maximum xinclude depth reached when including file %s" % href)
                node = load_include(href, parse, parser=parser)
                if node is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                node = _include(node, loader, href, max_depth - 1, {href} | _parent_hrefs)
                if e.tail:
                    node.tail = (node.tail or "") + e.tail
                if parent is None:
                    return node  # replaced the root node!
                parent.replace(e, node)
            elif parse == "text":
                text = load_include(href, parse, encoding=e.get("encoding"))
                if text is None:
                    raise FatalIncludeError(
                        "cannot load %r as %r" % (href, parse)
                        )
                if parent is None:
                    # Checked first: without a parent there is nothing to
                    # remove the directive from, whatever precedes it.
                    return text  # replaced the root node!
                # The tail goes away together with the removed element, so
                # it has to be carried over in both of the cases below.
                text += e.tail or ""
                predecessor = e.getprevious()
                if predecessor is not None:
                    predecessor.tail = (predecessor.tail or "") + text
                else:
                    parent.text = (parent.text or "") + text
                parent.remove(e)
            else:
                raise FatalIncludeError(
                    "unknown parse type in xi:include tag (%r)" % parse
                )
        elif e.tag == XINCLUDE_FALLBACK:
            parent = e.getparent()
            if parent is not None and parent.tag != XINCLUDE_INCLUDE:
                raise FatalIncludeError(
                    "xi:fallback tag must be child of xi:include (%r)" % e.tag
                    )
        else:
            raise FatalIncludeError(
                "Invalid element found in XInclude namespace (%r)" % e.tag
                )
    return elem
lxml/__init__.py ADDED
@@ -0,0 +1,22 @@
1
+ # this is a package
2
+
3
+ __version__ = "6.0.0"
4
+
5
+
6
def get_include():
    """
    Returns a list of header include paths (for lxml itself, libxml2
    and libxslt) needed to compile C code against lxml if it was built
    with statically linked libraries.

    The list starts with the 'includes' directory of the package and the
    package directory itself, followed by every sub-directory of 'includes'.
    """
    import os

    package_dir = __path__[0]
    headers_dir = os.path.join(package_dir, 'includes')
    candidates = (
        os.path.join(headers_dir, entry) for entry in os.listdir(headers_dir)
    )
    return [headers_dir, package_dir] + [
        candidate for candidate in candidates if os.path.isdir(candidate)
    ]
lxml/_elementpath.py ADDED
@@ -0,0 +1,343 @@
1
+ # cython: language_level=3
2
+
3
+ #
4
+ # ElementTree
5
+ # $Id: ElementPath.py 3375 2008-02-13 08:05:08Z fredrik $
6
+ #
7
+ # limited xpath support for element trees
8
+ #
9
+ # history:
10
+ # 2003-05-23 fl created
11
+ # 2003-05-28 fl added support for // etc
12
+ # 2003-08-27 fl fixed parsing of periods in element names
13
+ # 2007-09-10 fl new selection engine
14
+ # 2007-09-12 fl fixed parent selector
15
+ # 2007-09-13 fl added iterfind; changed findall to return a list
16
+ # 2007-11-30 fl added namespaces support
17
+ # 2009-10-30 fl added child element value filter
18
+ #
19
+ # Copyright (c) 2003-2009 by Fredrik Lundh. All rights reserved.
20
+ #
21
+ # fredrik@pythonware.com
22
+ # http://www.pythonware.com
23
+ #
24
+ # --------------------------------------------------------------------
25
+ # The ElementTree toolkit is
26
+ #
27
+ # Copyright (c) 1999-2009 by Fredrik Lundh
28
+ #
29
+ # By obtaining, using, and/or copying this software and/or its
30
+ # associated documentation, you agree that you have read, understood,
31
+ # and will comply with the following terms and conditions:
32
+ #
33
+ # Permission to use, copy, modify, and distribute this software and
34
+ # its associated documentation for any purpose and without fee is
35
+ # hereby granted, provided that the above copyright notice appears in
36
+ # all copies, and that both that copyright notice and this permission
37
+ # notice appear in supporting documentation, and that the name of
38
+ # Secret Labs AB or the author not be used in advertising or publicity
39
+ # pertaining to distribution of the software without specific, written
40
+ # prior permission.
41
+ #
42
+ # SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
43
+ # TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
44
+ # ABILITY AND FITNESS. IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
45
+ # BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
46
+ # DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
47
+ # WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
48
+ # ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
49
+ # OF THIS SOFTWARE.
50
+ # --------------------------------------------------------------------
51
+
52
+ ##
53
+ # Implementation module for XPath support. There's usually no reason
54
+ # to import this module directly; the <b>ElementTree</b> does this for
55
+ # you, if needed.
56
+ ##
57
+
58
+
59
+ import re
60
+
61
# Tokenizer pattern with two capture groups: group 1 holds quoted strings and
# operators, group 2 holds names (optionally prefixed with a "{namespace}").
# Whitespace matches without filling either group.
xpath_tokenizer_re = re.compile(
    "(" + "|".join((
        "'[^']*'",              # single-quoted string
        "\"[^\"]*\"",           # double-quoted string
        "::",
        "//?",
        r"\.\.",
        r"\(\)",
        r"[/.*:\[\]\(\)@=]",    # single-character operators
    )) + ")|"
    + r"((?:\{[^}]+\})?[^/\[\]\(\)@=\s]+)" + "|"
    + r"\s+"
)
72
+
73
def xpath_tokenizer(pattern, namespaces=None, with_prefixes=True):
    """Generate (operator, name) tokens for the path expression *pattern*.

    Names are qualified on the way: a "prefix:name" is resolved through
    *namespaces* (only if *with_prefixes* is true), and a plain name gets
    the default namespace unless it is decimal or follows an '@'.

    Raises SyntaxError for a prefix that is not in the prefix map.
    """
    # ElementTree uses '', lxml used None originally.
    if namespaces:
        default_namespace = namespaces.get(None) or namespaces.get('')
    else:
        default_namespace = None

    after_at_sign = False
    for token in xpath_tokenizer_re.findall(pattern):
        operator, name = token

        if not name or name[0] == "{":
            # operator, whitespace, or a name that is already qualified
            yield token
            after_at_sign = operator == '@'
            continue

        if with_prefixes and ":" in name:
            prefix, local_name = name.split(":", 1)
            try:
                if not namespaces:
                    raise KeyError
                namespace_uri = namespaces[prefix]
            except KeyError:
                raise SyntaxError("prefix %r not found in prefix map" % prefix)
            yield operator, "{%s}%s" % (namespace_uri, local_name)
        elif name.isdecimal():
            yield token  # index
        elif default_namespace and not after_at_sign:
            yield operator, "{%s}%s" % (default_namespace, name)
        else:
            yield token
        after_at_sign = False
98
+
99
+
100
def prepare_child(next, token):
    """Build a selector that yields the children named by *token*."""
    wanted_tag = token[1]

    def select(result):
        for parent in result:
            for child in parent.iterchildren(wanted_tag):
                yield child

    return select
106
+
107
def prepare_star(next, token):
    """Build a selector that yields all element children ('*')."""
    def select(result):
        for parent in result:
            for child in parent.iterchildren('*'):
                yield child

    return select
112
+
113
def prepare_self(next, token):
    """Build the selector for '.', which passes its input through unchanged."""
    def select(result):
        # '.' selects the current nodes themselves
        return result

    return select
117
+
118
def prepare_descendant(next, token):
    """Build a selector for '//', which yields matching descendants.

    The token after '//' is consumed here and must be either '*' or a
    name; anything else raises SyntaxError.
    """
    operator, name = next()
    if operator == "*":
        wanted_tag = "*"
    elif operator:
        raise SyntaxError("invalid descendant")
    else:
        wanted_tag = name

    def select(result):
        for ancestor in result:
            for descendant in ancestor.iterdescendants(wanted_tag):
                yield descendant

    return select
130
+
131
def prepare_parent(next, token):
    """Build a selector for '..', which yields the parent of each node."""
    def select(result):
        for node in result:
            parent = node.getparent()
            if parent is None:
                # nodes without a parent contribute nothing
                continue
            yield parent

    return select
138
+
139
def prepare_predicate(next, token):
    """Build a selector for a predicate, i.e. the expression inside '[...]'.

    Tokens are consumed up to the closing ']' and condensed into a
    signature string, which determines the predicate type.  Supported are
    [@attribute], [@attribute='value'], [tag], [.='value'], [tag='value'],
    [index], [last()] and [last()-index].  Anything else raises SyntaxError.
    """
    # FIXME: replace with real parser!!! refs:
    # http://effbot.org/zone/simple-iterator-parser.htm
    # http://javascript.crockford.com/tdop/tdop.html
    kinds = []
    predicate = []
    while True:
        token = next()
        if token[0] == "]":
            break
        if token == ('', ''):
            # ignore whitespace
            continue
        if token[0] and token[0][:1] in "'\"":
            # quoted string: normalise the kind to "'" and strip the quotes
            token = "'", token[0][1:-1]
        kinds.append(token[0] or "-")
        predicate.append(token[1])
    signature = "".join(kinds)

    # use signature to determine predicate type
    if signature == "@-":
        # [@attribute] predicate
        attr_name = predicate[1]

        def select(result):
            for elem in result:
                if elem.get(attr_name) is None:
                    continue
                yield elem
        return select

    if signature == "@-='":
        # [@attribute='value']
        attr_name = predicate[1]
        expected = predicate[-1]

        def select(result):
            for elem in result:
                if elem.get(attr_name) != expected:
                    continue
                yield elem
        return select

    if signature == "-" and not re.match(r"-?\d+$", predicate[0]):
        # [tag]
        child_tag = predicate[0]

        def select(result):
            for elem in result:
                # one matching child is enough
                if any(True for _ in elem.iterchildren(child_tag)):
                    yield elem
        return select

    if signature == ".='" or (signature == "-='" and not re.match(r"-?\d+$", predicate[0])):
        # [.='value'] or [tag='value']
        child_tag = predicate[0]
        expected = predicate[-1]
        if child_tag:
            def select(result):
                for elem in result:
                    if any("".join(child.itertext()) == expected
                           for child in elem.iterchildren(child_tag)):
                        yield elem
        else:
            def select(result):
                for elem in result:
                    if "".join(elem.itertext()) == expected:
                        yield elem
        return select

    if signature in ("-", "-()", "-()-"):
        # [index] or [last()] or [last()-index]
        if signature == "-":
            # [index]
            index = int(predicate[0]) - 1
            if index == -1:
                raise SyntaxError(
                    "indices in path predicates are 1-based, not 0-based")
            if index < 0:
                raise SyntaxError("path index >= 1 expected")
        elif predicate[0] != "last":
            raise SyntaxError("unsupported function")
        elif signature == "-()-":
            try:
                index = int(predicate[2]) - 1
            except ValueError:
                raise SyntaxError("unsupported expression")
        else:
            index = -1

        def select(result):
            for elem in result:
                parent = elem.getparent()
                if parent is None:
                    continue
                try:
                    # FIXME: what if the selector is "*" ?
                    siblings = list(parent.iterchildren(elem.tag))
                    candidate = siblings[index]
                except IndexError:
                    continue
                if candidate is elem:
                    yield elem
        return select

    raise SyntaxError("invalid predicate")
236
+
237
# Dispatch table: maps the operator part of a token to the function that
# builds the selector for it.  Plain names have an empty operator.
ops = dict((
    ("", prepare_child),
    ("*", prepare_star),
    (".", prepare_self),
    ("..", prepare_parent),
    ("//", prepare_descendant),
    ("[", prepare_predicate),
))
245
+
246
+
247
+ # --------------------------------------------------------------------
248
+
249
# Compiled selectors, keyed by path, prefix handling and namespace mapping.
_cache = {}


def _build_path_iterator(path, namespaces, with_prefixes=True):
    """Compile the selector pattern *path* into a list of selector functions.

    path -- the path expression; a trailing "/" implies "/*".
    namespaces -- optional prefix-to-URI mapping.  The default namespace
        may be given under the key '' or None, but not with two different
        values.
    with_prefixes -- passed on to the tokenizer to enable prefix handling.

    Returns the list of selectors, which is cached for later calls.

    Raises ValueError for an ambiguous default namespace and SyntaxError
    for absolute, empty or invalid paths.
    """
    if path[-1:] == "/":
        path += "*"  # implicit all (FIXME: keep this?)

    # 'with_prefixes' changes how the path gets tokenized, so it has to be
    # part of the key to keep both variants apart in the cache.
    cache_key = (path, bool(with_prefixes))
    if namespaces:
        # lxml originally used None for the default namespace but ElementTree uses the
        # more convenient (all-strings-dict) empty string, so we support both here,
        # preferring the more convenient '', as long as they aren't ambiguous.
        if None in namespaces:
            if '' in namespaces and namespaces[None] != namespaces['']:
                raise ValueError("Ambiguous default namespace provided: %r versus %r" % (
                    namespaces[None], namespaces['']))
            cache_key += (namespaces[None],) + tuple(sorted(
                item for item in namespaces.items() if item[0] is not None))
        else:
            cache_key += tuple(sorted(namespaces.items()))

    try:
        return _cache[cache_key]
    except KeyError:
        pass
    if len(_cache) > 100:
        # crude size limit: start over instead of evicting single entries
        _cache.clear()

    if path[:1] == "/":
        raise SyntaxError("cannot use absolute path on element")
    stream = iter(xpath_tokenizer(path, namespaces, with_prefixes=with_prefixes))
    _next = stream.__next__
    try:
        token = _next()
    except StopIteration:
        raise SyntaxError("empty path expression")
    selector = []
    while True:
        try:
            # the prepare function may consume further tokens itself
            selector.append(ops[token[0]](_next, token))
        except StopIteration:
            raise SyntaxError("invalid path")
        try:
            token = _next()
            if token[0] == "/":
                token = _next()
        except StopIteration:
            break
    _cache[cache_key] = selector
    return selector
304
+
305
+
306
+ ##
307
+ # Iterate over the matching nodes
308
+
309
def iterfind(elem, path, namespaces=None, with_prefixes=True):
    """Return an iterator over the nodes that match *path*, starting at *elem*.

    Each selector of the compiled path is applied to the output of the
    one before it.
    """
    matches = iter((elem,))
    for step in _build_path_iterator(path, namespaces, with_prefixes=with_prefixes):
        matches = step(matches)
    return matches
315
+
316
+
317
+ ##
318
+ # Find first matching object.
319
+
320
def find(elem, path, namespaces=None, with_prefixes=True):
    """Return the first node that matches *path*, or None without a match."""
    matches = iterfind(elem, path, namespaces, with_prefixes=with_prefixes)
    return next(matches, None)
326
+
327
+
328
+ ##
329
+ # Find all matching objects.
330
+
331
def findall(elem, path, namespaces=None, with_prefixes=True):
    """Return a list of all nodes that match *path*, starting at *elem*.

    'namespaces' and 'with_prefixes' are passed on to iterfind().
    """
    # 'with_prefixes' must be forwarded, otherwise it is silently ignored.
    return list(iterfind(elem, path, namespaces, with_prefixes=with_prefixes))
333
+
334
+
335
+ ##
336
+ # Find text for first matching object.
337
+
338
def findtext(elem, path, default=None, namespaces=None, with_prefixes=True):
    """Return the text of the first node that matches *path*.

    Without a match, *default* is returned.  A match that has no text
    gives the empty string.
    """
    match = find(elem, path, namespaces, with_prefixes=with_prefixes)
    if match is not None:
        return match.text or ''
    return default