lxml 6.0.0__cp39-cp39-musllinux_1_2_armv7l.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (174) hide show
  1. lxml/ElementInclude.py +244 -0
  2. lxml/__init__.py +22 -0
  3. lxml/_elementpath.cpython-39-arm-linux-gnueabihf.so +0 -0
  4. lxml/_elementpath.py +343 -0
  5. lxml/apihelpers.pxi +1801 -0
  6. lxml/builder.cpython-39-arm-linux-gnueabihf.so +0 -0
  7. lxml/builder.py +243 -0
  8. lxml/classlookup.pxi +580 -0
  9. lxml/cleanup.pxi +215 -0
  10. lxml/cssselect.py +101 -0
  11. lxml/debug.pxi +36 -0
  12. lxml/docloader.pxi +178 -0
  13. lxml/doctestcompare.py +488 -0
  14. lxml/dtd.pxi +479 -0
  15. lxml/etree.cpython-39-arm-linux-gnueabihf.so +0 -0
  16. lxml/etree.h +244 -0
  17. lxml/etree.pyx +3853 -0
  18. lxml/etree_api.h +204 -0
  19. lxml/extensions.pxi +830 -0
  20. lxml/html/ElementSoup.py +10 -0
  21. lxml/html/__init__.py +1927 -0
  22. lxml/html/_diffcommand.py +86 -0
  23. lxml/html/_difflib.cpython-39-arm-linux-gnueabihf.so +0 -0
  24. lxml/html/_difflib.py +2106 -0
  25. lxml/html/_html5builder.py +100 -0
  26. lxml/html/_setmixin.py +56 -0
  27. lxml/html/builder.py +173 -0
  28. lxml/html/clean.py +21 -0
  29. lxml/html/defs.py +135 -0
  30. lxml/html/diff.cpython-39-arm-linux-gnueabihf.so +0 -0
  31. lxml/html/diff.py +972 -0
  32. lxml/html/formfill.py +299 -0
  33. lxml/html/html5parser.py +260 -0
  34. lxml/html/soupparser.py +314 -0
  35. lxml/html/usedoctest.py +13 -0
  36. lxml/includes/__init__.pxd +0 -0
  37. lxml/includes/__init__.py +0 -0
  38. lxml/includes/c14n.pxd +25 -0
  39. lxml/includes/config.pxd +3 -0
  40. lxml/includes/dtdvalid.pxd +18 -0
  41. lxml/includes/etree_defs.h +379 -0
  42. lxml/includes/etreepublic.pxd +237 -0
  43. lxml/includes/extlibs/__init__.py +0 -0
  44. lxml/includes/extlibs/libcharset.h +45 -0
  45. lxml/includes/extlibs/localcharset.h +137 -0
  46. lxml/includes/extlibs/zconf.h +543 -0
  47. lxml/includes/extlibs/zlib.h +1938 -0
  48. lxml/includes/htmlparser.pxd +56 -0
  49. lxml/includes/libexslt/__init__.py +0 -0
  50. lxml/includes/libexslt/exslt.h +108 -0
  51. lxml/includes/libexslt/exsltconfig.h +70 -0
  52. lxml/includes/libexslt/exsltexports.h +63 -0
  53. lxml/includes/libxml/HTMLparser.h +339 -0
  54. lxml/includes/libxml/HTMLtree.h +148 -0
  55. lxml/includes/libxml/SAX.h +18 -0
  56. lxml/includes/libxml/SAX2.h +170 -0
  57. lxml/includes/libxml/__init__.py +0 -0
  58. lxml/includes/libxml/c14n.h +115 -0
  59. lxml/includes/libxml/catalog.h +183 -0
  60. lxml/includes/libxml/chvalid.h +230 -0
  61. lxml/includes/libxml/debugXML.h +79 -0
  62. lxml/includes/libxml/dict.h +82 -0
  63. lxml/includes/libxml/encoding.h +307 -0
  64. lxml/includes/libxml/entities.h +147 -0
  65. lxml/includes/libxml/globals.h +25 -0
  66. lxml/includes/libxml/hash.h +251 -0
  67. lxml/includes/libxml/list.h +137 -0
  68. lxml/includes/libxml/nanoftp.h +16 -0
  69. lxml/includes/libxml/nanohttp.h +98 -0
  70. lxml/includes/libxml/parser.h +1633 -0
  71. lxml/includes/libxml/parserInternals.h +591 -0
  72. lxml/includes/libxml/relaxng.h +224 -0
  73. lxml/includes/libxml/schemasInternals.h +959 -0
  74. lxml/includes/libxml/schematron.h +143 -0
  75. lxml/includes/libxml/threads.h +81 -0
  76. lxml/includes/libxml/tree.h +1326 -0
  77. lxml/includes/libxml/uri.h +106 -0
  78. lxml/includes/libxml/valid.h +485 -0
  79. lxml/includes/libxml/xinclude.h +141 -0
  80. lxml/includes/libxml/xlink.h +193 -0
  81. lxml/includes/libxml/xmlIO.h +419 -0
  82. lxml/includes/libxml/xmlautomata.h +163 -0
  83. lxml/includes/libxml/xmlerror.h +962 -0
  84. lxml/includes/libxml/xmlexports.h +96 -0
  85. lxml/includes/libxml/xmlmemory.h +188 -0
  86. lxml/includes/libxml/xmlmodule.h +61 -0
  87. lxml/includes/libxml/xmlreader.h +444 -0
  88. lxml/includes/libxml/xmlregexp.h +116 -0
  89. lxml/includes/libxml/xmlsave.h +111 -0
  90. lxml/includes/libxml/xmlschemas.h +254 -0
  91. lxml/includes/libxml/xmlschemastypes.h +152 -0
  92. lxml/includes/libxml/xmlstring.h +140 -0
  93. lxml/includes/libxml/xmlunicode.h +15 -0
  94. lxml/includes/libxml/xmlversion.h +332 -0
  95. lxml/includes/libxml/xmlwriter.h +489 -0
  96. lxml/includes/libxml/xpath.h +569 -0
  97. lxml/includes/libxml/xpathInternals.h +639 -0
  98. lxml/includes/libxml/xpointer.h +48 -0
  99. lxml/includes/libxslt/__init__.py +0 -0
  100. lxml/includes/libxslt/attributes.h +39 -0
  101. lxml/includes/libxslt/documents.h +93 -0
  102. lxml/includes/libxslt/extensions.h +262 -0
  103. lxml/includes/libxslt/extra.h +72 -0
  104. lxml/includes/libxslt/functions.h +78 -0
  105. lxml/includes/libxslt/imports.h +75 -0
  106. lxml/includes/libxslt/keys.h +53 -0
  107. lxml/includes/libxslt/namespaces.h +68 -0
  108. lxml/includes/libxslt/numbersInternals.h +73 -0
  109. lxml/includes/libxslt/pattern.h +84 -0
  110. lxml/includes/libxslt/preproc.h +43 -0
  111. lxml/includes/libxslt/security.h +104 -0
  112. lxml/includes/libxslt/templates.h +77 -0
  113. lxml/includes/libxslt/transform.h +207 -0
  114. lxml/includes/libxslt/variables.h +118 -0
  115. lxml/includes/libxslt/xslt.h +110 -0
  116. lxml/includes/libxslt/xsltInternals.h +1995 -0
  117. lxml/includes/libxslt/xsltconfig.h +146 -0
  118. lxml/includes/libxslt/xsltexports.h +64 -0
  119. lxml/includes/libxslt/xsltlocale.h +44 -0
  120. lxml/includes/libxslt/xsltutils.h +343 -0
  121. lxml/includes/lxml-version.h +3 -0
  122. lxml/includes/relaxng.pxd +64 -0
  123. lxml/includes/schematron.pxd +34 -0
  124. lxml/includes/tree.pxd +492 -0
  125. lxml/includes/uri.pxd +5 -0
  126. lxml/includes/xinclude.pxd +22 -0
  127. lxml/includes/xmlerror.pxd +852 -0
  128. lxml/includes/xmlparser.pxd +303 -0
  129. lxml/includes/xmlschema.pxd +35 -0
  130. lxml/includes/xpath.pxd +136 -0
  131. lxml/includes/xslt.pxd +190 -0
  132. lxml/isoschematron/__init__.py +348 -0
  133. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -0
  134. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -0
  135. lxml/isoschematron/resources/xsl/XSD2Schtrn.xsl +77 -0
  136. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +313 -0
  137. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1160 -0
  138. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +55 -0
  139. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -0
  140. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -0
  141. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/readme.txt +84 -0
  142. lxml/iterparse.pxi +438 -0
  143. lxml/lxml.etree.h +244 -0
  144. lxml/lxml.etree_api.h +204 -0
  145. lxml/nsclasses.pxi +281 -0
  146. lxml/objectify.cpython-39-arm-linux-gnueabihf.so +0 -0
  147. lxml/objectify.pyx +2149 -0
  148. lxml/objectpath.pxi +332 -0
  149. lxml/parser.pxi +2059 -0
  150. lxml/parsertarget.pxi +180 -0
  151. lxml/proxy.pxi +619 -0
  152. lxml/public-api.pxi +178 -0
  153. lxml/pyclasslookup.py +3 -0
  154. lxml/readonlytree.pxi +565 -0
  155. lxml/relaxng.pxi +165 -0
  156. lxml/sax.cpython-39-arm-linux-gnueabihf.so +0 -0
  157. lxml/sax.py +286 -0
  158. lxml/saxparser.pxi +875 -0
  159. lxml/schematron.pxi +173 -0
  160. lxml/serializer.pxi +1849 -0
  161. lxml/usedoctest.py +13 -0
  162. lxml/xinclude.pxi +67 -0
  163. lxml/xmlerror.pxi +1654 -0
  164. lxml/xmlid.pxi +179 -0
  165. lxml/xmlschema.pxi +215 -0
  166. lxml/xpath.pxi +487 -0
  167. lxml/xslt.pxi +957 -0
  168. lxml/xsltext.pxi +242 -0
  169. lxml-6.0.0.dist-info/METADATA +163 -0
  170. lxml-6.0.0.dist-info/RECORD +174 -0
  171. lxml-6.0.0.dist-info/WHEEL +5 -0
  172. lxml-6.0.0.dist-info/licenses/LICENSE.txt +31 -0
  173. lxml-6.0.0.dist-info/licenses/LICENSES.txt +29 -0
  174. lxml-6.0.0.dist-info/top_level.txt +1 -0
lxml/cleanup.pxi ADDED
@@ -0,0 +1,215 @@
1
+ # functions for tree cleanup and removing elements from subtrees
2
+
3
def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None):
    """cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None)

    Strip those namespace declarations from a subtree that none of its
    elements or attributes make use of.

    'top_nsmap', if given, must map prefixes to namespace URIs.  Its
    namespaces get declared on the top element of the subtree before the
    cleanup runs, which makes it possible to move namespace declarations
    up to the top of the tree.

    'keep_ns_prefixes', if given, must be a list of prefixes.  The
    cleanup leaves these prefixes in place.
    """
    element = _rootNodeOrRaise(tree_or_element)
    c_element = element._c_node

    if top_nsmap:
        doc = element._doc
        # first declare the requested namespaces on the top node, then
        # apply them to the subtree
        _setNodeNamespaces(c_element, doc, None, top_nsmap)
        moveNodeToDocument(doc, c_element.doc, c_element)

    # pass the prefixes on as a set of _utf8() results; an empty or
    # missing argument is passed on as None
    if keep_ns_prefixes:
        keep_ns_prefixes = {_utf8(prefix) for prefix in keep_ns_prefixes}
    else:
        keep_ns_prefixes = None

    _removeUnusedNamespaceDeclarations(c_element, keep_ns_prefixes)
31
+
32
+
33
def strip_attributes(tree_or_element, *attribute_names):
    """strip_attributes(tree_or_element, *attribute_names)

    Remove every attribute whose name is among the given attribute names
    from an Element (or ElementTree) and from all of its descendants.

    Wildcards are supported in attribute names, as in `_Element.iter`.

    Example usage::

        strip_attributes(root_element,
                         'simpleattr',
                         '{http://some/ns}attrname',
                         '{http://other/ns}*')
    """
    cdef _MultiTagMatcher matcher
    element = _rootNodeOrRaise(tree_or_element)
    if attribute_names:
        matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, attribute_names)
        matcher.cacheTags(element._doc)
        # skip the tree walk when the matcher rejects all attributes
        if not matcher.rejectsAllAttributes():
            _strip_attributes(element._c_node, matcher)
58
+
59
+
60
cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher):
    # Remove every attribute accepted by 'matcher' from the element nodes
    # visited by the tree iteration that starts at 'c_node'.
    cdef xmlAttr* c_attr
    cdef xmlAttr* c_next_attr
    # NOTE(review): the BEGIN/END macro pair presumably turns the
    # statements in between into the body of a loop that rebinds 'c_node'
    # to each visited node, with the trailing 1 including the start node
    # itself - confirm against the macro definition.
    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        c_attr = c_node.properties
        while c_attr is not NULL:
            # read the successor first, c_attr may get removed below
            c_next_attr = c_attr.next
            if matcher.matchesAttribute(c_attr):
                tree.xmlRemoveProp(c_attr)
            c_attr = c_next_attr
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
72
+
73
+
74
def strip_elements(tree_or_element, *tag_names, bint with_tail=True):
    """strip_elements(tree_or_element, *tag_names, with_tail=True)

    Remove all elements that carry one of the given tag names from a
    tree or subtree.  Each matching element goes away together with its
    complete subtree: attributes, text content and descendants.  The
    tail text of the element is removed as well, unless the
    ``with_tail`` keyword argument is explicitly set to False.

    Wildcards are supported in tag names, as in `_Element.iter`.

    The element (or ElementTree root element) that you pass in is never
    deleted, even if it matches.  Only its descendants are treated.  To
    include the root element, check its tag name yourself before calling
    this function.

    Example usage::

        strip_elements(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            lxml.etree.Comment           # comments
            )
    """
    cdef _MultiTagMatcher matcher
    doc = _documentOrRaise(tree_or_element)
    element = _rootNodeOrRaise(tree_or_element)
    if not tag_names:
        return

    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
    matcher.cacheTags(doc)
    if matcher.rejectsAll():
        return

    c_root = element._c_node
    if isinstance(tree_or_element, _ElementTree):
        # for a whole tree, comments and PIs that sit next to the root
        # node are covered as well
        if matcher.matchesType(tree.XML_COMMENT_NODE):
            _removeSiblings(c_root, tree.XML_COMMENT_NODE, with_tail)
        if matcher.matchesType(tree.XML_PI_NODE):
            _removeSiblings(c_root, tree.XML_PI_NODE, with_tail)
    _strip_elements(doc, c_root, matcher, with_tail)
117
+
118
cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher,
                     bint with_tail):
    # Remove the children accepted by 'matcher' from each element node
    # visited by the tree iteration that starts at 'c_node'.  'with_tail'
    # selects whether the tail text of a removed node goes away with it.
    cdef xmlNode* c_child
    cdef xmlNode* c_next

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # we run through the children here to prevent any problems
        # with the tree iteration which would occur if we unlinked the
        # c_node itself
        c_child = _findChildForwards(c_node, 0)
        while c_child is not NULL:
            # look up the successor before c_child is possibly unlinked
            c_next = _nextElement(c_child)
            if matcher.matches(c_child):
                if c_child.type == tree.XML_ELEMENT_NODE:
                    if not with_tail:
                        # NOTE(review): presumably unlinking first cuts
                        # the node off from its tail text so that
                        # _removeNode() leaves the tail in the tree -
                        # confirm against _removeNode()
                        tree.xmlUnlinkNode(c_child)
                    _removeNode(doc, c_child)
                else:
                    # matched node that is not an element
                    if with_tail:
                        _removeText(c_child.next)
                    tree.xmlUnlinkNode(c_child)
                    attemptDeallocation(c_child)
            c_child = c_next
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
143
+
144
+
145
def strip_tags(tree_or_element, *tag_names):
    """strip_tags(tree_or_element, *tag_names)

    Remove all elements that carry one of the given tag names from a
    tree or subtree.  The elements and their attributes go away, but
    their text/tail content and descendants do *not*.  Instead, the
    text content and children of each removed element are merged into
    its parent.

    Wildcards are supported in tag names, as in `_Element.iter`.

    The element (or ElementTree root element) that you pass in is never
    deleted, even if it matches.  Only its descendants are treated.

    Example usage::

        strip_tags(some_element,
            'simpletagname',             # non-namespaced tag
            '{http://some/ns}tagname',   # namespaced tag
            '{http://some/other/ns}*',   # any tag from a namespace
            Comment                      # comments (including their text!)
            )
    """
    cdef _MultiTagMatcher matcher
    doc = _documentOrRaise(tree_or_element)
    element = _rootNodeOrRaise(tree_or_element)
    if not tag_names:
        return

    matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
    matcher.cacheTags(doc)
    if matcher.rejectsAll():
        return

    c_root = element._c_node
    if isinstance(tree_or_element, _ElementTree):
        # for a whole tree, comments and PIs that sit next to the root
        # node are covered as well
        if matcher.matchesType(tree.XML_COMMENT_NODE):
            _removeSiblings(c_root, tree.XML_COMMENT_NODE, 0)
        if matcher.matchesType(tree.XML_PI_NODE):
            _removeSiblings(c_root, tree.XML_PI_NODE, 0)
    _strip_tags(doc, c_root, matcher)
187
+
188
cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher):
    # For each element node visited by the tree iteration that starts at
    # 'c_node', take out the children accepted by 'matcher': matching
    # elements are replaced by their own children, other matching nodes
    # are unlinked.
    cdef xmlNode* c_child
    cdef xmlNode* c_next

    tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
    if c_node.type == tree.XML_ELEMENT_NODE:
        # we run through the children here to prevent any problems
        # with the tree iteration which would occur if we unlinked the
        # c_node itself
        c_child = _findChildForwards(c_node, 0)
        while c_child is not NULL:
            if not matcher.matches(c_child):
                c_child = _nextElement(c_child)
                continue
            if c_child.type == tree.XML_ELEMENT_NODE:
                # continue at the first child of the stripped element if
                # it has one, otherwise at its next sibling; this must be
                # looked up before the element is replaced
                c_next = _findChildForwards(c_child, 0) or _nextElement(c_child)
                _replaceNodeByChildren(doc, c_child)
                # NOTE(review): a false result presumably means the node
                # is still in use and was kept alive - confirm against
                # attemptDeallocation()
                if not attemptDeallocation(c_child):
                    if c_child.nsDef is not NULL:
                        # make namespaces absolute
                        moveNodeToDocument(doc, doc._c_doc, c_child)
                c_child = c_next
            else:
                # matched node that is not an element: drop the node
                c_next = _nextElement(c_child)
                tree.xmlUnlinkNode(c_child)
                attemptDeallocation(c_child)
                c_child = c_next
    tree.END_FOR_EACH_ELEMENT_FROM(c_node)
lxml/cssselect.py ADDED
@@ -0,0 +1,101 @@
1
+ """CSS Selectors based on XPath.
2
+
3
+ This module supports selecting XML/HTML tags based on CSS selectors.
4
+ See the `CSSSelector` class for details.
5
+
6
+ This is a thin wrapper around cssselect 0.7 or later.
7
+ """
8
+
9
+
10
+ from . import etree
11
+ try:
12
+ import cssselect as external_cssselect
13
+ except ImportError:
14
+ raise ImportError(
15
+ 'cssselect does not seem to be installed. '
16
+ 'See https://pypi.org/project/cssselect/')
17
+
18
+
19
# Make these names of the external cssselect package available from this
# module as well.
SelectorSyntaxError = external_cssselect.SelectorSyntaxError
ExpressionError = external_cssselect.ExpressionError
SelectorError = external_cssselect.SelectorError


# Names exported by ``from lxml.cssselect import *``.
__all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
           'CSSSelector']
26
+
27
+
28
class LxmlTranslator(external_cssselect.GenericTranslator):
    """
    CSS-selector-to-XPath translator extended with lxml-specific features.
    """
    def xpath_contains_function(self, xpath, function):
        """Add a lower-cased text containment condition for ``:contains()``.

        Raises ExpressionError unless the function received a single
        string or ident argument.
        """
        # :contains() was defined in this draft and removed in later ones:
        # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
        if function.argument_types() not in (['STRING'], ['IDENT']):
            raise ExpressionError(
                "Expected a single string or ident for :contains(), got %r"
                % function.arguments)
        # both sides of the comparison are lower-cased
        needle = function.arguments[0].value.lower()
        condition = (
            'contains(__lxml_internal_css:lower-case(string(.)), %s)'
            % self.xpath_literal(needle))
        return xpath.add_condition(condition)
43
+
44
+
45
class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
    """
    lxml extensions + HTML support.

    Combines `LxmlTranslator` with the HTML translator of the external
    cssselect package; it adds no members of its own.
    """
49
+
50
+
51
+ def _make_lower_case(context, s):
52
+ return s.lower()
53
+
54
# Register _make_lower_case() as the 'lower-case' function of a dedicated
# function namespace.  The prefix set here is the one used in the XPath
# condition built by LxmlTranslator.xpath_contains_function().
ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
ns.prefix = '__lxml_internal_css'
ns['lower-case'] = _make_lower_case
57
+
58
+
59
class CSSSelector(etree.XPath):
    """A CSS selector.

    Usage::

        >>> from lxml import etree, cssselect
        >>> select = cssselect.CSSSelector("a tag > child")

        >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
        >>> [ el.tag for el in select(root) ]
        ['child']

    To use CSS namespaces, you need to pass a prefix-to-namespace
    mapping as ``namespaces`` keyword argument::

        >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
        >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
        ...                                   namespaces={'rdf': rdfns})

        >>> rdf = etree.XML((
        ...     '<root xmlns:rdf="%s">'
        ...       '<rdf:Description>blah</rdf:Description>'
        ...     '</root>') % rdfns)
        >>> [(el.tag, el.text) for el in select_ns(rdf)]
        [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]

    The ``translator`` argument accepts the names 'xml' (the default),
    'html' and 'xhtml'.  Any other value is used as the translator
    object itself, i.e. its ``css_to_xpath()`` method gets called.
    """
    def __init__(self, css, namespaces=None, translator='xml'):
        # map the well-known names to translator instances; everything
        # else is taken as a ready-made translator
        if translator == 'xml':
            translator = LxmlTranslator()
        elif translator == 'html':
            translator = LxmlHTMLTranslator()
        elif translator == 'xhtml':
            translator = LxmlHTMLTranslator(xhtml=True)
        xpath_expression = translator.css_to_xpath(css)
        super().__init__(xpath_expression, namespaces=namespaces)
        # keep the original selector text, __repr__() shows it
        self.css = css

    def __repr__(self):
        class_name = self.__class__.__name__
        identity = abs(id(self))
        return '<%s %x for %r>' % (class_name, identity, self.css)
lxml/debug.pxi ADDED
@@ -0,0 +1,36 @@
1
@cython.final
@cython.internal
cdef class _MemDebug:
    """Debugging helper for the memory allocation in libxml2.
    """
    def bytes_used(self):
        """bytes_used(self)

        Return the total amount of memory (in bytes) that libxml2
        currently uses.  libxml2 constrains this value to a C int, which
        limits the accuracy on 64 bit systems.
        """
        return tree.xmlMemUsed()

    def blocks_used(self):
        """blocks_used(self)

        Return the total number of memory blocks that libxml2 currently
        has allocated.  libxml2 constrains this value to a C int, which
        limits the accuracy on 64 bit systems.
        """
        return tree.xmlMemBlocks()

    def dict_size(self):
        """dict_size(self)

        Return the current size of the global name dictionary that
        libxml2 uses for the current thread.  Each thread has its own
        dictionary.

        Raises MemoryError if no dictionary could be obtained.
        """
        c_dict = __GLOBAL_PARSER_CONTEXT._getThreadDict(NULL)
        if c_dict is not NULL:
            return tree.xmlDictSize(c_dict)
        raise MemoryError()


# module level instance of the debugging helper
memory_debugger = _MemDebug()
lxml/docloader.pxi ADDED
@@ -0,0 +1,178 @@
1
+ # Custom resolver API
2
+
3
# Kind of input that an _InputDocument refers to.
ctypedef enum _InputDocumentDataType:
    PARSER_DATA_INVALID   # initial value set by _InputDocument.__cinit__()
    PARSER_DATA_EMPTY     # set by Resolver.resolve_empty()
    PARSER_DATA_STRING    # set by Resolver.resolve_string()
    PARSER_DATA_FILENAME  # set by Resolver.resolve_filename()
    PARSER_DATA_FILE      # set by Resolver.resolve_file()
9
+
10
@cython.final
@cython.internal
cdef class _InputDocument:
    # Description of an input source as returned by the Resolver.resolve_*()
    # methods.  Which fields are filled in depends on '_type'.
    cdef _InputDocumentDataType _type
    # encoded data, set by Resolver.resolve_string()
    cdef bytes _data_bytes
    # result of _encodeFilename() / _getFilenameForFile(), if one was set
    cdef object _filename
    # file-like object, set by Resolver.resolve_file()
    cdef object _file
    # the 'close' flag that was passed to Resolver.resolve_file()
    cdef bint _close_file

    def __cinit__(self):
        # a new instance is invalid until a resolve_*() method fills it in
        self._type = PARSER_DATA_INVALID
21
+
22
+
23
cdef class Resolver:
    """Base class that all resolvers derive from."""

    def resolve(self, system_url, public_id, context):
        """resolve(self, system_url, public_id, context)

        Hook for subclasses: resolve an external source identified by
        ``system_url`` and ``public_id``.  ``context`` is an opaque
        context object.

        Implementations return the result of one of the ``resolve_*()``
        methods.  This default implementation returns None.
        """
        return None

    def resolve_empty(self, context):
        """resolve_empty(self, context)

        Build an input document that is empty.

        Takes the context as parameter.
        """
        cdef _InputDocument doc_ref = _InputDocument()
        doc_ref._type = PARSER_DATA_EMPTY
        return doc_ref

    def resolve_string(self, string, context, *, base_url=None):
        """resolve_string(self, string, context, base_url=None)

        Build an input document from a parsable string.

        Takes the data string and the context as parameters.  The source
        URL or filename can be provided through the ``base_url`` keyword
        argument.

        Raises TypeError if ``string`` is neither a byte string nor a
        unicode string.
        """
        cdef _InputDocument doc_ref
        if isinstance(string, unicode):
            # unicode input is stored UTF-8 encoded
            string = (<unicode>string).encode('utf8')
        elif not isinstance(string, bytes):
            raise TypeError("argument must be a byte string or unicode string")
        doc_ref = _InputDocument()
        doc_ref._data_bytes = string
        doc_ref._type = PARSER_DATA_STRING
        if base_url is not None:
            doc_ref._filename = _encodeFilename(base_url)
        return doc_ref

    def resolve_filename(self, filename, context):
        """resolve_filename(self, filename, context)

        Build an input document from the name of a parsable file.

        Takes the filename and the context as parameters.  A URL with an
        HTTP, FTP or file target can be passed as well.
        """
        cdef _InputDocument doc_ref = _InputDocument()
        doc_ref._filename = _encodeFilename(filename)
        doc_ref._type = PARSER_DATA_FILENAME
        return doc_ref

    def resolve_file(self, f, context, *, base_url=None, bint close=True):
        """resolve_file(self, f, context, base_url=None, close=True)

        Build an input document from an open file-like object.

        Takes the open file and the context as parameters.  The base URL
        or filename of the file can be provided through the ``base_url``
        keyword argument.  With the ``close`` flag set to True (the
        default), the file gets closed after reading.

        Note that ``.resolve_filename()`` is the more efficient choice,
        especially in threaded environments.

        Raises TypeError if ``f`` has no ``read`` attribute.
        """
        cdef _InputDocument doc_ref
        try:
            f.read
        except AttributeError:
            raise TypeError("Argument is not a file-like object")
        doc_ref = _InputDocument()
        doc_ref._type = PARSER_DATA_FILE
        # without an explicit base URL, ask the file object for a name
        if base_url is None:
            doc_ref._filename = _getFilenameForFile(f)
        else:
            doc_ref._filename = _encodeFilename(base_url)
        doc_ref._file = f
        doc_ref._close_file = close
        return doc_ref
110
+
111
@cython.final
@cython.internal
cdef class _ResolverRegistry:
    # set of registered resolvers plus an optional default resolver
    cdef object _resolvers
    cdef Resolver _default_resolver

    def __cinit__(self, Resolver default_resolver=None):
        self._default_resolver = default_resolver
        self._resolvers = set()

    def add(self, Resolver resolver not None):
        """add(self, resolver)

        Register a resolver.

        The 'resolve' method of the resolver gets called for each
        requested entity, and its result is passed to the parser.  When
        that method returns None, the request is delegated to other
        resolvers or to the default resolver.  Resolvers are tried in an
        arbitrary order until the first match is found.
        """
        self._resolvers.add(resolver)

    def remove(self, resolver):
        """remove(self, resolver)

        Unregister a resolver.  A resolver that is not registered is
        silently ignored.
        """
        self._resolvers.discard(resolver)

    cdef _ResolverRegistry _copy(self):
        # new registry with the same default resolver and a copy of the
        # resolver set
        cdef _ResolverRegistry duplicate = _ResolverRegistry(
            self._default_resolver)
        duplicate._resolvers = self._resolvers.copy()
        return duplicate

    def copy(self):
        """copy(self)

        Return a new registry with the same default resolver and its own
        copy of the set of registered resolvers.
        """
        return self._copy()

    def resolve(self, system_url, public_id, context):
        """resolve(self, system_url, public_id, context)

        Return the first result of the registered resolvers that is not
        None.  If there is none, fall back to the default resolver, or
        return None if no default resolver is set.
        """
        for candidate in self._resolvers:
            found = candidate.resolve(system_url, public_id, context)
            if found is not None:
                return found
        if self._default_resolver is not None:
            return self._default_resolver.resolve(
                system_url, public_id, context)
        return None

    def __repr__(self):
        return repr(self._resolvers)
159
+
160
+
161
@cython.internal
cdef class _ResolverContext(_ExceptionContext):
    # Exception context that additionally carries a resolver registry and
    # a temporary store.  Both fields are assigned by
    # _initResolverContext().
    cdef _ResolverRegistry _resolvers
    cdef _TempStore _storage

    cdef int clear(self) except -1:
        # Run the inherited clear(), then clear the temporary store.
        # Returns 0; -1 is the declared error return value.
        _ExceptionContext.clear(self)
        self._storage.clear()
        return 0
170
+
171
+
172
cdef _initResolverContext(_ResolverContext context,
                          _ResolverRegistry resolvers):
    # Give the context its resolver registry (a fresh, empty one if the
    # caller passed None) and a new temporary store.
    context._resolvers = (
        resolvers if resolvers is not None else _ResolverRegistry())
    context._storage = _TempStore()