lxml 5.2.0__cp310-cp310-win32.whl → 5.2.2__cp310-cp310-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (89) hide show
  1. lxml/ElementInclude.py +244 -244
  2. lxml/__init__.py +22 -22
  3. lxml/_elementpath.cp310-win32.pyd +0 -0
  4. lxml/_elementpath.py +341 -341
  5. lxml/apihelpers.pxi +1793 -1793
  6. lxml/builder.cp310-win32.pyd +0 -0
  7. lxml/builder.py +232 -232
  8. lxml/classlookup.pxi +580 -580
  9. lxml/cleanup.pxi +215 -215
  10. lxml/cssselect.py +101 -101
  11. lxml/debug.pxi +90 -90
  12. lxml/docloader.pxi +178 -178
  13. lxml/doctestcompare.py +488 -488
  14. lxml/dtd.pxi +478 -478
  15. lxml/etree.cp310-win32.pyd +0 -0
  16. lxml/etree.h +6 -6
  17. lxml/etree.pyx +3732 -3711
  18. lxml/extensions.pxi +833 -833
  19. lxml/html/ElementSoup.py +10 -10
  20. lxml/html/__init__.py +1923 -1923
  21. lxml/html/_diffcommand.py +86 -86
  22. lxml/html/_html5builder.py +100 -100
  23. lxml/html/_setmixin.py +56 -56
  24. lxml/html/builder.py +133 -133
  25. lxml/html/clean.py +21 -21
  26. lxml/html/defs.py +135 -135
  27. lxml/html/diff.cp310-win32.pyd +0 -0
  28. lxml/html/diff.py +878 -878
  29. lxml/html/formfill.py +299 -299
  30. lxml/html/html5parser.py +260 -260
  31. lxml/html/soupparser.py +314 -314
  32. lxml/html/usedoctest.py +13 -13
  33. lxml/includes/c14n.pxd +25 -25
  34. lxml/includes/config.pxd +3 -3
  35. lxml/includes/dtdvalid.pxd +18 -18
  36. lxml/includes/etree_defs.h +379 -379
  37. lxml/includes/etreepublic.pxd +237 -237
  38. lxml/includes/htmlparser.pxd +56 -56
  39. lxml/includes/lxml-version.h +1 -1
  40. lxml/includes/relaxng.pxd +64 -64
  41. lxml/includes/schematron.pxd +34 -34
  42. lxml/includes/tree.pxd +494 -494
  43. lxml/includes/uri.pxd +5 -5
  44. lxml/includes/xinclude.pxd +22 -22
  45. lxml/includes/xmlerror.pxd +852 -852
  46. lxml/includes/xmlparser.pxd +265 -265
  47. lxml/includes/xmlschema.pxd +35 -35
  48. lxml/includes/xpath.pxd +136 -136
  49. lxml/includes/xslt.pxd +190 -190
  50. lxml/isoschematron/__init__.py +348 -348
  51. lxml/isoschematron/resources/rng/iso-schematron.rng +709 -709
  52. lxml/isoschematron/resources/xsl/RNG2Schtrn.xsl +75 -75
  53. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_abstract_expand.xsl +312 -312
  54. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_dsdl_include.xsl +1159 -1159
  55. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_message.xsl +54 -54
  56. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_schematron_skeleton_for_xslt1.xsl +1796 -1796
  57. lxml/isoschematron/resources/xsl/iso-schematron-xslt1/iso_svrl_for_xslt1.xsl +588 -588
  58. lxml/iterparse.pxi +438 -438
  59. lxml/lxml.etree.h +6 -6
  60. lxml/nsclasses.pxi +281 -281
  61. lxml/objectify.cp310-win32.pyd +0 -0
  62. lxml/objectify.pyx +2145 -2145
  63. lxml/objectpath.pxi +332 -332
  64. lxml/parser.pxi +1994 -1994
  65. lxml/parsertarget.pxi +180 -180
  66. lxml/proxy.pxi +619 -619
  67. lxml/public-api.pxi +178 -178
  68. lxml/pyclasslookup.py +3 -3
  69. lxml/readonlytree.pxi +565 -565
  70. lxml/relaxng.pxi +165 -165
  71. lxml/sax.cp310-win32.pyd +0 -0
  72. lxml/sax.py +275 -275
  73. lxml/saxparser.pxi +875 -875
  74. lxml/schematron.pxi +168 -168
  75. lxml/serializer.pxi +1871 -1871
  76. lxml/usedoctest.py +13 -13
  77. lxml/xinclude.pxi +67 -67
  78. lxml/xmlerror.pxi +1654 -1654
  79. lxml/xmlid.pxi +179 -179
  80. lxml/xmlschema.pxi +215 -215
  81. lxml/xpath.pxi +487 -487
  82. lxml/xslt.pxi +950 -950
  83. lxml/xsltext.pxi +242 -242
  84. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSE.txt +29 -29
  85. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/LICENSES.txt +29 -29
  86. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/METADATA +9 -17
  87. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/RECORD +89 -89
  88. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/WHEEL +0 -0
  89. {lxml-5.2.0.dist-info → lxml-5.2.2.dist-info}/top_level.txt +0 -0
lxml/cleanup.pxi CHANGED
@@ -1,215 +1,215 @@
1
- # functions for tree cleanup and removing elements from subtrees
2
-
3
- def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None):
4
- """cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None)
5
-
6
- Remove all namespace declarations from a subtree that are not used
7
- by any of the elements or attributes in that tree.
8
-
9
- If a 'top_nsmap' is provided, it must be a mapping from prefixes
10
- to namespace URIs. These namespaces will be declared on the top
11
- element of the subtree before running the cleanup, which allows
12
- moving namespace declarations to the top of the tree.
13
-
14
- If a 'keep_ns_prefixes' is provided, it must be a list of prefixes.
15
- These prefixes will not be removed as part of the cleanup.
16
- """
17
- element = _rootNodeOrRaise(tree_or_element)
18
- c_element = element._c_node
19
-
20
- if top_nsmap:
21
- doc = element._doc
22
- # declare namespaces from nsmap, then apply them to the subtree
23
- _setNodeNamespaces(c_element, doc, None, top_nsmap)
24
- moveNodeToDocument(doc, c_element.doc, c_element)
25
-
26
- keep_ns_prefixes = (
27
- set([_utf8(prefix) for prefix in keep_ns_prefixes])
28
- if keep_ns_prefixes else None)
29
-
30
- _removeUnusedNamespaceDeclarations(c_element, keep_ns_prefixes)
31
-
32
-
33
- def strip_attributes(tree_or_element, *attribute_names):
34
- """strip_attributes(tree_or_element, *attribute_names)
35
-
36
- Delete all attributes with the provided attribute names from an
37
- Element (or ElementTree) and its descendants.
38
-
39
- Attribute names can contain wildcards as in `_Element.iter`.
40
-
41
- Example usage::
42
-
43
- strip_attributes(root_element,
44
- 'simpleattr',
45
- '{http://some/ns}attrname',
46
- '{http://other/ns}*')
47
- """
48
- cdef _MultiTagMatcher matcher
49
- element = _rootNodeOrRaise(tree_or_element)
50
- if not attribute_names:
51
- return
52
-
53
- matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, attribute_names)
54
- matcher.cacheTags(element._doc)
55
- if matcher.rejectsAllAttributes():
56
- return
57
- _strip_attributes(element._c_node, matcher)
58
-
59
-
60
- cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher):
61
- cdef xmlAttr* c_attr
62
- cdef xmlAttr* c_next_attr
63
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
64
- if c_node.type == tree.XML_ELEMENT_NODE:
65
- c_attr = c_node.properties
66
- while c_attr is not NULL:
67
- c_next_attr = c_attr.next
68
- if matcher.matchesAttribute(c_attr):
69
- tree.xmlRemoveProp(c_attr)
70
- c_attr = c_next_attr
71
- tree.END_FOR_EACH_ELEMENT_FROM(c_node)
72
-
73
-
74
- def strip_elements(tree_or_element, *tag_names, bint with_tail=True):
75
- """strip_elements(tree_or_element, *tag_names, with_tail=True)
76
-
77
- Delete all elements with the provided tag names from a tree or
78
- subtree. This will remove the elements and their entire subtree,
79
- including all their attributes, text content and descendants. It
80
- will also remove the tail text of the element unless you
81
- explicitly set the ``with_tail`` keyword argument option to False.
82
-
83
- Tag names can contain wildcards as in `_Element.iter`.
84
-
85
- Note that this will not delete the element (or ElementTree root
86
- element) that you passed even if it matches. It will only treat
87
- its descendants. If you want to include the root element, check
88
- its tag name directly before even calling this function.
89
-
90
- Example usage::
91
-
92
- strip_elements(some_element,
93
- 'simpletagname', # non-namespaced tag
94
- '{http://some/ns}tagname', # namespaced tag
95
- '{http://some/other/ns}*' # any tag from a namespace
96
- lxml.etree.Comment # comments
97
- )
98
- """
99
- cdef _MultiTagMatcher matcher
100
- doc = _documentOrRaise(tree_or_element)
101
- element = _rootNodeOrRaise(tree_or_element)
102
- if not tag_names:
103
- return
104
-
105
- matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
106
- matcher.cacheTags(doc)
107
- if matcher.rejectsAll():
108
- return
109
-
110
- if isinstance(tree_or_element, _ElementTree):
111
- # include PIs and comments next to the root node
112
- if matcher.matchesType(tree.XML_COMMENT_NODE):
113
- _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail)
114
- if matcher.matchesType(tree.XML_PI_NODE):
115
- _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail)
116
- _strip_elements(doc, element._c_node, matcher, with_tail)
117
-
118
- cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher,
119
- bint with_tail):
120
- cdef xmlNode* c_child
121
- cdef xmlNode* c_next
122
-
123
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
124
- if c_node.type == tree.XML_ELEMENT_NODE:
125
- # we run through the children here to prevent any problems
126
- # with the tree iteration which would occur if we unlinked the
127
- # c_node itself
128
- c_child = _findChildForwards(c_node, 0)
129
- while c_child is not NULL:
130
- c_next = _nextElement(c_child)
131
- if matcher.matches(c_child):
132
- if c_child.type == tree.XML_ELEMENT_NODE:
133
- if not with_tail:
134
- tree.xmlUnlinkNode(c_child)
135
- _removeNode(doc, c_child)
136
- else:
137
- if with_tail:
138
- _removeText(c_child.next)
139
- tree.xmlUnlinkNode(c_child)
140
- attemptDeallocation(c_child)
141
- c_child = c_next
142
- tree.END_FOR_EACH_ELEMENT_FROM(c_node)
143
-
144
-
145
- def strip_tags(tree_or_element, *tag_names):
146
- """strip_tags(tree_or_element, *tag_names)
147
-
148
- Delete all elements with the provided tag names from a tree or
149
- subtree. This will remove the elements and their attributes, but
150
- *not* their text/tail content or descendants. Instead, it will
151
- merge the text content and children of the element into its
152
- parent.
153
-
154
- Tag names can contain wildcards as in `_Element.iter`.
155
-
156
- Note that this will not delete the element (or ElementTree root
157
- element) that you passed even if it matches. It will only treat
158
- its descendants.
159
-
160
- Example usage::
161
-
162
- strip_tags(some_element,
163
- 'simpletagname', # non-namespaced tag
164
- '{http://some/ns}tagname', # namespaced tag
165
- '{http://some/other/ns}*' # any tag from a namespace
166
- Comment # comments (including their text!)
167
- )
168
- """
169
- cdef _MultiTagMatcher matcher
170
- doc = _documentOrRaise(tree_or_element)
171
- element = _rootNodeOrRaise(tree_or_element)
172
- if not tag_names:
173
- return
174
-
175
- matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
176
- matcher.cacheTags(doc)
177
- if matcher.rejectsAll():
178
- return
179
-
180
- if isinstance(tree_or_element, _ElementTree):
181
- # include PIs and comments next to the root node
182
- if matcher.matchesType(tree.XML_COMMENT_NODE):
183
- _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0)
184
- if matcher.matchesType(tree.XML_PI_NODE):
185
- _removeSiblings(element._c_node, tree.XML_PI_NODE, 0)
186
- _strip_tags(doc, element._c_node, matcher)
187
-
188
- cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher):
189
- cdef xmlNode* c_child
190
- cdef xmlNode* c_next
191
-
192
- tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
193
- if c_node.type == tree.XML_ELEMENT_NODE:
194
- # we run through the children here to prevent any problems
195
- # with the tree iteration which would occur if we unlinked the
196
- # c_node itself
197
- c_child = _findChildForwards(c_node, 0)
198
- while c_child is not NULL:
199
- if not matcher.matches(c_child):
200
- c_child = _nextElement(c_child)
201
- continue
202
- if c_child.type == tree.XML_ELEMENT_NODE:
203
- c_next = _findChildForwards(c_child, 0) or _nextElement(c_child)
204
- _replaceNodeByChildren(doc, c_child)
205
- if not attemptDeallocation(c_child):
206
- if c_child.nsDef is not NULL:
207
- # make namespaces absolute
208
- moveNodeToDocument(doc, doc._c_doc, c_child)
209
- c_child = c_next
210
- else:
211
- c_next = _nextElement(c_child)
212
- tree.xmlUnlinkNode(c_child)
213
- attemptDeallocation(c_child)
214
- c_child = c_next
215
- tree.END_FOR_EACH_ELEMENT_FROM(c_node)
1
+ # functions for tree cleanup and removing elements from subtrees
2
+
3
+ def cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None):
4
+ """cleanup_namespaces(tree_or_element, top_nsmap=None, keep_ns_prefixes=None)
5
+
6
+ Remove all namespace declarations from a subtree that are not used
7
+ by any of the elements or attributes in that tree.
8
+
9
+ If a 'top_nsmap' is provided, it must be a mapping from prefixes
10
+ to namespace URIs. These namespaces will be declared on the top
11
+ element of the subtree before running the cleanup, which allows
12
+ moving namespace declarations to the top of the tree.
13
+
14
+ If a 'keep_ns_prefixes' is provided, it must be a list of prefixes.
15
+ These prefixes will not be removed as part of the cleanup.
16
+ """
17
+ element = _rootNodeOrRaise(tree_or_element)
18
+ c_element = element._c_node
19
+
20
+ if top_nsmap:
21
+ doc = element._doc
22
+ # declare namespaces from nsmap, then apply them to the subtree
23
+ _setNodeNamespaces(c_element, doc, None, top_nsmap)
24
+ moveNodeToDocument(doc, c_element.doc, c_element)
25
+
26
+ keep_ns_prefixes = (
27
+ set([_utf8(prefix) for prefix in keep_ns_prefixes])
28
+ if keep_ns_prefixes else None)
29
+
30
+ _removeUnusedNamespaceDeclarations(c_element, keep_ns_prefixes)
31
+
32
+
33
+ def strip_attributes(tree_or_element, *attribute_names):
34
+ """strip_attributes(tree_or_element, *attribute_names)
35
+
36
+ Delete all attributes with the provided attribute names from an
37
+ Element (or ElementTree) and its descendants.
38
+
39
+ Attribute names can contain wildcards as in `_Element.iter`.
40
+
41
+ Example usage::
42
+
43
+ strip_attributes(root_element,
44
+ 'simpleattr',
45
+ '{http://some/ns}attrname',
46
+ '{http://other/ns}*')
47
+ """
48
+ cdef _MultiTagMatcher matcher
49
+ element = _rootNodeOrRaise(tree_or_element)
50
+ if not attribute_names:
51
+ return
52
+
53
+ matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, attribute_names)
54
+ matcher.cacheTags(element._doc)
55
+ if matcher.rejectsAllAttributes():
56
+ return
57
+ _strip_attributes(element._c_node, matcher)
58
+
59
+
60
+ cdef _strip_attributes(xmlNode* c_node, _MultiTagMatcher matcher):
61
+ cdef xmlAttr* c_attr
62
+ cdef xmlAttr* c_next_attr
63
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
64
+ if c_node.type == tree.XML_ELEMENT_NODE:
65
+ c_attr = c_node.properties
66
+ while c_attr is not NULL:
67
+ c_next_attr = c_attr.next
68
+ if matcher.matchesAttribute(c_attr):
69
+ tree.xmlRemoveProp(c_attr)
70
+ c_attr = c_next_attr
71
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
72
+
73
+
74
+ def strip_elements(tree_or_element, *tag_names, bint with_tail=True):
75
+ """strip_elements(tree_or_element, *tag_names, with_tail=True)
76
+
77
+ Delete all elements with the provided tag names from a tree or
78
+ subtree. This will remove the elements and their entire subtree,
79
+ including all their attributes, text content and descendants. It
80
+ will also remove the tail text of the element unless you
81
+ explicitly set the ``with_tail`` keyword argument option to False.
82
+
83
+ Tag names can contain wildcards as in `_Element.iter`.
84
+
85
+ Note that this will not delete the element (or ElementTree root
86
+ element) that you passed even if it matches. It will only treat
87
+ its descendants. If you want to include the root element, check
88
+ its tag name directly before even calling this function.
89
+
90
+ Example usage::
91
+
92
+ strip_elements(some_element,
93
+ 'simpletagname', # non-namespaced tag
94
+ '{http://some/ns}tagname', # namespaced tag
95
+ '{http://some/other/ns}*' # any tag from a namespace
96
+ lxml.etree.Comment # comments
97
+ )
98
+ """
99
+ cdef _MultiTagMatcher matcher
100
+ doc = _documentOrRaise(tree_or_element)
101
+ element = _rootNodeOrRaise(tree_or_element)
102
+ if not tag_names:
103
+ return
104
+
105
+ matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
106
+ matcher.cacheTags(doc)
107
+ if matcher.rejectsAll():
108
+ return
109
+
110
+ if isinstance(tree_or_element, _ElementTree):
111
+ # include PIs and comments next to the root node
112
+ if matcher.matchesType(tree.XML_COMMENT_NODE):
113
+ _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, with_tail)
114
+ if matcher.matchesType(tree.XML_PI_NODE):
115
+ _removeSiblings(element._c_node, tree.XML_PI_NODE, with_tail)
116
+ _strip_elements(doc, element._c_node, matcher, with_tail)
117
+
118
+ cdef _strip_elements(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher,
119
+ bint with_tail):
120
+ cdef xmlNode* c_child
121
+ cdef xmlNode* c_next
122
+
123
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
124
+ if c_node.type == tree.XML_ELEMENT_NODE:
125
+ # we run through the children here to prevent any problems
126
+ # with the tree iteration which would occur if we unlinked the
127
+ # c_node itself
128
+ c_child = _findChildForwards(c_node, 0)
129
+ while c_child is not NULL:
130
+ c_next = _nextElement(c_child)
131
+ if matcher.matches(c_child):
132
+ if c_child.type == tree.XML_ELEMENT_NODE:
133
+ if not with_tail:
134
+ tree.xmlUnlinkNode(c_child)
135
+ _removeNode(doc, c_child)
136
+ else:
137
+ if with_tail:
138
+ _removeText(c_child.next)
139
+ tree.xmlUnlinkNode(c_child)
140
+ attemptDeallocation(c_child)
141
+ c_child = c_next
142
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
143
+
144
+
145
+ def strip_tags(tree_or_element, *tag_names):
146
+ """strip_tags(tree_or_element, *tag_names)
147
+
148
+ Delete all elements with the provided tag names from a tree or
149
+ subtree. This will remove the elements and their attributes, but
150
+ *not* their text/tail content or descendants. Instead, it will
151
+ merge the text content and children of the element into its
152
+ parent.
153
+
154
+ Tag names can contain wildcards as in `_Element.iter`.
155
+
156
+ Note that this will not delete the element (or ElementTree root
157
+ element) that you passed even if it matches. It will only treat
158
+ its descendants.
159
+
160
+ Example usage::
161
+
162
+ strip_tags(some_element,
163
+ 'simpletagname', # non-namespaced tag
164
+ '{http://some/ns}tagname', # namespaced tag
165
+ '{http://some/other/ns}*' # any tag from a namespace
166
+ Comment # comments (including their text!)
167
+ )
168
+ """
169
+ cdef _MultiTagMatcher matcher
170
+ doc = _documentOrRaise(tree_or_element)
171
+ element = _rootNodeOrRaise(tree_or_element)
172
+ if not tag_names:
173
+ return
174
+
175
+ matcher = _MultiTagMatcher.__new__(_MultiTagMatcher, tag_names)
176
+ matcher.cacheTags(doc)
177
+ if matcher.rejectsAll():
178
+ return
179
+
180
+ if isinstance(tree_or_element, _ElementTree):
181
+ # include PIs and comments next to the root node
182
+ if matcher.matchesType(tree.XML_COMMENT_NODE):
183
+ _removeSiblings(element._c_node, tree.XML_COMMENT_NODE, 0)
184
+ if matcher.matchesType(tree.XML_PI_NODE):
185
+ _removeSiblings(element._c_node, tree.XML_PI_NODE, 0)
186
+ _strip_tags(doc, element._c_node, matcher)
187
+
188
+ cdef _strip_tags(_Document doc, xmlNode* c_node, _MultiTagMatcher matcher):
189
+ cdef xmlNode* c_child
190
+ cdef xmlNode* c_next
191
+
192
+ tree.BEGIN_FOR_EACH_ELEMENT_FROM(c_node, c_node, 1)
193
+ if c_node.type == tree.XML_ELEMENT_NODE:
194
+ # we run through the children here to prevent any problems
195
+ # with the tree iteration which would occur if we unlinked the
196
+ # c_node itself
197
+ c_child = _findChildForwards(c_node, 0)
198
+ while c_child is not NULL:
199
+ if not matcher.matches(c_child):
200
+ c_child = _nextElement(c_child)
201
+ continue
202
+ if c_child.type == tree.XML_ELEMENT_NODE:
203
+ c_next = _findChildForwards(c_child, 0) or _nextElement(c_child)
204
+ _replaceNodeByChildren(doc, c_child)
205
+ if not attemptDeallocation(c_child):
206
+ if c_child.nsDef is not NULL:
207
+ # make namespaces absolute
208
+ moveNodeToDocument(doc, doc._c_doc, c_child)
209
+ c_child = c_next
210
+ else:
211
+ c_next = _nextElement(c_child)
212
+ tree.xmlUnlinkNode(c_child)
213
+ attemptDeallocation(c_child)
214
+ c_child = c_next
215
+ tree.END_FOR_EACH_ELEMENT_FROM(c_node)
lxml/cssselect.py CHANGED
@@ -1,101 +1,101 @@
1
- """CSS Selectors based on XPath.
2
-
3
- This module supports selecting XML/HTML tags based on CSS selectors.
4
- See the `CSSSelector` class for details.
5
-
6
- This is a thin wrapper around cssselect 0.7 or later.
7
- """
8
-
9
-
10
- from . import etree
11
- try:
12
- import cssselect as external_cssselect
13
- except ImportError:
14
- raise ImportError(
15
- 'cssselect does not seem to be installed. '
16
- 'See https://pypi.org/project/cssselect/')
17
-
18
-
19
- SelectorSyntaxError = external_cssselect.SelectorSyntaxError
20
- ExpressionError = external_cssselect.ExpressionError
21
- SelectorError = external_cssselect.SelectorError
22
-
23
-
24
- __all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
25
- 'CSSSelector']
26
-
27
-
28
- class LxmlTranslator(external_cssselect.GenericTranslator):
29
- """
30
- A custom CSS selector to XPath translator with lxml-specific extensions.
31
- """
32
- def xpath_contains_function(self, xpath, function):
33
- # Defined there, removed in later drafts:
34
- # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
35
- if function.argument_types() not in (['STRING'], ['IDENT']):
36
- raise ExpressionError(
37
- "Expected a single string or ident for :contains(), got %r"
38
- % function.arguments)
39
- value = function.arguments[0].value
40
- return xpath.add_condition(
41
- 'contains(__lxml_internal_css:lower-case(string(.)), %s)'
42
- % self.xpath_literal(value.lower()))
43
-
44
-
45
- class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
46
- """
47
- lxml extensions + HTML support.
48
- """
49
-
50
-
51
- def _make_lower_case(context, s):
52
- return s.lower()
53
-
54
- ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
55
- ns.prefix = '__lxml_internal_css'
56
- ns['lower-case'] = _make_lower_case
57
-
58
-
59
- class CSSSelector(etree.XPath):
60
- """A CSS selector.
61
-
62
- Usage::
63
-
64
- >>> from lxml import etree, cssselect
65
- >>> select = cssselect.CSSSelector("a tag > child")
66
-
67
- >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
68
- >>> [ el.tag for el in select(root) ]
69
- ['child']
70
-
71
- To use CSS namespaces, you need to pass a prefix-to-namespace
72
- mapping as ``namespaces`` keyword argument::
73
-
74
- >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
75
- >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
76
- ... namespaces={'rdf': rdfns})
77
-
78
- >>> rdf = etree.XML((
79
- ... '<root xmlns:rdf="%s">'
80
- ... '<rdf:Description>blah</rdf:Description>'
81
- ... '</root>') % rdfns)
82
- >>> [(el.tag, el.text) for el in select_ns(rdf)]
83
- [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]
84
-
85
- """
86
- def __init__(self, css, namespaces=None, translator='xml'):
87
- if translator == 'xml':
88
- translator = LxmlTranslator()
89
- elif translator == 'html':
90
- translator = LxmlHTMLTranslator()
91
- elif translator == 'xhtml':
92
- translator = LxmlHTMLTranslator(xhtml=True)
93
- path = translator.css_to_xpath(css)
94
- super().__init__(path, namespaces=namespaces)
95
- self.css = css
96
-
97
- def __repr__(self):
98
- return '<%s %x for %r>' % (
99
- self.__class__.__name__,
100
- abs(id(self)),
101
- self.css)
1
+ """CSS Selectors based on XPath.
2
+
3
+ This module supports selecting XML/HTML tags based on CSS selectors.
4
+ See the `CSSSelector` class for details.
5
+
6
+ This is a thin wrapper around cssselect 0.7 or later.
7
+ """
8
+
9
+
10
+ from . import etree
11
+ try:
12
+ import cssselect as external_cssselect
13
+ except ImportError:
14
+ raise ImportError(
15
+ 'cssselect does not seem to be installed. '
16
+ 'See https://pypi.org/project/cssselect/')
17
+
18
+
19
+ SelectorSyntaxError = external_cssselect.SelectorSyntaxError
20
+ ExpressionError = external_cssselect.ExpressionError
21
+ SelectorError = external_cssselect.SelectorError
22
+
23
+
24
+ __all__ = ['SelectorSyntaxError', 'ExpressionError', 'SelectorError',
25
+ 'CSSSelector']
26
+
27
+
28
+ class LxmlTranslator(external_cssselect.GenericTranslator):
29
+ """
30
+ A custom CSS selector to XPath translator with lxml-specific extensions.
31
+ """
32
+ def xpath_contains_function(self, xpath, function):
33
+ # Defined there, removed in later drafts:
34
+ # http://www.w3.org/TR/2001/CR-css3-selectors-20011113/#content-selectors
35
+ if function.argument_types() not in (['STRING'], ['IDENT']):
36
+ raise ExpressionError(
37
+ "Expected a single string or ident for :contains(), got %r"
38
+ % function.arguments)
39
+ value = function.arguments[0].value
40
+ return xpath.add_condition(
41
+ 'contains(__lxml_internal_css:lower-case(string(.)), %s)'
42
+ % self.xpath_literal(value.lower()))
43
+
44
+
45
+ class LxmlHTMLTranslator(LxmlTranslator, external_cssselect.HTMLTranslator):
46
+ """
47
+ lxml extensions + HTML support.
48
+ """
49
+
50
+
51
+ def _make_lower_case(context, s):
52
+ return s.lower()
53
+
54
+ ns = etree.FunctionNamespace('http://codespeak.net/lxml/css/')
55
+ ns.prefix = '__lxml_internal_css'
56
+ ns['lower-case'] = _make_lower_case
57
+
58
+
59
+ class CSSSelector(etree.XPath):
60
+ """A CSS selector.
61
+
62
+ Usage::
63
+
64
+ >>> from lxml import etree, cssselect
65
+ >>> select = cssselect.CSSSelector("a tag > child")
66
+
67
+ >>> root = etree.XML("<a><b><c/><tag><child>TEXT</child></tag></b></a>")
68
+ >>> [ el.tag for el in select(root) ]
69
+ ['child']
70
+
71
+ To use CSS namespaces, you need to pass a prefix-to-namespace
72
+ mapping as ``namespaces`` keyword argument::
73
+
74
+ >>> rdfns = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
75
+ >>> select_ns = cssselect.CSSSelector('root > rdf|Description',
76
+ ... namespaces={'rdf': rdfns})
77
+
78
+ >>> rdf = etree.XML((
79
+ ... '<root xmlns:rdf="%s">'
80
+ ... '<rdf:Description>blah</rdf:Description>'
81
+ ... '</root>') % rdfns)
82
+ >>> [(el.tag, el.text) for el in select_ns(rdf)]
83
+ [('{http://www.w3.org/1999/02/22-rdf-syntax-ns#}Description', 'blah')]
84
+
85
+ """
86
+ def __init__(self, css, namespaces=None, translator='xml'):
87
+ if translator == 'xml':
88
+ translator = LxmlTranslator()
89
+ elif translator == 'html':
90
+ translator = LxmlHTMLTranslator()
91
+ elif translator == 'xhtml':
92
+ translator = LxmlHTMLTranslator(xhtml=True)
93
+ path = translator.css_to_xpath(css)
94
+ super().__init__(path, namespaces=namespaces)
95
+ self.css = css
96
+
97
+ def __repr__(self):
98
+ return '<%s %x for %r>' % (
99
+ self.__class__.__name__,
100
+ abs(id(self)),
101
+ self.css)