elementpath 4.5.0__tar.gz → 4.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {elementpath-4.5.0 → elementpath-4.7.0}/CHANGELOG.rst +15 -1
- {elementpath-4.5.0 → elementpath-4.7.0}/MANIFEST.in +1 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/PKG-INFO +2 -1
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/conf.py +2 -2
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/xpath_api.rst +3 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/__init__.py +7 -5
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/qname.py +5 -2
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/string.py +4 -4
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/uri.py +2 -2
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/etree.py +26 -6
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/exceptions.py +4 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/helpers.py +69 -23
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/namespaces.py +3 -10
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/protocols.py +1 -1
- elementpath-4.7.0/elementpath/regex/__init__.py +25 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/regex/character_classes.py +58 -44
- elementpath-4.7.0/elementpath/regex/codepoints.py +206 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/regex/patterns.py +6 -6
- elementpath-4.7.0/elementpath/regex/unicode_blocks.py +450 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/regex/unicode_categories.py +418 -5
- elementpath-4.7.0/elementpath/regex/unicode_subsets.py +639 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/sequence_types.py +7 -9
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/tdop.py +8 -7
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/tree_builders.py +195 -163
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/xpath1_parser.py +56 -11
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/_xpath2_constructors.py +14 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/_xpath2_functions.py +3 -6
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/_xpath2_operators.py +7 -2
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/xpath2_parser.py +14 -18
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/_xpath30_functions.py +30 -27
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/_xpath31_functions.py +4 -3
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/_xpath31_operators.py +2 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_context.py +17 -22
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_nodes.py +64 -57
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_selectors.py +59 -29
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_tokens.py +8 -11
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/PKG-INFO +2 -1
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/SOURCES.txt +3 -1
- {elementpath-4.5.0 → elementpath-4.7.0}/requirements-dev.txt +1 -1
- elementpath-4.7.0/scripts/generate_codepoints.py +406 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/setup.py +2 -1
- elementpath-4.7.0/tests/mypy_tests/advanced.py +30 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/mypy_tests/selectors.py +15 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_helpers.py +15 -2
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_regex.py +222 -48
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_selectors.py +44 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_tree_builders.py +67 -3
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_typing.py +8 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath1_parser.py +22 -9
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath2_parser.py +42 -2
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath31.py +33 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath_tokens.py +31 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tox.ini +15 -9
- elementpath-4.5.0/elementpath/regex/__init__.py +0 -24
- elementpath-4.5.0/elementpath/regex/codepoints.py +0 -126
- elementpath-4.5.0/elementpath/regex/generate_categories.py +0 -116
- elementpath-4.5.0/elementpath/regex/unicode_subsets.py +0 -519
- {elementpath-4.5.0 → elementpath-4.7.0}/.coveragerc +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/LICENSE +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/README.rst +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/Makefile +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/advanced.rst +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/index.rst +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/introduction.rst +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/make.bat +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/pratt_api.rst +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/doc/requirements.txt +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/_typing.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/aliases.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/collations.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/compare.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/__init__.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/atomic_types.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/binary.py +1 -1
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/datetime.py +1 -1
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/numeric.py +1 -1
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/proxies.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/untyped.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/decoder.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/py.typed +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/schema_proxy.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/serialization.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/validators/__init__.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/validators/analyze-string.xsd +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/validators/schema-for-json.xsd +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/__init__.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/_xpath1_axes.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/_xpath1_functions.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/_xpath1_operators.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/__init__.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath3.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/__init__.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/_translation_maps.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/_xpath30_operators.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/xpath30_helpers.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/xpath30_parser.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/__init__.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/xpath31_parser.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/dependency_links.txt +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/requires.txt +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/top_level.txt +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/mypy.ini +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/setup.cfg +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/__init__.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/execute_w3c_tests.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/memory_profiling.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/mypy_tests/protocols.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/analyze-string.xsd +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/external_entity.xml +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/sample.xml +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/schema-for-json.xsd +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/unparsed_entity.xml +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/unused_external_entity.xml +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/unused_unparsed_entity.xml +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/with_entity.xml +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_collations.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_compare.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_datatypes.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_elementpath.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_etree.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_exceptions.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_namespaces.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_package.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_schema_context.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_schema_proxy.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_sequence_types.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_serialization.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_tdop_parser.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_validators.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath2_constructors.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath2_functions.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath30.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath_context.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath_nodes.py +0 -0
- {elementpath-4.5.0 → elementpath-4.7.0}/tests/xpath_test_class.py +0 -0
|
@@ -2,6 +2,18 @@
|
|
|
2
2
|
CHANGELOG
|
|
3
3
|
*********
|
|
4
4
|
|
|
5
|
+
`v4.7.0`_ (2024-12-20)
|
|
6
|
+
======================
|
|
7
|
+
* Fix *fragment* argument usage (issue #81)
|
|
8
|
+
* Fix constructors nud() to skip argument check with XP31+ arrow operator (issue #83)
|
|
9
|
+
|
|
10
|
+
`v4.6.0`_ (2024-10-27)
|
|
11
|
+
======================
|
|
12
|
+
* Fix XsdAttributeGroupProtocol
|
|
13
|
+
* Improve Unicode support with installable UnicodeData.txt versions
|
|
14
|
+
* Extend names disambiguation with a fix for issue #78
|
|
15
|
+
* Refactor tree builders to fix document position of tails (issue #79)
|
|
16
|
+
|
|
5
17
|
`v4.5.0`_ (2024-09-09)
|
|
6
18
|
======================
|
|
7
19
|
* Fix and clean node trees iteration methods (issue #72)
|
|
@@ -469,4 +481,6 @@ CHANGELOG
|
|
|
469
481
|
.. _v4.2.1: https://github.com/sissaschool/elementpath/compare/v4.2.0...v4.2.1
|
|
470
482
|
.. _v4.3.0: https://github.com/sissaschool/elementpath/compare/v4.2.1...v4.3.0
|
|
471
483
|
.. _v4.4.0: https://github.com/sissaschool/elementpath/compare/v4.3.0...v4.4.0
|
|
472
|
-
.. _v4.5.0: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
|
|
484
|
+
.. _v4.5.0: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
|
|
485
|
+
.. _v4.6.0: https://github.com/sissaschool/elementpath/compare/v4.5.0...v4.6.0
|
|
486
|
+
.. _v4.7.0: https://github.com/sissaschool/elementpath/compare/v4.6.0...v4.7.0
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.1
|
|
2
2
|
Name: elementpath
|
|
3
|
-
Version: 4.5.0
|
|
3
|
+
Version: 4.7.0
|
|
4
4
|
Summary: XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and lxml
|
|
5
5
|
Home-page: https://github.com/sissaschool/elementpath
|
|
6
6
|
Author: Davide Brunato
|
|
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
|
|
|
22
22
|
Classifier: Programming Language :: Python :: 3.11
|
|
23
23
|
Classifier: Programming Language :: Python :: 3.12
|
|
24
24
|
Classifier: Programming Language :: Python :: 3.13
|
|
25
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
25
26
|
Classifier: Programming Language :: Python :: Implementation :: CPython
|
|
26
27
|
Classifier: Programming Language :: Python :: Implementation :: PyPy
|
|
27
28
|
Classifier: Topic :: Software Development :: Libraries
|
|
@@ -29,9 +29,9 @@ copyright = '2018-2024, SISSA (International School for Advanced Studies)'
|
|
|
29
29
|
author = 'Davide Brunato'
|
|
30
30
|
|
|
31
31
|
# The short X.Y version
|
|
32
|
-
version = '4.5'
|
|
32
|
+
version = '4.7'
|
|
33
33
|
# The full version, including alpha/beta/rc tags
|
|
34
|
-
release = '4.5.0'
|
|
34
|
+
release = '4.7.0'
|
|
35
35
|
|
|
36
36
|
# -- General configuration ---------------------------------------------------
|
|
37
37
|
|
|
@@ -147,6 +147,8 @@ XPath regular expressions
|
|
|
147
147
|
=========================
|
|
148
148
|
|
|
149
149
|
.. autofunction:: elementpath.translate_pattern
|
|
150
|
+
.. autofunction:: elementpath.install_unicode_data
|
|
151
|
+
.. autofunction:: elementpath.unicode_version
|
|
150
152
|
|
|
151
153
|
|
|
152
154
|
Exception classes
|
|
@@ -154,6 +156,7 @@ Exception classes
|
|
|
154
156
|
|
|
155
157
|
.. autoexception:: elementpath.ElementPathError
|
|
156
158
|
.. autoexception:: elementpath.MissingContextError
|
|
159
|
+
.. autoexception:: elementpath.UnsupportedFeatureError
|
|
157
160
|
.. autoexception:: elementpath.RegexError
|
|
158
161
|
.. autoexception:: elementpath.ElementPathLocaleError
|
|
159
162
|
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
#
|
|
8
8
|
# @author Davide Brunato <brunato@sissa.it>
|
|
9
9
|
#
|
|
10
|
-
__version__ = '4.5.0'
|
|
10
|
+
__version__ = '4.7.0'
|
|
11
11
|
__author__ = "Davide Brunato"
|
|
12
12
|
__contact__ = "brunato@sissa.it"
|
|
13
13
|
__copyright__ = "Copyright 2018-2024, SISSA"
|
|
@@ -23,7 +23,7 @@ from . import protocols # Protocols for type annotations
|
|
|
23
23
|
from .exceptions import ElementPathError, MissingContextError, ElementPathKeyError, \
|
|
24
24
|
ElementPathZeroDivisionError, ElementPathNameError, ElementPathOverflowError, \
|
|
25
25
|
ElementPathRuntimeError, ElementPathSyntaxError, ElementPathTypeError, \
|
|
26
|
-
ElementPathValueError, ElementPathLocaleError
|
|
26
|
+
ElementPathValueError, ElementPathLocaleError, UnsupportedFeatureError
|
|
27
27
|
|
|
28
28
|
from .xpath_context import XPathContext, XPathSchemaContext
|
|
29
29
|
from .xpath_nodes import XPathNode, DocumentNode, ElementNode, AttributeNode, \
|
|
@@ -36,10 +36,11 @@ from .xpath1 import XPath1Parser
|
|
|
36
36
|
from .xpath2 import XPath2Parser
|
|
37
37
|
from .xpath_selectors import select, iter_select, Selector
|
|
38
38
|
from .schema_proxy import AbstractSchemaProxy
|
|
39
|
-
from .regex import RegexError, translate_pattern
|
|
39
|
+
from .regex import RegexError, translate_pattern, install_unicode_data, unicode_version
|
|
40
40
|
|
|
41
41
|
__all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContextError',
|
|
42
|
-
'
|
|
42
|
+
'UnsupportedFeatureError', 'ElementPathKeyError',
|
|
43
|
+
'ElementPathZeroDivisionError', 'ElementPathNameError',
|
|
43
44
|
'ElementPathOverflowError', 'ElementPathRuntimeError', 'ElementPathSyntaxError',
|
|
44
45
|
'ElementPathTypeError', 'ElementPathValueError', 'ElementPathLocaleError',
|
|
45
46
|
'XPathContext', 'XPathSchemaContext', 'XPathNode', 'DocumentNode',
|
|
@@ -48,4 +49,5 @@ __all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContex
|
|
|
48
49
|
'SchemaElementNode', 'get_node_tree', 'build_node_tree',
|
|
49
50
|
'build_lxml_node_tree', 'build_schema_node_tree', 'XPathToken',
|
|
50
51
|
'XPathFunction', 'XPath1Parser', 'XPath2Parser', 'select', 'iter_select',
|
|
51
|
-
'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern'
|
|
52
|
+
'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern',
|
|
53
|
+
'install_unicode_data', 'unicode_version']
|
|
@@ -7,9 +7,9 @@
|
|
|
7
7
|
#
|
|
8
8
|
# @author Davide Brunato <brunato@sissa.it>
|
|
9
9
|
#
|
|
10
|
+
import re
|
|
10
11
|
from typing import Any, Optional
|
|
11
12
|
|
|
12
|
-
from elementpath.helpers import QNAME_PATTERN
|
|
13
13
|
from .atomic_types import AnyAtomicType
|
|
14
14
|
from .untyped import UntypedAtomic
|
|
15
15
|
|
|
@@ -22,7 +22,10 @@ class AbstractQName(AnyAtomicType):
|
|
|
22
22
|
URI if a prefixed name is provided for the 2nd argument.
|
|
23
23
|
:param qname: the prefixed name or a local name.
|
|
24
24
|
"""
|
|
25
|
-
pattern =
|
|
25
|
+
pattern = re.compile(
|
|
26
|
+
r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
|
|
27
|
+
r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
|
|
28
|
+
)
|
|
26
29
|
|
|
27
30
|
def __new__(cls, *args: Any, **kwargs: Any) -> 'AbstractQName':
|
|
28
31
|
if cls.__name__ == 'Notation':
|
|
@@ -10,7 +10,7 @@
|
|
|
10
10
|
import re
|
|
11
11
|
from typing import Any
|
|
12
12
|
|
|
13
|
-
from elementpath.helpers import
|
|
13
|
+
from elementpath.helpers import collapse_white_spaces, Patterns
|
|
14
14
|
from .atomic_types import AnyAtomicType
|
|
15
15
|
|
|
16
16
|
|
|
@@ -20,7 +20,7 @@ class NormalizedString(str, AnyAtomicType):
|
|
|
20
20
|
|
|
21
21
|
def __new__(cls, obj: Any) -> 'NormalizedString':
|
|
22
22
|
try:
|
|
23
|
-
return super().__new__(cls,
|
|
23
|
+
return super().__new__(cls, Patterns.normalize.sub(' ', obj))
|
|
24
24
|
except TypeError:
|
|
25
25
|
return super().__new__(cls, obj)
|
|
26
26
|
|
|
@@ -41,7 +41,7 @@ class XsdToken(NormalizedString):
|
|
|
41
41
|
match = cls.pattern.match(value)
|
|
42
42
|
if match is None:
|
|
43
43
|
raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
|
|
44
|
-
return super(NormalizedString, cls).__new__(cls, value)
|
|
44
|
+
return super(NormalizedString, cls).__new__(cls, value) # noqa
|
|
45
45
|
|
|
46
46
|
|
|
47
47
|
class Language(XsdToken):
|
|
@@ -59,7 +59,7 @@ class Language(XsdToken):
|
|
|
59
59
|
match = cls.pattern.match(value)
|
|
60
60
|
if match is None:
|
|
61
61
|
raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
|
|
62
|
-
return super(NormalizedString, cls).__new__(cls, value)
|
|
62
|
+
return super(NormalizedString, cls).__new__(cls, value) # noqa
|
|
63
63
|
|
|
64
64
|
|
|
65
65
|
class Name(XsdToken):
|
|
@@ -11,7 +11,7 @@ from decimal import Decimal
|
|
|
11
11
|
from urllib.parse import urlparse
|
|
12
12
|
from typing import Union
|
|
13
13
|
|
|
14
|
-
from elementpath.helpers import collapse_white_spaces,
|
|
14
|
+
from elementpath.helpers import collapse_white_spaces, Patterns
|
|
15
15
|
from .atomic_types import AnyAtomicType
|
|
16
16
|
from .untyped import UntypedAtomic
|
|
17
17
|
from .numeric import Integer
|
|
@@ -110,6 +110,6 @@ class AnyURI(AnyAtomicType):
|
|
|
110
110
|
elif value.count('#') > 1:
|
|
111
111
|
msg = 'invalid value {!r} for xs:{} (too many # characters)'
|
|
112
112
|
raise ValueError(msg.format(value, cls.name))
|
|
113
|
-
elif
|
|
113
|
+
elif Patterns.wrong_escape.search(value) is not None:
|
|
114
114
|
msg = 'invalid value {!r} for xs:{} (wrong escaping)'
|
|
115
115
|
raise ValueError(msg.format(value, cls.name))
|
|
@@ -106,7 +106,17 @@ def is_etree_element(obj: Any) -> bool:
|
|
|
106
106
|
|
|
107
107
|
|
|
108
108
|
def is_lxml_etree_element(obj: Any) -> bool:
|
|
109
|
-
return is_etree_element(obj) and
|
|
109
|
+
return is_etree_element(obj) and \
|
|
110
|
+
hasattr(obj, 'getparent') and \
|
|
111
|
+
hasattr(obj, 'nsmap') and \
|
|
112
|
+
obj.__class__.__module__ in ('lxml.etree', 'lxml.html')
|
|
113
|
+
|
|
114
|
+
|
|
115
|
+
def is_etree_element_instance(obj: Any) -> bool:
|
|
116
|
+
"""Strictly checks that the objects is an ElementTree or lxml.etree Element."""
|
|
117
|
+
return isinstance(obj, ElementTree.Element) or \
|
|
118
|
+
isinstance(obj, PyElementTree.Element) or \
|
|
119
|
+
is_lxml_etree_element(obj)
|
|
110
120
|
|
|
111
121
|
|
|
112
122
|
def is_etree_document(obj: Any) -> bool:
|
|
@@ -114,7 +124,17 @@ def is_etree_document(obj: Any) -> bool:
|
|
|
114
124
|
|
|
115
125
|
|
|
116
126
|
def is_lxml_etree_document(obj: Any) -> bool:
|
|
117
|
-
return is_etree_document(obj) and
|
|
127
|
+
return is_etree_document(obj) and \
|
|
128
|
+
hasattr(obj, 'xpath') and \
|
|
129
|
+
hasattr(obj, 'xslt') and \
|
|
130
|
+
obj.__class__.__module__ in ('lxml.etree', 'lxml.html')
|
|
131
|
+
|
|
132
|
+
|
|
133
|
+
def is_etree_document_instance(obj: Any) -> bool:
|
|
134
|
+
"""Strictly checks that the objects is an ElementTree or lxml.etree document."""
|
|
135
|
+
return isinstance(obj, ElementTree.ElementTree) or \
|
|
136
|
+
isinstance(obj, PyElementTree.ElementTree) or \
|
|
137
|
+
is_lxml_etree_document(obj)
|
|
118
138
|
|
|
119
139
|
|
|
120
140
|
def etree_iter_strings(elem: Union[DocumentProtocol, ElementProtocol],
|
|
@@ -237,7 +257,7 @@ def etree_tostring(elem: ElementProtocol,
|
|
|
237
257
|
return indent + line
|
|
238
258
|
|
|
239
259
|
etree_module: Any
|
|
240
|
-
if not
|
|
260
|
+
if not is_etree_element_instance(elem):
|
|
241
261
|
raise TypeError(f"{elem!r} is not an Element")
|
|
242
262
|
elif isinstance(elem, PyElementTree.Element):
|
|
243
263
|
etree_module = PyElementTree
|
|
@@ -308,6 +328,6 @@ def etree_tostring(elem: ElementProtocol,
|
|
|
308
328
|
|
|
309
329
|
|
|
310
330
|
__all__ = ['ElementTree', 'PyElementTree', 'SafeXMLParser', 'defuse_xml',
|
|
311
|
-
'is_etree_element', 'is_lxml_etree_element', '
|
|
312
|
-
'
|
|
313
|
-
'etree_iter_paths', 'etree_tostring']
|
|
331
|
+
'is_etree_element', 'is_lxml_etree_element', 'is_etree_element_instance',
|
|
332
|
+
'is_etree_document', 'is_lxml_etree_document', 'is_etree_document_instance',
|
|
333
|
+
'etree_iter_strings', 'etree_deep_equal', 'etree_iter_paths', 'etree_tostring']
|
|
@@ -52,6 +52,10 @@ class MissingContextError(ElementPathError):
|
|
|
52
52
|
"""Raised when the dynamic context is required for evaluate the XPath expression."""
|
|
53
53
|
|
|
54
54
|
|
|
55
|
+
class UnsupportedFeatureError(ElementPathError, NotImplementedError):
|
|
56
|
+
"""Raised when an XPath feature is not supported in the current context."""
|
|
57
|
+
|
|
58
|
+
|
|
55
59
|
class ElementPathKeyError(ElementPathError, KeyError):
|
|
56
60
|
pass
|
|
57
61
|
|
|
@@ -12,10 +12,10 @@ import math
|
|
|
12
12
|
from calendar import isleap, leapdays
|
|
13
13
|
from decimal import Decimal
|
|
14
14
|
from operator import attrgetter
|
|
15
|
-
from typing import Any, List, Optional,
|
|
15
|
+
from typing import Any, List, Optional, overload, SupportsFloat, Type, Union
|
|
16
16
|
from urllib.parse import urlsplit
|
|
17
17
|
|
|
18
|
-
from elementpath._typing import Iterator, Match
|
|
18
|
+
from elementpath._typing import Iterator, Match, Pattern
|
|
19
19
|
|
|
20
20
|
###
|
|
21
21
|
# Common sets constants
|
|
@@ -26,23 +26,68 @@ INVALID_NUMERIC = frozenset(
|
|
|
26
26
|
('inf', '+inf', '-inf', 'nan', 'infinity', '+infinity', '-infinity')
|
|
27
27
|
)
|
|
28
28
|
|
|
29
|
+
|
|
29
30
|
###
|
|
30
|
-
# Data validation
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
)
|
|
44
|
-
|
|
45
|
-
|
|
31
|
+
# Data validation patterns
|
|
32
|
+
|
|
33
|
+
class LazyPattern:
|
|
34
|
+
"""
|
|
35
|
+
A descriptor for creating lazy regexp patterns. The compiled pattern is built
|
|
36
|
+
only when the descriptor attribute is accessed (e.g. a hasattr() call).
|
|
37
|
+
"""
|
|
38
|
+
_compiled: Pattern[str]
|
|
39
|
+
|
|
40
|
+
def __init__(self, pattern: str, flags: Union[int, re.RegexFlag] = 0) -> None:
|
|
41
|
+
self._pattern = pattern
|
|
42
|
+
self._flags = flags
|
|
43
|
+
|
|
44
|
+
def __set_name__(self, owner: Type[Any], name: str) -> None:
|
|
45
|
+
self._name = name
|
|
46
|
+
|
|
47
|
+
@overload
|
|
48
|
+
def __get__(self, instance: None, owner: Type[Any]) -> Pattern[str]: ...
|
|
49
|
+
|
|
50
|
+
@overload
|
|
51
|
+
def __get__(self, instance: Any, owner: Type[Any]) -> Pattern[str]: ...
|
|
52
|
+
|
|
53
|
+
def __get__(self, instance: Optional[Any], owner: Type[Any]) -> Pattern[str]:
|
|
54
|
+
try:
|
|
55
|
+
return self._compiled
|
|
56
|
+
except AttributeError:
|
|
57
|
+
self._compiled = re.compile(self._pattern, self._flags)
|
|
58
|
+
return self._compiled
|
|
59
|
+
|
|
60
|
+
def __set__(self, instance: Any, value: Any) -> None:
|
|
61
|
+
raise AttributeError("Can't set attribute {}".format(self._name))
|
|
62
|
+
|
|
63
|
+
def __delete__(self, instance: Any) -> None:
|
|
64
|
+
raise AttributeError("Can't delete attribute {}".format(self._name))
|
|
65
|
+
|
|
66
|
+
|
|
67
|
+
class Patterns:
|
|
68
|
+
"""
|
|
69
|
+
Helper patterns, the ones that aren't used at import time are defined lazy.
|
|
70
|
+
"""
|
|
71
|
+
whitespaces = re.compile(r'[^\S\xa0]+') # include ASCII 160 (non-breaking space)
|
|
72
|
+
normalize = LazyPattern(r'[^\S\xa0]')
|
|
73
|
+
ncname = LazyPattern(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
|
|
74
|
+
extended_qname = LazyPattern(
|
|
75
|
+
r'^(?:Q{(?P<namespace>[^}]+)}|'
|
|
76
|
+
r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
|
|
77
|
+
r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
|
|
78
|
+
)
|
|
79
|
+
replacement = LazyPattern(r'^([^\\$]|\\{2}|\\\$|\$\d+)*$')
|
|
80
|
+
sequence_type = LazyPattern(r'\s?([()?*+,])\s?')
|
|
81
|
+
unicode_escape = LazyPattern(r'(?:\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8}))')
|
|
82
|
+
wrong_escape = LazyPattern(r'%(?![a-fA-F\d]{2})')
|
|
83
|
+
xml_newlines = LazyPattern('\r\n|\r|\n')
|
|
84
|
+
|
|
85
|
+
# Regex patterns related to names and namespaces
|
|
86
|
+
namespace_uri = LazyPattern(r'{([^}]+)}')
|
|
87
|
+
expanded_name = LazyPattern(
|
|
88
|
+
r'^(?:{(?P<namespace>[^}]+)})?'
|
|
89
|
+
r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
|
|
90
|
+
)
|
|
46
91
|
|
|
47
92
|
|
|
48
93
|
def upper_camel_case(s: str) -> str:
|
|
@@ -50,16 +95,16 @@ def upper_camel_case(s: str) -> str:
|
|
|
50
95
|
|
|
51
96
|
|
|
52
97
|
def collapse_white_spaces(s: str) -> str:
|
|
53
|
-
return
|
|
98
|
+
return Patterns.whitespaces.sub(' ', s).strip(' ')
|
|
54
99
|
|
|
55
100
|
|
|
56
101
|
def is_ncname(s: str) -> bool:
|
|
57
|
-
return
|
|
102
|
+
return Patterns.ncname.match(s) is not None
|
|
58
103
|
|
|
59
104
|
|
|
60
105
|
def is_idrefs(value: Optional[str]) -> bool:
|
|
61
106
|
return isinstance(value, str) and \
|
|
62
|
-
all(
|
|
107
|
+
all(Patterns.ncname.match(x) is not None for x in value.split())
|
|
63
108
|
|
|
64
109
|
|
|
65
110
|
node_position = attrgetter('position')
|
|
@@ -243,7 +288,8 @@ def escape_json_string(s: str, escaped: bool = False) -> str:
|
|
|
243
288
|
def unescape_json_string(s: str) -> str:
|
|
244
289
|
|
|
245
290
|
def unicode_escape_callback(match: Match[str]) -> str:
|
|
246
|
-
|
|
291
|
+
group = match.group(1) or match.group(2)
|
|
292
|
+
return chr(int(group.upper(), 16))
|
|
247
293
|
|
|
248
294
|
s = s.replace('\\"', '\"').\
|
|
249
295
|
replace(r'\b', '\b').\
|
|
@@ -254,7 +300,7 @@ def unescape_json_string(s: str) -> str:
|
|
|
254
300
|
replace(r'\/', '/').\
|
|
255
301
|
replace('\\\\', '\\')
|
|
256
302
|
|
|
257
|
-
return
|
|
303
|
+
return Patterns.unicode_escape.sub(unicode_escape_callback, s)
|
|
258
304
|
|
|
259
305
|
|
|
260
306
|
def iter_sequence(obj: Any) -> Iterator[Any]:
|
|
@@ -7,17 +7,10 @@
|
|
|
7
7
|
#
|
|
8
8
|
# @author Davide Brunato <brunato@sissa.it>
|
|
9
9
|
#
|
|
10
|
-
import re
|
|
11
10
|
from typing import cast, Tuple, Union
|
|
12
11
|
|
|
13
12
|
from elementpath.aliases import NamespacesType, NsmapType
|
|
14
|
-
|
|
15
|
-
# Regex patterns related to names and namespaces
|
|
16
|
-
NAMESPACE_URI_PATTERN = re.compile(r'{([^}]+)}')
|
|
17
|
-
EXPANDED_NAME_PATTERN = re.compile(
|
|
18
|
-
r'^(?:{(?P<namespace>[^}]+)})?'
|
|
19
|
-
r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
|
|
20
|
-
)
|
|
13
|
+
from elementpath.helpers import Patterns
|
|
21
14
|
|
|
22
15
|
# Namespaces
|
|
23
16
|
XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
|
|
@@ -70,13 +63,13 @@ XSD_NUMERIC = '{%s}numeric' % XSD_NAMESPACE
|
|
|
70
63
|
|
|
71
64
|
def get_namespace(name: str) -> str:
|
|
72
65
|
try:
|
|
73
|
-
return
|
|
66
|
+
return Patterns.namespace_uri.match(name).group(1) # type: ignore[union-attr]
|
|
74
67
|
except AttributeError:
|
|
75
68
|
return ''
|
|
76
69
|
|
|
77
70
|
|
|
78
71
|
def split_expanded_name(name: str) -> Tuple[str, str]:
|
|
79
|
-
match =
|
|
72
|
+
match = Patterns.expanded_name.match(name)
|
|
80
73
|
if match is None:
|
|
81
74
|
raise ValueError(f"{name!r} is not an expanded QName")
|
|
82
75
|
namespace, local_name = match.groups()
|
|
@@ -272,7 +272,7 @@ XsdXPathNodeType = Union['XsdSchemaProtocol', 'XsdElementProtocol']
|
|
|
272
272
|
class XsdAttributeGroupProtocol(XsdComponentProtocol, Protocol):
|
|
273
273
|
|
|
274
274
|
@overload
|
|
275
|
-
def get(self, key: Optional[str]
|
|
275
|
+
def get(self, key: Optional[str]) -> Optional[XsdAttributeProtocol]: ...
|
|
276
276
|
|
|
277
277
|
@overload
|
|
278
278
|
def get(self, key: Optional[str], default: _T) -> Union[XsdAttributeProtocol, _T]: ...
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
#
|
|
2
|
+
# Copyright (c), 2018-2020, SISSA (International School for Advanced Studies).
|
|
3
|
+
# All rights reserved.
|
|
4
|
+
# This file is distributed under the terms of the MIT License.
|
|
5
|
+
# See the file 'LICENSE' in the root directory of the present
|
|
6
|
+
# distribution, or http://opensource.org/licenses/MIT.
|
|
7
|
+
#
|
|
8
|
+
# @author Davide Brunato <brunato@sissa.it>
|
|
9
|
+
#
|
|
10
|
+
"""
|
|
11
|
+
Subpackage for processing XML regular expressions and for converting them to
|
|
12
|
+
Python-compatible regexps.
|
|
13
|
+
|
|
14
|
+
XPath/XQuery/XML-Schema regexp flavors are supported through translate_pattern()
|
|
15
|
+
API options. Default options process XPath/XQuery patterns.
|
|
16
|
+
"""
|
|
17
|
+
from .codepoints import RegexError, iter_code_points
|
|
18
|
+
from .unicode_subsets import UnicodeSubset, UnicodeData, install_unicode_data, \
|
|
19
|
+
unicode_version, unicode_subset, lazy_subset, unicode_category, unicode_block
|
|
20
|
+
from .character_classes import CharacterClass
|
|
21
|
+
from .patterns import translate_pattern
|
|
22
|
+
|
|
23
|
+
__all__ = ['translate_pattern', 'RegexError', 'UnicodeSubset', 'UnicodeData',
|
|
24
|
+
'install_unicode_data', 'unicode_version', 'unicode_subset', 'lazy_subset',
|
|
25
|
+
'unicode_category', 'unicode_block', 'CharacterClass', 'iter_code_points']
|
|
@@ -8,14 +8,14 @@
|
|
|
8
8
|
# @author Davide Brunato <brunato@sissa.it>
|
|
9
9
|
#
|
|
10
10
|
import re
|
|
11
|
-
from itertools import chain
|
|
12
11
|
from sys import maxunicode
|
|
13
12
|
from collections import Counter
|
|
14
|
-
from
|
|
13
|
+
from itertools import chain
|
|
14
|
+
from typing import AbstractSet, Any, Callable, Dict, Optional, Union
|
|
15
15
|
|
|
16
16
|
from elementpath._typing import Iterator, MutableSet
|
|
17
|
-
from .
|
|
18
|
-
|
|
17
|
+
from .codepoints import RegexError
|
|
18
|
+
from .unicode_subsets import UnicodeSubset, lazy_subset, unicode_subset, unicode_category
|
|
19
19
|
|
|
20
20
|
I_SHORTCUT_REPLACE = (
|
|
21
21
|
":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
|
|
@@ -27,20 +27,34 @@ C_SHORTCUT_REPLACE = (
|
|
|
27
27
|
"\u200D\u203F\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD"
|
|
28
28
|
)
|
|
29
29
|
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
30
|
+
|
|
31
|
+
@lazy_subset
|
|
32
|
+
def c_shortcut() -> UnicodeSubset:
|
|
33
|
+
return UnicodeSubset(C_SHORTCUT_REPLACE)
|
|
34
|
+
|
|
35
|
+
|
|
36
|
+
@lazy_subset
|
|
37
|
+
def i_shortcut() -> UnicodeSubset:
|
|
38
|
+
return UnicodeSubset(I_SHORTCUT_REPLACE)
|
|
39
|
+
|
|
40
|
+
|
|
41
|
+
@lazy_subset
|
|
42
|
+
def s_shortcut() -> UnicodeSubset:
|
|
43
|
+
return UnicodeSubset(' \t\n\r')
|
|
44
|
+
|
|
45
|
+
|
|
46
|
+
@lazy_subset
|
|
47
|
+
def d_shortcut() -> UnicodeSubset:
|
|
48
|
+
return unicode_category('Nd')
|
|
49
|
+
|
|
50
|
+
|
|
51
|
+
@lazy_subset
|
|
52
|
+
def w_shortcut() -> UnicodeSubset:
|
|
53
|
+
return UnicodeSubset(chain.from_iterable(unicode_category(x) for x in 'LMNS'))
|
|
54
|
+
|
|
41
55
|
|
|
42
56
|
# Single and Multi character escapes
|
|
43
|
-
CHARACTER_ESCAPES = {
|
|
57
|
+
CHARACTER_ESCAPES: Dict[str, Union[str, Callable[[], UnicodeSubset]]] = {
|
|
44
58
|
# Single-character escapes
|
|
45
59
|
'\\n': '\n',
|
|
46
60
|
'\\r': '\r',
|
|
@@ -61,16 +75,16 @@ CHARACTER_ESCAPES = {
|
|
|
61
75
|
'\\\\': '\\',
|
|
62
76
|
|
|
63
77
|
# Multi-character escapes
|
|
64
|
-
'\\s':
|
|
65
|
-
'\\S':
|
|
66
|
-
'\\d':
|
|
67
|
-
'\\D':
|
|
68
|
-
'\\i':
|
|
69
|
-
'\\I':
|
|
70
|
-
'\\c':
|
|
71
|
-
'\\C':
|
|
72
|
-
'\\w':
|
|
73
|
-
'\\W':
|
|
78
|
+
'\\s': s_shortcut,
|
|
79
|
+
'\\S': s_shortcut,
|
|
80
|
+
'\\d': d_shortcut,
|
|
81
|
+
'\\D': d_shortcut,
|
|
82
|
+
'\\i': i_shortcut,
|
|
83
|
+
'\\I': i_shortcut,
|
|
84
|
+
'\\c': c_shortcut,
|
|
85
|
+
'\\C': c_shortcut,
|
|
86
|
+
'\\w': w_shortcut,
|
|
87
|
+
'\\W': w_shortcut,
|
|
74
88
|
}
|
|
75
89
|
|
|
76
90
|
|
|
@@ -83,7 +97,7 @@ class CharacterClass(MutableSet[int]):
|
|
|
83
97
|
TODO: implement __ior__, __iand__, __ixor__ operators for a full mutable set class.
|
|
84
98
|
"""
|
|
85
99
|
_re_char_set = re.compile(r'(?<!.-)(\\[nrt|.\-^?*+{}()\]sSdDiIcCwW]|\\[pP]{[a-zA-Z\-0-9]+})')
|
|
86
|
-
_re_unicode_ref = re.compile(r'\\([pP]){([\w
|
|
100
|
+
_re_unicode_ref = re.compile(r'\\([pP]){([\w-]+)}')
|
|
87
101
|
|
|
88
102
|
__slots__ = 'xsd_version', 'positive', 'negative'
|
|
89
103
|
|
|
@@ -138,17 +152,17 @@ class CharacterClass(MutableSet[int]):
|
|
|
138
152
|
return len(self.positive)
|
|
139
153
|
|
|
140
154
|
def __isub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
|
|
141
|
-
if
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
self.negative.
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
self.positive
|
|
150
|
-
|
|
151
|
-
return
|
|
155
|
+
if isinstance(other, CharacterClass):
|
|
156
|
+
if self.negative:
|
|
157
|
+
if other.negative:
|
|
158
|
+
self.positive |= (other.negative - self.negative)
|
|
159
|
+
self.negative.clear()
|
|
160
|
+
self.negative |= other.positive
|
|
161
|
+
elif other.negative:
|
|
162
|
+
self.positive &= other.negative
|
|
163
|
+
self.positive -= other.positive
|
|
164
|
+
return self
|
|
165
|
+
return NotImplemented
|
|
152
166
|
|
|
153
167
|
def __sub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
|
|
154
168
|
obj = self.__copy__()
|
|
@@ -164,9 +178,9 @@ class CharacterClass(MutableSet[int]):
|
|
|
164
178
|
if isinstance(value, str):
|
|
165
179
|
self.positive.update(value)
|
|
166
180
|
elif part[-1].islower():
|
|
167
|
-
self.positive |= value
|
|
181
|
+
self.positive |= value()
|
|
168
182
|
else:
|
|
169
|
-
self.negative |= value
|
|
183
|
+
self.negative |= value()
|
|
170
184
|
elif part.startswith('\\p') or part.startswith('\\P'):
|
|
171
185
|
if self._re_unicode_ref.search(part) is None:
|
|
172
186
|
raise RegexError("wrong Unicode block specification %r" % part)
|
|
@@ -198,11 +212,11 @@ class CharacterClass(MutableSet[int]):
|
|
|
198
212
|
if self.negative:
|
|
199
213
|
self.negative.update(value)
|
|
200
214
|
elif part[-1].islower():
|
|
201
|
-
self.positive -= value
|
|
215
|
+
self.positive -= value()
|
|
202
216
|
if self.negative:
|
|
203
|
-
self.negative |= value
|
|
217
|
+
self.negative |= value()
|
|
204
218
|
else:
|
|
205
|
-
self.positive &= value
|
|
219
|
+
self.positive &= value()
|
|
206
220
|
self.negative.clear()
|
|
207
221
|
|
|
208
222
|
elif part.startswith('\\p') or part.startswith('\\P'):
|
|
@@ -232,4 +246,4 @@ class CharacterClass(MutableSet[int]):
|
|
|
232
246
|
if self.positive or self.negative:
|
|
233
247
|
self.positive, self.negative = self.negative, self.positive
|
|
234
248
|
else:
|
|
235
|
-
self.positive.codepoints
|
|
249
|
+
self.positive.codepoints = [(0, maxunicode + 1)]
|