PyPI - elementpath - Versions diffs - 4.5.0__tar.gz → 4.7.0__tar.gz - Mend

elementpath 4.5.0__tar.gz → 4.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (135) hide show

{elementpath-4.5.0 → elementpath-4.7.0}/CHANGELOG.rst RENAMED Viewed

@@ -2,6 +2,18 @@
 CHANGELOG
 *********
+`v4.7.0`_ (2024-12-20)
+======================
+* Fix *fragment* argument usage (issue #81)
+* Fix constructors nud() to skip argument check with XP31+ arrow operator (issue #83)
+`v4.6.0`_ (2024-10-27)
+======================
+* Fix XsdAttributeGroupProtocol
+* Improve Unicode support with installable UnicodeData.txt versions
+* Extend names disambiguation with a fix for issue #78
+* Refactor tree builders to fix document position of tails (issue #79)
 `v4.5.0`_ (2024-09-09)
 ======================
 * Fix and clean node trees iteration methods (issue #72)
@@ -469,4 +481,6 @@ CHANGELOG
 .. _v4.2.1: https://github.com/sissaschool/elementpath/compare/v4.2.0...v4.2.1
 .. _v4.3.0: https://github.com/sissaschool/elementpath/compare/v4.2.1...v4.3.0
 .. _v4.4.0: https://github.com/sissaschool/elementpath/compare/v4.3.0...v4.4.0
-.. _v4.4.1: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
+.. _v4.5.0: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
+.. _v4.6.0: https://github.com/sissaschool/elementpath/compare/v4.5.0...v4.6.0
+.. _v4.7.0: https://github.com/sissaschool/elementpath/compare/v4.6.0...v4.7.0

{elementpath-4.5.0 → elementpath-4.7.0}/MANIFEST.in RENAMED Viewed

@@ -11,6 +11,7 @@ include mypy.ini
 include doc/*
 recursive-include elementpath *
+recursive-include scripts *
 recursive-include tests *
 recursive-exclude tests/.mypy_cache *

{elementpath-4.5.0 → elementpath-4.7.0}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: elementpath
-Version: 4.5.0
+Version: 4.7.0
 Summary: XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and lxml
 Home-page: https://github.com/sissaschool/elementpath
 Author: Davide Brunato
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
 Classifier: Programming Language :: Python :: 3.11
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
 Classifier: Topic :: Software Development :: Libraries

{elementpath-4.5.0 → elementpath-4.7.0}/doc/conf.py RENAMED Viewed

@@ -29,9 +29,9 @@ copyright = '2018-2024, SISSA (International School for Advanced Studies)'
 author = 'Davide Brunato'
 # The short X.Y version
-version = '4.5'
+version = '4.7'
 # The full version, including alpha/beta/rc tags
-release = '4.5.0'
+release = '4.7.0'
 # -- General configuration ---------------------------------------------------

{elementpath-4.5.0 → elementpath-4.7.0}/doc/xpath_api.rst RENAMED Viewed

@@ -147,6 +147,8 @@ XPath regular expressions
 =========================
 .. autofunction:: elementpath.translate_pattern
+.. autofunction:: elementpath.install_unicode_data
+.. autofunction:: elementpath.unicode_version
 Exception classes
@@ -154,6 +156,7 @@ Exception classes
 .. autoexception:: elementpath.ElementPathError
 .. autoexception:: elementpath.MissingContextError
+.. autoexception:: elementpath.UnsupportedFeatureError
 .. autoexception:: elementpath.RegexError
 .. autoexception:: elementpath.ElementPathLocaleError

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/__init__.py RENAMED Viewed

@@ -7,7 +7,7 @@
 #
 # @author Davide Brunato <brunato@sissa.it>
 #
-__version__ = '4.5.0'
+__version__ = '4.7.0'
 __author__ = "Davide Brunato"
 __contact__ = "brunato@sissa.it"
 __copyright__ = "Copyright 2018-2024, SISSA"
@@ -23,7 +23,7 @@ from . import protocols  # Protocols for type annotations
 from .exceptions import ElementPathError, MissingContextError, ElementPathKeyError, \
     ElementPathZeroDivisionError, ElementPathNameError, ElementPathOverflowError, \
     ElementPathRuntimeError, ElementPathSyntaxError, ElementPathTypeError, \
-    ElementPathValueError, ElementPathLocaleError
+    ElementPathValueError, ElementPathLocaleError, UnsupportedFeatureError
 from .xpath_context import XPathContext, XPathSchemaContext
 from .xpath_nodes import XPathNode, DocumentNode, ElementNode, AttributeNode, \
@@ -36,10 +36,11 @@ from .xpath1 import XPath1Parser
 from .xpath2 import XPath2Parser
 from .xpath_selectors import select, iter_select, Selector
 from .schema_proxy import AbstractSchemaProxy
-from .regex import RegexError, translate_pattern
+from .regex import RegexError, translate_pattern, install_unicode_data, unicode_version
 __all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContextError',
-           'ElementPathKeyError', 'ElementPathZeroDivisionError', 'ElementPathNameError',
+           'UnsupportedFeatureError', 'ElementPathKeyError',
+           'ElementPathZeroDivisionError', 'ElementPathNameError',
            'ElementPathOverflowError', 'ElementPathRuntimeError', 'ElementPathSyntaxError',
            'ElementPathTypeError', 'ElementPathValueError', 'ElementPathLocaleError',
            'XPathContext', 'XPathSchemaContext', 'XPathNode', 'DocumentNode',
@@ -48,4 +49,5 @@ __all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContex
            'SchemaElementNode', 'get_node_tree', 'build_node_tree',
            'build_lxml_node_tree', 'build_schema_node_tree', 'XPathToken',
            'XPathFunction', 'XPath1Parser', 'XPath2Parser', 'select', 'iter_select',
-           'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern']
+           'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern',
+           'install_unicode_data', 'unicode_version']

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/qname.py RENAMED Viewed

@@ -7,9 +7,9 @@
 #
 # @author Davide Brunato <brunato@sissa.it>
 #
+import re
 from typing import Any, Optional
-from elementpath.helpers import QNAME_PATTERN
 from .atomic_types import AnyAtomicType
 from .untyped import UntypedAtomic
@@ -22,7 +22,10 @@ class AbstractQName(AnyAtomicType):
     URI if a prefixed name is provided for the 2nd argument.
     :param qname: the prefixed name or a local name.
     """
-    pattern = QNAME_PATTERN
+    pattern = re.compile(
+        r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
+        r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
+    )
     def __new__(cls, *args: Any, **kwargs: Any) -> 'AbstractQName':
         if cls.__name__ == 'Notation':

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/string.py RENAMED Viewed

@@ -10,7 +10,7 @@
 import re
 from typing import Any
-from elementpath.helpers import NORMALIZE_PATTERN, collapse_white_spaces
+from elementpath.helpers import collapse_white_spaces, Patterns
 from .atomic_types import AnyAtomicType
@@ -20,7 +20,7 @@ class NormalizedString(str, AnyAtomicType):
     def __new__(cls, obj: Any) -> 'NormalizedString':
         try:
-            return super().__new__(cls, NORMALIZE_PATTERN.sub(' ', obj))
+            return super().__new__(cls, Patterns.normalize.sub(' ', obj))
         except TypeError:
             return super().__new__(cls, obj)
@@ -41,7 +41,7 @@ class XsdToken(NormalizedString):
         match = cls.pattern.match(value)
         if match is None:
             raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
-        return super(NormalizedString, cls).__new__(cls, value)
+        return super(NormalizedString, cls).__new__(cls, value)  # noqa
 class Language(XsdToken):
@@ -59,7 +59,7 @@ class Language(XsdToken):
         match = cls.pattern.match(value)
         if match is None:
             raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
-        return super(NormalizedString, cls).__new__(cls, value)
+        return super(NormalizedString, cls).__new__(cls, value)  # noqa
 class Name(XsdToken):

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/uri.py RENAMED Viewed

@@ -11,7 +11,7 @@ from decimal import Decimal
 from urllib.parse import urlparse
 from typing import Union
-from elementpath.helpers import collapse_white_spaces, WRONG_ESCAPE_PATTERN
+from elementpath.helpers import collapse_white_spaces, Patterns
 from .atomic_types import AnyAtomicType
 from .untyped import UntypedAtomic
 from .numeric import Integer
@@ -110,6 +110,6 @@ class AnyURI(AnyAtomicType):
             elif value.count('#') > 1:
                 msg = 'invalid value {!r} for xs:{} (too many # characters)'
                 raise ValueError(msg.format(value, cls.name))
-            elif WRONG_ESCAPE_PATTERN.search(value) is not None:
+            elif Patterns.wrong_escape.search(value) is not None:
                 msg = 'invalid value {!r} for xs:{} (wrong escaping)'
                 raise ValueError(msg.format(value, cls.name))

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/etree.py RENAMED Viewed

@@ -106,7 +106,17 @@ def is_etree_element(obj: Any) -> bool:
 def is_lxml_etree_element(obj: Any) -> bool:
-    return is_etree_element(obj) and hasattr(obj, 'getparent') and hasattr(obj, 'nsmap')
+    return is_etree_element(obj) and \
+        hasattr(obj, 'getparent') and \
+        hasattr(obj, 'nsmap') and \
+        obj.__class__.__module__ in ('lxml.etree', 'lxml.html')
+def is_etree_element_instance(obj: Any) -> bool:
+    """Strictly checks that the objects is an ElementTree or lxml.etree Element."""
+    return isinstance(obj, ElementTree.Element) or \
+        isinstance(obj, PyElementTree.Element) or \
+        is_lxml_etree_element(obj)
 def is_etree_document(obj: Any) -> bool:
@@ -114,7 +124,17 @@ def is_etree_document(obj: Any) -> bool:
 def is_lxml_etree_document(obj: Any) -> bool:
-    return is_etree_document(obj) and hasattr(obj, 'xpath') and hasattr(obj, 'xslt')
+    return is_etree_document(obj) and \
+        hasattr(obj, 'xpath') and \
+        hasattr(obj, 'xslt') and \
+        obj.__class__.__module__ in ('lxml.etree', 'lxml.html')
+def is_etree_document_instance(obj: Any) -> bool:
+    """Strictly checks that the objects is an ElementTree or lxml.etree document."""
+    return isinstance(obj, ElementTree.ElementTree) or \
+        isinstance(obj, PyElementTree.ElementTree) or \
+        is_lxml_etree_document(obj)
 def etree_iter_strings(elem: Union[DocumentProtocol, ElementProtocol],
@@ -237,7 +257,7 @@ def etree_tostring(elem: ElementProtocol,
             return indent + line
     etree_module: Any
-    if not is_etree_element(elem):
+    if not is_etree_element_instance(elem):
         raise TypeError(f"{elem!r} is not an Element")
     elif isinstance(elem, PyElementTree.Element):
         etree_module = PyElementTree
@@ -308,6 +328,6 @@ def etree_tostring(elem: ElementProtocol,
 __all__ = ['ElementTree', 'PyElementTree', 'SafeXMLParser', 'defuse_xml',
-           'is_etree_element', 'is_lxml_etree_element', 'is_etree_document',
-           'is_lxml_etree_document', 'etree_iter_strings', 'etree_deep_equal',
-           'etree_iter_paths', 'etree_tostring']
+           'is_etree_element', 'is_lxml_etree_element', 'is_etree_element_instance',
+           'is_etree_document', 'is_lxml_etree_document', 'is_etree_document_instance',
+           'etree_iter_strings', 'etree_deep_equal', 'etree_iter_paths', 'etree_tostring']

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/exceptions.py RENAMED Viewed

@@ -52,6 +52,10 @@ class MissingContextError(ElementPathError):
     """Raised when the dynamic context is required for evaluate the XPath expression."""
+class UnsupportedFeatureError(ElementPathError, NotImplementedError):
+    """Raised when an XPath feature is not supported in the current context."""
 class ElementPathKeyError(ElementPathError, KeyError):
     pass

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/helpers.py RENAMED Viewed

@@ -12,10 +12,10 @@ import math
 from calendar import isleap, leapdays
 from decimal import Decimal
 from operator import attrgetter
-from typing import Any, List, Optional, Union, SupportsFloat
+from typing import Any, List, Optional, overload, SupportsFloat, Type, Union
 from urllib.parse import urlsplit
-from elementpath._typing import Iterator, Match
+from elementpath._typing import Iterator, Match, Pattern
 ###
 # Common sets constants
@@ -26,23 +26,68 @@ INVALID_NUMERIC = frozenset(
     ('inf', '+inf', '-inf', 'nan', 'infinity', '+infinity', '-infinity')
 )
 ###
-# Data validation helpers
-NORMALIZE_PATTERN = re.compile(r'[^\S\xa0]')
-WHITESPACES_PATTERN = re.compile(r'[^\S\xa0]+')  # include ASCII 160 (non-breaking space)
-NCNAME_PATTERN = re.compile(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
-QNAME_PATTERN = re.compile(
-    r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
-    r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
-)
-EQNAME_PATTERN = re.compile(
-    r'^(?:Q{(?P<namespace>[^}]+)}|'
-    r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
-    r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
-)
-WRONG_ESCAPE_PATTERN = re.compile(r'%(?![a-fA-F\d]{2})')
-XML_NEWLINES_PATTERN = re.compile('\r\n|\r|\n')
+# Data validation patterns
+class LazyPattern:
+    """
+    A descriptor for creating lazy regexp patterns. The compiled pattern is built
+    only when the descriptor attribute is accessed (e.g. a hasattr() call).
+    """
+    _compiled: Pattern[str]
+    def __init__(self, pattern: str, flags: Union[int, re.RegexFlag] = 0) -> None:
+        self._pattern = pattern
+        self._flags = flags
+    def __set_name__(self, owner: Type[Any], name: str) -> None:
+        self._name = name
+    @overload
+    def __get__(self, instance: None, owner: Type[Any]) -> Pattern[str]: ...
+    @overload
+    def __get__(self, instance: Any, owner: Type[Any]) -> Pattern[str]: ...
+    def __get__(self, instance: Optional[Any], owner: Type[Any]) -> Pattern[str]:
+        try:
+            return self._compiled
+        except AttributeError:
+            self._compiled = re.compile(self._pattern, self._flags)
+            return self._compiled
+    def __set__(self, instance: Any, value: Any) -> None:
+        raise AttributeError("Can't set attribute {}".format(self._name))
+    def __delete__(self, instance: Any) -> None:
+        raise AttributeError("Can't delete attribute {}".format(self._name))
+class Patterns:
+    """
+    Helper patterns, the ones that aren't used at import time are defined lazy.
+    """
+    whitespaces = re.compile(r'[^\S\xa0]+')  # include ASCII 160 (non-breaking space)
+    normalize = LazyPattern(r'[^\S\xa0]')
+    ncname = LazyPattern(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
+    extended_qname = LazyPattern(
+        r'^(?:Q{(?P<namespace>[^}]+)}|'
+        r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
+        r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
+    )
+    replacement = LazyPattern(r'^([^\\$]|\\{2}|\\\$|\$\d+)*$')
+    sequence_type = LazyPattern(r'\s?([()?*+,])\s?')
+    unicode_escape = LazyPattern(r'(?:\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8}))')
+    wrong_escape = LazyPattern(r'%(?![a-fA-F\d]{2})')
+    xml_newlines = LazyPattern('\r\n|\r|\n')
+    # Regex patterns related to names and namespaces
+    namespace_uri = LazyPattern(r'{([^}]+)}')
+    expanded_name = LazyPattern(
+        r'^(?:{(?P<namespace>[^}]+)})?'
+        r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
+    )
 def upper_camel_case(s: str) -> str:
@@ -50,16 +95,16 @@ def upper_camel_case(s: str) -> str:
 def collapse_white_spaces(s: str) -> str:
-    return WHITESPACES_PATTERN.sub(' ', s).strip(' ')
+    return Patterns.whitespaces.sub(' ', s).strip(' ')
 def is_ncname(s: str) -> bool:
-    return re.match(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$', s) is not None
+    return Patterns.ncname.match(s) is not None
 def is_idrefs(value: Optional[str]) -> bool:
     return isinstance(value, str) and \
-        all(NCNAME_PATTERN.match(x) is not None for x in value.split())
+        all(Patterns.ncname.match(x) is not None for x in value.split())
 node_position = attrgetter('position')
@@ -243,7 +288,8 @@ def escape_json_string(s: str, escaped: bool = False) -> str:
 def unescape_json_string(s: str) -> str:
     def unicode_escape_callback(match: Match[str]) -> str:
-        return chr(int(match.group(1).upper(), 16))
+        group = match.group(1) or match.group(2)
+        return chr(int(group.upper(), 16))
     s = s.replace('\\"', '\"').\
         replace(r'\b', '\b').\
@@ -254,7 +300,7 @@ def unescape_json_string(s: str) -> str:
         replace(r'\/', '/').\
         replace('\\\\', '\\')
-    return re.sub(r'\\u([0-9A-Fa-f]{4})', unicode_escape_callback, s)
+    return Patterns.unicode_escape.sub(unicode_escape_callback, s)
 def iter_sequence(obj: Any) -> Iterator[Any]:

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/namespaces.py RENAMED Viewed

@@ -7,17 +7,10 @@
 #
 # @author Davide Brunato <brunato@sissa.it>
 #
-import re
 from typing import cast, Tuple, Union
 from elementpath.aliases import NamespacesType, NsmapType
-# Regex patterns related to names and namespaces
-NAMESPACE_URI_PATTERN = re.compile(r'{([^}]+)}')
-EXPANDED_NAME_PATTERN = re.compile(
-    r'^(?:{(?P<namespace>[^}]+)})?'
-    r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
-)
+from elementpath.helpers import Patterns
 # Namespaces
 XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
@@ -70,13 +63,13 @@ XSD_NUMERIC = '{%s}numeric' % XSD_NAMESPACE
 def get_namespace(name: str) -> str:
     try:
-        return NAMESPACE_URI_PATTERN.match(name).group(1)  # type: ignore[union-attr]
+        return Patterns.namespace_uri.match(name).group(1)  # type: ignore[union-attr]
     except AttributeError:
         return ''
 def split_expanded_name(name: str) -> Tuple[str, str]:
-    match = EXPANDED_NAME_PATTERN.match(name)
+    match = Patterns.expanded_name.match(name)
     if match is None:
         raise ValueError(f"{name!r} is not an expanded QName")
     namespace, local_name = match.groups()

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/protocols.py RENAMED Viewed

@@ -272,7 +272,7 @@ XsdXPathNodeType = Union['XsdSchemaProtocol', 'XsdElementProtocol']
 class XsdAttributeGroupProtocol(XsdComponentProtocol, Protocol):
     @overload
-    def get(self, key: Optional[str], default: None) -> Optional[XsdAttributeProtocol]: ...
+    def get(self, key: Optional[str]) -> Optional[XsdAttributeProtocol]: ...
     @overload
     def get(self, key: Optional[str], default: _T) -> Union[XsdAttributeProtocol, _T]: ...

elementpath-4.7.0/elementpath/regex/__init__.py ADDED Viewed

@@ -0,0 +1,25 @@
+#
+# Copyright (c), 2018-2020, SISSA (International School for Advanced Studies).
+# All rights reserved.
+# This file is distributed under the terms of the MIT License.
+# See the file 'LICENSE' in the root directory of the present
+# distribution, or http://opensource.org/licenses/MIT.
+#
+# @author Davide Brunato <brunato@sissa.it>
+#
+"""
+Subpackage for processing XML regular expressions and for converting them to
+Python-compatible regexps.
+XPath/XQuery/XML-Schema regexp flavors are supported through translate_pattern()
+API options. Default options process XPath/XQuery patterns.
+"""
+from .codepoints import RegexError, iter_code_points
+from .unicode_subsets import UnicodeSubset, UnicodeData, install_unicode_data, \
+    unicode_version, unicode_subset, lazy_subset, unicode_category, unicode_block
+from .character_classes import CharacterClass
+from .patterns import translate_pattern
+__all__ = ['translate_pattern', 'RegexError', 'UnicodeSubset', 'UnicodeData',
+           'install_unicode_data', 'unicode_version', 'unicode_subset', 'lazy_subset',
+           'unicode_category', 'unicode_block', 'CharacterClass', 'iter_code_points']

{elementpath-4.5.0 → elementpath-4.7.0}/elementpath/regex/character_classes.py RENAMED Viewed

@@ -8,14 +8,14 @@
 # @author Davide Brunato <brunato@sissa.it>
 #
 import re
-from itertools import chain
 from sys import maxunicode
 from collections import Counter
-from typing import AbstractSet, Any, Optional, Union
+from itertools import chain
+from typing import AbstractSet, Any, Callable, Dict, Optional, Union
 from elementpath._typing import Iterator, MutableSet
-from .unicode_subsets import RegexError, UnicodeSubset, UNICODE_CATEGORIES, unicode_subset
+from .codepoints import RegexError
+from .unicode_subsets import UnicodeSubset, lazy_subset, unicode_subset, unicode_category
 I_SHORTCUT_REPLACE = (
     ":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
@@ -27,20 +27,34 @@ C_SHORTCUT_REPLACE = (
     "\u200D\u203F\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD"
 )
-S_SHORTCUT_SET = UnicodeSubset(' \n\t\r')
-D_SHORTCUT_SET = UnicodeSubset()
-D_SHORTCUT_SET._codepoints = UNICODE_CATEGORIES['Nd'].codepoints
-I_SHORTCUT_SET = UnicodeSubset(I_SHORTCUT_REPLACE)
-C_SHORTCUT_SET = UnicodeSubset(C_SHORTCUT_REPLACE)
-W_SHORTCUT_SET = UnicodeSubset(chain(
-    UNICODE_CATEGORIES['L'].codepoints,
-    UNICODE_CATEGORIES['M'].codepoints,
-    UNICODE_CATEGORIES['N'].codepoints,
-    UNICODE_CATEGORIES['S'].codepoints
-))
+@lazy_subset
+def c_shortcut() -> UnicodeSubset:
+    return UnicodeSubset(C_SHORTCUT_REPLACE)
+@lazy_subset
+def i_shortcut() -> UnicodeSubset:
+    return UnicodeSubset(I_SHORTCUT_REPLACE)
+@lazy_subset
+def s_shortcut() -> UnicodeSubset:
+    return UnicodeSubset(' \t\n\r')
+@lazy_subset
+def d_shortcut() -> UnicodeSubset:
+    return unicode_category('Nd')
+@lazy_subset
+def w_shortcut() -> UnicodeSubset:
+    return UnicodeSubset(chain.from_iterable(unicode_category(x) for x in 'LMNS'))
 # Single and Multi character escapes
-CHARACTER_ESCAPES = {
+CHARACTER_ESCAPES: Dict[str, Union[str, Callable[[], UnicodeSubset]]] = {
     # Single-character escapes
     '\\n': '\n',
     '\\r': '\r',
@@ -61,16 +75,16 @@ CHARACTER_ESCAPES = {
     '\\\\': '\\',
     # Multi-character escapes
-    '\\s': S_SHORTCUT_SET,
-    '\\S': S_SHORTCUT_SET,
-    '\\d': D_SHORTCUT_SET,
-    '\\D': D_SHORTCUT_SET,
-    '\\i': I_SHORTCUT_SET,
-    '\\I': I_SHORTCUT_SET,
-    '\\c': C_SHORTCUT_SET,
-    '\\C': C_SHORTCUT_SET,
-    '\\w': W_SHORTCUT_SET,
-    '\\W': W_SHORTCUT_SET,
+    '\\s': s_shortcut,
+    '\\S': s_shortcut,
+    '\\d': d_shortcut,
+    '\\D': d_shortcut,
+    '\\i': i_shortcut,
+    '\\I': i_shortcut,
+    '\\c': c_shortcut,
+    '\\C': c_shortcut,
+    '\\w': w_shortcut,
+    '\\W': w_shortcut,
 }
@@ -83,7 +97,7 @@ class CharacterClass(MutableSet[int]):
     TODO: implement __ior__, __iand__, __ixor__ operators for a full mutable set class.
     """
     _re_char_set = re.compile(r'(?<!.-)(\\[nrt|.\-^?*+{}()\]sSdDiIcCwW]|\\[pP]{[a-zA-Z\-0-9]+})')
-    _re_unicode_ref = re.compile(r'\\([pP]){([\w\d-]+)}')
+    _re_unicode_ref = re.compile(r'\\([pP]){([\w-]+)}')
     __slots__ = 'xsd_version', 'positive', 'negative'
@@ -138,17 +152,17 @@ class CharacterClass(MutableSet[int]):
         return len(self.positive)
     def __isub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
-        if not isinstance(other, CharacterClass):
-            return NotImplemented
-        elif self.negative:
-            if other.negative:
-                self.positive |= (other.negative - self.negative)
-                self.negative.clear()
-            self.negative |= other.positive
-        elif other.negative:
-            self.positive &= other.negative
-        self.positive -= other.positive
-        return self
+        if isinstance(other, CharacterClass):
+            if self.negative:
+                if other.negative:
+                    self.positive |= (other.negative - self.negative)
+                    self.negative.clear()
+                self.negative |= other.positive
+            elif other.negative:
+                self.positive &= other.negative
+            self.positive -= other.positive
+            return self
+        return NotImplemented
     def __sub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
         obj = self.__copy__()
@@ -164,9 +178,9 @@ class CharacterClass(MutableSet[int]):
                 if isinstance(value, str):
                     self.positive.update(value)
                 elif part[-1].islower():
-                    self.positive |= value
+                    self.positive |= value()
                 else:
-                    self.negative |= value
+                    self.negative |= value()
             elif part.startswith('\\p') or part.startswith('\\P'):
                 if self._re_unicode_ref.search(part) is None:
                     raise RegexError("wrong Unicode block specification %r" % part)
@@ -198,11 +212,11 @@ class CharacterClass(MutableSet[int]):
                     if self.negative:
                         self.negative.update(value)
                 elif part[-1].islower():
-                    self.positive -= value
+                    self.positive -= value()
                     if self.negative:
-                        self.negative |= value
+                        self.negative |= value()
                 else:
-                    self.positive &= value
+                    self.positive &= value()
                     self.negative.clear()
             elif part.startswith('\\p') or part.startswith('\\P'):
@@ -232,4 +246,4 @@ class CharacterClass(MutableSet[int]):
         if self.positive or self.negative:
             self.positive, self.negative = self.negative, self.positive
         else:
-            self.positive.codepoints.append((0, maxunicode + 1))
+            self.positive.codepoints = [(0, maxunicode + 1)]

elementpath 4.5.0__tar.gz → 4.7.0__tar.gz

elementpath 4.5.0__tar.gz → 4.7.0__tar.gz