elementpath 4.5.0__tar.gz → 4.6.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. {elementpath-4.5.0 → elementpath-4.6.0}/CHANGELOG.rst +9 -1
  2. {elementpath-4.5.0 → elementpath-4.6.0}/MANIFEST.in +1 -0
  3. {elementpath-4.5.0 → elementpath-4.6.0}/PKG-INFO +2 -1
  4. {elementpath-4.5.0 → elementpath-4.6.0}/doc/conf.py +2 -2
  5. {elementpath-4.5.0 → elementpath-4.6.0}/doc/xpath_api.rst +3 -0
  6. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/__init__.py +7 -5
  7. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/qname.py +5 -2
  8. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/string.py +4 -4
  9. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/uri.py +2 -2
  10. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/exceptions.py +4 -0
  11. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/helpers.py +69 -23
  12. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/namespaces.py +3 -10
  13. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/protocols.py +1 -1
  14. elementpath-4.6.0/elementpath/regex/__init__.py +25 -0
  15. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/regex/character_classes.py +58 -44
  16. elementpath-4.6.0/elementpath/regex/codepoints.py +206 -0
  17. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/regex/patterns.py +6 -6
  18. elementpath-4.6.0/elementpath/regex/unicode_blocks.py +450 -0
  19. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/regex/unicode_categories.py +418 -5
  20. elementpath-4.6.0/elementpath/regex/unicode_subsets.py +639 -0
  21. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/sequence_types.py +7 -9
  22. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/tdop.py +8 -7
  23. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/tree_builders.py +144 -133
  24. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath1/xpath1_parser.py +56 -11
  25. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath2/_xpath2_functions.py +3 -6
  26. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath2/_xpath2_operators.py +7 -2
  27. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath2/xpath2_parser.py +11 -18
  28. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath30/_xpath30_functions.py +25 -25
  29. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath31/_xpath31_functions.py +4 -3
  30. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath_nodes.py +1 -1
  31. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath_tokens.py +8 -11
  32. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath.egg-info/PKG-INFO +2 -1
  33. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath.egg-info/SOURCES.txt +3 -1
  34. {elementpath-4.5.0 → elementpath-4.6.0}/requirements-dev.txt +1 -1
  35. elementpath-4.6.0/scripts/generate_codepoints.py +406 -0
  36. {elementpath-4.5.0 → elementpath-4.6.0}/setup.py +2 -1
  37. elementpath-4.6.0/tests/mypy_tests/advanced.py +30 -0
  38. {elementpath-4.5.0 → elementpath-4.6.0}/tests/mypy_tests/selectors.py +15 -0
  39. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_helpers.py +15 -2
  40. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_regex.py +222 -48
  41. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_tree_builders.py +67 -3
  42. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_typing.py +8 -0
  43. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath1_parser.py +22 -9
  44. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath2_parser.py +42 -2
  45. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath_tokens.py +31 -0
  46. {elementpath-4.5.0 → elementpath-4.6.0}/tox.ini +14 -9
  47. elementpath-4.5.0/elementpath/regex/__init__.py +0 -24
  48. elementpath-4.5.0/elementpath/regex/codepoints.py +0 -126
  49. elementpath-4.5.0/elementpath/regex/generate_categories.py +0 -116
  50. elementpath-4.5.0/elementpath/regex/unicode_subsets.py +0 -519
  51. {elementpath-4.5.0 → elementpath-4.6.0}/.coveragerc +0 -0
  52. {elementpath-4.5.0 → elementpath-4.6.0}/LICENSE +0 -0
  53. {elementpath-4.5.0 → elementpath-4.6.0}/README.rst +0 -0
  54. {elementpath-4.5.0 → elementpath-4.6.0}/doc/Makefile +0 -0
  55. {elementpath-4.5.0 → elementpath-4.6.0}/doc/advanced.rst +0 -0
  56. {elementpath-4.5.0 → elementpath-4.6.0}/doc/index.rst +0 -0
  57. {elementpath-4.5.0 → elementpath-4.6.0}/doc/introduction.rst +0 -0
  58. {elementpath-4.5.0 → elementpath-4.6.0}/doc/make.bat +0 -0
  59. {elementpath-4.5.0 → elementpath-4.6.0}/doc/pratt_api.rst +0 -0
  60. {elementpath-4.5.0 → elementpath-4.6.0}/doc/requirements.txt +0 -0
  61. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/_typing.py +0 -0
  62. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/aliases.py +0 -0
  63. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/collations.py +0 -0
  64. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/compare.py +0 -0
  65. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/__init__.py +0 -0
  66. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/atomic_types.py +0 -0
  67. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/binary.py +1 -1
  68. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/datetime.py +1 -1
  69. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/numeric.py +1 -1
  70. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/proxies.py +0 -0
  71. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/datatypes/untyped.py +0 -0
  72. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/decoder.py +0 -0
  73. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/etree.py +0 -0
  74. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/py.typed +0 -0
  75. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/schema_proxy.py +0 -0
  76. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/serialization.py +0 -0
  77. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/validators/__init__.py +0 -0
  78. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/validators/analyze-string.xsd +0 -0
  79. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/validators/schema-for-json.xsd +0 -0
  80. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath1/__init__.py +0 -0
  81. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath1/_xpath1_axes.py +0 -0
  82. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath1/_xpath1_functions.py +0 -0
  83. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath1/_xpath1_operators.py +0 -0
  84. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath2/__init__.py +0 -0
  85. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath2/_xpath2_constructors.py +0 -0
  86. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath3.py +0 -0
  87. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath30/__init__.py +0 -0
  88. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath30/_translation_maps.py +0 -0
  89. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath30/_xpath30_operators.py +0 -0
  90. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath30/xpath30_helpers.py +0 -0
  91. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath30/xpath30_parser.py +0 -0
  92. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath31/__init__.py +0 -0
  93. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath31/_xpath31_operators.py +0 -0
  94. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath31/xpath31_parser.py +0 -0
  95. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath_context.py +0 -0
  96. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath/xpath_selectors.py +0 -0
  97. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath.egg-info/dependency_links.txt +0 -0
  98. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath.egg-info/requires.txt +0 -0
  99. {elementpath-4.5.0 → elementpath-4.6.0}/elementpath.egg-info/top_level.txt +0 -0
  100. {elementpath-4.5.0 → elementpath-4.6.0}/mypy.ini +0 -0
  101. {elementpath-4.5.0 → elementpath-4.6.0}/setup.cfg +0 -0
  102. {elementpath-4.5.0 → elementpath-4.6.0}/tests/__init__.py +0 -0
  103. {elementpath-4.5.0 → elementpath-4.6.0}/tests/execute_w3c_tests.py +0 -0
  104. {elementpath-4.5.0 → elementpath-4.6.0}/tests/memory_profiling.py +0 -0
  105. {elementpath-4.5.0 → elementpath-4.6.0}/tests/mypy_tests/protocols.py +0 -0
  106. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/analyze-string.xsd +0 -0
  107. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/external_entity.xml +0 -0
  108. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/sample.xml +0 -0
  109. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/schema-for-json.xsd +0 -0
  110. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/unparsed_entity.xml +0 -0
  111. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/unused_external_entity.xml +0 -0
  112. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/unused_unparsed_entity.xml +0 -0
  113. {elementpath-4.5.0 → elementpath-4.6.0}/tests/resources/with_entity.xml +0 -0
  114. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_collations.py +0 -0
  115. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_compare.py +0 -0
  116. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_datatypes.py +0 -0
  117. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_elementpath.py +0 -0
  118. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_etree.py +0 -0
  119. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_exceptions.py +0 -0
  120. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_namespaces.py +0 -0
  121. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_package.py +0 -0
  122. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_schema_context.py +0 -0
  123. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_schema_proxy.py +0 -0
  124. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_selectors.py +0 -0
  125. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_sequence_types.py +0 -0
  126. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_serialization.py +0 -0
  127. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_tdop_parser.py +0 -0
  128. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_validators.py +0 -0
  129. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath2_constructors.py +0 -0
  130. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath2_functions.py +0 -0
  131. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath30.py +0 -0
  132. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath31.py +0 -0
  133. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath_context.py +0 -0
  134. {elementpath-4.5.0 → elementpath-4.6.0}/tests/test_xpath_nodes.py +0 -0
  135. {elementpath-4.5.0 → elementpath-4.6.0}/tests/xpath_test_class.py +0 -0
@@ -2,6 +2,13 @@
2
2
  CHANGELOG
3
3
  *********
4
4
 
5
+ `v4.6.0`_ (2024-10-27)
6
+ ======================
7
+ * Fix XsdAttributeGroupProtocol
8
+ * Improve Unicode support with installable UnicodeData.txt versions
9
+ * Extend names disambiguation with a fix for issue #78
10
+ * Refactor tree builders to fix document position of tails (issue #79)
11
+
5
12
  `v4.5.0`_ (2024-09-09)
6
13
  ======================
7
14
  * Fix and clean node trees iteration methods (issue #72)
@@ -469,4 +476,5 @@ CHANGELOG
469
476
  .. _v4.2.1: https://github.com/sissaschool/elementpath/compare/v4.2.0...v4.2.1
470
477
  .. _v4.3.0: https://github.com/sissaschool/elementpath/compare/v4.2.1...v4.3.0
471
478
  .. _v4.4.0: https://github.com/sissaschool/elementpath/compare/v4.3.0...v4.4.0
472
- .. _v4.4.1: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
479
+ .. _v4.5.0: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
480
+ .. _v4.6.0: https://github.com/sissaschool/elementpath/compare/v4.5.0...v4.6.0
@@ -11,6 +11,7 @@ include mypy.ini
11
11
  include doc/*
12
12
 
13
13
  recursive-include elementpath *
14
+ recursive-include scripts *
14
15
  recursive-include tests *
15
16
  recursive-exclude tests/.mypy_cache *
16
17
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: elementpath
3
- Version: 4.5.0
3
+ Version: 4.6.0
4
4
  Summary: XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and lxml
5
5
  Home-page: https://github.com/sissaschool/elementpath
6
6
  Author: Davide Brunato
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
25
26
  Classifier: Programming Language :: Python :: Implementation :: CPython
26
27
  Classifier: Programming Language :: Python :: Implementation :: PyPy
27
28
  Classifier: Topic :: Software Development :: Libraries
@@ -29,9 +29,9 @@ copyright = '2018-2024, SISSA (International School for Advanced Studies)'
29
29
  author = 'Davide Brunato'
30
30
 
31
31
  # The short X.Y version
32
- version = '4.5'
32
+ version = '4.6'
33
33
  # The full version, including alpha/beta/rc tags
34
- release = '4.5.0'
34
+ release = '4.6.0'
35
35
 
36
36
  # -- General configuration ---------------------------------------------------
37
37
 
@@ -147,6 +147,8 @@ XPath regular expressions
147
147
  =========================
148
148
 
149
149
  .. autofunction:: elementpath.translate_pattern
150
+ .. autofunction:: elementpath.install_unicode_data
151
+ .. autofunction:: elementpath.unicode_version
150
152
 
151
153
 
152
154
  Exception classes
@@ -154,6 +156,7 @@ Exception classes
154
156
 
155
157
  .. autoexception:: elementpath.ElementPathError
156
158
  .. autoexception:: elementpath.MissingContextError
159
+ .. autoexception:: elementpath.UnsupportedFeatureError
157
160
  .. autoexception:: elementpath.RegexError
158
161
  .. autoexception:: elementpath.ElementPathLocaleError
159
162
 
@@ -7,7 +7,7 @@
7
7
  #
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
- __version__ = '4.5.0'
10
+ __version__ = '4.6.0'
11
11
  __author__ = "Davide Brunato"
12
12
  __contact__ = "brunato@sissa.it"
13
13
  __copyright__ = "Copyright 2018-2024, SISSA"
@@ -23,7 +23,7 @@ from . import protocols # Protocols for type annotations
23
23
  from .exceptions import ElementPathError, MissingContextError, ElementPathKeyError, \
24
24
  ElementPathZeroDivisionError, ElementPathNameError, ElementPathOverflowError, \
25
25
  ElementPathRuntimeError, ElementPathSyntaxError, ElementPathTypeError, \
26
- ElementPathValueError, ElementPathLocaleError
26
+ ElementPathValueError, ElementPathLocaleError, UnsupportedFeatureError
27
27
 
28
28
  from .xpath_context import XPathContext, XPathSchemaContext
29
29
  from .xpath_nodes import XPathNode, DocumentNode, ElementNode, AttributeNode, \
@@ -36,10 +36,11 @@ from .xpath1 import XPath1Parser
36
36
  from .xpath2 import XPath2Parser
37
37
  from .xpath_selectors import select, iter_select, Selector
38
38
  from .schema_proxy import AbstractSchemaProxy
39
- from .regex import RegexError, translate_pattern
39
+ from .regex import RegexError, translate_pattern, install_unicode_data, unicode_version
40
40
 
41
41
  __all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContextError',
42
- 'ElementPathKeyError', 'ElementPathZeroDivisionError', 'ElementPathNameError',
42
+ 'UnsupportedFeatureError', 'ElementPathKeyError',
43
+ 'ElementPathZeroDivisionError', 'ElementPathNameError',
43
44
  'ElementPathOverflowError', 'ElementPathRuntimeError', 'ElementPathSyntaxError',
44
45
  'ElementPathTypeError', 'ElementPathValueError', 'ElementPathLocaleError',
45
46
  'XPathContext', 'XPathSchemaContext', 'XPathNode', 'DocumentNode',
@@ -48,4 +49,5 @@ __all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContex
48
49
  'SchemaElementNode', 'get_node_tree', 'build_node_tree',
49
50
  'build_lxml_node_tree', 'build_schema_node_tree', 'XPathToken',
50
51
  'XPathFunction', 'XPath1Parser', 'XPath2Parser', 'select', 'iter_select',
51
- 'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern']
52
+ 'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern',
53
+ 'install_unicode_data', 'unicode_version']
@@ -7,9 +7,9 @@
7
7
  #
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
+ import re
10
11
  from typing import Any, Optional
11
12
 
12
- from elementpath.helpers import QNAME_PATTERN
13
13
  from .atomic_types import AnyAtomicType
14
14
  from .untyped import UntypedAtomic
15
15
 
@@ -22,7 +22,10 @@ class AbstractQName(AnyAtomicType):
22
22
  URI if a prefixed name is provided for the 2nd argument.
23
23
  :param qname: the prefixed name or a local name.
24
24
  """
25
- pattern = QNAME_PATTERN
25
+ pattern = re.compile(
26
+ r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
27
+ r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
28
+ )
26
29
 
27
30
  def __new__(cls, *args: Any, **kwargs: Any) -> 'AbstractQName':
28
31
  if cls.__name__ == 'Notation':
@@ -10,7 +10,7 @@
10
10
  import re
11
11
  from typing import Any
12
12
 
13
- from elementpath.helpers import NORMALIZE_PATTERN, collapse_white_spaces
13
+ from elementpath.helpers import collapse_white_spaces, Patterns
14
14
  from .atomic_types import AnyAtomicType
15
15
 
16
16
 
@@ -20,7 +20,7 @@ class NormalizedString(str, AnyAtomicType):
20
20
 
21
21
  def __new__(cls, obj: Any) -> 'NormalizedString':
22
22
  try:
23
- return super().__new__(cls, NORMALIZE_PATTERN.sub(' ', obj))
23
+ return super().__new__(cls, Patterns.normalize.sub(' ', obj))
24
24
  except TypeError:
25
25
  return super().__new__(cls, obj)
26
26
 
@@ -41,7 +41,7 @@ class XsdToken(NormalizedString):
41
41
  match = cls.pattern.match(value)
42
42
  if match is None:
43
43
  raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
44
- return super(NormalizedString, cls).__new__(cls, value)
44
+ return super(NormalizedString, cls).__new__(cls, value) # noqa
45
45
 
46
46
 
47
47
  class Language(XsdToken):
@@ -59,7 +59,7 @@ class Language(XsdToken):
59
59
  match = cls.pattern.match(value)
60
60
  if match is None:
61
61
  raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
62
- return super(NormalizedString, cls).__new__(cls, value)
62
+ return super(NormalizedString, cls).__new__(cls, value) # noqa
63
63
 
64
64
 
65
65
  class Name(XsdToken):
@@ -11,7 +11,7 @@ from decimal import Decimal
11
11
  from urllib.parse import urlparse
12
12
  from typing import Union
13
13
 
14
- from elementpath.helpers import collapse_white_spaces, WRONG_ESCAPE_PATTERN
14
+ from elementpath.helpers import collapse_white_spaces, Patterns
15
15
  from .atomic_types import AnyAtomicType
16
16
  from .untyped import UntypedAtomic
17
17
  from .numeric import Integer
@@ -110,6 +110,6 @@ class AnyURI(AnyAtomicType):
110
110
  elif value.count('#') > 1:
111
111
  msg = 'invalid value {!r} for xs:{} (too many # characters)'
112
112
  raise ValueError(msg.format(value, cls.name))
113
- elif WRONG_ESCAPE_PATTERN.search(value) is not None:
113
+ elif Patterns.wrong_escape.search(value) is not None:
114
114
  msg = 'invalid value {!r} for xs:{} (wrong escaping)'
115
115
  raise ValueError(msg.format(value, cls.name))
@@ -52,6 +52,10 @@ class MissingContextError(ElementPathError):
52
52
  """Raised when the dynamic context is required for evaluate the XPath expression."""
53
53
 
54
54
 
55
+ class UnsupportedFeatureError(ElementPathError, NotImplementedError):
56
+ """Raised when an XPath feature is not supported in the current context."""
57
+
58
+
55
59
  class ElementPathKeyError(ElementPathError, KeyError):
56
60
  pass
57
61
 
@@ -12,10 +12,10 @@ import math
12
12
  from calendar import isleap, leapdays
13
13
  from decimal import Decimal
14
14
  from operator import attrgetter
15
- from typing import Any, List, Optional, Union, SupportsFloat
15
+ from typing import Any, List, Optional, overload, SupportsFloat, Type, Union
16
16
  from urllib.parse import urlsplit
17
17
 
18
- from elementpath._typing import Iterator, Match
18
+ from elementpath._typing import Iterator, Match, Pattern
19
19
 
20
20
  ###
21
21
  # Common sets constants
@@ -26,23 +26,68 @@ INVALID_NUMERIC = frozenset(
26
26
  ('inf', '+inf', '-inf', 'nan', 'infinity', '+infinity', '-infinity')
27
27
  )
28
28
 
29
+
29
30
  ###
30
- # Data validation helpers
31
-
32
- NORMALIZE_PATTERN = re.compile(r'[^\S\xa0]')
33
- WHITESPACES_PATTERN = re.compile(r'[^\S\xa0]+') # include ASCII 160 (non-breaking space)
34
- NCNAME_PATTERN = re.compile(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
35
- QNAME_PATTERN = re.compile(
36
- r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
37
- r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
38
- )
39
- EQNAME_PATTERN = re.compile(
40
- r'^(?:Q{(?P<namespace>[^}]+)}|'
41
- r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
42
- r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
43
- )
44
- WRONG_ESCAPE_PATTERN = re.compile(r'%(?![a-fA-F\d]{2})')
45
- XML_NEWLINES_PATTERN = re.compile('\r\n|\r|\n')
31
+ # Data validation patterns
32
+
33
+ class LazyPattern:
34
+ """
35
+ A descriptor for creating lazy regexp patterns. The compiled pattern is built
36
+ only when the descriptor attribute is accessed (e.g. a hasattr() call).
37
+ """
38
+ _compiled: Pattern[str]
39
+
40
+ def __init__(self, pattern: str, flags: Union[int, re.RegexFlag] = 0) -> None:
41
+ self._pattern = pattern
42
+ self._flags = flags
43
+
44
+ def __set_name__(self, owner: Type[Any], name: str) -> None:
45
+ self._name = name
46
+
47
+ @overload
48
+ def __get__(self, instance: None, owner: Type[Any]) -> Pattern[str]: ...
49
+
50
+ @overload
51
+ def __get__(self, instance: Any, owner: Type[Any]) -> Pattern[str]: ...
52
+
53
+ def __get__(self, instance: Optional[Any], owner: Type[Any]) -> Pattern[str]:
54
+ try:
55
+ return self._compiled
56
+ except AttributeError:
57
+ self._compiled = re.compile(self._pattern, self._flags)
58
+ return self._compiled
59
+
60
+ def __set__(self, instance: Any, value: Any) -> None:
61
+ raise AttributeError("Can't set attribute {}".format(self._name))
62
+
63
+ def __delete__(self, instance: Any) -> None:
64
+ raise AttributeError("Can't delete attribute {}".format(self._name))
65
+
66
+
67
+ class Patterns:
68
+ """
69
+ Helper patterns, the ones that aren't used at import time are defined lazy.
70
+ """
71
+ whitespaces = re.compile(r'[^\S\xa0]+') # include ASCII 160 (non-breaking space)
72
+ normalize = LazyPattern(r'[^\S\xa0]')
73
+ ncname = LazyPattern(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
74
+ extended_qname = LazyPattern(
75
+ r'^(?:Q{(?P<namespace>[^}]+)}|'
76
+ r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
77
+ r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
78
+ )
79
+ replacement = LazyPattern(r'^([^\\$]|\\{2}|\\\$|\$\d+)*$')
80
+ sequence_type = LazyPattern(r'\s?([()?*+,])\s?')
81
+ unicode_escape = LazyPattern(r'(?:\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8}))')
82
+ wrong_escape = LazyPattern(r'%(?![a-fA-F\d]{2})')
83
+ xml_newlines = LazyPattern('\r\n|\r|\n')
84
+
85
+ # Regex patterns related to names and namespaces
86
+ namespace_uri = LazyPattern(r'{([^}]+)}')
87
+ expanded_name = LazyPattern(
88
+ r'^(?:{(?P<namespace>[^}]+)})?'
89
+ r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
90
+ )
46
91
 
47
92
 
48
93
  def upper_camel_case(s: str) -> str:
@@ -50,16 +95,16 @@ def upper_camel_case(s: str) -> str:
50
95
 
51
96
 
52
97
  def collapse_white_spaces(s: str) -> str:
53
- return WHITESPACES_PATTERN.sub(' ', s).strip(' ')
98
+ return Patterns.whitespaces.sub(' ', s).strip(' ')
54
99
 
55
100
 
56
101
  def is_ncname(s: str) -> bool:
57
- return re.match(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$', s) is not None
102
+ return Patterns.ncname.match(s) is not None
58
103
 
59
104
 
60
105
  def is_idrefs(value: Optional[str]) -> bool:
61
106
  return isinstance(value, str) and \
62
- all(NCNAME_PATTERN.match(x) is not None for x in value.split())
107
+ all(Patterns.ncname.match(x) is not None for x in value.split())
63
108
 
64
109
 
65
110
  node_position = attrgetter('position')
@@ -243,7 +288,8 @@ def escape_json_string(s: str, escaped: bool = False) -> str:
243
288
  def unescape_json_string(s: str) -> str:
244
289
 
245
290
  def unicode_escape_callback(match: Match[str]) -> str:
246
- return chr(int(match.group(1).upper(), 16))
291
+ group = match.group(1) or match.group(2)
292
+ return chr(int(group.upper(), 16))
247
293
 
248
294
  s = s.replace('\\"', '\"').\
249
295
  replace(r'\b', '\b').\
@@ -254,7 +300,7 @@ def unescape_json_string(s: str) -> str:
254
300
  replace(r'\/', '/').\
255
301
  replace('\\\\', '\\')
256
302
 
257
- return re.sub(r'\\u([0-9A-Fa-f]{4})', unicode_escape_callback, s)
303
+ return Patterns.unicode_escape.sub(unicode_escape_callback, s)
258
304
 
259
305
 
260
306
  def iter_sequence(obj: Any) -> Iterator[Any]:
@@ -7,17 +7,10 @@
7
7
  #
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
- import re
11
10
  from typing import cast, Tuple, Union
12
11
 
13
12
  from elementpath.aliases import NamespacesType, NsmapType
14
-
15
- # Regex patterns related to names and namespaces
16
- NAMESPACE_URI_PATTERN = re.compile(r'{([^}]+)}')
17
- EXPANDED_NAME_PATTERN = re.compile(
18
- r'^(?:{(?P<namespace>[^}]+)})?'
19
- r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
20
- )
13
+ from elementpath.helpers import Patterns
21
14
 
22
15
  # Namespaces
23
16
  XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
@@ -70,13 +63,13 @@ XSD_NUMERIC = '{%s}numeric' % XSD_NAMESPACE
70
63
 
71
64
  def get_namespace(name: str) -> str:
72
65
  try:
73
- return NAMESPACE_URI_PATTERN.match(name).group(1) # type: ignore[union-attr]
66
+ return Patterns.namespace_uri.match(name).group(1) # type: ignore[union-attr]
74
67
  except AttributeError:
75
68
  return ''
76
69
 
77
70
 
78
71
  def split_expanded_name(name: str) -> Tuple[str, str]:
79
- match = EXPANDED_NAME_PATTERN.match(name)
72
+ match = Patterns.expanded_name.match(name)
80
73
  if match is None:
81
74
  raise ValueError(f"{name!r} is not an expanded QName")
82
75
  namespace, local_name = match.groups()
@@ -272,7 +272,7 @@ XsdXPathNodeType = Union['XsdSchemaProtocol', 'XsdElementProtocol']
272
272
  class XsdAttributeGroupProtocol(XsdComponentProtocol, Protocol):
273
273
 
274
274
  @overload
275
- def get(self, key: Optional[str], default: None) -> Optional[XsdAttributeProtocol]: ...
275
+ def get(self, key: Optional[str]) -> Optional[XsdAttributeProtocol]: ...
276
276
 
277
277
  @overload
278
278
  def get(self, key: Optional[str], default: _T) -> Union[XsdAttributeProtocol, _T]: ...
@@ -0,0 +1,25 @@
1
+ #
2
+ # Copyright (c), 2018-2020, SISSA (International School for Advanced Studies).
3
+ # All rights reserved.
4
+ # This file is distributed under the terms of the MIT License.
5
+ # See the file 'LICENSE' in the root directory of the present
6
+ # distribution, or http://opensource.org/licenses/MIT.
7
+ #
8
+ # @author Davide Brunato <brunato@sissa.it>
9
+ #
10
+ """
11
+ Subpackage for processing XML regular expressions and for converting them to
12
+ Python-compatible regexps.
13
+
14
+ XPath/XQuery/XML-Schema regexp flavors are supported through translate_pattern()
15
+ API options. Default options process XPath/XQuery patterns.
16
+ """
17
+ from .codepoints import RegexError, iter_code_points
18
+ from .unicode_subsets import UnicodeSubset, UnicodeData, install_unicode_data, \
19
+ unicode_version, unicode_subset, lazy_subset, unicode_category, unicode_block
20
+ from .character_classes import CharacterClass
21
+ from .patterns import translate_pattern
22
+
23
+ __all__ = ['translate_pattern', 'RegexError', 'UnicodeSubset', 'UnicodeData',
24
+ 'install_unicode_data', 'unicode_version', 'unicode_subset', 'lazy_subset',
25
+ 'unicode_category', 'unicode_block', 'CharacterClass', 'iter_code_points']
@@ -8,14 +8,14 @@
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
10
  import re
11
- from itertools import chain
12
11
  from sys import maxunicode
13
12
  from collections import Counter
14
- from typing import AbstractSet, Any, Optional, Union
13
+ from itertools import chain
14
+ from typing import AbstractSet, Any, Callable, Dict, Optional, Union
15
15
 
16
16
  from elementpath._typing import Iterator, MutableSet
17
- from .unicode_subsets import RegexError, UnicodeSubset, UNICODE_CATEGORIES, unicode_subset
18
-
17
+ from .codepoints import RegexError
18
+ from .unicode_subsets import UnicodeSubset, lazy_subset, unicode_subset, unicode_category
19
19
 
20
20
  I_SHORTCUT_REPLACE = (
21
21
  ":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
@@ -27,20 +27,34 @@ C_SHORTCUT_REPLACE = (
27
27
  "\u200D\u203F\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD"
28
28
  )
29
29
 
30
- S_SHORTCUT_SET = UnicodeSubset(' \n\t\r')
31
- D_SHORTCUT_SET = UnicodeSubset()
32
- D_SHORTCUT_SET._codepoints = UNICODE_CATEGORIES['Nd'].codepoints
33
- I_SHORTCUT_SET = UnicodeSubset(I_SHORTCUT_REPLACE)
34
- C_SHORTCUT_SET = UnicodeSubset(C_SHORTCUT_REPLACE)
35
- W_SHORTCUT_SET = UnicodeSubset(chain(
36
- UNICODE_CATEGORIES['L'].codepoints,
37
- UNICODE_CATEGORIES['M'].codepoints,
38
- UNICODE_CATEGORIES['N'].codepoints,
39
- UNICODE_CATEGORIES['S'].codepoints
40
- ))
30
+
31
+ @lazy_subset
32
+ def c_shortcut() -> UnicodeSubset:
33
+ return UnicodeSubset(C_SHORTCUT_REPLACE)
34
+
35
+
36
+ @lazy_subset
37
+ def i_shortcut() -> UnicodeSubset:
38
+ return UnicodeSubset(I_SHORTCUT_REPLACE)
39
+
40
+
41
+ @lazy_subset
42
+ def s_shortcut() -> UnicodeSubset:
43
+ return UnicodeSubset(' \t\n\r')
44
+
45
+
46
+ @lazy_subset
47
+ def d_shortcut() -> UnicodeSubset:
48
+ return unicode_category('Nd')
49
+
50
+
51
+ @lazy_subset
52
+ def w_shortcut() -> UnicodeSubset:
53
+ return UnicodeSubset(chain.from_iterable(unicode_category(x) for x in 'LMNS'))
54
+
41
55
 
42
56
  # Single and Multi character escapes
43
- CHARACTER_ESCAPES = {
57
+ CHARACTER_ESCAPES: Dict[str, Union[str, Callable[[], UnicodeSubset]]] = {
44
58
  # Single-character escapes
45
59
  '\\n': '\n',
46
60
  '\\r': '\r',
@@ -61,16 +75,16 @@ CHARACTER_ESCAPES = {
61
75
  '\\\\': '\\',
62
76
 
63
77
  # Multi-character escapes
64
- '\\s': S_SHORTCUT_SET,
65
- '\\S': S_SHORTCUT_SET,
66
- '\\d': D_SHORTCUT_SET,
67
- '\\D': D_SHORTCUT_SET,
68
- '\\i': I_SHORTCUT_SET,
69
- '\\I': I_SHORTCUT_SET,
70
- '\\c': C_SHORTCUT_SET,
71
- '\\C': C_SHORTCUT_SET,
72
- '\\w': W_SHORTCUT_SET,
73
- '\\W': W_SHORTCUT_SET,
78
+ '\\s': s_shortcut,
79
+ '\\S': s_shortcut,
80
+ '\\d': d_shortcut,
81
+ '\\D': d_shortcut,
82
+ '\\i': i_shortcut,
83
+ '\\I': i_shortcut,
84
+ '\\c': c_shortcut,
85
+ '\\C': c_shortcut,
86
+ '\\w': w_shortcut,
87
+ '\\W': w_shortcut,
74
88
  }
75
89
 
76
90
 
@@ -83,7 +97,7 @@ class CharacterClass(MutableSet[int]):
83
97
  TODO: implement __ior__, __iand__, __ixor__ operators for a full mutable set class.
84
98
  """
85
99
  _re_char_set = re.compile(r'(?<!.-)(\\[nrt|.\-^?*+{}()\]sSdDiIcCwW]|\\[pP]{[a-zA-Z\-0-9]+})')
86
- _re_unicode_ref = re.compile(r'\\([pP]){([\w\d-]+)}')
100
+ _re_unicode_ref = re.compile(r'\\([pP]){([\w-]+)}')
87
101
 
88
102
  __slots__ = 'xsd_version', 'positive', 'negative'
89
103
 
@@ -138,17 +152,17 @@ class CharacterClass(MutableSet[int]):
138
152
  return len(self.positive)
139
153
 
140
154
  def __isub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
141
- if not isinstance(other, CharacterClass):
142
- return NotImplemented
143
- elif self.negative:
144
- if other.negative:
145
- self.positive |= (other.negative - self.negative)
146
- self.negative.clear()
147
- self.negative |= other.positive
148
- elif other.negative:
149
- self.positive &= other.negative
150
- self.positive -= other.positive
151
- return self
155
+ if isinstance(other, CharacterClass):
156
+ if self.negative:
157
+ if other.negative:
158
+ self.positive |= (other.negative - self.negative)
159
+ self.negative.clear()
160
+ self.negative |= other.positive
161
+ elif other.negative:
162
+ self.positive &= other.negative
163
+ self.positive -= other.positive
164
+ return self
165
+ return NotImplemented
152
166
 
153
167
  def __sub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
154
168
  obj = self.__copy__()
@@ -164,9 +178,9 @@ class CharacterClass(MutableSet[int]):
164
178
  if isinstance(value, str):
165
179
  self.positive.update(value)
166
180
  elif part[-1].islower():
167
- self.positive |= value
181
+ self.positive |= value()
168
182
  else:
169
- self.negative |= value
183
+ self.negative |= value()
170
184
  elif part.startswith('\\p') or part.startswith('\\P'):
171
185
  if self._re_unicode_ref.search(part) is None:
172
186
  raise RegexError("wrong Unicode block specification %r" % part)
@@ -198,11 +212,11 @@ class CharacterClass(MutableSet[int]):
198
212
  if self.negative:
199
213
  self.negative.update(value)
200
214
  elif part[-1].islower():
201
- self.positive -= value
215
+ self.positive -= value()
202
216
  if self.negative:
203
- self.negative |= value
217
+ self.negative |= value()
204
218
  else:
205
- self.positive &= value
219
+ self.positive &= value()
206
220
  self.negative.clear()
207
221
 
208
222
  elif part.startswith('\\p') or part.startswith('\\P'):
@@ -232,4 +246,4 @@ class CharacterClass(MutableSet[int]):
232
246
  if self.positive or self.negative:
233
247
  self.positive, self.negative = self.negative, self.positive
234
248
  else:
235
- self.positive.codepoints.append((0, maxunicode + 1))
249
+ self.positive.codepoints = [(0, maxunicode + 1)]