elementpath 4.5.0__tar.gz → 4.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (135)
  1. {elementpath-4.5.0 → elementpath-4.7.0}/CHANGELOG.rst +15 -1
  2. {elementpath-4.5.0 → elementpath-4.7.0}/MANIFEST.in +1 -0
  3. {elementpath-4.5.0 → elementpath-4.7.0}/PKG-INFO +2 -1
  4. {elementpath-4.5.0 → elementpath-4.7.0}/doc/conf.py +2 -2
  5. {elementpath-4.5.0 → elementpath-4.7.0}/doc/xpath_api.rst +3 -0
  6. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/__init__.py +7 -5
  7. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/qname.py +5 -2
  8. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/string.py +4 -4
  9. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/uri.py +2 -2
  10. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/etree.py +26 -6
  11. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/exceptions.py +4 -0
  12. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/helpers.py +69 -23
  13. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/namespaces.py +3 -10
  14. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/protocols.py +1 -1
  15. elementpath-4.7.0/elementpath/regex/__init__.py +25 -0
  16. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/regex/character_classes.py +58 -44
  17. elementpath-4.7.0/elementpath/regex/codepoints.py +206 -0
  18. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/regex/patterns.py +6 -6
  19. elementpath-4.7.0/elementpath/regex/unicode_blocks.py +450 -0
  20. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/regex/unicode_categories.py +418 -5
  21. elementpath-4.7.0/elementpath/regex/unicode_subsets.py +639 -0
  22. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/sequence_types.py +7 -9
  23. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/tdop.py +8 -7
  24. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/tree_builders.py +195 -163
  25. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/xpath1_parser.py +56 -11
  26. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/_xpath2_constructors.py +14 -0
  27. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/_xpath2_functions.py +3 -6
  28. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/_xpath2_operators.py +7 -2
  29. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/xpath2_parser.py +14 -18
  30. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/_xpath30_functions.py +30 -27
  31. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/_xpath31_functions.py +4 -3
  32. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/_xpath31_operators.py +2 -0
  33. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_context.py +17 -22
  34. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_nodes.py +64 -57
  35. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_selectors.py +59 -29
  36. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath_tokens.py +8 -11
  37. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/PKG-INFO +2 -1
  38. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/SOURCES.txt +3 -1
  39. {elementpath-4.5.0 → elementpath-4.7.0}/requirements-dev.txt +1 -1
  40. elementpath-4.7.0/scripts/generate_codepoints.py +406 -0
  41. {elementpath-4.5.0 → elementpath-4.7.0}/setup.py +2 -1
  42. elementpath-4.7.0/tests/mypy_tests/advanced.py +30 -0
  43. {elementpath-4.5.0 → elementpath-4.7.0}/tests/mypy_tests/selectors.py +15 -0
  44. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_helpers.py +15 -2
  45. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_regex.py +222 -48
  46. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_selectors.py +44 -0
  47. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_tree_builders.py +67 -3
  48. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_typing.py +8 -0
  49. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath1_parser.py +22 -9
  50. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath2_parser.py +42 -2
  51. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath31.py +33 -0
  52. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath_tokens.py +31 -0
  53. {elementpath-4.5.0 → elementpath-4.7.0}/tox.ini +15 -9
  54. elementpath-4.5.0/elementpath/regex/__init__.py +0 -24
  55. elementpath-4.5.0/elementpath/regex/codepoints.py +0 -126
  56. elementpath-4.5.0/elementpath/regex/generate_categories.py +0 -116
  57. elementpath-4.5.0/elementpath/regex/unicode_subsets.py +0 -519
  58. {elementpath-4.5.0 → elementpath-4.7.0}/.coveragerc +0 -0
  59. {elementpath-4.5.0 → elementpath-4.7.0}/LICENSE +0 -0
  60. {elementpath-4.5.0 → elementpath-4.7.0}/README.rst +0 -0
  61. {elementpath-4.5.0 → elementpath-4.7.0}/doc/Makefile +0 -0
  62. {elementpath-4.5.0 → elementpath-4.7.0}/doc/advanced.rst +0 -0
  63. {elementpath-4.5.0 → elementpath-4.7.0}/doc/index.rst +0 -0
  64. {elementpath-4.5.0 → elementpath-4.7.0}/doc/introduction.rst +0 -0
  65. {elementpath-4.5.0 → elementpath-4.7.0}/doc/make.bat +0 -0
  66. {elementpath-4.5.0 → elementpath-4.7.0}/doc/pratt_api.rst +0 -0
  67. {elementpath-4.5.0 → elementpath-4.7.0}/doc/requirements.txt +0 -0
  68. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/_typing.py +0 -0
  69. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/aliases.py +0 -0
  70. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/collations.py +0 -0
  71. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/compare.py +0 -0
  72. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/__init__.py +0 -0
  73. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/atomic_types.py +0 -0
  74. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/binary.py +1 -1
  75. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/datetime.py +1 -1
  76. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/numeric.py +1 -1
  77. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/proxies.py +0 -0
  78. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/datatypes/untyped.py +0 -0
  79. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/decoder.py +0 -0
  80. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/py.typed +0 -0
  81. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/schema_proxy.py +0 -0
  82. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/serialization.py +0 -0
  83. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/validators/__init__.py +0 -0
  84. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/validators/analyze-string.xsd +0 -0
  85. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/validators/schema-for-json.xsd +0 -0
  86. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/__init__.py +0 -0
  87. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/_xpath1_axes.py +0 -0
  88. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/_xpath1_functions.py +0 -0
  89. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath1/_xpath1_operators.py +0 -0
  90. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath2/__init__.py +0 -0
  91. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath3.py +0 -0
  92. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/__init__.py +0 -0
  93. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/_translation_maps.py +0 -0
  94. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/_xpath30_operators.py +0 -0
  95. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/xpath30_helpers.py +0 -0
  96. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath30/xpath30_parser.py +0 -0
  97. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/__init__.py +0 -0
  98. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath/xpath31/xpath31_parser.py +0 -0
  99. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/dependency_links.txt +0 -0
  100. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/requires.txt +0 -0
  101. {elementpath-4.5.0 → elementpath-4.7.0}/elementpath.egg-info/top_level.txt +0 -0
  102. {elementpath-4.5.0 → elementpath-4.7.0}/mypy.ini +0 -0
  103. {elementpath-4.5.0 → elementpath-4.7.0}/setup.cfg +0 -0
  104. {elementpath-4.5.0 → elementpath-4.7.0}/tests/__init__.py +0 -0
  105. {elementpath-4.5.0 → elementpath-4.7.0}/tests/execute_w3c_tests.py +0 -0
  106. {elementpath-4.5.0 → elementpath-4.7.0}/tests/memory_profiling.py +0 -0
  107. {elementpath-4.5.0 → elementpath-4.7.0}/tests/mypy_tests/protocols.py +0 -0
  108. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/analyze-string.xsd +0 -0
  109. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/external_entity.xml +0 -0
  110. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/sample.xml +0 -0
  111. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/schema-for-json.xsd +0 -0
  112. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/unparsed_entity.xml +0 -0
  113. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/unused_external_entity.xml +0 -0
  114. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/unused_unparsed_entity.xml +0 -0
  115. {elementpath-4.5.0 → elementpath-4.7.0}/tests/resources/with_entity.xml +0 -0
  116. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_collations.py +0 -0
  117. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_compare.py +0 -0
  118. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_datatypes.py +0 -0
  119. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_elementpath.py +0 -0
  120. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_etree.py +0 -0
  121. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_exceptions.py +0 -0
  122. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_namespaces.py +0 -0
  123. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_package.py +0 -0
  124. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_schema_context.py +0 -0
  125. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_schema_proxy.py +0 -0
  126. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_sequence_types.py +0 -0
  127. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_serialization.py +0 -0
  128. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_tdop_parser.py +0 -0
  129. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_validators.py +0 -0
  130. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath2_constructors.py +0 -0
  131. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath2_functions.py +0 -0
  132. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath30.py +0 -0
  133. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath_context.py +0 -0
  134. {elementpath-4.5.0 → elementpath-4.7.0}/tests/test_xpath_nodes.py +0 -0
  135. {elementpath-4.5.0 → elementpath-4.7.0}/tests/xpath_test_class.py +0 -0
@@ -2,6 +2,18 @@
2
2
  CHANGELOG
3
3
  *********
4
4
 
5
+ `v4.7.0`_ (2024-12-20)
6
+ ======================
7
+ * Fix *fragment* argument usage (issue #81)
8
+ * Fix constructors nud() to skip argument check with XP31+ arrow operator (issue #83)
9
+
10
+ `v4.6.0`_ (2024-10-27)
11
+ ======================
12
+ * Fix XsdAttributeGroupProtocol
13
+ * Improve Unicode support with installable UnicodeData.txt versions
14
+ * Extend names disambiguation with a fix for issue #78
15
+ * Refactor tree builders to fix document position of tails (issue #79)
16
+
5
17
  `v4.5.0`_ (2024-09-09)
6
18
  ======================
7
19
  * Fix and clean node trees iteration methods (issue #72)
@@ -469,4 +481,6 @@ CHANGELOG
469
481
  .. _v4.2.1: https://github.com/sissaschool/elementpath/compare/v4.2.0...v4.2.1
470
482
  .. _v4.3.0: https://github.com/sissaschool/elementpath/compare/v4.2.1...v4.3.0
471
483
  .. _v4.4.0: https://github.com/sissaschool/elementpath/compare/v4.3.0...v4.4.0
472
- .. _v4.4.1: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
484
+ .. _v4.5.0: https://github.com/sissaschool/elementpath/compare/v4.4.0...v4.5.0
485
+ .. _v4.6.0: https://github.com/sissaschool/elementpath/compare/v4.5.0...v4.6.0
486
+ .. _v4.7.0: https://github.com/sissaschool/elementpath/compare/v4.6.0...v4.7.0
@@ -11,6 +11,7 @@ include mypy.ini
11
11
  include doc/*
12
12
 
13
13
  recursive-include elementpath *
14
+ recursive-include scripts *
14
15
  recursive-include tests *
15
16
  recursive-exclude tests/.mypy_cache *
16
17
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: elementpath
3
- Version: 4.5.0
3
+ Version: 4.7.0
4
4
  Summary: XPath 1.0/2.0/3.0/3.1 parsers and selectors for ElementTree and lxml
5
5
  Home-page: https://github.com/sissaschool/elementpath
6
6
  Author: Davide Brunato
@@ -22,6 +22,7 @@ Classifier: Programming Language :: Python :: 3.10
22
22
  Classifier: Programming Language :: Python :: 3.11
23
23
  Classifier: Programming Language :: Python :: 3.12
24
24
  Classifier: Programming Language :: Python :: 3.13
25
+ Classifier: Programming Language :: Python :: 3.14
25
26
  Classifier: Programming Language :: Python :: Implementation :: CPython
26
27
  Classifier: Programming Language :: Python :: Implementation :: PyPy
27
28
  Classifier: Topic :: Software Development :: Libraries
@@ -29,9 +29,9 @@ copyright = '2018-2024, SISSA (International School for Advanced Studies)'
29
29
  author = 'Davide Brunato'
30
30
 
31
31
  # The short X.Y version
32
- version = '4.5'
32
+ version = '4.7'
33
33
  # The full version, including alpha/beta/rc tags
34
- release = '4.5.0'
34
+ release = '4.7.0'
35
35
 
36
36
  # -- General configuration ---------------------------------------------------
37
37
 
@@ -147,6 +147,8 @@ XPath regular expressions
147
147
  =========================
148
148
 
149
149
  .. autofunction:: elementpath.translate_pattern
150
+ .. autofunction:: elementpath.install_unicode_data
151
+ .. autofunction:: elementpath.unicode_version
150
152
 
151
153
 
152
154
  Exception classes
@@ -154,6 +156,7 @@ Exception classes
154
156
 
155
157
  .. autoexception:: elementpath.ElementPathError
156
158
  .. autoexception:: elementpath.MissingContextError
159
+ .. autoexception:: elementpath.UnsupportedFeatureError
157
160
  .. autoexception:: elementpath.RegexError
158
161
  .. autoexception:: elementpath.ElementPathLocaleError
159
162
 
@@ -7,7 +7,7 @@
7
7
  #
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
- __version__ = '4.5.0'
10
+ __version__ = '4.7.0'
11
11
  __author__ = "Davide Brunato"
12
12
  __contact__ = "brunato@sissa.it"
13
13
  __copyright__ = "Copyright 2018-2024, SISSA"
@@ -23,7 +23,7 @@ from . import protocols # Protocols for type annotations
23
23
  from .exceptions import ElementPathError, MissingContextError, ElementPathKeyError, \
24
24
  ElementPathZeroDivisionError, ElementPathNameError, ElementPathOverflowError, \
25
25
  ElementPathRuntimeError, ElementPathSyntaxError, ElementPathTypeError, \
26
- ElementPathValueError, ElementPathLocaleError
26
+ ElementPathValueError, ElementPathLocaleError, UnsupportedFeatureError
27
27
 
28
28
  from .xpath_context import XPathContext, XPathSchemaContext
29
29
  from .xpath_nodes import XPathNode, DocumentNode, ElementNode, AttributeNode, \
@@ -36,10 +36,11 @@ from .xpath1 import XPath1Parser
36
36
  from .xpath2 import XPath2Parser
37
37
  from .xpath_selectors import select, iter_select, Selector
38
38
  from .schema_proxy import AbstractSchemaProxy
39
- from .regex import RegexError, translate_pattern
39
+ from .regex import RegexError, translate_pattern, install_unicode_data, unicode_version
40
40
 
41
41
  __all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContextError',
42
- 'ElementPathKeyError', 'ElementPathZeroDivisionError', 'ElementPathNameError',
42
+ 'UnsupportedFeatureError', 'ElementPathKeyError',
43
+ 'ElementPathZeroDivisionError', 'ElementPathNameError',
43
44
  'ElementPathOverflowError', 'ElementPathRuntimeError', 'ElementPathSyntaxError',
44
45
  'ElementPathTypeError', 'ElementPathValueError', 'ElementPathLocaleError',
45
46
  'XPathContext', 'XPathSchemaContext', 'XPathNode', 'DocumentNode',
@@ -48,4 +49,5 @@ __all__ = ['datatypes', 'protocols', 'etree', 'ElementPathError', 'MissingContex
48
49
  'SchemaElementNode', 'get_node_tree', 'build_node_tree',
49
50
  'build_lxml_node_tree', 'build_schema_node_tree', 'XPathToken',
50
51
  'XPathFunction', 'XPath1Parser', 'XPath2Parser', 'select', 'iter_select',
51
- 'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern']
52
+ 'Selector', 'AbstractSchemaProxy', 'RegexError', 'translate_pattern',
53
+ 'install_unicode_data', 'unicode_version']
@@ -7,9 +7,9 @@
7
7
  #
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
+ import re
10
11
  from typing import Any, Optional
11
12
 
12
- from elementpath.helpers import QNAME_PATTERN
13
13
  from .atomic_types import AnyAtomicType
14
14
  from .untyped import UntypedAtomic
15
15
 
@@ -22,7 +22,10 @@ class AbstractQName(AnyAtomicType):
22
22
  URI if a prefixed name is provided for the 2nd argument.
23
23
  :param qname: the prefixed name or a local name.
24
24
  """
25
- pattern = QNAME_PATTERN
25
+ pattern = re.compile(
26
+ r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
27
+ r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
28
+ )
26
29
 
27
30
  def __new__(cls, *args: Any, **kwargs: Any) -> 'AbstractQName':
28
31
  if cls.__name__ == 'Notation':
@@ -10,7 +10,7 @@
10
10
  import re
11
11
  from typing import Any
12
12
 
13
- from elementpath.helpers import NORMALIZE_PATTERN, collapse_white_spaces
13
+ from elementpath.helpers import collapse_white_spaces, Patterns
14
14
  from .atomic_types import AnyAtomicType
15
15
 
16
16
 
@@ -20,7 +20,7 @@ class NormalizedString(str, AnyAtomicType):
20
20
 
21
21
  def __new__(cls, obj: Any) -> 'NormalizedString':
22
22
  try:
23
- return super().__new__(cls, NORMALIZE_PATTERN.sub(' ', obj))
23
+ return super().__new__(cls, Patterns.normalize.sub(' ', obj))
24
24
  except TypeError:
25
25
  return super().__new__(cls, obj)
26
26
 
@@ -41,7 +41,7 @@ class XsdToken(NormalizedString):
41
41
  match = cls.pattern.match(value)
42
42
  if match is None:
43
43
  raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
44
- return super(NormalizedString, cls).__new__(cls, value)
44
+ return super(NormalizedString, cls).__new__(cls, value) # noqa
45
45
 
46
46
 
47
47
  class Language(XsdToken):
@@ -59,7 +59,7 @@ class Language(XsdToken):
59
59
  match = cls.pattern.match(value)
60
60
  if match is None:
61
61
  raise ValueError('invalid value {!r} for xs:{}'.format(value, cls.name))
62
- return super(NormalizedString, cls).__new__(cls, value)
62
+ return super(NormalizedString, cls).__new__(cls, value) # noqa
63
63
 
64
64
 
65
65
  class Name(XsdToken):
@@ -11,7 +11,7 @@ from decimal import Decimal
11
11
  from urllib.parse import urlparse
12
12
  from typing import Union
13
13
 
14
- from elementpath.helpers import collapse_white_spaces, WRONG_ESCAPE_PATTERN
14
+ from elementpath.helpers import collapse_white_spaces, Patterns
15
15
  from .atomic_types import AnyAtomicType
16
16
  from .untyped import UntypedAtomic
17
17
  from .numeric import Integer
@@ -110,6 +110,6 @@ class AnyURI(AnyAtomicType):
110
110
  elif value.count('#') > 1:
111
111
  msg = 'invalid value {!r} for xs:{} (too many # characters)'
112
112
  raise ValueError(msg.format(value, cls.name))
113
- elif WRONG_ESCAPE_PATTERN.search(value) is not None:
113
+ elif Patterns.wrong_escape.search(value) is not None:
114
114
  msg = 'invalid value {!r} for xs:{} (wrong escaping)'
115
115
  raise ValueError(msg.format(value, cls.name))
@@ -106,7 +106,17 @@ def is_etree_element(obj: Any) -> bool:
106
106
 
107
107
 
108
108
  def is_lxml_etree_element(obj: Any) -> bool:
109
- return is_etree_element(obj) and hasattr(obj, 'getparent') and hasattr(obj, 'nsmap')
109
+ return is_etree_element(obj) and \
110
+ hasattr(obj, 'getparent') and \
111
+ hasattr(obj, 'nsmap') and \
112
+ obj.__class__.__module__ in ('lxml.etree', 'lxml.html')
113
+
114
+
115
+ def is_etree_element_instance(obj: Any) -> bool:
116
+ """Strictly checks that the object is an ElementTree or lxml.etree Element."""
117
+ return isinstance(obj, ElementTree.Element) or \
118
+ isinstance(obj, PyElementTree.Element) or \
119
+ is_lxml_etree_element(obj)
110
120
 
111
121
 
112
122
  def is_etree_document(obj: Any) -> bool:
@@ -114,7 +124,17 @@ def is_etree_document(obj: Any) -> bool:
114
124
 
115
125
 
116
126
  def is_lxml_etree_document(obj: Any) -> bool:
117
- return is_etree_document(obj) and hasattr(obj, 'xpath') and hasattr(obj, 'xslt')
127
+ return is_etree_document(obj) and \
128
+ hasattr(obj, 'xpath') and \
129
+ hasattr(obj, 'xslt') and \
130
+ obj.__class__.__module__ in ('lxml.etree', 'lxml.html')
131
+
132
+
133
+ def is_etree_document_instance(obj: Any) -> bool:
134
+ """Strictly checks that the object is an ElementTree or lxml.etree document."""
135
+ return isinstance(obj, ElementTree.ElementTree) or \
136
+ isinstance(obj, PyElementTree.ElementTree) or \
137
+ is_lxml_etree_document(obj)
118
138
 
119
139
 
120
140
  def etree_iter_strings(elem: Union[DocumentProtocol, ElementProtocol],
@@ -237,7 +257,7 @@ def etree_tostring(elem: ElementProtocol,
237
257
  return indent + line
238
258
 
239
259
  etree_module: Any
240
- if not is_etree_element(elem):
260
+ if not is_etree_element_instance(elem):
241
261
  raise TypeError(f"{elem!r} is not an Element")
242
262
  elif isinstance(elem, PyElementTree.Element):
243
263
  etree_module = PyElementTree
@@ -308,6 +328,6 @@ def etree_tostring(elem: ElementProtocol,
308
328
 
309
329
 
310
330
  __all__ = ['ElementTree', 'PyElementTree', 'SafeXMLParser', 'defuse_xml',
311
- 'is_etree_element', 'is_lxml_etree_element', 'is_etree_document',
312
- 'is_lxml_etree_document', 'etree_iter_strings', 'etree_deep_equal',
313
- 'etree_iter_paths', 'etree_tostring']
331
+ 'is_etree_element', 'is_lxml_etree_element', 'is_etree_element_instance',
332
+ 'is_etree_document', 'is_lxml_etree_document', 'is_etree_document_instance',
333
+ 'etree_iter_strings', 'etree_deep_equal', 'etree_iter_paths', 'etree_tostring']
@@ -52,6 +52,10 @@ class MissingContextError(ElementPathError):
52
52
  """Raised when the dynamic context is required for evaluate the XPath expression."""
53
53
 
54
54
 
55
+ class UnsupportedFeatureError(ElementPathError, NotImplementedError):
56
+ """Raised when an XPath feature is not supported in the current context."""
57
+
58
+
55
59
  class ElementPathKeyError(ElementPathError, KeyError):
56
60
  pass
57
61
 
@@ -12,10 +12,10 @@ import math
12
12
  from calendar import isleap, leapdays
13
13
  from decimal import Decimal
14
14
  from operator import attrgetter
15
- from typing import Any, List, Optional, Union, SupportsFloat
15
+ from typing import Any, List, Optional, overload, SupportsFloat, Type, Union
16
16
  from urllib.parse import urlsplit
17
17
 
18
- from elementpath._typing import Iterator, Match
18
+ from elementpath._typing import Iterator, Match, Pattern
19
19
 
20
20
  ###
21
21
  # Common sets constants
@@ -26,23 +26,68 @@ INVALID_NUMERIC = frozenset(
26
26
  ('inf', '+inf', '-inf', 'nan', 'infinity', '+infinity', '-infinity')
27
27
  )
28
28
 
29
+
29
30
  ###
30
- # Data validation helpers
31
-
32
- NORMALIZE_PATTERN = re.compile(r'[^\S\xa0]')
33
- WHITESPACES_PATTERN = re.compile(r'[^\S\xa0]+') # include ASCII 160 (non-breaking space)
34
- NCNAME_PATTERN = re.compile(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
35
- QNAME_PATTERN = re.compile(
36
- r'^(?:(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
37
- r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
38
- )
39
- EQNAME_PATTERN = re.compile(
40
- r'^(?:Q{(?P<namespace>[^}]+)}|'
41
- r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
42
- r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
43
- )
44
- WRONG_ESCAPE_PATTERN = re.compile(r'%(?![a-fA-F\d]{2})')
45
- XML_NEWLINES_PATTERN = re.compile('\r\n|\r|\n')
31
+ # Data validation patterns
32
+
33
+ class LazyPattern:
34
+ """
35
+ A descriptor for creating lazy regexp patterns. The compiled pattern is built
36
+ only when the descriptor attribute is accessed (e.g. a hasattr() call).
37
+ """
38
+ _compiled: Pattern[str]
39
+
40
+ def __init__(self, pattern: str, flags: Union[int, re.RegexFlag] = 0) -> None:
41
+ self._pattern = pattern
42
+ self._flags = flags
43
+
44
+ def __set_name__(self, owner: Type[Any], name: str) -> None:
45
+ self._name = name
46
+
47
+ @overload
48
+ def __get__(self, instance: None, owner: Type[Any]) -> Pattern[str]: ...
49
+
50
+ @overload
51
+ def __get__(self, instance: Any, owner: Type[Any]) -> Pattern[str]: ...
52
+
53
+ def __get__(self, instance: Optional[Any], owner: Type[Any]) -> Pattern[str]:
54
+ try:
55
+ return self._compiled
56
+ except AttributeError:
57
+ self._compiled = re.compile(self._pattern, self._flags)
58
+ return self._compiled
59
+
60
+ def __set__(self, instance: Any, value: Any) -> None:
61
+ raise AttributeError("Can't set attribute {}".format(self._name))
62
+
63
+ def __delete__(self, instance: Any) -> None:
64
+ raise AttributeError("Can't delete attribute {}".format(self._name))
65
+
66
+
67
+ class Patterns:
68
+ """
69
+ Helper patterns; the ones that aren't used at import time are defined lazily.
70
+ """
71
+ whitespaces = re.compile(r'[^\S\xa0]+') # matches whitespace runs, excluding U+00A0 (non-breaking space)
72
+ normalize = LazyPattern(r'[^\S\xa0]')
73
+ ncname = LazyPattern(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$')
74
+ extended_qname = LazyPattern(
75
+ r'^(?:Q{(?P<namespace>[^}]+)}|'
76
+ r'(?P<prefix>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*):)?'
77
+ r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
78
+ )
79
+ replacement = LazyPattern(r'^([^\\$]|\\{2}|\\\$|\$\d+)*$')
80
+ sequence_type = LazyPattern(r'\s?([()?*+,])\s?')
81
+ unicode_escape = LazyPattern(r'(?:\\u([0-9A-Fa-f]{4})|\\U([0-9A-Fa-f]{8}))')
82
+ wrong_escape = LazyPattern(r'%(?![a-fA-F\d]{2})')
83
+ xml_newlines = LazyPattern('\r\n|\r|\n')
84
+
85
+ # Regex patterns related to names and namespaces
86
+ namespace_uri = LazyPattern(r'{([^}]+)}')
87
+ expanded_name = LazyPattern(
88
+ r'^(?:{(?P<namespace>[^}]+)})?'
89
+ r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
90
+ )
46
91
 
47
92
 
48
93
  def upper_camel_case(s: str) -> str:
@@ -50,16 +95,16 @@ def upper_camel_case(s: str) -> str:
50
95
 
51
96
 
52
97
  def collapse_white_spaces(s: str) -> str:
53
- return WHITESPACES_PATTERN.sub(' ', s).strip(' ')
98
+ return Patterns.whitespaces.sub(' ', s).strip(' ')
54
99
 
55
100
 
56
101
  def is_ncname(s: str) -> bool:
57
- return re.match(r'^[^\d\W][\w.\-\u00B7\u0300-\u036F\u203F\u2040]*$', s) is not None
102
+ return Patterns.ncname.match(s) is not None
58
103
 
59
104
 
60
105
  def is_idrefs(value: Optional[str]) -> bool:
61
106
  return isinstance(value, str) and \
62
- all(NCNAME_PATTERN.match(x) is not None for x in value.split())
107
+ all(Patterns.ncname.match(x) is not None for x in value.split())
63
108
 
64
109
 
65
110
  node_position = attrgetter('position')
@@ -243,7 +288,8 @@ def escape_json_string(s: str, escaped: bool = False) -> str:
243
288
  def unescape_json_string(s: str) -> str:
244
289
 
245
290
  def unicode_escape_callback(match: Match[str]) -> str:
246
- return chr(int(match.group(1).upper(), 16))
291
+ group = match.group(1) or match.group(2)
292
+ return chr(int(group.upper(), 16))
247
293
 
248
294
  s = s.replace('\\"', '\"').\
249
295
  replace(r'\b', '\b').\
@@ -254,7 +300,7 @@ def unescape_json_string(s: str) -> str:
254
300
  replace(r'\/', '/').\
255
301
  replace('\\\\', '\\')
256
302
 
257
- return re.sub(r'\\u([0-9A-Fa-f]{4})', unicode_escape_callback, s)
303
+ return Patterns.unicode_escape.sub(unicode_escape_callback, s)
258
304
 
259
305
 
260
306
  def iter_sequence(obj: Any) -> Iterator[Any]:
@@ -7,17 +7,10 @@
7
7
  #
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
- import re
11
10
  from typing import cast, Tuple, Union
12
11
 
13
12
  from elementpath.aliases import NamespacesType, NsmapType
14
-
15
- # Regex patterns related to names and namespaces
16
- NAMESPACE_URI_PATTERN = re.compile(r'{([^}]+)}')
17
- EXPANDED_NAME_PATTERN = re.compile(
18
- r'^(?:{(?P<namespace>[^}]+)})?'
19
- r'(?P<local>[^\d\W][\w\-.\u00B7\u0300-\u036F\u0387\u06DD\u06DE\u203F\u2040]*)$',
20
- )
13
+ from elementpath.helpers import Patterns
21
14
 
22
15
  # Namespaces
23
16
  XML_NAMESPACE = "http://www.w3.org/XML/1998/namespace"
@@ -70,13 +63,13 @@ XSD_NUMERIC = '{%s}numeric' % XSD_NAMESPACE
70
63
 
71
64
  def get_namespace(name: str) -> str:
72
65
  try:
73
- return NAMESPACE_URI_PATTERN.match(name).group(1) # type: ignore[union-attr]
66
+ return Patterns.namespace_uri.match(name).group(1) # type: ignore[union-attr]
74
67
  except AttributeError:
75
68
  return ''
76
69
 
77
70
 
78
71
  def split_expanded_name(name: str) -> Tuple[str, str]:
79
- match = EXPANDED_NAME_PATTERN.match(name)
72
+ match = Patterns.expanded_name.match(name)
80
73
  if match is None:
81
74
  raise ValueError(f"{name!r} is not an expanded QName")
82
75
  namespace, local_name = match.groups()
@@ -272,7 +272,7 @@ XsdXPathNodeType = Union['XsdSchemaProtocol', 'XsdElementProtocol']
272
272
  class XsdAttributeGroupProtocol(XsdComponentProtocol, Protocol):
273
273
 
274
274
  @overload
275
- def get(self, key: Optional[str], default: None) -> Optional[XsdAttributeProtocol]: ...
275
+ def get(self, key: Optional[str]) -> Optional[XsdAttributeProtocol]: ...
276
276
 
277
277
  @overload
278
278
  def get(self, key: Optional[str], default: _T) -> Union[XsdAttributeProtocol, _T]: ...
@@ -0,0 +1,25 @@
1
+ #
2
+ # Copyright (c), 2018-2020, SISSA (International School for Advanced Studies).
3
+ # All rights reserved.
4
+ # This file is distributed under the terms of the MIT License.
5
+ # See the file 'LICENSE' in the root directory of the present
6
+ # distribution, or http://opensource.org/licenses/MIT.
7
+ #
8
+ # @author Davide Brunato <brunato@sissa.it>
9
+ #
10
+ """
11
+ Subpackage for processing XML regular expressions and for converting them to
12
+ Python-compatible regexps.
13
+
14
+ XPath/XQuery/XML-Schema regexp flavors are supported through translate_pattern()
15
+ API options. Default options process XPath/XQuery patterns.
16
+ """
17
+ from .codepoints import RegexError, iter_code_points
18
+ from .unicode_subsets import UnicodeSubset, UnicodeData, install_unicode_data, \
19
+ unicode_version, unicode_subset, lazy_subset, unicode_category, unicode_block
20
+ from .character_classes import CharacterClass
21
+ from .patterns import translate_pattern
22
+
23
+ __all__ = ['translate_pattern', 'RegexError', 'UnicodeSubset', 'UnicodeData',
24
+ 'install_unicode_data', 'unicode_version', 'unicode_subset', 'lazy_subset',
25
+ 'unicode_category', 'unicode_block', 'CharacterClass', 'iter_code_points']
@@ -8,14 +8,14 @@
8
8
  # @author Davide Brunato <brunato@sissa.it>
9
9
  #
10
10
  import re
11
- from itertools import chain
12
11
  from sys import maxunicode
13
12
  from collections import Counter
14
- from typing import AbstractSet, Any, Optional, Union
13
+ from itertools import chain
14
+ from typing import AbstractSet, Any, Callable, Dict, Optional, Union
15
15
 
16
16
  from elementpath._typing import Iterator, MutableSet
17
- from .unicode_subsets import RegexError, UnicodeSubset, UNICODE_CATEGORIES, unicode_subset
18
-
17
+ from .codepoints import RegexError
18
+ from .unicode_subsets import UnicodeSubset, lazy_subset, unicode_subset, unicode_category
19
19
 
20
20
  I_SHORTCUT_REPLACE = (
21
21
  ":A-Z_a-z\u00C0-\u00D6\u00D8-\u00F6\u00F8-\u02FF\u0370-\u037D\u037F-\u1FFF"
@@ -27,20 +27,34 @@ C_SHORTCUT_REPLACE = (
27
27
  "\u200D\u203F\u2040\u2070-\u218F\u2C00-\u2FEF\u3001-\uD7FF\uF900-\uFDCF\uFDF0-\uFFFD"
28
28
  )
29
29
 
30
- S_SHORTCUT_SET = UnicodeSubset(' \n\t\r')
31
- D_SHORTCUT_SET = UnicodeSubset()
32
- D_SHORTCUT_SET._codepoints = UNICODE_CATEGORIES['Nd'].codepoints
33
- I_SHORTCUT_SET = UnicodeSubset(I_SHORTCUT_REPLACE)
34
- C_SHORTCUT_SET = UnicodeSubset(C_SHORTCUT_REPLACE)
35
- W_SHORTCUT_SET = UnicodeSubset(chain(
36
- UNICODE_CATEGORIES['L'].codepoints,
37
- UNICODE_CATEGORIES['M'].codepoints,
38
- UNICODE_CATEGORIES['N'].codepoints,
39
- UNICODE_CATEGORIES['S'].codepoints
40
- ))
30
+
31
+ @lazy_subset
32
+ def c_shortcut() -> UnicodeSubset:
33
+ return UnicodeSubset(C_SHORTCUT_REPLACE)
34
+
35
+
36
+ @lazy_subset
37
+ def i_shortcut() -> UnicodeSubset:
38
+ return UnicodeSubset(I_SHORTCUT_REPLACE)
39
+
40
+
41
+ @lazy_subset
42
+ def s_shortcut() -> UnicodeSubset:
43
+ return UnicodeSubset(' \t\n\r')
44
+
45
+
46
+ @lazy_subset
47
+ def d_shortcut() -> UnicodeSubset:
48
+ return unicode_category('Nd')
49
+
50
+
51
+ @lazy_subset
52
+ def w_shortcut() -> UnicodeSubset:
53
+ return UnicodeSubset(chain.from_iterable(unicode_category(x) for x in 'LMNS'))
54
+
41
55
 
42
56
  # Single and Multi character escapes
43
- CHARACTER_ESCAPES = {
57
+ CHARACTER_ESCAPES: Dict[str, Union[str, Callable[[], UnicodeSubset]]] = {
44
58
  # Single-character escapes
45
59
  '\\n': '\n',
46
60
  '\\r': '\r',
@@ -61,16 +75,16 @@ CHARACTER_ESCAPES = {
61
75
  '\\\\': '\\',
62
76
 
63
77
  # Multi-character escapes
64
- '\\s': S_SHORTCUT_SET,
65
- '\\S': S_SHORTCUT_SET,
66
- '\\d': D_SHORTCUT_SET,
67
- '\\D': D_SHORTCUT_SET,
68
- '\\i': I_SHORTCUT_SET,
69
- '\\I': I_SHORTCUT_SET,
70
- '\\c': C_SHORTCUT_SET,
71
- '\\C': C_SHORTCUT_SET,
72
- '\\w': W_SHORTCUT_SET,
73
- '\\W': W_SHORTCUT_SET,
78
+ '\\s': s_shortcut,
79
+ '\\S': s_shortcut,
80
+ '\\d': d_shortcut,
81
+ '\\D': d_shortcut,
82
+ '\\i': i_shortcut,
83
+ '\\I': i_shortcut,
84
+ '\\c': c_shortcut,
85
+ '\\C': c_shortcut,
86
+ '\\w': w_shortcut,
87
+ '\\W': w_shortcut,
74
88
  }
75
89
 
76
90
 
@@ -83,7 +97,7 @@ class CharacterClass(MutableSet[int]):
83
97
  TODO: implement __ior__, __iand__, __ixor__ operators for a full mutable set class.
84
98
  """
85
99
  _re_char_set = re.compile(r'(?<!.-)(\\[nrt|.\-^?*+{}()\]sSdDiIcCwW]|\\[pP]{[a-zA-Z\-0-9]+})')
86
- _re_unicode_ref = re.compile(r'\\([pP]){([\w\d-]+)}')
100
+ _re_unicode_ref = re.compile(r'\\([pP]){([\w-]+)}')
87
101
 
88
102
  __slots__ = 'xsd_version', 'positive', 'negative'
89
103
 
@@ -138,17 +152,17 @@ class CharacterClass(MutableSet[int]):
138
152
  return len(self.positive)
139
153
 
140
154
  def __isub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
141
- if not isinstance(other, CharacterClass):
142
- return NotImplemented
143
- elif self.negative:
144
- if other.negative:
145
- self.positive |= (other.negative - self.negative)
146
- self.negative.clear()
147
- self.negative |= other.positive
148
- elif other.negative:
149
- self.positive &= other.negative
150
- self.positive -= other.positive
151
- return self
155
+ if isinstance(other, CharacterClass):
156
+ if self.negative:
157
+ if other.negative:
158
+ self.positive |= (other.negative - self.negative)
159
+ self.negative.clear()
160
+ self.negative |= other.positive
161
+ elif other.negative:
162
+ self.positive &= other.negative
163
+ self.positive -= other.positive
164
+ return self
165
+ return NotImplemented
152
166
 
153
167
  def __sub__(self, other: AbstractSet[Any]) -> 'CharacterClass':
154
168
  obj = self.__copy__()
@@ -164,9 +178,9 @@ class CharacterClass(MutableSet[int]):
164
178
  if isinstance(value, str):
165
179
  self.positive.update(value)
166
180
  elif part[-1].islower():
167
- self.positive |= value
181
+ self.positive |= value()
168
182
  else:
169
- self.negative |= value
183
+ self.negative |= value()
170
184
  elif part.startswith('\\p') or part.startswith('\\P'):
171
185
  if self._re_unicode_ref.search(part) is None:
172
186
  raise RegexError("wrong Unicode block specification %r" % part)
@@ -198,11 +212,11 @@ class CharacterClass(MutableSet[int]):
198
212
  if self.negative:
199
213
  self.negative.update(value)
200
214
  elif part[-1].islower():
201
- self.positive -= value
215
+ self.positive -= value()
202
216
  if self.negative:
203
- self.negative |= value
217
+ self.negative |= value()
204
218
  else:
205
- self.positive &= value
219
+ self.positive &= value()
206
220
  self.negative.clear()
207
221
 
208
222
  elif part.startswith('\\p') or part.startswith('\\P'):
@@ -232,4 +246,4 @@ class CharacterClass(MutableSet[int]):
232
246
  if self.positive or self.negative:
233
247
  self.positive, self.negative = self.negative, self.positive
234
248
  else:
235
- self.positive.codepoints.append((0, maxunicode + 1))
249
+ self.positive.codepoints = [(0, maxunicode + 1)]