junifer-0.0.5.dev219-py3-none-any.whl → junifer-0.0.5.dev242-py3-none-any.whl
This diff shows the changes between two package versions publicly released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
- junifer/_version.py +2 -2
- junifer/datagrabber/__init__.py +2 -0
- junifer/datagrabber/base.py +10 -6
- junifer/datagrabber/hcp1200/hcp1200.py +1 -1
- junifer/datagrabber/multiple.py +42 -6
- junifer/datagrabber/pattern.py +33 -10
- junifer/datagrabber/pattern_validation_mixin.py +388 -0
- junifer/datagrabber/tests/test_multiple.py +161 -84
- junifer/datagrabber/tests/{test_datagrabber_utils.py → test_pattern_validation_mixin.py} +133 -108
- junifer/utils/__init__.py +2 -1
- junifer/utils/helpers.py +30 -2
- junifer/utils/logging.py +18 -1
- junifer/utils/tests/test_logging.py +8 -0
- {junifer-0.0.5.dev219.dist-info → junifer-0.0.5.dev242.dist-info}/METADATA +1 -1
- {junifer-0.0.5.dev219.dist-info → junifer-0.0.5.dev242.dist-info}/RECORD +20 -20
- {junifer-0.0.5.dev219.dist-info → junifer-0.0.5.dev242.dist-info}/WHEEL +1 -1
- junifer/datagrabber/utils.py +0 -317
- {junifer-0.0.5.dev219.dist-info → junifer-0.0.5.dev242.dist-info}/AUTHORS.rst +0 -0
- {junifer-0.0.5.dev219.dist-info → junifer-0.0.5.dev242.dist-info}/LICENSE.md +0 -0
- {junifer-0.0.5.dev219.dist-info → junifer-0.0.5.dev242.dist-info}/entry_points.txt +0 -0
- {junifer-0.0.5.dev219.dist-info → junifer-0.0.5.dev242.dist-info}/top_level.txt +0 -0
junifer/_version.py
CHANGED
@@ -12,5 +12,5 @@ __version__: str
 __version_tuple__: VERSION_TUPLE
 version_tuple: VERSION_TUPLE
 
-__version__ = version = '0.0.5.dev219'
-__version_tuple__ = version_tuple = (0, 0, 5, 'dev219')
+__version__ = version = '0.0.5.dev242'
+__version_tuple__ = version_tuple = (0, 0, 5, 'dev242')
junifer/datagrabber/__init__.py
CHANGED
@@ -17,6 +17,7 @@ from .hcp1200 import HCP1200, DataladHCP1200
 from .multiple import MultipleDataGrabber
 from .dmcc13_benchmark import DMCC13Benchmark
 
+from .pattern_validation_mixin import PatternValidationMixin
 
 __all__ = [
     "BaseDataGrabber",
@@ -30,4 +31,5 @@ __all__ = [
     "DataladHCP1200",
     "MultipleDataGrabber",
     "DMCC13Benchmark",
+    "PatternValidationMixin",
 ]
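Notably, `PatternValidationMixin` is now part of the public datagrabber namespace, so external grabbers can reuse the same validation. A minimal sketch of such reuse; the `ExampleGrabber` class and its pattern values are hypothetical, for illustration only:

    from junifer.datagrabber import BaseDataGrabber, PatternValidationMixin


    class ExampleGrabber(BaseDataGrabber, PatternValidationMixin):
        """Hypothetical grabber that validates its patterns via the mixin."""

        # (abstract element-listing methods omitted for brevity)
        def __init__(self, datadir):
            types = ["T1w"]
            patterns = {
                "T1w": {
                    "pattern": "{subject}/anat/{subject}_T1w.nii.gz",
                    "space": "native",
                },
            }
            # Fail early on malformed types / patterns / replacements
            self.validate_patterns(
                types=types, replacements=["subject"], patterns=patterns
            )
            super().__init__(types=types, datadir=datadir)
            self.patterns = patterns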
junifer/datagrabber/base.py
CHANGED
@@ -11,7 +11,6 @@ from typing import Dict, Iterator, List, Tuple, Union
 
 from ..pipeline import UpdateMetaMixin
 from ..utils import logger, raise_error
-from .utils import validate_types
 
 
 __all__ = ["BaseDataGrabber"]
@@ -30,16 +29,21 @@ class BaseDataGrabber(ABC, UpdateMetaMixin):
     datadir : str or pathlib.Path
         The directory where the data is / will be stored.
 
-    …
-    …
-    …
-    …
+    Raises
+    ------
+    TypeError
+        If ``types`` is not a list or if the values are not string.
 
     """
 
     def __init__(self, types: List[str], datadir: Union[str, Path]) -> None:
         # Validate types
-        validate_types(types)
+        if not isinstance(types, list):
+            raise_error(msg="`types` must be a list", klass=TypeError)
+        if any(not isinstance(x, str) for x in types):
+            raise_error(
+                msg="`types` must be a list of strings", klass=TypeError
+            )
         self.types = types
 
         # Convert str to Path
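The list-of-strings check that used to live in `junifer.datagrabber.utils` (removed entirely in this release, per the file list above) is now inlined in the constructor. A quick sketch of the new failure modes; `_check_types` here is a local stand-in that mirrors the inlined logic, not a junifer API:

    from junifer.utils import raise_error


    def _check_types(types):
        # Mirrors the validation now inlined in BaseDataGrabber.__init__
        if not isinstance(types, list):
            raise_error(msg="`types` must be a list", klass=TypeError)
        if any(not isinstance(x, str) for x in types):
            raise_error(msg="`types` must be a list of strings", klass=TypeError)


    _check_types(["BOLD", "T1w"])  # passes silently
    # _check_types("BOLD")       -> TypeError: `types` must be a list
    # _check_types(["BOLD", 1])  -> TypeError: `types` must be a list of strings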
junifer/datagrabber/hcp1200/hcp1200.py
CHANGED
@@ -10,8 +10,8 @@ from pathlib import Path
 from typing import Dict, List, Union
 
 from ...api.decorators import register_datagrabber
+from ...utils import raise_error
 from ..pattern import PatternDataGrabber
-from ..utils import raise_error
 
 
 __all__ = ["HCP1200"]
junifer/datagrabber/multiple.py
CHANGED
@@ -7,13 +7,15 @@
 
 from typing import Dict, List, Tuple, Union
 
-from ..utils import raise_error
+from ..api.decorators import register_datagrabber
+from ..utils import deep_update, raise_error
 from .base import BaseDataGrabber
 
 
 __all__ = ["MultipleDataGrabber"]
 
 
+@register_datagrabber
 class MultipleDataGrabber(BaseDataGrabber):
     """Concrete implementation for multi sourced data fetching.
 
@@ -27,19 +29,53 @@ class MultipleDataGrabber(BaseDataGrabber):
     **kwargs
         Keyword arguments passed to superclass.
 
+    Raises
+    ------
+    RuntimeError
+        If ``datagrabbers`` have different element keys or
+        overlapping data types or nested data types.
+
     """
 
     def __init__(self, datagrabbers: List[BaseDataGrabber], **kwargs) -> None:
         # Check datagrabbers consistency
-        # …
+        # Check for same element keys
         first_keys = datagrabbers[0].get_element_keys()
         for dg in datagrabbers[1:]:
             if dg.get_element_keys() != first_keys:
-                raise_error(
-                    …
+                raise_error(
+                    msg="DataGrabbers have different element keys",
+                    klass=RuntimeError,
+                )
+        # Check for no overlapping types (and nested data types)
         types = [x for dg in datagrabbers for x in dg.get_types()]
         if len(types) != len(set(types)):
-            …
+            if all(hasattr(dg, "patterns") for dg in datagrabbers):
+                first_patterns = datagrabbers[0].patterns
+                for dg in datagrabbers[1:]:
+                    for data_type in set(types):
+                        dtype_pattern = dg.patterns.get(data_type)
+                        if dtype_pattern is None:
+                            continue
+                        # Check if first-level keys of data type are same
+                        if (
+                            dtype_pattern.keys()
+                            == first_patterns[data_type].keys()
+                        ):
+                            raise_error(
+                                msg=(
+                                    "DataGrabbers have overlapping mandatory "
+                                    "and / or optional key(s) for data type: "
+                                    f"`{data_type}`"
+                                ),
+                                klass=RuntimeError,
+                            )
+            else:
+                # Can't check further
+                raise_error(
+                    msg="DataGrabbers have overlapping types",
+                    klass=RuntimeError,
+                )
         self._datagrabbers = datagrabbers
 
     def __getitem__(self, element: Union[str, Tuple]) -> Dict:
@@ -65,7 +101,7 @@ class MultipleDataGrabber(BaseDataGrabber):
         metas = []
         for dg in self._datagrabbers:
             t_out = dg[element]
-            out.update(t_out)
+            deep_update(out, t_out)
             # Now get the meta for this datagrabber
             t_meta = {}
             dg.update_meta(t_meta, "datagrabber")
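The switch from `dict.update` to `deep_update` in `__getitem__` is what makes partially overlapping outputs composable: a shallow update replaces the whole nested dict for a data type, whereas a deep update merges the nested keys from each grabber. A sketch of the difference, using a local recursive merge that is assumed (not verified here) to match the semantics of `junifer.utils.deep_update`:

    def deep_update(target: dict, source: dict) -> None:
        # Local stand-in: recursively merge `source` into `target`
        for key, value in source.items():
            if isinstance(value, dict) and isinstance(target.get(key), dict):
                deep_update(target[key], value)
            else:
                target[key] = value


    out = {"BOLD": {"path": "sub-01_bold.nii.gz", "mask": {"path": "m.nii.gz"}}}
    new = {"BOLD": {"confounds": {"path": "sub-01_confounds.tsv"}}}

    # dict.update would replace out["BOLD"] wholesale, dropping "mask";
    # deep_update keeps "mask" and adds "confounds" alongside it.
    deep_update(out, new)
    assert set(out["BOLD"]) == {"path", "mask", "confounds"}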
junifer/datagrabber/pattern.py
CHANGED
@@ -15,7 +15,7 @@ import numpy as np
 from ..api.decorators import register_datagrabber
 from ..utils import logger, raise_error
 from .base import BaseDataGrabber
-from .utils import validate_patterns, validate_replacements
+from .pattern_validation_mixin import PatternValidationMixin
 
 
 __all__ = ["PatternDataGrabber"]
@@ -26,7 +26,7 @@ _CONFOUNDS_FORMATS = ("fmriprep", "adhoc")
 
 
 @register_datagrabber
-class PatternDataGrabber(BaseDataGrabber):
+class PatternDataGrabber(BaseDataGrabber, PatternValidationMixin):
     """Concrete implementation for pattern-based data fetching.
 
     Implements a DataGrabber that understands patterns to grab data.
@@ -142,6 +142,13 @@ class PatternDataGrabber(BaseDataGrabber):
         The directory where the data is / will be stored.
     confounds_format : {"fmriprep", "adhoc"} or None, optional
         The format of the confounds for the dataset (default None).
+    partial_pattern_ok : bool, optional
+        Whether to raise error if partial pattern for a data type is found.
+        This allows one to bypass the mandatory key check and issue a
+        warning instead of raising an error, and hence to have a
+        DataGrabber with data types missing their corresponding mandatory
+        keys; this is powerful when used with
+        :class:`.MultipleDataGrabber` (default False).
 
     Raises
     ------
@@ -157,17 +164,21 @@
         replacements: Union[List[str], str],
         datadir: Union[str, Path],
         confounds_format: Optional[str] = None,
+        partial_pattern_ok: bool = False,
     ) -> None:
-        # Validate patterns
-        validate_patterns(types=types, patterns=patterns)
-        self.patterns = patterns
-
         # Convert replacements to list if not already
         if not isinstance(replacements, list):
             replacements = [replacements]
-        # Validate replacements
-        validate_replacements(replacements=replacements, patterns=patterns)
+        # Validate patterns
+        self.validate_patterns(
+            types=types,
+            replacements=replacements,
+            patterns=patterns,
+            partial_pattern_ok=partial_pattern_ok,
+        )
         self.replacements = replacements
+        self.patterns = patterns
+        self.partial_pattern_ok = partial_pattern_ok
 
         # Validate confounds format
         if (
@@ -436,14 +447,26 @@
         for t_idx in reversed(order):
             t_type = self.types[t_idx]
             types_element = set()
-
+
+            # Get the pattern dict
             t_pattern = self.patterns[t_type]
+            # Conditional fetch of base pattern for getting elements
+            pattern = None
+            # Try for data type pattern
+            pattern = t_pattern.get("pattern")
+            # Try for nested data type pattern
+            if pattern is None and self.partial_pattern_ok:
+                for v in t_pattern.values():
+                    if isinstance(v, dict) and "pattern" in v:
+                        pattern = v["pattern"]
+                        break
+
             # Replace the pattern
             (
                 re_pattern,
                 glob_pattern,
                 t_replacements,
-            ) = self._replace_patterns_regex(t_pattern["pattern"])
+            ) = self._replace_patterns_regex(pattern)
             for fname in self.datadir.glob(glob_pattern):
                 suffix = fname.relative_to(self.datadir).as_posix()
                 m = re.match(re_pattern, suffix)
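Combined with `MultipleDataGrabber`, the new `partial_pattern_ok` flag lets one grabber contribute only a nested part of a data type (for example, only `BOLD.confounds`) while another supplies the base image. A sketch of that setup; the paths, space, and dataset layout are invented for illustration:

    from junifer.datagrabber import MultipleDataGrabber, PatternDataGrabber

    # Supplies the BOLD image itself (all mandatory keys present)
    dg_bold = PatternDataGrabber(
        types=["BOLD"],
        patterns={
            "BOLD": {
                "pattern": "{subject}/func/{subject}_bold.nii.gz",
                "space": "MNI152NLin2009cAsym",
            },
        },
        replacements=["subject"],
        datadir="/data/study",
    )

    # Supplies only the confounds for BOLD; without partial_pattern_ok=True
    # this would fail the mandatory-key check for "pattern" and "space"
    dg_conf = PatternDataGrabber(
        types=["BOLD"],
        patterns={
            "BOLD": {
                "confounds": {
                    "pattern": "{subject}/func/{subject}_confounds.tsv",
                    "format": "fmriprep",
                },
            },
        },
        replacements=["subject"],
        datadir="/data/study",
        partial_pattern_ok=True,
    )

    # The overlap check passes because the two BOLD pattern dicts have
    # different first-level keys; deep_update then merges their outputs.
    dg = MultipleDataGrabber([dg_bold, dg_conf])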
junifer/datagrabber/pattern_validation_mixin.py
ADDED
@@ -0,0 +1,388 @@
+"""Provide mixin validation class for pattern-based DataGrabber."""
+
+# Authors: Synchon Mandal <s.mandal@fz-juelich.de>
+# License: AGPL
+
+from typing import Dict, List
+
+from ..utils import logger, raise_error, warn_with_log
+
+
+__all__ = ["PatternValidationMixin"]
+
+
+# Define schema for pattern-based datagrabber's patterns
+PATTERNS_SCHEMA = {
+    "T1w": {
+        "mandatory": ["pattern", "space"],
+        "optional": {
+            "mask": {"mandatory": ["pattern", "space"], "optional": []},
+        },
+    },
+    "T2w": {
+        "mandatory": ["pattern", "space"],
+        "optional": {
+            "mask": {"mandatory": ["pattern", "space"], "optional": []},
+        },
+    },
+    "BOLD": {
+        "mandatory": ["pattern", "space"],
+        "optional": {
+            "mask": {"mandatory": ["pattern", "space"], "optional": []},
+            "confounds": {
+                "mandatory": ["pattern", "format"],
+                "optional": ["mappings"],
+            },
+        },
+    },
+    "Warp": {
+        "mandatory": ["pattern", "src", "dst"],
+        "optional": {},
+    },
+    "VBM_GM": {
+        "mandatory": ["pattern", "space"],
+        "optional": {},
+    },
+    "VBM_WM": {
+        "mandatory": ["pattern", "space"],
+        "optional": {},
+    },
+    "VBM_CSF": {
+        "mandatory": ["pattern", "space"],
+        "optional": {},
+    },
+    "DWI": {
+        "mandatory": ["pattern"],
+        "optional": {},
+    },
+    "FreeSurfer": {
+        "mandatory": ["pattern"],
+        "optional": {
+            "aseg": {"mandatory": ["pattern"], "optional": []},
+            "norm": {"mandatory": ["pattern"], "optional": []},
+            "lh_white": {"mandatory": ["pattern"], "optional": []},
+            "rh_white": {"mandatory": ["pattern"], "optional": []},
+            "lh_pial": {"mandatory": ["pattern"], "optional": []},
+            "rh_pial": {"mandatory": ["pattern"], "optional": []},
+        },
+    },
+}
+
+
+class PatternValidationMixin:
+    """Mixin class for pattern validation."""
+
+    def _validate_types(self, types: List[str]) -> None:
+        """Validate the types.
+
+        Parameters
+        ----------
+        types : list of str
+            The data types to validate.
+
+        Raises
+        ------
+        TypeError
+            If ``types`` is not a list or if the values are not string.
+
+        """
+        if not isinstance(types, list):
+            raise_error(msg="`types` must be a list", klass=TypeError)
+        if any(not isinstance(x, str) for x in types):
+            raise_error(
+                msg="`types` must be a list of strings", klass=TypeError
+            )
+
+    def _validate_replacements(
+        self,
+        replacements: List[str],
+        patterns: Dict[str, Dict[str, str]],
+        partial_pattern_ok: bool,
+    ) -> None:
+        """Validate the replacements.
+
+        Parameters
+        ----------
+        replacements : list of str
+            The replacements to validate.
+        patterns : dict
+            The patterns to validate replacements against.
+        partial_pattern_ok : bool
+            Whether to raise error if partial pattern for a data type is found.
+
+        Raises
+        ------
+        TypeError
+            If ``replacements`` is not a list or if the values are not string.
+        ValueError
+            If a value in ``replacements`` is not part of a data type pattern
+            and ``partial_pattern_ok=False`` or
+            if no data type patterns contain all values in ``replacements`` and
+            ``partial_pattern_ok=False``.
+
+        Warns
+        -----
+        RuntimeWarning
+            If a value in ``replacements`` is not part of the data type pattern
+            and ``partial_pattern_ok=True``.
+
+        """
+        if not isinstance(replacements, list):
+            raise_error(msg="`replacements` must be a list.", klass=TypeError)
+
+        if any(not isinstance(x, str) for x in replacements):
+            raise_error(
+                msg="`replacements` must be a list of strings.",
+                klass=TypeError,
+            )
+
+        for x in replacements:
+            if all(
+                x not in y
+                for y in [
+                    data_type_val.get("pattern", "")
+                    for data_type_val in patterns.values()
+                ]
+            ):
+                if partial_pattern_ok:
+                    warn_with_log(
+                        f"Replacement: `{x}` is not part of any pattern, "
+                        "things might not work as expected if you are unsure "
+                        "of what you are doing"
+                    )
+                else:
+                    raise_error(
+                        msg=f"Replacement: {x} is not part of any pattern."
+                    )
+
+        # Check that at least one pattern has all the replacements
+        at_least_one = False
+        for data_type_val in patterns.values():
+            if all(
+                x in data_type_val.get("pattern", "") for x in replacements
+            ):
+                at_least_one = True
+        if not at_least_one and not partial_pattern_ok:
+            raise_error(
+                msg="At least one pattern must contain all replacements."
+            )
+
+    def _validate_mandatory_keys(
+        self,
+        keys: List[str],
+        schema: List[str],
+        data_type: str,
+        partial_pattern_ok: bool = False,
+    ) -> None:
+        """Validate mandatory keys.
+
+        Parameters
+        ----------
+        keys : list of str
+            The keys to validate.
+        schema : list of str
+            The schema to validate against.
+        data_type : str
+            The data type being validated.
+        partial_pattern_ok : bool, optional
+            Whether to raise error if partial pattern for a data type is found
+            (default False).
+
+        Raises
+        ------
+        KeyError
+            If any mandatory key is missing for a data type and
+            ``partial_pattern_ok=False``.
+
+        Warns
+        -----
+        RuntimeWarning
+            If any mandatory key is missing for a data type and
+            ``partial_pattern_ok=True``.
+
+        """
+        for key in schema:
+            if key not in keys:
+                if partial_pattern_ok:
+                    warn_with_log(
+                        f"Mandatory key: `{key}` not found for {data_type}, "
+                        "things might not work as expected if you are unsure "
+                        "of what you are doing"
+                    )
+                else:
+                    raise_error(
+                        msg=f"Mandatory key: `{key}` missing for {data_type}",
+                        klass=KeyError,
+                    )
+            else:
+                logger.debug(f"Mandatory key: `{key}` found for {data_type}")
+
+    def _identify_stray_keys(
+        self, keys: List[str], schema: List[str], data_type: str
+    ) -> None:
+        """Identify stray keys.
+
+        Parameters
+        ----------
+        keys : list of str
+            The keys to check.
+        schema : list of str
+            The schema to check against.
+        data_type : str
+            The data type being checked.
+
+        Raises
+        ------
+        RuntimeError
+            If an unknown key is found for a data type.
+
+        """
+        for key in keys:
+            if key not in schema:
+                raise_error(
+                    msg=(
+                        f"Key: {key} not accepted for {data_type} "
+                        "pattern, remove it to proceed"
+                    ),
+                    klass=RuntimeError,
+                )
+
+    def validate_patterns(
+        self,
+        types: List[str],
+        replacements: List[str],
+        patterns: Dict[str, Dict[str, str]],
+        partial_pattern_ok: bool = False,
+    ) -> None:
+        """Validate the patterns.
+
+        Parameters
+        ----------
+        types : list of str
+            The data types to check patterns of.
+        replacements : list of str
+            The replacements to be replaced in the patterns.
+        patterns : dict
+            The patterns to validate.
+        partial_pattern_ok : bool, optional
+            Whether to raise error if partial pattern for a data type is found.
+            If True, a warning is issued instead of raising an error
+            (default False).
+
+        Raises
+        ------
+        TypeError
+            If ``patterns`` is not a dictionary.
+        ValueError
+            If length of ``types`` and ``patterns`` are different or
+            if ``patterns`` is missing entries from ``types`` or
+            if unknown data type is found in ``patterns`` or
+            if data type pattern key contains '*' as value.
+
+        """
+        # Validate types
+        self._validate_types(types=types)
+
+        # Validate patterns
+        if not isinstance(patterns, dict):
+            raise_error(msg="`patterns` must be a dict", klass=TypeError)
+        # Unequal length of objects
+        if len(types) > len(patterns):
+            raise_error(
+                msg="Length of `types` more than that of `patterns`",
+                klass=ValueError,
+            )
+        # Missing type in patterns
+        if any(x not in patterns for x in types):
+            raise_error(
+                msg="`patterns` must contain all `types`", klass=ValueError
+            )
+        # Check against schema
+        for data_type_key, data_type_val in patterns.items():
+            # Check if valid data type is provided
+            if data_type_key not in PATTERNS_SCHEMA:
+                raise_error(
+                    f"Unknown data type: {data_type_key}, "
+                    f"should be one of: {list(PATTERNS_SCHEMA.keys())}"
+                )
+            # Check mandatory keys for data type
+            self._validate_mandatory_keys(
+                keys=list(data_type_val),
+                schema=PATTERNS_SCHEMA[data_type_key]["mandatory"],
+                data_type=data_type_key,
+                partial_pattern_ok=partial_pattern_ok,
+            )
+            # Check optional keys for data type
+            for optional_key, optional_val in PATTERNS_SCHEMA[data_type_key][
+                "optional"
+            ].items():
+                if optional_key not in data_type_val:
+                    logger.debug(
+                        f"Optional key: `{optional_key}` missing for "
+                        f"{data_type_key}"
+                    )
+                else:
+                    logger.debug(
+                        f"Optional key: `{optional_key}` found for "
+                        f"{data_type_key}"
+                    )
+                    # Set nested type name for easier access
+                    nested_data_type = f"{data_type_key}.{optional_key}"
+                    nested_mandatory_keys_schema = PATTERNS_SCHEMA[
+                        data_type_key
+                    ]["optional"][optional_key]["mandatory"]
+                    nested_optional_keys_schema = PATTERNS_SCHEMA[
+                        data_type_key
+                    ]["optional"][optional_key]["optional"]
+                    # Check mandatory keys for nested type
+                    self._validate_mandatory_keys(
+                        keys=list(optional_val["mandatory"]),
+                        schema=nested_mandatory_keys_schema,
+                        data_type=nested_data_type,
+                        partial_pattern_ok=partial_pattern_ok,
+                    )
+                    # Check optional keys for nested type
+                    for nested_optional_key in nested_optional_keys_schema:
+                        if nested_optional_key not in optional_val["optional"]:
+                            logger.debug(
+                                f"Optional key: `{nested_optional_key}` "
+                                f"missing for {nested_data_type}"
+                            )
+                        else:
+                            logger.debug(
+                                f"Optional key: `{nested_optional_key}` found "
+                                f"for {nested_data_type}"
+                            )
+                    # Check stray key for nested data type
+                    self._identify_stray_keys(
+                        keys=optional_val["mandatory"]
+                        + optional_val["optional"],
+                        schema=nested_mandatory_keys_schema
+                        + nested_optional_keys_schema,
+                        data_type=nested_data_type,
+                    )
+            # Check stray key for data type
+            self._identify_stray_keys(
+                keys=list(data_type_val.keys()),
+                schema=(
+                    PATTERNS_SCHEMA[data_type_key]["mandatory"]
+                    + list(PATTERNS_SCHEMA[data_type_key]["optional"].keys())
+                ),
+                data_type=data_type_key,
+            )
+            # Wildcard check in patterns
+            if "}*" in data_type_val.get("pattern", ""):
+                raise_error(
+                    msg=(
+                        f"`{data_type_key}.pattern` must not contain `*` "
+                        "following a replacement"
+                    ),
+                    klass=ValueError,
+                )
+
+        # Validate replacements
+        self._validate_replacements(
+            replacements=replacements,
+            patterns=patterns,
+            partial_pattern_ok=partial_pattern_ok,
+        )
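Since the mixin only needs `self` for its helper methods, it can be exercised standalone, which is presumably how the renamed test module (`test_pattern_validation_mixin.py`) drives it. A minimal sketch:

    from junifer.datagrabber import PatternValidationMixin

    validator = PatternValidationMixin()

    # Passes: T1w provides its mandatory "pattern" and "space" keys
    validator.validate_patterns(
        types=["T1w"],
        replacements=["subject"],
        patterns={
            "T1w": {
                "pattern": "{subject}/anat/T1w.nii.gz",
                "space": "native",
            },
        },
    )

    # "space" is missing: raises KeyError by default; with
    # partial_pattern_ok=True it is downgraded to a RuntimeWarning
    validator.validate_patterns(
        types=["T1w"],
        replacements=["subject"],
        patterns={"T1w": {"pattern": "{subject}/anat/T1w.nii.gz"}},
        partial_pattern_ok=True,
    )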