scikit-base 0.4.6__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +299 -299
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/LICENSE +29 -29
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/METADATA +160 -159
- scikit_base-0.5.1.dist-info/RECORD +58 -0
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/WHEEL +1 -1
- scikit_base-0.5.1.dist-info/top_level.txt +5 -0
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/zip-safe +1 -1
- skbase/__init__.py +14 -14
- skbase/_exceptions.py +31 -31
- skbase/_nopytest_tests.py +35 -35
- skbase/base/__init__.py +20 -20
- skbase/base/_base.py +1249 -1249
- skbase/base/_meta.py +883 -871
- skbase/base/_pretty_printing/__init__.py +11 -11
- skbase/base/_pretty_printing/_object_html_repr.py +392 -392
- skbase/base/_pretty_printing/_pprint.py +412 -412
- skbase/base/_tagmanager.py +217 -217
- skbase/lookup/__init__.py +31 -31
- skbase/lookup/_lookup.py +1009 -1009
- skbase/lookup/tests/__init__.py +2 -2
- skbase/lookup/tests/test_lookup.py +991 -991
- skbase/testing/__init__.py +12 -12
- skbase/testing/test_all_objects.py +852 -856
- skbase/testing/utils/__init__.py +5 -5
- skbase/testing/utils/_conditional_fixtures.py +209 -209
- skbase/testing/utils/_dependencies.py +15 -15
- skbase/testing/utils/deep_equals.py +15 -15
- skbase/testing/utils/inspect.py +30 -30
- skbase/testing/utils/tests/__init__.py +2 -2
- skbase/testing/utils/tests/test_check_dependencies.py +49 -49
- skbase/testing/utils/tests/test_deep_equals.py +66 -66
- skbase/tests/__init__.py +2 -2
- skbase/tests/conftest.py +273 -273
- skbase/tests/mock_package/__init__.py +5 -5
- skbase/tests/mock_package/test_mock_package.py +74 -74
- skbase/tests/test_base.py +1202 -1202
- skbase/tests/test_baseestimator.py +130 -130
- skbase/tests/test_exceptions.py +23 -23
- skbase/tests/test_meta.py +170 -131
- skbase/utils/__init__.py +21 -21
- skbase/utils/_check.py +53 -53
- skbase/utils/_iter.py +238 -238
- skbase/utils/_nested_iter.py +180 -180
- skbase/utils/_utils.py +91 -91
- skbase/utils/deep_equals.py +358 -358
- skbase/utils/dependencies/__init__.py +11 -11
- skbase/utils/dependencies/_dependencies.py +253 -253
- skbase/utils/tests/__init__.py +4 -4
- skbase/utils/tests/test_check.py +24 -24
- skbase/utils/tests/test_iter.py +127 -127
- skbase/utils/tests/test_nested_iter.py +84 -84
- skbase/utils/tests/test_utils.py +37 -37
- skbase/validate/__init__.py +22 -22
- skbase/validate/_named_objects.py +403 -403
- skbase/validate/_types.py +345 -345
- skbase/validate/tests/__init__.py +2 -2
- skbase/validate/tests/test_iterable_named_objects.py +200 -200
- skbase/validate/tests/test_type_validations.py +370 -370
- scikit_base-0.4.6.dist-info/RECORD +0 -58
- scikit_base-0.4.6.dist-info/top_level.txt +0 -2
@@ -1,403 +1,403 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# copyright: skbase developers, BSD-3-Clause License (see LICENSE file)
|
3
|
-
"""Validate if an input is one of the allowed named object formats."""
|
4
|
-
import collections.abc
|
5
|
-
from typing import (
|
6
|
-
TYPE_CHECKING,
|
7
|
-
Any,
|
8
|
-
Dict,
|
9
|
-
List,
|
10
|
-
Optional,
|
11
|
-
Sequence,
|
12
|
-
Tuple,
|
13
|
-
Union,
|
14
|
-
overload,
|
15
|
-
)
|
16
|
-
|
17
|
-
from skbase.base import BaseObject
|
18
|
-
|
19
|
-
__all__: List[str] = [
|
20
|
-
"check_sequence_named_objects",
|
21
|
-
"is_named_object_tuple",
|
22
|
-
"is_sequence_named_objects",
|
23
|
-
]
|
24
|
-
__author__: List[str] = ["RNKuhns"]
|
25
|
-
|
26
|
-
|
27
|
-
def _named_baseobject_error_msg(
|
28
|
-
sequence_name: Optional[str] = None, allow_dict: bool = True
|
29
|
-
):
|
30
|
-
"""Create error message for non-comformance with named BaseObject api."""
|
31
|
-
name_str = f"{sequence_name}" if sequence_name is not None else "Input"
|
32
|
-
allowed_types = "a sequence of (string name, BaseObject instance) tuples"
|
33
|
-
|
34
|
-
if allow_dict:
|
35
|
-
allowed_types += " or dict[str, BaseObject instance]"
|
36
|
-
msg = f"Invalid {name_str!r}, {name_str!r} should be {allowed_types}."
|
37
|
-
return msg
|
38
|
-
|
39
|
-
|
40
|
-
def is_named_object_tuple(
|
41
|
-
obj: Any, object_type: Optional[Union[type, Tuple[type, ...]]] = None
|
42
|
-
) -> bool:
|
43
|
-
"""Indicate if input is a a tuple of format (str, `object_type`).
|
44
|
-
|
45
|
-
Used to validate that input follows named object tuple API format.
|
46
|
-
|
47
|
-
Parameters
|
48
|
-
----------
|
49
|
-
obj : Any
|
50
|
-
The object to be checked to see if it is a (str, `object_type`) tuple.
|
51
|
-
object_type : class or tuple of class, default=BaseObject
|
52
|
-
Class(es) that all objects are checked to be an instance of. If None,
|
53
|
-
then :class:``skbase.base.BaseObject`` is used as default.
|
54
|
-
|
55
|
-
Returns
|
56
|
-
-------
|
57
|
-
bool
|
58
|
-
True if obj is (str, `object_type`) tuple, otherwise False.
|
59
|
-
|
60
|
-
See Also
|
61
|
-
--------
|
62
|
-
is_sequence_named_objects :
|
63
|
-
Indicate (True/False) if an input sequence follows the named object API.
|
64
|
-
check_sequence_named_objects :
|
65
|
-
Validate input to see if it follows sequence of named objects API. An error
|
66
|
-
is raised for input that does not conform to the API format.
|
67
|
-
|
68
|
-
Examples
|
69
|
-
--------
|
70
|
-
>>> from skbase.base import BaseObject, BaseEstimator
|
71
|
-
>>> from skbase.validate import is_named_object_tuple
|
72
|
-
|
73
|
-
Default checks for object to be an instance of BaseOBject
|
74
|
-
|
75
|
-
>>> is_named_object_tuple(("Step 1", BaseObject()))
|
76
|
-
True
|
77
|
-
|
78
|
-
>>> is_named_object_tuple(("Step 2", BaseEstimator()))
|
79
|
-
True
|
80
|
-
|
81
|
-
If a different `object_type` is provided then it is used in the isinstance check
|
82
|
-
|
83
|
-
>>> is_named_object_tuple(("Step 1", BaseObject()), object_type=BaseEstimator)
|
84
|
-
False
|
85
|
-
|
86
|
-
>>> is_named_object_tuple(("Step 1", BaseEstimator()), object_type=BaseEstimator)
|
87
|
-
True
|
88
|
-
|
89
|
-
If the input is does not follow named object tuple format then False is returned
|
90
|
-
|
91
|
-
>>> is_named_object_tuple({"Step 1": BaseEstimator()})
|
92
|
-
False
|
93
|
-
|
94
|
-
>>> is_named_object_tuple((1, BaseObject()))
|
95
|
-
False
|
96
|
-
"""
|
97
|
-
if object_type is None:
|
98
|
-
object_type = BaseObject
|
99
|
-
if not isinstance(obj, tuple) or len(obj) != 2:
|
100
|
-
return False
|
101
|
-
if not isinstance(obj[0], str) or not isinstance(obj[1], object_type):
|
102
|
-
return False
|
103
|
-
return True
|
104
|
-
|
105
|
-
|
106
|
-
def is_sequence_named_objects(
|
107
|
-
seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
|
108
|
-
allow_dict: bool = True,
|
109
|
-
require_unique_names=False,
|
110
|
-
object_type: Optional[Union[type, Tuple[type]]] = None,
|
111
|
-
) -> bool:
|
112
|
-
"""Indicate if input is a sequence of named BaseObject instances.
|
113
|
-
|
114
|
-
This can be a sequence of (str, BaseObject instance) tuples or
|
115
|
-
a dictionary with string names as keys and BaseObject instances as values
|
116
|
-
(if ``allow_dict=True``).
|
117
|
-
|
118
|
-
Parameters
|
119
|
-
----------
|
120
|
-
seq_to_check : Sequence((str, BaseObject)) or Dict[str, BaseObject]
|
121
|
-
The input to check for conformance with the named object interface.
|
122
|
-
Conforming input are:
|
123
|
-
|
124
|
-
- Sequence that contains (str, BaseObject instance) tuples
|
125
|
-
- Dictionary with string names as keys and BaseObject instances as values
|
126
|
-
if ``allow_dict=True``
|
127
|
-
|
128
|
-
allow_dict : bool, default=True
|
129
|
-
Whether a dictionary of named objects is allowed as conforming named object
|
130
|
-
type.
|
131
|
-
|
132
|
-
- If True, then a dictionary with string keys and BaseObject instances
|
133
|
-
is allowed format for providing a sequence of named objects.
|
134
|
-
- If False, then only sequences that contain (str, BaseObject instance)
|
135
|
-
tuples are considered conforming with the named object parameter API.
|
136
|
-
|
137
|
-
require_unique_names : bool, default=False
|
138
|
-
Whether names used in the sequence of named BaseObject instances
|
139
|
-
must be unique.
|
140
|
-
|
141
|
-
- If True and the names are not unique, then False is always returned.
|
142
|
-
- If False, then whether or not the function returns True or False
|
143
|
-
depends on whether `seq_to_check` follows sequence of named
|
144
|
-
BaseObject format.
|
145
|
-
|
146
|
-
object_type : class or tuple[class], default=None
|
147
|
-
The class type(s) that is used to ensure that all elements of named objects
|
148
|
-
match the expected type.
|
149
|
-
|
150
|
-
Returns
|
151
|
-
-------
|
152
|
-
bool
|
153
|
-
Whether the input `seq_to_check` is a sequence that follows the API for
|
154
|
-
nameed base object instances.
|
155
|
-
|
156
|
-
Raises
|
157
|
-
------
|
158
|
-
ValueError
|
159
|
-
If `seq_to_check` is not a sequence or ``allow_dict is False`` and
|
160
|
-
`seq_to_check` is a dictionary.
|
161
|
-
|
162
|
-
See Also
|
163
|
-
--------
|
164
|
-
is_named_object_tuple :
|
165
|
-
Indicate (True/False) if input follows the named object API format for
|
166
|
-
a single named object (e.g., tupe[str, expected class type]).
|
167
|
-
check_sequence_named_objects :
|
168
|
-
Validate input to see if it follows sequence of named objects API. An error
|
169
|
-
is raised for input that does not conform to the API format.
|
170
|
-
|
171
|
-
Examples
|
172
|
-
--------
|
173
|
-
>>> from skbase.base import BaseObject, BaseEstimator
|
174
|
-
>>> from skbase.validate import is_sequence_named_objects
|
175
|
-
>>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
|
176
|
-
>>> is_sequence_named_objects(named_objects)
|
177
|
-
True
|
178
|
-
|
179
|
-
Dictionaries are optionally allowed as sequences of named BaseObjects
|
180
|
-
|
181
|
-
>>> dict_named_objects = {"Step 1": BaseObject(), "Step 2": BaseObject()}
|
182
|
-
>>> is_sequence_named_objects(dict_named_objects)
|
183
|
-
True
|
184
|
-
>>> is_sequence_named_objects(dict_named_objects, allow_dict=False)
|
185
|
-
False
|
186
|
-
|
187
|
-
Invalid format due to object names not being strings
|
188
|
-
|
189
|
-
>>> incorrectly_named_objects = [(1, BaseObject()), (2, BaseObject())]
|
190
|
-
>>> is_sequence_named_objects(incorrectly_named_objects)
|
191
|
-
False
|
192
|
-
|
193
|
-
Invalid format due to named items not being BaseObject instances
|
194
|
-
|
195
|
-
>>> named_items = [("1", 7), ("2", 42)]
|
196
|
-
>>> is_sequence_named_objects(named_items)
|
197
|
-
False
|
198
|
-
|
199
|
-
The validation can require the object elements to be a certain class type
|
200
|
-
|
201
|
-
>>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
|
202
|
-
>>> is_sequence_named_objects(named_objects, object_type=BaseEstimator)
|
203
|
-
False
|
204
|
-
>>> named_objects = [("Step 1", BaseEstimator()), ("Step 2", BaseEstimator())]
|
205
|
-
>>> is_sequence_named_objects(named_objects, object_type=BaseEstimator)
|
206
|
-
True
|
207
|
-
"""
|
208
|
-
# Want to end quickly if the input isn't sequence or is a dict and we
|
209
|
-
# aren't allowing dicts
|
210
|
-
if object_type is None:
|
211
|
-
object_type = BaseObject
|
212
|
-
|
213
|
-
is_dict = isinstance(seq_to_check, dict)
|
214
|
-
if (not is_dict and not isinstance(seq_to_check, collections.abc.Sequence)) or (
|
215
|
-
not allow_dict and is_dict
|
216
|
-
):
|
217
|
-
return False
|
218
|
-
|
219
|
-
all_expected_format: bool
|
220
|
-
all_unique_names: bool
|
221
|
-
if is_dict:
|
222
|
-
if TYPE_CHECKING: # pragma: no cover
|
223
|
-
assert isinstance(seq_to_check, dict) # nosec B101
|
224
|
-
elements_expected_format = [
|
225
|
-
isinstance(name, str) and isinstance(obj, object_type)
|
226
|
-
for name, obj in seq_to_check.items()
|
227
|
-
]
|
228
|
-
all_unique_names = True
|
229
|
-
else:
|
230
|
-
names = []
|
231
|
-
elements_expected_format = []
|
232
|
-
for it in seq_to_check:
|
233
|
-
if is_named_object_tuple(it, object_type=object_type):
|
234
|
-
elements_expected_format.append(True)
|
235
|
-
names.append(it[0])
|
236
|
-
else:
|
237
|
-
elements_expected_format.append(False)
|
238
|
-
all_unique_names = len(set(names)) == len(names)
|
239
|
-
|
240
|
-
all_expected_format = all(elements_expected_format)
|
241
|
-
|
242
|
-
if not all_expected_format or (require_unique_names and not all_unique_names):
|
243
|
-
is_expected_format = False
|
244
|
-
else:
|
245
|
-
is_expected_format = True
|
246
|
-
|
247
|
-
return is_expected_format
|
248
|
-
|
249
|
-
|
250
|
-
@overload
|
251
|
-
def check_sequence_named_objects(
|
252
|
-
seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
|
253
|
-
allow_dict: bool = True,
|
254
|
-
require_unique_names=False,
|
255
|
-
object_type: Optional[Union[type, Tuple[type]]] = None,
|
256
|
-
sequence_name: Optional[str] = None,
|
257
|
-
) -> Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]]:
|
258
|
-
... # pragma: no cover
|
259
|
-
|
260
|
-
|
261
|
-
@overload
|
262
|
-
def check_sequence_named_objects(
|
263
|
-
seq_to_check: Sequence[Tuple[str, BaseObject]],
|
264
|
-
allow_dict: bool,
|
265
|
-
require_unique_names=False,
|
266
|
-
object_type: Optional[Union[type, Tuple[type]]] = None,
|
267
|
-
sequence_name: Optional[str] = None,
|
268
|
-
) -> Sequence[Tuple[str, BaseObject]]:
|
269
|
-
... # pragma: no cover
|
270
|
-
|
271
|
-
|
272
|
-
@overload
|
273
|
-
def check_sequence_named_objects(
|
274
|
-
seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
|
275
|
-
allow_dict: bool = True,
|
276
|
-
require_unique_names=False,
|
277
|
-
object_type: Optional[Union[type, Tuple[type]]] = None,
|
278
|
-
sequence_name: Optional[str] = None,
|
279
|
-
) -> Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]]:
|
280
|
-
... # pragma: no cover
|
281
|
-
|
282
|
-
|
283
|
-
def check_sequence_named_objects(
|
284
|
-
seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
|
285
|
-
allow_dict: bool = True,
|
286
|
-
require_unique_names=False,
|
287
|
-
object_type: Optional[Union[type, Tuple[type]]] = None,
|
288
|
-
sequence_name: Optional[str] = None,
|
289
|
-
) -> Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]]:
|
290
|
-
"""Check if input is a sequence of named BaseObject instances.
|
291
|
-
|
292
|
-
`seq_to_check` is returned unchanged when it follows the allowed named
|
293
|
-
BaseObject convention. The allowed format includes a sequence of
|
294
|
-
(str, BaseObject instance) tuples. A dictionary with string names as keys
|
295
|
-
and BaseObject instances as values is also allowed if ``allow_dict is True``.
|
296
|
-
|
297
|
-
Parameters
|
298
|
-
----------
|
299
|
-
seq_to_check : Sequence((str, BaseObject)) or Dict[str, BaseObject]
|
300
|
-
The input to check for conformance with the named object interface.
|
301
|
-
Conforming input are:
|
302
|
-
|
303
|
-
- Sequence that contains (str, BaseObject instance) tuples
|
304
|
-
- Dictionary with string names as keys and BaseObject instances as values
|
305
|
-
if ``allow_dict=True``
|
306
|
-
|
307
|
-
allow_dict : bool, default=True
|
308
|
-
Whether a dictionary of named objects is allowed as conforming named object
|
309
|
-
type.
|
310
|
-
|
311
|
-
- If True, then a dictionary with string keys and BaseObject instances
|
312
|
-
is allowed format for providing a sequence of named objects.
|
313
|
-
- If False, then only sequences that contain (str, BaseObject instance)
|
314
|
-
tuples are considered conforming with the named object parameter API.
|
315
|
-
|
316
|
-
require_unique_names : bool, default=False
|
317
|
-
Whether names used in the sequence of named BaseObject instances
|
318
|
-
must be unique.
|
319
|
-
|
320
|
-
- If True and the names are not unique, then False is always returned.
|
321
|
-
- If False, then whether or not the function returns True or False
|
322
|
-
depends on whether `seq_to_check` follows sequence of named BaseObject format.
|
323
|
-
|
324
|
-
object_type : class or tuple[class], default=None
|
325
|
-
The class type(s) that is used to ensure that all elements of named objects
|
326
|
-
match the expected type.
|
327
|
-
sequence_name : str, default=None
|
328
|
-
Optional name used to refer to the input `seq_to_check` when
|
329
|
-
raising any errors. Ignored ``raise_error=False``.
|
330
|
-
|
331
|
-
Returns
|
332
|
-
-------
|
333
|
-
Sequence((str, BaseObject)) or Dict[str, BaseObject]
|
334
|
-
The `seq_to_check` is returned if it is a conforming named object type.
|
335
|
-
|
336
|
-
- If ``allow_dict=True`` then return type is Sequence((str, BaseObject))
|
337
|
-
or Dict[str, BaseObject]
|
338
|
-
- If ``allow_dict=False`` then return type is Sequence((str, BaseObject))
|
339
|
-
|
340
|
-
Raises
|
341
|
-
------
|
342
|
-
ValueError
|
343
|
-
If `seq_to_check` does not conform to the named BaseObject API.
|
344
|
-
|
345
|
-
See Also
|
346
|
-
--------
|
347
|
-
is_named_object_tuple :
|
348
|
-
Indicate (True/False) if input follows the named object API format for
|
349
|
-
a single named object (e.g., tupe[str, expected class type]).
|
350
|
-
is_sequence_named_objects :
|
351
|
-
Indicate (True/False) if an input sequence follows the named object API.
|
352
|
-
|
353
|
-
Examples
|
354
|
-
--------
|
355
|
-
>>> from skbase.base import BaseObject, BaseEstimator
|
356
|
-
>>> from skbase.validate import check_sequence_named_objects
|
357
|
-
>>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
|
358
|
-
>>> check_sequence_named_objects(named_objects)
|
359
|
-
[('Step 1', BaseObject()), ('Step 2', BaseObject())]
|
360
|
-
|
361
|
-
Dictionaries are optionally allowed as sequences of named BaseObjects
|
362
|
-
|
363
|
-
>>> named_objects = {"Step 1": BaseObject(), "Step 2": BaseObject()}
|
364
|
-
>>> check_sequence_named_objects(named_objects)
|
365
|
-
{'Step 1': BaseObject(), 'Step 2': BaseObject()}
|
366
|
-
|
367
|
-
Raises error since dictionaries are not allowed when allow_dict is False
|
368
|
-
|
369
|
-
>>> check_sequence_named_objects(named_objects, allow_dict=False) # doctest: +SKIP
|
370
|
-
|
371
|
-
Raises error due to invalid format due to object names not being strings
|
372
|
-
|
373
|
-
>>> incorrectly_named_objects = [(1, BaseObject()), (2, BaseObject())]
|
374
|
-
>>> check_sequence_named_objects(incorrectly_named_objects) # doctest: +SKIP
|
375
|
-
|
376
|
-
Raises error due to invalid format since named items are not BaseObject instances
|
377
|
-
|
378
|
-
>>> named_items = [("1", 7), ("2", 42)]
|
379
|
-
>>> check_sequence_named_objects(named_items) # doctest: +SKIP
|
380
|
-
|
381
|
-
The validation can require the object elements to be a certain class type
|
382
|
-
|
383
|
-
>>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
|
384
|
-
>>> check_sequence_named_objects( \
|
385
|
-
named_objects, object_type=BaseEstimator) # doctest: +SKIP
|
386
|
-
>>> named_objects = [("Step 1", BaseEstimator()), ("Step 2", BaseEstimator())]
|
387
|
-
>>> check_sequence_named_objects(named_objects, object_type=BaseEstimator)
|
388
|
-
[('Step 1', BaseEstimator()), ('Step 2', BaseEstimator())]
|
389
|
-
"""
|
390
|
-
is_expected_format = is_sequence_named_objects(
|
391
|
-
seq_to_check,
|
392
|
-
allow_dict=allow_dict,
|
393
|
-
require_unique_names=require_unique_names,
|
394
|
-
object_type=object_type,
|
395
|
-
)
|
396
|
-
# Raise error is format is not expected.
|
397
|
-
if not is_expected_format:
|
398
|
-
msg = _named_baseobject_error_msg(
|
399
|
-
sequence_name=sequence_name, allow_dict=allow_dict
|
400
|
-
)
|
401
|
-
raise ValueError(msg)
|
402
|
-
|
403
|
-
return seq_to_check
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# copyright: skbase developers, BSD-3-Clause License (see LICENSE file)
|
3
|
+
"""Validate if an input is one of the allowed named object formats."""
|
4
|
+
import collections.abc
|
5
|
+
from typing import (
|
6
|
+
TYPE_CHECKING,
|
7
|
+
Any,
|
8
|
+
Dict,
|
9
|
+
List,
|
10
|
+
Optional,
|
11
|
+
Sequence,
|
12
|
+
Tuple,
|
13
|
+
Union,
|
14
|
+
overload,
|
15
|
+
)
|
16
|
+
|
17
|
+
from skbase.base import BaseObject
|
18
|
+
|
19
|
+
__all__: List[str] = [
|
20
|
+
"check_sequence_named_objects",
|
21
|
+
"is_named_object_tuple",
|
22
|
+
"is_sequence_named_objects",
|
23
|
+
]
|
24
|
+
__author__: List[str] = ["RNKuhns"]
|
25
|
+
|
26
|
+
|
27
|
+
def _named_baseobject_error_msg(
|
28
|
+
sequence_name: Optional[str] = None, allow_dict: bool = True
|
29
|
+
):
|
30
|
+
"""Create error message for non-comformance with named BaseObject api."""
|
31
|
+
name_str = f"{sequence_name}" if sequence_name is not None else "Input"
|
32
|
+
allowed_types = "a sequence of (string name, BaseObject instance) tuples"
|
33
|
+
|
34
|
+
if allow_dict:
|
35
|
+
allowed_types += " or dict[str, BaseObject instance]"
|
36
|
+
msg = f"Invalid {name_str!r}, {name_str!r} should be {allowed_types}."
|
37
|
+
return msg
|
38
|
+
|
39
|
+
|
40
|
+
def is_named_object_tuple(
|
41
|
+
obj: Any, object_type: Optional[Union[type, Tuple[type, ...]]] = None
|
42
|
+
) -> bool:
|
43
|
+
"""Indicate if input is a a tuple of format (str, `object_type`).
|
44
|
+
|
45
|
+
Used to validate that input follows named object tuple API format.
|
46
|
+
|
47
|
+
Parameters
|
48
|
+
----------
|
49
|
+
obj : Any
|
50
|
+
The object to be checked to see if it is a (str, `object_type`) tuple.
|
51
|
+
object_type : class or tuple of class, default=BaseObject
|
52
|
+
Class(es) that all objects are checked to be an instance of. If None,
|
53
|
+
then :class:``skbase.base.BaseObject`` is used as default.
|
54
|
+
|
55
|
+
Returns
|
56
|
+
-------
|
57
|
+
bool
|
58
|
+
True if obj is (str, `object_type`) tuple, otherwise False.
|
59
|
+
|
60
|
+
See Also
|
61
|
+
--------
|
62
|
+
is_sequence_named_objects :
|
63
|
+
Indicate (True/False) if an input sequence follows the named object API.
|
64
|
+
check_sequence_named_objects :
|
65
|
+
Validate input to see if it follows sequence of named objects API. An error
|
66
|
+
is raised for input that does not conform to the API format.
|
67
|
+
|
68
|
+
Examples
|
69
|
+
--------
|
70
|
+
>>> from skbase.base import BaseObject, BaseEstimator
|
71
|
+
>>> from skbase.validate import is_named_object_tuple
|
72
|
+
|
73
|
+
Default checks for object to be an instance of BaseOBject
|
74
|
+
|
75
|
+
>>> is_named_object_tuple(("Step 1", BaseObject()))
|
76
|
+
True
|
77
|
+
|
78
|
+
>>> is_named_object_tuple(("Step 2", BaseEstimator()))
|
79
|
+
True
|
80
|
+
|
81
|
+
If a different `object_type` is provided then it is used in the isinstance check
|
82
|
+
|
83
|
+
>>> is_named_object_tuple(("Step 1", BaseObject()), object_type=BaseEstimator)
|
84
|
+
False
|
85
|
+
|
86
|
+
>>> is_named_object_tuple(("Step 1", BaseEstimator()), object_type=BaseEstimator)
|
87
|
+
True
|
88
|
+
|
89
|
+
If the input is does not follow named object tuple format then False is returned
|
90
|
+
|
91
|
+
>>> is_named_object_tuple({"Step 1": BaseEstimator()})
|
92
|
+
False
|
93
|
+
|
94
|
+
>>> is_named_object_tuple((1, BaseObject()))
|
95
|
+
False
|
96
|
+
"""
|
97
|
+
if object_type is None:
|
98
|
+
object_type = BaseObject
|
99
|
+
if not isinstance(obj, tuple) or len(obj) != 2:
|
100
|
+
return False
|
101
|
+
if not isinstance(obj[0], str) or not isinstance(obj[1], object_type):
|
102
|
+
return False
|
103
|
+
return True
|
104
|
+
|
105
|
+
|
106
|
+
def is_sequence_named_objects(
|
107
|
+
seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
|
108
|
+
allow_dict: bool = True,
|
109
|
+
require_unique_names=False,
|
110
|
+
object_type: Optional[Union[type, Tuple[type]]] = None,
|
111
|
+
) -> bool:
|
112
|
+
"""Indicate if input is a sequence of named BaseObject instances.
|
113
|
+
|
114
|
+
This can be a sequence of (str, BaseObject instance) tuples or
|
115
|
+
a dictionary with string names as keys and BaseObject instances as values
|
116
|
+
(if ``allow_dict=True``).
|
117
|
+
|
118
|
+
Parameters
|
119
|
+
----------
|
120
|
+
seq_to_check : Sequence((str, BaseObject)) or Dict[str, BaseObject]
|
121
|
+
The input to check for conformance with the named object interface.
|
122
|
+
Conforming input are:
|
123
|
+
|
124
|
+
- Sequence that contains (str, BaseObject instance) tuples
|
125
|
+
- Dictionary with string names as keys and BaseObject instances as values
|
126
|
+
if ``allow_dict=True``
|
127
|
+
|
128
|
+
allow_dict : bool, default=True
|
129
|
+
Whether a dictionary of named objects is allowed as conforming named object
|
130
|
+
type.
|
131
|
+
|
132
|
+
- If True, then a dictionary with string keys and BaseObject instances
|
133
|
+
is allowed format for providing a sequence of named objects.
|
134
|
+
- If False, then only sequences that contain (str, BaseObject instance)
|
135
|
+
tuples are considered conforming with the named object parameter API.
|
136
|
+
|
137
|
+
require_unique_names : bool, default=False
|
138
|
+
Whether names used in the sequence of named BaseObject instances
|
139
|
+
must be unique.
|
140
|
+
|
141
|
+
- If True and the names are not unique, then False is always returned.
|
142
|
+
- If False, then whether or not the function returns True or False
|
143
|
+
depends on whether `seq_to_check` follows sequence of named
|
144
|
+
BaseObject format.
|
145
|
+
|
146
|
+
object_type : class or tuple[class], default=None
|
147
|
+
The class type(s) that is used to ensure that all elements of named objects
|
148
|
+
match the expected type.
|
149
|
+
|
150
|
+
Returns
|
151
|
+
-------
|
152
|
+
bool
|
153
|
+
Whether the input `seq_to_check` is a sequence that follows the API for
|
154
|
+
nameed base object instances.
|
155
|
+
|
156
|
+
Raises
|
157
|
+
------
|
158
|
+
ValueError
|
159
|
+
If `seq_to_check` is not a sequence or ``allow_dict is False`` and
|
160
|
+
`seq_to_check` is a dictionary.
|
161
|
+
|
162
|
+
See Also
|
163
|
+
--------
|
164
|
+
is_named_object_tuple :
|
165
|
+
Indicate (True/False) if input follows the named object API format for
|
166
|
+
a single named object (e.g., tupe[str, expected class type]).
|
167
|
+
check_sequence_named_objects :
|
168
|
+
Validate input to see if it follows sequence of named objects API. An error
|
169
|
+
is raised for input that does not conform to the API format.
|
170
|
+
|
171
|
+
Examples
|
172
|
+
--------
|
173
|
+
>>> from skbase.base import BaseObject, BaseEstimator
|
174
|
+
>>> from skbase.validate import is_sequence_named_objects
|
175
|
+
>>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
|
176
|
+
>>> is_sequence_named_objects(named_objects)
|
177
|
+
True
|
178
|
+
|
179
|
+
Dictionaries are optionally allowed as sequences of named BaseObjects
|
180
|
+
|
181
|
+
>>> dict_named_objects = {"Step 1": BaseObject(), "Step 2": BaseObject()}
|
182
|
+
>>> is_sequence_named_objects(dict_named_objects)
|
183
|
+
True
|
184
|
+
>>> is_sequence_named_objects(dict_named_objects, allow_dict=False)
|
185
|
+
False
|
186
|
+
|
187
|
+
Invalid format due to object names not being strings
|
188
|
+
|
189
|
+
>>> incorrectly_named_objects = [(1, BaseObject()), (2, BaseObject())]
|
190
|
+
>>> is_sequence_named_objects(incorrectly_named_objects)
|
191
|
+
False
|
192
|
+
|
193
|
+
Invalid format due to named items not being BaseObject instances
|
194
|
+
|
195
|
+
>>> named_items = [("1", 7), ("2", 42)]
|
196
|
+
>>> is_sequence_named_objects(named_items)
|
197
|
+
False
|
198
|
+
|
199
|
+
The validation can require the object elements to be a certain class type
|
200
|
+
|
201
|
+
>>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
|
202
|
+
>>> is_sequence_named_objects(named_objects, object_type=BaseEstimator)
|
203
|
+
False
|
204
|
+
>>> named_objects = [("Step 1", BaseEstimator()), ("Step 2", BaseEstimator())]
|
205
|
+
>>> is_sequence_named_objects(named_objects, object_type=BaseEstimator)
|
206
|
+
True
|
207
|
+
"""
|
208
|
+
# Want to end quickly if the input isn't sequence or is a dict and we
|
209
|
+
# aren't allowing dicts
|
210
|
+
if object_type is None:
|
211
|
+
object_type = BaseObject
|
212
|
+
|
213
|
+
is_dict = isinstance(seq_to_check, dict)
|
214
|
+
if (not is_dict and not isinstance(seq_to_check, collections.abc.Sequence)) or (
|
215
|
+
not allow_dict and is_dict
|
216
|
+
):
|
217
|
+
return False
|
218
|
+
|
219
|
+
all_expected_format: bool
|
220
|
+
all_unique_names: bool
|
221
|
+
if is_dict:
|
222
|
+
if TYPE_CHECKING: # pragma: no cover
|
223
|
+
assert isinstance(seq_to_check, dict) # nosec B101
|
224
|
+
elements_expected_format = [
|
225
|
+
isinstance(name, str) and isinstance(obj, object_type)
|
226
|
+
for name, obj in seq_to_check.items()
|
227
|
+
]
|
228
|
+
all_unique_names = True
|
229
|
+
else:
|
230
|
+
names = []
|
231
|
+
elements_expected_format = []
|
232
|
+
for it in seq_to_check:
|
233
|
+
if is_named_object_tuple(it, object_type=object_type):
|
234
|
+
elements_expected_format.append(True)
|
235
|
+
names.append(it[0])
|
236
|
+
else:
|
237
|
+
elements_expected_format.append(False)
|
238
|
+
all_unique_names = len(set(names)) == len(names)
|
239
|
+
|
240
|
+
all_expected_format = all(elements_expected_format)
|
241
|
+
|
242
|
+
if not all_expected_format or (require_unique_names and not all_unique_names):
|
243
|
+
is_expected_format = False
|
244
|
+
else:
|
245
|
+
is_expected_format = True
|
246
|
+
|
247
|
+
return is_expected_format
|
248
|
+
|
249
|
+
|
250
|
+
@overload
|
251
|
+
def check_sequence_named_objects(
|
252
|
+
seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
|
253
|
+
allow_dict: bool = True,
|
254
|
+
require_unique_names=False,
|
255
|
+
object_type: Optional[Union[type, Tuple[type]]] = None,
|
256
|
+
sequence_name: Optional[str] = None,
|
257
|
+
) -> Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]]:
|
258
|
+
... # pragma: no cover
|
259
|
+
|
260
|
+
|
261
|
+
@overload
|
262
|
+
def check_sequence_named_objects(
|
263
|
+
seq_to_check: Sequence[Tuple[str, BaseObject]],
|
264
|
+
allow_dict: bool,
|
265
|
+
require_unique_names=False,
|
266
|
+
object_type: Optional[Union[type, Tuple[type]]] = None,
|
267
|
+
sequence_name: Optional[str] = None,
|
268
|
+
) -> Sequence[Tuple[str, BaseObject]]:
|
269
|
+
... # pragma: no cover
|
270
|
+
|
271
|
+
|
272
|
+
@overload
|
273
|
+
def check_sequence_named_objects(
|
274
|
+
seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
|
275
|
+
allow_dict: bool = True,
|
276
|
+
require_unique_names=False,
|
277
|
+
object_type: Optional[Union[type, Tuple[type]]] = None,
|
278
|
+
sequence_name: Optional[str] = None,
|
279
|
+
) -> Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]]:
|
280
|
+
... # pragma: no cover
|
281
|
+
|
282
|
+
|
283
|
+
def check_sequence_named_objects(
    seq_to_check: Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]],
    allow_dict: bool = True,
    require_unique_names=False,
    object_type: Optional[Union[type, Tuple[type]]] = None,
    sequence_name: Optional[str] = None,
) -> Union[Sequence[Tuple[str, BaseObject]], Dict[str, BaseObject]]:
    """Validate that input is a sequence of named BaseObject instances.

    The input is handed back untouched when it conforms to the named
    BaseObject convention; otherwise a ``ValueError`` is raised. Conforming
    input is a sequence of (str, BaseObject instance) tuples, or, when
    ``allow_dict is True``, a dictionary with string names as keys and
    BaseObject instances as values.

    Parameters
    ----------
    seq_to_check : Sequence((str, BaseObject)) or Dict[str, BaseObject]
        The input to check for conformance with the named object interface.
        Conforming input are:

        - Sequence that contains (str, BaseObject instance) tuples
        - Dictionary with string names as keys and BaseObject instances as
          values if ``allow_dict=True``

    allow_dict : bool, default=True
        Whether a dictionary of named objects is accepted as a conforming
        named object type.

        - If True, then a dictionary with string keys and BaseObject instances
          is an allowed format for providing a sequence of named objects.
        - If False, then only sequences that contain (str, BaseObject instance)
          tuples are considered conforming with the named object parameter API.

    require_unique_names : bool, default=False
        Whether names used in the sequence of named BaseObject instances
        must be unique.

        - If True and the names are not unique, the input is treated as
          non-conforming and a ``ValueError`` is raised.
        - If False, conformance depends only on whether `seq_to_check` follows
          the sequence of named BaseObject format.

    object_type : class or tuple[class], default=None
        The class type(s) that is used to ensure that all elements of named
        objects match the expected type.
    sequence_name : str, default=None
        Optional name used to refer to the input `seq_to_check` when
        raising any errors.

    Returns
    -------
    Sequence((str, BaseObject)) or Dict[str, BaseObject]
        The `seq_to_check` is returned if it is a conforming named object type.

        - If ``allow_dict=True`` then return type is Sequence((str, BaseObject))
          or Dict[str, BaseObject]
        - If ``allow_dict=False`` then return type is Sequence((str, BaseObject))

    Raises
    ------
    ValueError
        If `seq_to_check` does not conform to the named BaseObject API.

    See Also
    --------
    is_named_object_tuple :
        Indicate (True/False) if input follows the named object API format for
        a single named object (e.g., tuple[str, expected class type]).
    is_sequence_named_objects :
        Indicate (True/False) if an input sequence follows the named object API.

    Examples
    --------
    >>> from skbase.base import BaseObject, BaseEstimator
    >>> from skbase.validate import check_sequence_named_objects
    >>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
    >>> check_sequence_named_objects(named_objects)
    [('Step 1', BaseObject()), ('Step 2', BaseObject())]

    Dictionaries are optionally allowed as sequences of named BaseObjects

    >>> named_objects = {"Step 1": BaseObject(), "Step 2": BaseObject()}
    >>> check_sequence_named_objects(named_objects)
    {'Step 1': BaseObject(), 'Step 2': BaseObject()}

    Raises error since dictionaries are not allowed when allow_dict is False

    >>> check_sequence_named_objects(named_objects, allow_dict=False) # doctest: +SKIP

    Raises error due to invalid format due to object names not being strings

    >>> incorrectly_named_objects = [(1, BaseObject()), (2, BaseObject())]
    >>> check_sequence_named_objects(incorrectly_named_objects) # doctest: +SKIP

    Raises error due to invalid format since named items are not BaseObject instances

    >>> named_items = [("1", 7), ("2", 42)]
    >>> check_sequence_named_objects(named_items) # doctest: +SKIP

    The validation can require the object elements to be a certain class type

    >>> named_objects = [("Step 1", BaseObject()), ("Step 2", BaseObject())]
    >>> check_sequence_named_objects(  # doctest: +SKIP
    ...     named_objects, object_type=BaseEstimator
    ... )
    >>> named_objects = [("Step 1", BaseEstimator()), ("Step 2", BaseEstimator())]
    >>> check_sequence_named_objects(named_objects, object_type=BaseEstimator)
    [('Step 1', BaseEstimator()), ('Step 2', BaseEstimator())]
    """
    # Delegate the actual format check to the boolean predicate.
    conforms = is_sequence_named_objects(
        seq_to_check,
        allow_dict=allow_dict,
        require_unique_names=require_unique_names,
        object_type=object_type,
    )
    if conforms:
        return seq_to_check

    # Input does not follow the expected format: build the message and raise.
    error_text = _named_baseobject_error_msg(
        sequence_name=sequence_name, allow_dict=allow_dict
    )
    raise ValueError(error_text)
|