scikit-base 0.4.6__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- docs/source/conf.py +299 -299
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/LICENSE +29 -29
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/METADATA +160 -159
- scikit_base-0.5.1.dist-info/RECORD +58 -0
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/WHEEL +1 -1
- scikit_base-0.5.1.dist-info/top_level.txt +5 -0
- {scikit_base-0.4.6.dist-info → scikit_base-0.5.1.dist-info}/zip-safe +1 -1
- skbase/__init__.py +14 -14
- skbase/_exceptions.py +31 -31
- skbase/_nopytest_tests.py +35 -35
- skbase/base/__init__.py +20 -20
- skbase/base/_base.py +1249 -1249
- skbase/base/_meta.py +883 -871
- skbase/base/_pretty_printing/__init__.py +11 -11
- skbase/base/_pretty_printing/_object_html_repr.py +392 -392
- skbase/base/_pretty_printing/_pprint.py +412 -412
- skbase/base/_tagmanager.py +217 -217
- skbase/lookup/__init__.py +31 -31
- skbase/lookup/_lookup.py +1009 -1009
- skbase/lookup/tests/__init__.py +2 -2
- skbase/lookup/tests/test_lookup.py +991 -991
- skbase/testing/__init__.py +12 -12
- skbase/testing/test_all_objects.py +852 -856
- skbase/testing/utils/__init__.py +5 -5
- skbase/testing/utils/_conditional_fixtures.py +209 -209
- skbase/testing/utils/_dependencies.py +15 -15
- skbase/testing/utils/deep_equals.py +15 -15
- skbase/testing/utils/inspect.py +30 -30
- skbase/testing/utils/tests/__init__.py +2 -2
- skbase/testing/utils/tests/test_check_dependencies.py +49 -49
- skbase/testing/utils/tests/test_deep_equals.py +66 -66
- skbase/tests/__init__.py +2 -2
- skbase/tests/conftest.py +273 -273
- skbase/tests/mock_package/__init__.py +5 -5
- skbase/tests/mock_package/test_mock_package.py +74 -74
- skbase/tests/test_base.py +1202 -1202
- skbase/tests/test_baseestimator.py +130 -130
- skbase/tests/test_exceptions.py +23 -23
- skbase/tests/test_meta.py +170 -131
- skbase/utils/__init__.py +21 -21
- skbase/utils/_check.py +53 -53
- skbase/utils/_iter.py +238 -238
- skbase/utils/_nested_iter.py +180 -180
- skbase/utils/_utils.py +91 -91
- skbase/utils/deep_equals.py +358 -358
- skbase/utils/dependencies/__init__.py +11 -11
- skbase/utils/dependencies/_dependencies.py +253 -253
- skbase/utils/tests/__init__.py +4 -4
- skbase/utils/tests/test_check.py +24 -24
- skbase/utils/tests/test_iter.py +127 -127
- skbase/utils/tests/test_nested_iter.py +84 -84
- skbase/utils/tests/test_utils.py +37 -37
- skbase/validate/__init__.py +22 -22
- skbase/validate/_named_objects.py +403 -403
- skbase/validate/_types.py +345 -345
- skbase/validate/tests/__init__.py +2 -2
- skbase/validate/tests/test_iterable_named_objects.py +200 -200
- skbase/validate/tests/test_type_validations.py +370 -370
- scikit_base-0.4.6.dist-info/RECORD +0 -58
- scikit_base-0.4.6.dist-info/top_level.txt +0 -2
skbase/utils/_check.py
CHANGED
@@ -1,53 +1,53 @@
|
|
1
|
-
# -*- coding: utf-8 -*-
|
2
|
-
# copyright: skbase developers, BSD-3-Clause License (see LICENSE file)
|
3
|
-
# Elements of _is_scalar_nan re-use code developed in scikit-learn. These elements
|
4
|
-
# are copyrighted by the scikit-learn developers, BSD-3-Clause License. For
|
5
|
-
# conditions see https://github.com/scikit-learn/scikit-learn/blob/main/COPYING
|
6
|
-
|
7
|
-
"""Utility functions to perform various types of checks."""
|
8
|
-
from __future__ import annotations
|
9
|
-
|
10
|
-
import math
|
11
|
-
import numbers
|
12
|
-
from typing import Any
|
13
|
-
|
14
|
-
__all__ = ["_is_scalar_nan"]
|
15
|
-
__author__ = ["RNKuhns"]
|
16
|
-
|
17
|
-
|
18
|
-
def _is_scalar_nan(x: Any) -> bool:
|
19
|
-
"""Test if x is NaN.
|
20
|
-
|
21
|
-
This function is meant to overcome the issue that np.isnan does not allow
|
22
|
-
non-numerical types as input, and that np.nan is not float('nan').
|
23
|
-
|
24
|
-
Parameters
|
25
|
-
----------
|
26
|
-
x : Any
|
27
|
-
The item to be checked to determine if it is a scalar nan value.
|
28
|
-
|
29
|
-
Returns
|
30
|
-
-------
|
31
|
-
bool
|
32
|
-
True if `x` is a scalar nan value
|
33
|
-
|
34
|
-
Notes
|
35
|
-
-----
|
36
|
-
This code follows scikit-learn's implementation.
|
37
|
-
|
38
|
-
Examples
|
39
|
-
--------
|
40
|
-
>>> import numpy as np
|
41
|
-
>>> from skbase.utils._check import _is_scalar_nan
|
42
|
-
>>> _is_scalar_nan(np.nan)
|
43
|
-
True
|
44
|
-
>>> _is_scalar_nan(float("nan"))
|
45
|
-
True
|
46
|
-
>>> _is_scalar_nan(None)
|
47
|
-
False
|
48
|
-
>>> _is_scalar_nan("")
|
49
|
-
False
|
50
|
-
>>> _is_scalar_nan([np.nan])
|
51
|
-
False
|
52
|
-
"""
|
53
|
-
return isinstance(x, numbers.Real) and math.isnan(x)
|
1
|
+
# -*- coding: utf-8 -*-
|
2
|
+
# copyright: skbase developers, BSD-3-Clause License (see LICENSE file)
|
3
|
+
# Elements of _is_scalar_nan re-use code developed in scikit-learn. These elements
|
4
|
+
# are copyrighted by the scikit-learn developers, BSD-3-Clause License. For
|
5
|
+
# conditions see https://github.com/scikit-learn/scikit-learn/blob/main/COPYING
|
6
|
+
|
7
|
+
"""Utility functions to perform various types of checks."""
|
8
|
+
from __future__ import annotations
|
9
|
+
|
10
|
+
import math
|
11
|
+
import numbers
|
12
|
+
from typing import Any
|
13
|
+
|
14
|
+
__all__ = ["_is_scalar_nan"]
|
15
|
+
__author__ = ["RNKuhns"]
|
16
|
+
|
17
|
+
|
18
|
+
def _is_scalar_nan(x: Any) -> bool:
|
19
|
+
"""Test if x is NaN.
|
20
|
+
|
21
|
+
This function is meant to overcome the issue that np.isnan does not allow
|
22
|
+
non-numerical types as input, and that np.nan is not float('nan').
|
23
|
+
|
24
|
+
Parameters
|
25
|
+
----------
|
26
|
+
x : Any
|
27
|
+
The item to be checked to determine if it is a scalar nan value.
|
28
|
+
|
29
|
+
Returns
|
30
|
+
-------
|
31
|
+
bool
|
32
|
+
True if `x` is a scalar nan value
|
33
|
+
|
34
|
+
Notes
|
35
|
+
-----
|
36
|
+
This code follows scikit-learn's implementation.
|
37
|
+
|
38
|
+
Examples
|
39
|
+
--------
|
40
|
+
>>> import numpy as np
|
41
|
+
>>> from skbase.utils._check import _is_scalar_nan
|
42
|
+
>>> _is_scalar_nan(np.nan)
|
43
|
+
True
|
44
|
+
>>> _is_scalar_nan(float("nan"))
|
45
|
+
True
|
46
|
+
>>> _is_scalar_nan(None)
|
47
|
+
False
|
48
|
+
>>> _is_scalar_nan("")
|
49
|
+
False
|
50
|
+
>>> _is_scalar_nan([np.nan])
|
51
|
+
False
|
52
|
+
"""
|
53
|
+
return isinstance(x, numbers.Real) and math.isnan(x)
|
skbase/utils/_iter.py
CHANGED
@@ -1,238 +1,238 @@
|
|
1
|
-
#!/usr/bin/env python3 -u
|
2
|
-
# -*- coding: utf-8 -*-
|
3
|
-
# copyright: skbase developers, BSD-3-Clause License (see LICENSE file)
|
4
|
-
"""Functionality for working with sequences."""
|
5
|
-
import collections
|
6
|
-
import re
|
7
|
-
from collections.abc import Sequence
|
8
|
-
from typing import Any, List, Optional, Tuple, Union, overload
|
9
|
-
|
10
|
-
from skbase.utils._nested_iter import _remove_single, flatten, is_flat, unflatten
|
11
|
-
|
12
|
-
__author__: List[str] = ["RNKuhns"]
|
13
|
-
__all__: List[str] = [
|
14
|
-
"_scalar_to_seq",
|
15
|
-
"_remove_type_text",
|
16
|
-
"_format_seq_to_str",
|
17
|
-
"make_strings_unique",
|
18
|
-
]
|
19
|
-
|
20
|
-
|
21
|
-
def _scalar_to_seq(scalar: Any, sequence_type: type = None) -> Sequence:
|
22
|
-
"""Convert a scalar input to a sequence.
|
23
|
-
|
24
|
-
If the input is already a sequence it is returned unchanged. Unlike standard
|
25
|
-
Python, a string is treated as a scalar instead of a sequence.
|
26
|
-
|
27
|
-
Parameters
|
28
|
-
----------
|
29
|
-
scalar : Any
|
30
|
-
A scalar input to be converted to a sequence.
|
31
|
-
sequence_type : type, default=None
|
32
|
-
A sequence type (e.g., list, tuple) that is used to set the return type. This
|
33
|
-
is ignored if `scalar` is already a sequence other than a str (which is
|
34
|
-
treated like a scalar type for this function instead of sequence of
|
35
|
-
characters).
|
36
|
-
|
37
|
-
- If None, then the returned sequence will be a tuple containing a single
|
38
|
-
scalar element.
|
39
|
-
- If `sequence_type` is a valid sequence type then the returned
|
40
|
-
sequence will be a sequence of that type containing the single scalar
|
41
|
-
value.
|
42
|
-
|
43
|
-
Returns
|
44
|
-
-------
|
45
|
-
Sequence
|
46
|
-
A sequence of the specified `sequence_type` that contains just the single
|
47
|
-
scalar value.
|
48
|
-
|
49
|
-
Examples
|
50
|
-
--------
|
51
|
-
>>> from skbase.utils._iter import _scalar_to_seq
|
52
|
-
>>> _scalar_to_seq(7)
|
53
|
-
(7,)
|
54
|
-
>>> _scalar_to_seq("some_str")
|
55
|
-
('some_str',)
|
56
|
-
>>> _scalar_to_seq("some_str", sequence_type=list)
|
57
|
-
['some_str']
|
58
|
-
>>> _scalar_to_seq((1, 2))
|
59
|
-
(1, 2)
|
60
|
-
"""
|
61
|
-
# We'll treat str like regular scalar and not a sequence
|
62
|
-
if isinstance(scalar, Sequence) and not isinstance(scalar, str):
|
63
|
-
return scalar
|
64
|
-
elif sequence_type is None:
|
65
|
-
return (scalar,)
|
66
|
-
elif issubclass(sequence_type, Sequence) and sequence_type != Sequence:
|
67
|
-
# Note calling (scalar,) is done to avoid str unpacking
|
68
|
-
return sequence_type((scalar,)) # type: ignore
|
69
|
-
else:
|
70
|
-
raise ValueError(
|
71
|
-
"`sequence_type` must be a subclass of collections.abc.Sequence."
|
72
|
-
)
|
73
|
-
|
74
|
-
|
75
|
-
def _remove_type_text(input_):
|
76
|
-
"""Remove <class > wrapper from printed type str."""
|
77
|
-
if not isinstance(input_, str):
|
78
|
-
input_ = str(input_)
|
79
|
-
|
80
|
-
m = re.match("^<class '(.*)'>$", input_)
|
81
|
-
if m:
|
82
|
-
return m[1]
|
83
|
-
else:
|
84
|
-
return input_
|
85
|
-
|
86
|
-
|
87
|
-
def _format_seq_to_str(
|
88
|
-
seq: Union[str, Sequence],
|
89
|
-
sep: str = ", ",
|
90
|
-
last_sep: Optional[str] = None,
|
91
|
-
remove_type_text: bool = True,
|
92
|
-
) -> str:
|
93
|
-
"""Format a sequence to a string of delimitted elements.
|
94
|
-
|
95
|
-
This is useful to format sequences into a pretty printing format for
|
96
|
-
creating error messages or warnings.
|
97
|
-
|
98
|
-
Parameters
|
99
|
-
----------
|
100
|
-
seq : Sequence
|
101
|
-
The input sequence to convert to a str of the elements separated by `sep`.
|
102
|
-
sep : str
|
103
|
-
The separator to use when creating the str output.
|
104
|
-
last_sep : str, default=None
|
105
|
-
The separator to use prior to last element.
|
106
|
-
|
107
|
-
- If None, then `sep` is used. So (7, 9, 11) return "7", "9", "11" for
|
108
|
-
``sep=", "``.
|
109
|
-
- If last_sep is a str, then it is used prior to last element. So
|
110
|
-
(7, 9, 11) would be "7", "9" and "11" if ``last_sep="and"``.
|
111
|
-
|
112
|
-
remove_type_text : bool, default=True
|
113
|
-
Whether to remove the <class > text wrapping the class type name, when
|
114
|
-
formatting types.
|
115
|
-
|
116
|
-
- If True, then input sequence [list, tuple] returns "list, tuple"
|
117
|
-
- If False, then input sequence [list, tuple] returns
|
118
|
-
"<class 'list'>, <class 'tuple'>".
|
119
|
-
|
120
|
-
Returns
|
121
|
-
-------
|
122
|
-
str
|
123
|
-
The sequence of inputs converted to a string. For example, if `seq`
|
124
|
-
is (7, 9, "cart") and ``last_sep is None`` then the output is
|
125
|
-
"7", "9", "cart".
|
126
|
-
|
127
|
-
Examples
|
128
|
-
--------
|
129
|
-
>>> from skbase.base import BaseEstimator, BaseObject
|
130
|
-
>>> from skbase.utils._iter import _format_seq_to_str
|
131
|
-
>>> seq = [1, 2, 3, 4]
|
132
|
-
>>> _format_seq_to_str(seq)
|
133
|
-
'1, 2, 3, 4'
|
134
|
-
>>> _format_seq_to_str(seq, last_sep="and")
|
135
|
-
'1, 2, 3 and 4'
|
136
|
-
>>> _format_seq_to_str((BaseObject, BaseEstimator))
|
137
|
-
'skbase.base._base.BaseObject, skbase.base._base.BaseEstimator'
|
138
|
-
"""
|
139
|
-
if isinstance(seq, str):
|
140
|
-
return seq
|
141
|
-
# Allow casting of scalars to strings
|
142
|
-
elif isinstance(seq, (int, float, bool, type)):
|
143
|
-
return _remove_type_text(seq)
|
144
|
-
elif not isinstance(seq, Sequence):
|
145
|
-
raise TypeError(
|
146
|
-
"`seq` must be a sequence or scalar str, int, float, bool or class."
|
147
|
-
)
|
148
|
-
|
149
|
-
seq_str = [str(e) for e in seq]
|
150
|
-
if remove_type_text:
|
151
|
-
seq_str = [_remove_type_text(s) for s in seq_str]
|
152
|
-
|
153
|
-
if last_sep is None:
|
154
|
-
output_str = sep.join(seq_str)
|
155
|
-
else:
|
156
|
-
if len(seq_str) == 1:
|
157
|
-
output_str = _remove_single(seq_str)
|
158
|
-
else:
|
159
|
-
output_str = sep.join(e for e in seq_str[:-1])
|
160
|
-
output_str = output_str + f" {last_sep} " + seq_str[-1]
|
161
|
-
|
162
|
-
return output_str
|
163
|
-
|
164
|
-
|
165
|
-
@overload
def make_strings_unique(str_list: Tuple[str, ...]) -> Tuple[str, ...]:
    ...  # pragma: no cover


@overload
def make_strings_unique(str_list: List[str]) -> List[str]:
    ...  # pragma: no cover


def make_strings_unique(
    str_list: Union[List[str], Tuple[str, ...]]
) -> Union[List[str], Tuple[str, ...]]:
    """Make a list or tuple of strings unique by appending number of occurrence.

    Supports making string elements unique for nested list/tuple input.
    The input is never modified; when renaming is needed a new list/tuple is
    returned.

    Parameters
    ----------
    str_list : nested list/tuple structure with string elements
        The list or tuple with string elements that should be made unique.

    Returns
    -------
    list[str] | tuple[str]
        The input strings coerced to have unique names.

        - If no duplicates then the output list/tuple is same as input.
        - Otherwise, the integer number of occurrence is appended onto duplicate
          strings. If this results in duplicates (b/c another string in input had
          the name of a string and integer of occurrence) this is repeated until
          no duplicates exist.

    Examples
    --------
    >>> from skbase.utils import make_strings_unique
    >>> some_strs = ["abc", "abc", "bcd"]
    >>> make_strings_unique(some_strs)
    ['abc_1', 'abc_2', 'bcd']
    >>> some_strs
    ['abc', 'abc', 'bcd']
    >>> some_strs = ["abc", "abc", "bcd", "abc_1"]
    >>> make_strings_unique(some_strs)
    ['abc_1_1', 'abc_2', 'bcd', 'abc_1_2']
    """
    # if strlist is not flat, flatten and apply method, then unflatten
    if not is_flat(str_list):
        flat_str_list = flatten(str_list)
        unique_flat_str_list = make_strings_unique(flat_str_list)
        return unflatten(unique_flat_str_list, str_list)

    # if strlist is a tuple, convert to list, apply this function, then convert back
    if isinstance(str_list, tuple):
        return tuple(make_strings_unique(list(str_list)))

    # now we can assume that strlist is a flat list
    # if already unique, just return
    if len(set(str_list)) == len(str_list):
        return str_list

    str_count = collections.Counter(str_list)
    # if any duplicates, we append _integer of occurrence to non-uniques
    now_count: collections.Counter = collections.Counter()
    # Collect the renamed strings in a fresh list: writing them back into
    # `str_list` would silently change the list owned by the caller.
    unique_strs = []
    for x in str_list:
        if str_count[x] > 1:
            now_count[x] += 1
            unique_strs.append(x + "_" + str(now_count[x]))
        else:
            unique_strs.append(x)

    # repeat until all are unique
    # the algorithm recurses, but will always terminate
    # because potential clashes are lexicographically increasing
    return make_strings_unique(unique_strs)
|
1
|
+
#!/usr/bin/env python3 -u
|
2
|
+
# -*- coding: utf-8 -*-
|
3
|
+
# copyright: skbase developers, BSD-3-Clause License (see LICENSE file)
|
4
|
+
"""Functionality for working with sequences."""
|
5
|
+
import collections
|
6
|
+
import re
|
7
|
+
from collections.abc import Sequence
|
8
|
+
from typing import Any, List, Optional, Tuple, Union, overload
|
9
|
+
|
10
|
+
from skbase.utils._nested_iter import _remove_single, flatten, is_flat, unflatten
|
11
|
+
|
12
|
+
__author__: List[str] = ["RNKuhns"]
|
13
|
+
__all__: List[str] = [
|
14
|
+
"_scalar_to_seq",
|
15
|
+
"_remove_type_text",
|
16
|
+
"_format_seq_to_str",
|
17
|
+
"make_strings_unique",
|
18
|
+
]
|
19
|
+
|
20
|
+
|
21
|
+
def _scalar_to_seq(scalar: Any, sequence_type: type = None) -> Sequence:
|
22
|
+
"""Convert a scalar input to a sequence.
|
23
|
+
|
24
|
+
If the input is already a sequence it is returned unchanged. Unlike standard
|
25
|
+
Python, a string is treated as a scalar instead of a sequence.
|
26
|
+
|
27
|
+
Parameters
|
28
|
+
----------
|
29
|
+
scalar : Any
|
30
|
+
A scalar input to be converted to a sequence.
|
31
|
+
sequence_type : type, default=None
|
32
|
+
A sequence type (e.g., list, tuple) that is used to set the return type. This
|
33
|
+
is ignored if `scalar` is already a sequence other than a str (which is
|
34
|
+
treated like a scalar type for this function instead of sequence of
|
35
|
+
characters).
|
36
|
+
|
37
|
+
- If None, then the returned sequence will be a tuple containing a single
|
38
|
+
scalar element.
|
39
|
+
- If `sequence_type` is a valid sequence type then the returned
|
40
|
+
sequence will be a sequence of that type containing the single scalar
|
41
|
+
value.
|
42
|
+
|
43
|
+
Returns
|
44
|
+
-------
|
45
|
+
Sequence
|
46
|
+
A sequence of the specified `sequence_type` that contains just the single
|
47
|
+
scalar value.
|
48
|
+
|
49
|
+
Examples
|
50
|
+
--------
|
51
|
+
>>> from skbase.utils._iter import _scalar_to_seq
|
52
|
+
>>> _scalar_to_seq(7)
|
53
|
+
(7,)
|
54
|
+
>>> _scalar_to_seq("some_str")
|
55
|
+
('some_str',)
|
56
|
+
>>> _scalar_to_seq("some_str", sequence_type=list)
|
57
|
+
['some_str']
|
58
|
+
>>> _scalar_to_seq((1, 2))
|
59
|
+
(1, 2)
|
60
|
+
"""
|
61
|
+
# We'll treat str like regular scalar and not a sequence
|
62
|
+
if isinstance(scalar, Sequence) and not isinstance(scalar, str):
|
63
|
+
return scalar
|
64
|
+
elif sequence_type is None:
|
65
|
+
return (scalar,)
|
66
|
+
elif issubclass(sequence_type, Sequence) and sequence_type != Sequence:
|
67
|
+
# Note calling (scalar,) is done to avoid str unpacking
|
68
|
+
return sequence_type((scalar,)) # type: ignore
|
69
|
+
else:
|
70
|
+
raise ValueError(
|
71
|
+
"`sequence_type` must be a subclass of collections.abc.Sequence."
|
72
|
+
)
|
73
|
+
|
74
|
+
|
75
|
+
def _remove_type_text(input_):
|
76
|
+
"""Remove <class > wrapper from printed type str."""
|
77
|
+
if not isinstance(input_, str):
|
78
|
+
input_ = str(input_)
|
79
|
+
|
80
|
+
m = re.match("^<class '(.*)'>$", input_)
|
81
|
+
if m:
|
82
|
+
return m[1]
|
83
|
+
else:
|
84
|
+
return input_
|
85
|
+
|
86
|
+
|
87
|
+
def _format_seq_to_str(
|
88
|
+
seq: Union[str, Sequence],
|
89
|
+
sep: str = ", ",
|
90
|
+
last_sep: Optional[str] = None,
|
91
|
+
remove_type_text: bool = True,
|
92
|
+
) -> str:
|
93
|
+
"""Format a sequence to a string of delimitted elements.
|
94
|
+
|
95
|
+
This is useful to format sequences into a pretty printing format for
|
96
|
+
creating error messages or warnings.
|
97
|
+
|
98
|
+
Parameters
|
99
|
+
----------
|
100
|
+
seq : Sequence
|
101
|
+
The input sequence to convert to a str of the elements separated by `sep`.
|
102
|
+
sep : str
|
103
|
+
The separator to use when creating the str output.
|
104
|
+
last_sep : str, default=None
|
105
|
+
The separator to use prior to last element.
|
106
|
+
|
107
|
+
- If None, then `sep` is used. So (7, 9, 11) return "7", "9", "11" for
|
108
|
+
``sep=", "``.
|
109
|
+
- If last_sep is a str, then it is used prior to last element. So
|
110
|
+
(7, 9, 11) would be "7", "9" and "11" if ``last_sep="and"``.
|
111
|
+
|
112
|
+
remove_type_text : bool, default=True
|
113
|
+
Whether to remove the <class > text wrapping the class type name, when
|
114
|
+
formatting types.
|
115
|
+
|
116
|
+
- If True, then input sequence [list, tuple] returns "list, tuple"
|
117
|
+
- If False, then input sequence [list, tuple] returns
|
118
|
+
"<class 'list'>, <class 'tuple'>".
|
119
|
+
|
120
|
+
Returns
|
121
|
+
-------
|
122
|
+
str
|
123
|
+
The sequence of inputs converted to a string. For example, if `seq`
|
124
|
+
is (7, 9, "cart") and ``last_sep is None`` then the output is
|
125
|
+
"7", "9", "cart".
|
126
|
+
|
127
|
+
Examples
|
128
|
+
--------
|
129
|
+
>>> from skbase.base import BaseEstimator, BaseObject
|
130
|
+
>>> from skbase.utils._iter import _format_seq_to_str
|
131
|
+
>>> seq = [1, 2, 3, 4]
|
132
|
+
>>> _format_seq_to_str(seq)
|
133
|
+
'1, 2, 3, 4'
|
134
|
+
>>> _format_seq_to_str(seq, last_sep="and")
|
135
|
+
'1, 2, 3 and 4'
|
136
|
+
>>> _format_seq_to_str((BaseObject, BaseEstimator))
|
137
|
+
'skbase.base._base.BaseObject, skbase.base._base.BaseEstimator'
|
138
|
+
"""
|
139
|
+
if isinstance(seq, str):
|
140
|
+
return seq
|
141
|
+
# Allow casting of scalars to strings
|
142
|
+
elif isinstance(seq, (int, float, bool, type)):
|
143
|
+
return _remove_type_text(seq)
|
144
|
+
elif not isinstance(seq, Sequence):
|
145
|
+
raise TypeError(
|
146
|
+
"`seq` must be a sequence or scalar str, int, float, bool or class."
|
147
|
+
)
|
148
|
+
|
149
|
+
seq_str = [str(e) for e in seq]
|
150
|
+
if remove_type_text:
|
151
|
+
seq_str = [_remove_type_text(s) for s in seq_str]
|
152
|
+
|
153
|
+
if last_sep is None:
|
154
|
+
output_str = sep.join(seq_str)
|
155
|
+
else:
|
156
|
+
if len(seq_str) == 1:
|
157
|
+
output_str = _remove_single(seq_str)
|
158
|
+
else:
|
159
|
+
output_str = sep.join(e for e in seq_str[:-1])
|
160
|
+
output_str = output_str + f" {last_sep} " + seq_str[-1]
|
161
|
+
|
162
|
+
return output_str
|
163
|
+
|
164
|
+
|
165
|
+
@overload
def make_strings_unique(str_list: Tuple[str, ...]) -> Tuple[str, ...]:
    ...  # pragma: no cover


@overload
def make_strings_unique(str_list: List[str]) -> List[str]:
    ...  # pragma: no cover


def make_strings_unique(
    str_list: Union[List[str], Tuple[str, ...]]
) -> Union[List[str], Tuple[str, ...]]:
    """Make a list or tuple of strings unique by appending number of occurrence.

    Supports making string elements unique for nested list/tuple input.
    The input is never modified; when renaming is needed a new list/tuple is
    returned.

    Parameters
    ----------
    str_list : nested list/tuple structure with string elements
        The list or tuple with string elements that should be made unique.

    Returns
    -------
    list[str] | tuple[str]
        The input strings coerced to have unique names.

        - If no duplicates then the output list/tuple is same as input.
        - Otherwise, the integer number of occurrence is appended onto duplicate
          strings. If this results in duplicates (b/c another string in input had
          the name of a string and integer of occurrence) this is repeated until
          no duplicates exist.

    Examples
    --------
    >>> from skbase.utils import make_strings_unique
    >>> some_strs = ["abc", "abc", "bcd"]
    >>> make_strings_unique(some_strs)
    ['abc_1', 'abc_2', 'bcd']
    >>> some_strs
    ['abc', 'abc', 'bcd']
    >>> some_strs = ["abc", "abc", "bcd", "abc_1"]
    >>> make_strings_unique(some_strs)
    ['abc_1_1', 'abc_2', 'bcd', 'abc_1_2']
    """
    # if strlist is not flat, flatten and apply method, then unflatten
    if not is_flat(str_list):
        flat_str_list = flatten(str_list)
        unique_flat_str_list = make_strings_unique(flat_str_list)
        return unflatten(unique_flat_str_list, str_list)

    # if strlist is a tuple, convert to list, apply this function, then convert back
    if isinstance(str_list, tuple):
        return tuple(make_strings_unique(list(str_list)))

    # now we can assume that strlist is a flat list
    # if already unique, just return
    if len(set(str_list)) == len(str_list):
        return str_list

    str_count = collections.Counter(str_list)
    # if any duplicates, we append _integer of occurrence to non-uniques
    now_count: collections.Counter = collections.Counter()
    # Collect the renamed strings in a fresh list: writing them back into
    # `str_list` would silently change the list owned by the caller.
    unique_strs = []
    for x in str_list:
        if str_count[x] > 1:
            now_count[x] += 1
            unique_strs.append(x + "_" + str(now_count[x]))
        else:
            unique_strs.append(x)

    # repeat until all are unique
    # the algorithm recurses, but will always terminate
    # because potential clashes are lexicographically increasing
    return make_strings_unique(unique_strs)
|