polars-runtime-compat 1.34.0b3__cp39-abi3-macosx_11_0_arm64.whl → 1.34.0b5__cp39-abi3-macosx_11_0_arm64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
- polars_runtime_compat-1.34.0b5.dist-info/METADATA +35 -0
- polars_runtime_compat-1.34.0b5.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -103
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b3.dist-info/METADATA +0 -190
- polars_runtime_compat-1.34.0b3.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b3.dist-info → polars_runtime_compat-1.34.0b5.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,452 +0,0 @@
|
|
|
1
|
-
"""Strategies for generating various forms of data."""
|
|
2
|
-
|
|
3
|
-
from __future__ import annotations
|
|
4
|
-
|
|
5
|
-
import decimal
|
|
6
|
-
from collections.abc import Mapping
|
|
7
|
-
from datetime import datetime, timedelta, timezone
|
|
8
|
-
from typing import TYPE_CHECKING, Any, Literal
|
|
9
|
-
from zoneinfo import ZoneInfo
|
|
10
|
-
|
|
11
|
-
import hypothesis.strategies as st
|
|
12
|
-
from hypothesis.errors import InvalidArgument
|
|
13
|
-
|
|
14
|
-
from polars._utils.constants import (
|
|
15
|
-
EPOCH,
|
|
16
|
-
I8_MAX,
|
|
17
|
-
I8_MIN,
|
|
18
|
-
I16_MAX,
|
|
19
|
-
I16_MIN,
|
|
20
|
-
I32_MAX,
|
|
21
|
-
I32_MIN,
|
|
22
|
-
I64_MAX,
|
|
23
|
-
I64_MIN,
|
|
24
|
-
I128_MAX,
|
|
25
|
-
I128_MIN,
|
|
26
|
-
U8_MAX,
|
|
27
|
-
U16_MAX,
|
|
28
|
-
U32_MAX,
|
|
29
|
-
U64_MAX,
|
|
30
|
-
U128_MAX,
|
|
31
|
-
)
|
|
32
|
-
from polars.datatypes import (
|
|
33
|
-
Array,
|
|
34
|
-
Binary,
|
|
35
|
-
Boolean,
|
|
36
|
-
Categorical,
|
|
37
|
-
Date,
|
|
38
|
-
Datetime,
|
|
39
|
-
Decimal,
|
|
40
|
-
Duration,
|
|
41
|
-
Enum,
|
|
42
|
-
Field,
|
|
43
|
-
Float32,
|
|
44
|
-
Float64,
|
|
45
|
-
Int8,
|
|
46
|
-
Int16,
|
|
47
|
-
Int32,
|
|
48
|
-
Int64,
|
|
49
|
-
Int128,
|
|
50
|
-
List,
|
|
51
|
-
Null,
|
|
52
|
-
Object,
|
|
53
|
-
String,
|
|
54
|
-
Struct,
|
|
55
|
-
Time,
|
|
56
|
-
UInt8,
|
|
57
|
-
UInt16,
|
|
58
|
-
UInt32,
|
|
59
|
-
UInt64,
|
|
60
|
-
UInt128,
|
|
61
|
-
)
|
|
62
|
-
from polars.testing.parametric.strategies._utils import flexhash
|
|
63
|
-
from polars.testing.parametric.strategies.dtype import (
|
|
64
|
-
_DEFAULT_ARRAY_WIDTH_LIMIT,
|
|
65
|
-
_DEFAULT_ENUM_CATEGORIES_LIMIT,
|
|
66
|
-
)
|
|
67
|
-
|
|
68
|
-
if TYPE_CHECKING:
|
|
69
|
-
from collections.abc import Sequence
|
|
70
|
-
from datetime import date, time
|
|
71
|
-
|
|
72
|
-
from hypothesis.strategies import SearchStrategy
|
|
73
|
-
|
|
74
|
-
from polars._typing import PolarsDataType, SchemaDict, TimeUnit
|
|
75
|
-
from polars.datatypes import DataType, DataTypeClass
|
|
76
|
-
|
|
77
|
-
# Maximum list length used by `lists` when the caller gives no maximum and the
# minimum size is zero.
_DEFAULT_LIST_LEN_LIMIT = 3
# Number of category labels produced by `categories` unless told otherwise.
_DEFAULT_N_CATEGORIES = 10

# Ready-made integer strategies, keyed first by signedness (True = signed) and
# then by bit width.
_INTEGER_STRATEGIES: dict[bool, dict[int, SearchStrategy[int]]] = {
    is_signed: {
        width: st.integers(lower, upper) for width, (lower, upper) in bounds.items()
    }
    for is_signed, bounds in (
        (
            True,
            {
                8: (I8_MIN, I8_MAX),
                16: (I16_MIN, I16_MAX),
                32: (I32_MIN, I32_MAX),
                64: (I64_MIN, I64_MAX),
                128: (I128_MIN, I128_MAX),
            },
        ),
        (
            False,
            {
                8: (0, U8_MAX),
                16: (0, U16_MAX),
                32: (0, U32_MAX),
                64: (0, U64_MAX),
                128: (0, U128_MAX),
            },
        ),
    )
}
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
def integers(
    bit_width: Literal[8, 16, 32, 64, 128] = 64, *, signed: bool = True
) -> SearchStrategy[int]:
    """
    Return the pre-built integer strategy for a bit width and signedness.

    Parameters
    ----------
    bit_width
        Width of the integer type in bits.
    signed
        Select the signed (True) or unsigned (False) variant.
    """
    strategies_by_width = _INTEGER_STRATEGIES[signed]
    return strategies_by_width[bit_width]
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
def floats(
    bit_width: Literal[32, 64] = 64, *, allow_infinity: bool = True
) -> SearchStrategy[float]:
    """
    Create a strategy for generating floats.

    Parameters
    ----------
    bit_width
        Width of the generated floats in bits; forwarded to Hypothesis as `width`.
    allow_infinity
        Whether infinite values may be generated.
    """
    return st.floats(width=bit_width, allow_infinity=allow_infinity)
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
def booleans() -> SearchStrategy[bool]:
    """Return Hypothesis' boolean strategy."""
    boolean_strategy = st.booleans()
    return boolean_strategy
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
def strings() -> SearchStrategy[str]:
    """
    Return a strategy producing short text values.

    Strings are at most 8 characters long and use code points up to 1000,
    excluding the Unicode categories "Cs" and "Cc".
    """
    return st.text(
        alphabet=st.characters(max_codepoint=1000, exclude_categories=["Cs", "Cc"]),
        max_size=8,
    )
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
def binary() -> SearchStrategy[bytes]:
    """Return Hypothesis' strategy for `bytes` values."""
    bytes_strategy = st.binary()
    return bytes_strategy
|
|
126
|
-
|
|
127
|
-
|
|
128
|
-
def categories(n_categories: int = _DEFAULT_N_CATEGORIES) -> SearchStrategy[str]:
    """
    Return a strategy that samples from a fixed set of category labels.

    The labels are `"c0"`, `"c1"`, ... up to (but excluding) `n_categories`.

    Parameters
    ----------
    n_categories
        How many distinct labels to sample from.
    """
    return st.sampled_from([f"c{index}" for index in range(n_categories)])
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
def times() -> SearchStrategy[time]:
    """Return Hypothesis' strategy for `time` objects."""
    time_strategy = st.times()
    return time_strategy
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
def dates() -> SearchStrategy[date]:
    """Return Hypothesis' strategy for `date` objects."""
    date_strategy = st.dates()
    return date_strategy
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
def datetimes(
    time_unit: TimeUnit = "us", time_zone: str | None = None
) -> SearchStrategy[datetime]:
    """
    Return a strategy for `datetime` objects that fit the given time unit.

    The generated values are always naive. When a time zone is given, values
    are drawn as valid wall-clock times in that zone and then converted to UTC
    with the zone information stripped.

    Parameters
    ----------
    time_unit
        Time unit the generated datetimes must be representable in.
    time_zone
        Name of the time zone the generated datetimes must be valid for.

    Raises
    ------
    InvalidArgument
        If `time_unit` is not one of `"us"`, `"ms"` or `"ns"`.
    """
    if time_unit == "ns":
        lower = EPOCH + timedelta(microseconds=I64_MIN // 1000 + 1)
        upper = EPOCH + timedelta(microseconds=I64_MAX // 1000)
    elif time_unit in ("us", "ms"):
        lower, upper = datetime.min, datetime.max
    else:
        msg = f"invalid time unit: {time_unit!r}"
        raise InvalidArgument(msg)

    if time_zone is None:
        return st.datetimes(lower, upper)

    zone = ZoneInfo(time_zone)

    if time_unit == "ns":
        # Shrink the range by a day on each side so that applying the time
        # zone offset cannot push a value out of bounds
        margin = timedelta(days=1)
        lower, upper = lower + margin, upper - margin

    # Draw zone-aware datetimes (skipping imaginary wall-clock times), then
    # hand them back as naive UTC values
    zone_aware = st.datetimes(
        min_value=lower,
        max_value=upper,
        timezones=st.just(zone),
        allow_imaginary=False,
    )
    return zone_aware.map(
        lambda value: value.astimezone(timezone.utc).replace(tzinfo=None)
    )
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
def durations(time_unit: TimeUnit = "us") -> SearchStrategy[timedelta]:
    """
    Return a strategy for `timedelta` objects that fit the given time unit.

    Parameters
    ----------
    time_unit
        Time unit the generated timedeltas must be representable in.

    Raises
    ------
    InvalidArgument
        If `time_unit` is not one of `"us"`, `"ms"` or `"ns"`.
    """
    if time_unit == "ns":
        low_us, high_us = I64_MIN // 1000, I64_MAX // 1000
    elif time_unit in ("us", "ms"):
        # TODO: Enable full range of millisecond durations
        # timedelta.min/max fall within the range
        # return st.timedeltas()
        # Until then, milliseconds share the microsecond bounds.
        low_us, high_us = I64_MIN, I64_MAX
    else:
        msg = f"invalid time unit: {time_unit!r}"
        raise InvalidArgument(msg)

    return st.timedeltas(
        min_value=timedelta(microseconds=low_us),
        max_value=timedelta(microseconds=high_us),
    )
|
|
223
|
-
|
|
224
|
-
|
|
225
|
-
def decimals(
    precision: int | None = 38, scale: int = 0
) -> SearchStrategy[decimal.Decimal]:
    """
    Return a strategy for `Decimal` objects with a given precision and scale.

    Parameters
    ----------
    precision
        Maximum number of digits in each number.
        `None` is treated as 38.
    scale
        Number of digits to the right of the decimal point in each number.
    """
    digits = 38 if precision is None else precision

    context = decimal.Context(prec=digits)
    # Largest representable magnitude: the value just below 10 ** (digits - scale)
    upper = context.next_minus(context.create_decimal(f"1E+{digits - scale}"))
    lower = context.copy_negate(upper)

    return st.decimals(
        min_value=lower,
        max_value=upper,
        allow_nan=False,
        allow_infinity=False,
        places=scale,
    )
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
def lists(
    inner_dtype: DataType,
    *,
    select_from: Sequence[Any] | None = None,
    min_size: int = 0,
    max_size: int | None = None,
    unique: bool = False,
    **kwargs: Any,
) -> SearchStrategy[list[Any]]:
    """
    Return a strategy for lists whose elements have the given data type.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    inner_dtype
        Data type of the list elements. If the data type is not fully instantiated,
        defaults will be used, e.g. `Datetime` will become `Datetime('us')`.
    select_from
        Values to sample the innermost elements from. When `None` (default), or
        while `inner_dtype` is itself nested, element generation is delegated to
        `data` for `inner_dtype`.
    min_size
        The minimum length of the generated lists.
    max_size
        The maximum length of the generated lists. When `None` (default), it is
        `3` if `min_size` is zero and `2 * min_size` otherwise.
    unique
        Ensure that the generated lists contain unique values.
    **kwargs
        Additional arguments that are passed to nested data generation strategies.
    """
    if max_size is None:
        max_size = min_size * 2 if min_size != 0 else _DEFAULT_LIST_LEN_LIMIT

    sample_directly = select_from is not None and not inner_dtype.is_nested()
    if sample_directly:
        element_strategy = st.sampled_from(select_from)
    else:
        # Nested (or unrestricted) elements: forward every option one level down
        element_strategy = data(
            inner_dtype,
            select_from=select_from,
            min_size=min_size,
            max_size=max_size,
            unique=unique,
            **kwargs,
        )

    uniqueness_key = flexhash if unique else None
    return st.lists(
        elements=element_strategy,
        min_size=min_size,
        max_size=max_size,
        unique_by=uniqueness_key,
    )
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
def structs(
    fields: Sequence[Field] | SchemaDict,
    *,
    allow_null: bool = True,
    **kwargs: Any,
) -> SearchStrategy[dict[str, Any]]:
    """
    Return a strategy for struct values, represented as dictionaries.

    Parameters
    ----------
    fields
        The fields that make up the struct. Can be either a sequence of Field
        objects or a mapping of column names to data types.
    allow_null
        Allow nulls as possible values. If set to True, every field is an
        optional key, so the returned dictionaries may miss certain fields.
    **kwargs
        Additional arguments that are passed to nested data generation strategies.
    """
    if isinstance(fields, Mapping):
        field_list = [Field(name, dtype) for name, dtype in fields.items()]
    else:
        field_list = fields

    per_field = {}
    for field in field_list:
        per_field[field.name] = data(field.dtype, allow_null=allow_null, **kwargs)

    if not allow_null:
        return st.fixed_dictionaries(per_field)
    return st.fixed_dictionaries({}, optional=per_field)
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
def nulls() -> SearchStrategy[None]:
    """Return a strategy that only ever produces `None`."""
    null_strategy = st.none()
    return null_strategy
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
def objects() -> SearchStrategy[object]:
    """Return a strategy that builds plain `object()` instances."""
    object_strategy = st.builds(object)
    return object_strategy
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
# Strategies for data types that take no parameters; `data` looks these up by
# the base type of the requested dtype.
_STATIC_STRATEGIES: dict[DataTypeClass, SearchStrategy[Any]] = {
    Boolean: booleans(),
    **{
        dtype: integers(width, signed=True)
        for dtype, width in (
            (Int8, 8),
            (Int16, 16),
            (Int32, 32),
            (Int64, 64),
            (Int128, 128),
        )
    },
    **{
        dtype: integers(width, signed=False)
        for dtype, width in (
            (UInt8, 8),
            (UInt16, 16),
            (UInt32, 32),
            (UInt64, 64),
            (UInt128, 128),
        )
    },
    Time: times(),
    Date: dates(),
    String: strings(),
    Binary: binary(),
    Null: nulls(),
    Object: objects(),
}
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
def data(
    dtype: PolarsDataType, *, allow_null: bool = False, **kwargs: Any
) -> SearchStrategy[Any]:
    """
    Create a strategy for generating data for the given data type.

    Parameters
    ----------
    dtype
        A Polars data type. If the data type is not fully instantiated, defaults will
        be used, e.g. `Datetime` will become `Datetime('us')`.
    allow_null
        Allow nulls as possible values.
    **kwargs
        Additional parameters for the strategy associated with the given `dtype`.

    Raises
    ------
    InvalidArgument
        If no strategy is available for `dtype`.
    """
    # Parameter-free types are served straight from the lookup table
    static_strategy = _STATIC_STRATEGIES.get(dtype.base_type())
    if static_strategy is not None:
        strategy = static_strategy
    elif dtype == Float32:
        strategy = floats(32, allow_infinity=kwargs.pop("allow_infinity", True))
    elif dtype == Float64:
        strategy = floats(64, allow_infinity=kwargs.pop("allow_infinity", True))
    elif dtype == Datetime:
        # `getattr` with a fallback covers dtypes that carry no time unit/zone
        strategy = datetimes(
            time_unit=getattr(dtype, "time_unit", None) or "us",
            time_zone=getattr(dtype, "time_zone", None),
        )
    elif dtype == Duration:
        strategy = durations(time_unit=getattr(dtype, "time_unit", None) or "us")
    elif dtype == Categorical:
        strategy = categories(
            n_categories=kwargs.pop("n_categories", _DEFAULT_N_CATEGORIES)
        )
    elif dtype == Enum:
        if isinstance(dtype, Enum):
            # Instantiated Enum: sample from its own categories; with none
            # available, only null can be produced
            if (cats := dtype.categories).is_empty():
                strategy = nulls()
            else:
                strategy = st.sampled_from(cats.to_list())
        else:
            strategy = categories(
                n_categories=kwargs.pop("n_categories", _DEFAULT_ENUM_CATEGORIES_LIMIT)
            )
    elif dtype == Decimal:
        strategy = decimals(
            getattr(dtype, "precision", None), getattr(dtype, "scale", 0)
        )
    elif dtype == List:
        inner = getattr(dtype, "inner", None) or Null()
        strategy = lists(inner, allow_null=allow_null, **kwargs)
    elif dtype == Array:
        inner = getattr(dtype, "inner", None) or Null()
        size = getattr(dtype, "size", _DEFAULT_ARRAY_WIDTH_LIMIT)
        # The array width fixes the length, so caller-supplied size limits
        # are dropped before delegating to `lists`
        kwargs = {k: v for k, v in kwargs.items() if k not in ("min_size", "max_size")}
        strategy = lists(
            inner,
            min_size=size,
            max_size=size,
            allow_null=allow_null,
            **kwargs,
        )
    elif dtype == Struct:
        fields = getattr(dtype, "fields", None) or [Field("f0", Null())]
        strategy = structs(fields, allow_null=allow_null, **kwargs)
    else:
        msg = f"unsupported data type: {dtype}"
        raise InvalidArgument(msg)

    if allow_null:
        strategy = nulls() | strategy

    return strategy
|