polars-runtime-compat 1.34.0b2__cp39-abi3-win_amd64.whl → 1.34.0b4__cp39-abi3-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of polars-runtime-compat might be problematic. Click here for more details.
- _polars_runtime_compat/_polars_runtime_compat.pyd +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/METADATA +1 -1
- polars_runtime_compat-1.34.0b4.dist-info/RECORD +6 -0
- polars/__init__.py +0 -528
- polars/_cpu_check.py +0 -265
- polars/_dependencies.py +0 -355
- polars/_plr.py +0 -99
- polars/_plr.pyi +0 -2496
- polars/_reexport.py +0 -23
- polars/_typing.py +0 -478
- polars/_utils/__init__.py +0 -37
- polars/_utils/async_.py +0 -102
- polars/_utils/cache.py +0 -176
- polars/_utils/cloud.py +0 -40
- polars/_utils/constants.py +0 -29
- polars/_utils/construction/__init__.py +0 -46
- polars/_utils/construction/dataframe.py +0 -1397
- polars/_utils/construction/other.py +0 -72
- polars/_utils/construction/series.py +0 -560
- polars/_utils/construction/utils.py +0 -118
- polars/_utils/convert.py +0 -224
- polars/_utils/deprecation.py +0 -406
- polars/_utils/getitem.py +0 -457
- polars/_utils/logging.py +0 -11
- polars/_utils/nest_asyncio.py +0 -264
- polars/_utils/parquet.py +0 -15
- polars/_utils/parse/__init__.py +0 -12
- polars/_utils/parse/expr.py +0 -242
- polars/_utils/polars_version.py +0 -19
- polars/_utils/pycapsule.py +0 -53
- polars/_utils/scan.py +0 -27
- polars/_utils/serde.py +0 -63
- polars/_utils/slice.py +0 -215
- polars/_utils/udfs.py +0 -1251
- polars/_utils/unstable.py +0 -63
- polars/_utils/various.py +0 -782
- polars/_utils/wrap.py +0 -25
- polars/api.py +0 -370
- polars/catalog/__init__.py +0 -0
- polars/catalog/unity/__init__.py +0 -19
- polars/catalog/unity/client.py +0 -733
- polars/catalog/unity/models.py +0 -152
- polars/config.py +0 -1571
- polars/convert/__init__.py +0 -25
- polars/convert/general.py +0 -1046
- polars/convert/normalize.py +0 -261
- polars/dataframe/__init__.py +0 -5
- polars/dataframe/_html.py +0 -186
- polars/dataframe/frame.py +0 -12582
- polars/dataframe/group_by.py +0 -1067
- polars/dataframe/plotting.py +0 -257
- polars/datatype_expr/__init__.py +0 -5
- polars/datatype_expr/array.py +0 -56
- polars/datatype_expr/datatype_expr.py +0 -304
- polars/datatype_expr/list.py +0 -18
- polars/datatype_expr/struct.py +0 -69
- polars/datatypes/__init__.py +0 -122
- polars/datatypes/_parse.py +0 -195
- polars/datatypes/_utils.py +0 -48
- polars/datatypes/classes.py +0 -1213
- polars/datatypes/constants.py +0 -11
- polars/datatypes/constructor.py +0 -172
- polars/datatypes/convert.py +0 -366
- polars/datatypes/group.py +0 -130
- polars/exceptions.py +0 -230
- polars/expr/__init__.py +0 -7
- polars/expr/array.py +0 -964
- polars/expr/binary.py +0 -346
- polars/expr/categorical.py +0 -306
- polars/expr/datetime.py +0 -2620
- polars/expr/expr.py +0 -11272
- polars/expr/list.py +0 -1408
- polars/expr/meta.py +0 -444
- polars/expr/name.py +0 -321
- polars/expr/string.py +0 -3045
- polars/expr/struct.py +0 -357
- polars/expr/whenthen.py +0 -185
- polars/functions/__init__.py +0 -193
- polars/functions/aggregation/__init__.py +0 -33
- polars/functions/aggregation/horizontal.py +0 -298
- polars/functions/aggregation/vertical.py +0 -341
- polars/functions/as_datatype.py +0 -848
- polars/functions/business.py +0 -138
- polars/functions/col.py +0 -384
- polars/functions/datatype.py +0 -121
- polars/functions/eager.py +0 -524
- polars/functions/escape_regex.py +0 -29
- polars/functions/lazy.py +0 -2751
- polars/functions/len.py +0 -68
- polars/functions/lit.py +0 -210
- polars/functions/random.py +0 -22
- polars/functions/range/__init__.py +0 -19
- polars/functions/range/_utils.py +0 -15
- polars/functions/range/date_range.py +0 -303
- polars/functions/range/datetime_range.py +0 -370
- polars/functions/range/int_range.py +0 -348
- polars/functions/range/linear_space.py +0 -311
- polars/functions/range/time_range.py +0 -287
- polars/functions/repeat.py +0 -301
- polars/functions/whenthen.py +0 -353
- polars/interchange/__init__.py +0 -10
- polars/interchange/buffer.py +0 -77
- polars/interchange/column.py +0 -190
- polars/interchange/dataframe.py +0 -230
- polars/interchange/from_dataframe.py +0 -328
- polars/interchange/protocol.py +0 -303
- polars/interchange/utils.py +0 -170
- polars/io/__init__.py +0 -64
- polars/io/_utils.py +0 -317
- polars/io/avro.py +0 -49
- polars/io/clipboard.py +0 -36
- polars/io/cloud/__init__.py +0 -17
- polars/io/cloud/_utils.py +0 -80
- polars/io/cloud/credential_provider/__init__.py +0 -17
- polars/io/cloud/credential_provider/_builder.py +0 -520
- polars/io/cloud/credential_provider/_providers.py +0 -618
- polars/io/csv/__init__.py +0 -9
- polars/io/csv/_utils.py +0 -38
- polars/io/csv/batched_reader.py +0 -142
- polars/io/csv/functions.py +0 -1495
- polars/io/database/__init__.py +0 -6
- polars/io/database/_arrow_registry.py +0 -70
- polars/io/database/_cursor_proxies.py +0 -147
- polars/io/database/_executor.py +0 -578
- polars/io/database/_inference.py +0 -314
- polars/io/database/_utils.py +0 -144
- polars/io/database/functions.py +0 -516
- polars/io/delta.py +0 -499
- polars/io/iceberg/__init__.py +0 -3
- polars/io/iceberg/_utils.py +0 -697
- polars/io/iceberg/dataset.py +0 -556
- polars/io/iceberg/functions.py +0 -151
- polars/io/ipc/__init__.py +0 -8
- polars/io/ipc/functions.py +0 -514
- polars/io/json/__init__.py +0 -3
- polars/io/json/read.py +0 -101
- polars/io/ndjson.py +0 -332
- polars/io/parquet/__init__.py +0 -17
- polars/io/parquet/field_overwrites.py +0 -140
- polars/io/parquet/functions.py +0 -722
- polars/io/partition.py +0 -491
- polars/io/plugins.py +0 -187
- polars/io/pyarrow_dataset/__init__.py +0 -5
- polars/io/pyarrow_dataset/anonymous_scan.py +0 -109
- polars/io/pyarrow_dataset/functions.py +0 -79
- polars/io/scan_options/__init__.py +0 -5
- polars/io/scan_options/_options.py +0 -59
- polars/io/scan_options/cast_options.py +0 -126
- polars/io/spreadsheet/__init__.py +0 -6
- polars/io/spreadsheet/_utils.py +0 -52
- polars/io/spreadsheet/_write_utils.py +0 -647
- polars/io/spreadsheet/functions.py +0 -1323
- polars/lazyframe/__init__.py +0 -9
- polars/lazyframe/engine_config.py +0 -61
- polars/lazyframe/frame.py +0 -8564
- polars/lazyframe/group_by.py +0 -669
- polars/lazyframe/in_process.py +0 -42
- polars/lazyframe/opt_flags.py +0 -333
- polars/meta/__init__.py +0 -14
- polars/meta/build.py +0 -33
- polars/meta/index_type.py +0 -27
- polars/meta/thread_pool.py +0 -50
- polars/meta/versions.py +0 -120
- polars/ml/__init__.py +0 -0
- polars/ml/torch.py +0 -213
- polars/ml/utilities.py +0 -30
- polars/plugins.py +0 -155
- polars/py.typed +0 -0
- polars/pyproject.toml +0 -96
- polars/schema.py +0 -265
- polars/selectors.py +0 -3117
- polars/series/__init__.py +0 -5
- polars/series/array.py +0 -776
- polars/series/binary.py +0 -254
- polars/series/categorical.py +0 -246
- polars/series/datetime.py +0 -2275
- polars/series/list.py +0 -1087
- polars/series/plotting.py +0 -191
- polars/series/series.py +0 -9197
- polars/series/string.py +0 -2367
- polars/series/struct.py +0 -154
- polars/series/utils.py +0 -191
- polars/sql/__init__.py +0 -7
- polars/sql/context.py +0 -677
- polars/sql/functions.py +0 -139
- polars/string_cache.py +0 -185
- polars/testing/__init__.py +0 -13
- polars/testing/asserts/__init__.py +0 -9
- polars/testing/asserts/frame.py +0 -231
- polars/testing/asserts/series.py +0 -219
- polars/testing/asserts/utils.py +0 -12
- polars/testing/parametric/__init__.py +0 -33
- polars/testing/parametric/profiles.py +0 -107
- polars/testing/parametric/strategies/__init__.py +0 -22
- polars/testing/parametric/strategies/_utils.py +0 -14
- polars/testing/parametric/strategies/core.py +0 -615
- polars/testing/parametric/strategies/data.py +0 -452
- polars/testing/parametric/strategies/dtype.py +0 -436
- polars/testing/parametric/strategies/legacy.py +0 -169
- polars/type_aliases.py +0 -24
- polars_runtime_compat-1.34.0b2.dist-info/RECORD +0 -203
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/WHEEL +0 -0
- {polars_runtime_compat-1.34.0b2.dist-info → polars_runtime_compat-1.34.0b4.dist-info}/licenses/LICENSE +0 -0
|
@@ -1,615 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from collections.abc import Mapping
|
|
4
|
-
from dataclasses import dataclass
|
|
5
|
-
from typing import TYPE_CHECKING, Any, overload
|
|
6
|
-
|
|
7
|
-
import hypothesis.strategies as st
|
|
8
|
-
from hypothesis.errors import InvalidArgument
|
|
9
|
-
|
|
10
|
-
from polars import select, when
|
|
11
|
-
from polars._utils.deprecation import issue_deprecation_warning
|
|
12
|
-
from polars.dataframe import DataFrame
|
|
13
|
-
from polars.datatypes import Array, Boolean, DataType, DataTypeClass, List, Null, Struct
|
|
14
|
-
from polars.series import Series
|
|
15
|
-
from polars.testing.parametric.strategies._utils import flexhash
|
|
16
|
-
from polars.testing.parametric.strategies.data import data
|
|
17
|
-
from polars.testing.parametric.strategies.dtype import _instantiate_dtype, dtypes
|
|
18
|
-
|
|
19
|
-
if TYPE_CHECKING:
|
|
20
|
-
from collections.abc import Collection, Sequence
|
|
21
|
-
from typing import Literal
|
|
22
|
-
|
|
23
|
-
from hypothesis.strategies import DrawFn, SearchStrategy
|
|
24
|
-
|
|
25
|
-
from polars import LazyFrame
|
|
26
|
-
from polars._typing import PolarsDataType
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
# Default upper bounds for generated data dimensions; used as the default
# `max_size` / `max_cols` values in the `series` and `dataframes` strategies.
_ROW_LIMIT = 5  # max generated frame/series length
_COL_LIMIT = 5  # max number of generated cols
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@st.composite
def series(
    draw: DrawFn,
    /,
    *,
    name: str | SearchStrategy[str] | None = None,
    dtype: PolarsDataType | None = None,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    strategy: SearchStrategy[Any] | None = None,
    allow_null: bool = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    unique: bool = False,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> Series:
    """
    Hypothesis strategy for producing Polars Series.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    name : {str, strategy}, optional
        literal string or a strategy for strings (or None), passed to the Series
        constructor name-param.
    dtype : PolarsDataType, optional
        a valid polars DataType for the resulting series.
    min_size : int
        if not passing an exact size, can set a minimum here (defaults to 0).
        no-op if `size` is set.
    max_size : int
        if not passing an exact size, can set a maximum value here (defaults to
        MAX_DATA_SIZE). no-op if `size` is set.
    strategy : strategy, optional
        supports overriding the default strategy for the given dtype.
    allow_null : bool
        Allow nulls as possible values and allow the `Null` data type by default.
    allow_chunks : bool
        Allow the Series to contain multiple chunks.
    allow_masked_out : bool
        Allow the nulls to contain masked out elements.
    unique : bool, optional
        indicate whether Series values should all be distinct.
    allowed_dtypes : {list,set}, optional
        when automatically generating Series data, allow only these dtypes.
    excluded_dtypes : {list,set}, optional
        when automatically generating Series data, exclude these dtypes.
    allow_time_zones
        Allow generating `Datetime` Series with a time zone.
    **kwargs
        Additional keyword arguments that are passed to the underlying data generation
        strategies.

    size : int, optional
        if set, creates a Series of exactly this size (ignoring min_size/max_size
        params).

        .. deprecated:: 1.0.0
            Use `min_size` and `max_size` instead.

    null_probability : float
        Percentage chance (expressed between 0.0 => 1.0) that any Series value is null.
        This is applied independently of any None values generated by the underlying
        strategy.

        .. deprecated:: 0.20.26
            Use `allow_null` instead.

    allow_infinities : bool, optional
        Allow generation of +/-inf values for floating-point dtypes.

        .. deprecated:: 0.20.26
            Use `allow_infinity` instead.

    Notes
    -----
    In actual usage this is deployed as a unit test decorator, providing a strategy
    that generates multiple Series with the given dtype/size characteristics for the
    unit test. While developing a strategy/test, it can also be useful to call
    `.example()` directly on a given strategy to see concrete instances of the
    generated data.

    Examples
    --------
    The strategy is generally used to generate series in a unit test:

    >>> from polars.testing.parametric import series
    >>> from hypothesis import given
    >>> @given(s=series(min_size=3, max_size=5))
    ... def test_series_len(s: pl.Series) -> None:
    ...     assert 3 <= s.len() <= 5

    Drawing examples interactively is also possible with the `.example()` method.
    This should be avoided while running tests.

    >>> from polars.testing.parametric import lists
    >>> s = series(strategy=lists(pl.String, select_from=["xx", "yy", "zz"]))
    >>> s.example()  # doctest: +SKIP
    shape: (4,)
    Series: '' [list[str]]
    [
        ["zz", "zz"]
        ["zz", "xx", "yy"]
        []
        ["xx"]
    ]
    """
    # NOTE: the order of `draw(...)` calls below is significant for hypothesis
    # example generation/shrinking — do not reorder them when refactoring.

    # Translate deprecated keyword arguments into their modern equivalents.
    if (null_prob := kwargs.pop("null_probability", None)) is not None:
        allow_null = _handle_null_probability_deprecation(null_prob)  # type: ignore[assignment]
    if (allow_inf := kwargs.pop("allow_infinities", None)) is not None:
        issue_deprecation_warning(
            "`allow_infinities` is deprecated. Use `allow_infinity` instead.",
            version="0.20.26",
        )
        kwargs["allow_infinity"] = allow_inf
    if (chunked := kwargs.pop("chunked", None)) is not None:
        issue_deprecation_warning(
            "`chunked` is deprecated. Use `allow_chunks` instead.",
            version="0.20.26",
        )
        allow_chunks = chunked
    if (size := kwargs.pop("size", None)) is not None:
        issue_deprecation_warning(
            "`size` is deprecated. Use `min_size` and `max_size` instead.",
            version="1.0.0",
        )
        min_size = max_size = size

    # Normalize the dtype filters to lists so they can be extended below.
    if isinstance(allowed_dtypes, (DataType, DataTypeClass)):
        allowed_dtypes = [allowed_dtypes]
    elif allowed_dtypes is not None:
        allowed_dtypes = list(allowed_dtypes)
    if isinstance(excluded_dtypes, (DataType, DataTypeClass)):
        excluded_dtypes = [excluded_dtypes]
    elif excluded_dtypes is not None:
        excluded_dtypes = list(excluded_dtypes)

    # When nulls are disallowed, also exclude the Null dtype — unless the
    # caller explicitly listed Null in `allowed_dtypes`.
    if not allow_null and not (allowed_dtypes is not None and Null in allowed_dtypes):
        if excluded_dtypes is None:
            excluded_dtypes = [Null]
        else:
            excluded_dtypes.append(Null)

    # Draw a concrete dtype unless the caller supplied an explicit value
    # strategy (in which case the dtype is left as given, possibly None).
    if strategy is None:
        if dtype is None:
            dtype_strat = dtypes(
                allowed_dtypes=allowed_dtypes,
                excluded_dtypes=excluded_dtypes,
                allow_time_zones=allow_time_zones,
            )
        else:
            dtype_strat = _instantiate_dtype(
                dtype,
                allowed_dtypes=allowed_dtypes,
                excluded_dtypes=excluded_dtypes,
                allow_time_zones=allow_time_zones,
            )
        dtype = draw(dtype_strat)

    if min_size == max_size:
        size = min_size
    else:
        size = draw(st.integers(min_value=min_size, max_value=max_size))

    if isinstance(name, st.SearchStrategy):
        name = draw(name)

    # Masked-out elements only apply to nested dtypes (List/Array/Struct);
    # whether to actually mask is itself a drawn boolean.
    do_mask_out = (
        allow_masked_out
        and allow_null
        and isinstance(dtype, (List, Array, Struct))
        and draw(st.booleans())
    )

    if size == 0:
        values = []
    else:
        # Create series using dtype-specific strategy to generate values
        if strategy is None:
            strategy = data(
                dtype,  # type: ignore[arg-type]
                allow_null=allow_null and not do_mask_out,
                **kwargs,
            )

        values = draw(
            st.lists(
                strategy,
                min_size=size,
                max_size=size,
                unique_by=(flexhash if unique else None),
            )
        )

    s = Series(name=name, values=values, dtype=dtype)

    # Apply masking out of values
    if do_mask_out:
        values = draw(
            st.lists(
                st.booleans(),
                min_size=size,
                max_size=size,
                unique_by=(flexhash if unique else None),
            )
        )

        # Entries where the drawn mask is False become masked-out nulls.
        mask = Series(name=None, values=values, dtype=Boolean)
        s = select(when(mask).then(s).alias(s.name)).to_series()

    # Apply chunking
    if allow_chunks and size > 1 and draw(st.booleans()):
        split_at = size // 2
        s = s[:split_at].append(s[split_at:])

    return s
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
@overload
def dataframes(
    cols: int | column | Sequence[column] | None = None,
    *,
    lazy: Literal[False] = ...,
    min_cols: int = 0,
    max_cols: int = _COL_LIMIT,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    include_cols: Sequence[column] | column | None = None,
    allow_null: bool | Mapping[str, bool] = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> SearchStrategy[DataFrame]: ...


@overload
def dataframes(
    cols: int | column | Sequence[column] | None = None,
    *,
    lazy: Literal[True],
    min_cols: int = 0,
    max_cols: int = _COL_LIMIT,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    include_cols: Sequence[column] | column | None = None,
    allow_null: bool | Mapping[str, bool] = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> SearchStrategy[LazyFrame]: ...


@st.composite
def dataframes(
    draw: DrawFn,
    /,
    cols: int | column | Sequence[column] | None = None,
    *,
    lazy: bool = False,
    min_cols: int = 1,
    max_cols: int = _COL_LIMIT,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    include_cols: Sequence[column] | column | None = None,
    allow_null: bool | Mapping[str, bool] = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> DataFrame | LazyFrame:
    """
    Hypothesis strategy for producing Polars DataFrames or LazyFrames.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    cols : {int, columns}, optional
        integer number of columns to create, or a sequence of `column` objects
        that describe the desired DataFrame column data.
    lazy : bool, optional
        produce a LazyFrame instead of a DataFrame.
    min_cols : int, optional
        if not passing an exact size, can set a minimum here (defaults to 0).
    max_cols : int, optional
        if not passing an exact size, can set a maximum value here (defaults to
        MAX_COLS).
    min_size : int, optional
        if not passing an exact size, set the minimum number of rows in the
        DataFrame.
    max_size : int, optional
        if not passing an exact size, set the maximum number of rows in the
        DataFrame.
    include_cols : [column], optional
        a list of `column` objects to include in the generated DataFrame. note that
        explicitly provided columns are appended onto the list of existing columns
        (if any present).
    allow_null : bool or Mapping[str, bool]
        Allow nulls as possible values and allow the `Null` data type by default.
        Accepts either a boolean or a mapping of column names to booleans.
    allow_chunks : bool
        Allow the DataFrame to contain multiple chunks.
    allow_masked_out : bool
        Allow the nulls to contain masked out elements.
    allowed_dtypes : {list,set}, optional
        when automatically generating data, allow only these dtypes.
    excluded_dtypes : {list,set}, optional
        when automatically generating data, exclude these dtypes.
    allow_time_zones
        Allow generating `Datetime` columns with a time zone.
    **kwargs
        Additional keyword arguments that are passed to the underlying data generation
        strategies.

    size : int, optional
        if set, will create a DataFrame of exactly this size (and ignore
        the min_size/max_size len params).

        .. deprecated:: 1.0.0
            Use `min_size` and `max_size` instead.

    null_probability : {float, dict[str,float]}, optional
        percentage chance (expressed between 0.0 => 1.0) that a generated value is
        None. this is applied independently of any None values generated by the
        underlying strategy, and can be applied either on a per-column basis (if
        given as a `{col:pct}` dict), or globally. if null_probability is defined
        on a column, it takes precedence over the global value.

        .. deprecated:: 0.20.26
            Use `allow_null` instead.

    allow_infinities : bool, optional
        optionally disallow generation of +/-inf values for floating-point dtypes.

        .. deprecated:: 0.20.26
            Use `allow_infinity` instead.

    Notes
    -----
    In actual usage this is deployed as a unit test decorator, providing a strategy
    that generates DataFrames or LazyFrames with the given characteristics for
    the unit test. While developing a strategy/test, it can also be useful to
    call `.example()` directly on a given strategy to see concrete instances of
    the generated data.

    Examples
    --------
    The strategy is generally used to generate series in a unit test:

    >>> from polars.testing.parametric import dataframes
    >>> from hypothesis import given
    >>> @given(df=dataframes(min_size=3, max_size=5))
    ... def test_df_height(df: pl.DataFrame) -> None:
    ...     assert 3 <= df.height <= 5

    Drawing examples interactively is also possible with the `.example()` method.
    This should be avoided while running tests.

    >>> df = dataframes(allowed_dtypes=[pl.Datetime, pl.Float64], max_cols=3)
    >>> df.example()  # doctest: +SKIP
    shape: (3, 3)
    ┌─────────────┬────────────────────────────┬───────────┐
    │ col0        ┆ col1                       ┆ col2      │
    │ ---         ┆ ---                        ┆ ---       │
    │ f64         ┆ datetime[ns]               ┆ f64       │
    ╞═════════════╪════════════════════════════╪═══════════╡
    │ NaN         ┆ 1844-07-05 06:19:48.848808 ┆ 3.1436e16 │
    │ -1.9914e218 ┆ 2068-12-01 23:05:11.412277 ┆ 2.7415e16 │
    │ 0.5         ┆ 2095-11-19 22:05:17.647961 ┆ -0.5      │
    └─────────────┴────────────────────────────┴───────────┘

    Use :class:`column` for more control over which exactly which columns are generated.

    >>> from polars.testing.parametric import column
    >>> dfs = dataframes(
    ...     [
    ...         column("x", dtype=pl.Int32),
    ...         column("y", dtype=pl.Float64),
    ...     ],
    ...     min_size=2,
    ...     max_size=2,
    ... )
    >>> dfs.example()  # doctest: +SKIP
    shape: (2, 2)
    ┌───────────┬────────────┐
    │ x         ┆ y          │
    │ ---       ┆ ---        │
    │ i32       ┆ f64        │
    ╞═══════════╪════════════╡
    │ -15836    ┆ 1.1755e-38 │
    │ 575050513 ┆ NaN        │
    └───────────┴────────────┘
    """
    # NOTE: the order of `draw(...)` calls below is significant for hypothesis
    # example generation/shrinking — do not reorder them when refactoring.

    # Translate deprecated keyword arguments into their modern equivalents.
    if (null_prob := kwargs.pop("null_probability", None)) is not None:
        allow_null = _handle_null_probability_deprecation(null_prob)
    if (allow_inf := kwargs.pop("allow_infinities", None)) is not None:
        issue_deprecation_warning(
            "`allow_infinities` is deprecated. Use `allow_infinity` instead.",
            version="0.20.26",
        )
        kwargs["allow_infinity"] = allow_inf
    if (chunked := kwargs.pop("chunked", None)) is not None:
        issue_deprecation_warning(
            "`chunked` is deprecated. Use `allow_chunks` instead.",
            version="0.20.26",
        )
        allow_chunks = chunked
    if (size := kwargs.pop("size", None)) is not None:
        issue_deprecation_warning(
            "`size` is deprecated. Use `min_size` and `max_size` instead.",
            version="1.0.0",
        )
        min_size = max_size = size

    if isinstance(include_cols, column):
        include_cols = [include_cols]

    # Normalize `cols` into a concrete list of `column` specs.
    if cols is None:
        n_cols = draw(st.integers(min_value=min_cols, max_value=max_cols))
        cols = [column() for _ in range(n_cols)]
    elif isinstance(cols, int):
        cols = [column() for _ in range(cols)]
    elif isinstance(cols, column):
        cols = [cols]
    else:
        cols = list(cols)

    if include_cols:
        cols.extend(list(include_cols))

    if min_size == max_size:
        size = min_size
    else:
        size = draw(st.integers(min_value=min_size, max_value=max_size))

    # Process columns: assign default names and the per-column null policy.
    for idx, c in enumerate(cols):
        if c.name is None:
            c.name = f"col{idx}"
        if c.allow_null is None:
            if isinstance(allow_null, Mapping):
                c.allow_null = allow_null.get(c.name, True)
            else:
                c.allow_null = allow_null

    # Chunking is applied either per-series (here) or at frame level (below),
    # but not both.
    allow_series_chunks = draw(st.booleans()) if allow_chunks else False

    data = {
        c.name: draw(
            series(
                name=c.name,
                dtype=c.dtype,
                min_size=size,
                max_size=size,
                strategy=c.strategy,
                allow_null=c.allow_null,  # type: ignore[arg-type]
                allow_chunks=allow_series_chunks,
                allow_masked_out=allow_masked_out,
                unique=c.unique,
                allowed_dtypes=allowed_dtypes,
                excluded_dtypes=excluded_dtypes,
                allow_time_zones=allow_time_zones,
                **kwargs,
            )
        )
        for c in cols
    }

    df = DataFrame(data)

    # Apply chunking
    if allow_chunks and size > 1 and not allow_series_chunks and draw(st.booleans()):
        split_at = size // 2
        df = df[:split_at].vstack(df[split_at:])

    if lazy:
        return df.lazy()

    return df
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
@dataclass
class column:
    """
    Per-column specification for use with the `dataframes` strategy.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    name : str
        string column name.
    dtype : PolarsDataType
        a polars dtype.
    strategy : strategy, optional
        supports overriding the default strategy for the given dtype.
    allow_null : bool, optional
        Allow nulls as possible values and allow the `Null` data type by default.
    unique : bool, optional
        flag indicating that all values generated for the column should be unique.

    null_probability : float, optional
        percentage chance (expressed between 0.0 => 1.0) that a generated value is
        None. this is applied independently of any None values generated by the
        underlying strategy.

        .. deprecated:: 0.20.26
            Use `allow_null` instead.

    Examples
    --------
    >>> from polars.testing.parametric import column
    >>> dfs = dataframes(
    ...     [
    ...         column("x", dtype=pl.Int32, allow_null=True),
    ...         column("y", dtype=pl.Float64),
    ...     ],
    ...     size=2,
    ... )
    >>> dfs.example()  # doctest: +SKIP
    shape: (2, 2)
    ┌───────────┬────────────┐
    │ x         ┆ y          │
    │ ---       ┆ ---        │
    │ i32       ┆ f64        │
    ╞═══════════╪════════════╡
    │ null      ┆ 1.1755e-38 │
    │ 575050513 ┆ inf        │
    └───────────┴────────────┘
    """

    name: str | None = None
    dtype: PolarsDataType | None = None
    strategy: SearchStrategy[Any] | None = None
    allow_null: bool | None = None
    unique: bool = False

    # Deprecated since 0.20.26; when given, it is converted to `allow_null`.
    null_probability: float | None = None

    def __post_init__(self) -> None:
        # Nothing to migrate unless the deprecated parameter was supplied.
        if self.null_probability is None:
            return
        self.allow_null = _handle_null_probability_deprecation(  # type: ignore[assignment]
            self.null_probability
        )
|
|
595
|
-
|
|
596
|
-
|
|
597
|
-
def _handle_null_probability_deprecation(
    null_probability: float | Mapping[str, float],
) -> bool | dict[str, bool]:
    """
    Convert the deprecated `null_probability` argument to `allow_null` form.

    Emits a deprecation warning, validates that each probability lies in
    [0.0, 1.0] (raising `InvalidArgument` otherwise), and maps it to a
    boolean: 0.0 becomes False, any other valid probability becomes True.
    A mapping of column names to probabilities is converted element-wise.
    """
    issue_deprecation_warning(
        "`null_probability` is deprecated. Use `allow_null` instead.",
        version="0.20.26",
    )

    def _as_allow_null(prob: float) -> bool:
        # Reject out-of-range probabilities before collapsing to a flag.
        if not (0.0 <= prob <= 1.0):
            msg = f"`null_probability` should be between 0.0 and 1.0, got {prob!r}"
            raise InvalidArgument(msg)
        return bool(prob)

    if not isinstance(null_probability, Mapping):
        return _as_allow_null(null_probability)
    return {col: _as_allow_null(prob) for col, prob in null_probability.items()}
|