polars-runtime-compat 1.34.0b2__cp39-abi3-macosx_10_12_x86_64.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of polars-runtime-compat might be problematic. Click here for more details.

Files changed (203)
  1. _polars_runtime_compat/.gitkeep +0 -0
  2. _polars_runtime_compat/_polars_runtime_compat.abi3.so +0 -0
  3. polars/__init__.py +528 -0
  4. polars/_cpu_check.py +265 -0
  5. polars/_dependencies.py +355 -0
  6. polars/_plr.py +99 -0
  7. polars/_plr.pyi +2496 -0
  8. polars/_reexport.py +23 -0
  9. polars/_typing.py +478 -0
  10. polars/_utils/__init__.py +37 -0
  11. polars/_utils/async_.py +102 -0
  12. polars/_utils/cache.py +176 -0
  13. polars/_utils/cloud.py +40 -0
  14. polars/_utils/constants.py +29 -0
  15. polars/_utils/construction/__init__.py +46 -0
  16. polars/_utils/construction/dataframe.py +1397 -0
  17. polars/_utils/construction/other.py +72 -0
  18. polars/_utils/construction/series.py +560 -0
  19. polars/_utils/construction/utils.py +118 -0
  20. polars/_utils/convert.py +224 -0
  21. polars/_utils/deprecation.py +406 -0
  22. polars/_utils/getitem.py +457 -0
  23. polars/_utils/logging.py +11 -0
  24. polars/_utils/nest_asyncio.py +264 -0
  25. polars/_utils/parquet.py +15 -0
  26. polars/_utils/parse/__init__.py +12 -0
  27. polars/_utils/parse/expr.py +242 -0
  28. polars/_utils/polars_version.py +19 -0
  29. polars/_utils/pycapsule.py +53 -0
  30. polars/_utils/scan.py +27 -0
  31. polars/_utils/serde.py +63 -0
  32. polars/_utils/slice.py +215 -0
  33. polars/_utils/udfs.py +1251 -0
  34. polars/_utils/unstable.py +63 -0
  35. polars/_utils/various.py +782 -0
  36. polars/_utils/wrap.py +25 -0
  37. polars/api.py +370 -0
  38. polars/catalog/__init__.py +0 -0
  39. polars/catalog/unity/__init__.py +19 -0
  40. polars/catalog/unity/client.py +733 -0
  41. polars/catalog/unity/models.py +152 -0
  42. polars/config.py +1571 -0
  43. polars/convert/__init__.py +25 -0
  44. polars/convert/general.py +1046 -0
  45. polars/convert/normalize.py +261 -0
  46. polars/dataframe/__init__.py +5 -0
  47. polars/dataframe/_html.py +186 -0
  48. polars/dataframe/frame.py +12582 -0
  49. polars/dataframe/group_by.py +1067 -0
  50. polars/dataframe/plotting.py +257 -0
  51. polars/datatype_expr/__init__.py +5 -0
  52. polars/datatype_expr/array.py +56 -0
  53. polars/datatype_expr/datatype_expr.py +304 -0
  54. polars/datatype_expr/list.py +18 -0
  55. polars/datatype_expr/struct.py +69 -0
  56. polars/datatypes/__init__.py +122 -0
  57. polars/datatypes/_parse.py +195 -0
  58. polars/datatypes/_utils.py +48 -0
  59. polars/datatypes/classes.py +1213 -0
  60. polars/datatypes/constants.py +11 -0
  61. polars/datatypes/constructor.py +172 -0
  62. polars/datatypes/convert.py +366 -0
  63. polars/datatypes/group.py +130 -0
  64. polars/exceptions.py +230 -0
  65. polars/expr/__init__.py +7 -0
  66. polars/expr/array.py +964 -0
  67. polars/expr/binary.py +346 -0
  68. polars/expr/categorical.py +306 -0
  69. polars/expr/datetime.py +2620 -0
  70. polars/expr/expr.py +11272 -0
  71. polars/expr/list.py +1408 -0
  72. polars/expr/meta.py +444 -0
  73. polars/expr/name.py +321 -0
  74. polars/expr/string.py +3045 -0
  75. polars/expr/struct.py +357 -0
  76. polars/expr/whenthen.py +185 -0
  77. polars/functions/__init__.py +193 -0
  78. polars/functions/aggregation/__init__.py +33 -0
  79. polars/functions/aggregation/horizontal.py +298 -0
  80. polars/functions/aggregation/vertical.py +341 -0
  81. polars/functions/as_datatype.py +848 -0
  82. polars/functions/business.py +138 -0
  83. polars/functions/col.py +384 -0
  84. polars/functions/datatype.py +121 -0
  85. polars/functions/eager.py +524 -0
  86. polars/functions/escape_regex.py +29 -0
  87. polars/functions/lazy.py +2751 -0
  88. polars/functions/len.py +68 -0
  89. polars/functions/lit.py +210 -0
  90. polars/functions/random.py +22 -0
  91. polars/functions/range/__init__.py +19 -0
  92. polars/functions/range/_utils.py +15 -0
  93. polars/functions/range/date_range.py +303 -0
  94. polars/functions/range/datetime_range.py +370 -0
  95. polars/functions/range/int_range.py +348 -0
  96. polars/functions/range/linear_space.py +311 -0
  97. polars/functions/range/time_range.py +287 -0
  98. polars/functions/repeat.py +301 -0
  99. polars/functions/whenthen.py +353 -0
  100. polars/interchange/__init__.py +10 -0
  101. polars/interchange/buffer.py +77 -0
  102. polars/interchange/column.py +190 -0
  103. polars/interchange/dataframe.py +230 -0
  104. polars/interchange/from_dataframe.py +328 -0
  105. polars/interchange/protocol.py +303 -0
  106. polars/interchange/utils.py +170 -0
  107. polars/io/__init__.py +64 -0
  108. polars/io/_utils.py +317 -0
  109. polars/io/avro.py +49 -0
  110. polars/io/clipboard.py +36 -0
  111. polars/io/cloud/__init__.py +17 -0
  112. polars/io/cloud/_utils.py +80 -0
  113. polars/io/cloud/credential_provider/__init__.py +17 -0
  114. polars/io/cloud/credential_provider/_builder.py +520 -0
  115. polars/io/cloud/credential_provider/_providers.py +618 -0
  116. polars/io/csv/__init__.py +9 -0
  117. polars/io/csv/_utils.py +38 -0
  118. polars/io/csv/batched_reader.py +142 -0
  119. polars/io/csv/functions.py +1495 -0
  120. polars/io/database/__init__.py +6 -0
  121. polars/io/database/_arrow_registry.py +70 -0
  122. polars/io/database/_cursor_proxies.py +147 -0
  123. polars/io/database/_executor.py +578 -0
  124. polars/io/database/_inference.py +314 -0
  125. polars/io/database/_utils.py +144 -0
  126. polars/io/database/functions.py +516 -0
  127. polars/io/delta.py +499 -0
  128. polars/io/iceberg/__init__.py +3 -0
  129. polars/io/iceberg/_utils.py +697 -0
  130. polars/io/iceberg/dataset.py +556 -0
  131. polars/io/iceberg/functions.py +151 -0
  132. polars/io/ipc/__init__.py +8 -0
  133. polars/io/ipc/functions.py +514 -0
  134. polars/io/json/__init__.py +3 -0
  135. polars/io/json/read.py +101 -0
  136. polars/io/ndjson.py +332 -0
  137. polars/io/parquet/__init__.py +17 -0
  138. polars/io/parquet/field_overwrites.py +140 -0
  139. polars/io/parquet/functions.py +722 -0
  140. polars/io/partition.py +491 -0
  141. polars/io/plugins.py +187 -0
  142. polars/io/pyarrow_dataset/__init__.py +5 -0
  143. polars/io/pyarrow_dataset/anonymous_scan.py +109 -0
  144. polars/io/pyarrow_dataset/functions.py +79 -0
  145. polars/io/scan_options/__init__.py +5 -0
  146. polars/io/scan_options/_options.py +59 -0
  147. polars/io/scan_options/cast_options.py +126 -0
  148. polars/io/spreadsheet/__init__.py +6 -0
  149. polars/io/spreadsheet/_utils.py +52 -0
  150. polars/io/spreadsheet/_write_utils.py +647 -0
  151. polars/io/spreadsheet/functions.py +1323 -0
  152. polars/lazyframe/__init__.py +9 -0
  153. polars/lazyframe/engine_config.py +61 -0
  154. polars/lazyframe/frame.py +8564 -0
  155. polars/lazyframe/group_by.py +669 -0
  156. polars/lazyframe/in_process.py +42 -0
  157. polars/lazyframe/opt_flags.py +333 -0
  158. polars/meta/__init__.py +14 -0
  159. polars/meta/build.py +33 -0
  160. polars/meta/index_type.py +27 -0
  161. polars/meta/thread_pool.py +50 -0
  162. polars/meta/versions.py +120 -0
  163. polars/ml/__init__.py +0 -0
  164. polars/ml/torch.py +213 -0
  165. polars/ml/utilities.py +30 -0
  166. polars/plugins.py +155 -0
  167. polars/py.typed +0 -0
  168. polars/pyproject.toml +96 -0
  169. polars/schema.py +265 -0
  170. polars/selectors.py +3117 -0
  171. polars/series/__init__.py +5 -0
  172. polars/series/array.py +776 -0
  173. polars/series/binary.py +254 -0
  174. polars/series/categorical.py +246 -0
  175. polars/series/datetime.py +2275 -0
  176. polars/series/list.py +1087 -0
  177. polars/series/plotting.py +191 -0
  178. polars/series/series.py +9197 -0
  179. polars/series/string.py +2367 -0
  180. polars/series/struct.py +154 -0
  181. polars/series/utils.py +191 -0
  182. polars/sql/__init__.py +7 -0
  183. polars/sql/context.py +677 -0
  184. polars/sql/functions.py +139 -0
  185. polars/string_cache.py +185 -0
  186. polars/testing/__init__.py +13 -0
  187. polars/testing/asserts/__init__.py +9 -0
  188. polars/testing/asserts/frame.py +231 -0
  189. polars/testing/asserts/series.py +219 -0
  190. polars/testing/asserts/utils.py +12 -0
  191. polars/testing/parametric/__init__.py +33 -0
  192. polars/testing/parametric/profiles.py +107 -0
  193. polars/testing/parametric/strategies/__init__.py +22 -0
  194. polars/testing/parametric/strategies/_utils.py +14 -0
  195. polars/testing/parametric/strategies/core.py +615 -0
  196. polars/testing/parametric/strategies/data.py +452 -0
  197. polars/testing/parametric/strategies/dtype.py +436 -0
  198. polars/testing/parametric/strategies/legacy.py +169 -0
  199. polars/type_aliases.py +24 -0
  200. polars_runtime_compat-1.34.0b2.dist-info/METADATA +190 -0
  201. polars_runtime_compat-1.34.0b2.dist-info/RECORD +203 -0
  202. polars_runtime_compat-1.34.0b2.dist-info/WHEEL +4 -0
  203. polars_runtime_compat-1.34.0b2.dist-info/licenses/LICENSE +20 -0
@@ -0,0 +1,615 @@
1
+ from __future__ import annotations
2
+
3
+ from collections.abc import Mapping
4
+ from dataclasses import dataclass
5
+ from typing import TYPE_CHECKING, Any, overload
6
+
7
+ import hypothesis.strategies as st
8
+ from hypothesis.errors import InvalidArgument
9
+
10
+ from polars import select, when
11
+ from polars._utils.deprecation import issue_deprecation_warning
12
+ from polars.dataframe import DataFrame
13
+ from polars.datatypes import Array, Boolean, DataType, DataTypeClass, List, Null, Struct
14
+ from polars.series import Series
15
+ from polars.testing.parametric.strategies._utils import flexhash
16
+ from polars.testing.parametric.strategies.data import data
17
+ from polars.testing.parametric.strategies.dtype import _instantiate_dtype, dtypes
18
+
19
+ if TYPE_CHECKING:
20
+ from collections.abc import Collection, Sequence
21
+ from typing import Literal
22
+
23
+ from hypothesis.strategies import DrawFn, SearchStrategy
24
+
25
+ from polars import LazyFrame
26
+ from polars._typing import PolarsDataType
27
+
28
+
29
# Size caps for generated examples. Keeping frames/series tiny makes tests fast
# and helps hypothesis shrink failing examples quickly.
_ROW_LIMIT = 5  # max generated frame/series length
_COL_LIMIT = 5  # max number of generated cols
31
+
32
+
33
@st.composite
def series(
    draw: DrawFn,
    /,
    *,
    name: str | SearchStrategy[str] | None = None,
    dtype: PolarsDataType | None = None,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    strategy: SearchStrategy[Any] | None = None,
    allow_null: bool = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    unique: bool = False,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> Series:
    """
    Hypothesis strategy for producing Polars Series.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    name : {str, strategy}, optional
        literal string or a strategy for strings (or None), passed to the Series
        constructor name-param.
    dtype : PolarsDataType, optional
        a valid polars DataType for the resulting series.
    min_size : int
        if not passing an exact size, can set a minimum here (defaults to 0).
        no-op if `size` is set.
    max_size : int
        if not passing an exact size, can set a maximum value here (defaults to
        `_ROW_LIMIT`, i.e. 5). no-op if `size` is set.
    strategy : strategy, optional
        supports overriding the default strategy for the given dtype.
    allow_null : bool
        Allow nulls as possible values and allow the `Null` data type by default.
    allow_chunks : bool
        Allow the Series to contain multiple chunks.
    allow_masked_out : bool
        Allow the nulls to contain masked out elements.
    unique : bool, optional
        indicate whether Series values should all be distinct.
    allowed_dtypes : {list,set}, optional
        when automatically generating Series data, allow only these dtypes.
    excluded_dtypes : {list,set}, optional
        when automatically generating Series data, exclude these dtypes.
    allow_time_zones
        Allow generating `Datetime` Series with a time zone.
    **kwargs
        Additional keyword arguments that are passed to the underlying data generation
        strategies.

    size : int, optional
        if set, creates a Series of exactly this size (ignoring min_size/max_size
        params).

        .. deprecated:: 1.0.0
            Use `min_size` and `max_size` instead.

    null_probability : float
        Percentage chance (expressed between 0.0 => 1.0) that any Series value is null.
        This is applied independently of any None values generated by the underlying
        strategy.

        .. deprecated:: 0.20.26
            Use `allow_null` instead.

    allow_infinities : bool, optional
        Allow generation of +/-inf values for floating-point dtypes.

        .. deprecated:: 0.20.26
            Use `allow_infinity` instead.

    Notes
    -----
    In actual usage this is deployed as a unit test decorator, providing a strategy
    that generates multiple Series with the given dtype/size characteristics for the
    unit test. While developing a strategy/test, it can also be useful to call
    `.example()` directly on a given strategy to see concrete instances of the
    generated data.

    Examples
    --------
    The strategy is generally used to generate series in a unit test:

    >>> from polars.testing.parametric import series
    >>> from hypothesis import given
    >>> @given(s=series(min_size=3, max_size=5))
    ... def test_series_len(s: pl.Series) -> None:
    ...     assert 3 <= s.len() <= 5

    Drawing examples interactively is also possible with the `.example()` method.
    This should be avoided while running tests.

    >>> from polars.testing.parametric import lists
    >>> s = series(strategy=lists(pl.String, select_from=["xx", "yy", "zz"]))
    >>> s.example()  # doctest: +SKIP
    shape: (4,)
    Series: '' [list[str]]
    [
        ["zz", "zz"]
        ["zz", "xx", "yy"]
        []
        ["xx"]
    ]
    """
    # NOTE(review): this is a hypothesis composite strategy — the order of
    # `draw()` calls below is part of its observable behavior (example
    # reproduction and shrinking); keep it stable.

    # --- deprecated keyword shims -------------------------------------------
    # Legacy keyword names arrive via **kwargs; each is popped, mapped onto its
    # modern replacement, and a deprecation warning is issued.
    if (null_prob := kwargs.pop("null_probability", None)) is not None:
        allow_null = _handle_null_probability_deprecation(null_prob)  # type: ignore[assignment]
    if (allow_inf := kwargs.pop("allow_infinities", None)) is not None:
        issue_deprecation_warning(
            "`allow_infinities` is deprecated. Use `allow_infinity` instead.",
            version="0.20.26",
        )
        kwargs["allow_infinity"] = allow_inf
    if (chunked := kwargs.pop("chunked", None)) is not None:
        issue_deprecation_warning(
            "`chunked` is deprecated. Use `allow_chunks` instead.",
            version="0.20.26",
        )
        allow_chunks = chunked
    if (size := kwargs.pop("size", None)) is not None:
        issue_deprecation_warning(
            "`size` is deprecated. Use `min_size` and `max_size` instead.",
            version="1.0.0",
        )
        min_size = max_size = size

    # Normalize the dtype filters to lists (a single dtype is also accepted).
    if isinstance(allowed_dtypes, (DataType, DataTypeClass)):
        allowed_dtypes = [allowed_dtypes]
    elif allowed_dtypes is not None:
        allowed_dtypes = list(allowed_dtypes)
    if isinstance(excluded_dtypes, (DataType, DataTypeClass)):
        excluded_dtypes = [excluded_dtypes]
    elif excluded_dtypes is not None:
        excluded_dtypes = list(excluded_dtypes)

    # When nulls are disallowed, also exclude the Null dtype (a Null series is
    # all nulls) — unless the caller explicitly allowed it.
    if not allow_null and not (allowed_dtypes is not None and Null in allowed_dtypes):
        if excluded_dtypes is None:
            excluded_dtypes = [Null]
        else:
            excluded_dtypes.append(Null)

    if strategy is None:
        # No value strategy given: draw a concrete dtype first (either fully
        # random, or by instantiating any free parameters of the given dtype).
        if dtype is None:
            dtype_strat = dtypes(
                allowed_dtypes=allowed_dtypes,
                excluded_dtypes=excluded_dtypes,
                allow_time_zones=allow_time_zones,
            )
        else:
            dtype_strat = _instantiate_dtype(
                dtype,
                allowed_dtypes=allowed_dtypes,
                excluded_dtypes=excluded_dtypes,
                allow_time_zones=allow_time_zones,
            )
        dtype = draw(dtype_strat)

    # Fixed size short-circuits a draw (keeps examples smaller for hypothesis).
    if min_size == max_size:
        size = min_size
    else:
        size = draw(st.integers(min_value=min_size, max_value=max_size))

    if isinstance(name, st.SearchStrategy):
        name = draw(name)

    # For nested dtypes we sometimes produce nulls by *masking out* valid
    # values (via a boolean mask) instead of generating nulls directly.
    do_mask_out = (
        allow_masked_out
        and allow_null
        and isinstance(dtype, (List, Array, Struct))
        and draw(st.booleans())
    )

    if size == 0:
        values = []
    else:
        # Create series using dtype-specific strategy to generate values
        if strategy is None:
            strategy = data(
                dtype,  # type: ignore[arg-type]
                # direct nulls are suppressed when masking will supply them
                allow_null=allow_null and not do_mask_out,
                **kwargs,
            )

        values = draw(
            st.lists(
                strategy,
                min_size=size,
                max_size=size,
                unique_by=(flexhash if unique else None),
            )
        )

    s = Series(name=name, values=values, dtype=dtype)

    # Apply masking out of values
    if do_mask_out:
        values = draw(
            st.lists(
                st.booleans(),
                min_size=size,
                max_size=size,
                unique_by=(flexhash if unique else None),
            )
        )

        # when(mask).then(s) nulls out positions where the mask is False,
        # leaving the underlying values physically present but masked out.
        mask = Series(name=None, values=values, dtype=Boolean)
        s = select(when(mask).then(s).alias(s.name)).to_series()

    # Apply chunking
    if allow_chunks and size > 1 and draw(st.booleans()):
        # Split in half and re-append to force a two-chunk Series.
        split_at = size // 2
        s = s[:split_at].append(s[split_at:])

    return s
255
+
256
+
257
# Overload: with lazy=False (the default) the strategy produces DataFrames.
# NOTE: `min_cols` default aligned with the implementation, which uses 1
# (the previous declared default of 0 was misleading to type checkers/IDEs).
@overload
def dataframes(
    cols: int | column | Sequence[column] | None = None,
    *,
    lazy: Literal[False] = ...,
    min_cols: int = 1,
    max_cols: int = _COL_LIMIT,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    include_cols: Sequence[column] | column | None = None,
    allow_null: bool | Mapping[str, bool] = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> SearchStrategy[DataFrame]: ...
275
+
276
+
277
# Overload: with lazy=True the strategy produces LazyFrames.
# NOTE: `min_cols` default aligned with the implementation, which uses 1
# (the previous declared default of 0 was misleading to type checkers/IDEs).
@overload
def dataframes(
    cols: int | column | Sequence[column] | None = None,
    *,
    lazy: Literal[True],
    min_cols: int = 1,
    max_cols: int = _COL_LIMIT,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    include_cols: Sequence[column] | column | None = None,
    allow_null: bool | Mapping[str, bool] = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> SearchStrategy[LazyFrame]: ...
295
+
296
+
297
@st.composite
def dataframes(
    draw: DrawFn,
    /,
    cols: int | column | Sequence[column] | None = None,
    *,
    lazy: bool = False,
    min_cols: int = 1,
    max_cols: int = _COL_LIMIT,
    min_size: int = 0,
    max_size: int = _ROW_LIMIT,
    include_cols: Sequence[column] | column | None = None,
    allow_null: bool | Mapping[str, bool] = True,
    allow_chunks: bool = True,
    allow_masked_out: bool = True,
    allowed_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    excluded_dtypes: Collection[PolarsDataType] | PolarsDataType | None = None,
    allow_time_zones: bool = True,
    **kwargs: Any,
) -> DataFrame | LazyFrame:
    """
    Hypothesis strategy for producing Polars DataFrames or LazyFrames.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    cols : {int, columns}, optional
        integer number of columns to create, or a sequence of `column` objects
        that describe the desired DataFrame column data.
    lazy : bool, optional
        produce a LazyFrame instead of a DataFrame.
    min_cols : int, optional
        if not passing an exact size, can set a minimum here (defaults to 1).
    max_cols : int, optional
        if not passing an exact size, can set a maximum value here (defaults to
        `_COL_LIMIT`, i.e. 5).
    min_size : int, optional
        if not passing an exact size, set the minimum number of rows in the
        DataFrame.
    max_size : int, optional
        if not passing an exact size, set the maximum number of rows in the
        DataFrame.
    include_cols : [column], optional
        a list of `column` objects to include in the generated DataFrame. note that
        explicitly provided columns are appended onto the list of existing columns
        (if any present).
    allow_null : bool or Mapping[str, bool]
        Allow nulls as possible values and allow the `Null` data type by default.
        Accepts either a boolean or a mapping of column names to booleans.
    allow_chunks : bool
        Allow the DataFrame to contain multiple chunks.
    allow_masked_out : bool
        Allow the nulls to contain masked out elements.
    allowed_dtypes : {list,set}, optional
        when automatically generating data, allow only these dtypes.
    excluded_dtypes : {list,set}, optional
        when automatically generating data, exclude these dtypes.
    allow_time_zones
        Allow generating `Datetime` columns with a time zone.
    **kwargs
        Additional keyword arguments that are passed to the underlying data generation
        strategies.

    size : int, optional
        if set, will create a DataFrame of exactly this size (and ignore
        the min_size/max_size len params).

        .. deprecated:: 1.0.0
            Use `min_size` and `max_size` instead.

    null_probability : {float, dict[str,float]}, optional
        percentage chance (expressed between 0.0 => 1.0) that a generated value is
        None. this is applied independently of any None values generated by the
        underlying strategy, and can be applied either on a per-column basis (if
        given as a `{col:pct}` dict), or globally. if null_probability is defined
        on a column, it takes precedence over the global value.

        .. deprecated:: 0.20.26
            Use `allow_null` instead.

    allow_infinities : bool, optional
        optionally disallow generation of +/-inf values for floating-point dtypes.

        .. deprecated:: 0.20.26
            Use `allow_infinity` instead.

    Notes
    -----
    In actual usage this is deployed as a unit test decorator, providing a strategy
    that generates DataFrames or LazyFrames with the given characteristics for
    the unit test. While developing a strategy/test, it can also be useful to
    call `.example()` directly on a given strategy to see concrete instances of
    the generated data.

    Examples
    --------
    The strategy is generally used to generate dataframes in a unit test:

    >>> from polars.testing.parametric import dataframes
    >>> from hypothesis import given
    >>> @given(df=dataframes(min_size=3, max_size=5))
    ... def test_df_height(df: pl.DataFrame) -> None:
    ...     assert 3 <= df.height <= 5

    Drawing examples interactively is also possible with the `.example()` method.
    This should be avoided while running tests.

    >>> df = dataframes(allowed_dtypes=[pl.Datetime, pl.Float64], max_cols=3)
    >>> df.example()  # doctest: +SKIP
    shape: (3, 3)
    ┌─────────────┬────────────────────────────┬───────────┐
    │ col0        ┆ col1                       ┆ col2      │
    │ ---         ┆ ---                        ┆ ---       │
    │ f64         ┆ datetime[ns]               ┆ f64       │
    ╞═════════════╪════════════════════════════╪═══════════╡
    │ NaN         ┆ 1844-07-05 06:19:48.848808 ┆ 3.1436e16 │
    │ -1.9914e218 ┆ 2068-12-01 23:05:11.412277 ┆ 2.7415e16 │
    │ 0.5         ┆ 2095-11-19 22:05:17.647961 ┆ -0.5      │
    └─────────────┴────────────────────────────┴───────────┘

    Use :class:`column` for more control over exactly which columns are generated.

    >>> from polars.testing.parametric import column
    >>> dfs = dataframes(
    ...     [
    ...         column("x", dtype=pl.Int32),
    ...         column("y", dtype=pl.Float64),
    ...     ],
    ...     min_size=2,
    ...     max_size=2,
    ... )
    >>> dfs.example()  # doctest: +SKIP
    shape: (2, 2)
    ┌───────────┬────────────┐
    │ x         ┆ y          │
    │ ---       ┆ ---        │
    │ i32       ┆ f64        │
    ╞═══════════╪════════════╡
    │ -15836    ┆ 1.1755e-38 │
    │ 575050513 ┆ NaN        │
    └───────────┴────────────┘
    """
    # NOTE(review): composite strategy — the order of `draw()` calls is part of
    # its observable behavior (example reproduction and shrinking); keep stable.

    # --- deprecated keyword shims (mirrors `series`) -------------------------
    if (null_prob := kwargs.pop("null_probability", None)) is not None:
        allow_null = _handle_null_probability_deprecation(null_prob)
    if (allow_inf := kwargs.pop("allow_infinities", None)) is not None:
        issue_deprecation_warning(
            "`allow_infinities` is deprecated. Use `allow_infinity` instead.",
            version="0.20.26",
        )
        kwargs["allow_infinity"] = allow_inf
    if (chunked := kwargs.pop("chunked", None)) is not None:
        issue_deprecation_warning(
            "`chunked` is deprecated. Use `allow_chunks` instead.",
            version="0.20.26",
        )
        allow_chunks = chunked
    if (size := kwargs.pop("size", None)) is not None:
        issue_deprecation_warning(
            "`size` is deprecated. Use `min_size` and `max_size` instead.",
            version="1.0.0",
        )
        min_size = max_size = size

    if isinstance(include_cols, column):
        include_cols = [include_cols]

    # Normalize `cols` into a list of `column` specs, generating default
    # (fully-random) columns where none were described.
    if cols is None:
        n_cols = draw(st.integers(min_value=min_cols, max_value=max_cols))
        cols = [column() for _ in range(n_cols)]
    elif isinstance(cols, int):
        cols = [column() for _ in range(cols)]
    elif isinstance(cols, column):
        cols = [cols]
    else:
        cols = list(cols)

    if include_cols:
        cols.extend(list(include_cols))

    # Fixed row count short-circuits a draw.
    if min_size == max_size:
        size = min_size
    else:
        size = draw(st.integers(min_value=min_size, max_value=max_size))

    # Process columns
    for idx, c in enumerate(cols):
        if c.name is None:
            c.name = f"col{idx}"  # positional default name
        if c.allow_null is None:
            # Per-column setting wins; fall back to the global/mapped value.
            if isinstance(allow_null, Mapping):
                c.allow_null = allow_null.get(c.name, True)
            else:
                c.allow_null = allow_null

    # Either chunk each series individually, or chunk the whole frame below —
    # not both.
    allow_series_chunks = draw(st.booleans()) if allow_chunks else False

    data = {
        c.name: draw(
            series(
                name=c.name,
                dtype=c.dtype,
                min_size=size,  # all columns share the same row count
                max_size=size,
                strategy=c.strategy,
                allow_null=c.allow_null,  # type: ignore[arg-type]
                allow_chunks=allow_series_chunks,
                allow_masked_out=allow_masked_out,
                unique=c.unique,
                allowed_dtypes=allowed_dtypes,
                excluded_dtypes=excluded_dtypes,
                allow_time_zones=allow_time_zones,
                **kwargs,
            )
        )
        for c in cols
    }

    df = DataFrame(data)

    # Apply chunking
    if allow_chunks and size > 1 and not allow_series_chunks and draw(st.booleans()):
        # Split in half and vstack to force a multi-chunk DataFrame.
        split_at = size // 2
        df = df[:split_at].vstack(df[split_at:])

    if lazy:
        return df.lazy()

    return df
528
+
529
+
530
@dataclass
class column:
    """
    Define a column for use with the `dataframes` strategy.

    .. warning::
        This functionality is currently considered **unstable**. It may be
        changed at any point without it being considered a breaking change.

    Parameters
    ----------
    name : str
        string column name.
    dtype : PolarsDataType
        a polars dtype.
    strategy : strategy, optional
        supports overriding the default strategy for the given dtype.
    allow_null : bool, optional
        Allow nulls as possible values and allow the `Null` data type by default.
    unique : bool, optional
        flag indicating that all values generated for the column should be unique.

    null_probability : float, optional
        percentage chance (expressed between 0.0 => 1.0) that a generated value is
        None. this is applied independently of any None values generated by the
        underlying strategy.

        .. deprecated:: 0.20.26
            Use `allow_null` instead.

    Examples
    --------
    >>> from polars.testing.parametric import column
    >>> dfs = dataframes(
    ...     [
    ...         column("x", dtype=pl.Int32, allow_null=True),
    ...         column("y", dtype=pl.Float64),
    ...     ],
    ...     min_size=2,
    ...     max_size=2,
    ... )
    >>> dfs.example()  # doctest: +SKIP
    shape: (2, 2)
    ┌───────────┬────────────┐
    │ x         ┆ y          │
    │ ---       ┆ ---        │
    │ i32       ┆ f64        │
    ╞═══════════╪════════════╡
    │ null      ┆ 1.1755e-38 │
    │ 575050513 ┆ inf        │
    └───────────┴────────────┘
    """

    # None means "assign a default": `dataframes` fills in `col{idx}` for the
    # name and the frame-level `allow_null` setting for `allow_null`.
    name: str | None = None
    dtype: PolarsDataType | None = None
    strategy: SearchStrategy[Any] | None = None
    allow_null: bool | None = None
    unique: bool = False

    # Deprecated (0.20.26); converted to `allow_null` in __post_init__.
    null_probability: float | None = None

    def __post_init__(self) -> None:
        # Map the deprecated `null_probability` onto `allow_null`, issuing a
        # deprecation warning (and validating the 0.0-1.0 range) as it goes.
        if self.null_probability is not None:
            self.allow_null = _handle_null_probability_deprecation(  # type: ignore[assignment]
                self.null_probability
            )
595
+
596
+
597
def _handle_null_probability_deprecation(
    null_probability: float | Mapping[str, float],
) -> bool | dict[str, bool]:
    """
    Convert a deprecated `null_probability` argument into an `allow_null` value.

    A probability of exactly 0.0 maps to False (nulls disallowed); any other
    valid probability maps to True. Accepts either a single float or a mapping
    of column name -> probability, returning a bool or dict of bools
    accordingly. Always emits a deprecation warning.

    Raises
    ------
    InvalidArgument
        If any probability falls outside the range [0.0, 1.0].
    """
    issue_deprecation_warning(
        "`null_probability` is deprecated. Use `allow_null` instead.",
        version="0.20.26",
    )

    def as_allow_null(p: float) -> bool:
        # Validate first, then collapse the probability to a simple on/off flag.
        if 0.0 <= p <= 1.0:
            return bool(p)
        msg = f"`null_probability` should be between 0.0 and 1.0, got {p!r}"
        raise InvalidArgument(msg)

    if not isinstance(null_probability, Mapping):
        return as_allow_null(null_probability)
    return {name: as_allow_null(p) for name, p in null_probability.items()}