dycw-utilities 0.129.10__py3-none-any.whl → 0.175.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dycw_utilities-0.175.17.dist-info/METADATA +34 -0
- dycw_utilities-0.175.17.dist-info/RECORD +103 -0
- dycw_utilities-0.175.17.dist-info/WHEEL +4 -0
- dycw_utilities-0.175.17.dist-info/entry_points.txt +4 -0
- utilities/__init__.py +1 -1
- utilities/altair.py +14 -14
- utilities/asyncio.py +350 -819
- utilities/atomicwrites.py +18 -6
- utilities/atools.py +77 -22
- utilities/cachetools.py +24 -29
- utilities/click.py +393 -237
- utilities/concurrent.py +8 -11
- utilities/contextlib.py +216 -17
- utilities/contextvars.py +20 -1
- utilities/cryptography.py +3 -3
- utilities/dataclasses.py +83 -118
- utilities/docker.py +293 -0
- utilities/enum.py +26 -23
- utilities/errors.py +17 -3
- utilities/fastapi.py +29 -65
- utilities/fpdf2.py +3 -3
- utilities/functions.py +169 -416
- utilities/functools.py +18 -19
- utilities/git.py +9 -30
- utilities/grp.py +28 -0
- utilities/gzip.py +31 -0
- utilities/http.py +3 -2
- utilities/hypothesis.py +738 -589
- utilities/importlib.py +17 -1
- utilities/inflect.py +25 -0
- utilities/iterables.py +194 -262
- utilities/jinja2.py +148 -0
- utilities/json.py +70 -0
- utilities/libcst.py +38 -17
- utilities/lightweight_charts.py +5 -9
- utilities/logging.py +345 -543
- utilities/math.py +18 -13
- utilities/memory_profiler.py +11 -15
- utilities/more_itertools.py +200 -131
- utilities/operator.py +33 -29
- utilities/optuna.py +6 -6
- utilities/orjson.py +272 -137
- utilities/os.py +61 -4
- utilities/parse.py +59 -61
- utilities/pathlib.py +281 -40
- utilities/permissions.py +298 -0
- utilities/pickle.py +2 -2
- utilities/platform.py +24 -5
- utilities/polars.py +1214 -430
- utilities/polars_ols.py +1 -1
- utilities/postgres.py +408 -0
- utilities/pottery.py +113 -26
- utilities/pqdm.py +10 -11
- utilities/psutil.py +6 -57
- utilities/pwd.py +28 -0
- utilities/pydantic.py +4 -54
- utilities/pydantic_settings.py +240 -0
- utilities/pydantic_settings_sops.py +76 -0
- utilities/pyinstrument.py +8 -10
- utilities/pytest.py +227 -121
- utilities/pytest_plugins/__init__.py +1 -0
- utilities/pytest_plugins/pytest_randomly.py +23 -0
- utilities/pytest_plugins/pytest_regressions.py +56 -0
- utilities/pytest_regressions.py +26 -46
- utilities/random.py +13 -9
- utilities/re.py +58 -28
- utilities/redis.py +401 -550
- utilities/scipy.py +1 -1
- utilities/sentinel.py +10 -0
- utilities/shelve.py +4 -1
- utilities/shutil.py +25 -0
- utilities/slack_sdk.py +36 -106
- utilities/sqlalchemy.py +502 -473
- utilities/sqlalchemy_polars.py +38 -94
- utilities/string.py +2 -3
- utilities/subprocess.py +1572 -0
- utilities/tempfile.py +86 -4
- utilities/testbook.py +50 -0
- utilities/text.py +165 -42
- utilities/timer.py +37 -65
- utilities/traceback.py +158 -929
- utilities/types.py +146 -116
- utilities/typing.py +531 -71
- utilities/tzdata.py +1 -53
- utilities/tzlocal.py +6 -23
- utilities/uuid.py +43 -5
- utilities/version.py +27 -26
- utilities/whenever.py +1776 -386
- utilities/zoneinfo.py +84 -22
- dycw_utilities-0.129.10.dist-info/METADATA +0 -241
- dycw_utilities-0.129.10.dist-info/RECORD +0 -96
- dycw_utilities-0.129.10.dist-info/WHEEL +0 -4
- dycw_utilities-0.129.10.dist-info/licenses/LICENSE +0 -21
- utilities/datetime.py +0 -1409
- utilities/eventkit.py +0 -402
- utilities/loguru.py +0 -144
- utilities/luigi.py +0 -228
- utilities/period.py +0 -324
- utilities/pyrsistent.py +0 -89
- utilities/python_dotenv.py +0 -105
- utilities/streamlit.py +0 -105
- utilities/sys.py +0 -87
- utilities/tenacity.py +0 -145
utilities/polars.py
CHANGED
@@ -1,35 +1,24 @@
 from __future__ import annotations
 
-import datetime as dt
 import enum
 from collections.abc import Callable, Iterator, Sequence
 from collections.abc import Set as AbstractSet
-from contextlib import suppress
 from dataclasses import asdict, dataclass
 from functools import partial, reduce
-from itertools import chain, product
-from math import ceil, log
+from itertools import chain, pairwise, product
+from math import ceil, log, pi, sqrt
 from pathlib import Path
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Generic,
-    Literal,
-    TypeVar,
-    assert_never,
-    cast,
-    overload,
-    override,
-)
+from typing import TYPE_CHECKING, Any, Literal, assert_never, cast, overload, override
 from uuid import UUID
 from zoneinfo import ZoneInfo
 
 import polars as pl
+import whenever
 from polars import (
     Boolean,
     DataFrame,
-    Date,
     Datetime,
+    Duration,
     Expr,
     Float64,
     Int64,
@@ -43,12 +32,16 @@ from polars import (
     any_horizontal,
     col,
     concat,
+    concat_list,
+    datetime_range,
     int_range,
     lit,
+    max_horizontal,
     struct,
     sum_horizontal,
     when,
 )
+from polars._typing import PolarsDataType
 from polars.datatypes import DataType, DataTypeClass
 from polars.exceptions import (
     ColumnNotFoundError,
@@ -56,58 +49,62 @@ from polars.exceptions import (
     OutOfBoundsError,
     PolarsInefficientMapWarning,
 )
-from polars.
+from polars.schema import Schema
+from polars.testing import assert_frame_equal, assert_series_equal
+from whenever import DateDelta, DateTimeDelta, PlainDateTime, TimeDelta, ZonedDateTime
 
-
+import utilities.math
+from utilities.dataclasses import yield_fields
 from utilities.errors import ImpossibleCaseError
-from utilities.functions import (
-
-    ensure_int,
-    is_dataclass_class,
-    is_dataclass_instance,
-    is_iterable_of,
-    make_isinstance,
-)
+from utilities.functions import get_class_name
+from utilities.gzip import read_binary
 from utilities.iterables import (
     CheckIterablesEqualError,
     CheckMappingsEqualError,
-    CheckSubSetError,
     CheckSuperMappingError,
     OneEmptyError,
     OneNonUniqueError,
     always_iterable,
     check_iterables_equal,
     check_mappings_equal,
-    check_subset,
     check_supermapping,
     is_iterable_not_str,
     one,
+    resolve_include_and_exclude,
 )
+from utilities.json import write_formatted_json
 from utilities.math import (
+    MAX_DECIMALS,
     CheckIntegerError,
     check_integer,
     ewm_parameters,
     is_less_than,
     is_non_negative,
-    number_of_decimals,
 )
 from utilities.reprlib import get_repr
-from utilities.types import MaybeStr, Number, WeekDay
+from utilities.types import MaybeStr, Number, PathLike, WeekDay
 from utilities.typing import (
     get_args,
-
+    is_dataclass_class,
+    is_dataclass_instance,
     is_frozenset_type,
-    is_instance_gen,
     is_list_type,
     is_literal_type,
     is_optional_type,
     is_set_type,
-
+    make_isinstance,
 )
 from utilities.warnings import suppress_warnings
-from utilities.
+from utilities.whenever import (
+    DatePeriod,
+    TimePeriod,
+    ZonedDateTimePeriod,
+    to_py_time_delta,
+)
+from utilities.zoneinfo import UTC, to_time_zone_name
 
 if TYPE_CHECKING:
+    import datetime as dt
     from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
     from collections.abc import Set as AbstractSet
 
@@ -118,6 +115,7 @@ if TYPE_CHECKING:
         JoinValidation,
         PolarsDataType,
         QuantileMethod,
+        RoundMode,
         SchemaDict,
         TimeUnit,
     )
@@ -127,15 +125,20 @@ if TYPE_CHECKING:
     from utilities.types import Dataclass, MaybeIterable, StrMapping, TimeZoneLike
 
 
-_T = TypeVar("_T")
 type ExprLike = MaybeStr[Expr]
+type ExprOrSeries = Expr | Series
 DatetimeHongKong = Datetime(time_zone="Asia/Hong_Kong")
 DatetimeTokyo = Datetime(time_zone="Asia/Tokyo")
 DatetimeUSCentral = Datetime(time_zone="US/Central")
 DatetimeUSEastern = Datetime(time_zone="US/Eastern")
 DatetimeUTC = Datetime(time_zone="UTC")
+DatePeriodDType = Struct({"start": pl.Date, "end": pl.Date})
+TimePeriodDType = Struct({"start": pl.Time, "end": pl.Time})
+
+
 _FINITE_EWM_MIN_WEIGHT = 0.9999
 
+
 ##
 
 
@@ -215,7 +218,7 @@ def acf(
             df_confints = _acf_process_confints(confints)
             df_qstats_pvalues = _acf_process_qstats_pvalues(qstats, pvalues)
             return join(df_acfs, df_confints, df_qstats_pvalues, on=["lag"], how="left")
-        case _ as never:
+        case never:
             assert_never(never)
 
 
@@ -245,11 +248,6 @@ def _acf_process_qstats_pvalues(qstats: NDArrayF, pvalues: NDArrayF, /) -> DataF
 ##
 
 
-# def acf_halflife(series: Series,/)
-
-##
-
-
 def adjust_frequencies(
     series: Series,
     /,
@@ -271,29 +269,108 @@ def adjust_frequencies(
 ##
 
 
-def
-
-
-
-
-
-
-
-
-
-
-
+def all_dataframe_columns(
+    df: DataFrame, expr: IntoExprColumn, /, *exprs: IntoExprColumn
+) -> Series:
+    """Return a DataFrame column with `AND` applied to additional exprs/series."""
+    name = get_expr_name(df, expr)
+    return df.select(all_horizontal(expr, *exprs).alias(name))[name]
+
+
+def any_dataframe_columns(
+    df: DataFrame, expr: IntoExprColumn, /, *exprs: IntoExprColumn
+) -> Series:
+    """Return a DataFrame column with `OR` applied to additional exprs/series."""
+    name = get_expr_name(df, expr)
+    return df.select(any_horizontal(expr, *exprs).alias(name))[name]
+
+
+def all_series(series: Series, /, *columns: ExprOrSeries) -> Series:
+    """Return a Series with `AND` applied to additional exprs/series."""
+    return all_dataframe_columns(series.to_frame(), series.name, *columns)
+
+
+def any_series(series: Series, /, *columns: ExprOrSeries) -> Series:
+    """Return a Series with `OR` applied to additional exprs/series."""
+    df = series.to_frame()
+    name = series.name
+    return df.select(any_horizontal(name, *columns).alias(name))[name]
+
+
+##
+
+
+def append_row(
+    df: DataFrame,
+    row: StrMapping,
+    /,
+    *,
+    predicate: Callable[[StrMapping], bool] | None = None,
+    disallow_extra: bool = False,
+    disallow_missing: bool | MaybeIterable[str] = False,
+    disallow_null: bool | MaybeIterable[str] = False,
+    in_place: bool = False,
+) -> DataFrame:
+    """Append a row to a DataFrame."""
+    if (predicate is not None) and not predicate(row):
+        raise _AppendRowPredicateError(df=df, row=row)
+    if disallow_extra and (len(extra := set(row) - set(df.columns)) >= 1):
+        raise _AppendRowExtraKeysError(df=df, row=row, extra=extra)
+    if disallow_missing is not False:
+        missing = set(df.columns) - set(row)
+        if disallow_missing is not True:
+            missing &= set(always_iterable(disallow_missing))
+        if len(missing) >= 1:
+            raise _AppendRowMissingKeysError(df=df, row=row, missing=missing)
+    other = DataFrame(data=[row], schema=df.schema)
+    if disallow_null:
+        other_null = other.select(col(c).is_null().any() for c in other.columns)
+        null = {k for k, v in other_null.row(0, named=True).items() if v}
+        if disallow_null is not True:
+            null &= set(always_iterable(disallow_null))
+        if len(null) >= 1:
+            raise _AppendRowNullColumnsError(df=df, row=row, columns=null)
+    return df.extend(other) if in_place else df.vstack(other)
+
+
+@dataclass(kw_only=True, slots=True)
+class AppendRowError(Exception):
+    df: DataFrame
+    row: StrMapping
+
+
+@dataclass(kw_only=True, slots=True)
+class _AppendRowPredicateError(AppendRowError):
+    @override
+    def __str__(self) -> str:
+        return f"Predicate failed; got {get_repr(self.row)}"
+
+
+@dataclass(kw_only=True, slots=True)
+class _AppendRowExtraKeysError(AppendRowError):
+    extra: AbstractSet[str]
+
+    @override
+    def __str__(self) -> str:
+        return f"Extra key(s) found; got {get_repr(self.extra)}"
+
+
+@dataclass(kw_only=True, slots=True)
+class _AppendRowMissingKeysError(AppendRowError):
+    missing: AbstractSet[str]
+
+    @override
+    def __str__(self) -> str:
+        return f"Missing key(s) found; got {get_repr(self.missing)}"
 
 
 @dataclass(kw_only=True, slots=True)
-class
-
-    right: AbstractSet[_T]
-    extra: AbstractSet[_T]
+class _AppendRowNullColumnsError(AppendRowError):
+    columns: AbstractSet[str]
 
     @override
     def __str__(self) -> str:
-        return f"
+        return f"Null column(s) found; got {get_repr(self.columns)}"
 
 
 ##
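A minimal usage sketch of the new `append_row` helper (not part of the diff; the sample frame and row are invented, and the call follows the signature shown in the hunk above):

from polars import DataFrame

from utilities.polars import append_row

df = DataFrame({"x": [1, 2], "y": ["a", "b"]})
# With disallow_missing=True, a row lacking any column of `df` raises.
df2 = append_row(df, {"x": 3, "y": "c"}, disallow_missing=True)
assert df2.height == 3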
@@ -308,8 +385,8 @@ def are_frames_equal(
     check_column_order: bool = True,
     check_dtypes: bool = True,
     check_exact: bool = False,
-
-
+    rel_tol: float = 1e-5,
+    abs_tol: float = 1e-8,
     categorical_as_str: bool = False,
 ) -> bool:
     """Check if two DataFrames are equal."""
@@ -321,8 +398,8 @@ def are_frames_equal(
             check_column_order=check_column_order,
             check_dtypes=check_dtypes,
             check_exact=check_exact,
-
-
+            rel_tol=rel_tol,
+            abs_tol=abs_tol,
            categorical_as_str=categorical_as_str,
         )
     except AssertionError:
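A quick sketch of the `rel_tol`/`abs_tol` tolerance keywords introduced above (not part of the diff; the frames are invented):

from polars import DataFrame

from utilities.polars import are_frames_equal

x = DataFrame({"a": [1.0]})
y = DataFrame({"a": [1.0 + 1e-9]})
assert are_frames_equal(x, y)  # inside the default tolerances
assert not are_frames_equal(x, y, check_exact=True)  # exact comparison fails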
@@ -352,7 +429,7 @@ def bernoulli(
             return bernoulli(series.len(), true=true, seed=seed, name=name)
         case DataFrame() as df:
             return bernoulli(df.height, true=true, seed=seed, name=name)
-        case _ as never:
+        case never:
             assert_never(never)
 
 
@@ -386,7 +463,7 @@ def boolean_value_counts(
                 (false / total).alias("false (%)"),
                 (null / total).alias("null (%)"),
             )
-        case _ as never:
+        case never:
             assert_never(never)
 
 
@@ -429,29 +506,6 @@ class BooleanValueCountsError(Exception):
 ##
 
 
-@overload
-def ceil_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
-@overload
-def ceil_datetime(column: Series, every: ExprLike, /) -> Series: ...
-@overload
-def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> Expr | Series: ...
-def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> Expr | Series:
-    """Compute the `ceil` of a datetime column."""
-    column = ensure_expr_or_series(column)
-    rounded = column.dt.round(every)
-    ceil = (
-        when(column <= rounded)
-        .then(rounded)
-        .otherwise(column.dt.offset_by(every).dt.round(every))
-    )
-    if isinstance(column, Expr):
-        return ceil
-    return DataFrame().with_columns(ceil.alias(column.name))[column.name]
-
-
-##
-
-
 def check_polars_dataframe(
     df: DataFrame,
     /,
@@ -511,7 +565,7 @@ def _check_polars_dataframe_columns(df: DataFrame, columns: Iterable[str], /) ->
 
 @dataclass(kw_only=True, slots=True)
 class _CheckPolarsDataFrameColumnsError(CheckPolarsDataFrameError):
-    columns:
+    columns: list[str]
 
     @override
     def __str__(self) -> str:
@@ -770,29 +824,22 @@ def choice(
                 name=name,
                 dtype=dtype,
             )
-        case _ as never:
+        case never:
             assert_never(never)
 
 
 ##
 
 
-def
-
-
-    return data[one(data.columns)]
-
-
-##
-
-
-def columns_to_dict(df: DataFrame, key: str, value: str, /) -> dict[Any, Any]:
+def columns_to_dict(
+    df: DataFrame, key: IntoExprColumn, value: IntoExprColumn, /
+) -> dict[Any, Any]:
     """Map a pair of columns into a dictionary. Must be unique on `key`."""
-
-
-
-
-    return dict(zip(
+    df = df.select(key, value)
+    key_col, value_col = [df[get_expr_name(df, expr)] for expr in [key, value]]
+    if key_col.is_duplicated().any():
+        raise ColumnsToDictError(df=df, key=key_col.name)
+    return dict(zip(key_col, value_col, strict=True))
 
 
 @dataclass(kw_only=True, slots=True)
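A usage sketch of `columns_to_dict`, which now accepts expressions as well as column names (not part of the diff; the frame is invented):

from polars import DataFrame

from utilities.polars import columns_to_dict

df = DataFrame({"key": [1, 2, 3], "value": ["a", "b", "c"]})
assert columns_to_dict(df, "key", "value") == {1: "a", 2: "b", 3: "c"}
# A duplicated key column raises ColumnsToDictError instead.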
@@ -835,7 +882,7 @@ def convert_time_zone(
 
 def _convert_time_zone_one(sr: Series, /, *, time_zone: TimeZoneLike = UTC) -> Series:
     if isinstance(sr.dtype, Datetime):
-        return sr.dt.convert_time_zone(
+        return sr.dt.convert_time_zone(to_time_zone_name(time_zone))
     return sr
 
 
@@ -856,13 +903,13 @@ def cross(
     up_or_down: Literal["up", "down"],
     other: Number | IntoExprColumn,
     /,
-) -> Expr | Series: ...
+) -> ExprOrSeries: ...
 def cross(
     expr: IntoExprColumn,
     up_or_down: Literal["up", "down"],
     other: Number | IntoExprColumn,
     /,
-) -> Expr | Series:
+) -> ExprOrSeries:
     """Compute when a cross occurs."""
     return _cross_or_touch(expr, "cross", up_or_down, other)
 
@@ -881,13 +928,13 @@ def touch(
     up_or_down: Literal["up", "down"],
     other: Number | IntoExprColumn,
     /,
-) -> Expr | Series: ...
+) -> ExprOrSeries: ...
 def touch(
     expr: IntoExprColumn,
     up_or_down: Literal["up", "down"],
     other: Number | IntoExprColumn,
     /,
-) -> Expr | Series:
+) -> ExprOrSeries:
     """Compute when a touch occurs."""
     return _cross_or_touch(expr, "touch", up_or_down, other)
 
@@ -898,7 +945,7 @@ def _cross_or_touch(
     up_or_down: Literal["up", "down"],
     other: Number | IntoExprColumn,
     /,
-) -> Expr | Series:
+) -> ExprOrSeries:
     """Compute when a column crosses/touches a threshold."""
     expr = ensure_expr_or_series(expr)
     match other:
@@ -906,7 +953,7 @@ def _cross_or_touch(
             ...
         case str() | Expr() | Series():
             other = ensure_expr_or_series(other)
-        case _ as never:
+        case never:
             assert_never(never)
     enough = int_range(end=pl.len()) >= 1
     match cross_or_touch, up_or_down:
@@ -918,7 +965,7 @@ def _cross_or_touch(
             current = expr >= other
         case "touch", "down":
             current = expr <= other
-        case _ as never:
+        case never:
             assert_never(never)
     prev = current.shift()
     result = when(enough & expr.is_finite()).then(current & ~prev)
@@ -970,7 +1017,7 @@ def cross_rolling_quantile(
     weights: list[float] | None = None,
     min_samples: int | None = None,
     center: bool = False,
-) -> Expr | Series: ...
+) -> ExprOrSeries: ...
 def cross_rolling_quantile(
     expr: IntoExprColumn,
     up_or_down: Literal["up", "down"],
@@ -982,7 +1029,7 @@ def cross_rolling_quantile(
     weights: list[float] | None = None,
     min_samples: int | None = None,
     center: bool = False,
-) -> Expr | Series:
+) -> ExprOrSeries:
     """Compute when a column crosses its rolling quantile."""
     expr = ensure_expr_or_series(expr)
     rolling = expr.rolling_quantile(
@@ -1005,6 +1052,7 @@ def dataclass_to_dataframe(
     *,
     globalns: StrMapping | None = None,
     localns: StrMapping | None = None,
+    warn_name_errors: bool = False,
 ) -> DataFrame:
     """Convert a dataclass/es into a DataFrame."""
     objs = list(always_iterable(objs))
@@ -1018,22 +1066,51 @@ def dataclass_to_dataframe(
     ) from None
     data = list(map(asdict, objs))
     first, *_ = objs
-    schema = dataclass_to_schema(
+    schema = dataclass_to_schema(
+        first, globalns=globalns, localns=localns, warn_name_errors=warn_name_errors
+    )
     df = DataFrame(data, schema=schema, orient="row")
-    return map_over_columns(
-
-
-    def
-        if series.dtype
-
-
-
-
-
-
-
-
+    return map_over_columns(_dataclass_to_dataframe_cast, df)
+
+
+def _dataclass_to_dataframe_cast(series: Series, /) -> Series:
+    if series.dtype != Object:
+        return series
+    if series.map_elements(make_isinstance(whenever.Date), return_dtype=Boolean).all():
+        return series.map_elements(lambda x: x.py_date(), return_dtype=pl.Date)
+    if series.map_elements(make_isinstance(DateDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    if series.map_elements(make_isinstance(DateTimeDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
+    is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
+    if is_path or is_uuid:
+        with suppress_warnings(
+            category=cast("type[Warning]", PolarsInefficientMapWarning)
+        ):
+            return series.map_elements(str, return_dtype=String)
+    if series.map_elements(make_isinstance(whenever.Time), return_dtype=Boolean).all():
+        return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
+    if series.map_elements(make_isinstance(TimeDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    if series.map_elements(make_isinstance(ZonedDateTime), return_dtype=Boolean).all():
+        return_dtype = zoned_date_time_dtype(time_zone=one({dt.tz for dt in series}))
+        return series.map_elements(lambda x: x.py_datetime(), return_dtype=return_dtype)
+    if series.map_elements(
+        lambda x: isinstance(x, dict) and (set(x) == {"start", "end"}),
+        return_dtype=Boolean,
+    ).all():
+        start = _dataclass_to_dataframe_cast(
+            series.map_elements(lambda x: x["start"], return_dtype=Object)
+        ).alias("start")
+        end = _dataclass_to_dataframe_cast(
+            series.map_elements(lambda x: x["end"], return_dtype=Object)
+        ).alias("end")
+        name = series.name
+        return concat_series(start, end).select(
+            struct(start=start, end=end).alias(name)
+        )[name]
+    raise NotImplementedError(series)  # pragma: no cover
 
 
 @dataclass(kw_only=True, slots=True)
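A sketch of `dataclass_to_dataframe` on a dataclass holding a `whenever.Date`, which the new `_dataclass_to_dataframe_cast` converts from Object to pl.Date (not part of the diff; the dataclass is invented, and passing globals() for type resolution is an assumption):

from dataclasses import dataclass

import polars as pl
import whenever

from utilities.polars import dataclass_to_dataframe


@dataclass(kw_only=True)
class Row:
    x: int
    d: whenever.Date


df = dataclass_to_dataframe([Row(x=1, d=whenever.Date(2024, 1, 1))], globalns=globals())
assert df.schema["d"] == pl.Date  # cast from Object via x.py_date()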
@@ -1074,20 +1151,14 @@ def dataclass_to_schema(
     for field in yield_fields(
         obj, globalns=globalns, localns=localns, warn_name_errors=warn_name_errors
     ):
-        if is_dataclass_instance(field.value):
+        if is_dataclass_instance(field.value) and not (
+            isinstance(field.type_, type)
+            and issubclass(field.type_, (DatePeriod, TimePeriod, ZonedDateTimePeriod))
+        ):
             dtypes = dataclass_to_schema(
                 field.value, globalns=globalns, localns=localns
             )
             dtype = struct_dtype(**dtypes)
-        elif field.type_ is dt.datetime:
-            dtype = _dataclass_to_schema_datetime(field)
-        elif is_union_type(field.type_) and set(
-            get_args(field.type_, optional_drop_none=True)
-        ) == {dt.date, dt.datetime}:
-            if is_instance_gen(field.value, dt.date):
-                dtype = Date
-            else:
-                dtype = _dataclass_to_schema_datetime(field)
         else:
             dtype = _dataclass_to_schema_one(
                 field.type_, globalns=globalns, localns=localns
@@ -1096,14 +1167,6 @@ def dataclass_to_schema(
     return out
 
 
-def _dataclass_to_schema_datetime(
-    field: _YieldFieldsInstance[dt.datetime], /
-) -> PolarsDataType:
-    if field.value.tzinfo is None:
-        return Datetime
-    return zoned_datetime(time_zone=ensure_time_zone(field.value.tzinfo))
-
-
 def _dataclass_to_schema_one(
     obj: Any,
     /,
@@ -1111,20 +1174,35 @@ def _dataclass_to_schema_one(
     globalns: StrMapping | None = None,
     localns: StrMapping | None = None,
 ) -> PolarsDataType:
-    if obj
-
-
-
-
-
-
-
-
-
-
-
-
+    if isinstance(obj, type):
+        if issubclass(obj, bool):
+            return Boolean
+        if issubclass(obj, int):
+            return Int64
+        if issubclass(obj, float):
+            return Float64
+        if issubclass(obj, str):
+            return String
+        if issubclass(
+            obj,
+            (
+                DateDelta,
+                DatePeriod,
+                DateTimeDelta,
+                Path,
+                PlainDateTime,
+                TimeDelta,
+                TimePeriod,
+                UUID,
+                ZonedDateTime,
+                ZonedDateTimePeriod,
+                whenever.Date,
+                whenever.Time,
+            ),
+        ):
+            return Object
+        if issubclass(obj, enum.Enum):
+            return pl.Enum([e.name for e in obj])
     if is_dataclass_class(obj):
         out: dict[str, Any] = {}
         for field in yield_fields(obj, globalns=globalns, localns=localns):
@@ -1150,27 +1228,6 @@ def _dataclass_to_schema_one(
 ##
 
 
-def drop_null_struct_series(series: Series, /) -> Series:
-    """Drop nulls in a struct-dtype Series as per the <= 1.1 definition."""
-    try:
-        is_not_null = is_not_null_struct_series(series)
-    except IsNotNullStructSeriesError as error:
-        raise DropNullStructSeriesError(series=error.series) from None
-    return series.filter(is_not_null)
-
-
-@dataclass(kw_only=True, slots=True)
-class DropNullStructSeriesError(Exception):
-    series: Series
-
-    @override
-    def __str__(self) -> str:
-        return f"Series must have Struct-dtype; got {self.series.dtype}"
-
-
-##
-
-
 def ensure_data_type(dtype: PolarsDataType, /) -> DataType:
     """Ensure a data type is returned."""
     return dtype if isinstance(dtype, DataType) else dtype()
@@ -1184,8 +1241,8 @@ def ensure_expr_or_series(column: ExprLike, /) -> Expr: ...
 @overload
 def ensure_expr_or_series(column: Series, /) -> Series: ...
 @overload
-def ensure_expr_or_series(column: IntoExprColumn, /) -> Expr | Series: ...
-def ensure_expr_or_series(column: IntoExprColumn, /) -> Expr | Series:
+def ensure_expr_or_series(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def ensure_expr_or_series(column: IntoExprColumn, /) -> ExprOrSeries:
     """Ensure a column expression or Series is returned."""
     return col(column) if isinstance(column, str) else column
 
@@ -1195,7 +1252,7 @@ def ensure_expr_or_series(column: IntoExprColumn, /) -> Expr | Series:
 
 def ensure_expr_or_series_many(
     *columns: IntoExprColumn, **named_columns: IntoExprColumn
-) -> Sequence[Expr | Series]:
+) -> Sequence[ExprOrSeries]:
     """Ensure a set of column expressions and/or Series are returned."""
     args = map(ensure_expr_or_series, columns)
     kwargs = (ensure_expr_or_series(v).alias(k) for k, v in named_columns.items())
@@ -1205,6 +1262,119 @@ def ensure_expr_or_series_many(
 ##
 
 
+def expr_to_series(expr: Expr, /) -> Series:
+    """Collect a column expression into a Series."""
+    return one_column(DataFrame().with_columns(expr))
+
+
+##
+
+
+@overload
+def filter_date(
+    column: ExprLike = "datetime",
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[whenever.Date] | None = None,
+    exclude: MaybeIterable[whenever.Date] | None = None,
+) -> Expr: ...
+@overload
+def filter_date(
+    column: Series,
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[whenever.Date] | None = None,
+    exclude: MaybeIterable[whenever.Date] | None = None,
+) -> Series: ...
+@overload
+def filter_date(
+    column: IntoExprColumn = "datetime",
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[whenever.Date] | None = None,
+    exclude: MaybeIterable[whenever.Date] | None = None,
+) -> ExprOrSeries: ...
+def filter_date(
+    column: IntoExprColumn = "datetime",
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[whenever.Date] | None = None,
+    exclude: MaybeIterable[whenever.Date] | None = None,
+) -> ExprOrSeries:
+    """Compute the filter based on a set of dates."""
+    column = ensure_expr_or_series(column)
+    if time_zone is not None:
+        column = column.dt.convert_time_zone(time_zone.key)
+    keep = true_like(column)
+    date = column.dt.date()
+    include, exclude = resolve_include_and_exclude(include=include, exclude=exclude)
+    if include is not None:
+        keep &= date.is_in([d.py_date() for d in include])
+    if exclude is not None:
+        keep &= ~date.is_in([d.py_date() for d in exclude])
+    return try_reify_expr(keep, column)
+
+
+@overload
+def filter_time(
+    column: ExprLike = "datetime",
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+    exclude: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+) -> Expr: ...
+@overload
+def filter_time(
+    column: Series,
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+    exclude: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+) -> Series: ...
+@overload
+def filter_time(
+    column: IntoExprColumn = "datetime",
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+    exclude: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+) -> ExprOrSeries: ...
+def filter_time(
+    column: IntoExprColumn = "datetime",
+    /,
+    *,
+    time_zone: ZoneInfo | None = None,
+    include: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+    exclude: MaybeIterable[tuple[whenever.Time, whenever.Time]] | None = None,
+) -> ExprOrSeries:
+    """Compute the filter based on a set of times."""
+    column = ensure_expr_or_series(column)
+    if time_zone is not None:
+        column = column.dt.convert_time_zone(time_zone.key)
+    keep = true_like(column)
+    time = column.dt.time()
+    include, exclude = resolve_include_and_exclude(include=include, exclude=exclude)
+    if include is not None:
+        keep &= any_horizontal(
+            time.is_between(s.py_time(), e.py_time()) for s, e in include
+        )
+    if exclude is not None:
+        keep &= ~any_horizontal(
+            time.is_between(s.py_time(), e.py_time()) for s, e in exclude
+        )
+    return try_reify_expr(keep, column)
+
+
+##
+
+
 @overload
 def finite_ewm_mean(
     column: ExprLike,
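A sketch of the new `filter_time`, keeping rows whose time of day falls inside an included window (not part of the diff; the data and window are invented):

from datetime import UTC, datetime

import polars as pl
import whenever

from utilities.polars import filter_time

datetimes = pl.datetime_range(
    datetime(2024, 1, 1, tzinfo=UTC),
    datetime(2024, 1, 2, tzinfo=UTC),
    interval="1h",
    eager=True,
).alias("datetime")
df = datetimes.to_frame()
# Boolean mask: True where the time is within 09:00-17:00.
mask = filter_time(df["datetime"], include=[(whenever.Time(9), whenever.Time(17))])
business_hours = df.filter(mask)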
@@ -1237,7 +1407,7 @@ def finite_ewm_mean(
     half_life: float | None = None,
     alpha: float | None = None,
     min_weight: float = _FINITE_EWM_MIN_WEIGHT,
-) -> Expr | Series: ...
+) -> ExprOrSeries: ...
 def finite_ewm_mean(
     column: IntoExprColumn,
     /,
@@ -1247,7 +1417,7 @@ def finite_ewm_mean(
     half_life: float | None = None,
     alpha: float | None = None,
     min_weight: float = _FINITE_EWM_MIN_WEIGHT,
-) -> Expr | Series:
+) -> ExprOrSeries:
     """Compute a finite EWMA."""
     try:
         weights = _finite_ewm_weights(
@@ -1309,23 +1479,14 @@ class _FiniteEWMWeightsError(Exception):
 
 
 @overload
-def
+def first_true_horizontal(*columns: Series) -> Series: ...
 @overload
-def
-
-
-
-
-
-    rounded = column.dt.round(every)
-    floor = (
-        when(column >= rounded)
-        .then(rounded)
-        .otherwise(column.dt.offset_by("-" + every).dt.round(every))
-    )
-    if isinstance(column, Expr):
-        return floor
-    return DataFrame().with_columns(floor.alias(column.name))[column.name]
+def first_true_horizontal(*columns: IntoExprColumn) -> ExprOrSeries: ...
+def first_true_horizontal(*columns: IntoExprColumn) -> ExprOrSeries:
+    """Get the index of the first true in each row."""
+    columns2 = ensure_expr_or_series_many(*columns)
+    expr = when(any_horizontal(*columns2)).then(concat_list(*columns2).list.arg_max())
+    return try_reify_expr(expr, *columns2)
 
 
 ##
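A sketch of the new `first_true_horizontal` (not part of the diff; the series are invented):

import polars as pl

from utilities.polars import first_true_horizontal

a = pl.Series("a", [True, False, False])
b = pl.Series("b", [False, True, False])
# Index of the first True per row; null where no column is True.
assert first_true_horizontal(a, b).to_list() == [0, 1, None]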
@@ -1342,13 +1503,24 @@ def get_data_type_or_series_time_zone(
             dtype = dtype_cls()
         case Series() as series:
             dtype = series.dtype
-        case _ as never:
+        case never:
             assert_never(never)
-
-
-
-
-
+    match dtype:
+        case Datetime() as datetime:
+            if datetime.time_zone is None:
+                raise _GetDataTypeOrSeriesTimeZoneNotZonedError(dtype=datetime)
+            return ZoneInfo(datetime.time_zone)
+        case Struct() as struct:
+            try:
+                return one({
+                    get_data_type_or_series_time_zone(f.dtype) for f in struct.fields
+                })
+            except OneNonUniqueError as error:
+                raise _GetDataTypeOrSeriesTimeZoneStructNonUniqueError(
+                    dtype=struct, first=error.first, second=error.second
+                ) from None
+        case _:
+            raise _GetDataTypeOrSeriesTimeZoneNotDateTimeError(dtype=dtype)
 
 
 @dataclass(kw_only=True, slots=True)
@@ -1370,6 +1542,18 @@ class _GetDataTypeOrSeriesTimeZoneNotZonedError(GetDataTypeOrSeriesTimeZoneError
         return f"Data type must be zoned; got {self.dtype}"
 
 
+@dataclass(kw_only=True, slots=True)
+class _GetDataTypeOrSeriesTimeZoneStructNonUniqueError(
+    GetDataTypeOrSeriesTimeZoneError
+):
+    first: ZoneInfo
+    second: ZoneInfo
+
+    @override
+    def __str__(self) -> str:
+        return f"Struct data type must contain exactly one time zone; got {self.first}, {self.second} and perhaps more"
+
+
 ##
 
 
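A sketch of `get_data_type_or_series_time_zone`, which now also recurses into Struct dtypes; the scalar case (not part of the diff; the dtype is invented):

import polars as pl

from utilities.polars import get_data_type_or_series_time_zone

tz = get_data_type_or_series_time_zone(pl.Datetime(time_zone="Asia/Tokyo"))
assert tz.key == "Asia/Tokyo"
# A time-zone-naive Datetime, or a non-datetime dtype, raises instead.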
@@ -1379,9 +1563,8 @@ def get_expr_name(obj: Series | DataFrame, expr: IntoExprColumn, /) -> str:
         case Series() as series:
             return get_expr_name(series.to_frame(), expr)
         case DataFrame() as df:
-
-
-        case _ as never:
+            return one_column(df.select(expr)).name
+        case never:
             assert_never(never)
 
 
@@ -1403,50 +1586,31 @@ def get_frequency_spectrum(series: Series, /, *, d: int = 1) -> DataFrame:
 
 
 @overload
-def
-    series: Series, /, *, nullable: Literal[True]
-) -> int | None: ...
+def increasing_horizontal(*columns: ExprLike) -> Expr: ...
 @overload
-def
-    series: Series, /, *, nullable: Literal[False] = False
-) -> int: ...
+def increasing_horizontal(*columns: Series) -> Series: ...
 @overload
-def
-
-
-
-
-
-
-    if not isinstance(dtype := series.dtype, Float64):
-        raise _GetSeriesNumberOfDecimalsNotFloatError(dtype=dtype)
-    decimals = series.map_elements(number_of_decimals, return_dtype=Int64).max()
-    try:
-        return ensure_int(decimals, nullable=nullable)
-    except EnsureIntError:
-        raise _GetSeriesNumberOfDecimalsAllNullError(series=series) from None
-
-
-@dataclass(kw_only=True, slots=True)
-class GetSeriesNumberOfDecimalsError(Exception): ...
-
+def increasing_horizontal(*columns: IntoExprColumn) -> ExprOrSeries: ...
+def increasing_horizontal(*columns: IntoExprColumn) -> ExprOrSeries:
+    """Check if a set of columns are increasing."""
+    columns2 = ensure_expr_or_series_many(*columns)
+    if len(columns2) == 0:
+        return lit(value=True, dtype=Boolean)
+    return all_horizontal(prev < curr for prev, curr in pairwise(columns2))
 
-@dataclass(kw_only=True, slots=True)
-class _GetSeriesNumberOfDecimalsNotFloatError(GetSeriesNumberOfDecimalsError):
-    dtype: DataType
-
-    @override
-    def __str__(self) -> str:
-        return f"Data type must be Float64; got {self.dtype}"
-
-
-@dataclass(kw_only=True, slots=True)
-class _GetSeriesNumberOfDecimalsAllNullError(GetSeriesNumberOfDecimalsError):
-    series: Series
 
-
-
+@overload
+def decreasing_horizontal(*columns: ExprLike) -> Expr: ...
+@overload
+def decreasing_horizontal(*columns: Series) -> Series: ...
+@overload
+def decreasing_horizontal(*columns: IntoExprColumn) -> ExprOrSeries: ...
+def decreasing_horizontal(*columns: IntoExprColumn) -> ExprOrSeries:
+    """Check if a set of columns are decreasing."""
+    columns2 = ensure_expr_or_series_many(*columns)
+    if len(columns2) == 0:
+        return lit(value=True, dtype=Boolean)
+    return all_horizontal(prev > curr for prev, curr in pairwise(columns2))
 
 
 ##
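A sketch of the new `increasing_horizontal` (not part of the diff; the frame is invented):

import polars as pl

from utilities.polars import increasing_horizontal

df = pl.DataFrame({"a": [1, 3], "b": [2, 2]})
out = df.select(inc=increasing_horizontal("a", "b"))
# Strict row-wise check: 1 < 2 holds, 3 < 2 does not.
assert out["inc"].to_list() == [True, False]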
@@ -1579,13 +1743,49 @@ def integers(
                 name=name,
                 dtype=dtype,
             )
-        case _ as never:
+        case never:
             assert_never(never)
 
 
 ##
 
 
+@overload
+def is_close(
+    x: ExprLike, y: ExprLike, /, *, rel_tol: float = 1e-9, abs_tol: float = 0
+) -> Expr: ...
+@overload
+def is_close(
+    x: Series, y: Series, /, *, rel_tol: float = 1e-9, abs_tol: float = 0
+) -> Series: ...
+@overload
+def is_close(
+    x: IntoExprColumn,
+    y: IntoExprColumn,
+    /,
+    *,
+    rel_tol: float = 1e-9,
+    abs_tol: float = 0,
+) -> ExprOrSeries: ...
+def is_close(
+    x: IntoExprColumn,
+    y: IntoExprColumn,
+    /,
+    *,
+    rel_tol: float = 1e-9,
+    abs_tol: float = 0,
+) -> ExprOrSeries:
+    """Check if two columns are close."""
+    x, y = map(ensure_expr_or_series, [x, y])
+    result = (x - y).abs() <= max_horizontal(
+        rel_tol * max_horizontal(x.abs(), y.abs()), abs_tol
+    )
+    return try_reify_expr(result, x, y)
+
+
+##
+
+
 @overload
 def is_near_event(
     *exprs: ExprLike, before: int = 0, after: int = 0, **named_exprs: ExprLike
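`is_close` is a columnar analogue of `math.isclose`, using the max(relative, absolute) tolerance shown above; a sketch (not part of the diff; the series are invented):

import polars as pl

from utilities.polars import is_close

x = pl.Series("x", [1.0, 1.0])
y = pl.Series("y", [1.0 + 1e-12, 2.0])
assert is_close(x, y).to_list() == [True, False]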
@@ -1600,13 +1800,13 @@ def is_near_event(
     before: int = 0,
     after: int = 0,
     **named_exprs: IntoExprColumn,
-) -> Expr | Series: ...
+) -> ExprOrSeries: ...
 def is_near_event(
     *exprs: IntoExprColumn,
     before: int = 0,
     after: int = 0,
     **named_exprs: IntoExprColumn,
-) -> Expr | Series:
+) -> ExprOrSeries:
     """Compute the rows near any event."""
     if before <= -1:
         raise _IsNearEventBeforeError(before=before)
@@ -1649,87 +1849,177 @@ class _IsNearEventAfterError(IsNearEventError):
 ##
 
 
-
-
-
-
-
+@overload
+def is_true(column: ExprLike, /) -> Expr: ...
+@overload
+def is_true(column: Series, /) -> Series: ...
+@overload
+def is_true(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def is_true(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute when a boolean series is True."""
+    column = ensure_expr_or_series(column)
+    return (column.is_not_null()) & column
 
 
-@
-
-
+@overload
+def is_false(column: ExprLike, /) -> Expr: ...
+@overload
+def is_false(column: Series, /) -> Series: ...
+@overload
+def is_false(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def is_false(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute when a boolean series is False."""
+    column = ensure_expr_or_series(column)
+    return (column.is_not_null()) & (~column)
 
-
-
-
+
+##
+
+
+def join(
+    df: DataFrame,
+    *dfs: DataFrame,
+    on: MaybeIterable[str | Expr],
+    how: JoinStrategy = "inner",
+    validate: JoinValidation = "m:m",
+) -> DataFrame:
+    """Join a set of DataFrames."""
+    on_use = on if isinstance(on, str | Expr) else list(on)
+
+    def inner(left: DataFrame, right: DataFrame, /) -> DataFrame:
+        return left.join(right, on=on_use, how=how, validate=validate)
+
+    return reduce(inner, chain([df], dfs))
 
 
 ##
 
 
-def
-
-
-
-
-
-
-
-
-
+def join_into_periods(
+    left: DataFrame,
+    right: DataFrame,
+    /,
+    *,
+    on: str | None = None,
+    left_on: str | None = None,
+    right_on: str | None = None,
+    suffix: str = "_right",
+) -> DataFrame:
+    """Join a pair of DataFrames on their periods; left in right."""
+    match on, left_on, right_on:
+        case None, None, None:
+            return _join_into_periods_core(
+                left, right, "datetime", "datetime", suffix=suffix
+            )
+        case str(), None, None:
+            return _join_into_periods_core(left, right, on, on, suffix=suffix)
+        case None, str(), str():
+            return _join_into_periods_core(
+                left, right, left_on, right_on, suffix=suffix
+            )
+        case _:
+            raise _JoinIntoPeriodsArgumentsError(
+                on=on, left_on=left_on, right_on=right_on
+            )
+
+
+def _join_into_periods_core(
+    left: DataFrame,
+    right: DataFrame,
+    left_on: str,
+    right_on: str,
+    /,
+    *,
+    suffix: str = "_right",
+) -> DataFrame:
+    """Join a pair of DataFrames on their periods; left in right."""
+    _join_into_periods_check(left, left_on, "left")
+    _join_into_periods_check(right, right_on, "right")
+    joined = left.join_asof(
+        right,
+        left_on=col(left_on).struct["start"],
+        right_on=col(right_on).struct["start"],
+        strategy="backward",
+        suffix=suffix,
+        coalesce=False,
+    )
+    new = f"{left_on}{suffix}" if left_on == right_on else right_on
+    new_col = col(new)
+    is_correct = (new_col.struct["start"] <= col(left_on).struct["start"]) & (
+        col(left_on).struct["end"] <= new_col.struct["end"]
     )
+    return joined.with_columns(when(is_correct).then(new_col))
 
 
-def
-
-) ->
-
-
-
-
-
-
-
+def _join_into_periods_check(
+    df: DataFrame, column: str, left_or_right: Literal["left", "right"], /
+) -> None:
+    start = df[column].struct["start"]
+    end = df[column].struct["end"]
+    if not (start <= end).all():
+        raise _JoinIntoPeriodsPeriodError(left_or_right=left_or_right, column=column)
+    try:
+        assert_series_equal(start, start.sort())
+    except AssertionError:
+        raise _JoinIntoPeriodsSortedError(
+            left_or_right=left_or_right, column=column, start_or_end="start"
+        ) from None
+    try:
+        assert_series_equal(end, end.sort())
+    except AssertionError:
+        raise _JoinIntoPeriodsSortedError(
+            left_or_right=left_or_right, column=column, start_or_end="end"
+        ) from None
+    if (df.height >= 2) and (end[:-1] > start[1:]).any():
+        raise _JoinIntoPeriodsOverlappingError(
+            left_or_right=left_or_right, column=column
+        )
 
 
-
-
-    return reduce(_is_null_struct_to_expr_reducer, tail, col(head)).is_null()
+@dataclass(kw_only=True, slots=True)
+class JoinIntoPeriodsError(Exception): ...
 
 
-
-
+@dataclass(kw_only=True, slots=True)
+class _JoinIntoPeriodsArgumentsError(JoinIntoPeriodsError):
+    on: str | None
+    left_on: str | None
+    right_on: str | None
+
+    @override
+    def __str__(self) -> str:
+        return f"Either 'on' must be given or 'left_on' and 'right_on' must be given; got {self.on!r}, {self.left_on!r} and {self.right_on!r}"
 
 
 @dataclass(kw_only=True, slots=True)
-class
-
+class _JoinIntoPeriodsPeriodError(JoinIntoPeriodsError):
+    left_or_right: Literal["left", "right"]
+    column: str
 
     @override
     def __str__(self) -> str:
-        return f"
+        return f"{self.left_or_right.title()} DataFrame column {self.column!r} must contain valid periods"
 
 
-
+@dataclass(kw_only=True, slots=True)
+class _JoinIntoPeriodsSortedError(JoinIntoPeriodsError):
+    left_or_right: Literal["left", "right"]
+    column: str
+    start_or_end: Literal["start", "end"]
 
+    @override
+    def __str__(self) -> str:
+        return f"{self.left_or_right.title()} DataFrame column '{self.column}/{self.start_or_end}' must be sorted"
 
-def join(
-    df: DataFrame,
-    *dfs: DataFrame,
-    on: MaybeIterable[str | Expr],
-    how: JoinStrategy = "inner",
-    validate: JoinValidation = "m:m",
-) -> DataFrame:
-    """Join a set of DataFrames."""
-    on_use = on if isinstance(on, str | Expr) else list(on)
 
-
-
+@dataclass(kw_only=True, slots=True)
+class _JoinIntoPeriodsOverlappingError(JoinIntoPeriodsError):
+    left_or_right: Literal["left", "right"]
+    column: str
 
-
+    @override
+    def __str__(self) -> str:
+        return f"{self.left_or_right.title()} DataFrame column {self.column!r} must not contain overlaps"
 
 
 ##
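A sketch of the new `join_into_periods`, which asof-joins left rows whose [start, end] period is contained in a right period; period columns are structs with "start"/"end" fields (not part of the diff; the data are invented):

from datetime import datetime

import polars as pl

from utilities.polars import join_into_periods

left = pl.DataFrame(
    {"datetime": [{"start": datetime(2024, 1, 1, 9), "end": datetime(2024, 1, 1, 10)}]}
)
right = pl.DataFrame(
    {"datetime": [{"start": datetime(2024, 1, 1), "end": datetime(2024, 1, 2)}]}
)
# Adds a 'datetime_right' column, nulled wherever containment fails.
joined = join_into_periods(left, right)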
@@ -1754,7 +2044,7 @@ def map_over_columns(
|
|
|
1754
2044
|
return _map_over_series_one(func, series)
|
|
1755
2045
|
case DataFrame() as df:
|
|
1756
2046
|
return df.select(*(_map_over_series_one(func, df[c]) for c in df.columns))
|
|
1757
|
-
case
|
|
2047
|
+
case never:
|
|
1758
2048
|
assert_never(never)
|
|
1759
2049
|
|
|
1760
2050
|
|
|
@@ -1769,46 +2059,74 @@ def _map_over_series_one(func: Callable[[Series], Series], series: Series, /) ->
|
|
|
1769
2059
|
##
|
|
1770
2060
|
|
|
1771
2061
|
|
|
1772
|
-
def nan_sum_agg(column: str | Expr,
|
|
2062
|
+
def nan_sum_agg(column: str | Expr, /) -> Expr:
|
|
1773
2063
|
"""Nan sum aggregation."""
|
|
1774
2064
|
col_use = col(column) if isinstance(column, str) else column
|
|
1775
|
-
return (
|
|
1776
|
-
when(col_use.is_not_null().any())
|
|
1777
|
-
.then(col_use.sum())
|
|
1778
|
-
.otherwise(lit(None, dtype=dtype))
|
|
1779
|
-
)
|
|
2065
|
+
return when(col_use.is_not_null().any()).then(col_use.sum())
|
|
1780
2066
|
|
|
1781
2067
|
|
|
1782
2068
|
##
|
|
1783
2069
|
|
|
1784
2070
|
|
|
1785
|
-
|
|
1786
|
-
|
|
1787
|
-
|
|
2071
|
+
@overload
|
|
2072
|
+
def nan_sum_horizontal(*columns: Series) -> Series: ...
|
|
2073
|
+
@overload
|
|
2074
|
+
def nan_sum_horizontal(*columns: IntoExprColumn) -> ExprOrSeries: ...
|
|
2075
|
+
def nan_sum_horizontal(*columns: IntoExprColumn) -> ExprOrSeries:
|
|
1788
2076
|
"""Nan sum across columns."""
|
|
1789
|
-
|
|
1790
|
-
|
|
1791
|
-
|
|
2077
|
+
columns2 = ensure_expr_or_series_many(*columns)
|
|
2078
|
+
expr = when(any_horizontal(*(c.is_not_null() for c in columns2))).then(
|
|
2079
|
+
sum_horizontal(*columns2)
|
|
1792
2080
|
)
|
|
2081
|
+
return try_reify_expr(expr, *columns2)
|
|
2082
|
+
|
|
2083
|
+
|
|
2084
|
+
##
|
|
 
-def func(x: Expr, y: Expr, /) -> Expr:
-    return (
-        when(x.is_not_null() & y.is_not_null())
-        .then(x + y)
-        .when(x.is_not_null() & y.is_null())
-        .then(x)
-        .when(x.is_null() & y.is_not_null())
-        .then(y)
-        .otherwise(lit(None, dtype=dtype))
-    )
 
-
+@overload
+def normal_pdf(
+    x: ExprLike,
+    /,
+    *,
+    loc: float | IntoExprColumn = 0.0,
+    scale: float | IntoExprColumn = 1.0,
+) -> Expr: ...
+@overload
+def normal_pdf(
+    x: Series,
+    /,
+    *,
+    loc: float | IntoExprColumn = 0.0,
+    scale: float | IntoExprColumn = 1.0,
+) -> Series: ...
+@overload
+def normal_pdf(
+    x: IntoExprColumn,
+    /,
+    *,
+    loc: float | IntoExprColumn = 0.0,
+    scale: float | IntoExprColumn = 1.0,
+) -> ExprOrSeries: ...
+def normal_pdf(
+    x: IntoExprColumn,
+    /,
+    *,
+    loc: float | IntoExprColumn = 0.0,
+    scale: float | IntoExprColumn = 1.0,
+) -> ExprOrSeries:
+    """Compute the PDF of a normal distribution."""
+    x = ensure_expr_or_series(x)
+    loc = loc if isinstance(loc, int | float) else ensure_expr_or_series(loc)
+    scale = scale if isinstance(scale, int | float) else ensure_expr_or_series(scale)
+    expr = (1 / (scale * sqrt(2 * pi))) * (-(1 / 2) * ((x - loc) / scale) ** 2).exp()
+    return try_reify_expr(expr, x)
 
 
 ##
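`normal_pdf` evaluates the Gaussian density φ(x) = (1 / (σ√(2π))) · exp(−½((x − μ)/σ)²) with plain polars arithmetic, so it stays lazy on expressions and reifies on series. A quick check at the peak (a sketch, assuming `utilities.polars` is importable):

import polars as pl
from utilities.polars import normal_pdf

sr = pl.Series("x", [0.0, 1.0])
out = normal_pdf(sr)  # standard normal: loc=0.0, scale=1.0
# out[0] ≈ 1 / sqrt(2 * pi) ≈ 0.3989, out[1] ≈ 0.2420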
 
 
-def normal(
+def normal_rv(
     obj: int | Series | DataFrame,
     /,
     *,
@@ -1827,20 +2145,191 @@ def normal(
             values = rng.normal(loc=loc, scale=scale, size=height)
             return Series(name=name, values=values, dtype=dtype)
         case Series() as series:
-            return normal(
+            return normal_rv(
                 series.len(), loc=loc, scale=scale, seed=seed, name=name, dtype=dtype
             )
         case DataFrame() as df:
-            return normal(
+            return normal_rv(
                 df.height, loc=loc, scale=scale, seed=seed, name=name, dtype=dtype
             )
-        case _ as never:
+        case never:
             assert_never(never)
 
 
 ##
 
 
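`normal_rv` (renamed from `normal`) dispatches on its first argument: an `int` is a sample length, while a `Series` or `DataFrame` contributes its length or height by recursing into the `int` case. A usage sketch:

import polars as pl
from utilities.polars import normal_rv

sr = normal_rv(5, loc=0.0, scale=1.0, seed=42)  # length given directly
matched = normal_rv(sr, seed=42)                # draws sr.len() values
df = pl.DataFrame({"a": [1, 2, 3]})
per_row = normal_rv(df, seed=0)                 # draws df.height values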
+@overload
+def number_of_decimals(
+    column: ExprLike, /, *, max_decimals: int = MAX_DECIMALS
+) -> Expr: ...
+@overload
+def number_of_decimals(
+    column: Series, /, *, max_decimals: int = MAX_DECIMALS
+) -> Series: ...
+@overload
+def number_of_decimals(
+    column: IntoExprColumn, /, *, max_decimals: int = MAX_DECIMALS
+) -> ExprOrSeries: ...
+def number_of_decimals(
+    column: IntoExprColumn, /, *, max_decimals: int = MAX_DECIMALS
+) -> ExprOrSeries:
+    """Get the number of decimals."""
+    column = ensure_expr_or_series(column)
+    frac = column - column.floor()
+    results = (
+        _number_of_decimals_check_scale(frac, s) for s in range(max_decimals + 1)
+    )
+    return first_true_horizontal(*results)
+
+
+def _number_of_decimals_check_scale(frac: ExprOrSeries, scale: int, /) -> ExprOrSeries:
+    scaled = 10**scale * frac
+    return is_close(scaled, scaled.round()).alias(str(scale))
+
+
+##
+
+
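`number_of_decimals` strips the integer part, then for each scale s in 0..max_decimals tests whether 10**s times the fraction is close to an integer; `first_true_horizontal` (defined elsewhere in this module) picks the smallest scale that passes, i.e. the decimal count. A sketch:

import polars as pl
from utilities.polars import number_of_decimals

sr = pl.Series("x", [1.0, 2.5, 3.125])
number_of_decimals(sr)  # identifies 0, 1 and 3 decimal places (up to `is_close` tolerance)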
+def offset_datetime(
+    datetime: ZonedDateTime, offset: str, /, *, n: int = 1
+) -> ZonedDateTime:
+    """Offset a datetime as `polars` would."""
+    sr = Series(values=[datetime.py_datetime()])
+    for _ in range(n):
+        sr = sr.dt.offset_by(offset)
+    return ZonedDateTime.from_py_datetime(sr.item())
+
+
+##
+
+
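`offset_datetime` answers "what would polars compute?" by round-tripping a one-element Series through `dt.offset_by` n times; this matters because calendar offsets such as "1mo" are not plain timedelta arithmetic. A sketch, assuming `ZonedDateTime` comes from the `whenever` package as elsewhere in this release:

from whenever import ZonedDateTime
from utilities.polars import offset_datetime

start = ZonedDateTime(2024, 1, 31, tz="UTC")
offset_datetime(start, "1mo")  # polars should saturate month-end arithmetic to 2024-02-29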
+def one_column(df: DataFrame, /) -> Series:
+    """Return the unique column in a DataFrame."""
+    try:
+        return df[one(df.columns)]
+    except OneEmptyError:
+        raise OneColumnEmptyError(df=df) from None
+    except OneNonUniqueError as error:
+        raise OneColumnNonUniqueError(
+            df=df, first=error.first, second=error.second
+        ) from None
+
+
+@dataclass(kw_only=True, slots=True)
+class OneColumnError(Exception):
+    df: DataFrame
+
+
+@dataclass(kw_only=True, slots=True)
+class OneColumnEmptyError(OneColumnError):
+    @override
+    def __str__(self) -> str:
+        return "DataFrame must not be empty"
+
+
+@dataclass(kw_only=True, slots=True)
+class OneColumnNonUniqueError(OneColumnError):
+    first: str
+    second: str
+
+    @override
+    def __str__(self) -> str:
+        return f"DataFrame must contain exactly one column; got {self.first!r}, {self.second!r} and perhaps more"
+
+
+##
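`one_column` mirrors `one` from the iterables utilities, translating its empty and non-unique failures into DataFrame-specific errors. A sketch:

import polars as pl
from utilities.polars import OneColumnNonUniqueError, one_column

sr = one_column(pl.DataFrame({"x": [1, 2]}))  # the Series "x"
try:
    one_column(pl.DataFrame({"x": [1], "y": [2]}))
except OneColumnNonUniqueError as error:
    print(error)  # names 'x' and 'y' as the offending columns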
+
+
+@overload
+def order_of_magnitude(column: ExprLike, /, *, round_: bool = False) -> Expr: ...
+@overload
+def order_of_magnitude(column: Series, /, *, round_: bool = False) -> Series: ...
+@overload
+def order_of_magnitude(
+    column: IntoExprColumn, /, *, round_: bool = False
+) -> ExprOrSeries: ...
+def order_of_magnitude(
+    column: IntoExprColumn, /, *, round_: bool = False
+) -> ExprOrSeries:
+    """Compute the order of magnitude of a column."""
+    column = ensure_expr_or_series(column)
+    result = column.abs().log10()
+    return result.round().cast(Int64) if round_ else result
+
+
+##
+
+
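The order of magnitude here is log10(|x|), optionally rounded to the nearest integer and cast to Int64. Worked values:

import polars as pl
from utilities.polars import order_of_magnitude

sr = pl.Series("x", [0.1, 3.0, -250.0])
order_of_magnitude(sr)               # approx [-1.0, 0.477, 2.398]
order_of_magnitude(sr, round_=True)  # [-1, 0, 2]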
+@overload
+def period_range(
+    start: ZonedDateTime,
+    end_or_length: ZonedDateTime | int,
+    /,
+    *,
+    interval: str = "1d",
+    time_unit: TimeUnit | None = None,
+    time_zone: TimeZoneLike | None = None,
+    eager: Literal[True],
+) -> Series: ...
+@overload
+def period_range(
+    start: ZonedDateTime,
+    end_or_length: ZonedDateTime | int,
+    /,
+    *,
+    interval: str = "1d",
+    time_unit: TimeUnit | None = None,
+    time_zone: TimeZoneLike | None = None,
+    eager: Literal[False] = False,
+) -> Expr: ...
+@overload
+def period_range(
+    start: ZonedDateTime,
+    end_or_length: ZonedDateTime | int,
+    /,
+    *,
+    interval: str = "1d",
+    time_unit: TimeUnit | None = None,
+    time_zone: TimeZoneLike | None = None,
+    eager: bool = False,
+) -> Series | Expr: ...
+def period_range(
+    start: ZonedDateTime,
+    end_or_length: ZonedDateTime | int,
+    /,
+    *,
+    interval: str = "1d",
+    time_unit: TimeUnit | None = None,
+    time_zone: TimeZoneLike | None = None,
+    eager: bool = False,
+) -> Series | Expr:
+    """Construct a period range."""
+    time_zone_use = None if time_zone is None else to_time_zone_name(time_zone)
+    match end_or_length:
+        case ZonedDateTime() as end:
+            ...
+        case int() as length:
+            end = offset_datetime(start, interval, n=length)
+        case never:
+            assert_never(never)
+    starts = datetime_range(
+        start.py_datetime(),
+        end.py_datetime(),
+        interval,
+        closed="left",
+        time_unit=time_unit,
+        time_zone=time_zone_use,
+        eager=eager,
+    ).alias("start")
+    ends = starts.dt.offset_by(interval).alias("end")
+    period = struct(starts, ends)
+    return try_reify_expr(period, starts, ends)
+
+
+##
+
+
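`period_range` builds a left-closed `datetime_range` of period starts, shifts each start by `interval` to get the matching end, and packs the pair into a struct column; an integer second argument is turned into an end bound via `offset_datetime`. A sketch, again assuming `whenever`:

from whenever import ZonedDateTime
from utilities.polars import period_range

start = ZonedDateTime(2024, 1, 1, tz="UTC")
periods = period_range(start, 3, eager=True)
# three structs: {start: 2024-01-01, end: 2024-01-02} ... {start: 2024-01-03, end: 2024-01-04}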
 def reify_exprs(
     *exprs: IntoExprColumn, **named_exprs: IntoExprColumn
 ) -> Expr | Series | DataFrame:
@@ -1873,13 +2362,10 @@ def reify_exprs(
         .with_columns(*all_exprs)
         .drop("_index")
     )
-
-
-
-
-            return df[one(df.columns)]
-        case _:
-            return df
+    try:
+        return one_column(df)
+    except OneColumnNonUniqueError:
+        return df
 
 
 @dataclass(kw_only=True, slots=True)
@@ -1929,7 +2415,7 @@ def _replace_time_zone_one(
     sr: Series, /, *, time_zone: TimeZoneLike | None = UTC
 ) -> Series:
     if isinstance(sr.dtype, Datetime):
-        time_zone_use = None if time_zone is None else
+        time_zone_use = None if time_zone is None else to_time_zone_name(time_zone)
         return sr.dt.replace_time_zone(time_zone_use)
     return sr
 
@@ -1937,6 +2423,254 @@ def _replace_time_zone_one(
 ##
 
 
+def read_series(path: PathLike, /, *, decompress: bool = False) -> Series:
+    """Read a Series from disk."""
+    data = read_binary(path, decompress=decompress)
+    return deserialize_series(data)
+
+
+def write_series(
+    series: Series,
+    path: PathLike,
+    /,
+    *,
+    compress: bool = False,
+    overwrite: bool = False,
+) -> None:
+    """Write a Series to disk."""
+    data = serialize_series(series)
+    write_formatted_json(data, path, compress=compress, overwrite=overwrite)
+
+
+def read_dataframe(path: PathLike, /, *, decompress: bool = False) -> DataFrame:
+    """Read a DataFrame from disk."""
+    data = read_binary(path, decompress=decompress)
+    return deserialize_dataframe(data)
+
+
+def write_dataframe(
+    df: DataFrame, path: PathLike, /, *, compress: bool = False, overwrite: bool = False
+) -> None:
+    """Write a DataFrame to disk."""
+    data = serialize_dataframe(df)
+    write_formatted_json(data, path, compress=compress, overwrite=overwrite)
+
+
+def serialize_series(series: Series, /) -> bytes:
+    """Serialize a Series."""
+    from utilities.orjson import serialize
+
+    values = series.to_list()
+    decon = _deconstruct_dtype(series.dtype)
+    return serialize((series.name, values, decon))
+
+
+def deserialize_series(data: bytes, /) -> Series:
+    """Deserialize a Series."""
+    from utilities.orjson import deserialize
+
+    name, values, decon = deserialize(data)
+    dtype = _reconstruct_dtype(decon)
+    return Series(name=name, values=values, dtype=dtype)
+
+
+def serialize_dataframe(df: DataFrame, /) -> bytes:
+    """Serialize a DataFrame."""
+    from utilities.orjson import serialize
+
+    rows = df.rows()
+    decon = _deconstruct_schema(df.schema)
+    return serialize((rows, decon))
+
+
+def deserialize_dataframe(data: bytes, /) -> DataFrame:
+    """Deserialize a DataFrame."""
+    from utilities.orjson import deserialize
+
+    rows, decon = deserialize(data)
+    schema = _reconstruct_schema(decon)
+    return DataFrame(data=rows, schema=schema, orient="row")
+
+
+type _DeconSchema = Sequence[tuple[str, _DeconDType]]
+type _DeconDType = (
+    str
+    | tuple[Literal["Datetime"], str, str | None]
+    | tuple[Literal["List"], _DeconDType]
+    | tuple[Literal["Struct"], _DeconSchema]
+)
+
+
+def _deconstruct_schema(schema: Schema, /) -> _DeconSchema:
+    return [(k, _deconstruct_dtype(v)) for k, v in schema.items()]
+
+
+def _deconstruct_dtype(dtype: PolarsDataType, /) -> _DeconDType:
+    match dtype:
+        case List() as list_:
+            return "List", _deconstruct_dtype(list_.inner)
+        case Struct() as struct:
+            inner = Schema({f.name: f.dtype for f in struct.fields})
+            return "Struct", _deconstruct_schema(inner)
+        case Datetime() as datetime:
+            return "Datetime", datetime.time_unit, datetime.time_zone
+        case _:
+            return repr(dtype)
+
+
+def _reconstruct_schema(schema: _DeconSchema, /) -> Schema:
+    return Schema({k: _reconstruct_dtype(v) for k, v in schema})
+
+
+def _reconstruct_dtype(obj: _DeconDType, /) -> PolarsDataType:
+    match obj:
+        case str() as name:
+            return getattr(pl, name)
+        case "Datetime", str() as time_unit, str() | None as time_zone:
+            return Datetime(time_unit=cast("TimeUnit", time_unit), time_zone=time_zone)
+        case "List", inner:
+            return List(_reconstruct_dtype(inner))
+        case "Struct", inner:
+            return Struct(_reconstruct_schema(inner))
+        case never:
+            assert_never(never)
+
+
+##
+
+
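The (de)serialization helpers flatten a schema into JSON-friendly tuples: `Datetime`, `List` and `Struct` get structural encodings, while every other dtype is stored by its repr and rebuilt with `getattr(pl, name)`. A round trip therefore preserves both rows and dtypes; a sketch:

import polars as pl
from utilities.polars import deserialize_dataframe, serialize_dataframe

df = pl.DataFrame({"a": [1, 2], "b": [["x"], ["y", "z"]]})
data = serialize_dataframe(df)  # bytes, via utilities.orjson
assert deserialize_dataframe(data).equals(df)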
+@overload
+def round_to_float(
+    x: ExprLike, y: float, /, *, mode: RoundMode = "half_to_even"
+) -> Expr: ...
+@overload
+def round_to_float(
+    x: Series, y: float | ExprOrSeries, /, *, mode: RoundMode = "half_to_even"
+) -> Series: ...
+@overload
+def round_to_float(
+    x: ExprLike, y: Series, /, *, mode: RoundMode = "half_to_even"
+) -> Series: ...
+@overload
+def round_to_float(
+    x: ExprLike, y: Expr, /, *, mode: RoundMode = "half_to_even"
+) -> Expr: ...
+@overload
+def round_to_float(
+    x: IntoExprColumn, y: float | Series, /, *, mode: RoundMode = "half_to_even"
+) -> ExprOrSeries: ...
+def round_to_float(
+    x: IntoExprColumn, y: float | IntoExprColumn, /, *, mode: RoundMode = "half_to_even"
+) -> ExprOrSeries:
+    """Round a column to the nearest multiple of another float."""
+    x = ensure_expr_or_series(x)
+    y = y if isinstance(y, int | float) else ensure_expr_or_series(y)
+    match x, y:
+        case Expr() | Series(), int() | float():
+            z = (x / y).round(mode=mode) * y
+            return z.round(decimals=utilities.math.number_of_decimals(y) + 1)
+        case Series(), Expr() | Series():
+            df = (
+                x
+                .to_frame()
+                .with_columns(y)
+                .with_columns(number_of_decimals(y).alias("_decimals"))
+                .with_row_index(name="_index")
+                .group_by("_decimals")
+                .map_groups(_round_to_float_one)
+                .sort("_index")
+            )
+            return df[df.columns[1]]
+        case Expr(), Series():
+            df = y.to_frame().with_columns(x)
+            return round_to_float(df[df.columns[1]], df[df.columns[0]], mode=mode)
+        case Expr(), Expr() | str():
+            raise RoundToFloatError(x=x, y=y)
+        case never:
+            assert_never(never)
+
+
+def _round_to_float_one(df: DataFrame, /) -> DataFrame:
+    decimals: int | None = df["_decimals"].unique().item()
+    name = df.columns[1]
+    match decimals:
+        case int():
+            expr = col(name).round(decimals=decimals)
+        case None:
+            expr = lit(None, dtype=Float64).alias(name)
+        case never:
+            assert_never(never)
+    return df.with_columns(expr)
+
+
+@dataclass(kw_only=True, slots=True)
+class RoundToFloatError(Exception):
+    x: IntoExprColumn
+    y: IntoExprColumn
+
+    @override
+    def __str__(self) -> str:
+        return f"At least 1 of the dividend and/or divisor must be a Series; got {get_class_name(self.x)!r} and {get_class_name(self.y)!r}"
+
+
+##
+
+
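For a scalar divisor the function computes round(x / y) · y, then rounds once more to one decimal beyond y's own precision to suppress float noise. Worked example, rounding to the nearest 0.25:

import polars as pl
from utilities.polars import round_to_float

sr = pl.Series("x", [0.1, 0.3, 0.8])
round_to_float(sr, 0.25)  # [0.0, 0.25, 0.75]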
+def search_period(
+    series: Series,
+    date_time: ZonedDateTime,
+    /,
+    *,
+    start_or_end: Literal["start", "end"] = "end",
+) -> int | None:
+    """Search a series of periods for the one containing a given date-time."""
+    end = series.struct["end"]
+    py_date_time = date_time.py_datetime()
+    match start_or_end:
+        case "start":
+            index = end.search_sorted(py_date_time, side="right")
+            if index >= len(series):
+                return None
+            item: dt.datetime = series[index]["start"]
+            return index if py_date_time >= item else None
+        case "end":
+            index = end.search_sorted(py_date_time, side="left")
+            if index >= len(series):
+                return None
+            item: dt.datetime = series[index]["start"]
+            return index if py_date_time > item else None
+        case never:
+            assert_never(never)
+
+
+##
+
+
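`search_period` binary-searches the struct column produced by `period_range`: it locates the query against the "end" field with `search_sorted`, then confirms the query really falls inside the candidate by comparing against that period's "start"; `start_or_end` chooses whether each period is treated as closed at its start ([start, end)) or at its end ((start, end]). A sketch under the same `whenever` assumption:

from whenever import ZonedDateTime
from utilities.polars import period_range, search_period

start = ZonedDateTime(2024, 1, 1, tz="UTC")
periods = period_range(start, 3, eager=True)
search_period(periods, ZonedDateTime(2024, 1, 2, 12, tz="UTC"))  # 1
search_period(periods, ZonedDateTime(2024, 2, 1, tz="UTC"))      # None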
+def select_exact(
+    df: DataFrame, /, *columns: IntoExprColumn, drop: MaybeIterable[str] | None = None
+) -> DataFrame:
+    """Select an exact set of columns from a DataFrame."""
+    names = [get_expr_name(df, c) for c in columns]
+    drop = set() if drop is None else set(always_iterable(drop))
+    union = set(names) | drop
+    extra = [c for c in df.columns if c not in union]
+    if len(extra) >= 1:
+        raise SelectExactError(columns=extra)
+    return df.select(*columns)
+
+
+@dataclass(kw_only=True, slots=True)
+class SelectExactError(Exception):
+    columns: list[str]
+
+    @override
+    def __str__(self) -> str:
+        return f"All columns must be selected; got {get_repr(self.columns)} remaining"
+
+
+##
+
+
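`select_exact` is a strict `select`: every column of the input must be either selected or explicitly named in `drop`, otherwise `SelectExactError` lists the leftovers. A sketch:

import polars as pl
from utilities.polars import SelectExactError, select_exact

df = pl.DataFrame({"a": [1], "b": [2], "c": [3]})
select_exact(df, "a", "b", drop="c")  # fine
try:
    select_exact(df, "a")
except SelectExactError as error:
    print(error)  # reports 'b' and 'c' as unaccounted for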
 def set_first_row_as_columns(df: DataFrame, /) -> DataFrame:
     """Set the first row of a DataFrame as its columns."""
     try:
@@ -1967,79 +2701,79 @@ def struct_dtype(**kwargs: PolarsDataType) -> Struct:
 ##
 
 
-
-
-
-
-
-
-
-
-
-
-    if not is_dataclass_class(cls):
-        raise _StructFromDataClassNotADataclassError(cls=cls)
-    anns = get_type_hints(
-        cls, globalns=globalns, localns=localns, warn_name_errors=warn_name_errors
-    )
-    data_types = {
-        k: _struct_from_dataclass_one(v, time_zone=time_zone) for k, v in anns.items()
-    }
-    return Struct(data_types)
+@overload
+def to_true(column: ExprLike, /) -> Expr: ...
+@overload
+def to_true(column: Series, /) -> Series: ...
+@overload
+def to_true(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def to_true(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute when a boolean series turns True."""
+    t = is_true(column)
+    return ((~t).shift() & t).fill_null(value=False)
 
 
-
-
-
-
-
-
-
-
-
-    if is_dataclass_class(ann):
-        return struct_from_dataclass(ann, time_zone=time_zone)
-    if (isinstance(ann, type) and issubclass(ann, enum.Enum)) or (
-        is_literal_type(ann) and is_iterable_of(get_args(ann), str)
-    ):
-        return String
-    if is_optional_type(ann):
-        return _struct_from_dataclass_one(
-            one(get_args(ann, optional_drop_none=True)), time_zone=time_zone
-        )
-    if is_frozenset_type(ann) or is_list_type(ann) or is_set_type(ann):
-        return List(_struct_from_dataclass_one(one(get_args(ann)), time_zone=time_zone))
-    raise _StructFromDataClassTypeError(ann=ann)
+@overload
+def to_not_true(column: ExprLike, /) -> Expr: ...
+@overload
+def to_not_true(column: Series, /) -> Series: ...
+@overload
+def to_not_true(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def to_not_true(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute when a boolean series turns non-True."""
+    t = is_true(column)
+    return (t.shift() & (~t)).fill_null(value=False)
 
 
-@
-
+@overload
+def to_false(column: ExprLike, /) -> Expr: ...
+@overload
+def to_false(column: Series, /) -> Series: ...
+@overload
+def to_false(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def to_false(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute when a boolean series turns False."""
+    f = is_false(column)
+    return ((~f).shift() & f).fill_null(value=False)
 
 
-@
-
-
+@overload
+def to_not_false(column: ExprLike, /) -> Expr: ...
+@overload
+def to_not_false(column: Series, /) -> Series: ...
+@overload
+def to_not_false(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def to_not_false(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute when a boolean series turns non-False."""
+    f = is_false(column)
+    return (f.shift() & (~f)).fill_null(value=False)
 
-    @override
-    def __str__(self) -> str:
-        return f"Object must be a dataclass; got {self.cls}"
 
+##
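The four edge detectors compare each element with its predecessor: `to_true` fires where a non-True value is followed by True, `to_not_true` is its mirror image, and the False variants do the same for False; the leading null produced by `shift` is filled with False so the first row never fires. A sketch:

import polars as pl
from utilities.polars import to_not_true, to_true

sr = pl.Series("flag", [False, True, True, False])
to_true(sr).to_list()      # [False, True, False, False]
to_not_true(sr).to_list()  # [False, False, False, True]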
 
-@dataclass(kw_only=True, slots=True)
-class _StructFromDataClassTimeZoneMissingError(StructFromDataClassError):
-    @override
-    def __str__(self) -> str:
-        return "Time-zone must be given"
 
+@overload
+def true_like(column: ExprLike, /) -> Expr: ...
+@overload
+def true_like(column: Series, /) -> Series: ...
+@overload
+def true_like(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def true_like(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute a column of `True` values."""
+    column = ensure_expr_or_series(column)
+    return column.is_null() | column.is_not_null()
 
-@dataclass(kw_only=True, slots=True)
-class _StructFromDataClassTypeError(StructFromDataClassError):
-    ann: Any
 
-
-
-
+@overload
+def false_like(column: ExprLike, /) -> Expr: ...
+@overload
+def false_like(column: Series, /) -> Series: ...
+@overload
+def false_like(column: IntoExprColumn, /) -> ExprOrSeries: ...
+def false_like(column: IntoExprColumn, /) -> ExprOrSeries:
+    """Compute a column of `False` values."""
+    column = ensure_expr_or_series(column)
+    return column.is_null() & column.is_not_null()
 
 
 ##
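`true_like` and `false_like` build constant boolean columns from a tautology (`is_null | is_not_null`) and a contradiction (`is_null & is_not_null`) on the input, which keeps the result the same length and laziness as the source column and free of nulls. A sketch:

import polars as pl
from utilities.polars import false_like, true_like

sr = pl.Series("x", [1, None, 3])
true_like(sr).to_list()   # [True, True, True]
false_like(sr).to_list()  # [False, False, False]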
@@ -2047,7 +2781,7 @@ class _StructFromDataClassTypeError(StructFromDataClassError):
 
 def try_reify_expr(
     expr: IntoExprColumn, /, *exprs: IntoExprColumn, **named_exprs: IntoExprColumn
-) -> Expr | Series:
+) -> ExprOrSeries:
     """Try reify an expression."""
     expr = ensure_expr_or_series(expr)
     all_exprs = ensure_expr_or_series_many(*exprs, **named_exprs)
@@ -2060,7 +2794,7 @@ def try_reify_expr(
             return series
         case DataFrame() as df:
             return df[get_expr_name(df, expr)]
-        case _ as never:
+        case never:
             assert_never(never)
 
 
@@ -2093,7 +2827,7 @@ def uniform(
             return uniform(
                 df.height, low=low, high=high, seed=seed, name=name, dtype=dtype
             )
-        case _ as never:
+        case never:
             assert_never(never)
 
 
@@ -2114,8 +2848,8 @@ def week_num(column: ExprLike, /, *, start: WeekDay = "mon") -> Expr: ...
 @overload
 def week_num(column: Series, /, *, start: WeekDay = "mon") -> Series: ...
 @overload
-def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> Expr | Series: ...
-def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> Expr | Series:
+def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> ExprOrSeries: ...
+def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> ExprOrSeries:
     """Compute the week number of a date column."""
     column = ensure_expr_or_series(column)
     epoch = column.dt.epoch(time_unit="d").alias("epoch")
@@ -2126,79 +2860,129 @@ def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> Expr | Ser
 ##
 
 
-def
+def zoned_date_time_dtype(
     *, time_unit: TimeUnit = "us", time_zone: TimeZoneLike = UTC
 ) -> Datetime:
-    """Create a zoned
-    return Datetime(time_unit=time_unit, time_zone=
+    """Create a zoned date-time data type."""
+    return Datetime(time_unit=time_unit, time_zone=to_time_zone_name(time_zone))
+
+
+def zoned_date_time_period_dtype(
+    *,
+    time_unit: TimeUnit = "us",
+    time_zone: TimeZoneLike | tuple[TimeZoneLike, TimeZoneLike] = UTC,
+) -> Struct:
+    """Create a zoned date-time period data type."""
+    match time_zone:
+        case start, end:
+            return struct_dtype(
+                start=zoned_date_time_dtype(time_unit=time_unit, time_zone=start),
+                end=zoned_date_time_dtype(time_unit=time_unit, time_zone=end),
+            )
+        case _:
+            dtype = zoned_date_time_dtype(time_unit=time_unit, time_zone=time_zone)
+            return struct_dtype(start=dtype, end=dtype)
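`zoned_date_time_period_dtype` wraps two zoned `Datetime` dtypes into a struct with `start` and `end` fields; a `(start, end)` tuple of time zones gives each bound its own zone. A sketch:

from utilities.polars import zoned_date_time_period_dtype

zoned_date_time_period_dtype(time_zone="UTC")
# a Struct whose 'start' and 'end' are both Datetime("us", "UTC")
zoned_date_time_period_dtype(time_zone=("UTC", "Asia/Tokyo"))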
 
 
 __all__ = [
+    "AppendRowError",
     "BooleanValueCountsError",
     "CheckPolarsDataFrameError",
     "ColumnsToDictError",
     "DataClassToDataFrameError",
+    "DatePeriodDType",
     "DatetimeHongKong",
     "DatetimeTokyo",
     "DatetimeUSCentral",
     "DatetimeUSEastern",
     "DatetimeUTC",
-    "
+    "ExprOrSeries",
     "FiniteEWMMeanError",
     "GetDataTypeOrSeriesTimeZoneError",
-    "GetSeriesNumberOfDecimalsError",
     "InsertAfterError",
     "InsertBeforeError",
     "InsertBetweenError",
     "IsNearEventError",
-    "
+    "OneColumnEmptyError",
+    "OneColumnError",
+    "OneColumnNonUniqueError",
+    "RoundToFloatError",
+    "SelectExactError",
     "SetFirstRowAsColumnsError",
-    "
+    "TimePeriodDType",
     "acf",
     "adjust_frequencies",
-    "
+    "all_dataframe_columns",
+    "all_series",
+    "any_dataframe_columns",
+    "any_series",
+    "append_row",
     "are_frames_equal",
     "bernoulli",
     "boolean_value_counts",
-    "ceil_datetime",
     "check_polars_dataframe",
     "choice",
-    "collect_series",
     "columns_to_dict",
     "concat_series",
     "convert_time_zone",
     "cross",
     "dataclass_to_dataframe",
     "dataclass_to_schema",
-    "
+    "decreasing_horizontal",
+    "deserialize_dataframe",
     "ensure_data_type",
     "ensure_expr_or_series",
     "ensure_expr_or_series_many",
+    "expr_to_series",
+    "false_like",
+    "filter_date",
+    "filter_time",
     "finite_ewm_mean",
-    "
+    "first_true_horizontal",
     "get_data_type_or_series_time_zone",
     "get_expr_name",
     "get_frequency_spectrum",
-    "
+    "increasing_horizontal",
     "insert_after",
     "insert_before",
     "insert_between",
     "integers",
+    "is_close",
+    "is_false",
     "is_near_event",
-    "
-    "is_null_struct_series",
+    "is_true",
     "join",
+    "join_into_periods",
     "map_over_columns",
     "nan_sum_agg",
-    "
-    "
+    "nan_sum_horizontal",
+    "normal_pdf",
+    "normal_rv",
+    "number_of_decimals",
+    "offset_datetime",
+    "one_column",
+    "order_of_magnitude",
+    "period_range",
+    "read_dataframe",
+    "read_series",
     "replace_time_zone",
+    "round_to_float",
+    "search_period",
+    "select_exact",
+    "serialize_dataframe",
     "set_first_row_as_columns",
     "struct_dtype",
-    "
+    "to_false",
+    "to_not_false",
+    "to_not_true",
+    "to_true",
     "touch",
+    "true_like",
     "try_reify_expr",
     "uniform",
     "unique_element",
-    "
+    "write_dataframe",
+    "write_series",
+    "zoned_date_time_dtype",
+    "zoned_date_time_period_dtype",
 ]