dycw-utilities 0.112.10__py3-none-any.whl → 0.112.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dycw_utilities-0.112.10.dist-info → dycw_utilities-0.112.11.dist-info}/METADATA +1 -1
- {dycw_utilities-0.112.10.dist-info → dycw_utilities-0.112.11.dist-info}/RECORD +6 -6
- utilities/__init__.py +1 -1
- utilities/polars.py +156 -8
- {dycw_utilities-0.112.10.dist-info → dycw_utilities-0.112.11.dist-info}/WHEEL +0 -0
- {dycw_utilities-0.112.10.dist-info → dycw_utilities-0.112.11.dist-info}/licenses/LICENSE +0 -0
@@ -1,4 +1,4 @@
|
|
1
|
-
utilities/__init__.py,sha256=
|
1
|
+
utilities/__init__.py,sha256=UdqUImTa6tBT-IqBR4s6iXfh38IrLnn774Bkpt5kz_g,61
|
2
2
|
utilities/altair.py,sha256=Gpja-flOo-Db0PIPJLJsgzAlXWoKUjPU1qY-DQ829ek,9156
|
3
3
|
utilities/astor.py,sha256=xuDUkjq0-b6fhtwjhbnebzbqQZAjMSHR1IIS5uOodVg,777
|
4
4
|
utilities/asyncio.py,sha256=41oQUurWMvadFK5gFnaG21hMM0Vmfn2WS6OpC0R9mas,14757
|
@@ -46,7 +46,7 @@ utilities/pathlib.py,sha256=31WPMXdLIyXgYOMMl_HOI2wlo66MGSE-cgeelk-Lias,1410
|
|
46
46
|
utilities/period.py,sha256=RWfcNVoNlW07RNdU47g_zuLZMKbtgfK4bE6G-9tVjY8,11024
|
47
47
|
utilities/pickle.py,sha256=Bhvd7cZl-zQKQDFjUerqGuSKlHvnW1K2QXeU5UZibtg,657
|
48
48
|
utilities/platform.py,sha256=NU7ycTvAXAG-fdYmDXaM1m4EOml2cGiaYwaUzfzSqyU,1767
|
49
|
-
utilities/polars.py,sha256=
|
49
|
+
utilities/polars.py,sha256=bo2Rhukk2eXxe1RMfu2uvEjTQTd9SmOi8mGW4BRG82c,67288
|
50
50
|
utilities/polars_ols.py,sha256=efhXf0gjrHUpQrvS6a7g8yJQJWf_ATKtJnqqF2inCOU,5680
|
51
51
|
utilities/pqdm.py,sha256=foRytQybmOQ05pjt5LF7ANyzrIa--4ScDE3T2wd31a4,3118
|
52
52
|
utilities/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
@@ -87,7 +87,7 @@ utilities/warnings.py,sha256=un1LvHv70PU-LLv8RxPVmugTzDJkkGXRMZTE2-fTQHw,1771
|
|
87
87
|
utilities/whenever.py,sha256=iLRP_-8CZtBpHKbGZGu-kjSMg1ZubJ-VSmgSy7Eudxw,17787
|
88
88
|
utilities/zipfile.py,sha256=24lQc9ATcJxHXBPc_tBDiJk48pWyRrlxO2fIsFxU0A8,699
|
89
89
|
utilities/zoneinfo.py,sha256=-Xm57PMMwDTYpxJdkiJG13wnbwK--I7XItBh5WVhD-o,1874
|
90
|
-
dycw_utilities-0.112.
|
91
|
-
dycw_utilities-0.112.
|
92
|
-
dycw_utilities-0.112.
|
93
|
-
dycw_utilities-0.112.
|
90
|
+
dycw_utilities-0.112.11.dist-info/METADATA,sha256=ICb-HGQkFLIEQZoD2MayfokOJhntweh8-YFElfTf7aU,13005
|
91
|
+
dycw_utilities-0.112.11.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
92
|
+
dycw_utilities-0.112.11.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
|
93
|
+
dycw_utilities-0.112.11.dist-info/RECORD,,
|
utilities/__init__.py
CHANGED
utilities/polars.py
CHANGED
@@ -7,7 +7,7 @@ from collections.abc import Set as AbstractSet
|
|
7
7
|
from contextlib import suppress
|
8
8
|
from dataclasses import asdict, dataclass
|
9
9
|
from functools import partial, reduce
|
10
|
-
from itertools import chain
|
10
|
+
from itertools import chain, product
|
11
11
|
from math import ceil, log
|
12
12
|
from pathlib import Path
|
13
13
|
from typing import (
|
@@ -41,6 +41,7 @@ from polars import (
|
|
41
41
|
Struct,
|
42
42
|
UInt32,
|
43
43
|
all_horizontal,
|
44
|
+
any_horizontal,
|
44
45
|
col,
|
45
46
|
concat,
|
46
47
|
int_range,
|
@@ -1265,13 +1266,7 @@ def finite_ewm_mean(
|
|
1265
1266
|
column = ensure_expr_or_series(column)
|
1266
1267
|
mean = column.fill_null(value=0.0).rolling_mean(len(weights), weights=list(weights))
|
1267
1268
|
expr = when(column.is_not_null()).then(mean)
|
1268
|
-
|
1269
|
-
case Expr():
|
1270
|
-
return expr
|
1271
|
-
case Series() as series:
|
1272
|
-
return series.to_frame().with_columns(expr.alias(series.name))[series.name]
|
1273
|
-
case _ as never:
|
1274
|
-
assert_never(never)
|
1269
|
+
return try_reify_expr(expr, column)
|
1275
1270
|
|
1276
1271
|
|
1277
1272
|
@dataclass(kw_only=True)
|
@@ -1599,6 +1594,69 @@ def integers(
|
|
1599
1594
|
##
|
1600
1595
|
|
1601
1596
|
|
1597
|
+
@overload
def is_near_event(
    *exprs: ExprLike, before: int = 0, after: int = 0, **named_exprs: ExprLike
) -> Expr: ...
@overload
def is_near_event(
    *exprs: Series, before: int = 0, after: int = 0, **named_exprs: Series
) -> Series: ...
@overload
def is_near_event(
    *exprs: IntoExprColumn,
    before: int = 0,
    after: int = 0,
    **named_exprs: IntoExprColumn,
) -> Expr | Series: ...
def is_near_event(
    *exprs: IntoExprColumn,
    before: int = 0,
    after: int = 0,
    **named_exprs: IntoExprColumn,
) -> Expr | Series:
    """Flag the rows that lie within a window around any event.

    Each input column is shifted by every offset from ``-before`` to
    ``after`` inclusive. Nulls in the shifted columns are replaced by
    ``False`` and the shifted copies are combined with ``any_horizontal``.
    When no columns are supplied the result is a literal ``False``.

    Args:
        *exprs: Positional event columns.
        before: Lower bound of the shift window, negated; must be >= 0.
        after: Upper bound of the shift window; must be >= 0.
        **named_exprs: Named event columns.

    Returns:
        The combined flag, passed through ``try_reify_expr`` together with
        the original inputs.

    Raises:
        IsNearEventError: If ``before`` or ``after`` is negative.
    """
    if -1 >= before:
        raise _IsNearEventBeforeError(before=before)
    if -1 >= after:
        raise _IsNearEventAfterError(after=after)
    columns = ensure_expr_or_series_many(*exprs, **named_exprs)
    if len(columns) >= 1:
        offsets = range(-before, after + 1)
        # One shifted copy per (column, offset) pair.
        shifted = [
            column.shift(offset).fill_null(value=False)
            for column, offset in product(columns, offsets)
        ]
        flagged = any_horizontal(*shifted)
    else:
        flagged = lit(value=False, dtype=Boolean)
    return try_reify_expr(flagged, *exprs, **named_exprs)
|
1633
|
+
|
1634
|
+
|
1635
|
+
@dataclass(kw_only=True, slots=True)
|
1636
|
+
class IsNearEventError(Exception): ...
|
1637
|
+
|
1638
|
+
|
1639
|
+
@dataclass(kw_only=True, slots=True)
class _IsNearEventBeforeError(IsNearEventError):
    """Error carrying a rejected `before` argument of `is_near_event`."""

    # The offending value, echoed back in the message.
    before: int

    @override
    def __str__(self) -> str:
        return f"'Before' must be non-negative; got {self.before}"
|
1646
|
+
|
1647
|
+
|
1648
|
+
@dataclass(kw_only=True, slots=True)
class _IsNearEventAfterError(IsNearEventError):
    """Error carrying a rejected `after` argument of `is_near_event`."""

    # The offending value, echoed back in the message.
    after: int

    @override
    def __str__(self) -> str:
        return f"'After' must be non-negative; got {self.after}"
|
1655
|
+
|
1656
|
+
|
1657
|
+
##
|
1658
|
+
|
1659
|
+
|
1602
1660
|
def is_not_null_struct_series(series: Series, /) -> Series:
|
1603
1661
|
"""Check if a struct-dtype Series is not null as per the <= 1.1 definition."""
|
1604
1662
|
try:
|
@@ -1791,6 +1849,71 @@ def normal(
|
|
1791
1849
|
##
|
1792
1850
|
|
1793
1851
|
|
1852
|
+
def reify_exprs(
    *exprs: IntoExprColumn, **named_exprs: IntoExprColumn
) -> Expr | Series | DataFrame:
    """Evaluate a collection of expressions and/or Series together.

    If none of the inputs is a Series, nothing is evaluated: a single input
    is returned as is and several inputs are wrapped in ``struct``. Otherwise
    all Series must share one length; the inputs are evaluated against a
    temporary frame of that length, yielding a Series when the frame ends up
    with one column and a DataFrame when it has several.

    Raises:
        ReifyExprsError: If no inputs are given, or if the Series inputs have
            more than one distinct length.
    """
    all_exprs = ensure_expr_or_series_many(*exprs, **named_exprs)
    if len(all_exprs) == 0:
        raise _ReifyExprsEmptyError from None
    lengths = {obj.len() for obj in all_exprs if isinstance(obj, Series)}
    try:
        length = one(lengths)
    except OneEmptyError:
        # No Series among the inputs, so there is no frame length to use.
        n_exprs = len(all_exprs)
        if n_exprs == 0:
            raise ImpossibleCaseError(
                case=[f"{all_exprs=}"]
            ) from None  # pragma: no cover
        if n_exprs == 1:
            return one(all_exprs)
        return struct(*all_exprs)
    except OneNonUniqueError as error:
        raise _ReifyExprsSeriesNonUniqueError(
            first=error.first, second=error.second
        ) from None
    # A throwaway "_index" column fixes the frame height; it is dropped again
    # once the inputs have been evaluated.
    index = int_range(end=length, eager=True).alias("_index")
    df = index.to_frame().with_columns(*all_exprs).drop("_index")
    n_columns = len(df.columns)
    if n_columns == 0:
        raise ImpossibleCaseError(case=[f"{df.columns=}"])  # pragma: no cover
    if n_columns == 1:
        return df[one(df.columns)]
    return df
|
1891
|
+
|
1892
|
+
|
1893
|
+
@dataclass(kw_only=True, slots=True)
|
1894
|
+
class ReifyExprsError(Exception): ...
|
1895
|
+
|
1896
|
+
|
1897
|
+
@dataclass(kw_only=True, slots=True)
class _ReifyExprsEmptyError(ReifyExprsError):
    """Error for a `reify_exprs` call that received no inputs."""

    @override
    def __str__(self) -> str:
        return "At least 1 Expression or Series must be given"
|
1902
|
+
|
1903
|
+
|
1904
|
+
@dataclass(kw_only=True, slots=True)
class _ReifyExprsSeriesNonUniqueError(ReifyExprsError):
    """Error for Series inputs that do not all share one length.

    Declared with ``kw_only=True, slots=True`` like the other error
    dataclasses in this module; instances are built with keyword arguments.
    """

    # Two of the differing lengths; there may be further ones.
    first: int
    second: int

    @override
    def __str__(self) -> str:
        return f"Series must contain exactly one length; got {self.first}, {self.second} and perhaps more"
|
1912
|
+
|
1913
|
+
|
1914
|
+
##
|
1915
|
+
|
1916
|
+
|
1794
1917
|
@overload
|
1795
1918
|
def replace_time_zone(
|
1796
1919
|
obj: Series, /, *, time_zone: TimeZoneLike | None = UTC
|
@@ -1930,6 +2053,28 @@ class _StructFromDataClassTypeError(StructFromDataClassError):
|
|
1930
2053
|
##
|
1931
2054
|
|
1932
2055
|
|
2056
|
+
def try_reify_expr(
    expr: IntoExprColumn, /, *exprs: IntoExprColumn, **named_exprs: IntoExprColumn
) -> Expr | Series:
    """Evaluate `expr` alongside the other inputs where that is possible.

    The other inputs are renamed ``_0``, ``_1``, ... and handed to
    ``reify_exprs`` together with `expr`. If that yields an expression,
    `expr` itself is returned; if it yields a Series, that Series is
    returned; if it yields a DataFrame, the column belonging to `expr` is
    selected from it.
    """
    expr = ensure_expr_or_series(expr)
    others = ensure_expr_or_series_many(*exprs, **named_exprs)
    # NOTE(review): presumably the aliases keep the other inputs from
    # clashing with `expr`; an `expr` itself named "_0", "_1", ... may still
    # collide -- confirm against callers.
    renamed = [other.alias(f"_{idx}") for idx, other in enumerate(others)]
    result = reify_exprs(expr, *renamed)
    if isinstance(result, Expr):
        return expr
    if isinstance(result, Series):
        return result
    if isinstance(result, DataFrame):
        return result[get_expr_name(result, expr)]
    assert_never(result)
|
2073
|
+
|
2074
|
+
|
2075
|
+
##
|
2076
|
+
|
2077
|
+
|
1933
2078
|
def uniform(
|
1934
2079
|
obj: int | Series | DataFrame,
|
1935
2080
|
/,
|
@@ -2123,6 +2268,7 @@ __all__ = [
|
|
2123
2268
|
"InsertAfterError",
|
2124
2269
|
"InsertBeforeError",
|
2125
2270
|
"InsertBetweenError",
|
2271
|
+
"IsNearEventError",
|
2126
2272
|
"IsNullStructSeriesError",
|
2127
2273
|
"SetFirstRowAsColumnsError",
|
2128
2274
|
"StructFromDataClassError",
|
@@ -2157,6 +2303,7 @@ __all__ = [
|
|
2157
2303
|
"insert_before",
|
2158
2304
|
"insert_between",
|
2159
2305
|
"integers",
|
2306
|
+
"is_near_event",
|
2160
2307
|
"is_not_null_struct_series",
|
2161
2308
|
"is_null_struct_series",
|
2162
2309
|
"join",
|
@@ -2169,6 +2316,7 @@ __all__ = [
|
|
2169
2316
|
"struct_dtype",
|
2170
2317
|
"struct_from_dataclass",
|
2171
2318
|
"touch",
|
2319
|
+
"try_reify_expr",
|
2172
2320
|
"uniform",
|
2173
2321
|
"unique_element",
|
2174
2322
|
"yield_struct_series_dataclasses",
|
File without changes
|
File without changes
|