dycw-utilities 0.109.8__py3-none-any.whl → 0.109.10__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dycw_utilities-0.109.8.dist-info → dycw_utilities-0.109.10.dist-info}/METADATA +2 -1
- {dycw_utilities-0.109.8.dist-info → dycw_utilities-0.109.10.dist-info}/RECORD +7 -6
- utilities/__init__.py +1 -1
- utilities/polars.py +115 -0
- utilities/polars_ols.py +71 -0
- {dycw_utilities-0.109.8.dist-info → dycw_utilities-0.109.10.dist-info}/WHEEL +0 -0
- {dycw_utilities-0.109.8.dist-info → dycw_utilities-0.109.10.dist-info}/licenses/LICENSE +0 -0
@@ -1,9 +1,10 @@
|
|
1
1
|
Metadata-Version: 2.4
|
2
2
|
Name: dycw-utilities
|
3
|
-
Version: 0.109.8
|
3
|
+
Version: 0.109.10
|
4
4
|
Author-email: Derek Wan <d.wan@icloud.com>
|
5
5
|
License-File: LICENSE
|
6
6
|
Requires-Python: >=3.12
|
7
|
+
Requires-Dist: polars-ols>=0.3.5
|
7
8
|
Requires-Dist: typing-extensions<4.14,>=4.13.1
|
8
9
|
Provides-Extra: test
|
9
10
|
Requires-Dist: hypothesis<6.132,>=6.131.6; extra == 'test'
|
@@ -1,4 +1,4 @@
|
|
1
|
-
utilities/__init__.py,sha256=
|
1
|
+
utilities/__init__.py,sha256=rSepLbrv6OLPYSa8JNcDoi9ph_2-kk82P4d1Ebs7rTE,61
|
2
2
|
utilities/altair.py,sha256=NSyDsm8QlkAGmsGdxVwCkHnPxt_35yJBa9Lg7bz9Ays,9054
|
3
3
|
utilities/astor.py,sha256=xuDUkjq0-b6fhtwjhbnebzbqQZAjMSHR1IIS5uOodVg,777
|
4
4
|
utilities/asyncio.py,sha256=41oQUurWMvadFK5gFnaG21hMM0Vmfn2WS6OpC0R9mas,14757
|
@@ -45,7 +45,8 @@ utilities/pathlib.py,sha256=31WPMXdLIyXgYOMMl_HOI2wlo66MGSE-cgeelk-Lias,1410
|
|
45
45
|
utilities/period.py,sha256=ikHXsWtDLr553cfH6p9mMaiCnIAP69B7q84ckWV3HaA,10884
|
46
46
|
utilities/pickle.py,sha256=Bhvd7cZl-zQKQDFjUerqGuSKlHvnW1K2QXeU5UZibtg,657
|
47
47
|
utilities/platform.py,sha256=NU7ycTvAXAG-fdYmDXaM1m4EOml2cGiaYwaUzfzSqyU,1767
|
48
|
-
utilities/polars.py,sha256=
|
48
|
+
utilities/polars.py,sha256=nB2pfK8N8HRpPE_tdbiTfFGLWC_TekAqgHlYDhnUzAM,52169
|
49
|
+
utilities/polars_ols.py,sha256=AQe3RFOMv8CEI_ZCoscb_-PxB4JWjO0TAEmk8DKLeaI,2138
|
49
50
|
utilities/pqdm.py,sha256=foRytQybmOQ05pjt5LF7ANyzrIa--4ScDE3T2wd31a4,3118
|
50
51
|
utilities/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
51
52
|
utilities/pydantic.py,sha256=f6qtR5mO2YMuyvNmbaEj5YeD9eGA4YYfb7Bjzh9jUs0,1845
|
@@ -84,7 +85,7 @@ utilities/warnings.py,sha256=yUgjnmkCRf6QhdyAXzl7u0qQFejhQG3PrjoSwxpbHrs,1819
|
|
84
85
|
utilities/whenever.py,sha256=TjoTAJ1R27-rKXiXzdE4GzPidmYqm0W58XydDXp-QZM,17786
|
85
86
|
utilities/zipfile.py,sha256=24lQc9ATcJxHXBPc_tBDiJk48pWyRrlxO2fIsFxU0A8,699
|
86
87
|
utilities/zoneinfo.py,sha256=-DQz5a0Ikw9jfSZtL0BEQkXOMC9yGn_xiJYNCLMiqEc,1989
|
87
|
-
dycw_utilities-0.109.
|
88
|
-
dycw_utilities-0.109.
|
89
|
-
dycw_utilities-0.109.
|
90
|
-
dycw_utilities-0.109.
|
88
|
+
dycw_utilities-0.109.10.dist-info/METADATA,sha256=dn1-Yy6KJLMDkEyWztQtPMjCF7DKg5bjdo7wRbZA2oA,13038
|
89
|
+
dycw_utilities-0.109.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
90
|
+
dycw_utilities-0.109.10.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
|
91
|
+
dycw_utilities-0.109.10.dist-info/RECORD,,
|
utilities/__init__.py
CHANGED
utilities/polars.py
CHANGED
@@ -1165,6 +1165,52 @@ class _InsertBetweenNonConsecutiveError(InsertBetweenError):
|
|
1165
1165
|
##
|
1166
1166
|
|
1167
1167
|
|
1168
|
+
def integers(
|
1169
|
+
obj: int | Series | DataFrame,
|
1170
|
+
low: int,
|
1171
|
+
/,
|
1172
|
+
*,
|
1173
|
+
high: int | None = None,
|
1174
|
+
seed: int | None = None,
|
1175
|
+
endpoint: bool = False,
|
1176
|
+
name: str | None = None,
|
1177
|
+
dtype: PolarsDataType = Int64,
|
1178
|
+
) -> Series:
|
1179
|
+
"""Construct a series of uniformly-distributed random integers."""
|
1180
|
+
match obj:
|
1181
|
+
case int() as height:
|
1182
|
+
from numpy.random import default_rng
|
1183
|
+
|
1184
|
+
rng = default_rng(seed=seed)
|
1185
|
+
values = rng.integers(low, high=high, size=height, endpoint=endpoint)
|
1186
|
+
return Series(name=name, values=values, dtype=dtype)
|
1187
|
+
case Series() as series:
|
1188
|
+
return integers(
|
1189
|
+
series.len(),
|
1190
|
+
low,
|
1191
|
+
high=high,
|
1192
|
+
seed=seed,
|
1193
|
+
endpoint=endpoint,
|
1194
|
+
name=name,
|
1195
|
+
dtype=dtype,
|
1196
|
+
)
|
1197
|
+
case DataFrame() as df:
|
1198
|
+
return integers(
|
1199
|
+
df.height,
|
1200
|
+
low,
|
1201
|
+
high=high,
|
1202
|
+
seed=seed,
|
1203
|
+
endpoint=endpoint,
|
1204
|
+
name=name,
|
1205
|
+
dtype=dtype,
|
1206
|
+
)
|
1207
|
+
case _ as never:
|
1208
|
+
assert_never(never)
|
1209
|
+
|
1210
|
+
|
1211
|
+
##
|
1212
|
+
|
1213
|
+
|
1168
1214
|
def is_not_null_struct_series(series: Series, /) -> Series:
|
1169
1215
|
"""Check if a struct-dtype Series is not null as per the <= 1.1 definition."""
|
1170
1216
|
try:
|
@@ -1324,6 +1370,39 @@ def nan_sum_cols(
|
|
1324
1370
|
##
|
1325
1371
|
|
1326
1372
|
|
1373
|
+
def normal(
|
1374
|
+
obj: int | Series | DataFrame,
|
1375
|
+
/,
|
1376
|
+
*,
|
1377
|
+
loc: float = 0.0,
|
1378
|
+
scale: float = 1.0,
|
1379
|
+
seed: int | None = None,
|
1380
|
+
name: str | None = None,
|
1381
|
+
dtype: PolarsDataType = Float64,
|
1382
|
+
) -> Series:
|
1383
|
+
"""Construct a series of normally-distributed numbers."""
|
1384
|
+
match obj:
|
1385
|
+
case int() as height:
|
1386
|
+
from numpy.random import default_rng
|
1387
|
+
|
1388
|
+
rng = default_rng(seed=seed)
|
1389
|
+
values = rng.normal(loc=loc, scale=scale, size=height)
|
1390
|
+
return Series(name=name, values=values, dtype=dtype)
|
1391
|
+
case Series() as series:
|
1392
|
+
return normal(
|
1393
|
+
series.len(), loc=loc, scale=scale, seed=seed, name=name, dtype=dtype
|
1394
|
+
)
|
1395
|
+
case DataFrame() as df:
|
1396
|
+
return normal(
|
1397
|
+
df.height, loc=loc, scale=scale, seed=seed, name=name, dtype=dtype
|
1398
|
+
)
|
1399
|
+
case _ as never:
|
1400
|
+
assert_never(never)
|
1401
|
+
|
1402
|
+
|
1403
|
+
##
|
1404
|
+
|
1405
|
+
|
1327
1406
|
@overload
|
1328
1407
|
def replace_time_zone(
|
1329
1408
|
obj: Series, /, *, time_zone: TimeZoneLike | None = UTC
|
@@ -1461,6 +1540,39 @@ class _StructFromDataClassTypeError(StructFromDataClassError):
|
|
1461
1540
|
##
|
1462
1541
|
|
1463
1542
|
|
1543
|
+
def uniform(
|
1544
|
+
obj: int | Series | DataFrame,
|
1545
|
+
/,
|
1546
|
+
*,
|
1547
|
+
low: float = 0.0,
|
1548
|
+
high: float = 1.0,
|
1549
|
+
seed: int | None = None,
|
1550
|
+
name: str | None = None,
|
1551
|
+
dtype: PolarsDataType = Float64,
|
1552
|
+
) -> Series:
|
1553
|
+
"""Construct a series of uniformly-distributed numbers."""
|
1554
|
+
match obj:
|
1555
|
+
case int() as height:
|
1556
|
+
from numpy.random import default_rng
|
1557
|
+
|
1558
|
+
rng = default_rng(seed=seed)
|
1559
|
+
values = rng.uniform(low=low, high=high, size=height)
|
1560
|
+
return Series(name=name, values=values, dtype=dtype)
|
1561
|
+
case Series() as series:
|
1562
|
+
return uniform(
|
1563
|
+
series.len(), low=low, high=high, seed=seed, name=name, dtype=dtype
|
1564
|
+
)
|
1565
|
+
case DataFrame() as df:
|
1566
|
+
return uniform(
|
1567
|
+
df.height, low=low, high=high, seed=seed, name=name, dtype=dtype
|
1568
|
+
)
|
1569
|
+
case _ as never:
|
1570
|
+
assert_never(never)
|
1571
|
+
|
1572
|
+
|
1573
|
+
##
|
1574
|
+
|
1575
|
+
|
1464
1576
|
def unique_element(column: ExprLike, /) -> Expr:
|
1465
1577
|
"""Get the unique element in a list."""
|
1466
1578
|
column = ensure_expr_or_series(column)
|
@@ -1645,17 +1757,20 @@ __all__ = [
|
|
1645
1757
|
"insert_after",
|
1646
1758
|
"insert_before",
|
1647
1759
|
"insert_between",
|
1760
|
+
"integers",
|
1648
1761
|
"is_not_null_struct_series",
|
1649
1762
|
"is_null_struct_series",
|
1650
1763
|
"join",
|
1651
1764
|
"map_over_columns",
|
1652
1765
|
"nan_sum_agg",
|
1653
1766
|
"nan_sum_cols",
|
1767
|
+
"normal",
|
1654
1768
|
"replace_time_zone",
|
1655
1769
|
"set_first_row_as_columns",
|
1656
1770
|
"struct_dtype",
|
1657
1771
|
"struct_from_dataclass",
|
1658
1772
|
"touch",
|
1773
|
+
"uniform",
|
1659
1774
|
"unique_element",
|
1660
1775
|
"yield_struct_series_dataclasses",
|
1661
1776
|
"yield_struct_series_elements",
|
utilities/polars_ols.py
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
from __future__ import annotations
|
2
|
+
|
3
|
+
from typing import TYPE_CHECKING
|
4
|
+
|
5
|
+
from polars import struct
|
6
|
+
from polars_ols import RollingKwargs, compute_rolling_least_squares
|
7
|
+
|
8
|
+
from utilities.polars import ensure_expr_or_series
|
9
|
+
|
10
|
+
if TYPE_CHECKING:
|
11
|
+
from polars import Expr
|
12
|
+
from polars_ols import NullPolicy
|
13
|
+
|
14
|
+
from utilities.polars import ExprLike
|
15
|
+
|
16
|
+
|
17
|
+
def compute_rolling_ols(
|
18
|
+
target: ExprLike,
|
19
|
+
*features: ExprLike,
|
20
|
+
sample_weights: ExprLike | None = None,
|
21
|
+
add_intercept: bool = False,
|
22
|
+
null_policy: NullPolicy = "drop_window",
|
23
|
+
window_size: int = 1000000,
|
24
|
+
min_periods: int | None = None,
|
25
|
+
use_woodbury: bool | None = None,
|
26
|
+
alpha: float | None = None,
|
27
|
+
) -> Expr:
|
28
|
+
"""Compute a rolling OLS."""
|
29
|
+
target = ensure_expr_or_series(target)
|
30
|
+
rolling_kwargs = RollingKwargs(
|
31
|
+
null_policy=null_policy,
|
32
|
+
window_size=window_size,
|
33
|
+
min_periods=min_periods,
|
34
|
+
use_woodbury=use_woodbury,
|
35
|
+
alpha=alpha,
|
36
|
+
)
|
37
|
+
coefficients = compute_rolling_least_squares(
|
38
|
+
target,
|
39
|
+
*features,
|
40
|
+
sample_weights=sample_weights,
|
41
|
+
add_intercept=add_intercept,
|
42
|
+
mode="coefficients",
|
43
|
+
rolling_kwargs=rolling_kwargs,
|
44
|
+
).alias("coefficients")
|
45
|
+
predictions = compute_rolling_least_squares(
|
46
|
+
target,
|
47
|
+
*features,
|
48
|
+
sample_weights=sample_weights,
|
49
|
+
add_intercept=add_intercept,
|
50
|
+
mode="predictions",
|
51
|
+
rolling_kwargs=rolling_kwargs,
|
52
|
+
).alias("predictions")
|
53
|
+
residuals = compute_rolling_least_squares(
|
54
|
+
target,
|
55
|
+
*features,
|
56
|
+
sample_weights=sample_weights,
|
57
|
+
add_intercept=add_intercept,
|
58
|
+
mode="residuals",
|
59
|
+
rolling_kwargs=rolling_kwargs,
|
60
|
+
).alias("residuals")
|
61
|
+
ssr = (residuals**2).rolling_sum(window_size, min_samples=min_periods).alias("SSR")
|
62
|
+
sst = (
|
63
|
+
((target - target.rolling_mean(window_size, min_samples=min_periods)) ** 2)
|
64
|
+
.rolling_sum(window_size, min_samples=min_periods)
|
65
|
+
.alias("SST")
|
66
|
+
)
|
67
|
+
r2 = (1 - ssr / sst).alias("R2")
|
68
|
+
return struct(coefficients, predictions, residuals, r2).alias("ols")
|
69
|
+
|
70
|
+
|
71
|
+
__all__ = ["compute_rolling_ols"]
|
File without changes
|
File without changes
|