dycw-utilities 0.109.8__py3-none-any.whl → 0.109.10__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,9 +1,10 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.109.8
3
+ Version: 0.109.10
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
7
+ Requires-Dist: polars-ols>=0.3.5
7
8
  Requires-Dist: typing-extensions<4.14,>=4.13.1
8
9
  Provides-Extra: test
9
10
  Requires-Dist: hypothesis<6.132,>=6.131.6; extra == 'test'
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=HolaprMxFmj01s_l_QOeA1rUAo9639B4EnJ2_HF8s5o,60
1
+ utilities/__init__.py,sha256=rSepLbrv6OLPYSa8JNcDoi9ph_2-kk82P4d1Ebs7rTE,61
2
2
  utilities/altair.py,sha256=NSyDsm8QlkAGmsGdxVwCkHnPxt_35yJBa9Lg7bz9Ays,9054
3
3
  utilities/astor.py,sha256=xuDUkjq0-b6fhtwjhbnebzbqQZAjMSHR1IIS5uOodVg,777
4
4
  utilities/asyncio.py,sha256=41oQUurWMvadFK5gFnaG21hMM0Vmfn2WS6OpC0R9mas,14757
@@ -45,7 +45,8 @@ utilities/pathlib.py,sha256=31WPMXdLIyXgYOMMl_HOI2wlo66MGSE-cgeelk-Lias,1410
45
45
  utilities/period.py,sha256=ikHXsWtDLr553cfH6p9mMaiCnIAP69B7q84ckWV3HaA,10884
46
46
  utilities/pickle.py,sha256=Bhvd7cZl-zQKQDFjUerqGuSKlHvnW1K2QXeU5UZibtg,657
47
47
  utilities/platform.py,sha256=NU7ycTvAXAG-fdYmDXaM1m4EOml2cGiaYwaUzfzSqyU,1767
48
- utilities/polars.py,sha256=USK_Rck8nmFYg2Rs-akqN9jV4w52lpz4rgkWUMQdLMk,49087
48
+ utilities/polars.py,sha256=nB2pfK8N8HRpPE_tdbiTfFGLWC_TekAqgHlYDhnUzAM,52169
49
+ utilities/polars_ols.py,sha256=AQe3RFOMv8CEI_ZCoscb_-PxB4JWjO0TAEmk8DKLeaI,2138
49
50
  utilities/pqdm.py,sha256=foRytQybmOQ05pjt5LF7ANyzrIa--4ScDE3T2wd31a4,3118
50
51
  utilities/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
51
52
  utilities/pydantic.py,sha256=f6qtR5mO2YMuyvNmbaEj5YeD9eGA4YYfb7Bjzh9jUs0,1845
@@ -84,7 +85,7 @@ utilities/warnings.py,sha256=yUgjnmkCRf6QhdyAXzl7u0qQFejhQG3PrjoSwxpbHrs,1819
84
85
  utilities/whenever.py,sha256=TjoTAJ1R27-rKXiXzdE4GzPidmYqm0W58XydDXp-QZM,17786
85
86
  utilities/zipfile.py,sha256=24lQc9ATcJxHXBPc_tBDiJk48pWyRrlxO2fIsFxU0A8,699
86
87
  utilities/zoneinfo.py,sha256=-DQz5a0Ikw9jfSZtL0BEQkXOMC9yGn_xiJYNCLMiqEc,1989
87
- dycw_utilities-0.109.8.dist-info/METADATA,sha256=MIDeCogv3Eg50hnzviSxygm6L6i4mPPDY7_VzeKIrR0,13004
88
- dycw_utilities-0.109.8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
89
- dycw_utilities-0.109.8.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
90
- dycw_utilities-0.109.8.dist-info/RECORD,,
88
+ dycw_utilities-0.109.10.dist-info/METADATA,sha256=dn1-Yy6KJLMDkEyWztQtPMjCF7DKg5bjdo7wRbZA2oA,13038
89
+ dycw_utilities-0.109.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
90
+ dycw_utilities-0.109.10.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
91
+ dycw_utilities-0.109.10.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.109.8"
3
+ __version__ = "0.109.10"
utilities/polars.py CHANGED
@@ -1165,6 +1165,52 @@ class _InsertBetweenNonConsecutiveError(InsertBetweenError):
1165
1165
  ##
1166
1166
 
1167
1167
 
1168
+ def integers(
1169
+ obj: int | Series | DataFrame,
1170
+ low: int,
1171
+ /,
1172
+ *,
1173
+ high: int | None = None,
1174
+ seed: int | None = None,
1175
+ endpoint: bool = False,
1176
+ name: str | None = None,
1177
+ dtype: PolarsDataType = Int64,
1178
+ ) -> Series:
1179
+ """Construct a series of uniformly-distributed integers."""
1180
+ match obj:
1181
+ case int() as height:
1182
+ from numpy.random import default_rng
1183
+
1184
+ rng = default_rng(seed=seed)
1185
+ values = rng.integers(low, high=high, size=height, endpoint=endpoint)
1186
+ return Series(name=name, values=values, dtype=dtype)
1187
+ case Series() as series:
1188
+ return integers(
1189
+ series.len(),
1190
+ low,
1191
+ high=high,
1192
+ seed=seed,
1193
+ endpoint=endpoint,
1194
+ name=name,
1195
+ dtype=dtype,
1196
+ )
1197
+ case DataFrame() as df:
1198
+ return integers(
1199
+ df.height,
1200
+ low,
1201
+ high=high,
1202
+ seed=seed,
1203
+ endpoint=endpoint,
1204
+ name=name,
1205
+ dtype=dtype,
1206
+ )
1207
+ case _ as never:
1208
+ assert_never(never)
1209
+
1210
+
1211
+ ##
1212
+
1213
+
1168
1214
  def is_not_null_struct_series(series: Series, /) -> Series:
1169
1215
  """Check if a struct-dtype Series is not null as per the <= 1.1 definition."""
1170
1216
  try:
@@ -1324,6 +1370,39 @@ def nan_sum_cols(
1324
1370
  ##
1325
1371
 
1326
1372
 
1373
+ def normal(
1374
+ obj: int | Series | DataFrame,
1375
+ /,
1376
+ *,
1377
+ loc: float = 0.0,
1378
+ scale: float = 1.0,
1379
+ seed: int | None = None,
1380
+ name: str | None = None,
1381
+ dtype: PolarsDataType = Float64,
1382
+ ) -> Series:
1383
+ """Construct a series of normally-distributed numbers."""
1384
+ match obj:
1385
+ case int() as height:
1386
+ from numpy.random import default_rng
1387
+
1388
+ rng = default_rng(seed=seed)
1389
+ values = rng.normal(loc=loc, scale=scale, size=height)
1390
+ return Series(name=name, values=values, dtype=dtype)
1391
+ case Series() as series:
1392
+ return normal(
1393
+ series.len(), loc=loc, scale=scale, seed=seed, name=name, dtype=dtype
1394
+ )
1395
+ case DataFrame() as df:
1396
+ return normal(
1397
+ df.height, loc=loc, scale=scale, seed=seed, name=name, dtype=dtype
1398
+ )
1399
+ case _ as never:
1400
+ assert_never(never)
1401
+
1402
+
1403
+ ##
1404
+
1405
+
1327
1406
  @overload
1328
1407
  def replace_time_zone(
1329
1408
  obj: Series, /, *, time_zone: TimeZoneLike | None = UTC
@@ -1461,6 +1540,39 @@ class _StructFromDataClassTypeError(StructFromDataClassError):
1461
1540
  ##
1462
1541
 
1463
1542
 
1543
+ def uniform(
1544
+ obj: int | Series | DataFrame,
1545
+ /,
1546
+ *,
1547
+ low: float = 0.0,
1548
+ high: float = 1.0,
1549
+ seed: int | None = None,
1550
+ name: str | None = None,
1551
+ dtype: PolarsDataType = Float64,
1552
+ ) -> Series:
1553
+ """Construct a series of uniformly-distributed numbers."""
1554
+ match obj:
1555
+ case int() as height:
1556
+ from numpy.random import default_rng
1557
+
1558
+ rng = default_rng(seed=seed)
1559
+ values = rng.uniform(low=low, high=high, size=height)
1560
+ return Series(name=name, values=values, dtype=dtype)
1561
+ case Series() as series:
1562
+ return uniform(
1563
+ series.len(), low=low, high=high, seed=seed, name=name, dtype=dtype
1564
+ )
1565
+ case DataFrame() as df:
1566
+ return uniform(
1567
+ df.height, low=low, high=high, seed=seed, name=name, dtype=dtype
1568
+ )
1569
+ case _ as never:
1570
+ assert_never(never)
1571
+
1572
+
1573
+ ##
1574
+
1575
+
1464
1576
  def unique_element(column: ExprLike, /) -> Expr:
1465
1577
  """Get the unique element in a list."""
1466
1578
  column = ensure_expr_or_series(column)
@@ -1645,17 +1757,20 @@ __all__ = [
1645
1757
  "insert_after",
1646
1758
  "insert_before",
1647
1759
  "insert_between",
1760
+ "integers",
1648
1761
  "is_not_null_struct_series",
1649
1762
  "is_null_struct_series",
1650
1763
  "join",
1651
1764
  "map_over_columns",
1652
1765
  "nan_sum_agg",
1653
1766
  "nan_sum_cols",
1767
+ "normal",
1654
1768
  "replace_time_zone",
1655
1769
  "set_first_row_as_columns",
1656
1770
  "struct_dtype",
1657
1771
  "struct_from_dataclass",
1658
1772
  "touch",
1773
+ "uniform",
1659
1774
  "unique_element",
1660
1775
  "yield_struct_series_dataclasses",
1661
1776
  "yield_struct_series_elements",
@@ -0,0 +1,71 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from polars import struct
6
+ from polars_ols import RollingKwargs, compute_rolling_least_squares
7
+
8
+ from utilities.polars import ensure_expr_or_series
9
+
10
+ if TYPE_CHECKING:
11
+ from polars import Expr
12
+ from polars_ols import NullPolicy
13
+
14
+ from utilities.polars import ExprLike
15
+
16
+
17
+ def compute_rolling_ols(
18
+ target: ExprLike,
19
+ *features: ExprLike,
20
+ sample_weights: ExprLike | None = None,
21
+ add_intercept: bool = False,
22
+ null_policy: NullPolicy = "drop_window",
23
+ window_size: int = 1000000,
24
+ min_periods: int | None = None,
25
+ use_woodbury: bool | None = None,
26
+ alpha: float | None = None,
27
+ ) -> Expr:
28
+ """Compute a rolling OLS struct: coefficients, predictions, residuals, R2."""
29
+ target = ensure_expr_or_series(target)
30
+ rolling_kwargs = RollingKwargs(
31
+ null_policy=null_policy,
32
+ window_size=window_size,
33
+ min_periods=min_periods,
34
+ use_woodbury=use_woodbury,
35
+ alpha=alpha,
36
+ )
37
+ coefficients = compute_rolling_least_squares(
38
+ target,
39
+ *features,
40
+ sample_weights=sample_weights,
41
+ add_intercept=add_intercept,
42
+ mode="coefficients",
43
+ rolling_kwargs=rolling_kwargs,
44
+ ).alias("coefficients")
45
+ predictions = compute_rolling_least_squares(
46
+ target,
47
+ *features,
48
+ sample_weights=sample_weights,
49
+ add_intercept=add_intercept,
50
+ mode="predictions",
51
+ rolling_kwargs=rolling_kwargs,
52
+ ).alias("predictions")
53
+ residuals = compute_rolling_least_squares(
54
+ target,
55
+ *features,
56
+ sample_weights=sample_weights,
57
+ add_intercept=add_intercept,
58
+ mode="residuals",
59
+ rolling_kwargs=rolling_kwargs,
60
+ ).alias("residuals")
61
+ ssr = (residuals**2).rolling_sum(window_size, min_samples=min_periods).alias("SSR")
62
+ sst = (
63
+ ((target - target.rolling_mean(window_size, min_samples=min_periods)) ** 2)
64
+ .rolling_sum(window_size, min_samples=min_periods)
65
+ .alias("SST")
66
+ )
67
+ r2 = (1 - ssr / sst).alias("R2")
68
+ return struct(coefficients, predictions, residuals, r2).alias("ols")
69
+
70
+
71
+ __all__ = ["compute_rolling_ols"]