dycw-utilities 0.159.0__py3-none-any.whl → 0.159.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.159.0
3
+ Version: 0.159.2
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=2MGx8NHJpAZDdhMAcNp1DSjsWA7ttyJ5FzMvBsFTXHg,60
1
+ utilities/__init__.py,sha256=xlALYg1JKmNAieowbIXvn_qRt1OWQ6pgWljwJTA5kvs,60
2
2
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
3
3
  utilities/asyncio.py,sha256=PUedzQ5deqlSECQ33sam9cRzI9TnygHz3FdOqWJWPTM,15288
4
4
  utilities/atomicwrites.py,sha256=tPo6r-Rypd9u99u66B9z86YBPpnLrlHtwox_8Z7T34Y,5790
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
45
45
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
46
46
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
47
47
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
48
- utilities/polars.py,sha256=glQ89DyPV9hE1_SFjOyaqhErJ6BT_0NQutDJTZG6UJo,75966
48
+ utilities/polars.py,sha256=SE-ZuB19vd-RxoAX7lZPImKcc199ArZh13jy65iV8gk,77059
49
49
  utilities/polars_ols.py,sha256=Uc9V5kvlWZ5cU93lKZ-cfAKdVFFw81tqwLW9PxtUvMs,5618
50
50
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
51
51
  utilities/pottery.py,sha256=ggMN72Y7wx7Js8VN6eyNyodpm8TIYqZHGghkDPXIVWk,3949
@@ -54,7 +54,7 @@ utilities/psutil.py,sha256=KUlu4lrUw9Zg1V7ZGetpWpGb9DB8l_SSDWGbANFNCPU,2104
54
54
  utilities/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
55
55
  utilities/pyinstrument.py,sha256=NZCZz2nBo0BLJ9DTf7H_Q_KGxvsf2S2M3h0qYoYh2kw,804
56
56
  utilities/pytest.py,sha256=M-Om6b3hpF9W_bEB7UFY2IzBCubSxzVQleGrgRXHtxY,7741
57
- utilities/pytest_regressions.py,sha256=8by5DWEL89Y469TI5AzX1pMy3NJWVtjEg2xQdOOdYuM,4169
57
+ utilities/pytest_regressions.py,sha256=ocjHTtfOeiGfQAKIei8pKNd61sxN9dawrJJ9gPt2wzA,4097
58
58
  utilities/random.py,sha256=hZlH4gnAtoaofWswuJYjcygejrY8db4CzP-z_adO2Mo,4165
59
59
  utilities/re.py,sha256=S4h-DLL6ScMPqjboZ_uQ1BVTJajrqV06r_81D--_HCE,4573
60
60
  utilities/redis.py,sha256=pqzl5A08vaRS4Gfjxob3LWWH9c-vwlsKbvVMTjWMSh8,28364
@@ -87,8 +87,8 @@ utilities/zoneinfo.py,sha256=FBMcUQ4662Aq8SsuCL1OAhDQiyANmVjtb-C30DRrWoE,1966
87
87
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
88
88
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
89
89
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
90
- dycw_utilities-0.159.0.dist-info/METADATA,sha256=NZWORLt2CKIbYdJTUllVDUsYVPK9jdxW5yUH5qjN-vc,1643
91
- dycw_utilities-0.159.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
- dycw_utilities-0.159.0.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
- dycw_utilities-0.159.0.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
- dycw_utilities-0.159.0.dist-info/RECORD,,
90
+ dycw_utilities-0.159.2.dist-info/METADATA,sha256=SCOwKj6kpRPNyf969J709Gfh_B23vJM7AJJAwC2OQ5c,1643
91
+ dycw_utilities-0.159.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
+ dycw_utilities-0.159.2.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
+ dycw_utilities-0.159.2.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
+ dycw_utilities-0.159.2.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.159.0"
3
+ __version__ = "0.159.2"
utilities/polars.py CHANGED
@@ -32,6 +32,7 @@ from polars import (
32
32
  any_horizontal,
33
33
  col,
34
34
  concat,
35
+ concat_list,
35
36
  datetime_range,
36
37
  int_range,
37
38
  lit,
@@ -42,9 +43,9 @@ from polars import (
42
43
  from polars._typing import PolarsDataType
43
44
  from polars.datatypes import DataType, DataTypeClass
44
45
  from polars.exceptions import (
45
- ColumnNotFoundError, # pyright: ignore[reportAttributeAccessIssue]
46
+ ColumnNotFoundError,
46
47
  NoRowsReturnedError,
47
- OutOfBoundsError, # pyright: ignore[reportAttributeAccessIssue]
48
+ OutOfBoundsError,
48
49
  PolarsInefficientMapWarning,
49
50
  )
50
51
  from polars.schema import Schema
@@ -337,8 +338,8 @@ def are_frames_equal(
337
338
  check_column_order: bool = True,
338
339
  check_dtypes: bool = True,
339
340
  check_exact: bool = False,
340
- rtol: float = 1e-5,
341
- atol: float = 1e-8,
341
+ rel_tol: float = 1e-5,
342
+ abs_tol: float = 1e-8,
342
343
  categorical_as_str: bool = False,
343
344
  ) -> bool:
344
345
  """Check if two DataFrames are equal."""
@@ -350,8 +351,8 @@ def are_frames_equal(
350
351
  check_column_order=check_column_order,
351
352
  check_dtypes=check_dtypes,
352
353
  check_exact=check_exact,
353
- rtol=rtol,
354
- atol=atol,
354
+ rel_tol=rel_tol,
355
+ abs_tol=abs_tol,
355
356
  categorical_as_str=categorical_as_str,
356
357
  )
357
358
  except AssertionError:
@@ -783,15 +784,6 @@ def choice(
783
784
  ##
784
785
 
785
786
 
786
- def collect_series(expr: Expr, /) -> Series:
787
- """Collect a column expression into a Series."""
788
- data = DataFrame().with_columns(expr)
789
- return data[one(data.columns)]
790
-
791
-
792
- ##
793
-
794
-
795
787
  def columns_to_dict(df: DataFrame, key: str, value: str, /) -> dict[Any, Any]:
796
788
  """Map a pair of columns into a dictionary. Must be unique on `key`."""
797
789
  col_key = df[key]
@@ -1044,7 +1036,9 @@ def _dataclass_to_dataframe_cast(series: Series, /) -> Series:
1044
1036
  is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
1045
1037
  is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
1046
1038
  if is_path or is_uuid:
1047
- with suppress_warnings(category=PolarsInefficientMapWarning):
1039
+ with suppress_warnings(
1040
+ category=cast("type[Warning]", PolarsInefficientMapWarning)
1041
+ ):
1048
1042
  return series.map_elements(str, return_dtype=String)
1049
1043
  if series.map_elements(make_isinstance(whenever.Time), return_dtype=Boolean).all():
1050
1044
  return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
@@ -1219,6 +1213,14 @@ def ensure_expr_or_series_many(
1219
1213
  ##
1220
1214
 
1221
1215
 
1216
+ def expr_to_series(expr: Expr, /) -> Series:
1217
+ """Collect a column expression into a Series."""
1218
+ return one_column(DataFrame().with_columns(expr))
1219
+
1220
+
1221
+ ##
1222
+
1223
+
1222
1224
  @overload
1223
1225
  def finite_ewm_mean(
1224
1226
  column: ExprLike,
@@ -1322,6 +1324,15 @@ class _FiniteEWMWeightsError(Exception):
1322
1324
  ##
1323
1325
 
1324
1326
 
1327
+ def first_true_horizontal(df: DataFrame, /) -> Series:
1328
+ """Get the index of the first true in each row."""
1329
+ expr = when(any_horizontal(pl.all())).then(concat_list(pl.all()).list.arg_max())
1330
+ return one_column(df.select(expr))
1331
+
1332
+
1333
+ ##
1334
+
1335
+
1325
1336
  def get_data_type_or_series_time_zone(
1326
1337
  dtype_or_series: PolarsDataType | Series, /
1327
1338
  ) -> ZoneInfo:
@@ -1393,8 +1404,7 @@ def get_expr_name(obj: Series | DataFrame, expr: IntoExprColumn, /) -> str:
1393
1404
  case Series() as series:
1394
1405
  return get_expr_name(series.to_frame(), expr)
1395
1406
  case DataFrame() as df:
1396
- selected = df.select(expr)
1397
- return one(selected.columns)
1407
+ return one_column(df.select(expr)).name
1398
1408
  case never:
1399
1409
  assert_never(never)
1400
1410
 
@@ -1989,6 +1999,43 @@ def offset_datetime(
1989
1999
  ##
1990
2000
 
1991
2001
 
2002
+ def one_column(df: DataFrame, /) -> Series:
2003
+ """Return the unique column in a DataFrame."""
2004
+ try:
2005
+ return df[one(df.columns)]
2006
+ except OneEmptyError:
2007
+ raise OneColumnEmptyError(df=df) from None
2008
+ except OneNonUniqueError as error:
2009
+ raise OneColumnNonUniqueError(
2010
+ df=df, first=error.first, second=error.second
2011
+ ) from None
2012
+
2013
+
2014
+ @dataclass(kw_only=True, slots=True)
2015
+ class OneColumnError(Exception):
2016
+ df: DataFrame
2017
+
2018
+
2019
+ @dataclass(kw_only=True, slots=True)
2020
+ class OneColumnEmptyError(OneColumnError):
2021
+ @override
2022
+ def __str__(self) -> str:
2023
+ return "DataFrame must not be empty"
2024
+
2025
+
2026
+ @dataclass(kw_only=True, slots=True)
2027
+ class OneColumnNonUniqueError(OneColumnError):
2028
+ first: str
2029
+ second: str
2030
+
2031
+ @override
2032
+ def __str__(self) -> str:
2033
+ return f"DataFrame must contain exactly one column; got {self.first!r}, {self.second!r} and perhaps more"
2034
+
2035
+
2036
+ ##
2037
+
2038
+
1992
2039
  @overload
1993
2040
  def order_of_magnitude(column: ExprLike, /, *, round_: bool = False) -> Expr: ...
1994
2041
  @overload
@@ -2110,13 +2157,10 @@ def reify_exprs(
2110
2157
  .with_columns(*all_exprs)
2111
2158
  .drop("_index")
2112
2159
  )
2113
- match len(df.columns):
2114
- case 0:
2115
- raise ImpossibleCaseError(case=[f"{df.columns=}"]) # pragma: no cover
2116
- case 1:
2117
- return df[one(df.columns)]
2118
- case _:
2119
- return df
2160
+ try:
2161
+ return one_column(df)
2162
+ except OneColumnNonUniqueError:
2163
+ return df
2120
2164
 
2121
2165
 
2122
2166
  @dataclass(kw_only=True, slots=True)
@@ -2519,6 +2563,9 @@ __all__ = [
2519
2563
  "InsertBeforeError",
2520
2564
  "InsertBetweenError",
2521
2565
  "IsNearEventError",
2566
+ "OneColumnEmptyError",
2567
+ "OneColumnError",
2568
+ "OneColumnNonUniqueError",
2522
2569
  "SetFirstRowAsColumnsError",
2523
2570
  "TimePeriodDType",
2524
2571
  "acf",
@@ -2533,7 +2580,6 @@ __all__ = [
2533
2580
  "boolean_value_counts",
2534
2581
  "check_polars_dataframe",
2535
2582
  "choice",
2536
- "collect_series",
2537
2583
  "columns_to_dict",
2538
2584
  "concat_series",
2539
2585
  "convert_time_zone",
@@ -2545,7 +2591,9 @@ __all__ = [
2545
2591
  "ensure_data_type",
2546
2592
  "ensure_expr_or_series",
2547
2593
  "ensure_expr_or_series_many",
2594
+ "expr_to_series",
2548
2595
  "finite_ewm_mean",
2596
+ "first_true_horizontal",
2549
2597
  "get_data_type_or_series_time_zone",
2550
2598
  "get_expr_name",
2551
2599
  "get_frequency_spectrum",
@@ -2565,6 +2613,7 @@ __all__ = [
2565
2613
  "nan_sum_cols",
2566
2614
  "normal",
2567
2615
  "offset_datetime",
2616
+ "one_column",
2568
2617
  "order_of_magnitude",
2569
2618
  "period_range",
2570
2619
  "read_dataframe",
@@ -91,9 +91,7 @@ class PolarsRegressionFixture:
91
91
  def check(self, obj: Series | DataFrame, /, *, suffix: str | None = None) -> None:
92
92
  """Check the Series/DataFrame summary against the baseline."""
93
93
  from polars import DataFrame, Series, col
94
- from polars.exceptions import (
95
- InvalidOperationError, # pyright: ignore[reportAttributeAccessIssue]
96
- )
94
+ from polars.exceptions import InvalidOperationError
97
95
 
98
96
  data: StrMapping = {
99
97
  "describe": obj.describe(percentiles=[i / 10 for i in range(1, 10)]).rows(