dycw-utilities 0.159.3__py3-none-any.whl → 0.159.5__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.159.3
3
+ Version: 0.159.5
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=VdqkGeeq4rCnCdHuCDKZa_vNmpgPYng4SgWvMihfJwo,60
1
+ utilities/__init__.py,sha256=y1aZeGKpHWH6gkJPUx6DJDCZYIPbZ7BynHdMXlgmmME,60
2
2
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
3
3
  utilities/asyncio.py,sha256=PUedzQ5deqlSECQ33sam9cRzI9TnygHz3FdOqWJWPTM,15288
4
4
  utilities/atomicwrites.py,sha256=tPo6r-Rypd9u99u66B9z86YBPpnLrlHtwox_8Z7T34Y,5790
@@ -32,7 +32,7 @@ utilities/jupyter.py,sha256=ft5JA7fBxXKzP-L9W8f2-wbF0QeYc_2uLQNFDVk4Z-M,2917
32
32
  utilities/libcst.py,sha256=TKgKN4bNmtBNEE-TUfhTyd1BrTncfsl_7tTuhpesGYY,5585
33
33
  utilities/lightweight_charts.py,sha256=YM3ojBvJxuCSUBu_KrhFBmaMCvRPvupKC3qkm-UVZq4,2751
34
34
  utilities/logging.py,sha256=ihbfQJgjc7t3Pds0oPvF_J1eigiqFKzxNOijzoee8U4,18064
35
- utilities/math.py,sha256=7ve4RxX3g-FGGVnWV0K9bBeGnKUEjnTbH13VxdvFtGE,26847
35
+ utilities/math.py,sha256=cevB-YyEYAzJTWtkAr7qeeu-hbxorDI3gMznXlmNQkw,26897
36
36
  utilities/memory_profiler.py,sha256=XzN56jDCa5aqXS_DxEjb_K4L6aIWh_5zyKi6OhcIxw0,853
37
37
  utilities/modules.py,sha256=iuvLluJya-hvl1Q25-Jk3dLgx2Es3ck4SjJiEkAlVTs,3195
38
38
  utilities/more_itertools.py,sha256=syfIPhQF_WS-YiicdGe2h5F1G-Ld12Q2XsVduL2hA40,10908
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
45
45
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
46
46
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
47
47
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
48
- utilities/polars.py,sha256=_pQS5wmtkMgu5CZZKDVA4AnT6NyLSlYKe5GK6hBR7vU,77876
48
+ utilities/polars.py,sha256=QTHk58M2dwIOCSR1JYttlrblUAw8Ihn7M8gAH4CtrGU,79542
49
49
  utilities/polars_ols.py,sha256=Uc9V5kvlWZ5cU93lKZ-cfAKdVFFw81tqwLW9PxtUvMs,5618
50
50
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
51
51
  utilities/pottery.py,sha256=ggMN72Y7wx7Js8VN6eyNyodpm8TIYqZHGghkDPXIVWk,3949
@@ -87,8 +87,8 @@ utilities/zoneinfo.py,sha256=FBMcUQ4662Aq8SsuCL1OAhDQiyANmVjtb-C30DRrWoE,1966
87
87
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
88
88
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
89
89
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
90
- dycw_utilities-0.159.3.dist-info/METADATA,sha256=vx1O7lyZWHV89NyvhKldxK9kl_d9wpawar1WayIcveA,1643
91
- dycw_utilities-0.159.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
- dycw_utilities-0.159.3.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
- dycw_utilities-0.159.3.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
- dycw_utilities-0.159.3.dist-info/RECORD,,
90
+ dycw_utilities-0.159.5.dist-info/METADATA,sha256=HkJFj45zxD0WbiOwR0QfrBq9RjjFHmI8aieoNk49Voo,1643
91
+ dycw_utilities-0.159.5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
+ dycw_utilities-0.159.5.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
+ dycw_utilities-0.159.5.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
+ dycw_utilities-0.159.5.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.159.3"
3
+ __version__ = "0.159.5"
utilities/math.py CHANGED
@@ -641,7 +641,10 @@ def _is_close(
641
641
  ##
642
642
 
643
643
 
644
- def number_of_decimals(x: float, /, *, max_decimals: int = 20) -> int:
644
+ MAX_DECIMALS = 10
645
+
646
+
647
+ def number_of_decimals(x: float, /, *, max_decimals: int = MAX_DECIMALS) -> int:
645
648
  """Get the number of decimals."""
646
649
  _, frac = divmod(x, 1)
647
650
  results = (
@@ -889,6 +892,7 @@ def significant_figures(x: float, /, *, n: int = 2) -> str:
889
892
 
890
893
 
891
894
  __all__ = [
895
+ "MAX_DECIMALS",
892
896
  "MAX_FLOAT32",
893
897
  "MAX_FLOAT64",
894
898
  "MAX_INT8",
utilities/polars.py CHANGED
@@ -53,11 +53,11 @@ from polars.schema import Schema
53
53
  from polars.testing import assert_frame_equal, assert_series_equal
54
54
  from whenever import DateDelta, DateTimeDelta, PlainDateTime, TimeDelta, ZonedDateTime
55
55
 
56
+ import utilities.math
56
57
  from utilities.dataclasses import yield_fields
57
58
  from utilities.errors import ImpossibleCaseError
58
59
  from utilities.functions import (
59
- EnsureIntError,
60
- ensure_int,
60
+ get_class_name,
61
61
  is_dataclass_class,
62
62
  is_dataclass_instance,
63
63
  make_isinstance,
@@ -80,12 +80,12 @@ from utilities.iterables import (
80
80
  )
81
81
  from utilities.json import write_formatted_json
82
82
  from utilities.math import (
83
+ MAX_DECIMALS,
83
84
  CheckIntegerError,
84
85
  check_integer,
85
86
  ewm_parameters,
86
87
  is_less_than,
87
88
  is_non_negative,
88
- number_of_decimals,
89
89
  )
90
90
  from utilities.reprlib import get_repr
91
91
  from utilities.types import MaybeStr, Number, PathLike, WeekDay
@@ -1325,10 +1325,27 @@ class _FiniteEWMWeightsError(Exception):
1325
1325
  ##
1326
1326
 
1327
1327
 
1328
- def first_true_horizontal(df: DataFrame, /) -> Series:
1328
+ @overload
1329
+ def first_true_horizontal(column: Series, /) -> Series: ...
1330
+ @overload
1331
+ def first_true_horizontal(column1: Series, column2: Series, /) -> Series: ...
1332
+ @overload
1333
+ def first_true_horizontal(
1334
+ column1: Series, column2: Series, column3: Series, /
1335
+ ) -> Series: ...
1336
+ @overload
1337
+ def first_true_horizontal(
1338
+ column1: Series, column2: Series, column3: Series, column4: Series, /
1339
+ ) -> Series: ...
1340
+ @overload
1341
+ def first_true_horizontal(*columns: Series) -> Series: ...
1342
+ @overload
1343
+ def first_true_horizontal(*columns: IntoExprColumn) -> ExprOrSeries: ...
1344
+ def first_true_horizontal(*columns: IntoExprColumn) -> ExprOrSeries:
1329
1345
  """Get the index of the first true in each row."""
1330
- expr = when(any_horizontal(pl.all())).then(concat_list(pl.all()).list.arg_max())
1331
- return one_column(df.select(expr))
1346
+ columns2 = ensure_expr_or_series_many(*columns)
1347
+ expr = when(any_horizontal(*columns2)).then(concat_list(*columns2).list.arg_max())
1348
+ return try_reify_expr(expr, *columns2)
1332
1349
 
1333
1350
 
1334
1351
  ##
@@ -1427,56 +1444,6 @@ def get_frequency_spectrum(series: Series, /, *, d: int = 1) -> DataFrame:
1427
1444
  ##
1428
1445
 
1429
1446
 
1430
- @overload
1431
- def get_series_number_of_decimals(
1432
- series: Series, /, *, nullable: Literal[True]
1433
- ) -> int | None: ...
1434
- @overload
1435
- def get_series_number_of_decimals(
1436
- series: Series, /, *, nullable: Literal[False] = False
1437
- ) -> int: ...
1438
- @overload
1439
- def get_series_number_of_decimals(
1440
- series: Series, /, *, nullable: bool = False
1441
- ) -> int | None: ...
1442
- def get_series_number_of_decimals(
1443
- series: Series, /, *, nullable: bool = False
1444
- ) -> int | None:
1445
- """Get the number of decimals of a series."""
1446
- if not isinstance(dtype := series.dtype, Float64):
1447
- raise _GetSeriesNumberOfDecimalsNotFloatError(dtype=dtype)
1448
- decimals = series.map_elements(number_of_decimals, return_dtype=Int64).max()
1449
- try:
1450
- return ensure_int(decimals, nullable=nullable)
1451
- except EnsureIntError:
1452
- raise _GetSeriesNumberOfDecimalsAllNullError(series=series) from None
1453
-
1454
-
1455
- @dataclass(kw_only=True, slots=True)
1456
- class GetSeriesNumberOfDecimalsError(Exception): ...
1457
-
1458
-
1459
- @dataclass(kw_only=True, slots=True)
1460
- class _GetSeriesNumberOfDecimalsNotFloatError(GetSeriesNumberOfDecimalsError):
1461
- dtype: DataType
1462
-
1463
- @override
1464
- def __str__(self) -> str:
1465
- return f"Data type must be Float64; got {self.dtype}"
1466
-
1467
-
1468
- @dataclass(kw_only=True, slots=True)
1469
- class _GetSeriesNumberOfDecimalsAllNullError(GetSeriesNumberOfDecimalsError):
1470
- series: Series
1471
-
1472
- @override
1473
- def __str__(self) -> str:
1474
- return f"Series must not be all-null; got {self.series}"
1475
-
1476
-
1477
- ##
1478
-
1479
-
1480
1447
  @overload
1481
1448
  def increasing_horizontal(*columns: ExprLike) -> Expr: ...
1482
1449
  @overload
@@ -2023,6 +1990,38 @@ def normal(
2023
1990
  ##
2024
1991
 
2025
1992
 
1993
+ @overload
1994
+ def number_of_decimals(
1995
+ series: ExprLike, /, *, max_decimals: int = MAX_DECIMALS
1996
+ ) -> Expr: ...
1997
+ @overload
1998
+ def number_of_decimals(
1999
+ series: Series, /, *, max_decimals: int = MAX_DECIMALS
2000
+ ) -> Series: ...
2001
+ @overload
2002
+ def number_of_decimals(
2003
+ series: IntoExprColumn, /, *, max_decimals: int = MAX_DECIMALS
2004
+ ) -> ExprOrSeries: ...
2005
+ def number_of_decimals(
2006
+ series: IntoExprColumn, /, *, max_decimals: int = MAX_DECIMALS
2007
+ ) -> ExprOrSeries:
2008
+ """Get the number of decimals."""
2009
+ series = ensure_expr_or_series(series)
2010
+ frac = series - series.floor()
2011
+ results = (
2012
+ _number_of_decimals_check_scale(frac, s) for s in range(max_decimals + 1)
2013
+ )
2014
+ return first_true_horizontal(*results)
2015
+
2016
+
2017
+ def _number_of_decimals_check_scale(frac: ExprOrSeries, scale: int, /) -> ExprOrSeries:
2018
+ scaled = 10**scale * frac
2019
+ return is_close(scaled, scaled.round()).alias(str(scale))
2020
+
2021
+
2022
+ ##
2023
+
2024
+
2026
2025
  def offset_datetime(
2027
2026
  datetime: ZonedDateTime, offset: str, /, *, n: int = 1
2028
2027
  ) -> ZonedDateTime:
@@ -2377,19 +2376,63 @@ def round_to_float(
2377
2376
  ) -> Expr: ...
2378
2377
  @overload
2379
2378
  def round_to_float(
2380
- x: Series, y: float, /, *, mode: RoundMode = "half_to_even"
2379
+ x: Series, y: float | ExprOrSeries, /, *, mode: RoundMode = "half_to_even"
2381
2380
  ) -> Series: ...
2382
2381
  @overload
2383
2382
  def round_to_float(
2384
- x: IntoExprColumn, y: float, /, *, mode: RoundMode = "half_to_even"
2383
+ x: ExprLike, y: Series, /, *, mode: RoundMode = "half_to_even"
2384
+ ) -> Series: ...
2385
+ @overload
2386
+ def round_to_float(
2387
+ x: ExprLike, y: Expr, /, *, mode: RoundMode = "half_to_even"
2388
+ ) -> Expr: ...
2389
+ @overload
2390
+ def round_to_float(
2391
+ x: IntoExprColumn, y: float | Series, /, *, mode: RoundMode = "half_to_even"
2385
2392
  ) -> ExprOrSeries: ...
2386
2393
  def round_to_float(
2387
- x: IntoExprColumn, y: float, /, *, mode: RoundMode = "half_to_even"
2394
+ x: IntoExprColumn, y: float | IntoExprColumn, /, *, mode: RoundMode = "half_to_even"
2388
2395
  ) -> ExprOrSeries:
2389
2396
  """Round a column to the nearest multiple of another float."""
2390
2397
  x = ensure_expr_or_series(x)
2391
- z = (x / y).round(mode=mode) * y
2392
- return z.round(decimals=number_of_decimals(y) + 1)
2398
+ y = y if isinstance(y, int | float) else ensure_expr_or_series(y)
2399
+ match x, y:
2400
+ case Expr() | Series(), int() | float():
2401
+ z = (x / y).round(mode=mode) * y
2402
+ return z.round(decimals=utilities.math.number_of_decimals(y) + 1)
2403
+ case Series(), Expr() | Series():
2404
+ df = (
2405
+ x.to_frame()
2406
+ .with_columns(y)
2407
+ .with_columns(number_of_decimals(y).alias("_decimals"))
2408
+ .with_row_index(name="_index")
2409
+ .group_by("_decimals")
2410
+ .map_groups(_round_to_float_one)
2411
+ .sort("_index")
2412
+ )
2413
+ return df[df.columns[1]]
2414
+ case Expr(), Series():
2415
+ df = y.to_frame().with_columns(x)
2416
+ return round_to_float(df[df.columns[1]], df[df.columns[0]], mode=mode)
2417
+ case Expr(), Expr() | str():
2418
+ raise RoundToFloatError(x=x, y=y)
2419
+ case never:
2420
+ assert_never(never)
2421
+
2422
+
2423
+ def _round_to_float_one(df: DataFrame, /) -> DataFrame:
2424
+ decimals: int = df["_decimals"].unique().item()
2425
+ return df.with_columns(col(df.columns[1]).round(decimals=decimals))
2426
+
2427
+
2428
+ @dataclass(kw_only=True, slots=True)
2429
+ class RoundToFloatError(Exception):
2430
+ x: IntoExprColumn
2431
+ y: IntoExprColumn
2432
+
2433
+ @override
2434
+ def __str__(self) -> str:
2435
+ return f"At least 1 of the dividend and/or divisor must be a Series; got {get_class_name(self.x)!r} and {get_class_name(self.y)!r}"
2393
2436
 
2394
2437
 
2395
2438
  ##
@@ -2595,7 +2638,6 @@ __all__ = [
2595
2638
  "ExprOrSeries",
2596
2639
  "FiniteEWMMeanError",
2597
2640
  "GetDataTypeOrSeriesTimeZoneError",
2598
- "GetSeriesNumberOfDecimalsError",
2599
2641
  "InsertAfterError",
2600
2642
  "InsertBeforeError",
2601
2643
  "InsertBetweenError",
@@ -2603,6 +2645,7 @@ __all__ = [
2603
2645
  "OneColumnEmptyError",
2604
2646
  "OneColumnError",
2605
2647
  "OneColumnNonUniqueError",
2648
+ "RoundToFloatError",
2606
2649
  "SetFirstRowAsColumnsError",
2607
2650
  "TimePeriodDType",
2608
2651
  "acf",
@@ -2634,7 +2677,6 @@ __all__ = [
2634
2677
  "get_data_type_or_series_time_zone",
2635
2678
  "get_expr_name",
2636
2679
  "get_frequency_spectrum",
2637
- "get_series_number_of_decimals",
2638
2680
  "increasing_horizontal",
2639
2681
  "insert_after",
2640
2682
  "insert_before",
@@ -2650,6 +2692,7 @@ __all__ = [
2650
2692
  "nan_sum_agg",
2651
2693
  "nan_sum_cols",
2652
2694
  "normal",
2695
+ "number_of_decimals",
2653
2696
  "offset_datetime",
2654
2697
  "one_column",
2655
2698
  "order_of_magnitude",