dycw-utilities 0.158.1__py3-none-any.whl → 0.159.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.158.1
3
+ Version: 0.159.0
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -12,7 +12,7 @@ Provides-Extra: logging
12
12
  Requires-Dist: coloredlogs<15.1,>=15.0.1; extra == 'logging'
13
13
  Provides-Extra: test
14
14
  Requires-Dist: dycw-pytest-only<2.2,>=2.1.1; extra == 'test'
15
- Requires-Dist: hypothesis<6.138,>=6.137.1; extra == 'test'
15
+ Requires-Dist: hypothesis<6.139,>=6.138.0; extra == 'test'
16
16
  Requires-Dist: pytest-asyncio<1.2,>=1.1.0; extra == 'test'
17
17
  Requires-Dist: pytest-cov<6.3,>=6.2.1; extra == 'test'
18
18
  Requires-Dist: pytest-instafail<0.6,>=0.5.0; extra == 'test'
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=SivYUEYUR51QHFgXpC5YqYC_WiHCwRe1sU5wvBGCfCY,60
1
+ utilities/__init__.py,sha256=2MGx8NHJpAZDdhMAcNp1DSjsWA7ttyJ5FzMvBsFTXHg,60
2
2
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
3
3
  utilities/asyncio.py,sha256=PUedzQ5deqlSECQ33sam9cRzI9TnygHz3FdOqWJWPTM,15288
4
4
  utilities/atomicwrites.py,sha256=tPo6r-Rypd9u99u66B9z86YBPpnLrlHtwox_8Z7T34Y,5790
@@ -40,12 +40,12 @@ utilities/numpy.py,sha256=Xn23sA2ZbVNqwUYEgNJD3XBYH6IbCri_WkHSNhg3NkY,26122
40
40
  utilities/operator.py,sha256=nhxn5q6CFNzUm1wpTwWPCu9JGCqVHSlaJf0o1-efoII,3616
41
41
  utilities/optuna.py,sha256=C-fhWYiXHVPo1l8QctYkFJ4DyhbSrGorzP1dJb_qvd8,1933
42
42
  utilities/orjson.py,sha256=Ll0U172ITMqOJc3kjV90C0eI-EWzSXlMHSdUBaUSe80,41499
43
- utilities/os.py,sha256=Zwznb1Y0cHHIPG7t0UfEWeo0VeDzCnBcwI0mTV-xh2M,3877
43
+ utilities/os.py,sha256=8TjFLVWlGhhEpzZ0X_vNAyhYntjeVL5WTwaQcdTaNVw,3934
44
44
  utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
45
45
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
46
46
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
47
47
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
48
- utilities/polars.py,sha256=JOZjSpj9jitDijX044mKc-N00C5N_On3TJYJKJRhdcE,78494
48
+ utilities/polars.py,sha256=glQ89DyPV9hE1_SFjOyaqhErJ6BT_0NQutDJTZG6UJo,75966
49
49
  utilities/polars_ols.py,sha256=Uc9V5kvlWZ5cU93lKZ-cfAKdVFFw81tqwLW9PxtUvMs,5618
50
50
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
51
51
  utilities/pottery.py,sha256=ggMN72Y7wx7Js8VN6eyNyodpm8TIYqZHGghkDPXIVWk,3949
@@ -87,8 +87,8 @@ utilities/zoneinfo.py,sha256=FBMcUQ4662Aq8SsuCL1OAhDQiyANmVjtb-C30DRrWoE,1966
87
87
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
88
88
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
89
89
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
90
- dycw_utilities-0.158.1.dist-info/METADATA,sha256=G1L-4vHRNDR2xpzQMCKJ3uLTRd1nCtaEtaboy9jMzio,1643
91
- dycw_utilities-0.158.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
- dycw_utilities-0.158.1.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
- dycw_utilities-0.158.1.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
- dycw_utilities-0.158.1.dist-info/RECORD,,
90
+ dycw_utilities-0.159.0.dist-info/METADATA,sha256=NZWORLt2CKIbYdJTUllVDUsYVPK9jdxW5yUH5qjN-vc,1643
91
+ dycw_utilities-0.159.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
+ dycw_utilities-0.159.0.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
+ dycw_utilities-0.159.0.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
+ dycw_utilities-0.159.0.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.158.1"
3
+ __version__ = "0.159.0"
utilities/os.py CHANGED
@@ -126,7 +126,7 @@ class GetEnvVarError(Exception):
126
126
 
127
127
  def is_debug() -> bool:
128
128
  """Check if we are in `DEBUG` mode."""
129
- return "DEBUG" in environ
129
+ return get_env_var("DEBUG", nullable=True) is not None
130
130
 
131
131
 
132
132
  ##
@@ -134,7 +134,7 @@ def is_debug() -> bool:
134
134
 
135
135
  def is_pytest() -> bool:
136
136
  """Check if `pytest` is running."""
137
- return "PYTEST_VERSION" in environ
137
+ return get_env_var("PYTEST_VERSION", default=None) is not None
138
138
 
139
139
 
140
140
  ##
utilities/polars.py CHANGED
@@ -3,7 +3,6 @@ from __future__ import annotations
3
3
  import enum
4
4
  from collections.abc import Callable, Iterator, Sequence
5
5
  from collections.abc import Set as AbstractSet
6
- from contextlib import suppress
7
6
  from dataclasses import asdict, dataclass
8
7
  from functools import partial, reduce
9
8
  from itertools import chain, pairwise, product
@@ -59,7 +58,6 @@ from utilities.functions import (
59
58
  ensure_int,
60
59
  is_dataclass_class,
61
60
  is_dataclass_instance,
62
- is_iterable_of,
63
61
  make_isinstance,
64
62
  )
65
63
  from utilities.gzip import read_binary
@@ -91,7 +89,6 @@ from utilities.reprlib import get_repr
91
89
  from utilities.types import MaybeStr, Number, PathLike, WeekDay
92
90
  from utilities.typing import (
93
91
  get_args,
94
- get_type_hints,
95
92
  is_frozenset_type,
96
93
  is_list_type,
97
94
  is_literal_type,
@@ -1188,27 +1185,6 @@ def _dataclass_to_schema_one(
1188
1185
  ##
1189
1186
 
1190
1187
 
1191
- def drop_null_struct_series(series: Series, /) -> Series:
1192
- """Drop nulls in a struct-dtype Series as per the <= 1.1 definition."""
1193
- try:
1194
- is_not_null = is_not_null_struct_series(series)
1195
- except IsNotNullStructSeriesError as error:
1196
- raise DropNullStructSeriesError(series=error.series) from None
1197
- return series.filter(is_not_null)
1198
-
1199
-
1200
- @dataclass(kw_only=True, slots=True)
1201
- class DropNullStructSeriesError(Exception):
1202
- series: Series
1203
-
1204
- @override
1205
- def __str__(self) -> str:
1206
- return f"Series must have Struct-dtype; got {self.series.dtype}"
1207
-
1208
-
1209
- ##
1210
-
1211
-
1212
1188
  def ensure_data_type(dtype: PolarsDataType, /) -> DataType:
1213
1189
  """Ensure a data type is returned."""
1214
1190
  return dtype if isinstance(dtype, DataType) else dtype()
@@ -1718,68 +1694,28 @@ class _IsNearEventAfterError(IsNearEventError):
1718
1694
  ##
1719
1695
 
1720
1696
 
1721
- def is_not_null_struct_series(series: Series, /) -> Series:
1722
- """Check if a struct-dtype Series is not null as per the <= 1.1 definition."""
1723
- try:
1724
- return ~is_null_struct_series(series)
1725
- except IsNullStructSeriesError as error:
1726
- raise IsNotNullStructSeriesError(series=error.series) from None
1727
-
1728
-
1729
- @dataclass(kw_only=True, slots=True)
1730
- class IsNotNullStructSeriesError(Exception):
1731
- series: Series
1732
-
1733
- @override
1734
- def __str__(self) -> str:
1735
- return f"Series must have Struct-dtype; got {self.series.dtype}"
1736
-
1737
-
1738
- ##
1739
-
1740
-
1741
- def is_null_struct_series(series: Series, /) -> Series:
1742
- """Check if a struct-dtype Series is null as per the <= 1.1 definition."""
1743
- if not isinstance(series.dtype, Struct):
1744
- raise IsNullStructSeriesError(series=series)
1745
- paths = _is_null_struct_series_one(series.dtype)
1746
- paths = list(paths)
1747
- exprs = map(_is_null_struct_to_expr, paths)
1748
- expr = all_horizontal(*exprs)
1749
- return (
1750
- series.struct.unnest().with_columns(_result=expr)["_result"].rename(series.name)
1751
- )
1752
-
1753
-
1754
- def _is_null_struct_series_one(
1755
- dtype: Struct, /, *, root: Iterable[str] = ()
1756
- ) -> Iterator[Sequence[str]]:
1757
- for field in dtype.fields:
1758
- name = field.name
1759
- inner = field.dtype
1760
- path = list(chain(root, [name]))
1761
- if isinstance(inner, Struct):
1762
- yield from _is_null_struct_series_one(inner, root=path)
1763
- else:
1764
- yield path
1765
-
1766
-
1767
- def _is_null_struct_to_expr(path: Iterable[str], /) -> Expr:
1768
- head, *tail = path
1769
- return reduce(_is_null_struct_to_expr_reducer, tail, col(head)).is_null()
1770
-
1771
-
1772
- def _is_null_struct_to_expr_reducer(expr: Expr, path: str, /) -> Expr:
1773
- return expr.struct[path]
1774
-
1697
+ @overload
1698
+ def is_true(column: ExprLike, /) -> Expr: ...
1699
+ @overload
1700
+ def is_true(column: Series, /) -> Series: ...
1701
+ @overload
1702
+ def is_true(column: IntoExprColumn, /) -> ExprOrSeries: ...
1703
+ def is_true(column: IntoExprColumn, /) -> ExprOrSeries:
1704
+ """Compute when a boolean series is True."""
1705
+ column = ensure_expr_or_series(column)
1706
+ return (column.is_not_null()) & column
1775
1707
 
1776
- @dataclass(kw_only=True, slots=True)
1777
- class IsNullStructSeriesError(Exception):
1778
- series: Series
1779
1708
 
1780
- @override
1781
- def __str__(self) -> str:
1782
- return f"Series must have Struct-dtype; got {self.series.dtype}"
1709
+ @overload
1710
+ def is_false(column: ExprLike, /) -> Expr: ...
1711
+ @overload
1712
+ def is_false(column: Series, /) -> Series: ...
1713
+ @overload
1714
+ def is_false(column: IntoExprColumn, /) -> ExprOrSeries: ...
1715
+ def is_false(column: IntoExprColumn, /) -> ExprOrSeries:
1716
+ """Compute when a boolean series is False."""
1717
+ column = ensure_expr_or_series(column)
1718
+ return (column.is_not_null()) & (~column)
1783
1719
 
1784
1720
 
1785
1721
  ##
@@ -2408,74 +2344,52 @@ def struct_dtype(**kwargs: PolarsDataType) -> Struct:
2408
2344
  ##
2409
2345
 
2410
2346
 
2411
- def struct_from_dataclass(
2412
- cls: type[Dataclass],
2413
- /,
2414
- *,
2415
- globalns: StrMapping | None = None,
2416
- localns: StrMapping | None = None,
2417
- warn_name_errors: bool = False,
2418
- time_zone: TimeZoneLike | None = None,
2419
- ) -> Struct:
2420
- """Construct the Struct data type for a dataclass."""
2421
- if not is_dataclass_class(cls):
2422
- raise _StructFromDataClassNotADataclassError(cls=cls)
2423
- anns = get_type_hints(
2424
- cls, globalns=globalns, localns=localns, warn_name_errors=warn_name_errors
2425
- )
2426
- data_types = {
2427
- k: _struct_from_dataclass_one(v, time_zone=time_zone) for k, v in anns.items()
2428
- }
2429
- return Struct(data_types)
2430
-
2431
-
2432
- def _struct_from_dataclass_one(
2433
- ann: Any, /, *, time_zone: TimeZoneLike | None = None
2434
- ) -> PolarsDataType:
2435
- mapping = {
2436
- bool: Boolean,
2437
- whenever.Date: pl.Date,
2438
- float: Float64,
2439
- int: Int64,
2440
- str: String,
2441
- }
2442
- with suppress(KeyError):
2443
- return mapping[ann]
2444
- if is_dataclass_class(ann):
2445
- return struct_from_dataclass(ann, time_zone=time_zone)
2446
- if (isinstance(ann, type) and issubclass(ann, enum.Enum)) or (
2447
- is_literal_type(ann) and is_iterable_of(get_args(ann), str)
2448
- ):
2449
- return String
2450
- if is_optional_type(ann):
2451
- return _struct_from_dataclass_one(
2452
- one(get_args(ann, optional_drop_none=True)), time_zone=time_zone
2453
- )
2454
- if is_frozenset_type(ann) or is_list_type(ann) or is_set_type(ann):
2455
- return List(_struct_from_dataclass_one(one(get_args(ann)), time_zone=time_zone))
2456
- raise _StructFromDataClassTypeError(ann=ann)
2457
-
2458
-
2459
- @dataclass(kw_only=True, slots=True)
2460
- class StructFromDataClassError(Exception): ...
2347
+ @overload
2348
+ def to_true(column: ExprLike, /) -> Expr: ...
2349
+ @overload
2350
+ def to_true(column: Series, /) -> Series: ...
2351
+ @overload
2352
+ def to_true(column: IntoExprColumn, /) -> ExprOrSeries: ...
2353
+ def to_true(column: IntoExprColumn, /) -> ExprOrSeries:
2354
+ """Compute when a boolean series turns True."""
2355
+ t = is_true(column)
2356
+ return ((~t).shift() & t).fill_null(value=False)
2461
2357
 
2462
2358
 
2463
- @dataclass(kw_only=True, slots=True)
2464
- class _StructFromDataClassNotADataclassError(StructFromDataClassError):
2465
- cls: type[Dataclass]
2359
+ @overload
2360
+ def to_not_true(column: ExprLike, /) -> Expr: ...
2361
+ @overload
2362
+ def to_not_true(column: Series, /) -> Series: ...
2363
+ @overload
2364
+ def to_not_true(column: IntoExprColumn, /) -> ExprOrSeries: ...
2365
+ def to_not_true(column: IntoExprColumn, /) -> ExprOrSeries:
2366
+ """Compute when a boolean series turns non-True."""
2367
+ t = is_true(column)
2368
+ return (t.shift() & (~t)).fill_null(value=False)
2466
2369
 
2467
- @override
2468
- def __str__(self) -> str:
2469
- return f"Object must be a dataclass; got {self.cls}"
2470
2370
 
2371
+ @overload
2372
+ def to_false(column: ExprLike, /) -> Expr: ...
2373
+ @overload
2374
+ def to_false(column: Series, /) -> Series: ...
2375
+ @overload
2376
+ def to_false(column: IntoExprColumn, /) -> ExprOrSeries: ...
2377
+ def to_false(column: IntoExprColumn, /) -> ExprOrSeries:
2378
+ """Compute when a boolean series turns False."""
2379
+ f = is_false(column)
2380
+ return ((~f).shift() & f).fill_null(value=False)
2471
2381
 
2472
- @dataclass(kw_only=True, slots=True)
2473
- class _StructFromDataClassTypeError(StructFromDataClassError):
2474
- ann: Any
2475
2382
 
2476
- @override
2477
- def __str__(self) -> str:
2478
- return f"Unsupported type: {self.ann}"
2383
+ @overload
2384
+ def to_not_false(column: ExprLike, /) -> Expr: ...
2385
+ @overload
2386
+ def to_not_false(column: Series, /) -> Series: ...
2387
+ @overload
2388
+ def to_not_false(column: IntoExprColumn, /) -> ExprOrSeries: ...
2389
+ def to_not_false(column: IntoExprColumn, /) -> ExprOrSeries:
2390
+ """Compute when a boolean series turns non-False."""
2391
+ f = is_false(column)
2392
+ return (f.shift() & (~f)).fill_null(value=False)
2479
2393
 
2480
2394
 
2481
2395
  ##
@@ -2597,7 +2511,6 @@ __all__ = [
2597
2511
  "DatetimeUSCentral",
2598
2512
  "DatetimeUSEastern",
2599
2513
  "DatetimeUTC",
2600
- "DropNullStructSeriesError",
2601
2514
  "ExprOrSeries",
2602
2515
  "FiniteEWMMeanError",
2603
2516
  "GetDataTypeOrSeriesTimeZoneError",
@@ -2606,9 +2519,7 @@ __all__ = [
2606
2519
  "InsertBeforeError",
2607
2520
  "InsertBetweenError",
2608
2521
  "IsNearEventError",
2609
- "IsNullStructSeriesError",
2610
2522
  "SetFirstRowAsColumnsError",
2611
- "StructFromDataClassError",
2612
2523
  "TimePeriodDType",
2613
2524
  "acf",
2614
2525
  "adjust_frequencies",
@@ -2631,7 +2542,6 @@ __all__ = [
2631
2542
  "dataclass_to_schema",
2632
2543
  "decreasing_horizontal",
2633
2544
  "deserialize_dataframe",
2634
- "drop_null_struct_series",
2635
2545
  "ensure_data_type",
2636
2546
  "ensure_expr_or_series",
2637
2547
  "ensure_expr_or_series_many",
@@ -2645,9 +2555,9 @@ __all__ = [
2645
2555
  "insert_before",
2646
2556
  "insert_between",
2647
2557
  "integers",
2558
+ "is_false",
2648
2559
  "is_near_event",
2649
- "is_not_null_struct_series",
2650
- "is_null_struct_series",
2560
+ "is_true",
2651
2561
  "join",
2652
2562
  "join_into_periods",
2653
2563
  "map_over_columns",
@@ -2664,7 +2574,10 @@ __all__ = [
2664
2574
  "serialize_dataframe",
2665
2575
  "set_first_row_as_columns",
2666
2576
  "struct_dtype",
2667
- "struct_from_dataclass",
2577
+ "to_false",
2578
+ "to_not_false",
2579
+ "to_not_true",
2580
+ "to_true",
2668
2581
  "touch",
2669
2582
  "try_reify_expr",
2670
2583
  "uniform",