dycw-utilities 0.155.1__py3-none-any.whl → 0.155.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.155.1
3
+ Version: 0.155.2
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=VUm6D-MbPENpEcdDNizwkQ7ZdWupTs1IZmUsMeC93k0,60
1
+ utilities/__init__.py,sha256=UpA18BgKE4JKTnomInevMSHAcDOtpqnipumof6hoK_c,60
2
2
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
3
3
  utilities/asyncio.py,sha256=QXkTtugXkqtYt7Do23zgYErqzdp6jwzPpV_SP9fJ1gI,16780
4
4
  utilities/atomicwrites.py,sha256=tPo6r-Rypd9u99u66B9z86YBPpnLrlHtwox_8Z7T34Y,5790
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
45
45
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
46
46
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
47
47
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
48
- utilities/polars.py,sha256=yedkwwcyX35lreA2CeOGVRyOrXrs8DKk98T9dWpnxCo,79844
48
+ utilities/polars.py,sha256=kEYLh5kcpvjOqNS-Pv-DAjvzPZYm6qZ7ywMt-wZvRGE,78434
49
49
  utilities/polars_ols.py,sha256=Uc9V5kvlWZ5cU93lKZ-cfAKdVFFw81tqwLW9PxtUvMs,5618
50
50
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
51
51
  utilities/pottery.py,sha256=HJ96oLRarTP37Vhg0WTyB3yAu2hETeg6HgRmpDIqyUs,6581
@@ -65,7 +65,7 @@ utilities/shelve.py,sha256=4OzjQI6kGuUbJciqf535rwnao-_IBv66gsT6tRGiUt0,759
65
65
  utilities/slack_sdk.py,sha256=ppFBvKgfg5IRWiIoKPtpTyzBtBF4XmwEvU3I5wLJikM,2140
66
66
  utilities/socket.py,sha256=K77vfREvzoVTrpYKo6MZakol0EYu2q1sWJnnZqL0So0,118
67
67
  utilities/sqlalchemy.py,sha256=IJKzrKUd_eBOkyK6CucDlxtHwo2vYH3t-rV2_5rAxq8,40554
68
- utilities/sqlalchemy_polars.py,sha256=Mm-sShZfqqgnzTrupMQdCfSM2akrybXHXAErTs-ofM8,14244
68
+ utilities/sqlalchemy_polars.py,sha256=5Q9HReETYg0qB6E6WQhFh4QAZlKE-IWlogj2BVif_-w,14246
69
69
  utilities/statsmodels.py,sha256=koyiBHvpMcSiBfh99wFUfSggLNx7cuAw3rwyfAhoKpQ,3410
70
70
  utilities/string.py,sha256=shmBK87zZwzGyixuNuXCiUbqzfeZ9xlrFwz6JTaRvDk,582
71
71
  utilities/tempfile.py,sha256=HxB2BF28CcecDJLQ3Bx2Ej-Pb6RJc6W9ngSpB9CnP4k,2018
@@ -87,8 +87,8 @@ utilities/zoneinfo.py,sha256=FBMcUQ4662Aq8SsuCL1OAhDQiyANmVjtb-C30DRrWoE,1966
87
87
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
88
88
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
89
89
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
90
- dycw_utilities-0.155.1.dist-info/METADATA,sha256=t4j9mkVdOy56nqyYGTiiODz6Zq0dOJUFTTz_4CTcQTg,1643
91
- dycw_utilities-0.155.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
- dycw_utilities-0.155.1.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
- dycw_utilities-0.155.1.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
- dycw_utilities-0.155.1.dist-info/RECORD,,
90
+ dycw_utilities-0.155.2.dist-info/METADATA,sha256=bti46Z55f6RXvAyFTu6sAzeis6LgdIEkMPTR0rQWj1w,1643
91
+ dycw_utilities-0.155.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
92
+ dycw_utilities-0.155.2.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
93
+ dycw_utilities-0.155.2.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
94
+ dycw_utilities-0.155.2.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.155.1"
3
+ __version__ = "0.155.2"
utilities/polars.py CHANGED
@@ -1,6 +1,5 @@
1
1
  from __future__ import annotations
2
2
 
3
- import datetime as dt
4
3
  import enum
5
4
  from collections.abc import Callable, Iterator, Sequence
6
5
  from collections.abc import Set as AbstractSet
@@ -136,6 +135,10 @@ DatetimeTokyo = Datetime(time_zone="Asia/Tokyo")
136
135
  DatetimeUSCentral = Datetime(time_zone="US/Central")
137
136
  DatetimeUSEastern = Datetime(time_zone="US/Eastern")
138
137
  DatetimeUTC = Datetime(time_zone="UTC")
138
+ DatePeriodDType = Struct({"start": pl.Date, "end": pl.Date})
139
+ TimePeriodDType = Struct({"start": pl.Time, "end": pl.Time})
140
+
141
+
139
142
  _FINITE_EWM_MIN_WEIGHT = 0.9999
140
143
 
141
144
 
@@ -458,29 +461,6 @@ class BooleanValueCountsError(Exception):
458
461
  ##
459
462
 
460
463
 
461
- @overload
462
- def ceil_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
463
- @overload
464
- def ceil_datetime(column: Series, every: ExprLike, /) -> Series: ...
465
- @overload
466
- def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
467
- def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
468
- """Compute the `ceil` of a datetime column."""
469
- column = ensure_expr_or_series(column)
470
- rounded = column.dt.round(every)
471
- ceil = (
472
- when(column <= rounded)
473
- .then(rounded)
474
- .otherwise(column.dt.offset_by(every).dt.round(every))
475
- )
476
- if isinstance(column, Expr):
477
- return ceil
478
- return DataFrame().with_columns(ceil.alias(column.name))[column.name]
479
-
480
-
481
- ##
482
-
483
-
484
464
  def check_polars_dataframe(
485
465
  df: DataFrame,
486
466
  /,
@@ -1056,37 +1036,38 @@ def dataclass_to_dataframe(
1056
1036
 
1057
1037
 
1058
1038
  def _dataclass_to_dataframe_cast(series: Series, /) -> Series:
1059
- if series.dtype == Object:
1060
- if series.map_elements(
1061
- make_isinstance(whenever.Date), return_dtype=Boolean
1062
- ).all():
1063
- return series.map_elements(lambda x: x.py_date(), return_dtype=pl.Date)
1064
- if series.map_elements(make_isinstance(DateDelta), return_dtype=Boolean).all():
1065
- return series.map_elements(to_py_time_delta, return_dtype=Duration)
1066
- if series.map_elements(
1067
- make_isinstance(DateTimeDelta), return_dtype=Boolean
1068
- ).all():
1069
- return series.map_elements(to_py_time_delta, return_dtype=Duration)
1070
- is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
1071
- is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
1072
- if is_path or is_uuid:
1073
- with suppress_warnings(category=PolarsInefficientMapWarning):
1074
- return series.map_elements(str, return_dtype=String)
1075
- if series.map_elements(
1076
- make_isinstance(whenever.Time), return_dtype=Boolean
1077
- ).all():
1078
- return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
1079
- if series.map_elements(make_isinstance(TimeDelta), return_dtype=Boolean).all():
1080
- return series.map_elements(to_py_time_delta, return_dtype=Duration)
1081
- if series.map_elements(
1082
- make_isinstance(ZonedDateTime), return_dtype=Boolean
1083
- ).all():
1084
- return_dtype = zoned_datetime_dtype(time_zone=one({dt.tz for dt in series}))
1085
- return series.map_elements(
1086
- lambda x: x.py_datetime(), return_dtype=return_dtype
1087
- )
1088
- raise NotImplementedError(series) # pragma: no cover
1089
- return series
1039
+ if series.dtype != Object:
1040
+ return series
1041
+ if series.map_elements(make_isinstance(whenever.Date), return_dtype=Boolean).all():
1042
+ return series.map_elements(lambda x: x.py_date(), return_dtype=pl.Date)
1043
+ if series.map_elements(make_isinstance(DateDelta), return_dtype=Boolean).all():
1044
+ return series.map_elements(to_py_time_delta, return_dtype=Duration)
1045
+ if series.map_elements(make_isinstance(DateTimeDelta), return_dtype=Boolean).all():
1046
+ return series.map_elements(to_py_time_delta, return_dtype=Duration)
1047
+ is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
1048
+ is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
1049
+ if is_path or is_uuid:
1050
+ with suppress_warnings(category=PolarsInefficientMapWarning):
1051
+ return series.map_elements(str, return_dtype=String)
1052
+ if series.map_elements(make_isinstance(whenever.Time), return_dtype=Boolean).all():
1053
+ return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
1054
+ if series.map_elements(make_isinstance(TimeDelta), return_dtype=Boolean).all():
1055
+ return series.map_elements(to_py_time_delta, return_dtype=Duration)
1056
+ if series.map_elements(make_isinstance(ZonedDateTime), return_dtype=Boolean).all():
1057
+ return_dtype = zoned_date_time_dtype(time_zone=one({dt.tz for dt in series}))
1058
+ return series.map_elements(lambda x: x.py_datetime(), return_dtype=return_dtype)
1059
+ if series.map_elements(
1060
+ lambda x: isinstance(x, dict) and (set(x) == {"start", "end"}),
1061
+ return_dtype=Boolean,
1062
+ ).all():
1063
+ start = _dataclass_to_dataframe_cast(
1064
+ series.map_elements(lambda x: x["start"], return_dtype=Object)
1065
+ ).alias("start")
1066
+ end = _dataclass_to_dataframe_cast(
1067
+ series.map_elements(lambda x: x["end"], return_dtype=Object)
1068
+ ).alias("end")
1069
+ return concat_series(start, end).select(x=struct(start=start, end=end))["x"]
1070
+ raise NotImplementedError(series) # pragma: no cover
1090
1071
 
1091
1072
 
1092
1073
  @dataclass(kw_only=True, slots=True)
@@ -1362,29 +1343,6 @@ class _FiniteEWMWeightsError(Exception):
1362
1343
  ##
1363
1344
 
1364
1345
 
1365
- @overload
1366
- def floor_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
1367
- @overload
1368
- def floor_datetime(column: Series, every: ExprLike, /) -> Series: ...
1369
- @overload
1370
- def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
1371
- def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
1372
- """Compute the `floor` of a datetime column."""
1373
- column = ensure_expr_or_series(column)
1374
- rounded = column.dt.round(every)
1375
- floor = (
1376
- when(column >= rounded)
1377
- .then(rounded)
1378
- .otherwise(column.dt.offset_by("-" + every).dt.round(every))
1379
- )
1380
- if isinstance(column, Expr):
1381
- return floor
1382
- return DataFrame().with_columns(floor.alias(column.name))[column.name]
1383
-
1384
-
1385
- ##
1386
-
1387
-
1388
1346
  def get_data_type_or_series_time_zone(
1389
1347
  dtype_or_series: PolarsDataType | Series, /
1390
1348
  ) -> ZoneInfo:
@@ -2480,10 +2438,6 @@ def _struct_from_dataclass_one(
2480
2438
  }
2481
2439
  with suppress(KeyError):
2482
2440
  return mapping[ann]
2483
- if ann is dt.datetime:
2484
- if time_zone is None:
2485
- raise _StructFromDataClassTimeZoneMissingError
2486
- return zoned_datetime_dtype(time_zone=time_zone)
2487
2441
  if is_dataclass_class(ann):
2488
2442
  return struct_from_dataclass(ann, time_zone=time_zone)
2489
2443
  if (isinstance(ann, type) and issubclass(ann, enum.Enum)) or (
@@ -2512,13 +2466,6 @@ class _StructFromDataClassNotADataclassError(StructFromDataClassError):
2512
2466
  return f"Object must be a dataclass; got {self.cls}"
2513
2467
 
2514
2468
 
2515
- @dataclass(kw_only=True, slots=True)
2516
- class _StructFromDataClassTimeZoneMissingError(StructFromDataClassError):
2517
- @override
2518
- def __str__(self) -> str:
2519
- return "Time-zone must be given"
2520
-
2521
-
2522
2469
  @dataclass(kw_only=True, slots=True)
2523
2470
  class _StructFromDataClassTypeError(StructFromDataClassError):
2524
2471
  ann: Any
@@ -2612,27 +2559,27 @@ def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> ExprOrSeri
2612
2559
  ##
2613
2560
 
2614
2561
 
2615
- def zoned_datetime_dtype(
2562
+ def zoned_date_time_dtype(
2616
2563
  *, time_unit: TimeUnit = "us", time_zone: TimeZoneLike = UTC
2617
2564
  ) -> Datetime:
2618
- """Create a zoned datetime data type."""
2565
+ """Create a zoned date-time data type."""
2619
2566
  return Datetime(time_unit=time_unit, time_zone=get_time_zone_name(time_zone))
2620
2567
 
2621
2568
 
2622
- def zoned_datetime_period_dtype(
2569
+ def zoned_date_time_period_dtype(
2623
2570
  *,
2624
2571
  time_unit: TimeUnit = "us",
2625
2572
  time_zone: TimeZoneLike | tuple[TimeZoneLike, TimeZoneLike] = UTC,
2626
2573
  ) -> Struct:
2627
- """Create a zoned datetime period data type."""
2574
+ """Create a zoned date-time period data type."""
2628
2575
  match time_zone:
2629
2576
  case start, end:
2630
2577
  return struct_dtype(
2631
- start=zoned_datetime_dtype(time_unit=time_unit, time_zone=start),
2632
- end=zoned_datetime_dtype(time_unit=time_unit, time_zone=end),
2578
+ start=zoned_date_time_dtype(time_unit=time_unit, time_zone=start),
2579
+ end=zoned_date_time_dtype(time_unit=time_unit, time_zone=end),
2633
2580
  )
2634
2581
  case _:
2635
- dtype = zoned_datetime_dtype(time_unit=time_unit, time_zone=time_zone)
2582
+ dtype = zoned_date_time_dtype(time_unit=time_unit, time_zone=time_zone)
2636
2583
  return struct_dtype(start=dtype, end=dtype)
2637
2584
 
2638
2585
 
@@ -2641,6 +2588,7 @@ __all__ = [
2641
2588
  "CheckPolarsDataFrameError",
2642
2589
  "ColumnsToDictError",
2643
2590
  "DataClassToDataFrameError",
2591
+ "DatePeriodDType",
2644
2592
  "DatetimeHongKong",
2645
2593
  "DatetimeTokyo",
2646
2594
  "DatetimeUSCentral",
@@ -2658,6 +2606,7 @@ __all__ = [
2658
2606
  "IsNullStructSeriesError",
2659
2607
  "SetFirstRowAsColumnsError",
2660
2608
  "StructFromDataClassError",
2609
+ "TimePeriodDType",
2661
2610
  "acf",
2662
2611
  "adjust_frequencies",
2663
2612
  "all_dataframe_columns",
@@ -2668,7 +2617,6 @@ __all__ = [
2668
2617
  "are_frames_equal",
2669
2618
  "bernoulli",
2670
2619
  "boolean_value_counts",
2671
- "ceil_datetime",
2672
2620
  "check_polars_dataframe",
2673
2621
  "choice",
2674
2622
  "collect_series",
@@ -2685,7 +2633,6 @@ __all__ = [
2685
2633
  "ensure_expr_or_series",
2686
2634
  "ensure_expr_or_series_many",
2687
2635
  "finite_ewm_mean",
2688
- "floor_datetime",
2689
2636
  "get_data_type_or_series_time_zone",
2690
2637
  "get_expr_name",
2691
2638
  "get_frequency_spectrum",
@@ -2721,6 +2668,6 @@ __all__ = [
2721
2668
  "unique_element",
2722
2669
  "write_dataframe",
2723
2670
  "write_series",
2724
- "zoned_datetime_dtype",
2725
- "zoned_datetime_period_dtype",
2671
+ "zoned_date_time_dtype",
2672
+ "zoned_date_time_period_dtype",
2726
2673
  ]
utilities/sqlalchemy_polars.py CHANGED
@@ -35,7 +35,7 @@ from utilities.iterables import (
35
35
  chunked,
36
36
  one,
37
37
  )
38
- from utilities.polars import zoned_datetime_dtype
38
+ from utilities.polars import zoned_date_time_dtype
39
39
  from utilities.reprlib import get_repr
40
40
  from utilities.sqlalchemy import (
41
41
  CHUNK_SIZE_FRAC,
@@ -390,7 +390,7 @@ def _select_to_dataframe_map_table_column_type_to_dtype(
390
390
  return pl.Date
391
391
  if is_subclass_gen(py_type, dt.datetime):
392
392
  has_tz: bool = type_use.timezone
393
- return zoned_datetime_dtype(time_zone=time_zone) if has_tz else Datetime()
393
+ return zoned_date_time_dtype(time_zone=time_zone) if has_tz else Datetime()
394
394
  if issubclass(py_type, dt.time):
395
395
  return Time
396
396
  if issubclass(py_type, dt.timedelta):