dycw-utilities 0.155.1-py3-none-any.whl → 0.155.3-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: dycw-utilities
- Version: 0.155.1
+ Version: 0.155.3
  Author-email: Derek Wan <d.wan@icloud.com>
  License-File: LICENSE
  Requires-Python: >=3.12
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/RECORD RENAMED
@@ -1,4 +1,4 @@
- utilities/__init__.py,sha256=VUm6D-MbPENpEcdDNizwkQ7ZdWupTs1IZmUsMeC93k0,60
+ utilities/__init__.py,sha256=G6dQZ623gragMkZmSuauyK65lWhpSPUVFQC5FFShVec,60
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
  utilities/asyncio.py,sha256=QXkTtugXkqtYt7Do23zgYErqzdp6jwzPpV_SP9fJ1gI,16780
  utilities/atomicwrites.py,sha256=tPo6r-Rypd9u99u66B9z86YBPpnLrlHtwox_8Z7T34Y,5790
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
- utilities/polars.py,sha256=yedkwwcyX35lreA2CeOGVRyOrXrs8DKk98T9dWpnxCo,79844
+ utilities/polars.py,sha256=JOZjSpj9jitDijX044mKc-N00C5N_On3TJYJKJRhdcE,78494
  utilities/polars_ols.py,sha256=Uc9V5kvlWZ5cU93lKZ-cfAKdVFFw81tqwLW9PxtUvMs,5618
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
  utilities/pottery.py,sha256=HJ96oLRarTP37Vhg0WTyB3yAu2hETeg6HgRmpDIqyUs,6581
@@ -65,7 +65,7 @@ utilities/shelve.py,sha256=4OzjQI6kGuUbJciqf535rwnao-_IBv66gsT6tRGiUt0,759
  utilities/slack_sdk.py,sha256=ppFBvKgfg5IRWiIoKPtpTyzBtBF4XmwEvU3I5wLJikM,2140
  utilities/socket.py,sha256=K77vfREvzoVTrpYKo6MZakol0EYu2q1sWJnnZqL0So0,118
  utilities/sqlalchemy.py,sha256=IJKzrKUd_eBOkyK6CucDlxtHwo2vYH3t-rV2_5rAxq8,40554
- utilities/sqlalchemy_polars.py,sha256=Mm-sShZfqqgnzTrupMQdCfSM2akrybXHXAErTs-ofM8,14244
+ utilities/sqlalchemy_polars.py,sha256=5Q9HReETYg0qB6E6WQhFh4QAZlKE-IWlogj2BVif_-w,14246
  utilities/statsmodels.py,sha256=koyiBHvpMcSiBfh99wFUfSggLNx7cuAw3rwyfAhoKpQ,3410
  utilities/string.py,sha256=shmBK87zZwzGyixuNuXCiUbqzfeZ9xlrFwz6JTaRvDk,582
  utilities/tempfile.py,sha256=HxB2BF28CcecDJLQ3Bx2Ej-Pb6RJc6W9ngSpB9CnP4k,2018
@@ -87,8 +87,8 @@ utilities/zoneinfo.py,sha256=FBMcUQ4662Aq8SsuCL1OAhDQiyANmVjtb-C30DRrWoE,1966
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
- dycw_utilities-0.155.1.dist-info/METADATA,sha256=t4j9mkVdOy56nqyYGTiiODz6Zq0dOJUFTTz_4CTcQTg,1643
- dycw_utilities-0.155.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
- dycw_utilities-0.155.1.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
- dycw_utilities-0.155.1.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
- dycw_utilities-0.155.1.dist-info/RECORD,,
+ dycw_utilities-0.155.3.dist-info/METADATA,sha256=9E4nOucqtl9XU96ns1fqx2UfqBw-JEyXeH867ENuMu4,1643
+ dycw_utilities-0.155.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+ dycw_utilities-0.155.3.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
+ dycw_utilities-0.155.3.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
+ dycw_utilities-0.155.3.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
  from __future__ import annotations

- __version__ = "0.155.1"
+ __version__ = "0.155.3"
utilities/polars.py CHANGED
@@ -1,6 +1,5 @@
  from __future__ import annotations

- import datetime as dt
  import enum
  from collections.abc import Callable, Iterator, Sequence
  from collections.abc import Set as AbstractSet
@@ -136,6 +135,10 @@ DatetimeTokyo = Datetime(time_zone="Asia/Tokyo")
  DatetimeUSCentral = Datetime(time_zone="US/Central")
  DatetimeUSEastern = Datetime(time_zone="US/Eastern")
  DatetimeUTC = Datetime(time_zone="UTC")
+ DatePeriodDType = Struct({"start": pl.Date, "end": pl.Date})
+ TimePeriodDType = Struct({"start": pl.Time, "end": pl.Time})
+
+
  _FINITE_EWM_MIN_WEIGHT = 0.9999


@@ -458,29 +461,6 @@ class BooleanValueCountsError(Exception):
  ##


- @overload
- def ceil_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
- @overload
- def ceil_datetime(column: Series, every: ExprLike, /) -> Series: ...
- @overload
- def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
- def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
-     """Compute the `ceil` of a datetime column."""
-     column = ensure_expr_or_series(column)
-     rounded = column.dt.round(every)
-     ceil = (
-         when(column <= rounded)
-         .then(rounded)
-         .otherwise(column.dt.offset_by(every).dt.round(every))
-     )
-     if isinstance(column, Expr):
-         return ceil
-     return DataFrame().with_columns(ceil.alias(column.name))[column.name]
-
-
- ##
-
-
  def check_polars_dataframe(
      df: DataFrame,
      /,
@@ -1056,37 +1036,41 @@ def dataclass_to_dataframe(


  def _dataclass_to_dataframe_cast(series: Series, /) -> Series:
-     if series.dtype == Object:
-         if series.map_elements(
-             make_isinstance(whenever.Date), return_dtype=Boolean
-         ).all():
-             return series.map_elements(lambda x: x.py_date(), return_dtype=pl.Date)
-         if series.map_elements(make_isinstance(DateDelta), return_dtype=Boolean).all():
-             return series.map_elements(to_py_time_delta, return_dtype=Duration)
-         if series.map_elements(
-             make_isinstance(DateTimeDelta), return_dtype=Boolean
-         ).all():
-             return series.map_elements(to_py_time_delta, return_dtype=Duration)
-         is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
-         is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
-         if is_path or is_uuid:
-             with suppress_warnings(category=PolarsInefficientMapWarning):
-                 return series.map_elements(str, return_dtype=String)
-         if series.map_elements(
-             make_isinstance(whenever.Time), return_dtype=Boolean
-         ).all():
-             return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
-         if series.map_elements(make_isinstance(TimeDelta), return_dtype=Boolean).all():
-             return series.map_elements(to_py_time_delta, return_dtype=Duration)
-         if series.map_elements(
-             make_isinstance(ZonedDateTime), return_dtype=Boolean
-         ).all():
-             return_dtype = zoned_datetime_dtype(time_zone=one({dt.tz for dt in series}))
-             return series.map_elements(
-                 lambda x: x.py_datetime(), return_dtype=return_dtype
-             )
-         raise NotImplementedError(series)  # pragma: no cover
-     return series
+     if series.dtype != Object:
+         return series
+     if series.map_elements(make_isinstance(whenever.Date), return_dtype=Boolean).all():
+         return series.map_elements(lambda x: x.py_date(), return_dtype=pl.Date)
+     if series.map_elements(make_isinstance(DateDelta), return_dtype=Boolean).all():
+         return series.map_elements(to_py_time_delta, return_dtype=Duration)
+     if series.map_elements(make_isinstance(DateTimeDelta), return_dtype=Boolean).all():
+         return series.map_elements(to_py_time_delta, return_dtype=Duration)
+     is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
+     is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
+     if is_path or is_uuid:
+         with suppress_warnings(category=PolarsInefficientMapWarning):
+             return series.map_elements(str, return_dtype=String)
+     if series.map_elements(make_isinstance(whenever.Time), return_dtype=Boolean).all():
+         return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
+     if series.map_elements(make_isinstance(TimeDelta), return_dtype=Boolean).all():
+         return series.map_elements(to_py_time_delta, return_dtype=Duration)
+     if series.map_elements(make_isinstance(ZonedDateTime), return_dtype=Boolean).all():
+         return_dtype = zoned_date_time_dtype(time_zone=one({dt.tz for dt in series}))
+         return series.map_elements(lambda x: x.py_datetime(), return_dtype=return_dtype)
+     if series.map_elements(
+         lambda x: isinstance(x, dict) and (set(x) == {"start", "end"}),
+         return_dtype=Boolean,
+     ).all():
+         start = _dataclass_to_dataframe_cast(
+             series.map_elements(lambda x: x["start"], return_dtype=Object)
+         ).alias("start")
+         end = _dataclass_to_dataframe_cast(
+             series.map_elements(lambda x: x["end"], return_dtype=Object)
+         ).alias("end")
+         name = series.name
+         return concat_series(start, end).select(
+             struct(start=start, end=end).alias(name)
+         )[name]
+     raise NotImplementedError(series)  # pragma: no cover


  @dataclass(kw_only=True, slots=True)
@@ -1362,29 +1346,6 @@ class _FiniteEWMWeightsError(Exception):
  ##


- @overload
- def floor_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
- @overload
- def floor_datetime(column: Series, every: ExprLike, /) -> Series: ...
- @overload
- def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
- def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
-     """Compute the `floor` of a datetime column."""
-     column = ensure_expr_or_series(column)
-     rounded = column.dt.round(every)
-     floor = (
-         when(column >= rounded)
-         .then(rounded)
-         .otherwise(column.dt.offset_by("-" + every).dt.round(every))
-     )
-     if isinstance(column, Expr):
-         return floor
-     return DataFrame().with_columns(floor.alias(column.name))[column.name]
-
-
- ##
-
-
  def get_data_type_or_series_time_zone(
      dtype_or_series: PolarsDataType | Series, /
  ) -> ZoneInfo:
@@ -2480,10 +2441,6 @@ def _struct_from_dataclass_one(
      }
      with suppress(KeyError):
          return mapping[ann]
-     if ann is dt.datetime:
-         if time_zone is None:
-             raise _StructFromDataClassTimeZoneMissingError
-         return zoned_datetime_dtype(time_zone=time_zone)
      if is_dataclass_class(ann):
          return struct_from_dataclass(ann, time_zone=time_zone)
      if (isinstance(ann, type) and issubclass(ann, enum.Enum)) or (
@@ -2512,13 +2469,6 @@ class _StructFromDataClassNotADataclassError(StructFromDataClassError):
          return f"Object must be a dataclass; got {self.cls}"


- @dataclass(kw_only=True, slots=True)
- class _StructFromDataClassTimeZoneMissingError(StructFromDataClassError):
-     @override
-     def __str__(self) -> str:
-         return "Time-zone must be given"
-
-
  @dataclass(kw_only=True, slots=True)
  class _StructFromDataClassTypeError(StructFromDataClassError):
      ann: Any
@@ -2612,27 +2562,27 @@ def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> ExprOrSeri
  ##


- def zoned_datetime_dtype(
+ def zoned_date_time_dtype(
      *, time_unit: TimeUnit = "us", time_zone: TimeZoneLike = UTC
  ) -> Datetime:
-     """Create a zoned datetime data type."""
+     """Create a zoned date-time data type."""
      return Datetime(time_unit=time_unit, time_zone=get_time_zone_name(time_zone))


- def zoned_datetime_period_dtype(
+ def zoned_date_time_period_dtype(
      *,
      time_unit: TimeUnit = "us",
      time_zone: TimeZoneLike | tuple[TimeZoneLike, TimeZoneLike] = UTC,
  ) -> Struct:
-     """Create a zoned datetime period data type."""
+     """Create a zoned date-time period data type."""
      match time_zone:
          case start, end:
              return struct_dtype(
-                 start=zoned_datetime_dtype(time_unit=time_unit, time_zone=start),
-                 end=zoned_datetime_dtype(time_unit=time_unit, time_zone=end),
+                 start=zoned_date_time_dtype(time_unit=time_unit, time_zone=start),
+                 end=zoned_date_time_dtype(time_unit=time_unit, time_zone=end),
              )
          case _:
-             dtype = zoned_datetime_dtype(time_unit=time_unit, time_zone=time_zone)
+             dtype = zoned_date_time_dtype(time_unit=time_unit, time_zone=time_zone)
              return struct_dtype(start=dtype, end=dtype)


@@ -2641,6 +2591,7 @@ __all__ = [
      "CheckPolarsDataFrameError",
      "ColumnsToDictError",
      "DataClassToDataFrameError",
+     "DatePeriodDType",
      "DatetimeHongKong",
      "DatetimeTokyo",
      "DatetimeUSCentral",
@@ -2658,6 +2609,7 @@ __all__ = [
      "IsNullStructSeriesError",
      "SetFirstRowAsColumnsError",
      "StructFromDataClassError",
+     "TimePeriodDType",
      "acf",
      "adjust_frequencies",
      "all_dataframe_columns",
@@ -2668,7 +2620,6 @@ __all__ = [
      "are_frames_equal",
      "bernoulli",
      "boolean_value_counts",
-     "ceil_datetime",
      "check_polars_dataframe",
      "choice",
      "collect_series",
@@ -2685,7 +2636,6 @@ __all__ = [
      "ensure_expr_or_series",
      "ensure_expr_or_series_many",
      "finite_ewm_mean",
-     "floor_datetime",
      "get_data_type_or_series_time_zone",
      "get_expr_name",
      "get_frequency_spectrum",
@@ -2721,6 +2671,6 @@ __all__ = [
      "unique_element",
      "write_dataframe",
      "write_series",
-     "zoned_datetime_dtype",
-     "zoned_datetime_period_dtype",
+     "zoned_date_time_dtype",
+     "zoned_date_time_period_dtype",
  ]
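
For reference, a minimal sketch of downstream usage of the renamed and newly exported polars.py symbols above (illustrative only, not part of the wheel; assumes polars is installed and that the time_zone argument accepts an IANA zone string, as the TimeZoneLike annotation suggests):

import datetime as dt

import polars as pl

from utilities.polars import (
    DatePeriodDType,
    zoned_date_time_dtype,
    zoned_date_time_period_dtype,
)

# Renamed helpers (formerly zoned_datetime_dtype / zoned_datetime_period_dtype).
dtype = zoned_date_time_dtype(time_zone="Asia/Tokyo")           # Datetime with a zone
period_dtype = zoned_date_time_period_dtype(time_zone="Asia/Tokyo")  # Struct of start/end

# Newly exported dtype: a Struct({"start": Date, "end": Date}) for date periods.
windows = pl.Series(
    "window",
    [{"start": dt.date(2024, 1, 1), "end": dt.date(2024, 1, 31)}],
    dtype=DatePeriodDType,
)

Callers of the removed ceil_datetime/floor_datetime will need their own replacement, e.g. one based on Expr.dt.round and Expr.dt.offset_by as in the removed code shown above.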
utilities/sqlalchemy_polars.py CHANGED
@@ -35,7 +35,7 @@ from utilities.iterables import (
      chunked,
      one,
  )
- from utilities.polars import zoned_datetime_dtype
+ from utilities.polars import zoned_date_time_dtype
  from utilities.reprlib import get_repr
  from utilities.sqlalchemy import (
      CHUNK_SIZE_FRAC,
@@ -390,7 +390,7 @@ def _select_to_dataframe_map_table_column_type_to_dtype(
          return pl.Date
      if is_subclass_gen(py_type, dt.datetime):
          has_tz: bool = type_use.timezone
-         return zoned_datetime_dtype(time_zone=time_zone) if has_tz else Datetime()
+         return zoned_date_time_dtype(time_zone=time_zone) if has_tz else Datetime()
      if issubclass(py_type, dt.time):
          return Time
      if issubclass(py_type, dt.timedelta):