dycw-utilities 0.155.1__py3-none-any.whl → 0.155.3__py3-none-any.whl
This diff compares two publicly released versions of the package as they appear in their public registry. It is provided for informational purposes only.
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/METADATA +1 -1
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/RECORD +8 -8
- utilities/__init__.py +1 -1
- utilities/polars.py +50 -100
- utilities/sqlalchemy_polars.py +2 -2
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/WHEEL +0 -0
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/entry_points.txt +0 -0
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/licenses/LICENSE +0 -0
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/RECORD CHANGED
@@ -1,4 +1,4 @@
-utilities/__init__.py,sha256=
+utilities/__init__.py,sha256=G6dQZ623gragMkZmSuauyK65lWhpSPUVFQC5FFShVec,60
 utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
 utilities/asyncio.py,sha256=QXkTtugXkqtYt7Do23zgYErqzdp6jwzPpV_SP9fJ1gI,16780
 utilities/atomicwrites.py,sha256=tPo6r-Rypd9u99u66B9z86YBPpnLrlHtwox_8Z7T34Y,5790
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
 utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
 utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
 utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
-utilities/polars.py,sha256=
+utilities/polars.py,sha256=JOZjSpj9jitDijX044mKc-N00C5N_On3TJYJKJRhdcE,78494
 utilities/polars_ols.py,sha256=Uc9V5kvlWZ5cU93lKZ-cfAKdVFFw81tqwLW9PxtUvMs,5618
 utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
 utilities/pottery.py,sha256=HJ96oLRarTP37Vhg0WTyB3yAu2hETeg6HgRmpDIqyUs,6581
@@ -65,7 +65,7 @@ utilities/shelve.py,sha256=4OzjQI6kGuUbJciqf535rwnao-_IBv66gsT6tRGiUt0,759
 utilities/slack_sdk.py,sha256=ppFBvKgfg5IRWiIoKPtpTyzBtBF4XmwEvU3I5wLJikM,2140
 utilities/socket.py,sha256=K77vfREvzoVTrpYKo6MZakol0EYu2q1sWJnnZqL0So0,118
 utilities/sqlalchemy.py,sha256=IJKzrKUd_eBOkyK6CucDlxtHwo2vYH3t-rV2_5rAxq8,40554
-utilities/sqlalchemy_polars.py,sha256=
+utilities/sqlalchemy_polars.py,sha256=5Q9HReETYg0qB6E6WQhFh4QAZlKE-IWlogj2BVif_-w,14246
 utilities/statsmodels.py,sha256=koyiBHvpMcSiBfh99wFUfSggLNx7cuAw3rwyfAhoKpQ,3410
 utilities/string.py,sha256=shmBK87zZwzGyixuNuXCiUbqzfeZ9xlrFwz6JTaRvDk,582
 utilities/tempfile.py,sha256=HxB2BF28CcecDJLQ3Bx2Ej-Pb6RJc6W9ngSpB9CnP4k,2018
@@ -87,8 +87,8 @@ utilities/zoneinfo.py,sha256=FBMcUQ4662Aq8SsuCL1OAhDQiyANmVjtb-C30DRrWoE,1966
 utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
 utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
 utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
-dycw_utilities-0.155.
-dycw_utilities-0.155.
-dycw_utilities-0.155.
-dycw_utilities-0.155.
-dycw_utilities-0.155.
+dycw_utilities-0.155.3.dist-info/METADATA,sha256=9E4nOucqtl9XU96ns1fqx2UfqBw-JEyXeH867ENuMu4,1643
+dycw_utilities-0.155.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+dycw_utilities-0.155.3.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
+dycw_utilities-0.155.3.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
+dycw_utilities-0.155.3.dist-info/RECORD,,
utilities/__init__.py CHANGED
utilities/polars.py CHANGED
@@ -1,6 +1,5 @@
 from __future__ import annotations

-import datetime as dt
 import enum
 from collections.abc import Callable, Iterator, Sequence
 from collections.abc import Set as AbstractSet
@@ -136,6 +135,10 @@ DatetimeTokyo = Datetime(time_zone="Asia/Tokyo")
 DatetimeUSCentral = Datetime(time_zone="US/Central")
 DatetimeUSEastern = Datetime(time_zone="US/Eastern")
 DatetimeUTC = Datetime(time_zone="UTC")
+DatePeriodDType = Struct({"start": pl.Date, "end": pl.Date})
+TimePeriodDType = Struct({"start": pl.Time, "end": pl.Time})
+
+
 _FINITE_EWM_MIN_WEIGHT = 0.9999

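The two new struct dtypes describe date and time periods as {start, end} structs. As a minimal sketch (assuming only that polars is installed; the frame and column names are illustrative), a column can be given the date-period dtype directly:

import datetime as dt
import polars as pl

# Reproduction of the dtype added above, for the sketch.
DatePeriodDType = pl.Struct({"start": pl.Date, "end": pl.Date})

# A one-row frame whose "period" column carries the struct dtype.
df = pl.DataFrame(
    {"period": [{"start": dt.date(2024, 1, 1), "end": dt.date(2024, 1, 31)}]},
    schema={"period": DatePeriodDType},
)
print(df.schema)  # period: Struct({'start': Date, 'end': Date})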
@@ -458,29 +461,6 @@ class BooleanValueCountsError(Exception):
 ##


-@overload
-def ceil_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
-@overload
-def ceil_datetime(column: Series, every: ExprLike, /) -> Series: ...
-@overload
-def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
-def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
-    """Compute the `ceil` of a datetime column."""
-    column = ensure_expr_or_series(column)
-    rounded = column.dt.round(every)
-    ceil = (
-        when(column <= rounded)
-        .then(rounded)
-        .otherwise(column.dt.offset_by(every).dt.round(every))
-    )
-    if isinstance(column, Expr):
-        return ceil
-    return DataFrame().with_columns(ceil.alias(column.name))[column.name]
-
-
-##
-
-
 def check_polars_dataframe(
     df: DataFrame,
     /,
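`ceil_datetime` is removed in this release (as is `floor_datetime` further below, which is symmetric but offsets by "-" + every). A caller that still needs the behaviour can reproduce it with plain polars expressions; the sketch below mirrors the removed implementation, and the helper name `ceil_datetime_expr` is hypothetical:

import datetime as dt
import polars as pl

def ceil_datetime_expr(column: pl.Expr, every: str) -> pl.Expr:
    # Round to the nearest multiple of `every`, then push up if we rounded down.
    rounded = column.dt.round(every)
    return (
        pl.when(column <= rounded)
        .then(rounded)
        .otherwise(column.dt.offset_by(every).dt.round(every))
    )

df = pl.DataFrame({"ts": [dt.datetime(2024, 1, 1, 0, 20)]})
print(df.select(ceil_datetime_expr(pl.col("ts"), "1h")))  # 2024-01-01 01:00:00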
@@ -1056,37 +1036,41 @@ def dataclass_to_dataframe(


 def _dataclass_to_dataframe_cast(series: Series, /) -> Series:
-    if series.dtype
-    )
+    if series.dtype != Object:
+        return series
+    if series.map_elements(make_isinstance(whenever.Date), return_dtype=Boolean).all():
+        return series.map_elements(lambda x: x.py_date(), return_dtype=pl.Date)
+    if series.map_elements(make_isinstance(DateDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    if series.map_elements(make_isinstance(DateTimeDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
+    is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
+    if is_path or is_uuid:
+        with suppress_warnings(category=PolarsInefficientMapWarning):
+            return series.map_elements(str, return_dtype=String)
+    if series.map_elements(make_isinstance(whenever.Time), return_dtype=Boolean).all():
+        return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
+    if series.map_elements(make_isinstance(TimeDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    if series.map_elements(make_isinstance(ZonedDateTime), return_dtype=Boolean).all():
+        return_dtype = zoned_date_time_dtype(time_zone=one({dt.tz for dt in series}))
+        return series.map_elements(lambda x: x.py_datetime(), return_dtype=return_dtype)
+    if series.map_elements(
+        lambda x: isinstance(x, dict) and (set(x) == {"start", "end"}),
+        return_dtype=Boolean,
+    ).all():
+        start = _dataclass_to_dataframe_cast(
+            series.map_elements(lambda x: x["start"], return_dtype=Object)
+        ).alias("start")
+        end = _dataclass_to_dataframe_cast(
+            series.map_elements(lambda x: x["end"], return_dtype=Object)
+        ).alias("end")
+        name = series.name
+        return concat_series(start, end).select(
+            struct(start=start, end=end).alias(name)
+        )[name]
+    raise NotImplementedError(series)  # pragma: no cover


 @dataclass(kw_only=True, slots=True)
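The rewritten cast walks an Object-dtype series and converts it element-wise with map_elements, choosing a concrete polars dtype per detected Python type (whenever dates and times, deltas, paths, UUIDs, and {start, end} period dicts). A minimal sketch of the underlying pattern, using only polars, with illustrative series contents:

import datetime as dt
import polars as pl

# An Object-dtype series of raw Python values ...
s = pl.Series("raw", ["2024-01-01", "2024-01-02"], dtype=pl.Object)
# ... converted element-wise to a concrete dtype via map_elements.
converted = s.map_elements(dt.date.fromisoformat, return_dtype=pl.Date)
print(converted.dtype)  # Date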
@@ -1362,29 +1346,6 @@ class _FiniteEWMWeightsError(Exception):
 ##


-@overload
-def floor_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
-@overload
-def floor_datetime(column: Series, every: ExprLike, /) -> Series: ...
-@overload
-def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
-def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
-    """Compute the `floor` of a datetime column."""
-    column = ensure_expr_or_series(column)
-    rounded = column.dt.round(every)
-    floor = (
-        when(column >= rounded)
-        .then(rounded)
-        .otherwise(column.dt.offset_by("-" + every).dt.round(every))
-    )
-    if isinstance(column, Expr):
-        return floor
-    return DataFrame().with_columns(floor.alias(column.name))[column.name]
-
-
-##
-
-
 def get_data_type_or_series_time_zone(
     dtype_or_series: PolarsDataType | Series, /
 ) -> ZoneInfo:
@@ -2480,10 +2441,6 @@ def _struct_from_dataclass_one(
     }
     with suppress(KeyError):
         return mapping[ann]
-    if ann is dt.datetime:
-        if time_zone is None:
-            raise _StructFromDataClassTimeZoneMissingError
-        return zoned_datetime_dtype(time_zone=time_zone)
     if is_dataclass_class(ann):
         return struct_from_dataclass(ann, time_zone=time_zone)
     if (isinstance(ann, type) and issubclass(ann, enum.Enum)) or (
@@ -2512,13 +2469,6 @@ class _StructFromDataClassNotADataclassError(StructFromDataClassError):
         return f"Object must be a dataclass; got {self.cls}"


-@dataclass(kw_only=True, slots=True)
-class _StructFromDataClassTimeZoneMissingError(StructFromDataClassError):
-    @override
-    def __str__(self) -> str:
-        return "Time-zone must be given"
-
-
 @dataclass(kw_only=True, slots=True)
 class _StructFromDataClassTypeError(StructFromDataClassError):
     ann: Any
@@ -2612,27 +2562,27 @@ def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> ExprOrSeries:
 ##


-def
+def zoned_date_time_dtype(
     *, time_unit: TimeUnit = "us", time_zone: TimeZoneLike = UTC
 ) -> Datetime:
-    """Create a zoned
+    """Create a zoned date-time data type."""
     return Datetime(time_unit=time_unit, time_zone=get_time_zone_name(time_zone))


-def
+def zoned_date_time_period_dtype(
     *,
     time_unit: TimeUnit = "us",
     time_zone: TimeZoneLike | tuple[TimeZoneLike, TimeZoneLike] = UTC,
 ) -> Struct:
-    """Create a zoned
+    """Create a zoned date-time period data type."""
     match time_zone:
         case start, end:
             return struct_dtype(
-                start=
-                end=
+                start=zoned_date_time_dtype(time_unit=time_unit, time_zone=start),
+                end=zoned_date_time_dtype(time_unit=time_unit, time_zone=end),
             )
         case _:
-            dtype =
+            dtype = zoned_date_time_dtype(time_unit=time_unit, time_zone=time_zone)
             return struct_dtype(start=dtype, end=dtype)

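The two dtype helpers are now exported as zoned_date_time_dtype and zoned_date_time_period_dtype; per the signatures above, their defaults give microsecond precision in UTC. A minimal usage sketch, assuming dycw-utilities 0.155.3 is installed:

from utilities.polars import zoned_date_time_dtype, zoned_date_time_period_dtype

dtype = zoned_date_time_dtype()          # Datetime with time_unit="us", time_zone="UTC"
period = zoned_date_time_period_dtype()  # Struct with "start"/"end" fields of that dtype
print(dtype, period)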
@@ -2641,6 +2591,7 @@ __all__ = [
     "CheckPolarsDataFrameError",
     "ColumnsToDictError",
     "DataClassToDataFrameError",
+    "DatePeriodDType",
     "DatetimeHongKong",
     "DatetimeTokyo",
     "DatetimeUSCentral",
@@ -2658,6 +2609,7 @@ __all__ = [
     "IsNullStructSeriesError",
     "SetFirstRowAsColumnsError",
     "StructFromDataClassError",
+    "TimePeriodDType",
     "acf",
     "adjust_frequencies",
     "all_dataframe_columns",
@@ -2668,7 +2620,6 @@ __all__ = [
     "are_frames_equal",
     "bernoulli",
     "boolean_value_counts",
-    "ceil_datetime",
     "check_polars_dataframe",
     "choice",
     "collect_series",
@@ -2685,7 +2636,6 @@ __all__ = [
     "ensure_expr_or_series",
     "ensure_expr_or_series_many",
     "finite_ewm_mean",
-    "floor_datetime",
     "get_data_type_or_series_time_zone",
     "get_expr_name",
     "get_frequency_spectrum",
@@ -2721,6 +2671,6 @@ __all__ = [
     "unique_element",
     "write_dataframe",
     "write_series",
-    "
-    "
+    "zoned_date_time_dtype",
+    "zoned_date_time_period_dtype",
 ]
utilities/sqlalchemy_polars.py CHANGED
@@ -35,7 +35,7 @@ from utilities.iterables import (
     chunked,
     one,
 )
-from utilities.polars import
+from utilities.polars import zoned_date_time_dtype
 from utilities.reprlib import get_repr
 from utilities.sqlalchemy import (
     CHUNK_SIZE_FRAC,
@@ -390,7 +390,7 @@ def _select_to_dataframe_map_table_column_type_to_dtype(
         return pl.Date
     if is_subclass_gen(py_type, dt.datetime):
         has_tz: bool = type_use.timezone
-        return
+        return zoned_date_time_dtype(time_zone=time_zone) if has_tz else Datetime()
     if issubclass(py_type, dt.time):
         return Time
     if issubclass(py_type, dt.timedelta):
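With this change, a timezone-aware SQLAlchemy DateTime column maps to the zoned polars dtype, while a naive one maps to a plain Datetime. A standalone sketch of that decision (the helper name is hypothetical, and UTC stands in for the time_zone argument the real code threads through):

import polars as pl
import sqlalchemy as sa

def map_datetime_column(col_type: sa.DateTime) -> pl.DataType:
    # Timezone-aware columns get a zoned dtype; naive columns a plain Datetime.
    if col_type.timezone:
        return pl.Datetime(time_unit="us", time_zone="UTC")
    return pl.Datetime()

print(map_datetime_column(sa.DateTime(timezone=True)))   # zoned
print(map_datetime_column(sa.DateTime(timezone=False)))  # naive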
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/WHEEL
File without changes
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/entry_points.txt
File without changes
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.3.dist-info}/licenses/LICENSE
File without changes