dycw-utilities 0.155.1__py3-none-any.whl → 0.155.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/METADATA +1 -1
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/RECORD +8 -8
- utilities/__init__.py +1 -1
- utilities/polars.py +47 -100
- utilities/sqlalchemy_polars.py +2 -2
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/WHEEL +0 -0
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/entry_points.txt +0 -0
- {dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/licenses/LICENSE +0 -0
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/RECORD
CHANGED
@@ -1,4 +1,4 @@
-utilities/__init__.py,sha256=
+utilities/__init__.py,sha256=UpA18BgKE4JKTnomInevMSHAcDOtpqnipumof6hoK_c,60
 utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
 utilities/asyncio.py,sha256=QXkTtugXkqtYt7Do23zgYErqzdp6jwzPpV_SP9fJ1gI,16780
 utilities/atomicwrites.py,sha256=tPo6r-Rypd9u99u66B9z86YBPpnLrlHtwox_8Z7T34Y,5790
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
 utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
 utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
 utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
-utilities/polars.py,sha256=
+utilities/polars.py,sha256=kEYLh5kcpvjOqNS-Pv-DAjvzPZYm6qZ7ywMt-wZvRGE,78434
 utilities/polars_ols.py,sha256=Uc9V5kvlWZ5cU93lKZ-cfAKdVFFw81tqwLW9PxtUvMs,5618
 utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
 utilities/pottery.py,sha256=HJ96oLRarTP37Vhg0WTyB3yAu2hETeg6HgRmpDIqyUs,6581
@@ -65,7 +65,7 @@ utilities/shelve.py,sha256=4OzjQI6kGuUbJciqf535rwnao-_IBv66gsT6tRGiUt0,759
 utilities/slack_sdk.py,sha256=ppFBvKgfg5IRWiIoKPtpTyzBtBF4XmwEvU3I5wLJikM,2140
 utilities/socket.py,sha256=K77vfREvzoVTrpYKo6MZakol0EYu2q1sWJnnZqL0So0,118
 utilities/sqlalchemy.py,sha256=IJKzrKUd_eBOkyK6CucDlxtHwo2vYH3t-rV2_5rAxq8,40554
-utilities/sqlalchemy_polars.py,sha256=
+utilities/sqlalchemy_polars.py,sha256=5Q9HReETYg0qB6E6WQhFh4QAZlKE-IWlogj2BVif_-w,14246
 utilities/statsmodels.py,sha256=koyiBHvpMcSiBfh99wFUfSggLNx7cuAw3rwyfAhoKpQ,3410
 utilities/string.py,sha256=shmBK87zZwzGyixuNuXCiUbqzfeZ9xlrFwz6JTaRvDk,582
 utilities/tempfile.py,sha256=HxB2BF28CcecDJLQ3Bx2Ej-Pb6RJc6W9ngSpB9CnP4k,2018
@@ -87,8 +87,8 @@ utilities/zoneinfo.py,sha256=FBMcUQ4662Aq8SsuCL1OAhDQiyANmVjtb-C30DRrWoE,1966
 utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
 utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
 utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
-dycw_utilities-0.155.
-dycw_utilities-0.155.
-dycw_utilities-0.155.
-dycw_utilities-0.155.
-dycw_utilities-0.155.
+dycw_utilities-0.155.2.dist-info/METADATA,sha256=bti46Z55f6RXvAyFTu6sAzeis6LgdIEkMPTR0rQWj1w,1643
+dycw_utilities-0.155.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+dycw_utilities-0.155.2.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
+dycw_utilities-0.155.2.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
+dycw_utilities-0.155.2.dist-info/RECORD,,
utilities/__init__.py
CHANGED
utilities/polars.py
CHANGED
@@ -1,6 +1,5 @@
 from __future__ import annotations
 
-import datetime as dt
 import enum
 from collections.abc import Callable, Iterator, Sequence
 from collections.abc import Set as AbstractSet
@@ -136,6 +135,10 @@ DatetimeTokyo = Datetime(time_zone="Asia/Tokyo")
 DatetimeUSCentral = Datetime(time_zone="US/Central")
 DatetimeUSEastern = Datetime(time_zone="US/Eastern")
 DatetimeUTC = Datetime(time_zone="UTC")
+DatePeriodDType = Struct({"start": pl.Date, "end": pl.Date})
+TimePeriodDType = Struct({"start": pl.Time, "end": pl.Time})
+
+
 _FINITE_EWM_MIN_WEIGHT = 0.9999
 
 
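The new `DatePeriodDType` and `TimePeriodDType` constants are ordinary Polars struct dtypes with `start`/`end` fields. A minimal usage sketch (the data values below are illustrative, not from the package):

```python
import datetime as dt

import polars as pl

# Same struct shape as the new DatePeriodDType constant above.
date_period = pl.Struct({"start": pl.Date, "end": pl.Date})

# Illustrative data: one date period per row.
s = pl.Series(
    "period",
    [{"start": dt.date(2024, 1, 1), "end": dt.date(2024, 1, 31)}],
    dtype=date_period,
)
print(s.struct.field("start"))  # the 'start' dates as a plain pl.Date column
```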
@@ -458,29 +461,6 @@ class BooleanValueCountsError(Exception):
 ##
 
 
-@overload
-def ceil_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
-@overload
-def ceil_datetime(column: Series, every: ExprLike, /) -> Series: ...
-@overload
-def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
-def ceil_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
-    """Compute the `ceil` of a datetime column."""
-    column = ensure_expr_or_series(column)
-    rounded = column.dt.round(every)
-    ceil = (
-        when(column <= rounded)
-        .then(rounded)
-        .otherwise(column.dt.offset_by(every).dt.round(every))
-    )
-    if isinstance(column, Expr):
-        return ceil
-    return DataFrame().with_columns(ceil.alias(column.name))[column.name]
-
-
-##
-
-
 def check_polars_dataframe(
     df: DataFrame,
     /,
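The removed `ceil_datetime` rounded a datetime column up to the next multiple of `every`. A rough sketch of similar behaviour using only built-in Polars expressions (my own sketch under that assumption, not part of the package):

```python
from datetime import datetime

import polars as pl

# Ceiling to "1h" buckets: values already on a boundary stay put,
# everything else moves to the start of the next bucket.
df = pl.DataFrame({"ts": [datetime(2024, 1, 1, 0, 0), datetime(2024, 1, 1, 0, 20)]})
floor = pl.col("ts").dt.truncate("1h")
ceil = pl.when(pl.col("ts") == floor).then(floor).otherwise(floor.dt.offset_by("1h"))
print(df.with_columns(ceil.alias("ts_ceil")))
```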
@@ -1056,37 +1036,38 @@ def dataclass_to_dataframe(
 
 
 def _dataclass_to_dataframe_cast(series: Series, /) -> Series:
-    if series.dtype
-    )
+    if series.dtype != Object:
+        return series
+    if series.map_elements(make_isinstance(whenever.Date), return_dtype=Boolean).all():
+        return series.map_elements(lambda x: x.py_date(), return_dtype=pl.Date)
+    if series.map_elements(make_isinstance(DateDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    if series.map_elements(make_isinstance(DateTimeDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    is_path = series.map_elements(make_isinstance(Path), return_dtype=Boolean).all()
+    is_uuid = series.map_elements(make_isinstance(UUID), return_dtype=Boolean).all()
+    if is_path or is_uuid:
+        with suppress_warnings(category=PolarsInefficientMapWarning):
+            return series.map_elements(str, return_dtype=String)
+    if series.map_elements(make_isinstance(whenever.Time), return_dtype=Boolean).all():
+        return series.map_elements(lambda x: x.py_time(), return_dtype=pl.Time)
+    if series.map_elements(make_isinstance(TimeDelta), return_dtype=Boolean).all():
+        return series.map_elements(to_py_time_delta, return_dtype=Duration)
+    if series.map_elements(make_isinstance(ZonedDateTime), return_dtype=Boolean).all():
+        return_dtype = zoned_date_time_dtype(time_zone=one({dt.tz for dt in series}))
+        return series.map_elements(lambda x: x.py_datetime(), return_dtype=return_dtype)
+    if series.map_elements(
+        lambda x: isinstance(x, dict) and (set(x) == {"start", "end"}),
+        return_dtype=Boolean,
+    ).all():
+        start = _dataclass_to_dataframe_cast(
+            series.map_elements(lambda x: x["start"], return_dtype=Object)
+        ).alias("start")
+        end = _dataclass_to_dataframe_cast(
+            series.map_elements(lambda x: x["end"], return_dtype=Object)
+        ).alias("end")
+        return concat_series(start, end).select(x=struct(start=start, end=end))["x"]
+    raise NotImplementedError(series)  # pragma: no cover
 
 
 @dataclass(kw_only=True, slots=True)
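The rewritten `_dataclass_to_dataframe_cast` dispatches on the Python type of the elements in an `Object` column: `whenever` dates and times become native temporal dtypes, the delta types become `Duration`, paths and UUIDs become strings, and `{"start", "end"}` dicts are cast recursively into structs. A simplified sketch of that detection pattern using only standard-library types (not the library code itself):

```python
from pathlib import Path

import polars as pl

# An Object-dtype series whose elements are all Path objects is
# converted element-wise to String; otherwise it is left alone.
s = pl.Series("files", [Path("a.txt"), Path("b.txt")], dtype=pl.Object)
is_path = s.map_elements(lambda x: isinstance(x, Path), return_dtype=pl.Boolean).all()
if is_path:
    s = s.map_elements(str, return_dtype=pl.String)
print(s.dtype)  # String
```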
@@ -1362,29 +1343,6 @@ class _FiniteEWMWeightsError(Exception):
 ##
 
 
-@overload
-def floor_datetime(column: ExprLike, every: ExprLike, /) -> Expr: ...
-@overload
-def floor_datetime(column: Series, every: ExprLike, /) -> Series: ...
-@overload
-def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries: ...
-def floor_datetime(column: IntoExprColumn, every: ExprLike, /) -> ExprOrSeries:
-    """Compute the `floor` of a datetime column."""
-    column = ensure_expr_or_series(column)
-    rounded = column.dt.round(every)
-    floor = (
-        when(column >= rounded)
-        .then(rounded)
-        .otherwise(column.dt.offset_by("-" + every).dt.round(every))
-    )
-    if isinstance(column, Expr):
-        return floor
-    return DataFrame().with_columns(floor.alias(column.name))[column.name]
-
-
-##
-
-
 def get_data_type_or_series_time_zone(
     dtype_or_series: PolarsDataType | Series, /
 ) -> ZoneInfo:
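`floor_datetime` rounded down to the previous multiple of `every`; Polars' built-in `dt.truncate` covers the common floor-to-bucket case, so a hedged replacement sketch (my suggestion, not the package's) is:

```python
from datetime import datetime

import polars as pl

# dt.truncate floors each datetime to the start of its "1h" bucket.
df = pl.DataFrame({"ts": [datetime(2024, 1, 1, 0, 20), datetime(2024, 1, 1, 1, 59)]})
print(df.with_columns(pl.col("ts").dt.truncate("1h").alias("ts_floor")))
```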
@@ -2480,10 +2438,6 @@ def _struct_from_dataclass_one(
     }
     with suppress(KeyError):
         return mapping[ann]
-    if ann is dt.datetime:
-        if time_zone is None:
-            raise _StructFromDataClassTimeZoneMissingError
-        return zoned_datetime_dtype(time_zone=time_zone)
     if is_dataclass_class(ann):
         return struct_from_dataclass(ann, time_zone=time_zone)
     if (isinstance(ann, type) and issubclass(ann, enum.Enum)) or (
@@ -2512,13 +2466,6 @@ class _StructFromDataClassNotADataclassError(StructFromDataClassError):
         return f"Object must be a dataclass; got {self.cls}"
 
 
-@dataclass(kw_only=True, slots=True)
-class _StructFromDataClassTimeZoneMissingError(StructFromDataClassError):
-    @override
-    def __str__(self) -> str:
-        return "Time-zone must be given"
-
-
 @dataclass(kw_only=True, slots=True)
 class _StructFromDataClassTypeError(StructFromDataClassError):
     ann: Any
@@ -2612,27 +2559,27 @@ def week_num(column: IntoExprColumn, /, *, start: WeekDay = "mon") -> ExprOrSeries:
 ##
 
 
-def 
+def zoned_date_time_dtype(
     *, time_unit: TimeUnit = "us", time_zone: TimeZoneLike = UTC
 ) -> Datetime:
-    """Create a zoned
+    """Create a zoned date-time data type."""
     return Datetime(time_unit=time_unit, time_zone=get_time_zone_name(time_zone))
 
 
-def 
+def zoned_date_time_period_dtype(
     *,
     time_unit: TimeUnit = "us",
     time_zone: TimeZoneLike | tuple[TimeZoneLike, TimeZoneLike] = UTC,
 ) -> Struct:
-    """Create a zoned
+    """Create a zoned date-time period data type."""
     match time_zone:
         case start, end:
             return struct_dtype(
-                start=
-                end=
+                start=zoned_date_time_dtype(time_unit=time_unit, time_zone=start),
+                end=zoned_date_time_dtype(time_unit=time_unit, time_zone=end),
             )
         case _:
-            dtype =
+            dtype = zoned_date_time_dtype(time_unit=time_unit, time_zone=time_zone)
             return struct_dtype(start=dtype, end=dtype)
 
 
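These two helpers are renamed here (the branch removed from `_struct_from_dataclass_one` above shows the old spelling `zoned_datetime_dtype`); their bodies are otherwise the same. A short usage sketch, assuming dycw-utilities 0.155.2 is installed and that a `ZoneInfo` instance is accepted as `TimeZoneLike`:

```python
from zoneinfo import ZoneInfo

from utilities.polars import zoned_date_time_dtype, zoned_date_time_period_dtype

# Assumption: ZoneInfo qualifies as TimeZoneLike.
dtype = zoned_date_time_dtype(time_zone=ZoneInfo("Asia/Tokyo"))
print(dtype)  # Datetime(time_unit='us', time_zone='Asia/Tokyo')

period = zoned_date_time_period_dtype(time_zone=(ZoneInfo("Asia/Tokyo"), ZoneInfo("UTC")))
print(period)  # Struct with tz-aware 'start' and 'end' fields
```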
@@ -2641,6 +2588,7 @@ __all__ = [
     "CheckPolarsDataFrameError",
     "ColumnsToDictError",
     "DataClassToDataFrameError",
+    "DatePeriodDType",
     "DatetimeHongKong",
     "DatetimeTokyo",
     "DatetimeUSCentral",
@@ -2658,6 +2606,7 @@ __all__ = [
     "IsNullStructSeriesError",
     "SetFirstRowAsColumnsError",
     "StructFromDataClassError",
+    "TimePeriodDType",
     "acf",
     "adjust_frequencies",
     "all_dataframe_columns",
@@ -2668,7 +2617,6 @@ __all__ = [
     "are_frames_equal",
     "bernoulli",
     "boolean_value_counts",
-    "ceil_datetime",
     "check_polars_dataframe",
     "choice",
     "collect_series",
@@ -2685,7 +2633,6 @@ __all__ = [
     "ensure_expr_or_series",
     "ensure_expr_or_series_many",
     "finite_ewm_mean",
-    "floor_datetime",
     "get_data_type_or_series_time_zone",
     "get_expr_name",
     "get_frequency_spectrum",
@@ -2721,6 +2668,6 @@ __all__ = [
     "unique_element",
     "write_dataframe",
     "write_series",
-    "
-    "
+    "zoned_date_time_dtype",
+    "zoned_date_time_period_dtype",
 ]
utilities/sqlalchemy_polars.py
CHANGED
@@ -35,7 +35,7 @@ from utilities.iterables import (
     chunked,
     one,
 )
-from utilities.polars import
+from utilities.polars import zoned_date_time_dtype
 from utilities.reprlib import get_repr
 from utilities.sqlalchemy import (
     CHUNK_SIZE_FRAC,
@@ -390,7 +390,7 @@ def _select_to_dataframe_map_table_column_type_to_dtype(
         return pl.Date
     if is_subclass_gen(py_type, dt.datetime):
         has_tz: bool = type_use.timezone
-        return
+        return zoned_date_time_dtype(time_zone=time_zone) if has_tz else Datetime()
     if issubclass(py_type, dt.time):
         return Time
     if issubclass(py_type, dt.timedelta):
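This change routes timezone-aware SQLAlchemy `DateTime` columns through the renamed `zoned_date_time_dtype`. A self-contained sketch of that mapping rule (the function name and fallback below are illustrative, not the library's):

```python
import polars as pl
import sqlalchemy as sa

# Illustrative only: tz-aware SQLAlchemy DateTime -> tz-aware Polars Datetime,
# naive DateTime -> plain Datetime().
def dtype_for_datetime(col_type: sa.DateTime, *, time_zone: str = "UTC") -> pl.Datetime:
    if col_type.timezone:
        return pl.Datetime(time_unit="us", time_zone=time_zone)
    return pl.Datetime()

print(dtype_for_datetime(sa.DateTime(timezone=True)))   # tz-aware
print(dtype_for_datetime(sa.DateTime(timezone=False)))  # naive
```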
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/WHEEL
File without changes
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/entry_points.txt
File without changes
{dycw_utilities-0.155.1.dist-info → dycw_utilities-0.155.2.dist-info}/licenses/LICENSE
File without changes