dycw-utilities 0.165.2__py3-none-any.whl → 0.166.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.165.2
3
+ Version: 0.166.0
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=ZnsWqCnQBd7LRuMGzf7ent48hxqCg33kMckVmWtmXfo,60
1
+ utilities/__init__.py,sha256=tOFl7wsUYIesJ9qprnlUN41VJOJseeW2CsZNgcZ0gng,60
2
2
  utilities/aeventkit.py,sha256=ddoleSwW9zdc2tjX5Ge0pMKtYwV_JMxhHYOxnWX2AGM,12609
3
3
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
4
4
  utilities/asyncio.py,sha256=PUedzQ5deqlSECQ33sam9cRzI9TnygHz3FdOqWJWPTM,15288
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
45
45
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
46
46
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
47
47
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
48
- utilities/polars.py,sha256=X-TaklmHmyJazckJIRNMwwvgSp3q6EGGb0mVnwHEDrI,80811
48
+ utilities/polars.py,sha256=hYgFfmpLG3Xc75SCzARkA2JLEIyIrD-AfcRwE8ABWAU,83139
49
49
  utilities/polars_ols.py,sha256=LNTFNLPuYW7fcAHymlbnams_DhitToblYvib3mhKbwI,5615
50
50
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
51
51
  utilities/pottery.py,sha256=ggMN72Y7wx7Js8VN6eyNyodpm8TIYqZHGghkDPXIVWk,3949
@@ -88,8 +88,8 @@ utilities/zoneinfo.py,sha256=tdIScrTB2-B-LH0ukb1HUXKooLknOfJNwHk10MuMYvA,3619
88
88
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
89
89
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
90
90
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
91
- dycw_utilities-0.165.2.dist-info/METADATA,sha256=jw-a2WMCwJGXlvhvV3CVjpssOr7e-1QUnw-3i4WuM2k,1696
92
- dycw_utilities-0.165.2.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
- dycw_utilities-0.165.2.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
94
- dycw_utilities-0.165.2.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
95
- dycw_utilities-0.165.2.dist-info/RECORD,,
91
+ dycw_utilities-0.166.0.dist-info/METADATA,sha256=0_1Nn-M65PH_lpHB1NUUiPxCylNpVd7B9948OxvL8kY,1696
92
+ dycw_utilities-0.166.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
+ dycw_utilities-0.166.0.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
94
+ dycw_utilities-0.166.0.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
95
+ dycw_utilities-0.166.0.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.165.2"
3
+ __version__ = "0.166.0"
utilities/polars.py CHANGED
@@ -61,14 +61,12 @@ from utilities.gzip import read_binary
61
61
  from utilities.iterables import (
62
62
  CheckIterablesEqualError,
63
63
  CheckMappingsEqualError,
64
- CheckSubSetError,
65
64
  CheckSuperMappingError,
66
65
  OneEmptyError,
67
66
  OneNonUniqueError,
68
67
  always_iterable,
69
68
  check_iterables_equal,
70
69
  check_mappings_equal,
71
- check_subset,
72
70
  check_supermapping,
73
71
  is_iterable_not_str,
74
72
  one,
@@ -105,6 +103,7 @@ from utilities.whenever import (
105
103
  from utilities.zoneinfo import UTC, to_time_zone_name
106
104
 
107
105
  if TYPE_CHECKING:
106
+ import datetime as dt
108
107
  from collections.abc import Callable, Iterable, Iterator, Mapping, Sequence
109
108
  from collections.abc import Set as AbstractSet
110
109
 
@@ -300,29 +299,77 @@ def any_series(series: Series, /, *columns: ExprOrSeries) -> Series:
300
299
  ##
301
300
 
302
301
 
303
- def append_dataclass(df: DataFrame, obj: Dataclass, /) -> DataFrame:
304
- """Append a dataclass object to a DataFrame."""
305
- non_null_fields = {k: v for k, v in asdict(obj).items() if v is not None}
306
- try:
307
- check_subset(non_null_fields, df.columns)
308
- except CheckSubSetError as error:
309
- raise AppendDataClassError(
310
- left=error.left, right=error.right, extra=error.extra
311
- ) from None
312
- row_cols = set(df.columns) & set(non_null_fields)
313
- row = dataclass_to_dataframe(obj).select(*row_cols)
314
- return concat([df, row], how="diagonal")
302
+ def append_row(
303
+ df: DataFrame,
304
+ row: StrMapping,
305
+ /,
306
+ *,
307
+ predicate: Callable[[StrMapping], bool] | None = None,
308
+ disallow_extra: bool = False,
309
+ disallow_missing: bool | MaybeIterable[str] = False,
310
+ disallow_null: bool | MaybeIterable[str] = False,
311
+ in_place: bool = False,
312
+ ) -> DataFrame:
313
+ """Append a row to a DataFrame."""
314
+ if (predicate is not None) and not predicate(row):
315
+ raise _AppendRowPredicateError(df=df, row=row)
316
+ if disallow_extra and (len(extra := set(row) - set(df.columns)) >= 1):
317
+ raise _AppendRowExtraKeysError(df=df, row=row, extra=extra)
318
+ if disallow_missing is not False:
319
+ missing = set(df.columns) - set(row)
320
+ if disallow_missing is not True:
321
+ missing &= set(always_iterable(disallow_missing))
322
+ if len(missing) >= 1:
323
+ raise _AppendRowMissingKeysError(df=df, row=row, missing=missing)
324
+ other = DataFrame(data=[row], schema=df.schema)
325
+ if disallow_null:
326
+ other_null = other.select(col(c).is_null().any() for c in other.columns)
327
+ null = {k for k, v in other_null.row(0, named=True).items() if v}
328
+ if disallow_null is not True:
329
+ null &= set(always_iterable(disallow_null))
330
+ if len(null) >= 1:
331
+ raise _AppendRowNullColumnsError(df=df, row=row, columns=null)
332
+ return df.extend(other) if in_place else df.vstack(other)
333
+
334
+
335
+ @dataclass(kw_only=True, slots=True)
336
+ class AppendRowError(Exception):
337
+ df: DataFrame
338
+ row: StrMapping
339
+
340
+
341
+ @dataclass(kw_only=True, slots=True)
342
+ class _AppendRowPredicateError(AppendRowError):
343
+ @override
344
+ def __str__(self) -> str:
345
+ return f"Predicate failed; got {get_repr(self.row)}"
346
+
347
+
348
+ @dataclass(kw_only=True, slots=True)
349
+ class _AppendRowExtraKeysError(AppendRowError):
350
+ extra: AbstractSet[str]
351
+
352
+ @override
353
+ def __str__(self) -> str:
354
+ return f"Extra key(s) found; got {get_repr(self.extra)}"
355
+
356
+
357
+ @dataclass(kw_only=True, slots=True)
358
+ class _AppendRowMissingKeysError(AppendRowError):
359
+ missing: AbstractSet[str]
360
+
361
+ @override
362
+ def __str__(self) -> str:
363
+ return f"Missing key(s) found; got {get_repr(self.missing)}"
315
364
 
316
365
 
317
366
  @dataclass(kw_only=True, slots=True)
318
- class AppendDataClassError[T](Exception):
319
- left: AbstractSet[T]
320
- right: AbstractSet[T]
321
- extra: AbstractSet[T]
367
+ class _AppendRowNullColumnsError(AppendRowError):
368
+ columns: AbstractSet[str]
322
369
 
323
370
  @override
324
371
  def __str__(self) -> str:
325
- return f"Dataclass fields {get_repr(self.left)} must be a subset of DataFrame columns {get_repr(self.right)}; dataclass had extra items {get_repr(self.extra)}"
372
+ return f"Null column(s) found; got {get_repr(self.columns)}"
326
373
 
327
374
 
328
375
  ##
@@ -2460,6 +2507,34 @@ class RoundToFloatError(Exception):
2460
2507
  ##
2461
2508
 
2462
2509
 
2510
+ def search_period(
2511
+ series: Series,
2512
+ date_time: ZonedDateTime,
2513
+ /,
2514
+ *,
2515
+ start_or_end: Literal["start", "end"] = "end",
2516
+ ) -> int | None:
2517
+ """Search a series of periods for the one containing a given date-time."""
2518
+ end = series.struct["end"]
2519
+ py_date_time = date_time.py_datetime()
2520
+ match start_or_end:
2521
+ case "start":
2522
+ index = end.search_sorted(py_date_time, side="right")
2523
+ if index >= len(series):
2524
+ return None
2525
+ item: dt.datetime = series[index]["start"]
2526
+ return index if py_date_time >= item else None
2527
+ case "end":
2528
+ index = end.search_sorted(py_date_time, side="left")
2529
+ if index >= len(series):
2530
+ return None
2531
+ item: dt.datetime = series[index]["start"]
2532
+ return index if py_date_time > item else None
2533
+
2534
+
2535
+ ##
2536
+
2537
+
2463
2538
  def select_exact(
2464
2539
  df: DataFrame, /, *columns: IntoExprColumn, drop: MaybeIterable[str] | None = None
2465
2540
  ) -> DataFrame:
@@ -2672,6 +2747,7 @@ def zoned_date_time_period_dtype(
2672
2747
 
2673
2748
 
2674
2749
  __all__ = [
2750
+ "AppendRowError",
2675
2751
  "BooleanValueCountsError",
2676
2752
  "CheckPolarsDataFrameError",
2677
2753
  "ColumnsToDictError",
@@ -2702,7 +2778,7 @@ __all__ = [
2702
2778
  "all_series",
2703
2779
  "any_dataframe_columns",
2704
2780
  "any_series",
2705
- "append_dataclass",
2781
+ "append_row",
2706
2782
  "are_frames_equal",
2707
2783
  "bernoulli",
2708
2784
  "boolean_value_counts",
@@ -2750,6 +2826,7 @@ __all__ = [
2750
2826
  "read_series",
2751
2827
  "replace_time_zone",
2752
2828
  "round_to_float",
2829
+ "search_period",
2753
2830
  "select_exact",
2754
2831
  "serialize_dataframe",
2755
2832
  "set_first_row_as_columns",