dycw-utilities 0.165.3__py3-none-any.whl → 0.166.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.165.3
3
+ Version: 0.166.0
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=dGyOX1Z3m0mhIPCR1cqAXlybAWUxvEasnTNSAfT9Fh8,60
1
+ utilities/__init__.py,sha256=tOFl7wsUYIesJ9qprnlUN41VJOJseeW2CsZNgcZ0gng,60
2
2
  utilities/aeventkit.py,sha256=ddoleSwW9zdc2tjX5Ge0pMKtYwV_JMxhHYOxnWX2AGM,12609
3
3
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
4
4
  utilities/asyncio.py,sha256=PUedzQ5deqlSECQ33sam9cRzI9TnygHz3FdOqWJWPTM,15288
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
45
45
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
46
46
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
47
47
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
48
- utilities/polars.py,sha256=6_k8cU7LePQ942Qfqe5l_tR8ND0M0bLlpnaMvDbRCM8,81754
48
+ utilities/polars.py,sha256=hYgFfmpLG3Xc75SCzARkA2JLEIyIrD-AfcRwE8ABWAU,83139
49
49
  utilities/polars_ols.py,sha256=LNTFNLPuYW7fcAHymlbnams_DhitToblYvib3mhKbwI,5615
50
50
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
51
51
  utilities/pottery.py,sha256=ggMN72Y7wx7Js8VN6eyNyodpm8TIYqZHGghkDPXIVWk,3949
@@ -88,8 +88,8 @@ utilities/zoneinfo.py,sha256=tdIScrTB2-B-LH0ukb1HUXKooLknOfJNwHk10MuMYvA,3619
88
88
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
89
89
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
90
90
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
91
- dycw_utilities-0.165.3.dist-info/METADATA,sha256=pkTtRCmg_YK5SUclEGBHKWThH6nFfAdR3vn_EJZjGO8,1696
92
- dycw_utilities-0.165.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
- dycw_utilities-0.165.3.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
94
- dycw_utilities-0.165.3.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
95
- dycw_utilities-0.165.3.dist-info/RECORD,,
91
+ dycw_utilities-0.166.0.dist-info/METADATA,sha256=0_1Nn-M65PH_lpHB1NUUiPxCylNpVd7B9948OxvL8kY,1696
92
+ dycw_utilities-0.166.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
+ dycw_utilities-0.166.0.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
94
+ dycw_utilities-0.166.0.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
95
+ dycw_utilities-0.166.0.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.165.3"
3
+ __version__ = "0.166.0"
utilities/polars.py CHANGED
@@ -61,14 +61,12 @@ from utilities.gzip import read_binary
61
61
  from utilities.iterables import (
62
62
  CheckIterablesEqualError,
63
63
  CheckMappingsEqualError,
64
- CheckSubSetError,
65
64
  CheckSuperMappingError,
66
65
  OneEmptyError,
67
66
  OneNonUniqueError,
68
67
  always_iterable,
69
68
  check_iterables_equal,
70
69
  check_mappings_equal,
71
- check_subset,
72
70
  check_supermapping,
73
71
  is_iterable_not_str,
74
72
  one,
@@ -301,29 +299,77 @@ def any_series(series: Series, /, *columns: ExprOrSeries) -> Series:
301
299
  ##
302
300
 
303
301
 
304
- def append_dataclass(df: DataFrame, obj: Dataclass, /) -> DataFrame:
305
- """Append a dataclass object to a DataFrame."""
306
- non_null_fields = {k: v for k, v in asdict(obj).items() if v is not None}
307
- try:
308
- check_subset(non_null_fields, df.columns)
309
- except CheckSubSetError as error:
310
- raise AppendDataClassError(
311
- left=error.left, right=error.right, extra=error.extra
312
- ) from None
313
- row_cols = set(df.columns) & set(non_null_fields)
314
- row = dataclass_to_dataframe(obj).select(*row_cols)
315
- return concat([df, row], how="diagonal")
302
+ def append_row(
303
+ df: DataFrame,
304
+ row: StrMapping,
305
+ /,
306
+ *,
307
+ predicate: Callable[[StrMapping], bool] | None = None,
308
+ disallow_extra: bool = False,
309
+ disallow_missing: bool | MaybeIterable[str] = False,
310
+ disallow_null: bool | MaybeIterable[str] = False,
311
+ in_place: bool = False,
312
+ ) -> DataFrame:
313
+ """Append a row to a DataFrame."""
314
+ if (predicate is not None) and not predicate(row):
315
+ raise _AppendRowPredicateError(df=df, row=row)
316
+ if disallow_extra and (len(extra := set(row) - set(df.columns)) >= 1):
317
+ raise _AppendRowExtraKeysError(df=df, row=row, extra=extra)
318
+ if disallow_missing is not False:
319
+ missing = set(df.columns) - set(row)
320
+ if disallow_missing is not True:
321
+ missing &= set(always_iterable(disallow_missing))
322
+ if len(missing) >= 1:
323
+ raise _AppendRowMissingKeysError(df=df, row=row, missing=missing)
324
+ other = DataFrame(data=[row], schema=df.schema)
325
+ if disallow_null:
326
+ other_null = other.select(col(c).is_null().any() for c in other.columns)
327
+ null = {k for k, v in other_null.row(0, named=True).items() if v}
328
+ if disallow_null is not True:
329
+ null &= set(always_iterable(disallow_null))
330
+ if len(null) >= 1:
331
+ raise _AppendRowNullColumnsError(df=df, row=row, columns=null)
332
+ return df.extend(other) if in_place else df.vstack(other)
333
+
334
+
335
+ @dataclass(kw_only=True, slots=True)
336
+ class AppendRowError(Exception):
337
+ df: DataFrame
338
+ row: StrMapping
339
+
340
+
341
+ @dataclass(kw_only=True, slots=True)
342
+ class _AppendRowPredicateError(AppendRowError):
343
+ @override
344
+ def __str__(self) -> str:
345
+ return f"Predicate failed; got {get_repr(self.row)}"
346
+
347
+
348
+ @dataclass(kw_only=True, slots=True)
349
+ class _AppendRowExtraKeysError(AppendRowError):
350
+ extra: AbstractSet[str]
351
+
352
+ @override
353
+ def __str__(self) -> str:
354
+ return f"Extra key(s) found; got {get_repr(self.extra)}"
355
+
356
+
357
+ @dataclass(kw_only=True, slots=True)
358
+ class _AppendRowMissingKeysError(AppendRowError):
359
+ missing: AbstractSet[str]
360
+
361
+ @override
362
+ def __str__(self) -> str:
363
+ return f"Missing key(s) found; got {get_repr(self.missing)}"
316
364
 
317
365
 
318
366
  @dataclass(kw_only=True, slots=True)
319
- class AppendDataClassError[T](Exception):
320
- left: AbstractSet[T]
321
- right: AbstractSet[T]
322
- extra: AbstractSet[T]
367
+ class _AppendRowNullColumnsError(AppendRowError):
368
+ columns: AbstractSet[str]
323
369
 
324
370
  @override
325
371
  def __str__(self) -> str:
326
- return f"Dataclass fields {get_repr(self.left)} must be a subset of DataFrame columns {get_repr(self.right)}; dataclass had extra items {get_repr(self.extra)}"
372
+ return f"Null column(s) found; got {get_repr(self.columns)}"
327
373
 
328
374
 
329
375
  ##
@@ -2469,7 +2515,7 @@ def search_period(
2469
2515
  start_or_end: Literal["start", "end"] = "end",
2470
2516
  ) -> int | None:
2471
2517
  """Search a series of periods for the one containing a given date-time."""
2472
- start, end = [series.struct[k] for k in ["start", "end"]]
2518
+ end = series.struct["end"]
2473
2519
  py_date_time = date_time.py_datetime()
2474
2520
  match start_or_end:
2475
2521
  case "start":
@@ -2701,6 +2747,7 @@ def zoned_date_time_period_dtype(
2701
2747
 
2702
2748
 
2703
2749
  __all__ = [
2750
+ "AppendRowError",
2704
2751
  "BooleanValueCountsError",
2705
2752
  "CheckPolarsDataFrameError",
2706
2753
  "ColumnsToDictError",
@@ -2731,7 +2778,7 @@ __all__ = [
2731
2778
  "all_series",
2732
2779
  "any_dataframe_columns",
2733
2780
  "any_series",
2734
- "append_dataclass",
2781
+ "append_row",
2735
2782
  "are_frames_equal",
2736
2783
  "bernoulli",
2737
2784
  "boolean_value_counts",