dycw-utilities 0.165.3__py3-none-any.whl → 0.166.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: dycw-utilities
3
- Version: 0.165.3
3
+ Version: 0.166.1
4
4
  Author-email: Derek Wan <d.wan@icloud.com>
5
5
  License-File: LICENSE
6
6
  Requires-Python: >=3.12
@@ -1,4 +1,4 @@
1
- utilities/__init__.py,sha256=dGyOX1Z3m0mhIPCR1cqAXlybAWUxvEasnTNSAfT9Fh8,60
1
+ utilities/__init__.py,sha256=pk8s8uyCf9TfamkXymCVi1C6C_xH9yc5UV3b1hbP1Nk,60
2
2
  utilities/aeventkit.py,sha256=ddoleSwW9zdc2tjX5Ge0pMKtYwV_JMxhHYOxnWX2AGM,12609
3
3
  utilities/altair.py,sha256=92E2lCdyHY4Zb-vCw6rEJIsWdKipuu-Tu2ab1ufUfAk,9079
4
4
  utilities/asyncio.py,sha256=PUedzQ5deqlSECQ33sam9cRzI9TnygHz3FdOqWJWPTM,15288
@@ -45,7 +45,7 @@ utilities/parse.py,sha256=JcJn5yXKhIWXBCwgBdPsyu7Hvcuw6kyEdqvaebCaI9k,17951
45
45
  utilities/pathlib.py,sha256=qGuU8XPmdgGpy8tOMUgelfXx3kxI8h9IaV3TI_06QGE,8428
46
46
  utilities/pickle.py,sha256=MBT2xZCsv0pH868IXLGKnlcqNx2IRVKYNpRcqiQQqxw,653
47
47
  utilities/platform.py,sha256=pTn7gw6N4T6LdKrf0virwarof_mze9WtoQlrGMzhGVI,2798
48
- utilities/polars.py,sha256=6_k8cU7LePQ942Qfqe5l_tR8ND0M0bLlpnaMvDbRCM8,81754
48
+ utilities/polars.py,sha256=FehlIHgHP-kvicBPSE6o-tswqUpRysA6epmkPrgHkGE,83239
49
49
  utilities/polars_ols.py,sha256=LNTFNLPuYW7fcAHymlbnams_DhitToblYvib3mhKbwI,5615
50
50
  utilities/postgres.py,sha256=ynCTTaF-bVEOSW-KEAR-dlLh_hYjeVVjm__-4pEU8Zk,12269
51
51
  utilities/pottery.py,sha256=ggMN72Y7wx7Js8VN6eyNyodpm8TIYqZHGghkDPXIVWk,3949
@@ -88,8 +88,8 @@ utilities/zoneinfo.py,sha256=tdIScrTB2-B-LH0ukb1HUXKooLknOfJNwHk10MuMYvA,3619
88
88
  utilities/pytest_plugins/__init__.py,sha256=U4S_2y3zgLZVfMenHRaJFBW8yqh2mUBuI291LGQVOJ8,35
89
89
  utilities/pytest_plugins/pytest_randomly.py,sha256=B1qYVlExGOxTywq2r1SMi5o7btHLk2PNdY_b1p98dkE,409
90
90
  utilities/pytest_plugins/pytest_regressions.py,sha256=9v8kAXDM2ycIXJBimoiF4EgrwbUvxTycFWJiGR_GHhM,1466
91
- dycw_utilities-0.165.3.dist-info/METADATA,sha256=pkTtRCmg_YK5SUclEGBHKWThH6nFfAdR3vn_EJZjGO8,1696
92
- dycw_utilities-0.165.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
- dycw_utilities-0.165.3.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
94
- dycw_utilities-0.165.3.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
95
- dycw_utilities-0.165.3.dist-info/RECORD,,
91
+ dycw_utilities-0.166.1.dist-info/METADATA,sha256=tFhmfKrHbWlljJMUoGWMcLeXSJiVMMXbAAqlqwE6rTQ,1696
92
+ dycw_utilities-0.166.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
93
+ dycw_utilities-0.166.1.dist-info/entry_points.txt,sha256=BOD_SoDxwsfJYOLxhrSXhHP_T7iw-HXI9f2WVkzYxvQ,135
94
+ dycw_utilities-0.166.1.dist-info/licenses/LICENSE,sha256=gppZp16M6nSVpBbUBrNL6JuYfvKwZiKgV7XoKKsHzqo,1066
95
+ dycw_utilities-0.166.1.dist-info/RECORD,,
utilities/__init__.py CHANGED
@@ -1,3 +1,3 @@
1
1
  from __future__ import annotations
2
2
 
3
- __version__ = "0.165.3"
3
+ __version__ = "0.166.1"
utilities/polars.py CHANGED
@@ -61,14 +61,12 @@ from utilities.gzip import read_binary
61
61
  from utilities.iterables import (
62
62
  CheckIterablesEqualError,
63
63
  CheckMappingsEqualError,
64
- CheckSubSetError,
65
64
  CheckSuperMappingError,
66
65
  OneEmptyError,
67
66
  OneNonUniqueError,
68
67
  always_iterable,
69
68
  check_iterables_equal,
70
69
  check_mappings_equal,
71
- check_subset,
72
70
  check_supermapping,
73
71
  is_iterable_not_str,
74
72
  one,
@@ -301,29 +299,77 @@ def any_series(series: Series, /, *columns: ExprOrSeries) -> Series:
301
299
  ##
302
300
 
303
301
 
304
- def append_dataclass(df: DataFrame, obj: Dataclass, /) -> DataFrame:
305
- """Append a dataclass object to a DataFrame."""
306
- non_null_fields = {k: v for k, v in asdict(obj).items() if v is not None}
307
- try:
308
- check_subset(non_null_fields, df.columns)
309
- except CheckSubSetError as error:
310
- raise AppendDataClassError(
311
- left=error.left, right=error.right, extra=error.extra
312
- ) from None
313
- row_cols = set(df.columns) & set(non_null_fields)
314
- row = dataclass_to_dataframe(obj).select(*row_cols)
315
- return concat([df, row], how="diagonal")
302
+ def append_row(
303
+ df: DataFrame,
304
+ row: StrMapping,
305
+ /,
306
+ *,
307
+ predicate: Callable[[StrMapping], bool] | None = None,
308
+ disallow_extra: bool = False,
309
+ disallow_missing: bool | MaybeIterable[str] = False,
310
+ disallow_null: bool | MaybeIterable[str] = False,
311
+ in_place: bool = False,
312
+ ) -> DataFrame:
313
+ """Append a row to a DataFrame."""
314
+ if (predicate is not None) and not predicate(row):
315
+ raise _AppendRowPredicateError(df=df, row=row)
316
+ if disallow_extra and (len(extra := set(row) - set(df.columns)) >= 1):
317
+ raise _AppendRowExtraKeysError(df=df, row=row, extra=extra)
318
+ if disallow_missing is not False:
319
+ missing = set(df.columns) - set(row)
320
+ if disallow_missing is not True:
321
+ missing &= set(always_iterable(disallow_missing))
322
+ if len(missing) >= 1:
323
+ raise _AppendRowMissingKeysError(df=df, row=row, missing=missing)
324
+ other = DataFrame(data=[row], schema=df.schema)
325
+ if disallow_null:
326
+ other_null = other.select(col(c).is_null().any() for c in other.columns)
327
+ null = {k for k, v in other_null.row(0, named=True).items() if v}
328
+ if disallow_null is not True:
329
+ null &= set(always_iterable(disallow_null))
330
+ if len(null) >= 1:
331
+ raise _AppendRowNullColumnsError(df=df, row=row, columns=null)
332
+ return df.extend(other) if in_place else df.vstack(other)
333
+
334
+
335
+ @dataclass(kw_only=True, slots=True)
336
+ class AppendRowError(Exception):
337
+ df: DataFrame
338
+ row: StrMapping
339
+
340
+
341
+ @dataclass(kw_only=True, slots=True)
342
+ class _AppendRowPredicateError(AppendRowError):
343
+ @override
344
+ def __str__(self) -> str:
345
+ return f"Predicate failed; got {get_repr(self.row)}"
346
+
347
+
348
+ @dataclass(kw_only=True, slots=True)
349
+ class _AppendRowExtraKeysError(AppendRowError):
350
+ extra: AbstractSet[str]
351
+
352
+ @override
353
+ def __str__(self) -> str:
354
+ return f"Extra key(s) found; got {get_repr(self.extra)}"
355
+
356
+
357
+ @dataclass(kw_only=True, slots=True)
358
+ class _AppendRowMissingKeysError(AppendRowError):
359
+ missing: AbstractSet[str]
360
+
361
+ @override
362
+ def __str__(self) -> str:
363
+ return f"Missing key(s) found; got {get_repr(self.missing)}"
316
364
 
317
365
 
318
366
  @dataclass(kw_only=True, slots=True)
319
- class AppendDataClassError[T](Exception):
320
- left: AbstractSet[T]
321
- right: AbstractSet[T]
322
- extra: AbstractSet[T]
367
+ class _AppendRowNullColumnsError(AppendRowError):
368
+ columns: AbstractSet[str]
323
369
 
324
370
  @override
325
371
  def __str__(self) -> str:
326
- return f"Dataclass fields {get_repr(self.left)} must be a subset of DataFrame columns {get_repr(self.right)}; dataclass had extra items {get_repr(self.extra)}"
372
+ return f"Null column(s) found; got {get_repr(self.columns)}"
327
373
 
328
374
 
329
375
  ##
@@ -784,13 +830,15 @@ def choice(
784
830
  ##
785
831
 
786
832
 
787
- def columns_to_dict(df: DataFrame, key: str, value: str, /) -> dict[Any, Any]:
833
+ def columns_to_dict(
834
+ df: DataFrame, key: IntoExprColumn, value: IntoExprColumn, /
835
+ ) -> dict[Any, Any]:
788
836
  """Map a pair of columns into a dictionary. Must be unique on `key`."""
789
- col_key = df[key]
790
- if col_key.is_duplicated().any():
791
- raise ColumnsToDictError(df=df, key=key)
792
- col_value = df[value]
793
- return dict(zip(col_key, col_value, strict=True))
837
+ df = df.select(key, value)
838
+ key_col, value_col = [df[get_expr_name(df, expr)] for expr in [key, value]]
839
+ if key_col.is_duplicated().any():
840
+ raise ColumnsToDictError(df=df, key=key_col.name)
841
+ return dict(zip(key_col, value_col, strict=True))
794
842
 
795
843
 
796
844
  @dataclass(kw_only=True, slots=True)
@@ -2469,7 +2517,7 @@ def search_period(
2469
2517
  start_or_end: Literal["start", "end"] = "end",
2470
2518
  ) -> int | None:
2471
2519
  """Search a series of periods for the one containing a given date-time."""
2472
- start, end = [series.struct[k] for k in ["start", "end"]]
2520
+ end = series.struct["end"]
2473
2521
  py_date_time = date_time.py_datetime()
2474
2522
  match start_or_end:
2475
2523
  case "start":
@@ -2701,6 +2749,7 @@ def zoned_date_time_period_dtype(
2701
2749
 
2702
2750
 
2703
2751
  __all__ = [
2752
+ "AppendRowError",
2704
2753
  "BooleanValueCountsError",
2705
2754
  "CheckPolarsDataFrameError",
2706
2755
  "ColumnsToDictError",
@@ -2731,7 +2780,7 @@ __all__ = [
2731
2780
  "all_series",
2732
2781
  "any_dataframe_columns",
2733
2782
  "any_series",
2734
- "append_dataclass",
2783
+ "append_row",
2735
2784
  "are_frames_equal",
2736
2785
  "bernoulli",
2737
2786
  "boolean_value_counts",