dycw-utilities 0.129.10__py3-none-any.whl → 0.175.17__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (103)
  1. dycw_utilities-0.175.17.dist-info/METADATA +34 -0
  2. dycw_utilities-0.175.17.dist-info/RECORD +103 -0
  3. dycw_utilities-0.175.17.dist-info/WHEEL +4 -0
  4. dycw_utilities-0.175.17.dist-info/entry_points.txt +4 -0
  5. utilities/__init__.py +1 -1
  6. utilities/altair.py +14 -14
  7. utilities/asyncio.py +350 -819
  8. utilities/atomicwrites.py +18 -6
  9. utilities/atools.py +77 -22
  10. utilities/cachetools.py +24 -29
  11. utilities/click.py +393 -237
  12. utilities/concurrent.py +8 -11
  13. utilities/contextlib.py +216 -17
  14. utilities/contextvars.py +20 -1
  15. utilities/cryptography.py +3 -3
  16. utilities/dataclasses.py +83 -118
  17. utilities/docker.py +293 -0
  18. utilities/enum.py +26 -23
  19. utilities/errors.py +17 -3
  20. utilities/fastapi.py +29 -65
  21. utilities/fpdf2.py +3 -3
  22. utilities/functions.py +169 -416
  23. utilities/functools.py +18 -19
  24. utilities/git.py +9 -30
  25. utilities/grp.py +28 -0
  26. utilities/gzip.py +31 -0
  27. utilities/http.py +3 -2
  28. utilities/hypothesis.py +738 -589
  29. utilities/importlib.py +17 -1
  30. utilities/inflect.py +25 -0
  31. utilities/iterables.py +194 -262
  32. utilities/jinja2.py +148 -0
  33. utilities/json.py +70 -0
  34. utilities/libcst.py +38 -17
  35. utilities/lightweight_charts.py +5 -9
  36. utilities/logging.py +345 -543
  37. utilities/math.py +18 -13
  38. utilities/memory_profiler.py +11 -15
  39. utilities/more_itertools.py +200 -131
  40. utilities/operator.py +33 -29
  41. utilities/optuna.py +6 -6
  42. utilities/orjson.py +272 -137
  43. utilities/os.py +61 -4
  44. utilities/parse.py +59 -61
  45. utilities/pathlib.py +281 -40
  46. utilities/permissions.py +298 -0
  47. utilities/pickle.py +2 -2
  48. utilities/platform.py +24 -5
  49. utilities/polars.py +1214 -430
  50. utilities/polars_ols.py +1 -1
  51. utilities/postgres.py +408 -0
  52. utilities/pottery.py +113 -26
  53. utilities/pqdm.py +10 -11
  54. utilities/psutil.py +6 -57
  55. utilities/pwd.py +28 -0
  56. utilities/pydantic.py +4 -54
  57. utilities/pydantic_settings.py +240 -0
  58. utilities/pydantic_settings_sops.py +76 -0
  59. utilities/pyinstrument.py +8 -10
  60. utilities/pytest.py +227 -121
  61. utilities/pytest_plugins/__init__.py +1 -0
  62. utilities/pytest_plugins/pytest_randomly.py +23 -0
  63. utilities/pytest_plugins/pytest_regressions.py +56 -0
  64. utilities/pytest_regressions.py +26 -46
  65. utilities/random.py +13 -9
  66. utilities/re.py +58 -28
  67. utilities/redis.py +401 -550
  68. utilities/scipy.py +1 -1
  69. utilities/sentinel.py +10 -0
  70. utilities/shelve.py +4 -1
  71. utilities/shutil.py +25 -0
  72. utilities/slack_sdk.py +36 -106
  73. utilities/sqlalchemy.py +502 -473
  74. utilities/sqlalchemy_polars.py +38 -94
  75. utilities/string.py +2 -3
  76. utilities/subprocess.py +1572 -0
  77. utilities/tempfile.py +86 -4
  78. utilities/testbook.py +50 -0
  79. utilities/text.py +165 -42
  80. utilities/timer.py +37 -65
  81. utilities/traceback.py +158 -929
  82. utilities/types.py +146 -116
  83. utilities/typing.py +531 -71
  84. utilities/tzdata.py +1 -53
  85. utilities/tzlocal.py +6 -23
  86. utilities/uuid.py +43 -5
  87. utilities/version.py +27 -26
  88. utilities/whenever.py +1776 -386
  89. utilities/zoneinfo.py +84 -22
  90. dycw_utilities-0.129.10.dist-info/METADATA +0 -241
  91. dycw_utilities-0.129.10.dist-info/RECORD +0 -96
  92. dycw_utilities-0.129.10.dist-info/WHEEL +0 -4
  93. dycw_utilities-0.129.10.dist-info/licenses/LICENSE +0 -21
  94. utilities/datetime.py +0 -1409
  95. utilities/eventkit.py +0 -402
  96. utilities/loguru.py +0 -144
  97. utilities/luigi.py +0 -228
  98. utilities/period.py +0 -324
  99. utilities/pyrsistent.py +0 -89
  100. utilities/python_dotenv.py +0 -105
  101. utilities/streamlit.py +0 -105
  102. utilities/sys.py +0 -87
  103. utilities/tenacity.py +0 -145
@@ -4,7 +4,7 @@ import datetime as dt
4
4
  import decimal
5
5
  from contextlib import suppress
6
6
  from dataclasses import dataclass
7
- from typing import TYPE_CHECKING, Any, Literal, assert_never, cast, overload, override
7
+ from typing import TYPE_CHECKING, Any, cast, overload, override
8
8
  from uuid import UUID
9
9
 
10
10
  import polars as pl
@@ -25,9 +25,8 @@ from polars import (
25
25
  )
26
26
  from sqlalchemy import Column, Select, select
27
27
  from sqlalchemy.exc import DuplicateColumnError
28
- from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
29
28
 
30
- from utilities.asyncio import timeout_dur
29
+ from utilities.asyncio import timeout_td
31
30
  from utilities.functions import identity
32
31
  from utilities.iterables import (
33
32
  CheckDuplicatesError,
@@ -36,7 +35,7 @@ from utilities.iterables import (
36
35
  chunked,
37
36
  one,
38
37
  )
39
- from utilities.polars import zoned_datetime
38
+ from utilities.polars import zoned_date_time_dtype
40
39
  from utilities.reprlib import get_repr
41
40
  from utilities.sqlalchemy import (
42
41
  CHUNK_SIZE_FRAC,
@@ -45,7 +44,6 @@ from utilities.sqlalchemy import (
45
44
  get_chunk_size,
46
45
  get_columns,
47
46
  insert_items,
48
- upsert_items,
49
47
  )
50
48
  from utilities.text import snake_case
51
49
  from utilities.typing import is_subclass_gen
@@ -61,14 +59,12 @@ if TYPE_CHECKING:
61
59
  )
62
60
 
63
61
  from polars._typing import PolarsDataType, SchemaDict
62
+ from sqlalchemy.ext.asyncio import AsyncEngine
64
63
  from sqlalchemy.sql import ColumnCollection
65
64
  from sqlalchemy.sql.base import ReadOnlyColumnCollection
66
- from tenacity.retry import RetryBaseT
67
- from tenacity.stop import StopBaseT
68
- from tenacity.wait import WaitBaseT
65
+ from whenever import TimeDelta
69
66
 
70
- import utilities.types
71
- from utilities.types import TimeZoneLike
67
+ from utilities.types import Delta, MaybeType, TimeZoneLike
72
68
 
73
69
 
74
70
  async def insert_dataframe(
@@ -78,55 +74,37 @@ async def insert_dataframe(
78
74
  /,
79
75
  *,
80
76
  snake: bool = False,
77
+ is_upsert: bool = False,
81
78
  chunk_size_frac: float = CHUNK_SIZE_FRAC,
82
79
  assume_tables_exist: bool = False,
83
- upsert: Literal["selected", "all"] | None = None,
84
- timeout_create: utilities.types.Duration | None = None,
80
+ timeout_create: TimeDelta | None = None,
85
81
  error_create: type[Exception] = TimeoutError,
86
- timeout_insert: utilities.types.Duration | None = None,
82
+ timeout_insert: TimeDelta | None = None,
87
83
  error_insert: type[Exception] = TimeoutError,
88
84
  ) -> None:
89
85
  """Insert/upsert a DataFrame into a database."""
90
86
  mapping = _insert_dataframe_map_df_schema_to_table(
91
87
  df.schema, table_or_orm, snake=snake
92
88
  )
93
- items = df.select(mapping).rename(mapping).to_dicts()
89
+ items = df.select(*mapping).rename(mapping).rows(named=True)
94
90
  if len(items) == 0:
95
- if not df.is_empty():
96
- raise InsertDataFrameError(df=df)
97
91
  if not assume_tables_exist:
98
92
  await ensure_tables_created(
99
93
  engine, table_or_orm, timeout=timeout_create, error=error_create
100
94
  )
101
95
  return
102
- match upsert:
103
- case None:
104
- await insert_items(
105
- engine,
106
- (items, table_or_orm),
107
- snake=snake,
108
- chunk_size_frac=chunk_size_frac,
109
- assume_tables_exist=assume_tables_exist,
110
- timeout_create=timeout_create,
111
- error_create=error_create,
112
- timeout_insert=timeout_insert,
113
- error_insert=error_insert,
114
- )
115
- case "selected" | "all" as selected_or_all: # skipif-ci-and-not-linux
116
- await upsert_items(
117
- engine,
118
- (items, table_or_orm),
119
- snake=snake,
120
- chunk_size_frac=chunk_size_frac,
121
- selected_or_all=selected_or_all,
122
- assume_tables_exist=assume_tables_exist,
123
- timeout_create=timeout_create,
124
- error_create=error_create,
125
- timeout_insert=timeout_insert,
126
- error_insert=error_insert,
127
- )
128
- case _ as never:
129
- assert_never(never)
96
+ await insert_items(
97
+ engine,
98
+ (items, table_or_orm),
99
+ snake=snake,
100
+ is_upsert=is_upsert,
101
+ chunk_size_frac=chunk_size_frac,
102
+ assume_tables_exist=assume_tables_exist,
103
+ timeout_create=timeout_create,
104
+ error_create=error_create,
105
+ timeout_insert=timeout_insert,
106
+ error_insert=error_insert,
107
+ )
130
108
 
131
109
 
132
110
  def _insert_dataframe_map_df_schema_to_table(
@@ -210,15 +188,6 @@ def _insert_dataframe_check_df_and_db_types(
210
188
  )
211
189
 
212
190
 
213
- @dataclass(kw_only=True, slots=True)
214
- class InsertDataFrameError(Exception):
215
- df: DataFrame
216
-
217
- @override
218
- def __str__(self) -> str:
219
- return f"Non-empty DataFrame must resolve to at least 1 item\n\n{self.df}"
220
-
221
-
222
191
  @overload
223
192
  async def select_to_dataframe(
224
193
  sel: Select[Any],
@@ -231,10 +200,8 @@ async def select_to_dataframe(
231
200
  in_clauses: tuple[Column[Any], Iterable[Any]] | None = None,
232
201
  in_clauses_chunk_size: int | None = None,
233
202
  chunk_size_frac: float = CHUNK_SIZE_FRAC,
234
- stop: StopBaseT | None = None,
235
- wait: WaitBaseT | None = None,
236
- retry: RetryBaseT | None = None,
237
- timeout: utilities.types.Duration | None = None,
203
+ timeout: Delta | None = None,
204
+ error: MaybeType[BaseException] = TimeoutError,
238
205
  **kwargs: Any,
239
206
  ) -> DataFrame: ...
240
207
  @overload
@@ -249,10 +216,8 @@ async def select_to_dataframe(
249
216
  in_clauses: None = None,
250
217
  in_clauses_chunk_size: int | None = None,
251
218
  chunk_size_frac: float = CHUNK_SIZE_FRAC,
252
- stop: StopBaseT | None = None,
253
- wait: WaitBaseT | None = None,
254
- retry: RetryBaseT | None = None,
255
- timeout: utilities.types.Duration | None = None,
219
+ timeout: Delta | None = None,
220
+ error: MaybeType[BaseException] = TimeoutError,
256
221
  **kwargs: Any,
257
222
  ) -> Iterable[DataFrame]: ...
258
223
  @overload
@@ -267,10 +232,8 @@ async def select_to_dataframe(
267
232
  in_clauses: tuple[Column[Any], Iterable[Any]],
268
233
  in_clauses_chunk_size: int | None = None,
269
234
  chunk_size_frac: float = CHUNK_SIZE_FRAC,
270
- stop: StopBaseT | None = None,
271
- wait: WaitBaseT | None = None,
272
- retry: RetryBaseT | None = None,
273
- timeout: utilities.types.Duration | None = None,
235
+ timeout: Delta | None = None,
236
+ error: MaybeType[BaseException] = TimeoutError,
274
237
  **kwargs: Any,
275
238
  ) -> AsyncIterable[DataFrame]: ...
276
239
  @overload
@@ -285,10 +248,8 @@ async def select_to_dataframe(
285
248
  in_clauses: tuple[Column[Any], Iterable[Any]] | None = None,
286
249
  in_clauses_chunk_size: int | None = None,
287
250
  chunk_size_frac: float = CHUNK_SIZE_FRAC,
288
- stop: StopBaseT | None = None,
289
- wait: WaitBaseT | None = None,
290
- retry: RetryBaseT | None = None,
291
- timeout: utilities.types.Duration | None = None,
251
+ timeout: Delta | None = None,
252
+ error: MaybeType[BaseException] = TimeoutError,
292
253
  **kwargs: Any,
293
254
  ) -> DataFrame | Iterable[DataFrame] | AsyncIterable[DataFrame]: ...
294
255
  async def select_to_dataframe(
@@ -302,32 +263,16 @@ async def select_to_dataframe(
302
263
  in_clauses: tuple[Column[Any], Iterable[Any]] | None = None,
303
264
  in_clauses_chunk_size: int | None = None,
304
265
  chunk_size_frac: float = CHUNK_SIZE_FRAC,
305
- timeout: utilities.types.Duration | None = None,
306
- error: type[Exception] = TimeoutError,
266
+ timeout: Delta | None = None,
267
+ error: MaybeType[BaseException] = TimeoutError,
307
268
  **kwargs: Any,
308
269
  ) -> DataFrame | Iterable[DataFrame] | AsyncIterable[DataFrame]:
309
270
  """Read a table from a database into a DataFrame."""
310
- if not issubclass(AsyncEngine, type(engine)):
311
- # for handling testing
312
- engine = create_async_engine(engine.url)
313
- return await select_to_dataframe(
314
- sel,
315
- engine,
316
- snake=snake,
317
- time_zone=time_zone,
318
- batch_size=batch_size,
319
- in_clauses=in_clauses,
320
- in_clauses_chunk_size=in_clauses_chunk_size,
321
- chunk_size_frac=chunk_size_frac,
322
- timeout=timeout,
323
- error=error,
324
- **kwargs,
325
- )
326
271
  if snake:
327
272
  sel = _select_to_dataframe_apply_snake(sel)
328
273
  schema = _select_to_dataframe_map_select_to_df_schema(sel, time_zone=time_zone)
329
274
  if in_clauses is None:
330
- async with timeout_dur(duration=timeout, error=error):
275
+ async with timeout_td(timeout, error=error):
331
276
  return read_database(
332
277
  sel,
333
278
  cast("Any", engine),
@@ -344,7 +289,7 @@ async def select_to_dataframe(
344
289
  chunk_size_frac=chunk_size_frac,
345
290
  )
346
291
  if batch_size is None:
347
- async with timeout_dur(duration=timeout, error=error):
292
+ async with timeout_td(timeout, error=error):
348
293
  dfs = [
349
294
  await select_to_dataframe(
350
295
  sel,
@@ -365,7 +310,7 @@ async def select_to_dataframe(
365
310
  return DataFrame(schema=schema)
366
311
 
367
312
  async def yield_dfs() -> AsyncIterator[DataFrame]:
368
- async with timeout_dur(duration=timeout, error=error):
313
+ async with timeout_td(timeout, error=error):
369
314
  for sel_i in sels:
370
315
  for df in await select_to_dataframe(
371
316
  sel_i,
@@ -421,7 +366,7 @@ def _select_to_dataframe_map_table_column_type_to_dtype(
421
366
  return pl.Date
422
367
  if is_subclass_gen(py_type, dt.datetime):
423
368
  has_tz: bool = type_use.timezone
424
- return zoned_datetime(time_zone=time_zone) if has_tz else Datetime()
369
+ return zoned_date_time_dtype(time_zone=time_zone) if has_tz else Datetime()
425
370
  if issubclass(py_type, dt.time):
426
371
  return Time
427
372
  if issubclass(py_type, dt.timedelta):
@@ -460,15 +405,14 @@ def _select_to_dataframe_yield_selects_with_in_clauses(
460
405
  in_clauses_chunk_size: int | None = None,
461
406
  chunk_size_frac: float = CHUNK_SIZE_FRAC,
462
407
  ) -> Iterator[Select[Any]]:
463
- max_length = len(sel.selected_columns)
464
408
  in_col, in_values = in_clauses
465
409
  if in_clauses_chunk_size is None:
466
410
  chunk_size = get_chunk_size(
467
- engine, chunk_size_frac=chunk_size_frac, scaling=max_length
411
+ engine, sel.selected_columns, chunk_size_frac=chunk_size_frac
468
412
  )
469
413
  else:
470
414
  chunk_size = in_clauses_chunk_size
471
415
  return (sel.where(in_col.in_(values)) for values in chunked(in_values, chunk_size))
472
416
 
473
417
 
474
- __all__ = ["InsertDataFrameError", "insert_dataframe", "select_to_dataframe"]
418
+ __all__ = ["insert_dataframe", "select_to_dataframe"]
utilities/string.py CHANGED
@@ -10,11 +10,10 @@ def substitute_environ(path_or_text: Path | str, /, **kwargs: Any) -> str:
10
10
  """Substitute the environment variables in a file."""
11
11
  match path_or_text:
12
12
  case Path() as path:
13
- with path.open() as fh:
14
- return substitute_environ(fh.read(), **kwargs)
13
+ return substitute_environ(path.read_text(), **kwargs)
15
14
  case str() as text:
16
15
  return Template(text).substitute(environ, **kwargs)
17
- case _ as never:
16
+ case never:
18
17
  assert_never(never)
19
18
 
20
19