dycw-utilities 0.129.10__py3-none-any.whl → 0.175.17__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- dycw_utilities-0.175.17.dist-info/METADATA +34 -0
- dycw_utilities-0.175.17.dist-info/RECORD +103 -0
- dycw_utilities-0.175.17.dist-info/WHEEL +4 -0
- dycw_utilities-0.175.17.dist-info/entry_points.txt +4 -0
- utilities/__init__.py +1 -1
- utilities/altair.py +14 -14
- utilities/asyncio.py +350 -819
- utilities/atomicwrites.py +18 -6
- utilities/atools.py +77 -22
- utilities/cachetools.py +24 -29
- utilities/click.py +393 -237
- utilities/concurrent.py +8 -11
- utilities/contextlib.py +216 -17
- utilities/contextvars.py +20 -1
- utilities/cryptography.py +3 -3
- utilities/dataclasses.py +83 -118
- utilities/docker.py +293 -0
- utilities/enum.py +26 -23
- utilities/errors.py +17 -3
- utilities/fastapi.py +29 -65
- utilities/fpdf2.py +3 -3
- utilities/functions.py +169 -416
- utilities/functools.py +18 -19
- utilities/git.py +9 -30
- utilities/grp.py +28 -0
- utilities/gzip.py +31 -0
- utilities/http.py +3 -2
- utilities/hypothesis.py +738 -589
- utilities/importlib.py +17 -1
- utilities/inflect.py +25 -0
- utilities/iterables.py +194 -262
- utilities/jinja2.py +148 -0
- utilities/json.py +70 -0
- utilities/libcst.py +38 -17
- utilities/lightweight_charts.py +5 -9
- utilities/logging.py +345 -543
- utilities/math.py +18 -13
- utilities/memory_profiler.py +11 -15
- utilities/more_itertools.py +200 -131
- utilities/operator.py +33 -29
- utilities/optuna.py +6 -6
- utilities/orjson.py +272 -137
- utilities/os.py +61 -4
- utilities/parse.py +59 -61
- utilities/pathlib.py +281 -40
- utilities/permissions.py +298 -0
- utilities/pickle.py +2 -2
- utilities/platform.py +24 -5
- utilities/polars.py +1214 -430
- utilities/polars_ols.py +1 -1
- utilities/postgres.py +408 -0
- utilities/pottery.py +113 -26
- utilities/pqdm.py +10 -11
- utilities/psutil.py +6 -57
- utilities/pwd.py +28 -0
- utilities/pydantic.py +4 -54
- utilities/pydantic_settings.py +240 -0
- utilities/pydantic_settings_sops.py +76 -0
- utilities/pyinstrument.py +8 -10
- utilities/pytest.py +227 -121
- utilities/pytest_plugins/__init__.py +1 -0
- utilities/pytest_plugins/pytest_randomly.py +23 -0
- utilities/pytest_plugins/pytest_regressions.py +56 -0
- utilities/pytest_regressions.py +26 -46
- utilities/random.py +13 -9
- utilities/re.py +58 -28
- utilities/redis.py +401 -550
- utilities/scipy.py +1 -1
- utilities/sentinel.py +10 -0
- utilities/shelve.py +4 -1
- utilities/shutil.py +25 -0
- utilities/slack_sdk.py +36 -106
- utilities/sqlalchemy.py +502 -473
- utilities/sqlalchemy_polars.py +38 -94
- utilities/string.py +2 -3
- utilities/subprocess.py +1572 -0
- utilities/tempfile.py +86 -4
- utilities/testbook.py +50 -0
- utilities/text.py +165 -42
- utilities/timer.py +37 -65
- utilities/traceback.py +158 -929
- utilities/types.py +146 -116
- utilities/typing.py +531 -71
- utilities/tzdata.py +1 -53
- utilities/tzlocal.py +6 -23
- utilities/uuid.py +43 -5
- utilities/version.py +27 -26
- utilities/whenever.py +1776 -386
- utilities/zoneinfo.py +84 -22
- dycw_utilities-0.129.10.dist-info/METADATA +0 -241
- dycw_utilities-0.129.10.dist-info/RECORD +0 -96
- dycw_utilities-0.129.10.dist-info/WHEEL +0 -4
- dycw_utilities-0.129.10.dist-info/licenses/LICENSE +0 -21
- utilities/datetime.py +0 -1409
- utilities/eventkit.py +0 -402
- utilities/loguru.py +0 -144
- utilities/luigi.py +0 -228
- utilities/period.py +0 -324
- utilities/pyrsistent.py +0 -89
- utilities/python_dotenv.py +0 -105
- utilities/streamlit.py +0 -105
- utilities/sys.py +0 -87
- utilities/tenacity.py +0 -145
utilities/sqlalchemy_polars.py
CHANGED
|
@@ -4,7 +4,7 @@ import datetime as dt
|
|
|
4
4
|
import decimal
|
|
5
5
|
from contextlib import suppress
|
|
6
6
|
from dataclasses import dataclass
|
|
7
|
-
from typing import TYPE_CHECKING, Any,
|
|
7
|
+
from typing import TYPE_CHECKING, Any, cast, overload, override
|
|
8
8
|
from uuid import UUID
|
|
9
9
|
|
|
10
10
|
import polars as pl
|
|
@@ -25,9 +25,8 @@ from polars import (
|
|
|
25
25
|
)
|
|
26
26
|
from sqlalchemy import Column, Select, select
|
|
27
27
|
from sqlalchemy.exc import DuplicateColumnError
|
|
28
|
-
from sqlalchemy.ext.asyncio import AsyncEngine, create_async_engine
|
|
29
28
|
|
|
30
|
-
from utilities.asyncio import
|
|
29
|
+
from utilities.asyncio import timeout_td
|
|
31
30
|
from utilities.functions import identity
|
|
32
31
|
from utilities.iterables import (
|
|
33
32
|
CheckDuplicatesError,
|
|
@@ -36,7 +35,7 @@ from utilities.iterables import (
|
|
|
36
35
|
chunked,
|
|
37
36
|
one,
|
|
38
37
|
)
|
|
39
|
-
from utilities.polars import
|
|
38
|
+
from utilities.polars import zoned_date_time_dtype
|
|
40
39
|
from utilities.reprlib import get_repr
|
|
41
40
|
from utilities.sqlalchemy import (
|
|
42
41
|
CHUNK_SIZE_FRAC,
|
|
@@ -45,7 +44,6 @@ from utilities.sqlalchemy import (
|
|
|
45
44
|
get_chunk_size,
|
|
46
45
|
get_columns,
|
|
47
46
|
insert_items,
|
|
48
|
-
upsert_items,
|
|
49
47
|
)
|
|
50
48
|
from utilities.text import snake_case
|
|
51
49
|
from utilities.typing import is_subclass_gen
|
|
@@ -61,14 +59,12 @@ if TYPE_CHECKING:
|
|
|
61
59
|
)
|
|
62
60
|
|
|
63
61
|
from polars._typing import PolarsDataType, SchemaDict
|
|
62
|
+
from sqlalchemy.ext.asyncio import AsyncEngine
|
|
64
63
|
from sqlalchemy.sql import ColumnCollection
|
|
65
64
|
from sqlalchemy.sql.base import ReadOnlyColumnCollection
|
|
66
|
-
from
|
|
67
|
-
from tenacity.stop import StopBaseT
|
|
68
|
-
from tenacity.wait import WaitBaseT
|
|
65
|
+
from whenever import TimeDelta
|
|
69
66
|
|
|
70
|
-
|
|
71
|
-
from utilities.types import TimeZoneLike
|
|
67
|
+
from utilities.types import Delta, MaybeType, TimeZoneLike
|
|
72
68
|
|
|
73
69
|
|
|
74
70
|
async def insert_dataframe(
|
|
@@ -78,55 +74,37 @@ async def insert_dataframe(
|
|
|
78
74
|
/,
|
|
79
75
|
*,
|
|
80
76
|
snake: bool = False,
|
|
77
|
+
is_upsert: bool = False,
|
|
81
78
|
chunk_size_frac: float = CHUNK_SIZE_FRAC,
|
|
82
79
|
assume_tables_exist: bool = False,
|
|
83
|
-
|
|
84
|
-
timeout_create: utilities.types.Duration | None = None,
|
|
80
|
+
timeout_create: TimeDelta | None = None,
|
|
85
81
|
error_create: type[Exception] = TimeoutError,
|
|
86
|
-
timeout_insert:
|
|
82
|
+
timeout_insert: TimeDelta | None = None,
|
|
87
83
|
error_insert: type[Exception] = TimeoutError,
|
|
88
84
|
) -> None:
|
|
89
85
|
"""Insert/upsert a DataFrame into a database."""
|
|
90
86
|
mapping = _insert_dataframe_map_df_schema_to_table(
|
|
91
87
|
df.schema, table_or_orm, snake=snake
|
|
92
88
|
)
|
|
93
|
-
items = df.select(mapping).rename(mapping).
|
|
89
|
+
items = df.select(*mapping).rename(mapping).rows(named=True)
|
|
94
90
|
if len(items) == 0:
|
|
95
|
-
if not df.is_empty():
|
|
96
|
-
raise InsertDataFrameError(df=df)
|
|
97
91
|
if not assume_tables_exist:
|
|
98
92
|
await ensure_tables_created(
|
|
99
93
|
engine, table_or_orm, timeout=timeout_create, error=error_create
|
|
100
94
|
)
|
|
101
95
|
return
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
)
|
|
115
|
-
case "selected" | "all" as selected_or_all: # skipif-ci-and-not-linux
|
|
116
|
-
await upsert_items(
|
|
117
|
-
engine,
|
|
118
|
-
(items, table_or_orm),
|
|
119
|
-
snake=snake,
|
|
120
|
-
chunk_size_frac=chunk_size_frac,
|
|
121
|
-
selected_or_all=selected_or_all,
|
|
122
|
-
assume_tables_exist=assume_tables_exist,
|
|
123
|
-
timeout_create=timeout_create,
|
|
124
|
-
error_create=error_create,
|
|
125
|
-
timeout_insert=timeout_insert,
|
|
126
|
-
error_insert=error_insert,
|
|
127
|
-
)
|
|
128
|
-
case _ as never:
|
|
129
|
-
assert_never(never)
|
|
96
|
+
await insert_items(
|
|
97
|
+
engine,
|
|
98
|
+
(items, table_or_orm),
|
|
99
|
+
snake=snake,
|
|
100
|
+
is_upsert=is_upsert,
|
|
101
|
+
chunk_size_frac=chunk_size_frac,
|
|
102
|
+
assume_tables_exist=assume_tables_exist,
|
|
103
|
+
timeout_create=timeout_create,
|
|
104
|
+
error_create=error_create,
|
|
105
|
+
timeout_insert=timeout_insert,
|
|
106
|
+
error_insert=error_insert,
|
|
107
|
+
)
|
|
130
108
|
|
|
131
109
|
|
|
132
110
|
def _insert_dataframe_map_df_schema_to_table(
|
|
@@ -210,15 +188,6 @@ def _insert_dataframe_check_df_and_db_types(
|
|
|
210
188
|
)
|
|
211
189
|
|
|
212
190
|
|
|
213
|
-
@dataclass(kw_only=True, slots=True)
|
|
214
|
-
class InsertDataFrameError(Exception):
|
|
215
|
-
df: DataFrame
|
|
216
|
-
|
|
217
|
-
@override
|
|
218
|
-
def __str__(self) -> str:
|
|
219
|
-
return f"Non-empty DataFrame must resolve to at least 1 item\n\n{self.df}"
|
|
220
|
-
|
|
221
|
-
|
|
222
191
|
@overload
|
|
223
192
|
async def select_to_dataframe(
|
|
224
193
|
sel: Select[Any],
|
|
@@ -231,10 +200,8 @@ async def select_to_dataframe(
|
|
|
231
200
|
in_clauses: tuple[Column[Any], Iterable[Any]] | None = None,
|
|
232
201
|
in_clauses_chunk_size: int | None = None,
|
|
233
202
|
chunk_size_frac: float = CHUNK_SIZE_FRAC,
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
retry: RetryBaseT | None = None,
|
|
237
|
-
timeout: utilities.types.Duration | None = None,
|
|
203
|
+
timeout: Delta | None = None,
|
|
204
|
+
error: MaybeType[BaseException] = TimeoutError,
|
|
238
205
|
**kwargs: Any,
|
|
239
206
|
) -> DataFrame: ...
|
|
240
207
|
@overload
|
|
@@ -249,10 +216,8 @@ async def select_to_dataframe(
|
|
|
249
216
|
in_clauses: None = None,
|
|
250
217
|
in_clauses_chunk_size: int | None = None,
|
|
251
218
|
chunk_size_frac: float = CHUNK_SIZE_FRAC,
|
|
252
|
-
|
|
253
|
-
|
|
254
|
-
retry: RetryBaseT | None = None,
|
|
255
|
-
timeout: utilities.types.Duration | None = None,
|
|
219
|
+
timeout: Delta | None = None,
|
|
220
|
+
error: MaybeType[BaseException] = TimeoutError,
|
|
256
221
|
**kwargs: Any,
|
|
257
222
|
) -> Iterable[DataFrame]: ...
|
|
258
223
|
@overload
|
|
@@ -267,10 +232,8 @@ async def select_to_dataframe(
|
|
|
267
232
|
in_clauses: tuple[Column[Any], Iterable[Any]],
|
|
268
233
|
in_clauses_chunk_size: int | None = None,
|
|
269
234
|
chunk_size_frac: float = CHUNK_SIZE_FRAC,
|
|
270
|
-
|
|
271
|
-
|
|
272
|
-
retry: RetryBaseT | None = None,
|
|
273
|
-
timeout: utilities.types.Duration | None = None,
|
|
235
|
+
timeout: Delta | None = None,
|
|
236
|
+
error: MaybeType[BaseException] = TimeoutError,
|
|
274
237
|
**kwargs: Any,
|
|
275
238
|
) -> AsyncIterable[DataFrame]: ...
|
|
276
239
|
@overload
|
|
@@ -285,10 +248,8 @@ async def select_to_dataframe(
|
|
|
285
248
|
in_clauses: tuple[Column[Any], Iterable[Any]] | None = None,
|
|
286
249
|
in_clauses_chunk_size: int | None = None,
|
|
287
250
|
chunk_size_frac: float = CHUNK_SIZE_FRAC,
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
retry: RetryBaseT | None = None,
|
|
291
|
-
timeout: utilities.types.Duration | None = None,
|
|
251
|
+
timeout: Delta | None = None,
|
|
252
|
+
error: MaybeType[BaseException] = TimeoutError,
|
|
292
253
|
**kwargs: Any,
|
|
293
254
|
) -> DataFrame | Iterable[DataFrame] | AsyncIterable[DataFrame]: ...
|
|
294
255
|
async def select_to_dataframe(
|
|
@@ -302,32 +263,16 @@ async def select_to_dataframe(
|
|
|
302
263
|
in_clauses: tuple[Column[Any], Iterable[Any]] | None = None,
|
|
303
264
|
in_clauses_chunk_size: int | None = None,
|
|
304
265
|
chunk_size_frac: float = CHUNK_SIZE_FRAC,
|
|
305
|
-
timeout:
|
|
306
|
-
error:
|
|
266
|
+
timeout: Delta | None = None,
|
|
267
|
+
error: MaybeType[BaseException] = TimeoutError,
|
|
307
268
|
**kwargs: Any,
|
|
308
269
|
) -> DataFrame | Iterable[DataFrame] | AsyncIterable[DataFrame]:
|
|
309
270
|
"""Read a table from a database into a DataFrame."""
|
|
310
|
-
if not issubclass(AsyncEngine, type(engine)):
|
|
311
|
-
# for handling testing
|
|
312
|
-
engine = create_async_engine(engine.url)
|
|
313
|
-
return await select_to_dataframe(
|
|
314
|
-
sel,
|
|
315
|
-
engine,
|
|
316
|
-
snake=snake,
|
|
317
|
-
time_zone=time_zone,
|
|
318
|
-
batch_size=batch_size,
|
|
319
|
-
in_clauses=in_clauses,
|
|
320
|
-
in_clauses_chunk_size=in_clauses_chunk_size,
|
|
321
|
-
chunk_size_frac=chunk_size_frac,
|
|
322
|
-
timeout=timeout,
|
|
323
|
-
error=error,
|
|
324
|
-
**kwargs,
|
|
325
|
-
)
|
|
326
271
|
if snake:
|
|
327
272
|
sel = _select_to_dataframe_apply_snake(sel)
|
|
328
273
|
schema = _select_to_dataframe_map_select_to_df_schema(sel, time_zone=time_zone)
|
|
329
274
|
if in_clauses is None:
|
|
330
|
-
async with
|
|
275
|
+
async with timeout_td(timeout, error=error):
|
|
331
276
|
return read_database(
|
|
332
277
|
sel,
|
|
333
278
|
cast("Any", engine),
|
|
@@ -344,7 +289,7 @@ async def select_to_dataframe(
|
|
|
344
289
|
chunk_size_frac=chunk_size_frac,
|
|
345
290
|
)
|
|
346
291
|
if batch_size is None:
|
|
347
|
-
async with
|
|
292
|
+
async with timeout_td(timeout, error=error):
|
|
348
293
|
dfs = [
|
|
349
294
|
await select_to_dataframe(
|
|
350
295
|
sel,
|
|
@@ -365,7 +310,7 @@ async def select_to_dataframe(
|
|
|
365
310
|
return DataFrame(schema=schema)
|
|
366
311
|
|
|
367
312
|
async def yield_dfs() -> AsyncIterator[DataFrame]:
|
|
368
|
-
async with
|
|
313
|
+
async with timeout_td(timeout, error=error):
|
|
369
314
|
for sel_i in sels:
|
|
370
315
|
for df in await select_to_dataframe(
|
|
371
316
|
sel_i,
|
|
@@ -421,7 +366,7 @@ def _select_to_dataframe_map_table_column_type_to_dtype(
|
|
|
421
366
|
return pl.Date
|
|
422
367
|
if is_subclass_gen(py_type, dt.datetime):
|
|
423
368
|
has_tz: bool = type_use.timezone
|
|
424
|
-
return
|
|
369
|
+
return zoned_date_time_dtype(time_zone=time_zone) if has_tz else Datetime()
|
|
425
370
|
if issubclass(py_type, dt.time):
|
|
426
371
|
return Time
|
|
427
372
|
if issubclass(py_type, dt.timedelta):
|
|
@@ -460,15 +405,14 @@ def _select_to_dataframe_yield_selects_with_in_clauses(
|
|
|
460
405
|
in_clauses_chunk_size: int | None = None,
|
|
461
406
|
chunk_size_frac: float = CHUNK_SIZE_FRAC,
|
|
462
407
|
) -> Iterator[Select[Any]]:
|
|
463
|
-
max_length = len(sel.selected_columns)
|
|
464
408
|
in_col, in_values = in_clauses
|
|
465
409
|
if in_clauses_chunk_size is None:
|
|
466
410
|
chunk_size = get_chunk_size(
|
|
467
|
-
engine, chunk_size_frac=chunk_size_frac
|
|
411
|
+
engine, sel.selected_columns, chunk_size_frac=chunk_size_frac
|
|
468
412
|
)
|
|
469
413
|
else:
|
|
470
414
|
chunk_size = in_clauses_chunk_size
|
|
471
415
|
return (sel.where(in_col.in_(values)) for values in chunked(in_values, chunk_size))
|
|
472
416
|
|
|
473
417
|
|
|
474
|
-
__all__ = ["
|
|
418
|
+
__all__ = ["insert_dataframe", "select_to_dataframe"]
|
utilities/string.py
CHANGED
|
@@ -10,11 +10,10 @@ def substitute_environ(path_or_text: Path | str, /, **kwargs: Any) -> str:
|
|
|
10
10
|
"""Substitute the environment variables in a file."""
|
|
11
11
|
match path_or_text:
|
|
12
12
|
case Path() as path:
|
|
13
|
-
|
|
14
|
-
return substitute_environ(fh.read(), **kwargs)
|
|
13
|
+
return substitute_environ(path.read_text(), **kwargs)
|
|
15
14
|
case str() as text:
|
|
16
15
|
return Template(text).substitute(environ, **kwargs)
|
|
17
|
-
case
|
|
16
|
+
case never:
|
|
18
17
|
assert_never(never)
|
|
19
18
|
|
|
20
19
|
|