meerschaum-2.7.2-py3-none-any.whl → meerschaum-2.7.3-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parse_arguments.py +2 -0
- meerschaum/_internal/arguments/_parser.py +17 -11
- meerschaum/actions/clear.py +1 -1
- meerschaum/actions/edit.py +1 -1
- meerschaum/actions/verify.py +18 -21
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/sql/_fetch.py +45 -26
- meerschaum/connectors/sql/_instance.py +4 -4
- meerschaum/connectors/sql/_pipes.py +135 -103
- meerschaum/core/Pipe/_attributes.py +1 -1
- meerschaum/core/Pipe/_dtypes.py +9 -9
- meerschaum/core/Pipe/_fetch.py +2 -3
- meerschaum/core/Pipe/_sync.py +11 -3
- meerschaum/core/Pipe/_verify.py +9 -5
- meerschaum/jobs/__init__.py +1 -1
- meerschaum/utils/dataframe.py +10 -2
- meerschaum/utils/dtypes/sql.py +1 -1
- meerschaum/utils/formatting/__init__.py +5 -25
- meerschaum/utils/formatting/_pipes.py +9 -6
- meerschaum/utils/sql.py +156 -87
- meerschaum/utils/venv/__init__.py +44 -6
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/METADATA +1 -1
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/RECORD +29 -29
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.3.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -50,6 +50,7 @@ def sync(
|
|
50
50
|
retries: int = 10,
|
51
51
|
min_seconds: int = 1,
|
52
52
|
check_existing: bool = True,
|
53
|
+
enforce_dtypes: bool = True,
|
53
54
|
blocking: bool = True,
|
54
55
|
workers: Optional[int] = None,
|
55
56
|
callback: Optional[Callable[[Tuple[bool, str]], Any]] = None,
|
@@ -89,6 +90,10 @@ def sync(
|
|
89
90
|
check_existing: bool, default True
|
90
91
|
If `True`, pull and diff with existing data from the pipe.
|
91
92
|
|
93
|
+
enforce_dtypes: bool, default True
|
94
|
+
If `True`, enforce dtypes on incoming data.
|
95
|
+
Set this to `False` if the incoming rows are expected to be of the correct dtypes.
|
96
|
+
|
92
97
|
blocking: bool, default True
|
93
98
|
If `True`, wait for sync to finish and return its result, otherwise
|
94
99
|
asynchronously sync (oxymoron?) and return success. Defaults to `True`.
|
@@ -123,8 +128,6 @@ def sync(
|
|
123
128
|
A `SuccessTuple` of success (`bool`) and message (`str`).
|
124
129
|
"""
|
125
130
|
from meerschaum.utils.debug import dprint, _checkpoint
|
126
|
-
from meerschaum.connectors import custom_types
|
127
|
-
from meerschaum.plugins import Plugin
|
128
131
|
from meerschaum.utils.formatting import get_console
|
129
132
|
from meerschaum.utils.venv import Venv
|
130
133
|
from meerschaum.connectors import get_connector_plugin
|
@@ -366,7 +369,12 @@ def sync(
|
|
366
369
|
return success, msg
|
367
370
|
|
368
371
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
369
|
-
df = self.enforce_dtypes(
|
372
|
+
df = self.enforce_dtypes(
|
373
|
+
df,
|
374
|
+
chunksize=chunksize,
|
375
|
+
enforce=enforce_dtypes,
|
376
|
+
debug=debug,
|
377
|
+
)
|
370
378
|
|
371
379
|
### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
|
372
380
|
self._persist_new_json_columns(df, debug=debug)
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -162,7 +162,8 @@ def verify(
|
|
162
162
|
)
|
163
163
|
|
164
164
|
info(
|
165
|
-
f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
|
165
|
+
f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
|
166
|
+
+ ('s' if len(chunk_bounds) != 1 else '')
|
166
167
|
+ f" ({'un' if not bounded else ''}bounded)"
|
167
168
|
+ f" of size '{interval_str(chunk_interval)}'"
|
168
169
|
+ f" between '{begin_to_print}' and '{end_to_print}'."
|
@@ -194,6 +195,9 @@ def verify(
|
|
194
195
|
**kwargs
|
195
196
|
)
|
196
197
|
chunk_msg = chunk_msg.strip()
|
198
|
+
if ' - ' not in chunk_msg:
|
199
|
+
chunk_label = f"{chunk_begin} - {chunk_end}"
|
200
|
+
chunk_msg = f'{chunk_label}\n{chunk_msg}'
|
197
201
|
mrsm.pprint((chunk_success, chunk_msg))
|
198
202
|
return chunk_begin_and_end, (chunk_success, chunk_msg)
|
199
203
|
|
@@ -323,16 +327,16 @@ def get_chunks_success_message(
|
|
323
327
|
header = (header + "\n") if header else ""
|
324
328
|
stats_msg = items_str(
|
325
329
|
(
|
326
|
-
([f'inserted {num_inserted}'] if num_inserted else [])
|
327
|
-
+ ([f'updated {num_updated}'] if num_updated else [])
|
328
|
-
+ ([f'upserted {num_upserted}'] if num_upserted else [])
|
330
|
+
([f'inserted {num_inserted:,}'] if num_inserted else [])
|
331
|
+
+ ([f'updated {num_updated:,}'] if num_updated else [])
|
332
|
+
+ ([f'upserted {num_upserted:,}'] if num_upserted else [])
|
329
333
|
) or ['synced 0'],
|
330
334
|
quotes=False,
|
331
335
|
and_=False,
|
332
336
|
)
|
333
337
|
|
334
338
|
success_msg = (
|
335
|
-
f"Successfully synced {len(chunk_success_tuples)} chunk"
|
339
|
+
f"Successfully synced {len(chunk_success_tuples):,} chunk"
|
336
340
|
+ ('s' if len(chunk_success_tuples) != 1 else '')
|
337
341
|
+ '\n(' + stats_msg
|
338
342
|
+ ' rows in total).'
|
meerschaum/jobs/__init__.py
CHANGED
@@ -342,7 +342,7 @@ def check_restart_jobs(
|
|
342
342
|
|
343
343
|
def _check_restart_jobs_against_lock(*args, **kwargs):
|
344
344
|
from meerschaum.config.paths import CHECK_JOBS_LOCK_PATH
|
345
|
-
fasteners = mrsm.attempt_import('fasteners')
|
345
|
+
fasteners = mrsm.attempt_import('fasteners', lazy=False)
|
346
346
|
lock = fasteners.InterProcessLock(CHECK_JOBS_LOCK_PATH)
|
347
347
|
with lock:
|
348
348
|
check_restart_jobs(*args, **kwargs)
|
meerschaum/utils/dataframe.py
CHANGED
@@ -390,6 +390,7 @@ def parse_df_datetimes(
|
|
390
390
|
strip_timezone: bool = False,
|
391
391
|
chunksize: Optional[int] = None,
|
392
392
|
dtype_backend: str = 'numpy_nullable',
|
393
|
+
ignore_all: bool = False,
|
393
394
|
debug: bool = False,
|
394
395
|
) -> 'pd.DataFrame':
|
395
396
|
"""
|
@@ -414,6 +415,9 @@ def parse_df_datetimes(
|
|
414
415
|
use this as the datatypes backend.
|
415
416
|
Accepted values are 'numpy_nullable' and 'pyarrow'.
|
416
417
|
|
418
|
+
ignore_all: bool, default False
|
419
|
+
If `True`, do not attempt to cast any columns to datetimes.
|
420
|
+
|
417
421
|
debug: bool, default False
|
418
422
|
Verbosity toggle.
|
419
423
|
|
@@ -504,7 +508,11 @@ def parse_df_datetimes(
|
|
504
508
|
if 'datetime' in str(dtype)
|
505
509
|
]
|
506
510
|
)
|
507
|
-
cols_to_inspect = [
|
511
|
+
cols_to_inspect = [
|
512
|
+
col
|
513
|
+
for col in pdf.columns
|
514
|
+
if col not in ignore_cols
|
515
|
+
] if not ignore_all else []
|
508
516
|
|
509
517
|
if len(cols_to_inspect) == 0:
|
510
518
|
if debug:
|
@@ -1263,7 +1271,7 @@ def df_from_literal(
|
|
1263
1271
|
import ast
|
1264
1272
|
try:
|
1265
1273
|
val = ast.literal_eval(literal)
|
1266
|
-
except Exception
|
1274
|
+
except Exception:
|
1267
1275
|
warn(
|
1268
1276
|
"Failed to parse value from string:\n" + f"{literal}" +
|
1269
1277
|
"\n\nWill cast as a string instead."\
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -7,7 +7,7 @@ Utility functions for working with SQL data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
-
from meerschaum.utils.typing import Dict, Union, Tuple
|
10
|
+
from meerschaum.utils.typing import Dict, Union, Tuple
|
11
11
|
|
12
12
|
NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
13
13
|
'mariadb': (38, 20),
|
meerschaum/utils/formatting/__init__.py
CHANGED
@@ -57,18 +57,7 @@ def colored_fallback(*args, **kw):
|
|
57
57
|
return ' '.join(args)
|
58
58
|
|
59
59
|
def translate_rich_to_termcolor(*colors) -> tuple:
|
60
|
-
"""Translate between rich and more_termcolor terminology.
|
61
|
-
This is probably prone to breaking.
|
62
|
-
|
63
|
-
Parameters
|
64
|
-
----------
|
65
|
-
*colors :
|
66
|
-
|
67
|
-
|
68
|
-
Returns
|
69
|
-
-------
|
70
|
-
|
71
|
-
"""
|
60
|
+
"""Translate between rich and more_termcolor terminology."""
|
72
61
|
_colors = []
|
73
62
|
for c in colors:
|
74
63
|
_c_list = []
|
@@ -131,7 +120,7 @@ def _init():
|
|
131
120
|
try:
|
132
121
|
colorama.init(autoreset=False)
|
133
122
|
success = True
|
134
|
-
except Exception
|
123
|
+
except Exception:
|
135
124
|
import traceback
|
136
125
|
traceback.print_exc()
|
137
126
|
_attrs['ANSI'], _attrs['UNICODE'], _attrs['CHARSET'] = False, False, 'ascii'
|
@@ -219,7 +208,7 @@ def get_console():
|
|
219
208
|
rich_console = attempt_import('rich.console')
|
220
209
|
try:
|
221
210
|
console = rich_console.Console(force_terminal=True, color_system='truecolor')
|
222
|
-
except Exception
|
211
|
+
except Exception:
|
223
212
|
console = None
|
224
213
|
return console
|
225
214
|
|
@@ -307,7 +296,6 @@ def format_success_tuple(
|
|
307
296
|
calm: bool, default False
|
308
297
|
If `True`, use the default emoji and color scheme.
|
309
298
|
"""
|
310
|
-
from meerschaum.config.static import STATIC_CONFIG
|
311
299
|
_init()
|
312
300
|
try:
|
313
301
|
status = 'success' if tup[0] else 'failure'
|
@@ -381,12 +369,9 @@ def print_options(
|
|
381
369
|
If `True`, print the option's number in the list (1 index).
|
382
370
|
|
383
371
|
"""
|
384
|
-
import os
|
385
372
|
from meerschaum.utils.packages import import_rich
|
386
|
-
from meerschaum.utils.formatting import
|
387
|
-
from meerschaum.
|
388
|
-
from meerschaum.utils.misc import get_cols_lines, string_width, iterate_chunks
|
389
|
-
|
373
|
+
from meerschaum.utils.formatting import highlight_pipes
|
374
|
+
from meerschaum.utils.misc import get_cols_lines, string_width
|
390
375
|
|
391
376
|
if options is None:
|
392
377
|
options = {}
|
@@ -429,15 +414,10 @@ def print_options(
|
|
429
414
|
continue
|
430
415
|
break
|
431
416
|
|
432
|
-
from meerschaum.utils.formatting import pprint, get_console
|
433
417
|
from meerschaum.utils.packages import attempt_import
|
434
|
-
rich_columns = attempt_import('rich.columns')
|
435
|
-
rich_panel = attempt_import('rich.panel')
|
436
418
|
rich_table = attempt_import('rich.table')
|
437
419
|
Text = attempt_import('rich.text').Text
|
438
420
|
box = attempt_import('rich.box')
|
439
|
-
Panel = rich_panel.Panel
|
440
|
-
Columns = rich_columns.Columns
|
441
421
|
Table = rich_table.Table
|
442
422
|
|
443
423
|
if _header is not None:
|
meerschaum/utils/formatting/_pipes.py
CHANGED
@@ -482,9 +482,9 @@ def print_pipes_results(
|
|
482
482
|
|
483
483
|
|
484
484
|
def extract_stats_from_message(
|
485
|
-
|
486
|
-
|
487
|
-
|
485
|
+
message: str,
|
486
|
+
stat_keys: Optional[List[str]] = None,
|
487
|
+
) -> Dict[str, int]:
|
488
488
|
"""
|
489
489
|
Given a sync message, return the insert, update, upsert stats from within.
|
490
490
|
|
@@ -511,9 +511,9 @@ def extract_stats_from_message(
|
|
511
511
|
|
512
512
|
|
513
513
|
def extract_stats_from_line(
|
514
|
-
|
515
|
-
|
516
|
-
|
514
|
+
line: str,
|
515
|
+
stat_keys: List[str],
|
516
|
+
) -> Dict[str, int]:
|
517
517
|
"""
|
518
518
|
Return the insert, update, upsert stats from a single line.
|
519
519
|
"""
|
@@ -524,6 +524,9 @@ def extract_stats_from_line(
|
|
524
524
|
if search_key not in line.lower():
|
525
525
|
continue
|
526
526
|
|
527
|
+
### the count may be formatted with commas
|
528
|
+
line = line.replace(',', '')
|
529
|
+
|
527
530
|
### stat_text starts with the digits we want.
|
528
531
|
try:
|
529
532
|
stat_text = line.lower().split(search_key + ' ')[1]
|