meerschaum 2.7.2__py3-none-any.whl → 2.7.4__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/_internal/arguments/_parse_arguments.py +2 -0
- meerschaum/_internal/arguments/_parser.py +17 -11
- meerschaum/actions/clear.py +1 -1
- meerschaum/actions/edit.py +1 -1
- meerschaum/actions/start.py +2 -2
- meerschaum/actions/verify.py +18 -21
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/sql/_fetch.py +45 -26
- meerschaum/connectors/sql/_instance.py +4 -4
- meerschaum/connectors/sql/_pipes.py +135 -103
- meerschaum/core/Pipe/_attributes.py +1 -1
- meerschaum/core/Pipe/_dtypes.py +9 -9
- meerschaum/core/Pipe/_fetch.py +2 -3
- meerschaum/core/Pipe/_sync.py +11 -3
- meerschaum/core/Pipe/_verify.py +9 -5
- meerschaum/jobs/__init__.py +1 -3
- meerschaum/utils/daemon/Daemon.py +1 -1
- meerschaum/utils/daemon/StdinFile.py +4 -1
- meerschaum/utils/dataframe.py +10 -2
- meerschaum/utils/dtypes/sql.py +1 -1
- meerschaum/utils/formatting/__init__.py +5 -25
- meerschaum/utils/formatting/_pipes.py +9 -6
- meerschaum/utils/sql.py +156 -87
- meerschaum/utils/venv/__init__.py +61 -13
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/METADATA +1 -1
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/RECORD +32 -32
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.2.dist-info → meerschaum-2.7.4.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -50,6 +50,7 @@ def sync(
|
|
50
50
|
retries: int = 10,
|
51
51
|
min_seconds: int = 1,
|
52
52
|
check_existing: bool = True,
|
53
|
+
enforce_dtypes: bool = True,
|
53
54
|
blocking: bool = True,
|
54
55
|
workers: Optional[int] = None,
|
55
56
|
callback: Optional[Callable[[Tuple[bool, str]], Any]] = None,
|
@@ -89,6 +90,10 @@ def sync(
|
|
89
90
|
check_existing: bool, default True
|
90
91
|
If `True`, pull and diff with existing data from the pipe.
|
91
92
|
|
93
|
+
enforce_dtypes: bool, default True
|
94
|
+
If `True`, enforce dtypes on incoming data.
|
95
|
+
Set this to `False` if the incoming rows are expected to be of the correct dtypes.
|
96
|
+
|
92
97
|
blocking: bool, default True
|
93
98
|
If `True`, wait for sync to finish and return its result, otherwise
|
94
99
|
asynchronously sync (oxymoron?) and return success. Defaults to `True`.
|
@@ -123,8 +128,6 @@ def sync(
|
|
123
128
|
A `SuccessTuple` of success (`bool`) and message (`str`).
|
124
129
|
"""
|
125
130
|
from meerschaum.utils.debug import dprint, _checkpoint
|
126
|
-
from meerschaum.connectors import custom_types
|
127
|
-
from meerschaum.plugins import Plugin
|
128
131
|
from meerschaum.utils.formatting import get_console
|
129
132
|
from meerschaum.utils.venv import Venv
|
130
133
|
from meerschaum.connectors import get_connector_plugin
|
@@ -366,7 +369,12 @@ def sync(
|
|
366
369
|
return success, msg
|
367
370
|
|
368
371
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
369
|
-
df = self.enforce_dtypes(
|
372
|
+
df = self.enforce_dtypes(
|
373
|
+
df,
|
374
|
+
chunksize=chunksize,
|
375
|
+
enforce=enforce_dtypes,
|
376
|
+
debug=debug,
|
377
|
+
)
|
370
378
|
|
371
379
|
### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
|
372
380
|
self._persist_new_json_columns(df, debug=debug)
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -162,7 +162,8 @@ def verify(
|
|
162
162
|
)
|
163
163
|
|
164
164
|
info(
|
165
|
-
f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
|
165
|
+
f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
|
166
|
+
+ ('s' if len(chunk_bounds) != 1 else '')
|
166
167
|
+ f" ({'un' if not bounded else ''}bounded)"
|
167
168
|
+ f" of size '{interval_str(chunk_interval)}'"
|
168
169
|
+ f" between '{begin_to_print}' and '{end_to_print}'."
|
@@ -194,6 +195,9 @@ def verify(
|
|
194
195
|
**kwargs
|
195
196
|
)
|
196
197
|
chunk_msg = chunk_msg.strip()
|
198
|
+
if ' - ' not in chunk_msg:
|
199
|
+
chunk_label = f"{chunk_begin} - {chunk_end}"
|
200
|
+
chunk_msg = f'{chunk_label}\n{chunk_msg}'
|
197
201
|
mrsm.pprint((chunk_success, chunk_msg))
|
198
202
|
return chunk_begin_and_end, (chunk_success, chunk_msg)
|
199
203
|
|
@@ -323,16 +327,16 @@ def get_chunks_success_message(
|
|
323
327
|
header = (header + "\n") if header else ""
|
324
328
|
stats_msg = items_str(
|
325
329
|
(
|
326
|
-
([f'inserted {num_inserted}'] if num_inserted else [])
|
327
|
-
+ ([f'updated {num_updated}'] if num_updated else [])
|
328
|
-
+ ([f'upserted {num_upserted}'] if num_upserted else [])
|
330
|
+
([f'inserted {num_inserted:,}'] if num_inserted else [])
|
331
|
+
+ ([f'updated {num_updated:,}'] if num_updated else [])
|
332
|
+
+ ([f'upserted {num_upserted:,}'] if num_upserted else [])
|
329
333
|
) or ['synced 0'],
|
330
334
|
quotes=False,
|
331
335
|
and_=False,
|
332
336
|
)
|
333
337
|
|
334
338
|
success_msg = (
|
335
|
-
f"Successfully synced {len(chunk_success_tuples)} chunk"
|
339
|
+
f"Successfully synced {len(chunk_success_tuples):,} chunk"
|
336
340
|
+ ('s' if len(chunk_success_tuples) != 1 else '')
|
337
341
|
+ '\n(' + stats_msg
|
338
342
|
+ ' rows in total).'
|
meerschaum/jobs/__init__.py
CHANGED
@@ -6,8 +6,6 @@
|
|
6
6
|
Higher-level utilities for managing `meerschaum.utils.daemon.Daemon`.
|
7
7
|
"""
|
8
8
|
|
9
|
-
import pathlib
|
10
|
-
|
11
9
|
import meerschaum as mrsm
|
12
10
|
from meerschaum.utils.typing import Dict, Optional, List, SuccessTuple
|
13
11
|
|
@@ -342,7 +340,7 @@ def check_restart_jobs(
|
|
342
340
|
|
343
341
|
def _check_restart_jobs_against_lock(*args, **kwargs):
|
344
342
|
from meerschaum.config.paths import CHECK_JOBS_LOCK_PATH
|
345
|
-
fasteners = mrsm.attempt_import('fasteners')
|
343
|
+
fasteners = mrsm.attempt_import('fasteners', lazy=False)
|
346
344
|
lock = fasteners.InterProcessLock(CHECK_JOBS_LOCK_PATH)
|
347
345
|
with lock:
|
348
346
|
check_restart_jobs(*args, **kwargs)
|
meerschaum/utils/daemon/Daemon.py
CHANGED
@@ -13,7 +13,6 @@ import pathlib
|
|
13
13
|
import json
|
14
14
|
import shutil
|
15
15
|
import signal
|
16
|
-
import sys
|
17
16
|
import time
|
18
17
|
import traceback
|
19
18
|
from functools import partial
|
@@ -301,6 +300,7 @@ class Daemon:
|
|
301
300
|
os.environ['LINES'], os.environ['COLUMNS'] = str(int(lines)), str(int(columns))
|
302
301
|
with self._daemon_context:
|
303
302
|
sys.stdin = self.stdin_file
|
303
|
+
_ = os.environ.pop(STATIC_CONFIG['environment']['systemd_stdin_path'], None)
|
304
304
|
os.environ[STATIC_CONFIG['environment']['daemon_id']] = self.daemon_id
|
305
305
|
os.environ['PYTHONUNBUFFERED'] = '1'
|
306
306
|
|
meerschaum/utils/daemon/StdinFile.py
CHANGED
@@ -104,7 +104,10 @@ class StdinFile(io.TextIOBase):
|
|
104
104
|
if self._file_handler is not None:
|
105
105
|
self.sel.unregister(self._file_handler)
|
106
106
|
self._file_handler.close()
|
107
|
-
|
107
|
+
try:
|
108
|
+
os.close(self._fd)
|
109
|
+
except OSError:
|
110
|
+
pass
|
108
111
|
self._file_handler = None
|
109
112
|
self._fd = None
|
110
113
|
|
meerschaum/utils/dataframe.py
CHANGED
@@ -390,6 +390,7 @@ def parse_df_datetimes(
|
|
390
390
|
strip_timezone: bool = False,
|
391
391
|
chunksize: Optional[int] = None,
|
392
392
|
dtype_backend: str = 'numpy_nullable',
|
393
|
+
ignore_all: bool = False,
|
393
394
|
debug: bool = False,
|
394
395
|
) -> 'pd.DataFrame':
|
395
396
|
"""
|
@@ -414,6 +415,9 @@ def parse_df_datetimes(
|
|
414
415
|
use this as the datatypes backend.
|
415
416
|
Accepted values are 'numpy_nullable' and 'pyarrow'.
|
416
417
|
|
418
|
+
ignore_all: bool, default False
|
419
|
+
If `True`, do not attempt to cast any columns to datetimes.
|
420
|
+
|
417
421
|
debug: bool, default False
|
418
422
|
Verbosity toggle.
|
419
423
|
|
@@ -504,7 +508,11 @@ def parse_df_datetimes(
|
|
504
508
|
if 'datetime' in str(dtype)
|
505
509
|
]
|
506
510
|
)
|
507
|
-
cols_to_inspect = [
|
511
|
+
cols_to_inspect = [
|
512
|
+
col
|
513
|
+
for col in pdf.columns
|
514
|
+
if col not in ignore_cols
|
515
|
+
] if not ignore_all else []
|
508
516
|
|
509
517
|
if len(cols_to_inspect) == 0:
|
510
518
|
if debug:
|
@@ -1263,7 +1271,7 @@ def df_from_literal(
|
|
1263
1271
|
import ast
|
1264
1272
|
try:
|
1265
1273
|
val = ast.literal_eval(literal)
|
1266
|
-
except Exception
|
1274
|
+
except Exception:
|
1267
1275
|
warn(
|
1268
1276
|
"Failed to parse value from string:\n" + f"{literal}" +
|
1269
1277
|
"\n\nWill cast as a string instead."\
|
meerschaum/utils/dtypes/sql.py
CHANGED
@@ -7,7 +7,7 @@ Utility functions for working with SQL data types.
|
|
7
7
|
"""
|
8
8
|
|
9
9
|
from __future__ import annotations
|
10
|
-
from meerschaum.utils.typing import Dict, Union, Tuple
|
10
|
+
from meerschaum.utils.typing import Dict, Union, Tuple
|
11
11
|
|
12
12
|
NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
|
13
13
|
'mariadb': (38, 20),
|
meerschaum/utils/formatting/__init__.py
CHANGED
@@ -57,18 +57,7 @@ def colored_fallback(*args, **kw):
|
|
57
57
|
return ' '.join(args)
|
58
58
|
|
59
59
|
def translate_rich_to_termcolor(*colors) -> tuple:
|
60
|
-
"""Translate between rich and more_termcolor terminology.
|
61
|
-
This is probably prone to breaking.
|
62
|
-
|
63
|
-
Parameters
|
64
|
-
----------
|
65
|
-
*colors :
|
66
|
-
|
67
|
-
|
68
|
-
Returns
|
69
|
-
-------
|
70
|
-
|
71
|
-
"""
|
60
|
+
"""Translate between rich and more_termcolor terminology."""
|
72
61
|
_colors = []
|
73
62
|
for c in colors:
|
74
63
|
_c_list = []
|
@@ -131,7 +120,7 @@ def _init():
|
|
131
120
|
try:
|
132
121
|
colorama.init(autoreset=False)
|
133
122
|
success = True
|
134
|
-
except Exception
|
123
|
+
except Exception:
|
135
124
|
import traceback
|
136
125
|
traceback.print_exc()
|
137
126
|
_attrs['ANSI'], _attrs['UNICODE'], _attrs['CHARSET'] = False, False, 'ascii'
|
@@ -219,7 +208,7 @@ def get_console():
|
|
219
208
|
rich_console = attempt_import('rich.console')
|
220
209
|
try:
|
221
210
|
console = rich_console.Console(force_terminal=True, color_system='truecolor')
|
222
|
-
except Exception
|
211
|
+
except Exception:
|
223
212
|
console = None
|
224
213
|
return console
|
225
214
|
|
@@ -307,7 +296,6 @@ def format_success_tuple(
|
|
307
296
|
calm: bool, default False
|
308
297
|
If `True`, use the default emoji and color scheme.
|
309
298
|
"""
|
310
|
-
from meerschaum.config.static import STATIC_CONFIG
|
311
299
|
_init()
|
312
300
|
try:
|
313
301
|
status = 'success' if tup[0] else 'failure'
|
@@ -381,12 +369,9 @@ def print_options(
|
|
381
369
|
If `True`, print the option's number in the list (1 index).
|
382
370
|
|
383
371
|
"""
|
384
|
-
import os
|
385
372
|
from meerschaum.utils.packages import import_rich
|
386
|
-
from meerschaum.utils.formatting import
|
387
|
-
from meerschaum.
|
388
|
-
from meerschaum.utils.misc import get_cols_lines, string_width, iterate_chunks
|
389
|
-
|
373
|
+
from meerschaum.utils.formatting import highlight_pipes
|
374
|
+
from meerschaum.utils.misc import get_cols_lines, string_width
|
390
375
|
|
391
376
|
if options is None:
|
392
377
|
options = {}
|
@@ -429,15 +414,10 @@ def print_options(
|
|
429
414
|
continue
|
430
415
|
break
|
431
416
|
|
432
|
-
from meerschaum.utils.formatting import pprint, get_console
|
433
417
|
from meerschaum.utils.packages import attempt_import
|
434
|
-
rich_columns = attempt_import('rich.columns')
|
435
|
-
rich_panel = attempt_import('rich.panel')
|
436
418
|
rich_table = attempt_import('rich.table')
|
437
419
|
Text = attempt_import('rich.text').Text
|
438
420
|
box = attempt_import('rich.box')
|
439
|
-
Panel = rich_panel.Panel
|
440
|
-
Columns = rich_columns.Columns
|
441
421
|
Table = rich_table.Table
|
442
422
|
|
443
423
|
if _header is not None:
|
meerschaum/utils/formatting/_pipes.py
CHANGED
@@ -482,9 +482,9 @@ def print_pipes_results(
|
|
482
482
|
|
483
483
|
|
484
484
|
def extract_stats_from_message(
|
485
|
-
|
486
|
-
|
487
|
-
|
485
|
+
message: str,
|
486
|
+
stat_keys: Optional[List[str]] = None,
|
487
|
+
) -> Dict[str, int]:
|
488
488
|
"""
|
489
489
|
Given a sync message, return the insert, update, upsert stats from within.
|
490
490
|
|
@@ -511,9 +511,9 @@ def extract_stats_from_message(
|
|
511
511
|
|
512
512
|
|
513
513
|
def extract_stats_from_line(
|
514
|
-
|
515
|
-
|
516
|
-
|
514
|
+
line: str,
|
515
|
+
stat_keys: List[str],
|
516
|
+
) -> Dict[str, int]:
|
517
517
|
"""
|
518
518
|
Return the insert, update, upsert stats from a single line.
|
519
519
|
"""
|
@@ -524,6 +524,9 @@ def extract_stats_from_line(
|
|
524
524
|
if search_key not in line.lower():
|
525
525
|
continue
|
526
526
|
|
527
|
+
### the count may be formatted with commas
|
528
|
+
line = line.replace(',', '')
|
529
|
+
|
527
530
|
### stat_text starts with the digits we want.
|
528
531
|
try:
|
529
532
|
stat_text = line.lower().split(search_key + ' ')[1]
|