meerschaum 2.7.9__py3-none-any.whl → 2.8.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +17 -5
- meerschaum/_internal/term/TermPageHandler.py +1 -1
- meerschaum/_internal/term/__init__.py +1 -1
- meerschaum/actions/api.py +36 -10
- meerschaum/actions/copy.py +3 -1
- meerschaum/actions/index.py +1 -1
- meerschaum/actions/show.py +7 -7
- meerschaum/actions/sync.py +5 -1
- meerschaum/actions/verify.py +14 -1
- meerschaum/api/__init__.py +77 -41
- meerschaum/api/_exceptions.py +18 -0
- meerschaum/api/dash/__init__.py +4 -2
- meerschaum/api/dash/callbacks/dashboard.py +30 -1
- meerschaum/api/dash/components.py +2 -2
- meerschaum/api/dash/webterm.py +23 -4
- meerschaum/api/models/_pipes.py +8 -8
- meerschaum/api/resources/static/css/dash.css +2 -2
- meerschaum/api/resources/templates/termpage.html +5 -1
- meerschaum/api/routes/__init__.py +15 -12
- meerschaum/api/routes/_connectors.py +30 -28
- meerschaum/api/routes/_index.py +16 -7
- meerschaum/api/routes/_misc.py +30 -22
- meerschaum/api/routes/_pipes.py +244 -148
- meerschaum/api/routes/_plugins.py +58 -47
- meerschaum/api/routes/_users.py +39 -31
- meerschaum/api/routes/_version.py +8 -10
- meerschaum/api/routes/_webterm.py +2 -2
- meerschaum/config/_default.py +10 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/static/__init__.py +5 -2
- meerschaum/connectors/api/_APIConnector.py +4 -3
- meerschaum/connectors/api/_login.py +21 -17
- meerschaum/connectors/api/_pipes.py +1 -0
- meerschaum/connectors/api/_request.py +9 -10
- meerschaum/connectors/sql/_cli.py +11 -3
- meerschaum/connectors/sql/_instance.py +1 -1
- meerschaum/connectors/sql/_pipes.py +77 -57
- meerschaum/connectors/sql/_sql.py +26 -9
- meerschaum/core/Pipe/__init__.py +2 -0
- meerschaum/core/Pipe/_attributes.py +13 -2
- meerschaum/core/Pipe/_data.py +85 -0
- meerschaum/core/Pipe/_deduplicate.py +6 -8
- meerschaum/core/Pipe/_sync.py +63 -30
- meerschaum/core/Pipe/_verify.py +242 -77
- meerschaum/core/User/__init__.py +2 -6
- meerschaum/jobs/_Job.py +1 -1
- meerschaum/jobs/__init__.py +15 -0
- meerschaum/utils/dataframe.py +2 -0
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/formatting/_pipes.py +1 -1
- meerschaum/utils/misc.py +11 -7
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/sql.py +6 -2
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/METADATA +4 -4
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/RECORD +61 -60
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -319,16 +319,42 @@ def sync(
|
|
319
319
|
if debug:
|
320
320
|
dprint("Successfully synced the first chunk, attemping the rest...")
|
321
321
|
|
322
|
-
failed_chunks = []
|
323
322
|
def _process_chunk(_chunk):
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
323
|
+
_chunk_attempts = 0
|
324
|
+
_max_chunk_attempts = 3
|
325
|
+
while _chunk_attempts < _max_chunk_attempts:
|
326
|
+
try:
|
327
|
+
_chunk_success, _chunk_msg = _sync(p, _chunk)
|
328
|
+
except Exception as e:
|
329
|
+
_chunk_success, _chunk_msg = False, str(e)
|
330
|
+
if _chunk_success:
|
331
|
+
break
|
332
|
+
_chunk_attempts += 1
|
333
|
+
_sleep_seconds = _chunk_attempts ** 2
|
334
|
+
warn(
|
335
|
+
(
|
336
|
+
f"Failed to sync chunk to {self} "
|
337
|
+
+ f"(attempt {_chunk_attempts} / {_max_chunk_attempts}).\n"
|
338
|
+
+ f"Sleeping for {_sleep_seconds} second"
|
339
|
+
+ ('s' if _sleep_seconds != 1 else '')
|
340
|
+
+ ":\n{_chunk_msg}"
|
341
|
+
),
|
342
|
+
stack=False,
|
343
|
+
)
|
344
|
+
time.sleep(_sleep_seconds)
|
345
|
+
|
346
|
+
num_rows_str = (
|
347
|
+
f"{num_rows:,} rows"
|
348
|
+
if (num_rows := len(_chunk)) != 1
|
349
|
+
else f"{num_rows} row"
|
350
|
+
)
|
330
351
|
_chunk_msg = (
|
331
|
-
|
352
|
+
(
|
353
|
+
"Synced"
|
354
|
+
if _chunk_success
|
355
|
+
else "Failed to sync"
|
356
|
+
) + f" a chunk ({num_rows_str}) to {p}:\n"
|
357
|
+
+ self._get_chunk_label(_chunk, dt_col)
|
332
358
|
+ '\n'
|
333
359
|
+ _chunk_msg
|
334
360
|
)
|
@@ -351,27 +377,16 @@ def sync(
|
|
351
377
|
)
|
352
378
|
chunk_messages = [chunk_msg for _, chunk_msg in results]
|
353
379
|
success_bools = [chunk_success for chunk_success, _ in results]
|
380
|
+
num_successes = len([chunk_success for chunk_success, _ in results if chunk_success])
|
381
|
+
num_failures = len([chunk_success for chunk_success, _ in results if not chunk_success])
|
354
382
|
success = all(success_bools)
|
355
383
|
msg = (
|
356
|
-
|
384
|
+
'Synced '
|
385
|
+
+ f'{len(chunk_messages):,} chunk'
|
357
386
|
+ ('s' if len(chunk_messages) != 1 else '')
|
358
|
-
+ f' to {p}:\n\n'
|
387
|
+
+ f' to {p}\n({num_successes} succeeded, {num_failures} failed):\n\n'
|
359
388
|
+ '\n\n'.join(chunk_messages).lstrip().rstrip()
|
360
389
|
).lstrip().rstrip()
|
361
|
-
|
362
|
-
### If some chunks succeeded, retry the failures.
|
363
|
-
retry_success = True
|
364
|
-
if not success and any(success_bools):
|
365
|
-
if debug:
|
366
|
-
dprint("Retrying failed chunks...")
|
367
|
-
chunks_to_retry = [c for c in failed_chunks]
|
368
|
-
failed_chunks = []
|
369
|
-
for chunk in chunks_to_retry:
|
370
|
-
chunk_success, chunk_msg = _process_chunk(chunk)
|
371
|
-
msg += f"\n\nRetried chunk:\n{chunk_msg}\n"
|
372
|
-
retry_success = retry_success and chunk_success
|
373
|
-
|
374
|
-
success = success and retry_success
|
375
390
|
return success, msg
|
376
391
|
|
377
392
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
@@ -474,6 +489,7 @@ def get_sync_time(
|
|
474
489
|
params: Optional[Dict[str, Any]] = None,
|
475
490
|
newest: bool = True,
|
476
491
|
apply_backtrack_interval: bool = False,
|
492
|
+
remote: bool = False,
|
477
493
|
round_down: bool = False,
|
478
494
|
debug: bool = False
|
479
495
|
) -> Union['datetime', int, None]:
|
@@ -493,6 +509,10 @@ def get_sync_time(
|
|
493
509
|
apply_backtrack_interval: bool, default False
|
494
510
|
If `True`, subtract the backtrack interval from the sync time.
|
495
511
|
|
512
|
+
remote: bool, default False
|
513
|
+
If `True` and the instance connector supports it, return the sync time
|
514
|
+
for the remote table definition.
|
515
|
+
|
496
516
|
round_down: bool, default False
|
497
517
|
If `True`, round down the datetime value to the nearest minute.
|
498
518
|
|
@@ -506,17 +526,30 @@ def get_sync_time(
|
|
506
526
|
"""
|
507
527
|
from meerschaum.utils.venv import Venv
|
508
528
|
from meerschaum.connectors import get_connector_plugin
|
509
|
-
from meerschaum.utils.misc import round_time
|
529
|
+
from meerschaum.utils.misc import round_time, filter_keywords
|
530
|
+
from meerschaum.utils.warnings import warn
|
510
531
|
|
511
532
|
if not self.columns.get('datetime', None):
|
512
533
|
return None
|
513
534
|
|
514
|
-
|
515
|
-
|
535
|
+
connector = self.instance_connector if not remote else self.connector
|
536
|
+
with Venv(get_connector_plugin(connector)):
|
537
|
+
if not hasattr(connector, 'get_sync_time'):
|
538
|
+
warn(
|
539
|
+
f"Connectors of type '{connector.type}' "
|
540
|
+
"do not implement `get_sync_time().",
|
541
|
+
stack=False,
|
542
|
+
)
|
543
|
+
return None
|
544
|
+
sync_time = connector.get_sync_time(
|
516
545
|
self,
|
517
|
-
|
518
|
-
|
519
|
-
|
546
|
+
**filter_keywords(
|
547
|
+
connector.get_sync_time,
|
548
|
+
params=params,
|
549
|
+
newest=newest,
|
550
|
+
remote=remote,
|
551
|
+
debug=debug,
|
552
|
+
)
|
520
553
|
)
|
521
554
|
|
522
555
|
if round_down and isinstance(sync_time, datetime):
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -11,6 +11,7 @@ from datetime import datetime, timedelta
|
|
11
11
|
import meerschaum as mrsm
|
12
12
|
from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
|
13
13
|
from meerschaum.utils.warnings import warn, info
|
14
|
+
from meerschaum.config.static import STATIC_CONFIG
|
14
15
|
|
15
16
|
|
16
17
|
def verify(
|
@@ -22,6 +23,9 @@ def verify(
|
|
22
23
|
bounded: Optional[bool] = None,
|
23
24
|
deduplicate: bool = False,
|
24
25
|
workers: Optional[int] = None,
|
26
|
+
batchsize: Optional[int] = None,
|
27
|
+
skip_chunks_with_greater_rowcounts: bool = False,
|
28
|
+
check_rowcounts_only: bool = False,
|
25
29
|
debug: bool = False,
|
26
30
|
**kwargs: Any
|
27
31
|
) -> SuccessTuple:
|
@@ -53,6 +57,17 @@ def verify(
|
|
53
57
|
If provided, limit the verification to this many threads.
|
54
58
|
Use a value of `1` to sync chunks in series.
|
55
59
|
|
60
|
+
batchsize: Optional[int], default None
|
61
|
+
If provided, sync this many chunks in parallel.
|
62
|
+
Defaults to `Pipe.get_num_workers()`.
|
63
|
+
|
64
|
+
skip_chunks_with_greater_rowcounts: bool, default False
|
65
|
+
If `True`, compare the rowcounts for a chunk and skip syncing if the pipe's
|
66
|
+
chunk rowcount equals or exceeds the remote's rowcount.
|
67
|
+
|
68
|
+
check_rowcounts_only: bool, default False
|
69
|
+
If `True`, only compare rowcounts and print chunks which are out-of-sync.
|
70
|
+
|
56
71
|
debug: bool, default False
|
57
72
|
Verbosity toggle.
|
58
73
|
|
@@ -64,8 +79,10 @@ def verify(
|
|
64
79
|
A SuccessTuple indicating whether the pipe was successfully resynced.
|
65
80
|
"""
|
66
81
|
from meerschaum.utils.pool import get_pool
|
82
|
+
from meerschaum.utils.formatting import make_header
|
67
83
|
from meerschaum.utils.misc import interval_str
|
68
84
|
workers = self.get_num_workers(workers)
|
85
|
+
check_rowcounts = skip_chunks_with_greater_rowcounts or check_rowcounts_only
|
69
86
|
|
70
87
|
### Skip configured bounding in parameters
|
71
88
|
### if `bounded` is explicitly `False`.
|
@@ -83,21 +100,26 @@ def verify(
|
|
83
100
|
if bound_time is not None
|
84
101
|
else self.get_sync_time(newest=False, debug=debug)
|
85
102
|
)
|
103
|
+
if begin is None:
|
104
|
+
remote_oldest_sync_time = self.get_sync_time(newest=False, remote=True, debug=debug)
|
105
|
+
begin = remote_oldest_sync_time
|
86
106
|
if bounded and end is None:
|
87
107
|
end = self.get_sync_time(newest=True, debug=debug)
|
108
|
+
if end is None:
|
109
|
+
remote_newest_sync_time = self.get_sync_time(newest=True, remote=True, debug=debug)
|
110
|
+
end = remote_newest_sync_time
|
111
|
+
if end is not None:
|
112
|
+
end += (
|
113
|
+
timedelta(minutes=1)
|
114
|
+
if hasattr(end, 'tzinfo')
|
115
|
+
else 1
|
116
|
+
)
|
88
117
|
|
89
118
|
begin, end = self.parse_date_bounds(begin, end)
|
90
|
-
|
91
|
-
if bounded and end is not None:
|
92
|
-
end += (
|
93
|
-
timedelta(minutes=1)
|
94
|
-
if isinstance(end, datetime)
|
95
|
-
else 1
|
96
|
-
)
|
97
|
-
|
98
|
-
cannot_determine_bounds = not self.exists(debug=debug)
|
119
|
+
cannot_determine_bounds = bounded and begin is None and end is None
|
99
120
|
|
100
121
|
if cannot_determine_bounds:
|
122
|
+
warn(f"Cannot determine sync bounds for {self}. Syncing instead...", stack=False)
|
101
123
|
sync_success, sync_msg = self.sync(
|
102
124
|
begin=begin,
|
103
125
|
end=end,
|
@@ -160,17 +182,16 @@ def verify(
|
|
160
182
|
else chunk_bounds[-1][0]
|
161
183
|
)
|
162
184
|
)
|
185
|
+
message_header = f"{begin_to_print} - {end_to_print}"
|
163
186
|
|
164
187
|
info(
|
165
188
|
f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
|
166
189
|
+ ('s' if len(chunk_bounds) != 1 else '')
|
167
190
|
+ f" ({'un' if not bounded else ''}bounded)"
|
168
191
|
+ f" of size '{interval_str(chunk_interval)}'"
|
169
|
-
+ f" between '{begin_to_print}' and '{end_to_print}'
|
192
|
+
+ f" between '{begin_to_print}' and '{end_to_print}'.\n"
|
170
193
|
)
|
171
194
|
|
172
|
-
pool = get_pool(workers=workers)
|
173
|
-
|
174
195
|
### Dictionary of the form bounds -> success_tuple, e.g.:
|
175
196
|
### {
|
176
197
|
### (2023-01-01, 2023-01-02): (True, "Success")
|
@@ -180,87 +201,169 @@ def verify(
|
|
180
201
|
chunk_begin_and_end: Tuple[
|
181
202
|
Union[int, datetime],
|
182
203
|
Union[int, datetime]
|
183
|
-
]
|
204
|
+
],
|
205
|
+
_workers: Optional[int] = 1,
|
184
206
|
):
|
185
207
|
if chunk_begin_and_end in bounds_success_tuples:
|
186
208
|
return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
|
187
209
|
|
188
210
|
chunk_begin, chunk_end = chunk_begin_and_end
|
211
|
+
do_sync = True
|
212
|
+
chunk_success, chunk_msg = False, "Did not sync chunk."
|
213
|
+
if check_rowcounts:
|
214
|
+
existing_rowcount = self.get_rowcount(begin=chunk_begin, end=chunk_end, debug=debug)
|
215
|
+
remote_rowcount = self.get_rowcount(
|
216
|
+
begin=chunk_begin,
|
217
|
+
end=chunk_end,
|
218
|
+
remote=True,
|
219
|
+
debug=debug,
|
220
|
+
)
|
221
|
+
checked_rows_str = (
|
222
|
+
f"checked {existing_rowcount} row"
|
223
|
+
+ ("s" if existing_rowcount != 1 else '')
|
224
|
+
+ f" vs {remote_rowcount} remote"
|
225
|
+
)
|
226
|
+
if (
|
227
|
+
existing_rowcount is not None
|
228
|
+
and remote_rowcount is not None
|
229
|
+
and existing_rowcount >= remote_rowcount
|
230
|
+
):
|
231
|
+
do_sync = False
|
232
|
+
chunk_success, chunk_msg = True, (
|
233
|
+
"Row-count is up-to-date "
|
234
|
+
f"({checked_rows_str})."
|
235
|
+
)
|
236
|
+
elif check_rowcounts_only:
|
237
|
+
do_sync = False
|
238
|
+
chunk_success, chunk_msg = True, (
|
239
|
+
f"Row-counts are out-of-sync ({checked_rows_str})."
|
240
|
+
)
|
241
|
+
|
189
242
|
chunk_success, chunk_msg = self.sync(
|
190
243
|
begin=chunk_begin,
|
191
244
|
end=chunk_end,
|
192
245
|
params=params,
|
193
|
-
workers=
|
246
|
+
workers=_workers,
|
194
247
|
debug=debug,
|
195
248
|
**kwargs
|
196
|
-
)
|
249
|
+
) if do_sync else (chunk_success, chunk_msg)
|
197
250
|
chunk_msg = chunk_msg.strip()
|
198
251
|
if ' - ' not in chunk_msg:
|
199
252
|
chunk_label = f"{chunk_begin} - {chunk_end}"
|
200
|
-
chunk_msg = f'{chunk_label}\n{chunk_msg}'
|
253
|
+
chunk_msg = f'Verified chunk for {self}:\n{chunk_label}\n{chunk_msg}'
|
201
254
|
mrsm.pprint((chunk_success, chunk_msg))
|
202
255
|
return chunk_begin_and_end, (chunk_success, chunk_msg)
|
203
256
|
|
204
257
|
### If we have more than one chunk, attempt to sync the first one and return if its fails.
|
205
258
|
if len(chunk_bounds) > 1:
|
206
259
|
first_chunk_bounds = chunk_bounds[0]
|
260
|
+
first_label = f"{first_chunk_bounds[0]} - {first_chunk_bounds[1]}"
|
261
|
+
info(f"Verifying first chunk for {self}:\n {first_label}")
|
207
262
|
(
|
208
263
|
(first_begin, first_end),
|
209
264
|
(first_success, first_msg)
|
210
|
-
) = process_chunk_bounds(first_chunk_bounds)
|
265
|
+
) = process_chunk_bounds(first_chunk_bounds, _workers=workers)
|
211
266
|
if not first_success:
|
212
267
|
return (
|
213
268
|
first_success,
|
214
|
-
f"\n{
|
269
|
+
f"\n{first_label}\n"
|
215
270
|
+ f"Failed to sync first chunk:\n{first_msg}"
|
216
271
|
)
|
217
272
|
bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
|
273
|
+
info(f"Completed first chunk for {self}:\n {first_label}\n")
|
218
274
|
|
219
|
-
|
220
|
-
|
275
|
+
pool = get_pool(workers=workers)
|
276
|
+
batches = self.get_chunk_bounds_batches(chunk_bounds, batchsize=batchsize, workers=workers)
|
221
277
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
278
|
+
def process_batch(
|
279
|
+
batch_chunk_bounds: Tuple[
|
280
|
+
Tuple[Union[datetime, int, None], Union[datetime, int, None]],
|
281
|
+
...
|
282
|
+
]
|
283
|
+
):
|
284
|
+
_batch_begin = batch_chunk_bounds[0][0]
|
285
|
+
_batch_end = batch_chunk_bounds[-1][-1]
|
286
|
+
batch_message_header = f"{_batch_begin} - {_batch_end}"
|
287
|
+
batch_bounds_success_tuples = dict(pool.map(process_chunk_bounds, batch_chunk_bounds))
|
288
|
+
bounds_success_tuples.update(batch_bounds_success_tuples)
|
289
|
+
batch_bounds_success_bools = {
|
290
|
+
bounds: tup[0]
|
291
|
+
for bounds, tup in batch_bounds_success_tuples.items()
|
292
|
+
}
|
293
|
+
|
294
|
+
if all(batch_bounds_success_bools.values()):
|
295
|
+
msg = get_chunks_success_message(
|
296
|
+
batch_bounds_success_tuples,
|
297
|
+
header=batch_message_header,
|
298
|
+
check_rowcounts_only=check_rowcounts_only,
|
299
|
+
)
|
300
|
+
if deduplicate:
|
301
|
+
deduplicate_success, deduplicate_msg = self.deduplicate(
|
302
|
+
begin=_batch_begin,
|
303
|
+
end=_batch_end,
|
304
|
+
params=params,
|
305
|
+
workers=workers,
|
306
|
+
debug=debug,
|
307
|
+
**kwargs
|
308
|
+
)
|
309
|
+
return deduplicate_success, msg + '\n\n' + deduplicate_msg
|
310
|
+
return True, msg
|
311
|
+
|
312
|
+
batch_chunk_bounds_to_resync = [
|
313
|
+
bounds
|
314
|
+
for bounds, success in zip(batch_chunk_bounds, batch_bounds_success_bools)
|
315
|
+
if not success
|
316
|
+
]
|
317
|
+
batch_bounds_to_print = [
|
318
|
+
f"{bounds[0]} - {bounds[1]}"
|
319
|
+
for bounds in batch_chunk_bounds_to_resync
|
320
|
+
]
|
321
|
+
if batch_bounds_to_print:
|
322
|
+
warn(
|
323
|
+
"Will resync the following failed chunks:\n "
|
324
|
+
+ '\n '.join(batch_bounds_to_print),
|
325
|
+
stack=False,
|
233
326
|
)
|
234
|
-
return deduplicate_success, msg + '\n\n' + deduplicate_msg
|
235
|
-
return True, msg
|
236
|
-
|
237
|
-
chunk_bounds_to_resync = [
|
238
|
-
bounds
|
239
|
-
for bounds, success in zip(chunk_bounds, bounds_success_bools)
|
240
|
-
if not success
|
241
|
-
]
|
242
|
-
bounds_to_print = [
|
243
|
-
f"{bounds[0]} - {bounds[1]}"
|
244
|
-
for bounds in chunk_bounds_to_resync
|
245
|
-
]
|
246
|
-
if bounds_to_print:
|
247
|
-
warn(
|
248
|
-
f"Will resync the following failed chunks:\n "
|
249
|
-
+ '\n '.join(bounds_to_print),
|
250
|
-
stack=False,
|
251
|
-
)
|
252
|
-
|
253
|
-
retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
|
254
|
-
bounds_success_tuples.update(retry_bounds_success_tuples)
|
255
|
-
retry_bounds_success_bools = {
|
256
|
-
bounds: tup[0]
|
257
|
-
for bounds, tup in retry_bounds_success_tuples.items()
|
258
|
-
}
|
259
327
|
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
328
|
+
retry_bounds_success_tuples = dict(pool.map(
|
329
|
+
process_chunk_bounds,
|
330
|
+
batch_chunk_bounds_to_resync
|
331
|
+
))
|
332
|
+
batch_bounds_success_tuples.update(retry_bounds_success_tuples)
|
333
|
+
bounds_success_tuples.update(retry_bounds_success_tuples)
|
334
|
+
retry_bounds_success_bools = {
|
335
|
+
bounds: tup[0]
|
336
|
+
for bounds, tup in retry_bounds_success_tuples.items()
|
337
|
+
}
|
338
|
+
|
339
|
+
if all(retry_bounds_success_bools.values()):
|
340
|
+
chunks_message = (
|
341
|
+
get_chunks_success_message(
|
342
|
+
batch_bounds_success_tuples,
|
343
|
+
header=batch_message_header,
|
344
|
+
check_rowcounts_only=check_rowcounts_only,
|
345
|
+
) + f"\nRetried {len(batch_chunk_bounds_to_resync)} chunk" + (
|
346
|
+
's'
|
347
|
+
if len(batch_chunk_bounds_to_resync) != 1
|
348
|
+
else ''
|
349
|
+
) + "."
|
350
|
+
)
|
351
|
+
if deduplicate:
|
352
|
+
deduplicate_success, deduplicate_msg = self.deduplicate(
|
353
|
+
begin=_batch_begin,
|
354
|
+
end=_batch_end,
|
355
|
+
params=params,
|
356
|
+
workers=workers,
|
357
|
+
debug=debug,
|
358
|
+
**kwargs
|
359
|
+
)
|
360
|
+
return deduplicate_success, chunks_message + '\n\n' + deduplicate_msg
|
361
|
+
return True, chunks_message
|
362
|
+
|
363
|
+
batch_chunks_message = get_chunks_success_message(
|
364
|
+
batch_bounds_success_tuples,
|
365
|
+
header=batch_message_header,
|
366
|
+
check_rowcounts_only=check_rowcounts_only,
|
264
367
|
)
|
265
368
|
if deduplicate:
|
266
369
|
deduplicate_success, deduplicate_msg = self.deduplicate(
|
@@ -271,26 +374,57 @@ def verify(
|
|
271
374
|
debug=debug,
|
272
375
|
**kwargs
|
273
376
|
)
|
274
|
-
return deduplicate_success,
|
275
|
-
return
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
377
|
+
return deduplicate_success, batch_chunks_message + '\n\n' + deduplicate_msg
|
378
|
+
return False, batch_chunks_message
|
379
|
+
|
380
|
+
num_batches = len(batches)
|
381
|
+
for batch_i, batch in enumerate(batches):
|
382
|
+
batch_begin = batch[0][0]
|
383
|
+
batch_end = batch[-1][-1]
|
384
|
+
batch_counter_str = f"({(batch_i + 1):,}/{num_batches:,})"
|
385
|
+
batch_label = f"batch {batch_counter_str}:\n{batch_begin} - {batch_end}"
|
386
|
+
retry_failed_batch = True
|
387
|
+
try:
|
388
|
+
for_self = 'for ' + str(self)
|
389
|
+
info(f"Verifying {batch_label.replace(':\n', ' ' + for_self + '...\n ')}\n")
|
390
|
+
batch_success, batch_msg = process_batch(batch)
|
391
|
+
except (KeyboardInterrupt, Exception) as e:
|
392
|
+
batch_success = False
|
393
|
+
batch_msg = str(e)
|
394
|
+
retry_failed_batch = False
|
395
|
+
|
396
|
+
batch_msg_to_print = (
|
397
|
+
f"{make_header('Completed batch ' + batch_counter_str + ' ' + for_self + ':')}\n{batch_msg}"
|
286
398
|
)
|
287
|
-
|
288
|
-
|
399
|
+
mrsm.pprint((batch_success, batch_msg_to_print))
|
400
|
+
|
401
|
+
if not batch_success and retry_failed_batch:
|
402
|
+
info(f"Retrying batch {batch_counter_str}...")
|
403
|
+
retry_batch_success, retry_batch_msg = process_batch(batch)
|
404
|
+
retry_batch_msg_to_print = (
|
405
|
+
f"Retried {make_header('batch ' + batch_label)}\n{retry_batch_msg}"
|
406
|
+
)
|
407
|
+
mrsm.pprint((retry_batch_success, retry_batch_msg_to_print))
|
408
|
+
|
409
|
+
batch_success = retry_batch_success
|
410
|
+
batch_msg = retry_batch_msg
|
411
|
+
|
412
|
+
if not batch_success:
|
413
|
+
return False, f"Failed to verify {batch_label}:\n\n{batch_msg}"
|
414
|
+
|
415
|
+
chunks_message = get_chunks_success_message(
|
416
|
+
bounds_success_tuples,
|
417
|
+
header=message_header,
|
418
|
+
check_rowcounts_only=check_rowcounts_only,
|
419
|
+
)
|
420
|
+
return True, chunks_message
|
421
|
+
|
289
422
|
|
290
423
|
|
291
424
|
def get_chunks_success_message(
|
292
425
|
chunk_success_tuples: Dict[Tuple[Any, Any], SuccessTuple],
|
293
426
|
header: str = '',
|
427
|
+
check_rowcounts_only: bool = False,
|
294
428
|
) -> str:
|
295
429
|
"""
|
296
430
|
Sum together all of the inserts and updates from the chunks.
|
@@ -319,10 +453,19 @@ def get_chunks_success_message(
|
|
319
453
|
inserts = [stat['inserted'] for stat in chunk_stats]
|
320
454
|
updates = [stat['updated'] for stat in chunk_stats]
|
321
455
|
upserts = [stat['upserted'] for stat in chunk_stats]
|
456
|
+
checks = [stat['checked'] for stat in chunk_stats]
|
457
|
+
out_of_sync_bounds_messages = {
|
458
|
+
bounds: message
|
459
|
+
for bounds, (success, message) in chunk_success_tuples.items()
|
460
|
+
if 'out-of-sync' in message
|
461
|
+
} if check_rowcounts_only else {}
|
462
|
+
|
322
463
|
num_inserted = sum(inserts)
|
323
464
|
num_updated = sum(updates)
|
324
465
|
num_upserted = sum(upserts)
|
466
|
+
num_checked = sum(checks)
|
325
467
|
num_fails = len(fail_chunk_bounds_tuples)
|
468
|
+
num_out_of_sync = len(out_of_sync_bounds_messages)
|
326
469
|
|
327
470
|
header = (header + "\n") if header else ""
|
328
471
|
stats_msg = items_str(
|
@@ -330,22 +473,38 @@ def get_chunks_success_message(
|
|
330
473
|
([f'inserted {num_inserted:,}'] if num_inserted else [])
|
331
474
|
+ ([f'updated {num_updated:,}'] if num_updated else [])
|
332
475
|
+ ([f'upserted {num_upserted:,}'] if num_upserted else [])
|
476
|
+
+ ([f'checked {num_checked:,}'] if num_checked else [])
|
333
477
|
) or ['synced 0'],
|
334
478
|
quotes=False,
|
335
479
|
and_=False,
|
336
480
|
)
|
337
481
|
|
338
482
|
success_msg = (
|
339
|
-
|
483
|
+
"Successfully "
|
484
|
+
+ ('synced' if not check_rowcounts_only else 'checked')
|
485
|
+
+ f" {len(chunk_success_tuples):,} chunk"
|
340
486
|
+ ('s' if len(chunk_success_tuples) != 1 else '')
|
341
487
|
+ '\n(' + stats_msg
|
342
488
|
+ ' rows in total).'
|
343
489
|
)
|
490
|
+
if check_rowcounts_only:
|
491
|
+
success_msg += (
|
492
|
+
f"\n\nFound {num_out_of_sync} chunk"
|
493
|
+
+ ('s' if num_out_of_sync != 1 else '')
|
494
|
+
+ ' to be out-of-sync'
|
495
|
+
+ ('.' if num_out_of_sync == 0 else ':\n\n ')
|
496
|
+
+ '\n '.join(
|
497
|
+
[
|
498
|
+
f'{lbound} - {rbound}'
|
499
|
+
for lbound, rbound in out_of_sync_bounds_messages
|
500
|
+
]
|
501
|
+
)
|
502
|
+
)
|
344
503
|
fail_msg = (
|
345
504
|
''
|
346
505
|
if num_fails == 0
|
347
506
|
else (
|
348
|
-
f"\n\nFailed to sync {num_fails} chunk"
|
507
|
+
f"\n\nFailed to sync {num_fails:,} chunk"
|
349
508
|
+ ('s' if num_fails != 1 else '') + ":\n"
|
350
509
|
+ '\n'.join([
|
351
510
|
f"{fail_begin} - {fail_end}\n{msg}\n"
|
@@ -436,9 +595,15 @@ def get_bound_time(self, debug: bool = False) -> Union[datetime, int, None]:
|
|
436
595
|
|
437
596
|
bound_time = sync_time - bound_interval
|
438
597
|
oldest_sync_time = self.get_sync_time(newest=False, debug=debug)
|
598
|
+
max_bound_time_days = STATIC_CONFIG['pipes']['max_bound_time_days']
|
599
|
+
|
600
|
+
extreme_sync_times_delta = (
|
601
|
+
hasattr(oldest_sync_time, 'tzinfo')
|
602
|
+
and (sync_time - oldest_sync_time) >= timedelta(days=max_bound_time_days)
|
603
|
+
)
|
439
604
|
|
440
605
|
return (
|
441
606
|
bound_time
|
442
|
-
if bound_time > oldest_sync_time
|
607
|
+
if bound_time > oldest_sync_time or extreme_sync_times_delta
|
443
608
|
else None
|
444
609
|
)
|
meerschaum/core/User/__init__.py
CHANGED
@@ -9,7 +9,7 @@ Manager users' metadata via the User class
|
|
9
9
|
from typing import Optional
|
10
10
|
|
11
11
|
import meerschaum as mrsm
|
12
|
-
from meerschaum.core.User._User import User
|
12
|
+
from meerschaum.core.User._User import User
|
13
13
|
|
14
14
|
|
15
15
|
def is_user_allowed_to_execute(
|
@@ -19,8 +19,6 @@ def is_user_allowed_to_execute(
|
|
19
19
|
"""
|
20
20
|
Return a `SuccessTuple` indicating whether a given user is allowed to execute actions.
|
21
21
|
"""
|
22
|
-
print(f"{debug=}")
|
23
|
-
print(f"{user=}")
|
24
22
|
if user is None:
|
25
23
|
return True, "Success"
|
26
24
|
|
@@ -29,9 +27,7 @@ def is_user_allowed_to_execute(
|
|
29
27
|
if user_type == 'admin':
|
30
28
|
return True, "Success"
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
allow_non_admin = get_config('system', 'api', 'permissions', 'actions', 'non_admin')
|
30
|
+
allow_non_admin = mrsm.get_config('system', 'api', 'permissions', 'actions', 'non_admin')
|
35
31
|
if not allow_non_admin:
|
36
32
|
return False, "The administrator for this server has not allowed users to perform actions."
|
37
33
|
|
meerschaum/jobs/_Job.py
CHANGED
meerschaum/jobs/__init__.py
CHANGED
@@ -150,6 +150,13 @@ def get_filtered_jobs(
|
|
150
150
|
}
|
151
151
|
|
152
152
|
jobs_to_return = {}
|
153
|
+
filter_list_without_underscores = [name for name in filter_list if not name.startswith('_')]
|
154
|
+
filter_list_with_underscores = [name for name in filter_list if name.startswith('_')]
|
155
|
+
if (
|
156
|
+
filter_list_without_underscores and not filter_list_with_underscores
|
157
|
+
or filter_list_with_underscores and not filter_list_without_underscores
|
158
|
+
):
|
159
|
+
pass
|
153
160
|
for name in filter_list:
|
154
161
|
job = jobs.get(name, None)
|
155
162
|
if job is None:
|
@@ -161,6 +168,14 @@ def get_filtered_jobs(
|
|
161
168
|
continue
|
162
169
|
jobs_to_return[name] = job
|
163
170
|
|
171
|
+
if not jobs_to_return and filter_list_with_underscores:
|
172
|
+
names_to_exclude = [name.lstrip('_') for name in filter_list_with_underscores]
|
173
|
+
return {
|
174
|
+
name: job
|
175
|
+
for name, job in jobs.items()
|
176
|
+
if name not in names_to_exclude
|
177
|
+
}
|
178
|
+
|
164
179
|
return jobs_to_return
|
165
180
|
|
166
181
|
|