meerschaum 2.7.10__py3-none-any.whl → 2.8.0__py3-none-any.whl
Sign up to get free protection for your applications and to get access to all the features.
- meerschaum/_internal/arguments/_parser.py +17 -5
- meerschaum/actions/copy.py +3 -1
- meerschaum/actions/index.py +1 -1
- meerschaum/actions/show.py +7 -7
- meerschaum/actions/sync.py +5 -1
- meerschaum/actions/verify.py +14 -1
- meerschaum/api/__init__.py +77 -41
- meerschaum/api/_exceptions.py +18 -0
- meerschaum/api/dash/__init__.py +4 -2
- meerschaum/api/dash/callbacks/dashboard.py +30 -1
- meerschaum/api/dash/components.py +2 -2
- meerschaum/api/dash/webterm.py +23 -4
- meerschaum/api/models/_pipes.py +8 -8
- meerschaum/api/resources/static/css/dash.css +2 -2
- meerschaum/api/resources/templates/termpage.html +5 -1
- meerschaum/api/routes/__init__.py +15 -12
- meerschaum/api/routes/_connectors.py +30 -28
- meerschaum/api/routes/_index.py +16 -7
- meerschaum/api/routes/_misc.py +30 -22
- meerschaum/api/routes/_pipes.py +244 -148
- meerschaum/api/routes/_plugins.py +58 -47
- meerschaum/api/routes/_users.py +39 -31
- meerschaum/api/routes/_version.py +8 -10
- meerschaum/config/_default.py +10 -0
- meerschaum/config/_version.py +1 -1
- meerschaum/config/static/__init__.py +4 -1
- meerschaum/connectors/api/_APIConnector.py +4 -3
- meerschaum/connectors/api/_login.py +21 -17
- meerschaum/connectors/api/_pipes.py +1 -0
- meerschaum/connectors/api/_request.py +9 -10
- meerschaum/connectors/sql/_cli.py +11 -3
- meerschaum/connectors/sql/_instance.py +1 -1
- meerschaum/connectors/sql/_pipes.py +77 -57
- meerschaum/connectors/sql/_sql.py +26 -9
- meerschaum/core/Pipe/__init__.py +2 -0
- meerschaum/core/Pipe/_attributes.py +13 -2
- meerschaum/core/Pipe/_data.py +85 -0
- meerschaum/core/Pipe/_deduplicate.py +6 -8
- meerschaum/core/Pipe/_sync.py +63 -30
- meerschaum/core/Pipe/_verify.py +242 -77
- meerschaum/core/User/__init__.py +2 -6
- meerschaum/jobs/_Job.py +1 -1
- meerschaum/jobs/__init__.py +15 -0
- meerschaum/utils/dataframe.py +2 -0
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/formatting/_pipes.py +1 -1
- meerschaum/utils/misc.py +11 -7
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/sql.py +6 -2
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/METADATA +4 -4
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/RECORD +57 -56
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.10.dist-info → meerschaum-2.8.0.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -319,16 +319,42 @@ def sync(
|
|
319
319
|
if debug:
|
320
320
|
dprint("Successfully synced the first chunk, attemping the rest...")
|
321
321
|
|
322
|
-
failed_chunks = []
|
323
322
|
def _process_chunk(_chunk):
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
328
|
-
|
329
|
-
|
323
|
+
_chunk_attempts = 0
|
324
|
+
_max_chunk_attempts = 3
|
325
|
+
while _chunk_attempts < _max_chunk_attempts:
|
326
|
+
try:
|
327
|
+
_chunk_success, _chunk_msg = _sync(p, _chunk)
|
328
|
+
except Exception as e:
|
329
|
+
_chunk_success, _chunk_msg = False, str(e)
|
330
|
+
if _chunk_success:
|
331
|
+
break
|
332
|
+
_chunk_attempts += 1
|
333
|
+
_sleep_seconds = _chunk_attempts ** 2
|
334
|
+
warn(
|
335
|
+
(
|
336
|
+
f"Failed to sync chunk to {self} "
|
337
|
+
+ f"(attempt {_chunk_attempts} / {_max_chunk_attempts}).\n"
|
338
|
+
+ f"Sleeping for {_sleep_seconds} second"
|
339
|
+
+ ('s' if _sleep_seconds != 1 else '')
|
340
|
+
+ ":\n{_chunk_msg}"
|
341
|
+
),
|
342
|
+
stack=False,
|
343
|
+
)
|
344
|
+
time.sleep(_sleep_seconds)
|
345
|
+
|
346
|
+
num_rows_str = (
|
347
|
+
f"{num_rows:,} rows"
|
348
|
+
if (num_rows := len(_chunk)) != 1
|
349
|
+
else f"{num_rows} row"
|
350
|
+
)
|
330
351
|
_chunk_msg = (
|
331
|
-
|
352
|
+
(
|
353
|
+
"Synced"
|
354
|
+
if _chunk_success
|
355
|
+
else "Failed to sync"
|
356
|
+
) + f" a chunk ({num_rows_str}) to {p}:\n"
|
357
|
+
+ self._get_chunk_label(_chunk, dt_col)
|
332
358
|
+ '\n'
|
333
359
|
+ _chunk_msg
|
334
360
|
)
|
@@ -351,27 +377,16 @@ def sync(
|
|
351
377
|
)
|
352
378
|
chunk_messages = [chunk_msg for _, chunk_msg in results]
|
353
379
|
success_bools = [chunk_success for chunk_success, _ in results]
|
380
|
+
num_successes = len([chunk_success for chunk_success, _ in results if chunk_success])
|
381
|
+
num_failures = len([chunk_success for chunk_success, _ in results if not chunk_success])
|
354
382
|
success = all(success_bools)
|
355
383
|
msg = (
|
356
|
-
|
384
|
+
'Synced '
|
385
|
+
+ f'{len(chunk_messages):,} chunk'
|
357
386
|
+ ('s' if len(chunk_messages) != 1 else '')
|
358
|
-
+ f' to {p}:\n\n'
|
387
|
+
+ f' to {p}\n({num_successes} succeeded, {num_failures} failed):\n\n'
|
359
388
|
+ '\n\n'.join(chunk_messages).lstrip().rstrip()
|
360
389
|
).lstrip().rstrip()
|
361
|
-
|
362
|
-
### If some chunks succeeded, retry the failures.
|
363
|
-
retry_success = True
|
364
|
-
if not success and any(success_bools):
|
365
|
-
if debug:
|
366
|
-
dprint("Retrying failed chunks...")
|
367
|
-
chunks_to_retry = [c for c in failed_chunks]
|
368
|
-
failed_chunks = []
|
369
|
-
for chunk in chunks_to_retry:
|
370
|
-
chunk_success, chunk_msg = _process_chunk(chunk)
|
371
|
-
msg += f"\n\nRetried chunk:\n{chunk_msg}\n"
|
372
|
-
retry_success = retry_success and chunk_success
|
373
|
-
|
374
|
-
success = success and retry_success
|
375
390
|
return success, msg
|
376
391
|
|
377
392
|
### Cast to a dataframe and ensure datatypes are what we expect.
|
@@ -474,6 +489,7 @@ def get_sync_time(
|
|
474
489
|
params: Optional[Dict[str, Any]] = None,
|
475
490
|
newest: bool = True,
|
476
491
|
apply_backtrack_interval: bool = False,
|
492
|
+
remote: bool = False,
|
477
493
|
round_down: bool = False,
|
478
494
|
debug: bool = False
|
479
495
|
) -> Union['datetime', int, None]:
|
@@ -493,6 +509,10 @@ def get_sync_time(
|
|
493
509
|
apply_backtrack_interval: bool, default False
|
494
510
|
If `True`, subtract the backtrack interval from the sync time.
|
495
511
|
|
512
|
+
remote: bool, default False
|
513
|
+
If `True` and the instance connector supports it, return the sync time
|
514
|
+
for the remote table definition.
|
515
|
+
|
496
516
|
round_down: bool, default False
|
497
517
|
If `True`, round down the datetime value to the nearest minute.
|
498
518
|
|
@@ -506,17 +526,30 @@ def get_sync_time(
|
|
506
526
|
"""
|
507
527
|
from meerschaum.utils.venv import Venv
|
508
528
|
from meerschaum.connectors import get_connector_plugin
|
509
|
-
from meerschaum.utils.misc import round_time
|
529
|
+
from meerschaum.utils.misc import round_time, filter_keywords
|
530
|
+
from meerschaum.utils.warnings import warn
|
510
531
|
|
511
532
|
if not self.columns.get('datetime', None):
|
512
533
|
return None
|
513
534
|
|
514
|
-
|
515
|
-
|
535
|
+
connector = self.instance_connector if not remote else self.connector
|
536
|
+
with Venv(get_connector_plugin(connector)):
|
537
|
+
if not hasattr(connector, 'get_sync_time'):
|
538
|
+
warn(
|
539
|
+
f"Connectors of type '{connector.type}' "
|
540
|
+
"do not implement `get_sync_time().",
|
541
|
+
stack=False,
|
542
|
+
)
|
543
|
+
return None
|
544
|
+
sync_time = connector.get_sync_time(
|
516
545
|
self,
|
517
|
-
|
518
|
-
|
519
|
-
|
546
|
+
**filter_keywords(
|
547
|
+
connector.get_sync_time,
|
548
|
+
params=params,
|
549
|
+
newest=newest,
|
550
|
+
remote=remote,
|
551
|
+
debug=debug,
|
552
|
+
)
|
520
553
|
)
|
521
554
|
|
522
555
|
if round_down and isinstance(sync_time, datetime):
|
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -11,6 +11,7 @@ from datetime import datetime, timedelta
|
|
11
11
|
import meerschaum as mrsm
|
12
12
|
from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
|
13
13
|
from meerschaum.utils.warnings import warn, info
|
14
|
+
from meerschaum.config.static import STATIC_CONFIG
|
14
15
|
|
15
16
|
|
16
17
|
def verify(
|
@@ -22,6 +23,9 @@ def verify(
|
|
22
23
|
bounded: Optional[bool] = None,
|
23
24
|
deduplicate: bool = False,
|
24
25
|
workers: Optional[int] = None,
|
26
|
+
batchsize: Optional[int] = None,
|
27
|
+
skip_chunks_with_greater_rowcounts: bool = False,
|
28
|
+
check_rowcounts_only: bool = False,
|
25
29
|
debug: bool = False,
|
26
30
|
**kwargs: Any
|
27
31
|
) -> SuccessTuple:
|
@@ -53,6 +57,17 @@ def verify(
|
|
53
57
|
If provided, limit the verification to this many threads.
|
54
58
|
Use a value of `1` to sync chunks in series.
|
55
59
|
|
60
|
+
batchsize: Optional[int], default None
|
61
|
+
If provided, sync this many chunks in parallel.
|
62
|
+
Defaults to `Pipe.get_num_workers()`.
|
63
|
+
|
64
|
+
skip_chunks_with_greater_rowcounts: bool, default False
|
65
|
+
If `True`, compare the rowcounts for a chunk and skip syncing if the pipe's
|
66
|
+
chunk rowcount equals or exceeds the remote's rowcount.
|
67
|
+
|
68
|
+
check_rowcounts_only: bool, default False
|
69
|
+
If `True`, only compare rowcounts and print chunks which are out-of-sync.
|
70
|
+
|
56
71
|
debug: bool, default False
|
57
72
|
Verbosity toggle.
|
58
73
|
|
@@ -64,8 +79,10 @@ def verify(
|
|
64
79
|
A SuccessTuple indicating whether the pipe was successfully resynced.
|
65
80
|
"""
|
66
81
|
from meerschaum.utils.pool import get_pool
|
82
|
+
from meerschaum.utils.formatting import make_header
|
67
83
|
from meerschaum.utils.misc import interval_str
|
68
84
|
workers = self.get_num_workers(workers)
|
85
|
+
check_rowcounts = skip_chunks_with_greater_rowcounts or check_rowcounts_only
|
69
86
|
|
70
87
|
### Skip configured bounding in parameters
|
71
88
|
### if `bounded` is explicitly `False`.
|
@@ -83,21 +100,26 @@ def verify(
|
|
83
100
|
if bound_time is not None
|
84
101
|
else self.get_sync_time(newest=False, debug=debug)
|
85
102
|
)
|
103
|
+
if begin is None:
|
104
|
+
remote_oldest_sync_time = self.get_sync_time(newest=False, remote=True, debug=debug)
|
105
|
+
begin = remote_oldest_sync_time
|
86
106
|
if bounded and end is None:
|
87
107
|
end = self.get_sync_time(newest=True, debug=debug)
|
108
|
+
if end is None:
|
109
|
+
remote_newest_sync_time = self.get_sync_time(newest=True, remote=True, debug=debug)
|
110
|
+
end = remote_newest_sync_time
|
111
|
+
if end is not None:
|
112
|
+
end += (
|
113
|
+
timedelta(minutes=1)
|
114
|
+
if hasattr(end, 'tzinfo')
|
115
|
+
else 1
|
116
|
+
)
|
88
117
|
|
89
118
|
begin, end = self.parse_date_bounds(begin, end)
|
90
|
-
|
91
|
-
if bounded and end is not None:
|
92
|
-
end += (
|
93
|
-
timedelta(minutes=1)
|
94
|
-
if isinstance(end, datetime)
|
95
|
-
else 1
|
96
|
-
)
|
97
|
-
|
98
|
-
cannot_determine_bounds = not self.exists(debug=debug)
|
119
|
+
cannot_determine_bounds = bounded and begin is None and end is None
|
99
120
|
|
100
121
|
if cannot_determine_bounds:
|
122
|
+
warn(f"Cannot determine sync bounds for {self}. Syncing instead...", stack=False)
|
101
123
|
sync_success, sync_msg = self.sync(
|
102
124
|
begin=begin,
|
103
125
|
end=end,
|
@@ -160,17 +182,16 @@ def verify(
|
|
160
182
|
else chunk_bounds[-1][0]
|
161
183
|
)
|
162
184
|
)
|
185
|
+
message_header = f"{begin_to_print} - {end_to_print}"
|
163
186
|
|
164
187
|
info(
|
165
188
|
f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
|
166
189
|
+ ('s' if len(chunk_bounds) != 1 else '')
|
167
190
|
+ f" ({'un' if not bounded else ''}bounded)"
|
168
191
|
+ f" of size '{interval_str(chunk_interval)}'"
|
169
|
-
+ f" between '{begin_to_print}' and '{end_to_print}'
|
192
|
+
+ f" between '{begin_to_print}' and '{end_to_print}'.\n"
|
170
193
|
)
|
171
194
|
|
172
|
-
pool = get_pool(workers=workers)
|
173
|
-
|
174
195
|
### Dictionary of the form bounds -> success_tuple, e.g.:
|
175
196
|
### {
|
176
197
|
### (2023-01-01, 2023-01-02): (True, "Success")
|
@@ -180,87 +201,169 @@ def verify(
|
|
180
201
|
chunk_begin_and_end: Tuple[
|
181
202
|
Union[int, datetime],
|
182
203
|
Union[int, datetime]
|
183
|
-
]
|
204
|
+
],
|
205
|
+
_workers: Optional[int] = 1,
|
184
206
|
):
|
185
207
|
if chunk_begin_and_end in bounds_success_tuples:
|
186
208
|
return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
|
187
209
|
|
188
210
|
chunk_begin, chunk_end = chunk_begin_and_end
|
211
|
+
do_sync = True
|
212
|
+
chunk_success, chunk_msg = False, "Did not sync chunk."
|
213
|
+
if check_rowcounts:
|
214
|
+
existing_rowcount = self.get_rowcount(begin=chunk_begin, end=chunk_end, debug=debug)
|
215
|
+
remote_rowcount = self.get_rowcount(
|
216
|
+
begin=chunk_begin,
|
217
|
+
end=chunk_end,
|
218
|
+
remote=True,
|
219
|
+
debug=debug,
|
220
|
+
)
|
221
|
+
checked_rows_str = (
|
222
|
+
f"checked {existing_rowcount} row"
|
223
|
+
+ ("s" if existing_rowcount != 1 else '')
|
224
|
+
+ f" vs {remote_rowcount} remote"
|
225
|
+
)
|
226
|
+
if (
|
227
|
+
existing_rowcount is not None
|
228
|
+
and remote_rowcount is not None
|
229
|
+
and existing_rowcount >= remote_rowcount
|
230
|
+
):
|
231
|
+
do_sync = False
|
232
|
+
chunk_success, chunk_msg = True, (
|
233
|
+
"Row-count is up-to-date "
|
234
|
+
f"({checked_rows_str})."
|
235
|
+
)
|
236
|
+
elif check_rowcounts_only:
|
237
|
+
do_sync = False
|
238
|
+
chunk_success, chunk_msg = True, (
|
239
|
+
f"Row-counts are out-of-sync ({checked_rows_str})."
|
240
|
+
)
|
241
|
+
|
189
242
|
chunk_success, chunk_msg = self.sync(
|
190
243
|
begin=chunk_begin,
|
191
244
|
end=chunk_end,
|
192
245
|
params=params,
|
193
|
-
workers=
|
246
|
+
workers=_workers,
|
194
247
|
debug=debug,
|
195
248
|
**kwargs
|
196
|
-
)
|
249
|
+
) if do_sync else (chunk_success, chunk_msg)
|
197
250
|
chunk_msg = chunk_msg.strip()
|
198
251
|
if ' - ' not in chunk_msg:
|
199
252
|
chunk_label = f"{chunk_begin} - {chunk_end}"
|
200
|
-
chunk_msg = f'{chunk_label}\n{chunk_msg}'
|
253
|
+
chunk_msg = f'Verified chunk for {self}:\n{chunk_label}\n{chunk_msg}'
|
201
254
|
mrsm.pprint((chunk_success, chunk_msg))
|
202
255
|
return chunk_begin_and_end, (chunk_success, chunk_msg)
|
203
256
|
|
204
257
|
### If we have more than one chunk, attempt to sync the first one and return if its fails.
|
205
258
|
if len(chunk_bounds) > 1:
|
206
259
|
first_chunk_bounds = chunk_bounds[0]
|
260
|
+
first_label = f"{first_chunk_bounds[0]} - {first_chunk_bounds[1]}"
|
261
|
+
info(f"Verifying first chunk for {self}:\n {first_label}")
|
207
262
|
(
|
208
263
|
(first_begin, first_end),
|
209
264
|
(first_success, first_msg)
|
210
|
-
) = process_chunk_bounds(first_chunk_bounds)
|
265
|
+
) = process_chunk_bounds(first_chunk_bounds, _workers=workers)
|
211
266
|
if not first_success:
|
212
267
|
return (
|
213
268
|
first_success,
|
214
|
-
f"\n{
|
269
|
+
f"\n{first_label}\n"
|
215
270
|
+ f"Failed to sync first chunk:\n{first_msg}"
|
216
271
|
)
|
217
272
|
bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
|
273
|
+
info(f"Completed first chunk for {self}:\n {first_label}\n")
|
218
274
|
|
219
|
-
|
220
|
-
|
275
|
+
pool = get_pool(workers=workers)
|
276
|
+
batches = self.get_chunk_bounds_batches(chunk_bounds, batchsize=batchsize, workers=workers)
|
221
277
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
278
|
+
def process_batch(
|
279
|
+
batch_chunk_bounds: Tuple[
|
280
|
+
Tuple[Union[datetime, int, None], Union[datetime, int, None]],
|
281
|
+
...
|
282
|
+
]
|
283
|
+
):
|
284
|
+
_batch_begin = batch_chunk_bounds[0][0]
|
285
|
+
_batch_end = batch_chunk_bounds[-1][-1]
|
286
|
+
batch_message_header = f"{_batch_begin} - {_batch_end}"
|
287
|
+
batch_bounds_success_tuples = dict(pool.map(process_chunk_bounds, batch_chunk_bounds))
|
288
|
+
bounds_success_tuples.update(batch_bounds_success_tuples)
|
289
|
+
batch_bounds_success_bools = {
|
290
|
+
bounds: tup[0]
|
291
|
+
for bounds, tup in batch_bounds_success_tuples.items()
|
292
|
+
}
|
293
|
+
|
294
|
+
if all(batch_bounds_success_bools.values()):
|
295
|
+
msg = get_chunks_success_message(
|
296
|
+
batch_bounds_success_tuples,
|
297
|
+
header=batch_message_header,
|
298
|
+
check_rowcounts_only=check_rowcounts_only,
|
299
|
+
)
|
300
|
+
if deduplicate:
|
301
|
+
deduplicate_success, deduplicate_msg = self.deduplicate(
|
302
|
+
begin=_batch_begin,
|
303
|
+
end=_batch_end,
|
304
|
+
params=params,
|
305
|
+
workers=workers,
|
306
|
+
debug=debug,
|
307
|
+
**kwargs
|
308
|
+
)
|
309
|
+
return deduplicate_success, msg + '\n\n' + deduplicate_msg
|
310
|
+
return True, msg
|
311
|
+
|
312
|
+
batch_chunk_bounds_to_resync = [
|
313
|
+
bounds
|
314
|
+
for bounds, success in zip(batch_chunk_bounds, batch_bounds_success_bools)
|
315
|
+
if not success
|
316
|
+
]
|
317
|
+
batch_bounds_to_print = [
|
318
|
+
f"{bounds[0]} - {bounds[1]}"
|
319
|
+
for bounds in batch_chunk_bounds_to_resync
|
320
|
+
]
|
321
|
+
if batch_bounds_to_print:
|
322
|
+
warn(
|
323
|
+
"Will resync the following failed chunks:\n "
|
324
|
+
+ '\n '.join(batch_bounds_to_print),
|
325
|
+
stack=False,
|
233
326
|
)
|
234
|
-
return deduplicate_success, msg + '\n\n' + deduplicate_msg
|
235
|
-
return True, msg
|
236
|
-
|
237
|
-
chunk_bounds_to_resync = [
|
238
|
-
bounds
|
239
|
-
for bounds, success in zip(chunk_bounds, bounds_success_bools)
|
240
|
-
if not success
|
241
|
-
]
|
242
|
-
bounds_to_print = [
|
243
|
-
f"{bounds[0]} - {bounds[1]}"
|
244
|
-
for bounds in chunk_bounds_to_resync
|
245
|
-
]
|
246
|
-
if bounds_to_print:
|
247
|
-
warn(
|
248
|
-
f"Will resync the following failed chunks:\n "
|
249
|
-
+ '\n '.join(bounds_to_print),
|
250
|
-
stack=False,
|
251
|
-
)
|
252
|
-
|
253
|
-
retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
|
254
|
-
bounds_success_tuples.update(retry_bounds_success_tuples)
|
255
|
-
retry_bounds_success_bools = {
|
256
|
-
bounds: tup[0]
|
257
|
-
for bounds, tup in retry_bounds_success_tuples.items()
|
258
|
-
}
|
259
327
|
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
328
|
+
retry_bounds_success_tuples = dict(pool.map(
|
329
|
+
process_chunk_bounds,
|
330
|
+
batch_chunk_bounds_to_resync
|
331
|
+
))
|
332
|
+
batch_bounds_success_tuples.update(retry_bounds_success_tuples)
|
333
|
+
bounds_success_tuples.update(retry_bounds_success_tuples)
|
334
|
+
retry_bounds_success_bools = {
|
335
|
+
bounds: tup[0]
|
336
|
+
for bounds, tup in retry_bounds_success_tuples.items()
|
337
|
+
}
|
338
|
+
|
339
|
+
if all(retry_bounds_success_bools.values()):
|
340
|
+
chunks_message = (
|
341
|
+
get_chunks_success_message(
|
342
|
+
batch_bounds_success_tuples,
|
343
|
+
header=batch_message_header,
|
344
|
+
check_rowcounts_only=check_rowcounts_only,
|
345
|
+
) + f"\nRetried {len(batch_chunk_bounds_to_resync)} chunk" + (
|
346
|
+
's'
|
347
|
+
if len(batch_chunk_bounds_to_resync) != 1
|
348
|
+
else ''
|
349
|
+
) + "."
|
350
|
+
)
|
351
|
+
if deduplicate:
|
352
|
+
deduplicate_success, deduplicate_msg = self.deduplicate(
|
353
|
+
begin=_batch_begin,
|
354
|
+
end=_batch_end,
|
355
|
+
params=params,
|
356
|
+
workers=workers,
|
357
|
+
debug=debug,
|
358
|
+
**kwargs
|
359
|
+
)
|
360
|
+
return deduplicate_success, chunks_message + '\n\n' + deduplicate_msg
|
361
|
+
return True, chunks_message
|
362
|
+
|
363
|
+
batch_chunks_message = get_chunks_success_message(
|
364
|
+
batch_bounds_success_tuples,
|
365
|
+
header=batch_message_header,
|
366
|
+
check_rowcounts_only=check_rowcounts_only,
|
264
367
|
)
|
265
368
|
if deduplicate:
|
266
369
|
deduplicate_success, deduplicate_msg = self.deduplicate(
|
@@ -271,26 +374,57 @@ def verify(
|
|
271
374
|
debug=debug,
|
272
375
|
**kwargs
|
273
376
|
)
|
274
|
-
return deduplicate_success,
|
275
|
-
return
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
377
|
+
return deduplicate_success, batch_chunks_message + '\n\n' + deduplicate_msg
|
378
|
+
return False, batch_chunks_message
|
379
|
+
|
380
|
+
num_batches = len(batches)
|
381
|
+
for batch_i, batch in enumerate(batches):
|
382
|
+
batch_begin = batch[0][0]
|
383
|
+
batch_end = batch[-1][-1]
|
384
|
+
batch_counter_str = f"({(batch_i + 1):,}/{num_batches:,})"
|
385
|
+
batch_label = f"batch {batch_counter_str}:\n{batch_begin} - {batch_end}"
|
386
|
+
retry_failed_batch = True
|
387
|
+
try:
|
388
|
+
for_self = 'for ' + str(self)
|
389
|
+
info(f"Verifying {batch_label.replace(':\n', ' ' + for_self + '...\n ')}\n")
|
390
|
+
batch_success, batch_msg = process_batch(batch)
|
391
|
+
except (KeyboardInterrupt, Exception) as e:
|
392
|
+
batch_success = False
|
393
|
+
batch_msg = str(e)
|
394
|
+
retry_failed_batch = False
|
395
|
+
|
396
|
+
batch_msg_to_print = (
|
397
|
+
f"{make_header('Completed batch ' + batch_counter_str + ' ' + for_self + ':')}\n{batch_msg}"
|
286
398
|
)
|
287
|
-
|
288
|
-
|
399
|
+
mrsm.pprint((batch_success, batch_msg_to_print))
|
400
|
+
|
401
|
+
if not batch_success and retry_failed_batch:
|
402
|
+
info(f"Retrying batch {batch_counter_str}...")
|
403
|
+
retry_batch_success, retry_batch_msg = process_batch(batch)
|
404
|
+
retry_batch_msg_to_print = (
|
405
|
+
f"Retried {make_header('batch ' + batch_label)}\n{retry_batch_msg}"
|
406
|
+
)
|
407
|
+
mrsm.pprint((retry_batch_success, retry_batch_msg_to_print))
|
408
|
+
|
409
|
+
batch_success = retry_batch_success
|
410
|
+
batch_msg = retry_batch_msg
|
411
|
+
|
412
|
+
if not batch_success:
|
413
|
+
return False, f"Failed to verify {batch_label}:\n\n{batch_msg}"
|
414
|
+
|
415
|
+
chunks_message = get_chunks_success_message(
|
416
|
+
bounds_success_tuples,
|
417
|
+
header=message_header,
|
418
|
+
check_rowcounts_only=check_rowcounts_only,
|
419
|
+
)
|
420
|
+
return True, chunks_message
|
421
|
+
|
289
422
|
|
290
423
|
|
291
424
|
def get_chunks_success_message(
|
292
425
|
chunk_success_tuples: Dict[Tuple[Any, Any], SuccessTuple],
|
293
426
|
header: str = '',
|
427
|
+
check_rowcounts_only: bool = False,
|
294
428
|
) -> str:
|
295
429
|
"""
|
296
430
|
Sum together all of the inserts and updates from the chunks.
|
@@ -319,10 +453,19 @@ def get_chunks_success_message(
|
|
319
453
|
inserts = [stat['inserted'] for stat in chunk_stats]
|
320
454
|
updates = [stat['updated'] for stat in chunk_stats]
|
321
455
|
upserts = [stat['upserted'] for stat in chunk_stats]
|
456
|
+
checks = [stat['checked'] for stat in chunk_stats]
|
457
|
+
out_of_sync_bounds_messages = {
|
458
|
+
bounds: message
|
459
|
+
for bounds, (success, message) in chunk_success_tuples.items()
|
460
|
+
if 'out-of-sync' in message
|
461
|
+
} if check_rowcounts_only else {}
|
462
|
+
|
322
463
|
num_inserted = sum(inserts)
|
323
464
|
num_updated = sum(updates)
|
324
465
|
num_upserted = sum(upserts)
|
466
|
+
num_checked = sum(checks)
|
325
467
|
num_fails = len(fail_chunk_bounds_tuples)
|
468
|
+
num_out_of_sync = len(out_of_sync_bounds_messages)
|
326
469
|
|
327
470
|
header = (header + "\n") if header else ""
|
328
471
|
stats_msg = items_str(
|
@@ -330,22 +473,38 @@ def get_chunks_success_message(
|
|
330
473
|
([f'inserted {num_inserted:,}'] if num_inserted else [])
|
331
474
|
+ ([f'updated {num_updated:,}'] if num_updated else [])
|
332
475
|
+ ([f'upserted {num_upserted:,}'] if num_upserted else [])
|
476
|
+
+ ([f'checked {num_checked:,}'] if num_checked else [])
|
333
477
|
) or ['synced 0'],
|
334
478
|
quotes=False,
|
335
479
|
and_=False,
|
336
480
|
)
|
337
481
|
|
338
482
|
success_msg = (
|
339
|
-
|
483
|
+
"Successfully "
|
484
|
+
+ ('synced' if not check_rowcounts_only else 'checked')
|
485
|
+
+ f" {len(chunk_success_tuples):,} chunk"
|
340
486
|
+ ('s' if len(chunk_success_tuples) != 1 else '')
|
341
487
|
+ '\n(' + stats_msg
|
342
488
|
+ ' rows in total).'
|
343
489
|
)
|
490
|
+
if check_rowcounts_only:
|
491
|
+
success_msg += (
|
492
|
+
f"\n\nFound {num_out_of_sync} chunk"
|
493
|
+
+ ('s' if num_out_of_sync != 1 else '')
|
494
|
+
+ ' to be out-of-sync'
|
495
|
+
+ ('.' if num_out_of_sync == 0 else ':\n\n ')
|
496
|
+
+ '\n '.join(
|
497
|
+
[
|
498
|
+
f'{lbound} - {rbound}'
|
499
|
+
for lbound, rbound in out_of_sync_bounds_messages
|
500
|
+
]
|
501
|
+
)
|
502
|
+
)
|
344
503
|
fail_msg = (
|
345
504
|
''
|
346
505
|
if num_fails == 0
|
347
506
|
else (
|
348
|
-
f"\n\nFailed to sync {num_fails} chunk"
|
507
|
+
f"\n\nFailed to sync {num_fails:,} chunk"
|
349
508
|
+ ('s' if num_fails != 1 else '') + ":\n"
|
350
509
|
+ '\n'.join([
|
351
510
|
f"{fail_begin} - {fail_end}\n{msg}\n"
|
@@ -436,9 +595,15 @@ def get_bound_time(self, debug: bool = False) -> Union[datetime, int, None]:
|
|
436
595
|
|
437
596
|
bound_time = sync_time - bound_interval
|
438
597
|
oldest_sync_time = self.get_sync_time(newest=False, debug=debug)
|
598
|
+
max_bound_time_days = STATIC_CONFIG['pipes']['max_bound_time_days']
|
599
|
+
|
600
|
+
extreme_sync_times_delta = (
|
601
|
+
hasattr(oldest_sync_time, 'tzinfo')
|
602
|
+
and (sync_time - oldest_sync_time) >= timedelta(days=max_bound_time_days)
|
603
|
+
)
|
439
604
|
|
440
605
|
return (
|
441
606
|
bound_time
|
442
|
-
if bound_time > oldest_sync_time
|
607
|
+
if bound_time > oldest_sync_time or extreme_sync_times_delta
|
443
608
|
else None
|
444
609
|
)
|
meerschaum/core/User/__init__.py
CHANGED
@@ -9,7 +9,7 @@ Manager users' metadata via the User class
|
|
9
9
|
from typing import Optional
|
10
10
|
|
11
11
|
import meerschaum as mrsm
|
12
|
-
from meerschaum.core.User._User import User
|
12
|
+
from meerschaum.core.User._User import User
|
13
13
|
|
14
14
|
|
15
15
|
def is_user_allowed_to_execute(
|
@@ -19,8 +19,6 @@ def is_user_allowed_to_execute(
|
|
19
19
|
"""
|
20
20
|
Return a `SuccessTuple` indicating whether a given user is allowed to execute actions.
|
21
21
|
"""
|
22
|
-
print(f"{debug=}")
|
23
|
-
print(f"{user=}")
|
24
22
|
if user is None:
|
25
23
|
return True, "Success"
|
26
24
|
|
@@ -29,9 +27,7 @@ def is_user_allowed_to_execute(
|
|
29
27
|
if user_type == 'admin':
|
30
28
|
return True, "Success"
|
31
29
|
|
32
|
-
|
33
|
-
|
34
|
-
allow_non_admin = get_config('system', 'api', 'permissions', 'actions', 'non_admin')
|
30
|
+
allow_non_admin = mrsm.get_config('system', 'api', 'permissions', 'actions', 'non_admin')
|
35
31
|
if not allow_non_admin:
|
36
32
|
return False, "The administrator for this server has not allowed users to perform actions."
|
37
33
|
|
meerschaum/jobs/_Job.py
CHANGED
meerschaum/jobs/__init__.py
CHANGED
@@ -150,6 +150,13 @@ def get_filtered_jobs(
|
|
150
150
|
}
|
151
151
|
|
152
152
|
jobs_to_return = {}
|
153
|
+
filter_list_without_underscores = [name for name in filter_list if not name.startswith('_')]
|
154
|
+
filter_list_with_underscores = [name for name in filter_list if name.startswith('_')]
|
155
|
+
if (
|
156
|
+
filter_list_without_underscores and not filter_list_with_underscores
|
157
|
+
or filter_list_with_underscores and not filter_list_without_underscores
|
158
|
+
):
|
159
|
+
pass
|
153
160
|
for name in filter_list:
|
154
161
|
job = jobs.get(name, None)
|
155
162
|
if job is None:
|
@@ -161,6 +168,14 @@ def get_filtered_jobs(
|
|
161
168
|
continue
|
162
169
|
jobs_to_return[name] = job
|
163
170
|
|
171
|
+
if not jobs_to_return and filter_list_with_underscores:
|
172
|
+
names_to_exclude = [name.lstrip('_') for name in filter_list_with_underscores]
|
173
|
+
return {
|
174
|
+
name: job
|
175
|
+
for name, job in jobs.items()
|
176
|
+
if name not in names_to_exclude
|
177
|
+
}
|
178
|
+
|
164
179
|
return jobs_to_return
|
165
180
|
|
166
181
|
|