meerschaum-2.7.10-py3-none-any.whl → meerschaum-2.8.1-py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57)
  1. meerschaum/_internal/arguments/_parser.py +17 -5
  2. meerschaum/actions/copy.py +3 -1
  3. meerschaum/actions/index.py +1 -1
  4. meerschaum/actions/show.py +7 -7
  5. meerschaum/actions/sync.py +5 -1
  6. meerschaum/actions/verify.py +18 -2
  7. meerschaum/api/__init__.py +77 -41
  8. meerschaum/api/_exceptions.py +18 -0
  9. meerschaum/api/dash/__init__.py +4 -2
  10. meerschaum/api/dash/callbacks/dashboard.py +30 -1
  11. meerschaum/api/dash/components.py +2 -2
  12. meerschaum/api/dash/webterm.py +23 -4
  13. meerschaum/api/models/_pipes.py +8 -8
  14. meerschaum/api/resources/static/css/dash.css +2 -2
  15. meerschaum/api/resources/templates/termpage.html +5 -1
  16. meerschaum/api/routes/__init__.py +15 -12
  17. meerschaum/api/routes/_connectors.py +30 -28
  18. meerschaum/api/routes/_index.py +16 -7
  19. meerschaum/api/routes/_misc.py +30 -22
  20. meerschaum/api/routes/_pipes.py +244 -148
  21. meerschaum/api/routes/_plugins.py +58 -47
  22. meerschaum/api/routes/_users.py +39 -31
  23. meerschaum/api/routes/_version.py +8 -10
  24. meerschaum/config/_default.py +10 -0
  25. meerschaum/config/_version.py +1 -1
  26. meerschaum/config/static/__init__.py +4 -1
  27. meerschaum/connectors/api/_APIConnector.py +4 -3
  28. meerschaum/connectors/api/_login.py +21 -17
  29. meerschaum/connectors/api/_pipes.py +1 -0
  30. meerschaum/connectors/api/_request.py +9 -10
  31. meerschaum/connectors/sql/_cli.py +11 -3
  32. meerschaum/connectors/sql/_instance.py +1 -1
  33. meerschaum/connectors/sql/_pipes.py +77 -57
  34. meerschaum/connectors/sql/_sql.py +26 -9
  35. meerschaum/core/Pipe/__init__.py +2 -0
  36. meerschaum/core/Pipe/_attributes.py +13 -2
  37. meerschaum/core/Pipe/_data.py +85 -0
  38. meerschaum/core/Pipe/_deduplicate.py +6 -8
  39. meerschaum/core/Pipe/_sync.py +63 -30
  40. meerschaum/core/Pipe/_verify.py +243 -77
  41. meerschaum/core/User/__init__.py +2 -6
  42. meerschaum/jobs/_Job.py +1 -1
  43. meerschaum/jobs/__init__.py +15 -0
  44. meerschaum/utils/dataframe.py +2 -0
  45. meerschaum/utils/dtypes/sql.py +26 -0
  46. meerschaum/utils/formatting/_pipes.py +1 -1
  47. meerschaum/utils/misc.py +11 -7
  48. meerschaum/utils/packages/_packages.py +1 -1
  49. meerschaum/utils/sql.py +6 -2
  50. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/METADATA +4 -4
  51. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/RECORD +57 -56
  52. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/LICENSE +0 -0
  53. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/NOTICE +0 -0
  54. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/WHEEL +0 -0
  55. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/entry_points.txt +0 -0
  56. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/top_level.txt +0 -0
  57. {meerschaum-2.7.10.dist-info → meerschaum-2.8.1.dist-info}/zip-safe +0 -0
@@ -319,16 +319,42 @@ def sync(
319
319
  if debug:
320
320
  dprint("Successfully synced the first chunk, attemping the rest...")
321
321
 
322
- failed_chunks = []
323
322
  def _process_chunk(_chunk):
324
- try:
325
- _chunk_success, _chunk_msg = _sync(p, _chunk)
326
- except Exception as e:
327
- _chunk_success, _chunk_msg = False, str(e)
328
- if not _chunk_success:
329
- failed_chunks.append(_chunk)
323
+ _chunk_attempts = 0
324
+ _max_chunk_attempts = 3
325
+ while _chunk_attempts < _max_chunk_attempts:
326
+ try:
327
+ _chunk_success, _chunk_msg = _sync(p, _chunk)
328
+ except Exception as e:
329
+ _chunk_success, _chunk_msg = False, str(e)
330
+ if _chunk_success:
331
+ break
332
+ _chunk_attempts += 1
333
+ _sleep_seconds = _chunk_attempts ** 2
334
+ warn(
335
+ (
336
+ f"Failed to sync chunk to {self} "
337
+ + f"(attempt {_chunk_attempts} / {_max_chunk_attempts}).\n"
338
+ + f"Sleeping for {_sleep_seconds} second"
339
+ + ('s' if _sleep_seconds != 1 else '')
340
+ + ":\n{_chunk_msg}"
341
+ ),
342
+ stack=False,
343
+ )
344
+ time.sleep(_sleep_seconds)
345
+
346
+ num_rows_str = (
347
+ f"{num_rows:,} rows"
348
+ if (num_rows := len(_chunk)) != 1
349
+ else f"{num_rows} row"
350
+ )
330
351
  _chunk_msg = (
331
- self._get_chunk_label(_chunk, dt_col)
352
+ (
353
+ "Synced"
354
+ if _chunk_success
355
+ else "Failed to sync"
356
+ ) + f" a chunk ({num_rows_str}) to {p}:\n"
357
+ + self._get_chunk_label(_chunk, dt_col)
332
358
  + '\n'
333
359
  + _chunk_msg
334
360
  )
@@ -351,27 +377,16 @@ def sync(
351
377
  )
352
378
  chunk_messages = [chunk_msg for _, chunk_msg in results]
353
379
  success_bools = [chunk_success for chunk_success, _ in results]
380
+ num_successes = len([chunk_success for chunk_success, _ in results if chunk_success])
381
+ num_failures = len([chunk_success for chunk_success, _ in results if not chunk_success])
354
382
  success = all(success_bools)
355
383
  msg = (
356
- f'Synced {len(chunk_messages)} chunk'
384
+ 'Synced '
385
+ + f'{len(chunk_messages):,} chunk'
357
386
  + ('s' if len(chunk_messages) != 1 else '')
358
- + f' to {p}:\n\n'
387
+ + f' to {p}\n({num_successes} succeeded, {num_failures} failed):\n\n'
359
388
  + '\n\n'.join(chunk_messages).lstrip().rstrip()
360
389
  ).lstrip().rstrip()
361
-
362
- ### If some chunks succeeded, retry the failures.
363
- retry_success = True
364
- if not success and any(success_bools):
365
- if debug:
366
- dprint("Retrying failed chunks...")
367
- chunks_to_retry = [c for c in failed_chunks]
368
- failed_chunks = []
369
- for chunk in chunks_to_retry:
370
- chunk_success, chunk_msg = _process_chunk(chunk)
371
- msg += f"\n\nRetried chunk:\n{chunk_msg}\n"
372
- retry_success = retry_success and chunk_success
373
-
374
- success = success and retry_success
375
390
  return success, msg
376
391
 
377
392
  ### Cast to a dataframe and ensure datatypes are what we expect.
@@ -474,6 +489,7 @@ def get_sync_time(
474
489
  params: Optional[Dict[str, Any]] = None,
475
490
  newest: bool = True,
476
491
  apply_backtrack_interval: bool = False,
492
+ remote: bool = False,
477
493
  round_down: bool = False,
478
494
  debug: bool = False
479
495
  ) -> Union['datetime', int, None]:
@@ -493,6 +509,10 @@ def get_sync_time(
493
509
  apply_backtrack_interval: bool, default False
494
510
  If `True`, subtract the backtrack interval from the sync time.
495
511
 
512
+ remote: bool, default False
513
+ If `True` and the instance connector supports it, return the sync time
514
+ for the remote table definition.
515
+
496
516
  round_down: bool, default False
497
517
  If `True`, round down the datetime value to the nearest minute.
498
518
 
@@ -506,17 +526,30 @@ def get_sync_time(
506
526
  """
507
527
  from meerschaum.utils.venv import Venv
508
528
  from meerschaum.connectors import get_connector_plugin
509
- from meerschaum.utils.misc import round_time
529
+ from meerschaum.utils.misc import round_time, filter_keywords
530
+ from meerschaum.utils.warnings import warn
510
531
 
511
532
  if not self.columns.get('datetime', None):
512
533
  return None
513
534
 
514
- with Venv(get_connector_plugin(self.instance_connector)):
515
- sync_time = self.instance_connector.get_sync_time(
535
+ connector = self.instance_connector if not remote else self.connector
536
+ with Venv(get_connector_plugin(connector)):
537
+ if not hasattr(connector, 'get_sync_time'):
538
+ warn(
539
+ f"Connectors of type '{connector.type}' "
540
+ "do not implement `get_sync_time().",
541
+ stack=False,
542
+ )
543
+ return None
544
+ sync_time = connector.get_sync_time(
516
545
  self,
517
- params=params,
518
- newest=newest,
519
- debug=debug,
546
+ **filter_keywords(
547
+ connector.get_sync_time,
548
+ params=params,
549
+ newest=newest,
550
+ remote=remote,
551
+ debug=debug,
552
+ )
520
553
  )
521
554
 
522
555
  if round_down and isinstance(sync_time, datetime):
@@ -11,6 +11,7 @@ from datetime import datetime, timedelta
11
11
  import meerschaum as mrsm
12
12
  from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
13
13
  from meerschaum.utils.warnings import warn, info
14
+ from meerschaum.config.static import STATIC_CONFIG
14
15
 
15
16
 
16
17
  def verify(
@@ -22,6 +23,9 @@ def verify(
22
23
  bounded: Optional[bool] = None,
23
24
  deduplicate: bool = False,
24
25
  workers: Optional[int] = None,
26
+ batchsize: Optional[int] = None,
27
+ skip_chunks_with_greater_rowcounts: bool = False,
28
+ check_rowcounts_only: bool = False,
25
29
  debug: bool = False,
26
30
  **kwargs: Any
27
31
  ) -> SuccessTuple:
@@ -53,6 +57,17 @@ def verify(
53
57
  If provided, limit the verification to this many threads.
54
58
  Use a value of `1` to sync chunks in series.
55
59
 
60
+ batchsize: Optional[int], default None
61
+ If provided, sync this many chunks in parallel.
62
+ Defaults to `Pipe.get_num_workers()`.
63
+
64
+ skip_chunks_with_greater_rowcounts: bool, default False
65
+ If `True`, compare the rowcounts for a chunk and skip syncing if the pipe's
66
+ chunk rowcount equals or exceeds the remote's rowcount.
67
+
68
+ check_rowcounts_only: bool, default False
69
+ If `True`, only compare rowcounts and print chunks which are out-of-sync.
70
+
56
71
  debug: bool, default False
57
72
  Verbosity toggle.
58
73
 
@@ -64,8 +79,10 @@ def verify(
64
79
  A SuccessTuple indicating whether the pipe was successfully resynced.
65
80
  """
66
81
  from meerschaum.utils.pool import get_pool
82
+ from meerschaum.utils.formatting import make_header
67
83
  from meerschaum.utils.misc import interval_str
68
84
  workers = self.get_num_workers(workers)
85
+ check_rowcounts = skip_chunks_with_greater_rowcounts or check_rowcounts_only
69
86
 
70
87
  ### Skip configured bounding in parameters
71
88
  ### if `bounded` is explicitly `False`.
@@ -83,21 +100,26 @@ def verify(
83
100
  if bound_time is not None
84
101
  else self.get_sync_time(newest=False, debug=debug)
85
102
  )
103
+ if begin is None:
104
+ remote_oldest_sync_time = self.get_sync_time(newest=False, remote=True, debug=debug)
105
+ begin = remote_oldest_sync_time
86
106
  if bounded and end is None:
87
107
  end = self.get_sync_time(newest=True, debug=debug)
108
+ if end is None:
109
+ remote_newest_sync_time = self.get_sync_time(newest=True, remote=True, debug=debug)
110
+ end = remote_newest_sync_time
111
+ if end is not None:
112
+ end += (
113
+ timedelta(minutes=1)
114
+ if hasattr(end, 'tzinfo')
115
+ else 1
116
+ )
88
117
 
89
118
  begin, end = self.parse_date_bounds(begin, end)
90
-
91
- if bounded and end is not None:
92
- end += (
93
- timedelta(minutes=1)
94
- if isinstance(end, datetime)
95
- else 1
96
- )
97
-
98
- cannot_determine_bounds = not self.exists(debug=debug)
119
+ cannot_determine_bounds = bounded and begin is None and end is None
99
120
 
100
121
  if cannot_determine_bounds:
122
+ warn(f"Cannot determine sync bounds for {self}. Syncing instead...", stack=False)
101
123
  sync_success, sync_msg = self.sync(
102
124
  begin=begin,
103
125
  end=end,
@@ -160,17 +182,16 @@ def verify(
160
182
  else chunk_bounds[-1][0]
161
183
  )
162
184
  )
185
+ message_header = f"{begin_to_print} - {end_to_print}"
163
186
 
164
187
  info(
165
188
  f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
166
189
  + ('s' if len(chunk_bounds) != 1 else '')
167
190
  + f" ({'un' if not bounded else ''}bounded)"
168
191
  + f" of size '{interval_str(chunk_interval)}'"
169
- + f" between '{begin_to_print}' and '{end_to_print}'."
192
+ + f" between '{begin_to_print}' and '{end_to_print}'.\n"
170
193
  )
171
194
 
172
- pool = get_pool(workers=workers)
173
-
174
195
  ### Dictionary of the form bounds -> success_tuple, e.g.:
175
196
  ### {
176
197
  ### (2023-01-01, 2023-01-02): (True, "Success")
@@ -180,87 +201,169 @@ def verify(
180
201
  chunk_begin_and_end: Tuple[
181
202
  Union[int, datetime],
182
203
  Union[int, datetime]
183
- ]
204
+ ],
205
+ _workers: Optional[int] = 1,
184
206
  ):
185
207
  if chunk_begin_and_end in bounds_success_tuples:
186
208
  return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
187
209
 
188
210
  chunk_begin, chunk_end = chunk_begin_and_end
211
+ do_sync = True
212
+ chunk_success, chunk_msg = False, "Did not sync chunk."
213
+ if check_rowcounts:
214
+ existing_rowcount = self.get_rowcount(begin=chunk_begin, end=chunk_end, debug=debug)
215
+ remote_rowcount = self.get_rowcount(
216
+ begin=chunk_begin,
217
+ end=chunk_end,
218
+ remote=True,
219
+ debug=debug,
220
+ )
221
+ checked_rows_str = (
222
+ f"checked {existing_rowcount} row"
223
+ + ("s" if existing_rowcount != 1 else '')
224
+ + f" vs {remote_rowcount} remote"
225
+ )
226
+ if (
227
+ existing_rowcount is not None
228
+ and remote_rowcount is not None
229
+ and existing_rowcount >= remote_rowcount
230
+ ):
231
+ do_sync = False
232
+ chunk_success, chunk_msg = True, (
233
+ "Row-count is up-to-date "
234
+ f"({checked_rows_str})."
235
+ )
236
+ elif check_rowcounts_only:
237
+ do_sync = False
238
+ chunk_success, chunk_msg = True, (
239
+ f"Row-counts are out-of-sync ({checked_rows_str})."
240
+ )
241
+
189
242
  chunk_success, chunk_msg = self.sync(
190
243
  begin=chunk_begin,
191
244
  end=chunk_end,
192
245
  params=params,
193
- workers=1,
246
+ workers=_workers,
194
247
  debug=debug,
195
248
  **kwargs
196
- )
249
+ ) if do_sync else (chunk_success, chunk_msg)
197
250
  chunk_msg = chunk_msg.strip()
198
251
  if ' - ' not in chunk_msg:
199
252
  chunk_label = f"{chunk_begin} - {chunk_end}"
200
- chunk_msg = f'{chunk_label}\n{chunk_msg}'
253
+ chunk_msg = f'Verified chunk for {self}:\n{chunk_label}\n{chunk_msg}'
201
254
  mrsm.pprint((chunk_success, chunk_msg))
202
255
  return chunk_begin_and_end, (chunk_success, chunk_msg)
203
256
 
204
257
  ### If we have more than one chunk, attempt to sync the first one and return if its fails.
205
258
  if len(chunk_bounds) > 1:
206
259
  first_chunk_bounds = chunk_bounds[0]
260
+ first_label = f"{first_chunk_bounds[0]} - {first_chunk_bounds[1]}"
261
+ info(f"Verifying first chunk for {self}:\n {first_label}")
207
262
  (
208
263
  (first_begin, first_end),
209
264
  (first_success, first_msg)
210
- ) = process_chunk_bounds(first_chunk_bounds)
265
+ ) = process_chunk_bounds(first_chunk_bounds, _workers=workers)
211
266
  if not first_success:
212
267
  return (
213
268
  first_success,
214
- f"\n{first_begin} - {first_end}\n"
269
+ f"\n{first_label}\n"
215
270
  + f"Failed to sync first chunk:\n{first_msg}"
216
271
  )
217
272
  bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
273
+ info(f"Completed first chunk for {self}:\n {first_label}\n")
218
274
 
219
- bounds_success_tuples.update(dict(pool.map(process_chunk_bounds, chunk_bounds)))
220
- bounds_success_bools = {bounds: tup[0] for bounds, tup in bounds_success_tuples.items()}
275
+ pool = get_pool(workers=workers)
276
+ batches = self.get_chunk_bounds_batches(chunk_bounds, batchsize=batchsize, workers=workers)
221
277
 
222
- message_header = f"{begin_to_print} - {end_to_print}"
223
- if all(bounds_success_bools.values()):
224
- msg = get_chunks_success_message(bounds_success_tuples, header=message_header)
225
- if deduplicate:
226
- deduplicate_success, deduplicate_msg = self.deduplicate(
227
- begin=begin,
228
- end=end,
229
- params=params,
230
- workers=workers,
231
- debug=debug,
232
- **kwargs
278
+ def process_batch(
279
+ batch_chunk_bounds: Tuple[
280
+ Tuple[Union[datetime, int, None], Union[datetime, int, None]],
281
+ ...
282
+ ]
283
+ ):
284
+ _batch_begin = batch_chunk_bounds[0][0]
285
+ _batch_end = batch_chunk_bounds[-1][-1]
286
+ batch_message_header = f"{_batch_begin} - {_batch_end}"
287
+ batch_bounds_success_tuples = dict(pool.map(process_chunk_bounds, batch_chunk_bounds))
288
+ bounds_success_tuples.update(batch_bounds_success_tuples)
289
+ batch_bounds_success_bools = {
290
+ bounds: tup[0]
291
+ for bounds, tup in batch_bounds_success_tuples.items()
292
+ }
293
+
294
+ if all(batch_bounds_success_bools.values()):
295
+ msg = get_chunks_success_message(
296
+ batch_bounds_success_tuples,
297
+ header=batch_message_header,
298
+ check_rowcounts_only=check_rowcounts_only,
299
+ )
300
+ if deduplicate:
301
+ deduplicate_success, deduplicate_msg = self.deduplicate(
302
+ begin=_batch_begin,
303
+ end=_batch_end,
304
+ params=params,
305
+ workers=workers,
306
+ debug=debug,
307
+ **kwargs
308
+ )
309
+ return deduplicate_success, msg + '\n\n' + deduplicate_msg
310
+ return True, msg
311
+
312
+ batch_chunk_bounds_to_resync = [
313
+ bounds
314
+ for bounds, success in zip(batch_chunk_bounds, batch_bounds_success_bools)
315
+ if not success
316
+ ]
317
+ batch_bounds_to_print = [
318
+ f"{bounds[0]} - {bounds[1]}"
319
+ for bounds in batch_chunk_bounds_to_resync
320
+ ]
321
+ if batch_bounds_to_print:
322
+ warn(
323
+ "Will resync the following failed chunks:\n "
324
+ + '\n '.join(batch_bounds_to_print),
325
+ stack=False,
233
326
  )
234
- return deduplicate_success, msg + '\n\n' + deduplicate_msg
235
- return True, msg
236
-
237
- chunk_bounds_to_resync = [
238
- bounds
239
- for bounds, success in zip(chunk_bounds, bounds_success_bools)
240
- if not success
241
- ]
242
- bounds_to_print = [
243
- f"{bounds[0]} - {bounds[1]}"
244
- for bounds in chunk_bounds_to_resync
245
- ]
246
- if bounds_to_print:
247
- warn(
248
- f"Will resync the following failed chunks:\n "
249
- + '\n '.join(bounds_to_print),
250
- stack=False,
251
- )
252
-
253
- retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
254
- bounds_success_tuples.update(retry_bounds_success_tuples)
255
- retry_bounds_success_bools = {
256
- bounds: tup[0]
257
- for bounds, tup in retry_bounds_success_tuples.items()
258
- }
259
327
 
260
- if all(retry_bounds_success_bools.values()):
261
- message = (
262
- get_chunks_success_message(bounds_success_tuples, header=message_header)
263
- + f"\nRetried {len(chunk_bounds_to_resync)} chunks."
328
+ retry_bounds_success_tuples = dict(pool.map(
329
+ process_chunk_bounds,
330
+ batch_chunk_bounds_to_resync
331
+ ))
332
+ batch_bounds_success_tuples.update(retry_bounds_success_tuples)
333
+ bounds_success_tuples.update(retry_bounds_success_tuples)
334
+ retry_bounds_success_bools = {
335
+ bounds: tup[0]
336
+ for bounds, tup in retry_bounds_success_tuples.items()
337
+ }
338
+
339
+ if all(retry_bounds_success_bools.values()):
340
+ chunks_message = (
341
+ get_chunks_success_message(
342
+ batch_bounds_success_tuples,
343
+ header=batch_message_header,
344
+ check_rowcounts_only=check_rowcounts_only,
345
+ ) + f"\nRetried {len(batch_chunk_bounds_to_resync)} chunk" + (
346
+ 's'
347
+ if len(batch_chunk_bounds_to_resync) != 1
348
+ else ''
349
+ ) + "."
350
+ )
351
+ if deduplicate:
352
+ deduplicate_success, deduplicate_msg = self.deduplicate(
353
+ begin=_batch_begin,
354
+ end=_batch_end,
355
+ params=params,
356
+ workers=workers,
357
+ debug=debug,
358
+ **kwargs
359
+ )
360
+ return deduplicate_success, chunks_message + '\n\n' + deduplicate_msg
361
+ return True, chunks_message
362
+
363
+ batch_chunks_message = get_chunks_success_message(
364
+ batch_bounds_success_tuples,
365
+ header=batch_message_header,
366
+ check_rowcounts_only=check_rowcounts_only,
264
367
  )
265
368
  if deduplicate:
266
369
  deduplicate_success, deduplicate_msg = self.deduplicate(
@@ -271,26 +374,58 @@ def verify(
271
374
  debug=debug,
272
375
  **kwargs
273
376
  )
274
- return deduplicate_success, message + '\n\n' + deduplicate_msg
275
- return True, message
276
-
277
- message = get_chunks_success_message(bounds_success_tuples, header=message_header)
278
- if deduplicate:
279
- deduplicate_success, deduplicate_msg = self.deduplicate(
280
- begin=begin,
281
- end=end,
282
- params=params,
283
- workers=workers,
284
- debug=debug,
285
- **kwargs
377
+ return deduplicate_success, batch_chunks_message + '\n\n' + deduplicate_msg
378
+ return False, batch_chunks_message
379
+
380
+ num_batches = len(batches)
381
+ for batch_i, batch in enumerate(batches):
382
+ batch_begin = batch[0][0]
383
+ batch_end = batch[-1][-1]
384
+ batch_counter_str = f"({(batch_i + 1):,}/{num_batches:,})"
385
+ batch_label = f"batch {batch_counter_str}:\n{batch_begin} - {batch_end}"
386
+ retry_failed_batch = True
387
+ try:
388
+ for_self = 'for ' + str(self)
389
+ batch_label_str = batch_label.replace(':\n', ' ' + for_self + '...\n ')
390
+ info(f"Verifying {batch_label_str}\n")
391
+ batch_success, batch_msg = process_batch(batch)
392
+ except (KeyboardInterrupt, Exception) as e:
393
+ batch_success = False
394
+ batch_msg = str(e)
395
+ retry_failed_batch = False
396
+
397
+ batch_msg_to_print = (
398
+ f"{make_header('Completed batch ' + batch_counter_str + ' ' + for_self + ':')}\n{batch_msg}"
286
399
  )
287
- return deduplicate_success, message + '\n\n' + deduplicate_msg
288
- return False, message
400
+ mrsm.pprint((batch_success, batch_msg_to_print))
401
+
402
+ if not batch_success and retry_failed_batch:
403
+ info(f"Retrying batch {batch_counter_str}...")
404
+ retry_batch_success, retry_batch_msg = process_batch(batch)
405
+ retry_batch_msg_to_print = (
406
+ f"Retried {make_header('batch ' + batch_label)}\n{retry_batch_msg}"
407
+ )
408
+ mrsm.pprint((retry_batch_success, retry_batch_msg_to_print))
409
+
410
+ batch_success = retry_batch_success
411
+ batch_msg = retry_batch_msg
412
+
413
+ if not batch_success:
414
+ return False, f"Failed to verify {batch_label}:\n\n{batch_msg}"
415
+
416
+ chunks_message = get_chunks_success_message(
417
+ bounds_success_tuples,
418
+ header=message_header,
419
+ check_rowcounts_only=check_rowcounts_only,
420
+ )
421
+ return True, chunks_message
422
+
289
423
 
290
424
 
291
425
  def get_chunks_success_message(
292
426
  chunk_success_tuples: Dict[Tuple[Any, Any], SuccessTuple],
293
427
  header: str = '',
428
+ check_rowcounts_only: bool = False,
294
429
  ) -> str:
295
430
  """
296
431
  Sum together all of the inserts and updates from the chunks.
@@ -319,10 +454,19 @@ def get_chunks_success_message(
319
454
  inserts = [stat['inserted'] for stat in chunk_stats]
320
455
  updates = [stat['updated'] for stat in chunk_stats]
321
456
  upserts = [stat['upserted'] for stat in chunk_stats]
457
+ checks = [stat['checked'] for stat in chunk_stats]
458
+ out_of_sync_bounds_messages = {
459
+ bounds: message
460
+ for bounds, (success, message) in chunk_success_tuples.items()
461
+ if 'out-of-sync' in message
462
+ } if check_rowcounts_only else {}
463
+
322
464
  num_inserted = sum(inserts)
323
465
  num_updated = sum(updates)
324
466
  num_upserted = sum(upserts)
467
+ num_checked = sum(checks)
325
468
  num_fails = len(fail_chunk_bounds_tuples)
469
+ num_out_of_sync = len(out_of_sync_bounds_messages)
326
470
 
327
471
  header = (header + "\n") if header else ""
328
472
  stats_msg = items_str(
@@ -330,22 +474,38 @@ def get_chunks_success_message(
330
474
  ([f'inserted {num_inserted:,}'] if num_inserted else [])
331
475
  + ([f'updated {num_updated:,}'] if num_updated else [])
332
476
  + ([f'upserted {num_upserted:,}'] if num_upserted else [])
477
+ + ([f'checked {num_checked:,}'] if num_checked else [])
333
478
  ) or ['synced 0'],
334
479
  quotes=False,
335
480
  and_=False,
336
481
  )
337
482
 
338
483
  success_msg = (
339
- f"Successfully synced {len(chunk_success_tuples):,} chunk"
484
+ "Successfully "
485
+ + ('synced' if not check_rowcounts_only else 'checked')
486
+ + f" {len(chunk_success_tuples):,} chunk"
340
487
  + ('s' if len(chunk_success_tuples) != 1 else '')
341
488
  + '\n(' + stats_msg
342
489
  + ' rows in total).'
343
490
  )
491
+ if check_rowcounts_only:
492
+ success_msg += (
493
+ f"\n\nFound {num_out_of_sync} chunk"
494
+ + ('s' if num_out_of_sync != 1 else '')
495
+ + ' to be out-of-sync'
496
+ + ('.' if num_out_of_sync == 0 else ':\n\n ')
497
+ + '\n '.join(
498
+ [
499
+ f'{lbound} - {rbound}'
500
+ for lbound, rbound in out_of_sync_bounds_messages
501
+ ]
502
+ )
503
+ )
344
504
  fail_msg = (
345
505
  ''
346
506
  if num_fails == 0
347
507
  else (
348
- f"\n\nFailed to sync {num_fails} chunk"
508
+ f"\n\nFailed to sync {num_fails:,} chunk"
349
509
  + ('s' if num_fails != 1 else '') + ":\n"
350
510
  + '\n'.join([
351
511
  f"{fail_begin} - {fail_end}\n{msg}\n"
@@ -436,9 +596,15 @@ def get_bound_time(self, debug: bool = False) -> Union[datetime, int, None]:
436
596
 
437
597
  bound_time = sync_time - bound_interval
438
598
  oldest_sync_time = self.get_sync_time(newest=False, debug=debug)
599
+ max_bound_time_days = STATIC_CONFIG['pipes']['max_bound_time_days']
600
+
601
+ extreme_sync_times_delta = (
602
+ hasattr(oldest_sync_time, 'tzinfo')
603
+ and (sync_time - oldest_sync_time) >= timedelta(days=max_bound_time_days)
604
+ )
439
605
 
440
606
  return (
441
607
  bound_time
442
- if bound_time > oldest_sync_time
608
+ if bound_time > oldest_sync_time or extreme_sync_times_delta
443
609
  else None
444
610
  )
@@ -9,7 +9,7 @@ Manager users' metadata via the User class
9
9
  from typing import Optional
10
10
 
11
11
  import meerschaum as mrsm
12
- from meerschaum.core.User._User import User, hash_password, verify_password
12
+ from meerschaum.core.User._User import User
13
13
 
14
14
 
15
15
  def is_user_allowed_to_execute(
@@ -19,8 +19,6 @@ def is_user_allowed_to_execute(
19
19
  """
20
20
  Return a `SuccessTuple` indicating whether a given user is allowed to execute actions.
21
21
  """
22
- print(f"{debug=}")
23
- print(f"{user=}")
24
22
  if user is None:
25
23
  return True, "Success"
26
24
 
@@ -29,9 +27,7 @@ def is_user_allowed_to_execute(
29
27
  if user_type == 'admin':
30
28
  return True, "Success"
31
29
 
32
- from meerschaum.config import get_config
33
-
34
- allow_non_admin = get_config('system', 'api', 'permissions', 'actions', 'non_admin')
30
+ allow_non_admin = mrsm.get_config('system', 'api', 'permissions', 'actions', 'non_admin')
35
31
  if not allow_non_admin:
36
32
  return False, "The administrator for this server has not allowed users to perform actions."
37
33
 
meerschaum/jobs/_Job.py CHANGED
@@ -30,7 +30,7 @@ if TYPE_CHECKING:
30
30
  from meerschaum.jobs._Executor import Executor
31
31
 
32
32
  BANNED_CHARS: List[str] = [
33
- ',', ';', "'", '"',
33
+ ',', ';', "'", '"', '.', '$', '#', '=', '*', '&', '!', '`', '~',
34
34
  ]
35
35
  RESTART_FLAGS: List[str] = [
36
36
  '-s',
@@ -150,6 +150,13 @@ def get_filtered_jobs(
150
150
  }
151
151
 
152
152
  jobs_to_return = {}
153
+ filter_list_without_underscores = [name for name in filter_list if not name.startswith('_')]
154
+ filter_list_with_underscores = [name for name in filter_list if name.startswith('_')]
155
+ if (
156
+ filter_list_without_underscores and not filter_list_with_underscores
157
+ or filter_list_with_underscores and not filter_list_without_underscores
158
+ ):
159
+ pass
153
160
  for name in filter_list:
154
161
  job = jobs.get(name, None)
155
162
  if job is None:
@@ -161,6 +168,14 @@ def get_filtered_jobs(
161
168
  continue
162
169
  jobs_to_return[name] = job
163
170
 
171
+ if not jobs_to_return and filter_list_with_underscores:
172
+ names_to_exclude = [name.lstrip('_') for name in filter_list_with_underscores]
173
+ return {
174
+ name: job
175
+ for name, job in jobs.items()
176
+ if name not in names_to_exclude
177
+ }
178
+
164
179
  return jobs_to_return
165
180
 
166
181
 
@@ -7,6 +7,8 @@ Utility functions for working with DataFrames.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
+
11
+ import pathlib
10
12
  from datetime import datetime, timezone
11
13
  from collections import defaultdict
12
14