meerschaum 2.7.9__py3-none-any.whl → 2.8.0__py3-none-any.whl

This diff shows the changes between two publicly released versions of the package, as they appear in their respective public registries. It is provided for informational purposes only.
Files changed (61)
  1. meerschaum/_internal/arguments/_parser.py +17 -5
  2. meerschaum/_internal/term/TermPageHandler.py +1 -1
  3. meerschaum/_internal/term/__init__.py +1 -1
  4. meerschaum/actions/api.py +36 -10
  5. meerschaum/actions/copy.py +3 -1
  6. meerschaum/actions/index.py +1 -1
  7. meerschaum/actions/show.py +7 -7
  8. meerschaum/actions/sync.py +5 -1
  9. meerschaum/actions/verify.py +14 -1
  10. meerschaum/api/__init__.py +77 -41
  11. meerschaum/api/_exceptions.py +18 -0
  12. meerschaum/api/dash/__init__.py +4 -2
  13. meerschaum/api/dash/callbacks/dashboard.py +30 -1
  14. meerschaum/api/dash/components.py +2 -2
  15. meerschaum/api/dash/webterm.py +23 -4
  16. meerschaum/api/models/_pipes.py +8 -8
  17. meerschaum/api/resources/static/css/dash.css +2 -2
  18. meerschaum/api/resources/templates/termpage.html +5 -1
  19. meerschaum/api/routes/__init__.py +15 -12
  20. meerschaum/api/routes/_connectors.py +30 -28
  21. meerschaum/api/routes/_index.py +16 -7
  22. meerschaum/api/routes/_misc.py +30 -22
  23. meerschaum/api/routes/_pipes.py +244 -148
  24. meerschaum/api/routes/_plugins.py +58 -47
  25. meerschaum/api/routes/_users.py +39 -31
  26. meerschaum/api/routes/_version.py +8 -10
  27. meerschaum/api/routes/_webterm.py +2 -2
  28. meerschaum/config/_default.py +10 -0
  29. meerschaum/config/_version.py +1 -1
  30. meerschaum/config/static/__init__.py +5 -2
  31. meerschaum/connectors/api/_APIConnector.py +4 -3
  32. meerschaum/connectors/api/_login.py +21 -17
  33. meerschaum/connectors/api/_pipes.py +1 -0
  34. meerschaum/connectors/api/_request.py +9 -10
  35. meerschaum/connectors/sql/_cli.py +11 -3
  36. meerschaum/connectors/sql/_instance.py +1 -1
  37. meerschaum/connectors/sql/_pipes.py +77 -57
  38. meerschaum/connectors/sql/_sql.py +26 -9
  39. meerschaum/core/Pipe/__init__.py +2 -0
  40. meerschaum/core/Pipe/_attributes.py +13 -2
  41. meerschaum/core/Pipe/_data.py +85 -0
  42. meerschaum/core/Pipe/_deduplicate.py +6 -8
  43. meerschaum/core/Pipe/_sync.py +63 -30
  44. meerschaum/core/Pipe/_verify.py +242 -77
  45. meerschaum/core/User/__init__.py +2 -6
  46. meerschaum/jobs/_Job.py +1 -1
  47. meerschaum/jobs/__init__.py +15 -0
  48. meerschaum/utils/dataframe.py +2 -0
  49. meerschaum/utils/dtypes/sql.py +26 -0
  50. meerschaum/utils/formatting/_pipes.py +1 -1
  51. meerschaum/utils/misc.py +11 -7
  52. meerschaum/utils/packages/_packages.py +1 -1
  53. meerschaum/utils/sql.py +6 -2
  54. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/METADATA +4 -4
  55. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/RECORD +61 -60
  56. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/LICENSE +0 -0
  57. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/NOTICE +0 -0
  58. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/WHEEL +0 -0
  59. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/entry_points.txt +0 -0
  60. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/top_level.txt +0 -0
  61. {meerschaum-2.7.9.dist-info → meerschaum-2.8.0.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_sync.py CHANGED
@@ -319,16 +319,42 @@ def sync(
         if debug:
             dprint("Successfully synced the first chunk, attempting the rest...")

-        failed_chunks = []
         def _process_chunk(_chunk):
-            try:
-                _chunk_success, _chunk_msg = _sync(p, _chunk)
-            except Exception as e:
-                _chunk_success, _chunk_msg = False, str(e)
-            if not _chunk_success:
-                failed_chunks.append(_chunk)
+            _chunk_attempts = 0
+            _max_chunk_attempts = 3
+            while _chunk_attempts < _max_chunk_attempts:
+                try:
+                    _chunk_success, _chunk_msg = _sync(p, _chunk)
+                except Exception as e:
+                    _chunk_success, _chunk_msg = False, str(e)
+                if _chunk_success:
+                    break
+                _chunk_attempts += 1
+                _sleep_seconds = _chunk_attempts ** 2
+                warn(
+                    (
+                        f"Failed to sync chunk to {self} "
+                        + f"(attempt {_chunk_attempts} / {_max_chunk_attempts}).\n"
+                        + f"Sleeping for {_sleep_seconds} second"
+                        + ('s' if _sleep_seconds != 1 else '')
+                        + f":\n{_chunk_msg}"
+                    ),
+                    stack=False,
+                )
+                time.sleep(_sleep_seconds)
+
+            num_rows_str = (
+                f"{num_rows:,} rows"
+                if (num_rows := len(_chunk)) != 1
+                else f"{num_rows} row"
+            )
             _chunk_msg = (
-                self._get_chunk_label(_chunk, dt_col)
+                (
+                    "Synced"
+                    if _chunk_success
+                    else "Failed to sync"
+                ) + f" a chunk ({num_rows_str}) to {p}:\n"
+                + self._get_chunk_label(_chunk, dt_col)
                 + '\n'
                 + _chunk_msg
             )
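
The rewrite above replaces the old collect-and-retry-once strategy with per-chunk retries: each chunk gets up to three attempts, sleeping `attempts ** 2` seconds between failures before the result is labeled. A minimal standalone sketch of the same retry pattern (the `sync_chunk` callable is a hypothetical stand-in for the inner `_sync(p, _chunk)` call):

```python
import time
from typing import Callable, Tuple

def sync_with_retries(
    sync_chunk: Callable[[], Tuple[bool, str]],
    max_attempts: int = 3,
) -> Tuple[bool, str]:
    """Attempt a chunk sync up to `max_attempts` times with quadratic backoff."""
    success, msg = False, "Did not sync chunk."
    attempts = 0
    while attempts < max_attempts:
        try:
            success, msg = sync_chunk()
        except Exception as e:
            success, msg = False, str(e)
        if success:
            break
        attempts += 1
        time.sleep(attempts ** 2)  # sleeps 1s, 4s, then 9s after the final failure
    return success, msg
```
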
@@ -351,27 +377,16 @@ def sync(
         )
         chunk_messages = [chunk_msg for _, chunk_msg in results]
         success_bools = [chunk_success for chunk_success, _ in results]
+        num_successes = len([chunk_success for chunk_success, _ in results if chunk_success])
+        num_failures = len([chunk_success for chunk_success, _ in results if not chunk_success])
         success = all(success_bools)
         msg = (
-            f'Synced {len(chunk_messages)} chunk'
+            'Synced '
+            + f'{len(chunk_messages):,} chunk'
             + ('s' if len(chunk_messages) != 1 else '')
-            + f' to {p}:\n\n'
+            + f' to {p}\n({num_successes} succeeded, {num_failures} failed):\n\n'
             + '\n\n'.join(chunk_messages).lstrip().rstrip()
         ).lstrip().rstrip()
-
-        ### If some chunks succeeded, retry the failures.
-        retry_success = True
-        if not success and any(success_bools):
-            if debug:
-                dprint("Retrying failed chunks...")
-            chunks_to_retry = [c for c in failed_chunks]
-            failed_chunks = []
-            for chunk in chunks_to_retry:
-                chunk_success, chunk_msg = _process_chunk(chunk)
-                msg += f"\n\nRetried chunk:\n{chunk_msg}\n"
-                retry_success = retry_success and chunk_success
-
-        success = success and retry_success
         return success, msg

     ### Cast to a dataframe and ensure datatypes are what we expect.
@@ -474,6 +489,7 @@ def get_sync_time(
474
489
  params: Optional[Dict[str, Any]] = None,
475
490
  newest: bool = True,
476
491
  apply_backtrack_interval: bool = False,
492
+ remote: bool = False,
477
493
  round_down: bool = False,
478
494
  debug: bool = False
479
495
  ) -> Union['datetime', int, None]:
@@ -493,6 +509,10 @@ def get_sync_time(
     apply_backtrack_interval: bool, default False
         If `True`, subtract the backtrack interval from the sync time.

+    remote: bool, default False
+        If `True` and the instance connector supports it, return the sync time
+        for the remote table definition.
+
     round_down: bool, default False
         If `True`, round down the datetime value to the nearest minute.

@@ -506,17 +526,30 @@ def get_sync_time(
     """
     from meerschaum.utils.venv import Venv
     from meerschaum.connectors import get_connector_plugin
-    from meerschaum.utils.misc import round_time
+    from meerschaum.utils.misc import round_time, filter_keywords
+    from meerschaum.utils.warnings import warn

     if not self.columns.get('datetime', None):
         return None

-    with Venv(get_connector_plugin(self.instance_connector)):
-        sync_time = self.instance_connector.get_sync_time(
+    connector = self.instance_connector if not remote else self.connector
+    with Venv(get_connector_plugin(connector)):
+        if not hasattr(connector, 'get_sync_time'):
+            warn(
+                f"Connectors of type '{connector.type}' "
+                "do not implement `get_sync_time()`.",
+                stack=False,
+            )
+            return None
+        sync_time = connector.get_sync_time(
             self,
-            params=params,
-            newest=newest,
-            debug=debug,
+            **filter_keywords(
+                connector.get_sync_time,
+                params=params,
+                newest=newest,
+                remote=remote,
+                debug=debug,
+            )
         )

     if round_down and isinstance(sync_time, datetime):
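
`get_sync_time()` now routes through either the instance connector or (with `remote=True`) the pipe's source connector, warns and returns `None` when the connector lacks a `get_sync_time()` method, and forwards only the keyword arguments the connector accepts via `filter_keywords()`. A hedged usage sketch (the connector keys and pipe names here are hypothetical):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe('sql:source', 'weather', instance='sql:main')

# Newest datetime value in the instance table (previous behavior).
local_newest = pipe.get_sync_time(newest=True)

# Oldest datetime value in the remote definition; returns None (with a warning)
# if the source connector does not implement get_sync_time().
remote_oldest = pipe.get_sync_time(newest=False, remote=True)
```
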
meerschaum/core/Pipe/_verify.py CHANGED
@@ -11,6 +11,7 @@ from datetime import datetime, timedelta
 import meerschaum as mrsm
 from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
 from meerschaum.utils.warnings import warn, info
+from meerschaum.config.static import STATIC_CONFIG


 def verify(
@@ -22,6 +23,9 @@ def verify(
     bounded: Optional[bool] = None,
     deduplicate: bool = False,
     workers: Optional[int] = None,
+    batchsize: Optional[int] = None,
+    skip_chunks_with_greater_rowcounts: bool = False,
+    check_rowcounts_only: bool = False,
     debug: bool = False,
     **kwargs: Any
 ) -> SuccessTuple:
@@ -53,6 +57,17 @@ def verify(
         If provided, limit the verification to this many threads.
         Use a value of `1` to sync chunks in series.

+    batchsize: Optional[int], default None
+        If provided, sync this many chunks in parallel.
+        Defaults to `Pipe.get_num_workers()`.
+
+    skip_chunks_with_greater_rowcounts: bool, default False
+        If `True`, compare the rowcounts for a chunk and skip syncing if the pipe's
+        chunk rowcount equals or exceeds the remote's rowcount.
+
+    check_rowcounts_only: bool, default False
+        If `True`, only compare rowcounts and print chunks which are out-of-sync.
+
     debug: bool, default False
         Verbosity toggle.

@@ -64,8 +79,10 @@ def verify(
         A SuccessTuple indicating whether the pipe was successfully resynced.
     """
     from meerschaum.utils.pool import get_pool
+    from meerschaum.utils.formatting import make_header
     from meerschaum.utils.misc import interval_str
     workers = self.get_num_workers(workers)
+    check_rowcounts = skip_chunks_with_greater_rowcounts or check_rowcounts_only

     ### Skip configured bounding in parameters
     ### if `bounded` is explicitly `False`.
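
Together with `batchsize`, the two row-count flags turn `verify()` into either a cheaper resync (skip chunks whose local row-count already meets the remote's) or a pure audit (report out-of-sync chunks without syncing). A usage sketch against the signature above (pipe keys are hypothetical):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe('sql:source', 'weather', instance='sql:main')

# Resync, but skip any chunk whose local row-count >= the remote row-count.
success, msg = pipe.verify(
    batchsize=8,
    skip_chunks_with_greater_rowcounts=True,
)

# Audit only: compare row-counts per chunk and report the out-of-sync bounds.
success, msg = pipe.verify(check_rowcounts_only=True)
print(msg)
```
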
@@ -83,21 +100,26 @@ def verify(
             if bound_time is not None
             else self.get_sync_time(newest=False, debug=debug)
         )
+        if begin is None:
+            remote_oldest_sync_time = self.get_sync_time(newest=False, remote=True, debug=debug)
+            begin = remote_oldest_sync_time
     if bounded and end is None:
         end = self.get_sync_time(newest=True, debug=debug)
+        if end is None:
+            remote_newest_sync_time = self.get_sync_time(newest=True, remote=True, debug=debug)
+            end = remote_newest_sync_time
+        if end is not None:
+            end += (
+                timedelta(minutes=1)
+                if hasattr(end, 'tzinfo')
+                else 1
+            )

     begin, end = self.parse_date_bounds(begin, end)
-
-    if bounded and end is not None:
-        end += (
-            timedelta(minutes=1)
-            if isinstance(end, datetime)
-            else 1
-        )
-
-    cannot_determine_bounds = not self.exists(debug=debug)
+    cannot_determine_bounds = bounded and begin is None and end is None

     if cannot_determine_bounds:
+        warn(f"Cannot determine sync bounds for {self}. Syncing instead...", stack=False)
         sync_success, sync_msg = self.sync(
             begin=begin,
             end=end,
@@ -160,17 +182,16 @@ def verify(
             else chunk_bounds[-1][0]
         )
     )
+    message_header = f"{begin_to_print} - {end_to_print}"

     info(
         f"Verifying {self}:\n    Syncing {len(chunk_bounds)} chunk"
         + ('s' if len(chunk_bounds) != 1 else '')
         + f" ({'un' if not bounded else ''}bounded)"
         + f" of size '{interval_str(chunk_interval)}'"
-        + f" between '{begin_to_print}' and '{end_to_print}'."
+        + f" between '{begin_to_print}' and '{end_to_print}'.\n"
     )

-    pool = get_pool(workers=workers)
-
     ### Dictionary of the form bounds -> success_tuple, e.g.:
     ###     {
     ###         (2023-01-01, 2023-01-02): (True, "Success")
@@ -180,87 +201,169 @@ def verify(
         chunk_begin_and_end: Tuple[
             Union[int, datetime],
             Union[int, datetime]
-        ]
+        ],
+        _workers: Optional[int] = 1,
     ):
         if chunk_begin_and_end in bounds_success_tuples:
             return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]

         chunk_begin, chunk_end = chunk_begin_and_end
+        do_sync = True
+        chunk_success, chunk_msg = False, "Did not sync chunk."
+        if check_rowcounts:
+            existing_rowcount = self.get_rowcount(begin=chunk_begin, end=chunk_end, debug=debug)
+            remote_rowcount = self.get_rowcount(
+                begin=chunk_begin,
+                end=chunk_end,
+                remote=True,
+                debug=debug,
+            )
+            checked_rows_str = (
+                f"checked {existing_rowcount} row"
+                + ("s" if existing_rowcount != 1 else '')
+                + f" vs {remote_rowcount} remote"
+            )
+            if (
+                existing_rowcount is not None
+                and remote_rowcount is not None
+                and existing_rowcount >= remote_rowcount
+            ):
+                do_sync = False
+                chunk_success, chunk_msg = True, (
+                    "Row-count is up-to-date "
+                    f"({checked_rows_str})."
+                )
+            elif check_rowcounts_only:
+                do_sync = False
+                chunk_success, chunk_msg = True, (
+                    f"Row-counts are out-of-sync ({checked_rows_str})."
+                )
+
         chunk_success, chunk_msg = self.sync(
             begin=chunk_begin,
             end=chunk_end,
             params=params,
-            workers=1,
+            workers=_workers,
             debug=debug,
             **kwargs
-        )
+        ) if do_sync else (chunk_success, chunk_msg)
         chunk_msg = chunk_msg.strip()
         if ' - ' not in chunk_msg:
             chunk_label = f"{chunk_begin} - {chunk_end}"
-            chunk_msg = f'{chunk_label}\n{chunk_msg}'
+            chunk_msg = f'Verified chunk for {self}:\n{chunk_label}\n{chunk_msg}'
         mrsm.pprint((chunk_success, chunk_msg))
         return chunk_begin_and_end, (chunk_success, chunk_msg)

     ### If we have more than one chunk, attempt to sync the first one and return if it fails.
     if len(chunk_bounds) > 1:
         first_chunk_bounds = chunk_bounds[0]
+        first_label = f"{first_chunk_bounds[0]} - {first_chunk_bounds[1]}"
+        info(f"Verifying first chunk for {self}:\n    {first_label}")
         (
             (first_begin, first_end),
             (first_success, first_msg)
-        ) = process_chunk_bounds(first_chunk_bounds)
+        ) = process_chunk_bounds(first_chunk_bounds, _workers=workers)
         if not first_success:
             return (
                 first_success,
-                f"\n{first_begin} - {first_end}\n"
+                f"\n{first_label}\n"
                 + f"Failed to sync first chunk:\n{first_msg}"
             )
         bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
+        info(f"Completed first chunk for {self}:\n    {first_label}\n")

-    bounds_success_tuples.update(dict(pool.map(process_chunk_bounds, chunk_bounds)))
-    bounds_success_bools = {bounds: tup[0] for bounds, tup in bounds_success_tuples.items()}
+    pool = get_pool(workers=workers)
+    batches = self.get_chunk_bounds_batches(chunk_bounds, batchsize=batchsize, workers=workers)

-    message_header = f"{begin_to_print} - {end_to_print}"
-    if all(bounds_success_bools.values()):
-        msg = get_chunks_success_message(bounds_success_tuples, header=message_header)
-        if deduplicate:
-            deduplicate_success, deduplicate_msg = self.deduplicate(
-                begin=begin,
-                end=end,
-                params=params,
-                workers=workers,
-                debug=debug,
-                **kwargs
+    def process_batch(
+        batch_chunk_bounds: Tuple[
+            Tuple[Union[datetime, int, None], Union[datetime, int, None]],
+            ...
+        ]
+    ):
+        _batch_begin = batch_chunk_bounds[0][0]
+        _batch_end = batch_chunk_bounds[-1][-1]
+        batch_message_header = f"{_batch_begin} - {_batch_end}"
+        batch_bounds_success_tuples = dict(pool.map(process_chunk_bounds, batch_chunk_bounds))
+        bounds_success_tuples.update(batch_bounds_success_tuples)
+        batch_bounds_success_bools = {
+            bounds: tup[0]
+            for bounds, tup in batch_bounds_success_tuples.items()
+        }
+
+        if all(batch_bounds_success_bools.values()):
+            msg = get_chunks_success_message(
+                batch_bounds_success_tuples,
+                header=batch_message_header,
+                check_rowcounts_only=check_rowcounts_only,
+            )
+            if deduplicate:
+                deduplicate_success, deduplicate_msg = self.deduplicate(
+                    begin=_batch_begin,
+                    end=_batch_end,
+                    params=params,
+                    workers=workers,
+                    debug=debug,
+                    **kwargs
+                )
+                return deduplicate_success, msg + '\n\n' + deduplicate_msg
+            return True, msg
+
+        batch_chunk_bounds_to_resync = [
+            bounds
+            for bounds, success in zip(batch_chunk_bounds, batch_bounds_success_bools)
+            if not success
+        ]
+        batch_bounds_to_print = [
+            f"{bounds[0]} - {bounds[1]}"
+            for bounds in batch_chunk_bounds_to_resync
+        ]
+        if batch_bounds_to_print:
+            warn(
+                "Will resync the following failed chunks:\n    "
+                + '\n    '.join(batch_bounds_to_print),
+                stack=False,
             )
-            return deduplicate_success, msg + '\n\n' + deduplicate_msg
-        return True, msg
-
-    chunk_bounds_to_resync = [
-        bounds
-        for bounds, success in zip(chunk_bounds, bounds_success_bools)
-        if not success
-    ]
-    bounds_to_print = [
-        f"{bounds[0]} - {bounds[1]}"
-        for bounds in chunk_bounds_to_resync
-    ]
-    if bounds_to_print:
-        warn(
-            f"Will resync the following failed chunks:\n    "
-            + '\n    '.join(bounds_to_print),
-            stack=False,
-        )
-
-    retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
-    bounds_success_tuples.update(retry_bounds_success_tuples)
-    retry_bounds_success_bools = {
-        bounds: tup[0]
-        for bounds, tup in retry_bounds_success_tuples.items()
-    }

-    if all(retry_bounds_success_bools.values()):
-        message = (
-            get_chunks_success_message(bounds_success_tuples, header=message_header)
-            + f"\nRetried {len(chunk_bounds_to_resync)} chunks."
+        retry_bounds_success_tuples = dict(pool.map(
+            process_chunk_bounds,
+            batch_chunk_bounds_to_resync
+        ))
+        batch_bounds_success_tuples.update(retry_bounds_success_tuples)
+        bounds_success_tuples.update(retry_bounds_success_tuples)
+        retry_bounds_success_bools = {
+            bounds: tup[0]
+            for bounds, tup in retry_bounds_success_tuples.items()
+        }
+
+        if all(retry_bounds_success_bools.values()):
+            chunks_message = (
+                get_chunks_success_message(
+                    batch_bounds_success_tuples,
+                    header=batch_message_header,
+                    check_rowcounts_only=check_rowcounts_only,
+                ) + f"\nRetried {len(batch_chunk_bounds_to_resync)} chunk" + (
+                    's'
+                    if len(batch_chunk_bounds_to_resync) != 1
+                    else ''
+                ) + "."
+            )
+            if deduplicate:
+                deduplicate_success, deduplicate_msg = self.deduplicate(
+                    begin=_batch_begin,
+                    end=_batch_end,
+                    params=params,
+                    workers=workers,
+                    debug=debug,
+                    **kwargs
+                )
+                return deduplicate_success, chunks_message + '\n\n' + deduplicate_msg
+            return True, chunks_message
+
+        batch_chunks_message = get_chunks_success_message(
+            batch_bounds_success_tuples,
+            header=batch_message_header,
+            check_rowcounts_only=check_rowcounts_only,
         )
         if deduplicate:
             deduplicate_success, deduplicate_msg = self.deduplicate(
  deduplicate_success, deduplicate_msg = self.deduplicate(
@@ -271,26 +374,57 @@ def verify(
                 debug=debug,
                 **kwargs
             )
-            return deduplicate_success, message + '\n\n' + deduplicate_msg
-        return True, message
-
-    message = get_chunks_success_message(bounds_success_tuples, header=message_header)
-    if deduplicate:
-        deduplicate_success, deduplicate_msg = self.deduplicate(
-            begin=begin,
-            end=end,
-            params=params,
-            workers=workers,
-            debug=debug,
-            **kwargs
+            return deduplicate_success, batch_chunks_message + '\n\n' + deduplicate_msg
+        return False, batch_chunks_message
+
+    num_batches = len(batches)
+    for batch_i, batch in enumerate(batches):
+        batch_begin = batch[0][0]
+        batch_end = batch[-1][-1]
+        batch_counter_str = f"({(batch_i + 1):,}/{num_batches:,})"
+        batch_label = f"batch {batch_counter_str}:\n{batch_begin} - {batch_end}"
+        retry_failed_batch = True
+        try:
+            for_self = 'for ' + str(self)
+            info(f"Verifying {batch_label.replace(':\n', ' ' + for_self + '...\n    ')}\n")
+            batch_success, batch_msg = process_batch(batch)
+        except (KeyboardInterrupt, Exception) as e:
+            batch_success = False
+            batch_msg = str(e)
+            retry_failed_batch = False
+
+        batch_msg_to_print = (
+            f"{make_header('Completed batch ' + batch_counter_str + ' ' + for_self + ':')}\n{batch_msg}"
         )
-        return deduplicate_success, message + '\n\n' + deduplicate_msg
-    return False, message
+        mrsm.pprint((batch_success, batch_msg_to_print))
+
+        if not batch_success and retry_failed_batch:
+            info(f"Retrying batch {batch_counter_str}...")
+            retry_batch_success, retry_batch_msg = process_batch(batch)
+            retry_batch_msg_to_print = (
+                f"Retried {make_header('batch ' + batch_label)}\n{retry_batch_msg}"
+            )
+            mrsm.pprint((retry_batch_success, retry_batch_msg_to_print))
+
+            batch_success = retry_batch_success
+            batch_msg = retry_batch_msg
+
+        if not batch_success:
+            return False, f"Failed to verify {batch_label}:\n\n{batch_msg}"
+
+    chunks_message = get_chunks_success_message(
+        bounds_success_tuples,
+        header=message_header,
+        check_rowcounts_only=check_rowcounts_only,
+    )
+    return True, chunks_message
+


 def get_chunks_success_message(
     chunk_success_tuples: Dict[Tuple[Any, Any], SuccessTuple],
     header: str = '',
+    check_rowcounts_only: bool = False,
 ) -> str:
     """
     Sum together all of the inserts and updates from the chunks.
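
The verification loop now proceeds batch by batch: `Pipe.get_chunk_bounds_batches()` partitions the chunk bounds, a worker pool maps over each batch, failed chunks are retried within the batch, and a failed batch is retried once more before `verify()` bails out. A schematic sketch of that control flow (`process_chunk` here is a hypothetical callable returning `(bounds, success)` pairs):

```python
from multiprocessing.dummy import Pool  # thread pool, standing in for get_pool()

def verify_in_batches(process_chunk, batches, workers: int = 4) -> bool:
    """Sync batches of chunk bounds with chunk-level and batch-level retries."""
    pool = Pool(workers)

    def process_batch(batch) -> bool:
        results = dict(pool.map(process_chunk, batch))  # bounds -> success
        failed = [bounds for bounds, ok in results.items() if not ok]
        if failed:
            # Retry only the chunks which failed the first pass.
            results.update(dict(pool.map(process_chunk, failed)))
        return all(results.values())

    for batch in batches:
        # Give each failed batch one full retry before giving up.
        if not process_batch(batch) and not process_batch(batch):
            return False
    return True
```
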
@@ -319,10 +453,19 @@ def get_chunks_success_message(
     inserts = [stat['inserted'] for stat in chunk_stats]
     updates = [stat['updated'] for stat in chunk_stats]
     upserts = [stat['upserted'] for stat in chunk_stats]
+    checks = [stat['checked'] for stat in chunk_stats]
+    out_of_sync_bounds_messages = {
+        bounds: message
+        for bounds, (success, message) in chunk_success_tuples.items()
+        if 'out-of-sync' in message
+    } if check_rowcounts_only else {}
+
     num_inserted = sum(inserts)
     num_updated = sum(updates)
     num_upserted = sum(upserts)
+    num_checked = sum(checks)
     num_fails = len(fail_chunk_bounds_tuples)
+    num_out_of_sync = len(out_of_sync_bounds_messages)

     header = (header + "\n") if header else ""
     stats_msg = items_str(
@@ -330,22 +473,38 @@ def get_chunks_success_message(
             ([f'inserted {num_inserted:,}'] if num_inserted else [])
             + ([f'updated {num_updated:,}'] if num_updated else [])
             + ([f'upserted {num_upserted:,}'] if num_upserted else [])
+            + ([f'checked {num_checked:,}'] if num_checked else [])
         ) or ['synced 0'],
         quotes=False,
         and_=False,
     )

     success_msg = (
-        f"Successfully synced {len(chunk_success_tuples):,} chunk"
+        "Successfully "
+        + ('synced' if not check_rowcounts_only else 'checked')
+        + f" {len(chunk_success_tuples):,} chunk"
         + ('s' if len(chunk_success_tuples) != 1 else '')
         + '\n(' + stats_msg
         + ' rows in total).'
     )
+    if check_rowcounts_only:
+        success_msg += (
+            f"\n\nFound {num_out_of_sync} chunk"
+            + ('s' if num_out_of_sync != 1 else '')
+            + ' to be out-of-sync'
+            + ('.' if num_out_of_sync == 0 else ':\n\n    ')
+            + '\n    '.join(
+                [
+                    f'{lbound} - {rbound}'
+                    for lbound, rbound in out_of_sync_bounds_messages
+                ]
+            )
+        )
     fail_msg = (
         ''
         if num_fails == 0
         else (
-            f"\n\nFailed to sync {num_fails} chunk"
+            f"\n\nFailed to sync {num_fails:,} chunk"
             + ('s' if num_fails != 1 else '') + ":\n"
             + '\n'.join([
                 f"{fail_begin} - {fail_end}\n{msg}\n"
@@ -436,9 +595,15 @@ def get_bound_time(self, debug: bool = False) -> Union[datetime, int, None]:

     bound_time = sync_time - bound_interval
     oldest_sync_time = self.get_sync_time(newest=False, debug=debug)
+    max_bound_time_days = STATIC_CONFIG['pipes']['max_bound_time_days']
+
+    extreme_sync_times_delta = (
+        hasattr(oldest_sync_time, 'tzinfo')
+        and (sync_time - oldest_sync_time) >= timedelta(days=max_bound_time_days)
+    )

     return (
         bound_time
-        if bound_time > oldest_sync_time
+        if bound_time > oldest_sync_time or extreme_sync_times_delta
         else None
     )
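
Previously `get_bound_time()` returned `None` (verify everything) whenever the computed bound fell at or before the oldest sync time; the `max_bound_time_days` escape hatch keeps the bound for pipes that span an extreme range. A worked example with an assumed configured value (the real default lives in `STATIC_CONFIG['pipes']['max_bound_time_days']`):

```python
from datetime import datetime, timedelta, timezone

max_bound_time_days = 365  # assumed value, for illustration only
sync_time = datetime(2025, 1, 1, tzinfo=timezone.utc)         # newest row
oldest_sync_time = datetime(2015, 1, 1, tzinfo=timezone.utc)  # oldest row
bound_interval = timedelta(days=365 * 11)                     # bound falls before the oldest row

bound_time = sync_time - bound_interval
extreme_sync_times_delta = (
    (sync_time - oldest_sync_time) >= timedelta(days=max_bound_time_days)
)

# Old behavior: bound_time <= oldest_sync_time, so None (verify the whole table).
# New behavior: the ten-year spread trips the escape hatch and the bound is kept.
print(bound_time if bound_time > oldest_sync_time or extreme_sync_times_delta else None)
```
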
meerschaum/core/User/__init__.py CHANGED
@@ -9,7 +9,7 @@ Manage users' metadata via the User class
 from typing import Optional

 import meerschaum as mrsm
-from meerschaum.core.User._User import User, hash_password, verify_password
+from meerschaum.core.User._User import User


 def is_user_allowed_to_execute(
@@ -19,8 +19,6 @@ def is_user_allowed_to_execute(
     """
     Return a `SuccessTuple` indicating whether a given user is allowed to execute actions.
     """
-    print(f"{debug=}")
-    print(f"{user=}")
     if user is None:
         return True, "Success"

@@ -29,9 +27,7 @@ def is_user_allowed_to_execute(
     if user_type == 'admin':
         return True, "Success"

-    from meerschaum.config import get_config
-
-    allow_non_admin = get_config('system', 'api', 'permissions', 'actions', 'non_admin')
+    allow_non_admin = mrsm.get_config('system', 'api', 'permissions', 'actions', 'non_admin')
     if not allow_non_admin:
         return False, "The administrator for this server has not allowed users to perform actions."

meerschaum/jobs/_Job.py CHANGED
@@ -30,7 +30,7 @@ if TYPE_CHECKING:
     from meerschaum.jobs._Executor import Executor

 BANNED_CHARS: List[str] = [
-    ',', ';', "'", '"',
+    ',', ';', "'", '"', '.', '$', '#', '=', '*', '&', '!', '`', '~',
 ]
 RESTART_FLAGS: List[str] = [
     '-s',
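
Job names are now rejected on a much larger character set. A sketch of the kind of validation `BANNED_CHARS` supports (the helper function is illustrative, not the library's API):

```python
BANNED_CHARS = [',', ';', "'", '"', '.', '$', '#', '=', '*', '&', '!', '`', '~']

def is_valid_job_name(name: str) -> bool:
    """Reject job names containing any banned character."""
    return not any(ch in name for ch in BANNED_CHARS)

assert is_valid_job_name('etl-weather')
assert not is_valid_job_name('etl.weather')  # '.' is newly banned
```
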
meerschaum/jobs/__init__.py CHANGED
@@ -150,6 +150,13 @@ def get_filtered_jobs(
     }

     jobs_to_return = {}
+    filter_list_without_underscores = [name for name in filter_list if not name.startswith('_')]
+    filter_list_with_underscores = [name for name in filter_list if name.startswith('_')]
+    if (
+        filter_list_without_underscores and not filter_list_with_underscores
+        or filter_list_with_underscores and not filter_list_without_underscores
+    ):
+        pass
     for name in filter_list:
         job = jobs.get(name, None)
         if job is None:
@@ -161,6 +168,14 @@ def get_filtered_jobs(
             continue
         jobs_to_return[name] = job

+    if not jobs_to_return and filter_list_with_underscores:
+        names_to_exclude = [name.lstrip('_') for name in filter_list_with_underscores]
+        return {
+            name: job
+            for name, job in jobs.items()
+            if name not in names_to_exclude
+        }
+
     return jobs_to_return

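
The new fallback turns a leading underscore into exclusion syntax: when none of the requested names matches a job and all of them start with `_`, the filter inverts and returns every job except the stripped names. A standalone sketch of that semantics (simplified; the real function also handles missing names):

```python
def filter_jobs(jobs: dict, filter_list: list) -> dict:
    """Mimic get_filtered_jobs(): underscore-prefixed names become exclusions."""
    if not filter_list:
        return jobs
    included = {name: jobs[name] for name in filter_list if name in jobs}
    excluded = [name.lstrip('_') for name in filter_list if name.startswith('_')]
    if not included and excluded:
        return {name: job for name, job in jobs.items() if name not in excluded}
    return included

jobs = {'alpha': 'job-a', 'beta': 'job-b'}
assert filter_jobs(jobs, ['_alpha']) == {'beta': 'job-b'}
assert filter_jobs(jobs, ['beta']) == {'beta': 'job-b'}
```
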
meerschaum/utils/dataframe.py CHANGED
@@ -7,6 +7,8 @@ Utility functions for working with DataFrames.
 """

 from __future__ import annotations
+
+import pathlib
 from datetime import datetime, timezone
 from collections import defaultdict