meerschaum 2.7.2__py3-none-any.whl → 2.7.3__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -50,6 +50,7 @@ def sync(
50
50
  retries: int = 10,
51
51
  min_seconds: int = 1,
52
52
  check_existing: bool = True,
53
+ enforce_dtypes: bool = True,
53
54
  blocking: bool = True,
54
55
  workers: Optional[int] = None,
55
56
  callback: Optional[Callable[[Tuple[bool, str]], Any]] = None,
@@ -89,6 +90,10 @@ def sync(
89
90
  check_existing: bool, default True
90
91
  If `True`, pull and diff with existing data from the pipe.
91
92
 
93
+ enforce_dtypes: bool, default True
94
+ If `True`, enforce dtypes on incoming data.
95
+ Set this to `False` if the incoming rows are expected to be of the correct dtypes.
96
+
92
97
  blocking: bool, default True
93
98
  If `True`, wait for sync to finish and return its result, otherwise
94
99
  asynchronously sync (oxymoron?) and return success. Defaults to `True`.
@@ -123,8 +128,6 @@ def sync(
123
128
  A `SuccessTuple` of success (`bool`) and message (`str`).
124
129
  """
125
130
  from meerschaum.utils.debug import dprint, _checkpoint
126
- from meerschaum.connectors import custom_types
127
- from meerschaum.plugins import Plugin
128
131
  from meerschaum.utils.formatting import get_console
129
132
  from meerschaum.utils.venv import Venv
130
133
  from meerschaum.connectors import get_connector_plugin
@@ -366,7 +369,12 @@ def sync(
366
369
  return success, msg
367
370
 
368
371
  ### Cast to a dataframe and ensure datatypes are what we expect.
369
- df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
372
+ df = self.enforce_dtypes(
373
+ df,
374
+ chunksize=chunksize,
375
+ enforce=enforce_dtypes,
376
+ debug=debug,
377
+ )
370
378
 
371
379
  ### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
372
380
  self._persist_new_json_columns(df, debug=debug)
@@ -162,7 +162,8 @@ def verify(
162
162
  )
163
163
 
164
164
  info(
165
- f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
165
+ f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk"
166
+ + ('s' if len(chunk_bounds) != 1 else '')
166
167
  + f" ({'un' if not bounded else ''}bounded)"
167
168
  + f" of size '{interval_str(chunk_interval)}'"
168
169
  + f" between '{begin_to_print}' and '{end_to_print}'."
@@ -194,6 +195,9 @@ def verify(
194
195
  **kwargs
195
196
  )
196
197
  chunk_msg = chunk_msg.strip()
198
+ if ' - ' not in chunk_msg:
199
+ chunk_label = f"{chunk_begin} - {chunk_end}"
200
+ chunk_msg = f'{chunk_label}\n{chunk_msg}'
197
201
  mrsm.pprint((chunk_success, chunk_msg))
198
202
  return chunk_begin_and_end, (chunk_success, chunk_msg)
199
203
 
@@ -323,16 +327,16 @@ def get_chunks_success_message(
323
327
  header = (header + "\n") if header else ""
324
328
  stats_msg = items_str(
325
329
  (
326
- ([f'inserted {num_inserted}'] if num_inserted else [])
327
- + ([f'updated {num_updated}'] if num_updated else [])
328
- + ([f'upserted {num_upserted}'] if num_upserted else [])
330
+ ([f'inserted {num_inserted:,}'] if num_inserted else [])
331
+ + ([f'updated {num_updated:,}'] if num_updated else [])
332
+ + ([f'upserted {num_upserted:,}'] if num_upserted else [])
329
333
  ) or ['synced 0'],
330
334
  quotes=False,
331
335
  and_=False,
332
336
  )
333
337
 
334
338
  success_msg = (
335
- f"Successfully synced {len(chunk_success_tuples)} chunk"
339
+ f"Successfully synced {len(chunk_success_tuples):,} chunk"
336
340
  + ('s' if len(chunk_success_tuples) != 1 else '')
337
341
  + '\n(' + stats_msg
338
342
  + ' rows in total).'
@@ -342,7 +342,7 @@ def check_restart_jobs(
342
342
 
343
343
  def _check_restart_jobs_against_lock(*args, **kwargs):
344
344
  from meerschaum.config.paths import CHECK_JOBS_LOCK_PATH
345
- fasteners = mrsm.attempt_import('fasteners')
345
+ fasteners = mrsm.attempt_import('fasteners', lazy=False)
346
346
  lock = fasteners.InterProcessLock(CHECK_JOBS_LOCK_PATH)
347
347
  with lock:
348
348
  check_restart_jobs(*args, **kwargs)
@@ -390,6 +390,7 @@ def parse_df_datetimes(
390
390
  strip_timezone: bool = False,
391
391
  chunksize: Optional[int] = None,
392
392
  dtype_backend: str = 'numpy_nullable',
393
+ ignore_all: bool = False,
393
394
  debug: bool = False,
394
395
  ) -> 'pd.DataFrame':
395
396
  """
@@ -414,6 +415,9 @@ def parse_df_datetimes(
414
415
  use this as the datatypes backend.
415
416
  Accepted values are 'numpy_nullable' and 'pyarrow'.
416
417
 
418
+ ignore_all: bool, default False
419
+ If `True`, do not attempt to cast any columns to datetimes.
420
+
417
421
  debug: bool, default False
418
422
  Verbosity toggle.
419
423
 
@@ -504,7 +508,11 @@ def parse_df_datetimes(
504
508
  if 'datetime' in str(dtype)
505
509
  ]
506
510
  )
507
- cols_to_inspect = [col for col in pdf.columns if col not in ignore_cols]
511
+ cols_to_inspect = [
512
+ col
513
+ for col in pdf.columns
514
+ if col not in ignore_cols
515
+ ] if not ignore_all else []
508
516
 
509
517
  if len(cols_to_inspect) == 0:
510
518
  if debug:
@@ -1263,7 +1271,7 @@ def df_from_literal(
1263
1271
  import ast
1264
1272
  try:
1265
1273
  val = ast.literal_eval(literal)
1266
- except Exception as e:
1274
+ except Exception:
1267
1275
  warn(
1268
1276
  "Failed to parse value from string:\n" + f"{literal}" +
1269
1277
  "\n\nWill cast as a string instead."\
@@ -7,7 +7,7 @@ Utility functions for working with SQL data types.
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
- from meerschaum.utils.typing import Dict, Union, Tuple, List
10
+ from meerschaum.utils.typing import Dict, Union, Tuple
11
11
 
12
12
  NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
13
13
  'mariadb': (38, 20),
@@ -57,18 +57,7 @@ def colored_fallback(*args, **kw):
57
57
  return ' '.join(args)
58
58
 
59
59
  def translate_rich_to_termcolor(*colors) -> tuple:
60
- """Translate between rich and more_termcolor terminology.
61
- This is probably prone to breaking.
62
-
63
- Parameters
64
- ----------
65
- *colors :
66
-
67
-
68
- Returns
69
- -------
70
-
71
- """
60
+ """Translate between rich and more_termcolor terminology."""
72
61
  _colors = []
73
62
  for c in colors:
74
63
  _c_list = []
@@ -131,7 +120,7 @@ def _init():
131
120
  try:
132
121
  colorama.init(autoreset=False)
133
122
  success = True
134
- except Exception as e:
123
+ except Exception:
135
124
  import traceback
136
125
  traceback.print_exc()
137
126
  _attrs['ANSI'], _attrs['UNICODE'], _attrs['CHARSET'] = False, False, 'ascii'
@@ -219,7 +208,7 @@ def get_console():
219
208
  rich_console = attempt_import('rich.console')
220
209
  try:
221
210
  console = rich_console.Console(force_terminal=True, color_system='truecolor')
222
- except Exception as e:
211
+ except Exception:
223
212
  console = None
224
213
  return console
225
214
 
@@ -307,7 +296,6 @@ def format_success_tuple(
307
296
  calm: bool, default False
308
297
  If `True`, use the default emoji and color scheme.
309
298
  """
310
- from meerschaum.config.static import STATIC_CONFIG
311
299
  _init()
312
300
  try:
313
301
  status = 'success' if tup[0] else 'failure'
@@ -381,12 +369,9 @@ def print_options(
381
369
  If `True`, print the option's number in the list (1 index).
382
370
 
383
371
  """
384
- import os
385
372
  from meerschaum.utils.packages import import_rich
386
- from meerschaum.utils.formatting import make_header, highlight_pipes
387
- from meerschaum.actions import actions as _actions
388
- from meerschaum.utils.misc import get_cols_lines, string_width, iterate_chunks
389
-
373
+ from meerschaum.utils.formatting import highlight_pipes
374
+ from meerschaum.utils.misc import get_cols_lines, string_width
390
375
 
391
376
  if options is None:
392
377
  options = {}
@@ -429,15 +414,10 @@ def print_options(
429
414
  continue
430
415
  break
431
416
 
432
- from meerschaum.utils.formatting import pprint, get_console
433
417
  from meerschaum.utils.packages import attempt_import
434
- rich_columns = attempt_import('rich.columns')
435
- rich_panel = attempt_import('rich.panel')
436
418
  rich_table = attempt_import('rich.table')
437
419
  Text = attempt_import('rich.text').Text
438
420
  box = attempt_import('rich.box')
439
- Panel = rich_panel.Panel
440
- Columns = rich_columns.Columns
441
421
  Table = rich_table.Table
442
422
 
443
423
  if _header is not None:
@@ -482,9 +482,9 @@ def print_pipes_results(
482
482
 
483
483
 
484
484
  def extract_stats_from_message(
485
- message: str,
486
- stat_keys: Optional[List[str]] = None,
487
- ) -> Dict[str, int]:
485
+ message: str,
486
+ stat_keys: Optional[List[str]] = None,
487
+ ) -> Dict[str, int]:
488
488
  """
489
489
  Given a sync message, return the insert, update, upsert stats from within.
490
490
 
@@ -511,9 +511,9 @@ def extract_stats_from_message(
511
511
 
512
512
 
513
513
  def extract_stats_from_line(
514
- line: str,
515
- stat_keys: List[str],
516
- ) -> Dict[str, int]:
514
+ line: str,
515
+ stat_keys: List[str],
516
+ ) -> Dict[str, int]:
517
517
  """
518
518
  Return the insert, update, upsert stats from a single line.
519
519
  """
@@ -524,6 +524,9 @@ def extract_stats_from_line(
524
524
  if search_key not in line.lower():
525
525
  continue
526
526
 
527
+ ### the count may be formatted with commas
528
+ line = line.replace(',', '')
529
+
527
530
  ### stat_text starts with the digits we want.
528
531
  try:
529
532
  stat_text = line.lower().split(search_key + ' ')[1]