meerschaum 2.6.17__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
Files changed (31) hide show
  1. meerschaum/actions/delete.py +65 -69
  2. meerschaum/actions/install.py +1 -2
  3. meerschaum/config/_default.py +1 -1
  4. meerschaum/config/_paths.py +2 -1
  5. meerschaum/config/_version.py +1 -1
  6. meerschaum/connectors/api/_pipes.py +4 -3
  7. meerschaum/connectors/sql/_pipes.py +63 -25
  8. meerschaum/connectors/sql/_sql.py +6 -1
  9. meerschaum/connectors/valkey/_pipes.py +12 -1
  10. meerschaum/core/Pipe/__init__.py +23 -13
  11. meerschaum/core/Pipe/_attributes.py +19 -0
  12. meerschaum/core/Pipe/_dtypes.py +1 -1
  13. meerschaum/core/Pipe/_sync.py +61 -21
  14. meerschaum/core/Pipe/_verify.py +8 -7
  15. meerschaum/plugins/_Plugin.py +11 -14
  16. meerschaum/utils/daemon/Daemon.py +18 -11
  17. meerschaum/utils/dataframe.py +175 -13
  18. meerschaum/utils/dtypes/__init__.py +103 -14
  19. meerschaum/utils/dtypes/sql.py +26 -0
  20. meerschaum/utils/misc.py +8 -8
  21. meerschaum/utils/sql.py +64 -11
  22. meerschaum/utils/venv/_Venv.py +4 -4
  23. meerschaum/utils/venv/__init__.py +33 -13
  24. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +1 -1
  25. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +31 -31
  26. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
  27. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
  28. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
  29. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  30. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
  31. {meerschaum-2.6.17.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
@@ -106,6 +106,7 @@ class Pipe:
106
106
  upsert,
107
107
  static,
108
108
  tzinfo,
109
+ enforce,
109
110
  get_columns,
110
111
  get_columns_types,
111
112
  get_columns_indices,
@@ -132,6 +133,7 @@ class Pipe:
132
133
  _persist_new_json_columns,
133
134
  _persist_new_numeric_columns,
134
135
  _persist_new_uuid_columns,
136
+ _persist_new_bytes_columns,
135
137
  )
136
138
  from ._verify import (
137
139
  verify,
@@ -162,12 +164,14 @@ class Pipe:
162
164
  upsert: Optional[bool] = None,
163
165
  autoincrement: Optional[bool] = None,
164
166
  static: Optional[bool] = None,
167
+ enforce: Optional[bool] = None,
165
168
  mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
166
169
  cache: bool = False,
167
170
  debug: bool = False,
168
171
  connector_keys: Optional[str] = None,
169
172
  metric_key: Optional[str] = None,
170
173
  location_key: Optional[str] = None,
174
+ instance_keys: Optional[str] = None,
171
175
  indexes: Union[Dict[str, str], List[str], None] = None,
172
176
  ):
173
177
  """
@@ -219,6 +223,10 @@ class Pipe:
219
223
  static: Optional[bool], default None
220
224
  If `True`, set `static` in the parameters.
221
225
 
226
+ enforce: Optionanl[bool], default None
227
+ If `False`, skip data type enforcement.
228
+ Default behavior is `True`.
229
+
222
230
  temporary: bool, default False
223
231
  If `True`, prevent instance tables (pipes, users, plugins) from being created.
224
232
 
@@ -319,11 +327,13 @@ class Pipe:
319
327
  if isinstance(static, bool):
320
328
  self._attributes['parameters']['static'] = static
321
329
 
330
+ if isinstance(enforce, bool):
331
+ self._attributes['parameters']['enforce'] = enforce
332
+
322
333
  ### NOTE: The parameters dictionary is {} by default.
323
334
  ### A Pipe may be registered without parameters, then edited,
324
335
  ### or a Pipe may be registered with parameters set in-memory first.
325
- # from meerschaum.config import get_config
326
- _mrsm_instance = mrsm_instance if mrsm_instance is not None else instance
336
+ _mrsm_instance = mrsm_instance if mrsm_instance is not None else (instance or instance_keys)
327
337
  if _mrsm_instance is None:
328
338
  _mrsm_instance = get_config('meerschaum', 'instance', patch=True)
329
339
 
@@ -341,10 +351,10 @@ class Pipe:
341
351
  Return the four keys needed to reconstruct this pipe.
342
352
  """
343
353
  return {
344
- 'connector': self.connector_keys,
345
- 'metric': self.metric_key,
346
- 'location': self.location_key,
347
- 'instance': self.instance_keys,
354
+ 'connector_keys': self.connector_keys,
355
+ 'metric_key': self.metric_key,
356
+ 'location_key': self.location_key,
357
+ 'instance_keys': self.instance_keys,
348
358
  }
349
359
 
350
360
  def keys(self) -> List[str]:
@@ -385,7 +395,7 @@ class Pipe:
385
395
  warnings.simplefilter('ignore')
386
396
  try:
387
397
  conn = parse_instance_keys(self.connector_keys)
388
- except Exception as e:
398
+ except Exception:
389
399
  conn = None
390
400
  if conn:
391
401
  self._connector = conn
@@ -429,7 +439,7 @@ class Pipe:
429
439
  _fetch_patch = {
430
440
  'fetch': ({
431
441
  'definition': (
432
- f"SELECT * FROM "
442
+ "SELECT * FROM "
433
443
  + sql_item_name(
434
444
  str(self.target),
435
445
  self.instance_connector.flavor,
@@ -467,7 +477,7 @@ class Pipe:
467
477
  and self.location_key == other.location_key
468
478
  and self.instance_keys == other.instance_keys
469
479
  )
470
- except Exception as e:
480
+ except Exception:
471
481
  return False
472
482
 
473
483
  def __hash__(self):
@@ -496,11 +506,11 @@ class Pipe:
496
506
  Define the state dictionary (pickling).
497
507
  """
498
508
  return {
499
- 'connector': self.connector_keys,
500
- 'metric': self.metric_key,
501
- 'location': self.location_key,
509
+ 'connector_keys': self.connector_keys,
510
+ 'metric_key': self.metric_key,
511
+ 'location_key': self.location_key,
502
512
  'parameters': self.parameters,
503
- 'instance': self.instance_keys,
513
+ 'instance_keys': self.instance_keys,
504
514
  }
505
515
 
506
516
  def __setstate__(self, _state: Dict[str, Any]):
@@ -289,6 +289,25 @@ def tzinfo(self) -> Union[None, timezone]:
289
289
  return None
290
290
 
291
291
 
292
@property
def enforce(self) -> bool:
    """
    Return the `enforce` parameter for the pipe.

    Defaults to `True`; the default is persisted into `self.parameters`
    on first access so later reads are consistent.
    """
    if 'enforce' not in self.parameters:
        self.parameters['enforce'] = True

    return self.parameters['enforce']


@enforce.setter
def enforce(self, _enforce: bool) -> None:
    """
    Set the `enforce` parameter for the pipe.
    """
    ### BUG FIX: previously wrote to the key '_enforce', which the getter
    ### never reads — assignments to `pipe.enforce` silently had no effect.
    self.parameters['enforce'] = _enforce
310
+
292
311
  def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
293
312
  """
294
313
  Check if the requested columns are defined.
@@ -41,7 +41,7 @@ def enforce_dtypes(
41
41
  )
42
42
  return df
43
43
 
44
- pipe_dtypes = self.dtypes
44
+ pipe_dtypes = self.dtypes if self.enforce else {}
45
45
 
46
46
  try:
47
47
  if isinstance(df, str):
@@ -368,10 +368,11 @@ def sync(
368
368
  ### Cast to a dataframe and ensure datatypes are what we expect.
369
369
  df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
370
370
 
371
- ### Capture `numeric`, `uuid`, and `json` columns.
371
+ ### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
372
372
  self._persist_new_json_columns(df, debug=debug)
373
373
  self._persist_new_numeric_columns(df, debug=debug)
374
374
  self._persist_new_uuid_columns(df, debug=debug)
375
+ self._persist_new_bytes_columns(df, debug=debug)
375
376
 
376
377
  if debug:
377
378
  dprint(
@@ -617,11 +618,13 @@ def filter_existing(
617
618
  filter_unseen_df,
618
619
  add_missing_cols_to_df,
619
620
  get_unhashable_cols,
620
- get_numeric_cols,
621
621
  )
622
622
  from meerschaum.utils.dtypes import (
623
623
  to_pandas_dtype,
624
624
  none_if_null,
625
+ to_datetime,
626
+ are_dtypes_equal,
627
+ value_is_null,
625
628
  )
626
629
  from meerschaum.config import get_config
627
630
  pd = import_pandas()
@@ -669,29 +672,36 @@ def filter_existing(
669
672
  ### begin is the oldest data in the new dataframe
670
673
  begin, end = None, None
671
674
  dt_col = pipe_columns.get('datetime', None)
675
+ primary_key = pipe_columns.get('primary', None)
672
676
  dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
677
+
678
+ if autoincrement and primary_key == dt_col and dt_col not in df.columns:
679
+ if enforce_dtypes:
680
+ df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
681
+ return df, get_empty_df(), df
682
+
673
683
  try:
674
- min_dt_val = df[dt_col].min(skipna=True) if dt_col else None
684
+ min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
675
685
  if is_dask and min_dt_val is not None:
676
686
  min_dt_val = min_dt_val.compute()
677
687
  min_dt = (
678
- pandas.to_datetime(min_dt_val).to_pydatetime()
679
- if min_dt_val is not None and 'datetime' in str(dt_type)
688
+ to_datetime(min_dt_val, as_pydatetime=True)
689
+ if min_dt_val is not None and are_dtypes_equal(dt_type, 'datetime')
680
690
  else min_dt_val
681
691
  )
682
692
  except Exception:
683
693
  min_dt = None
684
- if not ('datetime' in str(type(min_dt))) or str(min_dt) == 'NaT':
685
- if 'int' not in str(type(min_dt)).lower():
694
+
695
+ if not are_dtypes_equal('datetime', str(type(min_dt))) or value_is_null(min_dt):
696
+ if not are_dtypes_equal('int', str(type(min_dt))):
686
697
  min_dt = None
687
698
 
688
699
  if isinstance(min_dt, datetime):
689
- begin = (
690
- round_time(
691
- min_dt,
692
- to='down'
693
- ) - timedelta(minutes=1)
694
- )
700
+ rounded_min_dt = round_time(min_dt, to='down')
701
+ try:
702
+ begin = rounded_min_dt - timedelta(minutes=1)
703
+ except OverflowError:
704
+ begin = rounded_min_dt
695
705
  elif dt_type and 'int' in dt_type.lower():
696
706
  begin = min_dt
697
707
  elif dt_col is None:
@@ -699,11 +709,11 @@ def filter_existing(
699
709
 
700
710
  ### end is the newest data in the new dataframe
701
711
  try:
702
- max_dt_val = df[dt_col].max(skipna=True) if dt_col else None
712
+ max_dt_val = df[dt_col].max(skipna=True) if dt_col and dt_col in df.columns else None
703
713
  if is_dask and max_dt_val is not None:
704
714
  max_dt_val = max_dt_val.compute()
705
715
  max_dt = (
706
- pandas.to_datetime(max_dt_val).to_pydatetime()
716
+ to_datetime(max_dt_val, as_pydatetime=True)
707
717
  if max_dt_val is not None and 'datetime' in str(dt_type)
708
718
  else max_dt_val
709
719
  )
@@ -712,8 +722,8 @@ def filter_existing(
712
722
  traceback.print_exc()
713
723
  max_dt = None
714
724
 
715
- if ('datetime' not in str(type(max_dt))) or str(min_dt) == 'NaT':
716
- if 'int' not in str(type(max_dt)).lower():
725
+ if not are_dtypes_equal('datetime', str(type(max_dt))) or value_is_null(max_dt):
726
+ if not are_dtypes_equal('int', str(type(max_dt))):
717
727
  max_dt = None
718
728
 
719
729
  if isinstance(max_dt, datetime):
@@ -723,7 +733,7 @@ def filter_existing(
723
733
  to='down'
724
734
  ) + timedelta(minutes=1)
725
735
  )
726
- elif dt_type and 'int' in dt_type.lower():
736
+ elif dt_type and 'int' in dt_type.lower() and max_dt is not None:
727
737
  end = max_dt + 1
728
738
 
729
739
  if max_dt is not None and min_dt is not None and min_dt > max_dt:
@@ -738,7 +748,7 @@ def filter_existing(
738
748
 
739
749
  unique_index_vals = {
740
750
  col: df[col].unique()
741
- for col in pipe_columns
751
+ for col in (pipe_columns if not primary_key else [primary_key])
742
752
  if col in df.columns and col != dt_col
743
753
  } if not date_bound_only else {}
744
754
  filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
@@ -777,14 +787,15 @@ def filter_existing(
777
787
 
778
788
  ### Separate new rows from changed ones.
779
789
  on_cols = [
780
- col for col_key, col in pipe_columns.items()
790
+ col
791
+ for col_key, col in pipe_columns.items()
781
792
  if (
782
793
  col
783
794
  and
784
795
  col_key != 'value'
785
796
  and col in backtrack_df.columns
786
797
  )
787
- ]
798
+ ] if not primary_key else [primary_key]
788
799
  self_dtypes = self.dtypes
789
800
  on_cols_dtypes = {
790
801
  col: to_pandas_dtype(typ)
@@ -1020,3 +1031,32 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
1020
1031
  return edit_success, edit_msg
1021
1032
 
1022
1033
  return True, "Success"
1034
+
1035
+
1036
def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
    """
    Check for new `bytes` columns and update the parameters.

    Parameters
    ----------
    df:
        The dataframe to scan for `bytes` columns
        (delegated to `meerschaum.utils.dataframe.get_bytes_cols`).

    debug: bool, default False
        Verbosity toggle passed through to `self.edit()`.

    Returns
    -------
    A `SuccessTuple` indicating whether the pipe's dtypes were updated
    (or `(True, "Success")` when nothing needed to change).
    """
    from meerschaum.utils.dataframe import get_bytes_cols
    bytes_cols = get_bytes_cols(df)
    existing_bytes_cols = [col for col, typ in self.dtypes.items() if typ == 'bytes']
    new_bytes_cols = [col for col in bytes_cols if col not in existing_bytes_cols]
    if not new_bytes_cols:
        return True, "Success"

    ### Invalidate the cached attributes so the next read reflects this edit.
    self._attributes_sync_time = None
    dt_col = self.columns.get('datetime', None)
    dtypes = self.parameters.get('dtypes', {})
    ### BUG FIX: guard against `dt_col is None` — previously a `None` key
    ### was inserted into the persisted dtypes dictionary.
    if dt_col and dt_col not in dtypes:
        dtypes[dt_col] = 'datetime'
    dtypes.update({col: 'bytes' for col in bytes_cols})
    self.parameters['dtypes'] = dtypes

    if not self.temporary:
        edit_success, edit_msg = self.edit(interactive=False, debug=debug)
        if not edit_success:
            warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")

        return edit_success, edit_msg

    return True, "Success"
@@ -7,9 +7,10 @@ Verify the contents of a pipe by resyncing its interval.
7
7
  """
8
8
 
9
9
  from datetime import datetime, timedelta
10
- from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, List, Dict
10
+
11
+ import meerschaum as mrsm
12
+ from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
11
13
  from meerschaum.utils.warnings import warn, info
12
- from meerschaum.utils.debug import dprint
13
14
 
14
15
 
15
16
  def verify(
@@ -94,9 +95,6 @@ def verify(
94
95
  else 1
95
96
  )
96
97
 
97
- sync_less_than_begin = not bounded and begin is None
98
- sync_greater_than_end = not bounded and end is None
99
-
100
98
  cannot_determine_bounds = not self.exists(debug=debug)
101
99
 
102
100
  if cannot_determine_bounds:
@@ -164,7 +162,7 @@ def verify(
164
162
  )
165
163
 
166
164
  info(
167
- f"Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
165
+ f"Verifying {self}:\n Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
168
166
  + f" ({'un' if not bounded else ''}bounded)"
169
167
  + f" of size '{interval_str(chunk_interval)}'"
170
168
  + f" between '{begin_to_print}' and '{end_to_print}'."
@@ -187,7 +185,7 @@ def verify(
187
185
  return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
188
186
 
189
187
  chunk_begin, chunk_end = chunk_begin_and_end
190
- return chunk_begin_and_end, self.sync(
188
+ chunk_success, chunk_msg = self.sync(
191
189
  begin=chunk_begin,
192
190
  end=chunk_end,
193
191
  params=params,
@@ -195,6 +193,9 @@ def verify(
195
193
  debug=debug,
196
194
  **kwargs
197
195
  )
196
+ chunk_msg = chunk_msg.strip()
197
+ mrsm.pprint((chunk_success, chunk_msg))
198
+ return chunk_begin_and_end, (chunk_success, chunk_msg)
198
199
 
199
200
  ### If we have more than one chunk, attempt to sync the first one and return if its fails.
200
201
  if len(chunk_bounds) > 1:
@@ -255,11 +255,11 @@ class Plugin:
255
255
 
256
256
 
257
257
  def install(
258
- self,
259
- skip_deps: bool = False,
260
- force: bool = False,
261
- debug: bool = False,
262
- ) -> SuccessTuple:
258
+ self,
259
+ skip_deps: bool = False,
260
+ force: bool = False,
261
+ debug: bool = False,
262
+ ) -> SuccessTuple:
263
263
  """
264
264
  Extract a plugin's tar archive to the plugins directory.
265
265
 
@@ -359,7 +359,7 @@ class Plugin:
359
359
  is_same_version = new_version and old_version and (
360
360
  packaging_version.parse(old_version) == packaging_version.parse(new_version)
361
361
  )
362
- except Exception as e:
362
+ except Exception:
363
363
  is_new_version, is_same_version = True, False
364
364
 
365
365
  ### Determine where to permanently store the new plugin.
@@ -404,7 +404,7 @@ class Plugin:
404
404
  dprint(f"Moving '{src_file}' to '{dst_dir}'...")
405
405
  try:
406
406
  shutil.move(src_file, dst_dir)
407
- except Exception as e:
407
+ except Exception:
408
408
  success, msg = False, (
409
409
  f"Failed to install plugin '{self}': " +
410
410
  f"Could not move file '{src_file}' to '{dst_dir}'"
@@ -817,10 +817,10 @@ class Plugin:
817
817
 
818
818
 
819
819
  def install_dependencies(
820
- self,
821
- force: bool = False,
822
- debug: bool = False,
823
- ) -> bool:
820
+ self,
821
+ force: bool = False,
822
+ debug: bool = False,
823
+ ) -> bool:
824
824
  """
825
825
  If specified, install dependencies.
826
826
 
@@ -841,12 +841,9 @@ class Plugin:
841
841
  Returns
842
842
  -------
843
843
  A bool indicating success.
844
-
845
844
  """
846
845
  from meerschaum.utils.packages import pip_install, venv_contains_package
847
- from meerschaum.utils.debug import dprint
848
846
  from meerschaum.utils.warnings import warn, info
849
- from meerschaum.connectors.parse import parse_repo_keys
850
847
  _deps = self.get_dependencies(debug=debug)
851
848
  if not _deps and self.requirements_file_path is None:
852
849
  return True
@@ -465,6 +465,7 @@ class Daemon:
465
465
  self._write_stop_file('kill')
466
466
  return True, "Process has already stopped."
467
467
 
468
+ psutil = attempt_import('psutil')
468
469
  process = self.process
469
470
  try:
470
471
  process.terminate()
@@ -473,10 +474,16 @@ class Daemon:
473
474
  except Exception as e:
474
475
  return False, f"Failed to kill job {self} with exception: {e}"
475
476
 
477
+ try:
478
+ if process.status():
479
+ return False, "Failed to stop daemon '{self}' ({process})."
480
+ except psutil.NoSuchProcess:
481
+ pass
482
+
476
483
  if self.pid_path.exists():
477
484
  try:
478
485
  self.pid_path.unlink()
479
- except Exception as e:
486
+ except Exception:
480
487
  pass
481
488
 
482
489
  self._write_stop_file('kill')
@@ -534,7 +541,7 @@ class Daemon:
534
541
  if not timeout:
535
542
  try:
536
543
  success = self.process.status() == 'stopped'
537
- except psutil.NoSuchProcess as e:
544
+ except psutil.NoSuchProcess:
538
545
  success = True
539
546
  msg = "Success" if success else f"Failed to suspend daemon '{self.daemon_id}'."
540
547
  if success:
@@ -677,11 +684,11 @@ class Daemon:
677
684
  raise SystemExit(0)
678
685
 
679
686
  def _send_signal(
680
- self,
681
- signal_to_send,
682
- timeout: Union[float, int, None] = None,
683
- check_timeout_interval: Union[float, int, None] = None,
684
- ) -> SuccessTuple:
687
+ self,
688
+ signal_to_send,
689
+ timeout: Union[float, int, None] = None,
690
+ check_timeout_interval: Union[float, int, None] = None,
691
+ ) -> SuccessTuple:
685
692
  """Send a signal to the daemon process.
686
693
 
687
694
  Parameters
@@ -709,7 +716,7 @@ class Daemon:
709
716
  )
710
717
 
711
718
  os.kill(pid, signal_to_send)
712
- except Exception as e:
719
+ except Exception:
713
720
  return False, f"Failed to send signal {signal_to_send}:\n{traceback.format_exc()}"
714
721
 
715
722
  timeout = self.get_timeout_seconds(timeout)
@@ -745,7 +752,7 @@ class Daemon:
745
752
  if _already_exists and not allow_dirty_run:
746
753
  error(
747
754
  f"Daemon '{self.daemon_id}' already exists. " +
748
- f"To allow this daemon to run, do one of the following:\n"
755
+ "To allow this daemon to run, do one of the following:\n"
749
756
  + " - Execute `daemon.cleanup()`.\n"
750
757
  + f" - Delete the directory '{self.path}'.\n"
751
758
  + " - Pass `allow_dirty_run=True` to `daemon.run()`.\n",
@@ -764,7 +771,7 @@ class Daemon:
764
771
  if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
765
772
  try:
766
773
  self._process = psutil.Process(int(pid))
767
- except Exception as e:
774
+ except Exception:
768
775
  if self.pid_path.exists():
769
776
  self.pid_path.unlink()
770
777
  return None
@@ -788,7 +795,7 @@ class Daemon:
788
795
  if self.pid_path.exists():
789
796
  try:
790
797
  self.pid_path.unlink()
791
- except Exception as e:
798
+ except Exception:
792
799
  pass
793
800
  return 'stopped'
794
801