meerschaum 2.6.16__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl
- meerschaum/_internal/arguments/_parse_arguments.py +1 -1
- meerschaum/actions/delete.py +65 -69
- meerschaum/actions/edit.py +22 -2
- meerschaum/actions/install.py +1 -2
- meerschaum/actions/sync.py +2 -3
- meerschaum/config/_default.py +1 -1
- meerschaum/config/_paths.py +2 -1
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +4 -3
- meerschaum/connectors/sql/_create_engine.py +3 -3
- meerschaum/connectors/sql/_pipes.py +84 -38
- meerschaum/connectors/sql/_sql.py +6 -1
- meerschaum/connectors/valkey/_pipes.py +12 -1
- meerschaum/core/Pipe/__init__.py +23 -13
- meerschaum/core/Pipe/_attributes.py +19 -0
- meerschaum/core/Pipe/_dtypes.py +1 -1
- meerschaum/core/Pipe/_sync.py +61 -21
- meerschaum/core/Pipe/_verify.py +8 -7
- meerschaum/jobs/_Job.py +2 -1
- meerschaum/plugins/_Plugin.py +11 -14
- meerschaum/utils/daemon/Daemon.py +20 -13
- meerschaum/utils/dataframe.py +175 -13
- meerschaum/utils/dtypes/__init__.py +103 -14
- meerschaum/utils/dtypes/sql.py +26 -0
- meerschaum/utils/misc.py +8 -8
- meerschaum/utils/packages/_packages.py +1 -1
- meerschaum/utils/schedule.py +8 -3
- meerschaum/utils/sql.py +70 -47
- meerschaum/utils/venv/_Venv.py +4 -4
- meerschaum/utils/venv/__init__.py +33 -13
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +2 -2
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +38 -38
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
- {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/core/Pipe/_verify.py
CHANGED
@@ -7,9 +7,10 @@ Verify the contents of a pipe by resyncing its interval.
 """
 
 from datetime import datetime, timedelta
-
+
+import meerschaum as mrsm
+from meerschaum.utils.typing import SuccessTuple, Any, Optional, Union, Tuple, Dict
 from meerschaum.utils.warnings import warn, info
-from meerschaum.utils.debug import dprint
 
 
 def verify(
@@ -94,9 +95,6 @@ def verify(
         else 1
     )
 
-    sync_less_than_begin = not bounded and begin is None
-    sync_greater_than_end = not bounded and end is None
-
     cannot_determine_bounds = not self.exists(debug=debug)
 
     if cannot_determine_bounds:
@@ -164,7 +162,7 @@ def verify(
     )
 
     info(
-        f"Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
+        f"Verifying {self}:\n    Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
         + f" ({'un' if not bounded else ''}bounded)"
         + f" of size '{interval_str(chunk_interval)}'"
         + f" between '{begin_to_print}' and '{end_to_print}'."
@@ -187,7 +185,7 @@ def verify(
             return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
 
         chunk_begin, chunk_end = chunk_begin_and_end
-
+        chunk_success, chunk_msg = self.sync(
             begin=chunk_begin,
             end=chunk_end,
             params=params,
@@ -195,6 +193,9 @@ def verify(
             debug=debug,
             **kwargs
         )
+        chunk_msg = chunk_msg.strip()
+        mrsm.pprint((chunk_success, chunk_msg))
+        return chunk_begin_and_end, (chunk_success, chunk_msg)
 
     ### If we have more than one chunk, attempt to sync the first one and return if its fails.
     if len(chunk_bounds) > 1:
meerschaum/jobs/_Job.py
CHANGED
@@ -873,7 +873,7 @@ class Job:
         """
         from meerschaum._internal.arguments import compress_pipeline_sysargs
         sysargs = compress_pipeline_sysargs(self.sysargs)
-        return shlex.join(sysargs).replace(' + ', '\n+ ')
+        return shlex.join(sysargs).replace(' + ', '\n+ ').replace(' : ', '\n: ').lstrip().rstrip()
 
     @property
     def _externally_managed_file(self) -> pathlib.Path:
@@ -915,6 +915,7 @@ class Job:
             'PYTHONUNBUFFERED': '1',
             'LINES': str(get_config('jobs', 'terminal', 'lines')),
             'COLUMNS': str(get_config('jobs', 'terminal', 'columns')),
+            STATIC_CONFIG['environment']['noninteractive']: 'true',
         }
         self._env = {**default_env, **_env}
         return self._env
meerschaum/plugins/_Plugin.py
CHANGED
@@ -255,11 +255,11 @@ class Plugin:
 
 
     def install(
-
-
-
-
-
+        self,
+        skip_deps: bool = False,
+        force: bool = False,
+        debug: bool = False,
+    ) -> SuccessTuple:
         """
         Extract a plugin's tar archive to the plugins directory.
 
@@ -359,7 +359,7 @@ class Plugin:
             is_same_version = new_version and old_version and (
                 packaging_version.parse(old_version) == packaging_version.parse(new_version)
             )
-        except Exception
+        except Exception:
             is_new_version, is_same_version = True, False
 
         ### Determine where to permanently store the new plugin.
@@ -404,7 +404,7 @@ class Plugin:
             dprint(f"Moving '{src_file}' to '{dst_dir}'...")
         try:
             shutil.move(src_file, dst_dir)
-        except Exception
+        except Exception:
             success, msg = False, (
                 f"Failed to install plugin '{self}': " +
                 f"Could not move file '{src_file}' to '{dst_dir}'"
@@ -817,10 +817,10 @@ class Plugin:
 
 
     def install_dependencies(
-
-
-
-
+        self,
+        force: bool = False,
+        debug: bool = False,
+    ) -> bool:
         """
         If specified, install dependencies.
 
@@ -841,12 +841,9 @@ class Plugin:
         Returns
         -------
         A bool indicating success.
-
         """
         from meerschaum.utils.packages import pip_install, venv_contains_package
-        from meerschaum.utils.debug import dprint
         from meerschaum.utils.warnings import warn, info
-        from meerschaum.connectors.parse import parse_repo_keys
         _deps = self.get_dependencies(debug=debug)
         if not _deps and self.requirements_file_path is None:
             return True
meerschaum/utils/daemon/Daemon.py
CHANGED
@@ -432,7 +432,7 @@ class Daemon:
             + "allow_dirty_run=True)"
         )
         env = dict(os.environ)
-        env['
+        env[STATIC_CONFIG['environment']['noninteractive']] = 'true'
         _launch_success_bool = venv_exec(_launch_daemon_code, debug=debug, venv=None, env=env)
         msg = (
             "Success"
@@ -465,6 +465,7 @@ class Daemon:
             self._write_stop_file('kill')
             return True, "Process has already stopped."
 
+        psutil = attempt_import('psutil')
         process = self.process
         try:
             process.terminate()
@@ -473,10 +474,16 @@ class Daemon:
         except Exception as e:
             return False, f"Failed to kill job {self} with exception: {e}"
 
+        try:
+            if process.status():
+                return False, "Failed to stop daemon '{self}' ({process})."
+        except psutil.NoSuchProcess:
+            pass
+
         if self.pid_path.exists():
             try:
                 self.pid_path.unlink()
-            except Exception
+            except Exception:
                 pass
 
         self._write_stop_file('kill')
@@ -534,7 +541,7 @@ class Daemon:
         if not timeout:
             try:
                 success = self.process.status() == 'stopped'
-            except psutil.NoSuchProcess
+            except psutil.NoSuchProcess:
                 success = True
             msg = "Success" if success else f"Failed to suspend daemon '{self.daemon_id}'."
             if success:
@@ -677,11 +684,11 @@ class Daemon:
         raise SystemExit(0)
 
     def _send_signal(
-
-
-
-
-
+        self,
+        signal_to_send,
+        timeout: Union[float, int, None] = None,
+        check_timeout_interval: Union[float, int, None] = None,
+    ) -> SuccessTuple:
         """Send a signal to the daemon process.
 
         Parameters
@@ -709,7 +716,7 @@ class Daemon:
             )
 
             os.kill(pid, signal_to_send)
-        except Exception
+        except Exception:
             return False, f"Failed to send signal {signal_to_send}:\n{traceback.format_exc()}"
 
         timeout = self.get_timeout_seconds(timeout)
@@ -745,7 +752,7 @@ class Daemon:
         if _already_exists and not allow_dirty_run:
             error(
                 f"Daemon '{self.daemon_id}' already exists. " +
-
+                "To allow this daemon to run, do one of the following:\n"
                 + " - Execute `daemon.cleanup()`.\n"
                 + f" - Delete the directory '{self.path}'.\n"
                 + " - Pass `allow_dirty_run=True` to `daemon.run()`.\n",
@@ -764,7 +771,7 @@ class Daemon:
         if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
             try:
                 self._process = psutil.Process(int(pid))
-            except Exception
+            except Exception:
                 if self.pid_path.exists():
                     self.pid_path.unlink()
                 return None
@@ -788,7 +795,7 @@ class Daemon:
         if self.pid_path.exists():
             try:
                 self.pid_path.unlink()
-            except Exception
+            except Exception:
                 pass
         return 'stopped'
 
@@ -1000,7 +1007,7 @@ class Daemon:
         try:
             with open(self.properties_path, 'r', encoding='utf-8') as file:
                 properties = json.load(file)
-        except Exception
+        except Exception:
             properties = {}
 
         return properties
meerschaum/utils/dataframe.py
CHANGED
@@ -139,7 +139,6 @@ def filter_unseen_df(
     import functools
     import traceback
     from decimal import Decimal
-    from uuid import UUID
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.packages import import_pandas, attempt_import
     from meerschaum.utils.dtypes import (
@@ -147,6 +146,7 @@ def filter_unseen_df(
         are_dtypes_equal,
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         coerce_timezone,
     )
     pd = import_pandas(debug=debug)
@@ -333,6 +333,11 @@ def filter_unseen_df(
     old_uuid_cols = get_uuid_cols(old_df)
     new_uuid_cols = get_uuid_cols(new_df)
     uuid_cols = set(new_uuid_cols + old_uuid_cols)
+
+    old_bytes_cols = get_bytes_cols(old_df)
+    new_bytes_cols = get_bytes_cols(new_df)
+    bytes_cols = set(new_bytes_cols + old_bytes_cols)
+
     joined_df = merge(
         new_df.infer_objects(copy=False).fillna(NA),
         old_df.infer_objects(copy=False).fillna(NA),
@@ -368,6 +373,14 @@ def filter_unseen_df(
         except Exception:
             warn(f"Unable to parse numeric column '{uuid_col}':\n{traceback.format_exc()}")
 
+    for bytes_col in bytes_cols:
+        if bytes_col not in delta_df.columns:
+            continue
+        try:
+            delta_df[bytes_col] = delta_df[bytes_col].apply(attempt_cast_to_bytes)
+        except Exception:
+            warn(f"Unable to parse bytes column '{bytes_col}':\n{traceback.format_exc()}")
+
     return delta_df
 
 
@@ -429,6 +442,7 @@ def parse_df_datetimes(
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.misc import items_str
+    from meerschaum.utils.dtypes import to_datetime
     import traceback
     pd = import_pandas()
     pandas = attempt_import('pandas')
@@ -494,7 +508,7 @@ def parse_df_datetimes(
 
     if len(cols_to_inspect) == 0:
         if debug:
-            dprint(
+            dprint("All columns are ignored, skipping datetime detection...")
         return df.fillna(pandas.NA)
 
     ### apply regex to columns to determine which are ISO datetimes
@@ -515,14 +529,10 @@ def parse_df_datetimes(
 
     try:
         if not using_dask:
-            df[datetime_cols] = df[datetime_cols].apply(
-                pd.to_datetime,
-                utc=True,
-                format='ISO8601',
-            )
+            df[datetime_cols] = df[datetime_cols].apply(to_datetime)
         else:
             df[datetime_cols] = df[datetime_cols].apply(
-
+                to_datetime,
                 utc=True,
                 axis=1,
                 meta={
@@ -665,7 +675,7 @@ def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
 
     Returns
     -------
-    A list of columns to treat as
+    A list of columns to treat as UUIDs.
     """
     if df is None:
         return []
@@ -692,6 +702,135 @@ def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
     ]
 
 
+def get_datetime_cols(
+    df: 'pd.DataFrame',
+    timezone_aware: bool = True,
+    timezone_naive: bool = True,
+) -> List[str]:
+    """
+    Get the columns which contain `datetime` or `Timestamp` objects from a Pandas DataFrame.
+
+    Parameters
+    ----------
+    df: pd.DataFrame
+        The DataFrame which may contain `datetime` or `Timestamp` objects.
+
+    timezone_aware: bool, default True
+        If `True`, include timezone-aware datetime columns.
+
+    timezone_naive: bool, default True
+        If `True`, include timezone-naive datetime columns.
+
+    Returns
+    -------
+    A list of columns to treat as datetimes.
+    """
+    if not timezone_aware and not timezone_naive:
+        raise ValueError("`timezone_aware` and `timezone_naive` cannot both be `False`.")
+
+    if df is None:
+        return []
+
+    from datetime import datetime
+    from meerschaum.utils.dtypes import are_dtypes_equal
+    is_dask = 'dask' in df.__module__
+    if is_dask:
+        df = get_first_valid_dask_partition(df)
+
+    known_dt_cols = [
+        col
+        for col, typ in df.dtypes.items()
+        if are_dtypes_equal('datetime', str(typ))
+    ]
+
+    if len(df) == 0:
+        return known_dt_cols
+
+    cols_indices = {
+        col: df[col].first_valid_index()
+        for col in df.columns
+        if col not in known_dt_cols
+    }
+    pydt_cols = [
+        col
+        for col, ix in cols_indices.items()
+        if (
+            ix is not None
+            and
+            isinstance(df.loc[ix][col], datetime)
+        )
+    ]
+    dt_cols_set = set(known_dt_cols + pydt_cols)
+    all_dt_cols = [
+        col
+        for col in df.columns
+        if col in dt_cols_set
+    ]
+    if timezone_aware and timezone_naive:
+        return all_dt_cols
+
+    known_timezone_aware_dt_cols = [
+        col
+        for col in known_dt_cols
+        if getattr(df[col], 'tz', None) is not None
+    ]
+    timezone_aware_pydt_cols = [
+        col
+        for col in pydt_cols
+        if df.loc[cols_indices[col]][col].tzinfo is not None
+    ]
+    timezone_aware_dt_cols_set = set(known_timezone_aware_dt_cols + timezone_aware_pydt_cols)
+    if timezone_aware:
+        return [
+            col
+            for col in all_dt_cols
+            if col in timezone_aware_pydt_cols
+        ]
+
+    return [
+        col
+        for col in all_dt_cols
+        if col not in timezone_aware_dt_cols_set
+    ]
+
+
+def get_bytes_cols(df: 'pd.DataFrame') -> List[str]:
+    """
+    Get the columns which contain bytes strings from a Pandas DataFrame.
+
+    Parameters
+    ----------
+    df: pd.DataFrame
+        The DataFrame which may contain bytes strings.
+
+    Returns
+    -------
+    A list of columns to treat as bytes.
+    """
+    if df is None:
+        return []
+    is_dask = 'dask' in df.__module__
+    if is_dask:
+        df = get_first_valid_dask_partition(df)
+
+    if len(df) == 0:
+        return []
+
+    cols_indices = {
+        col: df[col].first_valid_index()
+        for col in df.columns
+    }
+    return [
+        col
+        for col, ix in cols_indices.items()
+        if (
+            ix is not None
+            and
            isinstance(df.loc[ix][col], bytes)
+        )
+    ]
+
+
 def enforce_dtypes(
     df: 'pd.DataFrame',
     dtypes: Dict[str, str],
@@ -743,6 +882,7 @@ def enforce_dtypes(
         is_dtype_numeric,
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         coerce_timezone as _coerce_timezone,
     )
     pandas = mrsm.attempt_import('pandas')
@@ -773,6 +913,11 @@ def enforce_dtypes(
         for col, typ in dtypes.items()
         if typ == 'uuid'
     ]
+    bytes_cols = [
+        col
+        for col, typ in dtypes.items()
+        if typ == 'bytes'
+    ]
     datetime_cols = [
         col
         for col, typ in dtypes.items()
@@ -826,6 +971,17 @@ def enforce_dtypes(
                 if debug:
                     dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
 
+    if bytes_cols:
+        if debug:
+            dprint(f"Checking for bytes: {bytes_cols}")
+        for col in bytes_cols:
+            if col in df.columns:
+                try:
+                    df[col] = df[col].apply(attempt_cast_to_bytes)
+                except Exception as e:
+                    if debug:
+                        dprint(f"Unable to parse column '{col}' as bytes:\n{e}")
+
     if datetime_cols and coerce_timezone:
         if debug:
             dprint(f"Checking for datetime conversion: {datetime_cols}")
@@ -931,6 +1087,8 @@ def get_datetime_bound_from_df(
     -------
     The minimum or maximum datetime value in the dataframe, or `None`.
     """
+    from meerschaum.utils.dtypes import to_datetime, value_is_null
+
     if df is None:
         return None
     if not datetime_column:
@@ -982,9 +1140,9 @@ def get_datetime_bound_from_df(
             dt_val = dt_val.compute()
 
         return (
-
+            to_datetime(dt_val, as_pydatetime=True)
             if are_dtypes_equal(str(type(dt_val)), 'datetime')
-            else (dt_val if
+            else (dt_val if not value_is_null(dt_val) else None)
         )
 
     return None
@@ -1127,7 +1285,7 @@ def get_first_valid_dask_partition(ddf: 'dask.dataframe.DataFrame') -> Union['pd.DataFrame', None]:
     for partition in ddf.partitions:
         try:
             pdf = partition.compute()
-        except Exception
+        except Exception:
             continue
         if len(pdf) > 0:
             return pdf
@@ -1408,12 +1566,16 @@ def to_json(
     A JSON string.
     """
     from meerschaum.utils.packages import import_pandas
+    from meerschaum.utils.dtypes import serialize_bytes
    pd = import_pandas()
    uuid_cols = get_uuid_cols(df)
-
+    bytes_cols = get_bytes_cols(df)
+    if safe_copy and bool(uuid_cols or bytes_cols):
        df = df.copy()
    for col in uuid_cols:
        df[col] = df[col].astype(str)
+    for col in bytes_cols:
+        df[col] = df[col].apply(serialize_bytes)
    return df.infer_objects(copy=False).fillna(pd.NA).to_json(
        date_format=date_format,
        date_unit=date_unit,