meerschaum 2.7.6__py3-none-any.whl → 2.7.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45)
  1. meerschaum/actions/copy.py +1 -0
  2. meerschaum/actions/drop.py +100 -22
  3. meerschaum/actions/index.py +71 -0
  4. meerschaum/actions/register.py +8 -12
  5. meerschaum/actions/sql.py +1 -1
  6. meerschaum/api/routes/_pipes.py +18 -0
  7. meerschaum/api/routes/_plugins.py +1 -1
  8. meerschaum/api/routes/_users.py +62 -61
  9. meerschaum/config/_version.py +1 -1
  10. meerschaum/connectors/api/_pipes.py +20 -0
  11. meerschaum/connectors/sql/_SQLConnector.py +8 -12
  12. meerschaum/connectors/sql/_create_engine.py +1 -1
  13. meerschaum/connectors/sql/_fetch.py +9 -39
  14. meerschaum/connectors/sql/_instance.py +3 -3
  15. meerschaum/connectors/sql/_pipes.py +262 -70
  16. meerschaum/connectors/sql/_plugins.py +11 -16
  17. meerschaum/connectors/sql/_sql.py +60 -39
  18. meerschaum/connectors/sql/_uri.py +9 -9
  19. meerschaum/connectors/sql/_users.py +10 -12
  20. meerschaum/connectors/sql/tables/__init__.py +13 -14
  21. meerschaum/connectors/valkey/_ValkeyConnector.py +2 -2
  22. meerschaum/core/Pipe/__init__.py +12 -2
  23. meerschaum/core/Pipe/_attributes.py +32 -38
  24. meerschaum/core/Pipe/_drop.py +73 -2
  25. meerschaum/core/Pipe/_fetch.py +4 -0
  26. meerschaum/core/Pipe/_index.py +68 -0
  27. meerschaum/core/Pipe/_sync.py +16 -9
  28. meerschaum/utils/daemon/Daemon.py +9 -2
  29. meerschaum/utils/daemon/RotatingFile.py +3 -3
  30. meerschaum/utils/dataframe.py +42 -12
  31. meerschaum/utils/dtypes/__init__.py +144 -24
  32. meerschaum/utils/dtypes/sql.py +52 -9
  33. meerschaum/utils/formatting/__init__.py +2 -2
  34. meerschaum/utils/formatting/_pprint.py +12 -11
  35. meerschaum/utils/misc.py +16 -18
  36. meerschaum/utils/prompt.py +1 -1
  37. meerschaum/utils/sql.py +106 -42
  38. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/METADATA +14 -2
  39. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/RECORD +45 -43
  40. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/WHEEL +1 -1
  41. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/LICENSE +0 -0
  42. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/NOTICE +0 -0
  43. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/entry_points.txt +0 -0
  44. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/top_level.txt +0 -0
  45. {meerschaum-2.7.6.dist-info → meerschaum-2.7.8.dist-info}/zip-safe +0 -0
@@ -7,7 +7,7 @@ Drop a Pipe's table but keep its registration
7
7
  """
8
8
 
9
9
  from __future__ import annotations
10
- from meerschaum.utils.typing import SuccessTuple, Any
10
+ from meerschaum.utils.typing import SuccessTuple, Any, Optional, List
11
11
 
12
12
 
13
13
  def drop(
@@ -39,9 +39,80 @@ def drop(
39
39
  warn(_drop_cache_tuple[1])
40
40
 
41
41
  with Venv(get_connector_plugin(self.instance_connector)):
42
- result = self.instance_connector.drop_pipe(self, debug=debug, **kw)
42
+ if hasattr(self.instance_connector, 'drop_pipe'):
43
+ result = self.instance_connector.drop_pipe(self, debug=debug, **kw)
44
+ else:
45
+ result = (
46
+ False,
47
+ (
48
+ "Cannot drop pipes for instance connectors of type "
49
+ f"'{self.instance_connector.type}'."
50
+ )
51
+ )
52
+
43
53
 
44
54
  _ = self.__dict__.pop('_exists', None)
45
55
  _ = self.__dict__.pop('_exists_timestamp', None)
46
56
 
47
57
  return result
58
+
59
+
60
def drop_indices(
    self,
    columns: Optional[List[str]] = None,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Call the Pipe's instance connector's `drop_pipe_indices()` method.

    Parameters
    ----------
    columns: Optional[List[str]], default None
        If provided, only drop indices in the given list.

    debug: bool, default False:
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    ### Invalidate cached index / dtype metadata before touching the table.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    ### Best-effort: a cache-pipe failure is warned about, not fatal.
    if self.cache_pipe is not None:
        _drop_cache_tuple = self.cache_pipe.drop_indices(columns=columns, debug=debug, **kw)
        if not _drop_cache_tuple[0]:
            warn(_drop_cache_tuple[1])

    with Venv(get_connector_plugin(self.instance_connector)):
        if hasattr(self.instance_connector, 'drop_pipe_indices'):
            result = self.instance_connector.drop_pipe_indices(
                self,
                columns=columns,
                debug=debug,
                **kw
            )
        else:
            result = (
                False,
                (
                    "Cannot drop indices for instance connectors of type "
                    f"'{self.instance_connector.type}'."
                )
            )

    ### Pop again: the connector call may have repopulated these attributes.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    return result
@@ -84,6 +84,7 @@ def fetch(
84
84
  begin=_determine_begin(
85
85
  self,
86
86
  begin,
87
+ end,
87
88
  check_existing=check_existing,
88
89
  debug=debug,
89
90
  ),
@@ -136,6 +137,7 @@ def get_backtrack_interval(
136
137
  def _determine_begin(
137
138
  pipe: mrsm.Pipe,
138
139
  begin: Union[datetime, int, str, None] = '',
140
+ end: Union[datetime, int, None] = None,
139
141
  check_existing: bool = True,
140
142
  debug: bool = False,
141
143
  ) -> Union[datetime, int, None]:
@@ -157,6 +159,8 @@ def _determine_begin(
157
159
  """
158
160
  if begin != '':
159
161
  return begin
162
+ if end is not None:
163
+ return None
160
164
  sync_time = pipe.get_sync_time(debug=debug)
161
165
  if sync_time is None:
162
166
  return sync_time
@@ -0,0 +1,68 @@
1
+ #! /usr/bin/env python
2
+ # -*- coding: utf-8 -*-
3
+ # vim:fenc=utf-8
4
+
5
+ """
6
+ Index a pipe's table.
7
+ """
8
+
9
+ from __future__ import annotations
10
+ from meerschaum.utils.typing import SuccessTuple, Any, Optional, List
11
+
12
+
13
def create_indices(
    self,
    columns: Optional[List[str]] = None,
    debug: bool = False,
    **kw: Any
) -> SuccessTuple:
    """
    Call the Pipe's instance connector's `create_pipe_indices()` method.

    Parameters
    ----------
    columns: Optional[List[str]], default None
        If provided, only create indices for the columns in the given list.

    debug: bool, default False:
        Verbosity toggle.

    Returns
    -------
    A `SuccessTuple` of success, message.
    """
    from meerschaum.utils.warnings import warn
    from meerschaum.utils.venv import Venv
    from meerschaum.connectors import get_connector_plugin

    ### Invalidate cached index / dtype metadata before touching the table.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    ### Best-effort: a cache-pipe failure is warned about, not fatal.
    ### NOTE(review): this calls `cache_pipe.index(...)` — confirm Pipe
    ### exposes an `index` alias for `create_indices`.
    if self.cache_pipe is not None:
        cache_success, cache_msg = self.cache_pipe.index(columns=columns, debug=debug, **kw)
        if not cache_success:
            warn(cache_msg)

    with Venv(get_connector_plugin(self.instance_connector)):
        if hasattr(self.instance_connector, 'create_pipe_indices'):
            result = self.instance_connector.create_pipe_indices(
                self,
                columns=columns,
                debug=debug,
                **kw
            )
        else:
            result = (
                False,
                (
                    "Cannot create indices for instance connectors of type "
                    f"'{self.instance_connector.type}'."
                )
            )

    ### Pop again: the connector call may have repopulated these attributes.
    _ = self.__dict__.pop('_columns_indices', None)
    _ = self.__dict__.pop('_columns_indices_timestamp', None)
    _ = self.__dict__.pop('_columns_types_timestamp', None)
    _ = self.__dict__.pop('_columns_types', None)

    return result
@@ -292,7 +292,6 @@ def sync(
292
292
  message = '\n'.join([_message for _, _message in df])
293
293
  return success, message
294
294
 
295
- ### TODO: Depreciate async?
296
295
  if df is True:
297
296
  p._exists = None
298
297
  return True, f"{p} is being synced in parallel."
@@ -331,8 +330,7 @@ def sync(
331
330
  return (
332
331
  _chunk_success,
333
332
  (
334
- '\n'
335
- + self._get_chunk_label(_chunk, dt_col)
333
+ self._get_chunk_label(_chunk, dt_col)
336
334
  + '\n'
337
335
  + _chunk_msg
338
336
  )
@@ -341,17 +339,25 @@ def sync(
341
339
  results = sorted(
342
340
  [(chunk_success, chunk_msg)] + (
343
341
  list(pool.imap(_process_chunk, df))
344
- if not df_is_chunk_generator(chunk)
345
- else [
342
+ if (
343
+ not df_is_chunk_generator(chunk) # Handle nested generators.
344
+ and kw.get('workers', 1) != 1
345
+ )
346
+ else list(
346
347
  _process_chunk(_child_chunks)
347
348
  for _child_chunks in df
348
- ]
349
+ )
349
350
  )
350
351
  )
351
352
  chunk_messages = [chunk_msg for _, chunk_msg in results]
352
353
  success_bools = [chunk_success for chunk_success, _ in results]
353
354
  success = all(success_bools)
354
- msg = '\n'.join(chunk_messages)
355
+ msg = (
356
+ f'Synced {len(chunk_messages)} chunk'
357
+ + ('s' if len(chunk_messages) != 1 else '')
358
+ + f' to {p}:\n\n'
359
+ + '\n\n'.join(chunk_messages).lstrip().rstrip()
360
+ ).lstrip().rstrip()
355
361
 
356
362
  ### If some chunks succeeded, retry the failures.
357
363
  retry_success = True
@@ -432,7 +438,7 @@ def sync(
432
438
 
433
439
  if blocking:
434
440
  self._exists = None
435
- return _sync(self, df = df)
441
+ return _sync(self, df=df)
436
442
 
437
443
  from meerschaum.utils.threading import Thread
438
444
  def default_callback(result_tuple: SuccessTuple):
@@ -821,6 +827,7 @@ def filter_existing(
821
827
  for col, typ in self_dtypes.items()
822
828
  },
823
829
  safe_copy=safe_copy,
830
+ coerce_mixed_numerics=(not self.static),
824
831
  debug=debug
825
832
  ),
826
833
  on_cols_dtypes,
@@ -962,7 +969,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
962
969
  """
963
970
  from meerschaum.utils.dataframe import get_numeric_cols
964
971
  numeric_cols = get_numeric_cols(df)
965
- existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ == 'numeric']
972
+ existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
966
973
  new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
967
974
  if not new_numeric_cols:
968
975
  return True, "Success"
@@ -774,9 +774,16 @@ class Daemon:
774
774
  if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
775
775
  try:
776
776
  self._process = psutil.Process(int(pid))
777
+ process_exists = True
777
778
  except Exception:
778
- if self.pid_path.exists():
779
- self.pid_path.unlink()
779
+ process_exists = False
780
+ if not process_exists:
781
+ _ = self.__dict__.pop('_process', None)
782
+ try:
783
+ if self.pid_path.exists():
784
+ self.pid_path.unlink()
785
+ except Exception:
786
+ pass
780
787
  return None
781
788
  return self._process
782
789
 
@@ -13,11 +13,10 @@ import pathlib
13
13
  import traceback
14
14
  import sys
15
15
  import atexit
16
- from datetime import datetime, timezone, timedelta
17
- from typing import List, Union, Optional, Tuple
16
+ from datetime import datetime, timezone
17
+ from typing import List, Optional, Tuple
18
18
  from meerschaum.config import get_config
19
19
  from meerschaum.utils.warnings import warn
20
- from meerschaum.utils.misc import round_time
21
20
  from meerschaum.utils.daemon.FileDescriptorInterceptor import FileDescriptorInterceptor
22
21
  from meerschaum.utils.threading import Thread
23
22
  import meerschaum as mrsm
@@ -517,6 +516,7 @@ class RotatingFile(io.IOBase):
517
516
  else 0
518
517
  )
519
518
 
519
+ subfile_lines = []
520
520
  if (
521
521
  subfile_index in self.subfile_objects
522
522
  and
@@ -85,6 +85,7 @@ def filter_unseen_df(
85
85
  safe_copy: bool = True,
86
86
  dtypes: Optional[Dict[str, Any]] = None,
87
87
  include_unchanged_columns: bool = False,
88
+ coerce_mixed_numerics: bool = True,
88
89
  debug: bool = False,
89
90
  ) -> 'pd.DataFrame':
90
91
  """
@@ -108,6 +109,10 @@ def filter_unseen_df(
108
109
  include_unchanged_columns: bool, default False
109
110
  If `True`, include columns which haven't changed on rows which have changed.
110
111
 
112
+ coerce_mixed_numerics: bool, default True
113
+ If `True`, cast mixed integer and float columns between the old and new dataframes into
114
+ numeric values (`decimal.Decimal`).
115
+
111
116
  debug: bool, default False
112
117
  Verbosity toggle.
113
118
 
@@ -138,7 +143,6 @@ def filter_unseen_df(
138
143
  import json
139
144
  import functools
140
145
  import traceback
141
- from decimal import Decimal
142
146
  from meerschaum.utils.warnings import warn
143
147
  from meerschaum.utils.packages import import_pandas, attempt_import
144
148
  from meerschaum.utils.dtypes import (
@@ -148,7 +152,9 @@ def filter_unseen_df(
148
152
  attempt_cast_to_uuid,
149
153
  attempt_cast_to_bytes,
150
154
  coerce_timezone,
155
+ serialize_decimal,
151
156
  )
157
+ from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
152
158
  pd = import_pandas(debug=debug)
153
159
  is_dask = 'dask' in new_df.__module__
154
160
  if is_dask:
@@ -211,6 +217,12 @@ def filter_unseen_df(
211
217
  if col not in dtypes:
212
218
  dtypes[col] = typ
213
219
 
220
+ numeric_cols_precisions_scales = {
221
+ col: get_numeric_precision_scale(None, typ)
222
+ for col, typ in dtypes.items()
223
+ if col and typ and typ.startswith('numeric')
224
+ }
225
+
214
226
  dt_dtypes = {
215
227
  col: typ
216
228
  for col, typ in dtypes.items()
@@ -259,6 +271,8 @@ def filter_unseen_df(
259
271
  old_is_numeric = col in old_numeric_cols
260
272
 
261
273
  if (
274
+ coerce_mixed_numerics
275
+ and
262
276
  (new_is_float or new_is_int or new_is_numeric)
263
277
  and
264
278
  (old_is_float or old_is_int or old_is_numeric)
@@ -300,13 +314,9 @@ def filter_unseen_df(
300
314
  new_numeric_cols = get_numeric_cols(new_df)
301
315
  numeric_cols = set(new_numeric_cols + old_numeric_cols)
302
316
  for numeric_col in old_numeric_cols:
303
- old_df[numeric_col] = old_df[numeric_col].apply(
304
- lambda x: f'{x:f}' if isinstance(x, Decimal) else x
305
- )
317
+ old_df[numeric_col] = old_df[numeric_col].apply(serialize_decimal)
306
318
  for numeric_col in new_numeric_cols:
307
- new_df[numeric_col] = new_df[numeric_col].apply(
308
- lambda x: f'{x:f}' if isinstance(x, Decimal) else x
309
- )
319
+ new_df[numeric_col] = new_df[numeric_col].apply(serialize_decimal)
310
320
 
311
321
  old_dt_cols = [
312
322
  col
@@ -361,7 +371,14 @@ def filter_unseen_df(
361
371
  if numeric_col not in delta_df.columns:
362
372
  continue
363
373
  try:
364
- delta_df[numeric_col] = delta_df[numeric_col].apply(attempt_cast_to_numeric)
374
+ delta_df[numeric_col] = delta_df[numeric_col].apply(
375
+ functools.partial(
376
+ attempt_cast_to_numeric,
377
+ quantize=True,
378
+ precision=numeric_cols_precisions_scales.get(numeric_col, (None, None))[0],
379
+ scale=numeric_cols_precisions_scales.get(numeric_col, (None, None))[1],
380
+ )
381
+ )
365
382
  except Exception:
366
383
  warn(f"Unable to parse numeric column '{numeric_col}':\n{traceback.format_exc()}")
367
384
 
@@ -882,6 +899,7 @@ def enforce_dtypes(
882
899
  The Pandas DataFrame with the types enforced.
883
900
  """
884
901
  import json
902
+ import functools
885
903
  from meerschaum.utils.debug import dprint
886
904
  from meerschaum.utils.formatting import pprint
887
905
  from meerschaum.utils.dtypes import (
@@ -893,6 +911,7 @@ def enforce_dtypes(
893
911
  attempt_cast_to_bytes,
894
912
  coerce_timezone as _coerce_timezone,
895
913
  )
914
+ from meerschaum.utils.dtypes.sql import get_numeric_precision_scale
896
915
  pandas = mrsm.attempt_import('pandas')
897
916
  is_dask = 'dask' in df.__module__
898
917
  if safe_copy:
@@ -914,7 +933,7 @@ def enforce_dtypes(
914
933
  numeric_cols = [
915
934
  col
916
935
  for col, typ in dtypes.items()
917
- if typ == 'numeric'
936
+ if typ.startswith('numeric')
918
937
  ]
919
938
  uuid_cols = [
920
939
  col
@@ -961,9 +980,17 @@ def enforce_dtypes(
961
980
  if debug:
962
981
  dprint(f"Checking for numerics: {numeric_cols}")
963
982
  for col in numeric_cols:
983
+ precision, scale = get_numeric_precision_scale(None, dtypes.get(col, ''))
964
984
  if col in df.columns:
965
985
  try:
966
- df[col] = df[col].apply(attempt_cast_to_numeric)
986
+ df[col] = df[col].apply(
987
+ functools.partial(
988
+ attempt_cast_to_numeric,
989
+ quantize=True,
990
+ precision=precision,
991
+ scale=scale,
992
+ )
993
+ )
967
994
  except Exception as e:
968
995
  if debug:
969
996
  dprint(f"Unable to parse column '{col}' as NUMERIC:\n{e}")
@@ -1040,7 +1067,7 @@ def enforce_dtypes(
1040
1067
  previous_typ = common_dtypes[col]
1041
1068
  mixed_numeric_types = (is_dtype_numeric(typ) and is_dtype_numeric(previous_typ))
1042
1069
  explicitly_float = are_dtypes_equal(dtypes.get(col, 'object'), 'float')
1043
- explicitly_numeric = dtypes.get(col, 'numeric') == 'numeric'
1070
+ explicitly_numeric = dtypes.get(col, 'numeric').startswith('numeric')
1044
1071
  cast_to_numeric = (
1045
1072
  explicitly_numeric
1046
1073
  or col in df_numeric_cols
@@ -1574,16 +1601,19 @@ def to_json(
1574
1601
  A JSON string.
1575
1602
  """
1576
1603
  from meerschaum.utils.packages import import_pandas
1577
- from meerschaum.utils.dtypes import serialize_bytes
1604
+ from meerschaum.utils.dtypes import serialize_bytes, serialize_decimal
1578
1605
  pd = import_pandas()
1579
1606
  uuid_cols = get_uuid_cols(df)
1580
1607
  bytes_cols = get_bytes_cols(df)
1608
+ numeric_cols = get_numeric_cols(df)
1581
1609
  if safe_copy and bool(uuid_cols or bytes_cols):
1582
1610
  df = df.copy()
1583
1611
  for col in uuid_cols:
1584
1612
  df[col] = df[col].astype(str)
1585
1613
  for col in bytes_cols:
1586
1614
  df[col] = df[col].apply(serialize_bytes)
1615
+ for col in numeric_cols:
1616
+ df[col] = df[col].apply(serialize_decimal)
1587
1617
  return df.infer_objects(copy=False).fillna(pd.NA).to_json(
1588
1618
  date_format=date_format,
1589
1619
  date_unit=date_unit,