meerschaum 2.6.16__py3-none-any.whl → 2.7.0rc1__py3-none-any.whl

Files changed (38)
  1. meerschaum/_internal/arguments/_parse_arguments.py +1 -1
  2. meerschaum/actions/delete.py +65 -69
  3. meerschaum/actions/edit.py +22 -2
  4. meerschaum/actions/install.py +1 -2
  5. meerschaum/actions/sync.py +2 -3
  6. meerschaum/config/_default.py +1 -1
  7. meerschaum/config/_paths.py +2 -1
  8. meerschaum/config/_version.py +1 -1
  9. meerschaum/connectors/api/_pipes.py +4 -3
  10. meerschaum/connectors/sql/_create_engine.py +3 -3
  11. meerschaum/connectors/sql/_pipes.py +84 -38
  12. meerschaum/connectors/sql/_sql.py +6 -1
  13. meerschaum/connectors/valkey/_pipes.py +12 -1
  14. meerschaum/core/Pipe/__init__.py +23 -13
  15. meerschaum/core/Pipe/_attributes.py +19 -0
  16. meerschaum/core/Pipe/_dtypes.py +1 -1
  17. meerschaum/core/Pipe/_sync.py +61 -21
  18. meerschaum/core/Pipe/_verify.py +8 -7
  19. meerschaum/jobs/_Job.py +2 -1
  20. meerschaum/plugins/_Plugin.py +11 -14
  21. meerschaum/utils/daemon/Daemon.py +20 -13
  22. meerschaum/utils/dataframe.py +175 -13
  23. meerschaum/utils/dtypes/__init__.py +103 -14
  24. meerschaum/utils/dtypes/sql.py +26 -0
  25. meerschaum/utils/misc.py +8 -8
  26. meerschaum/utils/packages/_packages.py +1 -1
  27. meerschaum/utils/schedule.py +8 -3
  28. meerschaum/utils/sql.py +70 -47
  29. meerschaum/utils/venv/_Venv.py +4 -4
  30. meerschaum/utils/venv/__init__.py +33 -13
  31. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/METADATA +2 -2
  32. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/RECORD +38 -38
  33. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/LICENSE +0 -0
  34. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/NOTICE +0 -0
  35. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/WHEEL +0 -0
  36. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/entry_points.txt +0 -0
  37. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/top_level.txt +0 -0
  38. {meerschaum-2.6.16.dist-info → meerschaum-2.7.0rc1.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py

@@ -460,10 +460,16 @@ def get_create_index_queries(
         else None
     )
     primary_key_constraint_name = (
-        sql_item_name(f'pk_{pipe.target}', self.flavor, None)
+        sql_item_name(f'PK_{pipe.target}', self.flavor, None)
         if primary_key is not None
         else None
     )
+    primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
+    datetime_clustered = (
+        "CLUSTERED"
+        if not existing_primary_keys and _datetime is not None
+        else "NONCLUSTERED"
+    )

     _id_index_name = (
         sql_item_name(index_names['id'], self.flavor, None)
@@ -474,6 +480,7 @@ def get_create_index_queries(
     _create_space_partition = get_config('system', 'experimental', 'space')

     ### create datetime index
+    dt_query = None
     if _datetime is not None:
         if self.flavor == 'timescaledb' and pipe.parameters.get('hypertable', True):
             _id_count = (
@@ -504,19 +511,19 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
-        elif self.flavor == 'mssql':
-            dt_query = (
-                "CREATE "
-                + ("CLUSTERED " if not primary_key else '')
-                + f"INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
-        else: ### mssql, sqlite, etc.
-            dt_query = (
-                f"CREATE INDEX {_datetime_index_name} "
-                + f"ON {_pipe_name} ({_datetime_name})"
-            )
+        elif _datetime_index_name:
+            if self.flavor == 'mssql':
+                dt_query = (
+                    f"CREATE {datetime_clustered} INDEX {_datetime_index_name} "
+                    f"ON {_pipe_name} ({_datetime_name})"
+                )
+            else:
+                dt_query = (
+                    f"CREATE INDEX {_datetime_index_name} "
+                    + f"ON {_pipe_name} ({_datetime_name})"
+                )

+    if dt_query:
         index_queries[_datetime] = [dt_query]

     primary_queries = []
@@ -623,7 +630,7 @@ def get_create_index_queries(
             ),
             (
                 f"ALTER TABLE {_pipe_name}\n"
-                f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY ({primary_key_name})"
+                f"ADD CONSTRAINT {primary_key_constraint_name} PRIMARY KEY {primary_key_clustered} ({primary_key_name})"
             ),
         ])
         index_queries[primary_key] = primary_queries
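
Note on the two hunks above: SQL Server allows only one clustered index per table, so the new `primary_key_clustered` / `datetime_clustered` flags decide which index claims it. A minimal standalone sketch of that decision (the `choose_clustering` helper is hypothetical, not part of meerschaum):

```python
from typing import Optional, Tuple

def choose_clustering(
    has_existing_pk: bool,
    datetime_col: Optional[str],
) -> Tuple[str, str]:
    """Mirror the CLUSTERED/NONCLUSTERED assignment in the diff above."""
    primary_key_clustered = "CLUSTERED" if datetime_col is None else "NONCLUSTERED"
    datetime_clustered = (
        "CLUSTERED"
        if not has_existing_pk and datetime_col is not None
        else "NONCLUSTERED"
    )
    return primary_key_clustered, datetime_clustered

# The datetime index claims the single clustered slot when present and no
# primary key already exists; otherwise the primary-key constraint takes it.
assert choose_clustering(False, "timestamp") == ("NONCLUSTERED", "CLUSTERED")
assert choose_clustering(False, None) == ("CLUSTERED", "NONCLUSTERED")
assert choose_clustering(True, "timestamp") == ("NONCLUSTERED", "NONCLUSTERED")
```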
@@ -875,6 +882,7 @@ def get_pipe_data(
     from meerschaum.utils.dtypes import (
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        attempt_cast_to_bytes,
         are_dtypes_equal,
     )
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
@@ -891,17 +899,15 @@ def get_pipe_data(
             col: get_pd_type_from_db_type(typ)
             for col, typ in cols_types.items()
         }
-    }
+    } if pipe.enforce else {}
     if dtypes:
         if self.flavor == 'sqlite':
             if not pipe.columns.get('datetime', None):
                 _dt = pipe.guess_datetime()
                 dt = sql_item_name(_dt, self.flavor, None) if _dt else None
-                is_guess = True
             else:
                 _dt = pipe.get_columns('datetime')
                 dt = sql_item_name(_dt, self.flavor, None)
-                is_guess = False

         if _dt:
             dt_type = dtypes.get(_dt, 'object').lower()
@@ -929,7 +935,7 @@ def get_pipe_data(
         col: to_pandas_dtype(typ)
         for col, typ in dtypes.items()
         if col in select_columns and col not in (omit_columns or [])
-    }
+    } if pipe.enforce else {}
     query = self.get_pipe_data_query(
         pipe,
         select_columns=select_columns,
@@ -959,6 +965,11 @@ def get_pipe_data(
         for col, typ in pipe.dtypes.items()
         if typ == 'uuid' and col in dtypes
     ]
+    bytes_columns = [
+        col
+        for col, typ in pipe.dtypes.items()
+        if typ == 'bytes' and col in dtypes
+    ]

     kw['coerce_float'] = kw.get('coerce_float', (len(numeric_columns) == 0))
@@ -978,6 +989,11 @@ def get_pipe_data(
             continue
         df[col] = df[col].apply(attempt_cast_to_uuid)

+    for col in bytes_columns:
+        if col not in df.columns:
+            continue
+        df[col] = df[col].apply(attempt_cast_to_bytes)
+
     if self.flavor == 'sqlite':
         ignore_dt_cols = [
             col
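
The new `bytes_columns` pass restores raw bytes for columns declared as `bytes`. A hedged sketch of what such a cast can look like, assuming bytes are stored base64-encoded (the shipped helper is `meerschaum.utils.dtypes.attempt_cast_to_bytes` and may differ in detail):

```python
import base64

def _attempt_cast_to_bytes(value):
    """Best-effort decode; return the input unchanged if it isn't base64."""
    if isinstance(value, bytes) or value is None:
        return value
    try:
        return base64.b64decode(value, validate=True)
    except Exception:
        return value

print(_attempt_cast_to_bytes('AAE='))  # b'\x00\x01'
```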
@@ -1339,7 +1355,13 @@ def create_pipe_table_from_df(
     """
     Create a pipe's table from its configured dtypes and an incoming dataframe.
     """
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_datetime_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.sql import get_create_table_queries, sql_item_name
     primary_key = pipe.columns.get('primary', None)
     dt_col = pipe.columns.get('datetime', None)
@@ -1365,6 +1387,18 @@ def create_pipe_table_from_df(
             col: 'numeric'
             for col in get_numeric_cols(df)
         },
+        **{
+            col: 'bytes'
+            for col in get_bytes_cols(df)
+        },
+        **{
+            col: 'datetime64[ns, UTC]'
+            for col in get_datetime_cols(df, timezone_aware=True, timezone_naive=False)
+        },
+        **{
+            col: 'datetime64[ns]'
+            for col in get_datetime_cols(df, timezone_aware=False, timezone_naive=True)
+        },
         **pipe.dtypes
     }
     autoincrement = (
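
`get_datetime_cols` is called here with `timezone_aware` / `timezone_naive` flags to map tz-aware columns to `datetime64[ns, UTC]` and naive ones to `datetime64[ns]`. A rough standalone equivalent of that split (a sketch, not the shipped implementation):

```python
import pandas as pd

df = pd.DataFrame({
    'utc_ts': pd.to_datetime(['2024-01-01'], utc=True),
    'naive_ts': pd.to_datetime(['2024-01-01']),
})

# tz-aware dtypes expose a `tz` attribute; plain numpy datetime64 dtypes do not.
aware_cols = [
    col for col in df.columns
    if getattr(df[col].dtype, 'tz', None) is not None
]
naive_cols = [
    col for col in df.columns
    if str(df[col].dtype).startswith('datetime64')
    and getattr(df[col].dtype, 'tz', None) is None
]
print(aware_cols, naive_cols)  # ['utc_ts'] ['naive_ts']
```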
@@ -1455,11 +1489,9 @@ def sync_pipe(
         get_update_queries,
         sql_item_name,
         update_queries,
-        get_create_table_queries,
         get_reset_autoincrement_queries,
     )
     from meerschaum.utils.misc import generate_password
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import get_db_type_from_pd_type
     from meerschaum import Pipe
@@ -1572,6 +1604,7 @@ def sync_pipe(
         'schema': self.get_pipe_schema(pipe),
     })

+    dt_col = pipe.columns.get('datetime', None)
     primary_key = pipe.columns.get('primary', None)
     autoincrement = (
         pipe.parameters.get('autoincrement', False)
@@ -1589,17 +1622,23 @@ def sync_pipe(
     if not edit_success:
         return edit_success, edit_msg

-    autoincrement_needs_reset = False
+    def _check_pk(_df_to_clear):
+        if _df_to_clear is None:
+            return
+        if primary_key not in _df_to_clear.columns:
+            return
+        if not _df_to_clear[primary_key].notnull().any():
+            del _df_to_clear[primary_key]
+
+    autoincrement_needs_reset = bool(
+        autoincrement
+        and primary_key
+        and primary_key in unseen_df.columns
+        and unseen_df[primary_key].notnull().any()
+    )
     if autoincrement and primary_key:
-        if primary_key not in df.columns:
-            if unseen_df is not None and primary_key in unseen_df.columns:
-                del unseen_df[primary_key]
-            if update_df is not None and primary_key in update_df.columns:
-                del update_df[primary_key]
-            if delta_df is not None and primary_key in delta_df.columns:
-                del delta_df[primary_key]
-        elif unseen_df[primary_key].notnull().any():
-            autoincrement_needs_reset = True
+        for _df_to_clear in (unseen_df, update_df, delta_df):
+            _check_pk(_df_to_clear)

     if is_new:
         create_success, create_msg = self.create_pipe_table_from_df(
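
The refactored `_check_pk` drops an autoincrement primary-key column from a chunk only when it is entirely null, so explicitly supplied key values survive (and trigger a sequence reset). A standalone demonstration with pandas:

```python
import pandas as pd

def _check_pk(df, primary_key='id'):
    if df is None or primary_key not in df.columns:
        return
    if not df[primary_key].notnull().any():
        del df[primary_key]

all_null = pd.DataFrame({'id': [None, None], 'val': [1, 2]})
_check_pk(all_null)
print('id' in all_null.columns)  # False: a fully-null key column is dropped

provided = pd.DataFrame({'id': [10, None], 'val': [1, 2]})
_check_pk(provided)
print('id' in provided.columns)  # True: supplied keys are kept
```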
@@ -1612,6 +1651,7 @@ def sync_pipe(

     do_identity_insert = bool(
         self.flavor in ('mssql',)
+        and primary_key
         and primary_key in unseen_df.columns
         and autoincrement
     )
@@ -1707,7 +1747,11 @@ def sync_pipe(
         col
         for col_key, col in pipe.columns.items()
         if col and col in existing_cols
-    ]
+    ] if not primary_key else (
+        [dt_col, primary_key]
+        if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
+        else [primary_key]
+    )
     update_queries = get_update_queries(
         pipe.target,
         temp_target,
@@ -1716,7 +1760,8 @@ def sync_pipe(
         upsert=upsert,
         schema=self.get_pipe_schema(pipe),
         patch_schema=self.internal_schema,
-        datetime_col=pipe.columns.get('datetime', None),
+        datetime_col=(dt_col if dt_col in update_df.columns else None),
+        identity_insert=(autoincrement and primary_key in update_df.columns),
         debug=debug,
     )
     update_success = all(
@@ -1834,7 +1879,6 @@ def sync_pipe_inplace(
         session_execute,
         update_queries,
     )
-    from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
         get_pd_type_from_db_type,
     )
@@ -2054,6 +2098,7 @@ def sync_pipe_inplace(
     ) if not (upsert or static) else new_cols_types

     common_cols = [col for col in new_cols if col in backtrack_cols_types]
+    primary_key = pipe.columns.get('primary', None)
     on_cols = {
         col: new_cols.get(col)
         for col_key, col in pipe.columns.items()
@@ -2064,7 +2109,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    }
+    } if not primary_key else {primary_key: new_cols.get(primary_key)}

     null_replace_new_cols_str = (
         ', '.join([
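
With a primary key defined, the inplace merge now joins on that single column instead of every configured index. A simplified illustration of the two hunks above (hypothetical column names):

```python
pipe_columns = {'datetime': 'ts', 'id': 'station', 'primary': 'row_id'}
new_cols = {'ts': 'TIMESTAMP', 'station': 'INT', 'row_id': 'INT'}

primary_key = pipe_columns.get('primary', None)
on_cols = (
    # Without a primary key, join on every configured index column.
    {col: new_cols.get(col) for key, col in pipe_columns.items() if key != 'primary'}
    if not primary_key
    # With one, the primary key alone identifies a row.
    else {primary_key: new_cols.get(primary_key)}
)
print(on_cols)  # {'row_id': 'INT'}
```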
@@ -2591,7 +2636,7 @@ def get_pipe_rowcount(
     result = self.value(query, debug=debug, silent=True)
     try:
         return int(result)
-    except Exception as e:
+    except Exception:
         return None

@@ -2616,10 +2661,11 @@ def drop_pipe(
     from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
     success = True
     target = pipe.target
+    schema = self.get_pipe_schema(pipe)
     target_name = (
-        sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
+        sql_item_name(target, self.flavor, schema)
     )
-    if table_exists(target, self, debug=debug):
+    if table_exists(target, self, schema=schema, debug=debug):
         if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
         success = self.exec(
             f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
meerschaum/connectors/sql/_sql.py

@@ -790,7 +790,12 @@ def to_sql(
         truncate_item_name,
         DROP_IF_EXISTS_FLAVORS,
     )
-    from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
+    from meerschaum.utils.dataframe import (
+        get_json_cols,
+        get_numeric_cols,
+        get_uuid_cols,
+        get_bytes_cols,
+    )
     from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
meerschaum/connectors/valkey/_pipes.py

@@ -46,9 +46,20 @@ def serialize_document(doc: Dict[str, Any]) -> str:
     -------
     A serialized string for the document.
     """
+    from meerschaum.utils.dtypes import serialize_bytes
     return json.dumps(
         doc,
-        default=(lambda x: json_serialize_datetime(x) if hasattr(x, 'tzinfo') else str(x)),
+        default=(
+            lambda x: (
+                json_serialize_datetime(x)
+                if hasattr(x, 'tzinfo')
+                else (
+                    serialize_bytes(x)
+                    if isinstance(x, bytes)
+                    else str(x)
+                )
+            )
+        ),
         separators=(',', ':'),
         sort_keys=True,
     )
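
The expanded `default=` chain serializes datetimes first, then bytes, then falls back to `str`. A self-contained sketch of the same chain, with `isoformat()` standing in for `json_serialize_datetime` and base64 assumed for `serialize_bytes`:

```python
import base64
import json
from datetime import datetime, timezone

def _default(x):
    if hasattr(x, 'tzinfo'):   # datetime-like values first
        return x.isoformat()
    if isinstance(x, bytes):   # then raw bytes
        return base64.b64encode(x).decode('ascii')
    return str(x)              # fallback for everything else

doc = {'ts': datetime(2024, 1, 1, tzinfo=timezone.utc), 'blob': b'\x00\x01'}
print(json.dumps(doc, default=_default, separators=(',', ':'), sort_keys=True))
# {"blob":"AAE=","ts":"2024-01-01T00:00:00+00:00"}
```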
meerschaum/core/Pipe/__init__.py

@@ -106,6 +106,7 @@ class Pipe:
         upsert,
         static,
         tzinfo,
+        enforce,
         get_columns,
         get_columns_types,
         get_columns_indices,
@@ -132,6 +133,7 @@ class Pipe:
         _persist_new_json_columns,
         _persist_new_numeric_columns,
         _persist_new_uuid_columns,
+        _persist_new_bytes_columns,
     )
     from ._verify import (
         verify,
@@ -162,12 +164,14 @@ class Pipe:
         upsert: Optional[bool] = None,
         autoincrement: Optional[bool] = None,
         static: Optional[bool] = None,
+        enforce: Optional[bool] = None,
         mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
         cache: bool = False,
         debug: bool = False,
         connector_keys: Optional[str] = None,
         metric_key: Optional[str] = None,
         location_key: Optional[str] = None,
+        instance_keys: Optional[str] = None,
         indexes: Union[Dict[str, str], List[str], None] = None,
     ):
         """
@@ -219,6 +223,10 @@ class Pipe:
         static: Optional[bool], default None
             If `True`, set `static` in the parameters.

+        enforce: Optional[bool], default None
+            If `False`, skip data type enforcement.
+            Default behavior is `True`.
+
         temporary: bool, default False
             If `True`, prevent instance tables (pipes, users, plugins) from being created.
@@ -319,11 +327,13 @@ class Pipe:
         if isinstance(static, bool):
             self._attributes['parameters']['static'] = static

+        if isinstance(enforce, bool):
+            self._attributes['parameters']['enforce'] = enforce
+
         ### NOTE: The parameters dictionary is {} by default.
         ### A Pipe may be registered without parameters, then edited,
         ### or a Pipe may be registered with parameters set in-memory first.
-        # from meerschaum.config import get_config
-        _mrsm_instance = mrsm_instance if mrsm_instance is not None else instance
+        _mrsm_instance = mrsm_instance if mrsm_instance is not None else (instance or instance_keys)
         if _mrsm_instance is None:
             _mrsm_instance = get_config('meerschaum', 'instance', patch=True)
@@ -341,10 +351,10 @@ class Pipe:
         Return the four keys needed to reconstruct this pipe.
         """
         return {
-            'connector': self.connector_keys,
-            'metric': self.metric_key,
-            'location': self.location_key,
-            'instance': self.instance_keys,
+            'connector_keys': self.connector_keys,
+            'metric_key': self.metric_key,
+            'location_key': self.location_key,
+            'instance_keys': self.instance_keys,
         }

     def keys(self) -> List[str]:
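
Because `meta` now emits `connector_keys` / `metric_key` / `location_key` / `instance_keys`, which match the constructor's new keyword arguments, a pipe should round-trip through its own metadata. A sketch (assumes a configured `sql:main` instance):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')

# The meta keys now align with the constructor's keyword arguments.
clone = mrsm.Pipe(**pipe.meta)
print(clone == pipe)  # True
```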
@@ -385,7 +395,7 @@ class Pipe:
         warnings.simplefilter('ignore')
         try:
             conn = parse_instance_keys(self.connector_keys)
-        except Exception as e:
+        except Exception:
             conn = None
         if conn:
             self._connector = conn
@@ -429,7 +439,7 @@ class Pipe:
         _fetch_patch = {
             'fetch': ({
                 'definition': (
-                    f"SELECT * FROM "
+                    "SELECT * FROM "
                     + sql_item_name(
                         str(self.target),
                         self.instance_connector.flavor,
@@ -467,7 +477,7 @@ class Pipe:
             and self.location_key == other.location_key
             and self.instance_keys == other.instance_keys
         )
-        except Exception as e:
+        except Exception:
             return False

     def __hash__(self):
@@ -496,11 +506,11 @@ class Pipe:
         Define the state dictionary (pickling).
         """
         return {
-            'connector': self.connector_keys,
-            'metric': self.metric_key,
-            'location': self.location_key,
+            'connector_keys': self.connector_keys,
+            'metric_key': self.metric_key,
+            'location_key': self.location_key,
             'parameters': self.parameters,
-            'instance': self.instance_keys,
+            'instance_keys': self.instance_keys,
         }

     def __setstate__(self, _state: Dict[str, Any]):
meerschaum/core/Pipe/_attributes.py

@@ -289,6 +289,25 @@ def tzinfo(self) -> Union[None, timezone]:
     return None


+@property
+def enforce(self) -> bool:
+    """
+    Return the `enforce` parameter for the pipe.
+    """
+    if 'enforce' not in self.parameters:
+        self.parameters['enforce'] = True
+
+    return self.parameters['enforce']
+
+
+@enforce.setter
+def enforce(self, _enforce: bool) -> None:
+    """
+    Set the `enforce` parameter for the pipe.
+    """
+    self.parameters['enforce'] = _enforce
+
+
 def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]:
     """
     Check if the requested columns are defined.
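
Usage of the new `enforce` flag added above (a sketch; assumes a configured `sql:main` instance):

```python
import meerschaum as mrsm

pipe = mrsm.Pipe(
    'demo', 'temperature',
    instance='sql:main',
    enforce=False,  # skip dtype coercion on sync and reads
)
print(pipe.enforce)                    # False
print(pipe.parameters.get('enforce'))  # False: persisted in the parameters
```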
meerschaum/core/Pipe/_dtypes.py

@@ -41,7 +41,7 @@ def enforce_dtypes(
         )
         return df

-    pipe_dtypes = self.dtypes
+    pipe_dtypes = self.dtypes if self.enforce else {}

     try:
         if isinstance(df, str):
meerschaum/core/Pipe/_sync.py

@@ -368,10 +368,11 @@ def sync(
             ### Cast to a dataframe and ensure datatypes are what we expect.
             df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)

-            ### Capture `numeric`, `uuid`, and `json` columns.
+            ### Capture `numeric`, `uuid`, `json`, and `bytes` columns.
             self._persist_new_json_columns(df, debug=debug)
             self._persist_new_numeric_columns(df, debug=debug)
             self._persist_new_uuid_columns(df, debug=debug)
+            self._persist_new_bytes_columns(df, debug=debug)

             if debug:
                 dprint(
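
With `_persist_new_bytes_columns` wired into `sync`, a dataframe containing raw bytes should register a `bytes` dtype on the pipe. A sketch (assumes a configured `sql:main` instance):

```python
import meerschaum as mrsm
import pandas as pd

pipe = mrsm.Pipe('demo', 'blobs', instance='sql:main', columns={'datetime': 'ts'})
df = pd.DataFrame({
    'ts': pd.to_datetime(['2024-01-01'], utc=True),
    'payload': [b'\x00\xff'],
})
pipe.sync(df)
print(pipe.dtypes.get('payload'))  # 'bytes'
```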
@@ -617,11 +618,13 @@ def filter_existing(
         filter_unseen_df,
         add_missing_cols_to_df,
         get_unhashable_cols,
-        get_numeric_cols,
     )
     from meerschaum.utils.dtypes import (
         to_pandas_dtype,
         none_if_null,
+        to_datetime,
+        are_dtypes_equal,
+        value_is_null,
     )
     from meerschaum.config import get_config
     pd = import_pandas()
@@ -669,29 +672,36 @@ def filter_existing(
     ### begin is the oldest data in the new dataframe
     begin, end = None, None
     dt_col = pipe_columns.get('datetime', None)
+    primary_key = pipe_columns.get('primary', None)
     dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
+
+    if autoincrement and primary_key == dt_col and dt_col not in df.columns:
+        if enforce_dtypes:
+            df = self.enforce_dtypes(df, chunksize=chunksize, debug=debug)
+        return df, get_empty_df(), df
+
     try:
-        min_dt_val = df[dt_col].min(skipna=True) if dt_col else None
+        min_dt_val = df[dt_col].min(skipna=True) if dt_col and dt_col in df.columns else None
         if is_dask and min_dt_val is not None:
             min_dt_val = min_dt_val.compute()
         min_dt = (
-            pandas.to_datetime(min_dt_val).to_pydatetime()
-            if min_dt_val is not None and 'datetime' in str(dt_type)
+            to_datetime(min_dt_val, as_pydatetime=True)
+            if min_dt_val is not None and are_dtypes_equal(dt_type, 'datetime')
             else min_dt_val
         )
     except Exception:
         min_dt = None
-    if not ('datetime' in str(type(min_dt))) or str(min_dt) == 'NaT':
-        if 'int' not in str(type(min_dt)).lower():
+
+    if not are_dtypes_equal('datetime', str(type(min_dt))) or value_is_null(min_dt):
+        if not are_dtypes_equal('int', str(type(min_dt))):
             min_dt = None

     if isinstance(min_dt, datetime):
-        begin = (
-            round_time(
-                min_dt,
-                to='down'
-            ) - timedelta(minutes=1)
-        )
+        rounded_min_dt = round_time(min_dt, to='down')
+        try:
+            begin = rounded_min_dt - timedelta(minutes=1)
+        except OverflowError:
+            begin = rounded_min_dt
     elif dt_type and 'int' in dt_type.lower():
         begin = min_dt
     elif dt_col is None:
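
The new `try`/`except OverflowError` guards the backtrack window against `datetime.min`, where subtracting a minute is impossible:

```python
from datetime import datetime, timedelta

rounded_min_dt = datetime.min  # oldest representable timestamp
try:
    begin = rounded_min_dt - timedelta(minutes=1)
except OverflowError:
    begin = rounded_min_dt     # fall back to the unshifted bound
print(begin)  # 0001-01-01 00:00:00
```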
@@ -699,11 +709,11 @@ def filter_existing(

     ### end is the newest data in the new dataframe
     try:
-        max_dt_val = df[dt_col].max(skipna=True) if dt_col else None
+        max_dt_val = df[dt_col].max(skipna=True) if dt_col and dt_col in df.columns else None
         if is_dask and max_dt_val is not None:
             max_dt_val = max_dt_val.compute()
         max_dt = (
-            pandas.to_datetime(max_dt_val).to_pydatetime()
+            to_datetime(max_dt_val, as_pydatetime=True)
             if max_dt_val is not None and 'datetime' in str(dt_type)
             else max_dt_val
         )
@@ -712,8 +722,8 @@ def filter_existing(
         traceback.print_exc()
         max_dt = None

-    if ('datetime' not in str(type(max_dt))) or str(min_dt) == 'NaT':
-        if 'int' not in str(type(max_dt)).lower():
+    if not are_dtypes_equal('datetime', str(type(max_dt))) or value_is_null(max_dt):
+        if not are_dtypes_equal('int', str(type(max_dt))):
             max_dt = None

     if isinstance(max_dt, datetime):
@@ -723,7 +733,7 @@ def filter_existing(
                 to='down'
             ) + timedelta(minutes=1)
         )
-    elif dt_type and 'int' in dt_type.lower():
+    elif dt_type and 'int' in dt_type.lower() and max_dt is not None:
         end = max_dt + 1

     if max_dt is not None and min_dt is not None and min_dt > max_dt:
@@ -738,7 +748,7 @@ def filter_existing(

     unique_index_vals = {
         col: df[col].unique()
-        for col in pipe_columns
+        for col in (pipe_columns if not primary_key else [primary_key])
         if col in df.columns and col != dt_col
     } if not date_bound_only else {}
     filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
@@ -777,14 +787,15 @@ def filter_existing(

     ### Separate new rows from changed ones.
     on_cols = [
-        col for col_key, col in pipe_columns.items()
+        col
+        for col_key, col in pipe_columns.items()
         if (
             col
             and
             col_key != 'value'
             and col in backtrack_df.columns
         )
-    ]
+    ] if not primary_key else [primary_key]
     self_dtypes = self.dtypes
     on_cols_dtypes = {
         col: to_pandas_dtype(typ)
@@ -1020,3 +1031,32 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
         return edit_success, edit_msg

     return True, "Success"
+
+
+def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
+    """
+    Check for new `bytes` columns and update the parameters.
+    """
+    from meerschaum.utils.dataframe import get_bytes_cols
+    bytes_cols = get_bytes_cols(df)
+    existing_bytes_cols = [col for col, typ in self.dtypes.items() if typ == 'bytes']
+    new_bytes_cols = [col for col in bytes_cols if col not in existing_bytes_cols]
+    if not new_bytes_cols:
+        return True, "Success"
+
+    self._attributes_sync_time = None
+    dt_col = self.columns.get('datetime', None)
+    dtypes = self.parameters.get('dtypes', {})
+    if dt_col not in dtypes:
+        dtypes[dt_col] = 'datetime'
+    dtypes.update({col: 'bytes' for col in bytes_cols})
+    self.parameters['dtypes'] = dtypes
+
+    if not self.temporary:
+        edit_success, edit_msg = self.edit(interactive=False, debug=debug)
+        if not edit_success:
+            warn(f"Unable to update bytes dtypes for {self}:\n{edit_msg}")
+
+        return edit_success, edit_msg
+
+    return True, "Success"
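
For reference, a hedged sketch of the bytes-column detection `_persist_new_bytes_columns` relies on (assumed behavior of `meerschaum.utils.dataframe.get_bytes_cols`; the shipped version may differ):

```python
import pandas as pd

def get_bytes_cols(df):
    """Return columns whose first non-null value is a bytes object."""
    bytes_cols = []
    for col in df.columns:
        non_null = df[col].dropna()
        if not non_null.empty and isinstance(non_null.iloc[0], bytes):
            bytes_cols.append(col)
    return bytes_cols

df = pd.DataFrame({'payload': [b'\x01', None], 'note': ['text', 'more']})
print(get_bytes_cols(df))  # ['payload']
```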