PyPI - meerschaum - Versions diffs - 2.5.0__py3-none-any.whl → 2.6.0.dev1__py3-none-any.whl - Mend

meerschaum 2.5.0__py3-none-any.whl → 2.6.0.dev1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (35) hide show

meerschaum/_internal/arguments/_parser.py +6 -1
meerschaum/_internal/entry.py +16 -5
meerschaum/actions/edit.py +6 -6
meerschaum/actions/sql.py +12 -11
meerschaum/api/dash/pipes.py +95 -13
meerschaum/api/routes/_webterm.py +1 -0
meerschaum/config/_edit.py +46 -19
meerschaum/config/_read_config.py +20 -9
meerschaum/config/_version.py +1 -1
meerschaum/config/stack/__init__.py +1 -1
meerschaum/connectors/sql/_pipes.py +80 -24
meerschaum/connectors/sql/_sql.py +29 -10
meerschaum/connectors/valkey/_pipes.py +1 -1
meerschaum/core/Pipe/__init__.py +8 -9
meerschaum/core/Pipe/_attributes.py +33 -11
meerschaum/core/Pipe/_data.py +26 -7
meerschaum/core/Pipe/_dtypes.py +4 -4
meerschaum/core/Pipe/_fetch.py +1 -1
meerschaum/core/Pipe/_sync.py +16 -4
meerschaum/core/Pipe/_verify.py +1 -1
meerschaum/utils/dataframe.py +58 -31
meerschaum/utils/dtypes/__init__.py +16 -5
meerschaum/utils/dtypes/sql.py +58 -28
meerschaum/utils/misc.py +49 -16
meerschaum/utils/packages/_packages.py +2 -1
meerschaum/utils/schedule.py +7 -5
meerschaum/utils/sql.py +224 -40
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/METADATA +5 -3
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/RECORD +35 -35
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/WHEEL +1 -1
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/LICENSE +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/NOTICE +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/entry_points.txt +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/top_level.txt +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dev1.dist-info}/zip-safe +0 -0

meerschaum/connectors/sql/_pipes.py CHANGED Viewed

@@ -404,7 +404,7 @@ def get_create_index_queries(
     indices = pipe.indices
     _datetime = pipe.get_columns('datetime', error=False)
-    _datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns]')
+    _datetime_type = pipe.dtypes.get(_datetime, 'datetime64[ns, UTC]')
     _datetime_name = (
         sql_item_name(_datetime, self.flavor, None)
         if _datetime is not None else None
@@ -738,7 +738,7 @@ def get_pipe_data(
                 dt_type = dtypes.get(_dt, 'object').lower()
                 if 'datetime' not in dt_type:
                     if 'int' not in dt_type:
-                        dtypes[_dt] = 'datetime64[ns]'
+                        dtypes[_dt] = 'datetime64[ns, UTC]'
     existing_cols = pipe.get_columns_types(debug=debug)
     select_columns = (
         [
@@ -1197,7 +1197,12 @@ def sync_pipe(
     A `SuccessTuple` of success (`bool`) and message (`str`).
     """
     from meerschaum.utils.packages import import_pandas
-    from meerschaum.utils.sql import get_update_queries, sql_item_name, json_flavors, update_queries
+    from meerschaum.utils.sql import (
+        get_update_queries,
+        sql_item_name,
+        update_queries,
+        get_create_table_queries,
+    )
     from meerschaum.utils.misc import generate_password
     from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
@@ -1232,7 +1237,6 @@ def sync_pipe(
     ### if table does not exist, create it with indices
     is_new = False
-    add_cols_query = None
     if not pipe.exists(debug=debug):
         check_existing = False
         is_new = True
@@ -1252,9 +1256,7 @@ def sync_pipe(
     ### NOTE: Oracle SQL < 23c (2023) and SQLite does not support booleans,
     ### so infer bools and persist them to `dtypes`.
-    ### MSSQL supports `BIT` for booleans, but we coerce bools to int for MSSQL
-    ### to avoid merge issues.
-    if self.flavor in ('oracle', 'sqlite', 'mssql', 'mysql', 'mariadb'):
+    if self.flavor in ('oracle', 'sqlite', 'mysql', 'mariadb'):
         pipe_dtypes = pipe.dtypes
         new_bool_cols = {
             col: 'bool[pyarrow]'
@@ -1309,7 +1311,60 @@ def sync_pipe(
         'schema': self.get_pipe_schema(pipe),
     })
+    primary_key = pipe.columns.get('primary', None)
+    new_dtypes = {
+        **{
+            col: str(typ)
+            for col, typ in unseen_df.dtypes.items()
+        },
+        **{
+            col: 'int'
+            for col_ix, col in pipe.columns.items()
+            if col_ix != 'primary'
+        },
+        **pipe.dtypes
+    } if is_new else {}
+    autoincrement = (
+        pipe.parameters.get('autoincrement', False)
+        or (is_new and primary_key and primary_key not in new_dtypes)
+    )
+    if autoincrement and autoincrement not in pipe.parameters:
+        pipe.parameters['autoincrement'] = autoincrement
+        edit_success, edit_msg = pipe.edit(debug=debug)
+        if not edit_success:
+            return edit_success, edit_msg
+    if autoincrement and primary_key and primary_key not in df.columns:
+        if unseen_df is not None and primary_key in unseen_df.columns:
+            del unseen_df[primary_key]
+        if update_df is not None and primary_key in update_df.columns:
+            del update_df[primary_key]
+        if delta_df is not None and primary_key in delta_df.columns:
+            del delta_df[primary_key]
+    if is_new:
+        if autoincrement:
+            _ = new_dtypes.pop(primary_key, None)
+        ### TODO: see if this can be removed
+        if 'datetime' in pipe.columns and self.flavor == 'timescaledb':
+            primary_key = None
+        create_table_queries = get_create_table_queries(
+            new_dtypes,
+            pipe.target,
+            self.flavor,
+            schema=self.get_pipe_schema(pipe),
+            primary_key=primary_key,
+        )
+        create_success = all(
+            self.exec_queries(create_table_queries, break_on_error=True, rollback=True, debug=debug)
+        )
+        if not create_success:
+            warn(f"Failed to create '{pipe.target}'. Continuing...")
     stats = self.to_sql(unseen_df, **unseen_kw)
     if is_new:
         if not self.create_indices(pipe, debug=debug):
             warn(f"Failed to create indices for {pipe}. Continuing...")
@@ -1358,7 +1413,7 @@ def sync_pipe(
         ]
         update_queries = get_update_queries(
             pipe.target,
-            temp_target,
+            temp_target,
             self,
             join_cols,
             upsert=upsert,
@@ -1960,7 +2015,7 @@ def get_sync_time(
     table = sql_item_name(pipe.target, self.flavor, self.get_pipe_schema(pipe))
     dt_col = pipe.columns.get('datetime', None)
-    dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns]')
+    dt_type = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
     if not dt_col:
         _dt = pipe.guess_datetime()
         dt = sql_item_name(_dt, self.flavor, None) if _dt else None
@@ -2366,7 +2421,7 @@ def get_pipe_columns_types(
     ----------
     pipe: mrsm.Pipe:
         The pipe to get the columns for.
     Returns
     -------
     A dictionary of columns names (`str`) and types (`str`).
@@ -2381,17 +2436,18 @@ def get_pipe_columns_types(
     }
     >>>
     """
+    from meerschaum.utils.sql import get_table_cols_types
     if not pipe.exists(debug=debug):
         return {}
-    if self.flavor == 'duckdb':
-        from meerschaum.utils.sql import get_table_cols_types
-        return get_table_cols_types(
-            pipe.target,
-            self,
-            flavor=self.flavor,
-            schema=self.get_pipe_schema(pipe),
-        )
+    #  if self.flavor not in ('oracle', 'mysql', 'mariadb'):
+    return get_table_cols_types(
+        pipe.target,
+        self,
+        flavor=self.flavor,
+        schema=self.get_pipe_schema(pipe),
+        debug=debug,
+    )
     table_columns = {}
     try:
@@ -2823,11 +2879,11 @@ def get_alter_columns_queries(
 def get_to_sql_dtype(
-        self,
-        pipe: 'mrsm.Pipe',
-        df: 'pd.DataFrame',
-        update_dtypes: bool = True,
-    ) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
+    self,
+    pipe: 'mrsm.Pipe',
+    df: 'pd.DataFrame',
+    update_dtypes: bool = True,
+) -> Dict[str, 'sqlalchemy.sql.visitors.TraversibleType']:
     """
     Given a pipe and DataFrame, return the `dtype` dictionary for `to_sql()`.
@@ -2947,7 +3003,7 @@ def deduplicate_pipe(
     duplicates_cte_name = sql_item_name('dups', self.flavor, None)
     duplicate_row_number_name = sql_item_name('dup_row_num', self.flavor, None)
     previous_row_number_name = sql_item_name('prev_row_num', self.flavor, None)
     index_list_str = (
         sql_item_name(dt_col, self.flavor, None)
         if dt_col

meerschaum/connectors/sql/_sql.py CHANGED Viewed

@@ -17,8 +17,8 @@ from meerschaum.utils.warnings import warn
 ### database flavors that can use bulk insert
 _bulk_flavors = {'postgresql', 'timescaledb', 'citus'}
 ### flavors that do not support chunks
-_disallow_chunks_flavors = ['duckdb']
-_max_chunks_flavors = {'sqlite': 1000,}
+_disallow_chunks_flavors = []
+_max_chunks_flavors = {'sqlite': 1000}
 SKIP_READ_TRANSACTION_FLAVORS: list[str] = ['mssql']
@@ -123,7 +123,8 @@ def read(
     if chunks is not None and chunks <= 0:
         return []
     from meerschaum.utils.sql import sql_item_name, truncate_item_name
-    from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS
+    from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
+    from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
     from meerschaum.utils.packages import attempt_import, import_pandas
     from meerschaum.utils.pool import get_pool
     from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
@@ -139,6 +140,16 @@ def read(
     if is_dask:
         chunksize = None
     schema = schema or self.schema
+    utc_dt_cols = [
+        col
+        for col, typ in dtype.items()
+        if are_dtypes_equal(typ, 'datetime') and 'utc' in typ.lower()
+    ] if dtype else []
+    if dtype and utc_dt_cols and self.flavor in TIMEZONE_NAIVE_FLAVORS:
+        dtype = dtype.copy()
+        for col in utc_dt_cols:
+            dtype[col] = 'datetime64[ns]'
     pool = get_pool(workers=workers)
     sqlalchemy = attempt_import("sqlalchemy")
@@ -162,7 +173,6 @@ def read(
                 )
             chunksize = _max_chunks_flavors[self.flavor]
-    ### NOTE: A bug in duckdb_engine does not allow for chunks.
     if chunksize is not None and self.flavor in _disallow_chunks_flavors:
         chunksize = None
@@ -206,6 +216,9 @@ def read(
     chunk_list = []
     chunk_hook_results = []
     def _process_chunk(_chunk, _retry_on_failure: bool = True):
+        if self.flavor in TIMEZONE_NAIVE_FLAVORS:
+            for col in utc_dt_cols:
+                _chunk[col] = coerce_timezone(_chunk[col], strip_timezone=False)
         if not as_hook_results:
             chunk_list.append(_chunk)
         if chunk_hook is None:
@@ -765,7 +778,7 @@ def to_sql(
         DROP_IF_EXISTS_FLAVORS,
     )
     from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
-    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
+    from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal, coerce_timezone
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
@@ -848,7 +861,6 @@ def to_sql(
             if not success:
                 warn(f"Unable to drop {name}")
         ### Enforce NVARCHAR(2000) as text instead of CLOB.
         dtype = to_sql_kw.get('dtype', {})
         for col, typ in df.dtypes.items():
@@ -858,11 +870,18 @@ def to_sql(
                 dtype[col] = sqlalchemy.types.INTEGER
         to_sql_kw['dtype'] = dtype
     elif self.flavor == 'mssql':
+        pass
+        ### TODO clean this up
+        #  dtype = to_sql_kw.get('dtype', {})
+        #  for col, typ in df.dtypes.items():
+            #  if are_dtypes_equal(str(typ), 'bool'):
+                #  dtype[col] = sqlalchemy.types.INTEGER
+        #  to_sql_kw['dtype'] = dtype
+    elif self.flavor == 'duckdb':
         dtype = to_sql_kw.get('dtype', {})
-        for col, typ in df.dtypes.items():
-            if are_dtypes_equal(str(typ), 'bool'):
-                dtype[col] = sqlalchemy.types.INTEGER
-        to_sql_kw['dtype'] = dtype
+        dt_cols = [col for col, typ in df.dtypes.items() if are_dtypes_equal(str(typ), 'datetime')]
+        for col in dt_cols:
+            df[col] = coerce_timezone(df[col], strip_utc=False)
     ### Check for JSON columns.
     if self.flavor not in json_flavors:

meerschaum/connectors/valkey/_pipes.py CHANGED Viewed

@@ -706,7 +706,7 @@ def get_sync_time(
     """
     from meerschaum.utils.dtypes import are_dtypes_equal
     dt_col = pipe.columns.get('datetime', None)
-    dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns]')
+    dt_typ = pipe.dtypes.get(dt_col, 'datetime64[ns, UTC]')
     if not dt_col:
         return None

meerschaum/core/Pipe/__init__.py CHANGED Viewed

@@ -153,6 +153,7 @@ class Pipe:
         dtypes: Optional[Dict[str, str]] = None,
         instance: Optional[Union[str, InstanceConnector]] = None,
         temporary: bool = False,
+        upsert: Optional[bool] = None,
         mrsm_instance: Optional[Union[str, InstanceConnector]] = None,
         cache: bool = False,
         debug: bool = False,
@@ -201,6 +202,9 @@ class Pipe:
         instance: Optional[Union[str, InstanceConnector]], default None
             Alias for `mrsm_instance`. If `mrsm_instance` is supplied, this value is ignored.
+        upsert: Optional[bool], default None
+            If `True`, set `upsert` to `True` in the parameters.
         temporary: bool, default False
             If `True`, prevent instance tables (pipes, users, plugins) from being created.
@@ -268,7 +272,7 @@ class Pipe:
             or indexes
             or self._attributes.get('parameters', {}).get('indices', None)
             or self._attributes.get('parameters', {}).get('indexes', None)
-        ) or columns
+        )
         if isinstance(indices, dict):
             indices_key = (
                 'indexes'
@@ -292,6 +296,9 @@ class Pipe:
         elif dtypes is not None:
             warn(f"The provided dtypes are of invalid type '{type(dtypes)}'.")
+        if isinstance(upsert, bool):
+            self._attributes['parameters']['upsert'] = upsert
         ### NOTE: The parameters dictionary is {} by default.
         ###       A Pipe may be registered without parameters, then edited,
         ###       or a Pipe may be registered with parameters set in-memory first.
@@ -308,7 +315,6 @@ class Pipe:
         self._cache = cache and get_config('system', 'experimental', 'cache')
     @property
     def meta(self):
         """
@@ -321,7 +327,6 @@ class Pipe:
             'instance': self.instance_keys,
         }
     def keys(self) -> List[str]:
         """
         Return the ordered keys for this pipe.
@@ -332,7 +337,6 @@ class Pipe:
             if key != 'instance'
         }
     @property
     def instance_connector(self) -> Union[InstanceConnector, None]:
         """
@@ -369,7 +373,6 @@ class Pipe:
                 return None
         return self._connector
     @property
     def cache_connector(self) -> Union[meerschaum.connectors.sql.SQLConnector, None]:
         """
@@ -391,7 +394,6 @@ class Pipe:
         return self._cache_connector
     @property
     def cache_pipe(self) -> Union['meerschaum.Pipe', None]:
         """
@@ -433,11 +435,9 @@ class Pipe:
         return self._cache_pipe
     def __str__(self, ansi: bool=False):
         return pipe_repr(self, ansi=ansi)
     def __eq__(self, other):
         try:
             return (
@@ -489,7 +489,6 @@ class Pipe:
         """
         self.__init__(**_state)
     def __getitem__(self, key: str) -> Any:
         """
         Index the pipe's attributes.

meerschaum/core/Pipe/_attributes.py CHANGED Viewed

@@ -103,10 +103,25 @@ def indices(self) -> Union[Dict[str, Union[str, List[str]]], None]:
     if indices_key not in self.parameters:
         self.parameters[indices_key] = {}
     _indices = self.parameters[indices_key]
+    _columns = self.columns
+    dt_col = _columns.get('datetime', None)
     if not isinstance(_indices, dict):
         _indices = {}
         self.parameters[indices_key] = _indices
-    return {**self.columns, **_indices}
+    unique_cols = (
+        [dt_col]
+        if dt_col
+        else []
+    ) + [
+        col
+        for col_ix, col in _columns.items()
+        if col_ix != 'datetime'
+    ]
+    return {
+        **({'unique': unique_cols} if len(unique_cols) > 1 else {}),
+        **_columns,
+        **_indices
+    }
 @property
@@ -196,7 +211,7 @@ def get_columns(self, *args: str, error: bool = False) -> Union[str, Tuple[str]]
     ----------
     *args: str
         The column names to be retrieved.
     error: bool, default False
         If `True`, raise an `Exception` if the specified column is not defined.
@@ -509,15 +524,22 @@ def get_indices(self) -> Dict[str, str]:
         if cols
     }
     _index_names = {
-        ix: (
-            _index_template.format(
-                target=_target,
-                column_names=column_names,
-                connector_keys=self.connector_keys,
-                metric_key=self.connector_key,
-                location_key=self.location_key,
-            )
+        ix: _index_template.format(
+            target=_target,
+            column_names=column_names,
+            connector_keys=self.connector_keys,
+            metric_key=self.connector_key,
+            location_key=self.location_key,
         )
         for ix, column_names in _column_names.items()
     }
-    return _index_names
+    ### NOTE: Skip any duplicate indices.
+    seen_index_names = {}
+    for ix, index_name in _index_names.items():
+        if index_name in seen_index_names:
+            continue
+        seen_index_names[index_name] = ix
+    return {
+        ix: index_name
+        for index_name, ix in seen_index_names.items()
+    }

meerschaum/core/Pipe/_data.py CHANGED Viewed

@@ -23,8 +23,8 @@ def get_data(
     self,
     select_columns: Optional[List[str]] = None,
     omit_columns: Optional[List[str]] = None,
-    begin: Union[datetime, int, None] = None,
-    end: Union[datetime, int, None] = None,
+    begin: Union[datetime, int, str, None] = None,
+    end: Union[datetime, int, str, None] = None,
     params: Optional[Dict[str, Any]] = None,
     as_iterator: bool = False,
     as_chunks: bool = False,
@@ -48,12 +48,12 @@ def get_data(
     omit_columns: Optional[List[str]], default None
         If provided, remove these columns from the selection.
-    begin: Union[datetime, int, None], default None
+    begin: Union[datetime, int, str, None], default None
         Lower bound datetime to begin searching for data (inclusive).
         Translates to a `WHERE` clause like `WHERE datetime >= begin`.
         Defaults to `None`.
-    end: Union[datetime, int, None], default None
+    end: Union[datetime, int, str, None], default None
         Upper bound datetime to stop searching for data (inclusive).
         Translates to a `WHERE` clause like `WHERE datetime < end`.
         Defaults to `None`.
@@ -105,11 +105,12 @@ def get_data(
     from meerschaum.utils.venv import Venv
     from meerschaum.connectors import get_connector_plugin
     from meerschaum.utils.misc import iterate_chunks, items_str
-    from meerschaum.utils.dtypes import to_pandas_dtype
+    from meerschaum.utils.dtypes import to_pandas_dtype, coerce_timezone
     from meerschaum.utils.dataframe import add_missing_cols_to_df, df_is_chunk_generator
     from meerschaum.utils.packages import attempt_import
     dd = attempt_import('dask.dataframe') if as_dask else None
     dask = attempt_import('dask') if as_dask else None
+    dateutil_parser = attempt_import('dateutil.parser')
     if select_columns == '*':
         select_columns = None
@@ -120,11 +121,29 @@ def get_data(
         omit_columns = [omit_columns]
     as_iterator = as_iterator or as_chunks
+    dt_col = self.columns.get('datetime', None)
+    dt_typ = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
+    dt_is_utc = 'utc' in dt_typ.lower()
+    if isinstance(begin, str):
+        try:
+            begin = dateutil_parser.parse(begin)
+        except Exception as e:
+            warn(f"Failed to parse '{begin}' as datetime:\n{e}")
+            begin = None
+    if isinstance(end, str):
+        try:
+            end = dateutil_parser.parse(end)
+        except Exception as e:
+            warn(f"Failed to parse '{end}' as datetime:\n{e}")
+            end = None
+    if isinstance(begin, datetime):
+        begin = coerce_timezone(begin, strip_utc=(not dt_is_utc))
+    if isinstance(end, datetime):
+        end = coerce_timezone(end, strip_utc=(not dt_is_utc))
     def _sort_df(_df):
         if df_is_chunk_generator(_df):
             return _df
-        dt_col = self.columns.get('datetime', None)
         indices = [] if dt_col not in _df.columns else [dt_col]
         non_dt_cols = [
             col
@@ -607,7 +626,7 @@ def get_chunk_interval(
     if dt_col is None:
         return timedelta(minutes=chunk_minutes)
-    dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns]')
+    dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
     if 'int' in dt_dtype.lower():
         return chunk_minutes
     return timedelta(minutes=chunk_minutes)

meerschaum/core/Pipe/_dtypes.py CHANGED Viewed

@@ -101,18 +101,18 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str,
         dt_col = self.columns.get('datetime', None)
         if dt_col:
             if not self.parameters.get('dtypes', {}).get(dt_col, None):
-                dtypes[dt_col] = 'datetime64[ns]'
+                dtypes[dt_col] = 'datetime64[ns, UTC]'
         return dtypes
-    from meerschaum.utils.sql import get_pd_type
-    from meerschaum.utils.misc import to_pandas_dtype
+    from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
+    from meerschaum.utils.dtypes import to_pandas_dtype
     columns_types = self.get_columns_types(debug=debug)
     ### NOTE: get_columns_types() may return either the types as
     ###       PostgreSQL- or Pandas-style.
     dtypes = {
         c: (
-            get_pd_type(t, allow_custom_dtypes=True)
+            get_pd_type_from_db_type(t, allow_custom_dtypes=True)
             if str(t).isupper()
             else to_pandas_dtype(t)
         )

meerschaum/core/Pipe/_fetch.py CHANGED Viewed

@@ -125,7 +125,7 @@ def get_backtrack_interval(
     if dt_col is None:
         return backtrack_interval
-    dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns]')
+    dt_dtype = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
     if 'int' in dt_dtype.lower():
         return backtrack_minutes

meerschaum/core/Pipe/_sync.py CHANGED Viewed

@@ -624,6 +624,18 @@ def filter_existing(
         merge = pd.merge
         NA = pd.NA
+    primary_key = self.columns.get('primary', None)
+    autoincrement = self.parameters.get('autoincrement', False)
+    pipe_columns = self.columns.copy()
+    if primary_key and autoincrement and df is not None and primary_key in df.columns:
+        if safe_copy:
+            df = df.copy()
+            safe_copy = False
+        if df[primary_key].isnull().all():
+            del df[primary_key]
+            _ = self.columns.pop(primary_key, None)
     def get_empty_df():
         empty_df = pd.DataFrame([])
         dtypes = dict(df.dtypes) if df is not None else {}
@@ -643,8 +655,8 @@ def filter_existing(
     ### begin is the oldest data in the new dataframe
     begin, end = None, None
-    dt_col = self.columns.get('datetime', None)
-    dt_type = self.dtypes.get(dt_col, 'datetime64[ns]') if dt_col else None
+    dt_col = pipe_columns.get('datetime', None)
+    dt_type = self.dtypes.get(dt_col, 'datetime64[ns, UTC]') if dt_col else None
     try:
         min_dt_val = df[dt_col].min(skipna=True) if dt_col else None
         if is_dask and min_dt_val is not None:
@@ -713,7 +725,7 @@ def filter_existing(
     unique_index_vals = {
         col: df[col].unique()
-        for col in self.columns
+        for col in pipe_columns
         if col in df.columns and col != dt_col
     } if not date_bound_only else {}
     filter_params_index_limit = get_config('pipes', 'sync', 'filter_params_index_limit')
@@ -749,7 +761,7 @@ def filter_existing(
     ### Separate new rows from changed ones.
     on_cols = [
-        col for col_key, col in self.columns.items()
+        col for col_key, col in pipe_columns.items()
         if (
             col
             and

meerschaum/core/Pipe/_verify.py CHANGED Viewed

@@ -394,7 +394,7 @@ def get_bound_interval(self, debug: bool = False) -> Union[timedelta, int, None]
     if not dt_col:
         return bound_time_value
-    dt_typ = self.dtypes.get(dt_col, 'datetime64[ns]')
+    dt_typ = self.dtypes.get(dt_col, 'datetime64[ns, UTC]')
     if 'int' in dt_typ.lower():
         return int(bound_time_value)

meerschaum 2.5.0__py3-none-any.whl → 2.6.0.dev1__py3-none-any.whl

meerschaum 2.5.0__py3-none-any.whl → 2.6.0.dev1__py3-none-any.whl