meerschaum 2.7.0rc1__py3-none-any.whl → 2.7.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- meerschaum/api/dash/callbacks/dashboard.py +46 -37
- meerschaum/api/dash/connectors.py +7 -9
- meerschaum/api/resources/templates/termpage.html +32 -24
- meerschaum/api/routes/_pipes.py +7 -8
- meerschaum/api/routes/_webterm.py +4 -3
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +14 -18
- meerschaum/connectors/sql/_create_engine.py +6 -1
- meerschaum/connectors/sql/_instance.py +11 -12
- meerschaum/connectors/sql/_pipes.py +62 -56
- meerschaum/connectors/sql/_sql.py +37 -7
- meerschaum/core/Pipe/_attributes.py +6 -1
- meerschaum/core/Pipe/_dtypes.py +23 -16
- meerschaum/core/Pipe/_sync.py +1 -13
- meerschaum/jobs/_Job.py +2 -0
- meerschaum/utils/daemon/Daemon.py +2 -2
- meerschaum/utils/dataframe.py +3 -3
- meerschaum/utils/dtypes/__init__.py +48 -2
- meerschaum/utils/dtypes/sql.py +15 -7
- meerschaum/utils/sql.py +114 -57
- meerschaum/utils/venv/__init__.py +22 -9
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/METADATA +1 -1
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/RECORD +29 -29
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/LICENSE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/NOTICE +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/WHEEL +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/top_level.txt +0 -0
- {meerschaum-2.7.0rc1.dist-info → meerschaum-2.7.2.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py
CHANGED
@@ -97,7 +97,6 @@ def edit_pipe(
     if pipe.id is None:
        return False, f"{pipe} is not registered and cannot be edited."

-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.sql import json_flavors
     if not patch:
@@ -172,7 +171,7 @@ def fetch_pipes_keys(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.misc import separate_negation_values
+    from meerschaum.utils.misc import separate_negation_values
     from meerschaum.utils.sql import OMIT_NULLSFIRST_FLAVORS, table_exists
     from meerschaum.config.static import STATIC_CONFIG
     import json
@@ -316,7 +315,6 @@ def create_indices(
     """
     Create a pipe's indices.
     """
-    from meerschaum.utils.sql import sql_item_name, update_queries
     from meerschaum.utils.debug import dprint
     if debug:
         dprint(f"Creating indices for {pipe}...")
@@ -419,11 +417,14 @@ def get_create_index_queries(
     existing_cols_indices = self.get_pipe_columns_indices(pipe, debug=debug)
     existing_ix_names = set()
     existing_primary_keys = []
+    existing_clustered_primary_keys = []
     for col, col_indices in existing_cols_indices.items():
         for col_ix_doc in col_indices:
             existing_ix_names.add(col_ix_doc.get('name', None))
             if col_ix_doc.get('type', None) == 'PRIMARY KEY':
                 existing_primary_keys.append(col)
+                if col_ix_doc.get('clustered', True):
+                    existing_clustered_primary_keys.append(col)

     _datetime = pipe.get_columns('datetime', error=False)
     _datetime_name = (
@@ -467,7 +468,7 @@ def get_create_index_queries(
     primary_key_clustered = "CLUSTERED" if _datetime is None else "NONCLUSTERED"
     datetime_clustered = (
         "CLUSTERED"
-        if not existing_primary_keys and _datetime is not None
+        if not existing_clustered_primary_keys and _datetime is not None
         else "NONCLUSTERED"
     )

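In effect, on MSSQL the datetime index is only made CLUSTERED when no clustered primary key already exists. A standalone sketch of the decision (the introspection results are hypothetical):

    # Hypothetical introspection results for illustration.
    existing_clustered_primary_keys = ['id']
    _datetime = 'dt'

    datetime_clustered = (
        "CLUSTERED"
        if not existing_clustered_primary_keys and _datetime is not None
        else "NONCLUSTERED"
    )
    print(datetime_clustered)  # NONCLUSTERED: a clustered PK already claims the slot
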
@@ -665,6 +666,8 @@ def get_create_index_queries(
         cols = indices[ix_key]
         if not isinstance(cols, (list, tuple)):
             cols = [cols]
+        if ix_key == 'unique' and upsert:
+            continue
         cols_names = [sql_item_name(col, self.flavor, None) for col in cols if col]
         if not cols_names:
             continue
@@ -792,8 +795,6 @@ def delete_pipe(
     """
     Delete a Pipe's registration.
     """
-    from meerschaum.utils.sql import sql_item_name
-    from meerschaum.utils.debug import dprint
     from meerschaum.utils.packages import attempt_import
     sqlalchemy = attempt_import('sqlalchemy')

@@ -876,7 +877,6 @@ def get_pipe_data(

     """
     import json
-    from meerschaum.utils.sql import sql_item_name
     from meerschaum.utils.misc import parse_df_datetimes, to_pandas_dtype
     from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
@@ -889,7 +889,7 @@ def get_pipe_data(
     pd = import_pandas()
     is_dask = 'dask' in pd.__name__

-    cols_types = pipe.get_columns_types(debug=debug)
+    cols_types = pipe.get_columns_types(debug=debug) if pipe.enforce else {}
     dtypes = {
         **{
             p_col: to_pandas_dtype(p_typ)
@@ -904,17 +904,16 @@ def get_pipe_data(
     if self.flavor == 'sqlite':
         if not pipe.columns.get('datetime', None):
             _dt = pipe.guess_datetime()
-            dt = sql_item_name(_dt, self.flavor, None) if _dt else None
         else:
             _dt = pipe.get_columns('datetime')
-            dt = sql_item_name(_dt, self.flavor, None)

         if _dt:
             dt_type = dtypes.get(_dt, 'object').lower()
             if 'datetime' not in dt_type:
                 if 'int' not in dt_type:
                     dtypes[_dt] = 'datetime64[ns, UTC]'
-
+
+    existing_cols = cols_types.keys()
     select_columns = (
         [
             col
@@ -928,7 +927,7 @@ def get_pipe_data(
             if col in existing_cols
             and col not in (omit_columns or [])
         ]
-    )
+    ) if pipe.enforce else select_columns
     if select_columns:
         dtypes = {col: typ for col, typ in dtypes.items() if col in select_columns}
     dtypes = {
@@ -1109,12 +1108,13 @@ def get_pipe_data_query(
     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type

     dt_col = pipe.columns.get('datetime', None)
-    existing_cols = pipe.get_columns_types(debug=debug)
+    existing_cols = pipe.get_columns_types(debug=debug) if pipe.enforce else []
+    skip_existing_cols_check = skip_existing_cols_check or not pipe.enforce
     dt_typ = get_pd_type_from_db_type(existing_cols[dt_col]) if dt_col in existing_cols else None
     select_columns = (
         [col for col in existing_cols]
         if not select_columns
-        else [col for col in select_columns if col in existing_cols]
+        else [col for col in select_columns if skip_existing_cols_check or col in existing_cols]
     )
     if omit_columns:
         select_columns = [col for col in select_columns if col not in omit_columns]
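Both read paths now honor the pipe's `enforce` flag: with `enforce=False`, `get_columns_types()` is skipped and the column-existence check is bypassed. A minimal sketch of declaring such a pipe (connector, metric, and instance keys are placeholders):

    import meerschaum as mrsm

    pipe = mrsm.Pipe(
        'demo', 'weather',               # placeholder connector and metric keys
        instance='sql:main',
        enforce=False,                   # reads skip dtype lookups and column checks
        columns={'datetime': 'dt'},
    )
    df = pipe.get_data()                 # no get_columns_types() round-trip
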
@@ -1201,7 +1201,7 @@ def get_pipe_data_query(
                 number=begin_add_minutes,
                 begin=begin,
             )
-            where += f"{dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
+            where += f"\n    {dt} >= {begin_da}" + ("\n    AND\n    " if end is not None else "")
             is_dt_bound = True

     if end is not None and (_dt in existing_cols or skip_existing_cols_check):
@@ -1213,7 +1213,7 @@ def get_pipe_data_query(
                 number=end_add_minutes,
                 begin=end
             )
-            where += f"{dt} <
+            where += f"{dt} < {end_da}"
             is_dt_bound = True

     if params is not None:
@@ -1225,7 +1225,7 @@ def get_pipe_data_query(
         }
         if valid_params:
             where += build_where(valid_params, self).replace(
-                'WHERE', ('AND' if is_dt_bound else "")
+                'WHERE', (' AND' if is_dt_bound else " ")
             )

     if len(where) > 0:
@@ -1280,7 +1280,6 @@ def get_pipe_id(
     if pipe.temporary:
         return None
     from meerschaum.utils.packages import attempt_import
-    import json
     sqlalchemy = attempt_import('sqlalchemy')
     from meerschaum.connectors.sql.tables import get_tables
     pipes_tbl = get_tables(mrsm_instance=self, create=(not pipe.temporary), debug=debug)['pipes']
@@ -1599,6 +1598,7 @@ def sync_pipe(
         'if_exists': if_exists,
         'debug': debug,
         'as_dict': True,
+        'safe_copy': kw.get('safe_copy', False),
         'chunksize': chunksize,
         'dtype': self.get_to_sql_dtype(pipe, unseen_df, update_dtypes=True),
         'schema': self.get_pipe_schema(pipe),
@@ -1655,35 +1655,37 @@ def sync_pipe(
         and primary_key in unseen_df.columns
         and autoincrement
     )
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            unseen_df,
-            _connection=connection,
-            **unseen_kw
-        )
+    stats = {'success': True, 'msg': 'Success'}
+    if len(unseen_df) > 0:
+        with self.engine.connect() as connection:
+            with connection.begin():
+                if do_identity_insert:
+                    identity_on_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} ON",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_on_result is None:
+                        return False, f"Could not enable identity inserts on {pipe}."

-
-
-            f"SET IDENTITY_INSERT {pipe_name} OFF",
-            commit=False,
-            _connection=connection,
-
-            debug=debug,
-        )
-
-
+                stats = self.to_sql(
+                    unseen_df,
+                    _connection=connection,
+                    **unseen_kw
+                )
+
+                if do_identity_insert:
+                    identity_off_result = self.exec(
+                        f"SET IDENTITY_INSERT {pipe_name} OFF",
+                        commit=False,
+                        _connection=connection,
+                        close=False,
+                        debug=debug,
+                    )
+                    if identity_off_result is None:
+                        return False, f"Could not disable identity inserts on {pipe}."

     if is_new:
         if not self.create_indices(pipe, debug=debug):
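The rewrite scopes the MSSQL `SET IDENTITY_INSERT` toggles and the insert to a single connection and transaction, bailing out if either toggle fails. The same pattern in plain SQLAlchemy (the DSN and table are placeholders):

    import pandas as pd
    import sqlalchemy

    engine = sqlalchemy.create_engine('mssql+pyodbc://user:pass@dsn')  # placeholder
    df = pd.DataFrame({'id': [1, 2], 'val': ['a', 'b']})

    with engine.connect() as connection:
        with connection.begin():  # toggles and insert commit (or roll back) together
            connection.execute(sqlalchemy.text('SET IDENTITY_INSERT dbo.demo ON'))
            df.to_sql('demo', connection, schema='dbo', if_exists='append', index=False)
            connection.execute(sqlalchemy.text('SET IDENTITY_INSERT dbo.demo OFF'))
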
@@ -1722,11 +1724,12 @@ def sync_pipe(
             },
             target=temp_target,
             temporary=True,
+            enforce=False,
+            static=True,
+            autoincrement=False,
             parameters={
-                'static': True,
-                'schema': self.internal_schema,
+                'schema': (self.internal_schema if self.flavor != 'mssql' else None),
                 'hypertable': False,
-                'autoincrement': False,
             },
         )
         temp_pipe.__dict__['_columns_types'] = {
@@ -1747,7 +1750,7 @@ def sync_pipe(
                 col
                 for col_key, col in pipe.columns.items()
                 if col and col in existing_cols
-            ] if not primary_key else (
+            ] if not primary_key or self.flavor == 'oracle' else (
                 [dt_col, primary_key]
                 if self.flavor == 'timescaledb' and dt_col and dt_col in update_df.columns
                 else [primary_key]
@@ -1764,9 +1767,13 @@ def sync_pipe(
             identity_insert=(autoincrement and primary_key in update_df.columns),
             debug=debug,
         )
-
-
+        update_results = self.exec_queries(
+            update_queries,
+            break_on_error=True,
+            rollback=True,
+            debug=debug,
         )
+        update_success = all(update_results)
         self._log_temporary_tables_creation(
             temp_target,
             ready_to_drop=True,
@@ -1775,6 +1782,8 @@ def sync_pipe(
         )
         if not update_success:
             warn(f"Failed to apply update to {pipe}.")
+            stats['success'] = stats['success'] and update_success
+            stats['msg'] = (stats.get('msg', '') + f'\nFailed to apply update to {pipe}.').lstrip()

     stop = time.perf_counter()
     success = stats['success']
@@ -1951,8 +1960,8 @@ def sync_pipe_inplace(
         autoincrement=autoincrement,
         datetime_column=dt_col,
     )
-
-    if
+    results = self.exec_queries(create_pipe_queries, debug=debug)
+    if not all(results):
         _ = clean_up_temp_tables()
         return False, f"Could not insert new data into {pipe} from its SQL query definition."

@@ -2109,7 +2118,7 @@ def sync_pipe_inplace(
             and col in backtrack_cols_types
             and col in new_cols
         )
-    } if not primary_key else {primary_key: new_cols.get(primary_key)}
+    } if not primary_key or self.flavor == 'oracle' else {primary_key: new_cols.get(primary_key)}

     null_replace_new_cols_str = (
         ', '.join([
@@ -3376,9 +3385,7 @@ def deduplicate_pipe(
     """
     from meerschaum.utils.sql import (
         sql_item_name,
-        NO_CTE_FLAVORS,
         get_rename_table_queries,
-        NO_SELECT_INTO_FLAVORS,
         DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
@@ -3500,7 +3507,6 @@ def deduplicate_pipe(
     dedup_table = '-' + session_id + f'_dedup_{pipe.target}'
     temp_old_table = '-' + session_id + f"_old_{pipe.target}"

-    dedup_table_name = sql_item_name(dedup_table, self.flavor, self.get_pipe_schema(pipe))
     temp_old_table_name = sql_item_name(temp_old_table, self.flavor, self.get_pipe_schema(pipe))

     create_temporary_table_query = get_create_table_query(
meerschaum/connectors/sql/_sql.py
CHANGED
@@ -624,7 +624,7 @@ def exec_queries(
     rollback: bool = True,
     silent: bool = False,
     debug: bool = False,
-) -> List[sqlalchemy.engine.cursor.CursorResult]:
+) -> List[Union[sqlalchemy.engine.cursor.CursorResult, None]]:
    """
    Execute a list of queries in a single transaction.

@@ -688,6 +688,7 @@ def exec_queries(
         if result is None and break_on_error:
             if rollback:
                 session.rollback()
+            results.append(result)
             break
         elif result is not None and hook is not None:
             hook_queries = hook(session)
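Because the failing `None` is now appended before the loop breaks, the returned list faithfully reports the failure, and callers can gate on `all(...)` exactly as `sync_pipe()` does above. A usage sketch, assuming a `SQLConnector` registered as `sql:main`:

    import meerschaum as mrsm

    conn = mrsm.get_connector('sql:main')  # placeholder instance keys
    queries = [
        "CREATE TABLE IF NOT EXISTS t (a INT)",
        "INSERT INTO t (a) VALUES (1)",
    ]
    results = conn.exec_queries(queries, break_on_error=True, rollback=True)
    if not all(results):  # a None entry marks the failed query
        print("Batch failed and was rolled back.")
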
@@ -715,6 +716,7 @@ def to_sql(
     method: str = "",
     chunksize: Optional[int] = -1,
     schema: Optional[str] = None,
+    safe_copy: bool = True,
     silent: bool = False,
     debug: bool = False,
     as_tuple: bool = False,
@@ -729,7 +731,7 @@ def to_sql(
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame to be
+        The DataFrame to be inserted.

     name: str
         The name of the table to be created.
@@ -752,6 +754,9 @@ def to_sql(
         Optionally override the schema for the table.
         Defaults to `SQLConnector.schema`.

+    safe_copy: bool, defaul True
+        If `True`, copy the dataframe before making any changes.
+
     as_tuple: bool, default False
         If `True`, return a (success_bool, message) tuple instead of a `bool`.
         Defaults to `False`.
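`safe_copy` exists because `to_sql()` may now rewrite columns in place (hex-encoding bytes, casting numerics to text); copying first keeps the caller's DataFrame intact. A sketch, assuming a `SQLConnector` registered as `sql:main`:

    import meerschaum as mrsm
    import pandas as pd

    conn = mrsm.get_connector('sql:main')    # placeholder instance keys
    df = pd.DataFrame({'data': [b'\x00\x01', b'\x02']})
    conn.to_sql(df, name='blobs')            # safe_copy=True by default
    assert isinstance(df['data'][0], bytes)  # the caller's frame is untouched
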
@@ -770,8 +775,7 @@ def to_sql(
     """
     import time
     import json
-    import
-    from decimal import Decimal, Context
+    from decimal import Decimal
     from meerschaum.utils.warnings import error, warn
     import warnings
     import functools
@@ -796,9 +800,15 @@ def to_sql(
         get_uuid_cols,
         get_bytes_cols,
     )
-    from meerschaum.utils.dtypes import
+    from meerschaum.utils.dtypes import (
+        are_dtypes_equal,
+        quantize_decimal,
+        coerce_timezone,
+        encode_bytes_for_bytea,
+    )
     from meerschaum.utils.dtypes.sql import (
         NUMERIC_PRECISION_FLAVORS,
+        NUMERIC_AS_TEXT_FLAVORS,
         PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
         get_db_type_from_pd_type,
     )
@@ -808,14 +818,35 @@ def to_sql(
     pd = import_pandas()
     is_dask = 'dask' in df.__module__

-
+    bytes_cols = get_bytes_cols(df)
+    numeric_cols = get_numeric_cols(df)
+
+    stats = {'target': name,}
     ### resort to defaults if None
+    copied = False
+    use_psql_copy = False
     if method == "":
         if self.flavor in _bulk_flavors:
             method = functools.partial(psql_insert_copy, schema=self.schema)
+            use_psql_copy = True
         else:
             ### Should resolve to 'multi' or `None`.
             method = flavor_configs.get(self.flavor, {}).get('to_sql', {}).get('method', 'multi')
+
+    if bytes_cols and (use_psql_copy or self.flavor == 'oracle'):
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in bytes_cols:
+            df[col] = df[col].apply(encode_bytes_for_bytea, with_prefix=(self.flavor != 'oracle'))
+
+    if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
+        if safe_copy and not copied:
+            df = df.copy()
+            copied = True
+        for col in numeric_cols:
+            df[col] = df[col].astype(str)
+
     stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)

     default_chunksize = self._sys_config.get('chunksize', None)
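The helper imported above hex-encodes raw bytes so they survive PostgreSQL's COPY path (which expects the `\x`-prefixed BYTEA text form) and Oracle's prefix-less hex literals. A sketch of equivalent logic; the real implementation lives in `meerschaum.utils.dtypes` and may differ:

    def encode_bytes_for_bytea(value, with_prefix: bool = True):
        """Hex-encode bytes for text-mode transport (sketch)."""
        if not isinstance(value, bytes):
            return value
        return ('\\x' if with_prefix else '') + value.hex()

    print(encode_bytes_for_bytea(b'\x01\x02'))                     # \x0102
    print(encode_bytes_for_bytea(b'\x01\x02', with_prefix=False))  # 0102
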
@@ -925,7 +956,6 @@ def to_sql(
     ### Check for numeric columns.
     numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
     if numeric_precision is not None and numeric_scale is not None:
-        numeric_cols = get_numeric_cols(df)
         for col in numeric_cols:
             df[col] = df[col].apply(
                 lambda x: (
meerschaum/core/Pipe/_attributes.py
CHANGED
@@ -200,10 +200,15 @@ def dtypes(self) -> Union[Dict[str, Any], None]:
     If defined, return the `dtypes` dictionary defined in `meerschaum.Pipe.parameters`.
     """
     from meerschaum.config._patch import apply_patch_to_config
+    from meerschaum.utils.dtypes import MRSM_ALIAS_DTYPES
     configured_dtypes = self.parameters.get('dtypes', {})
     remote_dtypes = self.infer_dtypes(persist=False)
     patched_dtypes = apply_patch_to_config(remote_dtypes, configured_dtypes)
-    return patched_dtypes
+    return {
+        col: MRSM_ALIAS_DTYPES.get(typ, typ)
+        for col, typ in patched_dtypes.items()
+        if col and typ
+    }


 @dtypes.setter
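The property now funnels every configured type through `MRSM_ALIAS_DTYPES` so shorthand spellings resolve to canonical Meerschaum dtypes, and falsy keys or types are dropped. An illustration with hypothetical alias entries:

    MRSM_ALIAS_DTYPES = {'str': 'string', 'double': 'float64'}  # hypothetical subset

    patched_dtypes = {'city': 'str', 'temp': 'double', '': None}
    normalized = {
        col: MRSM_ALIAS_DTYPES.get(typ, typ)
        for col, typ in patched_dtypes.items()
        if col and typ
    }
    print(normalized)  # {'city': 'string', 'temp': 'float64'}
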
meerschaum/core/Pipe/_dtypes.py
CHANGED
@@ -15,6 +15,7 @@ from typing import TYPE_CHECKING
 if TYPE_CHECKING:
     pd = mrsm.attempt_import('pandas')

+
 def enforce_dtypes(
     self,
     df: 'pd.DataFrame',
@@ -30,7 +31,7 @@ def enforce_dtypes(
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.debug import dprint
     from meerschaum.utils.dataframe import parse_df_datetimes, enforce_dtypes as _enforce_dtypes
-    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
     from meerschaum.utils.packages import import_pandas
     pd = import_pandas(debug=debug)
     if df is None:
@@ -41,7 +42,11 @@ def enforce_dtypes(
         )
         return df

-    pipe_dtypes = self.dtypes if self.enforce else {}
+    pipe_dtypes = self.dtypes if self.enforce else {
+        col: typ
+        for col, typ in self.dtypes.items()
+        if typ in MRSM_PD_DTYPES
+    }

     try:
         if isinstance(df, str):
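So even with `enforce=False`, explicitly declared Meerschaum dtypes (keys of `MRSM_PD_DTYPES`, e.g. 'json' or 'numeric') are still coerced; only inferred pandas types are left alone. A sketch with an abridged, hypothetical dtype table:

    MRSM_PD_DTYPES = {'json': 'object', 'numeric': 'object', 'uuid': 'object'}  # abridged

    dtypes = {'payload': 'json', 'temp': 'float64'}
    pipe_dtypes = {col: typ for col, typ in dtypes.items() if typ in MRSM_PD_DTYPES}
    print(pipe_dtypes)  # {'payload': 'json'}; 'float64' is no longer enforced
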
@@ -105,22 +110,16 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str, Any]:
     A dictionary of strings containing the pandas data types for this Pipe.
     """
     if not self.exists(debug=debug):
-
-        if not self.columns:
-            return {}
-        dt_col = self.columns.get('datetime', None)
-        if dt_col:
-            if not self.parameters.get('dtypes', {}).get(dt_col, None):
-                dtypes[dt_col] = 'datetime64[ns, UTC]'
-        return dtypes
+        return {}

     from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type
     from meerschaum.utils.dtypes import to_pandas_dtype
-    columns_types = self.get_columns_types(debug=debug)

     ### NOTE: get_columns_types() may return either the types as
     ### PostgreSQL- or Pandas-style.
-
+    columns_types = self.get_columns_types(debug=debug)
+
+    remote_pd_dtypes = {
         c: (
             get_pd_type_from_db_type(t, allow_custom_dtypes=True)
             if str(t).isupper()
@@ -128,7 +127,15 @@ def infer_dtypes(self, persist: bool = False, debug: bool = False) -> Dict[str, Any]:
         )
         for c, t in columns_types.items()
     } if columns_types else {}
-    if persist:
-
-
-
+    if not persist:
+        return remote_pd_dtypes
+
+    dtypes = self.parameters.get('dtypes', {})
+    dtypes.update({
+        col: typ
+        for col, typ in remote_pd_dtypes.items()
+        if col not in dtypes
+    })
+    self.dtypes = dtypes
+    self.edit(interactive=False, debug=debug)
+    return remote_pd_dtypes
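With `persist=True`, the inferred remote types are merged beneath any explicitly configured dtypes and saved back via `pipe.edit()`. A usage sketch (the pipe keys are placeholders):

    import meerschaum as mrsm

    pipe = mrsm.Pipe('demo', 'weather', instance='sql:main')  # placeholder keys
    remote_dtypes = pipe.infer_dtypes(persist=True)  # also writes parameters['dtypes']
    print(remote_dtypes)
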
meerschaum/core/Pipe/_sync.py
CHANGED
@@ -161,7 +161,7 @@ def sync(
     self._exists = None

     def _sync(
-        p:
+        p: mrsm.Pipe,
         df: Union[
             'pd.DataFrame',
             Dict[str, List[Any]],
@@ -960,10 +960,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'numeric' for col in numeric_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -988,10 +985,7 @@ def _persist_new_uuid_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'uuid' for col in uuid_cols})
     self.parameters['dtypes'] = dtypes
     if not self.temporary:
@@ -1016,10 +1010,7 @@ def _persist_new_json_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'json' for col in json_cols})
     self.parameters['dtypes'] = dtypes

@@ -1045,10 +1036,7 @@ def _persist_new_bytes_columns(self, df, debug: bool = False) -> SuccessTuple:
         return True, "Success"

     self._attributes_sync_time = None
-    dt_col = self.columns.get('datetime', None)
     dtypes = self.parameters.get('dtypes', {})
-    if dt_col not in dtypes:
-        dtypes[dt_col] = 'datetime'
     dtypes.update({col: 'bytes' for col in bytes_cols})
     self.parameters['dtypes'] = dtypes

meerschaum/jobs/_Job.py
CHANGED
@@ -200,6 +200,8 @@ class Job:
         if root_dir is None:
             from meerschaum.config.paths import ROOT_DIR_PATH
             root_dir = ROOT_DIR_PATH
+        else:
+            root_dir = pathlib.Path(root_dir)
         jobs_dir = root_dir / DAEMON_RESOURCES_PATH.name
         daemon_dir = jobs_dir / daemon_id
         pid_file = daemon_dir / 'process.pid'
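Coercing `root_dir` to `pathlib.Path` means the `/` joins below no longer break when a caller passes a plain string. The same coercion in isolation (`resolve_jobs_dir` and the paths are hypothetical stand-ins):

    import pathlib

    def resolve_jobs_dir(root_dir=None):
        # Sketch of the coercion above: accept either str or Path.
        if root_dir is None:
            root_dir = pathlib.Path('/tmp/mrsm-root')  # stand-in for ROOT_DIR_PATH
        else:
            root_dir = pathlib.Path(root_dir)
        return root_dir / 'jobs'

    print(resolve_jobs_dir('/srv/meerschaum'))  # /srv/meerschaum/jobs
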
meerschaum/utils/daemon/Daemon.py
CHANGED
@@ -472,7 +472,7 @@ class Daemon:
             process.kill()
             process.wait(timeout=timeout)
         except Exception as e:
-            return False, f"Failed to kill job {self} with exception: {e}"
+            return False, f"Failed to kill job {self} ({process}) with exception: {e}"

         try:
             if process.status():
@@ -734,7 +734,7 @@ class Daemon:
             time.sleep(check_timeout_interval)

         return False, (
-            f"Failed to stop daemon '{self.daemon_id}' within {timeout} second"
+            f"Failed to stop daemon '{self.daemon_id}' (PID: {pid}) within {timeout} second"
             + ('s' if timeout != 1 else '') + '.'
         )

meerschaum/utils/dataframe.py
CHANGED
@@ -494,7 +494,7 @@ def parse_df_datetimes(
     ### skip parsing if DataFrame is empty
     if len(pdf) == 0:
         if debug:
-            dprint(
+            dprint("df is empty. Returning original DataFrame without casting datetime columns...")
         return df

     ignore_cols = set(
@@ -509,7 +509,7 @@ def parse_df_datetimes(
     if len(cols_to_inspect) == 0:
         if debug:
             dprint("All columns are ignored, skipping datetime detection...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)

     ### apply regex to columns to determine which are ISO datetimes
     iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
@@ -522,7 +522,7 @@ def parse_df_datetimes(
     if not datetime_cols:
         if debug:
             dprint("No columns detected as datetimes, returning...")
-        return df.fillna(pandas.NA)
+        return df.infer_objects(copy=False).fillna(pandas.NA)

     if debug:
         dprint("Converting columns to datetimes: " + str(datetime_cols))
|