meerschaum 2.4.7__py3-none-any.whl → 2.4.9__py3-none-any.whl
This diff represents the content of publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- meerschaum/_internal/arguments/_parser.py +36 -3
- meerschaum/actions/show.py +15 -4
- meerschaum/actions/sql.py +1 -1
- meerschaum/api/routes/_pipes.py +38 -38
- meerschaum/config/_version.py +1 -1
- meerschaum/connectors/api/_pipes.py +3 -3
- meerschaum/connectors/sql/_SQLConnector.py +1 -1
- meerschaum/connectors/sql/_instance.py +12 -12
- meerschaum/connectors/sql/_pipes.py +75 -52
- meerschaum/connectors/sql/_sql.py +3 -1
- meerschaum/core/Pipe/_data.py +12 -13
- meerschaum/core/Pipe/_sync.py +1 -1
- meerschaum/utils/dataframe.py +34 -23
- meerschaum/utils/dtypes/sql.py +32 -18
- meerschaum/utils/formatting/_pipes.py +4 -4
- meerschaum/utils/misc.py +4 -4
- meerschaum/utils/packages/_packages.py +2 -1
- meerschaum/utils/sql.py +11 -7
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/METADATA +7 -4
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/RECORD +26 -26
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/LICENSE +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/NOTICE +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/WHEEL +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/entry_points.txt +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/top_level.txt +0 -0
- {meerschaum-2.4.7.dist-info → meerschaum-2.4.9.dist-info}/zip-safe +0 -0
meerschaum/connectors/sql/_pipes.py
CHANGED
@@ -459,6 +459,11 @@ def get_create_index_queries(
                 + 'if_not_exists => true, '
                 + "migrate_data => true);"
             )
+        elif self.flavor == 'mssql':
+            dt_query = (
+                f"CREATE CLUSTERED INDEX {_datetime_index_name} "
+                f"ON {_pipe_name} ({_datetime_name})"
+            )
         else: ### mssql, sqlite, etc.
             dt_query = (
                 f"CREATE INDEX {_datetime_index_name} "
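For mssql, the new branch issues a CREATE CLUSTERED INDEX on the datetime column (which physically orders the table's rows), while TimescaleDB keeps converting the table into a hypertable and the remaining flavors fall through to a plain index. A minimal sketch of the dispatch; the function and variable names are illustrative, not meerschaum's internals:

def build_datetime_index_query(flavor: str, table: str, dt_col: str, index_name: str) -> str:
    # TimescaleDB chunks the table on the datetime column via a hypertable.
    if flavor == 'timescaledb':
        return (
            f"SELECT public.create_hypertable('{table}', '{dt_col}', "
            "if_not_exists => true, migrate_data => true);"
        )
    # SQL Server: a clustered index determines the physical row order.
    if flavor == 'mssql':
        return f"CREATE CLUSTERED INDEX {index_name} ON {table} ({dt_col})"
    # sqlite, postgresql, etc. get an ordinary secondary index.
    return f"CREATE INDEX {index_name} ON {table} ({dt_col})"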
@@ -563,7 +568,12 @@ def get_drop_index_queries(
         return {}
     if not pipe.exists(debug=debug):
         return {}
-    from meerschaum.utils.sql import
+    from meerschaum.utils.sql import (
+        sql_item_name,
+        table_exists,
+        hypertable_queries,
+        DROP_IF_EXISTS_FLAVORS,
+    )
     drop_queries = {}
     schema = self.get_pipe_schema(pipe)
     schema_prefix = (schema + '_') if schema else ''
@@ -580,16 +590,17 @@ def get_drop_index_queries(
     is_hypertable_query = hypertable_queries[self.flavor].format(table_name=pipe_name)
     is_hypertable = self.value(is_hypertable_query, silent=True, debug=debug) is not None
 
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     if is_hypertable:
         nuke_queries = []
         temp_table = '_' + pipe.target + '_temp_migration'
         temp_table_name = sql_item_name(temp_table, self.flavor, self.get_pipe_schema(pipe))
 
         if table_exists(temp_table, self, schema=self.get_pipe_schema(pipe), debug=debug):
-            nuke_queries.append(f"DROP TABLE {temp_table_name}")
+            nuke_queries.append(f"DROP TABLE {if_exists_str} {temp_table_name}")
         nuke_queries += [
             f"SELECT * INTO {temp_table_name} FROM {pipe_name}",
-            f"DROP TABLE {pipe_name}",
+            f"DROP TABLE {if_exists_str} {pipe_name}",
             f"ALTER TABLE {temp_table_name} RENAME TO {pipe_name_no_schema}",
         ]
         nuke_ix_keys = ('datetime', 'id')
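The if_exists_str gating seen here recurs throughout this release: flavors in DROP_IF_EXISTS_FLAVORS accept the DROP TABLE IF EXISTS qualifier, and everything else gets a plain DROP TABLE. A hedged sketch of the pattern (the real set lives in meerschaum.utils.sql; the members below are illustrative only):

# Illustrative stand-in for meerschaum.utils.sql.DROP_IF_EXISTS_FLAVORS.
DROP_IF_EXISTS_FLAVORS = {'postgresql', 'timescaledb', 'citus', 'sqlite', 'mssql'}

def build_drop_query(flavor: str, quoted_table: str) -> str:
    # Only interpolate IF EXISTS where the flavor's dialect supports it.
    if_exists_str = "IF EXISTS" if flavor in DROP_IF_EXISTS_FLAVORS else ""
    return f"DROP TABLE {if_exists_str} {quoted_table}"

print(build_drop_query('sqlite', '"tbl"'))  # DROP TABLE IF EXISTS "tbl"
print(build_drop_query('oracle', '"tbl"'))  # DROP TABLE  "tbl"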
@@ -811,7 +822,7 @@ def get_pipe_data(
             parse_df_datetimes(
                 c,
                 ignore_cols=ignore_dt_cols,
-                chunksize
+                chunksize=kw.get('chunksize', None),
                 debug=debug,
             )
             for c in df
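This hunk and several below convert bare positional arguments into keyword arguments. The failure mode being fixed is silent mis-binding: a positional value lands in whatever parameter happens to occupy that slot. A contrived illustration (not meerschaum code):

def parse(df, ignore_cols=None, chunksize=None, debug=False):
    return {'ignore_cols': ignore_cols, 'chunksize': chunksize, 'debug': debug}

# Positionally, 900 binds to ignore_cols, not chunksize:
print(parse('df', 900))            # {'ignore_cols': 900, 'chunksize': None, 'debug': False}
# With a keyword, the binding is explicit:
print(parse('df', chunksize=900))  # {'ignore_cols': None, 'chunksize': 900, 'debug': False}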
@@ -1017,7 +1028,7 @@ def get_pipe_data_query(
     if _dt and _dt in existing_cols:
         order_by += dt + ' ' + order + ','
     for key, quoted_col_name in quoted_indices.items():
-        if
+        if dt == quoted_col_name:
             continue
         order_by += ' ' + quoted_col_name + ' ' + order + ','
     order_by = order_by[:-1]
@@ -1034,7 +1045,7 @@ def get_pipe_data_query(
         )
     else:
         query += f"\nLIMIT {limit}"
-
+
     if debug:
         to_print = (
             []
@@ -1315,7 +1326,7 @@ def sync_pipe(
     ) if dt_col else None
 
     transact_id = generate_password(3)
-    temp_target = '
+    temp_target = '##' + transact_id + '_' + pipe.target
     self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
     temp_pipe = Pipe(
         pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
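The temporary target now begins with '##': on Microsoft SQL Server a single leading '#' names a session-local temporary table and '##' a global one, while on other flavors the prefix is simply part of the quoted table name. A sketch of the naming scheme with illustrative values:

# Illustrative values; in meerschaum, transact_id comes from generate_password(3)
# and the suffix is pipe.target.
transact_id = 'abc'
target = 'weather'
temp_target = '##' + transact_id + '_' + target
print(temp_target)  # ##abc_weather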
@@ -1721,7 +1732,7 @@ def sync_pipe_inplace(
 
     delta_cols_types = get_table_cols_types(
         temp_tables['delta'],
-        connectable
+        connectable=connectable,
         flavor=self.flavor,
         schema=internal_schema,
         database=database,
@@ -1779,7 +1790,7 @@ def sync_pipe_inplace(
     create_joined_success, create_joined_msg = session_execute(
         session,
         create_joined_query,
-        debug
+        debug=debug,
     ) if on_cols and not upsert else (True, "Success")
     if not create_joined_success:
         _ = clean_up_temp_tables()
@@ -1790,14 +1801,14 @@ def sync_pipe_inplace(
         + (', '.join([
             (
                 "CASE\n WHEN " + sql_item_name(c + '_delta', self.flavor, None)
-                + " != " + get_null_replacement(typ, self.flavor)
+                + " != " + get_null_replacement(typ, self.flavor)
                 + " THEN " + sql_item_name(c + '_delta', self.flavor, None)
                 + "\n ELSE NULL\nEND "
                 + " AS " + sql_item_name(c, self.flavor, None)
             ) for c, typ in delta_cols.items()
         ]))
         + f"\nFROM {temp_table_names['joined']}\n"
-        +
+        + "WHERE "
         + '\nAND\n'.join([
             (
                 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NULL'
@@ -1813,8 +1824,8 @@ def sync_pipe_inplace(
     (create_unseen_success, create_unseen_msg), create_unseen_results = session_execute(
         session,
         create_unseen_query,
-        with_results
-        debug
+        with_results=True,
+        debug=debug
     ) if not upsert else (True, "Success"), None
     if not create_unseen_success:
         _ = clean_up_temp_tables()
@@ -1832,7 +1843,7 @@ def sync_pipe_inplace(
             ) for c, typ in delta_cols.items()
         ]))
         + f"\nFROM {temp_table_names['joined']}\n"
-        +
+        + "WHERE "
         + '\nOR\n'.join([
             (
                 sql_item_name(c + '_backtrack', self.flavor, None) + ' IS NOT NULL'
@@ -1849,8 +1860,8 @@ def sync_pipe_inplace(
     (create_update_success, create_update_msg), create_update_results = session_execute(
         session,
         create_update_query,
-        with_results
-        debug
+        with_results=True,
+        debug=debug,
     ) if on_cols and not upsert else ((True, "Success"), [])
     apply_update_queries = (
         get_update_queries(
@@ -1858,12 +1869,12 @@ def sync_pipe_inplace(
             temp_tables['update'],
             session,
             on_cols,
-            upsert
-            schema
-            patch_schema
-            datetime_col
-            flavor
-            debug
+            upsert=upsert,
+            schema=self.get_pipe_schema(pipe),
+            patch_schema=internal_schema,
+            datetime_col=pipe.columns.get('datetime', None),
+            flavor=self.flavor,
+            debug=debug,
         )
         if on_cols else []
     )
@@ -1883,8 +1894,8 @@ def sync_pipe_inplace(
     (apply_unseen_success, apply_unseen_msg), apply_unseen_results = session_execute(
         session,
         apply_unseen_queries,
-        with_results
-        debug
+        with_results=True,
+        debug=debug,
     ) if not upsert else (True, "Success"), None
     if not apply_unseen_success:
         _ = clean_up_temp_tables()
@@ -1894,8 +1905,8 @@ def sync_pipe_inplace(
     (apply_update_success, apply_update_msg), apply_update_results = session_execute(
         session,
         apply_update_queries,
-        with_results
-        debug
+        with_results=True,
+        debug=debug,
     )
     if not apply_update_success:
         _ = clean_up_temp_tables()
@@ -2064,7 +2075,7 @@ def get_pipe_rowcount(
     ----------
     pipe: mrsm.Pipe
         The pipe to query with.
-
+
     begin: Union[datetime, int, None], default None
         The begin datetime value.
 
@@ -2113,14 +2124,14 @@ def get_pipe_rowcount(
             warn(
                 f"No datetime could be determined for {pipe}."
                 + "\n Ignoring begin and end...",
-                stack
+                stack=False,
             )
             begin, end = None, None
         else:
             warn(
                 f"A datetime wasn't specified for {pipe}.\n"
                 + f" Using column \"{_dt}\" for datetime bounds...",
-                stack
+                stack=False,
             )
 
 
@@ -2176,6 +2187,8 @@ def get_pipe_rowcount(
         FROM ({src}) AS src
         """
     )
+    print(f"{begin=}")
+    print(f"{end=}")
     if begin is not None or end is not None:
         query += "WHERE"
     if begin is not None:
@@ -2198,7 +2211,7 @@ def get_pipe_rowcount(
             else 'WHERE'
         )
     )
-
+
     result = self.value(query, debug=debug, silent=True)
     try:
         return int(result)
@@ -2207,11 +2220,11 @@ def get_pipe_rowcount(
 
 
 def drop_pipe(
-
-
-
-
-
+    self,
+    pipe: mrsm.Pipe,
+    debug: bool = False,
+    **kw
+) -> SuccessTuple:
     """
     Drop a pipe's tables but maintain its registration.
 
@@ -2219,30 +2232,36 @@ def drop_pipe(
     ----------
     pipe: mrsm.Pipe
         The pipe to drop.
-
+
+    Returns
+    -------
+    A `SuccessTuple` indicated success.
     """
-    from meerschaum.utils.sql import table_exists, sql_item_name
+    from meerschaum.utils.sql import table_exists, sql_item_name, DROP_IF_EXISTS_FLAVORS
     success = True
     target = pipe.target
     target_name = (
         sql_item_name(target, self.flavor, self.get_pipe_schema(pipe))
     )
     if table_exists(target, self, debug=debug):
-
+        if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
+        success = self.exec(
+            f"DROP TABLE {if_exists_str} {target_name}", silent=True, debug=debug
+        ) is not None
 
     msg = "Success" if success else f"Failed to drop {pipe}."
     return success, msg
 
 
 def clear_pipe(
-
-
-
-
-
-
-
-
+    self,
+    pipe: mrsm.Pipe,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    debug: bool = False,
+    **kw
+) -> SuccessTuple:
     """
     Delete a pipe's data within a bounded or unbounded interval without dropping the table.
 
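Both restored signatures return a SuccessTuple, meerschaum's (bool, str) pair. A hedged usage sketch, assuming a configured 'sql:main' instance and a registered pipe:

import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
conn = mrsm.get_connector('sql:main')

# Drop the pipe's table but keep its registration.
success, msg = conn.drop_pipe(pipe)
print(success, msg)

# Delete rows within an interval without dropping the table.
success, msg = conn.clear_pipe(pipe, begin=None, end=None)
print(success, msg)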
@@ -2535,7 +2554,7 @@ def get_alter_columns_queries(
     """
    if not pipe.exists(debug=debug):
         return []
-    from meerschaum.utils.sql import sql_item_name
+    from meerschaum.utils.sql import sql_item_name, DROP_IF_EXISTS_FLAVORS
     from meerschaum.utils.dataframe import get_numeric_cols
     from meerschaum.utils.dtypes import are_dtypes_equal
     from meerschaum.utils.dtypes.sql import (
@@ -2691,7 +2710,9 @@ def get_alter_columns_queries(
         f"\nFROM {sql_item_name(temp_table_name, self.flavor, self.get_pipe_schema(pipe))}"
     )
 
-
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
+
+    drop_query = f"DROP TABLE {if_exists_str}" + sql_item_name(
         temp_table_name, self.flavor, self.get_pipe_schema(pipe)
     )
     return [
@@ -2882,6 +2903,7 @@ def deduplicate_pipe(
         NO_CTE_FLAVORS,
         get_rename_table_queries,
         NO_SELECT_INTO_FLAVORS,
+        DROP_IF_EXISTS_FLAVORS,
         get_create_table_query,
         format_cte_subquery,
         get_null_replacement,
@@ -3012,6 +3034,7 @@ def deduplicate_pipe(
     ) + f"""
     ORDER BY {index_list_str_ordered}
     """
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     alter_queries = flatten_list([
         get_rename_table_queries(
             pipe.target, temp_old_table, self.flavor, schema=self.get_pipe_schema(pipe)
@@ -3020,7 +3043,7 @@ def deduplicate_pipe(
             dedup_table, pipe.target, self.flavor, schema=self.get_pipe_schema(pipe)
         ),
         f"""
-        DROP TABLE {temp_old_table_name}
+        DROP TABLE {if_exists_str} {temp_old_table_name}
         """,
     ])
 
@@ -3030,9 +3053,9 @@ def deduplicate_pipe(
 
     results = self.exec_queries(
         alter_queries,
-        break_on_error
-        rollback
-        debug
+        break_on_error=True,
+        rollback=True,
+        debug=debug,
     )
 
     fail_query = None
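Naming break_on_error and rollback makes deduplicate_pipe's rename-swap-drop sequence behave atomically: execution stops at the first failed statement and the batch is rolled back. A generic sketch of that contract (not meerschaum's implementation), using SQLAlchemy-style transactions:

import sqlalchemy

def exec_queries_atomically(engine, queries):
    # engine.begin() commits when the block exits cleanly and rolls the
    # whole batch back if any statement raises.
    results = []
    with engine.begin() as connection:
        for query in queries:
            results.append(connection.execute(sqlalchemy.text(query)))
    return results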
meerschaum/connectors/sql/_sql.py
CHANGED
@@ -753,6 +753,7 @@ def to_sql(
         table_exists,
         json_flavors,
         truncate_item_name,
+        DROP_IF_EXISTS_FLAVORS,
     )
     from meerschaum.utils.dataframe import get_json_cols, get_numeric_cols, get_uuid_cols
     from meerschaum.utils.dtypes import are_dtypes_equal, quantize_decimal
@@ -827,12 +828,13 @@ def to_sql(
         'parallel': True,
     })
 
+    if_exists_str = "IF EXISTS" if self.flavor in DROP_IF_EXISTS_FLAVORS else ""
     if self.flavor == 'oracle':
         ### For some reason 'replace' doesn't work properly in pandas,
         ### so try dropping first.
         if if_exists == 'replace' and table_exists(name, self, schema=schema, debug=debug):
             success = self.exec(
-                "DROP TABLE " + sql_item_name(name, 'oracle', schema)
+                f"DROP TABLE {if_exists_str}" + sql_item_name(name, 'oracle', schema)
             ) is not None
             if not success:
                 warn(f"Unable to drop {name}")
meerschaum/core/Pipe/_data.py
CHANGED
@@ -515,15 +515,14 @@ def get_backtrack_data(
     )
 
 
-
 def get_rowcount(
-
-
-
-
-
-
-
+    self,
+    begin: Union[datetime, int, None] = None,
+    end: Union[datetime, int, None] = None,
+    params: Optional[Dict[str, Any]] = None,
+    remote: bool = False,
+    debug: bool = False
+) -> int:
     """
     Get a Pipe's instance or remote rowcount.
 
@@ -556,11 +555,11 @@ def get_rowcount(
     with Venv(get_connector_plugin(connector)):
         rowcount = connector.get_pipe_rowcount(
             self,
-            begin
-            end
-            params
-            remote
-            debug
+            begin=begin,
+            end=end,
+            params=params,
+            remote=remote,
+            debug=debug,
         )
     if rowcount is None:
         return 0
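With the bounds now forwarded as keywords, a call like the following counts rows within a datetime window (a hedged usage sketch; assumes a registered pipe with a datetime index column):

from datetime import datetime
import meerschaum as mrsm

pipe = mrsm.Pipe('plugin:noaa', 'weather', instance='sql:main')
count = pipe.get_rowcount(
    begin=datetime(2024, 1, 1),
    end=datetime(2024, 2, 1),
)
print(count)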
meerschaum/core/Pipe/_sync.py
CHANGED
meerschaum/utils/dataframe.py
CHANGED
@@ -61,12 +61,10 @@ def add_missing_cols_to_df(
     if set(df.columns) == set(dtypes):
         return df
 
-    import
-    from meerschaum.utils.packages import import_pandas, attempt_import
-    from meerschaum.utils.warnings import warn
+    from meerschaum.utils.packages import attempt_import
     from meerschaum.utils.dtypes import to_pandas_dtype
     pandas = attempt_import('pandas')
-
+
     def build_series(dtype: str):
         return pandas.Series([], dtype=to_pandas_dtype(dtype))
 
@@ -75,7 +73,10 @@ def add_missing_cols_to_df(
         for col, typ in dtypes.items()
         if col not in df.columns
     }
-
+    df_with_cols = df.assign(**assign_kwargs)
+    for col in assign_kwargs:
+        df_with_cols[col] = df_with_cols[col].fillna(pandas.NA)
+    return df_with_cols
 
 
 def filter_unseen_df(
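The new body materializes the missing columns as empty, correctly-typed Series (so extension dtypes survive the assignment) and then normalizes the fill values to pandas.NA. A standalone sketch of the same idea:

import pandas as pd

df = pd.DataFrame({'a': [1, 2]})
dtypes = {'a': 'int64', 'b': 'Float64', 'c': 'string'}

# Empty Series carry the intended dtype; assign() aligns them on the index.
assign_kwargs = {
    col: pd.Series([], dtype=typ)
    for col, typ in dtypes.items()
    if col not in df.columns
}
df_with_cols = df.assign(**assign_kwargs)
for col in assign_kwargs:
    df_with_cols[col] = df_with_cols[col].fillna(pd.NA)

print(df_with_cols.dtypes)  # 'b' stays Float64 and 'c' stays string, filled with <NA>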
@@ -152,6 +153,7 @@ def filter_unseen_df(
     is_dask = 'dask' in new_df.__module__
     if is_dask:
         pandas = attempt_import('pandas')
+        _ = attempt_import('partd', lazy=False)
         dd = attempt_import('dask.dataframe')
         merge = dd.merge
         NA = pandas.NA
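partd is a dependency dask's dataframe shuffle machinery relies on; importing it eagerly surfaces a missing package before the join starts rather than mid-computation. attempt_import is meerschaum's installer-aware import helper, and lazy=False forces the import (and any auto-install) to happen immediately:

from meerschaum.utils.packages import attempt_import

_ = attempt_import('partd', lazy=False)  # fail fast if dask's shuffle backend is absent
dd = attempt_import('dask.dataframe')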
@@ -301,21 +303,28 @@ def filter_unseen_df(
         lambda x: f'{x:f}' if isinstance(x, Decimal) else x
     )
 
+    old_dt_cols = [
+        col
+        for col, typ in old_df.dtypes.items()
+        if are_dtypes_equal(str(typ), 'datetime')
+    ]
+    for col in old_dt_cols:
+        old_df[col] = coerce_timezone(old_df[col])
+
+    new_dt_cols = [
+        col
+        for col, typ in old_df.dtypes.items()
+        if are_dtypes_equal(str(typ), 'datetime')
+    ]
+    for col in new_dt_cols:
+        new_df[col] = coerce_timezone(new_df[col])
+
     old_uuid_cols = get_uuid_cols(old_df)
     new_uuid_cols = get_uuid_cols(new_df)
     uuid_cols = set(new_uuid_cols + old_uuid_cols)
-    for uuid_col in old_uuid_cols:
-        old_df[uuid_col] = old_df[uuid_col].apply(
-            lambda x: f'{x}' if isinstance(x, UUID) else x
-        )
-    for uuid_col in new_uuid_cols:
-        new_df[uuid_col] = new_df[uuid_col].apply(
-            lambda x: f'{x}' if isinstance(x, UUID) else x
-        )
-
     joined_df = merge(
-        new_df.fillna(NA),
-        old_df.fillna(NA),
+        new_df.infer_objects(copy=False).fillna(NA),
+        old_df.infer_objects(copy=False).fillna(NA),
         how='left',
         on=None,
         indicator=True,
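The new coerce_timezone pass exists because pandas refuses to order-compare tz-aware and tz-naive datetimes, which can surface in the merge in this hunk when one frame came from the database with timezones and the other did not. A minimal demonstration of the failure mode (coerce_timezone itself lives in meerschaum.utils.dtypes):

import pandas as pd

aware = pd.Series(pd.to_datetime(['2024-01-01'], utc=True))
naive = pd.Series(pd.to_datetime(['2024-01-01']))

try:
    aware < naive
except TypeError as exc:
    print(f"TypeError: {exc}")  # cannot compare tz-naive and tz-aware values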
@@ -558,10 +567,10 @@ def get_json_cols(df: 'pd.DataFrame') -> List[str]:
     -------
     A list of columns to be encoded as JSON.
     """
-    is_dask = 'dask' in df.__module__
+    is_dask = 'dask' in df.__module__ if hasattr(df, '__module__') else False
     if is_dask:
         df = get_first_valid_dask_partition(df)
-
+
     if len(df) == 0:
         return []
 
@@ -618,12 +627,12 @@ def get_numeric_cols(df: 'pd.DataFrame') -> List[str]:
 
 def get_uuid_cols(df: 'pd.DataFrame') -> List[str]:
     """
-    Get the columns which contain `
+    Get the columns which contain `uuid.UUID` objects from a Pandas DataFrame.
 
     Parameters
     ----------
     df: pd.DataFrame
-        The DataFrame which may contain
+        The DataFrame which may contain UUID objects.
 
     Returns
     -------
@@ -699,6 +708,7 @@ def enforce_dtypes(
         is_dtype_numeric,
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
+        coerce_timezone,
     )
     if safe_copy:
         df = df.copy()
@@ -1065,6 +1075,7 @@ def get_first_valid_dask_partition(ddf: 'dask.dataframe.DataFrame') -> Union['pd
             continue
         if len(pdf) > 0:
             return pdf
+    _ = mrsm.attempt_import('partd', lazy=False)
     return ddf.compute()
 
 
@@ -1171,9 +1182,9 @@ def query_df(
     dtypes = {col: str(typ) for col, typ in df.dtypes.items()}
 
     if inplace:
-        df.fillna(NA, inplace=True)
+        df.infer_objects(copy=False).fillna(NA, inplace=True)
     else:
-        df = df.fillna(NA)
+        df = df.infer_objects(copy=False).fillna(NA)
 
     if isinstance(begin, str):
         begin = dateutil_parser.parse(begin)
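The infer_objects(copy=False).fillna(...) idiom appears in several hunks here: pandas 2.1+ deprecates the implicit downcasting that fillna used to perform on object-dtype columns, and doing the dtype inference explicitly avoids the FutureWarning. A minimal sketch:

import pandas as pd

s = pd.Series([True, None], dtype='object')

# Plain s.fillna(False) warns on pandas >= 2.1 about deprecated implicit
# downcasting; inferring dtypes explicitly keeps the conversion visible.
out = s.infer_objects(copy=False).fillna(False)
print(out)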
@@ -1346,7 +1357,7 @@ def to_json(
     df = df.copy()
     for col in uuid_cols:
         df[col] = df[col].astype(str)
-    return df.fillna(pd.NA).to_json(
+    return df.infer_objects(copy=False).fillna(pd.NA).to_json(
         date_format=date_format,
         date_unit=date_unit,
         orient=orient,