mtsql 1.7.202312151026__py3-none-any.whl → 1.9.202401091637__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
mt/sql/base.py CHANGED
@@ -1,5 +1,6 @@
 """Base functions dealing with an SQL database."""
 
+import uuid
 import sqlalchemy as sa
 import sqlalchemy.exc as se
 import psycopg2 as ps
@@ -11,6 +12,7 @@ from mt.base import deprecated_func
 
 __all__ = [
     "frame_sql",
+    "indices",
     "run_func",
     "conn_ctx",
     "engine_execute",
@@ -19,6 +21,9 @@ __all__ = [
     "exec_sql",
     "list_schemas",
     "list_tables",
+    "list_views",
+    "table_exists",
+    "create_temp_id_table",
 ]
 
 
@@ -26,6 +31,12 @@ def frame_sql(frame_name, schema: tp.Optional[str] = None):
     return frame_name if schema is None else "{}.{}".format(schema, frame_name)
 
 
+def indices(df):
+    """Returns the list of named indices of the dataframe, ignoring any unnamed index."""
+    a = list(df.index.names)
+    return a if a != [None] else []
+
+
 # ----- functions dealing with sql queries to overcome OperationalError -----
 
 
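The `indices()` helper, moved here from `mt.sql.psql` (see the removal below), reads the dataframe's index names. A minimal sketch of what it returns, assuming only pandas:

    import pandas as pd

    df1 = pd.DataFrame({"a": [1, 2]})  # default unnamed RangeIndex
    df2 = df1.set_index(pd.Index([10, 20], name="id"))

    # an unnamed index yields [None], which indices() maps to []
    assert list(df1.index.names) == [None]  # hence indices(df1) == []
    assert list(df2.index.names) == ["id"]  # hence indices(df2) == ["id"]
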
@@ -34,7 +45,7 @@ def run_func(
     *args,
     nb_trials: int = 3,
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ):
     """Attempt to run a function a number of times to overcome OperationalError exceptions.
 
@@ -97,7 +108,7 @@ def read_sql(
     nb_trials: int = 3,
     exception_handling: str = "raise",
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ) -> pd.DataFrame:
     """Read an SQL query with a number of trials to overcome OperationalError.
 
@@ -165,7 +176,7 @@ def read_sql(
         chunksize=chunksize,
         nb_trials=nb_trials,
         logger=logger,
-        **kwargs
+        **kwargs,
     )
 
     if chunksize is None:
@@ -200,66 +211,12 @@ def read_sql(
         return df
 
 
-@deprecated_func(
-    "1.0",
-    suggested_func="mt.sql.base.read_sql",
-    removed_version="2.0",
-    docstring_prefix=" ",
-)
-def read_sql_query(
-    sql,
-    engine,
-    index_col=None,
-    set_index_after=False,
-    nb_trials: int = 3,
-    logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
-):
-    """Read an SQL query with a number of trials to overcome OperationalError.
-
-    Parameters
-    ----------
-    sql : str
-        SQL query to be executed
-    engine : sqlalchemy.engine.Engine
-        connection engine to the server
-    index_col: string or list of strings, optional, default: None
-        Column(s) to set as index(MultiIndex). See :func:`pandas.read_sql_query`.
-    set_index_after: bool
-        whether to set index specified by index_col via the pandas.read_sql_query() function or
-        after the function has been invoked
-    nb_trials: int
-        number of query trials
-    logger: mt.logg.IndentedLoggerAdapter, optional
-        logger for debugging
-    kwargs: dict
-        other keyword arguments to be passed directly to :func:`pandas.read_sql_query`
-
-    See Also
-    --------
-    pandas.read_sql_query
-    """
-
-    df = read_sql(
-        sql,
-        engine,
-        index_col=index_col,
-        nb_trials=nb_trials,
-        exception_handling="raise",
-        logger=logger,
-        **kwargs
-    )
-    if index_col is None or not set_index_after:
-        return df
-    return df.set_index(index_col, drop=True)
-
-
 def read_sql_table(
     table_name,
     engine,
     nb_trials: int = 3,
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ):
     """Read an SQL table with a number of trials to overcome OperationalError.
 
@@ -285,7 +242,7 @@ def read_sql_table(
         engine,
         nb_trials=nb_trials,
         logger=logger,
-        **kwargs
+        **kwargs,
     )
 
 
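With the deprecated `read_sql_query()` wrapper removed ahead of the advertised 2.0 cut-off, callers migrate to `read_sql()` directly. A hedged sketch of the equivalent call, derived from the removed wrapper's body above (`sql` and `engine` assumed already defined):

    from mt.sql.base import read_sql

    # before (removed wrapper):
    # df = read_sql_query(sql, engine, index_col="id", set_index_after=True)

    # after: call read_sql() and set the index explicitly when needed
    df = read_sql(sql, engine, nb_trials=3, exception_handling="raise")
    df = df.set_index("id", drop=True)
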
@@ -295,7 +252,7 @@ def exec_sql(
     *args,
     nb_trials: int = 3,
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-    **kwargs
+    **kwargs,
 ):
     """Execute an SQL query with a number of trials to overcome OperationalError.
 
@@ -358,3 +315,74 @@ def list_tables(engine, schema: tp.Optional[str] = None):
         list of all table names
     """
     return sa.inspect(engine).get_table_names(schema=schema)
+
+
+def list_views(engine, schema: tp.Optional[str] = None):
+    """Lists all views of a given schema.
+
+    Parameters
+    ----------
+    engine : sqlalchemy.engine.Engine
+        connection engine to the server
+    schema: str, optional
+        a valid schema name returned from :func:`list_schemas`. Defaults to SQLAlchemy's default schema.
+
+    Returns
+    -------
+    list
+        list of all view names
+    """
+    return sa.inspect(engine).get_view_names(schema=schema)
+
+
+def table_exists(
+    table_name,
+    engine,
+    schema: tp.Optional[str] = None,
+):
+    """Checks if a table exists.
+
+    Parameters
+    ----------
+    table_name: str
+        name of table
+    engine: sqlalchemy.engine.Engine
+        an sqlalchemy connection engine created by function `create_engine()`
+    schema: str or None
+        a valid schema name returned from `list_schemas()`
+
+    Returns
+    -------
+    retval: bool
+        whether a table or a view exists with the given name
+    """
+
+    return sa.inspect(engine).has_table(table_name, schema=schema)
+
+
+def create_temp_id_table(l_ids: list, conn: sa.engine.Connection) -> str:
+    """Creates a temporary table containing a list of ids.
+
+    Parameters
+    ----------
+    l_ids : list
+        list of ids to be inserted into the table
+    conn : sqlalchemy.engine.Connection
+        a connection that has been opened
+
+    Returns
+    -------
+    table_name : str
+        name of the temporary table. The table will be deleted at the end of the connection
+    """
+
+    table_name = f"tab_{uuid.uuid4().hex}"
+
+    query_str = f"CREATE TEMP TABLE {table_name}(id int);"
+    conn.execute(sa.text(query_str))
+
+    values = ",".join((f"({id})" for id in l_ids))
+    query_str = f"INSERT INTO {table_name}(id) VALUES {values};"
+    conn.execute(sa.text(query_str))
+
+    return table_name
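Together, the new helpers support id-filtered reads against a temporary table. A usage sketch under assumed names (`my_table` and the DSN are hypothetical); note that `CREATE TEMP TABLE` is connection-scoped, so the follow-up query must run on the same connection:

    import sqlalchemy as sa
    from mt.sql.base import create_temp_id_table, read_sql

    engine = sa.create_engine("postgresql://user:pass@host/db")  # hypothetical DSN
    with engine.connect() as conn:
        # temp table of ids, dropped when the connection closes
        tmp = create_temp_id_table([1, 2, 3], conn)
        df = read_sql(f"SELECT t.* FROM my_table t JOIN {tmp} i ON t.id = i.id", conn)
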
mt/sql/psql.py CHANGED
@@ -16,12 +16,10 @@ __all__ = [
     "pg_get_locked_transactions",
     "pg_cancel_backend",
     "pg_cancel_all_backends",
-    "indices",
     "compliance_check",
     "as_column_name",
     "to_sql",
     "rename_schema",
-    "list_views",
     "list_matviews",
     "list_foreign_tables",
     "list_frames",
@@ -137,12 +135,6 @@ def pg_cancel_all_backends(
 # ----- functions dealing with sql queries to overcome OperationalError -----
 
 
-def indices(df):
-    """Returns the list of named indices of the dataframe, ignoring any unnamed index."""
-    a = list(df.index.names)
-    return a if a != [None] else []
-
-
 def compliance_check(df: pd.DataFrame):
     """Checks if a dataframe is compliant to PSQL.
 
@@ -207,7 +199,7 @@ def to_sql(
     logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
     **kwargs,
 ):
-    """Writes records stored in a DataFrame to an SQL database.
+    """Writes records stored in a DataFrame to a PostgreSQL database.
 
     With a number of trials to overcome OperationalError.
 
@@ -391,42 +383,6 @@ def rename_schema(
     )
 
 
-def list_views(
-    engine,
-    schema: tp.Optional[str] = None,
-    nb_trials: int = 3,
-    logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
-):
-    """Lists all views of a given schema.
-
-    Parameters
-    ----------
-    engine: sqlalchemy.engine.Engine
-        an sqlalchemy connection engine created by function `create_engine()`
-    schema: str or None
-        a valid schema name returned from `list_schemas()`
-    nb_trials: int
-        number of query trials
-    logger: mt.logg.IndentedLoggerAdapter, optional
-        logger for debugging
-
-    Returns
-    -------
-    out: list
-        list of all view names
-    """
-    if schema is None:
-        query_str = "select distinct viewname from pg_views;"
-    else:
-        query_str = (
-            "select distinct viewname from pg_views where schemaname='{}';".format(
-                schema
-            )
-        )
-    df = read_sql(query_str, engine, nb_trials=nb_trials, logger=logger)
-    return df["viewname"].tolist()
-
-
 def list_matviews(
     engine,
     schema: tp.Optional[str] = None,
@@ -521,7 +477,7 @@ def list_frames(
     data = []
     for item in list_tables(engine, schema=schema):
         data.append((item, "table"))
-    for item in list_views(engine, schema=schema, nb_trials=nb_trials, logger=logger):
+    for item in list_views(engine, schema=schema):
         data.append((item, "view"))
     for item in list_matviews(
         engine, schema=schema, nb_trials=nb_trials, logger=logger
@@ -990,11 +946,9 @@ def frame_exists(
     retval: bool
         whether a table or a view exists with the given name
     """
-    if frame_name in list_tables(engine, schema=schema):
+    if table_exists(frame_name, engine, schema=schema):
         return True
-    if frame_name in list_views(
-        engine, schema=schema, nb_trials=nb_trials, logger=logger
-    ):
+    if frame_name in list_views(engine, schema=schema):
         return True
     return frame_name in list_matviews(
         engine, schema=schema, nb_trials=nb_trials, logger=logger
@@ -1041,9 +995,7 @@ def drop_frame(
         nb_trials=nb_trials,
         logger=logger,
     )
-    if frame_name in list_views(
-        engine, schema=schema, nb_trials=nb_trials, logger=logger
-    ):
+    if frame_name in list_views(engine, schema=schema):
         return drop_view(
             frame_name,
             engine,
@@ -2052,11 +2004,9 @@ def readsync_table(
 
     if len(new_md5_df) != len(new_df):
         if logger:
-            logger.debug("New dataframe:\n{}".format(str(new_df)))
-            logger.debug("Hash dataframe:\n{}".format(str(new_md5_df)))
-        msg = "Something must have gone wrong. Number of hashes {} != number of records {}.".format(
-            len(new_md5_df), len(new_df)
-        )
+            logger.debug(f"New dataframe:\n{str(new_df)}")
+            logger.debug(f"Hash dataframe:\n{str(new_md5_df)}")
+        msg = f"Something must have gone wrong. Number of hashes {len(new_md5_df)} != number of records {len(new_df)}."
         if raise_exception_upon_mismatch:
             raise RuntimeError(msg)
         elif logger:
@@ -2081,7 +2031,7 @@ def readsync_table(
 
     # write back
     if logger:
-        logger.debug("Saving all {} records to file...".format(len(df)))
+        logger.debug(f"Saving all {len(df)} records to file...")
     if bg_write_csv is True:
         bg = BgInvoke(pd.dfsave, df, df_filepath, index=True)
     return df, bg
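Because `list_views()` now lives in `mt.sql.base` and delegates to SQLAlchemy's inspector instead of retrying a raw `pg_views` query, it no longer accepts `nb_trials` or `logger`; call sites simply drop those arguments:

    from mt.sql.base import list_views

    # before (PSQL-specific, retried raw SQL against pg_views):
    # views = list_views(engine, schema="public", nb_trials=3, logger=logger)

    # after (dialect-agnostic, via sqlalchemy.inspect):
    views = list_views(engine, schema="public")
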
mt/sql/redshift.py CHANGED
@@ -3,6 +3,7 @@
 from mt import tp, logg
 
 from .base import *
+from .psql import compliance_check
 
 
 __api__ = [
@@ -418,3 +419,109 @@ def drop_column(
         schema, table_name, column_name
     )
     exec_sql(query_str, engine, nb_trials=nb_trials, logger=logger)
+
+
+# ----- functions dealing with sql queries to overcome OperationalError -----
+
+
+def to_sql(
+    df,
+    name,
+    engine,
+    schema: tp.Optional[str] = None,
+    if_exists="fail",
+    nb_trials: int = 3,
+    logger: tp.Optional[logg.IndentedLoggerAdapter] = None,
+    **kwargs,
+):
+    """Writes records stored in a DataFrame to a Redshift database.
+
+    With a number of trials to overcome OperationalError.
+
+    Parameters
+    ----------
+    df : pandas.DataFrame
+        dataframe to be sent to the server
+    name : str
+        name of the table to be written to
+    engine : sqlalchemy.engine.Engine
+        connection engine to the server
+    schema: string, optional
+        Specify the schema. If None, use default schema.
+    if_exists: str
+        what to do when the table exists. Passed as-is to :func:`pandas.DataFrame.to_sql`.
+    nb_trials: int
+        number of query trials
+    logger: mt.logg.IndentedLoggerAdapter, optional
+        logger for debugging
+    kwargs : dict
+        keyword arguments passed as-is to :func:`pandas.DataFrame.to_sql`
+
+    Raises
+    ------
+    sqlalchemy.exc.ProgrammingError if the local and remote frames do not have the same structure
+
+    Notes
+    -----
+    The function takes as input a PSQL-compliant dataframe (see `compliance_check()`). It rejects
+    any input `index` or `index_label` keyword. Instead, it considers 2 cases. If the dataframe has
+    an index or indices, then the tuple of all indices is turned into the primary key. If not,
+    there is no primary key and no index is uploaded.
+
+    See Also
+    --------
+    pandas.DataFrame.to_sql()
+
+    """
+
+    if kwargs:
+        if "index" in kwargs:
+            raise ValueError(
+                "The `mt.sql.redshift.to_sql()` function does not accept `index` as a keyword."
+            )
+        if "index_label" in kwargs:
+            raise ValueError(
+                "The `mt.sql.redshift.to_sql()` function does not accept `index_label` as a keyword."
+            )
+
+    compliance_check(df)
+    frame_sql_str = frame_sql(name, schema=schema)
+
+    # if the remote frame does not exist, force `if_exists` to 'replace'
+    if not table_exists(name, engine, schema=schema):
+        if_exists = "replace"
+    local_indices = indices(df)
+
+    if local_indices:
+        df = df.reset_index(drop=False)
+        retval = run_func(
+            df.to_sql,
+            name,
+            engine,
+            schema=schema,
+            if_exists=if_exists,
+            index=False,
+            index_label=None,
+            nb_trials=nb_trials,
+            logger=logger,
+            **kwargs,
+        )
+
+        if if_exists == "replace":
+            query_str = f"ALTER TABLE {frame_sql_str} ADD PRIMARY KEY ({','.join(local_indices)});"
+            exec_sql(query_str, engine, nb_trials=nb_trials, logger=logger)
+    else:
+        retval = run_func(
+            df.to_sql,
+            name,
+            engine,
+            schema=schema,
+            if_exists=if_exists,
+            index=False,
+            index_label=None,
+            nb_trials=nb_trials,
+            logger=logger,
+            **kwargs,
+        )
+
+    return retval
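The new Redshift `to_sql()` mirrors the PSQL variant: named indices become the primary key on a replaced table, and `index`/`index_label` are rejected. A usage sketch with hypothetical connection details and table name:

    import pandas as pd
    import sqlalchemy as sa
    from mt.sql.redshift import to_sql

    engine = sa.create_engine("redshift+psycopg2://user:pass@host:5439/db")  # hypothetical DSN
    df = pd.DataFrame({"id": [1, 2], "value": ["a", "b"]}).set_index("id")

    # the named index 'id' becomes the table's primary key
    to_sql(df, "my_table", engine, schema="public", if_exists="replace")
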
mt/sql/version.py CHANGED
@@ -1,11 +1,11 @@
-VERSION_YEAR = 2023
-VERSION_MONTH = int('12')
-VERSION_DAY = int('15')
-VERSION_HOUR = int('10')
-VERSION_MINUTE = int('26')
+VERSION_YEAR = 2024
+VERSION_MONTH = int('01')
+VERSION_DAY = int('09')
+VERSION_HOUR = int('16')
+VERSION_MINUTE = int('37')
 MAJOR_VERSION = 1
-MINOR_VERSION = 7
-PATCH_VERSION = 202312151026
-version_date = '2023/12/15 10:26'
+MINOR_VERSION = 9
+PATCH_VERSION = 202401091637
+version_date = '2024/01/09 16:37'
 version = '{}.{}.{}'.format(MAJOR_VERSION, MINOR_VERSION, PATCH_VERSION)
 __all__ = ['MAJOR_VERSION', 'MINOR_VERSION', 'PATCH_VERSION', 'version_date', 'version']
mtsql-1.7.202312151026.dist-info/METADATA → mtsql-1.9.202401091637.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: mtsql
-Version: 1.7.202312151026
+Version: 1.9.202401091637
 Summary: Extra Python modules to deal with the interaction between pandas dataframes and remote SQL servers, for Minh-Tri Pham
 Home-page: https://github.com/inteplus/mtsql
 Author: ['Minh-Tri Pham']
mtsql-1.9.202401091637.dist-info/RECORD ADDED
@@ -0,0 +1,12 @@
+mt/sql/__init__.py,sha256=b7zO50apZxt9Hg2eOkJhRLrXgACR8eS5b-Rphdn5qNQ,44
+mt/sql/base.py,sha256=GJLSQfz0GNXgFBzK6dSCVqQ4rjyTvFEBPmsM37d8eXc,10608
+mt/sql/mysql.py,sha256=n2ENDctdUqZuSaDAcrqZYtPtawq3Wx4dOPCRsCB5Q4w,4894
+mt/sql/psql.py,sha256=m41LsBQ57OVVtakUZ01o_YY-vBwY5Z3TVPvSUMylNaU,65964
+mt/sql/redshift.py,sha256=EliV4C9E3VuNjqFXWnTrU8Dm_utQrVwht5DF4oHl7qY,14808
+mt/sql/sqlite.py,sha256=T2ak_hhNi_zRfpg_gp8JhNHn7D2kl4i-Ey6-9ANMtz0,8678
+mt/sql/version.py,sha256=nb0i2eAMsoLqFeLpvANq9ovAtp3TRIZsz2c02XZ4xBs,396
+mtsql-1.9.202401091637.dist-info/LICENSE,sha256=PojkRlQzTT5Eg6Nj03XoIVEefN3u8iiIFf1p4rqe_t4,1070
+mtsql-1.9.202401091637.dist-info/METADATA,sha256=0OS_X0KCiNKKzDFiiXTnttkBWJSeS6OoEsdmykC4JAc,589
+mtsql-1.9.202401091637.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
+mtsql-1.9.202401091637.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
+mtsql-1.9.202401091637.dist-info/RECORD,,
mtsql-1.7.202312151026.dist-info/RECORD REMOVED
@@ -1,12 +0,0 @@
-mt/sql/__init__.py,sha256=b7zO50apZxt9Hg2eOkJhRLrXgACR8eS5b-Rphdn5qNQ,44
-mt/sql/base.py,sha256=sFr7O_Odfsf2AHr9kq3DXGCAFInCKgHSgLJaen507_I,9994
-mt/sql/mysql.py,sha256=n2ENDctdUqZuSaDAcrqZYtPtawq3Wx4dOPCRsCB5Q4w,4894
-mt/sql/psql.py,sha256=dRN4wH1uQ-deGb2M-3PbdUfjHQ_1fbPXnR_94X1KMIU,67364
-mt/sql/redshift.py,sha256=ADi1I_p8S5ZmzbLCclhxiUou5gXZrLY9Hd9yTMoprB4,11630
-mt/sql/sqlite.py,sha256=T2ak_hhNi_zRfpg_gp8JhNHn7D2kl4i-Ey6-9ANMtz0,8678
-mt/sql/version.py,sha256=PeQLGKevhlxMrPdW08UhN8-u9JaxBvwOZY0yCmqMGmc,396
-mtsql-1.7.202312151026.dist-info/LICENSE,sha256=PojkRlQzTT5Eg6Nj03XoIVEefN3u8iiIFf1p4rqe_t4,1070
-mtsql-1.7.202312151026.dist-info/METADATA,sha256=zHt5Uh3O0YP5pkRj-xT1UCmVC2HfZqmGfQqcs5GrO3c,589
-mtsql-1.7.202312151026.dist-info/WHEEL,sha256=G16H4A3IeoQmnOrYV4ueZGKSjhipXx8zc8nu9FGlvMA,92
-mtsql-1.7.202312151026.dist-info/top_level.txt,sha256=WcqGFu9cV7iMZg09iam8eNxUvGpLSKKF2Iubf6SJVOo,3
-mtsql-1.7.202312151026.dist-info/RECORD,,