PyPI - meerschaum - Versions diffs - 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl - Mend

meerschaum-2.5.0-py3-none-any.whl → meerschaum-2.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

meerschaum/_internal/arguments/_parser.py +6 -1
meerschaum/_internal/entry.py +16 -5
meerschaum/actions/edit.py +6 -6
meerschaum/actions/sql.py +12 -11
meerschaum/api/dash/pages/login.py +17 -17
meerschaum/api/dash/pipes.py +104 -13
meerschaum/api/routes/_pipes.py +58 -40
meerschaum/api/routes/_webterm.py +1 -0
meerschaum/config/_edit.py +46 -19
meerschaum/config/_read_config.py +20 -9
meerschaum/config/_version.py +1 -1
meerschaum/config/stack/__init__.py +1 -1
meerschaum/config/static/__init__.py +1 -0
meerschaum/connectors/api/_APIConnector.py +1 -0
meerschaum/connectors/api/_pipes.py +39 -8
meerschaum/connectors/sql/_SQLConnector.py +4 -3
meerschaum/connectors/sql/_pipes.py +511 -118
meerschaum/connectors/sql/_sql.py +55 -15
meerschaum/connectors/valkey/_ValkeyConnector.py +3 -2
meerschaum/connectors/valkey/_pipes.py +11 -5
meerschaum/core/Pipe/__init__.py +27 -9
meerschaum/core/Pipe/_attributes.py +181 -18
meerschaum/core/Pipe/_clear.py +10 -8
meerschaum/core/Pipe/_copy.py +2 -0
meerschaum/core/Pipe/_data.py +65 -17
meerschaum/core/Pipe/_deduplicate.py +30 -28
meerschaum/core/Pipe/_dtypes.py +4 -4
meerschaum/core/Pipe/_fetch.py +12 -10
meerschaum/core/Pipe/_sync.py +28 -11
meerschaum/core/Pipe/_verify.py +52 -49
meerschaum/utils/dataframe.py +64 -34
meerschaum/utils/dtypes/__init__.py +25 -6
meerschaum/utils/dtypes/sql.py +76 -33
meerschaum/utils/misc.py +57 -24
meerschaum/utils/packages/_packages.py +2 -1
meerschaum/utils/schedule.py +7 -5
meerschaum/utils/sql.py +697 -44
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/METADATA +5 -3
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/RECORD +45 -45
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/WHEEL +1 -1
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/LICENSE +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/NOTICE +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/entry_points.txt +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/top_level.txt +0 -0
{meerschaum-2.5.0.dist-info → meerschaum-2.6.0.dist-info}/zip-safe +0 -0

meerschaum/utils/dataframe.py CHANGED Viewed

@@ -94,14 +94,14 @@ def filter_unseen_df(
     ----------
     old_df: 'pd.DataFrame'
         The original (target) dataframe. Acts as a filter on the `new_df`.
     new_df: 'pd.DataFrame'
         The fetched (source) dataframe. Rows that are contained in `old_df` are removed.
     safe_copy: bool, default True
         If `True`, create a copy before comparing and modifying the dataframes.
         Setting to `False` may mutate the DataFrames.
     dtypes: Optional[Dict[str, Any]], default None
         Optionally specify the datatypes of the dataframe.
@@ -234,8 +234,10 @@ def filter_unseen_df(
     cast_dt_cols = True
     try:
         for col, typ in dt_dtypes.items():
-            tz = typ.split(',')[-1].strip() if ',' in typ else None
-            new_df[col] = coerce_timezone(pd.to_datetime(new_df[col], utc=True))
+            if col in old_df.columns:
+                old_df[col] = coerce_timezone(old_df[col])
+            if col in new_df.columns:
+                new_df[col] = coerce_timezone(new_df[col])
         cast_dt_cols = False
     except Exception as e:
         warn(f"Could not cast datetime columns:\n{e}")
@@ -363,6 +365,7 @@ def filter_unseen_df(
 def parse_df_datetimes(
     df: 'pd.DataFrame',
     ignore_cols: Optional[Iterable[str]] = None,
+    strip_timezone: bool = False,
     chunksize: Optional[int] = None,
     dtype_backend: str = 'numpy_nullable',
     debug: bool = False,
@@ -378,6 +381,9 @@ def parse_df_datetimes(
     ignore_cols: Optional[Iterable[str]], default None
         If provided, do not attempt to coerce these columns as datetimes.
+    strip_timezone: bool, default False
+        If `True`, remove the UTC `tzinfo` property.
     chunksize: Optional[int], default None
         If the pandas implementation is `'dask'`, use this chunksize for the distributed dataframe.
@@ -385,7 +391,7 @@ def parse_df_datetimes(
         If `df` is not a DataFrame and a new one needs to be constructed,
         use this as the datatypes backend.
         Accepted values are 'numpy_nullable' and 'pyarrow'.
     debug: bool, default False
         Verbosity toggle.
@@ -447,7 +453,7 @@ def parse_df_datetimes(
                             for doc in df
                         ] for k in keys
                     },
-                    npartitions = npartitions,
+                    npartitions=npartitions,
                 )
             elif isinstance(df, dict):
                 df = pd.DataFrame.from_dict(df, npartitions=npartitions)
@@ -480,7 +486,7 @@ def parse_df_datetimes(
     if len(cols_to_inspect) == 0:
         if debug:
             dprint(f"All columns are ignored, skipping datetime detection...")
-        return df
+        return df.fillna(pandas.NA)
     ### apply regex to columns to determine which are ISO datetimes
     iso_dt_regex = r'\d{4}-\d{2}-\d{2}.\d{2}\:\d{2}\:\d+'
@@ -493,21 +499,25 @@ def parse_df_datetimes(
     if not datetime_cols:
         if debug:
             dprint("No columns detected as datetimes, returning...")
-        return df
+        return df.fillna(pandas.NA)
     if debug:
         dprint("Converting columns to datetimes: " + str(datetime_cols))
     try:
         if not using_dask:
-            df[datetime_cols] = df[datetime_cols].apply(pd.to_datetime, utc=True)
+            df[datetime_cols] = df[datetime_cols].apply(
+                pd.to_datetime,
+                utc=True,
+                format='ISO8601',
+            )
         else:
             df[datetime_cols] = df[datetime_cols].apply(
                 pd.to_datetime,
                 utc=True,
                 axis=1,
                 meta={
-                    col: 'datetime64[ns]'
+                    col: 'datetime64[ns, UTC]'
                     for col in datetime_cols
                 }
             )
@@ -517,13 +527,17 @@ def parse_df_datetimes(
             + f"{traceback.format_exc()}"
         )
-    for dt in datetime_cols:
-        try:
-            df[dt] = df[dt].dt.tz_localize(None)
-        except Exception:
-            warn(f"Unable to convert column '{dt}' to naive datetime:\n{traceback.format_exc()}")
+    if strip_timezone:
+        for dt in datetime_cols:
+            try:
+                df[dt] = df[dt].dt.tz_localize(None)
+            except Exception:
+                warn(
+                    f"Unable to convert column '{dt}' to naive datetime:\n"
+                    + f"{traceback.format_exc()}"
+                )
-    return df
+    return df.fillna(pandas.NA)
 def get_unhashable_cols(df: 'pd.DataFrame') -> List[str]:
@@ -674,6 +688,7 @@ def enforce_dtypes(
     dtypes: Dict[str, str],
     safe_copy: bool = True,
     coerce_numeric: bool = True,
+    coerce_timezone: bool = True,
     debug: bool = False,
 ) -> 'pd.DataFrame':
     """
@@ -695,6 +710,9 @@ def enforce_dtypes(
     coerce_numeric: bool, default True
         If `True`, convert float and int collisions to numeric.
+    coerce_timezone: bool, default True
+        If `True`, convert datetimes to UTC.
     debug: bool, default False
         Verbosity toggle.
@@ -703,21 +721,18 @@ def enforce_dtypes(
     The Pandas DataFrame with the types enforced.
     """
     import json
-    import traceback
-    from decimal import Decimal
     from meerschaum.utils.debug import dprint
-    from meerschaum.utils.warnings import warn
     from meerschaum.utils.formatting import pprint
-    from meerschaum.config.static import STATIC_CONFIG
-    from meerschaum.utils.packages import import_pandas
     from meerschaum.utils.dtypes import (
         are_dtypes_equal,
         to_pandas_dtype,
         is_dtype_numeric,
         attempt_cast_to_numeric,
         attempt_cast_to_uuid,
-        coerce_timezone,
+        coerce_timezone as _coerce_timezone,
     )
+    pandas = mrsm.attempt_import('pandas')
+    is_dask = 'dask' in df.__module__
     if safe_copy:
         df = df.copy()
     if len(df.columns) == 0:
@@ -744,6 +759,11 @@ def enforce_dtypes(
         for col, typ in dtypes.items()
         if typ == 'uuid'
     ]
+    datetime_cols = [
+        col
+        for col, typ in dtypes.items()
+        if are_dtypes_equal(typ, 'datetime')
+    ]
     df_numeric_cols = get_numeric_cols(df)
     if debug:
         dprint("Desired data types:")
@@ -792,6 +812,13 @@ def enforce_dtypes(
                     if debug:
                         dprint(f"Unable to parse column '{col}' as UUID:\n{e}")
+    if datetime_cols and coerce_timezone:
+        if debug:
+            dprint(f"Checking for datetime conversion: {datetime_cols}")
+        for col in datetime_cols:
+            if col in df.columns:
+                df[col] = _coerce_timezone(df[col])
     df_dtypes = {c: str(t) for c, t in df.dtypes.items()}
     if are_dtypes_equal(df_dtypes, pipe_pandas_dtypes):
         if debug:
@@ -826,8 +853,7 @@ def enforce_dtypes(
         if debug:
             dprint(
                 "The incoming DataFrame has mostly the same types, skipping enforcement."
-                + "The only detected difference was in the following datetime columns.\n"
-                + "    Timezone information may be stripped."
+                + "The only detected difference was in the following datetime columns."
             )
             pprint(detected_dt_cols)
         return df
@@ -930,11 +956,15 @@ def get_datetime_bound_from_df(
         if datetime_column not in df.columns:
             return None
-        dt_val = (
-            df[datetime_column].min(skipna=True)
-            if minimum else df[datetime_column].max(skipna=True)
-        )
-        if is_dask and dt_val is not None:
+        try:
+            dt_val = (
+                df[datetime_column].min(skipna=True)
+                if minimum
+                else df[datetime_column].max(skipna=True)
+            )
+        except Exception:
+            dt_val = pandas.NA
+        if is_dask and dt_val is not None and dt_val is not pandas.NA:
             dt_val = dt_val.compute()
         return (
@@ -1194,9 +1224,9 @@ def query_df(
     dtypes = {col: str(typ) for col, typ in df.dtypes.items()}
     if inplace:
-        df.infer_objects(copy=False).fillna(NA, inplace=True)
+        df.fillna(NA, inplace=True)
     else:
-        df = df.infer_objects(copy=False).fillna(NA)
+        df = df.infer_objects().fillna(NA)
     if isinstance(begin, str):
         begin = dateutil_parser.parse(begin)
@@ -1243,12 +1273,12 @@ def query_df(
             end_tz = end.tzinfo if end is not None else None
             if begin_tz is not None or end_tz is not None or df_tz is not None:
-                begin = coerce_timezone(begin)
-                end = coerce_timezone(end)
+                begin = coerce_timezone(begin, strip_utc=False)
+                end = coerce_timezone(end, strip_utc=False)
                 if df_tz is not None:
                     if debug:
                         dprint(f"Casting column '{datetime_column}' to UTC...")
-                    df[datetime_column] = coerce_timezone(df[datetime_column])
+                    df[datetime_column] = coerce_timezone(df[datetime_column], strip_utc=False)
                 dprint(f"Using datetime bounds:\n{begin=}\n{end=}")
     in_ex_params = get_in_ex_params(params)

meerschaum/utils/dtypes/__init__.py CHANGED Viewed

@@ -19,7 +19,7 @@ MRSM_PD_DTYPES: Dict[str, str] = {
     'json': 'object',
     'numeric': 'object',
     'uuid': 'object',
-    'datetime': 'datetime64[ns]',
+    'datetime': 'datetime64[ns, UTC]',
     'bool': 'bool[pyarrow]',
     'int': 'Int64',
     'int8': 'Int8',
@@ -245,7 +245,10 @@ def quantize_decimal(x: Decimal, scale: int, precision: int) -> Decimal:
         return x
-def coerce_timezone(dt: Any) -> Any:
+def coerce_timezone(
+    dt: Any,
+    strip_utc: bool = False,
+) -> Any:
     """
     Given a `datetime`, pandas `Timestamp` or `Series` of `Timestamp`,
     return it in terms of UTC. The result is timezone-aware by default;
     pass `strip_utc=True` to request a naive value instead.
@@ -256,13 +259,29 @@ def coerce_timezone(dt: Any) -> Any:
     if isinstance(dt, int):
         return dt
-    dt_is_series = hasattr(dt, 'dtype')
+    if isinstance(dt, str):
+        dateutil_parser = mrsm.attempt_import('dateutil.parser')
+        dt = dateutil_parser.parse(dt)
+    dt_is_series = hasattr(dt, 'dtype') and hasattr(dt, '__module__')
     if dt_is_series:
+        is_dask = 'dask' in dt.__module__
         pandas = mrsm.attempt_import('pandas')
-        return pandas.to_datetime(dt, utc=True).apply(lambda x: x.replace(tzinfo=None))
+        dd = mrsm.attempt_import('dask.dataframe') if is_dask else None
+        dt_series = (
+            pandas.to_datetime(dt, utc=True)
+            if dd is None
+            else dd.to_datetime(dt, utc=True)
+        )
+        if strip_utc:
+            dt_series = dt_series.apply(lambda x: x.replace(tzinfo=None))
+        return dt_series
     if dt.tzinfo is None:
-        return dt
+        if strip_utc:
+            return dt
+        return dt.replace(tzinfo=timezone.utc)
-    return dt.astimezone(timezone.utc).replace(tzinfo=None)
+    return dt.astimezone(timezone.utc)

meerschaum/utils/dtypes/sql.py CHANGED Viewed

@@ -7,15 +7,16 @@ Utility functions for working with SQL data types.
 """
 from __future__ import annotations
-from meerschaum.utils.typing import Dict, Union, Tuple
+from meerschaum.utils.typing import Dict, Union, Tuple, List
 NUMERIC_PRECISION_FLAVORS: Dict[str, Tuple[int, int]] = {
     'mariadb': (38, 20),
     'mysql': (38, 20),
     'mssql': (28, 10),
-    'duckdb': (15, 4),
+    'duckdb': (15, 3),
     'sqlite': (15, 4),
 }
+TIMEZONE_NAIVE_FLAVORS = {'oracle', 'mysql', 'mariadb'}
 ### MySQL doesn't allow for casting as BIGINT, so this is a workaround.
 DB_FLAVORS_CAST_DTYPES = {
@@ -49,6 +50,7 @@ DB_FLAVORS_CAST_DTYPES = {
         'NVARCHAR(2000)': 'NVARCHAR2(2000)',
         'NVARCHAR': 'NVARCHAR2(2000)',
         'NVARCHAR2': 'NVARCHAR2(2000)',
+        'CHAR': 'CHAR(36)',  # UUID columns
     },
     'mssql': {
         'NVARCHAR COLLATE "SQL Latin1 General CP1 CI AS"': 'NVARCHAR(MAX)',
@@ -78,7 +80,9 @@ DB_TO_PD_DTYPES: Dict[str, Union[str, Dict[str, str]]] = {
     'NUMBER': 'numeric',
     'NUMERIC': 'numeric',
     'TIMESTAMP': 'datetime64[ns]',
+    'TIMESTAMP WITHOUT TIMEZONE': 'datetime64[ns]',
     'TIMESTAMP WITH TIMEZONE': 'datetime64[ns, UTC]',
+    'TIMESTAMP WITH TIME ZONE': 'datetime64[ns, UTC]',
     'TIMESTAMPTZ': 'datetime64[ns, UTC]',
     'DATE': 'datetime64[ns]',
     'DATETIME': 'datetime64[ns]',
@@ -160,7 +164,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'mariadb': 'DATETIME',
         'mysql': 'DATETIME',
         'mssql': 'DATETIME2',
-        'oracle': 'DATE',
+        'oracle': 'TIMESTAMP',
         'sqlite': 'DATETIME',
         'duckdb': 'TIMESTAMP',
         'citus': 'TIMESTAMP',
@@ -168,24 +172,37 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'default': 'DATETIME',
     },
     'datetime64[ns, UTC]': {
-        'timescaledb': 'TIMESTAMP',
-        'postgresql': 'TIMESTAMP',
-        'mariadb': 'TIMESTAMP',
-        'mysql': 'TIMESTAMP',
+        'timescaledb': 'TIMESTAMPTZ',
+        'postgresql': 'TIMESTAMPTZ',
+        'mariadb': 'DATETIME',
+        'mysql': 'DATETIME',
         'mssql': 'DATETIMEOFFSET',
         'oracle': 'TIMESTAMP',
         'sqlite': 'TIMESTAMP',
-        'duckdb': 'TIMESTAMP',
-        'citus': 'TIMESTAMP',
-        'cockroachdb': 'TIMESTAMP',
-        'default': 'TIMESTAMP',
+        'duckdb': 'TIMESTAMPTZ',
+        'citus': 'TIMESTAMPTZ',
+        'cockroachdb': 'TIMESTAMPTZ',
+        'default': 'TIMESTAMPTZ',
+    },
+    'datetime': {
+        'timescaledb': 'TIMESTAMPTZ',
+        'postgresql': 'TIMESTAMPTZ',
+        'mariadb': 'DATETIME',
+        'mysql': 'DATETIME',
+        'mssql': 'DATETIMEOFFSET',
+        'oracle': 'TIMESTAMP',
+        'sqlite': 'TIMESTAMP',
+        'duckdb': 'TIMESTAMPTZ',
+        'citus': 'TIMESTAMPTZ',
+        'cockroachdb': 'TIMESTAMPTZ',
+        'default': 'TIMESTAMPTZ',
     },
     'bool': {
         'timescaledb': 'BOOLEAN',
         'postgresql': 'BOOLEAN',
         'mariadb': 'BOOLEAN',
         'mysql': 'BOOLEAN',
-        'mssql': 'INTEGER',
+        'mssql': 'BIT',
         'oracle': 'INTEGER',
         'sqlite': 'FLOAT',
         'duckdb': 'BOOLEAN',
@@ -252,7 +269,7 @@ PD_TO_DB_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'mysql': 'CHAR(36)',
         'mssql': 'UNIQUEIDENTIFIER',
         ### A canonical UUID string is 36 characters, so a fixed-width column is enough.
-        'oracle': 'NVARCHAR(2000)',
+        'oracle': 'CHAR(36)',
         'sqlite': 'TEXT',
         'duckdb': 'VARCHAR',
         'citus': 'UUID',
@@ -301,24 +318,24 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'default': 'DateTime',
     },
     'datetime64[ns, UTC]': {
-        'timescaledb': 'DateTime',
-        'postgresql': 'DateTime',
-        'mariadb': 'DateTime',
-        'mysql': 'DateTime',
+        'timescaledb': 'DateTime(timezone=True)',
+        'postgresql': 'DateTime(timezone=True)',
+        'mariadb': 'DateTime(timezone=True)',
+        'mysql': 'DateTime(timezone=True)',
         'mssql': 'sqlalchemy.dialects.mssql.DATETIMEOFFSET',
-        'oracle': 'DateTime',
-        'sqlite': 'DateTime',
-        'duckdb': 'DateTime',
-        'citus': 'DateTime',
-        'cockroachdb': 'DateTime',
-        'default': 'DateTime',
+        'oracle': 'sqlalchemy.dialects.oracle.TIMESTAMP(timezone=True)',
+        'sqlite': 'DateTime(timezone=True)',
+        'duckdb': 'DateTime(timezone=True)',
+        'citus': 'DateTime(timezone=True)',
+        'cockroachdb': 'DateTime(timezone=True)',
+        'default': 'DateTime(timezone=True)',
     },
     'bool': {
         'timescaledb': 'Boolean',
         'postgresql': 'Boolean',
         'mariadb': 'Integer',
         'mysql': 'Integer',
-        'mssql': 'Integer',
+        'mssql': 'sqlalchemy.dialects.mssql.BIT',
         'oracle': 'Integer',
         'sqlite': 'Float',
         'duckdb': 'Boolean',
@@ -384,7 +401,7 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
         'mariadb': 'sqlalchemy.dialects.mysql.CHAR(36)',
         'mysql': 'sqlalchemy.dialects.mysql.CHAR(36)',
         'mssql': 'Uuid',
-        'oracle': 'UnicodeText',
+        'oracle': 'sqlalchemy.dialects.oracle.CHAR(36)',
         'sqlite': 'UnicodeText',
         'duckdb': 'UnicodeText',
         'citus': 'Uuid',
@@ -393,6 +410,20 @@ PD_TO_SQLALCHEMY_DTYPES_FLAVORS: Dict[str, Dict[str, str]] = {
     },
 }
+AUTO_INCREMENT_COLUMN_FLAVORS: Dict[str, str] = {
+    'timescaledb': 'GENERATED BY DEFAULT AS IDENTITY',
+    'postgresql': 'GENERATED BY DEFAULT AS IDENTITY',
+    'mariadb': 'AUTO_INCREMENT',
+    'mysql': 'AUTO_INCREMENT',
+    'mssql': 'IDENTITY(1,1)',
+    'oracle': 'GENERATED BY DEFAULT ON NULL AS IDENTITY',
+    'sqlite': 'AUTOINCREMENT',
+    'duckdb': 'GENERATED BY DEFAULT',
+    'citus': 'GENERATED BY DEFAULT',
+    'cockroachdb': 'GENERATED BY DEFAULT AS IDENTITY',
+    'default': 'GENERATED BY DEFAULT AS IDENTITY',
+}
 def get_pd_type_from_db_type(db_type: str, allow_custom_dtypes: bool = False) -> str:
     """
@@ -456,10 +487,10 @@ def get_db_type_from_pd_type(
     The database data type for the incoming Pandas data type.
     If nothing can be found, a warning will be thrown and 'TEXT' will be returned.
     """
-    import ast
     from meerschaum.utils.warnings import warn
     from meerschaum.utils.packages import attempt_import
-    from meerschaum.utils.dtypes import are_dtypes_equal
+    from meerschaum.utils.dtypes import are_dtypes_equal, MRSM_PD_DTYPES
+    from meerschaum.utils.misc import parse_arguments_str
     sqlalchemy_types = attempt_import('sqlalchemy.types')
     types_registry = (
@@ -512,15 +543,16 @@ def get_db_type_from_pd_type(
     if db_type.startswith('sqlalchemy.dialects'):
         dialect, typ_class_name = db_type.replace('sqlalchemy.dialects.', '').split('.', maxsplit=2)
-        arg = None
+        cls_args, cls_kwargs = None, None
         if '(' in typ_class_name:
-            typ_class_name, arg_str = typ_class_name.split('(', maxsplit=1)
-            arg = ast.literal_eval(arg_str.rstrip(')'))
+            typ_class_name, args_str = typ_class_name.split('(', maxsplit=1)
+            args_str = args_str.rstrip(')')
+            cls_args, cls_kwargs = parse_arguments_str(args_str)
         sqlalchemy_dialects_flavor_module = attempt_import(f'sqlalchemy.dialects.{dialect}')
         cls = getattr(sqlalchemy_dialects_flavor_module, typ_class_name)
-        if arg is None:
+        if cls_args is None:
             return cls
-        return cls(arg)
+        return cls(*cls_args, **cls_kwargs)
     if 'numeric' in db_type.lower():
         numeric_type_str = PD_TO_DB_DTYPES_FLAVORS['numeric'].get(flavor, 'NUMERIC')
@@ -528,4 +560,15 @@ def get_db_type_from_pd_type(
             return sqlalchemy_types.Numeric
         precision, scale = NUMERIC_PRECISION_FLAVORS[flavor]
         return sqlalchemy_types.Numeric(precision, scale)
-    return getattr(sqlalchemy_types, db_type)
+    cls_args, cls_kwargs = None, None
+    typ_class_name = db_type
+    if '(' in db_type:
+        typ_class_name, args_str = db_type.split('(', maxsplit=1)
+        args_str = args_str.rstrip(')')
+        cls_args, cls_kwargs = parse_arguments_str(args_str)
+    cls = getattr(sqlalchemy_types, typ_class_name)
+    if cls_args is None:
+        return cls
+    return cls(*cls_args, **cls_kwargs)

meerschaum/utils/misc.py CHANGED Viewed

@@ -214,20 +214,20 @@ def parse_config_substitution(
 def edit_file(
-        path: Union[pathlib.Path, str],
-        default_editor: str = 'pyvim',
-        debug: bool = False
-    ) -> bool:
+    path: Union['pathlib.Path', str],
+    default_editor: str = 'pyvim',
+    debug: bool = False
+) -> bool:
     """
     Open a file for editing.
     Attempt to launch the user's defined `$EDITOR`, otherwise use `pyvim`.
     Parameters
     ----------
     path: Union[pathlib.Path, str]
         The path to the file to be edited.
     default_editor: str, default 'pyvim'
         If `$EDITOR` is not set, use this instead.
         If `pyvim` is not installed, it will install it from PyPI.
@@ -250,7 +250,7 @@ def edit_file(
         rc = call([EDITOR, path])
     except Exception as e: ### can't open with default editors
         if debug:
-            dprint(e)
+            dprint(str(e))
             dprint('Failed to open file with system editor. Falling back to pyvim...')
         pyvim = attempt_import('pyvim', lazy=False)
         rc = run_python_package('pyvim', [path], venv=package_venv(pyvim), debug=debug)
@@ -258,10 +258,10 @@ def edit_file(
 def is_pipe_registered(
-        pipe: mrsm.Pipe,
-        pipes: PipesDict,
-        debug: bool = False
-    ) -> bool:
+    pipe: mrsm.Pipe,
+    pipes: PipesDict,
+    debug: bool = False
+) -> bool:
     """
     Check if a Pipe is inside the pipes dictionary.
@@ -269,10 +269,10 @@ def is_pipe_registered(
     ----------
     pipe: meerschaum.Pipe
         The pipe to see if it's in the dictionary.
     pipes: PipesDict
         The dictionary to search inside.
     debug: bool, default False
         Verbosity toggle.
@@ -975,13 +975,13 @@ def json_serialize_datetime(dt: datetime) -> Union[str, None]:
 def wget(
-        url: str,
-        dest: Optional[Union[str, 'pathlib.Path']] = None,
-        headers: Optional[Dict[str, Any]] = None,
-        color: bool = True,
-        debug: bool = False,
-        **kw: Any
-    ) -> 'pathlib.Path':
+    url: str,
+    dest: Optional[Union[str, 'pathlib.Path']] = None,
+    headers: Optional[Dict[str, Any]] = None,
+    color: bool = True,
+    debug: bool = False,
+    **kw: Any
+) -> 'pathlib.Path':
     """
     Mimic `wget` with `requests`.
@@ -989,7 +989,7 @@ def wget(
     ----------
     url: str
         The URL to the resource to be downloaded.
     dest: Optional[Union[str, pathlib.Path]], default None
         The destination path of the downloaded file.
         If `None`, save to the current directory.
@@ -1426,7 +1426,40 @@ def flatten_list(list_: List[Any]) -> List[Any]:
             yield item
-def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTuple:
def parse_arguments_str(args_str: str) -> Tuple[Tuple[Any, ...], Dict[str, Any]]:
    """
    Parse a string containing the text to be passed into a function
    and return a tuple of args, kwargs.

    The text is parsed with Python's own grammar (by wrapping it in a
    placeholder call), so commas and equals signs inside string literals
    or nested containers are handled correctly. Every argument must be a
    Python literal; nothing is ever executed.

    Parameters
    ----------
    args_str: str
        The contents of the function parameter (as a string).
        An empty or whitespace-only string means "no arguments".

    Returns
    -------
    A tuple of args (tuple) and kwargs (dict[str, Any]).

    Raises
    ------
    SyntaxError
        If `args_str` is not a syntactically valid argument list.
    ValueError
        If an argument is not a literal, if `**` unpacking is used,
        or if `args_str` tries to escape the argument list.

    Examples
    --------
    >>> parse_arguments_str('123, 456, foo=789, bar="baz"')
    ((123, 456), {'foo': 789, 'bar': 'baz'})
    >>> parse_arguments_str('"a,b", sep="="')
    (('a,b',), {'sep': '='})
    >>> parse_arguments_str('')
    ((), {})
    """
    import ast

    ### Let the Python parser split positional and keyword arguments
    ### instead of splitting on ',' and '=' by hand.
    call_node = ast.parse(f"_({args_str})", mode='eval').body

    ### Input such as '1) + f(2' would close the placeholder call early;
    ### only accept a tree that is exactly one call to the placeholder.
    if (
        not isinstance(call_node, ast.Call)
        or not isinstance(call_node.func, ast.Name)
        or call_node.func.id != '_'
    ):
        raise ValueError(f"Invalid arguments string: {args_str!r}")

    ### `ast.literal_eval` accepts AST nodes and rejects anything but literals.
    args = tuple(ast.literal_eval(arg_node) for arg_node in call_node.args)

    kwargs = {}
    for keyword_node in call_node.keywords:
        ### `keyword.arg` is None for `**mapping` unpacking, which has no literal name.
        if keyword_node.arg is None:
            raise ValueError(f"Unpacking is not supported in arguments string: {args_str!r}")
        kwargs[keyword_node.arg] = ast.literal_eval(keyword_node.value)

    return args, kwargs
+def make_symlink(src_path: 'pathlib.Path', dest_path: 'pathlib.Path') -> SuccessTuple:
     """
     Wrap around `pathlib.Path.symlink_to`, but add support for Windows.
@@ -1452,7 +1485,7 @@ def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTupl
         msg = str(e)
     if success:
         return success, "Success"
     ### Failed to create a symlink.
     ### If we're not on Windows, return an error.
     import platform
@@ -1477,7 +1510,7 @@ def make_symlink(src_path: pathlib.Path, dest_path: pathlib.Path) -> SuccessTupl
         shutil.copy(src_path, dest_path)
     except Exception as e:
         return False, str(e)
     return True, "Success"

meerschaum/utils/packages/_packages.py CHANGED Viewed

@@ -163,7 +163,8 @@ packages['api'] = {
     'fastapi'                        : 'fastapi>=0.111.0',
     'fastapi_login'                  : 'fastapi-login>=1.7.2',
     'multipart'                      : 'python-multipart>=0.0.9',
-    'httpx'                          : 'httpx>=0.24.1',
+    'httpx'                          : 'httpx>=0.27.2',
+    'httpcore'                       : 'httpcore>=1.0.6',
     'valkey'                         : 'valkey>=6.0.0',
 }
 packages['api'].update(packages['sql'])

meerschaum 2.5.0__py3-none-any.whl → 2.6.0__py3-none-any.whl

meerschaum 2.5.0py3-none-any.whl → 2.6.0py3-none-any.whl