meerschaum 2.7.7__py3-none-any.whl → 2.7.8__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -9,6 +9,7 @@ Functions for copying elements.
9
9
  from __future__ import annotations
10
10
  from meerschaum.utils.typing import Any, SuccessTuple, Optional, List
11
11
 
12
+
12
13
  def copy(
13
14
  action: Optional[List[str]] = None,
14
15
  **kw : Any
@@ -2,4 +2,4 @@
2
2
  Specify the Meerschaum release version.
3
3
  """
4
4
 
5
- __version__ = "2.7.7"
5
+ __version__ = "2.7.8"
@@ -195,7 +195,8 @@ class SQLConnector(Connector):
195
195
  self._debug = debug
196
196
  ### Store the PID and thread at initialization
197
197
  ### so we can dispose of the Pool in child processes or threads.
198
- import os, threading
198
+ import os
199
+ import threading
199
200
  self._pid = os.getpid()
200
201
  self._thread_ident = threading.current_thread().ident
201
202
  self._sessions = {}
@@ -286,7 +287,6 @@ class SQLConnector(Connector):
286
287
  return ':memory:' not in self.URI
287
288
  return True
288
289
 
289
-
290
290
  @property
291
291
  def metadata(self):
292
292
  """
@@ -298,7 +298,6 @@ class SQLConnector(Connector):
298
298
  self._metadata = sqlalchemy.MetaData(schema=self.schema)
299
299
  return self._metadata
300
300
 
301
-
302
301
  @property
303
302
  def instance_schema(self):
304
303
  """
@@ -306,14 +305,12 @@ class SQLConnector(Connector):
306
305
  """
307
306
  return self.schema
308
307
 
309
-
310
308
  @property
311
309
  def internal_schema(self):
312
310
  """
313
311
  Return the schema name for internal tables.
314
312
  """
315
313
  from meerschaum.config.static import STATIC_CONFIG
316
- from meerschaum.utils.packages import attempt_import
317
314
  from meerschaum.utils.sql import NO_SCHEMA_FLAVORS
318
315
  schema_name = self.__dict__.get('internal_schema', None) or (
319
316
  STATIC_CONFIG['sql']['internal_schema']
@@ -325,7 +322,6 @@ class SQLConnector(Connector):
325
322
  self._internal_schema = schema_name
326
323
  return self._internal_schema
327
324
 
328
-
329
325
  @property
330
326
  def db(self) -> Optional[databases.Database]:
331
327
  from meerschaum.utils.packages import attempt_import
@@ -342,7 +338,6 @@ class SQLConnector(Connector):
342
338
  self._db = None
343
339
  return self._db
344
340
 
345
-
346
341
  @property
347
342
  def db_version(self) -> Union[str, None]:
348
343
  """
@@ -356,7 +351,6 @@ class SQLConnector(Connector):
356
351
  self._db_version = get_db_version(self)
357
352
  return self._db_version
358
353
 
359
-
360
354
  @property
361
355
  def schema(self) -> Union[str, None]:
362
356
  """
@@ -376,7 +370,6 @@ class SQLConnector(Connector):
376
370
  self.__dict__['schema'] = _schema
377
371
  return _schema
378
372
 
379
-
380
373
  def __getstate__(self):
381
374
  return self.__dict__
382
375
 
@@ -11,7 +11,7 @@ from __future__ import annotations
11
11
  from datetime import datetime, timedelta
12
12
 
13
13
  import meerschaum as mrsm
14
- from meerschaum.utils.typing import Optional, Union, Callable, Any, List, Dict
14
+ from meerschaum.utils.typing import Optional, Union, Any, List, Dict
15
15
 
16
16
 
17
17
  def fetch(
@@ -20,7 +20,6 @@ def fetch(
20
20
  begin: Union[datetime, int, str, None] = '',
21
21
  end: Union[datetime, int, str, None] = None,
22
22
  check_existing: bool = True,
23
- chunk_hook: Optional[Callable[['pd.DataFrame'], Any]] = None,
24
23
  chunksize: Optional[int] = -1,
25
24
  workers: Optional[int] = None,
26
25
  debug: bool = False,
@@ -53,15 +52,12 @@ def fetch(
53
52
  check_existing: bool, default True
54
53
  If `False`, use a backtrack interval of 0 minutes.
55
54
 
56
- chunk_hook: Callable[[pd.DataFrame], Any], default None
57
- A function to pass to `SQLConnector.read()` that accepts a Pandas DataFrame.
58
-
59
55
  chunksize: Optional[int], default -1
60
- How many rows to load into memory at once (when `chunk_hook` is provided).
56
+ How many rows to load into memory at once.
61
57
  Otherwise the entire result set is loaded into memory.
62
58
 
63
59
  workers: Optional[int], default None
64
- How many threads to use when consuming the generator (when `chunk_hook is provided).
60
+ How many threads to use when consuming the generator.
65
61
  Defaults to the number of cores.
66
62
 
67
63
  debug: bool, default False
@@ -69,8 +65,7 @@ def fetch(
69
65
 
70
66
  Returns
71
67
  -------
72
- A pandas DataFrame or `None`.
73
- If `chunk_hook` is not None, return a list of the hook function's results.
68
+ A pandas DataFrame generator.
74
69
  """
75
70
  meta_def = self.get_pipe_metadef(
76
71
  pipe,
@@ -80,33 +75,13 @@ def fetch(
80
75
  debug=debug,
81
76
  **kw
82
77
  )
83
- as_hook_results = chunk_hook is not None
84
78
  chunks = self.read(
85
79
  meta_def,
86
- chunk_hook=chunk_hook,
87
- as_hook_results=as_hook_results,
88
80
  chunksize=chunksize,
89
81
  workers=workers,
82
+ as_iterator=True,
90
83
  debug=debug,
91
84
  )
92
- ### if sqlite, parse for datetimes
93
- if not as_hook_results and self.flavor == 'sqlite':
94
- from meerschaum.utils.dataframe import parse_df_datetimes
95
- from meerschaum.utils.dtypes import are_dtypes_equal
96
- ignore_cols = [
97
- col
98
- for col, dtype in pipe.dtypes.items()
99
- if not are_dtypes_equal(str(dtype), 'datetime')
100
- ]
101
- return (
102
- parse_df_datetimes(
103
- chunk,
104
- ignore_cols=ignore_cols,
105
- strip_timezone=(pipe.tzinfo is None),
106
- debug=debug,
107
- )
108
- for chunk in chunks
109
- )
110
85
  return chunks
111
86
 
112
87
 
@@ -1125,7 +1125,7 @@ def get_pipe_data(
1125
1125
  numeric_columns = [
1126
1126
  col
1127
1127
  for col, typ in pipe.dtypes.items()
1128
- if typ == 'numeric' and col in dtypes
1128
+ if typ.startswith('numeric') and col in dtypes
1129
1129
  ]
1130
1130
  uuid_columns = [
1131
1131
  col
@@ -1887,7 +1887,10 @@ def sync_pipe(
1887
1887
  warn(f"Could not reset auto-incrementing primary key for {pipe}.", stack=False)
1888
1888
 
1889
1889
  if update_df is not None and len(update_df) > 0:
1890
- temp_target = self.get_temporary_target(pipe.target, label='update')
1890
+ temp_target = self.get_temporary_target(
1891
+ pipe.target,
1892
+ label=('update' if not upsert else 'upsert'),
1893
+ )
1891
1894
  self._log_temporary_tables_creation(temp_target, create=(not pipe.temporary), debug=debug)
1892
1895
  temp_pipe = Pipe(
1893
1896
  pipe.connector_keys.replace(':', '_') + '_', pipe.metric_key, pipe.location_key,
@@ -3274,7 +3277,7 @@ def get_alter_columns_queries(
3274
3277
  else [
3275
3278
  col
3276
3279
  for col, typ in df.items()
3277
- if typ == 'numeric'
3280
+ if typ.startswith('numeric')
3278
3281
  ]
3279
3282
  )
3280
3283
  df_cols_types = (
@@ -3354,7 +3357,7 @@ def get_alter_columns_queries(
3354
3357
  + f"{edit_msg}"
3355
3358
  )
3356
3359
  else:
3357
- numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ == 'numeric'])
3360
+ numeric_cols.extend([col for col, typ in pipe.dtypes.items() if typ.startswith('numeric')])
3358
3361
 
3359
3362
  numeric_type = get_db_type_from_pd_type('numeric', self.flavor, as_sqlalchemy=False)
3360
3363
  text_type = get_db_type_from_pd_type('str', self.flavor, as_sqlalchemy=False)
@@ -126,7 +126,7 @@ def read(
126
126
  return []
127
127
  from meerschaum.utils.sql import sql_item_name, truncate_item_name
128
128
  from meerschaum.utils.dtypes import are_dtypes_equal, coerce_timezone
129
- from meerschaum.utils.dtypes.sql import NUMERIC_PRECISION_FLAVORS, TIMEZONE_NAIVE_FLAVORS
129
+ from meerschaum.utils.dtypes.sql import TIMEZONE_NAIVE_FLAVORS
130
130
  from meerschaum.utils.packages import attempt_import, import_pandas
131
131
  from meerschaum.utils.pool import get_pool
132
132
  from meerschaum.utils.dataframe import chunksize_to_npartitions, get_numeric_cols
@@ -802,16 +802,17 @@ def to_sql(
802
802
  )
803
803
  from meerschaum.utils.dtypes import (
804
804
  are_dtypes_equal,
805
- quantize_decimal,
806
805
  coerce_timezone,
807
806
  encode_bytes_for_bytea,
808
807
  serialize_bytes,
808
+ serialize_decimal,
809
+ json_serialize_value,
809
810
  )
810
811
  from meerschaum.utils.dtypes.sql import (
811
- NUMERIC_PRECISION_FLAVORS,
812
- NUMERIC_AS_TEXT_FLAVORS,
813
812
  PD_TO_SQLALCHEMY_DTYPES_FLAVORS,
814
813
  get_db_type_from_pd_type,
814
+ get_pd_type_from_db_type,
815
+ get_numeric_precision_scale,
815
816
  )
816
817
  from meerschaum.utils.misc import interval_str
817
818
  from meerschaum.connectors.sql._create_engine import flavor_configs
@@ -822,6 +823,16 @@ def to_sql(
822
823
 
823
824
  bytes_cols = get_bytes_cols(df)
824
825
  numeric_cols = get_numeric_cols(df)
826
+ numeric_cols_dtypes = {
827
+ col: typ
828
+ for col, typ in kw.get('dtype', {}).items()
829
+ if (
830
+ col in df.columns
831
+ and 'numeric' in str(typ).lower()
832
+ )
833
+
834
+ }
835
+ numeric_cols.extend([col for col in numeric_cols_dtypes if col not in numeric_cols])
825
836
 
826
837
  enable_bulk_insert = mrsm.get_config(
827
838
  'system', 'connectors', 'sql', 'bulk_insert'
@@ -854,12 +865,24 @@ def to_sql(
854
865
  for col in bytes_cols:
855
866
  df[col] = df[col].apply(bytes_serializer)
856
867
 
857
- if self.flavor in NUMERIC_AS_TEXT_FLAVORS:
858
- if safe_copy and not copied:
859
- df = df.copy()
860
- copied = True
861
- for col in numeric_cols:
862
- df[col] = df[col].astype(str)
868
+ ### Check for numeric columns.
869
+ for col in numeric_cols:
870
+ typ = numeric_cols_dtypes.get(col, None)
871
+
872
+ precision, scale = (
873
+ (typ.precision, typ.scale)
874
+ if hasattr(typ, 'precision')
875
+ else get_numeric_precision_scale(self.flavor)
876
+ )
877
+
878
+ df[col] = df[col].apply(
879
+ functools.partial(
880
+ serialize_decimal,
881
+ quantize=True,
882
+ precision=precision,
883
+ scale=scale,
884
+ )
885
+ )
863
886
 
864
887
  stats['method'] = method.__name__ if hasattr(method, '__name__') else str(method)
865
888
 
@@ -889,7 +912,7 @@ def to_sql(
889
912
  if name != truncated_name:
890
913
  warn(
891
914
  f"Table '{name}' is too long for '{self.flavor}',"
892
- + f" will instead create the table '{truncated_name}'."
915
+ f" will instead create the table '{truncated_name}'."
893
916
  )
894
917
 
895
918
  ### filter out non-pandas args
@@ -957,24 +980,11 @@ def to_sql(
957
980
  ### Check for JSON columns.
958
981
  if self.flavor not in json_flavors:
959
982
  json_cols = get_json_cols(df)
960
- if json_cols:
961
- for col in json_cols:
962
- df[col] = df[col].apply(
963
- (
964
- lambda x: json.dumps(x, default=str, sort_keys=True)
965
- if not isinstance(x, Hashable)
966
- else x
967
- )
968
- )
969
-
970
- ### Check for numeric columns.
971
- numeric_scale, numeric_precision = NUMERIC_PRECISION_FLAVORS.get(self.flavor, (None, None))
972
- if numeric_precision is not None and numeric_scale is not None:
973
- for col in numeric_cols:
983
+ for col in json_cols:
974
984
  df[col] = df[col].apply(
975
- lambda x: (
976
- quantize_decimal(x, numeric_scale, numeric_precision)
977
- if isinstance(x, Decimal)
985
+ (
986
+ lambda x: json.dumps(x, default=json_serialize_value, sort_keys=True)
987
+ if not isinstance(x, Hashable)
978
988
  else x
979
989
  )
980
990
  )
@@ -1051,16 +1061,20 @@ def psql_insert_copy(
1051
1061
 
1052
1062
  from meerschaum.utils.sql import sql_item_name
1053
1063
  from meerschaum.utils.warnings import dprint
1064
+ from meerschaum.utils.dtypes import json_serialize_value
1054
1065
 
1055
1066
  ### NOTE: PostgreSQL doesn't support NUL chars in text, so they're removed from strings.
1056
1067
  data_iter = (
1057
1068
  (
1058
1069
  (
1059
1070
  (
1060
- json.dumps(item).replace('\0', '').replace('\\u0000', '')
1071
+ json.dumps(
1072
+ item,
1073
+ default=json_serialize_value,
1074
+ ).replace('\0', '').replace('\\u0000', '')
1061
1075
  if isinstance(item, (dict, list))
1062
1076
  else (
1063
- item
1077
+ json_serialize_value(item, default_to_str=False)
1064
1078
  if not isinstance(item, str)
1065
1079
  else item.replace('\0', '').replace('\\u0000', '')
1066
1080
  )
@@ -1119,6 +1133,7 @@ def mssql_insert_json(
1119
1133
  """
1120
1134
  import json
1121
1135
  from meerschaum.utils.sql import sql_item_name
1136
+ from meerschaum.utils.dtypes import json_serialize_value
1122
1137
  from meerschaum.utils.dtypes.sql import get_pd_type_from_db_type, get_db_type_from_pd_type
1123
1138
  from meerschaum.utils.warnings import dprint
1124
1139
  table_name = sql_item_name(table.name, 'mssql', table.schema)
@@ -1127,6 +1142,15 @@ def mssql_insert_json(
1127
1142
  str(column.name): get_pd_type_from_db_type(str(column.type))
1128
1143
  for column in table.table.columns
1129
1144
  }
1145
+ numeric_cols_types = {
1146
+ col: table.table.columns[col].type
1147
+ for col, typ in pd_types.items()
1148
+ if typ.startswith('numeric') and col in keys
1149
+ }
1150
+ pd_types.update({
1151
+ col: f'numeric[{typ.precision},{typ.scale}]'
1152
+ for col, typ in numeric_cols_types.items()
1153
+ })
1130
1154
  cols_types = {
1131
1155
  col: get_db_type_from_pd_type(typ, 'mssql')
1132
1156
  for col, typ in pd_types.items()
@@ -1151,7 +1175,8 @@ def mssql_insert_json(
1151
1175
  if debug:
1152
1176
  dprint(sql)
1153
1177
 
1154
- conn.exec_driver_sql(sql, (json.dumps(json_data, default=str),))
1178
+ serialized_data = json.dumps(json_data, default=json_serialize_value)
1179
+ conn.exec_driver_sql(sql, (serialized_data,))
1155
1180
 
1156
1181
 
1157
1182
  def format_sql_query_for_dask(query: str) -> 'sqlalchemy.sql.selectable.Select':
@@ -239,7 +239,7 @@ class ValkeyConnector(Connector):
239
239
  -------
240
240
  The current index counter value (how many docs have been pushed).
241
241
  """
242
- from meerschaum.utils.misc import json_serialize_datetime
242
+ from meerschaum.utils.dtypes import json_serialize_value
243
243
  table_name = self.quote_table(table)
244
244
  datetime_column_key = self.get_datetime_column_key(table)
245
245
  remote_datetime_column = self.get(datetime_column_key)
@@ -269,7 +269,7 @@ class ValkeyConnector(Connector):
269
269
  ) if datetime_column else None
270
270
  doc_str = json.dumps(
271
271
  doc,
272
- default=(lambda x: json_serialize_datetime(x) if hasattr(x, 'tzinfo') else str(x)),
272
+ default=json_serialize_value,
273
273
  separators=(',', ':'),
274
274
  sort_keys=True,
275
275
  )
@@ -84,6 +84,7 @@ def fetch(
84
84
  begin=_determine_begin(
85
85
  self,
86
86
  begin,
87
+ end,
87
88
  check_existing=check_existing,
88
89
  debug=debug,
89
90
  ),
@@ -136,6 +137,7 @@ def get_backtrack_interval(
136
137
  def _determine_begin(
137
138
  pipe: mrsm.Pipe,
138
139
  begin: Union[datetime, int, str, None] = '',
140
+ end: Union[datetime, int, None] = None,
139
141
  check_existing: bool = True,
140
142
  debug: bool = False,
141
143
  ) -> Union[datetime, int, None]:
@@ -157,6 +159,8 @@ def _determine_begin(
157
159
  """
158
160
  if begin != '':
159
161
  return begin
162
+ if end is not None:
163
+ return None
160
164
  sync_time = pipe.get_sync_time(debug=debug)
161
165
  if sync_time is None:
162
166
  return sync_time
@@ -292,7 +292,6 @@ def sync(
292
292
  message = '\n'.join([_message for _, _message in df])
293
293
  return success, message
294
294
 
295
- ### TODO: Depreciate async?
296
295
  if df is True:
297
296
  p._exists = None
298
297
  return True, f"{p} is being synced in parallel."
@@ -331,8 +330,7 @@ def sync(
331
330
  return (
332
331
  _chunk_success,
333
332
  (
334
- '\n'
335
- + self._get_chunk_label(_chunk, dt_col)
333
+ self._get_chunk_label(_chunk, dt_col)
336
334
  + '\n'
337
335
  + _chunk_msg
338
336
  )
@@ -341,17 +339,25 @@ def sync(
341
339
  results = sorted(
342
340
  [(chunk_success, chunk_msg)] + (
343
341
  list(pool.imap(_process_chunk, df))
344
- if not df_is_chunk_generator(chunk)
345
- else [
342
+ if (
343
+ not df_is_chunk_generator(chunk) # Handle nested generators.
344
+ and kw.get('workers', 1) != 1
345
+ )
346
+ else list(
346
347
  _process_chunk(_child_chunks)
347
348
  for _child_chunks in df
348
- ]
349
+ )
349
350
  )
350
351
  )
351
352
  chunk_messages = [chunk_msg for _, chunk_msg in results]
352
353
  success_bools = [chunk_success for chunk_success, _ in results]
353
354
  success = all(success_bools)
354
- msg = '\n'.join(chunk_messages)
355
+ msg = (
356
+ f'Synced {len(chunk_messages)} chunk'
357
+ + ('s' if len(chunk_messages) != 1 else '')
358
+ + f' to {p}:\n\n'
359
+ + '\n\n'.join(chunk_messages).lstrip().rstrip()
360
+ ).lstrip().rstrip()
355
361
 
356
362
  ### If some chunks succeeded, retry the failures.
357
363
  retry_success = True
@@ -432,7 +438,7 @@ def sync(
432
438
 
433
439
  if blocking:
434
440
  self._exists = None
435
- return _sync(self, df = df)
441
+ return _sync(self, df=df)
436
442
 
437
443
  from meerschaum.utils.threading import Thread
438
444
  def default_callback(result_tuple: SuccessTuple):
@@ -821,6 +827,7 @@ def filter_existing(
821
827
  for col, typ in self_dtypes.items()
822
828
  },
823
829
  safe_copy=safe_copy,
830
+ coerce_mixed_numerics=(not self.static),
824
831
  debug=debug
825
832
  ),
826
833
  on_cols_dtypes,
@@ -962,7 +969,7 @@ def _persist_new_numeric_columns(self, df, debug: bool = False) -> SuccessTuple:
962
969
  """
963
970
  from meerschaum.utils.dataframe import get_numeric_cols
964
971
  numeric_cols = get_numeric_cols(df)
965
- existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ == 'numeric']
972
+ existing_numeric_cols = [col for col, typ in self.dtypes.items() if typ.startswith('numeric')]
966
973
  new_numeric_cols = [col for col in numeric_cols if col not in existing_numeric_cols]
967
974
  if not new_numeric_cols:
968
975
  return True, "Success"
@@ -774,9 +774,16 @@ class Daemon:
774
774
  if '_process' not in self.__dict__ or self.__dict__['_process'].pid != int(pid):
775
775
  try:
776
776
  self._process = psutil.Process(int(pid))
777
+ process_exists = True
777
778
  except Exception:
778
- if self.pid_path.exists():
779
- self.pid_path.unlink()
779
+ process_exists = False
780
+ if not process_exists:
781
+ _ = self.__dict__.pop('_process', None)
782
+ try:
783
+ if self.pid_path.exists():
784
+ self.pid_path.unlink()
785
+ except Exception:
786
+ pass
780
787
  return None
781
788
  return self._process
782
789
 
@@ -13,11 +13,10 @@ import pathlib
13
13
  import traceback
14
14
  import sys
15
15
  import atexit
16
- from datetime import datetime, timezone, timedelta
17
- from typing import List, Union, Optional, Tuple
16
+ from datetime import datetime, timezone
17
+ from typing import List, Optional, Tuple
18
18
  from meerschaum.config import get_config
19
19
  from meerschaum.utils.warnings import warn
20
- from meerschaum.utils.misc import round_time
21
20
  from meerschaum.utils.daemon.FileDescriptorInterceptor import FileDescriptorInterceptor
22
21
  from meerschaum.utils.threading import Thread
23
22
  import meerschaum as mrsm
@@ -517,6 +516,7 @@ class RotatingFile(io.IOBase):
517
516
  else 0
518
517
  )
519
518
 
519
+ subfile_lines = []
520
520
  if (
521
521
  subfile_index in self.subfile_objects
522
522
  and