meerschaum 2.0.0rc7__py3-none-any.whl → 2.0.0rc8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44)
  1. meerschaum/actions/__init__.py +97 -48
  2. meerschaum/actions/bootstrap.py +1 -1
  3. meerschaum/actions/clear.py +1 -1
  4. meerschaum/actions/deduplicate.py +1 -1
  5. meerschaum/actions/delete.py +8 -7
  6. meerschaum/actions/drop.py +1 -10
  7. meerschaum/actions/edit.py +1 -1
  8. meerschaum/actions/install.py +1 -1
  9. meerschaum/actions/pause.py +1 -1
  10. meerschaum/actions/register.py +1 -1
  11. meerschaum/actions/setup.py +1 -1
  12. meerschaum/actions/show.py +1 -1
  13. meerschaum/actions/start.py +18 -7
  14. meerschaum/actions/stop.py +5 -4
  15. meerschaum/actions/sync.py +3 -1
  16. meerschaum/actions/uninstall.py +1 -1
  17. meerschaum/actions/upgrade.py +1 -1
  18. meerschaum/actions/verify.py +54 -3
  19. meerschaum/config/_formatting.py +26 -0
  20. meerschaum/config/_jobs.py +28 -5
  21. meerschaum/config/_paths.py +21 -5
  22. meerschaum/config/_version.py +1 -1
  23. meerschaum/connectors/api/_fetch.py +1 -1
  24. meerschaum/connectors/api/_pipes.py +6 -11
  25. meerschaum/connectors/sql/_fetch.py +29 -11
  26. meerschaum/core/Pipe/_deduplicate.py +39 -23
  27. meerschaum/core/Pipe/_dtypes.py +2 -1
  28. meerschaum/core/Pipe/_verify.py +59 -24
  29. meerschaum/plugins/__init__.py +3 -0
  30. meerschaum/utils/daemon/Daemon.py +108 -27
  31. meerschaum/utils/daemon/__init__.py +35 -1
  32. meerschaum/utils/formatting/__init__.py +144 -1
  33. meerschaum/utils/formatting/_pipes.py +28 -5
  34. meerschaum/utils/misc.py +183 -187
  35. meerschaum/utils/packages/__init__.py +1 -1
  36. meerschaum/utils/packages/_packages.py +1 -0
  37. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/METADATA +4 -1
  38. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/RECORD +44 -44
  39. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/LICENSE +0 -0
  40. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/NOTICE +0 -0
  41. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/WHEEL +0 -0
  42. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/entry_points.txt +0 -0
  43. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/top_level.txt +0 -0
  44. {meerschaum-2.0.0rc7.dist-info → meerschaum-2.0.0rc8.dist-info}/zip-safe +0 -0
meerschaum/connectors/api/_pipes.py

@@ -7,6 +7,9 @@ Register or fetch Pipes from the API
 """
 
 from __future__ import annotations
+import time
+import json
+from io import StringIO
 from datetime import datetime
 from meerschaum.utils.debug import dprint
 from meerschaum.utils.warnings import warn, error
@@ -123,7 +126,6 @@ def fetch_pipes_keys(
     A list of tuples containing pipes' keys.
     """
     from meerschaum.config.static import STATIC_CONFIG
-    import json
     if connector_keys is None:
         connector_keys = []
     if metric_keys is None:
@@ -169,7 +171,6 @@ def sync_pipe(
     from meerschaum.utils.misc import json_serialize_datetime
     from meerschaum.config import get_config
     from meerschaum.utils.packages import attempt_import
-    import json, time
     begin = time.time()
     more_itertools = attempt_import('more_itertools')
     if df is None:
@@ -310,7 +311,6 @@ def get_pipe_data(
         **kw: Any
     ) -> Union[pandas.DataFrame, None]:
     """Fetch data from the API."""
-    import json
     r_url = pipe_r_url(pipe)
     chunks_list = []
     while True:
@@ -340,7 +340,7 @@ def get_pipe_data(
     from meerschaum.utils.dataframe import parse_df_datetimes
     pd = import_pandas()
     try:
-        df = pd.read_json(response.text)
+        df = pd.read_json(StringIO(response.text))
     except Exception as e:
         warn(f"Failed to parse response for {pipe}:\n{e}")
         return None
@@ -367,7 +367,6 @@ def get_backtrack_data(
         **kw: Any,
     ) -> pandas.DataFrame:
     """Get a Pipe's backtrack data from the API."""
-    import json
     r_url = pipe_r_url(pipe)
     try:
         response = self.get(
@@ -389,12 +388,12 @@ def get_backtrack_data(
         dprint(response.text)
     pd = import_pandas()
     try:
-        df = pd.read_json(response.text)
+        df = pd.read_json(StringIO(response.text))
     except Exception as e:
         warn(f"Failed to read response into a dataframe:\n{e}")
         return None
 
-    df = parse_df_datetimes(pd.read_json(response.text), debug=debug)
+    df = parse_df_datetimes(pd.read_json(StringIO(response.text)), debug=debug)
     return df
 
 def get_pipe_id(
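The `StringIO` wrappings above (and the matching one in meerschaum/core/Pipe/_dtypes.py further down) track pandas' deprecation of passing literal JSON strings to `read_json`: recent pandas releases emit a FutureWarning and ask for a file-like object instead. A minimal sketch of the pattern, independent of meerschaum:

    from io import StringIO
    import pandas as pd

    payload = '[{"id": 1, "value": 10}, {"id": 2, "value": 20}]'

    # df = pd.read_json(payload)          # deprecated: literal JSON string
    df = pd.read_json(StringIO(payload))  # preferred: file-like object
    print(df)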
@@ -438,7 +437,6 @@ def get_pipe_attributes(
     """
     r_url = pipe_r_url(pipe)
     response = self.get(r_url + '/attributes', debug=debug)
-    import json
     try:
         return json.loads(response.text)
     except Exception as e:
@@ -474,7 +472,6 @@ def get_sync_time(
     """
     from meerschaum.utils.misc import is_int
     from meerschaum.utils.warnings import warn
-    import datetime, json
     r_url = pipe_r_url(pipe)
     response = self.get(
         r_url + '/sync_time',
@@ -545,7 +542,6 @@ def create_metadata(
     """
     from meerschaum.utils.debug import dprint
     from meerschaum.config.static import STATIC_CONFIG
-    import json
     r_url = STATIC_CONFIG['api']['endpoints']['metadata']
     response = self.post(r_url, debug=debug)
     if debug:
@@ -590,7 +586,6 @@ def get_pipe_rowcount(
     The number of rows in the pipe's table, bound the given parameters.
     If the table does not exist, return 0.
     """
-    import json
     r_url = pipe_r_url(pipe)
     response = self.get(
         r_url + "/rowcount",
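The hunks above all make the same mechanical change: each function-local `import json` (and `import time`, `import datetime`) is deleted in favor of the module-level imports added in this file's first hunk. Behavior is unchanged; presumably the point is to state the file's dependencies once rather than re-run the import statement per call. A sketch of the before/after shape (illustrative function names only):

    # Before: the import statement re-executes (a sys.modules lookup) per call.
    def parse_attributes_before(text: str) -> dict:
        import json
        return json.loads(text)

    # After: imported once at module scope, as in the first hunk of this file.
    import json

    def parse_attributes_after(text: str) -> dict:
        return json.loads(text)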
meerschaum/connectors/sql/_fetch.py

@@ -148,7 +148,7 @@ def get_pipe_metadef(
     dt_name = sql_item_name(_dt, self.flavor)
     is_guess = False
 
-    if begin is not None or end is not None:
+    if begin not in (None, '') or end is not None:
         if is_guess:
             if _dt is None:
                 warn(
@@ -168,20 +168,38 @@ def get_pipe_metadef(
     if 'order by' in definition.lower() and 'over' not in definition.lower():
         error("Cannot fetch with an ORDER clause in the definition")
 
+    apply_backtrack = begin == ''
     begin = (
-        begin if not (isinstance(begin, str) and begin == '')
-        else pipe.get_sync_time(debug=debug)
+        pipe.get_sync_time(debug=debug)
+        if begin == ''
+        else begin
     )
-
+
+    if begin and end and begin >= end:
+        begin = None
+
     da = None
     if dt_name:
-        ### default: do not backtrack
-        begin_da = dateadd_str(
-            flavor=self.flavor, datepart='minute', number=(-1 * btm), begin=begin,
-        ) if begin else None
-        end_da = dateadd_str(
-            flavor=self.flavor, datepart='minute', number=1, begin=end,
-        ) if end else None
+        begin_da = (
+            dateadd_str(
+                flavor = self.flavor,
+                datepart = 'minute',
+                number = ((-1 * btm) if apply_backtrack else 0),
+                begin = begin,
+            )
+            if begin
+            else None
+        )
+        end_da = (
+            dateadd_str(
+                flavor = self.flavor,
+                datepart = 'minute',
+                number = 0,
+                begin = end,
+            )
+            if end
+            else None
+        )
 
     meta_def = (
         _simple_fetch_query(pipe) if (
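The rewrite above separates two concerns the old one-liner mixed together: only a resumed fetch (the empty-string `begin` sentinel) backtracks by `btm` minutes, an explicit `begin` is used verbatim, and a window whose `begin` has caught up to `end` drops its lower bound. A simplified sketch of those rules with a hypothetical helper (not the actual meerschaum API):

    from datetime import datetime, timedelta
    from typing import Optional, Tuple, Union

    def resolve_window(
        begin: Union[datetime, str, None],
        end: Optional[datetime],
        last_sync: Optional[datetime],
        backtrack_minutes: int,
    ) -> Tuple[Optional[datetime], Optional[datetime]]:
        """Mirror the begin/end rules from the hunk above."""
        apply_backtrack = begin == ''       # only the sentinel backtracks
        if begin == '':
            begin = last_sync               # resume from the last sync time
        if begin and end and begin >= end:
            begin = None                    # degenerate window: unbound the start
        if begin is not None and apply_backtrack:
            begin -= timedelta(minutes=backtrack_minutes)
        return begin, end

    # Resuming with a 5-minute backtrack from a last sync of 12:00 yields 11:55.
    print(resolve_window('', None, datetime(2023, 1, 1, 12, 0), 5))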
meerschaum/core/Pipe/_deduplicate.py

@@ -65,6 +65,7 @@ def deduplicate(
     A `SuccessTuple` corresponding to whether all of the chunks were successfully deduplicated.
     """
     from meerschaum.utils.warnings import warn, info
+    from meerschaum.utils.misc import interval_str, items_str
     from meerschaum.utils.venv import Venv
     from meerschaum.connectors import get_connector_plugin
     from meerschaum.utils.pool import get_pool
@@ -74,6 +75,7 @@ def deduplicate(
         begin = begin,
         end = end,
         params = params,
+        bounded = bounded,
         debug = debug,
         **kwargs
     )
@@ -90,6 +92,7 @@ def deduplicate(
         begin = begin,
         end = end,
         params = params,
+        bounded = bounded,
         debug = debug,
         **kwargs
     )
@@ -104,8 +107,18 @@ def deduplicate(
     begin = (
         bound_time
         if bound_time is not None
-        else self.get_sync_time(debug=debug)
+        else self.get_sync_time(newest=False, debug=debug)
     )
+    if bounded and end is None:
+        end = self.get_sync_time(newest=True, debug=debug)
+
+    if bounded and end is not None:
+        end += (
+            timedelta(minutes=1)
+            if isinstance(end, datetime)
+            else 1
+        )
+
     chunk_bounds = self.get_chunk_bounds(
         bounded = bounded,
         begin = begin,
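`deduplicate()` now seeds a missing bounded `end` from the newest sync time and nudges it one unit forward, since the chunk bounds treat `end` as exclusive; without the nudge the newest row would never be scanned. The bump, extracted as a tiny sketch:

    from datetime import datetime, timedelta
    from typing import Union

    def bump_end(end: Union[datetime, int]) -> Union[datetime, int]:
        """Push an exclusive upper bound just past the newest row."""
        return end + (timedelta(minutes=1) if isinstance(end, datetime) else 1)

    print(bump_end(datetime(2023, 1, 1, 12, 30)))  # 2023-01-01 12:31:00
    print(bump_end(100))                           # 101 (integer datetime axis)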
@@ -115,6 +128,8 @@ def deduplicate(
     )
 
     indices = [col for col in self.columns.values() if col]
+    if not indices:
+        return False, f"Cannot deduplicate without index columns."
     dt_col = self.columns.get('datetime', None)
 
     def process_chunk_bounds(bounds) -> Tuple[
@@ -155,7 +170,15 @@ def deduplicate(
             return bounds, (True, f"{chunk_msg_header}\nChunk is empty, skipping...")
 
         chunk_indices = [ix for ix in indices if ix in full_chunk.columns]
-        full_chunk = full_chunk.drop_duplicates(subset=chunk_indices, keep='last')
+        if not chunk_indices:
+            return bounds, (False, f"None of {items_str(indices)} were present in chunk.")
+        try:
+            full_chunk = full_chunk.drop_duplicates(subset=chunk_indices, keep='last')
+        except Exception as e:
+            return (
+                bounds,
+                (False, f"Failed to deduplicate chunk on {items_str(chunk_indices)}:\n({e})")
+            )
 
         clear_success, clear_msg = self.clear(
             begin = chunk_begin,
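With the new guards in place, the core operation is unchanged: pandas' `drop_duplicates` over whichever index columns the chunk actually contains, keeping the last (most recently synced) copy of each key. For example:

    import pandas as pd

    chunk = pd.DataFrame({
        'dt': ['2023-01-01', '2023-01-01', '2023-01-02'],
        'id': [1, 1, 2],
        'val': [10, 20, 30],
    })

    indices = ['dt', 'id', 'station']                              # 'station' is absent
    chunk_indices = [ix for ix in indices if ix in chunk.columns]  # ['dt', 'id']

    # keep='last' retains the most recently appended duplicate per key.
    deduped = chunk.drop_duplicates(subset=chunk_indices, keep='last')
    print(deduped)  # rows ('2023-01-01', 1, 20) and ('2023-01-02', 2, 30)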
@@ -192,19 +215,16 @@ def deduplicate(
             True, (
                 chunk_msg_header + "\n"
                 + chunk_msg_body + ("\n" if chunk_msg_body else '')
-                + f"Chunk succesfully deduplicated to {chunk_rowcount} rows."
+                + f"Deduplicated chunk from {existing_chunk_len} to {chunk_rowcount} rows."
             )
         )
 
-    _start = chunk_bounds[0][(0 if bounded else 1)]
-    _end = chunk_bounds[-1][(0 if not bounded else 1)]
-    message_header = f"{_start} - {_end}"
     info(
         f"Deduplicating {len(chunk_bounds)} chunk"
         + ('s' if len(chunk_bounds) != 1 else '')
         + f" ({'un' if not bounded else ''}bounded)"
-        + f" of size '{chunk_interval}'"
-        + f" from '{_start}' to '{_end}'..."
+        + f" of size '{interval_str(chunk_interval)}'"
+        + f" on {self}."
     )
     bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds))
     bounds_successes = {
@@ -223,11 +243,10 @@ def deduplicate(
     return (
         False,
         (
-            message_header + "\n"
-            + f"Failed to deduplicate {len(bounds_failures)} chunk"
+            f"Failed to deduplicate {len(bounds_failures)} chunk"
             + ('s' if len(bounds_failures) != 1 else '')
-            + ":\n"
-            + "\n".join([msg for _, (_, msg) in bounds_failures.items()])
+            + ".\n"
+            + "\n".join([msg for _, (_, msg) in bounds_failures.items() if msg])
         )
     )
 
@@ -236,11 +255,10 @@ def deduplicate(
     return (
         True,
         (
-            message_header + "\n"
-            + f"Successfully deduplicated {len(bounds_successes)} chunk"
+            f"Successfully deduplicated {len(bounds_successes)} chunk"
             + ('s' if len(bounds_successes) != 1 else '')
             + ".\n"
-            + "\n".join([msg for _, (_, msg) in bounds_successes.items()])
+            + "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg])
         ).rstrip('\n')
     )
 
@@ -262,21 +280,19 @@ def deduplicate(
     return (
         True,
         (
-            message_header + "\n"
-            + f"Successfully deduplicated {len(bounds_successes)} chunk"
+            f"Successfully deduplicated {len(bounds_successes)} chunk"
             + ('s' if len(bounds_successes) != 1 else '')
-            + f" ({len(retry_bounds_successes)} retried):\n"
-            + "\n".join([msg for _, (_, msg) in bounds_successes.items()])
+            + f"({len(retry_bounds_successes)} retried):\n"
+            + "\n".join([msg for _, (_, msg) in bounds_successes.items() if msg])
         ).rstrip('\n')
     )
 
     return (
         False,
         (
-            message_header + "\n"
-            + f"Failed to deduplicate {len(bounds_failures)} chunk"
+            f"Failed to deduplicate {len(bounds_failures)} chunk"
             + ('s' if len(retry_bounds_failures) != 1 else '')
-            + ":\n"
-            + "\n".join([msg for _, (_, msg) in retry_bounds_failures.items()])
+            + ".\n"
+            + "\n".join([msg for _, (_, msg) in retry_bounds_failures.items() if msg])
         ).rstrip('\n')
     )
meerschaum/core/Pipe/_dtypes.py

@@ -7,6 +7,7 @@ Enforce data types for a pipe's underlying table.
 """
 
 from __future__ import annotations
+from io import StringIO
 from meerschaum.utils.typing import Dict, Any, Optional
 
 def enforce_dtypes(
@@ -38,7 +39,7 @@ def enforce_dtypes(
     try:
         if isinstance(df, str):
             df = parse_df_datetimes(
-                pd.read_json(df),
+                pd.read_json(StringIO(df)),
                 ignore_cols = [
                     col
                     for col, dtype in pipe_dtypes.items()
meerschaum/core/Pipe/_verify.py

@@ -62,6 +62,7 @@ def verify(
         A SuccessTuple indicating whether the pipe was successfully resynced.
     """
     from meerschaum.utils.pool import get_pool
+    from meerschaum.utils.misc import interval_str
     workers = self.get_num_workers(workers)
 
     ### Skip configured bounding in parameters
@@ -74,16 +75,16 @@ def verify(
     if bounded is None:
         bounded = bound_time is not None
 
-    if begin is None:
+    if bounded and begin is None:
         begin = (
             bound_time
             if bound_time is not None
             else self.get_sync_time(newest=False, debug=debug)
         )
-    if end is None:
+    if bounded and end is None:
         end = self.get_sync_time(newest=True, debug=debug)
 
-    if bounded:
+    if bounded and end is not None:
         end += (
             timedelta(minutes=1)
             if isinstance(end, datetime)
@@ -93,13 +94,7 @@ def verify(
     sync_less_than_begin = not bounded and begin is None
     sync_greater_than_end = not bounded and end is None
 
-    cannot_determine_bounds = (
-        begin is None
-        or
-        end is None
-        or
-        not self.exists(debug=debug)
-    )
+    cannot_determine_bounds = not self.exists(debug=debug)
 
     if cannot_determine_bounds:
         sync_success, sync_msg = self.sync(
@@ -146,21 +141,48 @@ def verify(
         )
         return True, f"Could not determine chunks between '{begin}' and '{end}'; nothing to do."
 
+    begin_to_print = (
+        begin
+        if begin is not None
+        else (
+            chunk_bounds[0][0]
+            if bounded
+            else chunk_bounds[0][1]
+        )
+    )
+    end_to_print = (
+        end
+        if end is not None
+        else (
+            chunk_bounds[-1][1]
+            if bounded
+            else chunk_bounds[-1][0]
+        )
+    )
+
     info(
         f"Syncing {len(chunk_bounds)} chunk" + ('s' if len(chunk_bounds) != 1 else '')
         + f" ({'un' if not bounded else ''}bounded)"
-        + f" of size '{chunk_interval}'"
-        + f" between '{begin}' and '{end}'."
+        + f" of size '{interval_str(chunk_interval)}'"
+        + f" between '{begin_to_print}' and '{end_to_print}'."
     )
 
     pool = get_pool(workers=workers)
 
+    ### Dictionary of the form bounds -> success_tuple, e.g.:
+    ### {
+    ###     (2023-01-01, 2023-01-02): (True, "Success")
+    ### }
+    bounds_success_tuples = {}
     def process_chunk_bounds(
             chunk_begin_and_end: Tuple[
                 Union[int, datetime],
                 Union[int, datetime]
             ]
         ):
+        if chunk_begin_and_end in bounds_success_tuples:
+            return chunk_begin_and_end, bounds_success_tuples[chunk_begin_and_end]
+
         chunk_begin, chunk_end = chunk_begin_and_end
         return chunk_begin_and_end, self.sync(
             begin = chunk_begin,
@@ -171,11 +193,22 @@ def verify(
             **kwargs
         )
 
-    ### Dictionary of the form bounds -> success_tuple, e.g.:
-    ### {
-    ###     (2023-01-01, 2023-01-02): (True, "Success")
-    ### }
-    bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds))
+    ### If we have more than one chunk, attempt to sync the first one and return if its fails.
+    if len(chunk_bounds) > 1:
+        first_chunk_bounds = chunk_bounds[0]
+        (
+            (first_begin, first_end),
+            (first_success, first_msg)
+        ) = process_chunk_bounds(first_chunk_bounds)
+        if not first_success:
+            return (
+                first_success,
+                f"\n{first_begin} - {first_end}\n"
+                + f"Failed to sync first chunk:\n{first_msg}"
+            )
+        bounds_success_tuples[first_chunk_bounds] = (first_success, first_msg)
+
+    bounds_success_tuples.update(dict(pool.map(process_chunk_bounds, chunk_bounds)))
     bounds_success_bools = {bounds: tup[0] for bounds, tup in bounds_success_tuples.items()}
 
     message_header = f"{begin} - {end}"
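Syncing the first chunk serially before fanning out means a systemic failure (bad credentials, an unreachable instance, a missing table) surfaces once rather than once per chunk, and because the result is memoized, the pooled pass skips chunk one instead of re-syncing it. The shape of the pattern, with a stand-in `do_sync`:

    results = {}

    def do_sync(bounds):
        """Stand-in for pipe.sync(begin=..., end=...)."""
        return True, f"Synced {bounds[0]} - {bounds[1]}."

    def process(bounds):
        if bounds in results:              # memoized: already-synced chunks are free
            return bounds, results[bounds]
        return bounds, do_sync(bounds)

    chunks = [(0, 10), (10, 20), (20, 30)]
    first_bounds, first_result = process(chunks[0])   # serial fail-fast attempt
    if first_result[0]:
        results[first_bounds] = first_result
        results.update(dict(map(process, chunks)))    # pooled pass reuses chunk one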
@@ -195,18 +228,19 @@ def verify(
 
     chunk_bounds_to_resync = [
         bounds
-        for bounds, success in zip(chunk_bounds, chunk_success_bools)
+        for bounds, success in zip(chunk_bounds, bounds_success_bools)
        if not success
     ]
     bounds_to_print = [
         f"{bounds[0]} - {bounds[1]}"
         for bounds in chunk_bounds_to_resync
     ]
-    warn(
-        f"Will resync the following failed chunks:\n    "
-        + '\n    '.join(bounds_to_print),
-        stack = False,
-    )
+    if bounds_to_print:
+        warn(
+            f"Will resync the following failed chunks:\n    "
+            + '\n    '.join(bounds_to_print),
+            stack = False,
+        )
 
     retry_bounds_success_tuples = dict(pool.map(process_chunk_bounds, chunk_bounds_to_resync))
     bounds_success_tuples.update(retry_bounds_success_tuples)
@@ -289,7 +323,8 @@ def get_chunks_success_message(
         ''
         if num_fails == 0
         else (
-            f"\n\nFailed to sync {num_fails} chunks:\n"
+            f"\n\nFailed to sync {num_fails} chunk"
+            + ('s' if num_fails != 1 else '') + ":\n"
             + '\n'.join([
                 f"{fail_begin} - {fail_end}\n{msg}\n"
                 for (fail_begin, fail_end), (_, msg) in fail_chunk_bounds_tuples.items()
meerschaum/plugins/__init__.py

@@ -254,6 +254,9 @@ def sync_plugins_symlinks(debug: bool = False, warn: bool = True) -> None:
     try:
         if PLUGINS_INTERNAL_LOCK_PATH.exists():
             PLUGINS_INTERNAL_LOCK_PATH.unlink()
+    ### Sometimes competing threads will delete the lock file at the same time.
+    except FileNotFoundError:
+        pass
     except Exception as e:
         if warn:
             _warn(f"Error cleaning up lockfile {PLUGINS_INTERNAL_LOCK_PATH}:\n{e}")
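The new `except FileNotFoundError` closes a race where two threads both pass the `.exists()` check and then both call `.unlink()`. On Python 3.8+, `Path.unlink(missing_ok=True)` expresses the same intent in one call; a sketch with a hypothetical path:

    from pathlib import Path

    lock_path = Path('/tmp/mrsm_plugins_internal.lock')  # hypothetical path

    # Race-safe cleanup without the extra except clause (Python 3.8+):
    lock_path.unlink(missing_ok=True)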