PyPI - pluto-ml - Versions diffs - 0.0.20__tar.gz → 0.0.22__tar.gz - Mend

pluto-ml 0.0.20.tar.gz → 0.0.22.tar.gz

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (47) hide show

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pluto-ml
-Version: 0.0.20
+Version: 0.0.22
 Summary: Pluto ML - Machine Learning Operations Framework
 License-File: LICENSE
 Author: jqssun

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/__init__.py RENAMED Viewed

@@ -41,11 +41,11 @@ __all__ = (
     'generate_run_id',
 )
-__version__ = '0.0.20'
+__version__ = '0.0.22'
 # Replaced with the current commit when building the wheels.
-_PLUTO_COMMIT_SHA = 'f3600b5b85929f8f0dd32d9b1c291ff3c73d1800'
+_PLUTO_COMMIT_SHA = '4587211b1c6ccebe92f92d1243bf9a213ec1f3dd'
 def _get_git_commit():

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/__main__.py RENAMED Viewed

@@ -64,10 +64,25 @@ def _cmd_sync(args: argparse.Namespace) -> None:
         )
         sys.exit(1)
-    # Build settings dict with URLs
+    # Build settings dict with URLs.
+    # Settings.to_dict() only iterates __annotations__, so the URLs
+    # populated by update_url() (url_num, url_message, url_file, ...) are
+    # NOT included. Without this every uploader call no-ops on
+    # `if not self.url_X: return`, the records get marked SUCCESS in the
+    # local DB, and nothing reaches the server. Restore them explicitly.
     settings = Settings()
     settings.update_host()
     settings_dict = settings.to_dict()
+    settings_dict.update(
+        {
+            'url_num': settings.url_num,
+            'url_data': settings.url_data,
+            'url_file': settings.url_file,
+            'url_message': settings.url_message,
+            'url_update_config': settings.url_update_config,
+            'url_update_tags': settings.url_update_tags,
+        }
+    )
     settings_dict['_auth'] = auth
     # Read run info from each database and populate settings

pluto_ml-0.0.22/pluto/_wandb_hook.py ADDED Viewed

@@ -0,0 +1,250 @@
+"""
+Import hook that intercepts `import wandb` to enable dual-logging to Pluto.
+Loaded via a .pth file at Python startup. Registers a sys.meta_path finder
+that, when `import wandb` is executed, loads the real wandb package and then
+monkey-patches it to dual-log to Pluto.
+Activation:
+    The hook itself installs unconditionally when pluto-ml is on the path —
+    installing the package is the user's opt-in signal. Whether the
+    patches actually fire is decided later, when `import wandb` runs:
+      - Credentials available (any of: PLUTO_API_KEY env var, WANDB_API_KEY
+        when DISABLE_WANDB_LOGGING=true, the marker written by `pluto login`,
+        or the keyring file written by `pluto login`) → patches applied,
+        wandb dual-logs to Pluto.
+      - No credentials → a one-time discoverability hint is logged
+        (pointing at `pluto login` / PLUTO_API_KEY) and wandb runs unpatched.
+Project name is no longer required at install time; the runtime falls back
+to the `project=` kwarg on wandb.init, then WANDB_PROJECT, then the resolved
+wandb run's project attribute.
+"""
+import importlib
+import importlib.util
+import logging
+import os
+import sys
+logger = logging.getLogger(__name__)
+_hook_installed = False
+_hint_emitted = False
+# Mirrors pluto.auth.LOGIN_MARKER_PATH. Duplicated as a literal here so this
+# module stays import-free of the rest of pluto at .pth load time.
+_LOGIN_MARKER_PATH = os.path.expanduser('~/.pluto/.login_ok')
+def _keyring_cfg_path() -> str:
+    """
+    keyrings.alt.file.PlaintextKeyring storage location, mirrored from
+    keyring.util.platform_ so we don't have to import keyring at .pth load
+    time. macOS uses Keychain by default (the marker above covers Mac); this
+    only matters for Linux/Windows users on the file-based fallback.
+    """
+    if sys.platform == 'win32':
+        root = os.environ.get('LOCALAPPDATA') or os.environ.get('ProgramData') or '.'
+        return os.path.join(root, 'Python Keyring', 'keyring_pass.cfg')
+    base = os.environ.get('XDG_DATA_HOME') or os.path.expanduser('~/.local/share')
+    return os.path.join(base, 'python_keyring', 'keyring_pass.cfg')
+def _keyring_cfg_has_pluto() -> bool:
+    """Backward compat: detect a `pluto login` done before the marker existed."""
+    path = _keyring_cfg_path()
+    if not os.path.exists(path):
+        return False
+    try:
+        import configparser
+        cp = configparser.RawConfigParser()
+        cp.read(path, encoding='utf-8')
+        return cp.has_section('pluto')
+    except Exception:
+        return False
+def _has_pluto_credentials() -> bool:
+    """True if some Pluto auth source is available without prompting."""
+    if os.environ.get('PLUTO_API_KEY'):
+        return True
+    wandb_disabled = os.environ.get('DISABLE_WANDB_LOGGING', '').lower() in (
+        'true',
+        '1',
+        'yes',
+    )
+    if wandb_disabled and os.environ.get('WANDB_API_KEY'):
+        return True
+    if os.path.exists(_LOGIN_MARKER_PATH):
+        return True
+    if _keyring_cfg_has_pluto():
+        return True
+    return False
+def _has_partial_pluto_signal() -> bool:
+    """True if the user set a Pluto env var but has no auth — partial config."""
+    return any(
+        os.environ.get(v)
+        for v in (
+            'PLUTO_PROJECT',
+            'PLUTO_URL_APP',
+            'PLUTO_URL_API',
+            'PLUTO_URL_INGEST',
+        )
+    )
+def _emit_discoverability_hint() -> None:
+    """Log a one-time hint when wandb is imported but Pluto isn't activated."""
+    global _hint_emitted
+    if _hint_emitted:
+        return
+    _hint_emitted = True
+    if _has_partial_pluto_signal():
+        logger.warning(
+            'pluto.compat.wandb: Pluto config detected but no API key found. '
+            'Run `pluto login` (or set PLUTO_API_KEY) to enable dual-logging '
+            'to Pluto. Continuing with wandb-only logging.'
+        )
+    else:
+        logger.warning(
+            'pluto.compat.wandb: pluto-ml is installed but no Pluto credentials '
+            'found. Run `pluto login` (or set PLUTO_API_KEY) to enable '
+            'dual-logging to Pluto. Continuing with wandb-only logging.'
+        )
+def _patch_or_hint(wandb_module) -> None:
+    """Apply dual-logging patches if creds present, else log discoverability hint."""
+    if _has_pluto_credentials():
+        try:
+            from pluto.compat.wandb import apply_wandb_patches
+            apply_wandb_patches(wandb_module)
+            logger.info(
+                'pluto._wandb_hook: Successfully patched wandb for dual-logging'
+            )
+        except Exception as e:
+            logger.warning(
+                f'pluto._wandb_hook: Failed to apply wandb patches: {e}. '
+                f'wandb will work normally without Pluto dual-logging.'
+            )
+    else:
+        _emit_discoverability_hint()
+class _PatchingLoader:
+    """
+    Wraps wandb's real loader so we can run dual-logging patches *after* the
+    real loader fully initializes the wandb module. This is the spec-based
+    equivalent of the old find_module/load_module approach — required because
+    Python 3.12 deprecated the legacy API and stopped reliably calling it
+    when only find_module is implemented.
+    """
+    def __init__(self, real_loader):
+        self._real_loader = real_loader
+    def create_module(self, spec):
+        if hasattr(self._real_loader, 'create_module'):
+            return self._real_loader.create_module(spec)
+        return None  # default module creation
+    def exec_module(self, module):
+        # Let wandb's real loader populate the module first.
+        self._real_loader.exec_module(module)
+        # Now wandb is fully imported and `module is sys.modules['wandb']`,
+        # so monkey-patching `module.init` patches `wandb.init` for callers.
+        _patch_or_hint(module)
+class _PlutoWandbFinder:
+    """
+    Meta path finder that intercepts `import wandb` to apply dual-logging patches.
+    Implements the modern `find_spec` API (Python 3.4+, required on 3.12+ where
+    legacy `find_module` is no longer reliably dispatched). We delegate to other
+    finders to locate the real wandb spec, then wrap its loader with
+    `_PatchingLoader` so our hook runs after wandb finishes importing.
+    The previous find_module/load_module implementation worked on 3.10/3.11 but
+    was silently bypassed on 3.12 — observed empirically as `_emit_discoverability_hint`
+    never being called and patches never applying.
+    """
+    _patching = False
+    def find_spec(self, fullname, path=None, target=None):
+        if fullname != 'wandb' or self._patching:
+            return None
+        # Re-enter the finder chain to find the real wandb spec without
+        # recursing into ourselves.
+        self._patching = True
+        try:
+            real_spec = importlib.util.find_spec('wandb')
+        finally:
+            self._patching = False
+        if real_spec is None or real_spec.loader is None:
+            return None
+        # Wrap the real loader so exec_module triggers our patches afterward.
+        real_spec.loader = _PatchingLoader(real_spec.loader)
+        return real_spec
+def install():
+    """
+    Register the wandb import hook on sys.meta_path.
+    Always installs the finder when called — credential resolution is
+    deferred until `import wandb` actually runs (see _PlutoWandbFinder).
+    This ensures users who run `pluto login` after Python starts (or who
+    pass `project=` only as a kwarg) still get dual-logging, and that
+    users with no Pluto config see a discoverability hint instead of
+    silent inactivity.
+    Safe to call multiple times.
+    """
+    global _hook_installed
+    if _hook_installed:
+        return
+    # If wandb is already imported, the finder is too late. Try to patch in
+    # place if credentials are available; otherwise log the hint.
+    if 'wandb' in sys.modules:
+        if _has_pluto_credentials():
+            logger.warning(
+                'pluto._wandb_hook: wandb already imported before hook '
+                'installation. Attempting to patch existing wandb module.'
+            )
+            try:
+                from pluto.compat.wandb import apply_wandb_patches
+                apply_wandb_patches(sys.modules['wandb'])
+            except Exception as e:
+                logger.warning(
+                    f'pluto._wandb_hook: Failed to patch already-imported '
+                    f'wandb: {e}'
+                )
+        else:
+            _emit_discoverability_hint()
+        _hook_installed = True
+        return
+    finder = _PlutoWandbFinder()
+    sys.meta_path.insert(0, finder)
+    _hook_installed = True
+def uninstall():
+    """Remove the wandb import hook (for testing)."""
+    global _hook_installed, _hint_emitted
+    sys.meta_path[:] = [
+        f for f in sys.meta_path if not isinstance(f, _PlutoWandbFinder)
+    ]
+    _hook_installed = False
+    _hint_emitted = False

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/auth.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import getpass
 import logging
+import os
 import sys
 import webbrowser
@@ -13,6 +14,30 @@ from .util import ANSI, import_lib, print_url
 tlogger = logging.getLogger('auth')
 tag = 'Authentication'
+# Marker file written after a successful `pluto login`. The wandb compat
+# layer's import hook (pluto/_wandb_hook.py) checks for this so a user who
+# has only run `pluto login` (no PLUTO_API_KEY env var) still gets dual-
+# logging activated. Stat-only check; never read.
+LOGIN_MARKER_PATH = os.path.expanduser('~/.pluto/.login_ok')
+def _write_login_marker() -> None:
+    try:
+        os.makedirs(os.path.dirname(LOGIN_MARKER_PATH), exist_ok=True)
+        with open(LOGIN_MARKER_PATH, 'w'):
+            pass
+    except OSError as e:
+        tlogger.debug('%s: failed to write login marker: %s', tag, e)
+def _remove_login_marker() -> None:
+    try:
+        os.remove(LOGIN_MARKER_PATH)
+    except FileNotFoundError:
+        pass
+    except OSError as e:
+        tlogger.debug('%s: failed to remove login marker: %s', tag, e)
 def login(settings=None, retry=False):
     settings = setup(settings)
@@ -56,6 +81,7 @@ def login(settings=None, retry=False):
         body = r.json()
         tlogger.info(f'{tag}: logged in as {body["organization"]["slug"]}')
         keyring.set_password(f'{settings.tag}', f'{settings.tag}', f'{settings._auth}')
+        _write_login_marker()
         teardown_logger(tlogger)
     except Exception as e:
         # If _auth was already provided (e.g. via env var or keyring), don't
@@ -103,6 +129,7 @@ def login(settings=None, retry=False):
             keyring.set_password(
                 f'{settings.tag}', f'{settings.tag}', f'{settings._auth}'
             )
+            _write_login_marker()
         except Exception as e:
             tlogger.critical(
                 '%s: failed to save key to system keyring service: %s', tag, e
@@ -124,5 +151,6 @@ def logout(settings=None):
         tlogger.warning(
             '%s: failed to delete key from system keyring service: %s', tag, e
         )
+    _remove_login_marker()
     tlogger.info(f'{tag}: logged out')
     teardown_logger(tlogger)

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/neptune.py RENAMED Viewed

@@ -305,8 +305,18 @@ class NeptuneRunWrapper:
                 else None
             )
-            # Determine if this is an intentional resume (explicit run_id kwarg)
-            pluto_resume = bool(explicit_kwarg_run_id)
+            # Determine if this is an intentional resume.
+            # - explicit kwarg run_id: user-provided, e.g. for restarting a run.
+            # - PLUTO_RUN_ID env var: cross-rank coordination signal for DDP.
+            #   Without resume=True for the env-var path, ranks 1+ call
+            #   pluto.init with the same externalId, the server returns
+            #   resumed=True, and op.py raises "Run with externalId X already
+            #   exists". The exception is caught by our broad except below
+            #   and silently sets self._pluto_run = None — so only rank 0
+            #   installs the console-capture ConsoleHandler and only rank 0
+            #   logs ever reach the run. The user sees "all rank 1+ logs
+            #   missing in the UI" with no obvious error in stdout.
+            pluto_resume = bool(explicit_kwarg_run_id) or bool(env_run_id)
             # Apply precedence: PLUTO_RUN_ID > explicit kwarg > Neptune auto
             run_id = env_run_id or explicit_kwarg_run_id or neptune_run_id
@@ -565,7 +575,7 @@ class NeptuneRunWrapper:
                         )
                         pluto_files[key] = pluto_file
                         pluto_type = type(pluto_file).__name__
-                        logger.info(
+                        logger.debug(
                             f'pluto.compat.neptune: Converted file {key} '
                             f'to {pluto_type}'
                         )
@@ -576,7 +586,7 @@ class NeptuneRunWrapper:
                 if pluto_files:
                     self._pluto_run.log(pluto_files)
-                    logger.info(
+                    logger.debug(
                         f'pluto.compat.neptune: Logged {len(pluto_files)} files '
                         f'to pluto'
                     )
@@ -609,7 +619,7 @@ class NeptuneRunWrapper:
                         )
                         pluto_files[key] = pluto_file
                         pluto_type = type(pluto_file).__name__
-                        logger.info(
+                        logger.debug(
                             f'pluto.compat.neptune: Converted {key} at step '
                             f'{step} to {pluto_type}'
                         )
@@ -620,7 +630,7 @@ class NeptuneRunWrapper:
                 if pluto_files:
                     self._pluto_run.log(pluto_files, step=step)
-                    logger.info(
+                    logger.debug(
                         f'pluto.compat.neptune: Logged {len(pluto_files)} files '
                         f'to pluto at step {step}'
                     )
@@ -708,10 +718,19 @@ class NeptuneRunWrapper:
     def close(self, **kwargs):
         """
-        Close both Neptune and pluto runs.
-        Pluto cleanup uses a timeout to ensure it never blocks Neptune's close.
-        Neptune's close() is always called, preserving exact Neptune behavior.
+        Close the Neptune run. Pluto is left running.
+        Some Neptune callers — notably Lightning's ``NeptuneLogger.finalize`` —
+        invoke ``close()`` from inside Trainer's exception path (e.g. on a CUDA
+        OOM). Tearing pluto down here would lose any output emitted during
+        framework cleanup, including the traceback that triggered finalize in
+        the first place. We instead leave the pluto run alive; it is finalised
+        by:
+            * ``terminate()`` — explicit force-quit by the caller
+            * ``_atexit_cleanup_pluto`` — interpreter shutdown (with timeout)
+        ``sys.excepthook`` (registered in ``Op.__init__``) marks the run
+        FAILED before atexit fires when an exception propagates, so the
+        eventual status is correct without the close path doing it.
         """
         with self._close_lock:
             if self._closed:
@@ -721,8 +740,7 @@ class NeptuneRunWrapper:
                 return None
             self._closed = True
-        # Close pluto first with timeout (non-blocking, silent failure)
-        self._finish_pluto_with_timeout(timeout=self._PLUTO_CLEANUP_TIMEOUT_SECONDS)
+        # Deliberately do NOT touch pluto here — see docstring.
         # Close Neptune (unless disabled) - this is the critical path
         if not self._neptune_disabled:
@@ -877,14 +895,13 @@ class NeptuneRunWrapper:
         """
         Support context manager protocol.
-        Pluto cleanup uses a timeout to ensure it never blocks Neptune's __exit__.
-        Neptune's __exit__ is always called, preserving exact Neptune behavior.
+        Same rationale as :meth:`close`: pluto is left alive and finalised
+        via the atexit/excepthook path, not here.
         """
         with self._close_lock:
             self._closed = True
-        # Finish pluto with timeout (non-blocking, silent failure)
-        self._finish_pluto_with_timeout(timeout=self._PLUTO_CLEANUP_TIMEOUT_SECONDS)
+        # Deliberately do NOT touch pluto here — see close() docstring.
         if self._neptune_disabled:
             return False

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/wandb.py RENAMED Viewed

@@ -4,22 +4,28 @@ Wandb-to-Pluto compatibility layer for seamless dual-logging.
 This module monkey-patches wandb.init() so that every wandb Run also logs
 to Pluto. It can be activated in two ways:
-1. Automatic (zero code changes): Set PLUTO_PROJECT + PLUTO_API_KEY env vars
-   and pip install pluto-ml. The .pth file triggers the import hook which
-   calls apply_wandb_patches().
+1. Automatic (zero code changes): pip install pluto-ml. The .pth file
+   triggers the import hook which calls apply_wandb_patches() once Pluto
+   credentials are available (see Configuration below).
 2. Explicit import: `import pluto.compat.wandb` at the top of your script.
    This patches wandb directly (like the Neptune compat layer).
 Configuration:
-    Required:
-    - PLUTO_API_KEY: Pluto API token (always required). In
-      DISABLE_WANDB_LOGGING=true mode, WANDB_API_KEY may be reused
-      instead.
-    - A project name: PLUTO_PROJECT if set, otherwise WANDB_PROJECT
-      is used as a fallback. This means if you already have
-      WANDB_PROJECT set for wandb, you don't need to set
-      PLUTO_PROJECT separately.
+    Authentication (one of the following):
+    - Run `pluto login` to store a token in the system keyring.
+    - Set PLUTO_API_KEY (Pluto API token).
+    - In DISABLE_WANDB_LOGGING=true mode only, WANDB_API_KEY may be
+      reused as the Pluto token (migration shortcut).
+    Project name (one of the following, checked in order):
+    - PLUTO_PROJECT env var
+    - the `project` kwarg passed to wandb.init()
+    - WANDB_PROJECT env var
+    - the project attribute on the resolved wandb run
+    If you already pass project= to wandb.init() (or via a framework
+    wrapper like Lightning's WandbLogger) or have WANDB_PROJECT set,
+    you don't need to set PLUTO_PROJECT separately.
     Optional:
     - PLUTO_URL_APP: Pluto app URL (for self-hosted)
@@ -29,7 +35,8 @@ Configuration:
 Hard Requirements:
     - MUST NOT break existing wandb functionality under ANY condition
-    - If Pluto is down/misconfigured, silently continue with wandb only
+    - If Pluto is down/misconfigured, log a warning and continue with
+      wandb only — never raise.
     - Zero impact on wandb's behavior, return values, or exceptions
 """
@@ -622,19 +629,26 @@ def _make_patched_init(original_init, wandb_module):
         pluto_config = _get_pluto_config_from_env()
         # Project name fallback (works in ALL modes): if PLUTO_PROJECT
-        # isn't set, use WANDB_PROJECT. This makes PLUTO_PROJECT fully
-        # optional — users who already have WANDB_PROJECT from their
-        # existing wandb setup don't have to duplicate it.
+        # isn't set, fall back to (in order): the explicit `project`
+        # kwarg passed to wandb.init(), the `WANDB_PROJECT` env var,
+        # or finally the project attribute on the resolved wandb run.
+        # This makes PLUTO_PROJECT fully optional — frameworks like
+        # Lightning's WandbLogger pass project as a kwarg and may never
+        # set WANDB_PROJECT, so kwargs must be consulted too.
         #
         # If pluto_config is None here, it means PLUTO_PROJECT wasn't set
         # (that's the only reason _get_pluto_config_from_env returns None).
-        # We build a fresh config from WANDB_PROJECT and re-read the
-        # other PLUTO_* env vars (api key, urls) since the helper bailed
-        # before reading them.
+        # We build a fresh config from the resolved project and re-read
+        # the other PLUTO_* env vars (api key, urls) since the helper
+        # bailed before reading them.
         if pluto_config is None:
-            wandb_project = os.environ.get('WANDB_PROJECT')
-            if wandb_project:
-                pluto_config = {'project': wandb_project}
+            resolved_project = (
+                kwargs.get('project')
+                or os.environ.get('WANDB_PROJECT')
+                or getattr(wandb_run, 'project', None)
+            )
+            if resolved_project:
+                pluto_config = {'project': resolved_project}
                 if api_key := os.environ.get('PLUTO_API_KEY'):
                     pluto_config['api_key'] = api_key
                 for env_var, cfg_key in (
@@ -645,8 +659,8 @@ def _make_patched_init(original_init, wandb_module):
                     if v := os.environ.get(env_var):
                         pluto_config[cfg_key] = v
                 logger.info(
-                    'pluto.compat.wandb: using WANDB_PROJECT as Pluto project '
-                    '(PLUTO_PROJECT not set)'
+                    f'pluto.compat.wandb: using "{resolved_project}" as Pluto '
+                    f'project (PLUTO_PROJECT not set)'
                 )
         # Migration shortcut (disabled-mode only): in DISABLE_WANDB_LOGGING
@@ -662,10 +676,11 @@ def _make_patched_init(original_init, wandb_module):
                 )
         if pluto_config is None:
-            logger.info(
-                'pluto.compat.wandb: no project name available '
-                '(set PLUTO_PROJECT or WANDB_PROJECT), '
-                'continuing with wandb-only logging'
+            logger.warning(
+                'pluto.compat.wandb: cannot dual-log to Pluto — no project '
+                'name resolvable (none of: PLUTO_PROJECT, project= kwarg, '
+                'WANDB_PROJECT, wandb run project). Continuing with wandb-'
+                'only logging.'
             )
             return wandb_run

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/iface.py RENAMED Viewed

@@ -299,7 +299,11 @@ class ServerInterface:
             status_code = r.status_code if r else 'N/A'
             target = len(drained) if drained else 'request'
             response = r.text if r else 'N/A'
-            logger.warning(
+            # High-frequency endpoints (the trigger/heartbeat that fires
+            # every ~4 s) set suppress_httpx_logs; route their non-200
+            # responses to DEBUG so a flaky server doesn't spam WARNING.
+            log_response = logger.debug if suppress_httpx_logs else logger.warning
+            log_response(
                 '%s: %s: attempt %s/%s: response code %s for %s from %s: %s',
                 tag,
                 name,

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/log.py RENAMED Viewed

@@ -1,5 +1,6 @@
 import builtins
 import logging
+import os
 import sys
 import time
@@ -65,6 +66,21 @@ class ConsoleHandler:
         self.sanitizer = sanitizer
         self._log_buffer: list = []
         self._last_flush = 0.0
+        # Carry-over for partial writes that don't end at a line boundary.
+        # Python's traceback printer (and rich) call write() with chunks
+        # like just whitespace or a single character, so treating each
+        # call as a complete line shreds tracebacks into one-char "lines".
+        self._partial_line: str = ''
+        # When running under torchrun, prepend the rank to captured lines
+        # so the Pluto UI can distinguish rank N from rank M without the
+        # user having to wrap every print() themselves. RANK is set by
+        # torchrun in every child process; absent → no prefix (so single-
+        # process and non-torch jobs keep their existing log format).
+        # Note: only the captured copy is prefixed, not the pass-through
+        # to self.stream — that lets torchrun add its own [defaultN]:
+        # prefix to the terminal stream without double-prefixing.
+        rank = os.environ.get('RANK')
+        self._rank_prefix = f'[rank{rank}] ' if rank is not None else ''
     def _flush_log_buffer(self) -> None:
         """Flush buffered console log lines to the sync store in one batch."""
@@ -78,30 +94,52 @@ class ConsoleHandler:
         self._log_buffer.clear()
         self._last_flush = time.time()
+    def _emit_line(self, line: str) -> None:
+        """Log one complete line through the sync buffer + the python logger."""
+        if not line:  # do not log empty lines
+            return
+        self.count += 1
+        timestamp_ms = int(time.time() * 1000)
+        if self._rank_prefix:
+            line = self._rank_prefix + line
+        if self.sync_manager is not None:
+            sanitized_line = self.sanitizer.sanitize(line) if self.sanitizer else line
+            log_type = logging._levelToName.get(self.level, 'INFO')
+            self._log_buffer.append(
+                (sanitized_line, log_type, timestamp_ms, self.count)
+            )
+        self.logger.log(self.level, line)
     def write(self, buf: str) -> None:
-        for line in buf.splitlines():
-            if line:  # do not log empty lines
-                self.count += 1
-                timestamp_ms = int(time.time() * 1000)
-                if self.sync_manager is not None:
-                    sanitized_line = (
-                        self.sanitizer.sanitize(line) if self.sanitizer else line
-                    )
-                    log_type = logging._levelToName.get(self.level, 'INFO')
-                    self._log_buffer.append(
-                        (sanitized_line, log_type, timestamp_ms, self.count)
-                    )
-                self.logger.log(self.level, line)
+        # Pass-through to the real stream first so terminal output is not
+        # delayed by our line buffering.
+        self.stream.write(buf)
+        self.stream.flush()
+        # Accumulate partial writes and only emit on real '\n' boundaries.
+        # Splitting on '\n' specifically (not splitlines()) avoids breaking
+        # on \v, \f, \x1c-\x1e, \x85, U+2028, U+2029 — chars that rich and
+        # other styled-output libs use as internal segment separators.
+        self._partial_line += buf
+        if '\n' not in self._partial_line:
+            return
+        *complete, self._partial_line = self._partial_line.split('\n')
+        for line in complete:
+            self._emit_line(line)
         # Flush the buffer if it's large enough or old enough
         if self._log_buffer and (
             len(self._log_buffer) >= self._FLUSH_SIZE
             or time.time() - self._last_flush >= self._FLUSH_INTERVAL
         ):
             self._flush_log_buffer()
-        self.stream.write(buf)
-        self.stream.flush()
     def flush(self):
+        # Emit any trailing partial line so we don't drop output that
+        # never got a terminating newline (e.g. a final print(..., end='')
+        # before interpreter shutdown).
+        if self._partial_line:
+            self._emit_line(self._partial_line)
+            self._partial_line = ''
         self._flush_log_buffer()
         self.stream.flush()

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/op.py RENAMED Viewed

@@ -777,17 +777,24 @@ class Op:
                         },
                     )
             logger.critical('%s: interrupted %s', tag, e)
-        _sentry.flush()
-        logger.debug(f'{tag}: finished' if update_status else f'{tag}: closed')
-        teardown_logger(logger, console=logging.getLogger('console'))
-        self.settings.meta = []
-        if pluto.ops is not None:
-            pluto.ops = [
-                op for op in pluto.ops if op.settings._op_id != self.settings._op_id
-            ]  # TODO: make more efficient
-            if not pluto.ops:
-                _unregister_excepthook()
+            # Re-raise user-initiated termination so the process actually
+            # exits as the user expects. Post-cleanup (sentry flush,
+            # teardown_logger, pluto.ops mutation) still runs via the
+            # finally block below.
+            if isinstance(e, KeyboardInterrupt):
+                raise
+        finally:
+            _sentry.flush()
+            logger.debug(f'{tag}: finished' if update_status else f'{tag}: closed')
+            teardown_logger(logger, console=logging.getLogger('console'))
+            self.settings.meta = []
+            if pluto.ops is not None:
+                pluto.ops = [
+                    op for op in pluto.ops if op.settings._op_id != self.settings._op_id
+                ]  # TODO: make more efficient
+                if not pluto.ops:
+                    _unregister_excepthook()
     def watch(self, module, **kwargs):
         from .compat.torch import _watch_torch

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sync/process.py RENAMED Viewed

@@ -567,6 +567,20 @@ def _sync_main(
     except Exception as e:
         log.error(f'Sync process error: {e}', exc_info=True)
     finally:
+        # If we exited the loop because of SIGTERM/SIGINT (vs an exception),
+        # drain pending records before tearing down. torchrun gives ~30s
+        # before SIGKILL and pluto's shutdown_timeout defaults to 30s, so
+        # we have time — without this any records still in SQLite are left
+        # behind and require a manual `pluto sync` to recover.
+        if shutdown_requested['value']:
+            log.info(
+                f'Shutdown signal received, draining pending records '
+                f'(up to {shutdown_timeout}s)'
+            )
+            try:
+                _flush_remaining(store, uploader, log, shutdown_timeout, max_retries)
+            except Exception as drain_err:
+                log.warning(f'Drain on shutdown failed: {drain_err}')
         uploader.close()
         store.close()
         log.info('Sync process exiting')

{pluto_ml-0.0.20 → pluto_ml-0.0.22}/pyproject.toml RENAMED Viewed

@@ -1,6 +1,6 @@
 [tool.poetry]
 name = "pluto-ml"
-version = "0.0.20"
+version = "0.0.22"
 description = "Pluto ML - Machine Learning Operations Framework"
 packages = [
     {include = "pluto"},

pluto_ml-0.0.22/zzzz_pluto_wandb_hook.pth ADDED Viewed

	@@ -0,0 +1 @@
1	+ import pluto._wandb_hook; pluto._wandb_hook.install()

pluto_ml-0.0.20/pluto/_wandb_hook.py DELETED Viewed

@@ -1,160 +0,0 @@
-"""
-Import hook that intercepts `import wandb` to enable dual-logging to Pluto.
-This module is designed to be loaded via a .pth file at Python startup.
-It registers a sys.meta_path finder that, when `import wandb` is executed,
-loads the real wandb package and then monkey-patches it to dual-log to Pluto.
-Activation (needs both an API key and a project name):
-    API key (required):
-      - PLUTO_API_KEY: Pluto API token, OR
-      - WANDB_API_KEY as a fallback when DISABLE_WANDB_LOGGING=true
-        (user reuses the wandb env var to hold a Pluto token)
-    Project name (required):
-      - PLUTO_PROJECT, OR
-      - WANDB_PROJECT as a fallback (works in all modes)
-Optional:
-    - DISABLE_WANDB_LOGGING=true: Skip real wandb, log to Pluto only
-"""
-import importlib
-import logging
-import sys
-logger = logging.getLogger(__name__)
-_hook_installed = False
-class _PlutoWandbFinder:
-    """
-    Meta path finder that intercepts `import wandb` to apply dual-logging patches.
-    Uses find_module/load_module (not the newer find_spec/exec_module from PEP 451)
-    because the spec-based API doesn't cleanly support "load the real package, then
-    patch it" — exec_module runs on a partially-initialized module object, causing
-    circular import issues with wandb's internal imports.
-    On first `import wandb`, this finder:
-    1. Temporarily removes itself from sys.meta_path (to avoid recursion)
-    2. Loads the real wandb package via normal import machinery
-    3. Re-inserts itself at the front of sys.meta_path, even if the import failed
-       (for future imports, though wandb is now cached in sys.modules)
-    4. Applies monkey-patches to the loaded wandb module for dual-logging; a
-       patching failure is logged as a warning and the unpatched module is
-       still returned
-    """
-    # NOTE(review): find_module/load_module are the legacy finder/loader
-    # protocol. Python 3.12 removed import-system support for meta path finders
-    # that lack find_spec, so this hook is presumably skipped silently there —
-    # confirm which interpreter versions must be supported.
-    # Re-entrancy guard. Declared on the class as the default; load_module
-    # assigns it on the instance while a load is in progress.
-    _patching = False
-    def find_module(self, fullname, path=None):
-        # Claim only the top-level `wandb` import, and decline while our own
-        # load_module call is running so the nested import is not intercepted.
-        if fullname == 'wandb' and not self._patching:
-            return self
-        return None
-    def load_module(self, fullname):
-        # If wandb is already in sys.modules, it's been loaded
-        if fullname in sys.modules:
-            return sys.modules[fullname]
-        # Prevent re-entrant calls
-        self._patching = True
-        try:
-            # Remove ourselves so the real import machinery finds the real wandb
-            sys.meta_path.remove(self)
-            try:
-                # An import failure propagates to the caller once the two
-                # `finally` clauses below have restored the finder and the guard.
-                real_wandb = importlib.import_module('wandb')
-            finally:
-                # Always re-insert ourselves
-                sys.meta_path.insert(0, self)
-            # Apply the dual-logging patches
-            try:
-                # Imported lazily, inside the try, so that a failure to import
-                # the patch module is handled the same way as a patching error.
-                from pluto.compat.wandb import apply_wandb_patches
-                apply_wandb_patches(real_wandb)
-                logger.info(
-                    'pluto._wandb_hook: Successfully patched wandb for dual-logging'
-                )
-            except Exception as e:
-                # Best effort: never let a patching problem break `import wandb`.
-                logger.warning(
-                    f'pluto._wandb_hook: Failed to apply wandb patches: {e}. '
-                    f'wandb will work normally without Pluto dual-logging.'
-                )
-            return real_wandb
-        finally:
-            # Clear the guard on every exit path (success or exception).
-            self._patching = False
-def install():
-    """
-    Put the wandb import hook at the front of sys.meta_path.
-    Nothing is installed unless the environment supplies both:
-      - an API key: PLUTO_API_KEY, or WANDB_API_KEY when
-        DISABLE_WANDB_LOGGING is 'true', '1' or 'yes' (case-insensitive);
-      - a project name: PLUTO_PROJECT, or WANDB_PROJECT as a fallback.
-    When either is missing the function returns without recording an
-    installation, so a later call re-checks the environment.
-    If wandb is already present in sys.modules no finder is registered;
-    the loaded module is patched in place instead (best effort, failures
-    are logged as warnings) and the hook is still recorded as installed.
-    Calling this again after a recorded installation is a no-op.
-    """
-    import os
-    global _hook_installed
-    if _hook_installed:
-        return
-    env = os.environ
-    truthy = ('true', '1', 'yes')
-    wandb_disabled = env.get('DISABLE_WANDB_LOGGING', '').lower() in truthy
-    # API key: PLUTO_API_KEY wins; WANDB_API_KEY counts only in disabled mode.
-    if env.get('PLUTO_API_KEY'):
-        have_api_key = True
-    else:
-        have_api_key = wandb_disabled and bool(env.get('WANDB_API_KEY'))
-    # Project name: either variable is accepted in every mode.
-    have_project = bool(env.get('PLUTO_PROJECT') or env.get('WANDB_PROJECT'))
-    if not have_api_key or not have_project:
-        return
-    if 'wandb' not in sys.modules:
-        # Normal case: register the finder so the first `import wandb` is
-        # intercepted.
-        sys.meta_path.insert(0, _PlutoWandbFinder())
-        _hook_installed = True
-        return
-    # Too late to intercept the import; patch the module that is already there.
-    logger.warning(
-        'pluto._wandb_hook: wandb already imported before hook installation. '
-        'Attempting to patch existing wandb module.'
-    )
-    try:
-        from pluto.compat.wandb import apply_wandb_patches
-        apply_wandb_patches(sys.modules['wandb'])
-    except Exception as e:
-        logger.warning(
-            f'pluto._wandb_hook: Failed to patch already-imported wandb: {e}'
-        )
-    _hook_installed = True
-def uninstall():
-    """Drop every _PlutoWandbFinder from sys.meta_path and reset the flag (for testing)."""
-    global _hook_installed
-    remaining = []
-    for finder in sys.meta_path:
-        if isinstance(finder, _PlutoWandbFinder):
-            continue
-        remaining.append(finder)
-    # Slice-assign so the existing sys.meta_path list object is mutated in place.
-    sys.meta_path[:] = remaining
-    _hook_installed = False

pluto_ml-0.0.20/zzzz_pluto_wandb_hook.pth DELETED Viewed

	@@ -1 +0,0 @@
1	- import os; _dw=os.environ.get('DISABLE_WANDB_LOGGING','').lower() in ('true','1','yes'); _k=os.environ.get('PLUTO_API_KEY') or (_dw and os.environ.get('WANDB_API_KEY')); _p=os.environ.get('PLUTO_PROJECT') or os.environ.get('WANDB_PROJECT'); _k and _p and __import__('pluto._wandb_hook')._wandb_hook.install()