pluto-ml 0.0.20__tar.gz → 0.0.22__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/PKG-INFO +1 -1
  2. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/__init__.py +2 -2
  3. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/__main__.py +16 -1
  4. pluto_ml-0.0.22/pluto/_wandb_hook.py +250 -0
  5. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/auth.py +28 -0
  6. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/neptune.py +33 -16
  7. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/wandb.py +42 -27
  8. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/iface.py +5 -1
  9. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/log.py +53 -15
  10. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/op.py +18 -11
  11. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sync/process.py +14 -0
  12. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pyproject.toml +1 -1
  13. pluto_ml-0.0.22/zzzz_pluto_wandb_hook.pth +1 -0
  14. pluto_ml-0.0.20/pluto/_wandb_hook.py +0 -160
  15. pluto_ml-0.0.20/zzzz_pluto_wandb_hook.pth +0 -1
  16. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/LICENSE +0 -0
  17. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/README.md +0 -0
  18. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/mlop/__init__.py +0 -0
  19. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/mlop/__main__.py +0 -0
  20. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/mlop/compat/__init__.py +0 -0
  21. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/mlop/compat/lightning.py +0 -0
  22. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/mlop/compat/neptune.py +0 -0
  23. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/mlop/compat/torch.py +0 -0
  24. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/mlop/compat/transformers.py +0 -0
  25. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/api.py +0 -0
  26. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/__init__.py +0 -0
  27. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/_utils.py +0 -0
  28. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/lightning.py +0 -0
  29. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/neptune_query/__init__.py +0 -0
  30. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/neptune_query/filters.py +0 -0
  31. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/neptune_query/runs.py +0 -0
  32. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/torch.py +0 -0
  33. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/compat/transformers.py +0 -0
  34. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/data.py +0 -0
  35. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/file.py +0 -0
  36. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/init.py +0 -0
  37. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/query.py +0 -0
  38. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sanitize.py +0 -0
  39. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sentry.py +0 -0
  40. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sets.py +0 -0
  41. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/store.py +0 -0
  42. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sync/__init__.py +0 -0
  43. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sync/__main__.py +0 -0
  44. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sync/retry.py +0 -0
  45. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sync/store.py +0 -0
  46. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/sys.py +0 -0
  47. {pluto_ml-0.0.20 → pluto_ml-0.0.22}/pluto/util.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pluto-ml
3
- Version: 0.0.20
3
+ Version: 0.0.22
4
4
  Summary: Pluto ML - Machine Learning Operations Framework
5
5
  License-File: LICENSE
6
6
  Author: jqssun
@@ -41,11 +41,11 @@ __all__ = (
41
41
  'generate_run_id',
42
42
  )
43
43
 
44
- __version__ = '0.0.20'
44
+ __version__ = '0.0.22'
45
45
 
46
46
 
47
47
  # Replaced with the current commit when building the wheels.
48
- _PLUTO_COMMIT_SHA = 'f3600b5b85929f8f0dd32d9b1c291ff3c73d1800'
48
+ _PLUTO_COMMIT_SHA = '4587211b1c6ccebe92f92d1243bf9a213ec1f3dd'
49
49
 
50
50
 
51
51
  def _get_git_commit():
@@ -64,10 +64,25 @@ def _cmd_sync(args: argparse.Namespace) -> None:
64
64
  )
65
65
  sys.exit(1)
66
66
 
67
- # Build settings dict with URLs
67
+ # Build settings dict with URLs.
68
+ # Settings.to_dict() only iterates __annotations__, so the URLs
69
+ # populated by update_url() (url_num, url_message, url_file, ...) are
70
+ # NOT included. Without this every uploader call no-ops on
71
+ # `if not self.url_X: return`, the records get marked SUCCESS in the
72
+ # local DB, and nothing reaches the server. Restore them explicitly.
68
73
  settings = Settings()
69
74
  settings.update_host()
70
75
  settings_dict = settings.to_dict()
76
+ settings_dict.update(
77
+ {
78
+ 'url_num': settings.url_num,
79
+ 'url_data': settings.url_data,
80
+ 'url_file': settings.url_file,
81
+ 'url_message': settings.url_message,
82
+ 'url_update_config': settings.url_update_config,
83
+ 'url_update_tags': settings.url_update_tags,
84
+ }
85
+ )
71
86
  settings_dict['_auth'] = auth
72
87
 
73
88
  # Read run info from each database and populate settings
@@ -0,0 +1,250 @@
1
+ """
2
+ Import hook that intercepts `import wandb` to enable dual-logging to Pluto.
3
+
4
+ Loaded via a .pth file at Python startup. Registers a sys.meta_path finder
5
+ that, when `import wandb` is executed, loads the real wandb package and then
6
+ monkey-patches it to dual-log to Pluto.
7
+
8
+ Activation:
9
+ The hook itself installs unconditionally when pluto-ml is on the path —
10
+ installing the package is the user's opt-in signal. Whether the
11
+ patches actually fire is decided later, when `import wandb` runs:
12
+
13
+ - Credentials available (any of: PLUTO_API_KEY env var, WANDB_API_KEY
14
+ when DISABLE_WANDB_LOGGING=true, the marker written by `pluto login`,
15
+ or the keyring file written by `pluto login`) → patches applied,
16
+ wandb dual-logs to Pluto.
17
+ - No credentials → a one-time discoverability hint is logged
18
+ (pointing at `pluto login` / PLUTO_API_KEY) and wandb runs unpatched.
19
+
20
+ Project name is no longer required at install time; the runtime falls back
21
+ to the `project=` kwarg on wandb.init, then WANDB_PROJECT, then the resolved
22
+ wandb run's project attribute.
23
+ """
24
+
25
+ import importlib
26
+ import importlib.util
27
+ import logging
28
+ import os
29
+ import sys
30
+
31
+ logger = logging.getLogger(__name__)
32
+
33
+ _hook_installed = False
34
+ _hint_emitted = False
35
+
36
+ # Mirrors pluto.auth.LOGIN_MARKER_PATH. Duplicated as a literal here so this
37
+ # module stays import-free of the rest of pluto at .pth load time.
38
+ _LOGIN_MARKER_PATH = os.path.expanduser('~/.pluto/.login_ok')
39
+
40
+
41
def _keyring_cfg_path() -> str:
    """
    Return the expected location of the file-based keyring config.

    The path is computed by hand (the original author notes it mirrors
    keyring.util.platform_ for keyrings.alt's PlaintextKeyring) so that
    keyring does not have to be imported while the .pth hook is loading.

    Returns:
        Path to ``keyring_pass.cfg`` under the per-platform data root. The
        file is not required to exist.
    """
    cfg_name = 'keyring_pass.cfg'
    if sys.platform != 'win32':
        # Non-Windows: XDG data dir, falling back to ~/.local/share when
        # XDG_DATA_HOME is unset or empty.
        data_home = os.environ.get('XDG_DATA_HOME') or os.path.expanduser(
            '~/.local/share'
        )
        return os.path.join(data_home, 'python_keyring', cfg_name)
    # Windows: LOCALAPPDATA, then ProgramData, then the current directory.
    win_root = os.environ.get('LOCALAPPDATA') or os.environ.get('ProgramData') or '.'
    return os.path.join(win_root, 'Python Keyring', cfg_name)
53
+
54
+
55
def _keyring_cfg_has_pluto() -> bool:
    """
    Report whether the file-based keyring config has a ``pluto`` section.

    Backward compatibility: this detects a `pluto login` performed before
    the login marker file existed.

    Returns:
        True only if the keyring config file exists, parses, and contains a
        section named ``pluto``. Any failure while reading or parsing is
        treated as "not present".
    """
    cfg_file = _keyring_cfg_path()
    if os.path.exists(cfg_file):
        try:
            # Imported lazily so the hook module stays cheap to load.
            import configparser

            parser = configparser.RawConfigParser()
            parser.read(cfg_file, encoding='utf-8')
            return parser.has_section('pluto')
        except Exception:
            # Best-effort probe: an unreadable or malformed file must not
            # raise out of the import hook.
            return False
    return False
68
+
69
+
70
def _has_pluto_credentials() -> bool:
    """
    Return True if a Pluto auth source is available without prompting.

    Sources are checked in this order, stopping at the first hit:
      1. the PLUTO_API_KEY environment variable;
      2. WANDB_API_KEY, but only when DISABLE_WANDB_LOGGING is set to
         'true', '1' or 'yes' (case-insensitive);
      3. the login marker file at ``_LOGIN_MARKER_PATH``;
      4. a ``pluto`` section in the file-based keyring config.
    """
    env = os.environ
    if env.get('PLUTO_API_KEY'):
        return True

    wandb_logging_off = env.get('DISABLE_WANDB_LOGGING', '').lower() in (
        'true',
        '1',
        'yes',
    )
    if wandb_logging_off and env.get('WANDB_API_KEY'):
        return True

    # Both remaining probes return bool, so `or` yields a bool as well; the
    # keyring probe only runs when the marker file is absent.
    return os.path.exists(_LOGIN_MARKER_PATH) or _keyring_cfg_has_pluto()
86
+
87
+
88
def _has_partial_pluto_signal() -> bool:
    """
    Return True if any non-auth Pluto environment variable is set.

    Used to tell "the user configured Pluto but has no API key" apart from
    "the user has no Pluto configuration at all". A variable counts as set
    only when its value is non-empty.
    """
    for env_name in (
        'PLUTO_PROJECT',
        'PLUTO_URL_APP',
        'PLUTO_URL_API',
        'PLUTO_URL_INGEST',
    ):
        if os.environ.get(env_name):
            return True
    return False
99
+
100
+
101
def _emit_discoverability_hint() -> None:
    """
    Log a one-time warning that wandb is running without Pluto dual-logging.

    The module-level ``_hint_emitted`` flag is set before logging, so later
    calls in the same process are no-ops. The wording depends on whether
    some Pluto environment variable is set (partial configuration) or none.
    """
    global _hint_emitted
    if not _hint_emitted:
        _hint_emitted = True
        if _has_partial_pluto_signal():
            message = (
                'pluto.compat.wandb: Pluto config detected but no API key found. '
                'Run `pluto login` (or set PLUTO_API_KEY) to enable dual-logging '
                'to Pluto. Continuing with wandb-only logging.'
            )
        else:
            message = (
                'pluto.compat.wandb: pluto-ml is installed but no Pluto credentials '
                'found. Run `pluto login` (or set PLUTO_API_KEY) to enable '
                'dual-logging to Pluto. Continuing with wandb-only logging.'
            )
        logger.warning(message)
119
+
120
+
121
def _patch_or_hint(wandb_module) -> None:
    """
    Patch ``wandb_module`` for dual-logging, or log a hint if not configured.

    Args:
        wandb_module: the imported wandb module object to patch.

    With no Pluto credentials available, only the one-time discoverability
    hint is emitted. With credentials, the patches are applied; any failure
    while importing or applying them is logged as a warning and swallowed,
    so importing wandb never fails because of Pluto.
    """
    if not _has_pluto_credentials():
        _emit_discoverability_hint()
        return

    try:
        # Imported here rather than at module top so the .pth hook does not
        # pull in the rest of pluto until wandb is actually imported.
        from pluto.compat.wandb import apply_wandb_patches

        apply_wandb_patches(wandb_module)
        logger.info('pluto._wandb_hook: Successfully patched wandb for dual-logging')
    except Exception as e:
        logger.warning(
            f'pluto._wandb_hook: Failed to apply wandb patches: {e}. '
            f'wandb will work normally without Pluto dual-logging.'
        )
138
+
139
+
140
class _PatchingLoader:
    """
    Loader wrapper that applies dual-logging patches after wandb is executed.

    Wraps wandb's real loader so the patches run *after* the real loader has
    fully initialised the wandb module. This is the spec-based replacement
    for the old find_module/load_module approach, which the original author
    observed to be bypassed on Python 3.12.

    Because this object replaces ``spec.loader`` (and therefore ends up as
    the module's loader), every attribute it does not define itself is
    forwarded to the real loader. That keeps optional loader methods such as
    ``get_data``, ``get_resource_reader``, ``is_package`` or ``get_source``
    available to code that looks them up on the loader.
    """

    def __init__(self, real_loader):
        self._real_loader = real_loader

    def __getattr__(self, name):
        # Only called when normal attribute lookup fails. Guard the backing
        # attribute itself so a half-constructed instance (e.g. created via
        # __new__ by copy/pickle) raises AttributeError instead of recursing.
        if name == '_real_loader':
            raise AttributeError(name)
        return getattr(self._real_loader, name)

    def create_module(self, spec):
        """Delegate module creation; None requests default module creation."""
        create = getattr(self._real_loader, 'create_module', None)
        if create is None:
            return None
        return create(spec)

    def exec_module(self, module):
        """Execute the real module body, then patch it or log the hint."""
        # Let wandb's real loader populate the module first.
        self._real_loader.exec_module(module)
        # The module is now fully imported, so monkey-patching attributes on
        # it (e.g. `module.init`) is what later `wandb.init` callers see.
        _patch_or_hint(module)
163
+
164
+
165
class _PlutoWandbFinder:
    """
    Meta path finder that intercepts `import wandb` to apply dual-logging patches.

    Implements the `find_spec` API. It delegates to the normal import
    machinery to locate the real wandb spec, then wraps that spec's loader
    with `_PatchingLoader` so the hook runs after wandb finishes importing.

    The original author notes that the previous find_module/load_module
    implementation worked on 3.10/3.11 but was silently bypassed on 3.12.
    """

    # Re-entrancy guard: True while this finder is itself asking the import
    # system for wandb's spec, so the nested lookup skips this finder.
    _patching = False

    def find_spec(self, fullname, path=None, target=None):
        """
        Return wandb's spec with a patching loader, or None to defer.

        Args:
            fullname: fully qualified name of the module being imported.
            path: unused; accepted for the finder protocol.
            target: unused; accepted for the finder protocol.

        Returns:
            The real wandb spec with its loader wrapped in `_PatchingLoader`,
            or None for any other module, during re-entrant lookup, or when
            the real spec cannot be resolved (other finders then handle the
            import as usual).
        """
        if fullname != 'wandb' or self._patching:
            return None
        # Re-enter the finder chain to find the real wandb spec without
        # recursing into ourselves.
        self._patching = True
        try:
            real_spec = importlib.util.find_spec('wandb')
        except (ImportError, ValueError):
            # importlib.util.find_spec raises ValueError when wandb is
            # already in sys.modules with a missing/None __spec__. Defer to
            # the other finders instead of breaking the import.
            return None
        finally:
            self._patching = False
        if real_spec is None or real_spec.loader is None:
            return None
        # When wandb is already imported (e.g. importlib.reload), the lookup
        # above returns the module's existing spec, whose loader we wrapped
        # on first import. Wrapping again would run the patches twice.
        if isinstance(real_spec.loader, _PatchingLoader):
            return real_spec
        # Wrap the real loader so exec_module triggers our patches afterward.
        real_spec.loader = _PatchingLoader(real_spec.loader)
        return real_spec
196
+
197
+
198
def install():
    """
    Register the wandb import hook on sys.meta_path.

    Credential resolution is not done here; it is deferred until
    `import wandb` actually runs (see _PlutoWandbFinder). Users who run
    `pluto login` after Python starts therefore still get dual-logging, and
    users with no Pluto config get a discoverability hint rather than
    silent inactivity.

    If wandb was imported before this call, no finder is registered.
    Instead the existing module is patched in place when credentials are
    available, or the hint is logged when they are not.

    Safe to call multiple times: after the first successful call this is a
    no-op.
    """
    global _hook_installed

    if _hook_installed:
        return

    if 'wandb' not in sys.modules:
        # Normal path: get in front of the other finders so the first
        # `import wandb` goes through the patching loader.
        sys.meta_path.insert(0, _PlutoWandbFinder())
        _hook_installed = True
        return

    # wandb is already imported, so the finder would be too late.
    if not _has_pluto_credentials():
        _emit_discoverability_hint()
    else:
        logger.warning(
            'pluto._wandb_hook: wandb already imported before hook '
            'installation. Attempting to patch existing wandb module.'
        )
        try:
            from pluto.compat.wandb import apply_wandb_patches

            apply_wandb_patches(sys.modules['wandb'])
        except Exception as e:
            # Patching is best-effort; wandb must keep working regardless.
            logger.warning(
                f'pluto._wandb_hook: Failed to patch already-imported '
                f'wandb: {e}'
            )
    _hook_installed = True
241
+
242
+
243
def uninstall():
    """
    Remove the wandb import hook (for testing).

    Drops every `_PlutoWandbFinder` from sys.meta_path in place and resets
    the module-level installed/hint flags, so a later `install()` starts
    from a clean state. Patches already applied to wandb are not undone.
    """
    global _hook_installed, _hint_emitted
    kept_finders = []
    for entry in sys.meta_path:
        if not isinstance(entry, _PlutoWandbFinder):
            kept_finders.append(entry)
    # Slice-assign so other references to the sys.meta_path list see the change.
    sys.meta_path[:] = kept_finders
    _hook_installed = False
    _hint_emitted = False
@@ -1,5 +1,6 @@
1
1
  import getpass
2
2
  import logging
3
+ import os
3
4
  import sys
4
5
  import webbrowser
5
6
 
@@ -13,6 +14,30 @@ from .util import ANSI, import_lib, print_url
13
14
  tlogger = logging.getLogger('auth')
14
15
  tag = 'Authentication'
15
16
 
17
+ # Marker file written after a successful `pluto login`. The wandb compat
18
+ # layer's import hook (pluto/_wandb_hook.py) checks for this so a user who
19
+ # has only run `pluto login` (no PLUTO_API_KEY env var) still gets dual-
20
+ # logging activated. Stat-only check; never read.
21
+ LOGIN_MARKER_PATH = os.path.expanduser('~/.pluto/.login_ok')
22
+
23
+
24
def _write_login_marker() -> None:
    """
    Create (or truncate) the empty login marker file.

    The parent directory is created if needed. Filesystem errors are logged
    at debug level and otherwise ignored, so a failure to write the marker
    never fails the login itself.
    """
    marker_dir = os.path.dirname(LOGIN_MARKER_PATH)
    try:
        os.makedirs(marker_dir, exist_ok=True)
        # Only the file's existence matters; nothing is written to it.
        with open(LOGIN_MARKER_PATH, 'w'):
            pass
    except OSError as err:
        tlogger.debug('%s: failed to write login marker: %s', tag, err)
31
+
32
+
33
def _remove_login_marker() -> None:
    """
    Delete the login marker file if it exists.

    A missing marker is not an error. Any other filesystem error is logged
    at debug level and ignored.
    """
    try:
        os.remove(LOGIN_MARKER_PATH)
    except OSError as err:
        # FileNotFoundError is an OSError subclass: nothing to remove,
        # nothing to report.
        if isinstance(err, FileNotFoundError):
            return
        tlogger.debug('%s: failed to remove login marker: %s', tag, err)
40
+
16
41
 
17
42
  def login(settings=None, retry=False):
18
43
  settings = setup(settings)
@@ -56,6 +81,7 @@ def login(settings=None, retry=False):
56
81
  body = r.json()
57
82
  tlogger.info(f'{tag}: logged in as {body["organization"]["slug"]}')
58
83
  keyring.set_password(f'{settings.tag}', f'{settings.tag}', f'{settings._auth}')
84
+ _write_login_marker()
59
85
  teardown_logger(tlogger)
60
86
  except Exception as e:
61
87
  # If _auth was already provided (e.g. via env var or keyring), don't
@@ -103,6 +129,7 @@ def login(settings=None, retry=False):
103
129
  keyring.set_password(
104
130
  f'{settings.tag}', f'{settings.tag}', f'{settings._auth}'
105
131
  )
132
+ _write_login_marker()
106
133
  except Exception as e:
107
134
  tlogger.critical(
108
135
  '%s: failed to save key to system keyring service: %s', tag, e
@@ -124,5 +151,6 @@ def logout(settings=None):
124
151
  tlogger.warning(
125
152
  '%s: failed to delete key from system keyring service: %s', tag, e
126
153
  )
154
+ _remove_login_marker()
127
155
  tlogger.info(f'{tag}: logged out')
128
156
  teardown_logger(tlogger)
@@ -305,8 +305,18 @@ class NeptuneRunWrapper:
305
305
  else None
306
306
  )
307
307
 
308
- # Determine if this is an intentional resume (explicit run_id kwarg)
309
- pluto_resume = bool(explicit_kwarg_run_id)
308
+ # Determine if this is an intentional resume.
309
+ # - explicit kwarg run_id: user-provided, e.g. for restarting a run.
310
+ # - PLUTO_RUN_ID env var: cross-rank coordination signal for DDP.
311
+ # Without resume=True for the env-var path, ranks 1+ call
312
+ # pluto.init with the same externalId, the server returns
313
+ # resumed=True, and op.py raises "Run with externalId X already
314
+ # exists". The exception is caught by our broad except below
315
+ # and silently sets self._pluto_run = None — so only rank 0
316
+ # installs the console-capture ConsoleHandler and only rank 0
317
+ # logs ever reach the run. The user sees "all rank 1+ logs
318
+ # missing in the UI" with no obvious error in stdout.
319
+ pluto_resume = bool(explicit_kwarg_run_id) or bool(env_run_id)
310
320
 
311
321
  # Apply precedence: PLUTO_RUN_ID > explicit kwarg > Neptune auto
312
322
  run_id = env_run_id or explicit_kwarg_run_id or neptune_run_id
@@ -565,7 +575,7 @@ class NeptuneRunWrapper:
565
575
  )
566
576
  pluto_files[key] = pluto_file
567
577
  pluto_type = type(pluto_file).__name__
568
- logger.info(
578
+ logger.debug(
569
579
  f'pluto.compat.neptune: Converted file {key} '
570
580
  f'to {pluto_type}'
571
581
  )
@@ -576,7 +586,7 @@ class NeptuneRunWrapper:
576
586
 
577
587
  if pluto_files:
578
588
  self._pluto_run.log(pluto_files)
579
- logger.info(
589
+ logger.debug(
580
590
  f'pluto.compat.neptune: Logged {len(pluto_files)} files '
581
591
  f'to pluto'
582
592
  )
@@ -609,7 +619,7 @@ class NeptuneRunWrapper:
609
619
  )
610
620
  pluto_files[key] = pluto_file
611
621
  pluto_type = type(pluto_file).__name__
612
- logger.info(
622
+ logger.debug(
613
623
  f'pluto.compat.neptune: Converted {key} at step '
614
624
  f'{step} to {pluto_type}'
615
625
  )
@@ -620,7 +630,7 @@ class NeptuneRunWrapper:
620
630
 
621
631
  if pluto_files:
622
632
  self._pluto_run.log(pluto_files, step=step)
623
- logger.info(
633
+ logger.debug(
624
634
  f'pluto.compat.neptune: Logged {len(pluto_files)} files '
625
635
  f'to pluto at step {step}'
626
636
  )
@@ -708,10 +718,19 @@ class NeptuneRunWrapper:
708
718
 
709
719
  def close(self, **kwargs):
710
720
  """
711
- Close both Neptune and pluto runs.
712
-
713
- Pluto cleanup uses a timeout to ensure it never blocks Neptune's close.
714
- Neptune's close() is always called, preserving exact Neptune behavior.
721
+ Close the Neptune run. Pluto is left running.
722
+
723
+ Some Neptune callers — notably Lightning's ``NeptuneLogger.finalize`` —
724
+ invoke ``close()`` from inside Trainer's exception path (e.g. on a CUDA
725
+ OOM). Tearing pluto down here would lose any output emitted during
726
+ framework cleanup, including the traceback that triggered finalize in
727
+ the first place. We instead leave the pluto run alive; it is finalised
728
+ by:
729
+ * ``terminate()`` — explicit force-quit by the caller
730
+ * ``_atexit_cleanup_pluto`` — interpreter shutdown (with timeout)
731
+ ``sys.excepthook`` (registered in ``Op.__init__``) marks the run
732
+ FAILED before atexit fires when an exception propagates, so the
733
+ eventual status is correct without the close path doing it.
715
734
  """
716
735
  with self._close_lock:
717
736
  if self._closed:
@@ -721,8 +740,7 @@ class NeptuneRunWrapper:
721
740
  return None
722
741
  self._closed = True
723
742
 
724
- # Close pluto first with timeout (non-blocking, silent failure)
725
- self._finish_pluto_with_timeout(timeout=self._PLUTO_CLEANUP_TIMEOUT_SECONDS)
743
+ # Deliberately do NOT touch pluto here — see docstring.
726
744
 
727
745
  # Close Neptune (unless disabled) - this is the critical path
728
746
  if not self._neptune_disabled:
@@ -877,14 +895,13 @@ class NeptuneRunWrapper:
877
895
  """
878
896
  Support context manager protocol.
879
897
 
880
- Pluto cleanup uses a timeout to ensure it never blocks Neptune's __exit__.
881
- Neptune's __exit__ is always called, preserving exact Neptune behavior.
898
+ Same rationale as :meth:`close`: pluto is left alive and finalised
899
+ via the atexit/excepthook path, not here.
882
900
  """
883
901
  with self._close_lock:
884
902
  self._closed = True
885
903
 
886
- # Finish pluto with timeout (non-blocking, silent failure)
887
- self._finish_pluto_with_timeout(timeout=self._PLUTO_CLEANUP_TIMEOUT_SECONDS)
904
+ # Deliberately do NOT touch pluto here — see close() docstring.
888
905
 
889
906
  if self._neptune_disabled:
890
907
  return False
@@ -4,22 +4,28 @@ Wandb-to-Pluto compatibility layer for seamless dual-logging.
4
4
  This module monkey-patches wandb.init() so that every wandb Run also logs
5
5
  to Pluto. It can be activated in two ways:
6
6
 
7
- 1. Automatic (zero code changes): Set PLUTO_PROJECT + PLUTO_API_KEY env vars
8
- and pip install pluto-ml. The .pth file triggers the import hook which
9
- calls apply_wandb_patches().
7
+ 1. Automatic (zero code changes): pip install pluto-ml. The .pth file
8
+ triggers the import hook which calls apply_wandb_patches() once Pluto
9
+ credentials are available (see Configuration below).
10
10
 
11
11
  2. Explicit import: `import pluto.compat.wandb` at the top of your script.
12
12
  This patches wandb directly (like the Neptune compat layer).
13
13
 
14
14
  Configuration:
15
- Required:
16
- - PLUTO_API_KEY: Pluto API token (always required). In
17
- DISABLE_WANDB_LOGGING=true mode, WANDB_API_KEY may be reused
18
- instead.
19
- - A project name: PLUTO_PROJECT if set, otherwise WANDB_PROJECT
20
- is used as a fallback. This means if you already have
21
- WANDB_PROJECT set for wandb, you don't need to set
22
- PLUTO_PROJECT separately.
15
+ Authentication (one of the following):
16
+ - Run `pluto login` to store a token in the system keyring.
17
+ - Set PLUTO_API_KEY (Pluto API token).
18
+ - In DISABLE_WANDB_LOGGING=true mode only, WANDB_API_KEY may be
19
+ reused as the Pluto token (migration shortcut).
20
+
21
+ Project name (one of the following, checked in order):
22
+ - PLUTO_PROJECT env var
23
+ - the `project` kwarg passed to wandb.init()
24
+ - WANDB_PROJECT env var
25
+ - the project attribute on the resolved wandb run
26
+ If you already pass project= to wandb.init() (or via a framework
27
+ wrapper like Lightning's WandbLogger) or have WANDB_PROJECT set,
28
+ you don't need to set PLUTO_PROJECT separately.
23
29
 
24
30
  Optional:
25
31
  - PLUTO_URL_APP: Pluto app URL (for self-hosted)
@@ -29,7 +35,8 @@ Configuration:
29
35
 
30
36
  Hard Requirements:
31
37
  - MUST NOT break existing wandb functionality under ANY condition
32
- - If Pluto is down/misconfigured, silently continue with wandb only
38
+ - If Pluto is down/misconfigured, log a warning and continue with
39
+ wandb only — never raise.
33
40
  - Zero impact on wandb's behavior, return values, or exceptions
34
41
  """
35
42
 
@@ -622,19 +629,26 @@ def _make_patched_init(original_init, wandb_module):
622
629
  pluto_config = _get_pluto_config_from_env()
623
630
 
624
631
  # Project name fallback (works in ALL modes): if PLUTO_PROJECT
625
- # isn't set, use WANDB_PROJECT. This makes PLUTO_PROJECT fully
626
- # optional users who already have WANDB_PROJECT from their
627
- # existing wandb setup don't have to duplicate it.
632
+ # isn't set, fall back to (in order): the explicit `project`
633
+ # kwarg passed to wandb.init(), the `WANDB_PROJECT` env var,
634
+ # or finally the project attribute on the resolved wandb run.
635
+ # This makes PLUTO_PROJECT fully optional — frameworks like
636
+ # Lightning's WandbLogger pass project as a kwarg and may never
637
+ # set WANDB_PROJECT, so kwargs must be consulted too.
628
638
  #
629
639
  # If pluto_config is None here, it means PLUTO_PROJECT wasn't set
630
640
  # (that's the only reason _get_pluto_config_from_env returns None).
631
- # We build a fresh config from WANDB_PROJECT and re-read the
632
- # other PLUTO_* env vars (api key, urls) since the helper bailed
633
- # before reading them.
641
+ # We build a fresh config from the resolved project and re-read
642
+ # the other PLUTO_* env vars (api key, urls) since the helper
643
+ # bailed before reading them.
634
644
  if pluto_config is None:
635
- wandb_project = os.environ.get('WANDB_PROJECT')
636
- if wandb_project:
637
- pluto_config = {'project': wandb_project}
645
+ resolved_project = (
646
+ kwargs.get('project')
647
+ or os.environ.get('WANDB_PROJECT')
648
+ or getattr(wandb_run, 'project', None)
649
+ )
650
+ if resolved_project:
651
+ pluto_config = {'project': resolved_project}
638
652
  if api_key := os.environ.get('PLUTO_API_KEY'):
639
653
  pluto_config['api_key'] = api_key
640
654
  for env_var, cfg_key in (
@@ -645,8 +659,8 @@ def _make_patched_init(original_init, wandb_module):
645
659
  if v := os.environ.get(env_var):
646
660
  pluto_config[cfg_key] = v
647
661
  logger.info(
648
- 'pluto.compat.wandb: using WANDB_PROJECT as Pluto project '
649
- '(PLUTO_PROJECT not set)'
662
+ f'pluto.compat.wandb: using "{resolved_project}" as Pluto '
663
+ f'project (PLUTO_PROJECT not set)'
650
664
  )
651
665
 
652
666
  # Migration shortcut (disabled-mode only): in DISABLE_WANDB_LOGGING
@@ -662,10 +676,11 @@ def _make_patched_init(original_init, wandb_module):
662
676
  )
663
677
 
664
678
  if pluto_config is None:
665
- logger.info(
666
- 'pluto.compat.wandb: no project name available '
667
- '(set PLUTO_PROJECT or WANDB_PROJECT), '
668
- 'continuing with wandb-only logging'
679
+ logger.warning(
680
+ 'pluto.compat.wandb: cannot dual-log to Pluto — no project '
681
+ 'name resolvable (none of: PLUTO_PROJECT, project= kwarg, '
682
+ 'WANDB_PROJECT, wandb run project). Continuing with wandb-'
683
+ 'only logging.'
669
684
  )
670
685
  return wandb_run
671
686
 
@@ -299,7 +299,11 @@ class ServerInterface:
299
299
  status_code = r.status_code if r else 'N/A'
300
300
  target = len(drained) if drained else 'request'
301
301
  response = r.text if r else 'N/A'
302
- logger.warning(
302
+ # High-frequency endpoints (the trigger/heartbeat that fires
303
+ # every ~4 s) set suppress_httpx_logs; route their non-200
304
+ # responses to DEBUG so a flaky server doesn't spam WARNING.
305
+ log_response = logger.debug if suppress_httpx_logs else logger.warning
306
+ log_response(
303
307
  '%s: %s: attempt %s/%s: response code %s for %s from %s: %s',
304
308
  tag,
305
309
  name,
@@ -1,5 +1,6 @@
1
1
  import builtins
2
2
  import logging
3
+ import os
3
4
  import sys
4
5
  import time
5
6
 
@@ -65,6 +66,21 @@ class ConsoleHandler:
65
66
  self.sanitizer = sanitizer
66
67
  self._log_buffer: list = []
67
68
  self._last_flush = 0.0
69
+ # Carry-over for partial writes that don't end at a line boundary.
70
+ # Python's traceback printer (and rich) call write() with chunks
71
+ # like just whitespace or a single character, so treating each
72
+ # call as a complete line shreds tracebacks into one-char "lines".
73
+ self._partial_line: str = ''
74
+ # When running under torchrun, prepend the rank to captured lines
75
+ # so the Pluto UI can distinguish rank N from rank M without the
76
+ # user having to wrap every print() themselves. RANK is set by
77
+ # torchrun in every child process; absent → no prefix (so single-
78
+ # process and non-torch jobs keep their existing log format).
79
+ # Note: only the captured copy is prefixed, not the pass-through
80
+ # to self.stream — that lets torchrun add its own [defaultN]:
81
+ # prefix to the terminal stream without double-prefixing.
82
+ rank = os.environ.get('RANK')
83
+ self._rank_prefix = f'[rank{rank}] ' if rank is not None else ''
68
84
 
69
85
  def _flush_log_buffer(self) -> None:
70
86
  """Flush buffered console log lines to the sync store in one batch."""
@@ -78,30 +94,52 @@ class ConsoleHandler:
78
94
  self._log_buffer.clear()
79
95
  self._last_flush = time.time()
80
96
 
97
+ def _emit_line(self, line: str) -> None:
98
+ """Log one complete line through the sync buffer + the python logger."""
99
+ if not line: # do not log empty lines
100
+ return
101
+ self.count += 1
102
+ timestamp_ms = int(time.time() * 1000)
103
+ if self._rank_prefix:
104
+ line = self._rank_prefix + line
105
+ if self.sync_manager is not None:
106
+ sanitized_line = self.sanitizer.sanitize(line) if self.sanitizer else line
107
+ log_type = logging._levelToName.get(self.level, 'INFO')
108
+ self._log_buffer.append(
109
+ (sanitized_line, log_type, timestamp_ms, self.count)
110
+ )
111
+ self.logger.log(self.level, line)
112
+
81
113
  def write(self, buf: str) -> None:
82
- for line in buf.splitlines():
83
- if line: # do not log empty lines
84
- self.count += 1
85
- timestamp_ms = int(time.time() * 1000)
86
- if self.sync_manager is not None:
87
- sanitized_line = (
88
- self.sanitizer.sanitize(line) if self.sanitizer else line
89
- )
90
- log_type = logging._levelToName.get(self.level, 'INFO')
91
- self._log_buffer.append(
92
- (sanitized_line, log_type, timestamp_ms, self.count)
93
- )
94
- self.logger.log(self.level, line)
114
+ # Pass-through to the real stream first so terminal output is not
115
+ # delayed by our line buffering.
116
+ self.stream.write(buf)
117
+ self.stream.flush()
118
+
119
+ # Accumulate partial writes and only emit on real '\n' boundaries.
120
+ # Splitting on '\n' specifically (not splitlines()) avoids breaking
121
+ # on \v, \f, \x1c-\x1e, \x85, U+2028, U+2029 — chars that rich and
122
+ # other styled-output libs use as internal segment separators.
123
+ self._partial_line += buf
124
+ if '\n' not in self._partial_line:
125
+ return
126
+ *complete, self._partial_line = self._partial_line.split('\n')
127
+ for line in complete:
128
+ self._emit_line(line)
95
129
  # Flush the buffer if it's large enough or old enough
96
130
  if self._log_buffer and (
97
131
  len(self._log_buffer) >= self._FLUSH_SIZE
98
132
  or time.time() - self._last_flush >= self._FLUSH_INTERVAL
99
133
  ):
100
134
  self._flush_log_buffer()
101
- self.stream.write(buf)
102
- self.stream.flush()
103
135
 
104
136
  def flush(self):
137
+ # Emit any trailing partial line so we don't drop output that
138
+ # never got a terminating newline (e.g. a final print(..., end='')
139
+ # before interpreter shutdown).
140
+ if self._partial_line:
141
+ self._emit_line(self._partial_line)
142
+ self._partial_line = ''
105
143
  self._flush_log_buffer()
106
144
  self.stream.flush()
107
145
 
@@ -777,17 +777,24 @@ class Op:
777
777
  },
778
778
  )
779
779
  logger.critical('%s: interrupted %s', tag, e)
780
- _sentry.flush()
781
- logger.debug(f'{tag}: finished' if update_status else f'{tag}: closed')
782
- teardown_logger(logger, console=logging.getLogger('console'))
783
-
784
- self.settings.meta = []
785
- if pluto.ops is not None:
786
- pluto.ops = [
787
- op for op in pluto.ops if op.settings._op_id != self.settings._op_id
788
- ] # TODO: make more efficient
789
- if not pluto.ops:
790
- _unregister_excepthook()
780
+ # Re-raise user-initiated termination so the process actually
781
+ # exits as the user expects. Post-cleanup (sentry flush,
782
+ # teardown_logger, pluto.ops mutation) still runs via the
783
+ # finally block below.
784
+ if isinstance(e, KeyboardInterrupt):
785
+ raise
786
+ finally:
787
+ _sentry.flush()
788
+ logger.debug(f'{tag}: finished' if update_status else f'{tag}: closed')
789
+ teardown_logger(logger, console=logging.getLogger('console'))
790
+
791
+ self.settings.meta = []
792
+ if pluto.ops is not None:
793
+ pluto.ops = [
794
+ op for op in pluto.ops if op.settings._op_id != self.settings._op_id
795
+ ] # TODO: make more efficient
796
+ if not pluto.ops:
797
+ _unregister_excepthook()
791
798
 
792
799
  def watch(self, module, **kwargs):
793
800
  from .compat.torch import _watch_torch
@@ -567,6 +567,20 @@ def _sync_main(
567
567
  except Exception as e:
568
568
  log.error(f'Sync process error: {e}', exc_info=True)
569
569
  finally:
570
+ # If we exited the loop because of SIGTERM/SIGINT (vs an exception),
571
+ # drain pending records before tearing down. torchrun gives ~30s
572
+ # before SIGKILL and pluto's shutdown_timeout defaults to 30s, so
573
+ # we have time — without this any records still in SQLite are left
574
+ # behind and require a manual `pluto sync` to recover.
575
+ if shutdown_requested['value']:
576
+ log.info(
577
+ f'Shutdown signal received, draining pending records '
578
+ f'(up to {shutdown_timeout}s)'
579
+ )
580
+ try:
581
+ _flush_remaining(store, uploader, log, shutdown_timeout, max_retries)
582
+ except Exception as drain_err:
583
+ log.warning(f'Drain on shutdown failed: {drain_err}')
570
584
  uploader.close()
571
585
  store.close()
572
586
  log.info('Sync process exiting')
@@ -1,6 +1,6 @@
1
1
  [tool.poetry]
2
2
  name = "pluto-ml"
3
- version = "0.0.20"
3
+ version = "0.0.22"
4
4
  description = "Pluto ML - Machine Learning Operations Framework"
5
5
  packages = [
6
6
  {include = "pluto"},
@@ -0,0 +1 @@
1
+ import pluto._wandb_hook; pluto._wandb_hook.install()
@@ -1,160 +0,0 @@
1
- """
2
- Import hook that intercepts `import wandb` to enable dual-logging to Pluto.
3
-
4
- This module is designed to be loaded via a .pth file at Python startup.
5
- It registers a sys.meta_path finder that, when `import wandb` is executed,
6
- loads the real wandb package and then monkey-patches it to dual-log to Pluto.
7
-
8
- Activation (needs both an API key and a project name):
9
- API key (required):
10
- - PLUTO_API_KEY: Pluto API token, OR
11
- - WANDB_API_KEY as a fallback when DISABLE_WANDB_LOGGING=true
12
- (user reuses the wandb env var to hold a Pluto token)
13
- Project name (required):
14
- - PLUTO_PROJECT, OR
15
- - WANDB_PROJECT as a fallback (works in all modes)
16
-
17
- Optional:
18
- - DISABLE_WANDB_LOGGING=true: Skip real wandb, log to Pluto only
19
- """
20
-
21
- import importlib
22
- import logging
23
- import sys
24
-
25
- logger = logging.getLogger(__name__)
26
-
27
- _hook_installed = False
28
-
29
-
30
- class _PlutoWandbFinder:
31
- """
32
- Meta path finder that intercepts `import wandb` to apply dual-logging patches.
33
-
34
- Uses find_module/load_module (not the newer find_spec/exec_module from PEP 451)
35
- because the spec-based API doesn't cleanly support "load the real package, then
36
- patch it" — exec_module runs on a partially-initialized module object, causing
37
- circular import issues with wandb's internal imports.
38
-
39
- On first `import wandb`, this finder:
40
- 1. Temporarily removes itself from sys.meta_path (to avoid recursion)
41
- 2. Loads the real wandb package via normal import machinery
42
- 3. Applies monkey-patches to wandb.init/wandb.log/etc. for dual-logging
43
- 4. Re-inserts itself (for future imports, though wandb is now cached in sys.modules)
44
- """
45
-
46
- _patching = False
47
-
48
- def find_module(self, fullname, path=None):
49
- # Only intercept top-level `import wandb`, and only once
50
- if fullname == 'wandb' and not self._patching:
51
- return self
52
- return None
53
-
54
- def load_module(self, fullname):
55
- # If wandb is already in sys.modules, it's been loaded
56
- if fullname in sys.modules:
57
- return sys.modules[fullname]
58
-
59
- # Prevent re-entrant calls
60
- self._patching = True
61
- try:
62
- # Remove ourselves so the real import machinery finds the real wandb
63
- sys.meta_path.remove(self)
64
- try:
65
- real_wandb = importlib.import_module('wandb')
66
- finally:
67
- # Always re-insert ourselves
68
- sys.meta_path.insert(0, self)
69
-
70
- # Apply the dual-logging patches
71
- try:
72
- from pluto.compat.wandb import apply_wandb_patches
73
-
74
- apply_wandb_patches(real_wandb)
75
- logger.info(
76
- 'pluto._wandb_hook: Successfully patched wandb for dual-logging'
77
- )
78
- except Exception as e:
79
- logger.warning(
80
- f'pluto._wandb_hook: Failed to apply wandb patches: {e}. '
81
- f'wandb will work normally without Pluto dual-logging.'
82
- )
83
-
84
- return real_wandb
85
- finally:
86
- self._patching = False
87
-
88
-
89
- def install():
90
- """
91
- Register the wandb import hook on sys.meta_path.
92
-
93
- Activation requires:
94
- - An API key: PLUTO_API_KEY (always), OR WANDB_API_KEY if
95
- DISABLE_WANDB_LOGGING=true (migration shortcut — user reuses
96
- the wandb env var to hold a Pluto token).
97
- - A project name: PLUTO_PROJECT, OR WANDB_PROJECT as a fallback
98
- (works in all modes; saves users from setting the same value
99
- in two env vars).
100
-
101
- PLUTO_API_KEY is the user's explicit opt-in signal — if it's not
102
- set, the hook never activates even if WANDB_PROJECT is present.
103
- This means wandb users who happen to have pluto-ml installed but
104
- never set a Pluto API key see no behavior change.
105
-
106
- Safe to call multiple times.
107
- """
108
- import os
109
-
110
- global _hook_installed
111
-
112
- if _hook_installed:
113
- return
114
-
115
- wandb_disabled = os.environ.get('DISABLE_WANDB_LOGGING', '').lower() in (
116
- 'true',
117
- '1',
118
- 'yes',
119
- )
120
- # API key: PLUTO_API_KEY preferred; WANDB_API_KEY only in disabled mode.
121
- have_api_key = bool(os.environ.get('PLUTO_API_KEY')) or (
122
- wandb_disabled and bool(os.environ.get('WANDB_API_KEY'))
123
- )
124
- # Project name: PLUTO_PROJECT preferred; WANDB_PROJECT fallback always.
125
- have_project = bool(os.environ.get('PLUTO_PROJECT')) or bool(
126
- os.environ.get('WANDB_PROJECT')
127
- )
128
- if not (have_api_key and have_project):
129
- return
130
-
131
- # Don't install if wandb is already imported (too late to intercept)
132
- if 'wandb' in sys.modules:
133
- logger.warning(
134
- 'pluto._wandb_hook: wandb already imported before hook installation. '
135
- 'Attempting to patch existing wandb module.'
136
- )
137
- try:
138
- from pluto.compat.wandb import apply_wandb_patches
139
-
140
- apply_wandb_patches(sys.modules['wandb'])
141
- except Exception as e:
142
- logger.warning(
143
- f'pluto._wandb_hook: Failed to patch already-imported wandb: {e}'
144
- )
145
- _hook_installed = True
146
- return
147
-
148
- # Install the finder
149
- finder = _PlutoWandbFinder()
150
- sys.meta_path.insert(0, finder)
151
- _hook_installed = True
152
-
153
-
154
- def uninstall():
155
- """Remove the wandb import hook (for testing)."""
156
- global _hook_installed
157
- sys.meta_path[:] = [
158
- f for f in sys.meta_path if not isinstance(f, _PlutoWandbFinder)
159
- ]
160
- _hook_installed = False
@@ -1 +0,0 @@
1
- import os; _dw=os.environ.get('DISABLE_WANDB_LOGGING','').lower() in ('true','1','yes'); _k=os.environ.get('PLUTO_API_KEY') or (_dw and os.environ.get('WANDB_API_KEY')); _p=os.environ.get('PLUTO_PROJECT') or os.environ.get('WANDB_PROJECT'); _k and _p and __import__('pluto._wandb_hook')._wandb_hook.install()
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes
File without changes