pubrun 0.1.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pubrun/__init__.py ADDED
@@ -0,0 +1,305 @@
1
+ """
2
+ pubrun - Zero-dependency Python execution provenance and telemetry capture.
3
+
4
+ Usage
5
+ -----
6
+ 1. Auto-start (default)::
7
+
8
+ import pubrun # Tracking begins on import.
9
+
10
+ 2. Explicit control::
11
+
12
+ import pubrun
13
+
14
+ tracker = pubrun.start(profile="deep")
15
+ pubrun.annotate("loading_datasets", batches=400, mode="lazy")
16
+
17
+ with pubrun.phase("gradient_descent"):
18
+ train_model()
19
+
20
+ tracker.stop()
21
+
22
+ 3. Decorator::
23
+
24
+ @pubrun.audit_run(profile="basic")
25
+ def evaluate_node():
26
+ ...
27
+ """
28
+ import logging
29
+ import sys
30
+ from typing import Any, Callable, Optional
31
+
32
+ from pubrun.tracker import Run, get_current_run
33
+
34
+ # -- Metadata ----------------------------------------------------------------
35
# Resolve the installed distribution version; any failure falls back to the
# hard-coded release string below.
try:
    if sys.version_info < (3, 8):
        # Older interpreters lack importlib.metadata and use the backport.
        from importlib_metadata import version as _pkg_version
    else:
        from importlib.metadata import version as _pkg_version
    __version__ = _pkg_version("pubrun")
except Exception:
    __version__ = "0.1.1"  # fallback for editable installs / dev

__author__ = "Gabriele Fariello"
__credit__ = __author__  # backward-compatible alias
__copyright__ = "Copyright 2026 Gabriele Fariello"
__license__ = "BSD-3-Clause"
48
+
49
# Public API of the package: the names exported by ``from pubrun import *``.
__all__ = [
    "start",
    "stop",
    "annotate",
    "phase",
    "diff",
    "audit_run",
    "tracked_run",
    "get_current_run",
    "__version__",
]
60
+
61
+
62
+ def _handle_inactive(context: str) -> None:
63
+ """Check the on_inactive_annotate policy and raise/warn/ignore accordingly.
64
+
65
+ Called when annotate() or phase() is used with no active run.
66
+
67
+ Args:
68
+ context: Human-readable label for the error/warning message
69
+ (e.g. "pubrun.annotate()" or "pubrun.phase('training')").
70
+ """
71
+ from pubrun.config import resolve_config
72
+ action = resolve_config().get("events", {}).get("on_inactive_annotate", "ignore")
73
+ if action == "error":
74
+ raise RuntimeError(f"{context} called but no run is active.")
75
+ elif action == "warn":
76
+ logging.getLogger("pubrun").warning(f"{context} dropped: No active run.")
77
+
78
+
79
def annotate(message: Optional[str] = None, **kwargs: Any) -> None:
    """Emit an ``annotation`` event on the active run's event stream.

    When a run with an event stream is active, the message is passed as the
    event name and a copy of the keyword arguments as its payload. Otherwise
    the call is routed to the ``[events].on_inactive_annotate`` policy
    ("ignore", "warn", or "error").

    Args:
        message: Optional human-readable label for the annotation.
        **kwargs: Arbitrary JSON-serializable key-value pairs.

    Example:
        >>> pubrun.annotate("Starting GPU Allocation")
        >>> pubrun.annotate("Model Configured", layers=50, optimizer="adamw")
    """
    run = get_current_run()
    has_stream = bool(run and getattr(run, "event_stream", None))
    if not has_stream:
        _handle_inactive("pubrun.annotate()")
        return

    # Copy so the emitted payload is independent of the caller's kwargs dict.
    run.event_stream.emit("annotation", name=message, payload=dict(kwargs))
100
+
101
+
102
def start(**kwargs: Any) -> Run:
    """Begin tracking, or join the run that is already active.

    With no active run, a new ``Run`` is constructed from the given
    overrides. With an active run, its ``ref_count`` is incremented, the
    overrides are handed to its ``_merge_and_migrate`` hook (when the run
    provides one), and that same run is returned.

    Args:
        **kwargs: Configuration overrides (same keys as ``.pubrun.toml``).

    Returns:
        The active ``Run`` instance.

    Example:
        >>> tracker = pubrun.start(profile="deep")
        >>> tracker.stop()
    """
    existing = get_current_run()
    if not existing:
        return Run(overrides=kwargs)

    # Nested start: bump the reference count instead of creating a second run.
    existing.ref_count = getattr(existing, "ref_count", 0) + 1
    if hasattr(existing, "_merge_and_migrate"):
        existing._merge_and_migrate(kwargs)
    return existing
128
+
129
+
130
def stop() -> None:
    """Stop the currently active run, if there is one.

    Delegates to the active run's ``stop()`` method. Calling this with no
    active run is a no-op.
    """
    run = get_current_run()
    if not run:
        return
    run.stop()
140
+
141
+
142
def diff(run_dir_a: str, run_dir_b: str, ignores: Optional[list] = None) -> dict:
    """Load two run manifests and return the result of comparing them.

    Each directory must contain a ``manifest.json``; the parsed manifest is
    passed through ``hydrate_manifest`` before comparison.

    Args:
        run_dir_a: Path to the baseline run directory.
        run_dir_b: Path to the comparison run directory.
        ignores: Manifest keys to exclude from comparison. When ``None``,
            the ``[diff].ignore`` config list is used (empty if unset).

    Returns:
        Whatever ``compare_manifests`` returns for the two hydrated manifests.

    Raises:
        FileNotFoundError: If either directory lacks a ``manifest.json``.

    Example:
        >>> delta = pubrun.diff("runs/A", "runs/B")
        >>> print(delta["added"])
    """
    import json
    from pathlib import Path
    from pubrun.config import resolve_config
    from pubrun.report.utils import hydrate_manifest
    from pubrun.analysis.diff import compare_manifests

    def _read_manifest(run_dir: str) -> dict:
        manifest_path = Path(run_dir) / "manifest.json"
        if not manifest_path.exists():
            raise FileNotFoundError(f"Missing: {manifest_path}")
        with manifest_path.open("r", encoding="utf-8") as handle:
            raw = json.load(handle)
        # hydrate_manifest returns a pair; only the first element is used here.
        hydrated, _unused = hydrate_manifest(str(manifest_path), raw)
        return hydrated

    baseline = _read_manifest(run_dir_a)
    candidate = _read_manifest(run_dir_b)

    # Only consult the configuration when the caller gave no explicit list.
    if ignores is None:
        diff_cfg = resolve_config().get("diff", {})
        ignores = diff_cfg.get("ignore", [])

    return compare_manifests(baseline, candidate, ignores)
180
+
181
+
182
def audit_run(func: Optional[Callable] = None, **kwargs: Any) -> Callable:
    """Decorator that wraps a function in a pubrun tracking session.

    A run is started before the function executes and stopped afterward
    with outcome ``"completed"``. If the function raises an ``Exception``,
    the run is stopped with outcome ``"failed"`` and the exception is
    re-raised. The returned wrapper keeps the wrapped function's metadata
    (``__name__``, ``__doc__``, ``__wrapped__``, ...).

    Usable both bare (``@pubrun.audit_run``) and with configuration
    (``@pubrun.audit_run(profile="deep")``).

    Args:
        func: The function to wrap (supplied automatically by Python when
            the decorator is used without parentheses).
        **kwargs: Configuration overrides forwarded to ``start()``.

    Returns:
        The wrapped function, or a decorator awaiting the function when
        ``func`` is ``None``.

    Example:
        >>> @pubrun.audit_run(profile="deep")
        ... def train():
        ...     model.fit()
    """
    import functools  # local import keeps this block self-contained

    if func is None:
        # Parenthesised form: hand back a decorator that re-enters audit_run
        # with the target function and the captured overrides.
        def wrapper(f: Callable) -> Callable:
            return audit_run(f, **kwargs)
        return wrapper

    @functools.wraps(func)
    def wrapped(*args: Any, **func_kwargs: Any) -> Any:
        run_tracker = start(**kwargs)
        try:
            result = func(*args, **func_kwargs)
        except Exception:
            run_tracker.stop(outcome="failed")
            raise
        else:
            # Kept outside the ``try`` body so that an error raised by this
            # stop() call does not trigger a second stop(outcome="failed").
            run_tracker.stop(outcome="completed")
            return result

    return wrapped
214
+
215
+
216
class tracked_run:
    """Context manager that runs a code block inside a pubrun session.

    Entering calls ``start()`` with the stored overrides; leaving stops the
    run with outcome ``"failed"`` if the block raised and ``"completed"``
    otherwise. Exceptions are not suppressed.

    Args:
        **kwargs: Configuration overrides forwarded to ``start()``.

    Example:
        >>> with pubrun.tracked_run(profile="minimal"):
        ...     model.train(epochs=5)
    """

    def __init__(self, **kwargs: Any) -> None:
        # The run is only created on __enter__, so it starts out unset.
        self.run_tracker: Optional[Run] = None
        self.kwargs = kwargs

    def __enter__(self) -> "tracked_run":
        self.run_tracker = start(**self.kwargs)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        if not self.run_tracker:
            return
        outcome = "completed" if exc_type is None else "failed"
        self.run_tracker.stop(outcome=outcome)
240
+
241
+
242
class phase:
    """Context manager that brackets a block with phase events.

    Emits ``phase_start`` on entry and ``phase_end`` on exit to the event
    stream of the run that was active when the object was constructed. If
    the block raised, the ``phase_end`` payload carries the exception class
    name under ``"error"``. With no active run, entry is routed to the
    ``[events].on_inactive_annotate`` policy and exit does nothing.

    Args:
        name: Label for this phase, sent as the event name.

    Example:
        >>> with pubrun.phase("data_ingestion"):
        ...     df = pd.read_csv("huge.csv")
    """

    def __init__(self, name: str) -> None:
        self.name = name
        # The run is captured once, at construction time.
        self.run_tracker = get_current_run()

    def __enter__(self) -> "phase":
        tracker = self.run_tracker
        if not (tracker and getattr(tracker, "event_stream", None)):
            _handle_inactive(f"pubrun.phase('{self.name}')")
            return self
        tracker.event_stream.emit("phase_start", name=self.name)
        return self

    def __exit__(self, exc_type: Any, exc_val: Any, exc_tb: Any) -> None:
        tracker = self.run_tracker
        if not (tracker and getattr(tracker, "event_stream", None)):
            return
        extra = {}
        if exc_type is not None:
            # Record only the exception class name, not its message.
            extra["payload"] = {"error": exc_val.__class__.__name__}
        tracker.event_stream.emit("phase_end", name=self.name, **extra)
274
+
275
+ # ============================================================================
276
+ # Boot Sequence Heuristics
277
+ # ============================================================================
278
import os as _os
import logging as _logging

# Decide whether importing the package should start a run automatically.
# The config value is read first; the PUBRUN_AUTO_START environment variable
# overrides it when it is "true" or "false" (compared case-insensitively).
_should_auto = False
try:
    from pubrun.config import resolve_config as _resolve_config
    _config_map = _resolve_config()
    _should_auto = _config_map.get("core", {}).get("auto_start", False)
    _env_val = str(_os.environ.get("PUBRUN_AUTO_START", "")).lower()
    if _env_val in ("true", "false"):
        _should_auto = _env_val == "true"
except Exception as _boot_err:
    # A failure while resolving configuration disables auto-start and is
    # reported as a warning rather than propagated to the importer.
    _logging.getLogger("pubrun").warning(
        f"pubrun boot sequence failed (tracking disabled): {_boot_err}"
    )
    _should_auto = False

if _should_auto and not get_current_run():
    import sys as _sys
    # Skip auto-start when argv[0] is one of the names listed below.
    # NOTE(review): presumably these are the package's own CLI entry points - confirm.
    _sys0 = _os.path.basename(_sys.argv[0]) if _sys.argv else ""
    if _sys0 in ("pubrun", "pubrun.exe", "__main__.py", "-m"):
        _should_auto = False

    # NOTE(review): indentation was lost in the rendered source; this final
    # check is assumed to sit inside the "no run active yet" guard - confirm.
    if _should_auto:
        start()
305
+