runmonitor 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
runmonitor/__init__.py ADDED
@@ -0,0 +1,125 @@
1
+ """
2
+ runmonitor — a lean, local experiment tracker with a live web dashboard.
3
+ Import and go. Dashboard auto-starts on port 8080.
4
+ """
5
+
6
+ import json
7
+ import uuid
8
+ import atexit
9
+ import threading
10
+ import time
11
+ from . import server # noqa: F401 — starts the daemon
12
+ from . import storage
13
+
14
+
15
+ class Run:
16
+ """A single experiment run. Created via rm.init()."""
17
+
18
+ def __init__(self, run_id: str, project_id: int, name: str | None,
19
+ config: dict, total_steps: int | None):
20
+ self._id = run_id
21
+ self._project_id = project_id
22
+ self._name = name
23
+ self._config = config
24
+ self._total_steps = total_steps
25
+ self._finished = False
26
+ self._sysmon_stop = threading.Event()
27
+ self._sysmon_thread = None
28
+ self._start_sysmon()
29
+ atexit.register(self._on_exit)
30
+
31
+ @property
32
+ def id(self) -> str:
33
+ return self._id
34
+
35
+ @property
36
+ def name(self) -> str | None:
37
+ return self._name
38
+
39
+ def log(self, metrics: dict, step: int) -> None:
40
+ """Log a dictionary of metric-name → float at a given step."""
41
+ if self._finished:
42
+ raise RuntimeError("Cannot log to a finished run.")
43
+ if not isinstance(metrics, dict):
44
+ raise TypeError("metrics must be a dict")
45
+ storage.log_metrics(self._id, metrics, step)
46
+
47
+ def save(self, filepath: str) -> dict:
48
+ """Save an artifact file alongside this run. Returns artifact info dict."""
49
+ return storage.save_artifact(self._id, filepath)
50
+
51
+ def finish(self) -> None:
52
+ """Mark the run as successfully finished."""
53
+ if self._finished:
54
+ return
55
+ self._stop_sysmon()
56
+ storage.finish_run(self._id, "finished")
57
+ self._finished = True
58
+
59
+ def fail(self) -> None:
60
+ """Mark the run as crashed."""
61
+ if self._finished:
62
+ return
63
+ self._stop_sysmon()
64
+ storage.finish_run(self._id, "crashed")
65
+ self._finished = True
66
+
67
+ # ── system metrics background thread ───────────────────
68
+
69
+ def _start_sysmon(self):
70
+ """Start a daemon thread that logs CPU/RAM every 10 seconds."""
71
+ def _collect():
72
+ try:
73
+ import psutil
74
+ has_psutil = True
75
+ except ImportError:
76
+ has_psutil = False
77
+
78
+ last_cpu_sample = None
79
+ step_counter = 0
80
+ while not self._sysmon_stop.wait(timeout=10):
81
+ if not has_psutil:
82
+ return
83
+ step_counter += 1
84
+ try:
85
+ cpu = psutil.cpu_percent(interval=0.1)
86
+ mem = psutil.virtual_memory().percent
87
+ storage.log_system_metrics(self._id, step_counter, cpu, mem)
88
+ except Exception:
89
+ pass
90
+
91
+ self._sysmon_thread = threading.Thread(
92
+ target=_collect, name="rm-sysmon", daemon=True
93
+ )
94
+ self._sysmon_thread.start()
95
+
96
+ def _stop_sysmon(self):
97
+ self._sysmon_stop.set()
98
+ if self._sysmon_thread:
99
+ self._sysmon_thread.join(timeout=2)
100
+
101
+ def _on_exit(self):
102
+ """Mark the run as crashed if it exits without finish()/fail()."""
103
+ if self._finished:
104
+ return
105
+ self._stop_sysmon()
106
+ storage.finish_run(self._id, "crashed")
107
+ self._finished = True
108
+
109
+
110
def init(project: str, name: str | None = None, config: dict | None = None,
         total_steps: int | None = None) -> Run:
    """
    Create (or reuse) a project and start a new run.

        import runmonitor as rm
        run = rm.init("mnist-experiment", config={"lr": 0.001}, total_steps=1000)

    The storage layer is initialised first, then the project and a run with
    a 12-character hex id are recorded. ``config`` is stored as JSON; a
    missing or empty config is stored as ``{}``.
    """
    storage.init_db()
    project_id = storage.create_project(project)

    # 12 hex characters taken from a random UUID serve as the run id.
    new_run_id = uuid.uuid4().hex[:12]

    settings = config if config else {}
    storage.create_run(
        new_run_id, project_id, name, json.dumps(settings), total_steps
    )
    return Run(new_run_id, project_id, name, settings, total_steps)
runmonitor/__main__.py ADDED
@@ -0,0 +1,54 @@
1
+ """Run the dashboard standalone — no training script needed.
2
+
3
+ runmonitor # if pip-installed (console script)
4
+ python -m runmonitor # from a checkout / vendored copy
5
+ RUNMONITOR_PORT=9000 runmonitor # choose a port (set before launch)
6
+
7
+ Importing ``runmonitor`` already auto-starts the dashboard daemon, so this
8
+ just makes sure it's up, opens a browser, and keeps the process alive.
9
+ """
10
+ import argparse
11
+ import os
12
+ import threading
13
+ import webbrowser
14
+
15
+
16
def main() -> None:
    """Command-line entry point: make sure the dashboard runs, then block.

    Parses ``--port`` / ``--no-browser``, initialises storage, starts the
    server if it is not running yet, prints the dashboard URL, optionally
    opens it in a browser, and then waits until interrupted with Ctrl-C.
    """
    cli = argparse.ArgumentParser(
        prog="runmonitor",
        description="Live, terminal-styled experiment-tracking dashboard.",
    )
    cli.add_argument(
        "--port",
        type=int,
        default=None,
        help="Preferred port (most reliable via RUNMONITOR_PORT before launch).",
    )
    cli.add_argument(
        "--no-browser",
        action="store_true",
        help="Do not open a browser window.",
    )
    opts = cli.parse_args()
    requested_port = opts.port

    # setdefault: a RUNMONITOR_PORT already present in the environment wins.
    if requested_port:
        os.environ.setdefault("RUNMONITOR_PORT", str(requested_port))

    from .storage import init_db
    from . import server

    init_db()
    server._start_server()  # idempotent — reuses the daemon if already running
    bound_port = server._port
    url = f"http://localhost:{bound_port}"
    print(f" runmonitor dashboard → {url}")
    if requested_port and requested_port != server._port:
        print(f" (port {requested_port} was unavailable; bound {server._port} instead)")

    open_browser = not opts.no_browser
    if open_browser:
        try:
            webbrowser.open(url)
        except Exception:
            # Failing to launch a browser must not stop the dashboard.
            pass

    try:
        # An event nobody sets: blocks forever; Ctrl-C to quit.
        threading.Event().wait()
    except KeyboardInterrupt:
        print("\n bye.")


if __name__ == "__main__":
    main()
runmonitor/server.py ADDED
@@ -0,0 +1,163 @@
1
+ """
2
+ Flask server that auto-starts in a daemon thread on first import.
3
+ Serves the dashboard + JSON API.
4
+ """
5
+
6
+ import threading
7
+ import os
8
+ import time
9
+ import socket
10
+ from flask import Flask, jsonify, request, send_from_directory
11
+
12
+ from . import storage
13
+
14
# Flask application; templates and static assets live next to this module.
app = Flask(
    __name__,
    template_folder=os.path.join(os.path.dirname(__file__), "templates"),
    static_folder=os.path.join(os.path.dirname(__file__), "static"),
)
app.config.update(JSONIFY_PRETTYPRINT_REGULAR=False)

# Module state written by _start_server(): whether the daemon thread has been
# launched, and the port it was asked to listen on.
_started = False
_port = 8080
21
+
22
+
23
+ def _find_port(start=None):
24
+ """Find the first port we can actually bind, starting from `start`.
25
+
26
+ Defaults to ``$RUNMONITOR_PORT`` (set before launch) or 8080.
27
+ """
28
+ if start is None:
29
+ try:
30
+ start = int(os.environ.get("RUNMONITOR_PORT", 8080))
31
+ except ValueError:
32
+ start = 8080
33
+ for p in range(start, start + 100):
34
+ with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
35
+ try:
36
+ s.bind(("127.0.0.1", p))
37
+ return p
38
+ except OSError:
39
+ continue
40
+ return start
41
+
42
+
43
+ # ── API routes ──────────────────────────────────────────────────
44
+
45
@app.route("/api/projects")
def api_projects():
    """Serve the result of ``storage.get_projects()`` as JSON."""
    projects = storage.get_projects()
    return jsonify(projects)
48
+
49
+
50
@app.route("/api/runs")
def api_runs():
    """Serve ``storage.get_runs()`` as JSON.

    The optional ``project`` query parameter is passed through to storage;
    it is ``None`` when the parameter is absent.
    """
    project_filter = request.args.get("project")
    runs = storage.get_runs(project_filter)
    return jsonify(runs)
54
+
55
+
56
@app.route("/api/runs/<run_id>/config")
def api_run_config(run_id):
    """Serve the stored config for ``run_id``; 404 when storage returns None."""
    config_row = storage.get_run_config(run_id)
    if config_row is not None:
        return jsonify(config_row)
    return jsonify({"error": "not found"}), 404
62
+
63
+
64
@app.route("/api/runs/<run_id>/metrics")
def api_run_metrics(run_id):
    """Serve the metrics of ``run_id`` as JSON.

    Query parameters:
        key:   optional metric name, passed through to storage.
        limit: optional integer, passed through to storage. A missing or
               non-integer value is treated as "no limit" (``None``)
               instead of failing the request.
    """
    key = request.args.get("key")
    # type=int makes the lookup return the default (None) when the value
    # cannot be converted, the same way the /metrics/live route parses
    # `since`; a bare int() call would turn bad input into an HTTP 500.
    limit = request.args.get("limit", type=int)
    return jsonify(storage.get_metrics(run_id, key, limit))
71
+
72
+
73
@app.route("/api/runs/<run_id>/metrics/live")
def api_run_metrics_live(run_id):
    """Return only metrics with step > `since` for efficient polling."""
    # `since` falls back to 0 when absent or not an integer.
    since_step = request.args.get("since", default=0, type=int)
    fresh_metrics = storage.get_metrics(run_id, since=since_step)
    return jsonify(fresh_metrics)
78
+
79
+
80
@app.route("/api/runs/<run_id>/artifacts")
def api_run_artifacts(run_id):
    """Serve ``storage.get_artifacts(run_id)`` as JSON."""
    artifacts = storage.get_artifacts(run_id)
    return jsonify(artifacts)
83
+
84
+
85
@app.route("/api/runs/<run_id>/system")
def api_run_system(run_id):
    """Serve ``storage.get_system_metrics(run_id)`` as JSON."""
    samples = storage.get_system_metrics(run_id)
    return jsonify(samples)
88
+
89
+
90
@app.route("/api/runs/<run_id>/export")
def api_run_export(run_id):
    """Export one run as JSON (default) or as a CSV download.

    ``?format=csv`` returns the metrics as an attachment with the columns
    step, key, value, timestamp. Any other ``format`` value returns a JSON
    document bundling config, metrics, artifacts and system metrics.
    """
    export_format = request.args.get("format", "json")
    metric_rows = storage.get_metrics(run_id)
    run_config = storage.get_run_config(run_id)

    if export_format != "csv":
        return jsonify({
            "run_id": run_id,
            "config": run_config,
            "metrics": metric_rows,
            "artifacts": storage.get_artifacts(run_id),
            "system_metrics": storage.get_system_metrics(run_id),
        })

    import csv
    import io

    buffer = io.StringIO()
    csv_writer = csv.writer(buffer)
    csv_writer.writerow(["step", "key", "value", "timestamp"])
    # Each metric row is indexed by these four keys.
    csv_writer.writerows(
        [row["step"], row["key"], row["value"], row["timestamp"]]
        for row in metric_rows
    )
    payload = buffer.getvalue()

    from flask import Response
    download_headers = {
        "Content-Disposition": f"attachment; filename=run_{run_id}.csv",
    }
    return Response(payload, mimetype="text/csv", headers=download_headers)
119
+
120
+
121
@app.route("/api/runs/<run_id>/compare")
def api_run_compare(run_id):
    """Return metrics for two runs keyed by the same metric name.

    Requires the ``other`` (second run id) and ``key`` (metric name) query
    parameters; responds with 400 when either is missing or empty.
    """
    other_id = request.args.get("other")
    metric_key = request.args.get("key")
    if not (other_id and metric_key):
        return jsonify({"error": "need `other` and `key` params"}), 400

    first = {"id": run_id, "metrics": storage.get_metrics(run_id, key=metric_key)}
    second = {"id": other_id, "metrics": storage.get_metrics(other_id, key=metric_key)}
    return jsonify({"run_a": first, "run_b": second})
134
+
135
+
136
+ # ── Dashboard ───────────────────────────────────────────────────
137
+
138
@app.route("/")
def dashboard():
    """Render ``dashboard.html``, passing the module's current port to it."""
    from flask import render_template

    current_port = _port
    return render_template("dashboard.html", port=current_port)
142
+
143
+
144
+ # ── Daemon start ────────────────────────────────────────────────
145
+
146
def _start_server():
    """Launch the Flask app in a daemon thread, at most once per process.

    The first call marks the server as started, picks a port with
    ``_find_port()``, starts the thread and sleeps briefly. Later calls
    return immediately.
    """
    global _started, _port
    if not _started:
        _started = True
        _port = _find_port()

        server_thread = threading.Thread(
            target=app.run,
            kwargs={
                "host": "127.0.0.1",
                "port": _port,
                "debug": False,
                "use_reloader": False,
            },
            name="runmonitor-server",
            daemon=True,
        )
        server_thread.start()
        time.sleep(0.3)  # give it a moment to bind
159
+
160
+
161
# Import-time side effect: importing this module starts the dashboard daemon
# unless RUNMONITOR_STANDALONE is set to a non-empty value.
import os as _os

if not _os.environ.get("RUNMONITOR_STANDALONE", ""):
    _start_server()