modelstat 0.0.45 → 0.0.47
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.mjs +1090 -292
- package/dist/cli.mjs.map +1 -1
- package/package.json +1 -1
- package/vendor/tray-mac/Sources/ModelstatTray/main.swift +145 -21
package/vendor/tray-mac/Sources/ModelstatTray/main.swift
CHANGED
|
@@ -13,6 +13,7 @@
|
|
|
13
13
|
// ~300 LOC, no dependencies beyond AppKit/Foundation.
|
|
14
14
|
|
|
15
15
|
import AppKit
|
|
16
|
+
import Darwin
|
|
16
17
|
import Foundation
|
|
17
18
|
|
|
18
19
|
// ── Resolve the `modelstat` CLI on $PATH, then at the install-path
|
|
@@ -107,9 +108,20 @@ struct LocalStatsCounters: Decodable {
|
|
|
107
108
|
final class TrayController: NSObject {
|
|
108
109
|
private let statusItem: NSStatusItem
|
|
109
110
|
private let menu = NSMenu()
|
|
110
|
-
|
|
111
|
+
/// Re-resolved by ensureDaemon() when nil — a transient resolution
|
|
112
|
+
/// failure at tray boot (e.g. the installer is mid-rewrite of
|
|
113
|
+
/// ~/.modelstat/bin) must not permanently strand the daemon.
|
|
114
|
+
private var cli: URL?
|
|
111
115
|
private var daemon: Process?
|
|
116
|
+
/// When the current `daemon` child was spawned — used to detect
|
|
117
|
+
/// "exited cleanly almost immediately" (another daemon owns the
|
|
118
|
+
/// lock) so we back off to the watchdog instead of respawning hot.
|
|
119
|
+
private var daemonSpawnedAt: Date?
|
|
112
120
|
private var paused = false
|
|
121
|
+
/// Serialises _daemon-health probes; collapses overlapping
|
|
122
|
+
/// ensureDaemon() calls into one in-flight probe.
|
|
123
|
+
private let superviseQueue = DispatchQueue(label: "ai.modelstat.tray.supervise", qos: .utility)
|
|
124
|
+
private var ensureInFlight = false
|
|
113
125
|
private var latest: AgentStats?
|
|
114
126
|
/// Live local heartbeat, read straight from ~/.modelstat/last-status.json
|
|
115
127
|
/// on the fast timer. Decoupled from `latest` (the slower, network-backed
|
|
@@ -120,6 +132,7 @@ final class TrayController: NSObject {
|
|
|
120
132
|
private var spinnerTick = 0
|
|
121
133
|
private var fastTimer: Timer?
|
|
122
134
|
private var slowTimer: Timer?
|
|
135
|
+
private var watchdogTimer: Timer?
|
|
123
136
|
|
|
124
137
|
// Menu items we update on every poll
|
|
125
138
|
private let statusMI = NSMenuItem(title: "Loading…", action: nil, keyEquivalent: "")
|
|
@@ -140,7 +153,7 @@ final class TrayController: NSObject {
|
|
|
140
153
|
super.init()
|
|
141
154
|
configureStatusItem()
|
|
142
155
|
buildMenu()
|
|
143
|
-
|
|
156
|
+
ensureDaemon()
|
|
144
157
|
refreshStats()
|
|
145
158
|
tickLocal()
|
|
146
159
|
|
|
@@ -164,6 +177,15 @@ final class TrayController: NSObject {
|
|
|
164
177
|
}
|
|
165
178
|
RunLoop.main.add(slow, forMode: .common)
|
|
166
179
|
slowTimer = slow
|
|
180
|
+
// Watchdog: re-converge the daemon every 30s no matter how the
|
|
181
|
+
// last attempt ended — heals the give-up paths (CLI unresolvable
|
|
182
|
+
// at boot, spawn throw, adopted daemon died) that used to strand
|
|
183
|
+
// the pipeline until the user noticed the tray frozen.
|
|
184
|
+
let watchdog = Timer(timeInterval: 30.0, repeats: true) { [weak self] _ in
|
|
185
|
+
MainActor.assumeIsolated { self?.ensureDaemon() }
|
|
186
|
+
}
|
|
187
|
+
RunLoop.main.add(watchdog, forMode: .common)
|
|
188
|
+
watchdogTimer = watchdog
|
|
167
189
|
}
|
|
168
190
|
|
|
169
191
|
private func configureStatusItem() {
|
|
@@ -221,24 +243,97 @@ final class TrayController: NSObject {
|
|
|
221
243
|
}
|
|
222
244
|
|
|
223
245
|
// ── Daemon lifecycle ─────────────────────────────────────────────
|
|
224
|
-
|
|
225
|
-
|
|
246
|
+
//
|
|
247
|
+
// The tray no longer spawns `start --force` blindly. Blind --force
|
|
248
|
+
// SIGTERMs whatever live daemon owns the singleton lock (see
|
|
249
|
+
// apps/agent-dev/src/lock.ts), so two briefly-coexisting trays
|
|
250
|
+
// (kickstart -k racing a reinstall, KeepAlive respawn overlap) had
|
|
251
|
+
// their daemons kill each other in a loop — observed 2026-06-12
|
|
252
|
+
// ending with zero daemons and nothing restarting them. Instead,
|
|
253
|
+
// every (re)start funnels through ensureDaemon(), which asks the CLI
|
|
254
|
+
// `_daemon-health` (decision logic + tests live in
|
|
255
|
+
// apps/agent-dev/src/supervise.ts):
|
|
256
|
+
// adopt → a live, heartbeating daemon owns the lock — leave it.
|
|
257
|
+
// spawn → no live owner — plain `start` (a dead owner's stale
|
|
258
|
+
// lock is reclaimed without --force by lock.ts).
|
|
259
|
+
// replace → live owner that stopped heartbeating — `start --force`.
|
|
260
|
+
// A 30s watchdog re-runs ensureDaemon() so one-shot failure modes
|
|
261
|
+
// (CLI unresolvable at boot, Process.run() throw, adopted daemon
|
|
262
|
+
// dying with no terminationHandler) heal on the next tick instead of
|
|
263
|
+
// stranding the pipeline forever.
|
|
264
|
+
|
|
265
|
+
/// Converge toward "exactly one live daemon". Safe to call from any
|
|
266
|
+
/// trigger (boot, watchdog, child exit, resume) — overlapping calls
|
|
267
|
+
/// collapse into one in-flight health probe.
|
|
268
|
+
private func ensureDaemon() {
|
|
269
|
+
guard !paused else { return }
|
|
270
|
+
if let d = daemon, d.isRunning { return }
|
|
271
|
+
if cli == nil { cli = locateCli() }
|
|
226
272
|
guard let cli else {
|
|
227
|
-
statusMI.title = "modelstat CLI not found"
|
|
273
|
+
statusMI.title = "modelstat CLI not found — retrying…"
|
|
228
274
|
return
|
|
229
275
|
}
|
|
276
|
+
guard !ensureInFlight else { return }
|
|
277
|
+
ensureInFlight = true
|
|
278
|
+
superviseQueue.async { [weak self] in
|
|
279
|
+
// Probe off-main: the health command boots node (~100-300ms).
|
|
280
|
+
let decision = Self.queryDaemonHealth(cli: cli) ?? "spawn"
|
|
281
|
+
DispatchQueue.main.async {
|
|
282
|
+
MainActor.assumeIsolated {
|
|
283
|
+
guard let self else { return }
|
|
284
|
+
self.ensureInFlight = false
|
|
285
|
+
guard !self.paused else { return }
|
|
286
|
+
if let d = self.daemon, d.isRunning { return }
|
|
287
|
+
switch decision {
|
|
288
|
+
case "adopt":
|
|
289
|
+
// A healthy daemon someone else spawned. Adopt: render its
|
|
290
|
+
// heartbeat (tickLocal already does) and spawn nothing.
|
|
291
|
+
break
|
|
292
|
+
case "replace":
|
|
293
|
+
self.spawnDaemon(cli: cli, force: true)
|
|
294
|
+
default:
|
|
295
|
+
self.spawnDaemon(cli: cli, force: false)
|
|
296
|
+
}
|
|
297
|
+
}
|
|
298
|
+
}
|
|
299
|
+
}
|
|
300
|
+
}
|
|
301
|
+
|
|
302
|
+
/// Run `modelstat _daemon-health` and return its decision, or nil if
|
|
303
|
+
/// the command failed (older CLI, node missing) — caller treats nil
|
|
304
|
+
/// as "spawn", which is safe: an unforced spawn against a healthy
|
|
305
|
+
/// owner exits 0 in <1s without killing anything.
|
|
306
|
+
private nonisolated static func queryDaemonHealth(cli: URL) -> String? {
|
|
230
307
|
let p = Process()
|
|
231
|
-
// --force: the tray owns the daemon. If a stale lock from a prior
|
|
232
|
-
// run (crash, kill -9, OS reboot mid-write) is left behind, the
|
|
233
|
-
// unforced `start` exits in <1s with "already running" and the
|
|
234
|
-
// tray's terminationHandler retries it forever. With --force we
|
|
235
|
-
// claim the lock unconditionally and become the live daemon.
|
|
236
308
|
if cli.pathExtension == "mjs" {
|
|
237
309
|
p.launchPath = "/usr/bin/env"
|
|
238
|
-
p.arguments = ["node", cli.path, "
|
|
310
|
+
p.arguments = ["node", cli.path, "_daemon-health"]
|
|
239
311
|
} else {
|
|
240
312
|
p.launchPath = cli.path
|
|
241
|
-
p.arguments = ["
|
|
313
|
+
p.arguments = ["_daemon-health"]
|
|
314
|
+
}
|
|
315
|
+
let pipe = Pipe()
|
|
316
|
+
p.standardOutput = pipe
|
|
317
|
+
p.standardError = Pipe()
|
|
318
|
+
do { try p.run() } catch { return nil }
|
|
319
|
+
p.waitUntilExit()
|
|
320
|
+
guard p.terminationStatus == 0 else { return nil }
|
|
321
|
+
let data = pipe.fileHandleForReading.readDataToEndOfFile()
|
|
322
|
+
guard let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
|
|
323
|
+
let decision = obj["decision"] as? String else { return nil }
|
|
324
|
+
return decision
|
|
325
|
+
}
|
|
326
|
+
|
|
327
|
+
private func spawnDaemon(cli: URL, force: Bool) {
|
|
328
|
+
let p = Process()
|
|
329
|
+
var args = ["start"]
|
|
330
|
+
if force { args.append("--force") }
|
|
331
|
+
if cli.pathExtension == "mjs" {
|
|
332
|
+
p.launchPath = "/usr/bin/env"
|
|
333
|
+
p.arguments = ["node", cli.path] + args
|
|
334
|
+
} else {
|
|
335
|
+
p.launchPath = cli.path
|
|
336
|
+
p.arguments = args
|
|
242
337
|
}
|
|
243
338
|
// Bolt stdout/stderr onto the same log the launchd plist uses so
|
|
244
339
|
// `modelstat status` still sees the same tail.
|
|
@@ -249,29 +344,57 @@ final class TrayController: NSObject {
|
|
|
249
344
|
p.standardOutput = out
|
|
250
345
|
p.standardError = err
|
|
251
346
|
p.terminationHandler = { [weak self] proc in
|
|
252
|
-
// Daemon exited —
|
|
253
|
-
//
|
|
347
|
+
// Daemon exited — re-converge via the health check, which adopts
|
|
348
|
+
// a replacement daemon instead of counter-killing it. A clean
|
|
349
|
+
// sub-5s exit means "another daemon owns the lock" (or an equally
|
|
350
|
+
// immediate no-op); skip the hot retry and let the 30s watchdog
|
|
351
|
+
// re-check, so a stale CLI can't put us in a 2s spawn loop.
|
|
352
|
+
let status = proc.terminationStatus
|
|
254
353
|
Task { @MainActor in
|
|
255
354
|
guard let self else { return }
|
|
256
|
-
|
|
355
|
+
let uptime = self.daemonSpawnedAt.map { Date().timeIntervalSince($0) } ?? .infinity
|
|
257
356
|
self.daemon = nil
|
|
357
|
+
self.daemonSpawnedAt = nil
|
|
358
|
+
guard !self.paused else { return }
|
|
359
|
+
if status == 0 && uptime < 5 { return } // watchdog will re-ensure
|
|
258
360
|
DispatchQueue.main.asyncAfter(deadline: .now() + 2.0) { [weak self] in
|
|
259
|
-
self?.
|
|
361
|
+
MainActor.assumeIsolated { self?.ensureDaemon() }
|
|
260
362
|
}
|
|
261
|
-
_ = proc
|
|
262
363
|
}
|
|
263
364
|
}
|
|
264
365
|
do {
|
|
265
366
|
try p.run()
|
|
266
367
|
daemon = p
|
|
368
|
+
daemonSpawnedAt = Date()
|
|
267
369
|
} catch {
|
|
268
|
-
|
|
370
|
+
// Watchdog retries in ≤30s — do NOT give up permanently here.
|
|
371
|
+
statusMI.title = "modelstat start failed (retrying): \(error.localizedDescription)"
|
|
269
372
|
}
|
|
270
373
|
}
|
|
271
374
|
|
|
272
375
|
private func stopDaemon() {
|
|
273
|
-
daemon
|
|
274
|
-
|
|
376
|
+
if let d = daemon {
|
|
377
|
+
d.terminate()
|
|
378
|
+
daemon = nil
|
|
379
|
+
daemonSpawnedAt = nil
|
|
380
|
+
return
|
|
381
|
+
}
|
|
382
|
+
// No child of our own — but Pause/Quit must also stop an ADOPTED
|
|
383
|
+
// daemon (one we found healthy and left alone). SIGTERM the lock
|
|
384
|
+
// owner; harmless no-op if it's already gone.
|
|
385
|
+
if let pid = Self.readLockOwnerPid() {
|
|
386
|
+
kill(pid, SIGTERM)
|
|
387
|
+
}
|
|
388
|
+
}
|
|
389
|
+
|
|
390
|
+
/// pid from ~/.modelstat/daemon.lock, for stopping an adopted daemon.
|
|
391
|
+
private nonisolated static func readLockOwnerPid() -> pid_t? {
|
|
392
|
+
let path = ("~/.modelstat/daemon.lock" as NSString).expandingTildeInPath
|
|
393
|
+
guard let data = FileManager.default.contents(atPath: path),
|
|
394
|
+
let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
|
|
395
|
+
let pid = obj["pid"] as? Int, pid > 0
|
|
396
|
+
else { return nil }
|
|
397
|
+
return pid_t(pid)
|
|
275
398
|
}
|
|
276
399
|
|
|
277
400
|
// ── Live local heartbeat (fast path) ────────────────────────────
|
|
@@ -507,7 +630,7 @@ final class TrayController: NSObject {
|
|
|
507
630
|
statusMI.title = "Paused"
|
|
508
631
|
} else {
|
|
509
632
|
pauseMI.title = "Pause"
|
|
510
|
-
|
|
633
|
+
ensureDaemon()
|
|
511
634
|
}
|
|
512
635
|
}
|
|
513
636
|
|
|
@@ -520,6 +643,7 @@ final class TrayController: NSObject {
|
|
|
520
643
|
stopDaemon()
|
|
521
644
|
fastTimer?.invalidate()
|
|
522
645
|
slowTimer?.invalidate()
|
|
646
|
+
watchdogTimer?.invalidate()
|
|
523
647
|
NSApp.terminate(nil)
|
|
524
648
|
}
|
|
525
649
|
}
|