modelstat 0.0.44 → 0.0.46

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "modelstat",
3
- "version": "0.0.44",
3
+ "version": "0.0.46",
4
4
  "description": "modelstat companion — reads local AI-tool usage and ships tokenised events to modelstat.",
5
5
  "license": "Apache-2.0",
6
6
  "type": "module",
@@ -13,6 +13,7 @@
13
13
  // ~300 LOC, no dependencies beyond AppKit/Foundation.
14
14
 
15
15
  import AppKit
16
+ import Darwin
16
17
  import Foundation
17
18
 
18
19
  // ── Resolve the `modelstat` CLI on $PATH, then at the install-path
@@ -107,9 +108,20 @@ struct LocalStatsCounters: Decodable {
107
108
  final class TrayController: NSObject {
108
109
  private let statusItem: NSStatusItem
109
110
  private let menu = NSMenu()
110
- private let cli: URL?
111
+ /// Re-resolved by ensureDaemon() when nil — a transient resolution
112
+ /// failure at tray boot (e.g. the installer is mid-rewrite of
113
+ /// ~/.modelstat/bin) must not permanently strand the daemon.
114
+ private var cli: URL?
111
115
  private var daemon: Process?
116
+ /// When the current `daemon` child was spawned — used to detect
117
+ /// "exited cleanly almost immediately" (another daemon owns the
118
+ /// lock) so we back off to the watchdog instead of respawning hot.
119
+ private var daemonSpawnedAt: Date?
112
120
  private var paused = false
121
+ /// Serialises _daemon-health probes; collapses overlapping
122
+ /// ensureDaemon() calls into one in-flight probe.
123
+ private let superviseQueue = DispatchQueue(label: "ai.modelstat.tray.supervise", qos: .utility)
124
+ private var ensureInFlight = false
113
125
  private var latest: AgentStats?
114
126
  /// Live local heartbeat, read straight from ~/.modelstat/last-status.json
115
127
  /// on the fast timer. Decoupled from `latest` (the slower, network-backed
@@ -120,6 +132,7 @@ final class TrayController: NSObject {
120
132
  private var spinnerTick = 0
121
133
  private var fastTimer: Timer?
122
134
  private var slowTimer: Timer?
135
+ private var watchdogTimer: Timer?
123
136
 
124
137
  // Menu items we update on every poll
125
138
  private let statusMI = NSMenuItem(title: "Loading…", action: nil, keyEquivalent: "")
@@ -140,7 +153,7 @@ final class TrayController: NSObject {
140
153
  super.init()
141
154
  configureStatusItem()
142
155
  buildMenu()
143
- startDaemon()
156
+ ensureDaemon()
144
157
  refreshStats()
145
158
  tickLocal()
146
159
 
@@ -164,6 +177,15 @@ final class TrayController: NSObject {
164
177
  }
165
178
  RunLoop.main.add(slow, forMode: .common)
166
179
  slowTimer = slow
180
+ // Watchdog: re-converge the daemon every 30s no matter how the
181
+ // last attempt ended — heals the give-up paths (CLI unresolvable
182
+ // at boot, spawn throw, adopted daemon died) that used to strand
183
+ // the pipeline until the user noticed the tray frozen.
184
+ let watchdog = Timer(timeInterval: 30.0, repeats: true) { [weak self] _ in
185
+ MainActor.assumeIsolated { self?.ensureDaemon() }
186
+ }
187
+ RunLoop.main.add(watchdog, forMode: .common)
188
+ watchdogTimer = watchdog
167
189
  }
168
190
 
169
191
  private func configureStatusItem() {
@@ -221,24 +243,97 @@ final class TrayController: NSObject {
221
243
  }
222
244
 
223
245
  // ── Daemon lifecycle ─────────────────────────────────────────────
224
-
225
- private func startDaemon() {
246
+ //
247
+ // The tray no longer spawns `start --force` blindly. Blind --force
248
+ // SIGTERMs whatever live daemon owns the singleton lock (see
249
+ // apps/agent-dev/src/lock.ts), so two briefly-coexisting trays
250
+ // (kickstart -k racing a reinstall, KeepAlive respawn overlap) had
251
+ // their daemons kill each other in a loop — observed 2026-06-12
252
+ // ending with zero daemons and nothing restarting them. Instead,
253
+ // every (re)start funnels through ensureDaemon(), which asks the CLI
254
+ // `_daemon-health` (decision logic + tests live in
255
+ // apps/agent-dev/src/supervise.ts):
256
+ // adopt → a live, heartbeating daemon owns the lock — leave it.
257
+ // spawn → no live owner — plain `start` (a dead owner's stale
258
+ // lock is reclaimed without --force by lock.ts).
259
+ // replace → live owner that stopped heartbeating — `start --force`.
260
+ // A 30s watchdog re-runs ensureDaemon() so one-shot failure modes
261
+ // (CLI unresolvable at boot, Process.run() throw, adopted daemon
262
+ // dying with no terminationHandler) heal on the next tick instead of
263
+ // stranding the pipeline forever.
264
+
265
+ /// Converge toward "exactly one live daemon". Safe to call from any
266
+ /// trigger (boot, watchdog, child exit, resume) — overlapping calls
267
+ /// collapse into one in-flight health probe.
268
+ private func ensureDaemon() {
269
+ guard !paused else { return }
270
+ if let d = daemon, d.isRunning { return }
271
+ if cli == nil { cli = locateCli() }
226
272
  guard let cli else {
227
- statusMI.title = "modelstat CLI not found"
273
+ statusMI.title = "modelstat CLI not found — retrying…"
228
274
  return
229
275
  }
276
+ guard !ensureInFlight else { return }
277
+ ensureInFlight = true
278
+ superviseQueue.async { [weak self] in
279
+ // Probe off-main: the health command boots node (~100-300ms).
280
+ let decision = Self.queryDaemonHealth(cli: cli) ?? "spawn"
281
+ DispatchQueue.main.async {
282
+ MainActor.assumeIsolated {
283
+ guard let self else { return }
284
+ self.ensureInFlight = false
285
+ guard !self.paused else { return }
286
+ if let d = self.daemon, d.isRunning { return }
287
+ switch decision {
288
+ case "adopt":
289
+ // A healthy daemon someone else spawned. Adopt: render its
290
+ // heartbeat (tickLocal already does) and spawn nothing.
291
+ break
292
+ case "replace":
293
+ self.spawnDaemon(cli: cli, force: true)
294
+ default:
295
+ self.spawnDaemon(cli: cli, force: false)
296
+ }
297
+ }
298
+ }
299
+ }
300
+ }
301
+
302
+ /// Run `modelstat _daemon-health` and return its decision, or nil if
303
+ /// the command failed (older CLI, node missing) — caller treats nil
304
+ /// as "spawn", which is safe: an unforced spawn against a healthy
305
+ /// owner exits 0 in <1s without killing anything.
306
+ private nonisolated static func queryDaemonHealth(cli: URL) -> String? {
230
307
  let p = Process()
231
- // --force: the tray owns the daemon. If a stale lock from a prior
232
- // run (crash, kill -9, OS reboot mid-write) is left behind, the
233
- // unforced `start` exits in <1s with "already running" and the
234
- // tray's terminationHandler retries it forever. With --force we
235
- // claim the lock unconditionally and become the live daemon.
236
308
  if cli.pathExtension == "mjs" {
237
309
  p.launchPath = "/usr/bin/env"
238
- p.arguments = ["node", cli.path, "start", "--force"]
310
+ p.arguments = ["node", cli.path, "_daemon-health"]
239
311
  } else {
240
312
  p.launchPath = cli.path
241
- p.arguments = ["start", "--force"]
313
+ p.arguments = ["_daemon-health"]
314
+ }
315
+ let pipe = Pipe()
316
+ p.standardOutput = pipe
317
+ p.standardError = Pipe()
318
+ do { try p.run() } catch { return nil }
319
+ p.waitUntilExit()
320
+ guard p.terminationStatus == 0 else { return nil }
321
+ let data = pipe.fileHandleForReading.readDataToEndOfFile()
322
+ guard let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
323
+ let decision = obj["decision"] as? String else { return nil }
324
+ return decision
325
+ }
326
+
327
+ private func spawnDaemon(cli: URL, force: Bool) {
328
+ let p = Process()
329
+ var args = ["start"]
330
+ if force { args.append("--force") }
331
+ if cli.pathExtension == "mjs" {
332
+ p.launchPath = "/usr/bin/env"
333
+ p.arguments = ["node", cli.path] + args
334
+ } else {
335
+ p.launchPath = cli.path
336
+ p.arguments = args
242
337
  }
243
338
  // Bolt stdout/stderr onto the same log the launchd plist uses so
244
339
  // `modelstat status` still sees the same tail.
@@ -249,29 +344,57 @@ final class TrayController: NSObject {
249
344
  p.standardOutput = out
250
345
  p.standardError = err
251
346
  p.terminationHandler = { [weak self] proc in
252
- // Daemon exited — if we didn't pause it intentionally, restart
253
- // after 2s so a crash is self-healing without user clicks.
347
+ // Daemon exited — re-converge via the health check, which adopts
348
+ // a replacement daemon instead of counter-killing it. A clean
349
+ // sub-5s exit means "another daemon owns the lock" (or an equally
350
+ // immediate no-op); skip the hot retry and let the 30s watchdog
351
+ // re-check, so a stale CLI can't put us in a 2s spawn loop.
352
+ let status = proc.terminationStatus
254
353
  Task { @MainActor in
255
354
  guard let self else { return }
256
- guard !self.paused else { return }
355
+ let uptime = self.daemonSpawnedAt.map { Date().timeIntervalSince($0) } ?? .infinity
257
356
  self.daemon = nil
357
+ self.daemonSpawnedAt = nil
358
+ guard !self.paused else { return }
359
+ if status == 0 && uptime < 5 { return } // watchdog will re-ensure
258
360
  DispatchQueue.main.asyncAfter(deadline: .now() + 2.0) { [weak self] in
259
- self?.startDaemon()
361
+ MainActor.assumeIsolated { self?.ensureDaemon() }
260
362
  }
261
- _ = proc
262
363
  }
263
364
  }
264
365
  do {
265
366
  try p.run()
266
367
  daemon = p
368
+ daemonSpawnedAt = Date()
267
369
  } catch {
268
- statusMI.title = "modelstat start failed: \(error.localizedDescription)"
370
+ // Watchdog retries in ≤30s — do NOT give up permanently here.
371
+ statusMI.title = "modelstat start failed (retrying): \(error.localizedDescription)"
269
372
  }
270
373
  }
271
374
 
272
375
  private func stopDaemon() {
273
- daemon?.terminate()
274
- daemon = nil
376
+ if let d = daemon {
377
+ d.terminate()
378
+ daemon = nil
379
+ daemonSpawnedAt = nil
380
+ return
381
+ }
382
+ // No child of our own — but Pause/Quit must also stop an ADOPTED
383
+ // daemon (one we found healthy and left alone). SIGTERM the lock
384
+ // owner; harmless no-op if it's already gone.
385
+ if let pid = Self.readLockOwnerPid() {
386
+ kill(pid, SIGTERM)
387
+ }
388
+ }
389
+
390
+ /// pid from ~/.modelstat/daemon.lock, for stopping an adopted daemon.
391
+ private nonisolated static func readLockOwnerPid() -> pid_t? {
392
+ let path = ("~/.modelstat/daemon.lock" as NSString).expandingTildeInPath
393
+ guard let data = FileManager.default.contents(atPath: path),
394
+ let obj = try? JSONSerialization.jsonObject(with: data) as? [String: Any],
395
+ let pid = obj["pid"] as? Int, pid > 0
396
+ else { return nil }
397
+ return pid_t(pid)
275
398
  }
276
399
 
277
400
  // ── Live local heartbeat (fast path) ────────────────────────────
@@ -507,7 +630,7 @@ final class TrayController: NSObject {
507
630
  statusMI.title = "Paused"
508
631
  } else {
509
632
  pauseMI.title = "Pause"
510
- startDaemon()
633
+ ensureDaemon()
511
634
  }
512
635
  }
513
636
 
@@ -520,6 +643,7 @@ final class TrayController: NSObject {
520
643
  stopDaemon()
521
644
  fastTimer?.invalidate()
522
645
  slowTimer?.invalidate()
646
+ watchdogTimer?.invalidate()
523
647
  NSApp.terminate(nil)
524
648
  }
525
649
  }