@aexol/spectral 0.2.0 → 0.2.2

@@ -61,7 +61,7 @@ export async function performLogin(opts) {
  }
  export async function runLogin() {
  process.stdout.write(pc.bold("Spectral login\n"));
- process.stdout.write(pc.dim("Authenticate against the Aexol MCP backend. Credentials are stored at ~/.spectral/config.json (chmod 600).\n\n"));
+ process.stdout.write(pc.dim(`Authenticate against the Aexol MCP backend. Credentials are stored at ${getConfigFile()} (chmod 600).\n\n`));
  const defaultUrl = process.env.SPECTRAL_MCP_URL ?? DEFAULT_API_URL;
  let apiUrl;
  let teamApiKey;
@@ -192,7 +192,9 @@ export async function runServe(opts = {}) {
  const relay = new RelayClient({
  relayUrl,
  machineJwt: registration.record.machineJwt,
+ backendUrl,
  webSocketImpl: opts.webSocketImpl,
+ fetchImpl: opts.fetchImpl,
  logger: silent ? { log: () => { }, warn: () => { }, error: () => { } } : console,
  });
  // Wire the meta publisher now that we have both the relay socket and
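The new `backendUrl` / `fetchImpl` options make the relay connection injectable for tests: `runServe` forwards both into `RelayClient`, so the health check can be exercised without a live backend. A minimal sketch; the import path and the fake implementations are hypothetical stand-ins, not part of the published package API:

    // Hypothetical unit test for the pre-reconnect health check.
    import { RelayClient } from "@aexol/spectral/relay-client.js";

    const calls = [];
    const fakeFetch = async (url) => {
      calls.push(url);
      return { ok: true, status: 200 }; // pretend the backend is healthy
    };

    const relay = new RelayClient({
      relayUrl: "wss://relay.example/agent-connection",
      machineJwt: "test-jwt",
      backendUrl: "https://backend.example/",
      webSocketImpl: class { on() {} close() {} },
      fetchImpl: fakeFetch,
      logger: { log() {}, warn() {}, error() {} },
    });

    console.log(await relay.healthCheck()); // true; calls[0] ends in "/health"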
@@ -15,7 +15,7 @@
  * backend is unreachable we log a warning and return, leaving pi to start
  * without Aexol tools rather than crashing the whole agent.
  */
- import { getApiUrl, readConfig } from "../config.js";
+ import { getApiUrl, getConfigFile, readConfig } from "../config.js";
  import { AexolMcpClient, AexolMcpError } from "../mcp-client.js";
  /**
  * Render a backend tool result into a single string for pi.
@@ -58,7 +58,7 @@ export default async function aexolMcpExtension(pi) {
  const cfg = await readConfig();
  if (!cfg) {
  // Pre-flight in cli.ts should have caught this. Logging is enough.
- process.stderr.write("[aexol-mcp] No ~/.spectral/config.json found; Aexol tools disabled. Run `spectral login`.\n");
+ process.stderr.write(`[aexol-mcp] No config found at ${getConfigFile()}; Aexol tools disabled. Run \`spectral login\`.\n`);
  return;
  }
  const apiUrl = getApiUrl(cfg.apiUrl);
@@ -2,26 +2,30 @@
  * RelayClient — long-lived WebSocket connection to the Aexol backend's
  * `/agent-connection` endpoint.
  *
- * Responsibilities (Batch 2):
+ * Responsibilities:
  * - Open and maintain a single WS to the relay, authenticated with the
  * machine JWT via `Authorization: Bearer <jwt>`.
  * - Reply `{kind:"pong"}` to backend `{kind:"ping"}`. Backend closes
  * `4408 heartbeat-timeout` if it doesn't hear from us within 90s; we
  * rely on the backend pings rather than emitting our own (single source
  * of liveness, no jitter on our side).
+ * - **Watchdog timer:** tracks `lastActivityMs` on every received frame /
+ * ping. Every 15 s, if no activity within `WATCHDOG_SILENCE_MS` (120 s,
+ * comfortably above the backend's 90 s timeout), force-closes the socket
+ * (4001 "watchdog-timeout") and triggers a reconnect. Detects silent
+ * backend death (Docker OOM kill, network partition without TCP RST)
+ * where the socket stays open but no data flows.
+ * - **Pre-reconnect health check:** before opening a new WS, does a quick
+ * HTTP GET to `backendUrl/health` with a 5 s timeout. If the backend is
+ * unhealthy or unreachable, skips the WS attempt and re-schedules. This
+ * avoids a slow TCP connect timeout (up to 75 s on some platforms) when
+ * the backend is down.
  * - On unexpected close, reconnect forever with exponential backoff +
  * ±20% jitter, capped at 30s. There is no "give up" state — if the
  * machine is offline for hours, we just keep trying. Operators can
  * `Ctrl-C` to stop.
  * - Buffer outbound frames in a small queue while the socket is closed.
- * Capped at 100 frames; oldest is dropped on overflow. Until Batch 3
- * introduces relay envelopes there's nothing meaningful to send anyway,
- * so this is mostly future-proofing.
- *
- * Out of scope for Batch 2 (Batch 3 work):
- * - Envelope routing (`rest_request` / `subscribe` / `ws_event`). The
- * client emits `frame` for every non-pong frame; the dispatcher in
- * `serve.ts` (currently just an ack/echo) will own translation.
+ * Capped at 100 frames; oldest is dropped on overflow.
  *
  * Events (typed via `RelayClientEvents`):
  * - `open` - connected and welcomed
@@ -42,10 +46,24 @@ const SEND_QUEUE_CAP = 100;
  const RECONNECT_SCHEDULE = [1000, 2000, 5000, 15000, 30000];
  /** ±20% jitter on each scheduled delay. */
  const JITTER_RATIO = 0.2;
+ /**
+ * Watchdog interval (ms). Every `WATCHDOG_INTERVAL_MS` we check whether
+ * the socket has been silent longer than `WATCHDOG_SILENCE_MS`.
+ */
+ const WATCHDOG_INTERVAL_MS = 15_000;
+ /**
+ * Max silence before triggering watchdog reconnect (ms): 120 s, well
+ * above the backend's 90 s heartbeat timeout.
+ */
+ const WATCHDOG_SILENCE_MS = 120_000;
+ /** HTTP timeout for the pre-reconnect health check. */
+ const HEALTH_CHECK_TIMEOUT_MS = 5_000;
  export class RelayClient extends EventEmitter {
  relayUrl;
  machineJwt;
+ backendUrl;
  WS;
+ fetchImpl;
  logger;
  exit;
  ws = null;
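With these values, the worst-case detection latency for a silently dead socket is about 135 s: up to 120 s of tolerated silence plus at most one 15 s check interval before the watchdog notices.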
@@ -53,11 +71,16 @@ export class RelayClient extends EventEmitter {
  reconnectAttempt = 0;
  reconnectTimer = null;
  sendQueue = [];
+ /** Timestamp of last received frame / ping — drives the watchdog. */
+ lastActivityMs = 0;
+ watchdogTimer = null;
  constructor(opts) {
  super();
  this.relayUrl = opts.relayUrl;
  this.machineJwt = opts.machineJwt;
+ this.backendUrl = opts.backendUrl;
  this.WS = opts.webSocketImpl ?? WebSocket;
+ this.fetchImpl = opts.fetchImpl ?? fetch;
  this.logger = opts.logger ?? console;
  this.exit = opts.exit ?? ((code) => process.exit(code));
  }
@@ -111,6 +134,10 @@
  clearTimeout(this.reconnectTimer);
  this.reconnectTimer = null;
  }
+ if (this.watchdogTimer) {
+ clearInterval(this.watchdogTimer);
+ this.watchdogTimer = null;
+ }
  const ws = this.ws;
  this.ws = null;
  if (ws) {
@@ -131,7 +158,13 @@
  headers: { Authorization: `Bearer ${this.machineJwt}` },
  });
  this.ws = ws;
+ // Seed the watchdog timer so a hanging connect handshake also triggers
+ // a timeout (no on("open") to bump activity). If the handshake succeeds
+ // quickly, `on("open")` resets this.
+ this.lastActivityMs = Date.now();
+ this.startWatchdog();
  ws.on("open", () => {
+ this.lastActivityMs = Date.now();
  this.reconnectAttempt = 0;
  // Flush any queued frames.
  const queued = this.sendQueue;
@@ -150,6 +183,7 @@
  this.emit("open");
  });
  ws.on("message", (data) => {
+ this.lastActivityMs = Date.now();
  let parsed;
  try {
  parsed = JSON.parse(data.toString());
@@ -182,6 +216,7 @@
  });
  ws.on("close", (code, reason) => {
  this.ws = null;
+ this.stopWatchdog();
  const reasonStr = reason?.toString() ?? "";
  this.emit("close", { code, reason: reasonStr });
  if (this.disposed)
@@ -202,11 +237,93 @@
  this.scheduleReconnect();
  });
  }
- scheduleReconnect() {
+ // --- watchdog -------------------------------------------------------------
+ startWatchdog() {
+ if (this.watchdogTimer)
+ return;
+ this.watchdogTimer = setInterval(() => {
+ if (this.disposed)
+ return;
+ const elapsed = Date.now() - this.lastActivityMs;
+ if (elapsed > WATCHDOG_SILENCE_MS) {
+ this.logger.warn(`Watchdog: no relay activity for ${Math.round(elapsed / 1000)}s, forcing reconnect`);
+ const ws = this.ws;
+ this.ws = null;
+ if (ws) {
+ try {
+ ws.close(4001, "watchdog-timeout");
+ }
+ catch {
+ // ignore
+ }
+ }
+ }
+ }, WATCHDOG_INTERVAL_MS);
+ }
+ stopWatchdog() {
+ if (this.watchdogTimer) {
+ clearInterval(this.watchdogTimer);
+ this.watchdogTimer = null;
+ }
+ }
+ /**
+ * Pre-reconnect health check: quick HTTP GET to `<backendUrl>/health`.
+ * Returns true when the backend is reachable (or when no backendUrl is
+ * configured, which skips the check entirely). Returns false when the
+ * backend is unhealthy/down — the caller should re-schedule.
+ */
+ async healthCheck() {
+ if (!this.backendUrl)
+ return true; // no backend → no health check
+ const url = `${this.backendUrl.replace(/\/$/, "")}/health`;
+ try {
+ const ctrl = new AbortController();
+ const timeout = setTimeout(() => ctrl.abort(), HEALTH_CHECK_TIMEOUT_MS);
+ const res = await this.fetchImpl(url, {
+ method: "GET",
+ signal: ctrl.signal,
+ });
+ clearTimeout(timeout);
+ if (!res.ok) {
+ this.logger.warn(`Health check: ${url} returned ${res.status}, will retry`);
+ return false;
+ }
+ // Backend is reachable — proceed to WS.
+ return true;
+ }
+ catch (err) {
+ // AbortError (timeout), fetch error (connection refused, DNS, etc.)
+ // all mean the backend is not reachable. Log compactly and retry.
+ const msg = err?.name === "AbortError"
+ ? "timeout"
+ : (err instanceof Error ? err.message : String(err));
+ this.logger.warn(`Health check: ${url} unreachable (${msg}), will retry`);
+ return false;
+ }
+ }
+ async scheduleReconnect() {
  if (this.disposed)
  return;
  if (this.reconnectTimer)
  return;
+ // Pre-reconnect health check — avoids wasting time on a slow TCP
+ // connect when the backend is down. The health endpoint is a cheap
+ // HTTP GET; if even that fails, skip the WS attempt entirely.
+ const healthy = await this.healthCheck();
+ if (!healthy) {
+ // Backend unreachable — re-schedule on the normal backoff schedule
+ // and run the health check again before the next WS attempt (this
+ // is an environment problem, not a WS handshake problem).
+ const idx = Math.min(this.reconnectAttempt, RECONNECT_SCHEDULE.length - 1);
+ const delay = RECONNECT_SCHEDULE[idx];
+ this.reconnectAttempt++;
+ this.emit("reconnect-scheduled", { delayMs: delay, attempt: this.reconnectAttempt });
+ this.reconnectTimer = setTimeout(() => {
+ this.reconnectTimer = null;
+ void this.scheduleReconnect().catch(() => { });
+ }, delay);
+ return;
+ }
  const idx = Math.min(this.reconnectAttempt, RECONNECT_SCHEDULE.length - 1);
  const base = RECONNECT_SCHEDULE[idx];
  const jitter = base * JITTER_RATIO * (Math.random() * 2 - 1);
@@ -233,7 +350,7 @@ export class RelayClient extends EventEmitter {
  // URL, etc.) must not kill the daemon — log it and re-schedule
  // so the backoff continues forever.
  this.emit("error", err);
- this.scheduleReconnect();
+ void this.scheduleReconnect().catch(() => { });
  }
  }, delay);
  }
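For reference, the delay math above works out as follows; this standalone sketch just re-runs the schedule-plus-jitter computation from the diff with the same constants:

    // Reconnect delay = scheduled base ± 20% uniform jitter, capped at 30 s.
    const RECONNECT_SCHEDULE = [1000, 2000, 5000, 15000, 30000];
    const JITTER_RATIO = 0.2;

    function reconnectDelay(attempt) {
      // Attempts past the end of the schedule keep reusing the 30 s cap.
      const idx = Math.min(attempt, RECONNECT_SCHEDULE.length - 1);
      const base = RECONNECT_SCHEDULE[idx];
      const jitter = base * JITTER_RATIO * (Math.random() * 2 - 1);
      return Math.round(base + jitter);
    }

    // e.g. attempt 2 → base 5000 ms, result uniformly in [4000, 6000] ms.
    console.log([0, 1, 2, 3, 4, 5].map(reconnectDelay));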
@@ -125,6 +125,60 @@ function resolveMcpAdapterEntry() {
  }
  return null;
  }
+ /**
+ * Token pricing per model (USD per 1M tokens). Matches provider list
+ * prices as of May 2026. Used to compute token cost server-side when
+ * pi's own cost field is unavailable (synthetic proxy models are
+ * registered with zero cost to avoid pi-side billing).
+ *
+ * Keys are matched as prefix substrings against modelId, so
+ * `"claude-sonnet-4"` covers both `claude-sonnet-4-20250514` and any
+ * future point-release.
+ */
+ const MODEL_PRICING = [
+ // Anthropic Claude models
+ { prefix: "claude-opus-4", input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.50 },
+ { prefix: "claude-sonnet-4", input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
+ { prefix: "claude-3-5-sonnet", input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
+ { prefix: "claude-3-5-haiku", input: 0.80, output: 4, cacheWrite: 1, cacheRead: 0.08 },
+ { prefix: "claude-3-opus", input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.50 },
+ { prefix: "claude-3-sonnet", input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
+ { prefix: "claude-3-haiku", input: 0.25, output: 1.25, cacheWrite: 1.25, cacheRead: 0.025 },
+ // OpenAI models
+ { prefix: "gpt-4.1", input: 2, output: 8, cacheWrite: 8, cacheRead: 0.50 },
+ { prefix: "gpt-4o", input: 2.50, output: 10, cacheWrite: 10, cacheRead: 1.25 },
+ { prefix: "gpt-4-turbo", input: 10, output: 30, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "gpt-4", input: 30, output: 60, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "gpt-3.5-turbo", input: 0.50, output: 1.50, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "o1", input: 15, output: 60, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "o3-mini", input: 1.10, output: 4.40, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "o4-mini", input: 1.10, output: 4.40, cacheWrite: 0, cacheRead: 0 },
+ // Google Gemini models
+ { prefix: "gemini-2.5-pro", input: 1.25, output: 10, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "gemini-2.5-flash", input: 0.15, output: 0.60, cacheWrite: 0, cacheRead: 0 },
+ // DeepSeek models
+ { prefix: "deepseek-v3", input: 0.27, output: 1.10, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "deepseek-r1", input: 0.55, output: 2.19, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "deepseek/deepseek-v3", input: 0.27, output: 1.10, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "deepseek/deepseek-r1", input: 0.55, output: 2.19, cacheWrite: 0, cacheRead: 0 },
+ // Meta Llama models (common via OpenRouter-compatible endpoints)
+ { prefix: "meta-llama/llama-4", input: 0.20, output: 0.80, cacheWrite: 0, cacheRead: 0 },
+ { prefix: "meta-llama/llama-3.3", input: 0.20, output: 0.50, cacheWrite: 0, cacheRead: 0 },
+ ];
+ /** Look up pricing for a modelId. Returns null when unknown. */
+ function lookupPricing(modelId) {
+ for (const entry of MODEL_PRICING) {
+ if (modelId.startsWith(entry.prefix)) {
+ return {
+ input: entry.input,
+ output: entry.output,
+ cacheWrite: entry.cacheWrite,
+ cacheRead: entry.cacheRead,
+ };
+ }
+ }
+ return null;
+ }
  export class PiBridge {
  session;
  unsubscribe;
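Since prices are expressed in USD per 1M tokens, a turn's cost is `tokens / 1e6 × price`, summed per bucket. A worked example against the table above; the `computeCost` helper is illustrative, not part of the package:

    // Illustrative cost arithmetic for per-1M-token pricing.
    const pricing = lookupPricing("claude-sonnet-4-20250514");
    // → { input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 }

    function computeCost(p, u) {
      const per = (price, tokens) => (price * tokens) / 1_000_000;
      return per(p.input, u.inputTokens) + per(p.output, u.outputTokens) +
        per(p.cacheRead, u.cacheReadTokens) + per(p.cacheWrite, u.cacheWriteTokens);
    }

    // 12k input + 1.5k output tokens → 0.036 + 0.0225 = $0.0585.
    computeCost(pricing, { inputTokens: 12_000, outputTokens: 1_500, cacheReadTokens: 0, cacheWriteTokens: 0 });

Note that entry order in `MODEL_PRICING` matters: the broad `gpt-4` prefix sits below the more specific `gpt-4.1`, `gpt-4o`, and `gpt-4-turbo`, so the first (most specific) match wins.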
@@ -316,26 +370,30 @@ export class PiBridge {
  apiKey: this.opts.machineJwt,
  authHeader: true,
  api: "anthropic-messages",
- models: anthropicModels.map((m) => ({
- id: m.modelId,
- name: m.displayName,
- api: "anthropic-messages",
- // Pin provider/baseUrl explicitly so pi's ModelRegistry doesn't
- // auto-derive `provider` from a slash-prefixed id (e.g. treating
- // `deepseek/deepseek-v4-pro` as provider `"deepseek"`), which would
- // make `hasConfiguredAuth(model)` look up the wrong provider key
- // and surface "No API key for deepseek/...". Both must point back
- // at our synthetic proxy provider so auth resolves to the machine JWT.
- provider: SPECTRAL_PROXY_ANTHROPIC,
- baseUrl,
- reasoning: false,
- input: ["text", "image"],
- // The cost block is required by pi's typing but unused for routing;
- // the backend enforces real billing/limits server-side, not pi.
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
- contextWindow: 0,
- maxTokens: 0,
- })),
+ models: anthropicModels.map((m) => {
+ const pricing = lookupPricing(m.modelId);
+ return {
+ id: m.modelId,
+ name: m.displayName,
+ api: "anthropic-messages",
+ // Pin provider/baseUrl explicitly so pi's ModelRegistry doesn't
+ // auto-derive `provider` from a slash-prefixed id (e.g. treating
+ // `deepseek/deepseek-v4-pro` as provider `"deepseek"`), which would
+ // make `hasConfiguredAuth(model)` look up the wrong provider key
+ // and surface "No API key for deepseek/...". Both must point back
+ // at our synthetic proxy provider so auth resolves to the machine JWT.
+ provider: SPECTRAL_PROXY_ANTHROPIC,
+ baseUrl,
+ reasoning: false,
+ input: ["text", "image"],
+ // Real pricing so pi can compute accurate token costs.
+ cost: pricing
+ ? { input: pricing.input, output: pricing.output, cacheRead: pricing.cacheRead, cacheWrite: pricing.cacheWrite }
+ : { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 0,
+ maxTokens: 0,
+ };
+ }),
  });
  }
  if (openaiCompatModels.length > 0) {
@@ -344,22 +402,28 @@
  apiKey: this.opts.machineJwt,
  authHeader: true,
  api: "openai-completions",
- models: openaiCompatModels.map((m) => ({
- id: m.modelId,
- name: m.displayName,
- api: "openai-completions",
- // See anthropic batch above for rationale — without these, pi
- // auto-derives `provider` from slash-prefixed ids like
- // `deepseek/deepseek-v4-pro` or `meta-llama/llama-3.3-70b-instruct`,
- // breaking auth lookup against our synthetic proxy provider.
- provider: SPECTRAL_PROXY_OPENAI,
- baseUrl,
- reasoning: false,
- input: ["text", "image"],
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
- contextWindow: 0,
- maxTokens: 0,
- })),
+ models: openaiCompatModels.map((m) => {
+ const pricing = lookupPricing(m.modelId);
+ return {
+ id: m.modelId,
+ name: m.displayName,
+ api: "openai-completions",
+ // See anthropic batch above for rationale — without these, pi
+ // auto-derives `provider` from slash-prefixed ids like
+ // `deepseek/deepseek-v4-pro` or `meta-llama/llama-3.3-70b-instruct`,
+ // breaking auth lookup against our synthetic proxy provider.
+ provider: SPECTRAL_PROXY_OPENAI,
+ baseUrl,
+ reasoning: false,
+ input: ["text", "image"],
+ // Real pricing so pi can compute accurate token costs.
+ cost: pricing
+ ? { input: pricing.input, output: pricing.output, cacheRead: pricing.cacheRead, cacheWrite: pricing.cacheWrite }
+ : { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 0,
+ maxTokens: 0,
+ };
+ }),
  });
  }
  // Built-in UserModel entries — custom models registered by the team.
@@ -373,18 +437,23 @@
  apiKey: this.opts.machineJwt,
  authHeader: true,
  api: "openai-completions",
- models: userModelEntries.map((m) => ({
- id: m.modelId,
- name: m.displayName,
- api: "openai-completions",
- provider: SPECTRAL_PROXY_USER_MODEL,
- baseUrl,
- reasoning: false,
- input: ["text", "image"],
- cost: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
- contextWindow: 0,
- maxTokens: 0,
- })),
+ models: userModelEntries.map((m) => {
+ const pricing = lookupPricing(m.modelId);
+ return {
+ id: m.modelId,
+ name: m.displayName,
+ api: "openai-completions",
+ provider: SPECTRAL_PROXY_USER_MODEL,
+ baseUrl,
+ reasoning: false,
+ input: ["text", "image"],
+ cost: pricing
+ ? { input: pricing.input, output: pricing.output, cacheRead: pricing.cacheRead, cacheWrite: pricing.cacheWrite }
+ : { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
+ contextWindow: 0,
+ maxTokens: 0,
+ };
+ }),
  });
  }
  }
@@ -651,6 +720,26 @@
  const endEvent = { type: "message_end", messageId };
  this.pending.wireEvents.push(endEvent);
  this.opts.emit(endEvent);
+ // Emit token usage for this assistant message. pi provides token
+ // counts via ev.message.usage; cost is computed from the model's
+ // configured pricing (or null when unavailable).
+ const usage = ev.message.usage;
+ if (usage) {
+ const usageEvent = {
+ type: "token_usage",
+ messageId,
+ usage: {
+ inputTokens: usage.input ?? 0,
+ outputTokens: usage.output ?? 0,
+ cacheReadTokens: usage.cacheRead ?? 0,
+ cacheWriteTokens: usage.cacheWrite ?? 0,
+ totalTokens: usage.totalTokens ?? 0,
+ cost: usage.cost?.total ?? null,
+ },
+ };
+ this.pending.wireEvents.push(usageEvent);
+ this.opts.emit(usageEvent);
+ }
  // Defer persistence: keep `this.pending` alive so tool events that
  // arrive after `message_end` (pi fires tool_execution_* events
  // BETWEEN messages) are buffered into `pending.wireEvents`. We store
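On the receiving side, the new `token_usage` wire event can be folded into a running total. A hypothetical subscriber-side accumulator; the event shape matches the emitter above, but how events arrive depends on your transport:

    // Accumulate session-wide token counts and cost from token_usage events.
    const totals = { tokens: 0, cost: 0 };

    function onWireEvent(event) {
      if (event.type !== "token_usage") return;
      totals.tokens += event.usage.totalTokens;
      // cost is null when the model has no configured pricing.
      if (event.usage.cost != null) totals.cost += event.usage.cost;
    }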
@@ -42,6 +42,12 @@ import { generateSessionTitle, isDefaultTitle, } from "./title-generator.js";
  const DEFAULT_BRIDGE_FACTORY = (args) => new PiBridge(args);
  /** Safety limit for autonomous loop iterations per session. */
  const MAX_LOOP_ITERATIONS = 100;
+ /**
+ * Number of accumulated wire events before flushing the in-flight turn
+ * to SQLite. Batch-persisting means a server crash mid-turn only loses
+ * at most the last `BATCH_FLUSH_INTERVAL` events, not the entire turn.
+ */
+ const BATCH_FLUSH_INTERVAL = 10;
  /** Marker the agent emits in its response to signal the task is complete. */
  const LOOP_DONE_MARKER = "<LOOP_DONE>";
  export class SessionStreamManager {
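Concretely, with `BATCH_FLUSH_INTERVAL = 10` a turn flushes after events 10, 20, 30, and so on; a crash after event 37 recovers events 1-30 from SQLite and loses only the seven since the last flush.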
@@ -361,6 +367,10 @@
  // instead of hanging. Also prevents the next prompt() from waiting on
  // a dead bridge's ready promise.
  stream.startError = new Error("Turn cancelled");
+ // Clear batch-persist tracking so the next turn doesn't accidentally
+ // flush events against a stale messageId.
+ stream.currentMessageId = null;
+ stream.lastFlushedEventCount = 0;
  // Broadcast agent_end so all subscribers close their open turn and
  // re-enable their composers.
  if (stream.currentTurn) {
@@ -385,6 +395,9 @@
  if (!stream)
  return;
  stream.loopActive = false;
+ // Flush the last batch of in-flight events before tearing down, so a
+ // shutdown / GC doesn't lose events that haven't hit the interval yet.
+ this.flushInFlightTurn(stream);
  try {
  stream.bridge.dispose();
  }
@@ -460,6 +473,8 @@
  startError: null,
  subscribers: new Set(),
  currentTurn: null,
+ currentMessageId: null,
+ lastFlushedEventCount: 0,
  loopActive: false,
  loopIterationCount: 0,
  loopOriginalPrompt: null,
@@ -523,6 +538,33 @@
  if (event.type === "text_delta") {
  stream.currentTurn.assistantText += event.delta;
  }
+ // Track message lifecycle for batch persistence.
+ if (event.type === "message_start") {
+ stream.currentMessageId = event.messageId;
+ stream.lastFlushedEventCount = 0;
+ // Stub-insert so the server can recover this message on restart even
+ // if it crashes before the first batch flush. `INSERT OR REPLACE`
+ // ensures the final `onAssistantMessageComplete` write wins.
+ try {
+ this.store.appendMessage(stream.sessionId, {
+ id: event.messageId,
+ role: "assistant",
+ content: "",
+ eventsJsonl: "",
+ createdAt: Date.now(),
+ });
+ }
+ catch (err) {
+ console.error(`[spectral] error: batch-persist stub insert failed: ${err instanceof Error ? err.message : String(err)}`);
+ }
+ }
+ // Batch-persist: flush accumulated events to SQLite every N events so a
+ // server crash mid-turn only loses the last batch, not the entire turn.
+ if (stream.currentMessageId &&
+ stream.currentTurn &&
+ stream.currentTurn.events.length - stream.lastFlushedEventCount >= BATCH_FLUSH_INTERVAL) {
+ this.flushInFlightTurn(stream);
+ }
  }
  // Broadcast first, then maybe close out the turn. agent_end clears the
  // buffer because by that point the assistant message is already in
@@ -530,6 +572,12 @@
  // which fires before agent_end).
  this.broadcast(stream, event);
  if (event.type === "agent_end") {
+ // Clear batch-persist tracking; no final flush is needed here.
+ // `onAssistantMessageComplete` has already written the authoritative
+ // final row to SQLite (it fires on `message_end`, which precedes
+ // `agent_end`), so the staged row is already replaced with complete
+ // data. We just zero out the in-memory trackers.
+ stream.currentMessageId = null;
+ stream.lastFlushedEventCount = 0;
  const finishedTurn = stream.currentTurn;
  stream.currentTurn = null;
  // Fire-and-forget auto-title generation. Runs only once per session
@@ -584,10 +632,48 @@
  // An error event arriving outside a turn (or bubbling out of one) —
  // discard partial buffer to avoid replaying half a turn that the
  // client has already shown an error for. The error event itself is
- // still broadcast above.
+ // still broadcast above. Also clear batch-persist tracking so the
+ // next `message_start` starts a fresh sequence.
+ stream.currentMessageId = null;
+ stream.lastFlushedEventCount = 0;
  stream.currentTurn = null;
  }
  }
+ /**
+ * Flush the current in-flight turn's events to SQLite for crash recovery.
+ * Skips the write when nothing new has accumulated since the last flush;
+ * otherwise replaces the row (INSERT OR REPLACE) with a complete snapshot
+ * of all events so far. Called every `BATCH_FLUSH_INTERVAL` events from
+ * `handleBridgeEvent`.
+ *
+ * Errors are caught, logged, and swallowed: batch persistence is a
+ * best-effort hardening, never a failure path that should block the stream.
+ */
+ flushInFlightTurn(stream) {
+ const turn = stream.currentTurn;
+ const messageId = stream.currentMessageId;
+ if (!turn || !messageId)
+ return;
+ const newEvents = turn.events.slice(stream.lastFlushedEventCount);
+ if (newEvents.length === 0)
+ return;
+ try {
+ // Build JSONL from all events (already-flushed + new) so the row is
+ // always a complete, self-consistent snapshot. Older batches are
+ // included so history rehydration doesn't need to stitch fragments.
+ const eventsJsonl = turn.events.map((e) => JSON.stringify(e)).join("\n");
+ this.store.appendMessage(stream.sessionId, {
+ id: messageId,
+ role: "assistant",
+ content: turn.assistantText,
+ eventsJsonl,
+ createdAt: turn.startedAt,
+ });
+ stream.lastFlushedEventCount = turn.events.length;
+ }
+ catch (err) {
+ console.error(`[spectral] error: batch-persist flush failed: ${err instanceof Error ? err.message : String(err)}`);
+ }
+ }
  /**
  * Auto-title the session if it's still wearing the default title and we
  * haven't already attempted generation in this process. Fire-and-forget
@@ -706,6 +792,7 @@ function isReplayable(event) {
  event.type === "tool_call" ||
  event.type === "tool_result" ||
  event.type === "message_end" ||
+ event.type === "token_usage" ||
  event.type === "error");
  }
  function snapshotTurn(turn) {
@@ -29,6 +29,7 @@ import Database from "better-sqlite3";
  import { randomUUID } from "node:crypto";
  import { mkdirSync, readFileSync } from "node:fs";
  import { dirname, join } from "node:path";
+ import { getConfigDir } from "../config.js";
  import { stripJsoncComments } from "../studio-binding.js";
  /**
  * Schema version. Bump + the on-open migration drops & recreates every table.
@@ -262,7 +263,7 @@ export class SessionStore {
  this.stmtDeleteSession = this.db.prepare(`DELETE FROM sessions WHERE id = ?`);
  this.stmtListMessages = this.db.prepare(`SELECT id, session_id, role, content, events_jsonl, images_json, created_at
  FROM messages WHERE session_id = ? ORDER BY created_at ASC, id ASC`);
- this.stmtAppendMessage = this.db.prepare(`INSERT INTO messages (id, session_id, role, content, events_jsonl, images_json, created_at)
+ this.stmtAppendMessage = this.db.prepare(`INSERT OR REPLACE INTO messages (id, session_id, role, content, events_jsonl, images_json, created_at)
  VALUES (?, ?, ?, ?, ?, ?, ?)`);
  this.stmtTouchSession = this.db.prepare(`UPDATE sessions SET updated_at = ? WHERE id = ?`);
  this.stmtRenameSession = this.db.prepare(`UPDATE sessions SET title = ?, updated_at = ? WHERE id = ?`);
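The switch from plain INSERT to INSERT OR REPLACE is what makes the stub-insert, batch-flush, final-write sequence idempotent on the message id. A minimal demonstration with better-sqlite3, assuming `id` is the primary key of `messages` (which the REPLACE-based staging implies):

    import Database from "better-sqlite3";

    const db = new Database(":memory:");
    db.exec(`CREATE TABLE messages (id TEXT PRIMARY KEY, content TEXT)`);
    const upsert = db.prepare(`INSERT OR REPLACE INTO messages (id, content) VALUES (?, ?)`);

    upsert.run("m1", "");           // message_start stub
    upsert.run("m1", "partial");    // periodic batch flush
    upsert.run("m1", "final text"); // authoritative final write
    console.log(db.prepare(`SELECT content FROM messages WHERE id = ?`).get("m1"));
    // → { content: 'final text' }: last write wins, no duplicate rows.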
@@ -527,7 +528,7 @@ export function preflightSqlite(dbPath) {
527
528
  ok: false,
528
529
  error: `Failed to load native sqlite module (${msg}).\n` +
529
530
  ` This usually means the native binary couldn't be built for your Node.js version.\n` +
530
- ` Try: cd ~/.spectral && npm rebuild better-sqlite3\n` +
531
+ ` Try: cd ${getConfigDir()} && npm rebuild better-sqlite3\n` +
531
532
  ` Or reinstall: npm install -g @aexol/spectral`,
532
533
  };
533
534
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@aexol/spectral",
- "version": "0.2.0",
+ "version": "0.2.2",
  "description": "Always-on coding agent for Aexol — branded pi wrapper with relay-based browser access.",
  "type": "module",
  "private": false,