@aexol/spectral 0.2.0 → 0.2.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/commands/login.js +1 -1
- package/dist/commands/serve.js +2 -0
- package/dist/extensions/aexol-mcp.js +2 -2
- package/dist/relay/client.js +128 -11
- package/dist/server/pi-bridge.js +137 -48
- package/dist/server/session-stream.js +88 -1
- package/dist/server/storage.js +3 -2
- package/package.json +1 -1
package/dist/commands/login.js
CHANGED
|
@@ -61,7 +61,7 @@ export async function performLogin(opts) {
|
|
|
61
61
|
}
|
|
62
62
|
export async function runLogin() {
|
|
63
63
|
process.stdout.write(pc.bold("Spectral login\n"));
|
|
64
|
-
process.stdout.write(pc.dim(
|
|
64
|
+
process.stdout.write(pc.dim(`Authenticate against the Aexol MCP backend. Credentials are stored at ${getConfigFile()} (chmod 600).\n\n`));
|
|
65
65
|
const defaultUrl = process.env.SPECTRAL_MCP_URL ?? DEFAULT_API_URL;
|
|
66
66
|
let apiUrl;
|
|
67
67
|
let teamApiKey;
|
package/dist/commands/serve.js
CHANGED
|
@@ -192,7 +192,9 @@ export async function runServe(opts = {}) {
|
|
|
192
192
|
const relay = new RelayClient({
|
|
193
193
|
relayUrl,
|
|
194
194
|
machineJwt: registration.record.machineJwt,
|
|
195
|
+
backendUrl,
|
|
195
196
|
webSocketImpl: opts.webSocketImpl,
|
|
197
|
+
fetchImpl: opts.fetchImpl,
|
|
196
198
|
logger: silent ? { log: () => { }, warn: () => { }, error: () => { } } : console,
|
|
197
199
|
});
|
|
198
200
|
// Wire the meta publisher now that we have both the relay socket and
|
|
@@ -15,7 +15,7 @@
|
|
|
15
15
|
* backend is unreachable we log a warning and return, leaving pi to start
|
|
16
16
|
* without Aexol tools rather than crashing the whole agent.
|
|
17
17
|
*/
|
|
18
|
-
import { getApiUrl, readConfig } from "../config.js";
|
|
18
|
+
import { getApiUrl, getConfigFile, readConfig } from "../config.js";
|
|
19
19
|
import { AexolMcpClient, AexolMcpError } from "../mcp-client.js";
|
|
20
20
|
/**
|
|
21
21
|
* Render a backend tool result into a single string for pi.
|
|
@@ -58,7 +58,7 @@ export default async function aexolMcpExtension(pi) {
|
|
|
58
58
|
const cfg = await readConfig();
|
|
59
59
|
if (!cfg) {
|
|
60
60
|
// Pre-flight in cli.ts should have caught this. Logging is enough.
|
|
61
|
-
process.stderr.write(
|
|
61
|
+
process.stderr.write(`[aexol-mcp] No config found at ${getConfigFile()}; Aexol tools disabled. Run \`spectral login\`.\n`);
|
|
62
62
|
return;
|
|
63
63
|
}
|
|
64
64
|
const apiUrl = getApiUrl(cfg.apiUrl);
|
package/dist/relay/client.js
CHANGED
|
@@ -2,26 +2,30 @@
|
|
|
2
2
|
* RelayClient — long-lived WebSocket connection to the Aexol backend's
|
|
3
3
|
* `/agent-connection` endpoint.
|
|
4
4
|
*
|
|
5
|
-
* Responsibilities
|
|
5
|
+
* Responsibilities:
|
|
6
6
|
* - Open and maintain a single WS to the relay, authenticated with the
|
|
7
7
|
* machine JWT via `Authorization: Bearer <jwt>`.
|
|
8
8
|
* - Reply `{kind:"pong"}` to backend `{kind:"ping"}`. Backend closes
|
|
9
9
|
* `4408 heartbeat-timeout` if it doesn't hear from us within 90s; we
|
|
10
10
|
* rely on the backend pings rather than emitting our own (single source
|
|
11
11
|
* of liveness, no jitter on our side).
|
|
12
|
+
* - **Watchdog timer:** tracks `lastActivityMs` on every received frame /
|
|
13
|
+
* ping. Every 15 s, if no activity within `WATCHDOG_MS` (120 s = 2×
|
|
14
|
+
* backend timeout), force-closes the socket (4001 "watchdog-timeout")
|
|
15
|
+
* and triggers a reconnect. Detects silent backend death (Docker OOM
|
|
16
|
+
* kill, network partition without TCP RST) where the socket stays open
|
|
17
|
+
* but no data flows.
|
|
18
|
+
* - **Pre-reconnect health check:** before opening a new WS, does a quick
|
|
19
|
+
* HTTP GET to `backendUrl/health` with a 5 s timeout. If the backend is
|
|
20
|
+
* unhealthy or unreachable, skips the WS attempt and re-schedules. This
|
|
21
|
+
* avoids a slow TCP connect timeout (up to 75 s on some platforms) when
|
|
22
|
+
* the backend is down.
|
|
12
23
|
* - On unexpected close, reconnect forever with exponential backoff +
|
|
13
24
|
* ±20% jitter, capped at 30s. There is no "give up" state — if the
|
|
14
25
|
* machine is offline for hours, we just keep trying. Operators can
|
|
15
26
|
* `Ctrl-C` to stop.
|
|
16
27
|
* - Buffer outbound frames in a small queue while the socket is closed.
|
|
17
|
-
* Capped at 100 frames; oldest is dropped on overflow.
|
|
18
|
-
* introduces relay envelopes there's nothing meaningful to send anyway,
|
|
19
|
-
* so this is mostly future-proofing.
|
|
20
|
-
*
|
|
21
|
-
* Out of scope for Batch 2 (Batch 3 work):
|
|
22
|
-
* - Envelope routing (`rest_request` / `subscribe` / `ws_event`). The
|
|
23
|
-
* client emits `frame` for every non-pong frame; the dispatcher in
|
|
24
|
-
* `serve.ts` (currently just an ack/echo) will own translation.
|
|
28
|
+
* Capped at 100 frames; oldest is dropped on overflow.
|
|
25
29
|
*
|
|
26
30
|
* Events (typed via `RelayClientEvents`):
|
|
27
31
|
* - `open` - connected and welcomed
|
|
@@ -42,10 +46,24 @@ const SEND_QUEUE_CAP = 100;
|
|
|
42
46
|
const RECONNECT_SCHEDULE = [1000, 2000, 5000, 15000, 30000];
|
|
43
47
|
/** ±20% jitter on each scheduled delay. */
|
|
44
48
|
const JITTER_RATIO = 0.2;
|
|
49
|
+
/**
|
|
50
|
+
* Watchdog interval (ms). Every `WATCHDOG_INTERVAL_MS` we check whether
|
|
51
|
+
* the socket has been silent longer than `WATCHDOG_SILENCE_MS`.
|
|
52
|
+
*/
|
|
53
|
+
const WATCHDOG_INTERVAL_MS = 15_000;
|
|
54
|
+
/**
|
|
55
|
+
* Max silence before triggering watchdog reconnect (ms). 2× the backend's
|
|
56
|
+
* 90 s heartbeat timeout = 120 s.
|
|
57
|
+
*/
|
|
58
|
+
const WATCHDOG_SILENCE_MS = 120_000;
|
|
59
|
+
/** HTTP timeout for the pre-reconnect health check. */
|
|
60
|
+
const HEALTH_CHECK_TIMEOUT_MS = 5_000;
|
|
45
61
|
export class RelayClient extends EventEmitter {
|
|
46
62
|
relayUrl;
|
|
47
63
|
machineJwt;
|
|
64
|
+
backendUrl;
|
|
48
65
|
WS;
|
|
66
|
+
fetchImpl;
|
|
49
67
|
logger;
|
|
50
68
|
exit;
|
|
51
69
|
ws = null;
|
|
@@ -53,11 +71,16 @@ export class RelayClient extends EventEmitter {
|
|
|
53
71
|
reconnectAttempt = 0;
|
|
54
72
|
reconnectTimer = null;
|
|
55
73
|
sendQueue = [];
|
|
74
|
+
/** Timestamp of last received frame / ping — drives the watchdog. */
|
|
75
|
+
lastActivityMs = 0;
|
|
76
|
+
watchdogTimer = null;
|
|
56
77
|
constructor(opts) {
|
|
57
78
|
super();
|
|
58
79
|
this.relayUrl = opts.relayUrl;
|
|
59
80
|
this.machineJwt = opts.machineJwt;
|
|
81
|
+
this.backendUrl = opts.backendUrl;
|
|
60
82
|
this.WS = opts.webSocketImpl ?? WebSocket;
|
|
83
|
+
this.fetchImpl = opts.fetchImpl ?? fetch;
|
|
61
84
|
this.logger = opts.logger ?? console;
|
|
62
85
|
this.exit = opts.exit ?? ((code) => process.exit(code));
|
|
63
86
|
}
|
|
@@ -111,6 +134,10 @@ export class RelayClient extends EventEmitter {
|
|
|
111
134
|
clearTimeout(this.reconnectTimer);
|
|
112
135
|
this.reconnectTimer = null;
|
|
113
136
|
}
|
|
137
|
+
if (this.watchdogTimer) {
|
|
138
|
+
clearInterval(this.watchdogTimer);
|
|
139
|
+
this.watchdogTimer = null;
|
|
140
|
+
}
|
|
114
141
|
const ws = this.ws;
|
|
115
142
|
this.ws = null;
|
|
116
143
|
if (ws) {
|
|
@@ -131,7 +158,13 @@ export class RelayClient extends EventEmitter {
|
|
|
131
158
|
headers: { Authorization: `Bearer ${this.machineJwt}` },
|
|
132
159
|
});
|
|
133
160
|
this.ws = ws;
|
|
161
|
+
// Seed the watchdog timer so a hanging connect handshake also triggers
|
|
162
|
+
// a timeout (no on("open") to bump activity). If the handshake succeeds
|
|
163
|
+
// quickly, `on("open")` resets this.
|
|
164
|
+
this.lastActivityMs = Date.now();
|
|
165
|
+
this.startWatchdog();
|
|
134
166
|
ws.on("open", () => {
|
|
167
|
+
this.lastActivityMs = Date.now();
|
|
135
168
|
this.reconnectAttempt = 0;
|
|
136
169
|
// Flush any queued frames.
|
|
137
170
|
const queued = this.sendQueue;
|
|
@@ -150,6 +183,7 @@ export class RelayClient extends EventEmitter {
|
|
|
150
183
|
this.emit("open");
|
|
151
184
|
});
|
|
152
185
|
ws.on("message", (data) => {
|
|
186
|
+
this.lastActivityMs = Date.now();
|
|
153
187
|
let parsed;
|
|
154
188
|
try {
|
|
155
189
|
parsed = JSON.parse(data.toString());
|
|
@@ -182,6 +216,7 @@ export class RelayClient extends EventEmitter {
|
|
|
182
216
|
});
|
|
183
217
|
ws.on("close", (code, reason) => {
|
|
184
218
|
this.ws = null;
|
|
219
|
+
this.stopWatchdog();
|
|
185
220
|
const reasonStr = reason?.toString() ?? "";
|
|
186
221
|
this.emit("close", { code, reason: reasonStr });
|
|
187
222
|
if (this.disposed)
|
|
@@ -202,11 +237,93 @@ export class RelayClient extends EventEmitter {
|
|
|
202
237
|
this.scheduleReconnect();
|
|
203
238
|
});
|
|
204
239
|
}
|
|
205
|
-
|
|
240
|
+
// --- watchdog -------------------------------------------------------------
|
|
241
|
+
startWatchdog() {
|
|
242
|
+
if (this.watchdogTimer)
|
|
243
|
+
return;
|
|
244
|
+
this.watchdogTimer = setInterval(() => {
|
|
245
|
+
if (this.disposed)
|
|
246
|
+
return;
|
|
247
|
+
const elapsed = Date.now() - this.lastActivityMs;
|
|
248
|
+
if (elapsed > WATCHDOG_SILENCE_MS) {
|
|
249
|
+
this.logger.warn(`Watchdog: no relay activity for ${Math.round(elapsed / 1000)}s, forcing reconnect`);
|
|
250
|
+
const ws = this.ws;
|
|
251
|
+
this.ws = null;
|
|
252
|
+
if (ws) {
|
|
253
|
+
try {
|
|
254
|
+
ws.close(4001, "watchdog-timeout");
|
|
255
|
+
}
|
|
256
|
+
catch {
|
|
257
|
+
// ignore
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
}
|
|
261
|
+
}, WATCHDOG_INTERVAL_MS);
|
|
262
|
+
}
|
|
263
|
+
stopWatchdog() {
|
|
264
|
+
if (this.watchdogTimer) {
|
|
265
|
+
clearInterval(this.watchdogTimer);
|
|
266
|
+
this.watchdogTimer = null;
|
|
267
|
+
}
|
|
268
|
+
}
|
|
269
|
+
/**
|
|
270
|
+
* Pre-reconnect health check: quick HTTP GET to `<backendUrl>/health`.
|
|
271
|
+
* Returns true when the backend is reachable (or when no backendUrl is
|
|
272
|
+
* configured, which skips the check entirely). Returns false when the
|
|
273
|
+
* backend is unhealthy/down — the caller should re-schedule.
|
|
274
|
+
*/
|
|
275
|
+
async healthCheck() {
|
|
276
|
+
if (!this.backendUrl)
|
|
277
|
+
return true; // no backend → no health check
|
|
278
|
+
const url = `${this.backendUrl.replace(/\/$/, "")}/health`;
|
|
279
|
+
try {
|
|
280
|
+
const ctrl = new AbortController();
|
|
281
|
+
const timeout = setTimeout(() => ctrl.abort(), HEALTH_CHECK_TIMEOUT_MS);
|
|
282
|
+
const res = await this.fetchImpl(url, {
|
|
283
|
+
method: "GET",
|
|
284
|
+
signal: ctrl.signal,
|
|
285
|
+
});
|
|
286
|
+
clearTimeout(timeout);
|
|
287
|
+
if (!res.ok) {
|
|
288
|
+
this.logger.warn(`Health check: ${url} returned ${res.status}, will retry`);
|
|
289
|
+
return false;
|
|
290
|
+
}
|
|
291
|
+
// Backend is reachable — proceed to WS.
|
|
292
|
+
return true;
|
|
293
|
+
}
|
|
294
|
+
catch (err) {
|
|
295
|
+
// AbortError (timeout), fetch error (connection refused, DNS, etc.)
|
|
296
|
+
// all mean the backend is not reachable. Log compactly and retry.
|
|
297
|
+
const msg = err?.name === "AbortError"
|
|
298
|
+
? "timeout"
|
|
299
|
+
: (err instanceof Error ? err.message : String(err));
|
|
300
|
+
this.logger.warn(`Health check: ${url} unreachable (${msg}), will retry`);
|
|
301
|
+
return false;
|
|
302
|
+
}
|
|
303
|
+
}
|
|
304
|
+
async scheduleReconnect() {
|
|
206
305
|
if (this.disposed)
|
|
207
306
|
return;
|
|
208
307
|
if (this.reconnectTimer)
|
|
209
308
|
return;
|
|
309
|
+
// Pre-reconnect health check — avoids wasting time on a slow TCP
|
|
310
|
+
// connect when the backend is down. The health endpoint is a cheap
|
|
311
|
+
// HTTP GET; if even that fails, skip the WS attempt entirely.
|
|
312
|
+
const healthy = await this.healthCheck();
|
|
313
|
+
if (!healthy) {
|
|
314
|
+
// Backend unreachable — re-schedule, still advancing
|
|
315
|
+
// the backoff schedule (this is an environment problem, not a WS
|
|
316
|
+
// handshake problem).
|
|
317
|
+
const idx = Math.min(this.reconnectAttempt, RECONNECT_SCHEDULE.length - 1);
|
|
318
|
+
const delay = RECONNECT_SCHEDULE[idx];
|
|
319
|
+
this.reconnectAttempt++;
|
|
320
|
+
this.emit("reconnect-scheduled", { delayMs: delay, attempt: this.reconnectAttempt });
|
|
321
|
+
this.reconnectTimer = setTimeout(() => {
|
|
322
|
+
this.reconnectTimer = null;
|
|
323
|
+
void this.scheduleReconnect().catch(() => { });
|
|
324
|
+
}, delay);
|
|
325
|
+
return;
|
|
326
|
+
}
|
|
210
327
|
const idx = Math.min(this.reconnectAttempt, RECONNECT_SCHEDULE.length - 1);
|
|
211
328
|
const base = RECONNECT_SCHEDULE[idx];
|
|
212
329
|
const jitter = base * JITTER_RATIO * (Math.random() * 2 - 1);
|
|
@@ -233,7 +350,7 @@ export class RelayClient extends EventEmitter {
|
|
|
233
350
|
// URL, etc.) must not kill the daemon — log it and re-schedule
|
|
234
351
|
// so the backoff continues forever.
|
|
235
352
|
this.emit("error", err);
|
|
236
|
-
this.scheduleReconnect();
|
|
353
|
+
void this.scheduleReconnect().catch(() => { });
|
|
237
354
|
}
|
|
238
355
|
}, delay);
|
|
239
356
|
}
|
package/dist/server/pi-bridge.js
CHANGED
|
@@ -125,6 +125,60 @@ function resolveMcpAdapterEntry() {
|
|
|
125
125
|
}
|
|
126
126
|
return null;
|
|
127
127
|
}
|
|
128
|
+
/**
|
|
129
|
+
* Token pricing per model (USD per 1M tokens). Matches provider list
|
|
130
|
+
* prices as of May 2026. Used to compute token cost server-side when
|
|
131
|
+
* pi's own cost field is unavailable (synthetic proxy models are
|
|
132
|
+
* registered with zero cost to avoid pi-side billing).
|
|
133
|
+
*
|
|
134
|
+
* Keys are matched as prefix substrings against modelId, so
|
|
135
|
+
* `"claude-sonnet-4"` covers both `claude-sonnet-4-20250514` and any
|
|
136
|
+
* future point-release.
|
|
137
|
+
*/
|
|
138
|
+
const MODEL_PRICING = [
|
|
139
|
+
// Anthropic Claude models
|
|
140
|
+
{ prefix: "claude-opus-4", input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.50 },
|
|
141
|
+
{ prefix: "claude-sonnet-4", input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
|
|
142
|
+
{ prefix: "claude-3-5-sonnet", input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
|
|
143
|
+
{ prefix: "claude-3-5-haiku", input: 0.80, output: 4, cacheWrite: 1, cacheRead: 0.08 },
|
|
144
|
+
{ prefix: "claude-3-opus", input: 15, output: 75, cacheWrite: 18.75, cacheRead: 1.50 },
|
|
145
|
+
{ prefix: "claude-3-sonnet", input: 3, output: 15, cacheWrite: 3.75, cacheRead: 0.30 },
|
|
146
|
+
{ prefix: "claude-3-haiku", input: 0.25, output: 1.25, cacheWrite: 1.25, cacheRead: 0.025 },
|
|
147
|
+
// OpenAI models
|
|
148
|
+
{ prefix: "gpt-4.1", input: 2, output: 8, cacheWrite: 8, cacheRead: 0.50 },
|
|
149
|
+
{ prefix: "gpt-4o", input: 2.50, output: 10, cacheWrite: 10, cacheRead: 1.25 },
|
|
150
|
+
{ prefix: "gpt-4-turbo", input: 10, output: 30, cacheWrite: 0, cacheRead: 0 },
|
|
151
|
+
{ prefix: "gpt-4", input: 30, output: 60, cacheWrite: 0, cacheRead: 0 },
|
|
152
|
+
{ prefix: "gpt-3.5-turbo", input: 0.50, output: 1.50, cacheWrite: 0, cacheRead: 0 },
|
|
153
|
+
{ prefix: "o1", input: 15, output: 60, cacheWrite: 0, cacheRead: 0 },
|
|
154
|
+
{ prefix: "o3-mini", input: 1.10, output: 4.40, cacheWrite: 0, cacheRead: 0 },
|
|
155
|
+
{ prefix: "o4-mini", input: 1.10, output: 4.40, cacheWrite: 0, cacheRead: 0 },
|
|
156
|
+
// Google Gemini models
|
|
157
|
+
{ prefix: "gemini-2.5-pro", input: 1.25, output: 10, cacheWrite: 0, cacheRead: 0 },
|
|
158
|
+
{ prefix: "gemini-2.5-flash", input: 0.15, output: 0.60, cacheWrite: 0, cacheRead: 0 },
|
|
159
|
+
// DeepSeek models
|
|
160
|
+
{ prefix: "deepseek-v3", input: 0.27, output: 1.10, cacheWrite: 0, cacheRead: 0 },
|
|
161
|
+
{ prefix: "deepseek-r1", input: 0.55, output: 2.19, cacheWrite: 0, cacheRead: 0 },
|
|
162
|
+
{ prefix: "deepseek/deepseek-v3", input: 0.27, output: 1.10, cacheWrite: 0, cacheRead: 0 },
|
|
163
|
+
{ prefix: "deepseek/deepseek-r1", input: 0.55, output: 2.19, cacheWrite: 0, cacheRead: 0 },
|
|
164
|
+
// Meta Llama models (common via OpenRouter-compatible endpoints)
|
|
165
|
+
{ prefix: "meta-llama/llama-4", input: 0.20, output: 0.80, cacheWrite: 0, cacheRead: 0 },
|
|
166
|
+
{ prefix: "meta-llama/llama-3.3", input: 0.20, output: 0.50, cacheWrite: 0, cacheRead: 0 },
|
|
167
|
+
];
|
|
168
|
+
/** Look up pricing for a modelId. Returns null when unknown. */
|
|
169
|
+
function lookupPricing(modelId) {
|
|
170
|
+
for (const entry of MODEL_PRICING) {
|
|
171
|
+
if (modelId.startsWith(entry.prefix)) {
|
|
172
|
+
return {
|
|
173
|
+
input: entry.input,
|
|
174
|
+
output: entry.output,
|
|
175
|
+
cacheWrite: entry.cacheWrite,
|
|
176
|
+
cacheRead: entry.cacheRead,
|
|
177
|
+
};
|
|
178
|
+
}
|
|
179
|
+
}
|
|
180
|
+
return null;
|
|
181
|
+
}
|
|
128
182
|
export class PiBridge {
|
|
129
183
|
session;
|
|
130
184
|
unsubscribe;
|
|
@@ -316,26 +370,30 @@ export class PiBridge {
|
|
|
316
370
|
apiKey: this.opts.machineJwt,
|
|
317
371
|
authHeader: true,
|
|
318
372
|
api: "anthropic-messages",
|
|
319
|
-
models: anthropicModels.map((m) =>
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
327
|
-
|
|
328
|
-
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
337
|
-
|
|
338
|
-
|
|
373
|
+
models: anthropicModels.map((m) => {
|
|
374
|
+
const pricing = lookupPricing(m.modelId);
|
|
375
|
+
return {
|
|
376
|
+
id: m.modelId,
|
|
377
|
+
name: m.displayName,
|
|
378
|
+
api: "anthropic-messages",
|
|
379
|
+
// Pin provider/baseUrl explicitly so pi's ModelRegistry doesn't
|
|
380
|
+
// auto-derive `provider` from a slash-prefixed id (e.g. treating
|
|
381
|
+
// `deepseek/deepseek-v4-pro` as provider `"deepseek"`), which would
|
|
382
|
+
// make `hasConfiguredAuth(model)` look up the wrong provider key
|
|
383
|
+
// and surface "No API key for deepseek/...". Both must point back
|
|
384
|
+
// at our synthetic proxy provider so auth resolves to the machine JWT.
|
|
385
|
+
provider: SPECTRAL_PROXY_ANTHROPIC,
|
|
386
|
+
baseUrl,
|
|
387
|
+
reasoning: false,
|
|
388
|
+
input: ["text", "image"],
|
|
389
|
+
// Real pricing so pi can compute accurate token costs.
|
|
390
|
+
cost: pricing
|
|
391
|
+
? { input: pricing.input, output: pricing.output, cacheRead: pricing.cacheRead, cacheWrite: pricing.cacheWrite }
|
|
392
|
+
: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
393
|
+
contextWindow: 0,
|
|
394
|
+
maxTokens: 0,
|
|
395
|
+
};
|
|
396
|
+
}),
|
|
339
397
|
});
|
|
340
398
|
}
|
|
341
399
|
if (openaiCompatModels.length > 0) {
|
|
@@ -344,22 +402,28 @@ export class PiBridge {
|
|
|
344
402
|
apiKey: this.opts.machineJwt,
|
|
345
403
|
authHeader: true,
|
|
346
404
|
api: "openai-completions",
|
|
347
|
-
models: openaiCompatModels.map((m) =>
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
405
|
+
models: openaiCompatModels.map((m) => {
|
|
406
|
+
const pricing = lookupPricing(m.modelId);
|
|
407
|
+
return {
|
|
408
|
+
id: m.modelId,
|
|
409
|
+
name: m.displayName,
|
|
410
|
+
api: "openai-completions",
|
|
411
|
+
// See anthropic batch above for rationale — without these, pi
|
|
412
|
+
// auto-derives `provider` from slash-prefixed ids like
|
|
413
|
+
// `deepseek/deepseek-v4-pro` or `meta-llama/llama-3.3-70b-instruct`,
|
|
414
|
+
// breaking auth lookup against our synthetic proxy provider.
|
|
415
|
+
provider: SPECTRAL_PROXY_OPENAI,
|
|
416
|
+
baseUrl,
|
|
417
|
+
reasoning: false,
|
|
418
|
+
input: ["text", "image"],
|
|
419
|
+
// Real pricing so pi can compute accurate token costs.
|
|
420
|
+
cost: pricing
|
|
421
|
+
? { input: pricing.input, output: pricing.output, cacheRead: pricing.cacheRead, cacheWrite: pricing.cacheWrite }
|
|
422
|
+
: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
423
|
+
contextWindow: 0,
|
|
424
|
+
maxTokens: 0,
|
|
425
|
+
};
|
|
426
|
+
}),
|
|
363
427
|
});
|
|
364
428
|
}
|
|
365
429
|
// Built-in UserModel entries — custom models registered by the team.
|
|
@@ -373,18 +437,23 @@ export class PiBridge {
|
|
|
373
437
|
apiKey: this.opts.machineJwt,
|
|
374
438
|
authHeader: true,
|
|
375
439
|
api: "openai-completions",
|
|
376
|
-
models: userModelEntries.map((m) =>
|
|
377
|
-
|
|
378
|
-
|
|
379
|
-
|
|
380
|
-
|
|
381
|
-
|
|
382
|
-
|
|
383
|
-
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
440
|
+
models: userModelEntries.map((m) => {
|
|
441
|
+
const pricing = lookupPricing(m.modelId);
|
|
442
|
+
return {
|
|
443
|
+
id: m.modelId,
|
|
444
|
+
name: m.displayName,
|
|
445
|
+
api: "openai-completions",
|
|
446
|
+
provider: SPECTRAL_PROXY_USER_MODEL,
|
|
447
|
+
baseUrl,
|
|
448
|
+
reasoning: false,
|
|
449
|
+
input: ["text", "image"],
|
|
450
|
+
cost: pricing
|
|
451
|
+
? { input: pricing.input, output: pricing.output, cacheRead: pricing.cacheRead, cacheWrite: pricing.cacheWrite }
|
|
452
|
+
: { input: 0, output: 0, cacheRead: 0, cacheWrite: 0 },
|
|
453
|
+
contextWindow: 0,
|
|
454
|
+
maxTokens: 0,
|
|
455
|
+
};
|
|
456
|
+
}),
|
|
388
457
|
});
|
|
389
458
|
}
|
|
390
459
|
}
|
|
@@ -651,6 +720,26 @@ export class PiBridge {
|
|
|
651
720
|
const endEvent = { type: "message_end", messageId };
|
|
652
721
|
this.pending.wireEvents.push(endEvent);
|
|
653
722
|
this.opts.emit(endEvent);
|
|
723
|
+
// Emit token usage for this assistant message. pi provides token
|
|
724
|
+
// counts via ev.message.usage; cost is computed from the model's
|
|
725
|
+
// configured pricing (or null when unavailable).
|
|
726
|
+
const usage = ev.message.usage;
|
|
727
|
+
if (usage) {
|
|
728
|
+
const usageEvent = {
|
|
729
|
+
type: "token_usage",
|
|
730
|
+
messageId,
|
|
731
|
+
usage: {
|
|
732
|
+
inputTokens: usage.input ?? 0,
|
|
733
|
+
outputTokens: usage.output ?? 0,
|
|
734
|
+
cacheReadTokens: usage.cacheRead ?? 0,
|
|
735
|
+
cacheWriteTokens: usage.cacheWrite ?? 0,
|
|
736
|
+
totalTokens: usage.totalTokens ?? 0,
|
|
737
|
+
cost: usage.cost?.total ?? null,
|
|
738
|
+
},
|
|
739
|
+
};
|
|
740
|
+
this.pending.wireEvents.push(usageEvent);
|
|
741
|
+
this.opts.emit(usageEvent);
|
|
742
|
+
}
|
|
654
743
|
// Defer persistence: keep `this.pending` alive so tool events that
|
|
655
744
|
// arrive after `message_end` (pi fires tool_execution_* events
|
|
656
745
|
// BETWEEN messages) are buffered into `pending.wireEvents`. We store
|
|
@@ -42,6 +42,12 @@ import { generateSessionTitle, isDefaultTitle, } from "./title-generator.js";
|
|
|
42
42
|
const DEFAULT_BRIDGE_FACTORY = (args) => new PiBridge(args);
|
|
43
43
|
/** Safety limit for autonomous loop iterations per session. */
|
|
44
44
|
const MAX_LOOP_ITERATIONS = 100;
|
|
45
|
+
/**
|
|
46
|
+
* Number of accumulated wire events before flushing the in-flight turn
|
|
47
|
+
* to SQLite. Batch-persisting means a server crash mid-turn only loses
|
|
48
|
+
* at most the last `BATCH_FLUSH_INTERVAL` events, not the entire turn.
|
|
49
|
+
*/
|
|
50
|
+
const BATCH_FLUSH_INTERVAL = 10;
|
|
45
51
|
/** Marker the agent emits in its response to signal the task is complete. */
|
|
46
52
|
const LOOP_DONE_MARKER = "<LOOP_DONE>";
|
|
47
53
|
export class SessionStreamManager {
|
|
@@ -361,6 +367,10 @@ export class SessionStreamManager {
|
|
|
361
367
|
// instead of hanging. Also prevents the next prompt() from waiting on
|
|
362
368
|
// a dead bridge's ready promise.
|
|
363
369
|
stream.startError = new Error("Turn cancelled");
|
|
370
|
+
// Clear batch-persist tracking so the next turn doesn't accidentally
|
|
371
|
+
// flush events against a stale messageId.
|
|
372
|
+
stream.currentMessageId = null;
|
|
373
|
+
stream.lastFlushedEventCount = 0;
|
|
364
374
|
// Broadcast agent_end so all subscribers close their open turn and
|
|
365
375
|
// re-enable their composers.
|
|
366
376
|
if (stream.currentTurn) {
|
|
@@ -385,6 +395,9 @@ export class SessionStreamManager {
|
|
|
385
395
|
if (!stream)
|
|
386
396
|
return;
|
|
387
397
|
stream.loopActive = false;
|
|
398
|
+
// Flush the last batch of in-flight events before tearing down, so a
|
|
399
|
+
// shutdown / GC doesn't lose events that haven't hit the interval yet.
|
|
400
|
+
this.flushInFlightTurn(stream);
|
|
388
401
|
try {
|
|
389
402
|
stream.bridge.dispose();
|
|
390
403
|
}
|
|
@@ -460,6 +473,8 @@ export class SessionStreamManager {
|
|
|
460
473
|
startError: null,
|
|
461
474
|
subscribers: new Set(),
|
|
462
475
|
currentTurn: null,
|
|
476
|
+
currentMessageId: null,
|
|
477
|
+
lastFlushedEventCount: 0,
|
|
463
478
|
loopActive: false,
|
|
464
479
|
loopIterationCount: 0,
|
|
465
480
|
loopOriginalPrompt: null,
|
|
@@ -523,6 +538,33 @@ export class SessionStreamManager {
|
|
|
523
538
|
if (event.type === "text_delta") {
|
|
524
539
|
stream.currentTurn.assistantText += event.delta;
|
|
525
540
|
}
|
|
541
|
+
// Track message lifecycle for batch persistence.
|
|
542
|
+
if (event.type === "message_start") {
|
|
543
|
+
stream.currentMessageId = event.messageId;
|
|
544
|
+
stream.lastFlushedEventCount = 0;
|
|
545
|
+
// Stub-insert so the server can recover this message on restart even
|
|
546
|
+
// if it crashes before the first batch flush. `INSERT OR REPLACE`
|
|
547
|
+
// ensures the final `onAssistantMessageComplete` write wins.
|
|
548
|
+
try {
|
|
549
|
+
this.store.appendMessage(stream.sessionId, {
|
|
550
|
+
id: event.messageId,
|
|
551
|
+
role: "assistant",
|
|
552
|
+
content: "",
|
|
553
|
+
eventsJsonl: "",
|
|
554
|
+
createdAt: Date.now(),
|
|
555
|
+
});
|
|
556
|
+
}
|
|
557
|
+
catch (err) {
|
|
558
|
+
console.error(`[spectral] error: batch-persist stub insert failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
559
|
+
}
|
|
560
|
+
}
|
|
561
|
+
// Batch-persist: flush accumulated events to SQLite every N events so a
|
|
562
|
+
// server crash mid-turn only loses the last batch, not the entire turn.
|
|
563
|
+
if (stream.currentMessageId &&
|
|
564
|
+
stream.currentTurn &&
|
|
565
|
+
stream.currentTurn.events.length - stream.lastFlushedEventCount >= BATCH_FLUSH_INTERVAL) {
|
|
566
|
+
this.flushInFlightTurn(stream);
|
|
567
|
+
}
|
|
526
568
|
}
|
|
527
569
|
// Broadcast first, then maybe close out the turn. agent_end clears the
|
|
528
570
|
// buffer because by that point the assistant message is already in
|
|
@@ -530,6 +572,12 @@ export class SessionStreamManager {
|
|
|
530
572
|
// which fires before agent_end).
|
|
531
573
|
this.broadcast(stream, event);
|
|
532
574
|
if (event.type === "agent_end") {
|
|
575
|
+
// Final flush + clear batch-persist tracking. `onAssistantMessageComplete`
|
|
576
|
+
// has already written the authoritative final row to SQLite (it fires on
|
|
577
|
+
// `message_end`, which precedes `agent_end`), so the staged row is already
|
|
578
|
+
// replaced with complete data. We just zero out the in-memory trackers.
|
|
579
|
+
stream.currentMessageId = null;
|
|
580
|
+
stream.lastFlushedEventCount = 0;
|
|
533
581
|
const finishedTurn = stream.currentTurn;
|
|
534
582
|
stream.currentTurn = null;
|
|
535
583
|
// Fire-and-forget auto-title generation. Runs only once per session
|
|
@@ -584,10 +632,48 @@ export class SessionStreamManager {
|
|
|
584
632
|
// An error event arriving outside a turn (or bubbling out of one) —
|
|
585
633
|
// discard partial buffer to avoid replaying half a turn that the
|
|
586
634
|
// client has already shown an error for. The error event itself is
|
|
587
|
-
// still broadcast above.
|
|
635
|
+
// still broadcast above. Also clear batch-persist tracking so the
|
|
636
|
+
// next `message_start` starts a fresh sequence.
|
|
637
|
+
stream.currentMessageId = null;
|
|
638
|
+
stream.lastFlushedEventCount = 0;
|
|
588
639
|
stream.currentTurn = null;
|
|
589
640
|
}
|
|
590
641
|
}
|
|
642
|
+
/**
|
|
643
|
+
* Flush the current in-flight turn's events to SQLite for crash recovery.
|
|
644
|
+
* Only the events accumulated since the last flush are written — we append
|
|
645
|
+
* them to the already-stored JSONL via INSERT OR REPLACE. Called every
|
|
646
|
+
* `BATCH_FLUSH_INTERVAL` events from `handleBridgeEvent`.
|
|
647
|
+
*
|
|
648
|
+
* Errors are caught, logged, and swallowed: batch persistence is a
|
|
649
|
+
* best-effort hardening, never a failure path that should block the stream.
|
|
650
|
+
*/
|
|
651
|
+
flushInFlightTurn(stream) {
|
|
652
|
+
const turn = stream.currentTurn;
|
|
653
|
+
const messageId = stream.currentMessageId;
|
|
654
|
+
if (!turn || !messageId)
|
|
655
|
+
return;
|
|
656
|
+
const newEvents = turn.events.slice(stream.lastFlushedEventCount);
|
|
657
|
+
if (newEvents.length === 0)
|
|
658
|
+
return;
|
|
659
|
+
try {
|
|
660
|
+
// Build JSONL from all events (already-flushed + new) so the row is
|
|
661
|
+
// always a complete, self-consistent snapshot. Older batches are
|
|
662
|
+
// included so history rehydration doesn't need to stitch fragments.
|
|
663
|
+
const eventsJsonl = turn.events.map((e) => JSON.stringify(e)).join("\n");
|
|
664
|
+
this.store.appendMessage(stream.sessionId, {
|
|
665
|
+
id: messageId,
|
|
666
|
+
role: "assistant",
|
|
667
|
+
content: turn.assistantText,
|
|
668
|
+
eventsJsonl,
|
|
669
|
+
createdAt: turn.startedAt,
|
|
670
|
+
});
|
|
671
|
+
stream.lastFlushedEventCount = turn.events.length;
|
|
672
|
+
}
|
|
673
|
+
catch (err) {
|
|
674
|
+
console.error(`[spectral] error: batch-persist flush failed: ${err instanceof Error ? err.message : String(err)}`);
|
|
675
|
+
}
|
|
676
|
+
}
|
|
591
677
|
/**
|
|
592
678
|
* Auto-title the session if it's still wearing the default title and we
|
|
593
679
|
* haven't already attempted generation in this process. Fire-and-forget
|
|
@@ -706,6 +792,7 @@ function isReplayable(event) {
|
|
|
706
792
|
event.type === "tool_call" ||
|
|
707
793
|
event.type === "tool_result" ||
|
|
708
794
|
event.type === "message_end" ||
|
|
795
|
+
event.type === "token_usage" ||
|
|
709
796
|
event.type === "error");
|
|
710
797
|
}
|
|
711
798
|
function snapshotTurn(turn) {
|
package/dist/server/storage.js
CHANGED
|
@@ -29,6 +29,7 @@ import Database from "better-sqlite3";
|
|
|
29
29
|
import { randomUUID } from "node:crypto";
|
|
30
30
|
import { mkdirSync, readFileSync } from "node:fs";
|
|
31
31
|
import { dirname, join } from "node:path";
|
|
32
|
+
import { getConfigDir } from "../config.js";
|
|
32
33
|
import { stripJsoncComments } from "../studio-binding.js";
|
|
33
34
|
/**
|
|
34
35
|
* Schema version. Bump + the on-open migration drops & recreates every table.
|
|
@@ -262,7 +263,7 @@ export class SessionStore {
|
|
|
262
263
|
this.stmtDeleteSession = this.db.prepare(`DELETE FROM sessions WHERE id = ?`);
|
|
263
264
|
this.stmtListMessages = this.db.prepare(`SELECT id, session_id, role, content, events_jsonl, images_json, created_at
|
|
264
265
|
FROM messages WHERE session_id = ? ORDER BY created_at ASC, id ASC`);
|
|
265
|
-
this.stmtAppendMessage = this.db.prepare(`INSERT INTO messages (id, session_id, role, content, events_jsonl, images_json, created_at)
|
|
266
|
+
this.stmtAppendMessage = this.db.prepare(`INSERT OR REPLACE INTO messages (id, session_id, role, content, events_jsonl, images_json, created_at)
|
|
266
267
|
VALUES (?, ?, ?, ?, ?, ?, ?)`);
|
|
267
268
|
this.stmtTouchSession = this.db.prepare(`UPDATE sessions SET updated_at = ? WHERE id = ?`);
|
|
268
269
|
this.stmtRenameSession = this.db.prepare(`UPDATE sessions SET title = ?, updated_at = ? WHERE id = ?`);
|
|
@@ -527,7 +528,7 @@ export function preflightSqlite(dbPath) {
|
|
|
527
528
|
ok: false,
|
|
528
529
|
error: `Failed to load native sqlite module (${msg}).\n` +
|
|
529
530
|
` This usually means the native binary couldn't be built for your Node.js version.\n` +
|
|
530
|
-
` Try: cd
|
|
531
|
+
` Try: cd ${getConfigDir()} && npm rebuild better-sqlite3\n` +
|
|
531
532
|
` Or reinstall: npm install -g @aexol/spectral`,
|
|
532
533
|
};
|
|
533
534
|
}
|