@desplega.ai/agent-swarm 1.96.0 → 1.98.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/openapi.json CHANGED
@@ -2,7 +2,7 @@
2
2
  "openapi": "3.1.0",
3
3
  "info": {
4
4
  "title": "Agent Swarm API",
5
- "version": "1.96.0",
5
+ "version": "1.98.0",
6
6
  "description": "Multi-agent orchestration API for Claude Code, Codex, and Gemini CLI. Enables task distribution, agent communication, and service discovery.\n\nMCP tools are documented separately in [MCP.md](./MCP.md)."
7
7
  },
8
8
  "servers": [
@@ -2583,6 +2583,96 @@
2583
2583
  }
2584
2584
  }
2585
2585
  },
2586
+ "/api/keys/report-rate-limit-windows": {
2587
+ "post": {
2588
+ "summary": "Record provider-emitted rate-limit window telemetry for an API key",
2589
+ "tags": [
2590
+ "API Keys"
2591
+ ],
2592
+ "security": [
2593
+ {
2594
+ "bearerAuth": []
2595
+ }
2596
+ ],
2597
+ "requestBody": {
2598
+ "content": {
2599
+ "application/json": {
2600
+ "schema": {
2601
+ "type": "object",
2602
+ "properties": {
2603
+ "keyType": {
2604
+ "type": "string"
2605
+ },
2606
+ "keySuffix": {
2607
+ "type": "string",
2608
+ "minLength": 1,
2609
+ "maxLength": 10
2610
+ },
2611
+ "keyIndex": {
2612
+ "type": "integer",
2613
+ "minimum": 0
2614
+ },
2615
+ "windows": {
2616
+ "type": "object",
2617
+ "additionalProperties": {
2618
+ "type": "object",
2619
+ "properties": {
2620
+ "status": {
2621
+ "type": "string"
2622
+ },
2623
+ "utilization": {
2624
+ "type": "number"
2625
+ },
2626
+ "resetsAt": {
2627
+ "type": "number"
2628
+ },
2629
+ "isUsingOverage": {
2630
+ "type": "boolean"
2631
+ },
2632
+ "surpassedThreshold": {
2633
+ "type": "number"
2634
+ },
2635
+ "lastSeenAt": {
2636
+ "type": "string",
2637
+ "format": "date-time"
2638
+ }
2639
+ },
2640
+ "required": [
2641
+ "status",
2642
+ "lastSeenAt"
2643
+ ]
2644
+ }
2645
+ },
2646
+ "scope": {
2647
+ "type": "string"
2648
+ },
2649
+ "scopeId": {
2650
+ "type": "string"
2651
+ }
2652
+ },
2653
+ "required": [
2654
+ "keyType",
2655
+ "keySuffix",
2656
+ "keyIndex",
2657
+ "windows"
2658
+ ]
2659
+ }
2660
+ }
2661
+ }
2662
+ },
2663
+ "responses": {
2664
+ "200": {
2665
+ "description": "Rate-limit window telemetry recorded"
2666
+ },
2667
+ "400": {
2668
+ "description": "Validation error"
2669
+ },
2670
+ "401": {
2671
+ "description": "Unauthorized"
2672
+ }
2673
+ }
2674
+ }
2675
+ },
2586
2676
  "/api/keys/available": {
2587
2677
  "get": {
2588
2678
  "summary": "Get available (non-rate-limited) key indices for a credential type",
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@desplega.ai/agent-swarm",
3
- "version": "1.96.0",
3
+ "version": "1.98.0",
4
4
  "description": "Multi-agent orchestration for Claude Code, Codex, Gemini CLI, and other AI coding assistants",
5
5
  "license": "MIT",
6
6
  "author": "desplega.sh <contact@desplega.sh>",
package/src/be/db.ts CHANGED
@@ -102,6 +102,7 @@ import type {
102
102
  } from "../types";
103
103
  import { FollowUpConfigSchema, isTerminalTaskStatus } from "../types";
104
104
  import { deriveProviderFromKeyType } from "../utils/credentials";
105
+ import type { RateLimitWindowTelemetry } from "../utils/error-tracker";
105
106
  import { getCurrentRequestUserId } from "../utils/request-auth-context";
106
107
  import { scrubSecrets } from "../utils/secret-scrubber";
107
108
  import { decryptSecret, encryptSecret, getEncryptionKey, resolveEncryptionKey } from "./crypto";
@@ -9984,10 +9985,31 @@ export interface ApiKeyStatus {
9984
9985
  name: string | null;
9985
9986
  /** Auto-derived harness provider (claude/pi/codex) — see deriveProviderFromKeyType. */
9986
9987
  provider: string;
9988
+ /** Latest provider-emitted rate-limit window snapshots, keyed by window type. */
9989
+ rateLimitWindows: RateLimitWindowTelemetry;
9987
9990
  createdAt: string;
9988
9991
  updatedAt: string;
9989
9992
  }
9990
9993
 
9994
+ type ApiKeyStatusRow = Omit<ApiKeyStatus, "rateLimitWindows"> & { rateLimitWindows: string | null };
9995
+
9996
+ function parseRateLimitWindowsJson(value: string | null | undefined): RateLimitWindowTelemetry {
9997
+ if (!value) return {};
9998
+ try {
9999
+ const parsed = JSON.parse(value) as unknown;
10000
+ if (parsed && typeof parsed === "object" && !Array.isArray(parsed)) {
10001
+ return parsed as RateLimitWindowTelemetry;
10002
+ }
10003
+ } catch {
10004
+ // Ignore malformed historical values; telemetry is best-effort.
10005
+ }
10006
+ return {};
10007
+ }
10008
+
10009
+ function rowToApiKeyStatus(row: ApiKeyStatusRow): ApiKeyStatus {
10010
+ return { ...row, rateLimitWindows: parseRateLimitWindowsJson(row.rateLimitWindows) };
10011
+ }
10012
+
9991
10013
  /**
9992
10014
  * Get available (non-rate-limited) key indices for a credential type.
9993
10015
  * Automatically clears expired rate limits before returning.
@@ -10106,6 +10128,43 @@ export function markKeyRateLimited(
10106
10128
  );
10107
10129
  }
10108
10130
 
10131
+ export function recordKeyRateLimitWindows(
10132
+ keyType: string,
10133
+ keySuffix: string,
10134
+ keyIndex: number,
10135
+ windows: RateLimitWindowTelemetry,
10136
+ scope = "global",
10137
+ scopeId: string | null = null,
10138
+ ): void {
10139
+ if (Object.keys(windows).length === 0) return;
10140
+
10141
+ const now = new Date().toISOString();
10142
+ const effectiveScopeId = scopeId ?? "";
10143
+ const provider = deriveProviderFromKeyType(keyType);
10144
+ const db = getDb();
10145
+ const existing = db
10146
+ .prepare<{ rateLimitWindows: string | null }, [string, string, string, string]>(
10147
+ `SELECT rateLimitWindows FROM api_key_status
10148
+ WHERE keyType = ? AND keySuffix = ? AND scope = ? AND scopeId = ?`,
10149
+ )
10150
+ .get(keyType, keySuffix, scope, effectiveScopeId);
10151
+ const serialized = JSON.stringify({
10152
+ ...parseRateLimitWindowsJson(existing?.rateLimitWindows),
10153
+ ...windows,
10154
+ });
10155
+
10156
+ db.prepare(
10157
+ `INSERT INTO api_key_status (keyType, keySuffix, keyIndex, scope, scopeId, rateLimitWindows, provider, updatedAt)
10158
+ VALUES (?, ?, ?, ?, ?, ?, ?, ?)
10159
+ ON CONFLICT(keyType, keySuffix, scope, scopeId)
10160
+ DO UPDATE SET
10161
+ rateLimitWindows = excluded.rateLimitWindows,
10162
+ keyIndex = excluded.keyIndex,
10163
+ provider = excluded.provider,
10164
+ updatedAt = excluded.updatedAt`,
10165
+ ).run(keyType, keySuffix, keyIndex, scope, effectiveScopeId, serialized, provider, now);
10166
+ }
10167
+
10109
10168
  /**
10110
10169
  * Set or clear the human-friendly `name` label on a pooled credential.
10111
10170
  * Identified by the natural key (keyType + keySuffix + scope + scopeId).
@@ -10177,8 +10236,9 @@ export function getKeyStatuses(
10177
10236
 
10178
10237
  const where = conditions.length > 0 ? `WHERE ${conditions.join(" AND ")}` : "";
10179
10238
  return db
10180
- .prepare<ApiKeyStatus, string[]>(`SELECT * FROM api_key_status ${where} ORDER BY keyIndex`)
10181
- .all(...params);
10239
+ .prepare<ApiKeyStatusRow, string[]>(`SELECT * FROM api_key_status ${where} ORDER BY keyIndex`)
10240
+ .all(...params)
10241
+ .map(rowToApiKeyStatus);
10182
10242
  }
10183
10243
 
10184
10244
  export interface KeyCostSummary {
@@ -0,0 +1,5 @@
1
+ -- Persist provider-emitted rate-limit window telemetry on credential rows.
2
+ -- Shape is JSON keyed by provider window type, e.g.
3
+ -- {"five_hour":{"status":"allowed_warning","utilization":0.82,"resetsAt":1781334000,"isUsingOverage":false,"surpassedThreshold":0.75,"lastSeenAt":"..."}}
4
+
5
+ ALTER TABLE api_key_status ADD COLUMN rateLimitWindows TEXT NOT NULL DEFAULT '{}';
@@ -1,17 +1,17 @@
1
1
  /**
2
2
  * Post-listen backfill: embed scripts that are missing embeddings (e.g. after
3
- * boot seeding with scriptEmbeddingMode: "skip"). Runs once per boot,
4
- * async/non-blocking, idempotent, no-op when every non-scratch script already
5
- * has an embedding row.
3
+ * boot seeding with scriptEmbeddingMode: "skip") AND re-embed scripts whose
4
+ * stored embedding has the wrong dimension (e.g. 1536d legacy rows vs current
5
+ * 512d). Runs once per boot, async/non-blocking, idempotent, no-op when clean.
6
6
  *
7
7
  * Mirrors the memory boot-reembed pattern (src/be/memory/boot-reembed.ts).
8
8
  */
9
9
 
10
10
  import { getDb } from "@/be/db";
11
11
  import type { ScriptScope } from "@/types";
12
- import { embedScript } from "./embeddings";
12
+ import { embeddingProvider, embedScript } from "./embeddings";
13
13
 
14
- type ScriptMissingEmbedding = {
14
+ type ScriptRow = {
15
15
  id: string;
16
16
  name: string;
17
17
  scope: ScriptScope;
@@ -31,35 +31,65 @@ type ScriptMissingEmbedding = {
31
31
  updatedAt: string;
32
32
  };
33
33
 
34
+ function toScriptRecord(row: ScriptRow) {
35
+ return {
36
+ ...row,
37
+ scopeId: row.scopeId ?? null,
38
+ isScratch: row.isScratch === 1,
39
+ typeChecked: row.typeChecked === 1,
40
+ createdByAgentId: row.createdByAgentId ?? null,
41
+ };
42
+ }
43
+
34
44
  export async function runBootReembedScripts(): Promise<void> {
35
45
  const db = getDb();
46
+ const provider = embeddingProvider();
47
+ const expectedBytes = provider.dimensions * Float32Array.BYTES_PER_ELEMENT;
36
48
 
37
49
  const missing = db
38
- .prepare<ScriptMissingEmbedding, []>(
50
+ .prepare<ScriptRow, []>(
39
51
  `SELECT s.* FROM scripts s
40
52
  LEFT JOIN script_embeddings e ON e.scriptId = s.id
41
53
  WHERE s.isScratch = 0 AND e.scriptId IS NULL`,
42
54
  )
43
55
  .all();
44
56
 
45
- if (missing.length === 0) {
57
+ const wrongDim = db
58
+ .prepare<ScriptRow, []>(
59
+ `SELECT s.* FROM scripts s
60
+ JOIN script_embeddings e ON e.scriptId = s.id
61
+ WHERE s.isScratch = 0 AND length(e.embedding) != ${expectedBytes}`,
62
+ )
63
+ .all();
64
+
65
+ if (missing.length === 0 && wrongDim.length === 0) {
46
66
  return;
47
67
  }
48
68
 
49
- console.log(`[boot-reembed-scripts] starting: ${missing.length} scripts missing embeddings`);
69
+ if (missing.length > 0) {
70
+ console.log(`[boot-reembed-scripts] ${missing.length} scripts missing embeddings`);
71
+ }
72
+ if (wrongDim.length > 0) {
73
+ console.log(
74
+ `[boot-reembed-scripts] ${wrongDim.length} scripts with wrong-dimension embeddings (expected ${expectedBytes} bytes)`,
75
+ );
76
+ }
77
+
78
+ // Probe: verify the provider can actually generate embeddings
79
+ const probe = await provider.embed("test");
80
+ if (!probe) {
81
+ console.warn(
82
+ `[boot-reembed-scripts] skipped: no working embedding provider (missing OpenAI key?)`,
83
+ );
84
+ return;
85
+ }
50
86
 
51
87
  let embedded = 0;
52
88
  let failed = 0;
53
89
 
54
- for (const row of missing) {
90
+ for (const row of [...missing, ...wrongDim]) {
55
91
  try {
56
- await embedScript({
57
- ...row,
58
- scopeId: row.scopeId ?? null,
59
- isScratch: row.isScratch === 1,
60
- typeChecked: row.typeChecked === 1,
61
- createdByAgentId: row.createdByAgentId ?? null,
62
- });
92
+ await embedScript(toScriptRecord(row));
63
93
  embedded++;
64
94
  } catch (err) {
65
95
  failed++;
@@ -70,5 +100,15 @@ export async function runBootReembedScripts(): Promise<void> {
70
100
  }
71
101
  }
72
102
 
73
- console.log(`[boot-reembed-scripts] complete: embedded=${embedded} failed=${failed}`);
103
+ const afterWrongDim =
104
+ db
105
+ .prepare<{ count: number }, []>(
106
+ `SELECT COUNT(*) as count FROM script_embeddings
107
+ WHERE length(embedding) != ${expectedBytes}`,
108
+ )
109
+ .get()?.count ?? 0;
110
+
111
+ console.log(
112
+ `[boot-reembed-scripts] complete: embedded=${embedded} failed=${failed} remaining_wrong_dim=${afterWrongDim}`,
113
+ );
74
114
  }
@@ -42,7 +42,7 @@ export type ScriptSearchResult = {
42
42
 
43
43
  let providerOverride: EmbeddingProvider | null = null;
44
44
 
45
- function embeddingProvider(): EmbeddingProvider {
45
+ export function embeddingProvider(): EmbeddingProvider {
46
46
  return providerOverride ?? getEmbeddingProvider();
47
47
  }
48
48
 
@@ -82,6 +82,13 @@ export async function embedScript(script: ScriptRecord): Promise<void> {
82
82
  const embedding = await provider.embed(text);
83
83
  if (!embedding) return;
84
84
 
85
+ if (embedding.length !== provider.dimensions) {
86
+ console.error(
87
+ `[script-embed] dimension mismatch for "${script.name}": expected=${provider.dimensions} got=${embedding.length}, skipping`,
88
+ );
89
+ return;
90
+ }
91
+
85
92
  getDb()
86
93
  .prepare(
87
94
  `INSERT INTO script_embeddings (
@@ -204,20 +211,24 @@ export async function searchScripts(args: {
204
211
  const candidates = candidateRows(args.scope, args.scopeId);
205
212
  if (candidates.length === 0) return lexicalFallback(args);
206
213
 
207
- return candidates
208
- .map((row) => {
209
- const script = rowToScript(row);
210
- const semanticScore = cosineSimilarity(queryEmbedding, deserializeEmbedding(row.embedding));
211
- const bonus = nameMatchBonus(script, args.query);
212
- return {
213
- script,
214
- score: 0.7 * semanticScore + 0.3 * bonus,
215
- semanticScore,
216
- nameMatchBonus: bonus,
217
- };
218
- })
219
- .sort((a, b) => b.score - a.score)
220
- .slice(0, args.limit ?? 10);
214
+ const results: ScriptSearchResult[] = [];
215
+ for (const row of candidates) {
216
+ const stored = deserializeEmbedding(row.embedding);
217
+ if (stored.length !== queryEmbedding.length) continue;
218
+ const script = rowToScript(row);
219
+ const semanticScore = cosineSimilarity(queryEmbedding, stored);
220
+ const bonus = nameMatchBonus(script, args.query);
221
+ results.push({
222
+ script,
223
+ score: 0.7 * semanticScore + 0.3 * bonus,
224
+ semanticScore,
225
+ nameMatchBonus: bonus,
226
+ });
227
+ }
228
+
229
+ if (results.length === 0) return lexicalFallback(args);
230
+
231
+ return results.sort((a, b) => b.score - a.score).slice(0, args.limit ?? 10);
221
232
  }
222
233
 
223
234
  export async function reembedAllScripts(): Promise<void> {
@@ -45,6 +45,7 @@ import {
45
45
  isRateLimitMessage,
46
46
  MAX_RATE_LIMIT_RESET_MS,
47
47
  parseRateLimitResetTime,
48
+ type RateLimitWindowTelemetry,
48
49
  resolveCodexCreditsExhaustedCooldownMs,
49
50
  } from "../utils/error-tracker.ts";
50
51
  import { resolveHarnessProvider } from "../utils/harness-provider.ts";
@@ -1125,6 +1126,35 @@ async function reportKeyRateLimit(
1125
1126
  }
1126
1127
  }
1127
1128
 
1129
+ async function reportKeyRateLimitWindows(
1130
+ apiUrl: string,
1131
+ apiKey: string,
1132
+ keyType: string,
1133
+ keySuffix: string,
1134
+ keyIndex: number,
1135
+ windows: RateLimitWindowTelemetry,
1136
+ ): Promise<void> {
1137
+ if (Object.keys(windows).length === 0) return;
1138
+ try {
1139
+ await fetch(`${apiUrl}/api/keys/report-rate-limit-windows`, {
1140
+ method: "POST",
1141
+ headers: {
1142
+ "Content-Type": "application/json",
1143
+ Authorization: `Bearer ${apiKey}`,
1144
+ },
1145
+ body: JSON.stringify({
1146
+ keyType,
1147
+ keySuffix,
1148
+ keyIndex,
1149
+ windows,
1150
+ }),
1151
+ });
1152
+ console.log(`[credentials] Reported rate-limit windows for key ...${keySuffix}`);
1153
+ } catch {
1154
+ // Non-blocking
1155
+ }
1156
+ }
1157
+
1128
1158
  /** Clear a stale rate-limit record after a successful task (fire-and-forget) */
1129
1159
  async function reportKeyClearRateLimit(
1130
1160
  apiUrl: string,
@@ -3406,6 +3436,17 @@ async function checkCompletedProcesses(
3406
3436
  rateLimitedUntil,
3407
3437
  ).catch(() => {});
3408
3438
  }
3439
+
3440
+ if (credentialInfo && result.rateLimitWindows) {
3441
+ reportKeyRateLimitWindows(
3442
+ apiConfig.apiUrl,
3443
+ apiConfig.apiKey,
3444
+ credentialInfo.keyType,
3445
+ credentialInfo.keySuffix,
3446
+ credentialInfo.keyIndex,
3447
+ result.rateLimitWindows,
3448
+ ).catch(() => {});
3449
+ }
3409
3450
  let bridgeDiagnostics: Awaited<ReturnType<typeof getBridgeFailureDiagnostics>> | undefined;
3410
3451
  if (result.exitCode !== 0 && harnessProvider === "claude" && workingDir) {
3411
3452
  bridgeDiagnostics = await getBridgeFailureDiagnostics(workingDir);
@@ -6,6 +6,7 @@ import {
6
6
  getKeyCostSummary,
7
7
  getKeyStatuses,
8
8
  markKeyRateLimited,
9
+ recordKeyRateLimitWindows,
9
10
  recordKeyUsage,
10
11
  setApiKeyName,
11
12
  } from "../be/db";
@@ -58,6 +59,37 @@ const reportRateLimit = route({
58
59
  auth: { apiKey: true },
59
60
  });
60
61
 
62
+ const rateLimitWindowSchema = z.object({
63
+ status: z.string(),
64
+ utilization: z.number().optional(),
65
+ resetsAt: z.number().optional(),
66
+ isUsingOverage: z.boolean().optional(),
67
+ surpassedThreshold: z.number().optional(),
68
+ lastSeenAt: z.string().datetime(),
69
+ });
70
+
71
+ const reportRateLimitWindows = route({
72
+ method: "post",
73
+ path: "/api/keys/report-rate-limit-windows",
74
+ pattern: ["api", "keys", "report-rate-limit-windows"],
75
+ summary: "Record provider-emitted rate-limit window telemetry for an API key",
76
+ tags: ["API Keys"],
77
+ body: z.object({
78
+ keyType: z.string(),
79
+ keySuffix: z.string().min(1).max(10),
80
+ keyIndex: z.number().int().min(0),
81
+ windows: z.record(z.string(), rateLimitWindowSchema),
82
+ scope: z.string().optional(),
83
+ scopeId: z.string().optional(),
84
+ }),
85
+ responses: {
86
+ 200: { description: "Rate-limit window telemetry recorded" },
87
+ 400: { description: "Validation error" },
88
+ 401: { description: "Unauthorized" },
89
+ },
90
+ auth: { apiKey: true },
91
+ });
92
+
61
93
  const getAvailable = route({
62
94
  method: "get",
63
95
  path: "/api/keys/available",
@@ -196,6 +228,25 @@ export async function handleApiKeys(
196
228
  return true;
197
229
  }
198
230
 
231
+ // POST /api/keys/report-rate-limit-windows
232
+ if (reportRateLimitWindows.match(req.method, pathSegments)) {
233
+ const parsed = await reportRateLimitWindows.parse(req, res, pathSegments, queryParams);
234
+ if (!parsed) return true;
235
+
236
+ const { keyType, keySuffix, keyIndex, windows, scope, scopeId } = parsed.body;
237
+ try {
238
+ recordKeyRateLimitWindows(keyType, keySuffix, keyIndex, windows, scope, scopeId ?? null);
239
+ json(res, { success: true, message: `Rate-limit windows recorded for ...${keySuffix}` });
240
+ } catch (err) {
241
+ jsonError(
242
+ res,
243
+ err instanceof Error ? err.message : "Failed to record rate-limit windows",
244
+ 500,
245
+ );
246
+ }
247
+ return true;
248
+ }
249
+
199
250
  // GET /api/keys/available
200
251
  if (getAvailable.match(req.method, pathSegments)) {
201
252
  const parsed = await getAvailable.parse(req, res, pathSegments, queryParams);
@@ -674,6 +674,7 @@ class ClaudeSession implements ProviderSession {
674
674
  isError: (exitCode ?? 1) !== 0,
675
675
  failureReason,
676
676
  rateLimitResetAt: this.errorTracker.getRateLimitResetAt(),
677
+ rateLimitWindows: this.errorTracker.getRateLimitWindows(),
677
678
  };
678
679
  }
679
680
 
@@ -1024,6 +1024,7 @@ export class CodexSession implements ProviderSession {
1024
1024
  isError: true,
1025
1025
  failureReason: terminalError.message,
1026
1026
  rateLimitResetAt: this.errorTracker.getRateLimitResetAt(),
1027
+ rateLimitWindows: this.errorTracker.getRateLimitWindows(),
1027
1028
  });
1028
1029
  return;
1029
1030
  }
@@ -1045,6 +1046,7 @@ export class CodexSession implements ProviderSession {
1045
1046
  isError,
1046
1047
  failureReason: terminalError?.message,
1047
1048
  rateLimitResetAt: this.errorTracker.getRateLimitResetAt(),
1049
+ rateLimitWindows: this.errorTracker.getRateLimitWindows(),
1048
1050
  });
1049
1051
  } catch (err) {
1050
1052
  const message = err instanceof Error ? err.message : String(err);
@@ -1059,6 +1061,7 @@ export class CodexSession implements ProviderSession {
1059
1061
  isError: true,
1060
1062
  failureReason: message,
1061
1063
  rateLimitResetAt: this.errorTracker.getRateLimitResetAt(),
1064
+ rateLimitWindows: this.errorTracker.getRateLimitWindows(),
1062
1065
  });
1063
1066
  } finally {
1064
1067
  // Session-end summarization. Pure addition for codex — no behavior to
@@ -1,6 +1,53 @@
1
- export function readPkgVersion(packageName: string): string | undefined {
1
+ import { spawnSync } from "node:child_process";
2
+
3
+ type PackageJson = { version?: unknown };
4
+
5
+ type ReadPkgVersionOptions = {
6
+ requirePackageJson?: (specifier: string) => PackageJson;
7
+ spawn?: typeof spawnSync;
8
+ };
9
+
10
+ const cliVersionCommands: Record<string, { command: string; args: string[] }> = {
11
+ "@earendil-works/pi-coding-agent": { command: "pi", args: ["--version"] },
12
+ "@opencode-ai/sdk": { command: "opencode", args: ["--version"] },
13
+ };
14
+
15
+ function normalizeVersion(value: unknown): string | undefined {
16
+ return typeof value === "string" && value.trim().length > 0 ? value.trim() : undefined;
17
+ }
18
+
19
+ function parseCliVersion(output: string): string | undefined {
20
+ return output.match(/\d+\.\d+\.\d+(?:[-+][0-9A-Za-z.-]+)?/)?.[0];
21
+ }
22
+
23
+ function readCliVersion(packageName: string, spawn: typeof spawnSync): string | undefined {
24
+ const command = cliVersionCommands[packageName];
25
+ if (!command) return undefined;
26
+
27
+ try {
28
+ const result = spawn(command.command, command.args, {
29
+ encoding: "utf8",
30
+ stdio: ["ignore", "pipe", "pipe"],
31
+ });
32
+ return parseCliVersion(`${result.stdout ?? ""}\n${result.stderr ?? ""}`);
33
+ } catch {
34
+ return undefined;
35
+ }
36
+ }
37
+
38
+ export function readPkgVersion(
39
+ packageName: string,
40
+ {
41
+ requirePackageJson = (specifier) => require(specifier) as PackageJson,
42
+ spawn = spawnSync,
43
+ }: ReadPkgVersionOptions = {},
44
+ ): string | undefined {
45
+ const cliVersion = readCliVersion(packageName, spawn);
46
+ if (cliVersion) return cliVersion;
47
+
2
48
  try {
3
- return require(`${packageName}/package.json`).version;
49
+ const version = normalizeVersion(requirePackageJson(`${packageName}/package.json`).version);
50
+ if (version) return version;
4
51
  } catch {
5
52
  return undefined;
6
53
  }
@@ -111,6 +111,11 @@ function defaultOpencodeSkillsDir(): string {
111
111
  return join(process.env.HOME ?? "/home/worker", ".opencode", "skills");
112
112
  }
113
113
  const MODEL_CACHE_REFRESH_TIMEOUT_MS = 15_000;
114
+ // opencode cold-start on E2B disk regularly exceeds the SDK's 5s default
115
+ // server-start timeout (@opencode-ai/sdk dist/server.js), failing the spawn with
116
+ // "Timeout waiting for server to start after 5000ms". Override via
117
+ // OPENCODE_SERVER_TIMEOUT_MS.
118
+ const DEFAULT_SERVER_START_TIMEOUT_MS = 30_000;
114
119
 
115
120
  function isOpenRouterModel(model: string | undefined): boolean {
116
121
  return Boolean(model?.toLowerCase().startsWith("openrouter/"));
@@ -720,6 +725,7 @@ export class OpencodeAdapter implements ProviderAdapter {
720
725
  ({ client, server } = await createOpencode({
721
726
  hostname: "127.0.0.1",
722
727
  port: 0,
728
+ timeout: Number(process.env.OPENCODE_SERVER_TIMEOUT_MS) || DEFAULT_SERVER_START_TIMEOUT_MS,
723
729
  config: opencodeConfig,
724
730
  }));
725
731
  } finally {
@@ -34,6 +34,7 @@ export interface CostData {
34
34
  }
35
35
 
36
36
  import type { ProviderName } from "../types";
37
+ import type { RateLimitWindowTelemetry } from "../utils/error-tracker";
37
38
 
38
39
  /** Normalized event emitted by any provider adapter. */
39
40
  export type ProviderEvent =
@@ -137,6 +138,12 @@ export interface ProviderResult {
137
138
  * three-tier cooldown resolver.
138
139
  */
139
140
  rateLimitResetAt?: string;
141
+ /**
142
+ * Latest provider-emitted rate-limit window snapshots observed during the
143
+ * session, keyed by provider window type (for Claude: five_hour, seven_day).
144
+ * Best-effort and informational; consumers must tolerate it being absent.
145
+ */
146
+ rateLimitWindows?: RateLimitWindowTelemetry;
140
147
  }
141
148
 
142
149
  /** Behavioral traits that govern prompt assembly and feature gating. */
@@ -11,6 +11,7 @@ import {
11
11
  getKeyStatuses,
12
12
  initDb,
13
13
  markKeyRateLimited,
14
+ recordKeyRateLimitWindows,
14
15
  recordKeyUsage,
15
16
  } from "../be/db";
16
17
  import type { CredentialSelection } from "../utils/credentials";
@@ -240,6 +241,67 @@ describe("API key tracking DB queries", () => {
240
241
  const cleared = clearKeyRateLimit("OPENAI_API_KEY", "oai02");
241
242
  expect(cleared).toBe(false);
242
243
  });
244
+
245
+ test("recordKeyRateLimitWindows persists latest provider windows", () => {
246
+ recordKeyRateLimitWindows("ANTHROPIC_API_KEY", "aaa11", 0, {
247
+ seven_day: {
248
+ status: "allowed_warning",
249
+ utilization: 0.82,
250
+ resetsAt: 1781334000,
251
+ isUsingOverage: false,
252
+ surpassedThreshold: 0.75,
253
+ lastSeenAt: "2026-06-12T00:00:00.000Z",
254
+ },
255
+ });
256
+
257
+ const key = getKeyStatuses("ANTHROPIC_API_KEY").find((s) => s.keySuffix === "aaa11");
258
+ expect(key?.rateLimitWindows).toEqual({
259
+ seven_day: {
260
+ status: "allowed_warning",
261
+ utilization: 0.82,
262
+ resetsAt: 1781334000,
263
+ isUsingOverage: false,
264
+ surpassedThreshold: 0.75,
265
+ lastSeenAt: "2026-06-12T00:00:00.000Z",
266
+ },
267
+ });
268
+ });
269
+
270
+ test("recordKeyRateLimitWindows merges with existing provider windows", () => {
271
+ recordKeyRateLimitWindows("ANTHROPIC_API_KEY", "aaa11", 0, {
272
+ seven_day: {
273
+ status: "allowed_warning",
274
+ utilization: 0.82,
275
+ resetsAt: 1781334000,
276
+ lastSeenAt: "2026-06-12T00:00:00.000Z",
277
+ },
278
+ });
279
+
280
+ recordKeyRateLimitWindows("ANTHROPIC_API_KEY", "aaa11", 0, {
281
+ five_hour: {
282
+ status: "allowed",
283
+ utilization: 0.2,
284
+ resetsAt: 1781270000,
285
+ lastSeenAt: "2026-06-12T01:00:00.000Z",
286
+ },
287
+ });
288
+
289
+ const key = getKeyStatuses("ANTHROPIC_API_KEY").find((s) => s.keySuffix === "aaa11");
290
+ expect(key?.rateLimitWindows).toEqual({
291
+ seven_day: {
292
+ status: "allowed_warning",
293
+ utilization: 0.82,
294
+ resetsAt: 1781334000,
295
+ lastSeenAt: "2026-06-12T00:00:00.000Z",
296
+ },
297
+ five_hour: {
298
+ status: "allowed",
299
+ utilization: 0.2,
300
+ resetsAt: 1781270000,
301
+ lastSeenAt: "2026-06-12T01:00:00.000Z",
302
+ },
303
+ });
304
+ });
243
305
  });
244
306
 
245
307
  // ─── Cross-keyType Failover Logic Tests ──────────────────────────────────────
@@ -0,0 +1,47 @@
1
+ import { describe, expect, mock, test } from "bun:test";
2
+ import { readPkgVersion } from "../providers/harness-version";
3
+
4
+ describe("readPkgVersion", () => {
5
+ test("reads pi version from the CLI before package.json probes", () => {
6
+ const spawn = mock(() => ({ stdout: "0.79.1\n", stderr: "", status: 0 }));
7
+ const requirePackageJson = mock(() => ({ version: "0.0.0" }));
8
+
9
+ const version = readPkgVersion("@earendil-works/pi-coding-agent", {
10
+ requirePackageJson,
11
+ spawn,
12
+ });
13
+
14
+ expect(version).toBe("0.79.1");
15
+ expect(spawn).toHaveBeenCalledWith("pi", ["--version"], {
16
+ encoding: "utf8",
17
+ stdio: ["ignore", "pipe", "pipe"],
18
+ });
19
+ expect(requirePackageJson).not.toHaveBeenCalled();
20
+ });
21
+
22
+ test("reads opencode version from the CLI before package.json probes", () => {
23
+ const spawn = mock(() => ({ stdout: "opencode 1.16.2\n", stderr: "", status: 0 }));
24
+ const requirePackageJson = mock(() => ({ version: "0.0.0" }));
25
+
26
+ const version = readPkgVersion("@opencode-ai/sdk", {
27
+ requirePackageJson,
28
+ spawn,
29
+ });
30
+
31
+ expect(version).toBe("1.16.2");
32
+ expect(spawn).toHaveBeenCalledWith("opencode", ["--version"], {
33
+ encoding: "utf8",
34
+ stdio: ["ignore", "pipe", "pipe"],
35
+ });
36
+ expect(requirePackageJson).not.toHaveBeenCalled();
37
+ });
38
+
39
+ test("falls back to package.json when no CLI mapping returns a version", () => {
40
+ const version = readPkgVersion("@earendil-works/pi-coding-agent", {
41
+ requirePackageJson: () => ({ version: "0.79.1" }),
42
+ spawn: mock(() => ({ stdout: "", stderr: "", status: 0 })),
43
+ });
44
+
45
+ expect(version).toBe("0.79.1");
46
+ });
47
+ });
@@ -157,13 +157,14 @@ describe("OpencodeSession — SSE→ProviderEvent mapping", () => {
157
157
  const { emitted, result, serverCloseCalls } = await driveSession(events);
158
158
 
159
159
  const sessionInit = emitted.find((e) => e.type === "session_init");
160
- expect(sessionInit).toBeDefined();
161
- if (sessionInit?.type === "session_init") {
162
- expect(sessionInit.provider).toBe("opencode");
163
- expect(sessionInit.harnessVariant).toBe("stock");
164
- expect(typeof sessionInit.harnessVariantMeta?.version).toBe("string");
165
- expect((sessionInit.harnessVariantMeta?.version as string).length).toBeGreaterThan(0);
160
+ expect(sessionInit?.type).toBe("session_init");
161
+ if (sessionInit?.type !== "session_init") {
162
+ throw new Error("Expected opencode session_init event");
166
163
  }
164
+ expect(sessionInit.provider).toBe("opencode");
165
+ expect(sessionInit.harnessVariant).toBe("stock");
166
+ expect(typeof sessionInit.harnessVariantMeta?.version).toBe("string");
167
+ expect((sessionInit.harnessVariantMeta?.version as string).length).toBeGreaterThan(0);
167
168
 
168
169
  const resultEvent = emitted.find((e) => e.type === "result");
169
170
  expect(resultEvent).toBeDefined();
@@ -108,13 +108,14 @@ describe("PiMonoSession — provider tag on CostData", () => {
108
108
  session.onEvent((e) => events.push(e));
109
109
 
110
110
  const sessionInit = events.find((e) => e.type === "session_init");
111
- expect(sessionInit).toBeDefined();
112
- if (sessionInit?.type === "session_init") {
113
- expect(sessionInit.provider).toBe("pi");
114
- expect(sessionInit.harnessVariant).toBe("stock");
115
- expect(typeof sessionInit.harnessVariantMeta?.version).toBe("string");
116
- expect((sessionInit.harnessVariantMeta?.version as string).length).toBeGreaterThan(0);
111
+ expect(sessionInit?.type).toBe("session_init");
112
+ if (sessionInit?.type !== "session_init") {
113
+ throw new Error("Expected pi session_init event");
117
114
  }
115
+ expect(sessionInit.provider).toBe("pi");
116
+ expect(sessionInit.harnessVariant).toBe("stock");
117
+ expect(typeof sessionInit.harnessVariantMeta?.version).toBe("string");
118
+ expect((sessionInit.harnessVariantMeta?.version as string).length).toBeGreaterThan(0);
118
119
 
119
120
  const result = await session.waitForCompletion();
120
121
 
@@ -2,6 +2,7 @@ import { describe, expect, test } from "bun:test";
2
2
  import {
3
3
  isRateLimitMessage,
4
4
  MAX_RATE_LIMIT_RESET_MS,
5
+ parseRateLimitWindowTelemetry,
5
6
  parseStderrForErrors,
6
7
  SessionErrorTracker,
7
8
  trackErrorFromJson,
@@ -23,6 +24,42 @@ const FIXTURE_REJECTED = {
23
24
  };
24
25
 
25
26
  describe("SessionErrorTracker — rate_limit_event processing", () => {
27
+ test("captures allowed_warning telemetry without marking a cooldown", () => {
28
+ const tracker = new SessionErrorTracker();
29
+ tracker.processRateLimitEvent({
30
+ type: "rate_limit_event",
31
+ rate_limit_info: {
32
+ status: "allowed_warning",
33
+ resetsAt: 1781334000,
34
+ rateLimitType: "seven_day",
35
+ utilization: 0.82,
36
+ isUsingOverage: false,
37
+ surpassedThreshold: 0.75,
38
+ },
39
+ });
40
+
41
+ expect(tracker.getRateLimitResetAt()).toBeUndefined();
42
+ expect(tracker.getRateLimitWindows()).toEqual({
43
+ seven_day: expect.objectContaining({
44
+ status: "allowed_warning",
45
+ resetsAt: 1781334000,
46
+ utilization: 0.82,
47
+ isUsingOverage: false,
48
+ surpassedThreshold: 0.75,
49
+ }),
50
+ });
51
+ });
52
+
53
+ test("parseRateLimitWindowTelemetry is best-effort for malformed events", () => {
54
+ expect(parseRateLimitWindowTelemetry({ type: "rate_limit_event" })).toBeNull();
55
+ expect(
56
+ parseRateLimitWindowTelemetry({
57
+ type: "rate_limit_event",
58
+ rate_limit_info: { status: "allowed_warning", resetsAt: "bad" },
59
+ }),
60
+ ).toBeNull();
61
+ });
62
+
26
63
  test("stashes resetsAt (seconds) correctly as ms — verbatim CAI-1279 fixture", () => {
27
64
  const tracker = new SessionErrorTracker();
28
65
  tracker.processRateLimitEvent(FIXTURE_REJECTED);
@@ -1,6 +1,7 @@
1
1
  import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
2
2
  import { unlink } from "node:fs/promises";
3
3
  import { closeDb, getDb, initDb } from "../be/db";
4
+ import { serializeEmbedding } from "../be/embedding";
4
5
  import type { EmbeddingProvider } from "../be/memory/types";
5
6
  import { runBootReembedScripts } from "../be/scripts/boot-reembed";
6
7
  import { upsertScriptByName } from "../be/scripts/db";
@@ -98,7 +99,8 @@ describe("boot-reembed-scripts", () => {
98
99
  provider.reset();
99
100
  await runBootReembedScripts();
100
101
  expect(embeddingCount(result.script.id)).toBe(1);
101
- expect(provider.calls).toHaveLength(1);
102
+ // +1 for the provider probe call ("test") that verifies the provider works
103
+ expect(provider.calls).toHaveLength(2);
102
104
  });
103
105
 
104
106
  test("no-ops when all scripts already have embeddings", async () => {
@@ -157,7 +159,64 @@ describe("boot-reembed-scripts", () => {
157
159
 
158
160
  provider.reset();
159
161
  await runBootReembedScripts();
160
- expect(provider.calls).toHaveLength(1);
162
+ // +1 for the provider probe call
163
+ expect(provider.calls).toHaveLength(2);
161
164
  expect(embeddingCount(withoutEmbed.script.id)).toBe(1);
162
165
  });
166
+
167
+ test("re-embeds scripts with wrong-dimension embeddings", async () => {
168
+ const result = await upsertScriptByName({
169
+ name: "wrong-dim",
170
+ scope: "global",
171
+ source: source("wrong-dim"),
172
+ description: "Script with legacy 1536d embedding",
173
+ intent: "Dimension fix test",
174
+ signatureJson,
175
+ });
176
+ expect(embeddingCount(result.script.id)).toBe(1);
177
+
178
+ // Overwrite with a wrong-dimension (1536d) embedding to simulate legacy data
179
+ const wrongDimVector = new Float32Array(1536).fill(0.1);
180
+ getDb().run("UPDATE script_embeddings SET embedding = ? WHERE scriptId = ?", [
181
+ serializeEmbedding(wrongDimVector),
182
+ result.script.id,
183
+ ]);
184
+
185
+ // Verify the wrong dim is stored
186
+ const stored = getDb()
187
+ .prepare<{ len: number }, [string]>(
188
+ "SELECT length(embedding) as len FROM script_embeddings WHERE scriptId = ?",
189
+ )
190
+ .get(result.script.id);
191
+ expect(stored?.len).toBe(1536 * 4);
192
+
193
+ provider.reset();
194
+ await runBootReembedScripts();
195
+ // +1 for the provider probe call
196
+ expect(provider.calls).toHaveLength(2);
197
+
198
+ // Should now have correct-dimension embedding
199
+ const fixed = getDb()
200
+ .prepare<{ len: number }, [string]>(
201
+ "SELECT length(embedding) as len FROM script_embeddings WHERE scriptId = ?",
202
+ )
203
+ .get(result.script.id);
204
+ expect(fixed?.len).toBe(5 * 4); // provider.dimensions = 5
205
+ });
206
+
207
+ test("no-ops when all scripts have correct-dimension embeddings", async () => {
208
+ await upsertScriptByName({
209
+ name: "correct-dim",
210
+ scope: "global",
211
+ source: source("correct"),
212
+ description: "Already correct",
213
+ intent: "No-op test",
214
+ signatureJson,
215
+ });
216
+ expect(totalEmbeddingCount()).toBe(1);
217
+
218
+ provider.reset();
219
+ await runBootReembedScripts();
220
+ expect(provider.calls).toHaveLength(0);
221
+ });
163
222
  });
@@ -1,6 +1,7 @@
1
1
  import { afterAll, beforeAll, beforeEach, describe, expect, test } from "bun:test";
2
2
  import { unlink } from "node:fs/promises";
3
3
  import { closeDb, getDb, initDb } from "../be/db";
4
+ import { serializeEmbedding } from "../be/embedding";
4
5
  import type { EmbeddingProvider } from "../be/memory/types";
5
6
  import { getScript, upsertScriptByName } from "../be/scripts/db";
6
7
  import {
@@ -378,4 +379,30 @@ describe("script embeddings", () => {
378
379
  expect(getScript({ name: "delete-embedding", scope: "agent", scopeId: "agent-1" })).toBeNull();
379
380
  expect(embeddingCount(created.script.id)).toBe(0);
380
381
  });
382
+
383
+ test("search skips wrong-dimension embeddings instead of throwing", async () => {
384
+ const good = await upsertFixture({
385
+ name: "good-dim",
386
+ description: "Linear issue triage helper",
387
+ });
388
+ const bad = await upsertFixture({
389
+ name: "bad-dim",
390
+ description: "Linear ticket router",
391
+ });
392
+ expect(embeddingCount(good.script.id)).toBe(1);
393
+ expect(embeddingCount(bad.script.id)).toBe(1);
394
+
395
+ // Overwrite bad-dim's embedding with a wrong-dimension vector (1536d)
396
+ const wrongDimVector = new Float32Array(1536).fill(0.1);
397
+ getDb().run("UPDATE script_embeddings SET embedding = ? WHERE scriptId = ?", [
398
+ serializeEmbedding(wrongDimVector),
399
+ bad.script.id,
400
+ ]);
401
+
402
+ provider.reset();
403
+ const results = await searchScripts({ query: "issue triage", scopeId: "agent-1", limit: 10 });
404
+ // Should not throw, and should only return the good-dim script
405
+ expect(results.map((r) => r.script.name)).toContain("good-dim");
406
+ expect(results.map((r) => r.script.name)).not.toContain("bad-dim");
407
+ });
381
408
  });
@@ -10,6 +10,54 @@ export interface ErrorSignal {
10
10
  timestamp: string;
11
11
  }
12
12
 
13
/**
 * Telemetry recorded for a single provider rate-limit window.
 *
 * Only `status` and `lastSeenAt` are always present; every other field is
 * copied from the provider event only when it has the expected primitive type.
 */
export interface RateLimitWindowInfo {
  /** Provider-reported status string for the window (never empty). */
  status: string;
  /** Finite utilization value as reported by the provider. */
  utilization?: number;
  /**
   * Finite, strictly positive reset time as reported by the provider.
   * NOTE(review): presumably epoch seconds — confirm against the emitter.
   */
  resetsAt?: number;
  /** Provider-reported overage flag. */
  isUsingOverage?: boolean;
  /** Finite threshold value as reported by the provider. */
  surpassedThreshold?: number;
  /** Timestamp string supplied by (or defaulted for) the parser call. */
  lastSeenAt: string;
}

/** Rate-limit window telemetry keyed by the event's `rateLimitType`. */
export type RateLimitWindowTelemetry = Record<string, RateLimitWindowInfo>;

/**
 * Best-effort extraction of window telemetry from a `rate_limit_event` payload.
 *
 * @param json - Parsed event object; anything that is not a well-formed
 *   `rate_limit_event` yields `null`.
 * @param lastSeenAt - Timestamp stored on the result; defaults to the current
 *   time as an ISO-8601 string.
 * @returns The window key (`rateLimitType`) plus the sanitized window info, or
 *   `null` when the event type, `rate_limit_info`, `status`, or
 *   `rateLimitType` is missing/invalid. Never throws.
 */
export function parseRateLimitWindowTelemetry(
  json: Record<string, unknown>,
  lastSeenAt = new Date().toISOString(),
): { rateLimitType: string; info: RateLimitWindowInfo } | null {
  // Small coercion helpers: return the value when it has the expected shape,
  // otherwise undefined so the caller can skip the field.
  const nonEmptyString = (value: unknown): string | undefined =>
    typeof value === "string" && value.length > 0 ? value : undefined;
  const finiteNumber = (value: unknown): number | undefined =>
    typeof value === "number" && Number.isFinite(value) ? value : undefined;

  try {
    if (json.type !== "rate_limit_event") {
      return null;
    }

    const payload = json.rate_limit_info;
    if (typeof payload !== "object" || !payload) {
      return null;
    }
    const fields = payload as Record<string, unknown>;

    // Both identifiers are mandatory; without them the event is unusable.
    const status = nonEmptyString(fields.status);
    if (status === undefined) {
      return null;
    }
    const rateLimitType = nonEmptyString(fields.rateLimitType);
    if (rateLimitType === undefined) {
      return null;
    }

    const utilization = finiteNumber(fields.utilization);
    const resetsAtRaw = finiteNumber(fields.resetsAt);
    // A reset time must be strictly positive to be kept.
    const resetsAt = resetsAtRaw !== undefined && resetsAtRaw > 0 ? resetsAtRaw : undefined;
    const overageFlag = fields.isUsingOverage;
    const surpassedThreshold = finiteNumber(fields.surpassedThreshold);

    // Optional keys are only materialized when valid, in a fixed order.
    const info: RateLimitWindowInfo = {
      status,
      lastSeenAt,
      ...(utilization !== undefined ? { utilization } : {}),
      ...(resetsAt !== undefined ? { resetsAt } : {}),
      ...(typeof overageFlag === "boolean" ? { isUsingOverage: overageFlag } : {}),
      ...(surpassedThreshold !== undefined ? { surpassedThreshold } : {}),
    };

    return { rateLimitType, info };
  } catch {
    // Deliberately best-effort: malformed input must never break the caller.
    return null;
  }
}
60
+
13
61
  /**
14
62
  * Maximum cooldown horizon for a rate-limit reset. A weekly OAuth limit resets
15
63
  * up to ~7 days out, so the cap must be at least that or a weekly-limited key
@@ -85,6 +133,7 @@ export class SessionErrorTracker {
85
133
  private errors: ErrorSignal[] = [];
86
134
  /** Stashed reset time (ms) from the last rejected rate_limit_event in this session. */
87
135
  private rateLimitResetAtMs: number | undefined;
136
+ private rateLimitWindows: RateLimitWindowTelemetry = {};
88
137
 
89
138
  /** Record an error from an assistant message with message.error field */
90
139
  addApiError(errorCategory: string, message: string): void {
@@ -139,6 +188,11 @@ export class SessionErrorTracker {
139
188
  const info = json.rate_limit_info as Record<string, unknown> | undefined;
140
189
  if (!info) return;
141
190
 
191
+ const telemetry = parseRateLimitWindowTelemetry(json);
192
+ if (telemetry) {
193
+ this.rateLimitWindows[telemetry.rateLimitType] = telemetry.info;
194
+ }
195
+
142
196
  if (info.status !== "rejected") return;
143
197
 
144
198
  const resetsAtSec = info.resetsAt;
@@ -185,6 +239,11 @@ export class SessionErrorTracker {
185
239
  return new Date(this.rateLimitResetAtMs).toISOString();
186
240
  }
187
241
 
242
/**
 * Returns a shallow copy of the rate-limit window telemetry captured so far,
 * keyed by rate-limit type, or `undefined` when no window has been recorded.
 * The per-window objects themselves are not cloned.
 */
getRateLimitWindows(): RateLimitWindowTelemetry | undefined {
  const snapshot = { ...this.rateLimitWindows };
  return Object.keys(snapshot).length > 0 ? snapshot : undefined;
}
246
+
188
247
  hasErrors(): boolean {
189
248
  return this.errors.length > 0;
190
249
  }