@desplega.ai/agent-swarm 1.76.2 → 1.76.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,7 +6,7 @@
6
6
  * adapter's event normalization loop without pulling in the real SDK.
7
7
  */
8
8
 
9
- import { afterAll, afterEach, beforeAll, describe, expect, test } from "bun:test";
9
+ import { afterAll, afterEach, beforeAll, beforeEach, describe, expect, test } from "bun:test";
10
10
  import { mkdirSync, rmSync } from "node:fs";
11
11
  import { join } from "node:path";
12
12
  import type {
@@ -15,6 +15,7 @@ import type {
15
15
  ThreadEvent,
16
16
  ThreadItem,
17
17
  } from "@openai/codex-sdk";
18
+ import type { SummarizeSessionForCodexDeps } from "../providers/codex-adapter";
18
19
  import { buildCodexConfig, CodexAdapter } from "../providers/codex-adapter";
19
20
  import { writeCodexAgentsMd } from "../providers/codex-agents-md";
20
21
  import {
@@ -135,13 +136,21 @@ async function runSessionWithFakeThread(
135
136
 
136
137
  describe("CodexSession event mapping", () => {
137
138
  const tmpLogDir = `/tmp/codex-adapter-test-${Date.now()}`;
139
+ let prevSkipEnv: string | undefined;
138
140
 
139
141
  beforeAll(() => {
140
142
  mkdirSync(tmpLogDir, { recursive: true });
143
+ // Prevent the new Phase 3 session-end summarization path from firing real
144
+ // LLM/HTTP calls during the legacy event-mapping tests. The summarization
145
+ // tests below explicitly unset this within their own scope.
146
+ prevSkipEnv = process.env.SKIP_SESSION_SUMMARY;
147
+ process.env.SKIP_SESSION_SUMMARY = "1";
141
148
  });
142
149
 
143
150
  afterAll(() => {
144
151
  rmSync(tmpLogDir, { recursive: true, force: true });
152
+ if (prevSkipEnv === undefined) delete process.env.SKIP_SESSION_SUMMARY;
153
+ else process.env.SKIP_SESSION_SUMMARY = prevSkipEnv;
145
154
  });
146
155
 
147
156
  test("happy path: session_init → message → result", async () => {
@@ -960,3 +969,429 @@ describe("buildCodexConfig", () => {
960
969
  expect(merged.model).toBe("gpt-5.3-codex");
961
970
  });
962
971
  });
972
+
973
+ // ─────────────────────────────────────────────────────────────────────────────
974
+ // Phase 3 — session-end summarization
975
+ // ─────────────────────────────────────────────────────────────────────────────
976
/**
 * Runs a CodexSession against a scripted fake `Thread` while handing the
 * adapter caller-supplied summarization dependencies.
 *
 * `Codex.prototype.startThread` is replaced by a stub that returns the fake
 * thread for the duration of the call; the original method is reinstated in
 * `finally`, whether the session completes or throws.
 *
 * @param events - Thread events handed to `makeFakeThread`.
 * @param config - Session configuration forwarded to `adapter.createSession`.
 * @param summarizeDeps - Passed to the `CodexAdapter` constructor as
 *   `summarizeDeps` (stubs for `runSummarize`, `fetchRetrievalsForTask`,
 *   `postRatings`, `buildRatingsFromLlm`).
 * @returns The provider events observed through `session.onEvent` and the
 *   value resolved by `session.waitForCompletion()`.
 */
async function runSessionWithFakeThreadAndDeps(
  events: ThreadEvent[],
  config: ProviderSessionConfig,
  summarizeDeps: SummarizeSessionForCodexDeps,
): Promise<{ emitted: ProviderEvent[]; result: ProviderResult }> {
  // Shape used to reach `startThread` on the SDK prototype for patching.
  type PatchableCodex = { startThread: (...args: unknown[]) => unknown };

  const { Codex } = await import("@openai/codex-sdk");
  const codexProto = Codex.prototype as unknown as PatchableCodex;
  const savedStartThread = codexProto.startThread;
  const scriptedThread = makeFakeThread(events);

  codexProto.startThread = function startThread(): unknown {
    return scriptedThread as unknown;
  };

  try {
    const session = await new CodexAdapter({ summarizeDeps }).createSession(config);
    const emitted: ProviderEvent[] = [];
    session.onEvent((event) => emitted.push(event));
    const result = await session.waitForCompletion();
    return { emitted, result };
  } finally {
    // Always undo the prototype patch so later tests see the real SDK method.
    codexProto.startThread = savedStartThread;
  }
}
1011
+
1012
/** The `runSummarize` dependency with `null`/`undefined` removed from its type. */
type RunSummarizeFn = NonNullable<SummarizeSessionForCodexDeps["runSummarize"]>;
/** The `postRatings` dependency with `null`/`undefined` removed from its type. */
type PostRatingsFn = NonNullable<SummarizeSessionForCodexDeps["postRatings"]>;

/** First argument received by an injected `runSummarize` stub. */
type RunSummarizeArgs = Parameters<RunSummarizeFn>[0];
/** Awaited return value of an injected `runSummarize` stub. */
type RunSummarizeResult = Awaited<ReturnType<RunSummarizeFn>>;
/** First argument received by an injected `postRatings` stub. */
type PostRatingsArgs = Parameters<PostRatingsFn>[0];
1017
+
1018
/**
 * Session-end summarization behaviour of CodexSession, driven through
 * `runSessionWithFakeThreadAndDeps` with injected summarization deps.
 *
 * Every test runs with `globalThis.fetch` and `console.error` replaced by
 * recording stubs (installed in `beforeEach`, restored in `afterEach`) so the
 * assertions can inspect outgoing requests and logged errors.
 */
describe("CodexSession session-end summarization", () => {
  const tmpLogDir = `/tmp/codex-adapter-summary-test-${Date.now()}`;
  let prevSkipEnv: string | undefined;
  let prevMemoryRaters: string | undefined;
  // Every request that went through the stubbed fetch during the current test.
  const fetchCalls: Array<{ url: string; init?: RequestInit }> = [];
  // Argument list of every console.error call during the current test.
  const consoleErrors: unknown[][] = [];
  const origFetch = globalThis.fetch;
  const origConsoleError = console.error;
  // Response-like shape produced by the per-test handler; the fetch stub casts
  // it to `Response` before returning it.
  type FetchHandlerResp = {
    ok: boolean;
    status: number;
    text: () => Promise<string>;
    json: () => Promise<unknown>;
  };
  let fetchHandler: ((url: string, init?: RequestInit) => Promise<FetchHandlerResp>) | null = null;

  beforeAll(() => {
    mkdirSync(tmpLogDir, { recursive: true });
    // Capture so we don't clobber the outer describe's env override on exit.
    prevSkipEnv = process.env.SKIP_SESSION_SUMMARY;
    prevMemoryRaters = process.env.MEMORY_RATERS;
    delete process.env.SKIP_SESSION_SUMMARY;
    delete process.env.MEMORY_RATERS;
  });

  afterAll(() => {
    rmSync(tmpLogDir, { recursive: true, force: true });
    if (prevSkipEnv === undefined) delete process.env.SKIP_SESSION_SUMMARY;
    else process.env.SKIP_SESSION_SUMMARY = prevSkipEnv;
    if (prevMemoryRaters === undefined) delete process.env.MEMORY_RATERS;
    else process.env.MEMORY_RATERS = prevMemoryRaters;
  });

  beforeEach(() => {
    fetchCalls.length = 0;
    consoleErrors.length = 0;
    // Default handler: the index endpoint answers 202, everything else 200.
    fetchHandler = async (url) => {
      if (url.includes("/api/memory/index")) {
        return {
          ok: true,
          status: 202,
          text: async () => "",
          json: async () => ({ queued: true, memoryIds: ["mem-1"] }),
        };
      }
      return { ok: true, status: 200, text: async () => "", json: async () => ({}) };
    };
    globalThis.fetch = (async (url: string | URL | Request, init?: RequestInit) => {
      // A Request stringifies to "[object Request]", so read its `url` property
      // instead; strings and URL objects convert directly.
      const urlStr =
        typeof url === "string" ? url : url instanceof URL ? url.toString() : url.url;
      fetchCalls.push({ url: urlStr, init });
      if (!fetchHandler) return new Response("{}", { status: 200 });
      return fetchHandler(urlStr, init) as unknown as Response;
    }) as typeof fetch;
    console.error = (...args: unknown[]) => {
      consoleErrors.push(args);
    };
  });

  afterEach(() => {
    globalThis.fetch = origFetch;
    console.error = origConsoleError;
    // Individual tests may set these; clear them so the next test starts unset.
    delete process.env.SKIP_SESSION_SUMMARY;
    delete process.env.MEMORY_RATERS;
  });

  /**
   * Helper: build the canonical event sequence used by every summarization
   * test: thread.started → turn.started → tool started → tool completed →
   * agent_message → turn.completed.
   *
   * @param opts - `agentText` overrides the agent_message text, which
   *   defaults to "I listed the files.".
   * @returns The six thread events in the order listed above.
   */
  function buildSummaryEvents(opts: { agentText?: string } = {}): ThreadEvent[] {
    const cmdItem: CommandExecutionItem = {
      id: "cmd-1",
      type: "command_execution",
      command: "ls",
      aggregated_output: "file1\nfile2",
      exit_code: 0,
      status: "completed",
    };
    const agentMsg: AgentMessageItem = {
      id: "msg-1",
      type: "agent_message",
      text: opts.agentText ?? "I listed the files.",
    };
    return [
      { type: "thread.started", thread_id: "t1" },
      { type: "turn.started" },
      { type: "item.started", item: cmdItem as ThreadItem },
      { type: "item.completed", item: cmdItem as ThreadItem },
      { type: "item.completed", item: agentMsg as ThreadItem },
      {
        type: "turn.completed",
        usage: { input_tokens: 10, cached_input_tokens: 0, output_tokens: 5 },
      },
    ];
  }

  test("happy path: runSummarize is invoked + POST /api/memory/index captured", async () => {
    const events = buildSummaryEvents();

    let runSummarizeCalls = 0;
    let lastRunSummarizeArgs: RunSummarizeArgs | null = null;
    const deps: SummarizeSessionForCodexDeps = {
      runSummarize: async (args) => {
        runSummarizeCalls += 1;
        lastRunSummarizeArgs = args;
        return {
          summary: "Listed files in the current working directory.",
          ratings: [],
        } as RunSummarizeResult;
      },
    };

    const config = testConfig({
      logFile: join(tmpLogDir, "happy.log"),
      cwd: "",
      prompt: "what's in this dir?",
    });

    await runSessionWithFakeThreadAndDeps(events, config, deps);

    expect(runSummarizeCalls).toBe(1);
    expect(lastRunSummarizeArgs).not.toBeNull();
    expect(lastRunSummarizeArgs!.harness).toBe("codex");
    expect(lastRunSummarizeArgs!.taskContext.sourceTaskId).toBe("task-test");
    expect(lastRunSummarizeArgs!.taskContext.agentId).toBe("agent-test");
    expect(lastRunSummarizeArgs!.apiUrl).toBe("http://localhost:0");
    expect(lastRunSummarizeArgs!.apiKey).toBe("test");

    // Transcript must contain all four signal lines.
    const transcript = lastRunSummarizeArgs!.transcript;
    expect(transcript).toContain("User: what's in this dir?");
    expect(transcript).toContain("Tool[bash] started:");
    expect(transcript).toContain("Tool[bash] completed:");
    expect(transcript).toContain("Assistant: I listed the files.");

    // /api/memory/index POST captured with expected body.
    const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
    expect(indexCalls.length).toBe(1);
    const body = JSON.parse(indexCalls[0]!.init?.body as string) as Record<string, unknown>;
    expect(body.scope).toBe("agent");
    expect(body.source).toBe("session_summary");
    expect(body.sourceTaskId).toBe("task-test");
    expect(body.agentId).toBe("agent-test");
    expect(body.name).toBe("session-summary");
    expect(body.content).toBe("Listed files in the current working directory.");

    expect(consoleErrors.length).toBe(0);
  });

  test("SKIP_SESSION_SUMMARY=1 → no runSummarize call, no POST", async () => {
    process.env.SKIP_SESSION_SUMMARY = "1";
    const events = buildSummaryEvents();

    let runSummarizeCalls = 0;
    const deps: SummarizeSessionForCodexDeps = {
      runSummarize: async () => {
        runSummarizeCalls += 1;
        return null;
      },
    };

    const config = testConfig({
      logFile: join(tmpLogDir, "skip.log"),
      cwd: "",
    });
    await runSessionWithFakeThreadAndDeps(events, config, deps);

    expect(runSummarizeCalls).toBe(0);
    const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
    expect(indexCalls.length).toBe(0);
  });

  test("missing taskId → no runSummarize call, no POST", async () => {
    const events = buildSummaryEvents();

    let runSummarizeCalls = 0;
    const deps: SummarizeSessionForCodexDeps = {
      runSummarize: async () => {
        runSummarizeCalls += 1;
        return null;
      },
    };

    const config = testConfig({
      logFile: join(tmpLogDir, "no-task.log"),
      cwd: "",
      taskId: "", // falsy
    });
    await runSessionWithFakeThreadAndDeps(events, config, deps);

    expect(runSummarizeCalls).toBe(0);
    const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
    expect(indexCalls.length).toBe(0);
  });

  test("runSummarize throws → existing logFileHandle.end() + agentsMdHandle.cleanup() still run", async () => {
    // Use a real temp dir as cwd so we exercise writeCodexAgentsMd, then spy on
    // the resulting AGENTS.md file. After cleanup the file MUST be gone.
    const cwd = `/tmp/codex-summary-cleanup-${Date.now()}-${Math.random().toString(36).slice(2)}`;
    mkdirSync(cwd, { recursive: true });

    try {
      const agentsMdPath = join(cwd, "AGENTS.md");

      const events = buildSummaryEvents();
      const deps: SummarizeSessionForCodexDeps = {
        runSummarize: async () => {
          throw new Error("boom");
        },
      };

      const logFile = join(tmpLogDir, "throw.log");
      const config = testConfig({
        logFile,
        cwd,
        systemPrompt: "test system prompt", // ensures AGENTS.md gets written
      });
      await runSessionWithFakeThreadAndDeps(events, config, deps);

      // AGENTS.md cleanup must have run despite the summarize throw.
      expect(await Bun.file(agentsMdPath).exists()).toBe(false);

      // Log file flush must have run despite the summarize throw — the log file
      // exists and contains at least one event JSONL line.
      const logContent = await Bun.file(logFile).text();
      expect(logContent.length).toBeGreaterThan(0);
      expect(logContent).toContain("session_init");

      // The summarize failure was logged via console.error.
      const summaryErrors = consoleErrors.filter(
        (args) =>
          typeof args[0] === "string" &&
          (args[0] as string).startsWith("session_summary failed (codex):"),
      );
      expect(summaryErrors.length).toBe(1);

      // No /api/memory/index POST attempted.
      const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
      expect(indexCalls.length).toBe(0);
    } finally {
      // Remove the temp cwd even when one of the assertions above fails.
      rmSync(cwd, { recursive: true, force: true });
    }
  });

  test("length gate — summary ≤ 20 chars → no POST", async () => {
    const events = buildSummaryEvents();
    const deps: SummarizeSessionForCodexDeps = {
      runSummarize: async () => ({ summary: "tiny", ratings: [] }) as RunSummarizeResult,
    };

    const config = testConfig({ logFile: join(tmpLogDir, "short.log"), cwd: "" });
    await runSessionWithFakeThreadAndDeps(events, config, deps);

    const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
    expect(indexCalls.length).toBe(0);
  });

  test("'no significant learnings' gate → no POST", async () => {
    const events = buildSummaryEvents();
    const deps: SummarizeSessionForCodexDeps = {
      runSummarize: async () =>
        ({
          summary: "No significant learnings from this session.",
          ratings: [],
        }) as RunSummarizeResult,
    };

    const config = testConfig({ logFile: join(tmpLogDir, "no-learn.log"), cwd: "" });
    await runSessionWithFakeThreadAndDeps(events, config, deps);

    const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
    expect(indexCalls.length).toBe(0);
  });

  test("null runSummarize result → no POST, no error log", async () => {
    const events = buildSummaryEvents();
    const deps: SummarizeSessionForCodexDeps = {
      runSummarize: async () => null,
    };

    const config = testConfig({ logFile: join(tmpLogDir, "null.log"), cwd: "" });
    await runSessionWithFakeThreadAndDeps(events, config, deps);

    const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
    expect(indexCalls.length).toBe(0);
    expect(consoleErrors.length).toBe(0);
  });

  test("ratings path — MEMORY_RATERS=llm → postRatings called with `events:` key (NOT `ratings:`)", async () => {
    process.env.MEMORY_RATERS = "llm";
    const events = buildSummaryEvents();

    const retrievalRow = { id: "mem-A", name: "memory A", content: "..." };

    let lastPostRatingsArgs: PostRatingsArgs | null = null;
    const deps: SummarizeSessionForCodexDeps = {
      fetchRetrievalsForTask: async () => [retrievalRow] as never,
      runSummarize: async (args) => {
        expect(args.retrievals.length).toBe(1);
        expect(args.retrievals[0]!.id).toBe("mem-A");
        return {
          summary: "Long-enough summary with real content for the index POST.",
          ratings: [{ id: "mem-A", score: 0.8, reasoning: "useful" }],
        } as RunSummarizeResult;
      },
      postRatings: async (args) => {
        lastPostRatingsArgs = args;
        return { ok: true, status: 200 };
      },
      // Stub converter: keeps ratings whose id matches a retrieval and maps the
      // score onto `signal` as 2 * score - 1.
      buildRatingsFromLlm: (ratings, retrievals) => {
        const allowed = new Set(retrievals.map((r) => r.id));
        return ratings
          .filter((r) => allowed.has(r.id))
          .map((r) => ({
            memoryId: r.id,
            signal: 2 * r.score - 1,
            weight: 0.8,
            source: "llm",
            reasoning: r.reasoning,
          }));
      },
    };

    const config = testConfig({ logFile: join(tmpLogDir, "ratings.log"), cwd: "" });
    await runSessionWithFakeThreadAndDeps(events, config, deps);

    // Index POST happened
    const indexCalls = fetchCalls.filter((c) => c.url.endsWith("/api/memory/index"));
    expect(indexCalls.length).toBe(1);

    // postRatings was called with `events:` key, NOT `ratings:` — guards
    // against the plan/signature mismatch flagged by the orchestrator.
    expect(lastPostRatingsArgs).not.toBeNull();
    expect(lastPostRatingsArgs!.apiUrl).toBe("http://localhost:0");
    expect(lastPostRatingsArgs!.agentId).toBe("agent-test");
    expect(lastPostRatingsArgs!.taskId).toBe("task-test");
    expect(Array.isArray(lastPostRatingsArgs!.events)).toBe(true);
    expect(lastPostRatingsArgs!.events.length).toBe(1);
    expect(lastPostRatingsArgs!.events[0]!.memoryId).toBe("mem-A");
    expect(lastPostRatingsArgs!.events[0]!.source).toBe("llm");

    // Guard against accidentally passing a `ratings:` key.
    expect((lastPostRatingsArgs as unknown as Record<string, unknown>).ratings).toBeUndefined();

    expect(consoleErrors.length).toBe(0);
  });

  test("POST /api/memory/index 500 → exactly one console.error('… (codex):', …)", async () => {
    const events = buildSummaryEvents();
    // Override the default handler so the index endpoint fails with HTTP 500.
    fetchHandler = async (url) => {
      if (url.includes("/api/memory/index")) {
        return {
          ok: false,
          status: 500,
          text: async () => "internal server error",
          json: async () => ({}),
        };
      }
      return { ok: true, status: 200, text: async () => "", json: async () => ({}) };
    };

    const deps: SummarizeSessionForCodexDeps = {
      runSummarize: async () =>
        ({
          summary: "A valid long-enough summary that passes the length gate.",
          ratings: [],
        }) as RunSummarizeResult,
    };

    const config = testConfig({ logFile: join(tmpLogDir, "post500.log"), cwd: "" });
    await runSessionWithFakeThreadAndDeps(events, config, deps);

    const matching = consoleErrors.filter(
      (args) =>
        typeof args[0] === "string" &&
        (args[0] as string).startsWith("session_summary: /api/memory/index POST failed (codex):"),
    );
    expect(matching.length).toBe(1);
    expect(matching[0]![1]).toBe(500);
  });
});