@desplega.ai/agent-swarm 1.76.2 → 1.77.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/openapi.json +9 -2
- package/package.json +1 -1
- package/src/be/memory/raters/llm.ts +26 -0
- package/src/cli.tsx +3 -24
- package/src/commands/credential-wait.ts +31 -6
- package/src/commands/runner.ts +1045 -1059
- package/src/hooks/hook.ts +174 -147
- package/src/http/status.ts +8 -0
- package/src/providers/claude-adapter.ts +9 -1
- package/src/providers/codex-adapter.ts +232 -2
- package/src/providers/codex-oauth/storage.ts +21 -0
- package/src/providers/pi-mono-extension.ts +114 -77
- package/src/telemetry.ts +28 -0
- package/src/tests/claude-stop-hook.test.ts +432 -0
- package/src/tests/codex-adapter.test.ts +436 -1
- package/src/tests/internal-ai/complete-structured.test.ts +276 -0
- package/src/tests/internal-ai/credentials.test.ts +264 -0
- package/src/tests/internal-ai/schema-parity.test.ts +103 -0
- package/src/tests/internal-ai/summarize-session.test.ts +105 -0
- package/src/tests/opencode-plugin.test.ts +496 -0
- package/src/tests/pi-mono-extension.test.ts +347 -0
- package/src/tests/reload-config.test.ts +9 -1
- package/src/tests/status.test.ts +4 -0
- package/src/tests/telemetry-init.test.ts +137 -1
- package/src/tests/template-recommendations.test.ts +1 -0
- package/src/utils/internal-ai/complete-structured.ts +296 -0
- package/src/utils/internal-ai/credentials.ts +175 -0
- package/src/utils/internal-ai/index.ts +31 -0
- package/src/utils/internal-ai/models.ts +46 -0
- package/src/utils/internal-ai/summarize-session.ts +101 -0
|
@@ -65,6 +65,8 @@ import {
|
|
|
65
65
|
type Usage,
|
|
66
66
|
type WebSearchItem,
|
|
67
67
|
} from "@openai/codex-sdk";
|
|
68
|
+
import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
|
|
69
|
+
import { summarizeSession as runSummarize } from "../utils/internal-ai";
|
|
68
70
|
import { scrubSecrets } from "../utils/secret-scrubber";
|
|
69
71
|
import { type CodexAgentsMdHandle, writeCodexAgentsMd } from "./codex-agents-md";
|
|
70
72
|
import { computeCodexCostUsd, getCodexContextWindow, resolveCodexModel } from "./codex-models";
|
|
@@ -340,6 +342,26 @@ export async function buildCodexConfig(
|
|
|
340
342
|
};
|
|
341
343
|
}
|
|
342
344
|
|
|
345
|
+
/**
 * Optional overrides for the collaborators used by the codex session-end
 * summarization flow (summarize → index → rate).
 *
 * Every member is optional. When a member is omitted,
 * `CodexSession.summarizeAtEnd` uses the same-named symbol imported at the top
 * of this file, so production callers never need to pass this object. Tests
 * supply stubs here to run the flow without a real API server or LLM.
 *
 * Explicit dependency injection is used (as with `SummarizeSessionForPiDeps`)
 * because `bun:test`'s `mock.module()` is process-wide and leaks across test
 * files in the same `bun test` run, which breaks sibling tests that import the
 * real symbols.
 */
export interface SummarizeSessionForCodexDeps {
  /** Stand-in for `fetchRetrievalsForTask` (pre-fetch of memories to rate). */
  fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
  /** Stand-in for the `summarizeSession` helper, imported here as `runSummarize`. */
  runSummarize?: typeof runSummarize;
  /** Stand-in for `buildRatingsFromLlm` (LLM scores → rating events). */
  buildRatingsFromLlm?: typeof buildRatingsFromLlm;
  /** Stand-in for `postRatings` (submits the rating events). */
  postRatings?: typeof postRatings;
}
|
|
364
|
+
|
|
343
365
|
/** Running session backed by a Codex `Thread`. */
|
|
344
366
|
class CodexSession implements ProviderSession {
|
|
345
367
|
private readonly thread: Thread;
|
|
@@ -348,6 +370,7 @@ class CodexSession implements ProviderSession {
|
|
|
348
370
|
private readonly resolvedModel: string;
|
|
349
371
|
private readonly contextWindow: number;
|
|
350
372
|
private readonly skillsDir: string;
|
|
373
|
+
private readonly summarizeDeps: SummarizeSessionForCodexDeps;
|
|
351
374
|
private readonly listeners: Array<(event: ProviderEvent) => void> = [];
|
|
352
375
|
private readonly eventQueue: ProviderEvent[] = [];
|
|
353
376
|
private readonly logFileHandle: ReturnType<ReturnType<typeof Bun.file>["writer"]>;
|
|
@@ -355,6 +378,11 @@ class CodexSession implements ProviderSession {
|
|
|
355
378
|
private readonly completionPromise: Promise<ProviderResult>;
|
|
356
379
|
private resolveCompletion!: (result: ProviderResult) => void;
|
|
357
380
|
private abortController: AbortController | null = null;
|
|
381
|
+
/**
|
|
382
|
+
* Per-session transcript buffer used to feed the session-end summarizer.
|
|
383
|
+
* Reset at the start of `runSession` and appended in `handleEvent`.
|
|
384
|
+
*/
|
|
385
|
+
private transcript: string[] = [];
|
|
358
386
|
/**
|
|
359
387
|
* Mutable holder for the current turn's `AbortController`. Shared with the
|
|
360
388
|
* swarm event handler so it can trigger an abort from outside `runSession`
|
|
@@ -366,6 +394,14 @@ class CodexSession implements ProviderSession {
|
|
|
366
394
|
private lastUsage: Usage | null = null;
|
|
367
395
|
private aborted = false;
|
|
368
396
|
private settled = false;
|
|
397
|
+
/**
|
|
398
|
+
* Result captured by `settle` but held back from `resolveCompletion` until
|
|
399
|
+
* `runSession`'s `finally` block has fully cleaned up (log writer flush,
|
|
400
|
+
* AGENTS.md cleanup, session summary). Without this, callers awaiting
|
|
401
|
+
* `waitForCompletion` would race the cleanup. Phase 3 added the session
|
|
402
|
+
* summary path which materially relies on this ordering for testability.
|
|
403
|
+
*/
|
|
404
|
+
private pendingResult: ProviderResult | null = null;
|
|
369
405
|
|
|
370
406
|
constructor(
|
|
371
407
|
thread: Thread,
|
|
@@ -374,6 +410,7 @@ class CodexSession implements ProviderSession {
|
|
|
374
410
|
resolvedModel: string,
|
|
375
411
|
initialEvents: ProviderEvent[] = [],
|
|
376
412
|
skillsDir?: string,
|
|
413
|
+
summarizeDeps: SummarizeSessionForCodexDeps = {},
|
|
377
414
|
) {
|
|
378
415
|
this.thread = thread;
|
|
379
416
|
this.config = config;
|
|
@@ -385,6 +422,7 @@ class CodexSession implements ProviderSession {
|
|
|
385
422
|
// runtime default of `${HOME}/.codex/skills`.
|
|
386
423
|
this.skillsDir =
|
|
387
424
|
skillsDir ?? process.env.CODEX_SKILLS_DIR ?? join(os.homedir(), ".codex", "skills");
|
|
425
|
+
this.summarizeDeps = summarizeDeps;
|
|
388
426
|
this.logFileHandle = Bun.file(config.logFile).writer();
|
|
389
427
|
|
|
390
428
|
this.completionPromise = new Promise<ProviderResult>((resolve) => {
|
|
@@ -474,7 +512,10 @@ class CodexSession implements ProviderSession {
|
|
|
474
512
|
/**
 * Record the session's final result, at most once.
 *
 * The first call flips `settled` and parks `result` in `pendingResult`; any
 * later call is ignored. The completion promise is deliberately NOT resolved
 * here: `runSession`'s `finally` block resolves it from `pendingResult` once
 * its cleanup has finished, so callers observe cleanup → resolve ordering.
 */
private settle(result: ProviderResult): void {
  if (!this.settled) {
    this.settled = true;
    this.pendingResult = result;
  }
}
|
|
479
520
|
|
|
480
521
|
/** Build CostData from the most recent turn usage. */
|
|
@@ -559,6 +600,41 @@ class CodexSession implements ProviderSession {
|
|
|
559
600
|
);
|
|
560
601
|
}
|
|
561
602
|
|
|
603
|
+
/**
 * Render a completed tool item as a short one-liner for the session-end
 * summarization transcript.
 *
 * Tool-type-specific fields are picked so the transcript is not drowned in raw
 * JSON (a single `command_execution` can carry a very large
 * `aggregated_output`). Size limits per branch:
 *   - `command_execution`: the output is clipped to 500 chars; the short
 *     `exit=… status=…` prefix is added on top of that.
 *   - `file_change`: at most 5 changes are listed and the rendered line is
 *     clipped to 500 chars (paths are otherwise unbounded).
 *   - `mcp_tool_call`: the rendered line is clipped to 500 chars.
 *   - `web_search`: the query is clipped to 200 chars.
 *   - anything else: `JSON.stringify(item)` clipped to 500 chars.
 *
 * @param item Completed thread item to digest.
 * @returns A single-line digest suitable for appending to the transcript.
 */
private shortenItemResult(item: ThreadItem): string {
  const MAX_CHARS = 500;
  switch (item.type) {
    case "command_execution": {
      const cmd = item as CommandExecutionItem;
      const stdout = (cmd.aggregated_output ?? "").slice(0, MAX_CHARS);
      return `exit=${cmd.exit_code ?? "?"} status=${cmd.status ?? "?"} stdout=${stdout}`;
    }
    case "file_change": {
      const fc = item as FileChangeItem;
      const summarised = (fc.changes ?? [])
        .slice(0, 5)
        .map((c) => `${c.kind}:${c.path}`)
        .join(",");
      // Five entries can still be arbitrarily long when paths are long, so the
      // rendered line is clipped as well.
      return `changes=[${summarised}]`.slice(0, MAX_CHARS);
    }
    case "mcp_tool_call": {
      const mcp = item as McpToolCallItem;
      // Server and tool names are caller-controlled strings; clip the line.
      return `server=${mcp.server} tool=${mcp.tool} status=${mcp.status ?? "?"}`.slice(
        0,
        MAX_CHARS,
      );
    }
    case "web_search": {
      const ws = item as WebSearchItem;
      return `query=${(ws.query ?? "").slice(0, 200)}`;
    }
    default:
      return JSON.stringify(item).slice(0, MAX_CHARS);
  }
}
|
|
637
|
+
|
|
562
638
|
private handleEvent(event: ThreadEvent): void {
|
|
563
639
|
// Mirror every raw SDK event into the log as raw_log for debugability —
|
|
564
640
|
// parity with Claude's JSONL envelope.
|
|
@@ -582,6 +658,14 @@ class CodexSession implements ProviderSession {
|
|
|
582
658
|
toolName: this.toolNameForItem(event.item),
|
|
583
659
|
args: this.toolArgsForItem(event.item),
|
|
584
660
|
});
|
|
661
|
+
// Mirror into the transcript buffer for session-end summarization.
|
|
662
|
+
// Tools are the bulk of useful signal in a codex session, so we
|
|
663
|
+
// capture both the start (args) and the completion (result digest).
|
|
664
|
+
this.transcript.push(
|
|
665
|
+
`Tool[${this.toolNameForItem(event.item)}] started: ${JSON.stringify(
|
|
666
|
+
this.toolArgsForItem(event.item),
|
|
667
|
+
).slice(0, 500)}`,
|
|
668
|
+
);
|
|
585
669
|
}
|
|
586
670
|
break;
|
|
587
671
|
}
|
|
@@ -615,6 +699,9 @@ class CodexSession implements ProviderSession {
|
|
|
615
699
|
toolName: this.toolNameForItem(item),
|
|
616
700
|
result: item,
|
|
617
701
|
});
|
|
702
|
+
this.transcript.push(
|
|
703
|
+
`Tool[${this.toolNameForItem(item)}] completed: ${this.shortenItemResult(item)}`,
|
|
704
|
+
);
|
|
618
705
|
break;
|
|
619
706
|
}
|
|
620
707
|
switch (item.type) {
|
|
@@ -622,6 +709,7 @@ class CodexSession implements ProviderSession {
|
|
|
622
709
|
const msg = item as AgentMessageItem;
|
|
623
710
|
if (msg.text) {
|
|
624
711
|
this.emit({ type: "message", role: "assistant", content: msg.text });
|
|
712
|
+
this.transcript.push(`Assistant: ${msg.text}`);
|
|
625
713
|
}
|
|
626
714
|
break;
|
|
627
715
|
}
|
|
@@ -764,6 +852,11 @@ class CodexSession implements ProviderSession {
|
|
|
764
852
|
this.emit(event),
|
|
765
853
|
);
|
|
766
854
|
|
|
855
|
+
// Reset + seed the transcript buffer so the session-end summarizer has
|
|
856
|
+
// the user's prompt as anchor context. Subsequent appends happen in
|
|
857
|
+
// `handleEvent` (tool start/end, agent_message).
|
|
858
|
+
this.transcript = [`User: ${resolvedPrompt}`];
|
|
859
|
+
|
|
767
860
|
const streamed = await this.thread.runStreamed(resolvedPrompt, {
|
|
768
861
|
signal: this.abortController.signal,
|
|
769
862
|
});
|
|
@@ -827,6 +920,18 @@ class CodexSession implements ProviderSession {
|
|
|
827
920
|
failureReason: message,
|
|
828
921
|
});
|
|
829
922
|
} finally {
|
|
923
|
+
// Session-end summarization. Pure addition for codex — no behavior to
|
|
924
|
+
// preserve. Wrapped in its own try/catch so summary failure must NOT
|
|
925
|
+
// block the existing log/AGENTS.md cleanup below. Gate `SKIP_SESSION_SUMMARY=1`
|
|
926
|
+
// matches the parity convention used by the claude Stop hook + pi/opencode.
|
|
927
|
+
if (process.env.SKIP_SESSION_SUMMARY !== "1") {
|
|
928
|
+
try {
|
|
929
|
+
await this.summarizeAtEnd();
|
|
930
|
+
} catch (err) {
|
|
931
|
+
console.error("session_summary failed (codex):", err);
|
|
932
|
+
}
|
|
933
|
+
}
|
|
934
|
+
|
|
830
935
|
// Detach the abort controller now that the turn has settled.
|
|
831
936
|
this.abortRef.current = null;
|
|
832
937
|
try {
|
|
@@ -835,6 +940,121 @@ class CodexSession implements ProviderSession {
|
|
|
835
940
|
// Ignore log writer cleanup failures.
|
|
836
941
|
}
|
|
837
942
|
await this.agentsMdHandle.cleanup();
|
|
943
|
+
|
|
944
|
+
// Resolve `waitForCompletion()` only AFTER all cleanup has finished so
|
|
945
|
+
// downstream observers (tests + the runner's `.then(...)` chain) don't
|
|
946
|
+
// race the finally-block side effects. Fallback to an error result if
|
|
947
|
+
// we somehow never called `settle` (defensive — every codepath in the
|
|
948
|
+
// try/catch above calls settle exactly once).
|
|
949
|
+
const finalResult =
|
|
950
|
+
this.pendingResult ??
|
|
951
|
+
({
|
|
952
|
+
exitCode: 1,
|
|
953
|
+
sessionId: this._sessionId,
|
|
954
|
+
cost: this.buildCostData(this.lastUsage, true),
|
|
955
|
+
isError: true,
|
|
956
|
+
failureReason: "session did not settle",
|
|
957
|
+
} as ProviderResult);
|
|
958
|
+
this.resolveCompletion(finalResult);
|
|
959
|
+
}
|
|
960
|
+
}
|
|
961
|
+
|
|
962
|
+
/**
 * Index a session summary into agent memory at the end of a codex turn.
 *
 * Steps, in order:
 *   1. Join the in-memory transcript buffer and keep only its last 20,000
 *      characters.
 *   2. Return early when swarm context is missing (no agentId / taskId /
 *      apiUrl / apiKey) or the transcript is 100 characters or shorter.
 *   3. When the comma-separated `MEMORY_RATERS` env var contains `llm`,
 *      pre-fetch the task's retrievals so the summarizer can score them. A
 *      failed pre-fetch is logged and treated as "no retrievals".
 *   4. Call `runSummarize`; a falsy result means there is nothing to index.
 *   5. Skip summaries of 20 characters or fewer, or ones containing
 *      "no significant learnings" (case-insensitive). Otherwise POST the
 *      summary to `/api/memory/index`; a non-OK response is logged and ends
 *      the flow.
 *   6. When ratings were requested and returned, build rating events and send
 *      them via `postRatings` (`events:` key, NOT `ratings:`); a failure there
 *      is logged.
 *
 * Each collaborator is taken from `summarizeDeps` when provided, otherwise from
 * the module-level import of the same name.
 *
 * Errors thrown by `runSummarize` or by the index `fetch` itself are not caught
 * here; they propagate to the caller (`runSession`'s `finally` block wraps this
 * call in its own try/catch and logs).
 *
 * NOTE(review): none of the network calls below carry a timeout, and
 * `runSession` awaits this method before resolving the completion promise —
 * consider bounding them if a hung request is a realistic failure mode.
 */
private async summarizeAtEnd(): Promise<void> {
  const transcriptStr = this.transcript.join("\n").slice(-20_000);
  const { agentId, taskId, apiUrl, apiKey } = this.config;
  if (!agentId || !taskId || !apiUrl || !apiKey) return;
  if (transcriptStr.length <= 100) return;

  const _runSummarize = this.summarizeDeps.runSummarize ?? runSummarize;
  const _fetchRetrievals = this.summarizeDeps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
  const _postRatings = this.summarizeDeps.postRatings ?? postRatings;
  const _buildRatings = this.summarizeDeps.buildRatingsFromLlm ?? buildRatingsFromLlm;

  const memoryRaters = (process.env.MEMORY_RATERS ?? "")
    .split(",")
    .map((s) => s.trim())
    .filter(Boolean);
  const wantRatings = memoryRaters.includes("llm");
  // Retrievals are optional input: on failure we still summarize, but the
  // failure is logged rather than swallowed so it is visible in the logs.
  const retrievals = wantRatings
    ? await _fetchRetrievals({ apiUrl, apiKey, agentId, taskId }).catch((err) => {
        console.error("session_summary: fetchRetrievalsForTask failed (codex):", err);
        return [];
      })
    : [];

  const result = await _runSummarize({
    harness: "codex",
    transcript: transcriptStr,
    retrievals,
    taskContext: {
      sourceTaskId: taskId,
      agentId,
      prompt: this.config.prompt,
    },
    apiUrl,
    apiKey,
  });
  // Falsy result = nothing to index (the summarizer is expected to have
  // logged the reason itself, e.g. no credential resolved).
  if (!result) return;

  const summary = result.summary.trim();
  if (summary.length <= 20 || summary.toLowerCase().includes("no significant learnings")) {
    return;
  }

  const indexResp = await fetch(`${apiUrl}/api/memory/index`, {
    method: "POST",
    headers: {
      "Content-Type": "application/json",
      Authorization: `Bearer ${apiKey}`,
      "X-Agent-ID": agentId,
    },
    body: JSON.stringify({
      scope: "agent",
      source: "session_summary",
      sourceTaskId: taskId,
      content: summary,
      name: "session-summary",
      agentId,
    }),
  });
  if (!indexResp.ok) {
    // Reading the body is best-effort; it only enriches the log line.
    const text = await indexResp.text().catch(() => "");
    console.error(
      "session_summary: /api/memory/index POST failed (codex):",
      indexResp.status,
      text,
    );
    return;
  }

  if (wantRatings && result.ratings && result.ratings.length > 0) {
    const ratingEvents = _buildRatings(result.ratings, retrievals);
    if (ratingEvents.length > 0) {
      await _postRatings({
        apiUrl,
        apiKey,
        agentId,
        taskId,
        events: ratingEvents,
      }).catch((err) => console.error("session_summary: postRatings failed (codex):", err));
    }
  }
}
|
|
840
1060
|
}
|
|
@@ -851,8 +1071,17 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
851
1071
|
*/
|
|
852
1072
|
private readonly skillsDir?: string;
|
|
853
1073
|
|
|
854
|
-
|
|
1074
|
+
/**
|
|
1075
|
+
* Optional dependency-injection points for session-end summarization. Tests
|
|
1076
|
+
* pass stubs in here to exercise the summarize → index → rate flow without
|
|
1077
|
+
* standing up a real API server or LLM. Production callers omit this and the
|
|
1078
|
+
* `CodexSession` falls back to the module-level imports.
|
|
1079
|
+
*/
|
|
1080
|
+
private readonly summarizeDeps: SummarizeSessionForCodexDeps;
|
|
1081
|
+
|
|
1082
|
+
/**
 * @param opts.skillsDir Optional skills directory; stored as given.
 * @param opts.summarizeDeps Optional session-summary overrides; an empty
 *   object is stored when none are supplied.
 */
constructor(opts: { skillsDir?: string; summarizeDeps?: SummarizeSessionForCodexDeps } = {}) {
  const { skillsDir, summarizeDeps } = opts;
  this.skillsDir = skillsDir;
  this.summarizeDeps = summarizeDeps ?? {};
}
|
|
857
1086
|
|
|
858
1087
|
async createSession(config: ProviderSessionConfig): Promise<ProviderSession> {
|
|
@@ -937,6 +1166,7 @@ export class CodexAdapter implements ProviderAdapter {
|
|
|
937
1166
|
resolvedModel,
|
|
938
1167
|
preSessionEvents,
|
|
939
1168
|
this.skillsDir,
|
|
1169
|
+
this.summarizeDeps,
|
|
940
1170
|
);
|
|
941
1171
|
} catch (err) {
|
|
942
1172
|
// If we failed to construct the thread, clean up the managed AGENTS.md
|
|
@@ -85,6 +85,27 @@ export async function deleteCodexOAuth(apiUrl: string, apiKey: string): Promise<
|
|
|
85
85
|
});
|
|
86
86
|
}
|
|
87
87
|
|
|
88
|
+
/**
 * Best-effort write of OAuth credentials back to the config store.
 *
 * Delegates to {@link storeCodexOAuth}. Any error it raises is logged with
 * `console.error` and then dropped, so this function never rejects because of
 * a failed write — the caller can keep using the credentials it already holds
 * in memory and persistence can be attempted again on a later call.
 *
 * @param apiUrl Base URL passed through to `storeCodexOAuth`.
 * @param apiKey API key passed through to `storeCodexOAuth`.
 * @param creds Credentials to persist.
 */
export async function persistCodexOAuth(
  apiUrl: string,
  apiKey: string,
  creds: CodexOAuthCredentials,
): Promise<void> {
  try {
    await storeCodexOAuth(apiUrl, apiKey, creds);
  } catch (cause) {
    // Non-fatal by design: report and carry on.
    console.error("[codex-oauth] persistCodexOAuth failed (non-fatal):", cause);
  }
}
|
|
108
|
+
|
|
88
109
|
export async function getValidCodexOAuth(
|
|
89
110
|
apiUrl: string,
|
|
90
111
|
apiKey: string,
|
|
@@ -7,9 +7,11 @@
|
|
|
7
7
|
*/
|
|
8
8
|
|
|
9
9
|
import type { ExtensionFactory } from "@mariozechner/pi-coding-agent";
|
|
10
|
+
import { buildRatingsFromLlm, fetchRetrievalsForTask, postRatings } from "../be/memory/raters/llm";
|
|
10
11
|
import { checkToolLoop, clearToolHistory } from "../hooks/tool-loop-detection";
|
|
12
|
+
import { summarizeSession as runSummarize } from "../utils/internal-ai";
|
|
11
13
|
|
|
12
|
-
interface SwarmHooksConfig {
|
|
14
|
+
export interface SwarmHooksConfig {
|
|
13
15
|
apiUrl: string;
|
|
14
16
|
apiKey: string;
|
|
15
17
|
agentId: string;
|
|
@@ -276,13 +278,54 @@ async function fetchConcurrentContext(config: SwarmHooksConfig): Promise<string
|
|
|
276
278
|
}
|
|
277
279
|
}
|
|
278
280
|
|
|
279
|
-
/**
|
|
280
|
-
|
|
281
|
+
/**
 * Optional overrides for the collaborators used by `summarizeSessionForPi`.
 *
 * Every member is optional; an omitted member means `summarizeSessionForPi`
 * uses the same-named symbol imported at the top of this file. Production
 * callers pass nothing. Tests pass stubs.
 *
 * Explicit dependency injection is used because `bun:test`'s `mock.module()`
 * is process-wide and leaks across test files, so module-mocking
 * `runSummarize`/`postRatings` here would break sibling tests that import the
 * real symbols.
 */
export interface SummarizeSessionForPiDeps {
  /** Stand-in for `fetchRetrievalsForTask` (pre-fetch of memories to rate). */
  fetchRetrievalsForTask?: typeof fetchRetrievalsForTask;
  /** Stand-in for the `summarizeSession` helper, imported here as `runSummarize`. */
  runSummarize?: typeof runSummarize;
  /** Stand-in for `buildRatingsFromLlm` (LLM scores → rating events). */
  buildRatingsFromLlm?: typeof buildRatingsFromLlm;
  /** Stand-in for `postRatings` (submits the rating events). */
  postRatings?: typeof postRatings;
}
|
|
298
|
+
|
|
299
|
+
/**
|
|
300
|
+
* Run session summarization via the shared `internal-ai` abstraction on
|
|
301
|
+
* shutdown. Replaces the previous `Bun.spawn(claude -p ...)` shellout
|
|
302
|
+
* (which silently failed in production because pi sessions typically don't
|
|
303
|
+
* have Anthropic CLI auth).
|
|
304
|
+
*
|
|
305
|
+
* Flow:
|
|
306
|
+
* 1. Read tail of session transcript file.
|
|
307
|
+
* 2. Fetch task details + (optionally) memory retrievals for ratings.
|
|
308
|
+
* 3. Call `runSummarize` from `src/utils/internal-ai` — picks credentials
|
|
309
|
+
* out of env / codex OAuth, returns structured `{summary, ratings}`.
|
|
310
|
+
* 4. Apply length/quality gate; POST summary to `/api/memory/index`.
|
|
311
|
+
* 5. If `MEMORY_RATERS` includes `llm` AND ratings came back, POST them
|
|
312
|
+
* via `postRatings` (events-based; mirrors the claude Stop hook).
|
|
313
|
+
*
|
|
314
|
+
* All catches log via `console.error(..., err)` — silent-fail behavior is
|
|
315
|
+
* gone.
|
|
316
|
+
*/
|
|
317
|
+
export async function summarizeSessionForPi(
|
|
281
318
|
config: SwarmHooksConfig,
|
|
282
319
|
sessionFile: string | undefined,
|
|
320
|
+
deps: SummarizeSessionForPiDeps = {},
|
|
283
321
|
): Promise<void> {
|
|
284
322
|
if (!sessionFile) return;
|
|
285
323
|
|
|
324
|
+
const _runSummarize = deps.runSummarize ?? runSummarize;
|
|
325
|
+
const _fetchRetrievals = deps.fetchRetrievalsForTask ?? fetchRetrievalsForTask;
|
|
326
|
+
const _postRatings = deps.postRatings ?? postRatings;
|
|
327
|
+
const _buildRatings = deps.buildRatingsFromLlm ?? buildRatingsFromLlm;
|
|
328
|
+
|
|
286
329
|
try {
|
|
287
330
|
let transcript = "";
|
|
288
331
|
try {
|
|
@@ -294,84 +337,78 @@ async function summarizeSession(
|
|
|
294
337
|
|
|
295
338
|
if (transcript.length <= 100) return;
|
|
296
339
|
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
303
|
-
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
const
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
const tmpFile = `/tmp/session-summary-${Date.now()}.txt`;
|
|
327
|
-
await Bun.write(tmpFile, summarizePrompt);
|
|
328
|
-
const proc = Bun.spawn(
|
|
329
|
-
[
|
|
330
|
-
"bash",
|
|
331
|
-
"-c",
|
|
332
|
-
`cat "${tmpFile}" | ${process.env.CLAUDE_BINARY || "claude"} -p --model haiku --output-format json`,
|
|
333
|
-
],
|
|
334
|
-
{
|
|
335
|
-
stdout: "pipe",
|
|
336
|
-
stderr: "pipe",
|
|
337
|
-
env: { ...process.env, SKIP_SESSION_SUMMARY: "1" },
|
|
340
|
+
const sourceTaskId = config.taskId;
|
|
341
|
+
const agentId = config.agentId;
|
|
342
|
+
if (!sourceTaskId || !agentId) return;
|
|
343
|
+
|
|
344
|
+
const taskDetails = await fetchTaskDetails(config).catch(() => null);
|
|
345
|
+
|
|
346
|
+
const memoryRaters = (process.env.MEMORY_RATERS ?? "")
|
|
347
|
+
.split(",")
|
|
348
|
+
.map((s) => s.trim())
|
|
349
|
+
.filter(Boolean);
|
|
350
|
+
const wantRatings = memoryRaters.includes("llm");
|
|
351
|
+
const retrievals = wantRatings
|
|
352
|
+
? await _fetchRetrievals({
|
|
353
|
+
apiUrl: config.apiUrl,
|
|
354
|
+
apiKey: config.apiKey,
|
|
355
|
+
agentId,
|
|
356
|
+
taskId: sourceTaskId,
|
|
357
|
+
}).catch(() => [])
|
|
358
|
+
: [];
|
|
359
|
+
|
|
360
|
+
const result = await _runSummarize({
|
|
361
|
+
harness: "pi",
|
|
362
|
+
transcript,
|
|
363
|
+
retrievals,
|
|
364
|
+
taskContext: {
|
|
365
|
+
sourceTaskId,
|
|
366
|
+
agentId,
|
|
367
|
+
prompt: taskDetails?.task,
|
|
338
368
|
},
|
|
339
|
-
|
|
340
|
-
|
|
341
|
-
|
|
342
|
-
|
|
343
|
-
|
|
369
|
+
apiUrl: config.apiUrl,
|
|
370
|
+
apiKey: config.apiKey,
|
|
371
|
+
});
|
|
372
|
+
// null = no auth resolved or wrapper exhausted retries (already logged inside)
|
|
373
|
+
if (!result) return;
|
|
344
374
|
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
summary = summaryOutput.result ?? result.stdout;
|
|
349
|
-
} catch {
|
|
350
|
-
summary = result.stdout;
|
|
375
|
+
const summary = result.summary.trim();
|
|
376
|
+
if (summary.length <= 20 || summary.toLowerCase().includes("no significant learnings")) {
|
|
377
|
+
return;
|
|
351
378
|
}
|
|
352
379
|
|
|
353
|
-
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
|
|
358
|
-
|
|
359
|
-
|
|
360
|
-
|
|
361
|
-
|
|
362
|
-
|
|
363
|
-
|
|
364
|
-
|
|
365
|
-
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
sourceTaskId: config.taskId,
|
|
370
|
-
}),
|
|
371
|
-
});
|
|
380
|
+
const indexResp = await fetch(`${config.apiUrl}/api/memory/index`, {
|
|
381
|
+
method: "POST",
|
|
382
|
+
headers: apiHeaders(config),
|
|
383
|
+
body: JSON.stringify({
|
|
384
|
+
scope: "agent",
|
|
385
|
+
source: "session_summary",
|
|
386
|
+
sourceTaskId,
|
|
387
|
+
content: summary,
|
|
388
|
+
name: "session-summary",
|
|
389
|
+
agentId,
|
|
390
|
+
}),
|
|
391
|
+
});
|
|
392
|
+
if (!indexResp.ok) {
|
|
393
|
+
const text = await indexResp.text().catch(() => "");
|
|
394
|
+
console.error("session_summary: /api/memory/index POST failed (pi):", indexResp.status, text);
|
|
395
|
+
return;
|
|
372
396
|
}
|
|
373
|
-
|
|
374
|
-
|
|
397
|
+
|
|
398
|
+
if (wantRatings && result.ratings && result.ratings.length > 0) {
|
|
399
|
+
const ratingEvents = _buildRatings(result.ratings, retrievals);
|
|
400
|
+
if (ratingEvents.length > 0) {
|
|
401
|
+
await _postRatings({
|
|
402
|
+
apiUrl: config.apiUrl,
|
|
403
|
+
apiKey: config.apiKey,
|
|
404
|
+
agentId,
|
|
405
|
+
taskId: sourceTaskId,
|
|
406
|
+
events: ratingEvents,
|
|
407
|
+
}).catch((err) => console.error("session_summary: postRatings failed (pi):", err));
|
|
408
|
+
}
|
|
409
|
+
}
|
|
410
|
+
} catch (err) {
|
|
411
|
+
console.error("session_summary failed (pi):", err);
|
|
375
412
|
}
|
|
376
413
|
}
|
|
377
414
|
|
|
@@ -661,7 +698,7 @@ export function createSwarmHooksExtension(config: SwarmHooksConfig): ExtensionFa
|
|
|
661
698
|
// Session summarization — get session file from context's session manager
|
|
662
699
|
const sessionFile = ctx.sessionManager.getSessionFile?.();
|
|
663
700
|
if (!process.env.SKIP_SESSION_SUMMARY) {
|
|
664
|
-
await
|
|
701
|
+
await summarizeSessionForPi(config, sessionFile);
|
|
665
702
|
}
|
|
666
703
|
|
|
667
704
|
// Mark agent offline
|
package/src/telemetry.ts
CHANGED
|
@@ -73,6 +73,32 @@ interface TrackOptions {
|
|
|
73
73
|
metadata?: Record<string, unknown>;
|
|
74
74
|
}
|
|
75
75
|
|
|
76
|
+
/**
 * Build the organization fields for a telemetry payload from the current
 * environment.
 *
 * `SWARM_ORG_ID` and `SWARM_ORG_NAME` are read from `process.env` on every
 * call (nothing is cached), so values changed at runtime are picked up by the
 * next call. Each value is trimmed; a key is included only when its trimmed
 * value is non-empty, which makes spreading the result a no-op when neither
 * variable is set.
 *
 * @returns An object with `organization_id` and/or `organization_name`, or an
 *   empty object when neither is set.
 */
function getOrgIdentity(): { organization_id?: string; organization_name?: string } {
  const organizationId = process.env.SWARM_ORG_ID?.trim();
  const organizationName = process.env.SWARM_ORG_NAME?.trim();
  return {
    ...(organizationId ? { organization_id: organizationId } : {}),
    ...(organizationName ? { organization_name: organizationName } : {}),
  };
}
|
|
91
|
+
|
|
92
|
+
/**
 * Report whether the `SWARM_CLOUD` environment variable marks this process as
 * a cloud deployment.
 *
 * Only the exact strings "true" and "1" count as cloud; any other value, or an
 * unset variable, yields `false`. The result is always a real boolean so
 * telemetry consumers can tell the two cohorts apart without having to
 * distinguish "false" from "unset".
 *
 * @returns `true` when `SWARM_CLOUD` is exactly "true" or "1", else `false`.
 */
function isCloudDeployment(): boolean {
  const flag = process.env.SWARM_CLOUD;
  switch (flag) {
    case "true":
    case "1":
      return true;
    default:
      return false;
  }
}
|
|
101
|
+
|
|
76
102
|
/** Fire-and-forget telemetry event. Never throws, never blocks. */
|
|
77
103
|
export function track(options: TrackOptions): void {
|
|
78
104
|
if (!isEnabled() || !installationId) return;
|
|
@@ -89,6 +115,8 @@ export function track(options: TrackOptions): void {
|
|
|
89
115
|
transport: "https",
|
|
90
116
|
schema_version: 1,
|
|
91
117
|
environment: process.env.NODE_ENV ?? "production",
|
|
118
|
+
is_cloud: isCloudDeployment(),
|
|
119
|
+
...getOrgIdentity(),
|
|
92
120
|
...options.metadata,
|
|
93
121
|
},
|
|
94
122
|
};
|