@prometheus-ai/agent-core 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +7 -0
- package/README.md +473 -0
- package/dist/types/agent-loop.d.ts +55 -0
- package/dist/types/agent.d.ts +331 -0
- package/dist/types/append-only-context.d.ts +113 -0
- package/dist/types/compaction/branch-summarization.d.ts +94 -0
- package/dist/types/compaction/compaction.d.ts +183 -0
- package/dist/types/compaction/entries.d.ts +103 -0
- package/dist/types/compaction/errors.d.ts +26 -0
- package/dist/types/compaction/index.d.ts +12 -0
- package/dist/types/compaction/messages.d.ts +61 -0
- package/dist/types/compaction/openai.d.ts +58 -0
- package/dist/types/compaction/pruning.d.ts +19 -0
- package/dist/types/compaction/shake.d.ts +82 -0
- package/dist/types/compaction/tool-protection.d.ts +17 -0
- package/dist/types/compaction/utils.d.ts +32 -0
- package/dist/types/compaction.d.ts +1 -0
- package/dist/types/harmony-leak.d.ts +118 -0
- package/dist/types/index.d.ts +11 -0
- package/dist/types/proxy.d.ts +84 -0
- package/dist/types/run-collector.d.ts +196 -0
- package/dist/types/telemetry.d.ts +588 -0
- package/dist/types/thinking.d.ts +17 -0
- package/dist/types/types.d.ts +443 -0
- package/dist/types/utils/yield.d.ts +52 -0
- package/package.json +75 -0
- package/src/agent-loop.ts +1418 -0
- package/src/agent.ts +1236 -0
- package/src/append-only-context.ts +297 -0
- package/src/compaction/branch-summarization.ts +339 -0
- package/src/compaction/compaction.ts +1155 -0
- package/src/compaction/entries.ts +133 -0
- package/src/compaction/errors.ts +31 -0
- package/src/compaction/index.ts +13 -0
- package/src/compaction/messages.ts +212 -0
- package/src/compaction/openai.ts +552 -0
- package/src/compaction/prompts/auto-handoff-threshold-focus.md +1 -0
- package/src/compaction/prompts/branch-summary-context.md +5 -0
- package/src/compaction/prompts/branch-summary-preamble.md +2 -0
- package/src/compaction/prompts/branch-summary.md +30 -0
- package/src/compaction/prompts/compaction-short-summary.md +9 -0
- package/src/compaction/prompts/compaction-summary-context.md +5 -0
- package/src/compaction/prompts/compaction-summary.md +38 -0
- package/src/compaction/prompts/compaction-turn-prefix.md +17 -0
- package/src/compaction/prompts/compaction-update-summary.md +45 -0
- package/src/compaction/prompts/file-operations.md +10 -0
- package/src/compaction/prompts/handoff-document.md +49 -0
- package/src/compaction/prompts/summarization-system.md +3 -0
- package/src/compaction/pruning.ts +99 -0
- package/src/compaction/shake.ts +406 -0
- package/src/compaction/tool-protection.ts +55 -0
- package/src/compaction/utils.ts +185 -0
- package/src/compaction.ts +1 -0
- package/src/harmony-leak.ts +456 -0
- package/src/index.ts +21 -0
- package/src/proxy.ts +326 -0
- package/src/run-collector.ts +631 -0
- package/src/telemetry.ts +2020 -0
- package/src/thinking.ts +19 -0
- package/src/types.ts +505 -0
- package/src/utils/yield.ts +146 -0
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Shared utilities for compaction and branch summarization.
|
|
3
|
+
*/
|
|
4
|
+
|
|
5
|
+
import type { Message } from "@prometheus-ai/ai";
|
|
6
|
+
import { prompt } from "@prometheus-ai/utils";
|
|
7
|
+
import type { AgentMessage } from "../types";
|
|
8
|
+
import fileOperationsTemplate from "./prompts/file-operations.md" with { type: "text" };
|
|
9
|
+
import summarizationSystemPrompt from "./prompts/summarization-system.md" with { type: "text" };
|
|
10
|
+
|
|
11
|
+
// ============================================================================
|
|
12
|
+
// File Operation Tracking
|
|
13
|
+
// ============================================================================
|
|
14
|
+
|
|
15
|
+
export interface FileOperations {
|
|
16
|
+
read: Set<string>;
|
|
17
|
+
written: Set<string>;
|
|
18
|
+
edited: Set<string>;
|
|
19
|
+
}
|
|
20
|
+
|
|
21
|
+
export function createFileOps(): FileOperations {
|
|
22
|
+
return {
|
|
23
|
+
read: new Set(),
|
|
24
|
+
written: new Set(),
|
|
25
|
+
edited: new Set(),
|
|
26
|
+
};
|
|
27
|
+
}
|
|
28
|
+
|
|
29
|
+
/**
|
|
30
|
+
* Extract file operations from tool calls in an assistant message.
|
|
31
|
+
*/
|
|
32
|
+
export function extractFileOpsFromMessage(message: AgentMessage, fileOps: FileOperations): void {
|
|
33
|
+
if (message.role !== "assistant") return;
|
|
34
|
+
if (!("content" in message) || !Array.isArray(message.content)) return;
|
|
35
|
+
|
|
36
|
+
for (const block of message.content) {
|
|
37
|
+
if (typeof block !== "object" || block === null) continue;
|
|
38
|
+
if (!("type" in block) || block.type !== "toolCall") continue;
|
|
39
|
+
if (!("arguments" in block) || !("name" in block)) continue;
|
|
40
|
+
|
|
41
|
+
const args = block.arguments as Record<string, unknown> | undefined;
|
|
42
|
+
if (!args) continue;
|
|
43
|
+
|
|
44
|
+
const path = typeof args.path === "string" ? args.path : undefined;
|
|
45
|
+
if (!path) continue;
|
|
46
|
+
|
|
47
|
+
switch (block.name) {
|
|
48
|
+
case "read":
|
|
49
|
+
fileOps.read.add(path);
|
|
50
|
+
break;
|
|
51
|
+
case "write":
|
|
52
|
+
fileOps.written.add(path);
|
|
53
|
+
break;
|
|
54
|
+
case "edit":
|
|
55
|
+
fileOps.edited.add(path);
|
|
56
|
+
break;
|
|
57
|
+
}
|
|
58
|
+
}
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
/**
|
|
62
|
+
* Compute final file lists from file operations.
|
|
63
|
+
* Returns readFiles (files only read, not modified) and modifiedFiles.
|
|
64
|
+
*/
|
|
65
|
+
export function computeFileLists(fileOps: FileOperations): { readFiles: string[]; modifiedFiles: string[] } {
|
|
66
|
+
const modified = new Set([...fileOps.edited, ...fileOps.written]);
|
|
67
|
+
const readOnly = [...fileOps.read].filter(f => !modified.has(f)).sort();
|
|
68
|
+
const modifiedFiles = [...modified].sort();
|
|
69
|
+
return { readFiles: readOnly, modifiedFiles };
|
|
70
|
+
}
|
|
71
|
+
|
|
72
|
+
/**
|
|
73
|
+
* Format file operations as XML tags for summary.
|
|
74
|
+
*/
|
|
75
|
+
const FILE_OPERATION_SUMMARY_LIMIT = 20;
|
|
76
|
+
|
|
77
|
+
function truncateFileList(files: string[]): string[] {
|
|
78
|
+
if (files.length <= FILE_OPERATION_SUMMARY_LIMIT) return files;
|
|
79
|
+
const omitted = files.length - FILE_OPERATION_SUMMARY_LIMIT;
|
|
80
|
+
return [...files.slice(0, FILE_OPERATION_SUMMARY_LIMIT), `… (${omitted} more files omitted)`];
|
|
81
|
+
}
|
|
82
|
+
|
|
83
|
+
function stripFileOperationTags(summary: string): string {
|
|
84
|
+
const withoutReadFiles = summary.replace(/<read-files>[\s\S]*?<\/read-files>\s*/g, "");
|
|
85
|
+
const withoutModifiedFiles = withoutReadFiles.replace(/<modified-files>[\s\S]*?<\/modified-files>\s*/g, "");
|
|
86
|
+
return withoutModifiedFiles.trimEnd();
|
|
87
|
+
}
|
|
88
|
+
export function formatFileOperations(readFiles: string[], modifiedFiles: string[]): string {
|
|
89
|
+
if (readFiles.length === 0 && modifiedFiles.length === 0) return "";
|
|
90
|
+
return prompt.render(fileOperationsTemplate, {
|
|
91
|
+
readFiles: truncateFileList(readFiles),
|
|
92
|
+
modifiedFiles: truncateFileList(modifiedFiles),
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
export function upsertFileOperations(summary: string, readFiles: string[], modifiedFiles: string[]): string {
|
|
97
|
+
const baseSummary = stripFileOperationTags(summary);
|
|
98
|
+
const fileOperations = formatFileOperations(readFiles, modifiedFiles);
|
|
99
|
+
if (!fileOperations) return baseSummary;
|
|
100
|
+
if (!baseSummary) return fileOperations;
|
|
101
|
+
return `${baseSummary}\n\n${fileOperations}`;
|
|
102
|
+
}
|
|
103
|
+
|
|
104
|
+
// ============================================================================
|
|
105
|
+
// Message Serialization
|
|
106
|
+
// ============================================================================
|
|
107
|
+
|
|
108
|
+
/** Maximum characters for a tool result in serialized summaries. */
|
|
109
|
+
const TOOL_RESULT_MAX_CHARS = 2000;
|
|
110
|
+
|
|
111
|
+
/**
|
|
112
|
+
* Truncate text to a maximum character length for summarization.
|
|
113
|
+
* Keeps the beginning and appends a truncation marker.
|
|
114
|
+
*/
|
|
115
|
+
function truncateForSummary(text: string, maxChars: number): string {
|
|
116
|
+
if (text.length <= maxChars) return text;
|
|
117
|
+
const truncatedChars = text.length - maxChars;
|
|
118
|
+
return `${text.slice(0, maxChars)}\n\n[... ${truncatedChars} more characters truncated]`;
|
|
119
|
+
}
|
|
120
|
+
|
|
121
|
+
/**
|
|
122
|
+
* Serialize LLM messages to text for summarization.
|
|
123
|
+
* This prevents the model from treating it as a conversation to continue.
|
|
124
|
+
* Call convertToLlm() first to handle custom message types.
|
|
125
|
+
*/
|
|
126
|
+
export function serializeConversation(messages: Message[]): string {
|
|
127
|
+
const parts: string[] = [];
|
|
128
|
+
|
|
129
|
+
for (const msg of messages) {
|
|
130
|
+
if (msg.role === "user") {
|
|
131
|
+
const content =
|
|
132
|
+
typeof msg.content === "string"
|
|
133
|
+
? msg.content
|
|
134
|
+
: msg.content
|
|
135
|
+
.filter((c): c is { type: "text"; text: string } => c.type === "text")
|
|
136
|
+
.map(c => c.text)
|
|
137
|
+
.join("");
|
|
138
|
+
if (content) parts.push(`[User]: ${content}`);
|
|
139
|
+
} else if (msg.role === "assistant") {
|
|
140
|
+
const textParts: string[] = [];
|
|
141
|
+
const thinkingParts: string[] = [];
|
|
142
|
+
const toolCalls: string[] = [];
|
|
143
|
+
|
|
144
|
+
for (const block of msg.content) {
|
|
145
|
+
if (block.type === "text") {
|
|
146
|
+
textParts.push(block.text);
|
|
147
|
+
} else if (block.type === "thinking") {
|
|
148
|
+
thinkingParts.push(block.thinking);
|
|
149
|
+
} else if (block.type === "toolCall") {
|
|
150
|
+
const args = block.arguments as Record<string, unknown>;
|
|
151
|
+
const argsStr = Object.entries(args)
|
|
152
|
+
.map(([k, v]) => `${k}=${JSON.stringify(v)}`)
|
|
153
|
+
.join(", ");
|
|
154
|
+
toolCalls.push(`${block.name}(${argsStr})`);
|
|
155
|
+
}
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
if (thinkingParts.length > 0) {
|
|
159
|
+
parts.push(`[Assistant thinking]: ${thinkingParts.join("\n")}`);
|
|
160
|
+
}
|
|
161
|
+
if (textParts.length > 0) {
|
|
162
|
+
parts.push(`[Assistant]: ${textParts.join("\n")}`);
|
|
163
|
+
}
|
|
164
|
+
if (toolCalls.length > 0) {
|
|
165
|
+
parts.push(`[Assistant tool calls]: ${toolCalls.join("; ")}`);
|
|
166
|
+
}
|
|
167
|
+
} else if (msg.role === "toolResult") {
|
|
168
|
+
const content = msg.content
|
|
169
|
+
.filter((c): c is { type: "text"; text: string } => c.type === "text")
|
|
170
|
+
.map(c => c.text)
|
|
171
|
+
.join("");
|
|
172
|
+
if (content) {
|
|
173
|
+
parts.push(`[Tool result]: ${truncateForSummary(content, TOOL_RESULT_MAX_CHARS)}`);
|
|
174
|
+
}
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
|
|
178
|
+
return parts.join("\n\n");
|
|
179
|
+
}
|
|
180
|
+
|
|
181
|
+
// ============================================================================
|
|
182
|
+
// Summarization System Prompt
|
|
183
|
+
// ============================================================================
|
|
184
|
+
|
|
185
|
+
export const SUMMARIZATION_SYSTEM_PROMPT = prompt.render(summarizationSystemPrompt);
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
export * from "./compaction/index";
|
|
@@ -0,0 +1,456 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* GPT-5 Harmony-header leakage detection and recovery.
|
|
3
|
+
*
|
|
4
|
+
* Background and policy: see `docs/ERRATA-GPT5-HARMONY.md`. This module
|
|
5
|
+
* implements §3 of that document: detection by signal fusion, plus a
|
|
6
|
+
* truncate-and-resume primitive for the `edit` tool when its input is in
|
|
7
|
+
* hashline DSL form. Other tools and surfaces fall through to
|
|
8
|
+
* abort-and-retry handled by the agent loop.
|
|
9
|
+
*/
|
|
10
|
+
import type { AssistantMessage, Model, ToolCall } from "@prometheus-ai/ai";
|
|
11
|
+
|
|
12
|
+
// Single source of truth for the marker pattern. `M` in the errata.
|
|
13
|
+
// Use a fresh non-global instance for `.test()` to avoid lastIndex pitfalls.
|
|
14
|
+
const MARKER_RE = /\bto=functions\.[A-Za-z_]\w*/g;
|
|
15
|
+
const HARMONY_RE = /<\|(start|end|channel|message|call|return)\|>/g;
|
|
16
|
+
|
|
17
|
+
// Channel-word adjacency (`C`): channel/role name appearing immediately before the marker.
|
|
18
|
+
const CHANNEL_WORD_RE = /\b(?:analysis|commentary|assistant|user|system|developer|tool)\s+to=functions\./;
|
|
19
|
+
|
|
20
|
+
// Glitch-token adjacency (`G`). The Japgolly literal is escaped so this regex
|
|
21
|
+
// source itself does not trip detection if the file is scanned (e.g. when
|
|
22
|
+
// editing this module via the same agent that detects).
|
|
23
|
+
const GLITCH_RE = /\b(?:changedFiles|RTLU|Jsii(?:_commentary)?|\x4aapgolly)\b/;
|
|
24
|
+
|
|
25
|
+
// Body-channel cascade (`B`): marker followed by ` code` then another marker
|
|
26
|
+
// within 200 chars. Single regex; no manual slicing needed.
|
|
27
|
+
const BODY_CASCADE_RE = /to=functions\.\w+\s+code\b[\s\S]{0,200}?to=functions\./;
|
|
28
|
+
|
|
29
|
+
// Fake-result framing (`R`): marker followed within 80 chars by Cell N: framing.
|
|
30
|
+
const FAKE_RESULT_RE = /to=functions\.\w+[\s\S]{0,80}?code_output\s*\nCell\s+\d+:/;
|
|
31
|
+
|
|
32
|
+
const FENCE_RE = /^\s*(?:```+|~~~+)/;
|
|
33
|
+
|
|
34
|
+
// Non-Latin scripts seen in the corpus: CJK + ext, Cyrillic, Thai, Georgian,
|
|
35
|
+
// Armenian, Kannada, Telugu, Devanagari, Arabic, Malayalam.
|
|
36
|
+
const SCRIPT_CLASS =
|
|
37
|
+
"\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\u0400-\u04FF\u0E00-\u0E7F\u10A0-\u10FF\u0530-\u058F\u0C80-\u0CFF\u0C00-\u0C7F\u0900-\u097F\u0600-\u06FF\u0D00-\u0D7F";
|
|
38
|
+
const SCRIPT_RUN_RE = new RegExp(`[${SCRIPT_CLASS}]{2,}`, "u");
|
|
39
|
+
|
|
40
|
+
// Recovery registry. Each entry's parser must recognize the configured
|
|
41
|
+
// sentinel (per-tool, see eval/parse.ts and hashline/executor.ts) and surface
|
|
42
|
+
// a warning to the model so it knows to re-issue any remaining work.
|
|
43
|
+
// `accepts` gates on input shape: tools whose contaminated input doesn't
|
|
44
|
+
// match the parser's expected DSL fall through to abort-and-retry.
|
|
45
|
+
//
|
|
46
|
+
// • `edit`: hashline DSL input begins with `@<path>`. Apply_patch envelopes
|
|
47
|
+
// (`*** Begin Patch …`) and JSON-schema variants are not recoverable —
|
|
48
|
+
// their parsers don't recognize `*** Abort`.
|
|
49
|
+
// • `eval`: any string is a parseable cell sequence (the parser is lenient
|
|
50
|
+
// and falls back to implicit-cell mode on bare strings).
|
|
51
|
+
interface RecoveryConfig {
|
|
52
|
+
sentinel: string;
|
|
53
|
+
accepts: (input: string) => boolean;
|
|
54
|
+
}
|
|
55
|
+
const RECOVERY_REGISTRY: Record<string, RecoveryConfig> = {
|
|
56
|
+
edit: {
|
|
57
|
+
sentinel: "\n*** Abort\n",
|
|
58
|
+
accepts: input => input.replace(/^\s+/, "").startsWith("@"),
|
|
59
|
+
},
|
|
60
|
+
eval: {
|
|
61
|
+
sentinel: "\n*** Abort\n",
|
|
62
|
+
accepts: () => true,
|
|
63
|
+
},
|
|
64
|
+
};
|
|
65
|
+
|
|
66
|
+
const SIGNAL_ORDER = ["M", "C", "G", "S", "B", "R", "T"] as const;
|
|
67
|
+
|
|
68
|
+
export type HarmonySignalClass = "H" | (typeof SIGNAL_ORDER)[number];
|
|
69
|
+
|
|
70
|
+
export type HarmonySurface = "assistant_text" | "assistant_thinking" | "tool_arg";
|
|
71
|
+
|
|
72
|
+
export interface HarmonySignal {
|
|
73
|
+
classes: HarmonySignalClass[];
|
|
74
|
+
start: number;
|
|
75
|
+
end: number;
|
|
76
|
+
text: string;
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
export interface HarmonyDetection {
|
|
80
|
+
surface: HarmonySurface;
|
|
81
|
+
contentIndex?: number;
|
|
82
|
+
toolName?: string;
|
|
83
|
+
toolCallId?: string;
|
|
84
|
+
signals: HarmonySignal[];
|
|
85
|
+
}
|
|
86
|
+
|
|
87
|
+
export interface HarmonyAuditEvent {
|
|
88
|
+
action: "truncate_resume" | "abort_retry" | "escalated";
|
|
89
|
+
surface: HarmonySurface;
|
|
90
|
+
signal: string;
|
|
91
|
+
retryN: number;
|
|
92
|
+
model: string;
|
|
93
|
+
provider: string;
|
|
94
|
+
toolName?: string;
|
|
95
|
+
removedLen: number;
|
|
96
|
+
removedSha8: string;
|
|
97
|
+
removedPreview: string;
|
|
98
|
+
removedBlob?: string;
|
|
99
|
+
}
|
|
100
|
+
|
|
101
|
+
export interface HarmonyRecoveredToolCall {
|
|
102
|
+
message: AssistantMessage;
|
|
103
|
+
removed: string;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/**
|
|
107
|
+
* Whether to run leak detection on responses from this model. We default-on
|
|
108
|
+
* for every openai-codex model rather than enumerating ids, so a future
|
|
109
|
+
* gpt-5.6 (or whatever) doesn't silently bypass the mitigation. Detection
|
|
110
|
+
* itself is cheap; the cost of missing a leak on a new model is not.
|
|
111
|
+
*/
|
|
112
|
+
export function isHarmonyLeakMitigationTarget(model: Model): boolean {
|
|
113
|
+
return model.provider === "openai-codex";
|
|
114
|
+
}
|
|
115
|
+
|
|
116
|
+
export function signalListLabel(signals: readonly HarmonySignal[]): string {
|
|
117
|
+
const seen: string[] = [];
|
|
118
|
+
for (const signal of signals) {
|
|
119
|
+
const label = signal.classes.join("+");
|
|
120
|
+
if (!seen.includes(label)) seen.push(label);
|
|
121
|
+
}
|
|
122
|
+
return seen.join(",") || "none";
|
|
123
|
+
}
|
|
124
|
+
|
|
125
|
+
/**
|
|
126
|
+
* Detect harmony-protocol leakage in `text`. Returns undefined if clean.
|
|
127
|
+
*
|
|
128
|
+
* Trip rule: `H` alone, or `M` paired with at least one co-signal
|
|
129
|
+
* (`C`/`G`/`S`/`B`/`R`/`T`). Bare `M` does not trip — this document, its
|
|
130
|
+
* tests, and bug reports legitimately carry the marker.
|
|
131
|
+
*
|
|
132
|
+
* The `tool_arg` surface is held to a stricter rule. A tool argument is
|
|
133
|
+
* arbitrary file/data content that can legitimately carry the marker, a
|
|
134
|
+
* channel word, harmony control tokens, or a non-Latin script run (editing
|
|
135
|
+
* these very fixtures does exactly that). The only robust leak signal there
|
|
136
|
+
* is content trailing the structurally-valid parse, so a `tool_arg` detection
|
|
137
|
+
* additionally requires the `T` co-signal. Absent a `parsedEnd` boundary `T`
|
|
138
|
+
* is never set, so `tool_arg` scanning stays inert and a legitimate codex tool
|
|
139
|
+
* call is never hard-aborted. `assistant_text`/`assistant_thinking` keep the
|
|
140
|
+
* base rule.
|
|
141
|
+
*
|
|
142
|
+
* `parsedEnd`, when supplied, marks the byte at which a structurally valid
|
|
143
|
+
* tool-argument parse ends; markers at or past it set the `T` co-signal.
|
|
144
|
+
* `contentIndex`/`toolName`/`toolCallId` flow through to the returned
|
|
145
|
+
* detection for downstream auditing.
|
|
146
|
+
*/
|
|
147
|
+
export function detectHarmonyLeak(
|
|
148
|
+
text: string,
|
|
149
|
+
surface: HarmonySurface,
|
|
150
|
+
options: {
|
|
151
|
+
parsedEnd?: number;
|
|
152
|
+
contentIndex?: number;
|
|
153
|
+
toolName?: string;
|
|
154
|
+
toolCallId?: string;
|
|
155
|
+
} = {},
|
|
156
|
+
): HarmonyDetection | undefined {
|
|
157
|
+
const fences = computeFenceRanges(text);
|
|
158
|
+
const signals: HarmonySignal[] = [];
|
|
159
|
+
|
|
160
|
+
for (const match of text.matchAll(HARMONY_RE)) {
|
|
161
|
+
const start = match.index ?? 0;
|
|
162
|
+
if (isInsideFence(fences, start)) continue;
|
|
163
|
+
signals.push(makeSignal(["H"], start, start + match[0].length, match[0]));
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
for (const match of text.matchAll(MARKER_RE)) {
|
|
167
|
+
const start = match.index ?? 0;
|
|
168
|
+
if (isInsideFence(fences, start)) continue;
|
|
169
|
+
const end = start + match[0].length;
|
|
170
|
+
const classes: HarmonySignalClass[] = ["M"];
|
|
171
|
+
|
|
172
|
+
const adjacent = text.slice(Math.max(0, start - 64), Math.min(text.length, end + 16));
|
|
173
|
+
const near = text.slice(Math.max(0, start - 16), Math.min(text.length, end + 16));
|
|
174
|
+
const forward = text.slice(start, Math.min(text.length, start + 240));
|
|
175
|
+
|
|
176
|
+
if (CHANNEL_WORD_RE.test(adjacent)) classes.push("C");
|
|
177
|
+
if (GLITCH_RE.test(near)) classes.push("G");
|
|
178
|
+
if (hasScriptMismatchNear(text, start, end)) classes.push("S");
|
|
179
|
+
if (BODY_CASCADE_RE.test(forward)) classes.push("B");
|
|
180
|
+
if (FAKE_RESULT_RE.test(forward)) classes.push("R");
|
|
181
|
+
if (options.parsedEnd !== undefined && start >= options.parsedEnd) classes.push("T");
|
|
182
|
+
|
|
183
|
+
// `M` alone never trips: legitimate documentation/tests carry it.
|
|
184
|
+
if (classes.length > 1) {
|
|
185
|
+
signals.push(makeSignal(classes, start, end, match[0]));
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
|
|
189
|
+
if (signals.length === 0) return undefined;
|
|
190
|
+
// Tool arguments are data: they can legitimately embed the marker, a channel
|
|
191
|
+
// word, harmony control tokens, or a non-Latin script run. Only a marker
|
|
192
|
+
// trailing the structurally-valid parse (`T`) is a reliable leak signal, so
|
|
193
|
+
// refuse to trip a `tool_arg` detection without it. Without a `parsedEnd`
|
|
194
|
+
// boundary `T` is never set and the surface stays inert.
|
|
195
|
+
if (surface === "tool_arg" && !signals.some(s => s.classes.includes("T"))) return undefined;
|
|
196
|
+
signals.sort((a, b) => a.start - b.start || a.end - b.end);
|
|
197
|
+
return {
|
|
198
|
+
surface,
|
|
199
|
+
contentIndex: options.contentIndex,
|
|
200
|
+
toolName: options.toolName,
|
|
201
|
+
toolCallId: options.toolCallId,
|
|
202
|
+
signals,
|
|
203
|
+
};
|
|
204
|
+
}
|
|
205
|
+
|
|
206
|
+
/**
|
|
207
|
+
* Scan an assistant message's content blocks; return the first detection.
|
|
208
|
+
*
|
|
209
|
+
* `toolArgParseEnd`, when supplied, resolves the byte offset at which a tool
|
|
210
|
+
* call's structurally-valid argument parse ends (the `T` co-signal in
|
|
211
|
+
* {@link detectHarmonyLeak}). Callers that can parse a tool's argument DSL pass
|
|
212
|
+
* it to enable `tool_arg` leak detection; omitting it keeps that surface inert
|
|
213
|
+
* — the safe default the agent loop relies on, since it cannot bound a streamed
|
|
214
|
+
* tool DSL and must never hard-abort a legitimate tool call.
|
|
215
|
+
*/
|
|
216
|
+
export function detectHarmonyLeakInAssistantMessage(
|
|
217
|
+
message: AssistantMessage,
|
|
218
|
+
toolArgParseEnd?: (toolCall: ToolCall) => number | undefined,
|
|
219
|
+
): HarmonyDetection | undefined {
|
|
220
|
+
for (let i = 0; i < message.content.length; i++) {
|
|
221
|
+
const block = message.content[i];
|
|
222
|
+
if (block.type === "text") {
|
|
223
|
+
const d = detectHarmonyLeak(block.text, "assistant_text", { contentIndex: i });
|
|
224
|
+
if (d) return d;
|
|
225
|
+
} else if (block.type === "thinking") {
|
|
226
|
+
const d = detectHarmonyLeak(block.thinking, "assistant_thinking", { contentIndex: i });
|
|
227
|
+
if (d) return d;
|
|
228
|
+
} else if (block.type === "toolCall") {
|
|
229
|
+
const argText = getToolArgumentText(block);
|
|
230
|
+
if (argText !== undefined) {
|
|
231
|
+
const d = detectHarmonyLeak(argText, "tool_arg", {
|
|
232
|
+
contentIndex: i,
|
|
233
|
+
toolName: block.name,
|
|
234
|
+
toolCallId: block.id,
|
|
235
|
+
parsedEnd: toolArgParseEnd?.(block),
|
|
236
|
+
});
|
|
237
|
+
if (d) return d;
|
|
238
|
+
}
|
|
239
|
+
}
|
|
240
|
+
}
|
|
241
|
+
return undefined;
|
|
242
|
+
}
|
|
243
|
+
|
|
244
|
+
/**
|
|
245
|
+
* Truncate a contaminated tool call at the start of the contaminated line and
|
|
246
|
+
* append the tool's recovery sentinel. Returns a recovered AssistantMessage
|
|
247
|
+
* (containing only the cleaned tool call), a synthetic continuation user
|
|
248
|
+
* message asking the model to re-issue the rest, and the removed substring
|
|
249
|
+
* for auditing. Returns undefined when the tool is not recovery-eligible or
|
|
250
|
+
* the truncation would leave nothing meaningful to dispatch.
|
|
251
|
+
*
|
|
252
|
+
* `providerPayload` is dropped from the recovered message: for Codex the
|
|
253
|
+
* encrypted reasoning blob is opaque/signed and we cannot validate that it is
|
|
254
|
+
* uncontaminated. The model re-reasons on the next turn.
|
|
255
|
+
*/
|
|
256
|
+
export function recoverHarmonyToolCall(
|
|
257
|
+
message: AssistantMessage,
|
|
258
|
+
detection: HarmonyDetection,
|
|
259
|
+
): HarmonyRecoveredToolCall | undefined {
|
|
260
|
+
if (detection.surface !== "tool_arg" || detection.contentIndex === undefined) return undefined;
|
|
261
|
+
const block = message.content[detection.contentIndex];
|
|
262
|
+
if (block?.type !== "toolCall") return undefined;
|
|
263
|
+
|
|
264
|
+
const config = RECOVERY_REGISTRY[block.name];
|
|
265
|
+
if (!config) return undefined;
|
|
266
|
+
|
|
267
|
+
const input = block.arguments?.input;
|
|
268
|
+
if (typeof input !== "string") return undefined;
|
|
269
|
+
if (!config.accepts(input)) return undefined;
|
|
270
|
+
|
|
271
|
+
const offset = detection.signals[0]?.start;
|
|
272
|
+
if (offset === undefined) return undefined;
|
|
273
|
+
|
|
274
|
+
const truncated = truncateAtLineAndAppendSentinel(input, offset, config.sentinel);
|
|
275
|
+
if (truncated === undefined) return undefined;
|
|
276
|
+
|
|
277
|
+
const cleanToolCall: ToolCall = {
|
|
278
|
+
...block,
|
|
279
|
+
arguments: { ...block.arguments, input: truncated.clean },
|
|
280
|
+
};
|
|
281
|
+
const cleanMessage: AssistantMessage = {
|
|
282
|
+
...message,
|
|
283
|
+
content: [cleanToolCall],
|
|
284
|
+
// Drop encrypted reasoning blob: opaque, possibly carries the leak forward.
|
|
285
|
+
providerPayload: undefined,
|
|
286
|
+
stopReason: "toolUse",
|
|
287
|
+
errorMessage: undefined,
|
|
288
|
+
};
|
|
289
|
+
return { message: cleanMessage, removed: truncated.removed };
|
|
290
|
+
}
|
|
291
|
+
|
|
292
|
+
/**
|
|
293
|
+
* Return the contaminated substring from `message` for audit purposes when
|
|
294
|
+
* recovery is not applicable (abort path). Walks from the first detected
|
|
295
|
+
* signal to end-of-content within the relevant block. Returns "" if the
|
|
296
|
+
* detection cannot be resolved against the message.
|
|
297
|
+
*/
|
|
298
|
+
export function extractHarmonyRemoved(message: AssistantMessage, detection: HarmonyDetection): string {
|
|
299
|
+
if (detection.contentIndex === undefined) return "";
|
|
300
|
+
const block = message.content[detection.contentIndex];
|
|
301
|
+
if (!block) return "";
|
|
302
|
+
const start = detection.signals[0]?.start ?? 0;
|
|
303
|
+
if (block.type === "text") return block.text.slice(start);
|
|
304
|
+
if (block.type === "thinking") return block.thinking.slice(start);
|
|
305
|
+
if (block.type === "toolCall") {
|
|
306
|
+
const text = getToolArgumentText(block);
|
|
307
|
+
return text ? text.slice(start) : "";
|
|
308
|
+
}
|
|
309
|
+
return "";
|
|
310
|
+
}
|
|
311
|
+
|
|
312
|
+
export function createHarmonyAuditEvent(params: {
|
|
313
|
+
action: HarmonyAuditEvent["action"];
|
|
314
|
+
detection: HarmonyDetection;
|
|
315
|
+
model: Model;
|
|
316
|
+
retryN: number;
|
|
317
|
+
removed: string;
|
|
318
|
+
}): HarmonyAuditEvent {
|
|
319
|
+
return {
|
|
320
|
+
action: params.action,
|
|
321
|
+
surface: params.detection.surface,
|
|
322
|
+
signal: signalListLabel(params.detection.signals),
|
|
323
|
+
retryN: params.retryN,
|
|
324
|
+
model: params.model.id,
|
|
325
|
+
provider: params.model.provider,
|
|
326
|
+
toolName: params.detection.toolName,
|
|
327
|
+
removedLen: params.removed.length,
|
|
328
|
+
removedSha8: sha8(params.removed),
|
|
329
|
+
removedPreview: redactedJunkPreview(params.removed),
|
|
330
|
+
removedBlob: Bun.env.PROMETHEUS_HARMONY_DEBUG === "1" ? params.removed : undefined,
|
|
331
|
+
};
|
|
332
|
+
}
|
|
333
|
+
|
|
334
|
+
// ─── internals ──────────────────────────────────────────────────────────────
|
|
335
|
+
|
|
336
|
+
function makeSignal(classes: HarmonySignalClass[], start: number, end: number, text: string): HarmonySignal {
|
|
337
|
+
if (classes[0] === "H") return { classes: ["H"], start, end, text };
|
|
338
|
+
const sorted: HarmonySignalClass[] = [];
|
|
339
|
+
for (const cls of SIGNAL_ORDER) {
|
|
340
|
+
if (classes.includes(cls)) sorted.push(cls);
|
|
341
|
+
}
|
|
342
|
+
return { classes: sorted, start, end, text };
|
|
343
|
+
}
|
|
344
|
+
|
|
345
|
+
/**
|
|
346
|
+
* Precompute fenced-code-block ranges once per text. Each range is a
|
|
347
|
+
* [start, end) span of bytes inside any ```/~~~ fence. O(n) once instead of
|
|
348
|
+
* O(n) per detected match.
|
|
349
|
+
*/
|
|
350
|
+
function computeFenceRanges(text: string): Array<[number, number]> {
|
|
351
|
+
const ranges: Array<[number, number]> = [];
|
|
352
|
+
let inFence = false;
|
|
353
|
+
let fenceStart = 0;
|
|
354
|
+
let lineStart = 0;
|
|
355
|
+
while (lineStart <= text.length) {
|
|
356
|
+
const newline = text.indexOf("\n", lineStart);
|
|
357
|
+
const lineEnd = newline === -1 ? text.length : newline;
|
|
358
|
+
const line = text.slice(lineStart, lineEnd);
|
|
359
|
+
if (FENCE_RE.test(line)) {
|
|
360
|
+
if (inFence) {
|
|
361
|
+
ranges.push([fenceStart, lineEnd]);
|
|
362
|
+
inFence = false;
|
|
363
|
+
} else {
|
|
364
|
+
fenceStart = lineStart;
|
|
365
|
+
inFence = true;
|
|
366
|
+
}
|
|
367
|
+
}
|
|
368
|
+
if (newline === -1) break;
|
|
369
|
+
lineStart = newline + 1;
|
|
370
|
+
}
|
|
371
|
+
if (inFence) ranges.push([fenceStart, text.length]);
|
|
372
|
+
return ranges;
|
|
373
|
+
}
|
|
374
|
+
|
|
375
|
+
function isInsideFence(ranges: Array<[number, number]>, position: number): boolean {
|
|
376
|
+
for (const [start, end] of ranges) {
|
|
377
|
+
if (position >= start && position < end) return true;
|
|
378
|
+
if (start > position) break;
|
|
379
|
+
}
|
|
380
|
+
return false;
|
|
381
|
+
}
|
|
382
|
+
|
|
383
|
+
function hasScriptMismatchNear(text: string, start: number, end: number): boolean {
|
|
384
|
+
const near = text.slice(Math.max(0, start - 32), Math.min(text.length, end + 32));
|
|
385
|
+
if (!SCRIPT_RUN_RE.test(near)) return false;
|
|
386
|
+
const surrounding = text.slice(Math.max(0, start - 200), Math.min(text.length, end + 200));
|
|
387
|
+
if (surrounding.length === 0) return false;
|
|
388
|
+
let ascii = 0;
|
|
389
|
+
for (let i = 0; i < surrounding.length; i++) {
|
|
390
|
+
if (surrounding.charCodeAt(i) < 128) ascii++;
|
|
391
|
+
}
|
|
392
|
+
return ascii / surrounding.length >= 0.85;
|
|
393
|
+
}
|
|
394
|
+
|
|
395
|
+
/**
|
|
396
|
+
* Tool-call argument text used for detection scanning. For tools whose args
|
|
397
|
+
* include a free-form `input` string we scan that directly so reported byte
|
|
398
|
+
* offsets line up with the original. For everything else we fall back to a
|
|
399
|
+
* JSON-stringified blob so detection still fires; that path's offsets are
|
|
400
|
+
* NOT meaningful for slicing the original args, but the recovery path gates
|
|
401
|
+
* on `block.arguments.input` being a string and only ever slices that.
|
|
402
|
+
*/
|
|
403
|
+
function getToolArgumentText(toolCall: ToolCall): string | undefined {
|
|
404
|
+
if (typeof toolCall.arguments?.input === "string") return toolCall.arguments.input;
|
|
405
|
+
try {
|
|
406
|
+
return JSON.stringify(toolCall.arguments);
|
|
407
|
+
} catch {
|
|
408
|
+
return undefined;
|
|
409
|
+
}
|
|
410
|
+
}
|
|
411
|
+
|
|
412
|
+
function truncateAtLineAndAppendSentinel(
|
|
413
|
+
input: string,
|
|
414
|
+
offset: number,
|
|
415
|
+
sentinel: string,
|
|
416
|
+
): { clean: string; removed: string } | undefined {
|
|
417
|
+
const lineStart = offset <= 0 ? 0 : input.lastIndexOf("\n", offset - 1) + 1;
|
|
418
|
+
if (lineStart === 0) return undefined; // would cut everything
|
|
419
|
+
const head = input.slice(0, lineStart).replace(/\s+$/, "");
|
|
420
|
+
if (head.length === 0) return undefined;
|
|
421
|
+
return {
|
|
422
|
+
clean: head + sentinel,
|
|
423
|
+
removed: input.slice(lineStart),
|
|
424
|
+
};
|
|
425
|
+
}
|
|
426
|
+
|
|
427
|
+
function sha8(text: string): string {
|
|
428
|
+
return Bun.sha(text, "hex").slice(0, 8);
|
|
429
|
+
}
|
|
430
|
+
|
|
431
|
+
const PREVIEW_KEEP_RE = new RegExp(`[${SCRIPT_CLASS}\\s】【”“…」「、。]`, "u");
|
|
432
|
+
const PREVIEW_TOKEN_RE =
|
|
433
|
+
/^(?:to=functions\.[A-Za-z_]\w*|analysis|commentary|assistant|user|system|developer|tool|changedFiles|RTLU|Jsii(?:_commentary)?|\x4aapgolly)/;
|
|
434
|
+
|
|
435
|
+
/**
|
|
436
|
+
* Privacy-safe preview for the audit log: keeps marker/channel/glitch tokens,
|
|
437
|
+
* non-Latin script chars, and CJK punctuation; replaces everything else
|
|
438
|
+
* (potential source/secrets) with `·`. Sufficient to grow the glitch-token
|
|
439
|
+
* denylist from logs without exposing source content. Capped at 64 chars.
|
|
440
|
+
*/
|
|
441
|
+
function redactedJunkPreview(text: string): string {
|
|
442
|
+
const source = text.slice(0, 64);
|
|
443
|
+
let out = "";
|
|
444
|
+
for (let i = 0; i < source.length; ) {
|
|
445
|
+
const tok = PREVIEW_TOKEN_RE.exec(source.slice(i));
|
|
446
|
+
if (tok) {
|
|
447
|
+
out += tok[0];
|
|
448
|
+
i += tok[0].length;
|
|
449
|
+
continue;
|
|
450
|
+
}
|
|
451
|
+
const ch = source[i] ?? "";
|
|
452
|
+
out += PREVIEW_KEEP_RE.test(ch) ? ch : "·";
|
|
453
|
+
i++;
|
|
454
|
+
}
|
|
455
|
+
return out;
|
|
456
|
+
}
|
package/src/index.ts
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
1
|
+
// Core Agent
|
|
2
|
+
export * from "./agent";
|
|
3
|
+
// Loop functions
|
|
4
|
+
export * from "./agent-loop";
|
|
5
|
+
// Append-only context mode
|
|
6
|
+
export * from "./append-only-context";
|
|
7
|
+
// Compaction
|
|
8
|
+
export * from "./compaction";
|
|
9
|
+
export * from "./harmony-leak";
|
|
10
|
+
// Proxy utilities
|
|
11
|
+
export * from "./proxy";
|
|
12
|
+
// Run-level telemetry collector + aggregators
|
|
13
|
+
export * from "./run-collector";
|
|
14
|
+
// Telemetry
|
|
15
|
+
export * from "./telemetry";
|
|
16
|
+
// Thinking selectors
|
|
17
|
+
export * from "./thinking";
|
|
18
|
+
// Types
|
|
19
|
+
export * from "./types";
|
|
20
|
+
// Yield utilities for Bun event-loop busy-wait prevention
|
|
21
|
+
export * from "./utils/yield";
|