@oh-my-pi/pi-agent-core 15.13.1 → 15.13.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +24 -0
- package/dist/types/agent-loop.d.ts +2 -1
- package/dist/types/agent.d.ts +11 -1
- package/dist/types/append-only-context.d.ts +2 -0
- package/dist/types/index.d.ts +0 -1
- package/dist/types/types.d.ts +23 -1
- package/package.json +6 -6
- package/src/agent-loop.ts +90 -11
- package/src/agent.ts +17 -1
- package/src/append-only-context.ts +4 -1
- package/src/index.ts +0 -1
- package/src/types.ts +23 -1
- package/dist/types/harmony-leak.d.ts +0 -118
- package/src/harmony-leak.ts +0 -456
package/CHANGELOG.md
CHANGED
|
@@ -2,6 +2,30 @@
|
|
|
2
2
|
|
|
3
3
|
## [Unreleased]
|
|
4
4
|
|
|
5
|
+
## [15.13.2] - 2026-06-15
|
|
6
|
+
|
|
7
|
+
### Breaking Changes
|
|
8
|
+
|
|
9
|
+
- Removed `harmony-leak` exports from the `@oh-my-pi/pi-agent-core` package entrypoint
|
|
10
|
+
- Replaced the experimental `promptToolCalls` agent/loop option with `toolCallSyntax`, selecting an explicit in-band tool-call grammar instead of a boolean GLM-only mode.
|
|
11
|
+
|
|
12
|
+
### Added
|
|
13
|
+
|
|
14
|
+
- Added support for selecting owned in-band tool-call syntax via `PI_OWNED_TOOLS=<syntax>` (for example `hermes` or `qwen3`) while preserving legacy `PI_OWNED_TOOLS=1/true` as GLM mode
|
|
15
|
+
- Added owned in-band tool calling for multiple syntaxes (`glm`, `hermes`, `kimi`, `xml`, `anthropic`, `deepseek`, `harmony`, `pi-native`, `qwen3`). Owned mode sends no native provider tools, appends a syntax-specific prompt/catalog, re-encodes prior tool calls/results as grammar-owned text, and parses streamed model output back into canonical tool calls.
|
|
16
|
+
- Added tool-example folding to `normalizeTools`: when given a model's affinity syntax (resolved via `preferredToolSyntax`), it renders each tool's `examples` into an `<examples>` block in that native syntax and appends it to the wire description. Wired through both context paths (fresh build and append-only `takeSnapshot`/`build` via a new `exampleSyntax` build option), with the `_i` intent-field placeholder added to examples when intent tracing injects it.
|
|
17
|
+
- Added the `abortOnFabricatedToolResult` option to `AgentOptions`/`AgentLoopConfig` (default `true`): when owned tool calling is active and the model fabricates a tool result mid-turn, `true` aborts the provider request immediately while `false` lets it finish and discards the fabricated continuation.
|
|
18
|
+
|
|
19
|
+
### Changed
|
|
20
|
+
|
|
21
|
+
- Added owned in-band syntax support to `Agent` loop configuration resolution by selecting syntax from `toolCallSyntax` or `PI_OWNED_TOOLS` when present
|
|
22
|
+
|
|
23
|
+
### Fixed
|
|
24
|
+
|
|
25
|
+
- Fixed append-only context cache fingerprinting to account for `exampleSyntax`, so switching tool-call syntax rebuilds cached prompts with the correct injected tool examples
|
|
26
|
+
- Fixed owned in-band tool-calling requests to omit `toolChoice` after stripping native tools, preventing invalid tool-choice requests
|
|
27
|
+
- Fixed owned tool calling letting the model fabricate tool results by treating grammar-owned tool-result markers in assistant text as a hard turn boundary: calls before the fabrication are kept, fabricated results and dependent calls are dropped, and the real result is fed back on the next turn.
|
|
28
|
+
|
|
5
29
|
## [15.13.1] - 2026-06-15
|
|
6
30
|
|
|
7
31
|
### Added
|
|
@@ -3,6 +3,7 @@
|
|
|
3
3
|
* Transforms to Message[] only at the LLM call boundary.
|
|
4
4
|
*/
|
|
5
5
|
import { type Context, EventStream } from "@oh-my-pi/pi-ai";
|
|
6
|
+
import { type ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
6
7
|
import { type AgentRunCoverage, type AgentRunSummary } from "./run-collector";
|
|
7
8
|
import type { AgentContext, AgentEvent, AgentLoopConfig, AgentMessage, StreamFn } from "./types";
|
|
8
9
|
/**
|
|
@@ -52,7 +53,7 @@ export declare function agentLoopContinueDetailed(context: AgentContext, config:
|
|
|
52
53
|
readonly detailed: () => Promise<AgentLoopDetailedResult>;
|
|
53
54
|
};
|
|
54
55
|
export declare const INTENT_FIELD = "_i";
|
|
55
|
-
export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"];
|
|
56
|
+
export declare function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean, exampleSyntax?: ToolCallSyntax): Context["tools"];
|
|
56
57
|
/** Resolve the human-readable reason an abort carried. A caller that aborts via
|
|
57
58
|
* `AbortController.abort(reason)` with a string or a non-`AbortError` `Error`
|
|
58
59
|
* (e.g. the coding agent's user-interrupt label) gets that text surfaced on the
|
package/dist/types/agent.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import { type ApiKeyResolveContext, type AssistantMessage, type AssistantMessageEvent, type Context, type CursorExecHandlers, type CursorToolResultHandler, type Effort, type ImageContent, type Message, type Model, type ProviderSessionState, type ServiceTier, type SimpleStreamOptions, type ThinkingBudgets, type ToolChoice } from "@oh-my-pi/pi-ai";
|
|
2
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
3
|
+
import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
2
4
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
3
|
-
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
4
5
|
import type { AgentEvent, AgentLoopConfig, AgentMessage, AgentState, AgentTool, AgentToolContext, AsideMessage, StreamFn, ToolCallContext } from "./types";
|
|
5
6
|
export declare class AgentBusyError extends Error {
|
|
6
7
|
constructor(message?: string);
|
|
@@ -126,6 +127,15 @@ export interface AgentOptions {
|
|
|
126
127
|
transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
|
|
127
128
|
/** Enable intent tracing schema injection/stripping in the harness. */
|
|
128
129
|
intentTracing?: boolean;
|
|
130
|
+
/** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
|
|
131
|
+
toolCallSyntax?: ToolCallSyntax;
|
|
132
|
+
/**
|
|
133
|
+
* When owned tool calling is active and the model fabricates a tool result
|
|
134
|
+
* mid-turn: `true` (default) aborts the provider request immediately; `false`
|
|
135
|
+
* drains the request and discards the fabricated continuation. Forwarded to
|
|
136
|
+
* the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
|
|
137
|
+
*/
|
|
138
|
+
abortOnFabricatedToolResult?: boolean;
|
|
129
139
|
/** Dynamic tool choice override, resolved per LLM call. */
|
|
130
140
|
getToolChoice?: () => ToolChoice | undefined;
|
|
131
141
|
/**
|
|
@@ -14,6 +14,7 @@
|
|
|
14
14
|
* message delta is a cache miss each turn.
|
|
15
15
|
*/
|
|
16
16
|
import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
|
|
17
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
17
18
|
import type { AgentContext } from "./types";
|
|
18
19
|
/** Frozen system prompt + tool spec snapshot. */
|
|
19
20
|
export interface StablePrefixSnapshot {
|
|
@@ -25,6 +26,7 @@ export interface StablePrefixSnapshot {
|
|
|
25
26
|
export interface BuildOptions {
|
|
26
27
|
/** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
|
|
27
28
|
intentTracing: boolean;
|
|
29
|
+
exampleSyntax?: ToolCallSyntax;
|
|
28
30
|
}
|
|
29
31
|
/**
|
|
30
32
|
* A frozen prefix (system prompt + tools) that produces stable byte
|
package/dist/types/index.d.ts
CHANGED
package/dist/types/types.d.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
import type { ApiKeyResolveContext, AssistantMessage, AssistantMessageEvent, AssistantMessageEventStream, Context, Effort, ImageContent, Message, Model, SimpleStreamOptions, Static, streamSimple, TextContent, Tool, ToolChoice, ToolResultMessage, TSchema } from "@oh-my-pi/pi-ai";
|
|
2
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
3
|
+
import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
2
4
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
3
|
-
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
4
5
|
import type { AgentRunCoverage, AgentRunSummary } from "./run-collector";
|
|
5
6
|
import type { AgentTelemetryConfig } from "./telemetry";
|
|
6
7
|
/** Stream function - can return sync or Promise for async config lookup */
|
|
@@ -162,6 +163,27 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
162
163
|
* then strips from arguments before executing tools.
|
|
163
164
|
*/
|
|
164
165
|
intentTracing?: boolean;
|
|
166
|
+
/**
|
|
167
|
+
* Owned tool calling syntax.
|
|
168
|
+
*
|
|
169
|
+
* Undefined keeps provider-native tool calling. A syntax value sends no
|
|
170
|
+
* native `tools`, forces `toolChoice` off, appends that syntax's tool catalog
|
|
171
|
+
* instructions, re-encodes prior tool calls/results as text, and parses the
|
|
172
|
+
* model's text output back into canonical `toolCall` blocks.
|
|
173
|
+
*/
|
|
174
|
+
toolCallSyntax?: ToolCallSyntax;
|
|
175
|
+
/**
|
|
176
|
+
* When owned (in-band) tool calling is active and the model starts
|
|
177
|
+
* fabricating a tool result inside its own turn, control how the loop reacts:
|
|
178
|
+
* - `true` (default): abort the provider request immediately so it stops
|
|
179
|
+
* generating the hallucinated continuation (cheaper, lower latency).
|
|
180
|
+
* - `false`: let the request finish and silently discard everything past the
|
|
181
|
+
* fabrication boundary (keeps the connection alive but pays for the tokens
|
|
182
|
+
* the model spends on the discarded tail).
|
|
183
|
+
* Only meaningful when {@link toolCallSyntax} (or `PI_OWNED_TOOLS`) selects an
|
|
184
|
+
* owned syntax; native tool calling never fabricates results in text.
|
|
185
|
+
*/
|
|
186
|
+
abortOnFabricatedToolResult?: boolean;
|
|
165
187
|
/**
|
|
166
188
|
* Append-only context mode — stabilizes system prompt + tool spec bytes
|
|
167
189
|
* across turns so provider prefix caches hit at maximum rate.
|
package/package.json
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"type": "module",
|
|
3
3
|
"name": "@oh-my-pi/pi-agent-core",
|
|
4
|
-
"version": "15.13.1",
|
|
4
|
+
"version": "15.13.2",
|
|
5
5
|
"description": "General-purpose agent with transport abstraction, state management, and attachment support",
|
|
6
6
|
"homepage": "https://omp.sh",
|
|
7
7
|
"author": "Can Boluk",
|
|
@@ -35,11 +35,11 @@
|
|
|
35
35
|
"fmt": "biome format --write ."
|
|
36
36
|
},
|
|
37
37
|
"dependencies": {
|
|
38
|
-
"@oh-my-pi/pi-ai": "15.13.1",
|
|
39
|
-
"@oh-my-pi/pi-catalog": "15.13.1",
|
|
40
|
-
"@oh-my-pi/pi-natives": "15.13.1",
|
|
41
|
-
"@oh-my-pi/pi-utils": "15.13.1",
|
|
42
|
-
"@oh-my-pi/snapcompact": "15.13.1",
|
|
38
|
+
"@oh-my-pi/pi-ai": "15.13.2",
|
|
39
|
+
"@oh-my-pi/pi-catalog": "15.13.2",
|
|
40
|
+
"@oh-my-pi/pi-natives": "15.13.2",
|
|
41
|
+
"@oh-my-pi/pi-utils": "15.13.2",
|
|
42
|
+
"@oh-my-pi/snapcompact": "15.13.2",
|
|
43
43
|
"@opentelemetry/api": "^1.9.1"
|
|
44
44
|
},
|
|
45
45
|
"devDependencies": {
|
package/src/agent-loop.ts
CHANGED
|
@@ -15,7 +15,13 @@ import {
|
|
|
15
15
|
validateToolArguments,
|
|
16
16
|
zodToWireSchema,
|
|
17
17
|
} from "@oh-my-pi/pi-ai";
|
|
18
|
-
import {
|
|
18
|
+
import {
|
|
19
|
+
encodeInbandToolHistory,
|
|
20
|
+
renderInbandToolPrompt,
|
|
21
|
+
renderToolExamples,
|
|
22
|
+
type ToolCallSyntax,
|
|
23
|
+
wrapInbandToolStream,
|
|
24
|
+
} from "@oh-my-pi/pi-ai/grammar";
|
|
19
25
|
import {
|
|
20
26
|
createHarmonyAuditEvent,
|
|
21
27
|
detectHarmonyLeakInAssistantMessage,
|
|
@@ -25,7 +31,9 @@ import {
|
|
|
25
31
|
isHarmonyLeakMitigationTarget,
|
|
26
32
|
recoverHarmonyToolCall,
|
|
27
33
|
signalListLabel,
|
|
28
|
-
} from "./harmony-leak";
|
|
34
|
+
} from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
35
|
+
import { preferredToolSyntax } from "@oh-my-pi/pi-catalog/identity";
|
|
36
|
+
import { logger, sanitizeText } from "@oh-my-pi/pi-utils";
|
|
29
37
|
import { type AgentRunCoverage, type AgentRunSummary, ToolCallBlockedError } from "./run-collector";
|
|
30
38
|
import {
|
|
31
39
|
type AgentTelemetry,
|
|
@@ -76,6 +84,25 @@ class HarmonyLeakInterruption extends Error {
|
|
|
76
84
|
this.name = "HarmonyLeakInterruption";
|
|
77
85
|
}
|
|
78
86
|
}
|
|
87
|
+
function resolveOwnedToolSyntaxFromEnv(value: string | undefined): ToolCallSyntax | undefined {
|
|
88
|
+
switch (value) {
|
|
89
|
+
case "1":
|
|
90
|
+
case "true":
|
|
91
|
+
return "glm";
|
|
92
|
+
case "glm":
|
|
93
|
+
case "hermes":
|
|
94
|
+
case "kimi":
|
|
95
|
+
case "xml":
|
|
96
|
+
case "anthropic":
|
|
97
|
+
case "deepseek":
|
|
98
|
+
case "harmony":
|
|
99
|
+
case "pi":
|
|
100
|
+
case "qwen3":
|
|
101
|
+
return value;
|
|
102
|
+
default:
|
|
103
|
+
return undefined;
|
|
104
|
+
}
|
|
105
|
+
}
|
|
79
106
|
|
|
80
107
|
type AssistantContentBlock = AssistantMessage["content"][number];
|
|
81
108
|
type AssistantToolCallBlock = Extract<AssistantContentBlock, { type: "toolCall" }>;
|
|
@@ -491,7 +518,11 @@ function injectIntentIntoSchema(schema: unknown, mode: "require" | "optional" =
|
|
|
491
518
|
};
|
|
492
519
|
}
|
|
493
520
|
|
|
494
|
-
export function normalizeTools(tools: AgentContext["tools"], injectIntent: boolean): Context["tools"] {
|
|
521
|
+
export function normalizeTools(
|
|
522
|
+
tools: AgentContext["tools"],
|
|
523
|
+
injectIntent: boolean,
|
|
524
|
+
exampleSyntax?: ToolCallSyntax,
|
|
525
|
+
): Context["tools"] {
|
|
495
526
|
injectIntent = injectIntent && Bun.env.PI_NO_INTENT !== "1";
|
|
496
527
|
return tools?.map(t => {
|
|
497
528
|
const intentMode = resolveIntentMode(t.intent);
|
|
@@ -505,7 +536,12 @@ export function normalizeTools(tools: AgentContext["tools"], injectIntent: boole
|
|
|
505
536
|
}
|
|
506
537
|
}
|
|
507
538
|
const description = t.description ?? "";
|
|
508
|
-
|
|
539
|
+
const injectExampleIntent = injectIntent && intentMode !== "omit";
|
|
540
|
+
const examplesBlock = exampleSyntax
|
|
541
|
+
? renderToolExamples({ ...t, parameters }, exampleSyntax, injectExampleIntent ? INTENT_FIELD : undefined)
|
|
542
|
+
: "";
|
|
543
|
+
const finalDescription = examplesBlock ? `${description}\n\n${examplesBlock}` : description;
|
|
544
|
+
return { ...t, parameters, description: finalDescription };
|
|
509
545
|
});
|
|
510
546
|
}
|
|
511
547
|
|
|
@@ -884,18 +920,37 @@ async function streamAssistantResponse(
|
|
|
884
920
|
let llmContext: Context;
|
|
885
921
|
if (config.appendOnlyContext) {
|
|
886
922
|
config.appendOnlyContext.syncMessages(normalizedMessages);
|
|
887
|
-
llmContext = config.appendOnlyContext.build(context, { intentTracing: !!config.intentTracing });
|
|
923
|
+
llmContext = config.appendOnlyContext.build(context, {
|
|
924
|
+
intentTracing: !!config.intentTracing,
|
|
925
|
+
exampleSyntax: preferredToolSyntax(config.model.id),
|
|
926
|
+
});
|
|
888
927
|
} else {
|
|
889
928
|
llmContext = {
|
|
890
929
|
systemPrompt: context.systemPrompt,
|
|
891
930
|
messages: normalizedMessages,
|
|
892
|
-
tools: normalizeTools(context.tools, !!config.intentTracing),
|
|
931
|
+
tools: normalizeTools(context.tools, !!config.intentTracing, preferredToolSyntax(config.model.id)),
|
|
893
932
|
};
|
|
894
933
|
}
|
|
895
934
|
if (config.transformProviderContext) {
|
|
896
935
|
llmContext = config.transformProviderContext(llmContext, config.model);
|
|
897
936
|
}
|
|
898
937
|
|
|
938
|
+
// Owned tool calling: take tool calls away from the provider and run them
|
|
939
|
+
// through the selected in-band prompt syntax. `PI_OWNED_TOOLS=1` still
|
|
940
|
+
// force-enables GLM; `PI_OWNED_TOOLS=<syntax>` force-enables that syntax.
|
|
941
|
+
const ownedSyntax: ToolCallSyntax | undefined =
|
|
942
|
+
config.toolCallSyntax ?? resolveOwnedToolSyntaxFromEnv(Bun.env.PI_OWNED_TOOLS);
|
|
943
|
+
let promptToolWireTools: Context["tools"];
|
|
944
|
+
if (ownedSyntax && llmContext.tools && llmContext.tools.length > 0) {
|
|
945
|
+
promptToolWireTools = llmContext.tools;
|
|
946
|
+
llmContext = {
|
|
947
|
+
...llmContext,
|
|
948
|
+
systemPrompt: [...(llmContext.systemPrompt ?? []), renderInbandToolPrompt(promptToolWireTools, ownedSyntax)],
|
|
949
|
+
messages: encodeInbandToolHistory(llmContext.messages, ownedSyntax, promptToolWireTools),
|
|
950
|
+
tools: undefined,
|
|
951
|
+
};
|
|
952
|
+
}
|
|
953
|
+
|
|
899
954
|
const streamFunction = streamFn || streamSimple;
|
|
900
955
|
|
|
901
956
|
// Resolve API key (important for expiring tokens) — do this before resolving
|
|
@@ -920,12 +975,22 @@ async function streamAssistantResponse(
|
|
|
920
975
|
: harmonyAbortController.signal
|
|
921
976
|
: signal;
|
|
922
977
|
const repetitionAbortController = new AbortController();
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
978
|
+
// Owned tool calling: aborted by the stream wrapper when the model starts
|
|
979
|
+
// fabricating a `<tool_response>`, so the provider stops generating the rest of
|
|
980
|
+
// the hallucinated turn. Merged into the provider signal ONLY (not
|
|
981
|
+
// `requestSignal`), so it cancels the request without tripping the loop's
|
|
982
|
+
// external-abort handling (`abortRacePromise` / `requestSignal.aborted`).
|
|
983
|
+
const promptToolAbortController = ownedSyntax ? new AbortController() : undefined;
|
|
984
|
+
const providerAbortSignals: AbortSignal[] = [];
|
|
985
|
+
if (requestSignal) providerAbortSignals.push(requestSignal);
|
|
986
|
+
providerAbortSignals.push(repetitionAbortController.signal);
|
|
987
|
+
if (promptToolAbortController) providerAbortSignals.push(promptToolAbortController.signal);
|
|
988
|
+
const finalRequestSignal =
|
|
989
|
+
providerAbortSignals.length === 1 ? providerAbortSignals[0]! : AbortSignal.any(providerAbortSignals);
|
|
926
990
|
const effectiveTemperature =
|
|
927
991
|
harmonyRetryAttempt > 0 && config.temperature !== undefined ? config.temperature + 0.05 : config.temperature;
|
|
928
|
-
|
|
992
|
+
// Owned tool calling sends no native tools, so any tool_choice would error.
|
|
993
|
+
const effectiveToolChoice = ownedSyntax ? undefined : (dynamicToolChoice ?? config.toolChoice);
|
|
929
994
|
const effectiveReasoning = dynamicReasoning ?? config.reasoning;
|
|
930
995
|
const effectiveDisableReasoning = dynamicDisableReasoning ?? config.disableReasoning;
|
|
931
996
|
|
|
@@ -970,7 +1035,7 @@ async function streamAssistantResponse(
|
|
|
970
1035
|
|
|
971
1036
|
try {
|
|
972
1037
|
return await runInActiveSpan(chatSpan, async () => {
|
|
973
|
-
|
|
1038
|
+
let response = await streamFunction(config.model, llmContext, {
|
|
974
1039
|
...config,
|
|
975
1040
|
// Hand streamSimple a resolver so its central auth-retry policy can
|
|
976
1041
|
// re-resolve on 401 / usage-limit: the initial step reuses the key
|
|
@@ -993,6 +1058,20 @@ async function streamAssistantResponse(
|
|
|
993
1058
|
signal: finalRequestSignal,
|
|
994
1059
|
onResponse: captureOnResponse,
|
|
995
1060
|
});
|
|
1061
|
+
if (promptToolWireTools && ownedSyntax) {
|
|
1062
|
+
// Re-materialize in-band tool-call text as native toolCall content blocks
|
|
1063
|
+
// so the rest of the loop executes them unchanged. When the model starts
|
|
1064
|
+
// fabricating tool results, the abort callback cancels the provider — unless
|
|
1065
|
+
// `abortOnFabricatedToolResult` is false, in which case the stream drains and
|
|
1066
|
+
// the fabricated continuation is discarded without aborting.
|
|
1067
|
+
response = wrapInbandToolStream(
|
|
1068
|
+
response,
|
|
1069
|
+
promptToolWireTools,
|
|
1070
|
+
ownedSyntax,
|
|
1071
|
+
() => promptToolAbortController?.abort(),
|
|
1072
|
+
config.abortOnFabricatedToolResult ?? true,
|
|
1073
|
+
);
|
|
1074
|
+
}
|
|
996
1075
|
|
|
997
1076
|
let partialMessage: AssistantMessage | null = null;
|
|
998
1077
|
let addedPartial = false;
|
package/src/agent.ts
CHANGED
|
@@ -22,11 +22,12 @@ import {
|
|
|
22
22
|
type ToolChoice,
|
|
23
23
|
type ToolResultMessage,
|
|
24
24
|
} from "@oh-my-pi/pi-ai";
|
|
25
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
26
|
+
import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
25
27
|
import { getBundledModel } from "@oh-my-pi/pi-catalog/models";
|
|
26
28
|
import { logger } from "@oh-my-pi/pi-utils";
|
|
27
29
|
import { abortReasonText, agentLoop, agentLoopContinue } from "./agent-loop";
|
|
28
30
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
29
|
-
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
30
31
|
import type {
|
|
31
32
|
AgentContext,
|
|
32
33
|
AgentEvent,
|
|
@@ -220,6 +221,15 @@ export interface AgentOptions {
|
|
|
220
221
|
|
|
221
222
|
/** Enable intent tracing schema injection/stripping in the harness. */
|
|
222
223
|
intentTracing?: boolean;
|
|
224
|
+
/** Owned tool-calling syntax. Undefined keeps provider-native tool calling. */
|
|
225
|
+
toolCallSyntax?: ToolCallSyntax;
|
|
226
|
+
/**
|
|
227
|
+
* When owned tool calling is active and the model fabricates a tool result
|
|
228
|
+
* mid-turn: `true` (default) aborts the provider request immediately; `false`
|
|
229
|
+
* drains the request and discards the fabricated continuation. Forwarded to
|
|
230
|
+
* the loop's {@link AgentLoopConfig.abortOnFabricatedToolResult}.
|
|
231
|
+
*/
|
|
232
|
+
abortOnFabricatedToolResult?: boolean;
|
|
223
233
|
/** Dynamic tool choice override, resolved per LLM call. */
|
|
224
234
|
getToolChoice?: () => ToolChoice | undefined;
|
|
225
235
|
|
|
@@ -316,6 +326,8 @@ export class Agent {
|
|
|
316
326
|
#preferWebsockets?: boolean;
|
|
317
327
|
#transformToolCallArguments?: (args: Record<string, unknown>, toolName: string) => Record<string, unknown>;
|
|
318
328
|
#intentTracing: boolean;
|
|
329
|
+
#toolCallSyntax?: ToolCallSyntax;
|
|
330
|
+
#abortOnFabricatedToolResult?: boolean;
|
|
319
331
|
#getToolChoice?: () => ToolChoice | undefined;
|
|
320
332
|
#onPayload?: SimpleStreamOptions["onPayload"];
|
|
321
333
|
#onResponse?: SimpleStreamOptions["onResponse"];
|
|
@@ -378,6 +390,8 @@ export class Agent {
|
|
|
378
390
|
this.#preferWebsockets = opts.preferWebsockets;
|
|
379
391
|
this.#transformToolCallArguments = opts.transformToolCallArguments;
|
|
380
392
|
this.#intentTracing = opts.intentTracing === true;
|
|
393
|
+
this.#toolCallSyntax = opts.toolCallSyntax;
|
|
394
|
+
this.#abortOnFabricatedToolResult = opts.abortOnFabricatedToolResult;
|
|
381
395
|
this.#getToolChoice = opts.getToolChoice;
|
|
382
396
|
this.#onAssistantMessageEvent = opts.onAssistantMessageEvent;
|
|
383
397
|
this.#onHarmonyLeak = opts.onHarmonyLeak;
|
|
@@ -1023,6 +1037,8 @@ export class Agent {
|
|
|
1023
1037
|
cursorOnToolResult,
|
|
1024
1038
|
transformToolCallArguments: this.#transformToolCallArguments,
|
|
1025
1039
|
intentTracing: this.#intentTracing,
|
|
1040
|
+
toolCallSyntax: this.#toolCallSyntax,
|
|
1041
|
+
abortOnFabricatedToolResult: this.#abortOnFabricatedToolResult,
|
|
1026
1042
|
appendOnlyContext: this.#appendOnlyContext,
|
|
1027
1043
|
beforeToolCall: this.beforeToolCall ? (ctx, signal) => this.beforeToolCall?.(ctx, signal) : undefined,
|
|
1028
1044
|
afterToolCall: this.afterToolCall ? (ctx, signal) => this.afterToolCall?.(ctx, signal) : undefined,
|
|
@@ -15,6 +15,7 @@
|
|
|
15
15
|
*/
|
|
16
16
|
|
|
17
17
|
import type { Context, Message, Tool } from "@oh-my-pi/pi-ai";
|
|
18
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
18
19
|
import { normalizeTools } from "./agent-loop";
|
|
19
20
|
import type { AgentContext } from "./types";
|
|
20
21
|
|
|
@@ -33,6 +34,7 @@ export interface StablePrefixSnapshot {
|
|
|
33
34
|
export interface BuildOptions {
|
|
34
35
|
/** Inject the `_i` intent field into tool schemas (must match agent-loop's normalizeTools). */
|
|
35
36
|
intentTracing: boolean;
|
|
37
|
+
exampleSyntax?: ToolCallSyntax;
|
|
36
38
|
}
|
|
37
39
|
|
|
38
40
|
/**
|
|
@@ -268,7 +270,7 @@ export class AppendOnlyContextManager {
|
|
|
268
270
|
|
|
269
271
|
function takeSnapshot(context: AgentContext, options: BuildOptions): StablePrefixSnapshot {
|
|
270
272
|
const systemPrompt = [...context.systemPrompt];
|
|
271
|
-
const tools = normalizeTools(context.tools, options.intentTracing) ?? [];
|
|
273
|
+
const tools = normalizeTools(context.tools, options.intentTracing, options.exampleSyntax) ?? [];
|
|
272
274
|
return {
|
|
273
275
|
systemPrompt,
|
|
274
276
|
tools,
|
|
@@ -288,6 +290,7 @@ function computeFingerprint(systemPrompt: string[], tools: Tool[], options: Buil
|
|
|
288
290
|
cw: t.customWireName,
|
|
289
291
|
})),
|
|
290
292
|
i: options.intentTracing,
|
|
293
|
+
ex: options.exampleSyntax,
|
|
291
294
|
});
|
|
292
295
|
let hash = 0;
|
|
293
296
|
for (let i = 0; i < payload.length; i++) {
|
package/src/index.ts
CHANGED
package/src/types.ts
CHANGED
|
@@ -17,8 +17,9 @@ import type {
|
|
|
17
17
|
ToolResultMessage,
|
|
18
18
|
TSchema,
|
|
19
19
|
} from "@oh-my-pi/pi-ai";
|
|
20
|
+
import type { ToolCallSyntax } from "@oh-my-pi/pi-ai/grammar";
|
|
21
|
+
import type { HarmonyAuditEvent } from "@oh-my-pi/pi-ai/utils/harmony-leak";
|
|
20
22
|
import type { AppendOnlyContextManager } from "./append-only-context";
|
|
21
|
-
import type { HarmonyAuditEvent } from "./harmony-leak";
|
|
22
23
|
import type { AgentRunCoverage, AgentRunSummary } from "./run-collector";
|
|
23
24
|
import type { AgentTelemetryConfig } from "./telemetry";
|
|
24
25
|
|
|
@@ -199,6 +200,27 @@ export interface AgentLoopConfig extends SimpleStreamOptions {
|
|
|
199
200
|
* then strips from arguments before executing tools.
|
|
200
201
|
*/
|
|
201
202
|
intentTracing?: boolean;
|
|
203
|
+
/**
|
|
204
|
+
* Owned tool calling syntax.
|
|
205
|
+
*
|
|
206
|
+
* Undefined keeps provider-native tool calling. A syntax value sends no
|
|
207
|
+
* native `tools`, forces `toolChoice` off, appends that syntax's tool catalog
|
|
208
|
+
* instructions, re-encodes prior tool calls/results as text, and parses the
|
|
209
|
+
* model's text output back into canonical `toolCall` blocks.
|
|
210
|
+
*/
|
|
211
|
+
toolCallSyntax?: ToolCallSyntax;
|
|
212
|
+
/**
|
|
213
|
+
* When owned (in-band) tool calling is active and the model starts
|
|
214
|
+
* fabricating a tool result inside its own turn, control how the loop reacts:
|
|
215
|
+
* - `true` (default): abort the provider request immediately so it stops
|
|
216
|
+
* generating the hallucinated continuation (cheaper, lower latency).
|
|
217
|
+
* - `false`: let the request finish and silently discard everything past the
|
|
218
|
+
* fabrication boundary (keeps the connection alive but pays for the tokens
|
|
219
|
+
* the model spends on the discarded tail).
|
|
220
|
+
* Only meaningful when {@link toolCallSyntax} (or `PI_OWNED_TOOLS`) selects an
|
|
221
|
+
* owned syntax; native tool calling never fabricates results in text.
|
|
222
|
+
*/
|
|
223
|
+
abortOnFabricatedToolResult?: boolean;
|
|
202
224
|
/**
|
|
203
225
|
* Append-only context mode — stabilizes system prompt + tool spec bytes
|
|
204
226
|
* across turns so provider prefix caches hit at maximum rate.
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* GPT-5 Harmony-header leakage detection and recovery.
|
|
3
|
-
*
|
|
4
|
-
* Background and policy: see `docs/ERRATA-GPT5-HARMONY.md`. This module
|
|
5
|
-
* implements §3 of that document: detection by signal fusion, plus a
|
|
6
|
-
* truncate-and-resume primitive for the `edit` tool when its input is in
|
|
7
|
-
* hashline DSL form. Other tools and surfaces fall through to
|
|
8
|
-
* abort-and-retry handled by the agent loop.
|
|
9
|
-
*/
|
|
10
|
-
import type { AssistantMessage, Model, ToolCall } from "@oh-my-pi/pi-ai";
|
|
11
|
-
declare const SIGNAL_ORDER: readonly ["M", "C", "G", "S", "B", "R", "T"];
|
|
12
|
-
export type HarmonySignalClass = "H" | (typeof SIGNAL_ORDER)[number];
|
|
13
|
-
export type HarmonySurface = "assistant_text" | "assistant_thinking" | "tool_arg";
|
|
14
|
-
export interface HarmonySignal {
|
|
15
|
-
classes: HarmonySignalClass[];
|
|
16
|
-
start: number;
|
|
17
|
-
end: number;
|
|
18
|
-
text: string;
|
|
19
|
-
}
|
|
20
|
-
export interface HarmonyDetection {
|
|
21
|
-
surface: HarmonySurface;
|
|
22
|
-
contentIndex?: number;
|
|
23
|
-
toolName?: string;
|
|
24
|
-
toolCallId?: string;
|
|
25
|
-
signals: HarmonySignal[];
|
|
26
|
-
}
|
|
27
|
-
export interface HarmonyAuditEvent {
|
|
28
|
-
action: "truncate_resume" | "abort_retry" | "escalated";
|
|
29
|
-
surface: HarmonySurface;
|
|
30
|
-
signal: string;
|
|
31
|
-
retryN: number;
|
|
32
|
-
model: string;
|
|
33
|
-
provider: string;
|
|
34
|
-
toolName?: string;
|
|
35
|
-
removedLen: number;
|
|
36
|
-
removedSha8: string;
|
|
37
|
-
removedPreview: string;
|
|
38
|
-
removedBlob?: string;
|
|
39
|
-
}
|
|
40
|
-
export interface HarmonyRecoveredToolCall {
|
|
41
|
-
message: AssistantMessage;
|
|
42
|
-
removed: string;
|
|
43
|
-
}
|
|
44
|
-
/**
|
|
45
|
-
* Whether to run leak detection on responses from this model. We default-on
|
|
46
|
-
* for every openai-codex model rather than enumerating ids, so a future
|
|
47
|
-
* gpt-5.6 (or whatever) doesn't silently bypass the mitigation. Detection
|
|
48
|
-
* itself is cheap; the cost of missing a leak on a new model is not.
|
|
49
|
-
*/
|
|
50
|
-
export declare function isHarmonyLeakMitigationTarget(model: Model): boolean;
|
|
51
|
-
export declare function signalListLabel(signals: readonly HarmonySignal[]): string;
|
|
52
|
-
/**
|
|
53
|
-
* Detect harmony-protocol leakage in `text`. Returns undefined if clean.
|
|
54
|
-
*
|
|
55
|
-
* Trip rule: `H` alone, or `M` paired with at least one co-signal
|
|
56
|
-
* (`C`/`G`/`S`/`B`/`R`/`T`). Bare `M` does not trip — this document, its
|
|
57
|
-
* tests, and bug reports legitimately carry the marker.
|
|
58
|
-
*
|
|
59
|
-
* The `tool_arg` surface is held to a stricter rule. A tool argument is
|
|
60
|
-
* arbitrary file/data content that can legitimately carry the marker, a
|
|
61
|
-
* channel word, harmony control tokens, or a non-Latin script run (editing
|
|
62
|
-
* these very fixtures does exactly that). The only robust leak signal there
|
|
63
|
-
* is content trailing the structurally-valid parse, so a `tool_arg` detection
|
|
64
|
-
* additionally requires the `T` co-signal. Absent a `parsedEnd` boundary `T`
|
|
65
|
-
* is never set, so `tool_arg` scanning stays inert and a legitimate codex tool
|
|
66
|
-
* call is never hard-aborted. `assistant_text`/`assistant_thinking` keep the
|
|
67
|
-
* base rule.
|
|
68
|
-
*
|
|
69
|
-
* `parsedEnd`, when supplied, marks the byte at which a structurally valid
|
|
70
|
-
* tool-argument parse ends; markers at or past it set the `T` co-signal.
|
|
71
|
-
* `contentIndex`/`toolName`/`toolCallId` flow through to the returned
|
|
72
|
-
* detection for downstream auditing.
|
|
73
|
-
*/
|
|
74
|
-
export declare function detectHarmonyLeak(text: string, surface: HarmonySurface, options?: {
|
|
75
|
-
parsedEnd?: number;
|
|
76
|
-
contentIndex?: number;
|
|
77
|
-
toolName?: string;
|
|
78
|
-
toolCallId?: string;
|
|
79
|
-
}): HarmonyDetection | undefined;
|
|
80
|
-
/**
|
|
81
|
-
* Scan an assistant message's content blocks; return the first detection.
|
|
82
|
-
*
|
|
83
|
-
* `toolArgParseEnd`, when supplied, resolves the byte offset at which a tool
|
|
84
|
-
* call's structurally-valid argument parse ends (the `T` co-signal in
|
|
85
|
-
* {@link detectHarmonyLeak}). Callers that can parse a tool's argument DSL pass
|
|
86
|
-
* it to enable `tool_arg` leak detection; omitting it keeps that surface inert
|
|
87
|
-
* — the safe default the agent loop relies on, since it cannot bound a streamed
|
|
88
|
-
* tool DSL and must never hard-abort a legitimate tool call.
|
|
89
|
-
*/
|
|
90
|
-
export declare function detectHarmonyLeakInAssistantMessage(message: AssistantMessage, toolArgParseEnd?: (toolCall: ToolCall) => number | undefined): HarmonyDetection | undefined;
|
|
91
|
-
/**
|
|
92
|
-
* Truncate a contaminated tool call at the start of the contaminated line and
|
|
93
|
-
* append the tool's recovery sentinel. Returns a recovered AssistantMessage
|
|
94
|
-
* (containing only the cleaned tool call), a synthetic continuation user
|
|
95
|
-
* message asking the model to re-issue the rest, and the removed substring
|
|
96
|
-
* for auditing. Returns undefined when the tool is not recovery-eligible or
|
|
97
|
-
* the truncation would leave nothing meaningful to dispatch.
|
|
98
|
-
*
|
|
99
|
-
* `providerPayload` is dropped from the recovered message: for Codex the
|
|
100
|
-
* encrypted reasoning blob is opaque/signed and we cannot validate that it is
|
|
101
|
-
* uncontaminated. The model re-reasons on the next turn.
|
|
102
|
-
*/
|
|
103
|
-
export declare function recoverHarmonyToolCall(message: AssistantMessage, detection: HarmonyDetection): HarmonyRecoveredToolCall | undefined;
|
|
104
|
-
/**
|
|
105
|
-
* Return the contaminated substring from `message` for audit purposes when
|
|
106
|
-
* recovery is not applicable (abort path). Walks from the first detected
|
|
107
|
-
* signal to end-of-content within the relevant block. Returns "" if the
|
|
108
|
-
* detection cannot be resolved against the message.
|
|
109
|
-
*/
|
|
110
|
-
export declare function extractHarmonyRemoved(message: AssistantMessage, detection: HarmonyDetection): string;
|
|
111
|
-
export declare function createHarmonyAuditEvent(params: {
|
|
112
|
-
action: HarmonyAuditEvent["action"];
|
|
113
|
-
detection: HarmonyDetection;
|
|
114
|
-
model: Model;
|
|
115
|
-
retryN: number;
|
|
116
|
-
removed: string;
|
|
117
|
-
}): HarmonyAuditEvent;
|
|
118
|
-
export {};
|
package/src/harmony-leak.ts
DELETED
|
@@ -1,456 +0,0 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* GPT-5 Harmony-header leakage detection and recovery.
|
|
3
|
-
*
|
|
4
|
-
* Background and policy: see `docs/ERRATA-GPT5-HARMONY.md`. This module
|
|
5
|
-
* implements §3 of that document: detection by signal fusion, plus a
|
|
6
|
-
* truncate-and-resume primitive for the `edit` tool when its input is in
|
|
7
|
-
* hashline DSL form. Other tools and surfaces fall through to
|
|
8
|
-
* abort-and-retry handled by the agent loop.
|
|
9
|
-
*/
|
|
10
|
-
import type { AssistantMessage, Model, ToolCall } from "@oh-my-pi/pi-ai";
|
|
11
|
-
|
|
12
|
-
// Single source of truth for the marker pattern. `M` in the errata.
|
|
13
|
-
// Use a fresh non-global instance for `.test()` to avoid lastIndex pitfalls.
|
|
14
|
-
const MARKER_RE = /\bto=functions\.[A-Za-z_]\w*/g;
|
|
15
|
-
const HARMONY_RE = /<\|(start|end|channel|message|call|return)\|>/g;
|
|
16
|
-
|
|
17
|
-
// Channel-word adjacency (`C`): channel/role name appearing immediately before the marker.
|
|
18
|
-
const CHANNEL_WORD_RE = /\b(?:analysis|commentary|assistant|user|system|developer|tool)\s+to=functions\./;
|
|
19
|
-
|
|
20
|
-
// Glitch-token adjacency (`G`). The Japgolly literal is escaped so this regex
|
|
21
|
-
// source itself does not trip detection if the file is scanned (e.g. when
|
|
22
|
-
// editing this module via the same agent that detects).
|
|
23
|
-
const GLITCH_RE = /\b(?:changedFiles|RTLU|Jsii(?:_commentary)?|\x4aapgolly)\b/;
|
|
24
|
-
|
|
25
|
-
// Body-channel cascade (`B`): marker followed by ` code` then another marker
|
|
26
|
-
// within 200 chars. Single regex; no manual slicing needed.
|
|
27
|
-
const BODY_CASCADE_RE = /to=functions\.\w+\s+code\b[\s\S]{0,200}?to=functions\./;
|
|
28
|
-
|
|
29
|
-
// Fake-result framing (`R`): marker followed within 80 chars by Cell N: framing.
|
|
30
|
-
const FAKE_RESULT_RE = /to=functions\.\w+[\s\S]{0,80}?code_output\s*\nCell\s+\d+:/;
|
|
31
|
-
|
|
32
|
-
const FENCE_RE = /^\s*(?:```+|~~~+)/;
|
|
33
|
-
|
|
34
|
-
// Non-Latin scripts seen in the corpus: CJK + ext, Cyrillic, Thai, Georgian,
|
|
35
|
-
// Armenian, Kannada, Telugu, Devanagari, Arabic, Malayalam.
|
|
36
|
-
const SCRIPT_CLASS =
|
|
37
|
-
"\u3400-\u4DBF\u4E00-\u9FFF\uF900-\uFAFF\u0400-\u04FF\u0E00-\u0E7F\u10A0-\u10FF\u0530-\u058F\u0C80-\u0CFF\u0C00-\u0C7F\u0900-\u097F\u0600-\u06FF\u0D00-\u0D7F";
|
|
38
|
-
const SCRIPT_RUN_RE = new RegExp(`[${SCRIPT_CLASS}]{2,}`, "u");
|
|
39
|
-
|
|
40
|
-
// Recovery registry. Each entry's parser must recognize the configured
|
|
41
|
-
// sentinel (per-tool, see eval/parse.ts and hashline/executor.ts) and surface
|
|
42
|
-
// a warning to the model so it knows to re-issue any remaining work.
|
|
43
|
-
// `accepts` gates on input shape: tools whose contaminated input doesn't
|
|
44
|
-
// match the parser's expected DSL fall through to abort-and-retry.
|
|
45
|
-
//
|
|
46
|
-
// • `edit`: hashline DSL input begins with `@<path>`. Apply_patch envelopes
|
|
47
|
-
// (`*** Begin Patch …`) and JSON-schema variants are not recoverable —
|
|
48
|
-
// their parsers don't recognize `*** Abort`.
|
|
49
|
-
// • `eval`: any string is a parseable cell sequence (the parser is lenient
|
|
50
|
-
// and falls back to implicit-cell mode on bare strings).
|
|
51
|
-
interface RecoveryConfig {
|
|
52
|
-
sentinel: string;
|
|
53
|
-
accepts: (input: string) => boolean;
|
|
54
|
-
}
|
|
55
|
-
const RECOVERY_REGISTRY: Record<string, RecoveryConfig> = {
|
|
56
|
-
edit: {
|
|
57
|
-
sentinel: "\n*** Abort\n",
|
|
58
|
-
accepts: input => input.replace(/^\s+/, "").startsWith("@"),
|
|
59
|
-
},
|
|
60
|
-
eval: {
|
|
61
|
-
sentinel: "\n*** Abort\n",
|
|
62
|
-
accepts: () => true,
|
|
63
|
-
},
|
|
64
|
-
};
|
|
65
|
-
|
|
66
|
-
const SIGNAL_ORDER = ["M", "C", "G", "S", "B", "R", "T"] as const;
|
|
67
|
-
|
|
68
|
-
export type HarmonySignalClass = "H" | (typeof SIGNAL_ORDER)[number];
|
|
69
|
-
|
|
70
|
-
export type HarmonySurface = "assistant_text" | "assistant_thinking" | "tool_arg";
|
|
71
|
-
|
|
72
|
-
export interface HarmonySignal {
|
|
73
|
-
classes: HarmonySignalClass[];
|
|
74
|
-
start: number;
|
|
75
|
-
end: number;
|
|
76
|
-
text: string;
|
|
77
|
-
}
|
|
78
|
-
|
|
79
|
-
export interface HarmonyDetection {
|
|
80
|
-
surface: HarmonySurface;
|
|
81
|
-
contentIndex?: number;
|
|
82
|
-
toolName?: string;
|
|
83
|
-
toolCallId?: string;
|
|
84
|
-
signals: HarmonySignal[];
|
|
85
|
-
}
|
|
86
|
-
|
|
87
|
-
export interface HarmonyAuditEvent {
|
|
88
|
-
action: "truncate_resume" | "abort_retry" | "escalated";
|
|
89
|
-
surface: HarmonySurface;
|
|
90
|
-
signal: string;
|
|
91
|
-
retryN: number;
|
|
92
|
-
model: string;
|
|
93
|
-
provider: string;
|
|
94
|
-
toolName?: string;
|
|
95
|
-
removedLen: number;
|
|
96
|
-
removedSha8: string;
|
|
97
|
-
removedPreview: string;
|
|
98
|
-
removedBlob?: string;
|
|
99
|
-
}
|
|
100
|
-
|
|
101
|
-
export interface HarmonyRecoveredToolCall {
|
|
102
|
-
message: AssistantMessage;
|
|
103
|
-
removed: string;
|
|
104
|
-
}
|
|
105
|
-
|
|
106
|
-
/**
 * Decide whether responses from `model` should be scanned for harmony leaks.
 *
 * The decision keys on the provider instead of a list of model ids, so every
 * `openai-codex` model is covered, including ids that do not exist yet.
 *
 * @param model - Model whose responses are about to be inspected.
 * @returns `true` when the model's provider is `openai-codex`.
 */
export function isHarmonyLeakMitigationTarget(model: Model): boolean {
	const { provider } = model;
	return provider === "openai-codex";
}
|
|
115
|
-
|
|
116
|
-
/**
 * Build a compact label describing the distinct signal combinations seen.
 *
 * Each signal contributes its classes joined by `+` (for example `M+C`);
 * duplicates are dropped while first-seen order is kept, and the distinct
 * labels are joined by `,`.
 *
 * @param signals - Signals to summarise.
 * @returns The joined label, or `"none"` when the result would be empty.
 */
export function signalListLabel(signals: readonly HarmonySignal[]): string {
	// A Set preserves insertion order, which keeps first-seen ordering.
	const labels = new Set<string>();
	for (const { classes } of signals) {
		labels.add(classes.join("+"));
	}
	return [...labels].join(",") || "none";
}
|
|
124
|
-
|
|
125
|
-
/**
 * Scan `text` for harmony-protocol leakage and report every tripping signal.
 *
 * Two passes run over the text, both ignoring matches that start inside a
 * fenced code block:
 *
 * 1. Harmony control tokens (`H`) trip on their own.
 * 2. `to=functions.<name>` markers (`M`) trip only when paired with at least
 *    one co-signal: channel word (`C`), glitch token (`G`), script mismatch
 *    (`S`), body cascade (`B`), fake-result framing (`R`) or trailing-parse
 *    (`T`). A bare `M` is ignored because documentation, tests and bug
 *    reports legitimately contain the marker.
 *
 * The `tool_arg` surface is stricter: tool arguments are arbitrary data that
 * may legitimately contain any of the above, so a detection is reported only
 * when at least one signal carries `T`. `T` is set only for markers starting
 * at or after `options.parsedEnd`; without `parsedEnd` the `tool_arg` surface
 * therefore never reports anything.
 *
 * @param text - Text to scan.
 * @param surface - Where the text came from; selects the trip rule.
 * @param options - `parsedEnd` is the string index where the structurally
 *   valid tool-argument parse ends. `contentIndex`, `toolName` and
 *   `toolCallId` are copied onto the result unchanged for auditing.
 * @returns The detection with signals sorted by position, or `undefined`
 *   when the text is clean under the rule for `surface`.
 */
export function detectHarmonyLeak(
	text: string,
	surface: HarmonySurface,
	options: {
		parsedEnd?: number;
		contentIndex?: number;
		toolName?: string;
		toolCallId?: string;
	} = {},
): HarmonyDetection | undefined {
	const { parsedEnd, contentIndex, toolName, toolCallId } = options;
	const fenced = computeFenceRanges(text);
	const found: HarmonySignal[] = [];

	// Pass 1: harmony control tokens are a signal by themselves.
	for (const token of text.matchAll(HARMONY_RE)) {
		const at = token.index ?? 0;
		if (!isInsideFence(fenced, at)) {
			found.push(makeSignal(["H"], at, at + token[0].length, token[0]));
		}
	}

	// Pass 2: markers, each classified by the co-signals found around it.
	for (const marker of text.matchAll(MARKER_RE)) {
		const from = marker.index ?? 0;
		if (isInsideFence(fenced, from)) continue;
		const to = from + marker[0].length;

		const windowEnd = Math.min(text.length, to + 16);
		const channelWindow = text.slice(Math.max(0, from - 64), windowEnd);
		const glitchWindow = text.slice(Math.max(0, from - 16), windowEnd);
		const lookahead = text.slice(from, Math.min(text.length, from + 240));

		const coSignals: HarmonySignalClass[] = [];
		if (CHANNEL_WORD_RE.test(channelWindow)) coSignals.push("C");
		if (GLITCH_RE.test(glitchWindow)) coSignals.push("G");
		if (hasScriptMismatchNear(text, from, to)) coSignals.push("S");
		if (BODY_CASCADE_RE.test(lookahead)) coSignals.push("B");
		if (FAKE_RESULT_RE.test(lookahead)) coSignals.push("R");
		if (parsedEnd !== undefined && from >= parsedEnd) coSignals.push("T");

		// A marker with no co-signal is treated as legitimate content.
		if (coSignals.length === 0) continue;
		found.push(makeSignal(["M", ...coSignals], from, to, marker[0]));
	}

	if (found.length === 0) return undefined;

	// Tool arguments are data; only a marker trailing the valid parse counts.
	if (surface === "tool_arg" && found.every(signal => !signal.classes.includes("T"))) return undefined;

	found.sort((a, b) => a.start - b.start || a.end - b.end);
	return {
		surface,
		contentIndex,
		toolName,
		toolCallId,
		signals: found,
	};
}
|
|
205
|
-
|
|
206
|
-
/**
 * Run leak detection over each content block of an assistant message and
 * return the first detection found, in block order.
 *
 * Text blocks are scanned as `assistant_text`, thinking blocks as
 * `assistant_thinking`, and tool calls as `tool_arg` using the text produced
 * by `getToolArgumentText`.
 *
 * @param message - Assistant message whose content blocks are scanned.
 * @param toolArgParseEnd - Optional resolver for the index at which a tool
 *   call's structurally valid argument parse ends. Its result is forwarded as
 *   `parsedEnd`; when it is omitted `parsedEnd` is `undefined`, which keeps
 *   the `tool_arg` surface inert so a legitimate tool call is never flagged.
 * @returns The first detection, or `undefined` when every block is clean.
 */
export function detectHarmonyLeakInAssistantMessage(
	message: AssistantMessage,
	toolArgParseEnd?: (toolCall: ToolCall) => number | undefined,
): HarmonyDetection | undefined {
	for (const [contentIndex, block] of message.content.entries()) {
		let hit: HarmonyDetection | undefined;
		switch (block.type) {
			case "text":
				hit = detectHarmonyLeak(block.text, "assistant_text", { contentIndex });
				break;
			case "thinking":
				hit = detectHarmonyLeak(block.thinking, "assistant_thinking", { contentIndex });
				break;
			case "toolCall": {
				const argText = getToolArgumentText(block);
				if (argText !== undefined) {
					hit = detectHarmonyLeak(argText, "tool_arg", {
						contentIndex,
						toolName: block.name,
						toolCallId: block.id,
						parsedEnd: toolArgParseEnd?.(block),
					});
				}
				break;
			}
			default:
				break;
		}
		if (hit) return hit;
	}
	return undefined;
}
|
|
243
|
-
|
|
244
|
-
/**
 * Truncate a contaminated tool call at the start of the contaminated line and
 * append the tool's recovery sentinel.
 *
 * Recovery applies only when all of the following hold:
 * - the detection is on the `tool_arg` surface and carries a `contentIndex`
 *   that resolves to a tool-call block in `message`;
 * - the tool has its own entry in the recovery registry;
 * - the tool's `input` argument is a string accepted by that entry;
 * - truncating at the line of the first signal leaves non-blank content.
 *
 * `providerPayload` is dropped from the recovered message: for Codex the
 * encrypted reasoning blob is opaque/signed and cannot be validated as
 * uncontaminated, so the model re-reasons on the next turn.
 *
 * @param message - Assistant message containing the contaminated tool call.
 * @param detection - Detection previously produced for `message`.
 * @returns The recovered message (its content is only the cleaned tool call,
 *   `stopReason` is `"toolUse"`, `errorMessage` is cleared) together with the
 *   removed substring for auditing, or `undefined` when recovery does not
 *   apply.
 */
export function recoverHarmonyToolCall(
	message: AssistantMessage,
	detection: HarmonyDetection,
): HarmonyRecoveredToolCall | undefined {
	if (detection.surface !== "tool_arg" || detection.contentIndex === undefined) return undefined;
	const block = message.content[detection.contentIndex];
	if (block?.type !== "toolCall") return undefined;

	// Tool names come from model output. Look up own properties only, so a
	// name such as `toString` or `constructor` cannot resolve to an inherited
	// Object.prototype member and crash on `config.accepts`.
	if (!Object.prototype.hasOwnProperty.call(RECOVERY_REGISTRY, block.name)) return undefined;
	const config = RECOVERY_REGISTRY[block.name];
	if (!config) return undefined;

	const input = block.arguments?.input;
	if (typeof input !== "string") return undefined;
	if (!config.accepts(input)) return undefined;

	// Signals are sorted by position, so the first one marks the cut point.
	const offset = detection.signals[0]?.start;
	if (offset === undefined) return undefined;

	const truncated = truncateAtLineAndAppendSentinel(input, offset, config.sentinel);
	if (truncated === undefined) return undefined;

	const cleanToolCall: ToolCall = {
		...block,
		arguments: { ...block.arguments, input: truncated.clean },
	};
	const cleanMessage: AssistantMessage = {
		...message,
		content: [cleanToolCall],
		// Drop encrypted reasoning blob: opaque, possibly carries the leak forward.
		providerPayload: undefined,
		stopReason: "toolUse",
		errorMessage: undefined,
	};
	return { message: cleanMessage, removed: truncated.removed };
}
|
|
291
|
-
|
|
292
|
-
/**
 * Extract the contaminated tail of the detected block for audit purposes,
 * used when recovery is not applicable (abort path).
 *
 * The tail runs from the start of the first signal to the end of the block's
 * text: `text` for text blocks, `thinking` for thinking blocks, and the
 * argument text from `getToolArgumentText` for tool calls.
 *
 * @param message - Assistant message the detection was produced for.
 * @param detection - Detection whose first signal marks the start of the tail.
 * @returns The tail, or `""` when the detection has no `contentIndex`, the
 *   block does not exist, the block type is not handled, or the tool-call
 *   argument text is unavailable or empty.
 */
export function extractHarmonyRemoved(message: AssistantMessage, detection: HarmonyDetection): string {
	const { contentIndex, signals } = detection;
	if (contentIndex === undefined) return "";

	const block = message.content[contentIndex];
	if (!block) return "";

	const from = signals[0]?.start ?? 0;
	switch (block.type) {
		case "text":
			return block.text.slice(from);
		case "thinking":
			return block.thinking.slice(from);
		case "toolCall": {
			const argText = getToolArgumentText(block);
			return argText ? argText.slice(from) : "";
		}
		default:
			return "";
	}
}
|
|
311
|
-
|
|
312
|
-
/**
 * Assemble the audit record for one leak-handling action.
 *
 * The removed text is summarised by its length, a short hash and a redacted
 * preview. The raw removed text is attached as `removedBlob` only when the
 * `OMP_HARMONY_DEBUG` environment variable is exactly `"1"`.
 *
 * @param params - Action taken, the detection that caused it, the model that
 *   produced the response, the retry counter, and the removed text.
 * @returns The populated audit event.
 */
export function createHarmonyAuditEvent(params: {
	action: HarmonyAuditEvent["action"];
	detection: HarmonyDetection;
	model: Model;
	retryN: number;
	removed: string;
}): HarmonyAuditEvent {
	const { action, detection, model, retryN, removed } = params;
	const includeBlob = Bun.env.OMP_HARMONY_DEBUG === "1";
	return {
		action,
		surface: detection.surface,
		signal: signalListLabel(detection.signals),
		retryN,
		model: model.id,
		provider: model.provider,
		toolName: detection.toolName,
		removedLen: removed.length,
		removedSha8: sha8(removed),
		removedPreview: redactedJunkPreview(removed),
		removedBlob: includeBlob ? removed : undefined,
	};
}
|
|
333
|
-
|
|
334
|
-
// ─── internals ──────────────────────────────────────────────────────────────
|
|
335
|
-
|
|
336
|
-
/**
 * Create a signal record with its classes in canonical order.
 *
 * When the first class is `H` the signal is recorded as `H` alone. Otherwise
 * the classes are emitted in `SIGNAL_ORDER`, which also drops duplicates and
 * anything not listed there.
 */
function makeSignal(classes: HarmonySignalClass[], start: number, end: number, text: string): HarmonySignal {
	const ordered: HarmonySignalClass[] =
		classes[0] === "H" ? ["H"] : SIGNAL_ORDER.filter(cls => classes.includes(cls));
	return { classes: ordered, start, end, text };
}
|
|
344
|
-
|
|
345
|
-
/**
 * Precompute fenced-code-block ranges once per text, so each detected match
 * can be tested against them without rescanning.
 *
 * Each range is a `[start, end)` span of string indices. It runs from the
 * first character of the opening fence line to the end of the closing fence
 * line (excluding its newline), or to the end of the text when the fence is
 * never closed.
 *
 * Fence matching follows CommonMark: a fence is closed only by a line made of
 * the same fence character, at least as long as the opening run, followed by
 * nothing but whitespace. A `~~~` line inside a ``` block, a shorter run, or
 * a fence line carrying an info string is therefore ordinary content and does
 * not end the block early.
 *
 * @param text - Text to scan line by line.
 * @returns Ranges in ascending order; empty when the text has no fences.
 */
function computeFenceRanges(text: string): Array<[number, number]> {
	// Group 1: the fence run (three or more ` or ~). Group 2: the rest of the line.
	const fenceLine = /^\s*(`{3,}|~{3,})([\s\S]*)$/;
	const ranges: Array<[number, number]> = [];
	let open: { start: number; char: string; length: number } | undefined;
	let lineStart = 0;
	while (lineStart <= text.length) {
		const newline = text.indexOf("\n", lineStart);
		const lineEnd = newline === -1 ? text.length : newline;
		const match = fenceLine.exec(text.slice(lineStart, lineEnd));
		if (match) {
			const run = match[1] ?? "";
			const rest = match[2] ?? "";
			if (open === undefined) {
				open = { start: lineStart, char: run.charAt(0), length: run.length };
			} else if (run.charAt(0) === open.char && run.length >= open.length && rest.trim() === "") {
				ranges.push([open.start, lineEnd]);
				open = undefined;
			}
		}
		if (newline === -1) break;
		lineStart = newline + 1;
	}
	// An unterminated fence covers everything up to the end of the text.
	if (open !== undefined) ranges.push([open.start, text.length]);
	return ranges;
}
|
|
374
|
-
|
|
375
|
-
/**
 * Report whether `position` falls inside any `[start, end)` range.
 *
 * The scan stops at the first range that begins after `position`, so it
 * relies on `ranges` being in ascending order.
 */
function isInsideFence(ranges: Array<[number, number]>, position: number): boolean {
	for (const range of ranges) {
		const [lo, hi] = range;
		// This range starts after the position, so no later one can contain it.
		if (lo > position) return false;
		if (position < hi) return true;
	}
	return false;
}
|
|
382
|
-
|
|
383
|
-
/**
 * Script-mismatch co-signal: a run of non-Latin script characters sits close
 * to the marker while the wider context is overwhelmingly ASCII.
 *
 * True only when a script run is found within 32 characters either side of
 * the `[start, end)` span and at least 85% of the characters within 200
 * characters either side are ASCII.
 */
function hasScriptMismatchNear(text: string, start: number, end: number): boolean {
	const around = (radius: number): string =>
		text.slice(Math.max(0, start - radius), Math.min(text.length, end + radius));

	if (!SCRIPT_RUN_RE.test(around(32))) return false;

	const context = around(200);
	const total = context.length;
	if (total === 0) return false;

	let asciiCount = 0;
	let index = 0;
	while (index < total) {
		if (context.charCodeAt(index) < 128) asciiCount += 1;
		index += 1;
	}
	return asciiCount / total >= 0.85;
}
|
|
394
|
-
|
|
395
|
-
/**
 * Produce the text of a tool call's arguments for leak scanning.
 *
 * When the arguments carry a string `input`, that string is returned as-is,
 * so signal offsets index directly into it. Otherwise the whole arguments
 * value is JSON-stringified so detection can still run. Offsets into that
 * serialised form do not map back onto the original arguments and must not
 * be used to slice them.
 *
 * @returns The text to scan, or `undefined` when serialisation throws or
 *   yields no string.
 */
function getToolArgumentText(toolCall: ToolCall): string | undefined {
	const args = toolCall.arguments;
	const rawInput = args?.input;
	if (typeof rawInput === "string") return rawInput;
	try {
		return JSON.stringify(args);
	} catch {
		return undefined;
	}
}
|
|
411
|
-
|
|
412
|
-
/**
 * Cut `input` at the start of the line containing `offset` and append
 * `sentinel` to what remains.
 *
 * @param input - Original tool input.
 * @param offset - String index of the first contaminated character.
 * @param sentinel - Text appended after the kept head.
 * @returns `clean` (the kept head with trailing whitespace removed, plus the
 *   sentinel) and `removed` (everything from the start of the contaminated
 *   line onwards), or `undefined` when the contaminated line is the first
 *   line or the kept head would be blank.
 */
function truncateAtLineAndAppendSentinel(
	input: string,
	offset: number,
	sentinel: string,
): { clean: string; removed: string } | undefined {
	const lineStart = offset <= 0 ? 0 : input.lastIndexOf("\n", offset - 1) + 1;
	if (lineStart === 0) return undefined; // would cut everything
	// trimEnd() strips the same characters as /\s+$/ without the quadratic
	// backtracking that regex shows on long interior whitespace runs.
	const head = input.slice(0, lineStart).trimEnd();
	if (head.length === 0) return undefined;
	return {
		clean: head + sentinel,
		removed: input.slice(lineStart),
	};
}
|
|
426
|
-
|
|
427
|
-
/** Short fingerprint of `text`: the first eight characters of `Bun.sha`'s hex digest. */
function sha8(text: string): string {
	const digest = Bun.sha(text, "hex");
	return digest.slice(0, 8);
}
|
|
430
|
-
|
|
431
|
-
const PREVIEW_KEEP_RE = new RegExp(`[${SCRIPT_CLASS}\\s】【”“…」「、。]`, "u");
|
|
432
|
-
const PREVIEW_TOKEN_RE =
|
|
433
|
-
/^(?:to=functions\.[A-Za-z_]\w*|analysis|commentary|assistant|user|system|developer|tool|changedFiles|RTLU|Jsii(?:_commentary)?|\x4aapgolly)/;
|
|
434
|
-
|
|
435
|
-
/**
 * Build a privacy-safe preview of removed text for the audit log.
 *
 * Only the first 64 characters of `text` are considered. Walking left to
 * right, any token recognised by `PREVIEW_TOKEN_RE` at the current position
 * is copied verbatim; otherwise a single character is copied when
 * `PREVIEW_KEEP_RE` accepts it and replaced with `·` when it does not. The
 * aim is to keep enough to extend the glitch-token denylist from logs while
 * masking anything that could be source code or a secret.
 *
 * @param text - Removed text to summarise.
 * @returns The redacted preview.
 */
function redactedJunkPreview(text: string): string {
	const head = text.slice(0, 64);
	const pieces: string[] = [];
	let cursor = 0;
	while (cursor < head.length) {
		const token = PREVIEW_TOKEN_RE.exec(head.slice(cursor))?.[0];
		if (token !== undefined) {
			// Known marker/channel/glitch token: keep it whole and skip past it.
			pieces.push(token);
			cursor += token.length;
		} else {
			const ch = head.charAt(cursor);
			pieces.push(PREVIEW_KEEP_RE.test(ch) ? ch : "·");
			cursor += 1;
		}
	}
	return pieces.join("");
}
|