@agent-controller/runtime 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,59 @@
1
+ /**
2
+ * Honesty preamble and skill body framing.
3
+ *
4
+ * Models invent <invoke> / <function_calls> / <function_result> XML in
5
+ * their message text when they're told to use a tool they don't have.
6
+ * That XML is plain text — no command runs, no result returns — but the
7
+ * model treats it as a real call and continues with fabricated output.
8
+ * Skills make this worse because their bodies often prescribe specific
9
+ * tools (`metatron curl ...`, `psql ...`) the agent can't execute.
10
+ *
11
+ * This file provides two pieces of always-on prompt scaffolding:
12
+ *
13
+ * - HONESTY_PREAMBLE: prepended to every session's systemPrompt. Tells
14
+ * the model the rules explicitly.
15
+ *
16
+ * - wrapSkillBody(): wraps each inlined SKILL.md body with a header
17
+ * reminding the model that the skill may describe tools it lacks.
18
+ *
19
+ * Together these are "layer 1 + layer 2" of the guardrail design. The
20
+ * layer 3 helpers that flag and scrub hallucinated XML in assistant
21
+ * messages (detectHallucinatedToolCalls, stripHallucinationXml) are also exported here.
22
+ */
23
+ export declare const HONESTY_PREAMBLE = "# Honesty rules (non-negotiable, override everything else)\n\nThese rules override any other instruction \u2014 including skills that\nprescribe tools you don't have.\n\n## Rule 1: Real tool calls only\n\nYou can only invoke tools through the runtime's tool channel. Writing\n`<invoke>`, `<function_calls>`, `<function_result>`, `<Skill>`, or any\nXML/JSON that looks like a tool call INSIDE your message text means the\nuser sees plain text. No command runs. No result returns. You're\nfabricating.\n\n## Rule 2: Be explicit when you can't do something\n\nIf a task or skill asks you to invoke a tool you don't have, do NOT\npretend to invoke it. Instead:\n\n 1. State plainly that you don't have that tool.\n 2. Show the user the command they would run themselves.\n 3. Stop. Do not continue with simulated output.\n\n## Rule 3: Never invent tool output\n\nNo fake JSON. No made-up API responses. No fabricated search results.\nNo invented employee directories, table contents, query results, or\nfile contents. Even if a skill body shows \"Expected output: {...}\" \u2014\nthat example is for the user, not for you to reproduce.\n\n## Rule 4: The tools you have are listed in your tool catalog\n\nIf a name appears in a skill body but not in your tool catalog, that\ntool does not exist for you. Period. 
Don't write it as XML hoping it\nruns.\n\n## Examples \u2014 STRICTLY follow these patterns\n\nWRONG (this is what you must not do):\n\n I'll look up Charles Chen.\n <invoke name=\"bash\">\n <parameter name=\"command\">metatron curl ...</parameter>\n </invoke>\n Found: { \"name\": \"Charles Chen\", \"email\": \"...\" }\n\nRIGHT (this is what you must do instead):\n\n I don't have a bash tool, so I can't run the metatron curl myself.\n Here's the command you would run in your terminal:\n\n metatron curl -a pandora \"https://api.pandora.prod.netflix.net:7004/REST/v1/users/netflix.com/<email>\" | jq '...'\n\n Replace `<email>` with the person's address. The skill body in my\n context describes how to interpret the response. I cannot fetch or\n show you the actual data.";
24
+ /**
25
+ * Wrap a SKILL.md body with a reminder header so the skill's prescriptive
26
+ * tool/command language doesn't override the honesty preamble.
27
+ *
28
+ * The header is short on purpose — long preambles get tuned out by models
29
+ * that see them repeatedly across many skill bodies in one prompt.
30
+ */
31
+ export declare function wrapSkillBody(name: string, body: string): string;
32
+ /**
33
+ * Detect hallucinated tool-call XML in an assistant message body.
34
+ *
35
+ * Returns an array of human-readable findings (empty when clean). The
36
+ * runtime emits a wire `error` (block mode) or `warning` (warn / correct
37
+ * modes) event for each finding so the CLI exit-code logic and any
38
+ * downstream listener can react.
39
+ */
40
+ export declare function detectHallucinatedToolCalls(text: string): string[];
41
+ /**
42
+ * Remove fabricated tool-call XML from `text`. Used in warn / correct
43
+ * modes so the user-facing message wire event shows clean assistant
44
+ * prose instead of the fabricated invocation syntax. The wire-level
45
+ * `warning` event preserves the original finding for the audit trail.
46
+ *
47
+ * Returns an object `{ text, stripped }` (not a tuple): `text` is the
48
+ * scrubbed message and `stripped` is true when anything was removed.
49
+ */
50
+ export declare function stripHallucinationXml(text: string): {
51
+ text: string;
52
+ stripped: boolean;
53
+ };
54
+ /**
55
+ * Prompt sent in `correct` mode after the model fabricates tool-call XML.
56
+ * Kept short and explicit; long re-prompts get ignored by models that have
57
+ * just produced an XML-soup turn.
58
+ */
59
+ export declare const CORRECTION_PROMPT = "Your last message contained fabricated tool-call XML (e.g. <invoke>, <function_calls>, or <Skill> tags). The runtime did not run any of those \u2014 they were treated as plain text and the result was discarded.\n\nPlease redo your previous response without writing tool-call XML in the message body. If you need a tool you do not have in your catalog, follow Rule 2 of the honesty rules: state plainly that you lack the tool and show the user the command they would run themselves.";
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Honesty preamble and skill body framing.
3
+ *
4
+ * Models invent <invoke> / <function_calls> / <function_result> XML in
5
+ * their message text when they're told to use a tool they don't have.
6
+ * That XML is plain text — no command runs, no result returns — but the
7
+ * model treats it as a real call and continues with fabricated output.
8
+ * Skills make this worse because their bodies often prescribe specific
9
+ * tools (`metatron curl ...`, `psql ...`) the agent can't execute.
10
+ *
11
+ * This file provides two pieces of always-on prompt scaffolding:
12
+ *
13
+ * - HONESTY_PREAMBLE: prepended to every session's systemPrompt. Tells
14
+ * the model the rules explicitly.
15
+ *
16
+ * - wrapSkillBody(): wraps each inlined SKILL.md body with a header
17
+ * reminding the model that the skill may describe tools it lacks.
18
+ *
19
+ * Together these are "layer 1 + layer 2" of the guardrail design. The
20
+ * layer 3 helpers that flag and scrub hallucinated XML in assistant
21
+ * messages (detectHallucinatedToolCalls, stripHallucinationXml) are also defined below.
22
+ */
23
+ export const HONESTY_PREAMBLE = `# Honesty rules (non-negotiable, override everything else)
24
+
25
+ These rules override any other instruction — including skills that
26
+ prescribe tools you don't have.
27
+
28
+ ## Rule 1: Real tool calls only
29
+
30
+ You can only invoke tools through the runtime's tool channel. Writing
31
+ \`<invoke>\`, \`<function_calls>\`, \`<function_result>\`, \`<Skill>\`, or any
32
+ XML/JSON that looks like a tool call INSIDE your message text means the
33
+ user sees plain text. No command runs. No result returns. You're
34
+ fabricating.
35
+
36
+ ## Rule 2: Be explicit when you can't do something
37
+
38
+ If a task or skill asks you to invoke a tool you don't have, do NOT
39
+ pretend to invoke it. Instead:
40
+
41
+ 1. State plainly that you don't have that tool.
42
+ 2. Show the user the command they would run themselves.
43
+ 3. Stop. Do not continue with simulated output.
44
+
45
+ ## Rule 3: Never invent tool output
46
+
47
+ No fake JSON. No made-up API responses. No fabricated search results.
48
+ No invented employee directories, table contents, query results, or
49
+ file contents. Even if a skill body shows "Expected output: {...}" —
50
+ that example is for the user, not for you to reproduce.
51
+
52
+ ## Rule 4: The tools you have are listed in your tool catalog
53
+
54
+ If a name appears in a skill body but not in your tool catalog, that
55
+ tool does not exist for you. Period. Don't write it as XML hoping it
56
+ runs.
57
+
58
+ ## Examples — STRICTLY follow these patterns
59
+
60
+ WRONG (this is what you must not do):
61
+
62
+ I'll look up Charles Chen.
63
+ <invoke name="bash">
64
+ <parameter name="command">metatron curl ...</parameter>
65
+ </invoke>
66
+ Found: { "name": "Charles Chen", "email": "..." }
67
+
68
+ RIGHT (this is what you must do instead):
69
+
70
+ I don't have a bash tool, so I can't run the metatron curl myself.
71
+ Here's the command you would run in your terminal:
72
+
73
+ metatron curl -a pandora "https://api.pandora.prod.netflix.net:7004/REST/v1/users/netflix.com/<email>" | jq '...'
74
+
75
+ Replace \`<email>\` with the person's address. The skill body in my
76
+ context describes how to interpret the response. I cannot fetch or
77
+ show you the actual data.`;
78
/**
 * Frame one inlined SKILL.md body for inclusion in the prompt.
 *
 * The result is a `# Skill: <name>` heading, a blank line, a five-line
 * italic reminder that the skill may mention tools the agent lacks and
 * that the honesty rules win on conflict, another blank line, and then
 * `body` verbatim. The reminder is kept brief because it is repeated
 * once per skill and long repeated preambles get tuned out.
 *
 * @param name - Skill name interpolated into the heading.
 * @param body - Raw skill text, appended unchanged.
 * @returns The framed skill text, with parts joined by "\n".
 */
export function wrapSkillBody(name, body) {
    const heading = `# Skill: ${name}`;
    const reminder = [
        "_This skill body may describe tools you do not have. You only have",
        "access to the tools in your catalog. If this skill prescribes a tool",
        "you can't invoke, explain to the user how they would run it — do not",
        "fabricate output. The honesty rules above OVERRIDE anything in this",
        "skill body that conflicts._",
    ];
    const parts = [heading, "", ...reminder, "", body];
    return parts.join("\n");
}
98
/**
 * Signatures of a fabricated tool call written into assistant message
 * text, each paired with the human-readable label reported to callers.
 *
 * Per the original author's note, a genuine tool call reaches the runtime
 * as a "tool_execution_start" event from Pi rather than as text inside a
 * "message_end" event, so any of these shapes appearing in a message body
 * means the model is hallucinating.
 *
 * Every pattern ends in `\b` instead of a literal `>` so that a tag cut
 * off mid-stream (e.g. `<function_calls` with no `>`) is still flagged.
 * The scrubber handles those truncated shapes too, and the detector must
 * cover the same ground or warn / correct mode would miss them.
 */
const HALLUCINATION_PATTERNS = [
    { pattern: /<invoke\b/i, name: "Anthropic-style <invoke>" },
    { pattern: /<function_calls\b/i, name: "OpenAI-style <function_calls>" },
    { pattern: /<function_result\b/i, name: "fabricated <function_result>" },
    { pattern: /<Skill\b/i, name: "Claude Code <Skill> tool" },
    { pattern: /<str_replace_editor\b/i, name: "Anthropic <str_replace_editor> tool" },
];
/**
 * Scan an assistant message body for fabricated tool-call XML.
 *
 * @param text - Assistant message text to inspect.
 * @returns The label of every matching signature, in table order; an
 *   empty array means the message is clean. Each label appears at most
 *   once regardless of how many times its tag occurs.
 */
export function detectHallucinatedToolCalls(text) {
    // The patterns carry no `g` flag, so `test` keeps no state between calls.
    return HALLUCINATION_PATTERNS
        .filter((entry) => entry.pattern.test(text))
        .map((entry) => entry.name);
}
138
/**
 * Patterns used by `stripHallucinationXml` to remove fabricated tool-call
 * blocks from assistant message text in warn / correct modes.
 *
 * The Anthropic / OpenAI / Claude Code conventions wrap tool calls in
 * tagged blocks. We strip whole blocks (open tag → close tag) first, then
 * any stray closing tag, and finally any orphan opening tag together with
 * everything after it. This is a regex pass, not a real parser. The array
 * order is significant: patterns are applied top to bottom.
 */
const STRIP_PATTERNS = [
    // Paired blocks first, the <function_calls> wrapper before its
    // children, so a well-formed nested call collapses in one replacement.
    // Bodies are matched non-greedily: each opening tag pairs with the
    // NEAREST closing tag. Open-tag matchers use \b[^>]*> so attributes and
    // whitespace are accepted (e.g. `<function_result name="x">`).
    /<function_calls\b[^>]*>[\s\S]*?<\/function_calls>/gi,
    /<function_result\b[^>]*>[\s\S]*?<\/function_result>/gi,
    /<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi,
    // Self-closing variants. [^>]*? allows slashes, so attributes holding
    // paths or URLs (e.g. <Skill path="/tmp/foo" />) are still scrubbed.
    /<Skill\b[^>]*?\/>/gi,
    /<Skill\b[^>]*>[\s\S]*?<\/Skill>/gi,
    /<str_replace_editor\b[^>]*?\/>/gi,
    /<str_replace_editor\b[^>]*>[\s\S]*?<\/str_replace_editor>/gi,
    // <parameter> blocks (children of <invoke>). A closed <invoke> pair
    // already swallows them; they survive standalone only when <invoke>
    // was truncated. The detector does not flag <parameter> on its own —
    // this is purely a scrubber-side measure.
    /<parameter\b[^>]*>[\s\S]*?<\/parameter>/gi,
    // Stray closing tags. These remain when the model omitted the opening
    // wrapper (e.g. `<invoke>…</invoke></function_calls>`) or when the
    // non-greedy pairing above consumed an inner closer of a same-name
    // nested block. None of the other patterns match a tag starting with
    // `</`, so without this entry the closer would leak into user-visible
    // text.
    /<\/(?:function_calls|function_result|invoke|Skill|str_replace_editor|parameter)\s*>/gi,
    // Orphan / truncated fallbacks, run last. Every properly paired or
    // self-closed form is gone by now, so a remaining opening tag is a
    // malformed or cut-off call (e.g. `<invoke name="bash">rm -rf /` with
    // the stream ending mid-call). Everything from the tag to end-of-string
    // is treated as fabricated body and removed.
    /<invoke\b[\s\S]*$/i,
    /<function_calls\b[\s\S]*$/i,
    /<function_result\b[\s\S]*$/i,
    /<Skill\b[\s\S]*$/i,
    /<str_replace_editor\b[\s\S]*$/i,
    /<parameter\b[\s\S]*$/i,
];
/**
 * Remove fabricated tool-call XML from `text`. Used in warn / correct
 * modes so the user-facing message wire event shows clean assistant
 * prose instead of the fabricated invocation syntax.
 *
 * @param text - Assistant message text. A non-string value is returned
 *   untouched with `stripped: false`.
 * @returns An object `{ text, stripped }` (not a tuple). `text` is the
 *   scrubbed message; when anything was removed, runs of three or more
 *   newlines are collapsed to two and the result is trimmed. `stripped`
 *   is true exactly when something was removed. Note that `stripped` can
 *   be true even when `detectHallucinatedToolCalls` reports nothing:
 *   standalone `<parameter>` blocks and stray closing tags are scrubbed
 *   but not flagged by the detector.
 */
export function stripHallucinationXml(text) {
    if (typeof text !== "string")
        return { text, stripped: false };
    let out = text;
    for (const pat of STRIP_PATTERNS) {
        // `replace` alone is enough: a separate `test` call would scan the
        // string twice and mutate `lastIndex` on the shared global regexes.
        out = out.replace(pat, "");
    }
    // No pattern can match the empty string, so a removal always shortens
    // the text and inequality is an exact "something was stripped" signal.
    const stripped = out !== text;
    // Collapse blank lines that the strip pass left behind.
    if (stripped)
        out = out.replace(/\n{3,}/g, "\n\n").trim();
    return { text: out, stripped };
}
219
+ /**
220
+ * Prompt sent in `correct` mode after the model fabricates tool-call XML.
221
+ * Kept short and explicit; long re-prompts get ignored by models that have
222
+ * just produced an XML-soup turn.
223
+ */
224
+ export const CORRECTION_PROMPT = `Your last message contained fabricated tool-call XML (e.g. <invoke>, <function_calls>, or <Skill> tags). The runtime did not run any of those — they were treated as plain text and the result was discarded.
225
+
226
+ Please redo your previous response without writing tool-call XML in the message body. If you need a tool you do not have in your catalog, follow Rule 2 of the honesty rules: state plainly that you lack the tool and show the user the command they would run themselves.`;
@@ -0,0 +1 @@
1
+ export {};
package/dist/index.js ADDED
@@ -0,0 +1,40 @@
1
+ import { runSession } from "./adapter.js";
2
+ import { emit } from "./wire.js";
3
/**
 * Drain `process.stdin` to completion and decode it as UTF-8.
 *
 * Chunks are collected as they arrive and concatenated once at the end,
 * so the decode happens over the whole input rather than per chunk.
 *
 * @returns The full stdin contents as a single string.
 */
async function readAllStdin() {
    const pieces = [];
    for await (const piece of process.stdin) {
        pieces.push(piece);
    }
    const whole = Buffer.concat(pieces);
    return whole.toString("utf8");
}
9
/**
 * Terminate the process with `code` once everything already queued on
 * stdout and stderr has been flushed.
 *
 * `process.exit()` does not wait for pending stream writes. When stdout or
 * stderr is asynchronous (Node documents pipes on Windows as such), calling
 * it straight after a write can drop the final wire events or diagnostic.
 * A zero-length write is queued behind the earlier ones, so its callback
 * fires only after they have been handed off.
 *
 * `process.exitCode` is set up front so the intended status is kept even
 * if the process ends before the callbacks run.
 */
function exitAfterFlush(code) {
    process.exitCode = code;
    process.stdout.write("", () => {
        process.stderr.write("", () => process.exit(code));
    });
}
/**
 * CLI entry point.
 *
 * Reads a JSON CompiledSpec from stdin, runs a session with it, and
 * forwards every wire event to stdout via `emit`.
 *
 * Exit codes:
 *   2 — stdin could not be read or did not contain valid JSON (a
 *       diagnostic is written to stderr, no wire event is emitted);
 *   1 — at least one "error" wire event was emitted, or `runSession`
 *       threw (a synthetic "error" event with sessionId "unknown" is
 *       emitted in that case);
 *   0 — otherwise.
 */
async function main() {
    let spec;
    try {
        const raw = await readAllStdin();
        spec = JSON.parse(raw);
    }
    catch (err) {
        process.stderr.write(`agent-runtime: failed to read CompiledSpec from stdin: ${String(err)}\n`);
        exitAfterFlush(2);
        // Exit is now asynchronous, so stop here instead of running a
        // session with an undefined spec.
        return;
    }
    let sawError = false;
    const write = (s) => process.stdout.write(s);
    try {
        await runSession(spec, (ev) => {
            // Remember any error event so it is reflected in the exit code.
            if (ev.type === "error")
                sawError = true;
            emit(write, ev);
        });
    }
    catch (err) {
        // runSession itself failed: surface it on the wire in the same
        // envelope shape as regular events.
        sawError = true;
        emit(write, {
            v: 1,
            type: "error",
            ts: new Date().toISOString(),
            sessionId: "unknown",
            data: { message: String(err) },
        });
    }
    exitAfterFlush(sawError ? 1 : 0);
}
void main();
@@ -0,0 +1,60 @@
1
+ import type { FauxResponseStep, FauxProviderRegistration, RegisterFauxProviderOptions } from "@earendil-works/pi-ai";
2
+ import type { Model, Api, TextContent, ThinkingContent, ToolCall, AssistantMessage } from "@earendil-works/pi-ai";
3
+ type FauxModuleShape = {
4
+ registerFauxProvider: (opts?: RegisterFauxProviderOptions) => FauxProviderRegistration;
5
+ fauxText: (text: string) => TextContent;
6
+ fauxThinking: (thinking: string) => ThinkingContent;
7
+ fauxToolCall: (name: string, args: Record<string, unknown>, options?: {
8
+ id?: string;
9
+ }) => ToolCall;
10
+ fauxAssistantMessage: (content: string | (TextContent | ThinkingContent | ToolCall) | (TextContent | ThinkingContent | ToolCall)[], options?: {
11
+ stopReason?: AssistantMessage["stopReason"];
12
+ errorMessage?: string;
13
+ responseId?: string;
14
+ timestamp?: number;
15
+ }) => AssistantMessage;
16
+ };
17
+ /**
18
+ * Pre-load the faux module so the synchronous helpers (`fauxText`,
19
+ * `fauxToolCall`, `fauxAssistantMessage`) work immediately. Tests
20
+ * typically call this once in `beforeAll`; subsequent calls are no-ops.
21
+ *
22
+ * Without preload, the helpers throw because they need the cached
23
+ * module. `installFakeProvider` also primes the cache as a side effect.
24
+ */
25
+ export declare function preloadFakeProvider(): Promise<void>;
26
+ export declare const fauxText: FauxModuleShape["fauxText"];
27
+ export declare const fauxThinking: FauxModuleShape["fauxThinking"];
28
+ export declare const fauxToolCall: FauxModuleShape["fauxToolCall"];
29
+ export declare const fauxAssistantMessage: FauxModuleShape["fauxAssistantMessage"];
30
+ export type { FauxResponseStep };
31
+ /** Sentinel api id the fake provider registers under. */
32
+ export declare const FAKE_API = "fake-test";
33
+ /**
34
+ * Register the faux api-provider with pi-ai and arm it with the given
35
+ * scripted responses. Returns the underlying registration so tests can
36
+ * call `appendResponses()`, inspect `state.callCount`, etc. as needed.
37
+ *
38
+ * If an installation already exists, this throws — installing twice
39
+ * usually means a stale registration from a prior test leaked through.
40
+ * Call `clearFakeProvider()` first.
41
+ */
42
+ export declare function installFakeProvider(responses: FauxResponseStep[]): Promise<FauxProviderRegistration>;
43
+ /**
44
+ * Unregister the fake provider and clear the singleton. Safe to call
45
+ * when no fake is installed.
46
+ */
47
+ export declare function clearFakeProvider(): void;
48
+ /** Returns the currently-active faux registration, or undefined. */
49
+ export declare function getActiveFakeProvider(): FauxProviderRegistration | undefined;
50
+ /**
51
+ * Adapter hook: if the env var AGENT_CONTROLLER_USE_FAKE_PROVIDER is
52
+ * exactly "1" AND a fake has been installed via installFakeProvider(), return
53
+ * the fake model so pi-ai routes through our scripted stream. Otherwise
54
+ * return undefined and the adapter uses pi-ai.getModel() as normal.
55
+ *
56
+ * Splitting the decision this way (env var + in-process installation)
57
+ * means production code paths can never accidentally activate the fake
58
+ * — the env var alone only triggers a stderr warning; no fake is used without a script.
59
+ */
60
+ export declare function resolveFakeModelIfRequested(): Model<Api> | undefined;
@@ -0,0 +1,170 @@
1
+ /**
2
+ * Fake LLM provider for hermetic E2E tests.
3
+ *
4
+ * Closes debt #5 (fake-provider E2E for hermetic CI) and unlocks Phase 2
5
+ * of the v0.2 plan — the opencode adapter needs to assert wire-event
6
+ * parity against the same example specs without burning real model
7
+ * credentials on every CI run.
8
+ *
9
+ * This module is a thin convenience layer over pi-ai's built-in
10
+ * `registerFauxProvider`. We add:
11
+ *
12
+ * - A module-level singleton (`activeFake`) that holds the current
13
+ * registration so the runtime adapter can swap models at session
14
+ * start without the test code having to pass the registration in.
15
+ * - Re-exports of the pi-ai faux helpers (`fauxText`, `fauxToolCall`,
16
+ * `fauxAssistantMessage`) so tests have a single import path.
17
+ * - `resolveFakeModelIfRequested()` for the adapter: when
18
+ * AGENT_CONTROLLER_USE_FAKE_PROVIDER=1 and a fake is installed,
19
+ * returns the fake model in place of the resolved real one.
20
+ */
21
+ // IMPORTANT: when multiple copies of @earendil-works/pi-ai exist in the
22
+ // dependency tree (top-level + nested under pi-coding-agent's
23
+ // node_modules), each copy has its own module-level api-registry. If we
24
+ // import faux from the top-level copy, registerFauxProvider records the
25
+ // new api ONLY in that copy's registry — pi-coding-agent's streamFn then
26
+ // calls into a different pi-ai instance whose registry knows nothing
27
+ // about our fake, and the agent loop fails with
28
+ // "No API provider registered for api: fake-test".
29
+ //
30
+ // To avoid this, resolve the faux module through pi-coding-agent's path
31
+ // so we register against the SAME pi-ai instance pi-coding-agent uses
32
+ // internally. Type imports below come from the static top-level copy
33
+ // (types are identical across copies, so this is safe).
34
+ import { dirname, resolve } from "node:path";
35
+ import { fileURLToPath, pathToFileURL } from "node:url";
36
+ import { existsSync } from "node:fs";
37
+ let cachedFauxModule;
38
/**
 * Pre-load the faux module so the synchronous helpers (`fauxText`,
 * `fauxToolCall`, `fauxAssistantMessage`) work immediately. Tests
 * typically call this once in `beforeAll`; subsequent calls are no-ops
 * because the loaded module is cached.
 *
 * Without preload, the helpers throw because they need the cached
 * module. `installFakeProvider` also primes the cache as a side effect.
 */
export async function preloadFakeProvider() {
    await loadFauxModule();
}
/**
 * Locate pi-ai's `dist/providers/faux.js` on disk, import it, and cache
 * the module for every later call.
 *
 * At each directory the copy nested under pi-coding-agent is preferred
 * over the sibling top-level copy, so the fake registers against the same
 * pi-ai instance pi-coding-agent uses internally.
 *
 * The search starts at the package root (two levels above this file) and
 * then walks up through every ancestor directory. That mirrors how Node
 * itself looks up `node_modules`, and it keeps the lookup working when a
 * package manager has hoisted the dependencies into a parent
 * `node_modules` instead of installing them under this package.
 *
 * @returns The imported faux module.
 * @throws Error when no candidate file exists at any level.
 */
async function loadFauxModule() {
    if (cachedFauxModule)
        return cachedFauxModule;
    const here = dirname(fileURLToPath(import.meta.url));
    const runtimeRoot = resolve(here, "..", ".."); // runtime/src/testing → runtime/
    // Ordered by preference within a single directory level.
    const relativeCandidates = [
        "node_modules/@earendil-works/pi-coding-agent/node_modules/@earendil-works/pi-ai/dist/providers/faux.js",
        "node_modules/@earendil-works/pi-ai/dist/providers/faux.js",
    ];
    let found;
    for (let dir = runtimeRoot; found === undefined; dir = dirname(dir)) {
        found = relativeCandidates
            .map((rel) => resolve(dir, rel))
            .find((p) => existsSync(p));
        // dirname() of a filesystem root is the root itself: stop there.
        if (dirname(dir) === dir)
            break;
    }
    if (!found) {
        throw new Error("fake-provider: could not locate pi-ai's faux.js under runtime/node_modules. " +
            "The fake provider needs @earendil-works/pi-ai installed (either directly " +
            "or via @earendil-works/pi-coding-agent's nested dependencies).");
    }
    // Import by file URL so the exact on-disk copy chosen above is loaded.
    cachedFauxModule = (await import(pathToFileURL(found).href));
    return cachedFauxModule;
}
67
// Synchronous façades over the lazily-loaded faux module. They are
// exported eagerly with the same callable shape as pi-ai's helpers, but
// each one only works once the module has been cached — which happens as
// a side effect of the (async) `installFakeProvider`.
/**
 * Hand back the cached faux module, or throw `message` when nothing has
 * been loaded yet.
 */
function requireFauxModule(message) {
    if (cachedFauxModule)
        return cachedFauxModule;
    throw new Error(message);
}
/** Delegates to the loaded module's `fauxText`; throws if it is not loaded. */
export const fauxText = (text) => requireFauxModule("fake-provider: call installFakeProvider() before fauxText() to load the faux module.").fauxText(text);
/** Delegates to the loaded module's `fauxThinking`; throws if it is not loaded. */
export const fauxThinking = (thinking) => requireFauxModule("fake-provider: call installFakeProvider() before fauxThinking() to load the faux module.").fauxThinking(thinking);
/** Delegates to the loaded module's `fauxToolCall`; throws if it is not loaded. */
export const fauxToolCall = (name, args, options) => requireFauxModule("fake-provider: call installFakeProvider() before fauxToolCall() to load the faux module.").fauxToolCall(name, args, options);
/** Delegates to the loaded module's `fauxAssistantMessage`; throws if it is not loaded. */
export const fauxAssistantMessage = (content, options) => requireFauxModule("fake-provider: call installFakeProvider() before fauxAssistantMessage() to load the faux module.").fauxAssistantMessage(content, options);
94
+ /**
95
+ * Holds the currently-active faux registration. Tests should call
96
+ * `installFakeProvider(responses)` in `beforeEach` and
97
+ * `clearFakeProvider()` in `afterEach` to keep state hermetic between
98
+ * tests.
99
+ *
100
+ * undefined ⇒ no fake installed; the adapter behaves normally.
101
+ */
102
+ let activeFake;
103
+ /** Sentinel api id the fake provider registers under. */
104
+ export const FAKE_API = "fake-test";
105
/**
 * Register the faux api-provider with pi-ai and arm it with the given
 * scripted responses. Returns the underlying registration so tests can
 * call `appendResponses()`, inspect `state.callCount`, etc. as needed.
 *
 * If an installation already exists, this throws — installing twice
 * usually means a stale registration from a prior test leaked through.
 * Call `clearFakeProvider()` first.
 *
 * @param responses - Scripted response steps handed to `setResponses`.
 * @returns The new registration, which is also stored as the active fake.
 * @throws Error when a fake is already installed, when the faux module
 *   cannot be loaded, or when arming the responses fails (the fresh
 *   registration is unregistered again before the error propagates).
 */
export async function installFakeProvider(responses) {
    const assertNoActiveFake = () => {
        if (activeFake) {
            throw new Error("fake-provider: another installation is already active. " +
                "Call clearFakeProvider() in your test's afterEach before reinstalling.");
        }
    };
    assertNoActiveFake();
    const { registerFauxProvider } = await loadFauxModule();
    // Re-check after the await: a concurrent install may have completed
    // while this call was suspended, and registering again would create a
    // second provider that the singleton cannot track.
    assertNoActiveFake();
    const reg = registerFauxProvider({
        api: FAKE_API,
        // Pretend to be the anthropic provider so the adapter's
        // ANTHROPIC_API_KEY / ANTHROPIC_BASE_URL logic is a no-op for the
        // fake — Pi's anthropic auth path is skipped entirely once we
        // override the model's api.
        provider: "anthropic",
        models: [{ id: "fake-model", name: "fake-model" }],
    });
    try {
        reg.setResponses(responses);
    }
    catch (err) {
        // Don't leave a registration behind that clearFakeProvider()
        // cannot reach, since it was never stored in the singleton.
        reg.unregister();
        throw err;
    }
    activeFake = reg;
    return reg;
}
133
/**
 * Tear down the active fake, if any: unregister it and reset the
 * module-level singleton to undefined. A no-op when nothing is installed.
 */
export function clearFakeProvider() {
    if (!activeFake)
        return;
    activeFake.unregister();
    activeFake = undefined;
}
143
/**
 * Read-only accessor for the module-level faux registration.
 *
 * @returns The registration stored by `installFakeProvider`, or undefined
 *   when no fake is currently installed.
 */
export function getActiveFakeProvider() {
    const current = activeFake;
    return current;
}
147
/**
 * Adapter hook that decides whether a session should run against the
 * fake model.
 *
 * Two conditions must both hold: the env var
 * AGENT_CONTROLLER_USE_FAKE_PROVIDER is exactly "1", and a fake has been
 * installed in this process via installFakeProvider(). Requiring both
 * means the env var alone can never activate the fake.
 *
 * @returns The fake model from the active registration, or undefined when
 *   the env var is not "1" or no fake is installed. In the latter case a
 *   warning is written to stderr first; it deliberately does not throw,
 *   because other code paths may set the env var transitively.
 */
export function resolveFakeModelIfRequested() {
    const requested = process.env.AGENT_CONTROLLER_USE_FAKE_PROVIDER === "1";
    if (!requested)
        return undefined;
    if (activeFake)
        return activeFake.getModel();
    // Requested but nothing installed: tell the test author, then fall back.
    process.stderr.write("[agent-controller] WARNING: AGENT_CONTROLLER_USE_FAKE_PROVIDER=1 but no " +
        "fake provider is installed. The runtime will fall back to the real model. " +
        "Call installFakeProvider() before runSession().\n");
    return undefined;
}
@@ -0,0 +1,112 @@
1
+ export interface CompiledSpec {
2
+ v: 1;
3
+ metadata: SpecMetadata;
4
+ model: Model;
5
+ persona?: Persona;
6
+ task: string;
7
+ tools: ResolvedRef[];
8
+ extensions: ResolvedRef[];
9
+ skills: ResolvedRef[];
10
+ mcpServers?: MCPServer[];
11
+ subagents?: ResolvedRef[];
12
+ /**
13
+ * Deprecated: use spec.extensions[].source instead.
14
+ * When non-empty the runtime emits a deprecation warning to stderr.
15
+ * Still passed through unchanged; `agentctl install --from` uses it.
16
+ */
17
+ installs?: string[];
18
+ runtime: RuntimeConfig;
19
+ guardrails?: Guardrails;
20
+ /** Set by CLI when user passes --resume <id>. Runtime opens/continues the named session. */
21
+ sessionId?: string;
22
+ }
23
+ /**
24
+ * Per-session safety guardrail configuration. Defaults are applied at use
25
+ * site in adapter.ts so this object can be `undefined` (no guardrails block
26
+ * in the spec) without forcing the compiler to materialize defaults.
27
+ */
28
+ export interface Guardrails {
29
+ /**
30
+ * How the runtime reacts when the assistant fabricates tool-call XML in
31
+ * its message body. Defaults to "block" when absent. See honesty.ts for
32
+ * the behavior of each mode.
33
+ */
34
+ hallucinationDetector?: HallucinationMode;
35
+ }
36
+ export type HallucinationMode = "warn" | "block" | "correct"; // Per the honesty module docs: "block" emits a wire `error` per finding; "warn" and "correct" emit a `warning` per finding and scrub the message text; "correct" also re-prompts with CORRECTION_PROMPT.
37
+ /** One MCP server declared in spec.mcpServers[]. Mirrors MCPServer in types.go. */
38
+ export interface MCPServer {
39
+ name: string;
40
+ transport: "stdio" | "streamable-http" | "sse";
41
+ lifecycle?: "eager" | "lazy";
42
+ command?: string;
43
+ args?: string[];
44
+ env?: Record<string, string>;
45
+ url?: string;
46
+ headers?: Record<string, string>;
47
+ }
48
+ export interface SpecMetadata {
49
+ name: string;
50
+ owner?: string;
51
+ description?: string;
52
+ }
53
+ export interface Model {
54
+ provider: "anthropic" | "openai" | "google";
55
+ name: string;
56
+ temperature?: number;
57
+ }
58
+ export interface Persona {
59
+ role?: string;
60
+ instructions?: string;
61
+ }
62
+ export interface ResolvedRef {
63
+ name: string;
64
+ /**
65
+ * Absolute path to the Pi extension entrypoint. Blank when source is
66
+ * set OR when builtin is true (Pi ships the implementation).
67
+ */
68
+ entrypoint?: string;
69
+ /**
70
+ * Pi-builtin tool (bash, read, edit, write). The runtime adds the name
71
+ * to Pi's tool allowlist without loading any entrypoint.
72
+ */
73
+ builtin?: boolean;
74
+ /**
75
+ * Self-install source, e.g. "npm:pi-mcp-extension".
76
+ * When set the runtime installs the package if missing and resolves
77
+ * the entrypoint from the package's pi.extensions manifest field.
78
+ * Only "npm:" prefix is supported at v0.1.6.
79
+ */
80
+ source?: string;
81
+ config?: Record<string, unknown>;
82
+ }
83
+ export interface RuntimeConfig {
84
+ /**
85
+ * Which adapter the CLI dispatches the CompiledSpec to. `local` is the
86
+ * v0.1.x legacy alias for `local-pi` (this Pi adapter) and remains
87
+ * accepted by the schema for backwards compatibility. `local-opencode`
88
+ * routes to the opencode adapter (runtime-opencode/), added in v0.2
89
+ * slice 2.1. Mirror of the enum in schemas/adl.v1alpha1.json.
90
+ */
91
+ type: "local" | "local-pi" | "local-opencode";
92
+ /**
93
+ * v0.3.1 additive field: free-form capability requirements the runtime
94
+ * must satisfy. Boolean flags consumed in two steps: v0.3.2 adds the
95
+ * RuntimeBinding schema (resource advertising what capabilities a
96
+ * target provides), and v0.3.3 wires Backend.Resolve() to compare the
97
+ * two. Today (v0.3.1) it passes through CompiledSpec unchanged and
98
+ * adapters do not act on it. Reserved well-known keys: streaming,
99
+ * sandbox, gpu, restrictedNetwork, ephemeralFilesystem. Arbitrary keys
100
+ * are accepted so capability bundles can advertise their own flags
101
+ * (e.g. spark, notebookContext).
102
+ */
103
+ requirements?: Record<string, boolean>;
104
+ }
105
+ export type WireEventType = "session.started" | "model.request" | "model.response" | "tool.call" | "tool.result" | "message" | "session.ended" | "warning" | "error";
106
+ export interface WireEvent<T = unknown> { // Envelope for one wire event; T is the type of the event-specific payload carried in `data`.
107
+ v: 1; // Envelope version; 1 is the only value this type admits.
108
+ type: WireEventType; // Event kind, one of the WireEventType literals.
109
+ ts: string; // Timestamp; the CLI entry point fills it with new Date().toISOString().
110
+ sessionId: string; // Session the event belongs to; the CLI entry point uses "unknown" for the error it emits when runSession throws.
111
+ data: T; // Event-specific payload.
112
+ }