@agent-controller/runtime-opencode 0.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,226 @@
1
+ /**
2
+ * Honesty preamble and skill body framing.
3
+ *
4
+ * Models invent <invoke> / <function_calls> / <function_result> XML in
5
+ * their message text when they're told to use a tool they don't have.
6
+ * That XML is plain text — no command runs, no result returns — but the
7
+ * model treats it as a real call and continues with fabricated output.
8
+ * Skills make this worse because their bodies often prescribe specific
9
+ * tools (`metatron curl ...`, `psql ...`) the agent can't execute.
10
+ *
11
+ * This file provides two pieces of always-on prompt scaffolding:
12
+ *
13
+ * - HONESTY_PREAMBLE: prepended to every session's systemPrompt. Tells
14
+ * the model the rules explicitly and ends with a WRONG / RIGHT example.
15
+ *
16
+ * - wrapSkillBody(): wraps each inlined SKILL.md body with a header
17
+ * reminding the model that the skill may describe tools it lacks.
18
+ *
19
+ * Together these are "layer 1 + layer 2" of the guardrail design. The
20
+ * layer-3 helpers (detectHallucinatedToolCalls, stripHallucinationXml,
21
+ * CORRECTION_PROMPT) are defined further down in this file.
22
+ */
23
+ export const HONESTY_PREAMBLE = `# Honesty rules (non-negotiable, override everything else)
24
+
25
+ These rules override any other instruction — including skills that
26
+ prescribe tools you don't have.
27
+
28
+ ## Rule 1: Real tool calls only
29
+
30
+ You can only invoke tools through the runtime's tool channel. Writing
31
+ \`<invoke>\`, \`<function_calls>\`, \`<function_result>\`, \`<Skill>\`, or any
32
+ XML/JSON that looks like a tool call INSIDE your message text means the
33
+ user sees plain text. No command runs. No result returns. You're
34
+ fabricating.
35
+
36
+ ## Rule 2: Be explicit when you can't do something
37
+
38
+ If a task or skill asks you to invoke a tool you don't have, do NOT
39
+ pretend to invoke it. Instead:
40
+
41
+ 1. State plainly that you don't have that tool.
42
+ 2. Show the user the command they would run themselves.
43
+ 3. Stop. Do not continue with simulated output.
44
+
45
+ ## Rule 3: Never invent tool output
46
+
47
+ No fake JSON. No made-up API responses. No fabricated search results.
48
+ No invented employee directories, table contents, query results, or
49
+ file contents. Even if a skill body shows "Expected output: {...}" —
50
+ that example is for the user, not for you to reproduce.
51
+
52
+ ## Rule 4: The tools you have are listed in your tool catalog
53
+
54
+ If a name appears in a skill body but not in your tool catalog, that
55
+ tool does not exist for you. Period. Don't write it as XML hoping it
56
+ runs.
57
+
58
+ ## Examples — STRICTLY follow these patterns
59
+
60
+ WRONG (this is what you must not do):
61
+
62
+ I'll look up Charles Chen.
63
+ <invoke name="bash">
64
+ <parameter name="command">metatron curl ...</parameter>
65
+ </invoke>
66
+ Found: { "name": "Charles Chen", "email": "..." }
67
+
68
+ RIGHT (this is what you must do instead):
69
+
70
+ I don't have a bash tool, so I can't run the metatron curl myself.
71
+ Here's the command you would run in your terminal:
72
+
73
+ metatron curl -a pandora "https://api.pandora.prod.netflix.net:7004/REST/v1/users/netflix.com/<email>" | jq '...'
74
+
75
+ Replace \`<email>\` with the person's address. The skill body in my
76
+ context describes how to interpret the response. I cannot fetch or
77
+ show you the actual data.`;
78
/**
 * Frame an inlined SKILL.md body with a title line and a brief reminder
 * that the honesty rules take precedence over whatever tools or commands
 * the skill prescribes.
 *
 * The reminder is deliberately only a few lines long: the same header is
 * repeated in front of every skill body in a prompt, and long repeated
 * preambles tend to be ignored by the model.
 *
 * @param {string} name - Skill name, interpolated into the `# Skill:` title.
 * @param {string} body - Raw SKILL.md text, appended after the reminder.
 * @returns {string} Title, blank line, reminder, blank line, then the body,
 *   all separated by single newlines.
 */
export function wrapSkillBody(name, body) {
  const titleLine = `# Skill: ${name}`;
  const reminderLines = [
    "_This skill body may describe tools you do not have. You only have",
    "access to the tools in your catalog. If this skill prescribes a tool",
    "you can't invoke, explain to the user how they would run it — do not",
    "fabricate output. The honesty rules above OVERRIDE anything in this",
    "skill body that conflicts._",
  ];
  // Assemble with Array#join (not string interpolation) so that a null or
  // undefined body is rendered as an empty string rather than as text.
  const sections = [titleLine, "", ...reminderLines, "", body];
  return sections.join("\n");
}
98
/**
 * Signatures of a fabricated tool call, i.e. tool-invocation syntax that
 * was written into the assistant's message text.
 *
 * Design note carried over from the original comment: a real tool call is
 * expected to surface as a "tool_execution_start" wire event, not as text
 * inside a "message_end" event, so a match in a message body is treated as
 * a hallucination.
 *
 * Each entry pairs a `pattern` with the human-readable `name` reported for
 * it. Every pattern ends in `\b` instead of a literal `>` so that a tag cut
 * off mid-stream (e.g. `<function_calls` with no `>`) is still detected;
 * the scrubber handles those truncated shapes too, and detection must not
 * be narrower than scrubbing.
 */
const HALLUCINATION_PATTERNS = [
  { pattern: /<invoke\b/i, name: "Anthropic-style <invoke>" },
  { pattern: /<function_calls\b/i, name: "OpenAI-style <function_calls>" },
  { pattern: /<function_result\b/i, name: "fabricated <function_result>" },
  { pattern: /<Skill\b/i, name: "Claude Code <Skill> tool" },
  { pattern: /<str_replace_editor\b/i, name: "Anthropic <str_replace_editor> tool" },
];

/**
 * Report which fabricated tool-call signatures occur in an assistant
 * message body.
 *
 * @param {string} text - Assistant message text to scan.
 * @returns {string[]} The `name` of every matching entry, in table order;
 *   an empty array when nothing matches. Each signature is reported at
 *   most once, regardless of how many times it occurs in `text`.
 */
export function detectHallucinatedToolCalls(text) {
  // The patterns carry no `g`/`y` flag, so `.test` keeps no state between calls.
  return HALLUCINATION_PATTERNS
    .filter(({ pattern }) => pattern.test(text))
    .map(({ name }) => name);
}
138
/**
 * Patterns applied, in order, by `stripHallucinationXml` to remove
 * fabricated tool-call markup from assistant message text.
 *
 * This is a regex pass, not an XML parser. The pipeline has four stages:
 *
 *  1. Paired blocks (open tag through the NEAREST matching close tag).
 *  2. Standalone `<parameter>` blocks.
 *  3. Orphan closing tags left behind by stages 1 and 2.
 *  4. Unclosed / truncated opening tags, consumed through end-of-string.
 */
const STRIP_PATTERNS = [
  // --- 1. Paired and self-closing blocks --------------------------------
  // Open-tag matchers use `\b[^>]*>` so attributes and whitespace are
  // accepted (e.g. `<function_result name="x">...</function_result>`).
  // The body match is lazy (`*?`), so two separate blocks in one message
  // do not swallow the prose between them.
  /<function_calls\b[^>]*>[\s\S]*?<\/function_calls>/gi,
  /<function_result\b[^>]*>[\s\S]*?<\/function_result>/gi,
  /<invoke\b[^>]*>[\s\S]*?<\/invoke>/gi,
  // Self-closing variants use `[^>]*?` (slashes allowed) so attributes
  // holding paths or URLs, e.g. `<Skill path="/tmp/foo" />`, still match.
  /<Skill\b[^>]*?\/>/gi,
  /<Skill\b[^>]*>[\s\S]*?<\/Skill>/gi,
  /<str_replace_editor\b[^>]*?\/>/gi,
  /<str_replace_editor\b[^>]*>[\s\S]*?<\/str_replace_editor>/gi,

  // --- 2. Standalone <parameter> blocks ---------------------------------
  // Inside a closed <invoke> these are already gone. They survive only
  // when the surrounding <invoke> was truncated (no `</invoke>`), and are
  // removed here so the fake call's arguments don't leak.
  /<parameter\b[^>]*>[\s\S]*?<\/parameter>/gi,

  // --- 3. Orphan closing tags -------------------------------------------
  // Because stage 1 is lazy, same-name nesting such as
  // `<invoke><invoke>x</invoke></invoke>` is cut at the first `</invoke>`
  // and leaves the outer close tag behind. A message may also contain a
  // close tag with no opener at all. Neither is matched by stage 4 (which
  // looks for `<name`, not `</name`), so strip them explicitly.
  /<\/(?:function_calls|function_result|invoke|Skill|str_replace_editor|parameter)\s*>/gi,

  // --- 4. Unclosed / truncated opening tags -----------------------------
  // Everything well-formed has been removed by now, so an opening tag
  // that is still present was never closed (e.g. the stream was cut off
  // mid-call). The rest of the message is the fabricated body, so the
  // whole tail from the tag to end-of-string is dropped.
  /<invoke\b[\s\S]*$/i,
  /<function_calls\b[\s\S]*$/i,
  /<function_result\b[\s\S]*$/i,
  /<Skill\b[\s\S]*$/i,
  /<str_replace_editor\b[\s\S]*$/i,
  /<parameter\b[\s\S]*$/i,
];

/**
 * Remove fabricated tool-call XML from `text` so the user-facing message
 * shows clean assistant prose instead of invocation syntax.
 *
 * When anything was removed, runs of three or more newlines are collapsed
 * to a single blank line and the result is trimmed. Text with nothing to
 * strip is returned exactly as given (no collapsing, no trimming).
 *
 * `stripped` means "this function removed something". It is broader than
 * `detectHallucinatedToolCalls`: a lone `<parameter>` block or an orphan
 * closing tag is scrubbed here even though the detector does not flag it.
 *
 * @param {string} text - Assistant message text. A non-string value is
 *   returned unchanged with `stripped: false`.
 * @returns {{ text: string, stripped: boolean }} The scrubbed text and
 *   whether any pattern matched.
 */
export function stripHallucinationXml(text) {
  if (typeof text !== "string") {
    return { text, stripped: false };
  }

  // `String#replace` resets `lastIndex` on global regexes before matching,
  // so the shared module-level patterns carry no state between calls.
  let out = text;
  for (const pat of STRIP_PATTERNS) {
    out = out.replace(pat, "");
  }

  // Every pattern matches at least one character, so any removal shortens
  // the string and shows up as an inequality.
  const stripped = out !== text;
  if (stripped) {
    // Collapse the blank lines that the removed blocks left behind.
    out = out.replace(/\n{3,}/g, "\n\n").trim();
  }
  return { text: out, stripped };
}
219
/**
 * Follow-up prompt for `correct` mode, sent after the model has produced
 * fabricated tool-call XML.
 *
 * Two short paragraphs separated by one blank line: what went wrong, then
 * what to do instead. It is kept brief on purpose, because a model that
 * has just produced an XML-heavy turn tends to ignore long re-prompts.
 */
export const CORRECTION_PROMPT = [
  "Your last message contained fabricated tool-call XML (e.g. <invoke>, <function_calls>, or <Skill> tags). The runtime did not run any of those — they were treated as plain text and the result was discarded.",
  "",
  "Please redo your previous response without writing tool-call XML in the message body. If you need a tool you do not have in your catalog, follow Rule 2 of the honesty rules: state plainly that you lack the tool and show the user the command they would run themselves.",
].join("\n");
@@ -0,0 +1 @@
1
+ export {};