@pugi/sdk 0.1.0-alpha.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -0
- package/README.md +29 -0
- package/dist/agent-contracts.d.ts +311 -0
- package/dist/agent-contracts.js +67 -0
- package/dist/audit-trace.d.ts +1161 -0
- package/dist/audit-trace.js +185 -0
- package/dist/device-flow.d.ts +98 -0
- package/dist/device-flow.js +55 -0
- package/dist/engine-adapter.d.ts +376 -0
- package/dist/engine-adapter.js +47 -0
- package/dist/engine-loop.d.ts +457 -0
- package/dist/engine-loop.js +342 -0
- package/dist/handoff.d.ts +605 -0
- package/dist/handoff.js +76 -0
- package/dist/index.d.ts +11 -0
- package/dist/index.js +11 -0
- package/dist/mcp-schemas.d.ts +27 -0
- package/dist/mcp-schemas.js +11 -0
- package/dist/permission-rules.d.ts +65 -0
- package/dist/permission-rules.js +35 -0
- package/dist/subagent-contracts.d.ts +549 -0
- package/dist/subagent-contracts.js +230 -0
- package/dist/transport.d.ts +559 -0
- package/dist/transport.js +482 -0
- package/package.json +47 -0
|
@@ -0,0 +1,342 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Engine loop protocol — the Pugi CLI's tool-use loop driver.
|
|
3
|
+
*
|
|
4
|
+
* The CLI's `NativePugiEngineAdapter` runs a structured tool-use loop against
|
|
5
|
+
* Anvil. Each turn the CLI sends the conversation transcript + a tools schema
|
|
6
|
+
* to the runtime; the runtime returns either a final text answer or a list of
|
|
7
|
+
* tool calls. The CLI executes the calls locally (read/write/edit/grep/glob/
|
|
8
|
+
* bash) against the workspace and feeds the results back in the next turn.
|
|
9
|
+
*
|
|
10
|
+
* This module defines the contracts shared by:
|
|
11
|
+
* - CLI side: the loop driver (`runEngineLoop`) + budget enforcement.
|
|
12
|
+
* - Runtime side: a thin proxy in front of `AnvilBridgeService.askPersona`.
|
|
13
|
+
* - Tests: a fixture-based `EngineLoopClient` that returns canned responses
|
|
14
|
+
* so the loop can be exercised without network.
|
|
15
|
+
*
|
|
16
|
+
* Local-first contract (ADR-0037):
|
|
17
|
+
* - The CLI is the only side that touches the filesystem. The runtime
|
|
18
|
+
* never sees raw file bytes — only the tool results that the local
|
|
19
|
+
* loop chooses to surface back into the transcript.
|
|
20
|
+
* - Budgets (`maxToolCalls`, `maxTokens`) are enforced client-side so a
|
|
21
|
+
* runaway model cannot rack up Anvil cost without the operator noticing.
|
|
22
|
+
* - The loop refuses to write/edit/bash when the command kind is `plan`.
|
|
23
|
+
*
|
|
24
|
+
* Why OpenAI-compatible shape (instead of Anthropic's tool_use blocks):
|
|
25
|
+
* - Anvil's chat-completions endpoint is OpenAI-compatible; coercing to
|
|
26
|
+
* OpenAI-style `tools` + `tool_calls` matches the upstream wire format
|
|
27
|
+
* exactly. Providers that natively speak Anthropic (Claude) are wrapped
|
|
28
|
+
* by Anvil's bridge layer — that translation is not the CLI's concern.
|
|
29
|
+
*/
|
|
30
|
+
import { z } from 'zod';
|
|
31
|
+
/**
 * Command kinds the CLI can invoke. The runtime uses the kind to select a
 * system prompt and persona behaviour. Default budgets (tool calls / tokens)
 * mirror `defaultEngineBudgets`:
 *   - `code`    — general edit+create. 20 / 50k.
 *   - `explain` — read-only walkthrough. 5 / 20k.
 *   - `fix`     — bug investigation + targeted patch. 20 / 50k.
 *   - `plan`    — produce a plan artifact, no mutations. 8 / 30k.
 *                 Mutating tools are refused even if the model requests them.
 *   - `build`   — multi-file scaffolding. 30 / 80k.
 */
const engineCommandKinds = ['code', 'explain', 'fix', 'plan', 'build'];
export const engineCommandKindSchema = z.enum(engineCommandKinds);
|
|
48
|
+
/**
 * Budget envelope for a single command run. Both caps are hard limits
 * enforced inside `runEngineLoop`:
 *   - `maxToolCalls` — total tool calls executed across all turns.
 *   - `maxTokens`    — total tokens (prompt + completion) across all turns,
 *     taken from the usage figure the runtime reports. When a turn reports
 *     `tokensUsed === 0` the loop substitutes a `transcript-chars / 4`
 *     estimate, so runtimes that omit usage accounting (older Anvil builds,
 *     fixture clients, providers returning null usage on tool_use turns)
 *     still trip the budget instead of looping forever.
 *     (Code Reviewer P2 retro 2026-05-23.)
 *
 * Exceeding either cap ends the loop with `status: 'budget_exhausted'`;
 * the caller decides whether that is a failure or a normal stop.
 */
const positiveInteger = () => z.number().int().positive();
export const engineBudgetSchema = z.object({
    maxToolCalls: positiveInteger(),
    maxTokens: positiveInteger(),
});
|
|
66
|
+
/**
 * Canonical budget per command kind. Chosen to keep Anvil cost predictable
 * while leaving `build` enough headroom to scaffold a small feature.
 *
 *   code / fix → 20 calls / 50k tokens
 *   explain    →  5 calls / 20k tokens
 *   plan       →  8 calls / 30k tokens (read-only)
 *   build      → 30 calls / 80k tokens
 *
 * Dogfood note 2026-05-24: `plan` started at 3 tool calls, assuming 1-2
 * reads followed by the plan itself. Real traces showed 3-4 glob/grep calls
 * spent on repo surveying alone, leaving no plan output and an artifact
 * containing `[budget_exhausted]`. Raising the cap to 8 gives room for
 * decently-sized repos while still bounding cost. `plan` remains read-only
 * at the sentinel level, so the higher call count does not weaken safety.
 */
const makeEngineBudget = (maxToolCalls, maxTokens) => ({ maxToolCalls, maxTokens });
export const defaultEngineBudgets = {
    code: makeEngineBudget(20, 50_000),
    explain: makeEngineBudget(5, 20_000),
    fix: makeEngineBudget(20, 50_000),
    plan: makeEngineBudget(8, 30_000),
    build: makeEngineBudget(30, 80_000),
};
|
|
90
|
+
/**
 * One transcript message. The shape follows OpenAI's chat-completions
 * messages, with a `tool` role for tool-result frames. Pugi's runtime proxy
 * maps these to AnvilBridgeMessage (same shape, except `name` carries the
 * tool_call_id for tool frames).
 */
const engineLoopMessageToolCallSchema = z.object({
    id: z.string().min(1),
    name: z.string().min(1),
    arguments: z.string(),
});
export const engineLoopMessageSchema = z.object({
    role: z.enum(['system', 'user', 'assistant', 'tool']),
    content: z.string(),
    /** Tool calls emitted by the model; only meaningful when `role === 'assistant'`. */
    toolCalls: z.array(engineLoopMessageToolCallSchema).optional(),
    /** For `tool` frames: id of the tool call being answered. */
    toolCallId: z.string().optional(),
    /** For `tool` frames: name of the tool being answered. */
    toolName: z.string().optional(),
});
|
|
112
|
+
/**
 * Tool definition in OpenAI-compatible form, built by the CLI from
 * `toolRegistry`. `parameters` holds a JSON Schema object; it is typed as
 * `unknown` so the SDK does not commit to a JSON Schema version.
 */
const engineLoopToolLabelSchema = z.string().min(1);
export const engineLoopToolSchema = z.object({
    name: engineLoopToolLabelSchema,
    description: engineLoopToolLabelSchema,
    parameters: z.unknown(),
});
|
|
122
|
+
/**
 * Core driver. Transport-agnostic tool-use loop:
 *
 *   1. Seed the transcript with the system + user messages.
 *   2. Call `client.send(transcript, tools, options)`.
 *   3. If the response is `error` → return `failed`.
 *   4. Account tokens and check the token budget; bail if exceeded.
 *   5. If the response is `text` → return `completed`.
 *   6. Otherwise (tool_use) → append the assistant frame, execute each call
 *      via `executor`, append one `tool` frame per call, and loop.
 *
 * No filesystem access lives here — the CLI's `engine-tools.ts` is the
 * sole place that touches disk. Keeping the loop pure makes it trivial
 * to unit-test with a fixture client.
 *
 * NOTE(review): any `stop` value other than `'error'` / `'text'` is handled
 * as tool_use and reads `response.assistantMessage`. Confirm that clients
 * map the wire-level `'length'` stop to `'text'` before returning.
 *
 * @param {object} input
 * @param {string} input.systemPrompt - Content of the initial `system` message.
 * @param {string} input.userPrompt - Content of the initial `user` message.
 * @param {{ send: Function }} input.client - Called once per turn with
 *   `(transcript, tools, { personaSlug, maxTokens, temperature, signal })`.
 * @param {unknown} input.tools - Passed through to `client.send` untouched.
 * @param {string} input.personaSlug - Forwarded to `client.send`.
 * @param {{ maxToolCalls: number, maxTokens: number }} input.budget - Hard caps.
 * @param {Function} input.executor - Async; called with
 *   `{ name, arguments, callId }` and resolves to the tool-result text.
 *   Throwing an Error whose message starts with `PLAN_MODE_REFUSED:` ends
 *   the run as `tool_refused`; any other throw is fed back to the model.
 * @param {number} [input.temperature] - Forwarded to `client.send`.
 * @param {{ aborted: boolean }} [input.signal] - Checked at the top of every
 *   turn and forwarded to `client.send`.
 * @param {object} [input.hooks] - Optional observers: `onTurnStart`,
 *   `onTurnComplete`, `onToolCall`, `onToolResult`.
 * @returns {Promise<object>} `{ status, finalText, toolCallCount, tokensUsed,
 *   turnsUsed }` plus `reason` on every non-`completed` outcome. `status` is
 *   one of `completed`, `failed`, `budget_exhausted`, `tool_refused`.
 */
export async function runEngineLoop(input) {
    const transcript = [
        { role: 'system', content: input.systemPrompt },
        { role: 'user', content: input.userPrompt },
    ];
    let toolCallCount = 0;
    let tokensUsed = 0;
    let turnsUsed = 0;
    // Snapshot the current counters into a result object. `reason` is only
    // attached when supplied so `completed` results keep their exact shape.
    const finish = (status, finalText, reason) => ({
        status,
        finalText,
        toolCallCount,
        tokensUsed,
        turnsUsed,
        ...(reason === undefined ? {} : { reason }),
    });
    while (true) {
        if (input.signal?.aborted) {
            return finish('failed', '', 'aborted');
        }
        input.hooks?.onTurnStart?.(turnsUsed, transcript.length);
        const response = await input.client.send(transcript, input.tools, {
            personaSlug: input.personaSlug,
            // Never request less than 1024 tokens, even when the remaining
            // budget is smaller; the budget gate below still ends the run.
            maxTokens: Math.max(1024, input.budget.maxTokens - tokensUsed),
            temperature: input.temperature,
            signal: input.signal,
        });
        turnsUsed += 1;
        // Hooks receive the zero-based index of the turn that just finished.
        input.hooks?.onTurnComplete?.(turnsUsed - 1, response);
        if (response.stop === 'error') {
            return finish('failed', '', `${response.code}: ${response.message}`);
        }
        // Token accounting. Anvil's chat-completions response normally
        // carries usage; older builds and some providers (notably the
        // OpenRouter passthrough on `tool_use` turns) report 0. Without a
        // fallback the budget gate would never trip (Code Reviewer P2 retro
        // 2026-05-23), so we estimate `transcript-chars / 4` — coarse, but
        // the gate's job is to bound runaway loops, not to bill cents.
        if (response.tokensUsed > 0) {
            tokensUsed += response.tokensUsed;
        }
        else {
            const replyText = response.stop === 'text'
                ? response.content
                : response.assistantMessage.content;
            const heuristicChars = transcript.reduce((sum, m) => sum + m.content.length, 0) + replyText.length;
            // The estimate covers the whole transcript, so it replaces the
            // running total rather than adding to it — but it must never
            // lower a total already accumulated from real usage reports,
            // otherwise one usage-less turn would reopen the budget.
            tokensUsed = Math.max(tokensUsed, Math.ceil(heuristicChars / 4));
        }
        if (tokensUsed > input.budget.maxTokens) {
            return finish('budget_exhausted', response.stop === 'text' ? response.content : '', `token budget exceeded (${tokensUsed} > ${input.budget.maxTokens})`);
        }
        if (response.stop === 'text') {
            return finish('completed', response.content);
        }
        // tool_use — append assistant message verbatim then execute each call.
        transcript.push(response.assistantMessage);
        const calls = response.assistantMessage.toolCalls ?? [];
        if (calls.length === 0) {
            // Model claimed tool_use but produced no calls — stop here and
            // return whatever text the assistant frame carried, so we do
            // not loop forever.
            return finish('completed', response.assistantMessage.content);
        }
        for (const call of calls) {
            if (toolCallCount >= input.budget.maxToolCalls) {
                return finish('budget_exhausted', '', `tool call budget exhausted (${toolCallCount} >= ${input.budget.maxToolCalls})`);
            }
            toolCallCount += 1;
            input.hooks?.onToolCall?.(call);
            try {
                const result = await input.executor({
                    name: call.name,
                    arguments: call.arguments,
                    callId: call.id,
                });
                input.hooks?.onToolResult?.({ id: call.id, name: call.name }, { ok: true, content: result });
                transcript.push({
                    role: 'tool',
                    content: result,
                    toolCallId: call.id,
                    toolName: call.name,
                });
            }
            catch (error) {
                const message = error instanceof Error ? error.message : String(error);
                input.hooks?.onToolResult?.({ id: call.id, name: call.name }, { ok: false, error: message });
                // Plan-mode refusals surface as a distinct outcome so the CLI
                // can mark the run blocked rather than failed. The executor
                // MUST raise an Error whose message starts with the sentinel
                // below for plan refusals; any other thrown error is treated
                // as a recoverable tool error and fed back to the model.
                if (message.startsWith('PLAN_MODE_REFUSED:')) {
                    return finish('tool_refused', '', message);
                }
                transcript.push({
                    role: 'tool',
                    content: `error: ${message}`,
                    toolCallId: call.id,
                    toolName: call.name,
                });
            }
        }
    }
}
|
|
281
|
+
/* ------------------------------------------------------------------ */
|
|
282
|
+
/* Wire format: POST /api/pugi/engine */
|
|
283
|
+
/* ------------------------------------------------------------------ */
|
|
284
|
+
/**
 * Wire request body that `AnvilEngineLoopClient` sends to
 * `POST /api/pugi/engine` on every turn. The Sprint 2E proxy endpoint
 * mirrors this Zod schema on the admin-api side so the contract has a
 * single source of truth.
 *
 * Required:
 *   - `personaSlug` — persona to invoke; the server uses it for persona
 *     system-prompt injection + consensus-tier resolution.
 *   - `messages` — transcript so far (system + user + assistant + tool).
 *   - `tools` — tools the runtime may invoke this turn. The CLI strips
 *     mutating tools when `command === 'plan'`; the server defends against
 *     forged bodies via a second-layer check.
 *
 * Optional (the CLI supplies only a subset today; every documented knob is
 * accepted so Sprint 3+ tooling can opt in without a contract change):
 *   - `command` — engine command kind. When present the server picks a
 *     per-command model from `PUGI_ENGINE_MODEL_<COMMAND>` env or a
 *     hardcoded default; when absent it falls back to the persona's
 *     `defaultModel`.
 *   - `model` — explicit model override; wins over `command` resolution.
 *     Useful for tier-aware operators who want to pin a model.
 *   - `maxTokens` — upper bound on completion size for this turn.
 *   - `temperature` — sampling temperature for this turn.
 */
const engineLoopServerRequestRequiredShape = {
    personaSlug: z.string().min(1).max(128),
    messages: z.array(engineLoopMessageSchema).min(1),
    tools: z.array(engineLoopToolSchema).max(64),
};
const engineLoopServerRequestOptionalShape = {
    command: engineCommandKindSchema.optional(),
    model: z.string().min(1).max(256).optional(),
    maxTokens: z.number().int().positive().max(200_000).optional(),
    temperature: z.number().min(0).max(2).optional(),
};
export const engineLoopServerRequestSchema = z.object({
    ...engineLoopServerRequestRequiredShape,
    ...engineLoopServerRequestOptionalShape,
});
|
|
319
|
+
/**
 * Wire response returned by the admin-api Sprint 2E endpoint for every
 * turn, in the shape `AnvilEngineLoopClient` parses:
 *
 *   - `stop === 'text'`     — the model produced a final answer; the loop
 *     terminates.
 *   - `stop === 'tool_use'` — the model emitted `toolCalls`; the CLI runs
 *     them locally and feeds the results back next turn.
 *   - `stop === 'length'`   — the completion was truncated by `maxTokens`.
 *     The CLI treats this as final text and stops, surfacing the partial
 *     content.
 */
const engineLoopServerToolCallSchema = z.object({
    id: z.string().min(1),
    name: z.string().min(1),
    arguments: z.string(),
});
export const engineLoopServerResponseSchema = z.object({
    stop: z.enum(['text', 'tool_use', 'length']),
    content: z.string(),
    toolCalls: z.array(engineLoopServerToolCallSchema).optional(),
    tokensUsed: z.number().int().nonnegative(),
    model: z.string().min(1),
});
|
|
342
|
+
//# sourceMappingURL=engine-loop.js.map
|