@pugi/sdk 0.1.0-alpha.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,342 @@
1
+ /**
2
+ * Engine loop protocol — the Pugi CLI's tool-use loop driver.
3
+ *
4
+ * The CLI's `NativePugiEngineAdapter` runs a structured tool-use loop against
5
+ * Anvil. Each turn the CLI sends the conversation transcript + a tools schema
6
+ * to the runtime; the runtime returns either a final text answer or a list of
7
+ * tool calls. The CLI executes the calls locally (read/write/edit/grep/glob/
8
+ * bash) against the workspace and feeds the results back in the next turn.
9
+ *
10
+ * This module defines the contracts shared by:
11
+ * - CLI side: the loop driver (`runEngineLoop`) + budget enforcement.
12
+ * - Runtime side: a thin proxy in front of `AnvilBridgeService.askPersona`.
13
+ * - Tests: a fixture-based `EngineLoopClient` that returns canned responses
14
+ * so the loop can be exercised without network.
15
+ *
16
+ * Local-first contract (ADR-0037):
17
+ * - The CLI is the only side that touches the filesystem. The runtime
18
+ * never sees raw file bytes — only the tool results that the local
19
+ * loop chooses to surface back into the transcript.
20
+ * - Budgets (`maxToolCalls`, `maxTokens`) are enforced client-side so a
21
+ * runaway model cannot rack up Anvil cost without the operator noticing.
22
+ * - The loop refuses to write/edit/bash when the command kind is `plan`.
23
+ *
24
+ * Why OpenAI-compatible shape (instead of Anthropic's tool_use blocks):
25
+ * - Anvil's chat-completions endpoint is OpenAI-compatible; coercing to
26
+ * OpenAI-style `tools` + `tool_calls` matches the upstream wire format
27
+ * exactly. Providers that natively speak Anthropic (Claude) are wrapped
28
+ * by Anvil's bridge layer — that translation is not the CLI's concern.
29
+ */
30
+ import { z } from 'zod';
31
/**
 * Closed set of command kinds the CLI can drive the engine loop with.
 * According to this module's notes, the runtime keys its system prompt and
 * persona behaviour off this value.
 *
 * Default budgets per kind (values mirror `defaultEngineBudgets`):
 *   - `code`    — general edit + create.               20 tool calls / 50k tokens.
 *   - `explain` — read-only walkthrough.                5 tool calls / 20k tokens.
 *   - `fix`     — bug investigation + targeted patch.  20 tool calls / 50k tokens.
 *   - `plan`    — produce a plan artifact, no mutations. 8 tool calls / 30k tokens.
 *                 Mutating tools are refused even if the model requests them.
 *   - `build`   — multi-file scaffolding.              30 tool calls / 80k tokens.
 */
export const engineCommandKindSchema = z.enum(['code', 'explain', 'fix', 'plan', 'build']);
48
/**
 * Builds a fresh "positive integer" validator. Each budget cap gets its own
 * schema instance, exactly as if the chain were written out per field.
 */
const positiveIntegerCap = () => z.number().int().positive();

/**
 * Budget envelope for a single command run. Both caps are hard limits that
 * `runEngineLoop` enforces on the client side:
 *
 *   - `maxToolCalls` — ceiling on tool calls executed across every turn.
 *   - `maxTokens`    — ceiling on tokens consumed across every turn. The loop
 *     adds up the `tokensUsed` figure each response carries; when a response
 *     reports no usage (`tokensUsed === 0`) the loop substitutes a
 *     `transcript-chars / 4` estimate, so runtimes that omit usage accounting
 *     (older Anvil builds, fixture clients, providers that return null usage
 *     on tool_use responses) still hit the cap instead of looping forever.
 *     Code Reviewer P2 retro 2026-05-23.
 *
 * Crossing either cap ends the loop with `status: 'budget_exhausted'`; it is
 * up to the caller to treat that as a failure or as a normal stop.
 */
export const engineBudgetSchema = z.object({
  maxToolCalls: positiveIntegerCap(),
  maxTokens: positiveIntegerCap(),
});
66
/**
 * Out-of-the-box budget for every command kind, chosen to keep Anvil cost
 * predictable while leaving `build` room to scaffold a small feature.
 *
 * Each row below is `[kind, maxToolCalls, maxTokens]`; the exported value is
 * a plain object keyed by kind whose entries are `{ maxToolCalls, maxTokens }`.
 * Every kind gets its own entry object (`code` and `fix` share numbers, not
 * identity).
 *
 * Dogfood note 2026-05-24: `plan` started at 3 tool calls on the assumption
 * that the model would issue 1-2 reads and then emit the plan. Real traces
 * showed 3-4 glob/grep calls vanishing into repo surveying alone, leaving no
 * plan output and an artifact reading `[budget_exhausted]`. Raising the cap
 * to 8 gives decently-sized repos breathing room while still bounding cost.
 * `plan` remains read-only at the sentinel level, so the higher call count
 * does not weaken safety.
 */
export const defaultEngineBudgets = Object.fromEntries(
  [
    ['code', 20, 50_000],
    ['explain', 5, 20_000],
    ['fix', 20, 50_000],
    ['plan', 8, 30_000], // read-only
    ['build', 30, 80_000],
  ].map(([kind, maxToolCalls, maxTokens]) => [kind, { maxToolCalls, maxTokens }]),
);
90
/**
 * One model-emitted tool call as carried on an assistant message: a
 * non-empty call id, a non-empty tool name, and the arguments as a string.
 */
const messageToolCallSchema = z.object({
  id: z.string().min(1),
  name: z.string().min(1),
  arguments: z.string(),
});

/**
 * A single transcript frame. The shape follows OpenAI's chat-completions
 * messages, extended with a `tool` role for tool-result frames. Per this
 * module's notes, the runtime proxy maps these onto AnvilBridgeMessage (same
 * shape, except that `name` carries the tool_call_id on tool frames).
 */
export const engineLoopMessageSchema = z.object({
  role: z.enum(['system', 'user', 'assistant', 'tool']),
  content: z.string(),
  // Present only on `assistant` frames where the model requested tools.
  toolCalls: z.array(messageToolCallSchema).optional(),
  // On `tool` frames: id of the call this result answers.
  toolCallId: z.string().optional(),
  // On `tool` frames: name of the tool this result came from.
  toolName: z.string().optional(),
});
112
/**
 * Tool definition in the OpenAI-compatible form. Per this module's notes the
 * CLI derives these entries from `toolRegistry`.
 */
export const engineLoopToolSchema = z.object({
  // Tool identifier; must be a non-empty string.
  name: z.string().min(1),
  // Description of the tool; must be a non-empty string.
  description: z.string().min(1),
  // JSON Schema object describing the arguments. Left as `unknown` so the
  // SDK does not commit to any particular JSON Schema version.
  parameters: z.unknown(),
});
122
/**
 * Core driver. Pure, transport-agnostic tool-use loop:
 *
 *   1. Seed the transcript with the system + user messages.
 *   2. Call `client.send(transcript, tools, options)`.
 *   3. If the response is `text` → return `completed`.
 *   4. If the response is `tool_use` → append the assistant frame, execute
 *      each call via `executor`, append one `tool` frame per call, loop.
 *   5. After every turn check the token budget; before every tool call check
 *      the tool-call budget; bail with `budget_exhausted` when exceeded.
 *
 * No filesystem access lives here — the CLI's `engine-tools.ts` is the sole
 * place that touches disk. Keeping the loop pure makes it trivial to
 * unit-test with a fixture client.
 *
 * Token accounting: every turn ADDS to the running total. When the response
 * reports `tokensUsed > 0` that figure is added; otherwise a
 * `(transcript chars + reply chars) / 4` estimate for the turn is added. The
 * total therefore never decreases, so a turn that omits usage cannot erase
 * usage reported by earlier turns.
 *
 * @param {object} input
 * @param {string} input.systemPrompt - Content of the initial `system` frame.
 * @param {string} input.userPrompt - Content of the initial `user` frame.
 * @param {{ send: Function }} input.client - Transport; `send` resolves to a
 *   response whose `stop` is `'text'`, `'tool_use'` or `'error'`.
 * @param {Array} input.tools - Tool definitions forwarded to `client.send`.
 * @param {{ maxToolCalls: number, maxTokens: number }} input.budget - Hard caps.
 * @param {Function} input.executor - Called with `{ name, arguments, callId }`;
 *   resolves to the tool result string. A thrown error whose message starts
 *   with `PLAN_MODE_REFUSED:` ends the run as `tool_refused`; any other
 *   thrown error is fed back to the model as an `error: …` tool frame.
 * @param {string} [input.personaSlug] - Forwarded to `client.send`.
 * @param {number} [input.temperature] - Forwarded to `client.send`.
 * @param {AbortSignal} [input.signal] - Checked at the start of every turn and
 *   forwarded to `client.send`.
 * @param {object} [input.hooks] - Optional observers: `onTurnStart`,
 *   `onTurnComplete`, `onToolCall`, `onToolResult`.
 * @returns {Promise<object>} `{ status, finalText, toolCallCount, tokensUsed,
 *   turnsUsed, reason? }` where `status` is `'completed'`, `'failed'`,
 *   `'budget_exhausted'` or `'tool_refused'`. `reason` is present on every
 *   status except `'completed'`.
 */
export async function runEngineLoop(input) {
  const transcript = [
    { role: 'system', content: input.systemPrompt },
    { role: 'user', content: input.userPrompt },
  ];
  let toolCallCount = 0;
  let tokensUsed = 0;
  let turnsUsed = 0;

  // Snapshot the live counters into a result envelope. `reason` is only
  // attached when supplied so `completed` results keep their original shape.
  const finish = (status, finalText, reason) => {
    const result = { status, finalText, toolCallCount, tokensUsed, turnsUsed };
    if (reason !== undefined) {
      result.reason = reason;
    }
    return result;
  };

  while (true) {
    if (input.signal?.aborted) {
      return finish('failed', '', 'aborted');
    }

    input.hooks?.onTurnStart?.(turnsUsed, transcript.length);
    const response = await input.client.send(transcript, input.tools, {
      personaSlug: input.personaSlug,
      // Never request fewer than 1024 completion tokens, even when the
      // remaining budget is smaller; the gate below still stops the run.
      maxTokens: Math.max(1024, input.budget.maxTokens - tokensUsed),
      temperature: input.temperature,
      signal: input.signal,
    });
    turnsUsed += 1;
    input.hooks?.onTurnComplete?.(turnsUsed - 1, response);

    if (response.stop === 'error') {
      return finish('failed', '', `${response.code}: ${response.message}`);
    }

    const isFinalText = response.stop === 'text';

    // Token accounting. Anvil's chat-completions response normally carries
    // usage; older builds and some providers (notably the OpenRouter
    // passthrough on `tool_use` turns) return 0. Without a fallback the
    // budget gate would never trip (Code Reviewer P2 retro 2026-05-23), so
    // estimate this turn as chars / 4 — coarse, but the gate's job is to
    // bound runaway loops, not to bill cents.
    if (response.tokensUsed > 0) {
      tokensUsed += response.tokensUsed;
    } else {
      const transcriptChars = transcript.reduce((sum, m) => sum + m.content.length, 0);
      const replyChars = isFinalText
        ? response.content.length
        : response.assistantMessage.content.length;
      // Accumulate rather than assign: overwriting here would discard usage
      // reported by earlier turns and let the running total go backwards.
      tokensUsed += Math.ceil((transcriptChars + replyChars) / 4);
    }

    if (tokensUsed > input.budget.maxTokens) {
      return finish(
        'budget_exhausted',
        isFinalText ? response.content : '',
        `token budget exceeded (${tokensUsed} > ${input.budget.maxTokens})`,
      );
    }

    if (isFinalText) {
      return finish('completed', response.content);
    }

    // tool_use — append the assistant message verbatim, then run each call.
    transcript.push(response.assistantMessage);
    const calls = response.assistantMessage.toolCalls ?? [];
    if (calls.length === 0) {
      // Model claimed tool_use but produced no calls — stop with whatever
      // content the assistant frame carried so we do not loop forever.
      return finish('completed', response.assistantMessage.content);
    }

    for (const call of calls) {
      if (toolCallCount >= input.budget.maxToolCalls) {
        return finish(
          'budget_exhausted',
          '',
          `tool call budget exhausted (${toolCallCount} >= ${input.budget.maxToolCalls})`,
        );
      }
      toolCallCount += 1;
      input.hooks?.onToolCall?.(call);
      try {
        const result = await input.executor({
          name: call.name,
          arguments: call.arguments,
          callId: call.id,
        });
        input.hooks?.onToolResult?.({ id: call.id, name: call.name }, { ok: true, content: result });
        transcript.push({
          role: 'tool',
          content: result,
          toolCallId: call.id,
          toolName: call.name,
        });
      } catch (error) {
        const message = error instanceof Error ? error.message : String(error);
        input.hooks?.onToolResult?.({ id: call.id, name: call.name }, { ok: false, error: message });
        // Plan-mode refusals surface as a distinct outcome so the CLI can
        // mark the run blocked rather than failed. The executor MUST raise
        // an Error whose message starts with the sentinel below for plan
        // refusals; any other thrown error is treated as a recoverable tool
        // error and fed back to the model.
        if (message.startsWith('PLAN_MODE_REFUSED:')) {
          return finish('tool_refused', '', message);
        }
        transcript.push({
          role: 'tool',
          content: `error: ${message}`,
          toolCallId: call.id,
          toolName: call.name,
        });
      }
    }
  }
}
281
+ /* ------------------------------------------------------------------ */
282
+ /* Wire format: POST /api/pugi/engine */
283
+ /* ------------------------------------------------------------------ */
284
/**
 * Wire request body for `POST /api/pugi/engine` — what
 * `AnvilEngineLoopClient` sends on every turn. Per this module's notes the
 * Sprint 2E proxy endpoint mirrors this schema on the admin-api side so the
 * contract has a single source of truth.
 *
 * `personaSlug`, `messages` and `tools` are required. The remaining fields
 * are optional: the CLI supplies only a subset today, and the schema accepts
 * every documented knob so Sprint 3+ tooling can opt in without a contract
 * change.
 */
export const engineLoopServerRequestSchema = z.object({
  // Persona to invoke (1-128 chars). The server uses it for persona
  // system-prompt injection + consensus-tier resolution.
  personaSlug: z.string().min(1).max(128),
  // Transcript so far (system + user + assistant + tool); at least one frame.
  messages: z.array(engineLoopMessageSchema).min(1),
  // Tools the runtime may invoke this turn; at most 64. The CLI strips
  // mutating tools when `command === 'plan'`, and the server defends against
  // forged bodies via a second-layer check.
  tools: z.array(engineLoopToolSchema).max(64),
  // Engine command kind. When present the server picks a per-command model
  // from `PUGI_ENGINE_MODEL_<COMMAND>` env or a hardcoded default; when
  // absent it falls back to the persona's `defaultModel`.
  command: engineCommandKindSchema.optional(),
  // Explicit model override (1-256 chars). Wins over `command` resolution;
  // useful for tier-aware operators who want to pin a model.
  model: z.string().min(1).max(256).optional(),
  // Upper bound on completion size for this turn; positive integer ≤ 200,000.
  maxTokens: z.number().int().positive().max(200_000).optional(),
  // Sampling temperature for this turn, within [0, 2].
  temperature: z.number().min(0).max(2).optional(),
});
319
/**
 * One tool call as returned by the server: a non-empty call id, a non-empty
 * tool name, and the arguments as a string.
 */
const responseToolCallSchema = z.object({
  id: z.string().min(1),
  name: z.string().min(1),
  arguments: z.string(),
});

/**
 * Wire response body the admin-api Sprint 2E endpoint returns for every
 * turn; this is the shape `AnvilEngineLoopClient` parses.
 *
 * `stop` values:
 *   - `'text'`     — the model produced a final answer; the loop terminates.
 *   - `'tool_use'` — the model emitted `toolCalls`; the CLI executes them
 *     locally and feeds the results back on the next turn.
 *   - `'length'`   — the completion was truncated by `maxTokens`. The CLI is
 *     documented to treat this as final text and surface the partial content.
 *     NOTE(review): `runEngineLoop` in this module only branches on `'text'`
 *     and `'error'` — presumably the client maps `'length'` to `'text'`
 *     before the loop sees it; confirm against `AnvilEngineLoopClient`.
 */
export const engineLoopServerResponseSchema = z.object({
  stop: z.enum(['text', 'tool_use', 'length']),
  content: z.string(),
  toolCalls: z.array(responseToolCallSchema).optional(),
  // Non-negative integer; 0 is allowed.
  tokensUsed: z.number().int().nonnegative(),
  // Non-empty model identifier.
  model: z.string().min(1),
});
342
+ //# sourceMappingURL=engine-loop.js.map