@pugi/sdk 0.1.0-alpha.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,457 @@
1
+ /**
2
+ * Engine loop protocol — the Pugi CLI's tool-use loop driver.
3
+ *
4
+ * The CLI's `NativePugiEngineAdapter` runs a structured tool-use loop against
5
+ * Anvil. Each turn the CLI sends the conversation transcript + a tools schema
6
+ * to the runtime; the runtime returns either a final text answer or a list of
7
+ * tool calls. The CLI executes the calls locally (read/write/edit/grep/glob/
8
+ * bash) against the workspace and feeds the results back in the next turn.
9
+ *
10
+ * This module defines the contracts shared by:
11
+ * - CLI side: the loop driver (`runEngineLoop`) + budget enforcement.
12
+ * - Runtime side: a thin proxy in front of `AnvilBridgeService.askPersona`.
13
+ * - Tests: a fixture-based `EngineLoopClient` that returns canned responses
14
+ * so the loop can be exercised without network.
15
+ *
16
+ * Local-first contract (ADR-0037):
17
+ * - The CLI is the only side that touches the filesystem. The runtime
18
+ * never sees raw file bytes — only the tool results that the local
19
+ * loop chooses to surface back into the transcript.
20
+ * - Budgets (`maxToolCalls`, `maxTokens`) are enforced client-side so a
21
+ * runaway model cannot rack up Anvil cost without the operator noticing.
22
+ * - The loop refuses to write/edit/bash when the command kind is `plan`.
23
+ *
24
+ * Why OpenAI-compatible shape (instead of Anthropic's tool_use blocks):
25
+ * - Anvil's chat-completions endpoint is OpenAI-compatible; coercing to
26
+ * OpenAI-style `tools` + `tool_calls` matches the upstream wire format
27
+ * exactly. Providers that natively speak Anthropic (Claude) are wrapped
28
+ * by Anvil's bridge layer — that translation is not the CLI's concern.
29
+ */
30
+ import { z } from 'zod';
31
+ /**
32
+ * Command surface that the CLI invokes. The runtime uses this to select a
33
+ * system prompt and persona behaviour:
34
+ * - `code` — general edit+create. Budget: 20 tool calls / 50k tokens.
35
+ * - `explain` — read-only walkthrough. Budget: 5 / 20k.
36
+ * - `fix` — bug investigation + targeted patch. Budget: 20 / 50k.
37
+ * - `plan` — produce a plan artifact, no mutations. Budget: 8 / 30k.
38
+ * Mutating tools refused even if the model requests them.
39
+ * - `build` — multi-file scaffolding. Budget: 30 / 80k.
40
+ */
41
+ export declare const engineCommandKindSchema: z.ZodEnum<["code", "explain", "fix", "plan", "build"]>;
42
+ export type EngineCommandKind = z.infer<typeof engineCommandKindSchema>;
43
+ /**
44
+ * Per-command budget envelope. Hard caps enforced inside `runEngineLoop`:
45
+ * - `maxToolCalls` — total executed tool calls across all turns.
46
+ * - `maxTokens` — total tokens accumulated (prompt + completion) across
47
+ * turns. Counted via `usage.totalTokens` reported by the runtime; when
48
+ * the runtime reports `tokensUsed === 0` we fall back to a
49
+ * `transcript-chars / 4` heuristic so a runtime that omits usage
50
+ * accounting (older Anvil builds, fixture clients, providers that
51
+ * return null usage on tool_use responses) still trips the budget
52
+ * instead of looping forever. Code Reviewer P2 retro 2026-05-23.
53
+ *
54
+ * The loop terminates with `status: 'budget_exhausted'` when either cap is
55
+ * exceeded. The caller decides whether that is a failure or a normal stop.
56
+ */
57
+ export declare const engineBudgetSchema: z.ZodObject<{
58
+ maxToolCalls: z.ZodNumber;
59
+ maxTokens: z.ZodNumber;
60
+ }, "strip", z.ZodTypeAny, {
61
+ maxToolCalls: number;
62
+ maxTokens: number;
63
+ }, {
64
+ maxToolCalls: number;
65
+ maxTokens: number;
66
+ }>;
67
+ export type EngineBudget = z.infer<typeof engineBudgetSchema>;
68
+ /**
69
+ * Canonical per-command budgets. Tuned to keep Anvil cost predictable while
70
+ * still giving `build` enough headroom to scaffold a small feature.
71
+ *
72
+ * code/fix → 20 calls / 50k tokens
73
+ * explain → 5 calls / 20k tokens
74
+ * plan → 8 calls / 30k tokens (read-only)
75
+ * build → 30 calls / 80k tokens
76
+ *
77
+ * Dogfood note 2026-05-24: `plan` was originally budgeted at 3 tool calls
78
+ * on the assumption that the model would issue 1-2 read calls + emit the
79
+ * plan. Real-world traces show 3-4 glob/grep calls disappear into repo
80
+ * surveying alone — the model produces zero plan output and the artifact
81
+ * file says `[budget_exhausted]`. Bumping to 8 buys breathing room for
82
+ * decently-sized repos while still bounding cost. plan stays read-only at
83
+ * the sentinel level — the call-count change does not weaken safety.
84
+ */
85
+ export declare const defaultEngineBudgets: Record<EngineCommandKind, EngineBudget>;
86
+ /**
87
+ * Message role shape — mirrors OpenAI's chat-completions schema with a
88
+ * `tool` role for tool result frames. Pugi's runtime proxy maps these to
89
+ * AnvilBridgeMessage (which has the same shape modulo `name` carrying the
90
+ * tool_call_id for tool frames).
91
+ */
92
+ export declare const engineLoopMessageSchema: z.ZodObject<{
93
+ role: z.ZodEnum<["system", "user", "assistant", "tool"]>;
94
+ content: z.ZodString;
95
+ /** Optional model-emitted tool calls when `role === 'assistant'`. */
96
+ toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
97
+ id: z.ZodString;
98
+ name: z.ZodString;
99
+ arguments: z.ZodString;
100
+ }, "strip", z.ZodTypeAny, {
101
+ id: string;
102
+ name: string;
103
+ arguments: string;
104
+ }, {
105
+ id: string;
106
+ name: string;
107
+ arguments: string;
108
+ }>, "many">>;
109
+ /** Tool call id this `tool` frame is responding to. */
110
+ toolCallId: z.ZodOptional<z.ZodString>;
111
+ /** Tool name this `tool` frame is responding to. */
112
+ toolName: z.ZodOptional<z.ZodString>;
113
+ }, "strip", z.ZodTypeAny, {
114
+ role: "system" | "user" | "assistant" | "tool";
115
+ content: string;
116
+ toolCalls?: {
117
+ id: string;
118
+ name: string;
119
+ arguments: string;
120
+ }[] | undefined;
121
+ toolCallId?: string | undefined;
122
+ toolName?: string | undefined;
123
+ }, {
124
+ role: "system" | "user" | "assistant" | "tool";
125
+ content: string;
126
+ toolCalls?: {
127
+ id: string;
128
+ name: string;
129
+ arguments: string;
130
+ }[] | undefined;
131
+ toolCallId?: string | undefined;
132
+ toolName?: string | undefined;
133
+ }>;
134
+ export type EngineLoopMessage = z.infer<typeof engineLoopMessageSchema>;
135
+ /**
136
+ * OpenAI-compatible tool definition. The CLI builds this from
137
+ * `toolRegistry`. `parameters` is a JSON Schema object — we keep it as
138
+ * `unknown` here so the SDK stays JSON-Schema-version-agnostic.
139
+ */
140
+ export declare const engineLoopToolSchema: z.ZodObject<{
141
+ name: z.ZodString;
142
+ description: z.ZodString;
143
+ parameters: z.ZodUnknown;
144
+ }, "strip", z.ZodTypeAny, {
145
+ name: string;
146
+ description: string;
147
+ parameters?: unknown;
148
+ }, {
149
+ name: string;
150
+ description: string;
151
+ parameters?: unknown;
152
+ }>;
153
+ export type EngineLoopTool = z.infer<typeof engineLoopToolSchema>;
154
+ /**
155
+ * Response shape returned by `EngineLoopClient.send`. Exactly one of the
156
+ * three `stop` variants is returned per turn:
157
+ * - `tool_use`: model emitted N tool calls; loop must execute and feed
158
+ * results back.
159
+ * - `text`: model emitted a final text answer; loop stops.
160
+ * - `error`: runtime call failed (auth, network, schema). Loop stops
161
+ * with `status: 'failed'` so the caller surfaces the cause.
162
+ */
163
+ export type EngineLoopResponse = {
164
+ stop: 'tool_use';
165
+ assistantMessage: EngineLoopMessage;
166
+ tokensUsed: number;
167
+ } | {
168
+ stop: 'text';
169
+ assistantMessage: EngineLoopMessage;
170
+ content: string;
171
+ tokensUsed: number;
172
+ } | {
173
+ stop: 'error';
174
+ code: 'auth_missing' | 'endpoint_missing' | 'rate_limited' | 'failed';
175
+ message: string;
176
+ };
177
+ /**
178
+ * Pluggable transport. Production binds this to an Anvil-backed HTTP
179
+ * client; tests bind a fixture client. The interface is intentionally
180
+ * narrow — just `send(messages, tools, options)` — so the loop driver
181
+ * does not depend on whether the runtime is HTTP, gRPC, or in-process.
182
+ */
183
+ export interface EngineLoopClient {
184
+ send(messages: EngineLoopMessage[], tools: EngineLoopTool[], options: {
185
+ personaSlug: string;
186
+ maxTokens?: number;
187
+ temperature?: number;
188
+ signal?: AbortSignal;
189
+ }): Promise<EngineLoopResponse>;
190
+ }
191
+ /**
192
+ * Tool execution callback. Returns the textual result to feed back into
193
+ * the next turn's `tool` message. Throws → execution failed (the CLI
194
+ * captures the error string and feeds it back so the model can recover).
195
+ */
196
+ export type EngineLoopToolExecutor = (input: {
197
+ name: string;
198
+ arguments: string;
199
+ callId: string;
200
+ }) => Promise<string>;
201
+ /**
202
+ * Lifecycle hooks. The CLI side wires these to `session.ts`
203
+ * recordToolCall/recordToolResult so the existing event log + index
204
+ * reflects every loop iteration.
205
+ */
206
+ export interface EngineLoopHooks {
207
+ onTurnStart?(turnIndex: number, messageCount: number): void;
208
+ onTurnComplete?(turnIndex: number, response: EngineLoopResponse): void;
209
+ onToolCall?(call: {
210
+ id: string;
211
+ name: string;
212
+ arguments: string;
213
+ }): void;
214
+ onToolResult?(call: {
215
+ id: string;
216
+ name: string;
217
+ }, result: {
218
+ ok: true;
219
+ content: string;
220
+ } | {
221
+ ok: false;
222
+ error: string;
223
+ }): void;
224
+ }
225
+ /**
226
+ * Terminal status of the loop driver. The CLI maps these to the existing
227
+ * `EngineResult.status` ('done' | 'blocked' | 'failed'):
228
+ * - completed → done
229
+ * - budget_exhausted → blocked (operator chose the budget)
230
+ * - tool_refused → blocked (plan mode + write requested)
231
+ * - failed → failed (runtime/transport problem)
232
+ */
233
+ export type EngineLoopStatus = 'completed' | 'budget_exhausted' | 'tool_refused' | 'failed';
234
+ export interface EngineLoopOutcome {
235
+ status: EngineLoopStatus;
236
+ /** Final assistant text (empty string on failure). */
237
+ finalText: string;
238
+ /** Number of tool calls actually executed. */
239
+ toolCallCount: number;
240
+ /** Cumulative tokens reported by the runtime. */
241
+ tokensUsed: number;
242
+ /** Number of LLM turns (network round-trips) used. */
243
+ turnsUsed: number;
244
+ /** Reason the loop stopped early; set when `status !== 'completed'`. */
245
+ reason?: string;
246
+ }
247
+ /**
248
+ * Core driver. Pure transport-agnostic loop:
249
+ *
250
+ * 1. Seed the transcript with the system + user messages.
251
+ * 2. Call `client.send(transcript, tools, options)`.
252
+ * 3. If response is `text` → return completed.
253
+ * 4. If response is `tool_use` → execute each call via `executor`,
254
+ * append the assistant + tool frames to the transcript, increment
255
+ * counters, loop.
256
+ * 5. After every turn check budgets; bail if exceeded.
257
+ *
258
+ * No filesystem access lives here — the CLI's `engine-tools.ts` is the
259
+ * sole place that touches disk. Keeping the loop pure makes it trivial
260
+ * to unit-test with a fixture client.
261
+ */
262
+ export declare function runEngineLoop(input: {
263
+ client: EngineLoopClient;
264
+ executor: EngineLoopToolExecutor;
265
+ systemPrompt: string;
266
+ userPrompt: string;
267
+ tools: EngineLoopTool[];
268
+ budget: EngineBudget;
269
+ personaSlug: string;
270
+ hooks?: EngineLoopHooks;
271
+ temperature?: number;
272
+ signal?: AbortSignal;
273
+ }): Promise<EngineLoopOutcome>;
274
+ /**
275
+ * Server wire request — what `AnvilEngineLoopClient` POSTs to
276
+ * `POST /api/pugi/engine` on every turn. Sprint 2E proxy endpoint
277
+ * mirrors this Zod schema admin-api-side so the contract has a single
278
+ * source of truth.
279
+ *
280
+ * Required fields:
281
+ * - `messages` — transcript so far (system + user + assistant + tool).
282
+ * - `tools` — tool registry the runtime is allowed to invoke for this
283
+ * turn. The CLI strips mutating tools when `command === 'plan'`;
284
+ * the server defends against forged bodies via a second-layer check.
285
+ * - `personaSlug` — persona to invoke; the server uses this for
286
+ * persona system-prompt injection + consensus-tier resolution.
287
+ *
288
+ * Optional fields (the CLI only supplies a subset today; the schema
289
+ * accepts every documented knob so Sprint 3+ tooling can opt in
290
+ * without a contract change):
291
+ * - `command` — engine command kind. When present the server picks a
292
+ * per-command model from `PUGI_ENGINE_MODEL_<COMMAND>` env or
293
+ * hardcoded default. When absent the server falls back to the
294
+ * persona's `defaultModel`.
295
+ * - `model` — explicit model override. Wins over `command` resolution.
296
+ * Useful for tier-aware operators who want to pin a model.
297
+ * - `maxTokens` — upper bound on completion size for this turn.
298
+ * - `temperature` — sampling temperature for this turn.
299
+ */
300
+ export declare const engineLoopServerRequestSchema: z.ZodObject<{
301
+ personaSlug: z.ZodString;
302
+ messages: z.ZodArray<z.ZodObject<{
303
+ role: z.ZodEnum<["system", "user", "assistant", "tool"]>;
304
+ content: z.ZodString;
305
+ /** Optional model-emitted tool calls when `role === 'assistant'`. */
306
+ toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
307
+ id: z.ZodString;
308
+ name: z.ZodString;
309
+ arguments: z.ZodString;
310
+ }, "strip", z.ZodTypeAny, {
311
+ id: string;
312
+ name: string;
313
+ arguments: string;
314
+ }, {
315
+ id: string;
316
+ name: string;
317
+ arguments: string;
318
+ }>, "many">>;
319
+ /** Tool call id this `tool` frame is responding to. */
320
+ toolCallId: z.ZodOptional<z.ZodString>;
321
+ /** Tool name this `tool` frame is responding to. */
322
+ toolName: z.ZodOptional<z.ZodString>;
323
+ }, "strip", z.ZodTypeAny, {
324
+ role: "system" | "user" | "assistant" | "tool";
325
+ content: string;
326
+ toolCalls?: {
327
+ id: string;
328
+ name: string;
329
+ arguments: string;
330
+ }[] | undefined;
331
+ toolCallId?: string | undefined;
332
+ toolName?: string | undefined;
333
+ }, {
334
+ role: "system" | "user" | "assistant" | "tool";
335
+ content: string;
336
+ toolCalls?: {
337
+ id: string;
338
+ name: string;
339
+ arguments: string;
340
+ }[] | undefined;
341
+ toolCallId?: string | undefined;
342
+ toolName?: string | undefined;
343
+ }>, "many">;
344
+ tools: z.ZodArray<z.ZodObject<{
345
+ name: z.ZodString;
346
+ description: z.ZodString;
347
+ parameters: z.ZodUnknown;
348
+ }, "strip", z.ZodTypeAny, {
349
+ name: string;
350
+ description: string;
351
+ parameters?: unknown;
352
+ }, {
353
+ name: string;
354
+ description: string;
355
+ parameters?: unknown;
356
+ }>, "many">;
357
+ command: z.ZodOptional<z.ZodEnum<["code", "explain", "fix", "plan", "build"]>>;
358
+ model: z.ZodOptional<z.ZodString>;
359
+ maxTokens: z.ZodOptional<z.ZodNumber>;
360
+ temperature: z.ZodOptional<z.ZodNumber>;
361
+ }, "strip", z.ZodTypeAny, {
362
+ personaSlug: string;
363
+ messages: {
364
+ role: "system" | "user" | "assistant" | "tool";
365
+ content: string;
366
+ toolCalls?: {
367
+ id: string;
368
+ name: string;
369
+ arguments: string;
370
+ }[] | undefined;
371
+ toolCallId?: string | undefined;
372
+ toolName?: string | undefined;
373
+ }[];
374
+ tools: {
375
+ name: string;
376
+ description: string;
377
+ parameters?: unknown;
378
+ }[];
379
+ maxTokens?: number | undefined;
380
+ command?: "code" | "explain" | "fix" | "plan" | "build" | undefined;
381
+ model?: string | undefined;
382
+ temperature?: number | undefined;
383
+ }, {
384
+ personaSlug: string;
385
+ messages: {
386
+ role: "system" | "user" | "assistant" | "tool";
387
+ content: string;
388
+ toolCalls?: {
389
+ id: string;
390
+ name: string;
391
+ arguments: string;
392
+ }[] | undefined;
393
+ toolCallId?: string | undefined;
394
+ toolName?: string | undefined;
395
+ }[];
396
+ tools: {
397
+ name: string;
398
+ description: string;
399
+ parameters?: unknown;
400
+ }[];
401
+ maxTokens?: number | undefined;
402
+ command?: "code" | "explain" | "fix" | "plan" | "build" | undefined;
403
+ model?: string | undefined;
404
+ temperature?: number | undefined;
405
+ }>;
406
+ export type EngineLoopServerRequest = z.infer<typeof engineLoopServerRequestSchema>;
407
+ /**
408
+ * Server wire response — what the admin-api Sprint 2E endpoint returns
409
+ * for every turn. The shape matches what `AnvilEngineLoopClient` parses:
410
+ *
411
+ * - `stop === 'text'` — model produced a final answer, loop terminates.
412
+ * - `stop === 'tool_use'` — model emitted `toolCalls`, CLI executes
413
+ * them locally and feeds results back next turn.
414
+ * - `stop === 'length'` — completion truncated by `maxTokens`. The
415
+ * CLI treats this as final text and stops, surfacing the partial content.
416
+ */
417
+ export declare const engineLoopServerResponseSchema: z.ZodObject<{
418
+ stop: z.ZodEnum<["text", "tool_use", "length"]>;
419
+ content: z.ZodString;
420
+ toolCalls: z.ZodOptional<z.ZodArray<z.ZodObject<{
421
+ id: z.ZodString;
422
+ name: z.ZodString;
423
+ arguments: z.ZodString;
424
+ }, "strip", z.ZodTypeAny, {
425
+ id: string;
426
+ name: string;
427
+ arguments: string;
428
+ }, {
429
+ id: string;
430
+ name: string;
431
+ arguments: string;
432
+ }>, "many">>;
433
+ tokensUsed: z.ZodNumber;
434
+ model: z.ZodString;
435
+ }, "strip", z.ZodTypeAny, {
436
+ content: string;
437
+ model: string;
438
+ stop: "length" | "text" | "tool_use";
439
+ tokensUsed: number;
440
+ toolCalls?: {
441
+ id: string;
442
+ name: string;
443
+ arguments: string;
444
+ }[] | undefined;
445
+ }, {
446
+ content: string;
447
+ model: string;
448
+ stop: "length" | "text" | "tool_use";
449
+ tokensUsed: number;
450
+ toolCalls?: {
451
+ id: string;
452
+ name: string;
453
+ arguments: string;
454
+ }[] | undefined;
455
+ }>;
456
+ export type EngineLoopServerResponse = z.infer<typeof engineLoopServerResponseSchema>;
457
+ //# sourceMappingURL=engine-loop.d.ts.map