@providerprotocol/agents 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (74) hide show
  1. package/LICENSE +21 -0
  2. package/dist/checkpoint/index.d.ts +43 -0
  3. package/dist/checkpoint/index.js +73 -0
  4. package/dist/checkpoint/index.js.map +1 -0
  5. package/{src/execution/loop.ts → dist/chunk-4ESYN66B.js} +54 -162
  6. package/dist/chunk-4ESYN66B.js.map +1 -0
  7. package/dist/chunk-EKRXMSDX.js +8 -0
  8. package/dist/chunk-EKRXMSDX.js.map +1 -0
  9. package/dist/chunk-T47B3VAF.js +427 -0
  10. package/dist/chunk-T47B3VAF.js.map +1 -0
  11. package/dist/execution/index.d.ts +105 -0
  12. package/dist/execution/index.js +679 -0
  13. package/dist/execution/index.js.map +1 -0
  14. package/dist/index-qsPwbY86.d.ts +65 -0
  15. package/dist/index.d.ts +101 -0
  16. package/dist/index.js +218 -0
  17. package/dist/index.js.map +1 -0
  18. package/dist/middleware/index.d.ts +23 -0
  19. package/dist/middleware/index.js +82 -0
  20. package/dist/middleware/index.js.map +1 -0
  21. package/dist/thread-tree/index.d.ts +115 -0
  22. package/dist/thread-tree/index.js +4 -0
  23. package/dist/thread-tree/index.js.map +1 -0
  24. package/dist/types-2Vsthzyu.d.ts +163 -0
  25. package/dist/types-BiyEVOnf.d.ts +65 -0
  26. package/dist/types-D1egxttz.d.ts +270 -0
  27. package/dist/types-DChRdQoX.d.ts +98 -0
  28. package/package.json +41 -9
  29. package/.claude/settings.local.json +0 -29
  30. package/AGENTS.md +0 -681
  31. package/CLAUDE.md +0 -681
  32. package/bun.lock +0 -472
  33. package/eslint.config.js +0 -75
  34. package/index.ts +0 -1
  35. package/llms.md +0 -796
  36. package/specs/UAP-1.0.md +0 -2355
  37. package/src/agent/index.ts +0 -384
  38. package/src/agent/types.ts +0 -91
  39. package/src/checkpoint/file.ts +0 -126
  40. package/src/checkpoint/index.ts +0 -40
  41. package/src/checkpoint/types.ts +0 -95
  42. package/src/execution/index.ts +0 -37
  43. package/src/execution/plan.ts +0 -497
  44. package/src/execution/react.ts +0 -340
  45. package/src/execution/tool-ordering.ts +0 -186
  46. package/src/execution/types.ts +0 -315
  47. package/src/index.ts +0 -80
  48. package/src/middleware/index.ts +0 -7
  49. package/src/middleware/logging.ts +0 -123
  50. package/src/middleware/types.ts +0 -69
  51. package/src/state/index.ts +0 -301
  52. package/src/state/types.ts +0 -173
  53. package/src/thread-tree/index.ts +0 -249
  54. package/src/thread-tree/types.ts +0 -29
  55. package/src/utils/uuid.ts +0 -7
  56. package/tests/live/agent-anthropic.test.ts +0 -288
  57. package/tests/live/agent-strategy-hooks.test.ts +0 -268
  58. package/tests/live/checkpoint.test.ts +0 -243
  59. package/tests/live/execution-strategies.test.ts +0 -255
  60. package/tests/live/plan-strategy.test.ts +0 -160
  61. package/tests/live/subagent-events.live.test.ts +0 -249
  62. package/tests/live/thread-tree.test.ts +0 -186
  63. package/tests/unit/agent.test.ts +0 -703
  64. package/tests/unit/checkpoint.test.ts +0 -232
  65. package/tests/unit/execution/equivalence.test.ts +0 -402
  66. package/tests/unit/execution/loop.test.ts +0 -437
  67. package/tests/unit/execution/plan.test.ts +0 -590
  68. package/tests/unit/execution/react.test.ts +0 -604
  69. package/tests/unit/execution/subagent-events.test.ts +0 -235
  70. package/tests/unit/execution/tool-ordering.test.ts +0 -310
  71. package/tests/unit/middleware/logging.test.ts +0 -276
  72. package/tests/unit/state.test.ts +0 -573
  73. package/tests/unit/thread-tree.test.ts +0 -249
  74. package/tsconfig.json +0 -29
package/specs/UAP-1.0.md DELETED
@@ -1,2355 +0,0 @@
1
- # UAP-1.0: Unified Agent Protocol Specification
2
-
3
- **Version:** 1.0.0
4
- **Status:** Approved
5
- **Built on:** UPP-1.2 (Unified Provider Protocol)
6
- **Authors:** UAP Working Group
7
-
8
- ---
9
-
10
- ## Abstract
11
-
12
- The Unified Agent Protocol (UAP) is a specification for building AI agents on top of the Unified Provider Protocol (UPP-1.2). This document defines the protocol semantics, data structures, and implementation requirements for building UAP-compliant agents, sessions, and execution strategies.
13
-
14
- UAP extends UPP-1.2 with agent-level abstractions including decoupled execution strategies (ReAct, Plan, Loop), functional state management, sub-agent composition, and middleware pipelines. UAP preserves complete type uniformity with UPP-1.2, using all types from `@providerprotocol/ai` directly without abstraction or re-export.
15
-
16
- **Core Philosophy:** UAP is a pipe, not a nanny. The protocol provides orchestration primitives; the developer provides the constraints.
17
-
18
- ---
19
-
20
- ## Table of Contents
21
-
22
- 1. [Introduction](#1-introduction)
23
- 2. [Design Principles](#2-design-principles)
24
- 3. [Core Concepts](#3-core-concepts)
25
- 4. [Agent Interface](#4-agent-interface)
26
- 5. [Execution Strategies](#5-execution-strategies)
27
- 6. [Functional State Management](#6-functional-state-management)
28
- 7. [Thread Trees](#7-thread-trees)
29
- 8. [Sub-Agent Protocol](#8-sub-agent-protocol)
30
- 9. [Middleware](#9-middleware)
31
- 10. [Agent Strategy Hooks](#10-agent-strategy-hooks)
32
- - [10.4 ToolUseStrategy (UPP Passthrough)](#104-toolusestrategy-upp-passthrough)
33
- 11. [Streaming](#11-streaming)
34
- 12. [Serialization](#12-serialization)
35
- - [12.4 Checkpointing](#124-checkpointing)
36
- 13. [Data Type Definitions](#13-data-type-definitions)
37
- 14. [Conformance](#14-conformance)
38
- 15. [Security Considerations](#15-security-considerations)
39
-
40
- ---
41
-
42
- ## 1. Introduction
43
-
44
- ### 1.1 Purpose
45
-
46
- AI agents require orchestration beyond simple LLM inference. UAP-1.0 establishes a standard protocol that:
47
-
48
- - Provides agent abstractions built on UPP-1.2 primitives
49
- - Decouples execution strategies from agent definitions
50
- - Uses functional state management with explicit data flow
51
- - Supports hierarchical agent composition (sub-agents)
52
- - Provides middleware for cross-cutting concerns
53
- - Maintains complete type uniformity with the underlying LLM library
54
- - Places full control and responsibility with the developer
55
-
56
- ### 1.2 Scope
57
-
58
- This specification covers:
59
-
60
- - The `agent()` function interface for defining agents
61
- - Execution strategies (`react()`, `plan()`, `loop()`)
62
- - Functional state management with explicit state passing
63
- - Thread tree structures for branching conversations
64
- - Sub-agent communication via `ask()` and `query()` methods
65
- - Middleware composition
66
- - Agent strategy hooks
67
- - Serialization format for persistence
68
-
69
- ### 1.3 Relationship to UPP-1.2
70
-
71
- UAP-1.0 builds on UPP-1.2 and MUST use the following types directly from `@providerprotocol/ai`:
72
-
73
- - `llm`, `LLMInstance`, `LLMOptions`
74
- - `Thread`, `Turn`, `TokenUsage`
75
- - `Message`, `UserMessage`, `AssistantMessage`, `ToolResultMessage`
76
- - `Tool`, `ToolCall`, `ToolResult`, `ToolExecution`
77
- - `ToolUseStrategy` (passed through to `llm()` for tool execution hooks)
78
- - `StreamResult`, `StreamEvent`, `StreamEventType`
79
- - `UPPError`, `ErrorCode`
80
- - All provider factories (`anthropic`, `openai`, `google`, etc.)
81
-
82
- UAP MUST NOT re-export, wrap, or abstract these types. Applications import directly from `@providerprotocol/ai` for these types.
83
-
84
- ### 1.4 Terminology
85
-
86
- | Term | Definition |
87
- |------|------------|
88
- | **Agent** | An AI entity with a model, execution strategy, tools, and optional sub-agents |
89
- | **AgentState** | An immutable snapshot of agent execution state |
90
- | **Step** | A single cycle of an execution strategy (reason-act-observe in ReAct) |
91
- | **Sub-Agent** | An agent declared as a tool dependency for another agent |
92
- | **Middleware** | A composable function that wraps agent execution |
93
- | **Turn** | A UPP Turn representing the complete result of one LLM inference call |
94
- | **Thread Tree** | A tree-structured collection of threads with parent-child relationships |
95
-
96
- ### 1.5 Requirements Language
97
-
98
- The key words "MUST", "MUST NOT", "REQUIRED", "SHALL", "SHALL NOT", "SHOULD", "SHOULD NOT", "RECOMMENDED", "MAY", and "OPTIONAL" in this document are to be interpreted as described in RFC 2119.
99
-
100
- ### 1.6 Notation Conventions
101
-
102
- This specification uses language-agnostic pseudocode for examples. The pseudocode follows these conventions:
103
-
104
- - Function calls: `function_name(arg1, arg2)`
105
- - Object/map literals: `{key: value, key2: value2}`
106
- - Array/list literals: `[item1, item2, item3]`
107
- - Property access: `object.property`
108
- - Method calls: `object.method(args)`
109
- - Async operations: `await expression`
110
- - Iteration: `for item in collection`
111
- - Type annotations: `variable: Type`
112
- - Optional values: `Type?` or `Optional<Type>`
113
- - Comments: `// comment text`
114
-
115
- ### 1.7 Code Examples
116
-
117
- Code examples in this specification use the placeholder package name `agents`. Implementations MUST choose an appropriate package name for their ecosystem:
118
-
119
- | Language | Example Package Name |
120
- |----------|---------------------|
121
- | JavaScript/TypeScript | `@providerprotocol/agents` |
122
- | Python | `providerprotocol-agents` |
123
- | Go | `github.com/providerprotocol/agents` |
124
- | Rust | `providerprotocol-agents` |
125
-
126
- Import examples throughout this specification use JavaScript-style imports for readability:
127
-
128
- ```pseudocode
129
- // Agent SDK imports
130
- import { agent } from "agents"
131
- import { react, plan, loop } from "agents/execution"
132
- import { logging } from "agents/middleware"
133
-
134
- // UPP-1.2 imports (used directly, never re-exported)
135
- import { llm, Thread, UserMessage, Tool } from "upp"
136
- import anthropic from "upp/anthropic"
137
- ```
138
-
139
- ---
140
-
141
- ## 2. Design Principles
142
-
143
- ### 2.1 Type Uniformity with UPP-1.2
144
-
145
- UAP MUST NOT create abstractions around UPP-1.2 types. All data flows through standard UPP types directly. This ensures:
146
-
147
- - No impedance mismatch between agent and LLM layers
148
- - Full access to provider-specific features
149
- - No data truncation or morphing
150
- - Transparent debugging and logging
151
-
152
- ```pseudocode
153
- // CORRECT: Use UPP types directly
154
- import { Thread, Turn, UserMessage } from "upp"
155
-
156
- { turn, state } = await agent.generate("Hello", initialState)
157
- // turn is standard UPP Turn, state is explicit AgentState
158
-
159
- // INCORRECT: Creating wrapper types
160
- import { AgentTurn } from "agents" // DO NOT DO THIS
161
- ```
162
-
163
- **Rationale:** Wrapping library types creates maintenance burden, obscures debugging, and prevents access to provider-specific metadata. UAP agents operate on the same data structures as raw LLM calls.
164
-
165
- ### 2.2 Functional State Management
166
-
167
- UAP adopts a functional state pattern. Agent execution is a pure transformation:
168
-
169
- ```
170
- (Input, State) -> (Turn, NewState)
171
- ```
172
-
173
- **Core Requirements:**
174
-
175
- - `AgentState` is immutable—each operation returns a new state
176
- - State is explicitly passed and returned, never mutated internally
177
- - No "ghost history" from hidden state accumulation
178
- - Developer controls what state persists between calls
179
-
180
- ```pseudocode
181
- // Explicit state flow
182
- state0 = AgentState.initial()
183
-
184
- { turn: turn1, state: state1 } = await agent.generate("First message", state0)
185
- { turn: turn2, state: state2 } = await agent.generate("Second message", state1)
186
-
187
- // State is explicit and inspectable
188
- print(state2.messages.length) // Developer knows exactly what's there
189
- print(state2.step) // Current step count
190
-
191
- // Branching is trivial—just use different states
192
- { turn: turn3a, state: state3a } = await agent.generate("Option A", state1)
193
- { turn: turn3b, state: state3b } = await agent.generate("Option B", state1)
194
- ```
195
-
196
- **Rationale:** Implicit state mutation violates UPP's "Explicit Over Magic" principle. Functional state makes data flow visible, debugging tractable, and serialization trivial.
197
-
198
- ### 2.3 Decoupled Execution
199
-
200
- Execution strategies are separate from agent definitions:
201
-
202
- - Agents define WHAT (model, tools, system prompt)
203
- - Strategies define HOW (ReAct loop, plan-then-execute, simple loop)
204
- - Strategies are interchangeable without changing agent definition
205
-
206
- ```pseudocode
207
- // Same agent definition, different execution strategies
208
- const coder = agent({
209
- model: anthropic("claude-sonnet-4-20250514"),
210
- tools: [Bash, Read, Write],
211
- system: "You are a coding assistant.",
212
- })
213
-
214
- // Use ReAct for complex reasoning
215
- const reactCoder = agent({ ...coder, execution: react() })
216
-
217
- // Use simple loop for straightforward tasks
218
- const loopCoder = agent({ ...coder, execution: loop() })
219
- ```
220
-
221
- **Rationale:** Separating execution from definition mirrors UPP's separation of model binding from inference. It enables experimentation with different strategies without redefining agents.
222
-
223
- ### 2.4 Infinite by Default
224
-
225
- UAP SHALL NOT impose artificial execution limits. Default behavior is unbounded execution:
226
-
227
- - `maxIterations`: `Infinity` (not 10)
228
- - `maxSteps`: `Infinity` (not 10)
229
- - `toolStrategy.maxIterations`: `Infinity` (UPP tool loop limit)
230
- - `timeout`: `undefined` (no timeout)
231
-
232
- **Rationale:** UAP is a pipe, not a nanny. The model should complete tasks based on its own internal logic. Artificial ceilings create unexpected truncation and incomplete results.
233
-
234
- Developers who want limits MUST explicitly configure them:
235
-
236
- ```pseudocode
237
- // No limits - model runs until it decides to stop
238
- agent({
239
- model: anthropic("claude-sonnet-4-20250514"),
240
- execution: react(), // maxSteps: Infinity by default
241
- // toolStrategy.maxIterations: Infinity by default (UPP level)
242
- })
243
-
244
- // Developer explicitly sets limits
245
- agent({
246
- model: anthropic("claude-sonnet-4-20250514"),
247
- execution: react({ maxSteps: 20 }), // Explicit UAP step limit
248
- toolStrategy: {
249
- maxIterations: 50, // Explicit UPP tool iteration limit
250
- },
251
- strategy: {
252
- stopCondition: (state) => state.metadata.budget > 10000, // Custom limit
253
- },
254
- })
255
- ```
256
-
257
- ### 2.5 Developer Responsibility
258
-
259
- UAP places full control and full responsibility with the developer:
260
-
261
- - The protocol provides orchestration primitives
262
- - The developer provides safety constraints
263
- - Runaway agents are a developer concern, not a protocol concern
264
- - Resource exhaustion is a deployment concern, not a protocol concern
265
-
266
- This is analogous to how operating systems provide `fork()` without limiting process count—the administrator manages resources.
267
-
268
- ### 2.6 Explicit Sub-Agent Declaration
269
-
270
- Sub-agents are tools. Tools require explicit schemas. Therefore, sub-agents require explicit schemas.
271
-
272
- UAP MUST NOT auto-generate tool schemas from agent definitions. This prevents:
273
-
274
- - Leaky abstractions from system prompt inference
275
- - Unpredictable behavior from schema guessing
276
- - Hidden coupling between parent and child agents
277
-
278
- ```pseudocode
279
- // CORRECT: Explicit sub-agent tool declaration
280
- explorer = agent({
281
- model: anthropic("claude-haiku-4-20250514"),
282
- tools: [Glob, Grep, Read],
283
- })
284
-
285
- // Must explicitly define the tool interface
286
- explorerTool: Tool = {
287
- name: "explore_codebase",
288
- description: "Explores and analyzes codebase structure",
289
- parameters: {
290
- type: "object",
291
- properties: {
292
- query: { type: "string", description: "What to explore" },
293
- depth: { type: "number", description: "Search depth" },
294
- },
295
- required: ["query"],
296
- },
297
- run: async (params) => {
298
- { turn } = await explorer.generate(params.query, AgentState.initial())
299
- return turn.response.text
300
- },
301
- }
302
-
303
- // INCORRECT: Magic schema generation
304
- explorerTool = explorer.toTool() // DO NOT DO THIS
305
- ```
306
-
307
- ---
308
-
309
- ## 3. Core Concepts
310
-
311
- ### 3.1 The Agent Architecture
312
-
313
- ```
314
- +-----------------------------------------------------------------------+
315
- | Application Code |
316
- +-----------------------------------------------------------------------+
317
- |
318
- v
319
- +-------------+
320
- | agent() | generate(input, state) -> { turn, state }
321
- +-------------+
322
- |
323
- v
324
- +-----------------------------------------------------------------------+
325
- | Middleware Pipeline |
326
- | +----------+ +------------+ +---------+ |
327
- | | logging |->| guardrails |->| budget | (ordered, composable) |
328
- | +----------+ +------------+ +---------+ |
329
- +-----------------------------------------------------------------------+
330
- |
331
- v
332
- +-----------------------------------------------------------------------+
333
- | Execution Strategy |
334
- | +----------+ +--------+ +--------+ |
335
- | | react() | | plan() | | loop() | |
336
- | +----------+ +--------+ +--------+ |
337
- +-----------------------------------------------------------------------+
338
- |
339
- v
340
- +-----------------------------------------------------------------------+
341
- | @providerprotocol/ai (UPP-1.2) |
342
- | +-------+ +--------+ +--------+ +------+ +--------+ |
343
- | | llm() | | Thread | | Turn | | Tool | | Stream | |
344
- | +-------+ +--------+ +--------+ +------+ +--------+ |
345
- +-----------------------------------------------------------------------+
346
- |
347
- v
348
- +-----------------------------------------------------------------------+
349
- | Provider Adapters |
350
- | +----------+ +--------+ +--------+ +--------+ |
351
- | | anthropic| | openai | | google | | ollama | |
352
- | +----------+ +--------+ +--------+ +--------+ |
353
- +-----------------------------------------------------------------------+
354
- ```
355
-
356
- ### 3.2 Import Patterns
357
-
358
- UAP implementations MUST provide separate entry points for different functionality:
359
-
360
- ```pseudocode
361
- // Main entry point - agent factory
362
- import { agent, AgentState } from "agents"
363
-
364
- // Execution strategies
365
- import { loop, react, plan } from "agents/execution"
366
-
367
- // Middleware (v1: logging only)
368
- import { logging } from "agents/middleware"
369
-
370
- // UPP imports remain unchanged
371
- import { llm, Thread, Turn, UserMessage, Tool } from "upp"
372
- import anthropic from "upp/anthropic"
373
- import openai from "upp/openai"
374
- ```
375
-
376
- ### 3.3 Data Flow
377
-
378
- The functional data flow:
379
-
380
- ```
381
- Input + State₀ → Agent.generate() → Turn + State₁
382
-
383
- ├── Middleware.before(context)
384
-
385
- ├── Strategy.execute()
386
- │ │
387
- │ ├── llm.generate() [UPP]
388
- │ │
389
- │ ├── Tool execution (parallel/sequential)
390
- │ │
391
- │ └── Step hooks
392
-
393
- └── Middleware.after(context, turn)
394
- ```
395
-
396
- Each call is stateless from the agent's perspective—all state is passed in and returned explicitly.
397
-
398
- ### 3.4 Identity Model
399
-
400
- All agent entities MUST have UUIDv4 identifiers:
401
-
402
- | Entity | ID Field | Description |
403
- |--------|----------|-------------|
404
- | Agent | `agent.id` | Unique agent instance ID |
405
- | AgentState | `state.id` | State snapshot ID (each derived state receives a new ID) |
406
- | Session | `sessionId` | Session identifier for checkpointing |
407
- | Checkpoint | `checkpointId` | Unique checkpoint snapshot ID |
408
-
409
- **Note:** Step numbers (`step`) are integers representing sequential execution order, not UUIDs.
410
-
411
- **Session ID Generation:**
412
- - When `checkpoints` is provided to `agent()` and no `sessionId` is supplied in the options, a `sessionId` MUST be generated
413
- - Session IDs MUST be UUIDv4
414
- - The `sessionId` MUST be included in `state.metadata.sessionId` after execution
415
-
416
- Turn identity comes from UPP. UAP tracks execution context separately:
417
-
418
- ```pseudocode
419
- // Execution context tracks lineage
420
- context = {
421
- agentId: agent.id,
422
- stateId: state.id,
423
- sessionId?: sessionId, // For checkpointing
424
- parentContext?: parentContext, // For sub-agent calls
425
- }
426
- ```
427
-
428
- ---
429
-
430
- ## 4. Agent Interface
431
-
432
- ### 4.1 Function Signature
433
-
434
- ```pseudocode
435
- agent(options: AgentOptions) -> Agent
436
- ```
437
-
438
- ### 4.2 AgentOptions Structure
439
-
440
- `AgentOptions` extends `LLMOptions` from UPP-1.2 for full passthrough of LLM configuration. This ensures complete type uniformity—any option valid for `llm()` is valid for `agent()`.
441
-
442
- **UAP-Specific Fields:**
443
-
444
- | Field | Type | Required | Description |
445
- |-------|------|----------|-------------|
446
- | `model` | ModelReference | Yes | A model reference from a UPP provider factory |
447
- | `execution` | ExecutionStrategy | No | Execution strategy (default: loop()) |
448
- | `middleware` | List<Middleware> | No | Ordered middleware pipeline |
449
- | `strategy` | AgentStrategy | No | Agent lifecycle hooks |
450
- | `checkpoints` | CheckpointStore | No | Checkpoint store for step-level persistence |
451
- | `sessionId` | String | No | Session identifier (auto-generated if not provided) |
452
-
453
- **LLM Passthrough Fields (from LLMOptions):**
454
-
455
- | Field | Type | Required | Description |
456
- |-------|------|----------|-------------|
457
- | `params` | Map | No | Model-specific parameters (passed to llm()) |
458
- | `config` | ProviderConfig | No | Provider infrastructure configuration |
459
- | `tools` | List<Tool> | No | Tools available to the agent |
460
- | `system` | String | No | System prompt |
461
- | `structure` | JSONSchema | No | Structured output schema |
462
- | `toolStrategy` | ToolUseStrategy | No | Tool execution hooks (passed to llm()) |
463
-
464
- **Note:** The `tools` field accepts only `Tool` objects. Sub-agents must be explicitly converted to tools with defined schemas (see Section 8).
465
-
466
- **Note:** The `toolStrategy` field is passed directly to the underlying `llm()` instance. Per Section 2.4, `toolStrategy.maxIterations` defaults to `Infinity` when not specified.
467
-
468
- ### 4.3 Agent Interface
469
-
470
- | Property/Method | Type | Description |
471
- |-----------------|------|-------------|
472
- | `id` | String | Unique agent identifier (UUIDv4) |
473
- | `model` | ModelReference | The bound model |
474
- | `tools` | List<Tool> | Available tools |
475
- | `system` | String? | System prompt |
476
- | `generate(input, state)` | Function | Execute agent, return Turn and new state |
477
- | `stream(input, state)` | Function | Execute agent with streaming |
478
- | `ask(input, state)` | Function | Multi-turn execution, history preserved |
479
- | `query(input)` | Function | Stateless single-turn execution |
480
-
481
- ### 4.4 generate() Method
482
-
483
- The primary execution method. Follows the functional pattern `(Input, State) -> (Turn, NewState)`.
484
-
485
- **Signature:**
486
-
487
- ```pseudocode
488
- generate(input: String | Message, state: AgentState) -> Promise<GenerateResult>
489
- ```
490
-
491
- **GenerateResult Structure:**
492
-
493
- | Field | Type | Description |
494
- |-------|------|-------------|
495
- | `turn` | Turn | Standard UPP Turn |
496
- | `state` | AgentState | New immutable state |
497
-
498
- **Usage:**
499
-
500
- ```pseudocode
501
- import { agent, AgentState } from "agents"
502
- import anthropic from "upp/anthropic"
503
-
504
- coder = agent({
505
- model: anthropic("claude-sonnet-4-20250514"),
506
- tools: [Bash, Read, Write],
507
- system: "You are a coding assistant.",
508
- })
509
-
510
- // Initialize state
511
- state0 = AgentState.initial()
512
-
513
- // First generation
514
- { turn: turn1, state: state1 } = await coder.generate(
515
- "Create a hello world program",
516
- state0
517
- )
518
- print(turn1.response.text)
519
-
520
- // Second generation with updated state
521
- { turn: turn2, state: state2 } = await coder.generate(
522
- "Add error handling",
523
- state1
524
- )
525
- ```
526
-
527
- ### 4.5 stream() Method
528
-
529
- Streaming execution with the same functional pattern.
530
-
531
- **Signature:**
532
-
533
- ```pseudocode
534
- stream(input: String | Message, state: AgentState) -> AgentStreamResult
535
- ```
536
-
537
- **AgentStreamResult:**
538
-
539
- - Async iterable of `AgentStreamEvent`
540
- - `result: Promise<GenerateResult>` - resolves after completion
541
- - `abort(): void` - cancel the stream
542
-
543
- ```pseudocode
544
- stream = coder.stream("Implement a feature", state0)
545
-
546
- for await (event of stream) {
547
- // Process events
548
- }
549
-
550
- { turn, state: newState } = await stream.result
551
- ```
552
-
553
- ### 4.6 ask() Method
554
-
555
- Multi-turn execution where history is preserved in state. This is a convenience method that:
556
- 1. Appends input to state history
557
- 2. Calls generate()
558
- 3. Appends response to returned state
559
-
560
- **Signature:**
561
-
562
- ```pseudocode
563
- ask(input: String | Message, state: AgentState) -> Promise<GenerateResult>
564
- ```
565
-
566
- **Usage:**
567
-
568
- ```pseudocode
569
- state0 = AgentState.initial()
570
-
571
- // ask() automatically manages conversation history
572
- { turn: t1, state: s1 } = await agent.ask("My name is Alice", state0)
573
- { turn: t2, state: s2 } = await agent.ask("What is my name?", s1)
574
- // t2.response.text contains "Alice" - context preserved
575
- ```
576
-
577
- **Equivalence:**
578
-
579
- ```pseudocode
580
- // ask() is equivalent to:
581
- ask(input, state) {
582
- newState = state.withMessage(UserMessage(input))
583
- result = await this.generate(input, newState)
584
- return {
585
- turn: result.turn,
586
- state: result.state.withMessages(result.turn.messages),
587
- }
588
- }
589
- ```
590
-
591
- ### 4.7 query() Method
592
-
593
- Stateless single-turn execution. Creates an ephemeral state, executes, and discards the resulting state. Useful for one-off questions that don't need context.
594
-
595
- **Signature:**
596
-
597
- ```pseudocode
598
- query(input: String | Message) -> Promise<Turn>
599
- ```
600
-
601
- **Usage:**
602
-
603
- ```pseudocode
604
- // No state management needed
605
- turn = await agent.query("What is 2 + 2?")
606
- print(turn.response.text) // "4"
607
-
608
- // State is not preserved - each query is independent
609
- turn2 = await agent.query("What did I just ask?")
610
- // turn2 has no context from the first query
611
- ```
612
-
613
- **Equivalence:**
614
-
615
- ```pseudocode
616
- // query() is equivalent to:
617
- query(input) {
618
- { turn } = await this.generate(input, AgentState.initial())
619
- return turn
620
- }
621
- ```
622
-
623
- ---
624
-
625
- ## 5. Execution Strategies
626
-
627
- ### 5.1 ExecutionStrategy Interface
628
-
629
- ```pseudocode
630
- interface ExecutionStrategy {
631
- name: String
632
- execute(context: ExecutionContext) -> Promise<ExecutionResult>
633
- stream(context: ExecutionContext) -> AgentStreamResult
634
- }
635
- ```
636
-
637
- **ExecutionContext Structure:**
638
-
639
- | Field | Type | Description |
640
- |-------|------|-------------|
641
- | `agent` | Agent | The agent being executed |
642
- | `llm` | LLMInstance | The bound LLM instance |
643
- | `input` | Message | The user input message |
644
- | `state` | AgentState | Current immutable state |
645
- | `tools` | List<Tool> | Resolved tools |
646
- | `strategy` | AgentStrategy | Agent lifecycle hooks |
647
- | `signal` | AbortSignal? | Abort signal for cancellation |
648
-
649
- **ExecutionResult Structure:**
650
-
651
- | Field | Type | Description |
652
- |-------|------|-------------|
653
- | `turn` | Turn | The complete UPP Turn |
654
- | `state` | AgentState | New immutable state |
655
-
656
- ### 5.2 loop() Strategy
657
-
658
- The simplest strategy—equivalent to UPP's tool loop behavior.
659
-
660
- ```pseudocode
661
- loop(options?: LoopOptions) -> ExecutionStrategy
662
- ```
663
-
664
- **LoopOptions Structure:**
665
-
666
- | Field | Type | Default | Description |
667
- |-------|------|---------|-------------|
668
- | `maxIterations` | Integer | Infinity | Maximum tool execution rounds |
669
-
670
- **Behavior:**
671
-
672
- 1. Send input to LLM
673
- 2. If response has tool calls, execute tools and loop
674
- 3. Continue until the response contains no tool calls or `maxIterations` is reached (if set)
675
- 4. Return final response as UPP Turn
676
-
677
- ```pseudocode
678
- import { agent } from "agents"
679
- import { loop } from "agents/execution"
680
-
681
- // Infinite by default - loops until model stops calling tools
682
- simple = agent({
683
- model: anthropic("claude-sonnet-4-20250514"),
684
- execution: loop(), // maxIterations: Infinity
685
- tools: [calculator],
686
- })
687
-
688
- // Explicit limit when needed
689
- limited = agent({
690
- model: anthropic("claude-sonnet-4-20250514"),
691
- execution: loop({ maxIterations: 5 }),
692
- tools: [calculator],
693
- })
694
- ```
695
-
696
- ### 5.3 react() Strategy
697
-
698
- ReAct (Reason-Act-Observe) loop with explicit reasoning phases.
699
-
700
- ```pseudocode
701
- react(options?: ReactOptions) -> ExecutionStrategy
702
- ```
703
-
704
- **ReactOptions Structure:**
705
-
706
- | Field | Type | Default | Description |
707
- |-------|------|---------|-------------|
708
- | `maxSteps` | Integer | Infinity | Maximum reason-act-observe cycles |
709
- | `reasoningPrompt` | String | (default) | Prompt suffix for reasoning phase |
710
-
711
- **Behavior:**
712
-
713
- 1. **Reason**: LLM outputs reasoning about what to do next
714
- 2. **Act**: LLM selects and executes tool(s)
715
- 3. **Observe**: Tool results are formatted as observations
716
- 4. Repeat until `stopCondition` returns true, the LLM makes no further tool calls, or `maxSteps` is reached (if set)
717
-
718
- **Step Lifecycle:**
719
-
720
- ```pseudocode
721
- step = 0
722
- while (true) {
723
- step++
724
- newState = state.withStep(step)
725
- strategy.onStepStart?.(step, newState)
726
-
727
- // Reason phase
728
- reasoningTurn = await llm.generate(
729
- buildHistory(newState),
730
- "Think about what to do next."
731
- )
732
- reasoning = reasoningTurn.response.text
733
- newState = newState.withReasoning(reasoning)
734
- strategy.onReason?.(step, reasoning)
735
-
736
- // Act phase
737
- actionTurn = await llm.generate(
738
- buildHistory(newState),
739
- "Based on your reasoning, take action."
740
- )
741
-
742
- if (actionTurn.response.hasToolCalls) {
743
- strategy.onAct?.(step, actionTurn.response.toolCalls)
744
- // Tool execution happens via UPP
745
- strategy.onObserve?.(step, actionTurn.toolExecutions)
746
- }
747
-
748
- newState = newState.withMessages(actionTurn.messages)
749
- strategy.onStepEnd?.(step, { turn: actionTurn, state: newState })
750
-
751
- // Check termination
752
- if (strategy.stopCondition?.(newState)) break
753
- if (!actionTurn.response.hasToolCalls) break
754
- // Note: No maxSteps check if maxSteps is Infinity
755
- if (options.maxSteps !== Infinity && step >= options.maxSteps) break
756
- }
757
-
758
- return { turn: buildFinalTurn(newState), state: newState }
759
- ```
760
-
761
- **MUST Requirements:**
762
-
763
- 1. MUST emit `onReason`, `onAct`, `onObserve` hooks at appropriate phases
764
- 2. MUST track reasoning in state
765
- 3. MUST call `stopCondition` after each step
766
- 4. MUST NOT impose artificial limits unless explicitly configured
767
-
768
- ### 5.4 plan() Strategy
769
-
770
- Plan-then-execute strategy with upfront planning phase.
771
-
772
- ```pseudocode
773
- plan(options?: PlanOptions) -> ExecutionStrategy
774
- ```
775
-
776
- **PlanOptions Structure:**
777
-
778
- | Field | Type | Default | Description |
779
- |-------|------|---------|-------------|
780
- | `maxPlanSteps` | Integer | Infinity | Maximum steps in a plan |
781
- | `allowReplan` | Boolean | true | Allow replanning on failure |
782
- | `planSchema` | JSONSchema | (default) | Schema for plan structure |
783
-
784
- **PlanStep Structure:**
785
-
786
- | Field | Type | Description |
787
- |-------|------|-------------|
788
- | `id` | String | Step identifier |
789
- | `description` | String | What this step does |
790
- | `tool` | String? | Tool to use (if applicable) |
791
- | `dependsOn` | List<String> | IDs of steps this depends on |
792
- | `status` | String | "pending" \| "in_progress" \| "completed" \| "failed" |
793
-
794
- **Behavior:**
795
-
796
- 1. **Plan**: LLM generates structured plan with steps and dependencies
797
- 2. **Execute**: Execute each plan step respecting dependency order
798
- 3. **Replan**: If a step fails and `allowReplan`, generate new plan
799
-
800
- **MUST Requirements:**
801
-
802
- 1. MUST produce structured plan via structured output
803
- 2. MUST respect step dependencies (topological order)
804
- 3. MUST track plan in state
805
- 4. MUST update step status during execution
806
-
807
- ### 5.5 Custom Strategies
808
-
809
- Implementations MUST allow custom execution strategies:
810
-
811
- ```pseudocode
812
- customStrategy: ExecutionStrategy = {
813
- name: "custom",
814
-
815
- execute: async (context) => {
816
- { agent, llm, input, state, strategy } = context
817
-
818
- strategy.onStepStart?.(1, state)
819
-
820
- // Custom execution logic
821
- turn = await llm.generate(state.messages, input)
822
- newState = state.withMessages(turn.messages)
823
-
824
- strategy.onStepEnd?.(1, { turn, state: newState })
825
-
826
- return { turn, state: newState }
827
- },
828
-
829
- stream: (context) => {
830
- // Streaming implementation
831
- },
832
- }
833
- ```
834
-
835
- ---
836
-
837
- ## 6. Functional State Management
838
-
839
- ### 6.1 AgentState Structure
840
-
841
- `AgentState` is an immutable snapshot of agent execution state.
842
-
843
- | Field | Type | Description |
844
- |-------|------|-------------|
845
- | `id` | String | State snapshot ID (UUIDv4) |
846
- | `messages` | List<Message> | Conversation history (UPP Messages) |
847
- | `step` | Integer | Current step number |
848
- | `metadata` | Map | User-defined metadata |
849
- | `reasoning` | List<String> | Reasoning traces (for ReAct) |
850
- | `plan` | List<PlanStep>? | Execution plan (for Plan strategy) |
851
-
852
- ### 6.2 State Operations
853
-
854
- All state operations return new state instances:
855
-
856
- ```pseudocode
857
- interface AgentState {
858
- // Factory
859
- static initial() -> AgentState
860
-
861
- // Immutable operations - all return new AgentState
862
- withMessage(message: Message) -> AgentState
863
- withMessages(messages: List<Message>) -> AgentState
864
- withContext(messages: List<Message>) -> AgentState
865
- withStep(step: Integer) -> AgentState
866
- withMetadata(key: String, value: Any) -> AgentState
867
- withReasoning(reasoning: String) -> AgentState
868
- withPlan(plan: List<PlanStep>) -> AgentState
869
-
870
- // Serialization
871
- toJSON() -> AgentStateJSON
872
- static fromJSON(json: AgentStateJSON) -> AgentState
873
- }
874
- ```
875
-
876
- **Message Operations:**
877
-
878
- - `withMessage(message)` — Appends a single message to history
879
- - `withMessages(messages)` — Appends multiple messages to history
880
- - `withContext(messages)` — **Replaces** entire message history
881
-
882
- The distinction between `withMessages` and `withContext` is critical:
883
-
884
- ```pseudocode
885
- state = AgentState.initial()
886
- .withMessage(UserMessage("Hello"))
887
- .withMessage(AssistantMessage("Hi"))
888
-
889
- // Append: messages = [Hello, Hi, Goodbye]
890
- state.withMessages([UserMessage("Goodbye")])
891
-
892
- // Replace: messages = [Fresh start]
893
- state.withContext([UserMessage("Fresh start")])
894
- ```
895
-
896
- **Context Window Management:**
897
-
898
- `withContext(messages)` enables context window management via middleware. UAP does not provide token estimation or model limits—developers are responsible for their model's constraints.
899
-
900
- **Pattern 1: Prune Old Tool Outputs**
901
-
902
- Tool outputs (file reads, command results) consume significant tokens and become stale. Prune them while protecting recent context:
903
-
904
- ```pseudocode
905
- pruneToolOutputs: Middleware = {
906
- name: "prune-tool-outputs",
907
-
908
- before: async (ctx) => {
909
- // Simple token estimation: ~4 chars per token
910
- estimate = (msg) => JSON.stringify(msg).length / 4
911
- total = sum(ctx.state.messages.map(estimate))
912
-
913
- // Model-specific limit (developer knows their model)
914
- CONTEXT_LIMIT = 200000
915
- OUTPUT_RESERVE = 16000
916
- PROTECT_RECENT = 40000
917
-
918
- usable = CONTEXT_LIMIT - OUTPUT_RESERVE
919
- if (total <= usable) return ctx
920
-
921
- // Scan backwards, protect recent tokens, prune old tool outputs
922
- messages = [...ctx.state.messages]
923
- protected = 0
924
-
925
- for (i = messages.length - 1; i >= 0; i--) {
926
- msg = messages[i]
927
- msgTokens = estimate(msg)
928
- protected += msgTokens
929
-
930
- if (protected > PROTECT_RECENT && isToolResultMessage(msg)) {
931
- // Replace tool output with placeholder
932
- messages[i] = ToolResultMessage(msg.results.map(r => ({
932
- toolCallId: r.toolCallId,
933
- result: "[Output cleared - context limit]"
934
- })))
936
- }
937
- }
938
-
939
- return { ...ctx, state: ctx.state.withContext(messages) }
940
- },
941
- }
942
- ```
943
-
944
- **Pattern 2: Sliding Window**
945
-
946
- Keep only the N most recent messages:
947
-
948
- ```pseudocode
949
- slidingWindow: Middleware = {
950
- name: "sliding-window",
951
-
952
- before: async (ctx) => {
953
- MAX_MESSAGES = 50
954
- messages = ctx.state.messages
955
-
956
- if (messages.length <= MAX_MESSAGES) return ctx
957
-
958
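- // Keep the most recent messages. Note: a plain slice can split user/assistant or tool-call/result pairs; adjust the cut point if your provider requires intact pairs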
959
- truncated = messages.slice(-MAX_MESSAGES)
960
- return { ...ctx, state: ctx.state.withContext(truncated) }
961
- },
962
- }
963
- ```
964
-
965
- **Pattern 3: Summarize Old Context**
966
-
967
- Use a smaller model to summarize old conversation before discarding:
968
-
969
- ```pseudocode
970
- summarizeOldContext: Middleware = {
971
- name: "summarize-context",
972
-
973
- // Summarizer LLM passed at construction
974
- init: (summarizerLLM) => ({
975
- before: async (ctx) => {
976
- estimate = (msg) => JSON.stringify(msg).length / 4
977
- total = sum(ctx.state.messages.map(estimate))
978
-
979
- THRESHOLD = 150000
980
- KEEP_RECENT = 10
981
-
982
- if (total <= THRESHOLD) return ctx
983
-
984
- messages = ctx.state.messages
985
- old = messages.slice(0, -KEEP_RECENT)
986
- recent = messages.slice(-KEEP_RECENT)
987
-
988
- // Generate summary of old context
989
- summary = await summarizerLLM.query(
990
- "Summarize this conversation, preserving key facts:\n" +
991
- old.map(m => JSON.stringify(m)).join("\n")
992
- )
993
-
994
- // Replace with summary + recent messages
995
- return {
996
- ...ctx,
997
- state: ctx.state.withContext([
998
- UserMessage("[Previous conversation summary]\n" + summary.response.text),
999
- ...recent
1000
- ])
1001
- }
1002
- },
1003
- }),
1004
- }
1005
-
1006
- // Usage
1007
- agent({
1008
- middleware: [
1009
- summarizeOldContext.init(
1010
- llm({ model: anthropic("claude-3-5-haiku-latest"), params: { max_tokens: 500 } })
1011
- ),
1012
- ],
1013
- })
1014
- ```
1015
-
1016
- **Composing Strategies:**
1017
-
1018
- Middleware composes naturally—apply multiple strategies in order:
1019
-
1020
- ```pseudocode
1021
- agent({
1022
- middleware: [
1023
- pruneToolOutputs(), // First: clear stale tool outputs
1024
- slidingWindow(100), // Then: cap message count
1025
- summarizeOldContext.init(llm), // Finally: summarize if still over
1026
- ],
1027
- })
1028
- ```
1029
-
1030
- ### 6.3 State Flow Example
1031
-
1032
- ```pseudocode
1033
- // Initialize
1034
- s0 = AgentState.initial()
1035
- // s0 = { id: "uuid-1", messages: [], step: 0, metadata: {}, reasoning: [] }
1036
-
1037
- // First interaction
1038
- { turn: t1, state: s1 } = await agent.generate("Hello", s0)
1039
- // s1 = { id: "uuid-2", messages: [...t1.messages], step: 1, ... }
1040
-
1041
- // s0 is unchanged - can branch
1042
- { turn: t2a, state: s2a } = await agent.generate("Option A", s0)
1043
- { turn: t2b, state: s2b } = await agent.generate("Option B", s0)
1044
-
1045
- // Continue from s1
1046
- { turn: t2, state: s2 } = await agent.generate("Continue", s1)
1047
-
1048
- // Inspect any state at any time
1049
- print(s0.messages) // []
1050
- print(s1.messages) // [user, assistant from t1]
1051
- print(s2.messages) // [user, assistant from t1, user, assistant from t2]
1052
- ```
1053
-
1054
- ### 6.4 State Serialization
1055
-
1056
- State serializes to JSON for persistence:
1057
-
1058
- ```pseudocode
1059
- // Save state
1060
- json = state.toJSON()
1061
- await storage.save(`state:${state.id}`, JSON.stringify(json))
1062
-
1063
- // Restore state
1064
- saved = JSON.parse(await storage.load(`state:${state.id}`))
1065
- restored = AgentState.fromJSON(saved)
1066
-
1067
- // Continue from restored state
1068
- { turn, state: newState } = await agent.generate("Continue", restored)
1069
- ```
1070
-
1071
- ### 6.5 MUST Requirements for State
1072
-
1073
- 1. `AgentState` MUST be immutable—operations return new instances
1074
- 2. State operations MUST NOT mutate the original state
1075
- 3. Each state MUST have a unique ID
1076
- 4. State MUST be fully serializable via `toJSON()`
1077
- 5. `fromJSON()` MUST restore exact state
1078
-
1079
- ---
1080
-
1081
- ## 7. Thread Trees
1082
-
1083
- ### 7.1 Thread Tree Structure
1084
-
1085
- Thread trees provide optional tree-structured conversation management. They are built on `AgentState` and provide branching/merging utilities.
1086
-
1087
- **ThreadTree Interface:**
1088
-
1089
- | Property/Method | Type | Description |
1090
- |-----------------|------|-------------|
1091
- | `root` | ThreadNode | Root node |
1092
- | `current` | ThreadNode | Currently active node |
1093
- | `nodes` | Map<String, ThreadNode> | All nodes by ID |
1094
- | `branch(fromId, name?)` | Function | Create branch, returns node ID |
1095
- | `checkout(nodeId)` | Function | Switch active node |
1096
- | `history()` | Function | Get AgentState from root to current |
1097
- | `toJSON()` | Function | Serialize tree |
1098
-
1099
- **ThreadNode Structure:**
1100
-
1101
- | Field | Type | Description |
1102
- |-------|------|-------------|
1103
- | `id` | String | Node ID (UUIDv4) |
1104
- | `parentId` | String? | Parent node ID (null for root) |
1105
- | `state` | AgentState | State snapshot at this node |
1106
- | `name` | String? | Optional branch name |
1107
- | `children` | List<String> | Child node IDs |
1108
-
1109
- ### 7.2 Thread Tree Usage
1110
-
1111
- ```pseudocode
1112
- import { ThreadTree, AgentState } from "agents"
1113
-
1114
- // Create tree
1115
- tree = new ThreadTree()
1116
-
1117
- // Generate and update tree
1118
- { turn: t1, state: s1 } = await agent.generate("First", tree.history())
1119
- tree.current.state = s1
1120
-
1121
- // Branch for alternative
1122
- altId = tree.branch(tree.current.id, "alternative")
1123
- tree.checkout(altId)
1124
-
1125
- { turn: t2, state: s2 } = await agent.generate("Alternative path", tree.history())
1126
- tree.current.state = s2
1127
-
1128
- // Switch back to main
1129
- tree.checkout(tree.root.id)
1130
- ```
1131
-
1132
- ### 7.3 History Traversal
1133
-
1134
- `history()` returns an `AgentState` containing all messages from root to current:
1135
-
1136
- ```pseudocode
1137
- // Tree: root -> A -> B (current)
1138
- tree.checkout(B.id)
1139
- state = tree.history()
1140
- // state.messages contains messages from root, A, B in order
1141
- ```
1142
-
1143
- ---
1144
-
1145
- ## 8. Sub-Agent Protocol
1146
-
1147
- ### 8.1 Sub-Agents Are Tools
1148
-
1149
- Sub-agents are agents used as tools by other agents. They are **not** special—they are regular UPP Tools with an implementation that calls another agent.
1150
-
1151
- **Critical Requirement:** UAP MUST NOT auto-generate tool schemas from agents. All sub-agent tools require explicit schema declaration.
1152
-
1153
- ### 8.2 Explicit Tool Declaration
1154
-
1155
- To use an agent as a sub-agent, the developer MUST create an explicit Tool:
1156
-
1157
- ```pseudocode
1158
- // Define the sub-agent
1159
- explorer = agent({
1160
- model: anthropic("claude-haiku-4-20250514"),
1161
- tools: [Glob, Grep, Read],
1162
- system: "You explore codebases.",
1163
- })
1164
-
1165
- // Explicitly define the tool interface
1166
- explorerTool: Tool = {
1167
- name: "explore_codebase",
1168
- description: "Explores codebase structure and finds relevant files",
1169
- parameters: {
1170
- type: "object",
1171
- properties: {
1172
- query: {
1173
- type: "string",
1174
- description: "What to search for in the codebase",
1175
- },
1176
- fileTypes: {
1177
- type: "array",
1178
- items: { type: "string" },
1179
- description: "File extensions to include (e.g., ['.ts', '.js'])",
1180
- },
1181
- },
1182
- required: ["query"],
1183
- },
1184
- run: async (params) => {
1185
- prompt = `Find: ${params.query}`
1186
- if (params.fileTypes) {
1187
- prompt += ` in files: ${params.fileTypes.join(", ")}`
1188
- }
1189
- turn = await explorer.query(prompt)
1190
- return turn.response.text
1191
- },
1192
- }
1193
-
1194
- // Use in parent agent
1195
- coder = agent({
1196
- model: anthropic("claude-sonnet-4-20250514"),
1197
- tools: [Bash, Write, explorerTool],
1198
- })
1199
- ```
1200
-
1201
- ### 8.3 Why No Auto-Generation
1202
-
1203
- Auto-generating tool schemas from agent definitions violates UAP principles:
1204
-
1205
- 1. **Leaky Abstraction**: System prompts may contain instructions not suitable for tool descriptions
1206
- 2. **Unpredictable Schema**: No reliable way to infer parameter structure from an agent
1207
- 3. **Hidden Coupling**: Changes to sub-agent system prompt would silently change tool interface
1208
- 4. **Type Unsafety**: Auto-generated schemas can't be statically verified
1209
-
1210
- The explicit approach ensures:
1211
-
1212
- - Tool interface is intentionally designed
1213
- - Schema matches actual sub-agent capabilities
1214
- - Changes require explicit updates
1215
- - TypeScript/static typing can verify schemas
1216
-
1217
- ### 8.4 LLM Inheritance
1218
-
1219
- Sub-agents MAY inherit the parent's LLM configuration (model and config) when they do not explicitly specify their own model.
1220
-
1221
- **Implementation Note:** UPP `Tool.run` functions receive only `params`. UAP's `ExecutionStrategy` MUST inject execution context when invoking tools. This is done by wrapping tool execution:
1222
-
1223
- ```pseudocode
1224
- // UAP ExecutionStrategy wraps tool invocation
1225
- async function executeTool(tool: Tool, params: Map, context: ExecutionContext) {
1226
- // If tool needs context (e.g., for inheritance), wrap the call
1227
- if (tool.run.length > 1) {
1228
- // Tool expects context as second argument
1229
- return tool.run(params, {
1230
- parentModel: context.agent.model,
1231
- parentConfig: context.agent.config,
1232
- agentId: context.agent.id,
1233
- stateId: context.state.id,
1234
- })
1235
- }
1236
- // Standard UPP tool - params only
1237
- return tool.run(params)
1238
- }
1239
- ```
1240
-
1241
- **Sub-agent tool with inheritance:**
1242
-
1243
- ```pseudocode
1244
- // Sub-agent without explicit model
1245
- helper = agent({
1246
- // model not specified
1247
- tools: [Read],
1248
- system: "You help with tasks.",
1249
- })
1250
-
1251
- // Tool explicitly handles inheritance via context
1252
- helperTool: Tool = {
1253
- name: "helper",
1254
- description: "...",
1255
- parameters: { ... },
1256
- run: async (params, context) => {
1257
- // context injected by UAP ExecutionStrategy
1258
- effectiveAgent = helper.model
1259
- ? helper
1260
- : agent({ ...helper, model: context.parentModel, config: context.parentConfig })
1261
- return (await effectiveAgent.query(params.task)).response.text
1262
- },
1263
- }
1264
- ```
1265
-
1266
- **MUST Requirements:**
1267
-
1268
- 1. If sub-agent has explicit `model`, MUST use that model
1269
- 2. If sub-agent has no `model`, MAY inherit from parent execution context
1270
- 3. Inheritance MUST be resolved at execution time
1271
- 4. ExecutionStrategy MUST inject context for tools that declare a second parameter
1272
-
1273
- ### 8.5 Execution Dependencies
1274
-
1275
- Tools and sub-agents can declare execution dependencies:
1276
-
1277
- **Tool Dependency Options:**
1278
-
1279
- | Field | Type | Default | Description |
1280
- |-------|------|---------|-------------|
1281
- | `sequential` | Boolean | false | Must complete before other tools start |
1282
- | `dependsOn` | List<String> | [] | Tool names that must complete first |
1283
-
1284
- ```pseudocode
1285
- readTool: Tool = {
1286
- name: "read_file",
1287
- description: "Read a file",
1288
- parameters: { ... },
1289
- sequential: true, // Other tools wait for this
1290
- run: async (params) => { ... },
1291
- }
1292
-
1293
- writeTool: Tool = {
1294
- name: "write_file",
1295
- description: "Write a file",
1296
- parameters: { ... },
1297
- dependsOn: ["read_file"], // Only runs after read_file completes
1298
- run: async (params) => { ... },
1299
- }
1300
- ```
1301
-
1302
- ### 8.6 Model-Driven Execution Order
1303
-
1304
- The model MAY signal execution dependencies in tool calls:
1305
-
1306
- ```pseudocode
1307
- // Model can return structured tool calls with dependencies
1308
- toolCalls = [
1309
- { id: "call_1", name: "read_file", args: {...} },
1310
- { id: "call_2", name: "process", args: {...}, after: ["call_1"] },
1311
- { id: "call_3", name: "write_file", args: {...}, after: ["call_2"] },
1312
- ]
1313
-
1314
- // Execution respects declared order:
1315
- // 1. read_file executes
1316
- // 2. process executes (after call_1)
1317
- // 3. write_file executes (after call_2)
1318
- ```
1319
-
1320
- If the model does not specify dependencies, tools execute in parallel (default).
1321
-
1322
- ### 8.7 Sub-Agent Event Propagation
1323
-
1324
- When sub-agents execute via streaming, their events SHOULD be propagated to the parent agent's stream. This enables observability into nested agent execution.
1325
-
1326
- **SubagentEvent Structure:**
1327
-
1328
- | Field | Type | Description |
1329
- |-------|------|-------------|
1330
- | `subagentId` | String | Unique ID of the sub-agent instance |
1331
- | `subagentType` | String | Type/name of the sub-agent (e.g., "explorer", "planner") |
1332
- | `parentToolCallId` | String | The tool call ID that spawned this sub-agent |
1333
-
1334
- **Event Types:**
1335
-
1336
- Sub-agent events use the following `UAPEventType` values:
1337
-
1338
- | Type | Description |
1339
- |------|-------------|
1340
- | `subagent_start` | Sub-agent execution began |
1341
- | `subagent_event` | Forwarded event from sub-agent (wraps inner event) |
1342
- | `subagent_end` | Sub-agent execution completed |
1343
-
1344
- **Event Data Structures:**
1345
-
1346
- ```pseudocode
1347
- // subagent_start event data
1348
- {
1349
- subagentId: String,
1350
- subagentType: String,
1351
- parentToolCallId: String,
1352
- prompt: String, // The task given to the sub-agent
1353
- timestamp: Integer, // Start time in milliseconds
1354
- }
1355
-
1356
- // subagent_event event data (forwarded events)
1357
- {
1358
- subagentId: String,
1359
- subagentType: String,
1360
- parentToolCallId: String,
1361
- innerEvent: AgentStreamEvent, // The actual event from sub-agent
1362
- }
1363
-
1364
- // subagent_end event data
1365
- {
1366
- subagentId: String,
1367
- subagentType: String,
1368
- parentToolCallId: String,
1369
- success: Boolean,
1370
- result?: String, // Sub-agent's response (if successful)
1371
- error?: String, // Error message (if failed)
1372
- timestamp: Integer, // End time in milliseconds
1373
- toolExecutions?: List<{ // Tools used by sub-agent
1374
- toolName: String,
1375
- arguments: Map,
1376
- result: String,
1377
- duration?: Integer, // Execution time in milliseconds
1378
- }>,
1379
- usage?: TokenUsage, // Token usage for sub-agent execution
1380
- }
1381
- ```
1382
-
1383
- **Implementation Pattern:**
1384
-
1385
- Tools that spawn sub-agents SHOULD accept an event callback and emit events during execution:
1386
-
1387
- ```pseudocode
1388
- interface SubagentToolOptions {
1389
- onSubagentEvent?: (event: SubagentEvent) -> void
1390
- }
1391
-
1392
- explorerTool: Tool = {
1393
- name: "explore",
1394
- description: "...",
1395
- parameters: { ... },
1396
- run: async (params, context) => {
1397
- subagentId = generateId()
1398
-
1399
- // Emit start event
1400
- context.onSubagentEvent?.({
1401
- type: "subagent_start",
1402
- subagentId,
1403
- subagentType: "explorer",
1404
- parentToolCallId: context.toolCallId,
1405
- prompt: params.query,
1406
- timestamp: Date.now(),
1407
- })
1408
-
1409
- // Stream sub-agent execution
1410
- stream = explorer.stream(params.query, AgentState.initial())
1411
-
1412
- for await (event of stream) {
1413
- // Forward inner events
1414
- context.onSubagentEvent?.({
1415
- type: "subagent_event",
1416
- subagentId,
1417
- subagentType: "explorer",
1418
- parentToolCallId: context.toolCallId,
1419
- innerEvent: event,
1420
- })
1421
- }
1422
-
1423
- result = await stream.result
1424
-
1425
- // Emit end event
1426
- context.onSubagentEvent?.({
1427
- type: "subagent_end",
1428
- subagentId,
1429
- subagentType: "explorer",
1430
- parentToolCallId: context.toolCallId,
1431
- success: true,
1432
- result: result.turn.response.text,
1433
- timestamp: Date.now(),
1434
- toolExecutions: result.turn.toolExecutions,
1435
- })
1436
-
1437
- return result.turn.response.text
1438
- },
1439
- }
1440
- ```
1441
-
1442
- **MUST Requirements:**
1443
-
1444
- 1. Sub-agent events MUST include `subagentId` to correlate events
1445
- 2. Sub-agent events MUST include `parentToolCallId` to associate with parent tool call
1446
- 3. `subagent_start` MUST be emitted before sub-agent execution begins
1447
- 4. `subagent_end` MUST be emitted after sub-agent execution completes (success or failure)
1448
- 5. `subagent_event` SHOULD forward all significant inner events (tool executions, text deltas)
1449
-
1450
- **SHOULD Requirements:**
1451
-
1452
- 1. Implementations SHOULD provide helper utilities for creating sub-agent tools with event propagation
1453
- 2. TUI/CLI implementations SHOULD display nested sub-agent events with visual indentation or hierarchy
1454
-
1455
- ### 8.8 Sub-Agent Trace Persistence
1456
-
1457
- Sub-agent execution traces MUST be persisted in AgentState for checkpoint recovery. This enables full restoration of hierarchical agent execution including nested tool calls, durations, and token usage.
1458
-
1459
- **SubagentExecutionTrace Structure:**
1460
-
1461
- | Field | Type | Required | Description |
1462
- |-------|------|----------|-------------|
1463
- | `subagentId` | String | Yes | Unique ID of the sub-agent instance |
1464
- | `subagentType` | String | Yes | Type/name of the sub-agent |
1465
- | `parentToolCallId` | String | Yes | Tool call ID that spawned this sub-agent |
1466
- | `prompt` | String | Yes | The task given to the sub-agent |
1467
- | `startTime` | Integer | Yes | Start timestamp (ms since epoch) |
1468
- | `endTime` | Integer | Yes | End timestamp (ms since epoch) |
1469
- | `success` | Boolean | Yes | Whether execution succeeded |
1470
- | `result` | String | No | Sub-agent's response (if successful) |
1471
- | `error` | String | No | Error message (if failed) |
1472
- | `toolExecutions` | List<ToolExecutionTrace> | No | Tools used by sub-agent |
1473
- | `usage` | TokenUsage | No | Token usage for sub-agent |
1474
-
1475
- **ToolExecutionTrace Structure:**
1476
-
1477
- | Field | Type | Required | Description |
1478
- |-------|------|----------|-------------|
1479
- | `toolName` | String | Yes | Name of the tool |
1480
- | `toolCallId` | String | No | Tool call ID |
1481
- | `arguments` | Map | Yes | Arguments passed to tool |
1482
- | `result` | String | Yes | Tool result |
1483
- | `isError` | Boolean | No | Whether tool errored |
1484
- | `duration` | Integer | No | Execution time in milliseconds |
1485
-
1486
- **AgentState Integration:**
1487
-
1488
- ```pseudocode
1489
- interface AgentState {
1490
- // ... existing fields ...
1491
- subagentTraces?: readonly SubagentExecutionTrace[]
1492
-
1493
- // Add a sub-agent trace to state
1494
- withSubagentTrace(trace: SubagentExecutionTrace) -> AgentState
1495
- }
1496
- ```
1497
-
1498
- **Serialization:**
1499
-
1500
- Sub-agent traces MUST be included in `AgentStateJSON` for checkpoint persistence:
1501
-
1502
- ```pseudocode
1503
- interface AgentStateJSON {
1504
- // ... existing fields ...
1505
- subagentTraces?: List<SubagentExecutionTraceJSON>
1506
- }
1507
- ```
1508
-
1509
- **MUST Requirements:**
1510
-
1511
- 1. Sub-agent traces MUST be collected when `subagent_end` events are emitted
1512
- 2. Traces MUST include all tool executions from the sub-agent
1513
- 3. Traces MUST be serialized in checkpoints
1514
- 4. Traces MUST be restored when loading from checkpoint
1515
-
1516
- ---
1517
-
1518
- ## 9. Middleware
1519
-
1520
- ### 9.1 Middleware Interface
1521
-
1522
- ```pseudocode
1523
- interface Middleware {
1524
- name: String
1525
- before?(context: MiddlewareContext) -> Promise<MiddlewareContext | void>
1526
- after?(context: MiddlewareContext, result: GenerateResult) -> Promise<GenerateResult>
1527
- onError?(context: MiddlewareContext, error: Error) -> Promise<GenerateResult | void>
1528
- }
1529
- ```
1530
-
1531
- **MiddlewareContext Structure:**
1532
-
1533
- | Field | Type | Description |
1534
- |-------|------|-------------|
1535
- | `agent` | Agent | The agent |
1536
- | `input` | Message | User input |
1537
- | `state` | AgentState | Current state |
1538
- | `metadata` | Map | Request metadata (mutable within middleware) |
1539
-
1540
- ### 9.2 Middleware Composition
1541
-
1542
- Middleware executes in order for `before`, reverse order for `after`:
1543
-
1544
- ```pseudocode
1545
- agent({
1546
- middleware: [first(), second(), third()],
1547
- })
1548
-
1549
- // Execution order:
1550
- // 1. first.before()
1551
- // 2. second.before()
1552
- // 3. third.before()
1553
- // 4. Agent execution
1554
- // 5. third.after()
1555
- // 6. second.after()
1556
- // 7. first.after()
1557
- ```
1558
-
1559
- ### 9.3 logging() Middleware (v1)
1560
-
1561
- ```pseudocode
1562
- logging(options?: LoggingOptions) -> Middleware
1563
- ```
1564
-
1565
- **LoggingOptions Structure:**
1566
-
1567
- | Field | Type | Default | Description |
1568
- |-------|------|---------|-------------|
1569
- | `level` | String | "info" | Log level: "debug", "info", "warn", "error" |
1570
- | `logger` | Function | console.log | Custom logger function |
1571
- | `includeMessages` | Boolean | false | Log full message content |
1572
- | `includeTiming` | Boolean | true | Log execution timing |
1573
-
1574
- ### 9.4 Custom Middleware
1575
-
1576
- ```pseudocode
1577
- timing: Middleware = {
1578
- name: "timing",
1579
-
1580
- before: async (context) => {
1581
- context.metadata.startTime = Date.now()
1582
- return context
1583
- },
1584
-
1585
- after: async (context, result) => {
1586
- duration = Date.now() - context.metadata.startTime
1587
- print(`Execution took ${duration}ms`)
1588
- return result
1589
- },
1590
- }
1591
- ```
1592
-
1593
- ---
1594
-
1595
- ## 10. Agent Strategy Hooks
1596
-
1597
- ### 10.1 AgentStrategy Structure
1598
-
1599
- | Field | Type | Description |
1600
- |-------|------|-------------|
1601
- | `stopCondition` | Function | Evaluate if execution should stop |
1602
- | `onStepStart` | Function | Called when step begins |
1603
- | `onReason` | Function | Called during reasoning phase (ReAct) |
1604
- | `onAct` | Function | Called during action phase |
1605
- | `onObserve` | Function | Called during observation phase |
1606
- | `onStepEnd` | Function | Called when step completes |
1607
- | `onComplete` | Function | Called when execution completes |
1608
- | `onError` | Function | Called on execution error |
1609
-
1610
- ### 10.2 Hook Signatures
1611
-
1612
- ```pseudocode
1613
- interface AgentStrategy {
1614
- stopCondition?: (state: AgentState) -> Boolean | Promise<Boolean>
1615
- onStepStart?: (step: Integer, state: AgentState) -> void
1616
- onReason?: (step: Integer, reasoning: String) -> void
1617
- onAct?: (step: Integer, actions: List<ToolCall>) -> void
1618
- onObserve?: (step: Integer, observations: List<ToolResult>) -> void
1619
- onStepEnd?: (step: Integer, result: { turn: Turn, state: AgentState }) -> void
1620
- onComplete?: (result: GenerateResult) -> void
1621
- onError?: (error: Error, state: AgentState) -> void | GenerateResult
1622
- }
1623
- ```
1624
-
1625
- ### 10.3 Stop Conditions
1626
-
1627
- Since UAP defaults to infinite execution, `stopCondition` is the primary way to control termination:
1628
-
1629
- ```pseudocode
1630
- agent({
1631
- model: anthropic("claude-sonnet-4-20250514"),
1632
- execution: react(), // Infinite by default
1633
- strategy: {
1634
- stopCondition: (state) => {
1635
- // Stop on explicit completion signal
1636
- if (state.metadata.taskComplete) return true
1637
-
1638
- // Stop on budget
1639
- if (state.metadata.totalTokens > 50000) return true
1640
-
1641
- // Stop on time
1642
- if (Date.now() - state.metadata.startTime > 300000) return true
1643
-
1644
- return false
1645
- },
1646
- },
1647
- })
1648
- ```
1649
-
1650
- ### 10.4 ToolUseStrategy (UPP Passthrough)
1651
-
1652
- UAP passes `toolStrategy` directly to the underlying `llm()` instance for UPP-level tool execution hooks. These hooks fire in real-time during tool execution, complementing the UAP-level `AgentStrategy` hooks.
1653
-
1654
- **ToolUseStrategy Structure (from UPP-1.2):**
1655
-
1656
- | Field | Type | Description |
1657
- |-------|------|-------------|
1658
- | `maxIterations` | Integer | Maximum tool execution rounds (default: Infinity) |
1659
- | `onToolCall` | Function | Called before each tool execution |
1660
- | `onBeforeCall` | Function | Called before execution, can cancel |
1661
- | `onAfterCall` | Function | Called after successful tool execution |
1662
- | `onError` | Function | Called on tool execution error |
1663
- | `onMaxIterations` | Function | Called when max iterations reached |
1664
-
1665
- **Hook Signatures:**
1666
-
1667
- ```pseudocode
1668
- interface ToolUseStrategy {
1669
- maxIterations?: Integer // Default: Infinity
1670
- onToolCall?: (tool: Tool, params: Map) -> void
1671
- onBeforeCall?: (tool: Tool, params: Map) -> Boolean // Return false to skip
1672
- onAfterCall?: (tool: Tool, params: Map, result: Any) -> void
1673
- onError?: (tool: Tool, params: Map, error: Error) -> void
1674
- onMaxIterations?: (iterations: Integer) -> void
1675
- }
1676
- ```
1677
-
1678
- **Usage with UAP:**
1679
-
1680
- ```pseudocode
1681
- agent({
1682
- model: anthropic("claude-sonnet-4-20250514"),
1683
- tools: [Bash, Read, Write],
1684
- // UAP-level hooks (step lifecycle)
1685
- strategy: {
1686
- onStepStart: (step, state) => print(`Step ${step}`),
1687
- onComplete: (result) => print("Done"),
1688
- },
1689
- // UPP-level hooks (real-time tool execution)
1690
- toolStrategy: {
1691
- maxIterations: Infinity, // UAP standard
1692
- onToolCall: (tool, params) => print(`Calling ${tool.name}`),
1693
- onAfterCall: (tool, params, result) => print(`${tool.name} completed`),
1694
- },
1695
- })
1696
- ```
1697
-
1698
- **Key Difference from AgentStrategy:**
1699
-
1700
- - `AgentStrategy` hooks fire at step boundaries (after LLM inference completes)
1701
- - `ToolUseStrategy` hooks fire immediately during tool execution (real-time)
1702
-
1703
- This distinction matters for logging and monitoring—`toolStrategy` provides visibility into tool calls as they happen, while `strategy` provides visibility into the agent's reasoning cycle.
1704
-
1705
- ---
1706
-
1707
- ## 11. Streaming
1708
-
1709
- ### 11.1 AgentStreamResult Interface
1710
-
1711
- ```pseudocode
1712
- interface AgentStreamResult {
1713
- [Symbol.asyncIterator](): AsyncIterator<AgentStreamEvent>
1714
- result: Promise<GenerateResult> // Resolves after completion
1715
- abort(): void
1716
- }
1717
- ```
1718
-
1719
- ### 11.2 AgentStreamEvent Structure
1720
-
1721
- UAP streaming provides both UAP-level events and UPP-level events:
1722
-
1723
- ```pseudocode
1724
- interface AgentStreamEvent {
1725
- source: "uap" | "upp"
1726
-
1727
- // Present when source === "uap"
1728
- uap?: {
1729
- type: UAPEventType
1730
- step: Integer
1731
- agentId: String
1732
- data: Map
1733
- }
1734
-
1735
- // Present when source === "upp"
1736
- upp?: StreamEvent // Original UPP StreamEvent
1737
- }
1738
- ```
1739
-
1740
- **UAPEventType Values:**
1741
-
1742
- | Type | Description |
1743
- |------|-------------|
1744
- | `step_start` | Step beginning |
1745
- | `step_end` | Step completed |
1746
- | `reasoning` | Reasoning output (ReAct) |
1747
- | `action` | Action taken |
1748
- | `observation` | Observation received |
1749
- | `subagent_start` | Sub-agent execution began |
1750
- | `subagent_event` | Forwarded event from sub-agent |
1751
- | `subagent_end` | Sub-agent execution completed |
1752
-
1753
- ### 11.3 Streaming Usage
1754
-
1755
- ```pseudocode
1756
- stream = agent.stream("Implement a feature", state)
1757
-
1758
- for await (event of stream) {
1759
- if (event.source === "uap") {
1760
- // UAP step-level events
1761
- if (event.uap.type === "step_start") {
1762
- print(`Step ${event.uap.step}`)
1763
- }
1764
- } else {
1765
- // UPP LLM events
1766
- if (event.upp.type === "text_delta") {
1767
- process.stdout.write(event.upp.delta.text ?? "")
1768
- }
1769
- }
1770
- }
1771
-
1772
- { turn, state: newState } = await stream.result
1773
- ```
1774
-
1775
- ### 11.4 Streaming State Completeness
1776
-
1777
- **Implementation Note:** The `state` returned by `stream.result` MUST include the complete execution history:
1778
-
1779
- - All messages from all steps (reasoning, actions, observations)
1780
- - All tool call results
1781
- - Updated step counter
1782
- - All reasoning traces (for ReAct)
1783
- - Updated plan status (for Plan strategy)
1784
-
1785
- The returned state MUST be structurally equivalent (same content; generated IDs may differ) to what `generate()` would return for the same execution. Streaming is an observation mechanism, not a different execution path.
1786
-
1787
- ```pseudocode
1788
- // These must produce equivalent final states:
1789
- { turn: t1, state: s1 } = await agent.generate(input, state)
1790
-
1791
- stream = agent.stream(input, state)
1792
- for await (event of stream) { /* consume */ }
1793
- { turn: t2, state: s2 } = await stream.result
1794
-
1795
- // s1 and s2 are structurally equivalent (different IDs, same content)
1796
- assert(s1.messages.length === s2.messages.length)
1797
- assert(s1.step === s2.step)
1798
- ```
1799
-
1800
- ---
1801
-
1802
- ## 12. Serialization
1803
-
1804
- ### 12.1 AgentState Serialization
1805
-
1806
- **AgentStateJSON Structure:**
1807
-
1808
- | Field | Type | Required | Description |
1809
- |-------|------|----------|-------------|
1810
- | `version` | String | Yes | UAP version |
1811
- | `id` | String | Yes | State ID |
1812
- | `messages` | List<MessageJSON> | Yes | UPP Message serialization |
1813
- | `step` | Integer | Yes | Step number |
1814
- | `metadata` | Map | Yes | User metadata |
1815
- | `reasoning` | List<String> | No | Reasoning traces |
1816
- | `plan` | List<PlanStepJSON> | No | Execution plan |
1817
- | `subagentTraces` | List<SubagentExecutionTraceJSON> | No | Sub-agent execution traces (see Section 8.8) |
1818
-
1819
- ### 12.2 Thread Tree Serialization
1820
-
1821
- **ThreadTreeJSON Structure:**
1822
-
1823
- | Field | Type | Required | Description |
1824
- |-------|------|----------|-------------|
1825
- | `rootId` | String | Yes | Root node ID |
1826
- | `currentId` | String | Yes | Current node ID |
1827
- | `nodes` | List<ThreadNodeJSON> | Yes | All nodes |
1828
-
1829
- ### 12.3 MUST Requirements
1830
-
1831
- 1. All IDs MUST be preserved exactly during round-trip
1832
- 2. Message metadata MUST be preserved
1833
- 3. Timestamps MUST use ISO 8601 format
1834
- 4. Binary data MUST be base64 encoded
1835
- 5. Version MUST be checked during deserialization
1836
-
1837
- ### 12.4 Checkpointing
1838
-
1839
- Checkpointing enables step-level persistence for crash recovery and session resume. The SDK provides a pluggable `CheckpointStore` interface with a reference file-based implementation.
1840
-
1841
- #### 12.4.1 CheckpointStore Interface
1842
-
1843
- ```pseudocode
1844
- interface CheckpointStore {
1845
- /** Save a checkpoint at the current state */
1846
- save(sessionId: String, state: AgentStateJSON): Promise<void>
1847
-
1848
- /** Load the most recent checkpoint for a session */
1849
- load(sessionId: String): Promise<AgentStateJSON | null>
1850
-
1851
- /** Delete all checkpoints for a session */
1852
- delete(sessionId: String): Promise<void>
1853
-
1854
- /** List all session IDs with checkpoints */
1855
- list(): Promise<List<String>>
1856
- }
1857
- ```
1858
-
1859
- #### 12.4.2 Checkpoint Metadata
1860
-
1861
- Each checkpoint MAY include additional metadata:
1862
-
1863
- | Field | Type | Description |
1864
- |-------|------|-------------|
1865
- | `sessionId` | String | Session identifier (UUIDv4 or user-provided) |
1866
- | `checkpointId` | String | Unique checkpoint ID |
1867
- | `timestamp` | String | ISO 8601 timestamp |
1868
- | `step` | Integer | Step number at checkpoint |
1869
- | `agentId` | String | Agent instance ID |
1870
-
1871
- #### 12.4.3 fileCheckpoints() Reference Implementation
1872
-
1873
- ```pseudocode
1874
- fileCheckpoints(options?: FileCheckpointOptions) -> CheckpointStore
1875
- ```
1876
-
1877
- **FileCheckpointOptions Structure:**
1878
-
1879
- | Field | Type | Default | Description |
1880
- |-------|------|---------|-------------|
1881
- | `dir` | String | ".checkpoints" | Directory for checkpoint files |
1882
-
1883
- **File Structure:**
1884
-
1885
- ```
1886
- {dir}/
1887
- {sessionId}/
1888
- checkpoint.json # Latest state
1889
- metadata.json # Session metadata
1890
- ```
1891
-
1892
- **Usage:**
1893
-
1894
- ```pseudocode
1895
- import { agent, AgentState } from "agents"
1896
- import { fileCheckpoints } from "agents/checkpoint"
1897
-
1898
- // Create checkpoint store
1899
- store = fileCheckpoints({ dir: "./checkpoints" })
1900
-
1901
- // Create agent with checkpointing
1902
- coder = agent({
1903
- model: anthropic("claude-sonnet-4-20250514"),
1904
- tools: [Bash, Read, Write],
1905
- checkpoints: store, // Enable checkpointing
1906
- sessionId: "my-session", // Optional: auto-generated if not provided
1907
- })
1908
-
1909
- // Checkpoints are saved automatically at each step_end
1910
- { turn, state } = await coder.generate("Fix the bug", AgentState.initial())
1911
- ```
1912
-
1913
- #### 12.4.4 Resume from Checkpoint
1914
-
1915
- ```pseudocode
1916
- // Resume from existing session
1917
- store = fileCheckpoints({ dir: "./checkpoints" })
1918
- saved = await store.load("my-session")
1919
-
1920
- if (saved) {
1921
- restored = AgentState.fromJSON(saved)
1922
- { turn, state } = await coder.generate("Continue", restored)
1923
- } else {
1924
- { turn, state } = await coder.generate("Start fresh", AgentState.initial())
1925
- }
1926
- ```
1927
-
1928
- #### 12.4.5 Checkpoint Strategy Integration
1929
-
1930
- When `checkpoints` is provided to `agent()`, execution strategies MUST:
1931
-
1932
- 1. Call `store.save(sessionId, state.toJSON())` after each `step_end` event
1933
- 2. Generate `sessionId` if not provided (UUIDv4)
1934
- 3. Include `sessionId` in returned state metadata
1935
-
1936
- ```pseudocode
1937
- // Automatic checkpointing in loop strategy
1938
- while (!done) {
1939
- // ... execute step ...
1940
-
1941
- strategy.onStepEnd?.(step, { turn, state: currentState })
1942
-
1943
- // Auto-checkpoint after step completes
1944
- if (checkpointStore) {
1945
- checkpointStore.save(sessionId, currentState.toJSON()).catch(logError) // fire-and-forget: MUST NOT block execution or throw
1946
- }
1947
- }
1948
- ```
1949
-
1950
- #### 12.4.6 Custom CheckpointStore Implementations
1951
-
1952
- Developers MAY implement custom stores for different backends:
1953
-
1954
- ```pseudocode
1955
- // Redis checkpoint store
1956
- redisCheckpoints = (client: RedisClient): CheckpointStore => ({
1957
- save: async (sessionId, state) => {
1958
- await client.set(`checkpoint:${sessionId}`, JSON.stringify(state))
1959
- },
1960
- load: async (sessionId) => {
1961
- data = await client.get(`checkpoint:${sessionId}`)
1962
- return data ? JSON.parse(data) : null
1963
- },
1964
- delete: async (sessionId) => {
1965
- await client.del(`checkpoint:${sessionId}`)
1966
- },
1967
- list: async () => {
1968
- keys = await client.keys("checkpoint:*")
1969
- return keys.map(k => k.replace("checkpoint:", ""))
1970
- },
1971
- })
1972
-
1973
- // Usage
1974
- agent({
1975
- model: anthropic("claude-sonnet-4-20250514"),
1976
- checkpoints: redisCheckpoints(redisClient),
1977
- sessionId: "user-123-task-456",
1978
- })
1979
- ```
1980
-
1981
- #### 12.4.7 MUST Requirements for Checkpointing
1982
-
1983
- 1. Checkpoints MUST be saved after each `step_end` event when a store is configured
1984
- 2. Checkpoint saves MUST NOT block execution (fire-and-forget with error logging)
1985
- 3. `sessionId` MUST be preserved across checkpoint/restore cycles
1986
- 4. Restored state MUST be indistinguishable from live state for execution purposes
1987
- 5. Failed checkpoint saves SHOULD log errors but MUST NOT throw
1988
-
1989
- #### 12.4.8 SHOULD Requirements for Checkpointing
1990
-
1991
- 1. Implementations SHOULD provide a file-based reference implementation
1992
- 2. Checkpoint stores SHOULD handle concurrent access safely
1993
- 3. Implementations SHOULD support checkpoint compression for large states
1994
-
1995
- ---
1996
-
1997
- ## 13. Data Type Definitions
1998
-
1999
- ### 13.1 Types from UPP-1.2 (Used Directly)
2000
-
2001
- - `llm`, `LLMInstance`, `LLMOptions`, `ProviderConfig`, `ModelReference`
2002
- - `Message`, `UserMessage`, `AssistantMessage`, `ToolResultMessage`
2003
- - `Turn`, `TokenUsage`
2004
- - `Tool`, `ToolCall`, `ToolResult`, `ToolExecution`
2005
- - `StreamResult`, `StreamEvent`, `StreamEventType`
2006
- - `UPPError`, `ErrorCode`
2007
- - `Thread`, `ThreadJSON`
2008
-
2009
- ### 13.2 UAP-Specific Types
2010
-
2011
- **Agent Types:**
2012
-
2013
- ```pseudocode
2014
- // AgentOptions extends LLMOptions for full UPP passthrough
2015
- interface AgentOptions extends Partial<LLMOptions> {
2016
- // Required
2017
- model: ModelReference
2018
-
2019
- // UAP-specific options
2020
- execution?: ExecutionStrategy // Default: loop()
2021
- middleware?: List<Middleware> // Ordered middleware pipeline
2022
- strategy?: AgentStrategy // Agent lifecycle hooks
2023
-
2024
- // Inherited from LLMOptions (passthrough to llm())
2025
- // params?: Map // Model-specific parameters
2026
- // config?: ProviderConfig // Provider infrastructure
2027
- // tools?: List<Tool> // Available tools
2028
- // system?: String // System prompt
2029
- // structure?: JSONSchema // Structured output schema
2030
- // toolStrategy?: ToolUseStrategy // Tool execution hooks
2031
- }
2032
-
2033
- interface Agent {
2034
- id: String
2035
- model: ModelReference
2036
- tools: List<Tool>
2037
- system?: String
2038
- generate(input, state): Promise<GenerateResult>
2039
- stream(input, state): AgentStreamResult
2040
- ask(input, state): Promise<GenerateResult>
2041
- query(input): Promise<Turn>
2042
- }
2043
-
2044
- interface GenerateResult {
2045
- turn: Turn
2046
- state: AgentState
2047
- }
2048
- ```
2049
-
2050
- **State Types:**
2051
-
2052
- ```pseudocode
2053
- interface AgentState {
2054
- id: String
2055
- messages: List<Message>
2056
- step: Integer
2057
- metadata: Map
2058
- reasoning: List<String>
2059
- plan?: List<PlanStep>
2060
-
2061
- static initial(): AgentState
2062
- withMessage(message): AgentState
2063
- withMessages(messages): AgentState
2064
- withContext(messages): AgentState
2065
- withStep(step): AgentState
2066
- withMetadata(key, value): AgentState
2067
- withReasoning(reasoning): AgentState
2068
- withPlan(plan): AgentState
2069
- toJSON(): AgentStateJSON
2070
- static fromJSON(json): AgentState
2071
- }
2072
- ```
2073
-
2074
- **Execution Types:**
2075
-
2076
- ```pseudocode
2077
- interface ExecutionStrategy {
2078
- name: String
2079
- execute(context): Promise<ExecutionResult>
2080
- stream(context): AgentStreamResult
2081
- }
2082
-
2083
- interface LoopOptions {
2084
- maxIterations?: Integer // Default: Infinity
2085
- }
2086
-
2087
- interface ReactOptions {
2088
- maxSteps?: Integer // Default: Infinity
2089
- reasoningPrompt?: String
2090
- }
2091
-
2092
- interface PlanOptions {
2093
- maxPlanSteps?: Integer // Default: Infinity
2094
- allowReplan?: Boolean
2095
- planSchema?: JSONSchema
2096
- }
2097
- ```
2098
-
2099
- **Tool Dependency Types:**
2100
-
2101
- ```pseudocode
2102
- interface ToolDependencyOptions {
2103
- sequential?: Boolean // Must complete before others
2104
- dependsOn?: List<String> // Tool names to wait for
2105
- }
2106
-
2107
- // Extends UPP Tool
2108
- interface Tool {
2109
- name: String
2110
- description: String
2111
- parameters: JSONSchema
2112
- run: Function
2113
- sequential?: Boolean
2114
- dependsOn?: List<String>
2115
- }
2116
- ```
2117
-
2118
- **Checkpoint Types:**
2119
-
2120
- ```pseudocode
2121
- interface CheckpointStore {
2122
- save(sessionId: String, state: AgentStateJSON): Promise<void>
2123
- load(sessionId: String): Promise<AgentStateJSON | null>
2124
- delete(sessionId: String): Promise<void>
2125
- list(): Promise<List<String>>
2126
- }
2127
-
2128
- interface FileCheckpointOptions {
2129
- dir?: String // Default: ".checkpoints"
2130
- }
2131
-
2132
- interface CheckpointMetadata {
2133
- sessionId: String
2134
- checkpointId: String
2135
- timestamp: String // ISO 8601
2136
- step: Integer
2137
- agentId: String
2138
- }
2139
- ```
2140
-
2141
- ### 13.3 Export List
2142
-
2143
- **Entry Points:**
2144
- - `agent`
2145
- - `AgentState`
2146
-
2147
- **Execution Strategies (from agents/execution):**
2148
- - `loop`
2149
- - `react`
2150
- - `plan`
2151
-
2152
- **Middleware (from agents/middleware):**
2153
- - `logging`
2154
-
2155
- **Checkpointing (from agents/checkpoint):**
2156
- - `fileCheckpoints`
2157
- - `CheckpointStore` (type)
2158
-
2159
- **Classes:**
2160
- - `ThreadTree`
2161
- - `ThreadNode`
2162
-
2163
- ---
2164
-
2165
- ## 14. Conformance
2166
-
2167
- ### 14.1 Conformance Levels
2168
-
2169
- **Level 1: Core Agent (Required)**
2170
- - `agent()` function
2171
- - `generate()`, `stream()`, `ask()`, `query()` methods
2172
- - `AgentState` immutable state
2173
- - `loop()` execution strategy
2174
- - Returns standard UPP Turn
2175
-
2176
- **Level 2: Advanced Execution (Required)**
2177
- - `react()` strategy
2178
- - `plan()` strategy
2179
- - Custom strategy support
2180
- - Infinite defaults
2181
-
2182
- **Level 3: Thread Trees (Optional)**
2183
- - `ThreadTree` implementation
2184
- - Branching and checkout
2185
- - History traversal
2186
-
2187
- **Level 4: Middleware (Required)**
2188
- - Middleware pipeline
2189
- - `logging()` middleware
2190
- - Custom middleware support
2191
-
2192
- ### 14.2 MUST Requirements Summary
2193
-
2194
- 1. **Type Uniformity:** MUST use UPP-1.2 types directly without wrapping
2195
- 2. **No Re-exports:** MUST NOT re-export UPP types
2196
- 3. **Functional State:** `AgentState` MUST be immutable
2197
- 4. **Infinite Defaults:** `maxIterations`/`maxSteps`/`maxPlanSteps`/`toolStrategy.maxIterations` MUST default to Infinity
2198
- 5. **Explicit Sub-Agents:** MUST NOT auto-generate tool schemas from agents
2199
- 6. **Identity:** All generated IDs MUST be UUIDv4 (a user-provided `sessionId` is exempt)
2200
- 7. **Serialization:** State MUST be fully serializable
2201
- 8. **LLM Passthrough:** `AgentOptions` MUST extend `LLMOptions` for full UPP passthrough
2202
-
2203
- ### 14.3 MUST NOT Requirements
2204
-
2205
- 1. MUST NOT impose artificial execution limits by default
2206
- 2. MUST NOT mutate state internally
2207
- 3. MUST NOT auto-generate sub-agent tool schemas
2208
- 4. MUST NOT hide conversation history in implicit state
2209
-
2210
- ---
2211
-
2212
- ## 15. Security Considerations
2213
-
2214
- ### 15.1 Developer Responsibility
2215
-
2216
- UAP explicitly places security responsibility with the developer:
2217
-
2218
- - **Runaway Agents:** Developer must implement `stopCondition` or explicit limits
2219
- - **Resource Exhaustion:** Developer must implement budget middleware or limits
2220
- - **Cost Control:** Developer must track token usage via state metadata
2221
-
2222
- The protocol provides the pipe; the developer provides the valves.
2223
-
2224
- ### 15.2 Sub-Agent Security
2225
-
2226
- - Sub-agent tools execute with whatever permissions their `run` function has
2227
- - Nested sub-agent calls can amplify access—developer must audit tool chains
2228
- - Stop conditions in parent do not automatically propagate to sub-agents
2229
-
2230
- ### 15.3 Serialization Security
2231
-
2232
- - Serialized state may contain sensitive conversation data
2233
- - State SHOULD be encrypted at rest in production
2234
- - Deserialization MUST validate structure
2235
- - Untrusted serialized data SHOULD NOT be deserialized
2236
-
2237
- ### 15.4 Tool Execution Security
2238
-
2239
- All UPP-1.2 tool security considerations apply. Additionally:
2240
-
2241
- - Tools with `sequential: true` or `dependsOn` create execution ordering that may have security implications
2242
- - Model-driven execution order gives the model control over execution flow
2243
-
2244
- ---
2245
-
2246
- ## Appendix A: Migration from Previous Draft
2247
-
2248
- ### A.1 Breaking Changes
2249
-
2250
- | Previous | Current | Rationale |
2251
- |----------|---------|-----------|
2252
- | `run()` | `generate()` | Consistency with UPP `llm.generate()` |
2253
- | `ask(agent, input)` | `agent.ask(input, state)` | Method on agent, explicit state |
2254
- | `query(agent, input)` | `agent.query(input)` | Method on agent |
2255
- | `session()` | Removed | Replaced by functional `AgentState` |
2256
- | `maxIterations: 10` | `maxIterations: Infinity` | Pipe not nanny |
2257
- | `agent.toTool()` | Explicit Tool | No magic schema generation |
2258
-
2259
- ### A.2 State Migration
2260
-
2261
- ```pseudocode
2262
- // Previous (implicit state)
2263
- session = session(agent)
2264
- turn1 = await session.run("Hello")
2265
- turn2 = await session.run("Continue")
2266
-
2267
- // Current (explicit state)
2268
- s0 = AgentState.initial()
2269
- { turn: t1, state: s1 } = await agent.generate("Hello", s0)
2270
- { turn: t2, state: s2 } = await agent.generate("Continue", s1)
2271
- ```
2272
-
2273
- ---
2274
-
2275
- ## Appendix B: Complete Example
2276
-
2277
- ```pseudocode
2278
- import { agent, AgentState } from "agents"
2279
- import { react } from "agents/execution"
2280
- import { logging } from "agents/middleware"
2281
- import { Tool } from "upp"
2282
- import anthropic from "upp/anthropic"
2283
-
2284
- // Define a sub-agent
2285
- explorer = agent({
2286
- model: anthropic("claude-haiku-4-20250514"),
2287
- tools: [Glob, Grep, Read],
2288
- system: "You explore codebases and report findings.",
2289
- })
2290
-
2291
- // Explicitly define sub-agent as tool
2292
- explorerTool: Tool = {
2293
- name: "explore",
2294
- description: "Explore codebase to find relevant files and code",
2295
- parameters: {
2296
- type: "object",
2297
- properties: {
2298
- query: { type: "string", description: "What to find" },
2299
- },
2300
- required: ["query"],
2301
- },
2302
- run: async (params) => {
2303
- turn = await explorer.query(params.query)
2304
- return turn.response.text
2305
- },
2306
- }
2307
-
2308
- // Main agent with explicit limits (developer's choice)
2309
- coder = agent({
2310
- model: anthropic("claude-sonnet-4-20250514"),
2311
- execution: react(), // Infinite by default
2312
- tools: [Bash, Read, Write, explorerTool],
2313
- system: "You are an expert software engineer.",
2314
- middleware: [logging({ level: "info" })],
2315
- // UAP-level hooks (step lifecycle)
2316
- strategy: {
2317
- // Developer implements their own limits
2318
- stopCondition: (state) => {
2319
- if (state.metadata.taskComplete) return true
2320
- if (state.step > 50) return true // Explicit step limit
2321
- return false
2322
- },
2323
- onStepStart: (step, state) => print(`Step ${step}`),
2324
- onComplete: (result) => print(`Done: ${result.turn.usage.totalTokens} tokens`),
2325
- },
2326
- // UPP-level hooks (real-time tool execution)
2327
- toolStrategy: {
2328
- maxIterations: Infinity, // UAP standard - no artificial limits
2329
- onToolCall: (tool, params) => print(`🔧 ${tool.name}`),
2330
- onAfterCall: (tool, params, result) => print(`✓ ${tool.name} completed`),
2331
- },
2332
- })
2333
-
2334
- // Functional execution
2335
- s0 = AgentState.initial()
2336
- s0 = s0.withMetadata("startTime", Date.now())
2337
-
2338
- { turn, state } = await coder.generate(
2339
- "Find and fix all TypeScript errors",
2340
- s0
2341
- )
2342
-
2343
- // Save state for later
2344
- json = state.toJSON()
2345
- await storage.save("my-task", JSON.stringify(json))
2346
-
2347
- // Restore and continue
2348
- saved = JSON.parse(await storage.load("my-task"))
2349
- restored = AgentState.fromJSON(saved)
2350
- { turn: t2, state: s2 } = await coder.generate("Now add tests", restored)
2351
- ```
2352
-
2353
- ---
2354
-
2355
- *End of UAP-1.0 Specification*